-- Module:Multilingual description
--
-- From Chaosvermittlung Wiki
-- Jump to navigation Jump to search

local p = {}

--[==[
 Language codes that may be passed as parameter names but are not known by
 name, each mapped to another code that is known. Remapping yields a visible
 language name and, where possible, a BCP47-conforming code.

 Entry order is kept as in the original table on purpose: several keys share
 the same target code.
--]==]
local remappedLanguages = {
    -- Legacy broken codes (but known) -> changed new code (also known, but
    -- conforming to BCP47).
    als = 'gsw',
    ['bat-smg'] = 'sgs',
    ['be-x-old'] = 'be-tarask',
    bh = 'bho',
    bu = 'my',
    ['fiu-vro'] = 'vro',
    -- Usage of 'nrm' on Wikimedia for Norman conflicts with the standard
    -- 'nrm', which actually refers to an unrelated language.
    nrm = 'nrf',
    simple = 'en',
    ['zh-classical'] = 'lzh',
    ['zh-min-nan'] = 'nan',
    ['zh-wuu'] = 'wuu',
    ['zh-yue'] = 'yue',

    -- Both codes are conforming and supported, but only the target is known.
    ['en-us'] = 'en',
    ['fa-af'] = 'fa', -- actually means "Eastern Dari"
    ['fr-x-galo'] = 'fr',
    ['ha-latn'] = 'ha', -- the Latin script is the default since the 1950's
    ['ha-arab'] = 'ha', -- the Arabic script is historic, without clear orthography
    ['ko-kr'] = 'ko',
    ['ku-latn'] = 'ku', -- the Latin script is the default since the 1950's
    ['ku-cyrl'] = 'ku', -- the Cyrillic script is still used
    -- Both conforming and supported, but 'no' is now used only for meaning
    -- 'nb' in MediaWiki.
    no = 'nb',
    prd = 'fa', -- prd is "Parsi-Dari"
    tgl = 'tl',

    -- The alternate known code is non-standard and in fact not supported,
    -- but it has a correct native name.
    ['sr-cyrl'] = 'sr-ec',
    ['sr-latn'] = 'sr-el',
}

-- Append one tracking-category wikilink to the list of output fragments.
--
-- descriptions: array of strings being accumulated; one entry is appended.
-- kind: either 'deprecated' or 'unsupported'. Any value other than
--       'deprecated' selects the "unsupported" category suffix.
local function addTracking(descriptions, kind)
    local categorize
    if kind == 'deprecated' then
        categorize = ' using deprecated language codes'
    else
        categorize = ' using unsupported language codes'
    end
    -- NOTE(review): the second argument was missing here (the wikilink literal
    -- appears to have been stripped when the page was rendered). The
    -- '[[Category:Pages' prefix is a reconstruction; confirm the exact category
    -- name against the wiki before deploying.
    table.insert(descriptions, '[[Category:Pages' .. categorize .. ']]')
end

-- List of language tags, iterated in order by _mld.
-- NOTE(review): presumably sorted by native language name; confirm in the
-- 'Module:Multilingual description/sort' submodule.
local sortedKnownLanguageTags = require('Module:Multilingual description/sort')

-- Selector from Module:Dir, called below as dir(lang, 'rtl', 'ltr').
-- NOTE(review): presumably returns its 2nd argument for right-to-left
-- languages and its 3rd otherwise; confirm in Module:Dir.
local dir = require('Module:Dir').select

-- Render one description through the 'Ls' template and append the result.
--
-- descriptions: array of output fragments; at most one entry is appended.
-- lang:         language code, passed as the template's first argument.
-- description:  text to show; ignored unless it is a string that is
--               non-empty after trimming.
-- update:       forwarded unchanged as the template's 'update' argument
--               (may be nil).
local function addDescription(descriptions, lang, description, update)
    if type(description) ~= 'string' then
        return
    end
    local text = mw.text.trim(description)
    if text == '' then
        return
    end
    local templateArgs = {
        lang,
        text,
        dir = dir(lang, 'rtl', 'ltr'),
        classes = 'description',
        update = update
    }
    local rendered = mw.getCurrentFrame():expandTemplate{
        title = 'Ls',
        args = templateArgs
    }
    table.insert(descriptions, rendered)
end

-- Build the concatenated multilingual description from a table of arguments.
--
-- args: table mapping language codes to description strings. Entries whose
--       key or value is not a string are ignored. The table is not modified.
-- Returns a single string: the rendered descriptions followed by any
-- tracking categories.
local function _mld(args)
    -- Shallow copy of arguments (because keys in args cannot be unset if args
    -- is hollow, in a parent frame outside Lua).
    -- DO NOT copy the metatable that exposes only a *read-only* interface with
    -- accessors to PHP arrays (mw.clone does NOT work)!
    local byLang = {}
    for lang, description in pairs(args) do
        if type(lang) == 'string' and type(description) == 'string' then
            -- MediaWiki trims the names of named arguments and their values,
            -- but does not remove HTML comments in these names (some Mld
            -- contain parameters like "| sk <!-- comment --> = ...").
            -- Strip the comments, then trim again. The assignment keeps only
            -- the first result of gsub (the string, not the match count).
            lang = lang:gsub('<!%-%-.-%-%->', ''):gsub('^%s*(.-)%s*$', '%1')
            byLang[lang] = description
        end
    end
    args = byLang

    -- Move descriptions given under a deprecated code to its replacement,
    -- but only when the replacement is usable and the deprecated code is not.
    local remapped = false
    for cur, alt in pairs(remappedLanguages) do
        if args[cur] and not(mw.language.isSupportedLanguage(cur) and mw.language.isKnownLanguageTag(cur))
                and (mw.language.isSupportedLanguage(alt) and mw.language.isKnownLanguageTag(alt)) then
            remapped = true
            if args[alt] == nil then -- only if this does not conflict
                args[alt] = args[cur] -- set key for alternate known language
            end
            args[cur] = nil -- unset the standard unknown key
        end
    end

    local descriptions = {}
    -- First all known languages in order if they have a description.
    for _, lang in ipairs(sortedKnownLanguageTags) do
        if args[lang] ~= nil then
            addDescription(descriptions, lang, args[lang], nil)
            args[lang] = nil
        end
    end

    -- Then every remaining language. Supported ones are rendered normally;
    -- unsupported ones are still rendered, but with the 'update' argument set
    -- to the code, and they trigger the tracking category below.
    -- pairs() has no defined order, so sort the leftover codes to make the
    -- output deterministic. All keys are strings here (see the copy above).
    local leftovers = {}
    for lang in pairs(args) do
        leftovers[#leftovers + 1] = lang
    end
    table.sort(leftovers)
    local unsupported = false
    for _, lang in ipairs(leftovers) do
        if mw.language.isSupportedLanguage(lang) then
            addDescription(descriptions, lang, args[lang], nil)
        else
            addDescription(descriptions, lang, args[lang], lang)
            unsupported = true
        end
    end

    if remapped then
        addTracking(descriptions, 'deprecated')
    end
    if unsupported then
        addTracking(descriptions, 'unsupported')
    end

    --mw.logObject(descriptions)

    return table.concat(descriptions)
end

-- Module entry point: renders the multilingual description from the
-- arguments of the parent frame.
-- Falls back to an empty argument table when there is no parent frame or the
-- parent frame has no args.
function p.mld(frame)
    local parent = frame:getParent()
    local parentArgs = parent and parent.args or {}
    return _mld(parentArgs)
end

-- Self-test stored in the module's metatable. Returns true when _mld
-- produces the expected output for a fixed input; otherwise logs both
-- strings and returns false.
setmetatable(p, {quickTests = function()
    local input = {
        [1] = 'One?', -- discarded
        unsupported = 'What?', -- unsupported
        en = ' ', -- empty description after trimming (discarded)
        als = 'GSW', -- will be remapped
        ['en-gb'] = 'EN-GB ', -- trimming at end
        ['en-ca'] = 'EN-CA ',
        de = ' DE', -- trimming at start
        fr = ' FR ', -- trimming both ends
        rue = 'RUE',
        ru = 'RU',
        ko = 'KO',
        ja = 'JA',
        zh = 'ZH',
        he = 'HE',
        ur = 'UR',
        ar = 'AR',
        ro = 'RO',
        ['be-tarask'] = 'BE-TARASK',
        dv = 'DV',
    }

    --[=[
    Each row is { lang, description, update }, listed in the exact order to
    expect according to native language names. The positional argument and the
    empty description are absent; the unsupported code comes last, with its
    'update' argument set.
    --]=]
    local expectedRows = {
        { 'gsw', 'GSW' }, -- Alemannisch (remapped)
        { 'en-gb', 'EN-GB' }, -- British English
        { 'en-ca', 'EN-CA ' }, -- Canadian English
        { 'de', 'DE' }, -- Deutsch
        { 'fr', 'FR' }, -- français
        { 'ro', 'RO' }, -- română
        { 'be-tarask', 'BE-TARASK' }, -- беларуская (тарашкевіца)
        { 'rue', 'RUE' }, -- русиньскый
        { 'ru', 'RU' }, -- русский
        { 'ko', 'KO' }, -- 한국어
        { 'ja', 'JA' }, -- 日本語
        { 'zh', 'ZH' }, -- 中文
        { 'he', 'HE' }, -- עברית
        { 'ur', 'UR' }, -- اردو
        { 'ar', 'AR' }, -- العربية
        { 'dv', 'DV' }, -- ދިވެހިބަސް
        { 'unsupported', 'What?', 'unsupported' },
    }

    local fragments = {}
    for _, row in ipairs(expectedRows) do
        addDescription(fragments, row[1], row[2], row[3])
    end
    addTracking(fragments, 'deprecated')
    addTracking(fragments, 'unsupported')

    local expect = table.concat(fragments)
    local actual = _mld(input)
    if actual == expect then
        return true
    end
    mw.log('expect:\n' .. expect)
    mw.log('actual:\n' .. actual)
    return false
end})

--[==[
Type this to run tests in the Lua console:
=getmetatable(p).quickTests() -- should return true
--]==]
return p