Module:Citation/CS1: Difference between revisions
From Vigyanwiki
No edit summary |
m (1 revision imported) |
||
| (21 intermediate revisions by 7 users not shown) | |||
| Line 1: | Line 1: | ||
require (' | require ('strict'); | ||
--[[--------------------------< F O R W A R D D E C L A R A T I O N S >-------------------------------------- | --[[--------------------------< F O R W A R D D E C L A R A T I O N S >-------------------------------------- | ||
| Line 10: | Line 10: | ||
local utilities; -- functions in Module:Citation/CS1/Utilities | local utilities; -- functions in Module:Citation/CS1/Utilities | ||
local z ={}; -- table of tables in Module:Citation/CS1/Utilities | local z = {}; -- table of tables in Module:Citation/CS1/Utilities | ||
local identifiers; -- functions and tables in Module:Citation/CS1/Identifiers | local identifiers; -- functions and tables in Module:Citation/CS1/Identifiers | ||
| Line 154: | Line 154: | ||
end | end | ||
for _, d in ipairs ( | for _, d in ipairs (cfg.single_letter_2nd_lvl_domains_t) do -- look for single letter second level domain names for these top level domains | ||
if domain:match ('%f[%w][%w]%.' .. d) then | if domain:match ('%f[%w][%w]%.' .. d) then | ||
return true | return true | ||
| Line 265: | Line 265: | ||
local function link_title_ok (link, lorig, title, torig) | local function link_title_ok (link, lorig, title, torig) | ||
local orig; | local orig; | ||
if utilities.is_set (link) then -- don't bother if <param>-link doesn't have a value | if utilities.is_set (link) then -- don't bother if <param>-link doesn't have a value | ||
if not link_param_ok (link) then -- check |<param>-link= markup | if not link_param_ok (link) then -- check |<param>-link= markup | ||
| Line 401: | Line 401: | ||
utilities.set_message ('err_bare_url_missing_title', {utilities.wrap_style ('parameter', source)}); | utilities.set_message ('err_bare_url_missing_title', {utilities.wrap_style ('parameter', source)}); | ||
else | else | ||
error (cfg.messages["bare_url_no_origin"]); | error (cfg.messages["bare_url_no_origin"]); -- programmer error; valid parameter name does not have matching meta-parameter | ||
end | end | ||
end | end | ||
| Line 529: | Line 529: | ||
lang = script_value:match('^(%l%l%l?)%s*:%s*%S.*'); -- get the language prefix or nil if there is no script | lang = script_value:match('^(%l%l%l?)%s*:%s*%S.*'); -- get the language prefix or nil if there is no script | ||
if not utilities.is_set (lang) then | if not utilities.is_set (lang) then | ||
utilities.set_message ('err_script_parameter', {script_param, 'missing title part'}); -- prefix without 'title'; add error message | utilities.set_message ('err_script_parameter', {script_param, cfg.err_msg_supl['missing title part']}); -- prefix without 'title'; add error message | ||
return ''; -- script_value was just the prefix so return empty string | return ''; -- script_value was just the prefix so return empty string | ||
end | end | ||
-- if we get this far we have prefix and script | -- if we get this far we have prefix and script | ||
name = cfg. | name = cfg.lang_tag_remap[lang] or mw.language.fetchLanguageName( lang, cfg.this_wiki_code ); -- get language name so that we can use it to categorize | ||
if utilities.is_set (name) then -- is prefix a proper ISO 639-1 language code? | if utilities.is_set (name) then -- is prefix a proper ISO 639-1 language code? | ||
script_value = script_value:gsub ('^%l+%s*:%s*', ''); -- strip prefix from script | script_value = script_value:gsub ('^%l+%s*:%s*', ''); -- strip prefix from script | ||
| Line 540: | Line 540: | ||
utilities.add_prop_cat ('script', {name, lang}) | utilities.add_prop_cat ('script', {name, lang}) | ||
else | else | ||
utilities.set_message ('err_script_parameter', {script_param, 'unknown language code'}); -- unknown script-language; add error message | utilities.set_message ('err_script_parameter', {script_param, cfg.err_msg_supl['unknown language code']}); -- unknown script-language; add error message | ||
end | end | ||
lang = ' lang="' .. lang .. '" '; -- convert prefix into a lang attribute | lang = ' lang="' .. lang .. '" '; -- convert prefix into a lang attribute | ||
else | else | ||
utilities.set_message ('err_script_parameter', {script_param, 'invalid language code'}); -- invalid language code; add error message | utilities.set_message ('err_script_parameter', {script_param, cfg.err_msg_supl['invalid language code']}); -- invalid language code; add error message | ||
lang = ''; -- invalid so set lang to empty string | lang = ''; -- invalid so set lang to empty string | ||
end | end | ||
else | else | ||
utilities.set_message ('err_script_parameter', {script_param, 'missing prefix'}); -- no language code prefix; add error message | utilities.set_message ('err_script_parameter', {script_param, cfg.err_msg_supl['missing prefix']}); -- no language code prefix; add error message | ||
end | end | ||
script_value = utilities.substitute (cfg.presentation['bdi'], {lang, script_value}); -- isolate in case script is RTL | script_value = utilities.substitute (cfg.presentation['bdi'], {lang, script_value}); -- isolate in case script is RTL | ||
| Line 764: | Line 764: | ||
if mw.ustring.find (v, cfg.indic_script) then -- it's ok if one of the Indic scripts | if mw.ustring.find (v, cfg.indic_script) then -- it's ok if one of the Indic scripts | ||
position = nil; -- unset position | position = nil; -- unset position | ||
elseif cfg. | elseif cfg.emoji_t[mw.ustring.codepoint (v, position+1)] then -- is zwj followed by a character listed in emoji{}? | ||
position = nil; -- unset position | position = nil; -- unset position | ||
end | end | ||
| Line 1,105: | Line 1,105: | ||
return table.concat(initials) -- Vancouver format does not include spaces. | return table.concat(initials) -- Vancouver format does not include spaces. | ||
end | |||
--[[--------------------------< I N T E R W I K I _ P R E F I X E N _ G E T >---------------------------------- | |||
extract interwiki prefixen from <value>. Returns one or two values: | |||
false – no prefixen | |||
nil – prefix exists but not recognized | |||
project prefix, language prefix – when value has either of: | |||
:<project>:<language>:<article> | |||
:<language>:<project>:<article> | |||
project prefix, nil – when <value> has only a known single-letter prefix | |||
nil, language prefix – when <value> has only a known language prefix | |||
accepts single-letter project prefixen: 'd' (wikidata), 's' (wikisource), and 'w' (wikipedia) prefixes; at this | |||
writing, the other single-letter prefixen (b (wikibook), c (commons), m (meta), n (wikinews), q (wikiquote), and | |||
v (wikiversity)) are not supported. | |||
]] | |||
local function interwiki_prefixen_get (value, is_link) | |||
-- <value>: string to inspect for interwiki / interproject prefixen | |||
-- <is_link>: when truthy, use the patterns anchored on '[[:' (wikilinked value); otherwise use the patterns anchored on ':' | |||
if not value:find (':%l+:') then -- no ':<lowercase letters>:' sequence anywhere in <value> so no prefix | |||
return false; -- abandon; boolean here to distinguish from nil fail returns later | |||
end | |||
local prefix_patterns_linked_t = { -- sequence of valid interwiki and inter project prefixen | |||
'^%[%[:([dsw]):(%l%l+):', -- wikilinked; project and language prefixes | |||
'^%[%[:(%l%l+):([dsw]):', -- wikilinked; language and project prefixes | |||
'^%[%[:([dsw]):', -- wikilinked; project prefix | |||
'^%[%[:(%l%l+):', -- wikilinked; language prefix | |||
} | |||
local prefix_patterns_unlinked_t = { -- sequence of valid interwiki and inter project prefixen | |||
'^:([dsw]):(%l%l+):', -- project and language prefixes | |||
'^:(%l%l+):([dsw]):', -- language and project prefixes | |||
'^:([dsw]):', -- project prefix | |||
'^:(%l%l+):', -- language prefix | |||
} | |||
local cap1, cap2; | |||
-- patterns are tried in sequence order (two-prefix forms first); the first pattern that captures wins | |||
for _, pattern in ipairs ((is_link and prefix_patterns_linked_t) or prefix_patterns_unlinked_t) do | |||
cap1, cap2 = value:match (pattern); | |||
if cap1 then | |||
break; -- found a match so stop looking | |||
end | |||
end | |||
if cap1 and cap2 then -- when both then :project:language: or :language:project: (both forms allowed) | |||
if 1 == #cap1 then -- length == 1 then :project:language: | |||
if cfg.inter_wiki_map[cap2] then -- is language prefix in the interwiki map? | |||
return cap1, cap2; -- return interwiki project and interwiki language | |||
end | |||
else -- here when :language:project: | |||
if cfg.inter_wiki_map[cap1] then -- is language prefix in the interwiki map? | |||
return cap2, cap1; -- return interwiki project and interwiki language | |||
end | |||
end | |||
return nil; -- unknown interwiki language | |||
elseif not (cap1 or cap2) then -- both are nil? | |||
return nil; -- we got something that looks like a project prefix but isn't; return fail | |||
elseif 1 == #cap1 then -- here when one capture | |||
return cap1, nil; -- length is 1 so return project, nil language | |||
else -- here when one capture and its length is more than 1 | |||
if cfg.inter_wiki_map[cap1] then -- is language prefix in the interwiki map? | |||
return nil, cap1; -- return nil project, language | |||
end | |||
-- no explicit return here: when the language prefix is not in cfg.inter_wiki_map the function falls off the end and the caller receives nil | |||
end | |||
end | end | ||
| Line 1,175: | Line 1,242: | ||
one = utilities.make_wikilink (person.link, one); -- link author/editor | one = utilities.make_wikilink (person.link, one); -- link author/editor | ||
end | end | ||
if one then -- if <one> has a value (name, mdash replacement, or mask text replacement) | if one then -- if <one> has a value (name, mdash replacement, or mask text replacement) | ||
local proj, tag = interwiki_prefixen_get (one, true); -- get the interwiki prefixen if present | |||
if 'w' == proj and ('Wikipedia' == mw.site.namespaces.Project['name']) then | |||
proj = nil; -- for stuff like :w:de:<article>, :w is unnecessary TODO: maint cat? | |||
end | |||
if proj then | |||
proj = ({['d'] = 'Wikidata', ['s'] = 'Wikisource', ['w'] = 'Wikipedia'})[proj]; -- :w (wikipedia) for linking from a non-wikipedia project | |||
if proj then | |||
one = one .. utilities.wrap_style ('interproj', proj); -- add resized leading space, brackets, static text, language name | |||
tag = nil; -- unset; don't do both project and language | |||
end | |||
end | |||
if tag == cfg.this_wiki_code then | |||
tag = nil; -- stuff like :en:<article> at en.wiki is pointless TODO: maint cat? | |||
end | |||
if tag then | |||
local lang = cfg.lang_tag_remap[tag] or cfg.mw_languages_by_tag_t[tag]; | |||
if lang then -- error messaging done in extract_names() where we know parameter names | |||
one = one .. utilities.wrap_style ('interwiki', lang); -- add resized leading space, brackets, static text, language name | |||
end | |||
end | |||
table.insert (name_list, one); -- add it to the list of names | table.insert (name_list, one); -- add it to the list of names | ||
table.insert (name_list, sep_one); -- add the proper name-list separator | table.insert (name_list, sep_one); -- add the proper name-list separator | ||
| Line 1,200: | Line 1,290: | ||
local result = table.concat (name_list); -- construct list | local result = table.concat (name_list); -- construct list | ||
if etal and utilities.is_set (result) then -- etal may be set by |display-authors=etal but we might not have a last-first list | if etal and utilities.is_set (result) then -- etal may be set by |display-authors=etal but we might not have a last-first list | ||
result = result .. sep | result = result .. sep .. cfg.messages['et al']; -- we've got a last-first list and etal so add et al. | ||
end | end | ||
| Line 1,316: | Line 1,406: | ||
semicolons. Escaped semicolons are ones used as part of selected HTML entities. | semicolons. Escaped semicolons are ones used as part of selected HTML entities. | ||
If the condition is met, the function adds the multiple name maintenance category. | If the condition is met, the function adds the multiple name maintenance category. | ||
Same test for first except that commas should not appear in given names (MOS:JR says | |||
that the generational suffix does not take a separator character). Titles, degrees, | |||
postnominals, affiliations, all normally comma separated don't belong in a citation. | |||
<name> – name parameter value | |||
<list_name> – AuthorList, EditorList, etc | |||
<limit> – number of allowed commas; 1 (default) for surnames; 0 for given names | |||
returns nothing | returns nothing | ||
| Line 1,321: | Line 1,419: | ||
]] | ]] | ||
local function name_has_mult_names (name, list_name) | local function name_has_mult_names (name, list_name, limit) | ||
local _, commas, semicolons, nbsps; | local _, commas, semicolons, nbsps; | ||
limit = limit and limit or 1; | |||
if utilities.is_set (name) then | if utilities.is_set (name) then | ||
_, commas = name:gsub (',', ''); -- count the number of commas | _, commas = name:gsub (',', ''); -- count the number of commas | ||
| Line 1,336: | Line 1,435: | ||
-- from semicolons to 'escape' them. If additional entities are added, | -- from semicolons to 'escape' them. If additional entities are added, | ||
-- they also can be subtracted. | -- they also can be subtracted. | ||
if | if limit < commas or 0 < (semicolons - nbsps) then | ||
utilities.set_message ('maint_mult_names', cfg.special_case_translation [list_name]); -- add a maint message | utilities.set_message ('maint_mult_names', cfg.special_case_translation [list_name]); -- add a maint message | ||
end | end | ||
| Line 1,343: | Line 1,442: | ||
--[[ | --[=[-------------------------< I S _ G E N E R I C >---------------------------------------------------------- | ||
Compares values assigned to various | Compares values assigned to various parameters according to the string provided as <item> in the function call. | ||
'generic_names': |last=, |first=, |editor-last=, etc | <item> can have one of two values: | ||
'generic_titles' | 'generic_names' – for name-holding parameters: |last=, |first=, |editor-last=, etc | ||
'generic_titles' – for |title= | |||
The | There are two types of generic tests. The 'accept' tests look for a pattern that should not be rejected by the | ||
' | 'reject' test. For example, | ||
|author=[[John Smith (author)|Smith, John]] | |||
would be rejected by the 'author' reject test. But piped wikilinks with 'author' disambiguation should not be | |||
rejected so the 'accept' test prevents that from happening. Accept tests are always performed before reject | |||
tests. | |||
Each of the 'accept' and 'reject' sequence tables hold tables for en.wiki (['en']) and local.wiki (['local']) | |||
that each can hold a test sequence table. The sequence table holds, at index [1], a test pattern, and, at index | |||
[2], a boolean control value. The control value tells string.find() or mw.ustring.find() to do plain-text search (true) | |||
or a pattern search (false). The intent of all this complexity is to make these searches as fast as possible so | or a pattern search (false). The intent of all this complexity is to make these searches as fast as possible so | ||
that we don't run out of processing time on very large articles. | that we don't run out of processing time on very large articles. | ||
]] | Returns | ||
true when a reject test finds the pattern or string | |||
false when an accept test finds the pattern or string | |||
nil else | |||
]=] | |||
local function is_generic (item, value) | local function is_generic (item, value, wiki) | ||
local test_val; | local test_val; | ||
local str_lower = { -- use string.lower() for en.wiki (['en']) and use mw.ustring.lower() or local.wiki (['local']) | |||
['en'] = string.lower, | |||
['local'] = mw.ustring.lower, | |||
} | |||
local str_find = { -- use string.find() for en.wiki (['en']) and use mw.ustring.find() or local.wiki (['local']) | |||
['en'] = string.find, | |||
['local'] = mw.ustring.find, | |||
} | |||
local function test (val, test_t, wiki) -- local function to do the testing; <wiki> selects lower() and find() functions | |||
val = test_t[2] and str_lower[wiki](value) or val; -- when <test_t[2]> set to 'true', plaintext search using lowercase value | |||
return str_find[wiki] (val, test_t[1], 1, test_t[2]); -- return nil when not found or matched | |||
end | |||
local test_types_t = {'accept', 'reject'}; -- test accept patterns first, then reject patterns | |||
local wikis_t = {'en', 'local'}; -- do tests for each of these keys; en.wiki first, local.wiki second | |||
for _, test_type in ipairs (test_types_t) do -- for each test type | |||
for _, generic_value in pairs (cfg.special_case_translation[item][test_type]) do -- spin through the list of generic value fragments to accept or reject | |||
for _, wiki in ipairs (wikis_t) do | |||
if generic_value[wiki] then | |||
if test (value, generic_value[wiki], wiki) then -- go do the test | |||
return ('reject' == test_type); -- param value rejected, return true; false else | |||
end | |||
end | |||
end | end | ||
end | end | ||
| Line 1,406: | Line 1,530: | ||
if not accept_name then -- <last> not wrapped in accept-as-written markup | if not accept_name then -- <last> not wrapped in accept-as-written markup | ||
name_has_mult_names (last, list_name); -- check for multiple names in the parameter | name_has_mult_names (last, list_name); -- check for multiple names in the parameter | ||
name_is_numeric (last, list_name); -- check for names that are composed of digits and punctuation | name_is_numeric (last, list_name); -- check for names that are composed of digits and punctuation | ||
name_is_generic (last, last_alias); -- check for names found in the generic names list | name_is_generic (last, last_alias); -- check for names found in the generic names list | ||
| Line 1,416: | Line 1,540: | ||
if not accept_name then -- <first> not wrapped in accept-as-written markup | if not accept_name then -- <first> not wrapped in accept-as-written markup | ||
name_has_mult_names (first, list_name, 0); -- check for multiple names in the parameter; 0 is number of allowed commas in a given name | |||
name_is_numeric (first, list_name); -- check for names that are composed of digits and punctuation | name_is_numeric (first, list_name); -- check for names that are composed of digits and punctuation | ||
name_is_generic (first, first_alias); -- check for names found in the generic names list | name_is_generic (first, first_alias); -- check for names found in the generic names list | ||
| Line 1,469: | Line 1,594: | ||
link, link_alias = utilities.select_one ( args, cfg.aliases[list_name .. '-Link'], 'err_redundant_parameters', i ); | link, link_alias = utilities.select_one ( args, cfg.aliases[list_name .. '-Link'], 'err_redundant_parameters', i ); | ||
mask = utilities.select_one ( args, cfg.aliases[list_name .. '-Mask'], 'err_redundant_parameters', i ); | mask = utilities.select_one ( args, cfg.aliases[list_name .. '-Mask'], 'err_redundant_parameters', i ); | ||
if last then -- error check |lastn= alias for unknown interwiki link prefix; done here because this is where we have the parameter name | |||
local project, language = interwiki_prefixen_get (last, true); -- true because we expect interwiki links in |lastn= to be wikilinked | |||
if nil == project and nil == language then -- when both are nil | |||
utilities.set_message ('err_bad_paramlink', last_alias); -- not known, emit an error message -- TODO: err_bad_interwiki? | |||
last = utilities.remove_wiki_link (last); -- remove wikilink markup; show display value only | |||
end | |||
end | |||
if link then -- error check |linkn= alias for unknown interwiki link prefix | |||
local project, language = interwiki_prefixen_get (link, false); -- false because wiki links in |author-linkn= is an error | |||
if nil == project and nil == language then -- when both are nil | |||
utilities.set_message ('err_bad_paramlink', link_alias); -- not known, emit an error message -- TODO: err_bad_interwiki? | |||
link = nil; -- unset so we don't link | |||
link_alias = nil; | |||
end | |||
end | |||
last, etal = name_has_etal (last, etal, false, last_alias); -- find and remove variations on et al. | last, etal = name_has_etal (last, etal, false, last_alias); -- find and remove variations on et al. | ||
first, etal = name_has_etal (first, etal, false, first_alias); -- find and remove variations on et al. | first, etal = name_has_etal (first, etal, false, first_alias); -- find and remove variations on et al. | ||
| Line 1,512: | Line 1,654: | ||
This function looks for: | This function looks for: | ||
<lang_param> as a tag in cfg. | <lang_param> as a tag in cfg.lang_tag_remap{} | ||
<lang_param> as a name in cfg.lang_name_remap{} | <lang_param> as a name in cfg.lang_name_remap{} | ||
| Line 1,521: | Line 1,663: | ||
and look for the new <lang_param> in cfg.mw_languages_by_tag_t{} | and look for the new <lang_param> in cfg.mw_languages_by_tag_t{} | ||
on success, | on success, returns name (in properly capitalized form) and matching tag (in lowercase); on failure returns nil | ||
]] | ]] | ||
| Line 1,530: | Line 1,672: | ||
local tag; | local tag; | ||
name = cfg. | name = cfg.lang_tag_remap[lang_param_lc]; -- assume <lang_param_lc> is a tag; attempt to get remapped language name | ||
if name then -- when <name>, <lang_param> is a tag for a remapped language name | if name then -- when <name>, <lang_param> is a tag for a remapped language name | ||
return name, | return name, lang_param_lc; -- so return <name> from remap and <lang_param_lc> | ||
end | end | ||
tag = lang_param_lc:match ('^(%a%a%a?)%-.*'); -- still assuming that <lang_param_lc> is a tag; strip script, region, variant subtags | tag = lang_param_lc:match ('^(%a%a%a?)%-.*'); -- still assuming that <lang_param_lc> is a tag; strip script, region, variant subtags | ||
name = cfg. | name = cfg.lang_tag_remap[tag]; -- attempt to get remapped language name with language subtag only | ||
if name then -- when <name>, <tag> is a tag for a remapped language name | if name then -- when <name>, <tag> is a tag for a remapped language name | ||
return name, tag; -- so return <name> from remap and <tag> | return name, tag; -- so return <name> from remap and <tag> | ||
| Line 1,554: | Line 1,696: | ||
if name then | if name then | ||
return name, | return name, lang_param_lc; -- <lang_param_lc> is a tag so return it and <name> | ||
end | end | ||
| Line 1,601: | Line 1,743: | ||
for _, lang in ipairs (names_t) do -- reuse lang here because we don't yet know if lang is a language name or a language tag | for _, lang in ipairs (names_t) do -- reuse lang here because we don't yet know if lang is a language name or a language tag | ||
name, tag = name_tag_get (lang); -- attempt to get name/tag pair for <lang> | name, tag = name_tag_get (lang); -- attempt to get name/tag pair for <lang> | ||