Module:Citation/CS1: Difference between revisions

From Vigyanwiki
No edit summary
 
m (1 revision imported)
 
(21 intermediate revisions by 7 users not shown)
Line 1: Line 1:
require ('Module:No globals');
require ('strict');


--[[--------------------------< F O R W A R D  D E C L A R A T I O N S >--------------------------------------
--[[--------------------------< F O R W A R D  D E C L A R A T I O N S >--------------------------------------
Line 10: Line 10:


local utilities; -- functions in Module:Citation/CS1/Utilities
local utilities; -- functions in Module:Citation/CS1/Utilities
local z ={}; -- table of tables in Module:Citation/CS1/Utilities
local z = {}; -- table of tables in Module:Citation/CS1/Utilities


local identifiers; -- functions and tables in Module:Citation/CS1/Identifiers
local identifiers; -- functions and tables in Module:Citation/CS1/Identifiers
Line 154: Line 154:
end
end


for _, d in ipairs ({'cash', 'company', 'today', 'org'}) do -- look for single letter second level domain names for these top level domains
for _, d in ipairs (cfg.single_letter_2nd_lvl_domains_t) do -- look for single letter second level domain names for these top level domains
if domain:match ('%f[%w][%w]%.' .. d) then
if domain:match ('%f[%w][%w]%.' .. d) then
return true
return true
Line 265: Line 265:


local function link_title_ok (link, lorig, title, torig)
local function link_title_ok (link, lorig, title, torig)
local orig;
local orig;
if utilities.is_set (link) then -- don't bother if <param>-link doesn't have a value
if utilities.is_set (link) then -- don't bother if <param>-link doesn't have a value
if not link_param_ok (link) then -- check |<param>-link= markup
if not link_param_ok (link) then -- check |<param>-link= markup
Line 401: Line 401:
utilities.set_message ('err_bare_url_missing_title', {utilities.wrap_style ('parameter', source)});
utilities.set_message ('err_bare_url_missing_title', {utilities.wrap_style ('parameter', source)});
else
else
error (cfg.messages["bare_url_no_origin"]);
error (cfg.messages["bare_url_no_origin"]); -- programmer error; valid parameter name does not have matching meta-parameter
end
end
end
end
Line 529: Line 529:
lang = script_value:match('^(%l%l%l?)%s*:%s*%S.*'); -- get the language prefix or nil if there is no script
lang = script_value:match('^(%l%l%l?)%s*:%s*%S.*'); -- get the language prefix or nil if there is no script
if not utilities.is_set (lang) then
if not utilities.is_set (lang) then
utilities.set_message ('err_script_parameter', {script_param, 'missing title part'}); -- prefix without 'title'; add error message
utilities.set_message ('err_script_parameter', {script_param, cfg.err_msg_supl['missing title part']}); -- prefix without 'title'; add error message
return ''; -- script_value was just the prefix so return empty string
return ''; -- script_value was just the prefix so return empty string
end
end
-- if we get this far we have prefix and script
-- if we get this far we have prefix and script
name = cfg.lang_code_remap[lang] or mw.language.fetchLanguageName( lang, cfg.this_wiki_code ); -- get language name so that we can use it to categorize
name = cfg.lang_tag_remap[lang] or mw.language.fetchLanguageName( lang, cfg.this_wiki_code ); -- get language name so that we can use it to categorize
if utilities.is_set (name) then -- is prefix a proper ISO 639-1 language code?
if utilities.is_set (name) then -- is prefix a proper ISO 639-1 language code?
script_value = script_value:gsub ('^%l+%s*:%s*', ''); -- strip prefix from script
script_value = script_value:gsub ('^%l+%s*:%s*', ''); -- strip prefix from script
Line 540: Line 540:
utilities.add_prop_cat ('script', {name, lang})
utilities.add_prop_cat ('script', {name, lang})
else
else
utilities.set_message ('err_script_parameter', {script_param, 'unknown language code'}); -- unknown script-language; add error message
utilities.set_message ('err_script_parameter', {script_param, cfg.err_msg_supl['unknown language code']}); -- unknown script-language; add error message
end
end
lang = ' lang="' .. lang .. '" '; -- convert prefix into a lang attribute
lang = ' lang="' .. lang .. '" '; -- convert prefix into a lang attribute
else
else
utilities.set_message ('err_script_parameter', {script_param, 'invalid language code'}); -- invalid language code; add error message
utilities.set_message ('err_script_parameter', {script_param, cfg.err_msg_supl['invalid language code']}); -- invalid language code; add error message
lang = ''; -- invalid so set lang to empty string
lang = ''; -- invalid so set lang to empty string
end
end
else
else
utilities.set_message ('err_script_parameter', {script_param, 'missing prefix'}); -- no language code prefix; add error message
utilities.set_message ('err_script_parameter', {script_param, cfg.err_msg_supl['missing prefix']}); -- no language code prefix; add error message
end
end
script_value = utilities.substitute (cfg.presentation['bdi'], {lang, script_value}); -- isolate in case script is RTL
script_value = utilities.substitute (cfg.presentation['bdi'], {lang, script_value}); -- isolate in case script is RTL
Line 764: Line 764:
if mw.ustring.find (v, cfg.indic_script) then -- it's ok if one of the Indic scripts
if mw.ustring.find (v, cfg.indic_script) then -- it's ok if one of the Indic scripts
position = nil; -- unset position
position = nil; -- unset position
elseif cfg.emoji[mw.ustring.codepoint (v, position+1)] then -- is zwj followed by a character listed in emoji{}?
elseif cfg.emoji_t[mw.ustring.codepoint (v, position+1)] then -- is zwj followed by a character listed in emoji{}?
position = nil; -- unset position
position = nil; -- unset position
end
end
Line 1,105: Line 1,105:
return table.concat(initials) -- Vancouver format does not include spaces.
return table.concat(initials) -- Vancouver format does not include spaces.
end
--[[--------------------------< I N T E R W I K I _ P R E F I X E N _ G E T >----------------------------------

Extracts the leading interwiki / inter-project prefixen from <value>.

<value> – text to inspect; a string
<is_link> – when true, <value> is expected to begin with wikilink markup ('[[:'); otherwise the prefix must
	begin at the first character of <value> (':')

Returns one or two values:
	false – <value> does not contain anything that looks like a prefix (':<lowercase letters>:')
	nil – something that looks like a prefix is present but is not recognized
	project prefix, language prefix – when <value> begins with either of:
		:<project>:<language>:<article>
		:<language>:<project>:<article>
	project prefix, nil – when <value> begins with only a supported single-letter project prefix
	nil, language prefix – when <value> begins with only a language prefix listed in cfg.inter_wiki_map{}

Supported single-letter project prefixen are 'd' (wikidata), 's' (wikisource), and 'w' (wikipedia); the other
single-letter prefixen (b (wikibook), c (commons), m (meta), n (wikinews), q (wikiquote), and v (wikiversity))
are not supported.

]]

local function interwiki_prefixen_get (value, is_link)
	if nil == value:find (':%l+:') then											-- nothing that resembles a prefix anywhere in <value>
		return false;															-- boolean so that callers can tell this apart from the nil 'fail' returns
	end

	local patterns_t;															-- sequence of prefix patterns; most specific first
	if is_link then
		patterns_t = {															-- <value> is wikilinked
			'^%[%[:([dsw]):(%l%l+):',											-- project then language
			'^%[%[:(%l%l+):([dsw]):',											-- language then project
			'^%[%[:([dsw]):',													-- project only
			'^%[%[:(%l%l+):',													-- language only
			}
	else
		patterns_t = {															-- <value> is not wikilinked
			'^:([dsw]):(%l%l+):',												-- project then language
			'^:(%l%l+):([dsw]):',												-- language then project
			'^:([dsw]):',														-- project only
			'^:(%l%l+):',														-- language only
			}
	end

	local first, second;														-- captures from the first pattern that matches
	for i = 1, #patterns_t do
		first, second = value:match (patterns_t[i]);
		if first then
			break;																-- matched; more general patterns need not be tried
		end
	end

	if not first then															-- looked like a prefix but none of the patterns matched
		return nil;																-- fail
	end

	if second then																-- two captures: :project:language: or :language:project:
		local project, language = first, second;								-- assume :project:language: order
		if 1 ~= #first then														-- first capture longer than one letter so order is :language:project:
			project, language = second, first;
		end

		if cfg.inter_wiki_map[language] then									-- language prefix must be in the interwiki map
			return project, language;
		end
		return nil;																-- unknown interwiki language
	end

	if 1 == #first then															-- single capture of one letter is a project prefix
		return first, nil;
	end

	if cfg.inter_wiki_map[first] then											-- single capture longer than one letter; is it a known language prefix?
		return nil, first;
	end
end
end


Line 1,175: Line 1,242:
one = utilities.make_wikilink (person.link, one); -- link author/editor
one = utilities.make_wikilink (person.link, one); -- link author/editor
end
end
if one then -- if <one> has a value (name, mdash replacement, or mask text replacement)
if one then -- if <one> has a value (name, mdash replacement, or mask text replacement)
local proj, tag = interwiki_prefixen_get (one, true); -- get the interwiki prefixen if present
if 'w' == proj and ('Wikipedia' == mw.site.namespaces.Project['name']) then
proj = nil; -- for stuff like :w:de:<article>, :w is unnecessary TODO: maint cat?
end
if proj then
proj = ({['d'] = 'Wikidata', ['s'] = 'Wikisource', ['w'] = 'Wikipedia'})[proj]; -- :w (wikipedia) for linking from a non-wikipedia project
if proj then
one = one .. utilities.wrap_style ('interproj', proj); -- add resized leading space, brackets, static text, language name
tag = nil; -- unset; don't do both project and language
end
end
if tag == cfg.this_wiki_code then
tag = nil; -- stuff like :en:<article> at en.wiki is pointless TODO: maint cat?
end
if tag then
local lang = cfg.lang_tag_remap[tag] or cfg.mw_languages_by_tag_t[tag];
if lang then -- error messaging done in extract_names() where we know parameter names
one = one .. utilities.wrap_style ('interwiki', lang); -- add resized leading space, brackets, static text, language name
end
end
table.insert (name_list, one); -- add it to the list of names
table.insert (name_list, one); -- add it to the list of names
table.insert (name_list, sep_one); -- add the proper name-list separator
table.insert (name_list, sep_one); -- add the proper name-list separator
Line 1,200: Line 1,290:
local result = table.concat (name_list); -- construct list
local result = table.concat (name_list); -- construct list
if etal and utilities.is_set (result) then -- etal may be set by |display-authors=etal but we might not have a last-first list
if etal and utilities.is_set (result) then -- etal may be set by |display-authors=etal but we might not have a last-first list
result = result .. sep .. ' ' .. cfg.messages['et al']; -- we've got a last-first list and etal so add et al.
result = result .. sep .. cfg.messages['et al']; -- we've got a last-first list and etal so add et al.
end
end
Line 1,316: Line 1,406:
semicolons. Escaped semicolons are ones used as part of selected HTML entities.
semicolons. Escaped semicolons are ones used as part of selected HTML entities.
If the condition is met, the function adds the multiple name maintenance category.
If the condition is met, the function adds the multiple name maintenance category.
Same test for first except that commas should not appear in given names (MOS:JR says
that the generational suffix does not take a separator character).  Titles, degrees,
postnominals, affiliations, all normally comma separated don't belong in a citation.
<name> – name parameter value
<list_name> – AuthorList, EditorList, etc
<limit> – number of allowed commas; 1 (default) for surnames; 0 for given names


returns nothing
returns nothing
Line 1,321: Line 1,419:
]]
]]


local function name_has_mult_names (name, list_name)
local function name_has_mult_names (name, list_name, limit)
local _, commas, semicolons, nbsps;
local _, commas, semicolons, nbsps;
limit = limit and limit or 1;
if utilities.is_set (name) then
if utilities.is_set (name) then
_, commas = name:gsub (',', ''); -- count the number of commas
_, commas = name:gsub (',', ''); -- count the number of commas
Line 1,336: Line 1,435:
-- from semicolons to 'escape' them. If additional entities are added,
-- from semicolons to 'escape' them. If additional entities are added,
-- they also can be subtracted.
-- they also can be subtracted.
if 1 < commas or 0 < (semicolons - nbsps) then
if limit < commas or 0 < (semicolons - nbsps) then
utilities.set_message ('maint_mult_names', cfg.special_case_translation [list_name]); -- add a maint message
utilities.set_message ('maint_mult_names', cfg.special_case_translation [list_name]); -- add a maint message
end
end
Line 1,343: Line 1,442:




--[[--------------------------< I S _ G E N E R I C >----------------------------------------------------------
--[=[-------------------------< I S _ G E N E R I C >----------------------------------------------------------


Compares values assigned to various parameter according to the string provided as <item> in the function call:
Compares values assigned to various parameters according to the string provided as <item> in the function call.
'generic_names': |last=, |first=, |editor-last=, etc value against list of known generic name patterns
<item> can have one of two values:
'generic_titles': |title=
'generic_names' – for name-holding parameters: |last=, |first=, |editor-last=, etc
Returns true when pattern matches; nil else
'generic_titles' – for |title=


The k/v pairs in cfg.special_case_translation[item] each contain two tables, one for English and one for another
There are two types of generic tests.  The 'accept' tests look for a pattern that should not be rejected by the
'local' language.Each of those tables contain another table that holds the string or pattern (whole or fragment)
'reject' test. For example,
in index [1]. index [2] is a Boolean that tells string.find() or mw.ustring.find() to do plain-text search (true)
|author=[[John Smith (author)|Smith, John]]
would be rejected by the 'author' reject test.  But piped wikilinks with 'author' disambiguation should not be
rejected so the 'accept' test prevents that from happening.  Accept tests are always performed before reject
tests.
 
Each of the 'accept' and 'reject' sequence tables holds tables for en.wiki (['en']) and local.wiki (['local'])
that each can hold a test sequence table.  The sequence table holds, at index [1], a test pattern, and, at index
[2], a boolean control value.  The control value tells string.find() or mw.ustring.find() to do plain-text search (true)
or a pattern search (false).  The intent of all this complexity is to make these searches as fast as possible so
or a pattern search (false).  The intent of all this complexity is to make these searches as fast as possible so
that we don't run out of processing time on very large articles.
that we don't run out of processing time on very large articles.


]]
Returns
true when a reject test finds the pattern or string
false when an accept test finds the pattern or string
nil else
 
]=]


local function is_generic (item, value)
local function is_generic (item, value, wiki)
local test_val;
local test_val;
local str_lower = { -- use string.lower() for en.wiki (['en']) and use mw.ustring.lower() or local.wiki (['local'])
['en'] = string.lower,
['local'] = mw.ustring.lower,
}
local str_find = { -- use string.find() for en.wiki (['en']) and use mw.ustring.find() or local.wiki (['local'])
['en'] = string.find,
['local'] = mw.ustring.find,
}


for _, generic_value in ipairs (cfg.special_case_translation[item]) do -- spin through the list of known generic value fragments
local function test (val, test_t, wiki) -- local function to do the testing; <wiki> selects lower() and find() functions
test_val = generic_value['en'][2] and value:lower() or value; -- when set to 'true', plaintext search using lowercase value
val = test_t[2] and str_lower[wiki](value) or val; -- when <test_t[2]> set to 'true', plaintext search using lowercase value
return str_find[wiki] (val, test_t[1], 1, test_t[2]); -- return nil when not found or matched
end
local test_types_t = {'accept', 'reject'}; -- test accept patterns first, then reject patterns
local wikis_t = {'en', 'local'}; -- do tests for each of these keys; en.wiki first, local.wiki second


if test_val:find (generic_value['en'][1], 1, generic_value['en'][2]) then
for _, test_type in ipairs (test_types_t) do -- for each test type
return true; -- found English generic value so done
for _, generic_value in pairs (cfg.special_case_translation[item][test_type]) do -- spin through the list of generic value fragments to accept or reject
 
for _, wiki in ipairs (wikis_t) do
elseif generic_value['local'] then -- to keep work load down, generic_<value>['local'] should be nil except when there is a local version of the generic value
if generic_value[wiki] then
test_val = generic_value['local'][2] and mw.ustring.lower(value) or value; -- when set to 'true', plaintext search using lowercase value
if test (value, generic_value[wiki], wiki) then -- go do the test
 
return ('reject' == test_type); -- param value rejected, return true; false else
if mw.ustring.find (test_val, generic_value['local'][1], 1, generic_value['local'][2]) then -- mw.ustring() because might not be Latin script
end
return true; -- found local generic value so done
end
end
end
end
end
Line 1,406: Line 1,530:


if not accept_name then -- <last> not wrapped in accept-as-written markup
if not accept_name then -- <last> not wrapped in accept-as-written markup
name_has_mult_names (last, list_name); -- check for multiple names in the parameter (last only)
name_has_mult_names (last, list_name); -- check for multiple names in the parameter
name_is_numeric (last, list_name); -- check for names that are composed of digits and punctuation
name_is_numeric (last, list_name); -- check for names that are composed of digits and punctuation
name_is_generic (last, last_alias); -- check for names found in the generic names list
name_is_generic (last, last_alias); -- check for names found in the generic names list
Line 1,416: Line 1,540:


if not accept_name then -- <first> not wrapped in accept-as-written markup
if not accept_name then -- <first> not wrapped in accept-as-written markup
name_has_mult_names (first, list_name, 0); -- check for multiple names in the parameter; 0 is number of allowed commas in a given name
name_is_numeric (first, list_name); -- check for names that are composed of digits and punctuation
name_is_numeric (first, list_name); -- check for names that are composed of digits and punctuation
name_is_generic (first, first_alias); -- check for names found in the generic names list
name_is_generic (first, first_alias); -- check for names found in the generic names list
Line 1,469: Line 1,594:
link, link_alias = utilities.select_one ( args, cfg.aliases[list_name .. '-Link'], 'err_redundant_parameters', i );
link, link_alias = utilities.select_one ( args, cfg.aliases[list_name .. '-Link'], 'err_redundant_parameters', i );
mask = utilities.select_one ( args, cfg.aliases[list_name .. '-Mask'], 'err_redundant_parameters', i );
mask = utilities.select_one ( args, cfg.aliases[list_name .. '-Mask'], 'err_redundant_parameters', i );
 
if last then -- error check |lastn= alias for unknown interwiki link prefix; done here because this is where we have the parameter name
local project, language = interwiki_prefixen_get (last, true); -- true because we expect interwiki links in |lastn= to be wikilinked
if nil == project and nil == language then -- when both are nil
utilities.set_message ('err_bad_paramlink', last_alias); -- not known, emit an error message -- TODO: err_bad_interwiki?
last = utilities.remove_wiki_link (last); -- remove wikilink markup; show display value only
end
end
if link then -- error check |linkn= alias for unknown interwiki link prefix
local project, language = interwiki_prefixen_get (link, false); -- false because wiki links in |author-linkn= is an error
if nil == project and nil == language then -- when both are nil
utilities.set_message ('err_bad_paramlink', link_alias); -- not known, emit an error message -- TODO: err_bad_interwiki?
link = nil; -- unset so we don't link
link_alias = nil;
end
end
last, etal = name_has_etal (last, etal, false, last_alias); -- find and remove variations on et al.
last, etal = name_has_etal (last, etal, false, last_alias); -- find and remove variations on et al.
first, etal = name_has_etal (first, etal, false, first_alias); -- find and remove variations on et al.
first, etal = name_has_etal (first, etal, false, first_alias); -- find and remove variations on et al.
Line 1,512: Line 1,654:


This function looks for:
This function looks for:
<lang_param> as a tag in cfg.lang_code_remap{}
<lang_param> as a tag in cfg.lang_tag_remap{}
<lang_param> as a name in cfg.lang_name_remap{}
<lang_param> as a name in cfg.lang_name_remap{}
Line 1,521: Line 1,663:
and look for the new <lang_param> in cfg.mw_languages_by_tag_t{}
and look for the new <lang_param> in cfg.mw_languages_by_tag_t{}


on success, return name and matching tag; on failure return nil
on success, returns name (in properly capitalized form) and matching tag (in lowercase); on failure returns nil


]]
]]
Line 1,530: Line 1,672:
local tag;
local tag;


name = cfg.lang_code_remap[lang_param_lc]; -- assume <lang_param_lc> is a tag; attempt to get remapped language name  
name = cfg.lang_tag_remap[lang_param_lc]; -- assume <lang_param_lc> is a tag; attempt to get remapped language name  
if name then -- when <name>, <lang_param> is a tag for a remapped language name
if name then -- when <name>, <lang_param> is a tag for a remapped language name
return name, lang_param; -- so return <name> from remap and <lang_param>
return name, lang_param_lc; -- so return <name> from remap and <lang_param_lc>
end
end


tag = lang_param_lc:match ('^(%a%a%a?)%-.*'); -- still assuming that <lang_param_lc> is a tag; strip script, region, variant subtags
tag = lang_param_lc:match ('^(%a%a%a?)%-.*'); -- still assuming that <lang_param_lc> is a tag; strip script, region, variant subtags
name = cfg.lang_code_remap[tag]; -- attempt to get remapped language name with language subtag only
name = cfg.lang_tag_remap[tag]; -- attempt to get remapped language name with language subtag only
if name then -- when <name>, <tag> is a tag for a remapped language name
if name then -- when <name>, <tag> is a tag for a remapped language name
return name, tag; -- so return <name> from remap and <tag>
return name, tag; -- so return <name> from remap and <tag>
Line 1,554: Line 1,696:
if name then
if name then
return name, lang_param; -- <lang_param_lc> is a tag so return <name> and the tag
return name, lang_param_lc; -- <lang_param_lc> is a tag so return it and <name>
end
end
Line 1,601: Line 1,743:


for _, lang in ipairs (names_t) do -- reuse lang here because we don't yet know if lang is a language name or a language tag
for _, lang in ipairs (names_t) do -- reuse lang here because we don't yet know if lang is a language name or a language tag
name, tag = name_tag_get (lang); -- attempt to get name/tag pair for <lang>
name, tag = name_tag_get (lang); -- attempt to get name/tag pair for <lang>