Module:Citation/CS1: Difference between revisions
From Vigyanwiki
m (1 revision imported) |
No edit summary |
||
| Line 1: | Line 1: | ||
require (' | require ('Module:No globals'); | ||
--[[--------------------------< F O R W A R D D E C L A R A T I O N S >-------------------------------------- | --[[--------------------------< F O R W A R D D E C L A R A T I O N S >-------------------------------------- | ||
| Line 529: | Line 529: | ||
lang = script_value:match('^(%l%l%l?)%s*:%s*%S.*'); -- get the language prefix or nil if there is no script | lang = script_value:match('^(%l%l%l?)%s*:%s*%S.*'); -- get the language prefix or nil if there is no script | ||
if not utilities.is_set (lang) then | if not utilities.is_set (lang) then | ||
utilities.set_message ('err_script_parameter', {script_param, | utilities.set_message ('err_script_parameter', {script_param, 'missing title part'}); -- prefix without 'title'; add error message | ||
return ''; -- script_value was just the prefix so return empty string | return ''; -- script_value was just the prefix so return empty string | ||
end | end | ||
| Line 540: | Line 540: | ||
utilities.add_prop_cat ('script', {name, lang}) | utilities.add_prop_cat ('script', {name, lang}) | ||
else | else | ||
utilities.set_message ('err_script_parameter', {script_param, | utilities.set_message ('err_script_parameter', {script_param, 'unknown language code'}); -- unknown script-language; add error message | ||
end | end | ||
lang = ' lang="' .. lang .. '" '; -- convert prefix into a lang attribute | lang = ' lang="' .. lang .. '" '; -- convert prefix into a lang attribute | ||
else | else | ||
utilities.set_message ('err_script_parameter', {script_param, | utilities.set_message ('err_script_parameter', {script_param, 'invalid language code'}); -- invalid language code; add error message | ||
lang = ''; -- invalid so set lang to empty string | lang = ''; -- invalid so set lang to empty string | ||
end | end | ||
else | else | ||
utilities.set_message ('err_script_parameter', {script_param, | utilities.set_message ('err_script_parameter', {script_param, 'missing prefix'}); -- no language code prefix; add error message | ||
end | end | ||
script_value = utilities.substitute (cfg.presentation['bdi'], {lang, script_value}); -- isolate in case script is RTL | script_value = utilities.substitute (cfg.presentation['bdi'], {lang, script_value}); -- isolate in case script is RTL | ||
| Line 1,343: | Line 1,343: | ||
--[ | --[[--------------------------< I S _ G E N E R I C >---------------------------------------------------------- | ||
Compares values assigned to various | Compares values assigned to various parameter according to the string provided as <item> in the function call: | ||
'generic_names': |last=, |first=, |editor-last=, etc value against list of known generic name patterns | |||
'generic_names' | 'generic_titles': |title= | ||
'generic_titles' | Returns true when pattern matches; nil else | ||
The k/v pairs in cfg.special_case_translation[item] each contain two tables, one for English and one for another | |||
'local' language.Each of those tables contain another table that holds the string or pattern (whole or fragment) | |||
in index [1]. index [2] is a Boolean that tells string.find() or mw.ustring.find() to do plain-text search (true) | |||
Each of the | |||
[2] | |||
or a pattern search (false). The intent of all this complexity is to make these searches as fast as possible so | or a pattern search (false). The intent of all this complexity is to make these searches as fast as possible so | ||
that we don't run out of processing time on very large articles. | that we don't run out of processing time on very large articles. | ||
]] | |||
local function is_generic (item, value) | |||
local test_val; | |||
]=] | for _, generic_value in ipairs (cfg.special_case_translation[item]) do -- spin through the list of known generic value fragments | ||
test_val = generic_value['en'][2] and value:lower() or value; -- when set to 'true', plaintext search using lowercase value | |||
if test_val:find (generic_value['en'][1], 1, generic_value['en'][2]) then | |||
return true; -- found English generic value so done | |||
elseif generic_value['local'] then -- to keep work load down, generic_<value>['local'] should be nil except when there is a local version of the generic value | |||
test_val = generic_value['local'][2] and mw.ustring.lower(value) or value; -- when set to 'true', plaintext search using lowercase value | |||
if mw.ustring.find (test_val, generic_value['local'][1], 1, generic_value['local'][2]) then -- mw.ustring() because might not be Latin script | |||
return true; -- found local generic value so done | |||
end | end | ||
end | end | ||
| Line 1,546: | Line 1,521: | ||
and look for the new <lang_param> in cfg.mw_languages_by_tag_t{} | and look for the new <lang_param> in cfg.mw_languages_by_tag_t{} | ||
on success, | on success, return name and matching tag; on failure return nil | ||
]] | ]] | ||
| Line 1,557: | Line 1,532: | ||
name = cfg.lang_code_remap[lang_param_lc]; -- assume <lang_param_lc> is a tag; attempt to get remapped language name | name = cfg.lang_code_remap[lang_param_lc]; -- assume <lang_param_lc> is a tag; attempt to get remapped language name | ||
if name then -- when <name>, <lang_param> is a tag for a remapped language name | if name then -- when <name>, <lang_param> is a tag for a remapped language name | ||
return name, | return name, lang_param; -- so return <name> from remap and <lang_param> | ||
end | end | ||
| Line 1,579: | Line 1,554: | ||
if name then | if name then | ||
return name, | return name, lang_param; -- <lang_param_lc> is a tag so return <name> and the tag | ||
end | end | ||
| Line 1,626: | Line 1,601: | ||
for _, lang in ipairs (names_t) do -- reuse lang here because we don't yet know if lang is a language name or a language tag | for _, lang in ipairs (names_t) do -- reuse lang here because we don't yet know if lang is a language name or a language tag | ||
name, tag = name_tag_get (lang); -- attempt to get name/tag pair for <lang> | name, tag = name_tag_get (lang); -- attempt to get name/tag pair for <lang> | ||
if utilities.is_set (tag) then | if utilities.is_set (tag) then | ||
lang_subtag = tag:gsub ('^(%a%a%a?)%-.*', '%1'); | lang_subtag = tag:lower():gsub ('^(%a%a%a?)%-.*', '%1'); -- for categorization, strip any IETF-like tags from language tag | ||
if cfg.this_wiki_code ~= lang_subtag then -- when the language is not the same as this wiki's language | if cfg.this_wiki_code ~= lang_subtag then -- when the language is not the same as this wiki's language | ||
if 2 == lang_subtag:len() then -- and is a two-character tag | if 2 == lang_subtag:len() then -- and is a two-character tag | ||
utilities.add_prop_cat ('foreign-lang-source', {name, lang_subtag}, lang_subtag); -- categorize it; tag appended to allow for multiple language categorization | |||
else -- or is a recognized language (but has a three-character tag) | else -- or is a recognized language (but has a three-character tag) | ||
utilities.add_prop_cat ('foreign-lang-source-2', {lang_subtag}, lang_subtag); -- categorize it differently TODO: support multiple three-character tag categories per cs1|2 template? | utilities.add_prop_cat ('foreign-lang-source-2', {lang_subtag}, lang_subtag); -- categorize it differently TODO: support multiple three-character tag categories per cs1|2 template? | ||
| Line 2,527: | Line 2,501: | ||
if 'citation' == config.CitationClass then | if 'citation' == config.CitationClass then | ||
if utilities.is_set (Periodical) then | if utilities.is_set (Periodical) then | ||
if not utilities.in_array (Periodical_origin, | if not utilities.in_array (Periodical_origin, {'website', 'mailinglist'}) then -- {{citation}} does not render volume for these 'periodicals' --TODO: move 'array' to ~/Configuration | ||
Volume = A['Volume']; -- but does for all other 'periodicals' | Volume = A['Volume']; -- but does for all other 'periodicals' | ||
end | end | ||
| Line 2,544: | Line 2,518: | ||
local Issue; | local Issue; | ||
if 'citation' == config.CitationClass then | if 'citation' == config.CitationClass then | ||
if utilities.is_set (Periodical) and utilities.in_array (Periodical_origin, | if utilities.is_set (Periodical) and utilities.in_array (Periodical_origin, {'journal', 'magazine', 'newspaper', 'periodical', 'work'}) or -- {{citation}} renders issue for these 'periodicals'--TODO: move 'array' to ~/Configuration | ||
Issue = utilities.hyphen_to_dash (A['Issue']); | utilities.is_set (ScriptPeriodical) and utilities.in_array (ScriptPeriodical_origin, {'script-journal', 'script-magazine', 'script-newspaper', 'script-periodical', 'script-work'}) then -- and these 'script-periodicals' | ||
Issue = utilities.hyphen_to_dash (A['Issue']); | |||
end | end | ||
elseif utilities.in_array (config.CitationClass, cfg.templates_using_issue) then -- conference & map books do not support issue; {{citation}} listed here because included in settings table | elseif utilities.in_array (config.CitationClass, cfg.templates_using_issue) then -- conference & map books do not support issue; {{citation}} listed here because included in settings table | ||
| Line 2,831: | Line 2,806: | ||
ChapterUrlAccess = UrlAccess; | ChapterUrlAccess = UrlAccess; | ||
ChapterURL_origin = URL_origin; | ChapterURL_origin = URL_origin; | ||
Title = Series; -- promote series to title | Title = Series; -- promote series to title | ||
TitleLink = SeriesLink; | TitleLink = SeriesLink; | ||
| Line 2,845: | Line 2,819: | ||
TransTitle = ''; | TransTitle = ''; | ||
ScriptTitle = ''; | ScriptTitle = ''; | ||
else -- now oddities that are cite serial | else -- now oddities that are cite serial | ||
| Line 3,018: | Line 2,991: | ||
end | end | ||
if utilities.is_set (URL) | if utilities.is_set (URL) and utilities.is_set (AccessDate) then -- access date requires |url=; identifier-created URL is not |url= | ||
utilities.set_message ('err_accessdate_missing_url'); -- add an error message | |||
AccessDate = ''; -- unset | |||
end | end | ||
end | end | ||
| Line 3,856: | Line 3,822: | ||
local template_name = ('citation' == config.CitationClass) and 'citation' or 'cite ' .. (cfg.citation_class_map_t[config.CitationClass] or config.CitationClass); | local template_name = ('citation' == config.CitationClass) and 'citation' or 'cite ' .. (cfg.citation_class_map_t[config.CitationClass] or config.CitationClass); | ||
local template_link = '[[Template:' .. template_name .. '|' .. template_name .. ']]'; | local template_link = '[[Template:' .. template_name .. '|' .. template_name .. ']]'; -- TODO: if kept, these require some sort of i18n | ||
local msg_prefix = '<code class="cs1-code">{{' .. template_link .. '}}</code>: '; | local msg_prefix = '<code class="cs1-code">{{' .. template_link .. '}}</code>: '; | ||
| Line 4,076: | Line 4,042: | ||
local function citation(frame) | local function citation(frame) | ||
Frame = frame; -- save a copy in case we need to display an error message in preview mode | Frame = frame; -- save a copy in case we need to display an error message in preview mode | ||
is_sandbox = nil ~= string.find (frame:getTitle(), 'sandbox', 1, true); | |||
is_sandbox = nil ~= string.find (frame:getTitle(), sandbox, 1, true); | |||
local pframe = frame:getParent() | local pframe = frame:getParent() | ||
local styles; | local styles; | ||
cfg = mw.loadData ('Module:Citation/CS1/Configuration' | if is_sandbox then -- did the {{#invoke:}} use sandbox version? | ||
cfg = mw.loadData ('Module:Citation/CS1/Configuration/sandbox'); -- load sandbox versions of support modules | |||
whitelist = mw.loadData ('Module:Citation/CS1/Whitelist/sandbox'); | |||
utilities = require ('Module:Citation/CS1/Utilities/sandbox'); | |||
validation = require ('Module:Citation/CS1/Date_validation/sandbox'); | |||
identifiers = require ('Module:Citation/CS1/Identifiers/sandbox'); | |||
metadata = require ('Module:Citation/CS1/COinS/sandbox'); | |||
styles = 'Module:Citation/CS1/sandbox/styles.css'; | |||
else -- otherwise | |||
cfg = mw.loadData ('Module:Citation/CS1/Configuration'); -- load live versions of support modules | |||
whitelist = mw.loadData ('Module:Citation/CS1/Whitelist'); | |||
utilities = require ('Module:Citation/CS1/Utilities'); | |||
validation = require ('Module:Citation/CS1/Date_validation'); | |||
identifiers = require ('Module:Citation/CS1/Identifiers'); | |||
metadata = require ('Module:Citation/CS1/COinS'); | |||
styles = 'Module:Citation/CS1/styles.css'; | |||
end | |||
utilities.set_selected_modules (cfg); -- so that functions in Utilities can see the selected cfg tables | utilities.set_selected_modules (cfg); -- so that functions in Utilities can see the selected cfg tables | ||
Revision as of 16:57, 27 October 2022
<section begin=header />
| File:Ambox warning orange.svg | This Lua module is used on approximately 5,400,000 pages, or roughly 13063% of all pages. To avoid major disruption and server load, any changes should be tested in the module's /sandbox or /testcases subpages, or in your own module sandbox. The tested changes can be added to this page in a single edit. Consider discussing changes on the talk page before implementing them. |
| File:Full-protection-shackle.svg | This module is subject to page protection. It is a highly visible module in use by a very large number of pages, or is substituted very frequently. Because vandalism or mistakes would affect many pages, and even trivial editing might cause substantial load on the servers, it is protected from editing. |
<section end=header />
This module and associated sub-modules support the Citation Style 1 and Citation Style 2 citation templates. In general, it is not intended to be called directly, but is called by one of the core CS1 and CS2 templates. <section begin=module_components_table /> These files comprise the module support for CS1|2 citation templates:
<section end=module_components_table />
Other documentation:
- Module talk:Citation/CS1/Feature requests
- Module talk:Citation/CS1/COinS
- Module:Cs1 documentation support – a set of functions (some experimental) that extract information from the module suite for the purpose of documenting CS1|2
- Module:Citation/CS1/doc/Category list – lists of category names taken directly from Module:Citation/CS1/Configuration and Module:Citation/CS1/Configuration/sandbox
testcases
- Module:Citation/CS1/testcases (run)
- Module:Citation/CS1/testcases/errors (run) – error and maintenance messaging
- Module:Citation/CS1/testcases/dates (run) – date validation
- Module:Citation/CS1/testcases/identifiers (run) – identifiers
- Module:Citation/CS1/testcases/anchor (run) – CITEREF anchors
require ('Module:No globals');
--[[--------------------------< F O R W A R D D E C L A R A T I O N S >--------------------------------------
Module-level placeholders for the CS1 support modules and their data tables. They are declared ahead of the
functions that reference them so that those functions can capture them as upvalues; the real values are
assigned later (the citation() entry point loads either the live or the /sandbox version of each support module).
Keep this list short: each of these counts against the Lua upvalue limit
]]
local validation; -- functions in Module:Citation/CS1/Date_validation
local utilities; -- functions in Module:Citation/CS1/Utilities
local z ={}; -- table of tables in Module:Citation/CS1/Utilities; starts out as an empty table
local identifiers; -- functions and tables in Module:Citation/CS1/Identifiers
local metadata; -- functions in Module:Citation/CS1/COinS
local cfg = {}; -- table of configuration tables that are defined in Module:Citation/CS1/Configuration; empty until loaded
local whitelist = {}; -- table of tables listing valid template parameter names; defined in Module:Citation/CS1/Whitelist; empty until loaded
--[[------------------< P A G E S C O P E V A R I A B L E S >---------------
Declare here the variables that have page-wide scope and that are NOT brought in
from other modules, i.e. state that is both created and used inside this module.
All of them start out as nil.
]]
local added_deprecated_cat; -- Boolean flag so that the category is added only once
local added_vanc_errs; -- Boolean flag so we only emit one Vancouver error / category; set by add_vanc_error()
local added_generic_name_errs; -- Boolean flag so we only emit one generic name error / category and stop testing names once an error is encountered
local Frame; -- holds the module's frame table; saved by citation() in case an error message must be displayed in preview mode
local is_preview_mode; -- true when article is in preview mode; false when using 'Preview page with this template' (previewing the module)
local is_sandbox; -- true when using sandbox modules to render citation; citation() derives it from the invoking frame's title
--[[--------------------------< F I R S T _ S E T >------------------------------------------------------------

Walks <list> from index 1 through index <count> and returns the first member for which utilities.is_set()
reports true. Returns nil (falls off the end) when no member in that range is set.

<count> is supplied by the caller rather than derived from the table because the list may legitimately contain
nil entries: pairs() gives no guaranteed evaluation order, and ipairs() stops at the first nil and so could end
before the real end of the list.

]]

local function first_set (list, count)
	for index = 1, count do														-- visit every slot, including slots that hold nil
		local candidate = list[index];
		if utilities.is_set (candidate) then
			return candidate;													-- first set member wins; done
		end
	end
end
--[[--------------------------< A D D _ V A N C _ E R R O R >----------------------------------------------------

Emits the Vancouver system error message at most once per template, regardless of how many errors actually exist.

added_vanc_errs (a Boolean declared with the page scope variables) is nil until the first message has been
emitted; once it is true, later calls do nothing.

<source> and <position> are passed through unchanged as the arguments of the 'err_vancouver' message.

]]

local function add_vanc_error (source, position)
	if not added_vanc_errs then													-- only the first call gets to emit the message
		added_vanc_errs = true;													-- remember that the message / category has been added
		utilities.set_message ('err_vancouver', {source, position});
	end
end
--[[--------------------------< I S _ S C H E M E >------------------------------------------------------------

Does this thing that purports to be a URI scheme seem to be a valid scheme? The scheme is checked to see if it
is in agreement with http://tools.ietf.org/html/std66#section-3.1 which says:
	Scheme names consist of a sequence of characters beginning with a
	letter and followed by any combination of letters, digits, plus
	("+"), period ("."), or hyphen ("-").

The trailing colon is part of what is tested.

Returns a truthy value (the matched scheme text including its colon) when <scheme> looks valid; returns nil when
it does not match, and returns <scheme> itself (nil or false) when <scheme> is not set.

]]

local function is_scheme (scheme)
	if not scheme then
		return scheme;															-- nothing to test; hand back the nil / false we were given
	end

	return scheme:match ('^%a[%a%d%+%.%-]*:');									-- matched text when valid; nil when not
end
--[=[-------------------------< I S _ D O M A I N _ N A M E >--------------------------------------------------

Does this thing that purports to be a domain name seem to be a valid domain name?

Syntax defined here: http://tools.ietf.org/html/rfc1034#section-3.5
BNF defined here: https://tools.ietf.org/html/rfc4234
Single character names are generally reserved; see https://tools.ietf.org/html/draft-ietf-dnsind-iana-dns-01#page-15;
	see also [[Single-letter second-level domain]]
list of TLDs: https://www.iana.org/domains/root/db

RFC 952 (modified by RFC 1123) requires the first and last character of a hostname to be a letter or a digit. Between
the first and last characters the name may use letters, digits, and the hyphen.

Also allowed are IPv4 addresses. IPv6 not supported

<domain> is expected to be stripped of any path so that its last character is the last character of the TLD. The TLD
is two or more alpha characters. Any preceding '//' (from splitting a URL with a scheme) will be stripped
here. Perhaps not necessary but retained in case it is necessary for IPv4 dot decimal.

There are several tests:
	the first character of the whole domain name including subdomains must be a letter or a digit
	anything that begins with letters followed by a colon (interwiki-like, e.g. s:Page:Title) is rejected
	internationalized domain name (ASCII characters with .xn-- ASCII Compatible Encoding (ACE) prefix xn-- in the TLD) see https://tools.ietf.org/html/rfc3490
	single-letter/digit second-level domains in the .cash, .company, .today, and .org TLDs
	q, x, and z SL domains in the .com TLD
	i and q SL domains in the .net TLD
	single-letter SL domains in the ccTLDs (where the ccTLD is two letters)
	two-character SL domains in gTLDs (where the gTLD is two or more letters)
	three-plus-character SL domains in gTLDs (where the gTLD is two or more letters)
	IPv4 dot-decimal address format; TLD not allowed

All hostname tests are anchored to the end of <domain>; the single-character second-level-domain test used to be
unanchored and therefore accepted things like 'a.orgxyz'. The IPv4 test is anchored only at the start, so text
after the fourth octet is tolerated.

returns true if domain appears to be a proper name and TLD or IPv4 address, else false

]=]

local function is_domain_name (domain)
	if not domain then
		return false;															-- if not set, abandon
	end

	domain = domain:gsub ('^//', '');											-- strip '//' from domain name if present; done here so we only have to do it once

	if not domain:match ('^[%w]') then											-- first character must be letter or digit
		return false;
	end

	if domain:match ('^%a+:') then												-- hack to detect things that look like s:Page:Title where Page: is namespace at Wikisource
		return false;
	end

	local patterns = {															-- patterns that look like URLs
		'%f[%w][%w][%w%-]+[%w]%.%a%a+$',										-- three or more character hostname.hostname or hostname.tld
		'%f[%w][%w][%w%-]+[%w]%.xn%-%-[%w]+$',									-- internationalized domain name with ACE prefix
		'%f[%a][qxz]%.com$',													-- assigned one character .com hostname (x.com times out 2015-12-10)
		'%f[%a][iq]%.net$',														-- assigned one character .net hostname (q.net registered but not active 2015-12-10)
		'%f[%w][%w]%.%a%a$',													-- one character hostname and ccTLD (2 chars)
		'%f[%w][%w][%w]%.%a%a+$',												-- two character hostname and TLD
		'^%d%d?%d?%.%d%d?%d?%.%d%d?%d?%.%d%d?%d?',								-- IPv4 address
		}

	for _, pattern in ipairs (patterns) do										-- loop through the patterns list
		if domain:match (pattern) then
			return true;														-- if a match then we think that this thing that purports to be a URL is a URL
		end
	end

	for _, tld in ipairs ({'cash', 'company', 'today', 'org'}) do				-- look for single letter second level domain names for these top level domains
		if domain:match ('%f[%w][%w]%.' .. tld .. '$') then						-- '$' so that the TLD must be the end of <domain>, same as the patterns above
			return true;
		end
	end

	return false;																-- no matches, we don't know what this thing is
end
--[[--------------------------< I S _ U R L >------------------------------------------------------------------

Reports whether the scheme and domain parts of a URL appear to make up a valid URL. The result is truthy when
they do and false or nil when they do not.

When <scheme> is set, both is_scheme (scheme) and is_domain_name (domain) must succeed. When <scheme> is not set
the URL is taken to be protocol-relative and only <domain> is tested.

This function is the last step in the validation process. This function is separate because there are cases that
are not covered by split_url(), for example is_parameter_ext_wikilink() which is looking for bracketed external
wikilinks.

]]

local function is_url (scheme, domain)
	if not utilities.is_set (scheme) then
		return is_domain_name (domain);											-- scheme not set when URL is protocol-relative
	end

	return is_scheme (scheme) and is_domain_name (domain);						-- scheme is set so check it and domain
end
--[[--------------------------< S P L I T _ U R L >------------------------------------------------------------
Split a URL into a scheme, authority indicator, and domain.
First remove Fully Qualified Domain Name terminator (a dot following TLD) (if any) and any path(/), query(?) or fragment(#).
If protocol-relative URL, return nil scheme and domain else return nil for both scheme and domain.
When not protocol-relative, get scheme, authority indicator, and domain. If there is an authority indicator (one
or more '/' characters immediately following the scheme's colon), make sure that there are only 2.
Any URL that does not have news: scheme must have authority indicator (//). TODO: are there other common schemes
like news: that don't use authority indicator?
Strip off any port and path;
]]
local function split_url (url_str)
local scheme, authority, domain;
url_str = url_str:gsub ('([%a%d])%.?[/%?#].*$', '%1'); -- strip FQDN terminator and path(/), query(?), fragment (#) (the capture prevents false replacement of '//')
if url_str:match ('^//%S*') then -- if there is what appears to be a protocol-relative URL
domain = url_str:match ('^//(%S*)')
elseif url_str:match ('%S-:/*%S+') then -- if there is what appears to be a scheme, optional authority indicator, and domain name
scheme, authority, domain = url_str:match ('(%S-:)(/*)(%S+)'); -- extract the scheme, authority indicator, and domain portions
if utilities.is_set (authority) then
authority = authority:gsub ('//', '', 1); -- replace place 1 pair of '/' with nothing;
if utilities.is_set(authority) then -- if anything left (1 or 3+ '/' where authority should be) then
return scheme;