Module:Citation/CS1: Difference between revisions

From Vigyanwiki
m (1 revision imported)
No edit summary
Line 1: Line 1:
require ('strict');
require ('Module:No globals');


--[[--------------------------< F O R W A R D  D E C L A R A T I O N S >--------------------------------------
--[[--------------------------< F O R W A R D  D E C L A R A T I O N S >--------------------------------------
Line 529: Line 529:
lang = script_value:match('^(%l%l%l?)%s*:%s*%S.*'); -- get the language prefix or nil if there is no script
lang = script_value:match('^(%l%l%l?)%s*:%s*%S.*'); -- get the language prefix or nil if there is no script
if not utilities.is_set (lang) then
if not utilities.is_set (lang) then
utilities.set_message ('err_script_parameter', {script_param, cfg.err_msg_supl['missing title part']}); -- prefix without 'title'; add error message
utilities.set_message ('err_script_parameter', {script_param, 'missing title part'}); -- prefix without 'title'; add error message
return ''; -- script_value was just the prefix so return empty string
return ''; -- script_value was just the prefix so return empty string
end
end
Line 540: Line 540:
utilities.add_prop_cat ('script', {name, lang})
utilities.add_prop_cat ('script', {name, lang})
else
else
utilities.set_message ('err_script_parameter', {script_param, cfg.err_msg_supl['unknown language code']}); -- unknown script-language; add error message
utilities.set_message ('err_script_parameter', {script_param, 'unknown language code'}); -- unknown script-language; add error message
end
end
lang = ' lang="' .. lang .. '" '; -- convert prefix into a lang attribute
lang = ' lang="' .. lang .. '" '; -- convert prefix into a lang attribute
else
else
utilities.set_message ('err_script_parameter', {script_param, cfg.err_msg_supl['invalid language code']}); -- invalid language code; add error message
utilities.set_message ('err_script_parameter', {script_param, 'invalid language code'}); -- invalid language code; add error message
lang = ''; -- invalid so set lang to empty string
lang = ''; -- invalid so set lang to empty string
end
end
else
else
utilities.set_message ('err_script_parameter', {script_param, cfg.err_msg_supl['missing prefix']}); -- no language code prefix; add error message
utilities.set_message ('err_script_parameter', {script_param, 'missing prefix'}); -- no language code prefix; add error message
end
end
script_value = utilities.substitute (cfg.presentation['bdi'], {lang, script_value}); -- isolate in case script is RTL
script_value = utilities.substitute (cfg.presentation['bdi'], {lang, script_value}); -- isolate in case script is RTL
Line 1,343: Line 1,343:




--[=[-------------------------< I S _ G E N E R I C >----------------------------------------------------------
--[[--------------------------< I S _ G E N E R I C >----------------------------------------------------------


Compares values assigned to various parameters according to the string provided as <item> in the function call.
Compares values assigned to various parameter according to the string provided as <item> in the function call:
<item> can have on of two values:
'generic_names': |last=, |first=, |editor-last=, etc value against list of known generic name patterns
'generic_names' – for name-holding parameters: |last=, |first=, |editor-last=, etc
'generic_titles': |title=
'generic_titles' – for |title=
Returns true when pattern matches; nil else


There are two types of generic tests.  The 'accept' tests look for a pattern that should not be rejected by the
The k/v pairs in cfg.special_case_translation[item] each contain two tables, one for English and one for another
'reject' test. For example,
'local' language.Each of those tables contain another table that holds the string or pattern (whole or fragment)
|author=[[John Smith (author)|Smith, John]]
in index [1]. index [2] is a Boolean that tells string.find() or mw.ustring.find() to do plain-text search (true)
would be rejected by the 'author' reject test.  But piped wikilinks with 'author' disambiguation should not be
rejected so the 'accept' test prevents that from happening.  Accept tests are always performed before reject
tests.
 
Each of the 'accept' and 'reject' sequence tables hold tables for en.wiki (['en']) and local.wiki (['local'])
that each can hold a test sequence table  The sequence table holds, at index [1], a test pattern, and, at index
[2], a boolean control value.  The control value tells string.find() or mw.ustring.find() to do plain-text search (true)
or a pattern search (false).  The intent of all this complexity is to make these searches as fast as possible so
or a pattern search (false).  The intent of all this complexity is to make these searches as fast as possible so
that we don't run out of processing time on very large articles.
that we don't run out of processing time on very large articles.


Returns
]]
true when a reject test finds the pattern or string
 
false when an accept test finds the pattern or string
local function is_generic (item, value)
nil else
local test_val;


]=]
for _, generic_value in ipairs (cfg.special_case_translation[item]) do -- spin through the list of known generic value fragments
test_val = generic_value['en'][2] and value:lower() or value; -- when set to 'true', plaintext search using lowercase value


local function is_generic (item, value, wiki)
if test_val:find (generic_value['en'][1], 1, generic_value['en'][2]) then
local test_val;
return true; -- found English generic value so done
local str_lower = { -- use string.lower() for en.wiki (['en']) and use mw.ustring.lower() or local.wiki (['local'])
['en'] = string.lower,
['local'] = mw.ustring.lower,
}
local str_find = { -- use string.find() for en.wiki (['en']) and use mw.ustring.find() or local.wiki (['local'])
['en'] = string.find,
['local'] = mw.ustring.find,
}


local function test (val, test_t, wiki) -- local function to do the testing; <wiki> selects lower() and find() functions
elseif generic_value['local'] then -- to keep work load down, generic_<value>['local'] should be nil except when there is a local version of the generic value
val = test_t[2] and str_lower[wiki](value) or val; -- when <test_t[2]> set to 'true', plaintext search using lowercase value
test_val = generic_value['local'][2] and mw.ustring.lower(value) or value; -- when set to 'true', plaintext search using lowercase value
return str_find[wiki] (val, test_t[1], 1, test_t[2]); -- return nil when not found or matched
end
local test_types_t = {'accept', 'reject'}; -- test accept patterns first, then reject patterns
local wikis_t = {'en', 'local'}; -- do tests for each of these keys; en.wiki first, local.wiki second


for _, test_type in ipairs (test_types_t) do -- for each test type
if mw.ustring.find (test_val, generic_value['local'][1], 1, generic_value['local'][2]) then -- mw.ustring() because might not be Latin script
for _, generic_value in pairs (cfg.special_case_translation[item][test_type]) do -- spin through the list of generic value fragments to accept or reject
return true; -- found local generic value so done
for _, wiki in ipairs (wikis_t) do
if generic_value[wiki] then
if test (value, generic_value[wiki], wiki) then -- go do the test
return ('reject' == test_type); -- param value rejected, return true; false else
end
end
end
end
end
end
Line 1,546: Line 1,521:
and look for the new <lang_param> in cfg.mw_languages_by_tag_t{}
and look for the new <lang_param> in cfg.mw_languages_by_tag_t{}


on success, returns name (in properly capitalized form) and matching tag (in lowercase); on failure returns nil
on success, return name and matching tag; on failure return nil


]]
]]
Line 1,557: Line 1,532:
name = cfg.lang_code_remap[lang_param_lc]; -- assume <lang_param_lc> is a tag; attempt to get remapped language name  
name = cfg.lang_code_remap[lang_param_lc]; -- assume <lang_param_lc> is a tag; attempt to get remapped language name  
if name then -- when <name>, <lang_param> is a tag for a remapped language name
if name then -- when <name>, <lang_param> is a tag for a remapped language name
return name, lang_param_lc; -- so return <name> from remap and <lang_param_lc>
return name, lang_param; -- so return <name> from remap and <lang_param>
end
end


Line 1,579: Line 1,554:
if name then
if name then
return name, lang_param_lc; -- <lang_param_lc> is a tag so return it and <name>
return name, lang_param; -- <lang_param_lc> is a tag so return <name> and the tag
end
end
Line 1,626: Line 1,601:


for _, lang in ipairs (names_t) do -- reuse lang here because we don't yet know if lang is a language name or a language tag
for _, lang in ipairs (names_t) do -- reuse lang here because we don't yet know if lang is a language name or a language tag
name, tag = name_tag_get (lang); -- attempt to get name/tag pair for <lang>; <name> has proper capitalization; <tag> is lowercase
name, tag = name_tag_get (lang); -- attempt to get name/tag pair for <lang>


if utilities.is_set (tag) then
if utilities.is_set (tag) then
lang_subtag = tag:gsub ('^(%a%a%a?)%-.*', '%1'); -- for categorization, strip any IETF-like tags from language tag
lang_subtag = tag:lower():gsub ('^(%a%a%a?)%-.*', '%1'); -- for categorization, strip any IETF-like tags from language tag


if cfg.this_wiki_code ~= lang_subtag then -- when the language is not the same as this wiki's language
if cfg.this_wiki_code ~= lang_subtag then -- when the language is not the same as this wiki's language
if 2 == lang_subtag:len() then -- and is a two-character tag
if 2 == lang_subtag:len() then -- and is a two-character tag
-- utilities.add_prop_cat ('foreign-lang-source', {name, lang_subtag}, lang_subtag); -- categorize it; tag appended to allow for multiple language categorization
utilities.add_prop_cat ('foreign-lang-source', {name, lang_subtag}, lang_subtag); -- categorize it; tag appended to allow for multiple language categorization
utilities.add_prop_cat ('foreign-lang-source', {name, tag}, lang_subtag); -- categorize it; tag appended to allow for multiple language categorization
else -- or is a recognized language (but has a three-character tag)
else -- or is a recognized language (but has a three-character tag)
utilities.add_prop_cat ('foreign-lang-source-2', {lang_subtag}, lang_subtag); -- categorize it differently TODO: support multiple three-character tag categories per cs1|2 template?
utilities.add_prop_cat ('foreign-lang-source-2', {lang_subtag}, lang_subtag); -- categorize it differently TODO: support multiple three-character tag categories per cs1|2 template?
Line 2,527: Line 2,501:
if 'citation' == config.CitationClass then
if 'citation' == config.CitationClass then
if utilities.is_set (Periodical) then
if utilities.is_set (Periodical) then
if not utilities.in_array (Periodical_origin, cfg.citation_no_volume_t) then -- {{citation}} does not render |volume= when these parameters are used
if not utilities.in_array (Periodical_origin, {'website', 'mailinglist'}) then -- {{citation}} does not render volume for these 'periodicals' --TODO: move 'array' to ~/Configuration
Volume = A['Volume']; -- but does for all other 'periodicals'
Volume = A['Volume']; -- but does for all other 'periodicals'
end
end
Line 2,544: Line 2,518:
local Issue;
local Issue;
if 'citation' == config.CitationClass then
if 'citation' == config.CitationClass then
if utilities.is_set (Periodical) and utilities.in_array (Periodical_origin, cfg.citation_issue_t) then -- {{citation}} may render |issue= when these parameters are used
if utilities.is_set (Periodical) and utilities.in_array (Periodical_origin, {'journal', 'magazine', 'newspaper', 'periodical', 'work'}) or -- {{citation}} renders issue for these 'periodicals'--TODO: move 'array' to ~/Configuration
Issue = utilities.hyphen_to_dash (A['Issue']);
utilities.is_set (ScriptPeriodical) and utilities.in_array (ScriptPeriodical_origin, {'script-journal', 'script-magazine', 'script-newspaper', 'script-periodical', 'script-work'}) then -- and these 'script-periodicals'
Issue = utilities.hyphen_to_dash (A['Issue']);
end
end
elseif utilities.in_array (config.CitationClass, cfg.templates_using_issue) then -- conference & map books do not support issue; {{citation}} listed here because included in settings table
elseif utilities.in_array (config.CitationClass, cfg.templates_using_issue) then -- conference & map books do not support issue; {{citation}} listed here because included in settings table
Line 2,831: Line 2,806:
ChapterUrlAccess = UrlAccess;
ChapterUrlAccess = UrlAccess;
ChapterURL_origin = URL_origin;
ChapterURL_origin = URL_origin;
ChapterFormat = Format;
 
Title = Series; -- promote series to title
Title = Series; -- promote series to title
TitleLink = SeriesLink;
TitleLink = SeriesLink;
Line 2,845: Line 2,819:
TransTitle = '';
TransTitle = '';
ScriptTitle = '';
ScriptTitle = '';
Format = '';
else -- now oddities that are cite serial
else -- now oddities that are cite serial
Line 3,018: Line 2,991:
  end
  end


if utilities.is_set (URL) then -- set when using an identifier-created URL
if utilities.is_set (URL) and utilities.is_set (AccessDate) then -- access date requires |url=; identifier-created URL is not |url=
if utilities.is_set (AccessDate) then -- |access-date= requires |url=; identifier-created URL is not |url=
utilities.set_message ('err_accessdate_missing_url'); -- add an error message
utilities.set_message ('err_accessdate_missing_url'); -- add an error message
AccessDate = ''; -- unset
AccessDate = ''; -- unset
end
 
if utilities.is_set (ArchiveURL) then -- |archive-url= requires |url=; identifier-created URL is not |url=
utilities.set_message ('err_archive_missing_url'); -- add an error message
ArchiveURL = ''; -- unset
end
end
end
end
end
Line 3,856: Line 3,822:


local template_name = ('citation' == config.CitationClass) and 'citation' or 'cite ' .. (cfg.citation_class_map_t[config.CitationClass] or config.CitationClass);
local template_name = ('citation' == config.CitationClass) and 'citation' or 'cite ' .. (cfg.citation_class_map_t[config.CitationClass] or config.CitationClass);
local template_link = '[[Template:' .. template_name .. '|' .. template_name .. ']]';
local template_link = '[[Template:' .. template_name .. '|' .. template_name .. ']]'; -- TODO: if kept, these require some sort of i18n
local msg_prefix = '<code class="cs1-code">{{' .. template_link .. '}}</code>: ';
local msg_prefix = '<code class="cs1-code">{{' .. template_link .. '}}</code>: ';


Line 4,076: Line 4,042:
local function citation(frame)
local function citation(frame)
Frame = frame; -- save a copy in case we need to display an error message in preview mode
Frame = frame; -- save a copy in case we need to display an error message in preview mode
local sandbox = '/sandbox' -- i18n: replace this rvalue with the name that your wiki uses to identify sandbox subpages
is_sandbox = nil ~= string.find (frame:getTitle(), 'sandbox', 1, true);
is_sandbox = nil ~= string.find (frame:getTitle(), sandbox, 1, true); -- is this invoke the sandbox module?
sandbox = is_sandbox and sandbox or ''; -- use i18n sandbox to load sandbox modules when this module is the sandox; live modules else
 
local pframe = frame:getParent()
local pframe = frame:getParent()
local styles;
local styles;
cfg = mw.loadData ('Module:Citation/CS1/Configuration' .. sandbox); -- load sandbox versions of support modules when {{#invoke:Citation/CS1/sandbox|...}}; live modules else
if is_sandbox then -- did the {{#invoke:}} use sandbox version?
whitelist = mw.loadData ('Module:Citation/CS1/Whitelist' .. sandbox);
cfg = mw.loadData ('Module:Citation/CS1/Configuration/sandbox'); -- load sandbox versions of support modules
utilities = require ('Module:Citation/CS1/Utilities' .. sandbox);
whitelist = mw.loadData ('Module:Citation/CS1/Whitelist/sandbox');
validation = require ('Module:Citation/CS1/Date_validation' .. sandbox);
utilities = require ('Module:Citation/CS1/Utilities/sandbox');
identifiers = require ('Module:Citation/CS1/Identifiers' .. sandbox);
validation = require ('Module:Citation/CS1/Date_validation/sandbox');
metadata = require ('Module:Citation/CS1/COinS' .. sandbox);
identifiers = require ('Module:Citation/CS1/Identifiers/sandbox');
styles = 'Module:Citation/CS1' .. sandbox .. '/styles.css';
metadata = require ('Module:Citation/CS1/COinS/sandbox');
styles = 'Module:Citation/CS1/sandbox/styles.css';
else -- otherwise
cfg = mw.loadData ('Module:Citation/CS1/Configuration'); -- load live versions of support modules
whitelist = mw.loadData ('Module:Citation/CS1/Whitelist');
utilities = require ('Module:Citation/CS1/Utilities');
validation = require ('Module:Citation/CS1/Date_validation');
identifiers = require ('Module:Citation/CS1/Identifiers');
metadata = require ('Module:Citation/CS1/COinS');
styles = 'Module:Citation/CS1/styles.css';
end


utilities.set_selected_modules (cfg); -- so that functions in Utilities can see the selected cfg tables
utilities.set_selected_modules (cfg); -- so that functions in Utilities can see the selected cfg tables

Revision as of 16:57, 27 October 2022

<section begin=header />

Lua error in Module:TNT at line 182: Missing Commons dataset I18n/Module:TNT.tab.

<section end=header />

This module and associated sub-modules support the Citation Style 1 and Citation Style 2 citation templates. In general, it is not intended to be called directly, but is called by one of the core CS1 and CS2 templates. <section begin=module_components_table /> These files comprise the module support for CS1|2 citation templates:

CS1 | CS2 modules
live sandbox diff description
sysop Module:Citation/CS1 Module:Citation/CS1/sandbox [edit] diff Rendering and support functions
Module:Citation/CS1/Configuration Module:Citation/CS1/Configuration/sandbox [edit] diff Translation tables; error and identifier handlers
Module:Citation/CS1/Whitelist Module:Citation/CS1/Whitelist/sandbox [edit] diff List of active and deprecated CS1|2 parameters
Module:Citation/CS1/Date validation Module:Citation/CS1/Date validation/sandbox [edit] diff Date format validation functions
Module:Citation/CS1/Identifiers Module:Citation/CS1/Identifiers/sandbox [edit] diff Functions that support the named identifiers (ISBN, DOI, PMID, etc.)
Module:Citation/CS1/Utilities Module:Citation/CS1/Utilities/sandbox [edit] diff Common functions and tables
Module:Citation/CS1/COinS Module:Citation/CS1/COinS/sandbox [edit] diff Functions that render a CS1|2 template's metadata
Module:Citation/CS1/styles.css Module:Citation/CS1/sandbox/styles.css [edit] diff CSS styles applied to the CS1|2 templates
auto confirmed Module:Citation/CS1/Suggestions Module:Citation/CS1/Suggestions/sandbox [edit] diff List that maps common erroneous parameter names to valid parameter names

<section end=module_components_table />

Other documentation:

testcases


require ('Module:No globals');

--[[--------------------------< F O R W A R D   D E C L A R A T I O N S >--------------------------------------

each of these counts against the Lua upvalue limit

]]

-- These names are only placeholders when the module is first loaded; citation() assigns the
-- live or sandbox version of each support module to them when the module is invoked.
local validation;																-- functions in Module:Citation/CS1/Date_validation; assigned with require() in citation()

local utilities;																-- functions in Module:Citation/CS1/Utilities; assigned with require() in citation()
local z ={};																	-- table of tables in Module:Citation/CS1/Utilities

local identifiers;																-- functions and tables in Module:Citation/CS1/Identifiers; assigned with require() in citation()
local metadata;																	-- functions in Module:Citation/CS1/COinS; assigned with require() in citation()
local cfg = {};																	-- table of configuration tables that are defined in Module:Citation/CS1/Configuration; replaced by mw.loadData() result in citation()
local whitelist = {};															-- table of tables listing valid template parameter names; defined in Module:Citation/CS1/Whitelist; replaced by mw.loadData() result in citation()


--[[------------------< P A G E   S C O P E   V A R I A B L E S >---------------

declare variables here that have page-wide scope that are not brought in from
other modules; that are created here and used here

]]

local added_deprecated_cat;														-- Boolean flag so that the category is added only once
local added_vanc_errs;															-- Boolean flag so we only emit one Vancouver error / category; nil until add_vanc_error() sets it to true
local added_generic_name_errs;													-- Boolean flag so we only emit one generic name error / category and stop testing names once an error is encountered
local Frame;																	-- holds the module's frame table; citation() stores its <frame> argument here
local is_preview_mode;															-- true when article is in preview mode; false when using 'Preview page with this template' (previewing the module)
local is_sandbox;																-- true when using sandbox modules to render citation; citation() derives it from a plain-text search of frame:getTitle()


--[[--------------------------< F I R S T _ S E T >------------------------------------------------------------

Walks <list> from index 1 through index <count> and returns the first member for which utilities.is_set()
reports true.  Returns nothing (nil) when no member in that range is set.

The caller supplies <count> explicitly so that every position is examined in a fixed order: pairs() cannot
guarantee the order of evaluation, and ipairs() stops at the first nil member and so may never reach the
real end of the list.

]]

local function first_set (list, count)
	for index = 1, count do														-- fixed-order walk; nil members do not end the walk
		local candidate = list[index];
		if utilities.is_set (candidate) then
			return candidate;													-- first set member wins
		end
	end
end


--[[--------------------------< A D D _ V A N C _ E R R O R >----------------------------------------------------

Emits the Vancouver-style error message at most once per template, regardless of how many errors actually
exist.  The page-scope Boolean added_vanc_errs is nil until the first message has been emitted; once it is true,
further calls do nothing.

<source> and <position> are passed through to utilities.set_message() as the message's substitution values.

]]

local function add_vanc_error (source, position)
	if not added_vanc_errs then													-- only the first call does any work
		added_vanc_errs = true;													-- remember that the message has been emitted
		utilities.set_message ('err_vancouver', {source, position});
	end
end


--[[--------------------------< I S _ S C H E M E >------------------------------------------------------------

Does this thing that purports to be a URI scheme look like a valid scheme?  The test follows
http://tools.ietf.org/html/std66#section-3.1 which says:
	Scheme names consist of a sequence of characters beginning with a
   letter and followed by any combination of letters, digits, plus
   ("+"), period ("."), or hyphen ("-").

Returns a truthy value (the matched scheme text including its trailing colon) when <scheme> begins with a
well-formed scheme; returns nil when it does not.  When <scheme> itself is nil or false, that value is returned
unchanged.  Callers are expected to use the result only as a Boolean.

]]

local function is_scheme (scheme)
	if not scheme then
		return scheme;															-- nothing to test; hand back the falsy value as is
	end

	return scheme:match ('^%a[%a%d%+%.%-]*:');									-- letter, then letters / digits / + . - up to the colon
end


--[=[-------------------------< I S _ D O M A I N _ N A M E >--------------------------------------------------

Does this thing that purports to be a domain name seem to be a valid domain name?

Syntax defined here: http://tools.ietf.org/html/rfc1034#section-3.5
BNF defined here: https://tools.ietf.org/html/rfc4234
Single character names are generally reserved; see https://tools.ietf.org/html/draft-ietf-dnsind-iana-dns-01#page-15;
	see also [[Single-letter second-level domain]]
list of TLDs: https://www.iana.org/domains/root/db

RFC 952 (modified by RFC 1123) requires the first and last character of a hostname to be a letter or a digit.  Between
the first and last characters the name may use letters, digits, and the hyphen.

Also allowed are IPv4 addresses. IPv6 not supported

<domain> is expected to be stripped of any path so that its last character is the last character of the TLD.  The
TLD is two or more alpha characters.  Any preceding '//' (from splitting a URL with a scheme) is stripped here.

There are several tests:
	the first character of the whole domain name including subdomains must be a letter or a digit
	anything that begins with letters followed by a colon (s:Page:Title) is treated as an interwiki-style link, not a domain
	three-plus-character SL domains in gTLDs (where the gTLD is two or more letters)
	internationalized domain name (ASCII characters with .xn-- ASCII Compatible Encoding (ACE) prefix xn-- in the TLD) see https://tools.ietf.org/html/rfc3490
	q, x, and z SL domains in the .com TLD
	i and q SL domains in the .net TLD
	single-letter SL domains in the ccTLDs (where the ccTLD is two letters)
	two-character SL domains in gTLDs (where the gTLD is two or more letters)
	IPv4 dot-decimal address format; this pattern is anchored at the start only
	single-letter/digit second-level domains in the .cash, .company, .today, and .org TLDs

The single-letter/digit second-level domain test is anchored to the end of <domain> like the other TLD tests so
that a TLD which merely begins with one of the listed names (a.organic, b.todayish) is not accepted.

returns true if domain appears to be a proper name and TLD or IPv4 address, else false

]=]

local function is_domain_name (domain)
	if not domain then
		return false;															-- if not set, abandon
	end

	domain = domain:gsub ('^//', '');											-- strip '//' from domain name if present; done here so we only have to do it once

	if not domain:match ('^[%w]') then											-- first character must be letter or digit
		return false;
	end

	if domain:match ('^%a+:') then												-- hack to detect things that look like s:Page:Title where Page: is namespace at Wikisource
		return false;
	end

	local patterns = {															-- patterns that look like URLs
		'%f[%w][%w][%w%-]+[%w]%.%a%a+$',										-- three or more character hostname.hostname or hostname.tld
		'%f[%w][%w][%w%-]+[%w]%.xn%-%-[%w]+$',									-- internationalized domain name with ACE prefix
		'%f[%a][qxz]%.com$',													-- assigned one character .com hostname (x.com times out 2015-12-10)
		'%f[%a][iq]%.net$',														-- assigned one character .net hostname (q.net registered but not active 2015-12-10)
		'%f[%w][%w]%.%a%a$',													-- one character hostname and ccTLD (2 chars)
		'%f[%w][%w][%w]%.%a%a+$',												-- two character hostname and TLD
		'^%d%d?%d?%.%d%d?%d?%.%d%d?%d?%.%d%d?%d?',								-- IPv4 address
		}

	for _, pattern in ipairs (patterns) do										-- loop through the patterns list
		if domain:match (pattern) then
			return true;														-- if a match then we think that this thing that purports to be a URL is a URL
		end
	end

	for _, tld in ipairs ({'cash', 'company', 'today', 'org'}) do				-- look for single letter second level domain names for these top level domains
		if domain:match ('%f[%w][%w]%.' .. tld .. '$') then						-- '$' so that the TLD must be exactly <tld>, not just begin with it
			return true;
		end
	end
	return false;																-- no matches, we don't know what this thing is
end


--[[--------------------------< I S _ U R L >------------------------------------------------------------------

Final step of URL validation: decides whether the <scheme> and <domain> parts of a URL look valid.

	when <scheme> is set, both is_scheme (scheme) and is_domain_name (domain) must succeed
	when <scheme> is not set (protocol-relative URL), only is_domain_name (domain) is consulted

The result is truthy when the parts appear to form a valid URL and falsy otherwise.

This is a separate function because some callers do not go through split_url(), for example
is_parameter_ext_wikilink() which is looking for bracketed external wikilinks.

]]

local function is_url (scheme, domain)
	if not utilities.is_set (scheme) then										-- scheme not set when URL is protocol-relative
		return is_domain_name (domain);
	end

	return is_scheme (scheme) and is_domain_name (domain);						-- scheme is set so check it and domain
end


--[[--------------------------< S P L I T _ U R L >------------------------------------------------------------

Splits <url_str> into a scheme and a domain.

Before anything else, the Fully Qualified Domain Name terminator (a dot following the TLD), if any, and any
path (/), query (?), or fragment (#) are removed.

Return values:
	nil, domain		protocol-relative URL (begins with '//')
	scheme, domain	scheme followed by exactly two '/' characters (the authority indicator), or a news: scheme
					with no authority indicator; any port number that follows a letter is removed from domain
	scheme			(domain is nil, which causes an error message later) when the authority indicator is
					one, or three or more, '/' characters, or when it is missing and the scheme is not news:
	nil, nil		nothing that looks like a scheme and domain was found

Any URL that does not have news: scheme must have authority indicator (//).  TODO: are there other common schemes
like news: that don't use authority indicator?

]]

local function split_url (url_str)
	local stripped = url_str:gsub ('([%a%d])%.?[/%?#].*$', '%1');				-- drop FQDN terminator and path, query, fragment; the capture keeps '//' from being mistaken for a path

	local relative_domain = stripped:match ('^//(%S*)');						-- protocol-relative URL? (may capture an empty string)
	if relative_domain then
		return nil, relative_domain;											-- no scheme for protocol-relative URLs
	end

	local scheme, slashes, domain = stripped:match ('(%S-:)(/*)(%S+)');			-- scheme, optional authority indicator, and domain name
	if not scheme then
		return nil, nil;														-- nothing here that looks like a URL
	end

	if utilities.is_set (slashes) then
		slashes = slashes:gsub ('//', '', 1);									-- remove one pair of '/'
		if utilities.is_set (slashes) then										-- anything left means 1 or 3+ '/' where the authority indicator should be
			return scheme;														-- scheme only; nil domain will cause an error message
		end
	elseif not scheme:match ('^news:') then										-- except for news:..., MediaWiki won't link URLs that do not have authority indicator
		return scheme;															-- scheme only; nil domain will cause an error message
	end

	return scheme, (domain:gsub ('(%a):%d+', '%1'));							-- strip port number if present; parentheses discard gsub()'s replacement count
end


--[[--------------------------< L I N K _ P A R A M _ O K >---------------------------------------------------

Validates the content of |title-link=, |series-link=, |author-link=, etc.: no wikilinks, no URLs.

Link parameters hold the title of a Wikipedia article, so these WP:TITLESPECIALCHARACTERS are not allowed:
	< > [ ] | { }
The underscore (used as a space in wiki URLs) and # (used for section links) are permitted.

Returns false when <value> contains any of the prohibited characters.  Otherwise returns true when <value>
DOES NOT appear to be a valid URL (the |<param>-link= parameter is ok) and false when it does appear to be a
valid URL (the |<param>-link= parameter is NOT ok).

]]

local function link_param_ok (value)
	if value:find ('[<>%[%]|{}]') then											-- any prohibited character disqualifies the value
		return false;
	end

	return not is_url (split_url (value));										-- scheme (or nil) and domain (or nil) go straight to is_url(); ok only when this is not a URL
end


--[[--------------------------< L I N K _ T I T L E _ O K >---------------------------------------------------

Use link_param_ok() to validate |<param>-link= value and its ma