Module:Internet Archive
From ChaldeanWiki
Documentation for this module may be created at Module:Internet Archive/doc
--[[

For functions related to Internet Archive

Notes:

1. Internet Archive runs Lucene search engine (old site). The Beta (new site)
   runs Elasticsearch. The search strategies will be updated when the switch
   occurs, the current strategy is based on Lucene.

2. Program flowchart:

     Check if author has birth-death data on Wikidata
       If birth-death data
         Check number of words in name
           Create URL for a 1,2,3,4 or 5+ word name. Include birth-death data.
           If accented characters in name append wildcard search to end.
       If not birth-death data
         Repeat same as above but exclude birth-death data.

3. Lucene has a number of known issues with searching

   A. Names with accented letters (é) - aka extended ascii - are problematic.
      There are records on IA in which the accent has been dropped thus é is e
      in the record. Thus a search strategy has to use wildcards in place of
      extended ascii characters. The "?" wildcard does not work correctly on
      Lucene and thus recommend "*". Wildcards severely slow down search times
      and after about 5 or 8 wildcards it may even time out. Thus, only use
      wildcards in a single expression within a search string.

   B. Extended ascii doesn't work correctly if
        1. not surrounded in quotes and
        2. search string contains numbers and/or wildcards somewhere in it and
        3. multiple () statements.
      The extended ascii character becomes interpreted as ascii eg. é -> é
      Try for example :
        (Évariste Régis Huc) OR (Évariste Régis Huc 1813-1860) OR (É. R. Huc)
        OR (É. R. Huc 1813-1860) OR (Évariste R. Huc)
        OR (Évariste R. Huc 1813-1860) OR (Évariste Huc)
        OR (Évariste Huc 1813-1860)

   C. B can be nullified by enclosing the string in quotes, but this creates a
      literal string and many permutations must be searched on ("John Smith" OR
      "Smith, John" etc). For names longer than 2 words it could exceed URL
      limits. URLs are limited to about 2000 characters to account for most
      browsers (IE is 2083). Thus, search strategies used are a balance between
      possibilities and URL length.

]]

local p = {}

-- Percent-encoded fragments shared by all query builders.
local OR = "%20OR%20"    -- " OR "
local COMMA = "%2C%20"   -- ", "
local SPACE = "%20"      -- " "
local DOT = "%2E"        -- "."

-- True when a template argument is missing or empty.
local function is_blank(value)
  return value == nil or value == ""
end

-- Split a string into its whitespace-separated words. Unlike splitting on a
-- single " ", runs of whitespace never produce empty entries, so #result is
-- always the real word count.
local function split_words(str)
  local words = {}
  for word in mw.ustring.gmatch(str, "%S+") do
    words[#words + 1] = word
  end
  return words
end

-- Percent-encode every byte of one (possibly multi-byte) character.
local function percent_encode(char)
  return (string.gsub(char, ".", function (byte)
    return string.format("%%%02X", string.byte(byte))
  end))
end

-- Turn an ordered list of { field, text } pairs into
--   %28field:"text" OR field:"text" ...
-- (percent-encoded). The closing %29 is left to the caller so that a wildcard
-- term can still be appended inside the parentheses.
local function build_query(clauses)
  local parts = {}
  for i, clause in ipairs(clauses) do
    parts[i] = clause[1] .. "%3A%22" .. clause[2] .. "%22"
  end
  return "%28" .. table.concat(parts, OR)
end

-- Query for a single-word name; nameurl must already be URL-encoded.
local function one_word_query(nameurl)
  return build_query({
    { "subject", nameurl },
    { "creator", nameurl },
    { "description", nameurl },
    { "title", nameurl },
  })
end

-- Query for a two-word name. dates is "birth-death" or nil; when present each
-- "Last, First" / "First Last" form in subject, creator and description is
-- preceded by the same form with ", birth-death" appended.
local function two_word_query(N, dates)
  local first, last = N[1], N[2]
  local last_first = last .. COMMA .. first
  local first_last = first .. SPACE .. last
  local last_initial = last .. COMMA .. mw.ustring.sub(first, 1, 1) .. DOT

  local clauses = {}
  local function add(field, form, datable)
    if dates and datable then
      clauses[#clauses + 1] = { field, form .. COMMA .. dates }
    end
    clauses[#clauses + 1] = { field, form }
  end

  add("subject", last_first, true)
  add("subject", first_last, true)
  add("creator", last_first, true)
  add("creator", first_last, true)
  add("creator", last_initial, false)     -- Last, F.
  add("title", first_last, false)
  add("description", last_first, true)
  add("description", first_last, true)
  return build_query(clauses)
end

-- Query for a four-word name. dates is "birth-death" or nil; when present it
-- is appended to the "Last, First Second Third" form only.
local function four_word_query(N, dates)
  local first, second, third, last = N[1], N[2], N[3], N[4]
  local given = first .. SPACE .. second .. SPACE .. third
  local natural = given .. SPACE .. last          -- First Second Third Last
  local inverted = last .. COMMA .. given         -- Last, First Second Third
  if dates then
    inverted = inverted .. COMMA .. dates
  end
  -- Last, F. S. T.
  local initials = last .. COMMA
    .. mw.ustring.sub(first, 1, 1) .. DOT .. SPACE
    .. mw.ustring.sub(second, 1, 1) .. DOT .. SPACE
    .. mw.ustring.sub(third, 1, 1) .. DOT

  return build_query({
    { "subject", inverted },
    { "subject", natural },
    { "creator", inverted },
    { "creator", natural },
    { "creator", initials },
    { "title", natural },
    { "description", natural },
  })
end

-- Extract the year from a formatted Wikidata date such as "1 January 1900" or
-- "384 BCE". Uses the last word when it starts with a digit, the word before
-- it when the last word is an era marker (BC/BCE/AD), otherwise "none".
local function year_from_formatted(value)
  local words = split_words(value or "")
  local last = words[#words]
  if last == nil then
    return "none"
  end
  if string.find(last, "^%d") then
    return last
  end
  if string.find(last, "BC", 1, true) or string.find(last, "AD", 1, true) then
    -- A lone era marker has no preceding year; report "none" rather than nil.
    return words[#words - 1] or "none"
  end
  return "none"
end

-- Year found in a "[[Category:NNNN births]]"-style link, or "none".
-- kind is "births" or "deaths".
local function category_year(pagetext, kind)
  local link = mw.ustring.match(pagetext,
    "%[%[%s-[Cc]ategory:%s-%d+%.?%d*%s-" .. kind .. "%s-%]%]")
  if link == nil then
    return "none"
  end
  return mw.ustring.match(link, "%d+%.?%d*")
end

--[[
For Template:Internet Archive author

Reads the parent (template) arguments name, sname, dname, coda, search, media,
birth and death and returns a wikitext external link to an archive.org search.
]]
function p.author(frame)

  local args = frame:getParent().args
  local tname = "Internet Archive author"   -- name of calling template. Change if template rename.
  local byabout = "Works by or about"
  local tagline = "at [[Internet Archive]]"
  local urlhead = "//archive.org/search.php?query="

  --- Determine name
  -- name:  article name (default: current page)
  -- dname: display name (default: name)
  -- sname: search name (default: name)
  local name
  if is_blank(args.name) then
    name = mw.title.getCurrentTitle().text
  else
    name = mw.text.trim(args.name)
  end
  local dname, sname = name, name
  if not is_blank(args.sname) then
    sname = mw.text.trim(args.sname)
  end
  if not is_blank(args.dname) then
    dname = mw.text.trim(args.dname)
  end
  -- remove disambiguation (); the parentheses drop gsub's second return value
  dname = (mw.ustring.gsub(dname, "%s%(.*%)", ""))
  sname = (mw.ustring.gsub(sname, "%s%(.*%)", ""))

  --- Determine tagline
  if not is_blank(args.coda) then
    tagline = tagline .. " " .. mw.text.trim(args.coda)
  end

  -- Wrap a finished query string in the external-link wikitext.
  local function link(query)
    return "[" .. urlhead .. query .. " " .. byabout .. " " .. dname .. "] " .. tagline
  end

  --- Custom search. Do early to avoid unnecessary processing.
  if not is_blank(args.search) then
    return link(p.ia_url_encode(mw.text.trim(args.search)))
  end

  -- Determine media string
  local media = p.mediaTypes(args.media)

  -- Determine date of birth and death string
  local bd = split_words(p.bdDate(args.birth, args.death, name))
  local birth, death = bd[1], bd[2]
  if birth == "Error" or death == "Error" then
    return "Error in [[:Template:"..tname.."]]: [[" ..name.. "]] doesn't exist."
  end

  --- Split sname into words and count words
  local N = split_words(sname)
  local count = #N

  -- Dates are only used when both years are known.
  local dates = nil
  if birth and death and birth ~= "none" and death ~= "none" then
    dates = birth .. "-" .. death
  end

  -- If accented characters in name, a wildcard version of it is OR-ed in.
  local accented = p.ia_extendedascii(sname) == 1
  local wild = ""
  if accented then
    wild = OR .. p.ia_url_encode(p.ia_extendedasciireplace(sname))
  end

  --[[ Format URL ]]
  local query
  if count == 1 then
    query = one_word_query(p.ia_url_encode(sname)) .. wild .. "%29"
  elseif count == 2 then
    query = two_word_query(N, dates) .. wild .. "%29"
  elseif count == 3 then
    query = p.threeWords(N) .. wild .. "%29"
    if dates then
      -- OR ("birth-death" AND Last)
      query = query .. OR .. "%28%22" .. dates .. "%22%20AND%20" .. N[3] .. "%29"
    end
  elseif count == 4 then
    -- Without accents the plain name is OR-ed in instead of the wildcard form.
    local extra = wild
    if not accented then
      extra = OR .. p.ia_url_encode(sname)
    end
    query = four_word_query(N, dates) .. extra .. "%29"
  elseif count > 4 then
    -- Too many permutations for field searches: search the bare name.
    query = "%28" .. p.ia_url_encode(sname) .. "%29" .. wild
  else
    return "Unknown error (1). Please check documentation for [[Template:"..tname.."]]"
  end

  return link(media .. query)

end

-- Build the field-search part of the query for a three-word name.
-- N is a table of the three words { First, Middle, Last }. The result starts
-- with %28 and has no closing %29.
--
-- CAUTION: This is near the max 2000 character URL limit for most browsers
-- when using long names such as "René-Nicolas Dufriche Desgenettes".
function p.threeWords(N)

  local first, middle, last = N[1], N[2], N[3]
  local fi = mw.ustring.sub(first, 1, 1) .. DOT     -- "F."
  local mi = mw.ustring.sub(middle, 1, 1) .. DOT    -- "M."

  local first_middle_last = first .. SPACE .. middle .. SPACE .. last
  local first_m_last = first .. SPACE .. mi .. SPACE .. last
  local f_m_last = fi .. SPACE .. mi .. SPACE .. last
  local f_middle_last = fi .. SPACE .. middle .. SPACE .. last
  local first_last = first .. SPACE .. last
  local last_first_middle = last .. COMMA .. first .. SPACE .. middle
  local last_first_m = last .. COMMA .. first .. SPACE .. mi
  local last_f_m = last .. COMMA .. fi .. SPACE .. mi
  local last_f_middle = last .. COMMA .. fi .. SPACE .. middle
  local last_first = last .. COMMA .. first

  return build_query({
    { "subject", last_first_middle },
    { "subject", last_first_m },
    { "subject", last_f_m },
    { "subject", first_middle_last },
    { "subject", first_m_last },
    { "subject", f_m_last },
    { "subject", last_first },
    { "subject", first_last },

    { "creator", first_middle_last },
    { "creator", first_m_last },
    { "creator", f_m_last },
    { "creator", f_middle_last },
    { "creator", last_first_middle },
    { "creator", last_first_m },
    { "creator", last_f_m },
    { "creator", last_f_middle },
    { "creator", first_last },
    { "creator", last_first },

    { "title", first_middle_last },
    { "title", first_m_last },
    { "title", f_m_last },
    { "title", first_last },

    { "description", first_middle_last },
    { "description", first_m_last },
    { "description", f_m_last },
    { "description", last_first_middle },
    { "description", last_first_m },
    { "description", first_last },
    { "description", last_first },
  })

end

-- Build the media-type restriction that prefixes the query.
-- argsmedia is a whitespace-separated list of "text"/"texts", "audio" and
-- "video" (case-insensitive). Returns e.g.
--   %28mediatype%3Atexts+OR+mediatype%3Aaudio)%20AND%20
-- or "" when the argument is missing or names no recognised media type.
function p.mediaTypes(argsmedia)

  if is_blank(argsmedia) then
    return ""
  end

  local known = { text = "texts", texts = "texts", audio = "audio", video = "video" }
  local terms = {}
  for word in mw.ustring.gmatch(argsmedia, "%S+") do
    local mediatype = known[mw.ustring.lower(word)]
    if mediatype then
      terms[#terms + 1] = "mediatype:" .. mediatype
    end
  end

  -- Nothing recognised: emit no restriction rather than a dangling ")".
  if #terms == 0 then
    return ""
  end
  return p.ia_url_encode("(" .. table.concat(terms, " OR ")) .. ")%20AND%20"

end

-- Alt way to get b/d dates via getContent()
-- Explicit argsbirth/argsdeath win; otherwise the years are read from the
-- "Category:NNNN births" / "Category:NNNN deaths" links of the page `name`.
-- Returns "birth death" (either may be "none"), or "Error" when the page
-- cannot be loaded.
function p.bdDateAlt(argsbirth, argsdeath, name)

  -- Load the page. mw.title.new() yields nil for an invalid title.
  local title = mw.title.new(name)
  local pagetext = nil
  if title and title.exists then
    pagetext = title:getContent()
  end
  if pagetext == nil then
    return "Error"
  end

  -- Remove false positives
  pagetext = mw.ustring.gsub(pagetext, "<!--.--->", "")
  pagetext = mw.ustring.gsub(pagetext, "<nowiki>.-</nowiki>", "")

  local birth, death
  if is_blank(argsbirth) then
    birth = category_year(pagetext, "births")   -- "Category:1900 births"
  else
    birth = mw.text.trim(argsbirth)
  end
  if is_blank(argsdeath) then
    death = category_year(pagetext, "deaths")   -- "Category:2000 deaths"
  else
    death = mw.text.trim(argsdeath)
  end

  return birth .. " " .. death

end

-- Get b/d dates via Wikidata.
-- Explicit argsbirth/argsdeath win; otherwise P569 / P570 of the current
-- page's entity are used. Returns "birth death" (either may be "none"), or
-- whatever p.bdDateAlt returns when no entity is available.
function p.bdDate(argsbirth, argsdeath, name)

  local entity = mw.wikibase.getEntityObject()
  if not entity or not entity.claims then
    -- Alternative if template not on a page in mainspace. This is needed since Wikidata can only be retrieved
    -- for the article where the template is located.
    return p.bdDateAlt(argsbirth, argsdeath, name)
  end

  -- Note: The below uses formatPropertyValues() to get and format the date from Wikidata.
  -- For an alternative method, see sandbox revision dated 5:58 am, 15 October 2014
  local function year_for(explicit, property)
    if not is_blank(explicit) then
      return mw.text.trim(explicit)
    end
    local formatted = entity:formatPropertyValues(property)
    return year_from_formatted(formatted and formatted.value)
  end

  local birth = year_for(argsbirth, "P569")
  local death = year_for(argsdeath, "P570")

  -- A fallback to p.bdDateAlt when Wikidata has neither date was considered
  -- by the original author but is disabled.
  return birth .. " " .. death

end

--- URL-encode a string
--- http://lua-users.org/wiki/StringRecipes
--- Letters, digits, "-", "_", "." and "~" are kept, spaces become "+", and
--- every other character is percent-encoded byte by byte. nil is returned
--- unchanged.
function p.ia_url_encode(str)
  if str then
    str = mw.ustring.gsub(str, "\n", "\r\n")
    -- The match is one whole character, which may be several UTF-8 bytes.
    str = mw.ustring.gsub(str, "([^%w %-%_%.%~])", percent_encode)
    str = mw.ustring.gsub(str, " ", "+")
  end
  return str
end

-- Does str contain extended ascii? 1 = yes
-- Any byte outside 32..126 counts, and so does ' (39) since it is sometimes
-- the extended character ’.
function p.ia_extendedascii(str)
  for i = 1, #str do
    local byte = str:byte(i)
    if byte < 32 or byte > 126 or byte == 39 then
      return 1
    end
  end
  return 0
end

-- Replace all extended ascii characters with wildcard '*'
-- Exactly one '*' is produced per non-ascii character, whatever its UTF-8
-- length.
function p.ia_extendedasciireplace(str)
  local out = {}
  for i = 1, #str do
    local byte = str:byte(i)
    if byte >= 32 and byte <= 126 then
      -- For list of Lucene special characters needing to be escaped:
      --   http://lucene.apache.org/core/4_10_0/queryparser/org/apache/lucene/queryparser/classic/package-summary.html#Escaping_Special_Characters
      -- We only worry about - (45) and " (34) since the others are unlikely to appear in a proper name.
      -- Also ' (39) since it is sometimes the extended character ’
      if byte == 45 or byte == 34 or byte == 39 then
        out[#out + 1] = "*"
      else
        out[#out + 1] = str:sub(i, i)
      end
    elseif byte < 128 or byte >= 192 then
      -- Control character, or the lead byte of a multi-byte character.
      out[#out + 1] = "*"
    end
    -- Bytes 128..191 continue a character already replaced: emit nothing.
  end
  return table.concat(out)
end

return p