Last modified on 6 March 2015, at 10:58

Module:Internet Archive

Revision as of 10:58, 6 March 2015 by Green Cardamom (Talk) (date support for 3-words)

(diff) ← Older revision | Latest revision (diff) | Newer revision → (diff)

Documentation for this module may be created at Module:Internet Archive/doc

--[[ 

For functions related to Internet Archive

Notes: 

1. Internet Archive runs Lucene search engine (old site). The Beta (new site) runs Elasticsearch. 
   The search strategies will be updated when the switch occurs, the current strategy is based on Lucene.

2. Program flowchart:

     Check if author has birth-death data on Wikidata
       If birth-death data
         Check number of words in name
           Create URL for a 1,2,3,4 or 5+ word name. Include birth-death data.
           If accented characters in name append wildcard search to end.
       If not birth-death data 
         Repeat same as above but exclude birth-death data.

3. Lucene has a number of known issues with searching

   A. Names with accented letters (é) - aka extended ascii - are problematic. There are records on IA in which the accent has been
      dropped thus é is e in the record. Thus a search strategy has to use wildcards in place of extended ascii characters. The "?"
      wildcard does not work correctly on Lucene and thus recommend "*". Wildcards severely slow down search times and after about 
      5 or 8 wildcards it may even time out. Thus, only use wildcards in a single expression within a search string.
   B. Extended ascii doesn't work correctly if 1. not surrounded in quotes and 2. search string contains numbers and/or wildcards
      somewhere in it and 3. multiple () statements. The extended ascii character becomes misinterpreted as single-byte text, eg. é -> Ã©
      Try for example : (Évariste Régis Huc) OR (Évariste Régis Huc 1813-1860) OR (É. R. Huc) OR (É. R. Huc 1813-1860) OR (Évariste R. Huc) OR (Évariste R. Huc 1813-1860) OR (Évariste Huc) OR (Évariste Huc 1813-1860)
   C. B can be nullified by enclosing the string in quotes, but this creates a literal string and many permutations must be searched
      on ("John Smith" OR "Smith, John" etc). For names longer than 2 words it could exceed URL limits. URLs are limited to about 2000
      characters to account for most browsers (IE is 2083). Thus, search strategies used are a balance between possibilities 
      and URL length.

]]

-- Module table: the functions attached to it below are what the module's final `return p` hands back.
local p = {}

--[[ 

For Template:Internet Archive author

]]
--- Build the "Works by or about <name> at Internet Archive" external link.
--
-- Template arguments are read from the parent frame:
--   name   article name used for the birth/death lookup (default: current page title)
--   sname  name used to build the search (default: name)
--   dname  name shown in the link text (default: name)
--   coda   extra text appended to the tagline
--   search custom, ready-made query; short-circuits everything else
--   media  space separated media types, passed to p.mediaTypes
--   birth, death  override the years looked up by p.bdDate
--
-- The search strategy depends on how many words sname has (1, 2, 3, 4 or 5+)
-- and on whether both a birth and a death year are known.
--
-- @param frame frame object of the #invoke call
-- @return wikitext for the external link, or an error message string
function p.author(frame)

  local pframe = frame:getParent()
  local args = pframe.args

  local tname = "Internet Archive author" -- name of calling template. Change if template rename.
  local byabout = "Works by or about"
  local tagline = "at [[Internet Archive]]"
  local urlhead = "//archive.org/search.php?query="

  -- Append one field:"phrase" clause (already percent-encoded) per phrase.
  local function add_clauses(clauses, field, phrases)
    for _, phrase in ipairs(phrases) do
      clauses[#clauses + 1] = field .. "%3A%22" .. phrase .. "%22"
    end
  end

  -- Open a "(" group containing the clauses joined by OR.
  -- The closing "%29" is added by the caller, after any extra OR terms.
  local function open_group(clauses)
    return "%28" .. table.concat(clauses, "%20OR%20")
  end

  -- Query for a "First Last" name. dates is "birth-death" or nil.
  local function two_word_query(N, dates)
    local first, last = N[1], N[2]
    local firstinitial = mw.ustring.sub(first, 1, 1)
    local inverted = last .. "%2C%20" .. first -- Last, First
    local natural = first .. "%20" .. last     -- First Last

    local forms
    if dates then
      -- Each form is tried with and without the ", birthyear-deathyear" suffix.
      forms = {
        inverted .. "%2C%20" .. dates, inverted,
        natural .. "%2C%20" .. dates, natural,
      }
    else
      forms = { inverted, natural }
    end

    local clauses = {}
    add_clauses(clauses, "subject", forms)
    add_clauses(clauses, "creator", forms)
    add_clauses(clauses, "creator", { last .. "%2C%20" .. firstinitial .. "%2E" }) -- Last, F.
    add_clauses(clauses, "title", { natural })
    add_clauses(clauses, "description", forms)
    return open_group(clauses)
  end

  -- Query for a "First Second Third Last" name. dates is "birth-death" or nil.
  local function four_word_query(N, dates)
    local first, second, third, last = N[1], N[2], N[3], N[4]
    local firstinitial = mw.ustring.sub(first, 1, 1)
    local secondinitial = mw.ustring.sub(second, 1, 1)
    local thirdinitial = mw.ustring.sub(third, 1, 1)

    local given = first .. "%20" .. second .. "%20" .. third
    local natural = given .. "%20" .. last     -- First Second Third Last
    local inverted = last .. "%2C%20" .. given -- Last, First Second Third
    if dates then
      inverted = inverted .. "%2C%20" .. dates -- ..., birthyear-deathyear
    end
    -- Last, F. S. T.
    local initials = last .. "%2C%20" .. firstinitial .. "%2E%20" .. secondinitial .. "%2E%20" .. thirdinitial .. "%2E"

    local clauses = {}
    add_clauses(clauses, "subject", { inverted, natural })
    add_clauses(clauses, "creator", { inverted, natural, initials })
    add_clauses(clauses, "title", { natural })
    add_clauses(clauses, "description", { natural })
    return open_group(clauses)
  end

  --- Determine name
  local name -- article name (default: current page)
  if args.name == "" or args.name == nil then
    name = mw.title.getCurrentTitle().text
  else
    name = mw.text.trim(args.name)
  end
  local dname = name -- display name
  local sname = name -- search name
  if args.sname ~= nil and args.sname ~= "" then
    sname = mw.text.trim(args.sname)
  end
  if args.dname ~= nil and args.dname ~= "" then
    dname = mw.text.trim(args.dname)
  end
  dname = mw.ustring.gsub(dname, "%s%(.*%)", "") -- remove disambiguation ()
  sname = mw.ustring.gsub(sname, "%s%(.*%)", "")

  --- Determine tagline
  if args.coda ~= "" and args.coda ~= nil then
    tagline = tagline .. " " .. mw.text.trim(args.coda)
  end

  --- Custom search. Do early to avoid unnecessary processing.
  if args.search ~= "" and args.search ~= nil then
    local search = p.ia_url_encode(mw.text.trim(args.search))
    return "[" .. urlhead .. search .. " " .. byabout .. " " .. dname .. "] " .. tagline
  end

  -- Determine media string
  local media = p.mediaTypes(args.media)

  -- Determine date of birth and death string ("birth death", or "Error")
  local temp = mw.text.split(p.bdDate(args.birth, args.death, name), " ")
  local birth = temp[1]
  local death = temp[2]
  if birth == "Error" or death == "Error" then
    return "Error in [[:Template:"..tname.."]]: [[" ..name.. "]] doesn't exist."
  end

  --- Split sname into words and count words.
  -- Collecting the %S+ runs keeps the word list and the word count in step
  -- even when sname contains consecutive spaces (a plain split on " " would
  -- produce empty "words" there while the count ignored them).
  local N = {}
  for word in mw.ustring.gmatch(sname, "%S+") do
    N[#N + 1] = word
  end
  local count = #N
  if count == 0 then
    return "Unknown error (1). Please check documentation for [[Template:"..tname.."]]"
  end

  -- Dates are only used when both years are known.
  local dates = nil
  if birth ~= "none" and death ~= "none" then
    dates = birth .. "-" .. death
  end

  -- Names with characters outside plain ASCII get one extra wildcard term.
  local wild = nil
  if p.ia_extendedascii(sname) == 1 then
    wild = "%20OR%20" .. p.ia_url_encode(p.ia_extendedasciireplace(sname))
  end

  --[[

      Format URL

  ]]
  -- NOTE(review): in the 2-, 3- and 4-word queries the name parts are inserted
  -- as-is, whereas the 1-word and 5+-word queries pass sname through
  -- p.ia_url_encode. Kept unchanged; confirm this is intended.
  local query
  if count == 1 then
    local nameurl = p.ia_url_encode(sname)
    query = "%28subject%3A%22"..nameurl.."%22%20OR%20creator%3A%22"..nameurl.."%22%20OR%20description%3A%22"..nameurl.."%22%20OR%20title%3A%22"..nameurl.."%22"
      .. (wild or "") .. "%29"
  elseif count == 2 then
    query = two_word_query(N, dates) .. (wild or "") .. "%29"
  elseif count == 3 then
    query = p.threeWords(N) .. (wild or "") .. "%29"
    if dates then
      -- OR ("birth-death" AND Last), outside the main group
      query = query .. "%20OR%20%28%22" .. dates .. "%22%20AND%20" .. N[3] .. "%29"
    end
  elseif count == 4 then
    -- Without a wildcard term, fall back to an unquoted search on the whole name.
    query = four_word_query(N, dates) .. (wild or ("%20OR%20" .. p.ia_url_encode(sname))) .. "%29"
  else
    -- 5+ words: too many permutations, search the name as plain terms.
    query = "%28" .. p.ia_url_encode(sname) .. "%29" .. (wild or "")
  end

  return "[" .. urlhead .. media .. query .. " " .. byabout .. " " .. dname .. "] " .. tagline

end

--- Build the opening part of the search query for a three-word name.
--
-- The result is an opened "(" group of field:"phrase" clauses joined by OR,
-- covering the subject, creator, title and description fields. The group is
-- NOT closed here: the caller appends further terms and the closing "%29".
--
-- CAUTION: This is near the max 2000 character URL limit for most browsers
--          when using long names such as "René-Nicolas Dufriche Desgenettes".
--
-- @param N table holding the three name parts: N[1] first, N[2] middle, N[3] last
-- @return percent-encoded query fragment
function p.threeWords(N)

      local first, middle, last = N[1], N[2], N[3]
      local fi = mw.ustring.sub(first, 1, 1)  -- first initial
      local mi = mw.ustring.sub(middle, 1, 1) -- middle initial

      local SP, COMMA, DOT = "%20", "%2C%20", "%2E"

      -- Every spelling of the name that is searched for.
      local form = {
        last_first_middle = last .. COMMA .. first .. SP .. middle,            -- Last, First Middle
        last_first_mi     = last .. COMMA .. first .. SP .. mi .. DOT,         -- Last, First M.
        last_fi_mi        = last .. COMMA .. fi .. DOT .. SP .. mi .. DOT,     -- Last, F. M.
        last_fi_middle    = last .. COMMA .. fi .. DOT .. SP .. middle,        -- Last, F. Middle
        last_first        = last .. COMMA .. first,                            -- Last, First
        first_middle_last = first .. SP .. middle .. SP .. last,               -- First Middle Last
        first_mi_last     = first .. SP .. mi .. DOT .. SP .. last,            -- First M. Last
        fi_mi_last        = fi .. DOT .. SP .. mi .. DOT .. SP .. last,        -- F. M. Last
        fi_middle_last    = fi .. DOT .. SP .. middle .. SP .. last,           -- F. Middle Last
        first_last        = first .. SP .. last,                               -- First Last
      }

      -- Which spellings are tried in which field, in output order.
      local layout = {
        { "subject", {
            "last_first_middle", "last_first_mi", "last_fi_mi",
            "first_middle_last", "first_mi_last", "fi_mi_last",
            "last_first", "first_last" } },
        { "creator", {
            "first_middle_last", "first_mi_last", "fi_mi_last", "fi_middle_last",
            "last_first_middle", "last_first_mi", "last_fi_mi", "last_fi_middle",
            "first_last", "last_first" } },
        { "title", {
            "first_middle_last", "first_mi_last", "fi_mi_last", "first_last" } },
        { "description", {
            "first_middle_last", "first_mi_last", "fi_mi_last",
            "last_first_middle", "last_first_mi",
            "first_last", "last_first" } },
      }

      local clauses = {}
      for _, group in ipairs(layout) do
        local field, keys = group[1], group[2]
        for _, key in ipairs(keys) do
          clauses[#clauses + 1] = field .. "%3A%22" .. form[key] .. "%22"
        end
      end

      return "%28" .. table.concat(clauses, "%20OR%20")

end

--- Turn the template's media argument into a query prefix.
--
-- Recognized words (case-insensitive, separated by whitespace) are
-- "text"/"texts", "audio" and "video". Unrecognized words are ignored.
--
-- @param argsmedia raw media argument, may be nil or ""
-- @return "" when no recognized media type is given, otherwise a fragment such
--         as "%28mediatype%3Atexts+OR+mediatype%3Aaudio)%20AND%20" that is
--         placed in front of the name query
function p.mediaTypes(argsmedia)

  if argsmedia == nil or argsmedia == "" then
    return ""
  end

  -- Accepted spellings and the mediatype value searched for.
  local mediatype_for = {
    text  = "texts",
    texts = "texts",
    audio = "audio",
    video = "video",
  }

  -- Iterating over the %S+ runs copes with leading, trailing and repeated
  -- whitespace, and with an argument that is whitespace only.
  local clauses = {}
  for word in mw.ustring.gmatch(argsmedia, "%S+") do
    local mediatype = mediatype_for[mw.ustring.lower(word)]
    if mediatype then
      clauses[#clauses + 1] = "mediatype%3A" .. mediatype
    end
  end

  -- Nothing recognized: emit no filter at all rather than a dangling ")".
  if #clauses == 0 then
    return ""
  end

  -- The group is opened from the first recognized type (not the first word),
  -- so the parentheses always balance.
  return "%28" .. table.concat(clauses, "+OR+") .. ")%20AND%20"
end

-- Alt way to get b/d dates via getContent()
--- Get birth and death years from the wikitext of page `name`, by looking for
-- "[[Category:NNNN births]]" and "[[Category:NNNN deaths]]".
--
-- @param argsbirth birth year given to the template; used as-is (trimmed) when not empty
-- @param argsdeath death year given to the template; used as-is (trimmed) when not empty
-- @param name title of the page whose text is scanned
-- @return "Error" when the page cannot be loaded, otherwise "birth death"
--         where each part is a year or the string "none"
function p.bdDateAlt(argsbirth, argsdeath, name)

    -- Load the page. mw.title.new returns nil for an invalid title, so guard
    -- before looking at .exists.
    local pagetext = nil
    local t = mw.title.new(name)
    if t and t.exists then
      pagetext = t:getContent()
    end
    if pagetext == nil then
      return "Error"
    end

    -- Remove false positives: categories inside HTML comments or <nowiki>.
    -- The hyphens are escaped because "-" is a lazy quantifier in Lua patterns;
    -- unescaped, the pattern also swallowed spans such as "<- ... ->".
    pagetext = mw.ustring.gsub(pagetext, "<!%-%-.-%-%->", "")
    pagetext = mw.ustring.gsub(pagetext, "<nowiki>.-</nowiki>", "")

    -- Year from "[[Category:<year> <kind>]]", or "none" when there is no such category.
    local function year_from_category(kind)
      local category = mw.ustring.match(pagetext, "%[%[%s-[Cc]ategory:%s-%d+%.?%d*%s-" .. kind .. "%s-%]%]")
      if category == nil then
        return "none"
      end
      return mw.ustring.match(category, "%d+%.?%d*")
    end

    -- "Category:1900 births"
    local birth
    if argsbirth == "" or argsbirth == nil then
      birth = year_from_category("births")
    else
      birth = mw.text.trim(argsbirth)
    end

    -- "Category:2000 deaths"
    local death
    if argsdeath == "" or argsdeath == nil then
      death = year_from_category("deaths")
    else
      death = mw.text.trim(argsdeath)
    end

    return birth .. " " .. death

end

-- Get b/d dates via Wikidata.
-- Falls back to p.bdDateAlt when no Wikidata entity with claims is available.
--- Get birth and death years, preferring explicit template arguments and
-- otherwise reading properties P569 (birth) and P570 (death) from Wikidata.
--
-- @param argsbirth birth year given to the template; used as-is (trimmed) when not empty
-- @param argsdeath death year given to the template; used as-is (trimmed) when not empty
-- @param name page title, only used by the p.bdDateAlt fallback
-- @return "birth death", each part being a year or the string "none"
--         (or whatever p.bdDateAlt returns when the fallback is taken)
function p.bdDate(argsbirth, argsdeath, name)

  -- Kept local: a bare assignment here would create a global.
  local entity = mw.wikibase.getEntityObject()
  if not entity or not entity.claims then
    -- Alternative if template not on a page in mainspace. This is needed since Wikidata can only be retrieved
    -- for the article where the template is located.
    return p.bdDateAlt(argsbirth, argsdeath, name)
  end

  -- Note: The below uses formatPropertyValues() to get and format the date from Wikidata.
  --       For an alternative method, see sandbox revision dated 5:58 am, 15 October 2014
  --
  -- Year taken from the formatted value of `property`: the last word when it
  -- starts with a digit, or the word before a trailing era marker (BCE, BC, AD).
  -- Returns "none" when no year can be extracted.
  local function year_of(property)
    local formatted = entity:formatPropertyValues(property)
    local value = (formatted and formatted["value"]) or ""

    local words = {}
    for word in mw.ustring.gmatch(value, "%S+") do
      words[#words + 1] = word
    end

    local count = #words
    if count == 0 then
      return "none"
    end

    local last = words[count]
    if string.find(last, "^%d") then
      return last
    end
    -- "BC" also covers "BCE". An era marker with nothing in front of it
    -- carries no year.
    if count > 1 and (string.find(last, "BC") or string.find(last, "AD")) then
      return words[count - 1]
    end
    return "none"
  end

  local birth
  if argsbirth == "" or argsbirth == nil then
    birth = year_of('P569')
  else
    birth = mw.text.trim(argsbirth)
  end

  local death
  if argsdeath == "" or argsdeath == nil then
    death = year_of('P570')
  else
    death = mw.text.trim(argsdeath)
  end

  -- When Wikidata has neither date, p.bdDateAlt could be tried here instead;
  -- that alternative is currently not used.
  return birth .. " " .. death

end

--- URL-encode a string
--- http://lua-users.org/wiki/StringRecipes
---
-- Percent-encodes every character that is not alphanumeric, space, "-", "_",
-- "." or "~", then turns spaces into "+". Newlines are sent as CRLF.
-- Returns str unchanged when it is nil or false.
function p.ia_url_encode(str)
  if (str) then
    str = mw.ustring.gsub (str, "\n", "\r\n")
    str = mw.ustring.gsub (str, "([^%w %-%_%.%~])",
        function (c)
          -- c is one whole character and may be several bytes long in UTF-8.
          -- Encode every byte; encoding only the first one would truncate
          -- multi-byte characters such as ’ into an invalid escape.
          return (string.gsub (c, ".",
              function (b) return string.format ("%%%02X", string.byte(b)) end))
        end)
    str = mw.ustring.gsub (str, " ", "+")
  end
  return str
end

-- Does str contain extended ascii? 1 = yes
-- Scans str byte by byte and returns 1 as soon as it meets a byte that is
-- outside the printable ASCII range 32..126, or an apostrophe (39).
-- Returns 0 when every byte is plain (including for the empty string).
function p.ia_extendedascii(str)
    local total = #str
    local pos = 1
    while pos <= total do
      local code = string.byte(str, pos)
      if code < 32 or code > 126 or code == 39 then
        return 1
      end
      pos = pos + 1
    end
    return 0
end

-- Replace all extended ascii characters with wildcard '*'
-- Emits exactly one "*" per character that is not plain printable ASCII,
-- whatever its UTF-8 byte length, and also replaces - (45), " (34) and ' (39).
-- All other printable ASCII characters are copied through unchanged.
--
-- For list of Lucene special characters needing to be escaped:
-- http://lucene.apache.org/core/4_10_0/queryparser/org/apache/lucene/queryparser/classic/package-summary.html#Escaping_Special_Characters
-- We only worry about - (45) and " (34) since the others are unlikely to appear in a proper name.
-- Also ' (39) since it is sometimes the extended character ’
function p.ia_extendedasciireplace(str)
    local out = {}
    for i = 1, #str do
      local k = str:byte(i)
      if k >= 128 and k <= 191 then
        -- UTF-8 continuation byte: the wildcard for this character was
        -- already emitted when its lead byte was seen.
      elseif k < 32 or k > 126 or k == 45 or k == 34 or k == 39 then
        -- Lead byte of a multi-byte character, a control byte, or one of the
        -- ASCII characters that must not reach Lucene literally.
        out[#out + 1] = "*"
      else
        out[#out + 1] = str:sub(i, i)
      end
    end
    return table.concat(out)
end

-- Hand the module table back to the caller that loads this module.
return p