Jump to content

Module:Urltowiki

From Wikipedia, the free encyclopedia
This is an old revision of this page, as edited by Mr. Stradivarius (talk | contribs) at 13:35, 8 April 2013 (better logic for URLs with invalid language names). The present address (URL) is a permanent link to this revision, which may differ significantly from the current revision.

-- This module takes a URL from a Wikimedia project and returns the equivalent wikitext. 
-- Any actions such as edit, history, etc., are stripped, and percent-encoded characters 
-- are converted to normal text.

-- Table of functions exported by this module. Declared local so that loading
-- the module does not create a global named "p".
local p = {}

-- Lookup table describing the sites this module recognises. Each entry has:
--   domain       : the host name (or host name suffix) of the site
--   iw_prefix    : the interwiki prefix used to link to that site
--   title_prefix : the leading part of the URL path that precedes the page title
-- A more specific domain is listed ahead of any shorter domain it ends with,
-- so that lookups which stop at the first match still find the specific entry.
local interwiki_table = {
    -- Projects.
    { domain = "test.wikipedia.org"      , iw_prefix = "testwiki"  , title_prefix = "/wiki/" },
    { domain = "wikipedia.org"           , iw_prefix = "w"         , title_prefix = "/wiki/" },
    { domain = "wiktionary.org"          , iw_prefix = "wikt"      , title_prefix = "/wiki/" },
    { domain = "wikinews.org"            , iw_prefix = "n"         , title_prefix = "/wiki/" },
    { domain = "wikibooks.org"           , iw_prefix = "b"         , title_prefix = "/wiki/" },
    { domain = "wikiquote.org"           , iw_prefix = "q"         , title_prefix = "/wiki/" },
    { domain = "wikisource.org"          , iw_prefix = "s"         , title_prefix = "/wiki/" },
    { domain = "species.wikimedia.org"   , iw_prefix = "species"   , title_prefix = "/wiki/" },
    { domain = "wikiversity.org"         , iw_prefix = "v"         , title_prefix = "/wiki/" },
    { domain = "wikivoyage.org"          , iw_prefix = "voy"       , title_prefix = "/wiki/" },
    { domain = "wikimediafoundation.org" , iw_prefix = "wmf"       , title_prefix = "/wiki/" },
    { domain = "commons.wikimedia.org"   , iw_prefix = "commons"   , title_prefix = "/wiki/" },
    { domain = "wikidata.org"            , iw_prefix = "d"         , title_prefix = "/wiki/" },
    { domain = "meta.wikimedia.org"      , iw_prefix = "m"         , title_prefix = "/wiki/" },
    { domain = "incubator.wikimedia.org" , iw_prefix = "incubator" , title_prefix = "/wiki/" },
    { domain = "strategy.wikimedia.org"  , iw_prefix = "strategy"  , title_prefix = "/wiki/" },
    { domain = "mediawiki.org"           , iw_prefix = "mw"        , title_prefix = "/wiki/" },
    { domain = "bugzilla.wikimedia.org"  , iw_prefix = "bugzilla"  , title_prefix = "/show_bug.cgi?id=" },
    -- Chapters.
    { domain = "wikimedia.org.ar"        , iw_prefix = "wmar"      , title_prefix = "/wiki/" },
    { domain = "wikimedia.org.au"        , iw_prefix = "wmau"      , title_prefix = "/wiki/" },
    { domain = "be.wikimedia.org"        , iw_prefix = "wmbe"      , title_prefix = "/wiki/" },
    { domain = "wikimedia.ca"            , iw_prefix = "wmca"      , title_prefix = "/wiki/" },
    { domain = "wikimedia.de"            , iw_prefix = "wmde"      , title_prefix = "/wiki/" },
    { domain = "fi.wikimedia.org"        , iw_prefix = "wmfi"      , title_prefix = "/wiki/" },
    { domain = "wikimedia.hk"            , iw_prefix = "wmhk"      , title_prefix = "/index.php/" },
    { domain = "wiki.media.hu"           , iw_prefix = "wmhu"      , title_prefix = "/wiki/" },
    { domain = "wiki.wikimedia.in"       , iw_prefix = "wmin"      , title_prefix = "/" },
    { domain = "wikimedia.org.id"        , iw_prefix = "wmid"      , title_prefix = "/wiki/" },
    { domain = "wikimedia.org.il"        , iw_prefix = "wmil"      , title_prefix = "/" },
    { domain = "wikimedia.it"            , iw_prefix = "wmit"      , title_prefix = "/wiki/" },
    { domain = "nl.wikimedia.org"        , iw_prefix = "wmnl"      , title_prefix = "/wiki/" },
    { domain = "no.wikimedia.org"        , iw_prefix = "wmno"      , title_prefix = "/wiki/" },
    { domain = "pl.wikimedia.org"        , iw_prefix = "wmpl"      , title_prefix = "/wiki/" },
    { domain = "ru.wikimedia.org"        , iw_prefix = "wmru"      , title_prefix = "/wiki/" },
    { domain = "rs.wikimedia.org"        , iw_prefix = "wmrs"      , title_prefix = "/wiki/" },
    { domain = "se.wikimedia.org"        , iw_prefix = "wmse"      , title_prefix = "/wiki/" },
    { domain = "wikimedia.ch"            , iw_prefix = "wmch"      , title_prefix = "/" },
    { domain = "tw.wikimedia.org"        , iw_prefix = "wmtw"      , title_prefix = "/wiki/index.php5/" },
    { domain = "uk.wikimedia.org"        , iw_prefix = "wmuk"      , title_prefix = "/wiki/" },
}

-- Finds the interwiki_table entry for a host name.
-- An entry matches when the host is exactly its domain or ends with
-- "." .. domain (plain string comparison, no patterns); when several entries
-- match, the one with the longest domain wins, so table order does not matter.
-- Returns the entry's index and the first label of whatever precedes the
-- matched domain in the host (e.g. "en" for "en.wikipedia.org" and for
-- "en.m.wikipedia.org"). The label is nil when nothing precedes the domain or
-- when it is "www". Returns nil if no entry matches or host is not a string.
local function matchHost(host)
    if type(host) ~= "string" then
        return nil
    end
    local best_id, best_len
    for i, entry in ipairs(interwiki_table) do
        local domain = entry.domain
        local suffix = "." .. domain
        if (host == domain or string.sub(host, -#suffix) == suffix)
            and (not best_len or #domain > best_len) then
            best_id, best_len = i, #domain
        end
    end
    if not best_id then
        return nil
    end
    local label
    if #host > best_len then
        -- Text before the first period of the part in front of the domain.
        label = string.match(string.sub(host, 1, #host - best_len), "^(.-)%.")
        if label == "www" then
            label = nil
        end
    end
    return best_id, label
end

-- Works out the interwiki prefix needed to link to the site in url.host.
-- Parameter:
--   url : a mw.uri object (only its host field is read).
-- Returns two values:
--   1. the interwiki prefix without a trailing colon ("wikt", "fr", "w:fr",
--      "commons", ...), or false when no prefix applies;
--   2. the index of the matching interwiki_table entry, or nil when the host
--      is missing, is not in the table, or carries an invalid language code
--      (in those cases the caller falls back to the unmodified input).
local function convertInterwiki(url)
    local host = url.host

    -- If host is nil then the text passed to the module is probably a URL
    -- fragment, not a full URL. Return false so it is only percent-decoded.
    local host_id, lang = matchHost(host)
    if not host_id then
        return false
    end

    -- If the URL points to a page on the same site as the module is invoked
    -- from, no interwiki prefix is needed, but the title can still be extracted.
    local current_host = mw.uri.new(mw.title.getCurrentTitle():fullUrl()).host
    if host == current_host then
        return false, host_id
    end

    local project = interwiki_table[host_id].iw_prefix

    -- Sites without a language subdomain (e.g. commons.wikimedia.org,
    -- www.mediawiki.org, the chapter wikis) are addressed by the project
    -- prefix alone.
    if not lang then
        return project, host_id
    end

    -- The host name is otherwise fine but the language code is not valid.
    if not mw.language.isSupportedLanguage(lang) then
        return false
    end

    -- Omit the language or the project code when it matches the current site.
    -- The current site's language is taken relative to its own table entry, so
    -- a country code that is part of a chapter domain is not mistaken for one.
    local current_host_id, current_lang = matchHost(current_host)
    if lang == current_lang then
        return project, host_id
    elseif host_id == current_host_id then
        return lang, host_id
    else
        return project .. ":" .. lang, host_id
    end
end

-- Converts a URL into the equivalent wikitext page name.
-- Parameters (fields of args):
--   args[1]      : the URL (or URL fragment) to convert; required.
--   args.section : if equal to "no", the "#fragment" part is left out.
-- Returns the page name as a string, with an interwiki prefix where one is
-- needed, percent-encoding decoded and underscores converted to spaces.
-- Raises an error if args[1] is missing.
local function _urlToWiki(args)
    -- Keep the working value local; assigning to a bare "url" would create a global.
    local url = args[1] or error("No URL specified")
    url = mw.ustring.match(url, "^%s*(.*%S)") or ""  -- Trim whitespace.
    url = mw.uri.new(url)

    -- Get the interwiki prefix.
    local interwiki, host_id = convertInterwiki(url)

    -- Read the fragment now, because the fallback branch below clears it on url.
    local fragment = url.fragment
    local path = url.path or ""
    local query = url.query  -- nil when the URL has no query string.

    -- Get the page title.
    local title_prefix = host_id and interwiki_table[host_id].title_prefix
    local pagetitle
    if title_prefix and mw.ustring.sub(path, 1, mw.ustring.len(title_prefix)) == title_prefix then
        -- The path starts with the site's title prefix: the rest is the title.
        pagetitle = mw.ustring.sub(path, mw.ustring.len(title_prefix) + 1, -1)
    elseif title_prefix and mw.ustring.match(path, "index%.php") and query and query.title then
        -- An "index.php" URL (history, edit, ...): the title is in the query.
        -- title_prefix is checked in case the URL isn't of a Wikimedia site.
        pagetitle = query.title
    else
        -- Otherwise, use the whole URL as the title. The fragment is removed
        -- from the URL first because it is appended separately below, and no
        -- interwiki prefix is added because the text is not a page title.
        url.fragment = nil
        interwiki = false
        pagetitle = tostring(url)
    end

    -- Pre-process the fragment: turn ".XX" anchor escapes into "%XX" so that
    -- they are decoded together with the rest of the text below.
    if fragment then
        fragment = mw.ustring.gsub(fragment, "%.([0-9A-F][0-9A-F])", "%%%1")
    end

    -- Assemble the wikilink.
    local wikitext = pagetitle
    if interwiki then
        wikitext = interwiki .. ":" .. wikitext
    end
    if fragment and not (args.section == "no") then
        wikitext = wikitext .. "#" .. fragment
    end

    -- Decode percent-encoded characters and convert underscores to spaces.
    wikitext = mw.uri.decode(wikitext, "WIKI")

    return wikitext
end

-- Public entry point. Works out which argument table to use and hands it to
-- _urlToWiki, returning whatever that function returns.
function p.urlToWiki(frame)
    -- Not the current frame: the caller is another module or the debug
    -- console, so what we were given is taken to be the argument table itself.
    if frame ~= mw.getCurrentFrame() then
        return _urlToWiki(frame)
    end

    -- Called via #invoke. Start from the arguments of the invoking template's
    -- caller, and switch to the #invoke arguments if at least one was supplied.
    local chosen = frame:getParent().args
    for _ in pairs(frame.args) do
        -- Reaching the loop body at all means frame.args is not empty.
        chosen = frame.args
        break
    end
    return _urlToWiki(chosen)
end

-- Hand the table p (which holds urlToWiki) back to whoever loads this module.
return p