Jump to content

Module:User:Huhu9001/000

From Wikipedia, the free encyclopedia
This is an old revision of this page, as edited by Huhu9001 (talk | contribs) at 18:34, 1 February 2020. The present address (URL) is a permanent link to this revision, which may differ significantly from the current revision.

local export = {}

--local m_ja = require('Module:ja')
-- Replace every character of `s` in the range ぁ..ゖ with the character whose
-- code point is 96 higher; all other characters are left untouched.
-- Returns only the converted string (the substitution count is dropped).
local function str_hira_to_kata(s)
    local function shift_up(ch)
        return mw.ustring.char(mw.ustring.codepoint(ch) + 96)
    end
    local converted = mw.ustring.gsub(s, '[ぁ-ゖ]', shift_up)
    return converted
end
-- Replace every character of `s` in the range ァ..ヶ with the character whose
-- code point is 96 lower; all other characters are left untouched.
-- Returns only the converted string (the substitution count is dropped).
local function str_kata_to_hira(s)
    local function shift_down(ch)
        return mw.ustring.char(mw.ustring.codepoint(ch) - 96)
    end
    local converted = mw.ustring.gsub(s, '[ァ-ヶ]', shift_down)
    return converted
end

-- Brute-force alignment of two strings.
--
-- The shorter string (by `#`) is turned into a family of patterns in which
-- some of its characters are replaced by wildcards ("holes"); the remaining
-- literal fragments become captures. Each pattern is matched against both
-- strings, and every pattern that matches both contributes one entry to the
-- result.
--
-- s1, s2: the strings to compare.
-- Returns a sequence of entries `{[1] = captures from s1, [2] = captures from s2}`.
-- The sequence may be empty when no pattern matched both strings.
local function str_ucompare(s1, s2)
    local result = {}

    local string1, string2
    local len1
    local id1, id2

    -- NOTE(review): lengths here are byte counts (`#`), while hole positions
    -- are used as character indices by mw.ustring.sub below. For multi-byte
    -- text this lets holes run past the end of string1 -- confirm whether
    -- mw.ustring.len was intended.
    if #s1 <= #s2 then -- make string1 the shorter string
        string1, string2 = s1, s2
        len1 = #s1
        id1, id2 = 1, 2
    else
        string1, string2 = s2, s1
        len1 = #s2
        id1, id2 = 2, 1
    end

    -- "holes": positions in string1 where the two strings are allowed to differ.
    -- holes[1..holes_number] is kept strictly increasing; holes[0] is a sentinel.
    local holes = {}
    local holes_number = 0
    holes[0] = 0

    -- Advance `holes` to the next combination of positions.
    -- e.g. 'abc'
    -- -> '?bc'
    -- -> 'a?c'
    -- -> 'ab?'
    -- -> '??c'
    -- -> '?b?'
    -- -> 'a??'
    -- -> '???'
    -- Returns true when all combinations for the current number of holes were
    -- exhausted and the number of holes was increased, false otherwise.
    local function next_holes()
        local hnm = holes_number
        local hnmv
        while hnm > 0 do
            hnmv = holes[hnm]
            if hnmv + holes_number < len1 + hnm then
                -- Hole `hnm` can still move right: shift it by one and pack
                -- all later holes directly behind it.
                for i = 1, holes_number + 1 - hnm do
                    holes[hnm + i - 1] = hnmv + i
                end
                return false
            else
                hnm = hnm - 1
            end
        end
        holes_number = holes_number + 1
        for i = 1, holes_number do
            holes[i] = i
        end
        return true
    end

    local string1_fragments
    local string1_fragment
    -- Both patterns are declared local so that no global is created.
    local string1_pattern, string2_pattern
    local matched1, matched2
    local match_success = 0 -- 0 means no result yet; >0 means found result with (holes_number+1-match_success) "holes"
    local n_holes_connected
    while holes_number <= len1 do
        string1_fragments = {}
        n_holes_connected = 1
        for i = 1, holes_number do
            -- Literal text between two neighbouring holes, with pattern magic characters escaped.
            string1_fragment = mw.ustring.sub(string1, holes[i - 1] + 1, holes[i] - 1):gsub('([%(%)%.%%%+%-%*%?%[%]%^%$])', '%%%1')
            if string1_fragment ~= '' then
                table.insert(string1_fragments, '(' .. string1_fragment .. ')')
            elseif i == 1 then
                table.insert(string1_fragments, string1_fragment)
            else
                n_holes_connected = n_holes_connected + 1 -- if the two "holes" are connected reduce the number of "holes" by 1
            end
        end
        -- Only try combinations at least as "connected" as the best match so far.
        if match_success <= n_holes_connected then
            string1_fragment = mw.ustring.sub(string1, holes[holes_number] + 1):gsub('([%(%)%.%%%+%-%*%?%[%]%^%$])', '%%%1')
            if string1_fragment ~= '' then
                table.insert(string1_fragments, '(' .. string1_fragment .. ')')
            else
                table.insert(string1_fragments, string1_fragment)
            end
            -- A hole must cover at least one character of string1, but may be empty in string2.
            string1_pattern = '^' .. table.concat(string1_fragments, '(..-)') .. '$'
            string2_pattern = '^' .. table.concat(string1_fragments, '(.-)') .. '$'
            matched2 = {mw.ustring.match(string2, string2_pattern)}
            if #matched2 > 0 then
                matched1 = {mw.ustring.match(string1, string1_pattern)}
                if #matched1 > 0 then
                    -- A strictly better match discards everything collected before.
                    if match_success < n_holes_connected then result = {} end
                    match_success = n_holes_connected
                    table.insert(result, {[id1] = matched1, [id2] = matched2})
                end
            end
        end
        -- Stop once a whole hole count has been tried and produced a match.
        if next_holes() and match_success > 0 then break end
    end
    return result
end

-- Report whether `s` contains a `[[...]]` sequence with at least one character
-- between the brackets. A nil or false `s` yields false.
local function str_is_link(s)
    if s then
        local first = s:find('%[%[..-%]%]')
        return first ~= nil
    end
    return false
end

-- Split `s` into a sequence of pieces around `[[...]]` links.
--   * text outside links     -> {text = ...}
--   * [[target]]             -> {text = target, linkto = target}
--   * [[target|label]]       -> {text = label, linkto = target}
--     (linkto is nil when the target is empty; the piece is dropped when the
--     label is empty)
-- When several `[[` precede the first `]]`, the last one opens the link and
-- everything before it is kept as plain text. An unterminated `[[` ends the
-- scan; the rest of the string is emitted as plain text.
local function str_parse_link(s)
    local pieces = {}
    local cursor = 1                          -- first byte not yet emitted
    local open_at = s:find('%[%[', cursor)

    while open_at do
        local next_open = s:find('%[%[', open_at + 1)
        local close_at = s:find('%]%]', open_at + 2)
        if not close_at then
            break
        end

        -- Move to the innermost opener that still precedes the closer.
        while next_open and next_open < close_at do
            open_at = next_open
            next_open = s:find('%[%[', open_at + 1)
        end

        if cursor < open_at then
            pieces[#pieces + 1] = { text = s:sub(cursor, open_at - 1) }
        end

        if open_at + 2 < close_at then
            local inner = s:sub(open_at + 2, close_at - 1)
            local bar = inner:find('|')
            if not bar then
                pieces[#pieces + 1] = { text = inner, linkto = inner }
            elseif bar < #inner then
                local target
                if bar > 1 then
                    target = inner:sub(1, bar - 1)
                end
                pieces[#pieces + 1] = { text = inner:sub(bar + 1), linkto = target }
            end
        end

        cursor = close_at + 2
        open_at = s:find('%[%[', cursor)
    end

    if cursor <= #s then
        pieces[#pieces + 1] = { text = s:sub(cursor) }
    end
    return pieces
end

-- Merge two segmentations of the same text into one.
-- link_map: sequence of {text, linkto?}; ruby_map: sequence of {text, ruby?}.
-- Both are walked in parallel by byte length and a new entry is emitted at
-- every boundary of either map, carrying the `linkto` of the current link
-- piece and the `ruby` of the current ruby piece. When an entry continues a
-- ruby piece that started in an earlier entry, the previous entry's
-- `ruby_next` field is set to it. Merging stops when either map runs out.
local function map_merge(link_map, ruby_map)
    local merged = {}
    local li, ri = 0, 0
    -- balance = bytes consumed from link_map minus bytes consumed from ruby_map
    local balance, prev_balance = 0, 0

    while true do
        -- The link piece was fully used up (or both ended together): take the next one.
        if prev_balance <= 0 then
            li = li + 1
            if li > #link_map then break end
            balance = balance + #link_map[li].text
        end
        -- The ruby piece was fully used up (or both ended together): take the next one.
        if prev_balance >= 0 then
            ri = ri + 1
            if ri > #ruby_map then break end
            balance = balance - #ruby_map[ri].text
        end

        local link_piece, ruby_piece = link_map[li], ruby_map[ri]
        local text
        if balance < 0 then
            -- The link piece ends first: emit what is left of it.
            text = link_piece.text:sub(math.min(0, -prev_balance))
        else
            -- The ruby piece ends first (or both end together): emit what is left of it.
            text = ruby_piece.text:sub(math.min(0, prev_balance))
        end

        local entry = {
            text = text,
            linkto = link_piece.linkto,
            ruby = ruby_piece.ruby,
        }
        merged[#merged + 1] = entry
        if prev_balance < 0 then
            merged[#merged - 1].ruby_next = entry
        end
        prev_balance = balance
    end
    return merged
end

-- Concatenate the `text` field of every entry of `ruby_map`, in order.
-- `options` is accepted but not read.
function export.to_text(ruby_map, options)
    local parts = {}
    for _, entry in ipairs(ruby_map) do
        parts[#parts + 1] = entry.text
    end
    return table.concat(parts)
end

-- "options.markup": Use custom markups. See below.
-- Note: If custom markups are used, the conversion is very likely irreversible.
-- "options.ruby_link = nil": Use original link.
-- "options.ruby_link = ''": All rubies point to no link.
-- "options.ruby_link = (a non-empty string)": All rubies point to this link.
-- Serialise a ruby map into markup text.
-- ruby_map: sequence of entries with a `text` field and optional `linkto`,
--   `ruby` and `ruby_next` fields.
-- options: optional table; `options.markup` and `options.ruby_link` are read.
-- Returns the concatenated markup string.
function export.to_markup(ruby_map, options)
    options = options or {}
    -- Must be local so that no global named `omarkup` is created.
    local omarkup = options.markup or {}

    -- Custom markups
    local lb = omarkup.link_border_left or '[['
    local lm = omarkup.link_border_middle or '|'
    local lf = omarkup.link_border_right or ']]'
    local rb = omarkup.ruby_border_left or '['
    local rm = omarkup.ruby_border_middle or ']('
    local rf = omarkup.ruby_border_right or ')'

    -- Format one entry.
    -- ruby_out == true : the link markup wraps the text, the ruby markup goes
    --                    outside it; `back` emits the opening ruby border and
    --                    `front` emits the ruby text plus closing border.
    -- ruby_out == false: the ruby markup wraps the text, the link markup goes
    --                    outside it; `back` emits the opening link part and
    --                    `front` emits the closing link border.
    -- `back`/`front` are set on the first/last entry of a group so that one
    -- outer markup can span several entries.
    local function _fr(text, linkto, ruby, back, front, ruby_out)
        if ruby_out then
            if linkto and text ~= '' then
                text = lb .. linkto .. lm .. text .. lf
            end
            if ruby then
                text = (back and rb or '') .. text .. (front and rm .. ruby .. rf or '')
            end
        else
            if ruby and text ~= '' then
                text = rb .. text .. rm .. ruby .. rf
            end
            if linkto then
                text = (back and lb .. linkto .. lm or '') .. text .. (front and lf or '')
            end
        end
        return text
    end

    local text = {}
    local text_f
    local id = 1
    local id2, eff_f, link_cur
    if options.ruby_link then
        -- Ruby link overridden by the caller: ruby markup is always outermost.
        local ruby_link
        while id <= #ruby_map do
            if ruby_map[id].ruby ~= nil then
                id2 = id + 1
                eff_f = false
                text_f = {}
                -- Collect the run of entries chained through `ruby_next`.
                while id2 <= #ruby_map and ruby_map[id2 - 1].ruby_next == ruby_map[id2] do id2 = id2 + 1 end
                for i = id, id2 - 1 do
                    ruby_link = options.ruby_link ~= '' and '[[' .. options.ruby_link .. '|' .. ruby_map[i].ruby .. ']]' or ruby_map[i].ruby
                    table.insert(text_f, _fr(ruby_map[i].text, ruby_map[i].linkto, ruby_link, i == id, i == id2 - 1, true))
                    eff_f = eff_f or ruby_map[i].text ~= ''
                end
                -- A group whose texts are all empty is dropped entirely.
                if eff_f then table.insert(text, table.concat(text_f)) end
                id = id2
            else
                table.insert(text, _fr(ruby_map[id].text, ruby_map[id].linkto, nil, nil, nil, true))
                id = id + 1
            end
        end
    else
        while id <= #ruby_map do
            if ruby_map[id].ruby ~= nil or ruby_map[id].linkto ~= nil then
                id2 = id + 1
                eff_f = false
                text_f = {}
                if id2 <= #ruby_map and ruby_map[id].ruby_next == ruby_map[id2] then
                    -- One ruby spans several entries: ruby markup outside, links inside.
                    id2 = id2 + 1
                    while id2 <= #ruby_map and ruby_map[id2 - 1].ruby_next == ruby_map[id2] do id2 = id2 + 1 end
                    for i = id, id2 - 1 do
                        table.insert(text_f, _fr(ruby_map[i].text, ruby_map[i].linkto, ruby_map[i].ruby, i == id, i == id2 - 1, true))
                        eff_f = eff_f or ruby_map[i].text ~= ''
                    end
                elseif not str_is_link(ruby_map[id].ruby) then
                    -- Group following entries that share the same link target:
                    -- link markup outside, ruby markup inside.
                    link_cur = ruby_map[id].linkto
                    while id2 <= #ruby_map and not ruby_map[id2].ruby_next and ruby_map[id2].linkto == link_cur and not str_is_link(ruby_map[id2].ruby) do id2 = id2 + 1 end
                    for i = id, id2 - 1 do
                        table.insert(text_f, _fr(ruby_map[i].text, ruby_map[i].linkto, ruby_map[i].ruby, i == id, i == id2 - 1, false))
                        eff_f = eff_f or ruby_map[i].text ~= ''
                    end
                else
                    -- The ruby itself contains link markup: emit the entry on
                    -- its own with the ruby markup outside.
                    table.insert(text_f, _fr(ruby_map[id].text, ruby_map[id].linkto, ruby_map[id].ruby, true, true, true))
                    eff_f = ruby_map[id].text ~= ''
                end
                if eff_f then table.insert(text, table.concat(text_f)) end
                id = id2
            else
                table.insert(text, ruby_map[id].text)
                id = id + 1
            end
        end
    end

    return table.concat(text)
end

-- Like "function export.to_markup", with the same options, except that the
-- default ruby borders are the HTML <ruby>/<rt>/<rp> tags. Borders supplied in
-- `options.markup` still take precedence over these defaults.
-- Returns the string produced by export.to_markup.
function export.to_element(ruby_map, options)
    options = options or {}
    -- Must be local so that no global named `omarkup` is created.
    local omarkup = options.markup or {}

    local element_markup = {
        link_border_left = omarkup.link_border_left or '[[',
        link_border_middle = omarkup.link_border_middle or '|',
        link_border_right = omarkup.link_border_right or ']]',
        ruby_border_left = omarkup.ruby_border_left or '<ruby>',
        ruby_border_middle = omarkup.ruby_border_middle or '<rp>(</rp><rt>',
        ruby_border_right = omarkup.ruby_border_right or '</rt><rp>)</rp></ruby>',
    }

    -- Overlay the element markup on top of the caller's options instead of
    -- temporarily overwriting `options.markup`: the caller's table is never
    -- modified, even if export.to_markup raises an error. Every other option
    -- is read through from `options`.
    local element_options = setmetatable({markup = element_markup}, {__index = options})
    return export.to_markup(ruby_map, element_options)
end

-- Parse text written in the "[base](ruby)" markup (optionally mixed with
-- [[...]] links) into a merged ruby/link map.
-- `options` is accepted but not read.
-- Returns the result of map_merge on the link map and the ruby map.
function export.parse_markup(markup, options)
    -- Pass 1: on the raw markup, remember every ruby annotation (links inside
    -- it are kept) and reduce each "[base](ruby)" to "base" before the links
    -- are parsed.
    local rubies = {}
    local without_ruby = markup:gsub('(%b[])(%b())', function(base, annotation)
        rubies[#rubies + 1] = annotation:sub(2, -2)
        return base:sub(2, -2)
    end)
    local link_map = str_parse_link(without_ruby)

    -- Pass 2: on the markup with the links resolved to their text, cut the
    -- text at every "[base](ruby)" and pair each base with the annotation
    -- remembered at the same ordinal position in pass 1.
    local plain_text = export.to_text(str_parse_link(markup))
    local ruby_map = {}
    local cursor = 1
    local ruby_index = 1
    for from, base, _, upto in plain_text:gmatch('()(%b[])(%b())()') do
        local before = plain_text:sub(cursor, from - 1)
        if before ~= '' then
            ruby_map[#ruby_map + 1] = {text = before}
        end

        local base_text = base:sub(2, -2)
        if base_text ~= '' then
            local annotation = rubies[ruby_index]
            if annotation == '' then
                annotation = nil
            end
            ruby_map[#ruby_map + 1] = {text = base_text, ruby = annotation}
        end

        cursor = upto
        ruby_index = ruby_index + 1
    end

    local rest = plain_text:sub(cursor)
    if rest ~= '' then
        ruby_map[#ruby_map + 1] = {text = rest}
    end

    return map_merge(link_map, ruby_map)
end

-- "options.try == nil": Raise an error when the initial match fails.
-- "options.try == 'return'": Return "nil, (error information)" when the initial match failed.
-- "options.try == 'force'": Try every possible pattern when the initial match failed.
-- "options.space == nil": Remove spaces between kana or kanji but preserve elsewhere.
-- "options.space == 'all'": Preserve all spaces.
-- "options.space == 'none'": Remove all spaces.
-- Build a merged ruby/link map from a term and its kana reading.
-- term: the written form; may contain [[...]] links and '%' separators.
-- kana: the reading; when nil/false only the link map of `term` is returned.
-- options: optional table; `options.try` and `options.space` are read.
-- Returns the merged map, or `nil, message` when matching fails and
-- `options.try == 'return'`; raises an error when matching fails and
-- `options.try` is neither 'return' nor 'force'.
function export.parse_text(term, kana, options)
    options = options or {}

    local pattern_kana = 'ぁ-ゖァ-ヶ' -- signs subject to hira-kata matching
    local pattern_kanji_probable = '々㐀-䶵一-鿌' .. mw.ustring.char(0xF900) .. "-" .. mw.ustring.char(0xFAD9) .. '𠀀-𯨟0-9A-Za-z〆' -- signs that can have ruby, but not spaces
    local pattern_rubiable_probable = 'A-Za-z0-9α-ωΑ-Ω' -- signs that can have both ruby and spaces
    local pattern_mute_probable = '%.゠・' -- signs that may appear in term, but not kana

    -- Remove links: [[A|B]] -> B, [[C]] -> C
    -- Create the link map
    -- e.g. "[[エドガー・アラン・ポー|アラン・ポー]]の[[推理 小説]]"
    local link_map = str_parse_link(term:gsub('%%', '')) -- remove '%'
    if not kana then return link_map end
    --[[link_map = {
        {text = 'アラン・ポー', linkto = 'エドガー・アラン・ポー'},
        {text = 'の'},
        {text = '推理小説', linkto = '推理小説'},
    }]]

    -- Remove romaji markup
    kana = kana:gsub('[%^%-%.]', '') -- remove '^', '-', '.', preserve '%', ' '

    -- Create the ruby map
    -- e.g. 'アラン・ポーの推理 小説', 'あらん ぽー の すいり しょうせつ'
    -- ("ぽお" is not allowed)
    local ruby_map = {}
    local plain_term_raw = export.to_text(str_parse_link(term))
    local plain_kana_raw = export.to_text(str_parse_link(kana))
    local plain_term = mw.text.split(plain_term_raw, '%%')
    local plain_kana = mw.text.split(plain_kana_raw, '%%')
    if #plain_term == #plain_kana then
        -- Match each '%'-separated section of the term against the
        -- corresponding section of the kana.
        for i, plain_term_i in ipairs(plain_term) do
            -- Build one capture per run of same-class characters in the term.
            local pattern_ruby = {}
            local id1, id2
            local char_s
            id2 = 1
            while true do
                id1 = id2
                char_s = mw.ustring.sub(plain_term_i, id1, id1)
                if mw.ustring.find(char_s, '^['..pattern_kanji_probable..pattern_rubiable_probable..']$') then
                    -- Run of characters that take ruby: matches any non-empty reading.
                    id2 = mw.ustring.find(plain_term_i, '[^'..pattern_kanji_probable..pattern_rubiable_probable..']', id1)
                    table.insert(pattern_ruby, '(..-)')
                    if not id2 then break end
                elseif mw.ustring.find(char_s, '^['..pattern_kana..']$') then
                    -- Run of kana: each character matches its hiragana or
                    -- katakana form, with optional spaces in between.
                    id2 = mw.ustring.find(plain_term_i, '[^'..pattern_kana..']', id1)
                    table.insert(pattern_ruby, '( ?' .. mw.ustring.gsub(mw.ustring.sub(plain_term_i, id1, (id2 or 0) - 1), '.', function(m1)
                        if mw.ustring.find(m1, '^[ヶゖケ]$') then
                            return "[" .. str_kata_to_hira(m1) .. str_hira_to_kata(m1) .. "かがこカガコ] ?"
                        else
                            return "[" .. str_kata_to_hira(m1) .. str_hira_to_kata(m1) .. "] ?"
                        end
                    end) .. ')')
                    if not id2 then break end
                else
                    -- Run of other characters: matched literally; "mute"
                    -- signs may be absent or appear as a space or '-'.
                    id2 = mw.ustring.find(plain_term_i, '['..pattern_kanji_probable..pattern_rubiable_probable..pattern_kana..']', id1)
                    table.insert(pattern_ruby, '('..mw.ustring.gsub(mw.ustring.sub(plain_term_i, id1, (id2 or 0) - 1), '.', function(m1)
                        local m1_m = m1
                        if m1:find('^[%(%)%.%%%+%-%*%?%[%]%^%$]$') then m1_m = '%' .. m1_m end
                        if mw.ustring.find(m1, '^['..pattern_mute_probable..']$') then m1_m = '[' .. m1_m .. ' -]?' end
                        return m1_m
                    end)..')')
                    if not id2 then break end
                end
            end
            local pattern_ruby_s = table.concat(pattern_ruby)
            -- 'アラン・ポーの推理 小説' to '( ?[あア] ?[らラ] ?[んン] ?)([・ -]?)( ?[ぽポ] ?)(ー)( ?[のノ] ?)(..-)( )(..-)'
            -- Execute matching
            local ruby_map_i_ruby = {mw.ustring.match(plain_kana[i], '^'..pattern_ruby_s..'$')}
            if #ruby_map_i_ruby > 0 then
                local ruby_map_i_text = {mw.ustring.match(plain_term_i, '^'..pattern_ruby_s..'$')}
                local n_match = 0
                for _, pat in ipairs(pattern_ruby) do
                    if pat:sub(1, 1) == '(' then
                        n_match = n_match + 1
                        table.insert(ruby_map, {
                            text = ruby_map_i_text[n_match],
                            ruby = pat == '(..-)' and ruby_map_i_ruby[n_match]:gsub(' ', '') or nil, -- remove spaces from ruby
                        })
                    end
                end
            elseif options.try == 'force' then
                link_map = str_parse_link(term:gsub('%%', ''))
                -- Fall back to brute-force alignment and use its first result.
                local forced_result = str_ucompare(plain_term_i, plain_kana[i])[1]
                for ii, vv in ipairs(forced_result[1]) do
                    table.insert(ruby_map, {
                        text = vv,
                        ruby = forced_result[2][ii] ~= vv and forced_result[2][ii] or nil,
                    })
                end
            elseif options.try == 'return' then
                return nil, 'Can not match "' .. plain_term_i .. '" and "' .. plain_kana[i] .. '".'
            else
                error('Can not match "' .. plain_term_i .. '" and "' .. plain_kana[i] .. '".')
            end
        end
    elseif options.try == 'force' then
        -- The '%' separators do not pair up: align the raw strings as a whole.
        link_map = str_parse_link(term)
        local forced_result = str_ucompare(plain_term_raw, plain_kana_raw)[1]
        for i, v in ipairs(forced_result[1]) do
            table.insert(ruby_map, {
                text = v,
                ruby = forced_result[2][i] ~= v and forced_result[2][i] or nil,
            })
        end
    elseif options.try == 'return' then
        return nil, 'Unequal numbers of the separator "%" in both forms.'
    else
        error('Unequal numbers of the separator "%" in both forms.')
    end
    --[[ruby_map = {
        {text = 'アラン・ポーの'},
        {text = '推理', ruby = 'すいり'},
        {text = '小説', ruby = 'しょうせつ'},
    }]]

    -- Merge the ruby and link map
    local r = map_merge(link_map, ruby_map, options)
    --[[return {
        {text = 'アラン・ポー', linkto = 'エドガー・アラン・ポー'},
        {text = 'の'},
        {text = '推理', linkto = '推理小説', ruby = 'すいり'},
        {text = '小説', linkto = '推理小説', ruby = 'しょうせつ'},
    }]]
    if options.space == 'none' then
        for _, v in ipairs(r) do
            v.text = v.text:gsub(' ', '')
        end
    elseif options.space == 'all' then
    else
        -- Default: drop a run of spaces only when the characters on both
        -- sides of it are kana or kanji.
        for i, v in ipairs(r) do
            -- Character length of this entry's text. `#v` must not be used
            -- here: `v` is the entry table, which has no array part, so its
            -- length is 0 and the same-entry lookup below would never run.
            local text_len = mw.ustring.len(v.text)
            v.text = mw.ustring.gsub(v.text, '()( +)()', function(pos1, m1, pos2)
                local char_1, char_2
                -- Character before the run: in this entry, else the last
                -- character of the previous entry.
                if pos1 > 1 then
                    char_1 = mw.ustring.sub(v.text, pos1 - 1, pos1 - 1)
                elseif i > 1 then
                    char_1 = mw.ustring.sub(r[i - 1].text, -1)
                else
                    return m1
                end
                -- Character after the run: in this entry, else the first
                -- character of the next entry.
                if pos2 <= text_len then
                    char_2 = mw.ustring.sub(v.text, pos2, pos2)
                elseif i < #r then
                    char_2 = mw.ustring.sub(r[i + 1].text, 1, 1)
                else
                    return m1
                end
                if mw.ustring.find(char_1, '^['..pattern_kanji_probable..pattern_kana..']$') and mw.ustring.find(char_2, '^['..pattern_kanji_probable..pattern_kana..']$') then
                    return ''
                else
                    return m1
                end
            end)
        end
    end
    return r
end

-- Demonstration entry point: renders a fixed set of sample inputs through
-- parse_text / parse_markup and to_element, and joins the results with
-- horizontal-rule separators. `frame` is not read.
function export.show(frame)
    local function render_text(...)
        return export.to_element(export.parse_text(...))
    end
    local function render_markup(...)
        return export.to_element(export.parse_markup(...))
    end

    local markup_sample = table.concat({
        'apple[b[[Alpha|a]]]([[Beta|β]]α)nana',
        'apple[[Banana|[ba](βα)nana]]',
        'apple[[[Banana|ba]]]([[Beta|β]]α)[[Banana|nana]]',
    }, ', ')

    local sections = {
        '[[Module:User:Huhu9001/000]]',
        render_text('[[エドガー・アラン・ポー|アラン・ポー]]の[[推理 小説]]', 'あらん ぽー の すいり しょうせつ'),
        render_text('[[エドガー・アラン・ポー|アラン・ポー]]の[[推理 小説]]', 'あらん ぽー の すいり しょうせつ', {space = 'all'}),
        render_text('(This is a pen.)', '(ディス イズ ア ペン.)'),
        render_text('[This is a pen.]', '[ディス イズ ア ペン.]', {space = 'none'}),
        render_text('[[駄洒]]落&駄[[洒落]]', 'だじゃれ&だじゃれ'),
        render_text('用ゐる', 'もちいる', {try = 'force'}),
        render_text('スイーツ(笑)', 'スイーツかっこわらい', {try = 'force'}),
        render_text('100%', '100パーセント', {try = 'force'}),
        render_markup(markup_sample),
    }
    return table.concat(sections, '\n----\n')
end

return export