-- Jump to content
--
-- Module:User:Huhu9001/000
--
-- From Wikipedia, the free encyclopedia
-- This is an old revision of this page, as edited by Huhu9001 (talk | contribs) at 11:59, 30 January 2020. The present address (URL) is a permanent link to this revision, which may differ significantly from the current revision.

-- Table that collects the functions this module makes public.
local export = {}

--local m_ja = require('Module:ja')
-- Local stand-in providing the two kana converters used below
-- (presumably mirroring the ones in Module:ja -- confirm before swapping back).
local m_ja = {}
do
    -- Replace every character of `s` that matches the character class
    -- `class` by the character whose code point is `offset` away from it.
    -- Only the converted string is returned (the substitution count is dropped).
    local function shift_codepoints(s, class, offset)
        local shifted = mw.ustring.gsub(s, class, function(ch)
            return mw.ustring.char(mw.ustring.codepoint(ch) + offset)
        end)
        return shifted
    end

    -- Hiragana -> katakana: characters in ぁ..ゖ are moved up by 96 code points.
    function m_ja.hira_to_kata(s)
        return shift_codepoints(s, '[ぁ-ゖ]', 96)
    end

    -- Katakana -> hiragana: characters in ァ..ヶ are moved down by 96 code points.
    function m_ja.kata_to_hira(s)
        return shift_codepoints(s, '[ァ-ヶ]', -96)
    end
end

--- Brute-force alignment of two strings.
-- The shorter string (by byte length) is cut into literal fragments separated
-- by "holes"; the fragments must appear unchanged in the other string, while
-- each hole may stand for different text in either string.  Hole sets are
-- tried with an increasing number of holes until one fits both strings.
-- @param s1 first string
-- @param s2 second string
-- @return a list of alignments; each entry is a table whose [1] holds the
--         captures taken from s1 and whose [2] holds the captures taken from
--         s2.  The list is empty when no alignment was found.
local function str_ucompare(s1, s2)
    local result = {}

    local string1, string2
    local len1
    local id1, id2

    if #s1 <= #s2 then --make string1 the shorter string
        string1, string2 = s1, s2
        len1 = #s1
        id1, id2 = 1, 2
    else
        string1, string2 = s2, s1
        len1 = #s2
        id1, id2 = 2, 1
    end
    -- NOTE(review): len1 is a byte length, but hole positions are passed to
    -- mw.ustring.sub as character indices -- confirm this is intended.

    -- Escape Lua pattern magic characters so a fragment matches literally.
    local function escape_magic(fragment)
        return (fragment:gsub('([%(%)%.%%%+%-%*%?%[%]%^%$])', '%%%1'))
    end

    -- "holes": a different part of the two strings
    -- try to find where are the "holes"
    -- holes[1..holes_number] are strictly increasing positions in string1;
    -- holes[0] is a sentinel for "before the first character".
    local holes = {}
    local holes_number = 0
    holes[0] = 0

    -- Advance `holes` to the next combination of positions.
    -- Returns true when all combinations of the current size were used up and
    -- the number of holes has just been increased, false otherwise.
    local function next_holes()
        -- e.g. 'abc'
        -- -> '?bc'
        -- -> 'a?c'
        -- -> 'ab?'
        -- -> '??c'
        -- -> '?b?'
        -- -> 'a??'
        -- -> '???'
        local hnm = holes_number
        local hnmv
        while hnm > 0 do
            hnmv = holes[hnm]
            if hnmv + holes_number < len1 + hnm then
                -- hole `hnm` can still move right: move it and pack the
                -- following holes directly behind it
                for i = 1, holes_number + 1 - hnm do
                    holes[hnm + i - 1] = hnmv + i
                end
                return false
            else
                hnm = hnm - 1
            end
        end
        holes_number = holes_number + 1
        for i = 1, holes_number do
            holes[i] = i
        end
        return true
    end

    local string1_fragments
    local string1_fragment
    -- Both patterns are local: the original assigned string2_pattern without
    -- declaring it, which created a global on every call.
    local string1_pattern, string2_pattern
    local matched1, matched2
    local match_success = 0 -- 0 means no result yet; >0 means found result with (holes_number+1-match_success) "holes"
    local n_holes_connected
    while holes_number <= len1 do
        string1_fragments = {}
        n_holes_connected = 1
        for i = 1, holes_number do
            -- literal text between hole i-1 and hole i
            string1_fragment = escape_magic(mw.ustring.sub(string1, holes[i - 1] + 1, holes[i] - 1))
            if string1_fragment ~= '' then
                table.insert(string1_fragments, '(' .. string1_fragment .. ')')
            elseif i == 1 then
                -- a hole at the very start: keep an empty leading fragment so
                -- the separator below still yields a capture for it
                table.insert(string1_fragments, string1_fragment)
            else
                n_holes_connected = n_holes_connected + 1 -- if the two "holes" are connected reduce the number of "holes" by 1
            end
        end
        if match_success <= n_holes_connected then
            -- literal text after the last hole
            string1_fragment = escape_magic(mw.ustring.sub(string1, holes[holes_number] + 1))
            if string1_fragment ~= '' then
                table.insert(string1_fragments, '(' .. string1_fragment .. ')')
            else
                table.insert(string1_fragments, string1_fragment)
            end
            -- a hole covers at least one character of string1, but may be
            -- empty in string2
            string1_pattern = '^' .. table.concat(string1_fragments, '(..-)') .. '$'
            string2_pattern = '^' .. table.concat(string1_fragments, '(.-)') .. '$'
            matched2 = {mw.ustring.match(string2, string2_pattern)}
            if #matched2 > 0 then
                matched1 = {mw.ustring.match(string1, string1_pattern)}
                if #matched1 > 0 then
                    -- a result with more connected holes replaces the ones
                    -- collected so far; an equal one is appended
                    if match_success < n_holes_connected then result = {} end
                    match_success = n_holes_connected
                    table.insert(result, {[id1] = matched1, [id2] = matched2})
                end
            end
        end
        -- stop once a hole count has been exhausted and something was found
        if next_holes() and match_success > 0 then break end
    end
    return result
end

--- Split wikitext into display pieces and remember which pieces are links.
-- "[[A|B]]" contributes the piece "B" linked to "A"; "[[C]]" contributes the
-- piece "C" linked to "C"; text outside links is copied as unlinked pieces.
-- When openers are nested, only the innermost "[[" before the next "]]" is
-- treated as the link opener; an unclosed "[[" ends the scan and the rest of
-- the string is kept as plain text.
-- @param s the string to scan
-- @return {text = {piece, ...}, linkto = {[index of piece] = target}}
local function str_parse_link(s)
    local parsed = {text = {}, linkto = {}}
    local cursor = 1 -- first byte of s not yet copied into parsed.text
    local open_at = s:find('%[%[', cursor)

    while open_at do
        local close_at = s:find('%]%]', open_at + 2)
        if not close_at then break end

        -- slide forward to the last opener that precedes the closer
        local inner = s:find('%[%[', open_at + 2)
        while inner and inner < close_at do
            open_at = inner
            inner = s:find('%[%[', open_at + 2)
        end

        -- plain text in front of the link
        if cursor < open_at then
            parsed.text[#parsed.text + 1] = s:sub(cursor, open_at - 1)
        end

        -- link body; an empty "[[]]" contributes nothing
        if open_at + 2 < close_at then
            local body = s:sub(open_at + 2, close_at - 1)
            local bar = body:find('|')
            local label, target
            if bar then
                label, target = body:sub(bar + 1), body:sub(1, bar - 1)
            else
                label, target = body, body
            end
            parsed.text[#parsed.text + 1] = label
            parsed.linkto[#parsed.text] = target
        end

        cursor = close_at + 2
        open_at = s:find('%[%[', cursor)
    end

    -- whatever follows the last complete link
    if cursor <= #s then
        parsed.text[#parsed.text + 1] = s:sub(cursor)
    end
    return parsed
end

-- Parse a term and its kana reading into a list of pieces, each carrying its
-- text and, where applicable, the link target and the ruby that go with it.
--
-- term: the written form; may contain [[links]], and '%' as a separator.
-- kana: the reading; '^', '-' and '.' are stripped, '%' is a separator.
-- options: optional table, see below.
--
-- "options.try == nil": Launch an error when the initial match failed.
-- "options.try == 'return'": Return "nil, (error information)" when the initial match failed.
-- "options.try == 'force'": Try every possible pattern when the initial match failed.
-- "options.space == nil": Remove spaces between kana or kanji but preserve elsewhere.
-- "options.space == 'all'": Preserve all spaces.
-- "options.space == 'none'": Remove all spaces.
--
-- Returns a list of tables {text = ..., linkto = ..., ruby = ..., ruby_next = ...}
-- (see the example at the end of the function).
function export.parse_text(term, kana, options)
    options = options or {}

    local pattern_kana = 'ぁ-ゖァ-ヶ' -- signs subject to hira-kata matching
    local pattern_kanji_probable = '々㐀-䶵一-鿌' .. mw.ustring.char(0xF900) .. "-" .. mw.ustring.char(0xFAD9) .. '𠀀-𯨟0-9A-Za-z〆' -- signs that can have ruby, but not spaces
    local pattern_rubiable_probable = 'A-Za-z0-9α-ωΑ-Ω' -- signs that can have both ruby and spaces
    local pattern_mute_probable = '%.゠・' -- signs that may appear in term, but not kana
    -- matches exactly one kanji-like or kana character; spaces between two
    -- such characters are the ones that get removed
    local pattern_cjk_char = '^[' .. pattern_kanji_probable .. pattern_kana .. ']$'

    -- Remove romaji markup
    kana = kana:gsub('[%^%-%.]', '') -- remove '^', '-', '.', preserve '%', ' '

    -- Remove links: [[A|B]] -> B, [[C]] -> C
    -- Create the link map
    -- e.g. "[[エドガー・アラン・ポー|アラン・ポー]]の[[推理 小説]]"
    local link_map = str_parse_link((term:gsub('%%', ''))) -- remove '%'
    if options.space ~= 'all' then
        for i, v in ipairs(link_map.text) do
            -- The position captures below are character positions, so the
            -- end-of-piece test must use the character count, not the byte
            -- length (#v); with #v a trailing space after multibyte text was
            -- never compared against the next piece.
            local v_len = mw.ustring.len(v) or #v
            link_map.text[i] = mw.ustring.gsub(v, '()( +)()', function(pos1, m1, pos2)
                local char_1, char_2
                -- character before the run of spaces (possibly in the previous piece)
                if pos1 > 1 then
                    char_1 = mw.ustring.sub(v, pos1 - 1, pos1 - 1)
                elseif i > 1 then
                    char_1 = mw.ustring.sub(link_map.text[i - 1], -1)
                else
                    return m1
                end
                -- character after the run of spaces (possibly in the next piece)
                if pos2 <= v_len then
                    char_2 = mw.ustring.sub(v, pos2, pos2)
                elseif i < #link_map.text then
                    char_2 = mw.ustring.sub(link_map.text[i + 1], 1, 1)
                else
                    return m1
                end
                if mw.ustring.find(char_1, pattern_cjk_char) and mw.ustring.find(char_2, pattern_cjk_char) then
                    return ''
                else
                    return m1
                end
            end)
            if link_map.linkto[i] then
                link_map.linkto[i] = mw.ustring.gsub(link_map.linkto[i], '()( +)()', function(pos1, m1, pos2)
                    local char_1, char_2
                    char_1 = mw.ustring.sub(link_map.linkto[i], pos1 - 1, pos1 - 1)
                    char_2 = mw.ustring.sub(link_map.linkto[i], pos2, pos2)
                    if mw.ustring.find(char_1, pattern_cjk_char) and mw.ustring.find(char_2, pattern_cjk_char) then
                        return ''
                    else
                        return m1
                    end
                end)
            end
        end
    end -- remove space between kanji and kana
    -- link_map = {text = {'アラン・ポー', 'の', '推理小説'}, linkto = {'エドガー・アラン・ポー', nil, '推理小説'}}

    -- Create the ruby map
    -- e.g. 'アラン・ポーの推理 小説', 'あらん ぽー の すいり しょうせつ'
    -- ("ぽお" is not allowed)
    local ruby_map = {text = {}, ruby = {}}
    local plain_term = mw.text.split(table.concat(str_parse_link(term).text), '%%')
    local plain_kana = mw.text.split(table.concat(str_parse_link(kana).text), '%%')
    if #plain_term == #plain_kana then
        for i, plain_term_i in ipairs(plain_term) do
            -- Build one pattern piece per run of same-kind characters in the
            -- term; pieces that start with '(' are captures.
            local pattern_ruby = {}
            local id1, id2, after_k
            local char_s
            id2 = 1
            while true do
                id1 = id2
                char_s = mw.ustring.sub(plain_term_i, id1, id1)
                if mw.ustring.find(char_s, '^['..pattern_kanji_probable..']$') then
                    -- run of kanji-like characters: reading is unknown, match anything
                    id2 = mw.ustring.find(plain_term_i, '[^'..pattern_kanji_probable..']', id1)
                    if pattern_ruby[#pattern_ruby] ~= '(..-)' then table.insert(pattern_ruby, '(..-)') end
                    if not id2 then break end
                    after_k = true
                elseif mw.ustring.find(char_s, '^['..pattern_rubiable_probable..']$') then
                    id2 = mw.ustring.find(plain_term_i, '[^'..pattern_rubiable_probable..']', id1)
                    if pattern_ruby[#pattern_ruby] ~= '(..-)' then table.insert(pattern_ruby, '(..-)') end
                    if not id2 then break end
                    after_k = false
                elseif mw.ustring.find(char_s, '^['..pattern_kana..']$') then
                    -- run of kana: each sign matches its hiragana or katakana
                    -- form, with optional spaces around
                    id2 = mw.ustring.find(plain_term_i, '[^'..pattern_kana..']', id1)
                    table.insert(pattern_ruby, '( ?' .. mw.ustring.gsub(mw.ustring.sub(plain_term_i, id1, (id2 or 0) - 1), '.', function(m1)
                        if mw.ustring.find(m1, '^[ヶゖケ]$') then
                            return "[" .. m_ja.kata_to_hira(m1) .. m_ja.hira_to_kata(m1) .. "かがこカガコ] ?"
                        else
                            return "[" .. m_ja.kata_to_hira(m1) .. m_ja.hira_to_kata(m1) .. "] ?"
                        end
                    end) .. ')')
                    if not id2 then break end
                    after_k = true
                elseif char_s == ' ' then
                    id2 = mw.ustring.find(plain_term_i, '[^ ]', id1)
                    if options.space ~= 'all' and id2 and after_k and mw.ustring.find(mw.ustring.sub(plain_term_i, id2, id2), pattern_cjk_char) then
                        table.insert(pattern_ruby, mw.ustring.sub(plain_term_i, id1, id2 - 1)) -- remove space between kanji and kana (by not matching it)
                    else
                        table.insert(pattern_ruby, '(' .. mw.ustring.sub(plain_term_i, id1, (id2 or 0) - 1) .. ')')
                    end
                    if not id2 then break end
                    after_k = false
                else
                    -- any other characters: matched literally; "mute" signs
                    -- may also be a space, a hyphen, or missing in the kana
                    id2 = mw.ustring.find(plain_term_i, '['..pattern_kanji_probable..pattern_rubiable_probable..pattern_kana..' ]', id1)
                    char_s = mw.text.split(mw.ustring.sub(plain_term_i, id1, (id2 or 0) - 1), '')
                    for _char_i, _char in ipairs(char_s) do
                        if char_s[_char_i]:find('^[%(%)%.%%%+%-%*%?%[%]%^%$]$') then
                            _char = '%' .. _char
                        end
                        if mw.ustring.find(char_s[_char_i], '^['..pattern_mute_probable..']$') then
                            _char = '[' .. _char .. ' -]?'
                        end
                        char_s[_char_i] = _char
                    end
                    table.insert(pattern_ruby, '('..table.concat(char_s)..')')
                    if not id2 then break end
                    after_k = false
                end
            end
            local pattern_ruby_s = table.concat(pattern_ruby)
            -- 'アラン・ポーの推理 小説' to '( ?[あア] ?[らラ] ?[んン] ?)([・ -]?)( ?[ぽポ] ?)(ー)( ?[のノ] ?)(..-) (..-)'
            -- Execute matching
            local ruby_map_i_ruby = {mw.ustring.match(plain_kana[i], '^'..pattern_ruby_s..'$')}
            if #ruby_map_i_ruby > 0 then
                local ruby_map_i_text = {mw.ustring.match(plain_term_i, '^'..pattern_ruby_s..'$')}
                local n_match = 0
                for _, pat in ipairs(pattern_ruby) do
                    if pat:sub(1, 1) == '(' then
                        n_match = n_match + 1
                        if pat == '(..-)' then
                            -- only the free-form captures carry ruby
                            table.insert(ruby_map.text, ruby_map_i_text[n_match])
                            ruby_map.ruby[#ruby_map.text] = ruby_map_i_ruby[n_match]:gsub(' ', '')  -- remove spaces from ruby
                        else
                            table.insert(ruby_map.text, ruby_map_i_text[n_match])
                        end
                    end
                end
            elseif options.try == 'force' then
                -- fall back to brute-force alignment; the link map is rebuilt
                -- without space removal so its text lines up with the raw term
                link_map = str_parse_link((term:gsub('%%', '')))
                local forced_result = str_ucompare(plain_term_i, plain_kana[i])[1]
                for ii, vv in ipairs(forced_result[1]) do
                    table.insert(ruby_map.text, vv)
                    if forced_result[2][ii] ~= vv then ruby_map.ruby[#ruby_map.text] = forced_result[2][ii] end
                end
            elseif options.try == 'return' then
                return nil, 'Can not match "' .. plain_term_i .. '" and "' .. plain_kana[i] .. '".'
            else
                error('Can not match "' .. plain_term_i .. '" and "' .. plain_kana[i] .. '".')
            end
        end
    elseif options.try == 'force' then
        link_map = str_parse_link(term)
        local forced_result = str_ucompare(table.concat(str_parse_link(term).text), table.concat(str_parse_link(kana).text))[1]
        for i, v in ipairs(forced_result[1]) do
            table.insert(ruby_map.text, v)
            if forced_result[2][i] ~= v then ruby_map.ruby[#ruby_map.text] = forced_result[2][i] end
        end
    elseif options.try == 'return' then
        return nil, 'Unequal numbers of the separator "%" in both forms.'
    else
        error('Unequal numbers of the separator "%" in both forms.')
    end
    -- ruby_map = {text = {'アラン・ポーの', '推理', '小説'}, ruby = {nil, 'すいり', 'しょうせつ'}}

    -- Merge the ruby and link map
    -- Both maps are walked in parallel.  len_cut is (bytes of link text
    -- consumed) - (bytes of ruby text consumed); its sign tells which map is
    -- ahead, and therefore which one to advance and where to cut the piece.
    local r = {}
    local id_l, id_r = 0, 0
    local len_cut = 0
    local len_cut_old = 0
    while true do
        if len_cut_old <= 0 then
            id_l = id_l + 1
            if id_l > #link_map.text then break end
            len_cut = len_cut + #link_map.text[id_l]
        end
        if len_cut_old >= 0 then
            id_r = id_r + 1
            if id_r > #ruby_map.text then break end
            len_cut = len_cut - #ruby_map.text[id_r]
        end
        table.insert(r, {
            -- the shorter remaining piece becomes the text; a negative start
            -- index keeps only the part not emitted yet, 0 keeps the whole piece
            text = len_cut < 0 and link_map.text[id_l]:sub(math.min(0, -len_cut_old)) or ruby_map.text[id_r]:sub(math.min(0, len_cut_old)),
            linkto = link_map.linkto[id_l],
            ruby = ruby_map.ruby[id_r],
        })
        if options.space == 'none' then r[#r].text = r[#r].text:gsub(' ', '') end
        -- the ruby piece continues from the previous record: chain them
        if len_cut_old < 0 then r[#r - 1].ruby_next = r[#r] end
        len_cut_old = len_cut
    end
    --[[r = {
        {text = 'アラン・ポー', linkto = 'エドガー・アラン・ポー'},
        {text = 'の'},
        {text = '推理', linkto = '推理小説', ruby = 'すいり'},
        {text = '小説', linkto = '推理小説', ruby = 'しょうせつ'},
    }]]
    return r
end

-- Parse inline ruby markup of the form "[base](ruby)".
-- Every balanced "[...]" immediately followed by a balanced "(...)" becomes a
-- piece {text = base, ruby = ruby}; the ruby field is left out when the
-- parentheses are empty, and the pair is dropped when the brackets are empty.
-- Text between such pairs becomes plain {text = ...} pieces.
-- markup: the string to parse.
-- options: optional table; not consulted by this function.
-- Returns the list of pieces in source order.
function export.parse_markup(markup, options)
    options = options or {}

    local pieces = {}
    local resume_at = 1 -- first byte of markup not emitted yet

    -- Append markup[from..to] as a plain piece unless it is empty.
    local function push_plain(from, to)
        local plain = markup:sub(from, to)
        if plain ~= '' then
            pieces[#pieces + 1] = {text = plain}
        end
    end

    for start_pos, bracketed, parenthesized, end_pos in markup:gmatch('()(%b[])(%b())()') do
        push_plain(resume_at, start_pos - 1)

        -- strip the enclosing "[ ]" and "( )"
        local base = bracketed:sub(2, -2)
        local reading = parenthesized:sub(2, -2)
        if base ~= '' then
            local piece = {text = base}
            if reading ~= '' then
                piece.ruby = reading
            end
            pieces[#pieces + 1] = piece
        end

        resume_at = end_pos
    end

    push_plain(resume_at, -1)
    return pieces
end

-- Render a list of pieces (tables with text / linkto / ruby / ruby_next
-- fields) as one string of wikitext links and HTML <ruby> elements.
-- ruby_map: the list of pieces.
-- options: optional table; not consulted by this function.
-- Returns the concatenated markup.
function export.to_element(ruby_map, options)
    options = options or {}

    local LINK_OPEN, LINK_SEP, LINK_CLOSE = '[[', '|', ']]'
    local RUBY_OPEN, RUBY_MID, RUBY_CLOSE = '<ruby>', '<rp>(</rp><rt>', '</rt><rp>)</rp></ruby>'

    -- Render one piece of a group.
    -- is_first / is_last: whether the piece opens / closes the group.
    -- ruby_outside: true when one <ruby> wraps the whole group and links are
    -- nested inside it; false when one link wraps the group and each piece
    -- gets its own <ruby>.
    local function render(piece, is_first, is_last, ruby_outside)
        local out = piece.text
        local target, reading = piece.linkto, piece.ruby
        if ruby_outside then
            if target and out ~= '' then
                out = LINK_OPEN .. target .. LINK_SEP .. out .. LINK_CLOSE
            end
            if reading then
                out = (is_first and RUBY_OPEN or '') .. out .. (is_last and RUBY_MID .. reading .. RUBY_CLOSE or '')
            end
        else
            if reading and out ~= '' then
                out = RUBY_OPEN .. out .. RUBY_MID .. reading .. RUBY_CLOSE
            end
            if target then
                out = (is_first and LINK_OPEN .. target .. LINK_SEP or '') .. out .. (is_last and LINK_CLOSE or '')
            end
        end
        return out
    end

    local rendered = {}
    local total = #ruby_map
    local first = 1
    while first <= total do
        local head = ruby_map[first]
        if head.ruby == nil and head.linkto == nil then
            -- bare text is copied through
            table.insert(rendered, head.text)
            first = first + 1
        else
            -- find `past`, the index just behind the group starting at `first`
            local past = first + 1
            local ruby_outside = past <= total and head.ruby_next == ruby_map[past]
            if ruby_outside then
                -- follow the ruby_next chain as far as it goes
                repeat
                    past = past + 1
                until past > total or ruby_map[past - 1].ruby_next ~= ruby_map[past]
            else
                -- gather the following pieces that share the link target and
                -- do not start a ruby chain themselves
                local target = head.linkto
                while past <= total and not ruby_map[past].ruby_next and ruby_map[past].linkto == target do
                    past = past + 1
                end
            end

            local group = {}
            local has_text = false
            for i = first, past - 1 do
                local piece = ruby_map[i]
                table.insert(group, render(piece, i == first, i == past - 1, ruby_outside))
                has_text = has_text or piece.text ~= ''
            end
            -- a group made only of empty texts is dropped altogether
            if has_text then
                table.insert(rendered, table.concat(group))
            end
            first = past
        end
    end

    return table.concat(rendered)
end

-- Demo entry point: run a fixed set of sample inputs through the parsers and
-- the renderer and return the results joined by horizontal rules.
function export.show()
    -- {term, kana, options} triples fed to parse_text, in display order
    local text_samples = {
        {'[[エドガー・アラン・ポー|アラン・ポー]]の[[推理 小説]]', 'あらん ぽー の すいり しょうせつ'},
        {'[[エドガー・アラン・ポー|アラン・ポー]]の[[推理 小説]]', 'あらん ぽー の すいり しょうせつ', {space = 'all'}},
        {'(This is a pen.)', '(ディス イズ ア ペン.)'},
        {'(This is a pen.)', '(ディス イズ ア ペン.)', {space = 'none'}},
        {'[[駄洒]]落&駄[[洒落]]', 'だじゃれ&だじゃれ'},
        {'用ゐる', 'もちいる', {try = 'force'}},
        {'スイーツ(笑)', 'スイーツかっこわらい', {try = 'force'}},
        {'100%', '100パーセント', {try = 'force'}},
    }

    local sections = {'[[Module:User:Huhu9001/000]]'}
    for _, sample in ipairs(text_samples) do
        sections[#sections + 1] = export.to_element(export.parse_text(sample[1], sample[2], sample[3]))
    end
    -- one sample for the bracket markup parser
    sections[#sections + 1] = export.to_element(export.parse_markup('apple[ba](βα)nana'))

    return table.concat(sections, '\n----\n')
end

-- Return the table of public functions as the value of this module.
return export