Jump to content

Module:User:Huhu9001/000

From Wikipedia, the free encyclopedia
This is an old revision of this page, as edited by Huhu9001 (talk | contribs) at 09:37, 30 January 2020. The present address (URL) is a permanent link to this revision, which may differ significantly from the current revision.

local export = {}

--local m_ja = require('Module:ja')
local m_ja = {}
do
	-- Distance, in codepoints, between a hiragana sign and the katakana sign
	-- that the conversions below map it to.
	local KANA_SHIFT = 96

	-- Replace every character of `s` matched by `class` with the character
	-- whose codepoint is `delta` away. Only the resulting string is returned
	-- (the substitution count of gsub is dropped).
	local function shift_matching(s, class, delta)
		local shifted = mw.ustring.gsub(s, class, function(ch)
			return mw.ustring.char(mw.ustring.codepoint(ch) + delta)
		end)
		return shifted
	end

	-- Convert the hiragana in `s` (range ぁ-ゖ) to katakana; other characters are kept.
	function m_ja.hira_to_kata(s)
		return shift_matching(s, '[ぁ-ゖ]', KANA_SHIFT)
	end

	-- Convert the katakana in `s` (range ァ-ヶ) to hiragana; other characters are kept.
	function m_ja.kata_to_hira(s)
		return shift_matching(s, '[ァ-ヶ]', -KANA_SHIFT)
	end
end

-- Parse a Japanese term and its kana reading into a list of segments that
-- carry link and ruby information. See also Template:ja-r.
-- Meant to be called from another module.
--
-- Parameters:
--   term    (string) display form; may contain wikilinks ([[A|B]], [[C]]),
--           spaces and the separator '%'.
--   kana    (string) reading; '^', '-' and '.' are stripped before matching,
--           '%' and ' ' are preserved.
--   options (table, optional)
--     try_match: if true, return "nil, (error information)" instead of
--                launching an error.
--     forced:    if truthy, a '%'-separated piece whose reading cannot be
--                matched is skipped instead of being reported as an error.
--
-- Returns an array of tables with the fields
--   text      piece of the term,
--   linkto    link target of the piece (nil outside links),
--   ruby      reading attached to the piece (nil when it has none),
--   ruby_next the following segment; set when the next segment was cut from
--             the same ruby text because a link boundary falls inside it.
function export.parse_text(term, kana, options)
	options = options or {}

	local err_info
	local pattern_kana = 'ぁ-ゖァ-ヶ' -- signs subject to hira-kata matching
	local pattern_kanji_probable = '々㐀-䶵一-鿌' .. mw.ustring.char(0xF900) .. "-" .. mw.ustring.char(0xFAD9) .. '𠀀-𯨟0-9A-Za-z〆' -- signs that can have ruby, but not spaces
	local pattern_rubiable_probable = 'A-Za-z0-9α-ωΑ-Ω' -- signs that can have both ruby and spaces
	local pattern_mute_probable = '%.゠・' -- signs that may appear in term, but not kana
	-- matches exactly one sign that is either "kanji probable" or kana
	local pattern_kanji_or_kana = '^[' .. pattern_kanji_probable .. pattern_kana .. ']$'

	-- Remove romaji markup
	kana = kana:gsub('[%^%-%.]', '') -- remove '^', '-', '.', preserve '%', ' '

	-- Remove links: [[A|B]] -> B, [[C]] -> C
	-- Create the link map
	-- e.g. "[[エドガー・アラン・ポー|アラン・ポー]]の[[推理 小説]]"
	-- Returns {text = {...}, linkto = {...}}; linkto[k] is the target of
	-- text[k] and stays nil for text outside links.
	local function _parse_link(s)
		local t = {text = {}, linkto = {}}
		local lt
		local i1, i2
		local i_o = 1 -- start of the text not yet consumed
		local i_n = s:find('%[%[', i_o) -- position of the current '[['
		while i_n do
			i1, i2 = s:find('%[%[', i_n + 2), s:find('%]%]', i_n + 2)
			if not i2 then break end -- unclosed '[[': the rest is plain text
			-- if another '[[' appears before the closing ']]', use the innermost one
			while i1 and i1 < i2 do
				i_n = i1
				i1 = s:find('%[%[', i_n + 2)
			end
			if i_o < i_n then table.insert(t.text, s:sub(i_o, i_n - 1)) end
			if i_n + 2 < i2 then
				lt = s:sub(i_n + 2, i2 - 1)
				i1 = lt:find('|')
				if i1 then
					table.insert(t.text, lt:sub(i1 + 1))
					t.linkto[#t.text] = lt:sub(1, i1 - 1)
				else
					table.insert(t.text, lt)
					t.linkto[#t.text] = lt
				end
			end
			i_o = i2 + 2
			i_n = s:find('%[%[', i_o)
		end
		if i_o <= #s then table.insert(t.text, s:sub(i_o)) end
		return t
	end
	local link_map = _parse_link((term:gsub('%%', ''))) -- remove '%'
	for i, v in ipairs(link_map.text) do
		-- The position captures of mw.ustring.gsub are used as character
		-- indices below, so they must be compared with the character length
		-- of v, not with its byte length (#v).
		local v_len = mw.ustring.len(v)
		link_map.text[i] = mw.ustring.gsub(v, '()( +)()', function(pos1, m1, pos2)
			local char_1, char_2
			-- sign before the spaces: in this piece, or the last sign of the previous piece
			if pos1 > 1 then
				char_1 = mw.ustring.sub(v, pos1 - 1, pos1 - 1)
			elseif i > 1 then
				char_1 = mw.ustring.sub(link_map.text[i - 1], -1)
			else
				return m1
			end
			-- sign after the spaces: in this piece, or the first sign of the next piece
			if pos2 <= v_len then
				char_2 = mw.ustring.sub(v, pos2, pos2)
			elseif i < #link_map.text then
				char_2 = mw.ustring.sub(link_map.text[i + 1], 1, 1)
			else
				return m1
			end
			if mw.ustring.find(char_1, pattern_kanji_or_kana) and mw.ustring.find(char_2, pattern_kanji_or_kana) then
				return ''
			else
				return m1
			end
		end)
		if link_map.linkto[i] then
			local linkto_i = link_map.linkto[i]
			link_map.linkto[i] = mw.ustring.gsub(linkto_i, '()( +)()', function(pos1, m1, pos2)
				local char_1, char_2
				char_1 = mw.ustring.sub(linkto_i, pos1 - 1, pos1 - 1)
				char_2 = mw.ustring.sub(linkto_i, pos2, pos2)
				if mw.ustring.find(char_1, pattern_kanji_or_kana) and mw.ustring.find(char_2, pattern_kanji_or_kana) then
					return ''
				else
					return m1
				end
			end)
		end
	end -- remove space between kanji and kana
	-- link_map = {text = {'アラン・ポー', 'の', '推理小説'}, linkto = {'エドガー・アラン・ポー', nil, '推理小説'}}

	-- Texts for ruby match
	local plain_term = mw.text.split(table.concat(_parse_link(term).text), '%%')
	local plain_kana = mw.text.split(table.concat(_parse_link(kana).text), '%%')
	if #plain_term ~= #plain_kana then
		err_info = 'Unequal numbers of the separator "%" in both forms.'
		if options.try_match then return nil, err_info else error(err_info) end
	end

	-- Create the ruby map
	-- e.g. 'アラン・ポーの推理 小説', 'あらん ぽー の すいり しょうせつ'
	-- ("ぽお" is not allowed)
	local ruby_map = {text = {}, ruby = {}}
	for i, plain_term_i in ipairs(plain_term) do
		-- Build, run by run, a pattern that matches both the term and its reading.
		local pattern_ruby = {}
		local id1, id2, after_k -- after_k: the previous run was kanji or kana
		local char_s
		id2 = 1
		while true do
			id1 = id2
			char_s = mw.ustring.sub(plain_term_i, id1, id1)
			if mw.ustring.find(char_s, '^['..pattern_kanji_probable..']$') then
				-- run of kanji-like signs: its reading is captured lazily
				id2 = mw.ustring.find(plain_term_i, '[^'..pattern_kanji_probable..']', id1)
				if pattern_ruby[#pattern_ruby] ~= '(..-)' then table.insert(pattern_ruby, '(..-)') end
				if not id2 then break end
				after_k = true
			elseif mw.ustring.find(char_s, '^['..pattern_rubiable_probable..']$') then
				-- run of other signs that can carry ruby
				id2 = mw.ustring.find(plain_term_i, '[^'..pattern_rubiable_probable..']', id1)
				if pattern_ruby[#pattern_ruby] ~= '(..-)' then table.insert(pattern_ruby, '(..-)') end
				if not id2 then break end
				after_k = false
			elseif mw.ustring.find(char_s, '^['..pattern_kana..']$') then
				-- run of kana: each sign matches its hiragana or katakana form,
				-- with optional spaces around it
				id2 = mw.ustring.find(plain_term_i, '[^'..pattern_kana..']', id1)
				table.insert(pattern_ruby, '( ?' .. mw.ustring.gsub(mw.ustring.sub(plain_term_i, id1, (id2 or 0) - 1), '.', function(m1)
					if mw.ustring.find(m1, '^[ヶゖケ]$') then
						-- these signs are additionally allowed to match か/が/こ in either script
						return "[" .. m_ja.kata_to_hira(m1) .. m_ja.hira_to_kata(m1) .. "かがこカガコ] ?"
					else
						return "[" .. m_ja.kata_to_hira(m1) .. m_ja.hira_to_kata(m1) .. "] ?"
					end
				end) .. ')')
				if not id2 then break end
				after_k = true
			elseif char_s == ' ' then
				id2 = mw.ustring.find(plain_term_i, '[^ ]', id1)
				if id2 and after_k and mw.ustring.find(mw.ustring.sub(plain_term_i, id2, id2), pattern_kanji_or_kana) then
					table.insert(pattern_ruby, mw.ustring.sub(plain_term_i, id1, id2 - 1)) -- remove space between kanji and kana (by not matching it)
				else
					table.insert(pattern_ruby, '(' .. mw.ustring.sub(plain_term_i, id1, (id2 or 0) - 1) .. ')')
				end
				if not id2 then break end
				after_k = false
			else
				-- any other signs are matched literally, one by one
				id2 = mw.ustring.find(plain_term_i, '['..pattern_kanji_probable..pattern_rubiable_probable..pattern_kana..' ]', id1)
				char_s = mw.text.split(mw.ustring.sub(plain_term_i, id1, (id2 or 0) - 1), '')
				for _char_i, _char in ipairs(char_s) do
					-- escape pattern magic characters
					if char_s[_char_i]:find('^[%(%)%.%%%+%-%*%?%[%]%^%$]$') then
						_char = '%' .. _char
					end
					-- "mute" signs may be absent from the reading, or be a space or '-' there
					if mw.ustring.find(char_s[_char_i], '^['..pattern_mute_probable..']$') then
						_char = '[' .. _char .. ' -]?'
					end
					char_s[_char_i] = _char
				end
				table.insert(pattern_ruby, '('..table.concat(char_s)..')')
				if not id2 then break end
				after_k = false
			end
		end
		local pattern_ruby_s = table.concat(pattern_ruby)
		-- 'アラン・ポーの推理 小説' to '( ?[あア] ?[らラ] ?[んン] ?)([・ -]?)( ?[ぽポ] ?)(ー)( ?[のノ] ?)(..-) (..-)'
		-- Execute matching
		local ruby_map_i_ruby = {mw.ustring.match(plain_kana[i], '^'..pattern_ruby_s..'$')}
		if #ruby_map_i_ruby > 0 then
			local ruby_map_i_text = {mw.ustring.match(plain_term_i, '^'..pattern_ruby_s..'$')}
			local n_match = 0
			for _, pat in ipairs(pattern_ruby) do
				-- entries not starting with '(' are uncaptured spaces and yield no text
				if pat:sub(1, 1) == '(' then
					n_match = n_match + 1
					if pat == '(..-)' then
						table.insert(ruby_map.text, ruby_map_i_text[n_match])
						ruby_map.ruby[#ruby_map.text] = ruby_map_i_ruby[n_match]:gsub(' ', '')  -- remove spaces from ruby
					else
						table.insert(ruby_map.text, ruby_map_i_text[n_match])
					end
				end
			end
		elseif not options.forced then
			err_info = 'Can not match:' .. plain_term_i .. ', ' .. plain_kana[i] .. ', ' .. pattern_ruby_s
			if options.try_match then return nil, err_info else error(err_info) end
		end
	end
	-- ruby_map = {text = {'アラン・ポーの', '推理', '小説'}, ruby = {nil, 'すいり', 'しょうせつ'}}

	-- Merge the ruby and link map
	local r = {}
	local id_l, id_r = 0, 0 -- current index into link_map.text / ruby_map.text
	-- len_cut: byte length of the link texts consumed so far minus byte length
	-- of the ruby texts consumed so far; len_cut_old is its value after the
	-- previous segment.
	local len_cut = 0
	local len_cut_old = 0
	while true do
		-- the link text did not reach past the ruby text: take the next link text
		if len_cut_old <= 0 then
			id_l = id_l + 1
			if id_l > #link_map.text then break end
			len_cut = len_cut + #link_map.text[id_l]
		end
		-- the ruby text did not reach past the link text: take the next ruby text
		if len_cut_old >= 0 then
			id_r = id_r + 1
			if id_r > #ruby_map.text then break end
			len_cut = len_cut - #ruby_map.text[id_r]
		end
		table.insert(r, {
			-- The segment is the tail of whichever text ends first: sub() with a
			-- negative index keeps that many trailing bytes, with 0 the whole text.
			text = len_cut < 0 and link_map.text[id_l]:sub(math.min(0, -len_cut_old)) or ruby_map.text[id_r]:sub(math.min(0, len_cut_old)),
			linkto = link_map.linkto[id_l],
			ruby = ruby_map.ruby[id_r],
		})
		-- the previous segment ended inside the current ruby text: chain them
		if len_cut_old < 0 then r[#r - 1].ruby_next = r[#r] end
		len_cut_old = len_cut
	end
	--[[
	r = {
		{text = 'アラン・ポー', linkto = 'エドガー・アラン・ポー'},
		{text = 'の'},
		{text = '推理', linkto = '推理小説', ruby = 'すいり'},
		{text = '小説', linkto = '推理小説', ruby = 'しょうせつ'},
	}
	]]
	return r
end

-- Render the segment list produced by export.parse_text as wikitext with
-- [[link|text]] markup and <ruby> elements.
--   ruby_map: array of segments ({text, linkto, ruby, ruby_next}).
--   options:  optional table; it is defaulted but not consulted here.
-- Returns the concatenated markup as a string.
function export.to_element(ruby_map, options)
	options = options or {}

	local LINK_OPEN, LINK_SEP, LINK_CLOSE = '[[', '|', ']]'
	local RUBY_OPEN, RUBY_SEP, RUBY_CLOSE = '<ruby>', '<rp>(</rp><rt>', '</rt><rp>)</rp></ruby>'

	-- Render one segment of a group.
	-- is_first / is_last: position of the segment inside its group.
	-- ruby_outside: true  -> one ruby element spans the group, links are per segment;
	--               false -> one link spans the group, ruby is per segment.
	local function render(seg, is_first, is_last, ruby_outside)
		local out, target, reading = seg.text, seg.linkto, seg.ruby
		if ruby_outside then
			if target and out ~= '' then
				out = LINK_OPEN .. target .. LINK_SEP .. out .. LINK_CLOSE
			end
			if reading then
				out = (is_first and RUBY_OPEN or '') .. out .. (is_last and RUBY_SEP .. reading .. RUBY_CLOSE or '')
			end
			return out
		end
		if reading and out ~= '' then
			out = RUBY_OPEN .. out .. RUBY_SEP .. reading .. RUBY_CLOSE
		end
		if target then
			out = (is_first and LINK_OPEN .. target .. LINK_SEP or '') .. out .. (is_last and LINK_CLOSE or '')
		end
		return out
	end

	local pieces = {}
	local total = #ruby_map
	local pos = 1
	while pos <= total do
		local head = ruby_map[pos]
		if head.ruby == nil and head.linkto == nil then
			-- plain text: copied through unchanged
			pieces[#pieces + 1] = head.text
			pos = pos + 1
		else
			-- find the end (exclusive) of the group starting at pos
			local stop = pos + 1
			local ruby_outside = stop <= total and head.ruby_next == ruby_map[stop]
			if ruby_outside then
				-- follow the ruby_next chain as far as it goes
				repeat
					stop = stop + 1
				until stop > total or ruby_map[stop - 1].ruby_next ~= ruby_map[stop]
			else
				-- extend over following segments with the same link target,
				-- stopping before one that starts a ruby_next chain
				local target = head.linkto
				while stop <= total and not ruby_map[stop].ruby_next and ruby_map[stop].linkto == target do
					stop = stop + 1
				end
			end

			local buf, has_text = {}, false
			for i = pos, stop - 1 do
				local seg = ruby_map[i]
				buf[#buf + 1] = render(seg, i == pos, i == stop - 1, ruby_outside)
				has_text = has_text or seg.text ~= ''
			end
			-- a group consisting only of empty texts produces no output
			if has_text then
				pieces[#pieces + 1] = table.concat(buf)
			end
			pos = stop
		end
	end

	return table.concat(pieces)
end

-- Demo entry point: renders a few sample term/reading pairs and joins the
-- results, preceded by a link to this module, with horizontal-rule separators.
function export.show()
	local samples = {
		{'[[エドガー・アラン・ポー|アラン・ポー]]の[[推理 小説]]', 'あらん ぽー の すいり しょうせつ'},
		{'(This is a pen.)', '(ディス イズ ア ペン.)'},
		{'[[駄洒]]落&駄[[洒落]]', 'だじゃれ&だじゃれ'},
	}

	local sections = {'[[Module:User:Huhu9001/000]]'}
	for _, sample in ipairs(samples) do
		local parsed = export.parse_text(sample[1], sample[2])
		sections[#sections + 1] = export.to_element(parsed)
	end
	return table.concat(sections, '\n----\n')
end

return export