-- Module:User:Huhu9001/000
-- (wiki page chrome captured with the source: "Appearance")
local export = {}

--local m_ja = require('Module:ja')
-- Local stand-in for the two kana converters used further down in this module.
-- Both shift every matching character by 96 code points and leave the rest of
-- the string untouched.
local m_ja = {}

-- Convert each character in the range ぁ-ゖ to the character 96 code points above it.
function m_ja.hira_to_kata(s)
	local converted = mw.ustring.gsub(s, '[ぁ-ゖ]', function(ch)
		local cp = mw.ustring.codepoint(ch)
		return mw.ustring.char(cp + 96)
	end)
	return converted -- only the string, not gsub's replacement count
end

-- Convert each character in the range ァ-ヶ to the character 96 code points below it.
function m_ja.kata_to_hira(s)
	local converted = mw.ustring.gsub(s, '[ァ-ヶ]', function(ch)
		local cp = mw.ustring.codepoint(ch)
		return mw.ustring.char(cp - 96)
	end)
	return converted -- only the string, not gsub's replacement count
end
-- see also Template:ja-r
-- meant to be called from another module
--
-- Align a term (wikitext that may contain [[links]]) with its kana reading.
--
-- Parameters:
--   term    (string) Text to annotate. "[[A|B]]" and "[[C]]" links are recognised;
--           '%' separates segments that are matched independently.
--   kana    (string) Reading of `term`. '^', '-' and '.' are stripped before
--           matching; it must contain as many '%' separators as `term`.
--   options (table, optional)
--     options.try_match  If true, return "nil, (error information)" instead of
--                        launching an error.
--     options.forced     If true, a segment whose reading cannot be matched is
--                        skipped silently instead of being reported.
--
-- Returns a sequence of tables with the fields
--   text       piece of the displayed text
--   linkto     link target covering this piece, or nil
--   ruby       reading attached to this piece, or nil
--   ruby_next  the following entry, set when the same ruby capture continues
--              into it (i.e. a link boundary falls inside one ruby span)
function export.parse_text(term, kana, options)
	options = options or {}
	local err_info
	local pattern_kana = 'ぁ-ゖァ-ヶ' -- signs subject to hira-kata matching
	-- NOTE(review): the tail of this set ('𠀀-0-9A-Za-z') looks garbled: the range
	-- starting at '𠀀' ends at '0', and '-' and '9' end up as literal members.
	-- Presumably some characters were lost or normalised when the page was
	-- copied; confirm against the live module before relying on it.
	local pattern_kanji_probable = '々㐀-䶵一-鿌' .. mw.ustring.char(0xF900) .. "-" .. mw.ustring.char(0xFAD9) .. '𠀀-0-9A-Za-z〆' -- signs that can have ruby, but not spaces
	local pattern_rubiable_probable = 'A-Za-z0-9α-ωΑ-Ω' -- signs that can have both ruby and spaces
	local pattern_mute_probable = '%.゠・' -- signs that may appear in term, but not kana
	-- Single-character test "kanji-like sign or kana", built once and reused below.
	local pattern_kanji_or_kana = '^[' .. pattern_kanji_probable .. pattern_kana .. ']$'

	-- Remove romaji markup
	kana = kana:gsub('[%^%-%.]', '') -- remove '^', '-', '.', preserve '%', ' '

	-- Remove links: [[A|B]] -> B, [[C]] -> C
	-- Create the link map
	-- e.g. "[[エドガー・アラン・ポー|アラン・ポー]]の[[推理 小説]]"
	local function _parse_link(s)
		local t = {text = {}, linkto = {}}
		local lt
		local i1, i2
		local i_o = 1 -- start of the text not yet consumed
		local i_n = s:find('%[%[', i_o) -- start of the current "[["
		while i_n do
			i1, i2 = s:find('%[%[', i_n + 2), s:find('%]%]', i_n + 2)
			if not i2 then break end -- unclosed "[[": the rest is plain text
			-- If another "[[" opens before the "]]", use the innermost one.
			while i1 and i1 < i2 do
				i_n = i1
				i1 = s:find('%[%[', i_n + 2)
			end
			-- Plain text in front of the link.
			if i_o < i_n then table.insert(t.text, s:sub(i_o, i_n - 1)) end
			-- Non-empty link body: split "target|label" at the first '|'.
			if i_n + 2 < i2 then
				lt = s:sub(i_n + 2, i2 - 1)
				i1 = lt:find('|')
				if i1 then
					table.insert(t.text, lt:sub(i1 + 1))
					t.linkto[#t.text] = lt:sub(1, i1 - 1)
				else
					table.insert(t.text, lt)
					t.linkto[#t.text] = lt
				end
			end
			i_o = i2 + 2
			i_n = s:find('%[%[', i_o)
		end
		if i_o <= #s then table.insert(t.text, s:sub(i_o)) end
		return t
	end

	-- Extra parentheses drop gsub's replacement count.
	local link_map = _parse_link((term:gsub('%%', ''))) -- remove '%'
	for i, v in ipairs(link_map.text) do
		-- Position captures of mw.ustring.gsub are used as character indexes
		-- below, so the end-of-segment test needs the character length of the
		-- segment, not its byte length (#v).
		local v_len = mw.ustring.len(v)
		link_map.text[i] = mw.ustring.gsub(v, '()( +)()', function(pos1, m1, pos2)
			local char_1, char_2
			-- Character before the run of spaces (possibly in the previous segment).
			if pos1 > 1 then
				char_1 = mw.ustring.sub(v, pos1 - 1, pos1 - 1)
			elseif i > 1 then
				char_1 = mw.ustring.sub(link_map.text[i - 1], -1)
			else
				return m1
			end
			-- Character after the run of spaces (possibly in the next segment).
			if pos2 <= v_len then
				char_2 = mw.ustring.sub(v, pos2, pos2)
			elseif i < #link_map.text then
				char_2 = mw.ustring.sub(link_map.text[i + 1], 1, 1)
			else
				return m1
			end
			if mw.ustring.find(char_1, pattern_kanji_or_kana) and mw.ustring.find(char_2, pattern_kanji_or_kana) then
				return ''
			else
				return m1
			end
		end)
		if link_map.linkto[i] then
			link_map.linkto[i] = mw.ustring.gsub(link_map.linkto[i], '()( +)()', function(pos1, m1, pos2)
				local char_1, char_2
				char_1 = mw.ustring.sub(link_map.linkto[i], pos1 - 1, pos1 - 1)
				char_2 = mw.ustring.sub(link_map.linkto[i], pos2, pos2)
				if mw.ustring.find(char_1, pattern_kanji_or_kana) and mw.ustring.find(char_2, pattern_kanji_or_kana) then
					return ''
				else
					return m1
				end
			end)
		end
	end -- remove space between kanji and kana
	-- link_map = {text = {'アラン・ポー', 'の', '推理小説'}, linkto = {'エドガー・アラン・ポー', nil, '推理小説'}}

	-- Texts for ruby match
	local plain_term = mw.text.split(table.concat(_parse_link(term).text), '%%')
	local plain_kana = mw.text.split(table.concat(_parse_link(kana).text), '%%')
	if #plain_term ~= #plain_kana then
		err_info = 'Unequal numbers of the separator "%" in both forms.'
		if options.try_match then return nil, err_info else error(err_info) end
	end

	-- Create the ruby map
	-- e.g. 'アラン・ポーの推理 小説', 'あらん ぽー の すいり しょうせつ'
	-- ("ぽお" is not allowed)
	local ruby_map = {text = {}, ruby = {}}
	for i, plain_term_i in ipairs(plain_term) do
		-- Pattern pieces for this segment; every piece that starts with '(' is
		-- exactly one capture.
		local pattern_ruby = {}
		local id1, id2, after_k
		local char_s
		id2 = 1
		while true do
			id1 = id2
			char_s = mw.ustring.sub(plain_term_i, id1, id1)
			if mw.ustring.find(char_s, '^['..pattern_kanji_probable..']$') then
				-- Run of kanji-like signs: the reading is free-form.
				id2 = mw.ustring.find(plain_term_i, '[^'..pattern_kanji_probable..']', id1)
				if pattern_ruby[#pattern_ruby] ~= '(..-)' then table.insert(pattern_ruby, '(..-)') end
				if not id2 then break end
				after_k = true
			elseif mw.ustring.find(char_s, '^['..pattern_rubiable_probable..']$') then
				-- Run of rubiable signs: the reading is free-form as well.
				id2 = mw.ustring.find(plain_term_i, '[^'..pattern_rubiable_probable..']', id1)
				if pattern_ruby[#pattern_ruby] ~= '(..-)' then table.insert(pattern_ruby, '(..-)') end
				if not id2 then break end
				after_k = false
			elseif mw.ustring.find(char_s, '^['..pattern_kana..']$') then
				-- Run of kana: each sign must appear in the reading as either its
				-- hiragana or katakana form, with optional spaces around it.
				id2 = mw.ustring.find(plain_term_i, '[^'..pattern_kana..']', id1)
				table.insert(pattern_ruby, '( ?' .. mw.ustring.gsub(mw.ustring.sub(plain_term_i, id1, (id2 or 0) - 1), '.', function(m1)
					if mw.ustring.find(m1, '^[ヶゖケ]$') then
						return "[" .. m_ja.kata_to_hira(m1) .. m_ja.hira_to_kata(m1) .. "かがこカガコ] ?"
					else
						return "[" .. m_ja.kata_to_hira(m1) .. m_ja.hira_to_kata(m1) .. "] ?"
					end
				end) .. ')')
				if not id2 then break end
				after_k = true
			elseif char_s == ' ' then
				id2 = mw.ustring.find(plain_term_i, '[^ ]', id1)
				if id2 and after_k and mw.ustring.find(mw.ustring.sub(plain_term_i, id2, id2), pattern_kanji_or_kana) then
					table.insert(pattern_ruby, mw.ustring.sub(plain_term_i, id1, id2 - 1)) -- remove space between kanji and kana (by not matching it)
				else
					table.insert(pattern_ruby, '(' .. mw.ustring.sub(plain_term_i, id1, (id2 or 0) - 1) .. ')')
				end
				if not id2 then break end
				after_k = false
			else
				-- Anything else is matched literally, up to the next sign that
				-- belongs to one of the classes above.
				id2 = mw.ustring.find(plain_term_i, '['..pattern_kanji_probable..pattern_rubiable_probable..pattern_kana..' ]', id1)
				char_s = mw.text.split(mw.ustring.sub(plain_term_i, id1, (id2 or 0) - 1), '')
				for _char_i, _char in ipairs(char_s) do
					-- Escape pattern magic characters.
					if char_s[_char_i]:find('^[%(%)%.%%%+%-%*%?%[%]%^%$]$') then
						_char = '%' .. _char
					end
					-- "Mute" signs may be missing from the reading, or appear there
					-- as a space or '-'.
					if mw.ustring.find(char_s[_char_i], '^['..pattern_mute_probable..']$') then
						_char = '[' .. _char .. ' -]?'
					end
					char_s[_char_i] = _char
				end
				table.insert(pattern_ruby, '('..table.concat(char_s)..')')
				if not id2 then break end
				after_k = false
			end
		end
		local pattern_ruby_s = table.concat(pattern_ruby)
		-- 'アラン・ポーの推理 小説' to '( ?[あア] ?[らラ] ?[んン] ?)([・ -]?)( ?[ぽポ] ?)(ー)( ?[のノ] ?)(..-) (..-)'

		-- Execute matching
		local ruby_map_i_ruby = {mw.ustring.match(plain_kana[i], '^'..pattern_ruby_s..'$')}
		if #ruby_map_i_ruby > 0 then
			-- Run the same pattern over the term itself to cut it into the
			-- pieces that correspond to the captures of the reading.
			local ruby_map_i_text = {mw.ustring.match(plain_term_i, '^'..pattern_ruby_s..'$')}
			local n_match = 0
			for _, pat in ipairs(pattern_ruby) do
				if pat:sub(1, 1) == '(' then
					n_match = n_match + 1
					if pat == '(..-)' then
						table.insert(ruby_map.text, ruby_map_i_text[n_match])
						ruby_map.ruby[#ruby_map.text] = ruby_map_i_ruby[n_match]:gsub(' ', '') -- remove spaces from ruby
					else
						table.insert(ruby_map.text, ruby_map_i_text[n_match])
					end
				end
			end
		elseif not options.forced then
			err_info = 'Can not match:' .. plain_term_i .. ', ' .. plain_kana[i] .. ', ' .. pattern_ruby_s
			if options.try_match then return nil, err_info else error(err_info) end
		end
	end
	-- Schematically (consecutive pieces without ruby shown merged):
	-- ruby_map = {text = {'アラン・ポーの', '推理', '小説'}, ruby = {nil, 'すいり', 'しょうせつ'}}

	-- Merge the ruby and link map
	local r = {}
	local id_l, id_r = 0, 0
	-- len_cut: bytes of link text consumed so far minus bytes of ruby text
	-- consumed so far. Positive: the current link piece still has text left;
	-- negative: the current ruby piece still has text left.
	local len_cut = 0
	local len_cut_old = 0
	while true do
		if len_cut_old <= 0 then
			id_l = id_l + 1
			if id_l > #link_map.text then break end
			len_cut = len_cut + #link_map.text[id_l]
		end
		if len_cut_old >= 0 then
			id_r = id_r + 1
			if id_r > #ruby_map.text then break end
			len_cut = len_cut - #ruby_map.text[id_r]
		end
		table.insert(r, {
			text = len_cut < 0 and link_map.text[id_l]:sub(math.min(0, -len_cut_old)) or ruby_map.text[id_r]:sub(math.min(0, len_cut_old)),
			linkto = link_map.linkto[id_l],
			ruby = ruby_map.ruby[id_r],
		})
		-- The previous entry ended in the middle of a ruby piece: chain it to
		-- this entry so both can share one ruby annotation.
		if len_cut_old < 0 then r[#r - 1].ruby_next = r[#r] end
		len_cut_old = len_cut
	end
	--[[
	Schematically (consecutive pieces with the same link and no ruby shown merged):
	r = {
	{text = 'アラン・ポー', linkto = 'エドガー・アラン・ポー'},
	{text = 'の'},
	{text = '推理', linkto = '推理小説', ruby = 'すいり'},
	{text = '小説', linkto = '推理小説', ruby = 'しょうせつ'},
	}
	]]
	return r
end
-- Render a sequence of {text, linkto, ruby, ruby_next} entries (the shape
-- returned by export.parse_text) as wikitext with [[links]] and <ruby> markup.
--
--   ruby_map  sequence of entry tables
--   options   optional table; not read by the current implementation
--
-- Returns the concatenated markup as one string.
function export.to_element(ruby_map, options)
	options = options or {}

	local LINK_OPEN, LINK_SEP, LINK_CLOSE = '[[', '|', ']]'
	local RUBY_OPEN, RUBY_SEP, RUBY_CLOSE = '<ruby>', '<rp>(</rp><rt>', '</rt><rp>)</rp></ruby>'

	-- Mark up one entry of a group.
	--   is_first / is_last  position of the entry inside its group
	--   ruby_outside        true:  one ruby element wraps the whole group and
	--                              links are closed inside it
	--                       false: one link wraps the whole group and ruby
	--                              elements are closed inside it
	local function render_piece(piece, target, reading, is_first, is_last, ruby_outside)
		if ruby_outside then
			if target and piece ~= '' then
				piece = LINK_OPEN .. target .. LINK_SEP .. piece .. LINK_CLOSE
			end
			if reading then
				local head = is_first and RUBY_OPEN or ''
				local tail = is_last and (RUBY_SEP .. reading .. RUBY_CLOSE) or ''
				piece = head .. piece .. tail
			end
			return piece
		end

		if reading and piece ~= '' then
			piece = RUBY_OPEN .. piece .. RUBY_SEP .. reading .. RUBY_CLOSE
		end
		if target then
			local head = is_first and (LINK_OPEN .. target .. LINK_SEP) or ''
			local tail = is_last and LINK_CLOSE or ''
			piece = head .. piece .. tail
		end
		return piece
	end

	local out = {}
	local pos = 1
	while pos <= #ruby_map do
		local first = ruby_map[pos]
		if first.ruby == nil and first.linkto == nil then
			-- Plain entry: copied through unchanged.
			table.insert(out, first.text)
			pos = pos + 1
		else
			-- Find the end of the group that starts at `pos`.
			local stop = pos + 1
			-- Entries chained through ruby_next share one ruby element.
			while stop <= #ruby_map and ruby_map[stop - 1].ruby_next == ruby_map[stop] do
				stop = stop + 1
			end
			local ruby_outside = stop > pos + 1
			if not ruby_outside then
				-- No chain: group the following entries that have the same link
				-- target and do not start a ruby chain themselves.
				local target = first.linkto
				while stop <= #ruby_map
					and not ruby_map[stop].ruby_next
					and ruby_map[stop].linkto == target do
					stop = stop + 1
				end
			end

			local pieces = {}
			local has_text = false
			local last = stop - 1
			for i = pos, last do
				local entry = ruby_map[i]
				table.insert(pieces, render_piece(entry.text, entry.linkto, entry.ruby, i == pos, i == last, ruby_outside))
				has_text = has_text or entry.text ~= ''
			end
			-- A group made only of empty texts produces no output at all.
			if has_text then table.insert(out, table.concat(pieces)) end
			pos = stop
		end
	end
	return table.concat(out)
end
-- Demo entry point: runs a few sample term/kana pairs through parse_text and
-- to_element and returns the results, preceded by a link to this module and
-- separated by horizontal rules.
function export.show()
	local samples = {
		{'[[エドガー・アラン・ポー|アラン・ポー]]の[[推理 小説]]', 'あらん ぽー の すいり しょうせつ'},
		{'(This is a pen.)', '(ディス イズ ア ペン.)'},
		{'[[駄洒]]落&駄[[洒落]]', 'だじゃれ&だじゃれ'},
	}

	local sections = {'[[Module:User:Huhu9001/000]]'}
	for _, sample in ipairs(samples) do
		local parsed = export.parse_text(sample[1], sample[2])
		sections[#sections + 1] = export.to_element(parsed)
	end
	return table.concat(sections, '\n----\n')
end
-- Module result: the table holding parse_text, to_element and show.
return export