-- Module:User:Huhu9001/000
-- Appearance
-- Table of public functions; it is returned at the end of the module.
local export = {}
-- Convert every hiragana character in the range ぁ-ゖ to the character whose
-- code point is 96 higher (its katakana counterpart). Other characters are
-- left untouched. Returns only the converted string.
local function str_hira_to_kata(s)
	local converted = mw.ustring.gsub(s, '[ぁ-ゖ]', function(hira)
		local code = mw.ustring.codepoint(hira)
		return mw.ustring.char(code + 96)
	end)
	return converted
end
-- Convert every katakana character in the range ァ-ヶ to the character whose
-- code point is 96 lower (its hiragana counterpart). Other characters are
-- left untouched. Returns only the converted string.
local function str_kata_to_hira(s)
	local converted = mw.ustring.gsub(s, '[ァ-ヶ]', function(kata)
		local code = mw.ustring.codepoint(kata)
		return mw.ustring.char(code - 96)
	end)
	return converted
end
-- Brute-force alignment of two strings.
--
-- The shorter string (by `#`, i.e. byte length) is turned into a pattern in
-- which some of its characters are replaced by "holes" (wildcards). The
-- function enumerates hole placements with an increasing number of holes until
-- a pattern is found that matches both strings.
--
-- @param s1 first string
-- @param s2 second string
-- @return a list of alignments; each alignment is a table `{[1] = parts_of_s1,
--         [2] = parts_of_s2}` where both lists hold the captures of the
--         matching pattern. The list is empty when no pattern matched.
--
-- NOTE(review): `len1` is a byte length while the hole positions are used as
-- character indices for `mw.ustring.sub`; for non-ASCII input this looks like
-- it only causes redundant iterations, but confirm.
local function str_ucompare(s1, s2)
	local result = {}
	local string1, string2
	local len1
	local id1, id2
	if #s1 <= #s2 then -- make string1 the shorter string
		string1, string2 = s1, s2
		len1 = #s1
		id1, id2 = 1, 2
	else
		string1, string2 = s2, s1
		len1 = #s2
		id1, id2 = 2, 1
	end

	-- "holes": a different part of the two strings
	-- try to find where the "holes" are
	local holes = {}
	local holes_number = 0
	holes[0] = 0

	-- Advance `holes` to the next placement. Returns true when all placements
	-- for the current number of holes were exhausted and the number of holes
	-- was therefore increased by one; returns false otherwise.
	local function next_holes()
		-- e.g. 'abc'
		-- -> '?bc'
		-- -> 'a?c'
		-- -> 'ab?'
		-- -> '??c'
		-- -> '?b?'
		-- -> 'a??'
		-- -> '???'
		local hnm = holes_number
		local hnmv
		while hnm > 0 do
			hnmv = holes[hnm]
			if hnmv + holes_number < len1 + hnm then
				-- This hole can still move right: shift it and pack all
				-- later holes directly after it.
				for i = 1, holes_number + 1 - hnm do
					holes[hnm + i - 1] = hnmv + i
				end
				return false
			else
				hnm = hnm - 1
			end
		end
		holes_number = holes_number + 1
		for i = 1, holes_number do
			holes[i] = i
		end
		return true
	end

	local string1_fragments
	local string1_fragment
	-- Both patterns are local; `string2_pattern` used to leak into the global
	-- environment because it was assigned without a declaration.
	local string1_pattern, string2_pattern
	local matched1, matched2
	local match_success = 0 -- 0 means no result yet; >0 means found result with (holes_number+1-match_success) "holes"
	local n_holes_connected
	while holes_number <= len1 do
		string1_fragments = {}
		n_holes_connected = 1
		for i = 1, holes_number do
			-- Literal text between the previous hole and this one, with
			-- pattern magic characters escaped.
			string1_fragment = mw.ustring.sub(string1, holes[i - 1] + 1, holes[i] - 1):gsub('([%(%)%.%%%+%-%*%?%[%]%^%$])', '%%%1')
			if string1_fragment ~= '' then
				table.insert(string1_fragments, '(' .. string1_fragment .. ')')
			elseif i == 1 then
				table.insert(string1_fragments, string1_fragment)
			else
				n_holes_connected = n_holes_connected + 1 -- if the two "holes" are connected reduce the number of "holes" by 1
			end
		end
		if match_success <= n_holes_connected then
			-- Literal text after the last hole.
			string1_fragment = mw.ustring.sub(string1, holes[holes_number] + 1):gsub('([%(%)%.%%%+%-%*%?%[%]%^%$])', '%%%1')
			if string1_fragment ~= '' then
				table.insert(string1_fragments, '(' .. string1_fragment .. ')')
			else
				table.insert(string1_fragments, string1_fragment)
			end
			-- A hole must cover at least one character of string1 but may be
			-- empty in string2.
			string1_pattern = '^' .. table.concat(string1_fragments, '(..-)') .. '$'
			string2_pattern = '^' .. table.concat(string1_fragments, '(.-)') .. '$'
			matched2 = {mw.ustring.match(string2, string2_pattern)}
			if #matched2 > 0 then
				matched1 = {mw.ustring.match(string1, string1_pattern)}
				if #matched1 > 0 then
					-- A match with more connected holes supersedes earlier ones.
					if match_success < n_holes_connected then result = {} end
					match_success = n_holes_connected
					table.insert(result, {[id1] = matched1, [id2] = matched2})
				end
			end
		end
		-- Stop once a round (fixed number of holes) that produced a match ends.
		if next_holes() and match_success > 0 then break end
	end
	return result
end
-- Tell whether `s` contains a wiki link of the form [[...]] with at least one
-- character between the brackets.
-- @param s string or nil
-- @return true if such a link is present; false otherwise (including for nil)
local function str_is_link(s)
	if not s then return false end
	return s:find('%[%[..-%]%]') ~= nil
end
-- Split a string containing wiki links into a list of segments.
--
-- Plain stretches become `{text = ...}`. `[[target|label]]` becomes
-- `{text = label, linkto = target}` (no `linkto` when the target is empty, and
-- nothing at all when the label is empty). `[[page]]` becomes
-- `{text = page, linkto = page}`. When several "[[" precede a "]]", the last
-- one is taken as the opening of the link and the earlier ones stay plain
-- text. An unclosed "[[" ends link parsing; the rest is emitted as plain text.
--
-- @param s string to parse
-- @return list of segment tables in source order
local function str_parse_link(s)
	local segments = {}
	local cursor = 1
	local open_at = s:find('%[%[', cursor)
	while open_at do
		local next_open = s:find('%[%[', open_at + 1)
		local close_at = s:find('%]%]', open_at + 2)
		if not close_at then break end
		-- Move to the last "[[" that still lies before the closing "]]".
		while next_open and next_open < close_at do
			open_at = next_open
			next_open = s:find('%[%[', open_at + 1)
		end
		if cursor < open_at then
			segments[#segments + 1] = {text = s:sub(cursor, open_at - 1)}
		end
		if open_at + 2 < close_at then
			local inner = s:sub(open_at + 2, close_at - 1)
			local bar = inner:find('|')
			if not bar then
				segments[#segments + 1] = {text = inner, linkto = inner}
			elseif bar + 1 <= #inner then
				local target = nil
				if bar > 1 then target = inner:sub(1, bar - 1) end
				segments[#segments + 1] = {text = inner:sub(bar + 1), linkto = target}
			end
		end
		cursor = close_at + 2
		open_at = s:find('%[%[', cursor)
	end
	if cursor <= #s then
		segments[#segments + 1] = {text = s:sub(cursor)}
	end
	return segments
end
-- Merge a link map and a ruby map into one nested map.
--
-- Both arguments are lists of segments with a string `text` field; link
-- segments may carry `linkto`, ruby segments may carry `ruby`. The two lists
-- are walked in parallel and aligned by the byte length (`#`) of their texts.
-- Where a ruby segment spans link segments (or its ruby itself contains a
-- link), the output entry is `{text = {link pieces...}, ruby = parsed ruby}`;
-- where a link segment spans ruby segments, the output entry is
-- `{text = {ruby pieces...}, linkto = ...}`. Pieces without an enclosing
-- ruby/link are appended to the result directly.
--
-- Side effect: `link_map[i].text` is overwritten with the not-yet-consumed
-- tail whenever a link segment is only partially consumed.
--
-- NOTE(review): the inner loops index `link_map[id_l]` / `ruby_map[id_r]`
-- right after incrementing the index without a bounds check; this presumably
-- relies on both maps covering the same total text - confirm.
local function map_merge(link_map, ruby_map)
local r = {}
local r_sub, r_insert
-- Byte length of the current link text minus that of the current ruby text.
local len_cut
local id_l, id_r = 1, 1
-- NOTE(review): `nn` is never read.
local nn = false
while id_l <= #link_map and id_r <= #ruby_map do
len_cut = #link_map[id_l].text - #ruby_map[id_r].text
if str_is_link(ruby_map[id_r].ruby) or len_cut < 0 then
-- The ruby segment is the outer element; link pieces go inside it.
if ruby_map[id_r].ruby then
r_sub = {
text = {},
ruby = str_parse_link(ruby_map[id_r].ruby),
}
r_insert = r_sub.text
table.insert(r, r_sub)
else
r_insert = r
end
-- Consume whole link segments while the ruby text is still longer.
while len_cut < 0 do
table.insert(r_insert, {
text = link_map[id_l].text,
linkto = link_map[id_l].linkto
})
id_l = id_l + 1
len_cut = len_cut + #link_map[id_l].text
end
-- Take the part of the current link text that still belongs to this ruby
-- segment (everything except its last `len_cut` bytes).
table.insert(r_insert, {
text = link_map[id_l].text:sub(1, -1 - len_cut),
linkto = link_map[id_l].linkto
})
if len_cut == 0 then
id_l = id_l + 1
id_r = id_r + 1
else
-- Keep the unconsumed tail of the link text for the next iteration.
link_map[id_l].text = link_map[id_l].text:sub(-len_cut)
id_r = id_r + 1
end
else
-- The link segment is the outer element; ruby pieces go inside it.
if link_map[id_l].linkto then
r_sub = {
text = {},
linkto = link_map[id_l].linkto,
}
r_insert = r_sub.text
table.insert(r, r_sub)
else
r_insert = r
end
-- Consume whole ruby segments while the link text is still longer and the
-- current ruby does not itself contain a link.
while len_cut > 0 and not str_is_link(ruby_map[id_r].ruby) do
table.insert(r_insert, {
text = ruby_map[id_r].text,
ruby = ruby_map[id_r].ruby,
})
id_r = id_r + 1
len_cut = len_cut - #ruby_map[id_r].text
end
if len_cut == 0 then
-- The current ruby segment ends exactly where the link segment ends.
table.insert(r_insert, {
text = ruby_map[id_r].text,
ruby = ruby_map[id_r].ruby,
})
id_l = id_l + 1
id_r = id_r + 1
else
-- Keep the unconsumed tail of the link text for the next iteration.
link_map[id_l].text = link_map[id_l].text:sub(-(len_cut + #ruby_map[id_r].text))
end
end
end
return r
end
-- Total byte length of all text in a (possibly nested) ruby map.
-- @param ruby_map list of entries whose `text` is a string or a nested map
-- @param options passed through unchanged to recursive calls
-- @return sum of `#text` over all string texts
function export.len(ruby_map, options)
	local total = 0
	for _, entry in ipairs(ruby_map) do
		local body = entry.text
		if type(body) == 'string' then
			total = total + #body
		else
			total = total + export.len(body, options)
		end
	end
	return total
end
-- Flatten a (possibly nested) map to its base text, ignoring links and rubies.
-- @param ruby_map list of entries whose `text` is a string or a nested map
-- @return concatenation of all string texts in order
function export.to_text(ruby_map)
	local pieces = {}
	for _, entry in ipairs(ruby_map) do
		local body = entry.text
		if type(body) ~= 'string' then
			body = export.to_text(body)
		end
		table.insert(pieces, body)
	end
	return table.concat(pieces)
end
-- Flatten a (possibly nested) map to its reading: each entry contributes its
-- `ruby` when present and its `text` otherwise.
-- @param ruby_map list of entries; `ruby` / `text` are strings or nested maps
-- @return concatenation of the chosen strings in order
function export.to_ruby(ruby_map)
	local pieces = {}
	for _, entry in ipairs(ruby_map) do
		local reading = entry.ruby or entry.text
		if type(reading) ~= 'string' then
			reading = export.to_ruby(reading)
		end
		table.insert(pieces, reading)
	end
	return table.concat(pieces)
end
-- "options.markup": Use custom markups. See below.
-- "options.break_link = true": Change [[...|<ruby>...</ruby>]] to <ruby>[[...]]</ruby>.
-- Serialise a merged map into markup text.
--
-- Links are written as `<lb>target<lm>text<lf>` and rubies as
-- `<rb>text<rm>ruby<rf>`; the six delimiters come from `options.markup`
-- (fields `link_border_left/middle/right`, `ruby_border_left/middle/right`)
-- and default to "[[", "|", "]]" and "[", "](", ")". Entries whose rendered
-- text is empty are skipped; an empty `linkto` or `ruby` yields the bare text.
--
-- @param ruby_map list of entries (`text` string or nested map, optional
--        `linkto`, optional `ruby` string or nested map)
-- @param options optional table; reads `markup` and `break_link`
-- @return the markup string
function export.to_markup(ruby_map, options)
	options = options or {}
	-- Declared local: this used to be assigned as a global.
	local omarkup = options.markup or {}

	-- Custom markups
	local lb = omarkup.link_border_left or '[['
	local lm = omarkup.link_border_middle or '|'
	local lf = omarkup.link_border_right or ']]'
	local rb = omarkup.ruby_border_left or '['
	local rm = omarkup.ruby_border_middle or ']('
	local rf = omarkup.ruby_border_right or ')'

	local text = {}
	local v_text, v_ruby, v_linkto
	for _, v in ipairs(ruby_map) do
		v_linkto, v_ruby = v.linkto, v.ruby
		if type(v.text) ~= 'string' then
			if options.break_link and v.linkto then
				-- Push the link inside every child so that each ruby wraps its
				-- own link instead of one link wrapping all rubies.
				v_text = {}
				for _, vv in ipairs(v.text) do
					table.insert(v_text, {
						text = {{
							text = vv.text,
							linkto = v_linkto,
						}},
						ruby = vv.ruby,
					})
				end
				-- The link (and any ruby) is now carried by the children.
				v_linkto, v_ruby = nil, nil
				v_text = export.to_markup(v_text, options)
			else
				v_text = export.to_markup(v.text, options)
			end
		else
			v_text = v.text
		end
		if v_text ~= '' then
			if v_linkto then
				if v_linkto ~= '' then table.insert(text, lb .. v_linkto .. lm .. v_text .. lf)
				else table.insert(text, v_text) end
			elseif v_ruby then
				-- A ruby may itself be a parsed map (e.g. when it contains links).
				if type(v_ruby) ~= 'string' then v_ruby = export.to_markup(v_ruby, options) end
				if v_ruby ~= '' then table.insert(text, rb .. v_text .. rm .. v_ruby .. rf)
				else table.insert(text, v_text) end
			else
				table.insert(text, v_text)
			end
		end
	end
	return table.concat(text)
end
-- The options are the same as "function export.to_markup"
-- Serialise a merged map into wikitext with HTML ruby tags.
--
-- Works like `export.to_markup`, but the ruby delimiters default to
-- "<ruby>", "<rp>(</rp><rt>" and "</rt><rp>)</rp></ruby>". Delimiters given in
-- `options.markup` still take precedence. `options.markup` is replaced for the
-- duration of the call and restored before returning.
--
-- @param ruby_map merged map to render
-- @param options optional table; same fields as for `export.to_markup`
-- @return the wikitext string
function export.to_wiki(ruby_map, options)
	options = options or {}
	-- Declared local: this used to be assigned as a global.
	local omarkup = options.markup or {}
	local markup1 = options.markup
	local markup2 = {
		link_border_left = omarkup.link_border_left or '[[',
		link_border_middle = omarkup.link_border_middle or '|',
		link_border_right = omarkup.link_border_right or ']]',
		ruby_border_left = omarkup.ruby_border_left or '<ruby>',
		ruby_border_middle = omarkup.ruby_border_middle or '<rp>(</rp><rt>',
		ruby_border_right = omarkup.ruby_border_right or '</rt><rp>)</rp></ruby>',
	}
	options.markup = markup2
	local r = export.to_markup(ruby_map, options)
	-- Restore the caller's original markup setting.
	options.markup = markup1
	return r
end
-- Parse text written in the "[base](ruby)" markup (optionally mixed with
-- [[wiki links]]) into a merged link/ruby map.
--
-- @param markup the marked-up string
-- @param options accepted but not used here
-- @return the result of merging the link map and the ruby map
function export.parse_markup(markup, options)
	-- Pass 1: on the original markup, collect each ruby (links intact) and
	-- reduce "[base](ruby)" to "base" before extracting the links.
	local annotations = {}
	local without_ruby = markup:gsub('(%b[])(%b())', function(base, note)
		annotations[#annotations + 1] = note:sub(2, -2)
		return base:sub(2, -2)
	end)
	local link_map = str_parse_link(without_ruby)

	-- Pass 2: on the link-free text, cut out the base texts and pair them
	-- with the rubies collected above, in order of appearance.
	local plain_text = export.to_text(str_parse_link(markup))
	local ruby_map = {}
	local resume_at = 1
	local note_index = 1
	for from, base, _, to in plain_text:gmatch('()(%b[])(%b())()') do
		if resume_at < from then
			table.insert(ruby_map, {text = plain_text:sub(resume_at, from - 1)})
		end
		if #base > 2 then
			local note = annotations[note_index]
			if note == '' then note = nil end
			table.insert(ruby_map, {
				text = base:sub(2, -2),
				ruby = note,
			})
		end
		resume_at = to
		note_index = note_index + 1
	end
	if resume_at <= #plain_text then
		table.insert(ruby_map, {text = plain_text:sub(resume_at)})
	end
	return map_merge(link_map, ruby_map)
end
-- "options.try == nil": Raise an error when the initial match fails.
-- "options.try == 'return'": Return "nil, (error information)" when the initial match failed.
-- "options.try == 'force'": Try every possible pattern when the initial match failed.
-- "options.space == nil": Remove spaces between kana or kanji but preserve elsewhere.
-- "options.space == 'all'": Preserve all spaces.
-- "options.space == 'none'": Remove all spaces.
-- "options.allow_ruby_link == true": Try to match the links in the rubies.
-- Build a merged link/ruby map from a term and its kana reading.
--
-- @param term the written form; may contain [[wiki links]] and "%" separators
-- @param kana the reading; "%" separators must pair up with those in `term`.
--        When nil/false, only the link map of `term` is returned.
-- @param options optional table; reads `space`, `try` and `allow_ruby_link`
-- @return the merged map; or `nil, message` when matching fails and
--         `options.try == 'return'`. Raises an error when matching fails and
--         `options.try` is neither 'force' nor 'return'.
function export.parse_text(term, kana, options)
options = options or {}
-- Bodies of character classes; they are spliced into "[...]" patterns below.
local pattern_kana = 'ぁ-ゖァ-ヶ' -- signs subject to hira-kata matching
-- NOTE(review): the literal tail '𠀀-0-9A-Za-z〆' looks damaged (a range from
-- U+20000 down to "0"); possibly full-width characters were normalised away.
-- Confirm against the upstream module.
local pattern_kanji_probable = '々㐀-䶵一-鿌' .. mw.ustring.char(0xF900) .. "-" .. mw.ustring.char(0xFAD9) .. '𠀀-0-9A-Za-z〆' -- signs that can have ruby, but not spaces
local pattern_rubiable_probable = '0-9a-zA-Zα-ωΑ-Ω' -- signs that can have both ruby and spaces
local pattern_mute_probable = '%.゠・' -- signs that may appear in term, but not kana
-- Space handling, selected by options.space; works in place on a flat map.
local _remove_space
if options.space == 'none' then
-- Strip every space from text, link target and ruby.
_remove_space = function(_r)
for _, v in ipairs(_r) do
v.text = v.text:gsub(' ', '')
if v.linkto then v.linkto = v.linkto:gsub(' ', '') end
if v.ruby then v.ruby = v.ruby:gsub(' ', '') end
end
end
elseif options.space == 'all' then
-- Keep everything as is.
_remove_space = function(_r)
end
else
_remove_space = function(_r)
-- For a stand-alone string: drop a leading or trailing run of spaces and any
-- run of spaces whose two neighbours are both kanji/kana.
-- NOTE(review): `#s` is a byte length while pos1/pos2 come from mw.ustring
-- position captures - confirm these agree for non-ASCII text.
local function _rs(s)
return mw.ustring.gsub(s, '()( +)()', function(pos1, m1, pos2)
if pos1 == 1 then return '' end
if pos2 > #s then return '' end
pos1 = pos1 - 1
if mw.ustring.find(mw.ustring.sub(s, pos1, pos1), '^['..pattern_kanji_probable..pattern_kana..']$') and mw.ustring.find(mw.ustring.sub(s, pos2, pos2), '^['..pattern_kanji_probable..pattern_kana..']$') then
return ''
else
return m1
end
end)
end
for i, v in ipairs(_r) do
-- For texts the neighbours may live in the adjacent map entries. A run of
-- spaces at the very start of the first entry or the very end of the last
-- entry is kept.
-- NOTE(review): `#v.text` is a byte length compared with a mw.ustring
-- position - confirm for non-ASCII text.
v.text = mw.ustring.gsub(v.text, '()( +)()', function(pos1, m1, pos2)
local char_1, char_2
if pos1 > 1 then
char_1 = mw.ustring.sub(v.text, pos1 - 1, pos1 - 1)
elseif i > 1 then
char_1 = mw.ustring.sub(_r[i - 1].text, -1)
else
return m1
end
if pos2 <= #v.text then
char_2 = mw.ustring.sub(v.text, pos2, pos2)
elseif i < #_r then
char_2 = mw.ustring.sub(_r[i + 1].text, 1, 1)
else
return m1
end
if mw.ustring.find(char_1, '^['..pattern_kanji_probable..pattern_kana..']$') and mw.ustring.find(char_2, '^['..pattern_kanji_probable..pattern_kana..']$') then
return ''
else
return m1
end
end)
if v.linkto then v.linkto = _rs(v.linkto) end
if v.ruby then v.ruby = _rs(v.ruby) end
end
end
end
-- Create the link map
-- e.g. "[[エドガー・アラン・ポー|アラン・ポー]]の[[推理 小説]]"
local link_map = str_parse_link(term:gsub('%%', '')) -- remove '%'
_remove_space(link_map)
-- Without a reading there is nothing to align: return the link map alone.
if not kana then return link_map end
--[[link_map = {
{text = 'アラン・ポー', linkto = 'エドガー・アラン・ポー'},
{text = 'の'},
{text = '推理小説', linkto = '推理小説'},
}]]
-- Remove romaji markup
kana = kana:gsub('[%^%-%.]', '') -- remove '^', '-', '.', preserve '%', ' '
-- Create the ruby map
-- e.g. 'アラン・ポーの推理 小説', 'あらん ぽー の すいり しょうせつ'
-- ("ぽお" is not allowed)
local ruby_map = {}
local plain_term_raw = export.to_text(str_parse_link(term)) -- Remove links: [[A|B]] -> B, [[C]] -> C
-- With options.allow_ruby_link the links inside the reading are kept.
local plain_kana_raw = options.allow_ruby_link and kana or export.to_text(str_parse_link(kana))
-- "%" splits both forms into sections that are matched pairwise.
local plain_term = mw.text.split(plain_term_raw, '%%')
local plain_kana = mw.text.split(plain_kana_raw, '%%')
if #plain_term == #plain_kana then
for i, plain_term_i in ipairs(plain_term) do
-- Build one capture group per run of same-class characters in the term.
local pattern_ruby = {}
local id1, id2
local char_s
id2 = 1
while true do
id1 = id2
char_s = mw.ustring.sub(plain_term_i, id1, id1)
if mw.ustring.find(char_s, '^['..pattern_kanji_probable..pattern_rubiable_probable..']$') then
-- Run of kanji / rubiable signs: matches any non-empty reading.
id2 = mw.ustring.find(plain_term_i, '[^'..pattern_kanji_probable..pattern_rubiable_probable..']', id1)
table.insert(pattern_ruby, '(..-)')
if not id2 then break end
elseif mw.ustring.find(char_s, '^['..pattern_kana..']$') then
-- Run of kana: each kana matches itself in either script, with optional
-- spaces around; ヶ/ゖ/ケ may additionally be read か, が or こ.
id2 = mw.ustring.find(plain_term_i, '[^'..pattern_kana..']', id1)
table.insert(pattern_ruby, '( ?' .. mw.ustring.gsub(mw.ustring.sub(plain_term_i, id1, (id2 or 0) - 1), '.', function(m1)
if mw.ustring.find(m1, '^[ヶゖケ]$') then
return "[" .. str_kata_to_hira(m1) .. str_hira_to_kata(m1) .. "かがこカガコ] ?"
else
return "[" .. str_kata_to_hira(m1) .. str_hira_to_kata(m1) .. "] ?"
end
end) .. ')')
if not id2 then break end
else
-- Run of other signs: matched literally (magic characters escaped); "mute"
-- signs are optional and may also match a space or "-".
id2 = mw.ustring.find(plain_term_i, '['..pattern_kanji_probable..pattern_rubiable_probable..pattern_kana..']', id1)
table.insert(pattern_ruby, '('..mw.ustring.gsub(mw.ustring.sub(plain_term_i, id1, (id2 or 0) - 1), '.', function(m1)
local m1_m = m1
if m1:find('^[%(%)%.%%%+%-%*%?%[%]%^%$]$') then m1_m = '%' .. m1_m end
if mw.ustring.find(m1, '^['..pattern_mute_probable..']$') then m1_m = '[' .. m1_m .. ' -]?' end
return m1_m
end)..')')
if not id2 then break end
end
end
local pattern_ruby_s = table.concat(pattern_ruby)
-- 'アラン・ポーの推理 小説' to '( ?[あア] ?[らラ] ?[んン] ?)([・ -]?)( ?[ぽポ] ?)(ー)( ?[のノ] ?)(..-)( )(..-)'
-- Execute matching
local ruby_map_i_ruby = {mw.ustring.match(plain_kana[i], '^'..pattern_ruby_s..'$')}
if #ruby_map_i_ruby > 0 then
-- Apply the same pattern to the term so both capture lists line up.
local ruby_map_i_text = {mw.ustring.match(plain_term_i, '^'..pattern_ruby_s..'$')}
local n_match = 0
for _, pat in ipairs(pattern_ruby) do
if pat:sub(1, 1) == '(' then
n_match = n_match + 1
if pat == '(..-)' then
-- Wildcard group: the kana capture is the ruby of the term capture.
table.insert(ruby_map, {
text = ruby_map_i_text[n_match],
ruby = ruby_map_i_ruby[n_match],
})
else
-- Literal group: plain text, glued onto a preceding plain entry if any.
if #ruby_map > 0 and ruby_map[#ruby_map].ruby == nil then
ruby_map[#ruby_map].text = ruby_map[#ruby_map].text .. ruby_map_i_text[n_match]
else
table.insert(ruby_map, {text = ruby_map_i_text[n_match]})
end
end
end
end
elseif options.try == 'force' then
-- Fall back to brute-force alignment. The link map is rebuilt here without
-- the space removal applied above.
link_map = str_parse_link(term:gsub('%%', ''))
-- NOTE(review): if str_ucompare returns an empty list, `forced_result` is nil
-- and the loop below raises an error.
local forced_result = str_ucompare(plain_term_i, plain_kana[i])[1]
for ii, vv in ipairs(forced_result[1]) do
table.insert(ruby_map, {
text = vv,
ruby = forced_result[2][ii] ~= vv and forced_result[2][ii] or nil,
})
end
elseif options.try == 'return' then
return nil, 'Can not match "' .. plain_term_i .. '" and "' .. plain_kana[i] .. '".'
else
error('Can not match "' .. plain_term_i .. '" and "' .. plain_kana[i] .. '".')
end
end
elseif options.try == 'force' then
-- Section counts differ: brute-force the unsplit strings. Note that "%" is
-- not removed from `term` on this path.
link_map = str_parse_link(term)
local forced_result = str_ucompare(plain_term_raw, plain_kana_raw)[1]
for i, v in ipairs(forced_result[1]) do
table.insert(ruby_map, {
text = v,
ruby = forced_result[2][i] ~= v and forced_result[2][i] or nil,
})
end
elseif options.try == 'return' then
return nil, 'Unequal numbers of the separator "%" in both forms.'
else
error('Unequal numbers of the separator "%" in both forms.')
end
_remove_space(ruby_map)
--[[ruby_map = {
{text = 'アラン・ポーの'},
{text = '推理', ruby = 'すいり'},
{text = ''}
{text = '小説', ruby = 'しょうせつ'},
}]]
-- Merge the ruby and link map
--[[return {
{text = 'アラン・ポー', linkto = 'エドガー・アラン・ポー'},
{text = 'の'},
{text = {
{text = '推理', ruby = 'すいり'},
{text = '小説', ruby = 'しょうせつ'},
}, linkto = '推理小説'},
}]]
return map_merge(link_map, ruby_map)
end
-- Demo entry point: renders a fixed list of sample inputs and joins the
-- results with horizontal rules.
-- @param frame the Scribunto frame object
-- @return the concatenated wikitext
function export.show(frame)
	-- term + reading -> wikitext
	local function render_pair(term, reading, opts)
		local parsed = export.parse_text(term, reading, opts)
		return export.to_wiki(parsed, opts)
	end
	-- "[base](ruby)" markup -> wikitext
	local function render_markup(markup, opts)
		local parsed = export.parse_markup(markup, opts)
		return export.to_wiki(parsed, opts)
	end
	-- term + reading -> raw markup wrapped in <nowiki> (currently not called)
	local function render_nowiki(term, reading, opts)
		local parsed = export.parse_text(term, reading, opts)
		return frame:extensionTag('nowiki', export.to_markup(parsed, opts))
	end

	local markup_samples = table.concat({
		'apple[b[[Alpha|a]]]([[Beta|β]]α)nana',
		'apple[[Banana|[ba](βα)nana]]',
		'apple[[[Banana|ba]]]([[Beta|β]]α)[[Banana|nana]]',
	}, ', ')

	local sections = {
		'[[Module:User:Huhu9001/000]]',
		render_pair('推%理%小%説', 'すい%り% しょう%せつ'),
		render_pair('[[Edgar Allan Poe|アラン・ポー]]の[[推理 小説]]', 'あらん ぽー の すいり しょうせつ'),
		render_pair('[[Edgar Allan Poe|アラン・ポー]]の[[推理 小説]]', 'あらん ぽー の すいり しょうせつ', {space = 'all'}),
		render_pair('(This is a pen.)', '(ディス イズ ア ペン.)'),
		render_pair('[This is a pen.]', '[ディス イズ ア ペン.]', {space = 'none'}),
		render_pair('(This is %a% pen.)', '(ディス イズ %[[Article (grammar)|ア]]% ペン.)', {allow_ruby_link = true}),
		render_pair('自業自得', 'You [[wiktionary:deserve|deserve]] it', {allow_ruby_link = true}),
		render_pair('[[Pun|駄洒]]落&駄[[Pun|洒落]]', 'だじゃれ&だじゃれ'),
		render_pair('用ゐる', 'もちいる', {try = 'force'}),
		render_pair('スイーツ(笑)', 'スイーツ かっこ わらい', {try = 'force'}),
		render_pair('100%', '100パーセント', {try = 'force'}),
		render_pair('[[Speech|喋る]][[Marionette|人%形]]', 'しゃべる にん%ぎょう', {break_link = true}),
		render_markup(markup_samples),
	}
	return table.concat(sections, '\n----\n')
end
-- Hand the table of public functions to the caller of this module.
return export