-- Module:X-SAMPA to IPA
local p = {}
local U = mw.ustring.char
local gsub = mw.ustring.gsub
local sub = mw.ustring.sub
local find = mw.ustring.find
local length = mw.ustring.len
--[[ Lookup table from an X-SAMPA symbol (the key) to its IPA rendering.

Escaping in the keys: inside the double-quoted Lua strings below, backslashes \
and double quotes " are escaped with \ (\\ = \, \" = "). Apostrophes ' need no
escaping in double-quoted strings (see the ["'"] entry).

Each value is a table whose fields are read by _XSAMPAtoIPA:
  [1]            the IPA string emitted for the symbol;
  has_descender  set on some base symbols; when the nearest preceding
                 non-diacritic symbol has this flag, a following symbol's
                 with_descender form is used instead of its [1] form
                 (presumably the flag marks glyphs that extend below the
                 baseline -- confirm against the fonts in use);
  with_descender alternative IPA string for this symbol, used in the case
                 described above;
  is_diacritic   flags the symbol as a diacritic; the backward search for the
                 base symbol in _XSAMPAtoIPA is meant to skip such symbols.

No key is longer than four characters, which matches the longest prefix that
_XSAMPAtoIPA tries to look up. ]]
local data = {
-- lowercase letters
["a"] = { "a" },
["b"] = { "b" },
-- not in official X-SAMPA; from http://www.kneequickie.com/kq/Z-SAMPA and used by Wiktionary
["b\\"] = { "ⱱ" },
["b_<"] = { "ɓ" },
["c"] = { "c" },
["d"] = { "d" },
["d`"] = { "ɖ", has_descender = true },
["d_<"] = { "ɗ" },
-- not in official X-SAMPA; Wikipedia-specific
["d`_<"] = { "ᶑ", has_descender = true },
["e"] = { "e" },
["f"] = { "f" },
["g"] = { "ɡ", has_descender = true },
["g_<"] = { "ɠ", has_descender = true },
["h"] = { "h" },
["h\\"] = { "ɦ" },
["i"] = { "i" },
["j"] = { "j", has_descender = true },
["j\\"] = { "ʝ", has_descender = true },
["k"] = { "k" },
["l"] = { "l" },
["l`"] = { "ɭ", has_descender = true },
["l\\"] = { "ɺ" },
["m"] = { "m" },
["n"] = { "n" },
["n`"] = { "ɳ", has_descender = true },
["o"] = { "o" },
["p"] = { "p", has_descender = true },
["p\\"] = { "ɸ", has_descender = true },
["q"] = { "q", has_descender = true },
["r"] = { "r" },
["r`"] = { "ɽ", has_descender = true },
["r\\"] = { "ɹ" },
["r\\`"] = { "ɻ", has_descender = true },
["s"] = { "s" },
["s`"] = { "ʂ", has_descender = true },
["s\\"] = { "ɕ" },
["t"] = { "t" },
["t`"] = { "ʈ" },
["u"] = { "u" },
["v"] = { "v" },
["v\\"] = { "ʋ" },
["w"] = { "w" },
["x"] = { "x" },
["x\\"] = { "ɧ", has_descender = true },
["y"] = { "y", has_descender = true },
["z"] = { "z" },
["z`"] = { "ʐ", has_descender = true },
["z\\"] = { "ʑ" },
-- uppercase letters
["A"] = { "ɑ" },
["B"] = { "β", has_descender = true },
["B\\"] = { "ʙ" },
["C"] = { "ç", has_descender = true },
["D"] = { "ð" },
["E"] = { "ɛ" },
["F"] = { "ɱ", has_descender = true },
["G"] = { "ɣ", has_descender = true },
["G\\"] = { "ɢ" },
["G\\_<"] = { "ʛ" },
["H"] = { "ɥ", has_descender = true },
["H\\"] = { "ʜ" },
["I"] = { "ɪ" },
["I\\"] = { "ɪ̈" },
["J"] = { "ɲ", has_descender = true },
["J\\"] = { "ɟ" },
["J\\_<"] = { "ʄ", has_descender = true },
["K"] = { "ɬ" },
["K\\"] = { "ɮ", has_descender = true },
["L"] = { "ʎ" },
["L\\"] = { "ʟ" },
["M"] = { "ɯ" },
["M\\"] = { "ɰ", has_descender = true },
["N"] = { "ŋ", has_descender = true },
["N\\"] = { "ɴ" },
["O"] = { "ɔ" },
["O\\"] = { "ʘ" },
["P"] = { "ʋ" },
["Q"] = { "ɒ" },
["R"] = { "ʁ" },
["R\\"] = { "ʀ" },
["S"] = { "ʃ", has_descender = true },
["T"] = { "θ" },
["U"] = { "ʊ" },
["U\\"] = { "ʊ̈" },
["V"] = { "ʌ" },
["W"] = { "ʍ" },
["X"] = { "χ", has_descender = true },
["X\\"] = { "ħ" },
["Y"] = { "ʏ" },
["Z"] = { "ʒ", has_descender = true },
-- punctuation, digits and other non-letter symbols
["."] = { "." },
["\""] = { "ˈ" },
["%"] = { "ˌ" },
-- not in official X-SAMPA; from http://www.kneequickie.com/kq/Z-SAMPA and used by Wiktionary
["%\\"] = { "ᴙ" },
["'"] = { "ʲ", is_diacritic = true },
[":"] = { "ː", is_diacritic = true },
[":\\"] = { "ˑ", is_diacritic = true },
["@"] = { "ə" },
["@`"] = { "ɚ" },
["@\\"] = { "ɘ" },
["{"] = { "æ" },
["}"] = { "ʉ" },
["1"] = { "ɨ" },
["2"] = { "ø" },
["3"] = { "ɜ" },
["3`"] = { "ɝ" },
["3\\"] = { "ɞ" },
["4"] = { "ɾ" },
["5"] = { "ɫ" },
["6"] = { "ɐ" },
["7"] = { "ɤ" },
["8"] = { "ɵ" },
["9"] = { "œ" },
["&"] = { "ɶ" },
["?"] = { "ʔ" },
["?\\"] = { "ʕ" },
["<\\"] = { "ʢ" },
[">\\"] = { "ʡ" },
["^"] = { "ꜛ" },
["!"] = { "ꜜ" },
-- not in official X-SAMPA
["!!"] = { "‼" },
["!\\"] = { "ǃ" },
["|"] = { "|", has_descender = true },
["|\\"] = { "ǀ", has_descender = true },
["||"] = { "‖", has_descender = true },
["|\\|\\"] = { "ǁ", has_descender = true },
["=\\"] = { "ǂ", has_descender = true },
-- linking mark, liaison
["-\\"] = { "‿", is_diacritic = true },
-- coarticulated; not in official X-SAMPA; used by Wiktionary
["__"] = { U(0x361) },
-- fortis, strong articulation; not in official X-SAMPA; used by Wiktionary
["_:"] = { U(0x348) },
-- U+0308 is the combining diaeresis
["_\""] = { U(0x308), is_diacritic = true },
-- advanced
["_+"] = { U(0x31F), with_descender = "˖", is_diacritic = true },
-- retracted
["_-"] = { U(0x320), with_descender = "˗", is_diacritic = true },
-- rising tone
["_/"] = { U(0x30C), is_diacritic = true },
-- voiceless
["_0"] = { U(0x325), with_descender = U(0x30A), is_diacritic = true },
-- syllabic
["="] = { U(0x329), with_descender = U(0x30D), is_diacritic = true },
-- syllabic
["_="] = { U(0x329), with_descender = U(0x30D), is_diacritic = true },
-- strident: not in official X-SAMPA; from http://www.kneequickie.com/kq/Z-SAMPA and used by Wiktionary
["_%\\"] = { U(0x1DFD) },
-- ejective
["_>"] = { "ʼ", is_diacritic = true },
-- pharyngealized
["_?\\"] = { "ˤ", is_diacritic = true },
-- falling tone
["_\\"] = { U(0x302), is_diacritic = true },
-- non-syllabic
["_^"] = { U(0x32F), with_descender = U(0x311), is_diacritic = true },
-- no audible release
["_}"] = { U(0x31A), is_diacritic = true },
-- r-coloring (colouring), rhotacization
["`"] = { U(0x2DE), is_diacritic = true },
-- nasalization
["~"] = { U(0x303), is_diacritic = true },
-- advanced tongue root
["_A"] = { U(0x318), is_diacritic = true },
-- apical
["_a"] = { U(0x33A), is_diacritic = true },
-- extra-low tone
["_B"] = { U(0x30F), is_diacritic = true },
-- low rising tone
["_B_L"] = { U(0x1DC5), is_diacritic = true },
-- less rounded
["_c"] = { U(0x31C), is_diacritic = true },
-- dental
["_d"] = { U(0x32A), is_diacritic = true },
-- velarized or pharyngealized (dark)
["_e"] = { U(0x334), is_diacritic = true },
-- global fall (downstep is the "!" entry above)
["<F>"] = { "↘" },
-- falling tone
["_F"] = { U(0x302), is_diacritic = true },
-- velarized
["_G"] = { "ˠ", is_diacritic = true },
-- high tone
["_H"] = { U(0x301), is_diacritic = true },
-- high rising tone
["_H_T"] = { U(0x1DC4), is_diacritic = true },
-- aspiration
["_h"] = { "ʰ", is_diacritic = true },
-- palatalization
["_j"] = { "ʲ", is_diacritic = true },
-- creaky voice, laryngealization, vocal fry
["_k"] = { U(0x330), is_diacritic = true },
-- low tone
["_L"] = { U(0x300), is_diacritic = true },
-- lateral release
["_l"] = { "ˡ", is_diacritic = true },
-- mid tone
["_M"] = { U(0x304), is_diacritic = true },
-- laminal
["_m"] = { U(0x33B), is_diacritic = true },
-- linguolabial
["_N"] = { U(0x33C), is_diacritic = true },
-- nasal release
["_n"] = { "ⁿ", is_diacritic = true },
-- more rounded
["_O"] = { U(0x339), is_diacritic = true },
-- lowered
["_o"] = { U(0x31E), with_descender = "˕", is_diacritic = true },
-- retracted tongue root
["_q"] = { U(0x319), is_diacritic = true },
-- global rise
["<R>"] = { "↗" },
-- rising tone
["_R"] = { U(0x30C), is_diacritic = true },
-- rising falling tone
["_R_F"] = { U(0x1DC8), is_diacritic = true },
-- raised
["_r"] = { U(0x31D), is_diacritic = true },
-- extra-high tone
["_T"] = { U(0x30B), is_diacritic = true },
-- breathy voice, murmured voice, murmur, whispery voice
["_t"] = { U(0x324), is_diacritic = true },
-- voiced
["_v"] = { U(0x32C), is_diacritic = true },
-- labialized
["_w"] = { "ʷ", is_diacritic = true },
-- extra-short
["_X"] = { U(0x306), is_diacritic = true },
-- mid-centralized
["_x"] = { U(0x33D), is_diacritic = true },
-- tone letters; the final letter of each key parallels the _T/_H/_M/_L/_B tone diacritics above
["__T"] = { "˥" },
["__H"] = { "˦" },
["__M"] = { "˧" },
["__L"] = { "˨" },
["__B"] = { "˩" },
["0"] = { "◌" }, -- dotted circle
}
--[[ Convert an X-SAMPA string to IPA.

The text is consumed from the left. At each step the longest prefix (four
characters down to one) that is a key of `data` is replaced by its IPA form;
a character that starts no known symbol is copied through unchanged.

Symbols that define a `with_descender` form use it when the nearest preceding
non-diacritic symbol is flagged `has_descender`.

@param text  string of X-SAMPA (must not be nil)
@return      string of IPA ]]
local function _XSAMPAtoIPA(text)
	local output = {}
	-- Parallel to `output`: the has_descender / is_diacritic flags of each
	-- symbol emitted so far.
	local characteristics = {}

	while #text > 0 do
		-- Candidate prefixes, longest first. When fewer than four characters
		-- remain, the longer candidates are all equal to the whole remainder.
		local substrings = {
			sub(text, 1, 4),
			sub(text, 1, 3),
			sub(text, 1, 2),
			sub(text, 1, 1)
		}

		for i, substring in ipairs(substrings) do
			local IPA, has_descender, is_diacritic
			local result = data[substring]

			if result then
				IPA = result[1]
				has_descender = result.has_descender
				-- Must be stored in the local that is recorded below; writing it
				-- to any other name loses the flag and leaks a global.
				is_diacritic = result.is_diacritic

				local with_descender = result.with_descender
				if with_descender then
					-- Go backwards through the transcription, skipping any diacritics.
					-- Stop at index 0 so that a symbol with nothing (or only
					-- diacritics) before it does not index a nil entry.
					local base = #characteristics
					while base > 0 and characteristics[base].is_diacritic do
						base = base - 1
					end
					--[[ Look at the first non-diacritic symbol before the current symbol.
						If it has a descender, use the descender form of the current symbol. ]]
					if base > 0 and characteristics[base].has_descender then
						IPA = with_descender
					end
				end
			elseif not substrings[i + 1] then
				-- Shortest candidate and still unknown: pass the character through.
				IPA = substring
			end

			if IPA then
				-- Candidate i covers 5 - i characters, so the remainder starts
				-- at position 6 - i.
				text = sub(text, 6 - i)
				table.insert(output, IPA)
				table.insert(characteristics, { has_descender = has_descender, is_diacritic = is_diacritic })
				break
			end
		end
	end

	return table.concat(output)
end
--[[ Public entry point: convert X-SAMPA to IPA.

@param frame  either a frame-like table or the X-SAMPA text itself.
              For a table, the text is positional argument 1 of the parent
              frame (when `getParent` exists), otherwise argument 1 of
              `frame.args`. Any argument key other than 1, in either argument
              table, is rejected with an error.
              Any non-table value is used directly as the text.
@return       the IPA string produced by _XSAMPAtoIPA
@raise        if an unused parameter is supplied, or if no text is provided ]]
function p.X2IPA(frame)
	local text

	if type(frame) == "table" then
		-- getParent may yield nil; guard it instead of indexing blindly.
		local parent = frame.getParent and frame:getParent()
		local parentArgs = parent and parent.args
		local frameArgs = frame.args

		text = parentArgs and parentArgs[1] or frameArgs and frameArgs[1]

		-- Collect every key other than 1, listing each key once even when it
		-- occurs in both argument tables.
		local invalidParameters = {}
		local seen = {}
		local function collectInvalid(args)
			for key in pairs(args or {}) do
				if key ~= 1 and not seen[key] then
					seen[key] = true
					table.insert(invalidParameters, tostring(key))
				end
			end
		end
		collectInvalid(parentArgs)
		collectInvalid(frameArgs)

		if #invalidParameters > 1 then
			error('The parameters "' .. table.concat(invalidParameters, '", "') .. '" are not used by this template')
		elseif #invalidParameters == 1 then
			-- Report the collected key itself; the loop variable no longer
			-- exists at this point.
			error('The parameter "' .. invalidParameters[1] .. '" is not used by this template')
		end
	else
		text = frame
	end

	-- Fail with a readable message rather than a length-of-nil error inside
	-- the converter; same wording as p.example.
	if text == nil then
		error("No text provided")
	end

	return _XSAMPAtoIPA(text)
end
-- Wrap `text` in an HTML span element carrying the class "IPA".
local function _IPAspan(text)
	local opening, closing = '<span class="IPA">', '</span>'
	return opening .. text .. closing
end
--[[ Build a usage example: the literal template call for the given X-SAMPA
text inside <code> tags, then a newline and "| ", then the converted IPA
wrapped in an IPA span.

The text is taken, in order of preference, from the parent frame's argument 1,
from the frame's own argument 1, or from `frame` itself when it is a string.
Raises "No text provided" when none of these yields a value. ]]
function p.example(frame)
	local ownArgs = frame.args
	local callerArgs = frame.getParent and frame:getParent().args

	local text = callerArgs and callerArgs[1]
	if not text then
		text = ownArgs and ownArgs[1]
	end
	if not text then
		text = type(frame) == "string" and frame
	end
	if not text then
		error("No text provided")
	end

	local pieces = {
		" <code>{{[[mw:Manual:Substitution|subst:]][[Template:x2i|x2i]]|",
		-- Text containing "=" is shown with an explicit "1=" prefix
		-- (presumably so the displayed call is not read as a named
		-- parameter -- confirm).
		find(text, "=") and "1=" or "",
		text,
		"}}</code>",
		"\n| ",
		_IPAspan(p.X2IPA(text)),
	}
	return table.concat(pieces)
end
return p