-- Module:X-SAMPA to IPA
-- Converts X-SAMPA transcriptions to IPA characters.
local p = {}
local U = mw.ustring.char
local gsub = mw.ustring.gsub
local sub = mw.ustring.sub
local find = mw.ustring.find
local length = mw.ustring.len
-- X-SAMPA to IPA lookup table, keyed by X-SAMPA symbol (one to four characters).
-- Each value is a table with these fields:
--   [1]        the IPA output for the symbol.
--   descender  true when the entry is flagged as having a descender
--              (presumably: the glyph extends below the baseline -- confirm).
--              For some diacritics it is instead a string holding an
--              alternative form (e.g. "_0" switches from U+0325 to U+030A);
--              _XSAMPAtoIPA emits that form in place of [1] when the nearest
--              preceding non-diacritic symbol carries a descender flag.
--   diacritic  true for entries that _XSAMPAtoIPA skips over when it looks
--              backwards for that preceding symbol.
-- Inside the double-quoted keys below, a backslash is written \\ and a double
-- quote is written \" (for example, the key "b\\" is the X-SAMPA symbol b\).
local data = {
-- Keys starting with a lowercase letter
["a"] = { "a" },
["b"] = { "b" },
["b\\"] = { "ⱱ" }, -- not in official X-SAMPA; from http://www.kneequickie.com/kq/Z-SAMPA and used by Wiktionary
["b_<"] = { "ɓ" },
["c"] = { "c" },
["d"] = { "d" },
["d`"] = { "ɖ", descender = true },
["d_<"] = { "ɗ" },
["d`_<"] = { "ᶑ", descender = true }, -- not in official X-SAMPA; Wikipedia-specific
["e"] = { "e" },
["f"] = { "f" },
["g"] = { "ɡ", descender = true },
["g_<"] = { "ɠ", descender = true },
["h"] = { "h" },
["h\\"] = { "ɦ" },
["i"] = { "i" },
["j"] = { "j", descender = true },
["j\\"] = { "ʝ", descender = true },
["k"] = { "k" },
["l"] = { "l" },
["l`"] = { "ɭ", descender = true },
["l\\"] = { "ɺ" },
["m"] = { "m" },
["n"] = { "n" },
["n`"] = { "ɳ", descender = true },
["o"] = { "o" },
["p"] = { "p", descender = true },
["p\\"] = { "ɸ", descender = true },
["q"] = { "q", descender = true },
["r"] = { "r" },
["r`"] = { "ɽ", descender = true },
["r\\"] = { "ɹ" },
["r\\`"] = { "ɻ", descender = true },
["s"] = { "s" },
["s`"] = { "ʂ", descender = true },
["s\\"] = { "ɕ" },
["t"] = { "t" },
["t`"] = { "ʈ" },
["u"] = { "u" },
["v"] = { "v" },
["v\\"] = { "ʋ" },
["w"] = { "w" },
["x"] = { "x" },
["x\\"] = { "ɧ", descender = true },
["y"] = { "y", descender = true },
["z"] = { "z" },
["z`"] = { "ʐ", descender = true },
["z\\"] = { "ʑ" },
-- Keys starting with an uppercase letter
["A"] = { "ɑ" },
["B"] = { "β", descender = true },
["B\\"] = { "ʙ" },
["C"] = { "ç", descender = true },
["D"] = { "ð" },
["E"] = { "ɛ" },
["F"] = { "ɱ", descender = true },
["G"] = { "ɣ", descender = true },
["G\\"] = { "ɢ" },
["G\\_<"] = { "ʛ" },
["H"] = { "ɥ", descender = true },
["H\\"] = { "ʜ" },
["I"] = { "ɪ" },
["I\\"] = { "ɪ̈" },
["J"] = { "ɲ", descender = true },
["J\\"] = { "ɟ" },
["J\\_<"] = { "ʄ", descender = true },
["K"] = { "ɬ" },
["K\\"] = { "ɮ", descender = true },
["L"] = { "ʎ" },
["L\\"] = { "ʟ" },
["M"] = { "ɯ" },
["M\\"] = { "ɰ", descender = true },
["N"] = { "ŋ", descender = true },
["N\\"] = { "ɴ" },
["O"] = { "ɔ" },
["O\\"] = { "ʘ" },
["P"] = { "ʋ" },
["Q"] = { "ɒ" },
["R"] = { "ʁ" },
["R\\"] = { "ʀ" },
["S"] = { "ʃ", descender = true },
["T"] = { "θ" },
["U"] = { "ʊ" },
["U\\"] = { "ʊ̈" },
["V"] = { "ʌ" },
["W"] = { "ʍ" },
["X"] = { "χ", descender = true },
["X\\"] = { "ħ" },
["Y"] = { "ʏ" },
["Z"] = { "ʒ", descender = true },
-- Keys starting with punctuation or a digit
["."] = { "." },
["\""] = { "ˈ" },
["%"] = { "ˌ" },
["%\\"] = { "ᴙ" }, -- not in official X-SAMPA; from http://www.kneequickie.com/kq/Z-SAMPA and used by Wiktionary
["'"] = { "ʲ", diacritic = true },
[":"] = { "ː", diacritic = true },
[":\\"] = { "ˑ", diacritic = true },
["@"] = { "ə" },
["@\\"] = { "ɘ" },
["{"] = { "æ" },
["}"] = { "ʉ" },
["1"] = { "ɨ" },
["2"] = { "ø" },
["3"] = { "ɜ" },
["3\\"] = { "ɞ" },
["4"] = { "ɾ" },
["5"] = { "ɫ" },
["6"] = { "ɐ" },
["7"] = { "ɤ" },
["8"] = { "ɵ" },
["9"] = { "œ" },
["&"] = { "ɶ" },
["?"] = { "ʔ" },
["?\\"] = { "ʕ" },
["<\\"] = { "ʢ" },
[">\\"] = { "ʡ" },
["^"] = { "ꜛ" },
["!"] = { "ꜜ" },
["!!"] = { "‼" }, -- not in official X-SAMPA
["!\\"] = { "ǃ" },
["|"] = { "|", descender = true },
["|\\"] = { "ǀ", descender = true },
["||"] = { "‖", descender = true },
["|\\|\\"] = { "ǁ", descender = true },
["=\\"] = { "ǂ", descender = true },
["-\\"] = { "‿", diacritic = true }, -- linking mark, liaison
["__"] = { U(0x361) }, -- coarticulated; not in official X-SAMPA; used by Wiktionary
["_:"] = { U(0x348) }, -- fortis, strong articulation; not in official X-SAMPA; used by Wiktionary
["_\""] = { U(0x308), diacritic = true },
["_+"] = { U(0x31F), descender = "˖", diacritic = true }, -- advanced
["_-"] = { U(0x320), descender = "˗", diacritic = true }, -- retracted
["_/"] = { U(0x30C), diacritic = true }, -- rising tone
["_0"] = { U(0x325), descender = U(0x30A), diacritic = true }, -- voiceless
["="] = { U(0x329), descender = U(0x30D), diacritic = true }, -- syllabic
["_="] = { U(0x329), descender = U(0x30D), diacritic = true }, -- syllabic
["_%\\"] = { U(0x1DFD) }, -- strident: not in official X-SAMPA; from http://www.kneequickie.com/kq/Z-SAMPA and used by Wiktionary
["_>"] = { "ʼ", diacritic = true }, -- ejective
["_?\\"] = { "ˤ", diacritic = true }, -- pharyngealized
["_\\"] = { U(0x302), diacritic = true }, -- falling tone
["_^"] = { U(0x32F), descender = U(0x311), diacritic = true }, -- non-syllabic
["_}"] = { U(0x31A), diacritic = true }, -- no audible release
["`"] = { U(0x2DE), diacritic = true }, -- r-coloring (colouring), rhotacization
["~"] = { U(0x303), diacritic = true }, -- nasalization
["_A"] = { U(0x318), diacritic = true }, -- advanced tongue root
["_a"] = { U(0x33A), diacritic = true }, -- apical
["_B"] = { U(0x30F), diacritic = true }, -- extra-low tone
["_B_L"] = { U(0x1DC5), diacritic = true }, -- low rising tone
["_c"] = { U(0x31C), diacritic = true }, -- less rounded
["_d"] = { U(0x32A), diacritic = true }, -- dental
["_e"] = { U(0x334), diacritic = true }, -- velarized or pharyngealized (dark)
["<F>"] = { "↘" }, -- downstep
["_F"] = { U(0x302), diacritic = true }, -- falling tone
["_G"] = { "ˠ", diacritic = true }, -- velarized
["_H"] = { U(0x301), diacritic = true }, -- high tone
["_H_T"] = { U(0x1DC4), diacritic = true }, -- high rising tone
["_h"] = { "ʰ", diacritic = true }, -- aspiration
["_j"] = { "ʲ", diacritic = true }, -- palatalization
["_k"] = { U(0x330), diacritic = true }, -- creaky voice, laryngealization, vocal fry
["_L"] = { U(0x300), diacritic = true }, -- low tone
["_l"] = { "ˡ", diacritic = true }, -- lateral release
["_M"] = { U(0x304), diacritic = true }, -- mid tone
["_m"] = { U(0x33B), diacritic = true }, -- laminal
["_N"] = { U(0x33C), diacritic = true }, -- linguolabial
["_n"] = { "ⁿ", diacritic = true }, -- nasal release
["_O"] = { U(0x339), diacritic = true }, -- more rounded
["_o"] = { U(0x31E), descender = "˕", diacritic = true }, -- lowered
["_q"] = { U(0x319), diacritic = true }, -- retracted tongue root
["<R>"] = { "↗" }, -- global rise
["_R"] = { U(0x30C), diacritic = true }, -- rising tone
["_R_F"] = { U(0x1DC8), diacritic = true }, -- rising falling tone
["_r"] = { U(0x31D), diacritic = true }, -- raised
["_T"] = { U(0x30B), diacritic = true }, -- extra-high tone
["_t"] = { U(0x324), diacritic = true }, -- breathy voice, murmured voice, murmur, whispery voice
["_v"] = { U(0x32C), diacritic = true }, -- voiced
["_w"] = { "ʷ", diacritic = true }, -- labialized
["_X"] = { U(0x306), diacritic = true }, -- extra-short
["_x"] = { U(0x33D), diacritic = true }, -- mid-centralized
}
-- Reports whether the nearest preceding non-diacritic symbol recorded in
-- `characteristics` carries a descender flag. Returns false when there is no
-- such symbol (the list is empty or holds only diacritics).
local function _baseHasDescender(characteristics)
	local index = #characteristics
	-- Walk backwards over diacritics; stop at index 0 instead of indexing nil.
	while index > 0 and characteristics[index].diacritic do
		index = index - 1
	end
	local base = characteristics[index]
	if base and base.descender then
		return true
	end
	return false
end

-- Converts an X-SAMPA string to IPA.
--
-- The text is consumed from the left. At each step the longest prefix (up to
-- four characters) that is a key of `data` is replaced by its IPA value; a
-- character that starts no known symbol is copied to the output unchanged.
-- When the matched entry has a string `descender` field and the nearest
-- preceding non-diacritic symbol has a descender flag, that string is emitted
-- instead of the entry's default form.
--
-- @param text  the X-SAMPA input string
-- @return      the converted string ("" for empty input)
local function _XSAMPAtoIPA(text)
	local longest = 4 -- longest prefix tried against `data`
	local output = {}
	-- Parallel to `output`: descender/diacritic flags of each emitted symbol.
	local characteristics = {}
	while length(text) > 0 do
		local IPA, descender, diacritic
		local consumed = 1
		for len = longest, 1, -1 do
			local candidate = sub(text, 1, len)
			local entry = data[candidate]
			if entry then
				IPA = entry[1]
				descender = entry.descender
				diacritic = entry.diacritic
				-- A string descender is the alternative form of the diacritic.
				-- With no preceding base symbol (e.g. the diacritic is the first
				-- thing in the input) the default form is kept.
				if type(descender) == "string" and _baseHasDescender(characteristics) then
					IPA = descender
				end
				consumed = len
				break
			elseif len == 1 then
				-- Unknown character: pass it through as-is.
				IPA = candidate
			end
		end
		-- If the text is shorter than `consumed`, this yields "" and ends the loop.
		text = sub(text, consumed + 1)
		output[#output + 1] = IPA
		characteristics[#characteristics + 1] = { descender = descender, diacritic = diacritic }
	end
	return table.concat(output)
end
-- Public entry point: converts X-SAMPA text to IPA.
--
-- @param frame  either a frame-like table or the text itself. For a table,
--               the text is the first argument of the parent frame if present,
--               otherwise the first argument of the frame. Any other value
--               (normally a string, as passed by p.example) is used directly.
-- @return       the converted string
-- Raises "No text provided" when no text can be found. Previously the frame
-- table itself was handed to the converter in that case, and a frame without a
-- parent caused an "attempt to index a nil value" error.
function p.X2IPA(frame)
	local text
	if type(frame) == "table" then
		-- getParent() can return nil, so guard before reading .args.
		local parent = frame.getParent and frame:getParent()
		text = parent and parent.args and parent.args[1]
			or frame.args and frame.args[1]
	else
		text = frame
	end
	if not text then
		error("No text provided")
	end
	return _XSAMPAtoIPA(text)
end
-- Wraps `text` in an HTML span carrying the class "IPA" and returns the markup.
local function _IPAspan(text)
	local opening = "<span class=\"IPA\">"
	local closing = "</span>"
	return opening .. text .. closing
end
-- Builds a usage example for the x2i template: a <code> snippet showing the
-- template call for the given X-SAMPA text, then a newline, "| " and the
-- converted IPA wrapped in an IPA span (presumably the "| " starts the next
-- cell of a wikitable row -- confirm against the calling template).
--
-- @param frame  either a frame-like table or the X-SAMPA text as a string.
--               For a table, the text is the first argument of the parent
--               frame if present, otherwise the first argument of the frame.
-- @return       the wikitext for the example
-- Raises "No text provided" when no text can be found.
function p.example(frame)
	local args, parentargs
	if type(frame) == "table" then
		args = frame.args
		-- getParent() can return nil, so guard before reading .args.
		local parent = frame.getParent and frame:getParent()
		parentargs = parent and parent.args
	end
	local text = parentargs and parentargs[1]
		or args and args[1]
		or type(frame) == "string" and frame
		or error("No text provided")
	local output = { " <code>{{[[mw:Manual:Substitution|subst:]][[Template:x2i|x2i]]|" }
	-- Text containing "=" is shown with an explicit "1=" so the displayed call
	-- passes it as the first positional parameter. Plain search: no pattern needed.
	if find(text, "=", 1, true) then
		table.insert(output, "1=")
	end
	table.insert(output, text)
	table.insert(output, "}}</code>")
	table.insert(output, "\n| ")
	local IPA = _IPAspan(p.X2IPA(text))
	table.insert(output, IPA)
	return table.concat(output)
end
return p