-- Jump to content

-- Module:X-SAMPA to IPA

-- From Wikipedia, the free encyclopedia
-- This is an old revision of this page, as edited by Erutuon (talk | contribs) at 09:40, 26 January 2017 (simpler). The present address (URL) is a permanent link to this revision, which may differ significantly from the current revision.

-- Export table; the public entry points are attached to it below and it is
-- returned at the end of the module.
local p = {}

-- Local aliases for the Unicode-aware string functions of the Scribunto
-- `mw.ustring` library (they operate on code points rather than bytes).
local U = mw.ustring.char -- builds a string from code point numbers
local gsub = mw.ustring.gsub -- NOTE(review): not referenced by the code visible in this file
local sub = mw.ustring.sub
local find = mw.ustring.find
local length = mw.ustring.len

-- Lookup table from X-SAMPA symbols to IPA.
--
-- Each key is an X-SAMPA symbol (one to four characters long). Each value is
-- a table whose first array element is the IPA replacement string. Two
-- optional fields also appear on entries:
--   descender: `true` on some base symbols, or a replacement string on some
--              diacritics. Presumably it marks glyphs that extend below the
--              baseline and supplies an alternative diacritic form for them
--              (TODO confirm).
--   diacritic: `true` on modifier symbols.
-- NOTE(review): the converter visible in this file reads only element [1];
-- neither `descender` nor `diacritic` is consulted by the code shown here.
local data = {
	["["] = { "[" },
	["]"] = { "]" },
	["/"] = { "/" },
	["a"] = { "a" },
	["b"] = { "b" },
	["b_<"] = { "ɓ" },
	["c"] = { "c" },
	["d"] = { "d" },
	["d`"] = { "ɖ", descender = true },
	["d_<"] = { "ɗ" },
	["e"] = { "e" },
	["f"] = { "f" },
	["g"] = { "ɡ", descender = true  },
	["g_<"] = { "ɠ", descender = true },
	["h"] = { "h" },
	["h\\"] = { "ɦ" },
	["i"] = { "i" },
	["j"] = { "j", descender = true  },
	["j\\"] = { "ʝ", descender = true  },
	["k"] = { "k" },
	["l"] = { "l" },
	["l`"] = { "ɭ", descender = true  },
	["l\\"] = { "ɺ" },
	["m"] = { "m" },
	["n"] = { "n" },
	["n`"] = { "ɳ", descender = true  },
	["o"] = { "o" },
	["p"] = { "p", descender = true  },
	["p\\"] = { "ɸ", descender = true  },
	["q"] = { "q", descender = true  },
	["r"] = { "r" },
	["r`"] = { "ɽ", descender = true  },
	["r\\"] = { "ɹ" },
	["r\\`"] = { "ɻ", descender = true  },
	["s"] = { "s" },
	["s`"] = { "ʂ", descender = true  },
	["s\\"] = { "ɕ" },
	["t"] = { "t" },
	["t`"] = { "ʈ" },
	["u"] = { "u" },
	["v"] = { "v" },
	["v\\"] = { "ʋ" },
	["w"] = { "w" },
	["x"] = { "x" },
	["x\\"] = { "ɧ", descender = true  },
	["y"] = { "y", descender = true  },
	["z"] = { "z" },
	["z`"] = { "ʐ", descender = true  },
	["z\\"] = { "ʑ" },
	["A"] = { "ɑ" },
	["B"] = { "β", descender = true  },
	["B\\"] = { "ʙ" },
	["C"] = { "ç", descender = true  },
	["D"] = { "ð" },
	["E"] = { "ɛ" },
	["F"] = { "ɱ", descender = true  },
	["G"] = { "ɣ", descender = true  },
	["G\\"] = { "ɢ" },
	["G\\_<"] = { "ʛ" },
	["H"] = { "ɥ", descender = true  },
	["H\\"] = { "ʜ" },
	["I"] = { "ɪ" },
	["I\\"] = { "ɪ̈" },
	["J"] = { "ɲ", descender = true  },
	["J\\"] = { "ɟ" },
	["J\\_<"] = { "ʄ", descender = true  },
	["K"] = { "ɬ" },
	["K\\"] = { "ɮ", descender = true  },
	["L"] = { "ʎ" },
	["L\\"] = { "ʟ" },
	["M"] = { "ɯ" },
	["M\\"] = { "ɰ", descender = true  },
	["N"] = { "ŋ", descender = true  },
	["N\\"] = { "ɴ" },
	["O"] = { "ɔ" },
	["O\\"] = { "ʘ" },
	["P"] = { "ʋ" },
	["Q"] = { "ɒ" },
	["R"] = { "ʁ" },
	["R\\"] = { "ʀ" },
	["S"] = { "ʃ", descender = true  },
	["T"] = { "θ" },
	["U"] = { "ʊ" },
	["U\\"] = { "ʊ̈" },
	["V"] = { "ʌ" },
	["W"] = { "ʍ" },
	["X"] = { "χ", descender = true  },
	["X\\"] = { "ħ" },
	["Y"] = { "ʏ" },
	["Z"] = { "ʒ", descender = true  },
	["."] = { "." },
	["\""] = { "ˈ" },
	["%"] = { "ˌ" },
	["'"] = { "ʲ", diacritic = true },
	[":"] = { "ː", diacritic = true },
	[":\\"] = { "ˑ", diacritic = true },
	["@"] = { "ə" },
	["@\\"] = { "ɘ" },
	["{"] = { "æ" },
	["}"] = { "ʉ" },
	["1"] = { "ɨ" },
	["2"] = { "ø" },
	["3"] = { "ɜ" },
	["3\\"] = { "ɞ" },
	["4"] = { "ɾ" },
	["5"] = { "ɫ" },
	["6"] = { "ɐ" },
	["7"] = { "ɤ" },
	["8"] = { "ɵ" },
	["9"] = { "œ" },
	["&"] = { "ɶ" },
	["?"] = { "ʔ" },
	["?\\"] = { "ʕ" },
	["<\\"] = { "ʢ" },
	[">\\"] = { "ʡ" },
	["^"] = { "ꜛ" },
	["!"] = { "ꜜ" },
	["!\\"] = { "ǃ" },
	["|"] = { "|", descender = true  },
	["|\\"] = { "ǀ", descender = true  },
	["||"] = { "‖", descender = true  },
	["|\\|\\"] = { "ǁ", descender = true  },
	["=\\"] = { "ǂ", descender = true  },
	["-\\"] = { "‿", diacritic = true }, -- linking mark, liaison
	["_\""] = { U(0x308), diacritic = true },
	["_+"] = { U(0x31F), descender = "˖", diacritic = true }, -- advanced
	["_-"] = { U(0x320), descender = "˗", diacritic = true }, -- retracted
	["_/"] = { U(0x30C), diacritic = true }, -- rising tone
	["_0"] = { U(0x325), descender = U(0x30A), diacritic = true }, -- voiceless
	["="] = { U(0x329), descender = U(0x30D), diacritic = true }, -- syllabic
	["_="] = { U(0x329), descender = U(0x30D), diacritic = true }, -- syllabic
	["_>"] = { "ʼ", diacritic = true }, -- ejective
	["_?\\"] = { "ˤ", diacritic = true }, -- pharyngealized
	["_\\"] = { U(0x302), diacritic = true }, -- falling tone
	["_^"] = { U(0x32F), descender = U(0x311), diacritic = true }, -- non-syllabic
	["_}"] = { U(0x31A), diacritic = true }, -- no audible release
	["`"] = { U(0x2DE), diacritic = true }, -- r-coloring (colouring), rhotacization
	["~"] = { U(0x303), diacritic = true }, -- nasalization
	["_A"] = { U(0x318), diacritic = true }, -- advanced tongue root
	["_a"] = { U(0x33A), diacritic = true }, -- apical
	["_B"] = { U(0x30F), diacritic = true }, -- extra-low tone
	["_B_L"] = { U(0x1DC5), diacritic = true }, -- low rising tone
	["_c"] = { U(0x31C), diacritic = true }, -- less rounded
	["_d"] = { U(0x32A), diacritic = true }, -- dental
	["_e"] = { U(0x334), diacritic = true }, -- velarized or pharyngealized (dark)
	["<F>"] = { "↘" }, -- global fall
	["_F"] = { U(0x302), diacritic = true }, -- falling tone
	["_G"] = { "ˠ", diacritic = true }, -- velarized
	["_H"] = { U(0x301), diacritic = true }, -- high tone
	["_H_T"] = { U(0x1DC4), diacritic = true }, -- high rising tone
	["_h"] = { "ʰ", diacritic = true }, -- aspiration
	["_j"] = { "ʲ", diacritic = true }, -- palatalization
	["_k"] = { U(0x330), diacritic = true }, -- creaky voice, laryngealization, vocal fry
	["_L"] = { U(0x300), diacritic = true }, -- low tone
	["_l"] = { "ˡ", diacritic = true }, -- lateral release
	["_M"] = { U(0x304), diacritic = true }, -- mid tone
	["_m"] = { U(0x33B), diacritic = true }, -- laminal
	["_N"] = { U(0x33C), diacritic = true }, -- linguolabial
	["_n"] = { "ⁿ", diacritic = true }, -- nasal release
	["_O"] = { U(0x339), diacritic = true }, -- more rounded
	["_o"] = { U(0x31E), descender = "˕", diacritic = true }, -- lowered
	["_q"] = { U(0x319), diacritic = true }, -- retracted tongue root
	["<R>"] = { "↗" }, -- global rise
	["_R"] = { U(0x30C), diacritic = true }, -- rising tone
	["_R_F"] = { U(0x1DC8), diacritic = true }, -- rising falling tone
	["_r"] = { U(0x31D), diacritic = true }, -- raised
	["_T"] = { U(0x30B), diacritic = true }, -- extra-high tone
	["_t"] = { U(0x324), diacritic = true }, -- breathy voice, murmured voice, murmur, whispery voice
	["_v"] = { U(0x32C), diacritic = true }, -- voiced
	["_w"] = { "ʷ", diacritic = true }, -- labialized
	["_X"] = { U(0x306), diacritic = true }, -- extra-short
	["_x"] = { U(0x33D), diacritic = true }, -- mid-centralized
}

-- Length (in code points) of the longest key in `data`. Computed on first use
-- so that adding a longer symbol to `data` never requires touching the scanner.
local longestKeyLength

local function getLongestKeyLength()
	if not longestKeyLength then
		longestKeyLength = 0
		for key in pairs(data) do
			local keyLength = length(key)
			if keyLength > longestKeyLength then
				longestKeyLength = keyLength
			end
		end
	end
	return longestKeyLength
end

-- Converts a string of X-SAMPA symbols to IPA.
--
-- The text is scanned left to right. At each position the longest substring
-- that is a key of `data` is replaced by that entry's IPA string (element [1]);
-- a character that begins no known symbol is copied through unchanged.
--
-- text:    the X-SAMPA string to convert.
-- returns: the converted string.
-- raises:  an error if `text` is not valid UTF-8 (mw.ustring.len returns nil
--          for such input).
local function _X2IPA(text)
	local total = length(text)
	if not total then
		error("Text is not valid UTF-8")
	end
	
	local output = {}
	local longest = getLongestKeyLength()
	local pos = 1
	
	while pos <= total do
		local consumed = 0
		
		-- Try the longest candidate first so that multi-character symbols
		-- such as "r\`" win over their prefixes "r\" and "r". The candidate
		-- length is capped at the number of characters actually remaining.
		for candidateLength = math.min(longest, total - pos + 1), 1, -1 do
			local entry = data[sub(text, pos, pos + candidateLength - 1)]
			if entry then
				output[#output + 1] = entry[1]
				consumed = candidateLength
				break
			end
		end
		
		if consumed == 0 then
			-- Unknown character: pass it through as is.
			output[#output + 1] = sub(text, pos, pos)
			consumed = 1
		end
		
		pos = pos + consumed
	end
	
	return table.concat(output)
end

-- Converts X-SAMPA text to IPA and wraps the result in <span class="IPA">.
--
-- frame: either a frame-like table or the text itself. For a table, the first
--        positional argument of the parent frame is used if present, otherwise
--        the first positional argument of the frame itself. Any non-table
--        value is treated as the text to convert.
-- returns: the wrapped IPA string.
-- raises:  "No text provided" when no text can be found, rather than letting
--          the frame table reach the converter and fail there with an
--          unrelated message.
function p.X2IPA(frame)
	local text
	
	if type(frame) == "table" then
		-- getParent() can return nil, so guard before indexing its args.
		local parent = frame.getParent and frame:getParent()
		text = parent and parent.args[1] or frame.args and frame.args[1]
	else
		text = frame
	end
	
	if not text then error("No text provided") end
	
	return "<span class=\"IPA\">" .. _X2IPA(text) .. "</span>"
end

-- Produces a documentation example: the template call that would be written
-- (rendered inside <code>), followed by "\n| " and the converted IPA output.
--
-- frame: frame-like table; the text is the first positional argument of the
--        parent frame if present, otherwise of the frame itself.
-- raises: "No text provided" when neither frame supplies the argument.
function p.example(frame)
	local text = frame.getParent and frame:getParent().args[1]
	if not text then
		text = frame.args and frame.args[1]
	end
	if not text then error("No text provided") end
	
	-- Text containing "=" must be passed as an explicitly numbered parameter,
	-- so the displayed call uses "|1=" in that case and a bare "|" otherwise.
	local separator = find(text, "=") and "&#124;1=" or "&#124;"
	
	return " <code>&#123;&#123;[[mw:Manual:Substitution|subst:]][[Template:x2i|x2i]]"
		.. separator .. text
		.. "&#125;&#125;</code>"
		.. "\n| "
		.. p.X2IPA(text)
end

return p