Jump to content

Module:Diacritics

From Wikipedia, the free encyclopedia
This is an old revision of this page, as edited by RexxS (talk | contribs) at 11:24, 17 November 2018 (document & return). The present address (URL) is a permanent link to this revision, which may differ significantly from the current revision.

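--[[
stripDiacrits is the entry point for wikitext: it replaces each accented
character in its argument with its simplest unaccented equivalent.
strip_diacrits performs the same replacement on a plain string and is
available for export to other modules.
--]]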

-- Export table: the functions attached to p below are what this module returns.
local p = {}

-- Maps each plain replacement string (the key) to the list of accented or
-- otherwise modified letters that should be reduced to it.
-- Only lowercase letters are listed.
local chars = {
	a = { 'á', 'à', 'â', 'ä', 'ǎ', 'ă', 'ā', 'ã', 'å', 'ą' },
	c = { 'ć', 'ċ', 'ĉ', 'č', 'ç' },
	d = { 'ď', 'đ', 'ḍ', 'ð' },
	e = { 'é', 'è', 'ė', 'ê', 'ë', 'ě', 'ĕ', 'ē', 'ẽ', 'ę', 'ẹ' },
	-- The g and h variants were previously filed under f and g respectively.
	g = { 'ġ', 'ĝ', 'ğ', 'ģ' },
	h = { 'ĥ', 'ħ', 'ḥ' },
	i = { 'ı', 'í', 'ì', 'î', 'ï', 'ǐ', 'ĭ', 'ī', 'ĩ', 'į' },
	j = { 'ĵ' },
	k = { 'ķ' },
	l = { 'ĺ', 'ŀ', 'ľ', 'ļ', 'ł', 'ḷ', 'ḹ' },
	m = { 'ṃ' },
	n = { 'ń', 'ň', 'ñ', 'ņ', 'ṇ', 'ŋ' },
	o = { 'ó', 'ò', 'ô', 'ö', 'ǒ', 'ŏ', 'ō', 'õ', 'ǫ', 'ọ', 'ő', 'ø' },
	r = { 'ŕ', 'ř', 'ŗ', 'ṛ', 'ṝ' },
	s = { 'ś', 'ŝ', 'š', 'ş', 'ș', 'ṣ' },
	-- Multi-character replacement: one input character becomes two letters.
	ss = { 'ß' },
	t = { 'ť', 'ţ', 'ț', 'ṭ' },
	u = { 'ú', 'ù', 'û', 'ü', 'ǔ', 'ŭ', 'ū', 'ũ', 'ů', 'ų', 'ụ', 'ű', 'ǘ', 'ǜ', 'ǚ', 'ǖ' },
	w = { 'ŵ' },
	y = { 'ý', 'ŷ', 'ÿ', 'ỹ', 'ȳ' },
	z = { 'ź', 'ż', 'ž' },
}

-- Reverse lookup built once at load time: accented character -> the plain
-- replacement string it is listed under in chars.
local char_idx = {}
for base, variants in pairs(chars) do
	for _, accented in ipairs(variants) do
		-- Store the replacement string, not the list of variants: this value is
		-- later handed to string.gsub as the replacement text.
		char_idx[accented] = base
	end
end

--[[
strip_diacrits returns wrd with every character that has an entry in
char_idx replaced by that entry; all other characters are left untouched.
A nil or empty wrd yields "".
--]]
p.strip_diacrits = function(wrd)
	if not wrd or wrd == "" then return "" end
	-- Plain Lua patterns work on bytes, so "." only ever yields fragments of
	-- an accented letter and can never equal a char_idx key. Match whole UTF-8
	-- multi-byte sequences instead: one lead byte (194-244) followed by its
	-- continuation bytes (128-191). ASCII needs no lookup and is skipped.
	for ch in wrd:gmatch("[\194-\244][\128-\191]*") do
		local plain = char_idx[ch]
		if plain then
			-- ch holds only bytes >= 128, so it contains no pattern magic
			-- characters and can be used directly as a literal pattern.
			wrd = wrd:gsub(ch, plain)
		end
	end
	return wrd
end

--[[
stripDiacrits is the frame-based wrapper around strip_diacrits.
It takes the text from the named parameter "word" or, if that is absent,
from the first positional parameter with surrounding whitespace trimmed.
A call with neither parameter returns "".
--]]
p.stripDiacrits = function(frame)
	local args = frame.args
	-- mw.text.trim expects a string, so fall back to "" when no positional
	-- parameter was supplied.
	return p.strip_diacrits(args.word or mw.text.trim(args[1] or ""))
end

-- Hand the export table (strip_diacrits, stripDiacrits) back to the loader.
return p