Module:Ancient Greek
Appearance
This Lua module is used on approximately 650 pages, and changes may be widely noticed. Test changes in the module's /sandbox or /testcases subpages, or in your own module sandbox. Consider discussing changes on the talk page before implementing them.
Usage
This module transliterates Ancient Greek text. It is based on an old version of the Ancient Greek transliteration module on Wiktionary, with minor modifications to make it callable through a template.
{{#invoke:Ancient Greek|translit|οἷος}}
- function
The code below uses the basic string functions (for instance, `str:gsub(...)`) when possible. Ustring functions have to be used when patterns contain sets with multiple-byte characters (for instance, `"[αΑ]"`), or quantifiers that act on multiple-byte characters (`"α+"`). They must also be used to correctly get a substring from the ith to the jth Unicode character. In other situations, basic string functions can be used, and are preferred for efficiency's sake, as they don't have to parse the string into codepoints before operating on it.
-- Export table; it is returned at the end of the file.
local p = {}
-- Module:TableTools provides compressSparseArray, which tokenize calls.
local m_table = require "Module:TableTools"
-- Argument type checker from libraryUtil; wrapped by _check below.
local checkType = require "libraryUtil".checkType
-- Local aliases for the mw.ustring functions used throughout this file.
local ustring = mw.ustring
local U = ustring.char
local len = ustring.len
local sub = ustring.sub
local find = ustring.find
local gsub = ustring.gsub
-- Input is decomposed (NFD) so that each diacritic is a separate character.
local decompose = ustring.toNFD
local lower = ustring.lower
local upper = ustring.upper
-- Combining diacritics, built from their Unicode codepoints.
local macron = U(0x304) -- U+0304 combining macron
local breve = U(0x306) -- U+0306 combining breve
local rough = U(0x314) -- U+0314 combining reversed comma above (rough breathing)
local smooth = U(0x313) -- U+0313 combining comma above (smooth breathing)
local diaeresis = U(0x308) -- U+0308 combining diaeresis
local acute = U(0x301) -- U+0301 combining acute accent
local grave = U(0x300) -- U+0300 combining grave accent
local circumflex = U(0x342) -- U+0342 combining Greek perispomeni
local Latin_circumflex = U(0x302) -- U+0302 combining circumflex accent (used in the output)
local subscript = U(0x345) -- U+0345 combining Greek ypogegrammeni (iota subscript)
-- Patterns for a two-letter sequence ending in iota or upsilon; is_diphthong
-- anchors them with ^ and $.
local i_diphthong = "[ΑΕΗΟΥΩαεηουω][Ιι]"
local u_diphthong = "[ΑΕΗΟΩαεηοω][Υυ]"
-- Pattern: macron, optional diaeresis, Latin circumflex. p.transliterate uses
-- it to detect a transliterated vowel that carries both a macron and a circumflex.
local macron_circumflex = macron .. diaeresis .. '?' .. Latin_circumflex
-- Letters before which a bare γ token is transliterated as "n" (see p.transliterate).
local is_velar = { ['κ'] = true, ['γ'] = true, ['χ'] = true, ['ξ'] = true, }
-- Set of Greek combining marks that tokenize attaches to the current token.
-- Latin_circumflex is intentionally absent: it only occurs in the output.
local is_diacritic = {
[macron] = true, [breve] = true,
[rough] = true, [smooth] = true, [diaeresis] = true,
[acute] = true, [grave] = true, [circumflex] = true,
[subscript] = true,
}
-- Builds a type checker bound to one function name.
-- The returned closure forwards its arguments to checkType with funcName
-- prepended, so callers do not have to repeat the name at every check.
local function _check(funcName)
	local function bound_check(argIndex, arg, expectType, nilOk)
		-- Kept as a tail call, exactly as a direct call to checkType would be.
		return checkType(funcName, argIndex, arg, expectType, nilOk)
	end
	return bound_check
end
-- Transliteration table: maps a single (lowercase, decomposed) Greek character
-- to its Latin replacement. p.transliterate passes it to gsub as the
-- replacement table, so characters without an entry are left unchanged.
local tt = {
-- Vowels
["α"] = "a",
["ε"] = "e",
-- Long vowels get a combining macron appended.
["η"] = "e" .. macron,
["ι"] = "i",
["ο"] = "o",
["υ"] = "u",
["ω"] = "o" .. macron,
-- Consonants
["β"] = "b",
["γ"] = "g",
["δ"] = "d",
["ζ"] = "z",
["θ"] = "th",
["κ"] = "k",
["λ"] = "l",
["μ"] = "m",
["ν"] = "n",
["ξ"] = "x",
["π"] = "p",
["ρ"] = "r",
-- Both forms of sigma map to the same letter.
["σ"] = "s",
["ς"] = "s",
["τ"] = "t",
["φ"] = "ph",
["χ"] = "kh",
["ψ"] = "ps",
-- Archaic letters
["ϝ"] = "w",
["ϻ"] = "ś",
["ϙ"] = "q",
["ϡ"] = "š",
["ͷ"] = "v",
-- Diacritics
-- unchanged: macron, diaeresis, grave, acute
-- (they have no entry here, so gsub keeps them as they are)
-- Breve and smooth breathing are dropped. Rough breathing is dropped here
-- too; p.transliterate adds the "h" for it separately.
[breve] = '',
[smooth] = '',
[rough] = '',
-- The Greek circumflex is replaced by the Latin combining circumflex.
[circumflex] = Latin_circumflex,
-- Iota subscript is written out as a full "i".
[subscript] = 'i',
}
-- Decides whether a two-character string is a diphthong.
--   chars:      the two characters under consideration
--   next_chars: the (up to two) characters that follow them
-- Returns nil when chars is not an i- or u-diphthong sequence; otherwise
-- returns true unless next_chars starts with a diaeresis (optionally
-- preceded by a macron or breve), in which case it returns false.
local function is_diphthong(chars, next_chars)
	local matched = find(chars, '^' .. i_diphthong .. '$')
		or find(chars, '^' .. u_diphthong .. '$')
	if not matched then
		return nil
	end
	-- A diaeresis on the second vowel means the vowels are pronounced separately.
	local diaeresis_pattern = "^[" .. macron .. breve .. "]?" .. diaeresis
	return not find(next_chars, diaeresis_pattern)
end
-- Appends chars to list[index] (creating the entry if it does not exist yet)
-- and returns text with its first len(chars) characters stripped off.
-- Raises an error when chars is nil or false.
local function add(list, index, chars, text)
	if not chars then
		error("The function add cannot act on a nil character.")
	end
	local existing = list[index]
	-- A concatenation result is always truthy, so this and/or form is safe.
	list[index] = existing and (existing .. chars) or chars
	local consumed = len(chars)
	return sub(text, consumed + 1)
end
--[=[
	Splits a word into "tokens": single letters or diphthongs, each
	together with the diacritics that follow it.

	The text is consumed from the front. A diacritic is appended to the
	token currently being built; anything else starts a new token, taking
	two characters when they form a diphthong and one character otherwise.
	A diacritic that occurs before any letter is stored at index 0.

	The collected table (with its highest index recorded in the maxindex
	field) is passed through compressSparseArray from Module:TableTools,
	and that result is returned.
--]=]
local function tokenize(text)
	local pieces = {}
	local slot = 0
	while len(text) > 0 do
		local first = sub(text, 1, 1) or ""
		local pair = sub(text, 1, 2) or ""
		local lookahead = sub(text, 3, 4) or ""
		local consumed
		if is_diacritic[first] then
			-- Diacritics stay with the current token.
			consumed = first
		else
			-- If there's a diaeresis, it will be immediately after the
			-- second of the two characters, or after a macron or breve;
			-- is_diphthong checks that using the lookahead.
			if is_diphthong(pair, lookahead) then
				consumed = pair
			else
				consumed = first
			end
			slot = slot + 1
		end
		-- add stores the characters and removes them from the text.
		text = add(pieces, slot, consumed, text)
	end
	pieces.maxindex = slot
	return m_table.compressSparseArray(pieces)
end
--[[
	Transliterates Ancient Greek text into Latin characters.

	Parameters:
		text  string of Greek text; it is decomposed (NFD) before processing.
		lang  unused; kept so that existing callers keep working.
		sc    unused; kept so that existing callers keep working.

	Returns the transliteration as a single string.

	The text is split into tokens (a letter or diphthong plus its
	diacritics), each token is mapped through the tt table, and a few
	context-dependent rules are then applied per token.
--]]
function p.transliterate(text, lang, sc)
	text = decompose(text)

	--[[
	if text == '῾' then
		return 'h'
	end
	--]]

	--[[
		Replace semicolon or Greek question mark with regular question mark,
		except after an ASCII alphanumeric character (to avoid converting
		semicolons in HTML entities).
	--]]
	text = gsub(text, "([^A-Za-z0-9])[;" .. U(0x37E) .. "]", "%1?")

	-- Handle the middle dot. It is equivalent to semicolon or colon, but
	-- semicolon is probably more common. This runs after the question-mark
	-- replacement, so the semicolons produced here are kept.
	text = gsub(text, "·", ";")

	local tokens = tokenize(text)

	-- Read the tokens in sequence order. ipairs is used rather than pairs
	-- because the output is assembled positionally and the neighbour lookups
	-- below need integer indices; pairs does not specify an iteration order.
	local output = {}
	for i, token in ipairs(tokens) do
		-- Substitute each character in the token for its transliteration.
		-- Characters with no entry in tt are kept unchanged.
		local translit = gsub(lower(token), '.', tt)

		local next_token = tokens[i + 1]

		if token == 'γ' and next_token and is_velar[next_token] then
			-- γ before a velar should be <n>
			translit = 'n'
		elseif token == 'ρ' and tokens[i - 1] == 'ρ' then
			-- ρ after ρ should be <rh>
			translit = 'rh'
		elseif find(token, '^[αΑ].*' .. subscript .. '$') then
			-- add macron to ᾳ
			translit = gsub(translit, '([aA])', '%1' .. macron)
		end

		-- Rough breathing: "h" follows a rho but precedes a vowel.
		if find(token, rough) then
			if find(token, '[Ρρ]') then
				translit = translit .. 'h'
			else -- vowel
				translit = 'h' .. translit
			end
		end

		-- Remove macron from a vowel that has a circumflex.
		if find(translit, macron_circumflex) then
			translit = gsub(translit, macron, '')
		end

		-- Capitalize first character of transliteration when the token
		-- contained an uppercase letter.
		if token ~= lower(token) then
			translit = gsub(translit, "^.", upper)
		end

		output[#output + 1] = translit
	end

	return table.concat(output)
end
--[[
	Template entry point: {{#invoke:Ancient Greek|translit|<text>}}.

	Takes the text from the first positional argument of the #invoke call
	or, if that is absent, from the first positional argument of the parent
	frame (the template that contains the #invoke).

	Returns the transliteration wrapped in an italicised <span>.
	Raises an error when no text was supplied at all.
--]]
function p.translit(frame)
	-- The parent frame may be missing (for example, when the function is not
	-- called through a template), so only read its arguments when it exists.
	local parent = mw.getCurrentFrame():getParent()
	local text = frame.args[1] or (parent and parent.args[1])

	if text == nil then
		error("No text to transliterate was supplied to Module:Ancient Greek.", 0)
	end

	-- p.transliterate always returns a string, so the result can be wrapped
	-- directly. The earlier check against the global function "error" was
	-- always false and made this function return the error function itself.
	local transliteration = p.transliterate(text)

	return '<span title="Ancient Greek transliteration" class="Unicode" style="white-space:normal; text-decoration: none"><i>' .. transliteration .. '</i></span>'
end
return p