Jump to content

Module:Unihan pronunciation

From Wikipedia, the free encyclopedia
This is the current revision of this page, as edited by Northern Moonlight (talk | contribs) at 02:26, 28 April 2025 (Good enough). The present address (URL) is a permanent link to this version.
(diff) ← Previous revision | Latest revision (diff) | Newer revision → (diff)

local p = {}
local unicode_data = require("Module:Unicode data")

-- Set of punctuation marks that p.kCantonese copies to its output verbatim
-- rather than looking them up as characters.
local passthrough = {}
for _, mark in ipairs({ ",", "-", ":" }) do
    passthrough[mark] = true
end

--- Helper: format a syllable, optionally rendering its tone as superscript.
--- @param syllable string|nil reading whose last character is expected to be
---   the tone digit (e.g. "jat1"); nil is returned unchanged
--- @param tone_mode string|nil "sup" wraps the trailing tone digit in
---   <sup>...</sup>; any other value returns the syllable as-is
--- @return string|nil the formatted syllable
local function format_syllable(syllable, tone_mode)
    if tone_mode ~= "sup" or not syllable then
        return syllable
    end

    -- Split only when the value really ends in a digit, so that an empty
    -- string does not become "<sup></sup>" and a value without a tone number
    -- does not get its final letter superscripted.
    local base, tone = syllable:match("^(.-)(%d)$")
    if not tone then
        return syllable
    end

    return base .. "<sup>" .. tone .. "</sup>"
end

--- Replace each character of the input text with its kCantonese reading.
---
--- The text is taken from the first positional argument of the invocation
--- and the optional "tone" setting from the parent frame's arguments.
--- Whitespace is dropped, characters listed in `passthrough` are copied
--- verbatim, and every other character is replaced by whatever
--- `unicode_data.lookup_kCantonese` returns for its code point (formatted via
--- `format_syllable`). Characters for which no reading is returned are
--- omitted from the output.
---
--- @param frame table Scribunto frame object
--- @return string the output pieces joined by single spaces; "" when no text
---   was supplied
function p.kCantonese(frame)
    local text = frame.args[1]
    if text == nil or text == "" then
        -- Nothing to convert; avoid handing a nil/empty string to gsplit.
        return ""
    end

    -- getParent() can return nil (no calling frame); treat that the same as
    -- "tone" not being set instead of raising on a nil index.
    local parent = frame:getParent()
    local tone_mode = parent and parent.args["tone"]

    local result = {}

    -- Splitting on the empty string yields the text one character at a time.
    for char in mw.text.gsplit(text, "", true) do
        if mw.ustring.find(char, "^%s$") then
            -- ignore whitespace (any Unicode whitespace, not just U+0020)
        elseif passthrough[char] then
            result[#result + 1] = char
        else
            local code = mw.ustring.codepoint(char)
            local syllable = code and unicode_data.lookup_kCantonese(code)
            local formatted = format_syllable(syllable, tone_mode)
            -- Skip characters without a reading explicitly rather than
            -- relying on table.insert tolerating a nil value.
            if formatted then
                result[#result + 1] = formatted
            end
        end
    end

    return table.concat(result, " ")
end

return p