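-- Page header captured together with the module source (kept as comments so the file parses as Lua):
--   跳转到内容
--
--   模組:Conversion rule extractor/Extractor
--
--   本页使用了标题或全文手工转换
--   维基百科,自由的百科全书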

-- Module table: the public functions are attached to it below and it is returned at the end of the file.
local Extractor = {}

local Tpv = require('Module:Template parameter value')
local Arguments = require('Module:Arguments')

-- Names of the NoteTA-family templates that are looked up on a page.
-- The order is significant for callers that try the names one after another.
local NOTE_TA_TEMPLATES = {
    'NoteTA',
    'TA',
    'NoteAT',
    'NoteTA/default',
    'NOTETA',
    'Note TA',
    'Noteta',
    'NoteTa',
    'NoteTA/lua',
    '全文字词转换',
    'NoteTA-lite',
    'TA-lite',
    'TAL',
    'TAl',
}

-- Helper: normalize a conversion-rule string.
-- Trims the input, strips HTML comments (<!-- ... -->, which normally should
-- not appear inside a rule), trims again, and makes sure the result ends with
-- a semicolon.
-- @param rule  Raw rule text, or nil.
-- @return The normalized rule, or nil when the input is nil/false or nothing
--         is left after trimming and comment removal.
local function normalizeRuleString(rule)
    if not rule then
        return nil
    end

    local text = mw.text.trim(rule)
    if text == '' then
        return nil -- nothing left after the first trim
    end

    -- gsub also returns a match count; keep only the resulting string.
    local withoutComments = text:gsub('<!%-%-.-%-%->', '')
    text = mw.text.trim(withoutComments)
    if text == '' then
        return nil
    end

    -- Already terminated by a semicolon: return as is.
    if text:find(';%s*$') then
        return text
    end
    return text .. ';'
end

-- Helper: parse the parameters of a single NoteTA template.
-- @param templateWikitext  Wikitext of one template call, handed to
--                          Tpv.getParameters.
-- @return table with:
--   titleRule   normalized 'T' parameter, or nil;
--   manualRules normalized values of the parameters '1'..'30', in numeric order;
--   groupNames  group names taken from 'G1'..'G30' (not de-duplicated here).
local function parseNoteTATemplate(templateWikitext)
    local params = Tpv.getParameters(templateWikitext)
    local manualRules, groupNames = {}, {}

    -- Title rule (T); stays nil when absent or empty after normalization.
    local titleRule = nil
    local rawTitle = params['T']
    if rawTitle then
        titleRule = normalizeRuleString(rawTitle)
    end

    -- Manual full-text rules: parameters '1' to '30', looked up by string key.
    for index = 1, 30 do
        local rawRule = params[tostring(index)]
        local rule = rawRule and normalizeRuleString(rawRule)
        if rule then
            manualRules[#manualRules + 1] = rule
        end
    end

    -- Public conversion group names: parameters 'G1' to 'G30'.
    for index = 1, 30 do
        local rawGroup = params['G' .. index]
        if rawGroup then
            local trimmed = mw.text.trim(rawGroup)
            if trimmed and trimmed ~= '' then
                -- One parameter may hold several names separated by ',' or ';'.
                for piece in mw.text.gsplit(trimmed, '[;,]') do
                    local groupName = mw.text.trim(piece)
                    if groupName ~= '' then
                        groupNames[#groupNames + 1] = groupName
                    end
                end
            end
        end
    end

    return { titleRule = titleRule, manualRules = manualRules, groupNames = groupNames }
end

-- Collect the conversion rules declared by every NoteTA-family template on a page.
-- Each matching template is parsed with parseNoteTATemplate and the results are merged.
--
-- @param pageTitle  Title of the page to scan (given to mw.title.new and Tpv.getTemplate).
-- @return table { titleRule = string|nil, manualRules = {...}, groupNames = {...} }:
--   titleRule   the T rule of the last template that defines one;
--   manualRules all manual rules, in the order the templates were returned;
--   groupNames  group names, de-duplicated with first-seen order kept.
--   Everything is empty when the page does not exist or no template matches.
function Extractor.getNoteTARules(pageTitle)
    local combinedRules = { titleRule = nil, manualRules = {}, groupNames = {} }
    local titleObj = mw.title.new(pageTitle)
    if not titleObj or not titleObj.exists then
        return combinedRules
    end

    local seenGroupNames = {}
    local templateIndex = 1
    while true do
        -- Request the templateIndex-th match among ALL alias names in one call.
        -- Querying each alias separately with the same index skipped templates
        -- on pages that mix aliases (e.g. one {{NoteTA}} plus one {{TA}}: once
        -- index 1 had matched {{NoteTA}}, the first {{TA}} was never requested).
        local success, templateWikitext = Tpv.getTemplate(
            pageTitle,
            NOTE_TA_TEMPLATES,
            { template_index = templateIndex }
        )
        if not success then
            -- No templateIndex-th NoteTA-family template: all of them were processed.
            break
        end

        local rules = parseNoteTATemplate(templateWikitext)

        -- Merge: a later T rule overrides an earlier one.
        if rules.titleRule then
            combinedRules.titleRule = rules.titleRule
        end
        for _, rule in ipairs(rules.manualRules) do
            table.insert(combinedRules.manualRules, rule)
        end
        -- Group names are de-duplicated while merging, keeping first-seen order.
        for _, name in ipairs(rules.groupNames) do
            if not seenGroupNames[name] then
                seenGroupNames[name] = true
                table.insert(combinedRules.groupNames, name)
            end
        end

        templateIndex = templateIndex + 1
    end

    return combinedRules
end


-- Load the rules of the given public conversion groups.
-- Every group is read from 'Module:CGroup/<name>' through mw.loadData (wrapped
-- in pcall); entries of its `content` list whose type is 'item' and that carry
-- a `rule` are normalized with normalizeRuleString and collected.
-- Groups that cannot be loaded, or that have no `content`, are skipped silently
-- (falling back to Template:CGroup/<name> is deliberately not attempted).
-- @param groupNames  List of group names; nil or an empty list yields {}.
-- @return List of normalized rule strings, in group order then entry order.
function Extractor.getGroupRules(groupNames)
    local collected = {}

    if groupNames and #groupNames > 0 then
        for _, groupName in ipairs(groupNames) do
            local ok, groupData = pcall(mw.loadData, 'Module:CGroup/' .. groupName)
            local entries = ok and groupData and groupData.content
            if entries then
                for _, entry in ipairs(entries) do
                    local rule = entry.type == 'item' and entry.rule
                        and normalizeRuleString(entry.rule)
                    if rule then
                        collected[#collected + 1] = rule
                    end
                end
            end
        end
    end

    return collected
end

-- Main entry point: gather every rule that applies to a page, i.e. the rules
-- written in its NoteTA templates plus the rules of the public conversion
-- groups those templates reference.
-- @param pageTitle  Title of the page to inspect.
-- @return table { titleRule = string|nil, contentRules = {...} } where
--         contentRules holds the manual rules followed by the group rules,
--         with duplicates removed (the first occurrence is kept).
function Extractor.getAllRules(pageTitle)
    local noteTA = Extractor.getNoteTARules(pageTitle)
    local fromGroups = Extractor.getGroupRules(noteTA.groupNames)

    -- Walk both sources in order and keep only the first occurrence of a rule.
    local seen, contentRules = {}, {}
    for _, source in ipairs({ noteTA.manualRules, fromGroups }) do
        for _, rule in ipairs(source) do
            if not seen[rule] then
                seen[rule] = true
                contentRules[#contentRules + 1] = rule
            end
        end
    end

    return {
        titleRule = noteTA.titleRule,
        contentRules = contentRules,
    }
end

-- Export normalizeRuleString as well, so that the main module can use it if needed (e.g. to normalize user input)
Extractor.normalizeRuleString = normalizeRuleString

return Extractor