跳转到内容

模組:Conversion rule extractor

本页使用了标题或全文手工转换
维基百科,自由的百科全书

这是本页的一个历史版本,由PexEric(留言 | 贡献)于2025年5月3日 (六) 15:52编辑。这可能和当前版本存在着巨大的差异。

local p = {}
local TPV = require('Module:Template parameter value')
local Arguments = require('Module:Arguments') -- 用于模板入口点
local mw_text = require('mw.text') -- 用于分割和裁剪
local mw_html = require('mw.html') -- 用于创建 HTML 元素

-- Template names passed to TPV.getParameter when looking for conversion
-- rules on a page. The redirect list below was retrieved on 2025-05-03.
local NOTE_TA_TEMPLATES = {
    'NoteTA',
    'TA',
    'NoteAT',
    'NoteTA/default',
    'NOTETA',
    'Note TA',
    'Noteta',
    'NoteTa',
    'NoteTA/lua',
    '全文字词转换',
    'NoteTA-lite',
    'TA-lite',
    'TAL',
    'TAl',
}

-- Upper bound of the numbered parameters probed for local rules (1..N).
local MAX_LOCAL_RULES = 30
-- Upper bound of the group parameters probed (G1..GN).
local MAX_GROUP_RULES = 30

--[[--------------------------< 辅助函数 - CGroup 处理 >--------------------------]]
-- (loadCGroupData, extractRulesFromCGroupData 函数保持不变)
--- Load the data table of a conversion-group module.
-- The module name is 'Module:CGroup/' followed by groupName. The module is
-- only loaded (through mw.loadData, wrapped in pcall) when a title object can
-- be created for it and that title reports `exists`.
-- @param groupName string appended to 'Module:CGroup/'
-- @return the loaded table, or nil when the page is absent or loading did not
--         produce a table (the latter case is reported through mw.log)
local function loadCGroupData(groupName)
    local modulePath = 'Module:CGroup/' .. groupName
    local page = mw.title.new(modulePath)
    if not (page and page.exists) then
        return nil
    end

    local ok, loaded = pcall(mw.loadData, modulePath)
    if ok and type(loaded) == 'table' then
        return loaded
    end

    -- pcall succeeded but gave a non-table, or pcall itself failed.
    local reason
    if ok then
        reason = ' (invalid data type)'
    else
        reason = ' (load error)'
    end
    mw.log('ConversionRuleExtractor: Failed to load or parse CGroup module: ' .. modulePath .. reason)
    return nil
end

--- Collect the rule strings stored in a CGroup data table.
-- Walks `data.content` in sequence order and keeps the `rule` field of every
-- entry that is a table with `type == 'item'` and a non-empty string `rule`.
-- @param data table (or nil) expected to carry a `content` sequence
-- @return a new sequence of rule strings; empty when nothing qualifies
local function extractRulesFromCGroupData(data)
    local collected = {}
    local content = data and data.content
    if type(content) ~= 'table' then
        return collected
    end

    -- ipairs is used on purpose: the table may come from mw.loadData.
    for _, entry in ipairs(content) do
        local usable = type(entry) == 'table'
            and entry.type == 'item'
            and type(entry.rule) == 'string'
            and entry.rule ~= ''
        if usable then
            collected[#collected + 1] = entry.rule
        end
    end
    return collected
end


--[[--------------------------< 辅助函数 - 规则解析与过滤 >--------------------------]]
-- (extractSourceTerms, filterGroupRulesByText 函数保持不变)
--- Extract the terms of a conversion rule that can be searched for in text.
-- If the rule contains '=>', only the part before the first '=>' is used.
-- That part is split on ';'; for each piece, the text after the first ':'
-- (or the whole piece when there is no ':') becomes a term.
--
-- Fix: the previous version called `mw_text.strip`, which is not part of the
-- Scribunto text library and therefore raised a "call a nil value" error.
-- Strip markers are now removed with `killMarkers` and the result trimmed.
-- @param ruleString string a single conversion rule
-- @return sequence of non-empty term strings
local function extractSourceTerms(ruleString)
    local terms = {}
    local rulePart = ruleString
    local arrowPos = string.find(rulePart, '=>', 1, true)
    if arrowPos then
        rulePart = string.sub(rulePart, 1, arrowPos - 1)
    end
    for segment in mw_text.gsplit(rulePart, ';', true) do
        local colonPos = string.find(segment, ':', 1, true)
        local term = segment
        if colonPos then
            -- Drop the "variant:" prefix, keep what follows it.
            term = string.sub(segment, colonPos + 1)
        end
        term = mw_text.trim(mw_text.killMarkers(term))
        if term ~= '' then
            terms[#terms + 1] = term
        end
    end
    return terms
end

--- Keep only the rules that have at least one term occurring in a text.
-- Each rule is broken into terms with extractSourceTerms; a rule is kept when
-- any term is found in the text by a plain (non-pattern) substring search.
--
-- Fix: the previous version called `mw_text.strip`, which is not part of the
-- Scribunto text library and raised an error as soon as there was anything to
-- filter. Strip markers are now removed with `killMarkers` and the result
-- trimmed.
-- @param groupRules sequence of rule strings (nil or empty yields {})
-- @param textToMatch string to search in (nil or empty yields {})
-- @return new sequence with the matching rules, in their original order
local function filterGroupRulesByText(groupRules, textToMatch)
    if not textToMatch or textToMatch == '' or not groupRules or #groupRules == 0 then
        return {}
    end

    local haystack = mw_text.trim(mw_text.killMarkers(textToMatch))
    if haystack == '' then
        return {}
    end

    local kept = {}
    for _, ruleString in ipairs(groupRules) do
        for _, term in ipairs(extractSourceTerms(ruleString)) do
            if string.find(haystack, term, 1, true) then
                kept[#kept + 1] = ruleString
                break -- one hit is enough for this rule
            end
        end
    end
    return kept
end


--[[--------------------------< 辅助函数 - 格式化输出 >--------------------------]]
-- (formatRules 函数保持不变)
--- Render a list of rules as one string.
-- With flag 'raw' the rules are joined by newlines. Otherwise every rule is
-- wrapped as `-{<flag>|<rule>}-` and the pieces are concatenated without a
-- separator. A flag that is not a non-empty string falls back to 'H'.
-- @param rules sequence of rule strings (nil or empty yields '')
-- @param flag string conversion flag, or 'raw'
-- @return string
local function formatRules(rules, flag)
    if not rules or #rules == 0 then
        return ''
    end

    local marker = 'H'
    if type(flag) == 'string' and flag ~= '' then
        marker = flag
    end

    if marker == 'raw' then
        return table.concat(rules, "\n")
    end

    local opening = "-{" .. marker .. "|"
    local pieces = {}
    for _, rule in ipairs(rules) do
        pieces[#pieces + 1] = opening .. rule .. "}-"
    end
    return table.concat(pieces, "")
end


--[[--------------------------< 核心获取函数 >--------------------------]]
-- (_internal_fetchAllRules 函数保持不变)
--- Gather every conversion rule declared on a page.
-- Queries TPV.getParameter for the 'T' parameter, the numbered parameters
-- 1..MAX_LOCAL_RULES and the group parameters G1..MAX_GROUP_RULES of the
-- templates listed in NOTE_TA_TEMPLATES. The two return values of
-- TPV.getParameter are treated here as (success, value-or-message).
-- @param pageTitleString string page to inspect
-- @return table { title = string|nil, ['local'] = {rules}, groups = {{name=, rules=}} },
--         or nil when no parameter could be read from any of the templates
function p._internal_fetchAllRules(pageTitleString)
    local collected = {
        title = nil,
        ['local'] = {},
        groups = {}
    }
    local sawTemplate = false

    -- Only used to log an invalid title; the lookups below are attempted
    -- regardless of the outcome.
    if not mw.title.new(pageTitleString) then
        mw.log('ConversionRuleExtractor: Invalid page title provided for fetching rules: ' .. pageTitleString)
    end

    -- 1. Title rule (T)
    local okTitle, titleValue = TPV.getParameter(pageTitleString, NOTE_TA_TEMPLATES, 'T')
    if okTitle and titleValue and titleValue ~= '' then
        collected.title = titleValue
        sawTemplate = true
    end

    -- 2. Local rules (1..MAX_LOCAL_RULES); stop at the first failed lookup
    for index = 1, MAX_LOCAL_RULES do
        local ok, value = TPV.getParameter(pageTitleString, NOTE_TA_TEMPLATES, tostring(index))
        if not ok then
            -- Nothing found so far and the very first lookup reports that no
            -- template exists: give up early.
            if index == 1 and not sawTemplate and value == "No valid template found" then
                return nil
            end
            break
        end
        if value and value ~= '' then
            table.insert(collected['local'], value)
            sawTemplate = true
        end
    end

    -- 3. Group rules (G1..MAX_GROUP_RULES); stop at the first failed lookup
    for index = 1, MAX_GROUP_RULES do
        local ok, groupName = TPV.getParameter(pageTitleString, NOTE_TA_TEMPLATES, 'G' .. index)
        if not ok then
            if index == 1 and not sawTemplate and groupName == "No valid template found" then
                return nil
            end
            break
        end
        if groupName and groupName ~= '' then
            sawTemplate = true
            local groupData = loadCGroupData(groupName)
            if groupData then
                local groupRules = extractRulesFromCGroupData(groupData)
                -- Groups that yield no rules are left out of the result.
                if #groupRules > 0 then
                    table.insert(collected.groups, { name = groupName, rules = groupRules })
                end
            end
        end
    end

    if not sawTemplate then
        return nil
    end
    return collected
end


--[[--------------------------< 公开函数 >--------------------------]]
-- (getFullTextRules 函数保持不变)
--- Return all full-text conversion rules of a page as one formatted string.
-- Local rules come first, followed by the rules of every group, in the order
-- produced by p._internal_fetchAllRules. The title ('T') rule is not included.
--
-- Fix: Scribunto title objects are Lua tables, so the former check
-- (`type == 'userdata'` with a metatable equal to the string 'mw.title')
-- never matched and title objects were always rejected. A table carrying a
-- string `fullText` field is now accepted.
-- @param pageTitle string page name, or a title object with a `fullText` field
-- @param flag string passed on to formatRules (conversion flag or 'raw')
-- @return string; '' when the input type is unsupported or no rules are found
function p.getFullTextRules(pageTitle, flag)
    local pageTitleString
    if type(pageTitle) == 'string' then
        pageTitleString = pageTitle
    elseif type(pageTitle) == 'table' and type(pageTitle.fullText) == 'string' then
        pageTitleString = pageTitle.fullText
    else
        return ''
    end

    local allRulesData = p._internal_fetchAllRules(pageTitleString)
    if not allRulesData then
        return ''
    end

    local combinedRules = {}
    for _, rule in ipairs(allRulesData['local'] or {}) do
        combinedRules[#combinedRules + 1] = rule
    end
    for _, groupInfo in ipairs(allRulesData.groups or {}) do
        for _, rule in ipairs(groupInfo.rules or {}) do
            combinedRules[#combinedRules + 1] = rule
        end
    end

    return formatRules(combinedRules, flag)
end

-- Build the wikitext of an error message wrapped in <span class="error">.
-- mw.html objects are serialised with tostring().
local function makeTitleErrorSpan(message)
    return tostring(mw_html.create('span'):addClass('error'):wikitext(message))
end

-- Append to `target` all local rules, then the group rules that
-- filterGroupRulesByText keeps for `titleText`. Returns `target`.
local function appendTitleCandidateRules(target, allRulesData, titleText)
    for _, rule in ipairs(allRulesData['local'] or {}) do
        target[#target + 1] = rule
    end
    local groupRules = {}
    for _, groupInfo in ipairs(allRulesData.groups or {}) do
        for _, rule in ipairs(groupInfo.rules or {}) do
            groupRules[#groupRules + 1] = rule
        end
    end
    for _, rule in ipairs(filterGroupRulesByText(groupRules, titleText)) do
        target[#target + 1] = rule
    end
    return target
end

--- Get the conversion rules that apply to a page title.
--
-- With outputType == 'context' the function returns preprocessed wikitext:
-- `-{T|rule}-` when the page has a T rule, otherwise the local rules plus the
-- group rules matching the title text, wrapped with flag 'H' and followed by
-- the title text. When no rule applies the plain title text is returned.
-- With any other outputType it returns formatRules() of the T rule (if any),
-- the local rules and the matching group rules.
--
-- Fixes compared with the previous revision:
--   * title objects are Lua tables in Scribunto, so the `userdata` checks
--     rejected every valid title;
--   * title objects expose the field `text`, there is no `getText` method;
--   * mw.html has no `allToString` method, error spans are built with
--     tostring().
--
-- @param pageTitle string page name, or a title object
-- @param flag string passed to formatRules in the non-context mode
-- @param outputType string 'context' or anything else (including nil)
-- @param frame frame object; required when outputType == 'context'
-- @return string
function p.getTitleRules(pageTitle, flag, outputType, frame)
    local wantContext = (outputType == 'context')
    local pageTitleString
    local titleObj

    if type(pageTitle) == 'string' then
        pageTitleString = pageTitle
        local created, resultOrErr = pcall(mw.title.new, pageTitleString)
        if not created or type(resultOrErr) ~= 'table' then
            mw.log('ConversionRuleExtractor: Failed to create title object or invalid type returned for string: "' .. pageTitleString .. '". Error/Result: ' .. tostring(resultOrErr))
            -- context mode cannot show the original title; other modes
            -- cannot filter without it.
            return wantContext and makeTitleErrorSpan('无法创建标题对象以获取文本。') or ''
        end
        titleObj = resultOrErr
    elseif type(pageTitle) == 'table' and type(pageTitle.fullText) == 'string' then
        titleObj = pageTitle
        pageTitleString = titleObj.fullText
    else
        return wantContext and makeTitleErrorSpan('无效的页面标题输入类型。') or ''
    end

    -- Safety net for table inputs that carry fullText but no text field.
    local titleText = titleObj.text
    if type(titleText) ~= 'string' then
        mw.log('ConversionRuleExtractor: CRITICAL: titleObj has no usable text field. Page: ' .. (pageTitleString or 'N/A'))
        return wantContext and makeTitleErrorSpan('内部错误:无法获取标题文本。') or ''
    end

    local allRulesData = p._internal_fetchAllRules(pageTitleString)
    if not allRulesData then
        -- context mode falls back to the unconverted title
        return wantContext and titleText or ''
    end

    if not wantContext then
        local combinedRules = {}
        if allRulesData.title and allRulesData.title ~= '' then
            combinedRules[#combinedRules + 1] = allRulesData.title
        end
        appendTitleCandidateRules(combinedRules, allRulesData, titleText)
        return formatRules(combinedRules, flag)
    end

    if not frame then
        return makeTitleErrorSpan("错误:type='context' 需要 frame 对象。")
    end

    if allRulesData.title and allRulesData.title ~= '' then
        local ruleWikitext = "-{T|" .. allRulesData.title .. "}-"
        local ok, result = pcall(frame.preprocess, frame, ruleWikitext)
        if ok then
            return result
        end
        mw.log('ConversionRuleExtractor: frame:preprocess failed for T rule on page ' .. pageTitleString .. ': ' .. tostring(result))
        return makeTitleErrorSpan("处理T规则时出错。")
    end

    local applicableRules = appendTitleCandidateRules({}, allRulesData, titleText)
    if #applicableRules == 0 then
        return titleText
    end

    local rulesWikitext = formatRules(applicableRules, 'H')
    local ok, result = pcall(frame.preprocess, frame, rulesWikitext .. titleText)
    if ok then
        return result
    end
    mw.log('ConversionRuleExtractor: frame:preprocess failed for H rules on page ' .. pageTitleString .. ': ' .. tostring(result))
    return makeTitleErrorSpan("应用H规则时出错。")
end


--[[--------------------------< 模板入口点 >--------------------------]]
-- (模板入口点 p.getFullText 和 p.getTitle 保持不变)
--- Template entry point: full-text conversion rules of a page.
-- Reads the page from the first positional argument or `page`, and the flag
-- from `flag` (default 'H'), then delegates to p.getFullTextRules.
--
-- Fix: mw.html has no `allToString` method, so the missing-page branch raised
-- a script error instead of showing the message; the span is now serialised
-- with tostring().
-- @param frame frame object handed over by #invoke
-- @return string wikitext
function p.getFullText(frame)
    local args = Arguments.getArgs(frame)
    local page = args[1] or args.page
    local flag = args.flag or 'H'

    if not page or page == '' then
        return tostring(
            mw_html.create('span')
                :addClass('error')
                :wikitext("错误:必须提供页面标题。")
        )
    end
    return p.getFullTextRules(page, flag)
end

--- Template entry point: conversion rules relevant to a page title.
-- Reads the page from the first positional argument or `page`, the flag from
-- `flag` and the output mode from `type`, then delegates to p.getTitleRules,
-- passing the frame along.
--
-- Fix: mw.html has no `allToString` method, so the missing-page branch raised
-- a script error instead of showing the message; the span is now serialised
-- with tostring().
-- @param frame frame object handed over by #invoke
-- @return string wikitext
function p.getTitle(frame)
    local args = Arguments.getArgs(frame)
    local page = args[1] or args.page
    local flag = args.flag
    local outputType = args.type

    if not page or page == '' then
        return tostring(
            mw_html.create('span')
                :addClass('error')
                :wikitext("错误:必须提供页面标题。")
        )
    end
    return p.getTitleRules(page, flag, outputType, frame)
end

return p