User:What7what8/test.js
外观
注意:保存之后,你必须清除浏览器缓存才能看到做出的更改。Google Chrome、Firefox、Microsoft Edge及Safari:按住⇧ Shift键并单击工具栏的“刷新”按钮。参阅Help:绕过浏览器缓存以获取更多帮助。
$.when(
mw.loader.getScript( "https://cdn.jsdelivr.net/npm/opencc-js@1.0.5/dist/umd/full.js"),
mw.loader.getScript( "https://cdnjs.cloudflare.com/ajax/libs/pdf.js/3.4.120/pdf.min.js"),
mw.loader.getScript( "https://unpkg.com/mammoth@1.4.8/mammoth.browser.min.js"),
mw.loader.getScript( "https://cdnjs.cloudflare.com/ajax/libs/jschardet/1.4.1/jschardet.min.js"),
mw.loader.getScript( "https://cdnjs.cloudflare.com/ajax/libs/jsdiff/7.0.0/diff.min.js"),
).then(
() => {
// Initial configuration.
// pdf.js needs the URL of its worker script; the version matches the pdf.min.js loaded above.
pdfjsLib.GlobalWorkerOptions.workerSrc = 'https://cdnjs.cloudflare.com/ajax/libs/pdf.js/3.4.120/pdf.worker.min.js';
// The opencc-js UMD bundle exposes a plain `OpenCC` object with a `Converter` factory that
// returns a conversion function; it is not a constructor, so `new OpenCC('t2s')` would throw.
// `from: 't'` (OpenCC standard Traditional) to `to: 'cn'` (Simplified) is the t2s conversion.
// The function is wrapped in an object so callers keep using `cc.convert(text)`.
const cc = { convert: OpenCC.Converter({ from: 't', to: 'cn' }) };
// URLs that have already been requested, so each source is fetched at most once.
const visitedUrl = [];
// Extra request headers sent with every source download.
// NOTE(review): browsers may ignore or restrict a script-supplied User-Agent, and a
// non-safelisted header can force a CORS preflight on cross-origin requests — confirm it is wanted.
const headers = { 'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:23.0) Gecko/20100101 Firefox/23.0' };
/**
 * Collect the article text and the external links of the current wiki page.
 *
 * Links are read from every `a[href]` inside `#bodyContent` (including the
 * reference list), keeping only http(s) URLs that do not contain
 * `wikipedia.org`, with duplicates removed.
 *
 * The text is taken from a detached deep copy of `#bodyContent` with `cite`
 * and `.references` elements removed, so the page shown to the reader is never
 * modified and the function can be called repeatedly with the same result.
 *
 * @returns {Promise<{rawText: string, links: string[]}>} The extracted text and
 *   link list; both are empty when the content element is missing or any step fails.
 */
async function fetchWikiContent() {
  try {
    const bodyContent = $('#bodyContent')[0];
    if (!bodyContent) {
      // Handled by the catch below, which logs and returns the empty result.
      throw new Error('#bodyContent not found');
    }

    // Extract all external links (absolute URLs as resolved by the browser).
    const links = Array.from(bodyContent.querySelectorAll('a[href]'))
      .map((anchor) => anchor.href)
      .filter((href) => href.startsWith('http') && !href.includes('wikipedia.org'));
    const uniqueLinks = [...new Set(links)];

    // Strip citation markup from a copy, never from the live page.
    const workingCopy = bodyContent.cloneNode(true);
    workingCopy.querySelectorAll('cite').forEach((cite) => cite.remove());
    workingCopy.querySelectorAll('.references').forEach((ref) => ref.remove());

    // Extract the plain text content.
    const rawText = workingCopy.textContent;
    return { rawText, links: uniqueLinks };
  } catch (error) {
    console.error('获取维基内容失败:', error);
    return { rawText: '', links: [] };
  }
}
/**
 * Normalise a piece of text for comparison.
 *
 * @param {string} text - Raw text to normalise.
 * @returns {string} The text run through the `cc` converter, with leading and
 *   trailing whitespace removed.
 */
function processContent(text) {
  const converted = cc.convert(text);
  return converted.trim();
}
/**
 * Find runs of text that two strings have in common.
 *
 * Whitespace is removed from BOTH inputs before the character diff. Stripping
 * only one side would make every space or line break in the other side count
 * as a difference, splitting matches at each word boundary.
 *
 * @param {string} source - Reference text (the wiki article).
 * @param {string} target - Text to compare against (an external source).
 * @param {number} [lengthThreshold=12] - A common run is reported only when it
 *   is strictly longer than this many characters.
 * @returns {string[]} Common runs, whitespace-free, in the order the diff yields them.
 */
function getMatches(source, target, lengthThreshold = 12) {
  const stripWhitespace = (value) => String(value ?? '').replace(/\s/g, '');
  const cleanSource = stripWhitespace(source);
  const cleanTarget = stripWhitespace(target);

  // Nothing can match an empty text; skip the diff entirely.
  if (cleanSource === '' || cleanTarget === '') {
    return [];
  }

  const matches = [];
  let currentMatch = '';
  // Close the run collected so far, keeping it only if it is long enough.
  const flush = () => {
    if (currentMatch.length > lengthThreshold) {
      matches.push(currentMatch);
    }
    currentMatch = '';
  };

  for (const part of Diff.diffChars(cleanSource, cleanTarget)) {
    if (part.added || part.removed) {
      flush();
    } else {
      currentMatch += part.value;
    }
  }
  // Handle a run that extends to the end of the text.
  flush();

  return matches;
}
/**
 * Decode raw bytes to a string, trying the most trustworthy encoding first.
 *
 * Order of attempts, each in strict mode: the charset declared by the server
 * (if any), jschardet's guess when its confidence exceeds 0.6, then UTF-8.
 * If every strict attempt fails, the bytes are decoded as lenient UTF-8.
 *
 * @param {Uint8Array} bytes - The bytes to decode.
 * @param {string} [declaredCharset] - Charset label taken from the Content-Type header.
 * @returns {string} The decoded text.
 */
function decodeBytes(bytes, declaredCharset) {
  const candidates = [];
  if (declaredCharset) {
    candidates.push(declaredCharset);
  }
  const detection = jschardet.detect(bytes);
  if (detection && detection.encoding && detection.confidence > 0.6) {
    candidates.push(detection.encoding);
  }
  candidates.push('utf-8');

  for (const label of candidates) {
    try {
      return new TextDecoder(label, { fatal: true }).decode(bytes);
    } catch {
      // Unsupported label or invalid byte sequence: deliberately fall through to the next candidate.
    }
  }
  return new TextDecoder('utf-8', { fatal: false }).decode(bytes);
}

/**
 * Download one external source and return its text, normalised with
 * `processContent`.
 *
 * The response is handled according to its Content-Type: PDF via pdf.js, Word
 * documents via mammoth, anything else as text (HTML is reduced to its body
 * text, without inline scripts and style sheets).
 *
 * @param {string} url - Address of the source to download.
 * @returns {Promise<string[]>} A one-element array with the normalised text, or
 *   an empty array when the URL was already requested or the download or
 *   parsing failed (failures are logged, never thrown).
 */
async function processSource(url) {
  // Each URL is requested at most once, even if the earlier attempt failed.
  if (visitedUrl.includes(url)) return [];
  visitedUrl.push(url);

  try {
    const response = await fetch(url, { headers });
    if (!response.ok) throw new Error(`HTTP错误 ${response.status}`);

    // The header can be absent; treat that as "unknown type" and decode as text.
    const contentType = response.headers.get('Content-Type') ?? '';
    const buffer = await response.arrayBuffer();
    let text = '';

    if (/application\/pdf/i.test(contentType)) {
      // PDF document: concatenate the text items of every page.
      const pdf = await pdfjsLib.getDocument({ data: buffer }).promise;
      const pageTexts = [];
      for (let pageNumber = 1; pageNumber <= pdf.numPages; pageNumber++) {
        const page = await pdf.getPage(pageNumber);
        const content = await page.getTextContent();
        pageTexts.push(content.items.map((item) => item.str).join(' '));
      }
      // Separate pages so the last word of one page is not fused with the first word of the next.
      text = pageTexts.join('\n');
    } else if (/application\/(msword|vnd\.openxmlformats-officedocument)/i.test(contentType)) {
      // Word document.
      const result = await mammoth.extractRawText({ arrayBuffer: buffer });
      text = result.value;
    } else {
      // Any other type is treated as text; honour the charset the server declared.
      const declaredCharset = /charset\s*=\s*"?([^\s;"]+)/i.exec(contentType)?.[1];
      text = decodeBytes(new Uint8Array(buffer), declaredCharset);

      // For HTML keep only the readable body text.
      if (/text\/html/i.test(contentType)) {
        const doc = new DOMParser().parseFromString(text, 'text/html');
        // Script and style contents are code, not prose, and would pollute the comparison.
        doc.querySelectorAll('script, style').forEach((element) => element.remove());
        text = doc.body ? doc.body.textContent : '';
      }
    }

    return [processContent(text)];
  } catch (error) {
    console.error(`处理链接失败 [${url}]:`, error);
    return [];
  }
}
/**
 * Run the duplicate-content check for the current page.
 *
 * Gets the article text and external links from `fetchWikiContent`, downloads
 * each link in turn with `processSource`, collects the common text runs found
 * by `getMatches`, then logs a plain-text report and offers it as a file
 * download.
 *
 * @returns {Promise<string>} The report text that was logged and downloaded.
 */
async function copyvio() {
  // Longest preview of a match shown in the report.
  const PREVIEW_LENGTH = 100;

  // Get the wiki content.
  const { rawText, links } = await fetchWikiContent();
  const processedWiki = processContent(rawText);

  // Process the external links one after another; results are keyed by URL.
  const results = new Map();
  for (const link of links) {
    const contents = await processSource(link);
    for (const content of contents) {
      const matches = getMatches(processedWiki, content);
      if (matches.length) {
        // Merge with earlier matches for the same URL, dropping duplicates.
        results.set(link, [...new Set([...(results.get(link) || []), ...matches])]);
      }
    }
  }

  // Build the report.
  let output = '查重结果:\n\n';
  for (const [url, matches] of results) {
    output += `URL: ${url}\n匹配内容(${matches.length}条):\n`;
    matches.forEach((match, i) => {
      // Only mark a match with an ellipsis when it really was shortened.
      const preview = match.length > PREVIEW_LENGTH
        ? `${match.substring(0, PREVIEW_LENGTH)}...`
        : match;
      output += `${i + 1}. ${preview}\n`;
    });
    output += '\n';
  }

  // Show the report and trigger the download.
  console.log(output);
  const blob = new Blob([output], { type: 'text/plain;charset=utf-8' });
  const blobUrl = URL.createObjectURL(blob);
  const a = document.createElement('a');
  a.href = blobUrl;
  a.download = '查重结果.txt';
  document.body.appendChild(a);
  a.click();
  document.body.removeChild(a);
  // Release the blob so it is not kept alive for the lifetime of the page;
  // deferred so the revocation happens only after the click has been dispatched.
  setTimeout(() => URL.revokeObjectURL(blobUrl), 0);

  return output;
}
// Start the program (the call below is currently disabled)
//copyvio().catch(console.error);
},
// Rejection handler of the `.then()` above: runs when loading the library scripts failed,
// in which case the success callback (and everything defined in it) never runs.
( e ) => {
// A script failed, and is not available
mw.log.error( e.message ); // => "Failed to load script"
}
);