Wikipedia:WikiProject Interlanguage Links/Scripts
Appearance
Into a mysaql database load the page and langlinks database dump files from two or more wikipedia Wikipedia:Database_download.
Extract interlanguage links into a single table for anaylysis
CREATE TABLE inter ( from_lang varchar(10), from_title varchar(255), to_lang varchar(10), to_title varchar(255) ); USE en; INSERT INTO inter.inter SELECT 'en', page.page_title, langlinks.ll_lang, langlinks.ll_title FROM page, langlinks WHERE page.page_id = langlinks.ll_from AND page.page_namespace = 0; USE de; INSERT INTO inter.inter SELECT 'de', page.page_title, langlinks.ll_lang, langlinks.ll_title FROM page, langlinks WHERE page.page_id = langlinks.ll_from AND page.page_namespace = 0; ... etc
Tidy up this table and index it:
update inter set from_title = replace( from_title, '_', ' ' ); update inter set to_title = replace( to_title, '_', ' ' ); // Clear nonsensical entries -- entries to nonsensical languages -- entries to blank titles -- entries to unlikely titles -- poss entries to nonexistant articles ? ALTER TABLE inter ADD INDEX ( from_lang, from_title ); ALTER TABLE inter ADD INDEX ( to_lang, to_title );
And extract suggested red links, ie:
CREATE TABLE suggestions ( from_lang varchar(10), from_title varchar(255), to_lang varchar(10), to_title varchar(255) ); INSERT INTO suggestions SELECT DISTINCT a.from_lang, a.from_title, b.to_lang, b.to_title FROM inter a INNER JOIN inter b ON a.to_lang = b.from_lang AND a.to_title = b.from_title LEFT JOIN inter c ON c.from_lang = a.from_lang AND c.from_title = a.from_title AND c.to_lang = b.to_lang // Not c.to_title - any link from a to c existsing is sufficient WHERE a.from_lang = 'en' AND b.to_lang != a.from_lang AND c.from_lang IS NULL; // a:x -> b:x and b:x exists and no link from b:? to a:x exists => b:x -> a:x SELECT concat( '*[[', a.to_title, ']] → [[:', a.from_lang, ':', a.from_title, ']]' ) FROM inter a INNER JOIN en.page p ON p.page_title = a.to_title AND p.page_namespace = 0 AND p.page_is_redirect = 0 LEFT JOIN inter b ON b.from_lang = a.to_lang AND b.from_title = a.to_title AND b.to_lang = a.from_lang WHERE a.from_lang = 'sv' AND a.to_lang = 'en' AND b.from_lang IS NULL;