Jump to content

Wikipedia:Odd links/SQL

From Wikipedia, the free encyclopedia
The printable version is no longer supported and may have rendering errors. Please update your browser bookmarks and please use the default browser print function instead.

This page contains information on how to regenerate the Wikipedia:Odd links report.



DROP TABLE IF EXISTS oddlinks;

CREATE TABLE oddlinks (
  title varchar(255) BINARY NOT NULL,
  link varchar(255) BINARY NOT NULL,
  from_ns int(8) unsigned NOT NULL,
  to_ns int(8) unsigned NOT NULL
);

DROP TABLE IF EXISTS namespaces;

CREATE TABLE namespaces (
  ns int(8) unsigned NOT NULL,
  prefix varchar(255) BINARY NOT NULL
);

INSERT INTO namespaces VALUES ( 0, '' );
INSERT INTO namespaces VALUES ( 1, 'Talk'  );
INSERT INTO namespaces VALUES ( 2, 'User' );
INSERT INTO namespaces VALUES ( 3, 'User Talk' );
INSERT INTO namespaces VALUES ( 4, 'Wikipedia' );
INSERT INTO namespaces VALUES ( 5, 'Wikipedia Talk' );
INSERT INTO namespaces VALUES ( 6, 'File' );
INSERT INTO namespaces VALUES ( 7, 'File Talk' );
INSERT INTO namespaces VALUES ( 8, 'MediaWiki' );
INSERT INTO namespaces VALUES ( 9, 'MediaWiki Talk' );
INSERT INTO namespaces VALUES ( 10, 'Template' );
INSERT INTO namespaces VALUES ( 11, 'Template Talk' );
INSERT INTO namespaces VALUES ( 12, 'Help' );
INSERT INTO namespaces VALUES ( 13, 'Help Talk' );
INSERT INTO namespaces VALUES ( 14, 'Category' );
INSERT INTO namespaces VALUES ( 15, 'Category Talk' );
INSERT INTO namespaces VALUES ( 100, 'Portal' );
INSERT INTO namespaces VALUES ( 101, 'Portal Talk' );

// Capture links from namespace 0 to various other namespaces
// This is done in sections as each million rows take around 3 minutes to process

INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN  000000 AND 999999;
INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 1000000 AND 1999999;
INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 2000000 AND 2999999;
INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 3000000 AND 3999999;
INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 4000000 AND 4999999;
INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 5000000 AND 5999999;
INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 6000000 AND 6999999;
INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 7000000 AND 7999999;
INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 8000000 AND 8999999;
INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 9000000 AND 9999999;

INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 10000000 AND 10999999;
INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 11000000 AND 11999999;
INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 12000000 AND 12999999;
INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 13000000 AND 13999999;
INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 14000000 AND 14999999;
INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 15000000 AND 15999999;
INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 16000000 AND 16999999;
INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 17000000 AND 17999999;
INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 18000000 AND 18999999;
INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 19000000 AND 19999999;

INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 20000000 AND 20999999;
INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 21000000 AND 21999999;
INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 22000000 AND 22999999;
INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 23000000 AND 23999999;
INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 24000000 AND 24999999;
INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 25000000 AND 25999999;
INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 26000000 AND 26999999;
INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 27000000 AND 27999999;
INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 28000000 AND 28999999;
INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 29000000 AND 29999999;

// Do not consider pages with 'under construction' notices on them

DROP TABLE IF EXISTS pages_under_contruction;

CREATE TABLE pages_under_contruction
AS
   SELECT p.page_title
   FROM enwiki_p.page p
   INNER JOIN enwiki_p.templatelinks t ON p.page_id = t.tl_from
   WHERE t.tl_title IN ( 'Underconstruction','UnderConstruction','Under_construction','Under_Construction','Construction','Undercon','UnderCon' )
   AND   t.tl_namespace = 10;
   

SELECT concat( '* [[', o.title, ']] ? [[', n.prefix, ':', o.link, ']]' )
FROM oddlinks o
INNER JOIN namespaces n ON o.to_ns = n.ns
WHERE title IN ( 
   SELECT *
   FROM pages_under_contruction
);
ORDER BY n.ns ASC, o.title ASC;
   

DELETE FROM oddlinks WHERE title IN ( 
   SELECT *
   FROM pages_under_contruction
);


// Do not consider pages which link to User:CorenSearchBot
SELECT *
FROM oddlinks
WHERE from_ns = 0
AND to_ns = 2
AND link = 'CorenSearchBot';

DELETE
FROM oddlinks
WHERE from_ns = 0
AND to_ns = 2
AND link = 'CorenSearchBot';


// Pick out signaturs in articles - paired links to User: and matching User_Talk: namespaces
DROP TABLE IF EXISTS sigs;

CREATE TABLE sigs AS
SELECT o1.title, o1.link
FROM   oddlinks o1, oddlinks o2
WHERE o1.title = o2.title
AND   o1.link = o2.link
AND   o1.to_ns = 2
AND   o2.to_ns = 3;

SELECT concat( '* [[', title, ']] ? [[User:', link, ']] [[User talk:', link, ']]' )
FROM sigs
ORDER BY 1 ASC;

DELETE
FROM oddlinks
WHERE EXISTS
( SELECT *
  FROM sigs
  WHERE oddlinks.title = sigs.title
  AND oddlinks.link = sigs.link )
AND oddlinks.to_ns IN ( 2, 3 );

// Report remaining odd links

SELECT concat( '* [[', o.title, ']] ? [[', n.prefix, ':', o.link, ']]' )
FROM oddlinks o
INNER JOIN namespaces n ON o.to_ns = n.ns
ORDER BY n.ns ASC, o.title ASC;