Jump to content

Wikipedia:WikiProject Orphanage/Orphaned Articles/How to update

From Wikipedia, the free encyclopedia
This is an old revision of this page, as edited by Docu (talk | contribs) at 17:00, 26 May 2004. The present address (URL) is a permanent link to this revision, which may differ significantly from the current revision.
(diff) ← Previous revision | Latest revision (diff) | Newer revision → (diff)
#Use at your own risk, no warranty implied or given
#Please debug/improve the query.

DROP TABLE IF EXISTS deorphan;
CREATE TABLE deorphan (
  l_from varchar(255) NOT NULL default '',
  l_fromid int(8) NOT NULL default '0',
  l_to int(8) NOT NULL default '0',
  KEY `l_fromid` (`l_fromid`),
  KEY `l_from` (`l_from`),
  KEY `l_to` (`l_to`)
) TYPE=MyISAM;


INSERT INTO deorphan SELECT l_from, 0, l_to
FROM links
LIMIT 10000000;


#links from namespaces other than the article namespace
#don't deorphan pages.
DELETE FROM deorphan 
WHERE l_from Like 'Talk:%'
OR l_from Like 'Wikipedia:%'
OR l_from Like 'Wikipedia_talk:%'
OR l_from Like 'User:%'
OR l_from Like 'User_talk:%'
OR l_from Like 'MediaWiki:%'
OR l_from Like 'MediaWiki_talk:%'
OR l_from Like 'Image:%'
OR l_from Like 'Image_talk:%';


UPDATE deorphan, cur
SET l_fromid = cur.cur_id
WHERE l_from = cur.cur_title;


ALTER TABLE deorphan DROP COLUMN l_from;


#links from redirects are not taken in account
DELETE 
 FROM deorphan 
USING deorphan, cur
WHERE l_fromid = cur_id
      AND cur_is_redirect = 1;


#links from disambiguation pages are not included
#i.e. a page linked only from a disambiguation page is an orphan
DELETE 
 FROM deorphan 
USING links, deorphan
WHERE (l_from LIKE 'Wikipedia:Links_to_disambiguating_pages%'
          OR l_from LIKE 'Wikipedia:Links_to_(disambiguation)_pages')
AND l_fromid = links.l_to;


#This avoids that disambiguation pages show up as orphans
INSERT INTO deorphan 
SELECT DISTINCT 999999, l_to
FROM links
WHERE
      (l_from LIKE 'Wikipedia:Links_to_disambiguating_pages%'
    OR l_from LIKE 'Wikipedia:Links_to_(disambiguation)_pages')
LIMIT 10000;


#Adds a temporary table with the orphans
DROP TABLE IF EXISTS orphans;
CREATE TABLE orphans 
SELECT cur_id 
FROM cur 
LEFT JOIN deorphan ON cur_id=l_to
WHERE l_to IS NULL 
	AND cur_namespace=0 
	AND cur_is_redirect=0
LIMIT 20000;


#Output from list (filtering some rambot stuff)
SELECT CONCAT('#[[', REPLACE (cur_title, '_', ' '), ']]') AS orphanslist
FROM cur, orphans
WHERE orphans.cur_id = cur.cur_id
AND NOT (cur_title LIKE '%(CDP)%'
	OR cur_title LIKE '%(town)%' 
	OR cur_title LIKE '%(city)%' 
	OR cur_title LIKE '%(village)%' 
	OR cur_title LIKE '%Township%')
ORDER BY Lower(cur_title)
LIMIT 20000;

Note: these queries are slow.