Jump to content

User:SQLBot/Readref.php

From Wikipedia, the free encyclopedia
This is an old revision of this page, as edited by SQL (talk | contribs) at 13:25, 27 August 2008 (+script). The present address (URL) is a permanent link to this revision, which may differ significantly from the current revision.
(diff) ← Previous revision | Latest revision (diff) | Newer revision → (diff)
<?php
# Input file, read line by line further down for <title> and <text> elements
# (presumably a MediaWiki XML export -- confirm against how test.txt is produced).
$fIn = fopen("test.txt", "r");
if( $fIn === FALSE ) {
        # Without this guard the read loop below would run against an invalid handle.
        die("\n\nERROR: Could not open test.txt for reading\n\n");
}

# Output file: receives one article title per line for every article flagged as a problem.
$fOut = fopen("refres.txt", "w");
if( $fOut === FALSE ) {
        fclose( $fIn );
        die("\n\nERROR: Could not open refres.txt for writing\n\n");
}

# Matches an opening <ref> tag, with or without attributes (case-insensitive).
$refs = "/(<ref |<ref>)/i";
# Matches anything that renders a reference list: a {{reflist}}-style template
# or a <references> tag (case-insensitive).
$reflist = "/(\{\{(reflist|reference|refs|footnotes)|<references)/i";

/**
 * Decide whether an article uses <ref> tags but has nothing to display them.
 *
 * The text is HTML-entity-decoded first, so escaped markup such as
 * "&lt;ref&gt;" is recognised. Any text containing "#REDIRECT"
 * (case-insensitive) is never reported. The patterns come from the global
 * variables $refs and $reflist.
 *
 * @param string $text Raw article text; NULL is treated as an empty article.
 * @return bool TRUE when the text matches $refs but not $reflist,
 *              FALSE in every other case (including redirects).
 */
function checkArticle( $text ) {
        global $refs, $reflist;

        # Callers may hand over NULL when their own extraction failed; the cast
        # keeps html_entity_decode() from being called with a non-string.
        $text = html_entity_decode( (string) $text );

        if( stripos( $text, "#REDIRECT" ) !== FALSE ) {
                return( FALSE );
        }

        $hasRef = ( preg_match( $refs, $text ) === 1 );
        $hasRefList = ( preg_match( $reflist, $text ) === 1 );

        # Always return a real boolean instead of falling off the end with NULL.
        return( $hasRef && !$hasRefList );
}
# Running totals shown in the progress output.
$num = 0;       # articles examined
$ok = 0;        # articles without a problem
$prob = 0;      # articles written to the results file
# Most recent <title> seen; initialised so it is defined even when a <text>
# element turns up before any <title>.
$title = "";

# fgets() returns FALSE at end of file, which ends the scan.
while( ( $fLine = fgets( $fIn ) ) !== FALSE ) {
        $fLine = trim( $fLine );

        if( preg_match("/\<title\>(.*)\<\/title\>/i", $fLine, $mTitle) ) {
                echo "$num [$ok / $prob]: Checking $mTitle[1]... ";
                $title = $mTitle[1];
        }

        # Only lines that open a <text> element start an article body.
        if( strpos($fLine, "<text") === FALSE ) {
                continue;
        }

        # A self-closing <text ... /> is an empty article; it has no closing
        # tag, so looking for one would swallow the following page.
        $selfClosed = ( preg_match('/<text\b[^>]*\/>/i', $fLine) === 1 );

        $aText = $fLine;
        $multiLine = FALSE;
        # strpos() returns 0 when "</text>" starts the line, so the result has
        # to be compared against FALSE instead of being used as a boolean.
        $mEndFound = ( $selfClosed || strpos($fLine, "</text>") !== FALSE );

        # Collect further lines until the closing tag appears; lines are trimmed
        # and joined without separators. Stop at end of file as well, so a
        # truncated input cannot loop forever.
        while( !$mEndFound && ( $fLine = fgets( $fIn ) ) !== FALSE ) {
                $multiLine = TRUE;
                $fLine = trim( $fLine );
                $aText = $aText . $fLine;
                $mEndFound = ( strpos($fLine, "</text>") !== FALSE );
        }

        if( !$mEndFound ) {
                echo " ERROR: end of file reached inside <text> element, stopping.\n";
                break;
        }

        if( $multiLine ) {
                echo " End Found... ";
        }

        # Accept any attribute list on the opening tag, not only
        # xml:space="preserve"; a failed match leaves the text empty.
        $articleText = "";
        if( !$selfClosed && preg_match('/<text\b[^>]*>(.*)<\/text>/i', $aText, $mText) ) {
                $articleText = $mText[1];
        }

        if( $multiLine ) {
                echo " Got Text... ";
        } else {
                echo " Got text...";
        }

        if( !checkArticle( $articleText ) ) {
                echo " No problems!\n";
                $ok++;
        } else {
                echo " Problem!\n";
                fwrite($fOut, "$title\n");
                $prob++;
        }
        $num++;
}

fclose( $fIn );
fclose( $fOut );
?>