Jump to content

User:Xenobot/communes.py

From Wikipedia, the free encyclopedia
The printable version is no longer supported and may have rendering errors. Please update your browser bookmarks and please use the default browser print function instead.
#!/usr/bin/env python2.5
#version 0.6

import codecs
import re
import wikitools
import settings

# Log in with the credentials stored in the local settings module.
wiki = wikitools.Wiki()
wiki.login(settings.username, settings.password)
print('logged in now')

# Log of skipped pages. Opened in append mode so entries from earlier runs
# are kept; it is written to and closed by the processing loop below.
output_skip = codecs.open('output-commune.txt', 'a', 'utf-8')

# Input list: one page title per line, kept as raw byte strings here (the
# processing loop decodes each one as UTF-8).
f = open('may3morb.txt', 'r')
try:
    raw_titles = f.read()
finally:
    # Close the handle even if the read fails.
    f.close()

# splitlines() handles both "\n" and "\r\n" endings. Blank lines (for example
# the empty entry a trailing newline used to produce) are dropped so they are
# never looked up as page titles.
page_list = [line for line in raw_titles.splitlines() if line.strip(' \t')]
print('retrieved list of pages')

# Patterns are compiled once here instead of being rebuilt (and searched twice)
# for every page.  Literal dots in host names are escaped so that '.' no longer
# matches an arbitrary character.

# The article must link to the INSEE site somewhere.
INSEE_URL_RE = re.compile(r'www\.insee\.fr', re.I | re.U)

# Infobox parameter "| insee = XXYYY": group 1 is the first two characters
# (used as "codedep" in the new URL), group 2 the next three ("codecom").
INSEE_PARAM_RE = re.compile(r'\|[ ]*insee[ ]*=[ ]*(\w\w)(\w\w\w)', re.I | re.U)

# The existing reference section this bot rewrites: a "References" heading
# followed by four consecutive lines mentioning, in order, "based on the
# article", asso.fr, insee.fr and www.ign.fr.
REF_SECTION_RE = re.compile(
    r'\n==[ ]*References[ ]*=='
    r'\n.*based on the article.*'
    r'\n.*asso\.fr.*'
    r'\n.*insee\.fr.*'
    r'\n.*www\.ign\.fr.*',
    re.I | re.U)

# Replacement section: the heading plus two bullet points.  The two %s slots
# receive the two groups captured by INSEE_PARAM_RE.
REF_SECTION_TEMPLATE = (
    '\n== References =='
    '\n* [http://www.maires56.asso.fr Mayors of Morbihan Association] {{fr icon}}'
    '\n* [http://www.insee.fr/fr/methodes/nomenclatures/cog/fichecommunale.asp?codedep=%s&codecom=%s INSEE commune file]')


def _log_skip(title, reason):
    """Append a '<title>  --  <reason>' line to the skip log and flush it."""
    output_skip.write(title + '  --  ' + reason + '\n')
    output_skip.flush()


try:
    for name in page_list:
        # Ignore blank entries (e.g. from a trailing newline in the list file).
        if not name.strip(' \t\r'):
            continue

        title = unicode(name, 'utf-8')
        page = wikitools.Page(wiki, title, followRedir=False)
        if not page.exists:
            continue

        article_text = page.getWikiText()

        if not INSEE_URL_RE.search(article_text):
            print('Skipping %s' % title)
            _log_skip(title, 'no insee.fr url')
            continue

        insee = INSEE_PARAM_RE.search(article_text)
        if insee is None:
            print('couldn\'t find insee; skipping %s' % title)
            _log_skip(title, 'no insee param')
            continue

        if not REF_SECTION_RE.search(article_text):
            print('couldn\'t find ref header; skipping %s' % title)
            _log_skip(title, 'no ref header found')
            continue

        new_refs = REF_SECTION_TEMPLATE % (insee.group(1), insee.group(2))
        # A function replacement inserts new_refs literally; a plain string
        # would be treated as a template with backslash escapes processed.
        new_text = REF_SECTION_RE.sub(lambda match: new_refs, article_text)
        page.edit(new_text, summary=settings.editsumm, bot=1)
        print('Editing %s' % title)
finally:
    # Always release the skip log, even if a page lookup or edit raises.
    output_skip.close()