#!/usr/bin/python
# -*- coding: utf-8  -*-
# Source page: User:YiFeiBot/wikiindex size.py
__version__ = '$Id: NULL $'

import re
import sys
import urllib
from datetime import datetime

import wikipedia as pywikibot
import pagegenerators
from pywikibot import i18n
import catlib

# This is required for the text that is shown when you run this script
# with the parameter -help.
docuReplacements = {
    '&params;': pagegenerators.parameterHelp
}

class WiSizeBot:
    """Refresh the page count stored in ``{{Size}}`` templates.

    For every page yielded by the generator the bot looks for ``{{Size}}``
    templates, downloads the raw statistics from the template's statistics
    URL and, when the reported number of "good" pages differs from the value
    in the template, rewrites the template with the new count and the current
    UTC date.
    """

    # Edit summary message that should be used is placed on /i18n subdirectory.
    # The file containing these messages should have the same name as the
    # caller script (i.e. wikiindex-size.py in this case)

    # Matches a whole {{Size ...}} template plus an optional "(As of: D Month
    # YYYY)" suffix.  The four parameters may appear in any order.
    # NOTE(review): the group names were reconstructed from the group()
    # lookups in treat(); the pattern's garbled trailing group (presumably an
    # optional HTML comment) could not be recovered and is omitted -- confirm
    # against the upstream script.
    # NOTE(review): a URL parameter whose value does not start with http(s)://
    # (e.g. an empty value) makes the whole template unmatched.
    _SIZE_RE = re.compile(
        r"(?P<all>\{\{\s*[Ss]ize\s*("
        r"(\|\s*(?P<pages>pages|wiki[ _]pages)\s*=\s*"
        r"(?P<pages_value>\d*)\s*[^\|\}]*\s*)"
        r"|(\s*\|\s*(?P<pagesurl>statistics[ _]URL|wiki[ _]statistics[ _]URL)"
        r"\s*=\s*(?P<pagesurl_value>https?://[^ \|\}\<]*)\s*[^\|\}]*\s*)"
        r"|(\s*\|\s*(?P<wikifactor>wikiFactor)\s*=\s*"
        r"(?P<wikifactor_value>\d*)\s*[^\|\}]*\s*)"
        r"|(\s*\|\s*(?P<wikifactorurl>wikiFactor[ _]URL)\s*=\s*"
        r"(?P<wikifactorurl_value>https?://[^ \|\}\<]*)\s*[^\|\}]*\s*)"
        r")+\s*\|?\s*\}\}"
        r"(\s*\([Aa]s\s*of:?\s*(?P<day>\d+)\s*(?P<month>[A-Z][a-z]+)\s*"
        r"(?P<year>\d+)\s*\)\s*)?)"
    )

    # Locates the statistics URL inside the captured parameter value.
    _STATS_URL_RE = re.compile(
        r"(https?://[^\|\}\]]+\?action=raw|https?://[^\|\}\]]+:Statistics)")

    # Extracts the "good" page count from the raw statistics text.
    _GOOD_PAGES_RE = re.compile(r"total=\d+;good=(\d+);")

    # Named groups read from every match, in the order used for debug output.
    _FIELD_NAMES = (
        'pages', 'pagesurl', 'wikifactor', 'wikifactorurl',
        'pages_value', 'pagesurl_value', 'wikifactor_value',
        'wikifactorurl_value', 'day', 'month', 'year',
    )

    # NOTE(review): reconstructed layout.  The original format string lost
    # everything but the "(As of: ...)" suffix, leaving 3 placeholders for 11
    # arguments; any comments the original emitted inside the template are
    # unknown and therefore not reproduced.
    _SIZE_TEMPLATE = (
        u"{{Size\n"
        u"| %s = %s\n"
        u"| %s = %s\n"
        u"| %s = %s\n"
        u"| %s = %s\n"
        u"}}(As of: %s %s %s)"
    )

    def __init__(self, generator, summary, debug=False):
        """
        Constructor. Parameters:
            @param generator: The page generator that determines on which pages
                              to work.
            @type generator: generator.
            @param summary: Set the summary message text for the edit.
            @type summary: (unicode) string.
            @param debug: When true, treat() prints every value it extracted.
            @type debug: bool.
        """
        self.generator = generator
        # init constants
        self.site = pywikibot.getSite("en", "wikiindex")
        # Set the edit summary message
        if summary:
            self.summary = summary
        else:
            self.summary = i18n.twtranslate(self.site, 'basic-changing')
        self.debug = debug

    def run(self):
        """Process every page produced by the generator."""
        for page in self.generator:
            self.treat(page)

    @staticmethod
    def _group(match, name, default=u''):
        """Return the stripped text of named group *name*, or *default*.

        *default* is used when the group did not take part in the match or
        is empty after stripping.
        """
        return (match.group(name) or u'').strip() or default

    @classmethod
    def _raw_statistics_url(cls, value):
        """Return the raw-statistics URL found in *value*, or None.

        A URL ending in ``:Statistics`` gets ``?action=raw`` appended.
        """
        found = cls._STATS_URL_RE.findall(value)
        if not found:
            return None
        url = found[0]
        if url.endswith(":Statistics"):
            url += '?action=raw'
        return url

    @classmethod
    def _parse_good_page_count(cls, raw):
        """Return the ``good=`` figure of *raw* as a digit string, or None."""
        found = cls._GOOD_PAGES_RE.findall(raw)
        if found:
            return found[0]
        return None

    @classmethod
    def _render_size_template(cls, fields):
        """Build the replacement template text from the *fields* mapping.

        Parameter names missing from the matched template fall back to their
        canonical spelling; missing values are rendered empty.
        """
        return cls._SIZE_TEMPLATE % (
            fields['pages'] or 'pages',
            fields['pages_value'] or '',
            fields['pagesurl'] or 'statistics URL',
            fields['pagesurl_value'] or '',
            fields['wikifactor'] or 'wikiFactor',
            fields['wikifactor_value'] or '',
            fields['wikifactorurl'] or 'wikiFactor URL',
            fields['wikifactorurl_value'] or '',
            fields['day'] or '',
            fields['month'] or '',
            fields['year'] or '')

    def _fetch_good_page_count(self, stats_value):
        """Download the statistics named by *stats_value*.

        Returns the "good" page count as a digit string, or None when no
        usable URL is present, the download fails or the response does not
        contain the expected ``total=...;good=...;`` text.
        """
        url = self._raw_statistics_url(stats_value)
        if url is None:
            return None
        try:
            handle = urllib.urlopen(url)
            try:
                raw = unicode(handle.read(), 'utf-8')
            finally:
                # Close the connection even when reading/decoding fails.
                handle.close()
        except Exception:
            # Best effort: an unreachable or undecodable statistics page
            # simply means this template cannot be refreshed.  Unlike a bare
            # except this lets KeyboardInterrupt/SystemExit through.
            return None
        return self._parse_good_page_count(raw)

    def treat(self, page):
        """
        Loads the given page, updates its {{Size}} templates, and saves it.

        Processing stops at the first template whose count cannot be
        refreshed (no statistics URL, download failure, or unchanged count).
        The page is saved once, and only if at least one template changed.
        """
        text = self.load(page)
        if not text:
            return

        newtext = text
        summary = self.summary
        for match in self._SIZE_RE.finditer(text):
            whole = self._group(match, 'all')
            fields = dict((name, self._group(match, name))
                          for name in self._FIELD_NAMES)
            # An empty count is treated as zero so that int() below works.
            fields['pages_value'] = fields['pages_value'] or u'0'

            if self.debug:
                pywikibot.output(u"text = " + text)
                pywikibot.output(u"all = " + whole)
                for name in self._FIELD_NAMES:
                    pywikibot.output(u"%s = %s" % (name, fields[name]))

            # get new values
            new_count = self._fetch_good_page_count(fields['pagesurl_value'])
            if new_count is None:
                break
            if int(fields['pages_value']) == int(new_count):
                break
            summary = u"Robot: Updating size: %s -> %s" % (
                fields['pages_value'], new_count)
            fields['pages_value'] = new_count

            # recalculate wikifactor: TODO, leave AS IS meanwhile

            # The count changed, so stamp the template with today's UTC date.
            today = datetime.utcnow().date()
            fields['day'] = today.strftime('%d')
            fields['month'] = today.strftime('%B')
            fields['year'] = today.strftime('%Y')

            # Accumulate into newtext so that a later template does not undo
            # the replacement made for an earlier one.
            newtext = newtext.replace(whole,
                                      self._render_size_template(fields))

        if newtext != text and not self.save(newtext, page, summary):
            pywikibot.output(u'Page %s not saved.' % page.title(asLink=True))

    def load(self, page):
        """
        Returns the text of the given page, or None if it cannot be read.

        Missing pages and redirects are reported and skipped.
        """
        try:
            # Load the page
            text = page.get()
        except pywikibot.NoPage:
            pywikibot.output(u"Page %s does not exist; skipping."
                             % page.title(asLink=True))
        except pywikibot.IsRedirectPage:
            pywikibot.output(u"Page %s is a redirect; skipping."
                             % page.title(asLink=True))
        else:
            return text
        return None

    def save(self, text, page, comment=None, **kwargs):
        """
        Saves text to page if it differs from the current page text.

            @param text: The new page text.
            @param page: The page to write to.
            @param comment: Edit summary; defaults to the bot's summary.
            @param kwargs: Passed through to page.put().
            @return: True if the page was written, False otherwise.
        """
        current = page.get()
        # only save if something was changed
        if text == current:
            return False

        # Show the title of the page we're working on.
        # Highlight the title in purple.
        pywikibot.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<"
                         % page.title())
        # show what was changed
        pywikibot.showDiff(current, text)
        pywikibot.output(u'Comment: %s' % comment)
        # Changes are saved without asking for interactive confirmation.
        try:
            # Save the page
            page.put(text, comment=comment or self.summary, **kwargs)
        except pywikibot.LockedPage:
            pywikibot.output(u"Page %s is locked; skipping."
                             % page.title(asLink=True))
        except pywikibot.EditConflict:
            pywikibot.output(
                u'Skipping %s because of edit conflict'
                % (page.title()))
        except pywikibot.SpamfilterError as error:
            pywikibot.output(
                u'Cannot change %s because of spam blacklist entry %s'
                % (page.title(), error.url))
        else:
            return True
        return False

def main():
    """Parse the command line and run WiSizeBot over Category:MediaWiki.

    Recognised arguments:
        -summary:TEXT   edit summary handed to the bot
        -start:TITLE    passed as ``start`` to the category page generator
                        (defaults to "!")
        -debug          make the bot print the values it extracts

    Any other argument is reported as unknown and ignored.
    """
    # summary message
    editSummary = ''
    debug = False
    start = "!"

    # Parse command line arguments
    for arg in pywikibot.handleArgs():
        if arg.startswith('-summary:'):
            editSummary = arg[len('-summary:'):]
        elif arg.startswith('-start:'):
            start = arg[len('-start:'):]
        elif arg == '-debug':
            debug = True
        else:
            pywikibot.output(u'Unknown argument: %s' % arg)

    # The generator gives the pages that should be worked upon.
    cat = catlib.Category(pywikibot.getSite("en", "wikiindex"),
                          'Category:MediaWiki')
    gen = pagegenerators.CategorizedPageGenerator(cat, start=start)

    # The preloading generator is responsible for downloading multiple
    # pages from the wiki simultaneously.
    gen = pagegenerators.PreloadingGenerator(gen)
    bot = WiSizeBot(gen, editSummary, debug)
    bot.run()

if __name__ == "__main__":
    try:
        main()
    finally:
        # Always call the framework's shutdown hook, even if main() raised.
        pywikibot.stopme()