User:YiFeiBot/wikiindex size.py
<pre style="overflow-x:scroll; overflow-y:hidden">
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
__version__ = '$Id: NULL $'
#
import re
import wikipedia as pywikibot
import pagegenerators
from pywikibot import i18n
import catlib
import sys
import urllib
from datetime import datetime

# This is required for the text that is shown when you run this script
# with the parameter -help.
docuReplacements = {
    '&params;': pagegenerators.parameterHelp
}


class WiSizeBot:
    # The edit summary message that should be used is placed in the /i18n
    # subdirectory. The file containing these messages should have the same
    # name as the caller script (i.e. wikiindex-size.py in this case).

    def __init__(self, generator, summary, debug=False):
        """
        Constructor. Parameters:
            @param generator: The page generator that determines on which pages
                              to work.
            @type generator: generator.
            @param summary: Set the summary message text for the edit.
            @type summary: (unicode) string.
        """
        self.generator = generator
        # init constants
        self.site = pywikibot.getSite("en", "wikiindex")
        # Set the edit summary message
        if summary:
            self.summary = summary
        else:
            self.summary = i18n.twtranslate(self.site, 'basic-changing')
        self.debug = debug

    def run(self):
        for page in self.generator:
            self.treat(page)

    def treat(self, page):
        """
        Loads the given page, updates its {{Size}} template, and saves it.
        """
        text = self.load(page)
        if not text:
            return
        newtext = text
        newtime = False  # set to True once a fresh page count has been fetched
        size_r = re.compile(ur"""(?P<all>\{\{\s*[Ss]ize\s*((\|\s*(?P<pages>pages|wiki[ _]pages)\s*=\s*(?P<pages_value>\d*)\s*[^\|\}]*\s*)|(\s*\|\s*(?P<pagesurl>statistics[ _]URL|wiki[ _]statistics[ _]URL)\s*=\s*(?P<pagesurl_value>https?://[^ \|\}\<]*)\s*[^\|\}]*\s*)|(\s*\|\s*(?P<wikifactor>wikiFactor)\s*=\s*(?P<wikifactor_value>\d*)\s*[^\|\}]*\s*)|(\s*\|\s*(?P<wikifactorurl>wikiFactor[ _]URL)\s*=\s*(?P<wikifactorurl_value>http://[^ \|\}\<]*)\s*[^\|\}]*\s*))+\s*\|?\s*\}\}(\s*\([Aa]s\s*of:?\s*(?P<day>\d+)\s*(?P<month>[A-Z][a-z]+)\s*(?P<year>\d+)\s*\)\s*(\<!--[ A-Za-z0-9/]+--\>)?)?)""")
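        # The pattern above matches the {{Size}} template as used on
        # WikiIndex pages, which looks roughly like this (parameter order
        # may vary, and trailing comments are tolerated):
        #
        #   {{Size
        #   |pages=
        #   |statistics URL=
        #   |wikiFactor=
        #   |wikiFactor URL=
        #   }} (As of: 1 January 2014)
        #
        # Each named group captures one parameter name and its value; the
        # optional "(As of: ...)" suffix yields the day/month/year groups.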
        wtext = page.get()
        m = size_r.finditer(wtext)
        all = ""
        newvalues = ""
        for i in m:
            all = i.group('all') and i.group('all').strip() or ''
            pages = i.group('pages') and i.group('pages').strip() or ''
            pagesurl = i.group('pagesurl') and i.group('pagesurl').strip() or ''
            wikifactor = i.group('wikifactor') and i.group('wikifactor').strip() or ''
            wikifactorurl = i.group('wikifactorurl') and i.group('wikifactorurl').strip() or ''
            pages_value = i.group('pages_value') and i.group('pages_value').strip() or '0'
            pagesurl_value = i.group('pagesurl_value') and i.group('pagesurl_value').strip() or ''
            wikifactor_value = i.group('wikifactor_value') and i.group('wikifactor_value').strip() or ''
            wikifactorurl_value = i.group('wikifactorurl_value') and i.group('wikifactorurl_value').strip() or ''
            day = i.group('day') and i.group('day').strip() or ''
            month = i.group('month') and i.group('month').strip() or ''
            year = i.group('year') and i.group('year').strip() or ''
            if self.debug:
                pywikibot.output(u"text = " + text)
                pywikibot.output(u"all = " + all)
                pywikibot.output(u"pages = " + pages)
                pywikibot.output(u"pagesurl = " + pagesurl)
                pywikibot.output(u"wikifactor = " + wikifactor)
                pywikibot.output(u"wikifactorurl = " + wikifactorurl)
                pywikibot.output(u"pages_value = " + pages_value)
                pywikibot.output(u"pagesurl_value = " + pagesurl_value)
                pywikibot.output(u"wikifactor_value = " + wikifactor_value)
                pywikibot.output(u"wikifactorurl_value = " + wikifactorurl_value)
                pywikibot.output(u"day = " + day)
                pywikibot.output(u"month = " + month)
                pywikibot.output(u"year = " + year)
            # get new values
            n = re.findall(ur"(https?://[^\|\}\]]+\?action=raw|https?://[^\|\}\]]+:Statistics)", pagesurl_value)
            if n:
                raw = ''
                try:
                    url = n[0]
                    if url.endswith(":Statistics"):
                        url += '?action=raw'
                    f = urllib.urlopen(url)
                    raw = unicode(f.read(), 'utf-8')
                    f.close()
                except:
                    # network or decoding failure: leave this page unchanged
                    break
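                # On older MediaWiki installations, Special:Statistics returns
                # a machine-readable summary when fetched with ?action=raw,
                # e.g. "total=1234;good=567;views=...;edits=...;", where
                # "good" is the number of content pages, extracted below.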
                o = re.findall(ur"total=\d+;good=(\d+);", raw)
                if o:
                    if o[0] and int(pages_value) != int(o[0]):
                        self.summary = u"Robot: Updating size: %s -> %s" % (pages_value, o[0])
                        pages_value = o[0]
                        newtime = True
                    else:
                        break
                else:
                    break
            else:
                break
            # end get
            # recalculate wikiFactor
            pass  # TODO, leave AS IS meanwhile
            # end recalculate
            """print pages, pages_value
            print pagesurl, pagesurl_value
            print wikifactor, wikifactor_value
            print wikifactorurl, wikifactorurl_value"""
            if newtime:
                dt = datetime.date(datetime.utcnow())
                day = dt.strftime('%d')
                month = dt.strftime('%B')
                year = dt.strftime('%Y')
            newvalues = u"""{{Size <!--see Template:Size for full detail-->
| %s = %s <!--type the plain number of pages - NO thousands separators-->
| %s = %s <!--page count source (often a 'Statistics' page); if unknown type 'No'-->
| %s = %s <!--preferred; if unknown leave void; see: Category:wikiFactor for help-->
| %s = %s <!--wF source (often 'PopularPages', 'Mostvisitedpages' or 'PageHits'); if unknown leave void-->
}}(As of: %s %s %s)<!--manually add/amend date when stats are verified and/or updated-->""" % (
                pages or 'pages',
                pages_value or '',
                pagesurl or 'statistics URL',
                pagesurl_value or '',
                wikifactor or 'wikiFactor',
                wikifactor_value or '',
                wikifactorurl or 'wikiFactor URL',
                wikifactorurl_value or '',
                day or '',
                month or '',
                year or '')
            newtext = text.replace(all, newvalues)
            if not self.save(newtext, page, self.summary):
                pywikibot.output(u'Page %s not saved.' % page.title(asLink=True))

    def load(self, page):
        """
        Loads the given page and returns its text, or None if the page does
        not exist or is a redirect.
        """
        try:
            # Load the page
            text = page.get()
        except pywikibot.NoPage:
            pywikibot.output(u"Page %s does not exist; skipping."
                             % page.title(asLink=True))
        except pywikibot.IsRedirectPage:
            pywikibot.output(u"Page %s is a redirect; skipping."
                             % page.title(asLink=True))
        else:
            return text
        return None

    def save(self, text, page, comment=None, **kwargs):
        # only save if something was changed
        if text != page.get():
            # Show the title of the page we're working on.
            # Highlight the title in purple.
            pywikibot.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<"
                             % page.title())
            # show what was changed
            pywikibot.showDiff(page.get(), text)
            pywikibot.output(u'Comment: %s' % comment)
            #choice = pywikibot.inputChoice(
            #    u'Do you want to accept these changes?',
            #    ['Yes', 'No'], ['y', 'N'], 'N')
            if True:
                try:
                    # Save the page
                    page.put(text, comment=comment or self.summary, **kwargs)
                except pywikibot.LockedPage:
                    pywikibot.output(u"Page %s is locked; skipping."
                                     % page.title(asLink=True))
                except pywikibot.EditConflict:
                    pywikibot.output(
                        u'Skipping %s because of edit conflict'
                        % (page.title()))
                except pywikibot.SpamfilterError, error:
                    pywikibot.output(
                        u'Cannot change %s because of spam blacklist entry %s'
                        % (page.title(), error.url))
                else:
                    return True
        return False


def main():
    # This factory is responsible for processing command line arguments
    # that are also used by other scripts and that determine on which pages
    # to work.
    genFactory = pagegenerators.GeneratorFactory()
    # The generator gives the pages that should be worked upon.
    gen = None
    # This temporary array is used to read the page title if one single
    # page to work on is specified by the arguments.
    pageTitleParts = []
    # summary message
    editSummary = ''
    debug = False
    start = "!"

    # Parse command line arguments
    for arg in pywikibot.handleArgs():
        if arg.startswith('-summary:'):
            editSummary = arg[9:]
        elif arg.startswith('-start:'):
            start = arg[7:]
        elif arg == '-debug':
            debug = True
        else:
            pywikibot.output(u'Unknown argument: %s' % arg)

    cat = catlib.Category(pywikibot.getSite("en", "wikiindex"), 'Category:MediaWiki')
    gen = pagegenerators.CategorizedPageGenerator(cat, start=start)
    #if not gen:
    #    gen = genFactory.getCombinedGenerator()
    if gen:
        # The preloading generator is responsible for downloading multiple
        # pages from the wiki simultaneously.
        gen = pagegenerators.PreloadingGenerator(gen)
        bot = WiSizeBot(gen, editSummary, debug)
        bot.run()
    else:
        pywikibot.showHelp()


if __name__ == "__main__":
    try:
        main()
    finally:
        pywikibot.stopme()
</pre>
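For reference: main() only understands the -summary:, -start: and -debug arguments and always walks Category:MediaWiki on WikiIndex, so a typical run under the old compat pywikibot framework would look something like the line below (the file name is an assumption based on the page title, not something the script itself defines):

<pre>
python wikiindex_size.py -start:! -summary:"Robot: Updating size" -debug
</pre>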
Latest revision as of 06:34, 25 August 2014