User:YiFeiBot/wikiindex size.py: Difference between revisions

From WikiIndex
Latest revision as of 06:34, 25 August 2014
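The script below is a pywikibot (compat) robot for WikiIndex: it walks the pages in Category:MediaWiki, parses each article's {{Size}} template, fetches a fresh page count from the wiki's statistics URL (a MediaWiki Special:Statistics page queried with ?action=raw), and rewrites the template with the new count and an updated "(As of: ...)" date stamp.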

#!/usr/bin/python
# -*- coding: utf-8  -*-
#
__version__ = '$Id: NULL $'
#

import re

import wikipedia as pywikibot
import pagegenerators
from pywikibot import i18n
import catlib
import sys
import urllib
from datetime import datetime
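
# Note: wikipedia, catlib and pagegenerators are modules of the old
# pywikibot "compat" framework, which runs on Python 2 only.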

# This is required for the text that is shown when you run this script
# with the parameter -help.
docuReplacements = {
    '&params;': pagegenerators.parameterHelp
}


class WiSizeBot:
    # The edit summary message that should be used is placed in the /i18n
    # subdirectory. The file containing these messages should have the same
    # name as the caller script (i.e. wikiindex-size.py in this case).

    def __init__(self, generator, summary, debug=False):
        """
        Constructor. Parameters:
            @param generator: The page generator that determines on which pages
                              to work.
            @type generator: generator.
            @param summary: Set the summary message text for the edit.
            @type summary: (unicode) string.
        """
        self.generator = generator
        # init constants
        self.site = pywikibot.getSite("en", "wikiindex")
        # Set the edit summary message
        if summary:
            self.summary = summary
        else:
            self.summary = i18n.twtranslate(self.site, 'basic-changing')
        self.debug = debug

    def run(self):
        for page in self.generator:
            self.treat(page)

    def treat(self, page):
        """
        Loads the given page, does some changes, and saves it.
        """
        text = self.load(page)
        if not text:
            return

        newtext = text
        size_r = None
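        # The pattern below captures one whole {{Size ...}} template (group
        # 'all') plus its individual fields: the page count ('pages_value'),
        # the statistics URL ('pagesurl_value'), the wikiFactor
        # ('wikifactor_value') and its source URL ('wikifactorurl_value'),
        # and an optional trailing "(As of: DD Month YYYY)" date
        # ('day', 'month', 'year').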
        size_r = re.compile(ur"""(?P<all>\{\{\s*[Ss]ize\s*((\|\s*(?P<pages>pages|wiki[ _]pages)\s*=\s*(?P<pages_value>\d*)\s*[^\|\}]*\s*)|(\s*\|\s*(?P<pagesurl>statistics[ _]URL|wiki[ _]statistics[ _]URL)\s*=\s*(?P<pagesurl_value>https?://[^ \|\}\<]*)\s*[^\|\}]*\s*)|(\s*\|\s*(?P<wikifactor>wikiFactor)\s*=\s*(?P<wikifactor_value>\d*)\s*[^\|\}]*\s*)|(\s*\|\s*(?P<wikifactorurl>wikiFactor[ _]URL)\s*=\s*(?P<wikifactorurl_value>http://[^ \|\}\<]*)\s*[^\|\}]*\s*))+\s*\|?\s*\}\}(\s*\([Aa]s\s*of:?\s*(?P<day>\d+)\s*(?P<month>[A-Z][a-z]+)\s*(?P<year>\d+)\s*\)\s*(\<!--[ A-Za-z0-9/]+--\>)?)?)""")
        wtext = page.get()
        m = size_r.finditer(wtext)
        all = ""
        newvalues = ""
        for i in m:
            all = i.group('all') and i.group('all').strip() or ''
            pages = i.group('pages') and i.group('pages').strip() or ''
            pagesurl = i.group('pagesurl') and i.group('pagesurl').strip() or ''
            wikifactor = i.group('wikifactor') and i.group('wikifactor').strip() or ''
            wikifactorurl = i.group('wikifactorurl') and i.group('wikifactorurl').strip() or ''

            pages_value = i.group('pages_value') and i.group('pages_value').strip() or '0'
            pagesurl_value = i.group('pagesurl_value') and i.group('pagesurl_value').strip() or ''
            wikifactor_value = i.group('wikifactor_value') and i.group('wikifactor_value').strip() or ''
            wikifactorurl_value = i.group('wikifactorurl_value') and i.group('wikifactorurl_value').strip() or ''
            day = i.group('day') and i.group('day').strip() or ''
            month = i.group('month') and i.group('month').strip() or ''
            year = i.group('year') and i.group('year').strip() or ''

            if self.debug:
                pywikibot.output(u"text = " + text)
                pywikibot.output(u"all = " + all)
                pywikibot.output(u"pages = " + pages)
                pywikibot.output(u"pagesurl = " + pagesurl)
                pywikibot.output(u"wikifactor = " + wikifactor)
                pywikibot.output(u"wikifactorurl = " + wikifactorurl)
                pywikibot.output(u"pages_value = " + pages_value)
                pywikibot.output(u"pagesurl_value = " + pagesurl_value)
                pywikibot.output(u"wikifactor_value = " + wikifactor_value)
                pywikibot.output(u"wikifactorurl_value = " + wikifactorurl_value)
                pywikibot.output(u"day = " + day)
                pywikibot.output(u"month = " + month)
                pywikibot.output(u"year = " + year)

            #get new values
            n = re.findall(ur"(https?://[^\|\}\]]+\?action=raw|https?://[^\|\}\]]+:Statistics)", pagesurl_value)
            if n:
                raw = ''
                try:
                    url = n[0]
                    if url.endswith(":Statistics"):
                        url += '?action=raw'
                    f = urllib.urlopen(url)
                    raw = unicode(f.read(), 'utf-8')
                    f.close()
                except:
                    break
                o = re.findall(ur"total=\d+;good=(\d+);", raw)
                if o:
                    if o[0] and int(pages_value) != int(o[0]):
                        self.summary = u"Robot: Updating size: %s -> %s" % (pages_value, o[0])
                        pages_value = o[0]
                        newtime = True
                    else:
                        break
                else:
                    break
            else:
                break
            #end get

            #recalculate wikifactor
            pass #TODO, leave AS IS meanwhile
            #end recalculate

            """print pages, pages_value
            print pagesurl, pagesurl_value
            print wikifactor, wikifactor_value
            print wikifactorurl, wikifactorurl_value"""

            if newtime:
                dt = datetime.date(datetime.utcnow())

                day = dt.strftime('%d')
                month = dt.strftime('%B')
                year = dt.strftime('%Y')

            newvalues = u"""{{Size <!--see Template:Size for full detail-->
| %s = %s <!--type the plain number of pages - NO thousands separators-->
| %s = %s <!--page count source (often a 'Statistics' page); if unknown type 'No'-->
| %s = %s <!--preferred; if unknown leave void; see: Category:wikiFactor for help-->
| %s = %s <!--wF source (often 'PopularPages', 'Mostvisitedpages' or 'PageHits'); if unknown leave void-->
}}(As of: %s %s %s)<!--manually add/amend date when stats are verified and/or updated-->""" % (
                pages or 'pages',
                pages_value or '',
                pagesurl or 'statistics URL',
                pagesurl_value or '',
                wikifactor or 'wikiFactor',
                wikifactor_value or '',
                wikifactorurl or 'wikiFactor URL',
                wikifactorurl_value or '',
                day or '',
                month or '',
                year or '')
            newtext = text.replace(all, newvalues)
        
        if not self.save(newtext, page, self.summary):
            pywikibot.output(u'Page %s not saved.' % page.title(asLink=True))

    def load(self, page):
        """
        Loads the given page and returns its text, or None on failure.
        """
        try:
            # Load the page
            text = page.get()
        except pywikibot.NoPage:
            pywikibot.output(u"Page %s does not exist; skipping."
                             % page.title(asLink=True))
        except pywikibot.IsRedirectPage:
            pywikibot.output(u"Page %s is a redirect; skipping."
                             % page.title(asLink=True))
        else:
            return text
        return None

    def save(self, text, page, comment=None, **kwargs):
        # only save if something was changed
        if text != page.get():
            # Show the title of the page we're working on.
            # Highlight the title in purple.
            pywikibot.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<"
                             % page.title())
            # show what was changed
            pywikibot.showDiff(page.get(), text)
            pywikibot.output(u'Comment: %s' % comment)
            #choice = pywikibot.inputChoice(
            #    u'Do you want to accept these changes?',
            #    ['Yes', 'No'], ['y', 'N'], 'N')
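            # The confirmation prompt above is commented out, so the bot
            # saves every changed page without asking.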
            if True:
                try:
                    # Save the page
                    page.put(text, comment=comment or self.summary, **kwargs)
                except pywikibot.LockedPage:
                    pywikibot.output(u"Page %s is locked; skipping."
                                     % page.title(asLink=True))
                except pywikibot.EditConflict:
                    pywikibot.output(
                        u'Skipping %s because of edit conflict'
                        % (page.title()))
                except pywikibot.SpamfilterError, error:
                    pywikibot.output(
                        u'Cannot change %s because of spam blacklist entry %s'
                        % (page.title(), error.url))
                else:
                    return True
        return False




def main():
    # This factory is responsible for processing command line arguments
    # that are also used by other scripts and that determine which pages
    # to work on.
    genFactory = pagegenerators.GeneratorFactory()
    # The generator gives the pages that should be worked upon.
    gen = None
    # This temporary array is used to read the page title if a single
    # page to work on is specified in the arguments.
    pageTitleParts = []
    # summary message
    editSummary = ''
    debug = False
    start = "!"

    # Parse command line arguments
    for arg in pywikibot.handleArgs():
        if arg.startswith('-summary:'):
            editSummary = arg[9:]
        elif arg.startswith('-start:'):
            start = arg[7:]
        elif arg == '-debug':
            debug = True
        else:
            pywikibot.output(u'Unknown argument: %s' % arg)

    cat = catlib.Category(pywikibot.getSite("en", "wikiindex"), 'Category:MediaWiki')
    gen = pagegenerators.CategorizedPageGenerator(cat, start=start)

    #if not gen:
    #    gen = genFactory.getCombinedGenerator()
    if gen:
        # The preloading generator is responsible for downloading multiple
        # pages from the wiki simultaneously.
        gen = pagegenerators.PreloadingGenerator(gen)
        bot = WiSizeBot(gen, editSummary, debug)
        bot.run()
    else:
        pywikibot.showHelp()

if __name__ == "__main__":
    try:
        main()
    finally:
        pywikibot.stopme()
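
For reference, running the script under the compat framework looks something like the following; the file name wikiindex_size.py is illustrative, and all three arguments are optional:

python wikiindex_size.py -start:M -summary:"Robot: Updating size" -debug

-start: resumes the walk through Category:MediaWiki at the given title, -summary: overrides the edit summary (otherwise the i18n 'basic-changing' message is used), and -debug makes the bot print every field captured from the {{Size}} template before saving.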