User:YiFeiBot/wikiindex size.py: Difference between revisions

Jump to navigation Jump to search
major change
(highlight)
(major change)
Line 2: Line 2:
<pre style="overflow-x:scroll; overflow-y:hidden">
<pre style="overflow-x:scroll; overflow-y:hidden">
#!/usr/bin/python
#!/usr/bin/python
# -*- coding: utf-8 -*-
# -*- coding: utf-8 -*-
"""
This is not a complete bot; rather, it is a template from which simple
bots can be made. You can rename it to mybot.py, then edit it in
whatever way you want.


# Copyright (C) 2011 emijrp
The following parameters are supported:
# This program is free software: you can redistribute it and/or modify
 
# it under the terms of the GNU General Public License as published by
&params;
# the Free Software Foundation, either version 3 of the License, or
 
# (at your option) any later version.
-summary:XYZ      Set the summary message text for the edit to XYZ, bypassing
#
                  the predefined message texts with original and replacements
# This program is distributed in the hope that it will be useful,
                  inserted.
# but WITHOUT ANY WARRANTY; without even the implied warranty of
 
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
All other parameters will be regarded as part of the title of a single page,
# GNU General Public License for more details.
and the bot will only work on that single page.
#  
"""
# You should have received a copy of the GNU General Public License
#
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# (C) Pywikipedia bot team, 2006-2011
#
# Distributed under the terms of the MIT license.
#
__version__ = '$Id: wikiindex-size.py 11155 2013-05-13 10:39:02Z xqt $'
#


import catlib
import re
import re
import wikipedia as pywikibot
import pagegenerators
import pagegenerators
from pywikibot import i18n
import catlib
import sys
import sys
import urllib
import urllib
import wikipedia


s = wikipedia.Site('en', 'wikiindex')
# This is required for the text that is shown when you run this script
cat = catlib.Category(s, 'Category:MediaWiki')
# with the parameter -help.
docuReplacements = {
    '&params;': pagegenerators.parameterHelp
}


start = '!'
if len(sys.argv) == 2:
    start = sys.argv[1]


gen = pagegenerators.CategorizedPageGenerator(cat, start=start)
class WiSizeBot:
pre = pagegenerators.PreloadingGenerator(gen, pageNumber=50)
    # Edit summary message that should be used is placed on /i18n subdirectory.
    # The file containing these messages should have the same name as the caller
    # script (i.e. wikiindex-size.py in this case)


"""
    def __init__(self, generator, summary):
{{Size
        """
|pages = <!--Necessary. Type the plain number of pages here - no thousands separators.-->
        Constructor. Parameters:
wiki pages, wiki_pages
            @param generator: The page generator that determines on which pages
                              to work.
            @type generator: generator.
            @param summary: Set the summary message text for the edit.
            @type summary: (unicode) string.
        """
        self.generator = generator
        # init constants
        self.site = pywikibot.getSite(code=pywikibot.default_code)
        # Set the edit summary message
        if summary:
            self.summary = summary
        else:
            self.summary = i18n.twtranslate(self.site, 'basic-changing')


|statistics URL = <!--Preferred, source of page count (mostly a statistics page). If unknown leave void.-->
    def run(self):
wiki statistics URL, wiki_statistics_URL
        for page in self.generator:
            self.treat(page)


|wikiFactor = <!--Optional. If unknown leave void. (See Proposal:wikiFactor)-->
    def treat(self, page):
|wikiFactor URL = <!--Optional, source of wiki factor. If unknown leave void.-->
        """
}}
        Loads the given page, does some changes, and saves it.
"""
        """
        text = self.load(page)
        if not text:
            return


"""
        ################################################################
{{Size <!--see Template:Size for full detail-->
        # NOTE: Here you can modify the text in whatever way you want. #
|pages= <!--type the plain number of pages - NO thousands separators-->
        ################################################################
|statistics URL= No <!--page count source (often a 'Statistics' page); if unknown type 'No'-->
|wikiFactor= <!--preferred; if unknown leave void; see: Category:wikiFactor for help-->
|wikiFactor URL= No <!--wF source (often 'PopularPages', 'Mostvisitedpages' or 'PageHits'); if unknown leave void-->
}}(As of: DD Month 2013)<!--manually add/amend date when stats are verified and/or updated-->
"""


size_r = re.compile(ur"""(?im)(?P<all>\{\{\s*Size\s*((\s*\|\s*(?P<pages>pages|wiki[ _]pages)\s*=\s*(?P<pages_value>\d*)\s*[^\|\}]*\s*)|(\s*\|\s*(?P<pagesurl>statistics[ _]URL|wiki[ _]statistics[ _]URL)\s*=\s*(?P<pagesurl_value>https?://[^ \|\}\<]*)\s*[^\|\}]*\s*)|(\s*\|\s*(?P<wikifactor>wikiFactor)\s*=\s*(?P<wikifactor_value>\d*)\s*[^\|\}]*\s*)|(\s*\|\s*(?P<wikifactorurl>wikiFactor[ _]URL)\s*=\s*(?P<wikifactorurl_value>http://[^ \|\}\<]*)\s*[^\|\}]*\s*))+\s*\|?\s*\}\})""")
        # If you find out that you do not want to edit this page, just return.
        # Example: This puts the text 'Test' at the beginning of the page.
        #text = 'Test ' + text


for page in pre:
         newtext = text
    if not page.exists() or page.isRedirectPage():
         continue
   
    wikipedia.output('--> %s <--' % (page.title()))
    wtext = page.get()
    newtext = wtext
   
    m = size_r.finditer(wtext)
    for i in m:
        all = i.group('all') and i.group('all').strip() or ''
        pages = i.group('pages') and i.group('pages').strip() or ''
        pagesurl = i.group('pagesurl') and i.group('pagesurl').strip() or ''
        wikifactor = i.group('wikifactor') and i.group('wikifactor').strip() or ''
        wikifactorurl = i.group('wikifactorurl') and i.group('wikifactorurl').strip() or ''
          
          
         pages_value = i.group('pages_value') and i.group('pages_value').strip() or '0'
         m = size_r.finditer(text)
        pagesurl_value = i.group('pagesurl_value') and i.group('pagesurl_value').strip() or ''
        for i in m:
        wikifactor_value = i.group('wikifactor_value') and i.group('wikifactor_value').strip() or ''
            all = i.group('all') and i.group('all').strip() or ''
        wikifactorurl_value = i.group('wikifactorurl_value') and i.group('wikifactorurl_value').strip() or ''
            pages = i.group('pages') and i.group('pages').strip() or ''
       
            pagesurl = i.group('pagesurl') and i.group('pagesurl').strip() or ''
        #get new values
            wikifactor = i.group('wikifactor') and i.group('wikifactor').strip() or ''
        n = re.findall(ur"(https?://[^\|\}\]]+\?action=raw|https?://[^\|\}\]]+:Statistics)", pagesurl_value)
            wikifactorurl = i.group('wikifactorurl') and i.group('wikifactorurl').strip() or ''
        if n:
           
            raw = ''
            pages_value = i.group('pages_value') and i.group('pages_value').strip() or '0'
            try:
            pagesurl_value = i.group('pagesurl_value') and i.group('pagesurl_value').strip() or ''
                url = n[0]
            wikifactor_value = i.group('wikifactor_value') and i.group('wikifactor_value').strip() or ''
                if url.endswith(":Statistics"):
            wikifactorurl_value = i.group('wikifactorurl_value') and i.group('wikifactorurl_value').strip() or ''
                    url += '?action=raw'
           
                f = urllib.urlopen(url)
            #get new values
                raw = unicode(f.read(), 'utf-8')
            n = re.findall(ur"(https?://[^\|\}\]]+\?action=raw|https?://[^\|\}\]]+:Statistics)", pagesurl_value)
                f.close()
            if n:
            except:
                raw = ''
                break
                try:
            o = re.findall(ur"total=\d+;good=(\d+);", raw)
                    url = n[0]
            if o:
                    if url.endswith(":Statistics"):
                if o[0] and int(pages_value) != int(o[0]):
                        url += '?action=raw'
                    summary = u"Robot: Updating size: %s -> %s" % (pages_value, o[0])
                    f = urllib.urlopen(url)
                    pages_value = o[0]
                    raw = unicode(f.read(), 'utf-8')
                    f.close()
                except:
                    break
                o = re.findall(ur"total=\d+;good=(\d+);", raw)
                if o:
                    if o[0] and int(pages_value) != int(o[0]):
                        summary = u"Robot: Updating size: %s -> %s" %     (pages_value, o[0])
                        pages_value = o[0]
                    else:
                        break
                 else:
                 else:
                     break
                     break
             else:
             else:
                 break
                 break
        else:
             #end get
             break
           
        #end get
            #recalculate wikifactor
       
            pass #TODO, leave AS IS meanwhile
        #recalculate wikifactor
            #end recalculate
        pass #TODO, leave AS IS meanwhile
           
        #end recalculate
            """print pages, pages_value
       
            print pagesurl, pagesurl_value
        """print pages, pages_value
            print wikifactor, wikifactor_value
        print pagesurl, pagesurl_value
            print wikifactorurl, wikifactorurl_value"""
        print wikifactor, wikifactor_value
           
        print wikifactorurl, wikifactorurl_value"""
            newvalues = u"""{{Size <!--see Template:Size for full     detail-->
       
        newvalues = u"""{{Size <!--see Template:Size for full detail-->
| %s = %s <!--type the plain number of pages - NO thousands separators-->
| %s = %s <!--type the plain number of pages - NO thousands separators-->
| %s = %s <!--page count source (often a 'Statistics' page); if unknown type 'No'-->
| %s = %s <!--page count source (often a 'Statistics' page); if unknown type 'No'-->
Line 121: Line 142:
| %s = %s <!--wF source (often 'PopularPages', 'Mostvisitedpages' or 'PageHits'); if unknown leave void-->
| %s = %s <!--wF source (often 'PopularPages', 'Mostvisitedpages' or 'PageHits'); if unknown leave void-->
}}""" % (pages and pages or 'pages', pages_value and pages_value or '', pagesurl and pagesurl or 'statistics URL', pagesurl_value and pagesurl_value or '', wikifactor and wikifactor or 'wikiFactor', wikifactor_value and wikifactor_value or '', wikifactorurl and wikifactorurl or 'wikiFactor URL', wikifactorurl_value and wikifactorurl_value or '')
}}""" % (pages and pages or 'pages', pages_value and pages_value or '', pagesurl and pagesurl or 'statistics URL', pagesurl_value and pagesurl_value or '', wikifactor and wikifactor or 'wikiFactor', wikifactor_value and wikifactor_value or '', wikifactorurl and wikifactorurl or 'wikiFactor URL', wikifactorurl_value and wikifactorurl_value or '')
         newtext = wtext.replace(all, newvalues)
         newtext = text.replace(all, newvalues)
         if wtext != newtext:
       
             wikipedia.showDiff(wtext, newtext)
         if not self.save(newtext, page, self.summary):
             page.put(newtext, summary)
            pywikibot.output(u'Page %s not saved.' % page.title(asLink=True))
              
 
         break
    def load(self, page):
        """
        Loads the given page, does some changes, and saves it.
        """
        try:
            # Load the page
            text = page.get()
        except pywikibot.NoPage:
            pywikibot.output(u"Page %s does not exist; skipping."
                            % page.title(asLink=True))
        except pywikibot.IsRedirectPage:
            pywikibot.output(u"Page %s is a redirect; skipping."
                            % page.title(asLink=True))
        else:
            return text
        return None
 
    def save(self, text, page, comment=None, **kwargs):
        # only save if something was changed
        if text != page.get():
            # Show the title of the page we're working on.
            # Highlight the title in purple.
            pywikibot.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<"
                            % page.title())
            # show what was changed
            pywikibot.showDiff(page.get(), text)
            pywikibot.output(u'Comment: %s' % comment)
            choice = pywikibot.inputChoice(
                u'Do you want to accept these changes?',
                ['Yes', 'No'], ['y', 'N'], 'N')
             if choice == 'y':
                try:
                    # Save the page
                    page.put(text, comment=comment or self.comment, **kwargs)
                except pywikibot.LockedPage:
                    pywikibot.output(u"Page %s is locked; skipping."
                                    % page.title(asLink=True))
                except pywikibot.EditConflict:
                    pywikibot.output(
                        u'Skipping %s because of edit conflict'
                        % (page.title()))
                except pywikibot.SpamfilterError, error:
                    pywikibot.output(
                        u'Cannot change %s because of spam blacklist entry %s'
                        % (page.title(), error.url))
                else:
                    return True
        return False
 
 
class AutoWiSizeBot(WiSizeBot):
    """
    Non-interactive variant of WiSizeBot.

    Intended for usage e.g. as cronjob without prompting the user.

    NOTE(review): save() here takes (page, text, ...) while WiSizeBot.save()
    takes (text, page, ...), and WiSizeBot.treat() calls
    self.save(newtext, page, ...). Confirm the argument order before running
    treat() on an instance of this class.
    """

    # Splits page text into lines.
    _REGEX_eol = re.compile(u'\n')

    def __init__(self):
        """Constructor; initialises the base bot without generator/summary."""
        WiSizeBot.__init__(self, None, None)

    ## @since   10326
    #  @remarks needed by various bots
    def save(self, page, text, comment=None, **kwargs):
        """
        Write text to page without asking the user.

        @param page: The page to write to.
        @param text: The new page text.
        @param comment: Edit summary handed to page.put(); when empty,
                        pywikibot.action is shown in the log line instead.
        @param kwargs: Passed through unchanged to page.put().
        @return: True as soon as one page.put() succeeds, False after three
                 attempts that each ended in a handled error.
        """
        pywikibot.output(u'\03{lightblue}Writing to wiki on %s...\03{default}'
                         % page.title(asLink=True))

        comment_output = comment or pywikibot.action
        pywikibot.output(u'\03{lightblue}Comment: %s\03{default}'
                         % comment_output)

        #pywikibot.showDiff(page.get(), text)

        # NOTE(review): every handled error is retried, although the
        # messages say "skipping" -- confirm whether a retry is wanted.
        for _attempt in range(3):
            try:
                # Save the page
                page.put(text, comment=comment, **kwargs)
            except pywikibot.LockedPage:
                pywikibot.output(
                    u"\03{lightblue}Page %s is locked; skipping.\03{default}"
                    % page.title(asLink=True))
            except pywikibot.EditConflict:
                pywikibot.output(
                    u'\03{lightblue}Skipping %s because of edit '
                    u'conflict\03{default}' % (page.title()))
            except pywikibot.SpamfilterError as error:
                pywikibot.output(
                    u'\03{lightblue}Cannot change %s because of spam blacklist '
                    u'entry %s\03{default}' % (page.title(), error.url))
            else:
                return True
        return False

    ## @since   10326
    #  @remarks needed by various bots
    def append(self, page, text, comment=None, section=None, **kwargs):
        """
        Append text to a page, or to one section of it.

        @param page: The page to append to.
        @param text: The text to add.
        @param comment: Edit summary.
        @param section: When given, page.append() is used on that section;
                        otherwise the whole page is loaded, extended by a
                        blank line plus text, and written with save().
        @param kwargs: Passed through to page.append() / save().
        @return: True on success, False otherwise.
        """
        if not section:
            # load() returns None for a missing page: start from empty text.
            content = self.load(page) or u''
            content += u'\n\n'
            content += text
            return self.save(page, content, comment=comment, **kwargs)

        pywikibot.output(
            u'\03{lightblue}Appending to wiki on %s in section '
            u'%s...\03{default}' % (page.title(asLink=True), section))
        for _attempt in range(3):
            try:
                # Append to page section
                page.append(text, comment=comment, section=section,
                            **kwargs)
            except pywikibot.PageNotSaved as error:
                pywikibot.output(
                    u'\03{lightblue}Cannot change %s because of '
                    u'%s\03{default}' % (page.title(), error))
            else:
                return True
        # All attempts failed; report that explicitly instead of falling
        # off the end of the method.
        return False

    ## @since   10326
    #  @remarks needed by various bots
    def loadTemplates(self, page, template, default=None):
        """Get operating mode from page with template by searching the template.

           @param page: The user (page) for which the data should be retrieved.
           @param template: Only templates whose name equals this are used.
           @param default: Optional dict of parameter defaults; values found
                           on the page override them.

           Returns a list of dict with the templates parameters found.
        """
        self._content = self.load(page)  # 'None' if not existing page

        templates = []
        if not self._content:
            return templates  # catch empty or not existing page

        for tmpl in pywikibot.extract_templates_and_params(self._content):
            if tmpl[0] == template:
                # Start from a fresh dict so the caller's defaults are
                # never modified.
                params = {}
                if default:
                    params.update(default)
                params.update(tmpl[1])
                templates.append(params)
        return templates

    ## @since   10326
    #  @remarks common interface to bot job queue on wiki
    def loadJobQueue(self, page, queue_security, reset=True):
        """Check if the data queue security is ok to execute the jobs,
           if so read the jobs and reset the queue.

           @param page: Wiki page containing job queue.
           @type  page: page
           @param queue_security: Two-item sequence: item 0 is an iterable of
                                  values accepted for field 2 of the latest
                                  history entry, item 1 must equal field 3.
                                  NOTE(review): presumably these are the
                                  allowed users and the required edit
                                  comment -- confirm against
                                  getVersionHistory().
           @param reset: When true, the page is blanked after reading.

           Returns a list of jobs. This list may be empty.
        """
        try:
            actual = page.getVersionHistory(revCount=1)[0]
        except Exception:
            # Without the latest history entry the security check cannot be
            # performed, so no job may be executed.
            pywikibot.output(
                u'\03{lightblue}Could not read version history of %s; '
                u'job queue ignored.\03{default}' % page.title(asLink=True))
            return []

        allowed = any(actual[2] == item for item in queue_security[0])
        if not (allowed and actual[3] == queue_security[1]):
            return []

        data = self._REGEX_eol.split(page.get())
        if reset:
            pywikibot.output(u'\03{lightblue}Job queue reset...\03{default}')
            pywikibot.setAction(u'reset job queue')
            page.put(u'', minorEdit=True)

        # The first character of every line is dropped.
        # NOTE(review): presumably a list marker such as '*' -- confirm.
        return [line[1:].strip() for line in data]
 
 
def main():
    """
    Parse the command line and run WiSizeBot over Category:MediaWiki.

    Recognised arguments (after pywikibot.handleArgs() has processed them):
      -summary:XYZ   edit summary handed to the bot
      -start:XYZ     value passed as start= to the category generator
                     (default "!")
    Any other argument is reported as unknown and otherwise ignored.
    """
    # summary message
    editSummary = ''
    start = "!"

    # Parse command line arguments
    for arg in pywikibot.handleArgs():
        if arg.startswith('-summary:'):
            editSummary = arg[len('-summary:'):]
        elif arg.startswith('-start:'):
            start = arg[len('-start:'):]
        else:
            pywikibot.output(u'Unknown argument: %s' % arg)

    # The generator gives the pages that should be worked upon.
    cat = catlib.Category(pywikibot.getSite(), 'Category:MediaWiki')
    gen = pagegenerators.CategorizedPageGenerator(cat, start=start)

    # The preloading generator is responsible for downloading multiple
    # pages from the wiki simultaneously.
    gen = pagegenerators.PreloadingGenerator(gen)
    bot = WiSizeBot(gen, editSummary)
    bot.run()
 
# Run the bot only when this file is executed as a script.
if __name__ == "__main__":
    try:
        main()
    finally:
        # Always executed, even when main() raises.
        # NOTE(review): presumably stopme() signs the bot off from
        # pywikibot's throttle bookkeeping -- confirm.
        pywikibot.stopme()
</pre>
</pre>
</code>
</code>
checkuser, Administrators
2,635

edits

Navigation menu