# -*- coding: utf-8 -*-
"""
Objects representing various types of MediaWiki, including Wikibase, pages.
This module also includes objects:
* Property: a type of semantic data.
* Claim: an instance of a semantic assertion.
* Revision: a single change to a wiki page.
* FileInfo: a structure holding imageinfo of latest rev. of FilePage
* Link: an internal or interwiki link in wikitext.
"""
#
# (C) Pywikibot team, 2008-2018
#
# Distributed under the terms of the MIT license.
#
from __future__ import absolute_import, division, unicode_literals
import hashlib
import logging
import os.path
import re
import sys
import unicodedata
from collections import Counter, defaultdict, namedtuple, OrderedDict
from warnings import warn
import pywikibot
from pywikibot.comms import http
from pywikibot import config
from pywikibot.exceptions import (
AutoblockUser,
NotEmailableError,
SiteDefinitionError,
UserRightsError,
)
from pywikibot.data.api import APIError
from pywikibot.family import Family
from pywikibot.site import DataSite, Namespace, need_version
from pywikibot import textlib
from pywikibot.tools import (
compute_file_hash,
UnicodeMixin, ComparableMixin, DotReadableDict,
deprecated, deprecate_arg, deprecated_args, issue_deprecation_warning,
add_full_name, manage_wrapping,
ModuleDeprecationWrapper as _ModuleDeprecationWrapper, PY2,
first_upper, redirect_func, remove_last_args,
)
from pywikibot.tools.ip import ip_regexp
from pywikibot.tools.ip import is_IP
# Python 2/3 compatibility shims: give both interpreters a common set of
# names (unicode/basestring/long, HTML entity table, URL quoting helpers).
if not PY2:
    # On Python 3 the str type covers both old unicode and basestring.
    unicode = basestring = str
    long = int
    from html import entities as htmlentitydefs
    from urllib.parse import quote_from_bytes, unquote_to_bytes
else:
    if __debug__ and not PY2:
        unichr = NotImplemented  # pyflakes workaround
    # On Python 2, chr() only handles 0-255; use unichr for full Unicode.
    chr = unichr
    import htmlentitydefs
    from urllib import quote as quote_from_bytes, unquote as unquote_to_bytes

# Public API of this module.
__all__ = (
    'BasePage',
    'Page',
    'FilePage',
    'Category',
    'User',
    'WikibasePage',
    'ItemPage',
    'Property',
    'PropertyPage',
    'Claim',
    'Revision',
    'FileInfo',
    'Link',
    'html2unicode',
    'UnicodeToAsciiHtml',
    'unicode2html',
    'url2unicode',
    'ip_regexp',  # unused & deprecated
)

# Module-level logger for this file.
logger = logging.getLogger('pywiki.wiki.page')
@add_full_name
def allow_asynchronous(func):
    """
    Decorator to make it possible to run a BasePage method asynchronously.

    This is done when the method is called with kwarg asynchronous=True.
    Optionally, you can also provide kwarg callback, which, if provided, is
    a callable that gets the page as the first and a possible exception that
    occurred during saving in the second thread or None as the second argument.
    """
    def handle(func, self, *args, **kwargs):
        # Pop the control kwargs so the wrapped method never sees them.
        do_async = kwargs.pop('asynchronous', False)
        callback = kwargs.pop('callback', None)
        err = None
        try:
            func(self, *args, **kwargs)
        # TODO: other "expected" error types to catch?
        except pywikibot.Error as edit_err:
            err = edit_err  # edit_err will be deleted in the end of the scope
            link = self.title(as_link=True)
            if do_async:
                # In async mode there is nobody to re-raise to; report loudly.
                pywikibot.error('page {} not saved due to {}\n'
                                .format(link, err))
            pywikibot.log('Error saving page %s (%s)\n' % (link, err),
                          exc_info=True)
            # Synchronous call without a callback: propagate the failure,
            # wrapping non-save-related errors in OtherPageSaveError.
            if not callback and not do_async:
                if isinstance(err, pywikibot.PageSaveRelatedError):
                    raise err
                raise pywikibot.OtherPageSaveError(self, err)
        # The callback always runs (err is None on success).
        if callback:
            callback(self, err)

    def wrapper(self, *args, **kwargs):
        # Only queue onto the async request thread when explicitly asked.
        if kwargs.get('asynchronous'):
            pywikibot.async_request(handle, func, self, *args, **kwargs)
        else:
            handle(func, self, *args, **kwargs)

    manage_wrapping(wrapper, func)
    return wrapper
# Note: Link objects (defined later on) represent a wiki-page's title, while
# Page objects (defined here) represent the page itself, including its
# contents.
class BasePage(UnicodeMixin, ComparableMixin):

    """
    BasePage: Base object for a MediaWiki page.

    This object only implements internally methods that do not require
    reading from or writing to the wiki. All other methods are delegated
    to the Site object.

    Will be subclassed by Page, WikibasePage, and FlowPage.
    """

    # Attributes cached from API responses; cleared when the page content
    # is invalidated/reloaded.
    _cache_attrs = (
        '_text', '_pageid', '_catinfo', '_templates', '_protection',
        '_contentmodel', '_langlinks', '_isredir', '_coords',
        '_preloadedtext', '_timestamp', '_applicable_protections',
        '_flowinfo', '_quality', '_pageprops', '_revid', '_quality_text',
        '_pageimage', '_item', '_lintinfo',
    )
def __init__(self, source, title='', ns=0):
    """
    Instantiate a Page object.

    Three calling formats are supported:

      - If the first argument is a Page, create a copy of that object.
        This can be used to convert an existing Page into a subclass
        object, such as Category or FilePage. (If the title is also
        given as the second argument, creates a copy with that title;
        this is used when pages are moved.)
      - If the first argument is a Site, create a Page on that Site
        using the second argument as the title (may include a section),
        and the third as the namespace number. The namespace number is
        mandatory, even if the title includes the namespace prefix. This
        is the preferred syntax when using an already-normalized title
        obtained from api.php or a database dump. WARNING: may produce
        invalid objects if page title isn't in normal form!
      - If the first argument is a Link, create a Page from that link.
        This is the preferred syntax when using a title scraped from
        wikitext, URLs, or another non-normalized source.

    @param source: the source of the page
    @type source: Link, Page (or subclass), or Site
    @param title: normalized title of the page; required if source is a
        Site, ignored otherwise
    @type title: unicode
    @param ns: namespace number; required if source is a Site, ignored
        otherwise
    @type ns: int
    @raise ValueError: title is None
    @raise pywikibot.Error: source is none of the three supported types
    """
    if title is None:
        raise ValueError('Title cannot be None.')

    if isinstance(source, pywikibot.site.BaseSite):
        self._link = Link(title, source=source, default_namespace=ns)
        self._revisions = {}
    elif isinstance(source, Page):
        # copy all of source's attributes to this object
        # without overwriting non-None values
        self.__dict__.update((k, v) for k, v in source.__dict__.items()
                             if k not in self.__dict__ or
                             self.__dict__[k] is None)
        if title:
            # overwrite title
            self._link = Link(title, source=source.site,
                              default_namespace=ns)
        # NOTE: when copying without a new title, _link/_revisions are
        # taken from the source object via the __dict__ update above.
    elif isinstance(source, Link):
        self._link = source
        self._revisions = {}
    else:
        raise pywikibot.Error(
            "Invalid argument type '{}' in Page initializer: {}"
            .format(type(source), source))
@property
def site(self):
    """The Site object of the wiki hosting this page.

    @rtype: pywikibot.Site
    """
    return self._link.site
def version(self):
    """
    MediaWiki version number of the site this page lives on.

    Required so the @need_version() decorator can be applied to
    methods of Page objects.
    """
    return self.site.version()
@property
def image_repository(self):
    """Site object of the image repository used by this page's wiki."""
    return self.site.image_repository()
@property
def data_repository(self):
    """Site object of the data repository used by this page's wiki."""
    return self.site.data_repository()
def namespace(self):
    """
    Namespace this page belongs to.

    @return: namespace of the page
    @rtype: Namespace
    """
    return self._link.namespace
@property
def content_model(self):
    """
    Content model of this page, or None when the API can't tell.

    Loaded lazily via the page-info API call on first access.
    """
    # TODO: T102735: Add a sane default of 'wikitext' and others for <1.21
    try:
        return self._contentmodel
    except AttributeError:
        self.site.loadpageinfo(self)
        return self._contentmodel
@property
def depth(self):
    """Subpage level of this page (number of '/' in the title).

    Always 0 in namespaces that do not allow subpages.
    """
    try:
        return self._depth
    except AttributeError:
        self._depth = (self.title().count('/')
                       if self.namespace().subpages else 0)
    return self._depth
@property
def pageid(self):
    """
    The page id, fetched lazily via the page-info API call.

    @return: pageid or 0 if page does not exist
    @rtype: int
    """
    try:
        return self._pageid
    except AttributeError:
        self.site.loadpageinfo(self)
        return self._pageid
@deprecated_args(
    decode=None, savetitle='as_url', withNamespace='with_ns',
    withSection='with_section', forceInterwiki='force_interwiki',
    asUrl='as_url', asLink='as_link', allowInterwiki='allow_interwiki')
def title(self, underscore=False, with_ns=True,
          with_section=True, as_url=False, as_link=False,
          allow_interwiki=True, force_interwiki=False, textlink=False,
          as_filename=False, insite=None, without_brackets=False):
    """
    Return the title of this Page, as a Unicode string.

    @param underscore: (not used with as_link) if true, replace all ' '
        characters with '_'
    @param with_ns: if false, omit the namespace prefix. If this
        option is false and used together with as_link return a labeled
        link like [[link|label]]
    @param with_section: if false, omit the section
    @param as_url: (not used with as_link) if true, quote title as if in an
        URL
    @param as_link: if true, return the title in the form of a wikilink
    @param allow_interwiki: (only used if as_link is true) if true, format
        the link as an interwiki link if necessary
    @param force_interwiki: (only used if as_link is true) if true, always
        format the link as an interwiki link
    @param textlink: (only used if as_link is true) if true, place a ':'
        before Category: and Image: links
    @param as_filename: (not used with as_link) if true, replace any
        characters that are unsafe in filenames
    @param insite: (only used if as_link is true) a site object where the
        title is to be shown. default is the current family/lang given by
        -family and -lang option i.e. config.family and config.mylang
    @param without_brackets: (cannot be used with as_link) if true, remove
        the last pair of brackets(usually removes disambiguation brackets).
    @rtype: unicode
    """
    title = self._link.canonical_title()
    label = self._link.title
    # Section suffix ('#...') is appended unless suppressed.
    if with_section and self._link.section:
        section = '#' + self._link.section
    else:
        section = ''
    if as_link:
        # Determine the site the link will be rendered on, to decide
        # whether an interwiki prefix is required.
        if insite:
            target_code = insite.code
            target_family = insite.family.name
        else:
            target_code = config.mylang
            target_family = config.family
        if force_interwiki or \
           (allow_interwiki and
            (self.site.family.name != target_family
             or self.site.code != target_code)):
            if self.site.family.name != target_family \
               and self.site.family.name != self.site.code:
                # family:code:title for cross-family links
                title = '%s:%s:%s' % (
                    self.site.family.name, self.site.code, title)
            else:
                # use this form for sites like commons, where the
                # code is the same as the family name
                title = '%s:%s' % (self.site.code, title)
        elif textlink and (self.is_filepage() or self.is_categorypage()):
            # Leading ':' makes File:/Category: links textual, not inline.
            title = ':%s' % title
        elif self.namespace() == 0 and not section:
            # Main-namespace link without section: label equals title,
            # so a piped link would be redundant.
            with_ns = True
        if with_ns:
            return '[[%s%s]]' % (title, section)
        else:
            return '[[%s%s|%s]]' % (title, section, label)
    # Non-link rendering below.
    if not with_ns and self.namespace() != 0:
        title = label + section
    else:
        title += section
    if without_brackets:
        # Strip a trailing "(disambiguation)"-style suffix.
        brackets_re = r'\s+\([^()]+?\)$'
        title = re.sub(brackets_re, '', title)
    if underscore or as_url:
        title = title.replace(' ', '_')
    if as_url:
        encoded_title = title.encode(self.site.encoding())
        title = quote_from_bytes(encoded_title, safe='')
    if as_filename:
        # Replace characters that are not possible in file names on some
        # systems.
        # Spaces are possible on most systems, but are bad for URLs.
        for forbidden in ':*?/\\ ':
            title = title.replace(forbidden, '_')
    return title
@remove_last_args(('decode', 'underscore'))
def section(self):
    """
    Name of the section this Page refers to.

    The section is whatever follows a '#' in the title; None when the
    title has no section part.

    @rtype: unicode
    """
    return self._link.section
def __unicode__(self):
    """Unicode representation: a forced-interwiki wikilink."""
    return self.title(force_interwiki=True, as_link=True)
def __repr__(self):
    """Return a more complete string representation."""
    if PY2:
        # On Python 2 the repr must be bytes in the console encoding.
        try:
            title = self.title().encode(config.console_encoding)
        except UnicodeEncodeError:
            # okay console encoding didn't work, at least try something
            title = self.title().encode('unicode_escape')
    else:
        title = repr(self.title())
    return str('{0}({1})').format(self.__class__.__name__, title)
def _cmpkey(self):
    """
    Comparison key used by ComparableMixin.

    Two Page objects compare equal exactly when they share a site and a
    normalized title (section included); ordering is by site, then
    namespace, then title.
    """
    key = (self.site, self.namespace(), self.title())
    return key
def __hash__(self):
    """
    Stable hash suitable for dict/set keys.

    Safe because the string representation of an instance never changes
    after construction.
    """
    return hash(self._cmpkey())
def full_url(self):
    """Return the full URL of this page."""
    path = self.site.article_path + self.title(as_url=True)
    return self.site.base_url(path)
def autoFormat(self):
    """
    Return L{date.getAutoFormat} dictName and value, if any.

    Value can be a year, date, etc., and dictName is 'YearBC',
    'Year_December', or another dictionary name. Two entries may share
    the same autoFormat yet live in different namespaces, since some
    sites have categories of the same name. Regular titles return
    (None, None).
    """
    try:
        return self._autoFormat
    except AttributeError:
        from pywikibot import date
        self._autoFormat = date.getAutoFormat(
            self.site.lang, self.title(with_ns=False))
        return self._autoFormat
def isAutoTitle(self):
    """Whether this Page's title appears in the autoFormat dict."""
    dict_name, _ = self.autoFormat()
    return dict_name is not None
@deprecated_args(throttle=None,
                 change_edit_time=None,
                 expandtemplates=None)
def get(self, force=False, get_redirect=False, sysop=False):
    """
    Return the wiki-text of the page.

    Fetches from the server when not yet cached or when force is True.
    Callers should be prepared to handle:

    @exception NoPage: The page does not exist
    @exception IsRedirectPage: The page is a redirect; the exception
        argument is the title of the redirect target.
    @exception SectionError: The section does not exist on a page with
        a # link
    @param force: reload all page attributes, including errors.
    @param get_redirect: return the redirect text, do not follow the
        redirect, do not raise an exception.
    @param sysop: if the user has a sysop account, use it to
        retrieve this page
    @rtype: unicode
    """
    if force:
        # Dropping the revision id invalidates all cached errors too.
        del self.latest_revision_id
    try:
        self._getInternals(sysop)
    except pywikibot.IsRedirectPage:
        if not get_redirect:
            raise
    return self.latest_revision.text
def _latest_cached_revision(self):
    """Return the cached latest revision when it has text, else None."""
    if hasattr(self, '_revid'):
        rev = self._revisions.get(self._revid)
        if rev is not None and rev.text is not None:
            return rev
    return None
def _getInternals(self, sysop):
    """
    Helper function for get().

    Ensures the latest revision (with text) is cached on this object.

    * Raises exceptions remembered from previous runs.
    * Stores new exceptions in _getexception and raises them.
    """
    # Raise exceptions from previous runs
    if hasattr(self, '_getexception'):
        raise self._getexception

    # If not already stored, fetch revision
    if self._latest_cached_revision() is None:
        try:
            self.site.loadrevisions(self, content=True, sysop=sysop)
        except (pywikibot.NoPage, pywikibot.SectionError) as e:
            # Remember the failure so later get() calls re-raise it
            # without hitting the server again.
            self._getexception = e
            raise

    # self._isredir is set by loadrevisions
    if self._isredir:
        self._getexception = pywikibot.IsRedirectPage(self)
        raise self._getexception
@deprecated_args(throttle=None, change_edit_time=None)
def getOldVersion(self, oldid, force=False, get_redirect=False,
                  sysop=False):
    """
    Return the text of an old revision of this page; options as in get().

    @param oldid: The revid of the revision desired.
    @rtype: unicode
    """
    stale = (force or oldid not in self._revisions
             or self._revisions[oldid].text is None)
    if stale:
        self.site.loadrevisions(self, content=True, revids=oldid,
                                sysop=sysop)
    # TODO: what about redirects, errors?
    return self._revisions[oldid].text
def permalink(self, oldid=None, percent_encoded=True, with_protocol=False):
    """Return the permalink URL of an old revision of this page.

    @param oldid: The revid of the revision desired; defaults to the
        latest revision.
    @param percent_encoded: if false, the title is emitted unencoded
        (spaces replaced by underscores).
    @param with_protocol: if true, prefix the URL with http:/https:
        before the double slash.
    @rtype: unicode
    """
    if percent_encoded:
        title = self.title(as_url=True)
    else:
        title = self.title(as_url=False).replace(' ', '_')
    proto = self.site.protocol() + ':' if with_protocol else ''
    revid = self.latest_revision_id if oldid is None else oldid
    return '{0}//{1}{2}/index.php?title={3}&oldid={4}'.format(
        proto, self.site.hostname(), self.site.scriptpath(), title, revid)
@property
def latest_revision_id(self):
    """Return the current revision id for this page."""
    if not hasattr(self, '_revid'):
        # Loading revisions populates self._revid as a side effect.
        # NOTE(review): self is passed as the first positional argument
        # of revisions() here — that lands in its first non-self
        # parameter, which looks unintended; confirm against the
        # revisions() signature.
        self.revisions(self)
    return self._revid
@latest_revision_id.deleter
def latest_revision_id(self):
    """
    Drop the cached latest revision id for this Page.

    All internal caches tied specifically to the latest revision are
    cleared as well. NOT cleared: the text property, page properties and
    coordinates, lastNonBotUser, isDisambig/isCategoryRedirect status,
    langlinks, templates and deleted revisions.
    """
    # Forcing a reload means:
    # * previously remembered exceptions no longer apply,
    # * _revid must go so the revision is re-fetched,
    # * _redirtarget is now stale.
    for attr in ('_redirtarget', '_getexception', '_revid'):
        if hasattr(self, attr):
            delattr(self, attr)
@latest_revision_id.setter
def latest_revision_id(self, value):
    """Set the latest revision id, clearing dependent caches first."""
    del self.latest_revision_id
    self._revid = value
@deprecated('latest_revision_id', since='20150727')
def latestRevision(self):
    """DEPRECATED alias for the latest_revision_id property."""
    return self.latest_revision_id
@deprecated('latest_revision_id', since='20150407')
def pageAPInfo(self):
    """DEPRECATED: current revision id; raises on redirect pages."""
    if self.isRedirectPage():
        raise pywikibot.IsRedirectPage(self)
    return self.latest_revision_id
@property
def latest_revision(self):
    """The current revision of this page, fetched when not cached."""
    cached = self._latest_cached_revision()
    if cached is None:
        return next(self.revisions(content=True, total=1))
    return cached
@property
def text(self):
    """
    The current (edited) wikitext, loaded on first access.

    @return: text of the page; empty string for nonexistent pages
    @rtype: unicode
    """
    if getattr(self, '_text', None) is None:
        try:
            self._text = self.get(get_redirect=True)
        except pywikibot.NoPage:
            # TODO: what other exceptions might be returned?
            self._text = ''
    return self._text
@text.setter
def text(self, value):
    """
    Replace the current (edited) wikitext.

    @param value: New value or None
    @type value: basestring
    """
    if value is None:
        self._text = None
    else:
        self._text = unicode(value)
    # Template extraction cache is derived from _text; invalidate it.
    if hasattr(self, '_raw_extracted_templates'):
        del self._raw_extracted_templates
@text.deleter
def text(self):
    """Discard the edited wikitext and every cache derived from it."""
    for attr in ('_text', '_expanded_text', '_raw_extracted_templates'):
        if hasattr(self, attr):
            delattr(self, attr)
def preloadText(self):
    """
    Text returned by EditFormPreloadText (see API module "info").

    Application: on Wikisource wikis, text can be preloaded even if
    a page does not exist, if an Index page is present.

    @rtype: unicode
    """
    self.site.loadpageinfo(self, preload=True)
    return self._preloadedtext
def _get_parsed_page(self):
    """Fetch parsed text (action=parse) once and serve it from cache."""
    try:
        return self._parsed_text
    except AttributeError:
        self._parsed_text = self.site.get_parsed_page(self)
        return self._parsed_text
def properties(self, force=False):
    """
    Page properties as reported by the API.

    @param force: force updating from the live site
    @rtype: dict
    """
    if force or not hasattr(self, '_pageprops'):
        self._pageprops = {}  # page may not have pageprops (see T56868)
        self.site.loadpageprops(self)
    return self._pageprops
def defaultsort(self, force=False):
    """
    Value of the {{DEFAULTSORT:}} magic word on this page, if any.

    @param force: force updating from the live site
    @rtype: unicode or None
    """
    props = self.properties(force=force)
    return props.get('defaultsort')
@deprecate_arg('refresh', 'force')
def expand_text(self, force=False, includecomments=False):
    """Return the page text with templates and parser words expanded.

    @param force: force updating from the live site
    @param includecomments: Also strip comments if includecomments
        parameter is not True.
    @rtype unicode or None
    """
    if force or getattr(self, '_expanded_text', None) is None:
        if not self.text:
            # Nothing to expand; cache and return the empty result.
            self._expanded_text = ''
            return ''
        self._expanded_text = self.site.expand_text(
            self.text,
            title=self.title(with_section=False),
            includecomments=includecomments)
    return self._expanded_text
def userName(self):
    """
    Name or IP address of the last user to edit this page.

    @rtype: unicode
    """
    return self.latest_revision.user
def isIpEdit(self):
    """
    Whether the most recent edit was made by an unregistered user.

    @rtype: bool
    """
    return self.latest_revision.anon
def lastNonBotUser(self):
    """
    Name or IP of the most recent human (non-bot) editor, or None.

    Walks the revision history from newest to oldest. An edit by an
    account that is no longer flagged as 'bot' (i.e. not returned by
    Site.botusers()) counts as a non-bot edit.

    @rtype: unicode
    """
    if hasattr(self, '_lastNonBotUser'):
        return self._lastNonBotUser

    self._lastNonBotUser = None
    for rev in self.revisions():
        if rev.user and not self.site.isBot(rev.user):
            self._lastNonBotUser = rev.user
            break
    return self._lastNonBotUser
@remove_last_args(('datetime', ))
def editTime(self):
    """Timestamp of the latest revision of this page.

    @rtype: pywikibot.Timestamp
    """
    return self.latest_revision.timestamp
@property
@deprecated('latest_revision.parent_id (0 instead of -1 when no parent)',
            since='20150609')
def previous_revision_id(self):
    """
    Revision id of the previous revision of this Page.

    Returns -1 when the page has only one revision.

    @rtype: long
    @raise AssertionError: Use on MediaWiki prior to v1.16.
    """
    parent = self.latest_revision.parent_id
    return parent or -1
@deprecated('latest_revision.parent_id (0 instead of -1 when no parent)',
            since='20150609')
def previousRevision(self):
    """
    DEPRECATED: revision id of the previous revision (-1 if none).

    Use latest_revision.parent_id instead.

    @rtype: long
    @raise AssertionError: Use on MediaWiki prior to v1.16.
    """
    parent = self.latest_revision.parent_id
    return parent or -1
def exists(self):
    """Whether the page exists on the wiki (redirects count as existing).

    If the title includes a section, return False if this section isn't
    found.

    @rtype: bool
    """
    return self.pageid > 0
@property
def oldest_revision(self):
    """
    The very first revision of this page.

    @rtype: L{Revision}
    """
    return next(self.revisions(reverse=True, total=1))
def isRedirectPage(self):
    """Whether this page is a redirect (False for missing pages)."""
    return self.site.page_isredirect(self)
def isStaticRedirect(self, force=False):
    """
    Whether this page is a static redirect.

    A static redirect is a valid redirect containing the magic word
    __STATICREDIRECT__.

    @param force: Bypass local caching
    @type force: bool
    @rtype: bool
    """
    if not self.isRedirectPage():
        return False
    static_keys = self.site.getmagicwords('staticredirect')
    text = self.get(get_redirect=True, force=force)
    if not static_keys:
        return False
    return any(key in text for key in static_keys)
def isCategoryRedirect(self):
    """
    Return True if this is a category redirect page, False otherwise.

    @rtype: bool
    """
    if not self.is_categorypage():
        return False
    if not hasattr(self, '_catredirect'):
        # _catredirect caches either the target title or False.
        self._catredirect = False
        catredirs = self.site.category_redirects()
        for template, args in self.templatesWithParams():
            if template.title(with_ns=False) in catredirs:
                if args:
                    # Get target (first template argument)
                    p = pywikibot.Page(
                        self.site, args[0].strip(), Namespace.CATEGORY)
                    if p.namespace() == Namespace.CATEGORY:
                        self._catredirect = p.title()
                    else:
                        pywikibot.warning(
                            'Category redirect target {0} on {1} is not a '
                            'category'.format(p.title(as_link=True),
                                              self.title(as_link=True)))
                else:
                    pywikibot.warning(
                        'No target found for category redirect on '
                        + self.title(as_link=True))
                # Only the first matching redirect template counts.
                break
    return bool(self._catredirect)
def getCategoryRedirectTarget(self):
    """
    Return the target category when this page is a category redirect.

    @rtype: Category
    @raise IsNotRedirectPage: this page is not a category redirect
    """
    if not self.isCategoryRedirect():
        raise pywikibot.IsNotRedirectPage(self)
    return Category(Link(self._catredirect, self.site))
@deprecated('interwiki.page_empty_check(page)', since='20151207')
def isEmpty(self):
    """
    Whether the page text, ignoring language and category links, has
    fewer than 4 characters.

    Can raise the same exceptions as get().

    @rtype: bool
    """
    stripped = textlib.removeCategoryLinks(
        textlib.removeLanguageLinks(self.get(), site=self.site),
        site=self.site)
    return len(stripped) < 4
def isTalkPage(self):
    """Whether this page lives in one of the talk namespaces."""
    ns = self.namespace()
    # Talk namespaces are the odd non-negative ones.
    return ns % 2 == 1 if ns >= 0 else False
def toggleTalkPage(self):
    """
    Return the other member of this page's article/talk pair.

    For a talk page the associated content page is returned, and vice
    versa. The returned page is not guaranteed to exist on the wiki.

    @return: Page or None if self is a special page.
    @rtype: Page or None
    """
    ns = self.namespace()
    if ns < 0:  # Special page
        return None
    if not self.isTalkPage():
        return Page(self.site,
                    '%s:%s' % (self.site.namespace(ns + 1),
                               self.title(with_ns=False)))
    if ns == 1:
        # Talk: pages toggle to the main namespace (no prefix).
        return Page(self.site, self.title(with_ns=False))
    return Page(self.site,
                '%s:%s' % (self.site.namespace(ns - 1),
                           self.title(with_ns=False)))
def is_categorypage(self):
    """Whether this page is in the Category namespace."""
    return self.namespace() == 14
@deprecated('is_categorypage', since='20140819')
def isCategory(self):
    """DEPRECATED alias for is_categorypage."""
    return self.is_categorypage()
def is_filepage(self):
    """Whether this page is a file description page (File namespace)."""
    return self.namespace() == 6
@deprecated('is_filepage', since='20140819')
def isImage(self):
    """DEPRECATED alias for is_filepage."""
    return self.is_filepage()
@remove_last_args(('get_Index', ))
def isDisambig(self):
    """
    Return True if this is a disambiguation page, False otherwise.

    By default, it uses the Disambiguator extension's result. The
    identification relies on the presence of the __DISAMBIG__ magic word
    which may also be transcluded.

    If the Disambiguator extension isn't activated for the given site,
    the identification relies on the presence of specific templates.
    First load a list of template names from the Family file;
    if the value in the Family file is None or no entry was made, look for
    the list on [[MediaWiki:Disambiguationspage]]. If this page does not
    exist, take the MediaWiki message. 'Template:Disambig' is always
    assumed to be default, and will be appended regardless of its
    existence.

    @rtype: bool
    """
    if self.site.has_extension('Disambiguator'):
        # If the Disambiguator extension is loaded, use it
        return 'disambiguation' in self.properties()

    # Fallback: build (and cache on the site) a set of disambiguation
    # template names.
    if not hasattr(self.site, '_disambigtemplates'):
        try:
            default = set(self.site.family.disambig('_default'))
        except KeyError:
            default = {'Disambig'}
        try:
            distl = self.site.family.disambig(self.site.code,
                                              fallback=False)
        except KeyError:
            distl = None
        if distl is None:
            # No family-file entry: derive the list from the wiki itself.
            disambigpages = Page(self.site,
                                 'MediaWiki:Disambiguationspage')
            if disambigpages.exists():
                disambigs = {link.title(with_ns=False)
                             for link in disambigpages.linkedPages()
                             if link.namespace() == 10}
            elif self.site.has_mediawiki_message('disambiguationspage'):
                message = self.site.mediawiki_message(
                    'disambiguationspage').split(':', 1)[1]
                # add the default template(s) for default mw message
                # only
                disambigs = {first_upper(message)} | default
            else:
                disambigs = default
            self.site._disambigtemplates = disambigs
        else:
            # Normalize template capitalization
            self.site._disambigtemplates = {first_upper(t) for t in distl}
    templates = {tl.title(with_ns=False) for tl in self.templates()}
    disambigs = set()
    # always use cached disambig templates
    disambigs.update(self.site._disambigtemplates)
    # see if any template on this page is in the set of disambigs
    disambig_in_page = disambigs.intersection(templates)
    # Template pages themselves (ns 10) never count as disambig pages.
    return self.namespace() != 10 and len(disambig_in_page) > 0
@deprecated_args(
    step=None, withTemplateInclusion='with_template_inclusion',
    onlyTemplateInclusion='only_template_inclusion',
    redirectsOnly='filter_redirects')
def getReferences(
    self, follow_redirects=True, with_template_inclusion=True,
    only_template_inclusion=False, filter_redirects=False,
    namespaces=None, total=None, content=False
):
    """
    Iterate all pages that refer to or embed this page.

    For a full list of referring pages use
    C{pages = list(s.getReferences())}.

    @param follow_redirects: if True, also iterate pages that link to a
        redirect pointing to the page.
    @param with_template_inclusion: if True, also iterate pages where self
        is used as a template.
    @param only_template_inclusion: if True, only iterate pages where self
        is used as a template.
    @param filter_redirects: if True, only iterate redirects to self.
    @param namespaces: only iterate pages in these namespaces
    @param total: iterate no more than this number of pages in total
    @param content: if True, retrieve the content of the current version
        of each referring page (default False)
    """
    # N.B.: this method intentionally overlaps with backlinks() and
    # embeddedin(); the site interface combines both result streams.
    return self.site.pagereferences(
        self,
        follow_redirects=follow_redirects,
        filter_redirects=filter_redirects,
        with_template_inclusion=with_template_inclusion,
        only_template_inclusion=only_template_inclusion,
        namespaces=namespaces,
        total=total,
        content=content)
@deprecated_args(step=None, followRedirects='follow_redirects',
                 filterRedirects='filter_redirects')
def backlinks(self, follow_redirects=True, filter_redirects=None,
              namespaces=None, total=None, content=False):
    """
    Iterate pages that link to this page.

    @param follow_redirects: if True, also iterate pages that link to a
        redirect pointing to the page.
    @param filter_redirects: if True, only iterate redirects; if False,
        omit redirects; if None, do not filter
    @param namespaces: only iterate pages in these namespaces
    @param total: iterate no more than this number of pages in total
    @param content: if True, retrieve the content of the current version
        of each referring page (default False)
    """
    return self.site.pagebacklinks(
        self,
        follow_redirects=follow_redirects,
        filter_redirects=filter_redirects,
        namespaces=namespaces,
        total=total,
        content=content)
@deprecated_args(step=None)
def embeddedin(self, filter_redirects=None, namespaces=None,
               total=None, content=False):
    """
    Iterate pages that embed this page as a template.

    @param filter_redirects: if True, only iterate redirects; if False,
        omit redirects; if None, do not filter
    @param namespaces: only iterate pages in these namespaces
    @param total: iterate no more than this number of pages in total
    @param content: if True, retrieve the content of the current version
        of each embedding page (default False)
    """
    return self.site.page_embeddedin(
        self,
        filter_redirects=filter_redirects,
        namespaces=namespaces,
        total=total,
        content=content)
def protection(self):
    """
    Dictionary describing the current protections of this page.

    @rtype: dict
    """
    return self.site.page_restrictions(self)
def applicable_protections(self):
    """
    Protection types applicable to this page.

    A nonexistent page only allows "create". An existing page allows
    every protection type the site provides except "create", and
    "upload" only in the File namespace. The result can be empty, but
    only if original protection types were removed.

    @return: set of unicode
    @rtype: set
    """
    # New API since commit 32083235eb332c419df2063cf966b3400be7ee8a
    if self.site.mw_version >= '1.25wmf14':
        self.site.loadpageinfo(self)
        return self._applicable_protections

    p_types = set(self.site.protection_types())
    if not self.exists():
        return {'create'} if 'create' in p_types else set()
    p_types.remove('create')  # no existing page allows that
    if not self.is_filepage():  # only file pages allow upload
        p_types.remove('upload')
    return p_types
def canBeEdited(self):
"""
Determine whether the page may be edited.
This returns True if and only if:
- page is unprotected, and bot has an account for this site, or
- page is protected, and bot has a sysop account for this site.
@rtype: bool
"""
return self.site.page_can_be_edited(self)
    def botMayEdit(self):
        """
        Determine whether the active bot is allowed to edit the page.

        This will be True if the page doesn't contain {{bots}} or {{nobots}}
        or any other template from edit_restricted_templates list
        in x_family.py file, or it contains them and the active bot is allowed
        to edit this page. (This method is only useful on those sites that
        recognize the bot-exclusion protocol; on other sites, it will always
        return True.)

        The framework enforces this restriction by default. It is possible
        to override this by setting ignore_bot_templates=True in
        user-config.py, or using page.put(force=True).

        @return: True if the active bot may edit this page.
        @rtype: bool
        """
        # TODO: move this to Site object?
        # FIXME: templatesWithParams is defined in Page only.
        if not hasattr(self, 'templatesWithParams'):
            # Page subclasses without template parsing cannot carry
            # exclusion templates, so editing is allowed.
            return True
        if config.ignore_bot_templates: # Check the "master ignore switch"
            return True
        username = self.site.user()
        try:
            templates = self.templatesWithParams()
        except (pywikibot.NoPage,
                pywikibot.IsRedirectPage,
                pywikibot.SectionError):
            # A page that cannot be parsed imposes no restriction.
            return True
        # go through all templates and look for any restriction
        # multiple bots/nobots templates are allowed
        restrictions = self.site.family.edit_restricted_templates.get(
            self.site.code)
        # also add archive templates for non-archive bots
        if pywikibot.calledModuleName() != 'archivebot':
            archived = self.site.family.archived_page_templates.get(
                self.site.code)
            if restrictions and archived:
                restrictions += archived
            elif archived:
                restrictions = archived
        for template, params in templates:
            title = template.title(with_ns=False)
            if restrictions:
                # Any family-configured restricted template blocks edits.
                if title in restrictions:
                    return False
            if title == 'Nobots':
                # {{nobots}} without parameters bans all bots; otherwise
                # the first parameter lists the banned bot names.
                if not params:
                    return False
                else:
                    bots = [bot.strip() for bot in params[0].split(',')]
                    if 'all' in bots or pywikibot.calledModuleName() in bots \
                       or username in bots:
                        return False
            elif title == 'Bots':
                # {{bots}} without parameters allows all bots; otherwise the
                # first parameter is '<type>=<comma separated bot names>'.
                if not params:
                    return True
                else:
                    (ttype, bots) = [part.strip() for part
                                     in params[0].split('=', 1)]
                    bots = [bot.strip() for bot in bots.split(',')]
                    if ttype == 'allow':
                        return 'all' in bots or username in bots
                    if ttype == 'deny':
                        return not ('all' in bots or username in bots)
                    if ttype == 'allowscript':
                        return ('all' in bots
                                or pywikibot.calledModuleName() in bots)
                    if ttype == 'denyscript':
                        return not ('all' in bots
                                    or pywikibot.calledModuleName() in bots)
        # no restricting template found
        return True
    @deprecate_arg('async', 'asynchronous') # T106230
    @deprecated_args(comment='summary', sysop=None)
    def save(self, summary=None, watch=None, minor=True, botflag=None,
             force=False, asynchronous=False, callback=None,
             apply_cosmetic_changes=None, quiet=False, **kwargs):
        """
        Save the current contents of page's text to the wiki.

        @param summary: The edit summary for the modification (optional, but
            most wikis strongly encourage its use)
        @type summary: unicode
        @param watch: Specify how the watchlist is affected by this edit, set
            to one of "watch", "unwatch", "preferences", "nochange":
            * watch: add the page to the watchlist
            * unwatch: remove the page from the watchlist
            * preferences: use the preference settings (Default)
            * nochange: don't change the watchlist
            If None (default), follow bot account's default settings

            For backward compatibility watch parameter may also be boolean:
            if True, add or if False, remove this Page to/from bot
            user's watchlist.
        @type watch: string, bool (deprecated) or None
        @param minor: if True, mark this edit as minor
        @type minor: bool
        @param botflag: if True, mark this edit as made by a bot (default:
            True if user has bot status, False if not)
        @param force: if True, ignore botMayEdit() setting
        @type force: bool
        @param asynchronous: if True, launch a separate thread to save
            asynchronously
        @param callback: a callable object that will be called after the
            page put operation. This object must take two arguments: (1) a
            Page object, and (2) an exception instance, which will be None
            if the page was saved successfully. The callback is intended for
            use by bots that need to keep track of which saves were
            successful.
        @param apply_cosmetic_changes: Overwrites the cosmetic_changes
            configuration value to this value unless it's None.
        @type apply_cosmetic_changes: bool or None
        @param quiet: enable/disable successful save operation message;
            defaults to False.
            In asynchronous mode, if True, it is up to the calling bot to
            manage the output e.g. via callback.
        @type quiet: bool
        @raises pywikibot.OtherPageSaveError: editing is restricted by a
            bot-exclusion template and force is not set.
        """
        if not summary:
            summary = config.default_edit_summary
        # Map the deprecated boolean watch values onto the string values
        # understood by the API.
        if watch is True:
            watch = 'watch'
        elif watch is False:
            watch = 'unwatch'
        if not force and not self.botMayEdit():
            raise pywikibot.OtherPageSaveError(
                self, 'Editing restricted by {{bots}}, {{nobots}} '
                "or site's equivalent of {{in use}} template")
        # The actual work (and the asynchronous handling) happens in _save.
        self._save(summary=summary, watch=watch, minor=minor, botflag=botflag,
                   asynchronous=asynchronous, callback=callback,
                   cc=apply_cosmetic_changes, quiet=quiet, **kwargs)
    @allow_asynchronous
    def _save(self, summary=None, watch=None, minor=True, botflag=None,
              cc=None, quiet=False, **kwargs):
        """Helper function for save().

        @param summary: edit summary; may be extended by the cosmetic
            changes hook.
        @param cc: if True, or if None while config.cosmetic_changes is
            set, run the cosmetic changes hook before saving.
        @param quiet: if True, suppress the saved/not-saved messages.
        @raises pywikibot.PageNotSaved: the edit was not performed.
        """
        link = self.title(as_link=True)
        # cc is a tri-state: None means "fall back to the global
        # cosmetic_changes configuration value".
        if cc or cc is None and config.cosmetic_changes:
            summary = self._cosmetic_changes_hook(summary)
        done = self.site.editpage(self, summary=summary, minor=minor,
                                  watch=watch, bot=botflag, **kwargs)
        if not done:
            if not quiet:
                pywikibot.warning('Page %s not saved' % link)
            raise pywikibot.PageNotSaved(self)
        if not quiet:
            pywikibot.output('Page %s saved' % link)
    def _cosmetic_changes_hook(self, summary):
        """The cosmetic changes hook.

        Applies the cosmetic changes toolkit to self.text (in place) when
        the configuration enables it for this site, and appends a note to
        the edit summary when the text was actually modified.

        @param summary: The current edit summary.
        @type summary: str
        @return: Modified edit summary if cosmetic changes has been done,
            else the old edit summary.
        @rtype: str
        """
        # Never apply cosmetic changes on talk pages or for scripts that
        # are explicitly denied in the configuration.
        if self.isTalkPage() or \
           pywikibot.calledModuleName() in config.cosmetic_changes_deny_script:
            return summary
        family = self.site.family.name
        # Determine whether cosmetic changes are enabled for this
        # family/language combination ...
        if config.cosmetic_changes_mylang_only:
            cc = ((family == config.family and
                   self.site.lang == config.mylang) or
                  family in list(config.cosmetic_changes_enable.keys()) and
                  self.site.lang in config.cosmetic_changes_enable[family])
        else:
            cc = True
        # ... and not explicitly disabled for it.
        cc = (cc and not
              (family in list(config.cosmetic_changes_disable.keys()) and
               self.site.lang in config.cosmetic_changes_disable[family]))
        if not cc:
            return summary

        old = self.text
        pywikibot.log('Cosmetic changes for %s-%s enabled.'
                      % (family, self.site.lang))
        # cc depends on page directly and via several other imports
        from pywikibot.cosmetic_changes import (
            CANCEL_MATCH, CosmeticChangesToolkit)
        cc_toolkit = CosmeticChangesToolkit(
            self.site, namespace=self.namespace(), pageTitle=self.title(),
            ignore=CANCEL_MATCH)
        self.text = cc_toolkit.change(old)
        # Append the i18n note only when the text actually changed,
        # ignoring line-ending and surrounding-whitespace differences.
        if summary and old.strip().replace(
                '\r\n', '\n') != self.text.strip().replace('\r\n', '\n'):
            from pywikibot import i18n
            summary += i18n.twtranslate(self.site, 'cosmetic_changes-append')
        return summary
@deprecate_arg('async', 'asynchronous') # T106230
@deprecated_args(comment='summary', watchArticle='watch',
minorEdit='minor')
def put(self, newtext, summary=None, watch=None, minor=True, botflag=None,
force=False, asynchronous=False, callback=None, **kwargs):
"""
Save the page with the contents of the first argument as the text.
This method is maintained primarily for backwards-compatibility.
For new code, using Page.save() is preferred. See save() method
docs for all parameters not listed here.
@param newtext: The complete text of the revised page.
@type newtext: unicode
"""
self.text = newtext
self.save(summary=summary, watch=watch, minor=minor, botflag=botflag,
force=force, asynchronous=asynchronous, callback=callback,
**kwargs)
@deprecated('put(asynchronous=True) or save(asynchronous=True)')
@deprecated_args(comment='summary', watchArticle='watch',
minorEdit='minor')
def put_async(self, newtext, summary=None, watch=None, minor=True,
botflag=None, force=False, callback=None, **kwargs):
"""
Put page on queue to be saved to wiki asynchronously.
Asynchronous version of put (takes the same arguments), which places
pages on a queue to be saved by a daemon thread. All arguments are
the same as for .put(). This version is maintained solely for
backwards-compatibility.
"""
self.put(newtext, summary=summary, watch=watch,
minor=minor, botflag=botflag, force=force,
asynchronous=True, callback=callback, **kwargs)
def watch(self, unwatch=False):
"""
Add or remove this page to/from bot account's watchlist.
@param unwatch: True to unwatch, False (default) to watch.
@type unwatch: bool
@return: True if successful, False otherwise.
@rtype: bool
"""
return self.site.watch(self, unwatch)
def clear_cache(self):
"""Clear the cached attributes of the page."""
self._revisions = {}
for attr in self._cache_attrs:
try:
delattr(self, attr)
except AttributeError:
pass
def purge(self, **kwargs):
"""
Purge the server's cache for this page.
@rtype: bool
"""
self.clear_cache()
return self.site.purgepages([self], **kwargs)
def touch(self, callback=None, botflag=False, **kwargs):
"""
Make a touch edit for this page.
See save() method docs for all parameters.
The following parameters will be overridden by this method:
- summary, watch, minor, force, asynchronous
Parameter botflag is False by default.
minor and botflag parameters are set to False which prevents hiding
the edit when it becomes a real edit due to a bug.
"""
if self.exists():
# ensure always get the page text and not to change it.
del self.text
self.save(summary='Pywikibot touch edit', watch='nochange',
minor=False, botflag=botflag, force=True,
asynchronous=False, callback=callback,
apply_cosmetic_changes=False, nocreate=True, **kwargs)
else:
raise pywikibot.NoPage(self)
@deprecated_args(step=None)
def linkedPages(self, namespaces=None, total=None,
content=False):
"""
Iterate Pages that this Page links to.
Only returns pages from "normal" internal links. Image and category
links are omitted unless prefixed with ":". Embedded templates are
omitted (but links within them are returned). All interwiki and
external links are omitted.
@param namespaces: only iterate links in these namespaces
@param namespaces: int, or list of ints
@param total: iterate no more than this number of pages in total
@type total: int
@param content: if True, retrieve the content of the current version
of each linked page (default False)
@type content: bool
@return: a generator that yields Page objects.
@rtype: generator
"""
return self.site.pagelinks(self, namespaces=namespaces,
total=total, content=content)
def interwiki(self, expand=True):
"""
Iterate interwiki links in the page text, excluding language links.
@param expand: if True (default), include interwiki links found in
templates transcluded onto this page; if False, only iterate
interwiki links found in this page's own wikitext
@type expand: bool
@return: a generator that yields Link objects
@rtype: generator
"""
# This function does not exist in the API, so it has to be
# implemented by screen-scraping
if expand:
text = self.expand_text()
else:
text = self.text
for linkmatch in pywikibot.link_regex.finditer(
textlib.removeDisabledParts(text)):
linktitle = linkmatch.group('title')
link = Link(linktitle, self.site)
# only yield links that are to a different site and that
# are not language links
try:
if link.site != self.site:
if linktitle.lstrip().startswith(':'):
# initial ":" indicates not a language link
yield link
elif link.site.family != self.site.family:
# link to a different family is not a language link
yield link
except pywikibot.Error:
# ignore any links with invalid contents
continue
def langlinks(self, include_obsolete=False):
"""
Return a list of all inter-language Links on this page.
@param include_obsolete: if true, return even Link objects whose site
is obsolete
@type include_obsolete: bool
@return: list of Link objects.
@rtype: list
"""
# Note: We preload a list of *all* langlinks, including links to
# obsolete sites, and store that in self._langlinks. We then filter
# this list if the method was called with include_obsolete=False
# (which is the default)
if not hasattr(self, '_langlinks'):
self._langlinks = list(self.iterlanglinks(include_obsolete=True))
if include_obsolete:
return self._langlinks
else:
return [i for i in self._langlinks if not i.site.obsolete]
@deprecated_args(step=None)
def iterlanglinks(self, total=None, include_obsolete=False):
"""
Iterate all inter-language links on this page.
@param total: iterate no more than this number of pages in total
@param include_obsolete: if true, yield even Link object whose site
is obsolete
@type include_obsolete: bool
@return: a generator that yields Link objects.
@rtype: generator
"""
if hasattr(self, '_langlinks'):
return iter(self.langlinks(include_obsolete=include_obsolete))
# XXX We might want to fill _langlinks when the Site
# method is called. If we do this, we'll have to think
# about what will happen if the generator is not completely
# iterated upon.
return self.site.pagelanglinks(self, total=total,
include_obsolete=include_obsolete)
def data_item(self):
"""
Convenience function to get the Wikibase item of a page.
@rtype: ItemPage
"""
return ItemPage.fromPage(self)
@deprecate_arg('tllimit', None)
@deprecated('Page.templates()', since='20140421')
def getTemplates(self):
"""DEPRECATED. Use templates()."""
return self.templates()
@deprecate_arg('get_redirect', None)
def templates(self, content=False):
"""
Return a list of Page objects for templates used on this Page.
Template parameters are ignored. This method only returns embedded
templates, not template pages that happen to be referenced through
a normal link.
@param content: if True, retrieve the content of the current version
of each template (default False)
@param content: bool
"""
# Data might have been preloaded
if not hasattr(self, '_templates'):
self._templates = list(self.itertemplates(content=content))
return self._templates
@deprecated_args(step=None)
def itertemplates(self, total=None, content=False):
"""
Iterate Page objects for templates used on this Page.
Template parameters are ignored. This method only returns embedded
templates, not template pages that happen to be referenced through
a normal link.
@param total: iterate no more than this number of pages in total
@param content: if True, retrieve the content of the current version
of each template (default False)
@param content: bool
"""
if hasattr(self, '_templates'):
return iter(self._templates)
return self.site.pagetemplates(self, total=total, content=content)
@deprecated_args(followRedirects=None, loose=None, step=None)
def imagelinks(self, total=None, content=False):
"""
Iterate FilePage objects for images displayed on this Page.
@param total: iterate no more than this number of pages in total
@param content: if True, retrieve the content of the current version
of each image description page (default False)
@return: a generator that yields FilePage objects.
"""
return self.site.pageimages(self, total=total, content=content)
@deprecated_args(nofollow_redirects=None, get_redirect=None, step=None,
withSortKey='with_sort_key')
def categories(self, with_sort_key=False, total=None, content=False):
"""
Iterate categories that the article is in.
@param with_sort_key: if True, include the sort key in each Category.
@param total: iterate no more than this number of pages in total
@param content: if True, retrieve the content of the current version
of each category description page (default False)
@return: a generator that yields Category objects.
@rtype: generator
"""
# FIXME: bug T75561: with_sort_key is ignored by Site.pagecategories
if with_sort_key:
raise NotImplementedError('with_sort_key is not implemented')
return self.site.pagecategories(self, total=total, content=content)
@deprecated_args(step=None)
def extlinks(self, total=None):
"""
Iterate all external URLs (not interwiki links) from this page.
@param total: iterate no more than this number of pages in total
@return: a generator that yields unicode objects containing URLs.
@rtype: generator
"""
return self.site.page_extlinks(self, total=total)
def coordinates(self, primary_only=False):
"""
Return a list of Coordinate objects for points on the page.
Uses the MediaWiki extension GeoData.
@param primary_only: Only return the coordinate indicated to be primary
@return: A list of Coordinate objects
@rtype: list
"""
if not hasattr(self, '_coords'):
self._coords = []
self.site.loadcoordinfo(self)
if primary_only:
return self._coords[0] if len(self._coords) > 0 else None
else:
return self._coords
@need_version('1.20')
def page_image(self):
"""
Return the most appropriate image on the page.
Uses the MediaWiki extension PageImages.
@return: A FilePage object
@rtype: FilePage
"""
if not hasattr(self, '_pageimage'):
self._pageimage = None
self.site.loadpageimage(self)
return self._pageimage
def getRedirectTarget(self):
"""
Return a Page object for the target this Page redirects to.
If this page is not a redirect page, will raise an IsNotRedirectPage
exception. This method also can raise a NoPage exception.
@rtype: pywikibot.Page
"""
return self.site.getredirtarget(self)
@deprecated('moved_target()', since='20150524')
def getMovedTarget(self):
"""
Return a Page object for the target this Page was moved to.
DEPRECATED: Use Page.moved_target().
If this page was not moved, it will raise a NoPage exception.
This method also works if the source was already deleted.
@rtype: pywikibot.Page
@raises NoPage: this page was not moved
"""
try:
return self.moved_target()
except pywikibot.NoMoveTarget:
raise pywikibot.NoPage(self)
def moved_target(self):
"""
Return a Page object for the target this Page was moved to.
If this page was not moved, it will raise a NoMoveTarget exception.
This method also works if the source was already deleted.
@rtype: pywikibot.Page
@raises NoMoveTarget: this page was not moved
"""
gen = iter(self.site.logevents(logtype='move', page=self, total=1))
try:
lastmove = next(gen)
except StopIteration:
raise pywikibot.NoMoveTarget(self)
else:
return lastmove.target_page
@deprecated_args(getText='content', reverseOrder='reverse', step=None)
def revisions(self, reverse=False, total=None, content=False,
rollback=False, starttime=None, endtime=None):
"""Generator which loads the version history as Revision instances."""
# TODO: Only request uncached revisions
self.site.loadrevisions(self, content=content, rvdir=reverse,
starttime=starttime, endtime=endtime,
total=total, rollback=rollback)
return (self._revisions[rev] for rev in
sorted(self._revisions, reverse=not reverse)[:total])
# BREAKING CHANGE: in old framework, default value for getVersionHistory
# returned no more than 500 revisions; now, it iterates
# all revisions unless 'total' argument is used
@deprecated('Page.revisions()', since='20150206')
@deprecated_args(forceReload=None, revCount='total', step=None,
getAll=None, reverseOrder='reverse')
def getVersionHistory(self, reverse=False, total=None):
"""
Load the version history page and return history information.
Return value is a list of tuples, where each tuple represents one
edit and is built of revision id, edit date/time, user name, and
edit summary. Starts with the most current revision, unless
reverse is True.
@param total: iterate no more than this number of revisions in total
"""
return [rev.hist_entry()
for rev in self.revisions(reverse=reverse, total=total)
]
@deprecated_args(forceReload=None, reverseOrder='reverse', step=None)
def getVersionHistoryTable(self, reverse=False, total=None):
"""Return the version history as a wiki table."""
result = '{| class="wikitable"\n'
result += '! oldid || date/time || username || edit summary\n'
for entry in self.revisions(reverse=reverse, total=total):
result += '|----\n'
result += ('| {r.revid} || {r.timestamp} || {r.user} || '
'<nowiki>{r.comment}</nowiki>\n'.format(r=entry))
result += '|}\n'
return result
@deprecated('Page.revisions(content=True)', since='20150206')
@deprecated_args(reverseOrder='reverse', rollback=None, step=None)
def fullVersionHistory(self, reverse=False, total=None):
"""Iterate previous versions including wikitext.
Takes same arguments as getVersionHistory.
"""
return [rev.full_hist_entry()
for rev in self.revisions(content=True, reverse=reverse,
total=total)
]
@deprecated_args(step=None)
def contributors(self, total=None, starttime=None, endtime=None):
"""
Compile contributors of this page with edit counts.
@param total: iterate no more than this number of revisions in total
@param starttime: retrieve revisions starting at this Timestamp
@param endtime: retrieve revisions ending at this Timestamp
@return: number of edits for each username
@rtype: L{collections.Counter}
"""
return Counter(rev.user for rev in
self.revisions(total=total,
starttime=starttime, endtime=endtime))
@deprecated('contributors()', since='20150206')
@deprecated_args(step=None)
def contributingUsers(self, total=None):
"""
Return a set of usernames (or IPs) of users who edited this page.
@param total: iterate no more than this number of revisions in total
@rtype: set
"""
return self.contributors(total=total).keys()
def revision_count(self, contributors=None):
"""
Determine number of edits from a set of contributors.
@param contributors: contributor usernames
@type contributors: iterable of str
@return: number of edits for all provided usernames
@rtype: int
"""
if not contributors:
return len(list(self.revisions()))
cnt = self.contributors()
return sum(cnt[username] for username in contributors)
@deprecated('oldest_revision', since='20140421')
def getCreator(self):
"""
Get the first revision of the page.
DEPRECATED: Use Page.oldest_revision.
@rtype: tuple(username, Timestamp)
"""
result = self.oldest_revision
return result.user, unicode(result.timestamp.isoformat())
@deprecated('contributors() or revisions()', since='20150206')
@deprecated_args(limit='total')
def getLatestEditors(self, total=1):
"""
Get a list of revision informations of the last total edits.
DEPRECATED: Use Page.revisions.
@param total: iterate no more than this number of revisions in total
@return: list of dict, each dict containing the username and Timestamp
@rtype: list
"""
return [
{'user': rev.user, 'timestamp': unicode(rev.timestamp.isoformat())}
for rev in self.revisions(total=total)]
def merge_history(self, dest, timestamp=None, reason=None):
"""
Merge revisions from this page into another page.
See L{APISite.merge_history} for details.
@param dest: Destination page to which revisions will be merged
@type dest: pywikibot.Page
@param timestamp: Revisions from this page dating up to this timestamp
will be merged into the destination page (if not given or False,
all revisions will be merged)
@type timestamp: pywikibot.Timestamp
@param reason: Optional reason for the history merge
@type reason: str
"""
self.site.merge_history(self, dest, timestamp, reason)
@deprecated_args(
throttle=None, deleteAndMove='noredirect', movetalkpage='movetalk')
def move(self, newtitle, reason=None, movetalk=True, sysop=False,
noredirect=False, safe=True):
"""
Move this page to a new title.
@param newtitle: The new page title.
@param reason: The edit summary for the move.
@param movetalk: If true, move this page's talk page (if it exists)
@param sysop: Try to move using sysop account, if available
@param noredirect: if move succeeds, delete the old page
(usually requires sysop privileges, depending on wiki settings)
@param safe: If false, attempt to delete existing page at newtitle
(if there is one) and then move this page to that title
"""
if reason is None:
pywikibot.output('Moving %s to [[%s]].'
% (self.title(as_link=True), newtitle))
reason = pywikibot.input('Please enter a reason for the move:')
# TODO: implement "safe" parameter (Is this necessary ?)
# TODO: implement "sysop" parameter
return self.site.movepage(self, newtitle, reason,
movetalk=movetalk,
noredirect=noredirect)
    @deprecate_arg('throttle', None)
    def delete(self, reason=None, prompt=True, mark=False, quit=False):
        """
        Delete the page from the wiki. Requires administrator status.

        @param reason: The edit summary for the deletion, or rationale
            for deletion if requesting. If None, ask for it.
        @param prompt: If true, prompt user for confirmation before deleting.
        @param mark: If true, and user does not have sysop rights, place a
            speedy-deletion request on the page instead. If false, non-sysops
            will be asked before marking pages for deletion.
        @param quit: show also the quit option, when asking for confirmation.
        """
        if reason is None:
            # Ask interactively when no deletion reason was supplied.
            pywikibot.output('Deleting %s.' % (self.title(as_link=True)))
            reason = pywikibot.input('Please enter a reason for the deletion:')
        # If user is a sysop, delete the page
        if self.site.username(sysop=True):
            answer = 'y'
            # _noDeletePrompt is set on the site after the user answered
            # 'All' and suppresses further confirmation prompts.
            if prompt and not hasattr(self.site, '_noDeletePrompt'):
                answer = pywikibot.input_choice(
                    'Do you want to delete %s?' % self.title(
                        as_link=True, force_interwiki=True),
                    [('Yes', 'y'), ('No', 'n'), ('All', 'a')],
                    'n', automatic_quit=quit)
                if answer == 'a':
                    answer = 'y'
                    self.site._noDeletePrompt = True
            if answer == 'y':
                return self.site.deletepage(self, reason)
        else: # Otherwise mark it for deletion
            # _noMarkDeletePrompt mirrors _noDeletePrompt for the
            # "mark for deletion" workflow.
            if mark or hasattr(self.site, '_noMarkDeletePrompt'):
                answer = 'y'
            else:
                answer = pywikibot.input_choice(
                    "Can't delete %s; do you want to mark it "
                    'for deletion instead?' % self.title(as_link=True,
                                                         force_interwiki=True),
                    [('Yes', 'y'), ('No', 'n'), ('All', 'a')],
                    'n', automatic_quit=False)
                if answer == 'a':
                    answer = 'y'
                    self.site._noMarkDeletePrompt = True
            if answer == 'y':
                # Prepend a speedy-deletion request to the page text.
                template = '{{delete|1=%s}}\n' % reason
                # We can't add templates in a wikidata item, so let's use its
                # talk page
                if isinstance(self, pywikibot.ItemPage):
                    talk = self.toggleTalkPage()
                    talk.text = template + talk.text
                    talk.save(summary=reason)
                else:
                    self.text = template + self.text
                    self.save(summary=reason)
@deprecated_args(step=None)
def loadDeletedRevisions(self, total=None):
"""
Retrieve deleted revisions for this Page.
Stores all revisions' timestamps, dates, editors and comments in
self._deletedRevs attribute.
@return: iterator of timestamps (which can be used to retrieve
revisions later on).
@rtype: generator
"""
if not hasattr(self, '_deletedRevs'):
self._deletedRevs = {}
for item in self.site.deletedrevs(self, total=total):
for rev in item.get('revisions', []):
self._deletedRevs[rev['timestamp']] = rev
yield rev['timestamp']
@deprecated_args(retrieveText='content')
def getDeletedRevision(self, timestamp, content=False):
"""
Return a particular deleted revision by timestamp.
@return: a list of [date, editor, comment, text, restoration
marker]. text will be None, unless content is True (or has
been retrieved earlier). If timestamp is not found, returns
None.
@rtype: list
"""
if hasattr(self, '_deletedRevs'):
if timestamp in self._deletedRevs and (
not content
or 'content' in self._deletedRevs[timestamp]):
return self._deletedRevs[timestamp]
for item in self.site.deletedrevs(self, start=timestamp,
content=content, total=1):
# should only be one item with one revision
if item['title'] == self.title:
if 'revisions' in item:
return item['revisions'][0]
def markDeletedRevision(self, timestamp, undelete=True):
"""
Mark the revision identified by timestamp for undeletion.
@param undelete: if False, mark the revision to remain deleted.
@type undelete: bool
"""
if not hasattr(self, '_deletedRevs'):
self.loadDeletedRevisions()
if timestamp not in self._deletedRevs:
raise ValueError(
'Timestamp %d is not a deleted revision' % timestamp)
self._deletedRevs[timestamp]['marked'] = undelete
@deprecated_args(comment='reason', throttle=None)
def undelete(self, reason=None):
"""
Undelete revisions based on the markers set by previous calls.
If no calls have been made since loadDeletedRevisions(), everything
will be restored.
Simplest case::
Page(...).undelete('This will restore all revisions')
More complex::
pg = Page(...)
revs = pg.loadDeletedRevisions()
for rev in revs:
if ... #decide whether to undelete a revision
pg.markDeletedRevision(rev) #mark for undeletion
pg.undelete('This will restore only selected revisions.')
@param reason: Reason for the action.
@type reason: basestring
"""
if hasattr(self, '_deletedRevs'):
undelete_revs = [ts for ts, rev in self._deletedRevs.items()
if 'marked' in rev and rev['marked']]
else:
undelete_revs = []
if reason is None:
warn('Not passing a reason for undelete() is deprecated.',
DeprecationWarning)
pywikibot.output('Undeleting %s.' % (self.title(as_link=True)))
reason = pywikibot.input(
'Please enter a reason for the undeletion:')
self.site.undelete_page(self, reason, undelete_revs)
    @deprecate_arg('throttle', None)
    def protect(self, edit=False, move=False, create=None, upload=None,
                unprotect=False, reason=None, prompt=None, protections=None,
                **kwargs):
        """
        Protect or unprotect a wiki page. Requires administrator status.

        Valid protection levels (in MediaWiki 1.12) are '' (equivalent to
        'none'), 'autoconfirmed', and 'sysop'. If None is given, however,
        that protection will be skipped.

        The edit/move/create/upload/unprotect parameters are deprecated;
        pass a protections dict instead.

        @param protections: A dict mapping type of protection to protection
            level of that type.
        @type protections: dict
        @param reason: Reason for the action
        @type reason: basestring
        @param prompt: Whether to ask user for confirmation (deprecated).
            Defaults to protections is None
        @type prompt: bool
        """
        def process_deprecated_arg(value, arg_name):
            # Fold one deprecated boolean/level argument into the
            # protections dict, warning about its use.
            # if protections was set and value is None, don't interpret that
            # argument. But otherwise warn that the parameter was set
            # (even implicit)
            if called_using_deprecated_arg:
                if value is False: # explicit test for False (don't use not)
                    value = 'sysop'
                if value == 'none': # 'none' doesn't seem do be accepted
                    value = ''
                if value is not None: # empty string is allowed
                    protections[arg_name] = value
                    warn('"protections" argument of protect() replaces "{0}"'
                         .format(arg_name),
                         DeprecationWarning)
            else:
                if value:
                    warn('"protections" argument of protect() replaces "{0}";'
                         ' cannot use both.'.format(arg_name),
                         RuntimeWarning)

        # buffer that, because it might get changed
        called_using_deprecated_arg = protections is None
        if called_using_deprecated_arg:
            protections = {}
        process_deprecated_arg(edit, 'edit')
        process_deprecated_arg(move, 'move')
        process_deprecated_arg(create, 'create')
        process_deprecated_arg(upload, 'upload')

        if reason is None:
            # Ask interactively when no reason was supplied.
            pywikibot.output('Preparing to protection change of %s.'
                             % (self.title(as_link=True)))
            reason = pywikibot.input('Please enter a reason for the action:')
        if unprotect:
            warn('"unprotect" argument of protect() is deprecated',
                 DeprecationWarning, 2)
            # Unprotecting means setting every applicable type to ''.
            protections = {p_type: ''
                           for p_type in self.applicable_protections()}
        answer = 'y'
        if called_using_deprecated_arg and prompt is None:
            prompt = True
        if prompt:
            warn('"prompt" argument of protect() is deprecated',
                 DeprecationWarning, 2)
        # _noProtectPrompt is set on the site after the user answered
        # 'All' and suppresses further confirmation prompts.
        if prompt and not hasattr(self.site, '_noProtectPrompt'):
            answer = pywikibot.input_choice(
                'Do you want to change the protection level of %s?'
                % self.title(as_link=True, force_interwiki=True),
                [('Yes', 'y'), ('No', 'n'), ('All', 'a')],
                'n', automatic_quit=False)
            if answer == 'a':
                answer = 'y'
                self.site._noProtectPrompt = True
        if answer == 'y':
            return self.site.protect(self, protections, reason, **kwargs)
@deprecated_args(
comment='summary', oldCat='old_cat', newCat='new_cat',
sortKey='sort_key', inPlace='in_place')
def change_category(
self, old_cat, new_cat, summary=None, sort_key=None, in_place=True,
include=[]
):
"""
Remove page from oldCat and add it to newCat.
@param old_cat: category to be removed
@type old_cat: Category
@param new_cat: category to be added, if any
@type new_cat: Category or None
@param summary: string to use as an edit summary
@param sort_key: sortKey to use for the added category.
Unused if newCat is None, or if inPlace=True
If sortKey=True, the sortKey used for oldCat will be used.
@param in_place: if True, change categories in place rather than
rearranging them.
@param include: list of tags not to be disabled by default in relevant
textlib functions, where CategoryLinks can be searched.
@type include: list
@return: True if page was saved changed, otherwise False.
@rtype: bool
"""
# get list of Category objects the article is in and remove possible
# duplicates
cats = []
for cat in textlib.getCategoryLinks(self.text, site=self.site,
include=include):
if cat not in cats:
cats.append(cat)
if not self.canBeEdited():
pywikibot.output("Can't edit %s, skipping it..."
% self.title(as_link=True))
return False
if old_cat not in cats:
if self.namespace() != 10:
pywikibot.error('%s is not in category %s!'
% (self.title(as_link=True), old_cat.title()))
else:
pywikibot.output('%s is not in category %s, skipping...'
% (self.title(as_link=True), old_cat.title()))
return False
# This prevents the bot from adding new_cat if it is already present.
if new_cat in cats:
new_cat = None
oldtext = self.text
if in_place or self.namespace() == 10:
newtext = textlib.replaceCategoryInPlace(oldtext, old_cat, new_cat,
site=self.site)
else:
old_cat_pos = cats.index(old_cat)
if new_cat:
if sort_key is True:
# Fetch sort_key from old_cat in current page.
sort_key = cats[old_cat_pos].sortKey
cats[old_cat_pos] = Category(self.site, new_cat.title(),
sort_key=sort_key)
else:
cats.pop(old_cat_pos)
try:
newtext = textlib.replaceCategoryLinks(oldtext, cats)
except ValueError:
# Make sure that the only way replaceCategoryLinks() can return
# a ValueError is in the case of interwiki links to self.
pywikibot.output('Skipping %s because of interwiki link to '
'self' % self.title())
return False
if oldtext != newtext:
try:
self.put(newtext, summary)
return True
except pywikibot.PageSaveRelatedError as error:
pywikibot.output('Page %s not saved: %s'
% (self.title(as_link=True),
error))
except pywikibot.NoUsername:
pywikibot.output('Page %s not saved; sysop privileges '
'required.' % self.title(as_link=True))
return False
@deprecated('Page.is_flow_page()', since='20150128')
def isFlowPage(self):
"""DEPRECATED: use self.is_flow_page instead."""
return self.is_flow_page()
def is_flow_page(self):
"""
Whether a page is a Flow page.
@rtype: bool
"""
return self.content_model == 'flow-board'
# ####### DEPRECATED METHODS ########
@deprecated('Site.encoding()', since='20090307')
def encoding(self):
"""DEPRECATED: use self.site.encoding instead."""
return self.site.encoding()
@deprecated('Page.title(with_ns=False)', since='20090307')
def titleWithoutNamespace(self, underscore=False):
"""DEPRECATED: use self.title(with_ns=False) instead."""
return self.title(underscore=underscore, with_ns=False,
with_section=False)
@deprecated('Page.title(as_filename=True)', since='20090307')
def titleForFilename(self):
"""DEPRECATED: use self.title(as_filename=True) instead."""
return self.title(as_filename=True)
@deprecated('Page.title(with_section=False)', since='20090307')
def sectionFreeTitle(self, underscore=False):
"""DEPRECATED: use self.title(with_section=False) instead."""
return self.title(underscore=underscore, with_section=False)
@deprecated('Page.title(as_link=True)', since='20090307')
@deprecated_args(
forceInterwiki='force_interwiki', noInterwiki='no_interwiki')
def aslink(
self, force_interwiki=False, textlink=False, no_interwiki=False
):
"""DEPRECATED: use self.title(as_link=True) instead."""
return self.title(as_link=True, force_interwiki=force_interwiki,
allow_interwiki=not no_interwiki, textlink=textlink)
@deprecated('Page.title(as_url=True)', since='20090307')
def urlname(self):
"""Return the Page title encoded for use in an URL.
DEPRECATED: use self.title(as_url=True) instead.
"""
return self.title(as_url=True)
@deprecated('Page.protection()', since='20150725')
def getRestrictions(self):
"""DEPRECATED. Use self.protection() instead."""
restrictions = self.protection()
return {k: list(restrictions[k]) for k in restrictions}
class Page(BasePage):
    """Page: A MediaWiki page."""
    @deprecated_args(defaultNamespace='ns', insite=None)
    def __init__(self, source, title='', ns=0):
        """Instantiate a Page object."""
        # A title is mandatory only when source is a Site; other sources
        # (e.g. a Link) already carry the title.
        if isinstance(source, pywikibot.site.BaseSite):
            if not title:
                raise ValueError('Title must be specified and not empty '
                                 'if source is a Site.')
        super(Page, self).__init__(source, title, ns)
    @property
    def raw_extracted_templates(self):
        """
        Extract templates using L{textlib.extract_templates_and_params}.
        Disabled parts and whitespace are stripped, except for
        whitespace in anonymous positional arguments.
        This value is cached.
        @rtype: list of (str, OrderedDict)
        """
        # Parse only once per Page instance; the result is memoized on self.
        if not hasattr(self, '_raw_extracted_templates'):
            templates = textlib.extract_templates_and_params(
                self.text, True, True)
            self._raw_extracted_templates = templates
        return self._raw_extracted_templates
    @deprecate_arg('get_redirect', None)
    def templatesWithParams(self):
        """
        Return templates used on this Page.
        The templates are extracted by L{textlib.extract_templates_and_params},
        with positional arguments placed first in order, and each named
        argument appearing as 'name=value'.
        All parameter keys and values for each template are stripped of
        whitespace.
        @return: a list of tuples with one tuple for each template invocation
            in the page, with the template Page as the first entry and a list
            of parameters as the second entry.
        @rtype: list of (Page, list)
        """
        # WARNING: may not return all templates used in particularly
        # intricate cases such as template substitution
        titles = [t.title() for t in self.templates()]
        templates = self.raw_extracted_templates
        # backwards-compatibility: convert the dict returned as the second
        # element into a list in the format used by old scripts
        result = []
        for template in templates:
            try:
                link = pywikibot.Link(template[0], self.site,
                                      default_namespace=10)
                # Skip anything not reported by self.templates() so parser
                # functions and invalid names are filtered out.
                if link.canonical_title() not in titles:
                    continue
            except pywikibot.Error:
                # this is a parser function or magic word, not template name
                # the template name might also contain invalid parts
                continue
            args = template[1]
            intkeys = {}
            named = {}
            positional = []
            # Split parameters into integer-keyed and named ones.
            for key in sorted(args):
                try:
                    intkeys[int(key)] = args[key]
                except ValueError:
                    named[key] = args[key]
            for i in range(1, len(intkeys) + 1):
                # only those args with consecutive integer keys can be
                # treated as positional; an integer could also be used
                # (out of order) as the key for a named argument
                # example: {{tmp|one|two|5=five|three}}
                if i in intkeys:
                    positional.append(intkeys[i])
                else:
                    # First gap found: demote this and all higher integer
                    # keys to named 'n=value' form and stop.
                    for k in intkeys:
                        if k < 1 or k >= i:
                            named[str(k)] = intkeys[k]
                    break
            for name in named:
                positional.append('%s=%s' % (name, named[name]))
            result.append((pywikibot.Page(link, self.site), positional))
        return result
    def set_redirect_target(self, target_page, create=False, force=False,
                            keep_section=False, save=True, **kwargs):
        """
        Change the page's text to point to the redirect page.
        @param target_page: target of the redirect, this argument is required.
        @type target_page: pywikibot.Page or string
        @param create: if true, it creates the redirect even if the page
            doesn't exist.
        @type create: bool
        @param force: if true, it set the redirect target even the page
            doesn't exist or it's not redirect.
        @type force: bool
        @param keep_section: if the old redirect links to a section
            and the new one doesn't it uses the old redirect's section.
        @type keep_section: bool
        @param save: if true, it saves the page immediately.
        @type save: bool
        @param kwargs: Arguments which are used for saving the page directly
            afterwards, like 'summary' for edit summary.
        """
        if isinstance(target_page, basestring):
            target_page = pywikibot.Page(self.site, target_page)
        elif self.site != target_page.site:
            # Cross-site redirects are not plain redirects.
            raise pywikibot.InterwikiRedirectPage(self, target_page)
        if not self.exists() and not (create or force):
            raise pywikibot.NoPage(self)
        if self.exists() and not self.isRedirectPage() and not force:
            raise pywikibot.IsNotRedirectPage(self)
        redirect_regex = self.site.redirectRegex()
        if self.exists():
            old_text = self.get(get_redirect=True)
        else:
            old_text = ''
        result = redirect_regex.search(old_text)
        if result:
            oldlink = result.group(1)
            if (keep_section and '#' in oldlink
                    and target_page.section() is None):
                # Carry the old redirect's section anchor over to the new
                # target, which has none of its own.
                sectionlink = oldlink[oldlink.index('#'):]
                target_page = pywikibot.Page(
                    self.site,
                    target_page.title() + sectionlink
                )
            # Keep any text surrounding the old redirect directive.
            prefix = self.text[:result.start()]
            suffix = self.text[result.end():]
        else:
            prefix = ''
            suffix = ''
        target_link = target_page.title(as_link=True, textlink=True,
                                        allow_interwiki=False)
        target_link = '#{0} {1}'.format(self.site.redirect(), target_link)
        self.text = prefix + target_link + suffix
        if save:
            self.save(**kwargs)
class FilePage(Page):

    """
    A subclass of Page representing a file description page.

    Supports the same interface as Page, with some added methods.
    """

    @deprecate_arg('insite', None)
    def __init__(self, source, title=''):
        """Initializer."""
        self._file_revisions = {}  # dictionary to cache File history.
        super(FilePage, self).__init__(source, title, 6)
        if self.namespace() != 6:
            raise ValueError("'%s' is not in the file namespace!" % title)

    def _load_file_revisions(self, imageinfo):
        """Store a FileInfo for each imageinfo entry, keyed by timestamp."""
        for file_rev in imageinfo:
            file_revision = FileInfo(file_rev)
            self._file_revisions[file_revision.timestamp] = file_revision

    @property
    def latest_file_info(self):
        """
        Retrieve and store information of latest Image rev. of FilePage.

        At the same time, the whole history of Image is fetched and cached in
        self._file_revisions

        @return: instance of FileInfo()
        """
        if not self._file_revisions:
            self.site.loadimageinfo(self, history=True)
        # Timestamps order chronologically, so max() is the latest revision.
        latest_ts = max(self._file_revisions)
        return self._file_revisions[latest_ts]

    @property
    def oldest_file_info(self):
        """
        Retrieve and store information of oldest Image rev. of FilePage.

        At the same time, the whole history of Image is fetched and cached in
        self._file_revisions

        @return: instance of FileInfo()
        """
        if not self._file_revisions:
            self.site.loadimageinfo(self, history=True)
        oldest_ts = min(self._file_revisions)
        return self._file_revisions[oldest_ts]

    def get_file_history(self):
        """
        Return the file's version history.

        @return: dictionary with:
            key: timestamp of the entry
            value: instance of FileInfo()
        @rtype: dict
        """
        if not self._file_revisions:
            self.site.loadimageinfo(self, history=True)
        return self._file_revisions

    def getImagePageHtml(self):
        """
        Download the file page, and return the HTML, as a unicode string.

        Caches the HTML code, so that if you run this method twice on the
        same FilePage object, the page will only be downloaded once.
        """
        if not hasattr(self, '_imagePageHtml'):
            path = '%s/index.php?title=%s' \
                   % (self.site.scriptpath(), self.title(as_url=True))
            self._imagePageHtml = http.request(self.site, path)
        return self._imagePageHtml

    @deprecated('get_file_url', since='20160609')
    def fileUrl(self):
        """Return the URL for the file described on this page."""
        return self.latest_file_info.url

    def get_file_url(self, url_width=None, url_height=None, url_param=None):
        """
        Return the url or the thumburl of the file described on this page.

        Fetch the information if not available.

        Once retrieved, thumburl information will also be accessible as
        latest_file_info attributes, named as in [1]:
        - url, thumburl, thumbwidth and thumbheight

        Parameters correspond to iiprops in:
        [1] U{https://www.mediawiki.org/wiki/API:Imageinfo}

        Parameters validation and error handling left to the API call.

        @param url_width: see iiurlwidth in [1]
        @param url_height: see iiurlheigth in [1]
        @param url_param: see iiurlparam in [1]
        @return: latest file url or thumburl
        @rtype: unicode
        """
        # Plain url is requested.
        if url_width is None and url_height is None and url_param is None:
            return self.latest_file_info.url
        # Thumburl is requested.
        self.site.loadimageinfo(self, history=not self._file_revisions,
                                url_width=url_width, url_height=url_height,
                                url_param=url_param)
        return self.latest_file_info.thumburl

    @deprecated('fileIsShared', since='20121101')
    def fileIsOnCommons(self):
        """
        DEPRECATED. Check if the image is stored on Wikimedia Commons.

        @rtype: bool
        """
        return self.fileIsShared()

    def fileIsShared(self):
        """
        Check if the file is stored on any known shared repository.

        @rtype: bool
        """
        # as of now, the only known repositories are commons and wikitravel
        # TODO: put the URLs to family file
        if not self.site.has_image_repository:
            return False
        elif 'wikitravel_shared' in self.site.shared_image_repository():
            return self.latest_file_info.url.startswith(
                'https://wikitravel.org/upload/shared/')
        else:
            return self.latest_file_info.url.startswith(
                'https://upload.wikimedia.org/wikipedia/commons/')

    @deprecated('FilePage.latest_file_info.sha1', since='20141106')
    def getFileMd5Sum(self):
        """Return image file's MD5 checksum."""
        # TODO: check whether this needs a User-Agent header added
        # Use latest_file_info.url directly rather than the deprecated
        # fileUrl() wrapper, which would emit a deprecation warning.
        req = http.fetch(self.latest_file_info.url)
        h = hashlib.md5()
        h.update(req.raw)
        return h.hexdigest()

    @deprecated('FilePage.latest_file_info.sha1', since='20141106')
    def getFileSHA1Sum(self):
        """Return the file's SHA1 checksum."""
        return self.latest_file_info.sha1

    @deprecated('FilePage.oldest_file_info.user', since='20150206')
    def getFirstUploader(self):
        """
        Return a list with first uploader of the FilePage and timestamp.

        For compatibility with compat only.
        """
        return [self.oldest_file_info.user,
                unicode(self.oldest_file_info.timestamp.isoformat())]

    @deprecated('FilePage.latest_file_info.user', since='20141106')
    def getLatestUploader(self):
        """
        Return a list with latest uploader of the FilePage and timestamp.

        For compatibility with compat only.
        """
        return [self.latest_file_info.user,
                unicode(self.latest_file_info.timestamp.isoformat())]

    @deprecated('FilePage.get_file_history()', since='20141106')
    def getFileVersionHistory(self):
        """
        Return the file's version history.

        @return: A list of dictionaries with the following keys:
            [comment, sha1, url, timestamp, metadata,
            height, width, mime, user, descriptionurl, size]
        @rtype: list
        """
        return self.site.loadimageinfo(self, history=True)

    def getFileVersionHistoryTable(self):
        """Return the version history in the form of a wiki table."""
        lines = []
        for info in self.getFileVersionHistory():
            dimension = '{width}×{height} px ({size} bytes)'.format(**info)
            lines.append('| {timestamp} || {user} || {dimension} |'
                         '| <nowiki>{comment}</nowiki>'
                         ''.format(dimension=dimension, **info))
        return ('{| class="wikitable"\n'
                '! {{int:filehist-datetime}} || {{int:filehist-user}} |'
                '| {{int:filehist-dimensions}} || {{int:filehist-comment}}\n'
                '|-\n%s\n|}\n' % '\n|-\n'.join(lines))

    @deprecated_args(step=None)
    def usingPages(self, total=None, content=False):
        """
        Yield Pages on which the file is displayed.

        @param total: iterate no more than this number of pages in total
        @param content: if True, load the current content of each iterated page
            (default False)
        """
        return self.site.imageusage(self, total=total, content=content)

    def upload(self, source, **kwargs):
        """
        Upload this file to the wiki.

        keyword arguments are from site.upload() method.

        @param source: Path or URL to the file to be uploaded.
        @type source: str

        @keyword comment: Edit summary; if this is not provided, then
            filepage.text will be used. An empty summary is not permitted.
            This may also serve as the initial page text (see below).
        @keyword text: Initial page text; if this is not set, then
            filepage.text will be used, or comment.
        @keyword watch: If true, add filepage to the bot user's watchlist
        @keyword ignore_warnings: It may be a static boolean, a callable
            returning a boolean or an iterable. The callable gets a list of
            UploadWarning instances and the iterable should contain the warning
            codes for which an equivalent callable would return True if all
            UploadWarning codes are in thet list. If the result is False it'll
            not continue uploading the file and otherwise disable any warning
            and reattempt to upload the file. NOTE: If report_success is True
            or None it'll raise an UploadWarning exception if the static
            boolean is False.
        @type ignore_warnings: bool or callable or iterable of str
        @keyword chunk_size: The chunk size in bytesfor chunked uploading (see
            U{https://www.mediawiki.org/wiki/API:Upload#Chunked_uploading}). It
            will only upload in chunks, if the version number is 1.20 or higher
            and the chunk size is positive but lower than the file size.
        @type chunk_size: int
        @keyword _file_key: Reuses an already uploaded file using the filekey.
            If None (default) it will upload the file.
        @type _file_key: str or None
        @keyword _offset: When file_key is not None this can be an integer to
            continue a previously canceled chunked upload. If False it treats
            that as a finished upload. If True it requests the stash info from
            the server to determine the offset. By default starts at 0.
        @type _offset: int or bool
        @keyword _verify_stash: Requests the SHA1 and file size uploaded and
            compares it to the local file. Also verifies that _offset is
            matching the file size if the _offset is an int. If _offset is
            False if verifies that the file size match with the local file. If
            None it'll verifies the stash when a file key and offset is given.
        @type _verify_stash: bool or None
        @keyword report_success: If the upload was successful it'll print a
            success message and if ignore_warnings is set to False it'll
            raise an UploadWarning if a warning occurred. If it's
            None (default) it'll be True if ignore_warnings is a bool and False
            otherwise. If it's True or None ignore_warnings must be a bool.
        @return: It returns True if the upload was successful and False
            otherwise.
        @rtype: bool
        """
        filename = url = None
        # A '://' marks the source as a URL; otherwise treat it as a path.
        if '://' in source:
            url = source
        else:
            filename = source
        return self.site.upload(self, source_filename=filename, source_url=url,
                                **kwargs)

    def download(self, filename=None, chunk_size=100 * 1024, revision=None):
        """
        Download to filename file of FilePage.

        @param filename: filename where to save file:
            None: self.title(as_filename=True, with_ns=False)
            will be used
            str: provided filename will be used.
        @type filename: None or str
        @param chunk_size: the size of each chunk to be received and
            written to file.
        @type chunk_size: int
        @param revision: file revision to download:
            None: self.latest_file_info will be used
            FileInfo: provided revision will be used.
        @type revision: None or FileInfo
        @return: True if download is successful, False otherwise.
        @raise: IOError if filename cannot be written for any reason.
        """
        if filename is None:
            filename = self.title(as_filename=True, with_ns=False)

        filename = os.path.expanduser(filename)

        if revision is None:
            revision = self.latest_file_info

        req = http.fetch(revision.url, stream=True)
        if req.status == 200:
            # Any IOError while writing propagates to the caller; the
            # former ``except IOError as e: raise e`` wrapper was a no-op.
            with open(filename, 'wb') as f:
                for chunk in req.data.iter_content(chunk_size):
                    f.write(chunk)
            # Verify the download against the revision's SHA1 checksum.
            sha1 = compute_file_hash(filename)
            return sha1 == revision.sha1
        else:
            pywikibot.warning(
                'Unsuccessful request (%s): %s' % (req.status, req.uri))
            return False

    def globalusage(self, total=None):
        """
        Iterate all global usage for this page.

        @param total: iterate no more than this number of pages in total
        @return: a generator that yields Pages also on sites different from
            self.site.
        @rtype: generator
        """
        return self.site.globalusage(self, total=total)
# Module-level deprecation shim: keep the historical name 'ImagePage'
# importable as an alias of FilePage, emitting a deprecation warning.
wrapper = _ModuleDeprecationWrapper(__name__)
wrapper._add_deprecated_attr('ImagePage', FilePage, since='20140924')
class Category(Page):
    """A page in the Category: namespace."""
    @deprecated_args(insite=None, sortKey='sort_key')
    def __init__(self, source, title='', sort_key=None):
        """
        Initializer.
        All parameters are the same as for Page() Initializer.
        """
        # Stored for later use by aslink(); not forwarded to Page.
        self.sortKey = sort_key
        Page.__init__(self, source, title, ns=14)
        if self.namespace() != 14:
            raise ValueError("'%s' is not in the category namespace!"
                             % title)
    @deprecated_args(
        forceInterwiki=None, textlink=None, noInterwiki=None,
        sortKey='sort_key')
    def aslink(self, sort_key=None):
        """
        Return a link to place a page in this Category.
        Use this only to generate a "true" category link, not for interwikis
        or text links to category pages.
        @param sort_key: The sort key for the article to be placed in this
            Category; if omitted, default sort key is used.
        @type sort_key: (optional) unicode
        """
        # An explicit argument takes precedence over the stored sortKey.
        key = sort_key or self.sortKey
        if key is not None:
            title_with_sort_key = self.title(with_section=False) + '|' + key
        else:
            title_with_sort_key = self.title(with_section=False)
        return '[[%s]]' % title_with_sort_key
    @deprecated_args(startFrom=None, cacheResults=None, step=None)
    def subcategories(self, recurse=False, total=None, content=False):
        """
        Iterate all subcategories of the current category.
        @param recurse: if not False or 0, also iterate subcategories of
            subcategories. If an int, limit recursion to this number of
            levels. (Example: recurse=1 will iterate direct subcats and
            first-level sub-sub-cats, but no deeper.)
        @type recurse: int or bool
        @param total: iterate no more than this number of
            subcategories in total (at all levels)
        @param content: if True, retrieve the content of the current version
            of each category description page (default False)
        """
        # An int recursion depth is decremented per level; a bool True is
        # passed through unchanged, meaning unlimited depth.
        if not isinstance(recurse, bool) and recurse:
            recurse = recurse - 1
        if not hasattr(self, '_subcats'):
            # First call: query the API, filling the cache as we go.
            # NOTE(review): if this generator is abandoned before
            # exhaustion, _subcats keeps only a partial list that later
            # calls will serve as if complete — confirm before relying on
            # the cache.
            self._subcats = []
            for member in self.site.categorymembers(
                    self, member_type='subcat', total=total, content=content):
                subcat = Category(member)
                self._subcats.append(subcat)
                yield subcat
                if total is not None:
                    total -= 1
                    if total == 0:
                        return
                if recurse:
                    for item in subcat.subcategories(
                            recurse, total=total, content=content):
                        yield item
                        if total is not None:
                            total -= 1
                            if total == 0:
                                return
        else:
            # Subsequent calls: serve entirely from the cache.
            for subcat in self._subcats:
                yield subcat
                if total is not None:
                    total -= 1
                    if total == 0:
                        return
                if recurse:
                    for item in subcat.subcategories(
                            recurse, total=total, content=content):
                        yield item
                        if total is not None:
                            total -= 1
                            if total == 0:
                                return
    @deprecated_args(startFrom='startprefix', step=None)
    def articles(self, recurse=False, total=None,
                 content=False, namespaces=None, sortby=None,
                 reverse=False, starttime=None, endtime=None,
                 startsort=None, endsort=None,
                 startprefix=None, endprefix=None,
                 ):
        """
        Yield all articles in the current category.
        By default, yields all *pages* in the category that are not
        subcategories!
        @param recurse: if not False or 0, also iterate articles in
            subcategories. If an int, limit recursion to this number of
            levels. (Example: recurse=1 will iterate articles in first-level
            subcats, but no deeper.)
        @type recurse: int or bool
        @param total: iterate no more than this number of pages in
            total (at all levels)
        @param namespaces: only yield pages in the specified namespaces
        @type namespaces: int or list of ints
        @param content: if True, retrieve the content of the current version
            of each page (default False)
        @param sortby: determines the order in which results are generated,
            valid values are "sortkey" (default, results ordered by category
            sort key) or "timestamp" (results ordered by time page was
            added to the category). This applies recursively.
        @type sortby: str
        @param reverse: if True, generate results in reverse order
            (default False)
        @param starttime: if provided, only generate pages added after this
            time; not valid unless sortby="timestamp"
        @type starttime: pywikibot.Timestamp
        @param endtime: if provided, only generate pages added before this
            time; not valid unless sortby="timestamp"
        @type endtime: pywikibot.Timestamp
        @param startsort: if provided, only generate pages that have a
            sortkey >= startsort; not valid if sortby="timestamp"
            (Deprecated in MW 1.24)
        @type startsort: str
        @param endsort: if provided, only generate pages that have a
            sortkey <= endsort; not valid if sortby="timestamp"
            (Deprecated in MW 1.24)
        @type endsort: str
        @param startprefix: if provided, only generate pages >= this title
            lexically; not valid if sortby="timestamp"; overrides "startsort"
            (requires MW 1.18+)
        @type startprefix: str
        @param endprefix: if provided, only generate pages < this title
            lexically; not valid if sortby="timestamp"; overrides "endsort"
            (requires MW 1.18+)
        @type endprefix: str
        """
        seen = set()
        for member in self.site.categorymembers(self,
                                                namespaces=namespaces,
                                                total=total,
                                                content=content, sortby=sortby,
                                                reverse=reverse,
                                                starttime=starttime,
                                                endtime=endtime,
                                                startsort=startsort,
                                                endsort=endsort,
                                                startprefix=startprefix,
                                                endprefix=endprefix,
                                                member_type=['page', 'file']
                                                ):
            if recurse:
                # Track what was yielded so recursion into subcategories
                # does not produce duplicates.
                seen.add(hash(member))
            yield member
            if total is not None:
                total -= 1
                if total == 0:
                    return
        if recurse:
            # Decrement an int depth; a bool True means unlimited depth.
            if not isinstance(recurse, bool) and recurse:
                recurse = recurse - 1
            for subcat in self.subcategories():
                for article in subcat.articles(recurse, total=total,
                                               content=content,
                                               namespaces=namespaces,
                                               sortby=sortby,
                                               reverse=reverse,
                                               starttime=starttime,
                                               endtime=endtime,
                                               startsort=startsort,
                                               endsort=endsort,
                                               startprefix=startprefix,
                                               endprefix=endprefix,
                                               ):
                    hash_value = hash(article)
                    if hash_value in seen:
                        continue
                    seen.add(hash_value)
                    yield article
                    if total is not None:
                        total -= 1
                        if total == 0:
                            return
    @deprecated_args(step=None)
    def members(self, recurse=False, namespaces=None, total=None,
                content=False):
        """Yield all category contents (subcats, pages, and files)."""
        for member in self.site.categorymembers(
                self, namespaces, total=total, content=content):
            yield member
            if total is not None:
                total -= 1
                if total == 0:
                    return
        if recurse:
            if not isinstance(recurse, bool) and recurse:
                recurse = recurse - 1
            for subcat in self.subcategories():
                for article in subcat.members(
                        recurse, namespaces, total=total, content=content):
                    yield article
                    if total is not None:
                        total -= 1
                        if total == 0:
                            return
    @need_version('1.13')
    def isEmptyCategory(self):
        """
        Return True if category has no members (including subcategories).
        @rtype: bool
        """
        ci = self.categoryinfo
        return sum(ci[k] for k in ['files', 'pages', 'subcats']) == 0
    @need_version('1.11')
    def isHiddenCategory(self):
        """
        Return True if the category is hidden.
        @rtype: bool
        """
        return 'hiddencat' in self.properties()
    def copyTo(self, cat, message):
        """
        Copy text of category page to a new page. Does not move contents.
        @param cat: New category title (without namespace) or Category object
        @type cat: unicode or Category
        @param message: message to use for category creation message
            If two %s are provided in message, will be replaced
            by (self.title, authorsList)
        @type message: unicode
        @return: True if copying was successful, False if target page
            already existed.
        @rtype: bool
        """
        # This seems far too specialized to be in the top-level framework
        # move to category.py? (Although it doesn't seem to be used there,
        # either)
        if not isinstance(cat, Category):
            target_cat = Category(self.site, 'Category:' + cat)
        else:
            target_cat = cat
        if target_cat.exists():
            pywikibot.warning(
                'Target page %s already exists!' % target_cat.title())
            return False
        else:
            pywikibot.output('Moving text from %s to %s.'
                             % (self.title(), target_cat.title()))
            authors = ', '.join(self.contributingUsers())
            try:
                # message with two %s placeholders gets title and authors;
                # anything else is used verbatim.
                creation_summary = message % (self.title(), authors)
            except TypeError:
                creation_summary = message
            target_cat.put(self.get(), creation_summary)
            return True
    @deprecated_args(cfdTemplates='cfd_templates')
    def copyAndKeep(self, catname, cfd_templates, message):
        """
        Copy partial category page text (not contents) to a new title.
        Like copyTo above, except this removes a list of templates (like
        deletion templates) that appear in the old category text. It also
        removes all text between the two HTML comments BEGIN CFD TEMPLATE
        and END CFD TEMPLATE. (This is to deal with CFD templates that are
        substituted.)
        Returns true if copying was successful, false if target page already
        existed.
        @param catname: New category title (without namespace)
        @param cfd_templates: A list (or iterator) of templates to be removed
            from the page text
        @return: True if copying was successful, False if target page
            already existed.
        @rtype: bool
        """
        # I don't see why we need this as part of the framework either
        # move to scripts/category.py?
        target_cat = Category(self.site, 'Category:' + catname)
        if target_cat.exists():
            pywikibot.warning('Target page %s already exists!'
                              % target_cat.title())
            return False
        else:
            pywikibot.output(
                'Moving text from %s to %s.'
                % (self.title(), target_cat.title()))
            authors = ', '.join(self.contributingUsers())
            creation_summary = message % (self.title(), authors)
            newtext = self.get()
        # Strip each listed CFD template invocation from the text.
        for regexName in cfd_templates:
            matchcfd = re.compile(r'{{%s.*?}}' % regexName, re.IGNORECASE)
            newtext = matchcfd.sub('', newtext)
            # Also strip everything between the CFD marker comments
            # (handles substituted CFD templates).
            matchcomment = re.compile(
                r'<!--BEGIN CFD TEMPLATE-->.*?<!--END CFD TEMPLATE-->',
                re.IGNORECASE | re.MULTILINE | re.DOTALL)
            newtext = matchcomment.sub('', newtext)
            pos = 0
            # Drop leading blank lines left over by the removals.
            while (newtext[pos:pos + 1] == '\n'):
                pos = pos + 1
            newtext = newtext[pos:]
        target_cat.put(newtext, creation_summary)
        return True
    @property
    def categoryinfo(self):
        """
        Return a dict containing information about the category.
        The dict contains values for:
        Numbers of pages, subcategories, files, and total contents.
        @rtype: dict
        """
        return self.site.categoryinfo(self)
    def newest_pages(self, total=None):
        """
        Return pages in a category ordered by the creation date.
        If two or more pages are created at the same time, the pages are
        returned in the order they were added to the category. The most
        recently added page is returned first.
        It only allows to return the pages ordered from newest to oldest, as it
        is impossible to determine the oldest page in a category without
        checking all pages. But it is possible to check the category in order
        with the newly added first and it yields all pages which were created
        after the currently checked page was added (and thus there is no page
        created after any of the cached but added before the currently
        checked).
        @param total: The total number of pages queried.
        @type total: int
        @return: A page generator of all pages in a category ordered by the
            creation date. From newest to oldest. Note: It currently only
            returns Page instances and not a subclass of it if possible. This
            might change so don't expect to only get Page instances.
        @rtype: generator
        """
        def check_cache(latest):
            """Return the cached pages in order and not more than total."""
            cached = []
            for timestamp in sorted((ts for ts in cache if ts > latest),
                                    reverse=True):
                # The complete list can be removed, it'll either yield all of
                # them, or only a portion but will skip the rest anyway
                cached += cache.pop(timestamp)[:None if total is None else
                                               total - len(cached)]
                if total and len(cached) >= total:
                    break  # already got enough
            assert total is None or len(cached) <= total, \
                'Number of caches is more than total number requested'
            return cached
        # all pages which have been checked but where created before the
        # current page was added, at some point they will be created after
        # the current page was added. It saves all pages via the creation
        # timestamp. Be prepared for multiple pages.
        cache = defaultdict(list)
        # TODO: Make site.categorymembers is usable as it returns pages
        # There is no total defined, as it's not known how many pages need to
        # be checked before the total amount of new pages was found. In worst
        # case all pages of a category need to be checked.
        for member in pywikibot.data.api.QueryGenerator(
                site=self.site, parameters={
                    'list': 'categorymembers', 'cmsort': 'timestamp',
                    'cmdir': 'older', 'cmprop': 'timestamp|title',
                    'cmtitle': self.title()}):
            # TODO: Upcast to suitable class
            page = pywikibot.Page(self.site, member['title'])
            assert page.namespace() == member['ns'], \
                'Namespace of the page is not consistent'
            cached = check_cache(pywikibot.Timestamp.fromISOformat(
                member['timestamp']))
            for cached_page in cached:
                yield cached_page
            if total is not None:
                total -= len(cached)
                if total <= 0:
                    break
            cache[page.oldest_revision.timestamp] += [page]
        else:
            # clear cache
            assert total is None or total > 0, \
                'As many items as given in total already returned'
            for cached_page in check_cache(pywikibot.Timestamp.min):
                yield cached_page
    # ### DEPRECATED METHODS ####
    @deprecated('list(Category.subcategories(...))', since='20090307')
    def subcategoriesList(self, recurse=False):
        """DEPRECATED: Equivalent to list(self.subcategories(...))."""
        return sorted(list(set(self.subcategories(recurse))))
    @deprecated('list(Category.articles(...))', since='20090307')
    def articlesList(self, recurse=False):
        """DEPRECATED: equivalent to list(self.articles(...))."""
        return sorted(list(set(self.articles(recurse))))
    @deprecated('Category.categories()', since='20090307')
    def supercategories(self):
        """DEPRECATED: equivalent to self.categories()."""
        return self.categories()
    @deprecated('list(Category.categories(...))', since='20090307')
    def supercategoriesList(self):
        """DEPRECATED: equivalent to list(self.categories(...))."""
        return sorted(list(set(self.categories())))
class User(Page):
"""
A class that represents a Wiki user.
This class also represents the Wiki page User:<username>
"""
@deprecated_args(site='source', name='title')
def __init__(self, source, title=''):
"""
Initializer for a User object.
All parameters are the same as for Page() Initializer.
"""
self._isAutoblock = True
if title.startswith('#'):
title = title[1:]
elif ':#' in title:
title = title.replace(':#', ':')
else:
self._isAutoblock = False
Page.__init__(self, source, title, ns=2)
if self.namespace() != 2:
raise ValueError("'%s' is not in the user namespace!"
% title)
if self._isAutoblock:
# This user is probably being queried for purpose of lifting
# an autoblock.
pywikibot.output(
'This is an autoblock ID, you can only use to unblock it.')
@deprecated('User.username', since='20160504')
def name(self):
"""
The username.
DEPRECATED: use username instead.
@rtype: unicode
"""
return self.username
@property
def username(self):
"""
The username.
Convenience method that returns the title of the page with
namespace prefix omitted, which is the username.
@rtype: unicode
"""
if self._isAutoblock:
return '#' + self.title(with_ns=False)
else:
return self.title(with_ns=False)
def isRegistered(self, force=False):
"""
Determine if the user is registered on the site.
It is possible to have a page named User:xyz and not have
a corresponding user with username xyz.
The page does not need to exist for this method to return
True.
@param force: if True, forces reloading the data from API
@type force: bool
@rtype: bool
"""
# T135828: the registration timestamp may be None but the key exists
return (not self.isAnonymous()
and 'registration' in self.getprops(force))
def isAnonymous(self):
"""
Determine if the user is editing as an IP address.
@rtype: bool
"""
return is_IP(self.username)
def getprops(self, force=False):
"""
Return a properties about the user.
@param force: if True, forces reloading the data from API
@type force: bool
@rtype: dict
"""
if force and hasattr(self, '_userprops'):
del self._userprops
if not hasattr(self, '_userprops'):
self._userprops = list(self.site.users([self.username, ]))[0]
if self.isAnonymous():
r = list(self.site.blocks(users=self.username))
if r:
self._userprops['blockedby'] = r[0]['by']
self._userprops['blockreason'] = r[0]['reason']
return self._userprops
@deprecated('User.registration()', since='20100609')
def registrationTime(self, force=False):
"""
DEPRECATED. Fetch registration date for this user.
@param force: if True, forces reloading the data from API
@type force: bool
@return: long (MediaWiki's internal timestamp format) or 0
@rtype: int or long
"""
if self.registration():
return long(self.registration().strftime('%Y%m%d%H%M%S'))
else:
return 0
def registration(self, force=False):
"""
Fetch registration date for this user.
@param force: if True, forces reloading the data from API
@type force: bool
@rtype: pywikibot.Timestamp or None
"""
if self.isAnonymous():
return None
reg = self.getprops(force).get('registration')
if reg:
return pywikibot.Timestamp.fromISOformat(reg)
def editCount(self, force=False):
"""
Return edit count for a registered user.
Always returns 0 for 'anonymous' users.
@param force: if True, forces reloading the data from API
@type force: bool
@rtype: int or long
"""
return self.getprops(force).get('editcount', 0)
def isBlocked(self, force=False):
"""
Determine whether the user is currently blocked.
@param force: if True, forces reloading the data from API
@type force: bool
@rtype: bool
"""
return 'blockedby' in self.getprops(force)
def isEmailable(self, force=False):
"""
Determine whether emails may be send to this user through MediaWiki.
@param force: if True, forces reloading the data from API
@type force: bool
@rtype: bool
"""
return (not self.isAnonymous() and 'emailable' in self.getprops(force))
def groups(self, force=False):
"""
Return a list of groups to which this user belongs.
The list of groups may be empty.
@param force: if True, forces reloading the data from API
@type force: bool
@return: groups property
@rtype: list
"""
return self.getprops(force).get('groups', [])
def gender(self, force=False):
"""Return the gender of the user.
@param force: if True, forces reloading the data from API
@type force: bool
@return: return 'male', 'female', or 'unknown'
@rtype: str
"""
if self.isAnonymous():
return 'unknown'
return self.getprops(force).get('gender', 'unknown')
def rights(self, force=False):
"""Return user rights.
@param force: if True, forces reloading the data from API
@type force: bool
@return: return user rights
@rtype: list
"""
return self.getprops(force).get('rights', [])
def getUserPage(self, subpage=''):