# -*- coding: utf-8 -*-
"""
Objects representing various types of MediaWiki pages, including Wikibase pages.
This module also includes objects:
* Property: a type of semantic data.
* Claim: an instance of a semantic assertion.
* Revision: a single change to a wiki page.
* FileInfo: a structure holding imageinfo of the latest revision of a FilePage.
* Link: an internal or interwiki link in wikitext.
"""
#
# (C) Pywikibot team, 2008-2019
#
# Distributed under the terms of the MIT license.
#
from __future__ import absolute_import, division, unicode_literals
import hashlib
import logging
import os.path
import re
import sys
import unicodedata
from collections import Counter, defaultdict, namedtuple, OrderedDict
from itertools import chain
from warnings import warn
import pywikibot
from pywikibot import config, i18n, textlib
from pywikibot.comms import http
from pywikibot.data.api import APIError
from pywikibot.exceptions import (
AutoblockUser,
NotEmailableError,
SiteDefinitionError,
UserRightsError,
)
from pywikibot.family import Family
from pywikibot.site import DataSite, Namespace, need_version
from pywikibot.tools import (
compute_file_hash,
UnicodeMixin, ComparableMixin, DotReadableDict,
deprecated, deprecate_arg, deprecated_args, issue_deprecation_warning,
add_full_name, manage_wrapping,
ModuleDeprecationWrapper as _ModuleDeprecationWrapper, PY2,
first_upper, redirect_func, remove_last_args, UnicodeType,
StringTypes
)
from pywikibot.tools.ip import is_IP, ip_regexp
if not PY2:
long = int
from html import entities as htmlentitydefs
from urllib.parse import quote_from_bytes, unquote_to_bytes
else:
if __debug__ and not PY2:
unichr = NotImplemented # pyflakes workaround
chr = unichr
import htmlentitydefs
from urllib import quote as quote_from_bytes, unquote as unquote_to_bytes
PROTOCOL_REGEX = r'\Ahttps?://'
__all__ = (
'BasePage',
'Page',
'FilePage',
'Category',
'User',
'WikibasePage',
'ItemPage',
'Property',
'PropertyPage',
'Claim',
'Revision',
'FileInfo',
'BaseLink',
'Link',
'SiteLink',
'SiteLinkCollection',
'html2unicode',
'UnicodeToAsciiHtml',
'unicode2html',
'url2unicode',
'ip_regexp', # unused & deprecated
)
logger = logging.getLogger('pywiki.wiki.page')
@add_full_name
def allow_asynchronous(func):
"""
Decorator to make it possible to run a BasePage method asynchronously.
This is done when the method is called with kwarg asynchronous=True.
Optionally, you can also provide the kwarg callback: a callable that
receives the page as its first argument and, as its second argument,
the exception that occurred during saving, or None if the save succeeded.
"""
def handle(func, self, *args, **kwargs):
do_async = kwargs.pop('asynchronous', False)
callback = kwargs.pop('callback', None)
err = None
try:
func(self, *args, **kwargs)
# TODO: other "expected" error types to catch?
except pywikibot.Error as edit_err:
err = edit_err # edit_err will be deleted at the end of the scope
link = self.title(as_link=True)
if do_async:
pywikibot.error('page {} not saved due to {}\n'
.format(link, err))
pywikibot.log('Error saving page %s (%s)\n' % (link, err),
exc_info=True)
if not callback and not do_async:
if isinstance(err, pywikibot.PageSaveRelatedError):
raise err
raise pywikibot.OtherPageSaveError(self, err)
if callback:
callback(self, err)
def wrapper(self, *args, **kwargs):
if kwargs.get('asynchronous'):
pywikibot.async_request(handle, func, self, *args, **kwargs)
else:
handle(func, self, *args, **kwargs)
manage_wrapping(wrapper, func)
return wrapper
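# Usage sketch for @allow_asynchronous (title and summary are hypothetical;
# assumes a configured Site): the decorated method runs in a worker thread
# when called with asynchronous=True, and the callback gets the page plus
# the exception or None once saving finishes.
#
#   def on_saved(page, err):
#       if err is None:
#           pywikibot.output('Saved ' + page.title())
#
#   page = pywikibot.Page(pywikibot.Site(), 'Sandbox')
#   page.text += '\ntest'
#   page.save(summary='test', asynchronous=True, callback=on_saved)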
# Note: Link objects (defined later on) represent a wiki-page's title, while
# Page objects (defined here) represent the page itself, including its
# contents.
class BasePage(UnicodeMixin, ComparableMixin):
"""
BasePage: Base object for a MediaWiki page.
This object only implements internally methods that do not require
reading from or writing to the wiki. All other methods are delegated
to the Site object.
Will be subclassed by Page, WikibasePage, and FlowPage.
"""
_cache_attrs = (
'_text', '_pageid', '_catinfo', '_templates', '_protection',
'_contentmodel', '_langlinks', '_isredir', '_coords',
'_preloadedtext', '_timestamp', '_applicable_protections',
'_flowinfo', '_quality', '_pageprops', '_revid', '_quality_text',
'_pageimage', '_item', '_lintinfo',
)
def __init__(self, source, title='', ns=0):
"""
Instantiate a Page object.
Three calling formats are supported:
- If the first argument is a Page, create a copy of that object.
This can be used to convert an existing Page into a subclass
object, such as Category or FilePage. (If the title is also
given as the second argument, creates a copy with that title;
this is used when pages are moved.)
- If the first argument is a Site, create a Page on that Site
using the second argument as the title (may include a section),
and the third as the namespace number. The namespace number is
mandatory, even if the title includes the namespace prefix. This
is the preferred syntax when using an already-normalized title
obtained from api.php or a database dump. WARNING: may produce
invalid objects if page title isn't in normal form!
- If the first argument is a BaseLink, create a Page from that link.
This is the preferred syntax when using a title scraped from
wikitext, URLs, or another non-normalized source.
@param source: the source of the page
@type source: BaseLink (or subclass), Page (or subclass), or Site
@param title: normalized title of the page; required if source is a
Site, ignored otherwise
@type title: str
@param ns: namespace number; required if source is a Site, ignored
otherwise
@type ns: int
"""
if title is None:
raise ValueError('Title cannot be None.')
if isinstance(source, pywikibot.site.BaseSite):
self._link = Link(title, source=source, default_namespace=ns)
self._revisions = {}
elif isinstance(source, Page):
# copy all of source's attributes to this object
# without overwriting non-None values
self.__dict__.update((k, v) for k, v in source.__dict__.items()
if k not in self.__dict__
or self.__dict__[k] is None)
if title:
# overwrite title
self._link = Link(title, source=source.site,
default_namespace=ns)
elif isinstance(source, BaseLink):
self._link = source
self._revisions = {}
else:
raise pywikibot.Error(
"Invalid argument type '{}' in Page initializer: {}"
.format(type(source), source))
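# A minimal sketch of the three calling formats (titles illustrative):
#
#   site = pywikibot.Site('en', 'wikipedia')
#   p1 = pywikibot.Page(site, 'File:Example.jpg', ns=6)  # Site + title + ns
#   p2 = pywikibot.FilePage(p1)              # copy a Page into a subclass
#   link = pywikibot.Link('de:Spielwiese', source=site)
#   p3 = pywikibot.Page(link)                # from a Link found in wikitext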
@property
def site(self):
"""Return the Site object for the wiki on which this Page resides.
@rtype: pywikibot.Site
"""
return self._link.site
def version(self):
"""
Return MediaWiki version number of the page site.
This is needed to use @need_version() decorator for methods of
Page objects.
"""
return self.site.version()
@property
def image_repository(self):
"""Return the Site object for the image repository."""
return self.site.image_repository()
@property
def data_repository(self):
"""Return the Site object for the data repository."""
return self.site.data_repository()
def namespace(self):
"""
Return the namespace of the page.
@return: namespace of the page
@rtype: pywikibot.Namespace
"""
return self._link.namespace
@property
def content_model(self):
"""
Return the content model for this page.
If it cannot be reliably determined via the API,
None is returned.
"""
# TODO: T102735: Add a sane default of 'wikitext' and others for <1.21
if not hasattr(self, '_contentmodel'):
self.site.loadpageinfo(self)
return self._contentmodel
@property
def depth(self):
"""Return the depth/subpage level of the page."""
if not hasattr(self, '_depth'):
# Check if the namespace allows subpages
if self.namespace().subpages:
self._depth = self.title().count('/')
else:
# Does not allow subpages, which means depth is always 0
self._depth = 0
return self._depth
@property
def pageid(self):
"""
Return pageid of the page.
@return: pageid or 0 if page does not exist
@rtype: int
"""
if not hasattr(self, '_pageid'):
self.site.loadpageinfo(self)
return self._pageid
@deprecated_args(
decode=None, savetitle='as_url', withNamespace='with_ns',
withSection='with_section', forceInterwiki='force_interwiki',
asUrl='as_url', asLink='as_link', allowInterwiki='allow_interwiki')
def title(self, underscore=False, with_ns=True,
with_section=True, as_url=False, as_link=False,
allow_interwiki=True, force_interwiki=False, textlink=False,
as_filename=False, insite=None, without_brackets=False):
"""
Return the title of this Page, as a Unicode string.
@param underscore: (not used with as_link) if true, replace all ' '
characters with '_'
@param with_ns: if false, omit the namespace prefix. If this
option is false and used together with as_link return a labeled
link like [[link|label]]
@param with_section: if false, omit the section
@param as_url: (not used with as_link) if true, quote the title as if in
a URL
@param as_link: if true, return the title in the form of a wikilink
@param allow_interwiki: (only used if as_link is true) if true, format
the link as an interwiki link if necessary
@param force_interwiki: (only used if as_link is true) if true, always
format the link as an interwiki link
@param textlink: (only used if as_link is true) if true, place a ':'
before Category: and Image: links
@param as_filename: (not used with as_link) if true, replace any
characters that are unsafe in filenames
@param insite: (only used if as_link is true) a site object where the
title is to be shown. Default is the current family/language given by
the -family and -lang options, i.e. config.family and config.mylang.
@param without_brackets: (cannot be used with as_link) if true, remove
the last pair of brackets (usually disambiguation brackets).
@rtype: str
"""
title = self._link.canonical_title()
label = self._link.title
if with_section and self.section():
section = '#' + self.section()
else:
section = ''
if as_link:
if insite:
target_code = insite.code
target_family = insite.family.name
else:
target_code = config.mylang
target_family = config.family
if force_interwiki or \
(allow_interwiki
and (self.site.family.name != target_family
or self.site.code != target_code)):
if self.site.family.name != target_family \
and self.site.family.name != self.site.code:
title = '%s:%s:%s' % (
self.site.family.name, self.site.code, title)
else:
# use this form for sites like commons, where the
# code is the same as the family name
title = '%s:%s' % (self.site.code, title)
elif textlink and (self.is_filepage() or self.is_categorypage()):
title = ':%s' % title
elif self.namespace() == 0 and not section:
with_ns = True
if with_ns:
return '[[%s%s]]' % (title, section)
else:
return '[[%s%s|%s]]' % (title, section, label)
if not with_ns and self.namespace() != 0:
title = label + section
else:
title += section
if without_brackets:
brackets_re = r'\s+\([^()]+?\)$'
title = re.sub(brackets_re, '', title)
if underscore or as_url:
title = title.replace(' ', '_')
if as_url:
encoded_title = title.encode(self.site.encoding())
title = quote_from_bytes(encoded_title, safe='')
if as_filename:
# Replace characters that are not possible in file names on some
# systems.
# Spaces are possible on most systems, but are bad for URLs.
for forbidden in ':*?/\\ ':
title = title.replace(forbidden, '_')
return title
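# Illustrative outputs for a hypothetical page 'Talk:Foo (bar)' on a site
# matching config.family/config.mylang (so as_link stays non-interwiki):
#
#   page.title()                      -> 'Talk:Foo (bar)'
#   page.title(with_ns=False)         -> 'Foo (bar)'
#   page.title(underscore=True)       -> 'Talk:Foo_(bar)'
#   page.title(as_url=True)           -> 'Talk%3AFoo_%28bar%29'
#   page.title(as_link=True)          -> '[[Talk:Foo (bar)]]'
#   page.title(without_brackets=True) -> 'Talk:Foo'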
@remove_last_args(('decode', 'underscore'))
def section(self):
"""
Return the name of the section this Page refers to.
The section is the part of the title following a '#' character, if
any. If no section is present, return None.
@rtype: str or None
"""
try:
section = self._link.section
except AttributeError:
section = None
return section
def __unicode__(self):
"""Return a unicode string representation."""
return self.title(as_link=True, force_interwiki=True)
def __repr__(self):
"""Return a more complete string representation."""
if not PY2:
title = repr(self.title())
else:
try:
title = self.title().encode(config.console_encoding)
except UnicodeEncodeError:
# okay console encoding didn't work, at least try something
title = self.title().encode('unicode_escape')
return str('{0}({1})').format(self.__class__.__name__, title)
def _cmpkey(self):
"""
Key for comparison of Page objects.
Page objects are "equal" if and only if they are on the same site
and have the same normalized title, including section if any.
Page objects are sortable by site, namespace then title.
"""
return (self.site, self.namespace(), self.title())
def __hash__(self):
"""
A stable identifier to be used as a key in hash-tables.
This relies on the fact that the string
representation of an instance can not change after the construction.
"""
return hash(self._cmpkey())
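# Consequence of _cmpkey()/__hash__: two Page objects for the same
# normalized title compare equal and collapse in sets/dicts, and pages
# sort by site, then namespace, then title (titles illustrative):
#
#   a = pywikibot.Page(site, 'Foo')
#   b = pywikibot.Page(site, 'foo')   # first letter normalized on most wikis
#   assert a == b and len({a, b}) == 1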
def full_url(self):
"""Return the full URL."""
return self.site.base_url(self.site.article_path
+ self.title(as_url=True))
def autoFormat(self):
"""
Return L{date.getAutoFormat} dictName and value, if any.
Value can be a year, date, etc., and dictName is 'YearBC',
'Year_December', or another dictionary name. Please note that two
entries may have exactly the same autoFormat, but be in two
different namespaces, as some sites have categories with the
same names. Regular titles return (None, None).
"""
if not hasattr(self, '_autoFormat'):
from pywikibot import date
self._autoFormat = date.getAutoFormat(
self.site.lang,
self.title(with_ns=False)
)
return self._autoFormat
def isAutoTitle(self):
"""Return True if title of this Page is in the autoFormat dict."""
return self.autoFormat()[0] is not None
@deprecated_args(throttle=None,
change_edit_time=None,
expandtemplates=None)
def get(self, force=False, get_redirect=False, sysop=False):
"""
Return the wiki-text of the page.
This will retrieve the page from the server if it has not been
retrieved yet, or if force is True. This can raise the following
exceptions that should be caught by the calling code:
@exception NoPage: The page does not exist
@exception IsRedirectPage: The page is a redirect. The argument of the
exception is the title of the page it
redirects to.
@exception SectionError: The section does not exist on a page with
a # link
@param force: reload all page attributes, including errors.
@param get_redirect: return the redirect text, do not follow the
redirect, do not raise an exception.
@param sysop: if the user has a sysop account, use it to
retrieve this page
@rtype: str
"""
if force:
del self.latest_revision_id
try:
self._getInternals(sysop)
except pywikibot.IsRedirectPage:
if not get_redirect:
raise
return self.latest_revision.text
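# Typical defensive use of get(), catching the exceptions listed above:
#
#   try:
#       text = page.get()
#   except pywikibot.NoPage:
#       text = ''                            # page does not exist
#   except pywikibot.IsRedirectPage:
#       text = page.get(get_redirect=True)   # keep the redirect wikitext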
def _latest_cached_revision(self):
"""Get the latest revision if cached and has text, otherwise None."""
if (hasattr(self, '_revid') and self._revid in self._revisions
and self._revisions[self._revid].text is not None):
return self._revisions[self._revid]
else:
return None
def _getInternals(self, sysop):
"""
Helper function for get().
Stores the latest revision in self if it is not already cached.
* Raises exceptions from previous runs.
* Stores new exceptions in _getexception and raises them.
"""
# Raise exceptions from previous runs
if hasattr(self, '_getexception'):
raise self._getexception
# If not already stored, fetch revision
if self._latest_cached_revision() is None:
try:
self.site.loadrevisions(self, content=True, sysop=sysop)
except (pywikibot.NoPage, pywikibot.SectionError) as e:
self._getexception = e
raise
# self._isredir is set by loadrevisions
if self._isredir:
self._getexception = pywikibot.IsRedirectPage(self)
raise self._getexception
@deprecated_args(throttle=None, change_edit_time=None)
def getOldVersion(self, oldid, force=False, get_redirect=False,
sysop=False):
"""
Return text of an old revision of this page; same options as get().
@param oldid: The revid of the revision desired.
@rtype: str
"""
if force or oldid not in self._revisions \
or self._revisions[oldid].text is None:
self.site.loadrevisions(self,
content=True,
revids=oldid,
sysop=sysop)
# TODO: what about redirects, errors?
return self._revisions[oldid].text
def permalink(self, oldid=None, percent_encoded=True, with_protocol=False):
"""Return the permalink URL of an old revision of this page.
@param oldid: The revid of the revision desired.
@param percent_encoded: if false, the title in the link will not be
percent-encoded; spaces are replaced by underscores instead.
@param with_protocol: if true, http or https prefixes will be
included before the double slash.
@rtype: str
"""
if percent_encoded:
title = self.title(as_url=True)
else:
title = self.title(as_url=False).replace(' ', '_')
return '{0}//{1}{2}/index.php?title={3}&oldid={4}'.format(
self.site.protocol() + ':' if with_protocol else '',
self.site.hostname(),
self.site.scriptpath(),
title,
oldid if oldid is not None else self.latest_revision_id)
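# Shape of the resulting URL (hostname, title and revid illustrative):
#
#   page.permalink(oldid=12345, with_protocol=True)
#   -> 'https://en.wikipedia.org/w/index.php?title=Foo&oldid=12345'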
@property
def latest_revision_id(self):
"""Return the current revision id for this page."""
if not hasattr(self, '_revid'):
self.revisions(self)
return self._revid
@latest_revision_id.deleter
def latest_revision_id(self):
"""
Remove the latest revision id set for this Page.
All internal cached values specifically for the latest revision
of this page are cleared.
The following cached values are not cleared:
- text property
- page properties, and page coordinates
- lastNonBotUser
- isDisambig and isCategoryRedirect status
- langlinks, templates and deleted revisions
"""
# When forcing, we retry the page no matter what:
# * Old exceptions do not apply any more
# * Deleting _revid to force reload
# * Deleting _redirtarget, that info is now obsolete.
for attr in ['_redirtarget', '_getexception', '_revid']:
if hasattr(self, attr):
delattr(self, attr)
@latest_revision_id.setter
def latest_revision_id(self, value):
"""Set the latest revision for this Page."""
del self.latest_revision_id
self._revid = value
@deprecated('latest_revision_id', since='20150727')
def latestRevision(self):
"""Return the current revision id for this page."""
return self.latest_revision_id
@deprecated('latest_revision_id', since='20150407')
def pageAPInfo(self):
"""Return the current revision id for this page."""
if self.isRedirectPage():
raise pywikibot.IsRedirectPage(self)
return self.latest_revision_id
@property
def latest_revision(self):
"""Return the current revision for this page."""
rev = self._latest_cached_revision()
if rev is not None:
return rev
return next(self.revisions(content=True, total=1))
@property
def text(self):
"""
Return the current (edited) wikitext, loading it if necessary.
@return: text of the page
@rtype: str
"""
if not hasattr(self, '_text') or self._text is None:
try:
self._text = self.get(get_redirect=True)
except pywikibot.NoPage:
# TODO: what other exceptions might be returned?
self._text = ''
return self._text
@text.setter
def text(self, value):
"""
Update the current (edited) wikitext.
@param value: New value or None
@type value: basestring
"""
self._text = None if value is None else UnicodeType(value)
if hasattr(self, '_raw_extracted_templates'):
del self._raw_extracted_templates
@text.deleter
def text(self):
"""Delete the current (edited) wikitext."""
if hasattr(self, '_text'):
del self._text
if hasattr(self, '_expanded_text'):
del self._expanded_text
if hasattr(self, '_raw_extracted_templates'):
del self._raw_extracted_templates
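# The text property lazily fetches the wikitext and caches local edits
# until save() is called; deleting it discards the unsaved value:
#
#   old = page.text                  # loaded from the wiki on first access
#   page.text = old + '\n[[Category:Example]]'   # local edit, not yet saved
#   del page.text                    # drop the edit; next access reloads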
def preloadText(self):
"""
The text returned by EditFormPreloadText.
See API module "info".
Application: on Wikisource wikis, text can be preloaded even if
a page does not exist, if an Index page is present.
@rtype: str
"""
self.site.loadpageinfo(self, preload=True)
return self._preloadedtext
def _get_parsed_page(self):
"""Retrieve parsed text (via action=parse) and cache it."""
# Get (cached) parsed text.
if not hasattr(self, '_parsed_text'):
self._parsed_text = self.site.get_parsed_page(self)
return self._parsed_text
def properties(self, force=False):
"""
Return the properties of the page.
@param force: force updating from the live site
@rtype: dict
"""
if not hasattr(self, '_pageprops') or force:
self._pageprops = {} # page may not have pageprops (see T56868)
self.site.loadpageprops(self)
return self._pageprops
def defaultsort(self, force=False):
"""
Extract value of the {{DEFAULTSORT:}} magic word from the page.
@param force: force updating from the live site
@rtype: str or None
"""
return self.properties(force=force).get('defaultsort')
@deprecate_arg('refresh', 'force')
def expand_text(self, force=False, includecomments=False):
"""Return the page text with all templates and parser words expanded.
@param force: force updating from the live site
@param includecomments: if not True, HTML comments are stripped from
the expanded text.
@rtype: str or None
"""
if not hasattr(self, '_expanded_text') or (
self._expanded_text is None) or force:
if not self.text:
self._expanded_text = ''
return ''
self._expanded_text = self.site.expand_text(
self.text,
title=self.title(with_section=False),
includecomments=includecomments)
return self._expanded_text
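# Sketch: expansion happens server-side, so templates and magic words are
# resolved against the page's own title. For a page named 'Sandbox':
#
#   page.text = 'Hello from {{PAGENAME}}'
#   page.expand_text()   # -> 'Hello from Sandbox'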
def userName(self):
"""
Return name or IP address of last user to edit page.
@rtype: str
"""
return self.latest_revision.user
def isIpEdit(self):
"""
Return True if last editor was unregistered.
@rtype: bool
"""
return self.latest_revision.anon
def lastNonBotUser(self):
"""
Return name or IP address of last human/non-bot user to edit page.
Determine the most recent human editor out of the last revisions.
If no human editor can be determined, returns None.
If the edit was done by a bot which is no longer flagged as 'bot',
i.e. which is not returned by Site.botusers(), it will be returned
as a non-bot edit.
@rtype: str
"""
if hasattr(self, '_lastNonBotUser'):
return self._lastNonBotUser
self._lastNonBotUser = None
for entry in self.revisions():
if entry.user and (not self.site.isBot(entry.user)):
self._lastNonBotUser = entry.user
break
return self._lastNonBotUser
@remove_last_args(('datetime', ))
def editTime(self):
"""Return timestamp of last revision to page.
@rtype: pywikibot.Timestamp
"""
return self.latest_revision.timestamp
@property
@deprecated('latest_revision.parent_id (0 instead of -1 when no parent)',
since='20150609')
def previous_revision_id(self):
"""
Return the revision id for the previous revision of this Page.
If the page has only one revision, it shall return -1.
@rtype: long
@raise AssertionError: Use on MediaWiki prior to v1.16.
"""
return self.latest_revision.parent_id or -1
@deprecated('latest_revision.parent_id (0 instead of -1 when no parent)',
since='20150609')
def previousRevision(self):
"""
Return the revision id for the previous revision.
DEPRECATED: Use latest_revision.parent_id instead.
@rtype: long
@raise AssertionError: Use on MediaWiki prior to v1.16.
"""
return self.latest_revision.parent_id or -1
def exists(self):
"""Return True if page exists on the wiki, even if it's a redirect.
If the title includes a section, return False if this section isn't
found.
@rtype: bool
"""
return self.pageid > 0
@property
def oldest_revision(self):
"""
Return the first revision of this page.
@rtype: L{Revision}
"""
return next(self.revisions(reverse=True, total=1))
def isRedirectPage(self):
"""Return True if this is a redirect, False if not or not existing."""
return self.site.page_isredirect(self)
def isStaticRedirect(self, force=False):
"""
Determine whether the page is a static redirect.
A static redirect must be a valid redirect, and contain the magic word
__STATICREDIRECT__.
@param force: Bypass local caching
@type force: bool
@rtype: bool
"""
found = False
if self.isRedirectPage():
static_keys = self.site.getmagicwords('staticredirect')
text = self.get(get_redirect=True, force=force)
if static_keys:
for key in static_keys:
if key in text:
found = True
break
return found
def isCategoryRedirect(self):
"""
Return True if this is a category redirect page, False otherwise.
@rtype: bool
"""
if not self.is_categorypage():
return False
if not hasattr(self, '_catredirect'):
self._catredirect = False
catredirs = self.site.category_redirects()
for template, args in self.templatesWithParams():
if template.title(with_ns=False) in catredirs:
if args:
# Get target (first template argument)
p = pywikibot.Page(
self.site, args[0].strip(), Namespace.CATEGORY)
if p.namespace() == Namespace.CATEGORY:
self._catredirect = p.title()
else:
pywikibot.warning(
'Category redirect target {0} on {1} is not a '
'category'.format(p.title(as_link=True),
self.title(as_link=True)))
else:
pywikibot.warning(
'No target found for category redirect on '
+ self.title(as_link=True))
break
return bool(self._catredirect)
def getCategoryRedirectTarget(self):
"""
If this is a category redirect, return the target category title.
@rtype: Category
"""
if self.isCategoryRedirect():
return Category(Link(self._catredirect, self.site))
raise pywikibot.IsNotRedirectPage(self)
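# Sketch for following a soft category redirect (category illustrative):
#
#   cat = pywikibot.Category(site, 'Category:Old name')
#   if cat.isCategoryRedirect():
#       cat = cat.getCategoryRedirectTarget()   # the real target Category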
@deprecated('interwiki.page_empty_check(page)', since='20151207')
def isEmpty(self):
"""
Return True if the page text has fewer than 4 characters.
Character count ignores language links and category links.
Can raise the same exceptions as get().
@rtype: bool
"""
txt = self.get()
txt = textlib.removeLanguageLinks(txt, site=self.site)
txt = textlib.removeCategoryLinks(txt, site=self.site)
return len(txt) < 4
def isTalkPage(self):
"""Return True if this page is in any talk namespace."""
ns = self.namespace()
return ns >= 0 and ns % 2 == 1
def toggleTalkPage(self):
"""
Return other member of the article-talk page pair for this Page.
If self is a talk page, returns the associated content page;
otherwise, returns the associated talk page. The returned page need
not actually exist on the wiki.
@return: Page or None if self is a special page.
@rtype: typing.Optional[pywikibot.Page]
"""
ns = self.namespace()
if ns < 0: # Special page
return
if self.isTalkPage():
if self.namespace() == 1:
return Page(self.site, self.title(with_ns=False))
else:
return Page(self.site,
'%s:%s' % (self.site.namespace(ns - 1),
self.title(with_ns=False)))
else:
return Page(self.site,
'%s:%s' % (self.site.namespace(ns + 1),
self.title(with_ns=False)))
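# Talk namespaces are the odd-numbered partners of their subject
# namespaces, so toggling moves between ns and ns +/- 1 (titles
# illustrative):
#
#   pywikibot.Page(site, 'Foo').toggleTalkPage()        # -> Talk:Foo (ns 1)
#   pywikibot.Page(site, 'Talk:Foo').toggleTalkPage()   # -> Foo (ns 0)
#   pywikibot.Page(site, 'Special:Search').toggleTalkPage()  # -> None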
def is_categorypage(self):
"""Return True if the page is a Category, False otherwise."""
return self.namespace() == 14
@deprecated('is_categorypage', since='20140819')
def isCategory(self):
"""DEPRECATED: use is_categorypage instead."""
return self.is_categorypage()
def is_filepage(self):
"""Return True if this is an file description page, False otherwise."""
return self.namespace() == 6
@deprecated('is_filepage', since='20140819')
def isImage(self):
"""DEPRECATED: use is_filepage instead."""
return self.is_filepage()
@remove_last_args(('get_Index', ))
def isDisambig(self):
"""
Return True if this is a disambiguation page, False otherwise.
By default, it uses the Disambiguator extension's result. The
identification relies on the presence of the __DISAMBIG__ magic word
which may also be transcluded.
If the Disambiguator extension isn't activated for the given site,
the identification relies on the presence of specific templates.
First load a list of template names from the Family file;
if the value in the Family file is None or no entry was made, look for
the list on [[MediaWiki:Disambiguationspage]]. If this page does not
exist, take the MediaWiki message. 'Template:Disambig' is always
assumed to be default, and will be appended regardless of its
existence.
@rtype: bool
"""
if self.site.has_extension('Disambiguator'):
# If the Disambiguator extension is loaded, use it
return 'disambiguation' in self.properties()
if not hasattr(self.site, '_disambigtemplates'):
try:
default = set(self.site.family.disambig('_default'))
except KeyError:
default = {'Disambig'}
try:
distl = self.site.family.disambig(self.site.code,
fallback=False)
except KeyError:
distl = None
if distl is None:
disambigpages = Page(self.site,
'MediaWiki:Disambiguationspage')
if disambigpages.exists():
disambigs = {link.title(with_ns=False)
for link in disambigpages.linkedPages()
if link.namespace() == 10}
elif self.site.has_mediawiki_message('disambiguationspage'):
message = self.site.mediawiki_message(
'disambiguationspage').split(':', 1)[1]