-
Notifications
You must be signed in to change notification settings - Fork 190
/
config2.py
1230 lines (1034 loc) · 46.1 KB
/
config2.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
# -*- coding: utf-8 -*-
"""
Module to define and load pywikibot configuration default and user preferences.
User preferences are loaded from a python file called user-config.py, which
may be located in directory specified by the environment variable
PYWIKIBOT_DIR, or the same directory as pwb.py, or in a directory within
the users home. See get_base_dir for more information.
If user-config.py can not be found in any of those locations, this module
will fail to load unless the environment variable PYWIKIBOT_NO_USER_CONFIG
is set to a value other than '0'. i.e. PYWIKIBOT_NO_USER_CONFIG=1 will
allow config to load without a user-config.py. However, warnings will be
shown if user-config.py was not loaded.
To prevent these warnings, set PYWIKIBOT_NO_USER_CONFIG=2.
Provides two functions to register family classes which can be used in
the user-config:
- register_family_file
- register_families_folder
Other functions made available to user-config:
- user_home_path
Sets module global base_dir and provides utility methods to
build paths relative to base_dir:
- makepath
- datafilepath
- shortpath
"""
#
# (C) Rob W.W. Hooft, 2003
# (C) Pywikibot team, 2003-2018
#
# Distributed under the terms of the MIT license.
#
from __future__ import absolute_import, division, unicode_literals
import collections
import os
import platform
import re
import stat
import sys
import types
from distutils.version import StrictVersion
from locale import getdefaultlocale
from os import getenv, environ
from warnings import warn
from requests import __version__ as requests_version
from pywikibot.logging import error, output, warning
from pywikibot.tools import PY2, issue_deprecation_warning
# Flag for native Windows; the registry module is only imported there.
OSWIN32 = (sys.platform == 'win32')
if OSWIN32:
    # The registry module is named _winreg on Python 2 and winreg on Python 3.
    if PY2:
        import _winreg as winreg
    else:
        import winreg
# Migrate legacy PYWIKIBOT2_* environment variables to their PYWIKIBOT_*
# names and issue a deprecation warning for every legacy name that was set.
for env_name in ('PYWIKIBOT2_DIR', 'PYWIKIBOT2_DIR_PWB',
                 'PYWIKIBOT2_NO_USER_CONFIG'):
    if env_name in environ:
        new_env_name = env_name.replace('PYWIKIBOT2_', 'PYWIKIBOT_')
        # The legacy variable is always removed from the environment.
        env_value = environ.pop(env_name)
        # A new-style variable that is already set takes precedence.
        environ.setdefault(new_env_name, env_value)
        issue_deprecation_warning(
            env_name + ' environment variable', new_env_name, 0,
            since='20180803')
# Snapshot of every public (non-underscore) name bound up to this point:
# the imported modules/variables plus the few helper names assigned above
# (e.g. OSWIN32 and the loop variables of the environment migration).
# It has to be taken before any configuration variable is defined.
_imports = frozenset(
    name for name in globals() if not name.startswith('_'))

# PYWIKIBOT_NO_USER_CONFIG: unset or '0' are both normalized to None;
# any other value is kept as the raw string.
__no_user_config = getenv('PYWIKIBOT_NO_USER_CONFIG')
__no_user_config = None if __no_user_config == '0' else __no_user_config
class _ConfigurationDeprecationWarning(UserWarning):

    """Warning category for a configuration feature no longer supported."""
# IMPORTANT:
# Do not change any of the variables in this file. Instead, create
# a file user-config.py and override the values there.
# Note: all variables defined in this module are made available to bots as
# configuration settings, *except* variable names beginning with an
# underscore (example: _variable). Be sure to use an underscore on any
# variables that are intended only for internal use and not to be exported
# to other modules.

# NOTE(review): presumably the names of settings holding sensitive values
# that must not be displayed; how this list is consumed is not visible in
# this part of the file - confirm.
_private_values = ['authenticate', 'proxy', 'db_password']
# NOTE(review): presumably the names of settings that are deprecated; how
# they are handled is not visible in this part of the file - confirm.
_deprecated_variables = ['use_SSL_onlogin', 'use_SSL_always',
                         'available_ssl_project', 'fake_user_agent',
                         'special_page_limit']
# ############# ACCOUNT SETTINGS ##############

# The family of sites we are working on. pywikibot will import
# families/xxx_family.py so if you want to change this variable,
# you need to write such a file if one does not exist.
family = 'wikipedia'
# The language code of the site we're working on.
mylang = 'language'
# If family and mylang are not modified from the above, the default is changed
# to test:test, which is test.wikipedia.org, at the end of this module.

# The dictionary usernames should contain a username for each site where you
# have a bot account. Please set your usernames by adding such lines to your
# user-config.py:
#
# usernames['wikipedia']['de'] = 'myGermanUsername'
# usernames['wiktionary']['en'] = 'myEnglishUsername'
#
# If you have a unique username for all languages of a family,
# you can use '*'
# usernames['wikibooks']['*'] = 'mySingleUsername'
# You may use '*' for family name in a similar manner.
#
# If you have a sysop account on some wikis, this will be used to delete pages
# or to edit locked pages if you add such lines to your
# user-config.py:
#
# sysopnames['wikipedia']['de'] = 'myGermanUsername'
# sysopnames['wiktionary']['en'] = 'myEnglishUsername'
#
# If you have a unique sysop account for all languages of a family,
# you can use '*'
# sysopnames['myownwiki']['*'] = 'mySingleUsername'
#
# All three dictionaries below are defaultdicts: looking up a family that
# has no entry yet yields a new empty dict instead of raising KeyError.
usernames = collections.defaultdict(dict)
sysopnames = collections.defaultdict(dict)
disambiguation_comment = collections.defaultdict(dict)
# User agent format.
# For the meaning and more help in customization see:
# https://www.mediawiki.org/wiki/Manual:Pywikibot/User-agent
user_agent_format = ('{script_product} ({script_comments}) {pwb} ({revision}) '
                     '{http_backend} {python}')

# Fake user agent.
# Some external websites reject bot-like user agents. It is possible to use
# fake user agents in requests to these websites.
# It is recommended to default this to False and use on an as-needed basis.
#
# Default behaviours in modules that can utilize fake UAs.
# True for enabling fake UA, False for disabling / using pywikibot's own UA,
# str to specify custom UA.
fake_user_agent_default = {'reflinks': False, 'weblinkchecker': False}
# Website domains excepted from the default behaviour.
# True for enabling, False for disabling, str to hardcode a UA.
# Example: {'problematic.site.example': True,
#           'prefers.specific.ua.example': 'snakeoil/4.2'}
fake_user_agent_exceptions = {}
# The following option is deprecated in favour of the finer control options
# above.
fake_user_agent = False

# The default interface for communicating with the site.
# Currently the only defined interface is 'APISite', so don't change this!
site_interface = 'APISite'
# Number of days to cache namespaces, api configuration, etc.
API_config_expiry = 30
# The maximum number of bytes for which a GET request is used; if this is
# not positive, POST requests are always used.
maximum_GET_length = 255
# Some networks modify GET requests when they are not encrypted. To avoid
# bug reports related to that, GET requests without SSL are disabled by
# default. If we are confident that bugs related to this really are caused
# by the network, this could be changed.
enable_GET_without_SSL = False

# Solve captchas in the web browser. Setting this to False will result in the
# exception CaptchaError being thrown if a captcha is encountered.
solve_captcha = True
# Some sites will require password authentication to access the HTML pages at
# the site. If you have any such site, add lines to your user-config.py of
# the following form:
#
# authenticate['en.wikipedia.org'] = ('John','XXXXX')
# authenticate['*.wikipedia.org'] = ('John','XXXXX')
#
# where John is your login name, and XXXXX your password.
# Note:
# 1. This is only for sites that use authentication in the form that gives
#    you a popup for name and password when you try to access any data, NOT
#    for, for example, wiki usernames
# 2. You must use the hostname of the site, not its family/language pair.
#    Pywikibot supports a wildcard (*) in the prefix of the hostname and
#    selects the best matching authentication. So you can specify
#    authentication for more than one site at once.
#
# Pywikibot also supports OAuth 1.0a via mwoauth
# https://pypi.org/project/mwoauth
#
# You can add OAuth tokens to your user-config.py of the following form:
#
# authenticate['en.wikipedia.org'] = ('consumer_key','consumer_secret',
#                                     'access_key', 'access_secret')
# authenticate['*.wikipedia.org'] = ('consumer_key','consumer_secret',
#                                    'access_key', 'access_secret')
#
# Note: the target wiki site must have the OAuth extension installed
authenticate = {}
#
# Secure connection overrides
#
# These settings are deprecated. They existed to support the Wikimedia
# family which only served HTTPS on https://secure.wikimedia.org/<site>/<uri>
# Use Family.protocol() instead.
use_SSL_onlogin = False  # if available, use SSL when logging in
use_SSL_always = False  # if available, use SSL for all API queries
# Available secure projects should be listed here.
available_ssl_project = []
# By default you are asked for a password on the terminal.
# A password file may be used, e.g. password_file = '.passwd'
# The path to the password file is relative to that of the user_config file.
# The password file should consist of lines containing Python tuples of any
# of the following formats:
# (code, family, username, password)
# (family, username, password)
# (username, password)
# It's also possible (and safer) for bot users to use BotPasswords to limit
# the permissions given to a bot. When using BotPasswords, each instance gets
# keys. This combination can only access the API, not the normal web interface.
# See https://www.mediawiki.org/wiki/Manual:Pywikibot/BotPasswords to learn
# how to use them. In this case, the password file should contain a
# BotPassword object in the following format:
# (username, BotPassword(botname, botpassword))
password_file = None

# Edit summary to use if not supplied by the bot script.
# WARNING: this should NEVER be used in practice, ALWAYS supply a more
# relevant summary for bot edits
default_edit_summary = 'Pywikibot 3.0-dev'
# Permissions to apply to private files, such as the password file.
#
# stat.S_IRWXU 0o700 mask for owner permissions
# stat.S_IRUSR 0o400 read permission for owner
# stat.S_IWUSR 0o200 write permission for owner
# stat.S_IXUSR 0o100 execute permission for owner
# stat.S_IRWXG 0o070 mask for group permissions
# stat.S_IRGRP 0o040 read permission for group
# stat.S_IWGRP 0o020 write permission for group
# stat.S_IXGRP 0o010 execute permission for group
# stat.S_IRWXO 0o007 mask for others permissions
# stat.S_IROTH 0o004 read permission for others
# stat.S_IWOTH 0o002 write permission for others
# stat.S_IXOTH 0o001 execute permission for others
#
# The default is read and write permission for the owner only (0o600).
private_files_permission = stat.S_IRUSR | stat.S_IWUSR

# Allow the user to stop warnings about file security
# by setting this to True.
ignore_file_security_warnings = False
# Custom headers to send on all requests.
# This is mainly intended to support setting the
# X-Wikimedia-Debug header, which is sometimes
# needed to debug issues with Wikimedia sites:
# https://wikitech.wikimedia.org/wiki/Debugging_in_production
#
# Note that these headers will be sent with all requests,
# not just MediaWiki API calls.
# The default is an empty dict.
extra_headers = {}
def user_home_path(path):
    """Join ``path`` onto the current user's home directory.

    @param path: file path relative to the user's home directory
    @return: the combined path
    """
    home_dir = os.path.expanduser('~')
    return os.path.join(home_dir, path)
def get_base_dir(test_directory=None):
    r"""Return the directory in which user-specific information is stored.

    This is determined in the following order:
     1.  If the script was called with a -dir: argument, use the directory
         provided in this argument.
     2.  If the user has a PYWIKIBOT_DIR environment variable and
         user-config.py is present in that directory, use the value of it.
     3.  If user-config is present in current directory, use the current
         directory.
     4.  If user-config is present in the directory named by the
         PYWIKIBOT_DIR_PWB environment variable (the pwb.py directory),
         use that directory.
     5.  Use (and if necessary create) a 'pywikibot' folder under
         'Application Data' or 'AppData\Roaming' (Windows) or
         '.pywikibot' directory (Unix and similar) under the user's home
         directory. The folder is only selected if it already existed and
         contains user-config.py.
     6.  Otherwise the current working directory is used.

    Set PYWIKIBOT_NO_USER_CONFIG=1 to disable loading user-config.py

    @param test_directory: Assume that a user config file exists in this
        directory. Used to test whether placing a user config file in this
        directory will cause it to be selected as the base directory.
    @type test_directory: str or None
    @rtype: unicode
    @raise RuntimeError: the selected directory does not exist, or it holds
        no user-config.py and PYWIKIBOT_NO_USER_CONFIG is not set
    @raise WindowsError: the Windows major version is not 5, 6 or 10
    """
    def exists(directory):
        """Return True if directory holds user-config.py or is the test dir."""
        directory = os.path.abspath(directory)
        if directory == test_directory:
            return True
        return os.path.exists(os.path.join(directory, 'user-config.py'))

    if test_directory is not None:
        test_directory = os.path.abspath(test_directory)

    base_dir = ''
    for arg in sys.argv[1:]:
        if arg.startswith(str('-dir:')):
            # Strip the 5 characters of '-dir:' and expand a leading '~'.
            base_dir = os.path.expanduser(arg[5:])
            break
    else:
        # No -dir: argument was given; probe the candidate locations.
        if ('PYWIKIBOT_DIR' in environ
                and exists(os.path.abspath(environ['PYWIKIBOT_DIR']))):
            base_dir = os.path.abspath(environ['PYWIKIBOT_DIR'])
        elif exists('.'):
            base_dir = os.path.abspath('.')
        elif ('PYWIKIBOT_DIR_PWB' in environ
                and exists(os.path.abspath(environ['PYWIKIBOT_DIR_PWB']))):
            base_dir = os.path.abspath(environ['PYWIKIBOT_DIR_PWB'])
        else:
            base_dir_cand = []
            home = os.path.expanduser('~')
            if OSWIN32:
                win_version = int(platform.version().split('.')[0])
                if win_version == 5:
                    sub_dir = ['Application Data']
                elif win_version in (6, 10):
                    sub_dir = ['AppData', 'Roaming']
                else:
                    raise WindowsError('Windows version {} not supported yet.'
                                       .format(win_version))
                base_dir_cand.extend([[home] + sub_dir + ['Pywikibot'],
                                      [home] + sub_dir + ['pywikibot']])
            else:
                base_dir_cand.append([home, '.pywikibot'])

            for path_parts in base_dir_cand:
                candidate = os.path.join(*path_parts)
                try:
                    os.makedirs(candidate, mode=private_files_permission)
                except OSError:  # PermissionError or already exists
                    if exists(candidate):
                        base_dir = candidate
                        break

    # A relative (or still empty) base_dir is resolved against the current
    # working directory.
    if not os.path.isabs(base_dir):
        base_dir = os.path.normpath(os.path.join(os.getcwd(), base_dir))

    # make sure this path is valid and that it contains user-config file
    if not os.path.isdir(base_dir):
        raise RuntimeError("Directory '%s' does not exist." % base_dir)

    # check if user-config.py is in base_dir
    if not exists(base_dir):
        exc_text = "No user-config.py found in directory '%s'.\n" % base_dir
        if __no_user_config:
            # The value '2' suppresses the notice as well.
            if __no_user_config != '2':
                output(exc_text)
        else:
            # __doc__ is None when Python runs with -OO; fall back to an
            # empty string so that the RuntimeError below is still raised.
            exc_text += (
                ' Please check that user-config.py is stored in the correct '
                'location.\n'
                ' Directory where user-config.py is searched is determined '
                'as follows:\n\n ') + (get_base_dir.__doc__ or '')
            raise RuntimeError(exc_text)

    return base_dir
# Resolve the base directory once at import time so that other modules can
# read it from here.
base_dir = get_base_dir()

# Report the selected directory once if a verbosity argument was passed.
for arg in sys.argv[1:]:
    if arg == str('-v') or arg.startswith(str('-verbose')):
        output('The base directory is ' + base_dir)
        break

# Maps a family name to the location of its family file; entries are added
# by register_family_file().
family_files = {}
def register_family_file(family_name, file_path):
    """Record ``file_path`` as the family file of ``family_name``.

    The usernames, sysopnames and disambiguation_comment entries of the
    family are (re)set to new empty dictionaries.
    """
    for registry in (usernames, sysopnames, disambiguation_comment):
        registry[family_name] = {}
    family_files[family_name] = file_path
def register_families_folder(folder_path):
    """Register every ``*_family.py`` file found directly in a directory.

    The family name is the file name without its ``_family.py`` suffix.
    """
    suffix = '_family.py'
    for entry in os.listdir(folder_path):
        if not entry.endswith(suffix):
            continue
        register_family_file(entry[:-len(suffix)],
                             os.path.join(folder_path, entry))
# Get the names of all known families, and initialize with empty dictionaries.
# 'families/' is a subdirectory of the directory in which config2.py is found.
register_families_folder(os.path.join(os.path.dirname(__file__), 'families'))
# NOTE(review): a URL rather than a file path is passed as file_path here;
# how such entries are resolved is not visible in this part of the file.
register_family_file('wikiapiary', 'https://wikiapiary.com')

# Set to True to override the {{bots}} exclusion protocol (at your own risk!)
ignore_bot_templates = False
# ############# USER INTERFACE SETTINGS ##############

# The encoding that's used in the user's console, i.e. how strings are encoded
# when they are read by raw_input(). On Windows systems' DOS box, this should
# be 'cp850' ('cp437' for older versions). Linux users might try 'iso-8859-1'
# or 'utf-8'.
# This default code should work fine, so you don't have to think about it.
# TODO: consider getting rid of this config variable.
try:
    console_encoding = sys.stdout.encoding
    if PY2 and console_encoding:
        # On Python 2 a non-empty encoding name is decoded from ASCII.
        console_encoding = console_encoding.decode('ascii')
except AttributeError:
    # When using pywikibot inside a daemonized twisted application,
    # we get "StdioOnnaStick instance has no attribute 'encoding'"
    console_encoding = None
# The encoding the user would like to see text transliterated to. This can be
# set to a charset (e.g. 'ascii', 'iso-8859-1' or 'cp850'), and we will output
# only characters that exist in that charset. However, the characters will be
# output using console_encoding.
# If this is not defined on Windows, we emit a warning telling the user
# to either switch to a Unicode-able font and use
# transliteration_target = None
# or to keep using raster fonts and set
# transliteration_target = console_encoding
# After emitting the warning, this last option will be set.
transliteration_target = None

# The encoding in which textfiles are stored, which contain lists of page
# titles. The most used is 'utf-8'; 'utf-8-sig' recognizes BOM.
# For a complete list please see:
# https://docs.python.org/2/library/codecs.html#standard-encodings
textfile_encoding = 'utf-8'

# tkinter isn't yet ready
userinterface = 'terminal'

# This can be used to pass variables to the UI init function,
# useful for e.g.
# userinterface_init_kwargs = {'default_stream': 'stdout'}
userinterface_init_kwargs = {}

# i18n setting for user interface language
# default is obtained from L{locale.getdefaultlocale}
userinterface_lang = None

# Should we transliterate characters that do not exist in the console
# character set?
# True: whenever possible
# False: never - always replace them by question marks
# Currently only works if interface 'terminal' is set.
transliterate = True

# Should the system bell ring if the bot expects user input?
ring_bell = False
# Colorization can be used to markup important text parts of the output.
# On Linux/Unix terminals, ANSI escape codes are used for this. On Windows,
# it is done by a DLL call via ctypes.
# Set this to False if you're using Linux and your tty doesn't support
# ANSI colors.
#
# Colorized output stays disabled unless stdout reports being a terminal.
colorized_output = False
try:
    # Don't print colorized when the output is, for example, piped to a file.
    colorized_output = sys.stdout.isatty()
except AttributeError:
    # When using pywikibot inside a daemonized twisted application,
    # we get "StdioOnnaStick instance has no attribute 'isatty'"
    pass
# An indication of the size of your screen, or rather the size of the screen
# to be shown, for flickrripper
tkhorsize = 1600
tkvertsize = 1000

# ############# EXTERNAL EDITOR SETTINGS ##############

# The command for the editor you want to use. If set to None, a simple Tkinter
# editor will be used.
# The default is taken from the EDITOR environment variable, or None if that
# variable is not set.
editor = os.environ.get('EDITOR', None)

# Warning: DO NOT use an editor which doesn't support Unicode to edit pages!
# You will BREAK non-ASCII symbols!
editor_encoding = 'utf-8'

# The temporary file name extension can be set in order to use syntax
# highlighting in your text editor.
editor_filename_extension = 'wiki'
# ############# LOGFILE SETTINGS ##############

# Defines for which scripts a logfile should be enabled. Logfiles will be
# saved in the 'logs' subdirectory.
#
# Example:
# log = ['interwiki', 'weblinkchecker', 'table2wiki']
# It is also possible to enable logging for all scripts, using this line:
# log = ['*']
# To disable all logging, use this:
# log = []
# Per default, logging of interwiki.py is enabled because its logfiles can
# be used to generate so-called warnfiles.
# This setting can be overridden by the -log or -nolog command-line arguments.
log = ['interwiki']
# filename defaults to modulename-bot.log
logfilename = None
# Maximal size of a logfile in kilobytes. If the size reaches that limit the
# logfile will be renamed (if logfilecount is not 0) and the old file is filled
# again. logfilesize must be an integer value.
logfilesize = 1024
# Number of rotating logfiles that are created. The older files get the higher
# number. If logfilecount is 0, no logfile will be archived but the current
# logfile will be overwritten if the file size reaches the logfilesize above.
# If logfilecount is -1 there are no rotating logfiles but the files are
# renamed if the logfile is full. The newest file gets the highest number
# until some logfiles are deleted.
logfilecount = 5
# set to 1 (or higher) to generate "informative" messages to terminal
verbose_output = 0
# set to True to fetch the pywiki version online
log_pywiki_repo_version = False
# if True, include a lot of debugging info in logfile
# (overrides log setting above)
# NOTE(review): the default is an empty list although the comment above
# speaks of True - presumably a list of names to debug; confirm.
debug_log = []
# ############# EXTERNAL SCRIPT PATH SETTINGS ##############

# Set your own script path to look up your script files.
#
# Your private script path must be located inside the
# framework folder; subfolders must be delimited by '.'.
# Every folder must contain an (empty) __init__.py file.
#
# The search order is
# 1. user_script_paths in the given order
# 2. scripts
# 3. scripts/maintenance
# 4. scripts/archive
#
# sample:
# user_script_paths = ['scripts.myscripts']
user_script_paths = []
# ############# INTERWIKI SETTINGS ##############

# Should interwiki.py report warnings for missing links between foreign
# languages?
interwiki_backlink = True

# Should interwiki.py display every new link it discovers?
interwiki_shownew = True

# Should interwiki.py output a graph PNG file on conflicts?
# You need pydot for this:
# https://pypi.org/project/pydot/
interwiki_graph = False

# Specifies that the robot should process that number of subjects at a time,
# only starting to load new pages in the original language when the total
# falls below that number. Default is to process (at least) 100 subjects at
# once.
interwiki_min_subjects = 100

# If interwiki graphs are enabled, which format(s) should be used?
# Supported formats include png, jpg, ps, and svg. See:
# http://www.graphviz.org/doc/info/output.html
# If you want to also dump the dot files, you can use this in your
# user-config.py:
# interwiki_graph_formats = ['dot', 'png']
# If you need a PNG image with an HTML image map, use this:
# interwiki_graph_formats = ['png', 'cmap']
# If you only need SVG images, use:
# interwiki_graph_formats = ['svg']
interwiki_graph_formats = ['png']

# You can post the contents of your autonomous_problems.dat to the wiki,
# e.g. to https://de.wikipedia.org/wiki/Wikipedia:Interwiki-Konflikte .
# This allows others to assist you in resolving interwiki problems.
# To help these people, you can upload the interwiki graphs to your
# webspace somewhere. Set the base URL here, e.g.:
# 'https://www.example.org/~yourname/interwiki-graphs/'
interwiki_graph_url = None

# Save a file with the local articles that have no interwikis.
without_interwiki = False

# Experimental feature:
# Store the page contents on disk (/cache/ directory) instead of loading
# them in RAM.
interwiki_contents_on_disk = False
# ############# SOLVE_DISAMBIGUATION SETTINGS ############
#
# Set disambiguation_comment[FAMILY][LANG] to a non-empty string to override
# the default edit comment for the solve_disambiguation bot.
#
# Use %s to represent the name of the disambiguation page being treated.
# Example:
#
# disambiguation_comment['wikipedia']['en'] = \
#     'Robot-assisted disambiguation ([[WP:DPL|you can help!]]): %s'

# Sorting order for alternatives. Set to True to ignore case for sorting order.
sort_ignore_case = False

# ############# IMAGE RELATED SETTINGS ##############
# If you set this to True, images will be uploaded to Wikimedia
# Commons by default.
upload_to_commons = False
# ############# SETTINGS TO AVOID SERVER OVERLOAD ##############

# Slow down the robot such that it never requests a second page within
# 'minthrottle' seconds. This can be lengthened if the server is slow,
# but never more than 'maxthrottle' seconds. However - if you are running
# more than one bot in parallel the times are lengthened.
#
# 'maxlag' is used to control the rate of server access (see below).
# Set minthrottle to non-zero to use a throttle on read access.
minthrottle = 0
maxthrottle = 60

# Slow down the robot such that it never makes a second page edit within
# 'put_throttle' seconds.
put_throttle = 10

# Sometimes you want to know when a delay is inserted. If a delay is larger
# than 'noisysleep' seconds, it is logged on the screen.
noisysleep = 3.0

# Defer bot edits during periods of database server lag. For details, see
# https://www.mediawiki.org/wiki/Maxlag_parameter
# You can set this variable to a number of seconds, or to None (or 0) to
# disable this behavior. Higher values are more aggressive in seeking
# access to the wiki.
# Non-Wikimedia wikis may or may not support this feature; for families
# that do not use it, it is recommended to set minthrottle (above) to
# at least 1 second.
maxlag = 5

# Maximum number of pages which can be retrieved at one time from the wiki
# server. -1 indicates limit by api restriction
step = -1

# Maximum number of times to retry an API request before quitting.
max_retries = 15

# Minimum time to wait before resubmitting a failed API request.
retry_wait = 5

# Maximum time to wait before resubmitting a failed API request.
retry_max = 120
# ############# TABLE CONVERSION BOT SETTINGS ##############

# Split long paragraphs to make the source easier to read.
# Only table2wiki.py uses it for now.
splitLongParagraphs = False
# Sometimes HTML tables are indented for better reading.
# That can produce very ugly results.
deIndentTables = True

# ############# WEBLINK CHECKER SETTINGS ##############

# How many external links should weblinkchecker.py check at the same time?
# If you have a fast connection, you might want to increase this number so
# that slow servers won't slow you down.
max_external_links = 50

report_dead_links_on_talk = False

# Don't alert on links that have been dead for weblink_dead_days days or less
weblink_dead_days = 7
# ############# DATABASE SETTINGS ##############
# Settings to connect to the database or a replica of the database of the
# wiki. db_name_format can be used to manipulate the dbName of site.
#
# Example for a pywikibot running on wmflabs:
# db_hostname = 'enwiki.analytics.db.svc.eqiad.wmflabs'
# db_name_format = '{0}_p'
# db_connect_file = user_home_path('replica.my.cnf')
db_hostname = 'localhost'
db_username = ''
db_password = ''
db_name_format = '{0}'
# Defaults to the file '.my.cnf' in the user's home directory.
db_connect_file = user_home_path('.my.cnf')
# local port for mysql server
# ssh -L 4711:enwiki.analytics.db.svc.eqiad.wmflabs:3306 \
#     user@login.tools.wmflabs.org
db_port = 3306
# ############# SEARCH ENGINE SETTINGS ##############
# Live search web service appid settings.
#
# Yahoo! Search Web Services are not operational.
# See https://phabricator.wikimedia.org/T106085
yahoo_appid = ''

# To use Windows Live Search web service you must get an AppID from
# http://www.bing.com/dev/en-us/dev-center
msn_appid = ''

# ############# FLICKR RIPPER SETTINGS ##############

# Using the Flickr api
flickr = {
    'api_key': '',  # Provide your key!
    'api_secret': '',  # Api secret of your key (optional)
    'review': False,  # Should our uploads automatically be marked reviewed?
    'reviewer': '',  # If so, under what reviewer name?
}
# ############# COPYRIGHT SETTINGS ##############

# Enable/disable search engine in copyright.py script
copyright_google = True
copyright_yahoo = True
copyright_msn = False

# Perform a deep check, loading URLs to search if 'Wikipedia' is present.
# This may be useful to increase the number of correct results. If you don't
# have a fast connection, you might want to keep them disabled.
copyright_check_in_source_google = False
copyright_check_in_source_yahoo = False
copyright_check_in_source_msn = False

# Web pages may contain a Wikipedia text without the word 'Wikipedia' but with
# the typical '[edit]' tag as a result of a copy & paste procedure. You want
# no report for this kind of URLs, even if they are copyright violations.
# However, when enabled, these URLs are logged in a file.
copyright_check_in_source_section_names = False

# Limit the number of queries per page.
copyright_max_query_for_page = 25

# Skip a specified number of queries
copyright_skip_query = 0

# Number of attempts on connection error.
copyright_connection_tries = 10

# Behavior if an exceeded error occurs.
#
# Possibilities:
#
# 0 = None
# 1 = Disable search engine
# 2 = Sleep (default)
# 3 = Stop
copyright_exceeded_in_queries = 2
copyright_exceeded_in_queries_sleep_hours = 6

# Append last modified date of URL to script result
copyright_show_date = True

# Append length of URL to script result
copyright_show_length = True

# By default the script tries to identify and skip text that contains a large
# comma separated list or only numbers. But sometimes that might be the
# only part unmodified of a slightly edited and not otherwise reported
# copyright violation. You can disable this feature to try to increase the
# number of results.
copyright_economize_query = True
# ############# HTTP SETTINGS ##############
# Use a persistent http connection. An http connection has to be established
# only once per site object, making stuff a whole lot faster. Do NOT EVER
# use this if you share Site objects across threads without proper locking.
#
# DISABLED FUNCTION. Setting this variable will not have any effect.
persistent_http = False
# Default socket timeout in seconds.
# DO NOT set to None to disable timeouts. Otherwise this may freeze your
# script.
# You may assign either a tuple of two int or float values for connection and
# read timeout (since requests 2.4.0), or a single value used for both.
# The default below is 6.05 seconds for connection and 45 seconds for read.
socket_timeout = (6.05, 45)
# ############# COSMETIC CHANGES SETTINGS ##############
# The bot can make some additional changes to each page it edits, e.g. fix
# whitespace or positioning of interwiki and category links.
# This is an experimental feature; handle with care and consider re-checking
# each bot edit if enabling this!
cosmetic_changes = False
# If cosmetic changes are switched on, and you also have several accounts at
# projects where you're not familiar with the local conventions, you probably
# only want the bot to do cosmetic changes on your "home" wiki which you
# specified in config.mylang and config.family.
# If you want the bot to also do cosmetic changes when editing a page on a
# foreign wiki, set cosmetic_changes_mylang_only to False, but be careful!
cosmetic_changes_mylang_only = True
# The dictionary cosmetic_changes_enable should contain a tuple of languages
# for each site where you wish to enable cosmetic changes in addition to your
# own language (if cosmetic_changes_mylang_only is set).
# Please set your dictionary by adding such lines to your user-config.py:
# cosmetic_changes_enable['wikipedia'] = ('de', 'en', 'fr')
cosmetic_changes_enable = {}
# The dictionary cosmetic_changes_disable should contain a tuple of languages
# for each site where you wish to disable cosmetic changes. You may use it
# when cosmetic_changes_mylang_only is False, but you can also disable your
# own language. This also overrides the settings in the
# cosmetic_changes_enable dictionary. Please set your dict by adding such
# lines to your user-config.py:
# cosmetic_changes_disable['wikipedia'] = ('de', 'en', 'fr')
cosmetic_changes_disable = {}
# cosmetic_changes_deny_script is a list of scripts for which cosmetic changes
# are disabled. You may add additional scripts by appending script names in
# your user-config.py ("+=" operator is strictly recommended):
# cosmetic_changes_deny_script += ['your_script_name_1', 'your_script_name_2']
# Appending the script name also works:
# cosmetic_changes_deny_script.append('your_script_name')
cosmetic_changes_deny_script = ['category_redirect', 'cosmetic_changes',
'newitem', 'touch']
# ############# REPLICATION BOT SETTINGS ################
# You can add replicate_replace to your user-config.py.
#
# It has the following format:
#
# replicate_replace = {
# 'wikipedia:li': {'Hoofdpagina': 'Veurblaad'}
# }
#
# to replace all occurrences of 'Hoofdpagina' with 'Veurblaad' when writing to
# liwiki. Note that this does not take the origin wiki into account.
replicate_replace = {}
# ############# FURTHER SETTINGS ##############
# Proxy configuration
# TODO: proxy support
proxy = None
# Simulate settings
# Defines what additional actions the bots are NOT allowed to do (e.g. 'edit')
# on the wiki server. Allows simulation runs of bots to be carried out without
# changing any page on the server side. Use this setting to add more actions
# in user-config.py for wikis with extra write actions.
actions_to_block = []
# Set simulate to True or use -simulate option to block all actions given
# above.
simulate = False
# How many pages should be put to a queue in asynchronous mode.
# If maxsize is <= 0, the queue size is infinite.
# Increasing this value will increase memory usage but could speed up
# processing. The higher this value, the smaller the additional speed-up.
max_queue_size = 64
# Define the line separator. Pages retrieved via API have "\n" whereas
# pages fetched from screen (mostly) have "\r\n". Interwiki and category
# separator settings in family files should use multiples of this.
# LS is a shortcut alias.
line_separator = LS = '\n'
# Settings to enable mwparserfromhell
# <https://mwparserfromhell.readthedocs.org/en/latest/>
# Currently used in textlib.extract_templates_and_params
# This is more accurate than our current regex, but only works
# if the user has already installed the library.
use_mwparserfromhell = True
# Pickle protocol version to use for storing dumps.
# This config variable is not used for loading dumps.
# Version 2 is common to both Python 2 and 3, and should
# be used when dumps are accessed by both versions.
# Version 4 is only available from Python 3.4 on.
pickle_protocol = 2
# ============================
# End of configuration section
# ============================
# ############# OBSOLETE SETTINGS #############
# This section contains configuration options that are no longer in use.
# They are kept here to prevent warnings about undefined parameters.
panoramio = {
'review': False, # Should our uploads automatically be marked as reviewed?
'reviewer': '', # If so, under what reviewer name?
}
special_page_limit = 500
# #############################################
def makepath(path, create=True):
    """Return a normalized absolute version of the path argument.

    When create is True and the directory part of the path (its dirname())
    does not exist yet, that directory and any missing parents are created.
    When the directory already exists, or create is False, the filesystem
    is left untouched. Append a '/' to the path if the path itself should
    be treated as a directory.

    from holger@trillke.net 2002/03/18

    @param path: path in the filesystem
    @type path: str
    @param create: create the directory if it is True. Otherwise do not change
        the filesystem. Default is True.
    @type create: bool
    """
    parent = os.path.normpath(os.path.dirname(path))
    if create:
        # Only touch the filesystem when the parent directory is missing.
        if not os.path.exists(parent):
            os.makedirs(parent)
    absolute = os.path.abspath(path)
    return os.path.normpath(absolute)
def datafilepath(*filename, **kwargs):
    """Return an absolute path to a data file in a standard location.

    The positional arguments are zero or more directory names, optionally
    followed by a data file name. They are joined onto config.base_dir and
    the result is passed through makepath(), so missing directories are
    created when create is True and the filesystem is left unchanged
    otherwise.

    @param filename: path components below config.base_dir
    @type filename: str
    @keyword create: create the directory if it is True. Otherwise don't change
        the filesystem. Default is True.
    @type create: bool
    """
    target = os.path.join(base_dir, *filename)
    return makepath(target, create=kwargs.get('create', True))
def shortpath(path):
    """Return a file path relative to config.base_dir.

    Paths that do not start with config.base_dir are returned unchanged.
    """
    if not path.startswith(base_dir):
        return path
    # Skip the base directory itself plus the separator that follows it.
    # NOTE(review): this is a plain string prefix test, so a sibling
    # directory sharing the prefix would also match - confirm with callers.
    skip = len(base_dir) + len(os.path.sep)
    return path[skip:]
def _win32_extension_command(extension):
    """Get the command from the Win32 registry for an extension.

    The first ProgID listed under the extension's OpenWithProgids key of the
    current user is looked up, and its 'shell\\open\\command' default value
    is read from HKEY_CLASSES_ROOT.

    @param extension: file extension without the leading dot
    @type extension: str
    @return: the part of the registered command preceding the '%1'
        placeholder, with the character directly in front of the placeholder
        and surrounding whitespace removed; None if a registry lookup fails
        or the command has no '%1' placeholder after its first character
    @rtype: str or None
    """
    fileexts_key = \
        r'Software\Microsoft\Windows\CurrentVersion\Explorer\FileExts'
    key_name = fileexts_key + r'\.' + extension + r'\OpenWithProgids'
    try:
        key1 = winreg.OpenKey(winreg.HKEY_CURRENT_USER, key_name)
        _progID = winreg.EnumValue(key1, 0)[0]
        _key2 = winreg.OpenKey(winreg.HKEY_CLASSES_ROOT,
                               r'%s\shell\open\command' % _progID)
        _cmd = winreg.QueryValueEx(_key2, None)[0]
    except WindowsError as e:
        # Catch any key lookup errors
        output('Unable to detect program for file extension "{0}": {1!r}'
               .format(extension, e))
        return None

    # See T102465 for issues relating to using this value.
    # str.find() returns -1 when the placeholder is missing, which is truthy;
    # compare the index explicitly so such commands are not truncated.
    placeholder = _cmd.find('%1')
    if placeholder > 0:
        cmd = _cmd[:placeholder]
        # Remove any trailing character, which should be a quote or space
        # and then remove all whitespace.
        return cmd[:-1].strip()
    return None
def _detect_win32_editor():
"""Detect the best Win32 editor."""
# Notepad is even worse than our Tkinter editor.
unusable_exes = ['notepad.exe',
'py.exe',
'pyw.exe',