camelbot

#!/usr/bin/perl
# author: seth
# description: a wikibot

use strict;
use warnings;
BEGIN{
	# used to check whether this script is running on a labs server or somewhere else
	# on labs e.g. Tk is disabled.
	our $_running_on_labs = (-e '/data/project/camelbot/perl/lib');
}
our $_running_on_labs;
use if $_running_on_labs, lib => qw(/data/project/camelbot/perl/lib);
use Carp;                     # better output on errors for debugging purposes
use Data::Dumper;             # for debugging purposes
use File::Path qw(make_path); # create directories
use File::Slurp qw(slurp write_file);  # read/write files
use Getopt::Long qw(:config bundling); # cli params
use Pod::Usage;               # cli params help
use POSIX qw/strftime/;       # format timestamp

# packages: CamelBot, CamelBotRC, CamelBotIRC
#
# functions
# =========
# global functions
# -------------------
# sub syntaxCheck
# 	parse cli params
#
# CamelBot: mediawiki functions
# -------------------
# sub new
# 	constructor
#
# sub api_cont
# 	use MediaWiki::API using 'continue' for a loop
#
# sub api_simple
# 	use MediaWiki::API and catch errors
#
# sub archive_ext_links
#		find all links in given text ref and call saving procedure on archive.org	
#
# sub cat_add
# 	adds a given bunch of pages to a given category by adding [[category:new]] at 
# 	the end
#
# sub cat_rename
# 	moves a category and replaces all [[category:old]] by [[category:new]]
#
# sub check_external_link
# 	given two successive parts $a1, $a2 of a string $a this function checks, 
# 	whether second part of string $a2 is an url with http status 200. the first 
# 	part of the string $a1 is used to detect the end of url in part two $a2.
#
# sub cleanup_wiki_page
# 	standard cleaning up
#
# sub convert_ns
# 	converts namespace id to its name or vice versa
#
# sub _db_init
# 	init db connection
#
# sub _db_query
# 	send query to db
#
# sub _db_simplequery
# 	wrapper for simple db queries
#
# sub db_cat_of_dead_monitoring
# 	use db to monitor added categories of dead people in an endless loop
#
# sub db_extlinks_monitoring
# 	use db to monitor external links in an endless loop
#
# sub db_fetch_externallinks
# 	use db to get external links for a given range of el_ids of el-table
#
# sub db_fetch_pages_of_categories_of_the_dead
# 	use db to get pages that are contained in a category concerning dead people
#
# sub db_fetch_recentchanges
# 	use db to get recent changes for a given time period
#
# sub db_fetch_sbl_log
# 	use db to get all entries of the sbl log of blocked edits; optional 
# 	(but recommended) given a regexp for a searched url
#
# sub db_rc_monitoring
# 	use db to monitor recent changes in an endless loop
#
# sub db_table2array_of_hash
# 	converts a db_table (fetchall_arrayref) to an array where each element is a 
# 	hashref with {'rowname0'=>'content', 'rowname1'=>'content', ...}
#
# sub delete_marked_pages
# 	delete pages that are in a special category
#
# sub delete_wiki_page
# 	deletes a wiki page
#
# sub download_css
# 	download a css file (not implemented yet: and all files loaded inside css)
#
# sub download_files
# 	downloads files from the wiki
#
# sub download_pages_by_prefix
# 	downloads all pages with the same prefix
#
# sub get_abuse_filter_info
# 	return some info on abuse filter rules
#
# sub get_all_pages
# 	return all pages that match a given pattern
#
# sub get_diff
# 	a more detailed version of MediaWiki::Bot::diff
#
# sub get_extlinks_via_db
#		get ref to an array of hashes, where each hash represents an external link and 
#		one page where it's used
#
# sub get_page_info
# 	get information about a given page (or given pages)
#
# sub get_pages
#   get page titles by search or explicitly
#
# sub get_pages_content
#   get content of wikipages
#
# sub get_pages_by_prefix
# 	get all pages and their latest content by prefix
#
# sub get_pages_of_cat_of_dead_via_db
#		get ref to an array of hashes, where each hash represents a category and 
#		one page where it's used
#
# sub get_rc_via_db
#		get ref to an array of hashes, where each hash represents a recent change
#
# sub get_namespace_id
# 	return namespace id of a given page
#
# sub get_sbl_entries
# 	get all sbl entries of a given wiki
#
# sub get_tables_from_wikitext
# 	tries to return all tables from a given wikitext. (this will fail on tables in 
# 	tables!)
#
# sub get_date_iso
# 	get date as formatted string "%Y-%m-%d" from given (or current) unix timestamp
#
# sub get_time_iso
# 	get date and time as formatted string "%Y-%m-%d%H:%M:%S" from given (or current)
# 	unix timestamp
#
# sub get_time_iso_
# 	get date and time as formatted string "%Y-%m-%d %H:%M:%S" from given 
# 	(or current) unix timestamp
#
# sub get_user_contribs
# 	get list of pages edited by user
#
# sub get_wikitable_ascii_col_widths
# 	get col widths defined by ascii art
#
# sub handle_cat_dead
# 	given a bunch of categories of dead people, this function updates a maintenance 
# 	list
#
# sub handle_extlinks
# 	given a bunch of external links, this function calls functions like 
# 	archive_ext_links
#
# sub handle_rc_pages
# 	major function for handling of recent changes of short-term and mid-term
#
# sub _handle_api_error
# 	just a centralized short version of mw api error handling
#
# sub is_allowed
# 	checks whether bot-template is placed on a page and forbids editing
#
# sub link_replacement
# 	replaces links in wiki pages
#
# sub login
# 	logs a bot in to some wiki
#
# sub newest_post_info
# 	\return hash ref with thread, author and date of newest post
# 	\param[in] $wikitext content of a talk page
#
# sub notifier
# 	notify on user or pre-defined wiki pages, if particular edits occur
#
# sub parse_wikitext
# 	use mediawiki-api of a wiki to parse wikitext
#
# sub parse_page
# 	use mediawiki-api of a wiki to get a parsed wikipage
#
# sub parse_irc_rc
# 	parse recent changes told by bot via irc. return ref to hash with content.
#
# sub php_code_unused
# 	searches for all links in a given piece of wikitext
#
# sub post_process_html
# 	clean up html that was generated by parsing wikitext
#
# sub rebuild_table
# 	builds a table and pastes it to a section (overwriting the section)
#
# sub save_wiki_page
# 	\brief saves a wiki page
# 	\param[in] $bot a logged in MediaWiki::Bot
# 	\param[in] $page_utf8 page name in utf8
# 	\param[in] $summary an edit summary (without 'Bot: ') that will be used when 
# 		saving
# 	\param[in] $text a ref to the new wikitext
# 	\param[in] $orig_text a ref to the old wikitext
# 	\param[in] $time_stack a ref to an array that is used for low frequency editing
# 	\param[in] $user_answer a ref to a string containing '' (nothing) or 'y' to 
# 		save the last user's answer
#
# sub search_sbl_blocked_log
# 	search the list of blocked edits (blocked by sbl) for a given url-regexp
#
# sub search_sbl_attempts
# 	search list(s) of blocked edits (blocked by sbl)
#
# sub table2wikitext
# 	converts a special table hash ref to wikitext
#
# sub table_body2array
# 	convert wiki table body to perl ref to 2d array
#
# sub test_and_replace
# 	tests whether a regexp matches and replaces it. give some verbose output.
# 	checks urls, breaks if http status != 200
# 	returns number of replacements
#
# sub text_replacement
# 	replaces text in wiki pages
#
# sub time_management
# 	forces that only x edits per minute are done 
#
# sub title2filename
# 	converts a page title to a filename
#
# sub title2url
# 	convert article name to url
#
# sub title2url_part
# 	convert article name to part of url with article name
# 	'w t f' => 'w_t_f'
#
# sub update_edit_filter_index
# 	update page with index of present threads about edit filters
#
# sub update_maintenance_lists_build_table
# 	build a table for update_maintenance_lists
#
# sub update_maintenance_lists
# 	update some maintenance lists on given edit
#
# sub upload_file
# 	uploads a file to the wiki
#
# sub url2title
# 	convert url to article name
#
# sub wikitableHeader2array
# 	convert wiki table header to perl ref to 1d array
#
# sub wikitable2array
# 	convert wiki table to perl ref to hash, containing a header and a body 
# 	containing a 2d array
#
# CamelBot: other functions
# ---------------
# sub decode_html_entities
# 	decode &gt;, &lt;, and &amp; in-place
#
# sub get_http_content
# 	return content of a given url
#
# sub get_http_status
# 	return http status code of a given url
#
# sub msg
# 	print message to stdout
#
# sub num_unique_elem
# 	return number of elements, without counting double entries
#
# sub read_file_binary
# 	read a file binary
#
# sub show_diff
# 	shows a diff via Tk
#
# sub write_csv
# 	write a 2d-array (= ref to array of refs to arrays) to a csv file

# global vars
# version of this script; Getopt::Long::VersionMessage() prints $main::VERSION
$main::VERSION = '1.54.0'; # 2017-05-08
# avoid 'wide character' warning: encode everything printed to STDOUT as UTF-8
binmode STDOUT, ":encoding(UTF-8)";

# Carp::Always
# __DIE__ handler in the style of Carp::Always: re-throws the error with a UTC
# timestamp in front of the message and a full stack backtrace appended.
# Exception objects (i.e. references) are re-thrown untouched.
sub _die {
	if(ref $_[0]){
		die @_;
	}
	# work on a copy; the elements of @_ may alias read-only values
	my @message = @_;
	if($message[-1] =~ /\n$/s){
		# drop the trailing " at FILE line N.\n"; longmess adds the location itself
		$message[-1] =~ s/.*\K at .*? line .*?\n$//s;
	}
	my $timestamp = strftime("%Y-%m-%d %H:%M:%S ", gmtime());
	die Carp::longmess($timestamp, @message);
}

# __WARN__ handler in the style of Carp::Always: emits the warning with a UTC
# timestamp in front of the message and a full stack backtrace appended.
sub _warn {
	# work on a copy; the elements of @_ may alias read-only values
	my @message = @_;
	if($message[-1] =~ /\n$/s){
		# drop the trailing " at FILE line N.\n"; longmess adds the location itself
		$message[-1] =~ s/.*\K at .*? line .*?\n$//s;
	}
	my $timestamp = strftime("%Y-%m-%d %H:%M:%S ", gmtime());
	warn Carp::longmess($timestamp, @message);
}

# route every die() and warn() of this script through _die/_warn, so that each
# message carries a timestamp and a stack backtrace
$SIG{__DIE__} = \&_die;
$SIG{__WARN__} = \&_warn;

# Parse the command line (@ARGV) and return a ref to a hash of all cli params.
# Prints the usage and exits with status 2, if no params are given, if a param
# is unknown or if additional (non-option) arguments are left over.
sub syntaxCheck{
	my %opt = ( # default cli params
		'archive-ext-links'  => undef, # save external links from id till id, using db
		'cat-add'            => 0,     # add pages to a category
		'cat-change'         => 0,     # cat change
		'cat-dead'           => 0,     # cat of the dead
		'clean-up'           => undef, # clean-up
		'db-access'          => 'replica.my.cnf', # filename with database access data 
		'diff-from'          => undef, # page or revision id to get a diff from
		'diff-to'            => undef, # version to diff to
		'delete'             => 0,     # delete pages
		'download-by-prefix' => undef, # download pages by prefix
		'file'               => undef, # a file for text-replacement
		'get-page'           => undef, # get a page
		'http-status'        => undef, # check reachability of url
		'link-replacement'   => 0,     # link replacing
		'minor'              => 1,     # mark as minor
		'parse'              => undef, # wikitext-file to parse
		'search-pattern'     => undef, # text replacing regexp pattern
		'rc-monitoring'      => undef, # start rc monitoring via irc or db
		'use-eval'           => 0,     # use eval on text-replacement
		'save-as-html'       => undef, # download pages and save as html pages
		'search'             => undef, # search via api
		'search-sbl-attempts' => undef, # search the sbl log of blocked edits
		'section'            => undef, # section in wikipage
		'text-replacement'   => undef, # text replacing
		'dest'               => undef, # destination filename
		'update-editfilter-index' => 0,# update edit filter index in w:de
		'upload'             => 0,     # upload a file
		'usercontribs'       => 0,     # fetch user contributions
		'username'           => $ENV{'USER'}, # user name
		'source'             => undef, # source filename
		'summary'            => undef, # summary/description
		'wikipage'           => undef, # wikipage (single page)
		'wikipages'          => undef, # wikipages (regexp)
		'test'               => 0,     # show result only (without renaming)
		'verbose'            => 1,     # trace; grade of verbosity
		'version'            => 0,     # display version and exit
	);
	# calling the script without any param makes no sense
	unless(@ARGV){
		pod2usage(-exitval => 2);
	}
	my $parsed_ok = GetOptions(\%opt,
		'archive-ext-links=s',
		'cat-add',
		'cat-change|c',
		'cat-dead',
		'clean-up',
		'db-access=s',
		'diff-from=s',
		'diff-to=s',
		'delete',
		'download-by-prefix=s',
		'file=s',
		'get-page|g',
		'http-status=s',
		# shortcut for --rc-monitoring=irc
		'irc' => sub { $opt{'rc-monitoring'} = 'irc';},
		'link-replacement|l',
		'minor!',
		'parse=s',
		'rc-monitoring=s',
		'use-eval',
		'save-as-html=s',
		'search=s',
		'search-pattern=s',
		'search-sbl-attempts=s',
		'section=s',
		'text-replacement',
		'dest|udest=s',
		'update-editfilter-index',
		'upload',
		'usercontribs|usercontributions',
		'username=s',
		'source|usource=s',
		'summary|usummary=s',
		'wikipage=s',
		'wikipages=s',
		'test|t',
		# the following three params all map to the grade of verbosity
		'silent|quiet|q' => sub { $opt{'verbose'} = 0;},
		'very-verbose' => sub { $opt{'verbose'} = 2;},
		'verbose|v:+',
		# auto_version will not auto make use of 'V'
		'version|V' => sub { Getopt::Long::VersionMessage();},
		# auto_help will not auto make use of 'h'
		'help|?|h' => sub { Getopt::Long::HelpMessage(
				-verbose  => 99,
				-sections => 'NAME|SYNOPSIS|EXAMPLES'
			);
		},
		'man' => sub { pod2usage(-exitval => 0, -verbose => 2);},
	);
	pod2usage(-exitval => 2) unless $parsed_ok;
	unless(exists $opt{'verbose'}){
		$opt{'verbose'} = 1;
	}
	# allowed number of additional (non-option) arguments; a maximum of -1 means
	# 'no upper limit'
	my ($min_extra, $max_extra) = (0, 0);
	my $num_extra = scalar @ARGV;
	my $too_few  = $num_extra < $min_extra;
	my $too_many = ($max_extra != -1 && $num_extra > $max_extra);
	return \%opt unless $too_few or $too_many;
	if($min_extra == $max_extra){
		print "error: number of arguments must be exactly $min_extra," . 
			" but is " . $num_extra . ".\n";
	}else{
		my $max_text = $max_extra == -1 ? 'inf' : $max_extra;
		print "error: number of arguments must be at least $min_extra" .
			" and at most " . $max_text . ", but is " . $num_extra . ".\n";
	}
	print "please use -h for help\n";
	exit 2;
}

{
	package CamelBot;
	use if $_running_on_labs, lib => qw(/data/project/camelbot/perl/lib);
	use Data::Dumper;           # for debugging purposes
	use DBI;                    # db connection
	use File::Slurp qw(write_file); # read/write files
	use IPC::Run;               # used for tidy
	use LWP::UserAgent;         # fast, small web browser
	use MediaWiki::Bot;         # bot for mediawiki, uses mediawiki-api-interface
	use POSIX qw/strftime/;     # format timestamp

	# optional modules: each one is loaded at runtime inside an eval, so that a
	# missing module does not abort the script.
	# $_loaded_mod{MODULE} is true, iff loading and importing MODULE succeeded.
	my %_loaded_mod;
	$_loaded_mod{'Term::ReadKey'} = eval{
		require Term::ReadKey;    # used for user input
		Term::ReadKey->import();
		1;
	};
	$_loaded_mod{'Text::Diff'} = eval{
		require Text::Diff;       # diff
		Text::Diff->import();
		1;
	};
	$_loaded_mod{'Tk::DiffText'} = eval{
		require Tk::DiffText;     # graphical diff tool
		Tk::DiffText->import();
		1;
	};
	use Time::Local;            # timegm
	$_loaded_mod{'URI::Escape'} = eval{
		require URI::Escape;      # uri_escape, uri_unescape
		URI::Escape->import();
		1;
	};

	# Constructor: builds the bot object from a hash ref of params, loads the
	# maintenance params and logs in to the wiki.
	# \param[in] $params ref to hash; recognized keys: ask_user, db_access, host,
	# 	rel_url_path, max_edits_per_min, minor, mw_username, mw_password,
	# 	showdiff, simulation, user_answer, verbosity, cliparams
	# \return the new (logged in) object
	sub new{
		my ($class, $params) = @_;
		my %attr = (
			# --- behaviour ---
			# 0 = don't ask user; 1  = ask user for every action
			'ask_user'       => $params->{'ask_user'} // 0, 
			# maximum number of edits per minute (-1 = inf)
			'max_edits_per_min' => $params->{'max_edits_per_min'} // 5, 
			'minor'          => $params->{'minor'} // 1, # 0 = non-minor; 1 = minor
			# 0 = don't show diff; 1 = show a diff, 2 = gui
			'showdiff'       => $params->{'showdiff'} // 0, 
			# 0 = do real edits; 1 = simulate only
			'simulation'     => $params->{'simulation'} // 0, 
			# user answer initially is "no"
			'user_answer'    => $params->{'user_answer'} // '', 
			'verbosity'      => $params->{'verbosity'} // 1,
			'cliparams'      => $params->{'cliparams'},
			# --- wiki and db access ---
			'db'             => {'file' => $params->{'db_access'}}, 
			'host'           => $params->{'host'} // 'de.wikipedia.org', 
			'rel_url_path'   => $params->{'rel_url_path'} // 'w', 
			'mw_api'         => undef, # instance of MediaWiki::API
			'mw_apiurl'      => undef, # url of mw_api
			'mw_bot'         => undef, # instance of MediaWiki::Bot
			#'mw_password'    => $mw_password, # not needed permanently
			'mw_username'    => $params->{'mw_username'},
			'wm_proj_type'   => undef, # e.g. wikipedia
			'wm_proj'        => undef, # e.g. dewiki
			'wm_lang'        => undef, # e.g. de
			# --- pre-compiled regexps ---
			're_editfilter'  => qr/^Wikipedia:Bearbeitungsfilter\/
					(?:\d+|Antr\xe4ge$|Fehlerkennungen$|(?:Regelp|P)r\xfcfung$)/x,
			're_url_class'   => qr/[^\]\[<>"'\x00-\x20\x7F)]|\)(?!\s)/,
			're_url_rear'    => qr/ # no trailing points
				(?:[^\]\[<>"'\x00-\x20\x7F)]|\)(?!\s))*
				(?:[^\]\[<>"'\x00-\x20\x7F).]|\)(?!\s))/x,
			# --- internal state ---
			'maintenance'    => {}, # will be set in separate function
			'time_stack'     => [], # needed for edit/min constraint
		);
		my $self = bless \%attr, $class;
		$self->refresh_maintenance_params();
		# the password is handed over to login only and not stored in the object
		$self->login($params->{'mw_password'}); # may change mw_username!
		return $self;
	}

	# Destructor: closes the db connection, if there is one.
	sub DESTROY{
		my ($self) = @_;
		my $db_handle = $self->{'db'}->{'handle'};
		return unless defined $db_handle;
		$db_handle->disconnect;
	}

	# Do one step of a continued api query.
	# \param[in,out] $query ref to hash with the api query; the continuation params
	# 	of the api answer are written into this hash, so that the same hash can be
	# 	passed again for the next step
	# \param[in] $mw_options passed through to api_simple
	# \param[in] $die_on_error passed through to api_simple (default: 1)
	# \return list ($api_result, $finished); $finished is 1, if the api answer 
	# 	contains no 'continue' element
	sub api_cont{
		my ($self, $query, $mw_options, $die_on_error) = @_;
		$die_on_error //= 1;
		unless(exists $query->{'continue'}){
			$query->{'continue'} = '';
		}
		my $api_result = $self->api_simple($query, $mw_options, $die_on_error);
		my $continue = $api_result->{'continue'};
		return ($api_result, 1) unless defined $continue;
		# take over all continuation params for the next request
		@{$query}{keys %$continue} = values %$continue;
		return ($api_result, 0);
	}

	# Send a single query to the mediawiki api and handle errors.
	# \param[in] $query ref to hash with the api query
	# \param[in] $mw_options options passed to the api() call of $self->{'mw_api'}
	# \param[in] $die_on_error 1 (default) = die, if the api call failed
	# \return the api result; undef on failure (if $die_on_error is false)
	sub api_simple{
		my ($self, $query, $mw_options, $die_on_error) = @_;
		$die_on_error //= 1;
		$self->msg(3, Dumper($query));
		my $api_result = $self->{'mw_api'}->api($query, $mw_options);
		if(defined $api_result){
			$self->msg(3, Dumper($api_result));
			return $api_result;
		}
		# api call failed: report the error
		my $caller_increment = 1;
		$self->_handle_api_error($caller_increment);
		die if $die_on_error;
		return $api_result;
	}

	# Request the 'save' url of web.archive.org for every given url. Urls that 
	# already point to archive.org or webcitation.org are skipped.
	# \param[in] $urls ref to array of urls
	# \param[in] $wp_page title of the wiki page that contains the urls (only 
	# 	needed for the webcitation.org request, which is not implemented yet)
	# \return 1
	sub archive_ext_links{
		my ($self, $urls, $wp_page) = @_;
		$wp_page //= '';
		# TODO: 1. fill in params; 2. test
		# $wp_page_url and $encoded_url are prepared for a (not yet implemented) 
		# request to
		# http://www.webcitation.org/archive?email=...&source=$wp_page_url&url=$encoded_url
		my $wp_page_url = $self->title2url($wp_page);
		for my $link (@$urls){
			next if $link =~ /^https?:\/\/(?:[a-z]+\.)?(?:archive|webcitation)\.org/;
			my $encoded_url = $self->uri_escaper($link);
			my $save_url = 'https://web.archive.org/save/' . $link;
			#$self->msg(2, $save_url);
			# fetching the save url triggers the archiving
			$self->check_external_link(\'', \$save_url);
		}
		return 1;
	}

	# Add every given page to a category by inserting [[category:...]] into its 
	# wikitext. Pages that already contain the category link are left untouched.
	# \param[in] $pages ref to array of page titles (utf8 encoded)
	# \param[in] $cat name of the category without namespace prefix (utf8 encoded)
	# \return 1
	sub cat_add{
		my $self  = shift;
		my $pages = shift;
		my $cat   = shift;
		utf8::decode($cat);
		my $summary = '+cat';
		# the first letter of a category name is case-insensitive
		my $cat_first = lc substr($cat, 0, 1);
		my $cat_tail = substr($cat, 1);
		$self->msg(1, "start cat adding...");
		for my $page (@$pages){
			$self->msg(1, "page = $page");
			my $page_utf8 = $page;
			utf8::decode($page_utf8);
			my $text = $self->{'mw_bot'}->get_text($page_utf8);
			# presumably get_text yields undef for non-existent pages (verify against 
			# MediaWiki::Bot); such pages must not be created just to add a category
			unless(defined $text){
				$self->msg(1, "could not get text of page, skipping.", 'warning');
				next;
			}
			next unless $self->is_allowed(\$text, $page);
			# page is already in cat
			next if $text =~ /\[\[[Cc]ategory:(?i:\Q$cat_first\E)\Q$cat_tail\E\]\]/;
			$self->msg(1, "add page to cat.");
			# add page to cat
			my $text_bak = $text;
			if($text =~ /\[\[[cC]ategory:/){
				# insert exactly once, directly before the last existing category link.
				# /s lets '.*' span newlines (so the last link of the whole text is found); 
				# no /g, because that would insert the category several times.
				$text =~ s/.*\K(?=\[\[[cC]ategory:)/[[category:$cat]]\n/s;
			}else{
				# no category yet: replace trailing whitespace by the new category.
				# no /g, because after a non-empty match /g would match the empty string at 
				# the end once more and append the category twice.
				$text =~ s/\s*\z/\n\n[[category:$cat]]/;
			}
			$self->time_management();
			$self->save_wiki_page($page_utf8, $summary, \$text, \$text_bak);
		}
		return 1;
	}

	# Replace all links to a category by links to another category on every page 
	# of the old category. Sort keys ([[category:old|key]]) are preserved.
	# \param[in] $old_cat name of the old category without namespace prefix
	# \param[in] $new_cat name of the new category without namespace prefix
	# \return 1
	sub cat_rename{
		my $self = shift;
		my $old_cat = shift;
		my $new_cat = shift;
		utf8::decode($old_cat);
		utf8::decode($new_cat);
		my $summary = 'cat change';
		# the first letter of a category name is case-insensitive
		my $old_cat_first = lc substr($old_cat, 0, 1);
		my $old_cat_tail = substr($old_cat, 1);
		$self->msg(1, "start cat renaming...");
		# get pages in (old) category
		my @pages = $self->{'mw_bot'}->get_pages_in_category("Category:$old_cat");
		$self->msg(1, 'found '.@pages." pages:");
		# for all pages in category change category
		for my $page (@pages){
			$self->msg(1, "page = $page");
			my $text = $self->{'mw_bot'}->get_text($page);
			next unless $self->is_allowed(\$text, $page);
			my $text_bak = $text;
			my $found_cat; # spelling of the old category as found in the page
			# $1 = old category name, $2 = optional sort key including the leading '|'
			$text =~ s{
				\[\[[cC]ategory:((?i:\Q$old_cat_first\E)\Q$old_cat_tail\E)
				(\|[^\]]*)?\]\]
			}{
				$found_cat = $1;
				'[[category:' . $new_cat . ($2 // '') . ']]';
			}gex;
			if($text ne $text_bak){
				$self->msg(1, "changing cat: $found_cat -> $new_cat");
				$self->time_management();
				$self->save_wiki_page($page, $summary, \$text, \$text_bak);
			}
		}
		return 1;
	}

	# Check whether a string starts with a reachable url.
	# \param[in] $pretext ref to the text directly in front of $$urlstring; if it 
	# 	ends with 'url=' (template param), the url ends at the next '|' or '}'
	# \param[in] $urlstring ref to the string that (possibly) starts with an url
	# \return 0, if $$urlstring starts with http(s):// and the http status of the 
	# 	url is not 200; otherwise 1
	sub check_external_link{
		my ($self, $pretext, $urlstring) = @_;
		# everything that is no http(s) url is not checked at all
		return 1 unless $$urlstring =~ /^https?:\/\//;
		my $url = $$urlstring; #for vim: { {
		if($$pretext =~ /url\s*=\s*$/){
			$url = $1 if $$urlstring =~ /^([^|}]+)[|}]/;
		}
		my $response_code = $self->get_http_status($url);
		$self->msg(1, "  status of '$url': $response_code");
		return $response_code != 200 ? 0 : 1;
	}

	# Apply a fixed set of regexp based clean-ups (link fixes, typography, ...) to
	# a wikitext in-place.
	# \param[in,out] $text ref to the wikitext; it is handed over to 
	# 	test_and_replace for every single fix
	# \param[in] $page title of the page; only used to exclude the article about 
	# 	https from the replacement of pseudo-external links
	# \return list ($changes, $summary): $changes is a ref to a hash that maps the 
	# 	name of each fix to the return value of test_and_replace (number of 
	# 	replacements); $summary is the edit summary listing all fixes with a 
	# 	positive count.
	# 	if $text is no ref to a defined string, a warning is printed and only 
	# 	the (empty) $changes is returned.
	# NOTE(review): as soon as the fixes have run, %$changes has keys, so $summary 
	# 	is non-empty even if no fix matched -- confirm that callers check 
	# 	$changes.
	sub cleanup_wiki_page{
		my $self     = shift;
		my $text     = shift; # ref to string
		my $page     = shift;
		my $changes = {};
		# 4th param of test_and_replace: with this flag the replacement strings 
		# below are written as perl expressions (presumably evaluated by 
		# test_and_replace -- see there)
		my $use_eval_in_repl = 1;
		if(defined $text and ref $text eq 'SCALAR' and defined $$text){
			# == link fixes ==
			# === double protocol ===
			$changes->{'double protocol'} = $self->test_and_replace($text, 
				qr/(?:(?:http:|(?<=\[))\/\/)+(https?):?\/\/($self->{'re_url_rear'})/m, 
				'"$1:\/\/$2"', $use_eval_in_repl);
			# === missing/too much brackets ===
			# <ref>[http://www.example.org</ref>
			#  was '"$1\x5b$2]$3"' until 2015-07-18, but deleting of such brackets seems more convenient
			$changes->{'wrong left bracket'} = $self->test_and_replace($text, 
				qr/(<ref>)\s*\[\s*(https?:\/\/$self->{'re_url_rear'})[\s.]*+(<\/ref>)/m, 
				'"$1$2$3"', $use_eval_in_repl); 
			# <ref>http://www.example.org]</ref>
			$changes->{'wrong right bracket'} = $self->test_and_replace($text, 
				qr/(<ref>)\s*(https?:\/\/$self->{'re_url_rear'})\s*+]\s*(<\/ref>)/m, 
				'"$1$2$3"', $use_eval_in_repl); # see above
			# === checking of spelling/typography ===
			# superfluous second dot after an abbreviation
			$changes->{'abk..'} = $self->test_and_replace($text, 
				qr/ (?:Chr|e\.(?: |&nbsp;)?V|ff|Jh|usw)\.\K\. /m, 
				' ', !$use_eval_in_repl);
			# superfluous dot after a link whose text already ends with a dot
			$changes->{'abk.. in links'} = $self->test_and_replace($text, 
				qr/\.\]\]\K\./m, 
				'', !$use_eval_in_repl);
			# '5 %-ig' -> '5%ig'
			$changes->{'%ig'} = $self->test_and_replace($text, 
				qr/[0-9]\K(?: |&nbsp;)?%-ig/m, 
				'%ig', !$use_eval_in_repl);
			# missing comma in front of 'sondern'
			$changes->{'komma vor "sondern"'} = $self->test_and_replace($text, 
				qr/mehr sondern/m, 
				'mehr, sondern', !$use_eval_in_repl);
			# no white space between degree sign and 'C'
			$changes->{'°C ([[WP:SVZ]])'} = $self->test_and_replace($text, 
				qr/°\K(?: |&nbsp;|&thinsp;)C/m, 
				'C', !$use_eval_in_repl);
			#$changes->{"whitespace nach gestorben-symbol"} = $self->test_and_replace($text, 
			#	qr/\x{2020}\K(?! |&nbsp;)/m, 
			#	'" "');
			# remove white space in front of a ref-tag
			$changes->{"whitespace vor ref-tag"} = $self->test_and_replace($text, 
				qr/(?<![|=°])(?<!<\/sup>)(?<!\}\})(?: |&(?:nbsp|thinsp);)(?=<ref\b)/m, 
				'', !$use_eval_in_repl); # \n will lead to false positives # vim: {{
			# + }} because of false positive: //de.wikipedia.org/w/index.php?title=Griechische_Staatsschuldenkrise&diff=143759791&oldid=143757358
			# === specific domains ===
			# see [https://de.wikipedia.org/w/index.php?title=MediaWiki_Diskussion:
			# Spam-blacklist&oldid=136131974#
			# www.denkmalschutz.de_.22.26cHash.3D.5B0-9a-f.5D.2B.22_entfernen]
			$changes->{'lf denkmalschutz.de'} = $self->test_and_replace($text, 
				qr/http:\/\/www.denkmalschutz.de\/$self->{'re_url_class'}+?\K
				&cHash=[0-9a-f]+/mx, 
				'', !$use_eval_in_repl);
			# == optional fixes==
			# these fixes are only applied, if at least one of the fixes above 
			# changed something
			if(scalar(grep {$_ > 0} values %$changes) > 0){
				# === unicode character 0x200e before categories ===
				$changes->{'u200e'} = $self->test_and_replace($text, 
					qr/\x{200e}(?=\]\]|\}\}|\|)/m, 
					'', !$use_eval_in_repl);
				# === superfluous space in cats ===
				$changes->{'superfluous space in cats'} = $self->test_and_replace($text, 
					qr/\[\[Kategorie:\K +/m, 
					'', !$use_eval_in_repl);
				# === superfluous declaration as template via keyword ===
				$changes->{'superfluous "template"'} = $self->test_and_replace($text, 
					qr/\{\{\K[Vv]orlage:/m, 
					'', !$use_eval_in_repl);
				# === wrong html br-tag ===
				$changes->{'wrong br-tag'} = $self->test_and_replace($text, 
					qr/<(?:[\/\\]br\s*|br[.\\])>/m, 
					'<br />', !$use_eval_in_repl);
				# === superfluous br-tag ===
				# (br-tag at the end of a list item)
				$changes->{'superfluous br-tag'} = $self->test_and_replace($text, 
					qr/^\*.*\K<(?:[\/\\]br\s*|br ?[.\\\/])> *$/m, 
					'', !$use_eval_in_repl);
				# === wrong white space before '%' ===
				$changes->{'white space before %; see [[WP:SVZ]]'} = $self->test_and_replace($text, 
					qr/&(?:nbsp|thinsp|#x202f|#8239);%/m, 
					' %', !$use_eval_in_repl);
				# === technically superfluous external links ===
				# external links to pages of the own wiki are converted to internal 
				# links; the article about https is excluded
				if($page ne 'Hypertext Transfer Protocol Secure' and 
					 $page ne 'Hypertext_Transfer_Protocol_Secure'){
					$changes->{'lf pseudo-external links'} = $self->test_and_replace($text, 
						qr/(?<!<ref>)\[{1,2}
							(https?:\/\/$self->{'wm_lang'}\.$self->{'wm_proj_type'}\.org\/wiki\/
								[^\]|\x20\x23?]++
								(?:\x23(?!mediaviewer)[^\]|\x20?]+)?)\s
							([^\]]+)
						\]{1,2}/mx, 
						'"[[".$self->url2title($1).($self->url2title($1) eq $2 ? "" : "|$2")."]]"', 
						$use_eval_in_repl
					);
				}
				# === redundant internal link description ===
				# [[foo|foo]] -> [[foo]]
				$changes->{'redundant link description'} = $self->test_and_replace($text, 
					qr/\[\[\s*+([^|\]\[]+\S)\s*\|\s*+\1\s*+\]\]/m, 
					'"[[$1]]"', $use_eval_in_repl);
				# === zeros ===
				# {{0}}{{0}} -> {{0|00}} (each '{{0}}' consists of 5 characters)
				$changes->{'zeros'} = $self->test_and_replace($text, 
					qr/((?:\{\{0\}\}){2,})/m, 
					'"{{0|" . ("0"x(length($1)/5)) . "}}"', $use_eval_in_repl);
			}
			# === tests ===
			#$self->test_and_replace($text, 
			#		qr/seth sagt: hallo camelbot!/m, 
			#		'camelbot sagt: hallo seth!', !$use_eval_in_repl);
		}else{
			$self->msg(0, '$$text is not defined', 'warning');
			return $changes;
		}
		# edit summary: names of all fixes that changed something
		my $summary = (keys %$changes > 0) ? 'kleinere korrekturen (' .
			(join ', ', grep {$changes->{$_} > 0} keys %$changes) .
			'), siehe [[user:CamelBot]].' : '';
		return ($changes, $summary);
	}

	# Convert a namespace id to its name or a namespace name (or alias) to its id.
	# The namespaces and their aliases are fetched from the wiki on first use and 
	# cached in the object.
	# \param[in] $ns namespace id (digits only) or namespace name/alias
	# \return name (if an id was given) or id (if a name was given); 0, if the 
	# 	namespace is unknown
	sub convert_ns{
		my ($self, $ns) = @_;
		if(not defined $self->{'namespaces'}){
			# get namespaces
			$self->{'namespaces'} = {$self->{'mw_bot'}->get_namespace_names()};
			my $res = $self->api_simple({
				'action' => 'query',
				'meta'   => 'siteinfo',
				'siprop' => 'namespacealiases',
			});
			my %id_of_alias = map {
				($_->{'*'} => $_->{'id'});
			} @{$res->{'query'}->{'namespacealiases'}};
			$self->{'namespacealiases'} = \%id_of_alias;
		}
		my $converted;
		if($ns =~ /^[0-9]+$/){
			# id -> name
			$converted = $self->{'namespaces'}->{$ns};
		}else{
			# name -> id; canonical names take precedence over aliases
			my %id_of_name = reverse(%{$self->{'namespaces'}});
			$converted = $id_of_name{$ns};
			$converted = $self->{'namespacealiases'}->{$ns} unless defined $converted;
		}
		return $converted // 0;
	}

	# Open (or re-use) the connection to the database of a wiki project. The 
	# access data (user, password and optionally port and host) are read from the 
	# file $self->{'db'}->{'file'}.
	# \param[in] $proj name of the project, e.g. 'dewiki' (default: 
	# 	$self->{'wm_proj'})
	# \return 0 on error; 1, if a new connection was established; 2, if an 
	# 	existing connection to the same project is re-used
	sub _db_init{
		my $self    = shift;
		my $proj    = shift // $self->{'wm_proj'};
		unless(defined $proj){
			$self->msg(0, '$proj is not defined', 'error');
			return 0;
		}
		my $db = $self->{'db'};
		if(defined $db->{'handle'} and 
			defined $db->{'proj'} and 
			$db->{'proj'} eq $proj){
			# don't re-read db_file or reconnect, if still connected
			return 2;
		}
		unless(defined $db->{'file'}){
			$self->msg(0, 'no db conn file given', 'error');
			return 0;
		}
		# a connection to another project is still open: close it explicitly 
		# instead of just dropping the handle
		if(defined $db->{'handle'}){
			eval{ $db->{'handle'}->disconnect; 1 } 
				or $self->msg(1, ' could not close old db connection.', 'warning');
			$db->{'handle'} = undef;
		}
		# check db conn file
		unless(-e $db->{'file'}){
			$self->msg(1, " db conn file: '$db->{'file'}' does not seem to exist.", 'warning');
			# try relative filenames relative to the home directory
			if(substr($db->{'file'}, 0, 1) ne '/'){
				$db->{'file'} = $ENV{'HOME'} . '/' . $db->{'file'};
				if(-e $db->{'file'}){
					$self->msg(1, " using db conn file: '$db->{'file'}'.");
				}
				# otherwise the error is reported below, when opening the file fails
			}
		}
		# db connection parameters
		$db->{'host'} = $proj . '.labsdb';
		#$db->{'host'} = 's1.labsdb';
		$db->{'name'} = $proj . '_p';
		$db->{'port'} = '3306';
		$db->{'user'} = '';
		$db->{'pw'} = '';
		my $opened = open(my $INFILE, '<', $db->{'file'});
		unless($opened){
			$self->msg(0, "could not read db conn file: $!", 'error');
			return 0;
		}
		while(my $line = <$INFILE>){
			$db->{'user'} = $1 if $line =~ /^\s*user='(.*)'/;
			$db->{'pw'} = $1 if $line =~ /^\s*password='(.*)'/;
			# '$' instead of '\n': the last line of the file may lack a newline
			$db->{'port'} = $1 if $line =~ /^\s*port='?(.*?)'?\s*(?:#.*)?$/;
			$db->{'host'} = $1 if $line =~ /^\s*host='?(.*?)'?\s*(?:#.*)?$/;
		}
		close($INFILE);
		# never write the password to the log
		my %printable = %$db;
		$printable{'pw'} = '***' if defined $printable{'pw'} and $printable{'pw'} ne '';
		$self->msg(2, 'connect to db: ' . Dumper(\%printable));
		$db->{'handle'} = DBI->connect( # db connect
			'DBI:mysql:' . 
				'database=' . $db->{'name'} . 
				';host=' . $db->{'host'} . 
				';port=' . $db->{'port'} . 
				';mysql_client_found_rows=0',
			$db->{'user'},
			$db->{'pw'},
			{	'RaiseError' => 1,
				'AutoCommit' => 1,
				'mysql_enable_utf8' => 1
			}
		) or $self->msg(0, 'cannot connect to db: '.$DBI::errstr);
		# with RaiseError a failed connect dies; this is just a safety net
		return 0 unless defined $db->{'handle'};
		$db->{'proj'} = $proj;
		return 1;
	}

	# Send a query to the db and fetch the complete result.
	# \param[in] $query sql query (string)
	# \param[in] $return_header 1 (default) = the first row of the result 
	# 	contains the column names
	# \return ref to array of rows (each row is a ref to an array); 0 on error
	sub _db_query{
		my ($self, $query, $return_header) = @_;
		$return_header //= 1;
		my $dbh = $self->{'db'}->{'handle'};
		if(not defined $dbh){
			$self->msg(0, 'no databae handle', 'error');
			return 0;
		}
		my $sth = $dbh->prepare($query);
		unless($sth){
			$self->msg(0, $dbh->errstr.' query = \''.$query.'\'');
			return 0;
		}
		$sth->execute;
		# the column names have to be read before fetching the rows
		my $col_names = $sth->{'NAME'}; # or NAME_lc if needed
		my $rows = $sth->fetchall_arrayref();
		#$sth->finish;
		if($return_header){
			unshift @$rows, $col_names;
		}
		return $rows;
	}

	# Wrapper for simple db queries: connects to the db (if necessary), builds 
	# the query and sends it.
	# \param[in] $db_query either a complete sql query (string) or a ref to an 
	# 	array [$template, @values], where $template is a sprintf template. values 
	# 	that look like numbers are inserted as they are, all other values are 
	# 	quoted by the db handle. the template (first element of the array) is 
	# 	trimmed in-place.
	# \param[in] $return_header 1 (default) = first row of the result contains 
	# 	the column names
	# \param[in] $proj name of the project (default: $self->{'wm_proj'})
	# \return result of _db_query; undef, if the db connection failed
	sub _db_simplequery{
		my ($self, $db_query, $return_header, $proj) = @_;
		$return_header //= 1;
		$proj //= $self->{'wm_proj'};
		if(defined $proj){
			$self->msg(3, "proj = $proj");
		}else{
			$self->msg(0, '$proj is not defined.', 'error');
		}
		$self->_db_init($proj) or return undef;
		#print Dumper $db_query;
		if(ref(\$db_query) ne 'SCALAR'){
			# template with values: build the query string
			#$db_query->[0] =~ s/\s+/ /g;
			$db_query->[0] =~ s/^\s+|\s+\z//g;
			my ($template, @values) = @$db_query;
			my $dbh = $self->{'db'}->{'handle'};
			my @sql_values;
			for my $value (@values){
				# undef is converted to NULL by quote, so use '%s' in the template even 
				# for numbers
				my $is_number = (defined $value and $value =~ /^-?(?:\d*\.\d+|\d+\.?)\z/);
				push @sql_values, $is_number ? $value : $dbh->quote($value);
			}
			$db_query = sprintf $template, @sql_values;
		}
		$self->msg(2, $db_query);
		return $self->_db_query($db_query, $return_header);
	}

	#sub db_fetch_recentchanges_detailed{
	#	my $self = shift;
	#	my $last_seconds = shift // 60*60*12; # default = last 12 hours
	#	my $timestamp = $self->get_time_iso($time()-$last_seconds);
	#	# check whether token exists:
	#	my $db_table = db_simplequery([
	#			'SELECT `rc_timestamp`, `rc_user_text`, `rc_namespace`, `rc_title`, `rc_comment`, `rc_minor`, `rc_bot`, `rc_new`, `rc_cur_id`, `rc_this_oldid`, `rc_last_oldid`, `rc_type`, `rc_source`, `rc_patrolled`, `rc_old_len`, `rc_new_len`, `rc_deleted` FROM `recentchanges` WHERE `rc_timestamp`>%s', $timestamp], 
	#		!$_return_header);
	#	if(defined $db_table->[0]){
	#		my $email = lc $db_table->[0][0];
	#		my $user_id = $db_table->[0][1];
	#	}
	#}

	# Get all articles (namespace 0) that were added to a category 'Gestorben 2...'
	# (died in 2...) since a given point in time.
	# \param[in] $since timestamp of format 'YYYY-MM-DD hh:mm:ss' (default: two 
	# 	days ago)
	# \return result of db_table2array_of_hash for the columns page, namespace, 
	# 	category and cl_timestamp (at most 10000 rows); ref to an empty array, if 
	# 	$since is invalid or the db query failed
	sub db_fetch_pages_of_categories_of_the_dead{
		my $self    = shift;
		my $since   = shift;
		if(defined $since and 
			$since !~ /^[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}\z/)
		{
			$self->msg(0, 'param $since is not valid, see source code', 'warning');
			return [];
		}
		my $return_header = 1;
		# the query is used as a sprintf template, thus '%%' for a literal '%'
		my $sql_query = '
			SELECT 
				`page`.`page_title` AS `page`, 
				`page`.`page_namespace` AS `namespace`, 
				`cl_to` AS `category`,
				`cl_timestamp`
			FROM `categorylinks` 
			LEFT JOIN `page` 
				ON `page`.`page_id`=`cl_from` 
			WHERE `cl_to` LIKE \'Gestorben_2%%\' 
				AND `page_namespace` = \'0\'
				AND `cl_timestamp` >= ' . (defined $since ? '%s' : 'date_sub(now(), interval 2 day)') . 
			' LIMIT 10000';
		# only pass $since, if the template contains a placeholder for it
		my @query = ($sql_query);
		push @query, $since if defined $since;
		my $db_table = $self->_db_simplequery(\@query, $return_header);
		# _db_simplequery returns undef and _db_query returns 0 on failure; 
		# dereferencing those values would kill the script
		unless(ref $db_table eq 'ARRAY'){
			$self->msg(0, 'db query failed', 'warning');
			return [];
		}
		# skip the header row (index 0)
		for my $row(@$db_table[1..(@$db_table-1)]){
			utf8::decode($row->[0]); # page
		}
		return $self->db_table2array_of_hash($db_table);
	}

	# Get external links (and the titles of the pages that contain them) for a 
	# given range of ids of the table externallinks.
	# \param[in] $from_id smallest el_id (default: 1)
	# \param[in] $to_id largest el_id (optional)
	# \return result of db_table2array_of_hash for the columns el_id, el_from, 
	# 	el_to and page (at most 1000 rows, ordered by el_id); ref to an empty 
	# 	array, if a param is invalid
	sub db_fetch_externallinks{
		my $self    = shift;
		my $from_id = shift // 1;
		my $to_id   = shift;
		if($from_id !~ /^[0-9]+\z/){
			$self->msg(0, 'param $from_id is not valid, see source code', 'warning');
			return [];
		}elsif(defined $to_id and $to_id !~ /^[0-9]+\z/){
			$self->msg(0, 'param $to_id is not valid, see source code', 'warning');
			return [];
		}
		my $return_header = 1;
		my $sql_max_id = defined $to_id ? 'AND `el_id`<=%s' : '';
		my @query = ('
				SELECT `el_id`, `el_from`, `el_to`, `p`.`page_title` AS `page`
				FROM `externallinks`
				LEFT JOIN `page` AS `p`
					ON (`p`.`page_id`=`externallinks`.`el_from`)
				WHERE `el_id`>=%s ' . $sql_max_id . '
				ORDER BY `el_id`
				LIMIT 1000
		', $from_id);
		# only pass $to_id, if the template contains a placeholder for it; a 
		# surplus value leads to a 'Redundant argument in sprintf' warning
		push @query, $to_id if defined $to_id;
		my $db_table = $self->_db_simplequery(\@query, $return_header);
		return $self->db_table2array_of_hash($db_table);
	}

	# fetch recent changes (no log entries) of the namespaces 0 and 4, ordered by
	# timestamp.
	# param $timestamp_begin: optional lower bound, format yyyymmddhhmmss. 
	#   default = 12 hours ago (utc).
	# param $timestamp_end: optional upper bound, format yyyymmddhhmmss
	# returns the result of db_table2array_of_hash for the fetched rows.
	sub db_fetch_recentchanges{
		my $self = shift;
		my $timestamp_begin = shift; # format yyyymmddhhmmss
		my $timestamp_end   = shift; # format yyyymmddhhmmss
		unless(defined $timestamp_begin){
			# default = last 12 hours. the timestamp is built directly in the format
			# yyyymmddhhmmss, because get_time_iso returns a different format.
			$timestamp_begin = strftime('%Y%m%d%H%M%S', gmtime(time() - 60*60*12));
		}
		# the upper bound and its param are added together, such that the number of 
		# params always equals the number of placeholders in the query.
		my $sql_max_ts = '';
		my @sql_params = ($timestamp_begin);
		if(defined $timestamp_end){
			$sql_max_ts = 'AND `rc_timestamp`<=%s';
			push @sql_params, $timestamp_end;
		}
		my $return_header = 1;
		my $db_table = $self->_db_simplequery(['
				SELECT
					`rc_timestamp` AS `timestamp`, 
					`rc_user_text` AS `user`, 
					`rc_namespace` AS `ns_id`, 
					`rc_title` AS `page`, 
					`rc_comment` AS `summary`, ' .
					#`rc_this_oldid`,
					#`rc_last_oldid`,
					'`rc_old_len`,
					`rc_new_len`
				FROM `recentchanges`
				WHERE `rc_timestamp`>=%s
					' . $sql_max_ts . '
					AND (`rc_namespace`=0
						OR `rc_namespace`=4
					)
					AND `rc_log_type` IS NULL
				ORDER BY `rc_timestamp`
		', @sql_params], $return_header);
		return $self->db_table2array_of_hash($db_table);
	}

	# fetch all spamblacklist log entries whose `log_params` match a regexp, 
	# ordered by timestamp.
	# param $proj:   project to query, default = $self->{'wm_proj'}
	# param $url_re: regexp (string or qr), default = '.' (matches everything)
	# returns the table as delivered by _db_simplequery, requested without header.
	sub db_fetch_sbl_log{
		my ($self, $proj, $url_re) = @_;
		$proj   //= $self->{'wm_proj'};
		$url_re //= '.';
		# mariadb does not know perl's /(?^)/ pattern, so '(?^' is reduced to '(?'
		$url_re =~ s/\(\?\K\^//g;
		my @columns = qw(
			log_timestamp
			log_namespace
			log_title
			log_comment
			log_params
			log_user_text
		);
		my $column_list = join ', ', map {'`'.$_.'`'} @columns;
		my $return_header = 1;
		my $log_table = $self->_db_simplequery(['
				SELECT '.$column_list.'
				FROM `logging`
				WHERE `log_type` = \'spamblacklist\'
					AND log_params regexp %s
				ORDER BY `log_timestamp`
		', $url_re], !$return_header, $proj);
		return $log_table;
	}

	# convert a table (array ref of rows, first row = header) to an array of 
	# hashes, one hash per data row, keyed by the header entries.
	# param $db_table: array ref of array refs. note: the header row is removed 
	#   from the given table, i.e., the argument is modified.
	# returns array ref of hash refs. cells missing in a row are undef; cells 
	#   without a header entry are dropped.
	sub db_table2array_of_hash{
		my ($self, $db_table) = @_;
		my $column_names = shift @$db_table;
		my @records;
		for my $row(@$db_table){
			my %record;
			@record{@$column_names} = @{$row}[0..@$column_names-1];
			push @records, \%record;
		}
		return \@records;
	}

	# delete all pages that are in the (wiki dependent) deletion category, except
	# pages matching /Template:L.schen/.
	# each page is passed to delete_wiki_page after a call of time_management.
	sub delete_marked_pages{
		my ($self) = @_;
		# wiki dependent
		my $delete_cat = 'Category:Löschen';
		utf8::decode($delete_cat);
		my @category_members = $self->{'mw_bot'}->get_pages_in_category($delete_cat);
		my @pages = grep {$_ !~ /Template:L.schen/} @category_members;
		$self->msg(1, 'found '.scalar(@pages).' pages.');
		$self->msg(2, 'adding manually set pages');
		# place for pages that shall be deleted in addition (currently none)
		my @manual_pages = ();
		push @pages, @manual_pages;
		my $summary = 'deleting marked page';
		for my $page(@pages){
			my $show_page = ($self->{'verbosity'} >= 3 or $self->{'simulation'});
			$self->msg(1, " $page") if $show_page;
			$self->time_management();
			$self->delete_wiki_page($page, $summary);
		}
	}

	# delete a wiki page via api, unless the bot runs in simulation mode.
	# param $page:    title of the page to delete
	# param $summary: reason of deletion (will be prefixed by 'Bot: ')
	# if $self->{'ask_user'} is set, the user is asked for confirmation on stdin:
	#   'y' or 'yes' = delete; empty input = repeat the previous answer 
	#   ($self->{'user_answer'}); everything else = don't delete.
	# dies (after _handle_api_error) if the api call fails.
	sub delete_wiki_page{
		my $self        = shift;
		my $page        = shift;
		my $summary     = shift;
		if((not defined $self->{'simulation'}) or !$self->{'simulation'}){
			my $user_input;
			if(defined $self->{'ask_user'} and $self->{'ask_user'}){
				print "execute? ('y' = yes, else = no) ";
				# a closed stdin is handled like an empty answer
				$user_input = <STDIN> // '';
				chomp($user_input);
			}
			# the answer has to be matched as a whole; otherwise any input containing 
			# a 'y' (e.g. 'nay') would trigger the deletion.
			if((not defined $self->{'ask_user'}) or !$self->{'ask_user'} 
					or $user_input =~ /^\s*y(?:es)?\s*\z/ 
					or ($user_input eq '' and ($self->{'user_answer'} // '') eq 'y')){
				$self->{'user_answer'} = 'y';
				$self->msg(1, "  deleting page [[$page]] ...");
				push @{$self->{'time_stack'}}, time;
				$self->{'mw_api'}->edit({
					'action' => 'delete',
					'minor'  => $self->{'minor'},
					'title'  => $page,
					'reason' => "Bot: $summary"
				}) or ($self->_handle_api_error() and die);
			}else{
				$self->{'user_answer'} = '';
			}
		}
	}

	# get the diff of a revision via api and parse the html table of the diff.
	# param $page: hash ref containing either 'title' (newest revision of that 
	#   page is used) or 'revid'
	# param $diffto: revision to diff to, default = 'prev'
	# returns undef on wrong param or api failure, otherwise a hash ref with the
	#   keys revid, parentid, minor, user, timestamp, comment, from, to and diff.
	#   'diff' is an array ref with one hash per changed line, looking like
	#   [{
	#   	'diff-deletedline' => [
	#   		{'text' => 'some text ',         'type' => 'equal'},
	#   		{'text' => 'some deleted text ', 'type' => 'del'},
	#   		{'text' => 'some other text',    'type' => 'equal'},
	#   	],
	#   	'diff-addedline' => [
	#   		{'text' => 'some text',          'type' => 'equal'},
	#   		{'text' => 'some better text',   'type' => 'ins'},
	#   		{'text' => 'some other text',    'type' => 'equal'},
	#   	]
	#   }, ... ]
	sub get_diff{
		my $self   = shift;
		my $page   = shift;
		my $diffto = shift // 'prev';
		# get diff via api
		my $query = {
			'action'   => 'query',
			'prop'     => 'revisions',
			'rvdiffto' => $diffto,
		};
		if(defined $page->{'title'}){
			$query->{titles}  = $page->{'title'};
			$query->{rvlimit} = 1;
		}elsif(defined $page->{'revid'}){
			$query->{'revids'} = $page->{'revid'};
		}else{
			$self->msg(0, 'wrong param $page', 'error');
			return undef;
		}
		my $diff = $self->api_simple($query) or return undef;
		# extract diff info (only the first page of the api answer is used)
		my @page_ids = keys %{$diff->{query}->{pages}};
		$diff = $diff->{'query'}->{'pages'}->{$page_ids[0]}->{'revisions'}->[0];
		my @diff_text = split /\n/, ($diff->{'diff'}->{'*'} // '');
		my $diff_struct = [];
		my $lin_num = 0;
		for my $line(@diff_text){ # loop over all diff lines
			next if $line =~ /^(?:<\/?tr>|<!-- .* -->)?\z/; # skip tr-tags
			my ($type, $text) = 
				$line =~ /^\s*<td [^>]*class="([^"]+)"[^>]*>(.*)<\/td>\z/;
			unless(defined $type){
				# skip the line; otherwise captures of an earlier match would be used
				$self->msg(0, "could not parse $line", 'error');
				next;
			}
			next if $type =~ /^diff-(?:lineno|marker|context)\z/;
			$text =~ s/^<div>|<\/div>\z//g; # truncate
			my @chunks;
			for my $chunk(
				split /(<(?:(?:del|ins)\b[^>]*>[^<>]*<\/(?:del|ins))>)/, $text
			){
				# detect the tags before decoding the entities, such that an escaped 
				# '&lt;ins&gt;' of the page text can't be taken for a real tag.
				my ($chunk_type, $chunk_text) = ('equal', $chunk);
				if($chunk =~ /^<(ins|del)\b[^>]*>(.*)<\/\1>\z/){
					($chunk_type, $chunk_text) = ($1, $2);
				}
				$self->decode_html_entities(\$chunk_text);
				push @chunks, {
					'text' => $chunk_text,
					'type' => $chunk_type,
				};
			}
			if($type !~ /^diff-(?:added|deleted)line\z/){
				$self->msg(0, "unknown type '$type'", 'warning');
			}
			$diff_struct->[$lin_num]->{$type} = \@chunks;
			# an added line finishes the pair (deleted line, added line)
			++$lin_num if $type eq 'diff-addedline';
		}
		return {
			'revid'     => $diff->{'revid'},
			'parentid'  => $diff->{'parentid'},
			'minor'     => $diff->{'minor'},
			'user'      => $diff->{'user'},
			'timestamp' => $diff->{'timestamp'},
			'comment'   => $diff->{'comment'},
			'from'      => $diff->{'diff'}->{'from'},
			'to'        => $diff->{'diff'}->{'to'},
			'diff'      => $diff_struct,
		};
	}

	# download a css file and save it in a given directory.
	# param $cssurl:     url of the css file ('&amp;' will be replaced by '&')
	# param $css_dir:    directory where the file shall be saved
	# param $images_dir: not used yet
	# returns the name of the local file, which is built from the url by 
	#   replacing all characters except a-z, A-Z, 0-9, ~, _, - with '_'.
	#   the name is returned even if the download failed (an error is reported
	#   and no file is written in that case).
	sub download_css{
		my $self       = shift;
		my $cssurl     = shift;
		$cssurl =~s/&amp;/&/g;
		my $css_dir    = shift;
		my $images_dir = shift; # todo: save url{...}
		my $filename = $cssurl;
		$filename =~s/[^a-zA-Z0-9~_-]/_/g;
		$filename = "$css_dir/$filename.css";
		# get_http_content is a method; called as a plain function it would take 
		# the url for $self and would get no url at all.
		my ($http_status, $css_content) = $self->get_http_content($cssurl);
		if($http_status != 200){
			$self->msg(0, 'download of css failed.', 'error');
		}else{
			write_file($filename, $css_content);
		}
		return $filename;
	}

	# download files from the wiki and save them locally.
	# param $files: array ref of file titles (without namespace 'File:'). 
	#   note: the titles are converted to local file names in place.
	# param $no_warn_files: array ref of local file names that have been handled
	#   already; for those no 'exists already' warning is printed. all handled 
	#   files are appended to this list.
	# param $files_prefix: string prepended to the local file names (default '')
	# existing local files are never overwritten.
	# returns 1
	sub download_files{
		my $self          = shift;
		my $files         = shift;
		my $no_warn_files = shift;
		my $files_prefix  = shift // '';
		for my $file(@$files){
			$self->msg(2, "getting '$file' from wiki");
			my $img_data = $self->{'mw_bot'}->get_image("File:$file");
			unless(defined $img_data){
				$self->msg(0, "could not read file '$file' from wiki", 'error');
				next;
			}
			$file = $self->title2filename($file);
			# the existence has to be checked for the very path that will be written
			my $path = $files_prefix.$file;
			if(-e $path){
				unless(grep {$_ eq $file} @$no_warn_files){
					$self->msg(0, "file '$file' exists already", 'warning');
					push @$no_warn_files, $file;
				}
				next;
			}
			push @$no_warn_files, $file;
			$self->msg(2, "writing file '$file'");
			write_file($path, {binmode => ':raw'}, \$img_data);
		}
		return 1;
	}

	# save the wikitext of all non-redirect pages with a given title prefix to 
	# local files ('<file name of title>.wikitext') and download the files 
	# (file/image/media links) that are used in those pages.
	# param $prefix: title prefix
	# returns 1 on success; 0 if no page was found or if one of the local 
	#   .wikitext files exists already (remaining pages are skipped then).
	sub download_pages_by_prefix{
		my ($self, $prefix) = @_;
		# titles of all non-redirects (empty titles are dropped)
		my @prefix_pages = grep {$_} map {$_->{'title'}} 
			grep {!$_->{'redirect'}} $self->{'mw_bot'}->prefixindex($prefix);
		unless(@prefix_pages){
			$self->msg(0, "no pages found with prefix '$prefix'", 'warning');
			return 0;
		}
		my $page_texts = $self->{'mw_bot'}->get_pages_content(\@prefix_pages);
		if($self->{'verbosity'} >= 1){
			$self->msg(1, "getting '$_'") for sort keys %$page_texts;
		}
		my @no_warn_files;
		my $files_prefix = '';
		for my $title(keys %$page_texts){
			my $text = $page_texts->{$title};
			my $filename = $self->title2filename($title).'.wikitext';
			$self->msg(2, "saving '$filename'");
			if(-e $filename){
				$self->msg(0, "file '$filename' exists already", 'error');
				return 0;
			}
			write_file($filename, {binmode => ':utf8'}, $text);
			# targets of all [[File:...]], [[Image:...]] and [[Media:...]] links
			my @images = $text =~ /
				\[\[
					(?:[fF]ile|[iI]mage|[mM]edia):
					(
						(?:
							(?!\]\])
							[^|]
						)*
					)/xg;
			$self->download_files(\@images, \@no_warn_files, $files_prefix);
		}
		return 1;
	}

	# get the descriptions of all abuse filters via api.
	# returns array ref, where the index is the filter id and the value is the
	#   description of that filter.
	sub get_abuse_filter_info{
		my ($self) = @_;
		# api.php?action=query&list=abusefilters&abfprop=id|description
		# &abfstartid=1&abflimit=500
		# could be extended to 
		# api.php?action=query&list=abusefilters&abfprop=
		# id|description|pattern|actions|hits|comments|lasteditor|lastedittime|status|
		# private&abflimit=500
		my %query = (
			'action'         => 'query',
			'list'           => 'abusefilters', 
			'abflimit'       => '500',
			'abfprop'        => 'id|description',
			'abfstartid'     => '1',
		);
		my %mw_options = ('max' => 1);
		my @description_of;
		my $finished = 0;
		# api_cont is called until it reports that no continuation is left
		until($finished){
			(my $answer, $finished) = $self->api_cont(\%query, \%mw_options);
			my @filters = @{$answer->{'query'}->{'abusefilters'}};
			for my $filter(@filters){
				$description_of[$filter->{'id'}] = $filter->{'description'};
			}
		}
		return \@description_of;
	}

	# get the titles of all pages (api list 'allpages') that match a pattern.
	# param $pattern: regexp (string or qr) the titles have to match. 
	#   undefined or empty = all pages.
	# returns array ref of titles without duplicates, in order of the api answers.
	sub get_all_pages{
		my $self    = shift;
		my $pattern = shift;
		# an empty pattern must not be interpolated into a match, because perl 
		# would reuse the last successfully matched pattern instead.
		my $title_re = (defined $pattern and length $pattern) 
			? qr/$pattern/ 
			: qr/(?:)/;
		my $query = {
			'action'    => 'query',
			'list'      => 'allpages', 
			'aplimit'   => '500', 
		};
		my $mw_options = {'max' => 1};
		my %list_of_articles;
		my $pages = [];
		my $die_on_error = 1; # 0 = dont die; 1 = die
		my $finished = 0;
		while(!$finished){
			(my $tmp, $finished) = $self->api_cont($query, $mw_options, !$die_on_error);
			my $allpages = $tmp->{'query'}->{'allpages'};
			next unless defined $allpages;
			for my $ap(@$allpages){
				my $title = $ap->{'title'};
				next if exists $list_of_articles{$title} or $title !~ $title_re;
				$list_of_articles{$title} = 1;
				push @$pages, $title;
			}
		}
		return $pages;
	}

	# replace the html entities &gt; &lt; &amp; by the characters > < & in place.
	# each entity is decoded only once, i.e., '&amp;lt;' results in '&lt;'.
	# param $text: reference to the string that shall be modified
	# returns 1
	sub decode_html_entities{
		my ($self, $text) = @_;
		my %char_of = (
			'gt'  => '>',
			'lt'  => '<',
			'amp' => '&',
		);
		$$text =~ s/&(gt|lt|amp);/$char_of{$1}/g;
		return 1;
	}

	# fetch a url via http get.
	# param $url: url to fetch
	# returns list (http status code, decoded content). the content is undef 
	#   if the request failed; the status line is reported as error then.
	sub get_http_content{
		my ($self, $url) = @_;
		my %ua_options = (
			'keep_alive' => 1, 
			'timeout' => 10, 
			'agent' => 'Mozilla/5.0',
		);
		my $response = LWP::UserAgent->new(%ua_options)->get($url);
		my $content;
		unless($response->is_success){
			$self->msg(0, $response->status_line, 'error');
			return ($response->code, $content);
		}
		$content = $response->decoded_content;
		return ($response->code, $content);
	}

	# send a http head request to a url.
	# param $url: url to check
	# returns the http status code of the response
	sub get_http_status{
		my ($self, $url) = @_;
		my %ua_options = (
			'keep_alive' => 1, 
			'timeout' => 10, 
			'agent' => 'Mozilla/5.0',
		);
		my $user_agent = LWP::UserAgent->new(%ua_options);
		return $user_agent->head($url)->code;
	}

	# get the namespace id of a page title.
	# param $page: page title
	# returns 0 if the title contains no ':' after at least one other character;
	#   otherwise the result of convert_ns for the part before the first ':'.
	sub get_namespace_id{
		my ($self, $page) = @_;
		my ($prefix) = $page =~ /^([^:]+):/;
		return 0 unless defined $prefix;
		my $ns_id = $self->convert_ns($prefix);
		return $ns_id;
	}

	# get page information (api prop 'info') of one or more pages.
	# param $titles: a title, several titles separated by '|', or an array ref 
	#   of titles
	# returns hash ref (title => info hash of the api answer), or undef if the 
	#   api answer contains no pages.
	sub get_page_info{
		my ($self, $titles) = @_;
		if(ref $titles eq 'ARRAY'){
			$titles = join '|', @$titles;
		}
		# api.php?action=query&prop=info&titles=Wikipedia:Bearbeitungsfilter/latest%20topics&continue=
		my %query = (
			'action'         => 'query',
			'continue'       => '',
			'prop'           => 'info', 
			'titles'         => $titles, 
		);
		my $api_answer = $self->api_simple(\%query, {'max' => 1});
		my $page_infos;
		# the api answer is keyed by page id, the result is keyed by title
		while(my (undef, $info) = each %{$api_answer->{'query'}->{'pages'}}){
			$page_infos->{$info->{'title'}} = $info;
		}
		return $page_infos;
	}

	# collect page titles from several sources.
	# param $collect_types: hash ref with the optional keys
	#   'from all_pages': regexp, passed to get_all_pages
	#   'single page':    one title (will be utf8-decoded in place)
	#   'search':         passed to search_pages
	# returns array ref of titles (all_pages first, then single page, then 
	#   search results)
	sub get_pages{
		my ($self, $collect_types) = @_;
		$self->msg(2, 'getting pages');
		my $pages = [];
		my $all_pages_re = $collect_types->{'from all_pages'};
		if(defined $all_pages_re){
			$pages = $self->get_all_pages(qr/$all_pages_re/);
		}
		if(defined $collect_types->{'single page'}){
			utf8::decode($collect_types->{'single page'});
			push @$pages, $collect_types->{'single page'};
		}
		my $search = $collect_types->{'search'};
		if(defined $search){
			my $found_pages = $self->search_pages($search);
			push @$pages, @$found_pages;
		}
		$self->msg(3, Dumper($pages));
		return $pages;
	}

	# get the wikitext of several pages, optionally only of one section.
	# (similar to sub text_replacement)
	# param $pages:   array ref of page titles
	# param $section: optional; regexp for the heading text of the wanted section
	# returns array ref of hashes {'page', 'section', 'content'}. pages without 
	#   content are skipped after a warning.
	# NOTE(review): if the section can't be found, 'content' is the text before 
	#   the first heading -- confirm that callers expect that.
	sub get_pages_content{
		my ($self, $pages, $section) = @_;
		my @contents;
		for my $page(@$pages){
			$self->msg(2, "page = '$page'");
			my $text = $self->{'mw_bot'}->get_text($page);
			if(not defined $text){
				$self->msg(0, "page content of '$page' not defined", 'warning');
				next;
			}
			if(defined $section){
				# the split returns (text, heading, equal signs of heading, text, ...);
				# every third element (the equal signs) is dropped, such that
				# 0, 2, 4, ... = text
				# 1, 3, 5, ... = headings
				my $field_count = 0;
				my @parts = grep {++$field_count % 3} 
					split /^((=+)(?!=).+(?<!=)\2(?:\n|\z))/m, $text;
				my $heading_idx = -1;
				for my $idx(grep {$_ % 2} 1..$#parts){
					next if $parts[$idx] !~ /^(=+)\s*$section\s*\1$/;
					$heading_idx = $idx;
					last;
				}
				if($heading_idx == -1){
					$self->msg(1, 'could not find section in page', 'warning');
				}else{
					$self->msg(2, "found find section '$section' in page '$page'");
				}
				# text following the heading (or element 0, if nothing was found)
				$text = $parts[$heading_idx + 1];
			}
			push @contents, {
				'page'    => $page,
				'section' => $section,
				'content' => $text,
			};
		}
		return \@contents;
	}

	# get all non-redirect pages with a given title prefix, including page info
	# and revision data (content and timestamp), via the api generator 'allpages'.
	# param $params: hash ref with the keys
	#   'namespace_id': used as 'gapnamespace'
	#   'prefix':       used as 'gapprefix'
	#   'title_re':     optional regexp; only pages with matching title are kept
	# returns array ref of the page hashes of the api answers.
	sub get_pages_by_prefix{
		my $self   = shift;
		my $params = shift;
		my $query = {
			'action'         => 'query',
			'continue'       => '',
			'generator'      => 'allpages', 
			'gapnamespace'   => $params->{'namespace_id'}, 
			'gapprefix'      => $params->{'prefix'},
			'gapfilterredir' => 'nonredirects',
			'gaplimit'       => '500',
			'prop'           => 'info|revisions',
			'rvprop'         => 'content|timestamp',
		};
		my $mw_options = {'max' => 1};
		my $finished = 0;
		my @results;
		# value of $rvcont of the previous loop cycle
		my $rvcont_old;
		# one api request per cycle, until the answer contains no 'continue'
		while(!$finished){
			my $rvcont;
			my $pages = $self->api_simple($query, $mw_options);
			#print Dumper [keys $pages];
			if(defined $pages->{'continue'}){
				#print Dumper $pages->{'continue'};
				# take over all continuation params for the next request
				while(my ($k, $v) = each(%{$pages->{'continue'}})){
					$query->{$k} = $v;
				}
				if(defined $pages->{'continue'}->{'rvcontinue'}){
					# keep only the part before the first '|'. it is compared with 
					# page ids below, so presumably it is the id of the page where the 
					# next request continues -- TODO confirm with api docs
					$rvcont = $pages->{'continue'}->{'rvcontinue'};
					$rvcont =~s/\|.*//;
				}
			}else{
				$finished = 1;
			}
			# keep the pages with matching title whose id is in the range 
			# [$rvcont_old, $rvcont) (an undefined bound is not checked).
			# NOTE(review): looks like this avoids storing a page more than once 
			# while the api delivers the revisions in several steps -- confirm
			push @results, grep {
				(not defined $params->{'title_re'} 
					or $_->{'title'} =~ /$params->{'title_re'}/)
				and (not defined $rvcont or $_->{'pageid'} < $rvcont)
				and (not defined $rvcont_old or $_->{'pageid'} >= $rvcont_old)
			} values %{$pages->{'query'}->{'pages'}};
			$rvcont_old = $rvcont;
			#if(defined $pages->{'batchcomplete'}){
			#	print Dumper $pages->{'batchcomplete'};
			#}
		}
		return \@results;
	}

	# get the entries of a spam blacklist.
	# param $wiki: undef = local blacklist (page 'MediaWiki:Spam-blacklist'); 
	#   'meta' = global blacklist of meta.wikimedia.org. other values are not
	#   supported.
	# only the content of <pre> elements is used; comments, empty lines and 
	# surrounding white space are removed.
	# returns array ref of entries (one per line), or undef (after an error 
	#   message) if the blacklist could not be read.
	sub get_sbl_entries{
		my $self = shift;
		my $wiki = shift;
		my $sbl_entries;
		my $sbl_text;
		if(not defined $wiki){
			$sbl_text = $self->{'mw_bot'}->get_text('MediaWiki:Spam-blacklist');
		}elsif($wiki eq 'meta'){
			(my $http_status, $sbl_text) = $self->get_http_content(
				'https://meta.wikimedia.org/w/index.php?' . 
				'title=Spam_blacklist&action=raw&sb_ver=1'
			);
		}else{
			$self->msg(0, "unsupported param '$wiki'.", 'error');
		}
		# a failed download or a missing page yields undef, which is handled like
		# an empty text (and is reported below)
		$sbl_text //= '';
		# remove non-pre items
		if($sbl_text =~ s/.*?<pre\b[^>]*>(.*?)<\/pre>(?:.(?!<pre\b))*/$1/gs){
			# remove comments
			$sbl_text =~ s/\s*#.*//g;
			# remove empty lines
			$sbl_text =~ s/\n\K\n+//g;
			# remove preceding and trailing space
			$sbl_text =~ s/^\s+//;
			$sbl_text =~ s/\s+$//gm;
			$sbl_entries = [split /\n/, $sbl_text];
		}else{
			$self->msg(0, 'could not read sbl', 'error');
		}
		return $sbl_entries;
	}

	# extract all wiki tables from a wikitext.
	# the text is cut in front of every line starting with '{|' and behind every 
	# line consisting of '|}'; all parts starting with '{|' are tables.
	# param $wikitext: reference to the wikitext
	# returns array ref of strings
	sub get_tables_from_wikitext{
		my ($self, $wikitext) = @_;
		my @fragments = split /^(?=\{\|)|^\|\}\K$/m, $$wikitext;
		my @tables = grep {/^\{\|/} @fragments;
		return \@tables;
	}

	# format a unix timestamp as utc date 'yyyy-mm-dd'.
	# param $unixtimestamp: seconds since epoch, default = now
	sub get_date_iso{
		my ($self, $unixtimestamp) = @_;
		$unixtimestamp //= time();
		my @utc = gmtime($unixtimestamp);
		return strftime("%Y-%m-%d", @utc);
	}

	# format a unix timestamp as utc time 'yyyy-mm-ddhh:mm:ss'.
	# NOTE(review): there is no separator between date and time, so the result 
	#   is no iso 8601 timestamp -- confirm whether callers rely on that format 
	#   (see get_time_iso_ for the variant with a space).
	# param $unixtimestamp: seconds since epoch, default = now
	sub get_time_iso{
		my ($self, $unixtimestamp) = @_;
		$unixtimestamp //= time();
		my @utc = gmtime($unixtimestamp);
		return strftime("%Y-%m-%d%H:%M:%S", @utc);
	}

	# format a unix timestamp as utc time 'yyyy-mm-dd hh:mm:ss' (date and time 
	# are separated by a space).
	# param $unixtimestamp: seconds since epoch, default = now
	sub get_time_iso_{
		my ($self, $unixtimestamp) = @_;
		$unixtimestamp //= time();
		my @utc = gmtime($unixtimestamp);
		return strftime("%Y-%m-%d %H:%M:%S", @utc);
	}

	# get the pages a user (or all users with a given name prefix) contributed to.
	# param $options: hash ref with the keys
	#   'username' or 'usernameprefix': whose contributions shall be searched. 
	#     if the key 'username' exists, it is used.
	#   'ucstart', 'ucend': optional limits, passed to the api
	#   'uclimit':          optional, default = 100
	#   'namespace':        optional, passed as 'ucnamespace'
	# returns hash ref (page title => revid of the first contribution to that 
	#   page that was delivered by the api)
	sub get_user_contribs{
		my ($self, $options) = @_;
		my $user = $options->{'username'} // $options->{'usernameprefix'};
		$self->msg(1, "searching for contributions of user: $user");
		#my $ref = $self->{'mw_bot'}->contributions($options->{'username'}, 0, undef);
		my %query = (
			'action'    => 'query',
			'list'      => 'usercontribs', 
			'ucprop'    => 'ids|title',
			'ucstart'   => $options->{'ucstart'},
			'uclimit'   => $options->{'uclimit'} // 100,
		);
		my $user_param = exists $options->{'username'} ? 'ucuser' : 'ucuserprefix';
		$query{$user_param} = $user;
		for my $optional(['namespace', 'ucnamespace'], ['ucend', 'ucend']){
			my ($option_key, $api_key) = @$optional;
			next unless defined $options->{$option_key};
			$query{$api_key} = $options->{$option_key};
		}
		my %mw_options = ('max' => 1);
		my %revid_of_page;
		my $die_on_error = 1; # 0 = dont die; 1 = die
		my $finished = 0;
		until($finished){
			(my $answer, $finished) = 
				$self->api_cont(\%query, \%mw_options, !$die_on_error);
			my $usercontribs = $answer->{'query'}->{'usercontribs'};
			next unless defined $usercontribs;
			for my $contrib(@$usercontribs){
				# only the first delivered contribution per page is saved
				my $title = $contrib->{'title'};
				$revid_of_page{$title} = $contrib->{'revid'} 
					unless exists $revid_of_page{$title};
			}
		}
		return \%revid_of_page;
	}

	# guess the column widths of an ascii-formatted wiki table: for each column 
	# the most frequent cell length is used.
	# param $row_array: array ref of table rows (strings); cells are separated 
	#   by '||' or '!!'
	# returns array ref of widths (one per column). the width of the first 
	#   column is decreased by 1, because of the leading '|'.
	#   if several lengths are equally frequent, the largest of them is used 
	#   (so the result does not depend on the hash order).
	sub get_wikitable_ascii_col_widths{
		my $self = shift;
		my $row_array = shift;
		# per column: {cell length => number of occurrences}
		my @freq_of_col;
		for my $row(@$row_array){
			my @row_cell_lengths = map {length($_)} split /(?:\|\||!!)/, $row;
			for my $col(0..$#row_cell_lengths){
				++$freq_of_col[$col]->{$row_cell_lengths[$col]};
			}
		}
		# get size with max frequency
		my @widths;
		for my $freq(@freq_of_col){
			my $width = 0;
			my $max_occ = 0;
			# ascending order and '>=' let the largest length win a tie
			for my $size(sort {$a <=> $b} keys %$freq){
				if($freq->{$size} >= $max_occ){
					$max_occ = $freq->{$size};
					$width = $size;
				}
			}
			push @widths, $width;
		}
		--$widths[0] if defined $widths[0]; # decrease, because of leading '|'
		return \@widths;
	}

	# report the last error of the api object ($self->{'mw_api'}->{'error'}) as 
	# error message 'code: details'.
	# param $caller_inc: optional number that is added to the 4th param of msg 
	#   (default 0); presumably the depth of the caller to report -- verify 
	#   against sub msg
	# returns 1
	sub _handle_api_error{
		my ($self, $caller_inc) = @_;
		$caller_inc //= 0;
		my $error_text = $self->{'mw_api'}->{'error'}->{'code'} 
			. ': ' 
			. $self->{'mw_api'}->{'error'}->{'details'};
		$self->msg(0, $error_text, 'error', $caller_inc + 1);
		return 1;
	}

	# check whether the bot is allowed to edit a page, i.e., whether the text 
	# contains none of the templates
	#   {{nobots}}, {{Inuse|...}}, {{In Bearbeitung|...}},
	#   {{bots|deny=all}}, {{bots|deny=<list containing the bot>}},
	#   {{bots|allow=none}}, {{bots|allow=<list not containing the bot>}}.
	# only the first matching template of the text is evaluated.
	# param $textref: reference to the wikitext (undefined text = allowed)
	# param $page:    page title, used for the message only (default 'unknown')
	# returns 1 if the bot is allowed to edit, 0 otherwise
	sub is_allowed{
		my $self = shift;
		my $textref = shift;
		my $page = shift // 'unknown';
		my $allowed = 1;
		# braces are escaped to be matched literally; the space of 'In Bearbeitung'
		# has to be written as \x20, because white space is ignored under /x.
		if(defined $$textref and $$textref =~ /
			\{\{[nN]obots\}\}|
			\{\{[Ii]n(?:use|\x20Bearbeitung)\s*\||
			[bB]ots\s*+\|\s*+(?:
				deny\s*+=\s*+(?:
					all\s*+\}\}|
					(?<deny>.*?)\}\}
				)|
				allow\s*+=\s*+(?:
					none\s*+\}\}|
					(?<allow>.*?)\}\}
				)
			)/x){
			# save the captures, before another match resets them
			my ($allow_list, $deny_list) = ($+{'allow'}, $+{'deny'});
			# the user name is no regexp, so its meta characters are quoted
			my $username = quotemeta($self->{'mw_username'} // '');
			my $listed_re = qr/(?:^|,\s*)$username(?:\s*,|$)/;
			if(defined $allow_list){
				$allowed = 0 if $allow_list !~ $listed_re;
			}elsif(defined $deny_list){
				$allowed = 0 if $deny_list =~ $listed_re;
			}else{
				# nobots, inuse, deny=all or allow=none
				$allowed = 0;
			}
		}
		$self->msg(1, "bot is not allowed on page '$page'.") if $allowed == 0;
		return $allowed;
	}

	# search pages that contain a given external link and remove that link from 
	# the wikitext. this is done first for pages in namespace 0 (if option 
	# 'articles' is set), then for pages in all other namespaces (if option 
	# 'nonarticles' is set).
	# param $params: hash ref; the keys are described at the top of the sub.
	# for each found page the text is read via $self->{'mw_bot'}, checked with 
	# is_allowed, modified with test_and_replace and saved with save_wiki_page.
	# NOTE(review): if 'replacement' is defined, no link is modified at all, 
	#   because the code of that branch is commented out (in both namespace 
	#   parts).
	# NOTE(review): $edit_counter is not reset between both parts, so 
	#   'skip_edits' and 'max_edits' refer to the sum of both parts.
	# there is no explicit return value; a summary is printed if option 
	# 'results' is set.
	sub link_replacement{
		my $self = shift;
		my $params = shift;
		# regexp pattern of protocol, e.g., /http:\/\//
		my $re_prot_part          = $params->{'re_prot_part'};
		# regexp pattern of searched url (without protocol), 
		# e.g., /(?:www\.)?example\.com/
		my $re_url_part           = $params->{'re_url_part'};
		# replacement function
		my $replacement           = $params->{'replacement'};
		my %options;
		# 1 = delete links
		$options{'delete_link'}   = $params->{'delete_link'};
		# 1 = replace links in refs by 'dead link' template.
		$options{'ref2deadlink'}  = $params->{'ref2deadlink'};
		# link to use in api link search
		$options{'searched_link'} = $params->{'searched_link'};
		# if user contributions shall be searched...
		$options{'user_contribs'} = $params->{'user_contribs'};
		# wiki summary of page edit
		$options{'summary'}       = $params->{'summary'};
		# 0 = don't append on ns 0, 1 = append on ns 0
		$options{'articles'}      = $params->{'articles'}    //  0;
		# 0 = don't append on ns!=0, 1 = append on ns!=0
		$options{'nonarticles'}   = $params->{'nonarticles'} //  0;
		# 0 = don't touch refs; 1 = replace refs
		$options{'refs'}          = $params->{'refs'}        //  1;
		# 0 = don't touch non-refs; 1 = replace non-refs
		$options{'nonrefs'}       = $params->{'nonrefs'}     //  1;
		# maximum number of edits (-1 = inf)
		$options{'max_edits'}     = $params->{'max_edits'}   // -1;
		# skip first n pages.
		$options{'skip_edits'}    = $params->{'skip_edits'}  //  0;
		# print results (1) or don't (0)
		$options{'results'}       = $params->{'results'}     //  0;
		my $edit_counter = 0;          # edit counter
		my $old_page_0 = {};           # list of edited pages in ns==0
		my $old_page_1 = {};           # list of edited pages in ns!=0
		# some regexp vars should make source more readable 
		# note: $re_url is a string ending with a character class, so a quantifier 
		# appended to $re_url applies to that class only.
		my $re_url = $re_prot_part . $re_url_part . $self->{'re_url_class'};
		my $re_descr_class = qr/[^\]\x00-\x08\x0a-\x1F]/;
		my $re_ref_tag_name = qr/<ref [^>]*name\s*=\s*['"]?([^>]*?)['"]?>/;
		my $re_ref_inner_part = qr/(?s:(?!<ref)(?!<\/ref>).)+?$re_url_part.*?<\/ref>/;
		# only used in the commented-out replacement code below
		my $re_deadlink = qr/\Q{{dead link|date=March 2011}}\E/;
		my %results = (
			'num_ns0_links'  => 0,
			'num_ns!0_links' => 0,
		);
		# flag for test_and_replace: the replacement string is evaluated as code
		my $use_eval_in_repl = 1;

		# part 1: pages in namespace 0
		if($options{'articles'}){
			$self->msg(1, "starting linksearch in ns=0...");
			my @links;
			if(defined $options{'searched_link'}){
				@links = $self->{'mw_bot'}->linksearch($options{'searched_link'}, 0, undef);
			}elsif(defined $options{'user_contribs'}){
				# alternative source of links: urls in deleted lines of the diffs of 
				# the contributions of a user (hard-coded time range)
				$self->msg(1, 'searching for contributions of user:' . 
					$options{'user_contribs'}->{'user'});
				my $uc_options = {
					'ucstart'   => '2011-03-18T00:00:00Z',
					'ucend'     => '2011-03-16T00:00:00Z',
					'uclimit'   => '500',
					'username'  => $options{'user_contribs'}->{'user'},
					'namespace' => 0,
				};
				$old_page_0 = $self->get_user_contribs($uc_options);
				while(my ($page, $revid) = each %$old_page_0){
					# $self->msg(1, $page);
					my $diff = $self->{'mw_bot'}->diff({
						'revid' => $revid,
						'oldid' => 'prev',
					});
					# my $found = 0;
					while($diff =~ /<td class=\"diff-deletedline\">(.*?)<\/td>/g){
						if($1 =~ /($re_url*)/){
							push @links, {
								'title' => $page,
								'url' => $1,
							};
							# $found = 1;
							last;
						}
					}
					# print Dumper($diff) if $found;
				}
			}
			$self->msg(1, 'results (found '.@links.' links in '.
				$self->num_unique_elem(map {$_->{'title'}} @links)." pages):");
			$results{'num_ns0_links'} = 0+@links;
			#push @links, {'url' => 'scheissegal', 'title' => 'LyX'};
			for my $hash (@links){
				my $page = $hash->{'title'};
				# each page is handled only once.
				# NOTE(review): in the user_contribs case $old_page_0 already contains 
				# all contribution pages, so every link is skipped here -- confirm
				next if exists $old_page_0->{$page};
				$old_page_0->{$page} = 1;
				if(++$edit_counter < $options{'skip_edits'} or $options{'max_edits'} == 0){
					next;
				}elsif($edit_counter > $options{'max_edits'} 
						and $options{'max_edits'} != -1){
					last;
				}
				#next unless $hash->{'url'} =~ /$re_prot_part$re_url_part/;
				$self->msg(1, "$edit_counter: page= $page, url= ".$hash->{'url'});
				my $text = $self->{'mw_bot'}->get_text($page);
				next unless $self->is_allowed(\$text, $page);
				# original text; passed to save_wiki_page together with the new text
				my $text_bak = $text;
				$self->cleanup_wiki_page(\$text, $page);
				if(defined $replacement){
					#	$self->test_and_replace(
					#		\$text, 
					#		qr/$re_prot_part$re_url_part(?<trail>$self->{'re_url_class'}*)(?:(?<posturl>.*?)$re_deadlink)?/m, 
					#		$replacement, $use_eval_in_repl
					#	);
				}elsif($options{'delete_link'}){
					if($options{'refs'}){
						if($options{'ref2deadlink'}){
							# replace the link inside of a ref by a 'dead link' template
							if($text!~/url\s*=\s*$re_url/){ # special templates
								$self->test_and_replace(\$text, 
									qr/
										(
											<(?i:ref)\b[^>]*>\s*          # <ref>
											(?s:(?!<ref)(?!<\/ref>).)*?   # blabla
										)(?|
											\[($re_url*+)(\x20[^\]]+|)\]| # [url] or 
											($re_url*+)()                # url (not preceeded by "url = ")
										)(
											.*?                           # blabla
											<\/ref>                       # <\/ref>
										)/x,
									'"${1}[{{dead link|inline=yes|bot=' . $self->{'mw_username'} . 
									'|date=' . $self->get_date_iso() . '|url=$2}}$3]$4"', 
									$use_eval_in_repl
								);
							}
						}else{
							# backrefs
							if($text =~ /$re_ref_tag_name$re_ref_inner_part/){
								my $ref_name = $1;
								$self->msg(1, $ref_name, 'debug');
								$self->test_and_replace(
									\$text, 
									qr/<(?i:ref) name\s*=\s*['"]?\s*$ref_name\s*['"]?\s*\/\s*>/, 
									'', !$use_eval_in_repl
								);
							}
							# refs
							$self->test_and_replace(
								\$text, 
								qr/<(?i:ref)\b[^>]*>\s*$re_ref_inner_part/, 
								'', !$use_eval_in_repl
							);
						}
					}
					# <references />
					# if no ref is left, the references section is removed, but only if 
					# the parsed result of a test copy contains no cite error
					if($text!~/<(?i:ref)(?:>| name)|EinwohnerOrtQuelle|EinwohnerRef/){
						my $text_tmp = $text;
						$self->test_and_replace(
							\$text_tmp, 
							qr/^=(=+)\s*(?:Einzelnachweise|Quellen)\s*\1=[\s\n]*\n
							<references(?:\x20?\/)?>\s*/mx, 
							'', !$use_eval_in_repl
						);
						my $parsed = $self->parse_wikitext($text_tmp);
						if($parsed !~ /class="error mw-ext-cite-error"/){
							$self->test_and_replace(
								\$text, 
								qr/^=(=+)\s*(?:Einzelnachweise|Quellen)\s*\1=[\s\n]*\n
								<references(?:\x20?\/)?>\s*/mx, 
								'', !$use_eval_in_repl
							);
						}else{
							$self->msg(1, '<references /> not deleted, because it would lead ' .
								'to an error.', 'warning');
						}
					}
					if($options{'nonrefs'}){
						# links like [link descr] in lists (as in "external links")
						$self->test_and_replace(
							\$text, 
							qr/^\*(?i:.(?!<ref))*\[$re_url*?\s*$re_descr_class*\].*\n?/m, 
							'', !$use_eval_in_repl
						);
						# plain links in lists (as in "external links")
						$self->test_and_replace(
							\$text, 
							qr/^\*(?i:.(?!<ref))*$re_url*?.*\n?/m, 
							'', !$use_eval_in_repl
						);
					}
					# heading "weblinks" without entries
					my $weblinks_heading_re = qr/^=(=+)\s*Weblinks?\s*=\1[\s\n]*\n
						(\[\[[Kk]atego|\{\{
							(?:Coordinate|DEFAULTSORT|Hinweis\x20|(?:Vorlage:)?Navigations
								|Normdaten|(?:Vorlage:)?Orden,|SORTIERUNG)
							|(?:<!--\x20?)?==)/mx;
					$self->test_and_replace(\$text, $weblinks_heading_re, '"$2"', $use_eval_in_repl);
					# report a remaining occurrence of the url
					if($text =~ /(.*(?<!\|url=$re_prot_part)$re_url_part.*)/){
						$self->msg(1, " not resolved: $1");
					}
				}
				$self->time_management();
				$self->save_wiki_page($page, $options{'summary'}, \$text, \$text_bak);
			}
			$results{'num_ns0_unique_links'} = $edit_counter;
		}

		# part 2: pages in all namespaces except 0. here the links are not deleted, 
		# but deactivated (protocol removed or wrapped in <nowiki>).
		if($options{'nonarticles'}){
			my $namespaces = {$self->{'mw_bot'}->get_namespace_names()}; # get namespaces
			$namespaces = [grep {$_ != 0} keys %$namespaces]; # delete namespace 0 from list
			$self->msg(1, "starting linksearch in ns!=0...");
			my @links;
			if(defined $options{'searched_link'}){
				@links = $self->{'mw_bot'}->linksearch(
					$options{'searched_link'}, $namespaces, undef);
			}elsif(defined $options{'user_contribs'}){
				# not implemented for ns!=0
			}
			$self->msg(1, 'results (found '.@links." links in ".
				$self->num_unique_elem(map {$_->{'title'}} @links)." pages):");
			$results{'num_ns!0_links'} = 0 + @links;
			for my $hash (@links){
				my $page = $hash->{'title'};
				next if exists $old_page_1->{$page};
				$old_page_1->{$page} = 1;
				if(++$edit_counter < $options{'skip_edits'} or $options{'max_edits'} == 0){
					next;
				}elsif($edit_counter > $options{'max_edits'} 
						and $options{'max_edits'} != -1){
					last;
				}
				$self->msg(1, "$edit_counter: page= $page, url= ".$hash->{'url'});
				my $text = $self->{'mw_bot'}->get_text($page);
				next unless $self->is_allowed(\$text, $page);
				my $text_bak = $text;

				#$self->cleanup_wiki_page(\$text, $page);
				if(defined $replacement){
					#	$self->test_and_replace(
					#		\$text, 
					#		qr/$re_prot_part$re_url_part(?<trail>$self->{'re_url_class'}*)(?:(?<posturl>.*?)$re_deadlink)?/m, 
					#		$replacement, $use_eval_in_repl
					#	);
				}elsif($options{'delete_link'}){
					if($options{'nonrefs'}){
						# [link]
						$self->test_and_replace(\$text, 
							qr/\[$re_prot_part($re_url_part$self->{'re_url_class'}*?)\]/, 
							'$1', $use_eval_in_repl);
						# [link descr]
						$self->test_and_replace(\$text, 
							qr/(?<!<nowiki>)(\[$re_url*\s*$re_descr_class*\])/, 
							'"<nowiki>".$1."<\/nowiki>"', $use_eval_in_repl);
						# plain_link or word:plainlink
						$self->test_and_replace(\$text, 
							qr/(?<!$self->{'re_url_class'})(?<!<nowiki>\[)([a-zA-Z0-9]*:|)$re_prot_part($re_url_part$self->{'re_url_class'}*)/, 
							'$1.$2', $use_eval_in_repl);
						# ==plain_link==
						$self->test_and_replace(\$text, 
							qr/^(=+) *$re_prot_part($re_url_part$self->{'re_url_class'}*) *\1 */m, 
							'$1." ".$2." ".$1', $use_eval_in_repl);
						# :*plain_link
						$self->test_and_replace(\$text, 
							qr/^([:\s*]*)$re_prot_part($re_url_part$self->{'re_url_class'}*)/m, 
							'$1.$2', $use_eval_in_repl);
						# (plain_link)
						$self->test_and_replace(\$text, 
							qr/([()]\s*)$re_prot_part($re_url_part$self->{'re_url_class'}*)(\s*\)|\s)/, 
							'$1.$2.$3', $use_eval_in_repl);
	#					# {|plain_link|}
	#					$self->test_and_replace(\$text, qr/([|{]\s*)$re_prot_part($re_url_part$self->{'re_url_class'}*)(\s*[|}])/, 
	#					'$1.$2.$3', $use_eval_in_repl);
					}
					# not good for large pages:
					if($text =~ /^(.*(?<!<nowiki>)(?<!<nowiki>\[)$re_prot_part$re_url_part.*)/m){
						$self->msg(1, " not resolved: $1");
					}
					# better:
	#				while($text =~ /(^.*$re_prot_part$re_url_part.*)/gmo){
	#					my $temp = $1;
	#					if($temp =~ /(?<!<nowiki>)(?<!<nowiki>\[)$re_prot_part$re_url_part/){
	#						$self->msg(1, " not resolved: $temp");
	#					}
	#				}
				}
				$self->time_management();
				$self->save_wiki_page($page, $options{'summary'}, \$text, \$text_bak);
			}
			# the counter is shared by both parts, so the ns 0 count is subtracted
			$results{'num_ns!0_unique_links'} = $edit_counter-($results{'num_ns0_unique_links'} // 0);
		}
		# optional summary on stdout
		if($options{'results'}){
			print "\n";
			$self->msg(1, "results:");
			print " found " . ($results{'num_ns0_links'}+$results{'num_ns!0_links'}) .
				' links in ' . (0 + keys(%$old_page_0) + keys(%$old_page_1)) . " pages:\n";
			if($options{'articles'} && $options{'nonarticles'}){
				print "       ".$results{'num_ns0_links'}." links in ".$results{'num_ns0_unique_links'}." pages in ns==0\n";
				print "       ".$results{'num_ns!0_links'}." links in ".$results{'num_ns!0_unique_links'}." pages in ns!=0\n";
			}
			if($options{'articles'}){
				print "\n pages in ns==0:\n";
				print "  $_\n" for sort keys %$old_page_0;
			}
			if($options{'nonarticles'}){
				print "\n pages in ns!=0:\n";
				print "  $_\n" for sort keys %$old_page_1;
			}
		}
	}

	# logs in at the wiki given by $self->{'host'} and $self->{'rel_url_path'}.
	# params:
	#   $wiki_password  (optional) password of the wiki user. if undefined, the 
	#                   first line of the first existing standard password file 
	#                   is used. if there is none, the user is asked for the 
	#                   password, unless the script runs on labs or a cookie file 
	#                   exists while neither cliparam 'delete' nor 'upload' is set.
	# sets in $self: mw_username (normalized), mw_bot, mw_api, mw_apiurl and -- if 
	#   the host looks like a wikimedia project -- wm_lang, wm_proj_type, wm_proj.
	# returns 1.
	# dies if an existing password file can't be opened, if the api login fails 
	#   (provided that _handle_api_error returns a true value) or -- in the branch 
	#   without api login -- if no MediaWiki::Bot object could be created.
	sub login{
		my $self = shift;
		my $wiki_password = shift;
		# if wiki user name is not defined, let user type in wiki user name
		if(not defined $self->{'mw_username'}){
			$self->msg(1, "enter wiki username:");
			$self->{'mw_username'} = <STDIN>;
			chomp($self->{'mw_username'});
		}
		# cope with auto-normalization (bug in MediaWiki::Bot)
		# i.e. upper-case the first character and replace underscores by spaces
		$self->{'mw_username'} = ucfirst($self->{'mw_username'});
		$self->{'mw_username'} =~y/_/ /;
		$self->msg(1, "login user name is '$self->{'mw_username'}'.");
		# name of the cookie file (relative to the current working directory).
		# presumably this file is written by MediaWiki::Bot -- TODO confirm
		my $cookie = '.mediawiki-bot-' . $self->{'mw_username'} . '-cookies';
		# if wiki user password is not defined, search typical password places or let 
		# user type in wiki user password
		if(not defined $wiki_password){
			# /bot/ needed for toolserver (for historical reasons)
			my $pwfile = [".password", "$ENV{HOME}/.password", "$ENV{HOME}/bot/.password"];
			# use the first line of the first password file that exists
			for(@$pwfile){
				if(-e $_){
					$self->msg(1, "using standard password file $_");
					open(my $INFILE, '<', $_) or die $!;
						chomp($wiki_password = <$INFILE>);
					close($INFILE);
					last;
				}
			}
			# ask for the password only if there is none yet and if there's no cookie 
			# file that could be used instead (cookie is not relied on if 'delete' or 
			# 'upload' is requested)
			unless(defined $wiki_password or
					(	# $self->{'mw_username'} =~/^(?:CamelBot)$/ and 
						-e $cookie and
						not ($self->{'cliparams'}->{'delete'} or $self->{'cliparams'}->{'upload'})
					)
			){
				if($self->{'verbosity'} >= 1){
					# list all places that have been searched, separated by commas
					$self->msg(1, 'could not read password file at places: ');
					print '  '.$pwfile->[$_].', ' for 0..($#$pwfile-1);
					print $pwfile->[-1]."\n";
				}
				# no interactive prompt on labs
				unless($_running_on_labs){
					$self->msg(0, "enter password (will not be echoed):");
					# ReadMode/ReadLine are only used if they are defined (presumably 
					# imported from Term::ReadKey elsewhere in this file -- TODO confirm)
					ReadMode('raw') if defined &ReadMode;
					$wiki_password = ReadLine(0) if defined &ReadLine;
					chomp($wiki_password);
					ReadMode('restore') if defined &ReadMode;
					# emulate backspace: as long as there are DEL characters (0x7f), 
					# delete each of them together with the character in front of it
					while($wiki_password =~ /\x{007f}/){
						$wiki_password =~ s/(?:^|[^\x{007f}])\x{007f}//g;
					}
				}
			}
		}
		my %address_params = (
			'protocol' => 'https',
			'host'     => $self->{'host'},
			'path'     => $self->{'rel_url_path'},
		);
		$self->msg(2, "logging in at $self->{'host'}...");
		# derive language and project type from host names like 'de.wikipedia.org'
		if($self->{'host'} =~ /^([a-z]+)\.(
				wikibooks|wikidata|wikimedia|wikinews|wikipedia|wikiquote|
				wikisource|wikiversity|wikivoyage|wiktionary
			)\.org\z/x){
			$self->{'wm_lang'} = $1;
			$self->{'wm_proj_type'} = $2;
			$self->{'wm_proj'} = $self->{'wm_lang'} . $self->{'wm_proj_type'};
			# shorten e.g. 'dewikipedia' to 'dewiki' and 'commonswikimedia' to 
			# 'commonswiki'; other project types are kept (e.g. 'dewikibooks')
			$self->{'wm_proj'} =~ s/wiki\K[pm]edia\z//;
		}
		$self->{'mw_bot'} = MediaWiki::Bot->new({
			#agent       => 'CamelBot',
			assert      => 'bot',
			#do_sul      => ($address_params{'host'} =~ /\.wikipedia\.org$/ ? 1 : 0),
			protocol    => $address_params{'protocol'},
			host        => $address_params{'host'},
			path        => $address_params{'path'},
			operator    => 'lustiger_seth',
			debug       => 1,
			login_data  => {
				username => $self->{'mw_username'},
				password => $wiki_password,
				#lgdomain => ''
			},
		});
		# additionally to the MediaWiki::Bot object a separate MediaWiki::API object 
		# is created, which uses the same api url
		# TODO: maybe this should just be replaced by:
		# $self->{'mw_api'} = $self->{'mw_bot'}->{'api'};
		$self->{'mw_api'} = MediaWiki::API->new();
		my $wiki_base_url = $address_params{'protocol'}.'://'.$address_params{'host'};
		$self->{'mw_apiurl'} = $wiki_base_url . '/' . $address_params{'path'} . '/api.php';
		#$self->{'mw_apiurl'} = $self->{'mw_api'}->{'config'}->{'api_url'};
		$self->{'mw_api'}->{'config'}->{'api_url'} = $self->{'mw_apiurl'};
		# the separate api object is logged in only for those tasks listed here
		if(    $self->{'cliparams'}->{'delete'} 
				or $self->{'cliparams'}->{'upload'} 
				or $self->{'cliparams'}->{'link-replacement'}
				or $self->{'cliparams'}->{'search-sbl-attempts'}
		){
			$self->{'mw_api'}->{'config'}->{'upload_url'} = $wiki_base_url.'/wiki/Special:Upload';
			$self->{'mw_api'}->{'config'}->{'files_url'} = '';
			# NOTE(review): dies only if _handle_api_error returns a true value
			$self->{'mw_api'}->login({
				'lgname' => $self->{'mw_username'},
				'lgpassword' => $wiki_password,
				#'lgdomain' => ''
			}) or ($self->_handle_api_error and die);
		}else{
			# otherwise only check whether the MediaWiki::Bot object has been created
			unless(defined $self->{'mw_bot'}){
				$self->msg(0, 'check upper/lower case. or maybe there\'s an expired cookie file. please delete \''. 
					$cookie. '\' and try again.', 'error') if -e $cookie;
				die("error: could not login.\n");
			}
		}
		$self->msg(2, "logged in...");
		return 1;
	}

	# prints a log line of the form 
	#   "<timestamp> [<type> in ]<calling sub>:<line of call>: <message>\n"
	# to stdout, if the verbosity of the bot is high enough.
	# params:
	#   $verb_threshold  minimal verbosity that is needed for printing
	#   $msg             text to print
	#   $type            (optional) label such as 'error' or 'notice'
	#   $caller_inc      (optional, default 0) number of additional stack frames 
	#                    to skip when determining calling sub and line
	# returns 0 if nothing has been printed, otherwise 1.
	sub msg{
		my ($self, $verb_threshold, $msg, $type, $caller_inc) = @_;
		$caller_inc //= 0;
		if($self->{'verbosity'} < $verb_threshold){
			return 0;
		}
		my $prefix = '';
		$prefix = "$type in " if defined $type;
		my $timestamp = $self->get_time_iso_;
		# frame of the call of msg(): tells the line number of that call
		my @frame = caller(0 + $caller_inc);
		my $line = $frame[2];
		# one frame further up: tells the name of the sub that called msg()
		@frame = caller(1 + $caller_inc);
		my $subr = defined $frame[3] ? $frame[3] : '[no sub]';
		print "$timestamp $prefix$subr:$line: $msg\n";
		return 1;
	}

	# searches the wikitext of a (german) discussion page for the newest 
	# signature timestamp like "12:34, 5. Jan. 2020 (CET)".
	# params:
	#   $wikitext  wikitext of the whole page
	# returns a hash ref with keys
	#   'thread'  title of the section that contains the newest timestamp (title 
	#             of the first section, if no timestamp was found at all; not 
	#             set, if there is no section)
	#   'author'  user name taken from the last user (talk) link or contributions 
	#             link in front of the timestamp. if no such link is found, the 
	#             complete text of the line in front of the timestamp. '' if no 
	#             timestamp was found.
	#   'date'    unix timestamp (utc) of the newest post, 0 if none was found
	# timestamps with unknown month names or impossible dates are ignored.
	sub newest_post_info{
		my $self     = shift;
		my $wikitext = shift;
		# month abbreviations of german signatures (0-based as needed by timegm). 
		# 'Mär' is listed twice to cope with both encodings.
		my %month_of = (
			'Jan' => 0, 'Feb' => 1, "Mär" => 2, "M\xe4r" => 2, 'Apr' => 3, 
			'Mai' => 4, 'Jun' => 5, 'Jul' => 6, 'Aug' => 7, 'Sep' => 8, 
			'Okt' => 9, 'Nov' => 10, 'Dez' => 11,
		);
		# split page at section headings. the capturing group inside the look-ahead 
		# yields additional elements consisting of '='s only, which are dropped.
		my $threads = [grep {$_!~/^=+$/} split /\n(?=(=++)[^=].*\1(?:\n|$))/, $wikitext];
		my $newest_thread = {
			'date'   => 0, 
			'author' => '',
		};
		for my $t(@$threads){
			# skip text in front of the first heading
			next unless $t =~ /^(=++)\s*(.*?)\s*\1/;
			my $thread_title = $2;
			$newest_thread->{'thread'} = $thread_title unless defined $newest_thread->{'thread'};
			while($t =~ /(?<pre_sig>.*)(?<hour>\d\d):(?<min>\d\d),\x20
				(?<mday>[1-9]|[123][0-9])\.\x20(?<mon>[a-zA-Zä\xe4]{3,4})\.?\x20
				(?<year>20\d\d)\x20\((?<tz>CES?T)\)/gx){
				my $pre_sig = $+{'pre_sig'};
				my $tz      = $+{'tz'};
				my $min     = $+{'min'};
				my $hour    = $+{'hour'};
				my $mday    = $+{'mday'};
				my $year    = $+{'year'};
				my $month   = $month_of{$+{'mon'}};
				next unless defined $month;
				# timegm croaks on impossible dates (e.g. '31. Feb.'). a typo on a wiki 
				# page must not kill the bot, so just skip such timestamps.
				my $date = eval {timegm(0, $min, $hour, $mday, $month, $year)};
				next unless defined $date;
				# signature shows local time: CEST = utc + 2h, CET = utc + 1h
				$date -= 3600 * ($tz eq 'CEST' ? 2 : 1);
				if($date > $newest_thread->{'date'}){
					$newest_thread = {
						'thread' => $thread_title,
						'author' => $pre_sig,
						'date'   => $date,
					};
				}
			}
		}
		# the last matching link in front of the timestamp wins
		my $username;
		while($newest_thread->{'author'} =~/\[\[
			(?:
				(?:[Bb]enutzer(?:in)?|[uU]ser)(?:[\x20_](?:talk|Diskussion))?:| # 'user:...' or 'user talk:...' or
				[Ss]pe[zc]ial:(?:Beitr..?ge|[Cc]ontributions)\/   # 'special:contributions...'
			)
			([^|\/\]]+)
			/gx){
			$username = $1;
		}
		$newest_thread->{'author'} = $username if defined $username;
		return $newest_thread;
	}

	# checks the newest revision of a page for unwanted content (see 
	# $self->{'maintenance'}->{'text_re'}) and leaves a notice on the talk page 
	# of the last editor, if that editor increased the number of occurrences (or 
	# if there is no older revision to compare with).
	# params:
	#   $page  title of the wiki page to check
	# for every type of $self->{'maintenance'}->{'notifiers'} the wiki page 
	# 'user:CamelBot/notice-<type>' is used as template of the notice. a user is 
	# not notified, if
	#   - is_allowed() denies editing the user's talk page, or
	#   - the talk page already contains the heading of the notice, or 
	#   - the template page can't be fetched.
	# returns 0 if the page has no history, otherwise 1.
	sub notifier{
		my $self        = shift;
		my $page        = shift;
		utf8::decode($page); # is this necessary?
		my @hist = $self->{'mw_bot'}->get_history($page, 2);
		if(@hist == 0){
			$self->msg(1, "page '$page' doesn't seem to exist anymore.", 'notice');
			return 0;
		}
		# get newest version of page content
		my $page_text = $self->{'mw_bot'}->get_text($page, $hist[0]->{'revid'});
		if(not defined $page_text){
			$self->msg(1, "could not fetch page '$page'. maybe deleted already", 
				'notice');
			return 1;
		}
		my $maintenance = $self->{'maintenance'};
		my @ntf_types = keys %{$maintenance->{'notifiers'}};
		# ASINs and local files are unwanted in w:de
		my %num_matches;
		# get number of occurrences in present page
		for my $ntf_type(@ntf_types){
			# exceptions may be undefined. an undefined value must not be used as a 
			# pattern, because an empty pattern does not mean 'no match' in perl.
			my $re_exception = $maintenance->{'exceptions'}->{$ntf_type};
			if(defined $re_exception and $page =~ /$re_exception/){
				$num_matches{$ntf_type} = 0; 
			}else{
				$num_matches{$ntf_type} =()= $page_text =~ 
					/$maintenance->{'text_re'}->{$ntf_type}/g;
			}
		}
		# any matches in present page?
		return 1 unless grep {$_ > 0} values %num_matches;
		# $notify is a small state machine:
		#   'no'        compare with previous revision first
		#   'this time' previous revision had less matches of the current type
		#   'yes'       there's no previous revision, so always notify
		my $notify = 'no';
		my $page_text_prev_version;
		# get older version of page if available
		if(@hist > 1 and defined $hist[1]->{'revid'}){
			# if the older revision can't be fetched, treat it as empty
			$page_text_prev_version = 
				$self->{'mw_bot'}->get_text($page, $hist[1]->{'revid'}) // '';
		}else{
			$notify = 'yes';
		}
		for my $ntf_type(@ntf_types){
			next if $num_matches{$ntf_type} == 0;
			# compare with older version
			if($notify eq 'no'){
				my $num_matches_pv =()= $page_text_prev_version =~ 
					/$maintenance->{'text_re'}->{$ntf_type}/g;
				if($num_matches_pv < $num_matches{$ntf_type}){
					$notify = 'this time';
				}
			}
			if($notify ne 'no'){
				my $user_to_inform = $hist[0]->{'user'};
				my $userpage = 'User talk:' . $user_to_inform;
				my $text = $self->{'mw_bot'}->get_text($userpage) // '';
				if($self->is_allowed(\$text, $userpage)){
					my $text_bak = $text;
					my $heading = $maintenance->{'notifiers'}->{$ntf_type};
					my $notice_page = 'user:CamelBot/notice-' . $ntf_type;
					if($text =~ /\Q$heading\E/){
						$self->msg(1, 
							"user '$user_to_inform' seems to be informed already.", 'notice');
					}elsif(not defined(my $notice = $self->{'mw_bot'}->get_text($notice_page))){
						# without template there would be a heading without any explanation 
						# on the user's talk page
						$self->msg(0, "could not fetch notice template '$notice_page'. " . 
							"user '$user_to_inform' is not notified.", 'error');
					}else{
						my $title = $self->title2url_part($page);
						my $wp_path = '//de.wikipedia.org/w/index.php';
						# replace variables in notice template
						my $complete_url = $wp_path . '?title=' . $title 
							. '&diff=prev&oldid=' . $hist[0]->{'revid'};
						$notice =~ s/\$diff\b/$complete_url/g;
						$notice =~ s/\$article\b/$page/g;
						# everything from '$signature' on is replaced by the signature
						$notice =~ s/\$signature\b.*/-- ~~~~/sg;
						$text .= "\n== $heading ==\n$notice\n";
						$self->time_management();
						$self->msg(2, "notifying on '$userpage'");
						my $minor = 0;
						$self->save_wiki_page(
							$userpage, $heading, \$text, \$text_bak, $minor);
					}
				}
				# next type has to be compared with the previous revision again
				$notify = 'no' if $notify eq 'this time';
			}
		}
		return 1;
	}

	# counts the distinct values among the given list of params.
	# returns the number as string.
	sub num_unique_elem{
		my ($self, @elements) = @_;
		my %seen;
		$seen{$_} = 1 for @elements;
		return '' . scalar(keys %seen);
	}

	# lets the wiki render a page (api action 'parse').
	# params:
	#   $page  title of the wiki page
	# returns a list: (html of page body, title, html of head). the trailing 
	# 'NewPP' comment is removed from the body.
	sub parse_page{
		my ($self, $page) = @_;
		my $response = $self->api_simple({
			action => 'parse',
			page => $page,
			prop => 'text|headhtml',
		});
		my $parsed = $response->{'parse'};
		my $title = $parsed->{'title'};
		my $headhtml = $parsed->{'headhtml'}->{'*'};
		my $html = $parsed->{'text'}->{'*'};
		# delete final comments
		$html =~s/\n\n<!--\s+NewPP.*?\n-->$//s;
		return ($html, $title, $headhtml);
	}

	# parses a (color coded) message of the irc recent changes feed.
	# params:
	#   $msg  raw irc message
	# returns a hash ref with keys page_with_ns, flags, url, user, diffbytes (sign 
	# and digits only, if given), summary, ns_id, page (without namespace prefix 
	# if ns_id != 0), timestamp_unix and timestamp (both: time of parsing).
	# if the message does not look like a recent changes entry, it's passed to 
	# msg() (verbosity 2) and the returned hash is empty.
	sub parse_irc_rc{
		my ($self, $msg) = @_;
		my %parsed_msg;
		my @fields = $msg =~ /^\cC14
			\[\[\cC07([^\]]+)\cC14]]\cC4\x20   # page
			([a-z0-9_-]+|[!NMB]*)\cC10\x20     # flags
			\cC02([^\cC]*)\cC\x20\cC5\*\cC\x20 # url
			\cC03([^\cC]*)\cC\x20\cC5\*\cC\x20 # user
			(\(\cB?[+-]\d+\cB?\)|)\x20         # diffbytes
			\cC10(.*[^\cC]|)\cC?\z/x;          # summary
		if(!@fields){
			$self->msg(2, $msg);
			return \%parsed_msg;
		}
		@parsed_msg{qw(page_with_ns flags url user diffbytes summary)} = @fields;
		# strip parentheses and bold markers from diffbytes
		if($parsed_msg{'diffbytes'} =~ /^\(\cB?([+-]\d+)\cB?\)$/){
			$parsed_msg{'diffbytes'} = $1;
		}
		my $ns_id = $self->get_namespace_id($parsed_msg{'page_with_ns'});
		$parsed_msg{'ns_id'} = $ns_id;
		$parsed_msg{'page'} = $parsed_msg{'page_with_ns'};
		if($ns_id != 0){
			$parsed_msg{'page'} =~ s/^[^:]+://;
		}
		my $now = time();
		$parsed_msg{'timestamp_unix'} = $now;
		$parsed_msg{'timestamp'} = $self->get_time_iso($now);
		if($self->{'verbosity'} >= 5){
			print Data::Dumper->Dump([\%parsed_msg]);
		}
		return \%parsed_msg;
	}

	# lets the wiki render a piece of wikitext (api action 'parse').
	# params:
	#   $wikitext  wikitext to render
	# returns the html without the trailing 'NewPP' comment.
	sub parse_wikitext{
		my ($self, $wikitext) = @_;
		my $response = $self->api_simple({
			action => 'parse',
			text => $wikitext,
			prop => 'text',
		});
		my $html = $response->{'parse'}->{'text'}->{'*'};
		# delete final comments
		$html =~s/\n\n<!--\s+NewPP.*?\n-->$//s;
		return $html;
	}

	# searches for all links in a given piece of wikitext
	# original code got from 
	# http://svn.wikimedia.org/viewvc/mediawiki/trunk/phase3/includes/parser/Parser.php?view=markup&pathrev=74981
	# ported to perl because phps regexp-handling (\\\\\\\\\\\\\\\\\) sucks.
	sub php_code_unused{
		# NOTE: unfinished port, not used anywhere visible here. the wikitext is 
		# split at bracketed external links and the parts are walked through, but 
		# nothing is done with them yet. so there's no meaningful return value.
		# params:
		#   $wikitext  wikitext to search for links like '[http://example.org text]'
		my $self = shift;
		# get wikitext as param
		my $wikitext = shift; # $text in orig code
		# building regexp for matching of "[...]"-like links
		my @wgUrlProtocols = (
			'http://',
			'https://',
			'ftp://',
			'irc://',
			'gopher://',
			'telnet://',
			'nntp://',
			'worldwind://',
			'mailto:',
			'news:',
			'svn://',
			'git://',
			'mms://'
		);
		# protocols are literal strings, so quote them (as preg_quote in orig code)
		my $re_prot = join('|', map {quotemeta} @wgUrlProtocols);
		my $ext_link_url_class = qr/[^\][<>"\x00-\x20\x7F\p{Zs}]/;
		my $re_url_wo_prot =     qr/$ext_link_url_class+/;
		my $re_descr = qr/[^\]\x00-\x08\x0a-\x1F]*?/;
		# (the php string delimiter "'" must not be part of the regexp)
		my $mExtLinkBracketedRegex = qr/\[(($re_prot)$re_url_wo_prot) *($re_descr)\]/;
		# $1 = url, $2 = prot, $3 = descr
		# split wikitext by links
		my @bits = split /$mExtLinkBracketedRegex/, $wikitext;
		my $s = shift @bits; # first part is something non-link-like (i.e. an empty string or some wikitext).
		# loop over parts (of 4 different classes: wikitext, url, prot, descr, wikitext, url, prot, descr, ...)
		# after the shift above, the url of the first link is at index 0, so use 
		# post-increments (as in orig code)
		for(my $i=0; $i<@bits;){
			my $url = $bits[$i++];
			my $prot = $bits[$i++];
			my $descr = $bits[$i++]; # $text in orig code
			my $wtext = $bits[$i++] // ''; # $trail in orig code
			# will continue later on
		}
		return;
	}

	# post-processes html that was rendered by the wiki:
	#   - linked images are replaced by plain <img> tags, whose src is set to 
	#     $image_prefix followed by the file name of the link target
	#   - srcset attributes are removed
	#   - finally the html is piped through the external program 'tidy'
	# params:
	#   $html          html to process
	#   $image_prefix  (optional, default '') prefix of the new src of images
	# returns the output of tidy.
	sub post_process_html{
		my ($self, $html, $image_prefix) = @_;
		$image_prefix //= '';
		#  images
		$html=~s/<a\x20                                 # <a
					href="\/wiki\/File:(?<filename>[^"]++)"   # href="\/wiki\/File:..."
				[^>]*+>                                     # ...>
				(?<imgpre><img\x20[^>]*src=")               # <img ... src="
					[^"]++                                    # ...
				(?<imgpost>"[^>]*+>)                        # ...>
			<\/a>                                         # <\/a>
			/$+{'imgpre'}$image_prefix$+{'filename'}$+{'imgpost'}/gx;
		$html =~ s/ srcset="[^"]++"//g;
		#  html tidy
		my @tidy_cmd = qw(tidy -iq --tidy-mark 0 -f /dev/null -w 5000 -utf8);
		my ($html_cleaned, $err) = ('', '');
		IPC::Run::run(\@tidy_cmd, \$html, \$html_cleaned, \$err);
		return $html_cleaned;
	}

	# (re-)builds $self->{'maintenance'}, i.e. the configuration of all 
	# maintenance lists. has to be called again when the year changes, because the 
	# current year is part of the 'cat dead' params.
	# keys of $self->{'maintenance'} (each one a hash with list types as keys):
	#   'text_re'        regexp that finds the unwanted content in wikitext
	#   'exceptions'     regexp for titles of pages that shall be ignored 
	#                    (undef = no exceptions)
	#   'intros'         introducing wikitext of the maintenance page
	#   'notifiers'      heading of the notice for users
	#   'page_name'      name of maintenance page, if it differs from the key
	#   'check_entries'  flag (0 or 1) per list type
	# returns 1.
	sub refresh_maintenance_params{
		my $self = shift;
		my $current_year = strftime("%Y", gmtime());
		$self->{'maintenance'} = {
			'text_re'    => {
				'ASIN'        => 
					qr/\bASIN(?:\]\])?[: ]+(?:B[0-9]{2}[A-Z0-9]{7}|[0-9]{9}[0-9X])/,
				# separator between 'gestorben' and year: whitespace or underscore.
				# (a '|' would start the sort key and must not be accepted here.)
				'cat dead'    => 
					qr/\[\[\s*(?i:category|kategorie):
						\s*[Gg]estorben[\s_]$current_year\s*(?:\|[^\]]*)?\]\]/x,
				'local files' => qr/\bfile\:\/{3}/i,
				'finanznachrichten.de' => qr/\bfinanznachrichten\.de\b/,
				'traditionsverband.de' => qr/\btraditionsverband\.de\b/,
				'self-ref'    => qr/ref\s*>\s*
					(?:
						\[?(?:https?\:)?\/\/[a-z]+\.wikipedia\.org\/wiki\/|   # wikipedia
						\[?(?:https?\:)?\/\/www\.wikidata\.org\/wiki\/Q[1-9]| # wikidata
						\[\[:[a-z]{2}:                               # wikipedia (internal link)
					)/x,
			},
			'exceptions' => {
				'ASIN'        => qr/^Amazon Standard Identification Number\z/,
				'cat dead'    => undef,
				'local files' => qr/
					^(?:Uniform\sResource\s(?:Identifier|Locator)|
					XML\sBase|XML\sShareable\sPlaylist\sFormat)
				\z/x,
				'finanznachrichten.de' => undef,
				'traditionsverband.de' => undef,
				'self-ref'    => qr/Wikipedia\z/, # 'Wikipedia', 'Spanische Wikipedia', ...
			},
			'intros'     => {},
			'notifiers'  => { # requires page user:CamelBot/notice-... to exist
				'ASIN'        => 'Hinweis auf Katalog-Nummern von Amazon', 
				'local files' => 'Hinweis auf Verlinkung lokaler Dateien',
				'self-ref'    => 'Hinweis auf Selfreferenzierungen', 
			},
			'page_name'  => { # if not defined, use key
				'cat dead'    => 'categories of the dead', 
			},
			'check_entries' => {
				'ASIN'                 => 1, 
				'cat dead'             => 0, 
				'local files'          => 1, 
				'finanznachrichten.de' => 1, 
				'traditionsverband.de' => 1, 
				'self-ref'             => 1, 
			},
		};
		# text modules the intros are composed of
		my %i_parts = (
			'intro' => 'dies ist eine durch CamelBot befuellte wartungsliste, ' 
				. 'die kuerzlich (auf wikipedia/wikidata) bearbeitete artikel auflistet',
			'general' => 'CamelBot lauscht auf den recent changes, d.h., ' 
				. 'er schaut sich alle artikel an, die aktuell geaendert wurden.',
			'condition_link' => 'faellt ihm dabei auf, dass sich in einem artikel ' 
				. 'ein verweis auf jene domain befindet',
			'action' => 'meldet er das nach ca. 30 minuten hier auf dieser seite',
			'deleting_link' => 'auch das loeschen von eintraegen, die ' 
				. 'einen solchen verweis nicht mehr enthalten, uebernimmt CamelBot.',
			'list_section' => "== liste ==\n",
		);
		$self->{'maintenance'}{'intros'} = {
			'ASIN' => $i_parts{'intro'} . 
				", die mind. eine [[Amazon Standard Identification Number|ASIN]]" . 
				" enthalten.\n\n" . $i_parts{'general'} .
				" faellt ihm dabei auf, dass in einem artikel eine ASIN steht, " .
				$i_parts{'action'} . '. ' . 
				"auch das loeschen von eintraegen, die keine ASIN mehr enthalten, " .
				"uebernimmt CamelBot.\n\n" . 
				"$i_parts{'list_section'}",
			# use the same year as in the regexp above
			'cat dead' => $i_parts{'intro'} . 
				", die der category ''gestorben " . $current_year . 
				"'' hinzugefuegt wurden.\n\n" .  $i_parts{'general'} . 
				" faellt ihm dabei auf, " . 
				"dass ein artikel dieser category hinzugefuegt wird, " . 
				'meldet er das hier auf dieser seite. ' . 
				"dabei fuegt er gefundene artikel nur der liste hinzu. " . 
				"loeschen muss man manuell.\n\n" . 
				"$i_parts{'list_section'}",
			'local files' => $i_parts{'intro'} . 
				", die mind. eine relative adresse auf die festplatte enthalten.\n\n" . 
				$i_parts{'general'} . 
				" faellt ihm dabei auf, " . 
				"dass in einem artikel eine solche adresse steht, " .
				$i_parts{'action'} . '. ' . 
				"auch das loeschen von eintraegen, " . 
				"die keine solche adresse mehr enthalten, uebernimmt CamelBot.\n\n" . 
				"$i_parts{'list_section'}",
			'self-ref' => $i_parts{'intro'} . ", die mind. eine " . 
				"[[WP:Belege#Wikipedia_ist_keine_Quelle|selbstreferenz]] enthalten, " . 
				"also einen unzulaessigen beleg durch einen anderen wikipedia-artikel " . 
				"oder aehnliches.\n\n" . 
				$i_parts{'general'} . 
				" faellt ihm dabei auf, dass ein artikel eine selbstreferenz enthaelt, " .
				$i_parts{'action'} . '. ' . 
				"auch das loeschen von eintraegen, " . 
				"die keine selbstreferenz mehr enthalten, uebernimmt CamelBot.\n\n" . 
				$i_parts{'list_section'},
		};
		# intros of lists of links to a domain differ in the domain name only
		for my $domain('finanznachrichten.de', 'traditionsverband.de'){
			$self->{'maintenance'}{'intros'}{$domain} = $i_parts{'intro'} . 
				", die mind. einen verweis auf $domain enthalten.\n\n" .
				"$i_parts{'general'} $i_parts{'condition_link'}, $i_parts{'action'}. " . 
				"$i_parts{'deleting_link'}\n\n$i_parts{'list_section'}";
		}
		return 1;
	}

	# reads a whole file as raw bytes (no decoding, no newline translation).
	# params:
	#   $filename  path of file to read
	# returns a reference to the string containing the bytes of the file.
	# dies if the file can't be opened or if reading fails.
	sub read_file_binary{
		my $self = shift;
		my $filename = shift;
		# open without any decoding layer right away, since bytes are wanted
		open(my $FILE, '<:raw', $filename) 
			or die "could not open file '$filename': $!";
		my $buffer;
		my $data = '';
		while(1){
			my $num_read = read($FILE, $buffer, 65536);
			# undef means error (0 means end of file). don't return truncated data.
			die "could not read file '$filename': $!" unless defined $num_read;
			last if $num_read == 0;
			$data .= $buffer;
		}
		close($FILE);
		return \$data;
	}

	# replaces table(s) on a wiki page by freshly generated ones and saves the 
	# page.
	# params:
	#   $page     title of the wiki page
	#   $tables   array ref of table hashes that are passed to table2wikitext(). 
	#             the optional key 'section' names the section heading a table 
	#             belongs to.
	#   $summary  edit summary
	# if the page does not exist or is empty, all tables (each of them preceded 
	# by its section heading, if given) are written one after another. 
	# otherwise the content of the given section or -- if no section is given -- 
	# the existing table is replaced.
	# returns nothing, if is_allowed() denies editing; otherwise the return value 
	# of save_wiki_page().
	# dies if the section or the table to replace can't be found.
	sub rebuild_table{
		my $self      = shift;
		my $page      = shift;         # wiki page
		my $tables    = shift;         # tables to insert
		my $summary   = shift;         # wiki summary
		my $text = $self->{'mw_bot'}->get_text($page);
		if(not defined $text){
			$self->msg(1, "page $page does not exist.", 'notice');
			$text = '';
		}elsif($text eq ''){
			$self->msg(1, "page $page is empty.", 'notice');
		}else{
			# is_allowed() is asked for non-empty pages only
			return unless $self->is_allowed(\$text, $page);
		}
		my $text_bak = $text;
		# convert table to wikitext string
		my $full_auto = 1;
		if($text eq ''){
			# build page from scratch
			for my $table(@$tables){
				my $wikitable = $self->table2wikitext($table, $full_auto);
				if(defined $table->{'section'}){
					$text .= "== " . $table->{'section'} . " ==\n";
				}
				$text .= $wikitable;
			}
		}else{
			# find place to insert table and replace table
			for my $table(@$tables){
				my $wikitable = $self->table2wikitext($table, $full_auto);
				if(defined $table->{'section'}){
					my $section = $table->{'section'};
					# heading line with at least two '=' on each side (same number on 
					# both sides) and at most one space on each side of the title
					my $re_section_heading = 
						qr/^=(?<equals>=+)\x20?\Q$section\E\x20?\g{equals}=\n/msx;
					# modifier g is used to get the position behind the heading via pos()
					if($text =~ /$re_section_heading/g){
						my $table_begin = pos($text); # begin of table to replace
						my $tmp = substr($text, $table_begin);
						my $table_length;
						# find end of table
						# (i.e. the position in front of the first newline that is 
						# followed by a heading or a category link)
						if($tmp =~ /(?=\n(?:=|\[\[[cC]ategory:))/g){
							$table_length = pos($tmp); # length of table to replace
						}else{ # table goes till end
							$table_length = length($tmp);
						}
						# text up to heading + new table + text behind the old content
						$text = substr($text, 0, $table_begin) . $wikitable .
							substr($tmp, $table_length);
					}else{
						$self->msg(0, 
							'could not create/modify section. please ensure that wiki page ' . 
								"[[$page]] either is empty or the section '$section' exists!",
							'error'
						);
						die;
					}
				}else{
					# no section given: replace the table itself, including a comment 
					# line directly in front of it. because of the greedy '.*' together 
					# with modifier s, everything from the first '{|' at the begin of a 
					# line up to the last '|}' is replaced.
					if($text =~/(?:^<!--.*?-->\n)?^\{\|.*\|\}/ms){
						$text =~ s/(?:^<!--.*?-->\n)?^\{\|.*\|\}(\n|)\n*/$wikitable$1/ms;
					}else{
						$self->msg(0, 
							'could not create/modify section. please ensure that wiki page ' . 
								"[[$page]] either is empty or the appropriate section exists!", 
							'error'
						);
						die;
					}
				}
			}
		}
		# show resulting text if bot is just simulating
		$self->msg(2, $text) if $self->{'simulation'};
		$self->time_management();
		$self->save_wiki_page($page, $summary, \$text, \$text_bak);
	}

	# saves a wiki page, if its text has been changed.
	# params:
	#   $page_utf8  title of the wiki page
	#   $summary    edit summary (will be prefixed with 'Bot: ')
	#   $text       ref to new text
	#   $orig_text  ref to original text
	#   $minor      (optional) minor flag, default: $self->{'minor'}
	# depending on $self->{'showdiff'} and $self->{'ask_user'} the user has to 
	# confirm the edit. the edit is executed, if
	#   - the user is not asked at all, or
	#   - the answer is exactly 'y' or 'yes', or 
	#   - the answer is empty and the previous answer was yes.
	# end of input (eof on stdin) counts as 'no'.
	# saving is tried up to three times.
	# returns
	#   2  page has been saved
	#   1  nothing to do (no change) or just simulation
	#   0  user declined
	#  -1  saving failed
	sub save_wiki_page{
		my $self = shift;
		my $page_utf8 = shift;
		my $summary = shift;
		my $text = shift;
		my $orig_text = shift;
		my $minor = shift // $self->{'minor'};
		my $success = 1; # no change or just simulation
		return $success if $$text eq $$orig_text;
		my $user_answer_diff;
		if(defined $self->{'showdiff'} and $self->{'showdiff'} > 0){
			$user_answer_diff = $self->show_diff($text, $orig_text);
		}
		# encoded page title for log messages
		my $page = $page_utf8;
		utf8::encode($page);
		if($self->{'simulation'}){
			$self->msg(1, "simulation of saving of page [[$page]] with summary '$summary'");
			return $success;
		}
		my $user_input;
		$user_input = $user_answer_diff ? 'yes' : 'no' if defined $user_answer_diff;
		if(not defined $user_input and $self->{'ask_user'}){
			print "execute? ('y' = yes, else = no) ";
			$user_input = <STDIN>;
			if(defined $user_input){
				chomp($user_input);
			}else{
				# eof: nobody is there to confirm, so don't edit
				$user_input = 'no';
			}
		}
		# the answer has to match completely. otherwise e.g. 'nay' would be a yes.
		my $confirmed = (
			not defined $user_input 
			or $user_input =~ /\Ay(?:es)?\z/ 
			or ($user_input eq '' and ($self->{'user_answer'} // '') eq 'y')
		);
		if(not $confirmed){
			$self->{'user_answer'} = '';
			return 0; # user said: no
		}
		$self->{'user_answer'} = 'y';
		for(my $num_tries = 1; $num_tries < 4 and $success <= 1; ++$num_tries){
			$self->msg(1, "saving page [[$page]] ... ");
			push @{$self->{'time_stack'}}, time;
			my $res = $self->{'mw_bot'}->edit({
					is_minor => $minor,
					page     => $page_utf8,
					text     => $$text,
					bot      => 1,
					summary  => "Bot: $summary",
			});
			if($res){
				$success = 2; # real success
			}else{
				$success = -1; # failed
				$self->_handle_api_error();
				# error fields might be undefined
				my $error = $self->{'mw_api'}->{'error'};
				if(($error->{'code'} // '') =~ /\b3\b/ and 
					($error->{'details'} // '') =~ /assertbotfailed|Assertion .* failed/){
					# restart bot
					$self->login();
				}
				$self->msg(0, "saving page failed (trying again: num_tries = $num_tries) ", 'error');
				sleep(5 * $num_tries);
			}
		}
		return $success;
	}

	# searches the wiki via MediaWiki::Bot.
	# params:
	#   $search  search term
	# returns an array ref containing everything that search() of the bot returns.
	sub search_pages{
		my ($self, $search) = @_;
		my @found = $self->{'mw_bot'}->search($search);
		return \@found;
	}

	# shows the differences between two texts and (optionally) lets the user 
	# decide whether to go on.
	# params:
	#   $text       ref to new text
	#   $orig_text  ref to original text
	# $self->{'showdiff'} is used as bit mask:
	#   bit 1 (value 1)  print diff to stdout
	#   bit 2 (value 2)  show changed parts side by side in a Tk window and wait 
	#                    for a key
	# returns undef on labs or if bit 2 is not set. otherwise '1' if key 'y' was 
	# pressed and '0' if the window was closed by any other key. key 'q' exits 
	# the whole program.
	sub show_diff{
		my $self      = shift;
		my $text      = shift;
		my $orig_text = shift;
		# no diff (and no Tk) on labs
		return undef if $_running_on_labs;
		# NOTE(review): new text is passed as first argument. presumably this makes 
		# the printed diff read from new to old -- confirm
		my $diff      = Text::Diff::diff($text, $orig_text);
		my $result;
		print $diff if $self->{'showdiff'} & 1;
		if($self->{'showdiff'} & 2){
			# grep is used like map here: the substitution trims every line in place. 
			# NOTE(review): the pattern also matches empty lines, so presumably no 
			# line is dropped -- confirm
			my @text_lines = grep {s/^\s*(.*?)\s*$/$1/} split /\n/, $$text;
			my @orig_text_lines = grep {s/^\s*(.*?)\s*$/$1/} split /\n/, $$orig_text;
			# get line ranges (0-based, inclusive) from the hunk headers of the diff. 
			# start and length are taken from the first ('-') part of each header.
			my @lines_to_show = ();
			while($diff =~ /@@ -(\d+),(\d+) \+\d+,\d+ @@/g){
				push @lines_to_show, [$1-1, $1+$2-2];
			}
			#print Dumper(\@lines_to_show);
			#print Dumper(\@text_lines);
			# concatenate the lines of all ranges. the same line numbers are used for 
			# both texts.
			my $shortend_text = '';
			my $shortend_orig_text = '';
			for(@lines_to_show){
				for($$_[0]..$$_[1]){
					$shortend_text.=$text_lines[$_]."\n";
					$shortend_orig_text.=$orig_text_lines[$_]."\n";
				}
			}
			chomp($shortend_text);
			chomp($shortend_orig_text);
			# side-by-side view of both excerpts
			my $mw = Tk::MainWindow->new;
			my $w = $mw->DiffText(
				-width=>1500, -height=>500, -orient => 'horizontal', -map => 'scrolled'
			)->pack();
			$w->load('a' => $shortend_text);
			$w->load('b' => $shortend_orig_text);
			$w->compare(-granularity => 'word');
			# default answer is 'no'
			$result = '0';
			# key bindings: y = yes, q = quit program, any other key = no
			$mw->bind('<KeyPress-y>' => sub {$result = '1'; $mw->destroy;} );
			$mw->bind('<KeyPress-q>' => sub {exit 0;} );
			$mw->bind('<KeyPress>' => sub {$result = '0'; $mw->destroy;} );
			$mw->MainLoop;
		}
		return $result;
	}

	# searches the spam blacklist log for blocked edits, whose url matches a 
	# given regexp.
	# params:
	#   $regexp  regexp the blocked urls are compared with
	#   $proj    (optional) project, default: $self->{'wm_proj'}
	# first the db is asked (db_fetch_sbl_log). if that yields nothing and the 
	# project is the current one, the log is fetched chunk by chunk via api.
	# returns an array ref of hash refs. entries from db have the keys ns, title, 
	# url, user, timestamp, comment. entries from api are the log events with 
	# key 'url' instead of 'params' and without keys type, anon, action.
	# dies if a log event from api doesn't contain a url.
	sub search_sbl_blocked_log{
		my ($self, $regexp, $proj) = @_;
		$proj //= $self->{'wm_proj'};
		my @results = ();
		my $db_table = $self->db_fetch_sbl_log($proj, $regexp);
		if(defined $db_table){
			for my $row(@$db_table){
				my ($timestamp, $ns, $title, $comment, $url, $user) = @{$row}[0..5];
				# extract the url from the text surrounding it
				$url =~ s/^.*?"(http[^"]*)".*$/$1/;
				push @results, {
					'ns' => $ns,
					'title' => $title,
					'url' => $url,
					'user' => $user,
					'timestamp' => $timestamp,
					'comment' => $comment,
				};
			}
			$self->msg(2, 'found '.scalar(@results).' matching blocked edits.');
		}elsif(not defined $proj or $proj eq $self->{'wm_proj'}){ # use api
			# api.php?action=query&list=logevents&letype=spamblacklist&leprop=ids|title|type|user|timestamp|comment|details&continue=
			my $query = {
				'action'         => 'query',
				'list'           => 'logevents', 
				'letype'         => 'spamblacklist',
				'lelimit'        => '500',
				'leprop'         => 'title|type|user|timestamp|comment|details', #ids
			};
			my $mw_options = {'max' => 1};
			my ($finished, $num_results, $log_counter) = (0, 0, 0);
			until($finished){
				(my $pages, $finished) = $self->api_cont($query, $mw_options);
				my $logevents = $pages->{'query'}->{'logevents'};
				my $curr_counter = scalar(@$logevents);
				$log_counter += $curr_counter;
				# move url one level up in every event; keep matching events only
				for my $event(@$logevents){
					die "undefined url?" unless defined $event->{'params'}->{'url'};
					$event->{'url'} = $event->{'params'}->{'url'};
					delete $event->{'params'};
					push @results, $event if $event->{'url'} =~ /$regexp/;
				}
				if($self->{'verbosity'} >= 3 and $curr_counter > 0){
					# oldest and newest timestamp of this chunk (string comparison)
					my @timestamps = sort map {$_->{'timestamp'}} @$logevents;
					my ($min_ts, $max_ts) = @timestamps[0, -1];
					$self->msg(3, 'found ' . (scalar(@results) - $num_results) . 
						" blocked edits ($min_ts till $max_ts).");
				}
				$num_results = scalar(@results);
			}
			# drop keys nobody is interested in
			for my $edit(@results){
				delete @{$edit}{qw(type anon action)};
			}
			$self->msg(1, "log contains $log_counter entries. found $num_results matching blocked edits.");
		}else{
			$self->msg(0, "not defined for proj = '$proj'", 'error');
		}
		return \@results;
	}

	# searches spam blacklist (sbl) logs for blocked edits and prints the results 
	# to stdout.
	# params:
	#   $params  hash ref with keys
	#     'regexp'  (optional) regexp for urls. if given, all matching blocked 
	#               edits are listed. otherwise the number of blocked edits is 
	#               printed as wikitable for every entry of a sbl.
	#     'proj'    project(s) whose logs are searched: a project name, 'large' 
	#               or (only without 'regexp') 'all'. 
	#               default: $self->{'wm_proj'}
	#     'sbl'     (used only without 'regexp') sbl whose entries are used: 
	#               'meta', 'metawiki' or the current project.
	#               default: $self->{'wm_proj'}
	#   the defaults are written into the given hash.
	# combinations that are not supported result in an error message.
	sub search_sbl_attempts{
		my $self   = shift;
		my $params = shift;
		$params->{'proj'} = $self->{'wm_proj'} unless defined $params->{'proj'};
		$params->{'sbl'}  = $self->{'wm_proj'} unless defined $params->{'sbl'};
		# language codes of large wikipedias; suffix 'wiki' makes project names
		my @large_projects = qw(ca ceb de en es fr it ja nl no pl pt ru sv uk vi war zh);
		map {$_ .= 'wiki'} @large_projects;
		# search for given regexp
		if(exists $params->{'regexp'}){
			# project name => array ref of blocked edits
			my %results = ();
			if($params->{'proj'} eq 'large'){
				# search large wikipedias for a given regexp; return all single spam 
				# attempts
				for my $proj(@large_projects){
					$results{$proj} = $self->search_sbl_blocked_log(
						qr/$params->{'regexp'}/, $proj);
				}
			}elsif($params->{'proj'} eq $self->{'wm_proj'}){
				# search current project for given regexp
				$results{$params->{'proj'}} = $self->search_sbl_blocked_log(
						qr/$params->{'regexp'}/);
			}elsif(grep {$_ eq $params->{'proj'}} @large_projects){
				# search given (large) project for given regexp
				$results{$params->{'proj'}} = $self->search_sbl_blocked_log(
						qr/$params->{'regexp'}/, $params->{'proj'});
			}else{
				$self->msg(0, 'not implemented yet', 'error');
				#$results{$params->{'proj'}} = $self->search_sbl_blocked_log(
				#		qr/$params->{'regexp'}/, $params->{'proj'});
			}
			# print results
			# (one line per blocked edit: timestamp, url, user; title)
			while(my ($proj, $blocked_edits) = each %results){
				$self->msg(1, "$proj: ".scalar(@$blocked_edits));
				for my $edit(@$blocked_edits){
					print "$edit->{'timestamp'}, $edit->{'url'}, $edit->{'user'}; " . 
						$edit->{'title'} . "\n";
				}
			}
		}else{ # if no explicite regexp is set, then use all sbl entries
			# search large wikipedias for all entries of meta sbl
			# parts of the wikitable that is printed: one row per sbl entry, one 
			# column per project
			my $table_header = '{| class="wikitable sortable" ' .  # }
				'style="table-layout:fixed; width:100%;"' . "\n" . 
				'! style="width:300px;" | regexp || ';
			my $table_row_begin = "|-\n| style=\"overflow:hidden;\" |";
			if($params->{'sbl'} =~ /^meta(?:wiki)?\z/){
				my $sbl_meta = $self->get_sbl_entries('meta');
				$self->msg(1, 'found ' . scalar(@$sbl_meta) . ' entries in sbl.');
				my $projects;
				if($params->{'proj'} eq 'large'){
					$projects = \@large_projects;
				}elsif($params->{'proj'} eq 'all'){
					# get list of all projects (one per line) and drop those projects 
					# that match the following list
					my ($http_status, $all_projects) = $self->get_http_content(
						'https://noc.wikimedia.org/conf/all.dblist');
					$projects = [grep {$_ !~ /^(?:(?:arbcom_.*|auditcom|board|boardgovcom|
						chair|chap|chapcom|checkuser|collab|exec|fdc|grants|iegcom|internal|
						labs|legalteam|movementroles|office|ombudsmen|otrs_wiki|searchcom|
						spcom|steward|transitionteam|wg_en|wikimania.*|zero)wiki|
						(?:il|noboard_chapters)wikimedia)$/x} split /\n/, $all_projects];     
				}else{
					$self->msg(0, 'not implemented yet', 'error');
				}
				if(defined $projects){
					# columns are sorted by project name in header as well as in rows
					print $table_header . (join ' || ', sort @$projects) . " || sum\n";
					for my $regexp(@$sbl_meta){
						my %results = ();
						my $sum = 0;
						for my $proj(@$projects){
							$results{$proj} = $self->search_sbl_blocked_log($regexp, $proj);
							$sum += @{$results{$proj}};
						}
						print "$table_row_begin $regexp || " . 
							(join ' || ', (map {scalar(@{$results{$_}})} sort keys %results));
						print ' || ' . $sum . "\n";
					}
					print "|}\n";
				}
			}elsif($params->{'sbl'} eq $self->{'wm_proj'} 
					and $params->{'proj'} eq $self->{'wm_proj'}){
				# search current mediawiki for all entries of its sbl
				my $proj = $self->{'wm_proj'};
				my $sbl  = $self->get_sbl_entries();
				$self->msg(1, 'found ' . scalar(@$sbl) . ' entries in sbl.');
				print $table_header . "$proj\n";
				for my $regexp(@$sbl){
					my $results = $self->search_sbl_blocked_log($regexp, $proj);
					print "$table_row_begin $regexp || " . scalar(@$results) . "\n";
				}
				print "|}\n";
			}else{
				$self->msg(0, 'not implemented yet', 'error');
			}
		}
	}

	# converts a table given as data structure to wikitext.
	# params:
	#   $table      hash ref with keys
	#                 'header'  (optional) array ref of header cells
	#                 'body'    array ref of rows, each an array ref of cells
	#                 'style'   (optional) hash ref with keys 'table' (attributes 
	#                           of table) and 'header' (attributes of header row)
	#   $full_auto  (default 1) if true, a comment is prepended, which says that 
	#               the table is generated automatically
	#   $only_one_cell_per_line  (default 1) if true, every cell gets its own 
	#               line. otherwise cells of a row are separated by '||' 
	#               ('!!' in header).
	#   $col_widths (optional) array ref of column widths, used for padding cells 
	#               with spaces. til now: ignored if $only_one_cell_per_line
	# returns the wikitext of the table.
	sub table2wikitext{
		my ($self, $table, $full_auto, $only_one_cell_per_line, $col_widths) = @_;
		$full_auto //= 1;
		$only_one_cell_per_line //= 1;
		my $style = $table->{'style'} // {};
		$style->{'table'} //= '';
		# cumulated column widths: position where each column shall end
		my @cumulated_widths;
		if(defined $col_widths){
			@cumulated_widths = ($col_widths->[0]);
			for my $i(1..$#$col_widths){
				push @cumulated_widths, $cumulated_widths[-1] + $col_widths->[$i];
			}
		}
		# anonymous function for re-use with header ($sep = '!') and body 
		# ($sep = '|')
		my $render_row = sub {
			my ($sep, $cells) = @_;
			if($only_one_cell_per_line){
				return join('', map {$sep . ' ' . $_ . "\n"} @$cells);
			}
			unless($col_widths){
				return "$sep " . join(" $sep$sep ", @$cells) . "\n";
			}
			my $row = '';
			for my $col(0..$#$cells){
				$row .= " $sep$sep " if $col > 0; 
				$row .= $cells->[$col];
				my $padding = $cumulated_widths[$col] 
					- 2        # spaces at begin and end
					- length($row) 
					+ 2 * $col;  # '!!' or '||' respectively
				$row .= ' ' x $padding if $padding > 0;
			}
			return "$sep $row\n";
		};
		my @parts;
		if($full_auto){
			push @parts, "<!-- this table is generated automatically. " . 
				"any manual modifications will be deleted on next update. -->\n";
		}
		push @parts, "{| " . $style->{'table'} . "\n";
		if(defined $table->{'header'} and @{$table->{'header'}} > 0){
			push @parts, '|- ' . $style->{'header'} . "\n" if defined $style->{'header'};
			push @parts, $render_row->('!', $table->{'header'});
		}
		for my $table_row(@{$table->{'body'}}){
			push @parts, "|-\n", $render_row->('|', $table_row);
		}
		push @parts, "|}\n";
		return join('', @parts);
	}

	# split the body of a wikitable (everything between header and '|}') into 
	# rows and cells.
	# params:
	#   $table_body: ref to string; the string is trimmed in place
	# returns an array ref of rows; each row is an array ref of cell strings.
	sub table_body2array{
		my ($self, $table_body) = @_;
		# truncate table body
		$$table_body =~ s/\n\z//;
		$$table_body =~ s/^\s*\|-\s*|\s*\|-\s*$//g;
		my @rows;
		for my $row_text(split /\s*\n\|- *+\n/, $$table_body){
			$row_text =~ s/^\s*\| ?//;
			$row_text =~ s/\s+$//;
			# cells are either separated by '||' (one line per row) or each cell 
			# starts on a new line with '|'
			my @cells = index($row_text, '||') > -1 
				? split(/ *+\|\| *+/, $row_text) 
				: split(/\s*\n\| *+/m, $row_text);
			push @rows, \@cells;
		}
		return \@rows;
	}

	# apply one search-and-replace pattern to a text and log every change.
	# params:
	#   $text:     ref to the string that is modified in place
	#   $regexp_s: search pattern (used with the modifiers g and m)
	#   $regexp_r: replacement; either a plain string or, if 
	#              $use_eval_replacement is true, perl code that is evaluated 
	#              for every match
	#   $use_eval_replacement: boolean, see above
	# every replacement is passed to check_external_link together with the text 
	# in front of the match; if that check fails, the match is left unchanged.
	# if the evaluation of the replacement code dies, an error is logged and 
	# the match is left unchanged, too (instead of being replaced by nothing).
	# NOTE(review): $regexp_r is evaluated via string eval; it must never come 
	# from an untrusted source.
	# returns the number of matches that were processed (0 if none).
	sub test_and_replace{
		my $self     = shift;
		my $text     = shift;
		my $regexp_s = shift;
		my $regexp_r = shift;
		my $use_eval_replacement = shift;
		my $array_of_changes = [];
		$self->msg(3, "   ".$regexp_s);
		my $numChanges = $$text =~ s/$regexp_s/
			my $match = ${^MATCH};
			my $prematch = ${^PREMATCH};
			my $replaced = $regexp_r;
			my $eval_failed = 0;
			if($use_eval_replacement){
				$replaced = eval($regexp_r);
				if($@ ne ''){
					$eval_failed = 1;
					$self->msg(0, "could not evaluate replacement '$regexp_r': $@", 'error');
				}
			}
			# cope with links
			if($eval_failed or !$self->check_external_link(\$prematch, \$replaced)){
				$replaced = $match;
			}
			push(@$array_of_changes, [$match, $replaced]);
			$replaced;
			/gpme;
		# s///g yields an empty string if nothing matched
		$numChanges ||= 0;
		if($self->{'verbosity'} >= 1){
			for my $repl(@$array_of_changes){
				$self->msg(1, "   ".$repl->[0]);
				$self->msg(1, " ->".$repl->[1]);
			}
		}
		#$test=~s/$regexp_s/$regexp_r/gmee; # just for debugging
		#$self->msg(1, "wrong replacing with '$regexp_r'?", 'error') if $$text ne $test;
		return $numChanges;
	}

	# compute the replacement of an url by means of a function reference.
	# params:
	#   $replacement: function ref, called as $replacement->($old_url, $post_url)
	#   $pre_url:     text in front of the url
	#   $old_url:     url to be replaced
	#   $post_url:    text behind the url
	# if the new url starts with http(s), its http status is requested; the old 
	# url is kept unless the status is 200.
	# returns the resulting url (plus the template rest that was cut off).
	sub test_and_replace_with_funcref{
		my ($self, $replacement, $pre_url, $old_url, $post_url) = @_;
		# cope with templates like {{something|url=http://example.org|...}} or 
		# {{something|url=http://example.org}}: everything from the first 
		# '{', '|' or '}' on does not belong to the url
		my $template_rest = '';
		if($pre_url =~/url\s*=\s*$/ and $old_url =~/^[^{|}]+([{|}].*)/){
			$template_rest = $1;
			$post_url = $template_rest . $post_url;
			$old_url  = substr($old_url, 0, -length($template_rest));
		}
		# call function reference
		my $new_url = $replacement->($old_url, $post_url);
		# print changes to stdout
		if($self->{'verbosity'} >= 1){
			print "   $old_url\n"; # old text
			print " ->$new_url\n"; # replaced text
		}
		# check http status; avoid change, if target not reachable
		if($new_url=~/^https?:\/\//){
			my $status = $self->get_http_status($new_url);
			$self->msg(1, '  status: '.$status);
			$new_url = $old_url unless $status == 200;
		}
		return $new_url . $template_rest;
	}

	# loop over pages, apply a list of replacements (optionally restricted to 
	# one section) and save the changed pages.
	# params:
	#   $pages:    array ref of page titles
	#   $section:  heading text (used inside a regexp) of the section the 
	#              replacements are restricted to, or undef for the whole page
	#   $patterns: array ref of search patterns
	#   $repls:    array ref of replacements; same length as $patterns
	#   $params:   hash ref; used keys: 'max_edits' (-1 = unlimited), 
	#              'skip_edits', 'summary'
	#   $use_eval_in_repl: boolean, passed on to test_and_replace
	#   $clean_up (default 0): additionally run cleanup_wiki_page on each page
	# returns a true value if every treated page could be processed and saved, 
	# a false value otherwise (also if the numbers of patterns and replacements 
	# differ).
	sub text_replacement{
		my $self       = shift;
		my $pages      = shift; # pages to search
		my $section    = shift; # section to use
		my $patterns   = shift; # patterns to search s/X//
		my $repls      = shift; # replacing texts s//X/(e)
		my $params     = shift;
		my $use_eval_in_repl = shift;
		my $clean_up   = shift // 0;
		my $max_edits  = $params->{'max_edits'};
		my $skip_edits = $params->{'skip_edits'};
		$self->msg(1, "loop over pages and replace text");
		if(@$patterns != @$repls){
			$self->msg(0, 'number of patterns and number of replacements differ', 
				'error');
			return 0;
		}
		my $edit_counter = -1;
		my $success = 1;
		for my $page(@$pages){
			if(++$edit_counter < $skip_edits){
				next;
			}elsif($edit_counter >= $max_edits and $max_edits != -1){
				last;
			}
			$self->msg(1, "$edit_counter: page = '$page'");
			my $text = $self->{'mw_bot'}->get_text($page);
			unless(defined $text){
				$self->msg(0, "page content not defined", 'error');
				$success = 0;
				next;
			}
			if(not $self->is_allowed(\$text, $page)){
				$self->msg(2, "  not allowed");
				$success = 0;
				next;
			}
			my $text_bak = $text;
			my $summary_clean_up = '';
			if($clean_up){
				(undef, $summary_clean_up) = $self->cleanup_wiki_page(\$text, $page);
			}
			my $text_to_use;
			my @split_text;
			if(defined $section){
				# the split yields text, heading, '=...=' triples; the third item 
				# of each triple (the captured equal signs) is dropped.
				my $i = 0;
				@split_text = grep {++$i % 3} 
					split /^((=+)(?!=).+(?<!=)\2(?:\n|\z))/m, $text;
				# 0, 2, 4, ... = text
				# 1, 3, 5, ... = headings
				my $section_num = -1;
				for($i = 1; $i < @split_text; $i+=2){
					if($split_text[$i] =~ /^(=+)\s*$section\s*\1$/){
						$section_num = $i;
						last;
					}
				}
				if($section_num == -1){
					$self->msg(0, 'could not find section in page', 'error');
					$success = 0;
					next;
				}else{
					$self->msg(1, "found find section '$section' in page '$page'");
				}
				# a heading at the very end of the page has no text behind it
				$split_text[$section_num + 1] //= '';
				$text_to_use = \$split_text[$section_num + 1];
			}else{
				$text_to_use = \$text;
			}
			my $found = 0;
			for(my $i = 0; $i < @$patterns; ++$i){
				if($self->test_and_replace($text_to_use, $patterns->[$i], $repls->[$i], 
						$use_eval_in_repl) > 0){
					++$found;
				}
			}
			if(defined $section){
				$text = join '', @split_text;
			}
			$self->msg(2, "  $found replacements") if @$patterns > 0;
			$self->time_management();
			# combine the given summary and the clean-up summary; skip empty 
			# parts so that no dangling '; ' is left
			my $summary = join '; ', 
				grep {defined $_ and $_ ne ''} ($params->{'summary'}, $summary_clean_up);
			# always try to save; an earlier failure on another page must not 
			# prevent saving this one
			my $saved = $self->save_wiki_page($page, $summary, \$text, \$text_bak);
			$success = 0 unless $saved;
		}
		return $success;
	}

	# throttle the number of edits per minute.
	# as long as $self->{'time_stack'} holds at least 
	# $self->{'max_edits_per_min'} time stamps, wait until the oldest one is 
	# 60 seconds old and remove it. a limit of -1 disables the throttling.
	# returns 1.
	sub time_management{
		my $self = shift;
		while(1){
			last if $self->{'max_edits_per_min'} == -1;
			last if @{$self->{'time_stack'}} < $self->{'max_edits_per_min'};
			my $remaining = 60 - (time - $self->{'time_stack'}->[0]);
			if($remaining > 0){
				$self->msg(1, '  ...waiting '.$remaining." seconds");
				sleep($remaining);
			}
			shift @{$self->{'time_stack'}};
		}
		return 1;
	}

	# convert a page title to a string that can be used as file name: german 
	# umlauts and sharp s are transliterated, every other character except 
	# a-z, A-Z, 0-9 and _.~!- is replaced by an underscore.
	sub title2filename{
		my ($self, $title) = @_;
		my @transliterations = (
			['ß', 'ss'], 
			['ä', 'ae'], ['Ä', 'Ae'], 
			['ö', 'oe'], ['Ö', 'Oe'], 
			['ü', 'ue'], ['Ü', 'Ue'],
		);
		for my $pair(@transliterations){
			my ($from, $to) = @$pair;
			$title =~ s/$from/$to/g;
		}
		$title =~ s/[^a-zA-Z0-9_.~!-]/_/g;
		return $title;
	}

	# build the index.php url of a page from its title; the base url is derived 
	# from $self->{'mw_apiurl'} by replacing the trailing 'api.php'.
	sub title2url{
		my ($self, $title) = @_;
		utf8::encode($title);
		$title =~ tr/ /_/;
		(my $url = $self->{'mw_apiurl'}) =~ s/api\.php$/index.php?title=/;
		return $url . $self->uri_escaper($title);
	}

	# convert a page title to the form used inside an url: blanks become 
	# underscores, the result is passed through uri_escaper.
	sub title2url_part{
		my ($self, $title) = @_;
		$title =~ tr/ /_/;
		my $escaped_title = $self->uri_escaper($title);
		return $escaped_title;
	}

	# rebuild the page 'Wikipedia:Bearbeitungsfilter/latest topics': a table of 
	# the most recently changed edit filter (talk) pages with their newest 
	# thread and its author.
	# the table is only written (via rebuild_table) if at least one page has 
	# been changed since the last change of the index page; the edit summary 
	# lists these pages and their threads (or filter descriptions).
	# returns 1.
	sub update_edit_filter_index{
		my $self = shift;
		# first get recent changes
		my $max_num_talk_pages = 40;
		my $max_num_days_age = 5;
		my $edit_filter_name = 'Bearbeitungsfilter';
		my $lt_page    = 'Wikipedia:' . $edit_filter_name . '/latest topics';
		# api.php?action=query&generator=allpages&gapnamespace=4&gapprefix=Bearbeitungsfilter/&gapfilterredir=nonredirects&gaplimit=500&prop=info|revisions&rvprop=content|timestamp
		# newest pages first
		my $pages = [
			sort {$b->{'revisions'}->[0]->{'timestamp'} cmp $a->{'revisions'}->[0]->{'timestamp'}}
			@{$self->get_pages_by_prefix({
				'namespace_id' => 4,
				'prefix'       => $edit_filter_name.'/',
				'title_re'     => $self->{'re_editfilter'},
			})}
		];
		# last change of latest_topics
		my $lt_last_change = 
			$self->get_page_info($lt_page)->{$lt_page}->{'touched'};
		my $tables = [{
			'style'  => {'table' => 'class="wikitable sortable"'},
			'header' => ['Regel', "letzte Seiten\xe4nderung", 'Thread', 'letzter Autor'],
			'body'   => [],
		}];
		my %new_topics;
		# pages older than this are only listed until the table is long enough
		my $oldest_date = $self->get_date_iso(time-60*60*24*$max_num_days_age);
		my $i = 0;
		for my $page(@$pages){
			my $timestamp = $page->{'revisions'}->[0]->{'timestamp'};
			my $npost = $self->newest_post_info($page->{'revisions'}->[0]->{'*'});
			next unless defined $npost->{'thread'};
			my $row = $tables->[0]{'body'}[$i] = [];
			# page title: rule pages end with their number, other pages are 
			# named by the last part of their title
			my $page_short_title = 'unknown';
			if($page->{'title'}=~/\D(\d+)$/){
				$page_short_title = $1;
				$row->[0] = "[[$page->{'title'}|#$page_short_title]]";
			}elsif($page->{'title'}=~/\/([^\/]+)$/){
				$page_short_title = $1;
				$row->[0] = "[[$page->{'title'}|$page_short_title]]";
			}else{
				$row->[0] = "[[$page->{'title'}]]";
			}
			# date of last change of rule
			$row->[1] = '{{#timel:Y-m-d H:i:s|'.$timestamp.'}}';
			# thread title
			my $thread_anchor = "{{subst:anchorencode:$npost->{'thread'}}}";
			my $thread_title = $npost->{'thread'};
			# remove links from title
			$thread_title =~s/\[\[[^|\]]*\|([^|\]]*)\]\]/$1/g;
			$thread_title =~s/\[\[|\]\]//g;
			# add thread title to table
			$row->[2] = "[[$page->{'title'}#$thread_anchor|$thread_title]]";
			# remember newest topics
			if(not defined $lt_last_change or $lt_last_change lt $timestamp){
				$new_topics{$page_short_title} = $thread_title;
			}
			# author
			my $author = $npost->{'author'} // '';
			$row->[3] = $author eq '' ? '' : 
				"[[special:contributions/$author|$author]]";
			last if $timestamp lt $oldest_date and $i >= $max_num_talk_pages;
			++$i;
		}
		if(keys %new_topics > 0){
			my $filter_descriptions = $self->get_abuse_filter_info();
			# page names first (alphabetically), then rule numbers (numerically). 
			# the comparison of a number with a name does not depend on the 
			# strings themselves; otherwise the order would be inconsistent 
			# for names that start with a digit or a special character.
			my @topic_keys = sort {
				my $a_is_num = $a =~ /^[0-9]+$/;
				my $b_is_num = $b =~ /^[0-9]+$/;
				my $result;
				if($a_is_num and $b_is_num){
					$result = ($a <=> $b);
				}elsif(!$a_is_num and !$b_is_num){
					$result = ($a cmp $b);
				}else{
					$result = $a_is_num ? 1 : -1;
				}
				$result;
			} keys %new_topics;
			# build summary
			my @labels;
			my @thread_parts;
			for my $key(@topic_keys){
				my $is_rule = $key =~ /^[0-9]+$/;
				push @thread_parts, 
					($is_rule and defined $filter_descriptions->[$key]) ? 
					$filter_descriptions->[$key] : $new_topics{$key};
				push @labels, $is_rule ? "#$key" : $key; # add '#' on rule numbers
			}
			my $summary = '(' . join(', ', @labels) . '): ' . join('; ', @thread_parts);
			# now update page
			$self->rebuild_table($lt_page, $tables, $summary);
		}
		return 1;
	}

	# update the maintenance list page 
	# 'User:CamelBot/maintenance list/<page name of type>'.
	# params:
	#   $maintenance_lists: hash ref (or undef); if given, 
	#     $maintenance_lists->{$mt_type} is reset to a hash of all page titles 
	#     that are in the list afterwards
	#   $mt_type: type of the maintenance list
	#   $pages:   array ref of hash refs; each needs the key 'full_page_title' 
	#     and one key per entry of $additional_col. entries without a defined 
	#     title are ignored.
	#   $additional_col (default []): names of additional columns 
	#     ['column1', 'column2', ...]
	# steps: read the old table (or create a new one), find the pages that are 
	# not listed yet, optionally drop listed pages that do not match the 
	# maintenance regexp any longer, append the new pages, save the page.
	# returns the return value of save_wiki_page.
	sub update_maintenance_lists_build_table{
		my $self              = shift;
		my $maintenance_lists = shift; # maintenance lists
		my $mt_type           = shift;
		my $pages             = shift;
		my $additional_col    = shift // []; # ['column1', 'column2', ...]
		my $mt_type_page = $self->{'maintenance'}{'page_name'}{$mt_type} // $mt_type;
		# get maintenance list
		my $ml = "User:CamelBot/maintenance list/$mt_type_page";
		my $ml_text = $self->{'mw_bot'}->get_text($ml) // '';
		my $ml_text_bak = $ml_text;
		# rebuild base of maintenance list
		my $intro = ($self->{'maintenance'}{'intros'}{$mt_type} // '') . "== liste ==\n";
		my $outro = "\n\n[[Kategorie:Wikipedia:Wartungskategorie|$mt_type_page]]";
		my $wikitable;
		# extract (save) old intro
		if($ml_text =~ /^(.*\n== [lL]iste? ==\n+.*?)(\{\|.*?\|\})\n(.*)$/s){
			$intro = $1;
			$wikitable = $2;
			$outro = $3;
		}else{ # or create a new one
			$self->msg(1, 'could not fetch table. resetting page.', 'notice');
			$wikitable = '{| class="wikitable sortable"' . "\n" .
				"! page !! " . 
				join(' !! ', (@$additional_col, "date of detection by CamelBot")) . 
				"\n" . '|}';
		}
		# get old table content
		my $table = $self->wikitable2array(\$wikitable);
		# number of additional (ignored/unchanged) columns
		my $num_add_cols = @{$table->{'header'}} - 2 - scalar(@$additional_col);
		$self->msg(1, "number of additional cols = $num_add_cols");
		my $summary = 'update maintenance list';
		# 1. @new_entries = pages that are not in the list already. a page is in 
		# the list, if there is a row whose first cell starts with the link to 
		# the page and whose additional columns are equal to those of the page.
		my @new_entries;
		for my $page(@$pages){
			my $title = $page->{'full_page_title'};
			next unless defined $title;
			my $link = "[[$title]]";
			my $already_listed = 0;
			ROW: for my $row(@{$table->{'body'}}){
				# check first/main column
				next ROW unless defined $row->[0] and $row->[0] =~ /^\Q$link\E/;
				# check other columns
				for my $i(0 .. $#$additional_col){
					my $value = $page->{$additional_col->[$i]};
					next ROW unless defined $value 
						and defined $row->[$i + 1] 
						and $value eq $row->[$i + 1];
				}
				$already_listed = 1;
				last ROW;
			}
			if($already_listed){
				$self->msg(1, "page '$title' already in list");
			}else{
				push @new_entries, $page;
			}
		}
		# 2. check old entries for deletion
		if($self->{'maintenance'}{'check_entries'}{$mt_type}){
			my @kept_rows;
			for my $row(@{$table->{'body'}}){
				# rows without a leading link are dropped
				next unless defined $row->[0] and $row->[0] =~ /^\[\[(.+?)\]\]/;
				my $page_from_list = $1;
				$self->msg(4, "check '$page_from_list'");
				# may be deleted meanwhile
				my $page_content = $self->{'mw_bot'}->get_text($page_from_list) // '';
				if($page_content =~ /$self->{'maintenance'}{'text_re'}{$mt_type}/){
					push @kept_rows, $row;
				}else{
					$self->msg(4, "regexp did not match");
				}
			}
			$table->{'body'} = \@kept_rows;
		}
		# 3. push @$table, @new_entries
		my @added_pages;
		for my $page(@new_entries){
			$self->msg(1, "add new list entry '$page->{'full_page_title'}'");
			# if table has more than 2 columns, then pad each row
			my @empty_cells = (' ') x ($num_add_cols > 0 ? $num_add_cols : 0);
			my @additional_cells; # content of ['column1', 'column2', ...]
			my $added_page = "[[$page->{'full_page_title'}]]";
			for my $col_name(@$additional_col){
				my $add_cell = $page->{$col_name} // '';
				if($add_cell ne ''){
					$added_page .= ', ' . $col_name . ': ' . $add_cell;
				}
				push @additional_cells, $add_cell;
			}
			push @added_pages, $added_page;
			push @{$table->{'body'}}, [
				"[[$page->{'full_page_title'}]]", 
				@additional_cells,
				$self->get_date_iso(), 
				@empty_cells
			];
		}
		# create/update summary
		if(@added_pages > 0){
			$summary = 'added: ' . join('; ', @added_pages);
		}
		$summary .= '; num items = ' . scalar(@{$table->{'body'}});
		# reset and refill maintenance list
		if(defined $maintenance_lists){
			$maintenance_lists->{$mt_type} = {}; 
			for my $entry(@{$table->{'body'}}){
				if($entry->[0] =~ /^\[\[(.+?)\]\]/){
					$maintenance_lists->{$mt_type}->{$1} = 1;
				}
			}
		}
		my $full_auto = 1;
		# convert array to wikitable
		$wikitable = $self->table2wikitext($table, !$full_auto);
		# rebuild page, i.e., put pieces together and save
		$ml_text = $intro . $wikitable . $outro;
		$self->time_management();
		return $self->save_wiki_page($ml, $summary, \$ml_text, \$ml_text_bak);
	}

	# update the maintenance lists with respect to one page or force an update.
	# params:
	#   $page: page title or undef; undef means: force update without checking 
	#          conditions
	#   $text: ref to the page text (or undef)
	#   $type: type of maintenance list, "all_types" for all types
	#   $maintenance_lists: hash ref of maintenance lists, passed on to 
	#          update_maintenance_lists_build_table
	# combinations of $page and $text:
	#   no page, no text => check pages in current list(s), update list
	#   no page,    text => same as above
	#      page,    text => check pages in current list(s) if text matches 
	#                       condition
	#      page, no text => same as above, but at the beginning: 
	#                       get text from page or leave function
	# returns 1, or 0 if a page is given whose text could not be fetched.
	sub update_maintenance_lists{
		my $self              = shift;
		my $page              = shift; 
		my $text              = shift; # ref to string (or undef)
		my $type              = shift;
		my $maintenance_lists = shift; # maintenance lists
		if(defined $page and not defined $text){
			my $text_ = $self->{'mw_bot'}->get_text($page);
			$text = \$text_ if defined $text_;
		}
		if(defined $page and not defined $text){
			$self->msg(1, "could not fetch page '$page'. maybe deleted already", 
				'notice');
			return 0;
		}
		for my $mt_type(keys %{$self->{'maintenance'}{'text_re'}}){
			next unless $type eq $mt_type or $type eq 'all_types';
			next unless not defined $page 
				or $$text =~ /$self->{'maintenance'}{'text_re'}{$mt_type}/;
			# TODO: how to cope with cat dead? right now using another function 
			# db_cat_of_dead_monitoring
			next if $mt_type eq 'cat dead';
			$self->msg(1, "check '$mt_type'");
			# skip exception pages
			if(defined $page 
				and defined $self->{'maintenance'}{'exceptions'}{$mt_type} 
				and $page =~ /$self->{'maintenance'}{'exceptions'}{$mt_type}/){
				next;
			}
			# a forced update (no page) has nothing to add to the list; passing 
			# an entry without title would be treated as a page to be added.
			my $pages = defined $page ? [{'full_page_title' => $page}] : [];
			my $add_column = undef;
			$self->update_maintenance_lists_build_table( 
				$maintenance_lists, $mt_type, $pages, $add_column);
		}
		return 1;
	}

	# upload a local file to the wiki unless the same content is online already.
	# params:
	#   $filename_src:  name of the local file
	#   $summary:       upload summary
	#   $filename_dest (default: $filename_src): file name in the wiki
	# returns 1 if the file was uploaded (or would have been uploaded in 
	# simulation mode), 0 if the download of the current version failed or if 
	# the file is online already. dies if the upload itself fails.
	sub upload_file{
		my $self = shift;
		my $filename_src = shift;
		my $summary = shift;
		my $filename_dest = shift // $filename_src;
		my $data = $self->read_file_binary($filename_src);
		# check, whether file exists already. If so, compare. don't upload, if same.
		$self->msg(2, " downloading file if existant");
		my $oldfile = $self->{'mw_api'}->download({'title' => 'File:'.$filename_dest});
		if(not defined $oldfile or $self->{'mw_api'}->{'error'}->{'code'}!=0){
			$self->_handle_api_error();
			return 0;
		}
		$self->msg(3, " downloaded file (if existant)");
		if($oldfile ne '' and $oldfile eq $$data){
			$self->msg(1, "won't force uploading. file is already online.");
			return 0;
		}
		$self->msg(3, " trying to upload new file");
		if(!$self->{'simulation'}){
			my $uploaded = $self->{'mw_api'}->upload({
					'data'    => $$data,
					'summary' => $summary,
					'title'   => $filename_dest,
				});
			unless($uploaded){
				# die in any case, regardless of the return value of the handler
				$self->_handle_api_error();
				die "upload of '$filename_dest' failed\n";
			}
		}
		return 1;
	}

	# percent-encode a string by means of uri_escape (URI::Escape).
	# if the module has not been loaded, an error is logged and the string is 
	# returned unchanged.
	sub uri_escaper{
		my ($self, $url) = @_;
		unless($_loaded_mod{'URI::Escape'}){
			$self->msg(0, 'function uri_escape not available. ' . 
				'please check installation of perl module URI::Escape', 'error');
			return $url;
		}
		my $escaped = uri_escape($url);
		return $escaped;
	}

	# decode the percent-encoding of a string by means of URI::Escape.
	# if the module has not been loaded, an error is logged and the string is 
	# returned unchanged.
	# the function of URI::Escape is called with its full name on purpose: this 
	# method has the same name, so an unqualified call inside this package 
	# would refer to the same symbol as the method itself.
	sub uri_unescape{
		my $self    = shift;
		my $url     = shift;
		my $unescaped = $url;
		if($_loaded_mod{'URI::Escape'}){
			$unescaped = URI::Escape::uri_unescape($url);
		}else{
			$self->msg(0, 'function uri_unescape not available. ' . 
				'please check installation of perl module URI::Escape', 'error');
		}
		return $unescaped;
	}

	# convert an url of the current wiki (or the url part behind '/wiki/') to 
	# the title of the page. titles of files and categories get a leading colon.
	sub url2title{
		my ($self, $url) = @_;
		# strip the optional url prefix of the current wiki
		my ($url_part) = $url =~ /^(?:https?:\/\/$self->{'wm_lang'}\.$self->{'wm_proj_type'}\.org\/wiki\/)?(.*)/;
		my $article_name = $self->uri_unescape($url_part);
		$article_name =~ tr/_/ /;
		# the unescaped string consists of utf-8 encoded bytes; without decoding 
		# e.g. "K%C3%B6nigliche" would become "KÃ¶nigliche" (2014-12-23, seth)
		utf8::decode($article_name);
		$article_name =~ s/^(?=Datei:|Bild:|Image:|File:|Category:|Kategorie:)/:/;
		return $article_name;
	}

	# split the header part of a wikitable into its cells.
	# the cells are separated by '!!' if the header contains '!!', otherwise 
	# by '!'.
	# returns an array ref of the header cells.
	sub wikitableHeader2array{
		my ($self, $header) = @_;
		$header =~s/^\s*!\s*//s;
		$header =~s/\s+$//s;
		my @cells = index($header, '!!') > -1 
			? split(/\s*!!\s*/, $header) 
			: split(/\s*!\s*/, $header);
		return \@cells;
	}

	# decompose the wikitext of a table into a hash.
	# params:
	#   $wikitable: ref to string containing the table from '{|' to '|}'
	# returns a hash ref with the keys
	#   'style'  => {'table' => attributes of the table (class, id, ...)}
	#   'header' => array ref of header cells
	#   'body'   => array ref of rows (array refs of cells)
	# if the text can't be parsed as table, a notice is logged and an empty 
	# table is returned.
	sub wikitable2array{
		my $self = shift;
		my $wikitable = shift;
		my ($table_style, $header, $body) = $$wikitable =~/^\{\|   # begin of table
			\x20*((?-s:.*))\n+   # class, id, ...
			((?:!\s*[^\n]*\n+)*) # table header
			(.*)                 # table body
			(?<=\n)\|\}\n?$      # end of table
		/sx;
		unless(defined $body){
			# no match: don't use the (undefined or stale) capture variables
			$self->msg(1, 'could not parse wikitable.', 'notice');
			($table_style, $header, $body) = ('', '', '');
		}
		my $header_cells = $self->wikitableHeader2array($header);
		# decomposing wiki-table to array
		my $body_rows = $self->table_body2array(\$body);
		my $table = {
			'style'  => {'table' => $table_style},
			'header' => $header_cells,
			'body'   => $body_rows,
		};
		return $table;
	}

	# write a two-dimensional array to a csv file (utf-8).
	# every cell is enclosed in single quotes; backslashes and single quotes 
	# inside a cell are escaped by a backslash. undefined cells are written as 
	# empty cells. the given array is not modified.
	# params:
	#   $array:     array ref of rows (array refs of cells)
	#   $filename:  name of the file to write
	#   $separator (default ';'): cell separator
	# returns 1; dies if the file can't be opened or written.
	sub write_csv{
		my $self = shift;
		my $array = shift;
		my $filename = shift;
		my $separator = shift // ';';
		open(my $OUTFILE, '>:encoding(UTF-8)', $filename) 
			or die "could not open '$filename' for writing: $!\n";
		for my $row(@$array){
			# work on copies; the caller's data must stay untouched
			my @cells = map {
				my $cell = $_ // '';
				$cell =~ s/([\\'])/\\$1/g; # escape \ and '
				"'$cell'";                 # entry -> 'entry'
			} @$row;
			print $OUTFILE join($separator, @cells), "\n";
		}
		# errors of buffered writing show up on closing
		close($OUTFILE) or die "could not write '$filename': $!\n";
		return 1;
	}
}

{
	package CamelBotRC;
	use POSIX qw/strftime/;     # format timestamp
	use Data::Dumper;           # for debugging purposes
	use File::Slurp qw(write_file); # read/write files
	use Time::Local;            # timegm
	$| = 1; # deactivate buffering, so flush all the time

	# constructor.
	# params:
	#   $camelbot: CamelBot object that is used for all wiki and db operations
	# initialises the lists of recent changes and fetches the maintenance lists 
	# once (forced update of all types).
	sub new{
		my ($class, $camelbot) = @_;
		my %namespace_names = $camelbot->{'mw_bot'}->get_namespace_names();
		# alternation of all namespace names except the (empty) main namespace
		my $namespaces = join '|', grep {$_ ne ''} values %namespace_names;
		my $self = bless {
			'camelbot'             => $camelbot,
			're_edit_filter_name'  => qr/(?:Bearbeitung|Missbrauch)sfilter/,
			'rc_pages_shortterm'   => {},    # rc pages to check
			'rc_pages_midterm'     => {},    # rc pages to check
			'rc_pages_maintenance' => {},    # rc special pages to check
			'delay'                => 30*60, # seconds
			'delay_short'          => 10,    # seconds
			'short_max_age'        => 60*60*3, # seconds
			'start_ts'             => time,  # only for debugging purposes
			'last_ext_links'       => [],    # remember last external links to avoid some redundancy
			'max_ext_links'        => 1000,  # max length of list 'last_ext_links' 
			're_namespaces'        => qr/^$namespaces:/,
		}, $class;
		# initially update and fetch maintenance lists (no page, no text)
		$camelbot->update_maintenance_lists(
			undef, undef, 'all_types', $self->{'rc_pages_maintenance'});
		return $self;
	}

	# fetch external links from the db (db_fetch_externallinks) for the given 
	# range of ids and replace the underscores in the page titles by blanks.
	# returns the array ref of data sets, or undef if the db returned undef.
	sub get_extlinks_via_db{
		my ($self, $from_id, $to_id) = @_;
		my $camelbot = $self->{'camelbot'};
		my $extlinks = $camelbot->db_fetch_externallinks($from_id, $to_id);
		return $extlinks unless defined $extlinks;
		$_->{'page'} =~ s/_/ /g for @$extlinks;
		my $num_entries = @$extlinks;
		if($camelbot->{'verbosity'} >= 2 or $num_entries > 50){
			$camelbot->msg(1, 'got ' . $num_entries . ' entries.');
		}
		return $extlinks;
	}

	# fetch the pages that have been added to categories of the dead from the 
	# db (db_fetch_pages_of_categories_of_the_dead).
	# params:
	#   $since: passed on to the db function
	# each data set is normalised: underscores in 'page' and 'category' are 
	# replaced by blanks and 'full_page_title' (title including the name of the 
	# namespace) is added.
	# returns the array ref of data sets, or undef if the db returned undef.
	sub get_pages_of_cat_of_dead_via_db{
		my $self  = shift;
		my $since = shift;
		my $camelbot = $self->{'camelbot'};
		my $pages = $camelbot->db_fetch_pages_of_categories_of_the_dead($since);
		if(defined $pages){
			for my $dataset(@$pages){
				$dataset->{'page'} =~ s/_/ /g;
				# the namespace is converted by the camelbot object, as in 
				# get_rc_via_db
				$dataset->{'full_page_title'} = $dataset->{'namespace'} == 0 ? '' : 
					$camelbot->convert_ns($dataset->{'namespace'}) . ':';
				$dataset->{'full_page_title'} .= $dataset->{'page'};
				$dataset->{'category'} =~ s/_/ /g;
			}
			if($camelbot->{'verbosity'} >= 2 or @$pages > 50){
				$camelbot->msg(1, 'got ' . (0 + @$pages) . ' entries.');
			}
		}
		return $pages;
	}

	# fetch recent changes from the db (db_fetch_recentchanges).
	# params:
	#   $timestamp_begin (default: 12 hours ago): passed on to the db function
	# each data set with a timestamp of the form YYYYMMDDhhmmss is extended by 
	# 'timestamp_unix', 'page_with_ns' and 'diffbytes'; its 'page' gets blanks 
	# instead of underscores, 'rc_old_len' and 'rc_new_len' are removed. 
	# data sets with other timestamps are left unchanged.
	# returns the array ref of data sets, or undef if the db returned undef.
	sub get_rc_via_db{
		my ($self, $timestamp_begin) = @_;
		my $camelbot = $self->{'camelbot'};
		$timestamp_begin //= $camelbot->get_time_iso(time()-60*60*12);
		my $rc_pages = $camelbot->db_fetch_recentchanges($timestamp_begin);
		return undef unless defined $rc_pages;
		for my $dataset(@$rc_pages){
			next unless $dataset->{'timestamp'} =~/^
				(?<year>[0-9]{4})
				(?<month>[0-9]{2})
				(?<mday>[0-9]{2})
				(?<hour>[0-9]{2})
				(?<min>[0-9]{2})
				(?<sec>[0-9]{2})
				$/x;
			my %ts = %+;
			$dataset->{'timestamp_unix'} = timegm(
				$ts{'sec'}, $ts{'min'}, $ts{'hour'}, 
				$ts{'mday'}, $ts{'month'}-1, $ts{'year'});
			$dataset->{'page'} =~ s/_/ /g;
			my $ns_id = $dataset->{'ns_id'};
			if($ns_id == 0){
				$dataset->{'page_with_ns'} = $dataset->{'page'};
			}else{
				$dataset->{'page_with_ns'} = 
					$camelbot->convert_ns($ns_id) . ':' . $dataset->{'page'};
			}
			# size of the change; the single lengths are not needed any longer
			my $new_len = delete $dataset->{'rc_new_len'};
			my $old_len = delete $dataset->{'rc_old_len'};
			$dataset->{'diffbytes'} = $new_len - $old_len;
		}
		my $num_entries = @$rc_pages;
		if($camelbot->{'verbosity'} >= 2 or $num_entries > 50){
			$camelbot->msg(1, 'got '.$num_entries.' entries.');
		}
		return $rc_pages;
	}

	# TODO: to be deleted
	#sub handle_cat_dead{
	#	my $self              = shift;
	#	my $maintenance_lists = shift; # maintenance lists
	#	my $mt_type           = shift; # cat dead
	#	my $pages             = shift;
	#	my $additional_col    = shift // []; # ['column1', 'column2', ...]
	#	my $camelbot = $self->{'camelbot'};
	#	my $mt_type_page = $camelbot->{'maintenance'}{'page_name'}{$mt_type}
	#		// $mt_type;
	#	# get maintenance list
	#	my $ml = "User:CamelBot/maintenance list/$mt_type_page";
	#	my $ml_text = $camelbot->{'mw_bot'}->get_text($ml) // '';
	#	my $ml_text_bak = $ml_text;
	#	# rebuild base of maintenance list
	#	my $intro = $camelbot->{'maintenance'}{'intros'}{$mt_type}
	#		. "== liste ==\n";
	#	my $outro = "\n\n[[Kategorie:Wikipedia:Wartungskategorie|$mt_type_page]]";
	#	my $wikitable;
	#	# extract (save) old intro
	#	if($ml_text =~ /^(.*\n== [lL]iste? ==\n+.*?)(\{\|.*?\|\})\n(.*)$/s){
	#		$intro = $1;
	#		$wikitable = $2;
	#		$outro = $3;
	#	}else{ # or create a new one
	#		$camelbot->msg(1, 'could not fetch table. resetting page.', 'notice');
	#		$wikitable = '{| class="wikitable sortable"' . "\n" .
	#			"! page !! " . 
	#			join(' !! ', (@$additional_col, "date of detection by CamelBot")) . 
	#			"\n" . '|}';
	#	}
	#	# get old table content
	#	my $table = $camelbot->wikitable2array(\$wikitable);
	#	# number of additional (ignored/unchanged) columns
	#	my $num_add_cols = @{$table->{'header'}} - 2 - scalar(@$additional_col);
	#	$camelbot->msg(1, "number of additional cols = $num_add_cols");
	#	# search new entry, if not in array, then push
	#	my @added_pages;
	#	my $summary = 'update maintenance list';
	#	for my $page(@$pages){
	#		my $new_entry = "[[$page->{'full_page_title'}]]";
	#		if(grep {
	#				$_->[0] eq $new_entry and $_->[1] eq $page->{'category'}
	#			} @{$table->{'body'}}
	#		){
	#			$camelbot->msg(1, "page '$page->{'full_page_title'}' already in list");
	#			next;
	#		}else{ # add to list
	#			$camelbot->msg(1, "add new list entry '$page->{'full_page_title'}'");
	#			# if table has more than 3 columns, then pad each row
	#			my @empty_cells = split //, (' ' x $num_add_cols);
	#			push @{$table->{'body'}}, [
	#				$new_entry, 
	#				$page->{'category'}, 
	#				$camelbot->get_date_iso(), 
	#				@empty_cells
	#			];
	#			push @added_pages, "$page->{'full_page_title'} ($page->{'category'})";
	#		}
	#	}
	#	if(@added_pages > 0){
	#		$summary = 'added ' . join('; ', @added_pages);
	#	}
	#	$summary .= '; num items = ' . scalar(@{$table->{'body'}});
	#	my $full_auto = 1;
	#	# convert array to wikitable
	#	$wikitable = $camelbot->table2wikitext($table, !$full_auto);
	#	# rebuild page, i.e., put pieces together and save
	#	$ml_text = $intro . $wikitable . $outro;
	#	$camelbot->time_management();
	#	$camelbot->save_wiki_page($ml, $summary, \$ml_text, \$ml_text_bak);
	#	return 1;
	#}

	# pass new external links to archive_ext_links of the camelbot object.
	# params:
	#   $extlinks: array ref of hash refs with the keys 'el_to' (url) and 'page'
	# protocol-relative urls ('//...') are changed to https in place. links 
	# matching the wikimedia pattern below and links that are still in the list 
	# of the recently treated links ($self->{'last_ext_links'}, at most 
	# $self->{'max_ext_links'} entries) are skipped.
	# returns 1.
	sub handle_extlinks{
		my ($self, $extlinks) = @_;
		my $recent_links = $self->{'last_ext_links'};
		for my $extlink(@$extlinks){
			$extlink->{'el_to'} =~ s/^\/\//https:\/\//;
			my $url = $extlink->{'el_to'};
			next if $url =~ /
				https?:\/\/
				(?:[a-zA-Z0-9_-]+\.)?
				(?:wiki(?:books|data|[mp]edia)|wmflabs)
				\.org(?![^\/])/x;
			# check whether this link has been treated recently
			next if grep {$_ eq $url} @$recent_links;
			# if not, then add link to list of recent ext. links
			push @$recent_links, $url;
			shift @$recent_links if @$recent_links > $self->{'max_ext_links'};
			$self->{'camelbot'}->archive_ext_links([$url], $extlink->{'page'});
		}
		return 1;
	}

	# treat the pages that have been collected from the recent changes.
	# both lists ($self->{'rc_pages_shortterm'} and $self->{'rc_pages_midterm'}) 
	# map a page title to a unix time stamp. per call at most 
	# $max_pages_per_call pages (the oldest ones) of each list are considered; 
	# a page is treated and removed from its list as soon as its time stamp is 
	# older than the delay of the list ('delay_short' or 'delay' respectively).
	#   short term: notifier is called for the page
	#   mid term:   the page is fetched, cleaned up (and saved, if changed) and 
	#               the maintenance lists are updated
	# returns 1.
	sub handle_rc_pages{
		my $self   = shift;
		my $now    = time;
		my $camelbot = $self->{'camelbot'};
		# delayed clean up:
		my $max_pages_per_call = 200;
		my $min_info_number = 10; # for debugging/info only
		# cope with short-term list
		#  after delay transfer pages from $rc_pages_shortterm to @updList_shortterm.
		#  use oldest $max_pages_per_call articles only
		my @loop_array = sort { # sort by timestamp
			$self->{'rc_pages_shortterm'}->{$a} <=> $self->{'rc_pages_shortterm'}->{$b}
			} keys %{$self->{'rc_pages_shortterm'}};
		if(@loop_array > 0){
			# right trim array
			splice @loop_array, $max_pages_per_call if @loop_array > $max_pages_per_call;
			my @updList_shortterm;
			# just for debugging/info output
			my $oldest_date = $camelbot->get_time_iso_($self->{'rc_pages_shortterm'}->{$loop_array[0]});
			my $newest_date = $camelbot->get_time_iso_($self->{'rc_pages_shortterm'}->{$loop_array[-1]});
			# @loop_array is a snapshot of the keys, so deleting from the hash 
			# inside the loop does not disturb the iteration
			for my $title(@loop_array){
				my $timestamp_unix = $self->{'rc_pages_shortterm'}->{$title};
				if($timestamp_unix + $self->{'delay_short'} < $now){
					delete $self->{'rc_pages_shortterm'}->{$title};
					$camelbot->msg(2, ' short term update of page: '. $title);
					push @updList_shortterm, $title;
				}
			}
			# print some information
			my $numArticles = scalar(@updList_shortterm);
			if($numArticles >= $min_info_number or $camelbot->{'verbosity'} >= 2){
				# total = pages treated now + pages that are still in the list
				$camelbot->msg(1, "starting notification check of $numArticles (of " . 
					($numArticles + keys(%{$self->{'rc_pages_shortterm'}})) . ") articles, " .
					"dated from $oldest_date to $newest_date.");
			}
			# all pages in @updList_shortterm shall be treated now.
			for my $page(@updList_shortterm){
				# notifier on particular edits; there needs to be some delay, because 
				# otherwise it may occur that the notification is faster than the saving 
				# process itself.
				$camelbot->msg(3, "fetch page '$page'.");
				$camelbot->notifier($page);
			}
			if($numArticles >= $min_info_number){
				$camelbot->msg(2, "end of notification check of $numArticles articles.");
			}
		}
		# cope with mid-term list
		#  after delay transfer pages from $rc_pages_midterm to @updList_midterm
		@loop_array = sort {
			$self->{'rc_pages_midterm'}->{$a} <=> $self->{'rc_pages_midterm'}->{$b}
			} keys %{$self->{'rc_pages_midterm'}};
		if(@loop_array > 0){
			splice @loop_array, $max_pages_per_call if @loop_array > $max_pages_per_call;
			my @updList_midterm;
			my $oldest_date = $camelbot->get_time_iso_($self->{'rc_pages_midterm'}->{$loop_array[0]});
			my $newest_date = $camelbot->get_time_iso_($self->{'rc_pages_midterm'}->{$loop_array[-1]});
			for my $title(@loop_array){
				my $timestamp_unix = $self->{'rc_pages_midterm'}->{$title};
				if($timestamp_unix + $self->{'delay'} < $now){
					delete $self->{'rc_pages_midterm'}->{$title};
					$camelbot->msg(2, ' mid term update of page: '. $title);
					push @updList_midterm, $title;
				}
			}
			#  print some information
			my $numArticles = scalar(@updList_midterm);
			if($numArticles >= $min_info_number or $camelbot->{'verbosity'} >= 2){
				$camelbot->msg(1, "starting mid term checks of $numArticles (of " . 
					($numArticles + keys(%{$self->{'rc_pages_midterm'}})) . ") articles, " .
					"dated from $oldest_date to $newest_date.");
			}
			#  all pages in @updList_midterm shall be treated now.
			my $cnt = 0;
			for my $page(@updList_midterm){
				$camelbot->msg(3, "fetch page '$page' (".(++$cnt).").");
				my $text = $camelbot->{'mw_bot'}->get_text($page);
				# NOTE(review): the else branch below is also reached if the page 
				# was fetched but is_allowed returned false; its message only 
				# mentions the failed fetch.
				if(defined $text and $camelbot->is_allowed(\$text, $page)){
					my $text_bak = $text;
					# TODO: test this
					## find all external links and push to web-archive
					## find links
					#my @urls = ($$text =~ /https?:\/\/$self->{'re_url_rear'}/g);
					#$self->archive_ext_links(\@urls, $page);
					my ($changes, $summary) = $camelbot->cleanup_wiki_page(\$text, $page);
					# save only if the clean-up changed the text
					if($text_bak ne $text){
						$camelbot->time_management();
						$camelbot->save_wiki_page($page, $summary, \$text, \$text_bak);
					}
					$camelbot->update_maintenance_lists($page, \$text, 'all_types', 
						$self->{'rc_pages_maintenance'});
				}else{
					$camelbot->msg(2, "could not fetch page '$page'. maybe deleted already", 
						'notice');
				}
			}
			if($numArticles >= $min_info_number){
				$camelbot->msg(2, "end of mid term checks of $numArticles articles.");
			}
		}
		return 1;
	}

	# takes a batch of parsed recent-change messages, queues the affected 
	# articles and triggers the pending follow-up work.
	#
	# a message is tracked if it is not an edit of the bot itself, not a revert 
	# of a CamelBot edit (don't play edit war) and lies in namespace 0. such a 
	# page is put on the mid term list and - if the edit is younger than 
	# $self->{'short_max_age'} seconds - on the short term list, too. pages that 
	# are already on a maintenance list are handed to update_maintenance_lists.
	# a change in namespace 4 below the edit filter page prefix triggers one 
	# update of the edit filter index per call.
	#
	# params:
	#   $rc_pages  array ref of hash refs; keys used here: 'user', 'summary', 
	#              'ns_id', 'page', 'page_with_ns', 'timestamp_unix'
	# returns: 1
	sub put_new_rc{
		my $self     = shift;
		my $rc_pages = shift;
		my $camelbot = $self->{'camelbot'};
		my %update = (
			'editfilter' => 0,
		);
		for my $rc_msg(@$rc_pages){
			if(# $rc_msg->{'user'} eq 'Lustiger seth' or # for debugging only
				$rc_msg->{'user'} ne $camelbot->{'mw_username'} # don't track own edit
				&& (!(defined $rc_msg->{'summary'}) # don't play edit war
					|| $rc_msg->{'summary'} !~ 
						/\bCamelBot\b.*\b(?:rückgängig gemacht|revertiert)\b/)
				&& $rc_msg->{'ns_id'} == 0
			){
				# add page to mid term list
				$self->{'rc_pages_midterm'}->{$rc_msg->{'page_with_ns'}} = 
					$rc_msg->{'timestamp_unix'};
				# add page to short term list
				if(time - $rc_msg->{'timestamp_unix'} < $self->{'short_max_age'}){
					$self->{'rc_pages_shortterm'}->{$rc_msg->{'page_with_ns'}} = 
						$rc_msg->{'timestamp_unix'};
				}
				# print status
				$camelbot->msg(2, 'new rc: ' . 
					($rc_msg->{'timestamp_unix'} - $self->{'start_ts'}) .
					' (#pages in stack: mt = ' . 
					scalar(keys %{$self->{'rc_pages_midterm'}}) . ', st = ' . 
					scalar(keys %{$self->{'rc_pages_shortterm'}}) . ') ' . 
					$rc_msg->{'page_with_ns'} . ', editor = ' . $rc_msg->{'user'}
				);
				# check if page is on a maintenance list.
				# iterate over a snapshot of the keys instead of using each(): the 
				# hash itself is handed to update_maintenance_lists, and any 
				# keys/values/each on it in there would reset the iterator of a 
				# surrounding each-loop (repeated or skipped entries).
				my $text = undef; # no page text at hand here
				for my $type(keys %{$self->{'rc_pages_maintenance'}}){
					my $pages = $self->{'rc_pages_maintenance'}->{$type};
					next unless defined $pages 
						and exists $pages->{$rc_msg->{'page_with_ns'}};
					$camelbot->msg(2, "page '$rc_msg->{'page_with_ns'}' is on maintenance list '$type'.");
					$camelbot->update_maintenance_lists($rc_msg->{'page_with_ns'}, $text, 
						$type, $self->{'rc_pages_maintenance'});
				}
			}
			# update edit filter index (only flag it here, so it runs once per call)
			if($rc_msg->{'ns_id'} == 4 and $update{'editfilter'} == 0
					and $rc_msg->{'page'} =~ /^$self->{'re_edit_filter_name'}\//
			){
				$update{'editfilter'} = 1;
			}
		}
		# work off the queues, if there is anything queued
		if(keys(%{$self->{'rc_pages_shortterm'}}) > 0 or 
				keys(%{$self->{'rc_pages_midterm'}}) > 0){
			$self->handle_rc_pages();
		}
		if($update{'editfilter'}){
			$camelbot->update_edit_filter_index();
		}
		return 1;
	}

	# polls the db in an endless loop for pages that were added to the 
	# categories of the dead and passes new entries to 
	# update_maintenance_lists_build_table. starts two days in the past.
	# does not return; break via ctrl-c.
	sub db_cat_of_dead_monitoring{
		my $self = shift;
		my $camelbot = $self->{'camelbot'};
		my $continue_timestamp = strftime("%Y-%m-%d %H:%M:%S", gmtime(time()-60*60*24*2));
		my $sleep_delay = 115; # seconds
		my $last_check = 0;
		# entries of the current edge timestamp that have been handled already
		my @edge_articles_bak = ();
		while(1){
			# keep a distance of $sleep_delay seconds between two db queries
			my $next_check = $sleep_delay + $last_check;
			if($next_check > time){
				my $sleep_time = $next_check - time;
				$camelbot->msg(2, "sleep $sleep_time seconds");
				sleep($sleep_time);
			}
			$last_check = time;
			$camelbot->msg(2, 'get new pages of categories of the dead from db');
			my $new_entries = $self->get_pages_of_cat_of_dead_via_db($continue_timestamp);
			# per page keep the newest entry only; result is sorted oldest first
			$camelbot->msg(2, 'delete multiple entries');
			my %seen;
			my @newest_first = 
				sort {$b->{'cl_timestamp'} cmp $a->{'cl_timestamp'}} @$new_entries;
			my @unique = grep {!$seen{$_->{'full_page_title'}}++} @newest_first;
			$new_entries = [reverse @unique];
			# did the newest timestamp move on?
			if(@$new_entries > 0){
				my $newest_timestamp = $new_entries->[-1]->{'cl_timestamp'};
				if($continue_timestamp ne $newest_timestamp){
					@edge_articles_bak = (); # reset array of redundant articles
					$continue_timestamp = $newest_timestamp;
					$camelbot->msg(1, 'last cl_timestamp: ' . $continue_timestamp);
				}
			}
			# entries of the newest timestamp will be re-fetched by the next query 
			# (along with others that may be of the same timestamp). so:
			# 1. cut off all entries of the newest timestamp, ...
			my @edge_articles = ();
			while(@$new_entries > 0){
				last if $new_entries->[-1]->{'cl_timestamp'} ne $continue_timestamp;
				unshift @edge_articles, pop @$new_entries;
			}
			# 2. ... and put back those that have not been handled before
			for my $edge_article(@edge_articles){
				my $title = $edge_article->{'full_page_title'};
				next if grep {$_->{'full_page_title'} eq $title} @edge_articles_bak;
				push @$new_entries, $edge_article;
				push @edge_articles_bak, $edge_article;
			}
			my $num_new = 0 + @$new_entries;
			$camelbot->msg(1, 'got ' . $num_new . ' new entries');
			if($num_new > 0){
				$camelbot->update_maintenance_lists_build_table(
					undef, 'cat dead', $new_entries, ['category']);
				$sleep_delay = 1; 
			}else{
				$sleep_delay = 115; 
			}
		}
	}

	# polls the db for external links with ids from $from_id on, hands new links 
	# to handle_extlinks and records the progress in a state file, such that an 
	# interrupted run can be continued.
	#
	# params:
	#   $from_id        first el_id to fetch (default = 1)
	#   $to_id          last el_id to fetch; undef = no upper limit
	#   $file_of_state  name of state file (default = generated temp name)
	#
	# the loop ends as soon as $from_id exceeds a given $to_id. without $to_id it 
	# is endless (break via ctrl-c).
	sub db_extlinks_monitoring{
		my $self    = shift;
		my $from_id = shift // 1;
		my $to_id   = shift;
		my $file_of_state = shift // 'temp_' . time . rand . '.tmp';
		my $last_check = 0;
		my $camelbot = $self->{'camelbot'};
		$camelbot->msg(2, 'to_id = ' . (defined $to_id ? $to_id: 'undef'));
		$camelbot->msg(2, "file_of_state = $file_of_state");
		my $sleep_delay = 30; # seconds
		# endless loop if $to_id is not defined, break via ctrl-c
		while(not defined $to_id or $to_id >= $from_id){
			# wait for $sleep_delay seconds before sending a query to db again
			if($last_check + $sleep_delay > time){
				my $sleep_time = $sleep_delay + $last_check - time;
				$camelbot->msg(2, "sleep $sleep_time seconds");
				sleep($sleep_time);
			}
			$last_check = time;
			$camelbot->msg(2, 'get new external links from db');
			my $new_extlinks = $self->get_extlinks_via_db($from_id, $to_id);
			# delete multiple entries of same link target, keep the one with the 
			# highest el_id; result is sorted ascending by el_id
			$camelbot->msg(2, 'delete multiple entries');
			my %dbl_counter;
			$new_extlinks = [reverse
				grep {
					$dbl_counter{$_->{'el_to'}} = (exists $dbl_counter{$_->{'el_to'}}) ? 0 : 1;
					$dbl_counter{$_->{'el_to'}};
				} sort {$b->{'el_id'} <=> $a->{'el_id'}} @$new_extlinks
			];
			if(@$new_extlinks > 0){
				$from_id = $new_extlinks->[-1]->{'el_id'};
				$camelbot->msg(1, 'last from_id: ' . $from_id);
				++$from_id; # next query starts behind the last fetched id
				$camelbot->msg(1, 'got ' . (0 + @$new_extlinks) . ' new external links');
				$self->handle_extlinks($new_extlinks);
				# $from_id is the next id to be fetched already. so store it as it is, 
				# otherwise a continued run would skip one id.
				write_file($file_of_state, {binmode => ':utf8'}, 
					'continue_from=' . $from_id . ';' . 
					'continue_to=' . ($to_id // ''));
				$sleep_delay = 1; 
			}else{
				$sleep_delay = 55; 
			}
		}
	}

	# polls the db in an endless loop for recent changes and feeds them to 
	# put_new_rc. starts 0.05 days in the past. does not return; break via 
	# ctrl-c.
	sub db_rc_monitoring{
		my $self = shift;
		my $camelbot = $self->{'camelbot'};
		my $sleep_delay = 30; # seconds
		my $last_check = 0;
		#my $continue_timestamp = undef;
		# format is e.g. '20150217170000'
		my $continue_timestamp = strftime("%Y%m%d%H%M%S", gmtime(time()-60*60*24*.05));
		while(1){
			# keep a distance of $sleep_delay seconds between two db queries
			my $next_check = $sleep_delay + $last_check;
			if($next_check > time){
				my $sleep_time = $next_check - time;
				$camelbot->msg(2, "sleep $sleep_time seconds");
				sleep($sleep_time);
			}
			$last_check = time;
			$camelbot->msg(2, 'get new rc pages from db');
			my $new_rc_pages = $self->get_rc_via_db($continue_timestamp);
			# per page keep the newest change only; result is sorted oldest first
			$camelbot->msg(2, 'delete multiple entries');
			my %seen;
			my @newest_first = 
				sort {$b->{'timestamp_unix'} <=> $a->{'timestamp_unix'}} @$new_rc_pages;
			my @unique = grep {!$seen{$_->{'page_with_ns'}}++} @newest_first;
			$new_rc_pages = [reverse @unique];
			next unless @$new_rc_pages > 0;
			my $newest = $new_rc_pages->[-1];
			$continue_timestamp = $newest->{'timestamp'};
			my $newest_iso = $camelbot->get_time_iso_($newest->{'timestamp_unix'});
			$camelbot->msg(2, 'last timestamp: ' . $newest_iso);
			# drop all entries of the newest timestamp, because they will be 
			# re-fetched by the next query along with others (that may be of the 
			# same timestamp)
			while(@$new_rc_pages > 0){
				last if $new_rc_pages->[-1]->{'timestamp'} ne $continue_timestamp;
				pop @$new_rc_pages;
			}
			my $num_new = 0 + @$new_rc_pages;
			$camelbot->msg(1, 'got ' . $num_new . ' new pages');
			$self->put_new_rc($new_rc_pages);
		}
	}
}

{
	# irc front end of the bot, based on Bot::BasicBot. it listens to a channel 
	# and passes parsed recent-change messages to a CamelBotRC object.
	package CamelBotIRC;
	use if $_running_on_labs, lib => qw(/data/project/camelbot/perl/lib);
	use parent qw(Bot::BasicBot); # irc bot
	use Data::Dumper;           # for debugging purposes
	use Time::Local;            # timegm
	$| = 1; # deactivate buffering, so flush all the time

	# constructor
	# params:
	#   $camelbot  the CamelBot object to work with
	#   $params    hash ref of params that are passed to Bot::BasicBot->new
	# returns: the new object, which additionally contains a CamelBotRC object 
	#   ($self->{'rc'}) and the given CamelBot object ($self->{'camelbot'})
	sub new{
		my $class    = shift;
		my $camelbot = shift;
		my $params   = shift;
		my $self = $class->SUPER::new(%$params);
		$self->{'rc'} = CamelBotRC->new($camelbot);
		$self->{'camelbot'} = $camelbot;
		return $self;
	}

	# bot help
	# returns: the help text that is sent on request
	sub help {
		my $self = shift;
		return 'i\'m just listening to recent changes in w:de and sometimes those changes trigger some of my functions. if you have any questions, please ask my master at https://de.wikipedia.org/wiki/user_talk:lustiger_seth.';
	}

	# bot handler: if somebody said something
	# params:
	#   $message  hash ref describing the message; keys used here: 'who', 
	#             'body', 'address'
	# returns: nothing. Bot::BasicBot sends any defined return value of this 
	#   handler as a reply, so the return has to be explicit. answers are sent 
	#   via reply() only.
	sub said {
		my ($self, $message) = @_;
		my $camelbot = $self->{'camelbot'};
		# print everything what is said to you
		$camelbot->msg(4, "$message->{who}: $message->{body}");

		# general
		if(defined $message->{address} and $message->{address} eq $self->{nick}){
			$camelbot->msg(2, $message->{who}.': '.$message->{body});
			# help
			if($message->{body}=~/^(?:help|was machst du\?|was kannst du\?)/){
				$self->reply($message, $self->help());
			}
		}

		# rc-channel
		if($self->{'alias'} eq 'rc'){
			# reporting of specific edits
			if($message->{who} =~/^rc-\w+$/){
				# parse rc message
				my $rc_msg = $camelbot->parse_irc_rc($message->{body});
				$self->{'rc'}->put_new_rc([$rc_msg]);
				#if(# $rc_msg->{'user'} eq 'Lustiger seth' or # for debugging only
				#	$rc_msg->{'user'} ne $camelbot->{'mw_username'} and # don't track own edit
				#	$rc_msg->{'summary'} !~ /\bCamelBot\b.*\b(?:rückgängig gemacht|revertiert)\b/ and # don't play edit war
				#	$rc_msg->{'ns_id'} == 0
				#){
				#	$self->forkit(
				#		run => \&{$self->put_new_rc}, 
				#		channel => $message->{'channel'}, 
				#		#handler => \&_fork_said_handler,
				#		body => $self,
				#		arguments => [$rc_msg, time()],
				#	);
				#}
				##print Dumper $rc_msg;
				## update edit filter index
				#if($rc_msg->{'ns_id'} == 4 and $rc_msg->{'page'} =~ /^$self->{'rc'}->{'re_edit_filter_name'}\//){
				#	$camelbot->update_edit_filter_index();
				#}
			}
		}
		# don't let the value of the last statement (e.g. the 1 of put_new_rc) 
		# leak out as a reply
		return;
	}
}

# main
# read the cli params and create the bot object all commands below work with
my $params = syntaxCheck(@ARGV);
my $camelbot = CamelBot->new({
	# fixed settings
	'ask_user'          => 0,
	'max_edits_per_min' => 5,
	'showdiff'          => 0,
	# settings taken from cli
	'cliparams'         => $params,
	'db_access'         => $params->{'db-access'},
	'minor'             => $params->{'minor'},
	'mw_username'       => $params->{'username'}, 
	'simulation'        => $params->{'test'},
	'verbosity'         => $params->{'verbose'},
	# settings for other wikis
	#'mw_password'  => undef, 
	#'host'         => 'wiki.selfhtml.org',
	#'rel_url_path' => 'mediawiki',
});

# used for debugging only:
#my $camelbot_rc = CamelBotRC->new($camelbot);
#my $text = $camelbot->{'mw_bot'}->get_text("T\xe4cks");
#my $changes = $camelbot->cleanup_wiki_page(\$text, 'moep');
#print Dumper $changes;
#exit 1;

# --archive-ext-links: save external links at archive.org, using db.
# the param is of format 'from_id=...;to_id=...;file=...;'. if the given state 
# file exists, its 'continue_from' overrides from_id and its 'continue_to' is 
# used, unless to_id is given explicitly.
if($params->{'archive-ext-links'}){
	my $spec = $params->{'archive-ext-links'};
	if($spec =~ /\b(?:from|to)_id\s*=\s*[0-9]|\bfile\s*=/){
		my ($from_id, $to_id, $file);
		# demand at least one digit. an empty id (e.g. 'to_id=;') has to stay 
		# undef, such that the defaults of db_extlinks_monitoring are used.
		$from_id = $1 if $spec =~ /\bfrom_id\s*=\s*([0-9]+)/;
		$to_id = $1 if $spec =~ /\bto_id\s*=\s*([0-9]+)/;
		$file = $1 if $spec =~ /\bfile\s*=\s*([^;]*[^; ])/;
		if(defined $file){
			if(-e $file){
				# continue a former run
				my $file_content = slurp $file;
				$from_id = $1 if $file_content =~ /\bcontinue_from\s*=\s*([0-9]+)/;
				$to_id = $1 if not defined $to_id and $file_content =~ /\bcontinue_to\s*=\s*([0-9]+)/;
			}else{
				$camelbot->msg(1, "file '$file' not found, will be created");
			}
		}
		my $camelbot_rc = CamelBotRC->new($camelbot);
		$camelbot_rc->db_extlinks_monitoring($from_id, $to_id, $file);
	}else{
		$camelbot->msg(0, 'not implemented / wrong syntax', 'error');
	}
	$camelbot->msg(1, 'leaving');
}

# --cat-add: add the pages listed here to the category given here
if($params->{'cat-add'}){
	my $category = 'safety data sheet';
	my @pages = (
		# list of articles {{{
		# }}}
	);
	$camelbot->cat_add(\@pages, $category);
}

# --cat-change: rename the category given here
$camelbot->cat_rename('Software tool', 'Library or package') 
	if $params->{'cat-change'};

# --cat-dead: monitor additions to categories of the dead, using db
if($params->{'cat-dead'}){
	CamelBotRC->new($camelbot)->db_cat_of_dead_monitoring();
	$camelbot->msg(1, 'leaving');
}

# --delete: delete marked pages
$camelbot->delete_marked_pages() if $params->{'delete'};

# --diff-from: a number is treated as revision id, anything else as page title
if($params->{'diff-from'}){
	my $from = $params->{'diff-from'};
	my %diff_from = (($from =~ /^[0-9]+\z/ ? 'revid' : 'title') => $from);
	my $diff = $camelbot->get_diff(\%diff_from, $params->{'diff-to'});
	#print Dumper $diff;
}

# --download-by-prefix: download all pages with given prefix
$camelbot->download_pages_by_prefix($params->{'download-by-prefix'}) 
	if defined $params->{'download-by-prefix'};

# --get-page: fetch the content (or one section) of the selected pages and 
# dump it, if verbosity is > 0
if(defined $params->{'get-page'}){
	my %selection = (
		'single page'    => $params->{'wikipage'},
		'from all_pages' => $params->{'wikipages'},
		'search'         => $params->{'search'},
	);
	my $pages = $camelbot->get_pages(\%selection);
	if(@$pages == 0){
		$camelbot->msg(0, "no pages to be edited.", 'warning');
	}else{
		utf8::decode($params->{'section'});
		my $pages_content = $camelbot->get_pages_content($pages, $params->{'section'});
		if($params->{'verbose'} > 0){
			print Dumper $pages_content;
		}
	}
}

# --http-status: print the http response status code of each given url.
# multiple urls are separated by whitespace.
if(defined $params->{'http-status'}){
	my @urls = (
		#'http://de.wikipedia.org/example',
		# split ' ' skips leading whitespace and treats runs of whitespace as one 
		# separator, so no empty "urls" are checked
		split ' ', $params->{'http-status'}
	);
	for my $url(@urls){
		my $response_code = $camelbot->get_http_status($url);
		$camelbot->msg(1, $response_code.' '.$url);
		sleep 5; # be gentle, wait between two requests
	}
}

# --link-replacement: replace or remove external links. the task is configured 
# right here in the source, so adapt the settings before running.
if($params->{'link-replacement'}){
	# domain used by case 4 below
	my $simple_domain = 'example.com';
	# general settings
	my %options = (
		'articles'      => 1,     # 0 = don't work on namespace 0; 1 = work on namespace 0
		'nonarticles'   => 1,     # 0 = don't work on namespace!=0; 1 = work on namespace!=0
		'refs'          => 1,     # 0 = don't touch refs; 1 = replace refs
		'nonrefs'       => 1,     # 0 = don't touch non-refs; 1 = replace non-refs
		'max_edits'     => -1,    # maximum number of edits (-1 = inf)
		'skip_edits'    => 0,     # skip number of pages
		'results'       => 1,     # print a summary at the end
		# user to use in api contributions
		'user_contribs' => undef, # {'user' => 'Gary Dee'};
		're_prot_part'  => qr/http:\/\//,
	);
	# meaning of the task specific settings:
	#  searched_link = link to use in api link search
	#  re_prot_part  = regexp pattern of protocol
	#  re_url_part   = regexp pattern of searched url (without protocol)
	#  replacement   = replacement part of s///
	#  summary       = summary of page edit

	# some typical cases:
	# 1. linkfix
	#$options{'searched_link'} = '*.bgblportal.de/BGBL/bgbl1f/';
	#$options{'searched_link'} = '217.160.60.235';
	#$options{'searched_link'} = 'eur-lex.europa.eu/legal-content/';
	#$options{'searched_link'} = 'eur-lex.europa.eu/LexUriServ/site';
	##$options{'re_url_part'}   = qr/www\.bgblportal\.de\/BGBL\/bgbl1f\/bgbl(\d+s\d+[a-z]?)\.?pdf/;
	##$options{'re_url_part'}   = qr/www\.bgblportal\.de\/BGBL\/bgbl1f\/b(\d+)[a-z]\.?pdf/;
	#$options{'re_url_part'}   = qr/217.160.60.235\/BGBL\/bgbl1f\/b(?:gbl)?(\d+s\d+[a-z]?|\d+)[a-z]?\.?pdf/;
	#$options{'re_url_part'}   = qr/eur-lex.europa.eu\/legal-content\/.*?uniserv/;
	#$options{'re_url_part'}   = qr/eur-lex.europa.eu\/LexUriServ\/site\//;
	#$options{'delete_link'}   = undef;
	#$options{'replacement'}   = # function disabled, has to be re-implemented
	# $from = qr/(\Qeur-lex.europa.eu\/legal-content\/\E[A-Z]{2}\/TXT\/PDF\/\?uri=)uniserv(:OJ\.[A-Z_]+\.[0-9]{4}\.[0-9]+\.01)([0-9]{4}\.01\.[A-Z]{3})$/;
	# $to   = 'http://'.$1.'uriserv'.$2.'.'.$3;
	# $from = qr/www\.bgblportal\.de\/BGBL\/bgbl1f\/bgbl(\d+s\d+[a-z]?)\.?pdf/;
	# $to   = 'http://www.bgbl.de/Xaver/start.xav?startbk=Bundesanzeiger_BGBl&start=//*%5B\@attr_id=%27bgbl'.$1.'.pdf%27%5D';
	#$options{'summary'}       = 'link fixes, see [[WP:FZW#Mehrere_Hundert_Weblinks_auf_EUR-Lex_defekt]]';

	# 2. linkfix
	#$options{'searched_link'} = '*.google.com';
	#$options{'re_url_part'}   = qr/[a-z0-9]+\.google\.[a-z]+\/.*?[?&]url=[^&]+/;
	#$options{'replacement'}   = sub {
	# $from = qr/[a-z0-9]+\.google\.[a-z]+\/.*?[?&]url=([^&]+)/;
	# $to   = $1;
	#$options{'summary'}       = 'resolve google redirects, see [[mTalk:Spam_blacklist#Google_redirect_spam]]';

	# 3. linkfix: remove double http://
	#$options{'searched_link'} = "http//";
	#$options{'summary'}       = 'link fixes, siehe [[Wikipedia:Bots/Anfragen/Archiv/2011-1#fehlerhafte_externe_links_mit_doppeltem_protokoll]]';
	#$options{'re_url_part'}   = qr/./;

	# 4. delete/unlink blacklisted links (the active case)
	$options{'delete_link'}   = 1;
	$options{'searched_link'} = "*.$simple_domain";
	$options{'re_url_part'}   = qr/(?:[a-zA-Z0-9-]+\.)?(?:\Q$simple_domain\E)/;
	$options{'summary'}       = "domain is on blacklist, see [[WP:SBL#$simple_domain]]";
	#$options{'ref2deadlink'}  = 0;
	#$options{'summary'}       = "website is not compatible with WP:EL and content has changed, see [[WP:SBL#$simple_domain]]";
	#$options{'summary'}       = "1. link fixes; 2. domain $simple_domain is on blacklist, see [[WP:SBL#$simple_domain]]";

	$camelbot->link_replacement(\%options);
}

# --parse: convert the wikitext of a local file to html and save the body 
# content next to it
if(defined $params->{'parse'}){
	my $filename = $params->{'parse'};
	# name of result: replace the file extension by '.html'; append '.html', if 
	# that does not change the name
	(my $htmlfilename = $filename) =~s/\.[a-zA-Z0-9_-]*$/.html/;
	if($htmlfilename eq $filename){
		$htmlfilename = $filename.'.html';
	}
	# wikitext -> html
	my $wikitext = slurp $filename;
	my $html = $camelbot->parse_wikitext($wikitext);
	my $html_cleaned = $camelbot->post_process_html($html);
	# squeeze empty lines, strip everything outside of body
	for($html_cleaned){
		s/\n\K\n+//g;
		s/.*?<body>\n?//sg;
		s/<\/body>\n<\/html>\s*$//sg;
	}
	write_file($htmlfilename, {binmode => ':utf8'}, $html_cleaned);
}

# --rc-monitoring: watch recent changes, either via db or via irc
if($params->{'rc-monitoring'}){
	my $mode = $params->{'rc-monitoring'};
	if($mode eq 'db'){
		$camelbot->update_edit_filter_index();
		CamelBotRC->new($camelbot)->db_rc_monitoring();
	}elsif($mode eq 'irc'){
		$camelbot->update_edit_filter_index();
		my $username = 'CamelBot';
		my %irc_params = (
			server   => 'irc.wikimedia.org',
			channels => ['#de.wikipedia'],
			nick     => $username,
			username => $username,
			name     => $username,
			alias    => 'rc',
			# no_run => 1,
		);
		my $bot_irc = CamelBotIRC->new($camelbot, \%irc_params);
		#$bot_irc->get_rc_via_db();
		$bot_irc->run();
		#use POE;
		#$poe_kernel->run();
	}else{
		$camelbot->msg(0, 'not implemented', 'error');
	}
	$camelbot->msg(1, 'leaving');
}

# --save-as-html: save the given wiki page as a local html file, including the 
# css files and images it uses. the substitutions below depend on each other, 
# so their order matters.
if(defined $params->{'save-as-html'}){
	my ($html, $title, $headhtml) = $camelbot->parse_page($params->{'save-as-html'});
	# set html title
	$headhtml =~ s/<title>\K[^<]+(?=<\/title>)/$title/;
	# delete js and other useless stuff
	$headhtml =~ s/<script\b[^>]*>.*?<\/script>//gs;
	$headhtml =~ s/<link rel="(?:search|alternate|EditURI|shortcut icon)" [^>]+>//g;
	$headhtml =~ s/<meta name="(?:generator|ResourceLoaderDynamicStyles)"[^>]+>//g;
	$headhtml =~ s/\n\/\* cache key: .*?\*\///g;
	# squeeze consecutive newlines to a single one
	$headhtml =~ s/\n\K\n+//g;
	# first heading
	$headhtml .= "\n".'<h1 id="firstHeading" class="firstHeading"><span dir="auto">'.$title.'</span></h1>'."\n";
	# get css files
	my $css_directory = 'css';
	my $images_directory = 'images';
	mkdir $images_directory unless -d $images_directory;
	mkdir $css_directory unless -d $css_directory;
	# each stylesheet url is replaced by the return value of download_css
	$headhtml =~ s/<link rel="stylesheet" href=\K"([^"]*)"/
		$camelbot->download_css($1, $css_directory, $images_directory)/ge;
	# get image files
	my @images = ($html =~ /<a href="\/wiki\/File:([^"]+)" [^>]*\bclass="image"/g);
	my $no_warn_files = [];
	$camelbot->download_files(\@images, $no_warn_files, "$images_directory/");
	# remove navbar (first occurrence only, there is no /g)
	$html =~ s/<tr>\s*<td[^>]*>\s*<div [^>]*class="[^"]*navbar[^"]*"[^>]*>\s*.*?<\/div>\s*<\/td>\s*<\/tr>//s;
	# remove edit-links
	$html =~ s/<span class="editsection">\[<a href="[^"]*\/index.php?[^"]*\baction=edit[^"]*"[^>]*>(?i:edit|bearbeiten)<\/a>\]<\/span> *//g;
	# clean up html source
	my $html_cleaned = $camelbot->post_process_html($headhtml.$html, "$images_directory/");
	# remove icon-like images ("magnify")
	$html_cleaned =~ s/<div class="magnify">\s*<img src="[^"]*"[^>]*>\s*<\/div>//gs;
	# change internal links: '/wiki/Page' becomes 'Page.html'
	$html_cleaned =~ s/<a [^>]*\bhref="\K\/wiki\/([^"]+)/$1.html/g;
	# save to file; the file name is the page title with spaces replaced by 
	# underscores
	my $htmlfilename = $params->{'save-as-html'}.'.html';
	$htmlfilename =~y/ /_/;
	# create directory if necessary (i.e. if the file name contains a slash)
	make_path($1) if $htmlfilename =~/^(.*)\/[^\/]*$/;
	write_file($htmlfilename, {binmode => ':utf8'}, $html_cleaned);
}

# --search-sbl-attempts: search the list of edits blocked by sbl.
# the param consists of two or three 'key=value' parts, separated by ';', 
# e.g. 'regexp=evil-?domain\.;proj=enwiki;'
if(defined $params->{'search-sbl-attempts'}){
	if($params->{'search-sbl-attempts'} =~ /^(?:[a-z]+=[^;]*;){1,2}[a-z]+=[^;]*;?$/){
		# split into key/value pairs. use the captures of a successful match 
		# only, never stale ones of a former match.
		my %subparams;
		for my $param_part(split /;/, $params->{'search-sbl-attempts'}){
			if($param_part =~ /^([a-z]+)\s*=\s*+([^;]*)/){
				$subparams{$1} = $2;
			}
		}
		$camelbot->search_sbl_attempts(\%subparams);
	}else{
		$camelbot->msg(0, "could not read param '$params->{'search-sbl-attempts'}'", 'error');
	}
}

# --clean-up: run the clean-up functions on the selected pages
if(defined $params->{'clean-up'}){
	my %selection = (
		'single page'    => $params->{'wikipage'},
		'from all_pages' => $params->{'wikipages'},
		'search'         => $params->{'search'},
	);
	my $pages = $camelbot->get_pages(\%selection);
	if(@$pages == 0){
		$camelbot->msg(0, "no pages to be edited.", 'warning');
	}else{
		my %options;
		$options{'max_edits'}  = -1; # maximum number of edits (-1 = inf)
		$options{'skip_edits'} = 0;  # skip number of pages
		$options{'summary'}    = $params->{'summary'} // '';
		# no section, no search patterns, no replacements: clean-up only
		$camelbot->text_replacement(
			$pages, undef, [], [], \%options, undef, $params->{'clean-up'});
	}
}

# --text-replacement: replace text in the selected pages. the search pattern 
# defaults to the whole text; the replacement is read from --file or, without 
# that, from STDIN. the commented blocks are templates for special tasks.
if(defined $params->{'text-replacement'}){
	my $use_eval = $params->{'use-eval'};
	my %selection = (
		'single page'    => $params->{'wikipage'},
		'from all_pages' => $params->{'wikipages'},
		'search'         => $params->{'search'},
	);
	my $pages = $camelbot->get_pages(\%selection);
	# what to search for
	my $pattern = defined $params->{'search-pattern'}
		? [qr/$params->{'search-pattern'}/]
		: [qr/(?s:^.*$)/];
	# what to put in
	my $replacement = [];
	if(defined $params->{'file'}){
		my $file_content = slurp $params->{'file'};
		utf8::decode($file_content);
		$replacement = [$file_content];
	}elsif(!@$replacement){
		$camelbot->msg(1, 'reading replacement text from STDIN');
		my $file_content = join '', <STDIN>;
		utf8::decode($file_content);
		$replacement = [$file_content];
	}
	my %options;
# choose pages
# by user
	#$options{'username'}      = 'seth';
	#$options{'ucstart'}       = '2011-12-14T00:00:00Z';
	#$options{'ucend'}         = undef; #'2010-12-14T00:00:00Z';
	#$options{'uclimit'}       = 269;
	#$options{'pages'}         = [keys %{$camelbot->get_user_contribs(\%options)}];
# by whatlinkshere
	#use utf8;
	#$pages = [
	#	map {$_->{'title'}} (
	#		$camelbot->{'mw_bot'}->what_links_here('Perl/Module/Einführung in Perl-Module'),
	#		$camelbot->{'mw_bot'}->what_links_here('Perl/Module/Einführung in Perl-Modul'),
	#		$camelbot->{'mw_bot'}->what_links_here('Perl/Module/Hinweise zum Arbeiten mit Modulen'),
	#		$camelbot->{'mw_bot'}->what_links_here('Perl/Listen bzw. Arrays (Variablen)'),
	#		$camelbot->{'mw_bot'}->what_links_here('Perl/Hashes (Variablen)'),
	#		$camelbot->{'mw_bot'}->what_links_here('Doku:Perl/Module/CPAN-Module'),
	#		$camelbot->{'mw_bot'}->what_links_here('Perl/Skalare (Variablen)'),
	#	)
	#];
# or explicitly
	#$pages = [
	#	'Perl',
	#	'Perl Dingens',
	#];
	if($params->{'verbose'} > 2){
		print Dumper $pages;
	}
# search and replace
#  case 0: link replacement
	#$pattern = [
	#	qr/<nowiki>(http:\/\/world.guns.ru\b[^\]]*?\].*?)<\/nowiki>&nbsp;<small>'''\(Achtung: Bitte diese Website nicht aufrufen, da sie gef.hrliche Software verbreitet!\)'''<\/small>/,
	#	qr/<nowiki>(http:\/\/world.guns.ru\b.*?)<\/nowiki>&nbsp;<small>'''\(Achtung: Bitte diese Website nicht aufrufen, da sie gef.hrliche Software verbreitet!\)'''<\/small>/,
	#	qr/(?<!\[)(http:\/\/world.guns.ru\b[^ \]]* [^\[\]]+\])/,
	#	qr/(?<!\[)(http:\/\/world.guns.ru\b[^ \]]*)( - '''erledigt)/,
	#	qr/(?<!\[)(http:\/\/world.guns.ru\b[^ \]]* [^\[\]\n]+?)( auf Modern Firearms \('*en(?:gl.*?)?\))/,
	#	qr/(?<!\[)(http:\/\/world.guns.ru\b[^ \]]* [^\[\]\n]+?)( \('*en(?:gl.*?)?\))/,
	#	qr/(\*\s*)(http:\/\/world.guns.ru\b[^ \]]* [^\[\]\n]+)/,
	#	qr/(?<!\[)(http:\/\/world.guns.ru\b[^ \]]*)\s*\n/,
	#	qr/(?<!\[)(http:\/\/world.guns.ru\b[^ \]]*),/,
	#	qr/(?<!\[)(http:\/\/world.guns.ru\b[^ \]]*)/,
	#]; # search patterns s/X//
	#$replacement   = [
	#	'"[".$1',
	#	'$1',
	#	'"[".$1',
	#	'"[".$1."]".$2',
	#	'"[".$1."]".$2',
	#	'"[".$1."]".$2',
	#	'$1."[".$2."]"',
	#	'"[".$1."]"',
	#	'"[".$1."],"',
	#	'"[".$1."]"',
	#];   # replace texts s//X/e
#  case 1: internal linkfixes
	#$pattern = [
	#	qr/\[\[Perl\/Module\/Standardmodule\K[_ ]von[_ ]Perl/,
	#	qr/\[\[Perl\/Module\/Einführung[_ ]in[_ ]Perl-Module?/,
	#	qr/\[\[Perl\/Module\/Hinweise[_ ]zum[_ ]Arbeiten[_ ]mit[_ ]Modulen/,
	#	qr/\[\[Doku:Perl\/Module\/CPAN-Module/,
	#	qr/\[\[Perl\/Skalare\K[_ ]\(Variablen\)/,
	#	qr/\[\[Perl\/Hashes\K[_ ]\(Variablen\)/,
	#	qr/\[\[Perl\/Listen[_ ]bzw\.[_ ]Arrays\K[_ ]\(Variablen\)/,
	#]; # search patterns s/X//
	#$replacement = [
	#	"''",
	#	"'[[Perl/Module/Einführung'",
	#	"'[[Perl/Module/Einführung'",
	#	"'[[Perl/Module/Einführung'",
	#	"''",
	#	"''",
	#	"''",
	#]; # replace texts s//X/e

	if($params->{'verbose'} > 2){
		print Dumper $pattern;
	}
	if($params->{'verbose'} > 2){
		print Dumper $replacement;
	}
	# the edit itself needs pages and a summary
	if(@$pages == 0){
		$camelbot->msg(0, "no pages to be edited.", 'warning');
	}elsif(not defined $params->{'summary'}){
		$camelbot->msg(0, "missing param 'summary'. see --help", 'error');
	}else{
		$options{'max_edits'}  = -1; # maximum number of edits (-1 = inf)
		$options{'skip_edits'} = 0;  # skip number of pages
		$options{'summary'}    = $params->{'summary'};
		utf8::decode($params->{'section'});
		$camelbot->text_replacement(
			$pages, $params->{'section'}, $pattern, $replacement, \%options, $use_eval);
	}
}

# --update-editfilter-index
$camelbot->update_edit_filter_index() if $params->{'update-editfilter-index'};

# --update-table: rebuild a table on a wiki page. this is a template: the 
# content of the table and the page have to be filled in here.
if($params->{'update-table'}){
	# get information somehow and write all of it to a table structure
	my %table = (
		'section' => 'somesection',
		'style'   => {'table' => 'class="wikitable"'},
		'header'  => ['row0', 'row1', 'row2'],
		'body'    => [],
	);
	my $tables = [\%table];
	# table body
	my $row_idx = 0;
	#for(sort keys %packages){
	#	$tables->[0]{'body'}[$row_idx][0] = '[['.$packages{$_}{'name'}.']]';
	#	$tables->[0]{'body'}[$row_idx][1] = $packages{$_}{'archs'};
	#	$tables->[0]{'body'}[$row_idx][2] = $packages{$_}{'description'};
	#	++$row_idx;
	#}
	# select page, section, ...
	my ($page, $summary) = ('', 'table updated');
	$camelbot->rebuild_table($page, $tables, $summary);
}

# --upload: upload a local file. needs --source, --summary and --dest; the 
# script dies, if one of them is missing or the source file does not exist.
if($params->{'upload'}){
	my ($source, $summary, $dest) = @{$params}{'source', 'summary', 'dest'};
	if(not defined $source){
		die "error in upload: source file not defined!\n";
	}
	if(not -e $source){
		die "error in upload: file '$source' not found!\n";
	}
	if(not defined $summary){
		die "error in upload: summary not defined!\n";
	}
	if(not defined $dest){
		die "error in upload: destination filename not defined!\n";
	}
	$camelbot->msg(1, "uploading file '$source' as '$dest' in wiki");
	$camelbot->upload_file($source, $summary, $dest);
}

# --usercontribs: fetch contributions of the user given here
if($params->{'usercontribs'}){
	my %options = (
		'username' => 'seth',
		'ucstart'  => '2011-12-14T00:00:00Z',
		'ucend'    => undef, #'2010-12-14T00:00:00Z';
		'uclimit'  => 500,
	);
	my $contribs = $camelbot->get_user_contribs(\%options);
	#print Dumper $contribs;
}

# all requested commands are done
$camelbot->msg(2, 'finished');

__END__

=head1 NAME

camelbot manipulates or gets information from wiki pages

=head1 DESCRIPTION

this program is a CLI tool for automatic editing of MediaWiki pages. one of the 
main tasks is the replacement of external links (urls). furthermore this tool can 
be used to just retrieve information on a MediaWiki.

=head1 SYNOPSIS

camelbot [options]

(there are no mandatory options, only mandatory sub-options)

general options:

     --http-status=s           check http status of given url
 -t, --test                    don't change anything, just print possible changes

mediawiki/wikipedia login:

     --db-access=s             filename of file containing database access data, 
                                i.e., host, port, user and password
     --username=s              login as different user (default = shell login name)

mediawiki/wikipedia commands:

  passive commands in order to get pages:

     --diff-from=s             revision id to get a diff from
     --diff-to=s               revision id (or 'prev' or 'next') to get a diff to
     --download-by-prefix=s    download pages (and used images) with given prefix
 -g, --get-page                get one or more wiki pages, print to stdout
       --section=s              get this section in wiki page (e.g. 'some section')
       --wikipage=s             get this wiki page (e.g. 'some page')
       --wikipages=s            get these wiki pages by regexp (e.g. 'page[0-9]')
     --save-as-html=s          saves a given wiki page as local html-file
  
  other commands:

     --archive-ext-links=s     save external links at archive.org, using db
                                param is of format 
                                  'from_id=...;to_id=...;file=...;'
                                (syntax may change in future)
     --cat-add                 add a given bunch of pages to a given category
 -c, --cat-change              replaces pre-defined categories
     --cat-dead                check recent changes (in dewiki) for additions of 
                                categories of (recently) deceased people
     --delete                  delete some pre-defined pages
     --irc                     short for --rc-monitoring=irc
 -l, --link-replacement        replaces pre-defined links 
     --clean-up                use clean-up functions on given pages
       --search=s               use the mediawiki search to add matching pages
       --wikipage=s             use this wiki page (e.g. 'page')
       --wikipages=s            use these wiki pages by regexp (e.g. 'page[0-9]')
     --text-replacement        replaces text in given wiki pages by regexp or by 
                                given file
       --file=s                 use content of this file as replacement, i.e., as 
                                 new text
       --search-pattern=s       use this perl-style regexp to search a pattern to be
                                 replaced
       --section=s              use this section in wiki page (e.g. 'own packages')
       --summary=s              a summary of the edit (mandatory)
       --use-eval               use eval on replacement text (default = no eval)
       --search=s               use the mediawiki search to add matching pages
       --wikipage=s             use this wiki page (e.g. 'page')
       --wikipages=s            use these wiki pages by regexp (e.g. 'page[0-9]')
 -m, --minor=[01]              mark edit(s) as minor (1) or not (0), default = 1
     --parse=s                 parses wikitext from file and saves result as new 
                                file
     --rc-monitoring=s         start monitoring of recent changes (in dewiki) via
                                irc - an irc bot
                                db  - a db connection
     --search-sbl-attempts=s   search the list of blocked edits (blocked by sbl) 
                                for a given url-regexp or for a complete given sbl.
                                this may search in several wikis.
                                param is of format 
                                  'regexp=...;proj=...;sbl=...;'
                                e.g. 'regexp=evil-?domain\.;proj=enwiki;' or
                                e.g. 'proj=enwiki;sbl=meta;'
                                (syntax may change in future)
     --update-editfilter-index update edit filter index (at dewiki)
     --upload                  upload a file
                               if this param is set, the following params have 
                               to be set, too.
       --source=s               source filename (e.g. '../somefile.txt')
       --summary=s              a summary/description of the file
                                 (e.g. "robot cat with hat\n\n[[Category:Nonsense]]")
       --dest=s                 destination filename (e.g. 'a_descriptive_name.txt')
     --usercontribs            fetch user contributions

meta options:

 -V, --version                 display version and exit.
 -h, --help                    display brief help
     --man                     display long help (man page)
 -q, --silent                  same as --verbose=0
 -v, --verbose                 same as --verbose=2
 -vv,--very-verbose            same as --verbose=3
 -v, --verbose=x               grade of verbosity
                                x=0: no output
                                x=1: default output
                                x=2: much output
                                x=3: very much output

=head1 EXAMPLES

camelbot -cl
  replaces links and cats.

=head1 OPTIONS

=head2 GENERAL

=over 8

=item B<--archive-ext-links>=I<string>

save external links at archive.org, using db. param is of format 
 'from_id=...;to_id=...;file=...;'

=item B<--cat-add>

add a given bunch of pages to a given category.

=item B<--cat-change>, B<-c>

replaces categories 

=item B<--cat-dead>

check recent changes (in dewiki) for additions of categories of (recently) 
deceased people

=item B<--clean-up>

use typical clean-up functions on given pages. pages can be given by 
B<--search>, B<--wikipage>, or B<--wikipages>.

=item B<--db-access>=I<filename>

if you are lucky and have database access to the mediawiki db, then camelbot will 
try to make use of this database. just provide the I<filename> (default = 
'replica.my.cnf') of a file containing something like
 user='your name'
 password='your password'
 port='e.g. 3306'
 host='e.g. 127.0.0.1'

=item B<--delete>

delete some pre-defined pages

=item B<--diff-from>=I<string>

returns diff of two revisions of a wiki page to stdout. If I<string> is a number, 
it's treated as a page revision. If it's not a number, it's treated as a page title
and the newest revision is used.

should be combined with B<--diff-to>=I<string>, where a second revision number or 
'prev' (default) or 'next' can be given to choose a revision to diff to.

=item B<--diff-to>=I<string>

see B<--diff-from>

=item B<--download-by-prefix>=I<string>

download pages by given prefix and save the pages with the extension '.wikitext'.
all images used in those pages will be downloaded too.

=item B<--get-page>

get text of a B<--wikipage> or some B<--wikipages> or a B<--section> of each page.

=over 8

=item B<--section>=I<string>

get this section in wiki page (e.g. 'some funny section').

=item B<--wikipage>=I<string>

use this wiki page (e.g. 'User:CamelBot/Something').
          
=item B<--wikipages>=I<string>

use these wiki pages given by perl-style regexp (e.g. 'page[0-9]+').
          
=back

=item B<--http-status>=I<string>

prints the http response status code of a given URL I<string>. If I<string> 
contains spaces, they are treated as separators between multiple URLs.

=item B<--irc>

same as B<--rc-monitoring>=irc

=item B<--link-replacement>, B<-l>

replace links

=item B<--minor>, B<--no-minor>

mark edit(s) as minor or not, default = B<--minor>.

=item B<--parse>=I<filename>

parses wikitext from file and saves result to file with same name but with 
extension .html.

=item B<--rc-monitoring>=I<type>

start monitoring of recent changes (in de-wikipedia) via an irc bot (I<type>==irc) 
or a db (I<type>==db) connection to a "real time" db.

=item B<--save-as-html>=I<pagename>

parses wikitext of given page and saves result to file with same name but with 
extension .html.

=item B<--search-sbl-attempts>=I<string>

search the log of edits blocked by sbl for a given url-regexp I<string> or all 
entries of a given spam-blacklist (sbl). may search several wikis.

I<string> contains at least one of the following parameters

'regexp=...;proj=...;sbl=...;'

e.g.

'regexp=some-?evil-?domain;proj=de.wikipedia;'

'sbl=meta;proj=dewiki;'

'sbl=dewiki;proj=dewiki;'

'sbl=meta;proj=all;'

see <https://noc.wikimedia.org/conf/all.dblist> for a complete list of projects

=item B<--test>, B<-t>

don't change anything, just print possible changes.

=item B<--text-replacement>

replace content of B<--section> of pages or whole pages. you may choose by a regular
expression (via B<--search-pattern>) which part is to be replaced. the new text may 
be the content of a given B<--file>.

=over 8

=item B<--file>=I<filename>

the content of this file will be used as replacement.

=item B<--search-pattern>=I<regexp>

I<regexp> is a perl-style search pattern that will be replaced.

default = (?s:^.*$)

handle with care. it's always recommended to B<--test> before using.

=item B<--section>=I<string>

use this section in wiki page (e.g. 'some funny section').

=item B<--summary>=I<string>

a one-line summary of the edit (e.g. "inserted ultimate theory")

=item B<--use-eval>

use eval() on replacement text. normally the text will be replaced literally.
if B<--use-eval> is set, the replacement will be treated like

  replace /pattern/ by eval(replacement)

so if replacement contains something like $1, this will be treated as a 
back-reference, if B<--use-eval> is set.

=item B<--search>=I<string>

use the mediawiki search to add all pages that are matched by I<string>.
          
=item B<--wikipage>=I<string>

use this wiki page (e.g. 'User:CamelBot/Something').
          
=item B<--wikipages>=I<string>

use these wiki pages given by perl-style regexp (e.g. 'page[0-9]+').
          
=back

=item B<--update-editfilter-index>

update overview of discussions concerning single rules of edit filter (in w:de)

=item B<--upload>

upload a file. if this param is set, you should additionally set the following 
params, too.

=over 8

=item B<--source>=I<string>

source filename (e.g. '../somefile.txt')

=item B<--summary>=I<string>

a summary/description of the file (e.g. "robot cat with hat\n\n[[Category:Nonsense]]")

=item B<--dest>=I<string>

destination filename (e.g. 'a_descriptive_name.txt')

=back

=item B<--username>=I<string>

login as different user. default: I<string> = shell login name

=back

=head2 META

=over 8

=item B<--version>, B<-V>

prints version and exits.

=item B<--help>, B<-h>, B<-?>

prints a brief help message and exits.

=item B<--man>

prints the manual page and exits.

=item B<--verbose>=I<number>, B<-v> I<number>

set grade of verbosity to I<number>. if I<number>==0 then no output
will be given, except hard errors. the higher I<number> is, the more 
output will be printed. default: I<number> = 1.

=item B<--silent, --quiet, -q>

same as B<--verbose=0>.

=item B<--very-verbose, -vv>

same as B<--verbose=3>. you may use B<-vvv> for B<--verbose=4> and so on.

=item B<--verbose, -v>

same as B<--verbose=2>.

=back

=head1 LICENCE

Copyright (c) 2016, seth
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

* Redistributions of source code must retain the above copyright notice, this
  list of conditions and the following disclaimer.

* Redistributions in binary form must reproduce the above copyright notice,
  this list of conditions and the following disclaimer in the documentation
  and/or other materials provided with the distribution.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

originally written by seth (see https://github.com/wp-seth/camelbot)

=cut