def main(argv=None):
    """Start the backend server on the port given as first argument.

    :param argv: argument vector to read the port from; defaults to
        ``sys.argv``.
    :return: exit status: 0 once ``run`` has returned, 1 if the port
        argument is missing or is not an integer.
    """
    if argv is None:
        argv = sys.argv

    if len(argv) < 2:
        print("ERROR: no port number as first argument given. Quitting",
              file=sys.stderr)
        return 1

    try:
        port = int(argv[1])
    except ValueError:
        # Report a malformed port the same way as a missing one instead of
        # terminating with a traceback.
        print("ERROR: port number must be an integer, got %r. Quitting"
              % (argv[1],), file=sys.stderr)
        return 1

    run(port)
    return 0


if __name__ == "__main__":
    sys.exit(main())
import config + +logger = get_task_logger(__name__) + +BROKER_URL = 'redis://{server}:{port}/{db}'.format( + server=config.redis_server, + port=config.redis_port, + db=config.redis_db +) + +app = Celery(broker=BROKER_URL, + include=['backend.celery.tasks']) + +app.conf.update( + CELERY_TASK_SERIALIZER='json', + CELERY_ACCEPT_CONTENT=['json'], + CELERY_IGNORE_RESULT=True, + CELERY_DISABLE_RATE_LIMITS=True, + CELERY_DEFAULT_QUEUE=config.toolname, + CELERY_QUEUES=( + Queue(config.redis_prefix + 'q', Exchange(config.toolname), + routing_key=config.toolname), + ), + BROKER_TRANSPORT_OPTIONS={ + 'fanout_prefix': True, + 'fanout_patterns': True, + 'keyprefix_queue': config.redis_prefix + '.binding.%s', + 'unacked_key': config.redis_prefix + '_unacked', + 'unacked_index_key': config.redis_prefix + '_unacked_index', + 'unacked_mutex_key': config.redis_prefix + '_unacked_mutex' + }, +) diff --git a/backend/celery/api.py b/backend/celery/api.py new file mode 100644 index 0000000..7a4af15 --- /dev/null +++ b/backend/celery/api.py @@ -0,0 +1,152 @@ +# -*- coding: utf-8 -*- +# ISC License +# Copyright (C) 2015 Jan Lebert +from __future__ import absolute_import +import requests +from requests_oauthlib import OAuth1 +from redis import StrictRedis +import json +from datetime import datetime, timedelta + +from .. import config +from . 
class MediaWiki(object):
    """Small client for the web API of one MediaWiki site.

    Requests are OAuth-signed when an access token is given. The instance
    also holds a Redis connection used for publishing messages and for
    caching the list of wikis.
    """

    def __init__(self, host="https://en.wikipedia.org", path="/w/api.php",
                 access_token=None, redis_channel=None):
        """
        :param host: scheme and host name of the wiki.
        :param path: path of ``api.php`` on that host.
        :param access_token: JSON string holding the ``key`` and ``secret``
            of the user's OAuth access token; None sends unsigned requests.
        :param redis_channel: Redis channel :meth:`publish` writes to.
        """
        self.api_url = host + path
        self.wikis = {}

        user_agent = "crosswatch (https://tools.wmflabs.org/crosswatch;" +\
            "crosswatch@tools.wmflabs.org) python-requests/" +\
            requests.__version__
        self.headers = {'User-Agent': user_agent}

        if access_token:
            # Construct an auth object with the consumer and access tokens
            access_token = json.loads(access_token)
            self.auth = OAuth1(config.consumer_token.key,
                               client_secret=config.consumer_token.secret,
                               resource_owner_key=access_token['key'],
                               resource_owner_secret=access_token['secret'])
        else:
            self.auth = None

        self.redis_channel = redis_channel
        self.redis = StrictRedis(
            host=config.redis_server,
            port=config.redis_port,
            db=config.redis_db
        )

    def publish(self, message):
        """Publish ``message``, JSON-encoded, on the configured channel.

        :raises Exception: if no Redis channel was given to the constructor.
        """
        if not self.redis_channel:
            raise Exception("No redis channel set to publish to")
        self.redis.publish(self.redis_channel, json.dumps(message))

    def timestamp(self, daysdelta=0):
        """
        :param daysdelta: offset in days from now (UTC); may be negative
        :return: timestamp in MediaWiki format (``YYYYMMDDHHMMSS``)
        """
        now = datetime.utcnow()
        delta = timedelta(days=daysdelta)
        time = now + delta
        return time.strftime("%Y%m%d%H%M%S")

    def query(self, params):
        """Send one API request and return the decoded JSON response.

        :param params: API parameters. The dict is copied, not modified;
            ``format=json`` is always sent.
        :return: the decoded response dict.
        :raises Exception: if the response contains an ``error`` entry. The
            message is "OAuth authentication failed" for the
            ``mwoauth-invalid-authorization`` code, otherwise the code
            itself.
        """
        # Work on a copy so the caller's dict is left untouched.
        request_params = dict(params, format="json")
        response = requests.get(self.api_url, params=request_params,
                                auth=self.auth,
                                headers=self.headers).json()

        if 'error' in response:
            code = response['error']['code']
            logger.error(code)
            if code == "mwoauth-invalid-authorization":
                raise Exception("OAuth authentication failed")

            raise Exception(str(code))
        if 'warnings' in response:
            logger.warning(
                "API-request warning: " + str(response['warnings']))
        return response

    def query_gen(self, params):
        """Yield the ``query`` part of every batch of a continued query.

        Requests are repeated with the ``continue`` values of the previous
        response until a response carries no ``continue`` entry.

        :param params: API parameters. The dict is copied, not modified;
            ``format=json`` and ``action=query`` are always sent.
        :raises Exception: if a response contains an ``error`` entry.
        """
        base_params = dict(params, format="json", action="query")
        last_continue = {'continue': ""}
        while True:
            request_params = dict(base_params)
            request_params.update(last_continue)
            response = requests.get(self.api_url, params=request_params,
                                    auth=self.auth,
                                    headers=self.headers).json()

            if 'error' in response:
                raise Exception(str(response['error']))
            if 'warnings' in response:
                # Log the whole warnings object: it is not guaranteed to
                # have a ['query']['*'] entry, and a logging statement must
                # not abort the generator with a KeyError.
                logger.warning(
                    "API-request warning: " + str(response['warnings']))
            if 'query' in response:
                yield response['query']
            if 'continue' not in response:
                break
            last_continue = response['continue']

    def get_username(self):
        """Return the user name the API reports for the current credentials.

        API errors, including a rejected OAuth token, are raised by
        :meth:`query`.

        :raises KeyError: if the response lacks the user info entry.
        """
        params = {
            'action': "query",
            'meta': "userinfo",
        }
        response = self.query(params)
        # query() raises before returning a response that contains 'error',
        # so no error inspection is needed (or possible) at this point.
        return response['query']['userinfo']['name']

    def get_wikis(self, use_cache=True):
        """Return the dict of wikis keyed by database name.

        :param use_cache: when true, a copy cached in Redis is returned if
            one exists. Otherwise, or on a cache miss, the list is fetched
            from the API and the cache entry is rewritten (1 day expiry).
        """
        key = config.redis_prefix + 'cached_wikis'
        if use_cache:
            cached = self.redis.get(key)
            if cached:
                if isinstance(cached, bytes):
                    # Redis clients may hand back raw bytes; decode before
                    # parsing so older json modules accept the value.
                    cached = cached.decode('utf-8')
                return json.loads(cached)

        # Cache miss or cache bypassed, do api request and fill cache
        wikis = self._get_wikis()
        self.redis.setex(key, 86400, json.dumps(wikis))  # 1 day exp.
        return wikis

    def _get_wikis(self):
        """Fetch the site matrix from the API and rebuild ``self.wikis``."""
        params = {'action': "sitematrix"}
        data = self.query(params)
        # Start from scratch so entries of an earlier call do not linger.
        self.wikis = {}
        for key, val in data['sitematrix'].items():
            if key == 'count':
                continue

            if 'code' in val:
                # Language entry: its sites share language code and name.
                for site in val['site']:
                    self._parse_sitematrix(site, val['code'], val['name'])
            else:
                # Plain list of sites without language information.
                for site in val:
                    self._parse_sitematrix(site, '', '')

        return self.wikis

    def _parse_sitematrix(self, site, lang, langname):
        """Store one site matrix entry in ``self.wikis`` under its dbname.

        :param site: site entry with ``url``, ``dbname`` and ``code``.
        :param lang: language code of the site ('' if none).
        :param langname: language name of the site ('' if none).
        """
        wiki = {
            'lang': lang,
            'langname': langname,
            'url': site['url'],
            'dbname': site['dbname'],
            'group': site['code']
        }
        if wiki['group'] == 'wiki':
            wiki['group'] = 'wikipedia'

        # Sites carrying any of these keys are flagged as closed.
        inactive_wikis = ['closed', 'private', 'fishbowl']
        if any(key in site for key in inactive_wikis):
            wiki['closed'] = True

        self.wikis[site['dbname']] = wiki
@app.task
def initial_task(obj):
    """Queue one ``watchlistgetter`` task per default project.

    :param obj: task payload; must contain the user's ``access_token``.
        Its ``wiki`` entry is set to the wiki description of each project
        before the payload is handed to ``watchlistgetter``.
    :raises KeyError: if a project is missing from the list of wikis.
    """
    mw = MediaWiki(access_token=obj['access_token'])
    wikis = mw.get_wikis()
    # Each project is listed once: a repeated entry queues a second,
    # identical task for that wiki.
    projects = ['enwiki', 'dewiki', 'itwiki', 'frwiki',
                'commonswiki', 'wikidatawiki', 'enwiktionary', 'dewiktionary',
                'metawiki', 'mediawikiwiki']
    for project in projects:
        obj['wiki'] = wikis[project]
        watchlistgetter.delay(obj)
# WSGI application that carries only the OAuth blueprint.
app = Flask(__name__)
# NOTE(review): the key is drawn from os.urandom each time this module is
# loaded, so it differs between processes and restarts. Confirm that
# sessions never need to outlive or cross a process.
app.secret_key = os.urandom(24)

_oauth_settings = {
    'base_url': 'https://en.wikipedia.org/w',
    'clean_url': 'https://en.wikipedia.org/wiki',
    'consumer_key': oauth_consumer_key,
    'consumer_secret': oauth_consumer_secret,
    'toolname': toolname,
}
mwoauth = MWOAuth(**_oauth_settings)

# Mount the OAuth routes below "/<toolname>".
_mount_point = '/' + toolname
app.register_blueprint(mwoauth.bp, url_prefix=_mount_point)

if __name__ == "__main__":
    app.run()
get_watchlist(obj): - projects = [ - ['enwiki', 'https://en.wikipedia.org/wiki', 'https://en.wikipedia.org/w/api.php'], - ['dewiki', 'https://de.wikipedia.org/wiki', 'https://de.wikipedia.org/w/api.php'], - ['frwiki', 'https://fr.wikipedia.org/wiki', 'https://fr.wikipedia.org/w/api.php'], - ['itwiki', 'https://it.wikipedia.org/wiki', 'https://it.wikipedia.org/w/api.php'], - ['commons', 'https://commons.wikimedia.org/wiki', 'https://commons.wikimedia.org/w/api.php'], - ['wikidata', 'https://www.wikidata.org/wiki/wiki', 'https://www.wikidata.org/w/api.php'], - ['metawiki', 'https://meta.wikimedia.org/wiki', 'https://meta.wikimedia.org/w/api.php'], - ['enwiktionary', 'https://en.wiktionary.org/wiki', 'https://en.wiktionary.org/w/api.php'], - ['dewiktionary', 'https://de.wiktionary.org/wiki', 'https://de.wiktionary.org/w/api.php'], - ['mediawiki', 'https://www.mediawiki.org/wiki', 'https://www.mediawiki.org/w/api.php'] - ] - for project in projects: - watchlistgetter.delay(obj, *project) - -@app.task -def watchlistgetter(obj, project, project_url, api_url): - r = redis.StrictRedis( - host=config.redis_server, - port=config.redis_port, - db=config.redis_db) - - redis_channel = obj['redis_channel'] - - access_token = obj['access_token'] - access_token = json.loads(access_token) - - # Construct an auth object with the consumer and access tokens - auth1 = OAuth1(config.consumer_token.key, - client_secret=config.consumer_token.secret, - resource_owner_key=access_token['key'], - resource_owner_secret=access_token['secret']) - - # Now, accessing the API on behalf of a user - logger.info("Reading top 70 watchlist items for wiki " + project) - response = requests.get( - api_url, - params={ - 'action': "query", - 'list': "watchlist", - 'wlallrev': "", - 'wltype': "edit", - 'wllimit': 70, - 'wlprop': "ids|flags|title|parsedcomment|user|timestamp|sizes|loginfo", - 'format': "json" - }, - auth=auth1 - ) - href = u'