In [None]:
import requests
import concurrent.futures

from lxml import html
from IPython.display import JSON,HTML

from jsonpath_rw import jsonpath, parse
import jsonpath_rw_ext as jsonp
import collections
import os

In [None]:
import json
from json import JSONEncoder, JSONDecoder
import pickle

class PythonObjectEncoder(JSONEncoder):
    """JSON encoder that pickles any value json cannot serialize itself.

    Such values are emitted as ``{'_python_object': <pickle text>}``. The
    pickle is produced with protocol 0 and turned into text through latin-1,
    so the result is a plain string json can store on Python 2 and Python 3.
    """

    # Types json serializes natively. ``type(u'')`` is ``unicode`` on
    # Python 2 and ``str`` on Python 3, which avoids naming ``unicode``.
    _NATIVE_TYPES = (list, dict, str, type(u''), int, float, bool, type(None))

    def default(self, obj):
        """Return a JSON-serializable stand-in for ``obj``.

        Args:
            obj: the value the encoder could not serialize on its own.

        Returns:
            A one-key dict ``{'_python_object': text}`` holding the pickle.

        Raises:
            TypeError: when ``obj`` is one of the native types; the call is
                delegated to ``JSONEncoder.default``, which rejects it.
        """
        if isinstance(obj, self._NATIVE_TYPES):
            return JSONEncoder.default(self, obj)
        # latin-1 maps every byte to exactly one code point, so encoding the
        # text back with latin-1 restores the original pickle bytes.
        pickled = pickle.dumps(obj, protocol=0)
        return {'_python_object': pickled.decode('latin-1')}

def as_python_object(dct):
    """json ``object_hook`` that revives values stored under '_python_object'.

    Args:
        dct: a dict decoded by json.

    Returns:
        The unpickled object when ``dct`` has a ``'_python_object'`` key,
        otherwise ``dct`` itself.
    """
    if '_python_object' not in dct:
        return dct
    payload = dct['_python_object']
    if not isinstance(payload, bytes):
        # json hands back text, but pickle.loads needs bytes on Python 3.
        # latin-1 turns each code point below 256 back into a single byte.
        payload = payload.encode('latin-1')
    # SECURITY: pickle.loads can run arbitrary code; only load files that
    # were written by a trusted source.
    return pickle.loads(payload)

def save_to_json_file(filename, content):
    """Serialize ``content`` to JSON with PythonObjectEncoder and write it to ``filename``.

    Returns whatever ``save_to_file`` returns for the written file.
    """
    return save_to_file(filename, json.dumps(content, cls=PythonObjectEncoder))

def save_to_file(filename, content):
    """Write the text ``content`` to ``filename``, replacing an existing file.

    Returns the string ``"saved "`` followed by ``filename``.
    """
    status = "saved " + filename
    with open(filename, 'w') as handle:
        handle.write(content)
    return status

def load_from_json_file(file_name):
    """Parse the JSON file ``file_name`` and return the decoded value.

    Dicts carrying a ``'_python_object'`` key are passed through
    ``as_python_object`` while decoding.
    """
    # load_from_file returns an open handle; the with-block guarantees it is
    # closed once parsing is done, even when json.load raises.
    with load_from_file(file_name) as source:
        return json.load(source, object_hook=as_python_object)

def load_from_file(file_name):
    """Open ``file_name`` with ``open``'s default mode and return the file object.

    The handle is returned still open; closing it is left to the caller.
    """
    return open(file_name)

def as_json(content):
    """Decode the JSON document ``content`` and return the resulting Python value."""
    decoded = json.loads(content)
    return decoded

In [None]:
def select_all(key):
    """Build a one-argument function returning ``jsonp.match(key, data)`` for its argument."""
    def selector(data):
        return jsonp.match(key, data)
    return selector

def select_one(key):
    """Build a one-argument function returning ``jsonp.match1(key, data)`` for its argument."""
    def selector(data):
        return jsonp.match1(key, data)
    return selector

def contains(key, value):
    """Build a predicate that is true when ``jsonp.match1(key, data)`` equals ``value``."""
    def predicate(data):
        found = jsonp.match1(key, data)
        return found == value
    return predicate

def all_of(*filters):
    """Combine predicates into one that holds only when every predicate holds.

    The predicates are tried in the order given and evaluation stops at the
    first falsy result. With no predicates the combined function returns True.
    """
    def combined(data):
        return all(check(data) for check in filters)
    return combined

In [None]:
# Item-count cadences read by print_progress, by index. Each level is only
# checked when all the previous ones matched:
#   [0] write a dot
#   [1] write a space before the dot (never for item 0)
#   [2] write two more spaces
#   [3] print ETA stats via print_stats, or a newline when no total is given
#   [4] write an extra newline
#   [5] bunch size handed to print_stats for the ETA estimate
points = [1, 5, 20, 100, 500, 100]

import sys
import time
from datetime import datetime

def print_progress(i, timer_log=None, total=None):
    """Write the progress marker for item number ``i`` to stdout.

    A dot is written for every item whose index is a multiple of
    ``points[0]``; coarser multiples (``points[1]`` .. ``points[4]``) add
    spacing, a stats line or newlines in front of it.

    Args:
        i: zero-based index of the item being processed.
        timer_log: timestamp forwarded to ``print_stats`` as ``last_time``.
        total: overall item count; when truthy, ``print_stats`` is called at
            the ``points[3]`` cadence instead of writing a newline.

    Returns:
        The value returned by ``print_stats`` when it was called, otherwise
        ``timer_log`` unchanged.
    """
    if i % points[0] != 0:
        return timer_log

    # Each flag requires the previous one, so the coarser cadences are only
    # evaluated when the finer ones matched.
    at_space = i > 0 and i % points[1] == 0
    at_gap = at_space and i % points[2] == 0
    at_stats = at_gap and i % points[3] == 0
    at_break = at_stats and i % points[4] == 0

    if at_space:
        sys.stdout.write(' ')
    if at_gap:
        sys.stdout.write('  ')
    if at_stats:
        if total:
            timer_log = print_stats(total, i, points[5], timer_log)
        else:
            sys.stdout.write('\n')
    if at_break:
        sys.stdout.write('\n')

    sys.stdout.write('.')
    sys.stdout.flush()
    return timer_log
        
def print_stats(total, processed, bunch, last_time, **kwargs):
    """Print an ETA line for the remaining work and return the current time.

    Args:
        total: overall number of items.
        processed: number of items handled so far.
        bunch: number of items handled since ``last_time``.
        last_time: ``time.time()`` value taken when the previous bunch ended.
        **kwargs: accepted and ignored.

    Returns:
        The ``time.time()`` value read at the start of this call, suitable as
        the next call's ``last_time``.
    """
    now = time.time()
    elapsed = now - last_time
    speed = elapsed / 60.0
    # Clamp at zero so an over-run (processed > total) prints 00:00 rather
    # than a negative duration.
    remaining = max(elapsed * (total - processed) / bunch, 0)
    # Format the duration arithmetically. Converting it with
    # datetime.fromtimestamp would apply the local UTC offset and drop whole
    # hours, so an ETA of 75 minutes would show up as 15:00.
    minutes, seconds = divmod(int(remaining), 60)
    print('eta: {minutes:02d}:{seconds:02d} ({speed:.1f} min/{bunch})'.format(
        minutes=minutes, seconds=seconds, speed=speed, bunch=bunch))
    return now

def with_progress(l, size=None):
    """Yield the items of ``l`` unchanged, reporting progress before each one.

    Args:
        l: iterable to walk.
        size: forwarded to ``print_progress`` as ``total``.
    """
    last_mark = time.time()
    position = 0
    for item in l:
        last_mark = print_progress(position, last_mark, size)
        yield item
        position += 1

In [None]:
def load_latest_tables(batch, from_time, from_id=1):
    """Request one page of the 'lastresult' message board and return it decoded.

    Args:
        batch: value sent as ``per_page``.
        from_time: value sent as ``from_time``.
        from_id: value sent as ``from_id``.

    Returns:
        The response body decoded by ``as_json``.
    """
    endpoint = '/message/board?type=lastresult&social=false&dojo.preventCache=1537975056450&id=14'
    paging = '&page=0&per_page={per_page}&from_time={from_time}&from_id={from_id}'.format(
        per_page=batch, from_time=from_time, from_id=from_id)
    response = s.get(site + endpoint + paging)
    return as_json(response.text)

In [None]:
def load_batch(amout, from_time=1550000000):
    """Fetch one page of latest results and reduce each entry to a small record.

    Args:
        amout: page size handed to ``load_latest_tables``.
        from_time: ``from_time`` value handed to ``load_latest_tables``.

    Returns:
        A list of dicts with the keys ``'table'``, ``'player'``,
        ``'player_amount'`` and ``'timestamp'``.

    Raises:
        IndexError: when an entry's HTML contains fewer than two links.
    """
    result = []
    for entry in load_latest_tables(amout, from_time)['data']:
        links = html.fromstring(entry['html']).xpath('//a/@href')
        # Keep what follows the first '=' of each href (the whole href when
        # it has no '=').
        ids = [link.split('=', 1)[-1] for link in links]
        # NOTE(review): presumably the first link is the table and the rest
        # are its players; confirm against the board markup.
        result.append({
            'table': ids[0],
            'player': ids[1],
            'player_amount': len(ids) - 1,
            'timestamp': entry['timestamp']
        })
    return result

In [None]:
def table_iterator(stop=30, batch=5000, ts=1550000000):
    """Yield table records from ``load_batch``, aiming for ``stop`` in total.

    ``stop // batch`` full batches are requested, followed by one batch of
    ``stop % batch`` records when that remainder is non-zero.

    Args:
        stop: total number of records to request.
        batch: number of records requested per full batch.
        ts: ``from_time`` for the first request; afterwards the timestamp of
            the last record of each non-empty full batch is used.

    Yields:
        The record dicts produced by ``load_batch``.
    """
    full_batches, remainder = divmod(stop, batch)
    for i in range(full_batches):
        # Reset per batch: an empty batch must neither reuse the previous
        # batch's last record nor hit an unbound name on the first pass.
        last = None
        for last in load_batch(batch, ts):
            yield last
        if last:
            ts = last['timestamp']
        print_progress(i)
    # Skip the trailing request when nothing is left; asking for zero
    # records would only cost a round trip.
    if remainder:
        for t in load_batch(remainder, ts):
            yield t

In [None]:
def get_table_info(tbl):
    """Fetch the table-info document for table ``tbl`` and return its ``'data'`` member."""
    query = '/table/table/tableinfos.html?id={table}&dojo.preventCache=1537970039954'.format(table=tbl)
    response = s.get(site + query)
    return as_json(response.text)['data']

def get_table_datas(table, player, version):
    """Fetch a replay page and return the arguments of its ``completesetup`` call.

    Args:
        table: table id placed in the replay URL.
        player: player id placed in the replay URL.
        version: site version path segment of the replay URL.

    Returns:
        The text following ``completesetup`` on its line, wrapped in
        ``[...]`` and decoded by ``as_json``.

    Raises:
        Exception: when the page has no setup script (the message is the
            page's own error description if it has one), or when the script
            does not mention ``completesetup``.
    """
    url = site + '/archive/replay/{version}/?table={table}&player={player}&comments=1' \
        .format(table=table, player=player, version=version)
    page = s.get(url).text

    tree = html.fromstring(page)
    nodes = tree.xpath('//div[@id="overall-content"]/script')
    if not nodes:
        errors = tree.xpath('//b[@id="bga_fatal_error_descr"]')
        if errors:
            raise Exception(errors[0].text_content())
        # Neither the script nor the error element is present: report that
        # instead of failing with an IndexError on the empty node list.
        raise Exception(
            'replay page for table {} has neither a setup script nor an error description'.format(table))
    text = nodes[0].text_content()

    marker = 'completesetup'
    start = text.find(marker)
    if start < 0:
        raise Exception('no {} call found in replay page for table {}'.format(marker, table))
    end = text.find('\n', start)
    if end < 0:
        # The call sits on the last line without a trailing newline.
        end = len(text)
    # Skip the marker plus one character and drop the last two characters of
    # the line. NOTE(review): presumably the opening '(' and the closing
    # ');' of the call; confirm against a real page.
    return as_json('[' + text[start + len(marker) + 1:end - 2] + ']')

def get_game_log(table):
    """Fetch the untranslated archive log of ``table`` and return it decoded by ``as_json``."""
    query = '/archive/archive/logs.html?table={table}&translated=false&dojo.preventCache=1537972617341'.format(
        table=table)
    response = s.get(site + query)
    return as_json(response.text)

In [None]:
def valid_info(info):
    """Return True when options 100, 101, 102 and 103 of ``info`` all have the value ``'0'``."""
    option_paths = (
        '$.options."100".value',
        '$.options."101".value',
        '$.options."102".value',
        '$.options."103".value',
    )
    checks = [contains(path, '0') for path in option_paths]
    return all_of(*checks)(info)

def valid_logs(d):
    """Return True when the ``stGameEnd`` state change in log ``d`` has ``neutralized`` equal to ``'0'``."""
    neutralized_path = '$..data[?(@.type == "gameStateChange" & @.args.action == "stGameEnd")].args.args.table.neutralized'
    neutralized = select_one(neutralized_path)(d)
    return neutralized == '0'

In [None]:
def load_table_datas(table, player):
    """Collect info, game log and replay data for ``table``.

    The info is fetched and validated first, then the log; the replay data
    is only requested once both passed.

    Returns:
        A dict with the keys ``'table'``, ``'info'``, ``'datas'`` and
        ``'log'``, or None when the info or the log fails validation.
    """
    table_info = get_table_info(table)
    if valid_info(table_info):
        game_log = get_game_log(table)
        if valid_logs(game_log):
            replay = get_table_datas(table, player, table_info['siteversion'])
            return dict(table=table, info=table_info, datas=replay, log=game_log)
    return None

In [None]:
def prepare_table_datas(table, player, **kvargs):
    """Build the parameters needed to fetch a table's replay data.

    Args:
        table: table id.
        player: player id.
        **kvargs: accepted and ignored, so a whole table record can be
            splatted into the call.

    Returns:
        A dict with the keys ``'table'``, ``'player'`` and ``'version'``, or
        None when the table info fails ``valid_info``.
    """
    info = get_table_info(table)
    # valid_info is the info validator defined in this notebook; the bare
    # name ``valid`` is not defined anywhere in it.
    if not valid_info(info):
        return None
    return {
        'table': table,
        'player': player,
        'version': info['siteversion']
    }

In [None]:
import os.path

def load_table(t):
    """Download one table and store it as ``pr/pr_<table>.json``.

    Tables whose file already exists are skipped. On any error the failure
    is printed and the session is logged in with the next account from
    ``usr``; the failed table itself is not attempted again by this call.

    Args:
        t: record with at least the keys ``'table'`` and ``'player'``.

    Raises:
        Exception: the original error is re-raised once no further account
            is left to switch to.
    """
    global usrno
    try:
        tbl = t['table']
        fn = 'pr/pr_{}.json'.format(tbl)
        if os.path.exists(fn):
            # Already downloaded: avoid repeating three HTTP requests.
            return
        d = load_table_datas(tbl, t['player'])
        if d:
            save_to_json_file(fn, d)
    except Exception as e:
        print('\n Error: ', str(e), t)
        usrno += 1
        if usrno < len(usr):
            # Print only the user name; the full entry contains the password.
            print('\n Retrying with: ', usr[usrno].get('u'))
            use_usr(usrno)
        else:
            raise

In [None]:
# Base URL of the scraped site, stored reversed and flipped back with [::-1].
# The resulting string ends with '/'.
site = '/moc.aneraemagdraob.ne//:sptth'[::-1]
# Accounts available for logging in: 'u' user name, 'p' password, 'e' e-mail.
# use_usr() reads 'e' and 'p'.
# NOTE(review): these credentials are hardcoded in the notebook; consider
# loading them from the environment or a file kept out of version control.
usr = [
#     {'e' : 'tolar', 'p' : 'here'},
    {'u' : 'RMelin', 'p' : 'Melin123', 'e': 'e7087812@nwytg.net'},
    {'u' : 'RobertNN', 'p' : 'RobertN123', 'e': 'e7107542@nwytg.net'},
    {'u' : 'f2658863', 'p' : 'f265886ppp', 'e': 'f2658863@nwytg.net'},
    {'u' : 'f2648108', 'p' : 'f264810ppp', 'e': 'f2648108@nwytg.net'},
    {'u' : 'f2677456', 'p' : 'f267745ppp', 'e': 'f2677456@nwytg.net'},
    {'u' : 'f2678869', 'p' : 'f267886ppp', 'e': 'f2678869@nwytg.net'},
    {'u' : 'f2776045', 'p' : 'f277604ppp', 'e': 'f2776045@nwytg.net'},
]

def use_usr(id):
    """Log the shared session in as account number ``id`` of ``usr``.

    Returns the response of the login POST.
    """
    account = usr[id]
    credentials = {'email': account['e'], 'password': account['p']}
    login_url = site + 'account/account/login.html'
    return s.post(login_url, data=credentials)
  
# Shared HTTP session: use_usr() logs it in and the fetch helpers send their
# requests through it.
s = requests.Session()
# Index into usr of the account in use; load_table() advances it after an
# error. 6 selects the last entry of the list.
usrno = 6
use_usr(usrno)

In [None]:
%%bash
# tar -xvzf pr_tables.tgz
# tar -xvzf pr_4.tgz | wc -l

In [None]:
# Load the list of table records to download; the cells below slice it and
# read each record's 'table' key.
l = load_from_json_file('pr_tables_filtered.json')
len(l)

In [None]:
%ls pr | wc -l

In [None]:
# Download the 100 records starting at position `start`, writing progress
# dots as it goes. No size is passed, so no ETA line is printed.
start = 1300
for t in with_progress(l[start:start+100]):
    load_table(t)

In [None]:
# Print the positions, within the 50 records starting at `start`, of tables
# that have no pr/pr_<table>.json file yet although their info passes
# valid_info.
start = 1000
for i,t in enumerate(l[start:start+50]):
    fn = 'pr/pr_{}.json'.format(t['table'])
    if not os.path.exists(fn):
        info = get_table_info(t['table'])
        if valid_info(info):
            print(start+i)
#             break

In [None]:
%%bash
tar -cvzf pr_4.tgz pr | wc -l
# tar -cvzf pr_tables.tgz pr_tables*.json

In [None]:
# Run load_table for a single hand-written record.
# load_table({'table': '43390300', 'player': '84896075', 'player_amount': 4, 'timestamp': '1535458831'})
load_table({'table': '43301667', 'player': '83982148', 'player_amount': 4, 'timestamp': '1535204755'})