## Init & utils

In [None]:
# !tar -xvzf pr_4.tgz | wc -l

In [None]:
import collections

from jsonpath_rw import jsonpath, parse
import jsonpath_rw_ext as jsonp

In [None]:
import os
import json 
from json import JSONEncoder, JSONDecoder
import pickle

try:
    _TEXT_TYPES = (str, unicode)  # Python 2 has a separate unicode type
except NameError:
    _TEXT_TYPES = (str,)  # Python 3: str is already the text type


class PythonObjectEncoder(JSONEncoder):
    """JSON encoder that falls back to pickle for non-JSON values.

    Any object the stock encoder cannot serialise is emitted as
    ``{'_python_object': <pickle as text>}`` so that `as_python_object`
    can restore it when the document is loaded again.
    """

    def default(self, obj):
        """Return a JSON-serialisable stand-in for `obj`.

        Natively supported types are delegated to `JSONEncoder.default`
        (which raises `TypeError`); everything else is pickled.
        """
        native = (list, dict, int, float, bool, type(None)) + _TEXT_TYPES
        if isinstance(obj, native):
            return JSONEncoder.default(self, obj)
        # pickle.dumps returns bytes on Python 3, which JSON cannot encode.
        # Protocol 0 matches what Python 2 wrote by default, and latin-1 maps
        # every byte to exactly one code point, so the text round-trips.
        return {'_python_object': pickle.dumps(obj, 0).decode('latin-1')}


def as_python_object(dct):
    """`object_hook` for `json.load`: restore values pickled by the encoder.

    Dicts carrying a ``'_python_object'`` key are replaced by the unpickled
    object; every other dict is returned unchanged.
    """
    if '_python_object' in dct:
        payload = dct['_python_object']
        if not isinstance(payload, bytes):
            # JSON hands the payload back as text; pickle needs bytes.
            payload = payload.encode('latin-1')
        # NOTE(security): unpickling can execute arbitrary code. Only load
        # files that come from a trusted source.
        return pickle.loads(payload)
    return dct

def save_to_json_file(filename, content):
    """Serialise `content` to JSON and write it to `filename`.

    Serialisation happens before the file is opened, so a value that cannot
    be encoded leaves any existing file untouched.

    Returns the string ``"saved "`` followed by `filename`.
    """
    serialized = json.dumps(content, cls=PythonObjectEncoder)
    with open(filename, 'w') as out:
        out.write(serialized)
    return "saved " + filename

def load_from_json_file(file_name):
    """Read `file_name` as JSON and return the decoded document.

    Every decoded dict is passed through `as_python_object`.
    """
    with open(file_name) as source:
        parsed = json.load(source, object_hook=as_python_object)
    return parsed

## Printing

In [None]:
from pprint import pprint
from IPython.display import JSON

# Number of repetitions used by print_line for one printed rule.
width = 180


def print_line(char=' '):
    """Print `char` repeated `width` times on a line of its own."""
    rule = char * width
    print(rule)

In [None]:
# Intervals (in processed items) that drive print_progress, by index:
#   0: print a dot           1: insert a space        2: insert a wider gap
#   3: stats line / newline  4: extra blank line      5: batch size reported
#                                                        to print_stats
points = [1, 5, 20, 100, 500, 100]

import sys
import time
from datetime import datetime


def print_progress(i, timer_log=None, total=None):
    """Write a progress marker for item number `i` to stdout.

    Dots are grouped by spaces and line breaks according to `points`. Every
    `points[3]` items, when `total` is given, an ETA line is printed through
    `print_stats`.

    Args:
        i: zero-based index of the item being processed.
        timer_log: timestamp (as from `time.time()`) of the previous stats
            line, or None if timing has not started yet.
        total: total number of items; falsy disables the ETA line.

    Returns:
        The timestamp to pass as `timer_log` on the next call.
    """
    if i % points[0] != 0:
        return timer_log
    if i > 0 and i % points[1] == 0:
        sys.stdout.write(' ')
        if i % points[2] == 0:
            sys.stdout.write('  ')
            if i % points[3] == 0:
                if total and timer_log is not None:
                    timer_log = print_stats(total, i, points[5], timer_log)
                else:
                    sys.stdout.write('\n')
                    if total:
                        # No baseline yet: start timing here instead of
                        # handing None to print_stats, which would raise.
                        timer_log = time.time()
                if i % points[4] == 0:
                    sys.stdout.write('\n')
    sys.stdout.write('.')
    sys.stdout.flush()
    return timer_log
        
def print_stats(total, processed, bunch, last_time, **kwargs):
    """Print an ETA line based on how long the last batch took.

    Args:
        total: total number of items to process.
        processed: number of items processed so far.
        bunch: number of items handled since `last_time`.
        last_time: timestamp (as from `time.time()`) when the batch started.
        **kwargs: accepted and ignored.

    Returns:
        The current timestamp, to be used as `last_time` for the next batch.
    """
    now = time.time()
    elapsed = now - last_time
    speed = elapsed / 60.0  # minutes spent on the last `bunch` items
    # Plain arithmetic instead of datetime.fromtimestamp: that call applies
    # the local UTC offset (wrong minutes in half-hour timezones) and wraps
    # around once the estimate exceeds an hour.
    remaining = max(0, int(elapsed * (total - processed) / bunch))
    minutes, seconds = divmod(remaining, 60)
    hours, minutes = divmod(minutes, 60)
    if hours:
        eta = '{}:{:02d}:{:02d}'.format(hours, minutes, seconds)
    else:
        eta = '{:02d}:{:02d}'.format(minutes, seconds)
    print('eta: {eta} ({speed:.1f} min/{bunch})'.format(speed=speed, eta=eta, bunch=bunch))
    return now

def with_progress(l, size=None):
    """Yield the items of `l` unchanged, printing progress markers as it goes.

    `print_progress` is called with each item's index before the item is
    yielded. `size` is forwarded to it as the total item count.
    """
    last_mark = time.time()
    for position, item in enumerate(l):
        last_mark = print_progress(position, last_mark, size)
        yield item

## Data manipulations

In [None]:
def collect_data(files, collector):
    """Lazily load each file and yield `collector(document)`.

    Each loaded document gets its source path stored under the
    ``'file_name'`` key before it is handed to `collector`.
    """
    for path in files:
        document = load_from_json_file(path)
        document['file_name'] = path
        yield collector(document)

def flattern(data):
    """Flatten one level of nesting: an iterable of iterables becomes a list."""
    flat = []
    for inner in data:
        flat.extend(inner)
    return flat

def get_nodes(data, ptr):
    """Return, as a list, everything the JSONPath expression `ptr` selects in `data`."""
    selector = select_all(ptr)
    return list(selector(data))

In [None]:
def select_all(key):
    """Build a function that applies `jsonp.match` with the JSONPath `key` to its argument."""
    def matcher(data):
        return jsonp.match(key, data)
    return matcher

def select_one(key):
    """Build a function that applies `jsonp.match1` with the JSONPath `key` to its argument."""
    def matcher(data):
        return jsonp.match1(key, data)
    return matcher

def contains(key, value):
    """Build a predicate that is true when `jsonp.match1(key, data)` equals `value`."""
    def predicate(data):
        found = jsonp.match1(key, data)
        return found == value
    return predicate

def all_of(*filters):
    """Combine predicates into one that is True only if every filter accepts.

    Filters are evaluated in order and evaluation stops at the first one that
    returns a falsy value. With no filters the combined predicate is True.
    """
    def check(data):
        return all(predicate(data) for predicate in filters)
    return check

In [None]:
def for_each(data, *funs):
    """Pipe `data` through `funs` left to right and return the final result.

    Each function receives the previous function's return value. With no
    functions, `data` is returned as is.
    """
    result = data
    for step in funs:
        result = step(result)
    return result

## Prepare data & clean up

In [None]:
%load_ext memory_profiler
%load_ext autotime

In [None]:
from os import listdir
from os.path import isfile, join

# Directory holding one JSON file per game.
game = 'pr'
# Sorted paths of every .json file directly inside that directory.
games = sorted(join(game, f) for f in listdir(game) if f.endswith('.json'))
len(games)

In [None]:
# Collect in `rmc` the game files that should be removed. A file is flagged
# by the first of these checks that applies (the order matters: the emptiness
# check must run before anything indexes into `d`).
rmc = []
for f in with_progress(games, len(games)):
    d = load_from_json_file(f)
    if not d:
        # Empty or falsy document.
        rmc.append(f)
    elif not contains('$.info.result.endgame_reason', 'normal_end')(d):
        # The game did not finish with endgame_reason == 'normal_end'.
        rmc.append(f)
    elif select_one('$..data[?(@.type == "gameStateChange" & @.args.action == "stGameEnd")].args.args.table.neutralized')(d['log']) != '0':
        # The stGameEnd state change does not report neutralized == '0'.
        # NOTE(review): presumably a log without any stGameEnd entry also
        # lands here, because the selector result then differs from '0' -
        # confirm against jsonp.match1.
        rmc.append(f)
    elif not all_of(
        contains('$."100".value', '0'),
        contains('$."101".value', '0'),
        contains('$."102".value', '0'),
        contains('$."103".value', '0')                       
    )(d['info']['options']):
        # At least one of options 100-103 has a value other than '0'.
        # NOTE(review): the meaning of these option ids is not visible here.
        rmc.append(f)
len(rmc)

In [None]:
# Permanently delete from disk every file path collected in `rmc`.
for f in rmc:
    os.remove(f)

!rm pr_4.tgz
!tar -cvzf pr_4.tgz pr | wc -l

### check single file

In [None]:
# Load one game file for manual inspection; swap in one of the commented
# paths below to look at a specific game instead.
f = games[1]
# f = 'pr/pr_44010638.json'
# f = 'pr/pr_42000779.json'
data1 = load_from_json_file(f)
# The log entries sit three 'data' levels below 'log'.
game_log = data1['log']['data']['data']['data']
# Second-to-last item of the last log entry.
# NOTE(review): presumably this is the stGameEnd state change - confirm.
stat = game_log[-1]['data'][-2]['args']['args']

In [None]:
# Display `stat` through IPython's JSON display object.
JSON(stat)

### validate

In [None]:
def copy_ids(src, dst):
    """Copy the identifying fields of `src` into `dst` and return `dst`.

    ``src['table']`` is stored as ``dst['table_id']`` and ``src['file_name']``
    as ``dst['file_name']``; `dst` is modified in place.
    """
    key_map = (('table_id', 'table'), ('file_name', 'file_name'))
    for target_key, source_key in key_map:
        dst[target_key] = src[source_key]
    return dst

def get_logs(data):
    """Return the first match of ``$[*].log.data.data`` in `data`, tagged with ids.

    The match gets ``table_id`` and ``file_name`` copied onto it from `data`
    by `copy_ids`, which modifies it in place.
    """
    find_log = select_one('$[*].log.data.data')
    return copy_ids(data, find_log(data))

def get_stats(data):
    """Return the ``args.args`` of the stGameEnd state change, tagged with ids.

    The first node matched in `data` gets ``table_id`` and ``file_name``
    copied onto it from `data` by `copy_ids`, which modifies it in place.
    """
    find_end_state = select_one('$..data[?(@.type == "gameStateChange" & @.args.action == "stGameEnd")].args.args')
    return copy_ids(data, find_end_state(data))

In [None]:
# Extract the end-of-game stats from the first `sz` game files (all of them
# by default; lower `sz` for a quick partial run).
# NOTE(review): `games` is listed before the clean-up cell deletes files, so
# the listing cell presumably has to be re-run first - otherwise the removed
# paths would fail to open here. Confirm.
sz = len(games)
stats = list(collect_data(with_progress(games[:sz], sz), get_stats))
# End the line of progress dots.
print()

In [None]:
# Validation: collect every stats record whose table is not marked
# neutralized == '0', then print how many there are and a shallow preview
# of the first five.
chk = [data for data in with_progress(stats) if data['table']['neutralized'] != '0']
print(len(chk))
pprint(chk[:5], depth=3, width=180)