In [None]:
import os
import time
import pandas as pd
from slippi_db import make_compression_datasets
from slippi_db import upload_lib

In [None]:
# Environment name: selects the databases opened via upload_lib.get_db, the
# files fetched by upload_lib.download_slp_locally, and the local data folder.
env = 'compression_test'

In [None]:
# Pull every raw-replay record for this environment into memory, then total
# the stored (compressed) and original sizes across all of them.
slp_db = upload_lib.get_db(env, upload_lib.SLP)
slp_infos = [record for record in slp_db.find({})]
slp_compressed_size = sum(map(lambda record: record['stored_size'], slp_infos))
slp_size = sum(map(lambda record: record['original_size'], slp_infos))

In [None]:
def get_dataset_size(dataset: str):
    """Sum the `size` field over all non-failed records of one dataset.

    Args:
        dataset: Name passed to `upload_lib.get_db` (together with the
            notebook-level `env`) to select the database to scan.

    Returns:
        The total of `size` over every record whose `failed` field is
        missing or falsy; 0 when there are no such records.
    """
    total = 0
    for record in upload_lib.get_db(env, dataset).find({}):
        if record.get('failed', False):
            # Failed parses are excluded from the total.
            continue
        total += record['size']
    return total

In [None]:
# All raw replays are expected to share a single `compression` value; take the
# first record's value as the reference and verify the rest agree with it.
compression = slp_infos[0]['compression']
assert all(record['compression'] == compression for record in slp_infos)
print(compression)

In [None]:
# Total size per parsed-dataset configuration, plus the raw replays both as
# stored (compressed) and at their original size.
sizes = {}
for dataset_name in make_compression_datasets.configurations:
    sizes[dataset_name] = get_dataset_size(dataset_name)
sizes.update({
    f'slp_{compression}_compressed': slp_compressed_size,
    'slp': slp_size,
})

In [None]:
# Tabulate the sizes and express each one relative to the 'uncompressed' entry.
names = tuple(sizes.keys())
sizes_ = tuple(sizes.values())
df = pd.DataFrame({'name': names, 'size': sizes_})
# NOTE(review): assumes 'uncompressed' is one of the keys of
# make_compression_datasets.configurations; raises KeyError otherwise.
baseline_size = sizes['uncompressed']
df['relative_size'] = df['size'] / baseline_size

In [None]:
# Order the table from smallest to largest relative size and show the result.
df = df.sort_values('relative_size')
df.loc[:, ['name', 'relative_size']]

In [None]:
# Download test files: local directory (one per environment) that the raw
# replay files are downloaded into and later listed from.
slp_dir = f'data/{env}'

In [None]:
# Make sure the target directory exists, then fetch each replay by its key.
os.makedirs(slp_dir, exist_ok=True)
for slp_key in (record['key'] for record in slp_infos):
    upload_lib.download_slp_locally(env, slp_key, slp_dir)

In [None]:
import pickle, zlib, os
from slippi_db import parse_libmelee, parse_peppi
from slippi_ai.types import InvalidGameError, array_to_nest

In [None]:
# Full path of every entry in the download directory.
paths = []
for filename in os.listdir(slp_dir):
    paths.append(os.path.join(slp_dir, filename))

In [None]:
%%time

# Parse every downloaded replay with libmelee, collecting the parsed arrays
# and recording (path, error) for replays rejected as invalid games.
# (parse_peppi.get_slp is the alternative parser that can be swapped in here.)
pa_arrays, errors = [], []

for slp_path in paths:
    print(slp_path)
    try:
        parsed_game = parse_libmelee.get_slp(slp_path)
    except InvalidGameError as err:
        errors.append((slp_path, err))
    else:
        pa_arrays.append(parsed_game)
len(errors)

In [None]:
def nested_size(array, level: int = zlib.Z_DEFAULT_COMPRESSION) -> int:
    """Return the size of a parsed game after pickling and zlib compression.

    Args:
        array: A parsed game (an element of `pa_arrays`); it is converted with
            `array_to_nest` before being pickled.
        level: zlib compression level (0-9), or `zlib.Z_DEFAULT_COMPRESSION`
            to let zlib choose. Defaults to zlib's own default so the function
            can be called with the array alone, e.g. `map(nested_size, arrays)`.

    Returns:
        Length in bytes of the zlib-compressed pickle of the nested structure.
    """
    nest = array_to_nest(array)
    pickled = pickle.dumps(nest)
    compressed = zlib.compress(pickled, level=level)
    return len(compressed)

In [None]:
%%time

# For each zlib level from 0 up to Z_BEST_COMPRESSION, record the summed
# compressed size of all parsed games and the wall-clock time it took.
totals, times = {}, {}
for level in range(zlib.Z_BEST_COMPRESSION + 1):
    began = time.perf_counter()
    level_total = 0
    for parsed_game in pa_arrays:
        level_total += nested_size(parsed_game, level)
    totals[level] = level_total
    times[level] = time.perf_counter() - began

In [None]:
# Elapsed time (time.perf_counter seconds) per zlib compression level.
times

In [None]:
# Summed pickled-and-compressed size of all parsed games per zlib level.
totals

In [None]:
# Total pickled+zlib size over all parsed games at zlib's default level.
# The level is passed explicitly: calling nested_size with the array alone
# (as `map(nested_size, pa_arrays)` did) omits its `level` argument.
total_nested_size = sum(
    nested_size(parsed_game, zlib.Z_DEFAULT_COMPRESSION)
    for parsed_game in pa_arrays
)

In [None]:
# Add the pickle+zlib total to the size table so it appears in the comparison.
sizes['pickle_zlib'] = total_nested_size

In [None]:
# Report, from smallest to largest total size, how many times smaller each
# entry is than the original (uncompressed) raw replays.
for name, size in sorted(sizes.items(), key=lambda item: item[1]):
    ratio = sizes['slp'] / size
    print(name, '%.3f' % ratio)