In [None]:
import os
from slippi_db import make_compression_datasets
from slippi_db import upload_lib

In [None]:
# Environment name handed to the upload_lib calls below; it also names the
# local data/<env> directory that the test files are downloaded into.
env = 'compression_test'

In [None]:
# Pull every record from the upload_lib.SLP database for this environment and
# total their 'stored_size' fields.
slp_db = upload_lib.get_db(env, upload_lib.SLP)
slp_infos = [*slp_db.find({})]
slp_size = sum(record['stored_size'] for record in slp_infos)

In [None]:
def get_dataset_size(dataset: str):
    """Total the ``size`` field over the non-failed records of ``dataset``.

    The database is looked up with ``upload_lib.get_db`` using the
    notebook-level ``env``. Records whose ``failed`` field is truthy are
    skipped; a record without a ``failed`` field is treated as not failed.

    Args:
        dataset: Name passed to ``upload_lib.get_db`` to select the database.

    Returns:
        The sum of ``record['size']`` over the kept records (0 if there are
        none).
    """
    total = 0
    for record in upload_lib.get_db(env, dataset).find({}):
        if record.get('failed', False):
            continue
        total += record['size']
    return total

In [None]:
# One total per entry of make_compression_datasets.configurations
# (iterating it is assumed to yield dataset names — TODO confirm), plus the
# SLP database total under the key 'slp'.
sizes = {
    dataset: get_dataset_size(dataset)
    for dataset in make_compression_datasets.configurations
}
sizes['slp'] = slp_size

In [None]:
# Print each dataset's size as a fraction of the 'uncompressed' entry.
baseline = sizes['uncompressed']
for name, size in sizes.items():
    ratio = size / baseline
    print(name, '%.3f' % ratio)

In [None]:
# Download test files
# Each record in slp_infos is passed, by its 'key', to
# upload_lib.download_slp_locally together with a per-environment directory
# under data/.
slp_dir = f'data/{env}'
# exist_ok=True keeps this cell re-runnable once the directory exists.
os.makedirs(slp_dir, exist_ok=True)
for info in slp_infos:
    # NOTE(review): presumably writes one file per key into slp_dir — confirm
    # against upload_lib.
    upload_lib.download_slp_locally(env, info['key'], slp_dir)

In [None]:
import pickle, zlib
from slippi_db import parse_libmelee
from slippi_ai.types import InvalidGameError, array_to_nest

In [None]:
# Path of every entry currently present in the download directory.
paths = [os.path.join(slp_dir, filename) for filename in os.listdir(slp_dir)]

In [None]:
# Parse every downloaded file with parse_libmelee.get_slp, keeping only the
# ones that parse.
pa_arrays = []

for path in paths:
    try:
        parsed = parse_libmelee.get_slp(path)
    except InvalidGameError:
        # Files rejected as invalid games are left out of the local totals.
        # NOTE(review): slp_size still counts every record, so skipped games
        # would inflate the final slp/pickle ratio — confirm this is intended.
        continue
    pa_arrays.append(parsed)

In [None]:
def nested_size(array) -> int:
    """Return the compressed byte count of ``array`` stored as a pickled nest.

    The array is converted with ``array_to_nest``, serialized with
    ``pickle.dumps`` (default protocol) and compressed with zlib at level 9;
    the length of the compressed bytes is returned.

    Args:
        array: Value accepted by ``array_to_nest`` (here, an element of
            ``pa_arrays``).

    Returns:
        Number of bytes in the zlib-compressed pickle.
    """
    return len(zlib.compress(pickle.dumps(array_to_nest(array)), level=9))

In [None]:
# Combined compressed-pickle size of all successfully parsed games.
total_nested_size = sum(nested_size(array) for array in pa_arrays)

In [None]:
# Record the locally computed pickle+zlib total alongside the database-derived
# sizes so it appears in the comparison below.
sizes['pickle'] = total_nested_size

In [None]:
# Walk the entries from smallest to largest total size and print how many
# times larger the 'slp' total is than each one.
for name in sorted(sizes, key=sizes.get):
    print(name, '%.3f' % (sizes['slp'] / sizes[name]))