In [None]:
import pickle
import pandas as pd
import os
import peppi_py
import tree
import json
import io
import math
import tqdm.notebook
import functools

from slippi_db import utils, preprocessing, parse_peppi

In [None]:
# Enable itables for this notebook session.
# NOTE(review): all_interactive=True presumably makes every displayed
# DataFrame/Series render as an interactive table — confirm against itables docs.
from itables import init_notebook_mode
init_notebook_mode(all_interactive=True)

In [None]:
# Base directory that the parsed.pkl path is joined onto.
# NOTE(review): absolute, machine-specific path — adjust before running elsewhere.
root = '/linusr/vlad/SSBM/Replays/'

In [None]:
# Location of the pickled data under `root`.
parsed_path = os.path.join(root, 'parsed.pkl')

# Read the whole file into memory as raw bytes; decoding happens separately
# (pickle.loads on data_bytes) so the two steps can be timed independently.
with open(parsed_path, 'rb') as f:
    data_bytes = f.read()
len(data_bytes)  # size of the pickle payload in bytes

In [None]:
%%time
# Decode the bytes read from parsed.pkl.
# NOTE: unpickling can execute arbitrary code — only load files you trust.
data = pickle.loads(data_bytes)
del data_bytes  # drop the raw buffer now that it has been decoded
len(data)  # number of top-level entries in the unpickled object

In [None]:
# %%time
# data_json = json.dumps(data)
# len(data_json)

In [None]:
# %%time
# json.loads(data_json);

In [None]:
# Tabular view of `data`.
# NOTE(review): presumably `data` is a list of per-replay dicts (it is later
# iterated as rows with 'valid', 'is_training', 'players' keys) — confirm.
df = pd.DataFrame(data)

In [None]:
@functools.cache
def compact_raw(raw: str) -> str:
    """Shorten a 'raw' value to a coarse grouping label.

    Values beginning with 'Players/' are reduced to the path component that
    immediately follows that prefix; every other value is returned unchanged.
    Results are memoized per distinct input string.
    """
    if not raw.startswith('Players/'):
        return raw
    # The prefix guarantees at least two '/'-separated components.
    _, second_component, *_ = raw.split('/')
    return second_component

# Add a column holding the shortened form of each row's 'raw' value.
df['compact_raw'] = df['raw'].map(compact_raw)

In [None]:
# Count of each 'reason' value within every compact_raw group.
df.groupby('compact_raw')['reason'].value_counts()

In [None]:
# Mean of 'is_training' per compact_raw group
# (the fraction of True rows, assuming the column is boolean).
df.groupby('compact_raw')['is_training'].mean()

In [None]:
# Rows whose 'valid' flag is false, then the count of each 'reason' value
# per compact_raw group among just those rows.
# The groupby must use the frame defined on the line above: grouping
# `not_training_reasons` here would fail on a fresh kernel, because that
# name is only created in a later cell.
not_valid_reasons = df[~df['valid']]
not_valid_reasons.groupby('compact_raw')['reason'].value_counts()

In [None]:
# Rows that carry a non-empty 'not_training_reason', then the count of each
# such reason per compact_raw group.
not_training_reasons = df[df['not_training_reason'] != '']
not_training_reasons.groupby('compact_raw')['not_training_reason'].value_counts()

In [None]:
# has_winner is True wherever 'winner' is not null.
df['has_winner'] = ~df['winner'].isnull()
# Subsets used for the breakdowns: rows flagged valid, rows whose is_training
# equals True, and the is_training rows that lack a winner.
valid = df[df['valid']]
training = df[df['is_training'] == True]
no_winner = training[~training['has_winner']]

In [None]:
# Fraction of is_training rows that have a winner, per compact_raw group.
training.groupby('compact_raw')['has_winner'].mean()

In [None]:
import typing as tp
import itertools
import melee
from slippi_ai import nametags

In [None]:
def split_row(row: dict) -> tp.Iterator[dict]:
    """Yield one flattened dict for each entry of row['players'].

    Each yielded dict holds every key of `row` except 'players', overlaid with
    the player's own keys (player values win on a clash), plus three derived
    keys:
      - 'name': nametags.normalize_name applied to nametags.name_from_metadata(player)
      - 'char': char_to_str(player['character'])
      - 'compact_raw': compact_raw(row['raw'])
    """
    for player in row['players']:
        # Fresh dict per player so the yielded rows never share state.
        flat = {key: value for key, value in row.items() if key != 'players'}
        flat.update(player)

        raw_name = nametags.name_from_metadata(player)
        flat['name'] = nametags.normalize_name(raw_name)
        flat['char'] = char_to_str(player['character'])
        flat['compact_raw'] = compact_raw(row['raw'])
        yield flat

def char_to_str(char: int) -> str:
    """Return the `.name` of the melee.Character constructed from `char`."""
    character = melee.Character(char)
    return character.name

def make_per_player_df(rows):
    """Build a DataFrame with one row per player of each usable entry.

    Entries of `rows` whose 'valid' or 'is_training' value is falsy are
    ignored; every remaining entry is expanded through split_row. Progress is
    shown with a tqdm notebook bar.
    """
    player_records = []
    for entry in tqdm.notebook.tqdm(rows):
        if entry['valid'] and entry['is_training']:
            player_records.extend(split_row(entry))

    return pd.DataFrame(player_records)

In [None]:
%%time
# Expand the unpickled entries into one row per player
# (make_per_player_df skips entries that are not valid or not is_training).
per_player_df = make_per_player_df(data)

In [None]:
# per_player_df.groupby('compact_raw')['name'].value_counts()

In [None]:
# Per-player rows whose normalized name is exactly 'Zain', counted per
# compact_raw group.
zain_df = per_player_df[per_player_df['name'] == 'Zain']
zain_df['compact_raw'].value_counts()

In [None]:
# Flag rows whose normalized name is the empty string.
per_player_df['blank_name'] = per_player_df['name'] == ''

In [None]:
def is_player(raw):
    """Return True when `raw` begins with 'Players', False otherwise."""
    prefix = 'Players'
    return raw.startswith(prefix)

# Keep only per-player rows whose 'raw' value starts with 'Players';
# the last line shows how many rows remain.
player_df = per_player_df[per_player_df['raw'].map(is_player)]
len(player_df)

In [None]:
# Fraction of blank names per compact_raw group among the 'Players' rows.
player_df.groupby('compact_raw')['blank_name'].mean()

In [None]:
# Frequency of each normalized name across all per-player rows.
per_player_df['name'].value_counts()

In [None]:
# Mark rows whose name nametags.is_banned_name flags, then keep the rest.
per_player_df['banned'] = per_player_df['name'].map(nametags.is_banned_name)
legal_df = per_player_df[~per_player_df['banned']]

In [None]:
# Number of non-banned per-player rows for each (char, name) pair,
# ordered from the most frequent pair to the least.
by_char_name = (
    legal_df
    .groupby(['char', 'name'])
    .size()
    .reset_index(name='count')
    .sort_values(by='count', ascending=False)
)

In [None]:
# Rows of by_char_name for the character 'MARTH' (x is only a short alias).
x = by_char_name
x[x['char'] == 'MARTH']

In [None]:
# Per-player rows belonging to the 'KoDoRiN' compact_raw group.
kodo = per_player_df[per_player_df['compact_raw'] == 'KoDoRiN']

In [None]:
# Characters recorded under the name '8#9' within the KoDoRiN rows.
kodo[kodo['name'] == '8#9']['char'].value_counts()

In [None]:
# Per-player rows belonging to the 'Mango' compact_raw group, and their count.
mango = per_player_df[per_player_df['compact_raw'] == 'Mango']
len(mango)

In [None]:
# Frequency of each normalized name within the Mango rows.
mango['name'].value_counts()

In [None]:
# Column labels of the frame built from `data`.
df.keys()

In [None]:
# Rows of `df` (not the per-player frame) in the 'KoDoRiN' group, and their count.
kodo_df = df[df['compact_raw'] == 'KoDoRiN']
len(kodo_df)

In [None]:
# Frequency of each 'not_training_reason' value among the KoDoRiN rows.
kodo_df['not_training_reason'].value_counts()

In [None]:
# Frequency of each 'slippi_version' value among the KoDoRiN rows.
kodo_df['slippi_version'].value_counts()

In [None]:
# Frequency of each 'slippi_version' value among rows of `df` in the 'Mango' group.
df[df['compact_raw'] == 'Mango']['slippi_version'].value_counts()