In [None]:
import collections
import pickle
import pandas as pd
import os
import peppi_py
import tree
import json
import io
import math
import tqdm.notebook
import functools

from slippi_db import utils, preprocessing, parse_peppi

In [None]:
# Initialise itables with all_interactive=True; presumably this makes the
# DataFrames displayed below render as interactive tables (see itables docs).
from itables import init_notebook_mode
init_notebook_mode(all_interactive=True)

In [None]:
# Root directory of the replay dataset. The original machine-specific path is
# kept as the default; set the SLIPPI_REPLAY_ROOT environment variable to run
# the notebook against a dataset stored elsewhere.
root = os.environ.get('SLIPPI_REPLAY_ROOT', '/linusr/vlad/SSBM/Replays')

In [None]:
# Read the whole parsed.pkl file under `root` into memory as raw bytes.
parsed_path = os.path.join(root, 'parsed.pkl')

with open(parsed_path, 'rb') as f:
    data_bytes = f.read()
len(data_bytes)  # cell output: size of the pickle file in bytes

In [None]:
%%time
# NOTE(review): pickle.loads can execute arbitrary code while deserializing;
# only run this on a parsed.pkl produced by a trusted pipeline.
data = pickle.loads(data_bytes)
del data_bytes  # release the raw bytes once they have been deserialized
len(data)  # cell output: number of entries in the unpickled object

In [None]:
# %%time
# data_json = json.dumps(data)
# len(data_json)

In [None]:
# %%time
# json.loads(data_json);

In [None]:
# Build a DataFrame from the unpickled `data` for the groupby summaries below.
df = pd.DataFrame(data)

In [None]:
@functools.cache
def compact_raw(raw: str) -> str:
    """Collapse a raw path string into a short source label.

    'Players/<x>/...' becomes '<x>', anything starting with 'Phillip/'
    becomes 'Phillip', and every other value is returned unchanged.
    Results are memoized per distinct input string.
    """
    head, slash, tail = raw.partition('/')
    if slash:
        if head == 'Players':
            # Second path component, i.e. the entry directly under 'Players/'.
            return tail.split('/')[0]
        if head == 'Phillip':
            return 'Phillip'
    return raw

# Add the short source label computed from each row's 'raw' value.
df['compact_raw'] = df['raw'].map(compact_raw)

In [None]:
# Count of each 'reason' value within every source label.
df.groupby('compact_raw')['reason'].value_counts()

In [None]:
# Mean of 'is_training' per source label (a fraction, assuming the column is boolean).
df.groupby('compact_raw')['is_training'].mean()

In [None]:
# Rows whose 'valid' flag is False, and the count of each 'reason' value among
# them per source label. The second line previously read an undefined name
# (`not_training_reasons`) and raised NameError on a fresh kernel.
not_valid_reasons = df[~df['valid']]
not_valid_reasons.groupby('compact_raw')['reason'].value_counts()

In [None]:
# Per-source share of each 'not_training_reason' value (normalize=True yields
# fractions instead of counts). The disabled line below would first restrict
# the frame to rows whose reason is not the empty string.
# not_training_reasons = df[df['not_training_reason'] != '']
df.groupby('compact_raw')['not_training_reason'].value_counts(normalize=True)

In [None]:
# Flag rows with a non-null 'winner', then derive the row subsets used below:
# rows marked valid, rows marked as training, and training rows lacking a winner.
df['has_winner'] = df['winner'].notnull()
valid = df.loc[df['valid']]
training = df.loc[df['is_training'] == True]
no_winner = training.loc[~training['has_winner']]

In [None]:
# Fraction of training rows with a non-null winner, per source label.
training.groupby('compact_raw')['has_winner'].mean()

In [None]:
# Rows whose source label is 'Phillip'. An explicit copy is taken because a
# column is assigned on phillip_df afterwards; writing to a filtered slice of
# `df` triggers SettingWithCopyWarning and leaves it ambiguous whether `df`
# is affected.
phillip_df = df[df['compact_raw'] == 'Phillip'].copy()
len(phillip_df)

In [None]:
# 'agent' is the text before the first '/' in each row's 'name'.
phillip_df['agent'] = phillip_df['name'].map(lambda s: s.split('/')[0])

In [None]:
# Number of Phillip rows per agent.
phillip_df['agent'].value_counts()

In [None]:
# Count of each 'not_training_reason' value among the Phillip rows.
phillip_df['not_training_reason'].value_counts()

In [None]:
import typing as tp
import itertools
import melee
from slippi_ai import nametags

In [None]:
def split_row(row: dict) -> tp.Iterator[dict]:
    """Yield one flattened record per entry in ``row['players']``.

    Each record holds the row's fields (minus 'players') overlaid with the
    player's own fields, plus:
      * 'filename'    - the value stored under 'name' after that overlay
      * 'name'        - normalized name derived from the player metadata
      * 'char'        - enum name for the player's 'character'
      * 'compact_raw' - short source label for the row's 'raw'
    """
    for player in row['players']:
        record = {key: value for key, value in row.items() if key != 'players'}
        record.update(player)
        # Keep the existing 'name' under 'filename' before it is overwritten below.
        record['filename'] = record['name']
        record['name'] = nametags.normalize_name(nametags.name_from_metadata(player))
        record['char'] = char_to_str(player['character'])
        record['compact_raw'] = compact_raw(row['raw'])
        yield record

def char_to_str(char: int) -> str:
    """Return the ``melee.Character`` member name for an integer character id."""
    character = melee.Character(char)
    return character.name

def make_per_player_df(rows):
    """Build a per-player DataFrame from an iterable of row dicts.

    Rows whose 'valid' or 'is_training' value is falsy are skipped; every
    remaining row contributes one record per player via ``split_row``.
    Progress is shown with a notebook tqdm bar.
    """
    records = [
        player_record
        for row in tqdm.notebook.tqdm(rows)
        if row['valid'] and row['is_training']
        for player_record in split_row(row)
    ]
    return pd.DataFrame(records)

In [None]:
%%time
# Flatten the valid training rows of `data` into one record per player.
per_player_df = make_per_player_df(data)

In [None]:
# per_player_df.groupby('compact_raw')['name'].value_counts()

In [None]:
# Per-player records whose character name is 'FALCO'.
falco_df = per_player_df[per_player_df['char'] == 'FALCO']
len(falco_df)

In [None]:
# Number of Falco records per normalized player name.
falco_df['name'].value_counts()

In [None]:
# Falco record counts per (name, source label) pair, as a flat frame with a 'count' column.
by_name_and_raw = falco_df.groupby(['name', 'compact_raw']).size().reset_index(name='count')

In [None]:
# Largest (name, source) groups first.
by_name_and_raw.sort_values('count', ascending=False)

In [None]:
# Per-player records from the 'BillyBoPeep' source.
# NOTE(review): the variable is named kjh_df but filters on 'BillyBoPeep' —
# confirm that this is the intended source for the KJH analysis.
kjh_df = per_player_df[per_player_df['compact_raw'] == 'BillyBoPeep']
len(kjh_df)

In [None]:
# Character distribution within that source.
kjh_df['char'].value_counts()

In [None]:
# Player-name distribution within that source.
kjh_df['name'].value_counts()

In [None]:
# Records whose normalized name is 'Phillip AI', counted per source label.
phil_df = per_player_df[per_player_df['name'] == 'Phillip AI']
phil_df['compact_raw'].value_counts()

In [None]:
# Opponents of Phillip in one specific archive. compact_raw() collapses every
# 'Phillip/...' path to 'Phillip', so the archive must be matched on the full
# 'raw' value; comparing 'compact_raw' to the full path can never match and
# would leave this frame empty.
vs_phil_df = per_player_df[per_player_df['raw'] == 'Phillip/Phillip-2025-3-30.zip']
# Drop the records for Phillip itself, keeping only the other players.
vs_phil_df = vs_phil_df[vs_phil_df['name'] != 'Phillip AI']
len(vs_phil_df)

In [None]:
# Number of records per opponent name.
vs_phil_df['name'].value_counts()

In [None]:
# First entry of every group in nametags.name_groups — presumably the
# canonical name for that player; verify against the nametags module.
KNOWN_PLAYERS = {group[0] for group in nametags.name_groups}

In [None]:
# Restrict to opponents whose name is in KNOWN_PLAYERS; copied so that
# columns can be added below without touching vs_phil_df.
known_phil_df = vs_phil_df[vs_phil_df['name'].isin(KNOWN_PLAYERS)].copy()
len(known_phil_df)

In [None]:
# Number of records per known opponent.
known_phil_df['name'].value_counts()

In [None]:
# True where the record has a non-null 'winner'.
known_phil_df['has_winner'] = known_phil_df['winner'].notnull()

In [None]:
# Fraction of each known opponent's records that have a winner recorded.
known_phil_df.groupby('name')['has_winner'].mean()

In [None]:
# Flag records where 'damage_taken' is at least 100, then show the flagged
# fraction for each known opponent.
known_phil_df['sufficient_damage'] = known_phil_df['damage_taken'].ge(100)
known_phil_df.groupby('name')['sufficient_damage'].mean()