In [None]:
import collections
import pickle
import pandas as pd
import os
import peppi_py
import tree
import json
import io
import math
import tqdm.notebook
import functools
import typing as tp
import itertools
import melee
from slippi_ai import nametags

from slippi_db import utils, preprocessing, parse_peppi
from slippi_db.scripts.make_local_dataset import check_replay

In [None]:
# Switch on itables for this notebook. `all_interactive=True` is passed so
# that (going by the flag name) every displayed table uses its interactive
# renderer — see the itables documentation for the exact behaviour.
from itables import init_notebook_mode
init_notebook_mode(all_interactive=True)

In [None]:
# Directory that `parsed.pkl` is loaded from in the next cell.
# NOTE(review): hard-coded absolute path — adjust when running elsewhere.
root = '/linusr/vlad/SSBM/Replays'

In [None]:
# Read the pickled parse results into memory as raw bytes. Decoding is done in
# a separate cell, so the file read and the unpickle are timed independently.
parsed_path = os.path.join(root, 'parsed.pkl')

with open(parsed_path, 'rb') as f:
    data_bytes = f.read()
len(data_bytes)  # size of the pickle file in bytes

In [None]:
%%time
# SECURITY: pickle.loads can execute arbitrary code embedded in the payload;
# only run this on a parsed.pkl that comes from a trusted source.
data = pickle.loads(data_bytes)
# Drop the reference to the raw bytes so that memory can be reclaimed.
del data_bytes
len(data)  # number of entries in the unpickled object

In [None]:
# Tabular view of the unpickled records for ad-hoc filtering.
# presumably `data` is a list of per-replay dicts (rows are indexed by key
# further down) — TODO confirm
df = pd.DataFrame(data)

In [None]:
@functools.cache
def compact_raw(raw: str) -> str:
    """Collapse a `raw` source string to a short label.

    * ``'Players/<x>/...'`` becomes ``'<x>'`` (the segment after ``Players``).
    * ``'Phillip/...'`` becomes ``'Phillip'``.
    * Anything else is returned unchanged.

    Results are memoised because the same source string recurs many times.
    """
    top_level, slash, remainder = raw.partition('/')
    if not slash:
        # No '/' at all, so neither prefix can match.
        return raw
    if top_level == 'Players':
        # Keep only the first path segment below 'Players/'.
        return remainder.split('/', 1)[0]
    if top_level == 'Phillip':
        return 'Phillip'
    return raw

# Store the compacted source label as a column so rows can be filtered by it.
df['compact_raw'] = df['raw'].map(compact_raw)

In [None]:
# Rows whose compacted source label is exactly 'Phillip'.
is_phillip_row = df['compact_raw'].eq('Phillip')
phillip_df = df.loc[is_phillip_row]
len(phillip_df)

In [None]:
def split_row(row: dict) -> tp.Iterator[dict]:
    """Yield one flattened row per player of a replay record.

    Each yielded dict holds the replay-level fields of `row` (minus
    'players'), the fields of one player, and these derived columns:
    'not_training_reason', 'filename', 'name', 'opponent_name', 'char',
    'opponent_char' and 'compact_raw'.

    Args:
        row: A replay record with at least the keys 'players', 'name' and
            'raw'. It is not modified.

    Yields:
        One new dict per entry of ``row['players']``.
    """
    # Work on a copy so the caller's record is left untouched.
    base_row = row.copy()
    # presumably None when the replay is usable for training — TODO confirm
    base_row['not_training_reason'] = check_replay(row)
    players = base_row.pop('players')

    # Read the replay filename from the record itself, before any player
    # fields are merged in, so a player-level 'name' key cannot replace it.
    filename = row['name']
    source_label = compact_raw(row['raw'])

    # Pair each player with the one at the mirrored position in the list;
    # for a two-player game that is the opponent.
    for player, opponent in zip(players, reversed(players)):
        new_row = base_row.copy()
        new_row.update(player)
        new_row['filename'] = filename

        name = nametags.name_from_metadata(player)
        new_row['name'] = nametags.normalize_name(name)

        opponent_name = nametags.name_from_metadata(opponent)
        new_row['opponent_name'] = nametags.normalize_name(opponent_name)

        new_row['char'] = char_to_str(player['character'])
        new_row['opponent_char'] = char_to_str(opponent['character'])
        new_row['compact_raw'] = source_label
        yield new_row

def char_to_str(char: int) -> str:
    """Look `char` up in `melee.Character` and return that member's `.name`."""
    character = melee.Character(char)
    return character.name

def make_per_player_df(rows):
    """Build a DataFrame with one row per player from replay records.

    Records whose 'valid' or 'is_training' field is falsy are skipped; every
    remaining record is expanded with `split_row`. A notebook progress bar
    tracks the iteration over `rows`.
    """
    records = []
    for replay in tqdm.notebook.tqdm(rows):
        if replay['valid'] and replay['is_training']:
            records.extend(split_row(replay))
    return pd.DataFrame(records)

In [None]:
%%time
# Expand the replay records into one row per player (records that are not
# valid or not marked is_training are skipped inside make_per_player_df).
per_player_df = make_per_player_df(data)

In [None]:
# Rows from the 'Phillip' source, excluding those whose player is named
# 'Phillip AI'.
from_phillip_source = per_player_df['compact_raw'] == 'Phillip'
not_phillip_ai = per_player_df['name'] != 'Phillip AI'
vs_phil_df = per_player_df[from_phillip_source & not_phillip_ai]
len(vs_phil_df)

In [None]:
# Keep rows with no recorded not-training reason. The copy makes the result
# an independent frame rather than a slice of vs_phil_df.
has_no_reason = vs_phil_df['not_training_reason'].isna()
training_phil_df = vs_phil_df.loc[has_no_reason].copy()
len(training_phil_df)

In [None]:
# Order rows by 'startAt'. Rebinding the name instead of using `inplace=True`
# avoids mutating a frame created in another cell and keeps this cell safe to
# re-run.
training_phil_df = training_phil_df.sort_values('startAt')

In [None]:
# How many rows to keep for each (char, opponent_char) pair.
REPLAYS_PER_MATCHUP = 5

# `tail` takes the last rows of every group in the frame's current order.
# `reset_index()` (without drop=True) keeps the previous index as an 'index'
# column.
matchup_df = (
    training_phil_df
    .groupby(['char', 'opponent_char'])
    .tail(REPLAYS_PER_MATCHUP)
    .reset_index()
)
len(matchup_df)

In [None]:
# Base directory for the 'Raw' sources and for the output zip built below.
# NOTE(review): same directory as `root` defined earlier (plus a trailing
# slash) — consider deriving one from the other so they cannot drift apart.
dataset_root = '/linusr/vlad/SSBM/Replays/'

In [None]:
# Group the selected filenames by their `raw` source, preserving row order.
files_by_raw = collections.defaultdict(list)
for raw, filename in zip(matchup_df['raw'], matchup_df['filename']):
    files_by_raw[raw].append(filename)
# Convert back to a plain dict so later lookups of a missing key raise
# instead of silently creating an entry.
files_to_copy = dict(files_by_raw)

In [None]:
# Build the compilation zip from scratch: delete any existing file at the
# destination before copying.
dest_zip = os.path.join(dataset_root, 'PhillipMatchupCompilation.zip')
if os.path.exists(dest_zip):
    os.remove(dest_zip)

# Pair every source path under <dataset_root>/Raw with the filenames to take
# from it.
raw_dir = os.path.join(dataset_root, 'Raw')
sources_and_files = [
    (os.path.join(raw_dir, raw_name), wanted_files)
    for raw_name, wanted_files in files_to_copy.items()
]

utils.copy_multi_zip_files(sources_and_files, dest_zip)

In [None]:
# How many selected rows each player name accounts for.
matchup_df['name'].value_counts()

In [None]:
# (old, new) name pairs: each file moves to
# '<opponent_char>-<char>/<basename>', i.e. one folder per matchup.
to_rename = [
    (filename, f'{phil_char}-{char}/{os.path.basename(filename)}')
    for char, phil_char, filename in zip(
        matchup_df['char'], matchup_df['opponent_char'], matchup_df['filename'])
]

In [None]:
# Apply the (old, new) name pairs from `to_rename` to the zip at `dest_zip`.
utils.rename_within_zip(dest_zip, to_rename)