In [None]:
import glob
import os

import dask.dataframe as dd
import pandas as pd
from dask.diagnostics import ProgressBar
from numpy.random import default_rng
from sc2.unit import Race

from spy_sc2.replay import ReplayMetadata

In [2]:
# Map file names used by this notebook, one entry per map.
maps = [
    "AutomatonAIE.SC2Map",
    "AbyssalReefAIE.SC2Map",
    "InterloperAIE.SC2Map",
    "EphemeronAIE.SC2Map",
    "AcropolisAIE.SC2Map",
    "ThunderbirdAIE.SC2Map",
]
# Single-element set holding the first map of the list.
# NOTE(review): not referenced by the cells visible here — confirm it is still needed.
test_maps = set(maps[:1])

In [3]:
# Glob pattern (not a plain directory, despite the name) selecting the
# `*.SC2Replay.parquet` files in the aiarena replay folder.
replay_dir = "../resources/replays/aiarena/*.SC2Replay.parquet"
# glob.glob returns matches in arbitrary, filesystem-dependent order; sort so
# every downstream step sees the replays in the same order on every machine.
replay_paths = sorted(glob.glob(replay_dir))
# File names without the directory part, in the same order as `replay_paths`.
replay_names = [os.path.basename(p) for p in replay_paths]
replay_paths

['../resources/replays/aiarena\\3901860_Sharkling_PhantomBot_EphemeronAIE.SC2Replay.parquet',
 '../resources/replays/aiarena\\3904878_Deimos_Eris_AutomatonAIE.SC2Replay.parquet',
 '../resources/replays/aiarena\\3904909_VeTerran-revived_Eris_AbyssalReefAIE.SC2Replay.parquet',
 '../resources/replays/aiarena\\3904922_Ketroc_Eris_InterloperAIE.SC2Replay.parquet',
 '../resources/replays/aiarena\\3905061_MicroMachine_Eris_InterloperAIE.SC2Replay.parquet',
 '../resources/replays/aiarena\\3905138_Eris_Aeolus_AbyssalReefAIE.SC2Replay.parquet',
 '../resources/replays/aiarena\\3905139_Eris_Xena_EphemeronAIE.SC2Replay.parquet',
 '../resources/replays/aiarena\\3905140_Eris_Zozo_AcropolisAIE.SC2Replay.parquet',
 '../resources/replays/aiarena\\3905142_Eris_Caninana_AbyssalReefAIE.SC2Replay.parquet',
 '../resources/replays/aiarena\\3905145_Eris_Phobos_AcropolisAIE.SC2Replay.parquet',
 '../resources/replays/aiarena\\3905146_Eris_SharpenedEdge_AutomatonAIE.SC2Replay.parquet',
 '../resources/replays/aiar

In [4]:
# Lookup from each Race member's name to that member's value.
race_to_id = {member.name: member.value for member in Race}
race_to_id

{'NoRace': 0, 'Terran': 1, 'Zerg': 2, 'Protoss': 3, 'Random': 4}

In [5]:
def pipeline(replay_path: str) -> pd.DataFrame:
    """Aggregate one replay's parquet dump into per-group unit counts.

    Reads the parquet file at ``replay_path``, counts the non-null ``tag``
    values for every ``(game_loop, unit_type, player, owner)`` combination,
    and annotates each resulting row with race ids looked up through the
    replay's metadata and the module-level ``race_to_id`` mapping.

    Args:
        replay_path: Path to a parquet file. The same path with a trailing
            ``.parquet`` removed is handed to ``ReplayMetadata.from_file``.

    Returns:
        A DataFrame with the columns ``game_loop``, ``unit_type``, ``player``,
        ``owner``, ``count``, ``race``, ``enemy``, ``enemy_race`` and
        ``replay_name`` (the base name of ``replay_path``).
    """
    parquet_suffix = ".parquet"
    replay_name = os.path.basename(replay_path)
    units = pd.read_parquet(replay_path)

    # Strip only the trailing extension: str.replace would also rewrite any
    # ".parquet" occurring earlier in the path (e.g. in a directory name).
    if replay_path.endswith(parquet_suffix):
        replay_path_raw = replay_path[: -len(parquet_suffix)]
    else:
        replay_path_raw = replay_path
    metadata = ReplayMetadata.from_file(replay_path_raw)

    def race_id(players: pd.Series) -> pd.Series:
        # Player number -> race (via the replay metadata) -> integer race id.
        return players.map(metadata.player_races).map(race_to_id)

    return (
        units.groupby(["game_loop", "unit_type", "player", "owner"])["tag"]
        .count()
        .rename("count")
        .reset_index()
        # Keyword arguments of a single assign() are evaluated in order, so
        # `enemy_race` can read the `enemy` column created just before it.
        .assign(
            race=lambda frame: race_id(frame["player"]),
            # Maps 1 <-> 2; assumes exactly two players numbered 1 and 2
            # — TODO confirm against the replay format.
            enemy=lambda frame: 3 - frame["player"],
            enemy_race=lambda frame: race_id(frame["enemy"]),
            replay_name=replay_name,
        )
    )

In [6]:
# Apply `pipeline` to every entry of `replay_paths` through dask and collect
# the computed result into `df_all`. Both calls stay inside the ProgressBar
# context so that any work they trigger is reported.
with ProgressBar():
    df_all = dd.from_map(pipeline, replay_paths).compute()

[########################################] | 100% Completed | 1.14 ss
[########################################] | 100% Completed | 263.73 s


In [7]:
# Fixed seed so the train/test partition is identical on every run.
SPLIT_SEED = 42
# One replay out of every TEST_SPLIT_DIVISOR is held out (at least one overall).
TEST_SPLIT_DIVISOR = 11

# Distinct replay names, sorted so the seeded draw depends only on which
# replays are present and not on the row order of `df_all`.
# NOTE: this rebinds `replay_names` from the earlier cell to an array.
replay_names = df_all["replay_name"].drop_duplicates().sort_values().to_numpy()
test_replay_count = max(1, len(replay_names) // TEST_SPLIT_DIVISOR)
test_replays = default_rng(SPLIT_SEED).choice(
    replay_names, test_replay_count, replace=False
)

# Split by whole replay so no replay contributes rows to both sets.
test_mask = df_all["replay_name"].isin(test_replays)
df_train = df_all[~test_mask].reset_index(drop=True)
df_test = df_all[test_mask].reset_index(drop=True)

# The two splits must partition `df_all` exactly.
assert len(df_train) + len(df_test) == len(df_all)

In [8]:
# Create the output folder first so the writes below do not fail on a fresh
# checkout where it does not exist yet.
dataset_dir = "../resources/datasets/scout"
os.makedirs(dataset_dir, exist_ok=True)

# Persist both splits as parquet files next to each other.
df_test.to_parquet(f"{dataset_dir}/test.parquet")
df_train.to_parquet(f"{dataset_dir}/train.parquet")

In [9]:
# Display the training split; the rendering below elides the middle rows.
df_train

Unnamed: 0,game_loop,unit_type,player,owner,count,race,enemy,enemy_race,replay_name
0,0,86,1,1,1,2,2,2,3901860_Sharkling_PhantomBot_EphemeronAIE.SC2R...
1,0,86,2,2,1,2,1,2,3901860_Sharkling_PhantomBot_EphemeronAIE.SC2R...
2,0,104,1,1,12,2,2,2,3901860_Sharkling_PhantomBot_EphemeronAIE.SC2R...
3,0,104,2,2,12,2,1,2,3901860_Sharkling_PhantomBot_EphemeronAIE.SC2R...
4,0,106,1,1,1,2,2,2,3901860_Sharkling_PhantomBot_EphemeronAIE.SC2R...
...,...,...,...,...,...,...,...,...,...
25148324,9153,106,1,1,11,2,2,3,3905917_Eris_SharpenedEdge_InterloperAIE.SC2Re...
25148325,9153,126,1,1,8,2,2,3,3905917_Eris_SharpenedEdge_InterloperAIE.SC2Re...
25148326,9153,137,1,1,38,2,2,3,3905917_Eris_SharpenedEdge_InterloperAIE.SC2Re...
25148327,9153,138,1,1,1,2,2,3,3905917_Eris_SharpenedEdge_InterloperAIE.SC2Re...
