In [209]:
from IPython.display import display
from collections import Counter
import pandas as pd
import glob
import json
import os

# Configure sc2reader's cache through environment variables. They are set
# ahead of the `import sc2reader` below, presumably because the library reads
# them when it is imported (TODO confirm against the sc2reader docs).
os.environ['SC2READER_CACHE_DIR'] = './cache'
os.environ['SC2READER_CACHE_MAX_SIZE'] = '2048MB'

import sc2reader
from sc2reader import events, data
from sc2reader.engine.plugins import APMTracker, SelectionTracker


# Remove the cap on the number of columns pandas shows when displaying a frame.
pd.options.display.max_columns = None

In [None]:
class HandleReplays:
    '''
    A class to handle all things SC2 replay.

    Loads replays with sc2reader and flattens them into one pandas DataFrame
    per matchup, with one row per player for every sampled point in time.

    Attributes
    ----------
    attr_map : dict
        Attribute map. ``attr_map['PlayerStatsEvent']`` lists the names of
        the PlayerStatsEvent attributes copied into every row.
    unit_map : dict
        Unit map. Maps a race name ('Protoss', 'Terran', 'Zerg') to the unit
        names that belong to that race.
    replays : list
        Replays loaded by the last call to load_replays (empty before that).
    loader_amount : int
        Number of replays loaded

    Methods
    -------
    load_replays(glob_path, amount=None, verbose=True):
        Loads SC2 replays found in path.
    get_dataframe(matchup, verbose=True):
        Builds the DataFrame for one matchup.
    '''

    # Race initial -> race name, as used for the keys of unit_map.
    _RACE_MAP = {
        'P': 'Protoss',
        'T': 'Terran',
        'Z': 'Zerg'
    }

    def __init__(self, attr_map=None, unit_map=None):
        '''
        Constructs all the necessary attributes for HandleReplays.

        Parameters
        ----------
        attr_map : dict
            Attribute map
        unit_map : dict
            Unit map

        Raises
        ------
        ValueError
            If attr_map or unit_map is not provided.
        '''

        # __init__ must not return a value, so the problem has to be raised.
        if attr_map is None:
            raise ValueError('Please provide an attr_map.')
        if unit_map is None:
            raise ValueError('Please provide a unit_map.')

        self.attr_map = attr_map
        self.unit_map = unit_map

        # Defined up front so get_dataframe works (and yields an empty frame)
        # even when load_replays has not been called yet.
        self.replays = []
        self.loader_amount = 0

    def load_replays(self, glob_path, amount=None, verbose=True):
        '''
        Loads SC2 replays found in the provided path.

        If the argument 'amount' is passed, then only that amount will be loaded.

        Parameters
        ----------
        glob_path : str
            Path to .SC2Replay files as a glob string
        amount : int, optional
            Number of replays to be loaded (default is All)
        verbose : bool, optional
            Show verbose information (default is True)

        Returns
        -------
        None
        '''

        paths = glob.glob(glob_path, recursive=True)

        # sc2reader.load_replays yields replays lazily. They are materialised
        # here so that the replays can be counted and so that get_dataframe
        # can iterate over them more than once (one pass per matchup).
        replays = list(sc2reader.load_replays(
            paths[:amount],
            engine=sc2reader.engine.GameEngine(plugins=[
                APMTracker(),
                SelectionTracker()
            ])
        ))
        loader_amount = len(replays)

        if verbose:
            print('Loaded {} replays.'.format(loader_amount))

        self.replays = replays
        self.loader_amount = loader_amount

    @staticmethod
    def _read_attr(obj, attr):
        '''Returns obj.<attr>, following dots in attr, without using eval.'''
        value = obj
        for part in attr.split('.'):
            value = getattr(value, part)
        return value

    def _stats_row(self, match_id, replay, event):
        '''
        Builds the row for one PlayerStatsEvent.

        Returns
        -------
        (pid, row) where pid is the pid of the player the row belongs to.
        '''

        # A bool indexes as 0/1: any pid other than players[1]'s is treated
        # as players[0].
        is_player_1 = replay.players[1].pid == event.pid
        player = replay.players[is_player_1]

        # Actions in the 30 seconds leading up to this event. APMTracker
        # stores them in a mapping keyed by game second that only has entries
        # for seconds in which something happened, so the window has to be
        # looked up by second rather than by position in the values.
        lower_bound = max(event.second - 30, 0)
        aps = getattr(player, 'aps', {})
        ap30s = sum(aps.get(second, 0) for second in range(lower_bound, event.second))

        row = {
            'match_id': match_id,
            'map_name': replay.map_name,
            'region': replay.region,
            'game_length': replay.game_length.seconds,
            'frame': event.frame,
            'second': event.second,
            'race': player.pick_race[0],
            'ap30s': ap30s,
        }

        for attr in self.attr_map['PlayerStatsEvent']:
            row[attr] = self._read_attr(event, attr)

        row['win'] = player.result == 'Win'

        return player.pid, row

    def get_dataframe(self, matchup, verbose=True):
        '''
        Returns the generated DataFrame with the provided matchup.

        Only two-player replays whose picked races are exactly the two races
        of the matchup (in either player order) are used. Events are read
        until a player leaves; at every second that is a multiple of 30 the
        PlayerStatsEvent of each player is combined with the unit counts of
        the next UnitPositionsEvent into one row per player.

        For mirror matchups (e.g. "TT") units cannot be told apart by name,
        so all unit counts end up on the first player's row.

        Parameters
        ----------
        matchup : str
            Matchup as a two character string with membership [PT,TP,PZ,ZP,TZ,ZT].
        verbose : bool, optional
            Show verbose information (default is True)

        Returns
        -------
        Matchup DataFrame

        Raises
        ------
        ValueError
            If matchup is not a two character string of race initials.
        '''

        race_map = self._RACE_MAP

        if (not isinstance(matchup, str) or len(matchup) != 2
                or any(initial not in race_map for initial in matchup.upper())):
            raise ValueError('The parameter "matchup" must be a string containing the initials of each race of the matchup, e.g.: "TT" or "PT".')

        races = list(matchup.upper())
        first_race_units = self.unit_map[race_map[races[0]]]
        second_race_units = self.unit_map[race_map[races[1]]]

        # Every row starts with a zero count for each unit of both races.
        pt_dict = dict.fromkeys(first_race_units + second_race_units, 0)

        # Sets make the per-unit membership tests in the loop O(1).
        first_race_set = set(first_race_units)
        second_race_set = set(second_race_units)

        df_data = []
        valid_match = 0

        print('Gathering data for matchup {}'.format(races))

        for i, replay in enumerate(self.replays):

            if not verbose:
                print('\rLoading replay {:4}/{:04} | Loaded {:6.2f}% of total!'.format(i+1, self.loader_amount, (i+1)/self.loader_amount*100), end='', flush=True)

            players = replay.players

            # only 1v1 games can be described by a two-race matchup
            if len(players) != 2:
                continue

            # only if it's the matchup we're looking for; comparing the
            # sorted pairs keeps mirror games out of non-mirror matchups
            picked = [player.pick_race[:1] for player in players]
            if sorted(picked) != sorted(races):
                continue

            if verbose:
                print('\n{} Game #{:03} | {} vs. {} {}'.format('-'*17, i+1, players[0].pick_race, players[1].pick_race, '-'*17))

            valid_match += 1

            # stats rows waiting for their unit counts, keyed by player pid
            pending = {}

            for event in replay.events:

                # break if nothing to collect
                if isinstance(event, events.PlayerLeaveEvent):
                    if verbose:
                        print('Player {} left {} seconds into the game.'.format(event.player, event.second))
                    break

                # every 30 seconds
                if event.second % 30 != 0:
                    continue

                if isinstance(event, events.PlayerStatsEvent):
                    pid, row = self._stats_row(i, replay, event)
                    pending[pid] = row

                elif isinstance(event, events.UnitPositionsEvent):
                    pid_a, pid_b = players[0].pid, players[1].pid

                    # Unit counts can only be stored once both players have
                    # a stats row for this point in time.
                    if pid_a not in pending or pid_b not in pending:
                        continue

                    row_a, row_b = pending[pid_a], pending[pid_b]
                    row_a.update(pt_dict)
                    row_b.update(pt_dict)

                    # Units are recognised by race, so each race's units must
                    # go to the row of the player who picked that race,
                    # whatever the player order in the replay.
                    if row_a['race'] == races[0]:
                        first_row, second_row = row_a, row_b
                    else:
                        first_row, second_row = row_b, row_a

                    # the unit name is the first word of str(unit)
                    counted_units = Counter(str(unit).split(' ')[0].lower() for unit in event.units)

                    for name, count in counted_units.items():
                        if name in first_race_set:
                            first_row[name] = count
                        elif name in second_race_set:
                            second_row[name] = count
                        elif verbose:
                            print('Found invalid unit "{}".'.format(name))

                    df_data.extend([row_a, row_b])
                    pending = {}

        print('')

        df = pd.DataFrame(df_data)
        if verbose:
            print('\nEND: ({}, {}) found {} valid games out of {}.'.format(*df.shape, valid_match, len(self.replays)))

        return df

In [None]:
# Attribute map: read from the JSON file next to the notebook.
with open('./stats.json', 'rb') as f:
    attr_map = json.load(f)

# Unit map: for every top-level key of sc2reader's bundled unit data, keep
# only the names (keys) of its entries.
unit_data = json.loads(data.unit_data)
unit_map = {group: list(members.keys()) for group, members in unit_data.items()}

In [None]:
%%time

# Build the replay handler from the attribute and unit maps prepared above.
hr = HandleReplays(attr_map=attr_map, unit_map=unit_map)

# Load every .SC2Replay file found anywhere below ./_data ('**' recurses).
hr.load_replays('./_data/**/*.SC2Replay')

In [None]:
%%time

# One DataFrame per matchup, keyed by the matchup string.
dfs = {}

## TODO: Make get_dataframe save all matchups in a single loop O(k) instead O(nk)
# DOES NOT WORK FOR MIRROR MATCHUPS
for matchup in ['PT', 'PZ', 'TZ']:
    df = hr.get_dataframe(matchup, verbose=False)
    dfs[matchup] = df.copy()

    # A frame without rows has no 'match_id' column, so there is nothing to
    # count or to export for this matchup.
    if df.empty:
        print('No data found for matchup {}; skipping CSV export.'.format(matchup))
        continue

    # Save as CSV; the file name carries the number of distinct matches and
    # the two race initials.
    valid_matches = df['match_id'].nunique()
    df.to_csv('./_sc2_{}_{}{}.csv'.format(valid_matches, *matchup))