In [1]:
from IPython.display import display
from collections import Counter
import pandas as pd
import glob
import json
import os

# Directory handed to sc2reader as its cache location (relative to the
# notebook's working directory).
cache_dir = './cache'

# exist_ok=True avoids the check-then-create race of a separate
# os.path.exists() test and is a no-op when the directory already exists.
os.makedirs(cache_dir, exist_ok=True)

# NOTE(review): these are set before `import sc2reader` below, presumably
# because sc2reader reads them at import time -- confirm before reordering.
os.environ['SC2READER_CACHE_DIR'] = cache_dir
os.environ['SC2READER_CACHE_MAX_SIZE'] = '2048MB'

import sc2reader
from sc2reader import events, data
from sc2reader.engine.plugins import APMTracker, SelectionTracker


# Show every column when a DataFrame is displayed (None removes the cap).
pd.set_option('display.max_columns', None)

In [2]:
class WorkReplays:
    '''
    Turns loaded SC2 replays into one DataFrame per matchup.

    Reads ``self.replays``, ``self.loader_amount``, ``self.unit_map`` and
    ``self.attr_map``; this class does not set them itself.
    '''

    @staticmethod
    def _player_for(replay, pid):
        '''Return players[1] when ``pid`` equals its pid, otherwise players[0].'''
        if replay.players[1].pid == pid:
            return replay.players[1]
        return replay.players[0]

    @staticmethod
    def _count_unit(counts, unit, verbose):
        '''Add one to ``counts[unit]`` if it is tracked, else optionally report it.'''
        if unit in counts:
            counts[unit] += 1
        elif verbose:
            print('Found invalid unit "{}".'.format(unit))

    def get_dataframe(self, verbose=True):
        '''
        Returns one DataFrame per matchup, built from the loaded replays.

        Parameters
        ----------
        verbose : bool, optional
            Show verbose information (default is True). When False, a
            single-line progress indicator is printed instead.

        Returns
        -------
        dict
            Maps each matchup key ('PP', 'PT', 'PZ', 'TT', 'TZ', 'ZZ') to a
            DataFrame holding two rows (one per player) for every sampled
            UnitPositionsEvent.
        '''

        # Race initial -> key used in ``self.unit_map``.
        race_map = {
            'P': 'Protoss',
            'T': 'Terran',
            'Z': 'Zerg'
        }

        # Row dicts collected per matchup; keys are the sorted race initials.
        df_data = {
            'PP': [],
            'PT': [],
            'PZ': [],
            'TT': [],
            'TZ': [],
            'ZZ': []
        }

        valid_match = 0
        # Counted while iterating so the summary also works when
        # ``self.replays`` is a generator (which has no len()).
        total_replays = 0

        for i, replay in enumerate(self.replays):
            total_replays = i + 1

            if not verbose:
                print('\rLoading replay {:4}/{:04} | Loaded {:6.2f}% of total!'.format(i+1, self.loader_amount, (i+1)/self.loader_amount*100), end='', flush=True)

            # Only the first two players are used below.
            if len(replay.players) < 2:
                if verbose:
                    print('Skipping game #{:03}: fewer than two players.'.format(i+1))
                continue

            player_0, player_1 = replay.players[0], replay.players[1]
            races = sorted([player_0.pick_race[:1], player_1.pick_race[:1]])

            # Anything other than P/T/Z has no entry in race_map and would
            # raise KeyError further down, so skip it instead.
            if races[0] not in race_map or races[1] not in race_map:
                if verbose:
                    print('Skipping game #{:03}: unsupported matchup {} vs. {}.'.format(i+1, player_0.pick_race, player_1.pick_race))
                continue

            # Every per-race counter tracks the units of BOTH races.
            # NOTE(review): counters are keyed by race, so in mirror matchups
            # both players share a single counter -- confirm this is intended.
            all_units = self.unit_map[race_map[races[0]]] + self.unit_map[race_map[races[1]]]
            unit_dict = {race: dict.fromkeys(all_units, 0) for race in races}

            # Unit names per player (not per sorted race), used to attribute
            # position counts to the right row.
            player_0_units = set(self.unit_map[race_map[player_0.pick_race[0]]])
            player_1_units = set(self.unit_map[race_map[player_1.pick_race[0]]])

            if verbose:
                print('\n{} Game #{:03} | {} vs. {} {}'.format('-'*17, i+1, replay.players[0].pick_race, replay.players[1].pick_race, '-'*17))

            valid_match += 1
            # Latest PlayerStatsEvent row per player pid, waiting to be paired
            # with a UnitPositionsEvent.
            dd = {}

            for event in replay.events:

                # break if nothing to collect
                if isinstance(event, events.PlayerLeaveEvent):
                    if verbose:
                        print('Player {} left {} seconds into the game.'.format(event.player, event.second))
                    break

                if isinstance(event, events.UnitInitEvent):
                    race = self._player_for(replay, event.control_pid).pick_race[0]
                    self._count_unit(unit_dict[race], event.unit_type_name.lower(), verbose)

                if isinstance(event, events.UnitBornEvent):
                    race = self._player_for(replay, event.control_pid).pick_race[0]
                    counts = unit_dict[race]
                    unit = event.unit_type_name.lower()

                    # 'vikingfighter' is counted under 'viking' when it is not
                    # tracked under its own name.
                    if unit == 'vikingfighter' and unit not in counts:
                        unit = 'viking'
                    self._count_unit(counts, unit, verbose)

                if isinstance(event, events.UnitTypeChangeEvent):
                    try:
                        # NOTE(review): compares a pid with ``event.unit.owner``;
                        # if owner is a player object rather than a pid this
                        # always resolves to players[0] -- confirm.
                        race = self._player_for(replay, event.unit.owner).pick_race[0]
                        self._count_unit(unit_dict[race], event.unit_type_name.lower(), verbose)
                    except Exception:
                        print('Error', replay)
                        continue

                if isinstance(event, events.UnitDiedEvent):
                    # NOTE(review): the dead unit is credited to the KILLING
                    # player's counter (players[0] when the pid matches
                    # neither player) -- confirm this is the intended metric.
                    race = self._player_for(replay, event.killing_player_id).pick_race[0]
                    self._count_unit(unit_dict[race], event.unit.name.lower(), verbose)

                # every 30 seconds
                if event.second % 30 == 0:

                    # every 10 seconds (per the original author's note)
                    if isinstance(event, events.PlayerStatsEvent):
                        player = self._player_for(replay, event.pid)
                        d = {}

                        lower_bound = 0 if event.second == 0 else event.second-30
                        # NOTE(review): slices the aps values by position, not
                        # by second; if ``aps`` is sparse this is not a true
                        # 30-second window -- confirm.
                        ap30s = sum(list(player.aps.values())[lower_bound:event.second])

                        d['match_id'] = i
                        d['map_name'] = replay.map_name
                        d['region'] = replay.region
                        d['game_length'] = replay.game_length.seconds
                        d['frame'] = event.frame
                        d['second'] = event.second
                        d['race'] = player.pick_race[0]
                        d['ap30s'] = ap30s

                        # getattr instead of eval: same lookup for plain
                        # attribute names, without executing arbitrary text.
                        for attr in self.attr_map['PlayerStatsEvent']:
                            d[attr] = getattr(event, attr)

                        d['win'] = player.result == 'Win'

                        dd[player.pid] = d

                    # every 15 seconds (per the original author's note)
                    if isinstance(event, events.UnitPositionsEvent):
                        dd1 = dd.get(player_0.pid)
                        dd2 = dd.get(player_1.pid)

                        # Without a stats row for both players there is
                        # nothing to pair the positions with.
                        if dd1 is None or dd2 is None:
                            continue

                        dd1.update(unit_dict[player_0.pick_race[0]])
                        dd2.update(unit_dict[player_1.pick_race[0]])

                        # Unit name is taken as the first word of str(unit).
                        current_units = [str(a).split(' ')[0].lower() for a in event.units.keys()]
                        counted_units = Counter(current_units)

                        # Attribute by each PLAYER's race so the counts land in
                        # the matching row even when players[0]'s race sorts
                        # second. In mirror matchups everything goes to dd1.
                        for k, count in counted_units.items():
                            if k in player_0_units:
                                dd1[k] = count
                            elif k in player_1_units:
                                dd2[k] = count
                            elif verbose:
                                print('Found invalid unit "{}".'.format(k))

                        df_data[''.join(races)].extend([dd1, dd2])
                        dd = {}

        print('')
        dfs = {matchup: pd.DataFrame(rows) for matchup, rows in df_data.items()}

        if verbose:
            shapes = {matchup: frame.shape for matchup, frame in dfs.items()}
            print('\nEND: {} found {} valid games out of {}.'.format(shapes, valid_match, total_replays))

        return dfs

In [3]:
class HandleReplays(WorkReplays):
    '''
    A class to handle all things SC2 replay. Extends WorkReplays.

    ...

    Attributes
    ----------
    attr_map : dict
        Attribute map
    unit_map : dict
        Unit map
    replays : iterable
        Replays returned by sc2reader (empty until load_replays is called)
    loader_amount : int
        Number of replay paths handed to sc2reader

    Methods
    -------
    load_replays(glob_path, limit=None, verbose=True):
        Loads SC2 replays found in path.
    '''


    def __init__(self, attr_map=None, unit_map=None):
        '''
        Constructs all the necessary attributes for HandleReplays.

        Parameters
        ----------
        attr_map : dict
            Attribute map
        unit_map : dict
            Unit map

        Raises
        ------
        TypeError
            If attr_map or unit_map is not provided.
        '''

        # Returning an Exception from __init__ only triggered Python's
        # "__init__() should return None" TypeError; raise the same exception
        # type directly so the intended message reaches the caller.
        if attr_map is None:
            raise TypeError('Please provide an attr_map.')
        if unit_map is None:
            raise TypeError('Please provide a unit_map.')

        self.attr_map = attr_map
        self.unit_map = unit_map

        # Defined up front so get_dataframe() on a fresh instance yields empty
        # frames instead of failing with AttributeError.
        self.replays = []
        self.loader_amount = 0


    def load_replays(self, glob_path, limit=None, verbose=True):
        '''
        Loads SC2 replays found in the provided path.

        If the argument 'limit' is passed, then at most that many replays
        will be loaded.

        Parameters
        ----------
        glob_path : str
            Path to .SC2Replay files as a glob string ('**' is expanded
            recursively)
        limit : int, optional
            Number of replays to be loaded (default is All)
        verbose : bool, optional
            Show verbose information (default is True)


        Returns
        -------
        None
        '''

        # glob returns matches in arbitrary order; sorting makes `limit`
        # select the same files on every run.
        paths = sorted(glob.glob(glob_path, recursive=True))
        selected = paths[:limit]

        # Count what was actually selected, so the number stays correct for
        # any `limit` (None, larger than the match count, or negative).
        loader_amount = len(selected)

        # NOTE(review): sc2reader.load_replays appears to return a lazy
        # iterable, so parsing happens when get_dataframe() iterates it and
        # the result can only be consumed once -- confirm.
        replays = sc2reader.load_replays(
            selected,
            engine=sc2reader.engine.GameEngine(plugins=[
                APMTracker(),
                SelectionTracker()
            ])
        )

        if verbose:
            print('Loaded {} replays.'.format(loader_amount))

        self.replays = replays
        self.loader_amount = loader_amount

In [4]:
# Mapping of event class name -> attribute names to export per event.
with open('./stats.json', 'rb') as stats_file:
    attr_map = json.load(stats_file)

# Unit catalogue bundled with sc2reader, parsed from its JSON text.
unit_data = json.loads(data.unit_data)

# Keep only the unit names for each top-level group of the catalogue.
unit_map = {group: list(units.keys()) for group, units in unit_data.items()}

In [5]:
%%time

# Build the replay handler with the attribute and unit maps from the previous cell.
hr = HandleReplays(attr_map=attr_map, unit_map=unit_map)

# Match replay files recursively under ./_data and hand at most 10 of them to sc2reader.
hr.load_replays('./_data/**/*.SC2Replay', limit=10)

Loaded 10 replays.
CPU times: user 53.1 ms, sys: 71.9 ms, total: 125 ms
Wall time: 124 ms


In [6]:
%%time

# Build one DataFrame per matchup; verbose=False prints a single-line progress indicator.
dfs = hr.get_dataframe(verbose=False)
# Preview the first rows of the Protoss-vs-Terran frame.
dfs['PT'].head()

## Uncomment to save one matchup as CSV.
## NOTE(review): `df` and `matchup` are not defined in any visible cell, so they are set here first.
# matchup = 'PT'
# df = dfs[matchup]
# valid_matches = len(set(df.loc[:,'match_id']))
# df.to_csv('./_sc2_{}_{}{}.csv'.format(valid_matches, *list(matchup)))

Loading replay    1/0010 | Loaded  10.00% of total!

AttributeError: 'Replay' object has no attribute 'details'

Unnamed: 0,match_id,map_name,region,game_length,frame,second,race,ap30s,minerals_current,vespene_current,minerals_collection_rate,vespene_collection_rate,workers_active_count,minerals_used_in_progress,vespene_used_in_progress,minerals_used_current,vespene_used_current,minerals_lost,vespene_lost,minerals_killed,vespene_killed,food_used,food_made,minerals_used_active_forces,vespene_used_active_forces,win,adept,archon,assimilator,carrier,colossus,cyberneticscore,darkshrine,darktemplar,disruptor,fleetbeacon,forge,gateway,hightemplar,immortal,interceptor,mothership,mothershipcore,nexus,observer,oracle,phoenix,photoncannon,probe,pylon,reactor,roboticsbay,roboticsfacility,sentry,stalker,stargate,tempest,templararchive,twilightcouncil,voidray,warpgate,warpprism,warpprismphasing,zealot
0,2,Thunderbird LE,us,414,2880,180,P,0,240,164,979,201,22,600,0,2000,0,0,0,0,0,23.0,31.0,0,0,True,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,2,Thunderbird LE,us,414,2880,180,P,0,185,140,895,335,24,200,50,2400,0,0,0,0,0,28.0,31.0,0,0,False,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,2,Thunderbird LE,us,414,3360,210,P,0,100,22,979,313,24,725,200,2250,0,0,0,0,0,27.0,31.0,0,0,True,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,2,Thunderbird LE,us,414,3360,210,P,0,150,102,951,313,24,450,150,2400,0,0,0,0,0,28.0,31.0,0,0,False,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,2,Thunderbird LE,us,414,3840,240,P,0,25,88,1035,313,26,475,200,2875,50,0,0,0,0,32.0,46.0,125,50,True,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [81]:
# NOTE(review): `df` is not assigned in any visible cell and this cell's
# execution count is out of sequence, so it may rely on stale kernel state --
# confirm it survives Restart & Run All (the per-matchup frames live in `dfs`).
df.columns

Index(['Match ID', 'Second', 'Result', 'Player', 'Player#', 'Race',
       'Current Workers', 'Food Used', 'Food Available', 'Current Minerals',
       'Minerals Collection Rate', 'Minerals Used in Progress',
       'Minerals Used', 'Minerals Used Active Forces', 'Minerals Lost',
       'Current Vespene', 'Vespene Collection Rate',
       'Vespene Used in Progress', 'Vespene Used',
       'Vespene Used Active Forces', 'Vespene Lost', 'armory', 'autoturret',
       'banshee', 'barracks', 'barrackstechlab', 'barracksreactor',
       'barracksflying', 'battlecruiser', 'battlehellion', 'bunker',
       'commandcenter', 'commandcenterflying', 'cyclone', 'engineeringbay',
       'factory', 'factoryflying', 'factoryreactor', 'factorytechlab',
       'fusioncore', 'ghost', 'ghostacademy', 'hellion', 'marauder', 'marine',
       'medivac', 'missileturret', 'mule', 'orbitalcommand',
       'orbitalcommandflying', 'planetaryfortress', 'raven', 'reaper',
       'refinery', 'scv', 'sensortower', 's