In [1]:
import sys
# Make the parent directory importable so the project-local `preparation`
# package below resolves; this must run before that import.
# NOTE(review): '../' is relative to the kernel's working directory, so this
# presumably assumes the notebook is launched from its own folder - confirm.
sys.path.append('../') # goes to project root

from preparation.generators.Generator import Generator
from IPython.display import display
import pandas as pd
import datetime
import json

# Do not cap the number of columns pandas renders when displaying a DataFrame.
pd.options.display.max_columns = None

In [2]:
# The Generator constructor takes a single argument: the attribute map.
# That map lives in a JSON file, so read the file and parse its contents first.
with open('../data/functional/races.json', 'rb') as races_file:
    attr_map = json.loads(races_file.read())

# Build the dataset Generator from the parsed attribute map.
hr = Generator(attr_map=attr_map)

In [3]:
# loads replay references into memory
# Every file matching the glob below is picked up; limit=None presumably means
# "no cap on the number of replays" - TODO confirm against Generator.load_replays.
# NOTE(review): the return value is bound to `dfs`, but the following cell
# rebinds `dfs` to the result of hr.getData(), so this binding is short-lived.
dfs = hr.load_replays('../data/raw/**/*.SC2Replay', limit=None)

Loaded 3456 replays out of 3456.


In [4]:
# Build the datasets from the loaded replays.
# The result stores one dataframe per key; the keys are P, T, and Z,
# one for each race.
# Valid games total should always be double the number of replays, since
# there are 2 sides to a game.
# NOTE(review): replays that are skipped because of a thrown exception
# presumably lower that total, so "double" looks like an upper bound - confirm.
dfs = hr.getData(verbose=False)

Skipping replay #1821 due to thrown exception.
[Player 1 - soO (Zerg), Player 2 - HeroMarine (Terran)]
'NoneType' object has no attribute 'lower'
Loading replay 1823/3456 | Loaded  52.75% | ETA 00h24m36s Elapsed 00h27m26s
Skipping replay #1822 due to thrown exception.
[Player 1 - HeroMarine (Terran), Player 2 - soO (Zerg)]
'NoneType' object has no attribute 'lower'

Skipping replay #1822 due to thrown exception.
[Player 1 - HeroMarine (Terran), Player 2 - soO (Zerg)]
'NoneType' object has no attribute 'lower'
Loading replay 1826/3456 | Loaded  52.84% | ETA 00h24m32s Elapsed 00h27m27s
Skipping replay #1825 due to thrown exception.
[Player 1 - GOStephano (Zerg), Player 2 - OGZest (Protoss)]
'NoneType' object has no attribute 'lower'

Skipping replay #1825 due to thrown exception.
[Player 1 - GOStephano (Zerg), Player 2 - OGZest (Protoss)]
'NoneType' object has no attribute 'lower'
Loading replay 1839/3456 | Loaded  53.21% | ETA 00h24m16s Elapsed 00h27m34s
Skipping replay #1838 due to thr

In [6]:
# Sanity check: report, per race, every numeric column that contains negative
# values together with the sum of those negative values.
for k in dfs.keys():
    race_df = dfs[k] # grab a dataset by its player 1's race

    # Only numeric columns can meaningfully be compared against 0, so work on
    # that sub-frame directly instead of masking the full frame with a
    # differently-shaped boolean frame.
    numeric_df = race_df.select_dtypes(include='number')

    # Keep the negative entries and blank out everything else (NaN), then sum
    # per column. sum() skips NaN, so each result is the total of the negative
    # values in that column (0 when the column has none) - it is a sum of the
    # values themselves, not a count of how many are negative.
    negative_check = numeric_df.where(numeric_df < 0).sum()
    # only keep columns that actually contain negative values
    negative_check = negative_check[negative_check < 0]

    # Build one "column total" line per offending column, or a single "None"
    # line when the dataset is clean.
    col_negative_sum_zip = list(zip(negative_check.index.to_list(), negative_check.to_list()))
    if col_negative_sum_zip:
        report_lines = ['{} {}'.format(*n) for n in col_negative_sum_zip]
    else:
        report_lines = ['None']

    # Indent every line of the report (not just the first) so that multiple
    # offending columns line up under the heading.
    print('Negative total by column for {}:\n  {}\n'.format(k, '\n  '.join(report_lines)))

Negative total by column for P:
  None

Negative total by column for T:
  orbitalcommand -83.0

Negative total by column for Z:
  None



In [7]:
# Maps each single-letter race key to the race name used in the csv file name.
name_map = {'T': 'terran', 'P': 'protoss', 'Z': 'zerg'}

for race_key in dfs.keys():
    # Resolve the readable race name, then fetch that race's dataset
    # (datasets are keyed by player 1's race).
    race_name = name_map[race_key]
    race_df = dfs[race_key]

    # Nothing to write for a dataset without rows.
    if len(race_df) == 0:
        print('Dataframe for {} is empty, so skipping.'.format(race_name))
        continue

    # The generator keeps a per-race count of the matches in each dataframe.
    match_count = hr.valid_matches[race_key]
    # Lower-cased timestamp of the moment this file is generated.
    timestamp = datetime.datetime.now().strftime('%b-%d-%Y_%H%M%S').lower()

    # The file name is descriptive: <race>_<match count>_<generation time>.
    path = '../data/processed/{}.csv'.format(
        '{}_{}_{}'.format(race_name, match_count, timestamp)
    )

    print('Saving as', path)
    race_df.to_csv(path)

Saving as ../data/processed/protoss_2308_mar-05-2021_174442.csv
Saving as ../data/processed/terran_1984_mar-05-2021_174444.csv
Saving as ../data/processed/zerg_2482_mar-05-2021_174446.csv
