In [1]:
import datetime
import json
import os
import sys

import pandas as pd
from IPython.display import display

sys.path.append('..') # goes to project root

from preparation.generators.Generator import Generator

# Show every column when a DataFrame is displayed (None removes the column cap).
pd.set_option('display.max_columns', None)

In [3]:
# The Generator is constructed from a single argument, the attribute map.
# Read that map from the races JSON file and parse it into a dictionary.
with open('../data/functional/races.json', 'rb') as races_file:
    attr_map = json.loads(races_file.read())

# Build the dataset Generator from the parsed attribute map.
hr = Generator(attr_map=attr_map)

In [4]:
# Loads replay references into memory for the files matched by the recursive
# glob pattern below, with limit=1000 passed to the loader.
# NOTE(review): the return value is bound to `dfs`, but `dfs` is reassigned by
# the hr.getData() call in the following cell -- confirm whether this binding
# is needed at all.
dfs = hr.load_replays('../data/raw/**/*.SC2Replay', limit=1000)

Loaded 1000 replays out of 3456.


In [5]:
# Build the datasets from the loaded replays.
# All dataframes are stored by key; the keys are P, T, and Z,
# one for each race.
# NOTE(review): this rebinds `dfs`, replacing whatever load_replays returned.
dfs = hr.getData(verbose=False)

Loading replay 1000/1000 | Loaded 100.00% of total!

END: Found {'P': 317, 'T': 285, 'Z': 0} valid games (total=602) out of 3456.


In [9]:
for k, race_df in dfs.items():
    # `race_df` is the dataset stored under race key `k` (player 1's race).

    # Restrict the check to numeric columns: comparing text, datetime or
    # categorical columns against 0 is meaningless and can raise a TypeError.
    numeric_df = race_df.select_dtypes(include='number')

    # Keep only the negative entries (everything else becomes NaN, which sum()
    # skips). Each column's result is therefore the SUM of its negative values,
    # i.e. a negative total -- not a count of negative rows.
    negative_totals = numeric_df.where(numeric_df < 0).sum()
    # only keep the columns that actually contain negative values
    negative_totals = negative_totals[negative_totals < 0]

    # Purely cosmetic: render one "column total" pair per line. The separator
    # carries the two-space indent so every line lines up under the header,
    # not just the first one.
    if negative_totals.empty:
        report = None
    else:
        pairs = zip(negative_totals.index.to_list(), negative_totals.to_list())
        report = '\n  '.join('{} {}'.format(col, total) for col, total in pairs)

    print('Negative total by column for {}:\n  {}\n'.format(k, report))

Negative total by column for P:
  None

Negative total by column for T:
  orbitalcommand -34.0

Negative total by column for Z:
  None



In [12]:
# Maps the single-letter race key used in `dfs` to the race name used in the
# generated file names.
name_map = {
    'T': 'terran',
    'P': 'protoss',
    'Z': 'zerg'
}

# Directory that receives the generated CSVs. DataFrame.to_csv does not create
# missing directories, so make sure it exists before writing anything.
output_dir = '../data/processed'
os.makedirs(output_dir, exist_ok=True)

# Build the timestamp once, so every file written by this run carries the
# same generation time in its name.
date_str = datetime.datetime.now().strftime('%b-%d-%Y_%H%M%S').lower()

for k, race_df in dfs.items():
    # get the race name by key map
    name = name_map[k]

    # only generate a dataset csv if the dataframe actually contains rows
    if len(race_df) == 0:
        print('Dataframe for {} is empty, so skipping.'.format(name))
        continue

    # the generator keeps the number of valid matches per race key
    total_matches = hr.valid_matches[k]

    # descriptive csv file name made of:
    # - player 1's race
    # - the number of matches it contains
    # - when the dataset was generated
    filename = '{}_{}_{}'.format(name, total_matches, date_str)
    path = '{}/{}.csv'.format(output_dir, filename)

    print('Saving as', path)
    race_df.to_csv(path)

Saving as ../data/processed/protoss_317_jan-21-2021_224018.csv
Saving as ../data/processed/terran_285_jan-21-2021_224018.csv
Dataframe for zerg is empty, so skipping.
