In [2]:
import pandas as pd

### AMERICAS Datasets

In [123]:
# Data collected from a previous project
americas_kickoff_df = pd.read_csv('datasets/americas_kickoff.csv')
americas_kickoff_df = americas_kickoff_df.drop(columns=['Unnamed: 0'])

americas_stage1_df = pd.read_csv('datasets/americas_stage1.csv')
americas_stage1_df = americas_stage1_df.drop(columns=['Unnamed: 0'])

americas_stage1_playoffs_df = pd.read_csv('datasets/americas_stage1_playoffs.csv')
americas_stage1_playoffs_df = americas_stage1_playoffs_df.drop(columns=['Unnamed: 0', '1', '2', '3', '4'])

# Combining the datasets into one df
americas_combined_df = pd.concat([americas_kickoff_df, americas_stage1_df, americas_stage1_playoffs_df], ignore_index=True)

In [124]:
unique_teams = sorted(americas_combined_df['Team'].unique())
print(unique_teams)

['100 Thieves', '2Game Esports', 'Cloud9', 'Evil Geniuses', 'FURIA', 'G2 Esports', 'KRÜ Esports', 'LOUD', 'Leviatán', 'MIBR', 'NRG Esports', 'Sentinels']


In [125]:
# Changing the region and team names for better clarity
americas_combined_df.insert(0, "Region", "AMERICAS")  # Make "Americas" the first column as "Region"

americas_teams_dict = {
    "100T": '100 Thieves',
    "2G": '2Game Esports',
    "C9": 'Cloud9',
    "EG": 'Evil Geniuses',
    "FUR": 'FURIA',
    "G2": 'G2 Esports',
    "KRU": 'KRÜ Esports',
    "LOUD": 'LOUD',
    "LEV": 'Leviatán',
    "MIBR": 'MIBR',
    "NRG": 'NRG Esports',
    "SEN": 'Sentinels',
}
americas_combined_df['Team'] = americas_combined_df['Team'].replace({v: k for k, v in americas_teams_dict.items()})

In [127]:
null_columns = americas_combined_df.columns[americas_combined_df.isnull().any()]
print(null_columns)

Index(['+1', '+2', '+3', '+4', '-4'], dtype='object')


In [128]:
americas_combined_df[["+1"]] = americas_combined_df[["+1"]].fillna(americas_combined_df[["+1"]].mean())
americas_combined_df[["+2"]] = americas_combined_df[["+2"]].fillna(americas_combined_df[["+2"]].mean())
americas_combined_df[["+3"]] = americas_combined_df[["+3"]].fillna(americas_combined_df[["+3"]].mean())
americas_combined_df[["+4"]] = americas_combined_df[["+4"]].fillna(americas_combined_df[["+4"]].mean())
americas_combined_df[["-4"]] = americas_combined_df[["-4"]].fillna(americas_combined_df[["-4"]].mean())

null_columns = americas_combined_df.columns[americas_combined_df.isnull().any()]
print(null_columns)

Index([], dtype='object')


In [129]:
total_pistol_rounds = americas_combined_df["Pistol Rounds Won"] + americas_combined_df["Pistol Rounds Won Opp"]

americas_combined_df["Pistol Rounds Won"] = americas_combined_df["Pistol Rounds Won"] / total_pistol_rounds
americas_combined_df["Pistol Rounds Won Opp"] = americas_combined_df["Pistol Rounds Won Opp"] / total_pistol_rounds

americas_combined_df.head()

Unnamed: 0,Region,Team,Pistol Rounds Won,First Kills,KAST,Clutches,Eco,Semi-Eco,Half-Buy,Full-Buy,...,+1 Opp,-1 Opp,0 Opp,+2 Opp,-2 Opp,-3 Opp,+3 Opp,+4 Opp,-4 Opp,Result
0,AMERICAS,EG,0.5,23,0.67,0.068966,0.0,0.25,0.33,0.52,...,0.865,0.28,0.605,0.916667,0.046667,0.0,1.0,1.0,0.0,0
1,AMERICAS,LOUD,0.5,26,0.71,0.173913,0.0,0.0,0.43,0.68,...,0.72,0.135,0.395,0.953333,0.083333,0.0,1.0,1.0,0.0,1
2,AMERICAS,MIBR,0.25,11,0.59,0.04,0.0,0.0,0.25,0.53,...,0.7925,0.3,0.675,0.953333,0.166667,0.0,1.0,1.0,0.0,0
3,AMERICAS,100T,0.75,28,0.82,0.2,0.0,0.0,0.44,0.76,...,0.7,0.2075,0.325,0.833333,0.046667,0.0,0.5,0.0,0.0,1
4,AMERICAS,2G,0.5,20,0.67,0.071429,0.0,0.0,0.38,0.37,...,0.945,0.345,0.9,0.85,0.11,0.0,1.0,1.0,0.0,0


In [130]:
americas_combined_df.to_csv('Datasets/americas_combined.csv')

### EMEA Datasets

In [131]:
# Data collected from a previous project
emea_kickoff_df = pd.read_csv('datasets/emea_kickoff.csv')
emea_kickoff_df = emea_kickoff_df.drop(columns=['Unnamed: 0'])

emea_stage1_df = pd.read_csv('datasets/emea_stage1.csv')
emea_stage1_df = emea_stage1_df.drop(columns=['Unnamed: 0'])

emea_stage1_playoffs_df = pd.read_csv('datasets/emea_stage1_playoffs.csv')
emea_stage1_playoffs_df = emea_stage1_playoffs_df.drop(columns=['Unnamed: 0'])

# Combining the datasets into one df
emea_combined_df = pd.concat([emea_kickoff_df, emea_stage1_df, emea_stage1_playoffs_df], ignore_index=True)

In [132]:
unique_teams = sorted(emea_combined_df['Team'].unique())
print(unique_teams)

['Apeks', 'BBL Esports', 'FNATIC', 'FUT Esports', 'GIANTX', 'Gentle Mates', 'KOI', 'Karmine Corp', 'Natus Vincere', 'Team Heretics', 'Team Liquid', 'Team Vitality']


In [133]:
# Changing the region and team names for better clarity
emea_combined_df.insert(0, "Region", "EMEA")  # Make "Americas" the first column as "Region"

emea_teams_dict = {
    "APK": 'Apeks',
    "BBL": 'BBL Esports',
    "FNC": 'FNATIC',
    "FUT": 'FUT Esports',
    "GX": 'GIANTX',
    "M8": 'Gentle Mates',
    "MKOI": 'KOI',
    "KC": 'Karmine Corp',
    "NAVI": 'Natus Vincere',
    "TH": 'Team Heretics',
    "TL": 'Team Liquid',
    "VIT": 'Team Vitality',
}
emea_combined_df['Team'] = emea_combined_df['Team'].replace({v: k for k, v in emea_teams_dict.items()})

In [134]:
null_columns = emea_combined_df.columns[emea_combined_df.isnull().any()]
print(null_columns)

Index(['+4', '-4'], dtype='object')


In [135]:
emea_combined_df[["+4"]] = emea_combined_df[["+4"]].fillna(emea_combined_df[["+4"]].mean())
emea_combined_df[["-4"]] = emea_combined_df[["-4"]].fillna(emea_combined_df[["-4"]].mean())

null_columns = emea_combined_df.columns[emea_combined_df.isnull().any()]
print(null_columns)

Index([], dtype='object')


In [136]:
emea_combined_df.to_csv('Datasets/emea_combined.csv')

### APAC Datasets

In [137]:
# Data collected from a previous project
apac_kickoff_df = pd.read_csv('datasets/apac_kickoff.csv')
apac_kickoff_df = apac_kickoff_df.drop(columns=['Unnamed: 0'])

apac_stage1_df = pd.read_csv('datasets/apac_stage1.csv')
apac_stage1_df = apac_stage1_df.drop(columns=['Unnamed: 0'])

apac_stage1_playoffs_df = pd.read_csv('datasets/apac_stage1_playoffs.csv')
apac_stage1_playoffs_df = apac_stage1_playoffs_df.drop(columns=['Unnamed: 0'])

# Combining the datasets into one df
apac_combined_df = pd.concat([apac_kickoff_df, apac_stage1_df, apac_stage1_playoffs_df], ignore_index=True)

In [138]:
unique_teams = sorted(apac_combined_df['Team'].unique())
print(unique_teams)

['BOOM Esports', 'DRX', 'DetonatioN FocusMe', 'Gen.G Esports', 'Global Esports', 'Nongshim RedForce', 'Paper Rex', 'Rex Regum Qeon', 'T1', 'TALON', 'Team Secret', 'ZETA DIVISION']


In [139]:
# Changing the region and team names for better clarity
apac_combined_df.insert(0, "Region", "APAC")  # Make "Americas" the first column as "Region"

apac_teams_dict = {
    "BME": 'BOOM Esports',
    "DRX": 'DRX',
    "DFM": 'DetonatioN FocusMe',
    "GEN": 'Gen.G Esports',
    "GE": 'Global Esports',
    "NS": 'Nongshim RedForce',
    "PRX": 'Paper Rex',
    "RRQ": 'Rex Regum Qeon',
    "T1": 'T1',
    "TLN": 'TALON',
    "TS": 'Team Secret',
    "ZETA": 'ZETA DIVISION',
}
apac_combined_df['Team'] = apac_combined_df['Team'].replace({v: k for k, v in apac_teams_dict.items()})

In [140]:
null_columns = apac_combined_df.columns[apac_combined_df.isnull().any()]
print(null_columns)

Index([], dtype='object')


In [141]:
apac_combined_df.to_csv('Datasets/apac_combined.csv')

### Combined dataset for all the regions

In [None]:
vct_combined_df = pd.concat([americas_combined_df, emea_combined_df, apac_combined_df], ignore_index=True)
vct_combined_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 384 entries, 0 to 383
Data columns (total 37 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   Region                 384 non-null    object 
 1   Team                   384 non-null    object 
 2   Pistol Rounds Won      384 non-null    float64
 3   First Kills            384 non-null    int64  
 4   KAST                   384 non-null    float64
 5   Clutches               384 non-null    float64
 6   Eco                    384 non-null    float64
 7   Semi-Eco               384 non-null    float64
 8   Half-Buy               384 non-null    float64
 9   Full-Buy               384 non-null    float64
 10  +1                     384 non-null    float64
 11  -1                     384 non-null    float64
 12  0                      384 non-null    float64
 13  +2                     384 non-null    float64
 14  -2                     384 non-null    float64
 15  -3    

In [149]:
null_columns = vct_combined_df.columns[vct_combined_df.isnull().any()]
print(null_columns)

null_indices = vct_combined_df[vct_combined_df.isnull().any(axis=1)].index
vct_combined_df.loc[null_indices]

Index([], dtype='object')


Unnamed: 0,Region,Team,Pistol Rounds Won,First Kills,KAST,Clutches,Eco,Semi-Eco,Half-Buy,Full-Buy,...,+1 Opp,-1 Opp,0 Opp,+2 Opp,-2 Opp,-3 Opp,+3 Opp,+4 Opp,-4 Opp,Result


In [150]:
vct_combined_df.head()

Unnamed: 0,Region,Team,Pistol Rounds Won,First Kills,KAST,Clutches,Eco,Semi-Eco,Half-Buy,Full-Buy,...,+1 Opp,-1 Opp,0 Opp,+2 Opp,-2 Opp,-3 Opp,+3 Opp,+4 Opp,-4 Opp,Result
0,AMERICAS,EG,0.5,23,0.67,0.068966,0.0,0.25,0.33,0.52,...,0.865,0.28,0.605,0.916667,0.046667,0.0,1.0,1.0,0.0,0
1,AMERICAS,LOUD,0.5,26,0.71,0.173913,0.0,0.0,0.43,0.68,...,0.72,0.135,0.395,0.953333,0.083333,0.0,1.0,1.0,0.0,1
2,AMERICAS,MIBR,0.25,11,0.59,0.04,0.0,0.0,0.25,0.53,...,0.7925,0.3,0.675,0.953333,0.166667,0.0,1.0,1.0,0.0,0
3,AMERICAS,100T,0.75,28,0.82,0.2,0.0,0.0,0.44,0.76,...,0.7,0.2075,0.325,0.833333,0.046667,0.0,0.5,0.0,0.0,1
4,AMERICAS,2G,0.5,20,0.67,0.071429,0.0,0.0,0.38,0.37,...,0.945,0.345,0.9,0.85,0.11,0.0,1.0,1.0,0.0,0


In [151]:
vct_combined_df.to_csv('Datasets/vct_combined.csv')

### Storing the averages for each team

In [152]:
americas_average_df = americas_combined_df.groupby('Team').mean(numeric_only=True).reset_index()
americas_average_df

Unnamed: 0,Team,Pistol Rounds Won,First Kills,KAST,Clutches,Eco,Semi-Eco,Half-Buy,Full-Buy,+1,...,+1 Opp,-1 Opp,0 Opp,+2 Opp,-2 Opp,-3 Opp,+3 Opp,+4 Opp,-4 Opp,Result
0,100T,0.469697,26.272727,0.73,0.166413,0.143636,0.197273,0.435455,0.572727,0.771683,...,0.795455,0.224545,0.5225,0.941212,0.063636,0.0,0.909091,0.727273,0.0,0.454545
1,2G,0.452381,20.142857,0.671429,0.128353,0.178571,0.02,0.391429,0.425714,0.665357,...,0.877143,0.334643,0.628571,0.96619,0.222381,0.01,0.928571,0.857143,0.0,0.142857
2,C9,0.541667,25.6,0.707,0.205404,0.083,0.025,0.479,0.585,0.804817,...,0.76825,0.218,0.48,0.953667,0.085,0.0,0.9915,1.0,0.0,0.4
3,EG,0.5,23.545455,0.693636,0.195855,0.257273,0.292727,0.493636,0.550909,0.731001,...,0.760909,0.253636,0.456136,0.900303,0.059091,0.0,0.909091,0.818182,0.0,0.545455
4,FUR,0.270833,19.5,0.66375,0.133068,0.145,0.03125,0.4175,0.47125,0.654375,...,0.790625,0.345625,0.564688,0.880417,0.115417,0.020625,0.9925,0.875,0.0,0.125
5,G2,0.552976,33.928571,0.746429,0.21373,0.023571,0.32,0.6,0.637857,0.810834,...,0.695179,0.176964,0.440893,0.923571,0.046429,0.010357,0.892857,0.785714,0.0,0.928571
6,KRU,0.541667,24.75,0.726667,0.196171,0.083333,0.375,0.565833,0.555833,0.792348,...,0.70625,0.214167,0.450764,0.868611,0.0425,0.0,1.0,0.916667,0.0,0.583333
7,LEV,0.458333,21.5,0.7175,0.17148,0.04125,0.1925,0.465,0.5325,0.77875,...,0.7525,0.22125,0.505833,0.915417,0.06125,0.0,0.979375,0.75,0.0,0.25
8,LOUD,0.518519,29.333333,0.698889,0.137035,0.083333,0.175556,0.488889,0.544444,0.778333,...,0.776667,0.221667,0.546389,0.961852,0.077407,0.0,0.936667,1.0,0.0,0.222222
9,MIBR,0.544444,29.666667,0.726,0.166306,0.222,0.262667,0.543333,0.579333,0.772423,...,0.736833,0.205833,0.451278,0.900889,0.073333,0.0,0.919,0.733333,0.0,0.666667


In [153]:
emea_average_df = emea_combined_df.groupby('Team').mean(numeric_only=True).reset_index()
emea_average_df

Unnamed: 0,Team,Pistol Rounds Won,First Kills,KAST,Clutches,Eco,Semi-Eco,Half-Buy,Full-Buy,+1,...,+1 Opp,-1 Opp,0 Opp,+2 Opp,-2 Opp,-3 Opp,+3 Opp,+4 Opp,-4 Opp,Result
0,APK,2.142857,24.0,0.675714,0.121045,0.071429,0.077143,0.472857,0.47,0.77,...,0.821071,0.23,0.553929,0.921429,0.15,0.014286,1.0,0.571429,0.0,0.0
1,BBL,3.076923,26.615385,0.720769,0.155869,0.116154,0.276923,0.569231,0.563846,0.787115,...,0.725,0.212885,0.431538,0.914103,0.091538,0.011154,0.93,0.769231,0.0,0.615385
2,FNC,2.384615,29.076923,0.728462,0.170188,0.179231,0.230769,0.548462,0.619231,0.803462,...,0.761731,0.196538,0.521346,0.908205,0.059231,0.0,0.894231,0.846154,0.0,0.769231
3,FUT,2.5,24.166667,0.703333,0.154489,0.094167,0.2025,0.5325,0.535,0.749167,...,0.770833,0.250833,0.465833,0.873611,0.066111,0.015417,0.82125,0.833333,0.0,0.5
4,GX,1.625,24.625,0.6925,0.109753,0.15,0.13875,0.42125,0.49625,0.726875,...,0.814063,0.273125,0.584688,0.943333,0.148333,0.03125,0.99125,1.0,0.0,0.25
5,KC,2.666667,28.222222,0.735556,0.146892,0.058889,0.174444,0.553333,0.583333,0.783611,...,0.704722,0.216389,0.456667,0.922593,0.100741,0.011111,1.0,0.777778,0.0,0.333333
6,M8,2.111111,25.0,0.677778,0.098896,0.136667,0.165556,0.478889,0.49,0.650556,...,0.792222,0.293889,0.574167,0.960741,0.105185,0.046111,0.986111,1.0,0.0,0.333333
7,MKOI,2.428571,23.142857,0.678571,0.125277,0.128571,0.082857,0.378571,0.464286,0.694643,...,0.830714,0.305357,0.618214,0.91,0.065238,0.0,1.0,0.857143,0.0,0.142857
8,NAVI,2.8,27.3,0.698,0.149449,0.083,0.05,0.466,0.525,0.7695,...,0.782,0.2305,0.53075,0.908667,0.057333,0.0,0.99,0.7,0.0,0.4
9,TH,2.461538,26.384615,0.729231,0.160216,0.102308,0.252308,0.543077,0.58,0.759423,...,0.793462,0.240577,0.461731,0.896923,0.055385,0.005385,0.862308,0.692308,0.0,0.692308


In [154]:
apac_average_df = apac_combined_df.groupby('Team').mean(numeric_only=True).reset_index()
apac_average_df

Unnamed: 0,Team,Pistol Rounds Won,First Kills,KAST,Clutches,Eco,Semi-Eco,Half-Buy,Full-Buy,+1,...,+1 Opp,-1 Opp,0 Opp,+2 Opp,-2 Opp,-3 Opp,+3 Opp,+4 Opp,-4 Opp,Result
0,BME,3.0,26.555556,0.726667,0.189085,0.0,0.463333,0.548889,0.52,0.762778,...,0.763889,0.237222,0.4625,0.944444,0.111111,0.006111,0.854444,0.777778,0.0,0.555556
1,DFM,2.0,20.222222,0.665556,0.114888,0.0,0.346667,0.437778,0.401111,0.688611,...,0.792222,0.283611,0.54713,0.941481,0.064074,0.0,0.992222,0.777778,0.0,0.222222
2,DRX,3.076923,33.307692,0.733077,0.1453,0.133077,0.262308,0.556154,0.577692,0.783269,...,0.774038,0.216731,0.506731,0.972308,0.063077,0.006538,0.923077,0.692308,0.0,0.769231
3,GE,1.75,20.125,0.67375,0.129961,0.0,0.22125,0.47,0.465,0.725938,...,0.801562,0.274062,0.478854,0.918333,0.081667,0.03125,1.0,0.875,0.0,0.25
4,GEN,2.571429,27.571429,0.733571,0.161173,0.09,0.207857,0.677143,0.619286,0.846786,...,0.737143,0.153214,0.434107,0.874762,0.045238,0.022143,0.863929,0.785714,0.0,0.642857
5,NS,2.636364,30.909091,0.72,0.1662,0.13,0.244545,0.568182,0.573636,0.782727,...,0.766136,0.217273,0.531818,0.913636,0.052727,0.0,1.0,0.909091,0.0,0.545455
6,PRX,3.25,28.916667,0.733333,0.167338,0.083333,0.338333,0.53,0.575,0.766458,...,0.709792,0.233542,0.517292,0.929722,0.058611,0.02,0.909583,0.916667,0.0,0.5
7,RRQ,3.0,33.769231,0.727692,0.172573,0.102308,0.097692,0.511538,0.595385,0.793846,...,0.765385,0.186923,0.484615,0.903077,0.038205,0.0,0.915385,0.923077,0.0,0.615385
8,T1,2.384615,30.846154,0.697692,0.124679,0.120769,0.173077,0.564615,0.551538,0.769038,...,0.766538,0.230962,0.500962,0.945385,0.058205,0.0,0.992308,0.923077,0.0,0.538462
9,TLN,2.666667,24.666667,0.688333,0.128438,0.016667,0.11,0.489167,0.503333,0.75875,...,0.777708,0.24125,0.465,0.898611,0.048333,0.0,0.958333,0.833333,0.0,0.5


In [155]:
americas_average_df.to_csv('Datasets/americas_average.csv')
emea_average_df.to_csv('Datasets/emea_average.csv')
apac_average_df.to_csv('Datasets/apac_average.csv')