In [1]:
import json
import glob
import pandas as pd
from sklearn.model_selection import train_test_split
from scipy.stats import linregress
import tensorflow as tf

from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.decomposition import PCA


In [2]:
from warnings import simplefilter
simplefilter(action="ignore", category=pd.errors.PerformanceWarning)


## Extract data

In [3]:
files = glob.glob(r'C:\Users\trist\OneDrive\Documents\game_data\valorant_raw/*.json')
len(files)

8606

In [4]:
with open(files[1], 'r') as f:
    json_data = json.load(f)
json_data = json.loads(json_data)
json_data['data'].keys()

dict_keys(['attributes', 'metadata', 'segments', 'expiryDate'])

In [5]:
files[0]

'C:\\Users\\trist\\OneDrive\\Documents\\game_data\\valorant_raw\\0001be30-1ef6-40d8-8f92-13ffbd093cf1.json'

In [6]:
json_data['data']['attributes']

{'id': '000e3b03-f34d-4fd2-94b1-bbc6d3270d9b'}

In [7]:
json_data['data']['metadata']

{'modeKey': 'bomb',
 'modeName': 'Competitive',
 'modeImageUrl': 'https://trackercdn.com/cdn/tracker.gg/valorant/icons/modes/normal.png',
 'modeMaxRounds': 25,
 'duration': 1812504,
 'dateStarted': '2022-08-08T01:52:43.244+00:00',
 'rounds': 19,
 'isRanked': True,
 'queueId': 'competitive',
 'map': '2fb9a4fd-47b8-4e7d-a969-74b4046ebd53',
 'mapName': 'Breeze',
 'mapImageUrl': 'https://titles.trackercdn.com/valorant-api/maps/2fb9a4fd-47b8-4e7d-a969-74b4046ebd53/splash.png'}

In [8]:
game_metadata = json_data['data']['metadata']

In [9]:
json_data['data']['expiryDate']

'2022-08-11T15:58:03.6559528+00:00'

In [10]:
all_segments_types = set()

for i in json_data['data']['segments']:
    all_segments_types.add(i['type'])

all_segments_types

{'player-round',
 'player-round-damage',
 'player-round-kills',
 'player-summary',
 'round-summary',
 'team-summary'}

In [11]:
player_rounds = [i for i in json_data['data']['segments'] if i['type'] == 'player-round']
player_rounds_damage = [i for i in json_data['data']['segments'] if i['type'] == 'player-round-damage']
player_rounds_kills = [i for i in json_data['data']['segments'] if i['type'] == 'player-round-kills']
player_summary = [i for i in json_data['data']['segments'] if i['type'] == 'player-summary']
round_summary = [i for i in json_data['data']['segments'] if i['type'] == 'round-summary']
team_summary = [i for i in json_data['data']['segments'] if i['type'] == 'team-summary']

## player_rounds_kills

In [12]:
player_rounds_kills[0].keys()

dict_keys(['type', 'attributes', 'metadata', 'expiryDate', 'stats'])

In [13]:



player_rounds_kills[0]['attributes']

{'round': 1,
 'platformSlug': 'riot',
 'platformUserIdentifier': 'blackblanko#NA1',
 'opponentPlatformSlug': 'riot',
 'opponentPlatformUserIdentifier': 'ImSpax#8134'}

In [14]:
player_rounds_kills[0]['metadata']

{'platformInfo': {'platformSlug': 'riot',
  'platformUserId': None,
  'platformUserHandle': 'blackblanko#NA1',
  'platformUserIdentifier': 'blackblanko#NA1',
  'avatarUrl': None,
  'additionalParameters': None},
 'opponentPlatformInfo': {'platformSlug': 'riot',
  'platformUserId': None,
  'platformUserHandle': 'ImSpax#8134',
  'platformUserIdentifier': 'ImSpax#8134',
  'avatarUrl': None,
  'additionalParameters': None},
 'opponentLocation': {'x': 8995, 'y': -2931},
 'playerLocations': [{'puuid': 'MDBU9wluuFn3NJlalwfAaM705Qz_4dLXOV9ctRm2jKr5FeiuwMS3-DFVunSDjjzstWpS04GamLhL8g',
   'viewRadians': 1.5682076,
   'location': {'x': 9028, 'y': -5850}},
  {'puuid': 'WCPmrBX-MOwK_WhZnSj8-XREMAzQ_eTGks_deYdFQtd3Axl5bKEeP4Jh8yaoqEm4pkCeHDjMjSE3qQ',
   'viewRadians': 4.9773493,
   'location': {'x': 6242, 'y': -5066}},
  {'puuid': '50-tn1DpjozbTkQszSpD8HZl0PCL8D4Yn2iyW8tW0Jb1ysS9Q7ozR4CWgjSngLt3N8lSI6Lrm1bukw',
   'viewRadians': 2.0112894,
   'location': {'x': 6531, 'y': -5964}},
  {'puuid': 'IqqwkQ

In [15]:
player_rounds_kills[0]['stats']

{'damage': {'rank': None,
  'percentile': None,
  'displayName': None,
  'displayCategory': None,
  'category': None,
  'metadata': {},
  'value': 141,
  'displayValue': '141',
  'displayType': 'Number'}}

In [16]:
files = glob.glob(r'C:\Users\trist\OneDrive\Documents\game_data\valorant_raw/*.json')
print(len(files))
kill_records = list()

for file in files:
    
    
    with open(file, 'r') as f:
        json_data = json.load(f)
    json_data = json.loads(json_data)
    
    if 'data' not in json_data or json_data['data']['metadata']['modeName'] != 'Competitive':
        continue
    
    player_rounds_kills = [i for i in json_data['data']['segments'] if i['type'] == 'player-round-kills']
    for kill in player_rounds_kills:        
        kill_records.append({'weaponName':kill['metadata']['weaponName'],
                            'round':kill['attributes']['round'],
                            'roundTime':kill['metadata']['roundTime']})
kill_df = pd.DataFrame.from_dict(kill_records)
kill_df.head()

8606


Unnamed: 0,weaponName,round,roundTime
0,Ghost,1,49352
1,Classic,1,52806
2,Classic,1,14577
3,Classic,1,48929
4,Classic,1,53169


In [17]:
kill_df['weaponName'].value_counts().index.tolist()

['Vandal',
 'Phantom',
 'Spectre',
 'Ghost',
 'Classic',
 'Operator',
 'Sheriff',
 'Marshal',
 'Guardian',
 'Odin',
 'Frenzy',
 'Bulldog',
 'Judge',
 'Stinger',
 'Ares',
 'Shorty',
 'Bucky']

In [18]:
kill_df[kill_df['round'] == 1]['weaponName'].value_counts()

Ghost      25578
Classic    18960
Sheriff     5177
Frenzy      3204
Shorty       662
Name: weaponName, dtype: int64

## round_summary

In [19]:
# round_summary[19]

## player_rounds

In [20]:
player_rounds[17]

{'type': 'player-round',
 'attributes': {'round': 2,
  'platformSlug': 'riot',
  'platformUserIdentifier': 'ImSpax#8134'},
 'metadata': {'teamId': 'Blue',
  'agentKey': '6f2a04ca-43e0-be17-7f36-b3908627744d',
  'platformInfo': {'platformSlug': 'riot',
   'platformUserId': None,
   'platformUserHandle': 'ImSpax#8134',
   'platformUserIdentifier': 'ImSpax#8134',
   'avatarUrl': None,
   'additionalParameters': None},
  'hasWon': False},
 'expiryDate': '0001-01-01T00:00:00+00:00',
 'stats': {'score': {'rank': None,
   'percentile': None,
   'displayName': 'Score',
   'displayCategory': None,
   'category': None,
   'metadata': {},
   'value': 0,
   'displayValue': '0',
   'displayType': 'Number'},
  'kills': {'rank': None,
   'percentile': None,
   'displayName': 'Kills',
   'displayCategory': None,
   'category': None,
   'metadata': {},
   'value': 0,
   'displayValue': '0',
   'displayType': 'Number'},
  'deaths': {'rank': None,
   'percentile': None,
   'displayName': 'Deaths',
   'di

## round_summary

In [21]:
len(round_summary)

19

In [22]:
# round_summary[22]


In [23]:
def get_winner(round_summary):
    return round_summary[-1]['stats']['winningTeam']['value']

get_winner(round_summary)

'Red'

## player_summary

In [24]:
len(player_summary)

10

In [25]:
player_summary[0].keys()

dict_keys(['type', 'attributes', 'metadata', 'expiryDate', 'stats'])

In [26]:
player_summary[0]['attributes']

{'platformSlug': 'riot', 'platformUserIdentifier': 'Yugiomaster69#5878'}

In [27]:
player_summary[0]['metadata']

{'partyId': 'c058d667-dd0e-4cd8-8916-de94779fcb07',
 'teamId': 'Red',
 'agentKey': '707eab51-4836-f488-046a-cda6bf494859',
 'agentName': 'Viper',
 'agentColor': '#1BB728',
 'agentImageUrl': 'https://titles.trackercdn.com/valorant-api/agents/707eab51-4836-f488-046a-cda6bf494859/displayicon.png',
 'agentPortraitUrl': 'https://trackercdn.com/cdn/tracker.gg/valorant/db/agents/viper_portrait.png',
 'countryCode': None,
 'platformInfo': {'platformSlug': 'riot',
  'platformUserId': None,
  'platformUserHandle': 'Yugiomaster69#5878',
  'platformUserIdentifier': 'Yugiomaster69#5878',
  'avatarUrl': None,
  'additionalParameters': None}}

In [28]:
player_summary[0]['stats'].keys()

dict_keys(['rank', 'currRank', 'score', 'scorePerRound', 'killsPerRound', 'kills', 'deaths', 'assists', 'kdRatio', 'damage', 'damagePerRound', 'singleKills', 'doubleKills', 'tripleKills', 'quadraKills', 'pentaKills', 'multiKills', 'grenadeCasts', 'ability1Casts', 'ability2Casts', 'ultimateCasts', 'grenadeCastsPerRound', 'ability1CastsPerRound', 'ability2CastsPerRound', 'ultimateCastsPerRound', 'plants', 'defuses', 'firstKills', 'firstDeaths', 'esr', 'firstKillsPerRound', 'firstDeathsPerRound', 'econRating', 'hsAccuracy', 'kast'])

In [29]:
player_summary[0]['stats']['rank']

{'rank': None,
 'percentile': None,
 'displayName': 'Rating',
 'displayCategory': None,
 'category': 'mmr',
 'metadata': {'iconUrl': 'https://trackercdn.com/cdn/tracker.gg/valorant/icons/tiersv2/12.png'},
 'value': 'Gold 1',
 'displayValue': 'Gold 1',
 'displayType': 'String'}

In [30]:
player_summary[0]['stats']['score']

{'rank': None,
 'percentile': None,
 'displayName': 'Score',
 'displayCategory': None,
 'category': None,
 'metadata': {},
 'value': 4019,
 'displayValue': '4,019',
 'displayType': 'Number'}

In [31]:
player_summary[0]['stats']['scorePerRound']

{'rank': None,
 'percentile': None,
 'displayName': 'Score per Round',
 'displayCategory': None,
 'category': None,
 'metadata': {},
 'value': 211.52631578947367,
 'displayValue': '212',
 'displayType': 'Number'}

## Win prediction data set creation

In [32]:
all_agents_list = ['Astra',
         'Breach',
         'Brimstone',
         'Chamber',
         'Cypher',
         'Fade',
         'Jett',
         'KAY/O',
         'Killjoy',
         'Neon',
         'Omen',
         'Phoenix',
         'Raze',
         'Reyna',
         'Sage',
         'Skye',
         'Sova',
         'Viper',
         'Yoru']

agent_roles = {'Astra':'Controller',
              'Breach':'Initiator',
         'Brimstone':'Controller',
         'Chamber':'Sentinel',
         'Cypher':'Sentinel',
         'Fade':'Initiator',
         'Jett':'Duelist',
         'KAY/O':'Initiator',
         'Killjoy':'Sentinel',
         'Neon':'Duelist',
         'Omen':'Controller',
         'Phoenix':'Duelist',
         'Raze':'Duelist',
         'Reyna':'Duelist',
         'Sage':'Sentinel',
         'Skye':'Initiator',
         'Sova':'Initiator',
         'Viper':'Controller',
         'Yoru':'Duelist'}
roles_list = ['Controller', 'Initiator', 'Duelist', 'Sentinel']
all_maps_list = ['Ascent', 
                 'Bind', 
                 'Breeze', 
                 'Fracture', 
                 'Haven', 
                 'Icebox', 
                 'Pearl', 
                 'Split']
weapons = ['Vandal',
             'Phantom',
             'Spectre',
             'Ghost',
             'Classic',
             'Operator',
             'Sheriff',
             'Guardian',
             'Marshal',
             'Odin',
             'Judge',
             'Bulldog',
             'Stinger',
             'Frenzy',
             'Ares',
             'Shorty',
             'Bucky']
ranks = [
    'Unranked',
    'Iron 1',
    'Iron 2',
    'Iron 3',
    'Bronze 1',
    'Bronze 2',
    'Bronze 3',
    'Silver 1',
    'Silver 2',
    'Silver 3',
    'Gold 1',
    'Gold 2',
    'Gold 3',
    'Platinum 1',
    'Platinum 2',
    'Platinum 3',
    'Diamond 1',
    'Diamond 2',
    'Diamond 3',
    'Ascendant 1',
    'Ascendant 2',
    'Ascendant 3',
    'Immortal 1',
    'Immortal 2',
    'Immortal 3',
    'Radiant',
        ]

In [33]:
invalid_keys = [
    'currRank',
]

def extract_player_rows(game_metadata, round_summary, player_summary, player_rounds_kills):
    winning_team = get_winner(round_summary)
    game_datetime = game_metadata['dateStarted']
    map_pick = game_metadata['mapName']
    
    data = list()
    
    for i in player_summary:
        new_record = dict()
        new_record['name'] = i['attributes']['platformUserIdentifier']
        
        agent = i['metadata']['agentName']
        agent_role = agent_roles[agent]
        
        new_record['game_datetime'] = game_datetime
        new_record['won_game'] = int(winning_team == i['metadata']['teamId'])
        new_record['team'] = i['metadata']['teamId']
        new_record['map_pick'] = map_pick
        
        for j in i['stats'].keys():
            if j in invalid_keys:
                continue
            new_record[j] = i['stats'][j]['value']
        
        for j in roles_list:
            if j == agent_role:
                new_record[f'role_{j}'] = 1
            else:
                new_record[f'role_{j}'] = 0
        
        for j in all_agents_list:
            if j == agent:
                new_record[f'agent_{j}'] = 1
            else:
                new_record[f'agent_{j}'] = 0
        new_record['rank_num'] = ranks.index(i['stats']['rank']['value'])
        
        
        weapon_kills_dict = {j: 0 for j in weapons}
        for j in player_rounds_kills:
            if 'platformInfo' in j and i['attributes']['platformUserIdentifier'] != j['platformInfo']['platformUserHandle']:
                if j['metadata']['weaponName'] in weapons:
                    weapon_kills_dict[j['metadata']['weaponName']] += 1
            
        data.append(new_record)
            
    return data
        
        
# processed_match_records = extract_player_rows(game_metadata, round_summary, player_summary, player_rounds_kills)
# processed_match_records[8]

In [34]:
# player_summary[0]

In [35]:


def create_agent_row(agent_list, map_pick, game_win, rank_list):
    
    sorted_agent_list = sorted(agent_list)
    
    for i in agent_list:
        if i not in all_agents_list:
            raise Exception(f'Invalid agent: {i}')
            
    if map_pick not in all_maps_list:
        raise Exception(f'Invalid map: {map_pick}')
    
    row = dict()
    
    agent_dict_encoded = {f'agent_num_{i}': 0 for i in range(5)}
    for n, i in enumerate(sorted_agent_list):
        agent_dict_encoded[f'agent_num_{n}'] = all_agents_list.index(i)
        
    row.update(agent_dict_encoded) 
    
    roles_dict = {f'role_{i}': 0 for i in roles_list}
    
    for i in all_agents_list:
        row['agent_' + i] = int(i in agent_list)
    
    for i in agent_list:
        roles_dict[f'role_{agent_roles[i]}'] += 1    
    
    row['map_pick'] = all_maps_list.index(map_pick)
    for i in all_maps_list:
        row['map_' + i] = int(i == map_pick)
    
    rank_avg = sum([ranks.index(i) for i in rank_list])/max(len(rank_list), 1)
    row['rank_avg'] = rank_avg
    
    
    for i in ranks:
        if i in rank_list:
            row[f'rank_{i}'.replace(' ', '_')] = 1
        else:
            row[f'rank_{i}'.replace(' ', '_')] = 0
    
    
    row.update(roles_dict)
        
    row['game_win'] = game_win
    return row


def extract_team_rows(game_metadata, game_attributes, round_summary, player_summary):
    
    team_red_agents = list()
    team_blue_agents = list()
    
    map_pick = game_metadata['mapName']
        
    winning_team = get_winner(round_summary)
    
    rank_list_team_red = []
    rank_list_team_blue = []
    
    for i in player_summary:
        if i['metadata']['teamId'] == 'Red':
            team_red_agents.append(i['metadata']['agentName'])
            rank_list_team_red.append(i['stats']['rank']['value'])
        if i['metadata']['teamId'] == 'Blue':
            team_blue_agents.append(i['metadata']['agentName'])
            rank_list_team_blue.append(i['stats']['rank']['value'])

    row1 = create_agent_row(team_red_agents, map_pick, int('Red' == winning_team), rank_list_team_red)
    row2 = create_agent_row(team_blue_agents, map_pick, int('Blue' == winning_team), rank_list_team_blue)
    
    return [row1, row2]
    
    




In [36]:
def get_all_processed_data(): 
    files = glob.glob(r'C:\Users\trist\OneDrive\Documents\game_data\valorant_raw/*.json')
    
    all_records = list()
    agent_records = list()
    
    for file in files:
        
        with open(file, 'r') as f:
            json_data = json.load(f)
        json_data = json.loads(json_data)
        if 'data' not in json_data:
            print(f'error {file}')
            continue
            
        if json_data['data']['metadata']['modeName'] != 'Competitive':
            continue
        
        game_metadata = json_data['data']['metadata']
        game_attributes = json_data['data']['attributes']

        player_rounds = [i for i in json_data['data']['segments'] if i['type'] == 'player-round']
        player_rounds_damage = [i for i in json_data['data']['segments'] if i['type'] == 'player-round-damage']
        player_rounds_kills = [i for i in json_data['data']['segments'] if i['type'] == 'player-round-kills']
        player_summary = [i for i in json_data['data']['segments'] if i['type'] == 'player-summary']
        round_summary = [i for i in json_data['data']['segments'] if i['type'] == 'round-summary']
        team_summary = [i for i in json_data['data']['segments'] if i['type'] == 'team-summary']
        
        all_records.extend(extract_player_rows(game_metadata, round_summary, player_summary, player_rounds_kills))
        agent_records.extend(extract_team_rows(game_metadata, game_attributes, round_summary, player_summary))
    return all_records, agent_records
        
all_records, all_agent_records = get_all_processed_data()
len(all_records), len(all_agent_records)


(80170, 16034)

In [37]:
ranks.index('Unranked')

0

In [38]:
ranks

['Unranked',
 'Iron 1',
 'Iron 2',
 'Iron 3',
 'Bronze 1',
 'Bronze 2',
 'Bronze 3',
 'Silver 1',
 'Silver 2',
 'Silver 3',
 'Gold 1',
 'Gold 2',
 'Gold 3',
 'Platinum 1',
 'Platinum 2',
 'Platinum 3',
 'Diamond 1',
 'Diamond 2',
 'Diamond 3',
 'Ascendant 1',
 'Ascendant 2',
 'Ascendant 3',
 'Immortal 1',
 'Immortal 2',
 'Immortal 3',
 'Radiant']

In [39]:
user_record_count = dict()

for i in all_records:
    user_record_count.setdefault(i['name'], 0)
    user_record_count[i['name']] += 1

user_record_count_sorted = list()

for k, v in user_record_count.items():
    user_record_count_sorted.append({'user':k, 'count':v})

sorted(user_record_count_sorted, key = lambda x: x['count'], reverse = True)[:20]


[{'user': 'Mathematics#6622', 'count': 59},
 {'user': 'Kouf#514', 'count': 47},
 {'user': 'asianwater#999', 'count': 47},
 {'user': 'strawberry milk#우유갠디', 'count': 47},
 {'user': 'GameKnightAndy#водка', 'count': 41},
 {'user': 'HLee312#3476', 'count': 41},
 {'user': 'CV WorstNub#2822', 'count': 39},
 {'user': 'Brian X Ryan#Ryab', 'count': 39},
 {'user': 'Gabiru#9982', 'count': 38},
 {'user': 'Cowpico#007', 'count': 38},
 {'user': 'wakefull#icy', 'count': 37},
 {'user': 'Beru#NaCl', 'count': 36},
 {'user': 'snodu#Doggu', 'count': 35},
 {'user': 'I am the beta#8743', 'count': 34},
 {'user': 'QuestionMarc#5002', 'count': 34},
 {'user': 'kiyun#xoxo', 'count': 33},
 {'user': 'Static#RDIUS', 'count': 33},
 {'user': 'IAmTempest#NA1', 'count': 33},
 {'user': 'ASP smiley#TONKA', 'count': 33},
 {'user': 'Daddy Brim#OTP', 'count': 32}]

In [40]:
all_records = [i for i in all_records if user_record_count[i['name']] >= 4]
len(all_records)

19666

In [41]:
all_records_df = pd.DataFrame.from_dict(all_records)
all_records_df

Unnamed: 0,name,game_datetime,won_game,team,map_pick,rank,score,scorePerRound,killsPerRound,kills,...,agent_Omen,agent_Phoenix,agent_Raze,agent_Reyna,agent_Sage,agent_Skye,agent_Sova,agent_Viper,agent_Yoru,rank_num
0,Loxx#6998,2022-08-08T01:52:43.244+00:00,0,Blue,Breeze,Silver 3,4270,224.736842,0.684211,13,...,0,0,0,0,0,0,0,0,0,9
1,hardstuckiron#3570,2022-08-07T22:08:58.634+00:00,1,Blue,Icebox,Bronze 1,6042,402.800000,1.466667,22,...,0,0,0,0,0,0,0,0,0,4
2,Andreww#1178,2022-08-07T22:08:58.634+00:00,1,Blue,Icebox,Iron 3,2032,135.466667,0.466667,7,...,0,1,0,0,0,0,0,0,0,3
3,daps#0011,2022-08-09T02:05:42.498+00:00,0,Blue,Icebox,Diamond 1,6879,286.625000,1.083333,26,...,0,0,0,0,0,0,0,0,0,16
4,papi#7186,2022-08-03T10:43:51.7+00:00,1,Blue,Haven,Platinum 2,2000,117.647059,0.411765,7,...,0,0,0,0,0,0,0,0,0,14
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19661,August#SUI,2022-07-27T04:07:20.866+00:00,1,Blue,Haven,Gold 1,7741,297.730769,1.038462,27,...,1,0,0,0,0,0,0,0,0,10
19662,Migegg#8765,2022-07-29T22:49:19.346+00:00,1,Blue,Icebox,Bronze 2,5645,256.590909,0.909091,20,...,0,0,0,0,0,0,0,0,0,5
19663,itsililya#7653,2022-08-01T01:45:02.107+00:00,0,Red,Ascent,Bronze 1,5946,228.692308,0.769231,20,...,0,0,0,0,0,1,0,0,0,4
19664,RedFridgee#NA1,2022-08-06T00:08:24.629+00:00,0,Blue,Bind,Bronze 1,3169,144.045455,0.545455,12,...,0,1,0,0,0,0,0,0,0,4


In [42]:
all_records_df['name'].nunique()

1764

In [43]:
all_records_df.shape

(19666, 63)

In [44]:
all_records_df['game_datetime'].min()

'2022-03-26T16:46:26.008+00:00'

In [45]:
all_records_df['map_pick'].value_counts(normalize=True)

Bind        0.148530
Ascent      0.147259
Breeze      0.144971
Haven       0.142378
Icebox      0.139174
Pearl       0.137700
Fracture    0.137191
Split       0.002797
Name: map_pick, dtype: float64

In [46]:
all_records_df['killsPerRound'].describe()

count    19666.000000
mean         0.728168
std          0.288190
min          0.000000
25%          0.529412
50%          0.705882
75%          0.900000
max          2.600000
Name: killsPerRound, dtype: float64

In [47]:
all_records_df['rank'].value_counts(normalize=True)

Radiant        0.115478
Immortal 3     0.099664
Silver 1       0.093003
Bronze 3       0.079833
Bronze 2       0.079732
Silver 2       0.077392
Silver 3       0.067528
Bronze 1       0.065341
Gold 1         0.052070
Gold 2         0.042001
Iron 3         0.038239
Gold 3         0.035188
Platinum 1     0.031018
Unranked       0.027204
Platinum 2     0.024306
Platinum 3     0.019272
Iron 2         0.015204
Diamond 1      0.010882
Immortal 2     0.006814
Diamond 2      0.004780
Immortal 1     0.003712
Diamond 3      0.002797
Ascendant 1    0.002746
Iron 1         0.002288
Ascendant 3    0.002187
Ascendant 2    0.001322
Name: rank, dtype: float64

In [48]:
# set(all_records_df[all_records_df['rank'].str.contains('Ascendant')]['name'].value_counts().index.tolist() +  all_records_df[all_records_df['rank'].str.contains('Diamond')]['name'].value_counts().index.tolist() + all_records_df[all_records_df['rank'].str.contains('Platinum 3')]['name'].value_counts().index.tolist() + all_records_df[all_records_df['rank'].str.contains('Immortal 1')]['name'].value_counts().index.tolist() + all_records_df[all_records_df['rank'].str.contains('Platinum 2')]['name'].value_counts().index.tolist())

In [49]:

def get_player_features(player_df):
    player_df = player_df.sort_values(by = ['game_datetime'])

    data = list()


    row_counter = 0
    for n, (idx, row) in enumerate(player_df.iterrows()):
        if n < 5:
            continue

        past_game = player_df.iloc[n-1:n].mean(numeric_only=True).to_dict()
        past_3_games = player_df.iloc[n-3:n].mean(numeric_only=True).to_dict()

        new_x = dict()

#         for k, v in past_game.items():
#             new_x[f'past_game_{k}'] = v

        for k, v in past_3_games.items():
            new_x[f'past_3_games_avg_{k}'] = v


        new_x['future_won_game'] =  row['won_game']
        data.append(new_x)


    data_df = pd.DataFrame.from_dict(data)
    data_df = data_df.fillna(0)
    return data_df



In [50]:

training_players, val_players = train_test_split(list(set(all_records_df['name'].tolist())))

training_data_dfs = list()
val_data_dfs = list()
all_data_dfs= list()

for i in training_players:
    
    player_df = all_records_df[all_records_df['name'] == i]
    player_data_df = get_player_features(player_df)
    training_data_dfs.append(player_data_df)

    
for i in val_players:
    
    player_df = all_records_df[all_records_df['name'] == i]
    player_data_df = get_player_features(player_df)
    val_data_dfs.append(player_data_df)
    
for i in list(set(all_records_df['name'].tolist())):
    
    player_df = all_records_df[all_records_df['name'] == i]
    player_data_df = get_player_features(player_df)
    all_data_dfs.append(player_data_df)


    
training_data_df = pd.concat(training_data_dfs)
val_data_df = pd.concat(val_data_dfs)
all_data_df = pd.concat(all_data_dfs)

len(training_players), len(val_players)

(1323, 441)

In [51]:
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)

# training_data_df.corr().sort_values(by=['future_won_game'])[['future_won_game']]

In [52]:
# training_data_df.isna().sum()

In [53]:
rf = RandomForestClassifier()

x = training_data_df.drop('future_won_game', axis = 1)
y = training_data_df['future_won_game']

x_val = val_data_df.drop('future_won_game', axis = 1)
y_val = val_data_df['future_won_game']

rf.fit(x, y)

preds = rf.predict(x_val)

accuracy_score(preds, y_val)

0.4851341551849166

In [54]:

def pd_div(c1,  c2):
    return c1 / max(0.0001, c2)

def create_interactions(df: pd.DataFrame) -> pd.DataFrame:
    df_interaction = pd.DataFrame(index = df.index)

    df_interaction['future_won_game'] = df['future_won_game']

    columns_list = sorted(df.columns.tolist())

    for i in columns_list:
        df_interaction[i] = df[i]
        for j in columns_list:
            if  i == 'future_won_game' or j == 'future_won_game':
                continue
            if columns_list.index(i) >=columns_list.index(j):
                continue
            
            df_interaction[f'{i}_mul_{j}'] = df[i]*df[j]
            df_interaction[f'{i}_max_{j}'] = df[[i,j]].max(axis = 1)
            df_interaction[f'{i}_div_{j}'] = df.apply(lambda x: pd_div(x[i], x[j]), axis=1)

    return df_interaction

training_data_df_int = create_interactions(training_data_df)
val_data_df_int = create_interactions(val_data_df)


In [55]:
rf = RandomForestClassifier()

x = training_data_df_int.drop('future_won_game', axis = 1)
y = training_data_df_int['future_won_game']

x_val = val_data_df_int.drop('future_won_game', axis = 1)
y_val = val_data_df_int['future_won_game']

rf.fit(x, y)

preds = rf.predict(x_val)

accuracy_score(preds, y_val)

0.5036258158085569

In [56]:
pd.set_option('max_colwidth', 400)



fi_list = list()
for i, j in zip(x.columns, rf.feature_importances_):
    fi_list.append({'column':i, 'importance':j})
    
pd.DataFrame.from_dict(fi_list).sort_values(by = ['importance'], ascending = [False]).head(100)

Unnamed: 0,column,importance
4393,past_3_games_avg_hsAccuracy_div_past_3_games_avg_rank_num,0.000911
3348,past_3_games_avg_damage_div_past_3_games_avg_kills,0.000828
3234,past_3_games_avg_assists_mul_past_3_games_avg_grenadeCastsPerRound,0.000817
3806,past_3_games_avg_econRating_div_past_3_games_avg_killsPerRound,0.00081
3381,past_3_games_avg_damage_div_past_3_games_avg_score,0.000789
4981,past_3_games_avg_scorePerRound_div_past_3_games_avg_won_game,0.000778
3363,past_3_games_avg_damage_div_past_3_games_avg_quadraKills,0.000773
3237,past_3_games_avg_assists_mul_past_3_games_avg_hsAccuracy,0.000759
4423,past_3_games_avg_hsAccuracy_div_past_3_games_avg_ultimateCastsPerRound,0.000754
3530,past_3_games_avg_deaths_div_past_3_games_avg_hsAccuracy,0.000752


In [57]:



fi_list = list()

for i in training_data_df_int.columns:
    
    if i == 'future_won_game':
        continue
        
    rel = linregress(training_data_df_int['future_won_game'], training_data_df_int[i])
    fi_list.append({'column': i,
                   'slope':rel.slope,
                   'intercept':rel.intercept,
                   'rvalue':rel.rvalue,
                    'r2':rel.rvalue*rel.rvalue,
                   'pvalue':rel.pvalue,
                   'stderr':rel.stderr})

pd.DataFrame.from_dict(fi_list).sort_values(by = ['r2'], ascending = [False]).head(100)

Unnamed: 0,column,slope,intercept,rvalue,r2,pvalue,stderr
3874,past_3_games_avg_esr_mul_past_3_games_avg_hsAccuracy,125.268629,1199.583,0.048389,0.002341,8e-06,28.015091
416,past_3_games_avg_ability2Casts_div_past_3_games_avg_defuses,5304.131201,17355.37,0.047673,0.002273,1.1e-05,1204.080922
467,past_3_games_avg_ability2Casts_div_past_3_games_avg_plants,5304.131201,17355.37,0.047673,0.002273,1.1e-05,1204.080922
3730,past_3_games_avg_doubleKills_div_past_3_games_avg_plants,1013.717377,3408.683,0.047355,0.002242,1.2e-05,231.67051
4500,past_3_games_avg_kdRatio_div_past_3_games_avg_plants,442.716968,1440.695,0.046339,0.002147,1.9e-05,103.400044
4387,past_3_games_avg_hsAccuracy_div_past_3_games_avg_plants,7222.325953,25742.65,0.045503,0.002071,2.6e-05,1717.867231
4370,past_3_games_avg_hsAccuracy_mul_past_3_games_avg_kdRatio,1.368162,21.60591,0.045357,0.002057,2.8e-05,0.326475
579,past_3_games_avg_ability2CastsPerRound_div_past_3_games_avg_defuses,244.775807,882.0945,0.044268,0.00196,4.4e-05,59.849264
630,past_3_games_avg_ability2CastsPerRound_div_past_3_games_avg_plants,244.775807,882.0945,0.044268,0.00196,4.4e-05,59.849264
3434,past_3_games_avg_damagePerRound_mul_past_3_games_avg_hsAccuracy,132.393858,2720.074,0.043761,0.001915,5.3e-05,32.746624


In [58]:
# raise Exception

In [59]:
fi_list = list()

for i in all_data_df.columns:
    if i == 'future_won_game':
        continue
    rel = linregress(all_data_df['future_won_game'], all_data_df[i])
    fi_list.append({'column': i,
                   'slope':rel.slope,
                   'intercept':rel.intercept,
                   'rvalue':rel.rvalue,
                    'r2':rel.rvalue*rel.rvalue,
                   'pvalue':rel.pvalue,
                   'stderr':rel.stderr})

pd.DataFrame.from_dict(fi_list).sort_values(by = ['r2'], ascending = [False])

Unnamed: 0,column,slope,intercept,rvalue,r2,pvalue,stderr
9,past_3_games_avg_damagePerRound,2.727834,137.952577,0.039439,0.001555396,2.8e-05,0.650823
8,past_3_games_avg_damage,61.399585,2852.238677,0.038562,0.001487061,4.2e-05,14.982389
4,past_3_games_avg_kills,0.321151,14.975437,0.03549,0.001259549,0.000163,0.085159
1,past_3_games_avg_score,88.651173,4354.502516,0.035464,0.001257664,0.000165,23.525136
2,past_3_games_avg_scorePerRound,3.87815,210.820615,0.035316,0.001247236,0.000176,1.033433
7,past_3_games_avg_kdRatio,0.033409,1.091372,0.035272,0.001244139,0.000179,0.008914
28,past_3_games_avg_esr,3.594368,61.282757,0.035199,0.001238976,0.000185,0.961004
31,past_3_games_avg_econRating,1.184771,58.01348,0.034533,0.001192517,0.000244,0.322884
3,past_3_games_avg_killsPerRound,0.013928,0.726063,0.034105,0.001163118,0.000292,0.003843
15,past_3_games_avg_multiKills,0.054574,1.102265,0.03278,0.001074527,0.000498,0.015669



## Agent pick prediction data set creation

In [60]:
all_records, all_agent_records = get_all_processed_data()


In [61]:
# agents = list()

# for i in all_records:
#     agents.append(i['agent'])
    
# sorted(list(set(agents)))

In [62]:
maps = list()

for i in all_records:
    maps.append(i['map_pick'])
    
sorted(list(set(maps)))

['Ascent', 'Bind', 'Breeze', 'Fracture', 'Haven', 'Icebox', 'Pearl', 'Split']

In [63]:
all_agent_records_df = pd.DataFrame.from_dict(all_agent_records)
all_agent_records_df.head()

Unnamed: 0,agent_num_0,agent_num_1,agent_num_2,agent_num_3,agent_num_4,agent_Astra,agent_Breach,agent_Brimstone,agent_Chamber,agent_Cypher,agent_Fade,agent_Jett,agent_KAY/O,agent_Killjoy,agent_Neon,agent_Omen,agent_Phoenix,agent_Raze,agent_Reyna,agent_Sage,agent_Skye,agent_Sova,agent_Viper,agent_Yoru,map_pick,map_Ascent,map_Bind,map_Breeze,map_Fracture,map_Haven,map_Icebox,map_Pearl,map_Split,rank_avg,rank_Unranked,rank_Iron_1,rank_Iron_2,rank_Iron_3,rank_Bronze_1,rank_Bronze_2,rank_Bronze_3,rank_Silver_1,rank_Silver_2,rank_Silver_3,rank_Gold_1,rank_Gold_2,rank_Gold_3,rank_Platinum_1,rank_Platinum_2,rank_Platinum_3,rank_Diamond_1,rank_Diamond_2,rank_Diamond_3,rank_Ascendant_1,rank_Ascendant_2,rank_Ascendant_3,rank_Immortal_1,rank_Immortal_2,rank_Immortal_3,rank_Radiant,role_Controller,role_Initiator,role_Duelist,role_Sentinel,game_win
0,3,6,14,16,17,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,0,1,1,0,2,0,0,1,0,0,0,0,0,9.2,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,2,1
1,1,2,3,14,15,0,1,1,1,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,2,0,0,1,0,0,0,0,0,8.8,0,0,0,0,0,0,0,1,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,2,0,2,0
2,3,5,9,13,14,0,0,0,1,0,1,0,0,0,1,0,0,0,1,1,0,0,0,0,5,0,0,0,0,0,1,0,0,4.4,0,0,1,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,2,2,0
3,3,8,11,13,14,0,0,0,1,0,0,0,0,1,0,0,1,0,1,1,0,0,0,0,5,0,0,0,0,0,1,0,0,3.6,0,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,3,1
4,3,12,15,16,17,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,1,1,1,0,5,0,0,0,0,0,1,0,0,15.0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,1,2,1,1,1


In [64]:
train_df, val_df = train_test_split(all_agent_records_df)

train_x = train_df.drop('game_win', axis = 1)
train_y = train_df['game_win']

val_x = val_df.drop('game_win', axis = 1)
val_y = val_df['game_win']
train_x.head()

Unnamed: 0,agent_num_0,agent_num_1,agent_num_2,agent_num_3,agent_num_4,agent_Astra,agent_Breach,agent_Brimstone,agent_Chamber,agent_Cypher,agent_Fade,agent_Jett,agent_KAY/O,agent_Killjoy,agent_Neon,agent_Omen,agent_Phoenix,agent_Raze,agent_Reyna,agent_Sage,agent_Skye,agent_Sova,agent_Viper,agent_Yoru,map_pick,map_Ascent,map_Bind,map_Breeze,map_Fracture,map_Haven,map_Icebox,map_Pearl,map_Split,rank_avg,rank_Unranked,rank_Iron_1,rank_Iron_2,rank_Iron_3,rank_Bronze_1,rank_Bronze_2,rank_Bronze_3,rank_Silver_1,rank_Silver_2,rank_Silver_3,rank_Gold_1,rank_Gold_2,rank_Gold_3,rank_Platinum_1,rank_Platinum_2,rank_Platinum_3,rank_Diamond_1,rank_Diamond_2,rank_Diamond_3,rank_Ascendant_1,rank_Ascendant_2,rank_Ascendant_3,rank_Immortal_1,rank_Immortal_2,rank_Immortal_3,rank_Radiant,role_Controller,role_Initiator,role_Duelist,role_Sentinel
11493,3,7,8,10,12,0,0,0,1,0,0,0,1,1,0,1,0,1,0,0,0,0,0,0,4,0,0,0,0,1,0,0,0,10.2,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,2
6025,13,14,15,17,18,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,0,1,1,2,0,0,1,0,0,0,0,0,2.8,1,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,2,1
1231,3,5,6,10,13,0,0,0,1,0,1,1,0,0,0,1,0,0,1,0,0,0,0,0,6,0,0,0,0,0,0,1,0,24.6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,2,1
10727,0,3,5,6,13,1,0,0,1,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,6,0,0,0,0,0,0,1,0,23.4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,1,1,2,1
15342,4,9,10,11,13,0,0,0,0,1,0,0,0,0,1,1,1,0,1,0,0,0,0,0,6,0,0,0,0,0,0,1,0,6.6,0,0,0,0,0,1,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,3,1


In [65]:
rf = RandomForestClassifier()

rf.fit(train_x, train_y)

preds = rf.predict(val_x)

accuracy_score(preds, val_y)

0.5221393034825871

In [66]:
from sklearn.naive_bayes import GaussianNB

rf = GaussianNB()

rf.fit(train_x, train_y)

preds = rf.predict(val_x)

accuracy_score(preds, val_y)


0.5019900497512437

In [67]:
# fi_list = list()
# for i, j in zip(train_x.columns, rf.feature_importances_):
#     fi_list.append({'column':i, 'importance':j})
    
# pd.DataFrame.from_dict(fi_list).sort_values(by = ['importance'])[-10:]

In [68]:
fi_list = list()

for i in train_x.columns:
    rel = linregress(train_y, train_x[i])
    fi_list.append({'column': i,
                   'slope':rel.slope,
                   'intercept':rel.intercept,
                   'rvalue':rel.rvalue,
                    'r2':rel.rvalue*rel.rvalue,
                   'pvalue':rel.pvalue,
                   'stderr':rel.stderr})

pd.DataFrame.from_dict(fi_list).sort_values(by = ['r2'], ascending = [False])

Unnamed: 0,column,slope,intercept,rvalue,r2,pvalue,stderr
34,rank_Unranked,-0.027322,0.227521,-0.033315,0.001109922,0.000253,0.007465
23,agent_Yoru,-0.015589,0.15847,-0.021788,0.0004747144,0.016732,0.006515
63,role_Sentinel,0.024613,1.257327,0.019512,0.0003807372,0.032143,0.011486
18,agent_Reyna,0.017656,0.563504,0.017843,0.0003183857,0.050076,0.00901
61,role_Initiator,-0.021937,0.998013,-0.016803,0.0002823464,0.065026,0.011888
12,agent_KAY/O,-0.013052,0.221891,-0.015876,0.0002520338,0.081297,0.007487
8,agent_Chamber,0.014309,0.506375,0.014314,0.0002049045,0.116003,0.009103
35,rank_Iron_1,0.003195,0.011591,0.014005,0.0001961436,0.124097,0.002078
30,map_Icebox,-0.009653,0.145057,-0.013899,0.0001931876,0.126967,0.006324
41,rank_Silver_1,0.013123,0.34923,0.013705,0.0001878274,0.132363,0.00872


In [69]:




def create_interactions(df: pd.DataFrame) -> pd.DataFrame:
    df_interaction = pd.DataFrame(index = df.index)

    df_interaction['game_win'] = df['game_win']

    columns_list = sorted(df.columns.tolist())

    for i in columns_list:
        df_interaction[i] = df[i]
        for j in columns_list:
            if  i == 'game_win' or j == 'game_win':
                continue
            if columns_list.index(i) >=columns_list.index(j):
                continue
            
            if 'rank' in i or 'rank' in j:
                df_interaction[f'{i}_mul_{j}'] = df[i].copy()*df[j].copy()
            elif 'role' in i:
                df_interaction[f'{i}_mul_{j}'] = df[i].copy()*df[j].copy()
                df_interaction[f'{i}_max_{j}'] = df[[i,j]].copy().max(axis = 1)
            else:
                df_interaction[f'{i}_max_{j}'] = df[i].copy()*df[j].copy()
                
    return df_interaction

In [70]:
train_df, test_df = train_test_split(all_agent_records_df)


In [71]:
results = list()


In [72]:
import tensorflow as tf

In [73]:
sorted(results, key = lambda x: x['score'], reverse = True)[:10]

[]

In [74]:
import numpy as np

In [75]:
import random
from scipy import stats
from sklearn.model_selection import KFold 

counter = 0

solver_dict = {
    "newton-cg": ["l2"],
    "lbfgs": ["l2"],
    "liblinear": ['l1', "l2"],
    "sag": ["l2"],
    "saga": ["l1", 'l2', 'elasticnet'],
}

while True:
    counter += 1
    if counter > 100:
        break
    i = random.randint(10, 100)
    j = random.randint(10, 50) * .01
    k = 10
    solver = random.choice(list(solver_dict.keys()))
    penalty =  random.choice(solver_dict[solver])
    model_choice = random.choice(['rf', 'lr'])
    
    max_depth = random.randint(2, 8)

    
    kf = KFold(n_splits=k, random_state=1, shuffle = True)
    
    if model_choice == 'rf':
        model = RandomForestClassifier(n_estimators=i, max_features = j, max_depth=max_depth)
    else:
        if penalty == 'elasticnet':
            model = LogisticRegression(solver=solver, penalty=penalty, l1_ratio=.5, max_iter=1000)
        else:
            model = LogisticRegression(solver=solver, penalty=penalty, max_iter=1000)

    scores = list()
    
    preds_list = list()
    
    for train_fold_np , val_fold_np in kf.split(train_df):
        
        train_fold_df = all_agent_records_df.iloc[train_fold_np,:]
        val_fold_df = all_agent_records_df.iloc[val_fold_np,:]

        model.fit(train_fold_df.drop('game_win', axis = 1), train_fold_df['game_win'])

        preds = model.predict(test_df.drop('game_win', axis = 1))
        preds_list.append(preds)
    
    result_mode = list()
    for idx in range(test_df.shape[0]):
        tmp_result = 0
        for pred_idx in preds_list:
            tmp_result += pred_idx[idx]
#         print(k, len(preds_list), tmp_result)
        if tmp_result >= k/2:
            result_mode.append(1)
        else:
            result_mode.append(0)
        
    score= accuracy_score(np.array(result_mode), test_df['game_win'])
    results.append({'n_estimators':i,
                   'max_features':j,
                    'max_depth':max_depth,
                    'k':k,
                    'model_choice':model_choice,
                    'penalty':penalty,
                    'solver':solver,
                    'avg_pred':sum(result_mode)/len(result_mode),
                   'score':score})

    if score >= sorted(results, key = lambda x: x['score'], reverse = True)[0]['score'] or counter%100 == 0:
        print(counter, i, j, score, sorted(results, key = lambda x: x['score'], reverse = True)[:3])

        
            


1 13 0.26 0.6492537313432836 [{'n_estimators': 13, 'max_features': 0.26, 'max_depth': 8, 'k': 10, 'model_choice': 'rf', 'penalty': 'l2', 'solver': 'newton-cg', 'avg_pred': 0.6166666666666667, 'score': 0.6492537313432836}]
20 21 0.38 0.66318407960199 [{'n_estimators': 21, 'max_features': 0.38, 'max_depth': 8, 'k': 10, 'model_choice': 'rf', 'penalty': 'l2', 'solver': 'lbfgs', 'avg_pred': 0.6027363184079602, 'score': 0.66318407960199}, {'n_estimators': 13, 'max_features': 0.26, 'max_depth': 8, 'k': 10, 'model_choice': 'rf', 'penalty': 'l2', 'solver': 'newton-cg', 'avg_pred': 0.6166666666666667, 'score': 0.6492537313432836}, {'n_estimators': 36, 'max_features': 0.29, 'max_depth': 7, 'k': 10, 'model_choice': 'rf', 'penalty': 'l2', 'solver': 'lbfgs', 'avg_pred': 0.5982587064676617, 'score': 0.6283582089552239}]




66 60 0.1 0.663681592039801 [{'n_estimators': 60, 'max_features': 0.1, 'max_depth': 8, 'k': 10, 'model_choice': 'rf', 'penalty': 'l2', 'solver': 'sag', 'avg_pred': 0.5743781094527363, 'score': 0.663681592039801}, {'n_estimators': 21, 'max_features': 0.38, 'max_depth': 8, 'k': 10, 'model_choice': 'rf', 'penalty': 'l2', 'solver': 'lbfgs', 'avg_pred': 0.6027363184079602, 'score': 0.66318407960199}, {'n_estimators': 13, 'max_features': 0.26, 'max_depth': 8, 'k': 10, 'model_choice': 'rf', 'penalty': 'l2', 'solver': 'newton-cg', 'avg_pred': 0.6166666666666667, 'score': 0.6492537313432836}]
76 61 0.5 0.6671641791044776 [{'n_estimators': 61, 'max_features': 0.5, 'max_depth': 8, 'k': 10, 'model_choice': 'rf', 'penalty': 'l2', 'solver': 'saga', 'avg_pred': 0.5977611940298507, 'score': 0.6671641791044776}, {'n_estimators': 60, 'max_features': 0.1, 'max_depth': 8, 'k': 10, 'model_choice': 'rf', 'penalty': 'l2', 'solver': 'sag', 'avg_pred': 0.5743781094527363, 'score': 0.663681592039801}, {'n_estim



100 62 0.5 0.6253731343283582 [{'n_estimators': 61, 'max_features': 0.5, 'max_depth': 8, 'k': 10, 'model_choice': 'rf', 'penalty': 'l2', 'solver': 'saga', 'avg_pred': 0.5977611940298507, 'score': 0.6671641791044776}, {'n_estimators': 60, 'max_features': 0.1, 'max_depth': 8, 'k': 10, 'model_choice': 'rf', 'penalty': 'l2', 'solver': 'sag', 'avg_pred': 0.5743781094527363, 'score': 0.663681592039801}, {'n_estimators': 21, 'max_features': 0.38, 'max_depth': 8, 'k': 10, 'model_choice': 'rf', 'penalty': 'l2', 'solver': 'lbfgs', 'avg_pred': 0.6027363184079602, 'score': 0.66318407960199}]


In [76]:
sorted(results, key = lambda x: x['score'], reverse = True)[:10]

[{'n_estimators': 61,
  'max_features': 0.5,
  'max_depth': 8,
  'k': 10,
  'model_choice': 'rf',
  'penalty': 'l2',
  'solver': 'saga',
  'avg_pred': 0.5977611940298507,
  'score': 0.6671641791044776},
 {'n_estimators': 60,
  'max_features': 0.1,
  'max_depth': 8,
  'k': 10,
  'model_choice': 'rf',
  'penalty': 'l2',
  'solver': 'sag',
  'avg_pred': 0.5743781094527363,
  'score': 0.663681592039801},
 {'n_estimators': 21,
  'max_features': 0.38,
  'max_depth': 8,
  'k': 10,
  'model_choice': 'rf',
  'penalty': 'l2',
  'solver': 'lbfgs',
  'avg_pred': 0.6027363184079602,
  'score': 0.66318407960199},
 {'n_estimators': 48,
  'max_features': 0.12,
  'max_depth': 8,
  'k': 10,
  'model_choice': 'rf',
  'penalty': 'l2',
  'solver': 'liblinear',
  'avg_pred': 0.5813432835820895,
  'score': 0.6572139303482587},
 {'n_estimators': 14,
  'max_features': 0.48,
  'max_depth': 8,
  'k': 10,
  'model_choice': 'rf',
  'penalty': 'l2',
  'solver': 'newton-cg',
  'avg_pred': 0.5883084577114428,
  'scor

In [77]:
# sorted(results, key = lambda x: x['score'], reverse = True)

In [78]:
from sklearn.linear_model import LogisticRegression

train_df, val_df = train_test_split(all_agent_records_df, random_state = 1)

n_estimators = 69
max_features = 0.24
max_depth = 8
model_choice='rf'
k=5

kf = KFold(n_splits=k, random_state=1, shuffle = True)

def get_model_to_train(n_estimators, max_features, max_depth, solver, penalty, max_iter):
    if model_choice == 'rf':
        model = RandomForestClassifier(n_estimators=n_estimators, max_features = max_features, max_depth=max_depth)
    else:
        if penalty == 'elasticnet':
            model = LogisticRegression(solver=solver, penalty=penalty, l1_ratio=.5, max_iter=1000)
        else:
            model = LogisticRegression(solver=solver, penalty=penalty, max_iter=1000)

scores = list()

preds_list = list()

models = list()

for train_fold_np , val_fold_np in kf.split(train_df):

    train_fold_df = all_agent_records_df.iloc[train_fold_np,:]
    val_fold_df = all_agent_records_df.iloc[val_fold_np,:]
    
    model = get_model(n_estimators, max_features, max_depth, solver, penalty, max_iter)
    model.fit(train_fold_df.drop('game_win', axis = 1), train_fold_df['game_win'])
    
    models.append(model)

    preds = model.predict_proba(test_df.drop('game_win', axis = 1))[:,-1]
    preds_list.append(preds)

result_mode = list()
for idx in range(test_df.shape[0]):
    tmp_result = 0
    for pred_idx in preds_list:
        tmp_result += pred_idx[idx]
    result_mode.append(tmp_result/k)


result_mode_np = np.rint(np.array(result_mode))
accuracy_score(result_mode_np, test_df['game_win'])




NameError: name 'get_model' is not defined

In [None]:
np.array(result_mode)

In [None]:
fi_list = list()
for i, j in zip(df_train_int.columns, model.feature_importances_):
    fi_list.append({'column':i, 'importance':j})
    
pd.DataFrame.from_dict(fi_list).sort_values(by = ['importance'])[-10:]

In [None]:
pd.DataFrame.from_dict(fi_list).sort_values(by = ['importance']).tail(100)

In [None]:
fi_list = list()


for i in df_train_int.columns:
    if i == 'game_win':
        continue
    rel = linregress(df_train_int['game_win'], df_train_int[i])
    fi_list.append({'column': i,
                   'slope':rel.slope,
                   'intercept':rel.intercept,
                   'rvalue':rel.rvalue,
                    'r2':rel.rvalue*rel.rvalue,
                   'pvalue':rel.pvalue,
                   'stderr':rel.stderr})

pd.DataFrame.from_dict(fi_list).sort_values(by = ['r2'], ascending = [False])

In [None]:
fi_list = list()

for i in df_val_int.columns:
    if i == 'game_win':
        continue
    rel = linregress(df_val_int['game_win'], df_val_int[i])
    fi_list.append({'column': i,
                   'slope':rel.slope,
                   'intercept':rel.intercept,
                   'rvalue':rel.rvalue,
                    'r2':rel.rvalue*rel.rvalue,
                   'pvalue':rel.pvalue,
                   'stderr':rel.stderr})

pd.DataFrame.from_dict(fi_list).sort_values(by = ['r2'], ascending = [False]).head(50)

In [None]:
# df_train_int.corr()[[i for i in df_train_int.columns if 'pca' not in i]]

In [None]:

model = RandomForestClassifier(n_estimators=n_estimators, max_features = max_features, max_depth=max_depth)

df_all = create_interactions(all_agent_records_df)

model.fit(df_all.drop('game_win', axis = 1), all_agent_records_df['game_win'])

preds = model.predict(df_all.drop('game_win', axis = 1))
preds.shape, df_all.shape

In [None]:
def predict_best_lineup(model, map_pick, current_agent_list, rank_list):
    
    agent_score = list()
    
    agent_tuples = list()
    
    for i1 in all_agents_list:
        for i2 in all_agents_list:
            for i3 in all_agents_list:
                for i4 in all_agents_list:
                    for i5 in all_agents_list:
                        if len(set([i1, i2, i3, i4, i5])) < 5:
                            continue
                        else:
                            agent_tuples.append(tuple(sorted([i1, i2, i3, i4, i5])))
    agent_tuples = list(set(agent_tuples))
    matched_agent_tuples = list()
    
    for i in agent_tuples:
        match = True
        for j in current_agent_list:
            if j not in i:
                match = False
        if match:
            matched_agent_tuples.append(i)
    
    inputs = list()
    features = list()
    
    for i in matched_agent_tuples:
        inputs.append({'agent1_name': i[0], 'agent2_name': i[1], 'agent3_name': i[2], 'agent4_name': i[3], 'agent5_name': i[4]})
        features.append(create_agent_row(list(i), map_pick, None, rank_list))
    
    features_df = pd.DataFrame.from_dict(features)
    features_df_interactions = create_interactions(features_df)

    inputs_df = pd.DataFrame.from_dict(inputs)
    
    inputs_df.index = features_df.index

    inputs_df['win_prob'] = model.predict_proba(features_df_interactions.drop('game_win', axis = 1))[:,-1]
    return inputs_df.sort_values('win_prob', ascending = False)


map_pick = 'Icebox'
current_agent_list = []
rank_list= ['Bronze 2']

predict_best_lineup(model, map_pick, current_agent_list, rank_list).head()

In [None]:
map_pick = 'Icebox'
current_agent_list = []
rank_list= ['Gold 2']

predict_best_lineup(model, map_pick, current_agent_list, rank_list).head()

In [None]:
map_pick = 'Icebox'
current_agent_list = []
rank_list= ['Radiant']

predict_best_lineup(model, map_pick, current_agent_list, rank_list).head()

In [None]:
['Ascent', 'Bind', 'Breeze', 'Fracture', 'Haven', 'Icebox', 'Pearl', 'Split']

In [None]:
map_pick = 'Ascent'
current_agent_list = []

predict_best_lineup(model, map_pick, current_agent_list, rank_list).head()

In [None]:
predict_best_lineup(model, map_pick, current_agent_list, rank_list).tail()

In [None]:
map_pick = 'Bind'
current_agent_list = []

predict_best_lineup(model,pca, map_pick, current_agent_list).head()

In [None]:
predict_best_lineup(model,pca, map_pick, current_agent_list).tail()

In [None]:
map_pick = 'Breeze'
current_agent_list = []

predict_best_lineup(model,pca, map_pick, current_agent_list).head()

In [None]:
predict_best_lineup(model,pca, map_pick, current_agent_list).tail()

In [None]:
map_pick = 'Fracture'
current_agent_list = []

predict_best_lineup(model,pca, map_pick, current_agent_list).head()

In [None]:
predict_best_lineup(model,pca, map_pick, current_agent_list).tail()

In [None]:
map_pick = 'Haven'
current_agent_list = []

predict_best_lineup(model,pca, map_pick, current_agent_list).head()

In [None]:
predict_best_lineup(model,pca, map_pick, current_agent_list).tail()

In [None]:
map_pick = 'Icebox'
current_agent_list = []

predict_best_lineup(model,pca, map_pick, current_agent_list).head()

In [None]:
predict_best_lineup(model,pca, map_pick, current_agent_list).tail()

In [None]:
map_pick = 'Pearl'
current_agent_list = []

predict_best_lineup(model,pca, map_pick, current_agent_list).head()

In [None]:
predict_best_lineup(model,pca, map_pick, current_agent_list).tail()

In [None]:
map_pick = 'Split'
current_agent_list = []

predict_best_lineup(model,pca, map_pick, current_agent_list).head()

In [None]:
predict_best_lineup(model,pca, map_pick, current_agent_list).tail()