In [1]:
import json
import glob
import pandas as pd
from sklearn.model_selection import train_test_split
from scipy.stats import linregress
import tensorflow as tf

from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.decomposition import PCA



pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)

In [2]:
from warnings import simplefilter
simplefilter(action="ignore", category=pd.errors.PerformanceWarning)


## Extract data

In [3]:
files = glob.glob(r'C:\Users\trist\OneDrive\Documents\game_data\valorant_raw/*.json')
len(files)

12420

In [4]:
with open(files[1], 'r') as f:
    json_data = json.load(f)
json_data = json.loads(json_data)
json_data['data'].keys()

dict_keys(['attributes', 'metadata', 'segments', 'expiryDate'])

In [5]:
files[0]

'C:\\Users\\trist\\OneDrive\\Documents\\game_data\\valorant_raw\\00006c76-09e0-4f98-b892-2923c55a4d95.json'

In [6]:
json_data['data']['attributes']

{'id': '0001be30-1ef6-40d8-8f92-13ffbd093cf1'}

In [7]:
json_data['data']['metadata']

{'modeKey': 'bomb',
 'modeName': 'Normal',
 'modeImageUrl': 'https://trackercdn.com/cdn/tracker.gg/valorant/icons/modes/normal.png',
 'modeMaxRounds': 25,
 'duration': 2664924,
 'dateStarted': '2022-07-08T06:53:09.588+00:00',
 'rounds': 24,
 'isRanked': False,
 'queueId': 'unrated',
 'map': '2fb9a4fd-47b8-4e7d-a969-74b4046ebd53',
 'mapName': 'Breeze',
 'mapImageUrl': 'https://titles.trackercdn.com/valorant-api/maps/2fb9a4fd-47b8-4e7d-a969-74b4046ebd53/splash.png'}

In [8]:
game_metadata = json_data['data']['metadata']

In [9]:
json_data['data']['expiryDate']

'2022-08-08T18:11:36.1374295+00:00'

In [10]:
all_segments_types = set()

for i in json_data['data']['segments']:
    all_segments_types.add(i['type'])

all_segments_types

{'player-round',
 'player-round-damage',
 'player-round-kills',
 'player-summary',
 'round-summary',
 'team-summary'}

In [11]:
player_rounds = [i for i in json_data['data']['segments'] if i['type'] == 'player-round']
player_rounds_damage = [i for i in json_data['data']['segments'] if i['type'] == 'player-round-damage']
player_rounds_kills = [i for i in json_data['data']['segments'] if i['type'] == 'player-round-kills']
player_summary = [i for i in json_data['data']['segments'] if i['type'] == 'player-summary']
round_summary = [i for i in json_data['data']['segments'] if i['type'] == 'round-summary']
team_summary = [i for i in json_data['data']['segments'] if i['type'] == 'team-summary']

## player_rounds_kills

In [12]:
player_rounds_kills[0].keys()

dict_keys(['type', 'attributes', 'metadata', 'expiryDate', 'stats'])

In [13]:



player_rounds_kills[0]['attributes']

{'round': 1,
 'platformSlug': 'riot',
 'platformUserIdentifier': 'rimo#SEXY',
 'opponentPlatformSlug': 'riot',
 'opponentPlatformUserIdentifier': 'Hennessyy#AKA47'}

In [14]:
player_rounds_kills[0]['metadata']

{'platformInfo': {'platformSlug': 'riot',
  'platformUserId': None,
  'platformUserHandle': 'rimo#SEXY',
  'platformUserIdentifier': 'rimo#SEXY',
  'avatarUrl': None,
  'additionalParameters': None},
 'opponentPlatformInfo': {'platformSlug': 'riot',
  'platformUserId': None,
  'platformUserHandle': 'Hennessyy#AKA47',
  'platformUserIdentifier': 'Hennessyy#AKA47',
  'avatarUrl': None,
  'additionalParameters': None},
 'opponentLocation': {'x': 4917, 'y': 3223},
 'playerLocations': [{'puuid': 'gaxPhPH2L0l07DbnPvzTzKcQzNi3qJfuNjigpnUV3UBkRNdDpGdhyhbU2KWVV1QbHIAXZrowoPOkcA',
   'viewRadians': 2.4492486,
   'location': {'x': 5927, 'y': 4912}},
  {'puuid': '-2GqRI2DorSneRUZgv2T47wOha66l9v3qUbG7Pxw1VibAdJkrHnLI6WgCZeAj3O8HUtAuAyuD4suLQ',
   'viewRadians': 5.6062036,
   'location': {'x': 4806, 'y': 5496}},
  {'puuid': 'LyIVoyMdyMEmaLGGwwI7eyO0vc6UIlwreIZF7OaidgY0SyLa4vF3Q6jGLlB_KBtCkhHfMtTNYaWi1A',
   'viewRadians': 5.0044217,
   'location': {'x': 3969, 'y': 6342}},
  {'puuid': 'q8cbMuUjAnP0Hj

In [15]:
player_rounds_kills[0]['stats']

{'damage': {'rank': None,
  'percentile': None,
  'displayName': None,
  'displayCategory': None,
  'category': None,
  'metadata': {},
  'value': 100,
  'displayValue': '100',
  'displayType': 'Number'}}

In [16]:
files = glob.glob(r'C:\Users\trist\OneDrive\Documents\game_data\valorant_raw/*.json')
print(len(files))
kill_records = list()

for file in files:
    
    
    with open(file, 'r') as f:
        json_data = json.load(f)
    json_data = json.loads(json_data)
    
    if 'data' not in json_data or json_data['data']['metadata']['modeName'] != 'Competitive':
        continue
    
    player_rounds_kills = [i for i in json_data['data']['segments'] if i['type'] == 'player-round-kills']
    for kill in player_rounds_kills:        
        kill_records.append({'weaponName':kill['metadata']['weaponName'],
                            'round':kill['attributes']['round'],
                            'roundTime':kill['metadata']['roundTime']})
kill_df = pd.DataFrame.from_dict(kill_records)
kill_df.head()

12420


Unnamed: 0,weaponName,round,roundTime
0,Classic,1,38365
1,Classic,1,49471
2,Ghost,1,31862
3,Ghost,1,48859
4,Ghost,1,72900


In [17]:
kill_df['weaponName'].value_counts().index.tolist()

['Vandal',
 'Phantom',
 'Spectre',
 'Ghost',
 'Classic',
 'Operator',
 'Sheriff',
 'Marshal',
 'Guardian',
 'Odin',
 'Frenzy',
 'Bulldog',
 'Judge',
 'Stinger',
 'Ares',
 'Shorty',
 'Bucky']

In [18]:
kill_df[kill_df['round'] == 1]['weaponName'].value_counts()

Ghost      38821
Classic    27039
Sheriff     7732
Frenzy      4752
Shorty       931
Name: weaponName, dtype: int64

## round_summary

In [19]:
# round_summary[19]

## player_rounds

In [20]:
player_rounds[17]

{'type': 'player-round',
 'attributes': {'round': 2,
  'platformSlug': 'riot',
  'platformUserIdentifier': 'Corgi Tree#NA1'},
 'metadata': {'teamId': 'Red',
  'agentKey': '707eab51-4836-f488-046a-cda6bf494859',
  'platformInfo': {'platformSlug': 'riot',
   'platformUserId': None,
   'platformUserHandle': 'Corgi Tree#NA1',
   'platformUserIdentifier': 'Corgi Tree#NA1',
   'avatarUrl': None,
   'additionalParameters': None},
  'hasWon': False},
 'expiryDate': '0001-01-01T00:00:00+00:00',
 'stats': {'score': {'rank': None,
   'percentile': None,
   'displayName': 'Score',
   'displayCategory': None,
   'category': None,
   'metadata': {},
   'value': 300,
   'displayValue': '300',
   'displayType': 'Number'},
  'kills': {'rank': None,
   'percentile': None,
   'displayName': 'Kills',
   'displayCategory': None,
   'category': None,
   'metadata': {},
   'value': 1,
   'displayValue': '1',
   'displayType': 'Number'},
  'deaths': {'rank': None,
   'percentile': None,
   'displayName': 'Dea

## round_summary

In [21]:
len(round_summary)

24

In [22]:
# round_summary[22]


In [23]:
def get_winner(round_summary):
    return round_summary[-1]['stats']['winningTeam']['value']

get_winner(round_summary)

'Red'

## player_summary

In [24]:
len(player_summary)

10

In [25]:
player_summary[0].keys()

dict_keys(['type', 'attributes', 'metadata', 'expiryDate', 'stats'])

In [26]:
player_summary[0]['attributes']

{'platformSlug': 'riot', 'platformUserIdentifier': 'Hennessyy#AKA47'}

In [27]:
player_summary[0]['metadata']

{'partyId': '4155c5e2-bf8d-4736-acb1-8ec95e2576b1',
 'teamId': 'Blue',
 'agentKey': 'add6443a-41bd-e414-f6ad-e58d267f4e95',
 'agentName': 'Jett',
 'agentColor': '#98DCFF',
 'agentImageUrl': 'https://titles.trackercdn.com/valorant-api/agents/add6443a-41bd-e414-f6ad-e58d267f4e95/displayicon.png',
 'agentPortraitUrl': 'https://trackercdn.com/cdn/tracker.gg/valorant/db/agents/jett_portrait.png',
 'countryCode': None,
 'platformInfo': {'platformSlug': 'riot',
  'platformUserId': None,
  'platformUserHandle': 'Hennessyy#AKA47',
  'platformUserIdentifier': 'Hennessyy#AKA47',
  'avatarUrl': None,
  'additionalParameters': None}}

In [28]:
player_summary[0]['stats'].keys()

dict_keys(['rank', 'currRank', 'score', 'scorePerRound', 'killsPerRound', 'kills', 'deaths', 'assists', 'kdRatio', 'damage', 'damagePerRound', 'singleKills', 'doubleKills', 'tripleKills', 'quadraKills', 'pentaKills', 'multiKills', 'grenadeCasts', 'ability1Casts', 'ability2Casts', 'ultimateCasts', 'grenadeCastsPerRound', 'ability1CastsPerRound', 'ability2CastsPerRound', 'ultimateCastsPerRound', 'plants', 'defuses', 'firstKills', 'firstDeaths', 'esr', 'firstKillsPerRound', 'firstDeathsPerRound', 'econRating', 'hsAccuracy', 'kast'])

In [29]:
player_summary[0]['stats']['rank']

{'rank': None,
 'percentile': None,
 'displayName': 'Rating',
 'displayCategory': None,
 'category': 'mmr',
 'metadata': {'iconUrl': 'https://trackercdn.com/cdn/tracker.gg/valorant/icons/tiersv2/0.png'},
 'value': 'Unranked',
 'displayValue': 'Unranked',
 'displayType': 'String'}

In [30]:
player_summary[0]['stats']['score']

{'rank': None,
 'percentile': None,
 'displayName': 'Score',
 'displayCategory': None,
 'category': None,
 'metadata': {},
 'value': 7621,
 'displayValue': '7,621',
 'displayType': 'Number'}

In [31]:
player_summary[0]['stats']['scorePerRound']

{'rank': None,
 'percentile': None,
 'displayName': 'Score per Round',
 'displayCategory': None,
 'category': None,
 'metadata': {},
 'value': 317.5416666666667,
 'displayValue': '318',
 'displayType': 'Number'}

## Win prediction data set creation

In [32]:
all_agents_list = ['Astra',
         'Breach',
         'Brimstone',
         'Chamber',
         'Cypher',
         'Fade',
         'Jett',
         'KAY/O',
         'Killjoy',
         'Neon',
         'Omen',
         'Phoenix',
         'Raze',
         'Reyna',
         'Sage',
         'Skye',
         'Sova',
         'Viper',
         'Yoru']

agent_roles = {'Astra':'Controller',
              'Breach':'Initiator',
         'Brimstone':'Controller',
         'Chamber':'Sentinel',
         'Cypher':'Sentinel',
         'Fade':'Initiator',
         'Jett':'Duelist',
         'KAY/O':'Initiator',
         'Killjoy':'Sentinel',
         'Neon':'Duelist',
         'Omen':'Controller',
         'Phoenix':'Duelist',
         'Raze':'Duelist',
         'Reyna':'Duelist',
         'Sage':'Sentinel',
         'Skye':'Initiator',
         'Sova':'Initiator',
         'Viper':'Controller',
         'Yoru':'Duelist'}
roles_list = ['Controller', 'Initiator', 'Duelist', 'Sentinel']
all_maps_list = ['Ascent', 
                 'Bind', 
                 'Breeze', 
                 'Fracture', 
                 'Haven', 
                 'Icebox', 
                 'Pearl', 
                 'Split']
weapons = ['Vandal',
             'Phantom',
             'Spectre',
             'Ghost',
             'Classic',
             'Operator',
             'Sheriff',
             'Guardian',
             'Marshal',
             'Odin',
             'Judge',
             'Bulldog',
             'Stinger',
             'Frenzy',
             'Ares',
             'Shorty',
             'Bucky']
ranks = [
    'Unranked',
    'Iron 1',
    'Iron 2',
    'Iron 3',
    'Bronze 1',
    'Bronze 2',
    'Bronze 3',
    'Silver 1',
    'Silver 2',
    'Silver 3',
    'Gold 1',
    'Gold 2',
    'Gold 3',
    'Platinum 1',
    'Platinum 2',
    'Platinum 3',
    'Diamond 1',
    'Diamond 2',
    'Diamond 3',
    'Ascendant 1',
    'Ascendant 2',
    'Ascendant 3',
    'Immortal 1',
    'Immortal 2',
    'Immortal 3',
    'Radiant',
        ]

In [33]:
invalid_keys = [
    'currRank',
]

def extract_player_rows(game_metadata, round_summary, player_summary, player_rounds_kills):
    winning_team = get_winner(round_summary)
    game_datetime = game_metadata['dateStarted']
    map_pick = game_metadata['mapName']
    
    data = list()
    
    for i in player_summary:
        new_record = dict()
        new_record['name'] = i['attributes']['platformUserIdentifier']
        
        agent = i['metadata']['agentName']
        agent_role = agent_roles[agent]
        
        new_record['agent_num'] = all_agents_list.index(agent)
        
        new_record['game_datetime'] = game_datetime
        new_record['won_game'] = int(winning_team == i['metadata']['teamId'])
        new_record['team'] = i['metadata']['teamId']
        new_record['map_pick'] = map_pick
        
        for j in i['stats'].keys():
            if j in invalid_keys:
                continue
            new_record[j] = i['stats'][j]['value']
        
        for j in roles_list:
            if j == agent_role:
                new_record[f'role_{j}'] = 1
            else:
                new_record[f'role_{j}'] = 0
        
        for j in all_agents_list:
            if j == agent:
                new_record[f'agent_{j}'] = 1
            else:
                new_record[f'agent_{j}'] = 0
        new_record['rank_num'] = ranks.index(i['stats']['rank']['value'])
        
        
        weapon_kills_dict = {j: 0 for j in weapons}
        for j in player_rounds_kills:
            if 'platformInfo' in j and i['attributes']['platformUserIdentifier'] != j['platformInfo']['platformUserHandle']:
                if j['metadata']['weaponName'] in weapons:
                    weapon_kills_dict[j['metadata']['weaponName']] += 1
            
        data.append(new_record)
            
    return data
        
        
# processed_match_records = extract_player_rows(game_metadata, round_summary, player_summary, player_rounds_kills)
# processed_match_records[8]

In [34]:
# player_summary[0]

In [35]:


def create_agent_row(agent_list, map_pick, game_win, rank_list):
    
    sorted_agent_list = sorted(agent_list)
    
    for i in agent_list:
        if i not in all_agents_list:
            raise Exception(f'Invalid agent: {i}')
            
    if map_pick not in all_maps_list:
        raise Exception(f'Invalid map: {map_pick}')
    
    row = dict()
    
    agent_dict_encoded = {f'agent_num_{i}': 0 for i in range(5)}
    for n, i in enumerate(sorted_agent_list):
        agent_dict_encoded[f'agent_num_{n}'] = all_agents_list.index(i)
        
    row.update(agent_dict_encoded) 
    
    roles_dict = {f'role_{i}': 0 for i in roles_list}
    
    for i in all_agents_list:
        row['agent_' + i] = int(i in agent_list)
    
    for i in agent_list:
        roles_dict[f'role_{agent_roles[i]}'] += 1    
    
    row['map_pick'] = all_maps_list.index(map_pick)
    for i in all_maps_list:
        row['map_' + i] = int(i == map_pick)
    
    rank_avg = sum([ranks.index(i) for i in rank_list])/max(len(rank_list), 1)
    row['rank_avg'] = rank_avg
    
    
    for i in ranks:
        if i in rank_list:
            row[f'rank_{i}'.replace(' ', '_')] = 1
        else:
            row[f'rank_{i}'.replace(' ', '_')] = 0
    
    
    row.update(roles_dict)
        
    row['game_win'] = game_win
    return row


def extract_team_rows(game_metadata, game_attributes, round_summary, player_summary):
    
    team_red_agents = list()
    team_blue_agents = list()
    
    map_pick = game_metadata['mapName']
        
    winning_team = get_winner(round_summary)
    
    rank_list_team_red = []
    rank_list_team_blue = []
    
    for i in player_summary:
        if i['metadata']['teamId'] == 'Red':
            team_red_agents.append(i['metadata']['agentName'])
            rank_list_team_red.append(i['stats']['rank']['value'])
        if i['metadata']['teamId'] == 'Blue':
            team_blue_agents.append(i['metadata']['agentName'])
            rank_list_team_blue.append(i['stats']['rank']['value'])

    row1 = create_agent_row(team_red_agents, map_pick, int('Red' == winning_team), rank_list_team_red)
    row2 = create_agent_row(team_blue_agents, map_pick, int('Blue' == winning_team), rank_list_team_blue)
    
    return [row1, row2]
    
    




In [36]:
def get_all_processed_data(): 
    files = glob.glob(r'C:\Users\trist\OneDrive\Documents\game_data\valorant_raw/*.json')
    
    all_records = list()
    agent_records = list()
    
    for file in files:
        
        with open(file, 'r') as f:
            json_data = json.load(f)
        json_data = json.loads(json_data)
        if 'data' not in json_data:
            print(f'error {file}')
            continue
            
        if json_data['data']['metadata']['modeName'] != 'Competitive':
            continue
        
        game_metadata = json_data['data']['metadata']
        game_attributes = json_data['data']['attributes']

        player_rounds = [i for i in json_data['data']['segments'] if i['type'] == 'player-round']
        player_rounds_damage = [i for i in json_data['data']['segments'] if i['type'] == 'player-round-damage']
        player_rounds_kills = [i for i in json_data['data']['segments'] if i['type'] == 'player-round-kills']
        player_summary = [i for i in json_data['data']['segments'] if i['type'] == 'player-summary']
        round_summary = [i for i in json_data['data']['segments'] if i['type'] == 'round-summary']
        team_summary = [i for i in json_data['data']['segments'] if i['type'] == 'team-summary']
        
        all_records.extend(extract_player_rows(game_metadata, round_summary, player_summary, player_rounds_kills))
        agent_records.extend(extract_team_rows(game_metadata, game_attributes, round_summary, player_summary))
    return all_records, agent_records
        
all_records, all_agent_records = get_all_processed_data()
len(all_records), len(all_agent_records)


error C:\Users\trist\OneDrive\Documents\game_data\valorant_raw\070bf263-f254-47ed-89f1-be30d806ebde.json
error C:\Users\trist\OneDrive\Documents\game_data\valorant_raw\3a65f8a2-79bd-48a6-afdb-b896405299aa.json
error C:\Users\trist\OneDrive\Documents\game_data\valorant_raw\42715bd6-4e77-4ce7-b143-56df88173190.json
error C:\Users\trist\OneDrive\Documents\game_data\valorant_raw\428a3e5d-e9dc-4251-a919-98a2c918291a.json
error C:\Users\trist\OneDrive\Documents\game_data\valorant_raw\5939c85d-13f0-4061-987e-667b9b910446.json
error C:\Users\trist\OneDrive\Documents\game_data\valorant_raw\67ed3e1e-8402-45bc-a15c-608e3fdef99a.json
error C:\Users\trist\OneDrive\Documents\game_data\valorant_raw\72618013-c631-4269-8f60-dfb20f12c2d0.json
error C:\Users\trist\OneDrive\Documents\game_data\valorant_raw\8f1e8916-6b3d-4687-aabb-b797e49518ba.json
error C:\Users\trist\OneDrive\Documents\game_data\valorant_raw\925c046a-0a4a-496d-b303-9fcc0448b50c.json
error C:\Users\trist\OneDrive\Documents\game_data\valor

(117830, 23566)

In [37]:
ranks.index('Unranked')

0

In [38]:
ranks

['Unranked',
 'Iron 1',
 'Iron 2',
 'Iron 3',
 'Bronze 1',
 'Bronze 2',
 'Bronze 3',
 'Silver 1',
 'Silver 2',
 'Silver 3',
 'Gold 1',
 'Gold 2',
 'Gold 3',
 'Platinum 1',
 'Platinum 2',
 'Platinum 3',
 'Diamond 1',
 'Diamond 2',
 'Diamond 3',
 'Ascendant 1',
 'Ascendant 2',
 'Ascendant 3',
 'Immortal 1',
 'Immortal 2',
 'Immortal 3',
 'Radiant']

In [39]:
user_record_count = dict()

for i in all_records:
    user_record_count.setdefault(i['name'], 0)
    user_record_count[i['name']] += 1

user_record_count_sorted = list()

for k, v in user_record_count.items():
    user_record_count_sorted.append({'user':k, 'count':v})

sorted(user_record_count_sorted, key = lambda x: x['count'], reverse = True)[:20]


[{'user': 'HLee312#3476', 'count': 62},
 {'user': 'Mathematics#6622', 'count': 59},
 {'user': 'Cowpico#007', 'count': 57},
 {'user': 'Kouf#514', 'count': 51},
 {'user': 'strawberry milk#우유갠디', 'count': 49},
 {'user': 'Panda#6784', 'count': 47},
 {'user': 'asianwater#999', 'count': 47},
 {'user': 'Schleepers#Burns', 'count': 47},
 {'user': 'Arlo#103', 'count': 46},
 {'user': 'Lzaps#5447', 'count': 44},
 {'user': 'YoGirlBsf#1738', 'count': 44},
 {'user': 'XFNYTRO#8384', 'count': 42},
 {'user': 'PandaFather#4914', 'count': 42},
 {'user': 'Niviuos#niv', 'count': 41},
 {'user': 'cavern#001', 'count': 41},
 {'user': 'theramsaregood#00000', 'count': 41},
 {'user': 'GameKnightAndy#водка', 'count': 41},
 {'user': 'DraYstaR#zapr', 'count': 41},
 {'user': 'Michael Simp#2004', 'count': 41},
 {'user': 'Prvided#NA1', 'count': 41}]

In [40]:
all_records = [i for i in all_records if user_record_count[i['name']] >= 10]
len(all_records)

20775

In [41]:
all_records_df = pd.DataFrame.from_dict(all_records)
all_records_df

Unnamed: 0,name,agent_num,game_datetime,won_game,team,map_pick,rank,score,scorePerRound,killsPerRound,kills,deaths,assists,kdRatio,damage,damagePerRound,singleKills,doubleKills,tripleKills,quadraKills,pentaKills,multiKills,grenadeCasts,ability1Casts,ability2Casts,ultimateCasts,grenadeCastsPerRound,ability1CastsPerRound,ability2CastsPerRound,ultimateCastsPerRound,plants,defuses,firstKills,firstDeaths,esr,firstKillsPerRound,firstDeathsPerRound,econRating,hsAccuracy,kast,role_Controller,role_Initiator,role_Duelist,role_Sentinel,agent_Astra,agent_Breach,agent_Brimstone,agent_Chamber,agent_Cypher,agent_Fade,agent_Jett,agent_KAY/O,agent_Killjoy,agent_Neon,agent_Omen,agent_Phoenix,agent_Raze,agent_Reyna,agent_Sage,agent_Skye,agent_Sova,agent_Viper,agent_Yoru,rank_num
0,Shinobu#2805,3,2022-08-11T21:59:20.199+00:00,0,Blue,Fracture,Platinum 2,8310,319.615385,1.115385,29,19,1,1.526316,4970,191.153846,13,5,2,0,0,2,24.0,6.0,12.0,4.0,0.923077,0.230769,0.461538,0.153846,0,0,8,1,88.888889,0.307692,0.038462,71,33.928571,76.923077,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,14
1,Niviuos#niv,8,2022-08-10T01:59:23.595+00:00,0,Red,Breeze,Bronze 3,2619,174.600000,0.600000,9,13,1,0.692308,1680,112.000000,3,3,0,0,0,0,10.0,8.0,6.0,0.0,0.666667,0.533333,0.400000,0.000000,2,2,1,1,50.000000,0.066667,0.066667,48,15.094340,46.666667,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,6
2,Loxx#6998,3,2022-08-08T01:52:43.244+00:00,0,Blue,Breeze,Silver 3,4270,224.736842,0.684211,13,16,2,0.812500,2558,134.631579,2,4,1,0,0,1,15.0,0.0,1.0,2.0,0.789474,0.000000,0.052632,0.105263,0,0,3,1,75.000000,0.157895,0.052632,56,26.086957,42.105263,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,9
3,hardstuckiron#3570,8,2022-08-07T22:08:58.634+00:00,1,Blue,Icebox,Bronze 1,6042,402.800000,1.466667,22,8,3,2.750000,4248,283.200000,4,4,2,1,0,3,26.0,11.0,1.0,2.0,1.733333,0.733333,0.066667,0.133333,1,1,6,1,85.714286,0.400000,0.066667,111,21.333333,86.666667,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,4
4,daps#0011,3,2022-08-09T02:05:42.498+00:00,0,Blue,Icebox,Diamond 1,6879,286.625000,1.083333,26,16,2,1.625000,4558,189.916667,8,3,4,0,0,4,17.0,3.0,17.0,4.0,0.708333,0.125000,0.708333,0.166667,1,1,2,3,40.000000,0.083333,0.125000,74,23.611111,75.000000,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,16
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20770,itsililya#7653,15,2022-08-01T01:45:02.107+00:00,0,Red,Ascent,Bronze 1,5946,228.692308,0.769231,20,19,10,1.052632,3746,144.076923,8,5,1,0,0,1,10.0,5.0,44.0,3.0,0.384615,0.192308,1.692308,0.115385,1,1,5,2,71.428571,0.192308,0.076923,49,10.752688,65.384615,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,4
20771,pancakes#lover,6,2022-08-06T03:29:57.164+00:00,0,Blue,Breeze,Bronze 3,2722,151.222222,0.388889,7,18,4,0.388889,1860,103.333333,7,0,0,0,0,0,12.0,8.0,16.0,2.0,0.666667,0.444444,0.888889,0.111111,0,0,6,3,66.666667,0.333333,0.166667,37,28.571429,50.000000,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,6
20772,waffles#lover,13,2022-08-06T03:29:57.164+00:00,0,Blue,Breeze,Silver 3,5280,293.333333,1.055556,19,12,5,1.583333,3661,203.388889,6,1,4,0,0,4,5.0,13.0,4.0,2.0,0.277778,0.722222,0.222222,0.111111,1,1,2,2,50.000000,0.111111,0.111111,98,19.718310,72.222222,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,9
20773,Bender#9978,16,2022-08-07T03:32:03.139+00:00,0,Red,Icebox,Bronze 3,4009,200.450000,0.700000,14,17,4,0.823529,2512,125.600000,6,4,0,0,0,0,3.0,5.0,24.0,3.0,0.150000,0.250000,1.200000,0.150000,1,1,0,0,0.000000,0.000000,0.000000,54,7.142857,70.000000,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,6


In [42]:
all_records_df['name'].nunique()

1061

In [43]:
all_records_df.shape

(20775, 64)

In [44]:
all_records_df['game_datetime'].min()

'2022-03-14T23:27:21.753+00:00'

In [45]:
all_records_df['map_pick'].value_counts(normalize=True)

Bind        0.149603
Ascent      0.145463
Breeze      0.142286
Icebox      0.142046
Haven       0.140842
Fracture    0.140313
Pearl       0.136799
Split       0.002647
Name: map_pick, dtype: float64

In [46]:
all_records_df['killsPerRound'].describe()

count    20775.000000
mean         0.733434
std          0.286796
min          0.000000
25%          0.538462
50%          0.714286
75%          0.904762
max          2.600000
Name: killsPerRound, dtype: float64

In [47]:
all_records_df['rank'].value_counts(normalize=True)

Radiant        0.115572
Silver 1       0.089049
Bronze 3       0.082262
Immortal 3     0.078748
Silver 2       0.078652
Bronze 2       0.076294
Silver 3       0.069555
Bronze 1       0.058580
Gold 1         0.055548
Gold 2         0.042262
Gold 3         0.038219
Platinum 1     0.036245
Platinum 2     0.033887
Iron 3         0.032010
Platinum 3     0.028833
Diamond 1      0.021276
Unranked       0.017617
Iron 2         0.013381
Diamond 2      0.011264
Immortal 2     0.006113
Diamond 3      0.005295
Ascendant 1    0.002792
Immortal 1     0.002455
Iron 1         0.001588
Ascendant 3    0.001444
Ascendant 2    0.001059
Name: rank, dtype: float64

In [48]:
all_records_df.drop_duplicates('name')['rank'].value_counts(normalize=True)

Radiant        0.124411
Immortal 3     0.094251
Silver 1       0.090481
Bronze 3       0.085768
Bronze 2       0.071631
Silver 2       0.067861
Silver 3       0.066918
Bronze 1       0.064090
Gold 1         0.057493
Gold 2         0.043355
Gold 3         0.036758
Platinum 1     0.032988
Iron 3         0.031103
Platinum 2     0.030160
Platinum 3     0.026390
Diamond 1      0.020735
Unranked       0.019793
Iron 2         0.011310
Diamond 2      0.011310
Immortal 2     0.004713
Diamond 3      0.003770
Ascendant 2    0.002828
Immortal 1     0.000943
Iron 1         0.000943
Name: rank, dtype: float64

In [49]:
# set(all_records_df[all_records_df['rank'].str.contains('Ascendant')]['name'].value_counts().index.tolist() +  all_records_df[all_records_df['rank'].str.contains('Diamond')]['name'].value_counts().index.tolist() + all_records_df[all_records_df['rank'].str.contains('Platinum 3')]['name'].value_counts().index.tolist() + all_records_df[all_records_df['rank'].str.contains('Immortal 1')]['name'].value_counts().index.tolist() + all_records_df[all_records_df['rank'].str.contains('Platinum 2')]['name'].value_counts().index.tolist())

In [50]:

def get_player_features(player_df):
    player_df = player_df.sort_values(by = ['game_datetime'])

    data = list()


    row_counter = 0
    for n, (idx, row) in enumerate(player_df.iterrows()):
        
        if n < 5:
            continue

        past_game = player_df.iloc[n-1:n].mean(numeric_only=True).to_dict()
        past_5_games = player_df.iloc[n-5:n].mean(numeric_only=True).to_dict()

        new_x = dict()

        for k, v in past_game.items():
            new_x[f'past_game_{k}'] = v

        for k, v in past_5_games.items():
            new_x[f'past_5_games_avg_{k}'] = v

        new_x['future_agent_pick'] = row['agent_num']
        new_x['future_won_game'] =  row['won_game']
        data.append(new_x)


    data_df = pd.DataFrame.from_dict(data)
    data_df = data_df.fillna(0)
    return data_df



In [51]:

training_players, val_players = train_test_split(list(set(all_records_df['name'].tolist())))

training_data_dfs = list()
val_data_dfs = list()
all_data_dfs= list()

for i in training_players:
    
    player_df = all_records_df[all_records_df['name'] == i]
    player_data_df = get_player_features(player_df)
    training_data_dfs.append(player_data_df)

    
for i in val_players:
    
    player_df = all_records_df[all_records_df['name'] == i]
    player_data_df = get_player_features(player_df)
    val_data_dfs.append(player_data_df)
    
for i in list(set(all_records_df['name'].tolist())):
    
    player_df = all_records_df[all_records_df['name'] == i]
    player_data_df = get_player_features(player_df)
    all_data_dfs.append(player_data_df)


    
training_data_df = pd.concat(training_data_dfs)
val_data_df = pd.concat(val_data_dfs)
all_data_df = pd.concat(all_data_dfs)

len(training_players), len(val_players)

(795, 266)

In [52]:
training_data_df.head()

Unnamed: 0,past_game_agent_num,past_game_won_game,past_game_score,past_game_scorePerRound,past_game_killsPerRound,past_game_kills,past_game_deaths,past_game_assists,past_game_kdRatio,past_game_damage,past_game_damagePerRound,past_game_singleKills,past_game_doubleKills,past_game_tripleKills,past_game_quadraKills,past_game_pentaKills,past_game_multiKills,past_game_grenadeCasts,past_game_ability1Casts,past_game_ability2Casts,past_game_ultimateCasts,past_game_grenadeCastsPerRound,past_game_ability1CastsPerRound,past_game_ability2CastsPerRound,past_game_ultimateCastsPerRound,past_game_plants,past_game_defuses,past_game_firstKills,past_game_firstDeaths,past_game_esr,past_game_firstKillsPerRound,past_game_firstDeathsPerRound,past_game_econRating,past_game_hsAccuracy,past_game_kast,past_game_role_Controller,past_game_role_Initiator,past_game_role_Duelist,past_game_role_Sentinel,past_game_agent_Astra,past_game_agent_Breach,past_game_agent_Brimstone,past_game_agent_Chamber,past_game_agent_Cypher,past_game_agent_Fade,past_game_agent_Jett,past_game_agent_KAY/O,past_game_agent_Killjoy,past_game_agent_Neon,past_game_agent_Omen,past_game_agent_Phoenix,past_game_agent_Raze,past_game_agent_Reyna,past_game_agent_Sage,past_game_agent_Skye,past_game_agent_Sova,past_game_agent_Viper,past_game_agent_Yoru,past_game_rank_num,past_5_games_avg_agent_num,past_5_games_avg_won_game,past_5_games_avg_score,past_5_games_avg_scorePerRound,past_5_games_avg_killsPerRound,past_5_games_avg_kills,past_5_games_avg_deaths,past_5_games_avg_assists,past_5_games_avg_kdRatio,past_5_games_avg_damage,past_5_games_avg_damagePerRound,past_5_games_avg_singleKills,past_5_games_avg_doubleKills,past_5_games_avg_tripleKills,past_5_games_avg_quadraKills,past_5_games_avg_pentaKills,past_5_games_avg_multiKills,past_5_games_avg_grenadeCasts,past_5_games_avg_ability1Casts,past_5_games_avg_ability2Casts,past_5_games_avg_ultimateCasts,past_5_games_avg_grenadeCastsPerRound,past_5_games_avg_ability1CastsPerRound,past_5_games_avg_ability2CastsPerRound,past_5_games_avg_ultimateCastsPerRound,past_5_games_avg_plants,past_5_games_avg_defuses,past_5_games_avg_firstKills,past_5_games_avg_firstDeaths,past_5_games_avg_esr,past_5_games_avg_firstKillsPerRound,past_5_games_avg_firstDeathsPerRound,past_5_games_avg_econRating,past_5_games_avg_hsAccuracy,past_5_games_avg_kast,past_5_games_avg_role_Controller,past_5_games_avg_role_Initiator,past_5_games_avg_role_Duelist,past_5_games_avg_role_Sentinel,past_5_games_avg_agent_Astra,past_5_games_avg_agent_Breach,past_5_games_avg_agent_Brimstone,past_5_games_avg_agent_Chamber,past_5_games_avg_agent_Cypher,past_5_games_avg_agent_Fade,past_5_games_avg_agent_Jett,past_5_games_avg_agent_KAY/O,past_5_games_avg_agent_Killjoy,past_5_games_avg_agent_Neon,past_5_games_avg_agent_Omen,past_5_games_avg_agent_Phoenix,past_5_games_avg_agent_Raze,past_5_games_avg_agent_Reyna,past_5_games_avg_agent_Sage,past_5_games_avg_agent_Skye,past_5_games_avg_agent_Sova,past_5_games_avg_agent_Viper,past_5_games_avg_agent_Yoru,past_5_games_avg_rank_num,future_agent_pick,future_won_game
0,7.0,1.0,4483.0,249.055556,0.833333,15.0,13.0,10.0,1.153846,2601.0,144.5,5.0,5.0,0.0,0.0,0.0,0.0,6.0,7.0,17.0,2.0,0.333333,0.388889,0.944444,0.111111,1.0,1.0,0.0,3.0,0.0,0.0,0.166667,59.0,20.0,77.777778,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,10.0,9.0,0.8,4219.4,214.782394,0.753689,14.8,12.6,6.4,1.188205,2661.0,134.671588,6.4,2.6,0.8,0.2,0.0,1.0,9.6,7.2,22.0,1.8,0.455145,0.375028,1.125673,0.089997,3.4,3.4,1.4,2.4,34.666667,0.07019,0.124158,56.6,15.803854,71.900403,0.4,0.4,0.0,0.2,0.0,0.0,0.2,0.2,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.2,0.0,9.4,2,0
1,2.0,0.0,3558.0,169.428571,0.619048,13.0,16.0,8.0,0.8125,2308.0,109.904762,8.0,1.0,1.0,0.0,0.0,1.0,9.0,2.0,36.0,3.0,0.428571,0.095238,1.714286,0.142857,3.0,3.0,0.0,1.0,0.0,0.0,0.047619,43.0,11.764706,61.904762,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,10.0,8.8,0.8,3900.0,203.842022,0.712281,13.6,12.8,7.6,1.097372,2437.4,126.861236,5.8,2.6,0.6,0.2,0.0,0.8,6.6,7.4,26.2,1.8,0.332164,0.38538,1.338095,0.092481,3.0,3.0,1.0,2.2,24.666667,0.052799,0.116291,51.0,14.156795,69.498747,0.6,0.4,0.0,0.0,0.0,0.0,0.4,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.2,0.0,9.6,9,0
2,9.0,0.0,2529.0,140.5,0.444444,8.0,14.0,3.0,0.571429,1686.0,93.666667,6.0,1.0,0.0,0.0,0.0,0.0,8.0,18.0,12.0,3.0,0.444444,1.0,0.666667,0.166667,0.0,0.0,1.0,3.0,25.0,0.055556,0.166667,51.0,9.302326,55.555556,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,10.0,10.2,0.6,3544.0,186.584127,0.622222,11.8,13.6,7.4,0.871658,2251.6,118.068254,5.2,2.0,0.6,0.2,0.0,0.8,7.8,9.8,21.0,2.0,0.4,0.522222,1.071429,0.104762,2.2,2.2,0.8,2.4,19.666667,0.042857,0.128571,50.4,12.508488,65.873016,0.4,0.4,0.2,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.2,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.2,0.0,9.8,2,0
3,2.0,0.0,3535.0,186.052632,0.631579,12.0,14.0,7.0,0.857143,2198.0,115.684211,6.0,3.0,0.0,0.0,0.0,0.0,6.0,5.0,32.0,2.0,0.315789,0.263158,1.684211,0.105263,2.0,2.0,2.0,2.0,50.0,0.105263,0.105263,52.0,24.444444,68.421053,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,10.0,7.4,0.4,3664.2,195.851796,0.672348,12.6,14.0,6.8,0.909753,2268.2,121.062239,5.8,2.4,0.4,0.2,0.0,0.6,6.6,8.4,22.6,2.2,0.348872,0.460568,1.179699,0.116291,1.6,1.6,1.0,2.4,23.0,0.054386,0.130576,52.2,15.397377,67.176274,0.6,0.2,0.2,0.0,0.0,0.0,0.4,0.0,0.0,0.0,0.0,0.2,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,10.0,5,1
4,5.0,1.0,2996.0,142.666667,0.571429,12.0,14.0,7.0,0.857143,1946.0,92.666667,6.0,3.0,0.0,0.0,0.0,0.0,18.0,4.0,23.0,3.0,0.857143,0.190476,1.095238,0.142857,2.0,2.0,1.0,2.0,33.333333,0.047619,0.095238,41.0,10.416667,76.190476,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,10.0,5.0,0.4,3420.2,177.540685,0.619967,12.0,14.2,7.0,0.850412,2147.8,111.284461,6.2,2.6,0.2,0.0,0.0,0.2,9.4,7.2,24.0,2.6,0.475856,0.387552,1.220969,0.133751,1.6,1.6,0.8,2.2,21.666667,0.041688,0.116291,49.2,15.185629,67.969925,0.4,0.4,0.2,0.0,0.0,0.0,0.4,0.0,0.0,0.2,0.0,0.2,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,10.0,2,1


In [53]:
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)

# training_data_df.corr().sort_values(by=['future_won_game'])[['future_won_game']]

In [54]:
# training_data_df.isna().sum()

In [55]:
rf = RandomForestClassifier()

x = training_data_df.drop('future_won_game', axis = 1)
y = training_data_df['future_won_game']

x_val = val_data_df.drop('future_won_game', axis = 1)
y_val = val_data_df['future_won_game']

rf.fit(x, y)

preds = rf.predict(x_val)

accuracy_score(preds, y_val)

0.5111053044159917

In [56]:
pd.set_option('max_colwidth', 400)



fi_list = list()
for i, j in zip(x.columns, rf.feature_importances_):
    fi_list.append({'column':i, 'importance':j})
    
pd.DataFrame.from_dict(fi_list).sort_values(by = ['importance'], ascending = [False]).head(100)

Unnamed: 0,column,importance
93,past_5_games_avg_kast,0.019024
88,past_5_games_avg_esr,0.018345
33,past_game_hsAccuracy,0.018331
92,past_5_games_avg_hsAccuracy,0.017863
83,past_5_games_avg_ultimateCastsPerRound,0.017795
81,past_5_games_avg_ability1CastsPerRound,0.017671
80,past_5_games_avg_grenadeCastsPerRound,0.017534
82,past_5_games_avg_ability2CastsPerRound,0.017361
89,past_5_games_avg_firstKillsPerRound,0.017314
90,past_5_games_avg_firstDeathsPerRound,0.017166


In [57]:


fi_list = list()

for i in training_data_df.columns:
    
    if i == 'future_won_game':
        continue
        
    rel = linregress(training_data_df['future_won_game'], training_data_df[i])
    fi_list.append({'column': i,
                   'slope':rel.slope,
                   'intercept':rel.intercept,
                   'rvalue':rel.rvalue,
                    'r2':rel.rvalue*rel.rvalue,
                   'pvalue':rel.pvalue,
                   'stderr':rel.stderr})

pd.DataFrame.from_dict(fi_list).sort_values(by = ['r2'], ascending = [False]).head(25)

Unnamed: 0,column,slope,intercept,rvalue,r2,pvalue,stderr
69,past_5_games_avg_damagePerRound,2.444967,138.005794,0.040896,0.001672,1e-05,0.553647
62,past_5_games_avg_scorePerRound,3.713903,210.826163,0.03917,0.001534,2.4e-05,0.87812
68,past_5_games_avg_damage,52.530472,2856.019886,0.038797,0.001505,2.8e-05,12.539696
61,past_5_games_avg_score,80.411422,4358.325273,0.037836,0.001432,4.4e-05,19.683436
10,past_game_damagePerRound,3.517996,137.381554,0.037024,0.001371,6.4e-05,0.880084
3,past_game_scorePerRound,5.595746,209.724125,0.03699,0.001368,6.5e-05,1.401133
63,past_5_games_avg_killsPerRound,0.012935,0.726327,0.036811,0.001355,7.1e-05,0.003255
64,past_5_games_avg_kills,0.280875,14.996536,0.036448,0.001328,8.4e-05,0.071376
8,past_game_kdRatio,0.051229,1.07974,0.036083,0.001302,9.8e-05,0.01315
67,past_5_games_avg_kdRatio,0.02664,1.091922,0.035088,0.001231,0.000153,0.007033


In [58]:

def pd_div(c1,  c2):
    return c1 / max(0.0001, c2)

def create_interactions(df: pd.DataFrame) -> pd.DataFrame:
    df_interaction = pd.DataFrame(index = df.index)

    df_interaction['future_won_game'] = df['future_won_game']

    columns_list = sorted(df.columns.tolist())

    for i in columns_list:
        df_interaction[i] = df[i]
        for j in columns_list:
            if  i == 'future_won_game' or j == 'future_won_game':
                continue
            if columns_list.index(i) >=columns_list.index(j):
                continue
            
            df_interaction[f'{i}_mul_{j}'] = df[i]*df[j]
            df_interaction[f'{i}_max_{j}'] = df[[i,j]].max(axis = 1)
            df_interaction[f'{i}_div_{j}'] = df.apply(lambda x: pd_div(x[i], x[j]), axis=1)

    return df_interaction

training_data_df_int = create_interactions(training_data_df)
val_data_df_int = create_interactions(val_data_df)


In [59]:
rf = RandomForestClassifier()

x = training_data_df_int.drop('future_won_game', axis = 1)
y = training_data_df_int['future_won_game']

x_val = val_data_df_int.drop('future_won_game', axis = 1)
y_val = val_data_df_int['future_won_game']

rf.fit(x, y)

preds = rf.predict(x_val)

accuracy_score(preds, y_val)

0.5003919519205644

In [60]:
pd.set_option('max_colwidth', 400)



fi_list = list()
for i, j in zip(x.columns, rf.feature_importances_):
    fi_list.append({'column':i, 'importance':j})
    
pd.DataFrame.from_dict(fi_list).sort_values(by = ['importance'], ascending = [False]).head(100)

Unnamed: 0,column,importance
20571,past_game_hsAccuracy_mul_past_game_score,0.000305
8303,past_5_games_avg_damage_div_past_5_games_avg_kills,0.00029
19555,past_game_damage_div_past_game_tripleKills,0.000285
8897,past_5_games_avg_deaths_mul_past_5_games_avg_won_game,0.000285
16297,past_game_ability1CastsPerRound_div_past_game_plants,0.000269
9378,past_5_games_avg_doubleKills_div_past_5_games_avg_kdRatio,0.000267
1227,past_5_games_avg_ability2Casts_div_past_game_ability1Casts,0.000265
9417,past_5_games_avg_doubleKills_div_past_5_games_avg_scorePerRound,0.000261
10121,past_5_games_avg_esr_div_past_game_tripleKills,0.000258
183,future_agent_pick_div_past_game_ability1CastsPerRound,0.000254


In [61]:



fi_list = list()

for i in training_data_df_int.columns:
    
    if i == 'future_won_game':
        continue
        
    rel = linregress(training_data_df_int['future_won_game'], training_data_df_int[i])
    fi_list.append({'column': i,
                   'slope':rel.slope,
                   'intercept':rel.intercept,
                   'rvalue':rel.rvalue,
                    'r2':rel.rvalue*rel.rvalue,
                   'pvalue':rel.pvalue,
                   'stderr':rel.stderr})

pd.DataFrame.from_dict(fi_list).sort_values(by = ['r2'], ascending = [False]).head(25)

Unnamed: 0,column,slope,intercept,rvalue,r2,pvalue,stderr
10074,past_5_games_avg_esr_mul_past_game_kdRatio,9.08007,71.975962,0.045217,0.002045,1e-06,1.85928
8546,past_5_games_avg_damagePerRound_max_past_5_games_avg_esr,2.817832,139.174739,0.044242,0.001957,2e-06,0.589734
10080,past_5_games_avg_esr_mul_past_game_killsPerRound,4.04175,47.356766,0.044209,0.001954,2e-06,0.846524
12228,past_5_games_avg_kdRatio_mul_past_game_damagePerRound,9.062897,157.776903,0.04409,0.001944,2e-06,1.903325
12657,past_5_games_avg_killsPerRound_max_past_game_agent_Reyna,0.016427,0.745803,0.044077,0.001943,2e-06,0.003451
10113,past_5_games_avg_esr_mul_past_game_scorePerRound,1119.102417,13695.697979,0.043827,0.001921,2e-06,236.439363
12309,past_5_games_avg_kdRatio_mul_past_game_scorePerRound,14.069047,241.706916,0.0435,0.001892,3e-06,2.994841
10032,past_5_games_avg_esr_mul_past_game_damagePerRound,713.970062,8944.357577,0.043487,0.001891,3e-06,152.023442
10044,past_5_games_avg_esr_mul_past_game_econRating,326.566952,3754.691326,0.043421,0.001885,3e-06,69.641982
9773,past_5_games_avg_econRating_mul_past_game_damagePerRound,396.107941,8277.762955,0.043143,0.001861,3e-06,85.01576


In [62]:
# raise Exception

In [63]:
fi_list = list()

for i in all_data_df.columns:
    if i == 'future_won_game':
        continue
    rel = linregress(all_data_df['future_won_game'], all_data_df[i])
    fi_list.append({'column': i,
                   'slope':rel.slope,
                   'intercept':rel.intercept,
                   'rvalue':rel.rvalue,
                    'r2':rel.rvalue*rel.rvalue,
                   'pvalue':rel.pvalue,
                   'stderr':rel.stderr})

pd.DataFrame.from_dict(fi_list).sort_values(by = ['r2'], ascending = [False])

Unnamed: 0,column,slope,intercept,rvalue,r2,pvalue,stderr
69,past_5_games_avg_damagePerRound,2.508139,138.497132,0.041659,0.001735478,2.179684e-07,0.483668
68,past_5_games_avg_damage,55.278069,2863.333012,0.040651,0.001652471,4.242538e-07,10.924706
10,past_game_damagePerRound,3.795289,137.742005,0.039844,0.001587525,7.148978e-07,0.765284
62,past_5_games_avg_scorePerRound,3.78741,211.625393,0.039638,0.001571178,8.153241e-07,0.767664
61,past_5_games_avg_score,84.113356,4370.475485,0.039372,0.001550189,9.652876e-07,17.164009
64,past_5_games_avg_kills,0.298103,15.044285,0.038491,0.001481574,1.677262e-06,0.062225
63,past_5_games_avg_killsPerRound,0.013408,0.729417,0.037864,0.001433678,2.467868e-06,0.002845
3,past_game_scorePerRound,5.671251,210.490366,0.037356,0.001395451,3.359899e-06,1.219836
8,past_game_kdRatio,0.053374,1.083656,0.037123,0.001378129,3.864495e-06,0.011552
91,past_5_games_avg_econRating,1.052894,58.011182,0.036201,0.001310529,6.676012e-06,0.2337



## Agent pick prediction data set creation

In [64]:
all_records, all_agent_records = get_all_processed_data()


error C:\Users\trist\OneDrive\Documents\game_data\valorant_raw\070bf263-f254-47ed-89f1-be30d806ebde.json
error C:\Users\trist\OneDrive\Documents\game_data\valorant_raw\3a65f8a2-79bd-48a6-afdb-b896405299aa.json
error C:\Users\trist\OneDrive\Documents\game_data\valorant_raw\42715bd6-4e77-4ce7-b143-56df88173190.json
error C:\Users\trist\OneDrive\Documents\game_data\valorant_raw\428a3e5d-e9dc-4251-a919-98a2c918291a.json
error C:\Users\trist\OneDrive\Documents\game_data\valorant_raw\5939c85d-13f0-4061-987e-667b9b910446.json
error C:\Users\trist\OneDrive\Documents\game_data\valorant_raw\67ed3e1e-8402-45bc-a15c-608e3fdef99a.json
error C:\Users\trist\OneDrive\Documents\game_data\valorant_raw\72618013-c631-4269-8f60-dfb20f12c2d0.json
error C:\Users\trist\OneDrive\Documents\game_data\valorant_raw\8f1e8916-6b3d-4687-aabb-b797e49518ba.json
error C:\Users\trist\OneDrive\Documents\game_data\valorant_raw\925c046a-0a4a-496d-b303-9fcc0448b50c.json
error C:\Users\trist\OneDrive\Documents\game_data\valor

In [65]:
# agents = list()

# for i in all_records:
#     agents.append(i['agent'])
    
# sorted(list(set(agents)))

In [66]:
maps = list()

for i in all_records:
    maps.append(i['map_pick'])
    
sorted(list(set(maps)))

['Ascent', 'Bind', 'Breeze', 'Fracture', 'Haven', 'Icebox', 'Pearl', 'Split']

In [67]:
all_agent_records_df = pd.DataFrame.from_dict(all_agent_records)
all_agent_records_df.head()

Unnamed: 0,agent_num_0,agent_num_1,agent_num_2,agent_num_3,agent_num_4,agent_Astra,agent_Breach,agent_Brimstone,agent_Chamber,agent_Cypher,agent_Fade,agent_Jett,agent_KAY/O,agent_Killjoy,agent_Neon,agent_Omen,agent_Phoenix,agent_Raze,agent_Reyna,agent_Sage,agent_Skye,agent_Sova,agent_Viper,agent_Yoru,map_pick,map_Ascent,map_Bind,map_Breeze,map_Fracture,map_Haven,map_Icebox,map_Pearl,map_Split,rank_avg,rank_Unranked,rank_Iron_1,rank_Iron_2,rank_Iron_3,rank_Bronze_1,rank_Bronze_2,rank_Bronze_3,rank_Silver_1,rank_Silver_2,rank_Silver_3,rank_Gold_1,rank_Gold_2,rank_Gold_3,rank_Platinum_1,rank_Platinum_2,rank_Platinum_3,rank_Diamond_1,rank_Diamond_2,rank_Diamond_3,rank_Ascendant_1,rank_Ascendant_2,rank_Ascendant_3,rank_Immortal_1,rank_Immortal_2,rank_Immortal_3,rank_Radiant,role_Controller,role_Initiator,role_Duelist,role_Sentinel,game_win
0,2,5,9,12,14,0,0,1,0,0,1,0,0,0,1,0,0,1,0,1,0,0,0,0,3,0,0,0,1,0,0,0,0,14.8,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,0,1,0,0,0,0,0,0,0,0,1,1,2,1,1
1,3,13,15,16,17,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,1,1,1,0,3,0,0,0,1,0,0,0,0,14.0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,1,2,1,1,0
2,8,9,13,16,17,0,0,0,0,0,0,0,0,1,1,0,0,0,1,0,0,1,1,0,2,0,0,1,0,0,0,0,0,5.0,0,0,1,0,1,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,2,1,0
3,6,13,14,17,18,0,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,1,1,2,0,0,1,0,0,0,0,0,5.6,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,3,1,1
4,3,6,14,16,17,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,0,1,1,0,2,0,0,1,0,0,0,0,0,9.2,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,2,1


In [68]:
train_df, val_df = train_test_split(all_agent_records_df)

train_x = train_df.drop('game_win', axis = 1)
train_y = train_df['game_win']

val_x = val_df.drop('game_win', axis = 1)
val_y = val_df['game_win']
train_x.head()

Unnamed: 0,agent_num_0,agent_num_1,agent_num_2,agent_num_3,agent_num_4,agent_Astra,agent_Breach,agent_Brimstone,agent_Chamber,agent_Cypher,agent_Fade,agent_Jett,agent_KAY/O,agent_Killjoy,agent_Neon,agent_Omen,agent_Phoenix,agent_Raze,agent_Reyna,agent_Sage,agent_Skye,agent_Sova,agent_Viper,agent_Yoru,map_pick,map_Ascent,map_Bind,map_Breeze,map_Fracture,map_Haven,map_Icebox,map_Pearl,map_Split,rank_avg,rank_Unranked,rank_Iron_1,rank_Iron_2,rank_Iron_3,rank_Bronze_1,rank_Bronze_2,rank_Bronze_3,rank_Silver_1,rank_Silver_2,rank_Silver_3,rank_Gold_1,rank_Gold_2,rank_Gold_3,rank_Platinum_1,rank_Platinum_2,rank_Platinum_3,rank_Diamond_1,rank_Diamond_2,rank_Diamond_3,rank_Ascendant_1,rank_Ascendant_2,rank_Ascendant_3,rank_Immortal_1,rank_Immortal_2,rank_Immortal_3,rank_Radiant,role_Controller,role_Initiator,role_Duelist,role_Sentinel
17856,1,3,6,13,17,0,1,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0,1,0,2,0,0,1,0,0,0,0,0,9.8,0,0,0,0,0,0,0,0,1,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,2,1
6563,7,10,13,15,17,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,1,0,1,0,0,1,0,0,0,0,0,0,0,5.6,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,1,0
12218,3,10,13,17,18,0,0,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,1,1,2,0,0,1,0,0,0,0,0,5.6,1,0,0,0,0,1,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,2,1
743,3,10,13,15,18,0,0,0,1,0,0,0,0,0,0,1,0,0,1,0,1,0,0,1,1,0,1,0,0,0,0,0,0,9.2,0,0,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,2,1
13034,9,10,12,14,16,0,0,0,0,0,0,0,0,0,1,1,0,1,0,1,0,1,0,0,1,0,1,0,0,0,0,0,0,6.6,0,0,0,0,0,1,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,2,1


In [69]:
rf = RandomForestClassifier()

rf.fit(train_x, train_y)

preds = rf.predict(val_x)

accuracy_score(preds, val_y)

0.5044127630685675

In [70]:
from sklearn.naive_bayes import GaussianNB

rf = GaussianNB()

rf.fit(train_x, train_y)

preds = rf.predict(val_x)

accuracy_score(preds, val_y)


0.5225729803122878

In [71]:
# fi_list = list()
# for i, j in zip(train_x.columns, rf.feature_importances_):
#     fi_list.append({'column':i, 'importance':j})
    
# pd.DataFrame.from_dict(fi_list).sort_values(by = ['importance'])[-10:]

In [72]:
fi_list = list()

for i in train_x.columns:
    rel = linregress(train_y, train_x[i])
    fi_list.append({'column': i,
                   'slope':rel.slope,
                   'intercept':rel.intercept,
                   'rvalue':rel.rvalue,
                    'r2':rel.rvalue*rel.rvalue,
                   'pvalue':rel.pvalue,
                   'stderr':rel.stderr})

pd.DataFrame.from_dict(fi_list).sort_values(by = ['r2'], ascending = [False])

Unnamed: 0,column,slope,intercept,rvalue,r2,pvalue,stderr
34,rank_Unranked,-0.030976,0.216216,-0.038662,0.001494733,2.727082e-07,0.006023
23,agent_Yoru,-0.019258,0.158671,-0.027034,0.0007308349,0.0003251471,0.005357
18,agent_Reyna,0.026652,0.550901,0.026874,0.0007222036,0.0003527822,0.007458
13,agent_Killjoy,0.014038,0.14482,0.019561,0.0003826389,0.00930621,0.005398
15,agent_Omen,-0.018339,0.35732,-0.019247,0.0003704472,0.01050274,0.007166
11,agent_Jett,-0.018236,0.437613,-0.018425,0.0003394695,0.01430626,0.007444
12,agent_KAY/O,-0.014977,0.224324,-0.018171,0.0003301758,0.01570447,0.006199
17,agent_Raze,0.015429,0.28705,0.016921,0.00028632,0.0244782,0.006858
63,role_Sentinel,0.020018,1.232995,0.015992,0.000255755,0.03349786,0.009415
7,agent_Brimstone,0.012029,0.241667,0.013934,0.000194147,0.0639755,0.006494


In [73]:




def create_interactions(df: pd.DataFrame) -> pd.DataFrame:
    df_interaction = pd.DataFrame(index = df.index)

    df_interaction['game_win'] = df['game_win']

    columns_list = sorted(df.columns.tolist())

    for i in columns_list:
        df_interaction[i] = df[i]
        for j in columns_list:
            if  i == 'game_win' or j == 'game_win':
                continue
            if columns_list.index(i) >=columns_list.index(j):
                continue
            
            if 'rank' in i or 'rank' in j:
                df_interaction[f'{i}_mul_{j}'] = df[i].copy()*df[j].copy()
            elif 'role' in i:
                df_interaction[f'{i}_mul_{j}'] = df[i].copy()*df[j].copy()
                df_interaction[f'{i}_max_{j}'] = df[[i,j]].copy().max(axis = 1)
            else:
                df_interaction[f'{i}_max_{j}'] = df[i].copy()*df[j].copy()
                
    return df_interaction

In [74]:
train_df, test_df = train_test_split(all_agent_records_df)


In [75]:
results = list()


In [76]:
import tensorflow as tf

In [77]:
sorted(results, key = lambda x: x['score'], reverse = True)[:10]

[]

In [78]:
import numpy as np

In [79]:
import random
from scipy import stats
from sklearn.model_selection import KFold 

counter = 0

solver_dict = {
    "newton-cg": ["l2"],
    "lbfgs": ["l2"],
    "liblinear": ['l1', "l2"],
    "sag": ["l2"],
    "saga": ["l1", 'l2', 'elasticnet'],
}

while True:
    counter += 1
    if counter > 1000:
        break
    i = random.randint(10, 100)
    j = random.randint(10, 50) * .01
    k = 10
    solver = random.choice(list(solver_dict.keys()))
    penalty =  random.choice(solver_dict[solver])
    model_choice = random.choice(['rf', 'lr'])
    
    max_depth = random.randint(2, 8)

    
    kf = KFold(n_splits=k, random_state=1, shuffle = True)
    
    if model_choice == 'rf':
        model = RandomForestClassifier(n_estimators=i, max_features = j, max_depth=max_depth)
    else:
        if penalty == 'elasticnet':
            model = LogisticRegression(solver=solver, penalty=penalty, l1_ratio=.5, max_iter=1000)
        else:
            model = LogisticRegression(solver=solver, penalty=penalty, max_iter=1000)

    scores = list()
    
    preds_list = list()
    
    for train_fold_np , val_fold_np in kf.split(train_df):
        
        train_fold_df = all_agent_records_df.iloc[train_fold_np,:]
        val_fold_df = all_agent_records_df.iloc[val_fold_np,:]

        model.fit(train_fold_df.drop('game_win', axis = 1), train_fold_df['game_win'])

        preds = model.predict(test_df.drop('game_win', axis = 1))
        preds_list.append(preds)
    
    result_mode = list()
    for idx in range(test_df.shape[0]):
        tmp_result = 0
        for pred_idx in preds_list:
            tmp_result += pred_idx[idx]
#         print(k, len(preds_list), tmp_result)
        if tmp_result >= k/2:
            result_mode.append(1)
        else:
            result_mode.append(0)
        
    score= accuracy_score(np.array(result_mode), test_df['game_win'])
    results.append({'n_estimators':i,
                   'max_features':j,
                    'max_depth':max_depth,
                    'k':k,
                    'model_choice':model_choice,
                    'penalty':penalty,
                    'solver':solver,
                    'avg_pred':sum(result_mode)/len(result_mode),
                   'score':score})

    if score >= sorted(results, key = lambda x: x['score'], reverse = True)[0]['score'] or counter%100 == 0:
        print(counter, i, j, score, sorted(results, key = lambda x: x['score'], reverse = True)[:3])

        
            


1 35 0.17 0.5308893414799728 [{'n_estimators': 35, 'max_features': 0.17, 'max_depth': 3, 'k': 10, 'model_choice': 'lr', 'penalty': 'l2', 'solver': 'newton-cg', 'avg_pred': 0.5157841140529531, 'score': 0.5308893414799728}]
2 55 0.2 0.5310590631364562 [{'n_estimators': 55, 'max_features': 0.2, 'max_depth': 5, 'k': 10, 'model_choice': 'lr', 'penalty': 'l2', 'solver': 'lbfgs', 'avg_pred': 0.5156143923964698, 'score': 0.5310590631364562}, {'n_estimators': 35, 'max_features': 0.17, 'max_depth': 3, 'k': 10, 'model_choice': 'lr', 'penalty': 'l2', 'solver': 'newton-cg', 'avg_pred': 0.5157841140529531, 'score': 0.5308893414799728}]
3 72 0.38 0.5948744059742023 [{'n_estimators': 72, 'max_features': 0.38, 'max_depth': 6, 'k': 10, 'model_choice': 'rf', 'penalty': 'l2', 'solver': 'newton-cg', 'avg_pred': 0.5492192803801765, 'score': 0.5948744059742023}, {'n_estimators': 55, 'max_features': 0.2, 'max_depth': 5, 'k': 10, 'model_choice': 'lr', 'penalty': 'l2', 'solver': 'lbfgs', 'avg_pred': 0.515614392



29 62 0.41000000000000003 0.6546164290563475 [{'n_estimators': 62, 'max_features': 0.41000000000000003, 'max_depth': 8, 'k': 10, 'model_choice': 'rf', 'penalty': 'l2', 'solver': 'sag', 'avg_pred': 0.561439239646979, 'score': 0.6546164290563475}, {'n_estimators': 45, 'max_features': 0.44, 'max_depth': 8, 'k': 10, 'model_choice': 'rf', 'penalty': 'l2', 'solver': 'sag', 'avg_pred': 0.5750169721656483, 'score': 0.6461303462321792}, {'n_estimators': 84, 'max_features': 0.43, 'max_depth': 7, 'k': 10, 'model_choice': 'rf', 'penalty': 'l2', 'solver': 'newton-cg', 'avg_pred': 0.556856754921928, 'score': 0.6194840461642905}]




100 67 0.48 0.5307196198234895 [{'n_estimators': 62, 'max_features': 0.41000000000000003, 'max_depth': 8, 'k': 10, 'model_choice': 'rf', 'penalty': 'l2', 'solver': 'sag', 'avg_pred': 0.561439239646979, 'score': 0.6546164290563475}, {'n_estimators': 48, 'max_features': 0.5, 'max_depth': 8, 'k': 10, 'model_choice': 'rf', 'penalty': 'l2', 'solver': 'saga', 'avg_pred': 0.5685675492192804, 'score': 0.6478275627970129}, {'n_estimators': 45, 'max_features': 0.44, 'max_depth': 8, 'k': 10, 'model_choice': 'rf', 'penalty': 'l2', 'solver': 'sag', 'avg_pred': 0.5750169721656483, 'score': 0.6461303462321792}]




140 96 0.49 0.6593686354378818 [{'n_estimators': 96, 'max_features': 0.49, 'max_depth': 8, 'k': 10, 'model_choice': 'rf', 'penalty': 'l2', 'solver': 'sag', 'avg_pred': 0.5672097759674134, 'score': 0.6593686354378818}, {'n_estimators': 62, 'max_features': 0.41000000000000003, 'max_depth': 8, 'k': 10, 'model_choice': 'rf', 'penalty': 'l2', 'solver': 'sag', 'avg_pred': 0.561439239646979, 'score': 0.6546164290563475}, {'n_estimators': 67, 'max_features': 0.5, 'max_depth': 8, 'k': 10, 'model_choice': 'rf', 'penalty': 'l2', 'solver': 'lbfgs', 'avg_pred': 0.5660217243720299, 'score': 0.6520706042090971}]




200 65 0.22 0.5381873727087576 [{'n_estimators': 96, 'max_features': 0.49, 'max_depth': 8, 'k': 10, 'model_choice': 'rf', 'penalty': 'l2', 'solver': 'sag', 'avg_pred': 0.5672097759674134, 'score': 0.6593686354378818}, {'n_estimators': 62, 'max_features': 0.41000000000000003, 'max_depth': 8, 'k': 10, 'model_choice': 'rf', 'penalty': 'l2', 'solver': 'sag', 'avg_pred': 0.561439239646979, 'score': 0.6546164290563475}, {'n_estimators': 71, 'max_features': 0.39, 'max_depth': 8, 'k': 10, 'model_choice': 'rf', 'penalty': 'l2', 'solver': 'liblinear', 'avg_pred': 0.5639850644942295, 'score': 0.6530889341479973}]




300 53 0.48 0.5310590631364562 [{'n_estimators': 96, 'max_features': 0.49, 'max_depth': 8, 'k': 10, 'model_choice': 'rf', 'penalty': 'l2', 'solver': 'sag', 'avg_pred': 0.5672097759674134, 'score': 0.6593686354378818}, {'n_estimators': 62, 'max_features': 0.41000000000000003, 'max_depth': 8, 'k': 10, 'model_choice': 'rf', 'penalty': 'l2', 'solver': 'sag', 'avg_pred': 0.561439239646979, 'score': 0.6546164290563475}, {'n_estimators': 71, 'max_features': 0.39, 'max_depth': 8, 'k': 10, 'model_choice': 'rf', 'penalty': 'l2', 'solver': 'liblinear', 'avg_pred': 0.5639850644942295, 'score': 0.6530889341479973}]




400 46 0.48 0.6174473862864902 [{'n_estimators': 96, 'max_features': 0.49, 'max_depth': 8, 'k': 10, 'model_choice': 'rf', 'penalty': 'l2', 'solver': 'sag', 'avg_pred': 0.5672097759674134, 'score': 0.6593686354378818}, {'n_estimators': 65, 'max_features': 0.45, 'max_depth': 8, 'k': 10, 'model_choice': 'rf', 'penalty': 'l2', 'solver': 'saga', 'avg_pred': 0.5641547861507128, 'score': 0.655295315682281}, {'n_estimators': 71, 'max_features': 0.44, 'max_depth': 8, 'k': 10, 'model_choice': 'rf', 'penalty': 'l2', 'solver': 'lbfgs', 'avg_pred': 0.5636456211812627, 'score': 0.654786150712831}]




500 22 0.24 0.5307196198234895 [{'n_estimators': 96, 'max_features': 0.49, 'max_depth': 8, 'k': 10, 'model_choice': 'rf', 'penalty': 'l2', 'solver': 'sag', 'avg_pred': 0.5672097759674134, 'score': 0.6593686354378818}, {'n_estimators': 81, 'max_features': 0.5, 'max_depth': 8, 'k': 10, 'model_choice': 'rf', 'penalty': 'l2', 'solver': 'newton-cg', 'avg_pred': 0.5631364562118126, 'score': 0.6569925322471147}, {'n_estimators': 65, 'max_features': 0.45, 'max_depth': 8, 'k': 10, 'model_choice': 'rf', 'penalty': 'l2', 'solver': 'saga', 'avg_pred': 0.5641547861507128, 'score': 0.655295315682281}]




600 24 0.36 0.6410386965376782 [{'n_estimators': 96, 'max_features': 0.49, 'max_depth': 8, 'k': 10, 'model_choice': 'rf', 'penalty': 'l2', 'solver': 'sag', 'avg_pred': 0.5672097759674134, 'score': 0.6593686354378818}, {'n_estimators': 81, 'max_features': 0.5, 'max_depth': 8, 'k': 10, 'model_choice': 'rf', 'penalty': 'l2', 'solver': 'newton-cg', 'avg_pred': 0.5631364562118126, 'score': 0.6569925322471147}, {'n_estimators': 65, 'max_features': 0.45, 'max_depth': 8, 'k': 10, 'model_choice': 'rf', 'penalty': 'l2', 'solver': 'saga', 'avg_pred': 0.5641547861507128, 'score': 0.655295315682281}]




700 84 0.33 0.5678886625933469 [{'n_estimators': 96, 'max_features': 0.49, 'max_depth': 8, 'k': 10, 'model_choice': 'rf', 'penalty': 'l2', 'solver': 'sag', 'avg_pred': 0.5672097759674134, 'score': 0.6593686354378818}, {'n_estimators': 81, 'max_features': 0.5, 'max_depth': 8, 'k': 10, 'model_choice': 'rf', 'penalty': 'l2', 'solver': 'newton-cg', 'avg_pred': 0.5631364562118126, 'score': 0.6569925322471147}, {'n_estimators': 65, 'max_features': 0.45, 'max_depth': 8, 'k': 10, 'model_choice': 'rf', 'penalty': 'l2', 'solver': 'saga', 'avg_pred': 0.5641547861507128, 'score': 0.655295315682281}]




800 62 0.41000000000000003 0.5935166327223353 [{'n_estimators': 96, 'max_features': 0.49, 'max_depth': 8, 'k': 10, 'model_choice': 'rf', 'penalty': 'l2', 'solver': 'sag', 'avg_pred': 0.5672097759674134, 'score': 0.6593686354378818}, {'n_estimators': 81, 'max_features': 0.5, 'max_depth': 8, 'k': 10, 'model_choice': 'rf', 'penalty': 'l2', 'solver': 'newton-cg', 'avg_pred': 0.5631364562118126, 'score': 0.6569925322471147}, {'n_estimators': 65, 'max_features': 0.45, 'max_depth': 8, 'k': 10, 'model_choice': 'rf', 'penalty': 'l2', 'solver': 'saga', 'avg_pred': 0.5641547861507128, 'score': 0.655295315682281}]




900 80 0.47000000000000003 0.5308893414799728 [{'n_estimators': 96, 'max_features': 0.49, 'max_depth': 8, 'k': 10, 'model_choice': 'rf', 'penalty': 'l2', 'solver': 'sag', 'avg_pred': 0.5672097759674134, 'score': 0.6593686354378818}, {'n_estimators': 81, 'max_features': 0.5, 'max_depth': 8, 'k': 10, 'model_choice': 'rf', 'penalty': 'l2', 'solver': 'newton-cg', 'avg_pred': 0.5631364562118126, 'score': 0.6569925322471147}, {'n_estimators': 65, 'max_features': 0.45, 'max_depth': 8, 'k': 10, 'model_choice': 'rf', 'penalty': 'l2', 'solver': 'saga', 'avg_pred': 0.5641547861507128, 'score': 0.655295315682281}]




1000 50 0.35000000000000003 0.5310590631364562 [{'n_estimators': 96, 'max_features': 0.49, 'max_depth': 8, 'k': 10, 'model_choice': 'rf', 'penalty': 'l2', 'solver': 'sag', 'avg_pred': 0.5672097759674134, 'score': 0.6593686354378818}, {'n_estimators': 81, 'max_features': 0.5, 'max_depth': 8, 'k': 10, 'model_choice': 'rf', 'penalty': 'l2', 'solver': 'newton-cg', 'avg_pred': 0.5631364562118126, 'score': 0.6569925322471147}, {'n_estimators': 65, 'max_features': 0.45, 'max_depth': 8, 'k': 10, 'model_choice': 'rf', 'penalty': 'l2', 'solver': 'saga', 'avg_pred': 0.5641547861507128, 'score': 0.655295315682281}]


In [80]:
sorted(results, key = lambda x: x['score'], reverse = True)[:10]

[{'n_estimators': 96,
  'max_features': 0.49,
  'max_depth': 8,
  'k': 10,
  'model_choice': 'rf',
  'penalty': 'l2',
  'solver': 'sag',
  'avg_pred': 0.5672097759674134,
  'score': 0.6593686354378818},
 {'n_estimators': 81,
  'max_features': 0.5,
  'max_depth': 8,
  'k': 10,
  'model_choice': 'rf',
  'penalty': 'l2',
  'solver': 'newton-cg',
  'avg_pred': 0.5631364562118126,
  'score': 0.6569925322471147},
 {'n_estimators': 65,
  'max_features': 0.45,
  'max_depth': 8,
  'k': 10,
  'model_choice': 'rf',
  'penalty': 'l2',
  'solver': 'saga',
  'avg_pred': 0.5641547861507128,
  'score': 0.655295315682281},
 {'n_estimators': 71,
  'max_features': 0.44,
  'max_depth': 8,
  'k': 10,
  'model_choice': 'rf',
  'penalty': 'l2',
  'solver': 'lbfgs',
  'avg_pred': 0.5636456211812627,
  'score': 0.654786150712831},
 {'n_estimators': 62,
  'max_features': 0.41000000000000003,
  'max_depth': 8,
  'k': 10,
  'model_choice': 'rf',
  'penalty': 'l2',
  'solver': 'sag',
  'avg_pred': 0.56143923964697

In [81]:
# sorted(results, key = lambda x: x['score'], reverse = True)

In [82]:
from sklearn.linear_model import LogisticRegression

train_df, val_df = train_test_split(all_agent_records_df, random_state = 1)

n_estimators = 69
max_features = 0.24
max_depth = 8
model_choice='rf'
k=5
max_iter=1000

kf = KFold(n_splits=k, random_state=1, shuffle = True)

def get_model_to_train(n_estimators, max_features, max_depth, solver, penalty, max_iter):
    if model_choice == 'rf':
        model = RandomForestClassifier(n_estimators=n_estimators, max_features = max_features, max_depth=max_depth)
    else:
        if penalty == 'elasticnet':
            model = LogisticRegression(solver=solver, penalty=penalty, l1_ratio=.5, max_iter=1000)
        else:
            model = LogisticRegression(solver=solver, penalty=penalty, max_iter=1000)
    return model

scores = list()

preds_list = list()

models = list()

for train_fold_np , val_fold_np in kf.split(train_df):

    train_fold_df = all_agent_records_df.iloc[train_fold_np,:]
    val_fold_df = all_agent_records_df.iloc[val_fold_np,:]
    
    model = get_model_to_train(n_estimators, max_features, max_depth, solver, penalty, max_iter)
    model.fit(train_fold_df.drop('game_win', axis = 1), train_fold_df['game_win'])
    
    models.append(model)

    preds = model.predict_proba(test_df.drop('game_win', axis = 1))[:,-1]
    preds_list.append(preds)

result_mode = list()
for idx in range(test_df.shape[0]):
    tmp_result = 0
    for pred_idx in preds_list:
        tmp_result += pred_idx[idx]
    result_mode.append(tmp_result/k)


result_mode_np = np.rint(np.array(result_mode))
accuracy_score(result_mode_np, test_df['game_win'])




0.6410386965376782

In [83]:
predict_best_lineup(model,pca, map_pick, current_agent_list).tail()

NameError: name 'predict_best_lineup' is not defined