## Working through an example case for a player

In [1]:
import requests
import pandas as pd
import numpy as np
import re

# let wide frames show up to 500 columns before pandas truncates the display
pd.options.display.max_columns = 500

In [53]:
# The Riot API key is a credential, so it is read from the environment instead of
# being written into the notebook (set RIOT_API_KEY before starting Jupyter).
import os

api_key = os.environ.get('RIOT_API_KEY', '')
if not api_key:
    # every request below builds its URL with this key, so flag a missing key up front
    print('RIOT_API_KEY is not set - requests to the Riot API will be rejected')

In [54]:
#ask the TFT league endpoint for the NA challenger ladder
#(the top 200 players; count them with len(challenger_players.json()['entries']))
challenger_url = 'https://na1.api.riotgames.com/tft/league/v1/challenger'
challenger_players = requests.get(challenger_url + '?api_key=' + api_key)

In [56]:
#use the first ladder entry in the response as our sample player
ladder_entries = challenger_players.json()['entries']
player_summoner_name = ladder_entries[0]['summonerName']

player_summoner_name


'FayeBae'

In [57]:
#look up the sample player's account record by summoner name
summoner_lookup_url = (
    'https://na1.api.riotgames.com/tft/summoner/v1/summoners/by-name/'
    + player_summoner_name
)
player_account_info = requests.get(summoner_lookup_url + '?api_key=' + api_key)

player_account_info.json()

{'accountId': 'yOhKOlWJL-2vTH1qZnZPbyd7DnAeY1AjETsuoc1IwrNbJA',
 'id': '5mkzl0kZ-bzDai4VINRsldJv84OQlvpIdI1Y4SbDkpPHZBc',
 'name': 'FayeBae',
 'profileIconId': 4276,
 'puuid': 'TZwOW-dVMgFQpZRUGKcXizW9wag0_n16uUrjMPKPSkZ1feSl4f_vyEUHTU8p2-AJ3eORwBlD03W_1Q',
 'revisionDate': 1581876955000,
 'summonerLevel': 56}

In [58]:
#pull the puuid out of the account record; the match-id request is keyed on it
account_record = player_account_info.json()
player_puuid = account_record['puuid']

player_puuid

'TZwOW-dVMgFQpZRUGKcXizW9wag0_n16uUrjMPKPSkZ1feSl4f_vyEUHTU8p2-AJ3eORwBlD03W_1Q'

In [65]:
#collect match ids in a set so the same id can never be stored twice
matches_set = set()

#request the player's recent match ids (the last 20 matches) via their puuid
match_ids_url = (
    'https://americas.api.riotgames.com/tft/match/v1/matches/by-puuid/'
    + player_puuid
    + '/ids'
)
player_matches = requests.get(match_ids_url + '?api_key=' + api_key)

#fold every returned id into the set in one call
matches_set.update(player_matches.json())

In [60]:
#convert the set into an ascending list so it can be iterated in a stable order
matches_set = sorted(matches_set)
# matches_set

### Info below pertains to a specific match

In [66]:
#take the first id in the player's match-id response as the match to inspect
recent_match_ids = player_matches.json()
match_number = recent_match_ids[0]

In [67]:
#download the full record for the chosen match
match_detail_url = (
    'https://americas.api.riotgames.com/tft/match/v1/matches/' + match_number
)
player_match_info = requests.get(match_detail_url + '?api_key=' + api_key)

In [70]:
#create a new, empty dataframe to house the example data;
#the loop over the match's participants adds one row per participant to it
data_test = pd.DataFrame()

## GET DATETIME

In [94]:
#turn the match's game_datetime into a readable date string
from datetime import datetime

#the raw value is divided by 1000 before conversion
#(presumably epoch milliseconds -- confirm against the API docs)
game_epoch_seconds = player_match_info.json()['info']['game_datetime'] / 1000

#fromtimestamp() without a tz argument converts using this machine's local timezone
match_start_date = datetime.fromtimestamp(game_epoch_seconds).strftime('%m/%d/%Y %H:%M')

match_start_date


datetime.datetime(2020, 2, 10, 1, 46, 39, 508000)

In [71]:
#we isolate only on the 'info/participants' section since it's all we care about
match = player_match_info.json()['info']['participants']

#One flat record is built per participant and the records are turned into a frame
#in a single step: DataFrame.append was deprecated in pandas 1.4 and removed in 2.0,
#and appending row-by-row copies the whole frame on every iteration.
participant_rows = []

for player in match:

    players_data = {}

    #capture the units: column = character id without the 'TFT2_' prefix, lower-cased;
    #value = the unit's 'tier' field
    for unit in player['units']:
        unit_name = unit['character_id'].replace('TFT2_','').lower()
        players_data[unit_name] = unit['tier']

    #capture the traits: column = '_trait_' + lower-cased trait name;
    #value = the trait's 'tier_current' field
    for trait in player['traits']:
        trait_name = '_trait_' + trait['name'].lower()
        players_data[trait_name] = trait['tier_current']

    #capture the placement
    players_data['_placement'] = player['placement']

    #capture the level
    players_data['_level'] = player['level']

    participant_rows.append(players_data)

#add this all to the dataframe; columns missing from a record come out as NaN
new_rows = pd.DataFrame(participant_rows)
if data_test.empty:
    data_test = new_rows
else:
    #keep rows from an earlier run of this cell, as the row-by-row append did
    data_test = pd.concat([data_test, new_rows], ignore_index=True)

In [115]:
#Clean up the example frame built above. This cell used to read `data`, which is
#not created until the "ACTUAL RUNS" section, so a top-to-bottom run raised
#NameError; the example rows live in `data_test`.
data = data_test.fillna(0)     #a unit/trait a player did not have becomes 0
data = data.sort_index(axis=1) #re-sort the columns alphabetically

move_to_front = ['_placement','_level'] #move placement and level to the front
data = data[move_to_front + [col for col in data.columns if col not in move_to_front]]

data = data.astype(int) #turn to whole numbers, should not be any floats

# data

# ACTUAL RUNS

## Script to run to get matches

In [47]:
# The Riot API key is a credential, so it is read from the environment instead of
# being written into the notebook (set RIOT_API_KEY before starting Jupyter).
import os

api_key = os.environ.get('RIOT_API_KEY', '')
if not api_key:
    # every request below builds its URL with this key, so flag a missing key up front
    print('RIOT_API_KEY is not set - requests to the Riot API will be rejected')

In [5]:
import requests
import pandas as pd
import numpy as np
import re
import time
from datetime import datetime

pd.set_option('display.max_columns', 500)

start_time = time.time()

#`api_key` must already be defined by the cell above; no key is kept in this cell.

matches_set = set()             #make a set of matches so no possible duplicates
data = pd.DataFrame()           #create a new dataframe to house the data

#make a request to grab the challenger players from NA
#this is 200 players (len(challenger_players.json()['entries']))
challenger_players = requests.get(
    'https://na1.api.riotgames.com/tft/league/v1/challenger'
    +'?api_key=' + api_key)

#nothing below can work without the ladder, so stop here with the HTTP error
#instead of failing later with a KeyError on 'entries'
challenger_players.raise_for_status()

#put into list of challenger players
list_of_challenger_players = [
    entry['summonerName'] for entry in challenger_players.json()['entries']
]

#iterate through each challenger player
for player_summoner_name in list_of_challenger_players:

    #we grab the player's account info
    player_account_info = requests.get(
        'https://na1.api.riotgames.com/tft/summoner/v1/summoners/by-name/' +
        player_summoner_name
        +'?api_key=' + api_key)

    #a failed lookup has no 'puuid'; skip that player rather than abort the crawl
    if not player_account_info.ok:
        print('skipping', player_summoner_name,
              '- summoner lookup returned', player_account_info.status_code)
        time.sleep(1)
        continue

    #we isolate the puuid of the player from the account info
    player_puuid = player_account_info.json()['puuid']

    #we grab the last 20 matches of the player via the player_puuid
    player_matches = requests.get(
        'https://americas.api.riotgames.com/tft/match/v1/matches/by-puuid/'
        + player_puuid
        +'/ids?api_key=' + api_key)

    #only a successful response is a list of ids; iterating an error body would
    #put its keys into the set as if they were match ids
    if player_matches.ok:
        matches_set.update(player_matches.json())
    else:
        print('skipping matches of', player_summoner_name,
              '- match list returned', player_matches.status_code)

    #pause between players to stay under the API rate limit
    time.sleep(1)


#turn into a sorted list so we can iterate through it
matches_set = sorted(matches_set)

print("Time for grabbing all matches:", time.time() - start_time)

Time for grabbing all matches: 338.16722798347473


In [19]:
#persist the collected match ids to matches.txt, one id per line
with open('matches.txt','w') as matches_file:
    matches_file.writelines('%s\n' % match_id for match_id in matches_set)

## Script to pull data from matches into df

In [121]:
#reload the match ids saved in matches.txt, dropping the newline from each line
with open('matches.txt','r') as matches_file:
    matches_set = [line.strip('\n') for line in matches_file]

In [None]:
start_time = time.time()

#Participant rows are gathered in a plain list and turned into a frame once:
#DataFrame.append was deprecated in pandas 1.4 and removed in 2.0, and it copied
#the whole frame for every row added.
match_rows = []

try:
    #count from 1 so the progress line "100" is printed after 100 matches
    for i, match_number in enumerate(matches_set, start=1):

        #we get the info of the match chosen
        player_match_info = requests.get(
            'https://americas.api.riotgames.com/tft/match/v1/matches/'
            + match_number
            +'?api_key=' + api_key
        )

        if player_match_info.ok:
            #parse the response body once per match instead of once per participant
            match_info = player_match_info.json()['info']

            #get timestamp of the match (same for every participant);
            #fromtimestamp() converts using this machine's local timezone
            match_start_date = datetime.fromtimestamp(
                match_info['game_datetime']/1000
                ).strftime('%m-%d-%Y')

            #then we isolate only on the 'info/participants'
            match = match_info['participants']

            for player in match:

                players_data = {}
                players_data['match_date'] = match_start_date
                players_data['match_no'] = match_number

                #capture the units
                for unit in player['units']:
                    unit_name = unit['character_id'].replace('TFT2_','').lower()
                    players_data[unit_name] = unit['tier']

                #capture the traits
                for trait in player['traits']:
                    trait_name = '_trait_' + trait['name'].lower()
                    players_data[trait_name] = trait['tier_current']

                #capture the placement
                players_data['_placement'] = player['placement']

                #capture the level
                players_data['_level'] = player['level']

                match_rows.append(players_data)
        else:
            #an error response has no 'info'; skip it instead of aborting the crawl
            print('skipping', match_number,
                  '- match request returned', player_match_info.status_code)

        if i % 100 == 0:
            print(i, (time.time() - start_time)/60)

        #pause between requests to stay under the API rate limit
        time.sleep(1.15)
finally:
    #add everything collected so far to the dataframe, even if the crawl was
    #interrupted part-way through
    if match_rows:
        new_rows = pd.DataFrame(match_rows)
        if data.empty:
            data = new_rows
        else:
            data = pd.concat([data, new_rows], ignore_index=True)

print("Time for grabbing all data from matches:", (time.time() - start_time)/60)

100 2.853688100973765
200 5.809619299570719
300 8.94265018304189
400 11.941843736171723
500 14.934542099634806
600 17.93859643538793
700 20.99937411546707
800 24.15588116645813
900 27.216607065995536
1000 30.330208051204682
1100 33.440845370292664
1200 36.54272166490555
1300 39.689809783299765
1400 42.902739616235095
1500 46.32568329970042
1600 49.788748653729755
1700 53.342973268032075
1800 56.975664699077605
1900 60.39199168682099
2000 63.81035815080007
2100 67.2420842329661


In [None]:
data = data.fillna(0) #a unit/trait a player did not have becomes 0
data = data.sort_index(axis=1) #re-sort the columns alphabetically

#move the identifying columns, placement and level to the front
move_to_front = ['match_date','match_no','_placement','_level']
data = data[move_to_front + [col for col in data.columns if col not in move_to_front]]

#turn the counts to whole numbers, should not be any floats.
#match_date and match_no hold strings, so casting the whole frame with astype(int)
#raises ValueError; only the remaining columns are cast.
text_columns = ['match_date','match_no']
data = data.astype({col: int for col in data.columns if col not in text_columns})

In [125]:
# preview the first rows of `data`
data.head()
# data = pd.DataFrame()
# ^ uncomment to reset the frame, then re-run the pull and clean-up cells
# clean up the data frame and then retry this stuff

## Testing Models

In [38]:
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB

In [39]:
#target: the final placement of each participant
data_y = data['_placement']

#features: every other column except the string identifiers added by the data pull
#(match_date, match_no), which the models cannot fit on; errors='ignore' keeps this
#working on a frame that was built without those columns
non_feature_columns = ['_placement', 'match_date', 'match_no']
data_x = data.drop(columns=non_feature_columns, errors='ignore')

#fixed random_state so the split, and every score derived from it, is reproducible
x_train, x_test, y_train, y_test = train_test_split(
    data_x, data_y, test_size = 0.2, random_state = 42)

In [40]:
#one row to sanity-check predictions, kept as a 1-row DataFrame taken from data_x
#so it always has exactly the columns (and column names) the model is trained on
example = data_x.iloc[[1]]

#linear regression model
lin_reg_model = LinearRegression().fit(x_train,y_train)
print(lin_reg_model.predict(example))

# Models considered and abandoned:
#
#   logistic regression model ->
#       we abandon this because we believe that the predicted output should not
#       just be a single value, but should be flexible to losses and wins
#
#       log_reg_model = LogisticRegression().fit(x_train,y_train)
#       print(log_reg_model.predict(example))
#
#   naive bayes model ->
#       we abandon this because we do not think that each parameter is
#       independent of each other
#
#       nb_model = GaussianNB().fit(x_train,y_train)
#       print(nb_model.predict(example))

[4.84682221]


In [91]:
# preview the first rows of `data`
# NOTE(review): the saved output below still shows NaN and float values, so it looks
# like it was captured before the fillna/astype clean-up -- re-run to refresh it
data.head()

Unnamed: 0,_level,_placement,_trait_crystal,_trait_metal,_trait_poison,_trait_predator,_trait_set2_assassin,_trait_set2_ranger,ashe,kogmaw,nocturne,reksai,skarner,_trait_celestial,_trait_desert,_trait_electric,_trait_light,_trait_mystic,_trait_ocean,_trait_summoner,_trait_warden,azir,karma,khazix,leona,nami,qiyanaocean,yorick,zed,_trait_berserker,_trait_inferno,_trait_mountain,_trait_shadow,annie,kindred,malphite,malzahar,sion,_trait_alchemist,_trait_avatar,_trait_set2_glacial,_trait_wind,drmundo,ezreal,luxwind,singed,taric,twitch,_trait_mage,amumu,brand,nautilus,syndra,thresh,vladimir,soraka,janna,_trait_set2_blademaster,luxshadow,masteryi,varus
0,6.0,8.0,1.0,1.0,0.0,1.0,0.0,0.0,1.0,2.0,1.0,3.0,2.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,8.0,5.0,,,,,1.0,,,,,,,1.0,1.0,0.0,0.0,1.0,1.0,1.0,0.0,2.0,1.0,2.0,2.0,1.0,2.0,2.0,1.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,8.0,4.0,,,,,,0.0,,,,,,0.0,,,0.0,1.0,0.0,1.0,0.0,,1.0,,,1.0,,2.0,,0.0,0.0,0.0,1.0,2.0,2.0,1.0,2.0,2.0,,,,,,,,,,,,,,,,,,,,,,,
3,8.0,3.0,1.0,,1.0,,,2.0,2.0,,,,,,,,,,,,0.0,,,,,,,,,0.0,0.0,,0.0,,2.0,,,,1.0,1.0,0.0,1.0,1.0,2.0,1.0,1.0,1.0,2.0,,,,,,,,,,,,,
4,8.0,2.0,,,,,,,,,,,,,,,,,2.0,0.0,2.0,,,,,,,,,,1.0,0.0,,2.0,,2.0,,,,,,,,,,,,,1.0,2.0,2.0,2.0,2.0,2.0,2.0,,,,,,


In [None]:
'''
current concerns:
    1. having the level of a unit may be too granular - try simplifying with 0/1's
    
after talk:
    1. take a look at non-linear regression models? 
        but is hard to test
    2. do some regularization techniques, try to reduce dimensionality 
    3. PCA - feature extraction, not feature selection (very low interpretability)
            More likely interested in feature selection, not feature extraction (PCA) but should try
    
    4. Grab top tier comps and record them now
            Data that is current is based off of challenger players, 
            which means they must be active so data is all fresh
        
questions to myself:
    3. Can you potentially make a model to build out that composition?
    
    Can take the model which predicts placement and look at its coefficients to see
        which features are the most relevant (higher coefficient) and least relevant (lower coefficient)
        
    Take a look into seeing if you can get the dates of the model
    
____________________________________________________________________________

* fully expand the data set - CHECK
* explore if time/date is present when taking a look at matches - CHECK
* reach out to the website - CHECK 
* explore regularization

* non-linear models 
* pca
    
'''