In [1]:
# %pip install git+https://github.com/tensorflow/docs

import requests
import os.path
import asyncio
import json
import numpy as np
import pathlib
import pandas as pd
import seaborn as sns
import tensorflow as tf
import time
from tensorflow import keras
from tensorflow.keras import layers
import tensorflow_docs as tfdocs
import tensorflow_docs.plots
import tensorflow_docs.modeling
from riotwatcher import TftWatcher, ApiError
from datetime import date, datetime
from thefuzz import fuzz

pd.options.mode.chained_assignment = None  # default='warn'

from utils.configuration import settings
from utils.utils import *
from tft.api import *




In [2]:
# Credentials, paths and limits are read from the project settings object rather than hard-coded here.
API_KEY = settings.api_key
ASSETS_DIR = settings.assets_dir
SERVER = 'na1'  # one of: euw1 na1 kr oc1
LEAGUE='challengers' # one of: challengers grandmasters
MAX_COUNT = settings.max_count
# Substring searched for in each match's info.game_version; alternatives noted by the author: 12.12.448.6653, 12.11.446.9344
LATEST_RELEASE = '12.12.450.4196'
# Earliest calendar date a match may have to be kept in the "patch" subset.
PATCH = date(2022, 7, 1)

# Column popped out of the feature frame and used as the target.
TARGETNAME = 'placement'

In [3]:
# NOTE: unpickling can execute arbitrary code — only load pickle files produced by this project.
summoners_df = pd.read_pickle(os.path.join(ASSETS_DIR, f'{SERVER}_{LEAGUE}_summoners.pickle'))

In [4]:
summoners_df

Unnamed: 0,id,accountId,puuid,name,profileIconId,revisionDate,summonerLevel,summonerId,summonerName,leaguePoints,rank,wins,losses,veteran,inactive,freshBlood,hotStreak
0,R6h4xOIWVyE7ouyBpfEXY18_vRuZNhXuclkNJTKP2CG5g5w,ZTNZxP0u3Srya_xMCIEjuNv4GQ7MMSAbmLMI1hDu-tj-pg,Vhc5wiUsMaYbH8uWr_9a73u7y6xmUTwiqSB17SNfIsGsvi...,Dace,1625,1657168299561,415,R6h4xOIWVyE7ouyBpfEXY18_vRuZNhXuclkNJTKP2CG5g5w,Dace,906,I,220,172,True,False,False,False
1,Y298V36HCyTqMQF4l3arT7Hn74Z8VgW-MJPbRYYVCeAEzjM,hw_VRXwFk5qe2lbGMuM-pXdIROKiUyuvpal2YxPh8kJptw,q6g9BjTYbn3Py1-YQndWy0_tzJV45lk7_CBu_w2oafUuc-...,Aesah,937,1657161849000,120,Y298V36HCyTqMQF4l3arT7Hn74Z8VgW-MJPbRYYVCeAEzjM,Aesah,1501,I,159,86,False,False,False,False
2,sx7rF36A3kOhKTnt27oJKVUpbsnh8QkugK8wPWaxNfBHQO3u,HJ1bbQkk7qRbHeLVHZ4Sc8DTva-gYf2WNMMm2T9G8yOQ3m...,Aw8kvUOJqEdjk5Yq1Gsw0E7A3tNXHo4FfrAVSJ7wpJP9b3...,Kaíro,4275,1657180560831,115,sx7rF36A3kOhKTnt27oJKVUpbsnh8QkugK8wPWaxNfBHQO3u,Kaíro,775,I,141,109,False,False,False,False
3,Vp_UBnZzhL46cURDKhVJMg3moHnwKDgA0WP1QYfCaheKPZ8,h003WWK7sFeBVej_cz8lvSHDA1qhMYp7wnjFagWTktjX7A,Vf8lE1vgfSQ_bz1Czx7lxig8BZQRIa4oEvmM6nWXGhRg60...,Darth Nub,3523,1657180706000,155,Vp_UBnZzhL46cURDKhVJMg3moHnwKDgA0WP1QYfCaheKPZ8,Darth Nub,1140,I,204,132,True,False,False,False
4,_IbfoDr3Pb3d5z3gyGiC6yZEmncNb-vN6PIeboo1AfQ345U,TQa1E--XyuiMnpeEg6Huk5G4mFhbi__BiifAsViTb1a5EU0,IlLHb6mNyqf2N4VvRY3mh_qdisgyW6ToVq2pt110xgKS5W...,Wíthin,7,1657172495000,287,_IbfoDr3Pb3d5z3gyGiC6yZEmncNb-vN6PIeboo1AfQ345U,Wíthin,820,I,119,81,False,False,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
182,t8Gkg9i9sQE4g7YTnBT-2ET_p5JkBE2qn387VrfTe6SM-gU,j36sT58zeAb5xTGj5Vf9uw6lvEQcA1bwlzWYPNPorPIH2gI,PX18RalD9kIOzMD_KiaLsEExTYUlNulg595Y4HHwE2EM1N...,CrazyCatKid,2087,1657042708000,20,t8Gkg9i9sQE4g7YTnBT-2ET_p5JkBE2qn387VrfTe6SM-gU,CrazyCatKid,551,I,195,164,False,False,True,False
183,yJnzKsN0u66lKkaE2Q0JhwmoHV51D7HegPNk_n5SP-ZRJyo,OQTNOH7lm7N5Hln1JPr8qwofAgrRWXthIEeaLyiywYbL3zg,tmkYmVD5f7vGiG6xlGLVtmk3FpGByIsXbzCJiaVkD4ufg9...,Amde,3898,1657178644000,200,yJnzKsN0u66lKkaE2Q0JhwmoHV51D7HegPNk_n5SP-ZRJyo,Amde,768,I,191,151,False,False,False,True
184,LXo3sWVUOr-4wbkMwPddq__iEKixmZA5NUyn-rk6Qs7eZUkq,p1y1wXHP0DrsG4xM3p2_VLcZXlAnGE8bpojr0CA9mEqMRc...,maHIgOI8z8xSS-rcveBY7-KYIdy8uQ3M4cXUiJkiNzSRzy...,ekollon99,4368,1657119219000,1,LXo3sWVUOr-4wbkMwPddq__iEKixmZA5NUyn-rk6Qs7eZUkq,ekollon99,538,I,102,65,False,False,True,False
185,-mtOot-nc0Uvpttjhg4T5KkEhdGKRAp-LoSKR0gTTUbfwzs,wV3MmO8NRdrf3GTWGVXpqUZZZavFLitrnOqKSy0JzH-ztQ,8RDK4lSUJxPeaqXO-CQ2CWZ1TcKeiz2dAXODlPlutATFNs...,Luqun,4149,1656871497000,394,-mtOot-nc0Uvpttjhg4T5KkEhdGKRAp-LoSKR0gTTUbfwzs,Luqun,535,I,104,83,False,False,True,False


In [5]:

# Get all unique matches_id from assets dir
matches_asset = load_matches(summoners_df, server=SERVER)
matches_id = [match['metadata']['match_id'] for match in matches_asset]
# dict keys keep first-seen order, so this removes duplicates without
# reordering and without relying on a side effect inside a comprehension.
uniq_matches_id = list(dict.fromkeys(matches_id))

In [6]:
len(uniq_matches_id)

6205

In [7]:
len(matches_asset)

10016

In [8]:
# Keep the first payload seen for every match_id, in original order.
# setdefault only stores a match when its id is not present yet.
first_match_by_id = {}
for candidate in matches_asset:
    first_match_by_id.setdefault(candidate['metadata']['match_id'], candidate)
uniq_matches = list(first_match_by_id.values())

In [9]:
len(uniq_matches)

6205

In [10]:
matches_asset[300]['info']['game_version']

'Version 12.11.446.9344 (Jun 09 2022/13:23:39) [PUBLIC] <Releases/12.11>'

In [11]:
# Matches whose game_version string mentions the configured release.
latest_matches = list(filter(
    lambda played: LATEST_RELEASE in played['info']['game_version'],
    uniq_matches,
))


In [12]:
# Same release filter, additionally restricted to games dated on/after PATCH.
# game_datetime is divided by 1000 before conversion (presumably epoch
# milliseconds — confirm); date.fromtimestamp uses the local timezone.
latest_patch_matches = [
    played
    for played in uniq_matches
    if LATEST_RELEASE in played['info']['game_version']
    and date.fromtimestamp(played['info']['game_datetime'] / 1000.0) >= PATCH
]


In [13]:
len(latest_matches)

4385

In [14]:
len(latest_patch_matches)

2415

In [15]:
# latest_matches[0]['info']

In [16]:
from typing import List


def process_matches(df) -> List:
    """Flatten raw match payloads into one flat record per participant.

    Args:
        df: Iterable of match dicts (despite the name, not a DataFrame).
            Each match is read at ``['metadata']['match_id']`` and
            ``['info']['participants']``; each participant must provide
            ``placement``, ``augments``, ``traits`` (entries with ``name``
            and ``tier_current``) and ``units`` (entries with
            ``character_id``, ``tier`` and ``itemNames``).

    Returns:
        A list of dicts, one per participant, with keys ``match_id``,
        ``placement``, ``augment<i>``, one key per trait name (current
        tier), one key per unit ``character_id`` (unit tier) and
        ``<character_id>_item<i>`` holding the last ``_``-separated part of
        the item name.

    Raises:
        KeyError: If any of the fields listed above is missing.
    """
    matches_array = []

    for match_row in df:
        match_id = match_row['metadata']['match_id']

        for participant in match_row['info']['participants']:
            match = {
                'match_id': match_id,
                'placement': participant['placement'],
            }

            for augment_index, augment in enumerate(participant['augments']):
                match[f'augment{augment_index}'] = augment

            for trait in participant['traits']:
                match[trait['name']] = trait['tier_current']

            for unit in participant['units']:
                character_id = unit['character_id']
                match[character_id] = unit['tier']
                # Placeholders so these two columns always exist for boards
                # with units. setdefault (not plain assignment) keeps real
                # TrainerDragon items already recorded by an earlier unit
                # from being reset to 'None' by the units that follow it.
                match.setdefault('TFT7_TrainerDragon_item1', 'None')
                match.setdefault('TFT7_TrainerDragon_item2', 'None')
                for item_index, item in enumerate(unit['itemNames']):
                    # Keep only the trailing segment of the item name.
                    match[f'{character_id}_item{item_index}'] = item.split('_')[-1]

            matches_array.append(match)

    return matches_array


In [17]:
# Flatten both match subsets (release-only and release + patch date) into per-participant records.
matches_array = process_matches(latest_matches)
matches_patch_array = process_matches(latest_patch_matches)

In [18]:
matches_array[0]
# len(matches_array)

{'match_id': 'NA1_4365597016',
 'placement': 3,
 'augment0': 'TFT7_Augment_MirageEmblem2',
 'augment1': 'TFT6_Augment_ItemGrabBag1',
 'augment2': 'TFT7_Augment_DragonHorde',
 'Set7_Cavalier': 0,
 'Set7_Dragon': 2,
 'Set7_Dragonmancer': 0,
 'Set7_Guardian': 0,
 'Set7_Mirage': 3,
 'Set7_Shimmerscale': 1,
 'Set7_Warrior': 0,
 'TFT7_Nunu': 3,
 'TFT7_TrainerDragon_item1': 'None',
 'TFT7_TrainerDragon_item2': 'None',
 'TFT7_Nunu_item0': 'RedBuff',
 'TFT7_Nunu_item1': 'LocketOfTheIronSolari',
 'TFT7_Nunu_item2': 'Zephyr',
 'TFT7_DragonBlue': 2,
 'TFT7_DragonBlue_item0': 'ArchangelsStaff',
 'TFT7_DragonBlue_item1': 'ShimmerscaleGoldmancersStaff',
 'TFT7_DragonBlue_item2': 'UnstableConcoction',
 'TFT7_DragonGold': 2,
 'TFT7_DragonGold_item0': 'WarmogsArmor',
 'TFT7_DragonGold_item1': 'MirageEmblemItem',
 'TFT7_DragonGold_item2': 'Shroud',
 'TFT7_Yasuo': 1}

In [19]:
# One row per participant; keys missing from a record (units/traits/items not on that board) become NaN.
matches_league_df = pd.json_normalize(matches_array)
matches_league_patch_df = pd.json_normalize(matches_patch_array)

In [20]:
matches_league_df

Unnamed: 0,match_id,placement,augment0,augment1,augment2,Set7_Cavalier,Set7_Dragon,Set7_Dragonmancer,Set7_Guardian,Set7_Mirage,...,TFT7_Taric_item2,TFT7_Jinx_item2,TFT7_Ezreal_item0,TFT7_Ezreal_item1,TFT7_Ezreal_item2,TFT7_Aatrox_item1,TFT7_Heimerdinger_item2,TFT7_Senna_item1,TFT7_Senna_item2,TFT7_Aatrox_item2
0,NA1_4365597016,3,TFT7_Augment_MirageEmblem2,TFT6_Augment_ItemGrabBag1,TFT7_Augment_DragonHorde,0.0,2.0,0.0,0.0,3.0,...,,,,,,,,,,
1,NA1_4365597016,6,TFT6_Augment_ItemGrabBag2,TFT6_Augment_PandorasItems,TFT7_Augment_AssassinEmblem,,1.0,0.0,,,...,,,,,,,,,,
2,NA1_4365597016,5,TFT7_Augment_LivingForge,TFT6_Augment_CelestialBlessing1,TFT6_Augment_ComponentGrabBag,,1.0,0.0,,0.0,...,,,,,,,,,,
3,NA1_4365597016,1,TFT6_Augment_Featherweights3,TFT6_Augment_CelestialBlessing1,TFT6_Augment_PortableForge,,1.0,,1.0,,...,,,,,,,,,,
4,NA1_4365597016,4,TFT7_Augment_LivingForge,TFT6_Augment_Weakspot,TFT6_Augment_ThrillOfTheHunt2,,1.0,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
35075,NA1_4357111004,6,TFT6_Augment_Electrocharge2,TFT7_Augment_BestFriends1,TFT6_Augment_MakeshiftArmor2,1.0,,,,,...,,,,,,,,,,
35076,NA1_4357111004,5,TFT6_Augment_Ascension,TFT7_Augment_CannoneerTrait,TFT7_Augment_CannoneerHotShot,,1.0,,1.0,,...,,,,,,,,,,
35077,NA1_4357111004,8,TFT7_Augment_MikaelsGift,TFT6_Augment_Weakspot,TFT7_Augment_JadeTrait,,,2.0,,,...,,,,,,,,,,
35078,NA1_4357111004,4,TFT7_Augment_BestFriends2,TFT6_Augment_Diversify1,TFT6_Augment_Diversify2,,,2.0,,0.0,...,,,,,,,,,,


In [21]:
matches_league_df[matches_league_df.isnull().T.any()]

Unnamed: 0,match_id,placement,augment0,augment1,augment2,Set7_Cavalier,Set7_Dragon,Set7_Dragonmancer,Set7_Guardian,Set7_Mirage,...,TFT7_Taric_item2,TFT7_Jinx_item2,TFT7_Ezreal_item0,TFT7_Ezreal_item1,TFT7_Ezreal_item2,TFT7_Aatrox_item1,TFT7_Heimerdinger_item2,TFT7_Senna_item1,TFT7_Senna_item2,TFT7_Aatrox_item2
0,NA1_4365597016,3,TFT7_Augment_MirageEmblem2,TFT6_Augment_ItemGrabBag1,TFT7_Augment_DragonHorde,0.0,2.0,0.0,0.0,3.0,...,,,,,,,,,,
1,NA1_4365597016,6,TFT6_Augment_ItemGrabBag2,TFT6_Augment_PandorasItems,TFT7_Augment_AssassinEmblem,,1.0,0.0,,,...,,,,,,,,,,
2,NA1_4365597016,5,TFT7_Augment_LivingForge,TFT6_Augment_CelestialBlessing1,TFT6_Augment_ComponentGrabBag,,1.0,0.0,,0.0,...,,,,,,,,,,
3,NA1_4365597016,1,TFT6_Augment_Featherweights3,TFT6_Augment_CelestialBlessing1,TFT6_Augment_PortableForge,,1.0,,1.0,,...,,,,,,,,,,
4,NA1_4365597016,4,TFT7_Augment_LivingForge,TFT6_Augment_Weakspot,TFT6_Augment_ThrillOfTheHunt2,,1.0,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
35075,NA1_4357111004,6,TFT6_Augment_Electrocharge2,TFT7_Augment_BestFriends1,TFT6_Augment_MakeshiftArmor2,1.0,,,,,...,,,,,,,,,,
35076,NA1_4357111004,5,TFT6_Augment_Ascension,TFT7_Augment_CannoneerTrait,TFT7_Augment_CannoneerHotShot,,1.0,,1.0,,...,,,,,,,,,,
35077,NA1_4357111004,8,TFT7_Augment_MikaelsGift,TFT6_Augment_Weakspot,TFT7_Augment_JadeTrait,,,2.0,,,...,,,,,,,,,,
35078,NA1_4357111004,4,TFT7_Augment_BestFriends2,TFT6_Augment_Diversify1,TFT6_Augment_Diversify2,,,2.0,,0.0,...,,,,,,,,,,


## Sort and reorder columns

In [22]:
def reorder_df_col(df):
    """Return ``df`` with the key columns first and all others sorted by name.

    The leading columns are placement, match_id and augment0..2, in that
    order; because ``reindex`` is used, any of them missing from ``df`` is
    created as an all-NaN column. The remaining columns follow in
    ascending name order.
    """
    leading = ['placement', 'match_id', 'augment0', 'augment1', 'augment2']
    trailing = sorted({name for name in df.columns if name not in leading})
    return df.reindex(columns=[*leading, *trailing])


In [23]:
# Apply the same column layout to both frames before they are written out.
matches_league_df = reorder_df_col(matches_league_df)
matches_league_patch_df = reorder_df_col(matches_league_patch_df)

# Output dataframes

In [24]:
# Persist both frames as pickle and CSV under ASSETS_DIR. File names carry server, league and release;
# the patch-date subset additionally carries the PATCH date.
matches_league_df.to_pickle(os.path.join(ASSETS_DIR, f'{SERVER}_{LEAGUE}_{LATEST_RELEASE}_matches.pickle'))
matches_league_df.to_csv(os.path.join(ASSETS_DIR, f'{SERVER}_{LEAGUE}_{LATEST_RELEASE}_matches.csv'), index=False)
matches_league_patch_df.to_pickle(os.path.join(ASSETS_DIR, f'{SERVER}_{LEAGUE}_{LATEST_RELEASE}_{PATCH}_matches.pickle'))
matches_league_patch_df.to_csv(os.path.join(ASSETS_DIR, f'{SERVER}_{LEAGUE}_{LATEST_RELEASE}_{PATCH}_matches.csv'), index=False)

# Preprocessing

In [25]:
raw_df = matches_league_df.copy()
def impute(df):
    """Fill missing values in place and return the same frame.

    Numeric columns get 0; object-dtype columns get the string "None".
    Columns of any other dtype are left untouched.
    """
    replacements = (("number", 0), ("object", "None"))
    for dtype_kind, filler in replacements:
        # Iterating a DataFrame yields its column names.
        for column in df.select_dtypes(dtype_kind):
            df[column] = df[column].fillna(filler)
    return df
raw_df = impute(raw_df)

In [26]:
# Separate the identifier and the target from the feature matrix.
match_id_df = raw_df['match_id']
X = raw_df.drop(['match_id'], axis=1)
y = X.pop(TARGETNAME)
X = X.fillna('')

# Column groups, derived from dtype and naming: numeric "Set7*" columns are
# trait tiers, numeric "TFT7*" columns are unit tiers, and non-augment
# object columns are item slots.
numeric_cols = X.select_dtypes(include=np.number).columns.tolist()
categorical_cols = X.select_dtypes(include=['object', 'category']).columns.tolist()
traits_col = [s for s in numeric_cols if "Set7" in s]
units_col = [s for s in numeric_cols if "TFT7" in s]
augments_col = ['augment0', 'augment1', 'augment2']
items_col = [s for s in categorical_cols if s not in augments_col]

# Cardinality overview and the vocabularies of items / augments.
df_unique = X.nunique().to_frame().reset_index()
df_unique.columns = ['Variable', 'DistinctCount']
unique_items_set = {item for col in items_col for item in X[col].unique().tolist()}
unique_augments_set = {augment for col in augments_col for augment in X[col].unique().tolist()}

# Per-row aggregates. An item slot counts when it is not the 'None' placeholder.
X['items_count'] = X[items_col].ne('None').sum(axis=1)
X['traits_sum'] = X[traits_col].sum(axis=1)
X['units_sum'] = X[units_col].sum(axis=1)

# idxmax returns an index *label*, so it must be looked up with .loc
# (.iloc only happened to work with the default 0..n-1 index).
X.loc[X['units_sum'].idxmax()]

augment0          TFT7_Augment_MageConference
augment1              TFT6_Augment_ThriftShop
augment2             TFT6_Augment_SecondWind1
Set7_Assassin                             0.0
Set7_Astral                               1.0
                             ...             
TFT7_Zoe_item1                           None
TFT7_Zoe_item2                           None
items_count                                12
traits_sum                                9.0
units_sum                                30.0
Name: 3817, Length: 270, dtype: object

In [27]:
# Refresh the column groups now that the aggregate columns exist.
numeric_cols = X.select_dtypes(include=np.number).columns.tolist()
categorical_cols = X.select_dtypes(include=['object', 'category']).columns.tolist()
# Vectorised cast to int64; replaces the deprecated element-wise applymap.
X[numeric_cols] = X[numeric_cols].astype(np.int64)
# Re-attach the target so ranking helpers can read features and placement together.
matches_df = X.copy()
matches_df[TARGETNAME] = y

In [28]:
def get_unit_items_ranking(df: pd.DataFrame, unit: str):
    """Rank the item combinations of one unit by average placement.

    Args:
        df: Frame with a ``placement`` column and, for the unit, the item
            slot columns ``<unit>_item0`` .. ``<unit>_item2``. A slot column
            that is absent is treated as holding 'None' in every row.
        unit: Unit identifier used as the column prefix, e.g. ``'TFT7_Zoe'``.

    Returns:
        A DataFrame with columns ``unit``, ``<unit>_items_grp`` (the three
        slot values sorted and joined with ', '), ``value_count`` (rows with
        that combination) and ``average_placement`` (mean placement), sorted
        by ``average_placement`` ascending.

    NOTE(review): rows are not filtered on whether the unit was actually on
    the board, so rows whose slots are all 'None' are counted as the
    "None, None, None" combination — confirm this is intended.
    """
    item_cols = [f'{unit}_item{slot}' for slot in range(3)]
    # Select the slot columns by exact name (no regex matching) and create
    # any slot that never occurred in the data instead of raising KeyError.
    slots = df.reindex(columns=item_cols, fill_value='None').astype(str)
    # Sorting inside each row makes the combination order-independent.
    combos = [', '.join(sorted(row)) for row in slots.to_numpy()]

    group_col = f'{unit}_items_grp'
    ranking_input = pd.DataFrame(
        {'placement': df['placement'].to_numpy(), 'unit': unit, group_col: combos}
    )
    # group and aggregate count / mean placement
    dct = {'value_count': (group_col, 'count'),
           'average_placement': ('placement', 'mean')}
    return (
        ranking_input
        .groupby(['unit', group_col], as_index=False)
        .agg(**dct)
        .sort_values(by='average_placement')
    )

In [29]:
get_unit_items_ranking(df = matches_df[:2], unit='TFT7_Zoe')

Unnamed: 0,unit,TFT7_Zoe_items_grp,value_count,average_placement
0,TFT7_Zoe,"None, None, None",2,4.5


In [30]:
# Per unit: keep item combinations seen at least 12 times, then take the
# first 5 rows of the ranking returned by get_unit_items_ranking.
top5_items_list = []
for unit in units_col:
    unit_ranking = get_unit_items_ranking(df=matches_df, unit=unit)
    frequent_enough = unit_ranking[unit_ranking['value_count'] >= 12]
    top5_items_list.extend(frequent_enough.head(5).values)

In [31]:
# Turn the collected rows into a frame with short column names.
top5_items_list = pd.DataFrame(
    top5_items_list,
    columns=['unit', 'items', 'value_count', 'average_placement'],
)

In [32]:
# Write the per-unit top item combinations to ASSETS_DIR (index column omitted).
top5_items_list.to_csv(os.path.join(ASSETS_DIR, f'{SERVER}_{LEAGUE}_{LATEST_RELEASE}_{PATCH}_top5_items.csv'), index=False)

In [33]:
pd.options.display.max_colwidth = 100

In [34]:
top5_items_list.groupby('unit').head(1)

Unnamed: 0,unit,items,value_count,average_placement
0,TFT7_Aatrox,"None, None, TitanicHydra",14,3.142857
3,TFT7_Anivia,"ArchangelsStaff, GuinsoosRageblade, Morellonomicon",14,2.714286
8,TFT7_AoShin,"ArchangelsStaff, Quicksilver, SpearOfShojin",16,2.25
13,TFT7_Ashe,"Deathblade, None, None",17,3.0
18,TFT7_AurelionSol,"None, None, None",34920,4.497766
22,TFT7_Bard,"None, None, RabadonsDeathcap",25,2.16
27,TFT7_Braum,"None, None, Zephyr",42,3.785714
32,TFT7_Corki,"GuinsoosRageblade, InfinityEdge, StatikkShiv",15,2.666667
37,TFT7_Diana,"FrozenHeart, IonicSpark, TitansResolve",12,2.75
42,TFT7_DragonBlue,"Quicksilver, RevelEmblemItem, StatikkShiv",15,2.8


In [117]:
# Load en_us.json from ASSETS_DIR (read_json is presumably provided by the star-imported project utils — confirm).
tft_assets = read_json(os.path.join(ASSETS_DIR, f'en_us.json'))

In [152]:
# NOTE(review): index 6 is a hard-coded position in setData, presumably the Set 7 entry — confirm it still
# points at the right set when the asset file is refreshed. Each champion entry is read for 'apiName' and 'traits'.
tft7_set = tft_assets['setData'][6]['champions']

In [171]:
# Map each champion apiName to the list of its traits; a repeated apiName
# keeps accumulating onto the same list.
champions_dict = {}

for champion in tft7_set:
    champions_dict.setdefault(champion["apiName"], []).extend(champion["traits"])

In [173]:
champions_dict['TFT7_Olaf']

['Scalescorn', 'Bruiser', 'Warrior']

In [253]:
def add_traits(units_str):
    """Append abbreviated trait tags to every unit in a composition string.

    Args:
        units_str: Unit api names separated by ', ' (comma + space).

    Returns:
        The units joined by ',' where each unit is followed by its traits
        from ``champions_dict`` as ``-XYZ`` tags: the first two characters
        plus the last character of the trait name (e.g. 'Warrior' -> 'War').
        An empty input yields an empty string.

    Raises:
        KeyError: If a unit is not present in ``champions_dict``.
    """
    # ''.split(', ') is [''], which would look up a non-existent '' unit;
    # a board without units simply has an empty composition.
    if not units_str:
        return ''

    comp_array = []
    for unit in units_str.split(', '):
        # Abbreviation = first 2 characters + last character of the trait.
        abbreviations = [trait[:2] + trait[-1:] for trait in champions_dict[unit]]
        comp_array.append(f'{unit}-' + '-'.join(abbreviations))

    return ','.join(comp_array)

def get_unit_comp_ranking(df: pd.DataFrame):
    """Rank board compositions by average placement.

    Args:
        df: Frame with a ``placement`` column and one tier column per name
            in the notebook-level ``units_col`` list; a unit counts as part
            of the board when its value is greater than 0.

    Returns:
        A DataFrame with columns ``comp_grp`` (trait-tagged unit names from
        ``add_traits`` with the 'TFT7_' prefix removed), ``value_count``
        (rows with that exact composition) and ``average_placement`` (mean
        placement), sorted by ``average_placement`` ascending.
    """
    comps = []
    for tiers in df[units_col].to_numpy():
        # Names of the units present on this board, in units_col order.
        fielded = ', '.join(name for name, tier in zip(units_col, tiers) if tier > 0)
        # Literal (non-regex) removal of the set prefix.
        comps.append(add_traits(fielded).replace('TFT7_', ''))

    # Built as a fresh frame so nothing is written into a slice of the input.
    ranking_input = pd.DataFrame(
        {'placement': df['placement'].to_numpy(), 'comp_grp': comps}
    )
    # group and aggregate count / mean placement
    dct = {'value_count': ('comp_grp', 'count'),
           'average_placement': ('placement', 'mean')}
    return (
        ranking_input
        .groupby(['comp_grp'], as_index=False)
        .agg(**dct)
        .sort_values(by='average_placement')
    )

In [255]:
# Compute the ranking here so this cell also works on a fresh kernel run
# top to bottom; previously comp_df was only assigned in a later cell.
comp_df = get_unit_comp_ranking(df=matches_df)
comp_df

Unnamed: 0,comp_grp,value_count,average_placement
1397,"Bard-Gud-Myc-Bad,DragonGreen-Jae-Myc-Drn,Gnar-Jae-Shr,Lulu-Trr-Myc-Evr,Neeko-Jae-Shr,Pyke-Whs-As...",1,1.0
366,"Anivia-Jae-Evr-Led,DragonGreen-Jae-Myc-Drn,Gnar-Jae-Shr,Lulu-Trr-Myc-Evr,Neeko-Jae-Shr,Soraka-Ja...",1,1.0
1732,"Braum-Scn-Gun,Corki-Rel-Car,Hecarim-Rag-Car,Neeko-Jae-Shr,Ornn-Tet-Brr-Led,Pyke-Whs-Asn,Xayah-Ra...",1,1.0
1706,"Braum-Scn-Gun,Corki-Rel-Car,DragonGold-She-Drn-Gun,Lulu-Trr-Myc-Evr,Nami-Asl-Mae-Myc,Sona-Rel-Ev...",1,1.0
957,"Ashe-Jae-Drr-Swt,Karma-Jae-Drr,LeeSin-Tet-Drr,Shen-Rag-Brr-War,Shyvana-Rag-Shr-Drn,Swain-Rag-Drr...",1,1.0
...,...,...,...
2074,"Diana-Scn-Asn,DragonGreen-Jae-Myc-Drn,DragonPurple-Whs-Brr-Drn,Gnar-Jae-Shr,Neeko-Jae-Shr,Talon-...",1,8.0
2080,"Diana-Scn-Asn,DragonPurple-Whs-Brr-Drn,Elise-Whs-Shr,LeeSin-Tet-Drr,Qiyana-Tet-Asn,Sylas-Whs-Mae...",1,8.0
2089,"Diana-Scn-Asn,DragonPurple-Whs-Brr-Drn,Illaoi-Asl-Brr,Pyke-Whs-Asn,Shen-Rag-Brr-War,Sylas-Whs-Ma...",1,8.0
201,"Anivia-Jae-Evr-Led,Bard-Gud-Myc-Bad,Diana-Scn-Asn,DragonGreen-Jae-Myc-Drn,Gnar-Jae-Shr,Lulu-Trr-...",1,8.0


In [254]:
# Rank every distinct composition by average placement (no top-5 cut is applied here).
comp_df = get_unit_comp_ranking(df = matches_df)

In [256]:
# NOTE(review): despite the "top5" name nothing is truncated here — the >= 1 threshold presumably keeps
# every row (value_count is a per-composition row count) and the [:5] slice is disabled.
top5_comp_list = []
m = comp_df[comp_df['value_count']>=1] # [:5] slice disabled
top5_comp_list.extend(m.values)
# Rebuilding the frame from raw rows renames 'comp_grp' to 'comp' and gives a fresh 0..n-1 index.
comp_ranking_df = pd.DataFrame(top5_comp_list, columns=['comp', 'value_count', 'average_placement'])

In [257]:
comp_ranking_df #.groupby('comp').head(1)

Unnamed: 0,comp,value_count,average_placement
0,"Bard-Gud-Myc-Bad,DragonGreen-Jae-Myc-Drn,Gnar-Jae-Shr,Lulu-Trr-Myc-Evr,Neeko-Jae-Shr,Pyke-Whs-As...",1,1.0
1,"Anivia-Jae-Evr-Led,DragonGreen-Jae-Myc-Drn,Gnar-Jae-Shr,Lulu-Trr-Myc-Evr,Neeko-Jae-Shr,Soraka-Ja...",1,1.0
2,"Braum-Scn-Gun,Corki-Rel-Car,Hecarim-Rag-Car,Neeko-Jae-Shr,Ornn-Tet-Brr-Led,Pyke-Whs-Asn,Xayah-Ra...",1,1.0
3,"Braum-Scn-Gun,Corki-Rel-Car,DragonGold-She-Drn-Gun,Lulu-Trr-Myc-Evr,Nami-Asl-Mae-Myc,Sona-Rel-Ev...",1,1.0
4,"Ashe-Jae-Drr-Swt,Karma-Jae-Drr,LeeSin-Tet-Drr,Shen-Rag-Brr-War,Shyvana-Rag-Shr-Drn,Swain-Rag-Drr...",1,1.0
...,...,...,...
3208,"Diana-Scn-Asn,DragonGreen-Jae-Myc-Drn,DragonPurple-Whs-Brr-Drn,Gnar-Jae-Shr,Neeko-Jae-Shr,Talon-...",1,8.0
3209,"Diana-Scn-Asn,DragonPurple-Whs-Brr-Drn,Elise-Whs-Shr,LeeSin-Tet-Drr,Qiyana-Tet-Asn,Sylas-Whs-Mae...",1,8.0
3210,"Diana-Scn-Asn,DragonPurple-Whs-Brr-Drn,Illaoi-Asl-Brr,Pyke-Whs-Asn,Shen-Rag-Brr-War,Sylas-Whs-Ma...",1,8.0
3211,"Anivia-Jae-Evr-Led,Bard-Gud-Myc-Bad,Diana-Scn-Asn,DragonGreen-Jae-Myc-Drn,Gnar-Jae-Shr,Lulu-Trr-...",1,8.0


In [66]:
from sentence_transformers import SentenceTransformer
from sklearn.cluster import DBSCAN

embedder = SentenceTransformer('all-MiniLM-L6-v2')

--- Logging error ---
Traceback (most recent call last):
  File "c:\Users\furyx\miniconda3\envs\tft\lib\logging\__init__.py", line 1104, in emit
    self.flush()
  File "c:\Users\furyx\miniconda3\envs\tft\lib\logging\__init__.py", line 1084, in flush
    self.stream.flush()
OSError: [Errno 22] Invalid argument
Call stack:
  File "c:\Users\furyx\miniconda3\envs\tft\lib\runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "c:\Users\furyx\miniconda3\envs\tft\lib\runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "c:\Users\furyx\miniconda3\envs\tft\lib\site-packages\ipykernel_launcher.py", line 17, in <module>
    app.launch_new_instance()
  File "c:\Users\furyx\miniconda3\envs\tft\lib\site-packages\traitlets\config\application.py", line 976, in launch_instance
    app.start()
  File "c:\Users\furyx\miniconda3\envs\tft\lib\site-packages\ipykernel\kernelapp.py", line 712, in start
    self.io_loop.start()
  File "c:\Users\furyx\m

In [263]:
corpus_sentences = list(comp_ranking_df['comp'])

# Encode to a NumPy array rather than a torch tensor: scikit-learn needs
# array input, and a tensor living on a GPU cannot be converted implicitly.
corpus_embeddings = embedder.encode(
    corpus_sentences,
    batch_size=512,
    show_progress_bar=True,
    convert_to_numpy=True,
)

# NOTE(review): eps is a radius under DBSCAN's default Euclidean metric on
# the embeddings as returned — confirm whether they are unit-length.
# min_samples=1 lets a single sentence form its own cluster.
clustering_model = DBSCAN(eps=0.3, min_samples=1)
predict = clustering_model.fit_predict(corpus_embeddings)
comp_ranking_df['group'] = pd.Series(predict, index=comp_ranking_df.index)

Batches: 100%|██████████| 7/7 [00:57<00:00,  8.16s/it]


In [264]:
comp_ranking_df.sort_values(by='group')

Unnamed: 0,comp,value_count,average_placement,group,grp_placement,grp_count
0,"Bard-Gud-Myc-Bad,DragonGreen-Jae-Myc-Drn,Gnar-Jae-Shr,Lulu-Trr-Myc-Evr,Neeko-Jae-Shr,Pyke-Whs-As...",1,1.0,0,3.696491,1338
1213,"Bard-Gud-Myc-Bad,Hecarim-Rag-Car,Ornn-Tet-Brr-Led,Shen-Rag-Brr-War,Soraka-Jae-Str,Talon-Gud-Asn,...",1,4.0,0,3.696491,1338
1214,"Bard-Gud-Myc-Bad,Hecarim-Rag-Car,Ornn-Tet-Brr-Led,Shen-Rag-Brr-War,Talon-Gud-Asn,Twitch-Gud-Swt,...",1,4.0,0,3.696491,1338
1215,"Bard-Gud-Myc-Bad,Heimerdinger-Trr-Mae,Nami-Asl-Mae-Myc,Ornn-Tet-Brr-Led,Ryze-Gud-Mae,Sylas-Whs-M...",1,4.0,0,3.696491,1338
1218,"Bard-Gud-Myc-Bad,Heimerdinger-Trr-Mae,Lulu-Trr-Myc-Evr,Ryze-Gud-Mae,Sejuani-Gud-Car,Talon-Gud-As...",1,4.0,0,3.696491,1338
...,...,...,...,...,...,...
3205,"Diana-Scn-Asn,DragonGold-She-Drn-Gun,Lulu-Trr-Myc-Evr,Olaf-Scn-Brr-War,Ornn-Tet-Brr-Led,Qiyana-T...",1,8.0,438,8.000000,1
3206,"Ashe-Jae-Drr-Swt,DragonGreen-Jae-Myc-Drn,Gnar-Jae-Shr,Karma-Jae-Drr,Lulu-Trr-Myc-Evr,Neeko-Jae-S...",1,8.0,439,8.000000,1
3207,"Diana-Scn-Asn,DragonGold-She-Drn-Gun,Ornn-Tet-Brr-Led,Qiyana-Tet-Asn,Talon-Gud-Asn,Zoe-She-Spf-Mae",1,8.0,440,8.000000,1
3208,"Diana-Scn-Asn,DragonGreen-Jae-Myc-Drn,DragonPurple-Whs-Brr-Drn,Gnar-Jae-Shr,Neeko-Jae-Shr,Talon-...",1,8.0,441,8.000000,1


In [265]:
# Broadcast per-cluster aggregates back onto every row of the cluster.
# NOTE(review): grp_placement is the plain mean of the per-composition
# averages, not weighted by value_count — confirm that is intended.
group_key = comp_ranking_df['group']
comp_ranking_df['grp_placement'] = (
    comp_ranking_df['average_placement'].groupby(group_key).transform('mean')
)
comp_ranking_df['grp_count'] = (
    comp_ranking_df['value_count'].groupby(group_key).transform('sum')
)

In [266]:
comp_ranking_df.sort_values(by='group')

Unnamed: 0,comp,value_count,average_placement,group,grp_placement,grp_count
0,"Bard-Gud-Myc-Bad,DragonGreen-Jae-Myc-Drn,Gnar-Jae-Shr,Lulu-Trr-Myc-Evr,Neeko-Jae-Shr,Pyke-Whs-As...",1,1.0,0,3.696491,1338
1213,"Bard-Gud-Myc-Bad,Hecarim-Rag-Car,Ornn-Tet-Brr-Led,Shen-Rag-Brr-War,Soraka-Jae-Str,Talon-Gud-Asn,...",1,4.0,0,3.696491,1338
1214,"Bard-Gud-Myc-Bad,Hecarim-Rag-Car,Ornn-Tet-Brr-Led,Shen-Rag-Brr-War,Talon-Gud-Asn,Twitch-Gud-Swt,...",1,4.0,0,3.696491,1338
1215,"Bard-Gud-Myc-Bad,Heimerdinger-Trr-Mae,Nami-Asl-Mae-Myc,Ornn-Tet-Brr-Led,Ryze-Gud-Mae,Sylas-Whs-M...",1,4.0,0,3.696491,1338
1218,"Bard-Gud-Myc-Bad,Heimerdinger-Trr-Mae,Lulu-Trr-Myc-Evr,Ryze-Gud-Mae,Sejuani-Gud-Car,Talon-Gud-As...",1,4.0,0,3.696491,1338
...,...,...,...,...,...,...
3205,"Diana-Scn-Asn,DragonGold-She-Drn-Gun,Lulu-Trr-Myc-Evr,Olaf-Scn-Brr-War,Ornn-Tet-Brr-Led,Qiyana-T...",1,8.0,438,8.000000,1
3206,"Ashe-Jae-Drr-Swt,DragonGreen-Jae-Myc-Drn,Gnar-Jae-Shr,Karma-Jae-Drr,Lulu-Trr-Myc-Evr,Neeko-Jae-S...",1,8.0,439,8.000000,1
3207,"Diana-Scn-Asn,DragonGold-She-Drn-Gun,Ornn-Tet-Brr-Led,Qiyana-Tet-Asn,Talon-Gud-Asn,Zoe-She-Spf-Mae",1,8.0,440,8.000000,1
3208,"Diana-Scn-Asn,DragonGreen-Jae-Myc-Drn,DragonPurple-Whs-Brr-Drn,Gnar-Jae-Shr,Neeko-Jae-Shr,Talon-...",1,8.0,441,8.000000,1


In [267]:
# Write the clustered composition ranking to ASSETS_DIR (index column omitted).
comp_ranking_df.to_csv(os.path.join(ASSETS_DIR, f'{SERVER}_{LEAGUE}_{LATEST_RELEASE}_{PATCH}_comp_ranking.csv'), index=False)

In [45]:
# from statistics import mode
# import jellyfish

# import pandas as pd

# df = pd.DataFrame({'Code': ['abc', 'abc', 'abc', 'abcc', 'abcc', 'zxc'],
#                    'Description': ['ABC String', 'abc string', 'ABC String and sth', 'abc sth else', 'zxc sth else', 'zxc zxc'],
#                 #    'Value': [10, 20, 30, 40, 100]
#                    })

# df_list = []
# for grp,df in df.groupby('Code'):
#     df['distance'] = df['Description'].apply(lambda x : fuzz.token_set_ratio(x, mode(df['Description'])))
#     # df['Description'] =  mode(df['Description'])
#     df_list.append(df[df['distance'] > 10])

# df = pd.concat(df_list)

In [46]:
# comp_ranking_df.groupby('comp')['comp'].apply(lambda x : fuzz.token_set_ratio(x, ','.join(units_col)))

comp
Anivia, Heimerdinger, Illaoi, Lulu, Nami, Ryze, Sylas, TrainerDragon                         4
AoShin, Bard, Lillia, Ornn, Ryze, Sylas, Zoe                                                 2
Ashe, DragonGreen, Elise, Gnar, Illaoi, Neeko, Nidalee                                       1
Bard, Corki, DragonGold, Jinx, Lulu, Sona, Soraka, Thresh, TrainerDragon, Tristana, Yasuo    5
Bard, Corki, DragonGold, Lulu, Ryze, Sona, Thresh, TrainerDragon, Tristana, Yasuo, Zoe       4
Bard, DragonPurple, Ornn, Pyke, Qiyana, Ryze, Shen, Sylas, Thresh, Zoe                       6
Bard, DragonPurple, Ornn, Pyke, Qiyana, Shen, Sylas, Thresh                                  5
Corki, DragonGold, Leona, Lulu, Sona, TrainerDragon, Tristana, Yasuo                         2
Diana, DragonPurple, LeeSin, Pyke, Qiyana, Sylas, Talon                                      4
DragonBlue, DragonGold, Nunu, Yasuo                                                          1
DragonGreen, Elise, Gnar, Lulu, Neeko, Nidale

In [47]:
# df_list = []
# for grp,df in comp_ranking_df.groupby('comp'):
#     df['distance'] = df['comp'].apply(lambda x : fuzz.token_set_ratio(x, 'Aatrox, DragonGold, Kayn, Shen, Twitch, Xayah, Zoe'))
#     # df['Description'] =  mode(df['Description'])
#     df_list.append(df[df['distance'] > 10])

# df = pd.concat(df_list)