In [360]:
# %pip install git+https://github.com/tensorflow/docs

import requests
import os.path
import asyncio
import json
import numpy as np
import pathlib
import pandas as pd
import seaborn as sns
import tensorflow as tf
import time
from tensorflow import keras
from tensorflow.keras import layers
import tensorflow_docs as tfdocs
import tensorflow_docs.plots
import tensorflow_docs.modeling
from riotwatcher import TftWatcher, ApiError
from datetime import date, datetime
from thefuzz import fuzz

pd.options.mode.chained_assignment = None  # default='warn'

from utils.configuration import settings
from utils.utils import *
from tft.api import *


In [361]:
# Run configuration. API_KEY, ASSETS_DIR and MAX_COUNT are read from the
# project settings object; the rest are chosen per run.
API_KEY = settings.api_key
ASSETS_DIR = settings.assets_dir
SERVER = 'na1'  # alternatives noted by the author: euw1, kr, oc1
LEAGUE='challengers' # alternative noted by the author: grandmasters
MAX_COUNT = settings.max_count
LATEST_RELEASE = '12.12.450.4196' # matched as a substring of each match's game_version; other builds noted by the author: 12.12.448.6653, 12.11.446.9344
PATCH = date(2022, 7, 1)  # inclusive lower bound on game date for the patch-filtered match list

# Column popped from the feature frame and used as the target.
TARGETNAME = 'placement'

In [362]:
summoners_df = pd.read_pickle(os.path.join(ASSETS_DIR, f'{SERVER}_{LEAGUE}_summoners.pickle'))

In [363]:
summoners_df

Unnamed: 0,id,accountId,puuid,name,profileIconId,revisionDate,summonerLevel,summonerId,summonerName,leaguePoints,rank,wins,losses,veteran,inactive,freshBlood,hotStreak
0,aM98OWZKwtQprZnmPfEQgyH__SScMb4ielz14Kbv49woQxQ,HLO_HWh7-0oHg0hgwWWuopqBLC4hh5_xsiFCowuQNoYBnjY,QmtvbmYjPxX6i5wBTRddnDen7XmbM6JAE84OcYF_eY3By_-i4yHe4vSJ_Kjxeiv9ArPXhlttPHeu4w,Jason Socks,5408,1657219691627,523,aM98OWZKwtQprZnmPfEQgyH__SScMb4ielz14Kbv49woQxQ,Jason Socks,502,I,100,69,False,False,True,False
1,0gkNDFyIZ1hZ6qZJcvQ5t8E-BQfTLeYJDAEPJ6cet1KgmI2d,JBjSFw_O3g_5QEgomj9AldeGzaPN6f4jaaUxbsrOtJbD-QkeD_Fo-aU-,IE7HMHPvvBmOicAHhwQ82QhDs-P64R15GDQ8Rg5fq9hEhlwnDWifj9qGHV_qgZHyfv3og1bZzcPD2w,VanillaXD,29,1657297937000,58,0gkNDFyIZ1hZ6qZJcvQ5t8E-BQfTLeYJDAEPJ6cet1KgmI2d,VanillaXD,533,I,187,157,False,False,True,False
2,6SiiTP1etDpYhnzK1IMb-D2RLoga1cTMCNJ1e4Q5gz5y9Co,CRfoIYZEQkHZ1EZqYay4h0VvhIfirU60fTKSaut96j7O1A,SUpb3Wr5I1GPOU48IqRdeeXDkr9Vcxr0pdahQ6t05EWdX_dnuEdX2yK0HdEAqFdAZSjSGiWkiLBO3Q,Trugz,5181,1657292366000,182,6SiiTP1etDpYhnzK1IMb-D2RLoga1cTMCNJ1e4Q5gz5y9Co,Trugz,505,I,118,81,False,False,True,True
3,NZUyBA6Bjiy_MZj8ShTluVnWxVfPA3NiKmzGuOCH2zNPkhM,sVsaODH09TvWBNVl4uLjnx4rHJ1DeAGAdUFf8LNIMj8jym4,dZKtDFv3NazDhbfzEYQh3Lz67zQfRJFblxCWcI6ZrSh6ArecuX7mZPL6JKGnKaZOqY1Z4Y4dVGH61g,ajmonsta,1591,1657297789000,399,NZUyBA6Bjiy_MZj8ShTluVnWxVfPA3NiKmzGuOCH2zNPkhM,ajmonsta,825,I,169,135,False,False,True,False
4,5dE6M7nXMdLATafB_x0kKwMQ0AVLZrLrt3oUnVrgmQRMHiU,gaVXvnE4upEw9z_3e-DMx9GyhIKxk54EUhiPu68kH9JeYgo,YG4qyuxricddSWKg4g0vF410Adn7lMFm2BayIF2a2BxrOVjXwKGpw9TQxuLVIm5S-LaAMAghpTnPOg,garet,0,1657286217000,210,5dE6M7nXMdLATafB_x0kKwMQ0AVLZrLrt3oUnVrgmQRMHiU,garet,1304,I,168,93,False,False,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
200,PwY95sJrHM_yUyedet6IyPUuqX5G_vVrcMYtdtAJ8_dPhIM,n0DwVbP6PAxH4Q0yIt4FE2kQZprEZJKeEaWytmkdUjQJqek,GNZgsn4Ja072f0o99inDwqCdwjgtU8-saoh1B6yyoGUnOsykCsM8-yKTzc-4mXFrhFOqac91a7LslQ,ECohmygod,7,1657221179000,34,PwY95sJrHM_yUyedet6IyPUuqX5G_vVrcMYtdtAJ8_dPhIM,ECohmygod,533,I,212,187,False,False,True,False
201,e34rJaogiO4CE8zLS8ewLMFGPGoNIMIMsB7gDqVH9aPNHIuY,BcLHPFVnuMFT_g6cJdB7LsDocnWKJ3TQyp9mMIY7amMcq-PdOm3O0410,St_wL0UOxf_11zObuZn-7FE-yGRGjfjFaeHlAbCMCESgyqgS636iSJhu5qxxDqBq7XuNlL4I161r8g,Spethom,4904,1657297246826,198,e34rJaogiO4CE8zLS8ewLMFGPGoNIMIMsB7gDqVH9aPNHIuY,Spethom,610,I,143,93,False,False,False,False
202,_qohVpjRSfy3xKoBpELkQaiVr0hnBq6Q7cVCOU1wYFE-tUyf,MaTrdhM5ZFuPPjYtfpng-SwqCteoIyGviwsAnA3f5G7LCwaPHTbASRG1,7BoKQcZCku1BMuaDQbXvwMkNWKBn9frOYViJTUbhQld7ZaOu0mpSggbyThbbE1jCcyf7fWemHvRHMg,cantkilltrag,14,1657258042000,18,_qohVpjRSfy3xKoBpELkQaiVr0hnBq6Q7cVCOU1wYFE-tUyf,cantkilltrag,504,I,144,111,False,False,True,False
203,f7_bi4CEGkGEPHpfMZkVU7zAz-XV4vl8QWhqiDMkRs3GUH4,SH_RhxMDbtdcSkevgJitMUaZiquawNSIFa0wDWa5DRArFxs,01S6NdFXqrUTGRZWrB31C-RlV52Szh-jlZvBx4SGqEX2z3XDzWfc7zg18i9bxgmaUob7xvpX61eRyA,sehm,3588,1657264401628,577,f7_bi4CEGkGEPHpfMZkVU7zAz-XV4vl8QWhqiDMkRs3GUH4,sehm,623,I,129,99,False,False,True,True


In [364]:

# Load every stored match for these summoners and list the match ids once each,
# in order of first appearance.
matches_asset = load_matches(summoners_df, server=SERVER)
matches_id = [entry['metadata']['match_id'] for entry in matches_asset]
# dict keys are unique and keep insertion order, so the first occurrence of
# each id survives.
uniq_matches_id = list(dict.fromkeys(matches_id))
seen = set(uniq_matches_id)

In [365]:
len(uniq_matches_id)

6961

In [366]:
len(matches_asset)

11112

In [367]:
# Keep the first match payload seen for each match id, preserving order.
seen = set()
seen_add = seen.add
uniq_matches = [
    match
    for match in matches_asset
    # seen_add() returns None, so the `or` both records a new id and stays falsy.
    if not (match['metadata']['match_id'] in seen
            or seen_add(match['metadata']['match_id']))
]

In [368]:
len(uniq_matches)

6961

In [369]:
matches_asset[300]['info']['game_version']

'Version 12.11.446.9344 (Jun 09 2022/13:23:39) [PUBLIC] <Releases/12.11>'

In [370]:
# Unique matches whose game_version string contains the targeted build number.
latest_matches = list(filter(
    lambda match: LATEST_RELEASE in match['info']['game_version'],
    uniq_matches,
))


In [371]:
# Unique matches on the targeted build that were played on or after PATCH.
# game_datetime is divided by 1000 before conversion (milliseconds -> seconds).
# NOTE(review): date.fromtimestamp() converts using the machine's local
# timezone - confirm that is the intended cutoff.
latest_patch_matches = [
    match
    for match in uniq_matches
    if LATEST_RELEASE in match['info']['game_version']
    if date.fromtimestamp(match['info']['game_datetime'] / 1000.0) >= PATCH
]


In [372]:
len(latest_matches)

5068

In [373]:
len(latest_patch_matches)

3009

In [374]:
# latest_matches[0]['info']

In [375]:
from typing import List


def process_matches(df) -> List:
    """Flatten raw match payloads into one flat record per participant.

    Args:
        df: Iterable of match dicts. Each match must provide
            ``['metadata']['match_id']`` and ``['info']['participants']``;
            each participant must provide ``placement``, ``augments``,
            ``traits`` (dicts with ``name`` and ``tier_current``) and
            ``units`` (dicts with ``character_id``, ``tier`` and
            ``itemNames``).

    Returns:
        List of dicts, one per participant, with keys:
        ``match_id``, ``placement``, ``augment<i>``, ``<trait name>``
        (current tier), ``<character_id>`` (unit tier) and
        ``<character_id>_item<i>`` (item name with everything up to the last
        underscore removed). ``TFT7_TrainerDragon_item1`` and
        ``TFT7_TrainerDragon_item2`` are always present and default to the
        string ``'None'``.
    """
    matches_array = []

    for match_row in df:
        match_id = match_row['metadata']['match_id']

        for participant in match_row['info']['participants']:
            match = {
                'match_id': match_id,
                'placement': participant['placement'],
                # Other fields of the payload (level, players_eliminated,
                # total_damage_to_players) are intentionally not exported.
            }

            for augment_index, augment in enumerate(participant['augments']):
                match[f'augment{augment_index}'] = augment

            for trait in participant['traits']:
                match[trait['name']] = trait['tier_current']

            # Placeholders so these two columns always exist (presumably so
            # per-unit item lookups downstream never miss them). They are set
            # once, BEFORE the unit loop: assigning them inside the loop would
            # overwrite real TrainerDragon items whenever another unit follows.
            match['TFT7_TrainerDragon_item1'] = 'None'
            match['TFT7_TrainerDragon_item2'] = 'None'

            for unit in participant['units']:
                unit_id = unit['character_id']
                match[unit_id] = unit['tier']
                for item_index, item in enumerate(unit['itemNames']):
                    # Keep only the part after the last underscore.
                    match[f'{unit_id}_item{item_index}'] = item.split('_')[-1]

            matches_array.append(match)

    return matches_array


In [376]:
matches_array = process_matches(latest_matches)
matches_patch_array = process_matches(latest_patch_matches)

In [377]:
matches_array[0]
# len(matches_array)

{'match_id': 'NA1_4365110158',
 'placement': 2,
 'augment0': 'TFT6_Augment_TrueTwos',
 'augment1': 'TFT6_Augment_Recombobulator',
 'augment2': 'TFT7_Augment_AxiomArc2',
 'Set7_Assassin': 1,
 'Set7_Bruiser': 2,
 'Set7_Dragon': 1,
 'Set7_Guild': 1,
 'Set7_Legend': 0,
 'Set7_Mage': 0,
 'Set7_Ragewing': 0,
 'Set7_Tempest': 1,
 'Set7_Warrior': 0,
 'Set7_Whispers': 2,
 'TFT7_Qiyana': 2,
 'TFT7_TrainerDragon_item1': 'None',
 'TFT7_TrainerDragon_item2': 'None',
 'TFT7_Shen': 2,
 'TFT7_Sylas': 2,
 'TFT7_Sylas_item0': 'Shroud',
 'TFT7_Sylas_item1': 'RedBuff',
 'TFT7_Sylas_item2': 'DragonsClaw',
 'TFT7_Talon': 2,
 'TFT7_DragonPurple': 3,
 'TFT7_DragonPurple_item0': 'InfinityEdge',
 'TFT7_DragonPurple_item1': 'RapidFireCannon',
 'TFT7_DragonPurple_item2': 'Bloodthirster',
 'TFT7_Ornn': 1,
 'TFT7_Ornn_item0': 'SeraphsEmbrace',
 'TFT7_Ornn_item1': 'Redemption',
 'TFT7_Ornn_item2': 'TitansResolve',
 'TFT7_Pyke': 1,
 'TFT7_Pyke_item0': 'ThiefsGloves',
 'TFT7_Pyke_item1': 'WarmogsArmor',
 'TFT7_Pyke_it

In [378]:
matches_league_df = pd.json_normalize(matches_array)
matches_league_patch_df = pd.json_normalize(matches_patch_array)

In [379]:
matches_league_df

Unnamed: 0,match_id,placement,augment0,augment1,augment2,Set7_Assassin,Set7_Bruiser,Set7_Dragon,Set7_Guild,Set7_Legend,...,TFT7_Vladimir_item0,TFT7_Vladimir_item1,TFT7_Vladimir_item2,TFT7_Shen_item2,TFT7_Thresh_item1,TFT7_Thresh_item2,TFT7_Taric_item1,TFT7_Heimerdinger_item1,TFT7_Heimerdinger_item2,TFT7_Taric_item2
0,NA1_4365110158,2,TFT6_Augment_TrueTwos,TFT6_Augment_Recombobulator,TFT7_Augment_AxiomArc2,1.0,2.0,1.0,1.0,0.0,...,,,,,,,,,,
1,NA1_4365110158,1,TFT7_Augment_BruiserEmblem,TFT6_Augment_CyberneticShell1,TFT6_Augment_CelestialBlessing2,,1.0,,,0.0,...,,,,,,,,,,
2,NA1_4365110158,8,TFT6_Augment_Ascension,TFT6_Augment_PandorasItems,TFT7_Augment_MageEmblem,,0.0,,2.0,,...,,,,,,,,,,
3,NA1_4365110158,7,TFT7_Augment_AxiomArc2,TFT6_Augment_PandorasItems,TFT7_Augment_RagewingTantrum,0.0,0.0,1.0,,,...,,,,,,,,,,
4,NA1_4365110158,5,TFT7_Augment_TempestEmblem,TFT7_Augment_AxiomArc1,TFT6_Augment_PortableForge,0.0,0.0,,,0.0,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
40539,NA1_4358279197,5,TFT6_Augment_MetabolicAccelerator,TFT6_Augment_ForceOfNature,TFT6_Augment_BlueBattery2,,2.0,,,0.0,...,,,,,,,,,,
40540,NA1_4358279197,6,TFT6_Augment_CelestialBlessing2,TFT6_Augment_MaxLevel10,TFT6_Augment_SecondWind2,1.0,1.0,,3.0,0.0,...,,,,,,,,,,
40541,NA1_4358279197,8,TFT7_Augment_MirageEmblem,TFT6_Augment_LudensEcho3,TFT7_Augment_UrfsGrabBag1,1.0,,1.0,3.0,,...,,,,,,,,,,
40542,NA1_4358279197,1,TFT6_Augment_CyberneticUplink2,TFT6_Augment_CelestialBlessing3,TFT6_Augment_CyberneticImplants2,0.0,2.0,1.0,1.0,0.0,...,,,,,,,,,,


In [380]:
matches_league_df[matches_league_df.isnull().T.any()]

Unnamed: 0,match_id,placement,augment0,augment1,augment2,Set7_Assassin,Set7_Bruiser,Set7_Dragon,Set7_Guild,Set7_Legend,...,TFT7_Vladimir_item0,TFT7_Vladimir_item1,TFT7_Vladimir_item2,TFT7_Shen_item2,TFT7_Thresh_item1,TFT7_Thresh_item2,TFT7_Taric_item1,TFT7_Heimerdinger_item1,TFT7_Heimerdinger_item2,TFT7_Taric_item2
0,NA1_4365110158,2,TFT6_Augment_TrueTwos,TFT6_Augment_Recombobulator,TFT7_Augment_AxiomArc2,1.0,2.0,1.0,1.0,0.0,...,,,,,,,,,,
1,NA1_4365110158,1,TFT7_Augment_BruiserEmblem,TFT6_Augment_CyberneticShell1,TFT6_Augment_CelestialBlessing2,,1.0,,,0.0,...,,,,,,,,,,
2,NA1_4365110158,8,TFT6_Augment_Ascension,TFT6_Augment_PandorasItems,TFT7_Augment_MageEmblem,,0.0,,2.0,,...,,,,,,,,,,
3,NA1_4365110158,7,TFT7_Augment_AxiomArc2,TFT6_Augment_PandorasItems,TFT7_Augment_RagewingTantrum,0.0,0.0,1.0,,,...,,,,,,,,,,
4,NA1_4365110158,5,TFT7_Augment_TempestEmblem,TFT7_Augment_AxiomArc1,TFT6_Augment_PortableForge,0.0,0.0,,,0.0,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
40539,NA1_4358279197,5,TFT6_Augment_MetabolicAccelerator,TFT6_Augment_ForceOfNature,TFT6_Augment_BlueBattery2,,2.0,,,0.0,...,,,,,,,,,,
40540,NA1_4358279197,6,TFT6_Augment_CelestialBlessing2,TFT6_Augment_MaxLevel10,TFT6_Augment_SecondWind2,1.0,1.0,,3.0,0.0,...,,,,,,,,,,
40541,NA1_4358279197,8,TFT7_Augment_MirageEmblem,TFT6_Augment_LudensEcho3,TFT7_Augment_UrfsGrabBag1,1.0,,1.0,3.0,,...,,,,,,,,,,
40542,NA1_4358279197,1,TFT6_Augment_CyberneticUplink2,TFT6_Augment_CelestialBlessing3,TFT6_Augment_CyberneticImplants2,0.0,2.0,1.0,1.0,0.0,...,,,,,,,,,,


## Sort and reorder columns

In [381]:
def reorder_df_col(df):
    """Return ``df`` with a fixed set of leading columns and the rest sorted.

    The leading columns are ``placement``, ``match_id`` and ``augment0..2``
    (in that order); every other column follows in sorted order. Because the
    result is built with ``reindex``, a leading column missing from ``df`` is
    created and filled with NaN.
    """
    leading = ['placement', 'match_id', 'augment0', 'augment1', 'augment2']
    trailing = sorted(name for name in set(df.columns) if name not in leading)
    return df.reindex(columns=[*leading, *trailing])


In [382]:
matches_league_df = reorder_df_col(matches_league_df)
matches_league_patch_df = reorder_df_col(matches_league_patch_df)

# Output dataframes

In [383]:
# Persist both frames under ASSETS_DIR, each as a pickle and as a CSV.
# The CSVs are written without the index column. File names combine server,
# league and build; the PATCH-filtered frame also carries the PATCH date.
matches_league_df.to_pickle(os.path.join(ASSETS_DIR, f'{SERVER}_{LEAGUE}_{LATEST_RELEASE}_matches.pickle'))
matches_league_df.to_csv(os.path.join(ASSETS_DIR, f'{SERVER}_{LEAGUE}_{LATEST_RELEASE}_matches.csv'), index=False)
matches_league_patch_df.to_pickle(os.path.join(ASSETS_DIR, f'{SERVER}_{LEAGUE}_{LATEST_RELEASE}_{PATCH}_matches.pickle'))
matches_league_patch_df.to_csv(os.path.join(ASSETS_DIR, f'{SERVER}_{LEAGUE}_{LATEST_RELEASE}_{PATCH}_matches.csv'), index=False)

# Preprocessing

In [403]:
raw_df = matches_league_patch_df.copy()
def impute(df):
    """Fill missing values in place and return the same frame.

    Numeric columns get ``0``; object columns get the string ``"None"``.
    Columns of any other dtype are left untouched.
    """
    # Numeric columns are handled first, then object columns.
    for dtype_selector, filler in (("number", 0), ("object", "None")):
        for column in df.select_dtypes(dtype_selector).columns:
            df[column] = df[column].fillna(filler)
    return df
raw_df = impute(raw_df)

In [404]:
# Split the imputed frame into identifiers, features (X) and target (y).
match_id_df = raw_df['match_id']
X = raw_df.drop(['match_id'], axis=1)
y = X.pop(TARGETNAME)
X = X.fillna('')

# Column groups, derived from dtype and from the naming prefixes in the data:
# numeric "Set7*" columns hold trait tiers, numeric "TFT7*" columns hold unit
# tiers, and the remaining object columns (besides the augments) hold items.
numeric_cols = X.select_dtypes(include=np.number).columns.tolist()
categorical_cols = X.select_dtypes(include=['object','category']).columns.tolist()
traits_col = [s for s in numeric_cols if "Set7" in s]
units_col = [s for s in numeric_cols if "TFT7" in s]
augments_col = ['augment0', 'augment1', 'augment2']
items_col = [s for s in categorical_cols if s not in augments_col]

# Distinct-value count per feature column.
df_unique = X.nunique().to_frame().reset_index()
df_unique.columns = ['Variable','DistinctCount']
unique_items_set = {item for col in items_col for item in X[col].unique().tolist()}
unique_augments_set = {augment for col in augments_col for augment in X[col].unique().tolist()}

# Per-row summary features: number of item slots that are not the 'None'
# placeholder, and the sums of trait tiers and unit tiers.
X['items_count'] = X[items_col].ne('None').sum(axis=1)
X['traits_sum'] = X[traits_col].sum(axis=1)
X['units_sum'] = X[units_col].sum(axis=1)

# idxmax() returns an index *label*, so the row must be fetched with .loc;
# .iloc would only agree with it while the index is the default 0..n-1 range.
X.loc[X['units_sum'].idxmax()]

augment0              TFT7_Augment_MageConference
augment1                   TFT7_Augment_ThinkFast
augment2          TFT6_Augment_CelestialBlessing1
Set7_Assassin                                 0.0
Set7_Astral                                   1.0
                               ...               
TFT7_Zoe_item1                               None
TFT7_Zoe_item2                               None
items_count                                    10
traits_sum                                    8.0
units_sum                                    29.0
Name: 5823, Length: 270, dtype: object

In [405]:
# Re-detect column groups now that the summary features have been added.
numeric_cols = X.select_dtypes(include=np.number).columns.tolist()
categorical_cols = X.select_dtypes(include=['object','category']).columns.tolist()
# Cast all numeric columns to int64 in one vectorised call; the element-wise
# DataFrame.applymap used previously is deprecated in recent pandas.
X[numeric_cols] = X[numeric_cols].astype(np.int64)
# Features plus target in a single frame for the ranking helpers.
matches_df = X.copy()
matches_df[TARGETNAME] = y

In [406]:
def get_unit_items_ranking(df: pd.DataFrame, unit: str):
    """Rank the item combinations of one unit by average placement.

    Args:
        df: Frame with a ``placement`` column and, for ``unit``, the item
            columns ``<unit>_item0``, ``<unit>_item1`` and ``<unit>_item2``.
            An item column absent from ``df`` is treated as holding the
            placeholder ``'None'`` in every row.
        unit: Column prefix of the unit, e.g. ``'TFT7_Zoe'``.

    Returns:
        DataFrame with columns ``unit``, ``<unit>_items_grp`` (the three items
        sorted and joined with ``', '``), ``value_count`` (rows with that
        combination) and ``average_placement`` (mean placement), sorted
        ascending by ``average_placement``.
    """
    item_cols = [f'{unit}_item{slot}' for slot in range(3)]
    # Exact column selection (no regex substring matching); missing slots are
    # filled with the same 'None' placeholder used for empty item slots.
    items = df.reindex(columns=item_cols, fill_value='None')

    # Sort the three items so the same set held in a different slot order
    # collapses into a single combination.
    combos = [', '.join(sorted(map(str, row))) for row in items.to_numpy()]

    grp_col = f'{unit}_items_grp'
    ranking_input = pd.DataFrame({
        'placement': df['placement'],
        'unit': unit,
        grp_col: combos,
    })

    # group and aggregate count / mean placement
    dct = {'value_count': (grp_col, 'count'),
           'average_placement': ('placement', 'mean')}
    return (
        ranking_input
        .groupby(['unit', grp_col], as_index=False)
        .agg(**dct)
        .sort_values(by='average_placement')
    )

In [407]:
get_unit_items_ranking(df = matches_df[:2], unit='TFT7_Zoe')

Unnamed: 0,unit,TFT7_Zoe_items_grp,value_count,average_placement
0,TFT7_Zoe,"None, None, None",2,1.5


In [408]:
# For every unit keep the five best-placing item combinations among those
# seen at least 12 times (the ranking is sorted by average placement).
top5_items_list = []
for unit in units_col:
    ranking = get_unit_items_ranking(df=matches_df, unit=unit)
    frequent = ranking.loc[ranking['value_count'] >= 12]
    top5_items_list.extend(frequent.head(5).to_numpy())

In [409]:
top5_items_list = pd.DataFrame(top5_items_list, columns=['unit', 'items',	'value_count',	'average_placement'])

In [410]:
top5_items_list.to_csv(os.path.join(ASSETS_DIR, f'{SERVER}_{LEAGUE}_{LATEST_RELEASE}_{PATCH}_top5_items.csv'), index=False)

In [411]:
pd.options.display.max_colwidth = 160

In [412]:
top5_items_list.groupby('unit').head(1)

Unnamed: 0,unit,items,value_count,average_placement
0,TFT7_Aatrox,"None, None, TitanicHydra",12,3.0
3,TFT7_Anivia,"ArchangelsStaff, ArchangelsStaff, Morellonomicon",13,3.076923
8,TFT7_AoShin,"ArchangelsStaff, HextechGunblade, SpearOfShojin",44,3.318182
13,TFT7_Ashe,"None, None, SparringGloves",14,3.142857
18,TFT7_AurelionSol,"None, None, None",23966,4.498748
19,TFT7_Bard,"None, None, Redemption",13,2.153846
24,TFT7_Braum,"ChainVest, None, None",13,3.923077
29,TFT7_Corki,"GuinsoosRageblade, InfinityEdge, RunaansHurricane",14,3.428571
34,TFT7_Diana,"FrozenHeart, IonicSpark, RedBuff",41,3.804878
39,TFT7_DragonBlue,"GuinsoosRageblade, Quicksilver, RevelEmblemItem",33,2.757576


# Load TFT asset

In [413]:
tft_assets = read_json(os.path.join(ASSETS_DIR, f'en_us.json'))

In [414]:
tft7_set = tft_assets['setData'][6]['champions'] #['apiName'] ['traits']['name'] #['champions'] #['champions'].name

In [415]:
# Map each champion apiName to the list of its trait names. Entries sharing
# an apiName accumulate their traits in the same list.
champions_dict = {}

for champion in tft7_set:
    champions_dict.setdefault(champion["apiName"], []).extend(champion["traits"])

In [416]:
champions_dict['TFT7_Olaf']

['Scalescorn', 'Bruiser', 'Warrior']

# Team Composition Ranking

In [423]:
def add_traits(units_str):
    """Prefix every unit in a ``', '``-separated string with abbreviated traits.

    Each trait of a unit (looked up in the module-level ``champions_dict``) is
    shortened to its first two characters plus its last character. The
    abbreviations and the unit id are then joined with ``-``.

    Args:
        units_str (str): unit ids separated by ``', '``.

    Returns:
        str: ``traits-unit`` entries joined by ``,`` (no space), or ``''``
        when ``units_str`` is empty.
    """
    if len(units_str) == 0:
        return ''

    def _tag(unit):
        # e.g. 'Bruiser' -> 'Brr'
        short_names = [name[:2] + name[-1:] for name in champions_dict[unit]]
        return '-'.join(short_names) + f'-{unit}'

    return ','.join(map(_tag, units_str.split(', ')))

def get_unit_comp_ranking(df: pd.DataFrame):
    """Rank team compositions by average placement.

    A composition is the set of unit columns (module-level ``units_col``)
    whose value is greater than 0 in a row. It is rendered through
    ``add_traits`` and then stripped of every ``'TFT7_'`` occurrence.

    Args:
        df: Frame containing ``placement`` and every column in ``units_col``.

    Returns:
        DataFrame with columns ``comp_grp`` (composition string),
        ``value_count`` (rows with that composition) and
        ``average_placement`` (mean placement), sorted ascending by
        ``average_placement``.
    """
    # Names of the units fielded in each row, in units_col order.
    unit_levels = df[units_col].to_numpy()
    fielded = [
        ', '.join(name for name, level in zip(units_col, row) if level > 0)
        for row in unit_levels
    ]

    comp = pd.Series(fielded, index=df.index, dtype=object).apply(add_traits)
    # Literal prefix removal; regex=False keeps this independent of the
    # pandas-version-specific default of Series.str.replace.
    comp = comp.str.replace('TFT7_', '', regex=False)

    ranking_input = pd.DataFrame({'placement': df['placement'], 'comp_grp': comp})

    # group and aggregate count / mean placement
    dct = {'value_count': ('comp_grp', 'count'),
           'average_placement': ('placement', 'mean')}
    return (
        ranking_input
        .groupby(['comp_grp'], as_index=False)
        .agg(**dct)
        .sort_values(by='average_placement')
    )

In [463]:
# for unit in units_col:
#     for trait in champions_dict[unit]:
#         print(f'{trait}')

In [427]:
# Get top5 
comp_df = get_unit_comp_ranking(df = matches_df)

In [452]:
# Rebuild the ranking as a fresh frame with a 0..n-1 index and the first
# column named 'comp'. The value_count >= 1 threshold keeps every row.
m = comp_df.loc[comp_df['value_count'] >= 1]
top5_comp_list = list(m.values)
comp_ranking_df = pd.DataFrame(
    top5_comp_list,
    columns=['comp', 'value_count', 'average_placement'],
)

In [455]:
comp_ranking_df #.groupby('comp').head(1)

Unnamed: 0,comp,value_count,average_placement
0,"Whs-Brr-Drn-DragonPurple,Whs-Shr-Elise,Jae-Shr-Neeko,Whs-Asn-Pyke,Jae-Str-Soraka,Whs-Mae-Brr-Sylas,Jae-Gun-Taric,Mie-Drr-War-Yasuo",1,1.0
1,"Gud-Myc-Bad-Bard,Whs-Brr-Drn-DragonPurple,Tet-Brr-Led-Ornn,Whs-Asn-Pyke,Tet-Asn-Qiyana,Jae-Str-Soraka,Gud-Swt-Twitch,Mie-Drr-War-Yasuo",1,1.0
2,"Gud-Myc-Bad-Bard,Whs-Brr-Drn-DragonPurple,Tet-Brr-Led-Ornn,Whs-Asn-Pyke,Tet-Asn-Qiyana,Jae-Str-Soraka,Gud-Asn-Talon,Mie-Drr-War-Yasuo",2,1.0
3,"Gud-Myc-Bad-Bard,Whs-Brr-Drn-DragonPurple,Tet-Brr-Led-Ornn,Whs-Asn-Pyke,Tet-Asn-Qiyana,Gud-Asn-Talon,Whs-Gun-Thresh,Mie-Drr-War-Yasuo",1,1.0
4,"Mie-Drn-DragonBlue,Trr-Myc-Evr-Lulu,Tet-Brr-Led-Ornn,Rel-Evr-Sona,Whs-Mae-Brr-Sylas,Mie-Drr-War-Yasuo,She-Spf-Mae-Zoe",1,1.0
...,...,...,...
11953,"Scn-Asn-Diana,Whs-Brr-Drn-DragonPurple,Asl-Brr-Illaoi,Tet-Asn-Qiyana,Rag-Brr-War-Shen,Whs-Mae-Brr-Sylas",1,8.0
11954,"Scn-Asn-Diana,Whs-Brr-Drn-DragonPurple,Asl-Brr-Illaoi,Tet-Brr-Led-Ornn,Tet-Asn-Qiyana,Rag-Brr-War-Shen,Gud-Asn-Talon",1,8.0
11955,"Scn-Asn-Diana,Whs-Brr-Drn-DragonPurple,Asl-Brr-Illaoi,Whs-Asn-Pyke,Rag-Brr-War-Shen,Whs-Mae-Brr-Sylas,Rel-Brr-TahmKench",1,8.0
11956,"Scn-Gun-Braum,Scn-Asn-Diana,Scn-Car-Mae-Lillia,Asl-Mae-Myc-Nami,Mie-Car-Nunu,Scn-Brr-War-Olaf,Gud-Mae-Ryze,Asl-Brr-Skarner,Asl-Mae-Vladimir",1,8.0


# Clustering

In [430]:
from sentence_transformers import SentenceTransformer
from sklearn.cluster import DBSCAN

embedder = SentenceTransformer('all-MiniLM-L6-v2')

--- Logging error ---
Traceback (most recent call last):
  File "c:\Users\furyx\miniconda3\envs\tft\lib\logging\__init__.py", line 1104, in emit
    self.flush()
  File "c:\Users\furyx\miniconda3\envs\tft\lib\logging\__init__.py", line 1084, in flush
    self.stream.flush()
OSError: [Errno 22] Invalid argument
Call stack:
  File "c:\Users\furyx\miniconda3\envs\tft\lib\runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "c:\Users\furyx\miniconda3\envs\tft\lib\runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "c:\Users\furyx\miniconda3\envs\tft\lib\site-packages\ipykernel_launcher.py", line 17, in <module>
    app.launch_new_instance()
  File "c:\Users\furyx\miniconda3\envs\tft\lib\site-packages\traitlets\config\application.py", line 976, in launch_instance
    app.start()
  File "c:\Users\furyx\miniconda3\envs\tft\lib\site-packages\ipykernel\kernelapp.py", line 712, in start
    self.io_loop.start()
  File "c:\Users\furyx\m

In [431]:
# Embed every composition string of comp_ranking_df with the sentence
# transformer loaded earlier; the result is requested as a tensor.
# NOTE(review): presumably one embedding per sentence, in input order - the
# clustering step attaches labels to comp_ranking_df by position.
corpus_sentences = list(comp_ranking_df['comp'])

corpus_embeddings = embedder.encode(corpus_sentences, batch_size=512, show_progress_bar=True, convert_to_tensor=True)
# Normalize the embeddings to unit length
# corpus_embeddings = corpus_embeddings /  np.linalg.norm(corpus_embeddings, axis=1, keepdims=True)

Batches: 100%|██████████| 24/24 [03:17<00:00,  8.22s/it]


In [456]:
# Cluster the composition embeddings with DBSCAN on cosine distance and store
# the cluster label of each composition in a new 'group' column.
# With min_samples=1 every point counts as a core point, so no row should be
# labelled as noise (-1). eps=0.029 is a hand-picked threshold.
# NOTE(review): labels are attached by position, so corpus_embeddings must
# have been computed from the current rows of comp_ranking_df (same length
# and order) - re-run the embedding cell whenever comp_ranking_df is rebuilt.
clustering_model = DBSCAN(eps=0.029, min_samples=1, metric='cosine', n_jobs=-1)
predict=clustering_model.fit_predict(corpus_embeddings)
comp_ranking_df['group'] = pd.Series(predict, index=comp_ranking_df.index)

In [457]:
comp_ranking_df.sort_values(by='group')

Unnamed: 0,comp,value_count,average_placement,group
0,"Whs-Brr-Drn-DragonPurple,Whs-Shr-Elise,Jae-Shr-Neeko,Whs-Asn-Pyke,Jae-Str-Soraka,Whs-Mae-Brr-Sylas,Jae-Gun-Taric,Mie-Drr-War-Yasuo",1,1.0,0
7419,"Whs-Brr-Drn-DragonPurple,Rag-Car-Hecarim,Gud-Car-Sejuani,Rag-Brr-War-Shen,Gud-Asn-Talon,Gud-Swt-Twitch,Mie-Drr-War-Yasuo",1,6.0,0
7421,"Whs-Brr-Drn-DragonPurple,Asl-Brr-Illaoi,Whs-Asn-Pyke,Rag-Brr-War-Shen,Whs-Mae-Brr-Sylas,Rel-Brr-TahmKench,Gud-Asn-Talon",1,6.0,0
7422,"Whs-Brr-Drn-DragonPurple,Asl-Brr-Illaoi,Whs-Asn-Pyke,Rag-Brr-War-Shen,Whs-Mae-Brr-Sylas,Gud-Asn-Talon,Whs-Gun-Thresh",1,6.0,0
2106,"Whs-Brr-Drn-DragonPurple,Scn-Brr-War-Olaf,Tet-Brr-Led-Ornn,Whs-Asn-Pyke,Tet-Asn-Qiyana,Whs-Mae-Brr-Sylas,Gud-Asn-Talon,Whs-Gun-Thresh",1,2.0,0
...,...,...,...,...
11945,"Scn-Asn-Diana,She-Drn-Gun-DragonGold,Rag-She-Asn-Kayn,Whs-Asn-Pyke,Gud-Asn-Talon,Mie-Drr-War-Yasuo,She-Spf-Mae-Zoe",1,8.0,2100
11947,"Scn-Asn-Diana,She-Drn-Gun-DragonGold,Trr-Myc-Evr-Lulu,Scn-Brr-War-Olaf,Tet-Brr-Led-Ornn,Tet-Asn-Qiyana,Rel-Evr-Sona",1,8.0,2101
11950,"Scn-Asn-Diana,Tet-Swt-Ezreal,Scn-Car-Mae-Lillia,Tet-Asn-Qiyana,Gud-Car-Sejuani,Gud-Swt-Twitch",1,8.0,2102
11956,"Scn-Gun-Braum,Scn-Asn-Diana,Scn-Car-Mae-Lillia,Asl-Mae-Myc-Nami,Mie-Car-Nunu,Scn-Brr-War-Olaf,Gud-Mae-Ryze,Asl-Brr-Skarner,Asl-Mae-Vladimir",1,8.0,2103


In [458]:
# Broadcast per-cluster aggregates back onto every composition row:
#   grp_count     - total number of games across the cluster's compositions
#   grp_placement - plain mean of the compositions' average placements
#                   (each composition counts once; not weighted by value_count)
comp_ranking_df = comp_ranking_df.assign(
    grp_count=comp_ranking_df.groupby('group')['value_count'].transform('sum'),
    grp_placement=comp_ranking_df.groupby('group')['average_placement'].transform('mean'),
)

In [459]:
comp_ranking_df.sort_values(by='group')[:60]

Unnamed: 0,comp,value_count,average_placement,group,grp_count,grp_placement
0,"Whs-Brr-Drn-DragonPurple,Whs-Shr-Elise,Jae-Shr-Neeko,Whs-Asn-Pyke,Jae-Str-Soraka,Whs-Mae-Brr-Sylas,Jae-Gun-Taric,Mie-Drr-War-Yasuo",1,1.0,0,3020,4.69624
7419,"Whs-Brr-Drn-DragonPurple,Rag-Car-Hecarim,Gud-Car-Sejuani,Rag-Brr-War-Shen,Gud-Asn-Talon,Gud-Swt-Twitch,Mie-Drr-War-Yasuo",1,6.0,0,3020,4.69624
7421,"Whs-Brr-Drn-DragonPurple,Asl-Brr-Illaoi,Whs-Asn-Pyke,Rag-Brr-War-Shen,Whs-Mae-Brr-Sylas,Rel-Brr-TahmKench,Gud-Asn-Talon",1,6.0,0,3020,4.69624
7422,"Whs-Brr-Drn-DragonPurple,Asl-Brr-Illaoi,Whs-Asn-Pyke,Rag-Brr-War-Shen,Whs-Mae-Brr-Sylas,Gud-Asn-Talon,Whs-Gun-Thresh",1,6.0,0,3020,4.69624
2106,"Whs-Brr-Drn-DragonPurple,Scn-Brr-War-Olaf,Tet-Brr-Led-Ornn,Whs-Asn-Pyke,Tet-Asn-Qiyana,Whs-Mae-Brr-Sylas,Gud-Asn-Talon,Whs-Gun-Thresh",1,2.0,0,3020,4.69624
7433,"Whs-Brr-Drn-DragonPurple,Asl-Brr-Illaoi,Trr-Myc-Evr-Lulu,Asl-Mae-Myc-Nami,Tet-Brr-Led-Ornn,Gud-Mae-Ryze,Whs-Mae-Brr-Sylas",1,6.0,0,3020,4.69624
2101,"Whs-Brr-Drn-DragonPurple,Rag-She-Asn-Kayn,Tet-Brr-Led-Ornn,Whs-Asn-Pyke,Tet-Asn-Qiyana,Gud-Asn-Talon,Mie-Drr-War-Yasuo",1,2.0,0,3020,4.69624
7441,"Whs-Brr-Drn-DragonPurple,Tet-Brr-Led-Ornn,Rag-Brr-War-Shen,Whs-Mae-Brr-Sylas,Gud-Swt-Twitch,Rag-Swt-Xayah",2,6.0,0,3020,4.69624
7450,"Whs-Brr-Drn-DragonPurple,Tet-Brr-Led-Ornn,Tet-Asn-Qiyana,Asl-Brr-Skarner,Whs-Mae-Brr-Sylas,Gud-Asn-Talon",1,6.0,0,3020,4.69624
7451,"Whs-Brr-Drn-DragonPurple,Tet-Brr-Led-Ornn,Whs-Asn-Pyke,Tet-Asn-Qiyana,Gud-Car-Sejuani,Gud-Asn-Talon,Gud-Swt-Twitch",1,6.0,0,3020,4.69624


In [492]:
# Keep compositions whose cluster covers at least 10 games and rebuild them
# as a fresh frame with a 0..n-1 index.
m = comp_ranking_df.loc[comp_ranking_df['grp_count'] >= 10]
top5_comp_ranking_list = list(m.values)
top_comp_ranking_df = pd.DataFrame(
    top5_comp_ranking_list,
    columns=['comp', 'value_count', 'average_placement', 'group', 'grp_count', 'grp_placement'],
)

In [482]:
# dff = top_comp_ranking_df[:5].copy()

In [494]:
def remove_traits(units_str):
    """Strip the trait prefixes from a comma-separated composition string.

    Args:
        units_str (str): entries of the form ``traits-unit`` separated by ``,``.

    Returns:
        str: the unit names only (text after the last ``-`` of each entry),
        joined with ``', '``; ``''`` when ``units_str`` is empty.
    """
    if len(units_str) == 0:
        return ''

    # rpartition yields the text after the last '-', or the whole entry when
    # it contains no '-'.
    return ', '.join(entry.rpartition('-')[2] for entry in units_str.split(','))

top_comp_ranking_df['comp'] = top_comp_ranking_df['comp'].apply(remove_traits)


In [486]:
# dff.groupby(['group']).head(1).sort_values(by='grp_placement') #Tet-Drn-AoShin,She-Drn-Gun-DragonGold,Jae-Myc-Drn-DragonGreen,Whs-Brr-Drn-DragonPurple,Tet-Brr-Led-Ornn,Jae-Gun-Taric

Unnamed: 0,comp,value_count,average_placement,group,grp_count,grp_placement
1,"Bard,DragonPurple,Ornn,Pyke,Qiyana,Soraka,Twitch,Yasuo",1,1.0,1,4817,3.507283
0,"DragonPurple,Elise,Neeko,Pyke,Soraka,Sylas,Taric,Yasuo",1,1.0,0,3020,4.69624


In [496]:
# Label every row with the most frequent composition string of its cluster
# (ties resolve to the first value returned by mode()), then show the first
# row of each cluster ordered by the cluster's placement.
top_comp_ranking_df['mode'] = (
    top_comp_ranking_df
    .groupby('group')['comp']
    .transform(lambda comps: comps.mode().iloc[0])
)
top_comp_ranking_df.groupby('group').head(1).sort_values(by='grp_placement')

Unnamed: 0,comp,value_count,average_placement,group,grp_count,grp_placement,mode
660,"AoShin, DragonGold, DragonGreen, DragonPurple, Ornn, Taric",1,1.0,146,25,2.739796,"AoShin, DragonGold, DragonGreen, DragonPurple, Ornn"
584,"AoShin, AurelionSol, DragonGold, DragonPurple, Ornn",1,1.0,121,11,2.771429,"AoShin, AurelionSol, DragonGold, DragonPurple, Ornn"
752,"Gnar, Neeko, Sett, Shyvana, Soraka, Swain, Xayah, Yasuo",2,1.0,168,17,3.104167,"Gnar, Hecarim, Neeko, Nidalee, Nunu, Shen, Soraka, Swain"
413,"AoShin, Bard, Nunu, Ornn, Pyke, Sejuani, Sylas, Talon",1,1.0,84,118,3.141914,"AoShin, Bard, Braum, DragonGold, DragonPurple, Sylas"
410,"DragonPurple, Elise, Heimerdinger, Ornn, Pyke, Qiyana, Shen, Soraka, Sylas, Zoe",1,1.0,83,76,3.480676,"DragonPurple, Elise, Gnar, Neeko, Pyke, Ryze, Sylas, Zoe"
1,"Bard, DragonPurple, Ornn, Pyke, Qiyana, Soraka, Twitch, Yasuo",1,1.0,1,4817,3.507283,"Bard, Braum, Corki, DragonGold, Hecarim, Jinx, Lulu, Sona, TrainerDragon, Tristana"
1905,"Ezreal, Hecarim, Illaoi, Ornn, Shen, Soraka, Sylas, Varus, Xayah",1,2.0,353,15,3.730769,"Ezreal, Hecarim, Illaoi, Ornn, Shen, Shyvana, Skarner, Xayah"
215,"Corki, DragonBlue, Hecarim, Jinx, Nunu, Sona, Soraka, TahmKench",1,1.0,40,52,3.754167,"Corki, DragonBlue, Hecarim, Illaoi, Jinx, Nunu, TahmKench"
923,"Bard, DragonBlue, Hecarim, Nunu, Sejuani, Yasuo, Yone, Zoe",1,1.0,201,14,3.857143,"Bard, DragonBlue, Hecarim, Leona, Nunu, Sejuani, Soraka, Yasuo, Zoe"
214,"Corki, DragonBlue, Hecarim, Jinx, Nunu, Sejuani, Sona, Yasuo",1,1.0,39,11,3.95,"Corki, DragonBlue, Hecarim, Jinx, Nunu, Sejuani, Sona, Yasuo"


In [462]:
comp_ranking_df.to_csv(os.path.join(ASSETS_DIR, f'{SERVER}_{LEAGUE}_{LATEST_RELEASE}_{PATCH}_comp_ranking.csv'), index=False)

In [439]:
# from statistics import mode
# import jellyfish

# import pandas as pd

# df = pd.DataFrame({'Code': ['abc', 'abc', 'abc', 'abcc', 'abcc', 'zxc'],
#                    'Description': ['ABC String', 'abc string', 'ABC String and sth', 'abc sth else', 'zxc sth else', 'zxc zxc'],
#                 #    'Value': [10, 20, 30, 40, 100]
#                    })

# df_list = []
# for grp,df in df.groupby('Code'):
#     df['distance'] = df['Description'].apply(lambda x : fuzz.token_set_ratio(x, mode(df['Description'])))
#     # df['Description'] =  mode(df['Description'])
#     df_list.append(df[df['distance'] > 10])

# df = pd.concat(df_list)

In [440]:
# comp_ranking_df.groupby('comp')['comp'].apply(lambda x : fuzz.token_set_ratio(x, ','.join(units_col)))

In [441]:
# df_list = []
# for grp,df in comp_ranking_df.groupby('comp'):
#     df['distance'] = df['comp'].apply(lambda x : fuzz.token_set_ratio(x, 'Aatrox, DragonGold, Kayn, Shen, Twitch, Xayah, Zoe'))
#     # df['Description'] =  mode(df['Description'])
#     df_list.append(df[df['distance'] > 10])

# df = pd.concat(df_list)