## Content list

#### 1. Import libraries
#### 2. Import data
#### 3. Top 3 champion picks
#### 4. Top 3 champion bans
#### 5. Top 3 champions picks & bans win rate
#### 6. Performance data for K'sante and region perfromance

## 1. Import libraries

In [1]:
# Import libraries
import pandas as pd
import numpy as np
import datetime as dt
import seaborn as sns
import matplotlib
import matplotlib.pyplot as plt
import os
import statsmodels.api as sm # Using .api imports the public access version of statsmodels, which is a library that handles statistical models.
import warnings # This is a library that handles warnings.

In [2]:
# Disable deprecation warnings that could indicate, for instance, a suspended library or feature. These are more relevant to developers and very seldom to analysts.
warnings.filterwarnings("ignore") 

## 2. Import data

In [3]:
# Turn project folder path into a string
path = r'C:\Users\Admin\Desktop\Data Analysis\Advanced Analytics & Dashboard Design\Competitive_LoL_Analysis'

In [4]:
path

'C:\\Users\\Admin\\Desktop\\Data Analysis\\Advanced Analytics & Dashboard Design\\Competitive_LoL_Analysis'

In [5]:
# Import league competitive data
df_lol = pd.read_csv(os.path.join(path, '02 Data', 'Prepared Data', 'competitve_data_cleaned_country.csv'), index_col = False)

In [6]:
# Check output
df_lol.head()

Unnamed: 0,gameid,league,year,date,game,patch,participantid,side,position,champion,...,teamkills,teamdeaths,damagetochampions,damageshare,totalgold,golddiffat15,xpdiffat15,kda,region,country
0,LOLTMNT06_13630,LEC,2024,2024-01-13,1,14.01,1,Blue,top,Renekton,...,10,14,10063,0.286116,12202,1690.0,-346.0,2.0,Europe,United Kingdom
1,LOLTMNT06_13630,LEC,2024,2024-01-13,1,14.01,2,Blue,jng,Nocturne,...,10,14,4562,0.129709,9892,-378.0,74.0,2.0,Europe,United Kingdom
2,LOLTMNT06_13630,LEC,2024,2024-01-13,1,14.01,3,Blue,mid,Akali,...,10,14,11408,0.324358,10279,279.0,-330.0,1.0,Europe,United Kingdom
3,LOLTMNT06_13630,LEC,2024,2024-01-13,1,14.01,4,Blue,bot,Kalista,...,10,14,6014,0.170993,11703,213.0,1319.0,2.0,Europe,United Kingdom
4,LOLTMNT06_13630,LEC,2024,2024-01-13,1,14.01,5,Blue,sup,Pyke,...,10,14,3124,0.088823,7580,-75.0,-1313.0,1.0,Europe,United Kingdom


In [7]:
# Print number of rows and columns in df_lol
df_lol.shape

(10512, 35)

## 3. Top 3 champions picks

In [8]:
# Remove data from position which have team, as we are only looking at individual player data
df_lol_indi = df_lol.loc[df_lol['position'] != 'team']

In [9]:
# Check of all 'team' position data is removed.
df_lol_indi['position'].value_counts()

position
top    1752
jng    1752
mid    1752
bot    1752
sup    1752
Name: count, dtype: int64

In [10]:
df_lol_indi['champion'].count()

8760

In [11]:
# Identify the top count for champion pick
df_lol_indi['champion'].value_counts()

champion
K'Sante        406
Azir           339
Varus          338
Nautilus       324
Xin Zhao       273
              ... 
Mordekaiser      1
Vladimir         1
Zilean           1
Xerath           1
Nasus            1
Name: count, Length: 130, dtype: int64

### Top 3 picks for champions are:
#### K'Sante with 406 pick count.
#### Azir with 339 pick count.
#### Varis with 338 pick count.

In [12]:
# Create subset for top 3 champion picks and counts
df_pick = df_lol_indi['champion'].value_counts().head(3).rename_axis('champion').reset_index(name='counts')

In [13]:
# Check output
df_pick

Unnamed: 0,champion,counts
0,K'Sante,406
1,Azir,339
2,Varus,338


In [14]:
# Exporting df_pick as 'top_champion_picks.csv'
df_pick.to_csv(os.path.join(path, '02 Data','Prepared Data', 'top_champion_pick.csv'), index=False)

## 4. Top 3 champions bans

In [15]:
# Extract data from position which have team.
df_lol_team = df_lol.loc[df_lol['position'] == 'team']

In [16]:
# Check if only 'team' position data is present.
df_lol_team['position'].value_counts()

position
team    1752
Name: count, dtype: int64

In [17]:
# Create subset based on ban1, ban2, ban3, ban4, ban5 columns.
ban1 = df_lol_team['ban1']
ban2 = df_lol_team['ban2']
ban3 = df_lol_team['ban3']
ban4 = df_lol_team['ban4']
ban5 = df_lol_team['ban5']

In [18]:
# Append all ban columns togehter
frames = [ban1, ban2, ban3, ban4, ban5]
ban_total = pd.concat(frames)

In [19]:
ban_total.count()

8753

In [20]:
# Identify top count for champion ban
ban_total.value_counts()

Kalista      541
Ashe         451
Vi           400
Senna        399
Varus        369
            ... 
Yuumi          1
Skarner        1
Samira         1
Gangplank      1
Kha'Zix        1
Name: count, Length: 114, dtype: int64

### Top 3 bans for champions are:
#### Kalista with 541 ban counts.
#### Ashe with 451 ban counts.
#### Vi with 400 ban counts.

In [21]:
# Create subset for top 3 champion ban and counts
df_ban = ban_total.value_counts().head(3).rename_axis('champion').reset_index(name='counts')

In [22]:
# Check output
df_ban

Unnamed: 0,champion,counts
0,Kalista,541
1,Ashe,451
2,Vi,400


In [23]:
# Exporting df_ban as 'top_champion_bans.csv'
df_ban.to_csv(os.path.join(path, '02 Data','Prepared Data', 'top_champion_ban.csv'), index=False)

## 5. Top 3 champions picks & bans win rates

In [24]:
# Create a subset for each of the top 3 champions pick and ban
df_ksante = df_lol_indi.loc[df_lol_indi['champion'] == "K'Sante"]
df_azir = df_lol_indi.loc[df_lol_indi['champion'] == "Azir"]
df_varus = df_lol_indi.loc[df_lol_indi['champion'] == "Varus"]
df_kalista = df_lol_indi.loc[df_lol_indi['champion'] == "Kalista"]
df_ashe = df_lol_indi.loc[df_lol_indi['champion'] == "Ashe"]
df_vi = df_lol_indi.loc[df_lol_indi['champion'] == "Vi"]

In [25]:
# Calculate winrate of each champions
df_ksante_winrate = df_ksante.loc[df_ksante['result'] == 1].result.count() /  df_ksante.result.count() * 100
df_azir_winrate = df_azir.loc[df_azir['result'] == 1].result.count() /  df_azir.result.count() * 100
df_varus_winrate = df_varus.loc[df_varus['result'] == 1].result.count() /  df_varus.result.count() * 100
df_kalista_winrate = df_kalista.loc[df_kalista['result'] == 1].result.count() /  df_kalista.result.count() * 100
df_ashe_winrate = df_ashe.loc[df_ashe['result'] == 1].result.count() /  df_ashe.result.count() * 100
df_vi_winrate = df_vi.loc[df_vi['result'] == 1].result.count() /  df_vi.result.count() * 100

In [26]:
print("K'sante win rate = ",df_ksante_winrate)
print("Azir win rate = ",df_azir_winrate)
print("Varus win rate = ",df_varus_winrate)
print("Kalista win rate = ",df_kalista_winrate)
print("Ashe win rate = ",df_ashe_winrate)
print("Vi win rate = ",df_vi_winrate)

K'sante win rate =  55.66502463054187
Azir win rate =  49.26253687315634
Varus win rate =  54.437869822485204
Kalista win rate =  49.5
Ashe win rate =  47.12643678160919
Vi win rate =  48.97959183673469


#### K'sante have the highest win rate of 55.7% among the top 3 champions picks and bans.

In [27]:
# Create list with champion and winrate
winrate_data = [["K'Sante", df_ksante_winrate], ["Azir", df_azir_winrate], ["Varus", df_varus_winrate], ["Kalista", df_kalista_winrate], ["Ashe", df_ashe_winrate], ["Vi", df_vi_winrate]] 

In [28]:
# Create dataframe with champion and winrate details
df_winrate = pd.DataFrame(winrate_data, columns=['champion', 'win_rate'])

In [29]:
df_winrate

Unnamed: 0,champion,win_rate
0,K'Sante,55.665025
1,Azir,49.262537
2,Varus,54.43787
3,Kalista,49.5
4,Ashe,47.126437
5,Vi,48.979592


In [30]:
# Exporting df_winrate as 'champion_winrate.csv'
df_winrate.to_csv(os.path.join(path, '02 Data','Prepared Data', 'champion_winrate.csv'), index=False)

# 6. Performance data for K'sante and region perfromance

#### As K'sante is a top lane position champion, i want to comapre how it's overall performance stack up against the top lane perfromance of 4 various region (LCK - Korea, LPL - China, LEC - Europe, LCS - NA)

In [35]:
# Create subset of for all 4 seperate region
df_lol_kr = df_lol_indi.loc[df_lol_indi['region'] == 'Korea']
df_lol_ch = df_lol_indi.loc[df_lol_indi['region'] == 'China']
df_lol_eu = df_lol_indi.loc[df_lol_indi['region'] == 'Europe']
df_lol_na = df_lol_indi.loc[df_lol_indi['region'] == 'North America']

In [38]:
# Filter out top lane position for all region
df_lol_kr_top = df_lol_kr.loc[df_lol_kr['position'] == 'top']
df_lol_ch_top = df_lol_ch.loc[df_lol_ch['position'] == 'top']
df_lol_eu_top = df_lol_eu.loc[df_lol_eu['position'] == 'top']
df_lol_na_top = df_lol_na.loc[df_lol_na['position'] == 'top']

In [49]:
# Store the min, max and average for 'kda', 'totalgold' & 'damageshare' stats for KR region
kr_kda_min = df_lol_na_top['kda'].min()
kr_kda_max = df_lol_na_top['kda'].max()
kr_kda_avg = df_lol_na_top['kda'].mean()
kr_gold_min = df_lol_na_top['totalgold'].min()
kr_gold_max = df_lol_na_top['totalgold'].max()
kr_gold_avg = df_lol_na_top['totalgold'].mean()
kr_dmg_min = df_lol_na_top['damageshare'].min()
kr_dmg_max = df_lol_na_top['damageshare'].max()
kr_dmg_avg = df_lol_na_top['damageshare'].mean()

In [52]:
# Store the min, max and average for 'kda', 'totalgold' & 'damageshare' stats for China region
ch_kda_min = df_lol_ch_top['kda'].min()
ch_kda_max = df_lol_ch_top['kda'].max()
ch_kda_avg = df_lol_ch_top['kda'].mean()
ch_gold_min = df_lol_ch_top['totalgold'].min()
ch_gold_max = df_lol_ch_top['totalgold'].max()
ch_gold_avg = df_lol_ch_top['totalgold'].mean()
ch_dmg_min = df_lol_ch_top['damageshare'].min()
ch_dmg_max = df_lol_ch_top['damageshare'].max()
ch_dmg_avg = df_lol_ch_top['damageshare'].mean()

In [54]:
# Store the min, max and average for 'kda', 'totalgold' & 'damageshare' stats for Europe region
eu_kda_min = df_lol_eu_top['kda'].min()
eu_kda_max = df_lol_eu_top['kda'].max()
eu_kda_avg = df_lol_eu_top['kda'].mean()
eu_gold_min = df_lol_eu_top['totalgold'].min()
eu_gold_max = df_lol_eu_top['totalgold'].max()
eu_gold_avg = df_lol_eu_top['totalgold'].mean()
eu_dmg_min = df_lol_eu_top['damageshare'].min()
eu_dmg_max = df_lol_eu_top['damageshare'].max()
eu_dmg_avg = df_lol_eu_top['damageshare'].mean()

In [56]:
# Store the min, max and average for 'kda', 'totalgold' & 'damageshare' stats for NA region
na_kda_min = df_lol_na_top['kda'].min()
na_kda_max = df_lol_na_top['kda'].max()
na_kda_avg = df_lol_na_top['kda'].mean()
na_gold_min = df_lol_na_top['totalgold'].min()
na_gold_max = df_lol_na_top['totalgold'].max()
na_gold_avg = df_lol_na_top['totalgold'].mean()
na_dmg_min = df_lol_na_top['damageshare'].min()
na_dmg_max = df_lol_na_top['damageshare'].max()
na_dmg_avg = df_lol_na_top['damageshare'].mean()

In [58]:
# Create list with country and top lane stats
top_stats_data = [["South Korea", kr_kda_min, kr_kda_max, kr_kda_avg, kr_gold_min, kr_gold_max, kr_gold_avg, kr_dmg_min, kr_dmg_max, kr_dmg_avg], 
                ["China", ch_kda_min, ch_kda_max, ch_kda_avg, ch_gold_min, ch_gold_max, ch_gold_avg, ch_dmg_min, ch_dmg_max, ch_dmg_avg], 
                ["United Kingdom", eu_kda_min, eu_kda_max, eu_kda_avg, eu_gold_min, eu_gold_max, eu_gold_avg, eu_dmg_min, eu_dmg_max, eu_dmg_avg], 
                ["United States of America", na_kda_min, na_kda_max, na_kda_avg, na_gold_min, na_gold_max, na_gold_avg, na_dmg_min, na_dmg_max, na_dmg_avg]]

In [60]:
# Create dataframe with country and top lane stats details
df_country_stats = pd.DataFrame(top_stats_data, columns=['country', 'kda_min', 'kda_max', 'kda_avg', 'gold_earned_min', 'gold_earned_max', 'gold_earned_avg', 'damageshare_min', 'damageshare_max', 'damageshare_avg'])

In [61]:
df_country_stats.head()

Unnamed: 0,country,kda_min,kda_max,kda_avg,gold_earned_min,gold_earned_max,gold_earned_avg,damageshare_min,damageshare_max,damageshare_avg
0,South Korea,0.0,16.0,3.826467,7296,20397,12690.639535,0.085048,0.414749,0.218811
1,China,0.0,22.0,4.238091,6646,22387,12752.092,0.070484,0.525338,0.221353
2,United Kingdom,0.0,22.0,4.66324,6526,20975,12324.254335,0.07278,0.463391,0.22705
3,United States of America,0.0,16.0,3.826467,7296,20397,12690.639535,0.085048,0.414749,0.218811


In [62]:
# Exporting df_country_stats as 'country_toplane_stats.csv'
df_country_stats.to_csv(os.path.join(path, '02 Data','Prepared Data', 'country_toplane_stats.csv'), index=False)

In [63]:
# Store the min, max and average for 'kda', 'totalgold' & 'damageshare' stats for K'sante champion
ksante_kda_min = df_ksante['kda'].min()
ksante_kda_max = df_ksante['kda'].max()
ksante_kda_avg = df_ksante['kda'].mean()
ksante_gold_min = df_ksante['totalgold'].min()
ksante_gold_max = df_ksante['totalgold'].max()
ksante_gold_avg = df_ksante['totalgold'].mean()
ksante_dmg_min = df_ksante['damageshare'].min()
ksante_dmg_max = df_ksante['damageshare'].max()
ksante_dmg_avg = df_ksante['damageshare'].mean()

In [65]:
# Create list with ksante stats
ksante_data = [["K'sante", ksante_kda_min, ksante_kda_max, ksante_kda_avg, ksante_gold_min, ksante_gold_max, ksante_gold_avg, ksante_dmg_min, ksante_dmg_max, ksante_dmg_avg]]

In [66]:
# Create dataframe with ksante stats details
df_ksante_stats = pd.DataFrame(ksante_data, columns=['champion', 'kda_min', 'kda_max', 'kda_avg', 'gold_earned_min', 'gold_earned_max', 'gold_earned_avg', 'damageshare_min', 'damageshare_max', 'damageshare_avg'])

In [68]:
# Exporting df_ksante_stats as 'ksante_stats.csv'
df_ksante_stats.to_csv(os.path.join(path, '02 Data','Prepared Data', 'ksante_stats.csv'), index=False)