In [1]:
import numpy as np
import pandas as pd
from datetime import datetime as dt
import matplotlib.pyplot as plt
from bs4 import BeautifulSoup
import leaguepedia_parser as lolp
import dataclasses

# Notebook-wide display settings.
# min_rows=None makes truncated DataFrame output follow display.max_rows.
pd.set_option('display.min_rows', None)
# Use matplotlib's built-in dark theme for every figure.
plt.style.use("dark_background")

This notebook contains the code used to collect all of the data used in my analysis of the League of Legends professional gameplay meta. The main information used (the ban/pick and win/loss information of each game, and when each game was played) is scraped from Leaguepedia (now lol.fandom.com) using an API built for the website. This notebook is quite messy, as it contains test code I used while learning the API and figuring out how to optimize my scraper. To suit the needs of this portfolio, the output from the scrapers is limited, because the original output can be very large (some outputs include information for every game from every tournament documented on the website).

In [2]:
# Learning the methods for this API: list every name exposed by the
# leaguepedia_parser package (imported as `lolp`).
dir(lolp)

['__builtins__',
 '__cached__',
 '__doc__',
 '__file__',
 '__loader__',
 '__name__',
 '__package__',
 '__path__',
 '__spec__',
 'get_all_team_assets',
 'get_game_details',
 'get_games',
 'get_long_team_name_from_trigram',
 'get_regions',
 'get_team_logo',
 'get_team_thumbnail',
 'get_tournaments',
 'parsers',
 'site',
 'transmuters']

In [3]:
# Inspect what get_tournaments() returns when called with its default arguments:
# a list of LeaguepediaTournament records (name, dates, region, overviewPage, ...).
lolp.get_tournaments()

[LeaguepediaTournament(name='LCK 2024 Season Opening', start='2024-01-09', end='2024-01-09', region='Korea', league=None, leagueShort=None, rulebook=None, tournamentLevel='Primary', isQualifier=True, isPlayoffs=True, isOfficial=True, overviewPage='LCK 2024 Season Opening'),
 LeaguepediaTournament(name='IWCT 2013', start='2013-08-21', end='2013-08-23', region='Wildcard', league='2014 International Wildcard Tournament', leagueShort=None, rulebook=None, tournamentLevel='Primary', isQualifier=True, isPlayoffs=True, isOfficial=True, overviewPage='Gamescom 2013/International Wildcard Tournament'),
 LeaguepediaTournament(name='IWCT 2014', start='2014-08-13', end='2014-09-01', region='Wildcard', league='2014 International Wildcard Tournament', leagueShort=None, rulebook=None, tournamentLevel='Primary', isQualifier=True, isPlayoffs=True, isOfficial=True, overviewPage='2014 Season International Wild Card Tournament'),
 LeaguepediaTournament(name='Brazil Regional Finals 2014', start='2014-07-18',

In [4]:
# Show each tournament's display name next to its overview page, one per line,
# to compare the two identifiers.
for tournament in lolp.get_tournaments():
    print(tournament.name, tournament.overviewPage, sep="\n")

LCK 2024 Season Opening
LCK 2024 Season Opening
IWCT 2013
Gamescom 2013/International Wildcard Tournament
IWCT 2014
2014 Season International Wild Card Tournament
Brazil Regional Finals 2014
CBLOL/2014 Season/Regional Finals
LAN 2015 Closing Cup
Latin America Cup 2015/LAN/Closing Cup/Regular Season
LAN 2015 Closing Cup Playoffs
Latin America Cup 2015/LAN/Closing Cup/Playoffs
LAN 2015 Closing Cup Promotion
Latin America Cup 2015/LAN/Closing Cup/Promotion
LAN 2015 Opening Cup
Latin America Cup 2015/LAN/Opening Cup/Regular Season
LAN 2015 Opening Cup Playoffs
Latin America Cup 2015/LAN/Opening Cup/Playoffs
LAN 2016 Closing Cup
Latin America Cup/LAN/2016 Season/Closing Cup/Regular Season
LAN 2016 Closing Cup Playoffs
Latin America Cup/LAN/2016 Season/Closing Cup/Playoffs
LAN 2016 Closing Cup Promotion
Latin America Cup/LAN/2016 Season/Closing Cup/Promotion
LAN 2016 Opening Cup
Latin America Cup/LAN/2016 Season/Opening Cup/Regular Season
LAN 2016 Opening Cup Playoffs
Latin America Cup/LAN/2

In [5]:
# There are no listed entries of any games for this particular tournament
# (get_games returns an empty list), and possibly for others as well.
lolp.get_games("Intel Arabian Cup 2020/United Arab Emirates/Split 1")

[]

In [6]:
# Print every tournament whose *name* (as opposed to its overview page) returns games.
# The games are fetched once per tournament and reused for both the emptiness
# check and the printout, instead of querying the API twice for the same
# tournament (presumably one remote request per call - TODO confirm caching).
tournaments = lolp.get_tournaments()
for tournament in tournaments:
    games = lolp.get_games(tournament.name)
    if games:
        print(tournament.name)
        print(tournament.overviewPage)
        print(games)
        print("------------------------------------------------------")

Intel Arabian Cup 2020
Intel Arabian Cup 2020
[LolGame(sources=EmptyDataclass(), duration=1524, start='2020-10-01T15:29:00+00:00', creation=None, type=None, queue_id=None, patch='10.20', gameVersion=None, winner='RED', teams=LolGameTeams(BLUE=LolGameTeam(bans=[223, 777, 84, 201, 235], players=[LolGamePlayer(primaryRuneTreeId=None, secondaryRuneTreeId=None, championId=54, id=None, inGameName=None, profileIconId=None, role=None, sources=EmptyDataclass(), runes=[], summonerSpells=[], endOfGameStats=None, snapshots=[], itemsEvents=[], wardsEvents=[], skillsLevelUpEvents=[], largeMonstersKills=[], levelUpEvents=[], spellsUses=[], specialKills=[]), LolGamePlayer(primaryRuneTreeId=None, secondaryRuneTreeId=None, championId=120, id=None, inGameName=None, profileIconId=None, role=None, sources=EmptyDataclass(), runes=[], summonerSpells=[], endOfGameStats=None, snapshots=[], itemsEvents=[], wardsEvents=[], skillsLevelUpEvents=[], largeMonstersKills=[], levelUpEvents=[], spellsUses=[], specialKil

In [7]:
# Checking the structure of data that is returned for each get_games instance:
# convert the first game of a tournament into plain nested dicts/lists so its
# fields can be read directly.
details = lolp.get_games("Intel Arabian Cup 2020")
test_dict = dataclasses.asdict(details[0])
test_dict

{'sources': {},
 'duration': 1524,
 'start': '2020-10-01T15:29:00+00:00',
 'creation': None,
 'type': None,
 'queue_id': None,
 'patch': '10.20',
 'gameVersion': None,
 'winner': 'RED',
 'teams': {'BLUE': {'bans': [223, 777, 84, 201, 235],
   'players': [{'primaryRuneTreeId': None,
     'secondaryRuneTreeId': None,
     'championId': 54,
     'id': None,
     'inGameName': None,
     'profileIconId': None,
     'role': None,
     'sources': {},
     'runes': [],
     'summonerSpells': [],
     'endOfGameStats': None,
     'snapshots': [],
     'itemsEvents': [],
     'wardsEvents': [],
     'skillsLevelUpEvents': [],
     'largeMonstersKills': [],
     'levelUpEvents': [],
     'spellsUses': [],
     'specialKills': []},
    {'primaryRuneTreeId': None,
     'secondaryRuneTreeId': None,
     'championId': 120,
     'id': None,
     'inGameName': None,
     'profileIconId': None,
     'role': None,
     'sources': {},
     'runes': [],
     'summonerSpells': [],
     'endOfGameStats': No

In [8]:
# Test printing relevant information: patch and winner first, then for each
# side its ban list followed by the champion id of its five players.
print(test_dict["patch"])
print(test_dict["winner"])
for side in ("BLUE", "RED"):
    team = test_dict["teams"][side]
    print(team["bans"])
    for slot in range(5):
        print(team["players"][slot]["championId"])

10.20
RED
[223, 777, 84, 201, 235]
54
120
245
360
412
[98, 8, 58, 42, 61]
39
876
236
51
25


In [9]:
# Returns relevant information for each game of a tournament
def list_banpicks(tournament):
    """Print the patch, winner, bans and picks of every game in a tournament.

    For each game returned by ``lolp.get_games(tournament)`` the following is
    printed, one item per line: the patch, the winning side, then for the BLUE
    side and the RED side in turn the list of banned champion ids followed by
    the champion id of each player. A dashed separator line ends each game.

    Parameters
    ----------
    tournament : str
        Tournament identifier passed straight to ``lolp.get_games``.

    Returns
    -------
    None
        The function only prints.
    """
    for game in lolp.get_games(tournament):
        cur_dict = dataclasses.asdict(game)
        print(cur_dict["patch"])
        print(cur_dict["winner"])
        for side in ("BLUE", "RED"):
            team = cur_dict["teams"][side]
            print(team["bans"])
            # Iterate over the players actually present rather than assuming
            # exactly five, so an incomplete record does not raise IndexError.
            for player in team["players"]:
                print(player["championId"])
        print("---------------------------------------------------")

In [10]:
# Testing list_banpicks on the 2023 World Championship main event.
list_banpicks("2023 Season World Championship/Main Event")

13.19
BLUE
[115, 518, 429, 85, 53]
126
113
61
22
888
[51, 78, 268, 59, 150]
897
64
134
523
526
---------------------------------------------------
13.19
BLUE
[427, 429, 268, 68, 103]
24
163
897
498
12
[61, 7, 57, 111, 526]
887
59
134
81
497
---------------------------------------------------
13.19
BLUE
[134, 429, 57, 68, 888]
897
254
103
145
12
[78, 497, 61, 163, 7]
24
64
518
498
875
---------------------------------------------------
13.19
RED
[429, 163, 85, 518, 58]
86
254
69
498
497
[122, 61, 57, 268, 2]
516
59
103
145
12
---------------------------------------------------
13.19
BLUE
[51, 59, 64, 254, 68]
58
57
61
119
888
[518, 429, 897, 24, 555]
126
526
268
110
12
---------------------------------------------------
13.19
BLUE
[518, 498, 526, 58, 897]
68
78
268
145
89
[57, 61, 254, 111, 53]
516
163
126
429
12
---------------------------------------------------
13.19
BLUE
[78, 429, 64, 12, 234]
24
59
268
498
497
[61, 68, 57, 134, 518]
58
163
126
81
526
-------------------------------

In [11]:
# Count how many of the listed tournaments return at least one game when
# queried by their overview page.
tournaments = lolp.get_tournaments()
count = sum(
    1
    for tournament in tournaments
    if len(lolp.get_games(tournament.overviewPage)) > 0
)

print(count)

781


In [12]:
# Print the tournaments that return games when queried by their display name.
tournaments = lolp.get_tournaments()
names_with_games = (
    tournament.name
    for tournament in tournaments
    if len(lolp.get_games(tournament.name)) > 0
)
for name in names_with_games:
    print(name)

Intel Arabian Cup 2020
Battle of the Atlantic 2013
Demacia Cup 2019
Demacia Cup 2020
Demacia Cup 2021
Demacia Cup 2022
Demacia Cup 2023
DreamHack Summer 2012
World Cyber Arena 2015


In [13]:
# Collect the overview page of every tournament that has game data available.
tournaments = lolp.get_tournaments()
t_with_data = [
    tournament.overviewPage
    for tournament in tournaments
    if len(lolp.get_games(tournament.overviewPage)) > 0
]
print(t_with_data)

['Gamescom 2013/International Wildcard Tournament', '2014 Season International Wild Card Tournament', 'CBLOL/2014 Season/Regional Finals', 'Latin America Cup 2015/LAN/Closing Cup/Regular Season', 'Latin America Cup 2015/LAN/Closing Cup/Playoffs', 'Latin America Cup 2015/LAN/Closing Cup/Promotion', 'Latin America Cup 2015/LAN/Opening Cup/Regular Season', 'Latin America Cup 2015/LAN/Opening Cup/Playoffs', 'Latin America Cup/LAN/2016 Season/Closing Cup/Regular Season', 'Latin America Cup/LAN/2016 Season/Closing Cup/Playoffs', 'Latin America Cup/LAN/2016 Season/Closing Cup/Promotion', 'Latin America Cup/LAN/2016 Season/Opening Cup/Regular Season', 'Latin America Cup/LAN/2016 Season/Opening Cup/Playoffs', 'Latin America Cup/LAN/2016 Season/Opening Cup/Promotion', 'IEM Season 11/Challenger', 'Intel Arabian Cup 2020', '2015 International Wildcard Tournament/Chile', '2015 International Wildcard Tournament/Turkey', 'Riot Latin America Cup 2014/Colombia', 'Latin America Cup 2015/Closing Cup/Gran

In [14]:
# An older tournament: its games print None for the patch and list only three
# bans per side.
list_banpicks('Season 3 World Championship')

None
RED
[30, 60, 4]
58
59
79
81
412
[61, 37, 28]
98
254
238
42
9
---------------------------------------------------
None
BLUE
[105, 85, 98]
60
64
103
67
143
[61, 51, 238]
54
254
50
42
412
---------------------------------------------------
None
RED
[83, 61, 58]
68
60
105
42
37
[238, 98, 412]
85
266
103
51
143
---------------------------------------------------
None
BLUE
[37, 61, 98]
154
60
103
67
143
[85, 4, 238]
58
59
105
42
412
---------------------------------------------------
None
BLUE
[254, 412, 98]
58
64
61
42
143
[238, 103, 105]
85
59
79
51
267
---------------------------------------------------
None
BLUE
[38, 22, 103]
75
254
61
42
143
[98, 238, 4]
83
60
127
51
37
---------------------------------------------------
None
BLUE
[59, 64, 412]
58
266
103
42
37
[98, 238, 61]
154
56
131
81
9
---------------------------------------------------
None
BLUE
[85, 42, 143]
27
59
13
51
412
[238, 64, 67]
98
254
103
81
37
---------------------------------------------------
None
RED
[38, 103, 

In [15]:
# Red-side bans of the first Season 3 Worlds game, read straight from the
# converted dict (three entries here, not five).
dataclasses.asdict(lolp.get_games('Season 3 World Championship')[0])["teams"]["RED"]["bans"]

[61, 37, 28]

In [16]:
# Function that extracts all relevant information for each game
# Output columns of store_banpicks, in order.
_BANPICK_COLUMNS = ("champion_id", "patch", "side", "ban_pick", "win_lose",
                    "tournament", "game_id", "date", "duration")


def _banpick_rows(game_id, tournament, game):
    """Yield one row tuple (ordered like _BANPICK_COLUMNS) per ban and pick of one game."""
    start = game["start"]
    # `start` is an ISO timestamp string; keep only the YYYY-MM-DD part.
    # A game with no recorded start gets a missing date instead of raising.
    date = start[:10] if start else None

    # Row order within a game: BLUE bans, RED bans, BLUE picks, RED picks.
    for ban_pick in ("BAN", "PICK"):
        for side in ("BLUE", "RED"):
            team = game["teams"][side]
            if ban_pick == "BAN":
                champion_ids = team["bans"]
            else:
                # Use every listed player rather than assuming exactly five.
                champion_ids = [player["championId"] for player in team["players"]]

            # Any winner value other than this side (including a missing
            # winner) is recorded as "LOSE".
            win_lose = "WIN" if game["winner"] == side else "LOSE"

            for champion_id in champion_ids:
                yield (champion_id, game["patch"], side, ban_pick, win_lose,
                       tournament, game_id, date, game["duration"])


def store_banpicks(tournament):
    """Return every ban and pick of a tournament as a long-format DataFrame.

    Parameters
    ----------
    tournament : str
        Tournament identifier passed straight to ``lolp.get_games``; it is also
        stored verbatim in the ``tournament`` column.

    Returns
    -------
    pandas.DataFrame
        One row per banned or picked champion, with columns ``champion_id``,
        ``patch``, ``side`` ("BLUE"/"RED"), ``ban_pick`` ("BAN"/"PICK"),
        ``win_lose`` ("WIN"/"LOSE" for that side), ``tournament``, ``game_id``,
        ``date`` (first ten characters of the game's start timestamp, or
        missing when no start is recorded) and ``duration``.
        ``game_id`` is the position of the game in the list returned for this
        tournament, so it is only unique within one tournament.
        A tournament without games gives an empty frame with the same columns.
    """
    games = lolp.get_games(tournament)
    rows = [
        row
        for game_id, game in enumerate(games)
        for row in _banpick_rows(game_id, tournament, dataclasses.asdict(game))
    ]
    # Build the frame from per-column lists so dtype inference matches a plain
    # dict-of-lists construction.
    return pd.DataFrame({
        column: [row[position] for row in rows]
        for position, column in enumerate(_BANPICK_COLUMNS)
    })

In [17]:
# Testing store_banpicks on the Season 3 World Championship and displaying the
# resulting long-format frame (one row per ban/pick).
df_worlds_s3 = store_banpicks('Season 3 World Championship')
df_worlds_s3

Unnamed: 0,champion_id,patch,side,ban_pick,win_lose,tournament,game_id,date,duration
0,30,,BLUE,BAN,LOSE,Season 3 World Championship,0,2013-09-15,2221.0
1,60,,BLUE,BAN,LOSE,Season 3 World Championship,0,2013-09-15,2221.0
2,4,,BLUE,BAN,LOSE,Season 3 World Championship,0,2013-09-15,2221.0
3,61,,RED,BAN,WIN,Season 3 World Championship,0,2013-09-15,


In [28]:
# Storing all relevant data in a single dataframe.
# Each tournament's frame is collected first and concatenated once: calling
# pd.concat inside the loop re-copies every previously gathered row on each
# iteration. ignore_index=True gives the final frame a fresh 0..n-1 index.
frames = [store_banpicks(t) for t in t_with_data]
df = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

In [29]:
# Download up-to-date champion ID information from online
# can switch to show updated version
version = "13.11.1"
version_2 = "13.23.1"
champion_list = pd.read_json(f"https://ddragon.leagueoflegends.com/cdn/{version_2}/data/en_US/champion.json")

# Each entry of the "data" column is a dict describing one champion; its "key"
# field holds the numeric champion id as a string.
# The ids are extracted in one vectorised step instead of writing them cell by
# cell with chained indexing (champion_list["id"][champ] = ...), which triggers
# SettingWithCopyWarning and does not update the frame under copy-on-write.
champion_list = (
    champion_list
    .assign(id=champion_list["data"].map(lambda champion: int(champion["key"])))
    .drop(columns=["type", "format", "version", "data"])
    .reset_index()
    .rename(columns={"index": "Champion"})
)
# champion_list.to_csv("Champ_IDs.csv")

In [30]:
# Distinct patch labels present in the scraped data.
df.patch.unique()

array([None, '5.9', '5.10', '5.11', '5.12', '5.13', '5.2', '5.3', '5.5',
       '6.10', '6.11', '6.12', '6.13', '6.15', '6.6', '6.1', '6.2', '6.3',
       '6.4', '6.5', '6.21', '10.20', '10.21', '10.22', '5.15', '5.14',
       '5.6', '3.9', '8.1', '8.2', '8.3', '8.4', '8.13', '8.15', '9.22',
       '5.7', '6.7', '8.10', '5.21', '4.21', '5.1', '5.24', '5.20', '6.8',
       '7.1', '7.2', '7.3', '7.4', '7.5', '7.10', '7.11', '7.12', '7.13',
       '7.14', '7.16', '7.15', '9.1', '9.2', '9.3', '9.4', '9.5', '9.6',
       '14.1', '14.2', '12.1', '12.2', '12.3', '12.4', '12.5', '12.10',
       '12.11', '12.12', '12.13', '12.14', '13.1', '13.3', '13.4', '13.5',
       '13.11', '13.12', '13.13', '13.14', '13.15', '8.5', '8.11', '8.12',
       '8.14', '8.7', '8.16', '9.10', '9.11', '9.12', '9.13', '9.14',
       '9.15', '9.16', '10.1', '10.2', '10.4', '10.5', '10.7', '10.8',
       '10.11', '10.12', '10.13', '10.14', '10.15', '10.16', '11.1',
       '11.2', '11.3', '11.4', '11.5', '11.6', '11.11

In [31]:
# Some tournaments are missing patch information. The game has constant updates that change the meta,
# so identifying the right patch each game is played on is crucial.
# Count the rows without a patch, per tournament.
df.loc[df["patch"].isna()].groupby("tournament")["game_id"].count().reset_index()

Unnamed: 0,tournament,game_id
0,2012 MLG Pro Circuit/Fall/Championship,560
1,2014 GPL Spring,1440
2,2014 GPL Summer,1488
3,2014 GPL Winter,1504
4,2014 Season Garena Regional Finals,192
5,2014 Season International Wild Card Tournament,192
6,2014 Season Korea Regional Finals,208
7,2015 Demacia Cup/Spring Season,1664
8,2015 Demacia Cup/Summer Season,1504
9,2015 GPL Spring,2304


In [None]:
# I tried to find out what patch each tournament was played on for ones with missing patch info.
# Most were part of ongoing seasons that spanned multiple patches while the rest did not have available 
# info. For these cases, I have decided to automatically fill in patch info using the date of the game.

In [32]:
# Load the already-cleaned patch notes table (release date, season, patch number, ...).
# The code commented out below is the one-off cleaning that produced this csv;
# it is kept for reference and only needs re-running if new patches are added.
# NOTE(review): read_csv is called without date parsing, so `Date` appears to be
# loaded as text here - confirm it is converted before being compared to dates.
patch_notes = pd.read_csv("lol_patch_notes.csv")
# for i in range(185):
#     patch_notes.Date[i] = patch_notes.Date[i][:-4]
# for i in range(185, 321):
#     patch_notes.Date[i] = patch_notes.Date[i][:-7]
# for i in range(321):
#     patch_notes.Date[i] = patch_notes.Date[i].replace('\n', ' ').strip()
# patch_notes.Date[257] = "June 17 2012"

# for converting str to datetime, pd.to_datetime converts to timestamp obj, which is faster 
# (and better than converting to dt.datetime obj)
# for i in range(321):
#     patch_notes.Date[i] = dt.strptime(patch_notes.Date[i], "%B %d %Y").date()
    
# patch_notes.Date = pd.to_datetime(patch_notes.Date, format="%B %d %Y")
patch_notes

Unnamed: 0,index,Date,Season,Patch,New_Champs,Note
0,0,12/6/2023,Season thirteen,13.24,Hwei,Winterblessed 2023 skins. Magicae Prismatica s...
1,1,11/21/2023,Season thirteen,13.23,,Bees 2023 skins. Various balance changes.
2,2,11/8/2023,Season thirteen,13.22,,HEARTSTEEL skins. True Damage 2023 skin. Vario...
3,3,10/25/2023,Season thirteen,13.21,,2023 Cafe Cuties skins. Return of Nexus Blitz....
4,4,10/11/2023,Season thirteen,13.2,,Visua


In [None]:
# Convert the game dates from text to pandas datetimes.
df["date"] = pd.to_datetime(df["date"])

In [None]:
# Info on the patch of tournaments that I found online, as
# (tournament overview page, patch) pairs.
known_patches = [
    ('2012 MLG Pro Circuit/Fall/Championship', '1.0.0.148'),
    ('2014 Season Garena Regional Finals', '4.13'),
    ('2014 Season International Wild Card Tournament', '4.13'),
    ('2015 GPL Summer Playoffs', '5.14'),
    ('Battle of the Atlantic 2013', '3.14'),
    ('Gamescom 2013/International Wildcard Tournament', '3.1'),
    ('Latin America Cup 2015/LAN/Closing Cup/Promotion', '5.12'),
    ('Season 3 China Regional Finals', '3.9'),
    ('Season 3 Korea Regional Finals', '3.9'),
    ('Season 3 Latin America Regional Finals', '3.9'),
    ('Season 3 World Championship', '3.11'),
]
update_patch = pd.DataFrame(known_patches, columns=["tournament", "patch"])
# Keep a copy on disk so the manual lookups do not have to be repeated.
update_patch.to_csv("leaguepedia_some_missing_patch_info.csv")

In [None]:
# Number of rows per patch, with the patch labels forced to plain strings.
df3 = df.groupby("patch")["game_id"].count().reset_index()
df3["patch"] = df3["patch"].map(str)

In [None]:
# Display the number of rows recorded for each patch.
pd.DataFrame(df.groupby("patch")["game_id"].count()).reset_index()

In [None]:
# update main df patch info with info found online.
# Rows that already have a patch are left untouched; rows without one take the
# patch listed for their tournament in `update_patch`, when there is one.
# A single dictionary lookup per row replaces the nested Python loops over
# (missing rows x known tournaments). Rows whose tournament is not listed stay
# missing (NaN) and are handled by the date-based fill.
df2 = df.copy()
patch_by_tournament = dict(zip(update_patch["tournament"], update_patch["patch"]))
missing_patch = df2["patch"].isna()
df2.loc[missing_patch, "patch"] = (
    df2.loc[missing_patch, "tournament"].map(patch_by_tournament).to_numpy()
)

In [None]:
# update main df patch info by comparing game dates with patch dates (not necessarily accurate):
# every game still missing a patch gets the most recent patch released on or
# before the day it was played.
#
# The original row-by-row double loop took 34 minutes to execute and relied on
# patch_notes being ordered newest-first with `Date` already converted to
# datetimes. Here both date columns are converted locally, the patch release
# dates are sorted once, and each game is located with a binary search.
patch_timeline = (
    pd.DataFrame({"Date": pd.to_datetime(patch_notes["Date"]),
                  "Patch": patch_notes["Patch"]})
    .dropna(subset=["Date"])
    .sort_values("Date", kind="stable")
)
game_dates = pd.to_datetime(df2["date"])
# Games without a date cannot be matched to a patch and are skipped.
needs_patch = (df2["patch"].isna() & game_dates.notna()).to_numpy()

# side="right" counts the release dates <= the game date, so (count - 1) is the
# position of the latest patch already released; -1 means the game predates
# every patch in the table and is left without a patch.
positions = patch_timeline["Date"].searchsorted(
    game_dates[needs_patch].to_numpy(), side="right") - 1
has_patch = positions >= 0

if has_patch.any():
    fill_rows = needs_patch.copy()
    fill_rows[needs_patch] = has_patch
    df2.loc[fill_rows, "patch"] = patch_timeline["Patch"].to_numpy()[positions[has_patch]]

In [None]:
# drop rows with missing champion_id info (==0) and renumber the index
df2 = df2[df2["champion_id"] != 0].reset_index(drop=True)

In [None]:
# df2.to_csv("pick_bans_updated.csv")

In [None]:
# Update the final pick_bans list: scrape only the tournaments that are not
# already present in `df`.
# The set of known tournaments is computed once rather than on every
# iteration, and the new frames are concatenated in a single call instead of
# growing a DataFrame inside the loop.
already_scraped = set(df["tournament"].unique())
new_frames = [store_banpicks(t) for t in t_with_data if t not in already_scraped]
df_update = pd.concat(new_frames, ignore_index=True) if new_frames else pd.DataFrame()

In [None]:
# if champion_id == 0 for BAN, it means the team did not ban a champion for that slot.
# Those placeholder rows are removed and the index renumbered. The guard keeps
# the cell from raising KeyError when there were no new tournaments (df_update
# is then an empty frame without a champion_id column).
if not df_update.empty:
    df_update = df_update[df_update["champion_id"] != 0].reset_index(drop=True)
# df_update.to_csv("pick_bans_updated_2023nov26.csv")