In [1]:
from collections import deque
from time import time, sleep
import os
import json
import pickle
import requests
import numpy as np
import pandas as pd

API key는 https://developer.riotgames.com/apis 에서 발급받을 수 있다. 발급 시점으로부터 24시간 동안 유효하며, 만료되면 재발급받아야 한다.

In [2]:
# Riot API key. The RIOT_API_KEY environment variable takes precedence so the
# real key never has to be saved inside the notebook; the masked placeholder
# is only a fallback and must be replaced before any request can succeed.
api_key = os.environ.get('RIOT_API_KEY', 'RGAPI-********-****-****-****-************')

In [3]:
# Headers attached to every Riot API request; the API key is sent in
# X-Riot-Token. The long User-Agent value is split across adjacent literals
# purely for readability (the resulting string is unchanged).
request_header = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/101.0.4951.41 Safari/537.36 Edg/101.0.1210.32"
    ),
    "Accept-Language": "ko,en;q=0.9,en-US;q=0.8",
    "Accept-Charset": "application/x-www-form-urlencoded; charset=UTF-8",
    "Origin": "https://developer.riotgames.com",
    "X-Riot-Token": api_key,
}

Riot API key는 1초당 최대 20회, 2분당 최대 100회까지 호출 가능하며 횟수를 초과할 시 에러가 발생한다.

에러를 방지하기 위해, API call이 발생할 때마다 그 시각과 100회 이전의 API call이 발생한 시각을 비교하여, 두 시각의 차이가 120초보다 작으면 부족한 시간만큼 대기하는 방법을 사용하였다. (실제로는 오차가 발생할 수 있음을 감안하여 121초를 사용함)

In [4]:
class TimeSequence:
    """Sliding-window throttle for API calls.

    ``seq`` holds the timestamps of the most recent ``max_calls`` calls, oldest
    first. ``update`` compares "now" with the oldest timestamp and sleeps for
    whatever is missing to ``window`` seconds, so at most ``max_calls`` calls
    happen per ``window``. Only this long window is enforced; no per-second
    limit is checked here.

    Args:
        seq: Optional pre-filled deque of timestamps. It is stored as-is (not
            copied). When omitted, a fresh deque of ``max_calls`` zeros is
            created, so the first ``max_calls`` calls never wait.
        max_calls: Number of calls allowed per window (used only when ``seq``
            is omitted).
        window: Window length in seconds.
    """

    def __init__(self, seq=None, max_calls=100, window=121):
        # Build the deque per instance: a deque written directly in the
        # signature is created once and then shared (and mutated) by every
        # instance that relies on the default.
        if seq is None:
            seq = deque([0] * max_calls)

        self.seq = seq
        self.window = window

    def update(self):
        """Wait until another call is allowed, then record the call time."""
        elapsed = time() - self.seq.popleft()

        if elapsed < self.window:
            sleep(self.window - elapsed)

        self.seq.append(time())


Time = TimeSequence()

다음의 과정을 통해 원하는 정보들을 조회하였다.


1. 찾고자 하는 tier-division에 속해 있는 league를 조회
2. 1.에서 찾은 league들에 소속되어 있는 유저들의 summonerID를 조회
3. 2.에서 찾은 summonerID들의 puuid를 조회
4. 3.에서 찾은 puuid들의 matchID를 조회
5. 4.에서 찾은 matchID들의 timeline data를 조회
6. 4.에서 찾은 matchID들의 match data를 조회

분석 대상이 timeline data임에도 match data 파일을 다운받은 이유는 해당 파일에 필요한 추가 정보가 있기 때문이다. (자세한 사항은 후술)

In [5]:
def get_leagueId(tier, division):
    """Return the distinct league IDs listed on page 1 of a solo-queue tier/division.

    Args:
        tier: Tier name inserted into the URL path (e.g. 'DIAMOND').
        division: Division inserted into the URL path (e.g. 'I').

    Returns:
        A list of unique ``leagueId`` values (order is arbitrary); an empty
        list when the page has no entries.

    Raises:
        requests.HTTPError: if the API answers with an error status, instead of
            letting the error payload be parsed as league entries.
        requests.Timeout: if the server does not answer within 10 seconds.
    """
    url = f'https://kr.api.riotgames.com/lol/league/v4/entries/RANKED_SOLO_5x5/{tier}/{division}?page=1'
    Time.update()
    response = requests.get(url, headers=request_header, timeout=10)
    response.raise_for_status()
    return list({entry['leagueId'] for entry in response.json()})

def get_summonerId(leagueId):
    """Return the summoner IDs of every entry in the given league.

    Args:
        leagueId: League identifier inserted into the URL path.

    Returns:
        A list with the ``summonerId`` of each item in the response's
        ``entries`` field, in response order.

    Raises:
        requests.HTTPError: if the API answers with an error status.
        requests.Timeout: if the server does not answer within 10 seconds.
    """
    url = f'https://kr.api.riotgames.com/lol/league/v4/leagues/{leagueId}'
    Time.update()
    response = requests.get(url, headers=request_header, timeout=10)
    response.raise_for_status()
    return [entry['summonerId'] for entry in response.json()['entries']]

def get_puuid(summonerId):
    """Return the ``puuid`` field of the summoner identified by ``summonerId``.

    Raises:
        requests.HTTPError: if the API answers with an error status (previously
            this surfaced only as a KeyError on the missing 'puuid' field).
        requests.Timeout: if the server does not answer within 10 seconds.
    """
    url = f'https://kr.api.riotgames.com/lol/summoner/v4/summoners/{summonerId}'
    Time.update()
    response = requests.get(url, headers=request_header, timeout=10)
    response.raise_for_status()
    return response.json()['puuid']

def get_matchId(puuid, startTime=1651158000):
    """Return up to 100 ranked match IDs of a player, starting at ``startTime``.

    Args:
        puuid: Player identifier inserted into the URL path.
        startTime: Value of the ``startTime`` query parameter
            (presumably an epoch timestamp in seconds - confirm against the API docs).

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: if the API answers with an error status. Without
            this check an error payload (a dict) would be returned and its
            keys would be treated as match IDs by callers that extend a list
            with the result.
        requests.Timeout: if the server does not answer within 10 seconds.
    """
    url = f'https://asia.api.riotgames.com/lol/match/v5/matches/by-puuid/{puuid}/ids?startTime={startTime}&type=ranked&start=0&count=100'
    Time.update()
    response = requests.get(url, headers=request_header, timeout=10)
    response.raise_for_status()
    return response.json()

def get_timeline(matchId):
    """Return the decoded timeline JSON of the given match.

    Raises:
        requests.HTTPError: if the API answers with an error status, so an
            error payload is never handed back as if it were a timeline.
        requests.Timeout: if the server does not answer within 10 seconds.
    """
    url = f'https://asia.api.riotgames.com/lol/match/v5/matches/{matchId}/timeline'
    Time.update()
    response = requests.get(url, headers=request_header, timeout=10)
    response.raise_for_status()
    return response.json()

def get_match(matchId):
    """Return the decoded match JSON of the given match.

    Raises:
        requests.HTTPError: if the API answers with an error status, so an
            error payload is never handed back as if it were match data.
        requests.Timeout: if the server does not answer within 10 seconds.
    """
    url = f'https://asia.api.riotgames.com/lol/match/v5/matches/{matchId}'
    Time.update()
    response = requests.get(url, headers=request_header, timeout=10)
    response.raise_for_status()
    return response.json()

In [6]:
# Step 1: collect the league IDs of DIAMOND I and pickle them.
print('Searching league entries...')

start_time = time()
leagueId = get_leagueId(tier='DIAMOND', division='I')

print(
    f'Total {len(leagueId)} league entries found. '
    f'({time()-start_time:.2f} sec)'
)

with open('data/leagueId', mode='wb') as fp:
    pickle.dump(leagueId, fp)

Searching league entries...
Total 172 league entries found. (0.74 sec)


In [7]:
# Step 2: gather the summoner IDs of every league found above, de-duplicate
# them, and pickle the result.
print('Searching summoner IDs...')

start_time = time()

# One API call per league; the flat list is de-duplicated afterwards.
summonerId = [sid for league in leagueId for sid in get_summonerId(league)]
summonerId = list(set(summonerId))

print(
    f'Total {len(summonerId)} summoner IDs found. '
    f'({(time()-start_time)/60:.2f} min)'
)

with open('data/summonerId', mode='wb') as fp:
    pickle.dump(summonerId, fp)

Searching summoner IDs...


leagueId와 summonerId를 찾는 데에는 많은 시간이 소요되지 않으나, 이후 작업부터는 필요한 API call 횟수가 크게 증가함에 따라 소요 시간이 길어졌다. Riot API를 사용하는 도중 각종 에러가 발생하였는데, 에러 발생 시 일정 시간이 지난 후 다시 연결을 시도하도록 하였고, 이를 통해서도 해결되지 않는 경우에 대비하여 다운받은 정보를 주기적으로 백업하는 과정을 추가하였다.

In [8]:
# Set to True to resume the puuid download from the pickled *_temp checkpoints.
load = False

In [14]:
# Step 3: resolve every summoner ID to a puuid, checkpointing every 100 calls.
if load:
    # Resume: remaining summoner IDs and the puuids collected so far.
    with open('data/summonerId_temp', 'rb') as fp:
        summonerId = pickle.load(fp)

    with open('data/puuid_temp', 'rb') as fp:
        puuid = pickle.load(fp)

else:
    puuid = []


summonerId, puuid = deque(summonerId), deque(puuid)


def _save_puuid_checkpoint():
    """Pickle the summoner IDs still to process and the puuids found so far."""
    with open('data/summonerId_temp', 'wb') as fp:
        pickle.dump(summonerId, fp)

    with open('data/puuid_temp', 'wb') as fp:
        pickle.dump(puuid, fp)


c, end = 0, len(summonerId)
start_time = time()
num_api_call = 0
max_retries = 3   # failed attempts tolerated per summoner ID before skipping it
failures = 0

# The KeyboardInterrupt handler wraps the whole loop so that Ctrl-C during the
# 120 s back-off sleep also saves a checkpoint.
try:
    while summonerId:
        try:
            # Peek instead of popping: a failed request must not lose the ID.
            # (Popping first also ended in an endless retry loop once the
            # deque ran empty, because c never reached end.)
            puuid.append(get_puuid(summonerId[0]))

        except Exception:
            failures += 1

            if failures > max_retries:
                # Persistent failure for this ID: drop it and move on.
                print(f'Skipping summoner ID {summonerId.popleft()} after repeated failures.')
                failures = 0
                continue

            print('Waiting for reconnection...\r', end='')
            sleep(120)
            num_api_call = 0
            continue

        summonerId.popleft()
        failures = 0
        num_api_call += 1
        c += 1
        print(f'Total {c} puuids found.\r', end='')

        if num_api_call == 100:
            _save_puuid_checkpoint()
            num_api_call = 0

except KeyboardInterrupt:
    _save_puuid_checkpoint()

print(f'Total {len(puuid)} puuids found. ({(time()-start_time)/60:.2f} min)')

with open('data/puuid', 'wb') as fp:
    pickle.dump(puuid, fp)

Total 40 puuids found. (0.19 min)


In [15]:
# Set to True to resume the match-ID download from the pickled *_temp checkpoints.
load = False

In [17]:
# Step 4: fetch the ranked match IDs of every puuid, checkpointing every 100 calls.
# NOTE(review): 'data/puuid_temp' is also written by the puuid download step,
# where it holds the puuids collected so far rather than the ones still to
# process here - confirm the file belongs to this step before resuming.
if load:
    with open('data/matchId_temp', 'rb') as fp:
        matchId = pickle.load(fp)

    with open('data/puuid_temp', 'rb') as fp:
        puuid = pickle.load(fp)

    puuid, matchId = deque(puuid), deque(matchId)

else:
    puuid, matchId = deque(puuid), deque()


def _save_matchId_checkpoint():
    """Pickle the match IDs found so far and the puuids still to process."""
    with open('data/matchId_temp', 'wb') as fp:
        pickle.dump(matchId, fp)

    with open('data/puuid_temp', 'wb') as fp:
        pickle.dump(puuid, fp)


c, end = 0, len(puuid)
start_time = time()
num_api_call = 0
max_retries = 3   # failed attempts tolerated per puuid before skipping it
failures = 0

# The KeyboardInterrupt handler wraps the whole loop so that Ctrl-C during the
# 120 s back-off sleep also saves a checkpoint.
try:
    while puuid:
        try:
            # Peek instead of popping: a failed request must not lose the puuid.
            ids = get_matchId(puuid[0])

            # An error payload is a dict; extending with it would add its keys
            # (e.g. 'status') as if they were match IDs.
            if not isinstance(ids, list):
                raise ValueError(f'Unexpected response: {ids!r}')

        except Exception:
            failures += 1

            if failures > max_retries:
                # Persistent failure for this puuid: drop it and move on.
                print(f'Skipping puuid {puuid.popleft()} after repeated failures.')
                failures = 0
                continue

            print('Waiting for reconnection...\r', end='')
            sleep(120)
            num_api_call = 0
            continue

        puuid.popleft()
        matchId.extend(ids)
        failures = 0
        num_api_call += 1
        c += 1
        print(f'{c}/{end} puuids completed.\r', end='')

        if num_api_call == 100:
            _save_matchId_checkpoint()
            num_api_call = 0

except KeyboardInterrupt:
    _save_matchId_checkpoint()


matchId = list(set(matchId))
print(f'Total {len(matchId)} match IDs found. ({(time()-start_time)/60:.2f} min)')

with open('data/matchId', 'wb') as fp:
    pickle.dump(matchId, fp)

Total 3478 match IDs found. (0.19 min)


In [12]:
# True: reload the match IDs and the 'invalid' list from disk and skip matches
# that were already downloaded or rejected.
load = True

분석 대상이 게임 시작 후 15분 시점이므로 길이가 최소 16분인 게임들만 다운받기로 하였다.

In [None]:
# Step 6: download match data, keeping only games with gameDuration >= 960 in
# which every participant has a teamPosition; everything else is recorded in
# 'invalid' so it is not requested again.
if load:
    with open('data/matchId', 'rb') as fp:
        matchId = pickle.load(fp)

    with open('data/invalid', 'rb') as fp:
        invalid = pickle.load(fp)

    # Strip the '.json' suffix to recover the IDs already on disk.
    match = [i[:-5] for i in os.listdir('data/match')]
    matchId = deque(set(matchId) - set(match) - set(invalid))

else:
    invalid = []
    # matchId may still be the plain list built by the match-ID step; the loop
    # below needs a deque (a list has no popleft, and the resulting
    # AttributeError used to be swallowed by the retry handler forever).
    matchId = deque(matchId)


c, end = 0, len(matchId)
start_time = time()
max_retries = 3   # failed attempts tolerated per match before skipping it
failures = 0

try:
    while matchId:
        # Peek instead of popping: a failed request must not lose the match ID.
        fname = matchId[0]

        try:
            match = get_match(fname)
            info = match['info']
            # gameDuration is compared against 960, i.e. 16 minutes if the
            # field is in seconds - TODO confirm the unit for older matches.
            valid = info['gameDuration'] >= 960 and all(
                participant['teamPosition'] != ''
                for participant in info['participants']
            )

        except Exception:
            failures += 1

            if failures > max_retries:
                # Persistent failure: drop it for this run. It is NOT added to
                # 'invalid', so a later run with load=True tries it again.
                print(f'Skipping match {matchId.popleft()} after repeated failures.')
                failures = 0
                continue

            print('Waiting for reconnection...\r', end='')
            sleep(120)
            continue

        matchId.popleft()
        failures = 0

        if valid:
            with open(f'data/match/{fname}.json', 'w') as fp:
                json.dump(match, fp, indent=4)

        else:
            invalid.append(fname)

        c += 1

        print(f'{c}/{end} match IDs completed.\r', end='')

except KeyboardInterrupt:
    print('Stopped.')


print(f'{c} match data downloaded. ({(time()-start_time)/60:.2f} min)')

with open('data/invalid', 'wb') as fp:
    pickle.dump(invalid, fp)

100/51914 match IDs completed.

In [None]:
# Step 5: download the timeline of every match that has a match file but no
# timeline file yet.
files = deque(set(os.listdir('data/match')) - set(os.listdir('data/timeline')))

c, end = 0, len(files)
start_time = time()
max_retries = 3   # failed attempts tolerated per match before skipping it
failures = 0

try:
    while files:
        # Peek instead of popping: a failed request must not lose the file
        # name. Popping first also ended in an endless retry loop once the
        # deque ran empty, because c never reached end.
        fname = files[0][:-5]   # strip '.json'

        try:
            timeline = get_timeline(fname)

            # Never store an error payload as if it were a timeline.
            if 'info' not in timeline:
                raise ValueError(f'Unexpected response: {timeline!r}')

        except Exception:
            failures += 1

            if failures > max_retries:
                # Persistent failure: drop it for this run; rerunning the cell
                # picks it up again because no timeline file was written.
                files.popleft()
                print(f'Skipping match {fname} after repeated failures.')
                failures = 0
                continue

            print('Waiting for reconnection...\r', end='')
            sleep(120)
            continue

        files.popleft()
        failures = 0

        with open(f'data/timeline/{fname}.json', 'w') as fp:
            json.dump(timeline, fp, indent=4)

        c += 1

        print(f'{c}/{end} match IDs completed.\r', end='')

except KeyboardInterrupt:
    print('Stopped.')

print(f'{c}/{end} timeline data downloaded. ({(time()-start_time)/60:.2f} min)')

Waiting for reconnection....

match 파일에서 이용할 정보 중에 `teamPosition`이라는 항목이 있다. `teamPosition`은 해당 플레이어의 게임 내 포지션을 의미하는데, timeline 파일에서는 제공되지 않는 항목이다. 사실 timeline 파일의 위치 정보, 스펠 및 아이템을 이용하여 각 플레이어의 포지션을 유추하는 것도 충분히 가능하나 (match 파일에서 제공되는 항목도 추론된 항목이다), match 파일의 정보를 이용하는 것이 더 효율적이라고 판단하였다.

일부 플레이어의 `teamPosition` 항목이 기재되지 않은 게임들이 존재한다. 해당 게임들을 찾아본 결과, 대부분 정상적으로 진행된 게임과는 거리가 멀어 보여 대상에서 제외하였다.

In [9]:
# Build gameInfo (file name + per-participant (teamPosition, championName))
# and gameResult (the 'win' flag of teams[0]) for every match that has both a
# match file and a timeline file.
# Sorted so that the order of gameInfo/gameResult - which later positional
# slicing depends on - is the same on every run.
files = sorted(set(os.listdir('data/match')) & set(os.listdir('data/timeline')))

gameInfo, gameResult = [], []
c, end = 0, len(files)
start_time = time()

for fname in files:
    # Context manager so the file handle is closed for each of the many files.
    with open('data/match/' + fname) as fp:
        match = json.load(fp)

    try:
        info = match['info']
        # presumably teams[0] is the blue side (teamId 100) - verify against the data
        win = info['teams'][0]['win']
        pos = [
            (participant['teamPosition'], participant['championName'])
            for participant in info['participants']
        ]

    except (KeyError, IndexError):
        # Malformed match file: leave it out, as before.
        pos = None

    # Keep only games with exactly ten participants (later steps index
    # participants 1-10) that all have a position.
    if pos is not None and len(pos) == 10 and all(name != '' for name, _ in pos):
        gameInfo.append((fname, pos))
        gameResult.append(win)

    c += 1

    print(f'{c}/{end} match IDs completed. ({(time()-start_time)/60:.2f} min)\r', end='')


with open('data/gameInfo', 'wb') as fp:
    pickle.dump(gameInfo, fp)

with open('data/gameResult', 'wb') as fp:
    pickle.dump(gameResult, fp)

In [22]:
# Number of games collected in gameInfo.
len(gameInfo)

70921

추가적인 feature engineering은 분석 및 모델링 단계에서 시행하기로 하였다.

In [2]:
# Template row for one game: every feature starts at 0. Key insertion order is
# kept exactly as before because it determines the column order of the
# DataFrame later built from copies of this dict.
data_dict = {}

for side in ('blue', 'red'):
    # Turrets lost per tier and lane, then inhibitors / nexus turrets.
    data_dict.update({
        f'{side}{tier}{lane}TurretLost': 0
        for tier in ('Outer', 'Inner', 'Base')
        for lane in ('Top', 'Mid', 'Bot')
    })
    data_dict.update({
        f'{side}{building}Lost': 0
        for building in ('Inhibitor', 'NexusTurret')
    })

    # Per-position player statistics.
    data_dict.update({
        f'{side}{role}{stat}': 0
        for role in ('Top', 'Jungle', 'Middle', 'Bottom', 'Utility')
        for stat in (
            'ChampionName', 'Kill', 'Death', 'Assist', 'SoloKill', 'SoloKillVictim', 'WardPlaced',
            'WardKill', 'TotalDamageDoneToChampions', 'TotalDamageTaken', 'JungleMinionsKilled',
            'Level', 'MinionsKilled', 'TotalGold',
        )
    })

    # Team objectives.
    data_dict.update({
        f'{side}{element}DragonKill': 0
        for element in ('Air', 'Earth', 'Fire', 'Hextech', 'Water')
    })
    data_dict.update({
        f'{side}{objective}': 0
        for objective in ('AceKill', 'TotalDragonKill', 'HeraldKill')
    })

# First-blood features are recorded once per game, not per team.
data_dict.update(dict.fromkeys(
    ('blueFirstBloodKill', 'FirstBloodKiller', 'FirstBloodVictim'), 0
))

In [3]:
def BUILDING_KILL(event:dict):
    """Count a destroyed building against the team named by ``event['teamId']``.

    teamId 100 is treated as blue, anything else as red. Tower events
    increment ``<team><Tier><Lane>TurretLost`` (nexus turrets have no lane
    part); every other building type increments ``<team>InhibitorLost``.
    Updates the module-level ``data`` dict in place.
    """
    side = 'red' if event['teamId'] != 100 else 'blue'

    if event['buildingType'] != 'TOWER_BUILDING':
        data[f'{side}InhibitorLost'] += 1
        return

    # 'OUTER_TURRET' -> 'Outer', 'NEXUS_TURRET' -> 'Nexus'
    tier = event['towerType'][:-7].title()
    # 'TOP_LANE' -> 'Top'; nexus turrets are not split by lane
    lane = '' if tier == 'Nexus' else event['laneType'][:3].title()

    data[f'{side}{tier}{lane}TurretLost'] += 1

        
def CHAMPION_KILL(event:dict):
    """Update kill / death / assist / solo-kill counters for one kill event.

    Participant IDs 1-5 map to ``teams[0]`` and 6-10 to ``teams[1]``; the
    position comes from the module-level ``position`` list. Updates the
    module-level ``data`` dict in place.

    A ``killerId`` of 0 is treated as "no champion killer" (an execution):
    only the victim's death is counted. Indexing with 0 would otherwise wrap
    around to ``position[-1]`` and credit the kill to the blue team.
    """
    victim = event['victimId']
    vpos, vteam = position[victim-1][0].title(), teams[victim//6]
    data[f'{vteam}{vpos}Death'] += 1

    killer = event['killerId']
    has_killer = killer != 0

    if has_killer:
        kpos, kteam = position[killer-1][0].title(), teams[killer//6]
        data[f'{kteam}{kpos}Kill'] += 1

    if 'assistingParticipantIds' in event:
        for p in event['assistingParticipantIds']:
            ppos, pteam = position[p-1][0].title(), teams[p//6]
            data[f'{pteam}{ppos}Assist'] += 1

    elif has_killer:
        # No assist list at all: the killer took the victim down alone.
        data[f'{kteam}{kpos}SoloKill'] += 1
        data[f'{vteam}{vpos}SoloKillVictim'] += 1
        

def CHAMPION_SPECIAL_KILL(event:dict):
    """Count an ace for the killer's team; other special kill types are ignored.

    Updates the module-level ``data`` dict in place.
    """
    if event['killType'] != 'KILL_ACE':
        return

    # Participant IDs 1-5 -> teams[0], 6-10 -> teams[1].
    side = teams[event['killerId']//6]
    data[f'{side}AceKill'] += 1
        

def ELITE_MONSTER_KILL(event:dict):
    """Count a dragon or Rift Herald kill for the killing team.

    killerTeamId 100 is treated as blue, anything else as red. Dragons
    increment both ``<team>TotalDragonKill`` and the per-element counter
    derived from ``monsterSubType``; heralds increment ``<team>HeraldKill``.
    Any other monster type is ignored instead of being counted as a herald.
    Updates the module-level ``data`` dict in place.

    Raises:
        KeyError: for a dragon sub-type that has no counter in ``data``.
    """
    tname = 'blue' if event['killerTeamId'] == 100 else 'red'
    # 'DRAGON' -> 'Dragon', 'RIFTHERALD' -> 'Herald'
    mtype = event['monsterType'][-6:].title()

    if mtype == 'Dragon':
        # 'FIRE_DRAGON' -> 'Fire'
        stype = event['monsterSubType'][:-7].title()

        data[f'{tname}TotalDragonKill'] += 1
        data[f'{tname}{stype}DragonKill'] += 1

    elif mtype == 'Herald':
        data[f'{tname}HeraldKill'] += 1
        
        
def FIRST_BLOOD(event:dict):
    """Record who scored and who suffered the first kill of the game.

    Sets ``blueFirstBloodKill`` (incremented when the killer is on the blue
    team), ``FirstBloodKiller`` and ``FirstBloodVictim`` (position names) in
    the module-level ``data`` dict.

    An event whose ``killerId`` is 0 has no champion killer and is ignored:
    indexing with 0 would wrap around to ``position[-1]`` and record a blue
    first blood by the wrong player.
    """
    killer = event['killerId']

    if killer == 0:
        return

    kpos, kteam = position[killer-1][0].title(), teams[killer//6]

    if kteam == 'blue':
        data['blueFirstBloodKill'] += 1

    data['FirstBloodKiller'] = kpos

    victim = event['victimId']
    data['FirstBloodVictim'] = position[victim-1][0].title()
    

def WARD_PLACED(event:dict):
    """Count a placed ward for its creator, skipping UNDEFINED and TEEMO_MUSHROOM.

    Updates ``<team><Position>WardPlaced`` in the module-level ``data`` dict.
    """
    if event['wardType'] in ('UNDEFINED', 'TEEMO_MUSHROOM'):
        return

    creator = event['creatorId']
    # Participant IDs 1-5 -> teams[0], 6-10 -> teams[1].
    side = teams[creator//6]
    role = position[creator-1][0].title()

    data[f'{side}{role}WardPlaced'] += 1
        

def WARD_KILL(event:dict):
    """Count a destroyed ward for its killer, skipping UNDEFINED and TEEMO_MUSHROOM.

    Updates ``<team><Position>WardKill`` in the module-level ``data`` dict.
    """
    if event['wardType'] in ('UNDEFINED', 'TEEMO_MUSHROOM'):
        return

    killer = event['killerId']
    # Participant IDs 1-5 -> teams[0], 6-10 -> teams[1].
    side = teams[killer//6]
    role = position[killer-1][0].title()

    data[f'{side}{role}WardKill'] += 1

In [4]:
# True: reload gameInfo and gameResult from their pickles before building the stats.
load = True

In [5]:
# Build one feature row per game from the first 16 timeline frames.
if load:
    with open('data/gameInfo', 'rb') as fp:
        gameInfo = pickle.load(fp)

    with open('data/gameResult', 'rb') as fp:
        gameResult = pickle.load(fp)


teams = ['blue', 'red']

# Explicit dispatch table instead of eval(): only these event types are
# processed, and a NameError raised inside a handler is no longer mistaken
# for "no handler for this event type" and silently dropped.
handlers = {
    'BUILDING_KILL': BUILDING_KILL,
    'CHAMPION_KILL': CHAMPION_KILL,
    'CHAMPION_SPECIAL_KILL': CHAMPION_SPECIAL_KILL,
    'ELITE_MONSTER_KILL': ELITE_MONSTER_KILL,
    'WARD_PLACED': WARD_PLACED,
    'WARD_KILL': WARD_KILL,
}

# The first two columns live under 'damageStats', the rest directly on the
# participant frame.
cols = ['totalDamageDoneToChampions', 'totalDamageTaken', 'jungleMinionsKilled', 'level', 'minionsKilled', 'totalGold']

stats = []
c, end = 0, len(gameInfo)
start_time = time()

for info in gameInfo:
    # position and data must stay module-level names: the event handlers read
    # and update them as globals.
    fname, position = info

    with open('data/timeline/' + fname) as fp:
        file = json.load(fp)

    data = data_dict.copy()

    for idx, p in enumerate(position):
        pname, cname = p
        # Entries 0-4 belong to teams[0], 5-9 to teams[1].
        data[f'{teams[idx//5]}{pname.title()}ChampionName'] = cname

    frames = file['info']['frames'][:16]

    if not frames:
        # Without this check the snapshot below would silently reuse the last
        # frame of the previous game.
        raise ValueError(f'{fname}: timeline has no frames')

    firstblood = 0

    for frame in frames:

        for event in frame['events']:
            eventType = event['type']

            # The first kill with a champion killer is the first blood; a
            # killerId of 0 (no champion killer) does not count.
            if firstblood == 0 and eventType == 'CHAMPION_KILL' and event.get('killerId'):
                firstblood = 1
                FIRST_BLOOD(event)

            handler = handlers.get(eventType)

            if handler is not None:
                handler(event)

    # Snapshot of the per-player stats in the last frame that was kept.
    pf = frames[-1]['participantFrames']

    for participant, pframe in pf.items():
        p = int(participant)
        pos, team = position[p-1][0].title(), teams[p//6]

        for col in cols[:2]:
            cname = col[0].upper()+col[1:]
            data[f'{team}{pos}{cname}'] = pframe['damageStats'][col]

        for col in cols[2:]:
            cname = col[0].upper()+col[1:]
            data[f'{team}{pos}{cname}'] = pframe[col]

    stats.append(data)
    c += 1
    print(f'{c}/{end} files completed. Time spent: {(time() - start_time)/60:.2f} min\r', end='')


# Rows of stats and gameResult are aligned by position.
stats = pd.concat([pd.DataFrame(stats), pd.DataFrame(gameResult, columns=['blueWin'])], axis=1)

70921/70921 files completed. Time spent: 40.79 min

70000개의 데이터를 train 90%, test 10% 비율로 분할하였다.

In [7]:
from sklearn.model_selection import train_test_split

In [25]:
# Split the first 70000 rows 90/10 with a fixed seed and write both parts to CSV.
train, test = train_test_split(
    stats.iloc[:70000],
    random_state=0,
    test_size=0.1,
)

test.to_csv('test.csv', index=False)
train.to_csv('train.csv', index=False)

timeline data에서 킬과 관련된 정보들만 따로 추출하였다.

In [3]:
# Reload the pickled gameInfo list from disk.
with open('data/gameInfo', mode='rb') as handle:
    gameInfo = pickle.load(handle)

In [13]:
# True: reload gameInfo from disk and re-split it before extracting kill events.
load = True

In [15]:
def id2pos(idx):
    """Translate a participant ID (1-10) into '<team><Position>', e.g. 'blueTop'.

    IDs 1-5 map to 'blue' and 6-10 to 'red'; the position is read from the
    module-level ``position`` list.

    Returns:
        The label, or ``None`` for ID 0, which has no matching participant
        (indexing with 0 would wrap around to ``position[-1]`` and produce a
        label on the blue team).
    """
    if idx == 0:
        return None

    teams = ['blue', 'red']
    return teams[idx//6] + position[idx-1][0].title()

# Extract every CHAMPION_KILL event of the first 16 frames into killLocation.csv.
if load:
    with open('data/gameInfo', 'rb') as fp:
        gameInfo = pickle.load(fp)

    # NOTE(review): this split (first 70700 games, 700 held out) is not the
    # same as the stats[:70000] / 10% split used for train.csv, so gameIdx
    # below indexes this shuffled gameInfo list, not train.csv rows - confirm
    # that is intended. It also rebinds the name `test`.
    gameInfo, test = train_test_split(gameInfo[:70700], test_size=700, random_state=0)


kill = []
end = len(gameInfo)
start_time = time()
cols = ['gameIdx','assistingParticipantIds', 'killerId', 'loc_x', 'loc_y', 'timestamp', 'victimId']

for idx, info in enumerate(gameInfo):
    # position must stay a module-level name: id2pos reads it as a global.
    fname, position = info

    # Context manager so the file handle is closed for each of the many files.
    with open('data/timeline/' + fname) as fp:
        frames = json.load(fp)['info']['frames'][:16]

    for frame in frames:
        for event in frame['events']:
            if event['type'] != 'CHAMPION_KILL':
                continue

            if 'assistingParticipantIds' in event:
                assists = [id2pos(pid) for pid in event['assistingParticipantIds']]
            else:
                # np.nan, not np.NaN: the capitalised alias was removed in NumPy 2.0.
                assists = np.nan

            kill.append({
                'gameIdx': idx,
                'assistingParticipantIds': assists,
                'killerId': id2pos(event['killerId']),
                # Read x / y by key instead of relying on dict value order.
                'loc_x': event['position']['x'],
                'loc_y': event['position']['y'],
                'timestamp': event['timestamp'],
                'victimId': id2pos(event['victimId']),
            })

    print(f'{idx+1}/{end} files completed. Time spent: {(time() - start_time)/60:.2f} min\r', end='')

kill = pd.DataFrame(kill, columns=cols)
kill.to_csv('killLocation.csv', index=False)

70000/70000 files completed. Time spent: 33.77 min