# League of Legends data collection method



In [66]:
import requests
import time
import numpy
import csv
import config1

# NOTE: config1 must define both API_KEY (a single key, read on the next line)
# and API_KEYS (a list of keys; get_match_data switches to the next one in the
# list whenever a request comes back with status 429). The Riot API is kinda
# slow, so you should have more than 1 key.
api_key = config1.API_KEY # "INSERT YOUR RIOT DEVELOPMENT API KEY"
region = 'europe'

## Getting match data
The following function is responsible for getting match data and checking whether the match was a solo queue ranked game (queueId 420). If you want to try out a different queue, change the queueId from 420 to another value. If the match is ranked, the function makes a second API request to get the timeline data. Timeline data contains the match state for each minute of the match.

In [70]:
k = 0  # index into config1.API_KEYS of the key currently in use


def _request_json(url_without_key, api_key, max_bad_responses=5):
  """GET ``url_without_key + api_key`` and return ``(payload, api_key)``.

  On status 429 the global index ``k`` is advanced to the next entry of
  ``config1.API_KEYS``, the call sleeps for 10 seconds and retries with the
  new key. ``payload`` is the decoded JSON body on status 200, or ``None``
  on 404 or after ``max_bad_responses`` other unexpected statuses. The key
  that ended up being used is returned so the caller can keep using it.
  """
  global k
  bad_responses = 0
  while True:
    resp = requests.get(url_without_key + api_key)
    status = resp.status_code
    if status == 200:
      return resp.json(), api_key
    if status == 404:
      return None, api_key
    if status == 429:
      k = (k + 1) % len(config1.API_KEYS)
      api_key = config1.API_KEYS[k]
      print(f'sleeping w/ {api_key[-4:]}')
      time.sleep(10)
      continue
    # Any other status: retry a few times with a short pause instead of
    # hammering the API in an endless tight loop.
    print('response is bad: ' + str(status))
    bad_responses += 1
    if bad_responses >= max_bad_responses:
      return None, api_key
    time.sleep(1)


def get_match_data(region, match_id, api_key, queue_id=420):
  """Return the timeline JSON of a match, or 0 if it should be skipped.

  Parameters:
    region: routing value used as the host prefix (e.g. 'europe').
    match_id: full match id string (e.g. 'EUW1_6958829776').
    api_key: key to start with; replaced from config1.API_KEYS on 429.
    queue_id: only matches whose info.queueId equals this are kept.
      Defaults to 420, the queue this script treats as ranked solo queue.

  Returns:
    The decoded timeline response, or 0 when the match or its timeline
    could not be fetched (404 / repeated bad responses) or the match is
    from a different queue.
  """
  base_url = "https://" + region + ".api.riotgames.com/lol/match/v5/matches/" + match_id

  match_data, api_key = _request_json(base_url + "?api_key=", api_key)
  if match_data is None:
    return 0

  if match_data['info']['queueId'] != queue_id:
    print ('Match wasnt ranked')
    return 0

  # The status of *this* request decides the outcome; the original checked
  # the earlier match response here and could loop forever.
  timeline_data, api_key = _request_json(base_url + "/timeline?api_key=", api_key)
  if timeline_data is None:
    return 0
  return timeline_data

## Getting the match state at 15 minutes
The function first checks whether the match was shorter than 15 minutes. If it was, there is no 15-minute state to extract, so it makes no sense to get the match data. If the game was longer than 15 minutes, the function returns an array containing all the chosen relevant features.

In [48]:
_SNAPSHOT_FRAME = 15  # index of the timeline frame used as the match snapshot


def _team_frame_totals(frame, team):
  """Sum the per-player snapshot stats of ``team`` in one timeline frame."""
  totals = {'minions': 0, 'jungle': 0, 'gold': 0, 'xp': 0, 'damage': 0}
  for player in team:
    stats = frame['participantFrames'][str(player['participantId'])]
    totals['minions'] += stats['minionsKilled']
    totals['jungle'] += stats['jungleMinionsKilled']
    totals['gold'] += stats['totalGold']
    totals['xp'] += stats['xp']
    totals['damage'] += stats['damageStats']['totalDamageDoneToChampions']
  return totals


def _count_events(frames, blue_ids, red_ids):
  """Tally per-team event counts over ``frames``.

  Returns ``(counts, blue_first_blood)`` where ``counts['blue']`` and
  ``counts['red']`` map a counter name to its value. An event is credited
  to the team of its killerId / creatorId; ids that belong to neither team
  are not credited to anyone.
  """
  counter_names = ('controlWards', 'wards', 'kills', 'dragons', 'heralds',
                   'towers', 'inhibitors', 'plates')
  counts = {side: dict.fromkeys(counter_names, 0) for side in ('blue', 'red')}
  blue_first_blood = 0

  def side_of(participant_id):
    if participant_id in blue_ids:
      return 'blue'
    if participant_id in red_ids:
      return 'red'
    return None

  for frame in frames:
    for event in frame['events']:
      kind = event['type']

      if kind == 'WARD_PLACED':
        side = side_of(event.get('creatorId'))
        if side is not None:
          counts[side]['wards'] += 1
          if event.get('wardType') == 'CONTROL_WARD':
            counts[side]['controlWards'] += 1
        continue

      side = side_of(event.get('killerId'))
      if side is None:
        continue

      if kind == 'CHAMPION_KILL':
        counts[side]['kills'] += 1
      elif kind == 'ELITE_MONSTER_KILL':
        if event['monsterType'] == 'DRAGON':
          counts[side]['dragons'] += 1
        elif event['monsterType'] == 'RIFTHERALD':
          counts[side]['heralds'] += 1
      elif kind == 'BUILDING_KILL':
        if event['buildingType'] == 'TOWER_BUILDING':
          counts[side]['towers'] += 1
        elif event['buildingType'] == 'INHIBITOR_BUILDING':
          counts[side]['inhibitors'] += 1
      elif kind == 'TURRET_PLATE_DESTROYED':
        # Credited to the killer's team, like every other event here (the
        # original credited the opposite team).
        counts[side]['plates'] += 1
      elif kind == 'CHAMPION_SPECIAL_KILL':
        if event['killType'] == 'KILL_FIRST_BLOOD' and side == 'blue':
          blue_first_blood = 1

  return counts, blue_first_blood


def get_match_state(data):
  """Build the feature row describing a match at frame 15 of its timeline.

  Parameters:
    data: decoded timeline response. The first five entries of
      data['info']['participants'] are treated as the blue team and the
      rest as the red team.

  Returns:
    0 if the timeline has fewer than 16 frames. Otherwise a list holding
    one row (a list) with, in order: match id; blue control wards, wards,
    kills, dragons, heralds, towers, inhibitors, turret plates, first
    blood, minions, jungle minions, gold, xp, damage to champions; red
    control wards, wards, kills, dragons, heralds, towers, inhibitors,
    turret plates, minions, jungle minions, gold, xp, damage to
    champions; blueWin.

  Event counters cover frames 0..15. blueWin is 1 when the last frame has
  a GAME_END event whose winningTeam is 100.
  """
  frames = data['info']['frames']
  if len(frames) <= _SNAPSHOT_FRAME:
    return 0

  participants = data['info']['participants']
  blue_team = participants[:5]
  red_team = participants[5:]

  snapshot = frames[_SNAPSHOT_FRAME]
  blue_totals = _team_frame_totals(snapshot, blue_team)
  red_totals = _team_frame_totals(snapshot, red_team)

  blue_ids = {player['participantId'] for player in blue_team}
  red_ids = {player['participantId'] for player in red_team}
  counts, blue_first_blood = _count_events(
      frames[:_SNAPSHOT_FRAME + 1], blue_ids, red_ids)
  blue = counts['blue']
  red = counts['red']

  blue_win = 0
  for event in frames[-1]['events']:
    if event['type'] == 'GAME_END' and event['winningTeam'] == 100:
      blue_win = 1

  row = [[
      data['metadata']['matchId'],
      blue['controlWards'], blue['wards'], blue['kills'], blue['dragons'],
      blue['heralds'], blue['towers'], blue['inhibitors'], blue['plates'],
      blue_first_blood,
      blue_totals['minions'], blue_totals['jungle'], blue_totals['gold'],
      blue_totals['xp'], blue_totals['damage'],
      red['controlWards'], red['wards'], red['kills'], red['dragons'],
      red['heralds'], red['towers'], red['inhibitors'], red['plates'],
      red_totals['minions'], red_totals['jungle'], red_totals['gold'],
      red_totals['xp'], red_totals['damage'],
      blue_win,
  ]]

  return row

## Getting the seed players
The process I used was to find a couple of players who have recently played a lot of ranked games in the desired rank. The loop then takes all of the players that participated in the provided matches and puts them into the player_ids array. Around 300 seed matches are enough to yield 30 000 unique solo queue games in the desired rank. The result of this cell is a list containing all the players who participated in the seed matches.

In [53]:
# player_ids = []
# NOTE(review): the loop below reads player_ids before assigning it, so the
# list must already exist in the kernel; presumably the line above is left
# commented out so that re-running the cell keeps ids collected earlier.

# Numeric match ids used as seeds; the loop below prefixes each with "EUW1_".
# Some ids appear more than once in this list.
seed_match_ids = [6958778302, 6958623155, 6958344063, 6958379039, 6958447223, 6958507005, 6958815579, 6958794602, 6958744594, 6958665012, 6958262540, 6956731196, 6958829776, 6958421900, 6958734032, 6958507005, 6957546820
   # Populate this list with recent match ids from the desired rank.
   # Rule of thumb: 100 seed matches will yield around 10 000 solo queue games.
  , 6958832944, 6958670455, 6957100312, 6958755009, 6958364051, 6958542269, 6958598414, 6958617736, 6958647754, 6958587185, 6958587185, 6958619610, 6958426836, 6958299521, 6958127241, 6958605571, 6958659823, 6958100884, 6958009709, 6958833836, 6958816933, 6958665012, 6958486644, 6958206832, 6958438964, 6958832865, 6958211129, 6956547501, 6958263339, 6958632091, 6958207021, 6952285263, 6958671632, 6958784898, 6958834571, 6957523075, 6958735730, 6958727694, 6958641864, 6958759279, 6958652351, 6958088093, 6957875530, 6958823762, 6956266617, 6958825107, 6958773627, 6958778302, 6958640796, 6957827847, 6958091869, 6958511452, 6958804394, 6958218138, 6958025176,
                  6958444650, 6958117067, 6958454952, 6958454952, 6958668546
]

i = 0  # progress counter, also used by the match-processing cell further down

# player_ids is never initialised in this cell, so create it on a fresh kernel
# while keeping whatever an earlier run already collected.
try:
  player_ids
except NameError:
  player_ids = []
known_players = set(player_ids)  # O(1) membership test; player_ids keeps the order

# dict.fromkeys drops repeated seed ids (keeping order) so no match is requested twice.
for seed_match_id in dict.fromkeys(seed_match_ids):
  api_url = "https://" + region + ".api.riotgames.com/lol/match/v5/matches/EUW1_" + str(seed_match_id) + "?api_key="+ api_key

  match_data = None
  while True:
    resp = requests.get(api_url)
    if resp.status_code == 200:
      match_data = resp.json()
      break
    if resp.status_code == 429:
      # Rate limited: wait, then retry the same request.
      print('sleeping')
      time.sleep(10)
      continue
    # Any other status would previously retry forever without a pause;
    # report it and move on to the next seed match instead.
    print('skipping seed match ' + str(seed_match_id) + ': ' + str(resp.status_code))
    break

  if match_data is None:
    continue

  for participant in match_data['metadata']['participants']:
    if participant not in known_players:
      known_players.add(participant)
      player_ids.append(participant)
  print(len(player_ids))
  if len(player_ids) % 100 == 0:
    print(str(len(player_ids)))
    print(player_ids[-1])

sleeping
130
130
130
130
130
130
130
130
130
130
130
130
130
130
130
130
130
140
145
154
162
169
177
186
195
200
200
DzVc7gWSIFSfBWCxiSm_qFDl95IDBzuZt-5ciNiVyta7ejCH0P3NSFbD3lzLz_ojukPo1HY6SxeuJQ
205
205
214
221
229
236
239
248
258
266
275
283
283
292
302
310
320
325
329
335
340
348
354
360
367
369
375
379
387
396
403
413
418
427
436
446
454
460
460
467
473
481
485
490
497
503
505
512
517
517
521


## Getting the matches
For each player collected in the cell above, the following cell takes the 100 most recent matches. You can play around with that number, but since some players only play occasionally, taking more than 20 matches might give us some matches from a previous season or from a lower / higher rank than desired. Since I was looking for challenger games, I've set that number to 100.


In [54]:
# match_ids = []
# match_ids is never initialised in this cell, so create it on a fresh kernel
# while keeping whatever an earlier run already collected.
try:
  match_ids
except NameError:
  match_ids = []
known_match_ids = set(match_ids)  # O(1) membership test; match_ids keeps the order

# NOTE(review): player_ids[1:] skips the first collected player - confirm this is intended.
for player in player_ids[1:]:
  api_url = "https://" + region + ".api.riotgames.com/lol/match/v5/matches/by-puuid/" + player + "/ids?start=0&count=100&api_key="+ api_key

  player_matches = []
  bad_responses = 0
  while True:
    resp = requests.get(api_url)
    if resp.status_code == 200:
      player_matches = resp.json()
      break
    if resp.status_code == 429:
      # Rate limited: wait, then retry the same request.
      print('sleeping')
      time.sleep(10)
      continue
    # Unexpected status: retry a few times with a pause, then skip this
    # player rather than looping forever.
    print(str(resp.status_code))
    bad_responses += 1
    if bad_responses >= 3:
      break
    time.sleep(1)

  for match_id in player_matches:
    if match_id not in known_match_ids:
      known_match_ids.add(match_id)
      match_ids.append(match_id)

  if len(match_ids) % 100 == 0:
    print(str(len(match_ids)))

sleeping
sleeping
sleeping
sleeping
sleeping
sleeping
sleeping
sleeping
sleeping
sleeping
sleeping
sleeping
sleeping
sleeping
sleeping
sleeping
sleeping
sleeping
sleeping
sleeping
sleeping
sleeping
sleeping
sleeping
sleeping
sleeping
sleeping
17900
sleeping
sleeping
sleeping
sleeping
sleeping
sleeping
sleeping
sleeping
sleeping
24400
28700
sleeping
sleeping
sleeping
sleeping
sleeping
sleeping
sleeping
sleeping
sleeping
34400


In [72]:
i = 0  # number of matches processed so far (was `pi = 0`, but the loop counts with `i`)
processed_matches = []
processed_lookup = set()  # same ids as processed_matches, for O(1) membership tests
data = ["heading_row"]  # index 0 is a placeholder; only data[1:] is ever written out
last_checkpoint = 0  # row count at the most recent checkpoint write

for match_id in match_ids:
  if match_id in processed_lookup:
    print('match_already_processed')
    continue

  processed_matches.append(match_id)
  processed_lookup.add(match_id)

  # k is the module-level key index that get_match_data advances on 429.
  timeline_data = get_match_data(region,match_id,config1.API_KEYS[k])
  if timeline_data != 0:
    row = get_match_state(timeline_data)
    if row != 0:
      # get_match_state returns a one-element list holding the row, so
      # extend (not append) keeps `data` a flat list of rows for csv.
      data.extend(row)

  if i % 100 == 0:
    print(str(i) + " : "  + str(match_id))

  rows_collected = len(data) - 1
  # Checkpoint once per 500 collected rows; the last_checkpoint guard stops
  # the file being rewritten on every iteration while the count is unchanged.
  if rows_collected > 0 and rows_collected % 500 == 0 and rows_collected != last_checkpoint:
    with open('match_data.csv', 'w', newline='') as file:
      writer = csv.writer(file)
      writer.writerows(data[1:])
    last_checkpoint = rows_collected
    print("match data written at checkpoint: " + str(rows_collected))
  i += 1

0 : EUW1_6958829776
sleeping w/ cb4d
sleeping w/ e926
100 : EUW1_6954777188
sleeping w/ 228d
Match wasnt ranked
Match wasnt ranked
Match wasnt ranked
Match wasnt ranked
Match wasnt ranked
Match wasnt ranked
Match wasnt ranked
Match wasnt ranked
Match wasnt ranked
Match wasnt ranked
Match wasnt ranked
200 : EUW1_6955867362
sleeping w/ 8807
Match wasnt ranked
Match wasnt ranked
Match wasnt ranked
Match wasnt ranked
Match wasnt ranked
Match wasnt ranked
Match wasnt ranked
Match wasnt ranked
Match wasnt ranked
Match wasnt ranked
Match wasnt ranked
Match wasnt ranked
Match wasnt ranked
Match wasnt ranked
Match wasnt ranked
Match wasnt ranked
Match wasnt ranked
Match wasnt ranked
Match wasnt ranked
sleeping w/ fc41
Match wasnt ranked
Match wasnt ranked
Match wasnt ranked
Match wasnt ranked
300 : EUW1_6955496980
sleeping w/ cb4d
sleeping w/ e926
400 : EUW1_6956507712
sleeping w/ 228d
Match wasnt ranked
Match wasnt ranked
Match wasnt ranked
Match wasnt ranked
sleeping w/ 8807
Match wasnt ranke

## Getting the match data
The following cell takes each of the match_ids and gets the match state at 15 minutes. Every 500 processed matches it saves the dataset into a match_data.csv file.

In [None]:
# Final dump: write every entry after the placeholder at data[0] to the CSV,
# overwriting any earlier file of the same name.
with open('match_data.csv', 'w', newline='') as csv_out:
  csv.writer(csv_out).writerows(data[1:])

In [52]:
# Self-assignments: they change nothing, but raise NameError if either list
# does not exist yet in the running kernel.
# NOTE(review): presumably a leftover from saving/restoring the lists between
# sessions - confirm whether this cell is still needed.
player_ids = player_ids
match_ids = match_ids