In [5]:
import requests
import os
import json
import joblib
import pandas as pd
import time
from math import ceil
from dotenv import load_dotenv
from tqdm import tqdm

In [6]:
# Folder (relative to the working directory) that the CSV files produced by this
# notebook are written into; later cells build paths as DATA_DIR + '<file name>'.
DATA_DIR = "./data/"

In [7]:
def getQuery(query, params=None):
    """Send a GraphQL query to the Warcraft Logs v2 client endpoint.

    Parameters
    ----------
    query : str
        GraphQL query text.
    params : dict, optional
        GraphQL variables. When omitted, the query is sent in the URL query
        string; otherwise the query and its variables are sent as a JSON body.

    Returns
    -------
    The raw response object returned by ``requests.get`` (not parsed, and the
    HTTP status is not checked here).

    Notes
    -----
    Reads the module-level ``token`` for the bearer credential, so the token
    cell must have been executed before this function is called.
    """
    auth_header = {"Authorization": f'Bearer {token}'}
    endpoint = 'https://www.warcraftlogs.com/api/v2/client'

    if params is not None:
        # Query plus variables travel together in the JSON body.
        payload = {'query': query, 'variables': params}
        return requests.get(url=endpoint, headers=auth_header, json=payload)

    # No variables: the bare query goes in the URL query string.
    return requests.get(url=endpoint, headers=auth_header, params={'query': query})

In [8]:
def getRateLimitData():
    """Return the API's current rate-limit status.

    Returns
    -------
    dict
        The ``rateLimitData`` object from the response, with the keys
        requested below: ``limitPerHour``, ``pointsSpentThisHour`` and
        ``pointsResetIn``.
    """
    rate_limit_query = """query{
        rateLimitData{
            limitPerHour
            pointsSpentThisHour
            pointsResetIn
        }
    }"""

    payload = getQuery(rate_limit_query).json()
    return payload['data']['rateLimitData']

In [9]:
def getReports(zoneID, max_page=1):
    """Collect the report codes of a zone, mapped to their start date.

    Parameters
    ----------
    zoneID : int
        Zone whose reports are listed.
    max_page : int, default 1
        Number of result pages to retrieve (maximum 25 currently), or -1 to
        keep paging until the API reports that there are no more pages.

    Returns
    -------
    dict
        ``{report code: 'YYYY-MM-DD'}``. The date is the report's
        ``startTime`` (epoch milliseconds) converted from UTC to US/Eastern.
        If the same code is returned twice, the later entry wins.

    Raises
    ------
    AssertionError
        If ``max_page`` is neither -1 nor within 1..25.
    """
    assert max_page == -1 or max_page in range(1, 26)

    query = """query($zone:Int, $page:Int){
        reportData{
            reports(zoneID:$zone, page:$page){
                has_more_pages
                data{
                    code
                    startTime
                }
            }
        }
    }"""

    reports = list()
    startTimes = list()

    page = 1
    while True:
        response = getQuery(query, params={'zone':zoneID, 'page':page})
        result = response.json()['data']['reportData']['reports']

        # Collect the page BEFORE looking at has_more_pages: the last page
        # still carries data, it just has nothing after it. Checking the flag
        # first would silently drop that final page.
        for report in result['data']:
            reports.append(report['code'])
            startTimes.append(report['startTime'])

        if not result['has_more_pages']:
            break
        if max_page != -1 and page >= max_page:
            break
        page += 1

    dates = [str(pd.to_datetime(startTime, unit='ms', utc=True).tz_convert('US/Eastern').date())
             for startTime in startTimes]

    return dict(zip(reports, dates))

In [10]:
def getReportDate(reportID):
    """Return the start date of a report as a 'YYYY-MM-DD' string.

    The report's ``startTime`` (epoch milliseconds) is interpreted as UTC and
    converted to US/Eastern before the calendar date is taken.

    Parameters
    ----------
    reportID : str
        Report code to look up.

    Returns
    -------
    str
        ISO-formatted date of the report start in US/Eastern time.
    """
    query = """query($reportID:String){
        reportData{
            report(code:$reportID){
                startTime
            }
        }
    }"""

    payload = getQuery(query, params={'reportID':reportID}).json()
    start_ms = payload['data']['reportData']['report']['startTime']
    started = pd.to_datetime(start_ms, unit='ms', utc=True)

    return str(started.tz_convert('US/Eastern').date())

In [11]:
def getFights(reportID, includeTrash=False):
    """List the fights recorded in a report.

    Parameters
    ----------
    reportID : str
        Report code to query.
    includeTrash : bool, default False
        When false, only fights whose ``encounterID`` is one of the values of
        the module-level ``bosses`` mapping are kept. When true, every fight
        returned by the API is passed through unchanged.

    Returns
    -------
    list of dict
        One dict per fight with the requested fields ``difficulty``,
        ``encounterID``, ``id`` and ``kill``.
    """
    query = """query($reportID:String){
        reportData{
            report(code:$reportID){
                fights{
                    difficulty
                    encounterID
                    id
                    kill
                }
            }
        }
    }"""

    payload = getQuery(query, params={'reportID':reportID}).json()
    everyFight = payload['data']['reportData']['report']['fights']

    if includeTrash:
        return everyFight

    # Keep boss encounters only.
    return [entry for entry in everyFight if entry['encounterID'] in bosses.values()]

In [12]:
def getFightsParallel(reportID, includeTrash=False):
    """Fetch a report's fights and tag each one with the report it came from.

    Wrapper around ``getFights`` for use as a parallel task: results from many
    reports get flattened into one list afterwards, so each fight dict needs
    to carry its own ``reportID``.

    Parameters
    ----------
    reportID : str
        Report code to query.
    includeTrash : bool, default False
        Forwarded to ``getFights``.

    Returns
    -------
    list of dict
        The list returned by ``getFights``; every dict is modified in place to
        gain a trailing ``'reportID'`` key.
    """
    tagged = getFights(reportID, includeTrash)
    for entry in tagged:
        entry.update(reportID=reportID)

    return tagged

In [22]:
def getFightTables(reportID, fightID):
    """Fetch the summary table of a single fight.

    Parameters
    ----------
    reportID : str
        Report code the fight belongs to.
    fightID : int
        ID of the fight inside that report.

    Returns
    -------
    dict
        The ``data`` object of the report's ``table`` field (duration, damage
        done, healing done, etc.), with ``'reportID'`` and ``'fightID'`` added
        and the sections this notebook does not use removed.
    """
    #duration, damage done, healing done, etc
    query = """query($reportID:String, $fightIDs:[Int]){
        reportData{
            report(code:$reportID){
                table(fightIDs:$fightIDs)
            }
        }
    }"""

    response = getQuery(query, params={'reportID':reportID, 'fightIDs':[fightID]})

    tables = response.json()['data']['reportData']['report']['table']['data']
    tables['reportID'] = reportID
    tables['fightID'] = fightID

    # Drop the sections nothing downstream reads. pop() with a default is used
    # instead of `del` so a response that happens to lack one of these
    # sections does not abort the whole (parallel) download with a KeyError.
    for unused in ('damageTaken', 'playerDetails', 'logVersion', 'gameVersion'):
        tables.pop(unused, None)

    return tables

In [29]:
def transformFightTable(table):
    """Flatten one fight table into one row per player.

    Parameters
    ----------
    table : dict
        A fight table carrying ``'reportID'``, ``'fightID'``,
        ``'damageDone'``, ``'healingDone'``, ``'deathEvents'`` and
        ``'composition'``. The damage/healing entries are read for ``'name'``
        and ``'total'``, the death events for ``'name'``, and the composition
        entries for ``'name'``, ``'type'`` and ``'specs'``.

    Returns
    -------
    list of dict
        One dict per entry of ``table['composition']`` with the keys
        ``Report ID, Fight ID, Name, Class, Specialization, Role,
        Damage Done, Healing Done, Death Count`` (in that order). Players
        absent from the damage, healing or death lists get 0; players without
        a usable first spec entry get empty ``Specialization`` and ``Role``.
    """
    damageDone = {player['name']:player['total'] for player in table['damageDone']}
    healingDone = {player['name']:player['total'] for player in table['healingDone']}

    # Number of death events per player name.
    deaths = {}
    for death in table['deathEvents']:
        deaths[death['name']] = deaths.get(death['name'], 0) + 1

    output = list()
    for player in table['composition']:
        try:
            # Only the first listed spec is used.
            primarySpec = player['specs'][0]
            spec = primarySpec['spec']
            role = primarySpec['role']
        except (KeyError, IndexError, TypeError):
            # Missing, empty or null spec info: leave both columns blank.
            spec = ""
            role = ""

        output.append({
           'Report ID':table['reportID'],
           'Fight ID':table['fightID'],
           'Name':player['name'],
           'Class':player['type'],
           'Specialization':spec,
           'Role':role,
           'Damage Done':damageDone.get(player['name'], 0),
           'Healing Done':healingDone.get(player['name'], 0),
           'Death Count':deaths.get(player['name'], 0)
        })

    return output

In [15]:
# Read the API client credentials from the environment; they are used below to
# request an OAuth token from warcraftlogs.com.
# load_dotenv() runs first - presumably so a local .env file can supply the
# variables (python-dotenv); confirm against the project setup.
# os.environ[...] raises KeyError when a variable is not set.
load_dotenv()
CLIENT_ID = os.environ['CLIENT_ID']
CLIENT_SECRET = os.environ['CLIENT_SECRET']

In [16]:
# Exchange the client credentials for an OAuth access token (client-credentials
# grant). `token` is read by getQuery for every API call.
auth = (CLIENT_ID, CLIENT_SECRET)
data = {'grant_type':'client_credentials'}
url = 'https://www.warcraftlogs.com/oauth/token'
response = requests.post(url=url, data=data, auth=auth)

# Fail here, next to the cause, rather than letting every later query go out
# with "Bearer None" and blow up while parsing an error response.
response.raise_for_status()
token = response.json().get('access_token')
if not token:
    raise RuntimeError('Token response did not contain an access_token')

In [17]:
# Zone whose reports are listed (passed to getReports as zoneID).
zoneID = 42

# Boss name -> encounter ID. getFights keeps only fights whose encounterID is
# one of these values unless includeTrash is set.
bosses = {
    "Vexie and the Geargrinders": 3009,
    "Cauldron of Carnage": 3010,
    "Rik Reverb": 3011,
    "Stix Bunkjunker": 3012,
    "Sprocketmonger Lockenstock": 3013,
    "One-Armed Bandit": 3014,
    "Mug'Zee, Heads of Security": 3015,
    "Chrome King Gallywix": 3016,
}

# Difficulty label -> numeric ID. Presumably these match the `difficulty`
# field returned for each fight - verify against the API before relying on it.
difficulties = {
    "LFR": 1,
    "Normal": 3,
    "Heroic": 4,
    "Mythic": 5,
}

# Report IDs

In [83]:
# Page through every report of the zone (max_page=-1 means "until the API says
# there are no more pages"). `reports` maps report code -> start date;
# `ptrReports` is just the list of codes, in the same order.
reports = getReports(zoneID, max_page=-1)
ptrReports = list(reports)

In [88]:
# Save the report code -> start date mapping as a two-column CSV.
with open(DATA_DIR+'ptrReports.csv', 'w') as file:
    # No space after the comma: with 'Report ID, Date' a CSV reader sees a
    # column literally named ' Date', unlike every other header written by
    # this notebook.
    file.write('Report ID,Date\n')
    for report, date in reports.items():
        file.write(f'{report},{date}\n')

# Fight Metadata

In [14]:
# Download the boss-fight list of every report in parallel: one joblib task
# per report code, one worker per CPU reported by joblib.
print(f'Total Tasks: {len(ptrReports)}')

all_jobs = [
    joblib.delayed(getFightsParallel)(code)
    for code in ptrReports
]
results = joblib.Parallel(n_jobs=joblib.cpu_count(), verbose=10)(all_jobs)

# `results` holds one list of fights per report; flatten it into a single list.
fightInfo = [entry for perReport in results for entry in perReport]

Total Tasks: 766


[Parallel(n_jobs=24)]: Using backend LokyBackend with 24 concurrent workers.
[Parallel(n_jobs=24)]: Done   2 tasks      | elapsed:    2.6s
[Parallel(n_jobs=24)]: Done  13 tasks      | elapsed:    3.0s
[Parallel(n_jobs=24)]: Done  24 tasks      | elapsed:    3.4s
[Parallel(n_jobs=24)]: Done  37 tasks      | elapsed:    4.6s
[Parallel(n_jobs=24)]: Done  50 tasks      | elapsed:    4.9s
[Parallel(n_jobs=24)]: Done  65 tasks      | elapsed:    6.2s
[Parallel(n_jobs=24)]: Done  80 tasks      | elapsed:    7.2s
[Parallel(n_jobs=24)]: Done  97 tasks      | elapsed:    7.9s
[Parallel(n_jobs=24)]: Done 114 tasks      | elapsed:    9.4s
[Parallel(n_jobs=24)]: Done 133 tasks      | elapsed:   10.6s
[Parallel(n_jobs=24)]: Done 152 tasks      | elapsed:   11.9s
[Parallel(n_jobs=24)]: Done 173 tasks      | elapsed:   13.5s
[Parallel(n_jobs=24)]: Done 194 tasks      | elapsed:   15.4s
[Parallel(n_jobs=24)]: Done 217 tasks      | elapsed:   17.0s
[Parallel(n_jobs=24)]: Done 240 tasks      | elapsed:  

In [15]:
# Save one row per fight: difficulty, encounter, fight ID, kill flag, report.
with open(DATA_DIR+'fightMetadata.csv', 'w') as file:
    file.write(','.join(['Difficulty', 'Encounter ID', 'Fight ID', 'Killed', 'Report ID']))
    file.write('\n')
    for fight in fightInfo:
        # Look every field up by key so each value lands under its header
        # regardless of the key order in which the API returned the fight.
        row = [fight['difficulty'], fight['encounterID'], fight['id'], fight['kill'], fight['reportID']]
        file.write(','.join([str(value) for value in row]))
        file.write('\n')

# Fight Data

In [33]:
### Parallel Processing ###
# Download the table of every fight in batches sized to the API's hourly point
# budget: each batch uses what is left of the budget minus a 50-point safety
# margin, and the loop sleeps until the budget resets when too little is left.
# NOTE(review): this assumes one fight-table request costs about one point -
# confirm against the API's rate-limit accounting.
allResults = list()
totalFights = len(fightInfo)

curIndex = 0
while curIndex < totalFights:
    rateLimitData = getRateLimitData()
    pingsRemaining = rateLimitData['limitPerHour'] - ceil(rateLimitData['pointsSpentThisHour'])
    timeRemaining = rateLimitData['pointsResetIn']

    # <= 50 (not < 50): with exactly 50 left the batch below would be empty
    # and the loop would spin on rate-limit queries without making progress.
    if pingsRemaining <= 50:
        sleepTime = ceil(timeRemaining) + 60
        for _ in tqdm(range(sleepTime), desc=f'Sleep {sleepTime} seconds'):
            time.sleep(1)

        # Re-query the budget at the top of the loop instead of recomputing it
        # here: that keeps the ceil() rounding in one place (a fractional
        # budget would become a float slice index) and re-checks the margin,
        # so endIndex can never fall behind curIndex if the reset has not
        # happened yet.
        continue

    endIndex = min(curIndex + pingsRemaining - 50, totalFights) #-50 to be safe and not error out from running over

    print(f'Total Tasks: {endIndex-curIndex}')
    all_jobs = [joblib.delayed(getFightTables)(fight['reportID'], fight['id'])
                for fight in tqdm(fightInfo[curIndex:endIndex], total=(endIndex-curIndex))]
    results = joblib.Parallel(n_jobs=joblib.cpu_count(), verbose=10)(all_jobs)

    allResults += results
    curIndex = endIndex

Total Tasks: 19397


[Parallel(n_jobs=24)]: Using backend LokyBackend with 24 concurrent workers.
[Parallel(n_jobs=24)]: Done   2 tasks      | elapsed:    3.0s
[Parallel(n_jobs=24)]: Done  13 tasks      | elapsed:    3.5s
[Parallel(n_jobs=24)]: Done  24 tasks      | elapsed:    3.5s
[Parallel(n_jobs=24)]: Done  37 tasks      | elapsed:    5.3s
[Parallel(n_jobs=24)]: Done  50 tasks      | elapsed:    5.4s
[Parallel(n_jobs=24)]: Done  65 tasks      | elapsed:    7.0s
[Parallel(n_jobs=24)]: Done  80 tasks      | elapsed:    7.7s
[Parallel(n_jobs=24)]: Done  97 tasks      | elapsed:    8.8s
[Parallel(n_jobs=24)]: Done 114 tasks      | elapsed:   10.8s
[Parallel(n_jobs=24)]: Done 133 tasks      | elapsed:   11.4s
[Parallel(n_jobs=24)]: Done 152 tasks      | elapsed:   13.1s
[Parallel(n_jobs=24)]: Done 173 tasks      | elapsed:   15.1s
[Parallel(n_jobs=24)]: Done 194 tasks      | elapsed:   16.8s
[Parallel(n_jobs=24)]: Done 217 tasks      | elapsed:   18.3s
[Parallel(n_jobs=24)]: Done 240 tasks      | elapsed:  

[Parallel(n_jobs=24)]: Done 9752 tasks      | elapsed: 12.8min
[Parallel(n_jobs=24)]: Done 9893 tasks      | elapsed: 13.0min
[Parallel(n_jobs=24)]: Done 10034 tasks      | elapsed: 13.1min
[Parallel(n_jobs=24)]: Done 10177 tasks      | elapsed: 13.3min
[Parallel(n_jobs=24)]: Done 10320 tasks      | elapsed: 13.5min
[Parallel(n_jobs=24)]: Done 10465 tasks      | elapsed: 13.7min
[Parallel(n_jobs=24)]: Done 10610 tasks      | elapsed: 13.9min
[Parallel(n_jobs=24)]: Done 10757 tasks      | elapsed: 14.1min
[Parallel(n_jobs=24)]: Done 10904 tasks      | elapsed: 14.3min
[Parallel(n_jobs=24)]: Done 11053 tasks      | elapsed: 14.5min
[Parallel(n_jobs=24)]: Done 11202 tasks      | elapsed: 14.7min
[Parallel(n_jobs=24)]: Done 11353 tasks      | elapsed: 14.9min
[Parallel(n_jobs=24)]: Done 11504 tasks      | elapsed: 15.1min
[Parallel(n_jobs=24)]: Done 11657 tasks      | elapsed: 15.3min
[Parallel(n_jobs=24)]: Done 11810 tasks      | elapsed: 15.5min
[Parallel(n_jobs=24)]: Done 11965 tasks   

Total Tasks: 19397


[Parallel(n_jobs=24)]: Using backend LokyBackend with 24 concurrent workers.
[Parallel(n_jobs=24)]: Done   2 tasks      | elapsed:    2.0s
[Parallel(n_jobs=24)]: Done  13 tasks      | elapsed:    2.3s
[Parallel(n_jobs=24)]: Done  24 tasks      | elapsed:    2.5s
[Parallel(n_jobs=24)]: Done  37 tasks      | elapsed:    4.7s
[Parallel(n_jobs=24)]: Done  50 tasks      | elapsed:    5.0s
[Parallel(n_jobs=24)]: Done  65 tasks      | elapsed:    7.1s
[Parallel(n_jobs=24)]: Done  80 tasks      | elapsed:    7.7s
[Parallel(n_jobs=24)]: Done  97 tasks      | elapsed:    9.3s
[Parallel(n_jobs=24)]: Done 114 tasks      | elapsed:   11.1s
[Parallel(n_jobs=24)]: Done 133 tasks      | elapsed:   12.4s
[Parallel(n_jobs=24)]: Done 152 tasks      | elapsed:   14.1s
[Parallel(n_jobs=24)]: Done 173 tasks      | elapsed:   15.2s
[Parallel(n_jobs=24)]: Done 194 tasks      | elapsed:   16.9s
[Parallel(n_jobs=24)]: Done 217 tasks      | elapsed:   18.9s
[Parallel(n_jobs=24)]: Done 240 tasks      | elapsed:  

In [105]:
# Per-fight metadata taken from the downloaded tables: which fight it is, how
# long it lasted and the item level reported for it.
metadata = [
    {
        'Report ID': result['reportID'],
        'Fight ID': result['fightID'],
        'Duration (ms)': result['totalTime'],
        'iLvl': result['itemLevel'],
    }
    for result in allResults
]

In [106]:
# Dump the new per-fight metadata to an intermediate CSV (header line, then one
# comma-separated line per fight in the order the dict keys were defined).
with open(DATA_DIR+'fightMetadata2.csv', 'w') as file:
    print('Report ID', 'Fight ID', 'Duration (ms)', 'iLvl', sep=',', file=file)

    for fight in metadata:
        print(*fight.values(), sep=',', file=file)

In [107]:
#merge new metadata with old metadata

# Read fightMetadata.csv from DATA_DIR: that is where the cell that creates the
# file writes it, so './fightMetadata.csv' does not exist on a fresh run.
fightMetadata = pd.read_csv(DATA_DIR+'fightMetadata.csv')
newMetadata = pd.DataFrame(metadata)

# Left join: keep every fight from the original metadata and attach duration
# and item level where a table was downloaded for that (report, fight) pair.
mergedMetadata = pd.merge(left=fightMetadata, right=newMetadata, on=['Report ID', 'Fight ID'], how='left')

In [108]:
# Store the merged metadata in DATA_DIR, alongside the other CSVs this notebook
# produces (the cell that first creates fightMetadata.csv writes it there too).
mergedMetadata.to_csv(DATA_DIR+'fightMetadata.csv', index=False)
os.remove(DATA_DIR+'fightMetadata2.csv') #metadata2 was just an intermediate file

In [112]:
# One row per player per fight: every downloaded table is expanded by
# transformFightTable and the per-table row lists are concatenated in order.
fightData = [
    row
    for result in allResults
    for row in transformFightTable(result)
]

In [113]:
# Write the per-player rows to fightData.csv. The file is opened with
# encoding 'utf-8-sig'. Each line holds the row's values, comma-separated, in
# the order the row dict was built, which is the column order of the header.
with open(DATA_DIR+'fightData.csv', 'w', encoding='utf-8-sig') as file:
    print('Report ID', 'Fight ID', 'Name', 'Class', 'Specialization', 'Role',
          'Damage Done', 'Healing Done', 'Death Count', sep=',', file=file)

    for fight in fightData:
        print(*fight.values(), sep=',', file=file)