In [8]:
import pandas as pd 
import json
import os
import re
from datetime import datetime
import requests
from bs4 import BeautifulSoup
import sys

#from .. import Helpers
sys.path.append(os.path.join(os.path.abspath(''),'..'))

from helpers import PageParser

## Helpers

In [12]:
# Fixed, browser-like request headers passed to requests.get for every call.
# NOTE: one static header set is used; there is no header/user-agent rotation.
headers = {
    'sec-ch-ua': (
        '"Google Chrome";v="89", '
        '"Chromium";v="89", '
        '";Not A Brand";v="99"'
    ),
    'Referer': '',
    'sec-ch-ua-mobile': '?0',
    'User-Agent': (
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 11_1_0) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/88.0.4324.192 Safari/537.36'
    ),
}

In [9]:
def build_api_endpoint(user_id: int, start_info: int = 0) -> str:

    """ Build the OPGG match-list endpoint URL for a single user.

        OPGG uses AJAX XHR requests to fetch data, so the same public
        endpoint can be requested directly, per user, to fetch that data.

        @user_id: integer value found in the data-summoner-id param of
                  OPGG user pages
        @start_info: integer value used for pagination;
                     first page is 0, later pages use a Unix epoch
                     time (https://www.epochconverter.com/)

        @return: the endpoint URL as a string (type=soloranked)
    """

    base_url = 'https://na.op.gg/summoner/matches/ajax/averageAndList/'

    # The parameters are concatenated straight onto the path; no '?'
    # separator is inserted between the base URL and the parameter string.
    api_substring = 'startInfo={}&summonerId={}&type=soloranked'.format(start_info,
                                                                        user_id)

    return base_url + api_substring

In [10]:
def parse(payload: 'dict', user_name) -> 'pd.DataFrame':

    """ Get game data (last 10 ranked games) per user

        See: git issue ...
        Note: normally this would pose an error for unranked players;
        however, by looking at usernames.ipynb we can confirm
        that we only track user IDs for currently ranked players
        who own a division. By default, this means they've played 10
        games (placements).

        @payload: decoded XHR response; must contain the keys
                  'lastInfo' (epoch timestamp) and 'html' (HTML fragment)
        @user_name: value stored in the 'user_name' column of the result

        @return: DataFrame built from PageParser.parse_page output, with
                 'last_updated' and 'user_name' added
                 (presumably parse_page returns a dict of columns -- TODO confirm)
    """

    recency = payload['lastInfo']
    # Convert the epoch timestamp into a readable UTC date string
    readable_date = datetime.utcfromtimestamp(recency).strftime('%Y-%m-%d %H:%M:%S')

    # Parse the HTML carried in the XHR payload. The parser is named
    # explicitly: without it BeautifulSoup picks whichever parser happens to
    # be installed (and warns), so results could differ between machines.
    soup = BeautifulSoup(payload['html'], 'html.parser')
    games = soup.find_all("div", {"class": "GameItemList"})

    p = PageParser()
    d = p.parse_page(games)

    d['last_updated'] = readable_date
    d['user_name'] = user_name

    df = pd.DataFrame.from_dict(d)

    return df

## Load Data

In [11]:
# Display the notebook's working directory; the data folder is located relative to it.
os.path.abspath('')

'C:\\Users\\yeqiu\\Desktop\\opgg\\v0.0.0'

In [None]:
# Locate the user-id data file: any entry in ../data whose name contains search_params.
search_params = 's2-user_ids'
fldr = os.path.join(os.path.abspath(''), '..', 'data')

# os.listdir returns entries in arbitrary order, so sort the matches to make
# the chosen file deterministic across machines and re-runs.
matches = sorted(f for f in os.listdir(fldr) if search_params in f)

# Fail loudly instead of leaving `fp` unbound (or stale from an earlier run).
if not matches:
    raise FileNotFoundError(
        "No file containing '{}' found in {}".format(search_params, fldr)
    )

for f in matches:
    print(f)

# When several files match, the last one in sorted order is used.
fp = os.path.join(fldr, matches[-1])

fp

In [None]:
# Load the user-id CSV found at `fp`; the scraping loop reads its summonerName column.
df = pd.read_csv(fp)

In [2]:
dfs = []

# Scrape data per user (only the first 10 summoner names are processed)
for user_name in df.summonerName.values[:10]:

    # Get accessor id
    # NOTE(review): accessor_id is not defined in the cells visible here --
    # confirm it is defined or imported before this cell runs.
    user_id = accessor_id(user_name)

    # Skip users for which no id could be resolved
    if user_id is None:
        continue

    # Build api endpoint
    api_endpoint = build_api_endpoint(user_id)

    # Fetch payload; the timeout stops a stalled connection from hanging the
    # notebook, and raise_for_status surfaces HTTP errors before JSON decoding.
    r = requests.get(api_endpoint, headers=headers, timeout=30)
    r.raise_for_status()
    payload = r.json()

    # Parse for data. The result gets its own name so that `df` (the frame
    # supplying summonerName) is not overwritten and the cell can be re-run.
    user_games = parse(payload=payload,
                       user_name=user_name)

    # Append to main
    dfs.append(user_games)