In [110]:
from bs4 import BeautifulSoup
import configparser
import requests
import pandas as pd
import pickle

In [7]:
# Site root; every absolute URL below is built from it.
LLHEADER = 'https://www.learnedleague.com'

# Absolute URLs / URL templates. The '%s' and '%d' markers are left in the
# strings as placeholders (presumably filled by the caller with %-formatting).
LOGINFILE = f'{LLHEADER}/ucp.php?mode=login'
USER_DATA = f'{LLHEADER}/profiles/previous.php?%s'
QHIST = f'{LLHEADER}/profiles/qhist.php?%s'
MATCH_DATA = f'{LLHEADER}/match.php?%s'
ONEDAYS = f'{LLHEADER}/oneday'

# Standings pages are kept as relative paths so that format_rundle can pick
# between the regular and the expanded page.
STANDINGS = '/standings.php?'
EXP_STANDINGS = '/standings_ex.php?'
LLSTANDINGS = f'{LLHEADER}{STANDINGS}'
ARUNDLE = f'{LLSTANDINGS}%d&A_%s'

# Local ini file that get_session reads the login credentials from.
INPUTDATA = 'logindata.ini'
TOTAL_MATCHES_PER_SEASON = 25

In [3]:
def get_session():
    """
    Read the login ini file and establish a login session.

    Takes no arguments: the credentials are read from the file named by the
    module-level constant INPUTDATA. That file should have the format:

        [DEFAULT]
        username = {your username here without braces}
        password = {your password here without braces}

    An optional ``loginfile`` key in the same section overrides the default
    login URL (LOGINFILE).

    Returns: requests session that has posted the login form, to be used in
        later operations

    Raises:
        FileNotFoundError -- the ini file is missing or could not be read
        KeyError -- username or password is absent from the ini file
        requests.HTTPError -- the login request returned an HTTP error status
    """
    config = configparser.ConfigParser()
    # ConfigParser.read() silently skips files it cannot open and returns the
    # list of files it actually parsed; an empty list means nothing was loaded.
    if not config.read(INPUTDATA):
        raise FileNotFoundError(f'Could not read login file {INPUTDATA!r}')
    defaults = config['DEFAULT']

    payload = {'login': 'Login'}
    for attrib in ['username', 'password']:
        payload[attrib] = defaults[attrib]
    loginfile = defaults.get('loginfile', LOGINFILE)

    ses1 = requests.Session()
    # requests has no default timeout; without one a stalled server would hang
    # the notebook cell indefinitely.
    response = ses1.post(loginfile, data=payload, timeout=30)
    # Only catches HTTP-level failures (4xx/5xx).
    # NOTE(review): a 200 response does not by itself prove the credentials
    # were accepted -- confirm against the site's login behaviour.
    response.raise_for_status()
    return ses1

In [108]:
# Log in once and keep the resulting session; the standings download below
# reuses `sess` for its request.
sess = get_session()
sess

<requests.sessions.Session at 0x7f83a038ea00>

In [111]:
# Persist the session object to session.pkl in the working directory.
# NOTE(review): the pickled session presumably carries the login cookies, so
# treat the file like a credential (keep it out of version control), and only
# ever unpickle files you created yourself -- pickle.load can execute code.
with open('session.pkl', 'wb') as f:
    pickle.dump(sess, f)

In [67]:
def format_rundle(rundle, league, season, div=None, exp=False, md=None):
    """
    Build the standings URL for one rundle.

    Input:
        rundle -- rundle identifier; converted with str() and capitalized
        league -- league name; converted with str() and capitalized
        season -- season number placed first in the query string
        div -- optional division; appended as '_Div_<div>' when given
        exp -- truthy selects the expanded standings page, otherwise the
            regular one
        md -- optional match day; only included for the regular standings

    Returns: the URL as a string. A notice is printed when the regular
        standings are requested (exp is False) without a match day.
    """
    if md is None and exp is False:
        print('No MD provided, using most current for chosen season')

    rundle_name = str(rundle).capitalize()
    league_name = str(league).capitalize()
    tail = f'{rundle_name}_{league_name}'
    if div is not None:
        tail = tail + "_Div_" + str(div)

    if exp:
        # The match day is dropped entirely for the expanded standings.
        page, md_part = EXP_STANDINGS, ''
    else:
        page = STANDINGS
        md_part = '' if md is None else f'{str(md)}&'

    return LLHEADER + page + f'{str(season)}&' + md_part + tail
def int_or_float_or_str(s: str):
    """
    Convert ``s`` to the narrowest type that accepts it.

    Tries int() first, then float(); if both raise ValueError the value is
    returned via str(). Exceptions other than ValueError are not caught.
    """
    for convert in (int, float):
        try:
            return convert(s)
        except ValueError:
            continue
    return str(s)

In [39]:
# Getting the standings for a particular rundle on a particular matchday.
md = 25 # note: expanded standings only capture stats on the current day or final MD of each season.
season = 93
rundle = 'E'
league = 'Central'
div = 1 # only use if the rundle in this league is not the only at its level (e.g., Central rundles C, D & E)
exp = True # whether to access the regular (False) or expanded (True) standings
# Note: expanded standings are only available back to approx. LL60.
# Pass md explicitly: otherwise switching exp to False silently ignores the
# match day chosen above (format_rundle already drops md when exp is True).
formatted_rundle = format_rundle(rundle, league, season, div=div, exp=exp, md=md)
formatted_rundle

'https://www.learnedleague.com/standings_ex.php?93&E_Central_Div_1'

In [40]:
# Download the standings page with the logged-in session and locate the
# standings table in it.
standings_html = sess.get(formatted_rundle)
# Fail here on an HTTP error instead of parsing an error page further down.
standings_html.raise_for_status()
html_text = standings_html.text
soup = BeautifulSoup(html_text, 'html.parser')
table = soup.find('table', attrs={'class': 'sortable std'})
# find() returns None when nothing matches; stop with a clear message rather
# than an AttributeError in the parsing cell.
if table is None:
    raise ValueError(
        f'No standings table found at {formatted_rundle}; '
        'check that the login succeeded and that the rundle/season exist'
    )
table

<table class="sortable std">
<thead>
<tr>
<td class="std-head-mid"><span style="font-size:0.8em;">Rank</span></td>
<td class="std-head-mid" style="vertical-align:middle;"><img src="/images/misc/updown.png"/></td>
<td class="std-head-left">  Player</td>
<td class="std-head-mid">W</td>
<td class="std-head-mid">L</td>
<td class="std-head-mid">T</td>
<td class="std-head-mid">PTS</td>
<td class="std-head-mid mpd">MPD</td>
<td class="std-head-mid">TMP</td>
<td class="std-head-mid">TCA</td>
<td class="std-head-mid">PCA</td>
<td class="std-head-mid">UfPE</td>
<td class="std-head-mid">OE</td>
<td class="std-head-mid">CAσ</td>
<td class="std-head-mid mpd">Q%</td>
<td class="std-head-mid">TPA</td>
<td class="std-head-mid">CAA</td>
<td class="std-head-mid">PCAA</td>
<td class="std-head-mid">UfPA</td>
<td class="std-head-mid mpd">DE</td>
<td class="std-head-mid">NUfP</td>
<td class="std-head-mid">QPO</td>
<td class="std-head-mid">QPO+</td>
<td class="std-head-mid">QPD</td>
<td class="std-head-mid">

In [105]:
def get_change(td):
    """
    Return the base name of the image inside a table cell.

    The name is the last path component of the image's ``src`` up to its
    first dot, e.g. '/images/misc/updown.png' -> 'updown'.

    Input:
        td -- cell element exposing find('img'); the image must support
            .get('src') (a BeautifulSoup Tag does)

    Returns: the image base name, or None when the cell has no image or the
        image has no ``src``, so that a genuinely empty cell becomes a
        missing value instead of raising TypeError.
    """
    img = td.find('img')
    if img is None:
        return None
    src = img.get('src')
    if not src:
        return None
    return src.split('/')[-1].split('.')[0]

In [106]:
# Split the table into its header row and the body rows; the slice drops the
# first row (the header) and the last row.
table_rows = table.find_all('tr')
rows = table_rows[1:-1]
headers = [cell.text.replace(u'\xa0', '') for cell in table_rows[0].find_all('td')]
# The second header cell contains only an image, so name the column by hand.
headers[1] = 'Change'


def _cell_value(cell):
    """Parse the cell's stripped text, or fall back to get_change for a cell with no text."""
    text = cell.text.strip()
    if len(text) > 0:
        return int_or_float_or_str(text)
    return get_change(cell)


data = [[_cell_value(cell) for cell in row.find_all('td')] for row in rows]
standings_df = pd.DataFrame(data, columns=headers)
standings_df.head()

Unnamed: 0,Rank,Change,Player,W,L,T,PTS,MPD,TMP,TCA,...,QPO+,QPD,QPD+,OPD,QvH,FW,FL,3PT,MCW,STR
0,1,up,GiordanoJ22,16,5,4,36,44,84,64,...,136,1.17,120,1.125,101,2,0,5,25,W1
1,2,down,MezoffA,18,7,0,36,43,101,71,...,168,1.231,135,1.2,110,1,0,12,18,L1
2,3,even,FoleyB2,14,6,5,33,25,84,74,...,114,1.209,134,0.975,104,1,0,4,12,T1
3,4,up,MiltonM,15,8,2,32,28,97,73,...,148,1.129,121,1.013,112,1,0,7,14,T1
4,5,down,RichardsJA,13,6,6,32,17,76,68,...,99,1.127,117,0.919,94,0,0,8,17,L1


In [107]:
# Name the output file after every parameter chosen above, then write the
# standings without the DataFrame index.
csv_path = f'./standings_{season}_{md}_{rundle}_{league}_{div}_{exp}.csv'
standings_df.to_csv(csv_path, index=False)