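# Runner level

Scrape a single parkrun athlete's results-history page and extract the runner's name, total number of parkruns, and the summary tables shown on the page.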

In [1]:
import requests
import pandas as pd
from bs4 import BeautifulSoup
import re 

In [2]:
runner_link = 'https://www.parkrun.org.uk/results/athleteresultshistory/?athleteNumber=501501'
# NOTE(review): this User-Agent presents the request as Google's API crawler;
# presumably the site rejects the default `requests` agent — confirm, and prefer
# an honest identifier if the site accepts one.
headers = {'User-Agent':'APIs-Google (+https://developers.google.com/webmasters/APIs-Google.html)'}
# A timeout stops the cell hanging forever on an unresponsive server, and
# raise_for_status() fails loudly instead of silently parsing an HTTP error page.
runner_response = requests.get(runner_link, headers=headers, timeout=30)
runner_response.raise_for_status()
runner_soup = BeautifulSoup(runner_response.text, features="html.parser")

In [3]:
# The first <h2> on the page is matched against "<name> (<count> parkruns)".
runner_heading = runner_soup.find_all('h2')[0].text
# Raw string: '\(' is not a valid Python string escape, so the non-raw literal
# emits an invalid-escape warning on newer interpreters. The regex is unchanged.
# NOTE(review): the name group only accepts two space-separated runs of ASCII
# letters, so hyphenated, accented or three-part names will be truncated or
# fail to match — widen the pattern once the heading format is confirmed.
runner_title = re.compile(r'([a-zA-Z]* [a-zA-Z]*) \(([0-9]*) parkruns\)').search(runner_heading)
if runner_title is None:
    # Fail here with the offending text rather than with an AttributeError on
    # `.group()` in a later cell.
    raise ValueError('Unexpected runner heading format: {!r}'.format(runner_heading))

In [4]:
# First capture group of the heading match: the runner's name.
name = runner_title.groups()[0]
name

'Marc FORSAITH'

In [5]:
# Second capture group of the heading match: the parkrun count, kept as a string.
total_park_runs = runner_title.groups()[1]
total_park_runs

'62'

## Getting the main tables by their `h1` heading
All the main tables on the runner summary page share the same structure: each one sits inside a `div` element that contains an `h1` heading followed by the table itself. We can therefore define a function that pulls out a table given the text of its `h1` heading.

In [6]:
def table_from_h1(runner_soup, h1_tag):
    """Return the table that sits under a given ``h1`` heading as a DataFrame.

    Parameters
    ----------
    runner_soup : parsed page (BeautifulSoup-like object)
        Must support ``find_all('h1')``.
    h1_tag : str
        Exact text of the ``h1`` heading whose table should be extracted.

    Returns
    -------
    pandas.DataFrame
        One row per ``tr`` that contains at least one ``td``; column names are
        the text of the ``th`` cells inside the table's ``thead``. All values
        are the raw cell text (strings).

    Raises
    ------
    ValueError
        If no ``h1`` has exactly the text ``h1_tag``, or if the matching
        heading has no enclosing ``div`` with a ``table`` and ``thead``.
    """
    matching_headings = [
        heading for heading in runner_soup.find_all('h1')
        if heading.get_text() == h1_tag
    ]
    if not matching_headings:
        # Previously this fell through to an UnboundLocalError further down.
        raise ValueError('No <h1> heading with text {!r} found'.format(h1_tag))

    # If several headings share the text, the last one wins (unchanged behaviour).
    container = matching_headings[-1].find_parent('div')
    table = container.find('table') if container is not None else None
    if table is None:
        raise ValueError('No table found alongside <h1> {!r}'.format(h1_tag))

    header = table.find('thead')
    if header is None:
        raise ValueError('Table under <h1> {!r} has no <thead>'.format(h1_tag))

    col_headers = [cell.text for cell in header.find_all('th')]
    # Rows without any <td> (e.g. the header row) are skipped.
    data = [
        [cell.text for cell in row.find_all('td')]
        for row in table.find_all('tr')
        if row.find('td')
    ]
    return pd.DataFrame(data, columns=col_headers)

In [7]:
# Show the table that sits under the 'Most Recent Runs' heading.
table_from_h1(runner_soup, h1_tag='Most Recent Runs')

Unnamed: 0,Event,Run Date,GenderPos,Overall Position,Time,AgeGrade
0,Finsbury parkrun,13/04/2019,126,149,25:06,51.39%
1,Burgess parkrun,16/03/2019,113,116,23:06,55.84%
2,Fulham Palace parkrun,06/10/2018,81,87,21:34,59.81%
3,Clapham Common parkrun,28/04/2018,41,42,19:52,64.93%
4,Clapham Common parkrun,21/04/2018,50,54,21:02,61.33%
5,Fulham Palace parkrun,24/02/2018,64,69,21:09,60.99%
6,Fulham Palace parkrun,03/02/2018,58,61,20:13,63.81%
7,Royal Tunbridge Wells parkrun,27/01/2018,23,23,24:28,52.72%
8,Fulham Palace parkrun,20/01/2018,61,65,20:30,62.93%
9,Mile End parkrun,13/01/2018,21,21,19:31,66.10%


In [8]:
# Show the table that sits under the 'Event Summaries' heading.
table_from_h1(runner_soup, h1_tag='Event Summaries')

Unnamed: 0,Event,Runs,Best Gender Position,Best Position Overall,Best Time,Unnamed: 6,Unnamed: 7
0,Fulham Palace parkrun,21,15,15,00:19:00,All,
1,Burgess parkrun,15,3,3,00:18:37,All,
2,Oxford parkrun,7,24,27,00:20:34,All,
3,"Brockwell parkrun, Herne Hill",6,27,27,00:20:05,All,
4,Margate parkrun,3,2,2,00:20:11,All,
5,Ashton Court parkrun,2,49,56,00:23:14,All,
6,Clapham Common parkrun,2,41,42,00:19:52,All,
7,Dulwich parkrun,1,24,27,00:19:36,All,
8,Southwark parkrun,1,29,31,00:19:49,All,
9,Mile End parkrun,1,21,21,00:19:31,All,


In [9]:
# Show the table that sits under the 'Volunteer Summary' heading.
table_from_h1(runner_soup, h1_tag='Volunteer Summary')

Unnamed: 0,Year,Role,Number of Times
0,2018,Barcode Scanning,1
