# Building a class for a player
It would be very useful to wrap the functionality of the analysis into a class: one to which we can simply pass the link to a player's page and then have access to all of that player's details.

In [17]:
# Required imports
import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup

### Initial workings
This notebook contains the workings that lead up to the class. I will also save a version of the class in a `.py` file ([here](../CricketPlayer.py)) so that we can easily import it into other notebooks later.

In [18]:
class CricketPlayer:
    """Scraper for a player's 'Innings by innings list' statistics page.

    The page is downloaded and parsed once, when the object is created;
    ``raw_innings`` and ``innings`` then read from the parsed HTML.
    """

    def __init__(self, innings_by_innings_link, timeout=30):
        """Download and parse the player's innings-by-innings page.

        Parameters
        ----------
        innings_by_innings_link : str
            URL of the page holding the 'Innings by innings list' table.
        timeout : float, optional
            Seconds to wait for the server before giving up (default 30).

        Raises
        ------
        requests.RequestException
            If the request fails, times out or returns an HTTP error status.
        """
        self.link = innings_by_innings_link
        # Without a timeout, requests can wait indefinitely on an unresponsive server.
        response = requests.get(innings_by_innings_link, timeout=timeout)
        # Fail here rather than later, while trying to parse an error page.
        response.raise_for_status()
        self.soup = BeautifulSoup(response.text, features="html.parser")

    def view_raw_html(self):
        """Return the parsed page (the ``BeautifulSoup`` object built in ``__init__``)."""
        return self.soup

    def raw_innings(self):
        """Return the 'Innings by innings list' table as an uncleaned DataFrame.

        Column names are the texts of the ``<th>`` cells in the first header
        row, and every value is the text of the corresponding ``<td>`` cell,
        so all values are strings.

        Raises
        ------
        ValueError
            If the page has no ``engineTable`` table captioned
            'Innings by innings list'.
        """
        main_table = None
        for caption in self.soup.find_all('caption'):
            if caption.get_text() == 'Innings by innings list':
                table = caption.find_parent('table', {'class': 'engineTable'})
                if table is not None:
                    # As before, a later matching table takes precedence.
                    main_table = table

        if main_table is None:
            raise ValueError(
                "No 'Innings by innings list' table found on the page; "
                "check that the link points at an innings-by-innings view."
            )

        header_row = main_table.find('thead').find_all('tr')[0]
        columns = [header.get_text() for header in header_row.find_all('th')]
        rows = [
            [stat.get_text() for stat in innings.find_all('td')]
            for innings in main_table.find('tbody').find_all('tr')
        ]

        return pd.DataFrame(rows, columns=columns)

    def innings(self):
        """Return a cleaned version of ``raw_innings()``.

        Cleaning steps:

        * column names are lower-cased, with spaces replaced by underscores;
        * the leading 'v ' is removed from ``opposition``;
        * the '-' placeholder is replaced by ``NaN``;
        * ``is_out`` is added: ``NaN`` when there is no score, ``False`` when
          the score contains '*', ``True`` otherwise;
        * ``did_bowl`` is added: ``False`` when ``overs`` is missing or 'DNB',
          ``True`` otherwise.

        Only the columns inns, score, is_out, overs, conc, wkts, did_bowl,
        ct, st, opposition, ground and start_date are returned.
        """
        frame = self.raw_innings()
        frame.columns = frame.columns.str.lower().str.replace(' ', '_')

        # Anchor the pattern so that only the leading 'v ' is removed, not a
        # 'v ' that happens to occur inside the opposition's name.
        frame['opposition'] = frame['opposition'].str.replace(r'^v ', '', regex=True)
        frame = frame.replace('-', np.nan)

        # Test for missing values directly instead of relying on NaN being
        # rendered as the string 'nan' after a cast to str.
        frame['is_out'] = frame['score'].map(
            lambda score: np.nan if pd.isna(score) else '*' not in str(score)
        )
        frame['did_bowl'] = frame['overs'].notna() & frame['overs'].ne('DNB')

        return frame[['inns', 'score', 'is_out', 'overs', 'conc', 'wkts',
                      'did_bowl', 'ct', 'st', 'opposition', 'ground', 'start_date']]

In [19]:
# Innings-by-innings stats pages, one per player
# (query string: class=1, template=results, type=allround, view=innings)
virat_kohli = 'http://stats.espncricinfo.com/ci/engine/player/253802.html?class=1;template=results;type=allround;view=innings'
steve_smith = 'http://stats.espncricinfo.com/ci/engine/player/267192.html?class=1;template=results;type=allround;view=innings'
kane_williamson = 'http://stats.espncricinfo.com/ci/engine/player/277906.html?class=1;template=results;type=allround;view=innings'
joe_root = 'http://stats.espncricinfo.com/ci/engine/player/303669.html?class=1;template=results;type=allround;view=innings'

In [20]:
# Creating the player downloads and parses the page straight away (needs network access)
root = CricketPlayer(joe_root)

In [21]:
# Build the uncleaned DataFrame from the innings table on the parsed page
innings = root.raw_innings()
innings

Unnamed: 0,Inns,Score,Overs,Conc,Wkts,Ct,St,Unnamed: 8,Opposition,Ground,Start Date,Unnamed: 12
0,1,73,-,-,-,-,-,,v India,Nagpur,13 Dec 2012,Test # 2066
1,2,-,1.0,5,0,0,0,,v India,Nagpur,13 Dec 2012,Test # 2066
2,3,20*,-,-,-,-,-,,v India,Nagpur,13 Dec 2012,Test # 2066
3,1,4,-,-,-,-,-,,v New Zealand,Dunedin,6 Mar 2013,Test # 2077
4,2,-,5.0,8,0,0,0,,v New Zealand,Dunedin,6 Mar 2013,Test # 2077
5,3,0,-,-,-,-,-,,v New Zealand,Dunedin,6 Mar 2013,Test # 2077
6,1,10,-,-,-,-,-,,v New Zealand,Wellington,14 Mar 2013,Test # 2080
7,2,-,1.0,6,0,0,0,,v New Zealand,Wellington,14 Mar 2013,Test # 2080
8,3,-,2.0,12,0,0,0,,v New Zealand,Wellington,14 Mar 2013,Test # 2080
9,1,-,2.0,5,0,0,0,,v New Zealand,Auckland,22 Mar 2013,Test # 2084


In [22]:
# Remove 'v ' from the beginning of each opposition.
# The pattern is anchored with '^' so a 'v ' inside a team name is left alone.
innings['Opposition'] = innings['Opposition'].str.replace(r'^v ', '', regex=True)

In [23]:
# Remove the empty columns that are used for spacing and links on the website.
# errors='ignore' keeps this cell re-runnable: once the columns are gone,
# running it again is a no-op instead of a KeyError.
innings.drop('', axis=1, inplace=True, errors='ignore')
innings.head()

Unnamed: 0,Inns,Score,Overs,Conc,Wkts,Ct,St,Opposition,Ground,Start Date
0,1,73,-,-,-,-,-,India,Nagpur,13 Dec 2012
1,2,-,1.0,5,0,0,0,India,Nagpur,13 Dec 2012
2,3,20*,-,-,-,-,-,India,Nagpur,13 Dec 2012
3,1,4,-,-,-,-,-,New Zealand,Dunedin,6 Mar 2013
4,2,-,5.0,8,0,0,0,New Zealand,Dunedin,6 Mar 2013


I think we want to replace the blank '-' with `np.nan` to neaten up the output. 

In [24]:
# Replace the '-' placeholder with NaN throughout the frame (modifies `innings` in place)
import numpy as np
innings.replace('-', np.nan, inplace=True)

In [25]:
# Preview the frame now that the '-' placeholders have been replaced
innings.head()

Unnamed: 0,Inns,Score,Overs,Conc,Wkts,Ct,St,Opposition,Ground,Start Date
0,1,73,,,,,,India,Nagpur,13 Dec 2012
1,2,,1.0,5.0,0.0,0.0,0.0,India,Nagpur,13 Dec 2012
2,3,20*,,,,,,India,Nagpur,13 Dec 2012
3,1,4,,,,,,New Zealand,Dunedin,6 Mar 2013
4,2,,5.0,8.0,0.0,0.0,0.0,New Zealand,Dunedin,6 Mar 2013


Clean up the column names. 

In [26]:
# Lower-case the column names and swap spaces for underscores, e.g. 'Start Date' -> 'start_date'
innings.columns = innings.columns.str.lower().str.replace(' ', '_')

In [27]:
# Preview the frame with the tidied column names
innings.head()

Unnamed: 0,inns,score,overs,conc,wkts,ct,st,opposition,ground,start_date
0,1,73,,,,,,India,Nagpur,13 Dec 2012
1,2,,1.0,5.0,0.0,0.0,0.0,India,Nagpur,13 Dec 2012
2,3,20*,,,,,,India,Nagpur,13 Dec 2012
3,1,4,,,,,,New Zealand,Dunedin,6 Mar 2013
4,2,,5.0,8.0,0.0,0.0,0.0,New Zealand,Dunedin,6 Mar 2013


It will be easier if we have a flag for working out whether the batsman got out or not. 

In [28]:
# NaN when there is no score, False when the score contains '*', True otherwise
innings['is_out'] = innings.score.astype('str').apply(lambda x: np.nan if x == 'nan' else False if '*' in x else True)

In [29]:
# Preview the frame with the new is_out column
innings.head()

Unnamed: 0,inns,score,overs,conc,wkts,ct,st,opposition,ground,start_date,is_out
0,1,73,,,,,,India,Nagpur,13 Dec 2012,True
1,2,,1.0,5.0,0.0,0.0,0.0,India,Nagpur,13 Dec 2012,
2,3,20*,,,,,,India,Nagpur,13 Dec 2012,False
3,1,4,,,,,,New Zealand,Dunedin,6 Mar 2013,True
4,2,,5.0,8.0,0.0,0.0,0.0,New Zealand,Dunedin,6 Mar 2013,


Add a flag to indicate whether the player bowled in that particular innings. 

In [30]:
# False when overs is missing (NaN) or 'DNB', True otherwise
innings['did_bowl'] = innings.overs.astype('str').apply(lambda x: False if x in ['nan', 'DNB'] else True)

In [31]:
# Show the columns in their final order, with is_out placed after score
# and did_bowl placed after the bowling figures
innings[['inns', 'score', 'is_out', 'overs', 'conc', 
         'wkts', 'did_bowl', 'ct', 'st', 'opposition', 'ground', 'start_date']].head()

Unnamed: 0,inns,score,is_out,overs,conc,wkts,did_bowl,ct,st,opposition,ground,start_date
0,1,73,True,,,,False,,,India,Nagpur,13 Dec 2012
1,2,,,1.0,5.0,0.0,True,0.0,0.0,India,Nagpur,13 Dec 2012
2,3,20*,False,,,,False,,,India,Nagpur,13 Dec 2012
3,1,4,True,,,,False,,,New Zealand,Dunedin,6 Mar 2013
4,2,,,5.0,8.0,0.0,True,0.0,0.0,New Zealand,Dunedin,6 Mar 2013


The cleaning steps above are wrapped up in the class's `innings()` method, which takes the output of `raw_innings()` and returns a clean `pd.DataFrame`.

In [32]:
# The class method applies the cleaning steps to a fresh raw_innings() frame in one call
root.innings()

Unnamed: 0,inns,score,is_out,overs,conc,wkts,did_bowl,ct,st,opposition,ground,start_date
0,1,73,True,,,,False,,,India,Nagpur,13 Dec 2012
1,2,,,1.0,5,0,True,0,0,India,Nagpur,13 Dec 2012
2,3,20*,False,,,,False,,,India,Nagpur,13 Dec 2012
3,1,4,True,,,,False,,,New Zealand,Dunedin,6 Mar 2013
4,2,,,5.0,8,0,True,0,0,New Zealand,Dunedin,6 Mar 2013
5,3,0,True,,,,False,,,New Zealand,Dunedin,6 Mar 2013
6,1,10,True,,,,False,,,New Zealand,Wellington,14 Mar 2013
7,2,,,1.0,6,0,True,0,0,New Zealand,Wellington,14 Mar 2013
8,3,,,2.0,12,0,True,0,0,New Zealand,Wellington,14 Mar 2013
9,1,,,2.0,5,0,True,0,0,New Zealand,Auckland,22 Mar 2013
