# Cricketer class improvements
A development area where I work out new features to build into the various classes to allow for further analysis. 

In [1]:
# Required imports
import requests
import pandas as pd
from bs4 import BeautifulSoup
import matplotlib.pyplot as plt

# Import my classes
from classes.Cricketer import Cricketer
from classes.Innings import Innings
from classes.Match import Match

In [2]:
# Cricketer object for player id 303669 (the id that matches the JE Root
# scorecard row shown further down).
root = Cricketer(303669)

In [3]:
# Player id as a string; it is compared against the scorecard's player_id column.
root_id = '303669'
# First 25 rows of the player's innings table.
# NOTE(review): the meaning of the positional `True` flag is not visible here —
# confirm against Cricketer.innings.
first_25 = root.innings(True).head(25)

In [4]:
# Take the first row as an independent copy so that extra fields can be added
# to it later without touching first_25.
first_innings = first_25.iloc[0].copy()

# Build the Match for that row's URL, then pick the innings the row refers to.
first_match = Match(first_innings['match_url'])
first_innings_obj = first_match.select_innings(int(first_innings['inns']))

In [5]:
# Batting scorecard table for the selected innings.
first_innings_scorecard = first_innings_obj.batting()

In [6]:
# Show only the scorecard row(s) whose player_id equals root_id.
is_root_row = first_innings_scorecard['player_id'] == root_id
first_innings_scorecard.loc[is_root_row]

Unnamed: 0,batsman,how_out,runs,balls_faced,minutes,fours,sixes,strike_rate,is_out,is_keeper,is_captain,player_link,player_id
6,JE Root,c & b Chawla,73,229,289,4,0,31.87,True,False,False,http://www.espncricinfo.com/ci/content/player/...,303669


In [7]:
# Scorecard row for this player in the selected innings. `.item()` below
# requires exactly one matching row and raises ValueError otherwise.
first_sc_details = first_innings_scorecard[first_innings_scorecard.player_id == root_id]

# Copy the extra batting details onto the innings record. The numeric stats are
# all cast with int(), matching the casts applied in the per-innings loop later
# in this notebook (previously only balls_faced was cast here).
first_innings['balls_faced'] = int(first_sc_details['balls_faced'].item())
# The scorecard index label is used as the batting position.
first_innings['position_batted'] = first_sc_details.index.item()
first_innings['minutes'] = int(first_sc_details['minutes'].item())
first_innings['fours'] = int(first_sc_details['fours'].item())
first_innings['sixes'] = int(first_sc_details['sixes'].item())
first_innings['is_captain'] = first_sc_details['is_captain'].item()

In [8]:
# Display the innings record, now including the fields copied from the scorecard.
first_innings

inns                                                               1
score                                                             73
did_bat                                                         True
is_out                                                          True
overs                                                            NaN
conc                                                             NaN
wkts                                                             NaN
did_bowl                                                       False
ct                                                               NaN
st                                                               NaN
opposition                                                     India
ground                                                        Nagpur
start_date                                               13 Dec 2012
match_id                                                      565809
match_url          https://www.esp

With this sort of extra information, we can track other things over time, but we will have to be more careful about the number of requests we make, as each one takes time and resources.

In [9]:
# Full innings table for the player (no head() limit this time).
# NOTE(review): same positional `True` flag as before — confirm its meaning
# against Cricketer.innings.
all_innings = root.innings(True)

In [10]:
# Compare the number of distinct match URLs with the number of innings rows to
# see how many rows share a match.
match_urls = all_innings.match_url
print(len(set(match_urls)))
print(len(match_urls))

88
331


I think it makes sense to loop through the distinct match URLs and create a dictionary of `Match` objects, one per match, so that we do not create many unnecessary duplicate objects.

In [11]:
# One Match object per distinct URL, keyed by the id each Match reports, so that
# later cells can look a match up instead of constructing it again.
match_objects = (Match(url) for url in set(all_innings.match_url))
matches = {match.id: match for match in match_objects}

We now have a dictionary of `Match` objects that we can access using the `match_id`. This allows us to loop through the innings, look up the match, and get the extra details for a given innings within a loop, without making a request to the page each time. As these matches are historic, we don't need to worry about the stats becoming out of date in the minutes that we work with this dictionary.

In [12]:
# Number of Match objects held in the dictionary.
len(matches)

88

In [13]:
# Inspect the mapping of match id -> Match object.
matches

{'743965': <classes.Match.Match at 0x117cfb860>,
 '667901': <classes.Match.Match at 0x118054128>,
 '1152849': <classes.Match.Match at 0x1183db860>,
 '566921': <classes.Match.Match at 0x118911cc0>,
 '667717': <classes.Match.Match at 0x118de9e10>,
 '800467': <classes.Match.Match at 0x118fb97b8>,
 '913647': <classes.Match.Match at 0x1192b0940>,
 '1115780': <classes.Match.Match at 0x1187c2438>,
 '1158062': <classes.Match.Match at 0x1196a4748>,
 '913645': <classes.Match.Match at 0x119a82eb8>,
 '566936': <classes.Match.Match at 0x119dd5c88>,
 '1152839': <classes.Match.Match at 0x11a02bac8>,
 '1034811': <classes.Match.Match at 0x11a5264a8>,
 '1140386': <classes.Match.Match at 0x11a7f0eb8>,
 '766933': <classes.Match.Match at 0x11ab20240>,
 '1119550': <classes.Match.Match at 0x11ae1a518>,
 '1031441': <classes.Match.Match at 0x11b0b0470>,
 '566935': <classes.Match.Match at 0x11b3b8c50>,
 '1072305': <classes.Match.Match at 0x11b7a65c0>,
 '592398': <classes.Match.Match at 0x11ba637b8>,
 '569245': 

In [14]:
# Look the first innings' match up in the dictionary (no new Match is built)
# and display the batting scorecard for that innings.
first_row = first_25.iloc[0]
cached_match = matches[first_row.match_id]
cached_match.select_innings(int(first_row.inns)).batting()

Unnamed: 0,batsman,how_out,runs,balls_faced,minutes,fours,sixes,strike_rate,is_out,is_keeper,is_captain,player_link,player_id
1,AN Cook,lbw b Sharma,1,28,44,0,0,3.57,True,False,True,http://www.espncricinfo.com/ci/content/player/...,11728
2,NRD Compton,c †Dhoni b Sharma,3,12,16,0,0,25.0,True,False,False,http://www.espncricinfo.com/ci/content/player/...,11718
3,IJL Trott,b Jadeja,44,133,168,7,0,33.08,True,False,False,http://www.espncricinfo.com/ci/content/player/...,47623
4,KP Pietersen,c Ojha b Jadeja,73,188,202,10,0,38.82,True,False,False,http://www.espncricinfo.com/ci/content/player/...,19296
5,IR Bell,c Kohli b Chawla,1,28,38,0,0,3.57,True,False,False,http://www.espncricinfo.com/ci/content/player/...,9062
6,JE Root,c & b Chawla,73,229,289,4,0,31.87,True,False,False,http://www.espncricinfo.com/ci/content/player/...,303669
7,MJ Prior,b Ashwin,57,142,179,6,0,40.14,True,True,False,http://www.espncricinfo.com/ci/content/player/...,18675
8,TT Bresnan,lbw b Sharma,0,2,4,0,0,0.0,True,False,False,http://www.espncricinfo.com/ci/content/player/...,9310
9,GP Swann,lbw b Chawla,56,91,96,6,2,61.53,True,False,False,http://www.espncricinfo.com/ci/content/player/...,20431
10,JM Anderson,c Pujara b Chawla,4,17,24,0,0,23.52,True,False,False,http://www.espncricinfo.com/ci/content/player/...,8608


In [15]:
# Keep only the rows flagged did_bat; copy so that columns can be added to
# first_batting without affecting first_25.
batted_mask = first_25['did_bat']
first_batting = first_25.loc[batted_mask].copy()

In [16]:
# Player id as a string, compared against the scorecard's player_id column.
root_id = '303669'
# One dict of extra batting details per row of first_batting, in row order.
full_stats = []

# Only the innings number and match id are needed from each row, so iterate
# over those two columns directly instead of indexing every row by position.
for n_inns, match_id in zip(first_batting.inns, first_batting.match_id):
    # Reuse the Match object built earlier rather than constructing a new one.
    match_obj = matches[match_id]
    scorecard = match_obj.select_innings(int(n_inns)).batting()
    specific_score = scorecard[scorecard.player_id == root_id]

    # `.item()` needs exactly one row; fail with a message that says which
    # innings is the problem instead of a bare size error.
    if len(specific_score) != 1:
        raise ValueError(
            'expected exactly one scorecard row for player {} in match {} innings {}, found {}'.format(
                root_id, match_id, n_inns, len(specific_score)))

    full_stats.append({'match_innings': n_inns,
                       # The scorecard index label is used as the batting position.
                       'batting_position': specific_score.index.item(),
                       'balls_faced': int(specific_score.balls_faced.item()),
                       'minutes': int(specific_score.minutes.item()),
                       'fours': int(specific_score.fours.item()),
                       'sixes': int(specific_score.sixes.item()),
                       'is_captain': specific_score.is_captain.item()})

In [17]:
# Attach the per-innings dicts as a single column; full_stats must contain one
# entry per row of first_batting.
first_batting = first_batting.assign(extra_stats=full_stats)

In [18]:
# Expand the dicts held in extra_stats into one column per key. Building a
# single DataFrame from the list of dicts avoids creating a Series per row
# (as `.apply(pd.Series)` does) and lets pandas infer a dtype for each column.
extra_stats_frame = pd.DataFrame(first_batting.extra_stats.tolist(), index=first_batting.index)

# Replace the dict column with the expanded columns, aligned on the shared index.
first_batting = pd.concat([first_batting.drop('extra_stats', axis=1), extra_stats_frame], axis=1)
first_batting

Unnamed: 0,inns,score,did_bat,is_out,overs,conc,wkts,did_bowl,ct,st,...,start_date,match_id,match_url,match_innings,batting_position,balls_faced,minutes,fours,sixes,is_captain
0,1,73,True,True,,,,False,,,...,13 Dec 2012,565809,https://www.espncricinfo.com/ci/engine/match/5...,1,6,229,289,4,0,False
2,3,20,True,False,,,,False,,,...,13 Dec 2012,565809,https://www.espncricinfo.com/ci/engine/match/5...,3,6,56,64,1,1,False
3,1,4,True,True,,,,False,,,...,6 Mar 2013,569243,https://www.espncricinfo.com/ci/engine/match/5...,1,6,11,17,1,0,False
5,3,0,True,True,,,,False,,,...,6 Mar 2013,569243,https://www.espncricinfo.com/ci/engine/match/5...,3,7,2,4,0,0,False
6,1,10,True,True,,,,False,,,...,14 Mar 2013,569244,https://www.espncricinfo.com/ci/engine/match/5...,1,6,20,24,1,0,False
10,2,45,True,True,,,,False,,,...,22 Mar 2013,569245,https://www.espncricinfo.com/ci/engine/match/5...,2,5,176,254,4,0,False
12,4,29,True,True,,,,False,,,...,22 Mar 2013,569245,https://www.espncricinfo.com/ci/engine/match/5...,4,6,79,107,2,0,False
13,1,40,True,True,,,,False,,,...,16 May 2013,566921,https://www.espncricinfo.com/ci/engine/match/5...,1,5,114,180,4,0,False
15,3,71,True,True,,,,False,,,...,16 May 2013,566921,https://www.espncricinfo.com/ci/engine/match/5...,3,4,120,178,8,0,False
17,1,104,True,True,,,,False,,,...,24 May 2013,566922,https://www.espncricinfo.com/ci/engine/match/5...,1,5,167,236,9,0,False


In [20]:
# Cricketer object for player id 550215, bound to the name `rabada`.
rabada = Cricketer(550215)
# Echo the object to confirm it was constructed.
rabada

<classes.Cricketer.Cricketer at 0x129a18cf8>

In [23]:
# Bowling figures table returned by test_bowling_innings().
bowling = rabada.test_bowling_innings()

# Highest wicket count in the table, then every row that reaches it.
max_wickets = bowling['wkts'].max()
bowling.loc[bowling['wkts'] == max_wickets]

Unnamed: 0_level_0,overs,mdns,runs,wkts,econ,pos,inns,opposition,ground,total_balls
start_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2016-01-22,29.0,6,112,7,3.86,2,2,v England,Centurion,174


In [53]:
# Every innings that equals the best wicket haul.
top_figures_interim = bowling[bowling.wkts == max_wickets].copy()

# Break ties on fewest runs conceded. The comparison is done on an explicit
# integer view of `runs`: the column is only cast to int by a cell further down
# the notebook, so on a top-to-bottom run it may not be numeric yet here.
runs_conceded = top_figures_interim.runs.astype(int)

# Copy the filtered rows so that adding a column modifies an independent frame
# and does not raise SettingWithCopyWarning.
top_figures = top_figures_interim[runs_conceded == runs_conceded.min()].copy()
top_figures['strike_rate'] = top_figures.total_balls / top_figures.wkts

In [54]:
# Display the best-figures row(s), including the strike_rate column.
top_figures

Unnamed: 0_level_0,overs,mdns,runs,wkts,econ,pos,inns,opposition,ground,total_balls,strike_rate
start_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2016-01-22,29.0,6,112,7,3.86,2,2,v England,Centurion,174,24.857143


In [40]:
# Cast runs to int so it can be summed. The cast is written back to `bowling`
# on purpose: later cells sum bowling.runs directly.
bowling['runs'] = bowling['runs'].astype(int)

# Totals per opposition. The three columns are selected explicitly rather than
# relying on groupby().sum() silently dropping non-numeric columns, which
# pandas 2.0 no longer does.
by_nation = bowling.groupby('opposition')[['runs', 'wkts', 'total_balls']].sum()

# Derived rates from the totals.
by_nation['strike_rate'] = by_nation.total_balls / by_nation.wkts   # balls per wicket
by_nation['average'] = by_nation.runs / by_nation.wkts              # runs per wicket
by_nation['econ'] = by_nation.runs / by_nation.total_balls * 6      # runs per six balls

In [41]:
# Order the oppositions by strike rate, lowest first (sort_values sorts
# ascending by default).
by_nation.sort_values('strike_rate')

Unnamed: 0_level_0,runs,wkts,total_balls,strike_rate,average,econ
opposition,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
v Bangladesh,180,15,353,23.533333,12.0,3.05949
v Zimbabwe,24,3,79,26.333333,8.0,1.822785
v Pakistan,318,17,518,30.470588,18.705882,3.683398
v Sri Lanka,753,35,1221,34.885714,21.514286,3.700246
v England,937,38,1468,38.631579,24.657895,3.8297
v Australia,779,38,1494,39.315789,20.5,3.128514
v New Zealand,427,13,777,59.769231,32.846154,3.297297
v India,700,24,1460,60.833333,29.166667,2.876712


We can add in the most recent date on which Rabada played each of those nations.

In [52]:
# Latest start_date per opposition, joined onto the per-nation totals.
# start_date is selected before aggregating, so the max is taken over that one
# column rather than over every column only for the rest to be discarded.
latest_start_date = bowling.reset_index().groupby('opposition')[['start_date']].max()
latest_start_date.join(by_nation)

Unnamed: 0_level_0,start_date,runs,wkts,total_balls,strike_rate,average,econ
opposition,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
v Australia,2018-03-30,779,38,1494,39.315789,20.5,3.128514
v Bangladesh,2017-10-06,180,15,353,23.533333,12.0,3.05949
v England,2017-08-04,937,38,1468,38.631579,24.657895,3.8297
v India,2019-10-19,700,24,1460,60.833333,29.166667,2.876712
v New Zealand,2017-03-25,427,13,777,59.769231,32.846154,3.297297
v Pakistan,2019-01-11,318,17,518,30.470588,18.705882,3.683398
v Sri Lanka,2019-02-21,753,35,1221,34.885714,21.514286,3.700246
v Zimbabwe,2017-12-26,24,3,79,26.333333,8.0,1.822785


In [81]:
# Cut-off date: one day after the latest England start_date (2017-08-04) in the
# per-opposition table above.
england_threshold = '2017-08-05'

# Rows from the start of the index up to the threshold date.
up_to_and_including_eng = bowling[:england_threshold]

# Bowling average (runs per wicket) over that period.
runs_to_threshold = up_to_and_including_eng.runs.sum()
wkts_to_threshold = up_to_and_including_eng.wkts.sum()
runs_to_threshold / wkts_to_threshold

24.563218390804597

In [82]:
# Bowling average (runs per wicket) for the rows from the threshold date onwards.
# The slice is taken once and reused for both sums.
from_threshold_on = bowling[england_threshold:]
from_threshold_on.runs.sum() / from_threshold_on.wkts.sum()

20.635416666666668

In [83]:
# Totals per opposition from the threshold date onwards. The columns are named
# explicitly so the result does not depend on groupby().sum() dropping
# non-numeric columns, which pandas 2.0 no longer does.
bowling[england_threshold:].groupby('opposition')[['runs', 'wkts', 'total_balls']].sum()

Unnamed: 0_level_0,runs,wkts,total_balls
opposition,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
v Australia,443,23,845
v Bangladesh,180,15,353
v India,589,22,1166
v Pakistan,318,17,518
v Sri Lanka,427,16,699
v Zimbabwe,24,3,79


In [84]:
# Totals per opposition up to the threshold date. The columns are named
# explicitly so the result does not depend on groupby().sum() dropping
# non-numeric columns, which pandas 2.0 no longer does.
bowling[:england_threshold].groupby('opposition')[['runs', 'wkts', 'total_balls']].sum()

Unnamed: 0_level_0,runs,wkts,total_balls
opposition,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
v Australia,336,15,649
v England,937,38,1468
v India,111,2,294
v New Zealand,427,13,777
v Sri Lanka,326,19,522
