## Pandas - working with CSV file

In [1]:
import pandas as pd
import numpy as np

## Problem statement

- isin()
- Find the most destructive death over batsman in the history of IPL
- Strike rate = (number of runs / number of balls) × 100
- Only batsmen who faced a minimum of 200 balls in overs 16-20 are considered

In this Pandas tutorial, we use the `isin()` function to find the most destructive death-overs batsman in the history of the IPL. We calculate each batsman's strike rate (runs scored per 100 balls faced) and keep only the players who faced a minimum of 200 balls in overs 16 to 20. By analyzing this data, we can identify the most impactful batsman during the crucial final overs of an IPL match.

In [2]:
# Load the deliveries data; the relative path means 'deliveries.csv' must sit
# in the notebook's working directory.
delivery = pd.read_csv('deliveries.csv')

In [3]:
# Peek at 5 randomly chosen rows; no random_state is passed, so the rows shown
# differ from run to run.
delivery.sample(5)

Unnamed: 0,match_id,inning,batting_team,bowling_team,over,ball,batsman,non_striker,bowler,is_super_over,...,bye_runs,legbye_runs,noball_runs,penalty_runs,batsman_runs,extra_runs,total_runs,player_dismissed,dismissal_kind,fielder
29711,128,2,Mumbai Indians,Deccan Chargers,9,1,JP Duminy,SR Tendulkar,DR Smith,0,...,0,0,0,0,4,0,4,,,
67092,284,1,Kings XI Punjab,Pune Warriors,2,5,PC Valthaty,AC Gilchrist,SB Wagh,0,...,0,0,0,0,0,0,0,,,
107027,451,2,Delhi Daredevils,Pune Warriors,9,5,BJ Rohrer,IK Pathan,AD Mathews,0,...,0,0,0,0,1,0,1,,,
27185,117,1,Chennai Super Kings,Rajasthan Royals,14,5,SK Raina,MS Dhoni,SK Trivedi,0,...,0,0,0,0,0,0,0,,,
112979,477,1,Sunrisers Hyderabad,Mumbai Indians,10,1,KL Rahul,DA Warner,KA Pollard,0,...,0,0,0,0,1,0,1,,,


In [4]:
# Requirement: batsmen with a minimum of 200 balls in overs 16-20.
# Boolean Series: True for deliveries bowled in overs 16-20.
delivery['over']>15

# Indexing with that Series keeps only the matching rows:
# delivery[delivery['over']>15]

# The same selection can be expressed with DataFrame.mask, which keeps the
# frame's shape and replaces every row where the condition is True with NaN.
# The condition therefore has to be the exact complement of `over > 15`,
# i.e. `over <= 15`; using `< 15` would leave over 15 unmasked.
mask_over = delivery.mask(delivery['over']<=15)
mask_over

# The next cell redoes this with a boolean mask and plain indexing, which drops
# the unwanted rows instead of blanking them out.

Unnamed: 0,match_id,inning,batting_team,bowling_team,over,ball,batsman,non_striker,bowler,is_super_over,...,bye_runs,legbye_runs,noball_runs,penalty_runs,batsman_runs,extra_runs,total_runs,player_dismissed,dismissal_kind,fielder
0,,,,,,,,,,,...,,,,,,,,,,
1,,,,,,,,,,,...,,,,,,,,,,
2,,,,,,,,,,,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
150455,636.0,2.0,Royal Challengers Bangalore,Sunrisers Hyderabad,20.0,2.0,Sachin Baby,CJ Jordan,B Kumar,0.0,...,0.0,0.0,0.0,0.0,2.0,0.0,2.0,,,
150456,636.0,2.0,Royal Challengers Bangalore,Sunrisers Hyderabad,20.0,3.0,Sachin Baby,CJ Jordan,B Kumar,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,CJ Jordan,run out,NV Ojha
150457,636.0,2.0,Royal Challengers Bangalore,Sunrisers Hyderabad,20.0,4.0,Iqbal Abdulla,Sachin Baby,B Kumar,0.0,...,0.0,1.0,0.0,0.0,0.0,1.0,1.0,,,
150458,636.0,2.0,Royal Challengers Bangalore,Sunrisers Hyderabad,20.0,5.0,Sachin Baby,Iqbal Abdulla,B Kumar,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,1.0,,,


In [5]:
# Boolean mask: True for deliveries in overs 16-20 (over strictly above 15)
mask_over = delivery['over'].gt(15)

# Keep only those death-over deliveries and show two random rows of the result
delivery2 = delivery.loc[mask_over]
delivery2.sample(n=2)

Unnamed: 0,match_id,inning,batting_team,bowling_team,over,ball,batsman,non_striker,bowler,is_super_over,...,bye_runs,legbye_runs,noball_runs,penalty_runs,batsman_runs,extra_runs,total_runs,player_dismissed,dismissal_kind,fielder
53655,227,1,Rajasthan Royals,Kolkata Knight Riders,16,2,AS Raut,P Dogra,JD Unadkat,0,...,0,0,0,0,1,0,1,,,
90575,383,1,Royal Challengers Bangalore,Mumbai Indians,17,1,KB Arun Karthik,CH Gayle,JDP Oram,0,...,0,0,0,0,4,0,4,,,


In [6]:
# Per-batsman count over the complete `delivery` frame (evaluated, but not
# displayed because it is not the last expression of the cell)
delivery.groupby(by='batsman')['batsman_runs'].count()

# Same count restricted to `delivery2`, i.e. rows where over is greater than 15
delivery2.groupby(by='batsman')['batsman_runs'].count()

batsman
A Ashish Reddy    148
A Chandila          7
A Chopra            2
A Choudhary        20
A Flintoff         18
                 ... 
YS Chahal          27
YV Takawale        13
Yashpal Singh      13
Yuvraj Singh      516
Z Khan            109
Name: batsman_runs, Length: 416, dtype: int64

In [7]:
# Number of batsman_runs entries (one per delivery row) per batsman in overs 16-20
all_batsman = delivery2.groupby(by='batsman')['batsman_runs'].count()

# Boolean mask: True where that ball count is above 200.
# NOTE(review): the strict comparison leaves out a batsman with exactly 200
# balls, while the problem statement says "min 200 balls" -- confirm whether
# >= 200 is intended.
x = all_batsman.gt(200)

# Apply the mask -> ball counts (not runs) greater than 200
all_batsman.loc[x]

batsman
A Mishra             225
AB de Villiers       570
AD Mathews           289
AM Rahane            268
AR Patel             229
AT Rayudu            425
BJ Hodge             385
DA Miller            360
DA Warner            228
DJ Bravo             409
DJ Hussey            234
DPMD Jayawardene     246
Harbhajan Singh      418
IK Pathan            465
JA Morkel            425
JH Kallis            231
JP Duminy            518
JP Faulkner          294
KA Pollard           838
KD Karthik           463
KM Jadhav            338
LRPL Taylor          204
MK Pandey            224
MK Tiwary            423
MS Dhoni            1224
NV Ojha              304
P Kumar              268
PP Chawla            311
R Vinay Kumar        235
RA Jadeja            576
RG Sharma            748
RV Uthappa           275
S Badrinath          283
S Dhawan             243
SK Raina             458
SPD Smith            316
SS Tiwary            300
STR Binny            218
V Kohli              546
WP Saha          

In [8]:
# How many batsmen pass the ball-count filter
all_batsman.loc[x].shape

(43,)

In [9]:
# Only the batsman names are needed, and they live in the index of the
# filtered Series
all_batsman.loc[x].index

# Turn that Index into a plain Python list
all_batsman.loc[x].index.tolist()

# Keep the list of names in batsman_list and display it
batsman_list = all_batsman.loc[x].index.tolist()
batsman_list

['A Mishra',
 'AB de Villiers',
 'AD Mathews',
 'AM Rahane',
 'AR Patel',
 'AT Rayudu',
 'BJ Hodge',
 'DA Miller',
 'DA Warner',
 'DJ Bravo',
 'DJ Hussey',
 'DPMD Jayawardene',
 'Harbhajan Singh',
 'IK Pathan',
 'JA Morkel',
 'JH Kallis',
 'JP Duminy',
 'JP Faulkner',
 'KA Pollard',
 'KD Karthik',
 'KM Jadhav',
 'LRPL Taylor',
 'MK Pandey',
 'MK Tiwary',
 'MS Dhoni',
 'NV Ojha',
 'P Kumar',
 'PP Chawla',
 'R Vinay Kumar',
 'RA Jadeja',
 'RG Sharma',
 'RV Uthappa',
 'S Badrinath',
 'S Dhawan',
 'SK Raina',
 'SPD Smith',
 'SS Tiwary',
 'STR Binny',
 'V Kohli',
 'WP Saha',
 'Y Venugopal Rao',
 'YK Pathan',
 'Yuvraj Singh']

In [10]:
# Strike rate = (number of runs / number of balls) * 100
# Needed for the batsmen in batsman_list:
#   - runs scored by them
#   - balls played by them

# isin() yields a boolean Series: True where the row's batsman appears in
# batsman_list (details on isin() are given after this problem).
delivery['batsman'].isin(values=batsman_list)

# Use that boolean Series as a mask on `delivery` and show two random matching
# rows; nothing is assigned here, the filtered frame is only displayed.
delivery.loc[delivery['batsman'].isin(values=batsman_list)].sample(n=2)

Unnamed: 0,match_id,inning,batting_team,bowling_team,over,ball,batsman,non_striker,bowler,is_super_over,...,bye_runs,legbye_runs,noball_runs,penalty_runs,batsman_runs,extra_runs,total_runs,player_dismissed,dismissal_kind,fielder
62774,265,2,Kochi Tuskers Kerala,Deccan Chargers,12,6,R Vinay Kumar,RA Jadeja,A Mishra,0,...,0,0,0,0,1,0,1,,,
144209,610,1,Gujarat Lions,Sunrisers Hyderabad,5,2,SK Raina,BB McCullum,B Kumar,0,...,0,0,0,0,2,0,2,,,


In [11]:
# Rows of delivery2 (overs 16-20) whose batsman is in batsman_list
final = delivery2.loc[delivery2['batsman'].isin(values=batsman_list)]

# Runs per batsman: sum of batsman_runs within each batsman group
runs = final.groupby(by='batsman')['batsman_runs'].sum()

runs.head(n=2)

batsman
A Mishra           227
AB de Villiers    1203
Name: batsman_runs, dtype: int64

In [14]:
# Balls per batsman: count() tallies the non-null batsman_runs entries in
# each batsman group, i.e. one per delivery row
balls = final.groupby(by='batsman')['batsman_runs'].count()

balls.head(n=2)

batsman
A Mishra          225
AB de Villiers    570
Name: batsman_runs, dtype: int64

In [15]:
# Strike rate per batsman: runs divided by balls, scaled to runs per 100 balls.
# Both Series are indexed by batsman, so the division aligns on the name.
sr = runs.div(balls).mul(100)
sr

batsman
A Mishra            100.888889
AB de Villiers      211.052632
AD Mathews          147.058824
AM Rahane           152.985075
AR Patel            142.794760
AT Rayudu           165.411765
BJ Hodge            157.402597
DA Miller           186.666667
DA Warner           189.473684
DJ Bravo            167.726161
DJ Hussey           175.213675
DPMD Jayawardene    152.032520
Harbhajan Singh     147.607656
IK Pathan           142.580645
JA Morkel           149.882353
JH Kallis           170.562771
JP Duminy           167.760618
JP Faulkner         149.319728
KA Pollard          161.336516
KD Karthik          152.051836
KM Jadhav           144.378698
LRPL Taylor         152.941176
MK Pandey           151.785714
MK Tiwary           140.189125
MS Dhoni            169.607843
NV Ojha             134.868421
P Kumar             109.701493
PP Chawla           120.257235
R Vinay Kumar       108.936170
RA Jadeja           130.729167
RG Sharma           175.668449
RV Uthappa          173.454545
