In [309]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn

import warnings
warnings.filterwarnings("ignore")

In [310]:
# Load the pre-processed match-level and ball-by-ball tables from the working directory.
Matches, Deliveries = (
    pd.read_csv(csv_name)
    for csv_name in ("matches_processed.csv", "deliveries_processed.csv")
)

In [311]:
# Print the column index of each input table, one after the other.
print(Matches.columns, Deliveries.columns, sep="\n")

Index(['id', 'player_of_match', 'venue', 'team1', 'team2', 'toss_winner',
       'winner', 'result', 'target_runs', 'target_overs', 'Home_Win', 'loser',
       'Home_Loss', 'Neutral', 'toss_match_win'],
      dtype='object')
Index(['match_id', 'inning', 'batting_team', 'bowling_team', 'over', 'ball',
       'batter', 'bowler', 'non_striker', 'batsman_runs', 'extra_runs',
       'total_runs', 'extras_type', 'is_wicket', 'player_dismissed',
       'dismissal_kind', 'score'],
      dtype='object')


In [312]:
# Every distinct name that appears as batter, bowler or non-striker, in order of
# first appearance when the three columns are stacked in that order.
unique_players = (
    pd.concat([Deliveries[role] for role in ('batter', 'bowler', 'non_striker')])
    .dropna()
    .unique()
)
print(unique_players)

['SC Ganguly' 'BB McCullum' 'RT Ponting' 'DJ Hussey' 'Mohammad Hafeez'
 'R Dravid' 'W Jaffer' 'V Kohli' 'JH Kallis' 'CL White' 'MV Boucher'
 'B Akhil' 'AA Noffke' 'P Kumar' 'Z Khan' 'SB Joshi' 'PA Patel'
 'ML Hayden' 'MEK Hussey' 'MS Dhoni' 'SK Raina' 'JDP Oram' 'S Badrinath'
 'K Goel' 'JR Hopes' 'KC Sangakkara' 'Yuvraj Singh' 'SM Katich'
 'IK Pathan' 'T Kohli' 'YK Pathan' 'SR Watson' 'M Kaif' 'DS Lehmann'
 'RA Jadeja' 'M Rawat' 'D Salunkhe' 'SK Warne' 'SK Trivedi' 'G Gambhir'
 'V Sehwag' 'S Dhawan' 'L Ronchi' 'ST Jayasuriya' 'DJ Thornely'
 'RV Uthappa' 'PR Shah' 'AM Nayar' 'SM Pollock' 'Harbhajan Singh'
 'S Chanderpaul' 'LRPL Taylor' 'AC Gilchrist' 'Y Venugopal Rao'
 'VVS Laxman' 'A Symonds' 'RG Sharma' 'SB Styris' 'AS Yadav' 'SB Bangar'
 'WPUJC Vaas' 'RP Singh' 'WP Saha' 'LR Shukla' 'DPMD Jayawardene'
 'S Sohal' 'B Lee' 'PP Chawla' 'WA Mota' 'Kamran Akmal' 'Shahid Afridi'
 'DJ Bravo' 'MA Khote' 'A Nehra' 'GC Smith' 'Pankaj Singh' 'RR Sarwan'
 'S Sreesanth' 'VRV Singh' 'SS Tiwary' 'DS

In [313]:
# Build one row per player holding batting and bowling aggregates computed from
# the ball-by-ball `Deliveries` table.
Players = pd.DataFrame(unique_players, columns=['player'])

# Batting aggregates, one row per name in the `batter` column.
# NOTE(review): `matches_played` only counts matches in which the player appears
# as `batter`; players who never appear there end up with 0 after the fillna below.
# NOTE(review): `balls_faced` counts every delivery row for the batter, whatever
# its `extras_type` — confirm whether some extras should be excluded.
batting_stats = (
    Deliveries.groupby('batter')
    .agg(
        matches_played=('match_id', 'nunique'),
        total_runs=('batsman_runs', 'sum'),
        balls_faced=('batsman_runs', 'count'),
        total_4s=('batsman_runs', lambda runs: (runs == 4).sum()),
        total_6s=('batsman_runs', lambda runs: (runs == 6).sum()),
    )
    .reset_index()
    .rename(columns={'batter': 'player'})  # key must match the lowercase column name
)

# Bowling aggregates, one row per name in the `bowler` column.
# NOTE(review): `wickets_taken` sums `is_wicket` regardless of `dismissal_kind`,
# and `overs_bowled` divides the count of all delivery rows by 6 regardless of
# `extras_type` — confirm this matches the intended definitions.
bowling_stats = (
    Deliveries.groupby('bowler')
    .agg(
        balls_bowled=('match_id', 'count'),
        extras_given=('extra_runs', 'sum'),
        runs_given=('total_runs', 'sum'),
        wickets_taken=('is_wicket', 'sum'),
    )
    .reset_index()
    .rename(columns={'bowler': 'player'})
    .assign(overs_bowled=lambda stats: stats['balls_bowled'] / 6)
    .drop(columns='balls_bowled')
)

# Left-join both stat tables onto the full player list; a player with no batting
# (or no bowling) rows gets 0 in the corresponding columns.
players_stats = (
    Players
    .merge(batting_stats, on='player', how='left')
    .merge(bowling_stats, on='player', how='left')
    .fillna(0)
)

players_stats.head()

Unnamed: 0,player,matches_played,total_runs,balls_faced,total_4s,total_6s,extras_given,runs_given,wickets_taken,overs_bowled
0,SC Ganguly,38.0,1031.0,977.0,104.0,36.0,8.0,273.0,9.0,36.5
1,BB McCullum,90.0,2256.0,1777.0,230.0,103.0,0.0,0.0,0.0,0.0
2,RT Ponting,8.0,77.0,112.0,3.0,2.0,0.0,0.0,0.0,0.0
3,DJ Hussey,57.0,1271.0,1051.0,87.0,58.0,18.0,485.0,10.0,53.833333
4,Mohammad Hafeez,8.0,64.0,84.0,7.0,2.0,10.0,71.0,2.0,10.5


In [314]:
# Give every player an integer code equal to its index label plus one.
players_stats = players_stats.assign(player_code=players_stats.index + 1)

In [315]:
# Display the players_stats frame (now including `player_code`) as the cell output.
players_stats

Unnamed: 0,player,matches_played,total_runs,balls_faced,total_4s,total_6s,extras_given,runs_given,wickets_taken,overs_bowled,player_code
0,SC Ganguly,38.0,1031.0,977.0,104.0,36.0,8.0,273.0,9.0,36.500000,1
1,BB McCullum,90.0,2256.0,1777.0,230.0,103.0,0.0,0.0,0.0,0.000000,2
2,RT Ponting,8.0,77.0,112.0,3.0,2.0,0.0,0.0,0.0,0.000000,3
3,DJ Hussey,57.0,1271.0,1051.0,87.0,58.0,18.0,485.0,10.0,53.833333,4
4,Mohammad Hafeez,8.0,64.0,84.0,7.0,2.0,10.0,71.0,2.0,10.500000,5
...,...,...,...,...,...,...,...,...,...,...,...
707,M Siddharth,0.0,0.0,0.0,0.0,0.0,6.0,72.0,1.0,9.666667,708
708,MP Yadav,0.0,0.0,0.0,0.0,0.0,6.0,88.0,8.0,12.666667,709
709,S Joseph,0.0,0.0,0.0,0.0,0.0,14.0,51.0,0.0,5.000000,710
710,N Thushara,0.0,0.0,0.0,0.0,0.0,20.0,259.0,8.0,28.666667,711


In [316]:
# Persist the per-player table without the index column.
players_stats.to_csv(path_or_buf='players_stats.csv', index=False)

In [317]:
# Preview the first five delivery rows (five is head's default).
Deliveries.head()

Unnamed: 0,match_id,inning,batting_team,bowling_team,over,ball,batter,bowler,non_striker,batsman_runs,extra_runs,total_runs,extras_type,is_wicket,player_dismissed,dismissal_kind,score
0,335982,1,Kolkata Knight Riders,Royal Challengers Bangalore,0,1,SC Ganguly,P Kumar,BB McCullum,0,1,1,legbyes,0,,,1
1,335982,1,Kolkata Knight Riders,Royal Challengers Bangalore,0,2,BB McCullum,P Kumar,SC Ganguly,0,0,0,,0,,,1
2,335982,1,Kolkata Knight Riders,Royal Challengers Bangalore,0,3,BB McCullum,P Kumar,SC Ganguly,0,1,1,wides,0,,,2
3,335982,1,Kolkata Knight Riders,Royal Challengers Bangalore,0,4,BB McCullum,P Kumar,SC Ganguly,0,0,0,,0,,,2
4,335982,1,Kolkata Knight Riders,Royal Challengers Bangalore,0,5,BB McCullum,P Kumar,SC Ganguly,0,0,0,,0,,,2


In [318]:
# Preview the first five match rows (five is head's default).
Matches.head()

Unnamed: 0,id,player_of_match,venue,team1,team2,toss_winner,winner,result,target_runs,target_overs,Home_Win,loser,Home_Loss,Neutral,toss_match_win
0,335982,BB McCullum,"M Chinnaswamy Stadium, Bengaluru",Royal Challengers Bangalore,Kolkata Knight Riders,Royal Challengers Bangalore,Kolkata Knight Riders,runs,223.0,20.0,False,Royal Challengers Bangalore,True,False,False
1,335983,MEK Hussey,"Punjab Cricket Association IS Bindra Stadium, ...",Punjab Kings,Chennai Super Kings,Chennai Super Kings,Chennai Super Kings,runs,241.0,20.0,False,Punjab Kings,True,False,True
2,335984,MF Maharoof,"Arun Jaitley Stadium, Delhi",Delhi Capitals,Rajasthan Royals,Rajasthan Royals,Delhi Capitals,wickets,130.0,20.0,True,Rajasthan Royals,False,False,False
3,335985,MV Boucher,"Wankhede Stadium, Mumbai",Mumbai Indians,Royal Challengers Bangalore,Mumbai Indians,Royal Challengers Bangalore,wickets,166.0,20.0,False,Mumbai Indians,True,False,False
4,335986,DJ Hussey,"Eden Gardens, Kolkata",Kolkata Knight Riders,Sunrisers Hyderabad,Sunrisers Hyderabad,Kolkata Knight Riders,wickets,111.0,20.0,True,Sunrisers Hyderabad,False,False,False


In [319]:
# Rename `id` to `match_id` so Matches shares its key name with Deliveries.
Matches = Matches.rename(columns={'id': 'match_id'})

In [320]:
# Attach each match's `team1` and `Neutral` values to every delivery row.
# Uses the default inner join on `match_id`.
df_merged = Deliveries.merge(
    Matches[['match_id', 'team1', 'Neutral']],
    on='match_id',
)

In [321]:
# True when the batting side is `team1` and the match is not flagged Neutral.
# NOTE(review): this treats `team1` as the home side — confirm against how
# matches_processed.csv was built.
df_merged['is_home_team'] = (
    df_merged['batting_team'].eq(df_merged['team1'])
    & df_merged['Neutral'].ne(True)
)

In [322]:
# Preview the first five rows with the new `is_home_team` flag.
df_merged.head()

Unnamed: 0,match_id,inning,batting_team,bowling_team,over,ball,batter,bowler,non_striker,batsman_runs,extra_runs,total_runs,extras_type,is_wicket,player_dismissed,dismissal_kind,score,team1,Neutral,is_home_team
0,335982,1,Kolkata Knight Riders,Royal Challengers Bangalore,0,1,SC Ganguly,P Kumar,BB McCullum,0,1,1,legbyes,0,,,1,Royal Challengers Bangalore,False,False
1,335982,1,Kolkata Knight Riders,Royal Challengers Bangalore,0,2,BB McCullum,P Kumar,SC Ganguly,0,0,0,,0,,,1,Royal Challengers Bangalore,False,False
2,335982,1,Kolkata Knight Riders,Royal Challengers Bangalore,0,3,BB McCullum,P Kumar,SC Ganguly,0,1,1,wides,0,,,2,Royal Challengers Bangalore,False,False
3,335982,1,Kolkata Knight Riders,Royal Challengers Bangalore,0,4,BB McCullum,P Kumar,SC Ganguly,0,0,0,,0,,,2,Royal Challengers Bangalore,False,False
4,335982,1,Kolkata Knight Riders,Royal Challengers Bangalore,0,5,BB McCullum,P Kumar,SC Ganguly,0,0,0,,0,,,2,Royal Challengers Bangalore,False,False


In [323]:
# Total runs per (match, innings), stored as `final_innings_score`.
final_scores = (
    df_merged
    .groupby(['match_id', 'inning'])
    .agg(final_innings_score=('total_runs', 'sum'))
    .reset_index()
)

In [324]:
# Broadcast each innings total back onto every delivery of that innings.
df_merged = df_merged.merge(
    final_scores,
    how='left',
    on=['match_id', 'inning'],
)

In [325]:
# Preview the first five rows with `final_innings_score` attached.
df_merged.head()

Unnamed: 0,match_id,inning,batting_team,bowling_team,over,ball,batter,bowler,non_striker,batsman_runs,...,total_runs,extras_type,is_wicket,player_dismissed,dismissal_kind,score,team1,Neutral,is_home_team,final_innings_score
0,335982,1,Kolkata Knight Riders,Royal Challengers Bangalore,0,1,SC Ganguly,P Kumar,BB McCullum,0,...,1,legbyes,0,,,1,Royal Challengers Bangalore,False,False,222
1,335982,1,Kolkata Knight Riders,Royal Challengers Bangalore,0,2,BB McCullum,P Kumar,SC Ganguly,0,...,0,,0,,,1,Royal Challengers Bangalore,False,False,222
2,335982,1,Kolkata Knight Riders,Royal Challengers Bangalore,0,3,BB McCullum,P Kumar,SC Ganguly,0,...,1,wides,0,,,2,Royal Challengers Bangalore,False,False,222
3,335982,1,Kolkata Knight Riders,Royal Challengers Bangalore,0,4,BB McCullum,P Kumar,SC Ganguly,0,...,0,,0,,,2,Royal Challengers Bangalore,False,False,222
4,335982,1,Kolkata Knight Riders,Royal Challengers Bangalore,0,5,BB McCullum,P Kumar,SC Ganguly,0,...,0,,0,,,2,Royal Challengers Bangalore,False,False,222


In [326]:
# Attach each batter's integer `player_code` as `batter_code`.
# A direct name -> code lookup replaces the merge/rename/drop round trip. It
# yields the same trailing column, and re-running the cell overwrites
# `batter_code` instead of leaving two columns with that name.
# Names missing from players_stats become NaN.
# Relies on players_stats['player'] holding unique names.
df_merged['batter_code'] = df_merged['batter'].map(
    players_stats.set_index('player')['player_code']
)

In [327]:
# Attach each bowler's integer `player_code` as `bowler_code`.
# A direct name -> code lookup replaces the merge/rename/drop round trip. It
# yields the same trailing column, and re-running the cell overwrites
# `bowler_code` instead of leaving two columns with that name.
# Names missing from players_stats become NaN.
# Relies on players_stats['player'] holding unique names.
df_merged['bowler_code'] = df_merged['bowler'].map(
    players_stats.set_index('player')['player_code']
)

In [328]:
# Attach each non-striker's integer `player_code` as `non_striker_code`.
# A direct name -> code lookup replaces the merge/rename/drop round trip. It
# yields the same trailing column, and re-running the cell overwrites
# `non_striker_code` instead of leaving two columns with that name.
# Names missing from players_stats become NaN.
# Relies on players_stats['player'] holding unique names.
df_merged['non_striker_code'] = df_merged['non_striker'].map(
    players_stats.set_index('player')['player_code']
)

In [329]:
# Preview the first five rows with the three player-code columns.
df_merged.head()

Unnamed: 0,match_id,inning,batting_team,bowling_team,over,ball,batter,bowler,non_striker,batsman_runs,...,player_dismissed,dismissal_kind,score,team1,Neutral,is_home_team,final_innings_score,batter_code,bowler_code,non_striker_code
0,335982,1,Kolkata Knight Riders,Royal Challengers Bangalore,0,1,SC Ganguly,P Kumar,BB McCullum,0,...,,,1,Royal Challengers Bangalore,False,False,222,1,14,2
1,335982,1,Kolkata Knight Riders,Royal Challengers Bangalore,0,2,BB McCullum,P Kumar,SC Ganguly,0,...,,,1,Royal Challengers Bangalore,False,False,222,2,14,1
2,335982,1,Kolkata Knight Riders,Royal Challengers Bangalore,0,3,BB McCullum,P Kumar,SC Ganguly,0,...,,,2,Royal Challengers Bangalore,False,False,222,2,14,1
3,335982,1,Kolkata Knight Riders,Royal Challengers Bangalore,0,4,BB McCullum,P Kumar,SC Ganguly,0,...,,,2,Royal Challengers Bangalore,False,False,222,2,14,1
4,335982,1,Kolkata Knight Riders,Royal Challengers Bangalore,0,5,BB McCullum,P Kumar,SC Ganguly,0,...,,,2,Royal Challengers Bangalore,False,False,222,2,14,1


### Team Encoding

In [330]:
# Map each team name to a 1-based integer code, numbered by first appearance.
# Names are collected from `batting_team` first and then `bowling_team`, so a
# team that only ever appears as the bowling side still receives a code, while
# every batting team keeps the code it would get from `batting_team` alone.
team_names = pd.concat(
    [df_merged['batting_team'], df_merged['bowling_team']]
).unique()
encoder = {team: code for code, team in enumerate(team_names, start=1)}
encoder

{'Kolkata Knight Riders': 1,
 'Royal Challengers Bangalore': 2,
 'Chennai Super Kings': 3,
 'Punjab Kings': 4,
 'Rajasthan Royals': 5,
 'Delhi Capitals': 6,
 'Mumbai Indians': 7,
 'Sunrisers Hyderabad': 8,
 'Gujarat Titans': 9,
 'Lucknow Super Giants': 10}

In [331]:
# Translate both team-name columns into their integer codes via `encoder`.
df_merged = df_merged.assign(
    batting_team_code=df_merged['batting_team'].map(encoder),
    bowling_team_code=df_merged['bowling_team'].map(encoder),
)

In [332]:
# Preview the first five rows with the team-code columns.
df_merged.head()

Unnamed: 0,match_id,inning,batting_team,bowling_team,over,ball,batter,bowler,non_striker,batsman_runs,...,score,team1,Neutral,is_home_team,final_innings_score,batter_code,bowler_code,non_striker_code,batting_team_code,bowling_team_code
0,335982,1,Kolkata Knight Riders,Royal Challengers Bangalore,0,1,SC Ganguly,P Kumar,BB McCullum,0,...,1,Royal Challengers Bangalore,False,False,222,1,14,2,1,2
1,335982,1,Kolkata Knight Riders,Royal Challengers Bangalore,0,2,BB McCullum,P Kumar,SC Ganguly,0,...,1,Royal Challengers Bangalore,False,False,222,2,14,1,1,2
2,335982,1,Kolkata Knight Riders,Royal Challengers Bangalore,0,3,BB McCullum,P Kumar,SC Ganguly,0,...,2,Royal Challengers Bangalore,False,False,222,2,14,1,1,2
3,335982,1,Kolkata Knight Riders,Royal Challengers Bangalore,0,4,BB McCullum,P Kumar,SC Ganguly,0,...,2,Royal Challengers Bangalore,False,False,222,2,14,1,1,2
4,335982,1,Kolkata Knight Riders,Royal Challengers Bangalore,0,5,BB McCullum,P Kumar,SC Ganguly,0,...,2,Royal Challengers Bangalore,False,False,222,2,14,1,1,2


In [333]:
# Persist the feature-enriched delivery table without the index column.
df_merged.to_csv(path_or_buf='Featured_Matches_with_Deliveries.csv', index=False)