In [121]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import lightgbm as lgb
import xgboost as xgb
from catboost import CatBoostClassifier
from sklearn.ensemble import GradientBoostingClassifier


In [122]:
# %pip install catboost

In [123]:
# Load the four raw inputs in one pass; the order of the paths matches the
# order of the names on the left-hand side.
matches, batsmen, bowlers, features = (
    pd.read_csv(csv_path)
    for csv_path in (
        'dataset/match_level_scorecard.csv',    # T20 games from past two years
        'dataset/batsman_level_scorecard.csv',  # Batsmen scorecard details
        'dataset/bowler_level_scorecard.csv',   # Bowler scorecard details
        'dataset/train_data.csv',               # Ready-to-use independent features
    )
)

In [124]:
# Distinct values of the 'match id' column in the match-level table;
# the trailing expression displays how many there are.
match_ids = matches['match id'].unique()
match_ids.shape

(1689,)

In [125]:
# (rows, columns) of each loaded frame, in the order: matches, batsmen, bowlers, features
matches.shape, batsmen.shape, bowlers.shape, features.shape

((1689, 30), (24483, 21), (18539, 18), (948, 23))

## Merging the datasets

In [126]:
# Keep a reference to the batsmen column Index so the next cell can display it.
batsmen_columns = batsmen.columns

In [127]:
batsmen_columns

Index(['match id', 'batsman', 'batsman_id', 'batsman_details',
       'is_batsman_captain', 'is_batsman_keeper', 'inning', 'runs',
       'balls_faced', 'over_faced_first', 'wicket kind', 'out_by_bowler',
       'out_by_fielder', 'bowler_id', 'bowler_details', 'is_bowler_keeper',
       'is_bowler_captain', 'strike_rate', 'Fours', 'Sixes', 'match_dt'],
      dtype='object')

In [128]:
# Tag the bowler-related and generic stat columns of the batsman scorecard with
# a '_batsmen_scorecard' suffix so they stay distinguishable from the identically
# named columns of the bowler scorecard once both are merged per match.
# All other columns (including 'inning' and 'runs') keep their original names.
#
# The rename is done by column NAME rather than by assigning a positional list:
# labels stay correct even if the CSV column order changes, and re-running the
# cell is a no-op because names that are no longer present are simply ignored.
batsmen_columns_to_suffix = [
    'bowler_id', 'bowler_details', 'is_bowler_keeper', 'is_bowler_captain',
    'strike_rate', 'Fours', 'Sixes', 'match_dt',
]
batsmen = batsmen.rename(
    columns={column: column + '_batsmen_scorecard' for column in batsmen_columns_to_suffix}
)

In [129]:
batsmen.head()

Unnamed: 0,match id,batsman,batsman_id,batsman_details,is_batsman_captain,is_batsman_keeper,inning,runs,balls_faced,over_faced_first,wicket kind,out_by_bowler,out_by_fielder,bowler_id_batsmen_scorecard,bowler_details_batsmen_scorecard,is_bowler_keeper_batsmen_scorecard,is_bowler_captain_batsmen_scorecard,strike_rate_batsmen_scorecard,Fours_batsmen_scorecard,Sixes_batsmen_scorecard,match_dt_batsmen_scorecard
0,8638034,KD Ce,7907451.0,NZ:Right-hand bat:Right-arm medium-fast:,0.0,0.0,1,7,5,1.1,caught,JS Nm,TM Jn,2486896.0,NZ:Left-hand bat:Right-arm medium-fast:,0.0,0.0,140.0,1.0,,2021-01-01
1,8638034,TL St,4381761.0,NZ:Right-hand bat:None:,0.0,1.0,1,46,46,1.2,caught,R Ra,MG Bl,6718382.0,NZ:Left-hand bat:Slow left-arm orthodox:,0.0,0.0,100.0,4.0,1.0,2021-01-01
2,8638034,HR Cr,4949790.0,NZ:Right-hand bat:Right-arm offbreak:,0.0,0.0,1,9,10,12.1,caught,PF Yd,R Ra,4950294.0,NZ:Right-hand bat:Legbreak googly:,0.0,0.0,90.0,,,2021-01-01
3,8638034,BR Hn,3834305.0,NZ:Right-hand bat:Right-arm medium-fast:,0.0,0.0,1,28,22,13.6,caught,HK Bt,FH An,1585464.0,NZ:Left-hand bat:Right-arm medium-fast:,0.0,0.0,127.27,3.0,,2021-01-01
4,8638034,SC Kn,3776849.0,NZ:Right-hand bat:Right-arm fast-medium:,0.0,0.0,1,18,13,17.2,,,,,,,,138.46,,1.0,2021-01-01


In [130]:
bowlers.columns

Index(['match id', 'bowler', 'bowler_id', 'bowler_details',
       'is_bowler_captain', 'is_bowler_keeper', 'inning', 'runs',
       'wicket_count', 'balls_bowled', 'economy', 'maiden', 'dots', 'Fours',
       'Sixes', 'wides', 'noballs', 'match_dt'],
      dtype='object')

In [131]:


# Helper used as a groupby aggregator: flatten a group into one string.
def join_values(series):
    """Return the elements of ``series`` joined by commas.

    Each element is rendered with ``str`` before joining, so nothing is
    skipped: a missing float value, for example, shows up as the text 'nan'.
    An empty input yields an empty string.
    """
    return ','.join([str(value) for value in series])

# Collapse the batsman scorecard to one row per match: every column listed
# below becomes a single comma-separated string produced by join_values.
batsmen_joined_columns = [
    'batsman',
    'batsman_id',
    'batsman_details',
    'is_batsman_captain',
    'is_batsman_keeper',
    'inning',
    'runs',
    'balls_faced',
    'over_faced_first',
    'wicket kind',
    'out_by_bowler',
    'out_by_fielder',
    'bowler_id_batsmen_scorecard',
    'bowler_details_batsmen_scorecard',
    'is_bowler_keeper_batsmen_scorecard',
    'is_bowler_captain_batsmen_scorecard',
    'strike_rate_batsmen_scorecard',
    'Fours_batsmen_scorecard',
    'Sixes_batsmen_scorecard',
    'match_dt_batsmen_scorecard',
]
batsmen_agg = (
    batsmen
    .groupby('match id')
    .agg(dict.fromkeys(batsmen_joined_columns, join_values))
    .reset_index()
)



In [132]:
bowlers.columns

Index(['match id', 'bowler', 'bowler_id', 'bowler_details',
       'is_bowler_captain', 'is_bowler_keeper', 'inning', 'runs',
       'wicket_count', 'balls_bowled', 'economy', 'maiden', 'dots', 'Fours',
       'Sixes', 'wides', 'noballs', 'match_dt'],
      dtype='object')

In [133]:
# Collapse the bowler scorecard to one row per match: every column listed below
# becomes a single comma-separated string.
# join_values is the helper defined in the batsmen aggregation cell above; it is
# reused here instead of being defined a second time, so there is only one
# definition to maintain.
bowlers_joined_columns = [
    'bowler',
    'bowler_id',
    'bowler_details',
    'is_bowler_keeper',
    'is_bowler_captain',
    'inning',
    'runs',
    'wicket_count',
    'balls_bowled',
    'economy',
    'maiden',
    'dots',
    'Fours',
    'Sixes',
    'wides',
    'noballs',
    'match_dt',
]
bowlers_agg = (
    bowlers
    .groupby('match id')
    .agg(dict.fromkeys(bowlers_joined_columns, join_values))
    .reset_index()
)

In [134]:
matches.shape

(1689, 30)

In [135]:
features.shape

(948, 23)

In [136]:
# NOTE(review): `match_data` is not assigned in any visible cell of this notebook.
# This line presumably relies on a variable left over from an earlier or deleted
# cell and would raise NameError on a fresh kernel — confirm and restore the
# cell that builds it.
match_data.head()

Unnamed: 0,match id,team1,team2,winner,by,win amount,toss winner,toss decision,venue,city,match_dt_match,lighting,series_name,season,ground_id,umpire1,umpire2,inning1_runs,inning1_wickets,inning1_balls,inning2_runs,inning2_wickets,inning2_balls,team1_id,team1_roster_ids,team2_id,team2_roster_ids,series_type,winner_id,player_of_the_match_id,batsman,batsman_id,batsman_details,is_batsman_captain,is_batsman_keeper,inning,runs,balls_faced,over_faced_first,wicket kind,out_by_bowler,out_by_fielder,bowler_id,bowler_details,is_bowler_keeper,is_bowler_captain,strike_rate,Fours,Sixes,match_dt_batsmen,bowler,bowler_id_bowlers,bowler_details_bowlers,is_bowler_captain_bowlers,is_bowler_keeper_bowlers,inning_bowlers,runs_bowlers,wicket_count,balls_bowled,economy,maiden,dots,Fours_bowlers,Sixes_bowlers,wides,noballs,match_dt,team1_features,team1_id_features,team1_roster_ids_features,team2_features,team2_id_features,team2_roster_ids_features,winner_features,winner_id_features,toss winner_features,toss decision_features,venue_features,city_features,match_dt_features,lighting_features,series_name_features,season_features,ground_id_features,team_count_50runs_last15,team_winp_last5,team1only_avg_runs_last15,team1_winp_team2_last15,ground_avg_runs_last15
0,9331181,Ba,Hl Ph,Hl Ph,wickets,8.0,Hl Ph,field,Hr Ct Sm Ie,Indore,2022-10-20,day/night match,Sd Mq Ai Ty,2022/23,7398,A Bi,Sh De,129,8,122,130.0,2.0,105.0,11283,9373356.0:7857520.0:4232164.0:4566540.0:329940...,12634,3500958.0:4231751.0:2735081.0:2035102.0:369833...,other_domestic,12634,,HS Di,9373356.0,IND:Left-hand bat:Right-arm medium-fast:,0.0,0.0,1,6,5,1.1,run out,R Dn,SL Va,2035102.0,IND:Right-hand bat:Right-arm medium-fast:,0.0,1.0,120.0,1.0,,2022-10-20,R Dn,2035102.0,IND:Right-hand bat:Right-arm medium-fast:,1.0,0.0,1,31,2,24,7.75,0,9.0,3,1,2,0,2022-10-20,Ba,11283,9373356.0:7857520.0:4232164.0:4566540.0:329940...,Hl Ph,12634,3500958.0:4231751.0:2735081.0:2035102.0:369833...,Hl Ph,12634,Hl Ph,field,Hr Ct Sm Ie,Indore,2022-10-20,day/night match,Sd Mq Ai Ty,2022/23,7398,1.666667,0.672131,139.0,100.0,157.178571
1,9331181,Ba,Hl Ph,Hl Ph,wickets,8.0,Hl Ph,field,Hr Ct Sm Ie,Indore,2022-10-20,day/night match,Sd Mq Ai Ty,2022/23,7398,A Bi,Sh De,129,8,122,130.0,2.0,105.0,11283,9373356.0:7857520.0:4232164.0:4566540.0:329940...,12634,3500958.0:4231751.0:2735081.0:2035102.0:369833...,other_domestic,12634,,HS Di,9373356.0,IND:Left-hand bat:Right-arm medium-fast:,0.0,0.0,1,6,5,1.1,run out,R Dn,SL Va,2035102.0,IND:Right-hand bat:Right-arm medium-fast:,0.0,1.0,120.0,1.0,,2022-10-20,VG Aa,8465057.0,IND:Right-hand bat:Right-arm fast-medium:,0.0,0.0,1,28,2,24,7.0,0,8.0,4,0,0,0,2022-10-20,Ba,11283,9373356.0:7857520.0:4232164.0:4566540.0:329940...,Hl Ph,12634,3500958.0:4231751.0:2735081.0:2035102.0:369833...,Hl Ph,12634,Hl Ph,field,Hr Ct Sm Ie,Indore,2022-10-20,day/night match,Sd Mq Ai Ty,2022/23,7398,1.666667,0.672131,139.0,100.0,157.178571
2,9331181,Ba,Hl Ph,Hl Ph,wickets,8.0,Hl Ph,field,Hr Ct Sm Ie,Indore,2022-10-20,day/night match,Sd Mq Ai Ty,2022/23,7398,A Bi,Sh De,129,8,122,130.0,2.0,105.0,11283,9373356.0:7857520.0:4232164.0:4566540.0:329940...,12634,3500958.0:4231751.0:2735081.0:2035102.0:369833...,other_domestic,12634,,HS Di,9373356.0,IND:Left-hand bat:Right-arm medium-fast:,0.0,0.0,1,6,5,1.1,run out,R Dn,SL Va,2035102.0,IND:Right-hand bat:Right-arm medium-fast:,0.0,1.0,120.0,1.0,,2022-10-20,KR Ke,7878989.0,IND:Right-hand bat:Right-arm offbreak:,0.0,0.0,2,13,1,17,4.59,0,5.0,0,0,1,0,2022-10-20,Ba,11283,9373356.0:7857520.0:4232164.0:4566540.0:329940...,Hl Ph,12634,3500958.0:4231751.0:2735081.0:2035102.0:369833...,Hl Ph,12634,Hl Ph,field,Hr Ct Sm Ie,Indore,2022-10-20,day/night match,Sd Mq Ai Ty,2022/23,7398,1.666667,0.672131,139.0,100.0,157.178571
3,9331181,Ba,Hl Ph,Hl Ph,wickets,8.0,Hl Ph,field,Hr Ct Sm Ie,Indore,2022-10-20,day/night match,Sd Mq Ai Ty,2022/23,7398,A Bi,Sh De,129,8,122,130.0,2.0,105.0,11283,9373356.0:7857520.0:4232164.0:4566540.0:329940...,12634,3500958.0:4231751.0:2735081.0:2035102.0:369833...,other_domestic,12634,,HS Di,9373356.0,IND:Left-hand bat:Right-arm medium-fast:,0.0,0.0,1,6,5,1.1,run out,R Dn,SL Va,2035102.0,IND:Right-hand bat:Right-arm medium-fast:,0.0,1.0,120.0,1.0,,2022-10-20,LI Ma,4017523.0,IND:Left-hand bat:Left-arm fast-medium:,0.0,0.0,2,23,0,12,11.5,0,5.0,2,2,0,0,2022-10-20,Ba,11283,9373356.0:7857520.0:4232164.0:4566540.0:329940...,Hl Ph,12634,3500958.0:4231751.0:2735081.0:2035102.0:369833...,Hl Ph,12634,Hl Ph,field,Hr Ct Sm Ie,Indore,2022-10-20,day/night match,Sd Mq Ai Ty,2022/23,7398,1.666667,0.672131,139.0,100.0,157.178571
4,9331181,Ba,Hl Ph,Hl Ph,wickets,8.0,Hl Ph,field,Hr Ct Sm Ie,Indore,2022-10-20,day/night match,Sd Mq Ai Ty,2022/23,7398,A Bi,Sh De,129,8,122,130.0,2.0,105.0,11283,9373356.0:7857520.0:4232164.0:4566540.0:329940...,12634,3500958.0:4231751.0:2735081.0:2035102.0:369833...,other_domestic,12634,,HS Di,9373356.0,IND:Left-hand bat:Right-arm medium-fast:,0.0,0.0,1,6,5,1.1,run out,R Dn,SL Va,2035102.0,IND:Right-hand bat:Right-arm medium-fast:,0.0,1.0,120.0,1.0,,2022-10-20,NA Ra,7883504.0,IND:Left-hand bat:Slow left-arm orthodox:,0.0,0.0,2,21,1,24,5.25,0,9.0,1,0,0,0,2022-10-20,Ba,11283,9373356.0:7857520.0:4232164.0:4566540.0:329940...,Hl Ph,12634,3500958.0:4231751.0:2735081.0:2035102.0:369833...,Hl Ph,12634,Hl Ph,field,Hr Ct Sm Ie,Indore,2022-10-20,day/night match,Sd Mq Ai Ty,2022/23,7398,1.666667,0.672131,139.0,100.0,157.178571


In [137]:
# Write match_data to CSV without the index column.
# NOTE(review): `match_data` is not assigned in any visible cell — confirm where it is built.
match_data.to_csv('dataset/match_data.csv', index=False)

In [138]:
# Build the training frame: start from the feature table and attach, in order,
# the match-level details and the per-match batsman / bowler aggregates.
# Each step is an inner join on 'match id', so only matches present in every
# frame survive; columns that share a name get pandas' default _x / _y suffixes.
train_data = features
for frame_to_attach in (matches, batsmen_agg, bowlers_agg):
    train_data = train_data.merge(frame_to_attach, on='match id', how='inner')

In [139]:
train_data.shape

(948, 89)

In [140]:
# Replace every missing value in the merged frame with 0.
train_data = train_data.fillna(0)

In [141]:
# train_data.to_csv('dataset/updated_train_data.csv', index=False)

In [142]:
train_data.columns

Index(['match id', 'team1_x', 'team1_id_x', 'team1_roster_ids_x', 'team2_x',
       'team2_id_x', 'team2_roster_ids_x', 'winner_x', 'winner_id_x',
       'toss winner_x', 'toss decision_x', 'venue_x', 'city_x', 'match_dt_x',
       'lighting_x', 'series_name_x', 'season_x', 'ground_id_x',
       'team_count_50runs_last15', 'team_winp_last5',
       'team1only_avg_runs_last15', 'team1_winp_team2_last15',
       'ground_avg_runs_last15', 'team1_y', 'team2_y', 'winner_y', 'by',
       'win amount', 'toss winner_y', 'toss decision_y', 'venue_y', 'city_y',
       'match_dt_y', 'lighting_y', 'series_name_y', 'season_y', 'ground_id_y',
       'umpire1', 'umpire2', 'inning1_runs', 'inning1_wickets',
       'inning1_balls', 'inning2_runs', 'inning2_wickets', 'inning2_balls',
       'team1_id_y', 'team1_roster_ids_y', 'team2_id_y', 'team2_roster_ids_y',
       'series_type', 'winner_id_y', 'player_of_the_match_id', 'batsman',
       'batsman_id', 'batsman_details', 'is_batsman_captain',
       '

In [143]:
# Lift pandas' cap on displayed columns (applies to all later output too),
# then preview the first rows of the merged frame.
pd.options.display.max_columns = None
train_data.head()

Unnamed: 0,match id,team1_x,team1_id_x,team1_roster_ids_x,team2_x,team2_id_x,team2_roster_ids_x,winner_x,winner_id_x,toss winner_x,toss decision_x,venue_x,city_x,match_dt_x,lighting_x,series_name_x,season_x,ground_id_x,team_count_50runs_last15,team_winp_last5,team1only_avg_runs_last15,team1_winp_team2_last15,ground_avg_runs_last15,team1_y,team2_y,winner_y,by,win amount,toss winner_y,toss decision_y,venue_y,city_y,match_dt_y,lighting_y,series_name_y,season_y,ground_id_y,umpire1,umpire2,inning1_runs,inning1_wickets,inning1_balls,inning2_runs,inning2_wickets,inning2_balls,team1_id_y,team1_roster_ids_y,team2_id_y,team2_roster_ids_y,series_type,winner_id_y,player_of_the_match_id,batsman,batsman_id,batsman_details,is_batsman_captain,is_batsman_keeper,inning_x,runs_x,balls_faced,over_faced_first,wicket kind,out_by_bowler,out_by_fielder,bowler_id_batsmen_scorecard,bowler_details_batsmen_scorecard,is_bowler_keeper_batsmen_scorecard,is_bowler_captain_batsmen_scorecard,strike_rate_batsmen_scorecard,Fours_batsmen_scorecard,Sixes_batsmen_scorecard,match_dt_batsmen_scorecard,bowler,bowler_id,bowler_details,is_bowler_keeper,is_bowler_captain,inning_y,runs_y,wicket_count,balls_bowled,economy,maiden,dots,Fours,Sixes,wides,noballs,match_dt
0,9331181,Ba,11283,9373356.0:7857520.0:4232164.0:4566540.0:329940...,Hl Ph,12634,3500958.0:4231751.0:2735081.0:2035102.0:369833...,Hl Ph,12634,Hl Ph,field,Hr Ct Sm Ie,Indore,2022-10-20,day/night match,Sd Mq Ai Ty,2022/23,7398,1.666667,0.672131,139.0,100.0,157.178571,Ba,Hl Ph,Hl Ph,wickets,8.0,Hl Ph,field,Hr Ct Sm Ie,Indore,2022-10-20,day/night match,Sd Mq Ai Ty,2022/23,7398,A Bi,Sh De,129,8,122,130.0,2.0,105.0,11283,9373356.0:7857520.0:4232164.0:4566540.0:329940...,12634,3500958.0:4231751.0:2735081.0:2035102.0:369833...,other_domestic,12634,0.0,"HS Di,NA Ra,KR Ke,VR An,KH Pa,AT Ru,P Ca,R Dn","9373356.0,7883504.0,7878989.0,2526390.0,329940...","IND:Left-hand bat:Right-arm medium-fast:,IND:L...","0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0","0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0",11111122,613393110614,519462416487,"1.1,12.2,17.7,18.6,5.5,7.5,1.1,15.4","run out,caught,caught,nan,bowled,caught,nan,nan","R Dn,R Dn,Ah Vt,nan,Mk Dr,Mk Dr,nan,nan","SL Va,SL Va,Mk Dr,nan,nan,Dy Ri,nan,nan","2035102.0,2035102.0,nan,nan,nan,nan,nan,nan","IND:Right-hand bat:Right-arm medium-fast:,IND:...","0.0,0.0,nan,nan,nan,nan,nan,nan","1.0,1.0,nan,nan,nan,nan,nan,nan","120.0,68.42,75.0,150.0,129.17,62.5,127.08,57.14","1.0,1.0,nan,nan,5.0,1.0,4.0,nan","nan,nan,nan,1.0,nan,nan,2.0,nan","2022-10-20,2022-10-20,2022-10-20,2022-10-20,20...","R Dn,VG Aa,KR Ke,LI Ma,NA Ra,VR An","2035102.0,8465057.0,7878989.0,4017523.0,788350...","IND:Right-hand bat:Right-arm medium-fast:,IND:...","0.0,0.0,0.0,0.0,0.0,0.0","1.0,0.0,0.0,0.0,0.0,0.0",112222,312813232118,221010,242417122418,"7.75,7.0,4.59,11.5,5.25,6.0",0,"9.0,8.0,5.0,5.0,9.0,9.0",340212,100200,201002,0,"2022-10-20,2022-10-20,2022-10-20,2022-10-20,20..."
1,8797060,Ed,20,2089079.0:6139370.0:2076192.0:62432.0:2083409....,Wt Is,41,4690258.0:4069666.0:4230127.0:1942317.0:161392...,Ed,20,Wt Is,field,Kn Ol Bn Bs,Bridgetown,2022-01-23,day/night match,Ed tr of Wt Is,2021/22,1406,1.285714,1.952381,156.0,50.0,103.5,Ed,Wt Is,Ed,runs,1.0,Wt Is,field,Kn Ol Bn Bs,Bridgetown,2022-01-23,day/night match,Ed tr of Wt Is,2021/22,1406,JS Wn,N Dd,171,8,128,170.0,8.0,124.0,20,2089079.0:6139370.0:2076192.0:62432.0:2083409....,41,4690258.0:4069666.0:4230127.0:1942317.0:161392...,international,20,62432.0,"JJ Ry,T Bn,SW Bs,EG Mn,CJ Jn,LA Dn,AU Rd,S Md,...","2089079.0,6139370.0,2083409.0,172199.0,2022957...","ENG:Right-hand bat:None:,ENG:Right-hand bat:No...","0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0....","0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1....",11111111112222222222,452551327427431022444712442311,311871215234424192228311162046,"1.1,1.2,13.3,14.7,15.6,18.6,19.6,20.3,6.3,7.2,...","caught,caught and bowled,caught,caught,caught,...","R Sd,FA An,AJ Hn,JO Hr,SS Cl,JO Hr,nan,nan,FA ...","KA Pd,nan,SS Cl,BA Kg,JO Hr,FA An,nan,nan,nan,...","4739552.0,4690104.0,3715697.0,2740408.0,346887...","WI:Right-hand bat:Right-arm fast-medium:,WI:Ri...","0.0,0.0,0.0,0.0,0.0,0.0,nan,nan,0.0,0.0,0.0,0....","0.0,0.0,0.0,0.0,0.0,0.0,nan,nan,0.0,1.0,0.0,0....","145.16,138.89,71.43,108.33,180.0,200.0,66.67,1...","6.0,3.0,1.0,2.0,2.0,1.0,nan,1.0,1.0,3.0,nan,na...","2.0,1.0,nan,nan,1.0,nan,nan,nan,nan,1.0,nan,na...","2022-01-23,2022-01-23,2022-01-23,2022-01-23,20...","AJ Hn,FA An,JO Hr,KA Pd,R Sd,SS Cl,AU Rd,CJ Jn...","3715697.0,4690104.0,2740408.0,1613926.0,473955...","WI:Left-hand bat:Slow left-arm 
orthodox:,WI:Ri...","0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0","0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0",111111222222,155025322623243913241845,122111210310,18242424121824186242424,"5.0,12.5,6.25,8.0,13.0,7.67,6.0,13.0,13.0,6.0,...",100000,"8.0,9.0,14.0,6.0,5.0,8.0,13.0,5.0,3.0,11.0,16....",84143110024,120110242203,2411010012,0,"2022-01-23,2022-01-23,2022-01-23,2022-01-23,20..."
2,9433269,We,10576,3298427.0:2288789.0:7773338.0:3519011.0:368195...,Ne,8987,4003390.0:1749075.0:1626526.0:4172447.0:551672...,We,10576,Ne,field,Tt Be Nm,Nottingham,2023-06-02,day/night match,Vy Bt,2023,251,0.857143,0.672131,173.266667,0.0,154.333333,We,Ne,We,runs,56.0,Ne,field,Tt Be Nm,Nottingham,2023-06-02,day/night match,Vy Bt,2023,251,PK Bn,Sn Sm,226,5,128,170.0,10.0,116.0,10576,3298427.0:2288789.0:7773338.0:3519011.0:368195...,8987,4003390.0:1749075.0:1626526.0:4172447.0:551672...,other_domestic,10576,0.0,"BL Da,MG Bl,MJ Sr,AJ He,Kf Ai,EJ Pk,JA Hs,JM C...","3298427.0,2288789.0,3519011.0,3681957.0,767131...","ENG:Right-hand bat:Legbreak:,NZ:Left-hand bat:...","1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1....","0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0....",111111122222222222,44556511014225716344293594,212752711525163535681156106,"1.1,1.4,10.2,11.3,16.3,18.5,7.6,1.1,1.3,10.3,1...","caught,bowled,caught and bowled,nan,caught,nan...","SR Pl,M Cr,SJ My,nan,Sn Sh Ai,nan,SJ My,DY Pn,...","C Mo,nan,nan,nan,SJ My,nan,M My,MG Bl,AJ He,EJ...","130437.0,4420828.0,125915.0,nan,7507303.0,nan,...","ENG:Right-hand bat:Slow left-arm orthodox:,ENG...","0.0,0.0,0.0,nan,0.0,nan,0.0,0.0,0.0,0.0,0.0,0....","0.0,0.0,1.0,nan,0.0,nan,1.0,0.0,1.0,1.0,0.0,1....","209.52,203.7,120.0,188.89,90.91,20.0,168.0,156...","6.0,6.0,1.0,3.0,nan,nan,4.0,3.0,8.0,nan,nan,na...","2.0,2.0,nan,4.0,1.0,nan,2.0,1.0,3.0,1.0,nan,na...","2023-06-02,2023-06-02,2023-06-02,2023-06-02,20...","C Mr,LW Js,M Cr,SJ My,SR Pl,Sn Sh Ai,AW Fh,BL ...","5652828.0,7328110.0,4420828.0,125915.0,130437....","SA:Right-hand bat:Right-arm 
fast:,ENG:Right-ha...","0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0","0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0",111111222222,461837284545341114572925,1211041104,18624242424181212242420,"15.33,18.0,9.25,7.0,11.25,11.25,11.33,5.5,7.0,...",0,"3.0,0.0,6.0,10.0,6.0,10.0,2.0,7.0,5.0,4.0,9.0,8.0",222266202324,412112210510,211120000221,1100000,"2023-06-02,2023-06-02,2023-06-02,2023-06-02,20..."
3,9587073,Ga An Ws,36084,8127230.0:4690328.0:4069666.0:7960847.0:469018...,Bs Rs,36070,3462080.0:2436405.0:1798705.0:7550857.0:574247...,Ga An Ws,36084,Ga An Ws,bat,Bn La Sm Ta Td,Tarouba,2023-09-10,day match,Cn Pr Le,2023,14300,2.166667,1.97561,164.266667,50.0,144.25,Ga An Ws,Bs Rs,Ga An Ws,runs,3.0,Ga An Ws,bat,Bn La Sm Ta Td,Tarouba,2023-09-10,day match,Cn Pr Le,2023,14300,CO Wt,LS Rr,181,9,127,178.0,8.0,123.0,36084,8127230.0:4690328.0:4069666.0:7960847.0:469018...,36070,3462080.0:2436405.0:1798705.0:7550857.0:574247...,other_domestic,36084,8127230.0,"Sm Ab,G Me,Am Kn,SO Hr,KA Pl,R Sd,OF Sh,SD He,...","8127230.0,4690328.0,7960847.0,4690188.0,473958...","PAK:Left-hand bat:Right-arm medium-fast:,WI:Le...","0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0....","0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0....",11111111111222222222,587024232050014816394161214427,35841610314011161319714712726,"1.1,1.2,12.6,13.7,17.4,19.3,19.6,2.4,20.2,20.3...","caught and bowled,caught,caught,caught,bowled,...","CR Be,RS Cl,OC My,CR Be,JO Hr,CR Be,CR Be,OC M...","nan,OC My,JP Gs,RE vn dr Me,nan,Qs Ad,nan,nan,...","3200756.0,3462080.0,6347494.0,3200756.0,274040...","WI:Right-hand bat:Right-arm fast-medium:,WI:Ri...","0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,nan,nan,0....","0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,nan,nan,0....","165.71,87.5,0.0,150.0,230.0,66.67,0.0,125.0,0....","1.0,nan,nan,1.0,1.0,nan,nan,1.0,nan,nan,1.0,na...","6.0,1.0,nan,2.0,2.0,nan,nan,4.0,nan,nan,nan,1....","2023-09-10,2023-09-10,2023-09-10,2023-09-10,20...","CR Be,JO Hr,KR Ms,OC My,Qs Ad,RE vn dr Me,RS C...","3200756.0,2740408.0,2436405.0,6347494.0,639921...","WI:Right-hand bat:Right-arm 
fast-medium:,WI:Ri...","0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0....","0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0....",11111112222222,273724253116202438211422448,42020012200111,2424241812612242424661224,"6.75,9.25,6.0,8.33,15.5,16.0,10.0,6.0,9.5,5.25...",0,"11.0,10.0,12.0,6.0,1.0,3.0,5.0,13.0,9.0,9.0,2....",2010112203026,23114221310022,20320000100002,0,"2023-09-10,2023-09-10,2023-09-10,2023-09-10,20..."
4,9516457,Pb Ks,30407,8127181.0:197658.0:4239038.0:2398346.0:5053082...,Gt Ts,48341,1958683.0:7491224.0:8059029.0:4377610.0:225245...,Gt Ts,48341,Gt Ts,field,Pb Ct An IS Ba Sm Mi Ch,Chandigarh,2023-04-13,night match,In Pr Le,2023,7118,0.818182,1.327869,164.666667,0.0,189.0,Pb Ks,Gt Ts,Gt Ts,wickets,6.0,Gt Ts,field,Pb Ct An IS Ba Sm Mi Ch,Chandigarh,2023-04-13,night match,In Pr Le,2023,7118,A Te,J Ml,153,8,129,154.0,4.0,119.0,30407,8127181.0:197658.0:4239038.0:2398346.0:5053082...,48341,1958683.0:7491224.0:8059029.0:4377610.0:225245...,In Pr Le,48341,3759846.0,"MW St,SM Cn,M Sh Kn,Ht Br,S Dn,R Dn,PB Ra,JM S...","4239038.0,4640824.0,5038046.0,8180500.0,197658...","AUS:Right-hand bat:Right-arm offbreak:,ENG:Lef...","0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0....","0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0....",11111111222222,36222288120253067817519,24229581262319491118220,"1.3,13.3,17.8,19.2,2.1,20.6,4.4,7.5,1.1,1.2,12...","bowled,caught,run out,run out,caught,nan,caugh...","Rd Kn,MM Sa,J Le,J Le,J Le,nan,AS Jh,MM Sa,K R...","nan,Sn Gl,WP Sa: DA Mr,WP Sa,AS Jh,nan,Sn Gl,W...","5554254.0,3759846.0,nan,nan,nan,nan,4690230.0,...","AFG:Right-hand bat:Legbreak googly:,IND:Right-...","0.0,0.0,nan,nan,nan,nan,0.0,0.0,0.0,0.0,0.0,na...","0.0,0.0,nan,nan,nan,nan,0.0,0.0,0.0,0.0,0.0,na...","150.0,100.0,244.44,160.0,100.0,100.0,76.92,108...","6.0,1.0,1.0,nan,2.0,nan,1.0,5.0,5.0,7.0,1.0,1....","1.0,1.0,2.0,1.0,nan,nan,nan,nan,nan,1.0,nan,na...","2023-04-13,2023-04-13,2023-04-13,2023-04-13,20...","AS Jh,MM Sa,Md Si,Rd Kn,Ap Sh,Ht Br,K Ra,MW St...","4690230.0,3759846.0,3373285.0,5554254.0,788184...","WI:Right-hand bat:Right-arm fast:,IND:Right-ha...","0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0","0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0",1111222222,3218442633203682425,1211111001,2424242424242461823,"8.0,4.5,11.0,6.5,8.25,5.0,9.0,8.0,8.0,6.52",0,"11.0,12.0,9.0,13.0,8.0,8.0,11.0,1.0,4.0,7.0",1273515132,2011001000,5010000000,0,"2023-04-13,2023-04-13,2023-04-13,2023-04-13,20..."


In [144]:
# One-hot encode the columns listed below. Columns not listed (match id, the
# *_id columns, winner_x, win amount, innings totals and the rolling-window
# features) are passed through unchanged.
# NOTE(review): 'winner_y' is encoded here while 'winner_x' is not; if the match
# winner is the prediction target this would leak the label — confirm.
columns_to_encode = [
    # from the feature table (_x side of the first merge)
    'team1_x', 'team1_roster_ids_x', 'team2_x', 'team2_roster_ids_x',
    'toss winner_x', 'toss decision_x', 'venue_x', 'city_x', 'match_dt_x',
    'lighting_x', 'series_name_x', 'season_x',
    # from the match-level table (_y side of the first merge)
    'team1_y', 'team2_y', 'winner_y', 'by',
    'toss winner_y', 'toss decision_y', 'venue_y', 'city_y',
    'match_dt_y', 'lighting_y', 'series_name_y', 'season_y',
    'umpire1', 'umpire2',
    'team1_roster_ids_y', 'team2_roster_ids_y',
    'series_type',
    # comma-joined batsman scorecard columns
    'batsman', 'batsman_details', 'is_batsman_captain',
    'is_batsman_keeper', 'inning_x', 'runs_x', 'balls_faced',
    'over_faced_first', 'wicket kind', 'out_by_bowler', 'out_by_fielder',
    'bowler_id_batsmen_scorecard', 'bowler_details_batsmen_scorecard',
    'is_bowler_keeper_batsmen_scorecard',
    'is_bowler_captain_batsmen_scorecard', 'strike_rate_batsmen_scorecard',
    'Fours_batsmen_scorecard', 'Sixes_batsmen_scorecard',
    'match_dt_batsmen_scorecard',
    # comma-joined bowler scorecard columns
    'bowler', 'bowler_id', 'bowler_details',
    'is_bowler_keeper', 'is_bowler_captain', 'inning_y', 'runs_y',
    'wicket_count', 'balls_bowled', 'economy', 'maiden', 'dots', 'Fours',
    'Sixes', 'wides', 'noballs', 'match_dt',
]
train_data = pd.get_dummies(train_data, columns=columns_to_encode)