#Imports

In [None]:
import pandas
import numpy
import seaborn
import matplotlib.pyplot
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

#Reading Data from CSV File

In [None]:
matches: pandas.DataFrame = pandas.read_csv("IPL_Matches_2008_2022.csv")
matches.columns

Index(['ID', 'City', 'Date', 'Season', 'MatchNumber', 'Team1', 'Team2',
       'Venue', 'TossWinner', 'TossDecision', 'SuperOver', 'WinningTeam',
       'WonBy', 'Margin', 'method', 'Player_of_Match', 'Team1Players',
       'Team2Players', 'Umpire1', 'Umpire2'],
      dtype='object')

In [None]:
matches.shape

(950, 20)

#Preparing Data

In [None]:
matches_required: pandas.DataFrame = matches

In [None]:
matches_required['Team1'].unique()

array(['Rajasthan Royals', 'Royal Challengers Bangalore',
       'Sunrisers Hyderabad', 'Delhi Capitals', 'Chennai Super Kings',
       'Gujarat Titans', 'Lucknow Super Giants', 'Kolkata Knight Riders',
       'Punjab Kings', 'Mumbai Indians', 'Kings XI Punjab',
       'Delhi Daredevils', 'Rising Pune Supergiant', 'Gujarat Lions',
       'Rising Pune Supergiants', 'Pune Warriors', 'Deccan Chargers',
       'Kochi Tuskers Kerala'], dtype=object)

In [None]:
matches_required['MatchNumber'].unique()

array(['Final', 'Qualifier 2', 'Eliminator', 'Qualifier 1', '70', '69',
       '68', '67', '66', '65', '64', '63', '62', '61', '60', '59', '58',
       '57', '56', '55', '54', '53', '52', '51', '50', '49', '48', '47',
       '46', '45', '44', '43', '42', '41', '40', '39', '38', '37', '36',
       '35', '34', '33', '32', '31', '30', '29', '28', '27', '26', '25',
       '24', '23', '22', '21', '20', '19', '18', '17', '16', '15', '14',
       '13', '12', '11', '10', '9', '8', '7', '6', '5', '4', '3', '2',
       '1', '72', '71'], dtype=object)

In [None]:
matches_required.replace('Deccan Chargers','Sunrisers Hyderabad',inplace=True)
matches_required.replace('Kings XI Punjab','Punjab Kings',inplace=True)
matches_required.replace('Delhi Daredevils','Delhi Capitals',inplace=True)
matches_required.replace('Qualifier 2','102',inplace=True)
matches_required.replace('Qualifier 1','100',inplace=True)
matches_required.replace('Eliminator','101',inplace=True)
matches_required.replace('Final','103',inplace=True)

In [None]:
matches_required.replace('Arun Jaitley Stadium, Delhi','Arun Jaitley Stadium', inplace = True)
matches_required.replace('Brabourne Stadium, Mumbai','Brabourne Stadium', inplace = True)
matches_required.replace('Dr DY Patil Sports Academy, Mumbai','Dr DY Patil Sports Academy', inplace = True)
matches_required.replace('Eden Gardens, Kolkata','Eden Gardens', inplace = True)
matches_required.replace('Feroz Shah Kotla','Arun Jaitley Stadium', inplace = True)
matches_required.replace('M.Chinnaswamy Stadium','M Chinnaswamy Stadium', inplace = True)
matches_required.replace('MA Chidambaram Stadium, Chepauk, Chennai','MA Chidambaram Stadium', inplace = True)
matches_required.replace('MA Chidambaram Stadium, Chepauk','MA Chidambaram Stadium', inplace = True)
matches_required.replace('Maharashtra Cricket Association Stadium, Pune','Maharashtra Cricket Association Stadium', inplace = True)
matches_required.replace('Punjab Cricket Association IS Bindra Stadium','Punjab Cricket Association Stadium', inplace = True)
matches_required.replace('Punjab Cricket Association IS Bindra Stadium, Mohali','Punjab Cricket Association Stadium', inplace = True)
matches_required.replace('Punjab Cricket Association Stadium, Mohali','Punjab Cricket Association Stadium', inplace = True)
matches_required.replace('Rajiv Gandhi International Stadium, Uppal','Rajiv Gandhi International Stadium', inplace = True)
matches_required.replace('Wankhede Stadium, Mumbai','Wankhede Stadium', inplace = True)

In [None]:
matches_required.columns

Index(['ID', 'City', 'Date', 'Season', 'MatchNumber', 'Team1', 'Team2',
       'Venue', 'TossWinner', 'TossDecision', 'SuperOver', 'WinningTeam',
       'WonBy', 'Margin', 'method', 'Player_of_Match', 'Team1Players',
       'Team2Players', 'Umpire1', 'Umpire2'],
      dtype='object')

In [None]:
matches_required['Venue'].unique()

array(['Narendra Modi Stadium, Ahmedabad', 'Eden Gardens',
       'Wankhede Stadium', 'Brabourne Stadium',
       'Dr DY Patil Sports Academy',
       'Maharashtra Cricket Association Stadium',
       'Dubai International Cricket Stadium', 'Sharjah Cricket Stadium',
       'Zayed Cricket Stadium, Abu Dhabi', 'Arun Jaitley Stadium',
       'MA Chidambaram Stadium', 'Sheikh Zayed Stadium',
       'Rajiv Gandhi International Stadium',
       'Dr. Y.S. Rajasekhara Reddy ACA-VDCA Cricket Stadium',
       'Punjab Cricket Association Stadium', 'M Chinnaswamy Stadium',
       'Sawai Mansingh Stadium', 'Holkar Cricket Stadium', 'Green Park',
       'Saurashtra Cricket Association Stadium',
       'Shaheed Veer Narayan Singh International Stadium',
       'JSCA International Stadium Complex',
       'Sardar Patel Stadium, Motera', 'Barabati Stadium',
       'Subrata Roy Sahara Stadium',
       'Himachal Pradesh Cricket Association Stadium', 'Nehru Stadium',
       'Vidarbha Cricket Association S

In [None]:
matches_required.shape

(950, 20)

In [None]:
matches_required['TossDecision'] = (matches_required['TossDecision'] == 'bat').astype(int)

In [None]:
matches_required.isnull().sum()

ID                   0
City                51
Date                 0
Season               0
MatchNumber          0
Team1                0
Team2                0
Venue                0
TossWinner           0
TossDecision         0
SuperOver            4
WinningTeam          4
WonBy                0
Margin              18
method             931
Player_of_Match      4
Team1Players         0
Team2Players         0
Umpire1              0
Umpire2              0
dtype: int64

In [None]:
matches_required['Margin']

0        7.0
1        7.0
2       14.0
3        7.0
4        5.0
       ...  
945      5.0
946      5.0
947      9.0
948     33.0
949    140.0
Name: Margin, Length: 950, dtype: float64

#Training Model - Logistic Regression

In [None]:
matches_required = matches_required[['Venue','TossDecision','TossWinner','Team1', 'Team2','WinningTeam']]

In [None]:
matches_required.isna().sum()
matches_required.dropna(inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  matches_required.dropna(inplace=True)


In [None]:
matches_required.isna().sum()

Venue           0
TossDecision    0
TossWinner      0
Team1           0
Team2           0
WinningTeam     0
dtype: int64

In [None]:
final_req_data = matches_required.drop(['WinningTeam'],axis=1)
winners = matches_required[['WinningTeam']]

In [None]:
team_names = final_req_data['Team1'].unique()
team_features = pandas.DataFrame()
for team in team_names:
    team_features[f'Team_{team}'] = ((final_req_data['Team1'] == team) | (final_req_data['Team2'] == team)).astype(int)
df_with_features = pandas.concat([final_req_data, team_features], axis=1)
team_cod = df_with_features.drop(columns=['Team1', 'Team2'])
team_names = team_cod['Venue'].unique()
team_features = pandas.DataFrame()
for team in team_names:
    team_features[f'Venue_{team}'] = ((team_cod['Venue'] == team) | (team_cod['Venue'] == team)).astype(int)
df_with_features = pandas.concat([team_cod, team_features], axis=1)
team_venue_cod = df_with_features.drop(columns=['Venue'])

team_names = team_venue_cod['TossWinner'].unique()
team_features = pandas.DataFrame()
for team in team_names:
    team_features[f'TossWinner{team}'] = ((team_venue_cod['TossWinner'] == team) | (team_venue_cod['TossWinner'] == team)).astype(int)
df_with_features = pandas.concat([team_venue_cod, team_features], axis=1)
team_venue_cod = df_with_features.drop(columns=['TossWinner'])

In [None]:
columns = team_venue_cod.columns

In [None]:
columns

Index(['TossDecision', 'Team_Rajasthan Royals',
       'Team_Royal Challengers Bangalore', 'Team_Sunrisers Hyderabad',
       'Team_Delhi Capitals', 'Team_Chennai Super Kings',
       'Team_Gujarat Titans', 'Team_Lucknow Super Giants',
       'Team_Kolkata Knight Riders', 'Team_Punjab Kings',
       'Team_Mumbai Indians', 'Team_Rising Pune Supergiant',
       'Team_Gujarat Lions', 'Team_Rising Pune Supergiants',
       'Team_Pune Warriors', 'Team_Kochi Tuskers Kerala',
       'Venue_Narendra Modi Stadium, Ahmedabad', 'Venue_Eden Gardens',
       'Venue_Wankhede Stadium', 'Venue_Brabourne Stadium',
       'Venue_Dr DY Patil Sports Academy',
       'Venue_Maharashtra Cricket Association Stadium',
       'Venue_Dubai International Cricket Stadium',
       'Venue_Sharjah Cricket Stadium',
       'Venue_Zayed Cricket Stadium, Abu Dhabi', 'Venue_Arun Jaitley Stadium',
       'Venue_MA Chidambaram Stadium', 'Venue_Sheikh Zayed Stadium',
       'Venue_Rajiv Gandhi International Stadium',
     

In [None]:
X_train, X_test, y_train, y_test = train_test_split(team_venue_cod,winners['WinningTeam'], train_size = 0.9,random_state=499)
from sklearn.linear_model import LogisticRegression

logistic_regression_model = LogisticRegression()
logistic_regression_model.fit(X_train, y_train)
y_pred = logistic_regression_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(accuracy)

0.6947368421052632


In [None]:
datar = {col: 0 for col in team_venue_cod}


datar[f'TossWinnerMumbai Indians'] = 1
datar[f'TossDecision'] = 0
datar[f'Team_Mumbai Indians'] = 1
datar[f'Team_Kolkata Knight Riders'] = 1
datar[f'Venue_Wankhede Stadium'] = 1
dataf = pandas.DataFrame(datar, index=[0])

In [None]:
xr = logistic_regression_model.predict_proba(dataf)

In [None]:
xr

array([[0.00370003, 0.00658959, 0.00423267, 0.00354471, 0.00598108,
        0.19748101, 0.01207571, 0.72225833, 0.00107166, 0.00839355,
        0.00637744, 0.01258971, 0.00577812, 0.00653554, 0.00339086]])

In [None]:
x = list(winners['WinningTeam'].unique())
x.sort()
print(x)

['Chennai Super Kings', 'Delhi Capitals', 'Gujarat Lions', 'Gujarat Titans', 'Kochi Tuskers Kerala', 'Kolkata Knight Riders', 'Lucknow Super Giants', 'Mumbai Indians', 'Pune Warriors', 'Punjab Kings', 'Rajasthan Royals', 'Rising Pune Supergiant', 'Rising Pune Supergiants', 'Royal Challengers Bangalore', 'Sunrisers Hyderabad']


In [None]:
import joblib

joblib.dump(logistic_regression_model, "log_model.joblib")
joblib.dump(columns, "columns.joblib")
joblib.dump(list(matches_required['Venue'].unique()),"stadiums.joblib")
joblib.dump(list(matches_required['Team1'].unique()),"teams.joblib")

['teams.joblib']