# **1. Load the data**

1.1 Load Libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn import preprocessing
import keras
import tensorflow as tf
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error,mean_squared_error
import joblib

1.2 Load Dataset

In [None]:
# Load the IPL dataset into a DataFrame.
# NOTE: this is an absolute path in the Colab runtime; the CSV must already
# be present at /content/ipl_data.csv before this cell runs.
data=pd.read_csv('/content/ipl_data.csv')
# Preview the first five rows.
data.head()

Unnamed: 0,mid,date,venue,bat_team,bowl_team,batsman,bowler,runs,wickets,overs,runs_last_5,wickets_last_5,striker,non-striker,total
0,1,2008-04-18,M Chinnaswamy Stadium,Kolkata Knight Riders,Royal Challengers Bangalore,SC Ganguly,P Kumar,1,0,0.1,1,0,0,0,222
1,1,2008-04-18,M Chinnaswamy Stadium,Kolkata Knight Riders,Royal Challengers Bangalore,BB McCullum,P Kumar,1,0,0.2,1,0,0,0,222
2,1,2008-04-18,M Chinnaswamy Stadium,Kolkata Knight Riders,Royal Challengers Bangalore,BB McCullum,P Kumar,2,0,0.2,2,0,0,0,222
3,1,2008-04-18,M Chinnaswamy Stadium,Kolkata Knight Riders,Royal Challengers Bangalore,BB McCullum,P Kumar,2,0,0.3,2,0,0,0,222
4,1,2008-04-18,M Chinnaswamy Stadium,Kolkata Knight Riders,Royal Challengers Bangalore,BB McCullum,P Kumar,2,0,0.4,2,0,0,0,222


# **2. Overview of the Data**

2.1 Descriptive Statistics

In [None]:
# (number of rows, number of columns) of the loaded frame
data.shape

(76014, 15)

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns
data.describe()

Unnamed: 0,mid,runs,wickets,overs,runs_last_5,wickets_last_5,striker,non-striker,total
count,76014.0,76014.0,76014.0,76014.0,76014.0,76014.0,76014.0,76014.0,76014.0
mean,308.62774,74.889349,2.415844,9.783068,33.216434,1.120307,24.962283,8.869287,160.901452
std,178.156878,48.823327,2.015207,5.772587,14.914174,1.053343,20.079752,10.795742,29.246231
min,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,67.0
25%,154.0,34.0,1.0,4.6,24.0,0.0,10.0,1.0,142.0
50%,308.0,70.0,2.0,9.6,34.0,1.0,20.0,5.0,162.0
75%,463.0,111.0,4.0,14.6,43.0,2.0,35.0,13.0,181.0
max,617.0,263.0,10.0,19.6,113.0,7.0,175.0,109.0,263.0


2.2 Missing Values

In [None]:
# Count the missing values in each column.
data.isna().sum()
# Every column reports zero missing values, so no imputation or row removal is needed.

mid               0
date              0
venue             0
bat_team          0
bowl_team         0
batsman           0
bowler            0
runs              0
wickets           0
overs             0
runs_last_5       0
wickets_last_5    0
striker           0
non-striker       0
total             0
dtype: int64

# **3. Data Preparation**

3.1 Feature Encoding

In [None]:
# Keep only the five categorical features and the target column `total`.
# Selecting the columns to keep (instead of dropping the others) makes this
# cell idempotent: because `data` is reassigned here, re-running a drop-based
# version raises KeyError for the columns that were already removed.
data = data[['venue', 'bat_team', 'bowl_team', 'batsman', 'bowler', 'total']]
data.head()

Unnamed: 0,venue,bat_team,bowl_team,batsman,bowler,total
0,M Chinnaswamy Stadium,Kolkata Knight Riders,Royal Challengers Bangalore,SC Ganguly,P Kumar,222
1,M Chinnaswamy Stadium,Kolkata Knight Riders,Royal Challengers Bangalore,BB McCullum,P Kumar,222
2,M Chinnaswamy Stadium,Kolkata Knight Riders,Royal Challengers Bangalore,BB McCullum,P Kumar,222
3,M Chinnaswamy Stadium,Kolkata Knight Riders,Royal Challengers Bangalore,BB McCullum,P Kumar,222
4,M Chinnaswamy Stadium,Kolkata Knight Riders,Royal Challengers Bangalore,BB McCullum,P Kumar,222


In [None]:
# Separate the target column (`total`) from the remaining columns, which
# become the model's input features.
y = data['total']
X = data.drop(columns=['total'])

In [None]:
# One independent LabelEncoder per categorical column; the encoders are kept
# as notebook globals because later cells reuse them by name.
(
    venue_encoder,
    batting_team_encoder,
    bowling_team_encoder,
    striker_encoder,
    bowler_encoder,
) = (LabelEncoder() for _ in range(5))

# Fit each encoder on its column and replace the strings with integer codes.
for column, encoder in {
    'venue': venue_encoder,
    'bat_team': batting_team_encoder,
    'bowl_team': bowling_team_encoder,
    'batsman': striker_encoder,
    'bowler': bowler_encoder,
}.items():
    X[column] = encoder.fit_transform(X[column])

In [None]:
# Preview the feature frame after label encoding (all columns are now integer codes).
X.head()

Unnamed: 0,venue,bat_team,bowl_team,batsman,bowler
0,14,6,12,328,201
1,14,6,12,61,201
2,14,6,12,61,201
3,14,6,12,61,201
4,14,6,12,61,201


3.2 Train/Test Split

In [None]:
# Hold out 30% of the rows for testing; the fixed random_state makes the
# split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

3.3 Feature Scaling

In [None]:
# Scale every feature to [0, 1] using the min/max learned from the training
# split only.
scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train)
# Use transform (not fit_transform) on the test split: re-fitting here would
# scale train and test with different min/max values and would leave `scaler`
# fitted on the test data for every later use of it.
X_test_scaled = scaler.transform(X_test)

In [None]:
# Show the first rows of both scaled matrices. A bare expression that is not
# the last statement of a cell is never rendered, so both are combined into
# one final expression, sliced to avoid dumping the full arrays.
X_train_scaled[:5], X_test_scaled[:5]

array([[0.52941176, 0.15384615, 0.92307692, 0.29756098, 0.3902439 ],
       [0.26470588, 0.23076923, 0.15384615, 0.41707317, 0.2195122 ],
       [1.        , 0.53846154, 0.        , 0.32439024, 0.69512195],
       ...,
       [0.76470588, 0.38461538, 0.69230769, 0.69268293, 0.85365854],
       [0.64705882, 0.30769231, 0.        , 0.30731707, 0.02134146],
       [0.73529412, 0.84615385, 0.23076923, 0.2902439 , 0.25      ]])

# **4. Model Building**

4.1 Train Model

In [None]:
# Seed Python, NumPy and TensorFlow so weight initialisation and batch
# shuffling are repeatable between runs (GPU kernels may still introduce
# small nondeterminism).
tf.keras.utils.set_random_seed(42)

# Fully connected regressor: one input per feature column, two ReLU hidden
# layers, and a single linear output unit for the predicted total.
model = keras.Sequential([
    keras.layers.Input(shape=(X_train_scaled.shape[1],)),
    keras.layers.Dense(512, activation='relu'),
    keras.layers.Dense(206, activation='relu'),
    keras.layers.Dense(1, activation='linear'),
])

# Huber loss is quadratic for errors up to `delta` and linear beyond it,
# which makes it less sensitive to large errors than squared error.
huber_loss = tf.keras.losses.Huber(delta=1.0)
model.compile(optimizer='adam', loss=huber_loss)

In [None]:
# Train for 30 epochs in mini-batches of 64; the loss on the test split is
# reported after every epoch via validation_data.
model.fit(
    X_train_scaled,
    y_train,
    epochs=30,
    batch_size=64,
    validation_data=(X_test_scaled, y_test),
)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.src.callbacks.History at 0x7dd83313a830>

4.3 Model Evaluation

In [None]:
# Predict on the scaled test features and report the mean absolute error
# between the true targets (y_test) and the predictions.
predictions = model.predict(X_test_scaled)
mean_absolute_error(y_test,predictions)



22.032318396255704

4.4 Model Predictions

In [None]:
def predict(row_index=500):
    """Predict the total for one row of `data` and print every step.

    The function reads the raw categorical values of the chosen row, encodes
    them with the label encoders fitted earlier in the notebook, scales them
    with `scaler`, and feeds the result to `model`.

    Parameters
    ----------
    row_index : int, default 500
        Positional index of the row in `data` to use as model input.

    Raises
    ------
    IndexError
        If `row_index` is outside the bounds of `data`.
    """
    # Read the single row directly instead of converting whole columns to lists.
    row = data.iloc[row_index]
    venue = row['venue']
    bat_team = row['bat_team']
    bowl_team = row['bowl_team']
    striker = row['batsman']
    bowler = row['bowler']

    print("Input Values:")
    print("Venue:", venue)
    print("Batting Team:", bat_team)
    print("Bowling Team:", bowl_team)
    print("Striker:", striker)
    print("Bowler:", bowler)

    print("\nEncoding Values:")
    # Each transform returns a one-element array holding the integer code.
    encoded_venue = venue_encoder.transform([venue])
    encoded_batting_team = batting_team_encoder.transform([bat_team])
    encoded_bowling_team = bowling_team_encoder.transform([bowl_team])
    encoded_striker = striker_encoder.transform([striker])
    encoded_bowler = bowler_encoder.transform([bowler])
    print("Encoded Venue:", encoded_venue)
    print("Encoded Batting Team:", encoded_batting_team)
    print("Encoded Bowling Team:", encoded_bowling_team)
    print("Encoded Striker:", encoded_striker)
    print("Encoded Bowler:", encoded_bowler)

    # Build a one-row frame with the same column names and order the scaler
    # was fitted on, so scikit-learn does not warn about missing feature names.
    input_data = pd.DataFrame(
        {
            'venue': encoded_venue,
            'bat_team': encoded_batting_team,
            'bowl_team': encoded_bowling_team,
            'batsman': encoded_striker,
            'bowler': encoded_bowler,
        }
    )
    input_data = scaler.transform(input_data)

    # Predict score; the model returns a (1, 1) array, truncated to an int.
    predicted_score = model.predict(input_data)
    predicted_score = int(predicted_score[0, 0])
    print("\nPredicted Score:", predicted_score)

predict()

Input Values:
Venue: Eden Gardens
Batting Team: Deccan Chargers
Bowling Team: Kolkata Knight Riders
Striker: Y Venugopal Rao
Bowler: I Sharma

Encoding Values:
Encoded Batting Team: [1]
Encoded Bowling Team: [6]
Encoded Striker: [403]
Encoded Bowler: [111]

Predicted Score: 169




# **5. Save and Reload the Model**

5.1 Save the Model

In [None]:
# Colab-only: mount Google Drive at /content/drive so the artifacts saved in
# the following cells are written to Drive.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Save the trained Keras model to Google Drive.
model.save('/content/drive/MyDrive/deep_learning')
# Persist each fitted label encoder with joblib, one file per encoder.
# NOTE(review): assumes the python_objects folder already exists on Drive; confirm.
joblib.dump(venue_encoder, '/content/drive/MyDrive/python_objects/venue_encoder.pkl')
joblib.dump(batting_team_encoder, '/content/drive/MyDrive/python_objects/batting_team_encoder.pkl')
joblib.dump(bowling_team_encoder, '/content/drive/MyDrive/python_objects/bowling_team_encoder.pkl')
joblib.dump(striker_encoder, '/content/drive/MyDrive/python_objects/striker_encoder.pkl')
joblib.dump(bowler_encoder, '/content/drive/MyDrive/python_objects/bowler_encoder.pkl')

['/content/drive/MyDrive/python_objects/bowler_encoder.pkl']

In [None]:
# Persist the fitted MinMaxScaler next to the encoders.
joblib.dump(scaler,'/content/drive/MyDrive/python_objects/scaler.pkl')

['/content/drive/MyDrive/python_objects/scaler.pkl']

In [None]:
# Reload the model from the path it was saved to above.
loaded_model=tf.keras.models.load_model('/content/drive/MyDrive/deep_learning')

In [None]:
# Display the reloaded model object to confirm that loading succeeded.
loaded_model

<keras.src.engine.sequential.Sequential at 0x7dd8334bd030>