/
IPLScores.py
88 lines (65 loc) · 2.62 KB
/
IPLScores.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
# Train a decision-tree classifier on the 2008-2020 IPL match results and use
# it to predict the winner of every 2021 match whose toss has been recorded.
#
# Inputs  (relative to the working directory):
#   data/IPL Matches 2008-2020.csv   training data
#   data/ipl-2021-matches.csv        matches to predict
# Outputs:
#   results/Results.csv              2021 matches plus a 'Predicted Winner' column
#   ipl_winner_prediction.joblib     the fitted model
from pathlib import Path

import joblib
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeClassifier

# pathlib keeps the paths portable; a literal 'data\\...' only works on Windows.
DATA_DIR = Path('data')
RESULTS_DIR = Path('results')

# Columns that hold team names and must therefore share one encoding.
TEAM_COLUMNS = ['team1', 'team2', 'toss_winner', 'winner']

# Numeric codes used for the toss decision feature.
BAT_CODE = 101
FIELD_CODE = 100

# --- Load the historical matches ---
data = pd.read_csv(DATA_DIR / 'IPL Matches 2008-2020.csv')
df = pd.DataFrame(
    data,
    columns=['team1', 'team2', 'toss_winner', 'toss_decision', 'winner'],
)
print(df.shape)
print(df.describe())

# --- Encode the data as numbers ---
# Work on an explicit copy so `df` keeps the raw values. Rows with a missing
# value in any of the five columns are dropped: they cannot be encoded
# consistently and a missing winner is not a usable training target.
df1 = df.dropna().copy()

# Fit ONE encoder on the union of every team column. Re-fitting the same
# encoder per column would leave it holding only the last column's classes,
# so the codes of different columns (and the decoding of predicted winners)
# would agree only if every column happened to contain the same set of teams.
le = LabelEncoder()
le.fit(df1[TEAM_COLUMNS].to_numpy().ravel())
for column in TEAM_COLUMNS:
    df1[column] = le.transform(df1[column])

df1['toss_decision'] = df1['toss_decision'].map(
    {'bat': BAT_CODE, 'field': FIELD_CODE}
)
if df1['toss_decision'].isna().any():
    raise ValueError("toss_decision contains values other than 'bat'/'field'")

# --- Prepare data ---
X = df1.drop(columns=['winner'])
Y = df1['winner']

# --- Split data ---
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2)

# --- Create model and measure accuracy ---
# Fit on the training split only; fitting on all of X and then scoring on
# X_test would evaluate the model on rows it has already seen.
dt = DecisionTreeClassifier()
dt.fit(X_train, Y_train)
dt_accuracy = dt.score(X_test, Y_test) * 100
print('DT accuracy: ', str(round(dt_accuracy, 2)) + "%")

# Alternative model (left disabled):
# from sklearn.ensemble import RandomForestClassifier
# rf = RandomForestClassifier()
# rf.fit(X_train, Y_train)
# print('RF accuracy: ', str(round(rf.score(X_test, Y_test)*100, 2)) + "%")

# Refit on every available row so the model used for the predictions below
# (and saved to disk) has seen all the historical data.
dt.fit(X, Y)

# --- Make predictions for ongoing matches ---
matches = pd.read_csv(DATA_DIR / 'ipl-2021-matches.csv')
df_matches = pd.DataFrame(
    matches,
    columns=['Team1', 'Team2', 'Toss Winner', 'Toss Decision', 'Winner'],
)

known_teams = set(le.classes_)
result = [None] * len(df_matches)
for position, (_, row) in enumerate(df_matches.iterrows()):
    t1 = str(row['Team1'])
    t2 = str(row['Team2'])
    toss_winner = row['Toss Winner']
    if pd.isna(toss_winner):
        # No toss recorded: stop at the first such row, leaving it and all
        # following rows without a prediction.
        break

    unknown = {t1, t2, toss_winner} - known_teams
    if unknown:
        # The encoder cannot transform names it was not fitted on; skip the
        # row instead of aborting the whole run.
        print(f"Skipping match {position}: unknown team name(s) {sorted(unknown)}")
        continue

    decision = str(row['Toss Decision']).strip().lower()
    toss_decision = BAT_CODE if decision == 'bat' else FIELD_CODE

    # Build a one-row frame with the training column names and order
    # (team1, team2, toss_winner, toss_decision).
    encoded_teams = le.transform([t1, t2, toss_winner])
    test_data = pd.DataFrame(
        [np.append(encoded_teams, toss_decision)],
        columns=X.columns,
    )
    result[position] = le.inverse_transform(dt.predict(test_data))[0]

df_matches['Predicted Winner'] = result
RESULTS_DIR.mkdir(parents=True, exist_ok=True)
df_matches.to_csv(RESULTS_DIR / 'Results.csv')

# --- Save the model file ---
joblib.dump(dt, 'ipl_winner_prediction.joblib')
# To reload later: dt = joblib.load('ipl_winner_prediction.joblib')