<a href="https://colab.research.google.com/github/yashguptaab99/Cricket-Prediction/blob/master/Cricket_Predictions_Bowling.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# INCREASED PREDICTION ACCURACY IN THE GAME OF CRICKET USING MACHINE LEARNING

Player selection is one the most important tasks for any sport and cricket is no exception. The performance of the players depends on various factors such as the opposition team, the venue, his current form etc. The team management, the coach and the captain select 11 players for each match from a squad of 15 to 20 players. They analyze different characteristics and the statistics of the players to select the best playing 11 for each match. Each batsman contributes by scoring maximum runs possible and each bowler contributes by taking maximum wickets and conceding minimum runs. This paper attempts to predict the performance of players as how many runs will each batsman score and how many wickets will each bowler take for both the teams. Both the problems are targeted as classification problems where number of runs and number of wickets are classified in different ranges. We used naïve bayes, random forest, multiclass SVM and decision tree classifiers to generate the prediction models for both the problems. Random Forest classifier was found to be the most accurate for both the problems. 

# Importing Libraray

In [1]:
import pandas as pd
import re

# Importing Data

In [2]:
# All Innings list after 2 Jan 2000
innings = pd.read_csv("/content/drive/My Drive/Projects/Cricket Prediction/Bowling.csv")

# All Ininngs list from 131Mar 1984 to 2 Jan 2000
inningsExtra = pd.read_csv("/content/drive/My Drive/Projects/Cricket Prediction/Bowling84-02.csv")

In [3]:
inningsExtra

Unnamed: 0,Player,Overs,Mdns,Runs,Wkts,Econ,Inns,Opposition,Ground,Start Date
0,Aamer Hanif,3.4,0,27,1,7.36,1,Sri Lanka,Sharjah,2 Nov 1993
1,Aamer Hanif,10.0,0,38,0,3.80,1,Sri Lanka,Gujranwala,29 Sep 1995
2,Aamer Hanif,2.0,0,21,0,10.50,1,Sri Lanka,Faisalabad,1 Oct 1995
3,Aamer Hanif,6.0,0,36,3,6.00,2,Sri Lanka,Rawalpindi,3 Oct 1995
4,Aamer Hanif,DNB,-,-,-,-,2,Sri Lanka,Sharjah,17 Oct 1995
...,...,...,...,...,...,...,...,...,...,...
28045,Zulqarnain,DNB,-,-,-,-,1,India,Sharjah,18 Apr 1986
28046,Zulqarnain,DNB,-,-,-,-,1,England,Karachi,20 Nov 1987
28047,Zulqarnain,DNB,-,-,-,-,1,England,Peshawar,22 Nov 1987
28048,Zulqarnain,DNB,-,-,-,-,2,India,Gujranwala,18 Dec 1989


# Data Preprocessing

## Bowling data

In [4]:
innings = innings.drop(columns=['Team', 'Mdns', 'Econ', 'Inns'])
inningsExtra = inningsExtra.drop(columns=['Mdns', 'Econ', 'Inns'])

In [5]:
# Cleaning data

innings = innings[innings.Overs != 'DNB']
innings = innings[innings.Overs != 'TDNB']
innings = innings[innings.Overs != 'sub']
innings = innings[innings.Overs != 'absent']
innings = innings.rename(columns={"Player 1":"Player", "Start Date":"StartDate"})

inningsExtra = inningsExtra[inningsExtra.Overs != 'DNB']
inningsExtra = inningsExtra[inningsExtra.Overs != 'TDNB']
inningsExtra = inningsExtra[inningsExtra.Overs != 'sub']
inningsExtra = inningsExtra[inningsExtra.Overs != 'absent']
inningsExtra = inningsExtra.rename(columns={"Start Date":"StartDate"})


In [6]:
innings

Unnamed: 0,Player,Overs,Runs,Wkts,Opposition,Ground,StartDate
0,NP Kenjige,9,40,1,U.A.E.,Sharjah,8 Dec 2019
1,NP Kenjige,7,50,1,Scotland,Sharjah,9 Dec 2019
2,NP Kenjige,7,26,1,U.A.E.,ICCA Dubai,12 Dec 2019
3,NP Kenjige,3,13,0,Scotland,ICCA Dubai,14 Dec 2019
4,ME Sanuth,6,18,0,U.A.E.,Al Amerat,5 Jan 2020
...,...,...,...,...,...,...,...
60061,Zulfiqar Babar,9,35,1,Australia,Sharjah,7 Oct 2014
60062,Zulfiqar Babar,10,52,2,Australia,Dubai (DSC),10 Oct 2014
60063,Zulfiqar Babar,10,42,0,Australia,Abu Dhabi,12 Oct 2014
60064,Zulfiqar Babar,10,61,1,New Zealand,Abu Dhabi,19 Dec 2014


In [7]:
inningsExtra

Unnamed: 0,Player,Overs,Runs,Wkts,Opposition,Ground,StartDate
0,Aamer Hanif,3.4,27,1,Sri Lanka,Sharjah,2 Nov 1993
1,Aamer Hanif,10.0,38,0,Sri Lanka,Gujranwala,29 Sep 1995
2,Aamer Hanif,2.0,21,0,Sri Lanka,Faisalabad,1 Oct 1995
3,Aamer Hanif,6.0,36,3,Sri Lanka,Rawalpindi,3 Oct 1995
7,Aamer Malik,2.0,10,0,Sri Lanka,Dhaka,27 Oct 1988
...,...,...,...,...,...,...,...
28024,DNT Zoysa,6.2,24,0,West Indies,Sharjah,13 Oct 1999
28025,DNT Zoysa,8.0,24,0,Pakistan,Sharjah,15 Oct 1999
28026,DNT Zoysa,7.0,14,1,West Indies,Sharjah,17 Oct 1999
28027,DNT Zoysa,8.0,32,1,Pakistan,Sharjah,18 Oct 1999


In [8]:
#List of all players who played after 2 Jan 2000

listOfBowler = list(innings['Player'].unique())

In [9]:
#Merging player past performance innings which were present in matches after 2005
# for ex. Sachine was senior most so his mast matches performance shoould be added 

for player in listOfBowler:
  playerframe = inningsExtra[inningsExtra.Player == player]
  innings = innings.append(playerframe)


In [10]:
innings['StartDate'] = pd.to_datetime(innings['StartDate'])
# Now innings variable contains all players past played innings
innings.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 39927 entries, 0 to 28028
Data columns (total 7 columns):
 #   Column      Non-Null Count  Dtype         
---  ------      --------------  -----         
 0   Player      39927 non-null  object        
 1   Overs       39927 non-null  object        
 2   Runs        39927 non-null  object        
 3   Wkts        39927 non-null  object        
 4   Opposition  39927 non-null  object        
 5   Ground      39927 non-null  object        
 6   StartDate   39927 non-null  datetime64[ns]
dtypes: datetime64[ns](1), object(6)
memory usage: 2.4+ MB


In [11]:
#Converting Over to integer and cleaning it
Over = []
for st in innings['Overs'].values:
  st = re.findall(r'[0-9]+', st)
  if not st:
    st.append('0')
  Over.append(float(st[0]))
innings['Overs'] = Over

In [12]:
#Converting Run to integer and cleaning it
run = []
for st in innings['Runs'].values:
  st = re.findall(r'[0-9]+', st)
  if not st:
    st.append('0')
  run.append(float(st[0]))
innings['Runs'] = run

In [13]:
#Converting Run to integer and cleaning it
wkt = []
for st in innings['Wkts'].values:
  st = re.findall(r'[0-9]+', st)
  if not st:
    st.append('0')
  wkt.append(float(st[0]))
innings['Wkts'] = wkt

In [14]:
innings.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 39927 entries, 0 to 28028
Data columns (total 7 columns):
 #   Column      Non-Null Count  Dtype         
---  ------      --------------  -----         
 0   Player      39927 non-null  object        
 1   Overs       39927 non-null  float64       
 2   Runs        39927 non-null  float64       
 3   Wkts        39927 non-null  float64       
 4   Opposition  39927 non-null  object        
 5   Ground      39927 non-null  object        
 6   StartDate   39927 non-null  datetime64[ns]
dtypes: datetime64[ns](1), float64(3), object(3)
memory usage: 2.4+ MB


### Calculating The Derived Attributes

#### Consistency

This attribute describes how experienced the player is and how consistent he has been throughout his career. All the traditional attributes used in this formula are calculated over the entire career of the player. 

**Consistency = 0.4174Xno. of overs + 0.2634Xno. of innings + 0.1602XSR + 0.0975Xaverage + 0.0615XFF**


In [15]:
#Please Rate then Calculate

## Consistency 
Consistency = []

for player in listOfBowler:
  runs_conceede = 0
  balls_bowled = 0
  wkt_taken = 0
  playerframe = innings[innings.Player == player]

  ######### Number of innings #########
  numInnings = playerframe.shape[0]

  ######### Overs #########
  overs = playerframe['Overs'].sum()

  ######### Runs concede #########
  runs_concede = playerframe['Runs'].sum()

  ######### Wicket Taken #########
  wkt_taken = playerframe['Wkts'].sum()

  ######### Total Ball bowled #########
  balls_bowled = 6 * overs

  ######### Bowling Average #########
  if (wkt_taken==0):
    average = 0
  else:
    average = runs_concede/wkt_taken

  ######### Strike Rate #########
  if (wkt_taken==0):
    sr = 0
  else:
    sr = (balls_bowled/wkt_taken) * 100

  ######### Wickets haul #########
  ff = playerframe[playerframe.Wkts > 4].shape[0]

####################  Rate the Elements Before Calculation  ####################

  #### numInnings ####
  if (numInnings>=1 and numInnings<=49):
    numInnings = 1
  elif (numInnings>=50 and numInnings<=99):
    numInnings = 2
  elif (numInnings>=100 and numInnings<=124):
    numInnings = 3
  elif (numInnings>=125 and numInnings<=149):
    numInnings = 4
  elif (numInnings>=150):
    numInnings = 5 

  #### average ####
  if (average>=0.0 and average<=24.9):
    average = 1
  elif (average>=25.0 and average<=29.9):
    average = 2
  elif (average>=30.0 and average<=34.9):
    average = 3
  elif (average>=35.0 and average<=49.9):
    average = 4
  elif (average>=50.0):
    average = 5   

  #### sr ####
  if (sr>=0.0 and sr<=29.9):
    sr = 1
  elif (sr>=30.0 and sr<=39.9):
    sr = 2
  elif (sr>=40.0 and sr<=49.0):
    sr = 3
  elif (sr>=50.0 and sr<=59.9):
    sr = 4
  elif (sr>=60.0):
    sr = 5  

  #### Overs ####
  if (overs>=1 and overs<=99):
    overs = 1
  elif (overs>=100 and overs<=249):
    overs = 2
  elif (overs>=250 and overs<=499):
    overs = 3
  elif (overs>=500 and overs<=999):
    overs = 4
  elif (overs>=1000):
    overs = 5 

  #### ff ####
  if (ff>=1 and ff<=2):
    ff = 3
  elif (ff>=3 and ff<=4):
    ff = 4
  elif (ff>=5):
    ff = 5

  consistency =  (0.4174 * overs) + (0.2634 * numInnings) + (0.1602 * sr) + (0.0975*average) + (0.0615 * ff) 
  Consistency.append(consistency)


In [16]:
ConsistencyFrame = pd.DataFrame(Consistency, columns = ["Consistency"])

In [17]:
ConsistencyFrame

Unnamed: 0,Consistency
0,1.6768
1,0.9385
2,3.9142
3,1.9693
4,1.6768
...,...
1307,0.9385
1308,1.9588
1309,1.8718
1310,3.3769


#### Form

Form of a player describes his performance over last one year. All the traditional attributes used in this formula are calculated over the matches played by the player in last 12 months from the day of the match. 

**Form = (0.3269 * overs) + (0.2846 * numInnings) + (0.1877 * sr) + (0.1210 * average) + (0.0798 * ff)**

In [18]:
#Please Rate then Calculate

## Form
Form = []

for player in listOfBowler:
  playerframe = innings[innings.Player == player]
  playerframe = playerframe[playerframe.StartDate > "2019-01-01"]

  if not playerframe.empty:
    runs_conceede = 0
    balls_bowled = 0
    wkt_taken = 0
    playerframe = innings[innings.Player == player]

    ######### Number of innings #########
    numInnings = playerframe.shape[0]

    ######### Overs #########
    overs = playerframe['Overs'].sum()

    ######### Runs concede #########
    runs_concede = playerframe['Runs'].sum()

    ######### Wicket Taken #########
    wkt_taken = playerframe['Wkts'].sum()

    ######### Total Ball bowled #########
    balls_bowled = 6 * overs

    ######### Bowling Average #########
    if (wkt_taken==0):
      average = 0
    else:
      average = runs_concede/wkt_taken

    ######### Strike Rate #########
    if (wkt_taken==0):
      sr = 0
    else:
      sr = (balls_bowled/wkt_taken) * 100

    ######### Wickets haul #########
    ff = playerframe[playerframe.Wkts > 4].shape[0]

  ####################  Rate the Elements Before Calculation  ####################

    #### numInnings ####
    if (numInnings>=1 and numInnings<=4):
      numInnings = 1
    elif (numInnings>=5 and numInnings<=9):
      numInnings = 2
    elif (numInnings>=10 and numInnings<=13):
      numInnings = 3
    elif (numInnings>=12 and numInnings<=14):
      numInnings = 4
    elif (numInnings>=15):
      numInnings = 5 

    #### average ####
    if (average>=0.0 and average<=24.9):
      average = 1
    elif (average>=25.0 and average<=29.9):
      average = 2
    elif (average>=30.0 and average<=34.9):
      average = 3
    elif (average>=35.0 and average<=49.9):
      average = 4
    elif (average>=50.0):
      average = 5   

    #### sr ####
    if (sr>=0.0 and sr<=29.9):
      sr = 1
    elif (sr>=30.0 and sr<=39.9):
      sr = 2
    elif (sr>=40.0 and sr<=49.0):
      sr = 3
    elif (sr>=50.0 and sr<=59.9):
      sr = 4
    elif (sr>=60.0):
      sr = 5  

    #### Overs ####
    if (overs>=1 and overs<=9):
      overs = 1
    elif (overs>=10 and overs<=24):
      overs = 2
    elif (overs>=25 and overs<=49):
      overs = 3
    elif (overs>=50 and overs<=99):
      overs = 4
    elif (overs>=100):
      overs = 5 

    #### ff ####
    if (ff>=1 and ff<=2):
      ff = 4
    elif (ff>=3):
      ff = 5

    form =  (0.3269 * overs) + (0.2846 * numInnings) + (0.1877 * sr) + (0.1210 * average) + (0.0798 * ff)
  else:
    form = 0

  Form.append(form)

In [19]:
FormFrame = pd.DataFrame(Form, columns=['Form'])

In [20]:
FormFrame

Unnamed: 0,Form
0,2.7304
1,1.2471
2,0.0000
3,0.0000
4,2.1189
...,...
1307,0.0000
1308,0.0000
1309,0.0000
1310,0.0000


#### Opposition

Opposition describes a player’s performance against a particular team. All the traditional attributes used in this formula are calculated over all the matches played by the player against the opposition team in his entire career till the day of the match. 

**Opposition = (0.3177 * overs) + (0.3177 * numInnings) + (0.1933 * sr) + (0.1465 * average) + (0.0943 * ff)** 

In [21]:
listOfOpposition = list(innings['Opposition'].unique())
listOfOpposition.sort()

In [22]:
#Please Rate then Calculate

## Opposition 
Oppositions = []

for player in listOfBowler:
  playerframe = innings[innings.Player == player]
  perPlayerOpposition = []
  for opposition in listOfOpposition:
    oppositionframe = playerframe[playerframe.Opposition == opposition]
    if not oppositionframe.empty:
      runs_conceede = 0
      balls_bowled = 0
      wkt_taken = 0
      playerframe = innings[innings.Player == player]

      ######### Number of innings #########
      numInnings = playerframe.shape[0]

      ######### Overs #########
      overs = playerframe['Overs'].sum()

      ######### Runs concede #########
      runs_concede = playerframe['Runs'].sum()

      ######### Wicket Taken #########
      wkt_taken = playerframe['Wkts'].sum()

      ######### Total Ball bowled #########
      balls_bowled = 6 * overs

      ######### Bowling Average #########
      if (wkt_taken==0):
        average = 0
      else:
        average = runs_concede/wkt_taken

      ######### Strike Rate #########
      if (wkt_taken==0):
        sr = 0
      else:
        sr = (balls_bowled/wkt_taken) * 100

      ######### Wickets haul #########
      ff = playerframe[playerframe.Wkts > 4].shape[0]

    ####################  Rate the Elements Before Calculation  ####################

      #### numInnings ####
      if (numInnings>=1 and numInnings<=2):
        numInnings = 1
      elif (numInnings>=3 and numInnings<=4):
        numInnings = 2
      elif (numInnings>=5 and numInnings<=6):
        numInnings = 3
      elif (numInnings>=7 and numInnings<=9):
        numInnings = 4
      elif (numInnings>=10):
        numInnings = 5 

      #### average ####
      if (average>=0.0 and average<=24.9):
        average = 1
      elif (average>=25.0 and average<=29.9):
        average = 2
      elif (average>=30.0 and average<=34.9):
        average = 3
      elif (average>=35.0 and average<=49.9):
        average = 4
      elif (average>=50.0):
        average = 5   

      #### sr ####
      if (sr>=0.0 and sr<=29.9):
        sr = 1
      elif (sr>=30.0 and sr<=39.9):
        sr = 2
      elif (sr>=40.0 and sr<=49.0):
        sr = 3
      elif (sr>=50.0 and sr<=59.9):
        sr = 4
      elif (sr>=60.0):
        sr = 5  

      #### Overs ####
      if (overs>=1 and overs<=9):
        overs = 1
      elif (overs>=10 and overs<=24):
        overs = 2
      elif (overs>=25 and overs<=49):
        overs = 3
      elif (overs>=50 and overs<=99):
        overs = 4
      elif (overs>=100):
        overs = 5 

      #### ff ####
      if (ff>=1 and ff<=2):
        ff = 4
      elif (ff>=3):
        ff = 5

      oppo =  (0.3177 * overs) + (0.3177 * numInnings) + (0.1933 * sr) + (0.1465 * average) + (0.0943 * ff)
    else:
      oppo = 0
    perPlayerOpposition.append(oppo)
  Oppositions.append(perPlayerOpposition)   

In [23]:
OppositionsFrame = pd.DataFrame(Oppositions, columns = listOfOpposition) 

In [24]:
OppositionsFrame

Unnamed: 0,Afghanistan,Africa XI,Asia XI,Australia,Bangladesh,Bermuda,Canada,England,Hong Kong,ICC World XI,India,Ireland,Kenya,Namibia,Nepal,Netherlands,New Zealand,Oman,P.N.G.,Pakistan,Scotland,South Africa,Sri Lanka,U.A.E.,U.S.A.,West Indies,Zimbabwe
0,0.0000,0.0,0.0000,0.0000,0.0000,0.0,0.0000,0.0000,0.0,0.0,0.0000,0.0000,0.0000,0.0000,3.1657,0.0000,0.0000,0.0,0.0,0.0000,3.1657,0.0000,0.0000,3.1657,0.0,0.0000,0.0000
1,0.0000,0.0,0.0000,0.0000,0.0000,0.0,0.0000,0.0000,0.0,0.0,0.0000,0.0000,0.0000,1.6106,1.6106,0.0000,0.0000,0.0,0.0,0.0000,0.0000,0.0000,0.0000,1.6106,0.0,0.0000,0.0000
2,0.0000,0.0,0.0000,4.7295,4.7295,0.0,0.0000,4.7295,0.0,0.0,4.7295,0.0000,0.0000,0.0000,0.0000,4.7295,4.7295,0.0,0.0,0.0000,0.0000,4.7295,4.7295,4.7295,0.0,4.7295,4.7295
3,0.0000,0.0,0.0000,0.0000,0.0000,0.0,0.0000,0.0000,0.0,0.0,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,3.2875,0.0,0.0,0.0000,0.0000,0.0000,0.0000,0.0000,0.0,0.0000,3.2875
4,0.0000,0.0,0.0000,0.0000,0.0000,0.0,0.0000,0.0000,0.0,0.0,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0,0.0,0.0000,2.2126,0.0000,0.0000,0.0000,0.0,0.0000,0.0000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1307,0.0000,0.0,0.0000,0.0000,0.9752,0.0,0.0000,0.0000,0.0,0.0,0.0000,0.9752,0.0000,0.0000,0.0000,0.0000,0.0000,0.0,0.0,0.0000,0.0000,0.0000,0.0000,0.0000,0.0,0.0000,0.0000
1308,4.6425,0.0,0.0000,0.0000,0.0000,0.0,0.0000,0.0000,0.0,0.0,4.6425,0.0000,0.0000,0.0000,0.0000,0.0000,0.0000,0.0,0.0,0.0000,0.0000,0.0000,4.6425,0.0000,0.0,0.0000,4.6425
1309,0.0000,0.0,4.4118,4.4118,4.4118,0.0,4.4118,0.0000,0.0,0.0,0.0000,0.0000,4.4118,0.0000,0.0000,0.0000,0.0000,0.0,0.0,4.4118,0.0000,0.0000,4.4118,0.0000,0.0,0.0000,4.4118
1310,0.0000,0.0,0.0000,4.8137,4.8137,0.0,0.0000,4.8137,0.0,0.0,4.8137,0.0000,0.0000,0.0000,0.0000,0.0000,4.8137,0.0,0.0,4.8137,0.0000,4.8137,0.0000,4.8137,0.0,4.8137,4.8137


In [25]:
OppositionsFrame.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1312 entries, 0 to 1311
Data columns (total 27 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Afghanistan   1312 non-null   float64
 1   Africa XI     1312 non-null   float64
 2   Asia XI       1312 non-null   float64
 3   Australia     1312 non-null   float64
 4   Bangladesh    1312 non-null   float64
 5   Bermuda       1312 non-null   float64
 6   Canada        1312 non-null   float64
 7   England       1312 non-null   float64
 8   Hong Kong     1312 non-null   float64
 9   ICC World XI  1312 non-null   float64
 10  India         1312 non-null   float64
 11  Ireland       1312 non-null   float64
 12  Kenya         1312 non-null   float64
 13  Namibia       1312 non-null   float64
 14  Nepal         1312 non-null   float64
 15  Netherlands   1312 non-null   float64
 16  New Zealand   1312 non-null   float64
 17  Oman          1312 non-null   float64
 18  P.N.G.        1312 non-null 

#### Venue

Venue describes a player’s performance at a particular venue. All the traditional attributes used in this formula are calculated over all the matches played by the player at the venue in his entire career till the day of the match. 

**Venue = (0.3018 * overs) + (0.2783 * numInnings) + (0.1836 * sr) + (0.1391 * average) + (0.0972 * ff)**

In [26]:
listOfVenue = list(innings['Ground'].unique())
listOfVenue.sort()

In [27]:
#Please Rate then Calculate

## Venue
Venues = []

for player in listOfBowler:
  playerframe = innings[innings.Player == player]
  perPlayerVenue = []
  for venue in listOfVenue:
    venueframe = playerframe[playerframe.Ground == venue]
    if not venueframe.empty:
      runs_conceede = 0
      balls_bowled = 0
      wkt_taken = 0
      playerframe = innings[innings.Player == player]

      ######### Number of innings #########
      numInnings = playerframe.shape[0]

      ######### Overs #########
      overs = playerframe['Overs'].sum()

      ######### Runs concede #########
      runs_concede = playerframe['Runs'].sum()

      ######### Wicket Taken #########
      wkt_taken = playerframe['Wkts'].sum()

      ######### Total Ball bowled #########
      balls_bowled = 6 * overs

      ######### Bowling Average #########
      if (wkt_taken==0):
        average = 0
      else:
        average = runs_concede/wkt_taken

      ######### Strike Rate #########
      if (wkt_taken==0):
        sr = 0
      else:
        sr = (balls_bowled/wkt_taken) * 100

      ######### Wickets haul #########
      ff = playerframe[playerframe.Wkts > 4].shape[0]

    ####################  Rate the Elements Before Calculation  ####################

    #### numInnings ####
      if (numInnings==1):
        numInnings = 1
      elif (numInnings==2):
        numInnings = 2
      elif (numInnings==3):
        numInnings = 3
      elif (numInnings==4):
        numInnings = 4
      elif (numInnings>=5):
        numInnings = 5 

      #### average ####
      if (average>=0.0 and average<=24.9):
        average = 1
      elif (average>=25.0 and average<=29.9):
        average = 2
      elif (average>=30.0 and average<=34.9):
        average = 3
      elif (average>=35.0 and average<=49.9):
        average = 4
      elif (average>=50.0):
        average = 5   

      #### sr ####
      if (sr>=0.0 and sr<=29.9):
        sr = 1
      elif (sr>=30.0 and sr<=39.9):
        sr = 2
      elif (sr>=40.0 and sr<=49.0):
        sr = 3
      elif (sr>=50.0 and sr<=59.9):
        sr = 4
      elif (sr>=60.0):
        sr = 5  

      #### Overs ####
      if (overs>=1 and overs<=9):
        overs = 1
      elif (overs>=10 and overs<=19):
        overs = 2
      elif (overs>=20 and overs<=29):
        overs = 3
      elif (overs>=30 and overs<=39):
        overs = 4
      elif (overs>=40):
        overs = 5 

      #### ff ####
      if (ff>=1 and ff<=2):
        ff = 4
      elif (ff>=3):
        ff = 5

      ven = (0.3018 * overs) + (0.2783 * numInnings) + (0.1836 * sr) + (0.1391 * average) + (0.0972 * ff) 
    else:
      ven = 0
    perPlayerVenue.append(ven)
  Venues.append(perPlayerVenue)    

In [28]:
VenuesFrame = pd.DataFrame(Venues, columns = listOfVenue)

In [29]:
VenuesFrame

Unnamed: 0,Aberdeen,Abu Dhabi,Adelaide,Ahmedabad,Al Amerat,Amritsar,Amstelveen,Auckland,Ayr,Ballarat,Basseterre,Belfast,Bengaluru,Benoni,Berri,Birmingham,Bloemfontein,Bogra,Bready,Bridgetown,Brisbane,Bristol,Bulawayo,Cairns,Canberra,Canterbury,Cape Town,Cardiff,Centurion,Chandigarh,Chattogram,Chelmsford,Chennai,Chester-le-Street,Christchurch,Colombo (PSS),Colombo (RPS),Colombo (SSC),Cuttack,Dambulla,...,Port Elizabeth,Port Moresby,Port of Spain,Potchefstroom,Providence,Pune,Queenstown,Quetta,Rajkot,Ranchi,Rawalpindi,Roseau,Rotterdam,Sargodha,Schiedam,Sharjah,Sheikhupura,Sialkot,Singapore,Southampton,Srinagar,St George's,St John's,Sydney,Sylhet,Tangier,Taunton,Taupo,The Hague,The Oval,Thiruvananthapuram,Toronto,Townsville,Vadodara,Vijayawada,Visakhapatnam,Wellington,Whangarei,Windhoek,Worcester
0,0.0000,0.000,0.0000,0.0000,0.0000,0.0,0.0,0.0000,0.0,0.0,0.0,0.0,0.0000,0.0,0.0,0.0000,0.0000,0.0,0.0,0.0,0.0000,0.0000,0.0000,0.0,0.0,0.0,0.0000,0.0,0.0000,0.0,0.0000,0.0,0.0,0.0,0.0000,0.0,0.0000,0.0000,0.0,0.0000,...,0.0000,0.0,0.0000,0.0,0.0,0.0000,0.0,0.0000,0.0,0.0,0.0000,0.0,0.0,0.0,0.0,3.4931,0.0000,0.0,0.0000,0.0000,0.0,0.0,0.0,0.0000,0.0,0.0,0.0,0.0,0.0,0.0000,0.0,0.0000,0.0,0.0000,0.0,0.0,0.0000,0.0,0.0,0.0
1,0.0000,0.000,0.0000,0.0000,1.7612,0.0,0.0,0.0000,0.0,0.0,0.0,0.0,0.0000,0.0,0.0,0.0000,0.0000,0.0,0.0,0.0,0.0000,0.0000,0.0000,0.0,0.0,0.0,0.0000,0.0,0.0000,0.0,0.0000,0.0,0.0,0.0,0.0000,0.0,0.0000,0.0000,0.0,0.0000,...,0.0000,0.0,0.0000,0.0,0.0,0.0000,0.0,0.0000,0.0,0.0,0.0000,0.0,0.0,0.0,0.0,0.0000,0.0000,0.0,0.0000,0.0000,0.0,0.0,0.0,0.0000,0.0,0.0,0.0,0.0,0.0,0.0000,0.0,0.0000,0.0,0.0000,0.0,0.0,0.0000,0.0,0.0,0.0
2,0.0000,0.000,4.3749,0.0000,0.0000,0.0,0.0,4.3749,0.0,0.0,0.0,0.0,4.3749,0.0,0.0,4.3749,0.0000,0.0,0.0,0.0,4.3749,0.0000,0.0000,0.0,0.0,0.0,4.3749,0.0,4.3749,0.0,0.0000,0.0,0.0,0.0,4.3749,0.0,4.3749,4.3749,0.0,0.0000,...,4.3749,0.0,4.3749,0.0,0.0,0.0000,0.0,4.3749,0.0,0.0,4.3749,0.0,0.0,0.0,0.0,4.3749,4.3749,0.0,4.3749,0.0000,0.0,0.0,0.0,4.3749,0.0,0.0,0.0,0.0,0.0,4.3749,0.0,4.3749,0.0,0.0000,0.0,0.0,4.3749,0.0,0.0,0.0
3,0.0000,0.000,0.0000,0.0000,0.0000,0.0,0.0,0.0000,0.0,0.0,0.0,0.0,0.0000,0.0,0.0,0.0000,0.0000,0.0,0.0,0.0,0.0000,0.0000,0.0000,0.0,0.0,0.0,0.0000,0.0,0.0000,0.0,0.0000,0.0,0.0,0.0,0.0000,0.0,0.0000,0.0000,0.0,0.0000,...,0.0000,0.0,0.0000,0.0,0.0,0.0000,0.0,0.0000,0.0,0.0,0.0000,0.0,0.0,0.0,0.0,0.0000,0.0000,0.0,0.0000,0.0000,0.0,0.0,0.0,0.0000,0.0,0.0,0.0,0.0,0.0,0.0000,0.0,0.0000,0.0,0.0000,0.0,0.0,3.6321,0.0,0.0,0.0
4,2.3564,0.000,0.0000,0.0000,0.0000,0.0,0.0,0.0000,0.0,0.0,0.0,0.0,0.0000,0.0,0.0,0.0000,0.0000,0.0,0.0,0.0,0.0000,0.0000,0.0000,0.0,0.0,0.0,0.0000,0.0,0.0000,0.0,0.0000,0.0,0.0,0.0,0.0000,0.0,0.0000,0.0000,0.0,0.0000,...,0.0000,0.0,0.0000,0.0,0.0,0.0000,0.0,0.0000,0.0,0.0,0.0000,0.0,0.0,0.0,0.0,0.0000,0.0000,0.0,0.0000,0.0000,0.0,0.0,0.0,0.0000,0.0,0.0,0.0,0.0,0.0,0.0000,0.0,0.0000,0.0,0.0000,0.0,0.0,0.0000,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1307,0.0000,0.000,0.0000,0.0000,0.0000,0.0,0.0,0.0000,0.0,0.0,0.0,0.0,0.0000,0.0,0.0,0.0000,0.0000,0.0,0.0,0.0,0.0000,0.0000,0.0000,0.0,0.0,0.0,0.0000,0.0,0.0000,0.0,1.1811,0.0,0.0,0.0,0.0000,0.0,0.0000,0.0000,0.0,0.0000,...,0.0000,0.0,0.0000,0.0,0.0,0.0000,0.0,0.0000,0.0,0.0,0.0000,0.0,0.0,0.0,0.0,0.0000,0.0000,0.0,0.0000,0.0000,0.0,0.0,0.0,0.0000,0.0,0.0,0.0,0.0,0.0,0.0000,0.0,0.0000,0.0,0.0000,0.0,0.0,0.0000,0.0,0.0,0.0
1308,0.0000,0.000,0.0000,0.0000,0.0000,0.0,0.0,0.0000,0.0,0.0,0.0,0.0,0.0000,0.0,0.0,0.0000,0.0000,0.0,0.0,0.0,0.0000,0.0000,4.6246,0.0,0.0,0.0,0.0000,0.0,0.0000,0.0,0.0000,0.0,0.0,0.0,0.0000,0.0,0.0000,0.0000,0.0,0.0000,...,0.0000,0.0,0.0000,0.0,0.0,0.0000,0.0,0.0000,0.0,0.0,0.0000,0.0,0.0,0.0,0.0,0.0000,0.0000,0.0,0.0000,0.0000,0.0,0.0,0.0,0.0000,0.0,0.0,0.0,0.0,0.0,0.0000,0.0,0.0000,0.0,0.0000,0.0,0.0,0.0000,0.0,0.0,0.0
1309,0.0000,0.000,0.0000,0.0000,0.0000,0.0,0.0,0.0000,0.0,0.0,0.0,0.0,0.0000,0.0,0.0,0.0000,4.3749,0.0,0.0,0.0,4.3749,0.0000,0.0000,0.0,0.0,0.0,4.3749,0.0,0.0000,0.0,0.0000,0.0,0.0,0.0,0.0000,0.0,0.0000,0.0000,0.0,0.0000,...,4.3749,0.0,0.0000,0.0,0.0,0.0000,0.0,0.0000,0.0,0.0,0.0000,0.0,0.0,0.0,0.0,0.0000,0.0000,0.0,0.0000,0.0000,0.0,0.0,0.0,4.3749,0.0,0.0,0.0,0.0,0.0,0.0000,0.0,0.0000,0.0,0.0000,0.0,0.0,0.0000,0.0,0.0,0.0
1310,0.0000,0.000,0.0000,4.4855,0.0000,0.0,0.0,4.4855,0.0,0.0,0.0,0.0,0.0000,0.0,0.0,0.0000,0.0000,0.0,0.0,0.0,4.4855,4.4855,4.4855,0.0,0.0,0.0,4.4855,0.0,0.0000,0.0,0.0000,0.0,0.0,0.0,4.4855,0.0,4.4855,4.4855,0.0,4.4855,...,4.4855,0.0,0.0000,0.0,0.0,4.4855,0.0,0.0000,0.0,0.0,4.4855,0.0,0.0,0.0,0.0,4.4855,0.0000,0.0,0.0000,4.4855,0.0,0.0,0.0,0.0000,0.0,0.0,0.0,0.0,0.0,4.4855,0.0,0.0000,0.0,4.4855,0.0,0.0,4.4855,0.0,0.0,0.0


In [30]:
VenuesFrame.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1312 entries, 0 to 1311
Columns: 172 entries, Aberdeen to Worcester
dtypes: float64(172)
memory usage: 1.7 MB


In [31]:
listOfBowlerFrame = pd.DataFrame(listOfBowler, columns=["Players"])

In [32]:
listOfBowlerFrame

Unnamed: 0,Players
0,NP Kenjige
1,ME Sanuth
2,Aamer Sohail
3,Aamer Yamin
4,Aamir Kaleem
...,...
1307,C Zhuwao
1308,Ziaur Rahman
1309,M Zondeki
1310,DNT Zoysa


#### Final Data for training the model

In [33]:
playerPerformance = pd.concat([listOfBowlerFrame, ConsistencyFrame, FormFrame], axis = 1)
playerOpposition = pd.concat([listOfBowlerFrame, OppositionsFrame], axis = 1)
playerVenue = pd.concat([listOfBowlerFrame, VenuesFrame], axis = 1)

In [34]:
playerPerformance

Unnamed: 0,Players,Consistency,Form
0,NP Kenjige,1.6768,2.7304
1,ME Sanuth,0.9385,1.2471
2,Aamer Sohail,3.9142,0.0000
3,Aamer Yamin,1.9693,0.0000
4,Aamir Kaleem,1.6768,2.1189
...,...,...,...
1307,C Zhuwao,0.9385,0.0000
1308,Ziaur Rahman,1.9588,0.0000
1309,M Zondeki,1.8718,0.0000
1310,DNT Zoysa,3.3769,0.0000


In [35]:
bowling = pd.read_csv("/content/drive/My Drive/Projects/Cricket Prediction/Bowling.csv")

In [36]:
# Cleaning data

bowling = bowling[bowling.Runs != 'DNB']
bowling = bowling[bowling.Runs != 'TDNB']
bowling = bowling[bowling.Runs != 'sub']
bowling = bowling[bowling.Runs != 'absent']
bowling = bowling.rename(columns={"Player 1":"Players", "Start Date":"StartDate"})

In [37]:
bowling = bowling.drop(columns=['Team', 'Mdns', 'Econ', 'Inns', 'Overs','Runs', 'StartDate'])

In [38]:
bowling

Unnamed: 0,Players,Wkts,Opposition,Ground
0,NP Kenjige,1,U.A.E.,Sharjah
1,NP Kenjige,1,Scotland,Sharjah
2,NP Kenjige,1,U.A.E.,ICCA Dubai
3,NP Kenjige,0,Scotland,ICCA Dubai
4,ME Sanuth,0,U.A.E.,Al Amerat
...,...,...,...,...
60065,Zulfiqar Babar,0,Bangladesh,Dhaka
60066,Zulqarnain Haider,-,South Africa,Abu Dhabi
60067,Zulqarnain Haider,-,South Africa,Abu Dhabi
60068,Zulqarnain Haider,-,South Africa,Dubai (DSC)


In [39]:
wkt = []
for st in bowling['Wkts'].values:
  st = re.findall(r'[0-9]+', st)
  if not st:
    st.append('0')
  r = float(st[0])
  ######## Rate the run attribute ########
  if (r>=0 and r<=1):
    r = 1
  elif (r>=2 and r<=3):
    r = 2
  elif (r>=4):
    r = 3  
  wkt.append(r)
bowling['Wkts'] = wkt

In [40]:
bowling.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 60070 entries, 0 to 60069
Data columns (total 4 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   Players     60070 non-null  object
 1   Wkts        60070 non-null  int64 
 2   Opposition  60070 non-null  object
 3   Ground      60070 non-null  object
dtypes: int64(1), object(3)
memory usage: 2.3+ MB


In [41]:
bowling

Unnamed: 0,Players,Wkts,Opposition,Ground
0,NP Kenjige,1,U.A.E.,Sharjah
1,NP Kenjige,1,Scotland,Sharjah
2,NP Kenjige,1,U.A.E.,ICCA Dubai
3,NP Kenjige,1,Scotland,ICCA Dubai
4,ME Sanuth,1,U.A.E.,Al Amerat
...,...,...,...,...
60065,Zulfiqar Babar,1,Bangladesh,Dhaka
60066,Zulqarnain Haider,1,South Africa,Abu Dhabi
60067,Zulqarnain Haider,1,South Africa,Abu Dhabi
60068,Zulqarnain Haider,1,South Africa,Dubai (DSC)


In [42]:
# Now we have to join playerPerformance and bowling dataframe to create our final dataset

finalbowling = pd.merge(bowling, playerPerformance, on="Players")

In [43]:
finalbowling.isna().any()

Players        False
Wkts           False
Opposition     False
Ground         False
Consistency    False
Form           False
dtype: bool

In [44]:
finalbowling

Unnamed: 0,Players,Wkts,Opposition,Ground,Consistency,Form
0,NP Kenjige,1,U.A.E.,Sharjah,1.6768,2.7304
1,NP Kenjige,1,Scotland,Sharjah,1.6768,2.7304
2,NP Kenjige,1,U.A.E.,ICCA Dubai,1.6768,2.7304
3,NP Kenjige,1,Scotland,ICCA Dubai,1.6768,2.7304
4,NP Kenjige,2,Nepal,Kirtipur,1.6768,2.7304
...,...,...,...,...,...,...
48469,Zulfiqar Babar,1,Australia,Sharjah,1.9693,0.0000
48470,Zulfiqar Babar,2,Australia,Dubai (DSC),1.9693,0.0000
48471,Zulfiqar Babar,1,Australia,Abu Dhabi,1.9693,0.0000
48472,Zulfiqar Babar,1,New Zealand,Abu Dhabi,1.9693,0.0000


In [45]:
finalPlayersFrame = finalbowling[['Players','Opposition']]
finalbowlingOpposition = pd.merge(finalPlayersFrame, playerOpposition, on="Players")
finalbowlingOpposition =finalbowlingOpposition.drop(columns=['Players', 'Opposition'])
finalPlayersFrame = pd.get_dummies(finalPlayersFrame.Opposition)
finalbowlingOpposition = pd.DataFrame(finalbowlingOpposition.values*finalPlayersFrame.values, columns=finalbowlingOpposition.columns, index=finalbowlingOpposition.index)
finalbowling = pd.concat([finalbowling, finalbowlingOpposition], axis=1)

In [46]:
finalPlayersFrame = finalbowling[['Players','Ground']]
finalbowlingVenue = pd.merge(finalPlayersFrame, playerVenue, on="Players")
finalbowlingVenue =finalbowlingVenue.drop(columns=['Players', 'Ground'])
finalPlayersFrame = pd.get_dummies(finalPlayersFrame.Ground)
colfinalPlayersFrame = finalPlayersFrame.columns
finalbowlingVenue = finalbowlingVenue[colfinalPlayersFrame]
finalbowlingVenue = pd.DataFrame(finalbowlingVenue.values*finalPlayersFrame.values, columns=finalbowlingVenue.columns, index=finalbowlingVenue.index)
finalbowling = pd.concat([finalbowling, finalbowlingVenue], axis=1)

In [47]:
finalbowling = finalbowling.drop(columns=['Opposition', 'Ground'])

In [48]:
finalbowling

Unnamed: 0,Players,Wkts,Consistency,Form,Afghanistan,Africa XI,Asia XI,Australia,Bangladesh,Bermuda,Canada,England,Hong Kong,ICC World XI,India,Ireland,Kenya,Namibia,Nepal,Netherlands,New Zealand,Oman,P.N.G.,Pakistan,Scotland,South Africa,Sri Lanka,U.A.E.,U.S.A.,West Indies,Zimbabwe,Aberdeen,Abu Dhabi,Adelaide,Ahmedabad,Al Amerat,Amstelveen,Auckland,Ayr,Basseterre,...,Paarl,Pallekele,Perth,Peshawar,Pietermaritzburg,Port Elizabeth,Port Moresby,Port of Spain,Potchefstroom,Providence,Pune,Queenstown,Rajkot,Ranchi,Rawalpindi,Roseau,Rotterdam,Schiedam,Sharjah,Sheikhupura,Singapore,Southampton,St George's,St John's,Sydney,Sylhet,Tangier,Taunton,Taupo,The Hague,The Oval,Thiruvananthapuram,Toronto,Townsville,Vadodara,Vijayawada,Visakhapatnam,Wellington,Whangarei,Windhoek
0,NP Kenjige,1,1.6768,2.7304,0.0,0.0,0.0,0.0000,0.0000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0000,0.0,0.0000,0.0,0.0,0.0,0.0000,0.0,0.0,3.1657,0.0,0.0,0.0,0.0,0.000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.4931,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,NP Kenjige,1,1.6768,2.7304,0.0,0.0,0.0,0.0000,0.0000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0000,0.0,0.0000,0.0,0.0,0.0,3.1657,0.0,0.0,0.0000,0.0,0.0,0.0,0.0,0.000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.4931,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,NP Kenjige,1,1.6768,2.7304,0.0,0.0,0.0,0.0000,0.0000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0000,0.0,0.0000,0.0,0.0,0.0,0.0000,0.0,0.0,3.1657,0.0,0.0,0.0,0.0,0.000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,NP Kenjige,1,1.6768,2.7304,0.0,0.0,0.0,0.0000,0.0000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0000,0.0,0.0000,0.0,0.0,0.0,3.1657,0.0,0.0,0.0000,0.0,0.0,0.0,0.0,0.000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,NP Kenjige,2,1.6768,2.7304,0.0,0.0,0.0,0.0000,0.0000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.1657,0.0,0.0000,0.0,0.0,0.0,0.0000,0.0,0.0,0.0000,0.0,0.0,0.0,0.0,0.000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
48469,Zulfiqar Babar,1,1.9693,0.0000,0.0,0.0,0.0,3.6052,0.0000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0000,0.0,0.0000,0.0,0.0,0.0,0.0000,0.0,0.0,0.0000,0.0,0.0,0.0,0.0,0.000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.5140,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
48470,Zulfiqar Babar,2,1.9693,0.0000,0.0,0.0,0.0,3.6052,0.0000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0000,0.0,0.0000,0.0,0.0,0.0,0.0000,0.0,0.0,0.0000,0.0,0.0,0.0,0.0,0.000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
48471,Zulfiqar Babar,1,1.9693,0.0000,0.0,0.0,0.0,3.6052,0.0000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0000,0.0,0.0000,0.0,0.0,0.0,0.0000,0.0,0.0,0.0000,0.0,0.0,0.0,0.0,4.514,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
48472,Zulfiqar Babar,1,1.9693,0.0000,0.0,0.0,0.0,0.0000,0.0000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0000,0.0,3.6052,0.0,0.0,0.0,0.0000,0.0,0.0,0.0000,0.0,0.0,0.0,0.0,4.514,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [49]:
finalbowling.to_csv("/content/drive/My Drive/Projects/Cricket Prediction/finalbowling.csv")

In [50]:
finalbowling = pd.read_csv("/content/drive/My Drive/Projects/Cricket Prediction/finalbowling.csv")

In [51]:
X_bowling = finalbowling.drop(columns=['Wkts', 'Unnamed: 0', 'Players'])
y_bowling = finalbowling['Wkts']

In [52]:
X_bowling

Unnamed: 0,Consistency,Form,Afghanistan,Africa XI,Asia XI,Australia,Bangladesh,Bermuda,Canada,England,Hong Kong,ICC World XI,India,Ireland,Kenya,Namibia,Nepal,Netherlands,New Zealand,Oman,P.N.G.,Pakistan,Scotland,South Africa,Sri Lanka,U.A.E.,U.S.A.,West Indies,Zimbabwe,Aberdeen,Abu Dhabi,Adelaide,Ahmedabad,Al Amerat,Amstelveen,Auckland,Ayr,Basseterre,Belfast,Bengaluru,...,Paarl,Pallekele,Perth,Peshawar,Pietermaritzburg,Port Elizabeth,Port Moresby,Port of Spain,Potchefstroom,Providence,Pune,Queenstown,Rajkot,Ranchi,Rawalpindi,Roseau,Rotterdam,Schiedam,Sharjah,Sheikhupura,Singapore,Southampton,St George's,St John's,Sydney,Sylhet,Tangier,Taunton,Taupo,The Hague,The Oval,Thiruvananthapuram,Toronto,Townsville,Vadodara,Vijayawada,Visakhapatnam,Wellington,Whangarei,Windhoek
0,1.6768,2.7304,0.0,0.0,0.0,0.0000,0.0000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0000,0.0,0.0000,0.0,0.0,0.0,0.0000,0.0,0.0,3.1657,0.0,0.0,0.0,0.0,0.000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.4931,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,1.6768,2.7304,0.0,0.0,0.0,0.0000,0.0000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0000,0.0,0.0000,0.0,0.0,0.0,3.1657,0.0,0.0,0.0000,0.0,0.0,0.0,0.0,0.000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.4931,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,1.6768,2.7304,0.0,0.0,0.0,0.0000,0.0000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0000,0.0,0.0000,0.0,0.0,0.0,0.0000,0.0,0.0,3.1657,0.0,0.0,0.0,0.0,0.000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,1.6768,2.7304,0.0,0.0,0.0,0.0000,0.0000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0000,0.0,0.0000,0.0,0.0,0.0,3.1657,0.0,0.0,0.0000,0.0,0.0,0.0,0.0,0.000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,1.6768,2.7304,0.0,0.0,0.0,0.0000,0.0000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.1657,0.0,0.0000,0.0,0.0,0.0,0.0000,0.0,0.0,0.0000,0.0,0.0,0.0,0.0,0.000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
48469,1.9693,0.0000,0.0,0.0,0.0,3.6052,0.0000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0000,0.0,0.0000,0.0,0.0,0.0,0.0000,0.0,0.0,0.0000,0.0,0.0,0.0,0.0,0.000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.5140,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
48470,1.9693,0.0000,0.0,0.0,0.0,3.6052,0.0000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0000,0.0,0.0000,0.0,0.0,0.0,0.0000,0.0,0.0,0.0000,0.0,0.0,0.0,0.0,0.000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
48471,1.9693,0.0000,0.0,0.0,0.0,3.6052,0.0000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0000,0.0,0.0000,0.0,0.0,0.0,0.0000,0.0,0.0,0.0000,0.0,0.0,0.0,0.0,4.514,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
48472,1.9693,0.0000,0.0,0.0,0.0,0.0000,0.0000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0000,0.0,3.6052,0.0,0.0,0.0,0.0000,0.0,0.0,0.0000,0.0,0.0,0.0,0.0,4.514,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [53]:
y_bowling

0        1
1        1
2        1
3        1
4        2
        ..
48469    1
48470    2
48471    1
48472    1
48473    1
Name: Wkts, Length: 48474, dtype: int64

# Oversampling SMOTE

In [54]:
pip install -U imbalanced-learn

Requirement already up-to-date: imbalanced-learn in /usr/local/lib/python3.6/dist-packages (0.7.0)


In [55]:
from imblearn.over_sampling import SMOTE
X_resample_bowling, y_resample_bowling = SMOTE().fit_sample(X_bowling,y_bowling.values.ravel())

In [56]:
X_resample_bowling.shape

(115260, 180)

In [57]:
X_bowling.shape

(48474, 180)

# Taking Care of Zero values

In [58]:
from sklearn.impute import SimpleImputer
imputer = SimpleImputer(missing_values=0, strategy='mean')
imputer.fit(X_resample_bowling)
X_resample_bowling = imputer.transform(X_resample_bowling) 

# Splitting the datasets into the Training set and Test set

In [59]:
###############   FOR bowling   ###############
from sklearn.model_selection import train_test_split
X_train_bowling, X_test_bowling, y_train_bowling, y_test_bowling = train_test_split(X_resample_bowling, y_resample_bowling, test_size=0.3, random_state = 1)

In [60]:
print(X_train_bowling.shape)

print(X_test_bowling.shape)

print(y_train_bowling.shape)

print(y_test_bowling.shape)

(80682, 180)
(34578, 180)
(80682,)
(34578,)


# Feature Scaleing

In [61]:
###############   FOR bowling   ###############
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train_bowling = sc.fit_transform(X_train_bowling)
X_test_bowling = sc.transform(X_test_bowling)

In [62]:
X_train_bowling

array([[ 1.32003297e+00, -4.58297508e-04,  1.66029418e-03, ...,
         4.59376559e-04, -1.23444469e-03,  1.11479130e-03],
       [ 1.37741881e+00, -4.58297508e-04,  1.66029418e-03, ...,
         4.59376559e-04, -1.23444469e-03,  1.11479130e-03],
       [ 1.14594987e+00, -4.58297508e-04,  1.66029418e-03, ...,
         4.59376559e-04, -1.23444469e-03,  1.11479130e-03],
       ...,
       [ 1.02240145e-01,  7.18194630e-01,  1.66029418e-03, ...,
         4.59376559e-04, -1.23444469e-03,  1.11479130e-03],
       [ 4.84402233e-01,  1.28848743e+00,  4.03275324e+00, ...,
         4.59376559e-04, -1.23444469e-03,  1.11479130e-03],
       [ 1.34493057e+00, -4.58297508e-04,  1.66029418e-03, ...,
         4.59376559e-04, -1.23444469e-03,  1.11479130e-03]])

In [63]:
X_test_bowling

array([[ 1.23063720e+00, -4.58297508e-04,  1.66029418e-03, ...,
         4.59376559e-04, -1.23444469e-03,  1.11479130e-03],
       [-1.89255846e-01, -4.58297508e-04,  1.66029418e-03, ...,
         4.59376559e-04, -1.23444469e-03,  1.11479130e-03],
       [ 4.38997045e-01, -4.58297508e-04,  1.66029418e-03, ...,
         4.59376559e-04, -1.23444469e-03,  1.11479130e-03],
       ...,
       [ 1.32003297e+00, -4.58297508e-04,  1.66029418e-03, ...,
         4.59376559e-04, -1.23444469e-03,  1.11479130e-03],
       [-3.72137956e-03,  4.63996817e-01,  1.66029418e-03, ...,
         4.59376559e-04, -1.23444469e-03,  1.11479130e-03],
       [-4.81028825e-04, -2.22845767e+00,  1.66029418e-03, ...,
         4.59376559e-04, -1.23444469e-03,  1.11479130e-03]])

# Model Building

In [64]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

In [65]:
models_bowling = []
models_bowling.append(('DTC', DecisionTreeClassifier(criterion= 'entropy', random_state=0)))
models_bowling.append(('NB', GaussianNB()))
models_bowling.append(('RFC', RandomForestClassifier(n_estimators=500, criterion='entropy', random_state=0)))
models_bowling.append(('SVC', SVC(random_state = 0, kernel = 'rbf')))

In [66]:
results = []
names = []

for name, model in models_bowling:
  model.fit(X_train_bowling, y_train_bowling)
  y_pred = model.predict(X_test_bowling)
  accuracies = accuracy_score(y_test_bowling, y_pred)
  results.append(accuracies*100)
  names.append(name)
  print("Model Completed")
final_comparison_bowling = pd.DataFrame(list(zip(names, results)), columns = ['Model Name', 'Accuracy'])

Model Completed
Model Completed
Model Completed
Model Completed


In [67]:
final_comparison_bowling.sort_values(by=['Accuracy'], ascending=False)

Unnamed: 0,Model Name,Accuracy
2,RFC,84.571115
0,DTC,79.637342
3,SVC,62.348892
1,NB,34.070796
