<a href="https://colab.research.google.com/github/whitehatjr1001/Football-analysis/blob/main/FootballPredUsingNeuralNer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np 
import requests
import warnings 
from sklearn.model_selection import train_test_split 
from sklearn import metrics 
from sklearn.ensemble import RandomForestClassifier
from tabulate import tabulate
from sklearn.model_selection import GridSearchCV

In [2]:
warnings.filterwarnings('ignore')
url = "https://www.betexplorer.com/soccer/england/premier-league/results/"
soup = BeautifulSoup(requests.get(url).content,"html.parser")

In [3]:
def get_odd_or_text(td):
    if "data-odd" in td.attrs:
        return td["data-odd"]

    odd = td.select_one("[data-odd]")
    if odd:
        return odd["data-odd"]

    return td.get_text(strip=True)

In [4]:
all_data = []
for row in soup.select(".table-main tr:has(td)"):
    tds = [get_odd_or_text(td) for td in row.select("td")]
    round_ = row.find_previous("th").find_previous("tr").th.text
    all_data.append([round_, *tds])

df = pd.DataFrame(
    all_data, columns=["Round", "Match", "Score", "1", "X", "2", "Date"])


df['Home'] = [i.split('-')[0] for i in df['Match']]
df['Away'] = [i.split('-')[1] for i in df['Match']]

In [5]:
df.shape

(297, 9)

In [6]:
#reverse df
df = df.iloc[::-1]


cols = ['1','X','2']

df[cols] = df[cols].apply(pd.to_numeric, errors='coerce')
df['HomeWin_Prob'] = round(1/df['1'],2)
df['DrawWin_Prob'] = round(1/df['X'],2)
df['AwayWin_Prob'] = round(1/df['2'],2)

df['HomeGoals'] = [i.split(':', 1)[0] for i in df['Score']]

df['AwayGoals'] = [i.split(':', 1)[1] if (':' in i and len(i.split(':', 1)) > 1) else '' for i in df['Score']]

In [7]:


cols = ['1','X','2']
df[cols] = df[cols].apply(pd.to_numeric, errors='coerce')
df['HomeWin_Prob'] = round(1/df['1'],2)
df['DrawWin_Prob'] = round(1/df['X'],2)
df['AwayWin_Prob'] = round(1/df['2'],2)

df['HomeGoals'] = [i.split(':', 1)[0] for i in df['Score']]
df['AwayGoals'] = [i.split(':', 1)[1] if (':' in i and len(i.split(':', 1)) > 1) else '' for i in df['Score']]

In [8]:
def result(df):
  if df['HomeGoals']>df['AwayGoals']:
    return 1
  if df['HomeGoals']==df['AwayGoals']:
    return 0
  if df['HomeGoals']<df['AwayGoals']:
    return 2
  



In [9]:
df['Result'] = df.apply(result,axis=1)

In [10]:
df

Unnamed: 0,Round,Match,Score,1,X,2,Date,Home,Away,HomeWin_Prob,DrawWin_Prob,AwayWin_Prob,HomeGoals,AwayGoals,Result
296,1. Round,Crystal Palace-Arsenal,0:2,4.58,3.57,1.84,05.08.2022,Crystal Palace,Arsenal,0.22,0.28,0.54,0,2,2
295,1. Round,Tottenham-Southampton,4:1,1.36,5.20,8.69,06.08.2022,Tottenham,Southampton,0.74,0.19,0.12,4,1,1
294,1. Round,Newcastle-Nottingham,2:0,1.58,4.04,6.19,06.08.2022,Newcastle,Nottingham,0.63,0.25,0.16,2,0,1
293,1. Round,Leeds-Wolves,2:1,2.43,3.34,3.00,06.08.2022,Leeds,Wolves,0.41,0.30,0.33,2,1,1
292,1. Round,Fulham-Liverpool,2:2,10.45,6.10,1.28,06.08.2022,Fulham,Liverpool,0.10,0.16,0.78,2,2,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4,30. Round,Fulham-West Ham,0:1,2.94,3.22,2.54,08.04.,Fulham,West Ham,0.34,0.31,0.39,0,1,2
3,30. Round,Brentford-Newcastle,1:2,4.10,3.49,1.95,08.04.,Brentford,Newcastle,0.24,0.29,0.51,1,2,2
2,30. Round,Aston Villa-Nottingham,2:0,1.58,4.13,5.93,08.04.,Aston Villa,Nottingham,0.63,0.24,0.17,2,0,1
1,30. Round,Liverpool-Arsenal,2:2,2.73,3.57,2.52,09.04.,Liverpool,Arsenal,0.37,0.28,0.40,2,2,0


In [11]:
df.drop(['Round','Score','Date','1','X','2','Match','HomeGoals','AwayGoals'],axis=1,inplace=True)

In [12]:
df.dropna(inplace=True)

In [13]:
df.isna().sum()

Home            0
Away            0
HomeWin_Prob    0
DrawWin_Prob    0
AwayWin_Prob    0
Result          0
dtype: int64

In [14]:
hold_out = df[260::]
hold_out.drop(['Result'],axis=1,inplace=True)
hold_out = hold_out.reset_index(drop=True)
df = df[0:260]

In [15]:
model_recode = {'Southampton':0,
              'Crystal Palace':1,
              'Fulham':2,
              'Liverpool':3,
              'Manchester Utd':4,
              'Newcastle':5,
              'Aston Villa':6,
              'Brentford':7,
              'Tottenham':8,
              'West Ham':9,
              'Chelsea':10,
              'Leicester':11,
              'Manchester City':12,
              'Arsenal':13,
              'Bournemouth':14,
              'Everton':15,
              'Wolves':16,
              'Nottingham':17,
              'Leeds':18,
              'Brighton':19}


In [16]:
# iterate over columns
for key, value in df['Home'].iteritems():
    df['Home'] = df['Home'].apply(lambda x: model_recode.get(x,x))

for key, value in df['Away'].iteritems():
    df['Away'] = df['Away'].apply(lambda x: model_recode.get(x,x))

for key, value in hold_out['Home'].iteritems():
    hold_out['Home'] = hold_out['Home'].apply(lambda x: model_recode.get(x,x))

for key, value in hold_out['Away'].iteritems():
    hold_out['Away'] = hold_out['Away'].apply(lambda x: model_recode.get(x,x))

X = df.drop('Result',axis=1)
y = df['Result']

In [17]:
X=X.values
y=y.values

In [18]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [19]:
X_train.shape
y_train.shape

(208,)

In [20]:
#Neural Network 
from keras import Sequential
from keras.layers import Dense,Dropout
from keras.wrappers.scikit_learn import KerasClassifier

In [41]:
import tensorflow as tf
from keras.callbacks import ModelCheckpoint 
checkpoint =ModelCheckpoint('best_weights.h5', monitor='val_loss', verbose=1, save_best_only=True, mode='min')

In [52]:
model1 = Sequential()
model1.add(Dense(128, activation='relu', input_dim=5))
model1.add(Dense(64, activation='relu'))
model1.add(Dense(32, activation='relu'))
model1.add(Dense(16, activation='relu'))
model1.add(Dense(3, activation='softmax'))

# Compile the model
model1.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model1.summary()



Model: "sequential_10"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_55 (Dense)            (None, 128)               768       
                                                                 
 dense_56 (Dense)            (None, 64)                8256      
                                                                 
 dense_57 (Dense)            (None, 32)                2080      
                                                                 
 dense_58 (Dense)            (None, 16)                528       
                                                                 
 dense_59 (Dense)            (None, 3)                 51        
                                                                 
Total params: 11,683
Trainable params: 11,683
Non-trainable params: 0
_________________________________________________________________


In [53]:
his=model1.fit(X_train,y_train,epochs=200, batch_size=16, validation_data=(X_test, y_test),callbacks=[checkpoint])

Epoch 1/200
 1/13 [=>............................] - ETA: 12s - loss: 1.5762 - accuracy: 0.3750
Epoch 1: val_loss did not improve from 0.91422
Epoch 2/200
 1/13 [=>............................] - ETA: 0s - loss: 1.0603 - accuracy: 0.5000
Epoch 2: val_loss did not improve from 0.91422
Epoch 3/200
 1/13 [=>............................] - ETA: 0s - loss: 1.0909 - accuracy: 0.5000
Epoch 3: val_loss did not improve from 0.91422
Epoch 4/200
 1/13 [=>............................] - ETA: 0s - loss: 1.0156 - accuracy: 0.4375
Epoch 4: val_loss did not improve from 0.91422
Epoch 5/200
 1/13 [=>............................] - ETA: 0s - loss: 1.2272 - accuracy: 0.1875
Epoch 5: val_loss did not improve from 0.91422
Epoch 6/200
 1/13 [=>............................] - ETA: 0s - loss: 0.9754 - accuracy: 0.5625
Epoch 6: val_loss did not improve from 0.91422
Epoch 7/200
 1/13 [=>............................] - ETA: 0s - loss: 1.0476 - accuracy: 0.5000
Epoch 7: val_loss did not improve from 0.91422
Epoch

In [59]:
model1.load_weights('best_weights.h5')

In [58]:
model1.save_weights('best_weights.h5')

In [60]:
loss, accuracy = model1.evaluate(X_test, y_test)
print('Test loss:', loss)
print('Test accuracy:', accuracy)
# you can see the accuracy is less but we can improve 
# it is definetly mor than the random forrest 

Test loss: 0.9091438055038452
Test accuracy: 0.5961538553237915


In [61]:
yhat = model1.predict(X_test)



In [66]:
yhat

array([[0.33981678, 0.24432467, 0.4158586 ],
       [0.01271225, 0.5511947 , 0.4360931 ],
       [0.2750754 , 0.44966808, 0.27525648],
       [0.23055181, 0.7067955 , 0.06265264],
       [0.36513454, 0.1620723 , 0.47279304],
       [0.23963703, 0.7085971 , 0.05176582],
       [0.32343087, 0.28254718, 0.39402187],
       [0.19549559, 0.7864555 , 0.01804884],
       [0.20024195, 0.55861187, 0.24114616],
       [0.20808332, 0.7442838 , 0.04763291],
       [0.32270855, 0.36373815, 0.31355336],
       [0.44253984, 0.33702427, 0.2204359 ],
       [0.18516913, 0.64797354, 0.16685729],
       [0.15424499, 0.63596624, 0.20978883],
       [0.33208755, 0.25543252, 0.41248   ],
       [0.2039315 , 0.6463762 , 0.14969243],
       [0.35286674, 0.20007068, 0.44706264],
       [0.3771388 , 0.29040238, 0.33245876],
       [0.26931947, 0.2611001 , 0.46958047],
       [0.2602247 , 0.5869558 , 0.15281962],
       [0.22814624, 0.49426663, 0.27758712],
       [0.4098928 , 0.34508213, 0.24502501],
       [0.

In [67]:
X_test

array([[ 0.  ,  4.  ,  0.28,  0.26,  0.51],
       [15.  ,  0.  ,  0.42,  0.31,  0.31],
       [ 5.  ,  3.  ,  0.4 ,  0.29,  0.36],
       [ 5.  ,  2.  ,  0.67,  0.22,  0.15],
       [ 0.  , 16.  ,  0.32,  0.32,  0.4 ],
       [ 5.  , 16.  ,  0.61,  0.26,  0.17],
       [ 1.  ,  8.  ,  0.3 ,  0.3 ,  0.45],
       [ 3.  ,  0.  ,  0.79,  0.16,  0.1 ],
       [11.  ,  7.  ,  0.53,  0.27,  0.24],
       [ 6.  , 18.  ,  0.5 ,  0.28,  0.27],
       [ 1.  ,  6.  ,  0.4 ,  0.31,  0.33],
       [ 2.  , 17.  ,  0.52,  0.29,  0.24],
       [ 5.  ,  7.  ,  0.6 ,  0.25,  0.2 ],
       [19.  , 14.  ,  0.72,  0.2 ,  0.12],
       [ 0.  ,  6.  ,  0.38,  0.31,  0.35],
       [ 6.  , 11.  ,  0.48,  0.29,  0.28],
       [ 0.  , 11.  ,  0.36,  0.3 ,  0.39],
       [14.  ,  6.  ,  0.25,  0.29,  0.5 ],
       [13.  , 12.  ,  0.31,  0.29,  0.44],
       [11.  , 18.  ,  0.43,  0.28,  0.33],
       [ 6.  ,  7.  ,  0.49,  0.29,  0.27],
       [ 2.  , 15.  ,  0.44,  0.29,  0.31],
       [ 3.  ,  1.  ,  0.79,  0.