## RaceResult Fact Analysis

#### Import

In [1]:
%matplotlib inline
import psycopg2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import pip
#import geopandas
import altair as alt
from descartes.patch import PolygonPatch
from datetime import datetime

### Data  Extraction

#### Connection to database 

In [2]:
def DbConnect():
    conn = psycopg2.connect(host="localhost",database="FormulaOne",port=5432,user='postgres',password='root')
    return conn

In [3]:
circuit = pd.read_sql('select * from "CircuitDim"', con=DbConnect())
races= pd.read_sql('select * from "RaceDim"', con=DbConnect())
date= pd.read_sql('select * from "DateTimeDim"', con=DbConnect())
races= pd.read_sql('select * from "RaceDim"', con=DbConnect())
driver= pd.read_sql('select * from "DriverDim"', con=DbConnect())
constructor= pd.read_sql('select * from "ConstructorsDim"', con=DbConnect())
status= pd.read_sql('select * from "StatusDim"', con=DbConnect())
result= pd.read_sql('select * from "RaceResultFact"', con=DbConnect())



### Merging to a single DataFrame

##### Renaming Keys

In [4]:
races.rename(columns = {'datetime_fk':'dateId'}, inplace = True)
races.rename(columns = {'circuit_fk':'CircuitId'}, inplace = True)
result.rename(columns = {'race_fk':'raceId'}, inplace = True)
result.rename(columns = {'driver_fk':'DriverID'}, inplace = True)
result.rename(columns = {'constructor_fk':'constructorId'}, inplace = True)
result.rename(columns = {'status_fk':'statusId'}, inplace = True)

In [5]:
df1 = pd.merge(races,date, on='dateId', how='inner')
Racedim = pd.merge(df1,circuit, on='CircuitId', how='inner')
df2 = pd.merge(result,Racedim, on='raceId', how='inner')
df3 = pd.merge(df2,driver, on='DriverID', how='inner')
df4 = pd.merge(df3,constructor, on='constructorId', how='inner')
df5 = pd.merge(df4,status, on='statusId', how='inner')

### Data Anlysis

##### Preparing

In [6]:
Fact=df5

In [7]:
Fact.columns

Index(['raceId', 'DriverID', 'constructorId', 'statusId', 'points_x', 'rank',
       'laps', 'fastest_lapspeed', 'wins', 'laptime', 'pitstop', 'penalty_fk',
       'round', 'name_x', 'CircuitId', 'dateId', 'weather_fk', 'date', 'day',
       'month', 'year', 'time', 'circuitRef', 'name_y', 'location', 'latitude',
       'longitude', 'Altitude', 'country', 'img', 'DriverRef', 'Number',
       'Code', 'ForeName', 'SurName', 'Dob', 'Nationality', 'points_y', 'car',
       'constructorRef', 'constructorName', 'constructorNationality',
       'status'],
      dtype='object')

##### Cleaning

In [8]:
Fact["nameDriver"]=Fact["ForeName"]+" "+Fact["SurName"]
Fact["nameDriver"]
Fact['Dob']=pd.to_datetime(Fact['Dob'])
date=datetime.today()-Fact['Dob']
Fact['age']=round(date.dt.days/365)

Calculating the drivers' age is more significant that having a birth date as it provides us a mesurable value.

In [9]:
Fact.drop(columns=['raceId',
                   'DriverID',
                   'CircuitId',
                   'dateId',
                   'constructorRef',
                   'DriverRef',
                   'circuitRef',
                   'ForeName',
                   'SurName',
                   'Dob',
                   'Number',
                   'Code',
                    'day',
                   'month'],axis=1,inplace=True)

In [10]:
Fact.rename(columns = {'name_x':'nameGP','name_y':'nameCircuit'}, inplace = True)

In [11]:
Fact.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 3543 entries, 0 to 3542
Data columns (total 31 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   constructorId           3543 non-null   int64  
 1   statusId                3543 non-null   int64  
 2   points_x                3543 non-null   float64
 3   rank                    3543 non-null   int64  
 4   laps                    3543 non-null   int64  
 5   fastest_lapspeed        3532 non-null   float64
 6   wins                    3543 non-null   int64  
 7   laptime                 3543 non-null   object 
 8   pitstop                 3543 non-null   object 
 9   penalty_fk              2902 non-null   float64
 10  round                   3543 non-null   int64  
 11  nameGP                  3543 non-null   object 
 12  weather_fk              3543 non-null   int64  
 13  date                    3543 non-null   object 
 14  year                    3543 non-null   

In [12]:
Fact.shape

(3543, 31)

In [13]:
Fact.head()

Unnamed: 0,constructorId,statusId,points_x,rank,laps,fastest_lapspeed,wins,laptime,pitstop,penalty_fk,...,country,img,Nationality,points_y,car,constructorName,constructorNationality,status,nameDriver,age
0,9,1,25.0,1,56,196.523,1,01:37:39.832000,00:01:07.225000,25.0,...,Malaysia,//upload.wikimedia.org/wikipedia/commons/thumb...,German,43.0,Aston Martin Mercedes,Red Bull,Austrian,Finished,Sebastian Vettel,35.0
1,9,1,18.0,2,56,193.677,0,01:37:03.424000,00:00:43.528000,25.0,...,China,https://f1chronicle.com/wp-content/uploads/202...,German,43.0,Aston Martin Mercedes,Red Bull,Austrian,Finished,Sebastian Vettel,35.0
2,9,1,25.0,1,58,213.669,1,01:30:17.558000,00:01:21.609000,25.0,...,Turkey,https://f1chronicle.com/wp-content/uploads/202...,German,43.0,Aston Martin Mercedes,Red Bull,Austrian,Finished,Sebastian Vettel,35.0
3,9,1,25.0,1,66,192.262,1,01:39:03.301000,00:01:20.858000,25.0,...,Spain,https://f1chronicle.com/wp-content/uploads/202...,German,43.0,Aston Martin Mercedes,Red Bull,Austrian,Finished,Sebastian Vettel,35.0
4,9,1,25.0,1,78,157.656,1,02:09:38.373000,00:00:28.536000,25.0,...,Monaco,https://f1chronicle.com/wp-content/uploads/202...,German,43.0,Aston Martin Mercedes,Red Bull,Austrian,Finished,Sebastian Vettel,35.0


In [14]:
Fact.tail()

Unnamed: 0,constructorId,statusId,points_x,rank,laps,fastest_lapspeed,wins,laptime,pitstop,penalty_fk,...,country,img,Nationality,points_y,car,constructorName,constructorNationality,status,nameDriver,age
3538,164,111,0.0,16,39,226.65,0,01:22:09.502000,00:17:15.495000,44.0,...,Italy,https://f1chronicle.com/wp-content/uploads/202...,Australian,115.0,McLaren Mercedes,HRT,Spanish,+14 Laps,Daniel Ricciardo,33.0
3539,5,79,0.0,16,47,196.298,0,01:09:16.358000,00:00:50.933000,113.0,...,Canada,https://www.automobile-magazine.fr/img/f1/circ...,Russian,32.0,AlphaTauri Honda,Toro Rosso,Italian,Drivetrain,Daniil Kvyat,28.0
3540,4,137,0.0,19,28,212.478,0,00:43:19.322000,00:00:33.027000,44.0,...,Australia,https://f1chronicle.com/wp-content/uploads/202...,Australian,115.0,McLaren Mercedes,Renault,French,Damage,Daniel Ricciardo,33.0
3541,4,60,0.0,18,53,201.487,0,01:27:16.482000,00:00:25.676000,44.0,...,Bahrain,https://f1chronicle.com/wp-content/uploads/202...,Australian,115.0,McLaren Mercedes,Renault,French,Out of fuel,Daniel Ricciardo,33.0
3542,210,139,0.0,18,47,222.93,0,01:04:14.971000,00:00:30.708000,104.0,...,Italy,https://f1chronicle.com/wp-content/uploads/202...,Danish,1.0,Haas Ferrari,Haas F1 Team,American,Illness,Kevin Magnussen,30.0


#####  Checking for null Data

In [15]:
Fact.isnull().sum()*100/len(Fact)

constructorId              0.000000
statusId                   0.000000
points_x                   0.000000
rank                       0.000000
laps                       0.000000
fastest_lapspeed           0.310471
wins                       0.000000
laptime                    0.000000
pitstop                    0.000000
penalty_fk                18.092012
round                      0.000000
nameGP                     0.000000
weather_fk                 0.000000
date                       0.000000
year                       0.000000
time                       0.000000
nameCircuit                0.000000
location                   0.000000
latitude                   0.000000
longitude                  0.000000
Altitude                   0.000000
country                    0.000000
img                        0.000000
Nationality                0.000000
points_y                   0.141123
car                        0.141123
constructorName            0.000000
constructorNationality     0

##### Analysis

Status 


## Car performance: merge weather

In [16]:
config=pd.read_csv("DE2-PI.csv",header=0,sep =',')
Factcar=pd.merge(Fact,config, on=["constructorId"], how='inner')
Factcar

Unnamed: 0,constructorId,statusId,points_x,rank,laps,fastest_lapspeed,wins,laptime,pitstop,penalty_fk,...,status,nameDriver,age,constructorName_y,mpg,displacement,horsepower,weight,acceleration,winingRate
0,9,1,25.0,1,56,196.523,1,01:37:39.832000,00:01:07.225000,25.0,...,Finished,Sebastian Vettel,35.0,red_bull,36,91,600,1800,260,1
1,9,1,18.0,2,56,193.677,0,01:37:03.424000,00:00:43.528000,25.0,...,Finished,Sebastian Vettel,35.0,red_bull,36,91,600,1800,260,1
2,9,1,25.0,1,58,213.669,1,01:30:17.558000,00:01:21.609000,25.0,...,Finished,Sebastian Vettel,35.0,red_bull,36,91,600,1800,260,1
3,9,1,25.0,1,66,192.262,1,01:39:03.301000,00:01:20.858000,25.0,...,Finished,Sebastian Vettel,35.0,red_bull,36,91,600,1800,260,1
4,9,1,25.0,1,78,157.656,1,02:09:38.373000,00:00:28.536000,25.0,...,Finished,Sebastian Vettel,35.0,red_bull,36,91,600,1800,260,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3538,164,13,0.0,20,68,194.759,0,01:33:27.563000,00:00:46.377000,44.0,...,+3 Laps,Daniel Ricciardo,33.0,hrt,28,112,880,2605,268,1
3539,164,26,0.0,21,48,184.664,0,01:29:24.467000,00:00:44.885000,44.0,...,Mechanical,Daniel Ricciardo,33.0,hrt,28,112,880,2605,268,1
3540,164,14,0.0,18,66,177.456,0,01:47:37.531000,00:01:08.058000,44.0,...,+4 Laps,Daniel Ricciardo,33.0,hrt,28,112,880,2605,268,1
3541,164,14,0.0,19,57,153.386,0,01:59:46.220000,00:01:52.010000,44.0,...,+4 Laps,Daniel Ricciardo,33.0,hrt,28,112,880,2605,268,1


In [17]:
Factcar.rename(columns = {'constructorName_y':'constructorName'}, inplace = True)
Factcar.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 3543 entries, 0 to 3542
Data columns (total 38 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   constructorId           3543 non-null   int64  
 1   statusId                3543 non-null   int64  
 2   points_x                3543 non-null   float64
 3   rank                    3543 non-null   int64  
 4   laps                    3543 non-null   int64  
 5   fastest_lapspeed        3532 non-null   float64
 6   wins                    3543 non-null   int64  
 7   laptime                 3543 non-null   object 
 8   pitstop                 3543 non-null   object 
 9   penalty_fk              2902 non-null   float64
 10  round                   3543 non-null   int64  
 11  nameGP                  3543 non-null   object 
 12  weather_fk              3543 non-null   int64  
 13  date                    3543 non-null   object 
 14  year                    3543 non-null   

In [18]:
CarRS=Factcar.drop(columns=['nameGP',
                   'constructorNationality',
                   'img',
                    'statusId',
                   'points_x',
                   'points_x',
                    'Nationality',
                    'longitude',
                    'latitude',
                    'location',
                        'time',
                           'Altitude',
                           'date',
                            'country',
                            'age',
                            'penalty_fk',
                            'weather_fk',
                            'nameDriver',
                            'winingRate',
                            'displacement',
                            'laps',
                            'fastest_lapspeed',
                            'wins',
                            'laptime',
                            'pitstop',
                            'status',
                            'mpg',
                            'weight',
                            'round',
                            'car'
                           ])
#CarRS['pitstop']=CarRS['pitstop'].astype(str)
#CarRS['pitstop']=CarRS['pitstop'].replace(":", "", regex=True)
#CarRS['nameDriver']=CarRS['nameDriver'].replace(" ", "", regex=True)
#CarRS['pitstop']=CarRS['pitstop'].replace("None", "0", regex=True)


In [19]:
CarRS['points_y'] = CarRS['points_y'].fillna(0).astype(np.int64, errors='ignore')

CarRS.isnull().sum()*100/len(CarRS)

constructorId        0.0
rank                 0.0
year                 0.0
nameCircuit          0.0
points_y             0.0
constructorName_x    0.0
constructorName      0.0
horsepower           0.0
acceleration         0.0
dtype: float64

In [20]:
CarRS['winning']=0
for i in range(0,len(CarRS)):
    pos=CarRS['rank'].iat[i]
    if pos>0 and pos<10:
         CarRS['winning'].iat[i] = int(1)
    else:
        
         CarRS['winning'].iat[i] = int(0)

In [21]:
CarRS.drop(columns=['rank'],axis=1,inplace=True)

In [22]:
df1=CarRS
df1=CarRS.groupby(["constructorId"]).sum('winning')


In [23]:
df1["performance"]=df1["winning"]/CarRS.groupby(["constructorId"]).size()

In [24]:
df=df1["performance"].to_frame()

df.to_csv(r'perfCons.csv')
df

Unnamed: 0_level_0,performance
constructorId,Unnamed: 1_level_1
1,0.521053
3,0.282857
4,0.374302
5,0.254237
6,0.822454
9,0.833846
10,0.584746
15,0.168385
51,0.144444
117,0.352941


In [25]:
constructor

Unnamed: 0,constructorId,constructorRef,constructorName,constructorNationality
0,1,mclaren,McLaren,British
1,2,bmw_sauber,BMW Sauber,German
2,3,williams,Williams,British
3,4,renault,Renault,French
4,5,toro_rosso,Toro Rosso,Italian
...,...,...,...,...
206,209,manor,Manor Marussia,British
207,210,haas,Haas F1 Team,American
208,211,racing_point,Racing Point,British
209,213,alphatauri,AlphaTauri,Italian


In [26]:
df1 = pd.merge(df,constructor, on='constructorId', how='inner')
df2 = pd.merge(df1,config, on='constructorId', how='inner')


In [27]:
CarRS=df2
CarRS=CarRS.drop(columns=['constructorId','constructorRef','constructorName_y','mpg','displacement','winingRate'
                           ])
CarRS.rename(columns = {'constructorName_x':'constructorName'}, inplace = True)
CarRS.sort_values(by="performance",ascending=False).head(15)
CarRS.to_csv(r'perfCons.csv')


In [28]:
from sklearn.preprocessing import LabelEncoder
lb_make = LabelEncoder()
CarRS["constructorName"] = lb_make.fit_transform(CarRS["constructorName"])
CarRS["constructorNationality"] = lb_make.fit_transform(CarRS["constructorNationality"])
CarRS.sort_values(by="performance",ascending=False).head(15)

Unnamed: 0,performance,constructorName,constructorNationality,horsepower,weight,acceleration
10,0.89415,13,4,860,2395,250
5,0.833846,15,1,600,1800,260
4,0.822454,5,6,530,1795,280
6,0.584746,6,5,710,1825,220
14,0.530769,9,2,880,1890,256
0,0.521053,12,2,690,1613,280
17,0.507042,14,2,950,1904,247
19,0.5,2,3,650,1914,258
18,0.468085,1,6,600,1910,233
2,0.374302,16,3,600,1760,260


# Regression Linéaire multiple : Car Performance

## $$f(X_1,X_2,...)= a_0 + a_1*X_1 + a_2*X_2 + .... + a_{12}*X12$$

In [29]:
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
X = CarRS.drop(['performance'], axis=1)
y = CarRS[['performance']]
X_train, X_test, y_train, y_test=train_test_split(X, y, test_size=0.2, random_state=0)

In [30]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, median_absolute_error, r2_score
#Régression linéaire multiple (toutes les variables de Boston) 
regressor = LinearRegression()

#Fitting model with trainig data
regressor.fit(X, y)
y_pred = regressor.predict(X_test)

print('test_score = ',regressor.score(X_test,y_test)) 
print('R2 = ',r2_score(y_test, y_pred))
print('MAE = ',mean_squared_error(y_test, y_pred))
print('RMSE = ', np.sqrt(mean_squared_error(y_test, y_pred)))
print('MAE = ', mean_absolute_error(y_test, y_pred))
print('MeadianAE = ', median_absolute_error(y_test, y_pred))

print('Intercept = ', regressor.intercept_)
print('Coefficients : ',regressor.coef_)

test_score =  0.45990971979384454
R2 =  0.45990971979384454
MAE =  0.011233880715846774
RMSE =  0.10599000290521166
MAE =  0.0962177680468066
MeadianAE =  0.07650012653641691
Intercept =  [1.54523369]
Coefficients :  [[ 1.04420479e-03 -3.08280285e-02  5.73550631e-05 -1.66952501e-04
  -2.92124305e-03]]


In [31]:
import pickle

pickle.dump(regressor, open('ConstructorperformancePredictorInRace.pkl','wb'))

# Loading model to compare the results
model = pickle.load(open('ConstructorperformancePredictorInRace.pkl','rb'))