# This notebook tries to improve how well the model describes reality by adjusting its weights

In [11]:
from python import *

import operator
import pickle
import os
import numpy as np
import json
from ipyparallel import Client
from itertools import repeat

from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import train_test_split

In [12]:
#Create data classes
def _loadPickle(name):
    """Return the object unpickled from ``data/<name>.txt``.

    NOTE(security): ``pickle.load`` can execute arbitrary code while
    unpickling, so these files must only ever come from a trusted source.
    """
    with open('data/' + name + '.txt', 'rb') as handle:
        return pickle.load(handle)

#Year, Season
seasonsData = _loadPickle('seasonsData')
#RaceId, List of tuples of (driverId, constructorId, time)
qualiResultsData = _loadPickle('qualiResultsData')
#DriverId, name
driversData = _loadPickle('driversData')
#ConstructorId, name
constructorsData = _loadPickle('constructorsData')
#EngineId, name
enginesData = _loadPickle('enginesData')

In [13]:
# Build the dataset with the current tuning constants, then measure how well a
# random forest predicts the held-out quarter of it.
entries, results = [], []
cleaner = F1DataCleaner(seasonsData, qualiResultsData, driversData, constructorsData, enginesData)

#Constants we can change (applied in the order listed)
for attrName, attrValue in (
    ("k_engine_change", 0.0145),
    ("k_const_change", 0.240),
    ("k_driver_change", 0.19),
    ("k_const_impact", 0.80),
):
    setattr(cleaner, attrName, attrValue)

# Engine impact is whatever share the constructor impact leaves over.
cleaner.k_eng_impact = 1 - cleaner.k_const_impact

# Disabled knobs kept for reference:
#   cleaner.k_rookie_variance = 5
#   cleaner.k_variance_multiplier_end = 1.5
for attrName, attrValue in (
    ("k_driver_impact", 0.02),
    ("k_rookie_pwr", 0.40),
    ("k_race_regress_exp", 0.87),  #TODO needs to change!
    ("k_eng_regress", 1.04),
    ("k_const_regress", 0.60),
    ("k_driver_regress", 0.67),
):
    setattr(cleaner, attrName, attrValue)

entries, errors, results = cleaner.constructDataset()
X, y = np.array(entries), results

# 75/25 split with a fixed seed so the reported MAE is reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size=0.25)
forest = RandomForestRegressor(n_estimators=90, random_state=0).fit(X_train, y_train)
y_hat = forest.predict(X_test)
print('MAE:', mean_absolute_error(y_test, y_hat))

print(forest.feature_importances_)

MAE: 0.3300317681071594
[0.17896165 0.48549238 0.14967268 0.05238864 0.07509118 0.05839346
 0.        ]


In [14]:
# Linear-regression baseline, fitted and evaluated on the existing train/test split.
reg = LinearRegression()
reg.fit(X_train, y_train)
y_hat = reg.predict(X_test)
print('MAE:', mean_absolute_error(y_test, y_hat))
#print("R-score: " + str(reg.score(X, y)))
print(reg.coef_)

MAE: 0.4073641609239575
[ 0.06692658  0.05469564  0.0067949  -0.04226528 -0.00904179  0.16381893
  0.        ]


In [15]:
# Random forest regression, fitted on the complete dataset (no held-out split).
# This rebinds `forest` to a shallower model (max_depth=5).
forest = RandomForestRegressor(n_estimators=100, max_depth=5, random_state=0).fit(X, y)
print(forest.feature_importances_)
# Scored on the very same X, y the forest was fitted on, so this is a
# training-set score, not a held-out test score.
print(forest.score(X, y))

[0.12786546 0.76117803 0.05083987 0.00649812 0.03635445 0.01726408
 0.        ]
0.5424937330524164


# Just for testing!

In [5]:
# Read the "drivers" mapping; the context manager closes the file handle,
# which a bare json.load(open(...)) would leave open.
with open('data/newDrivers.json') as handle:
    newDrivers = json.load(handle)["drivers"]
# JSON object keys are strings; convert them to int ids.
newDrivers = {int(did): cid for did, cid in newDrivers.items()}

outFile = {} # The object where we write output

def getColor(constructor):
    """Return the hex colour string used for the given constructor name.

    Names without an entry in the table fall back to black ("#000000").
    """
    teamColors = dict([
        ("Mercedes", "#00d2be"),
        ("Ferrari", "#dc0000"),
        ("Red Bull", "#1e41ff"),
        ("Racing Point", "#f596c8"),
        ("Williams", "#ffffff"),
        ("Renault", "#fff500"),
        ("Toro Rosso", "#469bff"),
        ("Haas F1 Team", "#f0d787"),
        ("McLaren", "#ff8700"),
        ("Alfa Romeo", "#9b0000"),
    ])
    if constructor in teamColors:
        return teamColors[constructor]
    return "#000000"

#Manual changes
#cleaner.drivers[8].constructor = cleaner.constructors[15] #Kimi
#cleaner.drivers[841].constructor = cleaner.constructors[15] #Gio
#cleaner.drivers[844].constructor = cleaner.constructors[6] #Leclerc
#cleaner.drivers[1000] = Driver("Lando Norris", cleaner.constructors[1]) #Lando
#cleaner.drivers[832].constructor = cleaner.constructors[1] #Sainz
#cleaner.drivers[840].constructor = cleaner.constructors[10] #Stroll
#cleaner.drivers[842].constructor = cleaner.constructors[5] #Gasly
#cleaner.drivers[817].constructor = cleaner.constructors[4] #Ricciardo
#cleaner.drivers[848] = Driver("Alexander Albon", cleaner.constructors[9]) #Albon
#cleaner.drivers[826].constructor = cleaner.constructors[5] #Kvyat
#cleaner.drivers[1002] = Driver("George Russell", cleaner.constructors[3]) #Russell
#cleaner.drivers[9].constructor = cleaner.constructors[3] #Kubica

#cleaner.drivers[1000].pwr = cleaner.k_rookie_pwr
#cleaner.drivers[1001].pwr = cleaner.k_rookie_pwr
#cleaner.drivers[1002].pwr = cleaner.k_rookie_pwr

# Collect name, constructor name and display colour for every driver id in
# newDrivers, and store the result under outFile["drivers"].
driversToWrite = {}
for did in newDrivers:
    driver = cleaner.drivers[int(did)]
    teamName = driver.constructor.name
    driversToWrite[int(did)] = {
        "name": driver.name,
        "constructor": teamName,
        "color": getColor(teamName),
    }
print(driversToWrite)
outFile["drivers"] = driversToWrite

{8: {'name': 'Kimi Räikkönen', 'constructor': 'Alfa Romeo', 'color': '#9b0000'}, 841: {'name': 'Antonio Giovinazzi', 'constructor': 'Alfa Romeo', 'color': '#9b0000'}, 20: {'name': 'Sebastian Vettel', 'constructor': 'Ferrari', 'color': '#dc0000'}, 844: {'name': 'Charles Leclerc', 'constructor': 'Ferrari', 'color': '#dc0000'}, 154: {'name': 'Romain Grosjean', 'constructor': 'Haas F1 Team', 'color': '#f0d787'}, 825: {'name': 'Kevin Magnussen', 'constructor': 'Haas F1 Team', 'color': '#f0d787'}, 846: {'name': 'Lando Norris', 'constructor': 'McLaren', 'color': '#ff8700'}, 832: {'name': 'Carlos Sainz', 'constructor': 'McLaren', 'color': '#ff8700'}, 1: {'name': 'Lewis Hamilton', 'constructor': 'Mercedes', 'color': '#00d2be'}, 822: {'name': 'Valtteri Bottas', 'constructor': 'Mercedes', 'color': '#00d2be'}, 815: {'name': 'Sergio Pérez', 'constructor': 'Racing Point', 'color': '#f596c8'}, 840: {'name': 'Lance Stroll', 'constructor': 'Racing Point', 'color': '#f596c8'}, 842: {'name': 'Pierre Gasl

In [6]:
# The race to predict is the first entry of futureRaces.json.
with open('data/futureRaces.json', 'r') as handle:
    futureRaces = json.load(handle)
nextRace = futureRaces[0]
circuit = nextRace["circuitId"]
circuitName = nextRace["name"]
raceId = nextRace["raceId"]

# Edit index file: register the race under its year, rewriting the file in place.
with open('../F1PredictWeb/src/public/data/index.json', 'r+') as handle:
    data = json.load(handle)
    # setdefault creates the year entry when this is the first race recorded
    # for that season; plain indexing would raise KeyError in that case.
    data.setdefault(str(nextRace["year"]), {})[str(raceId)] = circuitName
    handle.seek(0)        # rewind so the dump overwrites from the start of the file
    json.dump(data, handle, indent=4)
    handle.truncate()     # drop leftover bytes if the new content is shorter

outFile["name"] = circuitName
outFile["year"] = nextRace["year"]

In [7]:
# One feature row per driver in newDrivers: overall power of driver,
# constructor and engine, followed by their power at this circuit.
# NOTE(review): the recorded feature_importances_ / coef_ outputs list 7 values
# while each row built here has 6 — confirm the row layout still matches what
# cleaner.constructDataset() produces.
predictedEntrants = []

for did in newDrivers:
    driver = cleaner.drivers[did]
    constructor = driver.constructor
    engine = constructor.engine
    print("".join([driver.name, ": ", str(driver.pwr), " -- ", str(constructor.pwr)]))

    # Anything that has no track power recorded for this circuit starts at 0.
    for component in (driver, constructor, engine):
        if circuit not in component.trackpwr:
            component.trackpwr[circuit] = 0 #TODO maybe change defaults

    entry = [component.pwr for component in (driver, constructor, engine)]
    entry += [component.trackpwr[circuit] for component in (driver, constructor, engine)]
    predictedEntrants.append(entry)

Kimi Räikkönen: -0.0985741271845393 -- -0.03304596460962416
Antonio Giovinazzi: 0.07665247232023727 -- -0.03304596460962416
Sebastian Vettel: -0.12411522107352586 -- -0.6108070047991287
Charles Leclerc: -0.22849371349907854 -- -0.6108070047991287
Romain Grosjean: 0.05758835638559012 -- 0.013097732476475056
Kevin Magnussen: 0.031150717938162938 -- 0.013097732476475056
Lando Norris: 0.010077709924450937 -- 0.04342140260450594
Carlos Sainz: 0.019181573287006058 -- 0.04342140260450594
Lewis Hamilton: -0.2771252275850772 -- -0.5021440344308659
Valtteri Bottas: -0.2160444709106854 -- -0.5021440344308659
Sergio Pérez: -0.04112633334624821 -- 0.41695262229225466
Lance Stroll: 0.11910006639194669 -- 0.41695262229225466
Pierre Gasly: 0.08978769795272104 -- 0.12102014870010903
Max Verstappen: -0.17553985506057287 -- -0.44064246481789443
Daniel Ricciardo: -0.13011108631084498 -- -0.040203208534972924
Nico Hülkenberg: -0.051035769145944 -- -0.040203208534972924
Alexander Albon: 0 -- -0.440642464817

In [8]:
#Linear regression predict
predictedResults = reg.predict(np.array(predictedEntrants))
print(predictedResults)

[ 0.0232668   0.10746158 -0.40319534 -0.49190688  0.19963291  0.18562838
  0.01791405  0.01376689 -0.5501152  -0.51588     0.31078663  0.42254351
  0.2810826  -0.34863003 -0.09517425 -0.03827636 -0.25125854  0.30425293
  0.97543546  1.11995735]


In [9]:
#Forest predict
forestResults = forest.predict(np.array(predictedEntrants))
print(forestResults)

[ 0.02208134  0.14603801 -0.09493246 -0.21172022  0.28435368  0.23226978
  0.05459475 -0.10938669 -0.5344507  -0.55025098  0.65725933  0.70801944
  0.36397348 -0.52697213 -0.17372171 -0.09757444 -0.2420048   0.2556514
  0.64010949  0.91591866]


In [10]:
# Number of simulated qualifying sessions; the printed percentages are derived
# from this value instead of a separately hard-coded divisor.
N_SIMULATIONS = 1000

driverResults = {} # {did: {position: amount}}
orderedResults = [] # [(did, prediction) ...]
for index, did in enumerate(list(newDrivers)):
    driver = cleaner.drivers[int(did)]
    prediction = forestResults[index]
    print("{0} ({1}): {2}".format(driver.name, driver.constructor.name, prediction))
    # newDrivers values are replaced here (constructor id -> forest prediction);
    # this is the mapping handed to predictQualiResults below.
    newDrivers[did] = prediction
    driverResults[int(did)] = {}
    orderedResults.append((did, prediction))

# Driver ids sorted ascending by predicted value.
orderedResults.sort(key = operator.itemgetter(1))
outFile["order"] = [a for (a, b) in orderedResults]

# Count how often each driver ends up at each index of the returned score list.
# The loop variables are distinct: the original reused `i` for both loops.
for _ in range(N_SIMULATIONS):
    scoreList = predictQualiResults(circuit, newDrivers)
    for position, drivRes in enumerate(scoreList):
        positionCounts = driverResults[drivRes[0]]
        positionCounts[position] = positionCounts.get(position, 0) + 1

for did, res in driverResults.items():
    driver = cleaner.drivers[int(did)]
    print("{0} ({1}):".format(driver.name, driver.constructor.name))
    for pos, amount in sorted(res.items()):
        # Positions are 0-based internally and printed 1-based.
        print("\t{0}: {1} %".format(pos + 1, 100.0 * amount / N_SIMULATIONS))

outFile["predictions"] = driverResults
print(outFile)
with open('../F1PredictWeb/src/public/data/' + str(raceId) + '.json', 'w') as fp:
    json.dump(outFile, fp)

Kimi Räikkönen (Alfa Romeo): 0.022081337973869984
Antonio Giovinazzi (Alfa Romeo): 0.14603800659102634
Sebastian Vettel (Ferrari): -0.09493245778359814
Charles Leclerc (Ferrari): -0.21172022229447382
Romain Grosjean (Haas F1 Team): 0.2843536803170649
Kevin Magnussen (Haas F1 Team): 0.23226978315778657
Lando Norris (McLaren): 0.054594749520119674
Carlos Sainz (McLaren): -0.10938669459517898
Lewis Hamilton (Mercedes): -0.5344507039565258
Valtteri Bottas (Mercedes): -0.5502509766332555
Sergio Pérez (Racing Point): 0.6572593302062267
Lance Stroll (Racing Point): 0.7080194408366629
Pierre Gasly (Toro Rosso): 0.3639734800475442
Max Verstappen (Red Bull): -0.5269721341727198
Daniel Ricciardo (Renault): -0.17372170674738704
Nico Hülkenberg (Renault): -0.09757444474007483
Alexander Albon (Red Bull): -0.2420047952151519
Daniil Kvyat (Toro Rosso): 0.25565139988504815
George Russell (Williams): 0.6401094924876198
Robert Kubica (Williams): 0.915918658555824
Kimi Räikkönen (Alfa Romeo):
	6: 2.2 %
	7