# Time-series forecasting - Lap records in Formula 1

## Overview

Time series analysis and forecasting are used to predict future trends and behaviours based on historical data.

A time series is a sequence of data points collected, recorded, or measured at successive, evenly-spaced time intervals.
Each data point represents observations or measurements taken over time, such as stock prices, temperature readings, or sales figures.

## Taxonomy

<img src="../pictures/forecasting-taxonomy.png" width="800">

In [9]:
# Import libraries and set session-wide options.

import warnings
# Install the filter before the third-party imports below so that FutureWarning
# messages raised from this point on (including during those imports) are hidden.
warnings.simplefilter(action='ignore', category=FutureWarning)

import pandas as pd
import numpy as np
import sklearn as sk
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
import pickle
from ipywidgets import widgets, interact
# Turn off pandas' chained-assignment (SettingWithCopy) warning for this session.
pd.options.mode.chained_assignment = None

In [10]:
# Read the processed lap table, then report its dimensions and column names.
processed_laps_csv = '../data/03-processed/processed-data.csv'
pd_full_lap_info = pd.read_csv(processed_laps_csv)
print(pd_full_lap_info.shape)
pd_full_lap_info.columns

(551742, 7)


Index(['driverId', 'lap', 'milliseconds_each_lap', 'event_name',
       'lap_time_wth_pit', 'avg_race_lap_time',
       'event_fastestLapTime_seconds'],
      dtype='object')

In [11]:
training_data = pd_full_lap_info[['lap', 'lap_time_wth_pit', 'avg_race_lap_time', 'event_fastestLapTime_seconds']].values
ground_truth = pd_full_lap_info['milliseconds_each_lap'].values
X_train, X_test, y_train, y_test = train_test_split(training_data, ground_truth, test_size=0.33)

In [12]:
df_test_features = pd.DataFrame(X_test)
df_test_labels = pd.DataFrame(y_test)
df_test_features.to_csv('../data/03-processed/test_data.csv', index=False)
df_test_labels.to_csv('../data/03-processed/test_labels.csv', index=False)

In [13]:
lap_identifier = pd_full_lap_info[['driverId', 'event_name']].values[:X_test.shape[0]]
lap_identifier.shape

(182075, 2)

In [14]:
# Show the dimensions of the training features and training targets, one per line.
print(X_train.shape, y_train.shape, sep='\n')

(369667, 4)
(369667,)


In [15]:
# Peek at the first training row and its target value, one per line.
print(X_train[:1], y_train[:1], sep='\n')

[[3.60000000e+01 1.07354000e+05 1.10643373e+05 1.05738000e+02]]
[107354.]


In [16]:
# Alternative, more constrained configuration kept for reference:
# rf_regressor = RandomForestRegressor(n_estimators=50, max_depth=3, max_features="sqrt", criterion="absolute_error", warm_start=True)

# Random forests are stochastic (bootstrap sampling), so a fixed random_state is
# set to make the fitted model reproducible across kernel restarts.
# warm_start=True only has an effect if fit() is called again on this same object
# with a larger n_estimators; it is kept so the estimator's parameters are unchanged.
rf_regressor = RandomForestRegressor(n_estimators=50, warm_start=True, random_state=42)
rf_regressor.fit(X_train, y_train)

In [17]:
# Serialize the fitted forest to disk so it can be loaded without retraining.
model_output_path = '../data/04-trained-models/rf-trained-model.pkl'
with open(model_output_path, mode='wb') as model_file:
    pickle.dump(rf_regressor, model_file)