In [None]:
# IPython setup: load the autoreload extension and set it to mode 2 (reload
# imported modules before executing code), and render matplotlib figures
# inline in the notebook output.
%load_ext autoreload
%autoreload 2
%matplotlib inline

# Update the import path: insert '<sys.path[0]>/../src' at position 1 of sys.path.
# NOTE(review): presumably this exposes the project's own modules to the notebook;
# none of the cells visible here import from it -- confirm it is still needed.
import os, sys
sys.path.insert(1, os.path.join(sys.path[0], '..', 'src'))

import io
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.pipeline import Pipeline
from importlib import reload
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_absolute_error
from sklearn.externals import joblib

# Turn off pandas' chained-assignment check so that writing to dataframe
# slices does not emit warnings.
pd.set_option('mode.chained_assignment', None)

In [None]:
# Location of the logged history; the CSV's first column becomes the initial index.
history_path = os.path.join('..', 'data', 'logs', 'history_20180919_1238.csv')

# Read the file, parse the 'datetime' column into timestamps and promote it
# to the index (could also be done directly in read_csv).
data = (
    pd.read_csv(history_path, index_col=0)
    .assign(datetime=lambda frame: pd.to_datetime(frame['datetime']))
    .set_index('datetime')
)

# Quick sanity check: dimensions, then the first few rows.
print(data.shape)
data.head()

In [None]:
# Show the boat approaching the target course. Both series are drawn on the
# same axes, so a legend, title and axis label are added to make the figure
# readable on its own.
fig, ax = plt.subplots(figsize=(20, 10))
data['target_angle'].plot(ax=ax, label='target_angle')
data['boat_angle'].plot(ax=ax, label='boat_angle')
ax.set_title('Boat angle vs. target angle')
ax.set_ylabel('angle')
# Trailing semicolon suppresses the Legend repr in the notebook output.
ax.legend();

In [None]:
# Select the regression target (rudder angle) and the feature columns.
y = data['rudder_angle']
x = data[['angle_of_attack', 'boat_heel', 'boat_speed', 'course_error', 'wind_speed']]

# Split test/train sets by position, without shuffling: the first 80% of the
# rows are used for training and the remaining 20% for testing.
# .iloc makes the positional intent explicit on the datetime-indexed frame.
split = int(0.8 * data.shape[0])
train_x, test_x = x.iloc[:split], x.iloc[split:]
train_y, test_y = y.iloc[:split], y.iloc[split:]

# Train the model. random_state is pinned so that re-running the notebook
# yields the same fitted model and the same score.
model = GradientBoostingRegressor(n_estimators=100, random_state=0)
model.fit(train_x, train_y)

# Show performance on the held-out rows. The metric's signature is
# (y_true, y_pred), so the ground truth goes first.
pred_y = model.predict(test_x)
print(mean_absolute_error(test_y, pred_y))

In [None]:
# Serialize the fitted model to ../data/my-model.pkl.
model_path = os.path.join('..', 'data', 'my-model.pkl')
joblib.dump(model, model_path)