In [1]:
import os
import yaml
import pandas as pd
from pathlib import Path
from matplotlib import pyplot as plt
import seaborn as sns
# Apply seaborn's default theme to the matplotlib figures drawn below.
sns.set()

# Switch the working directory to ../scripts before the local import below.
# Presumably this is what makes the project modules importable -- confirm.
# Side effect: every relative path used later in this notebook
# (e.g. "../model/params.yaml") is resolved from ../scripts.
os.chdir("../scripts")

# Project-local modules used by the remaining cells.
import preprocess, train, inference, interpret

In [2]:
# Read the YAML configuration and keep the data directory for later cells.
# The path is relative to the working directory set by os.chdir("../scripts").
params_path = "../model/params.yaml"
with open(params_path, "r") as params_file:
    params = yaml.safe_load(params_file)

data_dir = params['data_dir']

In [3]:
# Load the CSV named below through the project's preprocess module and preview it.
# NOTE(review): the saved output of this cell is
# "NameError: name 'df' is not defined". No `df` is referenced in this cell, so the
# failure presumably originates inside preprocess.load_data -- verify and fix there.
file_name = "Trepn_2022.04.04_183605_total.csv"
load_result = preprocess.load_data(file_name)
data, timestamp_s = load_result
data.head(3)

NameError: name 'df' is not defined

In [None]:
# Split the loaded frame into train/test parts (70% train).
# NOTE(review): plot_df=True presumably makes prep_data write the plot_df.csv that a
# later cell reads from data_dir -- confirm in preprocess.prep_data.
train_df, test_df = preprocess.prep_data(
    df=data,
    train_frac=0.7,
    plot_df=True,
)

In [None]:
# Print the train and test shapes (one per line), then preview the training rows.
print(train_df.shape, test_df.shape, sep="\n")
train_df.head(n=10)

In [None]:
# Preview the first ten rows of the test split.
test_df.head(n=10)

In [None]:
# Load the plotting frame from data_dir and drop rows with missing values.
# The load and the dropna are chained so that re-running this cell always starts
# from the CSV on disk. The former `plot_df['Battery'] = plot_df.Battery` line
# assigned the column to itself (a no-op) and has been removed.
plot_df = pd.read_csv(Path(data_dir, 'plot_df.csv')).dropna()

In [None]:
# Line plot of the Battery column; the title is set on the axes pandas returns.
battery_ax = plot_df['Battery'].plot(figsize=(12, 6), rot=45, cmap='viridis')
battery_ax.set_title('Battery Drain (%)')
plt.show()

In [None]:
# Three stacked panels: the raw Battery series, its rolling mean and its rolling
# standard deviation (window of 90 rows).
# The figure size is set once at creation instead of on every .plot() call.
fig, axes = plt.subplots(nrows=3, ncols=1, figsize=(12, 6))

battery = plot_df['Battery']
battery_rolling = battery.rolling(90)

# sharex=True is kept exactly as in the original call on the first panel.
battery.plot(ax=axes[0], rot=10, title='a. Battery Level Change', sharex=True)
battery_rolling.mean().plot(ax=axes[1], rot=90, title='b. Mean', color='orange')
battery_rolling.std().plot(ax=axes[2], rot=0, title='c.  Standard Deviation', color='purple')
plt.show()

In [None]:
# Column passed as the label to train.train_model.
label_name = 'Battery'

# Values handed to train.train_model below; sequence_length is also passed to
# inference.predict.
# NOTE(review): n_epochs_stop is presumably the early-stopping patience -- confirm
# in train.train_model.
n_epochs = 200
n_epochs_stop = 20
batch_size = 24
sequence_length = 30

# Alternative settings kept for reference:
#   sequence_length = 10, batch_size = 5, n_epochs = 200, n_epochs_stop = 50

In [None]:
# Train on the prepared splits using the settings defined above; the returned
# object is plotted in the next cell.
hist = train.train_model(
    train_df,
    test_df,
    label_name,
    sequence_length,
    batch_size,
    n_epochs,
    n_epochs_stop,
)

In [None]:
# Plot the object returned by train.train_model.
# NOTE(review): presumably per-epoch train/validation loss -- confirm in train.py.
history_ax = hist.plot(cmap='viridis', figsize=(12, 6))
plt.show()

In [None]:
# Run the model on the test split. The label column comes from `label_name`
# (defined with the training settings) instead of a second hard-coded 'Battery',
# so training and inference cannot drift apart.
predictions_descaled, labels_descaled = inference.predict(
    df=test_df,
    label_name=label_name,
    sequence_length=sequence_length,
)

# Show both result shapes. The original line repeated predictions_descaled.shape
# twice and, being a bare expression in the middle of the cell, was never displayed.
print(predictions_descaled.shape, labels_descaled.shape)
print(test_df.shape)

In [None]:
# Loss metrics over the full set of test predictions.
print('Error on all test data:')
inference.print_loss_metrics(labels_descaled, predictions_descaled)

# Same metrics restricted to the first 30 predictions.
print('\n')
print('Error on partial test data:')
first_n = 30
inference.print_loss_metrics(labels_descaled[:first_n], predictions_descaled[:first_n])

In [None]:
# Plot predictions against true values, together with the residual (true - predicted).
df_pred = pd.DataFrame()
df_pred['predicted'] = predictions_descaled
df_pred['true'] = labels_descaled
df_pred['residual'] = labels_descaled - predictions_descaled
df_pred.plot(figsize=(12,6), cmap='viridis')
plt.xticks(rotation=45)
# Uncomment to zoom in on the first 300 rows:
#plt.xlim(0,300)
# NOTE(review): 'Min' assumes one row per minute -- confirm against the data.
plt.xlabel('Min')
# The plotted quantity is the Battery label; the previous 'price' label was wrong.
plt.ylabel('Battery')
plt.legend()
plt.show()

In [None]:
from train import TimeSeriesDataset, TSModel
from torch.utils.data import DataLoader
import torch
import shap

In [None]:
# Arguments for interpret.get_important_features in the next cell.
# sequence_length repeats the value used for training; keep the two in sync.
sequence_length = 30
test_sample_size = 100
background_data_size = 464

In [None]:
# Compute the values plotted below via the project's interpret module.
shap_values = interpret.get_important_features(background_data_size, test_sample_size, sequence_length)

In [None]:
# One column per training feature, plus a 'Time' column running from -10 to -1.
# NOTE(review): 'Time' has exactly 10 entries, so this assumes shap_values has 10
# rows even though sequence_length is 30 -- confirm against
# interpret.get_important_features.
feature_names = train_df.columns.tolist()
shap_plot = pd.DataFrame(shap_values, columns=feature_names)
shap_plot['Time'] = list(range(-10, 0))
shap_plot.head()

In [None]:
# Stacked area chart of the per-feature values over the 'Time' column.
shap_ax = shap_plot.plot.area(x='Time', figsize=(10, 6), cmap='viridis')
shap_ax.set_title("Deep SHAP - Feature Importance")
plt.show()