In [2]:
# Import packages
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.neural_network import MLPRegressor
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from ast import literal_eval
from joblib import dump, load

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

In [15]:
# Collect data into pandas.
# `names=` supplies the column labels, so pandas treats every line of each file as data.
# `data2` holds the "24-02-2022" file and `data1` the "28-04-2022_heuristic" file.
data2 = pd.read_csv("data/24-02-2022", names=['schedule', 'value'])
data1 = pd.read_csv("data/28-04-2022_heuristic", names=['schedule', 'value'])
# Stack the two frames (data1 rows first). ignore_index=True gives the combined
# frame a fresh 0..n-1 index; without it both files contribute labels 0, 1, 2, ...
# and a single label would refer to two different rows.
data = pd.concat([data1, data2], ignore_index=True)

In [26]:
# Preview the frame read from "data/24-02-2022" (displayed as the cell's last expression)
data2

Unnamed: 0,schedule,value
0,"[23, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",46.266667
1,"[23, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, ...",46.183507
2,"[23, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, ...",46.100335
3,"[23, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, ...",46.017162
4,"[23, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, ...",45.933990
...,...,...
3379,"[0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, ...",9.362071
3380,"[0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, ...",9.286048
3381,"[0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, ...",9.587430
3382,"[0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, ...",9.511215


In [18]:
# Convert columns read from the data files: 'schedule' strings -> numpy arrays,
# 'value' -> float.
def _parse_schedule(raw):
    """Return one 'schedule' cell as a numpy array.

    Strings are parsed with ast.literal_eval and wrapped in np.array; any other
    value (for example an array left behind by an earlier run of this cell) is
    passed through np.asarray unchanged.
    """
    if isinstance(raw, str):
        return np.array(literal_eval(raw))
    return np.asarray(raw)


# This cell overwrites data['schedule'] in place. Parsing only string cells keeps
# it safe to re-run: literal_eval raises on values that are already arrays.
data['schedule'] = data['schedule'].apply(_parse_schedule)
data['value'] = data['value'].astype(float)

In [19]:
# Extract X and y data from the pandas table.
# X gets one column per schedule entry. A single DataFrame constructor call expands
# all rows at once; the previous `.apply(pd.Series)` built a separate Series object
# for every row, which is slow on frames of this size.
# index=data.index keeps X row-aligned with y.
X = pd.DataFrame(data['schedule'].tolist(), index=data.index)
y = data['value']

In [20]:
# Hold out 40% of the rows for testing; the fixed random_state keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.4,
    random_state=42,
)

In [49]:
# Multilayer perceptron regressor with layers [INPUT, 1000, 500, OUTPUT].
# verbose=True logs the loss per iteration; early_stopping=True also logs a validation score.
model = MLPRegressor(
    hidden_layer_sizes=(1000, 500),
    max_iter=1000,
    early_stopping=True,
    random_state=42,
    verbose=True,
).fit(X_train, y_train)

Iteration 1, loss = 5.27240064
Validation score: 0.969260
Iteration 2, loss = 0.31847271
Validation score: 0.983921
Iteration 3, loss = 0.16064451
Validation score: 0.989108
Iteration 4, loss = 0.13219889
Validation score: 0.990741
Iteration 5, loss = 0.11383427
Validation score: 0.992337
Iteration 6, loss = 0.09647656
Validation score: 0.993143
Iteration 7, loss = 0.08528338
Validation score: 0.993405
Iteration 8, loss = 0.07645550
Validation score: 0.994800
Iteration 9, loss = 0.06801124
Validation score: 0.995347
Iteration 10, loss = 0.05747392
Validation score: 0.996061
Iteration 11, loss = 0.05053656
Validation score: 0.996498
Iteration 12, loss = 0.04490646
Validation score: 0.996917
Iteration 13, loss = 0.03926248
Validation score: 0.997221
Iteration 14, loss = 0.03771191
Validation score: 0.997415
Iteration 15, loss = 0.03387123
Validation score: 0.997493
Iteration 16, loss = 0.03142194
Validation score: 0.997763
Iteration 17, loss = 0.02951041
Validation score: 0.997916
Iterat

In [50]:
# R^2 of the MLP on the held-out test split (this is what scikit-learn regressors' `score` returns).
# Original author's note: this score is not particularly meaningful on its own.
model.score(X_test, y_test)

0.9985280451074038

In [51]:
# Spot-check: predictions for the first five test rows, followed by their true values.
# .iloc makes the positional (not label-based) slicing explicit.
print(model.predict(X_test.iloc[:5]))
print(y_test.iloc[:5])

[40.7222794   7.39846173  7.71107887  7.50734874  7.11590142]
71      40.850843
1525     7.523561
2495     8.201974
2124     7.334443
1392     7.298137
Name: value, dtype: float64


In [52]:
# Persist the fitted MLP with joblib. 'testing_model.joblib' is a relative path, so the
# file is created in the current working directory; read it back later with joblib's `load`.
# The value displayed by this cell is the list of file names that `dump` returns.
# NOTE: joblib files are pickle-based; only `load` files from a trusted source.
dump(model, 'testing_model.joblib')

['testing_model.joblib']

In [14]:
# Reference point: plain linear regression fitted on the same training split
model2 = (
    LinearRegression()
    .fit(X_train, y_train)
)

In [15]:
# R^2 of the linear baseline on the same test split, for comparison with the MLP's score
model2.score(X_test, y_test)

0.38042686350400157

## RNN Testing

In [30]:
# Recurrent Neural Network model drawn up in Keras.
# NOTE: this rebinds `model`, which until this cell referred to the scikit-learn MLP.

# Seed TensorFlow's random ops (e.g. weight initialisation) so repeated runs start
# from the same state, matching the random_state=42 used for the scikit-learn models.
tf.random.set_seed(42)

model = keras.models.Sequential()
model.add(keras.Input(shape=(48,1))) # 48 time slots in the schedule, one value per slot
model.add(layers.SimpleRNN(48, activation='relu'))
model.add(layers.Dense(1))  # single regression output
# summary() prints the table itself and returns None, so wrapping it in print()
# only appended a stray "None" line to the output.
model.summary()

loss = keras.losses.MeanSquaredError()
optim = keras.optimizers.Adam(learning_rate=0.001)

model.compile(loss=loss, optimizer=optim)

epochs = 500

# NOTE(review): X_train is a 2-D table (rows x 48 columns) while the Input layer
# declares (48, 1); this relies on Keras adding the trailing axis - confirm on the
# TensorFlow version in use.
model.fit(X_train, y_train, epochs=epochs, verbose=2)

# Last expression of the cell: the test-set loss (mean squared error, as compiled above)
model.evaluate(X_test, y_test, verbose=2)

Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 simple_rnn_4 (SimpleRNN)    (None, 48)                2400      
                                                                 
 dense_1 (Dense)             (None, 1)                 49        
                                                                 
Total params: 2,449
Trainable params: 2,449
Non-trainable params: 0
_________________________________________________________________
None
Epoch 1/500
213/213 - 2s - loss: 16.8937 - 2s/epoch - 10ms/step
Epoch 2/500
213/213 - 2s - loss: 3.0835 - 2s/epoch - 7ms/step
Epoch 3/500
213/213 - 1s - loss: 4.1857 - 1s/epoch - 7ms/step
Epoch 4/500
213/213 - 1s - loss: 4.8861 - 1s/epoch - 7ms/step
Epoch 5/500
213/213 - 2s - loss: 2.2818 - 2s/epoch - 7ms/step
Epoch 6/500
213/213 - 1s - loss: 1.5235 - 1s/epoch - 7ms/step
Epoch 7/500
213/213 - 2s - loss: 2.1305 - 2s/epoch - 7ms/step
Epoch 8

0.2012239396572113

In [31]:
# Spot-check the RNN: predictions for the first ten test rows, followed by their true values.
# .iloc makes the positional (not label-based) slicing explicit.
print(model.predict(X_test.iloc[:10]))
print(y_test.iloc[:10])

[[41.578983 ]
 [ 7.2879133]
 [ 7.8509226]
 [ 7.4584002]
 [ 7.285673 ]
 [ 7.452606 ]
 [ 7.675054 ]
 [ 7.4962273]
 [ 7.6242094]
 [41.428764 ]]
71      40.850843
1525     7.523561
2495     8.201974
2124     7.334443
1392     7.298137
7915     8.278072
3643     7.805104
1602     7.517967
5812     7.576562
42      42.950599
Name: value, dtype: float64


In [32]:
# Persist the Keras RNN (the current `model`) to 'testing_model_rnn.joblib' in the
# current working directory; it can be read back later with joblib's `load`.
# NOTE(review): joblib pickles the object. The TensorFlow log line emitted by this cell
# ("Assets written to: ram://...") suggests Keras serialises itself to a temporary
# in-memory location as part of that. Keras's own `model.save(...)` /
# `keras.models.load_model(...)` is the documented persistence route - consider
# switching if reloading this file proves fragile across TensorFlow versions.
dump(model, 'testing_model_rnn.joblib')

INFO:tensorflow:Assets written to: ram://3fd859ce-cba8-4dc8-a990-bc70b3655938/assets


['testing_model_rnn.joblib']