## Content list

#### 1. Import libraries
#### 2. Import data
#### 3. CNN data prep
#### 4. Bayesian Hyperparameter Optimization
#### 5. CNN (Convolution Neural Network) model

## 1. Import libraries

In [1]:
# Import libraries
import pandas as pd
import numpy as np
import seaborn as sns
import os
import operator
import time
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import make_scorer, accuracy_score
from sklearn.model_selection import StratifiedKFold
from sklearn.utils.multiclass import type_of_target
import tensorflow as tf
from numpy import unique
from numpy import reshape
from tensorflow.keras.models import Sequential
from sklearn.model_selection import cross_val_score
from tensorflow.keras.layers import Input, Conv1D, Dense, Dropout, BatchNormalization, Flatten, MaxPooling1D
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam, SGD, RMSprop, Adadelta, Adagrad, Adamax, Nadam, Ftrl
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from scikeras.wrappers import KerasClassifier  # Use scikeras for scikit-learn compatibility
from math import floor
from bayes_opt import BayesianOptimization
from tensorflow.keras.layers import LeakyReLU  # Use tensorflow.keras instead of keras
LeakyReLU = LeakyReLU(negative_slope=0.1)
import warnings

In [2]:
# Set option to ensure charts are displayed inline in the notebook
%matplotlib inline

## 2. Import data

In [3]:
# Turn project folder path into a string
path = r'C:\Users\Admin\Desktop\Data Analysis\Maching Learning\Climate_Win'

In [4]:
path

'C:\\Users\\Admin\\Desktop\\Data Analysis\\Maching Learning\\Climate_Win'

In [5]:
# Import cleaned weather data
df_cw = pd.read_csv(os.path.join(path, '02 Data', 'Prepared Data', 'climatewin_cleaned.csv'))

In [6]:
# Check output
df_cw.head()

Unnamed: 0,BASEL_cloud_cover,BASEL_global_radiation,BASEL_humidity,BASEL_precipitation,BASEL_pressure,BASEL_sunshine,BASEL_temp_max,BASEL_temp_mean,BASEL_temp_min,BELGRADE_cloud_cover,...,STOCKHOLM_temp_min,VALENTIA_cloud_cover,VALENTIA_global_radiation,VALENTIA_humidity,VALENTIA_precipitation,VALENTIA_pressure,VALENTIA_sunshine,VALENTIA_temp_max,VALENTIA_temp_mean,VALENTIA_temp_min
0,7,0.32,0.85,0.09,1.018,0.7,10.9,6.5,0.8,1,...,2.2,5,0.45,0.88,0.34,1.0003,4.7,10.9,8.5,6.0
1,6,0.36,0.84,1.05,1.018,1.1,10.1,6.1,3.3,6,...,3.0,7,0.25,0.91,0.84,1.0007,0.7,12.1,8.9,5.6
2,8,0.18,0.9,0.3,1.018,0.0,9.9,8.5,5.1,6,...,1.3,7,0.17,0.91,0.08,1.0096,0.1,12.9,10.5,8.1
3,3,0.58,0.92,0.0,1.018,4.1,10.6,6.3,3.8,8,...,0.4,7,0.13,0.86,0.98,1.0184,0.0,10.6,7.4,7.3
4,6,0.65,0.95,0.14,1.018,5.4,6.0,3.0,-0.7,8,...,0.8,3,0.46,0.8,0.0,1.0328,5.7,8.4,5.7,3.0


In [7]:
# Check shape
df_cw.shape

(22950, 135)

In [8]:
# Import climatewin prediction data
df_cw_pred = pd.read_csv(os.path.join(path, '02 Data', 'Prepared Data', 'climatewin_answers_cleaned.csv'))

In [9]:
# Check output
df_cw_pred.head()

Unnamed: 0,BASEL_pleasant_weather,BELGRADE_pleasant_weather,BUDAPEST_pleasant_weather,DEBILT_pleasant_weather,DUSSELDORF_pleasant_weather,HEATHROW_pleasant_weather,KASSEL_pleasant_weather,LJUBLJANA_pleasant_weather,MAASTRICHT_pleasant_weather,MADRID_pleasant_weather,MUNCHENB_pleasant_weather,OSLO_pleasant_weather,SONNBLICK_pleasant_weather,STOCKHOLM_pleasant_weather,VALENTIA_pleasant_weather
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [10]:
# Check shape
df_cw_pred.shape

(22950, 15)

## 3. CNN data prep

In [11]:
#Create an 'X' and 'y'matrix 
X = df_cw
y = df_cw_pred

In [12]:
# Create array of 'X' and 'y'
X = np.array(X)
y = np.array(y)

In [13]:
X

array([[ 7.  ,  0.32,  0.85, ..., 10.9 ,  8.5 ,  6.  ],
       [ 6.  ,  0.36,  0.84, ..., 12.1 ,  8.9 ,  5.6 ],
       [ 8.  ,  0.18,  0.9 , ..., 12.9 , 10.5 ,  8.1 ],
       ...,
       [ 4.  ,  1.34,  0.76, ..., 13.5 , 10.7 ,  7.9 ],
       [ 5.  ,  1.34,  0.8 , ..., 13.5 , 10.7 ,  7.9 ],
       [ 5.  ,  1.34,  0.84, ..., 13.5 , 10.7 ,  7.9 ]])

In [14]:
# Reshape 'X' for CNN
X = X.reshape(-1, 15, 9)

In [15]:
# Check Reshape output
X

array([[[  7.  ,   0.32,   0.85, ...,  10.9 ,   6.5 ,   0.8 ],
        [  1.  ,   0.88,   0.81, ...,   7.9 ,   3.7 ,  -0.9 ],
        [  4.  ,   0.44,   0.67, ...,   5.1 ,   2.4 ,  -0.4 ],
        ...,
        [  4.  ,   0.48,   0.73, ...,  -3.2 ,  -5.9 ,  -8.5 ],
        [  5.  ,   0.05,   0.98, ...,   4.9 ,   4.2 ,   2.2 ],
        [  5.  ,   0.45,   0.88, ...,  10.9 ,   8.5 ,   6.  ]],

       [[  6.  ,   0.36,   0.84, ...,  10.1 ,   6.1 ,   3.3 ],
        [  6.  ,   0.25,   0.84, ...,   4.4 ,   2.9 ,   2.2 ],
        [  4.  ,   0.18,   0.67, ...,   3.1 ,   2.3 ,   1.4 ],
        ...,
        [  6.  ,   0.21,   0.97, ...,  -8.5 ,  -9.5 , -10.5 ],
        [  5.  ,   0.05,   0.62, ...,   5.  ,   4.  ,   3.  ],
        [  7.  ,   0.25,   0.91, ...,  12.1 ,   8.9 ,   5.6 ]],

       [[  8.  ,   0.18,   0.9 , ...,   9.9 ,   8.5 ,   5.1 ],
        [  6.  ,   0.67,   0.77, ...,   6.4 ,   3.1 ,  -0.5 ],
        [  4.  ,   0.3 ,   0.67, ...,   5.3 ,   2.7 ,   1.7 ],
        ...,
        [  8

In [16]:
# Check 'X' shape
X.shape

(22950, 15, 9)

In [18]:
# Check 'y' output
y

array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]], dtype=int64)

In [19]:
# Reshape y from one-hot encoding to numerical value
y =  np.argmax(y, axis = 1)
print("y shape after argmax:", y.shape)

y shape after argmax: (22950,)


In [20]:
# Check unique values in y to ensure it's correct
print("Unique values in y:", np.unique(y))

Unique values in y: [ 0  1  2  3  4  5  6  7  8  9 10 11 13 14]


In [22]:
# Check class of y
from sklearn.utils.multiclass import type_of_target
type_of_target(y)

'multiclass'

In [26]:
# Check shape for x and y
print(X.shape)
print(y.shape)

(22950, 15, 9)
(22950,)


In [24]:
#Split data into training and testing sets.
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.2)

In [25]:
# Check training and testing shape
print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

(18360, 15, 9) (18360,)
(4590, 15, 9) (4590,)


## 4. Bayesian Hyperparameter Optimization

In [28]:
# Determine the number of time steps for the input data
timesteps = X_train.shape[1]

# Determine the dimensionality of the input data
input_dim = X_train.shape[2]

# Specify the number of classes for the target variable
n_classes = 15  #15 = Number of weather stations

# Create a scorer for accuracy
score_acc = make_scorer(accuracy_score)

In [29]:
# Create function
def bay_area(neurons, activation, kernel, optimizer, learning_rate, batch_size, epochs, layers1, layers2, normalization, dropout, dropout_rate):
    optimizerL = ['SGD', 'Adam', 'RMSprop', 'Adadelta', 'Adagrad', 'Adamax', 'Nadam', 'Ftrl', 'SGD']
    activationL = ['relu', 'sigmoid', 'softplus', 'softsign', 'tanh', 'selu', 'elu', 'exponential', LeakyReLU, 'relu']
    
    neurons = round(neurons)
    kernel = round(kernel)
    activation = activationL[round(activation)]
    optimizer_name = optimizerL[round(optimizer)]
    batch_size = round(batch_size)
    epochs = round(epochs)
    layers1 = round(layers1)
    layers2 = round(layers2)

    def cnn_model():
        model = Sequential()
        model.add(Input(shape=(timesteps, input_dim)))
        model.add(Conv1D(neurons, kernel_size=kernel, activation=activation))
        if normalization > 0.5:
            model.add(BatchNormalization())
        for i in range(layers1):
            model.add(Dense(neurons, activation=activation))
        if dropout > 0.5:
            model.add(Dropout(dropout_rate, seed=127))
        for i in range(layers2):
            model.add(Dense(neurons, activation=activation))
        model.add(MaxPooling1D())
        model.add(Flatten())
        model.add(Dense(n_classes, activation='softmax'))  # sigmoid softmax
        
        # Create a new optimizer instance for each iteration
        if optimizer_name == 'Adam':
            optimizer_instance = Adam(learning_rate=learning_rate)
        elif optimizer_name == 'SGD':
            optimizer_instance = SGD(learning_rate=learning_rate)
        elif optimizer_name == 'RMSprop':
            optimizer_instance = RMSprop(learning_rate=learning_rate)
        elif optimizer_name == 'Adadelta':
            optimizer_instance = Adadelta(learning_rate=learning_rate)
        elif optimizer_name == 'Adagrad':
            optimizer_instance = Adagrad(learning_rate=learning_rate)
        elif optimizer_name == 'Adamax':
            optimizer_instance = Adamax(learning_rate=learning_rate)
        elif optimizer_name == 'Nadam':
            optimizer_instance = Nadam(learning_rate=learning_rate)
        elif optimizer_name == 'Ftrl':
            optimizer_instance = Ftrl(learning_rate=learning_rate)
        
        model.compile(loss='sparse_categorical_crossentropy', optimizer=optimizer_instance, metrics=['accuracy'])
        return model

    # K-fold cross-validation
    es = EarlyStopping(monitor='accuracy', mode='max', verbose=1, patience=20)
    kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=127)
    results = []
    for train, test in kfold.split(X, y):
        model = cnn_model()
        model.fit(X[train], y[train], epochs=epochs, batch_size=batch_size, verbose=0, callbacks=[es])
        scores = model.evaluate(X[test], y[test], verbose=1)
        results.append(scores[1])  # Assuming accuracy is the second metric
    return np.mean(results)


In [30]:
# Start timing the Bayesian Optimization process
start = time.time()

# Define the hyperparameter space for Bayesian Optimization
params = {
    'neurons': (10, 100),
    'kernel': (1, 3),
    'activation': (0, 9),  # 9
    'optimizer': (0, 7),  # 7
    'learning_rate': (0.001, 1),
    'batch_size': (200, 1000), #(10, 50), #
    'epochs': (20, 100),
    'layers1': (1, 3),
    'layers2': (1, 3),
    'normalization': (0, 1),
    'dropout': (0, 1),
    'dropout_rate': (0.3, 0.5)
}

# Run Bayesian Optimization
nn_opt = BayesianOptimization(bay_area, params, random_state=127)
nn_opt.maximize(init_points=15, n_iter=4)  # 25
print('Search took %s minutes' % ((time.time() - start)/60))

|   iter    |  target   | activa... | batch_... |  dropout  | dropou... |  epochs   |  kernel   |  layers1  |  layers2  | learni... |  neurons  | normal... | optimizer |
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Epoch 26: early stopping
[1m144/144[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 868us/step - accuracy: 0.2019 - loss: 113.9428
Epoch 20: early stopping
[1m144/144[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 868us/step - accuracy: 0.1860 - loss: 177.5042
Epoch 20: early stopping
[1m144/144[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 868us/step - accuracy: 0.6520 - loss: 98.0048
Epoch 23: early stopping
[1m144/144[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 868us/step - accuracy: 0.6617 - loss: 77.9587
Epoch 20: early stopping
[1m144/144[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 868us/step - accura

[1m144/144[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.6552 - loss: 1.1419
Epoch 20: early stopping
[1m144/144[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.6520 - loss: nan
Epoch 20: early stopping
[1m144/144[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.6617 - loss: 1.1226
Epoch 20: early stopping
[1m144/144[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.6544 - loss: 1.1302
| [0m9        [0m | [0m0.6434   [0m | [0m1.71     [0m | [0m383.0    [0m | [0m0.1859   [0m | [0m0.3541   [0m | [0m79.76    [0m | [0m1.194    [0m | [0m1.526    [0m | [0m2.075    [0m | [0m0.9041   [0m | [0m84.38    [0m | [0m0.8003   [0m | [0m0.09905  [0m |
Epoch 29: early stopping
[1m144/144[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 735us/step - accuracy: 0.6451 - loss: 1.2960
Epoch 33: early stopping
[1m144/144[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37

[1m144/144[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 910us/step - accuracy: 0.6451 - loss: 1.1405
Epoch 28: early stopping
[1m144/144[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 889us/step - accuracy: 0.6552 - loss: 1.1513
Epoch 20: early stopping
[1m144/144[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 955us/step - accuracy: 0.6520 - loss: 1.1460
Epoch 20: early stopping
[1m144/144[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 875us/step - accuracy: 0.6617 - loss: 1.1309
Epoch 20: early stopping
[1m144/144[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 905us/step - accuracy: 0.6544 - loss: 1.1337
| [0m18       [0m | [0m0.6434   [0m | [0m0.0      [0m | [0m282.1    [0m | [0m1.0      [0m | [0m0.3      [0m | [0m34.19    [0m | [0m2.857    [0m | [0m2.524    [0m | [0m1.0      [0m | [0m1.0      [0m | [0m77.56    [0m | [0m0.8134   [0m | [0m5.367    [0m |
Epoch 22: early stopping
[1m144/144[0m [32m━━━━━━━━━━━━━━━━━

In [31]:
# Print best search result
best_params = nn_opt.max['params']
best_score = nn_opt.max['target']

print(f"Best Parameters: {best_params}")
print(f"Highest Accuracy: {best_score}")

Best Parameters: {'activation': 2.2101334577457843, 'batch_size': 284.29016503530556, 'dropout': 0.820796192529784, 'dropout_rate': 0.30631415814119844, 'epochs': 35.41422470029453, 'kernel': 2.6171873384811173, 'layers1': 2.453293027387388, 'layers2': 1.6404905917542365, 'learning_rate': 0.8516155355653221, 'neurons': 75.63734281167133, 'normalization': 0.6817477276996559, 'optimizer': 2.782307937709919}
Highest Accuracy: 0.8558605670928955


In [37]:
# Retrieve the best parameters from the optimization result
optimum = nn_opt.max['params']

# Assign the best parameters to their respective variables
learning_rate = optimum['learning_rate']
activationL = ['relu', 'sigmoid', 'softplus', 'softsign', 'tanh', 'selu', 'elu', 'exponential', 'LeakyReLU', 'relu']
activation = activationL[round(optimum['activation'])]

# Convert the hyperparameters to their integer form where necessary
optimum['batch_size'] = round(optimum['batch_size'])
optimum['epochs'] = round(optimum['epochs'])
optimum['layers1'] = round(optimum['layers1'])
optimum['layers2'] = round(optimum['layers2'])
optimum['neurons'] = round(optimum['neurons'])
optimum['kernel'] = round(optimum['kernel'])

optimizerL = ['SGD', 'Adam', 'RMSprop', 'Adadelta', 'Adagrad', 'Adamax', 'Nadam', 'Ftrl']
optimizerD = {
    'SGD': SGD(learning_rate=learning_rate),
    'Adam': Adam(learning_rate=learning_rate),
    'RMSprop': RMSprop(learning_rate=learning_rate),
    'Adadelta': Adadelta(learning_rate=learning_rate),
    'Adagrad': Adagrad(learning_rate=learning_rate),
    'Adamax': Adamax(learning_rate=learning_rate),
    'Nadam': Nadam(learning_rate=learning_rate),
    'Ftrl': Ftrl(learning_rate=learning_rate)
}

# Retrieve the optimizer name
optimizer_name = optimizerL[round(optimum['optimizer'])]

# Print the optimum parameters in a readable format
print(f"Best Parameters: ")
print(f"Activation: {activation}")
print(f"Batch Size: {optimum['batch_size']}")
print(f"Dropout Rate: {optimum['dropout_rate']:.4f}")
print(f"Dropout: {optimum['dropout']:.4f}")
print(f"Epochs: {optimum['epochs']}")
print(f"Kernel Size: {optimum['kernel']}")
print(f"Layers1: {optimum['layers1']}")
print(f"Layers2: {optimum['layers2']}")
print(f"Learning Rate: {optimum['learning_rate']:.4f}")
print(f"Neurons: {optimum['neurons']}")
print(f"Normalization: {optimum['normalization']:.4f}")
print(f"Optimizer: {optimizer_name}")

Best Parameters: 
Activation: softplus
Batch Size: 284
Dropout Rate: 0.3063
Dropout: 0.8208
Epochs: 35
Kernel Size: 3
Layers1: 2
Layers2: 2
Learning Rate: 0.8516
Neurons: 76
Normalization: 0.6817
Optimizer: Adadelta


## 5. CNN (Convolution Neural Network) model

In [33]:
# Setting up label key for all weather stations
stations = {
    0: 'BASEL',
    1: 'BELGRADE',
    2: 'BUDAPEST',
    3: 'DEBILT',
    4: 'DUSSELDORF',
    5: 'HEATHROW',
    6: 'KASSEL',
    7: 'LJUBLJANA',
    8: 'MAASTRICHT',
    9: 'MADRID',
   10: 'MUNCHENB',
   11: 'OSLO',
   12: 'SONNBLICK',
   13: 'STOCKHOLM',
   14: 'VALENTIA',
}

In [38]:
# Best parameters from optimization
best_params = {
    'activation': 'softplus',
    'batch_size': 284,
    'dropout_rate':  0.3063,
    'dropout': 0.8208,
    'epochs':35,
    'kernel': 3,
    'layers1': 2,
    'layers2': 2,
    'learning_rate': 0.8516,
    'neurons': 76,
    'normalization': 0.6817,
    'optimizer': 'Adadelta'
}

# Initialize optimizer with learning rate
optimizers = {
    'Adam': Adam(learning_rate=best_params['learning_rate']),
    'SGD': SGD(learning_rate=best_params['learning_rate']),
    'RMSprop': RMSprop(learning_rate=best_params['learning_rate']),
    'Adadelta': Adadelta(learning_rate=best_params['learning_rate']),
    'Adagrad': Adagrad(learning_rate=best_params['learning_rate']),
    'Adamax': Adamax(learning_rate=best_params['learning_rate']),
    'Nadam': Nadam(learning_rate=best_params['learning_rate']),
    'Ftrl': Ftrl(learning_rate=best_params['learning_rate'])
}

optimizer = optimizers[best_params['optimizer']]

timesteps = len(X_train[0])
input_dim = len(X_train[0][0])
#n_classes = len(y_train[0])

model = Sequential()
model.add(Conv1D(best_params['neurons'], kernel_size=best_params['kernel'], activation=best_params['activation'], input_shape=(15,9)))

if best_params['normalization'] > 0.5:
    model.add(BatchNormalization())

for _ in range(best_params['layers1']):
    model.add(Dense(best_params['neurons'], activation=best_params['activation']))

if best_params['dropout'] > 0.5:
    model.add(Dropout(best_params['dropout_rate'], seed=123))

for _ in range(best_params['layers2']):
    model.add(Dense(best_params['neurons'], activation=best_params['activation']))

model.add(MaxPooling1D())
model.add(Flatten())
model.add(Dense(n_classes, activation='softmax')) # sigmoid, tanh, softmax

In [39]:
# Compile model
model.compile(loss='sparse_categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])

In [40]:
# Train model
model.fit(X_train, y_train, batch_size=best_params['batch_size'], epochs=best_params['epochs'], verbose=2)

Epoch 1/35
65/65 - 2s - 32ms/step - accuracy: 0.6338 - loss: 1.1525
Epoch 2/35
65/65 - 0s - 6ms/step - accuracy: 0.6992 - loss: 0.9244
Epoch 3/35
65/65 - 0s - 6ms/step - accuracy: 0.7181 - loss: 0.8321
Epoch 4/35
65/65 - 0s - 6ms/step - accuracy: 0.7355 - loss: 0.7718
Epoch 5/35
65/65 - 0s - 6ms/step - accuracy: 0.7449 - loss: 0.7349
Epoch 6/35
65/65 - 0s - 5ms/step - accuracy: 0.7590 - loss: 0.6894
Epoch 7/35
65/65 - 0s - 5ms/step - accuracy: 0.7681 - loss: 0.6584
Epoch 8/35
65/65 - 0s - 5ms/step - accuracy: 0.7791 - loss: 0.6117
Epoch 9/35
65/65 - 0s - 5ms/step - accuracy: 0.7966 - loss: 0.5801
Epoch 10/35
65/65 - 0s - 6ms/step - accuracy: 0.8040 - loss: 0.5536
Epoch 11/35
65/65 - 0s - 5ms/step - accuracy: 0.8107 - loss: 0.5291
Epoch 12/35
65/65 - 0s - 6ms/step - accuracy: 0.8190 - loss: 0.5047
Epoch 13/35
65/65 - 0s - 6ms/step - accuracy: 0.8293 - loss: 0.4703
Epoch 14/35
65/65 - 0s - 5ms/step - accuracy: 0.8316 - loss: 0.4610
Epoch 15/35
65/65 - 0s - 6ms/step - accuracy: 0.8408 - l

<keras.src.callbacks.history.History at 0x2290afba0d0>

In [41]:
# View model summary
model.summary()

In [73]:
model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])

In [74]:
# Assuming X_train and y_train are your input data and labels
# One-hot encode y_train
y_train_one_hot = to_categorical(y_train, num_classes=15)

In [75]:
# Check shapes
print(f'X_train shape: {X_train.shape}')
print(f'y_train_one_hot shape: {y_train_one_hot.shape}')

X_train shape: (18360, 15, 9)
y_train_one_hot shape: (18360, 15)


In [76]:
model.fit(X_train, y_train_one_hot, batch_size=best_params['batch_size'], epochs=best_params['epochs'], verbose=2)

Epoch 1/35
65/65 - 2s - 26ms/step - accuracy: 0.9542 - loss: 0.1309
Epoch 2/35
65/65 - 0s - 5ms/step - accuracy: 0.9522 - loss: 0.1410
Epoch 3/35
65/65 - 0s - 6ms/step - accuracy: 0.9513 - loss: 0.1359
Epoch 4/35
65/65 - 0s - 6ms/step - accuracy: 0.9589 - loss: 0.1147
Epoch 5/35
65/65 - 0s - 5ms/step - accuracy: 0.9577 - loss: 0.1231
Epoch 6/35
65/65 - 0s - 6ms/step - accuracy: 0.9519 - loss: 0.1365
Epoch 7/35
65/65 - 0s - 6ms/step - accuracy: 0.9551 - loss: 0.1255
Epoch 8/35
65/65 - 0s - 6ms/step - accuracy: 0.9533 - loss: 0.1314
Epoch 9/35
65/65 - 0s - 6ms/step - accuracy: 0.9583 - loss: 0.1213
Epoch 10/35
65/65 - 0s - 6ms/step - accuracy: 0.9577 - loss: 0.1225
Epoch 11/35
65/65 - 0s - 6ms/step - accuracy: 0.9614 - loss: 0.1098
Epoch 12/35
65/65 - 0s - 5ms/step - accuracy: 0.9578 - loss: 0.1196
Epoch 13/35
65/65 - 0s - 5ms/step - accuracy: 0.9609 - loss: 0.1118
Epoch 14/35
65/65 - 0s - 6ms/step - accuracy: 0.9571 - loss: 0.1208
Epoch 15/35
65/65 - 0s - 5ms/step - accuracy: 0.9581 - l

<keras.src.callbacks.history.History at 0x22918360690>

In [77]:
def confusion_matrix(y_true, y_pred, stations):
    # Check if y_true and y_pred are one-hot encoded or already class indices
    if y_true.ndim == 1:
        y_true_labels = y_true
    else:
        y_true_labels = np.argmax(y_true, axis=1)
    
    if y_pred.ndim == 1:
        y_pred_labels = y_pred
    else:
        y_pred_labels = np.argmax(y_pred, axis=1)
        
    # Map numeric labels to activity names
    y_true_series = pd.Series([stations[y] for y in y_true_labels])
    y_pred_series = pd.Series([stations[y] for y in y_pred_labels])
    
    return pd.crosstab(y_true_series, y_pred_series, rownames=['True'], colnames=['Pred'])

In [78]:
# Run model to predict
y_pred = model.predict(X_test)

[1m144/144[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step


In [79]:
cm = confusion_matrix(y_test, y_pred, stations)
print(cm)

Pred        BASEL  BELGRADE  BUDAPEST  DEBILT  DUSSELDORF  HEATHROW  KASSEL  \
True                                                                          
BASEL        2932        10         3       0           0         0       0   
BELGRADE      136       694        15       7           3        10       1   
BUDAPEST       24         4       110       3           2         7       0   
DEBILT          9         0         2      51           1         1       0   
DUSSELDORF     11         0         0       6          12         5       0   
HEATHROW        6         0         0       2           2        74       1   
KASSEL          2         0         0       0           0         1       7   
LJUBLJANA       4         1         2       0           0         3       0   
MAASTRICHT      2         0         0       0           2         0       0   
MADRID         29         2         1       2           2         9       0   
MUNCHENB        1         0         0       0       