In [None]:
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Conv1D, MaxPooling1D, BatchNormalization, LSTM, Bidirectional, Dropout, Dense, Input, GlobalAveragePooling1D, Multiply, Permute, RepeatVector, Flatten, Activation, Lambda
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from keras.regularizers import l2
import numpy as np
import os
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, classification_report
import seaborn as sns

In [None]:
# Load the preprocessed train / validation / test splits.
# np.load on an .npz archive returns an NpzFile that keeps the underlying file
# open until it is closed, so read the arrays inside a `with` block. Each
# indexed array is materialised in memory and stays usable after the close.
with np.load("../dataset.npz") as data:
    X_train, y_train = data['X_train'], data['y_train']
    X_val, y_val = data['X_val'], data['y_val']
    X_test, y_test = data['X_test'], data['y_test']

# Sanity check: every split must pair each sample with exactly one label row,
# otherwise training/evaluation would fail later with a less obvious error.
for split_name, features, labels in (
    ("train", X_train, y_train),
    ("val", X_val, y_val),
    ("test", X_test, y_test),
):
    assert len(features) == len(labels), (
        f"{split_name} split: {len(features)} samples but {len(labels)} labels"
    )

# Debugging: Print the shape of every split
print(f"X_train shape: {X_train.shape}, y_train shape: {y_train.shape}")
print(f"X_val shape: {X_val.shape}, y_val shape: {y_val.shape}")
print(f"X_test shape: {X_test.shape}, y_test shape: {y_test.shape}")

In [None]:
# Model Parameters
# Per-sample input shape expected by the network: (sequence_length, channels).
# Built explicitly instead of slicing X_train.shape[1:], so the value matches
# the single-channel Input layer and is the same whether this cell runs before
# or after the channel-reshape cell.
input_shape = (X_train.shape[1], 1)
# Number of output classes, read from the label matrix width; this assumes
# y_train is one-hot encoded with shape (samples, classes).
num_classes = y_train.shape[1]

In [None]:
# Give every split an explicit trailing channel axis of size 1, turning
# (samples, sequence_length) arrays into (samples, sequence_length, 1).
X_train, X_val, X_test = (
    split.reshape(split.shape[0], split.shape[1], 1)
    for split in (X_train, X_val, X_test)
)

# Debugging: confirm the shapes after reshaping
for label, split in (("X_train", X_train), ("X_val", X_val), ("X_test", X_test)):
    print(f"Reshaped {label} shape: {split.shape}")

In [None]:
def sum_over_time(x):
    """Sum a tensor along axis 1, dropping that axis.

    Used by the attention block, where axis 1 is the time-step axis.
    """
    collapsed = tf.reduce_sum(x, axis=1)
    return collapsed

In [None]:
# Soft attention over the time axis
def attention_3d_block(inputs):
    """Reduce a (batch, time_steps, features) tensor to a (batch, features) context vector.

    A single-unit tanh Dense layer scores every time step, the scores are
    softmax-normalised across time, and the input is summed over the time
    axis weighted by those normalised scores.
    """
    feature_dim = int(inputs.shape[2])
    # One scalar score per time step -> (batch, time_steps, 1)
    scores = Dense(1, activation='tanh')(inputs)
    # Drop the singleton axis -> (batch, time_steps)
    scores = Flatten()(scores)
    # Normalise the scores into weights across the time steps
    weights = Activation('softmax')(scores)
    # Copy the weights once per feature -> (batch, feature_dim, time_steps)
    weights = RepeatVector(feature_dim)(weights)
    # Swap axes so the weights line up with the input -> (batch, time_steps, feature_dim)
    weights = Permute([2, 1])(weights)
    # Scale every time step by its weight
    weighted = Multiply()([inputs, weights])
    # Collapse the time axis to obtain the context vector
    context = Lambda(sum_over_time)(weighted)
    return context

In [None]:
# Seed the Python, NumPy and TensorFlow RNGs before any layer is created, so
# that weight initialisation and the subsequent training run are repeatable
# (as far as the backend allows) when the notebook is run top to bottom.
SEED = 42
tf.keras.utils.set_random_seed(SEED)

# Hyperparameters shared by several layers (previously repeated inline)
L2_FACTOR = 0.0005      # L2 penalty applied to every regularised kernel
BLOCK_DROPOUT = 0.4     # dropout after each bidirectional LSTM
HEAD_DROPOUT = 0.3      # dropout after the final LSTM and the dense head

# Input layer: sequences of length X_train.shape[1] with a single channel
inputs = Input(shape=(X_train.shape[1], 1))

# --- Model Architecture --- #
# Three stacked [BiLSTM -> Dropout -> Conv1D -> MaxPooling -> BatchNorm] blocks.
# The width shrinks block by block (256 -> 128 -> 64) and every
# Conv1D/MaxPooling1D pair shortens the sequence.
x = inputs
for units in (256, 128, 64):
    # Bidirectional LSTM block
    x = Bidirectional(LSTM(units, return_sequences=True, kernel_regularizer=l2(L2_FACTOR)))(x)
    x = Dropout(BLOCK_DROPOUT)(x)

    # CNN block
    x = Conv1D(units, kernel_size=3, activation='relu', kernel_regularizer=l2(L2_FACTOR))(x)
    x = MaxPooling1D(pool_size=2)(x)
    x = BatchNormalization()(x)

# Final LSTM block; keeps the full sequence because the attention block
# needs one vector per time step.
x = LSTM(32, return_sequences=True, kernel_regularizer=l2(L2_FACTOR))(x)
x = Dropout(HEAD_DROPOUT)(x)

# --- Attention Block --- #
# Collapses the time axis into a single context vector per sample
attention_output = attention_3d_block(x)

# Dense layers after attention
x = Dense(128, activation='relu', kernel_regularizer=l2(L2_FACTOR))(attention_output)
x = Dropout(HEAD_DROPOUT)(x)

# Output layer: one softmax probability per class
outputs = Dense(num_classes, activation='softmax')(x)

In [None]:
# Assemble the functional model from the input/output tensors, then attach
# the optimizer, loss and tracked metric.
model = Model(inputs=inputs, outputs=outputs)
optimizer = Adam(learning_rate=0.0002)
model.compile(
    optimizer=optimizer,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
model.summary()

# Debugging: show the shape of the model's output tensor
print(f"Model output shape: {model.output_shape}")

In [None]:
# Training callbacks

# Write the model to disk only when validation accuracy improves.
checkpoint = ModelCheckpoint(
    "../model/best_model.keras",
    monitor="val_accuracy",
    save_best_only=True,
    verbose=1,
)

# Stop after 10 epochs without a validation-accuracy improvement and
# restore the best weights seen.
early_stopping = EarlyStopping(
    monitor='val_accuracy',
    patience=10,
    restore_best_weights=True,
)

# Halve the learning rate after 3 epochs without a validation-loss
# improvement, with a floor of 1e-8.
reduce_lr = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=3,
    min_lr=1e-8,
)


In [None]:
# Train Model
# Runs for at most 100 epochs; the callbacks may stop training earlier,
# lower the learning rate, and checkpoint the model along the way.
history = model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=100,
    batch_size=128,
    callbacks=[early_stopping, reduce_lr, checkpoint],
)

# Debugging: Print final training and validation accuracy
# ("final" = the last epoch that was executed)
print(f"Final Training Accuracy: {history.history['accuracy'][-1]:.4f}")
print(f"Final Validation Accuracy: {history.history['val_accuracy'][-1]:.4f}")

# The last epoch is not necessarily the best one: the checkpoint and
# early-stopping callbacks select on val_accuracy, so also report the epoch
# that achieved the highest validation accuracy.
best_epoch = int(np.argmax(history.history['val_accuracy']))
print(
    f"Best Validation Accuracy: {history.history['val_accuracy'][best_epoch]:.4f} "
    f"(epoch {best_epoch + 1})"
)

In [None]:
# Evaluate on test data
test_loss, test_accuracy = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {test_accuracy:.2f}")

# --- Confusion Matrix and Classification Report --- #
# Convert predicted probabilities and one-hot true labels to class indices
y_pred = model.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1)
y_true_classes = np.argmax(y_test, axis=1)

# Always use the full label set. Without `labels=`, scikit-learn only keeps
# classes that occur in y_true or y_pred, so a class missing from both would
# shrink the matrix while the tick labels below still span num_classes,
# giving a mislabelled (or failing) plot.
class_labels = list(range(num_classes))

# Compute confusion matrix (rows: true class, columns: predicted class)
conf_matrix = confusion_matrix(y_true_classes, y_pred_classes, labels=class_labels)

# Plot confusion matrix
fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(
    conf_matrix,
    annot=True,
    fmt="d",
    cmap="Blues",
    xticklabels=class_labels,
    yticklabels=class_labels,
    ax=ax,
)
ax.set_xlabel("Predicted Labels")
ax.set_ylabel("True Labels")
ax.set_title("Confusion Matrix")
plt.show()

# Print classification report, covering the same label set as the matrix
class_report = classification_report(y_true_classes, y_pred_classes, labels=class_labels)
print("Classification Report:")
print(class_report)

In [None]:
# Plot training history: accuracy on the left panel, loss on the right,
# each comparing the training curve (blue) with the validation curve (red).
history_dict = history.history
fig, axes = plt.subplots(1, 2, figsize=(12, 5))

# (train key, validation key, train legend, validation legend, y label, title)
panels = [
    ("accuracy", "val_accuracy", "Training Accuracy", "Validation Accuracy",
     "Accuracy", "Training & Validation Accuracy"),
    ("loss", "val_loss", "Training Loss", "Validation Loss",
     "Loss", "Training & Validation Loss"),
]

for ax, (train_key, val_key, train_legend, val_legend, y_label, title) in zip(axes, panels):
    ax.plot(history_dict[train_key], label=train_legend, color="blue")
    ax.plot(history_dict[val_key], label=val_legend, color="red")
    ax.set_xlabel("Epochs")
    ax.set_ylabel(y_label)
    ax.set_title(title)
    ax.legend()
    ax.grid()

fig.tight_layout()
plt.show()

In [None]:
# Save the final model
final_model_path = "../model/lstm_cnn_model.keras"
# Make sure the target directory exists so this cell does not rely on an
# earlier step (e.g. the checkpoint callback) having created it.
os.makedirs(os.path.dirname(final_model_path), exist_ok=True)
model.save(final_model_path)
print("Model training completed and saved in .keras format.")