# Transformer Encoder for Human Activity Recognition - Google Colab Ready

This notebook implements a custom Transformer Encoder-based model specifically designed for time-series sensor data classification in elderly care applications.

## Transformer Architecture for HAR:
- **Multi-Head Self-Attention**: Focuses on different parts of sensor sequences simultaneously
- **Stacked Encoder Blocks**: 2 consecutive Transformer encoder layers with self-attention and feed-forward networks
- **Positional Encoding**: Captures temporal relationships in sensor data
- **Residual Connections**: Help with gradient flow and training stability
- **Layer Normalization**: Stabilizes training and improves convergence
- **Lightweight Design**: Optimized for embedded/real-time applications

## Key Features:
- **Sensor Data**: Accelerometer (x,y,z), Gyroscope (x,y,z), Heart Rate
- **Actions**: 8 different human activities
- **Input Shape**: 128 timesteps × 7 features (optimized for sensor windows)
- **Output**: Dense classification layers for activity class probabilities
- **Regularization**: Dropout and layer normalization for overfitting prevention
- **Colab Ready**: Optimized for Google Colab execution with file upload support

## Instructions for Google Colab:
1. Upload your HAR_synthetic_full.csv file using the file upload cell below
2. Run all cells sequentially
3. Result files are written to the Colab working directory; the final cell bundles them into `transformer_results.zip` and downloads it


In [None]:
# Install required packages
%pip install -q tensorflow scikit-learn pandas numpy matplotlib seaborn joblib


In [None]:
# Import libraries
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import *
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import AdamW
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from tensorflow.keras.metrics import TopKCategoricalAccuracy
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score, roc_curve
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm
import warnings
import json
import time
import joblib
from google.colab import files
warnings.filterwarnings('ignore')

# Seed both NumPy and TensorFlow with the same value for reproducibility
_SEED = 42
np.random.seed(_SEED)
tf.random.set_seed(_SEED)

# Report the TensorFlow build and any GPUs TensorFlow can see
print("TensorFlow version:", tf.__version__)
print("GPU available:", tf.config.list_physical_devices('GPU'))


In [None]:
# File Upload for Google Colab
print("Please upload your HAR_synthetic_full.csv file:")
uploaded = files.upload()

# `uploaded` is keyed by file name. If nothing was uploaded, stop here with a
# clear message instead of an IndexError on the lookup below.
if not uploaded:
    raise RuntimeError(
        "No file was uploaded; re-run this cell and select HAR_synthetic_full.csv."
    )

# Get the uploaded file name (the first one if several were selected)
csv_filename = next(iter(uploaded))
print(f"Uploaded file: {csv_filename}")


In [None]:
# Precision policy and GPU memory configuration
from tensorflow.keras import mixed_precision

gpus = tf.config.experimental.list_physical_devices('GPU')

# Memory optimization: enable on-demand allocation on EVERY visible GPU.
# TensorFlow requires the memory-growth setting to be identical across GPUs,
# so configuring only the first device fails on multi-GPU runtimes.
for gpu in gpus:
    try:
        tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as e:
        # Raised when the GPUs were already initialized; report and continue.
        print(e)

# Set mixed precision for better performance, but only when a GPU is present:
# float16 compute gives no speed-up on CPU-only runtimes, so stay in float32.
policy = mixed_precision.Policy('mixed_float16' if gpus else 'float32')
mixed_precision.set_global_policy(policy)
print(f"Compute policy: {policy.name}")


In [None]:
# Data Loading and Preprocessing Functions

def load_sensor_data(csv_path):
    """Read the sensor CSV and keep only rows that carry a label.

    Args:
        csv_path: Location of the CSV, passed straight to ``pd.read_csv``.
            The file must contain a ``label`` column.

    Returns:
        DataFrame without the rows whose ``label`` is NaN. A short summary
        (shape, columns, label values and counts) is printed as a side effect.
    """
    print("Loading sensor data...")

    # Read and discard unlabelled rows in one pass
    df = pd.read_csv(csv_path).dropna(subset=['label'])

    # Dataset overview
    print(f"Data shape: {df.shape}")
    print(f"Columns: {list(df.columns)}")
    print(f"Unique labels: {df['label'].unique()}")

    # Class balance
    label_counts = df['label'].value_counts()
    print(f"Label distribution:")
    print(label_counts)

    return df

def create_sequences(df, sequence_length=128, overlap=0.5):
    """Cut the sensor table into fixed-length windows, one label per window.

    Rows are grouped by ``label``, each group is sorted by ``timestamp_ms``,
    and a sliding window of ``sequence_length`` rows is moved over the group.

    Args:
        df: DataFrame containing ``label``, ``timestamp_ms`` and the seven
            feature columns listed in ``feature_cols`` below.
        sequence_length: Number of rows per window (must be >= 1).
        overlap: Fraction of a window shared with the next one. Must be
            below 1; values that would truncate the stride to 0 are clamped
            to a stride of one row.

    Returns:
        Tuple ``(sequences, labels)``. ``sequences`` is a float32 array of
        shape ``(n_windows, sequence_length, 7)`` (also when ``n_windows`` is
        0) and ``labels`` holds the label of each window.

    Raises:
        ValueError: If ``sequence_length < 1`` or ``overlap >= 1``.
    """
    if sequence_length < 1:
        raise ValueError(f"sequence_length must be >= 1, got {sequence_length}")
    if overlap >= 1:
        raise ValueError(f"overlap must be < 1, got {overlap}")

    print(f"Creating sequences with length {sequence_length} and overlap {overlap}...")

    # Feature columns (excluding timestamp and label)
    feature_cols = ['acc_x', 'acc_y', 'acc_z', 'gyro_x', 'gyro_y', 'gyro_z', 'heart_rate_bpm']

    # int() truncation can yield 0 for overlaps close to 1 (e.g. 4 * 0.1 -> 0),
    # which range() rejects; always advance by at least one row.
    step_size = max(1, int(sequence_length * (1 - overlap)))

    sequences = []
    labels = []

    # NOTE(review): all rows of one label are treated as a single continuous
    # recording. If an activity occurs in several separate segments, windows
    # can straddle the gap between them -- confirm this is acceptable.
    for label in df['label'].unique():
        # Sort by timestamp to maintain temporal order within the activity
        label_data = df[df['label'] == label].sort_values('timestamp_ms')
        features = label_data[feature_cols].values

        # The range bound guarantees every slice is exactly sequence_length long
        for start in range(0, len(features) - sequence_length + 1, step_size):
            sequences.append(features[start:start + sequence_length])
            labels.append(label)

    if sequences:
        sequences = np.array(sequences, dtype=np.float32)
    else:
        # Keep a well-formed 3-D shape when no group is long enough
        sequences = np.empty((0, sequence_length, len(feature_cols)), dtype=np.float32)
    labels = np.array(labels)

    print(f"Created {len(sequences)} sequences")
    print(f"Sequence shape: {sequences.shape}")

    return sequences, labels

def preprocess_data(sequences, labels):
    """Standardise the feature channels and one-hot encode the labels.

    Args:
        sequences: Array whose last axis indexes the feature channels. All
            leading axes are flattened together before scaling.
        labels: One label per sequence.

    Returns:
        Tuple ``(sequences, labels_onehot, label_encoder, scaler)``: the
        scaled array in its original shape, the one-hot label matrix, and the
        fitted ``LabelEncoder`` and ``StandardScaler``.
    """
    print("Preprocessing data...")

    # Scale per feature channel: collapse everything but the last axis so the
    # scaler sees a 2-D (rows, features) matrix, then restore the shape.
    # NOTE(review): the scaler is fitted on whatever is passed in; the visible
    # caller passes the full dataset before the train/test split.
    scaler = StandardScaler()
    n_features = sequences.shape[-1]
    flat = sequences.reshape(-1, n_features)
    sequences = scaler.fit_transform(flat).reshape(sequences.shape)

    # Labels -> integer ids -> one-hot rows
    label_encoder = LabelEncoder()
    labels_onehot = to_categorical(label_encoder.fit_transform(labels))

    print(f"Number of classes: {len(label_encoder.classes_)}")
    print(f"Classes: {label_encoder.classes_}")

    return sequences, labels_onehot, label_encoder, scaler


In [None]:
# Load and preprocess the data
# The dataset path is the file uploaded in the Colab upload cell
csv_path = csv_filename

# Read the CSV (rows without a label are dropped by the loader)
df = load_sensor_data(csv_path)

# Window the recordings: 128 timesteps per sequence, 50% overlap
sequences, labels = create_sequences(df, sequence_length=128, overlap=0.5)

# Scale the features and one-hot encode the labels
X, y, label_encoder, scaler = preprocess_data(sequences, labels)

# Stratified 80/20 split on the integer class ids recovered from the one-hot rows.
# NOTE(review): windows overlap by 50% and the scaler was fitted before this
# split, so train and test windows can share raw samples and statistics.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y.argmax(axis=1),
)

# Summary of the resulting split
print(f"\nTraining set: {X_train.shape}")
print(f"Test set: {X_test.shape}")
print(f"Number of features: {X_train.shape[2]}")
print(f"Number of classes: {y_train.shape[1]}")


In [None]:
# Data Augmentation for Sensor Data

def augment_sensor_sequence(sequence, noise_factor=0.1, time_shift=0.1):
    """Return a randomly perturbed copy of one sensor window.

    Three independent perturbations are drawn from NumPy's global RNG, in
    this order: additive Gaussian noise (probability 0.5), a circular shift
    along axis 0 (probability 0.3) and a uniform amplitude rescale
    (probability 0.3).

    Args:
        sequence: Array for one window; axis 0 is the axis that gets shifted.
        noise_factor: Standard deviation of the additive noise.
        time_shift: Largest shift, as a fraction of ``sequence.shape[0]``.

    Returns:
        A new array; ``sequence`` itself is left untouched.
    """
    rng = np.random
    result = sequence.copy()

    # Additive zero-mean Gaussian noise
    if rng.random() < 0.5:
        result = result + rng.normal(0, noise_factor, sequence.shape)

    # Circular shift by up to +/- time_shift of the window length
    if rng.random() < 0.3:
        max_offset = sequence.shape[0] * time_shift
        offset = int(max_offset * rng.uniform(-1, 1))
        result = np.roll(result, offset, axis=0)

    # Global amplitude scaling within +/-10%
    if rng.random() < 0.3:
        result = result * rng.uniform(0.9, 1.1)

    return result

def create_augmented_data(X_train, y_train, augmentation_factor=2):
    """Interleave every training sample with augmented variants of itself.

    Args:
        X_train: Indexable collection of training windows.
        y_train: Targets aligned with ``X_train``.
        augmentation_factor: Number of ``augment_sensor_sequence`` variants
            generated per original sample.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays in the order: original sample, then
        its variants, for each sample in turn.
    """
    print(f"Creating augmented data with factor {augmentation_factor}...")

    samples = []
    targets = []

    for idx, original in enumerate(X_train):
        target = y_train[idx]

        # The untouched sample always comes first
        samples.append(original)
        targets.append(target)

        # Followed by its augmented variants, all sharing the same target
        variants = [augment_sensor_sequence(original) for _ in range(augmentation_factor)]
        samples.extend(variants)
        targets.extend([target] * len(variants))

    return np.array(samples), np.array(targets)

# Build the augmented training set: each original window plus one pass of it
# through augment_sensor_sequence
X_train_aug, y_train_aug = create_augmented_data(
    X_train,
    y_train,
    augmentation_factor=1,
)

# Compare sizes before and after augmentation
print(f"Original training data: {X_train.shape}")
print(f"Augmented training data: {X_train_aug.shape}")


In [None]:
# Transformer Encoder Implementation for HAR

class PositionalEncoding(Layer):
    """Add fixed sinusoidal positional encodings to a sequence tensor.

    A ``(1, max_len, d_model)`` table is precomputed in ``__init__``: even
    channels hold ``sin(pos * w_i)``, odd channels hold ``cos(pos * w_i)``,
    with ``w_i = 10000 ** (-2i / d_model)``. ``call`` adds the first
    ``seq_len`` rows of the table to the input.

    Args:
        max_len: Number of positions in the table. Inputs longer than this
            cannot be broadcast against it.
        d_model: Channel dimension of the input; may be even or odd.
        **kwargs: Forwarded to the Keras ``Layer`` constructor.
    """

    def __init__(self, max_len=128, d_model=64, **kwargs):
        super(PositionalEncoding, self).__init__(**kwargs)
        self.max_len = max_len
        self.d_model = d_model

        # Create positional encoding matrix
        pos_encoding = np.zeros((max_len, d_model))
        position = np.arange(0, max_len, dtype=np.float32)[:, np.newaxis]

        # One frequency per (sin, cos) channel pair
        div_term = np.exp(np.arange(0, d_model, 2, dtype=np.float32) *
                          -(np.log(10000.0) / d_model))
        angles = position * div_term

        pos_encoding[:, 0::2] = np.sin(angles)
        # With an odd d_model there is one fewer odd channel than even
        # channels, so drop the last frequency for the cosine half.
        pos_encoding[:, 1::2] = np.cos(angles[:, :d_model // 2])

        # Store as numpy array, will be converted to match input dtype in call()
        self.pos_encoding_np = pos_encoding[np.newaxis, :, :]

    def call(self, inputs):
        """Return ``inputs`` plus the encoding for its first ``seq_len`` positions."""
        seq_len = tf.shape(inputs)[1]
        # Cast per call so the table follows the input dtype (e.g. float16
        # under a mixed-precision policy)
        pos_encoding = tf.cast(self.pos_encoding_np, dtype=inputs.dtype)
        return inputs + pos_encoding[:, :seq_len, :]

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created."""
        config = super(PositionalEncoding, self).get_config()
        config.update({
            'max_len': self.max_len,
            'd_model': self.d_model
        })
        return config

class TransformerEncoderBlock(Layer):
    """Post-norm Transformer encoder block.

    Self-attention, then a two-layer feed-forward network; each sub-layer is
    followed by dropout, a residual addition and layer normalisation.

    Args:
        d_model: Channel dimension of the block input and output.
        num_heads: Number of attention heads.
        dff: Hidden width of the feed-forward network.
        dropout_rate: Rate used inside the attention layer and after both
            sub-layers.
        **kwargs: Forwarded to the Keras ``Layer`` constructor.
    """

    def __init__(self, d_model=64, num_heads=8, dff=128, dropout_rate=0.1, **kwargs):
        super().__init__(**kwargs)
        self.d_model = d_model
        self.num_heads = num_heads
        self.dff = dff
        self.dropout_rate = dropout_rate

        # Self-attention sub-layer.
        # NOTE(review): key_dim is the per-head size, and it is set to d_model
        # here rather than d_model // num_heads -- confirm this is intended.
        self.mha = MultiHeadAttention(num_heads=num_heads, key_dim=d_model, dropout=dropout_rate)

        # Position-wise feed-forward sub-layer: expand to dff, project back
        expand = Dense(dff, activation='relu')
        project = Dense(d_model)
        self.ffn = Sequential([expand, project])

        # One normalisation and one dropout per sub-layer
        self.layernorm1 = LayerNormalization(epsilon=1e-6)
        self.layernorm2 = LayerNormalization(epsilon=1e-6)
        self.dropout1 = Dropout(dropout_rate)
        self.dropout2 = Dropout(dropout_rate)

    def call(self, inputs, training=None):
        """Apply the block; ``training`` toggles the dropout layers."""
        # Attention sub-layer: query and value are both the block input
        attended = self.mha(inputs, inputs, training=training)
        attended = self.dropout1(attended, training=training)
        after_attention = self.layernorm1(inputs + attended)

        # Feed-forward sub-layer
        transformed = self.ffn(after_attention)
        transformed = self.dropout2(transformed, training=training)
        return self.layernorm2(after_attention + transformed)

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created."""
        config = super().get_config()
        config.update(
            d_model=self.d_model,
            num_heads=self.num_heads,
            dff=self.dff,
            dropout_rate=self.dropout_rate,
        )
        return config

def create_transformer_har_model(input_shape, num_classes, d_model=64, num_heads=8, num_layers=2, dff=128, dropout_rate=0.1):
    """Assemble the Transformer-encoder classifier.

    Pipeline: linear projection to ``d_model`` -> positional encoding ->
    ``num_layers`` encoder blocks -> global average pooling over time ->
    two dense/dropout stages -> softmax.

    Args:
        input_shape: ``(timesteps, features)`` of one window;
            ``input_shape[0]`` sizes the positional-encoding table.
        num_classes: Width of the softmax output.
        d_model: Channel dimension used inside the encoder.
        num_heads: Attention heads per encoder block.
        num_layers: Number of stacked encoder blocks.
        dff: Hidden width of each block's feed-forward network.
        dropout_rate: Dropout rate inside the encoder blocks.

    Returns:
        An uncompiled Keras ``Model``.
    """
    inputs = Input(shape=input_shape)

    # Lift the raw features to the encoder width, then inject position info
    hidden = Dense(d_model)(inputs)
    hidden = PositionalEncoding(max_len=input_shape[0], d_model=d_model)(hidden)

    # Encoder stack: every block gets the same hyper-parameters
    block_config = dict(
        d_model=d_model,
        num_heads=num_heads,
        dff=dff,
        dropout_rate=dropout_rate,
    )
    for _ in range(num_layers):
        hidden = TransformerEncoderBlock(**block_config)(hidden)

    # Collapse the time axis
    head = GlobalAveragePooling1D()(hidden)

    # Classification head: (units, dropout) per stage
    for units, drop in ((128, 0.3), (64, 0.2)):
        head = Dense(units, activation='relu')(head)
        head = Dropout(drop)(head)

    # The output layer is pinned to float32 so the probabilities stay in
    # float32 even if a mixed-precision global policy is active
    outputs = Dense(num_classes, activation='softmax', dtype='float32')(head)

    return Model(inputs=inputs, outputs=outputs)

def create_lightweight_transformer(input_shape, num_classes):
    """Build the smallest preset: d_model=32, 4 heads, 2 blocks, dff=64."""
    hyperparams = {
        'd_model': 32,
        'num_heads': 4,
        'num_layers': 2,
        'dff': 64,
        'dropout_rate': 0.1,
    }
    return create_transformer_har_model(
        input_shape=input_shape, num_classes=num_classes, **hyperparams
    )

def create_standard_transformer(input_shape, num_classes):
    """Build the default preset: d_model=64, 8 heads, 2 blocks, dff=128."""
    hyperparams = {
        'd_model': 64,
        'num_heads': 8,
        'num_layers': 2,
        'dff': 128,
        'dropout_rate': 0.1,
    }
    return create_transformer_har_model(
        input_shape=input_shape, num_classes=num_classes, **hyperparams
    )

def create_deep_transformer(input_shape, num_classes):
    """Build the deeper preset: the standard widths with 3 encoder blocks."""
    hyperparams = {
        'd_model': 64,
        'num_heads': 8,
        'num_layers': 3,
        'dff': 128,
        'dropout_rate': 0.1,
    }
    return create_transformer_har_model(
        input_shape=input_shape, num_classes=num_classes, **hyperparams
    )

def create_wide_transformer(input_shape, num_classes):
    """Build the wider preset: d_model=128, 16 heads, 2 blocks, dff=256."""
    hyperparams = {
        'd_model': 128,
        'num_heads': 16,
        'num_layers': 2,
        'dff': 256,
        'dropout_rate': 0.1,
    }
    return create_transformer_har_model(
        input_shape=input_shape, num_classes=num_classes, **hyperparams
    )

# Model selection: menu number -> (display name, factory), numbered from 1
TRANSFORMER_MODEL_CHOICES = dict(enumerate(
    [
        ("Lightweight Transformer", create_lightweight_transformer),
        ("Standard Transformer", create_standard_transformer),
        ("Deep Transformer", create_deep_transformer),
        ("Wide Transformer", create_wide_transformer),
    ],
    start=1,
))

# Show the menu
print("Available Transformer models:")
for key, (name, _) in TRANSFORMER_MODEL_CHOICES.items():
    print(f"{key}. {name}")

# Model dimensions come from the training arrays:
# (timesteps, features) per window and the width of the one-hot targets
input_shape = (X_train.shape[1], X_train.shape[2])
num_classes = y_train.shape[1]

print(f"\nInput shape: {input_shape}")
print(f"Number of classes: {num_classes}")


In [None]:
# Build the Transformer model
# Entry 2 of the menu is the Standard Transformer
selected_choice = TRANSFORMER_MODEL_CHOICES[2]
model_name, model_func = selected_choice
print(f"Building {model_name}...")

# Instantiate the (still uncompiled) network
model = model_func(input_shape, num_classes)

# Custom loss function with label smoothing
def smooth_categorical_crossentropy(y_true, y_pred, alpha=0.1):
    """Categorical cross-entropy against label-smoothed targets.

    Each one-hot target row keeps ``1 - alpha`` of its mass and the remaining
    ``alpha`` is spread evenly over all classes.

    Args:
        y_true: One-hot targets; the last axis is the class axis.
        y_pred: Predicted class probabilities.
        alpha: Amount of probability mass to redistribute.

    Returns:
        Per-sample loss from ``tf.keras.losses.categorical_crossentropy``.
    """
    class_count = tf.cast(tf.shape(y_true)[-1], tf.float32)
    uniform_share = alpha / class_count
    smoothed_targets = y_true * (1.0 - alpha) + uniform_share
    return tf.keras.losses.categorical_crossentropy(smoothed_targets, y_pred)

# Optimizer: AdamW with decoupled weight decay
optimizer = AdamW(learning_rate=0.001, weight_decay=1e-4, beta_1=0.9, beta_2=0.999)

# Metrics: top-3 and top-5 accuracy alongside plain accuracy
top_3_accuracy, top_5_accuracy = (
    TopKCategoricalAccuracy(k=top_k, name=f'top_{top_k}_accuracy') for top_k in (3, 5)
)

# Compile the model with the label-smoothing loss
model.compile(
    optimizer=optimizer,
    loss=smooth_categorical_crossentropy,
    metrics=['accuracy', top_3_accuracy, top_5_accuracy],
)

# Report the model size and layer layout
total_params = model.count_params()
print(f"Total parameters: {total_params:,}")
model.summary()


In [None]:
# Training Configuration

# NOTE(review): every monitored `val_*` quantity comes from the validation
# generator, which this notebook builds from the test split -- so the test
# data also drives early stopping and checkpoint selection.

# Stop when validation accuracy has not improved by 0.001 for 20 epochs,
# then roll back to the best weights
early_stopping = EarlyStopping(
    monitor='val_accuracy',
    min_delta=0.001,
    patience=20,
    restore_best_weights=True,
    verbose=1,
)

# Halve the learning rate after 10 epochs without a validation-loss
# improvement, with a 3-epoch cooldown and a floor of 1e-7
lr_schedule = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=10,
    cooldown=3,
    min_lr=1e-7,
    verbose=1,
)

# Keep the full model (not just weights) with the best validation accuracy
checkpoint = ModelCheckpoint(
    'best_transformer_har_model.keras',
    monitor='val_accuracy',
    save_best_only=True,
    save_weights_only=False,
    verbose=1,
)

# Callbacks
callbacks = [early_stopping, lr_schedule, checkpoint]

# Data generator for training
class SensorDataGenerator(tf.keras.utils.Sequence):
    """Batch provider that can shuffle and augment sensor windows on the fly.

    Args:
        X: NumPy array of input windows, indexed along axis 0.
        y: NumPy array of targets aligned with ``X``.
        batch_size: Number of samples per batch; the last batch may be smaller.
        shuffle: Reshuffle the sample order at construction and whenever
            ``on_epoch_end`` is called.
        augment: When True, each sample in a batch has a 30% chance of being
            replaced by ``augment_sensor_sequence(sample)``.
        **kwargs: Forwarded to the base-class constructor.
    """

    def __init__(self, X, y, batch_size=32, shuffle=True, augment=True, **kwargs):
        # Newer Keras releases expect dataset subclasses to call the base
        # constructor and warn otherwise.
        super().__init__(**kwargs)
        self.X = X
        self.y = y
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.augment = augment
        self.indices = np.arange(len(X))
        # Apply the initial shuffle so the first epoch is not in file order
        self.on_epoch_end()

    def __len__(self):
        """Return the number of batches per epoch (ceil of samples / batch_size)."""
        return int(np.ceil(len(self.X) / self.batch_size))

    def __getitem__(self, index):
        """Return batch ``index`` as ``(X_batch, y_batch)``."""
        indices = self.indices[index * self.batch_size:(index + 1) * self.batch_size]
        # Copy so augmentation never writes back into the stored dataset
        X_batch = self.X[indices].copy()
        y_batch = self.y[indices]

        if self.augment:
            for i in range(len(X_batch)):
                if np.random.random() < 0.3:  # 30% chance of augmentation
                    X_batch[i] = augment_sensor_sequence(X_batch[i])

        return X_batch, y_batch

    def on_epoch_end(self):
        """Reshuffle the sample order in place when shuffling is enabled."""
        if self.shuffle:
            np.random.shuffle(self.indices)

# Create data generators (both use batches of 32)
_BATCH_SIZE = 32

# Training: pre-augmented set, shuffled, with additional on-the-fly augmentation
train_gen = SensorDataGenerator(
    X_train_aug,
    y_train_aug,
    batch_size=_BATCH_SIZE,
    augment=True,
)

# Validation: the held-out split, in fixed order and without augmentation
val_gen = SensorDataGenerator(
    X_test,
    y_test,
    batch_size=_BATCH_SIZE,
    shuffle=False,
    augment=False,
)

print(f"Training batches: {len(train_gen)}")
print(f"Validation batches: {len(val_gen)}")


In [None]:
# Train the Transformer model
print(f"Starting training with {model_name}...")
print(f"Total parameters: {total_params:,}")

# Time the fit call: the results-collection cell stores and prints
# `training_time`, so it has to be measured here.
fit_started_at = time.perf_counter()
history = model.fit(
    train_gen,
    validation_data=val_gen,
    epochs=50,
    callbacks=callbacks,
    verbose=1
)
training_time = time.perf_counter() - fit_started_at
print(f"Training time: {training_time:.2f} seconds")

# Evaluate the model.
# evaluate() returns [loss, accuracy, top-3, top-5] in the order given to compile().
# NOTE(review): val_gen wraps the test split that was also used for early
# stopping / checkpointing, so these figures are not from unseen data.
test_results = model.evaluate(val_gen, verbose=0)
test_loss = test_results[0]
test_acc = test_results[1]
test_top3 = test_results[2] if len(test_results) > 2 else 0
test_top5 = test_results[3] if len(test_results) > 3 else 0

print(f"\n=== TRANSFORMER RESULTS ===")
print(f"Model: {model_name}")
print(f"Parameters: {total_params:,}")
print(f"Test Accuracy: {test_acc*100:.2f}%")
print(f"Top-3 Accuracy: {test_top3*100:.2f}%")
print(f"Top-5 Accuracy: {test_top5*100:.2f}%")


In [None]:
# Visualization and Analysis

# Training history plots: a 2x3 grid
plt.figure(figsize=(15, 10))

# Accuracy
plt.subplot(2, 3, 1)
plt.plot(history.history['accuracy'], label='Train Accuracy', linewidth=2)
plt.plot(history.history['val_accuracy'], label='Val Accuracy', linewidth=2)
plt.title(f'{model_name} - Accuracy\nFinal: {test_acc*100:.1f}%', fontweight='bold')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()
plt.grid(True, alpha=0.3)

# Loss
plt.subplot(2, 3, 2)
plt.plot(history.history['loss'], label='Train Loss', linewidth=2)
plt.plot(history.history['val_loss'], label='Val Loss', linewidth=2)
plt.title('Training Loss', fontweight='bold')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.grid(True, alpha=0.3)

# Top-3 Accuracy
plt.subplot(2, 3, 3)
if 'top_3_accuracy' in history.history:
    plt.plot(history.history['top_3_accuracy'], label='Train Top-3', linewidth=2)
    plt.plot(history.history['val_top_3_accuracy'], label='Val Top-3', linewidth=2)
    plt.title(f'Top-3 Accuracy\nFinal: {test_top3*100:.1f}%', fontweight='bold')
    plt.xlabel('Epoch')
    plt.ylabel('Top-3 Accuracy')
    plt.legend()
    plt.grid(True, alpha=0.3)

# Learning rate
# The history key differs between Keras versions ('lr' in older releases,
# 'learning_rate' in newer ones), so accept either.
plt.subplot(2, 3, 4)
lr_key = next((k for k in ('lr', 'learning_rate') if k in history.history), None)
if lr_key is not None:
    plt.plot(history.history[lr_key], linewidth=2, color='red')
    plt.title('Learning Rate Schedule', fontweight='bold')
    plt.xlabel('Epoch')
    plt.ylabel('Learning Rate')
    plt.yscale('log')
    plt.grid(True, alpha=0.3)

# Model summary (text-only panel)
plt.subplot(2, 3, 5)
plt.text(0.1, 0.8, f'Model: {model_name}', fontsize=12, fontweight='bold')
plt.text(0.1, 0.7, f'Parameters: {total_params:,}', fontsize=10)
plt.text(0.1, 0.6, f'Final Accuracy: {test_acc*100:.2f}%', fontsize=10)
plt.text(0.1, 0.5, f'Top-3 Accuracy: {test_top3*100:.2f}%', fontsize=10)
plt.text(0.1, 0.4, f'Input Shape: {input_shape}', fontsize=9)
plt.text(0.1, 0.3, f'Classes: {num_classes}', fontsize=9)
plt.xlim(0, 1)
plt.ylim(0, 1)
plt.axis('off')
plt.title('Model Summary', fontweight='bold')

# Confusion matrix
# y_pred_classes / y_true_classes are reused by the classification-report cell
plt.subplot(2, 3, 6)
y_pred = model.predict(X_test, verbose=0)
y_pred_classes = np.argmax(y_pred, axis=1)
y_true_classes = np.argmax(y_test, axis=1)
cm = confusion_matrix(y_true_classes, y_pred_classes)

sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', 
            xticklabels=label_encoder.classes_, 
            yticklabels=label_encoder.classes_)
plt.title('Confusion Matrix', fontweight='bold')
plt.xlabel('Predicted')
plt.ylabel('Actual')

plt.tight_layout()
# Save before show(): the results and download cells refer to this PNG.
plt.savefig('transformer_results.png', dpi=300, bbox_inches='tight')
plt.show()


In [None]:
# Detailed Classification Report
from sklearn.metrics import precision_score, recall_score, f1_score

# Text report with one row per class, labelled with the original class names
report_text = classification_report(
    y_true_classes,
    y_pred_classes,
    target_names=label_encoder.classes_,
)
print("\n=== TRANSFORMER CLASSIFICATION REPORT ===")
print(report_text)

# Per-class metrics: average=None returns one score per class
per_class = dict(average=None)
precision = precision_score(y_true_classes, y_pred_classes, **per_class)
recall = recall_score(y_true_classes, y_pred_classes, **per_class)
f1 = f1_score(y_true_classes, y_pred_classes, **per_class)

# One table row per class
metrics_df = pd.DataFrame({
    'Class': label_encoder.classes_,
    'Precision': precision,
    'Recall': recall,
    'F1-Score': f1,
})

print("\n=== PER-CLASS METRICS ===")
print(metrics_df.round(3))

# Save metrics (unrounded) without the index column
metrics_df.to_csv('transformer_har_metrics.csv', index=False)
print("\nTransformer metrics saved to 'transformer_har_metrics.csv'")


In [None]:
# Model Testing and Prediction
import joblib

# Spot-check five randomly chosen test windows
print("\n=== TRANSFORMER SAMPLE PREDICTIONS ===")
for i in range(5):
    sample_idx = np.random.randint(0, len(X_test))

    # Slice rather than index so the batch axis is kept for predict()
    prediction = model.predict(X_test[sample_idx:sample_idx+1], verbose=0)
    predicted_class = np.argmax(prediction)
    confidence = np.max(prediction)
    actual_class = np.argmax(y_test[sample_idx])

    print(f"Sample {i+1}:")
    print(f"  Predicted: {label_encoder.classes_[predicted_class]} ({confidence*100:.1f}%)")
    print(f"  Actual: {label_encoder.classes_[actual_class]}")
    print(f"  Correct: {'✓' if predicted_class == actual_class else '✗'}")
    print()

# Persist the trained model
model.save('transformer_har_model_final.keras')
print("\nTransformer model saved as 'transformer_har_model_final.keras'")

# Persist the fitted scaler and label encoder alongside it
for artifact, artifact_path in (
    (scaler, 'transformer_sensor_scaler.pkl'),
    (label_encoder, 'transformer_sensor_label_encoder.pkl'),
):
    joblib.dump(artifact, artifact_path)
print("Transformer preprocessing objects saved")

# Closing summary
print("\n=== TRANSFORMER TRAINING COMPLETE ===")
print(f"Final Test Accuracy: {test_acc*100:.2f}%")
print(f"Model Parameters: {total_params:,}")
print(f"Classes: {', '.join(label_encoder.classes_)}")


In [None]:
# Comprehensive Results Collection for Journal Publication
import os

print("=== COLLECTING COMPREHENSIVE RESULTS ===")

# Get predictions for comprehensive evaluation
y_pred_proba = model.predict(X_test, verbose=0)
y_pred = np.argmax(y_pred_proba, axis=1)
y_true = np.argmax(y_test, axis=1)

# Calculate comprehensive metrics (macro = unweighted mean over classes)
from sklearn.metrics import precision_score, recall_score, f1_score

accuracy = np.mean(y_pred == y_true)
precision = precision_score(y_true, y_pred, average='macro')
recall = recall_score(y_true, y_pred, average='macro')
f1 = f1_score(y_true, y_pred, average='macro')

# ROC AUC (multi-class, one-vs-rest)
# Report a failure instead of hiding it: a silent 0.0 would be
# indistinguishable from a genuinely bad score.
try:
    auc = roc_auc_score(y_test, y_pred_proba, multi_class='ovr', average='macro')
except ValueError as exc:
    print(f"Warning: ROC AUC could not be computed ({exc}); recording 0.0")
    auc = 0.0

# The training duration is only available if the training cell recorded it;
# fall back to None rather than crashing with a NameError.
training_time = globals().get('training_time')
if training_time is None:
    print("Warning: training_time was not recorded by the training cell")

# Confusion matrix
cm = confusion_matrix(y_true, y_pred)

# Classification report (as a nested dict so it can be serialised)
class_report = classification_report(y_true, y_pred, target_names=label_encoder.classes_, output_dict=True)

# Store comprehensive results
results = {
    'model_name': 'Transformer Encoder',
    'accuracy': float(accuracy),
    'precision': float(precision),
    'recall': float(recall),
    'f1_score': float(f1),
    'auc': float(auc),
    'confusion_matrix': cm.tolist(),
    'classification_report': class_report,
    'predictions': y_pred.tolist(),
    'true_labels': y_true.tolist(),
    'prediction_probabilities': y_pred_proba.tolist(),
    'training_time': training_time,
    'total_parameters': int(total_params),
    'sequence_length': 128,
    'classes': label_encoder.classes_.tolist()
}

# Save results as JSON
with open('transformer_results.json', 'w') as f:
    json.dump(results, f, indent=2)

# Save detailed classification report
class_report_df = pd.DataFrame(class_report).T
class_report_df.to_csv('transformer_classification_report.csv')

# Save the model
model.save('transformer_model_final.keras')

# Save preprocessing objects
joblib.dump(scaler, 'transformer_sensor_scaler.pkl')
joblib.dump(label_encoder, 'transformer_sensor_label_encoder.pkl')

print("=== RESULTS SUMMARY ===")
print(f"Model: Transformer Encoder")
print(f"Accuracy: {accuracy*100:.2f}%")
print(f"Precision: {precision*100:.2f}%")
print(f"Recall: {recall*100:.2f}%")
print(f"F1-Score: {f1*100:.2f}%")
print(f"AUC: {auc*100:.2f}%")
if training_time is None:
    print("Training Time: not recorded")
else:
    print(f"Training Time: {training_time:.2f} seconds")
print(f"Parameters: {total_params:,}")

print("\n=== FILES SAVED ===")
print("- transformer_model_final.keras")
print("- transformer_results.json")
print("- transformer_classification_report.csv")
print("- transformer_sensor_scaler.pkl")
print("- transformer_sensor_label_encoder.pkl")
# Only list the figure if it actually exists on disk
if os.path.exists('transformer_results.png'):
    print("- transformer_results.png (from visualization cell)")

print("\n=== DOWNLOAD FILES ===")
print("Run the next cell to download all results files")


In [None]:
# Download Results Files for Google Colab
print("Downloading all result files...")

# Create a zip file with all results
import zipfile
import os

# List of files to include in the download
files_to_download = [
    'transformer_model_final.keras',
    'transformer_results.json', 
    'transformer_classification_report.csv',
    'transformer_sensor_scaler.pkl',
    'transformer_sensor_label_encoder.pkl'
]

# Add PNG file if it exists (it is optional, so its absence is not reported)
if os.path.exists('transformer_results.png'):
    files_to_download.append('transformer_results.png')

# Create zip file; DEFLATED shrinks the text artefacts (JSON/CSV)
missing_files = []
with zipfile.ZipFile('transformer_results.zip', 'w', compression=zipfile.ZIP_DEFLATED) as zipf:
    for filename in files_to_download:
        if os.path.exists(filename):
            zipf.write(filename)
            print(f"Added {filename} to download")
        else:
            # Record instead of silently skipping, so the closing message is honest
            missing_files.append(filename)

# Download the zip file
files.download('transformer_results.zip')

print("\n=== TRANSFORMER MODEL EXECUTION COMPLETE ===")
if missing_files:
    print(f"Warning: these files were not found and are not in the archive: {', '.join(missing_files)}")
else:
    print("All results have been saved and downloaded!")
print("You can now use these files for your journal publication.")
