<a href="https://colab.research.google.com/github/vbonato/cnnTestBench/blob/main/pauNaJaca.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install torch torchvision

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive





---


---



---
**Dataset WISDM**



---



---



---





In [None]:
# Create time windows

import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from sklearn.preprocessing import StandardScaler
from torch.utils.data import DataLoader
import re

# Function to create time windows
def create_time_windows(data, labels, window_size):
    X = []
    y = []

    for i in range(len(data) - window_size):
        X.append(data[i:i + window_size])  # Select window of data
        y.append(labels[i + (window_size-1)])  # Label is from the last element of the window

    return np.array(X), np.array(y)

# Updated data loading function
def load_data(file_path):
    features = []
    labels = []

    with open(file_path, 'r') as f:
        for line in f:
            # Use regular expression to match the pattern {{feature_vector}, label}
            match = re.match(r"\{\{([0-9.,-]+)\},\s*(\d+)\}", line.strip())

            if match:
                # Extract feature vector and label
                feature_str = match.group(1)  # The feature string "8.24,-2.11,3.87"
                label = int(match.group(2))  # The label "4"

                # Convert the feature string to a list of floats
                feature_vector = list(map(float, feature_str.split(',')))

                features.append(feature_vector)
                labels.append(label)

    return np.array(features), np.array(labels)

# Load train and test data
train_data_file = '/content/drive/MyDrive/MyBox/prof/projetos de pesquisa/pauNaJaca/dataset/WISDM/HAR-Dataset/train.dat'  # Adjust path to your file
test_data_file = '/content/drive/MyDrive/MyBox/prof/projetos de pesquisa/pauNaJaca/dataset/WISDM/HAR-Dataset/test.dat'  # Adjust path to your file

# Define the window size
window_size = 100

# Create time windows
X_train, y_train = load_data(train_data_file)
X_train, y_train = create_time_windows(X_train, y_train, window_size)

X_test, y_test = load_data(test_data_file)
X_test, y_test = create_time_windows(X_test, y_test, window_size)

# Print the first feature vector and label
print("First feature vector in X_train:")
print(X_train[0])  # First row (first feature vector)
print("First label in y_train:")
print(y_train[0])  # First label

# Convert to PyTorch tensors
X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.long)  # For classification (long type for labels)

X_test = torch.tensor(X_test, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.long)

# Print the first feature vector and label
print("First feature vector in X_train:")
print(X_train[0])  # First row (first feature vector)
print("First label in y_train:")
print(y_train[0])  # First label

# Check the shapes of the loaded data
print(f"X_train shape: {X_train.shape}, y_train shape: {y_train.shape}")
print(f"X_test shape: {X_test.shape}, y_test shape: {y_test.shape}")

In [26]:
# Define a model and data normalization according to the train data set

class SimpleCNN(nn.Module):
    def __init__(self):
        super(SimpleCNN, self).__init__()

        # Assume 'train_data_file' is your original training data
        X_train_temp, y_train_temp = load_data(train_data_file)
        scaler = StandardScaler()
        scaler.fit(X_train_temp)

        # Just to print
        original_means = scaler.mean_
        original_stds = scaler.scale_
        print(f"MEANS = {original_means.tolist()}")
        print(f"STDS = {original_stds.tolist()}")

        # Convert the numpy arrays to torch tensors
        means_tensor = torch.tensor(scaler.mean_, dtype=torch.float32)
        stds_tensor = torch.tensor(scaler.scale_, dtype=torch.float32)

        # Register them as non-trainable buffers
        # They are not considered model parameters to be trained.
        self.register_buffer('means', means_tensor)
        self.register_buffer('stds', stds_tensor)
        # ------------------------------------

        # CNN layers
        self.conv1 = nn.Conv1d(in_channels=3, out_channels=16, kernel_size=3, padding=1)  # input channels = number of features
        #self.conv2 = nn.Conv1d(in_channels=16, out_channels=16, kernel_size=3, padding=1)  # input channels = number of features

        self.pool = nn.MaxPool1d(kernel_size=2, stride=2)

        # Fully connected layers
        # Calculate the input size for the fully connected layer based on the output size of conv1
        self.fc1 = nn.Linear(16 * (X_train.shape[1] // 2), 64)  # Flattened size after pooling
        self.fc2 = nn.Linear(64, 6)  # Assuming 6 classes for classification

    def forward(self, x):
        x = (x - self.means) / self.stds

        x = x.permute(0, 2, 1)  # Add channel dimension (change shape to [batch_size, channels, seq_length])

        #x = torch.relu(self.conv1(x))  # Apply conv1 and pooling
        #x = self.pool(torch.relu(self.conv2(x)))  # Apply conv2 and pooling
        x = self.pool(torch.relu(self.conv1(x)))  # Apply conv1 and pooling

        x = x.view(-1, 16 * (x.shape[2]))  # Flatten for fully connected layer
        x = torch.relu(self.fc1(x))  # Apply first fully connected layer
        x = self.fc2(x)  # Output layer (no activation since we'll apply softmax in loss)
        return x


# Initialize the model
model = SimpleCNN()

# Loss function (cross-entropy for classification)
criterion = nn.CrossEntropyLoss()

# Optimizer (Adam)
optimizer = optim.Adam(model.parameters(), lr=0.001)

# DataLoader for batching
train_dataset = torch.utils.data.TensorDataset(X_train, y_train)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

MEANS = [1.0182769835326968, 7.7139884937195875, 0.3890242449048468]
STDS = [6.662064228097328, 6.77728797625153, 4.938553052818819]


In [27]:
# Training loop
num_epochs = 1  # You can adjust the number of epochs

for epoch in range(num_epochs):
    model.train()  # Set model to training mode
    running_loss = 0.0
    correct_preds = 0
    total_preds = 0

    for inputs, labels in train_loader:
        optimizer.zero_grad()  # Zero the gradients
        outputs = model(inputs)  # Forward pass

        # Calculate loss
        loss = criterion(outputs, labels)
        loss.backward()  # Backward pass
        optimizer.step()  # Update weights

        # Track loss and accuracy
        running_loss += loss.item()

        _, predicted = torch.max(outputs, 1)
        correct_preds += (predicted == labels).sum().item()
        total_preds += labels.size(0)

    epoch_loss = running_loss / len(train_loader)
    epoch_acc = correct_preds / total_preds
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss:.4f}, Accuracy: {epoch_acc:.4f}")

Epoch 1/1, Loss: 0.1807, Accuracy: 0.9392


In [28]:
# Evaluate the model
model.eval()  # Set model to evaluation mode
with torch.no_grad():
    outputs = model(X_test)
    _, predicted = torch.max(outputs, 1)
    correct_preds = (predicted == y_test).sum().item()
    accuracy = correct_preds / len(y_test)
    print(f"Test Accuracy: {accuracy:.4f}")

Test Accuracy: 0.7893


In [29]:
# Save the trained model
torch.save(model.state_dict(), 'simple_cnn_model.pth')

# Load the model (if needed)
model = SimpleCNN()
model.load_state_dict(torch.load('simple_cnn_model.pth'))
model.eval()

MEANS = [1.0182769835326968, 7.7139884937195875, 0.3890242449048468]
STDS = [6.662064228097328, 6.77728797625153, 4.938553052818819]


SimpleCNN(
  (conv1): Conv1d(3, 16, kernel_size=(3,), stride=(1,), padding=(1,))
  (pool): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=800, out_features=64, bias=True)
  (fc2): Linear(in_features=64, out_features=6, bias=True)
)

In [None]:
# --- Apply pruning and export whole model ---

import torch
import torch.nn.utils.prune as prune
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset

# --- Assume your SimpleCNN and data (X_train, y_train, X_test, y_test, window_size) are already defined --- #

# Initialize the model and load trained weights
model = SimpleCNN()
model.load_state_dict(torch.load('simple_cnn_model.pth'))
model.eval()  # Set to evaluation mode

# --- Optional: Evaluate original model accuracy ---
with torch.no_grad():
    outputs = model(X_test)
    _, predicted = torch.max(outputs, 1)
    accuracy = (predicted == y_test).sum().item() / len(y_test)
    print(f"Original Test Accuracy: {accuracy:.4f}")

# --- Apply pruning ---
prune_amount = 0.3  # prune 30% of the weights

# Layer-wise pruning
prune.l1_unstructured(model.conv1, name='weight', amount=prune_amount)
prune.l1_unstructured(model.fc1, name='weight', amount=prune_amount)
prune.l1_unstructured(model.fc2, name='weight', amount=prune_amount)

# --- Optional: Evaluate pruned model (with mask) ---
with torch.no_grad():
    outputs = model(X_test)
    _, predicted = torch.max(outputs, 1)
    accuracy = (predicted == y_test).sum().item() / len(y_test)
    print(f"Test Accuracy after pruning (with mask): {accuracy:.4f}")

# --- Make pruning permanent (remove mask) ---
prune.remove(model.conv1, 'weight')
prune.remove(model.fc1, 'weight')
prune.remove(model.fc2, 'weight')

# --- Evaluate pruned model (permanent pruning) ---
with torch.no_grad():
    outputs = model(X_test)
    _, predicted = torch.max(outputs, 1)
    accuracy = (predicted == y_test).sum().item() / len(y_test)
    print(f"Test Accuracy after permanent pruning: {accuracy:.4f}")

# --- Save pruned model weights ---
torch.save(model.state_dict(), 'simple_cnn_pruned.pth')
print("✅ Pruned model saved as simple_cnn_pruned.pth")

# --- Export the pruned model to ONNX for Netron ---
dummy_input = torch.randn(1, window_size, 3)  # batch_size, seq_len, features
onnx_file = "simple_cnn_pruned.onnx"

torch.onnx.export(
    model,                # pruned model
    dummy_input,          # dummy input
    onnx_file,            # output file
    export_params=True,   # store trained weights
    opset_version=12,
    input_names=['input'],
    output_names=['output']
)

print(f"✅ Pruned model exported to ONNX: {onnx_file}")


In [None]:
pip install onnx onnxruntime

In [31]:

# --- Exports the model structure and weights into a single .onnx file ---

import torch

# Load the saved weights from your training session
model.load_state_dict(torch.load('simple_cnn_model.pth'))
model.eval() # Set the model to evaluation mode

# --- Export to ONNX ---
# Create a dummy input tensor with the correct shape: [batch_size, seq_length, features]
# This shape must match your model's input exactly.
dummy_input = torch.randn(1, window_size, 3)

# Define the output file name
onnx_file = "simple_cnn_wisdm.onnx"

print(f"Exporting model to {onnx_file}...")

# Export the model
torch.onnx.export(model,               # The model to export
                  dummy_input,         # A sample input
                  onnx_file,      # Where to save the model
                  export_params=True,  # Store the trained weights
                  opset_version=12,    # The ONNX version to use
                  input_names=['input'], # The name for the input tensor
                  output_names=['output']) # The name for the output tensor

print("Model has been successfully converted to ONNX format! ✅")

Exporting model to simple_cnn_wisdm.onnx...
Model has been successfully converted to ONNX format! ✅


  torch.onnx.export(model,               # The model to export


In [None]:
# --- Convert from onnx to tf --- #

!pip install onnx2tf
!pip install onnx_graphsurgeon
!pip install ai-edge-litert
!pip install sng4onnx


!onnx2tf -i simple_cnn_wisdm.onnx -o my_tf_model
#!onnx2tf -i simple_cnn_pruned.onnx -o my_tf_model



In [40]:
# --- Quantize the tf model using TFLM and export as .h --- #
# --- TFLM (TensorFlow Lite for Microcontrollers) --- #

import tensorflow as tf
import numpy as np # Make sure numpy is imported

# This function is your calibration step
def representative_data_gen():
    """
    Feeds sample data from X_train to the converter.
    """
    # Convert X_train tensor to a NumPy array if it isn't already
    # We only need a subset, e.g., the first 100 samples
    x_train_np = X_train.numpy() if isinstance(X_train, torch.Tensor) else X_train
    num_calibration_samples = min(100, x_train_np.shape[0]) # Use up to 100 samples

    print(f"Providing {num_calibration_samples} samples from X_train for calibration...")

    for i in range(num_calibration_samples):
        # 1. Get one window from X_train. Shape: [window_size, features] (e.g., [100, 3])
        sample = x_train_np[i]

        # 2. Transpose to match model input: [features, window_size] (e.g., [3, 100])
        #    This is CRUCIAL because Conv1D expects channels (features) first.
        sample_transposed = np.transpose(sample, (1, 0))

        # 3. Add the batch dimension: [1, features, window_size] (e.g., [1, 3, 100])
        sample_batch = np.expand_dims(sample_transposed, axis=0).astype(np.float32)

        #print(sample_batch)

        # 4. Yield the data in the required list format
        yield [sample_batch]




converter = tf.lite.TFLiteConverter.from_saved_model('my_tf_model')
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.representative_dataset = representative_data_gen
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
converter.inference_input_type = tf.int8
converter.inference_output_type = tf.int8

tflite_model_quant = converter.convert()

with open('simple_cnn_quantized.tflite', 'wb') as f:
#with open('simple_cnn_prunedANDquantized.tflite', 'wb') as f:
    f.write(tflite_model_quant)

print("✅ Success! Your quantized model has been saved as simple_cnn_xxx.tflite")


!xxd -i model_for_microbit.tflite > model_data.h

✅ Success! Your quantized model has been saved as simple_cnn_xxx.tflite
xxd: model_for_microbit.tflite: No such file or directory


In [41]:
# --- Test int8 TFLite model properly --- #
# --- Test for pruned, quantized or prunedANDquantized ---#

import tensorflow as tf
import numpy as np

# Load the TFLite model
tflite_model_path = 'simple_cnn_quantized.tflite'
#tflite_model_path = 'simple_cnn_prunedANDquantized.tflite'
interpreter = tf.lite.Interpreter(model_path=tflite_model_path)
interpreter.allocate_tensors()

# Get input and output details
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

print("Input details:", input_details)
print("Output details:", output_details)

# Prepare test data as NumPy arrays
X_test_np = X_test.numpy() if isinstance(X_test, torch.Tensor) else X_test
y_test_np = y_test.numpy() if isinstance(y_test, torch.Tensor) else y_test

# Get input quantization parameters
input_scale, input_zero_point = input_details[0]['quantization']
output_scale, output_zero_point = output_details[0]['quantization']

# Accuracy tracking
correct_preds = 0

for i in range(len(X_test_np)):
    # 1. Take one sample
    sample = X_test_np[i]  # shape: [window_size, features]

    # 2. Transpose to [features, window_size]
    sample_transposed = np.transpose(sample, (1, 0))

    # 3. Add batch dimension
    sample_batch = np.expand_dims(sample_transposed, axis=0).astype(np.float32)

    # 4. Quantize input
    input_data = np.round(sample_batch / input_scale + input_zero_point).astype(np.int8)

    # 5. Set input tensor
    interpreter.set_tensor(input_details[0]['index'], input_data)

    # 6. Invoke the interpreter
    interpreter.invoke()

    # 7. Get output and dequantize
    output_data = interpreter.get_tensor(output_details[0]['index'])
    output_data = output_scale * (output_data.astype(np.float32) - output_zero_point)

    # 8. Predicted class
    predicted_class = np.argmax(output_data)

    # 9. Compare with true label
    if predicted_class == y_test_np[i]:
        correct_preds += 1

# Calculate accuracy
accuracy = correct_preds / len(X_test_np)
print(f"Test Accuracy of TFLite int8 model: {accuracy:.4f}")


Input details: [{'name': 'serving_default_input:0', 'index': 0, 'shape': array([  1,   3, 100], dtype=int32), 'shape_signature': array([  1,   3, 100], dtype=int32), 'dtype': <class 'numpy.float32'>, 'quantization': (0.0, 0), 'quantization_parameters': {'scales': array([], dtype=float32), 'zero_points': array([], dtype=int32), 'quantized_dimension': 0}, 'sparsity_parameters': {}}]
Output details: [{'name': 'PartitionedCall:0', 'index': 23, 'shape': array([1, 6], dtype=int32), 'shape_signature': array([1, 6], dtype=int32), 'dtype': <class 'numpy.float32'>, 'quantization': (0.0, 0), 'quantization_parameters': {'scales': array([], dtype=float32), 'zero_points': array([], dtype=int32), 'quantized_dimension': 0}, 'sparsity_parameters': {}}]


  input_data = np.round(sample_batch / input_scale + input_zero_point).astype(np.int8)
  input_data = np.round(sample_batch / input_scale + input_zero_point).astype(np.int8)


ValueError: Cannot set tensor: Got value of type INT8 but expected type FLOAT32 for input 0, name: serving_default_input:0 





---


---



---
**Dataset PAMAP2**



---



---

In [None]:
#remove columns heartrate and temp of the three IMUs

import pandas as pd

# Define the file path
train_data_file = '/content/drive/MyDrive/MyBox/prof/projetos de pesquisa/pauNaJaca/dataset/PAMAP2/pamap2.csv'

# Load the CSV file into a DataFrame
df = pd.read_csv(train_data_file)

# Remove columns
df = df.drop(df.columns[2], axis=1)
df = df.drop(df.columns[2], axis=1)
df = df.drop(df.columns[11], axis=1)
df = df.drop(df.columns[20], axis=1)


# Save the updated DataFrame to a new CSV file
output_file = '/content/drive/MyDrive/MyBox/prof/projetos de pesquisa/pauNaJaca/dataset/PAMAP2/pamap2_columns_removed.csv'
df.to_csv(output_file, index=False)

# Print a message to confirm that the file was saved
print(f"Updated CSV saved to: {output_file}")


In [None]:
# Replace the activity names in the first column with their corresponding IDs

import pandas as pd

# Define the file path for the modified CSV
modified_train_data_file = '/content/drive/MyDrive/MyBox/prof/projetos de pesquisa/pauNaJaca/dataset/PAMAP2/pamap2_columns_removed.csv'

# Load the modified CSV into a DataFrame without assuming a header
df = pd.read_csv(modified_train_data_file, header=None)

# Print the first few rows to check the content
print("Original DataFrame first few rows:")
print(df.head())

# Create a dictionary to map activity names to activity IDs
activity_map = {
    'lying': 1,
    'sitting': 2,
    'standing': 3,
    'walking': 4,
    'running': 5,
    'cycling': 6,
    'nordic_walking': 7,
    'watching_TV': 9,
    'computer_work': 10,
    'car_driving': 11,
    'ascending_stairs': 12,
    'descending_stairs': 13,
    'vacuum_cleaning': 16,
    'ironing': 17,
    'folding_laundry': 18,
    'house_cleaning': 19,
    'playing_soccer': 20,
    'rope_jumping': 24,
    'other': 0
}

# Replace the activity names in the first column with their corresponding IDs
df.iloc[:, 0] = df.iloc[:, 0].map(activity_map)

# Print the first few rows after the transformation to ensure it's working
print("\nDataFrame after replacing activity names with IDs:")
print(df.head())

# Save the updated DataFrame to a new CSV file
output_file = '/content/drive/MyDrive/MyBox/prof/projetos de pesquisa/pauNaJaca/dataset/PAMAP2/pamap2_with_activityIDs.csv'
df.to_csv(output_file, index=False, header=False)

# Print a confirmation message
print(f"Updated CSV with activity IDs saved to: {output_file}")


In [None]:
#Split CSV in train (excluding user 5) and test (only user 5) csv files

import pandas as pd

# Define the file path for the CSV file containing activity IDs
train_data_file_with_ids = '/content/drive/MyDrive/MyBox/prof/projetos de pesquisa/pauNaJaca/dataset/PAMAP2/pamap2_with_activityIDs.csv'

# Load the CSV file into a DataFrame without assuming a header
df = pd.read_csv(train_data_file_with_ids, header=None)

# Print the first few rows to check the content
print("Original DataFrame first few rows:")
print(df.head())

# Filter rows where the user ID is 5 and save them as test_pamap2.csv
test_df = df[df.iloc[:, 1] == 5]
test_df.to_csv('/content/drive/MyDrive/MyBox/prof/projetos de pesquisa/pauNaJaca/dataset/PAMAP2/test_pamap2.csv', index=False, header=False)

# Filter rows where the user ID is not 5 (excluding) and save them as train_pamap2.csv
train_df = df[df.iloc[:, 1] != 5]
train_df.to_csv('/content/drive/MyDrive/MyBox/prof/projetos de pesquisa/pauNaJaca/dataset/PAMAP2/train_pamap2.csv', index=False, header=False)

# Print a confirmation message
print("Data has been split into train_pamap2.csv (excluding user ID 5) and test_pamap2.csv (user ID 5 only).")


In [None]:
#Convert .csv to .dat

import pandas as pd

# Define file paths
train_file = '/content/drive/MyDrive/MyBox/prof/projetos de pesquisa/pauNaJaca/dataset/PAMAP2/train_pamap2.csv'
test_file = '/content/drive/MyDrive/MyBox/prof/projetos de pesquisa/pauNaJaca/dataset/PAMAP2/test_pamap2.csv'

# Function to convert rows into the required format and save as .dat
def convert_to_dat(input_file, output_file):
    # Load the CSV file without a header
    df = pd.read_csv(input_file, header=None)

    # Open the output .dat file to write
    with open(output_file, 'w') as file:
        for _, row in df.iterrows():
            # Convert row into the required format: {{column 2, column 3, column 4, ...}, column 0}
            row_data = "{{" + ",".join(map(str, row[2:])) + "}," + str(int(row[0])) + "}\n"
            file.write(row_data)

# Convert train_pamap2.csv to train_pamap2.dat
convert_to_dat(train_file, '/content/drive/MyDrive/MyBox/prof/projetos de pesquisa/pauNaJaca/dataset/PAMAP2/train_pamap2.dat')

# Convert test_pamap2.csv to test_pamap2.dat
convert_to_dat(test_file, '/content/drive/MyDrive/MyBox/prof/projetos de pesquisa/pauNaJaca/dataset/PAMAP2/test_pamap2.dat')

print("Conversion to .dat files is complete!")


Conversion to .dat files is complete!


In [None]:
# Create time windows

import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from torch.utils.data import DataLoader
import re

# Function to create time windows
def create_time_windows(data, labels, window_size):
    X = []
    y = []

    for i in range(len(data) - window_size):
        X.append(data[i:i + window_size])  # Select window of data
        y.append(labels[i + (window_size-1)])  # Label is from the last element of the window

    return np.array(X), np.array(y)

# Updated data loading function
def load_data(file_path):
    features = []
    labels = []

    with open(file_path, 'r') as f:
        for line in f:
            # Use regular expression to match the pattern {{feature_vector}, label}
            match = re.match(r"\{\{([0-9.,-]+)\},\s*(\d+)\}", line.strip())

            if match:
                # Extract feature vector and label
                feature_str = match.group(1)  # The feature string "8.24,-2.11,3.87"
                label = int(match.group(2))  # The label "4"

                # Convert the feature string to a list of floats
                feature_vector = list(map(float, feature_str.split(',')))

                features.append(feature_vector)
                labels.append(label)

    return np.array(features), np.array(labels)

# Load train and test data
train_data_file = '/content/drive/MyDrive/MyBox/prof/projetos de pesquisa/pauNaJaca/dataset/PAMAP2/train_pamap2.dat'  # Adjust path to your file
test_data_file = '/content/drive/MyDrive/MyBox/prof/projetos de pesquisa/pauNaJaca/dataset/PAMAP2/test_pamap2.dat'  # Adjust path to your file

print("File loaded:")

# Define the window size
window_size = 10

# Create time windows
X_train, y_train = load_data(train_data_file)
X_train, y_train = create_time_windows(X_train, y_train, window_size)

X_test, y_test = load_data(test_data_file)
X_test, y_test = create_time_windows(X_test, y_test, window_size)

print("Time window created:")

# Print the first feature vector and label
print("First feature vector in X_train:")
print(X_train[0])  # First row (first feature vector)
print("Second feature vector in X_train:")
print(X_train[1])  # First row (first feature vector)

print("First label in y_train:")
print(y_train[0])  # First label
print("Second label in y_train:")
print(y_train[1])  # First label

# Convert to PyTorch tensors
X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.long)  # For classification (long type for labels)

X_test = torch.tensor(X_test, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.long)

# # Print the first feature vector and label
# print("First feature vector in X_train:")
# print(X_train[0])  # First row (first feature vector)
# print("First label in y_train:")
# print(y_train[0])  # First label

# Check the shapes of the loaded data
print(f"X_train shape: {X_train.shape}, y_train shape: {y_train.shape}")
print(f"X_test shape: {X_test.shape}, y_test shape: {y_test.shape}")

In [None]:
# Define a model

class SimpleCNN(nn.Module):
    def __init__(self):
        super(SimpleCNN, self).__init__()

        # CNN layers
        self.conv1 = nn.Conv1d(in_channels=27, out_channels=27, kernel_size=3, padding=1)  # input channels = number of features
        self.conv2 = nn.Conv1d(in_channels=27, out_channels=16, kernel_size=3, padding=1)  # input channels = number of features

        self.pool = nn.MaxPool1d(kernel_size=2, stride=2)

        # Fully connected layers
        # Calculate the input size for the fully connected layer based on the output size of conv1
        self.fc1 = nn.Linear(16 * (X_train.shape[1] // 2), 64)  # Flattened size after pooling
        self.fc2 = nn.Linear(64, 25)  # Assuming 25 classes for classification

    def forward(self, x):
        x = x.permute(0, 2, 1)  # Add channel dimension (change shape to [batch_size, channels, seq_length])

        x = torch.relu(self.conv1(x))  # Apply conv1 and pooling
        x = self.pool(torch.relu(self.conv2(x)))  # Apply conv2 and pooling
        #x = self.pool(torch.relu(self.conv1(x)))  # Apply conv1 and pooling

        x = x.view(-1, 16 * (x.shape[2]))  # Flatten for fully connected layer
        x = torch.relu(self.fc1(x))  # Apply first fully connected layer
        x = self.fc2(x)  # Output layer (no activation since we'll apply softmax in loss)

        return x


# Initialize the model
model = SimpleCNN()

# Loss function (cross-entropy for classification)
criterion = nn.CrossEntropyLoss()

# Optimizer (Adam)
optimizer = optim.Adam(model.parameters(), lr=0.001)

# DataLoader for batching
train_dataset = torch.utils.data.TensorDataset(X_train, y_train)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

In [None]:
# Training loop

num_epochs = 3  # You can adjust the number of epochs

for epoch in range(num_epochs):
    model.train()  # Set model to training mode
    running_loss = 0.0
    correct_preds = 0
    total_preds = 0

    for inputs, labels in train_loader:
        optimizer.zero_grad()  # Zero the gradients
        outputs = model(inputs)  # Forward pass

        # Calculate loss
        loss = criterion(outputs, labels)
        loss.backward()  # Backward pass
        optimizer.step()  # Update weights

        # Track loss and accuracy
        running_loss += loss.item()

        _, predicted = torch.max(outputs, 1)
        correct_preds += (predicted == labels).sum().item()
        total_preds += labels.size(0)

    epoch_loss = running_loss / len(train_loader)
    epoch_acc = correct_preds / total_preds
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss:.4f}, Accuracy: {epoch_acc:.4f}")

Epoch 1/3, Loss: 0.2979, Accuracy: 0.9041
Epoch 2/3, Loss: 0.1936, Accuracy: 0.9378
Epoch 3/3, Loss: 0.1731, Accuracy: 0.9447


In [None]:
# Evaluate the model
model.eval()  # Set model to evaluation mode
with torch.no_grad():
    outputs = model(X_test)
    _, predicted = torch.max(outputs, 1)
    correct_preds = (predicted == y_test).sum().item()
    accuracy = correct_preds / len(y_test)
    print(f"Test Accuracy: {accuracy:.4f}")

Test Accuracy: 0.7133
