In [1]:
#funnel shaped hyperparameters
#evaluated on training set
import os
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from torch.utils.data import DataLoader, TensorDataset
from tqdm import tqdm

# Device
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Paths
DATA_DIR = "/kaggle/input/movement2/movement"
LABEL_PATH = "/kaggle/input/patient-labels2/patient_labels.csv"

# Load functions
def load_bin_file(filepath):
    data = np.fromfile(filepath, dtype=np.float32)
    assert data.shape[0] == 128832
    return data.reshape(132, 976)

def load_all_files(data_dir):
    X = []
    filepaths = sorted([
        os.path.join(data_dir, f) for f in os.listdir(data_dir) if f.endswith(".bin")
    ])
    print(f"Found {len(filepaths)} files.")
    for path in tqdm(filepaths):
        X.append(load_bin_file(path))
    return np.stack(X), filepaths

def load_labels(label_path):
    df = pd.read_csv(label_path)
    return df['label'].values.astype(np.float32)

# Model
class OmniScale1DLight(nn.Module):
    def __init__(self, input_channels=132, neurons=64):
        super(OmniScale1DLight, self).__init__()
        self.features = nn.Sequential(
            nn.Conv1d(input_channels, 64, kernel_size=7, padding=3),
            nn.BatchNorm1d(64),
            nn.ReLU(),

            nn.Conv1d(64, 128, kernel_size=5, padding=2),
            nn.BatchNorm1d(128),
            nn.ReLU(),

            nn.Conv1d(128, 128, kernel_size=3, padding=1),
            nn.BatchNorm1d(128),
            nn.ReLU(),

            nn.AdaptiveAvgPool1d(1)
        )

        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(128, neurons),
            nn.LayerNorm(neurons),
            nn.LeakyReLU(),
            nn.Dropout(0.3),
            nn.Linear(neurons, 1)
        )

    def forward(self, x):
        x = self.features(x)
        return self.classifier(x)

# Main function
def main():
    # Load data
    X, _ = load_all_files(DATA_DIR)
    y = load_labels(LABEL_PATH)

    # Normalize
    X = (X - X.mean(axis=(0, 2), keepdims=True)) / (X.std(axis=(0, 2), keepdims=True) + 1e-6)
    X_tensor = torch.tensor(X, dtype=torch.float32)
    y_tensor = torch.tensor(y, dtype=torch.float32).unsqueeze(1)

    # Class balancing
    num_positives = (y_tensor == 1).sum().item()
    num_negatives = (y_tensor == 0).sum().item()
    pos_weight = torch.tensor([num_negatives / num_positives]).to(DEVICE)

    # Train-val split
    X_train, X_val, y_train, y_val = train_test_split(X_tensor, y_tensor, test_size=0.2, random_state=42)

    # Hyperparameters
    batch_size = 32
    neurons = 64
    lr = 0.001

    print(f"\nTraining with fixed hyperparameters: batch_size={batch_size}, neurons={neurons}, lr={lr}")

    # DataLoaders
    train_loader = DataLoader(TensorDataset(X_train, y_train), batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(TensorDataset(X_val, y_val), batch_size=batch_size)

    # Model, loss, optimizer
    model = OmniScale1DLight(neurons=neurons).to(DEVICE)
    criterion = nn.BCEWithLogitsLoss(pos_weight=pos_weight)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    # Training loop
    best_acc = 0
    best_metrics = {}

    for epoch in range(50):
        model.train()
        for X_batch, y_batch in train_loader:
            X_batch, y_batch = X_batch.to(DEVICE), y_batch.to(DEVICE)
            optimizer.zero_grad()
            outputs = model(X_batch)
            loss = criterion(outputs, y_batch)
            loss.backward()
            optimizer.step()

        # Evaluation on validation set
        model.eval()
        all_preds, all_labels = [], []
        total_loss = 0
        with torch.no_grad():
            for X_batch, y_batch in val_loader:
                X_batch = X_batch.to(DEVICE)
                y_batch = y_batch.to(DEVICE)
                outputs = model(X_batch)
                loss = criterion(outputs, y_batch)
                total_loss += loss.item()
                probs = torch.sigmoid(outputs).cpu().numpy()
                preds = (probs > 0.5).astype(int)
                all_preds.extend(preds)
                all_labels.extend(y_batch.cpu().numpy())

        accuracy = accuracy_score(all_labels, all_preds)
        precision = precision_score(all_labels, all_preds)
        recall = recall_score(all_labels, all_preds)
        f1 = f1_score(all_labels, all_preds)
        val_loss = total_loss / len(val_loader)

        print(f"Epoch {epoch+1} - Accuracy: {accuracy:.4f}, Val Loss: {val_loss:.4f}")

        if accuracy > best_acc:
            best_acc = accuracy
            best_metrics = {
                "precision": precision,
                "recall": recall,
                "f1": f1
            }

    # Final best validation metrics
    print(f"\nBest Validation Accuracy: {best_acc:.4f}")
    print(f"Precision: {best_metrics['precision']:.4f}")
    print(f"Recall: {best_metrics['recall']:.4f}")
    print(f"F1 Score: {best_metrics['f1']:.4f}")

    # -----------------------------------------
    # Training set evaluation
    # -----------------------------------------
    model.eval()
    train_preds, train_labels = [], []
    with torch.no_grad():
        for X_batch, y_batch in train_loader:
            X_batch = X_batch.to(DEVICE)
            y_batch = y_batch.to(DEVICE)
            outputs = model(X_batch)
            probs = torch.sigmoid(outputs).cpu().numpy()
            preds = (probs > 0.5).astype(int)
            train_preds.extend(preds)
            train_labels.extend(y_batch.cpu().numpy())

    train_accuracy = accuracy_score(train_labels, train_preds)
    train_precision = precision_score(train_labels, train_preds)
    train_recall = recall_score(train_labels, train_preds)
    train_f1 = f1_score(train_labels, train_preds)

    print(f"\nTraining Set Performance:")
    print(f"Accuracy: {train_accuracy:.4f}")
    print(f"Precision: {train_precision:.4f}")
    print(f"Recall: {train_recall:.4f}")
    print(f"F1 Score: {train_f1:.4f}")

if __name__ == "__main__":
    main()


Found 469 files.


100%|██████████| 469/469 [00:06<00:00, 70.19it/s]



Training with fixed hyperparameters: batch_size=32, neurons=64, lr=0.001
Epoch 1 - Accuracy: 0.3936, Val Loss: 0.5816
Epoch 2 - Accuracy: 0.5426, Val Loss: 0.5468
Epoch 3 - Accuracy: 0.5532, Val Loss: 0.5449
Epoch 4 - Accuracy: 0.4787, Val Loss: 0.6950
Epoch 5 - Accuracy: 0.5319, Val Loss: 0.6358
Epoch 6 - Accuracy: 0.7021, Val Loss: 0.5718
Epoch 7 - Accuracy: 0.6489, Val Loss: 0.6685
Epoch 8 - Accuracy: 0.6809, Val Loss: 0.6021
Epoch 9 - Accuracy: 0.4894, Val Loss: 0.7941
Epoch 10 - Accuracy: 0.4681, Val Loss: 0.9874
Epoch 11 - Accuracy: 0.5957, Val Loss: 0.5875
Epoch 12 - Accuracy: 0.6809, Val Loss: 0.6011
Epoch 13 - Accuracy: 0.5532, Val Loss: 0.8795
Epoch 14 - Accuracy: 0.7021, Val Loss: 0.7500
Epoch 15 - Accuracy: 0.6277, Val Loss: 0.8752
Epoch 16 - Accuracy: 0.6915, Val Loss: 0.9610
Epoch 17 - Accuracy: 0.5638, Val Loss: 0.8731
Epoch 18 - Accuracy: 0.5532, Val Loss: 0.9353
Epoch 19 - Accuracy: 0.6702, Val Loss: 0.9966
Epoch 20 - Accuracy: 0.6915, Val Loss: 0.9790
Epoch 21 - Accu