In [57]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision.models import resnet18
from sklearn.metrics import confusion_matrix, roc_curve, auc, accuracy_score
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.utils.class_weight import compute_class_weight

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

import pandas as pd
import numpy as np

In [5]:
sequence = pd.read_csv('./data.csv')
label_seq = pd.read_csv('label.csv')

train_labels_flat = label_seq.values.flatten()
train_labels_flat

# Compute class weights (inverse of class frequencies)
class_weights = compute_class_weight(class_weight='balanced', classes=np.unique(train_labels_flat), y=train_labels_flat)
class_weights = torch.tensor(class_weights, dtype=torch.float32)
class_weights

tensor([0.6155, 2.6641])

In [18]:
# Parameters
sequence_length = 256  # Length of each subsequence
num_subsequences = 100  # Number of subsequences to extract = 100 samples

# Get possible starting indices, ensuring there's enough space for a full subsequence
possible_starts = np.arange(0, len(sequence) - sequence_length + 1)
np.random.shuffle(possible_starts)

# Select the first `num_subsequences` starting indices
selected_starts = possible_starts[:num_subsequences]

# Create a list to hold the subsequences
subsequences = [sequence.iloc[start:start + sequence_length].values for start in selected_starts]
sublabels = [label_seq.iloc[start:start + sequence_length].values for start in selected_starts]

# Convert subsequences list to a NumPy array
subsequences_array = np.array(subsequences)
sublabels_array = np.array(sublabels)

# Reshape subsequences_array to (num_subsequences, num_channels, sequence_length)
subsequences_array = subsequences_array.transpose(0, 2, 1)  # From (100, 256, 36) to (100, 36, 256)

# Reshape sublabels_array to (num_subsequences, 1, sequence_length)
sublabels_array = sublabels_array.transpose(0, 2, 1)  # From (100, 256, 1) to (100, 1, 256)

# subsequences_array will have shape (num_subsequences, sequence_length, num_channels)
print(subsequences_array.shape)  # Should print (100, 100, 36)
print(sublabels_array.shape)

(100, 36, 256)
(100, 1, 256)


In [50]:
"""class ResNet1D(nn.Module):
    def __init__(self, input_channels, num_classes=1):
        super(ResNet1D, self).__init__()
        # Use resnet18 but replace the first layer with Conv1d
        self.resnet = resnet18(pretrained=False)
        self.resnet.conv1 = nn.Conv1d(input_channels, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.resnet.fc = nn.Linear(self.resnet.fc.in_features, num_classes)
    
    def forward(self, x):
        return self.resnet(x)


class ResNet1D(nn.Module):
    def __init__(self, input_channels, num_classes=1):
        super(ResNet1D, self).__init__()
        
        # Load the ResNet-18 architecture
        self.resnet = resnet18(pretrained=False)
        
        # Modify the first layer to work with 1D convolutions
        self.resnet.conv1 = nn.Conv1d(input_channels, 64, kernel_size=7, stride=2, padding=3, bias=False)
        
        # Replace 2D BatchNorm and Pooling layers with 1D equivalents
        self.resnet.bn1 = nn.BatchNorm1d(64)
        self.resnet.maxpool = nn.MaxPool1d(kernel_size=3, stride=2, padding=1)
        
        # Modify the fully connected layer to match the number of output classes
        self.resnet.fc = nn.Linear(self.resnet.fc.in_features, num_classes)
    
    def forward(self, x):
        return self.resnet(x)"""

# Define a Residual Block for 1D Convolution with stride=1
class BasicBlock1D(nn.Module):
    expansion = 1

    def __init__(self, in_channels, out_channels, stride=1, downsample=None):
        super(BasicBlock1D, self).__init__()
        self.conv1 = nn.Conv1d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1)
        self.bn1 = nn.BatchNorm1d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv1d(out_channels, out_channels, kernel_size=3, stride=1, padding=1)
        self.bn2 = nn.BatchNorm1d(out_channels)
        self.downsample = downsample

    def forward(self, x):
        identity = x
        if self.downsample is not None:
            identity = self.downsample(x)

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)

        out += identity
        out = self.relu(out)

        return out

# Define ResNet-18 using 1D convolutions without downsampling
class ResNet1D(nn.Module):
    def __init__(self, block, layers, in_channels=36, num_classes=1):
        super(ResNet1D, self).__init__()
        self.in_channels = 64

        # Initial layers
        self.conv1 = nn.Conv1d(in_channels, 64, kernel_size=7, stride=1, padding=3)
        self.bn1 = nn.BatchNorm1d(64)
        self.relu = nn.ReLU(inplace=True)

        # Remove max pooling to preserve time dimension
        # self.maxpool = nn.MaxPool1d(kernel_size=3, stride=2, padding=1)

        # ResNet Layers
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1])
        self.layer3 = self._make_layer(block, 256, layers[2])
        self.layer4 = self._make_layer(block, 512, layers[3])

        # Final layer
        self.conv_out = nn.Conv1d(512, num_classes, kernel_size=1)

        # Ensure output time dimension is preserved
        self.upsample = nn.Upsample(size=256, mode='linear', align_corners=False)

    def _make_layer(self, block, out_channels, blocks, stride=1):
        downsample = None
        if stride != 1 or self.in_channels != out_channels * block.expansion:
            downsample = nn.Sequential(
                nn.Conv1d(self.in_channels, out_channels * block.expansion, kernel_size=1, stride=stride),
                nn.BatchNorm1d(out_channels * block.expansion)
            )

        layers = []
        layers.append(block(self.in_channels, out_channels, stride, downsample))
        self.in_channels = out_channels * block.expansion
        for _ in range(1, blocks):
            layers.append(block(self.in_channels, out_channels))

        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = self.conv_out(x)

        # Apply upsampling to ensure the output time dimension is 256
        x = self.upsample(x)
        
        return torch.sigmoid(x)

# Example of instantiating the model with 36 input channels (your time series data)

# ResNet-18 specific configuration
model = ResNet1D(BasicBlock1D, [2, 2, 2, 2], in_channels=36, num_classes=1).to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))  # Output 1 for binary classification



In [46]:
model

ResNet1D(
  (conv1): Conv1d(36, 64, kernel_size=(7,), stride=(2,), padding=(3,))
  (bn1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool1d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock1D(
      (conv1): Conv1d(64, 64, kernel_size=(3,), stride=(1,), padding=(1,))
      (bn1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv1d(64, 64, kernel_size=(3,), stride=(1,), padding=(1,))
      (bn2): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock1D(
      (conv1): Conv1d(64, 64, kernel_size=(3,), stride=(1,), padding=(1,))
      (bn1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv1d(64, 64, kernel_size=(3,), stride=(1,), paddin

In [51]:
model(train_data).shape, train_data.shape, train_labels.shape

(torch.Size([80, 1, 256]), torch.Size([80, 36, 256]), torch.Size([80, 1, 256]))

In [52]:
# Define the dataset
class NeuroimagingDataset(torch.utils.data.Dataset):
    def __init__(self, data, labels):
        self.data = data
        self.labels = labels

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return torch.tensor(self.data[idx], dtype=torch.float32), torch.tensor(self.labels[idx], dtype=torch.float32)
    
neuroimaging_data = subsequences_array
labels = sublabels_array

input_size = neuroimaging_data.shape[2]  # Number of features (channels)

# Preprocessing (e.g., normalization)
scaler = StandardScaler()
neuroimaging_data = scaler.fit_transform(neuroimaging_data.reshape(-1, input_size)).reshape(neuroimaging_data.shape)

# Convert numpy arrays to PyTorch tensors
train_data, test_data, train_labels, test_labels = train_test_split(neuroimaging_data, labels, test_size=0.2, random_state=42)
train_data, test_data = torch.tensor(train_data, dtype=torch.float32).to(torch.device("cuda" if torch.cuda.is_available() else "cpu")), torch.tensor(test_data, dtype=torch.float32).to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))
train_labels, test_labels = torch.tensor(train_labels, dtype=torch.float32).to(torch.device("cuda" if torch.cuda.is_available() else "cpu")), torch.tensor(test_labels, dtype=torch.float32).to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))

# Create dataset and dataloader
train_dataset = NeuroimagingDataset(train_data, train_labels)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=16, shuffle=True)


In [53]:
criterion = nn.BCELoss()  # Binary Cross-Entropy loss
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [54]:
# Training loop
for epoch in range(10):  # Number of epochs
    model.train()
    running_loss = 0.0
    for data, labels in train_loader:
        optimizer.zero_grad()

        # Forward pass
        outputs = model(data)
        loss = criterion(outputs, labels)
        
        # Backward pass and optimization
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
    
    print(f'Epoch {epoch+1}, Loss: {running_loss/len(train_loader)}')

print("Training complete.")

  return torch.tensor(self.data[idx], dtype=torch.float32), torch.tensor(self.labels[idx], dtype=torch.float32)


Epoch 1, Loss: 0.9839934825897216
Epoch 2, Loss: 0.20158927142620087
Epoch 3, Loss: 0.05556968897581101
Epoch 4, Loss: 0.015971654653549196
Epoch 5, Loss: 0.009441157430410385
Epoch 6, Loss: 0.004571363562718034
Epoch 7, Loss: 0.0025194106390699746
Epoch 8, Loss: 0.0017326485132798553
Epoch 9, Loss: 0.0010830884915776552
Epoch 10, Loss: 0.000905243307352066
Training complete.


In [55]:

# Define the evaluation function
def evaluate(model, dataloader):
    model.eval()
    all_labels = []
    all_preds = []
    
    with torch.no_grad():
        for data, labels in dataloader:
            outputs = model(data)
            preds = (outputs > 0.5).float()  # Convert probabilities to binary predictions
            all_labels.append(labels.cpu().numpy())
            all_preds.append(preds.cpu().numpy())

    # Flatten the lists to compare predictions and labels
    all_labels = np.concatenate(all_labels, axis=0).flatten()
    all_preds = np.concatenate(all_preds, axis=0).flatten()

    # Calculate accuracy
    acc = accuracy_score(all_labels, all_preds)
    
    # Calculate confusion matrix
    cm = confusion_matrix(all_labels, all_preds)

    return acc, cm

In [58]:
acc, cm = evaluate(model, train_loader)
print(f'Accuracy: {acc:.4f}')
print('Confusion Matrix:')
print(cm)

  return torch.tensor(self.data[idx], dtype=torch.float32), torch.tensor(self.labels[idx], dtype=torch.float32)


Accuracy: 0.9999
Confusion Matrix:
[[16232     2]
 [    0  4246]]
