# CNN only, run 10 times

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import LabelEncoder
from torch.utils.data import Dataset, DataLoader
import random
import matplotlib.pyplot as plt
from itertools import product
from collections import defaultdict

# Constants
k = 6
input_fasta = 'data1/fungi_ITS_sorted90.fasta'
cleaned_fasta = 'data1/fungi_ITS_cleaned.fasta'
kmer_vector_file = 'data1/fungi_ITS_kmer_vector.txt'
filtered_vector_file = 'data1/fungi_ITS_kmer_vector_filtered.txt'
num_epochs = 100
num_runs = 10

# Helper functions
def generate_kmers(k):
    """Return every length-`k` string over the alphabet 'ACGT'.

    The strings are produced in the order `itertools.product` yields them
    (lexicographic for this alphabet), so the list has 4**k entries.
    """
    return list(map(''.join, product('ACGT', repeat=k)))

def kmer_vector(seq, k, kmer_index):
    """Count the k-mers of `seq` into a dense vector.

    Args:
        seq: Sequence string to scan with a sliding window of width `k`.
        k: Window width.
        kmer_index: Mapping from k-mer string to its position in the vector.

    Returns:
        A list of length `len(kmer_index)`; entry `kmer_index[w]` is the
        number of windows equal to `w`. Windows that are not keys of
        `kmer_index` are ignored.
    """
    counts = [0] * len(kmer_index)
    lookup = kmer_index.get
    for start in range(len(seq) - k + 1):
        slot = lookup(seq[start:start + k])
        if slot is not None:
            counts[slot] += 1
    return counts

class KmerDataset(Dataset):
    """Dataset pairing k-mer count vectors with their labels.

    Args:
        data: Indexable collection of per-sample count vectors.
        labels: Indexable collection of labels, aligned with `data`.
    """

    def __init__(self, data, labels):
        self.data = data
        self.labels = labels

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        # The count vectors are integers; without an explicit dtype the
        # tensor would be int64, which float-weight layers such as nn.Conv1d
        # refuse to process. Convert to float here.
        return torch.tensor(self.data[idx], dtype=torch.float), self.labels[idx]

class CNNModel(nn.Module):
    """Plain 1-D CNN classifier over a single-channel input vector.

    Three Conv1d/BatchNorm1d/LeakyReLU/MaxPool1d(2) stages (32, 64 and 128
    channels) are followed by a three-layer fully connected head.

    Args:
        input_dim: Length of each input vector. The three pooling layers
            halve the length each time, hence the `input_dim // 8` factor in
            the first linear layer.
        num_classes: Number of output logits.
    """

    def __init__(self, input_dim, num_classes):
        super().__init__()
        widths = (1, 32, 64, 128)
        layers = []
        # Convolutional feature extractor: one stage per consecutive width pair.
        for c_in, c_out in zip(widths, widths[1:]):
            layers.append(nn.Conv1d(c_in, c_out, 7, padding='same'))
            layers.append(nn.BatchNorm1d(c_out))
            layers.append(nn.LeakyReLU())
            layers.append(nn.MaxPool1d(2))
        # Classifier head on the flattened feature map.
        flat_features = (input_dim // 8) * 128
        layers.append(nn.Flatten())
        layers.append(nn.Linear(flat_features, 1024))
        layers.append(nn.LeakyReLU())
        layers.append(nn.Dropout(0.2))
        layers.append(nn.Linear(1024, 256))
        layers.append(nn.LeakyReLU())
        layers.append(nn.Linear(256, num_classes))
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        # Insert the channel axis Conv1d expects at position 1.
        with_channel = x.unsqueeze(1)
        return self.model(with_channel)

# Step 1: Clean FASTA headers
# A header line with at least two whitespace-separated fields is rewritten as
# '>' followed by its second field; all other lines are copied unchanged.
with open(input_fasta, 'r') as infile, open(cleaned_fasta, 'w') as outfile:
    for line in infile:
        if not line.startswith('>'):
            outfile.write(line)
            continue
        parts = line.strip().split()
        if len(parts) > 1:
            outfile.write(f'>{parts[1]}\n')
        else:
            outfile.write(line)

# Step 2: k-mer vectorization
# Each record becomes one line: "<header>\t<space-separated k-mer counts>".
kmers = generate_kmers(k)
kmer_index = dict(zip(kmers, range(len(kmers))))

with open(cleaned_fasta, 'r') as infile, open(kmer_vector_file, 'w') as outfile:
    header = ''
    current_sequence = ''
    for line in infile:
        if not line.startswith('>'):
            # Sequence data may span several lines; accumulate it.
            current_sequence += line.strip()
            continue
        # A new header closes the previous record (if it had any sequence).
        if current_sequence:
            vector = kmer_vector(current_sequence, k, kmer_index)
            outfile.write(f"{header}\t{' '.join(str(count) for count in vector)}\n")
        header = line.strip()
        current_sequence = ''
    # Flush the final record, which no following header terminates.
    if current_sequence:
        vector = kmer_vector(current_sequence, k, kmer_index)
        outfile.write(f"{header}\t{' '.join(str(count) for count in vector)}\n")

# Step 3: Filter sequences by genus frequency
# First pass counts lines per genus (first field without its leading
# character); second pass keeps only genera seen at least 10 times.
genera_count = defaultdict(int)
with open(kmer_vector_file, 'r') as f:
    for line in f:
        genus = line.split(None, 1)[0][1:]
        genera_count[genus] += 1

with open(kmer_vector_file, 'r') as infile, open(filtered_vector_file, 'w') as outfile:
    for line in infile:
        genus = line.split(None, 1)[0][1:]
        if genera_count[genus] < 10:
            continue
        outfile.write(line)

# Main Experiment Loop
# Repeats a leave-one-sample-out-per-class experiment `num_runs` times and
# averages the best per-epoch test accuracy (a fraction in [0, 1]) of each run.
best_accuracies = []

# The filtered file does not change between runs, so read and group it by
# label once instead of re-reading it inside the loop.
data_by_class = defaultdict(list)
with open(filtered_vector_file, 'r') as f:
    for line in f:
        label = line.split('\t')[0][1:]
        data_by_class[label].append(line.strip())

# Device selection is run-independent as well.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

for run in range(num_runs):
    # For each class pick one random test sample; the rest go to training.
    train_data, test_data = [], []
    for samples in data_by_class.values():
        test_sample = random.choice(samples)
        test_data.append(test_sample)
        # NOTE: the comparison is by line content, so any line identical to
        # the chosen test sample is also kept out of the training set.
        train_data.extend(s for s in samples if s != test_sample)

    # Each line is "<header>\t<space-separated counts>"; the label is the
    # header without its leading character.
    train_vectors = [list(map(int, line.split('\t')[1].split())) for line in train_data]
    train_labels = [line.split('\t')[0][1:] for line in train_data]

    test_vectors = [list(map(int, line.split('\t')[1].split())) for line in test_data]
    test_labels = [line.split('\t')[0][1:] for line in test_data]

    # Fit the encoder on training labels only; test labels must be a subset.
    label_encoder = LabelEncoder().fit(train_labels)
    train_labels_encoded = label_encoder.transform(train_labels)
    test_labels_encoded = label_encoder.transform(test_labels)

    train_loader = DataLoader(KmerDataset(train_vectors, train_labels_encoded), batch_size=32, shuffle=True)
    test_loader = DataLoader(KmerDataset(test_vectors, test_labels_encoded), batch_size=32)

    # A fresh model and optimizer for every run.
    model = CNNModel(len(train_vectors[0]), len(label_encoder.classes_)).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.AdamW(model.parameters(), lr=0.001)

    max_accuracy = 0
    for epoch in range(num_epochs):
        model.train()
        for data, labels in train_loader:
            data, labels = data.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(data)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

        # Evaluate on the held-out samples after every epoch.
        model.eval()
        correct, total = 0, 0
        with torch.no_grad():
            for data, labels in test_loader:
                data, labels = data.to(device), labels.to(device)
                outputs = model(data)
                _, predicted = torch.max(outputs, 1)
                correct += (predicted == labels).sum().item()
                total += labels.size(0)

        accuracy = correct / total
        # Track the best test accuracy reached during this run.
        if accuracy > max_accuracy:
            max_accuracy = accuracy

    best_accuracies.append(max_accuracy)

avg_accuracy = sum(best_accuracies) / len(best_accuracies)
print(f'Average Highest Accuracy over {num_runs} runs: {avg_accuracy:.4f}')


# resnet, run 10 times

In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import LabelEncoder
from torch.utils.data import Dataset, DataLoader
import random
import matplotlib.pyplot as plt
from itertools import product
from collections import defaultdict

# Constants
k = 6
input_fasta = 'data1/fungi_ITS_sorted90.fasta'
cleaned_fasta = 'data1/fungi_ITS_cleaned.fasta'
kmer_vector_file = 'data1/fungi_ITS_kmer_vector.txt'
filtered_vector_file = 'data1/fungi_ITS_kmer_vector_filtered.txt'
num_epochs = 100
num_runs = 10

# Helper functions
def generate_kmers(k):
    """Enumerate all strings of length `k` over the nucleotides A, C, G, T.

    Returns a list of 4**k strings in the order produced by
    `itertools.product('ACGT', repeat=k)`.
    """
    alphabet = 'ACGT'
    words = []
    for letters in product(alphabet, repeat=k):
        words.append(''.join(letters))
    return words

def kmer_vector(seq, k, kmer_index):
    """Build the k-mer count vector of `seq`.

    Slides a window of width `k` over `seq` and, for every window that is a
    key of `kmer_index`, increments the vector entry at that key's index.
    Windows missing from `kmer_index` are skipped.

    Returns:
        A list of `len(kmer_index)` integer counts.
    """
    counts = [0] * len(kmer_index)
    last_start = len(seq) - k
    pos = 0
    while pos <= last_start:
        window = seq[pos:pos + k]
        if window in kmer_index:
            counts[kmer_index[window]] += 1
        pos += 1
    return counts

class KmerDataset(Dataset):
    """Map-style dataset of (k-mer count vector, label) pairs.

    `data` and `labels` are stored as given and converted to tensors one
    sample at a time: features as float, labels as long.
    """

    def __init__(self, data, labels):
        self.data = data
        self.labels = labels

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        features = torch.tensor(self.data[idx], dtype=torch.float)
        target = torch.tensor(self.labels[idx], dtype=torch.long)
        return features, target


# ----------------------------
# New ResidualBlock and CNNModel
# ----------------------------
class ResidualBlock(nn.Module):
    """Two-convolution 1-D residual block.

    Computes `LeakyReLU(BN(conv2(LeakyReLU(BN(conv1(x))))) + shortcut(x))`.

    Args:
        in_channels: Channels of the input tensor.
        out_channels: Channels produced by both convolutions.
        kernel_size: Kernel width of both convolutions; padded with
            `kernel_size // 2` on each side.
        stride: Stride of the first convolution (the second uses stride 1).
        downsample: Optional module applied to the input to form the
            shortcut; when None the input itself is used.
    """

    def __init__(self, in_channels, out_channels, kernel_size=7, stride=1, downsample=None):
        super().__init__()
        half_kernel = kernel_size // 2
        self.conv1 = nn.Conv1d(
            in_channels, out_channels, kernel_size,
            stride=stride, padding=half_kernel, bias=False,
        )
        self.bn1 = nn.BatchNorm1d(out_channels)
        self.relu = nn.LeakyReLU()
        self.conv2 = nn.Conv1d(
            out_channels, out_channels, kernel_size,
            stride=1, padding=half_kernel, bias=False,
        )
        self.bn2 = nn.BatchNorm1d(out_channels)
        # Projection for the shortcut when shapes differ; may be None.
        self.downsample = downsample

    def forward(self, x):
        shortcut = x if self.downsample is None else self.downsample(x)
        branch = self.conv1(x)
        branch = self.bn1(branch)
        branch = self.relu(branch)
        branch = self.conv2(branch)
        branch = self.bn2(branch)
        # Residual connection followed by the final activation.
        return self.relu(branch + shortcut)

class CNNModel(nn.Module):
    """Small 1-D ResNet-style classifier over a single-channel input vector.

    Layout: strided stem convolution -> BatchNorm -> LeakyReLU -> dropout ->
    max-pool -> four residual stages (32, 64, 128, 256 channels) -> global
    average pooling -> linear classifier.

    Args:
        input_length: Length of each input vector. Not used by any layer
            (adaptive pooling makes the head length-independent); kept so the
            constructor signature matches the other models in this file.
        num_classes: Number of output logits.
    """

    def __init__(self, input_length, num_classes):
        super(CNNModel, self).__init__()
        # Channel count feeding the next residual stage; updated by _make_layer.
        self.in_channels = 32  # Reduced from 64

        # Initial convolution layer
        self.conv1 = nn.Conv1d(1, 32, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm1d(32)
        self.relu = nn.LeakyReLU()
        self.maxpool = nn.MaxPool1d(kernel_size=3, stride=2, padding=1)
        self.dropout = nn.Dropout(0.3)  # Add dropout

        # Residual layers with fewer blocks
        self.layer1 = self._make_layer(32, layers=1)
        self.layer2 = self._make_layer(64, layers=1, stride=2)
        self.layer3 = self._make_layer(128, layers=1, stride=2)
        self.layer4 = self._make_layer(256, layers=1, stride=2)

        # Adaptive pooling and fully connected layer
        self.avgpool = nn.AdaptiveAvgPool1d(1)
        self.fc = nn.Linear(256, num_classes)

    def _make_layer(self, out_channels, layers, stride=1):
        """Build one residual stage and advance `self.in_channels`.

        Args:
            out_channels: Channels produced by every block of the stage.
            layers: Number of residual blocks in the stage. At least one
                block is always created.
            stride: Stride of the first block; later blocks use stride 1.

        Returns:
            An `nn.Sequential` holding the stage's blocks.
        """
        downsample = None
        # The shortcut needs a projection whenever the first block changes
        # the length (stride) or the channel count.
        if stride != 1 or self.in_channels != out_channels:
            downsample = nn.Sequential(
                nn.Conv1d(self.in_channels, out_channels, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm1d(out_channels),
            )
        layers_list = [ResidualBlock(self.in_channels, out_channels, stride=stride, downsample=downsample)]
        self.in_channels = out_channels
        # Remaining blocks keep the shape, so they need no projection.
        layers_list.extend(ResidualBlock(out_channels, out_channels) for _ in range(1, layers))
        return nn.Sequential(*layers_list)

    def forward(self, x):
        x = x.unsqueeze(1)  # Add channel dimension
        x = self.dropout(self.relu(self.bn1(self.conv1(x))))
        x = self.maxpool(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.avgpool(x)
        x = x.squeeze(-1)  # Drop the pooled length axis of size 1
        x = self.fc(x)
        return x

# ----------------------------
# Preprocessing steps remain unchanged
# ----------------------------

# Step 1: Clean FASTA headers
# Header lines with two or more whitespace-separated fields are reduced to
# '>' plus the second field; every other line passes through untouched.
with open(input_fasta, 'r') as infile, open(cleaned_fasta, 'w') as outfile:
    for line in infile:
        cleaned = line
        if line.startswith('>'):
            parts = line.strip().split()
            if len(parts) > 1:
                cleaned = f'>{parts[1]}\n'
        outfile.write(cleaned)

# Step 2: k-mer vectorization
# Output format, one record per line: "<header>\t<space-separated counts>".
kmers = generate_kmers(k)
kmer_index = {}
for idx, kmer in enumerate(kmers):
    kmer_index[kmer] = idx

with open(cleaned_fasta, 'r') as infile, open(kmer_vector_file, 'w') as outfile:
    header = ''
    current_sequence = ''
    for line in infile:
        is_header = line.startswith('>')
        if is_header and current_sequence:
            # The previous record is complete; emit it before starting anew.
            vector = kmer_vector(current_sequence, k, kmer_index)
            fields = ' '.join(map(str, vector))
            outfile.write(f"{header}\t{fields}\n")
        if is_header:
            header = line.strip()
            current_sequence = ''
        else:
            current_sequence += line.strip()
    # The last record has no trailing header to trigger its output.
    if current_sequence:
        vector = kmer_vector(current_sequence, k, kmer_index)
        fields = ' '.join(map(str, vector))
        outfile.write(f"{header}\t{fields}\n")

# Step 3: Filter sequences by genus frequency
# Count lines per genus (first field minus its leading character), then copy
# only the lines whose genus occurs at least 10 times.
genera_count = defaultdict(int)
with open(kmer_vector_file, 'r') as f:
    for line in f:
        first_field = line.split()[0]
        genus = first_field[1:]
        genera_count[genus] = genera_count[genus] + 1

with open(kmer_vector_file, 'r') as infile, open(filtered_vector_file, 'w') as outfile:
    for line in infile:
        first_field = line.split()[0]
        genus = first_field[1:]
        if not genera_count[genus] < 10:
            outfile.write(line)

# ----------------------------
# Main Experiment Loop using Residual CNNModel
# ----------------------------
# Best per-epoch test accuracy (in percent) of each run.
best_accuracies = []

# The filtered file does not change between runs, so read and group it by
# label once instead of re-reading it inside the loop.
data_by_class = defaultdict(list)
with open(filtered_vector_file, 'r') as f:
    for line in f:
        label = line.split('\t')[0][1:]
        data_by_class[label].append(line.strip())

# Device selection is run-independent as well.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

for run in range(num_runs):
    # For each class pick one random test sample; the rest go to training.
    train_data, test_data = [], []
    for samples in data_by_class.values():
        test_sample = random.choice(samples)
        test_data.append(test_sample)
        # NOTE: the comparison is by line content, so any line identical to
        # the chosen test sample is also kept out of the training set.
        train_data.extend(s for s in samples if s != test_sample)

    # Each line is "<header>\t<space-separated counts>"; the label is the
    # header without its leading character.
    train_vectors = [list(map(int, line.split('\t')[1].split())) for line in train_data]
    train_labels = [line.split('\t')[0][1:] for line in train_data]

    test_vectors = [list(map(int, line.split('\t')[1].split())) for line in test_data]
    test_labels = [line.split('\t')[0][1:] for line in test_data]

    # Fit the encoder on training labels only; test labels must be a subset.
    label_encoder = LabelEncoder().fit(train_labels)
    train_labels_encoded = label_encoder.transform(train_labels)
    test_labels_encoded = label_encoder.transform(test_labels)

    train_loader = DataLoader(KmerDataset(train_vectors, train_labels_encoded), batch_size=32, shuffle=True)
    test_loader = DataLoader(KmerDataset(test_vectors, test_labels_encoded), batch_size=32)

    # Initialize the model using the new CNNModel (fresh weights every run)
    input_length = len(train_vectors[0])
    num_classes = len(label_encoder.classes_)
    model = CNNModel(input_length, num_classes).to(device)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.AdamW(model.parameters(), lr=0.001, weight_decay=1e-5)

    max_accuracy = 0
    # Training loop with testing using Residual CNNModel
    for epoch in range(num_epochs):
        model.train()
        total_train_loss = 0
        total_correct_train = 0
        total_train_samples = 0

        for data, labels in train_loader:
            data, labels = data.to(device), labels.to(device)
            outputs = model(data)
            loss = criterion(outputs, labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Running statistics for the epoch summary printed below.
            total_train_loss += loss.item()
            _, predicted_train = torch.max(outputs, 1)
            total_correct_train += (predicted_train == labels).sum().item()
            total_train_samples += labels.size(0)

        avg_train_loss = total_train_loss / len(train_loader)
        train_accuracy = total_correct_train / total_train_samples * 100
        print(f'Epoch [{epoch+1}/{num_epochs}], Training Loss: {avg_train_loss:.4f}, Training Accuracy: {train_accuracy:.2f}%')

        # Evaluate on test dataset
        model.eval()
        total_correct = 0
        total_samples = 0
        with torch.no_grad():
            for data, labels in test_loader:
                data, labels = data.to(device), labels.to(device)
                outputs = model(data)
                _, predicted = torch.max(outputs, 1)
                total_correct += (predicted == labels).sum().item()
                total_samples += labels.size(0)

        accuracy = total_correct / total_samples * 100
        print(f'Epoch [{epoch+1}/{num_epochs}], Test Accuracy: {accuracy:.2f}%')
        # Track the best test accuracy reached during this run.
        if accuracy > max_accuracy:
            max_accuracy = accuracy

    best_accuracies.append(max_accuracy)

avg_accuracy = sum(best_accuracies) / len(best_accuracies)
print(f'Average Highest Accuracy over {num_runs} runs: {avg_accuracy:.4f}')


Epoch [1/100], Training Loss: 3.5650, Training Accuracy: 19.98%
Epoch [1/100], Test Accuracy: 8.64%
Epoch [2/100], Training Loss: 2.7112, Training Accuracy: 35.61%
Epoch [2/100], Test Accuracy: 16.05%
Epoch [3/100], Training Loss: 2.1840, Training Accuracy: 44.85%
Epoch [3/100], Test Accuracy: 19.75%
Epoch [4/100], Training Loss: 1.8370, Training Accuracy: 52.79%
Epoch [4/100], Test Accuracy: 25.93%
Epoch [5/100], Training Loss: 1.5241, Training Accuracy: 60.73%
Epoch [5/100], Test Accuracy: 24.69%
Epoch [6/100], Training Loss: 1.2495, Training Accuracy: 68.30%
Epoch [6/100], Test Accuracy: 43.21%
Epoch [7/100], Training Loss: 0.9598, Training Accuracy: 78.23%
Epoch [7/100], Test Accuracy: 40.74%
Epoch [8/100], Training Loss: 0.7449, Training Accuracy: 85.11%
Epoch [8/100], Test Accuracy: 45.68%
Epoch [9/100], Training Loss: 0.5635, Training Accuracy: 91.56%
Epoch [9/100], Test Accuracy: 67.90%
Epoch [10/100], Training Loss: 0.4205, Training Accuracy: 94.67%
Epoch [10/100], Test Accura

# ConvNext, run 10 times

In [3]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import LabelEncoder
import random
import matplotlib.pyplot as plt
from itertools import product
from collections import defaultdict

# Constants
k = 6
input_fasta = 'data1/fungi_ITS_sorted90.fasta'
cleaned_fasta = 'data1/fungi_ITS_cleaned.fasta'
kmer_vector_file = 'data1/fungi_ITS_kmer_vector.txt'
filtered_vector_file = 'data1/fungi_ITS_kmer_vector_filtered.txt'
num_epochs = 100
num_runs = 10

# ----------------------------
# Helper functions for k-mer processing
# ----------------------------
def generate_kmers(k):
    """List all 4**k words of length `k` over 'ACGT'.

    Words appear in the order generated by `itertools.product`.
    """
    letter_tuples = product('ACGT', repeat=k)
    return [''.join(letters) for letters in letter_tuples]

def kmer_vector(seq, k, kmer_index):
    """Return the k-mer occurrence counts of `seq` as a list.

    Every length-`k` window of `seq` that is a key of `kmer_index`
    contributes one count at position `kmer_index[window]`; other windows
    are ignored. The result has `len(kmer_index)` entries.
    """
    counts = [0] * len(kmer_index)
    windows = (seq[i:i + k] for i in range(len(seq) - k + 1))
    for position in (kmer_index[w] for w in windows if w in kmer_index):
        counts[position] += 1
    return counts

class KmerDataset(Dataset):
    """Wrap parallel collections of count vectors and labels as a Dataset.

    Indexing returns a `(features, label)` tuple of tensors, with features
    converted to float and the label to long.
    """

    def __init__(self, data, labels):
        self.data, self.labels = data, labels

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        sample, label = self.data[idx], self.labels[idx]
        return (
            torch.tensor(sample, dtype=torch.float),
            torch.tensor(label, dtype=torch.long),
        )


# ----------------------------
# Data Preprocessing: Clean FASTA headers and k-mer vectorization
# ----------------------------
# Clean FASTA headers: a header with two or more whitespace-separated fields
# becomes '>' + its second field; all other lines are written back verbatim.
with open(input_fasta, 'r') as infile, open(cleaned_fasta, 'w') as outfile:
    for line in infile:
        parts = line.strip().split() if line.startswith('>') else []
        if len(parts) > 1:
            outfile.write(f'>{parts[1]}\n')
        else:
            outfile.write(line)

# k-mer vectorization: one output line per record,
# "<header>\t<space-separated k-mer counts>".
kmers = generate_kmers(k)
kmer_index = {kmer: position for position, kmer in enumerate(kmers)}

with open(cleaned_fasta, 'r') as infile, open(kmer_vector_file, 'w') as outfile:
    header, current_sequence = '', ''
    for line in infile:
        if not line.startswith('>'):
            # Sequence lines of the current record are concatenated.
            current_sequence = current_sequence + line.strip()
            continue
        if current_sequence:
            # Header reached: write out the record collected so far.
            vector = kmer_vector(current_sequence, k, kmer_index)
            outfile.write(f"{header}\t{' '.join([str(v) for v in vector])}\n")
        header, current_sequence = line.strip(), ''
    # Write the trailing record, if it has any sequence.
    if current_sequence:
        vector = kmer_vector(current_sequence, k, kmer_index)
        outfile.write(f"{header}\t{' '.join([str(v) for v in vector])}\n")

# Filter sequences by genus frequency (keeping only those with frequency >= 10).
# The genus is the first whitespace-separated field without its first character.
genera_count = defaultdict(int)
with open(kmer_vector_file, 'r') as f:
    for line in f:
        genus = line.split(None, 1)[0][1:]
        genera_count[genus] += 1

with open(kmer_vector_file, 'r') as infile, open(filtered_vector_file, 'w') as outfile:
    for line in infile:
        genus = line.split(None, 1)[0][1:]
        if genera_count[genus] >= 10:
            outfile.write(line)

# ----------------------------
# Define ConvNeXt components: DropPath, ConvNeXtBlock1D and CNNModel
# ----------------------------
class DropPath(nn.Module):
    """Drop paths (Stochastic Depth) per sample.

    During training each sample in the batch is zeroed with probability
    `drop_prob`; surviving samples are scaled by `1 / (1 - drop_prob)`.
    In eval mode, or when `drop_prob` is None or 0, the input is returned
    unchanged.

    Args:
        drop_prob: Probability of dropping a sample's path. None is treated
            as 0 (no dropping).
    """

    def __init__(self, drop_prob=None):
        super(DropPath, self).__init__()
        self.drop_prob = drop_prob

    def forward(self, x):
        # `not self.drop_prob` covers both None (the default, which used to
        # crash on `1 - None`) and 0.0.
        if not self.drop_prob or not self.training:
            return x
        keep_prob = 1 - self.drop_prob
        if keep_prob <= 0.0:
            # Every path is dropped; dividing by keep_prob would yield NaN.
            return torch.zeros_like(x)
        # One random draw per sample, broadcast over all remaining axes.
        shape = (x.shape[0],) + (1,) * (x.ndim - 1)
        random_tensor = keep_prob + torch.rand(shape, dtype=x.dtype, device=x.device)
        random_tensor.floor_()  # 1 with probability keep_prob, else 0
        output = x.div(keep_prob) * random_tensor
        return output

class ConvNeXtBlock1D(nn.Module):
    """1-D ConvNeXt block: depthwise conv, LayerNorm, MLP, residual add.

    Args:
        dim: Number of channels (unchanged by the block).
        drop_path: Stochastic-depth rate; when not positive the drop-path
            step is an identity.
        layer_scale_init_value: Initial value of the learnable per-channel
            scale `gamma`; when not positive no scale is applied.
    """

    def __init__(self, dim, drop_path=0.0, layer_scale_init_value=1e-6):
        super().__init__()
        self.dwconv = nn.Conv1d(dim, dim, kernel_size=7, padding=3, groups=dim)
        self.norm = nn.LayerNorm(dim, eps=1e-6)
        self.pwconv1 = nn.Linear(dim, 4 * dim)
        self.act = nn.GELU()
        self.pwconv2 = nn.Linear(4 * dim, dim)
        if layer_scale_init_value > 0:
            self.gamma = nn.Parameter(layer_scale_init_value * torch.ones(dim), requires_grad=True)
        else:
            self.gamma = None
        if drop_path > 0.0:
            self.drop_path = DropPath(drop_path)
        else:
            self.drop_path = nn.Identity()

    def forward(self, x):
        # x arrives as [batch_size, channels, length].
        residual = x
        # Depthwise conv, then move channels last so LayerNorm and the
        # Linear layers act on the channel axis.
        y = self.dwconv(x).permute(0, 2, 1)
        y = self.pwconv2(self.act(self.pwconv1(self.norm(y))))
        if self.gamma is not None:
            y = self.gamma * y
        y = self.drop_path(y)
        y = y + residual.permute(0, 2, 1)
        # Restore the [batch_size, channels, length] layout.
        return y.permute(0, 2, 1)

class CNNModel(nn.Module):
    """1-D ConvNeXt-style classifier over a single-channel input vector.

    Each stage is a strided downsampling layer followed by a stack of
    `ConvNeXtBlock1D` blocks; the final feature map is averaged over the
    length axis, layer-normalized and fed to a linear head.

    Args:
        input_length: Length of each input vector. Not used by any layer
            (the length axis is averaged away before the head); kept so the
            constructor signature matches the other models in this file.
        num_classes: Number of output logits.
        depths: Number of blocks in each stage.
        dims: Channel width of each stage; its length sets the number of
            stages.
        drop_path_rate: Largest stochastic-depth rate; per-block rates are
            spaced linearly from 0 to this value over `sum(depths)` blocks.

    Raises:
        ValueError: If `depths` has fewer entries than `dims`.
    """

    # Tuple defaults avoid sharing one mutable list between all instances.
    def __init__(self, input_length, num_classes, depths=(3, 3, 9, 3), dims=(64, 128, 256, 512), drop_path_rate=0.0):
        super(CNNModel, self).__init__()

        num_stages = len(dims)
        if len(depths) < num_stages:
            raise ValueError(
                f'depths has {len(depths)} entries but dims defines {num_stages} stages'
            )

        # Stem Layer: initial downsampling
        self.downsample_layers = nn.ModuleList()
        stem = nn.Sequential(
            nn.Conv1d(1, dims[0], kernel_size=7, stride=2, padding=3),
            nn.BatchNorm1d(dims[0])
        )
        self.downsample_layers.append(stem)

        # Downsampling Layers for subsequent stages
        for i in range(num_stages - 1):
            downsample_layer = nn.Sequential(
                nn.BatchNorm1d(dims[i]),
                nn.Conv1d(dims[i], dims[i+1], kernel_size=2, stride=2),
            )
            self.downsample_layers.append(downsample_layer)

        # Compute drop path rates for each block
        dp_rates = [x.item() for x in torch.linspace(0, drop_path_rate, sum(depths))]

        # Stages: stack ConvNeXt blocks per stage
        self.stages = nn.ModuleList()
        cur = 0  # index of the first block of the current stage in dp_rates
        for i in range(num_stages):
            stage = nn.Sequential(
                *[ConvNeXtBlock1D(dim=dims[i], drop_path=dp_rates[cur + j]) for j in range(depths[i])]
            )
            self.stages.append(stage)
            cur += depths[i]

        # Final normalization and classification head
        self.norm = nn.LayerNorm(dims[-1], eps=1e-6)
        self.head = nn.Linear(dims[-1], num_classes)

    def forward(self, x):
        x = x.unsqueeze(1)  # Add channel dimension: [batch_size, 1, length]
        # Alternate downsampling and block stacks, one pair per stage.
        for downsample, stage in zip(self.downsample_layers, self.stages):
            x = downsample(x)
            x = stage(x)
        x = x.mean(-1)  # Global average pooling over length dimension
        x = self.norm(x)
        x = self.head(x)
        return x

# ----------------------------
# Main Experiment Loop: Run the training/testing 10 times using the ConvNeXt-based model
# ----------------------------
# Best per-epoch test accuracy (in percent) reached in each run.
best_accuracies = []

# Load the filtered k-mer vectors once and bucket the lines by label
# (the first tab-separated field without its leading character).
data_by_class = defaultdict(list)
with open(filtered_vector_file, 'r') as f:
    for line in f:
        label = line.partition('\t')[0][1:]
        data_by_class[label].append(line.strip())

for run in range(num_runs):
    # Split: one randomly chosen line per label is held out for testing;
    # all lines of that label that differ from it are used for training.
    train_data, test_data = [], []
    for samples in data_by_class.values():
        test_sample = random.choice(samples)
        test_data.append(test_sample)
        train_data.extend([s for s in samples if s != test_sample])

    # Parse "<header>\t<counts>" lines into integer vectors and label strings.
    train_vectors, train_labels = [], []
    for record in train_data:
        fields = record.split('\t')
        train_labels.append(fields[0][1:])
        train_vectors.append([int(count) for count in fields[1].split()])
    test_vectors, test_labels = [], []
    for record in test_data:
        fields = record.split('\t')
        test_labels.append(fields[0][1:])
        test_vectors.append([int(count) for count in fields[1].split()])

    # Encode labels as integers using the classes seen in training.
    label_encoder = LabelEncoder().fit(train_labels)
    train_labels_encoded = label_encoder.transform(train_labels)
    test_labels_encoded = label_encoder.transform(test_labels)

    train_loader = DataLoader(KmerDataset(train_vectors, train_labels_encoded), batch_size=32, shuffle=True)
    test_loader = DataLoader(KmerDataset(test_vectors, test_labels_encoded), batch_size=32)

    input_length = len(train_vectors[0])
    num_classes = len(label_encoder.classes_)

    # Fresh model, optimizer and cosine learning-rate schedule for every run.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = CNNModel(input_length, num_classes, drop_path_rate=0.1).to(device)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.AdamW(model.parameters(), lr=0.001, weight_decay=1e-4)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=num_epochs)

    max_accuracy = 0

    for epoch in range(num_epochs):
        # ---- training pass ----
        model.train()
        total_train_loss = 0
        total_correct_train = 0
        total_train_samples = 0

        for data, labels in train_loader:
            data = data.to(device)
            labels = labels.to(device)
            outputs = model(data)
            loss = criterion(outputs, labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Accumulate statistics for the epoch summary.
            total_train_loss += loss.item()
            predicted_train = torch.max(outputs, 1)[1]
            total_correct_train += (predicted_train == labels).sum().item()
            total_train_samples += labels.size(0)

        avg_train_loss = total_train_loss / len(train_loader)
        train_accuracy = total_correct_train / total_train_samples * 100
        print(f'Run {run+1}/{num_runs}, Epoch [{epoch+1}/{num_epochs}], Training Loss: {avg_train_loss:.4f}, Training Accuracy: {train_accuracy:.2f}%')

        # ---- evaluation pass on the held-out samples ----
        model.eval()
        total_correct = 0
        total_samples = 0
        with torch.no_grad():
            for data, labels in test_loader:
                data = data.to(device)
                labels = labels.to(device)
                outputs = model(data)
                predicted = torch.max(outputs, 1)[1]
                total_correct += (predicted == labels).sum().item()
                total_samples += labels.size(0)

        accuracy = total_correct / total_samples * 100
        print(f'Run {run+1}/{num_runs}, Epoch [{epoch+1}/{num_epochs}], Test Accuracy: {accuracy:.2f}%')
        # Keep the highest test accuracy observed so far in this run.
        max_accuracy = max(max_accuracy, accuracy)

        # Advance the learning-rate schedule once per epoch.
        scheduler.step()

    best_accuracies.append(max_accuracy)
    print(f'Run {run+1} Best Test Accuracy: {max_accuracy:.2f}%')

avg_accuracy = sum(best_accuracies) / len(best_accuracies)
print(f'\nAverage Highest Accuracy over {num_runs} runs: {avg_accuracy:.4f}%')


Run 1/10, Epoch [1/100], Training Loss: 2.6408, Training Accuracy: 34.00%
Run 1/10, Epoch [1/100], Test Accuracy: 45.68%
Run 1/10, Epoch [2/100], Training Loss: 1.5930, Training Accuracy: 55.09%
Run 1/10, Epoch [2/100], Test Accuracy: 45.68%
Run 1/10, Epoch [3/100], Training Loss: 1.3097, Training Accuracy: 61.60%
Run 1/10, Epoch [3/100], Test Accuracy: 49.38%
Run 1/10, Epoch [4/100], Training Loss: 1.0906, Training Accuracy: 67.68%
Run 1/10, Epoch [4/100], Test Accuracy: 49.38%
Run 1/10, Epoch [5/100], Training Loss: 0.8874, Training Accuracy: 73.64%
Run 1/10, Epoch [5/100], Test Accuracy: 53.09%
Run 1/10, Epoch [6/100], Training Loss: 0.6406, Training Accuracy: 81.58%
Run 1/10, Epoch [6/100], Test Accuracy: 62.96%
Run 1/10, Epoch [7/100], Training Loss: 0.3933, Training Accuracy: 89.33%
Run 1/10, Epoch [7/100], Test Accuracy: 67.90%
Run 1/10, Epoch [8/100], Training Loss: 0.2325, Training Accuracy: 94.98%
Run 1/10, Epoch [8/100], Test Accuracy: 59.26%
Run 1/10, Epoch [9/100], Trainin