In [1]:
import numpy as np
import torch
    
def enhancer_collate_fn(batch):
    """This pads the data to the max sequence length in the batch to reduce memory usage during training
        Returns the sequences as padded, mask tensor for attention parameter, and labels
    """
    sequences, labels = zip(*batch)  # batch is list of (tokens, target)

    # First normalize all seqs to (L_i, embed_dim)
    arrays = []
    lengths = []
    for seq in sequences:
        arr = np.array(seq, dtype=np.float32)
        if arr.ndim == 1:
            # Treat as a single-token sequence: (embed_dim,) -> (1, embed_dim)
            arr = arr[None, :]
        arrays.append(arr)
        lengths.append(arr.shape[0])

    max_len = max(lengths)
    embed_dim = arrays[0].shape[1]

    padded = np.zeros((len(arrays), max_len, embed_dim), dtype=np.float32)
    mask = np.zeros((len(arrays), max_len), dtype=np.bool_)

    for i, arr in enumerate(arrays):
        L = arr.shape[0]
        padded[i, :L, :] = arr
        mask[i, :L] = 1  # 1 for real token, 0 for padding

    padded = torch.from_numpy(padded)         # (batch, max_len, embed_dim)
    mask = torch.from_numpy(mask)             # (batch, max_len)
    labels = torch.tensor(labels, dtype=torch.float32)  # (batch,)

    return padded, mask, labels


## Training step

In [None]:
import torch
from torch.utils.data import DataLoader
import torch.nn as nn
import torch.optim as optim

from EnhancerDataset import EnhancerDataset
from EnhancerAttention import EnhancerAttention

# Device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

# Dataset & DataLoader
train_dataset = EnhancerDataset(data_type="train")
train_loader = DataLoader(
    train_dataset,
    batch_size=16,
    shuffle=True,
    collate_fn=enhancer_collate_fn,
)

# Model, loss, optimizer
embed_size = 100  # dna2vec dim
model = EnhancerAttention(embed_size=embed_size).to(device)

loss_fn = nn.BCEWithLogitsLoss()
lr = 1e-3
optimizer = optim.Adam(model.parameters(), lr=lr)

# Training loop
epochs = 
print_every = 100  # steps

for epoch in range(epochs):
    model.train()
    running_loss = 0.0

    for step, (inputs, masks, labels) in enumerate(train_loader, start=1):
        # Move to device
        inputs = inputs.to(device)           # (batch, N, embed_dim)
        masks = masks.to(device) if masks is not None else None
        labels = labels.to(device).float()   # (batch,)

        # Forward
        logits = model(inputs, masks)        # (batch,)
        loss = loss_fn(logits, labels)

        # Backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

        if step % print_every == 0:
            avg_loss = running_loss / print_every
            print(f"Epoch [{epoch+1}/{epochs}] Step [{step}] - Loss: {avg_loss:.4f}")
            running_loss = 0.0

print("Training complete.")

Using device: cpu


  arr = np.array(seq, dtype=np.float32)


Epoch [1/2] Step [100] - Loss: 0.6680
Epoch [1/2] Step [200] - Loss: 0.6342
Epoch [1/2] Step [300] - Loss: 0.6288
Epoch [1/2] Step [400] - Loss: 0.6319
Epoch [1/2] Step [500] - Loss: 0.6386
Epoch [1/2] Step [600] - Loss: 0.6145
Epoch [1/2] Step [700] - Loss: 0.6338
Epoch [1/2] Step [800] - Loss: 0.6187
Epoch [1/2] Step [900] - Loss: 0.6330
Epoch [1/2] Step [1000] - Loss: 0.6300
Epoch [1/2] Step [1100] - Loss: 0.6292
Epoch [1/2] Step [1200] - Loss: 0.6142
Epoch [1/2] Step [1300] - Loss: 0.6263
Epoch [1/2] Step [1400] - Loss: 0.6363
Epoch [1/2] Step [1500] - Loss: 0.6372
Epoch [1/2] Step [1600] - Loss: 0.6290
Epoch [1/2] Step [1700] - Loss: 0.6148
Epoch [1/2] Step [1800] - Loss: 0.5969
Epoch [1/2] Step [1900] - Loss: 0.6196
Epoch [1/2] Step [2000] - Loss: 0.6105
Epoch [1/2] Step [2100] - Loss: 0.6069
Epoch [1/2] Step [2200] - Loss: 0.6241
Epoch [1/2] Step [2300] - Loss: 0.6135
Epoch [1/2] Step [2400] - Loss: 0.6101
Epoch [1/2] Step [2500] - Loss: 0.6230
Epoch [1/2] Step [2600] - Loss: 0.

KeyboardInterrupt: 

In [5]:
import time
import os

import torch
from torch.utils.data import DataLoader, Subset
import torch.nn as nn
import torch.optim as optim

from EnhancerDataset import EnhancerDataset
from EnhancerAttention import EnhancerAttention

# for memory usage on CPU
try:
    import psutil
    USE_PSUTIL = True
except ImportError:
    USE_PSUTIL = False

# ---------------------------
# Device
# ---------------------------
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

# 10% Subset of dataset
full_train_dataset = EnhancerDataset(data_type="train")
full_size = len(full_train_dataset)
subset_fraction = 0.10
subset_size = int(full_size * subset_fraction)

# take the first 10% of indices
subset_indices = list(range(subset_size))
train_dataset = Subset(full_train_dataset, subset_indices)

print(f"Full training set size: {full_size}")
print(f"Using subset size: {subset_size} ({subset_fraction*100:.1f}% of data)")

train_loader = DataLoader(
    train_dataset,
    batch_size=16,
    shuffle=True,
    collate_fn=enhancer_collate_fn,
)


embed_size = 100  # dna2vec dim
model = EnhancerAttention(embed_size=embed_size).to(device)

loss_fn = nn.BCEWithLogitsLoss()
lr = 1e-3
optimizer = optim.Adam(model.parameters(), lr=lr)

# Training loop (benchmark run)
epochs = 1 # for preliminary benchmarking
print_every = 100 # steps

start_time = time.time()
if USE_PSUTIL:
    process = psutil.Process(os.getpid())

for epoch in range(epochs):
    model.train()
    running_loss = 0.0

    for step, (inputs, masks, labels) in enumerate(train_loader, start=1):
        inputs = inputs.to(device) # (batch, N, embed_dim)
        masks = masks.to(device) if masks is not None else None
        labels = labels.to(device).float() # (batch,)

        # Forward
        logits = model(inputs, masks) # (batch,)
        loss = loss_fn(logits, labels)

        # Backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

        if step % print_every == 0:
            avg_loss = running_loss / print_every
            if USE_PSUTIL:
                mem_mb = process.memory_info().rss / (1024 ** 2)
                print(
                    f"Epoch [{epoch+1}/{epochs}] Step [{step}] "
                    f"- Loss: {avg_loss:.4f} - RAM: {mem_mb:.1f} MB"
                )
            else:
                print(f"Epoch [{epoch+1}/{epochs}] Step [{step}] - Loss: {avg_loss:.4f}")
            running_loss = 0.0

total_time = time.time() - start_time
print(f"Benchmark training complete in {total_time:.2f} seconds.")


Using device: cpu
Full training set size: 119040
Using subset size: 11904 (10.0% of data)


  arr = np.array(seq, dtype=np.float32)


Epoch [1/1] Step [100] - Loss: 0.0881 - RAM: 275.0 MB
Epoch [1/1] Step [200] - Loss: 0.0009 - RAM: 147.8 MB
Epoch [1/1] Step [300] - Loss: 0.0004 - RAM: 160.8 MB
Epoch [1/1] Step [400] - Loss: 0.0002 - RAM: 136.0 MB
Epoch [1/1] Step [500] - Loss: 0.0002 - RAM: 222.3 MB
Epoch [1/1] Step [600] - Loss: 0.0001 - RAM: 205.2 MB


KeyboardInterrupt: 