In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, random_split

# Intro to tensors

In [None]:
# Matrix product of two random 2-D tensors: (10, 5) @ (5, 17) -> (10, 17)
a = torch.rand((10, 5))
b = torch.rand((5, 17))
mult = a @ b  # `@` is the operator form of torch.matmul
print(mult.shape)

In [None]:
# Batched matrix product: the leading dimension (16) is treated as a batch,
# and each (10, 5) slice is multiplied with the matching (5, 17) slice.
a = torch.rand((16, 10, 5))
b = torch.rand((16, 5, 17))
mult = a @ b  # `@` is the operator form of torch.matmul
print(mult.shape)

In [None]:
# Gradient example
# Create leaf tensors that autograd should track. torch.tensor(...) with
# requires_grad=True replaces the legacy torch.Tensor(...) constructor followed
# by a separate `requires_grad = True` assignment.
a = torch.tensor([1.0, 2.0, 3.0], requires_grad=True)
b = torch.tensor([4.0, 5.0, 6.0], requires_grad=True)
c = torch.tensor([7.0, 8.0, 9.0], requires_grad=True)

# Reduce to a scalar so that backward() needs no explicit gradient argument.
result = torch.sum((a * b) + c)
result.backward()

# Expected: d(result)/da = b, d(result)/db = a, d(result)/dc = 1.
# Separate print statements avoid building a tuple of None values that the
# notebook would otherwise echo as the cell's output.
print(a.grad)
print(b.grad)
print(c.grad)

In [None]:
# Build a small computation graph with two scalar outputs (d and e) that
# share the intermediate nodes b and c.
# torch.rand already returns a tensor, so no torch.Tensor(...) wrapper is
# needed; requires_grad=True makes `a` a tracked leaf directly.
a = torch.rand(1, 4, requires_grad=True)
b = a**2
c = b*2
d = c.mean()
e = c.sum()

In [None]:
# d and e share the graph nodes that produce b and c.
# NOTE: this cell frees that graph; re-run the cell that builds a..e before
# running this cell a second time.
d.backward(retain_graph=True)  # fine: saved tensors are kept
e.backward(retain_graph=True)  # fine: saved tensors are kept
d.backward()  # also fine, but this call releases the graph's saved tensors

# e depends on the part of the graph that was just released, so this call is
# expected to fail. The error is caught and shown so that the notebook still
# runs from top to bottom.
try:
    e.backward()
except RuntimeError as err:
    print(f"RuntimeError: {err}")

# Train model

In [None]:
# Create random dataset. Every Dataset has to implement __len__ and __getitem__
class SyntheticDataset(Dataset):
    """Random binary-classification dataset held entirely in memory.

    Every sample is a vector of ``input_dim`` values drawn uniformly from
    [0, 1). Its label is 1 when the mean of the vector is greater than 0.5
    and 0 otherwise.

    Args:
        num_samples: Number of samples to generate.
        input_dim: Number of features per sample.
        seed: Optional seed for a private random generator. With a seed, the
            same arguments always produce the same data. With ``None``
            (the default) the global torch random state is used.
    """

    def __init__(self, num_samples=1000, input_dim=20, seed=None):
        # A dedicated generator keeps seeding local to this dataset instead of
        # touching the global random state.
        generator = None
        if seed is not None:
            generator = torch.Generator().manual_seed(seed)

        self.X = torch.rand(num_samples, input_dim, generator=generator)
        # Integer (long) class labels derived from the per-sample feature mean.
        self.y = (torch.mean(self.X, dim=1) > 1/2).long()

    def __len__(self):
        """Return the number of samples."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return the ``(features, label)`` pair stored at ``idx``."""
        return self.X[idx], self.y[idx]

In [None]:
# Create model
class MultiLayerNet(nn.Module):
    """Fully connected network with two hidden layers.

    Each hidden layer is ``Linear -> BatchNorm1d -> ReLU``. A final linear
    layer maps to ``output_dim`` raw scores; no softmax is applied.

    Args:
        input_dim: Number of input features.
        hidden_dims: Sizes of the hidden layers. Only the first two entries
            are used. The default is a tuple rather than a list so that no
            mutable object is shared between calls.
        output_dim: Number of output scores per sample.
    """

    def __init__(self, input_dim=20, hidden_dims=(64, 32), output_dim=2):
        super().__init__()
        first_width, second_width = hidden_dims[0], hidden_dims[1]

        # First hidden layer
        self.fc1 = nn.Linear(input_dim, first_width)
        self.bn1 = nn.BatchNorm1d(first_width)
        self.activation1 = nn.ReLU()

        # Second hidden layer
        self.fc2 = nn.Linear(first_width, second_width)
        self.bn2 = nn.BatchNorm1d(second_width)
        self.activation2 = nn.ReLU()

        # Output layer
        self.head = nn.Linear(second_width, output_dim)

    def forward(self, x):
        """Apply both hidden layers and the output layer to ``x``."""
        x = self.activation1(self.bn1(self.fc1(x)))
        x = self.activation2(self.bn2(self.fc2(x)))
        return self.head(x)

In [None]:
# Training loop
def train(model, dataloader, criterion, optimizer, device):
    """Run one training epoch and report the average per-batch loss.

    Args:
        model: Module to optimise; it is switched to training mode.
        dataloader: Iterable yielding ``(inputs, targets)`` batches. It does
            not need to support ``len()``.
        criterion: Callable computing the loss from ``(outputs, targets)``.
        optimizer: Optimizer that updates the parameters of ``model``.
        device: Device the batches are moved to before the forward pass.

    Returns:
        The mean of the per-batch loss values as a float, or NaN when the
        dataloader yields no batches.
    """
    model.train()
    total_loss = 0.0
    num_batches = 0
    for inputs, targets in dataloader:
        inputs, targets = inputs.to(device), targets.to(device)

        # Gradients accumulate across backward() calls, so reset them first.
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
        num_batches += 1

    # Counting batches here (instead of calling len(dataloader)) supports
    # length-less iterables and avoids a division by zero on an empty loader.
    avg_loss = total_loss / num_batches if num_batches else float("nan")
    print(f"Train Loss: {avg_loss:.4f}")
    return avg_loss

In [None]:
# Validation loop
def validate(model, dataloader, criterion, device):
    """Evaluate the model without updating it and report loss and accuracy.

    Args:
        model: Module to evaluate; it is switched to evaluation mode.
        dataloader: Iterable yielding ``(inputs, targets)`` batches. It does
            not need to support ``len()``.
        criterion: Callable computing the loss from ``(outputs, targets)``.
        device: Device the batches are moved to before the forward pass.

    Returns:
        A tuple ``(avg_loss, accuracy)``: the mean of the per-batch loss
        values and the percentage of samples whose arg-max prediction equals
        the target. Both are NaN when the dataloader yields no data.
    """
    model.eval()
    total_loss = 0.0
    num_batches = 0
    correct = 0
    total = 0

    # No gradients are needed for evaluation.
    with torch.no_grad():
        for inputs, targets in dataloader:
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            total_loss += loss.item()
            num_batches += 1

            # Predicted class = index of the largest score in each row.
            preds = outputs.argmax(dim=1)
            correct += (preds == targets).sum().item()
            total += targets.size(0)

    # Counting batches here (instead of calling len(dataloader)) supports
    # length-less iterables; the guards avoid dividing by zero on empty input.
    avg_loss = total_loss / num_batches if num_batches else float("nan")
    accuracy = 100.0 * correct / total if total else float("nan")
    print(f"Validation Loss: {avg_loss:.4f}, Accuracy: {accuracy:.2f}%")
    return avg_loss, accuracy

In [None]:
# Hyperparameters and setup
seed = 42
input_dim = 20
batch_size = 32
epochs = 5
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Seed torch's global random number generator before any random draw below, so
# the synthetic data, the train/validation split and the shuffling order are
# repeatable when the notebook is run from top to bottom.
torch.manual_seed(seed)

# Dataset and split (80% training / 20% validation)
dataset = SyntheticDataset(num_samples=1000, input_dim=input_dim)
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
train_dataset, val_dataset = random_split(dataset, [train_size, val_size])

# Shuffle only the training data; the validation order does not matter.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

In [None]:
# Build the network and move it to the selected device.
model = MultiLayerNet(input_dim=input_dim)
model = model.to(device)

# Cross-entropy loss for classification.
criterion = nn.CrossEntropyLoss()

# Adam with a small weight decay as regularization.
optimizer = optim.Adam(
    model.parameters(),
    lr=0.001,
    weight_decay=1e-4,
)

In [None]:
# Each epoch is one full pass over the training data followed by one
# evaluation pass over the validation data.
for epoch in range(epochs):
    print(f"\nEpoch {epoch + 1}/{epochs}")
    train(
        model=model,
        dataloader=train_loader,
        criterion=criterion,
        optimizer=optimizer,
        device=device,
    )
    validate(
        model=model,
        dataloader=val_loader,
        criterion=criterion,
        device=device,
    )

# Tasks
- Add early stopping: end training once the validation loss has stopped improving for several consecutive epochs.
- Experiment with the size of each layer (`hidden_dims`), the activation function, and the regularization settings (e.g. `weight_decay`), and observe how training changes.