In [1]:
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision import transforms
from torchvision.transforms import ToTensor
from torch.utils.data import Subset

In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)
#epoch = 5
batch_size = 32
LR = 1e-4

cpu


In [3]:
train_tf = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(0.5, 0.5)
])

In [4]:
test_tf = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(0.5,0.5)
])

In [5]:
train_set = datasets.FashionMNIST(
    root = "/mnt/d/Fashion-MNIST-1/",
    train = True,
    download = True,
    transform = train_tf
)
print(len(train_set))

60000


# NOTE: this block is used to test robustness against malformed inputs

In [6]:
import random
import torch

class DirtyWrapper(torch.utils.data.Dataset):
    def __init__(self, base_ds, break_prob=0.02, seed=42):
        self.base = base_ds
        self.break_prob = break_prob
        self.rng = random.Random(seed)

    def __len__(self):
        return len(self.base)

    def __getitem__(self, idx):
        x, y = self.base[idx]  # x expected: (1,28,28)
        if self.rng.random() < self.break_prob:
            mode = self.rng.choice(["drop_channel", "add_dim"])
            if mode == "drop_channel":
                x = x.squeeze(0)        # (28,28)  ❌
            else:
                x = x.unsqueeze(0)       # (1,1,28,28) ❌
        return x, y


In [7]:
train_set = DirtyWrapper(train_set, break_prob=0.02, seed=42)

In [8]:
test_set = datasets.FashionMNIST(
    root = "/mnt/d/Fashion-MNIST-1/",
    train = False,
    download = True,
    transform = test_tf
)
print(len(test_set))

10000


In [9]:
train_load = DataLoader(train_set,batch_size=batch_size,shuffle=True,num_workers=0)
test_load = DataLoader(test_set,batch_size=batch_size,shuffle=False)
print(len(train_load))
print(len(test_load))

1875
313


In [10]:
for X,y in train_load:
    print(X.shape)
    print(y.shape,y.dtype)
    break

RuntimeError: stack expects each tensor to be equal size, but got [1, 28, 28] at entry 0 and [28, 28] at entry 19

In [16]:
class MLP(nn.Module):
    def __init__(self):
        super().__init__()
        #self.flatten = nn.Flatten()
        self.net = nn.Sequential(
            nn.Flatten(),
            nn.Linear(28*28,512),
            nn.ReLU(),
            nn.Linear(512,512),
            nn.ReLU(),
            nn.Linear(512,10)
        )
    def forward(self,x):
        logits = self.net(x)
        #print(logits.shape)
        return logits
model = MLP().to(device)
print(model)

MLP(
  (net): Sequential(
    (0): Flatten(start_dim=1, end_dim=-1)
    (1): Linear(in_features=784, out_features=512, bias=True)
    (2): ReLU()
    (3): Linear(in_features=512, out_features=512, bias=True)
    (4): ReLU()
    (5): Linear(in_features=512, out_features=10, bias=True)
  )
)


In [17]:
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(),lr=LR)

In [23]:
def train_epoch(train_load,model,loss_fn,optimizer):
    model.train()
    #print("len(train_set) =", len(train_set))
    #print("len(train_load) =", len(train_load))
    total_loss,correct,total = 0.0,0,0
    for x,y in train_load:
        x,y = x.to(device),y.to(device)
        assert x.ndim == 4 and x.shape[1:] == (1, 28, 28), f"Bad x shape: {x.shape}"
        pred = model(x)
        loss = loss_fn(pred,y)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()* x.size(0)
        correct += (pred.argmax(1) == y).sum().item()
        total += x.size(0)

    return total_loss / total, correct / total

In [19]:
def test_epoch(test_load,model,loss_fn):
    model.eval()
    total_loss,correct,total = 0.0,0,0
    with torch.no_grad():
        for x,y in test_load:
            x,y = x.to(device),y.to(device)
            pred = model(x)
            loss = loss_fn(pred,y)

            total_loss += loss.item()* x.size(0)
            correct += (pred.argmax(1) == y).sum().item()
            total += x.size(0)

    return total_loss / total, correct / total

In [11]:
epoch = 10
best_acc = 0.0
for ep in range(epoch):
    tr_loss, tr_acc = train_epoch(train_load,model,loss_fn,optimizer)
    te_loss, te_acc = test_epoch(test_load,model,loss_fn)
    print(f"Epoch {ep}: train_loss={tr_loss:.4f} acc={tr_acc:.4f} | test_loss={te_loss:.4f} acc={te_acc:.4f}")

    if te_acc > best_acc:
        best_acc = te_acc
        torch.save(model.state_dict(), "best_model.pt")
        print(f"  ✓ Saved best model (acc={best_acc:.4f})")

NameError: name 'train_epoch' is not defined