In [2]:
import torch
import torchvision
import torchvision.transforms.v2 as T

In [3]:
toTensor = T.Compose([T.ToImage(), T.ToDtype(torch.float32,
scale=True)])
train_and_valid_data = torchvision.datasets.FashionMNIST(
root="datasets", train=True, download=True,
transform=toTensor)
test_data = torchvision.datasets.FashionMNIST(
root="datasets", train=False, download=True,
transform=toTensor)
torch.manual_seed(42)
train_data, valid_data = torch.utils.data.random_split(
train_and_valid_data, [55_000, 5_000])

100%|██████████| 26.4M/26.4M [00:05<00:00, 5.03MB/s]
100%|██████████| 29.5k/29.5k [00:00<00:00, 602kB/s]
100%|██████████| 4.42M/4.42M [00:01<00:00, 3.28MB/s]
100%|██████████| 5.15k/5.15k [00:00<00:00, 6.76MB/s]


In [6]:
from torch.utils.data import DataLoader

In [26]:
train_loader = DataLoader(train_data, batch_size=32,
shuffle=True)
valid_loader = DataLoader(valid_data, batch_size=32)
test_loader = DataLoader(test_data, batch_size=32)

In [8]:
X_sample, y_sample = train_data[0]

In [13]:
X_sample.shape, X_sample.dtype

(torch.Size([1, 28, 28]), torch.float32)

In [27]:
import torch.nn as nn

class ImageClassifier(nn.Module):
    def __init__(self, n_inputs, n_hidden1, n_hidden2, n_classes):
        super().__init__()
        self.mlp = nn.Sequential(
            nn.Flatten(),
            nn.Linear(n_inputs, n_hidden1),
            nn.ReLU(),
            nn.Linear(n_hidden1, n_hidden2),
            nn.ReLU(),
            nn.Linear(n_hidden2, n_classes)
        )

    def forward(self, X):
        return self.mlp(X)
    
torch.manual_seed(42)
model = ImageClassifier(n_inputs=28 * 28, n_hidden1=300, n_hidden2=100, n_classes=10)
xentropy = nn.CrossEntropyLoss()

In [28]:
if torch.cuda.is_available():
    device = 'cuda'
elif torch.backends.mps.is_available():
    device = 'mps'
else:
    device = 'cpu'

In [29]:
import torchmetrics

accuracy = torchmetrics.Accuracy(task="multiclass", num_classes=10).to(device)

In [32]:
def train(model, optimizer, criterion, train_loader, n_epochs):
    model.train()
    for epoch in range(n_epochs):
        total_loss = 0
        for X_batch, y_batch in train_loader:
            # X_batch, y_batch = X_batch.to('mps'), y_batch.to('mps')
            y_pred = model(X_batch)
            loss = criterion(y_pred, y_batch)
            total_loss += loss.item()
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()

        mean_loss = total_loss / len(train_loader)
        print(f"Epoch {epoch + 1}/{n_epochs}, Loss: {mean_loss:.4f}")

In [33]:
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)

train(model, optimizer, xentropy, train_loader, 20)

Epoch 1/20, Loss: 0.6061
Epoch 2/20, Loss: 0.4069
Epoch 3/20, Loss: 0.3639
Epoch 4/20, Loss: 0.3365
Epoch 5/20, Loss: 0.3146
Epoch 6/20, Loss: 0.2979
Epoch 7/20, Loss: 0.2841
Epoch 8/20, Loss: 0.2737
Epoch 9/20, Loss: 0.2623
Epoch 10/20, Loss: 0.2534
Epoch 11/20, Loss: 0.2453
Epoch 12/20, Loss: 0.2383
Epoch 13/20, Loss: 0.2299
Epoch 14/20, Loss: 0.2211
Epoch 15/20, Loss: 0.2161
Epoch 16/20, Loss: 0.2092
Epoch 17/20, Loss: 0.2029
Epoch 18/20, Loss: 0.1952
Epoch 19/20, Loss: 0.1923
Epoch 20/20, Loss: 0.1892


In [38]:
def evaluate_tm(model, data_loader, metric):
    model.eval()
    metric.reset()
    with torch.no_grad():
        for X_batch, y_batch in data_loader:
            # X_batch, y_batch = X_batch.to(device), y_batch.to(device)
            y_pred = model(X_batch)
            metric.update(y_pred, y_batch)
            return metric.compute() 

In [39]:
evaluate_tm(model, valid_loader, accuracy)

tensor(0.8438, device='mps:0')

In [40]:
evaluate_tm(model, train_loader, accuracy)

tensor(0.9375, device='mps:0')

In [42]:
model.eval()
X_new, y_new = next(iter(valid_loader))
X_new = X_new[:3]
with torch.no_grad():
    y_pred_logits = model(X_new)
    y_pred = y_pred_logits.argmax(dim=1)

In [43]:
y_pred

tensor([7, 4, 2])

In [44]:
import torch.nn.functional as F

y_proba = F.softmax(y_pred_logits, dim=1)
y_proba.round(decimals=3)

tensor([[0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.9850, 0.0000,
         0.0150],
        [0.0000, 0.0000, 0.0140, 0.0000, 0.9850, 0.0000, 0.0010, 0.0000, 0.0000,
         0.0000],
        [0.0000, 0.0000, 0.9480, 0.0000, 0.0340, 0.0000, 0.0180, 0.0000, 0.0000,
         0.0000]])

In [45]:
import optuna

def objective(trial):
    learning_rate = trial.suggest_float('learning_rate', 1e-5, 1e-1, log=True)
    n_hidden = trial.suggest_int('n_hidden', 20, 300)
    model = ImageClassifier(1 * 28 * 28, n_hidden, n_hidden, 10)
    optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
    xentropy = nn.CrossEntropyLoss()
    train(model, optimizer, xentropy, train_loader, 20)
    return evaluate_tm(model, valid_loader, accuracy)

In [46]:
torch.manual_seed(42)
sampler = optuna.samplers.TPESampler(seed=42)
study = optuna.create_study(direction="maximize",
sampler=sampler)
study.optimize(objective, n_trials=5)

[I 2025-10-31 22:48:10,950] A new study created in memory with name: no-name-79d54f3e-f882-4822-aec7-b7b3339f908a


Epoch 1/20, Loss: 2.2769
Epoch 2/20, Loss: 2.2093
Epoch 3/20, Loss: 2.1164
Epoch 4/20, Loss: 1.9776
Epoch 5/20, Loss: 1.7867
Epoch 6/20, Loss: 1.5775
Epoch 7/20, Loss: 1.3979
Epoch 8/20, Loss: 1.2605
Epoch 9/20, Loss: 1.1573
Epoch 10/20, Loss: 1.0782
Epoch 11/20, Loss: 1.0162
Epoch 12/20, Loss: 0.9665
Epoch 13/20, Loss: 0.9258
Epoch 14/20, Loss: 0.8918
Epoch 15/20, Loss: 0.8629
Epoch 16/20, Loss: 0.8382
Epoch 17/20, Loss: 0.8165
Epoch 18/20, Loss: 0.7974
Epoch 19/20, Loss: 0.7803


[I 2025-10-31 22:49:31,216] Trial 0 finished with value: 0.75 and parameters: {'learning_rate': 0.00031489116479568613, 'n_hidden': 287}. Best is trial 0 with value: 0.75.


Epoch 20/20, Loss: 0.7649
Epoch 1/20, Loss: 1.1392
Epoch 2/20, Loss: 0.6219
Epoch 3/20, Loss: 0.5251
Epoch 4/20, Loss: 0.4826
Epoch 5/20, Loss: 0.4571
Epoch 6/20, Loss: 0.4404
Epoch 7/20, Loss: 0.4237
Epoch 8/20, Loss: 0.4120
Epoch 9/20, Loss: 0.4019
Epoch 10/20, Loss: 0.3921
Epoch 11/20, Loss: 0.3840
Epoch 12/20, Loss: 0.3743
Epoch 13/20, Loss: 0.3665
Epoch 14/20, Loss: 0.3594
Epoch 15/20, Loss: 0.3520
Epoch 16/20, Loss: 0.3457
Epoch 17/20, Loss: 0.3399
Epoch 18/20, Loss: 0.3342
Epoch 19/20, Loss: 0.3280


[I 2025-10-31 22:50:41,586] Trial 1 finished with value: 0.875 and parameters: {'learning_rate': 0.008471801418819975, 'n_hidden': 188}. Best is trial 1 with value: 0.875.


Epoch 20/20, Loss: 0.3226
Epoch 1/20, Loss: 2.3091
Epoch 2/20, Loss: 2.3005
Epoch 3/20, Loss: 2.2929
Epoch 4/20, Loss: 2.2859
Epoch 5/20, Loss: 2.2792
Epoch 6/20, Loss: 2.2726
Epoch 7/20, Loss: 2.2662
Epoch 8/20, Loss: 2.2598
Epoch 9/20, Loss: 2.2534
Epoch 10/20, Loss: 2.2469
Epoch 11/20, Loss: 2.2402
Epoch 12/20, Loss: 2.2332
Epoch 13/20, Loss: 2.2256
Epoch 14/20, Loss: 2.2177
Epoch 15/20, Loss: 2.2094
Epoch 16/20, Loss: 2.2006
Epoch 17/20, Loss: 2.1913
Epoch 18/20, Loss: 2.1813
Epoch 19/20, Loss: 2.1705


[I 2025-10-31 22:51:41,892] Trial 2 finished with value: 0.3125 and parameters: {'learning_rate': 4.207988669606632e-05, 'n_hidden': 63}. Best is trial 1 with value: 0.875.


Epoch 20/20, Loss: 2.1592
Epoch 1/20, Loss: 2.3102
Epoch 2/20, Loss: 2.3072
Epoch 3/20, Loss: 2.3042
Epoch 4/20, Loss: 2.3012
Epoch 5/20, Loss: 2.2983
Epoch 6/20, Loss: 2.2954
Epoch 7/20, Loss: 2.2925
Epoch 8/20, Loss: 2.2897
Epoch 9/20, Loss: 2.2868
Epoch 10/20, Loss: 2.2840
Epoch 11/20, Loss: 2.2812
Epoch 12/20, Loss: 2.2784
Epoch 13/20, Loss: 2.2757
Epoch 14/20, Loss: 2.2729
Epoch 15/20, Loss: 2.2701
Epoch 16/20, Loss: 2.2672
Epoch 17/20, Loss: 2.2644
Epoch 18/20, Loss: 2.2615
Epoch 19/20, Loss: 2.2586


[I 2025-10-31 22:52:57,198] Trial 3 finished with value: 0.28125 and parameters: {'learning_rate': 1.7073967431528103e-05, 'n_hidden': 263}. Best is trial 1 with value: 0.875.


Epoch 20/20, Loss: 2.2557
Epoch 1/20, Loss: 1.8684
Epoch 2/20, Loss: 0.9865
Epoch 3/20, Loss: 0.7694
Epoch 4/20, Loss: 0.6826
Epoch 5/20, Loss: 0.6248
Epoch 6/20, Loss: 0.5834
Epoch 7/20, Loss: 0.5518
Epoch 8/20, Loss: 0.5279
Epoch 9/20, Loss: 0.5084
Epoch 10/20, Loss: 0.4928
Epoch 11/20, Loss: 0.4800
Epoch 12/20, Loss: 0.4692
Epoch 13/20, Loss: 0.4604
Epoch 14/20, Loss: 0.4532
Epoch 15/20, Loss: 0.4459
Epoch 16/20, Loss: 0.4394
Epoch 17/20, Loss: 0.4342
Epoch 18/20, Loss: 0.4288
Epoch 19/20, Loss: 0.4240


[I 2025-10-31 22:54:06,647] Trial 4 finished with value: 0.78125 and parameters: {'learning_rate': 0.002537815508265664, 'n_hidden': 218}. Best is trial 1 with value: 0.875.


Epoch 20/20, Loss: 0.4200
