In [1]:
from torch import nn
from torch.nn import functional as F
import torch

from torchvision.datasets import MNIST
from torchvision import transforms

from torch.utils.data import DataLoader, random_split

from torch import optim

# Notebook-wide compute device. Prefer the first CUDA GPU (kept as "cuda:0" so
# the printed value matches earlier runs) but fall back to the CPU when no GPU
# is available, so later `.to(device)` calls do not fail on CPU-only machines.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

cuda:0


In [None]:
class ANN1(nn.Module):
  """Fully connected network with two ReLU hidden layers.

  The input is flattened with ``nn.Flatten`` and passed through
  ``fc1 -> ReLU -> fc2 -> ReLU -> fc3``. No activation is applied after
  ``fc3``, so the output holds unnormalised scores.

  Args:
    input_size: number of features per sample after flattening.
    hidden1: width of the first hidden layer.
    hidden2: width of the second hidden layer.
    output_size: number of output scores per sample.
  """

  def __init__(self, input_size: int, hidden1: int, hidden2: int, output_size: int):
    super().__init__()
    self.flattened = nn.Flatten()
    # Layer widths chain as input_size -> hidden1 -> hidden2 -> output_size.
    self.fc1 = nn.Linear(in_features=input_size, out_features=hidden1)
    self.fc2 = nn.Linear(in_features=hidden1, out_features=hidden2)
    self.fc3 = nn.Linear(in_features=hidden2, out_features=output_size)

  def forward(self, x):
    """Return the ``fc3`` scores for the batch ``x``."""
    hidden = self.flattened(x)
    # Both hidden layers are followed by a ReLU; the output layer is not.
    for layer in (self.fc1, self.fc2):
      hidden = F.relu(layer(hidden))
    return self.fc3(hidden)

In [None]:
class ANN2(nn.Module):
  """Fully connected network with two equally wide ReLU hidden layers.

  The input is flattened with ``nn.Flatten`` and passed through
  ``Linear -> ReLU -> Linear -> ReLU -> Linear``. The final layer has no
  activation, so ``forward`` returns logits.

  Args:
    input_size: number of features per sample after flattening.
    output_size: number of logits per sample.
    hidden_size: width of both hidden layers. Defaults to 512, the value that
      was previously hard-coded, so existing callers are unaffected.
  """

  def __init__(self, input_size: int, output_size: int, hidden_size: int = 512):
    super().__init__()
    self.flatten = nn.Flatten()
    self.linear_relu_stack = nn.Sequential(
      nn.Linear(input_size, hidden_size),
      nn.ReLU(),
      nn.Linear(hidden_size, hidden_size),
      nn.ReLU(),
      nn.Linear(hidden_size, output_size),
    )

  def forward(self, x):
    """Return the logits for the batch ``x``."""
    x = self.flatten(x)
    logits = self.linear_relu_stack(x)
    return logits

import os

# Debugging aid: CUDA_LAUNCH_BLOCKING=1 asks CUDA to run kernel launches
# synchronously, so an error is reported at the call that caused it.
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"

# Place the model on the notebook-wide `device` (defined in the first cell)
# rather than a hard-coded "cuda", so device selection lives in one place.
model = ANN2(28*28, 10).to(device)
print(model)

ANN2(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)


In [10]:
# Rebind `model` to the smaller ANN1 network (784 -> 32 -> 32 -> 10). No `.to(...)`
# is applied here, so its parameters stay on PyTorch's default device.
model = ANN1(
  input_size=28 * 28,
  hidden1=32,
  hidden2=32,
  output_size=10,
)

In [32]:
# Preprocessing applied to every image: convert to a tensor, then normalise
# with mean 0.5 and std 0.5. `(0.5,)` is a real one-element tuple (one value per
# channel); the previous `(0.5)` was just the float 0.5 in parentheses.
# NOTE: the variable name keeps its original spelling because other cells may
# refer to it.
transoform_pipeline = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5,), std=(0.5,)),
])

train_size, val_size = 50000, 10000
# Fixed seed so the train/validation partition is identical on every re-run.
# Without it, re-running this cell would move former training images into the
# validation set of an already-trained model.
split_seed = 0

full_train_data = MNIST(root='./data', train=True, download=True, transform=transoform_pipeline)
test_data = MNIST(root='./data', train=False, download=True, transform=transoform_pipeline)
train_data, val_data = random_split(
    full_train_data,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(split_seed),
)

# Only the training loader is shuffled; evaluation metrics are sums over the
# whole split, so shuffling the validation/test loaders adds nothing.
train_loader = DataLoader(train_data, shuffle=True, batch_size=64,
                          num_workers=1)
val_loader = DataLoader(val_data, batch_size=64, shuffle=False,
                        num_workers=1)
test_loader = DataLoader(test_data, shuffle=False, batch_size=64,
                         num_workers=1)


In [31]:
# Quick sanity check: as the last expression in the cell, this displays the
# validation DataLoader's repr, confirming that the object exists.
val_loader

<torch.utils.data.dataloader.DataLoader at 0x7d39fd1e96f0>

In [33]:
# Train `model` for a fixed number of epochs with Adam + cross-entropy and,
# after each epoch, report the training loss, validation loss and validation
# accuracy.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Batches must be on the same device as the model's weights. Reading the device
# from the model (instead of assuming CPU) keeps this loop working whether or
# not the model was moved with `.to(...)`; for a CPU model the `.to` calls
# below are no-ops.
model_device = next(model.parameters()).device

num_epochs = 5
for epoch in range(num_epochs):
    # ---- Training ----
    model.train()
    train_loss = 0  # running sum of per-batch mean losses
    for images, labels in train_loader:
        images, labels = images.to(model_device), labels.to(model_device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()

    # ---- Validation ----
    model.eval()
    val_loss = 0  # running sum of per-batch mean losses
    correct = 0   # number of correctly classified validation samples
    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(model_device), labels.to(model_device)
            outputs = model(images)
            loss = criterion(outputs, labels)
            val_loss += loss.item()
            preds = outputs.argmax(dim=1)
            correct += (preds == labels).sum().item()

    val_acc = correct / len(val_data)
    # Reported losses are the average of the per-batch means (sum / number of batches).
    print(f"Epoch {epoch+1}: Train Loss={train_loss/len(train_loader):.4f}, "
          f"Val Loss={val_loss/len(val_loader):.4f}, Val Acc={val_acc:.4f}")
    

Epoch 1: Train Loss=0.2192, Val Loss=0.1985, Val Acc=0.9418
Epoch 2: Train Loss=0.1886, Val Loss=0.1836, Val Acc=0.9441
Epoch 3: Train Loss=0.1670, Val Loss=0.1642, Val Acc=0.9490
Epoch 4: Train Loss=0.1523, Val Loss=0.1626, Val Acc=0.9497
Epoch 5: Train Loss=0.1417, Val Loss=0.1601, Val Acc=0.9527
