# Clasificación de Imágenes con PyTorch y MLP

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision.datasets import CIFAR10
from torchvision import transforms
from torchvision.transforms import ToTensor
from albumentations.pytorch import ToTensorV2
import albumentations as A
import numpy as np
from PIL import Image
from torch.utils.tensorboard import SummaryWriter

In [2]:
class AlbumentationsDataset(torch.utils.data.Dataset):
    """Adapt an ``(image, label)`` dataset to an Albumentations-style transform.

    Each image fetched from the wrapped dataset is converted to a NumPy array
    and, when a transform is configured, passed through it using the keyword
    calling convention ``transform(image=array)['image']``.
    """

    def __init__(self, dataset, transform=None):
        """Store the wrapped dataset and the optional transform.

        Args:
            dataset: Object supporting ``dataset[index] -> (image, label)``
                and ``len(dataset)``.
            transform: Optional callable invoked as ``transform(image=array)``
                that returns a mapping with an ``'image'`` entry. ``None``
                leaves images untransformed (as NumPy arrays).
        """
        self.dataset = dataset
        self.transform = transform

    def __getitem__(self, index):
        """Return ``(image, label)`` for ``index`` with the transform applied."""
        img, label = self.dataset[index]
        img = np.array(img)
        # Test against None explicitly rather than by truthiness: a transform
        # object that defines __len__ or __bool__ can evaluate as falsy and
        # would otherwise be skipped silently.
        if self.transform is not None:
            img = self.transform(image=img)['image']
        return img, label

    def __len__(self):
        """Return the number of samples in the wrapped dataset."""
        return len(self.dataset)

# Shared preprocessing pipeline used for both splits:
#   1. resize every image to 64x64,
#   2. normalize each of the three channels with mean 0.5 and std 0.5
#      (assuming the library's default pixel scaling, this maps 8-bit
#      pixels to roughly [-1, 1] -- TODO confirm against the installed version),
#   3. convert the array to a torch tensor.
transform = A.Compose(
    [
        A.Resize(height=64, width=64),
        A.Normalize(mean=(0.5,) * 3, std=(0.5,) * 3),
        ToTensorV2(),
    ]
)

In [3]:
class SimpleMLP(nn.Module):
    """Single-hidden-layer perceptron: flatten -> linear -> ReLU -> linear.

    The forward pass returns the raw output of the last linear layer; no
    softmax is applied inside the model.
    """

    def __init__(self, input_size, hidden_size, num_classes):
        """Build the layer stack.

        Args:
            input_size: Number of features per sample after flattening.
            hidden_size: Width of the hidden layer.
            num_classes: Number of output scores per sample.
        """
        super().__init__()
        layers = [
            nn.Flatten(),
            nn.Linear(input_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, num_classes),
        ]
        # Attribute name and layer order are kept so state_dict keys stay
        # "net.1.*" and "net.3.*".
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Return the per-class scores for the batch ``x``."""
        logits = self.net(x)
        return logits

In [4]:
def train(model, loader, optimizer, criterion, device):
    """Run one optimization pass over ``loader`` and return the mean batch loss.

    Args:
        model: Module to optimize; it is switched to training mode.
        loader: Sized iterable yielding ``(inputs, targets)`` batches.
        optimizer: Optimizer stepped once per batch.
        criterion: Callable ``criterion(outputs, targets)`` returning a scalar
            loss tensor.
        device: Device each batch is moved to before the forward pass.

    Returns:
        Sum of the per-batch loss values divided by ``len(loader)``.
    """
    model.train()
    running_loss = 0
    for inputs, targets in loader:
        inputs = inputs.to(device)
        targets = targets.to(device)

        # Clear gradients left over from the previous batch.
        optimizer.zero_grad()
        batch_loss = criterion(model(inputs), targets)
        batch_loss.backward()
        optimizer.step()

        running_loss += batch_loss.item()
    return running_loss / len(loader)

def evaluate(model, loader, criterion, device):
    """Compute the mean batch loss and the accuracy of ``model`` over ``loader``.

    Accuracy is computed over the samples actually yielded by ``loader``, so
    it stays correct when the loader skips part of its dataset (for example
    with ``drop_last=True`` or a sampler), and it also works for plain
    iterables of batches that have no ``.dataset`` attribute.

    Args:
        model: Module to evaluate; it is switched to eval mode.
        loader: Iterable yielding ``(inputs, targets)`` batches.
        criterion: Callable ``criterion(outputs, targets)`` returning a scalar
            loss tensor.
        device: Device each batch is moved to before the forward pass.

    Returns:
        Tuple ``(mean_batch_loss, accuracy)``: the summed per-batch loss
        divided by the number of batches, and the fraction of samples whose
        arg-max prediction equals the target.

    Raises:
        ZeroDivisionError: If ``loader`` yields no batches.
    """
    model.eval()
    correct, total_loss = 0, 0
    num_samples, num_batches = 0, 0
    with torch.no_grad():
        for x, y in loader:
            x, y = x.to(device), y.to(device)
            output = model(x)
            total_loss += criterion(output, y).item()
            preds = output.argmax(1)
            correct += (preds == y).sum().item()
            # Count what was really seen instead of trusting len(loader.dataset).
            num_samples += y.size(0)
            num_batches += 1
    acc = correct / num_samples
    return total_loss / num_batches, acc

Uso CIFAR-10 como dataset de prueba para evaluar la clasificación de imágenes con PyTorch y un MLP simple. El mismo procedimiento se podría aplicar al dataset Split_smol.

In [5]:
def main(epochs=10, batch_size=64, hidden_size=256, lr=0.001, data_root='./data', seed=None):
    """Train and evaluate a ``SimpleMLP`` on CIFAR-10, logging to TensorBoard.

    Downloads CIFAR-10 into ``data_root`` if needed, wraps both splits with the
    module-level ``transform``, then alternates one training pass and one
    evaluation pass per epoch. Per-epoch metrics are printed and written as
    TensorBoard scalars.

    Args:
        epochs: Number of train/evaluate rounds.
        batch_size: Batch size used by both data loaders.
        hidden_size: Width of the MLP hidden layer.
        lr: Learning rate passed to Adam.
        data_root: Directory where CIFAR-10 is stored or downloaded.
        seed: Optional integer passed to ``torch.manual_seed`` before the
            model and loaders are created; ``None`` leaves the RNG untouched.

    Returns:
        None.
    """
    if seed is not None:
        torch.manual_seed(seed)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    base_train = CIFAR10(root=data_root, train=True, download=True)
    base_test = CIFAR10(root=data_root, train=False, download=True)

    train_dataset = AlbumentationsDataset(base_train, transform)
    test_dataset = AlbumentationsDataset(base_test, transform)

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_size)

    # 3 channels x 64 x 64 pixels; must match the Resize step of the
    # module-level `transform`.
    input_size = 3 * 64 * 64
    num_classes = 10

    model = SimpleMLP(input_size, hidden_size, num_classes).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)

    # The context manager closes the writer even if an epoch raises.
    with SummaryWriter() as writer:
        for epoch in range(epochs):
            train_loss = train(model, train_loader, optimizer, criterion, device)
            # NOTE: the CIFAR-10 test split (train=False) is what gets reported
            # under the "Validation" names below.
            val_loss, val_acc = evaluate(model, test_loader, criterion, device)

            print(f"Epoch {epoch+1}: train_loss={train_loss:.4f}, val_loss={val_loss:.4f}, val_acc={val_acc:.4f}")
            writer.add_scalar("Loss/Train", train_loss, epoch)
            writer.add_scalar("Loss/Validation", val_loss, epoch)
            writer.add_scalar("Accuracy/Validation", val_acc, epoch)

# Run the full train/evaluate loop defined in main(); per-epoch metrics are printed below.
main()

100%|██████████| 170M/170M [01:04<00:00, 2.64MB/s] 


Epoch 1: train_loss=1.7815, val_loss=1.5877, val_acc=0.4538
Epoch 2: train_loss=1.5412, val_loss=1.5496, val_acc=0.4600
Epoch 3: train_loss=1.4875, val_loss=1.5488, val_acc=0.4604
Epoch 4: train_loss=1.4488, val_loss=1.6101, val_acc=0.4618
Epoch 5: train_loss=1.4189, val_loss=1.5882, val_acc=0.4643
Epoch 6: train_loss=1.3781, val_loss=1.5971, val_acc=0.4691
Epoch 7: train_loss=1.3533, val_loss=1.4963, val_acc=0.5028
Epoch 8: train_loss=1.3269, val_loss=1.5489, val_acc=0.4973
Epoch 9: train_loss=1.2856, val_loss=1.5591, val_acc=0.4997
Epoch 10: train_loss=1.2699, val_loss=1.5751, val_acc=0.4906


In [7]:
# %load_ext tensorboard