# Clasificación de Imágenes con PyTorch y MLP

Adapto el modelo simple para usarlo con el dataset Split_smol y le agrego algunas modificaciones (BatchNorm, Dropout, inicialización de Kaiming y weight decay) para hacerlo más complejo y más eficiente.

In [4]:
# Clasificación de imágenes con MLP y PyTorch

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision.datasets import CIFAR10
from torchvision.transforms import ToTensor
from torch.utils.tensorboard import SummaryWriter
import albumentations as A
from albumentations.pytorch import ToTensorV2
from torchvision.datasets import CIFAR10
from torchvision import transforms
from PIL import Image
import numpy as np
import os

from pathlib import Path
import pandas as pd
from matplotlib import pyplot as plt

def get_class(x):
    """Return the class label of a file, i.e. the name of its parent folder.

    Args:
        x: A ``pathlib`` path object for a file stored as
            ``<root>/<class_name>/<file>``.

    Returns:
        The name of the directory that directly contains ``x``.
    """
    # ``parent.name`` is separator-agnostic. Splitting ``str(parent)`` on "/"
    # returned the whole parent path for backslash-separated (Windows) paths.
    return x.parent.name

# Transformaciones con Albumentations
def get_transforms():
    """Build the Albumentations preprocessing pipeline.

    Returns:
        An ``A.Compose`` that applies, in order: ``A.Resize`` to 64x64,
        ``A.Normalize`` with mean 0.5 and std 0.5 on each of the three
        channels, and ``ToTensorV2``.
    """
    side = 64
    channel_stats = (0.5, 0.5, 0.5)
    steps = [
        A.Resize(side, side),
        A.Normalize(mean=channel_stats, std=channel_stats),
        ToTensorV2(),
    ]
    return A.Compose(steps)

class AlbumentationsDataset(torch.utils.data.Dataset):
    """Image dataset backed by a DataFrame of file paths and class names.

    Args:
        dataframe: DataFrame with a ``path`` column (image file location) and
            a ``class`` column (class name). Its index is reset internally.
        transform: Optional callable invoked as ``transform(image=array)``
            that returns a mapping with an ``'image'`` entry.
        class_to_idx: Optional mapping from class name to integer label.
            Pass the mapping of the training dataset when building a
            validation/test dataset so that both splits share the same label
            indices. When omitted, the mapping is derived from the sorted
            unique class names found in ``dataframe``.

    Raises:
        ValueError: If ``class_to_idx`` is given but does not cover every
            class present in ``dataframe``.
    """

    def __init__(self, dataframe, transform=None, class_to_idx=None):
        self.dataframe = dataframe.reset_index(drop=True)
        self.transform = transform
        present = self.dataframe['class'].unique()
        if class_to_idx is None:
            # Sorting makes the name -> label assignment deterministic.
            class_to_idx = {cls: idx for idx, cls in enumerate(sorted(present))}
        else:
            class_to_idx = dict(class_to_idx)
            missing = set(present) - set(class_to_idx)
            if missing:
                # Fail early instead of with a KeyError in the middle of an epoch.
                raise ValueError(
                    f"class_to_idx has no entry for: {sorted(missing)}"
                )
        self.class_to_idx = class_to_idx

    def __getitem__(self, index):
        """Return ``(image, label)`` for the row at position ``index``."""
        row = self.dataframe.iloc[index]
        # The context manager closes the underlying file as soon as the pixel
        # data has been copied into the array.
        with Image.open(row['path']) as handle:
            img = np.array(handle.convert('RGB'))
        label = self.class_to_idx[row['class']]
        if self.transform is not None:
            img = self.transform(image=img)['image']
        return img, label

    def __len__(self):
        """Return the number of rows (images) in the dataset."""
        return len(self.dataframe)

# Modelo MLP con BatchNorm y Dropout
class MLP(nn.Module):
    """Fully connected classifier with hidden layers of 512 and 256 units.

    The input is flattened, then each hidden stage applies
    Linear -> BatchNorm1d -> ReLU -> Dropout(0.5). A final Linear layer emits
    ``num_classes`` scores per sample.

    Args:
        in_features: Number of values per flattened sample.
        num_classes: Size of the output layer.
    """

    def __init__(self, in_features=3*64*64, num_classes=10):
        super().__init__()
        layers = [nn.Flatten()]
        width = in_features
        for hidden in (512, 256):
            layers.extend([
                nn.Linear(width, hidden),
                nn.BatchNorm1d(hidden),
                nn.ReLU(),
                nn.Dropout(0.5),
            ])
            width = hidden
        layers.append(nn.Linear(width, num_classes))
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Run ``x`` through the layer stack and return the class scores."""
        logits = self.net(x)
        return logits

    def init_weights(self):
        """Re-initialize every Linear layer: Kaiming-normal weights, zero biases."""
        linear_layers = [m for m in self.modules() if isinstance(m, nn.Linear)]
        for layer in linear_layers:
            nn.init.kaiming_normal_(layer.weight)
            nn.init.zeros_(layer.bias)

# Entrenamiento
def train(model, loader, optimizer, criterion, device):
    """Run one optimization pass over ``loader`` and return the mean batch loss.

    Args:
        model: Module to optimize; it is switched to training mode.
        loader: Sized iterable yielding ``(inputs, targets)`` batches.
        optimizer: Optimizer that updates the parameters of ``model``.
        criterion: Callable ``criterion(output, targets)`` returning the loss.
        device: Device the batches are moved to before the forward pass.

    Returns:
        Sum of the per-batch loss values divided by ``len(loader)``.
    """
    model.train()
    batch_losses = []
    for inputs, targets in loader:
        inputs = inputs.to(device)
        targets = targets.to(device)

        optimizer.zero_grad()
        batch_loss = criterion(model(inputs), targets)
        batch_loss.backward()
        optimizer.step()

        batch_losses.append(batch_loss.item())
    return sum(batch_losses) / len(loader)

# Validación
def evaluate(model, loader, criterion, device):
    """Score ``model`` on ``loader`` without tracking gradients.

    Args:
        model: Module to evaluate; it is switched to evaluation mode.
        loader: Sized iterable of ``(inputs, targets)`` batches that exposes a
            sized ``dataset`` attribute.
        criterion: Callable ``criterion(output, targets)`` returning the loss.
        device: Device the batches are moved to before the forward pass.

    Returns:
        A ``(mean_loss, accuracy)`` tuple: the per-batch loss values summed
        and divided by ``len(loader)``, and the number of correct argmax
        predictions divided by ``len(loader.dataset)``.
    """
    model.eval()
    hits = 0
    batch_losses = []
    with torch.no_grad():
        for inputs, targets in loader:
            inputs = inputs.to(device)
            targets = targets.to(device)
            logits = model(inputs)
            batch_losses.append(criterion(logits, targets).item())
            predicted = logits.argmax(1)
            hits += (predicted == targets).sum().item()
    accuracy = hits / len(loader.dataset)
    mean_loss = sum(batch_losses) / len(loader)
    return mean_loss, accuracy

# Main
def _index_images(data_dir):
    """List every file under ``data_dir`` together with its class and size.

    Args:
        data_dir: Root folder laid out as ``<data_dir>/<class_name>/<file>``.

    Returns:
        A DataFrame with columns ``path``, ``class`` (from ``get_class``) and
        ``resolution`` (the PIL ``size`` tuple of the image).

    Raises:
        FileNotFoundError: If no file is found under ``data_dir``.
    """
    records = []
    for path in Path(data_dir).glob('**/*'):
        if not path.is_file():
            continue
        # Open each image once and close it right away; keeping the opened
        # images around would hold one file handle per image in the dataset.
        with Image.open(path) as img:
            records.append((path, get_class(path), img.size))
    if not records:
        raise FileNotFoundError(f"No image files found under {data_dir!r}")
    return pd.DataFrame(records, columns=["path", "class", "resolution"])


def main():
    """Train the MLP on Split_smol for 10 epochs and log to TensorBoard.

    Indexes ``data/Split_smol/train/`` and ``data/Split_smol/val/``, trains
    with Adam (lr=0.001, weight_decay=1e-4) and cross-entropy loss, prints the
    per-epoch metrics and writes losses, validation accuracy and parameter
    histograms through a ``SummaryWriter``.

    Raises:
        FileNotFoundError: If either split folder contains no files.
        ValueError: If the validation split contains a class that does not
            appear in the training split.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    transform = get_transforms()

    train_base = _index_images(r'data/Split_smol/train/')
    test_base = _index_images(r'data/Split_smol/val/')

    train_dataset = AlbumentationsDataset(train_base, transform)
    test_dataset = AlbumentationsDataset(test_base, transform)

    # Both splits must agree on the class -> label mapping. Each dataset
    # derives its own mapping from the classes it sees, so the training
    # mapping is reused explicitly for validation.
    class_to_idx = train_dataset.class_to_idx
    unseen = set(test_base['class']) - set(class_to_idx)
    if unseen:
        raise ValueError(
            f"Validation classes missing from the training set: {sorted(unseen)}"
        )
    test_dataset.class_to_idx = class_to_idx

    train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=64)

    # Size the output layer from the data instead of assuming 10 classes.
    model = MLP(num_classes=len(class_to_idx)).to(device)
    model.init_weights()
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-4)

    writer = SummaryWriter()
    try:
        for epoch in range(10):
            train_loss = train(model, train_loader, optimizer, criterion, device)
            val_loss, val_acc = evaluate(model, test_loader, criterion, device)

            print(f"Epoch {epoch+1}: train_loss={train_loss:.4f}, val_loss={val_loss:.4f}, val_acc={val_acc:.4f}")
            writer.add_scalar('Loss/Train', train_loss, epoch)
            writer.add_scalar('Loss/Validation', val_loss, epoch)
            writer.add_scalar('Accuracy/Validation', val_acc, epoch)

            for name, param in model.named_parameters():
                writer.add_histogram(name, param, epoch)
    finally:
        # Close the event file even when an epoch raises.
        writer.close()

# Start training only when run as a script or notebook cell (where __name__
# is "__main__"), not when this code is imported as a module.
if __name__ == "__main__":
    main()

Epoch 1: train_loss=2.2254, val_loss=1.9371, val_acc=0.4420
Epoch 2: train_loss=1.7873, val_loss=1.6101, val_acc=0.4807
Epoch 3: train_loss=1.6657, val_loss=1.4576, val_acc=0.4751
Epoch 4: train_loss=1.4957, val_loss=1.3797, val_acc=0.5193
Epoch 5: train_loss=1.3663, val_loss=1.3160, val_acc=0.5028
Epoch 6: train_loss=1.2932, val_loss=1.3240, val_acc=0.4972
Epoch 7: train_loss=1.2585, val_loss=1.2185, val_acc=0.5359
Epoch 8: train_loss=1.2113, val_loss=1.1601, val_acc=0.5967
Epoch 9: train_loss=1.0887, val_loss=1.1198, val_acc=0.5691
Epoch 10: train_loss=1.0403, val_loss=1.1311, val_acc=0.5580


In [None]:
# %load_ext tensorboard