# Clasificación de imágenes con PyTorch y MLP

## Modelo Simple

In [1]:
import os

import albumentations as A
import mlflow
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.utils as vutils
from albumentations.pytorch import ToTensorV2
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
from tqdm import tqdm

from helper import *

In [2]:
# Training
def train_model(device, model, train_loader, criterion, optimizer, epoch, n_epochs=10):
	"""Run one training pass over ``train_loader`` and report its metrics.

	Args:
		device: Device that every batch is moved to before the forward pass.
		model: Module to optimise; it is put in training mode first.
		train_loader: Iterable of ``(images, labels)`` batches; must support ``len()``.
		criterion: Loss callable invoked as ``criterion(outputs, labels)``.
		optimizer: Optimizer stepped once per batch.
		epoch: Zero-based epoch index, used only for the progress-bar label.
		n_epochs: Total number of epochs, used only for the progress-bar label.

	Returns:
		Tuple ``(train_loss, train_acc)``: the mean of the per-batch losses and
		the accuracy in percent over all samples seen.
	"""
	model.train()
	loss_total = 0.0
	hits = 0
	seen = 0

	progress = tqdm(train_loader, desc=f"Epoch {epoch+1}/{n_epochs}")
	for batch_images, batch_labels in progress:
		batch_images = batch_images.to(device)
		batch_labels = batch_labels.to(device)

		# Standard step: clear gradients, forward, backward, update.
		optimizer.zero_grad()
		logits = model(batch_images)
		batch_loss = criterion(logits, batch_labels)
		batch_loss.backward()
		optimizer.step()

		loss_total += batch_loss.item()
		# Predicted class = index of the largest score along dim 1.
		predicted = torch.max(logits, 1).indices
		hits += (predicted == batch_labels).sum().item()
		seen += batch_labels.size(0)

	# Loss is averaged over batches, accuracy over samples.
	return loss_total / len(train_loader), 100.0 * hits / seen

In [3]:
# Validation
def evaluate_model(device, model, loader, criterion, writer, train_dataset, epoch=None, prefix="val"):
	"""Evaluate ``model`` on ``loader`` and optionally log the results to TensorBoard.

	Args:
		device: Device that every batch is moved to before the forward pass.
		model: Module to evaluate; it is put in eval mode.
		loader: Iterable of ``(images, labels)`` batches; must support ``len()``.
		criterion: Loss callable invoked as ``criterion(outputs, labels)``.
		writer: TensorBoard writer; only used directly here when ``epoch`` is given.
		train_dataset: Forwarded unchanged to ``log_classification_report``.
		epoch: Step index for TensorBoard logging; ``None`` disables the image
			and scalar logging done in this function.
		prefix: Tag prefix for the logged images and scalars.

	Returns:
		Tuple ``(avg_loss, acc)``: the mean of the per-batch losses and the
		accuracy in percent over all samples.
	"""
	# Enter eval mode before the report so it does not run in whatever mode the
	# caller left the model in (train mode right after a training epoch).
	model.eval()
	log_classification_report(device, model, loader, writer, epoch, train_dataset, prefix)
	# The helper is defined outside this file; re-assert eval mode in case it changed it.
	model.eval()

	correct, total, loss_sum = 0, 0, 0.0

	with torch.no_grad():
		for i, (images, labels) in enumerate(loader):
			images, labels = images.to(device), labels.to(device)
			outputs = model(images)
			loss = criterion(outputs, labels)
			_, preds = torch.max(outputs, 1)

			loss_sum += loss.item()
			correct += (preds == labels).sum().item()
			total += labels.size(0)

			# Log up to 8 images from the first batch as a visual sanity check.
			if i == 0 and epoch is not None:
				img_grid = vutils.make_grid(images[:8].cpu(), normalize=True)
				writer.add_image(f"{prefix}/images", img_grid, global_step=epoch)

	# Accuracy is averaged over samples, loss over batches.
	acc = 100.0 * correct / total
	avg_loss = loss_sum / len(loader)

	if epoch is not None:
		writer.add_scalar(f"{prefix}/loss", avg_loss, epoch)
		writer.add_scalar(f"{prefix}/accuracy", acc, epoch)

	return avg_loss, acc

In [4]:
def train_and_validate(device, model, writer, train_loader, val_loader, criterion, optimizer,
                       train_dir, val_dir, train_dataset, n_epochs=10, batch_size=32):
	"""Train ``model`` for ``n_epochs`` inside one MLflow run, validating after every epoch.

	Per-epoch metrics are printed, written to ``writer`` and logged to MLflow.
	After the last epoch the weights are saved to ``mlp_model.pth`` in the
	working directory and logged to MLflow both as an artifact and as a model.

	Args:
		device: Device used for training and evaluation.
		model: Module to train.
		writer: TensorBoard writer for the scalar logs; it is not closed here.
		train_loader: Loader passed to ``train_model``.
		val_loader: Loader passed to ``evaluate_model``.
		criterion: Loss callable.
		optimizer: Optimizer driving the updates.
		train_dir: Training data path; only logged as a run parameter.
		val_dir: Validation data path; only logged as a run parameter.
		train_dataset: Forwarded to ``evaluate_model``.
		n_epochs: Number of epochs to run.
		batch_size: Only logged as a run parameter.

	Returns:
		Tuple ``(train_loss, train_acc, val_loss, val_acc)`` from the last
		epoch, or four ``None`` values when ``n_epochs`` is 0.
	"""
	# Defined up front so the final return is valid even when no epoch runs.
	train_loss = train_acc = val_loss = val_acc = None

	with mlflow.start_run():
		# Log hyperparameters, read from the objects actually in use so the
		# run record cannot drift from what was trained.
		mlflow.log_params({
			"model": type(model).__name__,
			"input_size": 64*64*3,
			"batch_size": batch_size,
			"lr": optimizer.param_groups[0]["lr"],
			"epochs": n_epochs,
			"optimizer": type(optimizer).__name__,
			"loss_fn": type(criterion).__name__,
			"train_dir": train_dir,
			"val_dir": val_dir,
		})
		for epoch in range(n_epochs):
			train_loss, train_acc = train_model(device, model, train_loader, criterion, optimizer, epoch, n_epochs)
			val_loss, val_acc = evaluate_model(device, model, val_loader, criterion, writer, train_dataset, epoch, prefix="val")

			print(f"Epoch {epoch+1}:")
			print(f"  Train Loss: {train_loss:.4f}, Accuracy: {train_acc:.2f}%")
			print(f"  Val   Loss: {val_loss:.4f}, Accuracy: {val_acc:.2f}%")

			# Validation scalars are written by evaluate_model; add the training ones here.
			writer.add_scalar("train/loss", train_loss, epoch)
			writer.add_scalar("train/accuracy", train_acc, epoch)

			# Log to MLflow
			mlflow.log_metrics({
				"train_loss": train_loss,
				"train_accuracy": train_acc,
				"val_loss": val_loss,
				"val_accuracy": val_acc
			}, step=epoch)

		# Save the weights locally, then attach them to the MLflow run.
		torch.save(model.state_dict(), "mlp_model.pth")
		print("Modelo guardado como 'mlp_model.pth'")
		mlflow.log_artifact("mlp_model.pth")
		mlflow.pytorch.log_model(model, artifact_path="pytorch_model")

	return train_loss, train_acc, val_loss, val_acc

In [5]:
# Simple model
class MLPClassifier(nn.Module):
	"""Fully connected classifier: Flatten, two ReLU hidden layers (512 and 128 units), linear output.

	Args:
		input_size: Number of features per sample after flattening.
		num_classes: Number of output scores per sample.
	"""

	def __init__(self, input_size=64*64*3, num_classes=10):
		super().__init__()
		hidden_wide, hidden_narrow = 512, 128
		# Layer order fixes the state_dict keys ("model.1.weight", ...).
		stack = [
			nn.Flatten(),
			nn.Linear(input_size, hidden_wide),
			nn.ReLU(),
			nn.Linear(hidden_wide, hidden_narrow),
			nn.ReLU(),
			nn.Linear(hidden_narrow, num_classes),
		]
		self.model = nn.Sequential(*stack)

	def forward(self, x):
		"""Return the output of the final linear layer for batch ``x`` (no softmax is applied)."""
		scores = self.model(x)
		return scores

In [6]:
# Dataset locations and loader batch size
train_dir = "../data/Split_smol/train"
val_dir = "../data/Split_smol/val/"
batch_size = 32


def _compose_pipeline(*augmentations):
	"""Build an albumentations pipeline: resize to 64x64, the given augmentations, normalize, to tensor."""
	return A.Compose([A.Resize(64, 64), *augmentations, A.Normalize(), ToTensorV2()])


# Transforms: random flip / brightness-contrast for training only.
train_transform = _compose_pipeline(
	A.HorizontalFlip(p=0.5),
	A.RandomBrightnessContrast(p=0.2),
)
val_test_transform = _compose_pipeline()

train_dataset = CustomImageDataset(train_dir, transform=train_transform)
val_dataset = CustomImageDataset(val_dir, transform=val_test_transform)

# Only the training loader shuffles.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size)

In [7]:
# Training and validation loop for the baseline MLP
n_epochs = 10

# End any active MLflow run before starting a new one
if mlflow.active_run() is not None:
	mlflow.end_run()
mlflow.set_experiment("MLP_Clasificador_Imagenes")

# TensorBoard log directory for this experiment
log_dir = "runs/mlp_experimento_1"
writer = SummaryWriter(log_dir=log_dir)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Size the output layer from the distinct labels in the training set.
num_classes = len(set(train_dataset.labels))

model = MLPClassifier(num_classes=num_classes).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)

try:
	final_metrics = train_and_validate(device, model, writer, train_loader, val_loader, criterion, optimizer, train_dir, val_dir, train_dataset, n_epochs=n_epochs, batch_size=batch_size)
finally:
	# Close the writer even if training fails so pending events are flushed to disk.
	writer.close()

# Last expression of the cell: display (train_loss, train_acc, val_loss, val_acc).
final_metrics

Epoch 1/10: 100%|██████████| 22/22 [00:05<00:00,  4.08it/s]


Epoch 1:
  Train Loss: 3.0769, Accuracy: 24.10%
  Val   Loss: 2.1521, Accuracy: 34.25%


Epoch 2/10: 100%|██████████| 22/22 [00:05<00:00,  4.27it/s]


Epoch 2:
  Train Loss: 1.8779, Accuracy: 43.33%
  Val   Loss: 1.7341, Accuracy: 43.65%


Epoch 3/10: 100%|██████████| 22/22 [00:05<00:00,  4.29it/s]


Epoch 3:
  Train Loss: 1.4187, Accuracy: 47.35%
  Val   Loss: 1.4304, Accuracy: 50.83%


Epoch 4/10: 100%|██████████| 22/22 [00:05<00:00,  4.37it/s]


Epoch 4:
  Train Loss: 1.3427, Accuracy: 52.08%
  Val   Loss: 1.2475, Accuracy: 53.04%


Epoch 5/10: 100%|██████████| 22/22 [00:05<00:00,  4.24it/s]


Epoch 5:
  Train Loss: 1.1715, Accuracy: 56.96%
  Val   Loss: 1.3846, Accuracy: 51.38%


Epoch 6/10: 100%|██████████| 22/22 [00:05<00:00,  4.34it/s]


Epoch 6:
  Train Loss: 1.0876, Accuracy: 59.83%
  Val   Loss: 1.5588, Accuracy: 46.96%


Epoch 7/10: 100%|██████████| 22/22 [00:05<00:00,  4.39it/s]


Epoch 7:
  Train Loss: 1.0860, Accuracy: 60.55%
  Val   Loss: 1.2791, Accuracy: 59.12%


Epoch 8/10: 100%|██████████| 22/22 [00:04<00:00,  4.41it/s]


Epoch 8:
  Train Loss: 0.9512, Accuracy: 63.85%
  Val   Loss: 1.1850, Accuracy: 54.14%


Epoch 9/10: 100%|██████████| 22/22 [00:05<00:00,  4.36it/s]


Epoch 9:
  Train Loss: 0.9200, Accuracy: 65.14%
  Val   Loss: 1.3597, Accuracy: 57.46%


Epoch 10/10: 100%|██████████| 22/22 [00:05<00:00,  3.93it/s]


Epoch 10:
  Train Loss: 0.9669, Accuracy: 63.70%
  Val   Loss: 1.2550, Accuracy: 53.59%
Modelo guardado como 'mlp_model.pth'




Modelo guardado como 'mlp_model.pth'


(0.9669482057744806, 63.70157819225251, 1.2550140917301178, 53.591160220994475)

In [None]:
# Disabled example: rebuild the baseline model and load the weights saved as "mlp_model.pth".
# model = MLPClassifier(num_classes=10)
# model.load_state_dict(torch.load("mlp_model.pth"))
# model.eval()  # For inference

# Open TensorBoard inside the notebook on the baseline run's log directory.
%load_ext tensorboard
%tensorboard --logdir=runs/mlp_experimento_1

## Actividades de modificación

In [None]:
import torch
import torch.nn as nn

class MLPClassifierComplete(nn.Module):
    """
    MLP classifier with optional regularisation and weight initialisation.

    Layout: Flatten -> hidden block (512 units) -> hidden block (128 units)
    -> Linear(128, num_classes). Each hidden block is Linear, optional
    BatchNorm1d, ReLU, optional Dropout, in that order.

    Args:
        input_size: Number of features per sample after flattening.
        num_classes: Number of output scores per sample.
        use_dropout: Append a Dropout layer to each hidden block.
        dropout_p: Dropout probability, used only when ``use_dropout`` is true.
        use_batchnorm: Insert BatchNorm1d after each hidden Linear layer.
        init_type: ``'he'`` (Kaiming normal), ``'xavier'`` (Xavier uniform) or
            ``None`` to keep PyTorch's default initialisation.

    Raises:
        ValueError: If ``init_type`` is not ``None``, ``'he'`` or ``'xavier'``.
    """

    # Supported init_type values mapped to the routine applied to Linear weights.
    _WEIGHT_INITIALIZERS = {
        'he': nn.init.kaiming_normal_,
        'xavier': nn.init.xavier_uniform_,
    }

    def __init__(self, input_size=64*64*3, num_classes=10, use_dropout=False, dropout_p=0.5, use_batchnorm=False, init_type=None):
        super().__init__()
        layers = [nn.Flatten()]

        # Hidden blocks: widths 512 then 128.
        in_features = input_size
        for out_features in (512, 128):
            layers.extend(self._hidden_block(in_features, out_features, use_batchnorm, use_dropout, dropout_p))
            in_features = out_features

        # Output layer
        layers.append(nn.Linear(in_features, num_classes))

        self.model = nn.Sequential(*layers)

        # Weight initialisation (PyTorch defaults are kept when init_type is None)
        if init_type is not None:
            self.init_weights(init_type)

    @staticmethod
    def _hidden_block(in_features, out_features, use_batchnorm, use_dropout, dropout_p):
        """Return the layers of one hidden block: Linear, [BatchNorm1d], ReLU, [Dropout]."""
        block = [nn.Linear(in_features, out_features)]
        if use_batchnorm:
            block.append(nn.BatchNorm1d(out_features))
        block.append(nn.ReLU())
        if use_dropout:
            block.append(nn.Dropout(dropout_p))
        return block

    def forward(self, x):
        """Return the output of the final linear layer for batch ``x`` (no softmax is applied)."""
        return self.model(x)

    def init_weights(self, init_type):
        """Re-initialise every Linear layer: weights with ``init_type``, biases with zeros.

        Raises:
            ValueError: If ``init_type`` is not ``'he'`` or ``'xavier'``.
        """
        # Fail loudly on an unknown scheme instead of zeroing the biases and
        # silently leaving the weights at their default initialisation.
        if init_type not in self._WEIGHT_INITIALIZERS:
            raise ValueError(f"init_type '{init_type}' no reconocido; use 'he', 'xavier' o None.")
        initializer = self._WEIGHT_INITIALIZERS[init_type]

        for m in self.modules():
            if isinstance(m, nn.Linear):
                initializer(m.weight)
                if m.bias is not None:
                    nn.init.zeros_(m.bias)

In [None]:
# Variants to train; the commented-out entries have no configuration below yet.
variantes = [
	"simple",
	# "pocas_imagenes",
	# "grises",
	"dropout",
	"batchnorm",
	"batchnorm_dropout",
	"weight_decay",
	"augmentation",
	"init_he",
	"init_xavier",
	"histogramas"
]

# MLPClassifierComplete constructor overrides per variant. "weight_decay",
# "augmentation" and "histogramas" use the plain model and differ only in the
# optimizer, the training data or the logging. "base" is kept as an alias of
# "simple", the name this baseline variant was previously matched under.
model_kwargs_por_variante = {
	"simple": {"use_dropout": False, "use_batchnorm": False},
	"base": {"use_dropout": False, "use_batchnorm": False},
	"weight_decay": {"use_dropout": False, "use_batchnorm": False},
	"augmentation": {"use_dropout": False, "use_batchnorm": False},
	"histogramas": {"use_dropout": False, "use_batchnorm": False},
	"init_he": {"init_type": 'he'},
	"init_xavier": {"init_type": 'xavier'},
	"dropout": {"use_dropout": True, "dropout_p": 0.5},
	"batchnorm": {"use_batchnorm": True},
	"batchnorm_dropout": {"use_dropout": True, "dropout_p": 0.5, "use_batchnorm": True},
}

# Training and validation loop
n_epochs = 10

# End any active MLflow run before starting a new one
if mlflow.active_run() is not None:
	mlflow.end_run()
mlflow.set_experiment("MLP_Clasificador_Imagenes_Adv")

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
num_classes = len(set(train_dataset.labels))

# torch.save does not create missing parent directories.
os.makedirs("models", exist_ok=True)

for var in variantes:
	print(f"Entrenando variante: {var}")

	if var not in model_kwargs_por_variante:
		raise ValueError(f"Variante '{var}' no reconocida.")
	model = MLPClassifierComplete(num_classes=num_classes, **model_kwargs_por_variante[var]).to(device)

	# Only the "weight_decay" variant regularises through the optimizer (0.0 is Adam's default).
	weight_decay = 1e-4 if var == "weight_decay" else 0.0
	optimizer = optim.Adam(model.parameters(), lr=1e-3, weight_decay=weight_decay)

	if var == "augmentation":
		train_transform_adv = A.Compose([
			A.Resize(64, 64),
			A.HorizontalFlip(p=0.5),
			# A.VerticalFlip(p=0.5),
			A.RandomBrightnessContrast(p=0.2),
			A.ShiftScaleRotate(shift_limit=0.1, scale_limit=0.1, rotate_limit=15, p=0.5),
			# A.GaussNoise(var_limit=(10.0, 50.0), p=0.3),
			A.Normalize(),
			ToTensorV2()
		])
		train_dataset_adv = CustomImageDataset(train_dir, transform=train_transform_adv)
		train_loader_t = DataLoader(train_dataset_adv, batch_size=batch_size, shuffle=True)
	else:
		train_loader_t = train_loader

	criterion = nn.CrossEntropyLoss()

	writer = SummaryWriter(log_dir=f"runs/experimento_{var}")
	model_path = f"models/mlp_{var}.pth"

	try:
		with mlflow.start_run(run_name=f"MLP_{var}"):
			# Log hyperparameters up front so a run that fails midway is still identifiable.
			mlflow.log_params({
				"model": "MLPClassifierComplete",
				"input_size": 64*64*3,
				"batch_size": batch_size,
				"lr": 1e-3,
				"weight_decay": weight_decay,
				"epochs": n_epochs,
				"optimizer": "Adam",
				"loss_fn": "CrossEntropyLoss",
				"train_dir": train_dir,
				"val_dir": val_dir,
				"variante": var,
			})
			for epoch in range(n_epochs):
				# Train on the variant-specific loader so "augmentation" really sees augmented data.
				train_loss, train_acc = train_model(device, model, train_loader_t, criterion, optimizer, epoch, n_epochs)
				val_loss, val_acc = evaluate_model(device, model, val_loader, criterion, writer, train_dataset, epoch, prefix="val")

				writer.add_scalar("train/loss", train_loss, epoch)
				writer.add_scalar("train/accuracy", train_acc, epoch)

				# Log to MLflow
				mlflow.log_metrics({
					"train_loss": train_loss,
					"train_accuracy": train_acc,
					"val_loss": val_loss,
					"val_accuracy": val_acc
				}, step=epoch)
				if var == "histogramas":
					# Per-parameter histograms, one set per epoch
					for name, param in model.named_parameters():
						writer.add_histogram(name, param, epoch)

			torch.save(model.state_dict(), model_path)
			mlflow.log_artifact(model_path)
	finally:
		# Close the writer even if this variant fails so pending events are flushed.
		writer.close()

	print(f"Variante {var} entrenada y guardada.\n")

Entrenando variante: base


Epoch 1/10: 100%|██████████| 22/22 [00:05<00:00,  3.82it/s]
Epoch 2/10: 100%|██████████| 22/22 [00:05<00:00,  3.82it/s]
Epoch 3/10: 100%|██████████| 22/22 [00:05<00:00,  3.98it/s]
Epoch 4/10: 100%|██████████| 22/22 [00:05<00:00,  3.95it/s]
Epoch 5/10: 100%|██████████| 22/22 [00:05<00:00,  3.95it/s]
Epoch 6/10: 100%|██████████| 22/22 [00:05<00:00,  3.92it/s]
Epoch 7/10: 100%|██████████| 22/22 [00:05<00:00,  3.94it/s]
Epoch 8/10: 100%|██████████| 22/22 [00:05<00:00,  3.97it/s]
Epoch 9/10: 100%|██████████| 22/22 [00:05<00:00,  3.93it/s]
Epoch 10/10: 100%|██████████| 22/22 [00:05<00:00,  3.92it/s]


Variante base entrenada y guardada.

Entrenando variante: dropout


Epoch 1/10: 100%|██████████| 22/22 [00:05<00:00,  3.86it/s]
Epoch 2/10: 100%|██████████| 22/22 [00:05<00:00,  3.91it/s]
Epoch 3/10: 100%|██████████| 22/22 [00:05<00:00,  3.88it/s]
Epoch 4/10: 100%|██████████| 22/22 [00:05<00:00,  4.04it/s]
Epoch 5/10: 100%|██████████| 22/22 [00:05<00:00,  4.08it/s]
Epoch 6/10: 100%|██████████| 22/22 [00:05<00:00,  3.95it/s]
Epoch 7/10: 100%|██████████| 22/22 [00:05<00:00,  3.85it/s]
Epoch 8/10: 100%|██████████| 22/22 [00:05<00:00,  3.94it/s]
Epoch 9/10: 100%|██████████| 22/22 [00:05<00:00,  3.82it/s]
Epoch 10/10: 100%|██████████| 22/22 [00:05<00:00,  3.90it/s]


Variante dropout entrenada y guardada.

Entrenando variante: batchnorm


Epoch 1/10: 100%|██████████| 22/22 [00:05<00:00,  3.86it/s]
Epoch 2/10: 100%|██████████| 22/22 [00:05<00:00,  3.81it/s]
Epoch 3/10: 100%|██████████| 22/22 [00:05<00:00,  3.93it/s]
Epoch 4/10: 100%|██████████| 22/22 [00:05<00:00,  4.09it/s]
Epoch 5/10: 100%|██████████| 22/22 [00:05<00:00,  3.97it/s]
Epoch 6/10: 100%|██████████| 22/22 [00:05<00:00,  3.88it/s]
Epoch 7/10: 100%|██████████| 22/22 [00:05<00:00,  3.75it/s]
Epoch 8/10: 100%|██████████| 22/22 [00:05<00:00,  3.91it/s]
Epoch 9/10: 100%|██████████| 22/22 [00:05<00:00,  4.10it/s]
Epoch 10/10: 100%|██████████| 22/22 [00:05<00:00,  4.09it/s]


Variante batchnorm entrenada y guardada.

Entrenando variante: batchnorm_dropout


Epoch 1/10: 100%|██████████| 22/22 [00:05<00:00,  4.11it/s]
Epoch 2/10: 100%|██████████| 22/22 [00:05<00:00,  3.96it/s]
Epoch 3/10: 100%|██████████| 22/22 [00:05<00:00,  3.92it/s]
Epoch 4/10: 100%|██████████| 22/22 [00:05<00:00,  3.99it/s]
Epoch 5/10: 100%|██████████| 22/22 [00:05<00:00,  3.89it/s]
Epoch 6/10: 100%|██████████| 22/22 [00:05<00:00,  3.78it/s]
Epoch 7/10: 100%|██████████| 22/22 [00:05<00:00,  3.89it/s]
Epoch 8/10: 100%|██████████| 22/22 [00:05<00:00,  3.97it/s]
Epoch 9/10: 100%|██████████| 22/22 [00:05<00:00,  3.81it/s]
Epoch 10/10: 100%|██████████| 22/22 [00:05<00:00,  3.78it/s]


Variante batchnorm_dropout entrenada y guardada.

Entrenando variante: weight_decay


Epoch 1/10: 100%|██████████| 22/22 [00:05<00:00,  3.70it/s]
Epoch 2/10: 100%|██████████| 22/22 [00:05<00:00,  3.80it/s]
Epoch 3/10: 100%|██████████| 22/22 [00:05<00:00,  3.83it/s]
Epoch 4/10: 100%|██████████| 22/22 [00:05<00:00,  3.94it/s]
Epoch 5/10: 100%|██████████| 22/22 [00:05<00:00,  3.99it/s]
Epoch 6/10: 100%|██████████| 22/22 [00:05<00:00,  3.97it/s]
Epoch 7/10: 100%|██████████| 22/22 [00:05<00:00,  4.00it/s]
Epoch 8/10: 100%|██████████| 22/22 [00:05<00:00,  3.67it/s]
Epoch 9/10: 100%|██████████| 22/22 [00:05<00:00,  3.85it/s]
Epoch 10/10: 100%|██████████| 22/22 [00:05<00:00,  3.89it/s]
  original_init(self, **validated_kwargs)


Variante weight_decay entrenada y guardada.

Entrenando variante: augmentation


Epoch 1/10: 100%|██████████| 22/22 [00:05<00:00,  3.82it/s]
Epoch 2/10: 100%|██████████| 22/22 [00:05<00:00,  3.78it/s]
Epoch 3/10: 100%|██████████| 22/22 [00:05<00:00,  3.87it/s]
Epoch 4/10: 100%|██████████| 22/22 [00:05<00:00,  3.83it/s]
Epoch 5/10: 100%|██████████| 22/22 [00:05<00:00,  3.84it/s]
Epoch 6/10: 100%|██████████| 22/22 [00:05<00:00,  3.82it/s]
Epoch 7/10: 100%|██████████| 22/22 [00:05<00:00,  3.81it/s]
Epoch 8/10: 100%|██████████| 22/22 [00:05<00:00,  3.70it/s]
Epoch 9/10: 100%|██████████| 22/22 [00:05<00:00,  3.77it/s]
Epoch 10/10: 100%|██████████| 22/22 [00:05<00:00,  3.79it/s]


Variante augmentation entrenada y guardada.

Entrenando variante: init_he


Epoch 1/10: 100%|██████████| 22/22 [00:05<00:00,  3.73it/s]
Epoch 2/10: 100%|██████████| 22/22 [00:05<00:00,  3.91it/s]
Epoch 3/10: 100%|██████████| 22/22 [00:05<00:00,  4.07it/s]
Epoch 4/10: 100%|██████████| 22/22 [00:05<00:00,  3.92it/s]
Epoch 5/10: 100%|██████████| 22/22 [00:05<00:00,  4.12it/s]
Epoch 6/10: 100%|██████████| 22/22 [00:05<00:00,  4.08it/s]
Epoch 7/10: 100%|██████████| 22/22 [00:05<00:00,  3.72it/s]
Epoch 8/10: 100%|██████████| 22/22 [00:05<00:00,  3.99it/s]
Epoch 9/10: 100%|██████████| 22/22 [00:05<00:00,  4.07it/s]
Epoch 10/10: 100%|██████████| 22/22 [00:05<00:00,  4.06it/s]


Variante init_he entrenada y guardada.

Entrenando variante: init_xavier


Epoch 1/10: 100%|██████████| 22/22 [00:05<00:00,  3.84it/s]
Epoch 2/10: 100%|██████████| 22/22 [00:05<00:00,  4.02it/s]
Epoch 3/10: 100%|██████████| 22/22 [00:05<00:00,  4.04it/s]
Epoch 4/10: 100%|██████████| 22/22 [00:05<00:00,  4.03it/s]
Epoch 5/10: 100%|██████████| 22/22 [00:05<00:00,  3.86it/s]
Epoch 6/10: 100%|██████████| 22/22 [00:05<00:00,  3.80it/s]
Epoch 7/10: 100%|██████████| 22/22 [00:05<00:00,  3.85it/s]
Epoch 8/10: 100%|██████████| 22/22 [00:05<00:00,  3.92it/s]
Epoch 9/10: 100%|██████████| 22/22 [00:05<00:00,  3.91it/s]
Epoch 10/10: 100%|██████████| 22/22 [00:05<00:00,  3.94it/s]


Variante init_xavier entrenada y guardada.

Entrenando variante: histogramas


Epoch 1/10: 100%|██████████| 22/22 [00:05<00:00,  4.00it/s]
Epoch 2/10: 100%|██████████| 22/22 [00:05<00:00,  3.87it/s]
Epoch 3/10: 100%|██████████| 22/22 [00:05<00:00,  4.02it/s]
Epoch 4/10: 100%|██████████| 22/22 [00:05<00:00,  3.96it/s]
Epoch 5/10: 100%|██████████| 22/22 [00:06<00:00,  3.47it/s]
Epoch 6/10: 100%|██████████| 22/22 [00:06<00:00,  3.53it/s]
Epoch 7/10: 100%|██████████| 22/22 [00:05<00:00,  3.74it/s]
Epoch 8/10: 100%|██████████| 22/22 [00:05<00:00,  3.97it/s]
Epoch 9/10: 100%|██████████| 22/22 [00:05<00:00,  3.98it/s]
Epoch 10/10: 100%|██████████| 22/22 [00:05<00:00,  3.96it/s]


Variante histogramas entrenada y guardada.



In [None]:
# Launch the tracking UIs from the notebook via shell commands.
# mlflow      -> http://localhost:5000
# tensorboard -> http://localhost:6006

# NOTE(review): `mlflow ui` starts a server and presumably keeps this cell busy until it is
# interrupted, so the tensorboard line below would only run afterwards — confirm, and
# consider launching each UI from its own terminal or cell.
!mlflow ui
# load_ext tensorboard
# "runs" is the parent directory of the TensorBoard logs written by the training cells above.
!tensorboard --logdir=runs