# Clasificación de imágenes con PyTorch y MLP

## Modelo Simple

In [1]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import albumentations as A
from albumentations.pytorch import ToTensorV2
from PIL import Image
import numpy as np
from sklearn.preprocessing import LabelEncoder
from tqdm import tqdm
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, classification_report
import io

In [2]:
import mlflow
import mlflow.pytorch

In [None]:
# End any active MLflow run before starting a new one, so that re-running the
# notebook does not keep logging into a run left open by a previous execution.
if mlflow.active_run() is not None:
	mlflow.end_run()
# Select the experiment that later runs are logged under (the captured output
# below shows MLflow creating it when it does not exist yet).
mlflow.set_experiment("MLP_Clasificador_Imagenes")

2025/06/18 04:07:07 INFO mlflow.tracking.fluent: Experiment with name 'MLP_Clasificador_Imagenes' does not exist. Creating a new experiment.


<Experiment: artifact_location='file:///c:/Users/valen/Storage/ITBA/20251Q/Redes/test/old/mlruns/503415633472286498', creation_time=1750230427408, experiment_id='503415633472286498', last_update_time=1750230427408, lifecycle_stage='active', name='MLP_Clasificador_Imagenes', tags={}>

In [4]:
from torch.utils.tensorboard import SummaryWriter
import torchvision.utils as vutils

In [None]:
# Log a matplotlib figure to TensorBoard as an image
def plot_to_tensorboard(fig, writer, tag, step):
	"""Render `fig` to an in-memory PNG and log it through `writer.add_image`.

	Args:
		fig: Matplotlib figure to render; it is closed with `plt.close` afterwards.
		writer: Object exposing `add_image(tag, image, global_step=...)`.
		tag: Name under which the image is logged.
		step: Value forwarded as `global_step`.
	"""
	png_buffer = io.BytesIO()
	fig.savefig(png_buffer, format='png')
	png_buffer.seek(0)

	rgb_pixels = np.array(Image.open(png_buffer).convert("RGB"))
	# Move the channel axis first and rescale by 1/255 before logging
	chw_image = torch.tensor(rgb_pixels).permute(2, 0, 1) / 255.0

	writer.add_image(tag, chw_image, global_step=step)
	plt.close(fig)

In [None]:
# Confusion matrix and classification report logging
def log_classification_report(model, loader, writer, step, prefix="val"):
	"""Log a confusion matrix and a classification report for `model` on `loader`.

	The confusion matrix is logged as an image to TensorBoard and as a PNG
	artifact to MLflow; the text report is logged to TensorBoard and as a
	`.txt` artifact to MLflow. Temporary files are removed afterwards.

	Relies on the notebook-level globals `device` (where inputs are moved) and
	`train_dataset` (whose `label_encoder.classes_` provides the class names).

	Args:
		model: Network to evaluate; it is switched to eval mode.
		loader: Iterable yielding `(images, labels)` batches.
		writer: TensorBoard writer (`add_image` / `add_text`).
		step: Step value used for TensorBoard and embedded in artifact file names.
		prefix: Split name used in tags, titles and file names.
	"""
	model.eval()
	all_preds = []
	all_labels = []

	with torch.no_grad():
		for images, labels in loader:
			images = images.to(device)
			outputs = model(images)
			_, preds = torch.max(outputs, 1)
			all_preds.extend(preds.cpu().numpy())
			all_labels.extend(labels.cpu().numpy())

	class_names = train_dataset.label_encoder.classes_
	# Pin the label set explicitly: without it sklearn infers the classes from the
	# data, and a class missing from both labels and predictions makes the matrix /
	# report disagree with the number of class names.
	class_ids = list(range(len(class_names)))

	# Confusion matrix
	cm = confusion_matrix(all_labels, all_preds, labels=class_ids)
	fig_cm, ax = plt.subplots(figsize=(6, 6))
	disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=class_names)
	disp.plot(ax=ax, cmap='Blues', xticks_rotation=45)
	ax.set_title(f'{prefix.title()} - Confusion Matrix')

	# Save locally, upload to MLflow, and always clean up the temporary file
	fig_path = f"confusion_matrix_{prefix}_epoch_{step}.png"
	fig_cm.savefig(fig_path)
	try:
		mlflow.log_artifact(fig_path)
	finally:
		os.remove(fig_path)

	plot_to_tensorboard(fig_cm, writer, f"{prefix}/confusion_matrix", step)

	# zero_division=0 keeps the default numeric result for classes that were never
	# predicted, without emitting a warning per metric.
	cls_report = classification_report(
		all_labels,
		all_preds,
		labels=class_ids,
		target_names=class_names,
		zero_division=0,
	)
	writer.add_text(f"{prefix}/classification_report", f"<pre>{cls_report}</pre>", step)

	# Also log the report text as an MLflow artifact
	report_path = f"classification_report_{prefix}_epoch_{step}.txt"
	with open(report_path, "w", encoding="utf-8") as f:
		f.write(cls_report)
	try:
		mlflow.log_artifact(report_path)
	finally:
		os.remove(report_path)


In [7]:
# Create the TensorBoard writer (and its log directory) for the baseline experiment
log_dir = "runs/mlp_experimento_1"
writer = SummaryWriter(log_dir=log_dir)


In [None]:
class CustomImageDataset(Dataset):
	"""Image-folder dataset: every sub-directory of `root_dir` is one class.

	Files ending in .png / .jpg / .jpeg (case-insensitive) inside each class
	directory become samples. Class names are encoded to integer labels with a
	`LabelEncoder`, exposed as `label_encoder`.

	Args:
		root_dir: Directory containing one sub-directory per class.
		transform: Optional callable invoked as `transform(image=array)` that
			returns a mapping with an "image" entry (albumentations-style).
	"""

	IMAGE_EXTENSIONS = (".png", ".jpg", ".jpeg")

	def __init__(self, root_dir, transform=None):
		self.root_dir = root_dir
		self.transform = transform

		self.image_paths = []
		self.labels = []

		# Only directories are classes; stray files in root_dir (e.g. OS metadata)
		# would otherwise make os.listdir(cls_dir) fail.
		class_names = sorted(
			entry for entry in os.listdir(root_dir)
			if os.path.isdir(os.path.join(root_dir, entry))
		)
		self.class_to_idx = {cls: idx for idx, cls in enumerate(class_names)}

		for cls in class_names:
			cls_dir = os.path.join(root_dir, cls)
			# Sorted so the sample order does not depend on the file system's listing order
			for fname in sorted(os.listdir(cls_dir)):
				if fname.lower().endswith(self.IMAGE_EXTENSIONS):
					self.image_paths.append(os.path.join(cls_dir, fname))
					self.labels.append(cls)

		self.label_encoder = LabelEncoder()
		self.labels = self.label_encoder.fit_transform(self.labels)

	def __len__(self):
		"""Return the number of image files found."""
		return len(self.image_paths)

	def __getitem__(self, idx):
		"""Return `(image, label)` for sample `idx`; the image is loaded as RGB."""
		# Context manager closes the underlying file handle as soon as the pixels are read
		with Image.open(self.image_paths[idx]) as img:
			image = np.array(img.convert("RGB"))
		label = self.labels[idx]

		if self.transform:
			augmented = self.transform(image=image)
			image = augmented["image"]

		return image, label

In [None]:
# Training-time preprocessing: resize to 64x64, apply light random augmentation
# (horizontal flip, brightness/contrast), then normalise and convert to a tensor.
train_transform = A.Compose([
	A.Resize(64, 64),
	A.HorizontalFlip(p=0.5),
	A.RandomBrightnessContrast(p=0.2),
	A.Normalize(),  # called with the library defaults
	ToTensorV2()
])

In [None]:
# Validation/test preprocessing: same resize and normalisation as training,
# without any random augmentation.
val_test_transform = A.Compose([
	A.Resize(64, 64),
	A.Normalize(),
	ToTensorV2()
])

In [11]:
# Dataset locations (relative paths): training and validation splits
train_dir = "data/Split_smol/train"
val_dir = "data/Split_smol/val/"

In [12]:
# Each split gets its own dataset object with the matching transform
train_dataset = CustomImageDataset(train_dir, transform=train_transform)
val_dataset   = CustomImageDataset(val_dir, transform=val_test_transform)

batch_size = 32
# Only the training loader shuffles
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader   = DataLoader(val_dataset, batch_size=batch_size)

In [None]:
class MLPClassifier(nn.Module):
	"""Baseline multilayer perceptron: flatten -> 512 -> 128 -> `num_classes`.

	Args:
		input_size: Number of features per sample after flattening.
		num_classes: Size of the output layer.
	"""

	def __init__(self, input_size=64*64*3, num_classes=10):
		super().__init__()
		first_hidden, second_hidden = 512, 128
		stack = [
			nn.Flatten(),
			nn.Linear(input_size, first_hidden),
			nn.ReLU(),
			nn.Linear(first_hidden, second_hidden),
			nn.ReLU(),
			nn.Linear(second_hidden, num_classes),
		]
		self.model = nn.Sequential(*stack)

	def forward(self, x):
		"""Return the output of the last linear layer for batch `x`."""
		logits = self.model(x)
		return logits

In [None]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Number of distinct encoded labels present in the training set
num_classes = len(set(train_dataset.labels))
model = MLPClassifier(num_classes=num_classes).to(device)

# Loss and optimiser used by the baseline training loop
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)

In [None]:
# Evaluation pass (used for validation during training)
def evaluate(model, loader, epoch=None, prefix="val"):
	"""Evaluate `model` on `loader` and return `(avg_loss, accuracy_percent)`.

	Also logs a confusion matrix / classification report for the same loader
	and prefix, and, when `epoch` is given, logs the first batch's images plus
	the loss and accuracy scalars to TensorBoard under `{prefix}/...`.

	Relies on the notebook-level globals `device`, `criterion` and `writer`.

	Args:
		model: Network to evaluate; it is switched to eval mode.
		loader: Iterable yielding `(images, labels)` batches.
		epoch: Step used for logging; `None` skips the image/scalar logging.
		prefix: Split name used in the logged tags.

	Returns:
		Tuple of the mean of the per-batch losses and the accuracy in percent.
	"""
	# Report on the split actually being evaluated, under the caller's prefix
	log_classification_report(model, loader, writer, step=epoch, prefix=prefix)
	model.eval()
	correct, total, loss_sum = 0, 0, 0.0

	with torch.no_grad():
		for i, (images, labels) in enumerate(loader):
			images, labels = images.to(device), labels.to(device)
			outputs = model(images)
			loss = criterion(outputs, labels)
			_, preds = torch.max(outputs, 1)

			loss_sum += loss.item()
			correct += (preds == labels).sum().item()
			total += labels.size(0)

			# Log a few images from the first batch
			if i == 0 and epoch is not None:
				img_grid = vutils.make_grid(images[:8].cpu(), normalize=True)
				writer.add_image(f"{prefix}/images", img_grid, global_step=epoch)

	acc = 100.0 * correct / total
	avg_loss = loss_sum / len(loader)

	if epoch is not None:
		writer.add_scalar(f"{prefix}/loss", avg_loss, epoch)
		writer.add_scalar(f"{prefix}/accuracy", acc, epoch)

	return avg_loss, acc

In [None]:
# Training loop: trains `model` for `n_epochs`, validating after every epoch and
# logging metrics to TensorBoard (`writer`) and MLflow.
n_epochs = 10

# An active MLflow run, if any, is ended in the experiment-setup cell above.
with mlflow.start_run():
	# Log hyperparameters, read from the live objects so they cannot drift from
	# what is actually being trained
	mlflow.log_params({
		"model": type(model).__name__,
		"input_size": 64*64*3,
		"batch_size": batch_size,
		"lr": optimizer.param_groups[0]["lr"],
		"epochs": n_epochs,
		"optimizer": type(optimizer).__name__,
		"loss_fn": type(criterion).__name__,
		"train_dir": train_dir,
		"val_dir": val_dir,
	})
	for epoch in range(n_epochs):
		# evaluate() leaves the model in eval mode, so switch back every epoch
		model.train()
		running_loss = 0.0
		correct, total = 0, 0

		for images, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}/{n_epochs}"):
			images, labels = images.to(device), labels.to(device)

			optimizer.zero_grad()
			outputs = model(images)
			loss = criterion(outputs, labels)
			loss.backward()
			optimizer.step()

			running_loss += loss.item()
			_, preds = torch.max(outputs, 1)
			correct += (preds == labels).sum().item()
			total += labels.size(0)

		# Mean of the per-batch losses and accuracy in percent
		train_loss = running_loss / len(train_loader)
		train_acc = 100.0 * correct / total
		val_loss, val_acc = evaluate(model, val_loader, epoch=epoch, prefix="val")

		print(f"Epoch {epoch+1}:")
		print(f"  Train Loss: {train_loss:.4f}, Accuracy: {train_acc:.2f}%")
		print(f"  Val   Loss: {val_loss:.4f}, Accuracy: {val_acc:.2f}%")

		writer.add_scalar("train/loss", train_loss, epoch)
		writer.add_scalar("train/accuracy", train_acc, epoch)

		# Log the same metrics to MLflow
		mlflow.log_metrics({
			"train_loss": train_loss,
			"train_accuracy": train_acc,
			"val_loss": val_loss,
			"val_accuracy": val_acc
		}, step=epoch)

	# Save the weights locally and attach them to the run
	torch.save(model.state_dict(), "mlp_model.pth")
	print("Modelo guardado como 'mlp_model.pth'")
	mlflow.log_artifact("mlp_model.pth")
	# Also log the full model in MLflow's PyTorch format
	mlflow.pytorch.log_model(model, artifact_path="pytorch_model")
	print("Modelo registrado en MLflow como 'pytorch_model'")

Epoch 1/10: 100%|██████████| 22/22 [00:13<00:00,  1.67it/s]
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Epoch 1:
  Train Loss: 2.8399, Accuracy: 26.11%
  Val   Loss: 1.8854, Accuracy: 37.57%


Epoch 2/10: 100%|██████████| 22/22 [00:05<00:00,  3.86it/s]


Epoch 2:
  Train Loss: 1.8681, Accuracy: 39.31%
  Val   Loss: 1.8871, Accuracy: 35.91%


Epoch 3/10: 100%|██████████| 22/22 [00:05<00:00,  4.01it/s]


Epoch 3:
  Train Loss: 1.5531, Accuracy: 45.05%
  Val   Loss: 1.3917, Accuracy: 50.28%


Epoch 4/10: 100%|██████████| 22/22 [00:06<00:00,  3.61it/s]


Epoch 4:
  Train Loss: 1.2979, Accuracy: 53.37%
  Val   Loss: 1.2783, Accuracy: 52.49%


Epoch 5/10: 100%|██████████| 22/22 [00:05<00:00,  4.15it/s]


Epoch 5:
  Train Loss: 1.1456, Accuracy: 55.81%
  Val   Loss: 1.3216, Accuracy: 47.51%


Epoch 6/10: 100%|██████████| 22/22 [00:05<00:00,  3.88it/s]


Epoch 6:
  Train Loss: 1.1186, Accuracy: 58.11%
  Val   Loss: 1.2882, Accuracy: 49.72%


Epoch 7/10: 100%|██████████| 22/22 [00:05<00:00,  3.85it/s]


Epoch 7:
  Train Loss: 1.0056, Accuracy: 58.82%
  Val   Loss: 1.1191, Accuracy: 61.33%


Epoch 8/10: 100%|██████████| 22/22 [00:05<00:00,  3.96it/s]


Epoch 8:
  Train Loss: 1.0012, Accuracy: 61.84%
  Val   Loss: 1.2247, Accuracy: 56.35%


Epoch 9/10: 100%|██████████| 22/22 [00:06<00:00,  3.59it/s]


Epoch 9:
  Train Loss: 0.9597, Accuracy: 64.56%
  Val   Loss: 1.1891, Accuracy: 57.46%


Epoch 10/10: 100%|██████████| 22/22 [00:05<00:00,  3.97it/s]


Epoch 10:
  Train Loss: 0.9509, Accuracy: 63.13%
  Val   Loss: 1.2390, Accuracy: 56.91%


In [27]:
# Load the TensorBoard notebook extension and point it at the baseline run's log directory
%load_ext tensorboard
%tensorboard --logdir=runs/mlp_experimento_1

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


Reusing TensorBoard on port 6007 (pid 21764), started 0:01:14 ago. (Use '!kill 21764' to kill it.)

## Actividades de modificación

### 7. Regularización

MLP con Dropout

In [None]:
class MLPDropout(nn.Module):
	"""MLP (flatten -> 512 -> 128 -> `num_classes`) with dropout after each hidden ReLU.

	Args:
		input_size: Number of features per sample after flattening.
		num_classes: Size of the output layer.
		p: Dropout probability used by both dropout layers.
	"""

	def __init__(self, input_size=64*64*3, num_classes=10, p=0.5):
		super().__init__()
		first_hidden, second_hidden = 512, 128
		stack = [
			nn.Flatten(),
			nn.Linear(input_size, first_hidden),
			nn.ReLU(),
			nn.Dropout(p),
			nn.Linear(first_hidden, second_hidden),
			nn.ReLU(),
			nn.Dropout(p),
			nn.Linear(second_hidden, num_classes),
		]
		self.model = nn.Sequential(*stack)

	def forward(self, x):
		"""Return the output of the last linear layer for batch `x`."""
		logits = self.model(x)
		return logits

MLP con BatchNorm

In [None]:
class MLPBatchNorm(nn.Module):
	"""MLP (flatten -> 512 -> 128 -> `num_classes`) with BatchNorm1d before each hidden ReLU.

	Args:
		input_size: Number of features per sample after flattening.
		num_classes: Size of the output layer.
	"""

	def __init__(self, input_size=64*64*3, num_classes=10):
		super().__init__()
		first_hidden, second_hidden = 512, 128
		stack = [
			nn.Flatten(),
			nn.Linear(input_size, first_hidden),
			nn.BatchNorm1d(first_hidden),
			nn.ReLU(),
			nn.Linear(first_hidden, second_hidden),
			nn.BatchNorm1d(second_hidden),
			nn.ReLU(),
			nn.Linear(second_hidden, num_classes),
		]
		self.model = nn.Sequential(*stack)

	def forward(self, x):
		"""Return the output of the last linear layer for batch `x`."""
		logits = self.model(x)
		return logits

MLP con BatchNorm y Dropout

In [None]:
class MLPBatchNormDropout(nn.Module):
	"""MLP whose hidden layers are each Linear -> BatchNorm1d -> ReLU -> Dropout.

	Args:
		input_size: Number of features per sample after flattening.
		num_classes: Size of the output layer.
		p: Dropout probability used by both dropout layers.
	"""

	def __init__(self, input_size=64*64*3, num_classes=10, p=0.5):
		super().__init__()
		first_hidden, second_hidden = 512, 128
		stack = [
			nn.Flatten(),
			nn.Linear(input_size, first_hidden),
			nn.BatchNorm1d(first_hidden),
			nn.ReLU(),
			nn.Dropout(p),
			nn.Linear(first_hidden, second_hidden),
			nn.BatchNorm1d(second_hidden),
			nn.ReLU(),
			nn.Dropout(p),
			nn.Linear(second_hidden, num_classes),
		]
		self.model = nn.Sequential(*stack)

	def forward(self, x):
		"""Return the output of the last linear layer for batch `x`."""
		logits = self.model(x)
		return logits

# TODO: when training this variant, also log:
# mlflow.log_param("batchnorm", True)
# mlflow.log_param("dropout_p", 0.5)

Optimizer con Weight Decay (L2)

In [None]:
# Same Adam settings as the baseline optimiser, plus `weight_decay` for the L2 variant
optimizer = optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-4)

# TODO: also log this hyperparameter:
# mlflow.log_param("weight_decay", 1e-4)

Data Augmentation avanzado

In [None]:
# Augmentation pipeline that extends the basic training transform with a random
# shift / scale / rotate step; the commented-out entries are left disabled.
train_transform_adv = A.Compose([
	A.Resize(64, 64),
	A.HorizontalFlip(p=0.5),
	# A.VerticalFlip(p=0.5),
	A.RandomBrightnessContrast(p=0.2),
	# NOTE(review): the warning captured in this cell's output presumably comes from
	# this transform's constructor — confirm against the installed albumentations version.
	A.ShiftScaleRotate(shift_limit=0.1, scale_limit=0.1, rotate_limit=15, p=0.5),
	# A.GaussNoise(var_limit=(10.0, 50.0), p=0.3),
	A.Normalize(),
	ToTensorV2()
])

# Same training directory and batch size as the baseline, read through the augmented pipeline
train_dataset_adv = CustomImageDataset(train_dir, transform=train_transform_adv)
train_loader_adv = DataLoader(train_dataset_adv, batch_size=batch_size, shuffle=True)

  original_init(self, **validated_kwargs)


Early Stopping

In [None]:
# Early-stopping skeleton: stop once the validation loss has failed to improve
# for `patience` consecutive evaluations; the best weights are checkpointed.
best_val_loss = float('inf')
patience = 3
counter = 0

for epoch in range(n_epochs):
	# Train... (placeholder: no training step is performed in this cell)
	val_loss, val_acc = evaluate(model, val_loader, epoch=epoch, prefix="val")

	if not (val_loss < best_val_loss):
		# No improvement: count it and stop when the patience budget is used up
		counter += 1
		if counter >= patience:
			print(f"Early stopping en epoch {epoch+1}")
			break
		continue

	# Improvement: remember it, reset the counter and checkpoint the weights
	best_val_loss = val_loss
	counter = 0
	torch.save(model.state_dict(), "best_model.pth")

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Early stopping en epoch 4


### 8. Inicialización de Parámetros

In [None]:
# Manual weight initialisation
class MLPInit(nn.Module):
	"""Baseline MLP (flatten -> 512 -> 128 -> `num_classes`) with explicit weight init.

	Args:
		input_size: Number of features per sample after flattening.
		num_classes: Size of the output layer.
		init_type: 'he' applies `kaiming_normal_`, 'xavier' applies
			`xavier_uniform_`; any other value leaves the weights as constructed.
			Biases of linear layers are zeroed in every case.
	"""

	def __init__(self, input_size=64*64*3, num_classes=10, init_type='he'):
		super().__init__()
		first_hidden, second_hidden = 512, 128
		stack = [
			nn.Flatten(),
			nn.Linear(input_size, first_hidden),
			nn.ReLU(),
			nn.Linear(first_hidden, second_hidden),
			nn.ReLU(),
			nn.Linear(second_hidden, num_classes),
		]
		self.model = nn.Sequential(*stack)
		self.init_weights(init_type)

	def forward(self, x):
		"""Return the output of the last linear layer for batch `x`."""
		logits = self.model(x)
		return logits

	def init_weights(self, init_type):
		"""Re-initialise every `nn.Linear` in the module according to `init_type`."""
		weight_initializers = {
			'he': nn.init.kaiming_normal_,
			'xavier': nn.init.xavier_uniform_,
		}
		init_weight = weight_initializers.get(init_type)

		for layer in self.modules():
			if not isinstance(layer, nn.Linear):
				continue
			if init_weight is not None:
				init_weight(layer.weight)
			if layer.bias is not None:
				nn.init.zeros_(layer.bias)

# TODO: when training this variant, also log:
# mlflow.log_param("init_type", "he")

In [None]:
# Weight visualisation: one TensorBoard histogram per parameter whose name contains 'weight'.
# NOTE(review): `epoch` is not assigned in this cell; presumably it holds whatever
# value the last executed loop left behind — confirm before relying on the step.
for name, param in model.named_parameters():
	if 'weight' in name:
		writer.add_histogram(name, param, epoch)

In [None]:
# Launch the MLflow and TensorBoard web UIs from the notebook (URLs noted inline).
# NOTE(review): `mlflow ui` presumably keeps running in the foreground, in which
# case the second command would not start until the first is interrupted — confirm.
!mlflow ui # http://localhost:5000
!tensorboard --logdir=runs # http://localhost:6006

# import pandas as pd
# import matplotlib.pyplot as plt
# results = pd.read_csv("resultados_experimentos.csv")
# display(results.sort_values("val_accuracy", ascending=False))
# variantes = ["Base", "Dropout", "BatchNorm", "Dropout+BatchNorm", "Init He", "Init Xavier", "L2", "Augment", "Grises"]
# plt.figure(figsize=(10,5))
# plt.bar(variantes, results.sort_values("val_accuracy", ascending=False)["val_accuracy"], color='skyblue')
# plt.ylabel("Validation Accuracy")
# plt.title("Comparación de variantes MLP")
# plt.xticks(rotation=45)
# plt.show()

In [None]:
# Experiment variants to run; each one gets its own model, optimiser,
# TensorBoard log directory and MLflow run.
variantes = [
	"base",
	# "pocas_imagenes",
	# "grises",
	"dropout",
	"batchnorm",
	"batchnorm_dropout",
	"weight_decay",
	"augmentation",
	"init_he",
	"init_xavier",
	"histogramas"
]

# Model constructor per variant. Variants that only change the optimiser, the
# data or the logging reuse the baseline architecture.
model_factories = {
	"base": lambda: MLPClassifier(num_classes=num_classes),
	"weight_decay": lambda: MLPClassifier(num_classes=num_classes),
	"augmentation": lambda: MLPClassifier(num_classes=num_classes),
	"histogramas": lambda: MLPClassifier(num_classes=num_classes),
	# "grises" / "pocas_imagenes": not implemented yet
	"init_he": lambda: MLPInit(init_type='he', num_classes=num_classes),
	"init_xavier": lambda: MLPInit(init_type='xavier', num_classes=num_classes),
	"dropout": lambda: MLPDropout(num_classes=num_classes),
	"batchnorm": lambda: MLPBatchNorm(num_classes=num_classes),
	"batchnorm_dropout": lambda: MLPBatchNormDropout(num_classes=num_classes),
}

# Close any run left open by earlier cells so that every variant starts its own run
if mlflow.active_run() is not None:
	mlflow.end_run()

for var in variantes:
	print(f"Entrenando variante: {var}")

	if var not in model_factories:
		raise ValueError(f"Variante '{var}' no reconocida.")
	model = model_factories[var]().to(device)

	if var == "weight_decay":
		optimizer = optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-4)
	else:
		optimizer = optim.Adam(model.parameters(), lr=1e-3)

	# Choose the loader for this variant only. Rebinding the global `train_loader`
	# would make every variant after "augmentation" train on augmented data too.
	variant_train_loader = train_loader_adv if var == "augmentation" else train_loader

	writer = SummaryWriter(log_dir=f"runs/experimento_{var}")

	# One MLflow run per variant: a param cannot be re-logged with a different
	# value inside the same run, so sharing one run across variants would fail.
	with mlflow.start_run(run_name=var):
		mlflow.log_param("variante", var)

		for epoch in range(n_epochs):
			# Train... (placeholder: iterate over `variant_train_loader`)
			# writer.add_scalar(...)
			if var == "histogramas":
				# Weight histograms
				for name, param in model.named_parameters():
					if 'weight' in name:
						writer.add_histogram(name, param, epoch)

		torch.save(model.state_dict(), f"mlp_{var}.pth")
		mlflow.log_artifact(f"mlp_{var}.pth")

	writer.close()
	print(f"Variante {var} entrenada y guardada.\n")