In [27]:
import os
import cv2
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.decomposition import PCA
import xgboost as xgb
from sklearn.model_selection import RandomizedSearchCV

In [12]:
# 1. Leer las imágenes

ruta_carpeta = '/data_lids/home/walter/Lab/alignment/basep/processed/rotations_064'
nombres_imagenes = [f for f in os.listdir(ruta_carpeta) if f.endswith('.png')]

In [13]:
len(nombres_imagenes)

8148

In [14]:
datos = []

for nombre_imagen in nombres_imagenes:
    ruta_imagen = os.path.join(ruta_carpeta, nombre_imagen)
    imagen = cv2.imread(ruta_imagen, cv2.IMREAD_GRAYSCALE)  # Leer en escala de grises
    
    # 2. Aplicar el filtro de Sobel
    sobelx = cv2.Sobel(imagen, cv2.CV_64F, 1, 0, ksize=3)
    sobely = cv2.Sobel(imagen, cv2.CV_64F, 0, 1, ksize=3)
    magnitud_sobel = np.sqrt(sobelx**2 + sobely**2)
    
    # Aplanar la imagen
    imagen_aplanada = magnitud_sobel.flatten()
    
    # 3. Extraer el ángulo del nombre de la imagen
    angulo = int(nombre_imagen.split('_r')[-1].split('.png')[0])
    
    # Agregar al conjunto de datos
    datos.append(np.append(imagen_aplanada, angulo))

In [15]:
# 4. Crear el DataFrame
columnas = [f'pixel_{i}' for i in range(64*64)] + ['Angulo']
df = pd.DataFrame(datos, columns=columnas)


In [16]:
df.head()

Unnamed: 0,pixel_0,pixel_1,pixel_2,pixel_3,pixel_4,pixel_5,pixel_6,pixel_7,pixel_8,pixel_9,...,pixel_4087,pixel_4088,pixel_4089,pixel_4090,pixel_4091,pixel_4092,pixel_4093,pixel_4094,pixel_4095,Angulo
0,0.0,172.0,452.0,362.0,36.0,190.0,134.0,2.0,150.0,232.0,...,196.0,106.0,150.0,200.0,180.0,98.0,536.0,748.0,0.0,0.0
1,0.0,214.0,92.0,354.0,310.0,86.0,70.0,164.0,124.0,44.0,...,402.0,40.0,280.0,290.0,154.0,40.0,200.0,106.0,0.0,30.0
2,0.0,134.0,36.0,180.0,94.0,166.0,396.0,362.0,84.0,96.0,...,262.0,168.0,584.0,438.0,58.0,236.0,388.0,214.0,0.0,30.0
3,0.0,164.0,238.0,420.0,244.0,300.0,170.0,496.0,296.0,44.0,...,166.0,242.0,494.0,44.0,634.0,338.0,466.0,122.0,0.0,0.0
4,0.0,190.0,248.0,192.0,158.0,170.0,82.0,222.0,462.0,316.0,...,216.0,128.0,376.0,490.0,350.0,112.0,32.0,212.0,0.0,15.0


In [20]:
# 1. Dividir el DataFrame
X = df.drop(columns=['Angulo'])
y = df['Angulo']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [21]:
# 2. Entrenar los modelos

# Random Forest
rf = RandomForestRegressor(n_estimators=100, random_state=42)
rf.fit(X_train, y_train)

# XGBoost
xgb_model = xgb.XGBRegressor(objective='reg:squarederror', n_estimators=100)
xgb_model.fit(X_train, y_train)


In [22]:
# 3. Evaluar los modelos
y_pred_rf = rf.predict(X_test)
y_pred_xgb = xgb_model.predict(X_test)

mae_rf = mean_absolute_error(y_test, y_pred_rf)
mae_xgb = mean_absolute_error(y_test, y_pred_xgb)

print(f'MAE Random Forest: {mae_rf}')
print(f'MAE XGBoost: {mae_xgb}')

MAE Random Forest: 10.268006134969324
MAE XGBoost: 11.038879342011514


In [24]:
rmse_rf = lambda y_true, y_pred_rf: np.sqrt(mean_squared_error(y_true, y_pred_rf))
rmse_xgb = lambda y_true, y_pred_xgb: np.sqrt(mean_squared_error(y_true, y_pred_xgb))

print(f'RMSE Random Forest: {rmse_rf(y_test, y_pred_rf)}')
print(f'RMSE XGBoost: {rmse_xgb(y_test, y_pred_xgb)}')

RMSE Random Forest: 12.171950746257773
RMSE XGBoost: 12.989393744639706


In [25]:
# 1. Aplicar PCA
pca = PCA(n_components=200)
X_pca = pca.fit_transform(df.drop(columns=['Angulo']))

# 2. Dividir el DataFrame
y = df['Angulo']
X_train, X_test, y_train, y_test = train_test_split(X_pca, y, test_size=0.2, random_state=42)

# 3. Entrenar y evaluar los modelos

# Random Forest
rf = RandomForestRegressor(n_estimators=100, random_state=42)
rf.fit(X_train, y_train)

# XGBoost
xgb_model = xgb.XGBRegressor(objective='reg:squarederror', n_estimators=100)
xgb_model.fit(X_train, y_train)

# Evaluar los modelos
y_pred_rf = rf.predict(X_test)
y_pred_xgb = xgb_model.predict(X_test)

mae_rf = mean_absolute_error(y_test, y_pred_rf)
mae_xgb = mean_absolute_error(y_test, y_pred_xgb)

print(f'MAE Random Forest (con PCA): {mae_rf}')
print(f'MAE XGBoost (con PCA): {mae_xgb}')

MAE Random Forest (con PCA): 10.285214723926382
MAE XGBoost (con PCA): 11.124560795395286


In [29]:
rmse_rf = lambda y_true, y_pred_rf: np.sqrt(mean_squared_error(y_true, y_pred_rf))
rmse_xgb = lambda y_true, y_pred_xgb: np.sqrt(mean_squared_error(y_true, y_pred_xgb))

print(f'RMSE Random Forest: {rmse_rf(y_test, y_pred_rf)}')
print(f'RMSE XGBoost: {rmse_xgb(y_test, y_pred_xgb)}')

RMSE Random Forest: 12.18398904123995
RMSE XGBoost: 13.043534845677199


# Improve hyperparameters

In [28]:
# Reducción de dimensionalidad con PCA
pca = PCA(n_components=200)
X_pca = pca.fit_transform(df.drop(columns=['Angulo']))

# División en conjunto de entrenamiento y prueba
y = df['Angulo']
X_train, X_test, y_train, y_test = train_test_split(X_pca, y, test_size=0.2, random_state=42)

# Definición del espacio de búsqueda para hiperparámetros
param_dist = {
    'learning_rate': [0.01, 0.05, 0.1, 0.3, 0.5],
    'max_depth': [3, 4, 5, 6, 7, 8, 9, 10],
    'min_child_weight': [1, 2, 3, 4, 5],
    'gamma': [0, 0.1, 0.2, 0.3, 0.4, 0.5],
    'subsample': [0.6, 0.7, 0.8, 0.9, 1.0],
    'colsample_bytree': [0.6, 0.7, 0.8, 0.9, 1.0],
}

# Iniciar el modelo XGBoost
xgb_model = xgb.XGBRegressor(objective='reg:squarederror')

# Iniciar RandomizedSearchCV
search = RandomizedSearchCV(
    xgb_model, 
    param_distributions=param_dist, 
    n_iter=100, 
    scoring='neg_mean_absolute_error', 
    cv=3, 
    verbose=1, 
    n_jobs=-1
)

# Ajustar el modelo con RandomizedSearchCV
search.fit(X_train, y_train)

# Mostrar los mejores hiperparámetros encontrados
print(search.best_params_)



Fitting 3 folds for each of 100 candidates, totalling 300 fits
{'subsample': 0.6, 'min_child_weight': 2, 'max_depth': 3, 'learning_rate': 0.01, 'gamma': 0.1, 'colsample_bytree': 1.0}


In [30]:
# 1. Obtener los mejores hiperparámetros
best_params = search.best_params_

# 2. Entrenar XGBoost con los hiperparámetros óptimos
optimized_xgb = xgb.XGBRegressor(**best_params, objective='reg:squarederror')
optimized_xgb.fit(X_train, y_train)

# Hacer predicciones en el conjunto de prueba
y_pred = optimized_xgb.predict(X_test)

# 3. Evaluar el modelo
mae = mean_absolute_error(y_test, y_pred)
print(f'MAE del modelo XGBoost optimizado: {mae}')
print(f'RMSE XGBoost: {rmse_xgb(y_test, y_pred_xgb)}')

MAE del modelo XGBoost optimizado: 10.175528546198745
RMSE XGBoost: 13.043534845677199
