In [1]:
import os
import cv2
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.decomposition import PCA
import xgboost as xgb
from sklearn.model_selection import RandomizedSearchCV

In [2]:
# 1. Leer las imágenes

ruta_carpeta = '/data_lids/home/walter/Lab/alignment/basep/processed/rotations_064'
nombres_imagenes = [f for f in os.listdir(ruta_carpeta) if f.endswith('.png')]

In [3]:
len(nombres_imagenes)

8148

In [4]:
datos = []

for nombre_imagen in nombres_imagenes:
    ruta_imagen = os.path.join(ruta_carpeta, nombre_imagen)
    imagen = cv2.imread(ruta_imagen, cv2.IMREAD_GRAYSCALE)  # Leer en escala de grises
    
    # 2. Aplicar el filtro de Sobel
    sobelx = cv2.Sobel(imagen, cv2.CV_64F, 1, 0, ksize=3)
    sobely = cv2.Sobel(imagen, cv2.CV_64F, 0, 1, ksize=3)
    magnitud_sobel = np.sqrt(sobelx**2 + sobely**2)
    
    # Aplanar la imagen
    imagen_aplanada = magnitud_sobel.flatten()
    
    # 3. Extraer el ángulo del nombre de la imagen
    angulo = int(nombre_imagen.split('_r')[-1].split('.png')[0])
    
    # Agregar al conjunto de datos
    datos.append(np.append(imagen_aplanada, angulo))

In [5]:
# 4. Crear el DataFrame
columnas = [f'pixel_{i}' for i in range(64*64)] + ['Angulo']
df = pd.DataFrame(datos, columns=columnas)


In [6]:
df.head()

Unnamed: 0,pixel_0,pixel_1,pixel_2,pixel_3,pixel_4,pixel_5,pixel_6,pixel_7,pixel_8,pixel_9,...,pixel_4087,pixel_4088,pixel_4089,pixel_4090,pixel_4091,pixel_4092,pixel_4093,pixel_4094,pixel_4095,Angulo
0,0.0,172.0,452.0,362.0,36.0,190.0,134.0,2.0,150.0,232.0,...,196.0,106.0,150.0,200.0,180.0,98.0,536.0,748.0,0.0,0.0
1,0.0,214.0,92.0,354.0,310.0,86.0,70.0,164.0,124.0,44.0,...,402.0,40.0,280.0,290.0,154.0,40.0,200.0,106.0,0.0,30.0
2,0.0,134.0,36.0,180.0,94.0,166.0,396.0,362.0,84.0,96.0,...,262.0,168.0,584.0,438.0,58.0,236.0,388.0,214.0,0.0,30.0
3,0.0,164.0,238.0,420.0,244.0,300.0,170.0,496.0,296.0,44.0,...,166.0,242.0,494.0,44.0,634.0,338.0,466.0,122.0,0.0,0.0
4,0.0,190.0,248.0,192.0,158.0,170.0,82.0,222.0,462.0,316.0,...,216.0,128.0,376.0,490.0,350.0,112.0,32.0,212.0,0.0,15.0


# Improve hyperparameters

In [7]:
# 1. Transformar el target
df['Angulo'] = df['Angulo'].replace({0: 0, 15: 1, 30: 2})

# Datos y etiquetas
X = df.drop(columns=['Angulo'])
y = df['Angulo']

# Aplicar PCA
pca = PCA(n_components=200)
X_pca = pca.fit_transform(X)

# 3. Dividir el conjunto de datos
X_train, X_test, y_train, y_test = train_test_split(X_pca, y, test_size=0.2, random_state=42)

# 4. Entrenar y evaluar RandomForestClassifier
rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42)
rf_classifier.fit(X_train, y_train)
y_pred_rf = rf_classifier.predict(X_test)

print("Resultados de RandomForestClassifier:")
print("Accuracy:", accuracy_score(y_test, y_pred_rf))
print(classification_report(y_test, y_pred_rf))

# Entrenar y evaluar XGBClassifier
xgb_classifier = xgb.XGBClassifier(objective='multi:softprob', num_class=3, n_estimators=100)
xgb_classifier.fit(X_train, y_train)
y_pred_xgb = xgb_classifier.predict(X_test)

print("\nResultados de XGBClassifier:")
print("Accuracy:", accuracy_score(y_test, y_pred_xgb))
print(classification_report(y_test, y_pred_xgb))



Resultados de RandomForestClassifier:
Accuracy: 0.3343558282208589
              precision    recall  f1-score   support

         0.0       0.33      0.39      0.35       521
         1.0       0.32      0.28      0.30       540
         2.0       0.36      0.33      0.35       569

    accuracy                           0.33      1630
   macro avg       0.33      0.34      0.33      1630
weighted avg       0.34      0.33      0.33      1630


Resultados de XGBClassifier:
Accuracy: 0.3595092024539877
              precision    recall  f1-score   support

         0.0       0.36      0.40      0.38       521
         1.0       0.34      0.34      0.34       540
         2.0       0.38      0.34      0.36       569

    accuracy                           0.36      1630
   macro avg       0.36      0.36      0.36      1630
weighted avg       0.36      0.36      0.36      1630



In [8]:
# Espacio de búsqueda para XGBoost
param_dist_xgb = {
    'learning_rate': [0.01, 0.05, 0.1, 0.3, 0.5],
    'max_depth': list(range(3, 11)),
    'min_child_weight': [1, 2, 3, 4, 5],
    'gamma': [0, 0.1, 0.2, 0.3, 0.4, 0.5],
    'subsample': [0.6, 0.7, 0.8, 0.9, 1.0],
    'colsample_bytree': [0.6, 0.7, 0.8, 0.9, 1.0],
    'n_estimators': [10, 50, 100, 200, 300, 400, 500]
}

xgb_classifier = xgb.XGBClassifier(objective='multi:softprob', num_class=3)

search_xgb = RandomizedSearchCV(
    xgb_classifier, 
    param_distributions=param_dist_xgb, 
    n_iter=100, 
    scoring='accuracy', 
    cv=3, 
    verbose=1, 
    n_jobs=-1
)

search_xgb.fit(X_train, y_train)
best_params_xgb = search_xgb.best_params_
print("Mejores hiperparámetros para XGBoost:", best_params_xgb)

Fitting 3 folds for each of 100 candidates, totalling 300 fits
Mejores hiperparámetros para XGBoost: {'subsample': 0.6, 'n_estimators': 400, 'min_child_weight': 1, 'max_depth': 10, 'learning_rate': 0.01, 'gamma': 0.4, 'colsample_bytree': 1.0}


In [9]:
# Entrenar el modelo RandomForestClassifier con hiperparámetros óptimos
optimized_rf_classifier = RandomForestClassifier(**best_params_rf)
optimized_rf_classifier.fit(X_train, y_train)

# Hacer predicciones con RandomForestClassifier
y_pred_rf = optimized_rf_classifier.predict(X_test)

# Evaluar RandomForestClassifier
print("Resultados de RandomForestClassifier optimizado:")
print("Accuracy:", accuracy_score(y_test, y_pred_rf))
print(classification_report(y_test, y_pred_rf))

NameError: name 'best_params_rf' is not defined

In [11]:
optimized_xgb_classifier = xgb.XGBClassifier(**best_params_xgb, objective='multi:softprob', num_class=3)
optimized_xgb_classifier.fit(X_train, y_train)

# Hacer predicciones con XGBClassifier
y_pred_xgb = optimized_xgb_classifier.predict(X_test)

# Evaluar XGBClassifier
print("\nResultados de XGBClassifier optimizado:")
print("Accuracy:", accuracy_score(y_test, y_pred_xgb))
print(classification_report(y_test, y_pred_xgb))



Resultados de XGBClassifier optimizado:
Accuracy: 0.36012269938650304
              precision    recall  f1-score   support

         0.0       0.36      0.46      0.41       521
         1.0       0.34      0.32      0.33       540
         2.0       0.38      0.31      0.34       569

    accuracy                           0.36      1630
   macro avg       0.36      0.36      0.36      1630
weighted avg       0.36      0.36      0.36      1630

