# 📊 Model Comparison with RandomizedSearchCV

In [None]:

import os
import pickle

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from xgboost import XGBClassifier

# Load the dataset and separate the 'target' label column from the features.
df = pd.read_csv('../data/dataset.csv')
y = df['target']
X = df.drop(columns='target')
# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [None]:

# Candidate models and their hyperparameter search spaces.
# Each entry maps a display name to an (estimator, param_distributions) pair.
# Every estimator is seeded so repeated runs produce the same fitted models,
# consistent with the fixed seed (42) used elsewhere in this script.
models = {
    'LogisticRegression': (LogisticRegression(max_iter=1000, random_state=42), {
        'C': np.logspace(-4, 4, 20),  # 20 values, log-spaced from 1e-4 to 1e4
        'penalty': ['l1', 'l2'],
        'solver': ['liblinear'],  # liblinear supports both the l1 and l2 penalties
    }),
    'RandomForest': (RandomForestClassifier(random_state=42), {
        'n_estimators': [50, 100, 200],
        'max_depth': [None, 10, 20, 30],  # None lets trees grow without a depth limit
        'min_samples_split': [2, 5, 10],
    }),
    'XGBoost': (XGBClassifier(eval_metric='logloss', random_state=42), {
        'n_estimators': [50, 100, 200],
        'learning_rate': [0.01, 0.1, 0.2],
        'max_depth': [3, 6, 10],
    }),
}


In [None]:

# Tune every candidate with a randomized hyperparameter search and keep the
# best estimator found for each one, keyed by model name.
best_models = {}
for name in models:
    model, params = models[name]
    print(f"Training {name}...")
    # 10 sampled parameter settings, each scored by 5-fold CV accuracy.
    search = RandomizedSearchCV(
        estimator=model,
        param_distributions=params,
        n_iter=10,
        cv=5,
        scoring='accuracy',
        random_state=42,
    )
    search.fit(X_train, y_train)
    best_models[name] = search.best_estimator_
    print(f"Best params for {name}: {search.best_params_}")


In [None]:

# Evaluate each tuned model on the held-out test set and print its
# per-class classification report.
for name in best_models:
    model = best_models[name]
    y_pred = model.predict(X_test)
    print(f"\n{name} Classification Report:")
    print(classification_report(y_test, y_pred))


In [None]:

# Confusion matrix for the chosen model.
# NOTE: XGBoost is hard-coded here as an example; it is not selected by
# comparing the scores of the tuned models.
best = best_models['XGBoost']
y_pred_best = best.predict(X_test)
cm = confusion_matrix(y_test, y_pred_best)
sns.heatmap(cm, annot=True, fmt="d", cmap="Blues")
# confusion_matrix puts true classes on rows and predicted classes on columns.
plt.xlabel("Predicted label")
plt.ylabel("True label")
plt.title("Confusion Matrix - XGBoost")
# Create the output directory if needed so savefig cannot fail on a missing folder.
os.makedirs('../outputs', exist_ok=True)
plt.savefig('../outputs/confusion_matrix.png')
plt.show()


In [None]:

# Save the selected model to disk.
# Create the output directory first so the trained model is not lost to a
# FileNotFoundError when '../outputs' does not exist yet.
os.makedirs('../outputs', exist_ok=True)
with open('../outputs/best_model.pkl', 'wb') as f:
    pickle.dump(best, f)
