In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report, accuracy_score
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler

In [2]:
# 1. Load the processed dataset from disk (the path is relative to the working directory)
df_model = pd.read_csv(filepath_or_buffer='data/processed_data.csv')

In [3]:
# The 'condition' column is the prediction target (y); every other column is a feature (X).
y = df_model['condition']
X = df_model.drop(columns=['condition'])

# Hold out 20% of the rows as the test set; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [4]:
# One-hot encode the categorical columns; every other column is passed through unchanged.
# NOTE(review): the passthrough columns are assumed to be numeric, since they are fed to
# StandardScaler below — confirm against the processed dataset.
categorical_features = ['listing_type_id', 'buying_mode']
preprocessor = ColumnTransformer(
    transformers=[
        # handle_unknown='ignore': categories unseen during fit are encoded as all zeros
        # instead of raising at transform time.
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features)
    ],
    remainder='passthrough',
    # OneHotEncoder emits sparse output, and ColumnTransformer keeps the stacked result
    # sparse when its density is below the threshold (default 0.3). StandardScaler centers
    # by default and rejects sparse input, so force a dense array regardless of density.
    sparse_threshold=0,
)

# Fit the encoder on the training split only, then reuse it on the test split so that no
# information from the test rows influences the learned categories.
X_train_encoded = preprocessor.fit_transform(X_train)
X_test_encoded = preprocessor.transform(X_test)

# Standardize every column using the mean and variance estimated on the training split.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train_encoded)
X_test_scaled = scaler.transform(X_test_encoded)

In [5]:
# Logistic regression with default hyperparameters, fitted on the scaled training matrix
print("=== Regresión Logística ===")
model_lr = LogisticRegression().fit(X_train_scaled, y_train)
y_pred_lr = model_lr.predict(X_test_scaled)

# Per-class precision/recall/F1 on the held-out split, followed by the overall accuracy
print(classification_report(y_true=y_test, y_pred=y_pred_lr))
print("Accuracy:", accuracy_score(y_true=y_test, y_pred=y_pred_lr))

=== Regresión Logística ===
              precision    recall  f1-score   support

           0       0.91      0.47      0.62      9283
           1       0.67      0.96      0.79     10717

    accuracy                           0.73     20000
   macro avg       0.79      0.71      0.70     20000
weighted avg       0.78      0.73      0.71     20000

Accuracy: 0.7303


In [6]:
# Decision tree; random_state is fixed so repeated runs build the same tree
print("\n=== Árbol de Decisión ===")
model_dt = DecisionTreeClassifier(random_state=42).fit(X_train_scaled, y_train)
y_pred_dt = model_dt.predict(X_test_scaled)

# Evaluate on the held-out split: per-class report first, then overall accuracy
print(classification_report(y_true=y_test, y_pred=y_pred_dt))
print("Accuracy:", accuracy_score(y_true=y_test, y_pred=y_pred_dt))


=== Árbol de Decisión ===
              precision    recall  f1-score   support

           0       0.77      0.88      0.82      9283
           1       0.88      0.77      0.82     10717

    accuracy                           0.82     20000
   macro avg       0.82      0.82      0.82     20000
weighted avg       0.83      0.82      0.82     20000

Accuracy: 0.8181


In [7]:
# Random forest with default hyperparameters; random_state is fixed for reproducibility
print("\n=== Random Forest ===")
model_rf = RandomForestClassifier(random_state=42).fit(X_train_scaled, y_train)
y_pred_rf = model_rf.predict(X_test_scaled)

# Evaluate on the held-out split: per-class report first, then overall accuracy
print(classification_report(y_true=y_test, y_pred=y_pred_rf))
print("Accuracy:", accuracy_score(y_true=y_test, y_pred=y_pred_rf))



=== Random Forest ===
              precision    recall  f1-score   support

           0       0.78      0.87      0.82      9283
           1       0.87      0.79      0.83     10717

    accuracy                           0.82     20000
   macro avg       0.82      0.83      0.82     20000
weighted avg       0.83      0.82      0.82     20000

Accuracy: 0.8223


In [8]:
# K-nearest neighbours classifier voting over the 5 closest training points
print("\n=== K-Nearest Neighbors (KNN) ===")
model_knn = KNeighborsClassifier(n_neighbors=5).fit(X_train_scaled, y_train)
y_pred_knn = model_knn.predict(X_test_scaled)

# Evaluate on the held-out split: per-class report first, then overall accuracy
print(classification_report(y_true=y_test, y_pred=y_pred_knn))
print("Accuracy:", accuracy_score(y_true=y_test, y_pred=y_pred_knn))


=== K-Nearest Neighbors (KNN) ===


found 0 physical cores < 1
  File "C:\Users\valen\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\LocalCache\local-packages\Python311\site-packages\joblib\externals\loky\backend\context.py", line 282, in _count_physical_cores
    raise ValueError(f"found {cpu_count_physical} physical cores < 1")


              precision    recall  f1-score   support

           0       0.79      0.76      0.78      9283
           1       0.80      0.83      0.81     10717

    accuracy                           0.80     20000
   macro avg       0.80      0.79      0.80     20000
weighted avg       0.80      0.80      0.80     20000

Accuracy: 0.79665
