In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, f1_score

In [3]:
# Load the training and test CSV files (paths are relative to the working directory).
train_path, test_path = 'train.csv', 'test.csv'
train_data, test_data = (
    pd.read_csv(csv_path) for csv_path in (train_path, test_path)
)

In [4]:
# Preprocessing
# Drop the 'Unnamed: 0' column from both frames. errors='ignore' keeps this
# cell re-runnable: the frames are reassigned under the same names, so on a
# second execution the column is already gone and a plain drop would raise
# KeyError.
train_data = train_data.drop(columns=['Unnamed: 0'], errors='ignore')
test_data = test_data.drop(columns=['Unnamed: 0'], errors='ignore')

# Separate the feature columns from the 'class' target.
X = train_data.drop(columns=['class'])
y = train_data['class']

# LabelEncoder assigns integer codes following the sorted order of the labels
# (original author's note: E -> 0, P -> 1 — assumes those are the labels
# present in 'class'; confirm against the data).
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# One-hot encode the training features; the first level of every categorical
# column is dropped and acts as the implicit baseline (all-zero) category.
X_encoded = pd.get_dummies(X, drop_first=True)

# Encode the test features WITHOUT drop_first and then align them to the
# training columns. Calling get_dummies(..., drop_first=True) on the test set
# independently would drop the first level *seen in the test set*; if that
# differs from the training baseline, reindex would zero-fill the missing
# column and those rows would silently be encoded as the training baseline.
# Keeping every test level and letting reindex select the training columns
# discards the baseline level and any level unseen in training, and
# zero-fills levels that are absent from the test set.
test_data_encoded = pd.get_dummies(test_data).reindex(
    columns=X_encoded.columns, fill_value=0
)

# Hold out 20% of the training rows for validation, stratified on the target
# so both splits keep the same class proportions.
X_train, X_val, y_train, y_val = train_test_split(X_encoded, y_encoded, test_size=0.2, random_state=42, stratify=y_encoded)

In [5]:
# Models and evaluation
def _fit_and_score(model, X_fit, y_fit, X_eval, y_eval):
    """Fit `model` and score it on a held-out split.

    Parameters
    ----------
    model : estimator exposing `fit(X, y)` and `predict(X)`.
    X_fit, y_fit : features and targets used for fitting.
    X_eval, y_eval : features and targets used for scoring.

    Returns
    -------
    tuple
        (predictions on X_eval, accuracy, weighted F1).
    """
    model.fit(X_fit, y_fit)
    predictions = model.predict(X_eval)
    accuracy = accuracy_score(y_eval, predictions)
    weighted_f1 = f1_score(y_eval, predictions, average='weighted')
    return predictions, accuracy, weighted_f1


# Model 1: k-NN
knn = KNeighborsClassifier(n_neighbors=5)
y_pred_knn, accuracy_knn, f1_knn = _fit_and_score(knn, X_train, y_train, X_val, y_val)

# Model 2: decision tree
decision_tree = DecisionTreeClassifier(random_state=42)
y_pred_tree, accuracy_tree, f1_tree = _fit_and_score(decision_tree, X_train, y_train, X_val, y_val)

# Model 3: SVM with a linear kernel
svm = SVC(kernel='linear', random_state=42)
y_pred_svm, accuracy_svm, f1_svm = _fit_and_score(svm, X_train, y_train, X_val, y_val)

# Show the validation metrics side by side so the model choice made later in
# the notebook can be checked against actual numbers on every run.
validation_scores = pd.DataFrame(
    {
        'accuracy': [accuracy_knn, accuracy_tree, accuracy_svm],
        'f1_weighted': [f1_knn, f1_tree, f1_svm],
    },
    index=['k-NN', 'Decision Tree', 'SVM (linear)'],
)
validation_scores

In [6]:
# Predict the test set with the decision tree. The original author's note says
# it was chosen for simplicity because all three models scored perfectly on
# validation — NOTE(review): that result is not shown by this code; confirm
# against the validation metrics before relying on it.
test_predictions = decision_tree.predict(X=test_data_encoded)
# Map the integer predictions back to the original class labels.
test_predictions_labels = label_encoder.inverse_transform(y=test_predictions)

In [7]:
# Build the submission file: one row per test sample, with IDs derived from
# the test frame's index shifted by one so that they start at 1.
submission_ids = test_data.index + 1
submission = pd.DataFrame({'ID': submission_ids, 'Edible': test_predictions_labels})

# Write without the DataFrame index so the CSV holds only 'ID' and 'Edible'.
submission.to_csv('submission_labels.csv', index=False)