In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV, learning_curve
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix
from IPython.display import display

# Load the dataset. Every column except the last is used as a feature and the
# last column is used as the class label.
data = pd.read_csv('HCV-Egy-Data.csv')
features = data.columns[:-1]
target = data.columns[-1]

# Split BEFORE scaling: fitting the scaler on the full dataset would let the
# test rows influence the mean/variance used to transform the training rows
# (data leakage) and make the test score optimistic.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    data[features], data[target], test_size=0.2, random_state=42
)

# Learn the scaling statistics from the training rows only, then apply the
# same transformation to the test rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Kept for any later code that reads X_scaled: the whole dataset transformed
# with the training-fitted scaler.
X_scaled = scaler.transform(data[features])

# Hyper-parameter grid explored for the RBF-kernel SVM.
params = {
    'C': [0.1, 1, 10, 100],
    'gamma': [0.01, 0.1, 1],
    'kernel': ['rbf'],
}

# 5-fold cross-validated grid search, scored by accuracy, using all CPU cores.
svm_clf = SVC()
grid_search = GridSearchCV(
    svm_clf,
    params,
    cv=5,
    scoring='accuracy',
    verbose=1,
    n_jobs=-1,
)
grid_search.fit(X_train, y_train)

# GridSearchCV defaults to refit=True, so best_estimator_ has already been
# retrained on all of X_train with the winning parameters; fitting it again
# would only repeat the same work.
best_svm = grid_search.best_estimator_
best_params = grid_search.best_params_
print(f"Mejores parámetros: {best_params}")

# Score the tuned model on the held-out test split.
y_pred = best_svm.predict(X_test)

# Compute every metric up front, then report them in order.
# zero_division=1: ratios that would divide by zero are reported as 1.
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred, zero_division=1, output_dict=True)
conf_matrix = confusion_matrix(y_test, y_pred)

print(f"\nPrecisión: {accuracy}")

# One row per class/average, one column per metric.
print("\nInforme de Clasificación:")
report_table = pd.DataFrame(report).T
display(report_table)

print("\nMatriz de Confusión:")
print(conf_matrix)

# Cross-validated learning curve of the tuned SVM, evaluated at five
# training-set fractions evenly spaced between 10% and 100%.
train_sizes, train_scores, test_scores = learning_curve(
    best_svm,
    X_train,
    y_train,
    cv=5,
    train_sizes=np.linspace(0.1, 1.0, 5),
    n_jobs=-1,
)

# Collapse the per-fold scores (axis 1) into one mean and one standard
# deviation per training-set size.
train_scores_mean = train_scores.mean(axis=1)
train_scores_std = train_scores.std(axis=1)
test_scores_mean = test_scores.mean(axis=1)
test_scores_std = test_scores.std(axis=1)

plt.figure()
plt.title("Curva de Aprendizaje - SVM")
plt.xlabel("Tamaño del conjunto de entrenamiento")
plt.ylabel("Precisión")
plt.grid()

# Shaded bands: mean +/- one standard deviation across folds.
for curve_mean, curve_std, curve_color in (
    (train_scores_mean, train_scores_std, "r"),
    (test_scores_mean, test_scores_std, "g"),
):
    plt.fill_between(
        train_sizes,
        curve_mean - curve_std,
        curve_mean + curve_std,
        alpha=0.1,
        color=curve_color,
    )

# Mean score lines drawn after the bands, training first.
for curve_mean, curve_color, curve_label in (
    (train_scores_mean, "r", "Entrenamiento"),
    (test_scores_mean, "g", "Validación"),
):
    plt.plot(train_sizes, curve_mean, 'o-', color=curve_color, label=curve_label)

plt.legend(loc="best")
plt.show()

# Heuristic diagnosis based on the last point of the learning curve (the
# largest training-set size).
final_train_score = train_scores_mean[-1]
final_val_score = test_scores_mean[-1]

# Check the absolute validation score first. Training accuracy is almost
# always higher than validation accuracy, so testing that condition first
# would shadow the low-score branch and it would practically never fire.
if final_val_score < 0.6:
    print("\nDiagnóstico: El modelo no generaliza bien.")
elif final_train_score > final_val_score:
    print("\nDiagnóstico: El modelo funciona mejor en entrenamiento que en validación.")
else:
    print("\nDiagnóstico: Buen desempeño general del modelo.")