<a href="https://colab.research.google.com/github/vincimech010233/MachineLearningBasics/blob/master/breast_cancer_detection_project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report

# Download the breast-cancer-wisconsin `wdbc.data` file from the UCI archive and load it.
# The file is read with header=None, so column names are supplied explicitly:
# "id", "diagnosis", then feature_1 .. feature_30.
data_url = "https://archive.ics.uci.edu/ml/machine-learning-databases/breast-cancer-wisconsin/wdbc.data"
column_names = ["id", "diagnosis", *(f"feature_{idx}" for idx in range(1, 31))]
data = pd.read_csv(data_url, names=column_names, header=None)


In [None]:
# Separate the target labels from the feature columns
# (the "id" column is dropped together with the label, so it is not used as a feature).
y = data["diagnosis"]
X = data.drop(columns=["id", "diagnosis"])

# Hold out 20% of the rows as a test set; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [None]:
# Standardize the features.
# The scaler is fitted on the training split only, and that same fitted transform
# is then applied to both splits, so the test rows never influence the scaling.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)


In [None]:
# Instantiate the three classifiers to compare.
# Only the random forest is given a seed; every other hyperparameter is left at its default.
svm_model = SVC()
rf_model = RandomForestClassifier(random_state=42)
logreg_model = LogisticRegression()


In [None]:
# Fit every classifier on the same standardized training features and labels.
# The SVM fit is kept as the final expression of the cell, as in the original layout.
rf_model.fit(X=X_train_scaled, y=y_train)
logreg_model.fit(X=X_train_scaled, y=y_train)
svm_model.fit(X=X_train_scaled, y=y_train)


In [None]:
# Predict labels for the standardized test split with each fitted classifier
# (evaluated in order: logistic regression, random forest, SVM).
logreg_pred, rf_pred, svm_pred = (
    classifier.predict(X_test_scaled)
    for classifier in (logreg_model, rf_model, svm_model)
)


In [None]:
# Evaluate the models against the held-out test labels.
# Display label -> test-set predictions, in the order the results are printed.
predictions_by_model = {
    "Logistic Regression": logreg_pred,
    "Random Forest": rf_pred,
    "SVM": svm_pred,
}

# First pass: one accuracy line per model.
for model_label, y_pred in predictions_by_model.items():
    print(f"{model_label} Accuracy:", accuracy_score(y_test, y_pred))

# Second pass: the per-class precision / recall / f1 report for each model.
for model_label, y_pred in predictions_by_model.items():
    print(f"\n{model_label} Report:\n", classification_report(y_test, y_pred))


Logistic Regression Accuracy: 0.9736842105263158
Random Forest Accuracy: 0.9649122807017544
SVM Accuracy: 0.9824561403508771

Logistic Regression Report:
               precision    recall  f1-score   support

           B       0.97      0.99      0.98        71
           M       0.98      0.95      0.96        43

    accuracy                           0.97       114
   macro avg       0.97      0.97      0.97       114
weighted avg       0.97      0.97      0.97       114


Random Forest Report:
               precision    recall  f1-score   support

           B       0.96      0.99      0.97        71
           M       0.98      0.93      0.95        43

    accuracy                           0.96       114
   macro avg       0.97      0.96      0.96       114
weighted avg       0.97      0.96      0.96       114


SVM Report:
               precision    recall  f1-score   support

           B       0.97      1.00      0.99        71
           M       1.00      0.95      0.98 