In [18]:
## Import data and data partitioning
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_breast_cancer

## Load breast cancer data set
X,y = load_breast_cancer(return_X_y = True)

## 80% train, 20% test data partition
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [19]:
## Import all relevant metrics
from sklearn.metrics import (
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
    confusion_matrix,
)

In [20]:
## K-Nearest
from sklearn.preprocessing import StandardScaler
from sklearn import neighbors

## Initialize list for recording model metrics
results = []

## Scale features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

## Loop for training and testing KNN model on n_neighbors hyperparameter
for k in [3, 5, 7, 9]:
    model = neighbors.KNeighborsClassifier(n_neighbors=k)
    model.fit(X_train_scaled, y_train)
    y_pred = model.predict(X_test_scaled)

    results.append({
        "Model": "KNN",
        "Hyperparameter": f"n_neighbors={k}",
        "Accuracy": accuracy_score(y_test, y_pred),
        "Precision": precision_score(y_test, y_pred, average='binary'),
        "Recall": recall_score(y_test, y_pred, average='binary'),
        "F1 Score": f1_score(y_test, y_pred, average='binary')
    })

In [21]:
## Decision Tree
from sklearn import tree

## Loop for training and test of max_depth hyperparameter to append to results
for depth in [1, 2, 3, 4, 5, 10, None]:
    model = tree.DecisionTreeClassifier(max_depth=depth)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    results.append({
        "Model": "Decision Tree",
        "Hyperparameter": f"max_depth={depth}",
        "Accuracy": accuracy_score(y_test, y_pred),
        "Precision": precision_score(y_test, y_pred, average='binary'),
        "Recall": recall_score(y_test, y_pred, average='binary'),
        "F1 Score": f1_score(y_test, y_pred, average='binary')
    })




In [22]:
## Random Forest 
from sklearn.ensemble import RandomForestClassifier

## Loop for training and test of max_depth hyperparameter to append to results
for depth in [2, 4, 6, 8, 10, None]:
    clf = RandomForestClassifier(n_estimators=100, max_depth=depth, random_state=42) ## 100 trees
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    results.append({
        "Model": "Random Forest",
        "Hyperparameter": f"max_depth={depth}",
        "Accuracy": accuracy_score(y_test, y_pred),
        "Precision": precision_score(y_test, y_pred, average='binary'),
        "Recall": recall_score(y_test, y_pred, average='binary'),
        "F1 Score": f1_score(y_test, y_pred, average='binary')
    })



In [23]:
import pandas as pd

## Show top model + hyperparameter configuration for highest accuracy
df_results = pd.DataFrame(results)
print(df_results.sort_values(by='Accuracy',ascending=False)[['Model','Hyperparameter', 'Accuracy']]) ## Change to check other metrics by changing 'by' parameter

            Model  Hyperparameter  Accuracy
3             KNN   n_neighbors=9  0.964912
13  Random Forest     max_depth=6  0.964912
14  Random Forest     max_depth=8  0.964912
16  Random Forest  max_depth=None  0.964912
15  Random Forest    max_depth=10  0.964912
11  Random Forest     max_depth=2  0.964912
12  Random Forest     max_depth=4  0.964912
0             KNN   n_neighbors=3  0.947368
1             KNN   n_neighbors=5  0.947368
2             KNN   n_neighbors=7  0.947368
6   Decision Tree     max_depth=3  0.938596
7   Decision Tree     max_depth=4  0.938596
8   Decision Tree     max_depth=5  0.938596
10  Decision Tree  max_depth=None  0.938596
5   Decision Tree     max_depth=2  0.929825
9   Decision Tree    max_depth=10  0.929825
4   Decision Tree     max_depth=1  0.894737
