In [9]:
## Import data and data partitioning
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_breast_cancer

## Load breast cancer data set
X,y = load_breast_cancer(return_X_y = True)

## 80% train, 20% test data partition
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [10]:
## Import all relevant metrics
from sklearn.metrics import (
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
    confusion_matrix,
)

In [None]:
## K-Nearest
from sklearn.preprocessing import StandardScaler
from sklearn import neighbors

## Initialize list for recording model metrics
results = []

## Scale features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

## Loop for training and testing KNN model on n_neighbors hyperparameter
for k in [3, 5, 7, 9]:
    model = neighbors.KNeighborsClassifier(n_neighbors=k)
    model.fit(X_train_scaled, y_train)
    y_pred = model.predict(X_test_scaled)

    results.append({
        "Model": "KNN",
        "Hyperparameter": f"n_neighbors={k}",
        "Accuracy": accuracy_score(y_test, y_pred),
        "Precision": precision_score(y_test, y_pred, average='binary'),
        "Recall": recall_score(y_test, y_pred, average='binary'),
        "F1 Score": f1_score(y_test, y_pred, average='binary')
    })

    # Print confusion matrix
    print(f"\nConfusion Matrix for KNN (k={k}):")
    print(confusion_matrix(y_test, y_pred))



Confusion Matrix for KNN (k=3):
[[40  3]
 [ 3 68]]

Confusion Matrix for KNN (k=5):
[[40  3]
 [ 3 68]]

Confusion Matrix for KNN (k=7):
[[40  3]
 [ 3 68]]

Confusion Matrix for KNN (k=9):
[[41  2]
 [ 2 69]]


In [20]:
## Decision Tree
from sklearn import tree

## Loop for training and test of max_depth hyperparameter to append to results
for depth in [1, 2, 3, 4, 5, 10, None]:
    model = tree.DecisionTreeClassifier(max_depth=depth)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    results.append({
        "Model": "Decision Tree",
        "Hyperparameter": f"max_depth={depth}",
        "Accuracy": accuracy_score(y_test, y_pred),
        "Precision": precision_score(y_test, y_pred, average='binary'),
        "Recall": recall_score(y_test, y_pred, average='binary'),
        "F1 Score": f1_score(y_test, y_pred, average='binary')
    })

    # Print confusion matrix
    print(f"\nConfusion Matrix for Decision Tree (max_depth={depth}):")
    print(confusion_matrix(y_test, y_pred))





Confusion Matrix for Decision Tree (max_depth=1):
[[39  4]
 [ 8 63]]

Confusion Matrix for Decision Tree (max_depth=2):
[[37  6]
 [ 2 69]]

Confusion Matrix for Decision Tree (max_depth=3):
[[39  4]
 [ 2 69]]

Confusion Matrix for Decision Tree (max_depth=4):
[[40  3]
 [ 3 68]]

Confusion Matrix for Decision Tree (max_depth=5):
[[40  3]
 [ 4 67]]

Confusion Matrix for Decision Tree (max_depth=10):
[[40  3]
 [ 3 68]]

Confusion Matrix for Decision Tree (max_depth=None):
[[40  3]
 [ 4 67]]


In [21]:
## Random Forest 
from sklearn.ensemble import RandomForestClassifier

## Loop for training and test of max_depth hyperparameter to append to results
for depth in [2, 4, 6, 8, 10, None]:
    clf = RandomForestClassifier(n_estimators=100, max_depth=depth, random_state=42) ## 100 trees
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    results.append({
        "Model": "Random Forest",
        "Hyperparameter": f"max_depth={depth}",
        "Accuracy": accuracy_score(y_test, y_pred),
        "Precision": precision_score(y_test, y_pred, average='binary'),
        "Recall": recall_score(y_test, y_pred, average='binary'),
        "F1 Score": f1_score(y_test, y_pred, average='binary')
    })

    # Print confusion matrix
    print(f"\nConfusion Matrix for Random Forest (max_depth={depth}):")
    print(confusion_matrix(y_test, y_pred))





Confusion Matrix for Random Forest (max_depth=2):
[[40  3]
 [ 1 70]]

Confusion Matrix for Random Forest (max_depth=4):
[[40  3]
 [ 1 70]]

Confusion Matrix for Random Forest (max_depth=6):
[[40  3]
 [ 1 70]]

Confusion Matrix for Random Forest (max_depth=8):
[[40  3]
 [ 1 70]]

Confusion Matrix for Random Forest (max_depth=10):
[[40  3]
 [ 1 70]]

Confusion Matrix for Random Forest (max_depth=None):
[[40  3]
 [ 1 70]]


In [14]:
import pandas as pd

## Show top model + hyperparameter configuration for highest accuracy
df_results = pd.DataFrame(results)
print(df_results.sort_values(by='Accuracy',ascending=False)[['Model','Hyperparameter', 'Accuracy']]) ## Change to check other metrics by changing 'by' parameter

            Model  Hyperparameter  Accuracy
3             KNN   n_neighbors=9  0.964912
13  Random Forest     max_depth=6  0.964912
14  Random Forest     max_depth=8  0.964912
16  Random Forest  max_depth=None  0.964912
15  Random Forest    max_depth=10  0.964912
11  Random Forest     max_depth=2  0.964912
12  Random Forest     max_depth=4  0.964912
0             KNN   n_neighbors=3  0.947368
1             KNN   n_neighbors=5  0.947368
7   Decision Tree     max_depth=4  0.947368
2             KNN   n_neighbors=7  0.947368
9   Decision Tree    max_depth=10  0.947368
10  Decision Tree  max_depth=None  0.947368
6   Decision Tree     max_depth=3  0.938596
8   Decision Tree     max_depth=5  0.938596
5   Decision Tree     max_depth=2  0.929825
4   Decision Tree     max_depth=1  0.894737


In [15]:
print(df_results.sort_values(by='Accuracy',ascending=False)[['Model','Hyperparameter', 'Precision']]) ## Change to check other metrics by changing 'by' parameter

            Model  Hyperparameter  Precision
3             KNN   n_neighbors=9   0.971831
13  Random Forest     max_depth=6   0.958904
14  Random Forest     max_depth=8   0.958904
16  Random Forest  max_depth=None   0.958904
15  Random Forest    max_depth=10   0.958904
11  Random Forest     max_depth=2   0.958904
12  Random Forest     max_depth=4   0.958904
0             KNN   n_neighbors=3   0.957746
1             KNN   n_neighbors=5   0.957746
7   Decision Tree     max_depth=4   0.957746
2             KNN   n_neighbors=7   0.957746
9   Decision Tree    max_depth=10   0.957746
10  Decision Tree  max_depth=None   0.957746
6   Decision Tree     max_depth=3   0.944444
8   Decision Tree     max_depth=5   0.944444
5   Decision Tree     max_depth=2   0.920000
4   Decision Tree     max_depth=1   0.940299


In [16]:
print(df_results.sort_values(by='Accuracy',ascending=False)[['Model','Hyperparameter', 'Recall']]) ## Change to check other metrics by changing 'by' parameter

            Model  Hyperparameter    Recall
3             KNN   n_neighbors=9  0.971831
13  Random Forest     max_depth=6  0.985915
14  Random Forest     max_depth=8  0.985915
16  Random Forest  max_depth=None  0.985915
15  Random Forest    max_depth=10  0.985915
11  Random Forest     max_depth=2  0.985915
12  Random Forest     max_depth=4  0.985915
0             KNN   n_neighbors=3  0.957746
1             KNN   n_neighbors=5  0.957746
7   Decision Tree     max_depth=4  0.957746
2             KNN   n_neighbors=7  0.957746
9   Decision Tree    max_depth=10  0.957746
10  Decision Tree  max_depth=None  0.957746
6   Decision Tree     max_depth=3  0.957746
8   Decision Tree     max_depth=5  0.957746
5   Decision Tree     max_depth=2  0.971831
4   Decision Tree     max_depth=1  0.887324


In [17]:
print(df_results.sort_values(by='Accuracy',ascending=False)[['Model','Hyperparameter', 'F1 Score']]) ## Change to check other metrics by changing 'by' parameter

            Model  Hyperparameter  F1 Score
3             KNN   n_neighbors=9  0.971831
13  Random Forest     max_depth=6  0.972222
14  Random Forest     max_depth=8  0.972222
16  Random Forest  max_depth=None  0.972222
15  Random Forest    max_depth=10  0.972222
11  Random Forest     max_depth=2  0.972222
12  Random Forest     max_depth=4  0.972222
0             KNN   n_neighbors=3  0.957746
1             KNN   n_neighbors=5  0.957746
7   Decision Tree     max_depth=4  0.957746
2             KNN   n_neighbors=7  0.957746
9   Decision Tree    max_depth=10  0.957746
10  Decision Tree  max_depth=None  0.957746
6   Decision Tree     max_depth=3  0.951049
8   Decision Tree     max_depth=5  0.951049
5   Decision Tree     max_depth=2  0.945205
4   Decision Tree     max_depth=1  0.913043


In [18]:
print(df_results.sort_values)

<bound method DataFrame.sort_values of             Model  Hyperparameter  Accuracy  Precision    Recall  F1 Score
0             KNN   n_neighbors=3  0.947368   0.957746  0.957746  0.957746
1             KNN   n_neighbors=5  0.947368   0.957746  0.957746  0.957746
2             KNN   n_neighbors=7  0.947368   0.957746  0.957746  0.957746
3             KNN   n_neighbors=9  0.964912   0.971831  0.971831  0.971831
4   Decision Tree     max_depth=1  0.894737   0.940299  0.887324  0.913043
5   Decision Tree     max_depth=2  0.929825   0.920000  0.971831  0.945205
6   Decision Tree     max_depth=3  0.938596   0.944444  0.957746  0.951049
7   Decision Tree     max_depth=4  0.947368   0.957746  0.957746  0.957746
8   Decision Tree     max_depth=5  0.938596   0.944444  0.957746  0.951049
9   Decision Tree    max_depth=10  0.947368   0.957746  0.957746  0.957746
10  Decision Tree  max_depth=None  0.947368   0.957746  0.957746  0.957746
11  Random Forest     max_depth=2  0.964912   0.958904  0.985