In [4]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn import neighbors
from sklearn.tree import DecisionTreeClassifier 
from sklearn import metrics
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import make_classification
from sklearn.preprocessing import StandardScaler

In [5]:
# Load the scikit-learn breast-cancer dataset once. The KNN and decision-tree
# cells below read these module-level X (features) and y (class labels).
data = load_breast_cancer()
X = data.data
y = data.target

In [53]:
def _knn_train(label, n_neighbors):
    """Fit and evaluate a uniform-weight KNN classifier on the module-level X, y.

    The data is split 80/20 with a fixed seed, the features are standardized
    with a scaler fitted on the training split only, and the accuracy,
    classification report and confusion matrix of the held-out split are
    printed.

    Parameters
    ----------
    label : str
        Heading printed before the metrics.
    n_neighbors : int
        Number of neighbours used by the classifier.

    Returns
    -------
    None
        Results are printed, not returned.
    """
    # Split dataset train, test (fixed seed keeps the split reproducible)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 42)

    # Scaler: fit on the training split only, then apply the same transform to
    # the test split so no test-set statistics leak into training
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # Initialize and train
    model = neighbors.KNeighborsClassifier(n_neighbors = n_neighbors, weights = 'uniform')
    model.fit(X_train_scaled, y_train)

    # Predict
    y_pred = model.predict(X_test_scaled)

    # Results
    print(label)
    print("Accuracy:", metrics.accuracy_score(y_test, y_pred))
    print("Classification Report:\n", metrics.classification_report(y_test, y_pred))
    print("Confusion Matrix:\n", metrics.confusion_matrix(y_test, y_pred))


def KNN5_train():
    """Train and report a KNN classifier with 5 neighbours (heading "KNN 5")."""
    _knn_train("KNN 5", n_neighbors = 5)


def KNN2_train():
    """Train and report the KNN configuration printed under the heading "KNN 2".

    Note: despite the "2" in the function name and heading, the classifier is
    built with n_neighbors = 30.
    """
    _knn_train("KNN 2", n_neighbors = 30)

In [56]:
# Run both KNN configurations back to back so their reports can be compared
KNN5_train()
KNN2_train()

KNN 5
Accuracy: 0.9473684210526315
Classification Report:
               precision    recall  f1-score   support

           0       0.93      0.93      0.93        43
           1       0.96      0.96      0.96        71

    accuracy                           0.95       114
   macro avg       0.94      0.94      0.94       114
weighted avg       0.95      0.95      0.95       114

Confusion Matrix:
 [[40  3]
 [ 3 68]]
KNN 2
Accuracy: 0.956140350877193
Classification Report:
               precision    recall  f1-score   support

           0       0.97      0.91      0.94        43
           1       0.95      0.99      0.97        71

    accuracy                           0.96       114
   macro avg       0.96      0.95      0.95       114
weighted avg       0.96      0.96      0.96       114

Confusion Matrix:
 [[39  4]
 [ 1 70]]


In [63]:
def _decision_tree_report(label, max_depth = None):
    """Fit and evaluate a decision tree on the module-level X, y.

    The data is split 80/20 with a fixed seed, a DecisionTreeClassifier is
    trained on the training split, and the accuracy, classification report and
    confusion matrix of the held-out split are printed.

    Parameters
    ----------
    label : str
        Heading printed before the metrics.
    max_depth : int or None, default None
        Passed straight to DecisionTreeClassifier; None leaves the depth
        unrestricted.

    Returns
    -------
    None
        Results are printed, not returned.
    """
    # Split dataset train, test
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 42)

    # Classification. The seed is fixed for every tree: scikit-learn permutes
    # the candidate features at each split, so an unseeded tree can change
    # from one run to the next.
    model = DecisionTreeClassifier(max_depth = max_depth, random_state = 42)

    # Training
    model.fit(X_train, y_train)

    # Predict
    y_pred = model.predict(X_test)

    # Results
    print(label)
    print("Accuracy:", metrics.accuracy_score(y_test, y_pred))
    print("Classification Report:\n", metrics.classification_report(y_test, y_pred))
    print("Confusion Matrix:\n", metrics.confusion_matrix(y_test, y_pred))


def decision_train():
    """Train and report a decision tree with default depth (heading "Decision Default")."""
    _decision_tree_report("Decision Default")


def decision_maxd(max_depth = None):
    """Train and report a decision tree limited to `max_depth` (heading "Decision MaxD").

    Parameters
    ----------
    max_depth : int or None, default None
        Maximum tree depth; None leaves the depth unrestricted.
    """
    _decision_tree_report("Decision MaxD", max_depth = max_depth)

In [69]:
# Default tree first, then a shallow (depth 2) and a deep (depth 30) tree
decision_train()
decision_maxd(max_depth = 2)
decision_maxd(max_depth = 30)

Decision Default
Accuracy: 0.9473684210526315
Classification Report:
               precision    recall  f1-score   support

           0       0.93      0.93      0.93        43
           1       0.96      0.96      0.96        71

    accuracy                           0.95       114
   macro avg       0.94      0.94      0.94       114
weighted avg       0.95      0.95      0.95       114

Confusion Matrix:
 [[40  3]
 [ 3 68]]
Decision MaxD
Accuracy: 0.9298245614035088
Classification Report:
               precision    recall  f1-score   support

           0       0.95      0.86      0.90        43
           1       0.92      0.97      0.95        71

    accuracy                           0.93       114
   macro avg       0.93      0.92      0.92       114
weighted avg       0.93      0.93      0.93       114

Confusion Matrix:
 [[37  6]
 [ 2 69]]
Decision MaxD
Accuracy: 0.9473684210526315
Classification Report:
               precision    recall  f1-score   support

           

In [75]:
def _forest_report(label, X = None, y = None, **forest_params):
    """Fit and evaluate a random forest, printing its held-out metrics.

    Parameters
    ----------
    label : str
        Heading printed before the metrics.
    X, y : array-like or None, default None
        Features and labels to use. When both are None, a synthetic dataset is
        generated with make_classification (1000 samples, 4 features, seed 42).
    **forest_params
        Extra keyword arguments forwarded to RandomForestClassifier.

    Raises
    ------
    ValueError
        If only one of X and y is supplied.
    """
    if X is None and y is None:
        # Default: synthetic data (not the breast-cancer data used by the
        # KNN and decision-tree cells)
        X, y = make_classification(n_samples=1000, n_features=4, random_state = 42)
    elif X is None or y is None:
        raise ValueError("X and y must be passed together")

    # Split dataset train, test
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 42)

    # Classification
    model = RandomForestClassifier(random_state = 42, **forest_params)

    # Training
    model.fit(X_train, y_train)

    # Predict
    y_pred = model.predict(X_test)

    # Results
    print(label)
    print("Accuracy:", metrics.accuracy_score(y_test, y_pred))
    print("Classification Report:\n", metrics.classification_report(y_test, y_pred))
    print("Confusion Matrix:\n", metrics.confusion_matrix(y_test, y_pred))


def RandomForest_train(n_estimators = 100, X = None, y = None):
    """Train and report a random forest (heading "Default Forest").

    NOTE(review): with the default X = None, y = None the model is trained on
    a synthetic make_classification dataset, not on the breast-cancer data
    loaded at the top of the notebook, so its scores are not directly
    comparable with the KNN / decision-tree reports. Pass X and y explicitly
    (e.g. ``RandomForest_train(X = X, y = y)``) to evaluate on another dataset.

    Parameters
    ----------
    n_estimators : int, default 100
        Number of trees in the forest.
    X, y : array-like or None, default None
        Optional features and labels; must be given together.
    """
    _forest_report("Default Forest", X, y, n_estimators = n_estimators)


def RandomForestmaxd_train(n_estimators = 100, min_samples_split = 50, X = None, y = None):
    """Train and report a random forest with a custom min_samples_split (heading "Forest Split").

    NOTE(review): with the default X = None, y = None the model is trained on
    a synthetic make_classification dataset, not on the breast-cancer data
    loaded at the top of the notebook. Pass X and y explicitly to evaluate on
    another dataset.

    Parameters
    ----------
    n_estimators : int, default 100
        Number of trees in the forest.
    min_samples_split : int, default 50
        Minimum number of samples required to split an internal node.
    X, y : array-like or None, default None
        Optional features and labels; must be given together.
    """
    _forest_report(
        "Forest Split",
        X,
        y,
        n_estimators = n_estimators,
        min_samples_split = min_samples_split,
    )

In [76]:
# Default forest, then two forests with increasingly strict split thresholds
RandomForest_train(n_estimators = 100)
RandomForestmaxd_train(n_estimators = 100, min_samples_split = 5)
RandomForestmaxd_train(n_estimators = 100, min_samples_split = 32)


Default Forest
Accuracy: 0.945
Classification Report:
               precision    recall  f1-score   support

           0       0.94      0.95      0.95       101
           1       0.95      0.94      0.94        99

    accuracy                           0.94       200
   macro avg       0.95      0.94      0.94       200
weighted avg       0.95      0.94      0.94       200

Confusion Matrix:
 [[96  5]
 [ 6 93]]
Forest Split
Accuracy: 0.94
Classification Report:
               precision    recall  f1-score   support

           0       0.94      0.94      0.94       101
           1       0.94      0.94      0.94        99

    accuracy                           0.94       200
   macro avg       0.94      0.94      0.94       200
weighted avg       0.94      0.94      0.94       200

Confusion Matrix:
 [[95  6]
 [ 6 93]]
Forest Split
Accuracy: 0.935
Classification Report:
               precision    recall  f1-score   support

           0       0.92      0.96      0.94       101
 