# Small Project 2: ML Classifiers 
## Logistic Regression on the Breast Cancer Wisconsin Dataset

In [25]:
from sklearn.model_selection import cross_val_score, train_test_split, GridSearchCV
from sklearn import metrics
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler

# Load the breast cancer dataset bundled with scikit-learn.
cancer = load_breast_cancer()

# Split BEFORE scaling so the held-out rows never influence preprocessing.
# Fitting the scaler on the full dataset would leak the test set's mean and
# variance into the training features and bias the reported test accuracy.
# With the same random_state and the same number of samples, the row
# partition should match a split performed on pre-scaled data.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    cancer.data, cancer.target, test_size=0.2, random_state=109
)

# Learn the scaling statistics from the training rows only, then apply that
# same transformation to the test rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Kept so existing references to `x_std` keep working: the full dataset
# transformed with the train-fitted scaler. Do not fit or evaluate on it.
x_std = scaler.transform(cancer.data)

# Logistic Regression Classifier
# max_iter is raised well above the default so the solver has room to converge.
log_clf = LogisticRegression(max_iter=10000)
log_clf_fit = log_clf.fit(X_train, y_train)
y_pred = log_clf.predict(X_test)

print("Accuracy for Logistic Classifier:",metrics.accuracy_score(y_test, y_pred))


# Perform k-fold cross-validation on the training portion only; the test set
# stays untouched. cross_val_score clones the estimator for every fold, so the
# fitted `log_clf` above is not modified.
# NOTE(review): X_train was scaled with statistics from all training rows, so
# each validation fold still shares scaling statistics with its training folds.
# Wrapping the scaler and classifier in a Pipeline would scale per fold.
k = 5
cv_scores = cross_val_score(log_clf, X_train, y_train, cv=k, scoring='accuracy')

print("Cross-Validation Accuracy Scores:", cv_scores)



Accuracy for Logistic Classifier: 0.9912280701754386
Cross-Validation Accuracy Scores: [0.97802198 0.97802198 0.95604396 1.         0.93406593]


## Printing accuracy, sensitivity, and specificity

In [26]:
# Average the per-fold accuracies stored in `cv_scores`.
mean_accuracy = cv_scores.mean()
print("Mean Accuracy:", mean_accuracy)

# Confusion matrix of the test-set predictions: rows are the true labels,
# columns are the predicted labels.
conf_matrix = metrics.confusion_matrix(y_test, y_pred)

# Flattening the 2x2 matrix row by row gives the four counts in the order
# [0,0], [0,1], [1,0], [1,1], i.e. TN, FP, FN, TP with label 1 as "positive".
# NOTE(review): per the scikit-learn docs, label 1 in load_breast_cancer is
# 'benign', so sensitivity here is the detection rate of benign cases.
# Confirm this is the intended positive class.
TN, FP, FN, TP = conf_matrix.ravel()

# Sensitivity: share of actual positives that were predicted positive.
sensitivity = TP / (TP + FN)
# Specificity: share of actual negatives that were predicted negative.
specificity = TN / (TN + FP)

print("Sensitivity (True Positive Rate):", sensitivity)
print("Specificity (True Negative Rate):", specificity)


Mean Accuracy: 0.9692307692307691
Sensitivity (True Positive Rate): 1.0
Specificity (True Negative Rate): 0.975


In [27]:
# Define the parameter grid: every combination of regularization strength C,
# penalty type, and solver listed below is evaluated.
param_grid = {
    'C': [0.1, 1, 10, 100],
    'penalty': ['l1', 'l2'],
    'solver': ['liblinear', 'saga']}

# Perform GridSearch: each candidate is scored by accuracy with cv=k folds on
# the training data only. GridSearchCV clones `log_clf` for each candidate, so
# the original estimator object is left as it was.
grid_search = GridSearchCV(estimator=log_clf, param_grid=param_grid, cv=k, scoring='accuracy')
grid_search.fit(X_train, y_train)

print("Best Parameters:", grid_search.best_params_)

# With the default refit=True, GridSearchCV has already refit the winning
# configuration on all of X_train, so best_estimator_ is ready to use and
# needs no further .fit() call.
best_clf = grid_search.best_estimator_

# NOTE: this rebinds `y_pred` to the tuned model's predictions. Cells that
# read `y_pred` (e.g. the sensitivity/specificity cell) will report on this
# model if they are re-run afterwards.
y_pred = best_clf.predict(X_test)

print("Accuracy for Logistic Regression with GridSearchCV:", metrics.accuracy_score(y_test, y_pred))

Best Parameters: {'C': 1, 'penalty': 'l2', 'solver': 'liblinear'}
Accuracy for Logistic Regression with GridSearchCV: 0.9912280701754386
