**Logistic Regression Example**

In [1]:
from sklearn.linear_model import LogisticRegression
import numpy as np
import matplotlib.pyplot as plt

# Toy training set: four samples with two numeric features each. The two
# columns differ in magnitude (tens vs. tens of thousands) and are passed to
# the model unscaled.
# NOTE(review): the columns are presumably age and income — confirm.
X = np.array([
    [35, 50000],
    [45, 60000],
    [25, 30000],
    [30, 35000],
])
# Binary target, one entry per row of X -> 1: Buys product, 0: Does not buy product
y = np.array([1, 1, 0, 0])

# Build the classifier with default settings and train it in a single
# expression (fit() hands back the fitted estimator).
model = LogisticRegression().fit(X, y)

# Score the same rows the model was trained on: hard class labels, plus the
# probability assigned to class 1 (second column of predict_proba's result).
y_pred = model.predict(X)
y_pred_prob = model.predict_proba(X)[:, 1]

# Show both views of the predictions
print("Predicted Probabilities: ", y_pred_prob)
print("Predicted Class Labels: ", y_pred)


Predicted Probabilities:  [9.99999865e-01 1.00000000e+00 3.60599414e-12 1.36628901e-07]
Predicted Class Labels:  [1 1 0 0]


**Confusion Matrix Example**

In [2]:
from sklearn.metrics import confusion_matrix

# True labels (y_true) and predicted labels (y_pred) for seven samples.
# 1 is the positive class, 0 is the negative class.
# Note: this rebinds y_pred, replacing the model predictions computed in the
# logistic regression cell.
y_true = np.array([1, 0, 1, 1, 0, 0, 1])
y_pred = np.array([1, 0, 1, 0, 0, 1, 1])

# Calculate the confusion matrix.
# labels=[0, 1] pins the row/column order and guarantees a 2x2 result even if
# one class happens to be absent from the data, so the breakdown below (and
# any 4-way unpacking of the flattened matrix) always holds.
conf_matrix = confusion_matrix(y_true, y_pred, labels=[0, 1])
print("Confusion Matrix: \n", conf_matrix)

# Matrix breakdown (rows = true class, columns = predicted class):
# [[True Negative,  False Positive],
#  [False Negative, True Positive]]


Confusion Matrix: 
 [[2 1]
 [1 3]]


**Performance Metrics (Accuracy, Recall, Precision, Specificity)**

In [3]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Compute every metric first, then report them together.

# Accuracy: share of all samples whose predicted label matches the true label
accuracy = accuracy_score(y_true, y_pred)

# Precision (for positive class 1): of everything predicted 1, how much is truly 1
precision = precision_score(y_true, y_pred)

# Recall (True Positive Rate / Sensitivity): of everything truly 1, how much was found
recall = recall_score(y_true, y_pred)

# Specificity (True Negative Rate) is derived by hand from the confusion
# matrix: first row holds (TN, FP), second row holds (FN, TP).
(tn, fp), (fn, tp) = conf_matrix
specificity = tn / (tn + fp)

# Print in the order: accuracy, precision, recall, specificity
for metric_name, metric_value in (
    ("Accuracy", accuracy),
    ("Precision", precision),
    ("Recall", recall),
    ("Specificity", specificity),
):
    print(f"{metric_name}: {metric_value:.2f}")

# Note: accuracy can be misleading on an imbalanced data set. f1_score
# (imported above; the harmonic mean of precision and recall) is usually a
# more informative single-number summary in that situation.

Accuracy: 0.71
Precision: 0.75
Recall: 0.75
Specificity: 0.67


**Threshold and Scores Example**

In [4]:
# Convert the class-1 probabilities from the logistic regression model into
# class labels using an explicit, adjustable cut-off.

threshold = 0.7  # minimum class-1 probability required to predict class 1

# The element-wise comparison yields a boolean mask; casting to int maps
# True -> 1 and False -> 0.
y_pred_threshold = np.greater_equal(y_pred_prob, threshold).astype(int)

print(f"Predictions with Threshold {threshold}: {y_pred_threshold}")


Predictions with Threshold 0.7: [1 1 0 0]
