In [19]:
# Imports
from logr_utils import sigmoid, logistic_sigmoid_regression, min_max_normalize

import time
import seaborn as sns
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

# Read the data
data = pd.read_csv('data/framingham.csv')
data = data.dropna()

data.head()

Unnamed: 0,male,age,education,currentSmoker,cigsPerDay,BPMeds,prevalentStroke,prevalentHyp,diabetes,totChol,sysBP,diaBP,BMI,heartRate,glucose,TenYearCHD
0,1,39,4.0,0,0.0,0.0,0,0,0,195.0,106.0,70.0,26.97,80.0,77.0,0
1,0,46,2.0,0,0.0,0.0,0,0,0,250.0,121.0,81.0,28.73,95.0,76.0,0
2,1,48,1.0,1,20.0,0.0,0,0,0,245.0,127.5,80.0,25.34,75.0,70.0,0
3,0,61,3.0,1,30.0,0.0,0,1,0,225.0,150.0,95.0,28.58,65.0,103.0,1
4,0,46,3.0,1,23.0,0.0,0,0,0,285.0,130.0,84.0,23.1,85.0,85.0,0


In [20]:
# Extract data for train test split
X = data.drop(columns='TenYearCHD')  # Features
y = data['TenYearCHD']  # Target variable

# Ratio 7:3
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Normalize the data
scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [21]:
# Sci-kit Learn Logistic Regression implementation

# Initialize the model
logreg = LogisticRegression(max_iter=10000)

# Fit the model and time the training process
start_time_logreg = time.time()
logreg.fit(X_train, y_train)
end_time_logreg = time.time()

# Measure inference time for Logistic Regression
start_inference_logreg = time.time()
y_pred_logreg = logreg.predict(X_test)
end_inference_logreg = time.time()

# Calculate metrics for Logistic Regression
logreg_acc = accuracy_score(y_test, y_pred_logreg)
logreg_prec = precision_score(y_test, y_pred_logreg, average='binary', pos_label=1)
logreg_recall = recall_score(y_test, y_pred_logreg, average='binary', pos_label=1)
logreg_f1 = f1_score(y_test, y_pred_logreg, average='binary')
logreg_conf_matrix = confusion_matrix(y_test, y_pred_logreg)

print("Logistic Regression Metrics:")
print(f"Accuracy: {logreg_acc:.4f}")
print(f"Precision: {logreg_prec:.4f}")
print(f"Recall: {logreg_recall:.4f}")
print(f"F1 Score: {logreg_f1:.4f}")
print("Confusion Matrix:\n", logreg_conf_matrix)
print(f"Time taken for training: {end_time_logreg - start_time_logreg:.4f} seconds")
print(f"Time taken for inference: {end_inference_logreg - start_inference_logreg:.4f} seconds")

Logistic Regression Metrics:
Accuracy: 0.8469
Precision: 0.6071
Recall: 0.0977
F1 Score: 0.1683
Confusion Matrix:
 [[912  11]
 [157  17]]
Time taken for training: 0.3815 seconds
Time taken for inference: 0.0010 seconds
