In [1]:
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# Load the breast cancer dataset and separate the feature matrix from the labels.
data = load_breast_cancer()
X, y = data.data, data.target

# Hold out 30% of the samples for testing. Stratifying on y keeps the class
# proportions the same in both splits; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    stratify=y,
    random_state=42,
)

# --- Baseline: logistic regression fitted on the raw (unscaled) features ---
logreg_no_scaling = LogisticRegression(max_iter=5000, solver="lbfgs")
logreg_no_scaling.fit(X_train, y_train)

y_pred_no_scaling = logreg_no_scaling.predict(X_test)
acc_no_scaling = accuracy_score(y_test, y_pred_no_scaling)
# n_iter_ is a NumPy array rather than a scalar, so reduce it to a plain int
# here. Keeping the array makes the report print brackets and makes the later
# int(...) conversion of the difference trigger NumPy's "conversion of an
# array with ndim > 0 to a scalar" deprecation warning.
iterations_no_scaling = int(logreg_no_scaling.n_iter_.max())
print("Logistic Regression without Standardization:")
print(f"Accuracy: {acc_no_scaling:.4f}")
print(f"Iterations until convergence: {iterations_no_scaling}")

# --- Same model on standardized features ---
# The scaler is fitted on the training split only and then applied to the test
# split, so no test-set statistics leak into the preprocessing step.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

logreg_scaled = LogisticRegression(max_iter=5000, solver="lbfgs")
logreg_scaled.fit(X_train_scaled, y_train)

y_pred_scaled = logreg_scaled.predict(X_test_scaled)
acc_scaled = accuracy_score(y_test, y_pred_scaled)
# Same array-to-int reduction as for the unscaled model.
iterations_scaled = int(logreg_scaled.n_iter_.max())
print("\nLogistic Regression with Standardization:")
print(f"Accuracy: {acc_scaled:.4f}")
print(f"Iterations until convergence: {iterations_scaled}")

# --- Side-by-side comparison ---
# Both iteration counts are plain ints at this point, so the difference needs
# no further conversion.
print("\nPerformance Comparison:")
print(f"Accuracy improvement: {acc_scaled - acc_no_scaling:.4f}")
print(f"Iteration difference: {iterations_no_scaling - iterations_scaled}")


Logistic Regression without Standardization:
Accuracy: 0.9474
Iterations until convergence: [2021]

Logistic Regression with Standardization:
Accuracy: 0.9883
Iterations until convergence: [19]

Performance Comparison:
Accuracy improvement: 0.0409
Iteration difference: 2002


  print(f"Iteration difference: {int(iterations_no_scaling - iterations_scaled)}")
