In [4]:
# Cell 1: Cross Validation with Scaling 💡
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import LogisticRegression
from sklearn.datasets import load_breast_cancer
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
import numpy as np

# Breast-cancer dataset: feature matrix in X, class labels in y
data = load_breast_cancer()
X = data.data
y = data.target

# Standardisation and the classifier are chained into a single estimator,
# so cross-validation fits the scaler together with the model on each
# training split instead of on the full dataset.
pipeline = make_pipeline(
    StandardScaler(),
    LogisticRegression(max_iter=2000),
)
scores = cross_val_score(pipeline, X, y, cv=5)  # one score per fold (5 folds)

print("Cross-validation scores:", scores)
print("Average CV score:", scores.mean())


Cross-validation scores: [0.98245614 0.98245614 0.97368421 0.97368421 0.99115044]
Average CV score: 0.9806862288464524


In [6]:
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import load_breast_cancer

# Breast-cancer dataset: feature matrix in X, class labels in y
data = load_breast_cancer()
X = data.data
y = data.target

# Candidate hyper-parameters. Keys follow the "<step name>__<parameter>"
# convention, so the "logreg" prefix must match the step name used in the
# pipeline defined below.
param_grid = {
    'logreg__C': [0.01, 0.1, 1, 10, 100],
    'logreg__solver': ['lbfgs', 'liblinear'],
}

# Scaling followed by logistic regression, with explicitly named steps
pipe = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('logreg', LogisticRegression(max_iter=3000)),
])

# Exhaustive search over every C/solver combination, scored with 5-fold CV
grid = GridSearchCV(estimator=pipe, param_grid=param_grid, cv=5)
grid.fit(X, y)

print("Best Parameters:", grid.best_params_)
print("Best CV Score:", grid.best_score_)


Best Parameters: {'logreg__C': 0.1, 'logreg__solver': 'liblinear'}
Best CV Score: 0.9824406148113647
