In [16]:
# ==============================================================================
# Import modules
# ==============================================================================

import numpy as np
from sklearn import datasets
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# ==============================================================================
# Load data and split into training and test data
# ==============================================================================
# Load the bundled breast-cancer dataset and pull out the feature matrix and
# the target vector in one step.
data = datasets.load_breast_cancer()
X, y = data.data, data.target
print(data.feature_names)

# Hold out 30% of the samples for testing. The fixed random_state makes the
# split reproducible; stratify=y asks scikit-learn to keep the class
# proportions the same in both partitions.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=1,
    stratify=y,
)

# ==============================================================================
# Scale features using StandardScaler class in scikit-learn
# ==============================================================================

# Learn the per-feature mean and standard deviation from the training data
# only, and standardise the training data in the same call.
sc = StandardScaler()
X_train_sc = sc.fit_transform(X_train)

# Reuse the training statistics for the test data so that no information
# from the test set leaks into the preprocessing step.
X_test_sc = sc.transform(X_test)

# Sanity check: per-feature statistics of the standardised training data.
print("Mean of X_train_sc:", np.mean(X_train_sc, axis=0))
print("Stddev of X_train_sc:", np.std(X_train_sc, axis=0))

# Inverse regularisation strength passed to LogisticRegression.
C = 10

# L2-penalised logistic regression fitted with the liblinear solver.
# NOTE: the `multi_class` argument is intentionally not passed. It was
# deprecated in scikit-learn 1.5 and "auto" was already its default, so
# omitting it keeps the same behaviour without the deprecation warning (or an
# error once the parameter is removed).
clf = LogisticRegression(
    max_iter=100,
    solver="liblinear",
    random_state=1,
    penalty="l2",
    C=C,
)
clf.fit(X_train_sc, y_train)

# Count the test samples whose predicted label differs from the true label.
y_pred = clf.predict(X_test_sc)
print(f"Misclassified samples: {(y_test != y_pred).sum()}")

# score() reports mean accuracy on the given data and labels.
train_score = clf.score(X_train_sc, y_train)
test_score = clf.score(X_test_sc, y_test)
print("Accuracy on the train set:", train_score)
print("Accuracy on the test set:", test_score)

['mean radius' 'mean texture' 'mean perimeter' 'mean area'
 'mean smoothness' 'mean compactness' 'mean concavity'
 'mean concave points' 'mean symmetry' 'mean fractal dimension'
 'radius error' 'texture error' 'perimeter error' 'area error'
 'smoothness error' 'compactness error' 'concavity error'
 'concave points error' 'symmetry error' 'fractal dimension error'
 'worst radius' 'worst texture' 'worst perimeter' 'worst area'
 'worst smoothness' 'worst compactness' 'worst concavity'
 'worst concave points' 'worst symmetry' 'worst fractal dimension']
Mean of X_train_sc: [-2.34430008e-15  4.54773014e-15  1.92754802e-15  5.46742997e-16
  3.99457128e-15  3.28045798e-16  6.70875973e-17  3.70446276e-16
 -4.32610396e-15 -4.97424547e-15 -5.57901017e-17  2.39897437e-16
  1.84107336e-17  2.78950509e-17 -4.06151941e-16  6.18014852e-16
  1.76017771e-16  1.39028934e-15 -3.06789769e-15  6.66343028e-16
  2.23997258e-15  3.76025286e-16  3.08519263e-15 -3.94436019e-16
  5.99353063e-15 -2.78671558e-16  3