In [None]:
# ==============================================================================
# Import modules
# ==============================================================================

import numpy as np
from sklearn import datasets
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# ==============================================================================
# Load data, select features, and split into training and test data
# ==============================================================================
# Load the bundled iris dataset; every feature column is used as-is.
data = datasets.load_iris()
X, y = data.data, data.target
print(data.feature_names)

# Hold out 30% of the samples for testing. Stratifying on y keeps the class
# proportions the same in both splits; the fixed seed makes the split
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=1,
    stratify=y,
)

# ==============================================================================
# Scale features using StandardScaler class in scikit-learn
# ==============================================================================

# Create the scaler and learn the per-feature mean and standard deviation
# from the training split only.
sc = StandardScaler().fit(X_train)

# Standardise both splits with the statistics learned from the training data,
# so the test split is scaled exactly the same way as the training split.
X_train_sc = sc.transform(X_train)
X_test_sc = sc.transform(X_test)

# Report the per-feature statistics of the standardised training data.
print("Mean of X_train_sc:", X_train_sc.mean(axis=0))
print("Stddev of X_train_sc:", X_train_sc.std(axis=0))

# ==============================================================================
# Train and evaluate a logistic regression classifier
# ==============================================================================

# Inverse of the regularisation strength: a larger C means a weaker L2 penalty.
C = 100

# `multi_class` is deliberately not passed: it is deprecated in recent
# scikit-learn releases (FutureWarning since 1.5), and "auto" was already the
# default, so omitting it leaves the fitted model unchanged.
# NOTE(review): recent scikit-learn releases also deprecate the liblinear
# solver for targets with more than two classes (iris has three). Switching
# solver or wrapping in OneVsRestClassifier would change `clf`, so that is left
# for a deliberate follow-up -- confirm against the installed version.
clf = LogisticRegression(
    max_iter=100,
    solver="liblinear",
    random_state=1,
    penalty="l2",
    C=C,
)
clf.fit(X_train_sc, y_train)

# Count the test samples whose predicted label differs from the true label.
y_pred = clf.predict(X_test_sc)
print(f"Misclassified samples: {(y_test != y_pred).sum()}")

# Mean accuracy on each split; a large gap between the two hints at
# over-fitting.
train_score = clf.score(X_train_sc, y_train)
test_score = clf.score(X_test_sc, y_test)
print("Accuracy on the train set:", train_score)
print("Accuracy on the test set:", test_score)