## Example 2: Logistic Regression with Iris Data

In [1]:
# Import the required libraries
import numpy as np
from sklearn.datasets import load_iris
import warnings
warnings.filterwarnings("ignore")


#Import the learning algorithm
from sklearn.linear_model import LogisticRegression

# Split iris data in train and test data
X, y = load_iris(return_X_y=True)


### Step 1,2: Data Preprocessing, Feature Engineering

In [None]:
# No need for Preprocessing and any feature selection

### Step 3: Train/Test Data Splitting

In [2]:
# Split iris data in train and test data
# A random permutation, to split the data randomly
iris_X_train = X[:-20]
iris_X_test  = X[-20:]
iris_y_train = y[:-20]
iris_y_test  = y[-20:]

### Step 4: Model Creation and Training

In [3]:
# Create and fit a linear regression model and predict the target values
# Model training with fit function

clf = LogisticRegression(random_state=0, solver='lbfgs',multi_class='ovr').fit(X, y)

# Result with the trained model
clf.predict(X[:2, :])
clf.predict_proba(X[:2, :])

#Evalution of the algorithm
clf.score(X, y)

0.9533333333333334

### Step 5: Model Evaluation

In [4]:
# The mean square error

# Explained variance score: 1 is perfect prediction
# and 0 means that there is no linear relationship
# between X and y.


### Advanced Analysis for Model Selection and Evaluation 

#### Cross Validation 

In [5]:
# Import CV from model selection section
from sklearn.model_selection import cross_val_score


#A new KNN model for CV
Logregr_CV = LogisticRegression()


#train model with cv of 10 
cv_scores = cross_val_score(Logregr_CV, X, y, cv=10)


#print each cv score (accuracy) and average them
print(cv_scores)
print('cv_scores mean:{}'.format(np.mean(cv_scores)))


# import warnings filter
from warnings import simplefilter
# ignore all future warnings
simplefilter(action='ignore', category=FutureWarning)

[1.         1.         1.         0.93333333 0.93333333 0.93333333
 0.8        0.93333333 1.         1.        ]
cv_scores mean:0.9533333333333334


#### Tuning model Parameters using GridSearch

In [6]:
# Finding the hyperparameter value of the learning algorithm giving the best model

from sklearn.model_selection import GridSearchCV


# CReate a new KNN for GS

Iris_LogR = LogisticRegression()

# Create a model parameter list - Parameter list you can supply for your ML algorithm
# Create regularization penalty space
#penalty = ['l1', 'l2']

# Create regularization hyperparameter space
C = np.logspace(0, 4, 10)

# Optimization algorithms for Linear Regression
solver = ['lbfgs', 'liblinear', 'sag', 'saga']
# Create hyperparameter options
hyperparameters = dict(C=C, solver= solver)


# GridSearchCV will give you best parameters' list with cross validation 
LogR_GS = GridSearchCV(Iris_LogR, hyperparameters, cv=10)
LogR_GS.fit(iris_X_train,iris_y_train)


## Find the best parameters for KNN
print(LogR_GS.best_params_)

## Find the best score with the best parameters
LogR_GS.best_score_# Create regularization penalty space

#print('Best Penalty:', LogR_GS.best_estimator_.get_params()['penalty'])
print('Best C:', LogR_GS.best_estimator_.get_params()['C'])
print('Best Solver:', LogR_GS.best_estimator_.get_params()['solver'])


# import warnings filter
from warnings import simplefilter
# ignore all future warnings
simplefilter(action='ignore', category=FutureWarning)



{'C': 21.544346900318832, 'solver': 'liblinear'}
Best C: 21.544346900318832
Best Solver: liblinear
