## Logistic Regression 

#### Training a Binary Classifier: only two classes

In [659]:
from sklearn.linear_model import LogisticRegression
from sklearn.datasets import load_iris
from sklearn.preprocessing import StandardScaler

# Load iris and keep only the first 100 rows, so that the problem is the
# two-class one named in the section heading.
iris = load_iris()
features = iris.data[:100]
targets = iris.target[:100]

# Rescale every feature column to zero mean and unit variance.
features_standardized = StandardScaler().fit(features).transform(features)

# Build the classifier, then fit it on the standardized features.
logistic_reg = LogisticRegression(random_state=420)
logistic_reg.fit(features_standardized, targets)

# A single made-up observation with four feature values.
new_observation = [[0.5] * 4]

# Predicted class first, then the per-class probabilities; the notebook
# only echoes the value of the last expression.
logistic_reg.predict(new_observation)
logistic_reg.predict_proba(new_observation)

array([[0.17738424, 0.82261576]])

#### Training a Multiclass Classifier

In [657]:
from sklearn.linear_model import LogisticRegression
from sklearn.datasets import load_iris
from sklearn.preprocessing import StandardScaler

# Load the full iris data set (all rows, all class labels).
iris = load_iris()
features, targets = iris.data, iris.target

# Rescale every feature column to zero mean and unit variance.
features_standardized = StandardScaler().fit(features).transform(features)

# One-vs-rest logistic regression, fitted on the standardized features.
# NOTE(review): newer scikit-learn releases deprecate `multi_class`;
# confirm against the installed version before upgrading.
logistic_reg = LogisticRegression(multi_class='ovr', random_state=420)
logistic_reg.fit(features_standardized, targets)

# A single made-up observation with four feature values.
new_observation = [[0.5] * 4]

# Predicted class first, then the per-class probabilities; the notebook
# only echoes the value of the last expression.
logistic_reg.predict(new_observation)
logistic_reg.predict_proba(new_observation)

# For multiclass logistic regression we pass the parameter
# multi_class = 'ovr' (one-vs-rest): one binary model is trained per class.

array([[0.0387617 , 0.40669108, 0.55454723]])

#### Reducing Variance Through Regularization 

In [None]:
from sklearn.linear_model import LogisticRegressionCV
from sklearn.datasets import load_iris
from sklearn.preprocessing import StandardScaler

# Load the full iris data set (all rows, all class labels).
iris = load_iris()
features, targets = iris.data, iris.target

# Rescale every feature column to zero mean and unit variance.
features_standardized = StandardScaler().fit(features).transform(features)

# Cross-validated logistic regression with an L2 penalty. Cs=10 requests
# ten candidate regularization strengths; n_jobs=-1 runs on all cores.
logistic_reg = LogisticRegressionCV(
    Cs=10,
    penalty='l2',
    n_jobs=-1,
    random_state=0,
)
logistic_reg.fit(features_standardized, targets)

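# We add a regularization penalty (l1 or l2), whose strength is controlled
# by its own hyperparameter, C. LogisticRegressionCV selects the best C by
# cross-validation from the Cs candidates (an integer Cs means that many
# candidate values are generated). It cannot search over different penalty
# types, so the penalty itself has to be chosen by hand.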

#### Training a Classifier on Very Large Data

In [662]:
from sklearn.linear_model import LogisticRegression
from sklearn.datasets import load_iris
from sklearn.preprocessing import StandardScaler

# Load the full iris data set (all rows, all class labels).
iris = load_iris()
features, targets = iris.data, iris.target

# Rescale every feature column to zero mean and unit variance; the original
# cell standardizes before fitting with the 'sag' solver as well.
features_standardized = StandardScaler().fit(features).transform(features)

# One-vs-rest logistic regression using the stochastic average gradient
# ('sag') solver.
# NOTE(review): newer scikit-learn releases deprecate `multi_class`;
# confirm against the installed version before upgrading.
logistic_regression = LogisticRegression(
    solver='sag',
    multi_class='ovr',
    random_state=420,
)
logistic_regression.fit(features_standardized, targets)



LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='ovr', n_jobs=None, penalty='l2',
                   random_state=420, solver='sag', tol=0.0001, verbose=0,
                   warm_start=False)

#### Handling Imbalanced Classes

In [667]:
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import load_iris

# Load the iris data set.
iris = load_iris()

# Drop the first 40 observations to make the classes highly imbalanced.
features = iris.data[40:]
target = iris.target[40:]

# Collapse the labels to a binary target: class 0 stays 0, every other
# class becomes 1.
target = np.where(target == 0, 0, 1)

# Rescale every feature column to zero mean and unit variance.
features_standardized = StandardScaler().fit(features).transform(features)

# class_weight='balanced' reweights the classes to offset the imbalance
# created above.
logistic_regression = LogisticRegression(
    class_weight='balanced',
    random_state=0,
)
logistic_regression.fit(features_standardized, target)

LogisticRegression(C=1.0, class_weight='balanced', dual=False,
                   fit_intercept=True, intercept_scaling=1, l1_ratio=None,
                   max_iter=100, multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=0, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False)