# Logistic Regression Exercise
> Determine whether an iris with the measurements 4.8, 2.5, 5.3, 2.4 (sepal length, sepal width, petal length, petal width, in cm) is an Iris Virginica.

In [6]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import linear_model

In [2]:
from sklearn import datasets

# Load the Iris data set (a dict-like object) and list the fields it exposes.
iris = datasets.load_iris()
list(iris)

['data',
 'target',
 'frame',
 'target_names',
 'DESCR',
 'feature_names',
 'filename',
 'data_module']

In [4]:
# Show the description text that ships with the data set.
print(iris["DESCR"])


.. _iris_dataset:

Iris plants dataset
--------------------

**Data Set Characteristics:**

    :Number of Instances: 150 (50 in each of three classes)
    :Number of Attributes: 4 numeric, predictive attributes and the class
    :Attribute Information:
        - sepal length in cm
        - sepal width in cm
        - petal length in cm
        - petal width in cm
        - class:
                - Iris-Setosa
                - Iris-Versicolour
                - Iris-Virginica
                
    :Summary Statistics:

                    Min  Max   Mean    SD   Class Correlation
    sepal length:   4.3  7.9   5.84   0.83    0.7826
    sepal width:    2.0  4.4   3.05   0.43   -0.4194
    petal length:   1.0  6.9   3.76   1.76    0.9490  (high!)
    petal width:    0.1  2.5   1.20   0.76    0.9565  (high!)

    :Missing Attribute Values: None
    :Class Distribution: 33.3% for each of 3 classes.
    :Creator: R.A. Fisher
    :Donor: Michael Marshall (MARSHALL%PLU@io.arc.nasa.gov)
    :

### Extract the input X and output Y from the data set

In [20]:
# Feature matrix: the first four columns
# (sepal length, sepal width, petal length, petal width).
X = iris.data[:, :4]
# Binary target: 1 where the class label is 2 (Iris-Virginica), otherwise 0.
y = (iris.target == 2).astype(np.int32)
print('first 5 rows of data', X[:5])

first 5 rows of data [[5.1 3.5 1.4 0.2]
 [4.9 3.  1.4 0.2]
 [4.7 3.2 1.3 0.2]
 [4.6 3.1 1.5 0.2]
 [5.  3.6 1.4 0.2]]


### Create a logistic regression model and train it with data extracted from the data set

In [16]:
from sklearn.linear_model import LogisticRegression

# A fixed random_state is passed so repeated runs use the same seed.
log_reg = LogisticRegression(random_state=42, solver="liblinear")
log_reg.fit(X, y)  # fit() returns the estimator, so its repr is the cell output

LogisticRegression(random_state=42, solver='liblinear')

### Coefficients and intercept 
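The intercept $w_0$ and the coefficients (weights) $w_1, \dots, w_4$ of the features define the linear score, i.e. the log-odds of the positive class:

$$z(w, x) = w_0 + w_1 x_1 + w_2 x_2 + w_3 x_3 + w_4 x_4$$

The model turns this score into a probability with the logistic (sigmoid) function, $P(y = 1 \mid x) = \dfrac{1}{1 + e^{-z(w, x)}}$, and predicts class 1 (Iris Virginica) when that probability is at least 0.5, i.e. when $z(w, x) \ge 0$.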

In [17]:
# Read the learned feature weights and the intercept off the fitted model, then show both.
coef, intercept = log_reg.coef_, log_reg.intercept_
print('coef= ', coef)
print('intercept= ', intercept)

coef=  [[-1.70751526 -1.53427768  2.47096755  2.55537041]]
intercept=  [-1.21470917]


### Prediction

The model predicts that the iris with measurements 4.8, 2.5, 5.3, 2.4 is an Iris Virginica (class 1)

In [18]:
# One sample in feature order: sepal length, sepal width, petal length, petal width.
iris_sample = [[4.8, 2.5, 5.3, 2.4]]
log_reg.predict(iris_sample)

array([1], dtype=int32)

with a probability of ~99.7%

In [19]:
# Same sample, in feature order: sepal length, sepal width, petal length, petal width.
iris_sample = [[4.8, 2.5, 5.3, 2.4]]
log_reg.predict_proba(iris_sample)  # columns: P(class 0, not Virginica), P(class 1, Virginica)

array([[0.00251606, 0.99748394]])