# Logistic Regression

In [1]:
# Install a blanket "ignore" filter so warnings are not shown in later cells.
from warnings import filterwarnings
filterwarnings("ignore")

In [1]:
# Read the iris dataset from the working directory and preview the first rows.
import pandas as pd
df = pd.read_csv("iris.csv")
df.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


In [3]:
# Count missing values per column.
df.isna().sum()

sepal_length    0
sepal_width     0
petal_length    0
petal_width     0
species         0
dtype: int64

In [2]:
# Count rows that are exact duplicates of an earlier row.
# NOTE(review): the duplicate reported here is only counted, not dropped — confirm that is intended.
df.duplicated().sum()

1

### Step 3 - Separate X and y

In [5]:
# Features: every column except the label.
x = df.drop("species", axis=1)
# Target: kept as a one-column DataFrame rather than a Series.
y = df.loc[:, ["species"]]

In [6]:
x.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2


In [7]:
y.head()

Unnamed: 0,species
0,setosa
1,setosa
2,setosa
3,setosa
4,setosa


In [15]:
x.dtypes

sepal_length    float64
sepal_width     float64
petal_length    float64
petal_width     float64
dtype: object

### Step 4 - Create a preprocessing pipeline on X

In [10]:
from sklearn.impute import SimpleImputer
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

In [11]:
# Numeric preprocessing: fill missing values with the column mean, then standardise.
mean_imputer = SimpleImputer(strategy="mean")
scaler = StandardScaler()
num_pipe = make_pipeline(mean_imputer, scaler)
# set_output configures the pipeline in place so transforms return DataFrames.
num_pipe.set_output(transform="pandas");

In [14]:
# Fit the imputer/scaler on the feature frame and transform it in one step.
# NOTE(review): the pipeline is fitted on all rows of x before the train/test split below,
# so the scaling statistics include rows that later end up in the test set.
x_pre = num_pipe.fit_transform(x)
x_pre.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width
0,-0.900681,1.019004,-1.340227,-1.315444
1,-1.143017,-0.131979,-1.340227,-1.315444
2,-1.385353,0.328414,-1.397064,-1.315444
3,-1.506521,0.098217,-1.283389,-1.315444
4,-1.021849,1.249201,-1.340227,-1.315444


In [13]:
num_pipe

### Step 5 - Train/test split

In [33]:
from sklearn.model_selection import train_test_split

# Hold out 33% of the rows for evaluation; the fixed seed keeps the split reproducible.
xtrain, xtest, ytrain, ytest = train_test_split(
    x_pre,
    y,
    test_size=0.33,
    random_state=22,
)

In [34]:
xtrain.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width
85,0.18983,0.788808,0.421734,0.527406
145,1.038005,-0.131979,0.819596,1.448832
15,-0.173674,3.090775,-1.283389,-1.05218
20,-0.537178,0.788808,-1.169714,-1.315444
40,-1.021849,1.019004,-1.397064,-1.183812


In [35]:
ytrain.head()

Unnamed: 0,species
85,versicolor
145,virginica
15,setosa
20,setosa
40,setosa


In [36]:
xtest.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width
10,-0.537178,1.479398,-1.283389,-1.315444
115,0.674501,0.328414,0.876433,1.448832
54,0.795669,-0.592373,0.478571,0.395774
146,0.553333,-1.282963,0.705921,0.922303
63,0.310998,-0.362176,0.535409,0.264142


In [37]:
ytest.head()

Unnamed: 0,species
10,setosa
115,virginica
54,versicolor
146,virginica
63,versicolor


### Step 6 - Model building

In [42]:
from sklearn.linear_model import LogisticRegression

# ytrain is a one-column DataFrame, but scikit-learn classifiers expect a 1-D target.
# Flattening it avoids the "column-vector y was passed" conversion warning.
model = LogisticRegression()
model.fit(xtrain, ytrain.to_numpy().ravel())

  y = column_or_1d(y, warn=True)


### Step 7 - Model evaluation

In [43]:
model.score(xtrain,ytrain)

0.95

In [30]:
# Accuracy on the held-out split: score() takes (features, true labels).
# The previous call passed the test features as labels against the training features.
model.score(xtest, ytest)

0.95

In [45]:
# Predicted class labels for the held-out test rows.
ypred = model.predict(xtest)
ypred

array(['setosa', 'virginica', 'versicolor', 'virginica', 'versicolor',
       'versicolor', 'versicolor', 'virginica', 'versicolor', 'setosa',
       'virginica', 'versicolor', 'virginica', 'virginica', 'setosa',
       'virginica', 'versicolor', 'versicolor', 'versicolor',
       'versicolor', 'setosa', 'virginica', 'setosa', 'versicolor',
       'virginica', 'setosa', 'virginica', 'virginica', 'virginica',
       'virginica', 'setosa', 'setosa', 'versicolor', 'versicolor',
       'versicolor', 'setosa', 'setosa', 'setosa', 'virginica',
       'virginica', 'versicolor', 'versicolor', 'setosa', 'setosa',
       'virginica', 'versicolor', 'virginica', 'virginica', 'setosa',
       'versicolor'], dtype=object)

In [None]:
from sklearn.metrics import classification_report

# classification_report requires the true labels and the predicted labels.
# Predictions are recomputed from xtest so the report always describes the test split,
# regardless of what `ypred` currently holds.
print(classification_report(ytest, model.predict(xtest)))


### Step 8 - Out-of-sample prediction

In [46]:
# Load the unlabeled rows to score and display them.
xnew = pd.read_csv("iris_sample.csv")
xnew

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width
0,5.5,2.5,4.0,1.3
1,6.9,3.1,5.1,2.3
2,5.1,2.5,3.0,1.1
3,4.4,2.9,,0.2
4,5.9,3.0,5.1,1.8
5,5.5,3.5,1.3,0.2
6,5.6,,3.6,1.3
7,6.5,3.0,5.2,2.0
8,5.3,3.7,1.5,0.2
9,5.1,3.4,1.5,


In [47]:
# Apply the already-fitted pipeline (transform only, no refit) to the new rows.
# Selecting x.columns restricts the input to the features the pipeline was fitted on,
# so this cell still works if re-run after prediction columns have been added to xnew.
xnew_pre = num_pipe.transform(xnew[x.columns])
xnew_pre.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width
0,-0.41601,-1.282963,0.137547,0.13251
1,1.28034,0.098217,0.762758,1.448832
2,-0.900681,-1.282963,-0.430828,-0.130755
3,-1.748856,-0.362176,0.0,-1.315444
4,0.068662,-0.131979,0.762758,0.790671


In [52]:
# Predicted class labels for the new rows.
# NOTE(review): this reuses the name `ypred`, replacing the test-set predictions
# assigned in the evaluation step.
ypred = model.predict(xnew_pre)
ypred

array(['versicolor', 'virginica', 'versicolor', 'setosa', 'virginica',
       'setosa', 'versicolor', 'virginica', 'setosa', 'setosa',
       'versicolor', 'versicolor', 'virginica', 'versicolor',
       'versicolor'], dtype=object)

In [57]:
# Per-class probabilities for the new rows; columns are later labelled with model.classes_.
yprob = model.predict_proba(xnew_pre)

In [58]:
model.classes_

array(['setosa', 'versicolor', 'virginica'], dtype=object)

In [59]:
# Attach the predicted label, then one probability column per class
# (column i of yprob is paired with model.classes_[i]).
xnew["species_pred"] = ypred
for class_idx, class_name in enumerate(model.classes_):
    xnew[class_name] = yprob[:, class_idx]

In [60]:
xnew

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species_pred,setosa,versicolor,virginica
0,5.5,2.5,4.0,1.3,versicolor,0.026505,0.891421,0.082074
1,6.9,3.1,5.1,2.3,virginica,0.000158,0.077326,0.922516
2,5.1,2.5,3.0,1.1,versicolor,0.144834,0.844234,0.010932
3,4.4,2.9,,0.2,setosa,0.807801,0.191924,0.000275
4,5.9,3.0,5.1,1.8,virginica,0.005333,0.315733,0.678934
5,5.5,3.5,1.3,0.2,setosa,0.967305,0.032693,2e-06
6,5.6,,3.6,1.3,versicolor,0.171142,0.78609,0.042769
7,6.5,3.0,5.2,2.0,virginica,0.000711,0.172721,0.826568
8,5.3,3.7,1.5,0.2,setosa,0.986001,0.013998,1e-06
9,5.1,3.4,1.5,,setosa,0.905806,0.093942,0.000252
