# Multiple logistic regression

## Load relevant libraries

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Apply seaborn's default theme to any matplotlib figures drawn later.
sns.set()

## Load the data

In [2]:
# The workbook is referenced by a relative path, so it must sit in the
# notebook's working directory.
DATA_FILE = 'buy_laptop_gender.xlsx'
raw_data = pd.read_excel(DATA_FILE)
raw_data.head()

Unnamed: 0,Price,Buy,Gender
0,2726,Yes,Male
1,3584,No,Female
2,3908,No,Female
3,3306,Yes,Male
4,3186,Yes,Male


In [3]:
# Check column dtypes and non-null counts before modelling.
raw_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 168 entries, 0 to 167
Data columns (total 3 columns):
Price     168 non-null int64
Buy       168 non-null object
Gender    168 non-null object
dtypes: int64(1), object(2)
memory usage: 4.0+ KB


## Explore descriptive statistics

In [4]:
# include='all' adds the text columns (Buy, Gender) to the summary alongside
# the numeric Price column.
raw_data.describe(include='all')

Unnamed: 0,Price,Buy,Gender
count,168.0,168,168
unique,,2,2
top,,No,Male
freq,,94,90
mean,3390.547619,,
std,366.038034,,
min,2668.0,,
25%,3095.0,,
50%,3383.0,,
75%,3689.0,,


## Create dummy variables

In [5]:
# One-hot encode the two text columns. drop_first=True keeps a single 0/1
# indicator per column (Buy_Yes, Gender_Male) and drops the first level of
# each (No, Female).
# The dtype is pinned to uint8 so the indicators are numeric 0/1 on every
# pandas version; pandas 2.x would otherwise produce bool (True/False) columns.
data = pd.get_dummies(raw_data, drop_first=True, dtype=np.uint8)
data.head()

Unnamed: 0,Price,Buy_Yes,Gender_Male
0,2726,1,1
1,3584,0,0
2,3908,0,0
3,3306,1,1
4,3186,1,1


In [6]:
# Restore the original variable names on the indicator columns.
# After encoding, Buy == 1 means "Yes" and Gender == 1 means "Male".
# A single non-mutating rename rebinds `data` instead of modifying the frame
# in place, so the cell can be re-run safely and no displayed frame goes stale.
data = data.rename(columns={'Buy_Yes': 'Buy', 'Gender_Male': 'Gender'})
data.head()

Unnamed: 0,Price,Buy,Gender
0,2726,1,1
1,3584,0,0
2,3908,0,0
3,3306,1,1
4,3186,1,1


## Regression

### Declare the inputs and the targets

In [7]:
# Inputs: the price and the 0/1 gender indicator.
# Target: the 0/1 purchase indicator.
predictors = ['Price', 'Gender']
x = data[predictors]
y = data['Buy']

### Logistic regression

In [8]:
from sklearn.linear_model import LogisticRegression

# C is the inverse regularisation strength, so a very large value makes the
# L2 penalty negligible (presumably to approximate an unpenalised fit).
# fit() returns the estimator itself, so it can be chained onto construction.
log_reg = LogisticRegression(solver='lbfgs', C=1e9).fit(x, y)
log_reg

LogisticRegression(C=1000000000.0, class_weight=None, dual=False,
          fit_intercept=True, intercept_scaling=1, max_iter=100,
          multi_class='ovr', n_jobs=1, penalty='l2', random_state=None,
          solver='lbfgs', tol=0.0001, verbose=0, warm_start=False)

In [9]:
# Fitted coefficients (log-odds scale), one per input column in the order
# of x: Price, then Gender.
log_reg.coef_

array([[-0.0203009 ,  1.94489458]])

In [10]:
# Fitted intercept (bias term) of the model.
log_reg.intercept_

array([ 66.40394276])

### Assessing the accuracy of regression (Confusion matrix)

In [11]:
# Predict on the same rows the model was fitted on, so any metric derived
# from y_pred is an in-sample (training) figure, not a held-out estimate.
y_pred = log_reg.predict(x)
y_pred

array([1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1,
       1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1,
       0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0,
       1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1,
       0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1,
       1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1,
       1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1,
       0, 0, 0, 0, 1, 1, 1], dtype=uint8)

In [12]:
from sklearn import metrics

# Rows are the actual classes and columns the predicted classes, both in
# label order (0 = No, 1 = Yes).
# The function is called through the `metrics` module so that the result name
# does not overwrite an imported `confusion_matrix` function; the result keeps
# its original name so that any later use of the array is unaffected.
confusion_matrix = metrics.confusion_matrix(y, y_pred)
print(confusion_matrix)

[[90  4]
 [ 5 69]]


In [13]:
# Accuracy as a percentage: the share of rows where the prediction equals the
# target. It is computed from y and y_pred rather than from hand-copied
# confusion-matrix counts, so it stays correct if the data or model changes.
# float() keeps the result a plain Python float.
accuracy = float((y == y_pred).mean() * 100)
accuracy

94.64285714285714