## Implementing logistic regression in Python
### Data with on characteristic

In [1]:
import numpy as np
import pandas as pd

- Define Cost Function

In [2]:
def CostFunc(y,y_pre):
    return sum(-(y*np.log(y_pre)+(1-y)*np.log(1-y_pre)))/len(y)

- Define Sigmoid Function

In [3]:
def Sigmoid(x,w,b):
    z=w*x+b
    return 1/(1+np.exp(-z))

### Practice using breast cancer data from scikit-learn

In [4]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [5]:
cancer=load_breast_cancer()

In [6]:
cancer_data=pd.DataFrame(data=cancer.data,columns=cancer.feature_names)

In [7]:
x_data=np.array(cancer_data['mean radius'])
y_data=np.array(cancer['target'])

In [8]:
print(np.shape(x_data),np.shape(y_data))

(569,) (569,)


In [9]:
w=np.random.randn()
b=np.random.randn()
alpha=0.01

In [10]:
print(w,b)

0.388210672427952 0.2937489891375353


In [11]:
x_train,x_test,y_train,y_test=train_test_split(x_data,y_data,test_size=0.3)

In [12]:
CostFunc(y_test,Sigmoid(x_test,w,b))

2.365082340375891

In [13]:
y=Sigmoid(x_test,w,b)
y_pred=np.where(y>0.5,1,0)

In [14]:
accuracy_score(y_test, y_pred)

0.6666666666666666

- Gradient Descent Algorithm

In [15]:
h=0.0001
while(1):
    dw=(CostFunc(y_train,Sigmoid(x_train,w+h,b))-CostFunc(y_train,Sigmoid(x_train,w,b)))/h
    db=(CostFunc(y_train,Sigmoid(x_train,w,b+h))-CostFunc(y_train,Sigmoid(x_train,w,b)))/h
    nw=w-alpha*dw
    nb=b-alpha*db
    if (abs(nw-w)<0.0001)&(abs(nb-b)<0.0001):
        break
    w=nw; b=nb;
y=Sigmoid(x_test,w,b)
y_pred=np.where(y>0.5,1,0)

In [16]:
CostFunc(y_test,Sigmoid(x_test,w,b))

0.30318294652361943

In [17]:
accuracy_score(y_test, y_pred)

0.8830409356725146