In [34]:
from sklearn import datasets
import pandas as pd
from sklearn.model_selection import train_test_split
import numpy as np
import matplotlib
import matplotlib.pyplot as plt

In [21]:
data = datasets.load_breast_cancer()
x = np.array(data.data[:,:])
y = np.array(data.target)


In [22]:
class LogisticRegression_new:
    def __init__(self, lr=0.01, num_iter=100000, fit_intercept=True, verbose=False,lamb = 0.1,theta=np.zeros(10)):
        self.lr = lr
        self.num_iter = num_iter
        self.fit_intercept = fit_intercept
        self.verbose = verbose
        self.theta = theta
        self.lamb = lamb
    def __add_intercept(self, X):
        intercept = np.ones((X.shape[0], 1))
        return np.concatenate((intercept, X), axis=1)
    
    def __sigmoid(self, z):
        return 1 / (1 + np.exp(-z))
    def __loss(self, h, y):
        return (-y * np.log(h) - (1 - y) * np.log(1 - h) + self.lamb*(self.theta * self.theta).sum()).mean() 
    
    def fit(self, X, y):
        if self.fit_intercept:
            X = self.__add_intercept(X)
        
        # weights initialization
        self.theta = np.zeros(X.shape[1])
        loss = 0
        for i in range(self.num_iter):
            z = np.dot(X, self.theta)
            h = self.__sigmoid(z)
            gradient = (np.dot(X.T, (h - y)) + self.lamb * 2 * self.theta)/ y.size
            self.theta -= self.lr * gradient
            
            z = np.dot(X, self.theta)
            h = self.__sigmoid(z)
            loss = self.__loss(h, y)
                
            if(self.verbose ==True and i % 100 == 0):
                print("loss:" ,{loss} )
    
    def predict_prob(self, X):
        if self.fit_intercept:
            X = self.__add_intercept(X)
    
        return self.__sigmoid(np.dot(X, self.theta))
    
    def predict(self, X):
        return self.predict_prob(X).round()

    def print_weights(self):
        print(self.theta)
    def get_weights(self):
        return self.theta

In [23]:
x_train,x_test, y_train, y_test = train_test_split(x,y,test_size = 0.4, random_state = 0)
model = LogisticRegression_new(lr=0.1, num_iter=1000,lamb = 0.01)
model.fit(x_train,y_train)
preds = model.predict(x_test)
model.print_weights()
print("Score = ",(preds == y_test).mean())


[ 3.78126876e+00  2.90641376e+01  4.39258732e+01  1.67801789e+02
  8.06097529e+01  2.47109200e-01 -1.57856506e-01 -5.75317156e-01
 -2.52774949e-01  4.50478631e-01  2.04363409e-01  1.68594159e-01
  3.19505753e+00 -7.40784969e-01 -7.78809947e+01  1.45196469e-02
 -2.68696549e-02 -5.72770378e-02 -5.49830212e-03  3.36842263e-02
  4.33078622e-03  3.06975933e+01  5.38009631e+01  1.66351109e+02
 -1.08915732e+02  2.90811290e-01 -6.24246879e-01 -1.23971279e+00
 -3.22618604e-01  5.66518788e-01  1.80845795e-01]
('Score = ', 0.8859649122807017)


  app.launch_new_instance()
  app.launch_new_instance()


In [24]:
reg = 0
max_score = 0
for i in range(0,10,1):
    model = LogisticRegression_new(lr=0.1, num_iter=1000,lamb = i)
    model.fit(x_train,y_train)
    preds = model.predict(x_test)
    score = (preds == y_test).mean()
    if(score >= max_score):
        reg = i
        max_score = score
        

  app.launch_new_instance()
  app.launch_new_instance()


In [25]:
print(reg, max_score)

(3, 0.9254385964912281)


In [26]:
def k_fold_cross(x, y, k):
    n = len(x)
    batches = n//k
    print(batches)
    optimum_weights = []
    acc = 0
    for i in range(0,k):
        x_test = x[i*batches: i*batches + batches]
        y_test = y[i*batches: i*batches + batches]
        arr = [j for j in range(i*batches, i*batches + batches)]
        x_train = []
        y_train = []
        for j in range(0,i*batches):
            x_train.append(x[j])
            y_train.append(y[j])
        for j in range(i*batches + batches + 1, n):
            x_train.append(x[j])
            y_train.append(y[j])
            
        print(len(x_train),len(y_train),len(x_test))
        x_train = np.array(x_train)
        y_train = np.array(y_train)
        model = LogisticRegression_new(lr=0.1, num_iter=1000,lamb = max_score)
        model.fit(x_train,y_train)
        preds = model.predict(x_test)
        score = (preds == y_test).mean()
        print("Score = ",score)
        if(score>acc):
            acc = score
            optimum_weights = model.get_weights()
    return optimum_weights

In [27]:
optimum_weights = k_fold_cross(x,y,10)

56
(512, 512, 56)
('Score = ', 0.75)
(512, 512, 56)


  app.launch_new_instance()
  app.launch_new_instance()


('Score = ', 0.9464285714285714)
(512, 512, 56)
('Score = ', 0.9642857142857143)
(512, 512, 56)
('Score = ', 0.8571428571428571)
(512, 512, 56)
('Score = ', 0.6607142857142857)
(512, 512, 56)
('Score = ', 0.9642857142857143)
(512, 512, 56)
('Score = ', 0.8571428571428571)
(512, 512, 56)
('Score = ', 0.8214285714285714)
(512, 512, 56)
('Score = ', 0.8928571428571429)
(512, 512, 56)
('Score = ', 0.8392857142857143)


In [28]:
optimum_weights

array([ 3.87024108e+00,  2.95738577e+01,  4.30305567e+01,  1.72534883e+02,
        7.46613189e+01,  2.75412979e-01, -1.06197108e-01, -5.11175418e-01,
       -2.17046068e-01,  5.30710648e-01,  2.17451276e-01,  1.19810119e-01,
        2.83344365e+00, -7.83869309e-01, -7.11587727e+01,  1.55325192e-02,
       -3.25091208e-02, -6.53003409e-02, -7.04398092e-03,  5.10811920e-02,
        2.92009806e-03,  3.11725195e+01,  5.54568439e+01,  1.74303472e+02,
       -9.94495044e+01,  3.48157433e-01, -4.82408636e-01, -1.03410869e+00,
       -2.37255522e-01,  7.50133489e-01,  2.03709675e-01])

In [31]:
x_train,x_test, y_train, y_test = train_test_split(x,y,test_size = 0.4, random_state = 0)


In [32]:
new_model = LogisticRegression_new(lr=0.1, num_iter=1000,lamb = reg, theta = optimum_weights)
y_pred1 = new_model.predict(x_train)
y_pred2 = new_model.predict(x_test)


In [33]:
print("Score = ",(y_pred1 == y_train).mean())
print("Score = ",(y_pred2 == y_test).mean())

('Score = ', 0.9090909090909091)
('Score = ', 0.9385964912280702)
