In [1]:
# Name: Zhihao Zhang
# NetID: zz2432
%matplotlib inline
import numpy as np
import math
import matplotlib.pyplot as plt
from sklearn import preprocessing
from sklearn.model_selection import train_test_split


### My implementation of logistic regression with lasso (L1) regularization

#### The idea of how to implement lasso comes from this post: https://xavierbourretsicotte.github.io/lasso_derivation.html

Since the lasso (L1) term is not differentiable at 0, we need to look at each interval separately and find the relative change on it, which is where the subderivative is used.


<img src="equation1.png" height="500" width="500">

Setting the subderivative to zero in each of the three cases gives three equations, one per condition, each of which updates the weight of the current j-th feature of X.

We create a `soft_threshold` function to evaluate those three conditions.


<img src="equation_formula_2.png" height="600" width="600">
<img src="equation3.png" height="600" width="600">

During the update stage, we update the weights one feature at a time. For example, let theta = [theta1, theta2, theta3]. We first look at all the examples for theta1, follow the idea above to compute the gradient, pass it through the soft-threshold function, and then update theta1. Next we do the same for theta2 over all the examples, and continue with the same procedure until every theta has been updated.

Repeat this process for the specified number of iterations.

In [233]:
def sigmoid(z):
    """Logistic sigmoid ``1 / (1 + exp(-z))``, evaluated without overflow.

    Parameters
    ----------
    z : scalar or array-like
        Input value(s); the function is applied element-wise.

    Returns
    -------
    NumPy float or ndarray
        Same shape as ``z``, with every value in the closed interval [0, 1].
    """
    # 1 / (1 + exp(-z)) == exp(-log(1 + exp(-z))).  np.logaddexp(0, -z) yields
    # log(1 + exp(-z)) without ever materialising exp(-z), so a large negative z
    # no longer overflows (previously a RuntimeWarning for arrays and an
    # OverflowError for plain Python numbers).
    return np.exp(-np.logaddexp(0, np.negative(z)))

def hypothesis(X, w):
    """Logistic-regression prediction: the sigmoid of the product ``X.dot(w)``."""
    logits = X.dot(w)
    return sigmoid(logits)

 
def soft_threshold(rho, lamda):
    """Soft-thresholding rule used for the lasso update on normalized data.

    Returns ``rho + lamda`` when ``rho < -lamda``, ``rho - lamda`` when
    ``rho > lamda``, and ``0`` when ``rho`` lies inside ``[-lamda, lamda]``.
    """
    # The lower bound is tested first; the order only matters for a negative
    # lamda, where both comparisons can be true at once.
    below_band = rho < -lamda
    if below_band:
        return rho + lamda
    return rho - lamda if rho > lamda else 0

def coordinate_descent_lasso(X, y, learning_rate, num_iters, lamda=0.01):
    """Fit L1-regularised logistic-regression weights by cyclic coordinate updates.

    Every sweep visits the columns of ``X`` in order.  For column ``j`` the
    predictions are recomputed with the current weights, then

        rho = (learning_rate / m) * X_j^T (y - y_pred + w_j * X_j)

    is stored as-is for column 0 and passed through ``soft_threshold`` for every
    other column.

    Parameters
    ----------
    X : ndarray of shape (m, n)
        Design matrix.  Column 0 is never shrunk, so it is presumably the
        intercept column of ones -- the caller has to arrange that.
    y : ndarray with m entries
        Targets; reshaped internally to an (m, 1) column vector.
    learning_rate : float
        Multiplier applied to ``rho`` before thresholding.
    num_iters : int
        Number of full sweeps over all n coordinates.
    lamda : float, default 0.01
        Threshold handed to ``soft_threshold``.

    Returns
    -------
    ndarray of shape (n,)
        The fitted weights, flattened.
    """
    n_samples, n_features = X.shape
    weights = np.zeros((n_features, 1))
    targets = y.reshape(y.shape[0], 1)

    for _ in range(num_iters):
        # One sweep: update every coordinate in turn
        for j in range(n_features):
            column = X[:, j].reshape(n_samples, 1)
            # Predictions are refreshed per coordinate because earlier updates
            # in this sweep have already changed the weights.
            residual = targets - hypothesis(X, weights)
            rho = (learning_rate / n_samples) * column.T.dot(residual + weights[j] * column)
            # Column 0 is exempt from the L1 shrinkage
            weights[j] = rho if j == 0 else soft_threshold(rho, lamda)

    return weights.flatten()

### 1. Dataset used in class: breast_cancer

In [238]:
from sklearn.datasets import load_breast_cancer

cancer = load_breast_cancer()
X, y = cancer.data, cancer.target
X_train_tmp, X_test_tmp, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=42
)

# Standardize the features: the scaler is fitted on the training split only and
# then applied unchanged to the test split.
scaler = preprocessing.StandardScaler().fit(X_train_tmp)
X_train, X_test = scaler.transform(X_train_tmp), scaler.transform(X_test_tmp)

# Prepend a column of ones to X_train and X_test
ones = np.ones((X_train.shape[0], 1))
X_train = np.hstack((ones, X_train))
ones = np.ones((X_test.shape[0], 1))
X_test = np.hstack((ones, X_test))

#### Implement lasso

In [239]:
learning_rate = 0.9
new_w = coordinate_descent_lasso(
    X_train, y_train, learning_rate, num_iters=5000, lamda=0.02
)
print(f'Weights:\n{new_w}')

# Turn the predicted values into hard 0/1 labels at the decision threshold.
# Both index sets are computed before y_pred is overwritten.
threshold = 0.5
y_pred = hypothesis(X_test, new_w)
ones_idx = np.where(y_pred >= threshold)
zeros_idx = np.where(y_pred < threshold)
y_pred[ones_idx] = 1
y_pred[zeros_idx] = 0

# Give y_pred the same 1-D shape as y_test, then report the fraction of matches
y_pred = y_pred.reshape(len(y_pred))
print('Accuracy: {}'.format(np.count_nonzero(y_pred == y_test) / y_pred.size))

Weights:
[ 0.27492043 -0.2097498  -0.13611075 -0.21051871 -0.19045538 -0.03009046
 -0.00855919 -0.15174555 -0.27884892  0.          0.         -0.14630241
  0.         -0.10525505 -0.09686716  0.          0.          0.
  0.          0.          0.00114735 -0.27301691 -0.22092445 -0.26407832
 -0.22755563 -0.15351341 -0.10787207 -0.18899421 -0.2921711  -0.17405755
  0.        ]
Accuracy: 0.9787234042553191


### 2. Dataset outside class: wine

In [242]:
from sklearn.datasets import load_wine
data = load_wine()
target_name = data.target_names
print(target_name)
print('we only focus on class_0 and class_1 for the purpose of simplicity. (do binary classification)')
# Keep only the samples labelled class_0 or class_1 (binary classification).
# Selecting by label instead of slicing the first 130 rows does not rely on the
# rows being sorted by class or on the exact class sizes.
binary_mask = (data.target == 0) | (data.target == 1)
X = data.data[binary_mask]
y = data.target[binary_mask]

X_train_tmp, X_test_tmp, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)
# Standardize the features: the scaler is fitted on the training split only and
# then applied unchanged to the test split.
scaler = preprocessing.StandardScaler().fit(X_train_tmp)
X_train = scaler.transform(X_train_tmp)
X_test = scaler.transform(X_test_tmp)

# Prepend a column of ones to X_train and X_test
ones = np.ones(X_train.shape[0]).reshape((X_train.shape[0],1))
X_train = np.hstack((ones, X_train))
ones = np.ones(X_test.shape[0]).reshape((X_test.shape[0], 1))
X_test = np.hstack((ones, X_test))

['class_0' 'class_1' 'class_2']
we only focus on class_0 and class_1 for the purpose of simplicity. (do binary classification)


### Implement lasso

In [241]:
learning_rate = 0.9
new_w = coordinate_descent_lasso(
    X_train, y_train, learning_rate, num_iters=5000, lamda=0.02
)
print(f'Weights:\n{new_w}')

# Turn the predicted values into hard 0/1 labels at the decision threshold.
# Both index sets are computed before y_pred is overwritten.
threshold = 0.5
y_pred = hypothesis(X_test, new_w)
ones_idx = np.where(y_pred >= threshold)
zeros_idx = np.where(y_pred < threshold)
y_pred[ones_idx] = 1
y_pred[zeros_idx] = 0

# Give y_pred the same 1-D shape as y_test, then report the fraction of matches
y_pred = y_pred.reshape(len(y_pred))
print('Accuracy: {}'.format(np.count_nonzero(y_pred == y_test) / y_pred.size))

Weights:
[ 0.20979497 -0.55540613 -0.03690661 -0.19429156  0.37258714 -0.22065127
  0.         -0.16862806  0.05294112 -0.05000681 -0.37839111  0.
 -0.15395089 -0.72279016]
Accuracy: 0.9767441860465116


## Observation:
Lasso shrinks the coefficients of the less important features to exactly zero, thereby removing those features altogether. Lasso is therefore well suited to feature selection.