# 1. Load Data

A familiar dataset which I've played with and have parsed

In [1]:
from sklearn import datasets
import numpy

x_sparse, y = datasets.load_svmlight_file('diabetes')
x = x_sparse.todense()

print('Shape of x: ' + str(x.shape))
print('Shape of y: ' + str(y.shape))

Shape of x: (768, 8)
Shape of y: (768,)


In [2]:
# partition the data to training and test sets
n = x.shape[0]
n_train = 640
n_test = n - n_train

rand_indices = numpy.random.permutation(n)
train_indices = rand_indices[0:n_train]
test_indices = rand_indices[n_train:n]

x_train = x[train_indices, :]
x_test = x[test_indices, :]
y_train = y[train_indices].reshape(n_train, 1)
y_test = y[test_indices].reshape(n_test, 1)

print('Shape of x_train: ' + str(x_train.shape))
print('Shape of x_test: ' + str(x_test.shape))
print('Shape of y_train: ' + str(y_train.shape))
print('Shape of y_test: ' + str(y_test.shape))

Shape of x_train: (640, 8)
Shape of x_test: (128, 8)
Shape of y_train: (640, 1)
Shape of y_test: (128, 1)


In [3]:
# Standardization
import numpy

# calculate mu and sig using the training set
d = x_train.shape[1]
mu = numpy.mean(x_train, axis=0).reshape(1, d)
sig = numpy.std(x_train, axis=0).reshape(1, d)

# transform the training features
x_train = (x_train - mu) / (sig + 1E-6)

# transform the test features
x_test = (x_test - mu) / (sig + 1E-6)

print('test mean = ')
print(numpy.mean(x_test, axis=0))

print('test std = ')
print(numpy.std(x_test, axis=0))

test mean = 
[[-0.04785122 -0.12557859  0.05500977  0.05743532 -0.02749454  0.04597634
   0.09983854  0.16046459]]
test std = 
[[1.00633571 0.89920662 1.05102339 0.95716193 1.04239613 0.9975458
  0.91969456 1.10635728]]


In [4]:
n_train, d = x_train.shape
x_train = numpy.concatenate((x_train, numpy.ones((n_train, 1))), axis=1)

n_test, d = x_test.shape
x_test = numpy.concatenate((x_test, numpy.ones((n_test, 1))), axis=1)

print('Shape of x_train: ' + str(x_train.shape))
print('Shape of x_test: ' + str(x_test.shape))

Shape of x_train: (640, 9)
Shape of x_test: (128, 9)


# 2. SAGA for logistic loss

No regularizer in the loss functions

In [116]:
# logistic loss objective value 
# y: scalar 
# x: 1 by d vector 
# w: d by 1 vector
def logistic_loss(y,x,w):
    exponent = float(numpy.exp(-y * numpy.dot(x,w))) # scalar
    objective = numpy.log(1 + exponent) 
    return objective

In [128]:
# gradient of logisitic loss
# y: scalar 
# x: 1 by d vector 
# w: d by 1 vector
def logistic_gradient(y,x,w):
    exponent = float(numpy.exp(y * numpy.dot(x,w))) # scalar
    derivative = (-y * x).T / (1 + exponent) # d by 1
    return derivative

SAGA with $l2$ norm. 

In [130]:
# X: training set, n by d 
# y: training labels, n by 1 
# lam: l2 norm regularization
def saga_l2(X,y,lam, step_size,max_epochs,proximal,obj_func,grad_func):
    obj_vals = []
    n, d = X.shape  
    w = numpy.zeros((d,1))
    derivatives = numpy.zeros((n,d))
    # initialize table with derivative w/weight 0
    for i in range(d):
        derivatives[i,:] = grad_func(y[i],X[i,:],w).reshape(9,)
        
    for epoch in range(max_epochs):
        permutation = numpy.random.permutation(n)
        X_shuffled = X[permutation,:]
        y_shuffled = y[permutation,:]
        obj_epoch = 0
        for i in range(n):
            xi = X_shuffled[i,:]
            yi = y_shuffled[i]
            
            updated_deriv = grad_func(yi,xi,w) # d by 1
            derivatives[permutation[i],:] = updated_deriv.reshape(d) 
            previous_deriv = derivatives[permutation[i],:].reshape((d,1)) # d by 1
            table_avg = numpy.mean(derivatives,axis=0).reshape((d,1))
            update = updated_deriv - previous_deriv + table_avg
            w = (1-step_size*lam) * w - (step_size * update)
            w = proximal(w)
            obj_iter = obj_func(y_shuffled[i],X_shuffled[i,:],w)
            obj_epoch += obj_iter
        obj_epoch /= n
        obj_vals.append(obj_epoch)
        print("Obj val at epoch " + str(epoch) + ' is ' + str(obj_epoch))

In [131]:
# h(x) = 0
def proximal(x):
    return x

In [132]:
_,eigs,_ = numpy.linalg.svd(x_train * x_train.T)
n,d = numpy.shape(x_train)
alpha = 1E-6
step_size = 1 / (n * alpha + 1/4 * eigs[0])
saga_l2(x_train,y_train,alpha,step_size,100,proximal,logistic_loss,logistic_gradient)

Obj val at epoch 0 is 0.6612772064254964
Obj val at epoch 1 is 0.5567893702877738
Obj val at epoch 2 is 0.51582944864235
Obj val at epoch 3 is 0.5011042114342608
Obj val at epoch 4 is 0.4908513274020966
Obj val at epoch 5 is 0.48746501185642827
Obj val at epoch 6 is 0.48432366120333825
Obj val at epoch 7 is 0.4824687919427225
Obj val at epoch 8 is 0.4811736841593891
Obj val at epoch 9 is 0.48034575290768355
Obj val at epoch 10 is 0.47967347219603934
Obj val at epoch 11 is 0.4794622071492786
Obj val at epoch 12 is 0.4789443990543848
Obj val at epoch 13 is 0.4787026487686383
Obj val at epoch 14 is 0.4786254227387948
Obj val at epoch 15 is 0.47839203129431607
Obj val at epoch 16 is 0.4784159643668195
Obj val at epoch 17 is 0.4783023778924946
Obj val at epoch 18 is 0.47826962176136567
Obj val at epoch 19 is 0.47822786412359986
Obj val at epoch 20 is 0.4782081872847647
Obj val at epoch 21 is 0.47818824941895777
Obj val at epoch 22 is 0.47816676700567673
Obj val at epoch 23 is 0.478197074562