In [140]:
import numpy as np 
import matplotlib.pyplot as plt 
from sklearn.datasets import load_digits 
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [219]:
# Sigmoid function
# Input: scalar or arbitrary array
def sigmoid(x):
    """Element-wise logistic function 1 / (1 + exp(-x)).

    The naive formula evaluates exp(-x), which overflows for large negative
    x.  Here the exponential is always taken of a non-positive number, so no
    overflow can occur, and the result is mathematically identical.

    Parameters
    ----------
    x : scalar or array-like
        Input values; converted to a float array internally.

    Returns
    -------
    Float scalar for scalar input, otherwise an array with the shape of x,
    with values in [0, 1].
    """
    z = np.asarray(x, dtype=float)
    # exp(-|z|) lies in (0, 1] for every finite z, so it cannot overflow.
    decay = np.exp(-np.abs(z))
    # z >= 0: 1 / (1 + exp(-z));  z < 0: exp(z) / (1 + exp(z)) -- same function.
    result = np.where(z >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    # np.where yields a 0-d array for scalar input; unwrap it to a scalar.
    return result[()] if result.ndim == 0 else result

# Logistic regression prediction
# Input: parameters w, example(s) x
def logRegPrediction(w, x):
    """Return sigmoid(np.dot(x, w)) for weights w and input(s) x.

    Presumably x is (n_samples, n_features) and w is (n_features, 1), giving
    an (n_samples, 1) result -- TODO confirm against the callers.
    """
    logits = np.dot(x, w)
    return sigmoid(logits)

# Logistic regression NLL cost function
def logRegCost(w, x, y):
    """Mean negative log-likelihood of labels y under the logistic model.

    Parameters
    ----------
    w : weights, passed through to logRegPrediction.
    x : inputs, passed through to logRegPrediction.
    y : array of 0/1 labels; must broadcast against the predictions.

    Returns
    -------
    sum(-y*log(p) - (1-y)*log(1-p)) / y.size, where p are the predictions.

    Notes
    -----
    Predictions are clipped to [eps, 1 - eps] (eps = float64 machine epsilon)
    before taking logs.  Without this a saturated prediction of exactly 0 or
    1 produces 0 * log(0) = nan.  As a consequence the per-example loss is
    capped at -log(eps) (about 36).
    """
    eps = np.finfo(float).eps
    predicted = np.asarray(logRegPrediction(w, x), dtype=float)
    predicted = np.clip(predicted, eps, 1.0 - eps)
    posCost = - y * np.log(predicted)
    negCost = - (1.0 - y) * np.log(1.0 - predicted)
    totCost = posCost + negCost
    return np.sum(totCost) / y.size

# Logistic regression cost gradient
def logRegCostGrad(w, x, y):
    """Gradient of the summed logistic NLL with respect to w.

    Computes x.T @ (p - y), where p = logRegPrediction(w, x).  The result is
    NOT divided by the number of samples; the caller does that.

    Parameters
    ----------
    w : weights, passed through to logRegPrediction.
    x : 2-D array of inputs (its transpose is taken).
    y : labels with the same shape as the predictions.
    """
    # Predictions must come from the inputs x (not the labels y).
    predicted = logRegPrediction(w, x)
    # ndarray transpose is the upper-case attribute .T
    return np.dot(x.T, predicted - y)

# Logistic regression weight update
def logRegUpdate(w, x, y, learnRate):
    """Take one gradient-descent step and return the new weights.

    Parameters
    ----------
    w : current weights.
    x, y : inputs and labels, passed through to logRegCostGrad.
    learnRate : step size applied to the mean gradient.

    Returns
    -------
    A new array w - learnRate * (gradient / len(y)).  The caller's w is left
    untouched, so re-running a training cell starts from the same weights.
    """
    grad = logRegCostGrad(w, x, y) / len(y)
    # Use the learnRate parameter; the former name `lr` was never defined
    # here and only resolved if a global of that name existed in the kernel.
    return w - learnRate * grad

# Training
def logRegTrain(w, x, y, numIter, logFreq, learnRate):
    """Run numIter gradient-descent updates and record the cost after each.

    Parameters
    ----------
    w : initial weights.
    x, y : inputs and labels, passed through to logRegUpdate / logRegCost.
    numIter : number of update steps.
    logFreq : print the cost every logFreq iterations; 0 or None disables
        logging (previously 0 raised ZeroDivisionError).
    learnRate : step size handed to logRegUpdate.

    Returns
    -------
    (w, costs) : the weights returned by the final update and a list holding
    the cost after every iteration.
    """
    costs = []
    for i in range(numIter):
        w = logRegUpdate(w, x, y, learnRate)
        # Evaluate the cost once and reuse it for both the history and the log.
        cost = logRegCost(w, x, y)
        costs.append(cost)
        if logFreq and i % logFreq == 0:
            print("Epoch:", i, "Cost:", cost)
    return w, costs

In [670]:
# Load dataset
digits = load_digits()
digX = digits.images
digY = digits.target

# Flatten every image into a 1-D feature vector: (n, h, w) -> (n, h*w)
digX = np.reshape(digX, (digX.shape[0], digX.shape[1] * digX.shape[2]))

# Scale each image by its own maximum pixel value.  The row maxima are
# computed once (the earlier loop recomputed them for the whole matrix on
# every iteration) and the division builds a new array instead of writing
# into the reshaped array in place.
rowMax = np.amax(digX, axis=1, keepdims=True)
rowMax[rowMax == 0] = 1.0  # an all-zero image stays zero instead of becoming NaN
digX = digX / rowMax

# Fixed seed so the split and the initial weights are reproducible
SEED = 0
rng = np.random.RandomState(SEED)

# Split training and test sets
digXTrain, digXTest, digYTrain, digYTest = train_test_split(
    digX, digY, test_size=0.20, random_state=SEED)

# Turn the label vectors into (n, 1) columns so they line up with predictions
digYTrain = np.expand_dims(digYTrain, axis=1)
digYTest = np.expand_dims(digYTest, axis=1)

# Binary target: 1.0 where the label is the digit 1, 0.0 elsewhere.
# The earlier code got the same effect by aliasing `ones` to digYTrain and
# overwriting it in place; the conversion is now explicit.  digYTrain is
# still binary afterwards (digYTest keeps the original 0-9 labels, as before).
digYTrain = (digYTrain == 1).astype(float)
ones = digYTrain

# Initialize weights
w = rng.rand(digXTrain.shape[1], 1)
predict = 1.0 / (1.0 + np.exp(-np.dot(digXTrain, w)))

# Per-example negative log-likelihood and its mean over the training set
cost = ones * np.log(predict) + (1.0 - ones) * np.log(1.0 - predict)
cost = - cost
meanCost = cost.sum() / digYTrain.shape[0]

# Mean gradient of the cost; kept as the last expression so the cell shows it
gradient = np.dot(digXTrain.T, predict - digYTrain)
gradient / digYTrain.shape[0]

array([[ 0.00000000e+00],
       [ 1.89704622e-02],
       [ 3.04656176e-01],
       [ 6.78367075e-01],
       [ 6.72874426e-01],
       [ 3.22573896e-01],
       [ 7.72237293e-02],
       [ 7.52399533e-03],
       [ 3.04447872e-04],
       [ 1.22438769e-01],
       [ 6.17014620e-01],
       [ 6.55696465e-01],
       [ 5.47080565e-01],
       [ 4.55110384e-01],
       [ 1.02967530e-01],
       [ 6.74123850e-03],
       [ 1.30440635e-04],
       [ 1.55206941e-01],
       [ 5.69696105e-01],
       [ 3.34554868e-01],
       [ 3.54997168e-01],
       [ 4.40868962e-01],
       [ 1.05092726e-01],
       [ 3.34891836e-03],
       [-1.49188438e-08],
       [ 1.38023253e-01],
       [ 5.07273547e-01],
       [ 4.62031385e-01],
       [ 5.35179528e-01],
       [ 4.29370663e-01],
       [ 1.42071700e-01],
       [ 1.30479617e-04],
       [ 0.00000000e+00],
       [ 1.37680217e-01],
       [ 4.32544016e-01],
       [ 4.88275151e-01],
       [ 5.52136068e-01],
       [ 5.12094988e-01],
       [ 1.8