In [1]:
import numpy as np

In [2]:
from sklearn.datasets import load_digits

In [3]:
digits = load_digits()

In [4]:
print(digits.data.shape)
print(digits.target.shape)

(1797, 64)
(1797,)


In [5]:
A = digits.data

In [6]:
print(digits.DESCR)

.. _digits_dataset:

Optical recognition of handwritten digits dataset
--------------------------------------------------

**Data Set Characteristics:**

    :Number of Instances: 1797
    :Number of Attributes: 64
    :Attribute Information: 8x8 image of integer pixels in the range 0..16.
    :Missing Attribute Values: None
    :Creator: E. Alpaydin (alpaydin '@' boun.edu.tr)
    :Date: July; 1998

This is a copy of the test set of the UCI ML hand-written digits datasets
https://archive.ics.uci.edu/ml/datasets/Optical+Recognition+of+Handwritten+Digits

The data set contains images of hand-written digits: 10 classes where
each class refers to a digit.

Preprocessing programs made available by NIST were used to extract
normalized bitmaps of handwritten digits from a preprinted form. From a
total of 43 people, 30 contributed to the training set and different 13
to the test set. 32x32 bitmaps are divided into nonoverlapping blocks of
4x4 and the number of on pixels are counted in each blo

In [7]:
y = 1.0*np.ones([A.shape[0],1])
for i in range(digits.target.shape[0]):
  y[i] = digits.target[i]

In [8]:
def evalf(x):
  res = ((np.linalg.norm(np.subtract(np.matmul(A,x),y)))**2)/2
  return res

In [9]:
def evalg(x):
  arr = np.zeros(64)
  err = np.subtract(np.matmul(A,x),y)
  for i in range(10):
    arr[i] = np.matmul(err.transpose(),A[:,i])
  return arr.reshape(64,1)

In [10]:
def evalh(x):
  hes = np.zeros((64,64))
  for i in range(64):
    for j in range(64):
      hes[i][j] = np.matmul(A[:,i],A[:,j])
  return hes

In [11]:
def compute_steplength_backtracking_scaled_direction(x, p, gradf, alpha_start, rho, gamma, d_k):
  #assert type(x) is np.ndarray and len(x) == 2
  #assert type(gradf) is np.ndarray and len(gradf) == 2
  
  alpha = alpha_start

  while evalf(x+alpha*np.matmul(d_k,p)) > evalf(x) + gamma*alpha*(np.matmul(gradf.transpose(),np.matmul(d_k,p))):
    alpha = rho*alpha

  return alpha

In [12]:
def Newton(n, tol, *args):
  xlist = []
  x = np.zeros(n).reshape(n,1)
  xlist.append(x)
  grad_f = evalg(x)

  alpha = args[0]
  rho = args[1]
  gamma = args[2]

  hes_f = evalh(x)

  k = 0

  while np.linalg.norm(grad_f) > tol and k < 10000:
    p = -grad_f
    d = np.linalg.inv(hes_f)
    step_length = compute_steplength_backtracking_scaled_direction(x, p, grad_f, alpha, rho, gamma, d)
    x = np.add(x, np.multiply(step_length, np.matmul(d,p)))
    xlist.append(x)
    k += 1 
    grad_f = evalg(x) 
    hes_f = evalh(x)
  return k, x, evalf(x), xlist

In [13]:
Newton(64, 1e-5, 1, 0.5, 0.5)

LinAlgError: ignored

In [14]:
def evalf_l(x, lambd):
  res = np.multiply(lambd/2,np.matmul(x.transpose(),x))+((np.linalg.norm(np.subtract(np.matmul(A,x),y)))**2)/2
  return res

In [15]:
def evalg_l(x, lambd):
  arr = np.zeros(64)
  err = np.subtract(np.matmul(A,x),y)
  for i in range(64):
    arr[i] = lambd*x[i] + np.matmul(err.transpose(),A[:,i])
  return arr.reshape(64,1)

In [16]:
def evalh_l(x, lambd):
  hes = np.zeros((64,64))
  for i in range(64):
    for j in range(64):
      hes[i][j] = np.matmul(A[:,i],A[:,j])
      if i == j:
        hes[i][j] += lambd
  return hes

In [17]:
def compute_steplength_backtracking_scaled_direction(x, p, gradf, alpha_start, rho, gamma, d_k, lambd):
  #assert type(x) is np.ndarray and len(x) == 2
  #assert type(gradf) is np.ndarray and len(gradf) == 2
  
  alpha = alpha_start

  while evalf_l(x+alpha*np.matmul(d_k,p), lambd) > evalf_l(x, lambd) + gamma*alpha*(np.matmul(gradf.transpose(),np.matmul(d_k,p))):
    alpha = rho*alpha

  return alpha

In [18]:
def Newton_l(n, tol, lambd, *args):
  xlist = []
  x = np.zeros(n).reshape(n,1)
  xlist.append(x)
  grad_f = evalg_l(x, lambd)

  alpha = args[0]
  rho = args[1]
  gamma = args[2]

  hes_f = evalh_l(x, lambd)

  k = 0

  while np.linalg.norm(grad_f) > tol and k < 10000:
    p = -grad_f
    d = np.linalg.inv(hes_f)
    step_length = compute_steplength_backtracking_scaled_direction(x, p, grad_f, alpha, rho, gamma, d, lambd)
    x = np.add(x, np.multiply(step_length, np.matmul(d,p)))
    xlist.append(x)
    k += 1 
    grad_f = evalg_l(x, lambd) 
    hes_f = evalh_l(x, lambd)
  return k, x, evalf_l(x, lambd), xlist

In [19]:
iter, opt, fv, xl = Newton_l(64, 1e-5, 0.001, 1, 0.5, 0.5)

In [20]:
opt

array([[ 0.00000000e+00],
       [ 9.69076882e-02],
       [-4.32192762e-03],
       [-7.75916338e-03],
       [ 7.49591987e-02],
       [ 1.13946582e-02],
       [-2.71293921e-02],
       [-7.34410664e-03],
       [ 9.98267907e-01],
       [-2.88089376e-02],
       [ 1.18688356e-01],
       [ 6.60922719e-02],
       [-5.57075915e-02],
       [-6.97056149e-02],
       [ 9.65844014e-02],
       [ 2.55196013e-01],
       [-7.29830665e-01],
       [ 2.42711745e-02],
       [ 7.73241544e-02],
       [-2.33008498e-02],
       [-5.64077619e-02],
       [ 5.72413971e-02],
       [-4.88675248e-02],
       [-2.62555998e-01],
       [-9.06071169e-01],
       [-1.49769638e-01],
       [ 5.64022790e-02],
       [ 8.96665319e-02],
       [ 8.39315938e-02],
       [ 9.85410047e-02],
       [ 1.69269848e-03],
       [-2.96649688e+00],
       [ 0.00000000e+00],
       [-1.54361470e-01],
       [-9.32404640e-03],
       [ 1.39497978e-01],
       [-3.69237437e-02],
       [ 5.46111773e-02],
       [-9.2

#Ans 1:

In Newton's method, on using OLSLR, we get an error, since the hessian matrix comes out to be a singular matrix whose inverse does not exist.

On using regularized OLSLR, we are able to get a solution successfully, since the hessian matrix has $\lambda$ as its diagonal elements which leads to a matrix whose inverse is computable.

$x_{f\lambda}^*$ : [[ 0.00000000e+00],
       [ 9.69076882e-02],
       [-4.32192762e-03],
       [-7.75916338e-03],
       [ 7.49591987e-02],
       [ 1.13946582e-02],
       [-2.71293921e-02],
       [-7.34410664e-03],
       [ 9.98267907e-01],
       [-2.88089376e-02],
       [ 1.18688356e-01],
       [ 6.60922719e-02],
       [-5.57075915e-02],
       [-6.97056149e-02],
       [ 9.65844014e-02],
       [ 2.55196013e-01],
       [-7.29830665e-01],
       [ 2.42711745e-02],
       [ 7.73241544e-02],
       [-2.33008498e-02],
       [-5.64077619e-02],
       [ 5.72413971e-02],
       [-4.88675248e-02],
       [-2.62555998e-01],
       [-9.06071169e-01],
       [-1.49769638e-01],
       [ 5.64022790e-02],
       [ 8.96665319e-02],
       [ 8.39315938e-02],
       [ 9.85410047e-02],
       [ 1.69269848e-03],
       [-2.96649688e+00],
       [ 0.00000000e+00],
       [-1.54361470e-01],
       [-9.32404640e-03],
       [ 1.39497978e-01],
       [-3.69237437e-02],
       [ 5.46111773e-02],
       [-9.20425586e-03],
       [ 0.00000000e+00],
       [ 1.03326506e-01],
       [ 1.23983484e-01],
       [-1.37635230e-02],
       [ 5.40029021e-03],
       [ 1.31185700e-01],
       [ 5.49577815e-02],
       [ 2.24935899e-02],
       [ 7.48046265e-03],
       [ 6.17507773e-01],
       [ 2.44100619e-02],
       [ 1.42356436e-03],
       [-6.21114801e-02],
       [-2.07024631e-01],
       [-3.38510517e-02],
       [ 1.05486785e-01],
       [-1.40336482e-01],
       [-9.82347827e-01],
       [-1.14473639e-01],
       [ 2.10485229e-02],
       [-4.36083412e-02],
       [ 1.87372531e-02],
       [-6.66562217e-02],
       [ 1.19384465e-02],
       [-5.27782515e-02]]


In [21]:
def compute_steplength_backtracking(x, gradf, B, alpha_start, rho, gamma):
  #assert type(x) is np.ndarray and len(x) == 2
  #assert type(gradf) is np.ndarray and len(gradf) == 2
  
  alpha = alpha_start

  while evalf(x+alpha*-np.matmul(B,gradf)) > evalf(x) + gamma*alpha*np.matmul(gradf.transpose(),-np.matmul(B,gradf)):
    alpha = rho*alpha

  return alpha

In [22]:
def BFGS(n, tol, *args):
  xlist = []
  x1 = np.zeros(n).reshape(n,1)
  xlist.append(x1)
  grad_f = evalg(x1)

  alpha_start = args[0]
  rho = args[1]
  gamma = args[2]

  I = np.identity(n)

  B = I

  k = 0

  while np.linalg.norm(grad_f) > tol and k < 3000:
    alpha = compute_steplength_backtracking(x1, grad_f, B, alpha_start, rho, gamma)
    x2 = np.add(x1, np.multiply(alpha,np.matmul(B,-grad_f)))
    s = x2 - x1
    y = evalg(x2) - evalg(x1)
    mu = 1/np.matmul(y.transpose(),s)
    #print(p,alpha,x2,s,y,mu)
    B = np.add(np.matmul(np.matmul(np.subtract(I,np.matmul(np.multiply(mu,s),y.transpose())),B),np.subtract(I,np.matmul(np.multiply(mu,y),s.transpose()))),np.matmul(np.multiply(mu,s),s.transpose()))
    x1 = x2
    xlist.append(x1)
    grad_f = evalg(x2)
    k = k+1
    #print(np.linalg.norm(grad_f))
  return k, xlist, x1, evalf(x1)

In [23]:
iter, xl, opt, fv = BFGS(64, 1e-5, 1, 0.5, 0.5)

In [24]:
opt

array([[ 0.        ],
       [-0.24846114],
       [-0.00220601],
       [ 0.1728994 ],
       [ 0.15515614],
       [ 0.05214737],
       [ 0.13749949],
       [ 0.05760156],
       [ 0.49559015],
       [-0.01296211],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0

In [25]:
def compute_steplength_backtracking(x, gradf, B, alpha_start, rho, gamma, lambd):
  #assert type(x) is np.ndarray and len(x) == 2
  #assert type(gradf) is np.ndarray and len(gradf) == 2
  
  alpha = alpha_start

  while evalf_l(x+alpha*-np.matmul(B,gradf), lambd) > evalf_l(x, lambd) + gamma*alpha*np.matmul(gradf.transpose(),-np.matmul(B,gradf)):
    alpha = rho*alpha

  return alpha

In [26]:
def BFGS_l(n, tol, lambd, *args):
  xlist = []
  x1 = np.zeros(n).reshape(n,1)
  xlist.append(x1)
  grad_f = evalg_l(x1, lambd)

  alpha_start = args[0]
  rho = args[1]
  gamma = args[2]

  I = np.identity(n)

  B = I

  k = 0

  while np.linalg.norm(grad_f) > tol and k < 3000:
    alpha = compute_steplength_backtracking(x1, grad_f, B, alpha_start, rho, gamma, lambd)
    x2 = np.add(x1, np.multiply(alpha,np.matmul(B,-grad_f)))
    s = x2 - x1
    y = evalg_l(x2, lambd) - evalg_l(x1, lambd)
    mu = 1/np.matmul(y.transpose(),s)
    #print(p,alpha,x2,s,y,mu)
    B = np.add(np.matmul(np.matmul(np.subtract(I,np.matmul(np.multiply(mu,s),y.transpose())),B),np.subtract(I,np.matmul(np.multiply(mu,y),s.transpose()))),np.matmul(np.multiply(mu,s),s.transpose()))
    x1 = x2
    xlist.append(x1)
    grad_f = evalg_l(x2, lambd)
    k = k+1
    #print(np.linalg.norm(grad_f))
  return k, x1, evalf_l(x1, lambd), xlist

In [27]:
iter, opt, fv, xl = BFGS_l(64, 1e-5, 0.001, 1, 0.5, 0.5)

In [28]:
opt

array([[ 0.00000000e+00],
       [ 9.69076881e-02],
       [-4.32192760e-03],
       [-7.75916338e-03],
       [ 7.49591987e-02],
       [ 1.13946582e-02],
       [-2.71293921e-02],
       [-7.34410665e-03],
       [ 9.98267910e-01],
       [-2.88089376e-02],
       [ 1.18688356e-01],
       [ 6.60922720e-02],
       [-5.57075915e-02],
       [-6.97056149e-02],
       [ 9.65844014e-02],
       [ 2.55196013e-01],
       [-7.29830678e-01],
       [ 2.42711745e-02],
       [ 7.73241544e-02],
       [-2.33008498e-02],
       [-5.64077619e-02],
       [ 5.72413971e-02],
       [-4.88675248e-02],
       [-2.62555998e-01],
       [-9.06071136e-01],
       [-1.49769638e-01],
       [ 5.64022790e-02],
       [ 8.96665319e-02],
       [ 8.39315938e-02],
       [ 9.85410047e-02],
       [ 1.69269849e-03],
       [-2.96649687e+00],
       [ 0.00000000e+00],
       [-1.54361470e-01],
       [-9.32404641e-03],
       [ 1.39497978e-01],
       [-3.69237437e-02],
       [ 5.46111773e-02],
       [-9.2

#Ans 2:

On using BFGS, we successfully get a solution for both OLSLR and regularized OLSLR without facing any difficulties.
Though, we can see that we get a lower value in regularized OLSLR in comparison to OLSLR.

$x_{f}^*$ : [[ 0.        ],
       [-0.24846114],
       [-0.00220601],
       [ 0.1728994 ],
       [ 0.15515614],
       [ 0.05214737],
       [ 0.13749949],
       [ 0.05760156],
       [ 0.49559015],
       [-0.01296211],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ],
       [ 0.        ]]

----------------------------------------------------------------------

$x_{f\lambda}^*$ : [[ 0.00000000e+00],
       [ 9.69076881e-02],
       [-4.32192760e-03],
       [-7.75916338e-03],
       [ 7.49591987e-02],
       [ 1.13946582e-02],
       [-2.71293921e-02],
       [-7.34410665e-03],
       [ 9.98267910e-01],
       [-2.88089376e-02],
       [ 1.18688356e-01],
       [ 6.60922720e-02],
       [-5.57075915e-02],
       [-6.97056149e-02],
       [ 9.65844014e-02],
       [ 2.55196013e-01],
       [-7.29830678e-01],
       [ 2.42711745e-02],
       [ 7.73241544e-02],
       [-2.33008498e-02],
       [-5.64077619e-02],
       [ 5.72413971e-02],
       [-4.88675248e-02],
       [-2.62555998e-01],
       [-9.06071136e-01],
       [-1.49769638e-01],
       [ 5.64022790e-02],
       [ 8.96665319e-02],
       [ 8.39315938e-02],
       [ 9.85410047e-02],
       [ 1.69269849e-03],
       [-2.96649687e+00],
       [ 0.00000000e+00],
       [-1.54361470e-01],
       [-9.32404641e-03],
       [ 1.39497978e-01],
       [-3.69237437e-02],
       [ 5.46111773e-02],
       [-9.20425584e-03],
       [ 0.00000000e+00],
       [ 1.03326506e-01],
       [ 1.23983484e-01],
       [-1.37635230e-02],
       [ 5.40029021e-03],
       [ 1.31185700e-01],
       [ 5.49577815e-02],
       [ 2.24935899e-02],
       [ 7.48046269e-03],
       [ 6.17507773e-01],
       [ 2.44100619e-02],
       [ 1.42356435e-03],
       [-6.21114801e-02],
       [-2.07024631e-01],
       [-3.38510517e-02],
       [ 1.05486785e-01],
       [-1.40336482e-01],
       [-9.82347828e-01],
       [-1.14473639e-01],
       [ 2.10485229e-02],
       [-4.36083412e-02],
       [ 1.87372531e-02],
       [-6.66562217e-02],
       [ 1.19384465e-02],
       [-5.27782515e-02]]