In [37]:
import numpy as np
import scipy.optimize as optimize
import scipy.io as sio

            

In [135]:
def sigmoid(z):
    return 1/(1 + np.exp(-z))

def h(t, X):
    row, col = x.shape     
    hypo = sigmoid(np.dot(X, t))
    new_hypo = hypo.reshape((row, 1)) # need to reshape (#rows,1)
    return new_hypo

In [137]:
def costfn(theta, X, y, reg_factor):
    m, n = X.shape
    theta_new = theta.reshape((n,1))
    cost = -y*np.log(h(theta,X))-(1-y)*np.log(1-h(theta,X))
    reg_term = reg_factor/(2*m)*np.sum(np.square(theta_new))
    J = 1/m*np.sum(cost) + reg_term
    return J

In [130]:
def gradfn(theta, X, y, reg_factor):
    m, n = X.shape
    regtheta = np.ones( (X.shape[1], 1) )
    regtheta[0] = 0
    part_one = (np.transpose(X))
    part_two = h(theta, X) - y
    main_term = ( np.dot(part_one, part_two) ) /m
    reg_term = (regtheta * theta.reshape( (n, 1)) ) * (reg_factor)/m
    grad = main_term + reg_term
    grad = grad.flatten()
    return grad

In [149]:
def one_vs_all(X, y, num_labels, reg_factor):
    m, n = X.shape
    initial_theta = np.zeros((n, 1))
    all_theta = np.zeros((num_labels, n))
    for i in range(1, num_labels + 1):
        print(i)
        xopt, fopt, iter, funcalls, warnflag = optimize.fmin_cg(costfn, x0=initial_theta, fprime=gradfn, maxiter=200, disp=True, args=(X, (y==i)*1, reg_factor), full_output = True )
        print(iter)
        print("theta: ",xopt)
    return all_theta

In [150]:
data=sio.loadmat("ex3.mat")
X = data.get("X")
y = data.get("y")
num_labels = 10
reg_factor = 0.1
one_vs_all(X, y, num_labels, reg_factor)

1
Optimization terminated successfully.
         Current function value: 0.013721
         Iterations: 105
         Function evaluations: 405
         Gradient evaluations: 405
405
theta:  [ 0.00000000e+00  0.00000000e+00  1.02878820e-04 -9.51259085e-04
 -1.03529533e-03  6.98456816e-03 -7.96099772e-02 -3.38890723e-02
  2.84561063e-03  1.41932406e-03  2.32552621e-04  1.04422173e-03
  4.94404811e-03  2.79950767e-03  1.98312660e-03  2.12964879e-03
  1.81709231e-04 -3.10699663e-05 -2.12258748e-08  0.00000000e+00
 -6.36534616e-09  2.91430791e-06 -1.24782259e-03  1.11957684e-02
  1.26344804e-02  9.76014439e-03 -1.14045642e-01 -4.76663846e-02
  6.14781872e-03 -2.04080198e-02 -2.97284134e-03 -1.19411504e-02
 -5.41812712e-02 -2.28850958e-02 -1.02302017e-02 -1.29633618e-02
  1.17824776e-02  1.26448146e-02  5.27124127e-05  1.05434931e-04
  5.37219791e-08 -4.89470009e-04  8.64122982e-04  4.45147899e-02
  8.54950778e-02  1.44247791e-01  1.86158293e-01 -5.56532609e-02
 -1.59492249e-01 -1.57845778e-0

         Current function value: 0.068221
         Iterations: 200
         Function evaluations: 580
         Gradient evaluations: 580
580
theta:  [ 0.00000000e+00  0.00000000e+00 -9.28013304e-05 -2.06478514e-03
  3.82533630e-02  3.05999710e-01 -9.04110100e-02 -1.57873828e-01
 -3.27359848e-02  4.86325664e-03  1.31132042e-04 -1.04790253e-02
 -4.17299304e-02  1.79576056e-01  2.82401133e-01 -3.92793406e-03
 -2.43477881e-03  7.71687652e-05  5.38082570e-06  0.00000000e+00
 -5.03783387e-08 -1.47914703e-05  1.31780806e-03 -2.66937561e-03
 -8.14706067e-02 -8.42750099e-01 -1.02624916e+00 -6.58095249e-01
 -2.48287141e-01 -2.19990494e-01 -6.18495446e-02  1.45551959e-01
  4.18819232e-01 -2.06952104e+00 -3.22291753e+00 -2.85282872e-02
 -3.59061669e-02 -1.10984386e-01 -1.85154904e-02  4.75208527e-03
  4.34432595e-07 -6.88349506e-04 -2.28865731e-02  2.26575077e-01
  1.57060979e+00  2.17318307e+00  6.26474755e-01  6.05565517e-01
 -4.78684383e-02 -1.65429930e-01 -4.49423468e-01 -1.93570040e+00
 -2.96

         Current function value: 0.054546
         Iterations: 200
         Function evaluations: 599
         Gradient evaluations: 599
599
theta:  [ 0.00000000e+00  0.00000000e+00 -4.11128152e-07  3.77525114e-04
 -4.60264887e-03 -4.05642505e-02  5.46894743e-05 -1.02394991e-02
 -5.04790474e-03  2.19938007e-03  1.84598868e-04  3.24204921e-03
  2.14752519e-02 -1.04714516e-01 -1.62290879e-01  1.09990354e-02
  1.51179922e-03 -2.53870021e-05 -9.52319289e-06  0.00000000e+00
 -2.63065826e-07 -2.35762375e-05 -1.44300861e-03  1.54977948e-02
  3.15872450e-02  3.80242084e-03  3.78767097e-02 -3.14254567e-02
 -1.94732652e-02 -5.43726525e-02  9.41398818e-03 -2.31060260e-02
 -1.98302978e-01  1.16699015e+00  1.80543360e+00 -1.11874977e-01
 -3.98702599e-02 -2.25064057e-02  8.48802782e-04  1.06614062e-07
  2.35051318e-06  5.56968527e-05  2.29431399e-02 -1.88116233e-01
 -7.31692772e-01 -1.26543424e+00 -1.70492328e+00 -4.56772346e-01
 -1.65047097e-01  2.16959795e-01  3.58750669e-02 -3.72825813e-01
 -1.04

Optimization terminated successfully.
         Current function value: 0.031478
         Iterations: 191
         Function evaluations: 647
         Gradient evaluations: 647
647
theta:  [ 0.00000000e+00  0.00000000e+00 -1.09550386e-05 -2.57990057e-04
  4.02805287e-03  3.13684283e-02  9.40793327e-02 -6.54892223e-03
 -3.23228859e-03 -3.45501745e-03 -1.26445151e-03  8.54174481e-04
  4.12386008e-03  1.62046566e-03 -3.46155172e-04  3.40246954e-05
  3.03247378e-06  1.23601629e-07 -4.14915361e-08  0.00000000e+00
 -7.73623582e-09  1.79723278e-08  1.17913596e-04  3.42136624e-03
 -5.29953974e-02 -4.82088548e-01  3.99214698e-02  4.90719049e-02
 -2.22030098e-02 -9.05839337e-02 -1.22057738e-03 -4.97459574e-02
 -1.30793193e-01 -3.29354082e-02  4.00288410e-02  1.82241658e-02
  2.31088445e-02  8.88091752e-03  1.30269732e-03  2.46708224e-03
  6.62088007e-08 -1.04169721e-06 -1.89759132e-03  1.55513648e-02
  1.67497930e-02  6.42000771e-01 -2.14597113e-01  2.13541783e+00
  3.96427258e+00  3.27627577e+00 

         Current function value: 0.083669
         Iterations: 200
         Function evaluations: 555
         Gradient evaluations: 555
555
theta:  [ 0.00000000e+00  0.00000000e+00 -2.26771786e-06  7.22280798e-05
 -4.26792264e-04 -7.03325746e-03 -1.35086553e-02  1.00891749e-03
 -2.22923071e-02 -5.95854341e-03  1.72791862e-03  4.24174177e-03
  1.09215924e-02  1.12603096e-02  2.05295246e-02  1.43404994e-02
 -4.38566274e-04  5.33378227e-05 -7.93204347e-06  0.00000000e+00
 -4.77644541e-08 -6.88951081e-06  5.98956185e-05  3.19242705e-04
 -2.56158796e-03  4.85129159e-02 -2.43868457e-01 -4.11277185e-01
 -5.52800287e-01 -2.37662409e-01 -1.52632001e-02 -8.78402944e-02
 -7.38542014e-02 -5.66174310e-02 -2.11737641e-01 -1.60457887e-01
  1.72525541e-02  2.36818605e-02 -1.01393547e-03 -8.23154472e-03
  4.08866561e-07  5.13658183e-05 -3.57705009e-04 -2.39702075e-03
  1.33171284e-02 -2.81560330e-01 -3.73063677e-01 -1.78300149e+00
 -3.86782770e+00 -5.07518647e-01  6.55580416e-01 -4.23164820e-02
 -3.20

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [125]:
m, n = X.shape
reg_theta = np.ones((n, 1))#400*1
reg_theta[0] = 0
grad = 1/m*(np.dot(X.T, h(initial_theta, X) - y)) + reg_theta*initial_theta.reshape((n,1))*reg_factor/m
grad
X = np.concatenate((np.ones((1,X.shape[1])),X))

array([[ 0.00000000e+00],
       [ 0.00000000e+00],
       [-7.74530186e-08],
       [ 3.19876600e-06],
       [ 1.89536237e-05],
       [-7.06376094e-04],
       [-8.97395355e-04],
       [-3.72741263e-04],
       [-1.10787541e-04],
       [-1.37049401e-04],
       [-4.11905416e-05],
       [ 3.09307938e-05],
       [ 7.56273049e-05],
       [ 1.66101324e-04],
       [ 1.88959823e-04],
       [ 1.11618541e-04],
       [ 3.44740605e-05],
       [ 2.31849497e-07],
       [-3.65944989e-07],
       [ 0.00000000e+00],
       [-2.71480120e-07],
       [ 2.68348312e-06],
       [ 1.35802658e-06],
       [ 5.10971729e-05],
       [ 9.29459372e-05],
       [-1.13518367e-03],
       [-4.04630781e-03],
       [-5.65616107e-03],
       [-4.38249876e-03],
       [-1.22936584e-03],
       [ 1.06402725e-03],
       [ 1.67724463e-03],
       [ 1.83122226e-03],
       [ 1.41359436e-03],
       [ 1.00669534e-03],
       [ 6.22600175e-04],
       [ 2.88468971e-04],
       [ 1.69300222e-04],
       [ 4.5

In [109]:
m, n = X.shape
initial_theta = np.zeros((n, 1))
h(initial_theta , X).shape

(5000, 1)

In [120]:
X = np.concatenate((np.ones((1,X.shape[1])),X))
X.shape

(5001, 400)

In [115]:
np.concatenate((np.ones((1,X.shape[1])),X))

array([[1., 1., 1., ..., 1., 1., 1.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [119]:
X

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [98]:
a = np.array([1, 2, 3])

b = np.array([[4, 5, 6],[7,8,9]])

np.vstack((a, b))


array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])