In [1]:
import numpy as np
import scipy.io as sio
import matplotlib.pyplot as plt
from scipy.optimize import minimize

In [2]:
data = sio.loadmat('Rdatatrans.mat')
raw_X =data['FeatureAllMatrix']
raw_y = data['Label']

In [3]:
raw_X

array([[ 6.95887249,  5.84889693,  7.5427904 , ...,  1.0875    ,
         1.05      ,  1.0375    ],
       [ 2.38926984,  3.8235673 ,  6.06348775, ...,  1.        ,
         1.        ,  1.        ],
       [ 0.7761943 ,  1.28524766,  2.17574786, ...,  0.9875    ,
         0.9875    ,  0.9875    ],
       ...,
       [ 8.42057759,  8.84926699,  9.63983765, ...,  0.7625    ,
         0.7625    ,  0.65      ],
       [21.0408627 ,  4.67376369,  4.79569697, ...,  0.8375    ,
         0.775     ,  0.85      ],
       [ 1.54021325,  1.81700343,  1.04055364, ...,  0.725     ,
         0.875     ,  0.875     ]])

In [4]:
X = np.insert(raw_X,0,values=1,axis=1)
X.shape

(2480, 191)

In [5]:
raw_y.shape

(2480, 1)

In [6]:
raw_y

array([[1],
       [1],
       [1],
       ...,
       [2],
       [2],
       [2]], dtype=uint8)

In [7]:
# one hot

In [8]:
def one_hot_encoder(raw_y):
    
    result=[]
    
    for i in raw_y: #1 to 2480
        y_temp=np.zeros(2)# 1 to 2
        y_temp[i-1]=1
        
        result.append(y_temp)
    return np.array(result)

In [9]:
y=one_hot_encoder(raw_y)

In [10]:
y

array([[1., 0.],
       [1., 0.],
       [1., 0.],
       ...,
       [0., 1.],
       [0., 1.],
       [0., 1.]])

In [11]:
raw_y

array([[1],
       [1],
       [1],
       ...,
       [2],
       [2],
       [2]], dtype=uint8)

In [12]:
y.shape

(2480, 2)

In [13]:
# theta = sio.loadmat('ex4weights.mat')

# theta1,theta2=theta['Theta1'],theta['Theta2']
# theta1.shape,theta2.shape

In [14]:
# theta1

In [15]:
# theta2

In [16]:
def serialize(a,b):
    return np.append(a.flatten(),b.flatten())

In [17]:
theta_serialize=np.random.uniform(-0.5,0.5,1932)
# 191*10+11*2=1932

In [18]:
# theta_serialize=serialize(theta1,theta2)

In [19]:
# theta_serialize.shape

In [20]:
def deserialize(theta_serialize):
    theta1 = theta_serialize[:10*191].reshape(10,191)
    theta2 = theta_serialize[10*191:].reshape(2,11)
    return theta1,theta2
    

In [21]:
theta1,theta2 = deserialize(theta_serialize)

In [22]:
theta1,theta2

(array([[ 0.10798976,  0.05687233, -0.38480969, ..., -0.44677962,
         -0.1488554 ,  0.31013747],
        [-0.25556596,  0.11809122,  0.27644693, ..., -0.123445  ,
          0.08865906,  0.2747182 ],
        [-0.4263402 ,  0.1882188 ,  0.22107313, ..., -0.09838779,
          0.48084601, -0.05567815],
        ...,
        [-0.14528582, -0.43196719, -0.37378669, ...,  0.4987702 ,
         -0.01603071, -0.38320769],
        [ 0.3125445 ,  0.08694657, -0.15775167, ...,  0.25815401,
         -0.1881025 , -0.11103308],
        [ 0.36147221,  0.46951795, -0.11143268, ..., -0.08146772,
          0.16225299, -0.33820918]]),
 array([[-0.42615651,  0.41533919,  0.41605795, -0.05053658, -0.44351089,
         -0.34361084,  0.06486751,  0.02545942, -0.45636228, -0.3091665 ,
         -0.46198238],
        [-0.08867892,  0.27468564,  0.37625407, -0.17928646,  0.09011092,
         -0.38742055,  0.49140054, -0.43843127, -0.48181199,  0.05613752,
         -0.11859907]]))

In [23]:
theta1.shape,theta2.shape

((10, 191), (2, 11))

In [24]:
X.shape

(2480, 191)

In [25]:
# Forward Propagation

In [26]:
def sigmoid(z):
    return 1/(1+np.exp(-z))

In [27]:
def feed_forward(theta_serialize,X):
    theta1,theta2 = deserialize(theta_serialize)
    a1 = X
    z2 = a1@theta1.T
    a2 = sigmoid(z2)
    a2 = np.insert(a2,0,values=1,axis=1)
    z3 = a2@theta2.T
    h = sigmoid(z3)
    return a1,z2,a2,z3,h
    
    

In [28]:
def cost(theta_serialize,X,y):
    a1,z2,a2,z3,h=feed_forward(theta_serialize,X)
    J = -np.sum(y*np.log(h)+(1-y)*np.log(1-h))/len(X)
    return J

In [29]:
# cost(theta_serialize,X,y)

In [30]:
# Regualize

In [31]:
def reg_cost(theta_serialize,X,y,lamda):
    sum1 = np.sum(np.power(theta1[:,1:],2))
    sum2 = np.sum(np.power(theta2[:,1:],2))
    reg = (sum1+sum2)*lamda/(2*len(X))
    return reg+cost(theta_serialize,X,y)

In [32]:
# lamda =1
# reg_cost(theta_serialize,X,y,lamda)

In [33]:
# Back propagation

In [34]:
def sigmoid_gradient(z):
    return sigmoid(z)*(1-sigmoid(z))

In [35]:
def gradient(theta_serialize,X,y):
    theta1,theta2 = deserialize(theta_serialize)
    a1,z2,a2,z3,h=feed_forward(theta_serialize,X)
    d3 = h-y
    d2 = d3@theta2[:,1:]*sigmoid_gradient(z2)
    D2 = (d3.T@a2)/len(X)
    D1 = (d2.T@a1)/len(X)
    return serialize(D1,D2)

In [36]:
# Regualized gradient

In [37]:
def reg_gradient(theta_serialize,X,y,lamda):
    D = gradient(theta_serialize,X,y)
    
    D1,D2=deserialize(D)
    
    theta1,theta2 = deserialize(theta_serialize)
    D1[:,1:] = D1[:,1:] + theta1[:,1:]*lamda/len(X)
    D2[:,1:] = D2[:,1:] + theta2[:,1:]*lamda/len(X)
    
    return serialize(D1,D2)
    

In [38]:
from scipy.optimize import minimize

def nn_training(X,y):
    
    init_theta = np.random.uniform(-0.5,0.5,1932)
    res = minimize(fun=reg_cost,
                   x0=init_theta,
                   args=(X,y,lamda),
                   method='TNC',
                   jac=reg_gradient,
                   options={'maxiter':100000})
    
    return res

In [39]:
lamda = 10
res = nn_training(X,y)

In [40]:
res

     fun: 0.5822472416870981
     jac: array([-6.58083013e-05, -1.01104635e-03,  8.82857135e-04, ...,
       -3.71697004e-04, -6.62190517e-04, -5.40502755e-04])
 message: 'Converged (|f_n-f_(n-1)| ~= 0)'
    nfev: 3108
     nit: 313
  status: 1
 success: True
       x: array([ 7.7626981 ,  0.02039121, -0.03737987, ..., -1.47996482,
       -1.8535552 , -1.54339902])

In [41]:
raw_y  = data['Label'].reshape(2480,)

In [42]:
X.shape

(2480, 191)

In [43]:
_,_,_,_,h = feed_forward(res.x,X)
y_pred = np.argmax(h,axis=1)+1
acc = np.mean(y_pred==raw_y)

acc

0.969758064516129

In [44]:
y_pred

array([1, 1, 1, ..., 2, 2, 2], dtype=int64)

In [45]:
theta_serialize,
# theta_serialize.shape

(array([ 0.46971566, -0.36530469,  0.17025056, ...,  0.41469956,
         0.37245274, -0.00879782]),)

In [46]:
res.x,
# res.x.shape

(array([ 2.73927417,  0.13406288,  0.10399072, ..., -1.35159889,
         1.5652617 ,  2.01119635]),)