In [1]:
import numpy as np
import scipy.io as sio
import matplotlib.pyplot as plt
from scipy.optimize import minimize
# np.set_printoptions(threshold=np.inf)

In [2]:
# Read the MATLAB file, then pull out the feature matrix and the label column.
# The features are divided by 32 as they are extracted (scaling constant; its
# origin is not documented in this notebook).
data = sio.loadmat('Rdatatrans.mat')
raw_X = data['QuMFCCAllMatrix'] / 32
raw_y = data['Label']

In [3]:
raw_X

array([[-6.5582007 ,  2.18126958,  2.18028664, ...,  1.83537195,
         1.83139946,  1.82706364],
       [-6.86384797,  0.6395561 ,  0.63950859, ...,  2.92615511,
         2.90693993,  2.88613045],
       [-6.88136547,  1.96676608,  1.96607175, ...,  2.37864409,
         2.36921914,  2.35896535],
       ...,
       [-5.36475125,  1.86217282,  1.86148689, ...,  3.8523291 ,
         3.78728383,  3.71812302],
       [-5.70366836,  1.62224722,  1.62179518, ...,  2.33145164,
         2.3206034 ,  2.30881569],
       [-5.72881689,  1.99068389,  1.9898401 , ...,  1.03575493,
         1.03482273,  1.03380286]])

In [4]:
# Prepend a column of ones (bias term) as column 0 of the feature matrix
X = np.insert(raw_X,0,values=1,axis=1)
X.shape

(2480, 105)

In [5]:
X

array([[ 1.        , -6.5582007 ,  2.18126958, ...,  1.83537195,
         1.83139946,  1.82706364],
       [ 1.        , -6.86384797,  0.6395561 , ...,  2.92615511,
         2.90693993,  2.88613045],
       [ 1.        , -6.88136547,  1.96676608, ...,  2.37864409,
         2.36921914,  2.35896535],
       ...,
       [ 1.        , -5.36475125,  1.86217282, ...,  3.8523291 ,
         3.78728383,  3.71812302],
       [ 1.        , -5.70366836,  1.62224722, ...,  2.33145164,
         2.3206034 ,  2.30881569],
       [ 1.        , -5.72881689,  1.99068389, ...,  1.03575493,
         1.03482273,  1.03380286]])

In [5]:
raw_y.shape

(2480, 1)

In [6]:
raw_y

array([[1],
       [1],
       [1],
       ...,
       [2],
       [2],
       [2]], dtype=uint8)

In [7]:
# one hot

In [88]:
def one_hot_encoder(raw_y, num_classes=2):
    """Convert 1-based integer class labels into a one-hot matrix.

    Parameters
    ----------
    raw_y : array-like of int
        Class labels in the range 1..num_classes. May be 1-D or a single
        column of shape (n, 1); it is flattened before encoding.
    num_classes : int, optional
        Number of classes, i.e. the width of the result. Defaults to 2.

    Returns
    -------
    numpy.ndarray
        Float array of shape (n, num_classes); row k has a 1 in column
        ``raw_y[k] - 1`` and 0 elsewhere.

    Raises
    ------
    ValueError
        If any label lies outside 1..num_classes.
    """
    # Cast to a signed integer type first: with unsigned labels (e.g. uint8),
    # subtracting 1 from a label of 0 would wrap around instead of going negative.
    labels = np.asarray(raw_y).reshape(-1).astype(np.int64)

    if labels.size and (labels.min() < 1 or labels.max() > num_classes):
        raise ValueError(
            "labels must lie in the range 1..%d" % num_classes
        )

    encoded = np.zeros((labels.size, num_classes))
    # One vectorized assignment instead of a Python loop over every sample.
    encoded[np.arange(labels.size), labels - 1] = 1
    return encoded

In [89]:
# One-hot targets: one row per sample, one column per class
y=one_hot_encoder(raw_y)

In [123]:
y

array([[1., 0.],
       [1., 0.],
       [1., 0.],
       ...,
       [0., 1.],
       [0., 1.],
       [0., 1.]])

In [124]:
raw_y

array([1, 1, 1, ..., 2, 2, 2], dtype=uint8)

In [125]:
y.shape

(2480, 2)

In [126]:
# theta1

In [127]:
# theta2

In [128]:
def serialize(a,b):
    """Flatten two arrays and join them end to end into one 1-D array.

    The elements of ``a`` (in row-major order) come first, followed by the
    elements of ``b``.
    """
    flat_parts = (a.ravel(), b.ravel())
    return np.concatenate(flat_parts)

In [129]:
# Random starting weights drawn uniformly from [-0.5, 0.5). No seed is set,
# so the values differ on every run.
theta_serialize=np.random.uniform(-0.5,0.5,751)
# 751 = 7*105 (theta1, shape (7, 105)) + 2*8 (theta2, shape (2, 8))

In [130]:
# theta_serialize=serialize(theta1,theta2)

In [131]:
# theta_serialize.shape

In [132]:
def deserialize(theta_serialize):
    """Split a flat parameter vector into the two weight matrices.

    The first 7*105 entries become a (7, 105) matrix and the remaining
    entries become a (2, 8) matrix. Both are reshaped slices of the input,
    not independent copies.
    """
    boundary = 7 * 105
    hidden_block = theta_serialize[:boundary]
    output_block = theta_serialize[boundary:]
    return hidden_block.reshape(7, 105), output_block.reshape(2, 8)
    

In [133]:
# Split the random vector into the two weight matrices to check their shapes
theta1,theta2 = deserialize(theta_serialize)

In [134]:
# theta1,theta2

In [135]:
theta1.shape,theta2.shape

((7, 105), (2, 8))

In [136]:
X.shape

(2480, 105)

In [137]:
# Forward Propagation

In [138]:
def sigmoid(z):
    """Element-wise logistic function 1 / (1 + e^(-z))."""
    decay = np.exp(-z)
    return 1.0 / (1.0 + decay)

In [139]:
def feed_forward(theta_serialize,X):
    """Run one forward pass through the two-layer network.

    Parameters
    ----------
    theta_serialize : flat weight vector, unpacked with ``deserialize``.
    X : input matrix, one sample per row. In this notebook it already
        carries the bias column of ones.

    Returns
    -------
    tuple ``(a1, z2, a2, z3, h)``: the input, the hidden pre-activation,
    the hidden activation with a column of ones prepended, the output
    pre-activation, and the output activation.
    """
    hidden_weights, output_weights = deserialize(theta_serialize)

    hidden_pre = X @ hidden_weights.T
    # Prepend a column of ones so the output layer gets a bias input.
    hidden_act = np.insert(sigmoid(hidden_pre), 0, values=1, axis=1)

    output_pre = hidden_act @ output_weights.T
    output_act = sigmoid(output_pre)

    return X, hidden_pre, hidden_act, output_pre, output_act
    
    

In [140]:
def cost(theta_serialize,X,y):
    """Unregularized cross-entropy cost, averaged over the rows of X.

    ``y`` holds the target values, with the same shape as the network output.
    """
    *_, h = feed_forward(theta_serialize, X)
    target_on = y * np.log(h)
    target_off = (1 - y) * np.log(1 - h)
    total = np.sum(target_on + target_off)
    return -total / len(X)

In [141]:
# cost(theta_serialize,X,y)

In [142]:
# Regularize

In [143]:
def reg_cost(theta_serialize,X,y,lamda):
    """Cross-entropy cost plus an L2 penalty on the non-bias weights.

    Parameters
    ----------
    theta_serialize : flat weight vector being evaluated.
    X : input matrix, one sample per row.
    y : target matrix matching the network output.
    lamda : regularization strength.

    Returns
    -------
    float
        ``cost(theta_serialize, X, y)`` plus
        ``lamda / (2 * len(X))`` times the sum of squared weights,
        excluding column 0 (the bias column) of each weight matrix.
    """
    # Unpack the weights from the vector being evaluated. Without this the
    # penalty would be computed from whatever module-level theta1/theta2
    # happen to exist, so it would not change as the optimizer moves and
    # would not match the gradient returned by reg_gradient.
    theta1,theta2 = deserialize(theta_serialize)
    sum1 = np.sum(np.power(theta1[:,1:],2))
    sum2 = np.sum(np.power(theta2[:,1:],2))
    reg = (sum1+sum2)*lamda/(2*len(X))
    return reg+cost(theta_serialize,X,y)

In [144]:
# lamda =1
# reg_cost(theta_serialize,X,y,lamda)

In [145]:
# Back propagation

In [146]:
def sigmoid_gradient(z):
    """Derivative of the logistic function at z: s * (1 - s), with s = sigmoid(z)."""
    activation = sigmoid(z)
    return activation * (1 - activation)

In [147]:
def gradient(theta_serialize,X,y):
    """Backpropagation gradient of the unregularized cost.

    Returns the gradients for both weight matrices, packed by ``serialize``
    in the same order that ``deserialize`` unpacks them.
    """
    _, output_weights = deserialize(theta_serialize)
    a1, z2, a2, _, h = feed_forward(theta_serialize, X)
    sample_count = len(X)

    # Output-layer error.
    output_delta = h - y
    # Propagate the error back through the non-bias output weights, then
    # scale by the slope of the hidden activation.
    hidden_delta = (output_delta @ output_weights[:, 1:]) * sigmoid_gradient(z2)

    grad_hidden = (hidden_delta.T @ a1) / sample_count
    grad_output = (output_delta.T @ a2) / sample_count
    return serialize(grad_hidden, grad_output)

In [148]:
# Regularized gradient

In [149]:
def reg_gradient(theta_serialize,X,y,lamda):
    """Gradient of the regularized cost, as a flat vector.

    Starts from ``gradient(theta_serialize, X, y)`` and adds
    ``theta * lamda / len(X)`` to every entry except column 0 (the bias
    column) of each weight matrix.
    """
    grad_hidden, grad_output = deserialize(gradient(theta_serialize, X, y))
    hidden_weights, output_weights = deserialize(theta_serialize)
    sample_count = len(X)

    # Bias columns (index 0) are left unpenalized.
    grad_hidden[:, 1:] += hidden_weights[:, 1:] * lamda / sample_count
    grad_output[:, 1:] += output_weights[:, 1:] * lamda / sample_count

    return serialize(grad_hidden, grad_output)
    

In [150]:
from scipy.optimize import minimize

def nn_training(X,y):
    """Fit the network weights by minimizing the regularized cost.

    Starts from a random weight vector and runs ``scipy.optimize.minimize``
    with the TNC method, using ``reg_cost`` as the objective and
    ``reg_gradient`` as its gradient. The regularization strength is read
    from the module-level name ``lamda``, which must be defined before
    this function is called.

    Returns the result object produced by ``minimize``.
    """
    # 751 serialized weights (7*105 + 2*8), drawn uniformly from [-0.5, 0.5)
    start_point = np.random.uniform(-0.5, 0.5, 751)
    solver_options = {'maxiter': 100000}
    return minimize(
        fun=reg_cost,
        x0=start_point,
        args=(X, y, lamda),
        method='TNC',
        jac=reg_gradient,
        options=solver_options,
    )

In [151]:
# Regularization strength. nn_training reads `lamda` as a global (it is not
# one of its parameters), so it must be assigned before the call.
lamda = 10
res = nn_training(X,y)

In [152]:
# res

In [153]:
# Flatten the labels to 1-D so they compare element-wise with y_pred below.
# Note: this rebinds raw_y, which earlier cells used as the (2480, 1) column.
raw_y  = data['Label'].reshape(2480,)

In [154]:
X.shape

(2480, 105)

In [155]:
# Predict with the optimized weights and score against the 1-based labels.
# X is the same matrix the network was fitted on, so this is training accuracy.
_,_,_,_,h = feed_forward(res.x,X)
# argmax gives a 0-based column index; +1 maps it back to the 1-based labels
y_pred = np.argmax(h,axis=1)+1
acc = np.mean(y_pred==raw_y)

acc

0.9387096774193548

In [43]:
print(y_pred)

[1 1 1 ... 2 2 2]


In [44]:
# theta_serialize,
# theta_serialize.shape

In [4]:
# res.x,
# res.x.shape