<a href="https://colab.research.google.com/github/youhen3233/Image-Processing/blob/main/FNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import sys, os
import numpy as np
import matplotlib.pyplot as plt


train_data = np.load('./drive/MyDrive/Colab Notebooks/FNN/train.npz')
test_data = np.load('./drive/MyDrive/Colab Notebooks/FNN/test.npz')


# Data Pre_process==========================
x_train = train_data['image']
y_train = train_data['label']

x_test = test_data['image']
y_test = test_data['label']

one_hot_list = [ [1,0,0,0,0,0], [0,1,0,0,0,0], [0,0,1,0,0,0], [0,0,0,1,0,0], [0,0,0,0,1,0], [0,0,0,0,0,1]]

x_train_row = x_train.reshape(x_train.shape[0] , x_train.shape[1]*x_train.shape[2])
y_train_one_hot = []
for i in range(len(y_train)):
  y_train_one_hot.append( one_hot_list[y_train[i]] )
y_train_lab = np.array(y_train_one_hot) 

x_test_row = x_test.reshape(x_test.shape[0] , x_test.shape[1]*x_test.shape[2])
y_test_one_hot = []
for i in range(len(y_test)):
  y_test_one_hot.append( one_hot_list[y_test[i]] )
y_test_lab = np.array(y_test_one_hot) 

x_train_row = x_train_row / 255
x_test_row = x_test_row / 255


print(y_train_lab)

# Set up Model=============================
nn_struct = [
    {"input_dim": 1024, "output_dim": 256, "act": "relu"},
    {"input_dim": 256, "output_dim": 64, "act": "relu"},
    {"input_dim": 64, "output_dim": 6, "act": "softmax"},
]


#===functions===============
def relu(Z):
    return np.maximum(0,Z)

def relu_backward(dA, Z):
    dZ = np.array(dA, copy = True)
    dZ[Z <= 0] = 0
    return dZ

def softmax(Z):
    c = np.max(Z)
    exp_Z = np.exp(Z - c)
    sum_exp_Z = np.sum(exp_Z)
    return exp_Z / sum_exp_Z


def softmax_backward(dA ,Z):
    dZ = Z - dA
    return dZ



#Loss and accuracy===============

def convert_prob_into_class(probs):   #one hot prob
    probs_ = np.copy(probs)
    probs_[probs_ > 0.5] = 1
    probs_[probs_ <= 0.5] = 0
    return probs_

def get_accuracy_value(Y_hat, Y):
    #print(Y)
    #print(Y_hat)
    Y_hat_ = convert_prob_into_class(Y_hat)
    #print(Y_hat_)
    return (Y_hat_ == Y).all(axis=0).mean()


def cross_entropy(Y_hat, Y):
    delta = 1e-7
    return -np.sum(Y * np.log(Y_hat + delta))

"""
def cross_entropy(Y_hat, Y):
    epsilon =1e-12
    Y_hat = np.clip(Y_hat, epsilon, 1.-epsilon)
    N = Y_hat.shape[0]
    ce = - np.sum(Y * np.log(Y_hat)) 
    return ce
"""
#=======Update new param

def update(params_values, grads_values, nn_architecture, learning_rate):
    for layer_idx, layer in enumerate(nn_architecture):
        params_values["W" + str(layer_idx)] -= learning_rate * grads_values["dW" + str(layer_idx)]
        params_values["b" + str(layer_idx)] -= learning_rate * grads_values["db" + str(layer_idx)]

    return params_values


#=====set up model=====
def init_layers(nn_architecture, seed=99):
    np.random.seed(seed)
    W_B_DIC = {}   #dict for Weight and bias
    for idx, layer in enumerate(nn_architecture):
        input_size = layer["input_dim"]
        output_size = layer["output_dim"]

        W_B_DIC['W' + str(idx)] = np.random.randn( output_size, input_size) / np.sqrt(4)
        W_B_DIC['b' + str(idx)] = np.random.randn( output_size, 1) / np.sqrt(4)        

    return W_B_DIC


def for_prop(X, W_B_DIC, nn_architecture):
    memory = {}
    A_curr = X.T

    for idx, layer in enumerate(nn_architecture):
        A_prev = A_curr

        act_curr = layer["act"]
        W_curr = W_B_DIC["W" + str(idx)]
        b_curr = W_B_DIC["b" + str(idx)]
        #print("X_CURR",A_prev.shape)
        #print("W_CURR",W_curr.shape)
        #print("B_CURR",b_curr.shape)
        Z_curr = np.dot(W_curr, A_prev) + b_curr
        #print("Z_CURR",Z_curr.shape)
        if act_curr == "relu":
            activation_func = relu
        elif act_curr == "softmax":
            activation_func = softmax

        A_curr = activation_func(Z_curr)

        memory["A" + str(idx)] = A_prev
        memory["Z" + str(idx)] = Z_curr
    #print("final_output:",A_curr.shape)
    return A_curr, memory    


def backward_propagation(Y_hat, Y, memory, W_B_DIC, nn_architecture):
    grads_values = {}
    Y = Y.T
    dA_prev = Y


    for layer_idx_prev, layer in reversed(list(enumerate(nn_architecture))):
        activ_function_curr = layer["act"]

        dA_curr = dA_prev

        A_prev = memory["A" + str(layer_idx_prev)]
        Z_curr = memory["Z" + str(layer_idx_prev)]
        W_curr = W_B_DIC["W" + str(layer_idx_prev)]
        if activ_function_curr == "relu":
            backward_activation_func = relu_backward
        elif activ_function_curr == "softmax":    
            backward_activation_func = softmax_backward
            
        dZ_curr = backward_activation_func(dA_curr, Z_curr)
        dW_curr = np.dot(dZ_curr, A_prev.T) 
        db_curr = np.sum(dZ_curr, axis=1, keepdims=True) 
        dA_prev = np.dot(W_curr.T, dZ_curr)

        grads_values["dW" + str(layer_idx_prev)] = dW_curr
        grads_values["db" + str(layer_idx_prev)] = db_curr

    return grads_values



def train(X, Y, nn_architecture, epochs, learning_rate):
    W_B_DIC = init_layers(nn_architecture,55)
    loss_list = []
    accuracy_list = []
    loss_dict = {}
    for i in range(epochs):
        Y_hat, cashe = for_prop( X, W_B_DIC, nn_architecture)   #Y_hat = Colm major(估算值)
        print(y_train_lab)
        print(Y_hat)
        loss = cross_entropy(Y_hat.T, Y)
        loss_list.append(loss)
        accuracy = get_accuracy_value(Y_hat.T, Y)
        accuracy_list.append(accuracy)
        loss_dict[i]=loss
        print("epoch " ,i," loss :",loss ," accuracy :", accuracy)
        #print(i)
        #print(loss)
        #if i % 100 == 0:
        #    print("epoch " ,i," loss :",loss)

        grads_values = backward_propagation(Y_hat, Y, cashe, W_B_DIC, nn_architecture)
        W_B_DIC = update(W_B_DIC, grads_values, nn_architecture, learning_rate)

    return W_B_DIC, loss_list, accuracy_list, loss_dict



zz,lost,acc,loss_dict = train(x_train_row, y_train_lab, nn_struct, 200, 0.001)


[[1 0 0 0 0 0]
 [0 1 0 0 0 0]
 [0 1 0 0 0 0]
 ...
 [0 1 0 0 0 0]
 [0 0 0 0 0 1]
 [0 1 0 0 0 0]]
[[1 0 0 0 0 0]
 [0 1 0 0 0 0]
 [0 1 0 0 0 0]
 ...
 [0 1 0 0 0 0]
 [0 0 0 0 0 1]
 [0 1 0 0 0 0]]
[[2.29397761e-142 3.46833946e-207 1.50855730e-227 ... 2.28233509e-211
  8.15040121e-145 2.84228808e-200]
 [6.42103883e-225 2.69209875e-236 2.78168437e-228 ... 2.83138299e-219
  5.20093629e-223 1.92277736e-265]
 [6.65553434e-225 9.69671666e-237 1.16078271e-237 ... 2.71486336e-197
  2.71271594e-270 5.02709138e-245]
 [0.00000000e+000 6.31053224e-274 4.42929851e-320 ... 9.93000375e-270
  0.00000000e+000 1.74961995e-308]
 [7.96106403e-209 1.62560960e-236 2.85477698e-253 ... 2.72276952e-215
  1.22311926e-215 2.61947529e-252]
 [6.44420768e-253 3.03606187e-201 1.52720864e-229 ... 8.82795436e-199
  1.13929397e-243 2.04551778e-216]]
epoch  0  loss : 822022.8781988743  accuracy : 0.0
[[1 0 0 0 0 0]
 [0 1 0 0 0 0]
 [0 1 0 0 0 0]
 ...
 [0 1 0 0 0 0]
 [0 0 0 0 0 1]
 [0 1 0 0 0 0]]
[[0. 0. 0. ... 0. 0. 0.]
 [0. 

KeyboardInterrupt: ignored

In [None]:
import sys, os
import numpy as np
import matplotlib.pyplot as plt


train_data = np.load('./drive/MyDrive/Colab Notebooks/FNN/train.npz')
test_data = np.load('./drive/MyDrive/Colab Notebooks/FNN/test.npz')


# Data Pre_process==========================
x_train = train_data['image']
y_train = train_data['label']

x_test = test_data['image']
y_test = test_data['label']


one_hot_list = [ [1,0,0,0,0,0], [0,1,0,0,0,0], [0,0,1,0,0,0], [0,0,0,1,0,0], [0,0,0,0,1,0], [0,0,0,0,0,1]]

x_train_row = x_train.reshape(x_train.shape[0] , x_train.shape[1]*x_train.shape[2])
y_train_one_hot = []
for i in range(len(y_train)):
  y_train_one_hot.append( one_hot_list[y_train[i]] )
y_train_lab = np.array(y_train_one_hot) 
y_train_lab

array([[1, 0, 0, 0, 0, 0],
       [0, 1, 0, 0, 0, 0],
       [0, 1, 0, 0, 0, 0],
       ...,
       [0, 1, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 1],
       [0, 1, 0, 0, 0, 0]])

In [None]:
nn_struct = [
    {"input_dim": 1024, "output_dim": 256, "act": "relu"},
    {"input_dim": 256, "output_dim": 64, "act": "relu"},
    {"input_dim": 64, "output_dim": 6, "act": "softmax"},
]


for layer_idx_prev, layer in reversed(list(enumerate(nn_struct))):

  print(layer_idx_prev)
  print(layer)

2
{'input_dim': 64, 'output_dim': 6, 'act': 'softmax'}
1
{'input_dim': 256, 'output_dim': 64, 'act': 'relu'}
0
{'input_dim': 1024, 'output_dim': 256, 'act': 'relu'}


In [None]:
nn_architecture = [
    {"input_dim": 1024, "output_dim": 256, "act": "sigmoid"},
    {"input_dim": 256, "output_dim": 64, "act": "sigmoid"},
    {"input_dim": 64, "output_dim": 6, "act": "sigmoid"},
]

def init_layers(nn_architecture, seed=99):
    np.random.seed(seed)
    W_B_DIC = {}   #dict for Weight and bias
    for idx, layer in enumerate(nn_architecture):
        layer_idx = idx 
        input_size = layer["input_dim"]
        output_size = layer["output_dim"]

        W_B_DIC['W' + str(layer_idx)] = np.random.randn( output_size, input_size) / np.sqrt(4)
        W_B_DIC['b' + str(layer_idx)] = np.random.randn( output_size, 1) / np.sqrt(4)        

    return W_B_DIC

ccc = init_layers(nn_architecture, 87) 


In [None]:
import sys, os
sys.path.append(os.pardir)

import numpy as np
import matplotlib.pyplot as plt

nn_architecture = [
    {"input_dim": 1024, "output_dim": 256, "act": "sigmoid"},
    {"input_dim": 256, "output_dim": 64, "act": "sigmoid"},
    {"input_dim": 64, "output_dim": 6, "act": "sigmoid"},
]

def init_layers(nn_architecture, seed=99):
    np.random.seed(seed)
    params_values = {}

    for idx, layer in enumerate(nn_architecture):
        layer_idx = idx + 1
        layer_input_size = layer["input_dim"]
        layer_output_size = layer["output_dim"]

        params_values['W' + str(layer_idx)] = np.random.randn(
            layer_output_size, layer_input_size) / np.sqrt(4)
        params_values['b' + str(layer_idx)] = np.random.randn(
            layer_output_size, 1) / np.sqrt(4)

    return params_values

def sigmoid(Z):
    return 1/(1+np.exp(-Z))

def relu(Z):
    return np.maximum(0,Z)

def sigmoid_backward(dA, Z):
    sig = sigmoid(Z)
    return dA * sig * (1 - sig)

def relu_backward(dA, Z):
    dZ = np.array(dA, copy = True)
    dZ[Z <= 0] = 0
    return dZ

def softmax(Z):
    return np.maximum(0,Z)

def softmax_backward(dA, Z):
    
    return Z


def linear(Z):
    return Z

def linear_backward(dA, Z):
    dZ = np.array(dA, copy=True)
    return dZ

def forward_propagation(X, params_values, nn_architecture):
    memory = {}
    A_curr = X

    for idx, layer in enumerate(nn_architecture):
        layer_idx = idx + 1
        A_prev = A_curr

        activ_function_curr = layer["activation"]
        W_curr = params_values["W" + str(layer_idx)]
        b_curr = params_values["b" + str(layer_idx)]
        
        Z_curr = np.dot(W_curr, A_prev) + b_curr
        
        if activ_function_curr == "relu":
            activation_func = relu
        elif activ_function_curr == "sigmoid":
            activation_func = sigmoid
        elif activ_function_curr == "linear":
            activation_func = linear

        A_curr = activation_func(Z_curr)

        memory["A" + str(idx)] = A_prev
        memory["Z" + str(layer_idx)] = Z_curr

    return A_curr, memory

def get_loss_value(Y_hat, Y): #Binary Cross-entropy
    m = Y_hat.shape[1]
    loss = -1 / m * (np.dot(Y, np.log(Y_hat).T) + np.dot(1 - Y, np.log(1 - Y_hat).T))
    return np.squeeze(loss)

def get_accuracy_value(Y_hat, Y):
    print(Y_hat)
    Y_hat_ = convert_prob_into_class(Y_hat)
    print(Y_hat_)
    return (Y_hat_ == Y).all(axis=0).mean()


def convert_prob_into_class(probs):
    probs_ = np.copy(probs)
    probs_[probs_ > 0.5] = 1
    probs_[probs_ <= 0.5] = 0
    return probs_


def backward_propagation(Y_hat, Y, memory, params_values, nn_architecture):
    grads_values = {}
    Y = Y.reshape(Y_hat.shape)

    dA_prev = - (np.divide(Y, Y_hat) - np.divide(1 - Y, 1 - Y_hat))

    for layer_idx_prev, layer in reversed(list(enumerate(nn_architecture))):
        layer_idx_curr = layer_idx_prev + 1
        activ_function_curr = layer["activation"]

        dA_curr = dA_prev

        A_prev = memory["A" + str(layer_idx_prev)]
        Z_curr = memory["Z" + str(layer_idx_curr)]
        W_curr = params_values["W" + str(layer_idx_curr)]
        # b_curr = params_values["b" + str(layer_idx_curr)]
    
        if activ_function_curr == "relu":
            backward_activation_func = relu_backward
        elif activ_function_curr == "sigmoid":
            backward_activation_func = sigmoid_backward
        elif activ_function_curr == "linear":
            backward_activation_func = linear_backward
        elif activ_function_curr == "softmax":    
            backward_activation_func = softmax_backward
            
        dZ_curr = backward_activation_func(dA_curr, Z_curr)
        dW_curr = np.dot(dZ_curr, A_prev.T) 
        db_curr = np.sum(dZ_curr, axis=1, keepdims=True) 
        dA_prev = np.dot(W_curr.T, dZ_curr)

        grads_values["dW" + str(layer_idx_curr)] = dW_curr
        grads_values["db" + str(layer_idx_curr)] = db_curr

    return grads_values

def update(params_values, grads_values, nn_architecture, learning_rate):
    for layer_idx, layer in enumerate(nn_architecture,1):
        params_values["W" + str(layer_idx)] -= learning_rate * grads_values["dW" + str(layer_idx)]
        params_values["b" + str(layer_idx)] -= learning_rate * grads_values["db" + str(layer_idx)]

    return params_values


def train(X, Y, nn_architecture, epochs, learning_rate):
    params_values = init_layers(nn_architecture,87)
    loss_list = []
    accuracy_list = []
    loss_dict = {}
    for i in range(epochs):
        Y_hat, cashe = forward_propagation(X, params_values, nn_architecture)
        loss = get_loss_value(Y_hat, Y)
        loss_list.append(loss)
        accuracy = get_accuracy_value(Y_hat, Y)
        accuracy_list.append(accuracy)
        loss_dict[i]=loss
        if i % 1000 == 0:
            print("epoch " ,i," loss :",loss)

        grads_values = backward_propagation(Y_hat, Y, cashe, params_values, nn_architecture)
        params_values = update(params_values, grads_values, nn_architecture, learning_rate)

    return params_values, loss_list, accuracy_list, loss_dict


def generate_linear(n=100):
    pts = np.random.uniform(0, 1, (n, 2))
    inputs = []
    labels = []
    for pt in pts:
        inputs.append([pt[0], pt[1]])
        # distance = (pt[0] - pt[1]) / 1.414
        if pt[0] > pt[1]:
            labels.append(0)
        else:
            labels.append(1)
    return np.array(inputs), np.array(labels).reshape(n, 1)


def generate_XOR_easy():
    inputs = []
    labels = []
    for i in range(11):
        inputs.append([0.1 * i, 0.1 * i])
        labels.append(0)

        if (0.1 * i == 0.5):
            continue
        inputs.append([0.1 * i, 1 - 0.1 * i])
        labels.append(1)
    return np.array(inputs), np.array(labels).reshape(21, 1)


def show_result(x, y, pred_y):
    plt.subplot(1, 2, 1)
    plt.title("Ground truth", fontsize=18)
    for i in range(x.shape[0]):
        if y[i] == 0:
            plt.plot(x[i][0], x[i][1], 'ro')
        else:
            plt.plot(x[i][0], x[i][1], 'bo')
    plt.subplot(1, 2, 2)
    plt.title("Predict result", fontsize=18)
    for i in range(x.shape[0]):
        if pred_y[i] ==0:
            plt.plot(x[i][0], x[i][1], 'ro')
        else:
            plt.plot(x[i][0], x[i][1], 'bo')
    plt.show()


x,y=generate_linear(n=100)
x,y=generate_XOR_easy()

#x = xxx.png
x = x.reshape(-1)

zz,lost,acc,loss_dict = train(x.transpose(),y.transpose(),nn_architecture,20000,0.01)
out,_= forward_propagation(x.transpose(),zz,nn_architecture)
np.set_printoptions(suppress=True)
print("Predition:")
print(out)
out = convert_prob_into_class(out)
show_result(x,y,out.transpose())
print("Accuracy:",acc[-1]*100,"%")

plt.xlabel("epoch")
plt.ylabel("loss")
plt.title("Learning Curve", fontsize=18)
plt.plot(list(loss_dict),list(loss_dict.values()))
plt.show()


KeyError: ignored

In [2]:
import sys, os
import numpy as np
import matplotlib.pyplot as plt


train_data = np.load('./drive/MyDrive/Colab Notebooks/FNN/train.npz')
test_data = np.load('./drive/MyDrive/Colab Notebooks/FNN/test.npz')


# Data Pre_process==========================
x_train = train_data['image']
y_train = train_data['label']

x_test = test_data['image']
y_test = test_data['label']

print(x_train.shape)
print(y_train.shape)

print(x_test.shape)
print(y_test.shape)



one_hot_list = [ [1,0,0,0,0,0], [0,1,0,0,0,0], [0,0,1,0,0,0], [0,0,0,1,0,0], [0,0,0,0,1,0], [0,0,0,0,0,1]]

x_train_row = x_train.reshape(x_train.shape[0] , x_train.shape[1]*x_train.shape[2])
y_train_one_hot = []
for i in range(len(y_train)):
  y_train_one_hot.append( one_hot_list[y_train[i]] )
y_train_lab = np.array(y_train_one_hot) 

x_test_row = x_test.reshape(x_test.shape[0] , x_test.shape[1]*x_test.shape[2])
y_test_one_hot = []
for i in range(len(y_test)):
  y_test_one_hot.append( one_hot_list[y_test[i]] )
y_test_lab = np.array(y_test_one_hot) 

x_train_row = x_train_row / 255
x_test_row = x_test_row / 255


print(y_train_lab)

(51000, 32, 32)
(51000,)
(7954, 32, 32)
(7954,)
