In [35]:
import numpy as np
import time
from IPython.core.debugger import Tracer
#this one triggers the debugger
from collections import defaultdict
from pprint import pprint

In [36]:
def gen_input_neurons_from_file(f_name):
    r"""generate input neurons from a given file.
    
        each line of the file contains the labels and sentences.
        they are separated by a tab.
        
        Parameters
        ----------
        f_name:
            the text file name

        Returns
        -------
        idx_dict:
            dict, idxs of the word in format of idx_dict[w]=idx
        input_neorons:
            list, each item is a np array contains word freq of a sentence
            format : input_neurons[0]=(0,0,...1,2,...0)
        labels:
            list, each item is a interger of 1 and -1 indicating the label
        
        """
    idxs_dict=defaultdict(int)
    input_neorons=[]
    labels=[]
    
    for line in open(f_name,"r",encoding="utf-8"):
        label, sentence = line.split("\t")
        for w in sentence.split():
            if w not in idxs_dict:
                idxs_dict[w]=len(idxs_dict)
    len_input_neurons=len(idxs_dict)
    for line in open(f_name,"r",encoding="utf-8"):
        input_neoron=np.zeros(len_input_neurons)
        label,sentence=line.split("\t")
        for w in sentence.split():
            input_neoron[idxs_dict[w]]+=1
        input_neorons.append(input_neoron)
        labels.append(np.array(int(label)))
    return idxs_dict,input_neorons,labels
        

In [37]:
idxs_dict,input_neurons,labels=gen_input_neurons_from_file("../../data/titles-en-train.labeled")
#pprint(idxs_dict)
#pprint(input_neurons)
#pprint(labels)

In [38]:
def init_network(idxs_dict, *num_neurons_network):
    r"""initial the network based on the input neorons and network structure
    
        input:
        --------------
        idxs_dict:

            dict, containing words and its idxes

        num_neurons_network:

            list, containing intergers representing the number of neurons in each layer

        output:
        --------------
        weights_network:

            list, containing np matrix representing weights for each layer
            the matrixs were init by random floating number between -1 and 1

        biases_network:

            list, containing np array of 1 for each layer

        neurons_network:

            list, containing np array representing the value of the neurons,
            initialised by 0s
    
        """
    weights_network = []
    biases_network = []
    neurons_network = []
    num_neurons_prev = len(idxs_dict)
    neurons = np.zeros(num_neurons_prev)
    neurons_network.append(neurons)
    
    for num_neurons in num_neurons_network:
        weights = np.random.rand(num_neurons, num_neurons_prev)
        num_neurons_prev = num_neurons
        neurons = np.zeros(num_neurons_prev)
        weights_network.append(weights)
        biases_network.append(np.ones((num_neurons, )))
        neurons_network.append(neurons)
        #Tracer()()
    return weights_network, biases_network, neurons_network

In [39]:
weights_network,biases_network,neurons_network=init_network(idxs_dict,2,1)
#pprint(weights_network)
#pprint(biases_network)
#pprint(neurons_network)

In [60]:
def forward_nn(input_neuron):
    r"""update the values of neurons given weights and biases of the network, and the input neurons
        
        input:
        -------------
        weights_network:
            list, containing the weight matrix of each layer
            
        biases_network:
            list, containing the bias arrays of each layer
            
        neurons_network:
            list, containing the neurons in the network, including the input layer
            
        input_neuron:
            np array, one input to the network
            
        output:
        ------------------
        neurons_network:
            list, containing the calculated values of neurons in the network, 
            including the input layer
        """
    global weights_network
    global biases_network
    global neurons_network
    for idx_layer in range(len(neurons_network)):
        
        if idx_layer==0:
            assert len(neurons_network[idx_layer])==len(input_neuron)
            neurons_network[idx_layer]=input_neuron
        
        else:
            idx_layer_prev=idx_layer-1
            neurons_prev=neurons_network[idx_layer_prev]
            weights=weights_network[idx_layer_prev]
            biases=biases_network[idx_layer_prev]
            #Tracer()()
            neurons_network[idx_layer]=np.tanh(np.dot(weights,neurons_prev)+biases)
    
    return neurons_network

In [14]:
neurons_network=forward_nn(weights_network,biases_network,neurons_network,input_neorons[0])
#pprint(neurons_network)

NameError: name 'input_neorons' is not defined

In [61]:
def backword_nn(label):
    r"""return a grad_network that correspond to neurons_network
        each layer containing the gradient value of the nodes in that layer
        
        input:
        ---------
        weights_network:
            list, containing the weight matrix of each layer
            
        biases_network:
            list, containing the bias arrays of each layer
            
        neurons_network:
            list, containing the neurons in the network, including the input layer
            
        label:
            int, either -1 or 1 indicating the label of the output
            
        output:
        -----------
        grads_network:
            list, containing the np arrays that represents the grad values at each layer
            of the network. the structure of the list and arrays are same as neurons_network,
            except there is no input layer
        """
    global weights_network
    global biases_network
    global neurons_network
    deltas_network=[]
    grads_network=[]
    
    deltas_prev=label-neurons_network[-1]
    deltas_network.append(deltas_prev)
    
    for idx_layer in range(1,len(neurons_network)):
        grads=deltas_prev*(1-np.square(neurons_network[-idx_layer]))
        #Tracer()()
        deltas_prev=np.dot(neurons_network[-idx_layer],grads)
        deltas_network.append(deltas_prev)
        grads_network.append(grads)
    return list(reversed(grads_network))

In [None]:
grads_network=backword_nn(weights_network,biases_network,neurons_network,labels[0])
pprint(grads_network)
pprint(neurons_network)
pprint(wei)

In [63]:
def update_weights(
                   grads_network,
                   learning_rate=0.01):
    r"""return updated weights and biases given the grads
        
        input:
        -------------
        weights_network:
            list, containing the weight matrix of each layer
            
        biases_network:
            list, containing the bias arrays of each layer
            
        neurons_network:
            list, containing the neurons in the network, including the input layer
            
        grads_network:
            list, containing the np arrays that represents the grad values at each layer
            
        learning_rate
            int,　default 0.01
            
        output:
        --------------
        new_weights_network:
            list, weight matrices
            
        new_bias_network:
            list, bias array
        """
    global weights_network
    global biases_network
    global neurons_network
    new_weights_network = []
    new_bias_network = []
    for idx_layer in range(len(weights_network)):
        grads_neuron_layer = grads_network[idx_layer]
        neurons_layers = neurons_network[idx_layer]
        grads_weights_layer = np.outer(grads_neuron_layer, neurons_layers)
        weights_layer = learning_rate * grads_weights_layer
        biases_layer = learning_rate *grads_neuron_layer
        new_bias_network.append(biases_layer)
        new_weights_network.append(weights_layer)
    return new_weights_network, new_bias_network

In [None]:
pprint(weights_network)
pprint(biases_network)
weights_network, biases_network = update_weights(
    weights_network,
    biases_network,
    neurons_network,
    grads_network,
    learning_rate=0.01)
pprint(weights_network)
pprint(biases_network)

In [67]:
def train_model(num_iters ,learning_rate=0.01):
    global weights_network
    global biases_network
    global neurons_network
    for i in range(num_iters):
        start=time.time()
        for idx, input_neuron in enumerate(input_neurons):
            neurons_network = forward_nn( input_neuron)
            
            grads_network = backword_nn( labels[idx])
            
            weights_network, biases_network = update_weights(
                grads_network,
                learning_rate=0.01)
        print(neurons_network[-1])
        if True:
            print("epoch {:^10} used {:^20}".format(i,time.time()-start))
    return weights_network,biases_network

In [74]:
idxs_dict,input_neurons,labels=gen_input_neurons_from_file("../../data/titles-en-train.labeled")
weights_network,biases_network,neurons_network=init_network(idxs_dict,2,1)
num_iters=10
for i in range(num_iters):
    start=time.time()
    for idx, input_neuron in enumerate(input_neurons):
        neurons_network = forward_nn( input_neuron)

        grads_network = backword_nn( labels[idx])

        weights_network, biases_network = update_weights(
            grads_network,
            learning_rate=0.01)
        if idx%100 ==0:
            pprint(weights_network[-1])
            pprint(biases_network[-1])
            pprint(neurons_network[-1])
            print()
    print("epoch {:^10} used {:^20}".format(i,time.time()-start))
    neurons_network = forward_nn(
                                 input_neurons[1])
    pprint(neurons_network[-1])

array([[  2.32552963e-05,   2.32552963e-05]])
array([  2.32552963e-05])
array([ 0.9656036])

array([[ -1.08865209e-05,  -1.08865209e-05]])
array([-0.01009801])
array([ 0.00989975])

array([[  6.05673243e-06,   6.05673243e-06]])
array([ 0.01009799])
array([-0.00989767])

array([[  4.45352701e-05,   4.45352701e-05]])
array([ 0.01009801])
array([-0.00989982])

array([[  9.89666567e-06,   9.89666567e-06]])
array([ 0.00990005])
array([ 0.00989765])

array([[  1.48422075e-05,   1.48422075e-05]])
array([-0.00989801])
array([-0.01009761])

array([[  1.21111317e-05,   1.21111317e-05]])
array([ 0.01009799])
array([-0.00989768])

array([[  2.03756895e-05,   2.03756895e-05]])
array([ 0.00990003])
array([ 0.00989988])

array([[ -1.94055807e-06,  -1.94055807e-06]])
array([-0.00990003])
array([-0.00989971])

array([[ -3.09077886e-06,  -3.09077886e-06]])
array([ 0.01009997])
array([-0.01009964])

array([[  1.68244624e-05,   1.68244624e-05]])
array([ 0.00990005])
array([ 0.0098976])

array([[  5.047270

array([[ -6.92778621e-06,  -6.92778621e-06]])
array([-0.01009801])
array([ 0.00989973])

array([[ -8.07562977e-06,  -8.07562977e-06]])
array([ 0.01009995])
array([-0.01009763])

array([[  6.05555684e-06,   6.05555684e-06]])
array([ 0.01009799])
array([-0.00989769])

array([[ -5.93801095e-06,  -5.93801095e-06]])
array([-0.00990005])
array([-0.00989766])

array([[  9.89480604e-07,   9.89480604e-07]])
array([-0.00989801])
array([-0.01009766])

array([[ -1.31228669e-05,  -1.31228669e-05]])
array([ 0.00989799])
array([ 0.01009964])

array([[ -2.67812481e-05,  -2.67812481e-05]])
array([ 0.01009997])
array([-0.01009967])

array([[ -3.23010171e-05,  -3.23010171e-05]])
array([ 0.01009994])
array([-0.01009729])

array([[ -9.89478464e-06,  -9.89478464e-06]])
array([ 0.00989801])
array([ 0.01009765])

array([[  8.90341709e-06,   8.90341709e-06]])
array([-0.00989801])
array([-0.01009763])

array([[ -1.38553866e-05,  -1.38553866e-05]])
array([-0.00990005])
array([-0.00989759])

array([[  5.93810148e

array([[ -1.16409601e-05,  -1.16409601e-05]])
array([-0.00990003])
array([-0.00989974])

array([[ -5.15129104e-06,  -5.15129104e-06]])
array([ 0.01009997])
array([-0.01009964])

array([[  1.26110283e-05,   1.26110283e-05]])
array([ 0.00990003])
array([ 0.00989975])

array([[ -1.00944022e-05,  -1.00944022e-05]])
array([ 0.01009995])
array([-0.01009761])

array([[ -1.48418455e-05,  -1.48418455e-05]])
array([-0.00990005])
array([-0.00989759])

array([[  2.37475641e-05,   2.37475641e-05]])
array([-0.00989801])
array([-0.01009757])

array([[ -1.78104949e-05,  -1.78104949e-05]])
array([-0.01009801])
array([ 0.0098998])

array([[  1.21110721e-05,   1.21110721e-05]])
array([ 0.01009799])
array([-0.00989766])

array([[ -6.18031220e-06,  -6.18031220e-06]])
array([ 0.01009997])
array([-0.01009961])

array([[  9.89667411e-07,   9.89667411e-07]])
array([ 0.00990005])
array([ 0.00989767])

array([[ -2.27626291e-05,  -2.27626291e-05]])
array([-0.01009801])
array([ 0.00989977])

array([[  6.05798084e-

array([[  1.11062147e-05,   1.11062147e-05]])
array([-0.00989799])
array([-0.01009965])

array([[ -2.52362331e-05,  -2.52362331e-05]])
array([ 0.00989799])
array([ 0.01009964])

array([[  1.41296056e-05,   1.41296056e-05]])
array([ 0.01009799])
array([-0.00989767])

array([[ -1.31228841e-05,  -1.31228841e-05]])
array([-0.01009799])
array([ 0.00989759])

array([[  2.01852008e-06,   2.01852008e-06]])
array([ 0.01009799])
array([-0.00989769])

array([[  1.13327851e-05,   1.13327851e-05]])
array([-0.01009997])
array([ 0.01009963])

array([[  2.03715488e-05,   2.03715488e-05]])
array([ 0.00990003])
array([ 0.00989977])

array([[ -9.89482850e-06,  -9.89482850e-06]])
array([ 0.00989801])
array([ 0.01009765])

array([[  1.87959416e-05,   1.87959416e-05]])
array([-0.00989802])
array([-0.01009755])

array([[ -4.85138529e-06,  -4.85138529e-06]])
array([-0.00990003])
array([-0.00989971])

array([[  9.08702920e-06,   9.08702920e-06]])
array([-0.01009995])
array([ 0.01009764])

array([[  1.71606992e

array([[ -4.85039197e-06,  -4.85039197e-06]])
array([-0.00990003])
array([-0.00989973])

array([[ -1.16433188e-05,  -1.16433188e-05]])
array([-0.00990003])
array([-0.00989971])

array([[ -3.88031422e-06,  -3.88031422e-06]])
array([-0.00990003])
array([-0.00989973])

array([[ -2.27578094e-05,  -2.27578094e-05]])
array([-0.00990005])
array([-0.00989763])

array([[  1.58317388e-05,   1.58317388e-05]])
array([-0.00989801])
array([-0.01009757])

array([[ -2.06051766e-06,  -2.06051766e-06]])
array([ 0.01009997])
array([-0.01009963])

array([[  3.02777580e-06,   3.02777580e-06]])
array([ 0.01009799])
array([-0.00989768])

array([[  3.09076082e-06,   3.09076082e-06]])
array([-0.01009997])
array([ 0.01009963])

array([[ -2.07789705e-05,  -2.07789705e-05]])
array([-0.00990005])
array([-0.00989765])

array([[  1.38555088e-05,   1.38555088e-05]])
array([ 0.01009801])
array([-0.00989973])

array([[  1.00944757e-05,   1.00944757e-05]])
array([-0.00989799])
array([-0.01009964])

array([[  2.78168115e

array([[ -2.96842837e-06,  -2.96842837e-06]])
array([-0.00990005])
array([-0.00989769])

array([[ -3.88030746e-06,  -3.88030746e-06]])
array([-0.00990003])
array([-0.00989973])

array([[  9.89272789e-06,   9.89272789e-06]])
array([-0.00989801])
array([-0.01009763])

array([[ -9.08503310e-06,  -9.08503310e-06]])
array([ 0.01009995])
array([-0.01009761])

array([[  4.05670411e-05,   4.05670411e-05]])
array([ 0.01009801])
array([-0.00990008])

array([[  2.37475409e-05,   2.37475409e-05]])
array([-0.00989801])
array([-0.01009761])

array([[  8.90521485e-06,   8.90521485e-06]])
array([ 0.00990005])
array([ 0.00989764])

array([[ -2.96782102e-06,  -2.96782102e-06]])
array([ 0.00989801])
array([ 0.01009764])

array([[  8.07563720e-06,   8.07563720e-06]])
array([ 0.01009799])
array([-0.00989766])

array([[ -2.62512413e-05,  -2.62512413e-05]])
array([ 0.01009995])
array([-0.01009763])

array([[ -2.52187192e-05,  -2.52187192e-05]])
array([-0.00990003])
array([-0.00990028])

array([[ -1.18760212e

array([[  2.72494229e-05,   2.72494229e-05]])
array([ 0.01009799])
array([-0.00989752])

array([[ -1.31229310e-05,  -1.31229310e-05]])
array([-0.01009799])
array([ 0.00989763])

array([[ -7.91734386e-06,  -7.91734386e-06]])
array([-0.00990005])
array([-0.00989765])

array([[ -3.95792031e-06,  -3.95792031e-06]])
array([ 0.00989801])
array([ 0.01009766])

array([[ -1.71603507e-05,  -1.71603507e-05]])
array([ 0.00989799])
array([ 0.01009971])

array([[ -2.17726377e-05,  -2.17726377e-05]])
array([-0.00990005])
array([-0.00989765])

array([[  1.78141728e-05,   1.78141728e-05]])
array([ 0.00990005])
array([ 0.00989757])

array([[  5.04723899e-06,   5.04723899e-06]])
array([-0.00989799])
array([-0.01009962])

array([[  8.07405589e-06,   8.07405589e-06]])
array([ 0.01009799])
array([-0.00989768])

array([[ -2.96842837e-06,  -2.96842837e-06]])
array([-0.00990005])
array([-0.00989769])

array([[ -3.88030746e-06,  -3.88030746e-06]])
array([-0.00990003])
array([-0.00989973])

array([[  9.89272789e

array([ 0.00990005])
array([ 0.00989767])

array([[  1.38526645e-05,   1.38526645e-05]])
array([ 0.00990005])
array([ 0.00989768])

array([[  6.05672798e-06,   6.05672798e-06]])
array([-0.01009995])
array([ 0.01009764])

array([[  1.31256371e-05,   1.31256371e-05]])
array([-0.01009995])
array([ 0.01009766])

array([[  1.48420562e-05,   1.48420562e-05]])
array([ 0.00990005])
array([ 0.00989765])

array([[  9.70065472e-06,   9.70065472e-06]])
array([ 0.00990003])
array([ 0.00989976])

array([[  2.01889921e-06,   2.01889921e-06]])
array([-0.00989799])
array([-0.01009961])

array([[ -1.97895337e-06,  -1.97895337e-06]])
array([-0.00990005])
array([-0.00989769])

array([[  3.53377799e-05,   3.53377799e-05]])
array([-0.00989799])
array([-0.01009982])

array([[ -1.16433121e-05,  -1.16433121e-05]])
array([-0.00990003])
array([-0.00989973])

array([[ -1.13328103e-05,  -1.13328103e-05]])
array([ 0.01009997])
array([-0.01009964])

array([[ -2.17684859e-05,  -2.17684859e-05]])
array([ 0.00989801])


array([[  9.89666567e-06,   9.89666567e-06]])
array([ 0.00990005])
array([ 0.00989765])

array([[  1.48422075e-05,   1.48422075e-05]])
array([-0.00989801])
array([-0.01009761])

array([[  1.21111317e-05,   1.21111317e-05]])
array([ 0.01009799])
array([-0.00989768])

array([[  2.03756895e-05,   2.03756895e-05]])
array([ 0.00990003])
array([ 0.00989988])

array([[ -1.94055807e-06,  -1.94055807e-06]])
array([-0.00990003])
array([-0.00989971])

array([[ -3.09077886e-06,  -3.09077886e-06]])
array([ 0.01009997])
array([-0.01009964])

array([[  1.68244624e-05,   1.68244624e-05]])
array([ 0.00990005])
array([ 0.0098976])

array([[  5.04727002e-06,   5.04727002e-06]])
array([-0.01009995])
array([ 0.01009764])

array([[  1.33932883e-05,   1.33932883e-05]])
array([-0.01009997])
array([ 0.01009963])

array([[ -7.06614560e-06,  -7.06614560e-06]])
array([ 0.01009995])
array([-0.01009762])

array([[ -3.12869319e-05,  -3.12869319e-05]])
array([-0.01009799])
array([ 0.00989767])

array([[  1.68207927e-

array([[ -3.23010171e-05,  -3.23010171e-05]])
array([ 0.01009994])
array([-0.01009729])

array([[ -9.89478464e-06,  -9.89478464e-06]])
array([ 0.00989801])
array([ 0.01009765])

array([[  8.90341709e-06,   8.90341709e-06]])
array([-0.00989801])
array([-0.01009763])

array([[ -1.38553866e-05,  -1.38553866e-05]])
array([-0.00990005])
array([-0.00989759])

array([[  5.93810148e-06,   5.93810148e-06]])
array([ 0.01009801])
array([-0.00989972])

array([[ -1.51417666e-05,  -1.51417666e-05]])
array([ 0.00989799])
array([ 0.01009963])

array([[ -4.94737718e-06,  -4.94737718e-06]])
array([-0.00990005])
array([-0.00989768])

array([[  1.97936389e-06,   1.97936389e-06]])
array([ 0.01009801])
array([-0.00989971])

array([[  5.04622553e-06,   5.04622553e-06]])
array([ 0.01009799])
array([-0.00989767])

array([[ -2.77106097e-05,  -2.77106097e-05]])
array([-0.00990005])
array([-0.00989754])

array([[ -1.31228166e-05,  -1.31228166e-05]])
array([ 0.00989799])
array([ 0.01009964])

epoch     7      used

array([[  2.37475641e-05,   2.37475641e-05]])
array([-0.00989801])
array([-0.01009757])

array([[ -1.78104949e-05,  -1.78104949e-05]])
array([-0.01009801])
array([ 0.0098998])

array([[  1.21110721e-05,   1.21110721e-05]])
array([ 0.01009799])
array([-0.00989766])

array([[ -6.18031220e-06,  -6.18031220e-06]])
array([ 0.01009997])
array([-0.01009961])

array([[  9.89667411e-07,   9.89667411e-07]])
array([ 0.00990005])
array([ 0.00989767])

array([[ -2.27626291e-05,  -2.27626291e-05]])
array([-0.01009801])
array([ 0.00989977])

array([[  6.05798084e-06,   6.05798084e-06]])
array([-0.01009995])
array([ 0.01009764])

array([[  1.28658594e-05,   1.28658594e-05]])
array([ 0.01009801])
array([-0.00989973])

array([[ -6.92778621e-06,  -6.92778621e-06]])
array([-0.01009801])
array([ 0.00989973])

array([[ -8.07562977e-06,  -8.07562977e-06]])
array([ 0.01009995])
array([-0.01009763])

array([[  6.05555684e-06,   6.05555684e-06]])
array([ 0.01009799])
array([-0.00989769])

array([[ -5.93801095e-

array([[  2.01852008e-06,   2.01852008e-06]])
array([ 0.01009799])
array([-0.00989769])

array([[  1.13327851e-05,   1.13327851e-05]])
array([-0.01009997])
array([ 0.01009963])

array([[  2.03715488e-05,   2.03715488e-05]])
array([ 0.00990003])
array([ 0.00989977])

array([[ -9.89482850e-06,  -9.89482850e-06]])
array([ 0.00989801])
array([ 0.01009765])

array([[  1.87959416e-05,   1.87959416e-05]])
array([-0.00989802])
array([-0.01009755])

array([[ -4.85138529e-06,  -4.85138529e-06]])
array([-0.00990003])
array([-0.00989971])

array([[  9.08702920e-06,   9.08702920e-06]])
array([-0.01009995])
array([ 0.01009764])

array([[  1.71606992e-05,   1.71606992e-05]])
array([-0.01009995])
array([ 0.01009762])

array([[  1.31229222e-05,   1.31229222e-05]])
array([-0.01009995])
array([ 0.01009763])

array([[  2.67864490e-05,   2.67864490e-05]])
array([-0.01009997])
array([ 0.01009977])

array([[ -1.16409601e-05,  -1.16409601e-05]])
array([-0.00990003])
array([-0.00989974])

array([[ -5.15129104e

In [78]:
neurons_network = forward_nn(
                             input_neurons[4])
pprint(neurons_network[-1])

array([-0.00989971])
