In [34]:
import numpy as np
import time
import theano
from theano import tensor as T
from logistic_regression import LogisticRegressioin,load_data

In [32]:
class Hidden(object):
    r'''
    Fully connected hidden layer computing ``activation(dot(input, W) + b)``.

    For tanh activation function results obtained in [Xavier10] show that
    the interval should be
    [-\sqrt{\frac{6}{fan_{in}+fan_{out}}},\sqrt{\frac{6}{fan_{in}+fan_{out}}}],
    where fan_in is ``n_in`` and fan_out is ``n_out``. When the activation is
    the sigmoid, the sampled weights are additionally multiplied by 4.

    Parameters
    ----------
    random_stream : object exposing ``uniform(low, high, size)``
        Random number generator used to draw the initial weights
        (e.g. a ``numpy.random.RandomState``).
    input : symbolic tensor
        Layer input; it is multiplied by ``W`` with ``T.dot``.
    n_in : int
        Number of input units (rows of ``W``).
    n_out : int
        Number of hidden units (columns of ``W``, length of ``b``).
    weight : shared variable or None
        Existing weight matrix to reuse; a new one is sampled when None.
    bias : shared variable or None
        Existing bias vector to reuse; a zero vector is created when None.
    activation : callable or None
        Element-wise non-linearity; None yields the plain linear output.

    Attributes
    ----------
    W, b : the weight and bias shared variables.
    params : ``[W, b]``, the trainable parameters of the layer.
    input : the symbolic input given to the constructor.
    output : the symbolic output of the layer.
    '''
    def __init__(self , random_stream, input, n_in , 
                 n_out , weight= None,  bias= None,activation=T.tanh):
        if weight is None:
            bound = np.sqrt(6. / (n_in + n_out))
            # `size` is required: without it a single scalar is drawn instead
            # of an (n_in, n_out) weight matrix.
            W_values = np.asarray(
                random_stream.uniform(
                    low = -bound,
                    high = bound,
                    size = (n_in, n_out)
                ),
                dtype = theano.config.floatX
            )
            if activation == T.nnet.sigmoid:
                W_values *= 4
            # borrow=True lets Theano reuse the numpy buffer instead of
            # deep-copying it.
            weight = theano.shared(value=W_values, name='weight', borrow=True)

        if bias is None:
            bias_values = np.zeros((n_out,), dtype=theano.config.floatX)
            bias = theano.shared(value=bias_values, name='bias', borrow=True)

        # Bind from `weight`/`bias` so caller-supplied parameters are honoured.
        self.W = weight
        self.b = bias
        self.input = input
        self.params = [self.W, self.b] # parameter of the model
        linear_output = T.dot(input, self.W) + self.b
        # The output is the symbolic tensor itself (not wrapped in a list) so
        # it can be fed directly to the next layer.
        self.output = (
            linear_output if activation is None else activation(linear_output)
        )

In [33]:
class MLP(object):
    '''
    Multi-layer perceptron: one tanh hidden layer followed by a logistic
    regression output layer.

    Parameters
    ----------
    random_steam : random number generator
        Forwarded to the hidden layer for weight initialisation. (The
        parameter name is kept as originally spelled for compatibility.)
    input : symbolic tensor
        Network input, fed to the hidden layer.
    n_in : int
        Number of input units.
    n_hidden : int
        Number of hidden units.
    n_out : int
        Number of output units.

    Attributes
    ----------
    hidden_layer : the ``Hidden`` layer instance.
    LogisticRegressionLayer : the output layer instance.
    L1 : sum of the absolute values of both weight matrices.
    L2 : sum of the squared entries of both weight matrices.
    neg_loglikelihood, error : taken from the output layer.
    params : parameters of both layers, hidden layer first.
    input : the symbolic input given to the constructor.
    '''
    def __init__(self,random_steam,input,n_in,n_hidden,n_out):

        self.hidden_layer = Hidden(random_steam,
                                   input = input,
                                   n_in = n_in,
                                   n_out = n_hidden,
                                   activation = T.tanh)

        # NOTE(review): the import line at the top of the notebook spells this
        # class `LogisticRegressioin`; the two spellings must agree with the
        # logistic_regression module -- confirm which one is correct.
        # NOTE(review): passing `activation` assumes this project's
        # LogisticRegression accepts that keyword -- TODO confirm.
        self.LogisticRegressionLayer = LogisticRegression(
                                        input = self.hidden_layer.output,
                                        n_in = n_hidden,
                                        n_out = n_out,
                                        activation = T.tanh
                                        )

        ## compute l1 norm (sum) and squared l2 norm
        # Each term is reduced with .sum(): the two weight matrices have
        # different shapes and the penalty has to be a scalar.
        self.L1 = (
            abs(self.hidden_layer.W).sum()
            + abs(self.LogisticRegressionLayer.W).sum()
        )

        self.L2 = (
            (self.hidden_layer.W ** 2).sum()
            + (self.LogisticRegressionLayer.W ** 2).sum()
        )

        self.neg_loglikelihood = self.LogisticRegressionLayer.negative_loglikelihood
        self.error = self.LogisticRegressionLayer.error
        self.params = self.hidden_layer.params + self.LogisticRegressionLayer.params
        self.input = input

In [None]:
def test(learning_rate = 0.01,l1_reg=0.0,l2_reg=0.0001,
         n_epoch=1000,batch_size=20,hidden_units=500):
    '''
    Build the Theano graph and compiled functions for training an MLP.

    Parameters
    ----------
    learning_rate : float
        Step size of the gradient-descent updates.
    l1_reg : float
        Weight of the L1 penalty in the cost.
    l2_reg : float
        Weight of the squared-L2 penalty in the cost.
    n_epoch : int
        Not used by the code below yet.
    batch_size : int
        Number of examples per minibatch.
    hidden_units : int
        Number of units in the hidden layer.

    Notes
    -----
    ``load_data()`` is assumed to return three ``(x, y)`` pairs (train,
    validation, test) of Theano shared variables, since ``get_value`` and
    slicing are applied to them -- TODO confirm against logistic_regression.

    TODO: the training loop itself is not implemented; only the model,
    the compiled functions and the early-stopping state are set up.
    '''
    dataset = load_data()
    train_x, train_y = dataset[0]
    validation_x, validation_y = dataset[1]
    test_x, test_y = dataset[2]

    ## compute the number of minibatches
    n_train_batches = train_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_x.get_value(borrow=True).shape[0] // batch_size
    n_validation_batches = validation_x.get_value(borrow=True).shape[0] // batch_size

    print('building the model....')
    index = T.lscalar() ## index of the minibatch
    x = T.matrix('x')
    y = T.ivector('y') ## labels
    random_state = np.random.RandomState(1234)

    # The generator is passed positionally so the call does not depend on the
    # spelling of MLP's first parameter name.
    classifier = MLP(
                    random_state,
                    input = x,
                    n_in = 28*28,
                    n_hidden = hidden_units,
                    n_out = 10
                )

    # loss function (cost function) plus regularization
    cost = (
            classifier.neg_loglikelihood(y) +
            l1_reg * classifier.L1 +
            l2_reg * classifier.L2
    )

    test_model = theano.function(
                inputs = [index],
                outputs = classifier.error(y),
                givens = {
                    x: test_x[index * batch_size : (index+1) * batch_size],
                    y: test_y[index * batch_size : (index+1) * batch_size]
        }
    )

    validation_model = theano.function(
                inputs = [index],
                outputs = classifier.error(y),
                givens = {
                    x: validation_x[index * batch_size : (index+1) * batch_size],
                    y: validation_y[index * batch_size : (index+1) * batch_size]
        }
    )

    ## gradient descent
    gparams = [T.grad(cost, param) for param in classifier.params]

    # Distinct loop names keep the gradient list from being shadowed.
    updates = [
        (param, param - learning_rate * gparam)
        for param, gparam in zip(classifier.params, gparams)
    ]

    train_model = theano.function(
                inputs = [index],
                outputs = cost,
                updates = updates,
                givens = {
                    x: train_x[index * batch_size : (index+1) * batch_size],
                    y: train_y[index * batch_size : (index+1) * batch_size]
        }
    )

    print('complete the building model')
    print('training the model....')

    # Early-stopping state; not consumed yet (see TODO in the docstring).
    patience = 10000
    patience_improvement = 2
    improvement_threshold = 0.995
    validation_frequency = min(n_train_batches , patience // 2)
    best_validation_loss = np.inf
    best_iteration = 0.
    test_score = 0.
    start_time = time.time()
    epoch = 0
    looping = False

    print('complete the training model')
    