In [4]:
## Implementation: Dropout + ReLU (Rectified Linear Unit)
import sys
import numpy as np
# Silence every NumPy floating-point warning category (divide, over, under,
# invalid) for the whole session. NOTE(review): this also hides NaN/inf
# produced during training, so re-enable warnings when debugging numerics.
np.seterr(all="ignore")

{'divide': 'ignore', 'invalid': 'ignore', 'over': 'ignore', 'under': 'ignore'}

In [5]:
## define functions
def sigmoid(x):
    """Logistic function 1 / (1 + exp(-x)), applied element-wise."""
    denominator = 1 + np.exp(-x)
    return 1. / denominator
def dsigmoid(x):
    """Return x * (1 - x).

    This is the sigmoid derivative when ``x`` is already a sigmoid output
    (not the pre-activation).
    """
    complement = 1. - x
    return x * complement
def tanh(x):
    """Hyperbolic tangent, applied element-wise (delegates to ``np.tanh``)."""
    activated = np.tanh(x)
    return activated
def dtanh(x):
    """Return 1 - x**2.

    This is the tanh derivative when ``x`` is already a tanh output
    (not the pre-activation).
    """
    squared = x * x
    return 1. - squared
def softmax(x):
    """Numerically stable softmax over the last axis of ``x``.

    x: array-like of scores. A 1-D input is treated as a single sample;
       a 2-D input is normalised row by row (one sample per row).

    Returns an array of the same shape whose entries along the last axis
    are non-negative and sum to 1.
    """
    x = np.asarray(x)
    # Shift each sample by its own maximum so the largest exponent is exp(0).
    # Shifting by the global maximum instead can make every entry of a
    # low-valued row underflow to 0 and turn that row into 0/0 = NaN.
    shifted = x - np.max(x, axis=-1, keepdims=True)
    e = np.exp(shifted)
    # keepdims makes the normaliser broadcast for both 1-D and 2-D inputs
    # (the former 1-D branch summed over axis=1, which does not exist).
    return e / np.sum(e, axis=-1, keepdims=True)
def ReLU(x):
    """Rectified linear unit: keep positive entries, zero out the rest."""
    is_positive = x > 0
    return x * is_positive
def dReLU(x):
    """ReLU derivative: 1 where ``x`` is strictly positive, 0 elsewhere."""
    is_positive = x > 0
    return 1 * is_positive

In [16]:
## dropout
class Dropout(object):
    """Multi-layer perceptron trained with dropout on its hidden layers.

    The network is a stack of ``HiddenLayer`` objects followed by a
    ``LogisticRegression`` (softmax) output layer.
    """

    def __init__(self, input, label, n_in, hidden_layer_sizes, n_out, rng=None, activation=ReLU):
        """Build the hidden-layer stack and the output layer.

        input: training inputs, stored as ``self.x``.
        label: training targets, stored as ``self.y`` and handed to the
            output layer.
        n_in: number of input features (size of the input layer).
        hidden_layer_sizes: sequence with one unit count per hidden layer;
            must not be empty.
        n_out: number of output units.
        rng: ``np.random.RandomState`` used to initialise the hidden layers;
            defaults to ``RandomState(1234)``.
        activation: activation function given to every hidden layer.

        Raises AssertionError if ``hidden_layer_sizes`` is empty.
        """
        self.x = input
        self.y = label
        self.hidden_layers = []
        self.n_layers = len(hidden_layer_sizes)
        if rng is None:
            rng = np.random.RandomState(1234)
        assert self.n_layers > 0  # at least one hidden layer is required

        # construct multi-layer
        for i in range(self.n_layers):
            if i == 0:
                # The first hidden layer reads the raw features.
                input_size = n_in
                layer_input = self.x
            else:
                # self.hidden_layers is empty only on the first pass, which
                # never reaches this branch; afterwards the most recently
                # appended layer feeds the one being built.
                input_size = hidden_layer_sizes[i - 1]
                layer_input = self.hidden_layers[-1].output()

            # construct hidden layer
            hidden_layer = HiddenLayer(input=layer_input,
                                       n_in=input_size,
                                       n_out=hidden_layer_sizes[i],
                                       rng=rng,
                                       activation=activation)
            self.hidden_layers.append(hidden_layer)

        # Output layer (softmax logistic regression). Built once, after the
        # whole stack exists, instead of being rebuilt on every iteration.
        self.log_layer = LogisticRegression(input=self.hidden_layers[-1].output(),
                                            label=self.y,
                                            n_in=hidden_layer_sizes[-1],
                                            n_out=n_out)

    def train(self, epochs=5000, dropout=True, p_dropout=0.5, rng=None):
        """Run ``epochs`` full-batch training passes over ``self.x``.

        epochs: number of forward/backward passes.
        dropout: when true, a binary mask is applied to every hidden
            layer's output and to its back-propagated error.
        p_dropout: probability of dropping a unit, forwarded to each
            hidden layer's ``dropout()``.
        rng: random state forwarded unchanged to each hidden layer's
            ``dropout()``.
        """
        for epoch in range(epochs):
            dropout_masks = []
            layer_input = self.x

            # forward hidden layers
            for i in range(self.n_layers):
                layer_input = self.hidden_layers[i].forward(input=layer_input)
                if dropout:
                    mask = self.hidden_layers[i].dropout(input=layer_input, p=p_dropout, rng=rng)
                    # In-place: the masked activations are what the next
                    # layer stores as its input and later differentiates.
                    layer_input *= mask
                    dropout_masks.append(mask)

            # forward & backward log layer
            self.log_layer.train(input=layer_input)

            # backward hidden layers, from the last one down to the first
            for i in reversed(range(0, self.n_layers)):
                if i == self.n_layers - 1:
                    prev_layer = self.log_layer
                else:
                    prev_layer = self.hidden_layers[i + 1]

                self.hidden_layers[i].backward(prev_layer=prev_layer)

                if dropout:
                    # Dropped units must not pass error further down.
                    self.hidden_layers[i].d_y *= dropout_masks[i]

    def predict(self, x, dropout=True, p_dropout=0.5):
        """Return the output layer's prediction for ``x``.

        x: inputs to classify.
        dropout: set to true when the network was trained with dropout;
            hidden activations are then scaled by the keep probability so
            they match their expected value under the training masks.
        p_dropout: drop probability that was used during training.

        The stored weights and biases are left untouched, so repeated
        calls return the same result and training can continue afterwards.
        (Rescaling ``W`` and ``b`` in place, as done previously, shrank the
        parameters again on every call.)
        """
        # The training mask keeps a unit with probability 1 - p_dropout, so
        # that is the factor to apply here. For ReLU, scaling the output is
        # identical to scaling both W and b of the layer by the same factor.
        keep_prob = 1. - p_dropout
        layer_input = x

        for i in range(self.n_layers):
            layer_input = self.hidden_layers[i].output(input=layer_input)
            if dropout:
                layer_input = keep_prob * layer_input

        return self.log_layer.predict(layer_input)

In [14]:
class HiddenLayer(object):
    """Fully connected layer: ``activation(x . W + b)``."""

    def __init__(self, input, n_in, n_out, W=None, b=None, rng=None, activation=tanh):
        """Create the layer and, if not supplied, its parameters.

        input: initial layer input, stored as ``self.x``.
        n_in: number of input units.
        n_out: number of output units.
        W: optional weight matrix; defaults to uniform samples from
            [-1/n_in, 1/n_in) with shape (n_in, n_out).
        b: optional bias vector; defaults to zeros of length ``n_out``.
        rng: ``np.random.RandomState``; defaults to ``RandomState(1234)``.
        activation: one of ``tanh``, ``sigmoid`` or ``ReLU``.

        Raises ValueError for any other activation.
        """
        if rng is None:
            rng = np.random.RandomState(1234)

        if W is None:
            a = 1. / n_in
            W = np.array(rng.uniform(low=-a,
                                     high=a,
                                     size=(n_in, n_out)))  # initialize W uniformly

        if b is None:
            b = np.zeros(n_out)  # initialize bias 0

        self.rng = rng
        self.x = input

        self.W = W
        self.b = b

        # Pair the activation with its derivative. Every derivative helper
        # expects the activation's *output*, not the pre-activation.
        if activation == tanh:
            self.dactivation = dtanh
        elif activation == sigmoid:
            self.dactivation = dsigmoid
        elif activation == ReLU:
            self.dactivation = dReLU
        else:
            raise ValueError('activation function not supported.')

        self.activation = activation

    def output(self, input=None):
        """Return the layer activation; a given ``input`` replaces ``self.x``."""
        if input is not None:
            self.x = input

        linear_output = np.dot(self.x, self.W) + self.b

        return (linear_output if self.activation is None
                else self.activation(linear_output))

    def sample_h_given_v(self, input=None):
        """Draw binary samples using the layer output as success probability."""
        if input is not None:
            self.x = input

        v_mean = self.output()
        h_sample = self.rng.binomial(size=v_mean.shape,
                                     n=1,
                                     p=v_mean)
        return h_sample

    def forward(self, input=None):
        """Forward pass; identical to ``output``."""
        return self.output(input=input)

    def backward(self, prev_layer, lr=0.1, input=None):
        """Back-propagate from the layer above and update ``W`` and ``b``.

        prev_layer: the layer handled just before this one in the backward
            pass (the layer that consumes this layer's output). Its ``x``,
            ``d_y`` and ``W`` attributes are read.
        lr: learning rate.
        input: optional replacement for ``self.x``.

        The resulting error term is stored in ``self.d_y``.
        """
        if input is not None:
            self.x = input

        # prev_layer.x is expected to hold this layer's (possibly masked)
        # output, which is what the derivative helpers take.
        d_y = self.dactivation(prev_layer.x) * np.dot(prev_layer.d_y, prev_layer.W.T)

        self.W += lr * np.dot(self.x.T, d_y)
        self.b += lr * np.mean(d_y, axis=0)

        self.d_y = d_y

    def dropout(self, input, p, rng=None):
        """Return a 0/1 mask shaped like ``input``.

        input: array whose shape the mask must match.
        p: probability of dropping a unit (mask entry 0).
        rng: random state to draw from; defaults to the layer's own
            ``self.rng``.
        """
        if rng is None:
            # Use the layer's persistent generator. Creating a new
            # RandomState with a fixed seed here would return the very
            # same mask on every call.
            rng = self.rng

        mask = rng.binomial(size=input.shape,
                            n=1,
                            p=1 - p)  # p is the prob of dropping

        return mask

In [10]:
class LogisticRegression(object):
    """Softmax output layer updated with steps along the label residual."""

    def __init__(self, input, label, n_in, n_out):
        """Keep the data and start from all-zero weights and biases."""
        self.x, self.y = input, label
        self.W = np.zeros((n_in, n_out))
        self.b = np.zeros(n_out)

    def _class_probabilities(self, inputs):
        """Softmax of the affine map of ``inputs`` under the current parameters."""
        return softmax(np.dot(inputs, self.W) + self.b)

    def train(self, lr=0.1, input=None, L2_reg=0.00):
        """Apply one update step; a given ``input`` replaces ``self.x``.

        lr: learning rate.
        L2_reg: weight-decay coefficient applied to ``W`` only.

        The residual ``y - p(y|x)`` is kept in ``self.d_y``. No cost is
        computed or returned.
        """
        if input is not None:
            self.x = input

        residual = self.y - self._class_probabilities(self.x)

        # Both terms are evaluated with the pre-update W, then added in place.
        weight_step = lr * np.dot(self.x.T, residual)
        weight_decay = lr * L2_reg * self.W
        self.W += weight_step - weight_decay
        self.b += lr * np.mean(residual, axis=0)

        self.d_y = residual

    def negative_log_likelihood(self):
        """Mean over samples of the summed cross-entropy terms on ``self.x``."""
        probs = self._class_probabilities(self.x)
        per_sample = np.sum(self.y * np.log(probs) +
                            (1 - self.y) * np.log(1 - probs), axis=1)
        return - np.mean(per_sample)

    def predict(self, x):
        """Return the softmax probabilities for ``x``."""
        return self._class_probabilities(x)

    def output(self, x):
        """Alias of ``predict``."""
        return self.predict(x)

In [12]:
def test_dropout(n_epochs=5000, dropout=True, p_dropout=0.5):
    """Train the dropout MLP on XOR and print its predictions.

    n_epochs: number of training epochs.
    dropout: whether dropout is used; applied to training and prediction.
    p_dropout: drop probability; applied to training and prediction.
    """
    # XOR
    x = np.array([[0,  0],
                  [0,  1],
                  [1,  0],
                  [1,  1]])

    # One-hot targets: column 0 marks XOR == 1, column 1 marks XOR == 0.
    y = np.array([[0, 1],
                  [1, 0],
                  [1, 0],
                  [0, 1]])

    rng = np.random.RandomState(123)

    # construct Dropout MLP
    classifier = Dropout(input=x, label=y, n_in=2, hidden_layer_sizes=[10, 10], n_out=2,
                         rng=rng, activation=ReLU)

    # train
    classifier.train(epochs=n_epochs, dropout=dropout, p_dropout=p_dropout, rng=rng)

    # test -- predict with the same dropout settings used for training;
    # relying on predict()'s defaults would rescale the activations even
    # when the network was trained with dropout=False.
    print(classifier.predict(x, dropout=dropout, p_dropout=p_dropout))

In [17]:
# Run the XOR demo with its default settings when executed directly.
if __name__ == "__main__":
    test_dropout()

[[  5.52481527e-02   9.44751847e-01]
 [  1.00000000e+00   2.82725901e-20]
 [  1.00000000e+00   1.90429882e-21]
 [  5.52483354e-02   9.44751665e-01]]


In [32]:
# Scratch check (Python 2 syntax): xrange and range walk the same indices
# for a two-element list, so both loops print 0 and 1.
xxx = [10, 10]
xlen = len(xxx)
for i in xrange(xlen):
    print i
for i in range(xlen):
    print i

#print range(xlen)

0
1
0
1
