diff --git a/python/Dropout.py b/python/Dropout.py
index 16c203f..ba99116 100755
--- a/python/Dropout.py
+++ b/python/Dropout.py
@@ -87,10 +87,11 @@ def train(self, epochs=5000, dropout=True, p_dropout=0.5, rng=None):
                 else:
                     prev_layer = self.hidden_layers[i+1]
 
-                self.hidden_layers[i].backward(prev_layer=prev_layer)
-                if dropout == True:
-                    self.hidden_layers[i].d_y *= dropout_masks[i] # also mask here
+                if dropout == True:
+                    self.hidden_layers[i].backward(prev_layer=prev_layer, dropout=True, mask=dropout_masks[i])
+                else:
+                    self.hidden_layers[i].backward(prev_layer=prev_layer)
 
 
     def predict(self, x, dropout=True, p_dropout=0.5):
diff --git a/python/HiddenLayer.py b/python/HiddenLayer.py
index 75f8ab2..a97bc61 100755
--- a/python/HiddenLayer.py
+++ b/python/HiddenLayer.py
@@ -57,12 +57,15 @@ def forward(self, input=None):
         return self.output(input=input)
 
-    def backward(self, prev_layer, lr=0.1, input=None):
+    def backward(self, prev_layer, lr=0.1, input=None, dropout=False, mask=None):
        if input is not None:
            self.x = input
 
        d_y = self.dactivation(prev_layer.x) * numpy.dot(prev_layer.d_y, prev_layer.W.T)
 
+        if dropout == True:
+            d_y *= mask
+
        self.W += lr * numpy.dot(self.x.T, d_y)
        self.b += lr * numpy.mean(d_y, axis=0)
 
        self.d_y = d_y
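
The change moves the dropout masking inside `HiddenLayer.backward()` instead of patching `d_y` from the training loop after the fact: a unit dropped in the forward pass must also have its gradient zeroed in the backward pass, before the weight and bias updates are applied, so the same per-epoch mask is now threaded through as a `mask` argument. As a minimal standalone sketch of that invariant (not code from this repository; the `dropout_mask` helper, the tanh activation, and the toy shapes are illustrative assumptions):

```python
import numpy

rng = numpy.random.RandomState(1234)

def dropout_mask(shape, p_dropout, rng):
    # Hypothetical helper: keep each unit with probability 1 - p_dropout.
    return rng.binomial(n=1, p=1 - p_dropout, size=shape)

# Toy single hidden layer with a tanh activation.
x = rng.randn(4, 3)        # minibatch of 4 examples, 3 inputs
W = 0.1 * rng.randn(3, 5)  # 3 inputs -> 5 hidden units
b = numpy.zeros(5)

# Forward pass: mask the activations and remember the mask.
z = numpy.tanh(numpy.dot(x, W) + b)
mask = dropout_mask(z.shape, p_dropout=0.5, rng=rng)
z *= mask                  # dropped units output exactly 0

# Backward pass: the *same* mask silences the gradients of the dropped
# units, mirroring what backward(..., dropout=True, mask=...) now does
# internally before updating W and b.
upstream = rng.randn(*z.shape)   # stand-in for the next layer's delta
d_y = (1 - z ** 2) * upstream    # tanh'(a) = 1 - tanh(a)**2
d_y *= mask                      # no update flows through dropped units
lr = 0.1
W += lr * numpy.dot(x.T, d_y)
b += lr * numpy.mean(d_y, axis=0)
```

If the mask were applied only after `backward()` (as in the removed lines), `self.W` and `self.b` would already have been updated with unmasked gradients; masking `self.d_y` afterwards only fixed the signal propagated to the next-lower layer, not the layer's own update.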