In [1]:
import numpy as np

In [53]:
class Layer(object):
    def __init__(self,name):
        self.name = name
        self.params,self.grads = None,None
    def forward(self,inputs):
        raise NotImplementedError
    def backward(self,grad):
        raise NotImplementedError

In [58]:
def ZerosInit(*args):
    print(args)
    return np.zeros(args[0])

def RandomInit(*args):
    print(args)
    return np.random.randn(args[0][0],args[0][1])



In [63]:
np.random.randn(1,2)
np.random.randn(2,2)

array([[ 0.09211359, -1.89176291],
       [ 0.33310465, -1.32088441]])

In [106]:
class Dense(Layer):
    def __init__(self,num_in,num_out,w_init=RandomInit,b_init=ZerosInit):
        # 在layer中层的名字定义为Linear
        super().__init__("Linear")
        self.params={
            "w":w_init([num_in,num_out]),
            "b":b_init([1,num_out])
        }
        self.inputs = None
    
    def forward(self,inputs):
        self.inputs = inputs
        return inputs * self.params["w"]+self.params["b"]
    
    def backward(self,grad):
        # Doutput/Dw  当前层w的梯度
        self.grads["w"] = self.inputs * grad
        # Doutput/Db，当前层b的梯度
        self.grads["b"] = np.sum(grad,axis=0)
        # Doutput/Dx,用于链式规则的后续部分求导
        return grad * self.params["w"]

In [107]:
RandomInit([2,2]),ZerosInit([10,10])

([2, 2],)
([10, 10],)


(array([[-0.4177726 ,  1.05959241],
        [ 0.44516585,  1.14050681]]),
 array([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]]))

In [109]:
class Activation(Layer):
    # base activation layer
    def __init__(self,name):
        super().__init__(name)
        self.inputs = None
        
    def forward(self,inputs):
        self.inputs = inputs
        return self.func(inputs)
    def backward(self,grad):
        return self.derivative_func(self.inputs) * grad
    
    def func(self,x):
        raise NotImplementedError
    def derivative_func(self,x):
        raise NotImplementedError

In [110]:
class ReLU(Activation):
    # ReLU activation function
    def __init__(self):
        super().__init__("ReLU")
    def func(self,x):
        # relu激活函数的前向计算过程
        return np.maximum(x,0.0)
    def derivative_func(self,x):
        # drelu/dx
        # 当x>0时，drelu/dx=1 true
        # 当x<=0时，drelu/dx=0 false
        return x > 0.0
class Sigmoid(Activation):
    # sigmoid activation function
    def __init__(self):
        super().__init__("Sigmoid")
    def func(self,x):
        # sigmoid
        return 1/(1+np.exp(-x))
    def derivative_func(self,x):
        y = self.func(x)
        return y * (1-y)

In [111]:
layer1 = Dense(2,4)
layer2 = Sigmoid()
layer3 = Dense(4,1)

x1=[0.9,0.1]
x2=[0.8,0.2]

([2, 4],)
([1, 4],)
([4, 1],)
([1, 1],)


In [112]:
x=layer1.forward(x1)

ValueError: operands could not be broadcast together with shapes (2,) (2,4) 

In [113]:
x=layer2.forward(x)

In [114]:
x=layer3.forward(x)

In [115]:
x

array([[-0.18265831],
       [ 0.60634778],
       [ 0.76488293],
       [-0.348134  ]])

In [116]:
def model(x):
    x=layer1.forward(x)
    x=layer2.forward(x)
    x=layer3.forward(x)
    return x


In [117]:
model(x1),model(x1)

ValueError: operands could not be broadcast together with shapes (2,) (2,4) 

In [105]:
layer1.backward(1)

TypeError: unsupported operand type(s) for @: 'list' and 'int'

In [91]:
layer1.name,layer1.params,layer1.grad

AttributeError: 'Dense' object has no attribute 'grad'

In [88]:
layer2.name

'Sigmoid'

In [90]:
layer3.params

{'b': array([[0.]]), 'w': array([[-0.58958781],
        [-0.27086268],
        [-0.15791097],
        [-0.37147295]])}