In [None]:
import abc

In [None]:
import numpy as np

In [None]:
# Make NumPy raise FloatingPointError on floating-point errors instead of
# silently producing inf/nan; underflow is downgraded to a warning only.
# This is a kernel-wide setting and affects every later cell.
np.seterr(all='raise', under='warn')

In [None]:
# NN model defined layer by layer. Each entry is a 3-tuple:
# (node_count, activation_function, activation_derivative)
# The commented-out entries show a relu hidden layer and a sigmoid output layer;
# the active entries use None for both the activation and its derivative.
# NOTE(review): `env` is not defined in the cells visible here -- presumably an
# environment object created elsewhere; confirm it exists before this cell runs.
model_define = [
#     (20, relu, relu_derivative),
#     (env.action_space.n, sigmoid, sigmoid_derivative),
    (20, None, None),
    (env.action_space.n, None, None),
]

---

activation function

In [None]:
class ActivationFunction(abc.ABC):
    """Abstract interface for a layer activation.

    Concrete activations must implement both `apply` (the forward
    transformation) and `derivative` (used during back-propagation).
    The class cannot be instantiated directly.
    """

    def __init__(self):
        pass

    # The notebook only does `import abc`, so the decorator has to be
    # referenced through the module; a bare `abstractmethod` is undefined.
    @abc.abstractmethod
    def apply(self, v):
        """Return the activation applied to `v`."""

    @abc.abstractmethod
    def derivative(self, v, activated_value=True):
        """Return the derivative of the activation.

        Parameters
        ----------
        v : value the derivative is evaluated for.
        activated_value : bool, default True
            True when `v` is already the output of `apply`; False when `v`
            is the raw (pre-activation) input. The default matches the
            concrete subclasses so `derivative(v)` works through the base
            interface as well.
        """

In [None]:
class ActivationNone(ActivationFunction):
    """Pass-through activation: the output equals the input."""

    def __init__(self):
        # Stateless: nothing to configure.
        return None

    def apply(self, v):
        """Return `v` unchanged."""
        return v

    def derivative(self, v, activated_value=True):
        """Return the constant slope 1; both arguments are ignored."""
        return 1

In [None]:
class ActivationSigmoid(ActivationFunction):
    """Logistic sigmoid activation with optional input clipping.

    Parameters
    ----------
    x_upper_bound, x_lower_bound : float or None
        Optional limits applied to the input before exponentiation. Either
        or both may be None, meaning "no limit on that side".
    """

    def __init__(self, x_upper_bound=None, x_lower_bound=None):
        self.x_upper_bound = x_upper_bound
        self.x_lower_bound = x_lower_bound

    def apply(self, v):
        """Return 1 / (1 + exp(-v)), clipping `v` to the configured bounds first."""
        v = np.asarray(v)  # also accept scalars and lists, not only ndarrays
        # Only clip when a bound is configured: NumPy releases that require
        # at least one bound raise ValueError when both are None, which made
        # the default-constructed activation unusable.
        if self.x_upper_bound is not None or self.x_lower_bound is not None:
            v = np.clip(v, self.x_lower_bound, self.x_upper_bound)
        return 1.0 / (1.0 + np.exp(-v))

    def derivative(self, v, activated_value=True):
        """Return the sigmoid derivative s * (1 - s).

        When `activated_value` is True, `v` is taken to be the sigmoid
        output already; otherwise the sigmoid is applied to `v` first.
        """
        if not activated_value:
            v = self.apply(v)
        return v * (1 - v)

In [None]:
class ActivationRelu(ActivationFunction):
    """Rectified linear activation: negative entries are replaced by 0."""

    def __init__(self):
        # Stateless: nothing to configure.
        return None

    def apply(self, v):
        """Return `v` with every entry below 0 raised to 0."""
        return v.clip(min=0)

    def derivative(self, v, activated_value=True):
        """Return 1 where the activated value is positive and 0 elsewhere.

        When `activated_value` is False, `v` is passed through `apply` first.
        """
        activated = v if activated_value else self.apply(v)
        is_positive = activated > 0
        return np.where(is_positive, 1, 0)

---

nn model

In [None]:
class NNLayer:
    """One dense layer: a node count, an activation object, and its parameters.

    `weights` and `interceptions` stay None until `init_weights` is called.
    """

    def __init__(self, node_count, activation):
        self.weights = None
        self.interceptions = None
        self.node_count = node_count
        self.activation = activation

    def init_weights(self, prev_node_count, has_interception):
        """Draw fresh parameters for a layer fed by `prev_node_count` inputs."""
        initialised = self.xavier_weight_init(
            prev_node_count, self.node_count, has_interception
        )
        self.weights, self.interceptions = initialised

    def xavier_weight_init(self, prev_node_count, cur_node_count, has_interception):
        """Return `(weights, interceptions)` drawn from a scaled normal.

        Weights have shape (prev_node_count, cur_node_count) and are standard
        normal samples divided by sqrt(prev_node_count). Interceptions are
        drawn the same way when `has_interception` is true, otherwise zeros.
        """
        scale = np.sqrt(prev_node_count)
        weights = np.random.randn(prev_node_count, cur_node_count) / scale
        if has_interception:
            interceptions = np.random.randn(cur_node_count) / scale
        else:
            interceptions = np.zeros(cur_node_count)
        return weights, interceptions

In [None]:
class NNModel:
    """Feed-forward network built from an ordered list of layer objects.

    Each layer is expected to expose `node_count`, `activation` (an object
    with `apply` and `derivative`), `weights`, `interceptions` and
    `init_weights(prev_node_count, has_interception)`.

    Parameters
    ----------
    X_size : int
        Number of input features fed to the first layer.
    layers : list
        Layer objects, ordered from input side to output side.
    has_interception : bool, default True
        Whether layers use a trainable interception (bias) term. When False
        the interceptions are left untouched by `update_model`.
    """

    def __init__(self, X_size, layers, has_interception=True):
        self.X_size = X_size
        self.has_interception = has_interception
        self.model = layers
        # Input fed to each layer during the latest forward pass, in layer
        # order; entry i (i > 0) is therefore the activated output of layer i-1.
        self.hidden_layer_res_buf = []
        self.init_model()

    def init_model(self):
        """Initialise every layer's parameters, chaining the layer sizes."""
        prev_node_count = self.X_size
        for layer in self.model:
            layer.init_weights(prev_node_count, self.has_interception)
            prev_node_count = layer.node_count

    # forward pass, calculate predict value with current model
    def model_forward(self, X):
        """Run `X` through all layers and return the last layer's output.

        The input of every layer is recorded in `hidden_layer_res_buf` so
        that a following `back_propagation` call can reuse it.
        """
        cur_res = X
        self.hidden_layer_res_buf.clear()
        for layer in self.model:
            self.hidden_layer_res_buf.append(cur_res)
            cur_res = np.dot(cur_res, layer.weights) + layer.interceptions
            cur_res = layer.activation.apply(cur_res)
        return cur_res

    # batch back propagation to get gradient
    def back_propagation(self, td_err, learning_rate):
        """Back-propagate `td_err` through the latest forward pass and update.

        Parameters
        ----------
        td_err : array-like
            Error signal at the output layer, one row per sample (a 1-D
            vector is treated as a single sample). It is used directly as the
            output layer's delta; no output-activation derivative is applied.
            Because the update is additive, it should point in the direction
            the output has to move.
            NOTE(review): presumably `target - prediction`; confirm with caller.
        learning_rate : float
            Step size passed on to `update_model`.

        Raises
        ------
        RuntimeError
            If no forward pass matching the current layers has been recorded.
        """
        layer_count = len(self.model)
        if len(self.hidden_layer_res_buf) != layer_count:
            raise RuntimeError("model_forward must be called before back_propagation")

        delta = np.atleast_2d(td_err)
        batch_size = delta.shape[0]
        grads = [None] * layer_count

        for idx in range(layer_count - 1, -1, -1):
            layer = self.model[idx]
            layer_input = np.atleast_2d(self.hidden_layer_res_buf[idx])
            # Batch-averaged gradients for this layer's weights / interceptions.
            grads[idx] = (np.dot(layer_input.T, delta) / batch_size,
                          delta.mean(axis=0))
            if idx > 0:
                # `layer_input` is the activated output of the PREVIOUS layer,
                # so the delta handed down must be scaled by that layer's
                # activation derivative, not by the current layer's.
                prev_activation = self.model[idx - 1].activation
                delta = np.dot(delta, layer.weights.T) * \
                    prev_activation.derivative(layer_input, True)

        # All gradients are computed from the pre-update weights; apply last.
        self.update_model(grads, learning_rate)

    # update model
    def update_model(self, grads, learning_rate):
        """Apply one additive step to every layer.

        Parameters
        ----------
        grads : iterable of (weight_grad, interception_grad)
            One pair per layer, in the same order as `self.model`.
        learning_rate : float
            Multiplier applied to each gradient before it is added.
        """
        for layer, (weight_grad, interception_grad) in zip(self.model, grads):
            layer.weights = layer.weights + learning_rate * weight_grad
            if self.has_interception:
                layer.interceptions = layer.interceptions + learning_rate * interception_grad