In [54]:
# prepare
import numpy as np
import pandas as pd
import sys
import warnings
import matplotlib.pyplot as plt

# Suppress every Python warning for the rest of the session.
# NOTE(review): this presumably also hides NumPy runtime warnings (divide by zero, log of zero) — confirm that is wanted.
warnings.filterwarnings('ignore')

# Seed NumPy's global random generator so later np.random draws are repeatable.
seed = 782
np.random.seed(seed)
from sklearn.preprocessing import Normalizer

In [55]:
# load data
# Read the training and test CSV files; `train` and `test` hold the raw value arrays.
# NOTE(review): the paths are relative, so they resolve against the notebook's working directory.
train_df= pd.read_csv("./datasets/train.csv")
train = train_df.values
test = pd.read_csv("./datasets/test.csv").values

In [56]:
# Split the training array: column 0 becomes the target, the remaining columns the features.
X_train = train[:, 1:]
y_train = train[:, 0]

In [57]:
def normalization(x, mu, sigma):
    """Centre and scale the columns of ``x`` as ``(x - mu) / sigma``.

    Columns whose ``sigma`` is zero are mapped to 0 instead of being divided.

    Args:
        x: 2-D array-like of shape (n_samples, n_features).
        mu: per-column offsets, length n_features.
        sigma: per-column scales, length n_features.

    Returns:
        A new float array with the same shape as ``x``; ``x`` itself is not
        modified.
    """
    # Work in floating point: allocating the result with the dtype of an
    # integer input would silently truncate every normalised value.
    x = np.asarray(x, dtype=float)
    mu = np.asarray(mu, dtype=float)
    sigma = np.asarray(sigma, dtype=float)

    x_norm = np.zeros_like(x)
    # Divide only where sigma is non-zero; the remaining entries keep the 0
    # they were initialised with.
    np.divide(x - mu, sigma, out=x_norm, where=(sigma != 0))
    return x_norm

In [58]:
# Per-feature statistics of the training data: column means and column ranges (max - min).
# NOTE(review): the only visible consumer is the commented-out normalization(...) call in the next cell — verify these are still needed.
mu = np.mean(X_train, axis = 0)
sigma = np.max(X_train, axis=0) - np.min(X_train, axis=0)

In [59]:
# Disabled alternative that used the hand-written helper:
# test = normalization(test, mu, sigma)
# NOTE(review): per the scikit-learn documentation, Normalizer rescales each sample (row)
# on its own — with norm='max', by that row's largest absolute value — rather than each
# feature column. Confirm that per-row scaling is what is intended here.
normalizer = Normalizer(norm='max')
X_train = normalizer.fit_transform(X_train)
test = normalizer.transform(test)

In [63]:
# One-hot encode the labels: one indicator column per distinct value in y_train.
# NOTE(review): this overwrites y_train, so the cell should not be re-run without first re-running the split cell.
y_train = pd.get_dummies(y_train).values

In [64]:
# Model definition
def Relu(x, derivative=False):
    """Rectified linear unit, or its gradient mask.

    With ``derivative`` left at ``False`` the input is multiplied by the
    ``x > 0`` mask, which zeroes every non-positive entry. Otherwise the mask
    itself is returned as integers: 1 where ``x > 0`` and 0 elsewhere.
    """
    positive = x > 0
    if derivative == False:
        return x * positive
    return 1 * positive

In [65]:
def Softmax(x):
    """Softmax over the last axis of ``x`` (row-wise for a 2-D batch).

    Args:
        x: array-like of scores, typically of shape (n_samples, n_classes).

    Returns:
        A new float array of the same shape whose entries along the last axis
        are non-negative and sum to 1. The input is left untouched.
    """
    # Copy into a float array so the caller's data is never modified in place.
    x = np.asarray(x, dtype=float)
    # Shift every row by its own maximum. Softmax is invariant to a per-row
    # shift, and this keeps at least one exponent at exp(0) = 1 in each row,
    # so the denominator cannot underflow to zero.
    shifted = x - np.max(x, axis=-1, keepdims=True)
    exps = np.exp(shifted)
    return exps / np.sum(exps, axis=-1, keepdims=True)

In [115]:
def createWeightAndbiases(n_inputs=28 * 28, hidden1=30, n_outputs=10):
    """Randomly initialise a network with one hidden layer.

    Every parameter is drawn from a zero-mean normal distribution whose
    standard deviation is ``fan_in ** -0.5`` for the layer it belongs to.

    Args:
        n_inputs: number of input features (default 28 * 28).
        hidden1: number of hidden units (default 30).
        n_outputs: number of output classes (default 10).

    Returns:
        ``[w1, w2, b1, b2]`` with shapes ``(n_inputs, hidden1)``,
        ``(hidden1, n_outputs)``, ``(1, hidden1)`` and ``(1, n_outputs)``.
    """
    input_scale = n_inputs ** -0.5
    hidden_scale = hidden1 ** -0.5

    # The draw order (w1, b1, w2, b2) is kept so that a fixed global seed
    # yields the same parameters as the hard-coded version did.
    # layer 1
    w1 = np.random.normal(0, input_scale, [n_inputs, hidden1])
    b1 = np.random.normal(0, input_scale, [1, hidden1])

    # layer 2
    w2 = np.random.normal(0, hidden_scale, [hidden1, n_outputs])
    b2 = np.random.normal(0, hidden_scale, [1, n_outputs])

    return [w1, w2, b1, b2]

In [178]:
# dropout
def Dropout(x, dropout_percent):
    """Sample a rescaled dropout mask with the same shape as ``x``.

    Each entry is independently 0 with probability ``dropout_percent`` and
    ``1 / (1 - dropout_percent)`` otherwise. Only the mask is returned; it is
    not applied to ``x``, so multiplying is left to the caller.

    Args:
        x: array-like whose shape determines the shape of the mask.
        dropout_percent: probability of dropping a unit, within [0, 1].

    Returns:
        A float array shaped like ``x``.

    Raises:
        ValueError: if ``dropout_percent`` lies outside [0, 1].
    """
    if not 0 <= dropout_percent <= 1:
        raise ValueError("dropout_percent must be within [0, 1]")

    keep_prob = 1 - dropout_percent
    if keep_prob == 0:
        # Everything is dropped; dividing by keep_prob here would turn the
        # whole mask into NaN.
        return np.zeros(np.shape(x))

    kept = np.random.binomial(1, keep_prob, size=np.shape(x))
    # Rescale the surviving units so the expected value of each entry is 1.
    return kept / keep_prob

In [187]:
# predict
def predict(weights, X, dropout_percent=0):
    """Run the forward pass of the one-hidden-layer network.

    Args:
        weights: ``[w1, w2, b1, b2]`` in the order returned by
            ``createWeightAndbiases``.
        X: input batch; it must be compatible with ``np.dot(X, w1)``
            (presumably shape (n_samples, n_inputs) — confirm with callers).
        dropout_percent: accepted for interface compatibility; this function
            does not use it.

    Returns:
        ``[hidden, output]``: the Relu activations of the hidden layer and
        the Softmax of the output layer.
    """
    w1, w2, b1, b2 = weights

    # Read the `X` argument. A lowercase `x` is not defined in this function
    # and would raise NameError (or silently pick up an unrelated global).
    hidden = Relu(np.dot(X, w1) + b1)

    return [hidden, Softmax(np.dot(hidden, w2) + b2)]


In [188]:
# Measure accuracy
def accuracy(output, y):
    """Percentage of rows whose arg-max in ``output`` equals the arg-max in ``y``.

    Both arguments are reduced with ``np.argmax(..., axis=1)`` and the
    resulting class indices are compared pairwise.
    """
    predicted = np.argmax(output, axis=1)
    expected = np.argmax(y, axis=1)
    matches = sum(1 for guess, truth in zip(predicted, expected) if guess == truth)
    return (matches * 100) / predicted.shape[0]

In [190]:
def log2(x):
    """Natural logarithm of a scalar, returning ``-inf`` when the input equals zero.

    Despite the name, this uses ``np.log`` (base e), not a base-2 logarithm.
    """
    return -np.inf if x == 0 else np.log(x)

def log(y):
    """Apply ``log2`` to every entry of a two-level nested sequence.

    Returns a list of lists with the same nesting as ``y``.
    """
    rows = []
    for row in y:
        logged = []
        for value in row:
            logged.append(log2(value))
        rows.append(logged)
    return rows

def cost(Y_predict, Y_right, weights, nabla):
    """Mean element-wise binary cross-entropy plus an L2 weight penalty.

    Args:
        Y_predict: predicted probabilities, same shape as ``Y_right``.
        Y_right: target indicators (0/1), e.g. one-hot rows.
        weights: ``[w1, w2, b1, b2]``; only ``w1`` and ``w2`` are penalised.
        nabla: regularisation strength multiplying the weight penalty.

    Returns:
        The scalar loss
        ``cross_entropy + nabla / 2 * (mean(w1 ** 2) + mean(w2 ** 2))``.
    """
    w1, w2, _b1, _b2 = weights
    Y_predict = np.asarray(Y_predict, dtype=float)
    Y_right = np.asarray(Y_right, dtype=float)

    # Keep probabilities strictly inside (0, 1). Otherwise a saturated
    # prediction produces 0 * log(0) = 0 * -inf = NaN and poisons the mean.
    eps = 1e-12
    clipped = np.clip(Y_predict, eps, 1.0 - eps)
    cross_entropy = -np.mean(
        Y_right * np.log(clipped) + (1.0 - Y_right) * np.log(1.0 - clipped)
    )

    weights_sum_square = np.mean(w1 ** 2) + np.mean(w2 ** 2)
    # The penalty is added to the positive cross-entropy. Placing it inside
    # the negated mean would subtract it and so reward large weights.
    return cross_entropy + nabla / 2 * weights_sum_square

In [1]:
import numpy as np

# sigmoid
def sigmoid(x):
    """Logistic function ``1 / (1 + exp(-x))``, evaluated with NumPy."""
    denominator = 1.0 + np.exp(-x)
    return 1.0 / denominator

# Derivative of the sigmoid
def sigmoid_derivative(output):
    """Sigmoid gradient written in terms of the sigmoid's own value.

    ``output`` is expected to already be ``sigmoid(z)``; the result is
    ``output * (1 - output)``.
    """
    complement = 1.0 - output
    return output * complement

# Build a lookup table from each integer to its binary representation
# (one row of `binary_dim` bits per integer, most significant bit first).
int2binary = {}
binary_dim = 8

largest_number = pow(2, binary_dim)
binary = np.unpackbits(np.array([range(largest_number)], dtype=np.uint8).T,
                       axis=1)
for i in range(largest_number):
    int2binary[i] = binary[i]

# Model hyper-parameters
input_dim = 2
hidden_dim = 16
output_dim = 1
learing_rate = 1e-1

# Initialise the model parameters
# Model: h(t) = sigmoid(Ux + Vh(t-1)) -> output(t) = sigmoid(Wh(t))
U = np.random.randn(input_dim, hidden_dim)
V = np.random.randn(hidden_dim, hidden_dim)
W = np.random.randn(hidden_dim, output_dim)

# Gradient accumulators, one per parameter matrix
dU = np.zeros_like(U)
dV = np.zeros_like(V)
dW = np.zeros_like(W)

iterations = 20000
# Training loop: one sample per update, no batching
for i in range(iterations):
    # Generate a simple addition problem (a + b = c). a and b are drawn below
    # largest_number / 2 so that c still fits in `binary_dim` bits.
    # NOTE(review): `largest_number / 2` is a float in Python 3; `//` would pass an int to randint.
    a_int = np.random.randint(largest_number / 2)
    a = int2binary[a_int]
    b_int = np.random.randint(largest_number / 2)
    b = int2binary[b_int]

    c_int = a_int + b_int
    c = int2binary[c_int]

    # Predicted bits for this sample, filled in during the forward pass
    d = np.zeros_like(c)
    # Training sample: X has one row per bit position with the bits of a and b as
    # columns; y holds the matching bit of c.
    X = np.array([a, b]).T
    y = np.array([c]).T

    loss = 0  # loss accumulated over the time steps of this sample

    hs = []  # hidden state at every time step
    hs.append(np.zeros((1, hidden_dim)))  # hidden state before the first step is all zeros
    # NOTE(review): `os` is also the name of a standard-library module; a different name would avoid confusion.
    os = []  # prediction at every time step

    # Forward pass: bits are consumed from the last position (least significant) to the first
    for t in range(binary_dim):
        # Input features for the current time step
        xt = X[binary_dim - t - 1]
        # Hidden layer
        ht = sigmoid(xt.dot(U) + hs[-1].dot(V))
        # Output layer
        ot = sigmoid(ht.dot(W))
        # Store the results; hs[t + 1] and os[t] belong to time step t
        hs.append(ht)
        os.append(ot)
        # Accumulate the reported loss as absolute (L1) error
        # NOTE(review): the backward pass uses d_ot = ot - y, which is the gradient of a
        # squared-error term, so this L1 value is a monitor rather than the differentiated objective.
        loss += np.abs(ot - y[binary_dim - t - 1])[0][0]
        # Predicted bit: the output rounded to 0 or 1
        d[binary_dim - t - 1] = np.round(ot)[0][0]

    # Backward pass (back-propagation through time), from the last time step to the first
    future_d_ht = np.zeros((1, hidden_dim))  # gradient flowing back from the following time step
    for t in reversed(range(binary_dim)):
        xt = X[binary_dim - t - 1].reshape(1, -1)
        ht = hs[t+1]
        ht_prev = hs[t]
        ot = os[t]
        # d_loss/d_ot
        d_ot = ot - y[binary_dim - t - 1]
        d_ot_output = sigmoid_derivative(ot) * d_ot
        dW += ht.T.dot(d_ot_output)
        d_ht = d_ot_output.dot(W.T) + future_d_ht  # include the gradient arriving from the following time step
        d_ht_output = sigmoid_derivative(ht) * d_ht
        dU += xt.T.dot(d_ht_output)
        dV += ht_prev.T.dot(d_ht_output)

        # Gradient handed to the preceding time step through the recurrent weights
        future_d_ht = d_ht_output.dot(V.T)

    # SGD parameter update
    U -= learing_rate * dU
    V -= learing_rate * dV
    W -= learing_rate * dW

    # Reset the accumulators in place for the next sample
    dU *= 0
    dV *= 0
    dW *= 0

    # Every 1000 iterations, print the loss and the prediction
    if (i % 1000 == 0):
        print("loss:" + str(loss))
        print("Pred:" + str(d))
        print("True:" + str(c))
        # Convert the predicted bits back to an integer (the last bit is the least significant)
        out = 0
        for index, x in enumerate(reversed(d)):
            out += x * pow(2, index)
        print(str(a_int) + " + " + str(b_int) + " = " + str(out))
        print("------------")

loss:3.9963178148149465
Pred:[1 1 1 1 1 1 1 1]
True:[0 1 1 1 0 0 0 1]
86 + 27 = 255
------------
loss:4.234053567702413
Pred:[1 0 1 0 1 0 1 1]
True:[1 1 0 1 0 1 0 0]
119 + 93 = 171
------------
loss:3.6456427813137875
Pred:[1 1 1 0 0 0 1 1]
True:[1 0 1 1 0 0 1 0]
121 + 57 = 227
------------
loss:2.6733470877103773
Pred:[0 1 0 1 1 1 1 0]
True:[0 1 0 1 1 1 1 0]
63 + 31 = 94
------------
loss:1.6113645322097327
Pred:[1 0 0 0 0 0 1 1]
True:[1 0 0 0 0 0 1 1]
49 + 82 = 131
------------
loss:0.48996264820318886
Pred:[0 1 1 1 1 1 1 1]
True:[0 1 1 1 1 1 1 1]
84 + 43 = 127
------------
loss:0.5103971606538684
Pred:[1 0 0 1 1 1 0 0]
True:[1 0 0 1 1 1 0 0]
37 + 119 = 156
------------
loss:0.39572893953758626
Pred:[1 0 0 1 0 1 0 0]
True:[1 0 0 1 0 1 0 0]
100 + 48 = 148
------------
loss:0.49936533673331374
Pred:[1 1 0 0 1 1 0 0]
True:[1 1 0 0 1 1 0 0]
79 + 125 = 204
------------
loss:0.37111513571270416
Pred:[0 1 1 0 0 0 0 1]
True:[0 1 1 0 0 0 0 1]
70 + 27 = 97
------------
loss:0.3093858794284285
