### 数值计算实验
#### 实验目的
* 手动实现求导和梯度下降
* 实现一个简单的DNN模型，并在iris数据集上进行实验

In [1]:
import numpy as np
import pandas as pd

#### 基本操作的实现

In [2]:
class Operation:
    """Base class for a differentiable operation.

    ``forward`` caches its input on ``self.inp``; ``backward`` receives an
    upstream gradient. Both return ``None`` here and are meant to be
    overridden by subclasses.
    """

    def __init__(self):
        # Empty placeholder until the first forward pass stores a real input.
        self.inp = np.zeros(0)

    def __call__(self, *args, **kwargs):
        # Calling the object runs the forward pass on the first positional
        # argument; any further arguments are ignored.
        first = args[0]
        return self.forward(first)

    def forward(self, inp):
        """Remember ``inp`` so that ``backward`` can use it later."""
        self.inp = inp
        return None

    def backward(self, grad):
        """Placeholder for the gradient computation; does nothing here."""
        return None


class Linear(Operation):
    """Fully connected layer computing ``inp @ k + b``.

    Attributes:
        k: weight matrix of shape ``(inp_size, out_size)``.
        b: bias vector of shape ``(out_size,)``.
        k_grad, b_grad: gradients of the loss w.r.t. ``k`` and ``b``,
            overwritten by every call to ``backward``.
    """

    def __init__(self, inp_size, out_size):
        super().__init__()
        # Parameters are drawn uniformly from [-0.5, 0.5).
        self.k = np.random.rand(inp_size, out_size) - 0.5
        self.b = np.random.rand(out_size) - 0.5
        # Gradient buffers start at exactly zero so that an optimiser step
        # taken before any backward pass leaves the parameters untouched.
        self.k_grad = np.zeros((inp_size, out_size))
        self.b_grad = np.zeros(out_size)

    def forward(self, inp):
        """Cache ``inp`` and return ``inp @ k + b``."""
        super().forward(inp)
        return np.matmul(inp, self.k) + self.b

    def backward(self, grad):
        """Store the parameter gradients and return the input gradient.

        ``grad`` is the gradient w.r.t. this layer's output; it is summed
        over axis 0 for the bias and multiplied with the cached input for
        the weights.
        """
        self.b_grad = grad.sum(axis=0)
        self.k_grad = np.matmul(self.inp.transpose(), grad)
        return np.matmul(grad, self.k.transpose())


class ReLU(Operation):
    """Element-wise rectifier ``max(inp, 0)``."""

    def __init__(self):
        super().__init__()

    def forward(self, inp):
        """Cache ``inp`` and return a new array with negatives set to 0.

        The input is left unmodified: clamping it in place would also clamp
        the cached ``self.inp`` (same object), and ``backward`` would then
        no longer know which entries were negative.
        """
        super().forward(inp)
        return np.maximum(inp, 0)

    def backward(self, grad):
        """Return ``grad`` with entries zeroed where the cached input was negative.

        A new array is returned; the caller's ``grad`` is not modified.
        """
        return np.where(self.inp < 0, 0, grad)


class Sigmoid(Operation):
    """Element-wise logistic sigmoid ``1 / (1 + exp(-inp))``."""

    def __init__(self):
        super().__init__()

    @staticmethod
    def _activate(inp):
        """Evaluate the sigmoid of ``inp``."""
        # exp(-inp) overflows to inf for very negative inputs; 1 / (inf + 1)
        # still evaluates to the correct limit 0, so only the warning is
        # suppressed here.
        with np.errstate(over="ignore"):
            return 1 / (np.exp(-inp) + 1)

    def forward(self, inp):
        """Cache ``inp`` and return its sigmoid."""
        super().forward(inp)
        return self._activate(inp)

    def backward(self, grad):
        """Return ``grad`` scaled by the sigmoid derivative at the cached input.

        The derivative is written as ``s * (1 - s)``. The algebraically
        equal form ``exp(-x) * s**2`` evaluates to ``inf * 0 = nan`` once
        ``exp(-x)`` overflows, which poisons every upstream gradient.
        """
        output = self._activate(self.inp)
        return output * (1 - output) * grad


class L2Loss(Operation):
    """Sum-of-squared-errors loss against a fixed ``label``."""

    def __init__(self, label):
        super().__init__()
        # Target values that every prediction is compared against.
        self.label = label

    def forward(self, inp):
        """Cache ``inp`` and return the sum of squared differences to the label."""
        super().forward(inp)
        residual = inp - self.label
        return np.sum(np.power(residual, 2))

    def backward(self, grad):
        """Return ``2 * (inp - label) * grad`` using the cached prediction."""
        residual = self.inp - self.label
        return 2 * residual * grad


In [3]:
# 基于基本操作实现DNN模型
class Dnn:
    """Small network: Linear -> ReLU -> Linear (one output) -> Sigmoid.

    ``loss_value`` attaches an ``L2Loss`` for the given labels, ``backward``
    propagates its gradient through the layers, and ``optm`` applies one
    gradient-descent step to both linear layers.
    """

    def __init__(self, inp_size, hidden_nodes, batch_size):
        # Stored for callers; no method of this class reads it.
        self.batch_size = batch_size
        self.l1 = Linear(inp_size, hidden_nodes)
        self.relu = ReLU()
        self.l2 = Linear(hidden_nodes, 1)
        self.sigmoid = Sigmoid()
        # Placeholder loss; replaced on every call to loss_value.
        self.loss = L2Loss([])

    def __call__(self, *args, **kwargs):
        # Calling the model runs a forward pass on the first positional argument.
        return self.forward(args[0])

    def forward(self, inp):
        """Run ``inp`` through all layers and return the sigmoid output."""
        activation = inp
        for layer in (self.l1, self.relu, self.l2, self.sigmoid):
            activation = layer(activation)
        return activation

    def loss_value(self, inp, label):
        """Run a forward pass and return the L2 loss of its output against ``label``."""
        prediction = self.forward(inp)
        self.loss = L2Loss(label)
        return self.loss(prediction)

    def backward(self):
        """Back-propagate from the current loss; returns the gradient w.r.t. the input."""
        grad = self.loss.backward(1)
        for layer in (self.sigmoid, self.l2, self.relu, self.l1):
            grad = layer.backward(grad)
        return grad

    def optm(self, l):
        """Update weights and biases in place with step size ``l``."""
        for layer in (self.l1, self.l2):
            layer.k -= l * layer.k_grad
            layer.b -= l * layer.b_grad


#### 在iris数据集上训练及测试

In [4]:
# Load the iris data set; the cells below rely on the columns f1-f4 and label.
datas = pd.read_csv("/data1/iris.data")

In [5]:
# Binary target column: 1 for Iris-setosa rows, 0 for every other row.
is_setosa = datas["label"] == "Iris-setosa"
datas.loc[is_setosa, "label_bool"] = 1
datas.loc[~is_setosa, "label_bool"] = 0

In [6]:
# Preview ten randomly chosen rows, including the new label_bool column.
datas.sample(10)

Unnamed: 0,f1,f2,f3,f4,label,label_bool
121,5.6,2.8,4.9,2.0,Iris-virginica,0.0
147,6.5,3.0,5.2,2.0,Iris-virginica,0.0
53,5.5,2.3,4.0,1.3,Iris-versicolor,0.0
22,4.6,3.6,1.0,0.2,Iris-setosa,1.0
129,7.2,3.0,5.8,1.6,Iris-virginica,0.0
57,4.9,2.4,3.3,1.0,Iris-versicolor,0.0
52,6.9,3.1,4.9,1.5,Iris-versicolor,0.0
11,4.8,3.4,1.6,0.2,Iris-setosa,1.0
20,5.4,3.4,1.7,0.2,Iris-setosa,1.0
70,5.9,3.2,4.8,1.8,Iris-versicolor,0.0


In [7]:
# Keep only the four feature columns and the binary target.
datas = datas.loc[:, ["f1", "f2", "f3", "f4", "label_bool"]]

In [8]:
# Train for 1000 steps of mini-batch gradient descent on random batches.
m = Dnn(4, 20, 15)
for i in range(0, 1000):
    # Use the batch size stored on the model instead of repeating the literal.
    t = datas.sample(m.batch_size)
    tf = t[["f1", "f2", "f3", "f4"]]
    # loss_value runs the forward pass itself, so no separate m(...) call is
    # needed. Labels are reshaped to a column to line up with the
    # one-output-per-row prediction.
    loss = m.loss_value(tf.values, t["label_bool"].values.reshape(-1, 1))
    if i % 100 == 0:
        print(loss)
    m.backward()
    m.optm(0.1)

9.906133281506982
5.999831306129411
2.9998719755558625
2.9998576731173774
3.999596848200819
3.9935063085696276
0.014295955296485221
0.0009333518819264006
0.0010388260442712738
0.0010148543295591939


In [9]:
# Draw a fresh random batch of 15 rows and split off its feature columns.
t = datas.sample(15)
tf = t.loc[:, ["f1", "f2", "f3", "f4"]]

In [10]:
# Forward pass on the sampled features; the sigmoid outputs are displayed below.
m(tf.values)

array([[9.78928532e-01],
       [4.46041558e-03],
       [9.99233038e-01],
       [5.66360301e-04],
       [3.68723359e-05],
       [9.99483837e-01],
       [4.73294911e-03],
       [2.54151343e-03],
       [9.91031053e-01],
       [9.96397577e-01],
       [4.46202802e-03],
       [9.99095849e-01],
       [3.07506143e-05],
       [1.72161469e-04],
       [1.83486180e-04]])

In [11]:
# Display the sampled rows so label_bool can be compared with the outputs above.
t

Unnamed: 0,f1,f2,f3,f4,label_bool
24,4.8,3.4,1.9,0.2,1.0
69,5.6,2.5,3.9,1.1,0.0
32,5.2,4.1,1.5,0.1,1.0
76,6.8,2.8,4.8,1.4,0.0
134,6.1,2.6,5.6,1.4,0.0
15,5.7,4.4,1.5,0.4,1.0
71,6.1,2.8,4.0,1.3,0.0
97,6.2,2.9,4.3,1.3,0.0
1,4.9,3.0,1.4,0.2,1.0
27,5.2,3.5,1.5,0.2,1.0
