In [139]:
import paddle 
from paddle.nn import Linear
import paddle.nn.functional as F

import numpy as np
import pandas as pd
import os
import random
import warnings
warnings.filterwarnings('ignore')

import sklearn.datasets as datasets

In [140]:
def load_data():
    load_boston = datasets.load_boston()
    X = pd.DataFrame(load_boston['data'],columns=load_boston['feature_names'])
    y = pd.DataFrame(load_boston['target'],columns=['target'])
    data = pd.concat([X,y],axis=1).values
    feature_num = len(load_boston['feature_names']) 
    
    # 划分训练集
    ratio = 0.8
    offset = int(data.shape[0]*ratio)
    training_data = data[:offset]

    # 归一化
    maximums, minimums = training_data.max(axis=0), training_data.min(axis=0)
    
    # 全局变量 记录数据的归一化参数，在预测时对数据做归一化
    # 注意：只拿训练集的归一化参数进行归一化，不然会泄露到测试集
    global max_values
    global min_values
   
    max_values = maximums
    min_values = minimums
    
    # 对数据进行归一化
    for i in range(feature_num):
        data[:,i] = (data[:,i]- min_values[i]) / (max_values[i]- min_values[i])
        
    
    training_data = data[:offset]
    test_data = data[offset:]
    return training_data , test_data
        

In [141]:
training_data, test_data = load_data()
print(training_data.shape)
print(training_data[1,:])

(404, 14)
[2.35922539e-04 0.00000000e+00 2.62405717e-01 0.00000000e+00
 1.72839506e-01 5.47997701e-01 7.82698249e-01 3.48961980e-01
 4.34782609e-02 1.14822547e-01 5.53191489e-01 1.00000000e+00
 2.04470199e-01 2.16000000e+01]


In [142]:
class Regressor(paddle.nn.Layer):
    
    def __init__(self):
        # 初始化 父类参数
        super(Regressor,self).__init__()
        # 定义网络，全连接
        self.fc = Linear(in_features=13,out_features=1)
        
    def forward(self,input):
        x = self.fc(input)
        return x


In [143]:
# 初始化类实例
model = Regressor()
# 开启训练
model.train()
#定义优化算法
opt = paddle.optimizer.SGD(learning_rate=0.01,parameters=model.parameters())


In [148]:
# 设置外层循环次数、batch大小
EPOCH_NUM = 10   
BATCH_SIZE = 10  

# 分10个epoch，每个epoch按10个样本为一个batch
for epoch_id in range(EPOCH_NUM):
    # 训练数据随机打散
    np.random.shuffle(training_data)
    mini_batches = [training_data[k:k+BATCH_SIZE] for k in range(0,len(training_data),BATCH_SIZE)]
    for iter_id , mini_batch in enumerate(mini_batches):
        x = np.array(mini_batch[:, :-1])
        y = np.array(mini_batch[:,-1])
        
        # 转成tensor 
        # 天坑：不指定float32就报错，不懂~
        house_features = paddle.to_tensor(x,dtype='float32')
        prices = paddle.to_tensor(y,dtype='float32')
        
        # 前向传播
        predicts = model.forward(house_features)
        
        #计算损失
        loss = F.square_error_cost(predicts,label=prices)
        avg_loss = paddle.mean(loss)
        # 每 20个样本打印一次loss
        if iter_id % 20 == 0 :
            print("epoch: {}, iter: {}, loss is: {}".format(epoch_id, iter_id, avg_loss.numpy()))
        
        # 反向传播 计算梯度
        avg_loss.backward()
        # 更新参数
        opt.step()
        # 清除梯度
        opt.clear_grad()
            


epoch: 0, iter: 0, loss is: [50.793915]
epoch: 0, iter: 20, loss is: [55.211212]
epoch: 0, iter: 40, loss is: [200.32037]
epoch: 1, iter: 0, loss is: [125.28409]
epoch: 1, iter: 20, loss is: [153.13754]
epoch: 1, iter: 40, loss is: [86.842285]
epoch: 2, iter: 0, loss is: [95.10344]
epoch: 2, iter: 20, loss is: [62.061363]
epoch: 2, iter: 40, loss is: [19.123714]
epoch: 3, iter: 0, loss is: [207.5177]
epoch: 3, iter: 20, loss is: [135.27391]
epoch: 3, iter: 40, loss is: [114.60943]
epoch: 4, iter: 0, loss is: [30.73014]
epoch: 4, iter: 20, loss is: [87.01141]
epoch: 4, iter: 40, loss is: [39.162884]
epoch: 5, iter: 0, loss is: [142.39726]
epoch: 5, iter: 20, loss is: [52.89307]
epoch: 5, iter: 40, loss is: [183.2455]
epoch: 6, iter: 0, loss is: [32.101078]
epoch: 6, iter: 20, loss is: [89.32934]
epoch: 6, iter: 40, loss is: [57.3537]
epoch: 7, iter: 0, loss is: [32.868263]
epoch: 7, iter: 20, loss is: [157.14088]
epoch: 7, iter: 40, loss is: [174.61716]
epoch: 8, iter: 0, loss is: [131.

In [149]:
# 模型保存 ,用于后续预测、调用
paddle.save(model.state_dict(),'LR_model.pdparams')

In [150]:
# 预测

model_dict = paddle.load('LR_model.pdparams')
model.load_dict(model_dict)
# 模型调整为预测（校验）模式
model.eval()


test_x = paddle.to_tensor(test_data[:3, :-1],dtype='float32')
test_y = paddle.to_tensor(test_data[:3, -1],dtype='float32')

# 预测
predict = model.forward(test_x)

# # 对结果做反归一化处理
# predict = predict * (max_values[-1] - min_values[-1]) + min_values[-1]
# # 对label数据做反归一化处理
# test_y = test_y * (max_values[-1] - min_values[-1]) + min_values[-1]

print("Inference result is {}, '\n' the corresponding label is {}".format(predict.numpy(), test_y))

Inference result is [[21.835506]
 [23.405983]
 [21.526829]], '
' the corresponding label is Tensor(shape=[3], dtype=float32, place=Place(cpu), stop_gradient=True,
       [8.50000000 , 5.         , 11.89999962])
