# 线性回归模型

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import pandas as pd
import sklearn
import os
import sys
import time
import tensorflow as tf

from tensorflow import keras
# Report the TensorFlow and interpreter versions, then the name and version
# of every library this notebook relies on.
print(tf.__version__)
print(sys.version_info)
for lib in (np, pd, sklearn, tf, keras):
    print(lib.__name__, lib.__version__)

In [None]:
from sklearn.datasets import fetch_california_housing

# Load the California housing dataset.
housing = fetch_california_housing()

# Uncomment to read the dataset description:
# print(housing.DESCR)

# Show the shapes of the feature matrix and of the target vector.
for array in (housing.data, housing.target):
    print(array.shape)


In [None]:
from sklearn.model_selection import train_test_split

# First hold out a test set, then split what remains into training and
# validation sets. Fixed seeds keep both splits reproducible.
x_train_all, x_test, y_train_all, y_test = train_test_split(
    housing.data,
    housing.target,
    random_state=7,
)
x_train, x_valid, y_train, y_valid = train_test_split(
    x_train_all,
    y_train_all,
    random_state=11,
)

In [None]:
from sklearn.preprocessing import StandardScaler
# Standardize the features. The scaler is fitted on the training set only and
# then re-used, unchanged, for the validation and test sets.
transfer = StandardScaler()
x_train_scaled = transfer.fit_transform(x_train)
x_valid_scaled = transfer.transform(x_valid)
x_test_scaled = transfer.transform(x_test)

In [None]:
# KerasRegressor workflow:
# 1. Wrap the Keras model as a scikit-learn estimator
# 2. Define the hyper-parameter search space
# 3. Run the search
def build_model(hidden_layers=1,
                layer_size=30,
                learnning_rate=3e-3):
    """Build and compile a fully connected regression network.

    Args:
        hidden_layers: Number of hidden Dense layers. The first hidden layer
            is always created (it declares the input shape), so values
            below 1 still yield one hidden layer.
        layer_size: Number of units in every hidden layer.
        learnning_rate: Learning rate handed to the SGD optimizer. The
            misspelled name is kept on purpose: it is the keyword that
            callers (including hyper-parameter search spaces) pass in.

    Returns:
        A compiled ``keras.models.Sequential`` model with one linear output
        unit and mean-squared-error loss.
    """
    model = keras.models.Sequential()
    # The input width is taken from the global training matrix ``x_train``.
    model.add(keras.layers.Dense(layer_size, activation='relu',
                                 input_shape=x_train.shape[1:]))
    for _ in range(hidden_layers - 1):
        model.add(keras.layers.Dense(layer_size, activation='relu'))
    model.add(keras.layers.Dense(1))

    optimizer = keras.optimizers.SGD(learning_rate=learnning_rate)
    model.compile(loss='mean_squared_error', optimizer=optimizer)
    return model


# NOTE(review): keras.wrappers.scikit_learn is not shipped by every TensorFlow
# release - confirm it exists in the installed version.
sklearn_model = keras.wrappers.scikit_learn.KerasRegressor(build_model)

# Stop training once the validation loss has failed to improve by at least
# 1e-2 for 5 consecutive epochs.
callbacks = [keras.callbacks.EarlyStopping(patience=5, min_delta=1e-2)]
# The Keras fit argument is ``epochs`` (plural); the result must be bound to
# ``history`` because the plotting cell reads that name.
history = sklearn_model.fit(x_train_scaled, y_train, epochs=100,
                            validation_data=(x_valid_scaled, y_valid),
                            callbacks=callbacks)

In [None]:
def plot_learning_curves(history):
    """Plot every metric recorded in ``history.history`` against the epoch.

    Args:
        history: Object whose ``history`` attribute can be turned into a
            pandas DataFrame (one column per recorded metric).
    """
    metrics = pd.DataFrame(history.history)
    metrics.plot(figsize=[8, 5])
    plt.grid(True)
    axes = plt.gca()
    axes.set_ylim(0, 1)  # clamp the y-axis to [0, 1]
    plt.show()


plot_learning_curves(history)

In [None]:
# model.evaluate(x_test_scaled, y_test, verbose=2)
#sklearn_model没有evaluate函数

In [None]:
from scipy.stats import reciprocal
from sklearn.model_selection import RandomizedSearchCV

# Search space. Every key must match a keyword argument of build_model
# exactly (including the spelling ``learnning_rate``); otherwise the sampled
# value cannot be routed to the model builder.
param_distribution = {
    'hidden_layers': [1, 2, 3, 4, 5],
    'layer_size': np.arange(1, 100),
    # Log-uniform (reciprocal) distribution over [1e-4, 1e-2].
    'learnning_rate': reciprocal(1e-4, 1e-2),
}

estimator = RandomizedSearchCV(
    sklearn_model,        # estimator to tune
    param_distribution,   # search space defined above
    n_iter=10,            # number of parameter settings to sample
    # NOTE(review): parallel workers require the Keras wrapper to be
    # picklable; if the search fails on that, fall back to n_jobs=1.
    n_jobs=5,             # number of jobs to run in parallel
)

# Stop training once the validation loss has failed to improve by at least
# 1e-2 for 5 consecutive epochs.
callbacks = [keras.callbacks.EarlyStopping(patience=5, min_delta=1e-2)]
# Extra keyword arguments are forwarded to the Keras fit call, whose argument
# for the number of passes is ``epochs`` (plural).
estimator.fit(x_train_scaled, y_train, epochs=100,
              validation_data=(x_valid_scaled, y_valid),
              callbacks=callbacks)

In [4]:
# reciprocal函数演示
# from scipy.stats import reciprocal
# # f(x) = 1/(x*log(b/a))     a<x<b
# reciprocal.rvs(1e-4, 1e-2, size =10)

array([0.00079598, 0.00014802, 0.00016905, 0.00495027, 0.00032098,
       0.00118896, 0.00122923, 0.00014384, 0.00020629, 0.00061938])

In [None]:
# Report the outcome of the search: best parameters, best score, and the
# best estimator. Attributes are looked up one at a time, in this order,
# right before each one is printed.
report_fields = (
    ('最佳参数：\n', 'best_params_'),
    ('最佳结果:\n', 'best_score_'),
    ('最佳估计器：\n', 'best_estimator_'),
)
for label, attribute in report_fields:
    print(label, getattr(estimator, attribute))
# Full cross-validation results (verbose), left disabled:
# print('交叉验证结果:\n',estimator.cv_results_)

In [None]:
# Take the Keras model held by the best estimator found by the search and
# evaluate it on the scaled test set.
model = estimator.best_estimator_.model
model.evaluate(x=x_test_scaled, y=y_test)