In [1]:
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import os
import sys
import time
import sklearn
import tensorflow as tf
from tensorflow import keras
import pandas as pd

print(tf.__version__)
print(sys.version_info)
for module in mpl, np, pd,  sklearn, tf, keras:
    print(module.__name__, module.__version__)

2.1.0
sys.version_info(major=3, minor=7, micro=3, releaselevel='final', serial=0)
matplotlib 3.1.3
numpy 1.18.1
pandas 1.0.1
sklearn 0.21.0
tensorflow 2.1.0
tensorflow_core.python.keras.api._v2.keras 2.2.4-tf


In [2]:
from sklearn.datasets import fetch_california_housing

housing = fetch_california_housing()
print(housing.DESCR)
print(housing.data.shape)
print(housing.target.shape)

.. _california_housing_dataset:

California Housing dataset
--------------------------

**Data Set Characteristics:**

    :Number of Instances: 20640

    :Number of Attributes: 8 numeric, predictive attributes and the target

    :Attribute Information:
        - MedInc        median income in block
        - HouseAge      median house age in block
        - AveRooms      average number of rooms
        - AveBedrms     average number of bedrooms
        - Population    block population
        - AveOccup      average house occupancy
        - Latitude      house block latitude
        - Longitude     house block longitude

    :Missing Attribute Values: None

This dataset was obtained from the StatLib repository.
http://lib.stat.cmu.edu/datasets/

The target variable is the median house value for California districts.

This dataset was derived from the 1990 U.S. census, using one row per census
block group. A block group is the smallest geographical unit for which the U.S.
Census Bur

In [3]:
from sklearn.model_selection import train_test_split

x_train_all, x_test, y_train_all, y_test = train_test_split(housing.data, housing.target, random_state =7)
x_train, x_valid, y_train, y_valid = train_test_split(x_train_all, y_train_all, random_state =11)

print(x_valid.shape, y_valid.shape)
print(x_train.shape, y_train.shape)
print(x_test.shape, y_test.shape)

(3870, 8) (3870,)
(11610, 8) (11610,)
(5160, 8) (5160,)


In [4]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
x_train_scaled = scaler.fit_transform(x_train)
x_valid_scaled = scaler.transform(x_valid)
x_test_scaled = scaler.transform(x_test)

In [6]:
# metric 使用
metric = keras.metrics.MeanSquaredError()
print(metric([5.], [2.]))
print(metric([1.], [2.]))
print(metric.result())

metric.reset_states()
metric([1.], [3.])
print(metric.result())

tf.Tensor(9.0, shape=(), dtype=float32)
tf.Tensor(5.0, shape=(), dtype=float32)
tf.Tensor(5.0, shape=(), dtype=float32)
tf.Tensor(4.0, shape=(), dtype=float32)


In [9]:
# 1. batch 遍历训练集 metric
#      1.1 自动求导
# 2. epoch结束 验证集 metric
epochs = 100
batch_size = 32
steps_per_epoch = len(x_train_scaled) // batch_size
optimizer = keras.optimizers.SGD()
metric = keras.metrics.MeanSquaredError()

def random_batch(x, y, batch_size = 32):
    idx = np.random.randint(0, len(x), size = batch_size)
    return x[idx], y[idx]

model = keras.models.Sequential([
    keras.layers.Dense(30, activation='relu',
                       input_shape = x_train.shape[1:]),
    keras.layers.Dense(1),
])

for epoch in range(epochs):
    metric.reset_states()
    for step in range(steps_per_epoch):
        x_batch, y_batch = random_batch(x_train_scaled, y_train, batch_size)
        with tf.GradientTape() as tape:
            y_pred = model(x_batch)
            loss = tf.reduce_mean(keras.losses.mean_squared_error(y_batch, y_pred))
            metric(y_batch, y_pred)
        grads = tape.gradient(loss, model.variables)
        grads_and_vars = zip(grads, model.variables)
        optimizer.apply_gradients(grads_and_vars)
        print('\rEpoch', epoch, 'train_mse: ',  metric.result().numpy(), end = '')
    y_valid_pred = model(x_valid_scaled)
    valid_loss = tf.reduce_mean(keras.losses.mean_squared_error(y_valid, y_valid_pred))
    print('\t', 'valid_mse: ', valid_loss.numpy())



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

Epoch 0 train_mse:  1.8239747	 valid_mse:  2.2994666
Epoch 1 train_mse:  3.14710281 train_mse:  1.5223792	 valid_mse:  2.0310862
Epoch 2 train_mse:  5.0915694	 valid_mse:  1.7280037
Epoch 3 train_mse:  1.4735624	 valid_mse:  1.4075376
Epoch 4 train_mse:  1.267382	 valid_mse:  1.3952736
Epoch 5 train_mse:  1.2417814	 valid_mse:  1.3898062
Epoch 6 train_mse:  1.2771201	 valid_mse:  1.39172
Epoch 7 train_mse:  1.2508684	 valid_mse:  1.3902473
Epoch 8 train_mse:  1.2418983	 valid_mse:  1.3904111
Epoch 9 train_mse:  1.2578303	 valid_mse:  1.3868707
Epoch 10 train_mse:  1.250557	 valid_mse:  1.3888435
Epoch 11 train_mse:  1.2542729	 valid_mse:  1.3886325
Epoch 12 train_mse:  1.2667555	 valid_mse:

In [10]:
model.variables

[<tf.Variable 'dense_4/kernel:0' shape=(8, 30) dtype=float32, numpy=
 array([[-3.00352693e-01, -4.87425923e-02,  6.64812746e-03,
          8.87374505e-02, -2.78891355e-01,  7.11354660e-03,
          8.11677203e-02,  4.55152104e-03,  2.34965593e-01,
          2.70316780e-01,  3.92019795e-03,  2.44341940e-02,
          4.65687327e-02, -1.27483591e-01, -1.51236534e-01,
          5.93427457e-02,  2.22092569e-02,  2.24588085e-02,
         -2.11796209e-01,  7.08153099e-02,  1.19280450e-01,
         -8.60700756e-02, -1.88025445e-01,  1.97396412e-01,
          3.59093606e-01,  2.05393568e-01,  3.63285281e-02,
         -2.80048370e-01, -5.55911846e-03,  1.52606249e-01],
        [-1.18966311e-01, -4.08890918e-02,  5.68286479e-02,
         -9.79080796e-02, -1.78452045e-01, -1.17971689e-01,
          1.88890714e-02, -1.51860074e-03, -2.44096175e-01,
          2.60230213e-01, -1.06924577e-02,  1.28615886e-01,
         -4.09584790e-02, -5.67833474e-03,  3.09483379e-01,
          1.80083755e-02, -3.7

In [None]:
history = model.fit(x_train_scaled, y_train,
                    epochs = 100,
                    validation_data = [x_valid_scaled, y_valid],
                    callbacks = callbacks)

In [None]:
def plot_learning_curves(history):
    pd.DataFrame(history.history).plot(figsize = (8, 5))
    plt.grid(True)
    plt.gca().set_ylim(0, 1)
    plt.show()
    
plot_learning_curves(history)