# Boston HousingをKerasでやってみた

In [1]:
import keras
from keras.datasets import boston_housing

import numpy as np

Using TensorFlow backend.


### データセットの読み込み

In [2]:
# Load the Boston Housing dataset shipped with Keras. load_data() returns a
# (train, test) pair, each of which is itself a (features, labels) pair.
train_split, test_split = keras.datasets.boston_housing.load_data()
train_data, train_labels = train_split
test_data, test_labels = test_split


In [3]:
# Report the shape of every split. Each label names the array whose shape is
# actually printed (the test lines were previously mislabelled as 'train_data').
print('train_data = ' + str(train_data.shape))
print('train_labels = ' + str(train_labels.shape))
print('test_data = ' + str(test_data.shape))
print('test_labels = ' + str(test_labels.shape))

train_data = (404, 13)
train_labels = (404,)
train_data = (102, 13)
train_data = (102,)


### データフレームで中身を確認

In [4]:
import pandas as pd

# Japanese display names for the 13 feature columns, listed in the same order
# as the columns of train_data.
column_names = [
    '犯罪率',
    '広い家の割合',
    '非小売業の割合',
    '川の隣か',
    'NOx濃度',
    '平均部屋数',
    '古い家の割合',
    '主要施設への距離',
    '主要高速道路へのアクセス性',
    '固定資産税率',
    '生徒と先生の比率',
    '町ごとの黒人の割合',
    '低所得者人口の割合',
]

# Wrap the raw training features in a labelled DataFrame and show the first rows.
df = pd.DataFrame(data=train_data, columns=column_names)
df.head()

Unnamed: 0,犯罪率,広い家の割合,非小売業の割合,川の隣か,NOx濃度,平均部屋数,古い家の割合,主要施設への距離,主要高速道路へのアクセス性,固定資産税率,生徒と先生の比率,町ごとの黒人の割合,低所得者人口の割合
0,1.23247,0.0,8.14,0.0,0.538,6.142,91.7,3.9769,4.0,307.0,21.0,396.9,18.72
1,0.02177,82.5,2.03,0.0,0.415,7.61,15.7,6.27,2.0,348.0,14.7,395.38,3.11
2,4.89822,0.0,18.1,0.0,0.631,4.97,100.0,1.3325,24.0,666.0,20.2,375.52,3.26
3,0.03961,0.0,5.19,0.0,0.515,6.037,34.5,5.9853,5.0,224.0,20.2,396.9,8.01
4,3.69311,0.0,18.1,0.0,0.713,6.376,88.4,2.5671,24.0,666.0,20.2,391.43,14.65


### 特徴を正規化する（特徴の平均を減算して標準偏差で除算）

In [5]:
# Per-feature statistics are computed from the training split only and then
# applied to both splits, so the test split is scaled with training statistics.
mean = np.mean(train_data, axis=0)
std = np.std(train_data, axis=0)
train_data, test_data = [(split - mean) / std for split in (train_data, test_data)]

### モデルを作成する

In [6]:
from keras.models import Sequential
from keras.layers import Dense

# Fully connected regression network: two 64-unit ReLU hidden layers followed
# by a single linear output unit. The input width is taken from the number of
# feature columns in train_data.
model = Sequential([
    Dense(64, activation='relu', input_shape=(train_data.shape[1],)),
    Dense(64, activation='relu'),
    Dense(1),
])

# Optimize mean squared error with Adam and also track mean absolute error.
model.compile(loss='mse', optimizer='adam', metrics=['mae'])


In [7]:
# Print the layer-by-layer architecture (output shapes and parameter counts)
# of the model built above.
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 64)                896       
_________________________________________________________________
dense_2 (Dense)              (None, 64)                4160      
_________________________________________________________________
dense_3 (Dense)              (None, 1)                 65        
Total params: 5,121
Trainable params: 5,121
Non-trainable params: 0
_________________________________________________________________


### 学習する

In [8]:
# Train for 50 epochs, updating the weights after every single sample
# (batch_size=1). NOTE(review): validation_data is the test split, so the
# per-epoch validation metrics are measured on the same data that is later
# passed to model.evaluate.
history = model.fit(
    x=train_data,
    y=train_labels,
    validation_data=(test_data, test_labels),
    epochs=50,
    batch_size=1,
    verbose=1,
)

Train on 404 samples, validate on 102 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


### 誤差を調査する

In [9]:
# Evaluate on the test split. score[0] is the loss and score[1] is the metric
# requested at compile time (mean absolute error).
score = model.evaluate(test_data, test_labels, verbose=0)

# Scale the error by 1000 for display as a dollar amount
# (presumably the labels are in units of $1000 — TODO confirm).
test_mae_scaled = score[1] * 1000
print()
print("Testing set Mean Abs Error: ${:7.2f}".format(test_mae_scaled))


Testing set Mean Abs Error: $2494.66


### テストデータの正解ラベル（住宅価格）の中身

In [10]:
# Scale the test labels by 1000 and cast to int (the cast truncates any
# fractional part), then display the resulting array.
myArray = np.array(test_labels * 1000).astype(int)
myArray

array([ 7200, 18800, 19000, 27000, 22200, 24500, 31200, 22900, 20500,
       23200, 18600, 14500, 17800, 50000, 20800, 24300, 24200, 19800,
       19100, 22700, 12000, 10200, 20000, 18500, 20900, 23000, 27500,
       30100,  9500, 22000, 21200, 14100, 33100, 23400, 20100,  7400,
       15400, 23800, 20100, 24500, 33000, 28400, 14100, 46700, 32500,
       29600, 28400, 19800, 20200, 25000, 35400, 20300,  9700, 14500,
       34900, 26600,  7200, 50000, 32400, 21600, 29800, 13100, 27500,
       21200, 23100, 21900, 13000, 23200,  8100,  5600, 21700, 29600,
       19600,  7000, 26400, 18900, 20900, 28100, 35400, 10200, 24300,
       43100, 17600, 15400, 16200, 27100, 21400, 21500, 22400, 25000,
       16600, 18600, 22000, 42800, 35100, 21500, 36000, 21900, 24100,
       50000, 26700, 25000])

### テストデータで予測する

In [11]:
# Run the model on the test features, then collapse the prediction array
# into a flat 1-D array of values.
raw_predictions = model.predict(test_data)
test_predictions = raw_predictions.flatten()

In [12]:
# Scale the predictions by 1000 and cast to int (the cast truncates any
# fractional part) so they can be compared by eye with the label array.
myArray = np.array(test_predictions * 1000).astype(int)
myArray

array([ 6945, 17991, 21353, 33486, 24141, 21840, 27688, 21395, 18655,
       18448, 21992, 16100, 14709, 42943, 16936, 19782, 25132, 18713,
       16970, 23542, 10393, 12634, 21656, 15469, 18606, 21837, 28256,
       28418,  9583, 19302, 18573, 13355, 31716, 23377, 17647,  6534,
       15586, 17207, 17462, 26126, 30220, 27492, 12068, 44834, 27030,
       27220, 28059, 18138, 20326, 22274, 35547, 20934,  9024, 13200,
       34358, 27454, 10941, 48286, 31883, 22942, 19866, 13959, 12405,
       19132, 22892, 20357, 11564, 21774, 12320,  6803, 21688, 29336,
       25913, 11850, 25969, 18552, 18940, 24000, 36310,  8848, 21474,
       36651, 16332, 11413, 16786, 16731, 22731, 20980, 21531, 24313,
       20124, 18870, 26199, 44783, 36515, 20075, 34147, 34010, 24948,
       42965, 29457, 18305])

In [41]:
# Persist the trained model to 'boston_housing2_model' (a path relative to the
# current working directory).
model.save('boston_housing2_model')