# 회귀 - 보스턴 집값 예측

In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf

from tensorflow import keras
from tensorflow.keras.models import Sequential
#층을 담는 그릇(Sequential은 keras에 포함되어 있는데
# 딥러닝 모델을 한층 한층 쌓기 쉽게 해주는 함수)
from tensorflow.keras.layers import Dense
#층을 만드는 것(각 층이 가질 특성을 각각 다르게 지정 가능) 
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping  # 모델 최고값 저장
import warnings
warnings.filterwarnings("ignore")

In [3]:
# Load the Boston housing dataset into `boston`, an object exposing `.data`
# (feature matrix) and `.target` (prices).
#
# `load_boston` was deprecated in scikit-learn 1.0 and removed in 1.2, where
# the import itself raises ImportError. In that case the same arrays are
# rebuilt from the original StatLib file, following the recipe given in
# scikit-learn's deprecation notice (this fallback needs network access).
try:
    from sklearn.datasets import load_boston
    boston = load_boston()
except ImportError:
    from sklearn.utils import Bunch

    # After a 22-line header, each record spans two physical lines:
    # 11 values on the first line, then 3 on the second (the last is the target).
    _raw = pd.read_csv(
        "http://lib.stat.cmu.edu/datasets/boston",
        sep=r"\s+",
        skiprows=22,
        header=None,
    )
    boston = Bunch(
        data=np.hstack([_raw.values[::2, :], _raw.values[1::2, :2]]),
        target=_raw.values[1::2, 2],
        feature_names=np.array([
            'CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE',
            'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT',
        ]),
    )

In [4]:
# Pin the random seed for both TensorFlow and NumPy; the same value is
# reused later as the random_state of the train/test split.
seed = 2021
tf.random.set_seed(seed)
np.random.seed(seed)

In [5]:
from sklearn.model_selection import train_test_split

# Hold out 10% of the samples as a test set; the split is made reproducible
# by passing the shared seed as random_state.
X_train, X_test, y_train, y_test = train_test_split(
    boston.data,
    boston.target,
    random_state=seed,
    test_size=0.1,
)
# Display the shapes of the four resulting arrays.
tuple(split.shape for split in (X_train, X_test, y_train, y_test))

((455, 13), (51, 13), (455,), (51,))

# 모델 정의/설정/학습

In [6]:
# Build a small fully connected regression network:
#   input -> Dense(30, relu) -> Dense(6, relu) -> Dense(1).
# The input width is taken from the training matrix rather than hard-coded,
# so the model stays in sync with the feature matrix if its columns change.
model = Sequential([
    Dense(30, input_dim=X_train.shape[1], activation='relu'),
    Dense(6, activation='relu'),
    Dense(1),  # no activation specified: a single unbounded output value
])
# Print the layer-by-layer structure and parameter counts.
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 30)                420       
_________________________________________________________________
dense_1 (Dense)              (None, 6)                 186       
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 7         
Total params: 613
Trainable params: 613
Non-trainable params: 0
_________________________________________________________________


In [8]:
# Compile the model: Adam optimizer minimizing mean squared error,
# the loss chosen here because this is a regression task.
model.compile(optimizer='adam', loss='mean_squared_error')

In [9]:
# Train on the training split for 500 epochs with batches of 40 samples.
# verbose=0 suppresses the per-epoch progress output; the object returned
# by fit() is kept in `history`.
history = model.fit(
    X_train,
    y_train,
    batch_size=40,
    epochs=500,
    verbose=0,
)

# 평가

In [10]:
# Run the trained model on the held-out test features.
pred = model.predict(X_test)
# Display the shape of the prediction array.
pred.shape

(51, 1)

In [11]:
# Display the shape of the true test targets, for comparison with the
# shape of the predictions.
y_test.shape

(51,)

In [12]:
# Collapse the predictions to one dimension so they line up with y_test.
pred = pred.flatten()   # 2D -> 1D
# Display the new shape.
pred.shape

(51,)

# - 예측값과 실제값의 비교

In [14]:
# Place the true prices and the model's predictions side by side,
# one row per test sample, and show the first ten rows.
price_columns = {'실제가격': y_test, '예측가격': pred}
df = pd.DataFrame(price_columns)
df.head(10)

Unnamed: 0,실제가격,예측가격
0,21.7,17.551901
1,15.6,16.504126
2,20.0,19.020443
3,12.8,13.016994
4,50.0,44.522835
5,20.6,15.77732
6,22.6,23.189152
7,24.1,25.792273
8,24.4,19.575884
9,36.1,33.336262
