#### 나이브 베이즈 Regression
- 알고리즘 : BayesianRidge
- data : 주택가격(house_price.csv)

In [1]:
# X,y 나누기
import warnings
warnings.filterwarnings("ignore")
import pandas as pd
data2 = pd.read_csv('house_price.csv')
X = data2[data2.columns[1:5]]
y = data2[['house_value']]

In [2]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

In [3]:
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
scaler.fit(X_train)
X_scaled_train = scaler.transform(X_train)
X_scaled_test = scaler.transform(X_test)

In [4]:
# 모델적용 - train data 
from sklearn.linear_model import BayesianRidge
model = BayesianRidge()
model.fit(X_scaled_train, y_train)
pred_train = model.predict(X_scaled_train)
model.score(X_scaled_train, y_train)

0.5455724466331763

In [5]:
# test data
pred_test = model.predict(X_scaled_test)
model.score(X_scaled_test, y_test)

0.5626859871488648

In [6]:
# RMSE 오차 확인
import numpy as np
from sklearn.metrics import mean_squared_error
MSE_train = mean_squared_error(y_train, pred_train)
MSE_test = mean_squared_error(y_test, pred_test)
print("train data RMSE", np.sqrt(MSE_train))
print("test data RMSE", np.sqrt(MSE_test))

train data RMSE 64340.34302948542
test data RMSE 63220.68115643447


In [7]:
# Grid Search
param_grid = {'alpha_1': [1e-06, 1e-05, 1e-04, 1e-03, 1e-02, 1e-01, 1, 2, 3, 4],
             'lambda_1': [1e-06, 1e-05, 1e-04, 1e-03, 1e-02, 1e-01, 1, 2, 3, 4]}
from sklearn.model_selection import GridSearchCV
grid_search = GridSearchCV(BayesianRidge(), param_grid, cv=5)
grid_search.fit(X_scaled_train, y_train)

GridSearchCV(cv=5, estimator=BayesianRidge(),
             param_grid={'alpha_1': [1e-06, 1e-05, 0.0001, 0.001, 0.01, 0.1, 1,
                                     2, 3, 4],
                         'lambda_1': [1e-06, 1e-05, 0.0001, 0.001, 0.01, 0.1, 1,
                                      2, 3, 4]})

In [8]:
print("Best Parameter: {}".format(grid_search.best_params_))
print("Best Score: {:.4f}".format(grid_search.best_score_))
print("TestSet Score: {:.4f}".format(grid_search.score(X_scaled_test, y_test)))

Best Parameter: {'alpha_1': 4, 'lambda_1': 1e-06}
Best Score: 0.5452
TestSet Score: 0.5627


In [9]:
# Random Search 
from scipy.stats import randint
param_distribs = {'alpha_1': randint(low=1e-06, high=10),
                 'lambda_1': randint(low=1e-06, high=10)}
from sklearn.model_selection import RandomizedSearchCV
random_search = RandomizedSearchCV(BayesianRidge(),
                                  param_distributions = param_distribs,
                                  n_iter=50, cv=5)
random_search.fit(X_scaled_train, y_train)

RandomizedSearchCV(cv=5, estimator=BayesianRidge(), n_iter=50,
                   param_distributions={'alpha_1': <scipy.stats._distn_infrastructure.rv_frozen object at 0x7f9f39e951d0>,
                                        'lambda_1': <scipy.stats._distn_infrastructure.rv_frozen object at 0x7f9f39e95250>})

In [10]:
print("Best Parameter: {}".format(random_search.best_params_))
print("Best Score: {:.4f}".format(random_search.best_score_))
print("TestSet Score: {:.4f}".format(random_search.score(X_scaled_test, y_test)))

Best Parameter: {'alpha_1': 8, 'lambda_1': 0}
Best Score: 0.5452
TestSet Score: 0.5627
