In [275]:
# 라이브러리 및 데이터 준비
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import Ridge
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import cross_validate

import warnings
warnings.filterwarnings('ignore')

# 데이터 준비
data = pd.read_csv('bodyfat.csv', encoding='euc-kr')
data['Weight'] = data['Weight'] * 0.45
data['Height'] = data['Height'] * 2.54

x_data = data[['Density','Age','Weight','Height','Neck','Chest','Abdomen','Hip','Thigh','Knee','Ankle','Forearm','Wrist']].to_numpy()

y_data = data[['BodyFat']].to_numpy()

In [276]:
# 데이터 나누기
from sklearn.model_selection import train_test_split
train_input, test_input, train_target, test_target = train_test_split(x_data, y_data, random_state=42)

In [277]:
# 선형회귀 모델
from sklearn.linear_model import LinearRegression
mlr = LinearRegression()
mlr.fit(train_input, train_target) 

In [278]:
# 과소적합
print("훈련세트 r2 : ", mlr.score(train_input, train_target))
print("테스트세트 r2 : ", mlr.score(test_input, test_target))

훈련세트 r2 :  0.9737499247092697
테스트세트 r2 :  0.9923531696557769


In [279]:
from sklearn.metrics import r2_score
y_pred = mlr.predict(test_input)
r2 = r2_score(test_target, y_pred)
print('선형회귀 r2 : ', r2)

선형회귀 r2 :  0.9923531696557769


In [280]:
from sklearn.metrics import mean_squared_error 

rmse=np.sqrt(mean_squared_error(test_target, y_pred))
print('선형회귀 RMSE : ', rmse)

선형회귀 RMSE :  0.6472300198528246


In [281]:
# 예측
print(mlr.predict([[1.0853,22,77,183,38.5,93.6,83.0,98.7,58.7,37.3,23.4,28.9,18.2]]))

[[6.27599221]]


In [282]:
# 과대적합
# 랜덤포레스트
rf = RandomForestRegressor(n_jobs=-1, random_state=42)

# 교차 검증
scores = cross_validate(rf, train_input, train_target, return_train_score=True, n_jobs=-1)
print(np.mean(scores['train_score']), np.mean(scores['test_score']))

0.9947433724903003 0.9587707154309694


In [283]:
# 훈련 및 특성중요도
rf.fit(train_input, train_target)
print(rf.feature_importances_)

[9.79105545e-01 9.01730170e-04 3.12211053e-03 2.48585410e-03
 1.16683568e-03 2.78077131e-03 3.53292716e-03 7.61396679e-04
 9.84959806e-04 2.12039245e-03 4.56424428e-04 9.84762644e-04
 1.59628957e-03]


In [284]:
y_pred = rf.predict(test_input)
r2 = r2_score(test_target, y_pred)
print('랜덤포레스트 r2 : ', r2)

랜덤포레스트 r2 :  0.9980805400058889


In [285]:
rmse=np.sqrt(mean_squared_error(test_target, y_pred))
print('랜덤포레스트 RMSE : ', rmse)

랜덤포레스트 RMSE :  0.3242705121952421


In [286]:
# 예측
print(rf.predict([[1.0853,22,77,183,38.5,93.6,83.0,98.7,58.7,37.3,23.4,28.9,18.2]]))

[6.13]
