In [3]:
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_log_error

def evaluate_reg_all(y_test, y_predict):
    MSE = mean_squared_error(y_test,y_predict,squared=True)
    RMSE = mean_squared_error(y_test,y_predict,squared=False)
    MAE = mean_absolute_error(y_test,y_predict)
    R2 = r2_score(y_test,y_predict)
    
    print(f'MSE: {MSE:.3f}, RMSE: {RMSE:.3F}, MAE: {MAE:.3F}, R^2: {R2:.3F}')

In [4]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression


# 경고메세지 끄기
import warnings
warnings.filterwarnings(action='ignore')

##########데이터 로드
fish_df = pd.read_csv('./data/fish.csv')

##########데이터 분석

##########데이터 전처리

x_data = fish_df.drop(['무게'], axis=1)
y_data = fish_df['무게']

x_train, x_test, y_train, y_test = train_test_split(x_data, y_data, test_size=0.3)

##########모델 생성

model = LinearRegression()
# (random_state=111, min_samples_leaf=2, max_depth=3) 

##########모델 학습

model.fit(x_train, y_train)

##########모델 검증
print('R2_train',model.score(x_train, y_train)) 
print('R2_test',model.score(x_test, y_test)) 

y_predict = model.predict(x_test)
evaluate_reg_all(y_test, y_predict) 

###다양한 평가지표 출력
print(model.score(x_train, y_train))
print(model.score(x_test, y_test))

##########모델 예측

x_real = np.array([
    [50]
])

y_real_predict = model.predict(x_real)

print(f'실제 예측값 : {y_real_predict[0]}')


R2_train 0.9238801082291322
R2_test 0.9067803270600862
MSE: 11198.830, RMSE: 105.825, MAE: 90.407, R^2: 0.907
0.9238801082291322
0.9067803270600862
실제 예측값 : 1206.2911549216315


In [7]:
from sklearn.preprocessing import PolynomialFeatures


# 경고메세지 끄기
import warnings
warnings.filterwarnings(action='ignore')


# 데이터 로드
fish_df = pd.read_csv('./data/fish.csv')

x_data = fish_df.drop(['무게'], axis=1)
y_data = fish_df['무게']

x_train, x_test, y_train, y_test = train_test_split(x_data, y_data, test_size=0.3, random_state=777)


# degree = 2 인 2차 다항식으로 변환하기 위해 PolynomialFeatures를 이용하여 변환
poly = PolynomialFeatures(degree=2, include_bias=False)
poly.fit(x_train)
poly_train = poly.transform(x_train)
poly_test = poly.transform(x_test)


model = LinearRegression()
model.fit(poly_train, y_train) #학습

y_preds = model.predict(poly_test) #예측

# 평가
print(lr.score(poly_train, y_train))
print(lr.score(poly_test, y_test))
evaluate_reg_all(y_test, y_preds) 


0.9740693759612158
0.9608860584752101
MSE: 3850.020, RMSE: 62.049, MAE: 38.157, R^2: 0.961


In [8]:
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.tree import DecisionTreeRegressor
from sklearn.tree import DecisionTreeClassifier
from sklearn.tree import plot_tree
from sklearn.model_selection import train_test_split

# 경고메세지 끄기
import warnings
warnings.filterwarnings(action='ignore')


##########데이터 로드
fish_df = pd.read_csv('./data/fish.csv')

##########데이터 전처리
X_data = fish_df.drop(['무게'], axis=1)
y_data = fish_df['무게']

x_train, x_test, y_train, y_test = train_test_split(X_data, y_data, test_size=0.3, random_state=45)


##########모델 생성

# max_depth, min_samples_leaf : hyperparameter -> 머신러닝, 딥러닝에서 학습시 개발자가 학습 정도 조절할 때 사용

# model = DecisionTreeClassifier(random_state=156,max_depth=3) # 트리성장 깊이 조절
# model = DecisionTreeClassifier(random_state=156,min_samples_leaf=5) # 리프노드의 최소 샘플 수
model = DecisionTreeRegressor(max_depth=6) 
# random_state=111, min_samples_leaf=2, max_depth=3

##########모델 학습
model.fit(x_train , y_train)


##########모델 평가
print('R2_train',model.score(x_train, y_train)) 
print('R2_test',model.score(x_test, y_test)) 

y_predict = model.predict(x_test)
evaluate_reg_all(y_test, y_predict) 


##########모델 예측
x_real = np.array([
    [50]
])

y_real_predict = model.predict(x_real)

print(f'예측값: {y_real_predict[0]}') 

R2_train 0.9917767483968946
R2_test 0.982999978932456
MSE: 2232.821, RMSE: 47.253, MAE: 29.976, R^2: 0.983
예측값: 1000.0
