# Machine Learning: Regression Basics
---
- 모델에 집중하기 위해, 전처리가 필요없는 데이터셋으로 진행하겠습니다!
- https://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_diabetes.html

## 1. 데이터 로딩

In [None]:
# Load the diabetes regression dataset via scikit-learn's dataset helper.
from sklearn.datasets import load_diabetes
# The returned object is indexed with "data" and "target" in the cells below.
data = load_diabetes()

In [None]:
# Display the loaded dataset object as this cell's output.
data

In [None]:
# Feature matrix: print its dimensions first, then the raw values.
X = data["data"]
print(X.shape, X, sep="\n")

In [None]:
# Regression target: print its dimensions first, then the raw values.
y = data["target"]
print(y.shape, y, sep="\n")

## 2. 훈련/테스트 데이터셋 분리

In [None]:
from sklearn.model_selection import train_test_split

# Hold out a test portion (test_size=0.33) for the final evaluation.
# random_state is fixed so the same split is produced on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=42,
)

In [None]:
from sklearn.linear_model import LinearRegression

## 3. 교차검증

<img src="https://scikit-learn.org/stable/_images/grid_search_cross_validation.png"/>

In [None]:
from sklearn.model_selection import cross_val_score

# Why "neg_root_mean_squared_error"? scikit-learn scorers follow a
# "higher is better" convention, so error metrics such as RMSE are
# exposed in negated form.
reg = LinearRegression()
scores = cross_val_score(
    estimator=reg,
    X=X_train,
    y=y_train,
    scoring='neg_root_mean_squared_error',
    cv=5,
)

In [None]:
# Display the per-fold cross-validation scores (negated RMSE, one value per fold).
scores

## 4. 예측

In [None]:
import numpy as np
from sklearn.metrics import mean_squared_error

# Fit on the training split and predict the held-out test split.
reg = LinearRegression().fit(X_train, y_train)
y_pred = reg.predict(X_test)

# Compute the MSE once and print it explicitly: a bare expression in the
# middle of a notebook cell is evaluated but never displayed.
mse = mean_squared_error(y_test, y_pred)
print("MSE:", mse)

# RMSE is the last expression, so it is shown as the cell's output.
np.sqrt(mse)


## 5. 여러 모델 비교해보기

In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.neural_network import MLPRegressor

In [None]:
import math

# Candidate models to compare. Estimators with internal randomness get a
# fixed random_state so repeated runs select the same winner, consistent
# with the seeded train/test split earlier in the notebook.
regs = [
    RandomForestRegressor(max_depth=4, random_state=42),
    SVR(kernel='rbf', C=10, gamma='scale'),
    LinearRegression(),
    DecisionTreeRegressor(max_depth=4, random_state=42),
    MLPRegressor(max_iter=5000, random_state=42),
]

# Track the candidate with the lowest mean cross-validated RMSE.
best_regressor = None
best_score = math.inf
for reg in regs:
    # The scorer yields negated RMSE (higher is better); flip the sign back
    # so the printed numbers are plain RMSE values (lower is better).
    scores = -cross_val_score(
        reg, X_train, y_train, cv=5, scoring='neg_root_mean_squared_error'
    )
    avg_score = scores.mean()

    print("="*20)
    print(reg.__class__.__name__)
    print(scores)
    print(avg_score)
    print("="*20)

    if avg_score < best_score:
        best_score = avg_score
        best_regressor = reg

In [None]:
# Report which candidate won the cross-validation comparison.
print(type(best_regressor).__name__)

# Refit the winner on the full training split, then score it on the
# held-out test split; the RMSE is shown as the cell's output.
y_pred = best_regressor.fit(X_train, y_train).predict(X_test)
np.sqrt(mean_squared_error(y_test, y_pred))
