<a href="https://colab.research.google.com/github/sungjin-kim-data/ML/blob/master/diabetes.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_diabetes

# Seed NumPy's global RNG. The train_test_split call later in the notebook is
# made without random_state, so a fresh top-to-bottom run relies on this seed
# for a repeatable split.
np.random.seed(42)

In [None]:
# Load scikit-learn's bundled diabetes dataset; the cells below read its
# "data", "target" and "feature_names" entries by key.
diabetes = load_diabetes()

In [None]:
# Display the names of the feature columns.
diabetes["feature_names"]

당뇨병 데이터에서 사용되는 변수명 'feature_names'
 - age : 나이
 - sex : 성별
 - bmi : Body Mass Index
 - bp : Average blood pressure
 - s1, s2, s3, s4, s5, s6 : 혈청에 대한 6가지 지표들

In [None]:
# Pull the feature matrix and the regression target out of the dataset bundle.
data = diabetes["data"]
target = diabetes["target"]

In [None]:
# Peek at the feature values of the first sample.
data[0]

In [None]:
# Peek at the target value of the first sample.
target[0]

In [None]:
# Wrap the raw feature array in a DataFrame labelled with the dataset's
# feature names, then show the first rows for a quick visual check.
df = pd.DataFrame(data=data, columns=diabetes["feature_names"])
df.head()

In [None]:
# Per-column summary statistics of the features.
df.describe()

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Hold out 30% of the samples for testing. No random_state is passed, so the
# split depends on NumPy's global RNG state (seeded in the first cell);
# re-running this cell on its own will therefore produce a different split.
X_train, X_test, y_train, y_test = train_test_split(data, target, test_size=0.3)

In [None]:
# Sanity-check the shapes of the train and test partitions (features, target).
print(f"{X_train.shape} {y_train.shape}")
print(f"{X_test.shape} {y_test.shape}")

In [None]:
# Report the fraction of all samples that landed in each partition.
# The second line reports the held-out share, so it is labelled "test ratio".
print("train ratio : {:.2f}".format(len(X_train) / len(data)))
print("test ratio : {:.2f}".format(len(X_test) / len(data)))

In [None]:
from sklearn.linear_model import LinearRegression

In [None]:
# Baseline model: plain linear regression on all features, fitted on the
# training partition only.
multi = LinearRegression()
multi.fit(X_train, y_train)

In [None]:
# Fitted intercept of the linear regression model.
multi.intercept_

In [None]:
# Fitted per-feature coefficients of the linear regression model.
multi.coef_

In [None]:
# Predict on the training partition first, then on the held-out partition.
multi_train_pred, multi_test_pred = (
    multi.predict(features) for features in (X_train, X_test)
)

In [None]:
from sklearn.metrics import mean_squared_error

# Mean squared error of the linear regression on each partition.
# Arguments follow scikit-learn's (y_true, y_pred) order; MSE itself is
# symmetric, so the values are the same as with the swapped order.
multi_train_mse = mean_squared_error(y_train, multi_train_pred)
multi_test_mse = mean_squared_error(y_test, multi_test_pred)

In [None]:
from sklearn.linear_model import Ridge

# Ridge regression with scikit-learn's default settings, fitted on the same
# training partition as the baseline model.
ridge = Ridge()
ridge.fit(X_train, y_train)

In [None]:
# Fitted intercept of the ridge model.
ridge.intercept_

In [None]:
# Fitted per-feature coefficients of the ridge model.
ridge.coef_

In [None]:
# Show the plain linear regression coefficients again, for side-by-side
# comparison with the ridge coefficients displayed in the previous cell.
multi.coef_

In [None]:
# Predict on the training partition first, then on the held-out partition.
ridge_train_pred, ridge_test_pred = (
    ridge.predict(features) for features in (X_train, X_test)
)

In [None]:
# Mean squared error of the ridge model on each partition.
# Arguments follow scikit-learn's (y_true, y_pred) order; MSE itself is
# symmetric, so the values are the same as with the swapped order.
ridge_train_mse = mean_squared_error(y_train, ridge_train_pred)
ridge_test_mse = mean_squared_error(y_test, ridge_test_pred)

In [None]:
# Print the ridge errors rounded to two decimals. The metric computed in the
# previous cell is MSE (ridge_train_mse / ridge_test_mse); no *_mae variables
# exist in this notebook, so referencing them raised NameError.
print(round(ridge_train_mse, 2))
print(round(ridge_test_mse, 2))

In [None]:
from sklearn.linear_model import Lasso

In [None]:
# Lasso regression with scikit-learn's default settings, fitted on the same
# training partition as the other two models.
lasso = Lasso()
lasso.fit(X_train, y_train)

In [None]:
# Fitted intercept of the lasso model.
lasso.intercept_

In [None]:
# Show the ridge intercept again, for comparison with the lasso intercept
# displayed in the previous cell.
ridge.intercept_

In [None]:
# Fitted per-feature coefficients of the lasso model.
lasso.coef_

In [None]:
# Show the ridge coefficients again, for comparison with the lasso
# coefficients displayed in the previous cell.
ridge.coef_

In [None]:
# Names of the features whose lasso coefficient is non-zero, in column order.
np.asarray(diabetes['feature_names'])[np.flatnonzero(lasso.coef_)]

In [None]:
# Predict on the training partition first, then on the held-out partition.
lasso_train_pred, lasso_test_pred = (
    lasso.predict(features) for features in (X_train, X_test)
)

In [None]:
# Mean squared error of the lasso model on each partition.
# Arguments follow scikit-learn's (y_true, y_pred) order; MSE itself is
# symmetric, so the values are the same as with the swapped order.
lasso_train_mse = mean_squared_error(y_train, lasso_train_pred)
lasso_test_mse = mean_squared_error(y_test, lasso_test_pred)

In [None]:
# Lasso train MSE, then test MSE, each on its own line, rounded to 2 decimals.
print(round(lasso_train_mse, 2), round(lasso_test_mse, 2), sep="\n")

In [None]:
# Ridge train MSE, then test MSE, each on its own line, rounded to 2 decimals
# (shown directly after the lasso numbers for comparison).
print(round(ridge_train_mse, 2), round(ridge_test_mse, 2), sep="\n")

In [None]:
# Side-by-side summary of the held-out MSE of the three models, one per line.
print(
    f"Multi Regression Test MSE is {multi_test_mse}",
    f"Ridge Regression Test MSE is {ridge_test_mse}",
    f"Lasso Regression Test MSE is {lasso_test_mse}",
    sep="\n",
)

In [None]:
# One scatter panel per model: predicted value on x, actual target on y.
# The red y = x diagonal marks where perfect predictions would fall.
fig, axes = plt.subplots(nrows=1, ncols=3, figsize=(15, 5))
preds = [
    ("Multi Regression", multi_test_pred),
    ("Ridge Regression", ridge_test_pred),
    ("Lasso Regression", lasso_test_pred),
]

# Pair each panel with its (title, predictions) entry, left to right.
for ax, (name, test_pred) in zip(axes, preds):
    ax.scatter(test_pred, y_test)
    ax.plot(np.linspace(0, 350, 100), np.linspace(0, 350, 100), c='r')
    ax.set_xlabel("Predict")
    ax.set_ylabel("Real")
    ax.set_title(name)