In [1]:
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
import numpy as np
import random

# Fix the random seeds so the run is reproducible.
# NOTE: the seeding functions must be *called*. Assigning to them
# (`random.seed = SEED`) replaces the function with an int, seeds nothing,
# and breaks any later call to `random.seed(...)` / `np.random.seed(...)`.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)

# 1. Load the scikit-learn diabetes dataset and keep handles to the
#    feature matrix and the regression target.
diabetes = load_diabetes()
X_data, y_data = diabetes.data, diabetes.target

In [2]:
# 2. Split into train and test sets: 20% of the samples are held out for
#    testing, and the shuffle is seeded with SEED.
X_train, X_test, y_train, y_test = train_test_split(
    X_data,
    y_data,
    test_size=0.2,
    random_state=SEED,
)

# Print the shapes of both splits.
print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

(353, 10) (353,)
(89, 10) (89,)


In [3]:
from sklearn.linear_model import LinearRegression

# 3. Baseline: fit scikit-learn's LinearRegression on the training split.
lr = LinearRegression().fit(X_train, y_train)

# Predict on both splits first, then score each prediction with R^2.
pred_train_sklearn = lr.predict(X_train)
pred_test_sklearn = lr.predict(X_test)

score_train_sklearn = r2_score(y_train, pred_train_sklearn)
score_test_sklearn = r2_score(y_test, pred_test_sklearn)

print(f"Linear Regression (sklearn) r2_score for Train Data : {score_train_sklearn}")
print(f"Linear Regression (sklearn) r2_score for Test Data : {score_test_sklearn}")

Linear Regression (sklearn) r2_score for Train Data : 0.5279193863361498
Linear Regression (sklearn) r2_score for Test Data : 0.4526027629719195


In [4]:
# Hyperparameters for the hand-written gradient descent.
learning_rate = 0.005
epoch = 1000

# Initial parameters: a (10, 1) column of random weights (one row per
# feature of the (n, 10) design matrix) and a constant starting bias.
weight = np.random.rand(10, 1)
bias = 10

In [5]:
def predict(X, W, b):
    """Evaluate the linear model ``X.dot(W) + b``.

    Args:
        X: Design matrix; must provide a ``.dot`` method.
        W: Weights to multiply ``X`` by.
        b: Bias added to every prediction.

    Returns:
        The model output ``X.dot(W) + b``.
    """
    linear_term = X.dot(W)
    return linear_term + b

def get_gradient(X, error):
    """Compute the (unnormalised) gradient terms for weights and bias.

    Both terms are plain sums over the samples; nothing is divided by the
    number of samples.

    Args:
        X: Design matrix.
        error: Residuals, as produced by ``y_pred - y_true``.

    Returns:
        A tuple ``(weight_grad, bias_grad)`` where ``weight_grad`` is
        ``X.T.dot(error)`` and ``bias_grad`` is the sum of all residuals.
    """
    weight_grad = X.T.dot(error)
    bias_grad = np.sum(error)
    return weight_grad, bias_grad

def get_error(y_pred, y_true):
    """Return the residuals ``y_pred - y_true`` with targets as a column.

    ``y_true`` is reshaped to ``(n_samples, 1)`` so that it lines up with
    column-shaped predictions instead of broadcasting to an
    ``(n, n)`` matrix. The sample count is inferred with ``-1`` rather than
    hard-coded, so the function works for any split size (the previous
    ``reshape(353, 1)`` raised for anything but exactly 353 samples).

    Args:
        y_pred: Predictions, expected as an ``(n_samples, 1)`` array.
        y_true: Targets as an array with ``n_samples`` elements.

    Returns:
        The element-wise difference ``y_pred - y_true`` as a column vector.
    """
    return y_pred - y_true.reshape(-1, 1)

def update(W, b, gradient, learning_rate):
    """Take one gradient-descent step and return the new parameters.

    The inputs are not modified; new values are returned.

    Args:
        W: Current weights.
        b: Current bias.
        gradient: Indexable pair whose element 0 is the weight gradient and
            element 1 is the bias gradient.
        learning_rate: Step size applied to both gradients.

    Returns:
        A tuple ``(new_W, new_b)``.
    """
    weight_grad = gradient[0]
    bias_grad = gradient[1]
    return W - learning_rate * weight_grad, b - learning_rate * bias_grad

In [6]:
# 4. Train the hand-written model with full-batch gradient descent for
#    `epoch` iterations.
# NOTE: this cell reassigns `weight` and `bias`, so re-running it continues
# training from the current parameters instead of starting over.
print("\n <<Train>>\n")
for i in range(epoch):
    pred = predict(X_train, weight, bias)

    # R^2 is only needed for logging, so compute it on logging epochs only
    # instead of on every iteration. It scores the parameters *before* this
    # iteration's update, exactly as before.
    if (i + 1) % 50 == 0:
        r2_my = r2_score(y_train, pred)
        print(f"r2_score at epoch {i} : {r2_my:.4f}")

    error = get_error(pred, y_train)
    gradient = get_gradient(X_train, error)
    weight, bias = update(weight, bias, gradient, learning_rate)
print("\n <<Train Done>>\n")


 <<Train>>

r2_score at epoch 49 : 0.3183
r2_score at epoch 99 : 0.4184
r2_score at epoch 149 : 0.4602
r2_score at epoch 199 : 0.4826
r2_score at epoch 249 : 0.4962
r2_score at epoch 299 : 0.5049
r2_score at epoch 349 : 0.5107
r2_score at epoch 399 : 0.5145
r2_score at epoch 449 : 0.5170
r2_score at epoch 499 : 0.5188
r2_score at epoch 549 : 0.5200
r2_score at epoch 599 : 0.5209
r2_score at epoch 649 : 0.5215
r2_score at epoch 699 : 0.5219
r2_score at epoch 749 : 0.5222
r2_score at epoch 799 : 0.5224
r2_score at epoch 849 : 0.5226
r2_score at epoch 899 : 0.5227
r2_score at epoch 949 : 0.5228
r2_score at epoch 999 : 0.5229

 <<Train Done>>



In [7]:
# Show the weight array as left by the training loop.
print(weight)

[[  39.60921493]
 [-234.00730958]
 [ 546.50789945]
 [ 339.12614624]
 [ -93.55388394]
 [-128.40474806]
 [-217.28467092]
 [ 147.48851074]
 [ 406.47923626]
 [  77.68739712]]


In [8]:
# Show the bias value as left by the training loop.
print(bias)

151.30686582816992


In [9]:
# 4. Evaluate the hand-written model on the held-out test split.
pred_my = predict(X_test, weight, bias)
r2_test_my = r2_score(y_test, pred_my)

# The value is computed with r2_score, so label it as r2_score rather
# than MSE.
print(f"Linear Regression (my) r2_score for Test Data : {r2_test_my}")

Linear Regression (my) MSE for Test Data : 0.4567291872463044


In [10]:
# 5. Comparison: test-set R^2 of the hand-written model next to the
#    scikit-learn baseline, one result per line.
print(
    f"Linear Regression (my) r2_score for Test Data : {r2_test_my}",
    f"Linear Regression (sklearn) r2_score for Test Data : {score_test_sklearn}",
    sep="\n",
)

Linear Regression (my) r2_score for Test Data : 0.4567291872463044
Linear Regression (sklearn) r2_score for Test Data : 0.4526027629719195
