In [1]:
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
import numpy as np
import random

# Fix the random seeds so the run is reproducible.
# NOTE: the seeding functions must be *called*. Assigning to them
# (`random.seed = SEED`) would seed nothing and would replace the function
# with an int for the remainder of the kernel session.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)

# 1. Load the scikit-learn diabetes dataset and expose its feature matrix
#    and regression target under short names.
diabetes = load_diabetes()
X_data, y_data = diabetes.data, diabetes.target

In [2]:
# 2. train_test_split()
# Hold out 20% of the samples as a test split; random_state pins the shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    diabetes.data,
    diabetes.target,
    test_size=0.2,
    random_state=SEED,
)

# Sanity-check the resulting split sizes (train first, then test).
print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

(353, 10) (353,)
(89, 10) (89,)


In [3]:
from sklearn.linear_model import LinearRegression

# 3. sklearn LinearRegression (reference model)
# fit() returns the estimator itself, so construction and fitting can be chained.
lr = LinearRegression().fit(X_train, y_train)

# R^2 on the training split
pred_train_sklearn = lr.predict(X_train)
score_train_sklearn = r2_score(y_train, pred_train_sklearn)

# R^2 on the held-out test split
pred_test_sklearn = lr.predict(X_test)
score_test_sklearn = r2_score(y_test, pred_test_sklearn)

print(
    f"Linear Regression (sklearn) r2_score for Train Data : {score_train_sklearn}",
    f"Linear Regression (sklearn) r2_score for Test Data : {score_test_sklearn}",
    sep="\n",
)

Linear Regression (sklearn) MSE for Train Data : 2868.549702835577
Linear Regression (sklearn) MSE for Test Data : 2900.193628493482


In [4]:
# Hyperparameters for the hand-written gradient descent
epoch = 1_000
learning_rate = 1e-4

# Initial parameters: a (10, 1) column of weights drawn uniformly from [0, 1)
# and a scalar bias starting at 10.
bias = 10
weight = np.random.rand(10, 1)

In [5]:
def predict(X, W, b):
    """Return the linear model output ``X.dot(W) + b``.

    X is the feature matrix, W the weight array and b the bias term; the
    bias is added to the matrix product using normal broadcasting.
    """
    linear_term = X.dot(W)
    return linear_term + b

def get_gradient(X, error):
    """Return the pair ``(weight gradient, bias gradient)`` for the given error.

    The weight gradient is ``X.T.dot(error)`` and the bias gradient is the
    sum of all error entries. Both are sums over samples, not means.
    """
    weight_grad = X.T.dot(error)
    bias_grad = np.sum(error)
    return weight_grad, bias_grad

def get_error(y_pred, y_true):
    """Return the residuals ``y_pred - y_true`` with ``y_true`` as a column.

    Parameters
    ----------
    y_pred : array of predictions; the training loop passes shape (n, 1).
    y_true : array-like of targets with n elements (any shape).

    Returns
    -------
    The element-wise difference, with ``y_true`` reshaped to (n, 1).

    The number of rows is inferred with ``reshape(-1, 1)`` rather than being
    hard-coded, so the function works for any number of samples and not only
    for one particular training-set size.
    """
    return y_pred - np.asarray(y_true).reshape(-1, 1)

def update(W, b, gradient, learning_rate):
    """Take one gradient-descent step and return the new ``(W, b)``.

    ``gradient[0]`` is the weight gradient and ``gradient[1]`` the bias
    gradient; each is scaled by ``learning_rate`` and subtracted from the
    corresponding parameter. The inputs are not modified in place.
    """
    weight_grad = gradient[0]
    bias_grad = gradient[1]
    stepped_W = W - learning_rate * weight_grad
    stepped_b = b - learning_rate * bias_grad
    return stepped_W, stepped_b

In [6]:
# 4. Train for `epoch` iterations of full-batch gradient descent.
# Each iteration predicts on the whole training split, computes the
# residuals and their gradient, and takes one update step.
# NOTE: `weight` and `bias` are updated in place across iterations, so
# re-running this cell continues training from the current values.
print("\n <<Train>>\n")
for i in range(epoch) :
    pred = predict(X_train, weight, bias)
    error = get_error(pred, y_train)
    gradient = get_gradient(X_train, error)
    if (i+1) % 50 == 0 :
        # R^2 is only needed for the progress log, so compute it on logging
        # iterations only instead of on every step. It is evaluated on the
        # predictions made *before* this iteration's update.
        r2_my = r2_score(y_train, pred)
        print(f"r2_score at epoch {i} : {r2_my:.4f}")
    weight, bias = update(weight, bias , gradient, learning_rate)
print("\n <<Train Done>>\n")


 <<Train>>

MSE score at epoch 49 : 6605.8019
MSE score at epoch 99 : 5944.7254
MSE score at epoch 149 : 5862.8022
MSE score at epoch 199 : 5798.5493
MSE score at epoch 249 : 5736.4779
MSE score at epoch 299 : 5676.1114
MSE score at epoch 349 : 5617.3873
MSE score at epoch 399 : 5560.2562
MSE score at epoch 449 : 5504.6705
MSE score at epoch 499 : 5450.5840
MSE score at epoch 549 : 5397.9519
MSE score at epoch 599 : 5346.7309
MSE score at epoch 649 : 5296.8790
MSE score at epoch 699 : 5248.3554
MSE score at epoch 749 : 5201.1207
MSE score at epoch 799 : 5155.1367
MSE score at epoch 849 : 5110.3661
MSE score at epoch 899 : 5066.7732
MSE score at epoch 949 : 5024.3231
MSE score at epoch 999 : 4982.9819

 <<Train Done>>



In [7]:
# Show the current value of `weight` (reassigned on every iteration of the training loop).
print(weight)

[[ 21.82122257]
 [ -1.12407388]
 [ 71.42475227]
 [ 53.76850457]
 [ 20.15317367]
 [ 14.44784677]
 [-43.01373913]
 [ 47.25775925]
 [ 64.67015061]
 [ 44.83547706]]


In [8]:
# Show the current value of `bias` (reassigned on every iteration of the training loop).
print(bias)

153.32740930713132


In [9]:
# 4. Test / my model
# Evaluate the hand-written model on the held-out test split.
pred_my = predict(X_test, weight, bias)
r2_test_my = r2_score(y_test, pred_my)


# The reported metric is R^2 (computed with r2_score above), so the label
# must say r2_score rather than MSE.
print(f"Linear Regression (my) r2_score for Test Data : {r2_test_my}")

Linear Regression (my) MSE for Test Data : 4346.829578284101


In [11]:
# 5. Comparison
# Print the test-split R^2 of the hand-written model next to the sklearn one.
print(
    f"Linear Regression (my) r2_score for Test Data : {r2_test_my}",
    f"Linear Regression (sklearn) r2_score for Test Data : {score_test_sklearn}",
    sep="\n",
)

Linear Regression (my) MSE for Test Data : 4346.829578284101
Linear Regression (sklearn) MSE for Test Data : 2900.193628493482
sklearn win!
