In [1]:
import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets, linear_model
from sklearn.metrics import mean_squared_error, r2_score
% matplotlib inline

In [2]:
# Load the diabetes dataset bundled with scikit-learn.
# The returned object exposes the feature matrix as `.data` and the
# regression targets as `.target` (both are used in the cells below).
diabetes = datasets.load_diabetes()

In [3]:
# Dimensions of the feature matrix: one row per sample, one column per feature
diabetes.data.shape

(442, 10)

In [4]:
# Length of the target vector: one target value per row of the feature matrix
diabetes.target.shape

(442,)

In [5]:
# Use all available features (every column of the data matrix) -- this is
# multiple regression, so no single-column selection is done here.
diabetes_X = diabetes.data

In [6]:
# Hold out 30% of the rows (fraction truncated to a whole number) for testing
test_set_size = int(len(diabetes_X) * 0.3)

In [7]:
# Show the computed hold-out size
test_set_size

132

In [8]:
# Feature split: the trailing `test_set_size` rows become the test set,
# everything before them is the training set (no shuffling).
diabetes_X_train, diabetes_X_test = (
    diabetes_X[:-test_set_size],
    diabetes_X[-test_set_size:],
)

In [9]:
# Target split: same boundary as the features, so each row keeps its own target.
diabetes_y_train, diabetes_y_test = (
    diabetes.target[:-test_set_size],
    diabetes.target[-test_set_size:],
)

### multiple regression

<pre>
y = X B
where:
    y = output (dependent)
    X = input (independent)
    B = beta values (coefficients)
    
Y_hat = B' * X
error = y - Y_hat
B = B + learning_rate * error * X
</pre>

In [10]:
# Design matrix for training: a leading column of ones (intercept term)
# followed by the original feature columns.
intercept_column = np.ones((diabetes_X_train.shape[0], 1))
X = np.hstack([intercept_column, diabetes_X_train])

In [11]:
# Check the design matrix: training rows by (feature columns + 1 column of ones)
X.shape

(310, 11)

In [12]:
# Hyper-parameters for the per-sample gradient-descent fit.
NO_OF_EPOCHS = 10000
LEARNING_RATE = 0.001

# One coefficient per column of X. Column 0 of X is all ones, so coefs[0]
# plays the role of the intercept. Start from a float ndarray so the vector
# arithmetic below never relies on a Python list being promoted to an array.
coefs = np.zeros(X.shape[1])

for epoch in range(NO_OF_EPOCHS):
    # Running sum of squared residuals over this pass through the training rows.
    epoch_error = 0
    # Rows are visited in stored order; coefficients are updated after every row.
    for i, row in enumerate(X):
        y = diabetes_y_train[i]
        y_hat = np.dot(row, coefs)
        error = y - y_hat
        # Scalar * vector update: move the coefficients in the direction that
        # shrinks this row's squared residual.
        coefs = coefs + (LEARNING_RATE * error) * row
        epoch_error += error ** 2
    # Progress report every 1000 epochs: current coefficients and the mean
    # squared residual accumulated during this epoch.
    if epoch % 1000 == 0:
        coefs_text = ', '.join('%.2f' % c for c in coefs)
        print(epoch, 'coefs:', coefs_text, 'error:%.2f' % (epoch_error / X.shape[0]))

0 coefs: 39.98, 0.13, 0.02, 0.56, 0.42, 0.16, 0.14, -0.36, 0.36, 0.53, 0.34 error:22666.61
1000 coefs: 152.19, 37.92, -86.10, 323.58, 204.60, 25.36, -17.65, -169.67, 145.08, 298.22, 149.97 error:3287.39
2000 coefs: 152.61, 18.82, -174.93, 432.33, 252.36, -12.24, -85.29, -193.25, 140.78, 390.40, 153.39 error:3033.77
3000 coefs: 152.78, 7.13, -224.10, 485.34, 270.82, -31.12, -120.85, -197.97, 134.75, 437.45, 144.41 error:2968.88
4000 coefs: 152.86, 1.32, -249.22, 513.14, 278.38, -39.78, -138.26, -198.05, 132.19, 464.23, 135.20 error:2950.94
5000 coefs: 152.90, -1.41, -261.47, 528.10, 281.15, -44.11, -146.84, -196.65, 131.53, 480.19, 127.63 error:2945.62
6000 coefs: 152.92, -2.63, -267.08, 536.33, 281.82, -46.67, -151.08, -194.78, 131.78, 490.11, 121.89 error:2943.88
7000 coefs: 152.93, -3.12, -269.37, 540.98, 281.64, -48.52, -153.11, -192.79, 132.44, 496.55, 117.70 error:2943.22
8000 coefs: 152.94, -3.28, -270.07, 543.69, 281.15, -50.11, -153.96, -190.85, 133.28, 500.91, 114.70 error:294

In [13]:
# Design matrix for the held-out rows, built the same way as for training:
# a leading column of ones followed by the original feature columns.
test_intercept_column = np.ones((diabetes_X_test.shape[0], 1))
X_test = np.hstack([test_intercept_column, diabetes_X_test])

In [14]:
# Predictions for the held-out rows: design matrix times fitted coefficients
y_pred = X_test @ coefs

In [15]:
# Evaluate the hand-written model on the held-out rows
sgd_mse = mean_squared_error(diabetes_y_test, y_pred)
# R^2 (coefficient of determination): 1 is perfect prediction
sgd_r2 = r2_score(diabetes_y_test, y_pred)

# Feature weights are every coefficient after the first; the first one
# multiplies the column of ones and is therefore the intercept.
print('Coefficients:', coefs[1:])
print('Intercept:', coefs[0])
print("Mean squared error: %.4f" % sgd_mse)
print('Variance score: %.4f' % sgd_r2)

Coefficients: [  -3.2709719  -269.75783061  546.31909144  280.09496185  -53.1351474
 -153.97261265 -187.2469581   135.14928933  506.24658244  111.09683613]
Intercept: 152.94377997888603
Mean squared error: 2788.5432
Variance score: 0.5090


## Validate our model with the sklearn package.

In [16]:
# Reference model: scikit-learn's ordinary least-squares linear regression
regr = linear_model.LinearRegression()

# Fit on the same training rows used for the hand-written model
regr.fit(X=diabetes_X_train, y=diabetes_y_train)

# Predict the held-out rows
diabetes_y_pred = regr.predict(X=diabetes_X_test)



In [17]:
# Evaluate the scikit-learn model on the held-out rows
sk_mse = mean_squared_error(diabetes_y_test, diabetes_y_pred)
# R^2 (coefficient of determination): 1 is perfect prediction
sk_r2 = r2_score(diabetes_y_test, diabetes_y_pred)

# Fitted feature weights and intercept, as exposed by the estimator
print('Coefficients:', regr.coef_)
print('Intercept:', regr.intercept_)
print("Mean squared error: %.4f" % sk_mse)
print('Variance score: %.4f' % sk_r2)

Coefficients: [  -4.06035767 -266.98169237  547.20535959  279.52632656 -394.10840711
  115.92175016  -28.24786192  182.52226293  627.99560396  106.18549789]
Intercept: 152.6883674111717
Mean squared error: 2742.7850
Variance score: 0.5171
