In [1]:
import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets, linear_model
from sklearn.metrics import mean_squared_error, r2_score
% matplotlib inline

In [2]:
# Load the diabetes dataset bundled with scikit-learn; the cells below read
# its `.data` (feature matrix) and `.target` (response vector) attributes.
diabetes = datasets.load_diabetes()

In [3]:
# Feature matrix dimensions: (number of samples, number of features)
diabetes.data.shape

(442, 10)

In [4]:
# Target vector dimensions: one response value per sample
diabetes.target.shape

(442,)

In [5]:
# Use all available features (the full data matrix, not a single column),
# since this notebook fits a multiple regression.
diabetes_X = diabetes.data

In [6]:
# Hold out 30% of the samples (rounded down) for testing.
n_samples = diabetes_X.shape[0]
test_set_size = int(0.3 * n_samples)

In [7]:
# Number of samples held out for testing
test_set_size

132

In [8]:
# Split the features into training/testing sets: the last `test_set_size`
# rows are held out for testing, everything before them is used for training.
# An explicit split index is used instead of negative slicing because
# `[:-0]` / `[-0:]` would give an empty training set and a full test set
# if test_set_size were ever 0.
x_split_index = diabetes_X.shape[0] - test_set_size
diabetes_X_train = diabetes_X[:x_split_index]
diabetes_X_test = diabetes_X[x_split_index:]

In [9]:
# Split the targets into training/testing sets, mirroring the feature split:
# the last `test_set_size` values are held out for testing.
# An explicit split index is used instead of negative slicing because
# `[:-0]` / `[-0:]` would give an empty training set and a full test set
# if test_set_size were ever 0.
y_split_index = diabetes.target.shape[0] - test_set_size
diabetes_y_train = diabetes.target[:y_split_index]
diabetes_y_test = diabetes.target[y_split_index:]

### multiple regression

<pre>
y = X B
where:
    y = output (dependent)
    X = input (independent)
    B = beta values (regression coefficients)
    
X'y = X'X B
(X'X)^-1 X' y = (X'X)^-1 X'X B
(X'X)^-1 X'y = B
</pre>

In [10]:
# Training design matrix: a leading column of ones (so the first coefficient
# acts as the intercept) followed by the feature columns.
ones_column = np.ones((diabetes_X_train.shape[0], 1))
X = np.hstack([ones_column, diabetes_X_train])

In [11]:
# Design matrix dimensions: training rows x (features + 1 column of ones)
X.shape

(310, 11)

In [12]:
# Transpose of the design matrix (X' in the derivation above). `X` is a plain
# ndarray, so use its own `.T` attribute rather than reaching through the
# discouraged `np.matrix` class.
X_transpose = X.T

In [13]:
# Solve the normal equations (X'X) B = X'y for B. np.linalg.solve factorizes
# X'X rather than forming its explicit inverse, which is cheaper and loses
# less precision when X'X is ill-conditioned.
# B[0] multiplies the column of ones (the intercept); B[1:] are the
# per-feature coefficients.
B = np.linalg.solve(
    np.matmul(X_transpose, X),
    np.matmul(X_transpose, diabetes_y_train),
)

In [14]:
# Test design matrix: a leading column of ones for the intercept term,
# followed by the held-out feature columns.
test_ones_column = np.ones((diabetes_X_test.shape[0], 1))
X_test = np.hstack([test_ones_column, diabetes_X_test])

In [15]:
# Predicted targets for the test rows: y = X B
y_pred = X_test @ B

In [16]:
# Summarize the hand-computed fit. B[0] is the intercept (it multiplies the
# column of ones); B[1:] are the per-feature coefficients.
intercept, coefficients = B[0], B[1:]
mse = mean_squared_error(diabetes_y_test, y_pred)
# r2_score is the coefficient of determination (R^2): 1 is perfect prediction
r2 = r2_score(diabetes_y_test, y_pred)

print('Coefficients:', coefficients)
print('Intercept:', intercept)
print("Mean squared error: %.4f" % mse)
print('Variance score: %.4f' % r2)

Coefficients: [  -4.06035767 -266.98169237  547.20535959  279.52632656 -394.10840711
  115.92175016  -28.24786192  182.52226293  627.99560396  106.18549789]
Intercept: 152.6883674111716
Mean squared error: 2742.7850
Variance score: 0.5171


## Validate our model with the sklearn package.

In [17]:
# Build scikit-learn's linear regression estimator and train it on the
# training split (fit() returns the estimator, so the two steps chain).
regr = linear_model.LinearRegression().fit(diabetes_X_train, diabetes_y_train)

# Predict targets for the held-out test rows
diabetes_y_pred = regr.predict(diabetes_X_test)



In [18]:
# Summarize the scikit-learn fit so it can be compared with the
# hand-computed results.
sk_mse = mean_squared_error(diabetes_y_test, diabetes_y_pred)
# r2_score is the coefficient of determination (R^2): 1 is perfect prediction
sk_r2 = r2_score(diabetes_y_test, diabetes_y_pred)

print('Coefficients:', regr.coef_)
print('Intercept:', regr.intercept_)
print("Mean squared error: %.4f" % sk_mse)
print('Variance score: %.4f' % sk_r2)

Coefficients: [  -4.06035767 -266.98169237  547.20535959  279.52632656 -394.10840711
  115.92175016  -28.24786192  182.52226293  627.99560396  106.18549789]
Intercept: 152.6883674111717
Mean squared error: 2742.7850
Variance score: 0.5171
