__Importing Necessary Libraries__

In [51]:
import numpy as np
from sklearn.model_selection import train_test_split 
from sklearn.linear_model import LinearRegression
from sklearn import metrics
from sklearn.metrics import r2_score
import matplotlib.pyplot as plt 

__Loading Diabetes Dataset__

In [52]:
from sklearn import datasets
diabetes_X, diabetes_y = datasets.load_diabetes(return_X_y=True,scaled=True)

__Splitting dataset into training, test, and validation__

In [102]:
X_train, X_test, y_train, y_test = train_test_split(diabetes_X, diabetes_y, test_size=0.2, random_state=0)

X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=0)

__Mulivariate Linear Regression on all variables__

In [103]:
multilinearreg = LinearRegression()

In [104]:
multilinearreg.fit(X_train, y_train)

In [106]:
from sklearn.metrics import mean_absolute_percentage_error

y_pred = multilinearreg.predict(X_test)
print('Mean Absolute error: %.2f' % metrics.mean_absolute_error(y_test, y_pred))
print('R-Squared value: %.2f' % r2_score(y_test,y_pred))
print('Mean Absolute Percentage Error: %.2f' % mean_absolute_percentage_error(y_test, y_pred))

Mean Absolute error: 45.80
R-Squared value: 0.35
Mean Absolute Percentage Error: 0.37


In [107]:
print("Coefficients:", multilinearreg.coef_)
print("Intercept:", multilinearreg.intercept_)

Coefficients: [ -41.59252917 -205.25691985  496.36500863  404.61631695 -741.41158983
  339.6956825   156.13455577  289.83642178  741.2866383    41.42436464]
Intercept: 151.91610185416363


__Polynomial Regression (2nd degree) on BMI feature__

In [108]:
from sklearn.preprocessing import PolynomialFeatures
X_train_bmi = X_train[:, 2:3]
X_test_bmi = X_test[:, 2:3]
poly = PolynomialFeatures(degree=2, include_bias=False)
X_train_poly = poly.fit_transform(X_train_bmi)
X_test_poly = poly.transform(X_test_bmi)

poly_model = LinearRegression()

In [109]:
poly_model.fit(X_train_poly, y_train)

In [110]:
y_bmi_pred = poly_model.predict(X_test_poly)
print('Mean Absolute error: %.2f' % metrics.mean_absolute_error(y_test, y_bmi_pred))
print('R-Squared value: %.2f' % r2_score(y_test,y_bmi_pred))
print('Mean Absolute Percentage Error: %.2f' % mean_absolute_percentage_error(y_test,y_bmi_pred))


Mean Absolute error: 52.63
R-Squared value: 0.20
Mean Absolute Percentage Error: 0.44


__Multivariate polynomial regression of the 2nd degree on all variables__

In [111]:
poly_multi = PolynomialFeatures(degree=2, include_bias=False)
X_poly_multi = poly_multi.fit_transform(X_train)
model2 = LinearRegression()

In [112]:
model2.fit(X_train, y_train)
y_poly_pred = model2.predict(X_test)

In [114]:
print('Mean Absolute error: %.2f' % metrics.mean_absolute_error(y_test, y_poly_pred))
print('R-Squared value: %.2f' % r2_score(y_test,y_poly_pred))
print('Mean Absolute Percentage Error: %.2f' % mean_absolute_percentage_error(y_test,y_poly_pred))

Mean Absolute error: 45.80
R-Squared value: 0.35
Mean Absolute Percentage Error: 0.37


We calculated performance metrics such as Mean Absolute Error(MAE), Mean Absolute Percentage Error(MAPE), and R-Squared to evaluate each of the models. 

Models:
1. Multivariate Linear Regression on all variables
2. Polynomial Regression of 2nd Degree on BMI Feature
3. Multivariate Polynomial Regression of 2nd Degree on all variables


R-Squared:

_For Model 1 and Model 3, the value for R-Squared is 0.35, whereas for Model 2, the value is 0.20. This implies that the Model 1 and 3, are able to explain 35% of the pattern or relationship in the data, and the remaining 65% is unexplained._

Mean Absolute Percentage Error (MAPE):

_MAPE determines how much predictions differ from the actual values as a percentage of the actual values. For Model 1 and Model 3, MAPE value is 0.37 (37%), which means that the predictions are off by 44% as compared to actual values. For Model 2, the MAPE value is 0.44 (44%)._

Mean Absolute Error (MAE):

_MAE is the average of absolute difference between predicted and actual values. For Model 1 and Model 3, MAE value is 45.80. For Model 2, MAE value is 52.63. A lower value of MAE implies that the predictions are closer to actual values._


__No of Parameters for each Model__

In [115]:
#Polynomial Regression with degree 2 on only 1 feature (bmi). Therefore, there are 2 parameters. 
poly.get_feature_names_out()


array(['x0', 'x0^2'], dtype=object)

In [116]:
#Polynomial Regression with degree 2 on all variables. The x0......x9 represent the 10 features. 
#The x0^2......x9^2 represent the square of the 10 features. 
#The x0x1......x8x9 represent the product of the 10 features.
poly_multi.get_feature_names_out()

array(['x0', 'x1', 'x2', 'x3', 'x4', 'x5', 'x6', 'x7', 'x8', 'x9', 'x0^2',
       'x0 x1', 'x0 x2', 'x0 x3', 'x0 x4', 'x0 x5', 'x0 x6', 'x0 x7',
       'x0 x8', 'x0 x9', 'x1^2', 'x1 x2', 'x1 x3', 'x1 x4', 'x1 x5',
       'x1 x6', 'x1 x7', 'x1 x8', 'x1 x9', 'x2^2', 'x2 x3', 'x2 x4',
       'x2 x5', 'x2 x6', 'x2 x7', 'x2 x8', 'x2 x9', 'x3^2', 'x3 x4',
       'x3 x5', 'x3 x6', 'x3 x7', 'x3 x8', 'x3 x9', 'x4^2', 'x4 x5',
       'x4 x6', 'x4 x7', 'x4 x8', 'x4 x9', 'x5^2', 'x5 x6', 'x5 x7',
       'x5 x8', 'x5 x9', 'x6^2', 'x6 x7', 'x6 x8', 'x6 x9', 'x7^2',
       'x7 x8', 'x7 x9', 'x8^2', 'x8 x9', 'x9^2'], dtype=object)

For this particular dataset, I would deploy Model 1. On the basis of performance metrics such as MAE, MAPE, and R-Squared, Model 1 performs better than Model 2. 
Model 3 has the same performance as Model 1. 