## Multiple Regression

Import necessary libraries

In [None]:
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

Import the dataset from scikit-learn's `datasets` module

In [None]:
# scikit-learn deprecated load_boston in 1.0 and removed it in 1.2, where the
# import below raises ImportError. Keep using the bundled loader when it is
# available; otherwise rebuild an equivalent dictionary-like object from the
# original StatLib file, as the scikit-learn deprecation notice recommends.
try:
    from sklearn.datasets import load_boston
    boston = load_boston()
except ImportError:
    import numpy as np
    from sklearn.utils import Bunch

    # The raw file starts with 22 lines of description and stores every record
    # on two physical lines: 11 values first, then the remaining 2 features
    # followed by the target value.
    raw_boston = pd.read_csv(
        "http://lib.stat.cmu.edu/datasets/boston",
        sep=r"\s+",
        skiprows=22,
        header=None,
    )
    boston = Bunch(
        data=np.hstack([raw_boston.values[::2, :], raw_boston.values[1::2, :2]]),
        target=raw_boston.values[1::2, 2],
        feature_names=np.array([
            'CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE',
            'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT',
        ]),
        DESCR=(
            "Boston house-prices dataset, reconstructed from "
            "http://lib.stat.cmu.edu/datasets/boston because the installed "
            "scikit-learn no longer ships load_boston."
        ),
    )

In [None]:
# The object returned by load_boston() can be used like a dictionary;
# list its keys to see which pieces of the dataset are available.
boston.keys()

In [None]:
# The "data" entry holds the attribute (feature) values as a 2D numpy array.
boston.data

In [None]:
# The "target" entry holds the target attribute values as a 1D numpy array.
boston.target

In [None]:
# The "feature_names" entry holds the attribute names as a 1D numpy array.
boston.feature_names

In [None]:
# The "DESCR" entry is the text description of the dataset; print it so the
# line breaks render instead of showing up as escape sequences.
print(boston.DESCR)

Converting the dataset into a pandas DataFrame

In [None]:
# Build a DataFrame from the feature matrix, using the feature names as the
# column headers, then preview the first rows.
house = pd.DataFrame(data=boston['data'], columns=boston['feature_names'])
house.head()

In [None]:
# Append the target values to the DataFrame as a new "PRICE" column,
# then preview the result.
house['PRICE'] = boston['target']
house.head()

Dividing the dataset into independent and dependent variables

In [None]:
# Independent variables: every column except PRICE.
X_house = house.drop(columns='PRICE').to_numpy()
# Dependent variable: the PRICE column.
y_house = house['PRICE'].to_numpy()

Splitting the dataset into training and test sets

In [None]:
# Passing a fixed random_state keeps the split the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X_house,
    y_house,
    random_state=0,
)

Training the algorithm

In [None]:
# Create the estimator, then fit it on the training split. The result of
# fit() is kept under the same name, exactly as the one-line form did.
linreg = LinearRegression()
linreg = linreg.fit(X_train, y_train)

In [None]:
# Show the fitted intercept and the coefficients of the linear model.
print(f'linear model intercept: {linreg.intercept_}')
print(f'linear model coeff:\n{linreg.coef_}')

Testing the algorithm

In [None]:
# Predict on the test set and put the predictions next to the actual values
# so the two can be compared row by row.
y_pred = linreg.predict(X_test)
compare = pd.DataFrame(dict(Actual=y_test, Predicted=y_pred))

compare.head()

Evaluating our model by getting the coefficient of determination (R-squared)

In [None]:
# Report the model's score on each split, formatted to three decimals.
print(f'R-squared score (training): {linreg.score(X_train, y_train):.3f}')
print(f'R-squared score (test): {linreg.score(X_test, y_test):.3f}')

### Plotting

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt

Y_pred = linreg.predict(X_test)
plt.figure()
plt.scatter(y_test, Y_pred, s=20, alpha=.8) # s: size of the marker ; alpha: marker transparency
plt.plot(y_test, y_test, 'r-') # r-: red line
plt.xlabel("Actual Price")
plt.ylabel("Predicted Price")
plt.title("Actual Price vs Predicted Price")
plt.show()