# Regression Example

## Load libraries

In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

You might need to install the **scikit-learn** library in one of three ways:

- Using Anaconda Navigator
- pip install scikit-learn (in command prompt)
- conda install scikit-learn (in anaconda prompt)

## Get data

In [None]:
# `load_boston` was deprecated in scikit-learn 1.0 and removed in 1.2, so the
# plain import fails on current releases. Use it when it is available and
# otherwise rebuild an object with the same fields from the original source.
try:
    from sklearn.datasets import load_boston
    boston = load_boston()
except ImportError:
    from sklearn.utils import Bunch

    # Replacement recipe from the scikit-learn deprecation notice
    # (requires network access).
    data_url = "http://lib.stat.cmu.edu/datasets/boston"
    raw_df = pd.read_csv(data_url, sep=r"\s+", skiprows=22, header=None)
    # Each record is split over two consecutive rows of the raw file: the even
    # rows plus the first two values of the odd rows are the features, and the
    # third value of the odd rows is the target.
    boston = Bunch(
        data=np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]]),
        target=raw_df.values[1::2, 2],
        feature_names=np.array([
            'CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE',
            'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT',
        ]),
        DESCR="Boston house prices dataset, loaded from " + data_url,
    )

print("keys:",boston.keys())
print("shape:",boston.data.shape)
print("feature names:",boston.feature_names)
print("Description:",boston.DESCR)

In [None]:
# Wrap the feature matrix in a DataFrame, labelling the columns up front.
bos = pd.DataFrame(boston.data, columns=boston.feature_names)
# Append the regression target as its own column.
bos['PRICE'] = boston.target
# Summary statistics for every column (displayed as the cell output).
bos.describe()

In [None]:
# Preview the first five rows of the frame.
bos.head(n=5)

In [None]:
# Pairwise scatter plots for the target and the three predictors used below.
pair_cols = ['PRICE', 'CRIM', 'RM', 'PTRATIO']
sns.pairplot(bos.loc[:, pair_cols])

# Regression: Step-by-step

In [None]:
# Response vector: the PRICE column as a plain NumPy array.
Y = np.array(bos['PRICE'])
# One row per regressor: a row of ones for the intercept, then CRIM, RM and
# PTRATIO. X is therefore laid out variables-by-observations.
predictor_rows = [np.array(bos[name]) for name in ('CRIM', 'RM', 'PTRATIO')]
X = np.vstack([np.ones_like(Y), *predictor_rows])

In [None]:
# X stores one variable per row, so it plays the role of X' in the textbook
# formula; X @ X.T is X'X in that notation, and A is its inverse.
A = np.linalg.inv(X @ X.T)

In [None]:
# Y is one-dimensional, so transposing it changes nothing; this is X'y
# in textbook notation.
B = X @ Y

In [None]:
# Coefficient estimates: the inverse computed above times X'y.
C = A @ B

In [None]:
# Coefficients follow the row order used to build X:
# intercept, CRIM, RM, PTRATIO.
print(C)

# Regression using numpy: np.linalg.lstsq

In [None]:
# Same fit through NumPy's least-squares solver. X.T puts observations in
# rows; Y is one-dimensional, so it is passed as-is.
# The cell displays the tuple returned by lstsq; its first element is the
# coefficient vector.
np.linalg.lstsq(X.T, Y, rcond=-1)

In [None]:
# IPython help syntax: a trailing `?` shows the docstring of the object.
# This only works inside IPython/Jupyter, not in a plain Python script.
np.linalg.lstsq?

# Regression using statsmodels: sm.ols

In [None]:
import statsmodels.formula.api as sm

In [None]:
# Describe the model with a formula (PRICE regressed on CRIM, RM and PTRATIO),
# then fit it as a separate step.
ols_model = sm.ols(formula="PRICE ~ CRIM + RM + PTRATIO", data=bos)
result = ols_model.fit()

In [None]:
# Print the summary report of the fitted model.
summary_table = result.summary()
print(summary_table)