## Simple Linear Regression with scikit-learn

In [1]:
#Step1 - Import the classes and packages required
import numpy as np
from sklearn.linear_model import LinearRegression

In [12]:
# Step 2 - Provide data to work with
# The predictor is stored as a single column (6 rows, 1 feature);
# the response stays a flat 1-D array with one value per row.
x = np.array([5, 15, 25, 35, 45, 55])[:, np.newaxis]
y = np.array([5, 20, 14, 32, 22, 38])

In [15]:
# Step 3 - Create a model and fit it
# The estimator is built with default settings and fitted in one expression;
# the fitted estimator is what gets bound to `model`.
model = LinearRegression().fit(X=x, y=y)

`LinearRegression()` accepts the following optional parameters:
- **`fit_intercept`** is a [Boolean](https://realpython.com/python-boolean/) (`True` by default) that decides whether to calculate the intercept 𝑏₀ (`True`) or consider it equal to zero (`False`).
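- **`normalize`** is a Boolean (`False` by default) that decides whether to normalize the input variables (`True`) or not (`False`). Note: this parameter was deprecated in scikit-learn 1.0 and removed in 1.2; on current versions, scale the inputs beforehand (for example with `StandardScaler`) instead of passing `normalize`.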
- **`copy_X`** is a Boolean (`True` by default) that decides whether to copy (`True`) or overwrite the input variables (`False`).
- **`n_jobs`** is an integer or `None` (default) and represents the number of jobs used in parallel computation. `None` usually means one job, and `-1` means using all processors.

In [17]:
# Step 4 - Get results
# score() is reported below as the coefficient of determination; intercept_
# and coef_ are the fitted intercept and slope of the regression line.
r_sq = model.score(x, y)
for label, value in (
    ("Coefficient of Determination: ", r_sq),
    ('Intercept: ', model.intercept_),
    ('Slope: ', model.coef_),
):
    print(label, value)

Coefficient of Determination:  0.7158756137479542
Intercept:  5.633333333333329
Slope:  [0.54]


In [18]:
# y can also be supplied as a 2-D column. The fitted numbers are the same,
# but intercept_ then comes back as a 1-D array and coef_ as a 2-D array.
y_column = y.reshape(-1, 1)
new_model = LinearRegression().fit(x, y_column)
print('New Intercept: ', new_model.intercept_)
print('New Slope: ', new_model.coef_)

New Intercept:  [5.63333333]
New Slope:  [[0.54]]


In [22]:
# Step 5 - Predict response
# Predictions are made for the same inputs the model was fitted on.
y_pred = model.predict(x)
print('Predicted Response:')
print(y_pred)

Predicted Response:
[ 8.33333333 13.73333333 19.13333333 24.53333333 29.93333333 35.33333333]


In [30]:
# Same prediction as Step 5, computed by hand as slope * x + intercept.
# Because x is a 2-D column, the result is a column of shape (6, 1) rather
# than the flat array returned by model.predict(x); the values match.
y_pred_1 = model.coef_ * x + model.intercept_
print('Predicted Response:')
print(y_pred_1)

Predicted Response:
[[ 8.33333333]
 [13.73333333]
 [19.13333333]
 [24.53333333]
 [29.93333333]
 [35.33333333]]


In [31]:
# Apply the fitted model to new inputs: the integers 0..4 as a single column.
x_new = np.arange(5).reshape(-1, 1)
y_new = model.predict(x_new)
print(x_new)
print(y_new)

[[0]
 [1]
 [2]
 [3]
 [4]]
[5.63333333 6.17333333 6.71333333 7.25333333 7.79333333]


## Multiple Linear Regression With scikit-learn

In [32]:
#Step1 - Importing packages
import numpy as np
from sklearn.linear_model import LinearRegression

In [37]:
# Step 2 - Provide data
# Each row of x holds two predictor values for one observation;
# y holds the matching response, one value per row.
x = np.array([
    [0, 1], [5, 1], [15, 2], [25, 5],
    [35, 11], [45, 15], [55, 34], [60, 35],
])
y = np.array([4, 5, 20, 14, 32, 22, 38, 43])
print(f"X----->\n{x}\nShape: {x.shape}")
print(f"Y----->\n{y}\nShape: {y.shape}")

X----->
[[ 0  1]
 [ 5  1]
 [15  2]
 [25  5]
 [35 11]
 [45 15]
 [55 34]
 [60 35]]
Shape: (8, 2)
Y----->
[ 4  5 20 14 32 22 38 43]
Shape: (8,)


In [38]:
# Step 3 - Create model and fit it
# Same call as in the single-predictor case; x now carries two columns.
model = LinearRegression().fit(X=x, y=y)

In [39]:
# Step 4 - Results
# coef_ now holds one slope per predictor column of x.
r_sq = model.score(x, y)
for label, value in (
    ('coefficient of determination: ', r_sq),
    ('intercept: ', model.intercept_),
    ('slope: ', model.coef_),
):
    print(label, value)

coefficient of determination:  0.8615939258756776
intercept:  5.52257927519819
slope:  [0.44706965 0.25502548]


In [40]:
# Step 5 - Predict response
# Manual alternative to predict():
#   y_pred = model.intercept_ + np.sum(model.coef_ * x, axis=1)
y_pred = model.predict(x)
print('predicted response: ')
print(y_pred)

predicted response: 
[ 5.77760476  8.012953   12.73867497 17.9744479  23.97529728 29.4660957
 38.78227633 41.27265006]


In [41]:
# The fitted model can also predict on new data: here the integers 0..9
# arranged as five rows of two predictor values each.
x_new = np.arange(10).reshape(-1, 2)
y_new = model.predict(x_new)
print(x_new)
print(y_new)

[[0 1]
 [2 3]
 [4 5]
 [6 7]
 [8 9]]
[ 5.77760476  7.18179502  8.58598528  9.99017554 11.3943658 ]


## Polynomial Regression With scikit-learn

In [42]:
#Step 1: Import packages and classes
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures

In [43]:
# Step 2a - Provide data
# Single predictor stored as a column, with a flat response array.
x = np.array([5, 15, 25, 35, 45, 55])[:, np.newaxis]
y = np.array([15, 11, 2, 8, 25, 32])

# Step 2b: Transform input data
# TODO: not implemented yet; no transformation code follows in this cell.


## Cross Validation and MSE

In [3]:
from sklearn . model_selection import KFold
import numpy as np

# K-fold cross-validation estimate of the mean squared prediction error of a
# straight-line fit.
#
# NOTE(review): `x` and `y_obs` are not defined in this cell and must be
# created by an earlier cell before it runs; `y_obs` is not defined anywhere
# in the visible notebook. Confirm which dataset is intended. KFold with
# n_splits=10 also needs at least 10 samples.
# `LinearRegression` is expected to come from the notebook's import cell.

# Design matrix with an explicit column of ones, which is why the model
# below is created with fit_intercept=False.
X = np.hstack([np.ones((len(x), 1)), x.reshape(-1, 1)])
kf = KFold(n_splits=10)  # 10-fold CV
MSE = []
for train_index, test_index in kf.split(X):
    X_train, y_train = X[train_index], y_obs[train_index]
    X_test, y_test = X[test_index], y_obs[test_index]
    # Fit and score inside the loop so that every fold contributes one MSE
    # value; previously these statements ran once, after the loop, and only
    # the last fold was evaluated.
    lreg = LinearRegression(fit_intercept=False)
    res = lreg.fit(X_train, y_train)
    MSE.append(np.mean((res.predict(X_test) - y_test) ** 2))
print("cross - valdiation error =", np.mean(MSE))

NameError: name 'x' is not defined