## Feature Engineering and Polynomial Regression


In [7]:
import numpy as np
# `plt` is the conventional alias for the pyplot plotting API; the bare
# `matplotlib` package does not expose plot()/subplots() at top level.
import matplotlib.pyplot as plt
from sklearn.linear_model import SGDRegressor
from sklearn.preprocessing import StandardScaler

In [2]:
# Raw input: the integers 0 through 19.
x = np.arange(20)

# Target is an exact quadratic in x.
y_train = x**2 + 1

# Engineered features: broadcast x (as a column) against the exponents 1, 2, 3
# so the three columns hold x, x^2 and x^3.
X_train = x[:, np.newaxis] ** np.arange(1, 4)

In [3]:
# Fit the scaler on the engineered features, then apply it to the same data,
# so the three power columns end up on comparable scales.
scaler = StandardScaler()
X_norm = scaler.fit(X_train).transform(X_train)

# Report each column's peak-to-peak range (max - min) before and after scaling.
print(
    f"Peak to Peak range by column in Raw        X:{np.ptp(X_train,axis=0)}",
    f"Peak to Peak range by column in Normalized X:{np.ptp(X_norm,axis=0)}",
    sep="\n",
)


Peak to Peak range by column in Raw        X:[  19  361 6859]
Peak to Peak range by column in Normalized X:[3.29501788 3.18076489 3.28307153]


In [4]:
# Fit a linear model on the scaled features with stochastic gradient descent.
# SGD shuffles the training data between epochs, so the seed is fixed to keep
# the fitted weights and the iteration counts repeatable on a fresh
# Restart & Run All.
sgdr = SGDRegressor(max_iter=1000, random_state=42)
sgdr.fit(X_norm, y_train)
print(sgdr)
# n_iter_ is the number of epochs actually run before the stopping criterion
# was met; t_ is the total number of weight updates performed.
print(f"number of iterations completed: {sgdr.n_iter_}, number of weight updates: {sgdr.t_}")

SGDRegressor()
number of iterations completed: 833, number of weight updates: 16661.0


In [5]:
# Read the learned parameters off the fitted regressor: the coefficient
# vector (one weight per scaled feature column) and the intercept.
w_norm, b_norm = sgdr.coef_, sgdr.intercept_
print(f"model parameters:   w: {w_norm}, b: {b_norm}")

model parameters:   w: [29.64844697 41.86061279 43.51349957], b: [124.49730383]


In [6]:
# Predict with the fitted estimator's own predict() method.
y_pred_sgd = sgdr.predict(X_norm)
# Reproduce the same prediction by hand from the learned weights and intercept.
y_pred = np.dot(X_norm, w_norm) + b_norm
# Compare with a floating-point tolerance: the two code paths are free to
# differ in the last few bits, so exact `==` equality is a fragile check.
print(f"prediction using np.dot() and sgdr.predict match: {np.allclose(y_pred, y_pred_sgd)}")

# Show the first few predictions next to their targets for a visual sanity check.
print(f"Prediction on training set:\n{y_pred[:4]}" )
print(f"Target values \n{y_train[:4]}")

prediction using np.dot() and sgdr.predict match: True
Prediction on training set:
[-7.49386431 -1.96251056  4.43147633 11.81306322]
Target values 
[ 1  2  5 10]
