In [1]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Synthetic regression data: a sine wave over two full periods plus small
# uniform noise. The generator is seeded locally so that "Restart & Run All"
# reproduces the same samples (and therefore the same scores) every time,
# without touching NumPy's global random state.
RANDOM_SEED = 42
n_dots = 200

rng = np.random.default_rng(RANDOM_SEED)
x_flat = np.linspace(-2 * np.pi, 2 * np.pi, n_dots)
noise = 0.2 * rng.random(n_dots) - 0.1  # uniform in [-0.1, 0.1)

# Estimators are fitted on 2-D arrays, so expose both as (n_dots, 1) columns.
# The noise is added while everything is still 1-D to avoid accidental
# (n_dots, 1) + (n_dots,) broadcasting.
X = x_flat.reshape(-1, 1)
Y = (np.sin(x_flat) + noise).reshape(-1, 1)

In [3]:
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import PolynomialFeatures, StandardScaler

In [4]:
def polynomial_model(degree=1):
    """Build an unfitted polynomial-regression pipeline.

    The pipeline expands the input into polynomial terms up to ``degree``
    (without the constant column, since the regression fits its own
    intercept), rescales those terms, and fits an ordinary least-squares
    model on them.

    Parameters
    ----------
    degree : int, default=1
        Maximum polynomial degree passed to ``PolynomialFeatures``.

    Returns
    -------
    sklearn.pipeline.Pipeline
        Pipeline with the named steps ``"polynomial_features"``,
        ``"standard_scaler"`` and ``"linear_regression"``.

    Notes
    -----
    ``LinearRegression(normalize=True)`` is no longer accepted: the
    ``normalize`` argument was deprecated in scikit-learn 1.0 and removed in
    1.2. The explicit ``StandardScaler(with_mean=False)`` step is the
    replacement suggested by scikit-learn. It keeps high-degree terms on a
    comparable scale; the fitted ``coef_`` values now refer to the scaled
    features.
    """
    return Pipeline([
        ("polynomial_features", PolynomialFeatures(degree=degree, include_bias=False)),
        # Stands in for the removed `normalize=True`; centering is left to
        # LinearRegression's own intercept handling.
        ("standard_scaler", StandardScaler(with_mean=False)),
        ("linear_regression", LinearRegression()),
    ])

In [5]:
from sklearn.metrics import mean_squared_error

In [6]:
# Polynomial degrees to compare; each one gets its own freshly built pipeline.
degrees = [2, 3, 5, 10]


def _fit_and_summarise(degree):
    """Fit one polynomial pipeline on (X, Y) and collect its training metrics.

    Returns a dict holding the fitted pipeline ("model"), the degree used
    ("degree"), the pipeline's own ``score`` on the training data ("score"),
    and the mean squared error of its training predictions ("mse").
    """
    estimator = polynomial_model(degree=degree)
    estimator.fit(X, Y)
    fit_score = estimator.score(X, Y)
    train_mse = mean_squared_error(Y, estimator.predict(X))
    return {"model": estimator, "degree": degree, "score": fit_score, "mse": train_mse}


# Fit every candidate first, then report, so the summary prints as one block.
results = [_fit_and_summarise(degree) for degree in degrees]

REPORT_LINE = "degree: {}; train score: {}; mean squared error: {}"
for entry in results:
    print(REPORT_LINE.format(entry["degree"], entry["score"], entry["mse"]))

degree: 2; train score: 0.11666086179009827; mean squared error: 0.5320550529426891
degree: 3; train score: 0.21642350613136843; mean squared error: 0.4719657659172518
degree: 5; train score: 0.7901978641639715; mean squared error: 0.12636855049345996
degree: 10; train score: 0.8710643630174454; mean squared error: 0.07766083737665348
