## Supervised Learning

In [1]:
%matplotlib inline

import matplotlib.pyplot as plt
import numpy as np

### 01 線性迴歸Linear Regression

In [3]:
from sklearn.linear_model import LinearRegression

In [5]:
X = [[10.0], [8.0], [13.0], [9.0], [11.0], [14.0],
     [6.0], [4.0], [12.0], [7.0], [5.0]]

In [6]:
y = [8.04, 6.95, 7.58, 8.81, 8.33, 9.96, 7.24, 4.26, 10.84, 4.82, 5.68]

In [7]:
model = LinearRegression()

In [8]:
model.fit(X, y)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)

In [9]:
print("截距:", model.intercept_)

截距: 3.0000909090909103


In [11]:
print("斜率:", float(model.coef_))

斜率: 0.5000909090909091


In [13]:
y_pred = model.predict([[0], [1]])

In [14]:
print(y_pred)

[3.00009091 3.50018182]


### 02 正則化Regularization
使用脊迴歸來逼近$y=sin(2\pi x)$函數，建立六次方程式時，利用的是PolynomialFeatures。

In [4]:
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error

In [5]:
train_size = 20
test_size = 12

train_X = np.random.uniform(low=0, high=1.2, size=train_size)
test_X = np.random.uniform(low=0.1, high=1.3, size=test_size)

train_y = np.sin(2*np.pi*train_X) + np.random.normal(0, 0.2, train_size)
test_y = np.sin(2*np.pi*test_X) + np.random.normal(0, 0.2, test_size)

In [14]:
train_X.reshape(train_size, 1)

array([[0.40501539],
       [0.32478826],
       [0.62902075],
       [0.67119094],
       [1.03768232],
       [0.95209094],
       [1.07399086],
       [0.18958331],
       [0.48721214],
       [0.4052506 ],
       [0.49018802],
       [0.62003869],
       [0.37933775],
       [0.68488235],
       [1.10551828],
       [1.00530047],
       [1.11187886],
       [0.90761569],
       [0.10237288],
       [1.02766907]])

In [8]:
train_y

array([ 0.34815743,  1.25619942, -0.73382499, -0.72910261,  0.09060496,
        0.01110344,  0.40819585,  0.78981231,  0.16765448,  0.59484327,
        0.2721987 , -0.71593379,  0.50310392, -0.88134383,  0.35106292,
        0.18474046,  0.66450842, -0.48753168,  0.65001626,  0.18670928])

In [6]:
poly = PolynomialFeatures(6)  # 次方數為6

In [15]:
train_poly_X = poly.fit_transform(train_X.reshape(train_size, 1))
test_poly_X = poly.fit_transform(test_X.reshape(test_size, 1))

In [16]:
model = Ridge(alpha=1.0)

In [19]:
model.fit(train_poly_X, train_y)

Ridge(alpha=1.0, copy_X=True, fit_intercept=True, max_iter=None,
      normalize=False, random_state=None, solver='auto', tol=0.001)

In [20]:
train_pred_y = model.predict(train_poly_X)
test_pred_y = model.predict(test_poly_X)

In [21]:
print(mean_squared_error(train_pred_y, train_y))
print(mean_squared_error(test_pred_y, test_y))

0.18270958354096115
0.2660464446261073
