# Simple Linear Regression

In [1]:
import numpy as np 
from sklearn.linear_model import LinearRegression

In [2]:
# Convert x to a two-dimensional array: reshape(-1, 1) gives one column and
# as many rows as necessary.
x = np.array([5, 15, 25, 35, 45, 55]).reshape(-1, 1)
# Responses, one value per row of x.
y = np.array([5, 20, 14, 32, 22, 38])

In [3]:
# Create the estimator, then call .fit() to calculate the optimal values of
# the weights 𝑏₀ and 𝑏₁ from x and y.
model = LinearRegression()
model.fit(x, y)
# Display the fitted estimator.
model

In [4]:
# Obtain the coefficient of determination, 𝑅², with .score().
# It is evaluated here on the same x and y that were passed to .fit().
r_sq = model.score(x, y)
r_sq

0.7158756137479542

In [5]:
# The fitted model exposes two attributes: .intercept_, which represents the
# coefficient 𝑏₀, and .coef_, which represents 𝑏₁. Display the intercept first.
model.intercept_

5.633333333333329

In [6]:
# Display the slope 𝑏₁; .coef_ is shown as a one-element array here.
model.coef_

array([0.54])

In [7]:
# The fitted model can be used for predictions with either existing or new data.
# To obtain the predicted response, use .predict(); here it is applied to the
# same inputs x used for fitting.
y_pred = model.predict(x)
y_pred

array([ 8.33333333, 13.73333333, 19.13333333, 24.53333333, 29.93333333,
       35.33333333])

In [8]:
# Same prediction computed by hand as 𝑏₁·𝑥 + 𝑏₀. Because x is a single
# column, the result is a two-dimensional column array rather than the
# one-dimensional array returned by .predict().
y_pred = model.coef_ * x + model.intercept_
y_pred  # predicted response

array([[ 8.33333333],
       [13.73333333],
       [19.13333333],
       [24.53333333],
       [29.93333333],
       [35.33333333]])

In [9]:
# New inputs 0..4 arranged as a single column (one row per sample).
x_new = np.arange(5).reshape((-1, 1))
x_new

array([[0],
       [1],
       [2],
       [3],
       [4]])

In [10]:
# Predict the response for the new inputs x_new.
y_new = model.predict(x_new)
y_new

array([5.63333333, 6.17333333, 6.71333333, 7.25333333, 7.79333333])

# Multiple Linear Regression

In [12]:
import numpy as np
from sklearn.linear_model import LinearRegression

# Two input features per observation (one row per sample).
x = np.array(
    [[0, 1], [5, 1], [15, 2], [25, 5], [35, 11], [45, 15], [55, 34], [60, 35]]
)
# One response per row of x.
y = np.array([4, 5, 20, 14, 32, 22, 38, 43])
x

array([[ 0,  1],
       [ 5,  1],
       [15,  2],
       [25,  5],
       [35, 11],
       [45, 15],
       [55, 34],
       [60, 35]])

In [13]:
# Display the response array.
y

array([ 4,  5, 20, 14, 32, 22, 38, 43])

In [14]:
# Fit a linear regression on the two-feature inputs x and the responses y.
model = LinearRegression().fit(x, y)

In [15]:
# Report 𝑅² on the data used for fitting, then the fitted intercept and the
# coefficient array.
r_sq = model.score(x, y)
print(f"coefficient of determination: {r_sq}")
print(f"intercept: {model.intercept_}")
print(f"coefficients: {model.coef_}")

coefficient of determination: 0.8615939258756775
intercept: 5.52257927519819
coefficients: [0.44706965 0.25502548]


In [16]:
# Predicted response for the inputs the model was fitted on.
y_pred = model.predict(x)
print(f"predicted response:\n{y_pred}")

predicted response:
[ 5.77760476  8.012953   12.73867497 17.9744479  23.97529728 29.4660957
 38.78227633 41.27265006]


In [18]:
# Same prediction computed by hand: multiply each feature by its coefficient,
# sum across the feature axis (axis=1), then add the intercept.
weighted_sum = (model.coef_ * x).sum(axis=1)
y_pred = model.intercept_ + weighted_sum
print(f"predicted response:\n{y_pred}")

predicted response:
[ 5.77760476  8.012953   12.73867497 17.9744479  23.97529728 29.4660957
 38.78227633 41.27265006]


In [19]:
# New inputs: the integers 0..9 arranged as rows of two features each.
x_new = np.arange(10).reshape((-1, 2))
x_new

array([[0, 1],
       [2, 3],
       [4, 5],
       [6, 7],
       [8, 9]])

In [20]:
# Predict the response for the new two-feature inputs x_new.
y_new = model.predict(x_new)
y_new

array([ 5.77760476,  7.18179502,  8.58598528,  9.99017554, 11.3943658 ])

# Polynomial Regression 

In [1]:
# There’s only one extra step: you need to transform the array of inputs to include nonlinear terms such as 𝑥².
import numpy as np 
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures

In [16]:
# Single-feature inputs as a column (one row per sample).
x = np.array([5, 15, 25, 35, 45, 55]).reshape((-1, 1))
# Responses, one per row of x.
y = np.array([15, 11, 2, 8, 25, 32])
x

array([[ 5],
       [15],
       [25],
       [35],
       [45],
       [55]])

In [3]:
# Create the transformer that builds the 𝑥² term (degree=2).
# include_bias=False leaves out the constant column of ones.
transformer = PolynomialFeatures(degree=2, include_bias=False)

In [4]:
# Fit the transformer to the inputs x; the transformation itself is applied separately.
transformer.fit(x)

In [6]:
# Apply the fitted transformer: each row of x_ holds 𝑥 and 𝑥².
x_ = transformer.transform(x)
x_

array([[   5.,   25.],
       [  15.,  225.],
       [  25.,  625.],
       [  35., 1225.],
       [  45., 2025.],
       [  55., 3025.]])

In [7]:
# Fit a linear regression on the transformed inputs x_ instead of the raw x.
model = LinearRegression().fit(x_, y)

In [8]:
# Score on the transformed inputs x_ (the same array the model was fitted on).
r_sq = model.score(x_,y)
print(f'coefficient of determination: {r_sq}')

# Fitted intercept 𝑏₀.
print(f'intercept: {model.intercept_}')

# Fitted coefficients for the columns of x_ (𝑥 and 𝑥²).
print(f'coefficients: {model.coef_}')

coefficient of determination: 0.8908516262498564
intercept: 21.37232142857146
coefficients: [-1.32357143  0.02839286]


In [9]:
# Method 2: fit and transform in a single call. With include_bias=True the
# transformed array gains a leading column of ones.
x_ = PolynomialFeatures(degree=2, include_bias=True).fit_transform(x)

In [10]:
# Display the transformed inputs (columns: 1, 𝑥, 𝑥²).
x_

array([[1.000e+00, 5.000e+00, 2.500e+01],
       [1.000e+00, 1.500e+01, 2.250e+02],
       [1.000e+00, 2.500e+01, 6.250e+02],
       [1.000e+00, 3.500e+01, 1.225e+03],
       [1.000e+00, 4.500e+01, 2.025e+03],
       [1.000e+00, 5.500e+01, 3.025e+03]])

In [12]:
# The column of ones in x_ already plays the role of the intercept term, so
# the model is fitted with fit_intercept=False.
model = LinearRegression(fit_intercept=False).fit(x_,y)

In [13]:
# Score on the transformed inputs x_ that include the column of ones.
r_sq = model.score(x_,y)
print(f'coefficient of determination: {r_sq}')

# With fit_intercept=False, .intercept_ is reported as 0.0.
print(f'intercept: {model.intercept_}')

# The first entry of .coef_ now carries the value of 𝑏₀.
print(f'coefficients: {model.coef_}')

coefficient of determination: 0.8908516262498563
intercept: 0.0
coefficients: [21.37232143 -1.32357143  0.02839286]


In [14]:
# Predict from the transformed inputs x_, not the raw x.
y_pred = model.predict(x_)
print(f'predicted response:\n{y_pred}')

predicted response:
[15.46428571  7.90714286  6.02857143  9.82857143 19.30714286 34.46428571]


In [17]:
# Step 1: Import packages and classes
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures

# Step 2a: Provide data (two input features per row, one response per row)
x = np.array(
    [[0, 1], [5, 1], [15, 2], [25, 5], [35, 11], [45, 15], [55, 34], [60, 35]]
)
y = np.array([4, 5, 20, 14, 32, 22, 38, 43])

# Step 2b: Transform input data into degree-2 polynomial terms, without a bias column
x_ = PolynomialFeatures(include_bias=False, degree=2).fit_transform(x)

# Step 3: Create a model and fit it on the transformed inputs
model = LinearRegression()
model.fit(x_, y)

# Step 4: Get results
r_sq = model.score(x_, y)
intercept = model.intercept_
coefficients = model.coef_

# Step 5: Predict response
y_pred = model.predict(x_)

In [20]:
# Model being fitted:
# 𝑓(𝑥₁, 𝑥₂) = 𝑏₀ + 𝑏₁𝑥₁ + 𝑏₂𝑥₂ + 𝑏₃𝑥₁² + 𝑏₄𝑥₁𝑥₂ + 𝑏₅𝑥₂².
# The commented values below are reference outputs; the last few digits can
# differ slightly from what the cell actually prints.
print(f"coefficient of determination: {r_sq}")
# coefficient of determination: 0.9453701449127822

print(f"intercept: {intercept}")
# intercept: 0.8430556452395876

print(f"coefficients:\n{coefficients}")
# coefficients:
# [ 2.44828275  0.16160353 -0.15259677  0.47928683 -0.4641851 ]

print(f"predicted response:\n{y_pred}")
# predicted response:
# [ 0.54047408 11.36340283 16.07809622 15.79139    29.73858619 23.50834636
# 39.05631386 41.92339046]

coefficient of determination: 0.9453701449127819
intercept: 0.8430556452396445
coefficients:
[ 2.44828275  0.16160353 -0.15259677  0.47928683 -0.4641851 ]
predicted response:
[ 0.54047408 11.36340283 16.07809622 15.79139    29.73858619 23.50834636
 39.05631386 41.92339046]


# Advanced Linear Regression

In [22]:
import numpy as np
import statsmodels.api as sm

In [23]:
# Two input features per observation (one row per sample).
x = np.array(
    [[0, 1], [5, 1], [15, 2], [25, 5], [35, 11], [45, 15], [55, 34], [60, 35]]
)
# One response per row of x.
y = np.array([4, 5, 20, 14, 32, 22, 38, 43])

In [24]:
# Prepend a column of ones to x so the model can estimate the intercept 𝑏₀.
x = sm.add_constant(x)

In [25]:
# Display the inputs, now with the leading column of ones.
x

array([[ 1.,  0.,  1.],
       [ 1.,  5.,  1.],
       [ 1., 15.,  2.],
       [ 1., 25.,  5.],
       [ 1., 35., 11.],
       [ 1., 45., 15.],
       [ 1., 55., 34.],
       [ 1., 60., 35.]])

In [26]:
# Display the response array.
y

array([ 4,  5, 20, 14, 32, 22, 38, 43])

In [27]:
# Build the ordinary least squares model. Note the argument order: the
# response y comes first, then the inputs x.
model = sm.OLS(y, x)

In [28]:
# Fit the model; the fitted results are kept in `result` for the cells below.
result = model.fit()

In [29]:
# Display the full regression summary table.
result.summary()



0,1,2,3
Dep. Variable:,y,R-squared:,0.862
Model:,OLS,Adj. R-squared:,0.806
Method:,Least Squares,F-statistic:,15.56
Date:,"Mon, 06 Nov 2023",Prob (F-statistic):,0.00713
Time:,19:20:55,Log-Likelihood:,-24.316
No. Observations:,8,AIC:,54.63
Df Residuals:,5,BIC:,54.87
Df Model:,2,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,5.5226,4.431,1.246,0.268,-5.867,16.912
x1,0.4471,0.285,1.567,0.178,-0.286,1.180
x2,0.2550,0.453,0.563,0.598,-0.910,1.420

0,1,2,3
Omnibus:,0.561,Durbin-Watson:,3.268
Prob(Omnibus):,0.755,Jarque-Bera (JB):,0.534
Skew:,0.38,Prob(JB):,0.766
Kurtosis:,1.987,Cond. No.,80.1


In [32]:
# 𝑅² of the fitted model.
print(f"coefficient of determination: {result.rsquared}")
# coefficient of determination: 0.8615939258756776 : 𝑅²

# Adjusted 𝑅², that is, 𝑅² corrected according to the number of input features.
print(f"adjusted coefficient of determination: {result.rsquared_adj}")
# adjusted coefficient of determination: 0.8062314962259487
# .params is the array holding 𝑏₀, 𝑏₁, and 𝑏₂.
print(f"regression coefficients: {result.params}")
# regression coefficients: [5.52257928 0.44706965 0.25502548]

coefficient of determination: 0.8615939258756776
adjusted coefficient of determination: 0.8062314962259487
regression coefficients: [5.52257928 0.44706965 0.25502548]


In [33]:
# The pasted ">>> " REPL prompts were removed so the cell is plain, valid Python.

# Fitted values for the inputs the model was trained on.
print(f"predicted response:\n{result.fittedvalues}")
# predicted response:
# [ 5.77760476  8.012953   12.73867497 17.9744479  23.97529728 29.4660957
#  38.78227633 41.27265006]

# Calling .predict() on the same inputs x gives the same values.
print(f"predicted response:\n{result.predict(x)}")
# predicted response:
# [ 5.77760476  8.012953   12.73867497 17.9744479  23.97529728 29.4660957
#  38.78227633 41.27265006]

predicted response:
[ 5.77760476  8.012953   12.73867497 17.9744479  23.97529728 29.4660957
 38.78227633 41.27265006]
predicted response:
[ 5.77760476  8.012953   12.73867497 17.9744479  23.97529728 29.4660957
 38.78227633 41.27265006]


In [35]:
# New inputs: the integers 0..9 as rows of two features, with the column of
# ones prepended.
x_new = np.arange(10).reshape(-1, 2)
x_new = sm.add_constant(x_new)
# Expected x_new:
# array([[1., 0., 1.],
#        [1., 2., 3.],
#        [1., 4., 5.],
#        [1., 6., 7.],
#        [1., 8., 9.]])

# Predict the response for the new inputs.
y_new = result.predict(x_new)
y_new
# Expected y_new:
# array([ 5.77760476,  7.18179502,  8.58598528,  9.99017554, 11.3943658 ])

array([ 5.77760476,  7.18179502,  8.58598528,  9.99017554, 11.3943658 ])

### Use scikit-learn if you don’t need detailed results and want an approach consistent with other regression techniques
### Use statsmodels if you need the advanced statistical parameters of a model