In [1]:
# 用 scikit learn 套件算迴歸參數


import numpy
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.datasets import make_regression


# 1. 產生 ( x , y ) 資料

# a. 直接自己做

# 要記得把 x , y 轉成 2D 陣列，即便只有一排資料，這樣 regression 才能跑

# x 要做 reshape(-1,1) , 第一個參數代表選取所有列，第二個是欄 , y 也是一樣 , 但 random 要用 tuple ,

# 隨機變數產生可以選 random.random (只會產生 0 到 1 數字) 或 random.uniform (可以自己選上下數字範圍)

x = numpy.linspace(0,10,20).reshape(-1,1)

y = 2*x + 1 + numpy.random.uniform(-1,1,20).reshape(-1,1)

# b. 用模組的方法做

# x , y = make_regression(n_samples=20,n_features=1,noise=10,random_state=42)


# 2. 做訓練資料集和測試資料集

x_test , x_train , y_test , y_train = train_test_split(x,y,test_size=0.2,random_state=42)


# 3. 進行迴歸

model = LinearRegression()

model.fit(x,y)


# 4. 產出結果

print(f"a參數 = {model.coef_} , b參數 = {model.intercept_} , 模型 R square 為 {model.score(x_test,y_test)}")


a參數 = [[2.0190522]] , b參數 = [0.85198196] , 模型 R square 為 0.9847933078952412


In [2]:
# 改用時間序列資料以及 statsmodels 來做 OLS 迴歸，並產生 p-value 和 F 指標來判斷模型好壞


import numpy
from statsmodels.api import add_constant , OLS
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score


# 1. 先載入股價資料

stock_price = numpy.array([6449.80,6468.54,6466.58,6445.76,6373.45,6389.45,6340.00,6345.06,6299.19,6329.94,6238.01,6339.39,6362.90,6370.86,6389.77,6388.64,6363.35,6358.91,6309.62,6305.60,6296.79,6297.36,6263.70,6243.76])


# 2. 做出時間序列的訓練和測試資料集，為今天和前幾天的資料作為解釋變數

# a.先區分訓練集和測試集

# 算 array 的元素總數可以先做成切片再來計算，不然只能算一維的

N = len(stock_price)
stock_price_train = stock_price[:int(numpy.floor(N*0.6))]
stock_price_test = stock_price[int(numpy.floor(N*0.6)):]

# b. 再做成變數資料

y_train = []
X_1_train = []
X_2_train = []

N_train = len(stock_price_train)

for i in range(N_train-2) : 

    y_train.append(stock_price_train[i])
    X_1_train.append(stock_price_train[i+1])
    X_2_train.append(stock_price_train[i+2])

y_test = []
X_1_test = []
X_2_test = []

N_test = len(stock_price_test)

for i in range(N_test-2) : 

    y_test.append(stock_price_train[i])
    X_1_test.append(stock_price_train[i+1])
    X_2_test.append(stock_price_train[i+2])


# 3. 把資料合併並加上常數項

# stack 的資料來源可以是 list，做完就轉成 numpy array

# x1 和 x2 變數算是欄位，所以要把資料反轉，讓資料變成列，所以用 stack 搭配 axis = 1

x_train_data = numpy.stack((X_1_train,X_2_train),axis=1)

x_train_set = add_constant(x_train_data)

x_test_data = numpy.stack((X_1_test,X_2_test),axis=1)


# 4. 做 OLS 回歸 , 結果和 scikit learn 一樣

# statsmodel 的 OLS 迴歸

model = OLS(y_train,x_train_set)
result = model.fit()

print(result.summary())

# Scikit learn 的 LinearRegression 迴歸

model_2 = LinearRegression()
model_2.fit(x_train_data,y_train)

print(model_2.coef_,model_2.intercept_)


# 5. 做樣本外 R square 測試 , OLS 只能用 scikit learn 的 r2_score 來算配適程度

# OLS 算 R square ( 要記得加常數項，還有要用 fit 完的結果做預測，不是 model，和 Linear Regression 一樣 )

x_test_set = add_constant(x_test_data)

y_prediction = result.predict(x_test_set)

print(r2_score(y_test,y_prediction))

# Linear Regression 算 R square

print(model_2.score(x_test_data,y_test))






                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.571
Model:                            OLS   Adj. R-squared:                  0.476
Method:                 Least Squares   F-statistic:                     5.988
Date:                Thu, 21 Aug 2025   Prob (F-statistic):             0.0222
Time:                        20:51:37   Log-Likelihood:                -62.836
No. Observations:                  12   AIC:                             131.7
Df Residuals:                       9   BIC:                             133.1
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const        591.2658   1726.740      0.342      0.7

  return hypotest_fun_in(*args, **kwds)
