# **建立複迴歸模型 Building a Multiple Regression Model**

In [2]:
import numpy as np
from sklearn.linear_model import LinearRegression

# Training sample: ten observations. Each row of X holds the two
# explanatory variables for one observation; y holds the matching response.
X = np.array([
    [10, 80],
    [8, 0],
    [8, 200],
    [5, 200],
    [7, 300],
    [8, 230],
    [7, 40],
    [9, 0],
    [6, 330],
    [9, 180],
])
y = np.array([469, 366, 371, 208, 246, 297, 363, 436, 198, 364])

# Fit ordinary least squares; fit() hands back the estimator, so the
# construction and the fit can be chained.
lm = LinearRegression().fit(X, y)

# Estimated slopes, one per column of X.
print(lm.coef_)

# Estimated intercept.
print(lm.intercept_)

[41.51347826 -0.34088269]
65.32391638894836


# **利用複迴歸模型預測 Predict Using the Multiple Regression Model**

In [3]:
# Explanatory variables of the new cake shop, as a single-row 2-D array
# (same column order as the training matrix X).
to_be_predicted = np.array([[10, 110]])

# Predicted monthly sales for the new cake shop from the fitted model.
predicted_sales = lm.predict(to_be_predicted)
print(predicted_sales)

[442.96160353]


# **複迴歸模型的績效 Evaluation (mse, r^2, and adjusted r^2)**

In [6]:
# Goodness-of-fit summary for the fitted model `lm` on the training data.
n_obs, n_features = X.shape

# Residual degrees of freedom: observations minus estimated parameters
# (one slope per column of X plus the intercept).
residual_dof = n_obs - (n_features + 1)

residuals = y - lm.predict(X)
r_squared_exact = lm.score(X, y)

# Round only the reported figures. Adjusted R^2 is derived from the
# unrounded R^2 so that rounding error is not carried into it:
#   adj R^2 = 1 - (1 - R^2) * (n - 1) / (n - k - 1)
mse = round(np.sum(residuals ** 2) / residual_dof, 6)
r_squared = round(r_squared_exact, 6)
adjusted_r_squared = round(
    1 - (1 - r_squared_exact) * (n_obs - 1) / residual_dof, 6
)

# Print the evaluation metrics.
print("mse =", mse)
print("r^2 =", r_squared)
print("ad.r^2 =", adjusted_r_squared)

mse = 596.143731
r^2 = 0.945236
ad.r^2 = 0.929589


# **複迴歸模型的係數檢定 Evaluation (testing coefficients)**

In [7]:
import pandas as pd
from sklearn.linear_model import LinearRegression
import statsmodels.api as sm
from scipy import stats

from IPython.display import display

# Same sample as the model-building cell, repeated here so this cell can be
# run on its own: each row of X is one observation, y is the response.
X = np.array([
    [10, 80], [8, 0], [8, 200], [5, 200], [7, 300],
    [8, 230], [7, 40], [9, 0], [6, 330], [9, 180],
])
y = np.array([469, 366, 371, 208, 246, 297, 363, 436, 198, 364])

lm = LinearRegression()
lm.fit(X, y)

# Parameter vector ordered as [intercept, slope_1, ..., slope_k].
params = np.append(lm.intercept_, lm.coef_)
predictions = lm.predict(X)

# Design matrix: a leading column of ones (for the intercept) followed by X.
newX = pd.DataFrame({"Constant": np.ones(len(X))}).join(pd.DataFrame(X))
design = newX.to_numpy()

# Residual degrees of freedom, n - (k + 1). Computed once and used for both
# the MSE and the t distribution so the two can never disagree when the
# number of regressors changes.
dof = len(newX) - len(newX.columns)
MSE = np.sum((y - predictions) ** 2) / dof

# Variance of each estimate: diagonal of MSE * (X'X)^-1.
# Per the course handout (cross-sectional regression analysis > model
# specification and inference, p.53), the variance of beta is the population
# error variance times (X'X)^-1; that variance is unknown here, so MSE is
# used in its place.
var_b = MSE * np.linalg.inv(design.T @ design).diagonal()
sd_b = np.sqrt(var_b)   # standard errors
ts_b = params / sd_b    # t statistics

# Two-sided p-values from the t distribution's survival function.
p_values = 2 * stats.t.sf(np.abs(ts_b), dof)

# Round for presentation only, after all derived quantities are computed.
sd_b = np.round(sd_b, 5)
ts_b = np.round(ts_b, 6)
p_values = np.round(p_values, 7)
params = np.round(params, 4)

pd.options.display.float_format = '{:.8f}'.format
table = pd.DataFrame(
    {
        "Coefficients": params,
        "Standard Errors": sd_b,
        "t-values": ts_b,
        "P-values": p_values,
    },
    index=[
        "Intercept",
        "the area of the cake shops(X1)",
        "the distance from train station(X2)",
    ],
)
display(table)

Unnamed: 0,Coefficients,Standard Errors,t-values,P-values
Intercept,65.3239,55.73834,1.171974,0.2795463
the area of the cake shops(X1),41.5135,6.25612,6.635662,0.0002944
the distance from train station(X2),-0.3409,0.07814,-4.362419,0.0033044
