In [3]:
import pandas as pd
import statsmodels.api as sm
from statsmodels.formula.api import mixedlm

# 讀取資料
file_path = 'encoded_data.xlsx'
data = pd.read_excel(file_path)

# 查看資料結構
data.head()

Unnamed: 0,薪資,學歷要求編碼,工作經驗編碼,工作地區編碼,是否需出差外派編碼,公司規模,是否需要相關科系,產業類別編號,職務內容可讀性,職務內容主觀性,其他條件可讀性,其他條件主觀性
0,52500,2,2,2,1,1,0,3,96.879405,0.0,0.0,0.0
1,44000,2,1,2,0,1,0,3,96.754356,0.0,49.006458,0.0
2,181250,2,4,1,0,1,1,3,96.754356,0.527083,55.748398,0.508858
3,102083,4,2,5,0,1,1,3,82.69049,0.40664,65.568294,0.5
4,87500,3,3,1,0,2,1,3,25.234792,0.425,24.649091,0.1


### 模型 1

In [14]:
# 模型 1：僅考慮依變數，使用不同優化器
formula1 = "薪資 ~ 1 + (1 | 工作地區編碼)"
model1 = mixedlm(formula1, data, groups=data["工作地區編碼"])
result1 = model1.fit(method='nm')  # 使用 Nelder-Mead 優化器

# 顯示結果
print("模型 1 結果")
result1.summary()

模型 1 結果


0,1,2,3
Model:,MixedLM,Dependent Variable:,薪資
No. Observations:,2315,Method:,REML
No. Groups:,5,Scale:,783003530.8152
Min. group size:,1,Log-Likelihood:,-26976.9121
Max. group size:,1497,Converged:,Yes
Mean group size:,463.0,,

0,1,2,3,4,5,6
,Coef.,Std.Err.,z,P>|z|,[0.025,0.975]
Intercept,58465.166,10482.446,5.577,0.000,37919.950,79010.382
1 | 工作地區編碼,-1023.921,3157.449,-0.324,0.746,-7212.409,5164.566
Group Var,79262171.651,2802.026,,,,


### 模型2 加入個體層級變數

In [None]:
# 模型 2：加入個體層級變數，不包括總體層級變數
formula2 = ("薪資 ~ 學歷要求編碼 + 工作經驗編碼 +  + 是否需出差外派編碼 + "
            "是否需要相關科系 + 職務內容主觀性 + 其他條件可讀性 + 其他條件主觀性 + "
            "公司規模 + 產業類別編號")
model2 = mixedlm(formula2, data, groups=data["工作地區編碼"])
result2 = model2.fit(method='nm')  # 使用 Nelder-Mead 優化器

# 顯示結果
print("模型 2 結果")
print(result2.summary())

### 模型3 加入總體層級變數(工作地區)

In [15]:
# 建立公式
formula = ("薪資 ~ 學歷要求編碼 + 工作經驗編碼 + 公司規模 + "
           "是否需要相關科系 + 產業類別編號 + 職務內容可讀性 + "
           "職務內容主觀性 + 其他條件可讀性 + 其他條件主觀性 + "
           "(1 | 工作地區編碼)")

# 建立混合效應模型
model = mixedlm(formula, data, groups=data["工作地區編碼"])
result = model.fit(method="nm")

# 顯示結果
result.summary()

0,1,2,3
Model:,MixedLM,Dependent Variable:,薪資
No. Observations:,2315,Method:,REML
No. Groups:,5,Scale:,571922385.8146
Min. group size:,1,Log-Likelihood:,-26544.8235
Max. group size:,1497,Converged:,Yes
Mean group size:,463.0,,

0,1,2,3,4,5,6
,Coef.,Std.Err.,z,P>|z|,[0.025,0.975]
Intercept,73127.757,8789.828,8.320,0.000,55900.011,90355.503
學歷要求編碼,4698.411,732.162,6.417,0.000,3263.400,6133.423
工作經驗編碼,9879.612,508.974,19.411,0.000,8882.042,10877.183
公司規模,-2975.550,642.387,-4.632,0.000,-4234.606,-1716.494
是否需要相關科系,-5380.958,1073.542,-5.012,0.000,-7485.063,-3276.854
產業類別編號,-6001.114,1495.567,-4.013,0.000,-8932.370,-3069.857
職務內容可讀性,-179.143,24.288,-7.376,0.000,-226.746,-131.540
職務內容主觀性,15147.471,2879.368,5.261,0.000,9504.013,20790.929
其他條件可讀性,1.099,11.858,0.093,0.926,-22.142,24.340
