In [41]:
import pandas as pd
from linearmodels.panel import PanelOLS
import statsmodels.api as sm

In [42]:
panel = pd.read_csv('ddm_cpc.csv')
panel.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1177 entries, 0 to 1176
Data columns (total 8 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   门店ID    1177 non-null   int64  
 1   平台i     1177 non-null   object 
 2   日期      1177 non-null   object 
 3   cpc总费用  1177 non-null   float64
 4   cpc曝光量  1177 non-null   int64  
 5   cpc访问量  1177 non-null   int64  
 6   自然曝光量   1165 non-null   float64
 7   自然访问量   1165 non-null   float64
dtypes: float64(3), int64(3), object(2)
memory usage: 73.7+ KB


In [43]:
panel.head()

Unnamed: 0,门店ID,平台i,日期,cpc总费用,cpc曝光量,cpc访问量,自然曝光量,自然访问量
0,8184590,美团,2019-12-10,225.65,2711,173,1427.0,159.0
1,8223184,美团,2019-12-10,261.1,3665,169,11.0,86.0
2,8106681,美团,2019-12-10,177.5,2115,129,874.0,165.0
3,8165842,美团,2019-12-10,240.3,2937,164,614.0,162.0
4,2001220953,饿了么,2019-12-10,623.5,4190,401,1872.0,387.0


In [44]:
# 填充缺失值，用前一天的自然曝光量和访问量来填充
panel.sort_values(by=["门店ID", "日期"])
panel['自然曝光量']=panel['自然曝光量'].fillna(axis = 0,method = 'ffill')
panel['自然访问量']=panel['自然访问量'].fillna(axis = 0,method = 'ffill')

In [45]:
pd.isna(panel).sum() 

门店ID      0
平台i       0
日期        0
cpc总费用    0
cpc曝光量    0
cpc访问量    0
自然曝光量     0
自然访问量     0
dtype: int64

In [46]:
#转化日期类型
panel['日期'] = pd.to_datetime(panel['日期'])

In [47]:
#虚拟变量
platform = pd.get_dummies(panel['平台i'])
panel = pd.concat([panel,platform],axis=1)
panel.drop(['平台i'],axis = 1,inplace = True)

In [48]:
panel.head()

Unnamed: 0,门店ID,日期,cpc总费用,cpc曝光量,cpc访问量,自然曝光量,自然访问量,美团,饿了么
0,8184590,2019-12-10,225.65,2711,173,1427.0,159.0,1,0
1,8223184,2019-12-10,261.1,3665,169,11.0,86.0,1,0
2,8106681,2019-12-10,177.5,2115,129,874.0,165.0,1,0
3,8165842,2019-12-10,240.3,2937,164,614.0,162.0,1,0
4,2001220953,2019-12-10,623.5,4190,401,1872.0,387.0,0,1


In [49]:
# 设置面板
meituan = panel[panel['美团']==1].reset_index(drop=True)
meituan=meituan.set_index('日期', append=True)
meituan.index = meituan.index.swaplevel(0,1)

In [51]:
# 2-way fixed effect 美团
exog = sm.add_constant(meituan['cpc总费用'])
meituan_femodel = PanelOLS(meituan['cpc访问量'],meituan['cpc总费用'], entity_effects=True, time_effects=False)
meituan_fe = meituan_femodel.fit()
print(meituan_fe)

                          PanelOLS Estimation Summary                           
Dep. Variable:                 cpc访问量   R-squared:                        0.9213
Estimator:                   PanelOLS   R-squared (Between):              0.9570
No. Observations:                 446   R-squared (Within):               0.9213
Date:                Wed, Feb 08 2023   R-squared (Overall):              0.9585
Time:                        15:22:07   Log-likelihood                   -1764.3
Cov. Estimator:            Unadjusted                                           
                                        F-statistic:                      1990.1
Entities:                         275   P-value                           0.0000
Avg Obs:                       1.6218   Distribution:                   F(1,170)
Min Obs:                       1.0000                                           
Max Obs:                       6.0000   F-statistic (robust):             1990.1
                            

In [52]:
# two-way fixed effect：饿了么
eleme = panel[panel['饿了么']==1].reset_index(drop=True)
eleme=eleme.set_index('日期', append=True)
eleme.index = eleme.index.swaplevel(0,1)

In [53]:
exog = sm.add_constant(eleme['cpc总费用'])
eleme_femodel = PanelOLS(eleme['cpc访问量'],eleme['cpc总费用'], entity_effects=True, time_effects=False)
eleme_fe = eleme_femodel.fit()
print(eleme_fe)

                          PanelOLS Estimation Summary                           
Dep. Variable:                 cpc访问量   R-squared:                        0.9514
Estimator:                   PanelOLS   R-squared (Between):              0.9557
No. Observations:                 731   R-squared (Within):               0.9514
Date:                Wed, Feb 08 2023   R-squared (Overall):              0.9601
Time:                        15:23:22   Log-likelihood                   -3072.3
Cov. Estimator:            Unadjusted                                           
                                        F-statistic:                      8145.4
Entities:                         314   P-value                           0.0000
Avg Obs:                       2.3280   Distribution:                   F(1,416)
Min Obs:                       1.0000                                           
Max Obs:                       7.0000   F-statistic (robust):             8145.4
                            