# Fixed Effect Regression Analysis

In [20]:
# import packages
import os

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [21]:
# import data
# The CSV location can be overridden with the CITATION_DATA_CSV environment
# variable so the notebook also runs on machines other than the author's.
# The original absolute path is kept as the default, so behavior is unchanged
# when the variable is not set.
DATA_CSV = os.environ.get(
    'CITATION_DATA_CSV',
    '/Users/jennyshen/Desktop/processed_data_revised.csv',
)
df = pd.read_csv(DATA_CSV)

In [22]:
# Restrict the sample to the four cities under study:
# Cincinnati, Seattle, Madison and Durham.
cities_of_interest = ['cincinnati', 'seattle', 'madison', 'durham']
in_scope = df['city'].isin(cities_of_interest)
df = df.loc[in_scope]

# Preview the first rows of the filtered frame
df.head()

Unnamed: 0,date,total_activity,citation_issued,citation_rate,day_of_week,month,days_end_month,end_of_month,year,days_end_year,end_of_year,quarter,days_end_quarter,end_of_quarter,city,state
13789,2006-01-01,121,70,0.578512,7,1,30,False,2006,180.0,False,3.0,89.0,False,seattle,wa
13790,2006-01-02,137,67,0.489051,1,1,29,False,2006,179.0,False,3.0,88.0,False,seattle,wa
13791,2006-01-03,114,62,0.54386,2,1,28,False,2006,178.0,False,3.0,87.0,False,seattle,wa
13792,2006-01-04,124,74,0.596774,3,1,27,False,2006,177.0,False,3.0,86.0,False,seattle,wa
13793,2006-01-05,109,67,0.614679,4,1,26,False,2006,176.0,False,3.0,85.0,False,seattle,wa


## Number of days to the end of the Month analysis

In [23]:
# Work on an independent (deep) copy so the month analysis leaves `df` untouched
df_month = df.copy(deep=True)

In [24]:
from linearmodels.panel import PanelOLS

# Create a panel data frame indexed by (entity, time) = (city, year).
# It is built from `df` instead of re-assigning `df_month.set_index(...)` onto
# itself: the self-referential form raises a KeyError when this cell is run a
# second time, because 'city' and 'year' have already moved into the index.
# The resulting frame is identical to the one the original cell produced.
df_month = df.set_index(['city', 'year'])

# Define the model: citations issued regressed on the number of days left to
# the end of the month. With the index above, EntityEffects are city effects
# and TimeEffects are year effects.
model = PanelOLS.from_formula('citation_issued ~ 1 + days_end_month + EntityEffects + TimeEffects', df_month)

# Estimate the model with standard errors clustered by entity (city).
# NOTE(review): the sample contains only 4 cities, so inference rests on
# 4 clusters -- treat the reported p-values with caution.
fe_reg_month = model.fit(cov_type='clustered', cluster_entity=True)

# View the results
print(fe_reg_month)

                          PanelOLS Estimation Summary                           
Dep. Variable:        citation_issued   R-squared:                        0.0003
Estimator:                   PanelOLS   R-squared (Between):             -0.0025
No. Observations:               16740   R-squared (Within):               0.0002
Date:                Wed, Apr 19 2023   R-squared (Overall):              0.0002
Time:                        01:59:19   Log-likelihood                -7.634e+04
Cov. Estimator:             Clustered                                           
                                        F-statistic:                      5.0340
Entities:                           4   P-value                           0.0249
Avg Obs:                       4185.0   Distribution:                 F(1,16716)
Min Obs:                       3428.0                                           
Max Obs:                       5112.0   F-statistic (robust):             2.4801
                            

A coefficient of 0.0457 for `days_end_month` means that, all other variables held constant, a one-unit increase in the number of days left to the end of the month is associated with an increase of 0.0457 in the number of citations issued.

The p-value of 0.1153 indicates the statistical significance of the coefficient estimate. In this case, the p-value is greater than the conventional threshold of 0.05, which suggests that the positive relationship between the number of days left to the end of the month and the number of citations issued is not statistically significant at the 5% level of significance.

## Number of days to the end of the Quarter analysis

In [25]:
# Work on an independent (deep) copy so the quarter analysis leaves `df` untouched
df_quarter = df.copy(deep=True)

In [26]:
from linearmodels.panel import PanelOLS

# Create a panel data frame indexed by (entity, time) = (city, year).
# It is built from `df` instead of re-assigning `df_quarter.set_index(...)`
# onto itself: the self-referential form raises a KeyError when this cell is
# run a second time, because 'city' and 'year' have already moved into the
# index. The resulting frame is identical to the one the original cell produced.
df_quarter = df.set_index(['city', 'year'])

# Define the model: citations issued regressed on the number of days left to
# the end of the quarter. With the index above, EntityEffects are city effects
# and TimeEffects are year effects.
model = PanelOLS.from_formula('citation_issued ~ 1 + days_end_quarter + EntityEffects + TimeEffects', df_quarter)

# Estimate the model with standard errors clustered by entity (city).
# NOTE(review): the sample contains only 4 cities, so inference rests on
# 4 clusters -- treat the reported p-values with caution.
fe_reg_quarter = model.fit(cov_type='clustered', cluster_entity=True)

# View the results
print(fe_reg_quarter)


                          PanelOLS Estimation Summary                           
Dep. Variable:        citation_issued   R-squared:                        0.0003
Estimator:                   PanelOLS   R-squared (Between):             -0.0023
No. Observations:               16740   R-squared (Within):               0.0002
Date:                Wed, Apr 19 2023   R-squared (Overall):              0.0002
Time:                        01:59:19   Log-likelihood                -7.634e+04
Cov. Estimator:             Clustered                                           
                                        F-statistic:                      4.5812
Entities:                           4   P-value                           0.0323
Avg Obs:                       4185.0   Distribution:                 F(1,16716)
Min Obs:                       3428.0                                           
Max Obs:                       5112.0   F-statistic (robust):             3.5960
                            

A coefficient of 0.0145 for `days_end_quarter` means that, all other variables held constant, a one-unit increase in the number of days left to the end of the quarter is associated with an increase of 0.0145 in the number of citations issued.

The p-value of 0.0579 indicates the statistical significance of the coefficient estimate. In this case, the p-value is greater than the conventional threshold of 0.05, which suggests that the positive relationship between the number of days left to the end of the quarter and the number of citations issued is not statistically significant at the 5% level of significance.

## Number of days to the end of the Year analysis

In [27]:
# Work on an independent (deep) copy so the year analysis leaves `df` untouched
df_year = df.copy(deep=True)

In [28]:
from linearmodels.panel import PanelOLS

# Create a panel data frame indexed by (entity, time) = (city, year).
# It is built from `df` instead of re-assigning `df_year.set_index(...)` onto
# itself: the self-referential form raises a KeyError when this cell is run a
# second time, because 'city' and 'year' have already moved into the index.
# The resulting frame is identical to the one the original cell produced.
df_year = df.set_index(['city', 'year'])

# Define the model: citations issued regressed on the number of days left to
# the end of the year. With the index above, EntityEffects are city effects
# and TimeEffects are year effects.
model = PanelOLS.from_formula('citation_issued ~ 1 + days_end_year + EntityEffects + TimeEffects', df_year)

# Estimate the model with standard errors clustered by entity (city).
# NOTE(review): the sample contains only 4 cities, so inference rests on
# 4 clusters -- treat the reported p-values with caution.
fe_reg_year = model.fit(cov_type='clustered', cluster_entity=True)

# View the results
print(fe_reg_year)

                          PanelOLS Estimation Summary                           
Dep. Variable:        citation_issued   R-squared:                        0.0022
Estimator:                   PanelOLS   R-squared (Between):              0.0008
No. Observations:               16740   R-squared (Within):               0.0004
Date:                Wed, Apr 19 2023   R-squared (Overall):              0.0008
Time:                        01:59:20   Log-likelihood                -7.633e+04
Cov. Estimator:             Clustered                                           
                                        F-statistic:                      36.232
Entities:                           4   P-value                           0.0000
Avg Obs:                       4185.0   Distribution:                 F(1,16716)
Min Obs:                       3428.0                                           
Max Obs:                       5112.0   F-statistic (robust):             3.4181
                            

A coefficient of -0.0103 for `days_end_year` means that, all other variables held constant, a one-unit increase in the number of days left to the end of the year is associated with a decrease of 0.0103 in the number of citations issued.

The p-value of 0.0645 indicates the statistical significance of the coefficient estimate. In this case, the p-value is greater than the conventional threshold of 0.05, which suggests that the negative relationship between the number of days left to the end of the year and the number of citations issued is not statistically significant at the 5% level of significance.