# Оцениваем регрессии на данных HS0 в спецификациях:
- Сырые данные ~ ковид и ограничения + контроль на дамми
- Сырые данные ~ ковид и ограничения + контроль на дамми | инструментальные переменные

Для второй спецификации считаем бутстраповские доверительные интервалы

In [15]:
library(arrow)
library(stargazer)
library(tidyverse)
library(plotly)
library(fixest)
# Significance thresholds shared by every etable() export in this notebook
signif.code <- c("***" = 0.001, "**" = 0.01, "*" = 0.05, "." = 0.10)

## Сырые данные

Под каждой регрессией вывожу summary с кластеризацией ошибок. В конце блока сравниваю робастность результата по используемым ошибкам.

In [16]:
# Load the trade x COVID data set and give the country-level covariates
# explicit `_i` / `_j` suffixes.
df <- read_parquet('../data/processed_data/trade_hs0_x_covid.parquet.gzip')

# `pair` (the i-j combination) is needed for computing HAC standard errors.
# For each covariate the un-suffixed column is copied to `<name>_i` and the
# `<name>:r_j` column to `<name>_j` (presumably the `:r_j` suffix comes from
# the join that attached country j's values - confirm against the data prep).
df <- df %>%
  mutate(
    pair = paste(i, j),
    deaths_i = deaths,
    deaths_j = `deaths:r_j`,
    median_age_i = median_age,
    median_age_j = `median_age:r_j`,
    average_surface_temperature_i = average_surface_temperature,
    average_surface_temperature_j = `average_surface_temperature:r_j`,
    C1M_School_closing_i = C1M_School_closing,
    C1M_School_closing_j = `C1M_School_closing:r_j`,
    population_density_i = population_density,
    population_density_j = `population_density:r_j`,
    StringencyIndex_Average_i = StringencyIndex_Average,
    StringencyIndex_Average_j = `StringencyIndex_Average:r_j`,
    share_1_members_i = share_1_members,
    share_1_members_j = `share_1_members:r_j`,
    H6M_Facial_Coverings_i = H6M_Facial_Coverings,
    H6M_Facial_Coverings_j = `H6M_Facial_Coverings:r_j`,
    C8EV_International_travel_controls_i = C8EV_International_travel_controls,
    C8EV_International_travel_controls_j = `C8EV_International_travel_controls:r_j`
  )
head(df)

t,i,j,v,q,v19,q19,country,t:r_i,cases,⋯,population_density_i,population_density_j,StringencyIndex_Average_i,StringencyIndex_Average_j,share_1_members_i,share_1_members_j,H6M_Facial_Coverings_i,H6M_Facial_Coverings_j,C8EV_International_travel_controls_i,C8EV_International_travel_controls_j
<int>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<int>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
2021,LUX,LBY,968.018,210.173,501.858,127.012,LUX,2021,241.1512329,⋯,248.620682,4.055137,46.04334,63.2468,33.34,,2.0082192,2.827397,2.534247,2.520548
2021,RUS,SEN,1307165.125,3131615.5,789625.305,2232100.632,RUS,2021,138.1677534,⋯,8.838488,89.44511,46.46488,24.90279,25.69,6.92,2.3150685,3.40274,2.610959,2.561644
2020,SOM,KOR,3136.611,2045.876,5608.268,4002.77,SOM,2020,0.7276796,⋯,26.54253,531.109006,34.6591,49.42956,1.53,31.75,0.7622951,1.653005,2.101093,2.415301
2020,TUR,CRI,57792.285,98850.453,60858.013,86163.873,TUR,2020,67.6935912,⋯,108.343854,98.596161,54.57148,54.39423,6.37,12.07,2.2732241,1.218579,2.040984,2.322404
2021,KNA,BIH,0.52,0.008,0.46,0.005,KNA,2021,164.5090411,⋯,179.857692,63.37709,,38.78658,,11.5,,2.476712,,1.0
2022,AZE,MLT,22060.938,39007.156,12476.49,33493.4,AZE,2022,56.3560822,⋯,122.707272,1660.971875,49.01625,22.13003,6.77,22.64,0.6575342,2.323288,3.0,1.263014


In [26]:
# Same `_i` / `_j` suffix convention for the C3M_Cancel_public_events columns
df <- df %>%
  mutate(
    C3M_Cancel_public_events_i = C3M_Cancel_public_events,
    C3M_Cancel_public_events_j = `C3M_Cancel_public_events:r_j`
  )

In [None]:
# FE estimator (i.e. estimation on panel data that uses within-group variation):
# log trade relative to 2019 on log deaths and travel controls of both
# countries, with i, j and year fixed effects and no instruments.
fe_no_iv <- feols(
  log(v/v19) ~ log(deaths_i) + log(deaths_j) +
    C8EV_International_travel_controls_i + C8EV_International_travel_controls_j |
    i + j + year,
  data = df
)
summary(fe_no_iv, cluster = ~ i + j)

In [8]:
# Let the effects vary over time: `year/x` expands to `year + year:x`, giving
# one slope per year for every regressor plus year dummies (so `year` is no
# longer absorbed as a fixed effect here).
heterogenous_fe <- feols(
  log(v/v19) ~ year/log(deaths_i) + year/log(deaths_j) +
    year/C8EV_International_travel_controls_i +
    year/C8EV_International_travel_controls_j |
    i + j,
  data = df
)
summary(heterogenous_fe, cluster = ~ i + j)

NOTE: 13,568 observations removed because of NA and infinite values (RHS: 13,568).



OLS estimation, Dep. Var.: log(v/v19)
Observations: 56,739
Fixed-effects: i: 175,  j: 175
Standard-errors: Clustered (i & j) 
                                               Estimate Std. Error   t value
year2021                                       0.078145   0.077102  1.013525
year2022                                       0.132887   0.088895  1.494875
year2020:log(deaths_i)                        -0.014540   0.006968 -2.086681
year2021:log(deaths_i)                        -0.014148   0.006784 -2.085578
year2022:log(deaths_i)                        -0.021569   0.007546 -2.858254
year2020:log(deaths_j)                        -0.010634   0.006580 -1.616124
year2021:log(deaths_j)                         0.001444   0.006831  0.211366
year2022:log(deaths_j)                         0.003974   0.007462  0.532606
year2020:C8EV_International_travel_controls_i -0.057193   0.025847 -2.212732
year2021:C8EV_International_travel_controls_i -0.026873   0.012924 -2.079314
year2022:C8EV_International

### Инструментальные переменные на панельных данных

In [30]:
# Fixed-effects specification with instruments for log deaths, added to remove
# a potential two-way (reverse-causality) link between trade and deaths.
# Instruments that were tried and are currently disabled:
#   - H6M_Facial_Coverings_i / H6M_Facial_Coverings_j
#   - average_surface_temperature_i / average_surface_temperature_j
#     (adds more missing values)
fe_iv <- feols(
  log(v/v19) ~ 1 +
    C8EV_International_travel_controls_i + C8EV_International_travel_controls_j |
    year + i + j |
    log(deaths_i) + log(deaths_j) ~
      C1M_School_closing_i + C1M_School_closing_j +
      median_age_i + median_age_j +
      population_density_i + population_density_j,
  data = df
)
summary(fe_iv, cluster = ~ i + j)

NOTE: 13,906 observations removed because of NA and infinite values (RHS: 9,802, IV: 6,067/10,661).



TSLS estimation - Dep. Var.: log(v/v19)
                  Endo.    : log(deaths_i), log(deaths_j)
                  Instr.   : C1M_School_closing_i, C1M_School_closing_j, median_age_i, median_age_j, population_density_i, population_density_j
Second stage: Dep. Var.: log(v/v19)
Observations: 56,401
Fixed-effects: year: 3,  i: 175,  j: 175
Standard-errors: Clustered (i & j) 
                                      Estimate Std. Error   t value Pr(>|t|)
fit_log(deaths_i)                    -0.060122   0.024117 -2.492990 0.013602
fit_log(deaths_j)                     0.019263   0.021785  0.884262 0.377775
C8EV_International_travel_controls_i -0.039837   0.018595 -2.142345 0.033556
C8EV_International_travel_controls_j -0.008328   0.013863 -0.600745 0.548791
                                        
fit_log(deaths_i)                    *  
fit_log(deaths_j)                       
C8EV_International_travel_controls_i *  
C8EV_International_travel_controls_j    
---
Signif. codes:  0 '***' 0.001 

In [6]:
# The word "influence" (влияние) reflects expert judgement based on the methods used

### Точечная оценка руками

In [7]:
# Sanity check: if the data are consistent, deaths and travel controls take a
# single value per (t, i), so both counts below must be the same.
print(count(distinct(df, t, i, deaths_i, C8EV_International_travel_controls_i)))
print(count(distinct(df, t, i)))

[90m# A tibble: 1 × 1[39m
      n
  [3m[90m<int>[39m[23m
[90m1[39m   660
[90m# A tibble: 1 × 1[39m
      n
  [3m[90m<int>[39m[23m
[90m1[39m   660


In [8]:
# A "distinct" done by hand: collapse the i-j data to one row per (year, i).
# min() is used only to pick one value per group; this assumes every listed
# covariate is constant within (year, i) - TODO confirm for the columns that
# the distinct() check does not cover.
iv_df <- df %>%
  group_by(year, i) %>%
  summarise(
    across(
      c(
        deaths_i,
        C8EV_International_travel_controls_i,
        C1M_School_closing_i,
        median_age_i,
        population_density_i,
        H6M_Facial_Coverings,
        C3M_Cancel_public_events
      ),
      min
    ),
    .groups = 'drop'
  )
head(iv_df)

year,i,deaths_i,C8EV_International_travel_controls_i,C1M_School_closing_i,median_age_i,population_density_i,H6M_Facial_Coverings,C3M_Cancel_public_events
<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
2020,ABW,1.20447514,2.051913,1.385246,40.399,603.26111,1.054645,1.4153005
2020,AFG,0.14687845,1.461749,1.748634,16.411,59.90062,1.743169,0.9125683
2020,AGO,0.03096685,3.248634,2.079235,16.321,26.83174,2.467213,1.3005464
2020,AIA,0.0,,,36.071,,,
2020,ALB,1.10790055,1.836066,1.653005,35.127,103.57113,1.991803,1.6284153
2020,AND,2.87596685,1.557377,1.229508,42.125,164.6383,2.081967,1.1202186


In [9]:
# First-stage regression, estimated by hand on the (year, i) data set.
# NOTE(review): nothing in this cell appears to use the RNG; the seed is kept
# so that the RNG state seen by later cells is unchanged.
set.seed(123)
first_stage <- feols(
  log(deaths_i) ~ C8EV_International_travel_controls_i +
    C1M_School_closing_i + median_age_i + population_density_i |
    year + i,
  data = iv_df
)

# Instrument relevance check: joint Wald test on the three instruments
wald(
  first_stage,
  keep = 'C1M_School_closing_i|median_age_i|population_density_i',
  vcov = ~i
)
summary(first_stage, cluster = ~ i)

NOTE: 156 observations removed because of NA and infinite values (LHS: 74, RHS: 130).



Wald test, H0: joint nullity of C1M_School_closing_i, median_age_i and population_density_i
 stat = 14.0, p-value = 1.385e-8, on 3 and 323 DoF, VCOV: Clustered (i).

OLS estimation, Dep. Var.: log(deaths_i)
Observations: 504
Fixed-effects: year: 3,  i: 175
Standard-errors: Clustered (i) 
                                      Estimate Std. Error  t value   Pr(>|t|)
C8EV_International_travel_controls_i -0.612210   0.126406 -4.84322 2.8097e-06
C1M_School_closing_i                  0.552634   0.142851  3.86862 1.5452e-04
median_age_i                          0.722052   0.223577  3.22955 1.4824e-03
population_density_i                 -0.002437   0.001035 -2.35422 1.9677e-02
                                        
C8EV_International_travel_controls_i ***
C1M_School_closing_i                 ***
median_age_i                         ** 
population_density_i                 *  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
RMSE: 0.793277     Adj. R2: 0.784621
                 Within R2: 0.123806

In [10]:
# First-stage fitted values keyed by (year, i).
# predict() without `newdata` returns one value per observation that feols
# actually used. The key rows are therefore taken from the model itself
# (fixest::obs() gives the indices of the rows of `iv_df` kept in the
# estimation) instead of re-implementing the NA / log(0) filtering by hand,
# which would silently misalign predictions if the first-stage formula changed.
new_iv_df <- iv_df[fixest::obs(first_stage), c('year', 'i')]
first_stage_fit <- predict(first_stage)
# Fail loudly rather than attach predictions to the wrong rows
stopifnot(length(first_stage_fit) == nrow(new_iv_df))
new_iv_df$predicted_log_deaths_i <- first_stage_fit
new_iv_df %>% head()

year,i,predicted_log_deaths_i
<chr>,<chr>,<dbl>
2020,ABW,0.40567115
2020,AFG,-1.99526479
2020,AGO,-3.32115465
2020,ALB,-0.6668283
2020,AND,0.03465424
2020,ARE,-2.10135519


In [11]:
# Join the first-stage predictions onto the original data frame twice:
# once by (year, i) and once by (year, j). For the second join the prediction
# table is renamed up front, so no `.x` / `.y` suffixes have to be fixed later.
df_w_predictions <- df %>%
  merge(new_iv_df, by = c('year', 'i')) %>%
  merge(
    rename(new_iv_df, j = i, predicted_log_deaths_j = predicted_log_deaths_i),
    by = c('year', 'j')
  )
head(df_w_predictions)

Unnamed: 0_level_0,year,j,i,t,v,q,v19,q19,country,t:r_i,⋯,population_density_i,population_density_j,share_1_members_i,share_1_members_j,H6M_Facial_Coverings_i,H6M_Facial_Coverings_j,C8EV_International_travel_controls_i,C8EV_International_travel_controls_j,predicted_log_deaths_i,predicted_log_deaths_j
Unnamed: 0_level_1,<chr>,<chr>,<chr>,<int>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<int>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1,2020,ABW,HUN,2020,272.22,29.893,490.397,55.795,HUN,2020,⋯,106.83924,603.2611,32.08,21.37,1.360656,1.054645,2.909836,2.051913,0.8023361,0.4056712
2,2020,ABW,PRT,2020,1310.497,866.1259,964.039,663.928,PRT,2020,⋯,112.40668,603.2611,21.44,21.37,1.505464,1.054645,2.434426,2.051913,0.4363426,0.4056712
3,2020,ABW,DNK,2020,486.216,152.809,414.461,129.961,DNK,2020,⋯,145.7851,603.2611,,21.37,0.84153,1.054645,2.666667,2.051913,-0.4828896,0.4056712
4,2020,ABW,CZE,2020,15.502,2.777,17.646,8.501,CZE,2020,⋯,138.57593,603.2611,32.51,21.37,2.193989,1.054645,2.63388,2.051913,0.5047623,0.4056712
5,2020,ABW,CUB,2020,41.84,19.63,55.13,36.3,CUB,2020,⋯,107.672,603.2611,21.14,21.37,2.254098,1.054645,2.472678,2.051913,-2.4601167,0.4056712
6,2020,ABW,PRY,2020,10960.152,1451.1599,10232.72,1280.786,PRY,2020,⋯,16.62154,603.2611,10.39,21.37,1.251366,1.054645,2.765027,2.051913,0.1523012,0.4056712


In [12]:
# Second-stage regression: log deaths are replaced by their first-stage
# predictions for both countries.
# NOTE(review): the standard errors reported here treat the predictions as
# ordinary data; presumably this is why bootstrap intervals are planned for
# this specification - confirm.
second_stage <- feols(
  log(v/v19) ~ predicted_log_deaths_i + predicted_log_deaths_j +
    C8EV_International_travel_controls_i + C8EV_International_travel_controls_j |
    year + i + j,
  data = df_w_predictions
)
summary(second_stage, cluster = ~ i + j)

OLS estimation, Dep. Var.: log(v/v19)
Observations: 56,401
Fixed-effects: year: 3,  i: 175,  j: 175
Standard-errors: Clustered (i & j) 
                                      Estimate Std. Error   t value  Pr(>|t|)
predicted_log_deaths_i               -0.062746   0.021371 -2.936055 0.0037737
predicted_log_deaths_j                0.018943   0.021979  0.861875 0.3899423
C8EV_International_travel_controls_i -0.044554   0.017667 -2.521957 0.0125681
C8EV_International_travel_controls_j -0.009087   0.013965 -0.650704 0.5160958
                                        
predicted_log_deaths_i               ** 
predicted_log_deaths_j                  
C8EV_International_travel_controls_i *  
C8EV_International_travel_controls_j    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
RMSE: 0.981048     Adj. R2: 0.038391
                 Within R2: 3.317e-4

In [13]:
# Export both steps of the manual two-step estimation to LaTeX
etable(
  first_stage,
  vcov = ~i,
  signif.code = signif.code,
  tex = TRUE
)
etable(
  second_stage,
  vcov = ~i+j,
  signif.code = signif.code,
  tex = TRUE
)

\begingroup
\centering
\begin{tabular}{lc}
   \tabularnewline \midrule \midrule
   Dependent Variable:                          & log(deaths\_i)\\   
   Model:                                       & (1)\\  
   \midrule
   \emph{Variables}\\
   C8EV\_International\_travel\_controls\_i     & -0.6122$^{***}$\\   
                                                & (0.1264)\\   
   C1M\_School\_closing\_i                      & 0.5526$^{***}$\\   
                                                & (0.1429)\\   
   median\_age\_i                               & 0.7221$^{**}$\\   
                                                & (0.2236)\\   
   population\_density\_i                       & -0.0024$^{*}$\\   
                                                & (0.0010)\\   
   \midrule
   \emph{Fixed-effects}\\
   year                                         & Yes\\  
   i                                            & Yes\\  
   \midrule
   \emph{Fit statistics}\\
   Observations               

\begingroup
\centering
\begin{tabular}{lc}
   \tabularnewline \midrule \midrule
   Dependent Variable:                          & log(v/v19)\\  
   Model:                                       & (1)\\  
   \midrule
   \emph{Variables}\\
   predicted\_log\_deaths\_i                    & -0.0628$^{**}$\\   
                                                & (0.0214)\\   
   predicted\_log\_deaths\_j                    & 0.0189\\   
                                                & (0.0220)\\   
   C8EV\_International\_travel\_controls\_i     & -0.0445$^{*}$\\   
                                                & (0.0177)\\   
   C8EV\_International\_travel\_controls\_j     & -0.0091\\   
                                                & (0.0140)\\   
   \midrule
   \emph{Fixed-effects}\\
   year                                         & Yes\\  
   i                                            & Yes\\  
   j                                            & Yes\\  
   \midrule
   \emph{Fit statis

### Разные наборы инструментов

In [31]:
# Base names of the candidate instruments; each one enters the model as an
# `_i` and a `_j` column.
instrumental_variables <- c("C1M_School_closing", "median_age", "population_density")

# Every subset of the instruments (sizes 0..n), flattened into a single list
# of character vectors.
all_subsets <- unlist(
  lapply(
    0:length(instrumental_variables),
    function(k) combn(instrumental_variables, k, simplify = FALSE)
  ),
  recursive = FALSE
)

# Instrument part of the formula for each subset: "<var>_i + <var>_j + ...".
# The empty subset maps to "" and is dropped right afterwards.
transformed_subsets <- vapply(
  all_subsets,
  function(vars) {
    if (length(vars) == 0) {
      ""
    } else {
      paste(paste0(rep(vars, each = 2), c("_i", "_j")), collapse = " + ")
    }
  },
  character(1)
)
transformed_subsets <- transformed_subsets[nzchar(transformed_subsets)]

# One IV formula per non-empty instrument subset
formulas <- lapply(transformed_subsets, function(iv_terms) {
  formula(paste("log(v/v19) ~  1 + C8EV_International_travel_controls_i + C8EV_International_travel_controls_j | year+i+j | log(deaths_i) + log(deaths_j) ~", iv_terms))
})
formulas[[3]]

log(v/v19) ~ 1 + C8EV_International_travel_controls_i + C8EV_International_travel_controls_j | 
    year + i + j | log(deaths_i) + log(deaths_j) ~ population_density_i + 
    population_density_j
<environment: 0x000001b20bb209e0>

In [32]:
# Readable label for each instrument subset: its base names joined by " + ".
# The empty subset comes first and gets "", so it is skipped when displaying.
headers <- vapply(
  all_subsets,
  function(vars) {
    if (length(vars) == 0) "" else paste(vars, collapse = " + ")
  },
  character(1)
)
headers[2:length(headers)]

In [33]:
#' Replace instrument names in table headers by their index numbers
#'
#' Each header is split on " + ", every component is looked up in `variables`
#' and replaced by its 1-based position, and the pieces are joined again with
#' " + ". A component that is not in `variables` becomes "NA".
#'
#' @param headers Character vector of headers such as "median_age" or
#'   "C1M_School_closing + population_density".
#' @param variables Character vector of instrument base names; the position of
#'   a name in this vector is the number it is replaced with. Defaults to the
#'   three instruments used in this notebook.
#' @return A character vector of the same length as `headers`, named by the
#'   original headers. Always character, including for empty input.
replace_vars_with_numbers <- function(headers,
                                      variables = c("C1M_School_closing",
                                                    "median_age",
                                                    "population_density")) {
  # Lookup table: instrument name -> its index as a string
  var_numbers <- setNames(as.character(seq_along(variables)), variables)

  # vapply() instead of sapply() so the result type does not depend on input
  vapply(
    headers,
    function(h) {
      components <- unlist(strsplit(h, " \\+ "))
      paste(var_numbers[components], collapse = " + ")
    },
    character(1)
  )
}
# Numeric labels for every non-empty instrument subset (element 1 of `headers`
# belongs to the empty subset and is skipped)
numbered_headers <- headers[2:length(headers)] %>% replace_vars_with_numbers()
numbered_headers

In [34]:
# Estimate the IV model once per instrument subset.
# lapply() replaces the index loop: it is safe for an empty `formulas` list,
# does not grow the result element by element, and avoids a global loop index
# called `i`, which is also the name of a column of `df` used in the formulas
# and in the clustering below.
list_of_iv_models <- lapply(
  formulas,
  function(iv_formula) feols(iv_formula, data = df)
)

# Comparison table across instrument sets, with Sargan, Wu-Hausman and
# first-stage F statistics added; LaTeX version first, then the plain one.
etable(
  list_of_iv_models,
  vcov = ~i+j,
  signif.code = signif.code,
  fitstat = ~ . + sargan.p + wh.p + ivf,
  headers = numbered_headers,
  tex = TRUE
)
etable(
  list_of_iv_models,
  vcov = ~i+j,
  signif.code = signif.code,
  fitstat = ~ . + sargan.p + wh.p + ivf,
  headers = numbered_headers,
  tex = FALSE
)

NOTE: 13,568 observations removed because of NA and infinite values (RHS: 9,802, IV: 6,067/9,802).

NOTE: 13,568 observations removed because of NA and infinite values (RHS: 9,802, IV: 6,067/47).

NOTE: 13,906 observations removed because of NA and infinite values (RHS: 9,802, IV: 6,067/2,257).

NOTE: 13,568 observations removed because of NA and infinite values (RHS: 9,802, IV: 6,067/9,802).

NOTE: 13,906 observations removed because of NA and infinite values (RHS: 9,802, IV: 6,067/10,661).

NOTE: 13,906 observations removed because of NA and infinite values (RHS: 9,802, IV: 6,067/2,257).

NOTE: 13,906 observations removed because of NA and infinite values (RHS: 9,802, IV: 6,067/10,661).



\begingroup
\centering
\begin{tabular}{lccccccc}
   \tabularnewline \midrule \midrule
   Dependent Variable: & \multicolumn{7}{c}{log(v/v19)}\\
                                                & 1             & 2            & 3        & 1 + 2         & 1 + 3         & 2 + 3    & 1 + 2 + 3 \\   
   Model:                                       & (1)           & (2)          & (3)      & (4)           & (5)           & (6)      & (7)\\  
   \midrule
   \emph{Variables}\\
   log(deaths\_i)                               & -0.0743$^{*}$ & -0.0307      & -0.0729  & -0.0602$^{*}$ & -0.0752$^{*}$ & -0.0318  & -0.0601$^{*}$\\   
                                                & (0.0340)      & (0.0295)     & (0.0478) & (0.0245)      & (0.0326)      & (0.0281) & (0.0241)\\   
   log(deaths\_j)                               & 0.0100        & 0.0459$^{.}$ & -0.0580  & 0.0229        & 0.0050        & 0.0354   & 0.0193\\   
                                                & (0.0283)      & (0.0260)    

Unnamed: 0_level_0,Unnamed: 1_level_0,model 1,model 2,model 3,model 4,model 5,model 6,model 7
Unnamed: 0_level_1,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
1,,1,2,3,1 + 2,1 + 3,2 + 3,1 + 2 + 3
2,Dependent Var.:,log(v/v19),log(v/v19),log(v/v19),log(v/v19),log(v/v19),log(v/v19),log(v/v19)
3,,,,,,,,
4,log(deaths_i),-0.0743* (0.0340),-0.0307 (0.0295),-0.0729 (0.0478),-0.0602* (0.0245),-0.0752* (0.0326),-0.0318 (0.0281),-0.0601* (0.0241)
5,log(deaths_j),0.0100 (0.0283),0.0459. (0.0260),-0.0580 (0.0789),0.0229 (0.0219),0.0050 (0.0271),0.0354 (0.0281),0.0193 (0.0218)
6,C8EV_International_travel_controls_i,-0.0455* (0.0213),-0.0284 (0.0194),-0.0434 (0.0284),-0.0400* (0.0184),-0.0457* (0.0211),-0.0286 (0.0197),-0.0398* (0.0186)
7,C8EV_International_travel_controls_j,-0.0145 (0.0149),0.0007 (0.0167),-0.0422 (0.0401),-0.0090 (0.0136),-0.0144 (0.0150),-0.0016 (0.0175),-0.0083 (0.0139)
8,Fixed-Effects:,-----------------,----------------,----------------,-----------------,-----------------,----------------,-----------------
9,year,Yes,Yes,Yes,Yes,Yes,Yes,Yes
10,i,Yes,Yes,Yes,Yes,Yes,Yes,Yes


### Робастность с выкидыванием одного года

In [18]:
# Robustness check: re-estimate with year 2020 left out.
# IV specification with the same instruments as `fe_iv`; the facial-coverings
# and surface-temperature instruments stay disabled (the latter adds more
# missing values).
fe_iv_no2020 <- feols(
  log(v/v19) ~ 1 +
    C8EV_International_travel_controls_i + C8EV_International_travel_controls_j |
    year + i + j |
    log(deaths_i) + log(deaths_j) ~
      C1M_School_closing_i + C1M_School_closing_j +
      median_age_i + median_age_j +
      population_density_i + population_density_j,
  data = df %>% filter(year != "2020")
)
summary(fe_iv_no2020, cluster = ~ i + j)

# OLS counterpart on the same subsample, for comparison
summary(
  feols(
    log(v/v19) ~ 1 + log(deaths_i) + log(deaths_j) +
      C8EV_International_travel_controls_i + C8EV_International_travel_controls_j |
      year + i + j,
    data = df %>% filter(year != "2020")
  ),
  cluster = ~ i + j
)

NOTE: 8,375 observations removed because of NA and infinite values (RHS: 6,383, IV: 2,686/7,242).



TSLS estimation - Dep. Var.: log(v/v19)
                  Endo.    : log(deaths_i), log(deaths_j)
                  Instr.   : C1M_School_closing_i, C1M_School_closing_j, median_age_i, median_age_j, population_density_i, population_density_j
Second stage: Dep. Var.: log(v/v19)
Observations: 38,010
Fixed-effects: year: 2,  i: 175,  j: 175
Standard-errors: Clustered (i & j) 
                                      Estimate Std. Error   t value Pr(>|t|) 
fit_log(deaths_i)                    -0.038613   0.036766 -1.050219  0.29507 
fit_log(deaths_j)                     0.005131   0.032703  0.156888  0.87552 
C8EV_International_travel_controls_i -0.034057   0.025004 -1.362064  0.17494 
C8EV_International_travel_controls_j -0.006258   0.018649 -0.335578  0.73759 
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
RMSE: 1.03094     Adj. R2:  0.03273 
                Within R2: -1.469e-4
F-test (1st stage), log(deaths_i): stat = 530.1     , p < 2.2e-16 , on 6 and 37,653 DoF.
F-te

NOTE: 8,037 observations removed because of NA and infinite values (RHS: 8,037).



OLS estimation, Dep. Var.: log(v/v19)
Observations: 38,348
Fixed-effects: year: 2,  i: 175,  j: 175
Standard-errors: Clustered (i & j) 
                                      Estimate Std. Error   t value Pr(>|t|) 
log(deaths_i)                        -0.012427   0.007960 -1.561136  0.12031 
log(deaths_j)                         0.001750   0.007853  0.222812  0.82394 
C8EV_International_travel_controls_i -0.020393   0.018478 -1.103686  0.27125 
C8EV_International_travel_controls_j -0.012554   0.019027 -0.659795  0.51026 
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
RMSE: 1.03103     Adj. R2: 0.032831
                Within R2: 1.12e-4 

In [19]:
# Robustness check: re-estimate with year 2021 left out.
# IV specification with the same instruments as `fe_iv`; the facial-coverings
# and surface-temperature instruments stay disabled (the latter adds more
# missing values).
fe_iv_no2021 <- feols(
  log(v/v19) ~ 1 +
    C8EV_International_travel_controls_i + C8EV_International_travel_controls_j |
    year + i + j |
    log(deaths_i) + log(deaths_j) ~
      C1M_School_closing_i + C1M_School_closing_j +
      median_age_i + median_age_j +
      population_density_i + population_density_j,
  data = df %>% filter(year != "2021")
)
summary(fe_iv_no2021, cluster = ~ i + j)

# OLS counterpart on the same subsample, for comparison
summary(
  feols(
    log(v/v19) ~ 1 + log(deaths_i) + log(deaths_j) +
      C8EV_International_travel_controls_i + C8EV_International_travel_controls_j |
      year + i + j,
    data = df %>% filter(year != "2021")
  ),
  cluster = ~ i + j
)

NOTE: 9,780 observations removed because of NA and infinite values (RHS: 6,512, IV: 4,510/7,371).



TSLS estimation - Dep. Var.: log(v/v19)
                  Endo.    : log(deaths_i), log(deaths_j)
                  Instr.   : C1M_School_closing_i, C1M_School_closing_j, median_age_i, median_age_j, population_density_i, population_density_j
Second stage: Dep. Var.: log(v/v19)
Observations: 36,969
Fixed-effects: year: 2,  i: 175,  j: 175
Standard-errors: Clustered (i & j) 
                                      Estimate Std. Error  t value Pr(>|t|)    
fit_log(deaths_i)                    -0.066556   0.024162 -2.75457 0.006502 ** 
fit_log(deaths_j)                     0.020572   0.020131  1.02192 0.308235    
C8EV_International_travel_controls_i -0.052320   0.032441 -1.61279 0.108602    
C8EV_International_travel_controls_j -0.029075   0.027235 -1.06756 0.287197    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
RMSE: 0.963672     Adj. R2:  0.045084
                 Within R2: -0.001063
F-test (1st stage), log(deaths_i): stat = 1,345.3   , p < 2.2e-16 , on 6 and 36,6

NOTE: 9,442 observations removed because of NA and infinite values (RHS: 9,442).



OLS estimation, Dep. Var.: log(v/v19)
Observations: 37,307
Fixed-effects: year: 2,  i: 175,  j: 175
Standard-errors: Clustered (i & j) 
                                      Estimate Std. Error   t value  Pr(>|t|)
log(deaths_i)                        -0.024073   0.008215 -2.930532 0.0038381
log(deaths_j)                        -0.004361   0.008051 -0.541649 0.5887530
C8EV_International_travel_controls_i -0.030196   0.024647 -1.225141 0.2221780
C8EV_International_travel_controls_j -0.046039   0.027666 -1.664077 0.0978969
                                        
log(deaths_i)                        ** 
log(deaths_j)                           
C8EV_International_travel_controls_i    
C8EV_International_travel_controls_j .  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
RMSE: 0.963722     Adj. R2: 0.046687
                 Within R2: 6.399e-4

In [20]:
# Robustness check: re-estimate with year 2022 left out.
# IV specification with the same instruments as `fe_iv`; the facial-coverings
# and surface-temperature instruments stay disabled (the latter adds more
# missing values).
fe_iv_no2022 <- feols(
  log(v/v19) ~ 1 +
    C8EV_International_travel_controls_i + C8EV_International_travel_controls_j |
    year + i + j |
    log(deaths_i) + log(deaths_j) ~
      C1M_School_closing_i + C1M_School_closing_j +
      median_age_i + median_age_j +
      population_density_i + population_density_j,
  data = df %>% filter(year != "2022")
)
summary(fe_iv_no2022, cluster = ~ i + j)

# OLS counterpart on the same subsample, for comparison
summary(
  feols(
    log(v/v19) ~ 1 + log(deaths_i) + log(deaths_j) +
      C8EV_International_travel_controls_i + C8EV_International_travel_controls_j |
      year + i + j,
    data = df %>% filter(year != "2022")
  ),
  cluster = ~ i + j
)

NOTE: 9,657 observations removed because of NA and infinite values (RHS: 6,709, IV: 4,938/6,709).



TSLS estimation - Dep. Var.: log(v/v19)
                  Endo.    : log(deaths_i), log(deaths_j)
                  Instr.   : C1M_School_closing_i, C1M_School_closing_j, median_age_i, median_age_j, population_density_i, population_density_j
Second stage: Dep. Var.: log(v/v19)
Observations: 37,823
Fixed-effects: year: 2,  i: 171,  j: 171
Standard-errors: Clustered (i & j) 
                                      Estimate Std. Error   t value Pr(>|t|)
fit_log(deaths_i)                    -0.077656   0.037527 -2.069365 0.040025
fit_log(deaths_j)                     0.032304   0.037056  0.871771 0.384563
C8EV_International_travel_controls_i -0.028841   0.021968 -1.312869 0.190997
C8EV_International_travel_controls_j  0.000174   0.013966  0.012477 0.990059
                                        
fit_log(deaths_i)                    *  
fit_log(deaths_j)                       
C8EV_International_travel_controls_i    
C8EV_International_travel_controls_j    
---
Signif. codes:  0 '***' 0.001 

NOTE: 9,657 observations removed because of NA and infinite values (RHS: 9,657).



OLS estimation, Dep. Var.: log(v/v19)
Observations: 37,823
Fixed-effects: year: 2,  i: 171,  j: 171
Standard-errors: Clustered (i & j) 
                                      Estimate Std. Error   t value Pr(>|t|) 
log(deaths_i)                        -0.008123   0.007331 -1.108128  0.26937 
log(deaths_j)                        -0.007513   0.007523 -0.998707  0.31936 
C8EV_International_travel_controls_i -0.015374   0.016756 -0.917493  0.36018 
C8EV_International_travel_controls_j -0.009799   0.010460 -0.936821  0.35018 
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
RMSE: 0.939947     Adj. R2: 0.029125
                 Within R2: 9.698e-5

### Экспорт результатов в латех

In [21]:
# Main results table in LaTeX: plain FE, year-heterogeneous FE and FE-IV side
# by side, with Sargan, Wu-Hausman and first-stage F statistics added.
etable(
  fe_no_iv, heterogenous_fe, fe_iv,
  vcov = ~i+j,
  signif.code = signif.code,
  fitstat = ~ . + sargan.p + wh.p + ivf,
  tex = TRUE
)

\begingroup
\centering
\begin{tabular}{lccc}
   \tabularnewline \midrule \midrule
   Dependent Variable: & \multicolumn{3}{c}{log(v/v19)}\\
   Model:                                                          & (1)           & (2)            & (3)\\  
   \midrule
   \emph{Variables}\\
   log(deaths\_i)                                                  & -0.0156$^{*}$ &                & -0.0601$^{*}$\\   
                                                                   & (0.0061)      &                & (0.0241)\\   
   log(deaths\_j)                                                  & -0.0032       &                & 0.0193\\   
                                                                   & (0.0053)      &                & (0.0218)\\   
   C8EV\_International\_travel\_controls\_i                        & -0.0212       &                & -0.0398$^{*}$\\   
                                                                   & (0.0139)      &                & (0.0186)\\   
   C8EV\_Int

In [22]:
# Combine year-specific effects of log deaths with instruments.
# NOTE(review): `year/log(deaths_i)` expands to `year + year:log(deaths_i)`, so
# the `year` dummies themselves land in the endogenous block (the summary lists
# "Endo.: year, year:log(deaths_i), year:log(deaths_j)") - confirm this is
# intended.
summary(
  feols(
    log(v/v19) ~
      C8EV_International_travel_controls_i + C8EV_International_travel_controls_j |
      i + j |
      year/log(deaths_i) + year/log(deaths_j) ~
        H6M_Facial_Coverings_i + H6M_Facial_Coverings_j +
        C1M_School_closing_i + C1M_School_closing_j +
        median_age_i + median_age_j +
        population_density_i + population_density_j +
        average_surface_temperature_i + average_surface_temperature_j,
    data = df
  ),
  cluster = ~ i + j
)

NOTE: 17,571 observations removed because of NA and infinite values (RHS: 9,802, IV: 6,067/15,000).



TSLS estimation - Dep. Var.: log(v/v19)
                  Endo.    : year, year:log(deaths_i), year:log(deaths_j)
                  Instr.   : H6M_Facial_Coverings_i, H6M_Facial_Coverings_j, C1M_School_closing_i, C1M_School_closing_j, median_age_i, median_age_j, population_density_i, population_density_j, average_surface_temperature_i, average_surface_temperature_j
Second stage: Dep. Var.: log(v/v19)
Observations: 52,736
Fixed-effects: i: 166,  j: 166
Standard-errors: Clustered (i & j) 
                                      Estimate Std. Error   t value  Pr(>|t|)
fit_year2021                          0.278363   0.102155  2.724918 0.0071255
fit_year2022                          0.351154   0.115421  3.042371 0.0027316
fit_year2020:log(deaths_i)           -0.036202   0.047670 -0.759427 0.4486807
fit_year2021:log(deaths_i)           -0.048061   0.030965 -1.552105 0.1225532
fit_year2022:log(deaths_i)           -0.063724   0.026392 -2.414505 0.0168501
fit_year2020:log(deaths_j)           -0.

In [37]:
# IV specification with deaths in levels instead of logs (no year-specific
# effects here). Instruments are the same three pairs as in `fe_iv`; the
# facial-coverings and surface-temperature instruments stay disabled.
summary(
  feols(
    log(v/v19) ~
      C8EV_International_travel_controls_i + C8EV_International_travel_controls_j |
      i + j + year |
      deaths_i + deaths_j ~
        C1M_School_closing_i + C1M_School_closing_j +
        median_age_i + median_age_j +
        population_density_i + population_density_j,
    data = df
  ),
  cluster = ~ i + j
)

NOTE: 11,717 observations removed because of NA values (RHS: 9,802, IV: 1,683/10,661).



TSLS estimation - Dep. Var.: log(v/v19)
                  Endo.    : deaths_i, deaths_j
                  Instr.   : C1M_School_closing_i, C1M_School_closing_j, median_age_i, median_age_j, population_density_i, population_density_j
Second stage: Dep. Var.: log(v/v19)
Observations: 58,590
Fixed-effects: i: 176,  j: 176,  year: 3
Standard-errors: Clustered (i & j) 
                                      Estimate Std. Error   t value Pr(>|t|)
fit_deaths_i                         -0.103772   0.052719 -1.968393 0.050602
fit_deaths_j                          0.040706   0.040900  0.995262 0.320983
C8EV_International_travel_controls_i -0.031528   0.021111 -1.493456 0.137119
C8EV_International_travel_controls_j -0.014769   0.013653 -1.081778 0.280840
                                        
fit_deaths_i                         .  
fit_deaths_j                            
C8EV_International_travel_controls_i    
C8EV_International_travel_controls_j    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 

In [29]:
# OLS with fixed effects and deaths in levels (no instruments, no
# year-specific effects)
summary(
  feols(
    log(v/v19) ~ deaths_i + deaths_j +
      C8EV_International_travel_controls_i + C8EV_International_travel_controls_j |
      i + j + year,
    data = df
  ),
  cluster = ~ i + j
)

NOTE: 11,377 observations removed because of NA values (RHS: 11,377).



OLS estimation, Dep. Var.: log(v/v19)
Observations: 58,930
Fixed-effects: i: 176,  j: 176,  year: 3
Standard-errors: Clustered (i & j) 
                                      Estimate Std. Error   t value Pr(>|t|)
deaths_i                             -0.000866   0.004215 -0.205459 0.837452
deaths_j                             -0.009835   0.003781 -2.601532 0.010076
C8EV_International_travel_controls_i -0.014892   0.012893 -1.155082 0.249631
C8EV_International_travel_controls_j -0.022658   0.013162 -1.721482 0.086931
                                        
deaths_i                                
deaths_j                             *  
C8EV_International_travel_controls_i    
C8EV_International_travel_controls_j .  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
RMSE: 0.987368     Adj. R2: 0.037829
                 Within R2: 1.796e-4

In [31]:
# Year-specific effects with deaths in levels, estimated by OLS (no
# instruments). `year/x` expands to `year + year:x`.
summary(
  feols(
    log(v/v19) ~ year/deaths_i + year/deaths_j +
      year/C8EV_International_travel_controls_i +
      year/C8EV_International_travel_controls_j |
      i + j,
    data = df
  ),
  cluster = ~ i + j
)

NOTE: 11,377 observations removed because of NA values (RHS: 11,377).



OLS estimation, Dep. Var.: log(v/v19)
Observations: 58,930
Fixed-effects: i: 176,  j: 176
Standard-errors: Clustered (i & j) 
                                               Estimate Std. Error   t value
year2021                                       0.064779   0.077509  0.835761
year2022                                       0.125198   0.092179  1.358212
year2020:deaths_i                              0.000763   0.009298  0.082032
year2021:deaths_i                             -0.002453   0.004312 -0.568838
year2022:deaths_i                             -0.017255   0.014403 -1.197975
year2020:deaths_j                             -0.025880   0.007699 -3.361387
year2021:deaths_j                             -0.007041   0.004195 -1.678409
year2022:deaths_j                             -0.006701   0.014829 -0.451846
year2020:C8EV_International_travel_controls_i -0.040867   0.023181 -1.762989
year2021:C8EV_International_travel_controls_i -0.023526   0.012415 -1.894997
year2022:C8EV_International