05_Panel_DID/Panel2.Rmd

---
title: "Panel Data 2: Implementation in R"
author: "Instructor: Yuta Toyama"
date: "Last updated: `r Sys.Date()`"
fig_width: 6 
fig_height: 4 
output:
  xaringan::moon_reader:
    css: xaringan-themer.css
    nature:
      highlightStyle: github
      highlightLines: yes
      countIncrementalSlides: no
      ratio: '16:9'

knit: pagedown::chrome_print
---
class: title-slide-section, center, middle
name: logistics
# Panel

---
## Preliminary:

- I use the following package
    - `lfe` package.

---
## Panel Data Regression

- I use the dataset `Fatalities` in `AER` package.
    - See https://www.rdocumentation.org/packages/AER/versions/1.2-6/topics/Fatalities for details.

```{r, message=FALSE, warning=FALSE, eval=FALSE}
# Attach the AER package and load its Fatalities data set.
library("AER")
data("Fatalities")

# Inspect the variables and their types.
str(Fatalities)
```

```{r, message=FALSE, warning=FALSE, echo=FALSE}
# Attach the packages used below and load the Fatalities data set.
library("AER")
library("dplyr")
data("Fatalities")

# Print the structure of the columns from `state` through `drinkage` only.
Fatalities %>%
  select(state:drinkage) %>%
  str()
```
---
```{r, echo=FALSE}
# Print the structure of the remaining columns (everything outside state:drinkage).
Fatalities %>%
  select(-(state:drinkage)) %>%
  str()
```

---

- As a preliminary analysis, let's plot the relationship between the fatality rate and the beer tax in 1988.

```{r,message=FALSE}
# Keep the 1988 observations and add the fatality rate per 10,000 residents.
data <- Fatalities %>%
  filter(year == "1988") %>%
  mutate(fatal_rate = fatal / pop * 10000)
```

```{r, eval=FALSE}
# Scatter plot of the fatality rate against the beer tax.
with(
  data,
  plot(
    x = beertax,
    y = fatal_rate,
    main = "Traffic Fatality Rates and Beer Taxes in 1988",
    xlab = "Beer tax (in 1988 dollars)",
    ylab = "Fatality rate (fatalities per 10000)",
    col = "steelblue",
    pch = 20
  )
)
```
---
.center[
```{r, fig.height=7, fig.width=11, echo=FALSE}
# Scatter plot of the fatality rate against the beer tax.
with(
  data,
  plot(
    x = beertax,
    y = fatal_rate,
    main = "Traffic Fatality Rates and Beer Taxes in 1988",
    xlab = "Beer tax (in 1988 dollars)",
    ylab = "Fatality rate (fatalities per 10000)",
    col = "steelblue",
    pch = 20
  )
)
```
]

- There is a positive correlation between the beer tax and the traffic fatality rate, possibly due to omitted variable bias.


---

- Run fixed effect regression using `felm` command in `lfe` package. 
    - https://www.rdocumentation.org/packages/lfe/versions/2.8-3/topics/felm
    

```{r,message=FALSE,warning=FALSE, eval=FALSE}
library("lfe")

# Fatality rate per 10,000 residents, now computed for every year in the panel.
data <- mutate(Fatalities, fatal_rate = fatal / pop * 10000)

# Pooled OLS: the fixed-effect, IV, and cluster parts of the formula are all 0.
result_ols <- felm(fatal_rate ~ beertax | 0 | 0 | 0, data = data)
summary(result_ols, robust = TRUE)
```
---
```{r,message=FALSE,warning=FALSE,echo=FALSE}
library("lfe")

# Fatality rate per 10,000 residents, now computed for every year in the panel.
data <- mutate(Fatalities, fatal_rate = fatal / pop * 10000)

# Pooled OLS: the fixed-effect, IV, and cluster parts of the formula are all 0.
result_ols <- felm(fatal_rate ~ beertax | 0 | 0 | 0, data = data)
summary(result_ols, robust = TRUE)
```
---

```{r}
# State fixed effects, with standard errors clustered by state
result_stateFE <- felm(fatal_rate ~ beertax | state | 0 | state, data = data)
summary(result_stateFE, robust = TRUE)
```

---

```{r}
# State and year fixed effects, with standard errors clustered by state
result_bothFE <- felm(fatal_rate ~ beertax | state + year | 0 | state, data = data)
summary(result_bothFE, robust = TRUE)
```


---
Report results using texreg.
Note that 
  - Setting "robust" option `TRUE` reports Heteroskedasticity-robust SE for the first column.
  - Automatically report Cluster-Robust SE for the second and the third columns.

```{r,eval=FALSE,message=FALSE,warning=FALSE}

library("texreg")

# Side-by-side table: pooled OLS, state FE, and state + year FE.
screenreg(
  l = list(result_ols, result_stateFE, result_bothFE),
  digits = 3,
  # caption = 'title',
  # custom.model.names = c("(I)", "(II)", "(III)", "(IV)", "(V)"),
  custom.coef.names = NULL, # supply a character vector to rename the variables
  custom.header = list("fatal_rate" = 1:3),
  custom.gof.rows = list(
    "State FE" = c("No", "Yes", "Yes"),
    "Year FE" = c("No", "No", "Yes")
  ),
  include.ci = FALSE,
  include.rsquared = FALSE, include.adjrs = TRUE, include.nobs = TRUE,
  include.pvalues = FALSE, include.df = FALSE, include.rmse = FALSE,
  robust = TRUE, # robust standard errors
  stars = numeric(0)
)
```
---
```{r,echo=FALSE,message=FALSE,warning=FALSE}

library("texreg")

# Side-by-side table: pooled OLS, state FE, and state + year FE.
screenreg(
  l = list(result_ols, result_stateFE, result_bothFE),
  digits = 3,
  # caption = 'title',
  # custom.model.names = c("(I)", "(II)", "(III)", "(IV)", "(V)"),
  custom.coef.names = NULL, # supply a character vector to rename the variables
  custom.header = list("fatal_rate" = 1:3),
  custom.gof.rows = list(
    "State FE" = c("No", "Yes", "Yes"),
    "Year FE" = c("No", "No", "Yes")
  ),
  include.ci = FALSE,
  include.rsquared = FALSE, include.adjrs = TRUE, include.nobs = TRUE,
  include.pvalues = FALSE, include.df = FALSE, include.rmse = FALSE,
  robust = TRUE, # robust standard errors
  stars = numeric(0)
)
```
---

- What if we do not use the cluster-robust standard error?

```{r,eval=FALSE}
# State FE, no cluster variable (w.o. CRS)
result_wo_CRS <- felm(fatal_rate ~ beertax | state | 0 | 0, data = data)

# State FE, clustered by state (w. CRS)
result_w_CRS <- felm(fatal_rate ~ beertax | state | 0 | state, data = data)

# Put the heteroskedasticity-robust and the cluster-robust standard errors side by side.
screenreg(
  l = list(result_wo_CRS, result_w_CRS),
  digits = 3,
  # caption = 'title',
  # custom.model.names = c("(I)", "(II)", "(III)", "(IV)", "(V)"),
  custom.coef.names = NULL, # supply a character vector to rename the variables
  custom.header = list("fatal_rate" = 1:2),
  custom.note = 'SE of `Model 1` is "Heteroskedasticity-Robust", while one of `Model 2` is "Cluster-Robust."',
  include.ci = FALSE,
  include.rsquared = FALSE, include.adjrs = TRUE, include.nobs = TRUE,
  include.pvalues = FALSE, include.df = FALSE, include.rmse = FALSE,
  robust = TRUE, # robust standard errors
  stars = numeric(0)
)

```


---
```{r,echo=FALSE}

# State FE, no cluster variable (w.o. CRS)
result_wo_CRS <- felm(fatal_rate ~ beertax | state | 0 | 0, data = data)

# State FE, clustered by state (w. CRS)
result_w_CRS <- felm(fatal_rate ~ beertax | state | 0 | state, data = data)

# Put the heteroskedasticity-robust and the cluster-robust standard errors side by side.
# The argument list ends without a trailing comma, so no empty positional
# argument is handed to screenreg().
screenreg(
  l = list(result_wo_CRS, result_w_CRS),
  digits = 3,
  # caption = 'title',
  # custom.model.names = c("(I)", "(II)", "(III)", "(IV)", "(V)"),
  custom.coef.names = NULL, # supply a character vector to rename the variables
  custom.header = list("fatal_rate" = 1:2),
  custom.note = 'SE of `Model 1` is "Heteroskedasticity-Robust", while one of `Model 2` is "Cluster-Robust."',
  include.ci = FALSE,
  include.rsquared = FALSE, include.adjrs = TRUE, include.nobs = TRUE,
  include.pvalues = FALSE, include.df = FALSE, include.rmse = FALSE,
  robust = TRUE, # robust standard errors
  stars = numeric(0)
)
```

---
class: title-slide-section, center, middle
name: logistics
# Panel + IV

---
## Panel Data with Instrumental Variables

- Revisit the demand for cigarettes
- Consider the following model
$$\log (Q_{it}) = \beta_0 + \beta_1 \log (P_{it}) + \beta_2 \log(income_{it}) + u_i + e_{it}$$
where 
    - $Q_{it}$ is the number of packs per capita in state $i$ in year $t$, 
    - $P_{it}$ is the after-tax average real price per pack of cigarettes, and 
    - $income_{it}$ is the real income per capita. This is a demand shifter.

---
<br/><br/>
- As IVs for the price, we use the following:
    - $SalesTax_{it}$: the proportion of taxes on cigarettes arising from the general sales tax.
        - Relevant as it is included in the after-tax price
        - Exogenous (independent) since the sales tax does not influence demand directly, but only indirectly through the price.
    - $CigTax_{it}$: the cigarette-specific taxes
    
    
---

```{r, message=FALSE,warning=FALSE}

# Load the CigarettesSW data set and add variables deflated by `cpi`.
library("AER")
data("CigarettesSW")
Cigdata <- mutate(
  CigarettesSW,
  rincome = (income / population) / cpi, # income per capita, deflated
  rprice = price / cpi, # price, deflated
  salestax = (taxs - tax) / cpi, # taxs minus tax, deflated
  cigtax = tax / cpi # tax, deflated
)
```
---

- Run IV regression with panel data.

```{r,eval=FALSE}

# All four models cluster the standard errors by state.

# (1) OLS
result_1 <- felm(log(packs) ~ log(rprice) + log(rincome) | 0 | 0 | state,
                 data = Cigdata)
# (2) State FE
result_2 <- felm(log(packs) ~ log(rprice) + log(rincome) | state | 0 | state,
                 data = Cigdata)
# (3) IV without FE: log(rprice) is instrumented by salestax and cigtax
result_3 <- felm(log(packs) ~ log(rincome) | 0 |
                   (log(rprice) ~ salestax + cigtax) | state,
                 data = Cigdata)
# (4) IV with state FE
result_4 <- felm(log(packs) ~ log(rincome) | state |
                   (log(rprice) ~ salestax + cigtax) | state,
                 data = Cigdata)

screenreg(
  l = list(result_1, result_2, result_3, result_4),
  digits = 3,
  custom.coef.names = NULL, # supply a character vector to rename the variables
  custom.header = list("log(packs)" = 1:4),
  include.ci = FALSE,
  include.rsquared = FALSE, include.adjrs = TRUE, include.nobs = TRUE,
  include.pvalues = FALSE, include.df = FALSE, include.rmse = FALSE,
  robust = TRUE, # robust standard errors
  stars = numeric(0)
)

```

---
```{r,echo=FALSE}

# All four models cluster the standard errors by state.

# (1) OLS
result_1 <- felm(log(packs) ~ log(rprice) + log(rincome) | 0 | 0 | state,
                 data = Cigdata)
# (2) State FE
result_2 <- felm(log(packs) ~ log(rprice) + log(rincome) | state | 0 | state,
                 data = Cigdata)
# (3) IV without FE: log(rprice) is instrumented by salestax and cigtax
result_3 <- felm(log(packs) ~ log(rincome) | 0 |
                   (log(rprice) ~ salestax + cigtax) | state,
                 data = Cigdata)
# (4) IV with state FE
result_4 <- felm(log(packs) ~ log(rincome) | state |
                   (log(rprice) ~ salestax + cigtax) | state,
                 data = Cigdata)

screenreg(
  l = list(result_1, result_2, result_3, result_4),
  digits = 3,
  custom.coef.names = NULL, # supply a character vector to rename the variables
  custom.header = list("log(packs)" = 1:4),
  include.ci = FALSE,
  include.rsquared = FALSE, include.adjrs = TRUE, include.nobs = TRUE,
  include.pvalues = FALSE, include.df = FALSE, include.rmse = FALSE,
  robust = TRUE, # robust standard errors
  stars = numeric(0)
)
```

---
class: title-slide-section, center, middle
name: logistics
# `felm` command

---
## Report heteroskedasticity robust standard error

```{r,eval=FALSE}

# Run felm command without specifying cluster: the last part of the formula is 0.
result_1 <- felm(log(packs) ~ log(rprice) + log(rincome) | 0 | 0 | 0, data = Cigdata)

# With no cluster variable in the model, `robust = TRUE` makes the table
# report heteroskedasticity-robust standard errors.
screenreg(
  l = list(result_1),
  digits = 3,
  # caption = 'title',
  custom.model.names = c("  Model 1  "),
  custom.coef.names = NULL, # supply a character vector to rename the variables
  custom.header = list("log(packs)" = 1),
  include.ci = FALSE,
  include.rsquared = FALSE, include.adjrs = TRUE, include.nobs = TRUE,
  include.pvalues = FALSE, include.df = FALSE, include.rmse = FALSE,
  robust = TRUE, # robust standard errors
  stars = numeric(0)
)


# stargazer::stargazer(result_1, type = "text",
#                      se = list(result_1$rse ) )

```

---
```{r,echo=FALSE}

# Run felm command without specifying cluster: the last part of the formula is 0.
result_1 <- felm(log(packs) ~ log(rprice) + log(rincome) | 0 | 0 | 0, data = Cigdata)

# With no cluster variable in the model, `robust = TRUE` makes the table
# report heteroskedasticity-robust standard errors.
screenreg(
  l = list(result_1),
  digits = 3,
  # caption = 'title',
  custom.model.names = c("  Model 1  "),
  custom.coef.names = NULL, # supply a character vector to rename the variables
  custom.header = list("log(packs)" = 1),
  include.ci = FALSE,
  include.rsquared = FALSE, include.adjrs = TRUE, include.nobs = TRUE,
  include.pvalues = FALSE, include.df = FALSE, include.rmse = FALSE,
  robust = TRUE, # robust standard errors
  stars = numeric(0)
)

```

---
## How to conduct F test after `felm`

```{r}
# Fit the model in levels; no fixed effects, instruments, or cluster are specified.
result_1 <- felm(packs ~ rprice + rincome | 0 | 0 | 0, data = Cigdata)

# Joint test of H0: _b[rincome] = 0 & _b[rprice] = 0
ftest1 <- waldtest(result_1, ~ rincome | rprice)
ftest1

# The fifth element, ftest1[5], is the F-value.
fval1 <- ftest1[5]
fval1
```
---

```{r}
# The following tests H0: _b[rincome] - 1 = 0 & _b[rprice] = 0
ftest2 <- waldtest(result_1, ~ rincome - 1 | rprice)
ftest2

# ftest2[5] corresponds to the F-value of this second test
# (take it from ftest2, not from the earlier ftest1).
fval2 <- ftest2[5]
fval2
```