# Preparation

## Load the required packages

In [1]:
# easypackages is attached first; the libraries() call below (presumably
# provided by easypackages) then attaches dplyr and tidyr in a single call.
myPackages <- c("easypackages")
#the argument `lib.loc = "~/rlib"` is used because the packages were installed in this folder in my case
# NOTE(review): "~/rlib" is specific to the author's machine; adjust or drop
# `lib.loc` when running elsewhere.
lapply(myPackages, library, lib.loc = "~/rlib", character.only = TRUE) 
libraries("dplyr", "tidyr")

Loading required package: dplyr


Attaching package: ‘dplyr’


The following objects are masked from ‘package:stats’:

    filter, lag


The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union


Loading required package: tidyr

All packages loaded successfully



## Load the merged data

In [2]:
# Data: restore the saved R objects from the file "descriptives" (path is
# relative to the working directory) into the current workspace.
# NOTE(review): the following cells use `des`, so it is presumably one of the
# objects stored in this file - confirm.
load("descriptives")

In [3]:
# Preview the first six rows, then list every column name (the wide preview
# elides the middle columns).
head(des, n = 6L)
names(des)

event_id,country,country_code,country_code_c,event_date,event_year,start_year,year,after,weight,⋯,EMP_2010,WP1223.MARITAL,pa,na,REGION_ALL,year_number,year_after,gdp,dem,loggdp_z
<fct>,<chr>,<chr>,<chr>,<date>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1,Armenia,51,ARM,2013-01-31,2013,2010,2010,0,1.2329449,⋯,6,1,1.0,0.25,ARM_1,-2.587269,0,7507.145,0.194,-0.2010166
1,Armenia,51,ARM,2013-01-31,2013,2010,2010,0,1.2263343,⋯,6,2,0.5,0.25,ARM_1,-2.587269,0,7507.145,0.194,-0.2010166
1,Armenia,51,ARM,2013-01-31,2013,2010,2010,0,0.739767,⋯,4,1,0.5,0.5,ARM_1,-2.587269,0,7507.145,0.194,-0.2010166
1,Armenia,51,ARM,2013-01-31,2013,2010,2010,0,1.2329514,⋯,4,2,1.0,0.75,ARM_1,-2.587269,0,7507.145,0.194,-0.2010166
1,Armenia,51,ARM,2013-01-31,2013,2010,2010,0,0.3850559,⋯,4,1,0.5,0.75,ARM_1,-2.587269,0,7507.145,0.194,-0.2010166
1,Armenia,51,ARM,2013-01-31,2013,2010,2010,0,0.5775838,⋯,3,1,0.5,0.75,ARM_1,-2.587269,0,7507.145,0.194,-0.2010166


## Drop the rows if pa, na, ls AND hope are all NA

In [4]:
# Keep a row unless ALL outcome variables are missing.
# The mask is built from the same data frame that is subset, and the threshold
# is derived from the column list, so the filter stays correct if the list of
# outcome variables is ever changed.
outcome_vars <- c("ls", "hope", "pa", "na")
all_outcomes_missing <-
    rowSums(is.na(des[, outcome_vars, drop = FALSE])) == length(outcome_vars)
des2 <- des[!all_outcomes_missing, ]

# Report how many rows were removed by the filter.
cat("Number of rows in ORIGINAL data frame:", nrow(des), "\n")
cat("Number of rows in NEW data frame:", nrow(des2), "\n")
cat("Number of dropped rows:", nrow(des) - nrow(des2))

Number of rows in ORIGINAL data frame: 118134 
Number of rows in NEW data frame: 118120 
Number of dropped rows: 14

In [5]:
# Round every numeric column of a data frame; all other columns are returned
# unchanged.
round_df <- function(x, digits) {
    # x: data frame
    # digits: number of decimal places, passed on to round()
    # returns: x with its numeric (integer/double) columns rounded
    #
    # is.numeric() is used rather than mode(): mode() also reports "numeric"
    # for factor and Date columns, and round() on a data frame containing such
    # columns stops with an error.
    numeric_columns <- vapply(x, is.numeric, logical(1))
    # Nothing to round when the data frame has no numeric columns.
    if (any(numeric_columns)) {
        x[numeric_columns] <- round(x[numeric_columns], digits)
    }
    x
}

# Descriptive Statistics for the continuous variables (dem, gdp, loggdp_z, WP1220.AGE, hope, ls, pa, na)

In [8]:
# Reshape the selected variables to long format (one row per variable/value
# pair) so that a single grouped summary yields one row of statistics per
# variable. Missing values are ignored by the statistics and counted separately.
des2 %>%
    select(dem, gdp, loggdp_z, WP1220.AGE, hope, ls, pa, na) %>%
    pivot_longer(cols = everything()) %>%
    group_by(name) %>%
    summarise_at(
        "value",
        list(
            MIN = function(v) min(v, na.rm = TRUE),
            MAX = function(v) max(v, na.rm = TRUE),
            MEAN = function(v) mean(v, na.rm = TRUE),
            SD = function(v) sd(v, na.rm = TRUE),
            NumOfNAs = function(v) sum(is.na(v))
        )
    ) %>%
    round_df(digits = 5)

name,MIN,MAX,MEAN,SD,NumOfNAs
<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
dem,0.042,0.853,0.42416,0.25194,0
gdp,1068.24485,65094.79943,17190.79393,17148.75714,0
hope,0.0,10.0,6.74984,2.48481,8054
loggdp_z,-1.86907,1.64682,-7e-05,0.99999,0
ls,0.0,10.0,5.31604,2.44485,1000
na,0.0,1.0,0.31411,0.32996,6415
pa,0.0,1.0,0.70813,0.38576,6408
WP1220.AGE,15.0,99.0,41.36278,17.773,568


# Descriptive Statistics for the categorical variables (event_id, country, WP1219.MALE, EMP_2010, WP1223.MARITAL)

## event_id

In [13]:
# Share of rows per event; table() leaves out missing values by default.
setNames(
    data.frame(prop.table(table(des2$event_id))),
    c("event_id", "proportion")
)

event_id,proportion
<fct>,<dbl>
1,0.05078733
2,0.0507958
3,0.0508466
4,0.05586692
5,0.06840501
6,0.05132069
7,0.0507958
8,0.05086353
9,0.06784626
10,0.0507958


## country

In [10]:
# Share of rows per country; table() leaves out missing values by default.
setNames(
    data.frame(prop.table(table(des2$country))),
    c("country", "proportion")
)

country,proportion
<fct>,<dbl>
Armenia,0.05078733
Bolivia,0.0507958
Bulgaria,0.0508466
Cambodia,0.05586692
Canada,0.06840501
Colombia,0.0507958
Guinea,0.05086353
Italy,0.06784626
Kenya,0.0507958
Kosovo,0.05153234


## WP1219.MALE

In [27]:
# Lookup table translating the codes of WP1219.MALE into labels.
gender <- data.frame(
    MALE = c(1, 0),
    gender = c("male", "female")
)

# Proportion of each code among the non-missing answers; the (inner) merge
# attaches the label and only the label and proportion are kept.
setNames(
    data.frame(prop.table(table(des2$WP1219.MALE))),
    c("MALE", "proportion")
) %>%
    merge(gender, by = "MALE") %>%
    select(gender, proportion)

gender,proportion
<chr>,<dbl>
female,0.5500085
male,0.4499915


## EMP_2010

In [22]:
# Lookup table translating the EMP_2010 codes into employment-status labels.
employment <- data.frame(EMP_2010 = c(1, 2, 3, 4, 5, 6),
                     employment = c("Employed full time for an employer", 
                                    "Employed full time for self", 
                                    "Employed part time do not want full time", 
                                    "Unemployed", 
                                    "Employed part time want full time", 
                                    "Out of workforce"))

# Proportion of each code, counting missing values as their own category
# (useNA = "ifany"). The full outer merge (all = TRUE) keeps rows without a
# match in the lookup table, so the NA category appears with an empty label.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned.
des2$EMP_2010 %>%
    table(useNA = "ifany") %>%
    prop.table %>%
    data.frame %>%
    setNames(c("EMP_2010", "proportion")) %>%
    merge(employment, by = "EMP_2010", all = TRUE) %>%
    select(employment, proportion)



employment,proportion
<chr>,<dbl>
Employed full time for an employer,0.221021
Employed full time for self,0.14401456
Employed part time do not want full time,0.07571114
Unemployed,0.06826956
Employed part time want full time,0.08559939
Out of workforce,0.35431764
,0.05106671


## WP1223.MARITAL

In [25]:
# Lookup table translating the WP1223.MARITAL codes into marital-status labels.
marital <- data.frame(MARITAL = c(1, 2, 3, 4, 5, 8),
                     marital = c("Single/Never been married", 
                                 "Married", 
                                 "Separated", 
                                 "Divorced", 
                                 "Widowed", 
                                 "Domestic partner"))

# Proportion of each code, counting missing values as their own category
# (useNA = "ifany"). The full outer merge (all = TRUE) keeps rows without a
# match in the lookup table, so the NA category appears with an empty label.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned.
des2$WP1223.MARITAL %>%
    table(useNA = "ifany") %>%
    prop.table %>%
    data.frame %>%
    setNames(c("MARITAL", "proportion")) %>%
    merge(marital, by = "MARITAL", all = TRUE) %>%
    select(marital, proportion)


marital,proportion
<chr>,<dbl>
Single/Never been married,0.276430748
Married,0.554757873
Separated,0.025025398
Divorced,0.030570606
Widowed,0.070682357
Domestic partner,0.036225872
,0.006307145
