Load the required packages.

In [1]:
# Attach easypackages first: its libraries() helper is used below to attach
# the remaining packages in one call.
myPackages <- c("easypackages")
# `lib.loc = "~/rlib"` is needed because the packages were installed in that
# folder on the author's machine.
lapply(
  X = myPackages,
  FUN = library,
  lib.loc = "~/rlib",
  character.only = TRUE
)
libraries("dplyr", "tidyr")

Loading required package: dplyr


Attaching package: ‘dplyr’


The following objects are masked from ‘package:stats’:

    filter, lag


The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union


Loading required package: tidyr

All packages loaded successfully



Import the GDP per capita (PPP) data (from https://data.worldbank.org/indicator/NY.GDP.PCAP.PP.CD) and the democracy data (the liberal democracy index from V-Dem).

For the liberal democracy index,
- v2x_libdem: To what extent is the ideal of liberal democracy achieved
    - Scale: Interval, from low to high (0-1)





In [2]:
# Read the World Bank GDP-per-capita (PPP) export.
# `skip = 4` skips the four lines that precede the header row, and
# `header = TRUE` (spelled out, since `T` can be reassigned) makes the first
# remaining line supply the column names.
GDPraw <- rio::import(
  "API_NY.GDP.PCAP.PP.CD_DS2_en_csv_v2_4250814.csv",
  skip = 4,
  header = TRUE
)
head(GDPraw)

Unnamed: 0_level_0,Country Name,Country Code,Indicator Name,Indicator Code,1960,1961,1962,1963,1964,1965,⋯,2013,2014,2015,2016,2017,2018,2019,2020,2021,V67
Unnamed: 0_level_1,<chr>,<chr>,<chr>,<chr>,<lgl>,<lgl>,<lgl>,<lgl>,<lgl>,<lgl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<lgl>
1,Aruba,ABW,"GDP per capita, PPP (current international $)",NY.GDP.PCAP.PP.CD,,,,,,,⋯,36830.157,36782.227,37314.564,37587.066,38893.961,40143.0,39834.224,31182.834,,
2,Africa Eastern and Southern,AFE,"GDP per capita, PPP (current international $)",NY.GDP.PCAP.PP.CD,,,,,,,⋯,3365.684,3503.827,3533.337,3627.474,3702.515,3794.095,3845.619,3687.728,3912.177,
3,Afghanistan,AFG,"GDP per capita, PPP (current international $)",NY.GDP.PCAP.PP.CD,,,,,,,⋯,2015.515,2069.424,2087.305,1981.118,2058.4,2082.392,2152.19,2078.479,,
4,Africa Western and Central,AFW,"GDP per capita, PPP (current international $)",NY.GDP.PCAP.PP.CD,,,,,,,⋯,4013.197,4208.732,4139.323,4069.006,4115.645,4231.321,4335.409,4241.522,4474.971,
5,Angola,AGO,"GDP per capita, PPP (current international $)",NY.GDP.PCAP.PP.CD,,,,,,,⋯,7682.476,8179.298,7337.57,7103.226,7310.897,7148.933,6995.3,6478.332,6581.042,
6,Albania,ALB,"GDP per capita, PPP (current international $)",NY.GDP.PCAP.PP.CD,,,,,,,⋯,10570.964,11259.268,11658.906,12078.843,12770.964,13498.138,13999.424,13632.187,15646.042,


In [40]:
# Read the V-Dem core data set, keeping only the country/year identifiers and
# the liberal democracy index together with its `_codelow`, `_codehigh` and
# `_sd` companion columns.
LDIraw <- rio::import(
  "V-Dem-CY-Core-v12.csv",
  select = c(
    "country_name",
    "country_text_id",
    "year",
    "v2x_libdem",
    "v2x_libdem_codelow",
    "v2x_libdem_codehigh",
    "v2x_libdem_sd"
  )
)
head(LDIraw)

Unnamed: 0_level_0,country_name,country_text_id,year,v2x_libdem,v2x_libdem_codelow,v2x_libdem_codehigh,v2x_libdem_sd
Unnamed: 0_level_1,<chr>,<chr>,<int>,<dbl>,<dbl>,<dbl>,<dbl>
1,Mexico,MEX,1789,0.04,0.027,0.053,0.014
2,Mexico,MEX,1790,0.04,0.027,0.053,0.014
3,Mexico,MEX,1791,0.04,0.027,0.053,0.014
4,Mexico,MEX,1792,0.04,0.027,0.053,0.014
5,Mexico,MEX,1793,0.04,0.027,0.053,0.014
6,Mexico,MEX,1794,0.04,0.027,0.053,0.014


1. Convert GDP data from wide to long format.

2. Drop the rows with `NA` in `GDPperCapPPP`.

3. Filter out the data from before 2005, because the Gallup data start in 2005.

4. Change the year in GDP data to numeric.

In [55]:
# Reshape the GDP table from wide (one column per year) to long (one row per
# country-year) and keep the observations from 2005 onwards.
GDPbase <- GDPraw %>%
  select(-`Indicator Name`, -`Indicator Code`, -V67) %>%
  tidyr::pivot_longer(
    cols = `1960`:`2021`,
    names_to = "Year",
    values_to = "GDPperCapPPP"
  ) %>%
  # `Year` comes from the former column names and is therefore character.
  # Convert it before filtering so that `>=` compares numbers, not strings.
  mutate(Year = as.integer(Year)) %>%
  # Drop only the rows that lack a GDP value; an NA in another column must not
  # silently remove an observation.
  drop_na(GDPperCapPPP) %>%
  filter(Year >= 2005L) %>%
  as.data.frame()
head(GDPbase)

# Restrict the democracy data to the same period.
LDIbase <- LDIraw %>% filter(year >= 2005)

Unnamed: 0_level_0,Country Name,Country Code,Year,GDPperCapPPP
Unnamed: 0_level_1,<chr>,<chr>,<int>,<dbl>
1,Aruba,ABW,2005,34254.14
2,Aruba,ABW,2006,35429.17
3,Aruba,ABW,2007,37366.69
4,Aruba,ABW,2008,38729.56
5,Aruba,ABW,2009,34393.26
6,Aruba,ABW,2010,33784.54


Before merging, we need to check whether the two datasets have the same country names.

If the names are different for the same country, we will use the name in the GDP data.

In [56]:
# V-Dem country names that have no identically spelled counterpart in the GDP
# data (base::setdiff already returns unique values).
base::setdiff(LDIbase$country_name, GDPbase$`Country Name`) %>% sort

In [57]:
# GDP-data names that are neither present in the V-Dem data nor matched by the
# pattern below (the pattern is there to exclude regional/income aggregates).
GDPbase$`Country Name`[
  !(GDPbase$`Country Name` %in% LDIbase$country_name) &
    !grepl("Africa|Asia|income|World|total|only|Euro|members|Other|ountr", GDPbase$`Country Name`)
] %>%
  unique %>%
  sort

In [58]:
# Lookup table: V-Dem spelling (name) -> spelling used in the GDP data (value).
ldi_to_gdp_names <- c(
  "Burma/Myanmar" = "Myanmar",
  "Cape Verde" = "Cabo Verde",
  "Democratic Republic of the Congo" = "Congo, Dem. Rep.",
  "Egypt" = "Egypt, Arab Rep.",
  "Hong Kong" = "Hong Kong SAR, China",
  "Iran" = "Iran, Islamic Rep.",
  "Ivory Coast" = "Cote d\'Ivoire",
  "Kyrgyzstan" = "Kyrgyz Republic",
  "Laos" = "Lao PDR",
  "Palestine/West Bank" = "West Bank and Gaza",
  "Republic of the Congo" = "Congo, Rep.",
  "Russia" = "Russian Federation",
  "Slovakia" = "Slovak Republic",
  "South Korea" = "Korea, Rep.",
  "The Gambia" = "Gambia, The",
  "Turkey" = "Turkiye",
  "United States of America" = "United States",
  "Venezuela" = "Venezuela, RB",
  "Yemen" = "Yemen, Rep."
)

# Overwrite only the rows whose name appears in the lookup table; all other
# names are left untouched.
needs_rename <- LDIbase$country_name %in% names(ldi_to_gdp_names)
LDIbase$country_name[needs_rename] <-
  unname(ldi_to_gdp_names[LDIbase$country_name[needs_rename]])

# Re-run both checks to see which names are still unmatched after renaming.
base::setdiff(LDIbase$country_name, GDPbase$`Country Name`) %>% sort
GDPbase$`Country Name`[
  !(GDPbase$`Country Name` %in% LDIbase$country_name) &
    !grepl("Africa|Asia|income|World|total|only|Euro|members|Other|ountr", GDPbase$`Country Name`)
] %>%
  unique %>%
  sort

The data sets are ready to be merged and exported now.

In [61]:
# Join on country name and year. `country_text_id` is dropped from the V-Dem
# side first, so the result keeps only `Country Code` as a country code column.
GDPandLDI <- GDPbase %>%
  inner_join(
    select(LDIbase, -country_text_id),
    by = c("Country Name" = "country_name", "Year" = "year")
  )
head(GDPandLDI)
dim(GDPandLDI)
# Export the merged data without the row-name column.
write.csv(x = GDPandLDI, file = "GDPandLDI.csv", row.names = FALSE)

Unnamed: 0_level_0,Country Name,Country Code,Year,GDPperCapPPP,v2x_libdem,v2x_libdem_codelow,v2x_libdem_codehigh,v2x_libdem_sd
Unnamed: 0_level_1,<chr>,<chr>,<int>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1,Afghanistan,AFG,2005,1023.55,0.133,0.108,0.152,0.022
2,Afghanistan,AFG,2006,1078.907,0.229,0.193,0.259,0.034
3,Afghanistan,AFG,2007,1230.205,0.231,0.2,0.265,0.034
4,Afghanistan,AFG,2008,1273.788,0.234,0.198,0.265,0.035
5,Afghanistan,AFG,2009,1519.311,0.231,0.194,0.26,0.034
6,Afghanistan,AFG,2010,1710.763,0.227,0.195,0.258,0.033


In the future, if we have to merge the LDI and the GDP data sets, we can just merge them using the `country_text_id` and `Country Code` without changing the country names in the LDI data.

The data frame obtained by merging on `country_text_id` (shown below) has the same number of rows as the merged data frame above. This suggests that, although it is unclear how V-Dem assigns `country_text_id`, its values coincide with the `Country Code` values in the GDP data.

In [54]:
# Rebuild the long-format GDP table from the raw data, so that this cell does
# not depend on the name-harmonised objects created earlier.
GDPbase2 <- GDPraw %>%
  select(-`Indicator Name`, -`Indicator Code`, -V67) %>%
  tidyr::pivot_longer(
    cols = `1960`:`2021`,
    names_to = "Year",
    values_to = "GDPperCapPPP"
  ) %>%
  # `Year` comes from the former column names and is therefore character.
  # Convert it before filtering so that `>=` compares numbers, not strings.
  mutate(Year = as.integer(Year)) %>%
  # Drop only the rows that lack a GDP value.
  drop_na(GDPperCapPPP) %>%
  filter(Year >= 2005L) %>%
  as.data.frame()

LDIbase2 <- LDIraw %>% filter(year >= 2005)

# Join on the country code instead of the country name, so no renaming of
# country names is needed.
GDPandLDI2 <- inner_join(
  GDPbase2,
  LDIbase2,
  by = c("Country Code" = "country_text_id", "Year" = "year")
)
head(GDPandLDI2)
dim(GDPandLDI2)

Unnamed: 0_level_0,Country Name,Country Code,Year,GDPperCapPPP,country_name,v2x_libdem,v2x_libdem_codelow,v2x_libdem_codehigh,v2x_libdem_sd
Unnamed: 0_level_1,<chr>,<chr>,<int>,<dbl>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>
1,Afghanistan,AFG,2005,1023.55,Afghanistan,0.133,0.108,0.152,0.022
2,Afghanistan,AFG,2006,1078.907,Afghanistan,0.229,0.193,0.259,0.034
3,Afghanistan,AFG,2007,1230.205,Afghanistan,0.231,0.2,0.265,0.034
4,Afghanistan,AFG,2008,1273.788,Afghanistan,0.234,0.198,0.265,0.035
5,Afghanistan,AFG,2009,1519.311,Afghanistan,0.231,0.194,0.26,0.034
6,Afghanistan,AFG,2010,1710.763,Afghanistan,0.227,0.195,0.258,0.033
