In [1]:
install.packages("WDI")

Installing package into 'C:/Users/Yahya/AppData/Local/R/win-library/4.4'
(as 'lib' is unspecified)



package 'WDI' successfully unpacked and MD5 sums checked

The downloaded binary packages are in
	C:\Users\Yahya\AppData\Local\Temp\RtmpEBYS8G\downloaded_packages


In [2]:
# Install pacman if not installed (it helps manage other packages)
if (!require("pacman")) install.packages("pacman")

# Load necessary libraries
pacman::p_load(
  tidyverse,  # For data manipulation
  WDI,        # For World Bank Data API
  janitor,    # For cleaning column names
  here        # For file referencing
)

print("Libraries loaded successfully!")

Loading required package: pacman



[1] "Libraries loaded successfully!"


In [3]:
# Define the indicators we want from the World Bank
# NY.GDP.PCAP.KD    -> GDP per capita (constant 2015 US$)
# SP.DYN.LE00.IN    -> Life expectancy at birth, total (years)
# SL.UEM.TOTL.ZS    -> Unemployment, total (% of total labor force)
# SE.XPD.TOTL.GD.ZS -> Government expenditure on education, total (% of GDP)
# EN.ATM.CO2E.PC    -> CO2 emissions (metric tons per capita)
# SH.XPD.CHEX.GD.ZS -> Current health expenditure (% of GDP)

indicators <- c(
  "gdp_per_capita" = "NY.GDP.PCAP.KD",
  "life_expectancy" = "SP.DYN.LE00.IN",
  "unemployment_rate" = "SL.UEM.TOTL.ZS",
  "education_expenditure" = "SE.XPD.TOTL.GD.ZS",
  "co2_emissions" = "EN.ATM.CO2E.PC",
  "health_expenditure" = "SH.XPD.CHEX.GD.ZS"
)

# Fetch data for all countries from year 2000 to 2022
# This might take a few seconds
wb_data <- WDI(
  country = "all", 
  indicator = indicators, 
  start = 2000, 
  end = 2022, 
  extra = TRUE # Gets region and income level info
)

print("World Bank Data Fetched:")
glimpse(wb_data)

"The following indicators could not be downloaded: EN.ATM.CO2E.PC.

Please make sure that you are running the latest version of the `WDI` package, and that the arguments you are using in the `WDI()` function are valid.

Sometimes, downloads will suddenly stop working, even if nothing has changed in the R code of the WDI package. ("The same WDI package version worked yesterday!") In those cases, the problem is almost certainly related to the World Bank servers or to your internet connection.

You can check if the World Bank web API is currently serving the indicator(s) of interest by typing a URL of this form in your web browser:

https://api.worldbank.org/v2/en/country/all/indicator/EN.ATM.CO2E.PC?format=json&date=:&per_page=32500&page=1"


[1] "World Bank Data Fetched:"
Rows: 6,118
Columns: 17
$ country               [3m[90m<chr>[39m[23m "Afghanistan"[90m, [39m"Afghanistan"[90m, [39m"Afghanistan"[90m, [39m"Af…
$ iso2c                 [3m[90m<chr>[39m[23m "AF"[90m, [39m"AF"[90m, [39m"AF"[90m, [39m"AF"[90m, [39m"AF"[90m, [39m"AF"[90m, [39m"AF"[90m, [39m"AF"[90m, [39m…
$ iso3c                 [3m[90m<chr>[39m[23m "AFG"[90m, [39m"AFG"[90m, [39m"AFG"[90m, [39m"AFG"[90m, [39m"AFG"[90m, [39m"AFG"[90m, [39m"AFG"[90m,[39m…
$ year                  [3m[90m<int>[39m[23m 2012[90m, [39m2016[90m, [39m2017[90m, [39m2011[90m, [39m2000[90m, [39m2013[90m, [39m2018[90m, [39m2010[90m, [39m…
$ status                [3m[90m<chr>[39m[23m ""[90m, [39m""[90m, [39m""[90m, [39m""[90m, [39m""[90m, [39m""[90m, [39m""[90m, [39m""[90m, [39m""[90m, [39m""[90m, [39m""[90m, [39m""[90m, [39m…
$ lastupdated           [3m[90m<chr>[39m[23m "2025-12-19"[90m, [

In [4]:
# Load the HDI dataset
# We use read_csv from tidyverse
hdi_raw <- read_csv("../data/HDI_data.csv", show_col_types = FALSE)

print("HDI Data Loaded:")
glimpse(hdi_raw)

[1] "HDI Data Loaded:"
Rows: 206
Columns: 1,112
$ iso3                   [3m[90m<chr>[39m[23m "AFG"[90m, [39m"ALB"[90m, [39m"DZA"[90m, [39m"AND"[90m, [39m"AGO"[90m, [39m"ATG"[90m, [39m"ARG"…
$ country                [3m[90m<chr>[39m[23m "Afghanistan"[90m, [39m"Albania"[90m, [39m"Algeria"[90m, [39m"Andorra"[90m,[39m…
$ hdicode                [3m[90m<chr>[39m[23m "Low"[90m, [39m"Very High"[90m, [39m"High"[90m, [39m"Very High"[90m, [39m"Mediu…
$ region                 [3m[90m<chr>[39m[23m "SA"[90m, [39m"ECA"[90m, [39m"AS"[90m, [39m[31mNA[39m[90m, [39m"SSA"[90m, [39m"LAC"[90m, [39m"LAC"[90m, [39m"EC…
$ hdi_rank_2023          [3m[90m<dbl>[39m[23m 181[90m, [39m71[90m, [39m96[90m, [39m32[90m, [39m148[90m, [39m53[90m, [39m47[90m, [39m69[90m, [39m7[90m, [39m22[90m, [39m81[90m, [39m66…
$ hdi_1990               [3m[90m<dbl>[39m[23m 0.285[90m, [39m0.654[90m, [39m0.595[90m, [39m[31mNA[39m[90m, [39m[

In [5]:
# Save the fetched World Bank data so we don't have to download it again
write_csv(wb_data, "../data/world_bank_raw.csv")

print("Raw World Bank data saved to '../data/world_bank_raw.csv'")

[1] "Raw World Bank data saved to '../data/world_bank_raw.csv'"
