In [42]:
# This notebook checks a compiled ESG database to make sure nothing is missing.
# The "master" database is built in R and saved in the repository as ESG_wdi.RData
# (see load_data.R). Two lines of R code export it to a CSV file for analysis here:
#
#   load(file="data/ESG_wdi.Rdata")   # creates a data frame named 'x'
#   write.csv(x, "~/esg.csv")

import pandas as pd
import wbgapi as wb

In [43]:
# Load the R-exported database and the indicator metadata.
esg = pd.read_csv("~/esg.csv")
# Read database_id as text: the archive filter below compares against string IDs
# and would silently match nothing if pandas inferred a numeric column.
meta = pd.read_csv("../data/esg_metadata.csv", dtype={"database_id": str})

# ignore indicators in archived databases (IDs 11 and 57);
# rows with a missing database_id are kept, as before
meta = meta[~meta["database_id"].isin(["11", "57"])]

# quick & dirty report of unique values per column (NaN counts as one value)
esg.nunique(dropna=False)

Unnamed: 0     486204
iso3c             202
date               61
value          322438
indicatorID       125
indicator         125
iso2c             202
country           224
dtype: int64

In [44]:
# Indicators listed in the metadata file but absent from the R-built database.
# Both sides are reduced to plain sets of indicator codes before differencing.
esg_cets = set(pd.unique(esg['indicatorID']))
meta_cets = set(meta['cetsid'])
meta_cets.difference(esg_cets)

{'EN.ATM.METH.PC', 'EN.ATM.NOXE.PC', 'WBL'}

In [46]:
# WDI economies (aggregates skipped) that do not appear in the database

wdi_countries = {economy['id'] for economy in wb.economy.list(skipAggs=True)}
esg_countries = set(esg['iso3c'].unique())
wb.economy.info(wdi_countries - esg_countries)

id,value,region,incomeLevel
CPV,Cabo Verde,SSF,LMC
CMR,Cameroon,SSF,LMC
COG,"Congo, Rep.",SSF,LMC
DMA,Dominica,LCN,UMC
GRD,Grenada,LCN,UMC
KEN,Kenya,SSF,LMC
MDA,Moldova,ECS,LMC
MNG,Mongolia,EAS,LMC
NGA,Nigeria,SSF,LMC
PAK,Pakistan,SAS,LMC


In [47]:
# Second copy of the data, produced by extract_esg.py;
# read from the current working directory.
esg2 = pd.read_csv(filepath_or_buffer="esg_api_data.csv")

In [53]:
# Metadata indicators that are missing from the API extract's 'series' column
esg2_cets = {code for code in esg2['series']}
meta_cets.difference(esg2_cets)

{'WBL'}

In [59]:
# WDI economies that are missing from the API extract's 'economy' column
esg2_countries = {code for code in esg2['economy']}
wdi_countries.difference(esg2_countries)

set()