In [1]:
import pandas as pd
import numpy as np

### Get all the pillar names from the excel

In [2]:
# Read the workbook that maps every indicator to its pillar, source and processed filename.
names = pd.read_excel(io='../../UNDP Digital Assessment Data Framework Filename Matching V7.xlsx')

In [3]:
# Columns of the matching workbook that the rest of the notebook uses.
col_names = [
    'Indicator',
    'check',
    'Data Source',
    'Data Link',
    'Index',
    'Filename',
    'Sub-Pillar',
]

In [4]:
# Drop every workbook column that is not listed in col_names.
names = names.loc[:, col_names]

In [5]:
# Preview the first rows of the trimmed matching table.
names.head()

Unnamed: 0,Indicator,check,Data Source,Data Link,Index,Filename,Sub-Pillar
0,Countries,,UN Statistics Division: List of Countries,https://unstats.un.org,False,Countries,
1,"Database of Global Administrative Areas (GADM,...",,,https://gadm.org,False,,
2,High Resolution Population Density Maps + Demo...,,,,False,,
3,population density vs openstreetmap object den...,,,,False,,
4,Population Density,Infrastructure,World Bank: World Development Indicators,https://datacatalog.worldbank.org,False,population_density,Connectivity Technology


In [6]:
# For each pillar (the `check` column), count the non-null filenames and indicators.
data_stats = names.groupby('check')[['Filename', 'Indicator']].count()

In [7]:
# Show the per-pillar counts of filenames and indicators.
data_stats

Unnamed: 0_level_0,Filename,Indicator
check,Unnamed: 1_level_1,Unnamed: 2_level_1
Business,18,27
Foundations,14,22
Government,10,15
Infrastructure,47,58
People,35,49
Regulation,5,8
Strategy,1,1


### Foundations

In [8]:
# Foundations-pillar rows that have a processed filename.
# (An extra filter on `names.Index == False` was tried and is left disabled.)
bnames = names[names['check'].eq('Foundations') & names['Filename'].notna()]

In [9]:
# Inspect the first 25 Foundations rows that have a filename.
bnames.head(25)

Unnamed: 0,Indicator,check,Data Source,Data Link,Index,Filename,Sub-Pillar
163,Digital payments penetration,Foundations,Portulans Institute: Network Readiness Index,https://networkreadinessindex.org,True,digital_payments_penetration,Digital Payments
164,% of population with digital finance account,Foundations,World Bank: Global Findex database,https://datacatalog.worldbank.org,False,population_digital_financial_services,Digital Payments
165,Made or received digital payments in the past ...,Foundations,World Bank: Global Findex database,https://datacatalog.worldbank.org,False,population_digital_financial_services,Digital Payments
166,Made or received digital payments in the past ...,Foundations,World Bank: Global Findex database,https://datacatalog.worldbank.org,False,population_digital_financial_services,Digital Payments
169,Used a mobile phone or the internet to check a...,Foundations,World Bank: Global Findex database,https://datacatalog.worldbank.org,False,population_digital_financial_services,Digital Payments
170,Used a mobile phone or the internet to check a...,Foundations,World Bank: Global Findex database,https://datacatalog.worldbank.org,False,population_digital_financial_services,Digital Payments
171,% Making online transactions - Men,Foundations,World Bank: Global Findex database,https://datacatalog.worldbank.org,False,population_digital_financial_services,Digital Payments
172,% Making online transactions - Women,Foundations,World Bank: Global Findex database,https://datacatalog.worldbank.org,False,population_digital_financial_services,Digital Payments
173,% of population with digital finance account -...,Foundations,World Bank: Global Findex database,https://datacatalog.worldbank.org,False,population_digital_financial_services,Digital Payments
178,% of population with ID (all),Foundations,World Bank: Identification For Development (ID4D),https://datacatalog.worldbank.org,False,id4d_nid,Digital Legal Identity


In [10]:
# Distinct indicator names and sub-pillar names of the Foundations pillar.
indicators = pd.unique(bnames['Indicator'])
subpillars = pd.unique(bnames['Sub-Pillar'])

In [11]:
# Distinct processed filenames referenced by the Foundations indicators.
bfiles = pd.unique(bnames['Filename'])

In [12]:
# Show the distinct processed filenames used by the Foundations pillar.
bfiles

array(['digital_payments_penetration',
       'population_digital_financial_services', 'id4d_nid',
       'id4d_services', 'Egov_strategy', 'open_data_idx'], dtype=object)

In [13]:
# Show the distinct sub-pillars of the Foundations pillar.
subpillars

array(['Digital Payments', 'Digital Legal Identity', 'Data Exchange'],
      dtype=object)

In [14]:
# ls digital-readiness-assessment-main/processed/

In [15]:
##ict_goods and services not in process data

In [16]:
# formula for converting a value from one numeric scale onto another (default 0-100 -> 1-6)
def convert_rank(old_value, old_min=0, old_max=100, new_min=1, new_max=6):
    """Linearly rescale ``old_value`` from ``[old_min, old_max]`` onto ``[new_min, new_max]``.

    ``old_min`` maps to ``new_min`` and ``old_max`` maps to ``new_max``; values
    outside the source range are extrapolated, not clipped.

    Args:
        old_value: number (or array-like / pandas Series) on the source scale.
        old_min: lower bound of the source scale.
        old_max: upper bound of the source scale.
        new_min: lower bound of the target scale.
        new_max: upper bound of the target scale.

    Returns:
        The rescaled value, with the same shape as ``old_value``.

    Raises:
        ValueError: if the scalar bounds ``old_min`` and ``old_max`` are equal,
            because a zero-width source range cannot be rescaled.
    """
    old_range = old_max - old_min
    # Guard scalar bounds only; array-like bounds keep plain elementwise arithmetic.
    if np.ndim(old_range) == 0 and old_range == 0:
        raise ValueError(
            'cannot rescale: old_min and old_max are equal ({!r})'.format(old_min)
        )
    new_range = new_max - new_min
    return ((old_value - old_min) * new_range) / old_range + new_min

### 1. Digital payments penetration

In [17]:
# Only the last expression of a cell is displayed, so this shows the first
# sub-pillar; the indicator lookup on the first line has no visible effect.
indicators[0]
subpillars[0]

'Digital Payments'

In [18]:
# Pick the first Foundations indicator, look up its processed file and load it.
indicator = indicators[0]
print(indicator)
bf = bnames.loc[bnames['Indicator'] == indicator, 'Filename'].values[0]
print(bf)

df = pd.read_csv(filepath_or_buffer='../../processed/{}.csv'.format(bf))

# Sub-pillar label attached to this indicator's scores.
subpillar = subpillars[0]
print(subpillar)

Digital payments penetration
digital_payments_penetration
Digital Payments


In [19]:
# Preview the first rows of the loaded table.
df.head()

Unnamed: 0,RANK,COUNTRY/ECONOMY,VALUE,SCORE
0,1.0,Norway,0.85,100.0
1,2.0,Denmark,0.83,97.24
2,3.0,Finland,0.8,93.95
3,4.0,Sweden,0.8,93.08
4,5.0,Netherlands,0.76,89.01


In [20]:
# SCORE looks like the column to use: the summary below shows it spanning 0-100,
# whereas RANK is an ordinal position and VALUE is on a different scale.
df.describe()

Unnamed: 0,RANK,VALUE,SCORE
count,122.0,122.0,122.0
mean,61.5,0.315738,36.076967
std,35.362409,0.209012,24.867406
min,1.0,0.01,0.0
25%,31.25,0.15,16.235
50%,61.5,0.27,30.87
75%,91.75,0.44,51.11
max,122.0,0.85,100.0


In [21]:
# df.Indicator.unique()

In [22]:
# create standard columns
df = df.rename(columns={'COUNTRY/ECONOMY': 'Country Name'})
df['higher_is_better'] = True
df['Indicator'] = indicator
df['data_col'] = df['SCORE']
df['Sub-Pillar'] = subpillar
# NOTE(review): the year is hard-coded; confirm it matches the edition of the source file.
df['Year'] = 2017

# Bounds of the observed scores; they define the source scale for the rescaling below.
min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# rescale SCORE linearly from its observed [min, max] onto the 1-6 scale
# (convert_rank is plain arithmetic, so it is applied to the whole column at once)
df['new_rank_score'] = convert_rank(df['data_col'], old_min=min_rank, old_max=max_rank)

In [23]:
# Keep the standard score columns for all countries so the exported file is complete.
df = df[['Country Name', 'Year','Indicator','data_col','new_rank_score','higher_is_better','Sub-Pillar']]

In [24]:
# Write this indicator's scores to the indicator_scores folder, without the row index.
df.to_csv(path_or_buf='../indicator_scores/foundations_{}_scores.csv'.format(indicator), index=False)

## 2. % of population with digital finance account


In [25]:
# Pick the second Foundations indicator, look up its processed file and load it.
indicator = indicators[1]
print(indicator)
bf = bnames.loc[bnames['Indicator'] == indicator, 'Filename'].values[0]
print(bf)

df = pd.read_csv(filepath_or_buffer='../../processed/{}.csv'.format(bf))

# Sub-pillar label attached to this indicator's scores.
subpillar = subpillars[0]
print(subpillar)

% of population with digital finance account
population_digital_financial_services
Digital Payments


In [26]:
# Preview the first 15 rows of the loaded table to inspect its columns.
df.head(15)

Unnamed: 0,Year,ISO,Country Name,Region,Income Group,Account (% age 15+),"Account, male (% age 15+)","Account, in labor force (% age 15+)","Account, out of labor force (% age 15+)","Account, female (% age 15+)",...,"Mobile money account, female (% age 15+)","Mobile money account, young adults (% age 15-24)","Mobile money account, older adults (% age 25+)","Mobile money account, primary education or less (% age 15+)","Mobile money account, secondary education or less (% age 15+)","Mobile money account, income, poorest 40% (% age 15+)","Mobile money account, income, richest 60% (% age 15+)","Mobile money account, rural (% age 15+)",data_country,data_year
0,2011,AFG,Afghanistan,South Asia,Low income,9%,15%,15%,2%,3%,...,,,,,,,,,,
1,2014,AFG,Afghanistan,South Asia,Low income,10%,16%,15%,4%,4%,...,0%,0%,0%,0%,0%,0%,1%,0%,,
2,2017,AFG,Afghanistan,South Asia,Low income,15%,23%,25%,4%,7%,...,1%,0%,1%,0%,2%,0%,1%,1%,,
3,2011,AGO,Angola,Sub-Saharan Africa (excluding high income),Lower middle income,39%,39%,46%,31%,39%,...,,,,,,,,,,
4,2014,AGO,Angola,Sub-Saharan Africa (excluding high income),Lower middle income,29%,36%,36%,12%,22%,...,,,,,,,,,,
5,2011,ALB,Albania,Europe & Central Asia (excluding high income),Upper middle income,28%,34%,36%,15%,23%,...,,,,,,,,,,
6,2014,ALB,Albania,Europe & Central Asia (excluding high income),Upper middle income,38%,43%,44%,29%,34%,...,,,,,,,,,,
7,2017,ALB,Albania,Europe & Central Asia (excluding high income),Upper middle income,40%,42%,53%,27%,38%,...,2%,6%,1%,1%,4%,0%,4%,2%,,
8,2011,ARB,Arab world,,,22%,30%,33%,11%,14%,...,,,,,,,,,,
9,2014,ARB,Arab world,,,30%,38%,42%,18%,22%,...,,,,,,,,,,


In [27]:
# Keep only the 2017 rows. Copy them so the column assignments below (and in the
# following cell) modify an independent frame rather than a slice of the full
# table, which is what triggers pandas' SettingWithCopyWarning.
df = df[(df.Year == 2017)].copy()

# Must convert the data to float by removing the % sign
pct_col = 'Account (% age 15+)'
df[pct_col] = df[pct_col].str.replace('%', '', regex=False).astype(float)

In [28]:
# create standard columns
# (the loaded table already has a `Country Name` column, so no rename is needed)
df['higher_is_better'] = True
df['Indicator'] = indicator
df['data_col'] = df['Account (% age 15+)']
df['Sub-Pillar'] = subpillar

# NOTE(review): the 2017 rows include aggregates such as 'Arab world' and 'World';
# they take part in the min/max below - confirm that is intended.
min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# rescale the percentage linearly from its observed [min, max] onto the 1-6 scale
df['new_rank_score'] = convert_rank(df['data_col'], old_min=min_rank, old_max=max_rank)

In [29]:
# Reduce the frame to the standard score columns and display it.
df = df.loc[:, ['Country Name', 'Year', 'Indicator', 'data_col', 'new_rank_score', 'higher_is_better', 'Sub-Pillar']]
df

Unnamed: 0,Country Name,Year,Indicator,data_col,new_rank_score,higher_is_better,Sub-Pillar
2,Afghanistan,2017,% of population with digital finance account,15.0,1.329670,True,Digital Payments
7,Albania,2017,% of population with digital finance account,40.0,2.703297,True,Digital Payments
10,Arab world,2017,% of population with digital finance account,37.0,2.538462,True,Digital Payments
13,United Arab Emirates,2017,% of population with digital finance account,88.0,5.340659,True,Digital Payments
16,Argentina,2017,% of population with digital finance account,49.0,3.197802,True,Digital Payments
...,...,...,...,...,...,...,...
479,World,2017,% of population with digital finance account,69.0,4.296703,True,Digital Payments
482,Kosovo,2017,% of population with digital finance account,52.0,3.362637,True,Digital Payments
487,South Africa,2017,% of population with digital finance account,69.0,4.296703,True,Digital Payments
490,Zambia,2017,% of population with digital finance account,46.0,3.032967,True,Digital Payments


In [30]:
# Write this indicator's scores to the indicator_scores folder, without the row index.
df.to_csv(path_or_buf='../indicator_scores/foundations_{}_scores.csv'.format(indicator), index=False)

In [31]:
## 3. Made or received digital payments in the past year (% age 15+) - all

In [32]:
# Pick the third Foundations indicator, look up its processed file and load it.
indicator = indicators[2]
print(indicator)
bf = bnames.loc[bnames['Indicator'] == indicator, 'Filename'].values[0]
print(bf)

df = pd.read_csv(filepath_or_buffer='../../processed/{}.csv'.format(bf))

# Sub-pillar label attached to this indicator's scores.
subpillar = subpillars[0]
print(subpillar)

Made or received digital payments in the past year (% age 15+) - all
population_digital_financial_services
Digital Payments


In [33]:
# Preview the first 15 rows of the loaded table to inspect its columns.
df.head(15)

Unnamed: 0,Year,ISO,Country Name,Region,Income Group,Account (% age 15+),"Account, male (% age 15+)","Account, in labor force (% age 15+)","Account, out of labor force (% age 15+)","Account, female (% age 15+)",...,"Mobile money account, female (% age 15+)","Mobile money account, young adults (% age 15-24)","Mobile money account, older adults (% age 25+)","Mobile money account, primary education or less (% age 15+)","Mobile money account, secondary education or less (% age 15+)","Mobile money account, income, poorest 40% (% age 15+)","Mobile money account, income, richest 60% (% age 15+)","Mobile money account, rural (% age 15+)",data_country,data_year
0,2011,AFG,Afghanistan,South Asia,Low income,9%,15%,15%,2%,3%,...,,,,,,,,,,
1,2014,AFG,Afghanistan,South Asia,Low income,10%,16%,15%,4%,4%,...,0%,0%,0%,0%,0%,0%,1%,0%,,
2,2017,AFG,Afghanistan,South Asia,Low income,15%,23%,25%,4%,7%,...,1%,0%,1%,0%,2%,0%,1%,1%,,
3,2011,AGO,Angola,Sub-Saharan Africa (excluding high income),Lower middle income,39%,39%,46%,31%,39%,...,,,,,,,,,,
4,2014,AGO,Angola,Sub-Saharan Africa (excluding high income),Lower middle income,29%,36%,36%,12%,22%,...,,,,,,,,,,
5,2011,ALB,Albania,Europe & Central Asia (excluding high income),Upper middle income,28%,34%,36%,15%,23%,...,,,,,,,,,,
6,2014,ALB,Albania,Europe & Central Asia (excluding high income),Upper middle income,38%,43%,44%,29%,34%,...,,,,,,,,,,
7,2017,ALB,Albania,Europe & Central Asia (excluding high income),Upper middle income,40%,42%,53%,27%,38%,...,2%,6%,1%,1%,4%,0%,4%,2%,,
8,2011,ARB,Arab world,,,22%,30%,33%,11%,14%,...,,,,,,,,,,
9,2014,ARB,Arab world,,,30%,38%,42%,18%,22%,...,,,,,,,,,,


In [34]:
# Keep only the 2017 rows. Copy them so the column assignments below (and in the
# following cell) modify an independent frame rather than a slice of the full
# table, which is what triggers pandas' SettingWithCopyWarning.
df = df[(df.Year == 2017)].copy()

# Must convert the data to float by removing the % sign
pct_col = 'Made or received digital payments in the past year (% age 15+)'
df[pct_col] = df[pct_col].str.replace('%', '', regex=False).astype(float)

In [35]:
# create standard columns
# (the loaded table already has a `Country Name` column, so no rename is needed)
df['higher_is_better'] = True
df['Indicator'] = indicator
df['data_col'] = df['Made or received digital payments in the past year (% age 15+)']
df['Sub-Pillar'] = subpillar

# NOTE(review): the 2017 rows include aggregates such as 'Arab world' and 'World';
# they take part in the min/max below - confirm that is intended.
min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# rescale the percentage linearly from its observed [min, max] onto the 1-6 scale
df['new_rank_score'] = convert_rank(df['data_col'], old_min=min_rank, old_max=max_rank)

In [36]:
# Reduce the frame to the standard score columns and display it.
df = df.loc[:, ['Country Name', 'Year', 'Indicator', 'data_col', 'new_rank_score', 'higher_is_better', 'Sub-Pillar']]
df

Unnamed: 0,Country Name,Year,Indicator,data_col,new_rank_score,higher_is_better,Sub-Pillar
2,Afghanistan,2017,Made or received digital payments in the past ...,11.0,1.217391,True,Digital Payments
7,Albania,2017,Made or received digital payments in the past ...,29.0,2.195652,True,Digital Payments
10,Arab world,2017,Made or received digital payments in the past ...,26.0,2.032609,True,Digital Payments
13,United Arab Emirates,2017,Made or received digital payments in the past ...,84.0,5.184783,True,Digital Payments
16,Argentina,2017,Made or received digital payments in the past ...,40.0,2.793478,True,Digital Payments
...,...,...,...,...,...,...,...
479,World,2017,Made or received digital payments in the past ...,52.0,3.445652,True,Digital Payments
482,Kosovo,2017,Made or received digital payments in the past ...,39.0,2.739130,True,Digital Payments
487,South Africa,2017,Made or received digital payments in the past ...,60.0,3.880435,True,Digital Payments
490,Zambia,2017,Made or received digital payments in the past ...,39.0,2.739130,True,Digital Payments


In [37]:
# Write this indicator's scores to the indicator_scores folder, without the row index.
df.to_csv(path_or_buf='../indicator_scores/foundations_{}_scores.csv'.format(indicator), index=False)

In [38]:
## 4. Made or received digital payments in the past year (% age 15+) - female

In [39]:
# Pick the fourth Foundations indicator, look up its processed file and load it.
indicator = indicators[3]
print(indicator)
bf = bnames.loc[bnames['Indicator'] == indicator, 'Filename'].values[0]
print(bf)

df = pd.read_csv(filepath_or_buffer='../../processed/{}.csv'.format(bf))

# Sub-pillar label attached to this indicator's scores.
subpillar = subpillars[0]
print(subpillar)

Made or received digital payments in the past year (% age 15+) - female
population_digital_financial_services
Digital Payments


In [40]:
# Preview the first 15 rows of the loaded table to inspect its columns.
df.head(15)

Unnamed: 0,Year,ISO,Country Name,Region,Income Group,Account (% age 15+),"Account, male (% age 15+)","Account, in labor force (% age 15+)","Account, out of labor force (% age 15+)","Account, female (% age 15+)",...,"Mobile money account, female (% age 15+)","Mobile money account, young adults (% age 15-24)","Mobile money account, older adults (% age 25+)","Mobile money account, primary education or less (% age 15+)","Mobile money account, secondary education or less (% age 15+)","Mobile money account, income, poorest 40% (% age 15+)","Mobile money account, income, richest 60% (% age 15+)","Mobile money account, rural (% age 15+)",data_country,data_year
0,2011,AFG,Afghanistan,South Asia,Low income,9%,15%,15%,2%,3%,...,,,,,,,,,,
1,2014,AFG,Afghanistan,South Asia,Low income,10%,16%,15%,4%,4%,...,0%,0%,0%,0%,0%,0%,1%,0%,,
2,2017,AFG,Afghanistan,South Asia,Low income,15%,23%,25%,4%,7%,...,1%,0%,1%,0%,2%,0%,1%,1%,,
3,2011,AGO,Angola,Sub-Saharan Africa (excluding high income),Lower middle income,39%,39%,46%,31%,39%,...,,,,,,,,,,
4,2014,AGO,Angola,Sub-Saharan Africa (excluding high income),Lower middle income,29%,36%,36%,12%,22%,...,,,,,,,,,,
5,2011,ALB,Albania,Europe & Central Asia (excluding high income),Upper middle income,28%,34%,36%,15%,23%,...,,,,,,,,,,
6,2014,ALB,Albania,Europe & Central Asia (excluding high income),Upper middle income,38%,43%,44%,29%,34%,...,,,,,,,,,,
7,2017,ALB,Albania,Europe & Central Asia (excluding high income),Upper middle income,40%,42%,53%,27%,38%,...,2%,6%,1%,1%,4%,0%,4%,2%,,
8,2011,ARB,Arab world,,,22%,30%,33%,11%,14%,...,,,,,,,,,,
9,2014,ARB,Arab world,,,30%,38%,42%,18%,22%,...,,,,,,,,,,


In [41]:
# Keep only the 2017 rows. Copy them so the column assignments below (and in the
# following cell) modify an independent frame rather than a slice of the full
# table, which is what triggers pandas' SettingWithCopyWarning.
df = df[(df.Year == 2017)].copy()

# Must convert the data to float by removing the % sign
# (the column name really contains two spaces before the parenthesis)
pct_col = 'Made or received digital payments in the past year, female  (% age 15+)'
df[pct_col] = df[pct_col].str.replace('%', '', regex=False).astype(float)

In [42]:
# create standard columns
# (the loaded table already has a `Country Name` column, so no rename is needed)
df['higher_is_better'] = True
df['Indicator'] = indicator
df['data_col'] = df['Made or received digital payments in the past year, female  (% age 15+)']
df['Sub-Pillar'] = subpillar

# NOTE(review): the 2017 rows include aggregates such as 'Arab world' and 'World';
# they take part in the min/max below - confirm that is intended.
min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# rescale the percentage linearly from its observed [min, max] onto the 1-6 scale
df['new_rank_score'] = convert_rank(df['data_col'], old_min=min_rank, old_max=max_rank)

In [43]:
# Reduce the frame to the standard score columns and display it.
df = df.loc[:, ['Country Name', 'Year', 'Indicator', 'data_col', 'new_rank_score', 'higher_is_better', 'Sub-Pillar']]
df

Unnamed: 0,Country Name,Year,Indicator,data_col,new_rank_score,higher_is_better,Sub-Pillar
2,Afghanistan,2017,Made or received digital payments in the past ...,4.0,1.000000,True,Digital Payments
7,Albania,2017,Made or received digital payments in the past ...,28.0,2.250000,True,Digital Payments
10,Arab world,2017,Made or received digital payments in the past ...,16.0,1.625000,True,Digital Payments
13,United Arab Emirates,2017,Made or received digital payments in the past ...,68.0,4.333333,True,Digital Payments
16,Argentina,2017,Made or received digital payments in the past ...,42.0,2.979167,True,Digital Payments
...,...,...,...,...,...,...,...
479,World,2017,Made or received digital payments in the past ...,49.0,3.343750,True,Digital Payments
482,Kosovo,2017,Made or received digital payments in the past ...,29.0,2.302083,True,Digital Payments
487,South Africa,2017,Made or received digital payments in the past ...,61.0,3.968750,True,Digital Payments
490,Zambia,2017,Made or received digital payments in the past ...,35.0,2.614583,True,Digital Payments


In [44]:
# Write this indicator's scores to the indicator_scores folder, without the row index.
df.to_csv(path_or_buf='../indicator_scores/foundations_{}_scores.csv'.format(indicator), index=False)

In [45]:
## 5. Used a mobile phone or the internet to check account balance in the past year (% age 15+) - all

In [46]:
# Pick the fifth Foundations indicator, look up its processed file and load it.
indicator = indicators[4]
print(indicator)
bf = bnames.loc[bnames['Indicator'] == indicator, 'Filename'].values[0]
print(bf)

df = pd.read_csv(filepath_or_buffer='../../processed/{}.csv'.format(bf))

# Sub-pillar label attached to this indicator's scores.
subpillar = subpillars[0]
print(subpillar)

Used a mobile phone or the internet to check account balance in the past year (% age 15+) - all
population_digital_financial_services
Digital Payments


In [47]:
# Preview the first 15 rows of the loaded table to inspect its columns.
df.head(15)

Unnamed: 0,Year,ISO,Country Name,Region,Income Group,Account (% age 15+),"Account, male (% age 15+)","Account, in labor force (% age 15+)","Account, out of labor force (% age 15+)","Account, female (% age 15+)",...,"Mobile money account, female (% age 15+)","Mobile money account, young adults (% age 15-24)","Mobile money account, older adults (% age 25+)","Mobile money account, primary education or less (% age 15+)","Mobile money account, secondary education or less (% age 15+)","Mobile money account, income, poorest 40% (% age 15+)","Mobile money account, income, richest 60% (% age 15+)","Mobile money account, rural (% age 15+)",data_country,data_year
0,2011,AFG,Afghanistan,South Asia,Low income,9%,15%,15%,2%,3%,...,,,,,,,,,,
1,2014,AFG,Afghanistan,South Asia,Low income,10%,16%,15%,4%,4%,...,0%,0%,0%,0%,0%,0%,1%,0%,,
2,2017,AFG,Afghanistan,South Asia,Low income,15%,23%,25%,4%,7%,...,1%,0%,1%,0%,2%,0%,1%,1%,,
3,2011,AGO,Angola,Sub-Saharan Africa (excluding high income),Lower middle income,39%,39%,46%,31%,39%,...,,,,,,,,,,
4,2014,AGO,Angola,Sub-Saharan Africa (excluding high income),Lower middle income,29%,36%,36%,12%,22%,...,,,,,,,,,,
5,2011,ALB,Albania,Europe & Central Asia (excluding high income),Upper middle income,28%,34%,36%,15%,23%,...,,,,,,,,,,
6,2014,ALB,Albania,Europe & Central Asia (excluding high income),Upper middle income,38%,43%,44%,29%,34%,...,,,,,,,,,,
7,2017,ALB,Albania,Europe & Central Asia (excluding high income),Upper middle income,40%,42%,53%,27%,38%,...,2%,6%,1%,1%,4%,0%,4%,2%,,
8,2011,ARB,Arab world,,,22%,30%,33%,11%,14%,...,,,,,,,,,,
9,2014,ARB,Arab world,,,30%,38%,42%,18%,22%,...,,,,,,,,,,


In [48]:
# Keep only the 2017 rows. Copy them so the column assignments below (and in the
# following cell) modify an independent frame rather than a slice of the full
# table, which is what triggers pandas' SettingWithCopyWarning.
df = df[(df.Year == 2017)].copy()

# Must convert the data to float by removing the % sign
# (the column name really ends with three trailing spaces)
pct_col = 'Used a mobile phone or the internet to access an account (% age 15+)   '
df[pct_col] = df[pct_col].str.replace('%', '', regex=False).astype(float)

In [49]:
# create standard columns
# (the loaded table already has a `Country Name` column, so no rename is needed)
df['higher_is_better'] = True
df['Indicator'] = indicator
df['data_col'] = df['Used a mobile phone or the internet to access an account (% age 15+)   ']
df['Sub-Pillar'] = subpillar

# NOTE(review): the 2017 rows include aggregates such as 'Arab world' and 'World';
# they take part in the min/max below - confirm that is intended.
min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# rescale the percentage linearly from its observed [min, max] onto the 1-6 scale
df['new_rank_score'] = convert_rank(df['data_col'], old_min=min_rank, old_max=max_rank)

In [50]:
# Reduce the frame to the standard score columns and display it.
df = df.loc[:, ['Country Name', 'Year', 'Indicator', 'data_col', 'new_rank_score', 'higher_is_better', 'Sub-Pillar']]
df

Unnamed: 0,Country Name,Year,Indicator,data_col,new_rank_score,higher_is_better,Sub-Pillar
2,Afghanistan,2017,Used a mobile phone or the internet to check a...,1.0,1.058824,True,Digital Payments
7,Albania,2017,Used a mobile phone or the internet to check a...,4.0,1.235294,True,Digital Payments
10,Arab world,2017,Used a mobile phone or the internet to check a...,6.0,1.352941,True,Digital Payments
13,United Arab Emirates,2017,Used a mobile phone or the internet to check a...,47.0,3.764706,True,Digital Payments
16,Argentina,2017,Used a mobile phone or the internet to check a...,10.0,1.588235,True,Digital Payments
...,...,...,...,...,...,...,...
479,World,2017,Used a mobile phone or the internet to check a...,25.0,2.470588,True,Digital Payments
482,Kosovo,2017,Used a mobile phone or the internet to check a...,5.0,1.294118,True,Digital Payments
487,South Africa,2017,Used a mobile phone or the internet to check a...,21.0,2.235294,True,Digital Payments
490,Zambia,2017,Used a mobile phone or the internet to check a...,26.0,2.529412,True,Digital Payments


In [51]:
# Write this indicator's scores to the indicator_scores folder, without the row index.
df.to_csv(path_or_buf='../indicator_scores/foundations_{}_scores.csv'.format(indicator), index=False)

In [52]:
## 6. Used a mobile phone or the internet to check account balance in the past year (% age 15+) - female

In [53]:
# Pick the sixth Foundations indicator, look up its processed file and load it.
indicator = indicators[5]
print(indicator)
bf = bnames.loc[bnames['Indicator'] == indicator, 'Filename'].values[0]
print(bf)

df = pd.read_csv(filepath_or_buffer='../../processed/{}.csv'.format(bf))

# Sub-pillar label attached to this indicator's scores.
subpillar = subpillars[0]
print(subpillar)

Used a mobile phone or the internet to check account balance in the past year (% age 15+) - female
population_digital_financial_services
Digital Payments


In [54]:
# Preview the first 15 rows of the loaded table to inspect its columns.
df.head(15)

Unnamed: 0,Year,ISO,Country Name,Region,Income Group,Account (% age 15+),"Account, male (% age 15+)","Account, in labor force (% age 15+)","Account, out of labor force (% age 15+)","Account, female (% age 15+)",...,"Mobile money account, female (% age 15+)","Mobile money account, young adults (% age 15-24)","Mobile money account, older adults (% age 25+)","Mobile money account, primary education or less (% age 15+)","Mobile money account, secondary education or less (% age 15+)","Mobile money account, income, poorest 40% (% age 15+)","Mobile money account, income, richest 60% (% age 15+)","Mobile money account, rural (% age 15+)",data_country,data_year
0,2011,AFG,Afghanistan,South Asia,Low income,9%,15%,15%,2%,3%,...,,,,,,,,,,
1,2014,AFG,Afghanistan,South Asia,Low income,10%,16%,15%,4%,4%,...,0%,0%,0%,0%,0%,0%,1%,0%,,
2,2017,AFG,Afghanistan,South Asia,Low income,15%,23%,25%,4%,7%,...,1%,0%,1%,0%,2%,0%,1%,1%,,
3,2011,AGO,Angola,Sub-Saharan Africa (excluding high income),Lower middle income,39%,39%,46%,31%,39%,...,,,,,,,,,,
4,2014,AGO,Angola,Sub-Saharan Africa (excluding high income),Lower middle income,29%,36%,36%,12%,22%,...,,,,,,,,,,
5,2011,ALB,Albania,Europe & Central Asia (excluding high income),Upper middle income,28%,34%,36%,15%,23%,...,,,,,,,,,,
6,2014,ALB,Albania,Europe & Central Asia (excluding high income),Upper middle income,38%,43%,44%,29%,34%,...,,,,,,,,,,
7,2017,ALB,Albania,Europe & Central Asia (excluding high income),Upper middle income,40%,42%,53%,27%,38%,...,2%,6%,1%,1%,4%,0%,4%,2%,,
8,2011,ARB,Arab world,,,22%,30%,33%,11%,14%,...,,,,,,,,,,
9,2014,ARB,Arab world,,,30%,38%,42%,18%,22%,...,,,,,,,,,,


In [55]:
# Keep only the 2017 rows. Copy them so the column assignments below (and in the
# following cell) modify an independent frame rather than a slice of the full
# table, which is what triggers pandas' SettingWithCopyWarning.
df = df[(df.Year == 2017)].copy()

# Must convert the data to float by removing the % sign
# (the column name really contains a double space and three trailing spaces)
pct_col = 'Used a mobile phone or the internet to access an account, female  (% age 15+)   '
df[pct_col] = df[pct_col].str.replace('%', '', regex=False).astype(float)

In [56]:
# create standard columns
# (the loaded table already has a `Country Name` column, so no rename is needed)
df['higher_is_better'] = True
df['Indicator'] = indicator
df['data_col'] = df['Used a mobile phone or the internet to access an account, female  (% age 15+)   ']
df['Sub-Pillar'] = subpillar

# NOTE(review): the 2017 rows include aggregates such as 'Arab world' and 'World';
# they take part in the min/max below - confirm that is intended.
min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# rescale the percentage linearly from its observed [min, max] onto the 1-6 scale
df['new_rank_score'] = convert_rank(df['data_col'], old_min=min_rank, old_max=max_rank)

In [57]:
# Reduce the frame to the standard score columns and display it.
df = df.loc[:, ['Country Name', 'Year', 'Indicator', 'data_col', 'new_rank_score', 'higher_is_better', 'Sub-Pillar']]
df

Unnamed: 0,Country Name,Year,Indicator,data_col,new_rank_score,higher_is_better,Sub-Pillar
2,Afghanistan,2017,Used a mobile phone or the internet to check a...,1.0,1.058824,True,Digital Payments
7,Albania,2017,Used a mobile phone or the internet to check a...,4.0,1.235294,True,Digital Payments
10,Arab world,2017,Used a mobile phone or the internet to check a...,3.0,1.176471,True,Digital Payments
13,United Arab Emirates,2017,Used a mobile phone or the internet to check a...,37.0,3.176471,True,Digital Payments
16,Argentina,2017,Used a mobile phone or the internet to check a...,8.0,1.470588,True,Digital Payments
...,...,...,...,...,...,...,...
479,World,2017,Used a mobile phone or the internet to check a...,23.0,2.352941,True,Digital Payments
482,Kosovo,2017,Used a mobile phone or the internet to check a...,4.0,1.235294,True,Digital Payments
487,South Africa,2017,Used a mobile phone or the internet to check a...,18.0,2.058824,True,Digital Payments
490,Zambia,2017,Used a mobile phone or the internet to check a...,25.0,2.470588,True,Digital Payments


In [58]:
# Write this indicator's scores to the indicator_scores folder, without the row index.
df.to_csv(path_or_buf='../indicator_scores/foundations_{}_scores.csv'.format(indicator), index=False)

In [59]:
## 7. % Making online transactions - Men

In [60]:
# Pick the seventh Foundations indicator, look up its processed file and load it.
indicator = indicators[6]
print(indicator)
bf = bnames.loc[bnames['Indicator'] == indicator, 'Filename'].values[0]
print(bf)

df = pd.read_csv(filepath_or_buffer='../../processed/{}.csv'.format(bf))

# Sub-pillar label attached to this indicator's scores.
subpillar = subpillars[0]
print(subpillar)

% Making online transactions - Men
population_digital_financial_services
Digital Payments


In [61]:
# Keep only the 2017 rows. Copy them so the column assignments below (and in the
# following cell) modify an independent frame rather than a slice of the full
# table, which is what triggers pandas' SettingWithCopyWarning.
df = df[(df.Year == 2017)].copy()

# Must convert the data to float by removing the % sign
pct_col = 'Used the internet to pay bills or to buy something online in the past year, male (% age 15+)'
df[pct_col] = df[pct_col].str.replace('%', '', regex=False).astype(float)

In [62]:
# create standard columns
# (the selection cell that follows finds `Country Name` without a rename, so none is needed)
df['higher_is_better'] = True
df['Indicator'] = indicator
df['data_col'] = df['Used the internet to pay bills or to buy something online in the past year, male (% age 15+)']
df['Sub-Pillar'] = subpillar

# NOTE(review): the 2017 rows include aggregates such as 'Arab world' and 'World';
# they take part in the min/max below - confirm that is intended.
min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# rescale the percentage linearly from its observed [min, max] onto the 1-6 scale
df['new_rank_score'] = convert_rank(df['data_col'], old_min=min_rank, old_max=max_rank)

In [63]:
# Reduce the frame to the standard score columns and display it.
df = df.loc[:, ['Country Name', 'Year', 'Indicator', 'data_col', 'new_rank_score', 'higher_is_better', 'Sub-Pillar']]
df

Unnamed: 0,Country Name,Year,Indicator,data_col,new_rank_score,higher_is_better,Sub-Pillar
2,Afghanistan,2017,% Making online transactions - Men,1.0,1.000000,True,Digital Payments
7,Albania,2017,% Making online transactions - Men,9.0,1.439560,True,Digital Payments
10,Arab world,2017,% Making online transactions - Men,12.0,1.604396,True,Digital Payments
13,United Arab Emirates,2017,% Making online transactions - Men,62.0,4.351648,True,Digital Payments
16,Argentina,2017,% Making online transactions - Men,21.0,2.098901,True,Digital Payments
...,...,...,...,...,...,...,...
479,World,2017,% Making online transactions - Men,30.0,2.593407,True,Digital Payments
482,Kosovo,2017,% Making online transactions - Men,18.0,1.934066,True,Digital Payments
487,South Africa,2017,% Making online transactions - Men,17.0,1.879121,True,Digital Payments
490,Zambia,2017,% Making online transactions - Men,14.0,1.714286,True,Digital Payments


In [64]:
# Write this indicator's scores, without the row index.
# NOTE(review): the filename is hard-coded and omits the leading "% " of the
# indicator name, unlike the other exports, which format the indicator name into
# the path - confirm whether downstream code expects this exact name.
df.to_csv(path_or_buf='../indicator_scores/foundations_Making online transactions - Men_scores.csv', index=False)

In [65]:
## 8. % Making online transactions - Women

In [66]:
# Pick the eighth Foundations indicator, look up its processed file and load it.
indicator = indicators[7]
print(indicator)
bf = bnames.loc[bnames['Indicator'] == indicator, 'Filename'].values[0]
print(bf)

df = pd.read_csv(filepath_or_buffer='../../processed/{}.csv'.format(bf))

# Sub-pillar label attached to this indicator's scores.
subpillar = subpillars[0]
print(subpillar)

% Making online transactions - Women
population_digital_financial_services
Digital Payments


In [67]:
# Keep only the 2017 rows. Copy them so the column assignments below (and in the
# following cell) modify an independent frame rather than a slice of the full
# table, which is what triggers pandas' SettingWithCopyWarning.
df = df[(df.Year == 2017)].copy()

# Must convert the data to float by removing the % sign
pct_col = 'Used the internet to pay bills or to buy something online in the past year, female (% age 15+)'
df[pct_col] = df[pct_col].str.replace('%', '', regex=False).astype(float)

In [68]:
# Standard score-table columns shared by every indicator output.
df.rename(columns={'Country': 'Country Name'}, inplace=True)
raw_col = 'Used the internet to pay bills or to buy something online in the past year, female (% age 15+)'
df['higher_is_better'] = True
df['Indicator'] = indicator
df['data_col'] = df[raw_col]
df['Sub-Pillar'] = subpillar

# Observed range of the raw percentages; used as the "old" range below.
min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# Rescale every raw value with convert_rank, using the observed min/max as the
# old range (the smallest value scores 1.0 in the recorded output).
# Series.apply forwards the keyword arguments to convert_rank.
df['new_rank_score'] = df['data_col'].apply(convert_rank, old_min=min_rank, old_max=max_rank)

In [69]:
df = df[['Country Name', 'Year','Indicator','data_col','new_rank_score','higher_is_better','Sub-Pillar']]
df

Unnamed: 0,Country Name,Year,Indicator,data_col,new_rank_score,higher_is_better,Sub-Pillar
2,Afghanistan,2017,% Making online transactions - Women,0.0,1.000000,True,Digital Payments
7,Albania,2017,% Making online transactions - Women,6.0,1.337079,True,Digital Payments
10,Arab world,2017,% Making online transactions - Women,6.0,1.337079,True,Digital Payments
13,United Arab Emirates,2017,% Making online transactions - Women,54.0,4.033708,True,Digital Payments
16,Argentina,2017,% Making online transactions - Women,16.0,1.898876,True,Digital Payments
...,...,...,...,...,...,...,...
479,World,2017,% Making online transactions - Women,28.0,2.573034,True,Digital Payments
482,Kosovo,2017,% Making online transactions - Women,12.0,1.674157,True,Digital Payments
487,South Africa,2017,% Making online transactions - Women,12.0,1.674157,True,Digital Payments
490,Zambia,2017,% Making online transactions - Women,9.0,1.505618,True,Digital Payments


In [70]:
df.to_csv('../indicator_scores/foundations_{}_scores.csv'.format(indicator), index=False)

In [71]:
## 9. % of population with digital finance account - registered

# Find the processed-data filename registered for this indicator in bnames.
indicator = indicators[8]
print(indicator)
filename_matches = bnames.loc[bnames['Indicator'] == indicator, 'Filename']
bf = filename_matches.values[0]
print(bf)

# Load the processed CSV behind this indicator.
processed_path = '../../processed/{}.csv'.format(bf)
df = pd.read_csv(processed_path)

# This indicator is filed under the first entry of subpillars.
subpillar = subpillars[0]
print(subpillar)

% of population with digital finance account - registered
population_digital_financial_services
Digital Payments


In [72]:
df.head()

Unnamed: 0,Year,ISO,Country Name,Region,Income Group,Account (% age 15+),"Account, male (% age 15+)","Account, in labor force (% age 15+)","Account, out of labor force (% age 15+)","Account, female (% age 15+)",...,"Mobile money account, female (% age 15+)","Mobile money account, young adults (% age 15-24)","Mobile money account, older adults (% age 25+)","Mobile money account, primary education or less (% age 15+)","Mobile money account, secondary education or less (% age 15+)","Mobile money account, income, poorest 40% (% age 15+)","Mobile money account, income, richest 60% (% age 15+)","Mobile money account, rural (% age 15+)",data_country,data_year
0,2011,AFG,Afghanistan,South Asia,Low income,9%,15%,15%,2%,3%,...,,,,,,,,,,
1,2014,AFG,Afghanistan,South Asia,Low income,10%,16%,15%,4%,4%,...,0%,0%,0%,0%,0%,0%,1%,0%,,
2,2017,AFG,Afghanistan,South Asia,Low income,15%,23%,25%,4%,7%,...,1%,0%,1%,0%,2%,0%,1%,1%,,
3,2011,AGO,Angola,Sub-Saharan Africa (excluding high income),Lower middle income,39%,39%,46%,31%,39%,...,,,,,,,,,,
4,2014,AGO,Angola,Sub-Saharan Africa (excluding high income),Lower middle income,29%,36%,36%,12%,22%,...,,,,,,,,,,


In [73]:
# Keep only the 2017 rows. Take an explicit copy so the column assignment
# below writes to an independent frame rather than to a slice of the loaded
# data (this is what triggers pandas' SettingWithCopyWarning).
df = df[df.Year == 2017].copy()

# The percentages are stored as text such as "15%": strip the literal '%'
# sign (regex=False, so it is never treated as a pattern) and convert the
# column to float. The column name really does end with a space.
pct_col = 'Financial institution account (% age 15+) '
df[pct_col] = df[pct_col].str.replace('%', '', regex=False).astype(float)

In [74]:
# Standard score-table columns shared by every indicator output.
df.rename(columns={'Country': 'Country Name'}, inplace=True)
raw_col = 'Financial institution account (% age 15+) '
df['higher_is_better'] = True
df['Indicator'] = indicator
df['data_col'] = df[raw_col]
df['Sub-Pillar'] = subpillar

# Observed range of the raw percentages; used as the "old" range below.
min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# Rescale every raw value with convert_rank, using the observed min/max as the
# old range. Series.apply forwards the keyword arguments to convert_rank.
df['new_rank_score'] = df['data_col'].apply(convert_rank, old_min=min_rank, old_max=max_rank)

In [75]:
df = df[['Country Name', 'Year','Indicator','data_col','new_rank_score','higher_is_better','Sub-Pillar']]
df

Unnamed: 0,Country Name,Year,Indicator,data_col,new_rank_score,higher_is_better,Sub-Pillar
2,Afghanistan,2017,% of population with digital finance account -...,15.0,1.329670,True,Digital Payments
7,Albania,2017,% of population with digital finance account -...,39.0,2.648352,True,Digital Payments
10,Arab world,2017,% of population with digital finance account -...,37.0,2.538462,True,Digital Payments
13,United Arab Emirates,2017,% of population with digital finance account -...,87.0,5.285714,True,Digital Payments
16,Argentina,2017,% of population with digital finance account -...,48.0,3.142857,True,Digital Payments
...,...,...,...,...,...,...,...
479,World,2017,% of population with digital finance account -...,67.0,4.186813,True,Digital Payments
482,Kosovo,2017,% of population with digital finance account -...,52.0,3.362637,True,Digital Payments
487,South Africa,2017,% of population with digital finance account -...,67.0,4.186813,True,Digital Payments
490,Zambia,2017,% of population with digital finance account -...,36.0,2.483516,True,Digital Payments


In [76]:
# Output scores. index=False matches the other indicator score files; without
# it the row index is written as an extra unnamed column, which then shows up
# as 'Unnamed: 0' when the score files are read back and concatenated.
df.to_csv('../indicator_scores/foundations_Financial institution account_scores.csv', index=False)

## 10. % of population with ID (Total)


In [77]:
# Indicator 10: find the processed-data filename registered for it in bnames.
indicator = indicators[9]
print(indicator)
filename_matches = bnames.loc[bnames['Indicator'] == indicator, 'Filename']
bf = filename_matches.values[0]
print(bf)

# NOTE(review): the recorded run of this cell stopped here with
# "ParserError: Expected 26 fields in line 8, saw 43", so the processed file
# does not parse as a plain CSV. It presumably needs cleaning or extra
# read_csv options; confirm against the file before relying on this section.
processed_path = '../../processed/{}.csv'.format(bf)
df = pd.read_csv(processed_path)

# This indicator is filed under the second entry of subpillars.
subpillar = subpillars[1]
print(subpillar)

% of population with ID (all)
id4d_nid


ParserError: Error tokenizing data. C error: Expected 26 fields in line 8, saw 43


In [None]:
df.head(15)

In [None]:
# Standard score-table columns shared by every indicator output.
df.rename(columns={'Country': 'Country Name'}, inplace=True)
df['higher_is_better'] = True
df['Indicator'] = indicator
# The score input is the absolute distance of the unregistered-population
# percentage from 100.
df['data_col'] = (df['UP in % of Country Population'] - 100).abs()
df['Year'] = 2018
# Country names are taken from the frame's second column (position 1).
df['Country Name'] = df.iloc[:, 1]
df['Sub-Pillar'] = subpillar

# Observed range of the raw values; used as the "old" range below.
min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# Rescale every raw value with convert_rank, using the observed min/max as the
# old range. Series.apply forwards the keyword arguments to convert_rank.
df['new_rank_score'] = df['data_col'].apply(convert_rank, old_min=min_rank, old_max=max_rank)

In [None]:
df = df[['Country Name', 'Year','Indicator','data_col','new_rank_score','higher_is_better','Sub-Pillar']]
df

In [None]:
# output scores
df.to_csv('../indicator_scores/foundations_{}_scores.csv'.format(indicator), index=False)

## 11. % of population with ID (Female)

In [None]:
# Indicator 11: find the processed-data filename registered for it in bnames.
indicator = indicators[10]
print(indicator)
filename_matches = bnames.loc[bnames['Indicator'] == indicator, 'Filename']
bf = filename_matches.values[0]
print(bf)

# Load the processed CSV behind this indicator.
processed_path = '../../processed/{}.csv'.format(bf)
df = pd.read_csv(processed_path)

# This indicator is filed under the second entry of subpillars.
subpillar = subpillars[1]
print(subpillar)

In [None]:
df.head(15)

In [None]:
# Treat the textual placeholder 'n/a' as a missing value.
female_up_col = '% of UP that is Female'
df[female_up_col] = df[female_up_col].replace({'n/a': np.nan})

In [None]:
# Standard score-table columns shared by every indicator output.
df.rename(columns={'Country': 'Country Name'}, inplace=True)
df['higher_is_better'] = True
df['Indicator'] = indicator
# The score input is the absolute distance of '% of UP that is Female' from 100.
# NOTE(review): that column name reads like the female share of the
# unregistered population; 100 minus it may not be the share of women holding
# an ID. Confirm this matches the indicator's definition.
df['data_col'] = (df['% of UP that is Female'] - 100).abs()
df['Year'] = 2018
# Country names are taken from the frame's second column (position 1).
df['Country Name'] = df.iloc[:, 1]
df['Sub-Pillar'] = subpillar

# Observed range of the raw values; used as the "old" range below.
min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# Rescale every raw value with convert_rank, using the observed min/max as the
# old range. Series.apply forwards the keyword arguments to convert_rank.
df['new_rank_score'] = df['data_col'].apply(convert_rank, old_min=min_rank, old_max=max_rank)

In [None]:
df = df[['Country Name', 'Year','Indicator','data_col','new_rank_score','higher_is_better','Sub-Pillar']]
df

In [None]:
df.to_csv('../indicator_scores/foundations_{}_scores.csv'.format(indicator), index=False)

## 12. can ID be used for transactions



In [None]:
# Indicator 12: find the processed-data filename registered for it in bnames.
indicator = indicators[11]
print(indicator)
filename_matches = bnames.loc[bnames['Indicator'] == indicator, 'Filename']
bf = filename_matches.values[0]
print(bf)

# Load the processed CSV behind this indicator.
processed_path = '../../processed/{}.csv'.format(bf)
df = pd.read_csv(processed_path)

# This indicator is filed under the second entry of subpillars.
subpillar = subpillars[1]
print(subpillar)

In [None]:
df.head(20)

In [None]:
# A '-' placeholder marks a missing value; blank it out, then cast to float.
id_col = 'Digitized ID system'
df[id_col] = df[id_col].replace('-', np.nan).astype(float)

In [None]:
df.head(15)

In [None]:
# Standard score-table columns. Whichever of 'Economy' / 'Country' the file
# uses for the country column is renamed to 'Country Name'.
df.rename(columns={'Economy': 'Country Name', 'Country': 'Country Name'}, inplace=True)
df['higher_is_better'] = True
df['Year'] = 2018
df['Indicator'] = indicator
df['data_col'] = df['Digitized ID system']
df['Sub-Pillar'] = subpillar

# Observed range of the raw values; used as the "old" range below.
min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# Rescale every raw value with convert_rank, using the observed min/max as the
# old range. Series.apply forwards the keyword arguments to convert_rank.
df['new_rank_score'] = df['data_col'].apply(convert_rank, old_min=min_rank, old_max=max_rank)

In [None]:
df = df[['Country Name','Year','Indicator','data_col','new_rank_score','higher_is_better','Sub-Pillar']]
df

In [None]:
df.to_csv('../indicator_scores/foundations_{}_scores.csv'.format(indicator), index=False)

## 13. Is personal data siloed


In [None]:
# Indicator 13: find the processed-data filename registered for it in bnames.
indicator = indicators[12]
print(indicator)
filename_matches = bnames.loc[bnames['Indicator'] == indicator, 'Filename']
bf = filename_matches.values[0]
print(bf)

# Load the file, keep the rows at positions 0-204 (205 rows; the rows after
# that are treated as carrying no useful information), and drop the leftover
# rows that have no value in the '#' column.
df = (
    pd.read_csv('../../processed/{}.csv'.format(bf))
    .iloc[0:205, :]
    .dropna(subset=['#'])
)

# Cast DPL to float. With errors='ignore' the column comes back unchanged if
# any value cannot be converted.
# NOTE(review): a failed cast is silent here; confirm DPL is numeric afterwards.
df['DPL'] = df['DPL'].astype(float, errors='ignore')

# This indicator is filed under the second entry of subpillars.
subpillar = subpillars[1]
print(subpillar)

In [None]:
df.head(104)

In [None]:
# Standard score-table columns shared by every indicator output.
df['higher_is_better'] = True
df['Indicator'] = indicator
# NOTE(review): this reads the column at *position* 205, whereas the load cell
# prepares the 'DPL' column and slices 205 *rows*. Confirm that a positional
# column lookup (rather than df['DPL']) is really intended.
df['data_col'] = df.iloc[:, 205]
df['Country Name'] = df['Economy']
df['Year'] = 2020
df['Sub-Pillar'] = subpillar

# Observed range of the raw values; used as the "old" range below.
min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# Rescale every raw value with convert_rank, using the observed min/max as the
# old range. Series.apply forwards the keyword arguments to convert_rank.
df['new_rank_score'] = df['data_col'].apply(convert_rank, old_min=min_rank, old_max=max_rank)

In [None]:
df = df[['Country Name','Year','Indicator','data_col','new_rank_score','higher_is_better','Sub-Pillar']]
df.head(15)

In [None]:
# output scores
df.to_csv('../indicator_scores/foundations_{}_scores.csv'.format(indicator), index=False)

## 14. Open data index

In [None]:
bnames

In [None]:
# Indicator 14: find the processed-data filename registered for it in bnames.
indicator = indicators[13]
print(indicator)
filename_matches = bnames.loc[bnames['Indicator'] == indicator, 'Filename']
bf = filename_matches.values[0]
print(bf)

# Load the processed CSV behind this indicator.
processed_path = '../../processed/{}.csv'.format(bf)
df = pd.read_csv(processed_path)

# This indicator is filed under the third entry of subpillars.
subpillar = subpillars[2]
print(subpillar)

In [None]:
df.head(15)

In [None]:
# Standard score-table columns: the raw value comes from 'score' and the
# country from 'name'.
df['higher_is_better'] = True
df['Indicator'] = indicator
df['data_col'] = df['score']
df['Country Name'] = df['name']
df['Year'] = 2016
df['Sub-Pillar'] = subpillar

# Observed range of the raw values; used as the "old" range below.
min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# Rescale every raw value with convert_rank, using the observed min/max as the
# old range. Series.apply forwards the keyword arguments to convert_rank.
df['new_rank_score'] = df['data_col'].apply(convert_rank, old_min=min_rank, old_max=max_rank)

In [None]:
df = df[['Country Name','Year','Indicator','data_col','new_rank_score','higher_is_better','Sub-Pillar']]
df.head(15)

In [None]:
# output scores
df.to_csv('../indicator_scores/foundations_{}_scores.csv'.format(indicator), index=False)

### Score Aggregating

In [None]:
import os


In [None]:
# Names of the files in the scores folder that belong to this pillar, i.e.
# whose filename starts with 'foundations'.
scores = [
    fname
    for fname in os.listdir('../indicator_scores/')
    if fname.startswith('foundations')
]

In [None]:
scores

In [None]:
# Read each selected score file, then stack all of them into a single table.
score_frames = [pd.read_csv('../indicator_scores/{}'.format(fname)) for fname in scores]
df = pd.concat(score_frames)

In [None]:
df

In [None]:
# Data cleaning: missing scores become 0, then rows are ordered by country and
# the index is renumbered from 0.
# NOTE(review): a 0 still enters the per-country mean computed further down
# and sits below the minimum rescaled score of 1 seen in the outputs; confirm
# that penalising missing data this way is intended.
df['new_rank_score'] = df['new_rank_score'].fillna(0)
df = df.sort_values(by='Country Name', ascending=True).reset_index(drop=True)

In [None]:
df.info()

In [None]:
df.head(15)

In [None]:
df.describe()

In [None]:
# checking country names
sorted(df['Country Name'].unique().tolist())

In [None]:
# Normalise country names: trim surrounding whitespace, drop leading/trailing
# asterisks, then trim once more so that whitespace which sat between the name
# and the asterisks (e.g. "Name **") does not survive.
# str.strip('*') removes any run of '*', so a separate strip('**') is not needed.
df['Country Name'] = df['Country Name'].str.strip().str.strip('*').str.strip()

In [None]:
# checking country names
sorted(df['Country Name'].unique().tolist())

In [None]:
# Per country: the mean of the rescaled scores and the number of non-missing
# raw values (i.e. how many indicators have data for that country).
country_agg_spec = {'new_rank_score': 'mean', 'data_col': 'count'}
agg_df = df.groupby('Country Name').agg(country_agg_spec)

In [None]:
agg_df.columns = ['agg_score', 'count_source' ]

In [None]:
# Largest number of indicators available for any single country. Read it
# straight from the column instead of building a full describe() summary
# just to pick out one statistic.
max_number_sources = agg_df['count_source'].max()

In [None]:
agg_df['agg_score_wt'] = agg_df['agg_score']*(agg_df['count_source']/max_number_sources)

In [None]:
agg_df.sort_values(by='agg_score', ascending=False, inplace=True)

In [None]:
agg_df.head(25)

In [None]:
agg_df.to_csv('../pillar_scores/foundation_scores_v0.csv')

In [None]:
### Score Aggregating by Subpillars

In [None]:
# Tag every row with its pillar so scores can be grouped by pillar/sub-pillar.
# Guarded so the cell can be re-run: DataFrame.insert raises ValueError when
# the column already exists.
if 'Pillar' not in df.columns:
    df.insert(0, 'Pillar', 'Foundations')

# Normalise country names: trim surrounding whitespace, drop leading/trailing
# asterisks, then trim once more so that whitespace which sat between the name
# and the asterisks (e.g. "Name **") does not survive.
df['Country Name'] = df['Country Name'].str.strip().str.strip('*').str.strip()

In [None]:
# Per pillar / sub-pillar / country: the mean of the rescaled scores and the
# number of non-missing raw values.
subpillar_keys = ['Pillar', 'Sub-Pillar', 'Country Name']
subpillar_agg_spec = {'new_rank_score': 'mean', 'data_col': 'count'}
sub_df = df.groupby(subpillar_keys).agg(subpillar_agg_spec)

In [None]:
sub_df.columns = ['agg_score', 'count_source' ]

In [None]:
# Largest number of indicators available within any single
# sub-pillar/country group. Read it straight from the column instead of
# building a full describe() summary just to pick out one statistic.
max_number_sources = sub_df['count_source'].max()

In [None]:
sub_df['agg_score_wt'] = sub_df['agg_score']*(sub_df['count_source']/max_number_sources)

In [None]:
sub_df.to_csv('../subpillar_score/foundations_scores_subpillar_v0.csv')

### Sources Generation

In [None]:
# Load the reference list of countries from Countries.xlsx, keep only the name
# column and rename it to the 'Country Name' key used by the score tables.
col_names = ['Country or Area']
countries = pd.read_excel('../../data/Countries.xlsx')
countries = countries[col_names]
countries.rename(columns={col_names[0]: 'Country Name'}, inplace=True)

In [None]:
# Get all indicators from the names dataframe retrieved at the beginning of
# the notebook, reduced to the columns needed for the sources table.
# Renaming first (and without inplace=True) keeps this cell re-runnable: once
# 'check' has become 'Pillar' the rename is a no-op and the selection still
# succeeds, and no in-place change is made to a slice of another frame.
# Note that 'Filename' is dropped from bnames from here on.
bnames = bnames.rename(columns={'check': 'Pillar'})[['Pillar','Sub-Pillar','Indicator','Data Source','Data Link']]

In [None]:
#Do a nice cross join so that we have combination of all countries vs all indicators
sources = countries.merge(bnames, how='cross')
sources

In [None]:
# Take the identifying columns of the scores table as a new frame and flag
# every country/indicator combination found there as available, using the
# string '1'.
dfsources = df[['Country Name','Pillar','Sub-Pillar','Indicator']].assign(Available='1')

In [None]:
# Left-join the full country x indicator grid with the flagged combinations
# from the scores table. Combinations that have a score carry Available == '1';
# the rest come out of the join as NaN and are filled with the string '0'.
join_keys = ['Country Name','Pillar','Sub-Pillar','Indicator']
sources = (
    sources
    .merge(dfsources, how='left', on=join_keys)
    .fillna({'Available': '0'})
)
sources

In [None]:
# Refresh the dashboard's Sources.csv for this pillar:
#  - if the file exists, load it, remove everything already recorded for the
#    Foundations pillar, and append the freshly prepared `sources` rows;
#  - if it does not exist, start a new table from `sources`.
from os.path import exists

if exists('../../dashboard/Sources.csv'):
    CurrentSources = pd.read_csv('../../dashboard/Sources.csv', dtype=str)
    CurrentSources = CurrentSources[['Country Name','Pillar','Sub-Pillar','Indicator','Data Source','Data Link','Available']]
    # Assign the filtered frame back: a bare .loc selection is discarded, which
    # would leave the old Foundations rows in place and duplicate them each run.
    CurrentSources = CurrentSources.loc[CurrentSources['Pillar'] != 'Foundations']
    # DataFrame.append was removed in pandas 2.0; pd.concat is the replacement.
    CurrentSources = pd.concat([CurrentSources, sources], ignore_index=True)
else:
    CurrentSources = sources
CurrentSources

In [None]:
CurrentSources.to_csv('../../dashboard/Sources.csv', index=False)