In [232]:
import pandas as pd
import numpy as np

### Get all the pillar names from the excel

In [233]:
names = pd.read_excel('../../UNDP Digital Assessment Data Framework Filename Matching V7.xlsx')

In [234]:
col_names = ['Indicator','check', 'Data Source','Data Link','Index','Filename','Sub-Pillar']

In [235]:
names = names[col_names]

In [236]:
names.head()

Unnamed: 0,Indicator,check,Data Source,Data Link,Index,Filename,Sub-Pillar
0,Countries,,UN Statistics Division: List of Countries,https://unstats.un.org,False,Countries,
1,"Database of Global Administrative Areas (GADM,...",,,https://gadm.org,False,,
2,High Resolution Population Density Maps + Demo...,,,,False,,
3,population density vs openstreetmap object den...,,,,False,,
4,Population Density,Infrastructure,World Bank: World Development Indicators,https://datacatalog.worldbank.org,False,population_density,Connectivity Technology


In [237]:
# get all the files per pillar
data_stats = names.groupby('check').agg({'Filename':'count','Indicator':'count'})

In [238]:
data_stats

Unnamed: 0_level_0,Filename,Indicator
check,Unnamed: 1_level_1,Unnamed: 2_level_1
Business,18,27
Foundations,14,22
Government,10,15
Infrastructure,47,58
People,35,49
Regulation,5,8
Strategy,1,1


### Foundations

In [239]:
bnames = names[(names.check=='Foundations')&(~names.Filename.isna())]#&(names.Index==False)]

In [240]:
bnames.head(25)

Unnamed: 0,Indicator,check,Data Source,Data Link,Index,Filename,Sub-Pillar
163,Digital payments penetration,Foundations,Portulans Institute: Network Readiness Index,https://networkreadinessindex.org,True,digital_payments_penetration,Digital Payments
164,% of population with digital finance account,Foundations,World Bank: Global Findex database,https://datacatalog.worldbank.org,False,population_digital_financial_services,Digital Payments
165,Made or received digital payments in the past ...,Foundations,World Bank: Global Findex database,https://datacatalog.worldbank.org,False,population_digital_financial_services,Digital Payments
166,Made or received digital payments in the past ...,Foundations,World Bank: Global Findex database,https://datacatalog.worldbank.org,False,population_digital_financial_services,Digital Payments
169,Used a mobile phone or the internet to check a...,Foundations,World Bank: Global Findex database,https://datacatalog.worldbank.org,False,population_digital_financial_services,Digital Payments
170,Used a mobile phone or the internet to check a...,Foundations,World Bank: Global Findex database,https://datacatalog.worldbank.org,False,population_digital_financial_services,Digital Payments
171,\n% Making online transactions - Men,Foundations,World Bank: Global Findex database,https://datacatalog.worldbank.org,False,population_digital_financial_services,Digital Payments
172,% Making online transactions - Women,Foundations,World Bank: Global Findex database,https://datacatalog.worldbank.org,False,population_digital_financial_services,Digital Payments
173,\n% of population with digital finance account...,Foundations,World Bank: Global Findex database,https://datacatalog.worldbank.org,False,population_digital_financial_services,Digital Payments
178,% of population with ID (all),Foundations,World Bank: Identification For Development (ID4D),https://datacatalog.worldbank.org,False,id4d_nid,Digital Legal Identity


In [241]:
# get list of names for all indicators
indicators = bnames.Indicator.unique()
subpillars = bnames['Sub-Pillar'].unique()

In [242]:
# get all file names
bfiles = bnames.Filename.unique()

In [243]:
bfiles

array(['digital_payments_penetration',
       'population_digital_financial_services', 'id4d_nid',
       'id4d_services', 'Egov_strategy', 'open_data_idx'], dtype=object)

In [244]:
subpillars

array(['Digital Payments', 'Digital Legal Identity', 'Data Exchange'],
      dtype=object)

In [245]:
# ls digital-readiness-assessment-main/processed/

In [246]:
##ict_goods and services not in process data

In [247]:
# formula for converting scale 0-100
def convert_rank(old_value, old_min=0, old_max=100, new_min=1, new_max=6 ):
    """ Convert old scale values scale into new scale values"""
    old_range = old_max - old_min
    new_range = new_max - new_min
    new_value = (((old_value-old_min)*new_range)/old_range)+new_min
    return new_value

### 1. Digital payments penetration

In [248]:
indicators[0]
subpillars[0]

'Digital Payments'

In [249]:
# load data
indicator = indicators[0]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

subpillar = subpillars[0]
print(subpillar)

Digital payments penetration
digital_payments_penetration
Digital Payments


In [250]:
df.head()

Unnamed: 0,RANK,COUNTRY/ECONOMY,VALUE,SCORE
0,1.0,Norway,0.85,100.0
1,2.0,Denmark,0.83,97.24
2,3.0,Finland,0.8,93.95
3,4.0,Sweden,0.8,93.08
4,5.0,Netherlands,0.76,89.01


In [251]:
# score looks like the one to use
df.describe()

Unnamed: 0,RANK,VALUE,SCORE
count,122.0,122.0,122.0
mean,61.5,0.315738,36.076967
std,35.362409,0.209012,24.867406
min,1.0,0.01,0.0
25%,31.25,0.15,16.235
50%,61.5,0.27,30.87
75%,91.75,0.44,51.11
max,122.0,0.85,100.0


In [252]:
# df.Indicator.unique()

In [253]:
# create standard columns
df.rename(columns={'COUNTRY/ECONOMY':'Country Name'}, inplace=True)
df['higher_is_better'] = True
df['Indicator'] = indicator
df['data_col'] = df['SCORE'] 
df['Sub-Pillar'] = subpillar
df['Year'] = 2017


min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# transform 1-100 rank into 1-6
df['new_rank_score'] = df['data_col'].apply(lambda row: convert_rank(row,old_min=min_rank,old_max=max_rank))

In [254]:
df = df[['Country Name', 'Year','Indicator','data_col','new_rank_score','higher_is_better','Sub-Pillar']].head(15)

In [255]:
# output scores to csv
df.to_csv('../indicator_scores/foundations_{}_scores.csv'.format(indicator), index=False)

## 2. % of population with digital finance account - registered


In [256]:
indicator = indicators[1]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

subpillars[0]
subpillar = subpillars[0]
print(subpillar)

% of population with digital finance account
population_digital_financial_services
Digital Payments


In [257]:
df.head(15)

Unnamed: 0,Year,ISO,Country Name,Region,Income Group,Account (% age 15+),"Account, male (% age 15+)","Account, in labor force (% age 15+)","Account, out of labor force (% age 15+)","Account, female (% age 15+)",...,"Mobile money account, female (% age 15+)","Mobile money account, young adults (% age 15-24)","Mobile money account, older adults (% age 25+)","Mobile money account, primary education or less (% age 15+)","Mobile money account, secondary education or less (% age 15+)","Mobile money account, income, poorest 40% (% age 15+)","Mobile money account, income, richest 60% (% age 15+)","Mobile money account, rural (% age 15+)",data_country,data_year
0,2011,AFG,Afghanistan,South Asia,Low income,9%,15%,15%,2%,3%,...,,,,,,,,,,
1,2014,AFG,Afghanistan,South Asia,Low income,10%,16%,15%,4%,4%,...,0%,0%,0%,0%,0%,0%,1%,0%,,
2,2017,AFG,Afghanistan,South Asia,Low income,15%,23%,25%,4%,7%,...,1%,0%,1%,0%,2%,0%,1%,1%,,
3,2011,AGO,Angola,Sub-Saharan Africa (excluding high income),Lower middle income,39%,39%,46%,31%,39%,...,,,,,,,,,,
4,2014,AGO,Angola,Sub-Saharan Africa (excluding high income),Lower middle income,29%,36%,36%,12%,22%,...,,,,,,,,,,
5,2011,ALB,Albania,Europe & Central Asia (excluding high income),Upper middle income,28%,34%,36%,15%,23%,...,,,,,,,,,,
6,2014,ALB,Albania,Europe & Central Asia (excluding high income),Upper middle income,38%,43%,44%,29%,34%,...,,,,,,,,,,
7,2017,ALB,Albania,Europe & Central Asia (excluding high income),Upper middle income,40%,42%,53%,27%,38%,...,2%,6%,1%,1%,4%,0%,4%,2%,,
8,2011,ARB,Arab world,,,22%,30%,33%,11%,14%,...,,,,,,,,,,
9,2014,ARB,Arab world,,,30%,38%,42%,18%,22%,...,,,,,,,,,,


In [258]:
df = df[(df.Year == 2017)]

# Must convert the data to float by removing the % sign
df['Account (% age 15+)'] = df['Account (% age 15+)'].str.replace('%','')
df['Account (% age 15+)'] = df['Account (% age 15+)'].astype(float)

In [259]:
# create standard columns
df.rename(columns={'Country':'Country Name'}, inplace=True)
df['higher_is_better'] = True
df['Indicator'] = indicator
df['data_col'] = df['Account (% age 15+)'] 
df['Sub-Pillar'] = subpillar


min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# transform 1-20 rank into 1-6
df['new_rank_score'] = df['data_col'].apply(lambda row: convert_rank(row,old_min=min_rank,old_max=max_rank))

In [260]:
df = df[['Country Name', 'Year','Indicator','data_col','new_rank_score','higher_is_better','Sub-Pillar']]
df

Unnamed: 0,Country Name,Year,Indicator,data_col,new_rank_score,higher_is_better,Sub-Pillar
2,Afghanistan,2017,% of population with digital finance account,15.0,1.329670,True,Digital Payments
7,Albania,2017,% of population with digital finance account,40.0,2.703297,True,Digital Payments
10,Arab world,2017,% of population with digital finance account,37.0,2.538462,True,Digital Payments
13,United Arab Emirates,2017,% of population with digital finance account,88.0,5.340659,True,Digital Payments
16,Argentina,2017,% of population with digital finance account,49.0,3.197802,True,Digital Payments
...,...,...,...,...,...,...,...
479,World,2017,% of population with digital finance account,69.0,4.296703,True,Digital Payments
482,Kosovo,2017,% of population with digital finance account,52.0,3.362637,True,Digital Payments
487,South Africa,2017,% of population with digital finance account,69.0,4.296703,True,Digital Payments
490,Zambia,2017,% of population with digital finance account,46.0,3.032967,True,Digital Payments


In [261]:
# output scores
df.to_csv('../indicator_scores/foundations_{}_scores.csv'.format(indicator), index=False)

In [262]:
## 3. Made or received digital payments in the past year (% age 15+) - all

In [263]:
indicator = indicators[2]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

subpillars[0]
subpillar = subpillars[0]
print(subpillar)

Made or received digital payments in the past year (% age 15+) - all
population_digital_financial_services
Digital Payments


In [264]:
df.head(15)

Unnamed: 0,Year,ISO,Country Name,Region,Income Group,Account (% age 15+),"Account, male (% age 15+)","Account, in labor force (% age 15+)","Account, out of labor force (% age 15+)","Account, female (% age 15+)",...,"Mobile money account, female (% age 15+)","Mobile money account, young adults (% age 15-24)","Mobile money account, older adults (% age 25+)","Mobile money account, primary education or less (% age 15+)","Mobile money account, secondary education or less (% age 15+)","Mobile money account, income, poorest 40% (% age 15+)","Mobile money account, income, richest 60% (% age 15+)","Mobile money account, rural (% age 15+)",data_country,data_year
0,2011,AFG,Afghanistan,South Asia,Low income,9%,15%,15%,2%,3%,...,,,,,,,,,,
1,2014,AFG,Afghanistan,South Asia,Low income,10%,16%,15%,4%,4%,...,0%,0%,0%,0%,0%,0%,1%,0%,,
2,2017,AFG,Afghanistan,South Asia,Low income,15%,23%,25%,4%,7%,...,1%,0%,1%,0%,2%,0%,1%,1%,,
3,2011,AGO,Angola,Sub-Saharan Africa (excluding high income),Lower middle income,39%,39%,46%,31%,39%,...,,,,,,,,,,
4,2014,AGO,Angola,Sub-Saharan Africa (excluding high income),Lower middle income,29%,36%,36%,12%,22%,...,,,,,,,,,,
5,2011,ALB,Albania,Europe & Central Asia (excluding high income),Upper middle income,28%,34%,36%,15%,23%,...,,,,,,,,,,
6,2014,ALB,Albania,Europe & Central Asia (excluding high income),Upper middle income,38%,43%,44%,29%,34%,...,,,,,,,,,,
7,2017,ALB,Albania,Europe & Central Asia (excluding high income),Upper middle income,40%,42%,53%,27%,38%,...,2%,6%,1%,1%,4%,0%,4%,2%,,
8,2011,ARB,Arab world,,,22%,30%,33%,11%,14%,...,,,,,,,,,,
9,2014,ARB,Arab world,,,30%,38%,42%,18%,22%,...,,,,,,,,,,


In [265]:
df = df[(df.Year == 2017)]

# Must convert the data to float by removing the % sign
df['Made or received digital payments in the past year (% age 15+)'] = df['Made or received digital payments in the past year (% age 15+)'].str.replace('%','')
df['Made or received digital payments in the past year (% age 15+)'] = df['Made or received digital payments in the past year (% age 15+)'].astype(float)

In [266]:
# create standard columns
df.rename(columns={'Country':'Country Name'}, inplace=True)
df['higher_is_better'] = True
df['Indicator'] = indicator
df['data_col'] = df['Made or received digital payments in the past year (% age 15+)'] 
df['Sub-Pillar'] = subpillar


min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# transform 1-20 rank into 1-6
df['new_rank_score'] = df['data_col'].apply(lambda row: convert_rank(row,old_min=min_rank,old_max=max_rank))

In [267]:
df = df[['Country Name', 'Year','Indicator','data_col','new_rank_score','higher_is_better','Sub-Pillar']]
df

Unnamed: 0,Country Name,Year,Indicator,data_col,new_rank_score,higher_is_better,Sub-Pillar
2,Afghanistan,2017,Made or received digital payments in the past ...,11.0,1.217391,True,Digital Payments
7,Albania,2017,Made or received digital payments in the past ...,29.0,2.195652,True,Digital Payments
10,Arab world,2017,Made or received digital payments in the past ...,26.0,2.032609,True,Digital Payments
13,United Arab Emirates,2017,Made or received digital payments in the past ...,84.0,5.184783,True,Digital Payments
16,Argentina,2017,Made or received digital payments in the past ...,40.0,2.793478,True,Digital Payments
...,...,...,...,...,...,...,...
479,World,2017,Made or received digital payments in the past ...,52.0,3.445652,True,Digital Payments
482,Kosovo,2017,Made or received digital payments in the past ...,39.0,2.739130,True,Digital Payments
487,South Africa,2017,Made or received digital payments in the past ...,60.0,3.880435,True,Digital Payments
490,Zambia,2017,Made or received digital payments in the past ...,39.0,2.739130,True,Digital Payments


In [268]:
# output scores
df.to_csv('../indicator_scores/foundations_{}_scores.csv'.format(indicator), index=False)

In [269]:
## 4. Made or received digital payments in the past year (% age 15+) - female

In [270]:
indicator = indicators[3]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

subpillars[0]
subpillar = subpillars[0]
print(subpillar)

Made or received digital payments in the past year (% age 15+) - female
population_digital_financial_services
Digital Payments


In [271]:
df.head(15)

Unnamed: 0,Year,ISO,Country Name,Region,Income Group,Account (% age 15+),"Account, male (% age 15+)","Account, in labor force (% age 15+)","Account, out of labor force (% age 15+)","Account, female (% age 15+)",...,"Mobile money account, female (% age 15+)","Mobile money account, young adults (% age 15-24)","Mobile money account, older adults (% age 25+)","Mobile money account, primary education or less (% age 15+)","Mobile money account, secondary education or less (% age 15+)","Mobile money account, income, poorest 40% (% age 15+)","Mobile money account, income, richest 60% (% age 15+)","Mobile money account, rural (% age 15+)",data_country,data_year
0,2011,AFG,Afghanistan,South Asia,Low income,9%,15%,15%,2%,3%,...,,,,,,,,,,
1,2014,AFG,Afghanistan,South Asia,Low income,10%,16%,15%,4%,4%,...,0%,0%,0%,0%,0%,0%,1%,0%,,
2,2017,AFG,Afghanistan,South Asia,Low income,15%,23%,25%,4%,7%,...,1%,0%,1%,0%,2%,0%,1%,1%,,
3,2011,AGO,Angola,Sub-Saharan Africa (excluding high income),Lower middle income,39%,39%,46%,31%,39%,...,,,,,,,,,,
4,2014,AGO,Angola,Sub-Saharan Africa (excluding high income),Lower middle income,29%,36%,36%,12%,22%,...,,,,,,,,,,
5,2011,ALB,Albania,Europe & Central Asia (excluding high income),Upper middle income,28%,34%,36%,15%,23%,...,,,,,,,,,,
6,2014,ALB,Albania,Europe & Central Asia (excluding high income),Upper middle income,38%,43%,44%,29%,34%,...,,,,,,,,,,
7,2017,ALB,Albania,Europe & Central Asia (excluding high income),Upper middle income,40%,42%,53%,27%,38%,...,2%,6%,1%,1%,4%,0%,4%,2%,,
8,2011,ARB,Arab world,,,22%,30%,33%,11%,14%,...,,,,,,,,,,
9,2014,ARB,Arab world,,,30%,38%,42%,18%,22%,...,,,,,,,,,,


In [272]:
df = df[(df.Year == 2017)]

# Must convert the data to float by removing the % sign
df['Made or received digital payments in the past year, female  (% age 15+)'] = df['Made or received digital payments in the past year, female  (% age 15+)'].str.replace('%','')
df['Made or received digital payments in the past year, female  (% age 15+)'] = df['Made or received digital payments in the past year, female  (% age 15+)'].astype(float)

In [273]:
# create standard columns
df.rename(columns={'Country':'Country Name'}, inplace=True)
df['higher_is_better'] = True
df['Indicator'] = indicator
df['data_col'] = df['Made or received digital payments in the past year, female  (% age 15+)'] 
df['Sub-Pillar'] = subpillar


min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# transform 1-20 rank into 1-6
df['new_rank_score'] = df['data_col'].apply(lambda row: convert_rank(row,old_min=min_rank,old_max=max_rank))

In [274]:
df = df[['Country Name', 'Year','Indicator','data_col','new_rank_score','higher_is_better','Sub-Pillar']]
df

Unnamed: 0,Country Name,Year,Indicator,data_col,new_rank_score,higher_is_better,Sub-Pillar
2,Afghanistan,2017,Made or received digital payments in the past ...,4.0,1.000000,True,Digital Payments
7,Albania,2017,Made or received digital payments in the past ...,28.0,2.250000,True,Digital Payments
10,Arab world,2017,Made or received digital payments in the past ...,16.0,1.625000,True,Digital Payments
13,United Arab Emirates,2017,Made or received digital payments in the past ...,68.0,4.333333,True,Digital Payments
16,Argentina,2017,Made or received digital payments in the past ...,42.0,2.979167,True,Digital Payments
...,...,...,...,...,...,...,...
479,World,2017,Made or received digital payments in the past ...,49.0,3.343750,True,Digital Payments
482,Kosovo,2017,Made or received digital payments in the past ...,29.0,2.302083,True,Digital Payments
487,South Africa,2017,Made or received digital payments in the past ...,61.0,3.968750,True,Digital Payments
490,Zambia,2017,Made or received digital payments in the past ...,35.0,2.614583,True,Digital Payments


In [275]:
# output scores
df.to_csv('../indicator_scores/foundations_{}_scores.csv'.format(indicator), index=False)

In [276]:
## 5. Used a mobile phone or the internet to check account balance in the past year (% age 15+) - all

In [277]:
indicator = indicators[4]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

subpillars[0]
subpillar = subpillars[0]
print(subpillar)

Used a mobile phone or the internet to check account balance in the past year (% age 15+) - all
population_digital_financial_services
Digital Payments


In [278]:
df.head(15)

Unnamed: 0,Year,ISO,Country Name,Region,Income Group,Account (% age 15+),"Account, male (% age 15+)","Account, in labor force (% age 15+)","Account, out of labor force (% age 15+)","Account, female (% age 15+)",...,"Mobile money account, female (% age 15+)","Mobile money account, young adults (% age 15-24)","Mobile money account, older adults (% age 25+)","Mobile money account, primary education or less (% age 15+)","Mobile money account, secondary education or less (% age 15+)","Mobile money account, income, poorest 40% (% age 15+)","Mobile money account, income, richest 60% (% age 15+)","Mobile money account, rural (% age 15+)",data_country,data_year
0,2011,AFG,Afghanistan,South Asia,Low income,9%,15%,15%,2%,3%,...,,,,,,,,,,
1,2014,AFG,Afghanistan,South Asia,Low income,10%,16%,15%,4%,4%,...,0%,0%,0%,0%,0%,0%,1%,0%,,
2,2017,AFG,Afghanistan,South Asia,Low income,15%,23%,25%,4%,7%,...,1%,0%,1%,0%,2%,0%,1%,1%,,
3,2011,AGO,Angola,Sub-Saharan Africa (excluding high income),Lower middle income,39%,39%,46%,31%,39%,...,,,,,,,,,,
4,2014,AGO,Angola,Sub-Saharan Africa (excluding high income),Lower middle income,29%,36%,36%,12%,22%,...,,,,,,,,,,
5,2011,ALB,Albania,Europe & Central Asia (excluding high income),Upper middle income,28%,34%,36%,15%,23%,...,,,,,,,,,,
6,2014,ALB,Albania,Europe & Central Asia (excluding high income),Upper middle income,38%,43%,44%,29%,34%,...,,,,,,,,,,
7,2017,ALB,Albania,Europe & Central Asia (excluding high income),Upper middle income,40%,42%,53%,27%,38%,...,2%,6%,1%,1%,4%,0%,4%,2%,,
8,2011,ARB,Arab world,,,22%,30%,33%,11%,14%,...,,,,,,,,,,
9,2014,ARB,Arab world,,,30%,38%,42%,18%,22%,...,,,,,,,,,,


In [279]:
df = df[(df.Year == 2017)]

# Must convert the data to float by removing the % sign
df['Used a mobile phone or the internet to access an account (% age 15+)   '] = df['Used a mobile phone or the internet to access an account (% age 15+)   '].str.replace('%','')
df['Used a mobile phone or the internet to access an account (% age 15+)   '] = df['Used a mobile phone or the internet to access an account (% age 15+)   '].astype(float)

In [280]:
# create standard columns
df.rename(columns={'Country':'Country Name'}, inplace=True)
df['higher_is_better'] = True
df['Indicator'] = indicator
df['data_col'] = df['Used a mobile phone or the internet to access an account (% age 15+)   '] 
df['Sub-Pillar'] = subpillar


min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# transform 1-20 rank into 1-6
df['new_rank_score'] = df['data_col'].apply(lambda row: convert_rank(row,old_min=min_rank,old_max=max_rank))

In [281]:
df = df[['Country Name', 'Year','Indicator','data_col','new_rank_score','higher_is_better','Sub-Pillar']]
df

Unnamed: 0,Country Name,Year,Indicator,data_col,new_rank_score,higher_is_better,Sub-Pillar
2,Afghanistan,2017,Used a mobile phone or the internet to check a...,1.0,1.058824,True,Digital Payments
7,Albania,2017,Used a mobile phone or the internet to check a...,4.0,1.235294,True,Digital Payments
10,Arab world,2017,Used a mobile phone or the internet to check a...,6.0,1.352941,True,Digital Payments
13,United Arab Emirates,2017,Used a mobile phone or the internet to check a...,47.0,3.764706,True,Digital Payments
16,Argentina,2017,Used a mobile phone or the internet to check a...,10.0,1.588235,True,Digital Payments
...,...,...,...,...,...,...,...
479,World,2017,Used a mobile phone or the internet to check a...,25.0,2.470588,True,Digital Payments
482,Kosovo,2017,Used a mobile phone or the internet to check a...,5.0,1.294118,True,Digital Payments
487,South Africa,2017,Used a mobile phone or the internet to check a...,21.0,2.235294,True,Digital Payments
490,Zambia,2017,Used a mobile phone or the internet to check a...,26.0,2.529412,True,Digital Payments


In [282]:
# output scores
df.to_csv('../indicator_scores/foundations_{}_scores.csv'.format(indicator), index=False)

In [283]:
## 6. % of population with digital finance account - active (90 days)

In [284]:
indicator = indicators[5]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

subpillars[0]
subpillar = subpillars[0]
print(subpillar)

Used a mobile phone or the internet to check account balance in the past year (% age 15+) - female
population_digital_financial_services
Digital Payments


In [285]:
df.head(15)

Unnamed: 0,Year,ISO,Country Name,Region,Income Group,Account (% age 15+),"Account, male (% age 15+)","Account, in labor force (% age 15+)","Account, out of labor force (% age 15+)","Account, female (% age 15+)",...,"Mobile money account, female (% age 15+)","Mobile money account, young adults (% age 15-24)","Mobile money account, older adults (% age 25+)","Mobile money account, primary education or less (% age 15+)","Mobile money account, secondary education or less (% age 15+)","Mobile money account, income, poorest 40% (% age 15+)","Mobile money account, income, richest 60% (% age 15+)","Mobile money account, rural (% age 15+)",data_country,data_year
0,2011,AFG,Afghanistan,South Asia,Low income,9%,15%,15%,2%,3%,...,,,,,,,,,,
1,2014,AFG,Afghanistan,South Asia,Low income,10%,16%,15%,4%,4%,...,0%,0%,0%,0%,0%,0%,1%,0%,,
2,2017,AFG,Afghanistan,South Asia,Low income,15%,23%,25%,4%,7%,...,1%,0%,1%,0%,2%,0%,1%,1%,,
3,2011,AGO,Angola,Sub-Saharan Africa (excluding high income),Lower middle income,39%,39%,46%,31%,39%,...,,,,,,,,,,
4,2014,AGO,Angola,Sub-Saharan Africa (excluding high income),Lower middle income,29%,36%,36%,12%,22%,...,,,,,,,,,,
5,2011,ALB,Albania,Europe & Central Asia (excluding high income),Upper middle income,28%,34%,36%,15%,23%,...,,,,,,,,,,
6,2014,ALB,Albania,Europe & Central Asia (excluding high income),Upper middle income,38%,43%,44%,29%,34%,...,,,,,,,,,,
7,2017,ALB,Albania,Europe & Central Asia (excluding high income),Upper middle income,40%,42%,53%,27%,38%,...,2%,6%,1%,1%,4%,0%,4%,2%,,
8,2011,ARB,Arab world,,,22%,30%,33%,11%,14%,...,,,,,,,,,,
9,2014,ARB,Arab world,,,30%,38%,42%,18%,22%,...,,,,,,,,,,


In [286]:
df = df[(df.Year == 2017)]

# Must convert the data to float by removing the % sign
df['Used a mobile phone or the internet to access an account, female  (% age 15+)   '] = df['Used a mobile phone or the internet to access an account, female  (% age 15+)   '].str.replace('%','')
df['Used a mobile phone or the internet to access an account, female  (% age 15+)   '] = df['Used a mobile phone or the internet to access an account, female  (% age 15+)   '].astype(float)

In [287]:
#create standard columns
df.rename(columns={'Country':'Country Name'}, inplace=True)
df['higher_is_better'] = True
df['Indicator'] = indicator
df['data_col'] = df['Used a mobile phone or the internet to access an account, female  (% age 15+)   '] 
df['Sub-Pillar'] = subpillar


min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# transform 1-20 rank into 1-6
df['new_rank_score'] = df['data_col'].apply(lambda row: convert_rank(row,old_min=min_rank,old_max=max_rank))

In [288]:
df = df[['Country Name', 'Year','Indicator','data_col','new_rank_score','higher_is_better','Sub-Pillar']]
df

Unnamed: 0,Country Name,Year,Indicator,data_col,new_rank_score,higher_is_better,Sub-Pillar
2,Afghanistan,2017,Used a mobile phone or the internet to check a...,1.0,1.058824,True,Digital Payments
7,Albania,2017,Used a mobile phone or the internet to check a...,4.0,1.235294,True,Digital Payments
10,Arab world,2017,Used a mobile phone or the internet to check a...,3.0,1.176471,True,Digital Payments
13,United Arab Emirates,2017,Used a mobile phone or the internet to check a...,37.0,3.176471,True,Digital Payments
16,Argentina,2017,Used a mobile phone or the internet to check a...,8.0,1.470588,True,Digital Payments
...,...,...,...,...,...,...,...
479,World,2017,Used a mobile phone or the internet to check a...,23.0,2.352941,True,Digital Payments
482,Kosovo,2017,Used a mobile phone or the internet to check a...,4.0,1.235294,True,Digital Payments
487,South Africa,2017,Used a mobile phone or the internet to check a...,18.0,2.058824,True,Digital Payments
490,Zambia,2017,Used a mobile phone or the internet to check a...,25.0,2.470588,True,Digital Payments


In [289]:
# output scores
df.to_csv('../indicator_scores/foundations_{}_scores.csv'.format(indicator), index=False)

In [290]:
## 7. % Making online transactions - Men

In [291]:
indicator = indicators[6]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

subpillars[0]
subpillar = subpillars[0]
print(subpillar)


% Making online transactions - Men
population_digital_financial_services
Digital Payments


In [292]:
df = df[(df.Year == 2017)]

# Must convert the data to float by removing the % sign
df['Used the internet to pay bills or to buy something online in the past year, male (% age 15+)'] = df['Used the internet to pay bills or to buy something online in the past year, male (% age 15+)'].str.replace('%','')
df['Used the internet to pay bills or to buy something online in the past year, male (% age 15+)'] = df['Used the internet to pay bills or to buy something online in the past year, male (% age 15+)'].astype(float)

In [293]:
#create standard columns
df.rename(columns={'Country':'Country Name'}, inplace=True)
df['higher_is_better'] = True
df['Indicator'] = indicator
df['data_col'] = df['Used the internet to pay bills or to buy something online in the past year, male (% age 15+)'] 
df['Sub-Pillar'] = subpillar


min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# transform 1-20 rank into 1-6
df['new_rank_score'] = df['data_col'].apply(lambda row: convert_rank(row,old_min=min_rank,old_max=max_rank))

In [294]:
df = df[['Country Name', 'Year','Indicator','data_col','new_rank_score','higher_is_better','Sub-Pillar']]
df

Unnamed: 0,Country Name,Year,Indicator,data_col,new_rank_score,higher_is_better,Sub-Pillar
2,Afghanistan,2017,\n% Making online transactions - Men,1.0,1.000000,True,Digital Payments
7,Albania,2017,\n% Making online transactions - Men,9.0,1.439560,True,Digital Payments
10,Arab world,2017,\n% Making online transactions - Men,12.0,1.604396,True,Digital Payments
13,United Arab Emirates,2017,\n% Making online transactions - Men,62.0,4.351648,True,Digital Payments
16,Argentina,2017,\n% Making online transactions - Men,21.0,2.098901,True,Digital Payments
...,...,...,...,...,...,...,...
479,World,2017,\n% Making online transactions - Men,30.0,2.593407,True,Digital Payments
482,Kosovo,2017,\n% Making online transactions - Men,18.0,1.934066,True,Digital Payments
487,South Africa,2017,\n% Making online transactions - Men,17.0,1.879121,True,Digital Payments
490,Zambia,2017,\n% Making online transactions - Men,14.0,1.714286,True,Digital Payments


In [295]:
df.to_csv('../indicator_scores/foundations_Making online transactions - Men_scores.csv', index=False)

In [296]:
## 8. % Making online transactions - Women

In [297]:
indicator = indicators[7]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

subpillars[0]
subpillar = subpillars[0]
print(subpillar)

% Making online transactions - Women
population_digital_financial_services
Digital Payments


In [298]:
df = df[(df.Year == 2017)]

# Must convert the data to float by removing the % sign
df['Used the internet to pay bills or to buy something online in the past year, female (% age 15+)'] = df['Used the internet to pay bills or to buy something online in the past year, female (% age 15+)'].str.replace('%','')
df['Used the internet to pay bills or to buy something online in the past year, female (% age 15+)'] = df['Used the internet to pay bills or to buy something online in the past year, female (% age 15+)'].astype(float)

In [299]:
#create standard columns
df.rename(columns={'Country':'Country Name'}, inplace=True)
df['higher_is_better'] = True
df['Indicator'] = indicator
df['data_col'] = df['Used the internet to pay bills or to buy something online in the past year, female (% age 15+)'] 
df['Sub-Pillar'] = subpillar


min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# transform 1-20 rank into 1-6
df['new_rank_score'] = df['data_col'].apply(lambda row: convert_rank(row,old_min=min_rank,old_max=max_rank))

In [300]:
df = df[['Country Name', 'Year','Indicator','data_col','new_rank_score','higher_is_better','Sub-Pillar']]
df

Unnamed: 0,Country Name,Year,Indicator,data_col,new_rank_score,higher_is_better,Sub-Pillar
2,Afghanistan,2017,% Making online transactions - Women,0.0,1.000000,True,Digital Payments
7,Albania,2017,% Making online transactions - Women,6.0,1.337079,True,Digital Payments
10,Arab world,2017,% Making online transactions - Women,6.0,1.337079,True,Digital Payments
13,United Arab Emirates,2017,% Making online transactions - Women,54.0,4.033708,True,Digital Payments
16,Argentina,2017,% Making online transactions - Women,16.0,1.898876,True,Digital Payments
...,...,...,...,...,...,...,...
479,World,2017,% Making online transactions - Women,28.0,2.573034,True,Digital Payments
482,Kosovo,2017,% Making online transactions - Women,12.0,1.674157,True,Digital Payments
487,South Africa,2017,% Making online transactions - Women,12.0,1.674157,True,Digital Payments
490,Zambia,2017,% Making online transactions - Women,9.0,1.505618,True,Digital Payments


In [301]:
df.to_csv('../indicator_scores/foundations_{}_scores.csv'.format(indicator), index=False)

In [302]:
indicator = indicators[8]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

subpillars[0]
subpillar = subpillars[0]
print(subpillar)


% of population with digital finance account - registered
population_digital_financial_services
Digital Payments


In [303]:
df.head()

Unnamed: 0,Year,ISO,Country Name,Region,Income Group,Account (% age 15+),"Account, male (% age 15+)","Account, in labor force (% age 15+)","Account, out of labor force (% age 15+)","Account, female (% age 15+)",...,"Mobile money account, female (% age 15+)","Mobile money account, young adults (% age 15-24)","Mobile money account, older adults (% age 25+)","Mobile money account, primary education or less (% age 15+)","Mobile money account, secondary education or less (% age 15+)","Mobile money account, income, poorest 40% (% age 15+)","Mobile money account, income, richest 60% (% age 15+)","Mobile money account, rural (% age 15+)",data_country,data_year
0,2011,AFG,Afghanistan,South Asia,Low income,9%,15%,15%,2%,3%,...,,,,,,,,,,
1,2014,AFG,Afghanistan,South Asia,Low income,10%,16%,15%,4%,4%,...,0%,0%,0%,0%,0%,0%,1%,0%,,
2,2017,AFG,Afghanistan,South Asia,Low income,15%,23%,25%,4%,7%,...,1%,0%,1%,0%,2%,0%,1%,1%,,
3,2011,AGO,Angola,Sub-Saharan Africa (excluding high income),Lower middle income,39%,39%,46%,31%,39%,...,,,,,,,,,,
4,2014,AGO,Angola,Sub-Saharan Africa (excluding high income),Lower middle income,29%,36%,36%,12%,22%,...,,,,,,,,,,


In [304]:
df = df[(df.Year == 2017)]

# Must convert the data to float by removing the % sign
df['Financial institution account (% age 15+) '] = df['Financial institution account (% age 15+) '].str.replace('%','')
df['Financial institution account (% age 15+) '] = df['Financial institution account (% age 15+) '].astype(float)

In [305]:
# create standard columns
df.rename(columns={'Country':'Country Name'}, inplace=True)
df['higher_is_better'] = True
df['Indicator'] = indicator
df['data_col'] = df['Financial institution account (% age 15+) '] 
df['Sub-Pillar'] = subpillar


min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# transform 1-20 rank into 1-6
df['new_rank_score'] = df['data_col'].apply(lambda row: convert_rank(row,old_min=min_rank,old_max=max_rank))

In [306]:
df = df[['Country Name', 'Year','Indicator','data_col','new_rank_score','higher_is_better','Sub-Pillar']]
df

Unnamed: 0,Country Name,Year,Indicator,data_col,new_rank_score,higher_is_better,Sub-Pillar
2,Afghanistan,2017,\n% of population with digital finance account...,15.0,1.329670,True,Digital Payments
7,Albania,2017,\n% of population with digital finance account...,39.0,2.648352,True,Digital Payments
10,Arab world,2017,\n% of population with digital finance account...,37.0,2.538462,True,Digital Payments
13,United Arab Emirates,2017,\n% of population with digital finance account...,87.0,5.285714,True,Digital Payments
16,Argentina,2017,\n% of population with digital finance account...,48.0,3.142857,True,Digital Payments
...,...,...,...,...,...,...,...
479,World,2017,\n% of population with digital finance account...,67.0,4.186813,True,Digital Payments
482,Kosovo,2017,\n% of population with digital finance account...,52.0,3.362637,True,Digital Payments
487,South Africa,2017,\n% of population with digital finance account...,67.0,4.186813,True,Digital Payments
490,Zambia,2017,\n% of population with digital finance account...,36.0,2.483516,True,Digital Payments


In [307]:
# output scores
df.to_csv('../indicator_scores/foundations_Financial institution account_scores.csv')

## 10. % of population with ID (Total)


In [308]:
indicator = indicators[9]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))


subpillars[1]
subpillar = subpillars[1]
print(subpillar)

% of population with ID (all)
id4d_nid


ParserError: Error tokenizing data. C error: Expected 26 fields in line 8, saw 43


In [309]:
df.head(15)

Unnamed: 0,Country Name,Year,Indicator,data_col,new_rank_score,higher_is_better,Sub-Pillar
2,Afghanistan,2017,\n% of population with digital finance account...,15.0,1.32967,True,Digital Payments
7,Albania,2017,\n% of population with digital finance account...,39.0,2.648352,True,Digital Payments
10,Arab world,2017,\n% of population with digital finance account...,37.0,2.538462,True,Digital Payments
13,United Arab Emirates,2017,\n% of population with digital finance account...,87.0,5.285714,True,Digital Payments
16,Argentina,2017,\n% of population with digital finance account...,48.0,3.142857,True,Digital Payments
19,Armenia,2017,\n% of population with digital finance account...,45.0,2.978022,True,Digital Payments
22,Australia,2017,\n% of population with digital finance account...,100.0,6.0,True,Digital Payments
25,Austria,2017,\n% of population with digital finance account...,98.0,5.89011,True,Digital Payments
28,Azerbaijan,2017,\n% of population with digital finance account...,29.0,2.098901,True,Digital Payments
33,Belgium,2017,\n% of population with digital finance account...,99.0,5.945055,True,Digital Payments


In [310]:
# create standard columns
df.rename(columns={'Country':'Country Name'}, inplace=True)
df['higher_is_better'] = True
df['Indicator'] = indicator
# Subtract the unregister proportion column by 100 and absolute the value
df['data_col'] = df['UP in % of Country Population'].sub(100).abs()
df ['Year'] = 2018
df['Country Name'] = df.iloc[:,1]
df['Sub-Pillar'] = subpillar

min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# transform 1-20 rank into 1-6
df['new_rank_score'] = df['data_col'].apply(lambda row: convert_rank(row,old_min=min_rank,old_max=max_rank))

KeyError: 'UP in % of Country Population'

In [311]:
df = df[['Country Name', 'Year','Indicator','data_col','new_rank_score','higher_is_better','Sub-Pillar']]
df

Unnamed: 0,Country Name,Year,Indicator,data_col,new_rank_score,higher_is_better,Sub-Pillar
2,Afghanistan,2017,% of population with ID (all),15.0,1.329670,True,Digital Payments
7,Albania,2017,% of population with ID (all),39.0,2.648352,True,Digital Payments
10,Arab world,2017,% of population with ID (all),37.0,2.538462,True,Digital Payments
13,United Arab Emirates,2017,% of population with ID (all),87.0,5.285714,True,Digital Payments
16,Argentina,2017,% of population with ID (all),48.0,3.142857,True,Digital Payments
...,...,...,...,...,...,...,...
479,World,2017,% of population with ID (all),67.0,4.186813,True,Digital Payments
482,Kosovo,2017,% of population with ID (all),52.0,3.362637,True,Digital Payments
487,South Africa,2017,% of population with ID (all),67.0,4.186813,True,Digital Payments
490,Zambia,2017,% of population with ID (all),36.0,2.483516,True,Digital Payments


In [312]:
# output scores
df.to_csv('../indicator_scores/foundations_{}_scores.csv'.format(indicator), index=False)

## 11. % of population with ID (Female)

In [313]:
indicator = indicators[10]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

subpillars[1]
subpillar = subpillars[1]
print(subpillar)

% of population with ID (female)
id4d_nid


ParserError: Error tokenizing data. C error: Expected 26 fields in line 8, saw 43


In [314]:
df.head(15)

Unnamed: 0,Country Name,Year,Indicator,data_col,new_rank_score,higher_is_better,Sub-Pillar
2,Afghanistan,2017,% of population with ID (all),15.0,1.32967,True,Digital Payments
7,Albania,2017,% of population with ID (all),39.0,2.648352,True,Digital Payments
10,Arab world,2017,% of population with ID (all),37.0,2.538462,True,Digital Payments
13,United Arab Emirates,2017,% of population with ID (all),87.0,5.285714,True,Digital Payments
16,Argentina,2017,% of population with ID (all),48.0,3.142857,True,Digital Payments
19,Armenia,2017,% of population with ID (all),45.0,2.978022,True,Digital Payments
22,Australia,2017,% of population with ID (all),100.0,6.0,True,Digital Payments
25,Austria,2017,% of population with ID (all),98.0,5.89011,True,Digital Payments
28,Azerbaijan,2017,% of population with ID (all),29.0,2.098901,True,Digital Payments
33,Belgium,2017,% of population with ID (all),99.0,5.945055,True,Digital Payments


In [315]:
df['% of UP that is Female'] = df['% of UP that is Female'].replace('n/a',np.nan)

KeyError: '% of UP that is Female'

In [316]:
# create standard columns
df.rename(columns={'Country':'Country Name'}, inplace=True)
df['higher_is_better'] = True
df['Indicator'] = indicator
# Subtract the unregister proportion column by 100 and absolute the value
df['data_col'] = df['% of UP that is Female'].sub(100).abs()
df ['Year'] = 2018
df['Country Name'] = df.iloc[:,1]
df['Sub-Pillar'] = subpillar


min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# transform 1-20 rank into 1-6
df['new_rank_score'] = df['data_col'].apply(lambda row: convert_rank(row,old_min=min_rank,old_max=max_rank))

KeyError: '% of UP that is Female'

In [317]:
df = df[['Country Name', 'Year','Indicator','data_col','new_rank_score','higher_is_better','Sub-Pillar']]
df

Unnamed: 0,Country Name,Year,Indicator,data_col,new_rank_score,higher_is_better,Sub-Pillar
2,Afghanistan,2017,% of population with ID (female),15.0,1.329670,True,Digital Payments
7,Albania,2017,% of population with ID (female),39.0,2.648352,True,Digital Payments
10,Arab world,2017,% of population with ID (female),37.0,2.538462,True,Digital Payments
13,United Arab Emirates,2017,% of population with ID (female),87.0,5.285714,True,Digital Payments
16,Argentina,2017,% of population with ID (female),48.0,3.142857,True,Digital Payments
...,...,...,...,...,...,...,...
479,World,2017,% of population with ID (female),67.0,4.186813,True,Digital Payments
482,Kosovo,2017,% of population with ID (female),52.0,3.362637,True,Digital Payments
487,South Africa,2017,% of population with ID (female),67.0,4.186813,True,Digital Payments
490,Zambia,2017,% of population with ID (female),36.0,2.483516,True,Digital Payments


In [318]:
df.to_csv('../indicator_scores/foundations_{}_scores.csv'.format(indicator), index=False)

## 12. can ID be used for transactions



In [319]:
indicator = indicators[11]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

subpillars[1]
subpillar = subpillars[1]
print(subpillar)

can ID be used for transactions
id4d_services
Digital Legal Identity


In [320]:
df.head(20)

Unnamed: 0,ID,Economy,Region,Income,Code,Inclusion Criteria,National ID,NID Issued at Birth,Mandatory NID age,Digitized ID system,...,URL,NID card/number name,NID cost,Primary CR entity,CR Gov Entity URL,CR entity category,Mandatory Birth Registration Period,Birth Registration cost,data_country,data_year
0,1.0,Afghanistan,SAS,LIC,AFG,INCLUDE,1,1.0,0,1,...,http://mcit.gov.af/en/page/7081,Tazkira / eNID,Afs 30-35 (US$ 0.5),Ministry of Interior Affairs,"http://moi.gov.af/en/page/7180 , http://moi.go...",2.0,6 m,free,,
1,2.0,Albania,ECS,UMC,ALB,INCLUDE,1,0.0,16,1,...,http://www.punetebrendshme.gov.al,Letërnjoftimi / Albanian Identity Card,10 euros,"Vital Statistics Offices , Ministry of Interio...",http://www.punetebrendshme.gov.al/,5.0,60 d,100 Leke,,
2,3.0,Algeria,MEA,UMC,DZA,INCLUDE,1,0.0,18,1,...,http://www.interieur.gov.dz/,Carte Nationale d'Identité Biométrique Electro...,free,Ministry of the Interior and Local Governments,http://www.interieur.gov.dz,2.0,5 - 60 d,free,,
3,4.0,Andorra,ECS,HIC,AND,EXCLUDE,0,,-,-,...,,-,-,Civil Registry Office,http://www.registrecivil.ad,1.0,15 d,free,,
4,5.0,Angola,SSF,LMC,AGO,INCLUDE,1,0.0,10,1,...,http://www.minjusdh.gov.ao/VerPrestadorServico...,Bilhete de Identidade (National ID card),15 kwanza,Direcção Nacional dos Registos e do Notariado ...,http://www.minjusdh.gov.ao,1.0,5 d,free,,
5,6.0,Antigua and Barbuda,LCN,HIC,ATG,INCLUDE,0,,-,-,...,,-,-,"Civil Registry, Ministry of Justice and Legal ...",http://www.legalaffairs.gov.ag/,1.0,30 d,EC 10,,
6,7.0,Argentina,LCN,UMC,ARG,INCLUDE,1,1.0,0,1,...,http://www.nuevodni.gov.ar/inicio/index.php,DNI (Documento Nacional de Identidad) / SIBIOS...,free,"Regional Civil Registries, Provincial Authorit...",http://www.mininterior.gov.ar/renaper/renaper.php,6.0,40 d,free,,
7,8.0,Armenia,ECS,LMC,ARM,INCLUDE,1,1.0,16,1,...,https://www.ekeng.am/hy/,National ID Card / National Passport,free,"Civil Registry Office, RA Ministry of Justice",http://www.moj.am/services/civil_registry/item...,1.0,1 y,free,,
8,9.0,Australia,EAS,HIC,AUS,EXCLUDE,0,,-,-,...,,-,-,"Registrar-General, Departments of Justice",http://www.australia.gov.au/topics/law-and-jus...,1.0,6 m,free,,
9,10.0,Austria,ECS,HIC,AUT,EXCLUDE,1,0.0,0,1,...,https://www.help.gv.at/Portal.Node/hlpd/public...,Identitätsausweis / Personalausweis (Austrian ...,61.5 EUR,"Register Office, Baby-Point",https://www.help.gv.at/Portal.Node/hlpd/public...,2.0,1 m,free,,


In [321]:
df['Digitized ID system']= df['Digitized ID system'].replace('-',np.nan)
df['Digitized ID system']= df['Digitized ID system'].astype(float)

In [322]:
df.head(15)

Unnamed: 0,ID,Economy,Region,Income,Code,Inclusion Criteria,National ID,NID Issued at Birth,Mandatory NID age,Digitized ID system,...,URL,NID card/number name,NID cost,Primary CR entity,CR Gov Entity URL,CR entity category,Mandatory Birth Registration Period,Birth Registration cost,data_country,data_year
0,1.0,Afghanistan,SAS,LIC,AFG,INCLUDE,1,1.0,0,1.0,...,http://mcit.gov.af/en/page/7081,Tazkira / eNID,Afs 30-35 (US$ 0.5),Ministry of Interior Affairs,"http://moi.gov.af/en/page/7180 , http://moi.go...",2.0,6 m,free,,
1,2.0,Albania,ECS,UMC,ALB,INCLUDE,1,0.0,16,1.0,...,http://www.punetebrendshme.gov.al,Letërnjoftimi / Albanian Identity Card,10 euros,"Vital Statistics Offices , Ministry of Interio...",http://www.punetebrendshme.gov.al/,5.0,60 d,100 Leke,,
2,3.0,Algeria,MEA,UMC,DZA,INCLUDE,1,0.0,18,1.0,...,http://www.interieur.gov.dz/,Carte Nationale d'Identité Biométrique Electro...,free,Ministry of the Interior and Local Governments,http://www.interieur.gov.dz,2.0,5 - 60 d,free,,
3,4.0,Andorra,ECS,HIC,AND,EXCLUDE,0,,-,,...,,-,-,Civil Registry Office,http://www.registrecivil.ad,1.0,15 d,free,,
4,5.0,Angola,SSF,LMC,AGO,INCLUDE,1,0.0,10,1.0,...,http://www.minjusdh.gov.ao/VerPrestadorServico...,Bilhete de Identidade (National ID card),15 kwanza,Direcção Nacional dos Registos e do Notariado ...,http://www.minjusdh.gov.ao,1.0,5 d,free,,
5,6.0,Antigua and Barbuda,LCN,HIC,ATG,INCLUDE,0,,-,,...,,-,-,"Civil Registry, Ministry of Justice and Legal ...",http://www.legalaffairs.gov.ag/,1.0,30 d,EC 10,,
6,7.0,Argentina,LCN,UMC,ARG,INCLUDE,1,1.0,0,1.0,...,http://www.nuevodni.gov.ar/inicio/index.php,DNI (Documento Nacional de Identidad) / SIBIOS...,free,"Regional Civil Registries, Provincial Authorit...",http://www.mininterior.gov.ar/renaper/renaper.php,6.0,40 d,free,,
7,8.0,Armenia,ECS,LMC,ARM,INCLUDE,1,1.0,16,1.0,...,https://www.ekeng.am/hy/,National ID Card / National Passport,free,"Civil Registry Office, RA Ministry of Justice",http://www.moj.am/services/civil_registry/item...,1.0,1 y,free,,
8,9.0,Australia,EAS,HIC,AUS,EXCLUDE,0,,-,,...,,-,-,"Registrar-General, Departments of Justice",http://www.australia.gov.au/topics/law-and-jus...,1.0,6 m,free,,
9,10.0,Austria,ECS,HIC,AUT,EXCLUDE,1,0.0,0,1.0,...,https://www.help.gv.at/Portal.Node/hlpd/public...,Identitätsausweis / Personalausweis (Austrian ...,61.5 EUR,"Register Office, Baby-Point",https://www.help.gv.at/Portal.Node/hlpd/public...,2.0,1 m,free,,


In [323]:
# create standard columns
df.rename(columns={'Economy':'Country Name'}, inplace=True)
df['higher_is_better'] = True
df['Year'] = 2018
df['Indicator'] = indicator
df['data_col'] = df['Digitized ID system']
df.rename(columns={'Country':'Country Name'}, inplace=True)
df['Sub-Pillar'] = subpillar

min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# transform 0-1 rank into 1-6
df['new_rank_score'] = df['data_col'].apply(lambda row: convert_rank(row,old_min=min_rank,old_max=max_rank))

In [324]:
df = df[['Country Name','Year','Indicator','data_col','new_rank_score','higher_is_better','Sub-Pillar']]
df

Unnamed: 0,Country Name,Year,Indicator,data_col,new_rank_score,higher_is_better,Sub-Pillar
0,Afghanistan,2018,can ID be used for transactions,1.0,6.0,True,Digital Legal Identity
1,Albania,2018,can ID be used for transactions,1.0,6.0,True,Digital Legal Identity
2,Algeria,2018,can ID be used for transactions,1.0,6.0,True,Digital Legal Identity
3,Andorra,2018,can ID be used for transactions,,,True,Digital Legal Identity
4,Angola,2018,can ID be used for transactions,1.0,6.0,True,Digital Legal Identity
...,...,...,...,...,...,...,...
194,"Venezuela, RB",2018,can ID be used for transactions,1.0,6.0,True,Digital Legal Identity
195,Vietnam,2018,can ID be used for transactions,1.0,6.0,True,Digital Legal Identity
196,"Yemen, Rep.",2018,can ID be used for transactions,1.0,6.0,True,Digital Legal Identity
197,Zambia,2018,can ID be used for transactions,1.0,6.0,True,Digital Legal Identity


In [325]:
df.to_csv('../indicator_scores/foundations_{}_scores.csv'.format(indicator), index=False)

## 13. Is personal data siloed


In [326]:
indicator = indicators[12]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

# Must limit the database to the first 206 rows, the remaining rows do not contain any useful information
df = df.iloc[0:205,:]

# Drop the superfluous rows by dropping na
df = df[df['#'].notna()]

# Must convert data in the DPL column into float
df['DPL'] = df['DPL'].astype(float, errors = 'ignore')

subpillars[1]
subpillar = subpillars[1]
print(subpillar)

Is personal data siloed
Egov_strategy
Digital Legal Identity


In [327]:
df.head(104)

Unnamed: 0,#,Flag,Code,Cnum,Economy,Level,Population,GNI,GNIPC,e-Government,...,NGTI-1,NGTI-2,NGTI-3,NGTI-4,GTI-1,GTI-2,GTI-3,GTI-4,data_country,data_year
0,1,,AFG,4.0,Afghanistan,LIC,38928,20726,540,https://mcit.gov.af/node/6938,...,0.69,0.52,0.31,0.62,-0.09,-0.34,0.02,-0.12,,
1,2,,ALB,8.0,Albania,UMIC,2878,14949,5240,https://e-albania.al/,...,0.81,0.78,0.60,0.77,0.29,0.62,0.74,0.26,,
2,3,,DZA,12.0,Algeria,LMIC,43851,170722,3970,https://www.mpttn.gov.dz/ar/content/%D8%A7%D9%...,...,0.73,0.46,0.02,0.60,0.06,-0.56,-0.68,-0.15,,
3,4,,ADO,20.0,Andorra,HIC,77,3154,40886,http://www.govern.ad,...,0.63,0.60,0.05,0.13,-0.28,-0.06,-0.62,-1.32,,
4,5,,AGO,24.0,Angola,LMIC,32866,97005,3050,http://www.governo.gov.ao,...,0.68,0.69,0.21,0.61,-0.12,0.27,-0.23,-0.13,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
102,98,,LSO,426.0,Lesotho,LMIC,2142,2901,1360,http://www.gov.ls,...,,,,,,,,,,
104,99,,LBR,430.0,Liberia,LIC,5058,2852,580,http://www.emansion.gov.lr,...,0.69,0.42,0.13,0.50,-0.08,-0.69,-0.42,-0.39,,
105,100,,LBY,434.0,Libya,UMIC,6871,51757,7640,http://www.pm.gov.ly,...,0.26,0.31,0.01,0.50,-1.49,-1.09,-0.71,-0.42,,
106,101,,LIE,438.0,Liechtenstein,HIC,38,4160,116430,http://regierung.li,...,0.30,0.48,0.07,0.87,-1.35,-0.49,-0.57,0.51,,


In [328]:
df['higher_is_better'] = True
df['Indicator'] = indicator
df['data_col'] = df.iloc[:,205]
df['Country Name'] = df['Economy']
df['Year'] = 2020
df['Sub-Pillar'] = subpillar

min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# transform 0-1 rank into 1-6
df['new_rank_score'] = df['data_col'].apply(lambda row: convert_rank(row,old_min=min_rank,old_max=max_rank))

In [329]:
df = df[['Country Name','Year','Indicator','data_col','new_rank_score','higher_is_better','Sub-Pillar']]
df.head(15)

Unnamed: 0,Country Name,Year,Indicator,data_col,new_rank_score,higher_is_better,Sub-Pillar
0,Afghanistan,2020,Is personal data siloed,-0.04,1.0,True,Digital Legal Identity
1,Albania,2020,Is personal data siloed,0.02,6.0,True,Digital Legal Identity
2,Algeria,2020,Is personal data siloed,-0.04,1.0,True,Digital Legal Identity
3,Andorra,2020,Is personal data siloed,0.02,6.0,True,Digital Legal Identity
4,Angola,2020,Is personal data siloed,0.02,6.0,True,Digital Legal Identity
5,Antigua and Barbuda,2020,Is personal data siloed,0.02,6.0,True,Digital Legal Identity
6,Argentina,2020,Is personal data siloed,0.02,6.0,True,Digital Legal Identity
7,Armenia,2020,Is personal data siloed,0.02,6.0,True,Digital Legal Identity
8,Australia,2020,Is personal data siloed,0.02,6.0,True,Digital Legal Identity
9,Austria,2020,Is personal data siloed,,,True,Digital Legal Identity


In [330]:
# output scores
df.to_csv('../indicator_scores/foundations_{}_scores.csv'.format(indicator), index=False)

## 14. Open data index

In [331]:
bnames

Unnamed: 0,Indicator,check,Data Source,Data Link,Index,Filename,Sub-Pillar
163,Digital payments penetration,Foundations,Portulans Institute: Network Readiness Index,https://networkreadinessindex.org,True,digital_payments_penetration,Digital Payments
164,% of population with digital finance account,Foundations,World Bank: Global Findex database,https://datacatalog.worldbank.org,False,population_digital_financial_services,Digital Payments
165,Made or received digital payments in the past ...,Foundations,World Bank: Global Findex database,https://datacatalog.worldbank.org,False,population_digital_financial_services,Digital Payments
166,Made or received digital payments in the past ...,Foundations,World Bank: Global Findex database,https://datacatalog.worldbank.org,False,population_digital_financial_services,Digital Payments
169,Used a mobile phone or the internet to check a...,Foundations,World Bank: Global Findex database,https://datacatalog.worldbank.org,False,population_digital_financial_services,Digital Payments
170,Used a mobile phone or the internet to check a...,Foundations,World Bank: Global Findex database,https://datacatalog.worldbank.org,False,population_digital_financial_services,Digital Payments
171,\n% Making online transactions - Men,Foundations,World Bank: Global Findex database,https://datacatalog.worldbank.org,False,population_digital_financial_services,Digital Payments
172,% Making online transactions - Women,Foundations,World Bank: Global Findex database,https://datacatalog.worldbank.org,False,population_digital_financial_services,Digital Payments
173,\n% of population with digital finance account...,Foundations,World Bank: Global Findex database,https://datacatalog.worldbank.org,False,population_digital_financial_services,Digital Payments
178,% of population with ID (all),Foundations,World Bank: Identification For Development (ID4D),https://datacatalog.worldbank.org,False,id4d_nid,Digital Legal Identity


In [332]:
indicator = indicators[13]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

subpillars[2]
subpillar = subpillars[2]
print(subpillar)

Open data index
open_data_idx
Data Exchange


In [333]:
df.head(15)

Unnamed: 0,id,site,name,slug,region,continent,rank,score
0,tw,global,Taiwan,taiwan,,,1,90
1,au,global,Australia,australia,,,2,79
2,gb,global,Great Britain,united_kingdom,,,2,79
3,fr,global,France,france,,,4,70
4,fi,global,Finland,finland,,,5,69
5,ca,global,Canada,canada,,,5,69
6,no,global,Norway,norway,,,5,69
7,br,global,Brazil,brazil,,,8,68
8,nz,global,New Zealand,new_zealand,,,8,68
9,nir,global,Northern Ireland,northern_ireland,,,10,67


In [334]:
df['higher_is_better'] = True
df['Indicator'] = indicator
df['data_col'] = df['score'] 
df['Country Name'] = df['name']
df['Year'] = 2016
df['Sub-Pillar'] = subpillar

min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# transform 0-1 rank into 1-6
df['new_rank_score'] = df['data_col'].apply(lambda row: convert_rank(row,old_min=min_rank,old_max=max_rank))

In [335]:
df = df[['Country Name','Year','Indicator','data_col','new_rank_score','higher_is_better','Sub-Pillar']]
df.head(15)

Unnamed: 0,Country Name,Year,Indicator,data_col,new_rank_score,higher_is_better,Sub-Pillar
0,Taiwan,2016,Open data index,90,6.0,True,Data Exchange
1,Australia,2016,Open data index,79,5.382022,True,Data Exchange
2,Great Britain,2016,Open data index,79,5.382022,True,Data Exchange
3,France,2016,Open data index,70,4.876404,True,Data Exchange
4,Finland,2016,Open data index,69,4.820225,True,Data Exchange
5,Canada,2016,Open data index,69,4.820225,True,Data Exchange
6,Norway,2016,Open data index,69,4.820225,True,Data Exchange
7,Brazil,2016,Open data index,68,4.764045,True,Data Exchange
8,New Zealand,2016,Open data index,68,4.764045,True,Data Exchange
9,Northern Ireland,2016,Open data index,67,4.707865,True,Data Exchange


In [336]:
# output scores
df.to_csv('../indicator_scores/foundations_{}_scores.csv'.format(indicator), index=False)

### Score Aggregating

In [337]:
import os


In [338]:
# get list of files in scores folder
scores = os.listdir('../indicator_scores/')
scores = [s for s in scores if s.startswith('foundations')]

In [339]:
scores

['foundations_% Making online transactions - Women_scores.csv',
 'foundations_% of population with digital finance account_scores.csv',
 'foundations_% of population with ID (all)_scores.csv',
 'foundations_% of population with ID (female)_scores.csv',
 'foundations_can ID be used for transactions_scores.csv',
 'foundations_Digital payments penetration_scores.csv',
 'foundations_Financial institution account_scores.csv',
 'foundations_Is personal data siloed_scores.csv',
 'foundations_Made or received digital payments in the past year (% age 15+) - all_scores.csv',
 'foundations_Made or received digital payments in the past year (% age 15+) - female_scores.csv',
 'foundations_Making online transactions - Men_scores.csv',
 'foundations_Open data index_scores.csv',
 'foundations_Used a mobile phone or the internet to check account balance in the past year (% age 15+) - all_scores.csv',
 'foundations_Used a mobile phone or the internet to check account balance in the past year (% age 15+)

In [340]:
# create a dataframe that concatenates all these file into one table
df = pd.concat([pd.read_csv('../indicator_scores/{}'.format(s)) for s in scores])    

In [341]:
df

Unnamed: 0.1,Country Name,Year,Indicator,data_col,new_rank_score,higher_is_better,Sub-Pillar,Unnamed: 0
0,Afghanistan,2017,% Making online transactions - Women,0.0,1.000000,True,Digital Payments,
1,Albania,2017,% Making online transactions - Women,6.0,1.337079,True,Digital Payments,
2,Arab world,2017,% Making online transactions - Women,6.0,1.337079,True,Digital Payments,
3,United Arab Emirates,2017,% Making online transactions - Women,54.0,4.033708,True,Digital Payments,
4,Argentina,2017,% Making online transactions - Women,16.0,1.898876,True,Digital Payments,
...,...,...,...,...,...,...,...,...
161,World,2017,Used a mobile phone or the internet to check a...,23.0,2.352941,True,Digital Payments,
162,Kosovo,2017,Used a mobile phone or the internet to check a...,4.0,1.235294,True,Digital Payments,
163,South Africa,2017,Used a mobile phone or the internet to check a...,18.0,2.058824,True,Digital Payments,
164,Zambia,2017,Used a mobile phone or the internet to check a...,25.0,2.470588,True,Digital Payments,


In [342]:
# Data cleaning
df['new_rank_score'] = df['new_rank_score'].fillna(0)
df.sort_values(by=['Country Name'], ascending=True, inplace=True)
df.reset_index(drop=True, inplace=True)

In [343]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2168 entries, 0 to 2167
Data columns (total 8 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   Country Name      2168 non-null   object 
 1   Year              2168 non-null   int64  
 2   Indicator         2168 non-null   object 
 3   data_col          2140 non-null   float64
 4   new_rank_score    2168 non-null   float64
 5   higher_is_better  2168 non-null   bool   
 6   Sub-Pillar        2168 non-null   object 
 7   Unnamed: 0        166 non-null    float64
dtypes: bool(1), float64(3), int64(1), object(3)
memory usage: 120.8+ KB


In [344]:
df.head(15)

Unnamed: 0.1,Country Name,Year,Indicator,data_col,new_rank_score,higher_is_better,Sub-Pillar,Unnamed: 0
0,-,2020,Is personal data siloed,,0.0,True,Digital Legal Identity,
1,0.03,2020,Is personal data siloed,,0.0,True,Digital Legal Identity,
2,Afghanistan,2017,% Making online transactions - Women,0.0,1.0,True,Digital Payments,
3,Afghanistan,2017,Used a mobile phone or the internet to check a...,1.0,1.058824,True,Digital Payments,
4,Afghanistan,2018,can ID be used for transactions,1.0,6.0,True,Digital Legal Identity,
5,Afghanistan,2017,Made or received digital payments in the past ...,4.0,1.0,True,Digital Payments,
6,Afghanistan,2017,% of population with ID (all),15.0,1.32967,True,Digital Payments,
7,Afghanistan,2017,Made or received digital payments in the past ...,11.0,1.217391,True,Digital Payments,
8,Afghanistan,2017,\n% of population with digital finance account...,15.0,1.32967,True,Digital Payments,2.0
9,Afghanistan,2017,\n% Making online transactions - Men,1.0,1.0,True,Digital Payments,


In [345]:
df.describe()

Unnamed: 0.1,Year,data_col,new_rank_score,Unnamed: 0
count,2168.0,2140.0,2168.0,166.0
mean,2017.325185,36.915617,3.47068,249.325301
std,0.927393,32.110975,1.798547,140.305563
min,2016.0,-0.04,0.0,2.0
25%,2017.0,6.0,1.842697,130.75
50%,2017.0,30.0,3.158385,251.5
75%,2017.0,64.0,5.325843,367.0
max,2020.0,100.0,6.0,493.0


In [346]:
# checking country names
sorted(df['Country Name'].unique().tolist())

['-',
 '0.03',
 'Afghanistan',
 'Albania',
 'Algeria',
 'Andorra',
 'Angola',
 'Antigua and Barbuda',
 'Arab world',
 'Argentina',
 'Armenia',
 'Australia',
 'Austria',
 'Azerbaijan',
 'Bahamas',
 'Bahamas, The',
 'Bahrain',
 'Bangladesh',
 'Barbados',
 'Belarus',
 'Belgium',
 'Belize',
 'Benin',
 'Bhutan',
 'Bolivia',
 'Bosnia and Herzegovina',
 'Botswana',
 'Brazil',
 'Brunei Darussalam',
 'Bulgaria',
 'Burkina Faso',
 'Burundi',
 'Cabo Verde',
 'Cambodia',
 'Cameroon',
 'Canada',
 'Central African Republic',
 'Chad',
 'Chile',
 'China',
 'Chinese Taipei',
 'Colombia',
 'Comoros',
 'Congo, Dem. Rep.',
 'Congo, Rep.',
 'Costa Rica',
 "Cote d'Ivoire",
 'Croatia',
 'Cuba',
 'Cyprus',
 'Czech Republic',
 "Côte d'Ivoire",
 'Denmark',
 'Developing',
 'Djibouti',
 'Dominica',
 'Dominican Republic',
 'East Asia & Pacific',
 'East Asia & Pacific (excluding high income)',
 'Ecuador',
 'Egypt',
 'Egypt, Arab Rep.',
 'El Salvador',
 'Equatorial Guinea',
 'Eritrea',
 'Estonia',
 'Eswatini',
 'Eth

In [347]:
# remove trailing whitespaces from country name
df['Country Name'] = df['Country Name'].str.strip()
df['Country Name'] = df['Country Name'].str.strip('**')
df['Country Name'] = df['Country Name'].str.strip('*')

In [348]:
# checking country names
sorted(df['Country Name'].unique().tolist())

['-',
 '0.03',
 'Afghanistan',
 'Albania',
 'Algeria',
 'Andorra',
 'Angola',
 'Antigua and Barbuda',
 'Arab world',
 'Argentina',
 'Armenia',
 'Australia',
 'Austria',
 'Azerbaijan',
 'Bahamas',
 'Bahamas, The',
 'Bahrain',
 'Bangladesh',
 'Barbados',
 'Belarus',
 'Belgium',
 'Belize',
 'Benin',
 'Bhutan',
 'Bolivia',
 'Bosnia and Herzegovina',
 'Botswana',
 'Brazil',
 'Brunei Darussalam',
 'Bulgaria',
 'Burkina Faso',
 'Burundi',
 'Cabo Verde',
 'Cambodia',
 'Cameroon',
 'Canada',
 'Central African Republic',
 'Chad',
 'Chile',
 'China',
 'Chinese Taipei',
 'Colombia',
 'Comoros',
 'Congo, Dem. Rep.',
 'Congo, Rep.',
 'Costa Rica',
 "Cote d'Ivoire",
 'Croatia',
 'Cuba',
 'Cyprus',
 'Czech Republic',
 "Côte d'Ivoire",
 'Denmark',
 'Developing',
 'Djibouti',
 'Dominica',
 'Dominican Republic',
 'East Asia & Pacific',
 'East Asia & Pacific (excluding high income)',
 'Ecuador',
 'Egypt',
 'Egypt, Arab Rep.',
 'El Salvador',
 'Equatorial Guinea',
 'Eritrea',
 'Estonia',
 'Eswatini',
 'Eth

In [349]:
# average indicator scores per country
agg_df = df.groupby(['Country Name']).agg({'new_rank_score':'mean','data_col':'count'})

In [350]:
agg_df.columns = ['agg_score', 'count_source' ]

In [351]:
max_number_sources = agg_df.describe()['count_source']['max']

In [352]:
agg_df['agg_score_wt'] = agg_df['agg_score']*(agg_df['count_source']/max_number_sources)

In [353]:
agg_df.sort_values(by='agg_score', ascending=False, inplace=True)

In [354]:
agg_df.head(25)

Unnamed: 0_level_0,agg_score,count_source,agg_score_wt
Country Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Liechtenstein,6.0,2,0.857143
São Tomé and Principe,6.0,2,0.857143
Seychelles,6.0,2,0.857143
Cuba,6.0,2,0.857143
"Gambia, The",6.0,1,0.428571
Gambia,6.0,1,0.428571
Cabo Verde,6.0,2,0.857143
Taiwan,6.0,1,0.428571
St. Kitts and Nevis,6.0,2,0.857143
San Marino,6.0,2,0.857143


In [355]:
agg_df.to_csv('../pillar_scores/foundation_scores_v0.csv')

In [356]:
### Score Aggregating by Subpillars

In [357]:
df.insert(0,'Pillar','Foundations')
df

# remove trailing whitespaces from country name
df['Country Name'] = df['Country Name'].str.strip()
df['Country Name'] = df['Country Name'].str.strip('**')
df['Country Name'] = df['Country Name'].str.strip('*')

In [358]:
sub_df = df.groupby(['Pillar','Sub-Pillar','Country Name']).agg({'new_rank_score':'mean','data_col':'count'})

In [359]:
sub_df.columns = ['agg_score', 'count_source' ]

In [360]:
max_number_sources = sub_df.describe()['count_source']['max']

In [361]:
sub_df['agg_score_wt'] = sub_df['agg_score']*(sub_df['count_source']/max_number_sources)

In [362]:
sub_df.to_csv('../subpillar_score/foundations_scores_subpillar_v0.csv')

### Sources Generation

In [363]:
#Get all countries from Countries.xlsx
countries = pd.read_excel('../../data/Countries.xlsx')
col_names = ['Country or Area']
countries = countries[col_names]
countries.rename(columns = {'Country or Area': 'Country Name'}, inplace = True)

In [364]:
#Get all indicators from names dataframe retrieve at the begining of the script
bnames=bnames[['check','Sub-Pillar','Indicator','Data Source','Data Link']]
bnames.rename(columns = {'check': 'Pillar'}, inplace = True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().rename(


In [365]:
#Do a nice cross join so that we have combination of all countries vs all indicators
sources = countries.merge(bnames, how='cross')
sources

Unnamed: 0,Country Name,Pillar,Sub-Pillar,Indicator,Data Source,Data Link
0,Algeria,Foundations,Digital Payments,Digital payments penetration,Portulans Institute: Network Readiness Index,https://networkreadinessindex.org
1,Algeria,Foundations,Digital Payments,% of population with digital finance account,World Bank: Global Findex database,https://datacatalog.worldbank.org
2,Algeria,Foundations,Digital Payments,Made or received digital payments in the past ...,World Bank: Global Findex database,https://datacatalog.worldbank.org
3,Algeria,Foundations,Digital Payments,Made or received digital payments in the past ...,World Bank: Global Findex database,https://datacatalog.worldbank.org
4,Algeria,Foundations,Digital Payments,Used a mobile phone or the internet to check a...,World Bank: Global Findex database,https://datacatalog.worldbank.org
...,...,...,...,...,...,...
3481,Wallis and Futuna Islands,Foundations,Digital Legal Identity,% of population with ID (all),World Bank: Identification For Development (ID4D),https://datacatalog.worldbank.org
3482,Wallis and Futuna Islands,Foundations,Digital Legal Identity,% of population with ID (female),World Bank: Identification For Development (ID4D),https://datacatalog.worldbank.org
3483,Wallis and Futuna Islands,Foundations,Digital Legal Identity,can ID be used for transactions,World Bank: Identification For Development (ID4D),https://datacatalog.worldbank.org
3484,Wallis and Futuna Islands,Foundations,Digital Legal Identity,Is personal data siloed,World Bank: GovTech Dataset,https://datacatalog.worldbank.org


In [366]:
#Make copy of a scores dataframe and add the column available, with value of 1 (string)
#denoting all the country/indicator combinations that have value
dfsources = df[['Country Name','Pillar','Sub-Pillar','Indicator']].copy()
dfsources['Available'] = '1'

In [367]:
#Merge (left join) sources (all country indicator combinations)
#with those in copied scores dataframe
#resulting in a sources dataframe with values of 1 in Available column where there is a value for country/indicator
#and there is a NaN where there's not. 
#subsequently replace NaN with 0 (string)
sources = sources.merge(dfsources, how='left', on=['Country Name','Pillar','Sub-Pillar','Indicator'])
sources['Available'] = sources['Available'].fillna('0')
sources

Unnamed: 0,Country Name,Pillar,Sub-Pillar,Indicator,Data Source,Data Link,Available
0,Algeria,Foundations,Digital Payments,Digital payments penetration,Portulans Institute: Network Readiness Index,https://networkreadinessindex.org,0
1,Algeria,Foundations,Digital Payments,% of population with digital finance account,World Bank: Global Findex database,https://datacatalog.worldbank.org,1
2,Algeria,Foundations,Digital Payments,Made or received digital payments in the past ...,World Bank: Global Findex database,https://datacatalog.worldbank.org,1
3,Algeria,Foundations,Digital Payments,Made or received digital payments in the past ...,World Bank: Global Findex database,https://datacatalog.worldbank.org,1
4,Algeria,Foundations,Digital Payments,Used a mobile phone or the internet to check a...,World Bank: Global Findex database,https://datacatalog.worldbank.org,1
...,...,...,...,...,...,...,...
3481,Wallis and Futuna Islands,Foundations,Digital Legal Identity,% of population with ID (all),World Bank: Identification For Development (ID4D),https://datacatalog.worldbank.org,0
3482,Wallis and Futuna Islands,Foundations,Digital Legal Identity,% of population with ID (female),World Bank: Identification For Development (ID4D),https://datacatalog.worldbank.org,0
3483,Wallis and Futuna Islands,Foundations,Digital Legal Identity,can ID be used for transactions,World Bank: Identification For Development (ID4D),https://datacatalog.worldbank.org,0
3484,Wallis and Futuna Islands,Foundations,Digital Legal Identity,Is personal data siloed,World Bank: GovTech Dataset,https://datacatalog.worldbank.org,0


In [368]:
#If sources.csv exists, get the contents, remove everyhing from this pillar, append prepared sources, save csv.
#if sources.csv does not exist, create new file from sources.
from os.path import exists

if exists('../../dashboard/Sources.csv') :
    CurrentSources = pd.read_csv('../../dashboard/Sources.csv', dtype=str)
    CurrentSources = CurrentSources[['Country Name','Pillar','Sub-Pillar','Indicator','Data Source','Data Link','Available']]
    CurrentSources.loc[CurrentSources['Pillar'] != 'Foundations']
    CurrentSources = CurrentSources.append(sources)
else :
    CurrentSources = sources
CurrentSources

Unnamed: 0,Country Name,Pillar,Sub-Pillar,Indicator,Data Source,Data Link,Available
0,Algeria,Strategy,Ambition,SDG Index,Sustainable Development Report,https://www.sdgindex.org,1
1,Egypt,Strategy,Ambition,SDG Index,Sustainable Development Report,https://www.sdgindex.org,0
2,Libya,Strategy,Ambition,SDG Index,Sustainable Development Report,https://www.sdgindex.org,1
3,Morocco,Strategy,Ambition,SDG Index,Sustainable Development Report,https://www.sdgindex.org,1
4,Sudan,Strategy,Ambition,SDG Index,Sustainable Development Report,https://www.sdgindex.org,1
...,...,...,...,...,...,...,...
3481,Wallis and Futuna Islands,Foundations,Digital Legal Identity,% of population with ID (all),World Bank: Identification For Development (ID4D),https://datacatalog.worldbank.org,0
3482,Wallis and Futuna Islands,Foundations,Digital Legal Identity,% of population with ID (female),World Bank: Identification For Development (ID4D),https://datacatalog.worldbank.org,0
3483,Wallis and Futuna Islands,Foundations,Digital Legal Identity,can ID be used for transactions,World Bank: Identification For Development (ID4D),https://datacatalog.worldbank.org,0
3484,Wallis and Futuna Islands,Foundations,Digital Legal Identity,Is personal data siloed,World Bank: GovTech Dataset,https://datacatalog.worldbank.org,0


In [369]:
CurrentSources.to_csv('../../dashboard/Sources.csv', index=False)