In [1]:
import pandas as pd
import numpy as np

In [2]:
### Get all the pillar names from the excel

In [3]:
names = pd.read_excel('../../UNDP Digital Assessment Data Framework Filename Matching V7.xlsx')

In [4]:
col_names = ['Indicator','check', 'Data Source','Index','Filename','Sub-Pillar']

In [5]:
names = names[col_names]

In [6]:
names.head()

Unnamed: 0,Indicator,check,Data Source,Index,Filename,Sub-Pillar
0,Countries,,United Nations,False,Countries,
1,"Database of Global Administrative Areas (GADM,...",,GADM maps and data,False,,
2,High Resolution Population Density Maps + Demo...,,Facebook,False,,
3,population density vs openstreetmap object den...,,Kontur,False,,
4,Population Density,Infrastructure,World Bank,False,population_density,Connectivity Technology


In [7]:
# get all the files per pillar
data_stats = names.groupby('check').agg({'Filename':'count','Indicator':'count'})

In [8]:
data_stats

Unnamed: 0_level_0,Filename,Indicator
check,Unnamed: 1_level_1,Unnamed: 2_level_1
Business,16,25
Foundations,8,13
Government,9,15
Infrastructure,39,48
People,34,47
Regulation,5,7
Strategy,1,1


In [9]:
### People

In [10]:
bnames = names[(names.check=='People')&(~names.Filename.isna())]#&(names.Index==False)]
bnames

Unnamed: 0,Indicator,check,Data Source,Index,Filename,Sub-Pillar
99,Human Capital Index (HCI),People,DESA,True,e_government_index,Digital Literacy Skills
100,% of population using internet (all),People,ITU,False,ITU_database,Digital Literacy Skills
101,% of population using internet (female),People,ITU,False,ITU_database,Digital Literacy Skills
102,% of population using internet (male),People,ITU,False,ITU_database,Digital Literacy Skills
103,SDG 4.4 Digital literacy data,People,UNESCO,False,SDG_digital_literacy_data,Digital Literacy Skills
104,UNDP Human Development Index (HDI),People,UNDP,True,undp_human_developmnt,Culture
105,Facebook Social Connectedness Index,People,Facebook,True,fb_social_connectedness,Culture
106,Share of individuals using the Internet to int...,People,OECD,False,population_interacting_public_officials,Culture
107,Level of satisfaction for online public servic...,People,Boston Consulting Group/SalesForce,False,digital_public_service_use,Culture
108,Number of mobile apps available in national la...,People,GSMA Mobile Connectivity Index,False,apps_in_national_language,Culture


In [11]:
# get list of names for all indicators
indicators = bnames.Indicator.unique()
subpillars = bnames['Sub-Pillar'].unique()

In [12]:
# get all file names
bfiles = bnames.Filename.unique()

In [13]:
bfiles

array(['e_government_index', 'ITU_database', 'SDG_digital_literacy_data',
       'undp_human_developmnt', 'fb_social_connectedness',
       'population_interacting_public_officials',
       'digital_public_service_use', 'apps_in_national_language',
       'time_spent_online', 'happiness_score', 'cryptocurrency_adoption',
       'not_buying_online_concern_about_returning',
       'not_buying_online_concern_about_security',
       'ewaste_per_inhabitant', 'automation_led_unemployment',
       'cyberbullying_rate', 'global_wellbeing_initiative',
       'financial_inclusiveness', 'individuals_buying_online_frequency',
       'FB_users', 'gender_gaps', 'population_digital_financial_services',
       'tax_percent_mobile_ownership', 'population_with_smartphones',
       'Chainalysis_2020_Geography_Cryptocurrency_Report'], dtype=object)

In [14]:
subpillars

array(['Digital Literacy Skills', 'Culture', 'Digital Wellbeing',
       'Usage and ownership'], dtype=object)

In [15]:
# formula for converting scale
def convert_rank(old_value, old_min=1, old_max=7, new_min=1, new_max=6 ):
    """ Convert old scale values scale into new scale values"""
    old_range = old_max - old_min
    new_range = new_max - new_min
    new_value = (((old_value-old_min)*new_range)/old_range)+new_min
    return new_value

In [16]:
### 1. Human Capital Index (HCI)

In [17]:
indicators[0]

# load data
indicator = indicators[0]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

Human Capital Index (HCI)
e_government_index


In [18]:
df.head(10)

Unnamed: 0,Survey Year,Country Name,E-Government Rank,E-Government Index,E-Participation Index,Online Service Index,Human Capital Index,Telecommunication Infrastructure Index
0,2020,Iraq,143,0.436,0.3095,0.3353,0.4358,0.537
1,2020,Ireland,27,0.8433,0.8571,0.7706,0.9494,0.81
2,2020,Israel,30,0.8361,0.7143,0.7471,0.8924,0.8689
3,2020,Italy,37,0.8231,0.8214,0.8294,0.8466,0.7932
4,2020,Jamaica,114,0.5392,0.369,0.3882,0.7142,0.5151
5,2020,Japan,14,0.8989,0.9881,0.9059,0.8684,0.9223
6,2020,Jordan,117,0.5309,0.3333,0.3588,0.68,0.554
7,2020,Kazakhstan,29,0.8375,0.881,0.9235,0.8866,0.7024
8,2020,Kenya,116,0.5326,0.5952,0.6765,0.5812,0.3402
9,2020,Kiribati,145,0.432,0.5595,0.4941,0.6778,0.1241


In [19]:
subpillars[0]
subpillar = subpillars[0]
print(subpillar)

Digital Literacy Skills


In [20]:
# create standard columns
# df.rename(columns={'COUNTRY/ECONOMY':'Country Name'}, inplace=True)
df['higher_is_better'] = True
df['Indicator'] = indicator
df['data_col'] = df['Human Capital Index'] 
df['Year'] = df['Survey Year']
df['Sub-Pillar'] = subpillar

min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# transform 0-1 rank into 1-6
df['new_rank_score'] = df['data_col'].apply(lambda row: convert_rank(row,old_min=min_rank,old_max=max_rank))

In [21]:
df = df[['Country Name', 'Year','Indicator','data_col','new_rank_score','higher_is_better','Sub-Pillar']]
df

Unnamed: 0,Country Name,Year,Indicator,data_col,new_rank_score,higher_is_better,Sub-Pillar
0,Iraq,2020,Human Capital Index (HCI),0.4358,3.179,True,Digital Literacy Skills
1,Ireland,2020,Human Capital Index (HCI),0.9494,5.747,True,Digital Literacy Skills
2,Israel,2020,Human Capital Index (HCI),0.8924,5.462,True,Digital Literacy Skills
3,Italy,2020,Human Capital Index (HCI),0.8466,5.233,True,Digital Literacy Skills
4,Jamaica,2020,Human Capital Index (HCI),0.7142,4.571,True,Digital Literacy Skills
...,...,...,...,...,...,...,...
188,Senegal,2020,Human Capital Index (HCI),0.3332,2.666,True,Digital Literacy Skills
189,Serbia,2020,Human Capital Index (HCI),0.8280,5.140,True,Digital Literacy Skills
190,Seychelles,2020,Human Capital Index (HCI),0.7660,4.830,True,Digital Literacy Skills
191,Singapore,2020,Human Capital Index (HCI),0.8904,5.452,True,Digital Literacy Skills


In [22]:
df.to_csv('../indicator_scores/people_{}_scores.csv'.format(indicator), index=False)

In [23]:
### 2. % of population using internet (all)

In [24]:
indicators[1]

# load data
indicator = indicators[1]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

% of population using internet (all)
ITU_database


In [25]:
df.head(150)

Unnamed: 0,Country,Region,ISO,Indicator name,Year,Value,data_country,data_year
0,Angola,Africa,AGO,Female mobile phone ownership as a % of total ...,2010.0,,,
1,Benin,Africa,BEN,Female mobile phone ownership as a % of total ...,2010.0,,,
2,Botswana,Africa,BWA,Female mobile phone ownership as a % of total ...,2010.0,,,
3,Burkina Faso,Africa,BFA,Female mobile phone ownership as a % of total ...,2010.0,,,
4,Burundi,Africa,BDI,Female mobile phone ownership as a % of total ...,2010.0,,,
...,...,...,...,...,...,...,...,...
145,North Macedonia,Europe,MKD,Female mobile phone ownership as a % of total ...,2010.0,,,
146,Norway,Europe,NOR,Female mobile phone ownership as a % of total ...,2010.0,,,
147,Poland,Europe,POL,Female mobile phone ownership as a % of total ...,2010.0,,,
148,Portugal,Europe,PRT,Female mobile phone ownership as a % of total ...,2010.0,,,


In [26]:
subpillars[0]
subpillar = subpillars[0]
print(subpillar)

Digital Literacy Skills


In [27]:
df = df[(df.Year==2019)]
df = df[(df.iloc[:,3] =='Internet users: 25-74 years as a % of all 25-74 years')]

# create standard columns
# df.rename(columns={'COUNTRY/ECONOMY':'Country Name'}, inplace=True)
df['higher_is_better'] = True
df['Indicator'] = indicator
df['data_col'] = df['Value'] 
df['Country Name'] = df['Country']
df['Sub-Pillar'] = subpillar

min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# transform 0-1 rank into 1-6
df['new_rank_score'] = df['data_col'].apply(lambda row: convert_rank(row,old_min=min_rank,old_max=max_rank))

In [28]:
df = df[['Country Name', 'Year','Indicator','data_col','new_rank_score','higher_is_better','Sub-Pillar']]
df

Unnamed: 0,Country Name,Year,Indicator,data_col,new_rank_score,higher_is_better,Sub-Pillar
7750,Angola,2019.0,% of population using internet (all),,,True,Digital Literacy Skills
7751,Benin,2019.0,% of population using internet (all),,,True,Digital Literacy Skills
7752,Botswana,2019.0,% of population using internet (all),,,True,Digital Literacy Skills
7753,Burkina Faso,2019.0,% of population using internet (all),,,True,Digital Literacy Skills
7754,Burundi,2019.0,% of population using internet (all),,,True,Digital Literacy Skills
...,...,...,...,...,...,...,...
7940,Suriname,2019.0,% of population using internet (all),,,True,Digital Literacy Skills
7941,Trinidad and Tobago,2019.0,% of population using internet (all),,,True,Digital Literacy Skills
7942,United States,2019.0,% of population using internet (all),,,True,Digital Literacy Skills
7943,Uruguay,2019.0,% of population using internet (all),84.540984,4.771226,True,Digital Literacy Skills


In [29]:
df.to_csv('../indicator_scores/people_{}_scores.csv'.format(indicator), index=False)

In [30]:
### 3. % of population using internet (female)

In [31]:
indicators[2]

# load data
indicator = indicators[2]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

% of population using internet (female)
ITU_database


In [32]:
df.head(10)

# Must convert the string in the dataset to float

Unnamed: 0,Country,Region,ISO,Indicator name,Year,Value,data_country,data_year
0,Angola,Africa,AGO,Female mobile phone ownership as a % of total ...,2010.0,,,
1,Benin,Africa,BEN,Female mobile phone ownership as a % of total ...,2010.0,,,
2,Botswana,Africa,BWA,Female mobile phone ownership as a % of total ...,2010.0,,,
3,Burkina Faso,Africa,BFA,Female mobile phone ownership as a % of total ...,2010.0,,,
4,Burundi,Africa,BDI,Female mobile phone ownership as a % of total ...,2010.0,,,
5,Cabo Verde,Africa,CPV,Female mobile phone ownership as a % of total ...,2010.0,,,
6,Cameroon,Africa,CMR,Female mobile phone ownership as a % of total ...,2010.0,,,
7,Central African Rep.,Africa,CAF,Female mobile phone ownership as a % of total ...,2010.0,,,
8,Chad,Africa,TCD,Female mobile phone ownership as a % of total ...,2010.0,,,
9,Congo (Rep. of the),Africa,COG,Female mobile phone ownership as a % of total ...,2010.0,,,


In [33]:
subpillars[0]
subpillar = subpillars[0]
print(subpillar)

Digital Literacy Skills


In [34]:
df = df[(df.iloc[:,3] =='Female Internet users as a % of total female population')]
df = df[(df.Year==2019)]

# create standard columns
# df.rename(columns={'COUNTRY/ECONOMY':'Country Name'}, inplace=True)
df['higher_is_better'] = True
df['Indicator'] = indicator
df['data_col'] = df['Value']
df['Country Name'] = df['Country']
df['Sub-Pillar'] = subpillar


min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# transform 0-1 rank into 1-6
df['new_rank_score'] = df['data_col'].apply(lambda row: convert_rank(row,old_min=min_rank,old_max=max_rank))

In [35]:
df = df[['Country Name', 'Year','Indicator','data_col','new_rank_score','higher_is_better','Sub-Pillar']]
df

Unnamed: 0,Country Name,Year,Indicator,data_col,new_rank_score,higher_is_better,Sub-Pillar
40307,Cabo Verde,2019.0,% of population using internet (female),60.965017,3.766390,True,Digital Literacy Skills
40308,Côte d'Ivoire,2019.0,% of population using internet (female),32.924445,2.151554,True,Digital Literacy Skills
40309,Kenya,2019.0,% of population using internet (female),20.125122,1.414451,True,Digital Literacy Skills
40310,Lesotho,2019.0,% of population using internet (female),44.922768,2.842529,True,Digital Literacy Skills
40311,Mauritius,2019.0,% of population using internet (female),60.130645,3.718339,True,Digital Literacy Skills
...,...,...,...,...,...,...,...
40388,Mexico,2019.0,% of population using internet (female),68.574653,4.204623,True,Digital Literacy Skills
40389,Panama,2019.0,% of population using internet (female),63.811494,3.930316,True,Digital Literacy Skills
40390,Paraguay,2019.0,% of population using internet (female),69.017230,4.230110,True,Digital Literacy Skills
40391,Peru,2019.0,% of population using internet (female),56.992144,3.537595,True,Digital Literacy Skills


In [36]:
df.to_csv('../indicator_scores/people_{}_scores.csv'.format(indicator), index=False)

In [37]:
### 4. % of population using internet (male)

In [38]:
indicators[3]

# load data
indicator = indicators[3]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

% of population using internet (male)
ITU_database


In [39]:
df.head(10)

Unnamed: 0,Country,Region,ISO,Indicator name,Year,Value,data_country,data_year
0,Angola,Africa,AGO,Female mobile phone ownership as a % of total ...,2010.0,,,
1,Benin,Africa,BEN,Female mobile phone ownership as a % of total ...,2010.0,,,
2,Botswana,Africa,BWA,Female mobile phone ownership as a % of total ...,2010.0,,,
3,Burkina Faso,Africa,BFA,Female mobile phone ownership as a % of total ...,2010.0,,,
4,Burundi,Africa,BDI,Female mobile phone ownership as a % of total ...,2010.0,,,
5,Cabo Verde,Africa,CPV,Female mobile phone ownership as a % of total ...,2010.0,,,
6,Cameroon,Africa,CMR,Female mobile phone ownership as a % of total ...,2010.0,,,
7,Central African Rep.,Africa,CAF,Female mobile phone ownership as a % of total ...,2010.0,,,
8,Chad,Africa,TCD,Female mobile phone ownership as a % of total ...,2010.0,,,
9,Congo (Rep. of the),Africa,COG,Female mobile phone ownership as a % of total ...,2010.0,,,


In [40]:
subpillars[0]
subpillar = subpillars[0]
print(subpillar)

Digital Literacy Skills


In [41]:
df = df[(df.iloc[:,3] =='Male Internet users as a % of total male population')]
df = df[(df.Year==2019)]

# create standard columns
# df.rename(columns={'COUNTRY/ECONOMY':'Country Name'}, inplace=True)
df['higher_is_better'] = True
df['Indicator'] = indicator
df['data_col'] = df['Value']
df['Country Name'] = df['Country']
df['Sub-Pillar'] = subpillar

min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# transform 0-1 rank into 1-6
df['new_rank_score'] = df['data_col'].apply(lambda row: convert_rank(row,old_min=min_rank,old_max=max_rank))

In [42]:
df = df[['Country Name', 'Year','Indicator','data_col','new_rank_score','higher_is_better','Sub-Pillar']]
df

Unnamed: 0,Country Name,Year,Indicator,data_col,new_rank_score,higher_is_better,Sub-Pillar
41078,Cabo Verde,2019.0,% of population using internet (male),62.921166,3.643039,True,Digital Literacy Skills
41079,Côte d'Ivoire,2019.0,% of population using internet (male),39.808871,2.173867,True,Digital Literacy Skills
41080,Kenya,2019.0,% of population using internet (male),25.071668,1.237071,True,Digital Literacy Skills
41081,Lesotho,2019.0,% of population using internet (male),38.349985,2.081130,True,Digital Literacy Skills
41082,Mauritius,2019.0,% of population using internet (male),63.393093,3.673038,True,Digital Literacy Skills
...,...,...,...,...,...,...,...
41159,Mexico,2019.0,% of population using internet (male),71.740324,4.203644,True,Digital Literacy Skills
41160,Panama,2019.0,% of population using internet (male),63.433835,3.675628,True,Digital Literacy Skills
41161,Paraguay,2019.0,% of population using internet (male),68.006259,3.966282,True,Digital Literacy Skills
41162,Peru,2019.0,% of population using internet (male),62.915703,3.642692,True,Digital Literacy Skills


In [43]:
df.to_csv('../indicator_scores/people_{}_scores.csv'.format(indicator), index=False)

In [44]:
### 5. SDG 4.4 Digital literacy data

In [45]:
indicators[4]

# load data
indicator = indicators[4]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

# The indicators are all jumbled, need to unclutter this

SDG 4.4 Digital literacy data
SDG_digital_literacy_data


In [46]:
df.head(15)

Unnamed: 0,SDG_IND,Indicator,LOCATION,Country,TIME,Time,Value,Flag Codes,Flags
0,ICTSKILLTRANSFERFILE_M,Proportion of youth and adults who have transf...,BRA,Brazil,2014,2014,23.31007,,
1,ICTSKILLTRANSFERFILE_M,Proportion of youth and adults who have transf...,BRA,Brazil,2016,2016,21.53173,,
2,ICTSKILLTRANSFERFILE_M,Proportion of youth and adults who have transf...,BRA,Brazil,2017,2017,21.1488,,
3,ICTSKILLTRANSFERFILE_M,Proportion of youth and adults who have transf...,BRA,Brazil,2018,2018,21.84886,,
4,ICTSKILLDUPLIC_M,Proportion of youth and adults who have used c...,BRA,Brazil,2014,2014,25.64427,,
5,ICTSKILLDUPLIC_M,Proportion of youth and adults who have used c...,BRA,Brazil,2016,2016,22.01463,,
6,ICTSKILLDUPLIC_M,Proportion of youth and adults who have used c...,BRA,Brazil,2017,2017,22.69577,,
7,ICTSKILLDUPLIC_M,Proportion of youth and adults who have used c...,BRA,Brazil,2018,2018,22.59234,,
8,ICTSKILLTRANSFERFILE,Proportion of youth and adults who have transf...,TUR,Turkey,2014,2014,27.72807,,
9,ICTSKILLTRANSFERFILE,Proportion of youth and adults who have transf...,TUR,Turkey,2015,2015,25.66142,,


In [47]:
df = df[(df.Time == 2019)]
df.head(15)

Unnamed: 0,SDG_IND,Indicator,LOCATION,Country,TIME,Time,Value,Flag Codes,Flags
12,ICTSKILLTRANSFERFILE,Proportion of youth and adults who have transf...,TUR,Turkey,2019,2019,38.1844,,
35,ICTSKILLSOFTWARE,"Proportion of youth and adults who have found,...",THA,Thailand,2019,2019,5.3,,
75,ICTSKILLTRANSFERFILE,Proportion of youth and adults who have transf...,PAK,Pakistan,2019,2019,2.7,,
90,ICTSKILLPROGLANG,Proportion of youth and adults who have wrote ...,PRT,Portugal,2019,2019,8.2,,
103,ICTSKILLSOFTWARE,"Proportion of youth and adults who have found,...",FIN,Finland,2019,2019,63.1,,
121,ICTSKILLTRANSFERFILE,Proportion of youth and adults who have transf...,FRA,France,2019,2019,54.6,,
126,ICTSKILLTRANSFERFILE,Proportion of youth and adults who have transf...,ESP,Spain,2019,2019,54.67742,,
133,ICTSKILLATTACH,Proportion of youth and adults who have sent e...,KAZ,Kazakhstan,2019,2019,51.7,,
143,ICTSKILLPROGLANG,Proportion of youth and adults who have wrote ...,KAZ,Kazakhstan,2019,2019,6.3,,
152,ICTSKILLPROGLANG,Proportion of youth and adults who have wrote ...,BEL,Belgium,2019,2019,4.5,,


In [48]:
subpillars[0]
subpillar = subpillars[0]
print(subpillar)

Digital Literacy Skills


In [49]:
# create standard columns
# df.rename(columns={'COUNTRY/ECONOMY':'Country Name'}, inplace=True)
df['higher_is_better'] = True
df['Indicator'] = indicator
df['data_col'] = df['Value']
df['Country Name'] = df['Country']
df['Year'] = df['Time']
df['Sub-Pillar'] = subpillar

min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# transform 0-1 rank into 1-6
df['new_rank_score'] = df['data_col'].apply(lambda row: convert_rank(row,old_min=min_rank,old_max=max_rank))

In [50]:
df = df[['Country Name', 'Year','Indicator','data_col','new_rank_score','higher_is_better','Sub-Pillar']]
df

Unnamed: 0,Country Name,Year,Indicator,data_col,new_rank_score,higher_is_better,Sub-Pillar
12,Turkey,2019,SDG 4.4 Digital literacy data,38.18440,2.920344,True,Digital Literacy Skills
35,Thailand,2019,SDG 4.4 Digital literacy data,5.30000,1.257836,True,Digital Literacy Skills
75,Pakistan,2019,SDG 4.4 Digital literacy data,2.70000,1.126390,True,Digital Literacy Skills
90,Portugal,2019,SDG 4.4 Digital literacy data,8.20000,1.404449,True,Digital Literacy Skills
103,Finland,2019,SDG 4.4 Digital literacy data,63.10000,4.179980,True,Digital Literacy Skills
...,...,...,...,...,...,...,...
5381,Slovenia,2019,SDG 4.4 Digital literacy data,71.80000,4.619818,True,Digital Literacy Skills
5394,Bangladesh,2019,SDG 4.4 Digital literacy data,0.40000,1.010111,True,Digital Literacy Skills
5396,Cuba,2019,SDG 4.4 Digital literacy data,26.60000,2.334681,True,Digital Literacy Skills
5397,Oman,2019,SDG 4.4 Digital literacy data,90.91205,5.586049,True,Digital Literacy Skills


In [51]:
df.to_csv('../indicator_scores/people_{}_scores.csv'.format(indicator), index=False)

In [52]:
### 6. UNDP Human Development Index (HDI)

In [53]:
indicators[5]

# load data
indicator = indicators[5]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

UNDP Human Development Index (HDI) 
undp_human_developmnt


In [54]:
df.head()

Unnamed: 0,HDI rank,Country,Value,Unnamed: 3,(years),(2017 PPP $),data_country,data_year
0,,,2019.0,,2019.0,2019.0,,
1,,VERY HIGH HUMAN DEVELOPMENT,,,,,,
2,1.0,Norway,0.957,,12.89775,66494.25217,,
3,2.0,Ireland,0.955,,12.6663305,68370.58737,,
4,2.0,Switzerland,0.955,,13.38081241,69393.52076,,


In [55]:
# choose only the rows where column HDI rank is numeric

df = df.iloc[0:194,:]
df = df[pd.to_numeric(df['HDI rank'], errors='coerce').notnull()]



In [56]:
df.head(67)

Unnamed: 0,HDI rank,Country,Value,Unnamed: 3,(years),(2017 PPP $),data_country,data_year
2,1.0,Norway,0.957,,12.89775,66494.25217,,
3,2.0,Ireland,0.955,,12.6663305,68370.58737,,
4,2.0,Switzerland,0.955,,13.38081241,69393.52076,,
5,4.0,"Hong Kong, China (SAR)",0.949,,12.27996,62984.76553,,
6,4.0,Iceland,0.949,,12.77278684,54682.38057,,
...,...,...,...,...,...,...,...,...
64,62.0,Malaysia,0.81,,10.37283,27534.09856,,
65,64.0,Kuwait,0.806,,7.275667996,58590.08219,,
66,64.0,Serbia,0.806,,11.19411,17191.66873,,
67,66.0,Mauritius,0.804,,9.54,25266.21195,,


In [57]:
subpillars[1]
subpillar = subpillars[1]
print(subpillar)

Culture


In [58]:
# create standard columns
# df.rename(columns={'COUNTRY/ECONOMY':'Country Name'}, inplace=True)
df['higher_is_better'] = False
df['Indicator'] = indicator
df['data_col'] = df['HDI rank']
df['Country Name'] = df['Country']
df['Year'] = 2019
df['Sub-Pillar'] = subpillar


min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# transform 0-1 rank into 1-6
df['new_rank_score'] = df['data_col'].apply(lambda row: convert_rank(row,old_min=min_rank,old_max=max_rank))

# invert since to put as higher is  better
df['new_rank_score'] = df['new_rank_score'].apply(lambda row: (6-row)+1)

In [59]:
df = df[['Country Name', 'Year','Indicator','data_col','new_rank_score','higher_is_better','Sub-Pillar']]
df

Unnamed: 0,Country Name,Year,Indicator,data_col,new_rank_score,higher_is_better,Sub-Pillar
2,Norway,2019,UNDP Human Development Index (HDI),1.0,6.000000,False,Culture
3,Ireland,2019,UNDP Human Development Index (HDI),2.0,5.973404,False,Culture
4,Switzerland,2019,UNDP Human Development Index (HDI),2.0,5.973404,False,Culture
5,"Hong Kong, China (SAR)",2019,UNDP Human Development Index (HDI),4.0,5.920213,False,Culture
6,Iceland,2019,UNDP Human Development Index (HDI),4.0,5.920213,False,Culture
...,...,...,...,...,...,...,...
189,Burundi,2019,UNDP Human Development Index (HDI),185.0,1.106383,False,Culture
190,South Sudan,2019,UNDP Human Development Index (HDI),185.0,1.106383,False,Culture
191,Chad,2019,UNDP Human Development Index (HDI),187.0,1.053191,False,Culture
192,Central African Republic,2019,UNDP Human Development Index (HDI),188.0,1.026596,False,Culture


In [60]:
df.to_csv('../indicator_scores/people_{}_scores.csv'.format(indicator, index=False))

In [61]:
### 7. Facebook Social Connectedness Index

In [62]:
indicators[6]

# load data
indicator = indicators[6]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

Facebook Social Connectedness Index
fb_social_connectedness


In [63]:
df.head(25)

Unnamed: 0,user_loc,fr_loc,scaled_sci
0,1001,AE,8729
1,1001,AG,95256
2,1001,AL,3122
3,1001,AM,3470
4,1001,AO,2839
5,1001,AR,3729
6,1001,AT,6977
7,1001,AU,21136
8,1001,AW,27607
9,1001,AZ,1108


In [64]:
subpillars[1]
subpillar = subpillars[1]
print(subpillar)

Culture


In [65]:
# create standard columns
# df.rename(columns={'COUNTRY/ECONOMY':'Country Name'}, inplace=True)
df['higher_is_better'] = True
df['Indicator'] = indicator
df['data_col'] = df['scaled_sci'] 
df['Country Name'] = df['fr_loc']
df['Sub-Pillar'] = subpillar
df['Year'] = 2021

min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# transform 0-1 rank into 1-6
df['new_rank_score'] = df['data_col'].apply(lambda row: convert_rank(row,old_min=min_rank,old_max=max_rank))

In [66]:
df = df[['Country Name', 'Year','Indicator','data_col','new_rank_score','higher_is_better','Sub-Pillar']]
df

# Need to find a way to convert ISO codes to full country names

Unnamed: 0,Country Name,Year,Indicator,data_col,new_rank_score,higher_is_better,Sub-Pillar
0,AE,2021,Facebook Social Connectedness Index,8729,1.000044,True,Culture
1,AG,2021,Facebook Social Connectedness Index,95256,1.000476,True,Culture
2,AL,2021,Facebook Social Connectedness Index,3122,1.000016,True,Culture
3,AM,2021,Facebook Social Connectedness Index,3470,1.000017,True,Culture
4,AO,2021,Facebook Social Connectedness Index,2839,1.000014,True,Culture
...,...,...,...,...,...,...,...
597360,YT,2021,Facebook Social Connectedness Index,17279,1.000086,True,Culture
597361,ZA,2021,Facebook Social Connectedness Index,24069,1.000120,True,Culture
597362,ZM,2021,Facebook Social Connectedness Index,22490,1.000112,True,Culture
597363,ZW,2021,Facebook Social Connectedness Index,15945,1.000080,True,Culture


In [67]:
df.to_csv('../indicator_scores/people_{}_scores.csv'.format(indicator), index=False)

In [68]:
### 8. Share of individuals using the Internet to interact with officials

In [69]:
indicators[7]

# load data
indicator = indicators[7]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

Share of individuals using the Internet to interact with public authorities
population_interacting_public_officials


In [70]:
df.head(15)

Unnamed: 0,Country,Indicator,Breakdowns,Time,Unit,PowerCode,Reference Period,Value,Flags
0,Australia,Individuals using the Internet for downloading...,All (individuals aged 16-74),2010,Percentage,Units,,38.11,Difference in methodology
1,Australia,Individuals using the Internet for downloading...,All (individuals aged 16-74),2012,Percentage,Units,,49.96,Difference in methodology
2,Austria,Individuals using the Internet for visiting or...,All (individuals aged 16-74),2005,Percentage,Units,,29.194,
3,Austria,Individuals using the Internet for visiting or...,All (individuals aged 16-74),2006,Percentage,Units,,32.9733,
4,Austria,Individuals using the Internet for visiting or...,All (individuals aged 16-74),2007,Percentage,Units,,27.4741,
5,Austria,Individuals using the Internet for visiting or...,All (individuals aged 16-74),2008,Percentage,Units,,51.2252,Break
6,Austria,Individuals using the Internet for visiting or...,All (individuals aged 16-74),2009,Percentage,Units,,48.8515,
7,Austria,Individuals using the Internet for visiting or...,All (individuals aged 16-74),2010,Percentage,Units,,51.0458,
8,Austria,Individuals using the Internet for visiting or...,All (individuals aged 16-74),2011,Percentage,Units,,51.2893,
9,Austria,Individuals using the Internet for visiting or...,All (individuals aged 16-74),2012,Percentage,Units,,52.8274,


In [71]:
subpillars[1]
subpillar = subpillars[1]
print(subpillar)

Culture


In [72]:
# filter most recent year
df = df[(df.Time==2019)]
df = df[(df.Indicator=='Individuals using the Internet for visiting or interacting with public authorities websites - last 12 m (%)')]

# create standard columns
# df.rename(columns={'COUNTRY/ECONOMY':'Country Name'}, inplace=True)
df['higher_is_better'] = True
df['Indicator'] = indicator
df['data_col'] = df['Value'] 
df['Year'] = df['Time']
df['Country Name'] = df['Country']
df['Sub-Pillar'] = subpillar

min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# transform 0-1 rank into 1-6
df['new_rank_score'] = df['data_col'].apply(lambda row: convert_rank(row,old_min=min_rank,old_max=max_rank))

In [73]:
df = df[['Country Name', 'Year','Indicator','data_col','new_rank_score','higher_is_better','Sub-Pillar']]
df

Unnamed: 0,Country Name,Year,Indicator,data_col,new_rank_score,higher_is_better,Sub-Pillar
16,Austria,2019,Share of individuals using the Internet to int...,69.7355,4.715671,True,Culture
64,Belgium,2019,Share of individuals using the Internet to int...,58.605,4.063974,True,Culture
109,Brazil,2019,Share of individuals using the Internet to int...,34.194129,2.634704,True,Culture
148,Colombia,2019,Share of individuals using the Internet to int...,6.274581,1.0,True,Culture
169,Czech Republic,2019,Share of individuals using the Internet to int...,53.7766,3.781269,True,Culture
216,Denmark,2019,Share of individuals using the Internet to int...,91.6709,6.0,True,Culture
262,Estonia,2019,Share of individuals using the Internet to int...,80.0156,5.317576,True,Culture
310,Finland,2019,Share of individuals using the Internet to int...,87.2954,5.743812,True,Culture
357,France,2019,Share of individuals using the Internet to int...,74.7242,5.007762,True,Culture
397,Germany,2019,Share of individuals using the Internet to int...,59.0977,4.092822,True,Culture


In [74]:
df.to_csv('../indicator_scores/people_{}_scores.csv'.format(indicator), index=False)

In [75]:
### 9. Level of satisfaction for online public service

In [76]:
indicators[8]

# load data
indicator = indicators[8]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

Level of satisfaction for online public services (% of users, by type of interaction and service)
digital_public_service_use


In [77]:
df.head(10)

Unnamed: 0,Country,Net Perception (%)
0,Estonia,67
1,UAE,61
2,Saudi Arabia,59
3,Singapore,54
4,China,53
5,New Zealand,52
6,Netherlands,51
7,Qatar,51
8,Canada,40
9,Denmark,48


In [78]:
subpillars[1]
subpillar = subpillars[1]
print(subpillar)

Culture


In [79]:
# create standard columns
# df.rename(columns={'COUNTRY/ECONOMY':'Country Name'}, inplace=True)
df['higher_is_better'] = True
df['Indicator'] = indicator
df['data_col'] = df['Net Perception (%)'] 
df['Year'] = 2020
df['Country Name'] = df['Country']
df['Sub-Pillar'] = subpillar

min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# transform 0-1 rank into 1-6
df['new_rank_score'] = df['data_col'].apply(lambda row: convert_rank(row,old_min=min_rank,old_max=max_rank))

# Need to replace the % mark 

In [80]:
df = df[['Country Name', 'Year','Indicator','data_col','new_rank_score','higher_is_better','Sub-Pillar']]
df

Unnamed: 0,Country Name,Year,Indicator,data_col,new_rank_score,higher_is_better,Sub-Pillar
0,Estonia,2020,Level of satisfaction for online public servic...,67,6.0,True,Culture
1,UAE,2020,Level of satisfaction for online public servic...,61,5.655172,True,Culture
2,Saudi Arabia,2020,Level of satisfaction for online public servic...,59,5.54023,True,Culture
3,Singapore,2020,Level of satisfaction for online public servic...,54,5.252874,True,Culture
4,China,2020,Level of satisfaction for online public servic...,53,5.195402,True,Culture
5,New Zealand,2020,Level of satisfaction for online public servic...,52,5.137931,True,Culture
6,Netherlands,2020,Level of satisfaction for online public servic...,51,5.08046,True,Culture
7,Qatar,2020,Level of satisfaction for online public servic...,51,5.08046,True,Culture
8,Canada,2020,Level of satisfaction for online public servic...,40,4.448276,True,Culture
9,Denmark,2020,Level of satisfaction for online public servic...,48,4.908046,True,Culture


In [81]:
df.to_csv('../indicator_scores/people_{}_scores.csv'.format(indicator), index=False)

In [82]:
### 10. Number of mobile apps available in national language

In [83]:
indicators[9]

# load data
indicator = indicators[9]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

Number of mobile apps available in national language(s)
apps_in_national_language


In [84]:
df.head(15)

Unnamed: 0.1,Unnamed: 0,ISO Code,Country,Region,Year,Number of apps in national language
0,1,AFG,Afghanistan,South Asia,2014,2.444741
1,2,AFG,Afghanistan,South Asia,2015,2.793221
2,3,AFG,Afghanistan,South Asia,2016,2.849881
3,4,AFG,Afghanistan,South Asia,2017,2.913741
4,5,AFG,Afghanistan,South Asia,2018,2.961247
5,6,AFG,Afghanistan,South Asia,2019,3.0
6,7,AGO,Angola,Sub-Saharan Africa,2014,53.333237
7,8,AGO,Angola,Sub-Saharan Africa,2015,55.08091
8,9,AGO,Angola,Sub-Saharan Africa,2016,56.516411
9,10,AGO,Angola,Sub-Saharan Africa,2017,57.061077


In [85]:
subpillars[1]
subpillar = subpillars[1]
print(subpillar)

Culture


In [86]:
# filter most recent year and global value
df = df[(df.Year==2019)]

# create standard columns
# df.rename(columns={'COUNTRY/ECONOMY':'Country Name'}, inplace=True)
df['higher_is_better'] = True
df['Indicator'] = indicator
df['data_col'] = df['Number of apps in national language'] 
df['Country Name'] = df['Country']
df['Sub-Pillar'] = subpillar

min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# transform 0-1 rank into 1-6
df['new_rank_score'] = df['data_col'].apply(lambda row: convert_rank(row,old_min=min_rank,old_max=max_rank))

In [87]:
df = df[['Country Name', 'Year','Indicator','data_col','new_rank_score','higher_is_better','Sub-Pillar']]
df

Unnamed: 0,Country Name,Year,Indicator,data_col,new_rank_score,higher_is_better,Sub-Pillar
5,Afghanistan,2019,Number of mobile apps available in national la...,3.000000,1.000000,True,Culture
11,Angola,2019,Number of mobile apps available in national la...,57.978500,3.833943,True,Culture
17,Albania,2019,Number of mobile apps available in national la...,67.286446,4.313734,True,Culture
23,United Arab Emirates,2019,Number of mobile apps available in national la...,78.453438,4.889352,True,Culture
29,Argentina,2019,Number of mobile apps available in national la...,89.724289,5.470324,True,Culture
...,...,...,...,...,...,...,...
995,Samoa,2019,Number of mobile apps available in national la...,58.999996,3.886598,True,Culture
1001,Yemen,2019,Number of mobile apps available in national la...,78.453438,4.889352,True,Culture
1007,South Africa,2019,Number of mobile apps available in national la...,57.271473,3.797499,True,Culture
1013,Zambia,2019,Number of mobile apps available in national la...,16.000000,1.670103,True,Culture


In [88]:
df.to_csv('../indicator_scores/people_{}_scores.csv'.format(indicator), index=False)

In [89]:
### 11. Device Addiction (time of use on internet)

In [90]:
indicators[10]

# load data
indicator = indicators[10]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

Device Addiction (time of use on internet / on devices)
time_spent_online


In [91]:
df.head(15)

# Need to move the row further down

Unnamed: 0,Country,"Average daily time spent using the internet by online users worldwide as of 3rd quarter 2020, by region (in hours.minutes)"
0,Philippines,10.56
1,Brazil,10.08
2,Colombia,10.07
3,South Africa,10.06
4,Argentina,9.39
5,Malaysia,9.17
6,Mexico,9.01
7,Indonesia,8.52
8,Thailand,8.44
9,Taiwan,8.08


In [92]:
subpillars[2]
subpillar = subpillars[2]
print(subpillar)

Digital Wellbeing


In [93]:
# create standard columns
# df.rename(columns={'COUNTRY/ECONOMY':'Country Name'}, inplace=True)
df['higher_is_better'] = True
df['Indicator'] = indicator
df['data_col'] = df['Average daily time spent using the internet by online users worldwide as of 3rd quarter 2020, by region (in hours.minutes)'] 
df['Country Name'] = df['Country']
df['Year'] = 2020
df['Sub-Pillar'] = subpillar

min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# transform 0-1 rank into 1-6
df['new_rank_score'] = df['data_col'].apply(lambda row: convert_rank(row,old_min=min_rank,old_max=max_rank))

In [94]:
df = df[['Country Name', 'Year','Indicator','data_col','new_rank_score','higher_is_better','Sub-Pillar']]
df

Unnamed: 0,Country Name,Year,Indicator,data_col,new_rank_score,higher_is_better,Sub-Pillar
0,Philippines,2020,Device Addiction (time of use on internet / on...,10.56,6.0,True,Digital Wellbeing
1,Brazil,2020,Device Addiction (time of use on internet / on...,10.08,5.619651,True,Digital Wellbeing
2,Colombia,2020,Device Addiction (time of use on internet / on...,10.07,5.611727,True,Digital Wellbeing
3,South Africa,2020,Device Addiction (time of use on internet / on...,10.06,5.603803,True,Digital Wellbeing
4,Argentina,2020,Device Addiction (time of use on internet / on...,9.39,5.0729,True,Digital Wellbeing
5,Malaysia,2020,Device Addiction (time of use on internet / on...,9.17,4.898574,True,Digital Wellbeing
6,Mexico,2020,Device Addiction (time of use on internet / on...,9.01,4.771791,True,Digital Wellbeing
7,Indonesia,2020,Device Addiction (time of use on internet / on...,8.52,4.383518,True,Digital Wellbeing
8,Thailand,2020,Device Addiction (time of use on internet / on...,8.44,4.320127,True,Digital Wellbeing
9,Taiwan,2020,Device Addiction (time of use on internet / on...,8.08,4.034865,True,Digital Wellbeing


In [95]:
df.to_csv('../indicator_scores/people_{}_scores.csv'.format(bf), index=False)

In [96]:
### 12. Gross National Wellbeing

In [97]:
indicators[11]

# load data
indicator = indicators[11]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

# happiness_score is missing looks like the problem is in a redundant space before it in the filename matching

Gross National Wellbeing
happiness_score


In [98]:
df.head(15)

Unnamed: 0,RANK,COUNTRY/ECONOMY,VALUE,SCORE
0,1.0,Finland,7.78,100.0
1,2.0,Switzerland,7.69,98.31
2,3.0,Denmark,7.69,98.28
3,4.0,Iceland,7.53,95.13
4,5.0,Norway,7.44,93.35
5,6.0,Netherlands,7.43,93.02
6,7.0,Luxembourg,7.4,92.6
7,8.0,Sweden,7.4,92.49
8,9.0,Ireland,7.25,89.67
9,10.0,Australia,7.23,89.26


In [99]:
subpillars[2]
subpillar = subpillars[2]
print(subpillar)

Digital Wellbeing


In [100]:
# create standard columns
# df.rename(columns={'COUNTRY/ECONOMY':'Country Name'}, inplace=True)
df['higher_is_better'] = True
df['Indicator'] = indicator
df['data_col'] = df['VALUE'] 
df['Country Name'] = df['COUNTRY/ECONOMY']
df['Year'] = 2019
df['Sub-Pillar'] = subpillar

min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# transform 0-1 rank into 1-6
df['new_rank_score'] = df['data_col'].apply(lambda row: convert_rank(row,old_min=min_rank,old_max=max_rank))

In [101]:
df = df[['Country Name', 'Year','Indicator','data_col','new_rank_score','higher_is_better','Sub-Pillar']]
df

Unnamed: 0,Country Name,Year,Indicator,data_col,new_rank_score,higher_is_better,Sub-Pillar
0,Finland,2019,Gross National Wellbeing,7.78,6.000000,True,Digital Wellbeing
1,Switzerland,2019,Gross National Wellbeing,7.69,5.911591,True,Digital Wellbeing
2,Denmark,2019,Gross National Wellbeing,7.69,5.911591,True,Digital Wellbeing
3,Iceland,2019,Gross National Wellbeing,7.53,5.754420,True,Digital Wellbeing
4,Norway,2019,Gross National Wellbeing,7.44,5.666012,True,Digital Wellbeing
...,...,...,...,...,...,...,...
129,Zambia,2019,Gross National Wellbeing,3.31,1.609037,True,Digital Wellbeing
130,Rwanda,2019,Gross National Wellbeing,3.27,1.569745,True,Digital Wellbeing
131,India,2019,Gross National Wellbeing,3.25,1.550098,True,Digital Wellbeing
132,Zimbabwe,2019,Gross National Wellbeing,2.69,1.000000,True,Digital Wellbeing


In [102]:
df.to_csv('../indicator_scores/people_{}_scores.csv'.format(indicator), index=False)

In [103]:
### 13. % of internet users who own cryptocurrency

In [104]:
indicators[12]

# load data
indicator = indicators[12]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

% of internet users who own cryptocurrency
cryptocurrency_adoption


In [105]:
df.head(15)

Unnamed: 0,Country,Share of respondents who indicated they either owned or used cryptocurrencies in 55 countries worldwide in 2020,Units
0,Nigeria,31.9,in %
1,Vietnam,21.1,in %
2,Philippines,19.8,in %
3,South Africa,17.8,in %
4,Thailand,17.6,in %
5,Peru,16.1,in %
6,Turkey,16.1,in %
7,Colombia,15.3,in %
8,Argentina,14.4,in %
9,Indonesia,13.0,in %


In [106]:
subpillars[0]
subpillar = subpillars[0]
print(subpillar)

Digital Literacy Skills


In [107]:
# create standard columns
# df.rename(columns={'COUNTRY/ECONOMY':'Country Name'}, inplace=True)
df['higher_is_better'] = True
df['Indicator'] = indicator
df['data_col'] = df['Share of respondents who indicated they either owned or used cryptocurrencies in 55 countries worldwide in 2020'] 
df['Country Name'] = df['Country']
df['Year'] = 2020
df['Sub-Pillar'] = subpillar

min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# transform 0-1 rank into 1-6
df['new_rank_score'] = df['data_col'].apply(lambda row: convert_rank(row,old_min=min_rank,old_max=max_rank))

In [108]:
df = df[['Country Name', 'Year','Indicator','data_col','new_rank_score','higher_is_better','Sub-Pillar']]
df

Unnamed: 0,Country Name,Year,Indicator,data_col,new_rank_score,higher_is_better,Sub-Pillar
0,Nigeria,2020,% of internet users who own cryptocurrency,31.9,6.0,True,Digital Literacy Skills
1,Vietnam,2020,% of internet users who own cryptocurrency,21.1,4.085106,True,Digital Literacy Skills
2,Philippines,2020,% of internet users who own cryptocurrency,19.8,3.85461,True,Digital Literacy Skills
3,South Africa,2020,% of internet users who own cryptocurrency,17.8,3.5,True,Digital Literacy Skills
4,Thailand,2020,% of internet users who own cryptocurrency,17.6,3.464539,True,Digital Literacy Skills
5,Peru,2020,% of internet users who own cryptocurrency,16.1,3.198582,True,Digital Literacy Skills
6,Turkey,2020,% of internet users who own cryptocurrency,16.1,3.198582,True,Digital Literacy Skills
7,Colombia,2020,% of internet users who own cryptocurrency,15.3,3.056738,True,Digital Literacy Skills
8,Argentina,2020,% of internet users who own cryptocurrency,14.4,2.897163,True,Digital Literacy Skills
9,Indonesia,2020,% of internet users who own cryptocurrency,13.0,2.648936,True,Digital Literacy Skills


In [109]:
df.to_csv('../indicator_scores/people_{}_scores.csv'.format(indicator), index=False)

In [110]:
### 14. Percentage of individuals not buying online due to concerns about returning products

In [111]:
indicators[13]

# load data
indicator = indicators[13]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

Percentage of individuals not buying online due to concerns about returning products
not_buying_online_concern_about_returning


In [112]:
df.head(15)

Unnamed: 0,Indicator,Country,Variable,Unit,Scope,Time,Value,Flags
0,Individuals who did not buy online for payment...,Austria,"Individuals who, in the last 12 months, haven'...",Percentage of individuals who ordered goods or...,All individuals (aged 16-74),2005,10.3582,
1,Individuals who did not buy online for payment...,Austria,"Individuals who, in the last 12 months, haven'...",Percentage of individuals who ordered goods or...,All individuals (aged 16-74),2006,14.6973,
2,Individuals who did not buy online for payment...,Austria,"Individuals who, in the last 12 months, haven'...",Percentage of individuals who ordered goods or...,All individuals (aged 16-74),2009,25.081,
3,Individuals who did not buy online for payment...,Austria,"Individuals who, in the last 12 months, haven'...",Percentage of individuals who ordered goods or...,All individuals (aged 16-74),2015,17.0493,
4,Individuals who did not buy online for payment...,Austria,"Individuals who, in the last 12 months, haven'...",Percentage of individuals who ordered goods or...,All individuals (aged 16-74),2017,15.11467,
5,Individuals who did not buy online for payment...,Austria,"Individuals who, in the last 12 months, haven'...",Percentage of individuals who ordered goods or...,All individuals (aged 16-74),2019,16.72749,
6,Individuals who did not buy online for payment...,Austria,"Individuals who, in the last 12 months, haven'...",Percentage of individuals who ordered goods or...,Individuals aged 16-24,2005,7.9119,
7,Individuals who did not buy online for payment...,Austria,"Individuals who, in the last 12 months, haven'...",Percentage of individuals who ordered goods or...,Individuals aged 16-24,2006,13.7518,
8,Individuals who did not buy online for payment...,Austria,"Individuals who, in the last 12 months, haven'...",Percentage of individuals who ordered goods or...,Individuals aged 16-24,2009,24.0688,
9,Individuals who did not buy online for payment...,Austria,"Individuals who, in the last 12 months, haven'...",Percentage of individuals who ordered goods or...,Individuals aged 16-24,2015,12.671,


In [113]:
subpillars[2]
subpillar = subpillars[2]
print(subpillar)

Digital Wellbeing


In [114]:
# filter most recent year and global value
df = df[(df.Time==2019)]
df = df[(df.Scope =='All individuals (aged 16-74)')]

# create standard columns
# df.rename(columns={'COUNTRY/ECONOMY':'Country Name'}, inplace=True)
df['higher_is_better'] = False
df['Indicator'] = indicator
df['data_col'] = df['Value'] 
df['Year'] = df['Time']
df['Country Name'] = df['Country']
df['Sub-Pillar'] = subpillar

min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# convert 1-190 rank into 1-6
df['new_rank_score'] = df['data_col'].apply(lambda row: convert_rank(row, old_min=min_rank,old_max=max_rank))

# invert since to put as higher is  better
df['new_rank_score'] = df['new_rank_score'].apply(lambda row: (6-row)+1)

In [115]:
df = df[['Country Name', 'Year','Indicator','data_col','new_rank_score','higher_is_better','Sub-Pillar']]
df

Unnamed: 0,Country Name,Year,Indicator,data_col,new_rank_score,higher_is_better,Sub-Pillar
5,Austria,2019,Percentage of individuals not buying online du...,16.72749,4.589811,False,Digital Wellbeing
22,Belgium,2019,Percentage of individuals not buying online du...,9.16428,5.380248,False,Digital Wellbeing
37,Czech Republic,2019,Percentage of individuals not buying online du...,8.857784,5.41228,False,Digital Wellbeing
51,Denmark,2019,Percentage of individuals not buying online du...,13.27322,4.950819,False,Digital Wellbeing
67,Estonia,2019,Percentage of individuals not buying online du...,3.968984,5.923212,False,Digital Wellbeing
121,European Union (28 countries),2019,Percentage of individuals not buying online du...,17.25651,4.534523,False,Digital Wellbeing
135,Finland,2019,Percentage of individuals not buying online du...,46.96071,1.430114,False,Digital Wellbeing
152,France,2019,Percentage of individuals not buying online du...,14.23637,4.85016,False,Digital Wellbeing
167,Germany,2019,Percentage of individuals not buying online du...,16.45007,4.618804,False,Digital Wellbeing
184,Greece,2019,Percentage of individuals not buying online du...,9.599324,5.334781,False,Digital Wellbeing


In [116]:
df.to_csv('../indicator_scores/people_{}_scores.csv'.format(indicator), index=False)

In [117]:
### 15. Percentage of individuals not buying online due to payment security concerns

In [118]:
indicators[14]

# load data
indicator = indicators[14]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

Percentage of individuals not buying online due to payment security concerns
not_buying_online_concern_about_security


In [119]:
df.head(15)

Unnamed: 0,Indicator,Country,Variable,Unit,Scope,Time,Value,Flags
0,Individuals who did not buy online for payment...,Austria,"Individuals who, in the last 12 months, haven'...",Percentage of individuals who ordered goods or...,All individuals (aged 16-74),2009,38.8093,
1,Individuals who did not buy online for payment...,Austria,"Individuals who, in the last 12 months, haven'...",Percentage of individuals who ordered goods or...,All individuals (aged 16-74),2015,34.8985,
2,Individuals who did not buy online for payment...,Austria,"Individuals who, in the last 12 months, haven'...",Percentage of individuals who ordered goods or...,All individuals (aged 16-74),2017,33.04743,
3,Individuals who did not buy online for payment...,Austria,"Individuals who, in the last 12 months, haven'...",Percentage of individuals who ordered goods or...,All individuals (aged 16-74),2019,31.19888,
4,Individuals who did not buy online for payment...,Austria,"Individuals who, in the last 12 months, haven'...",Percentage of individuals who ordered goods or...,Individuals aged 16-24,2009,38.3401,
5,Individuals who did not buy online for payment...,Austria,"Individuals who, in the last 12 months, haven'...",Percentage of individuals who ordered goods or...,Individuals aged 16-24,2015,34.9687,
6,Individuals who did not buy online for payment...,Austria,"Individuals who, in the last 12 months, haven'...",Percentage of individuals who ordered goods or...,Individuals aged 16-24,2017,16.65675,
7,Individuals who did not buy online for payment...,Austria,"Individuals who, in the last 12 months, haven'...",Percentage of individuals who ordered goods or...,Individuals aged 16-24,2019,12.88121,
8,Individuals who did not buy online for payment...,Austria,"Individuals who, in the last 12 months, haven'...",Percentage of individuals who ordered goods or...,Individuals aged 55-74,2009,40.5761,
9,Individuals who did not buy online for payment...,Austria,"Individuals who, in the last 12 months, haven'...",Percentage of individuals who ordered goods or...,Individuals aged 55-74,2015,42.602,


In [120]:
subpillars[2]
subpillar = subpillars[2]
print(subpillar)

Digital Wellbeing


In [121]:
# filter most recent year and global value
df = df[(df.Time==2019)]
df = df[(df.Scope =='All individuals (aged 16-74)')]

# create standard columns
# df.rename(columns={'COUNTRY/ECONOMY':'Country Name'}, inplace=True)
df['higher_is_better'] = False
df['Indicator'] = indicator
df['data_col'] = df['Value'] 
df['Year'] = df['Time']
df['Country Name'] = df['Country']
df['Sub-Pillar'] = subpillar

min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# convert 1-190 rank into 1-6
df['new_rank_score'] = df['data_col'].apply(lambda row: convert_rank(row, old_min=min_rank,old_max=max_rank))

# invert since to put as higher is  better
df['new_rank_score'] = df['new_rank_score'].apply(lambda row: (6-row)+1)

In [122]:
df = df[['Country Name', 'Year','Indicator','data_col','new_rank_score','higher_is_better','Sub-Pillar']]
df

Unnamed: 0,Country Name,Year,Indicator,data_col,new_rank_score,higher_is_better,Sub-Pillar
3,Austria,2019,Percentage of individuals not buying online du...,31.19888,3.765819,False,Digital Wellbeing
15,Belgium,2019,Percentage of individuals not buying online du...,18.12961,4.853663,False,Digital Wellbeing
27,Czech Republic,2019,Percentage of individuals not buying online du...,9.814267,5.545805,False,Digital Wellbeing
39,Denmark,2019,Percentage of individuals not buying online du...,18.77055,4.800313,False,Digital Wellbeing
51,Estonia,2019,Percentage of individuals not buying online du...,4.357608,6.0,False,Digital Wellbeing
84,European Union (28 countries),2019,Percentage of individuals not buying online du...,24.3086,4.339344,False,Digital Wellbeing
96,Finland,2019,Percentage of individuals not buying online du...,64.42721,1.0,False,Digital Wellbeing
108,France,2019,Percentage of individuals not buying online du...,40.84349,2.963033,False,Digital Wellbeing
120,Germany,2019,Percentage of individuals not buying online du...,25.12184,4.271652,False,Digital Wellbeing
132,Greece,2019,Percentage of individuals not buying online du...,23.22381,4.429638,False,Digital Wellbeing


In [123]:
df.to_csv('../indicator_scores/people_{}_scores.csv'.format(indicator), index=False)

In [124]:
### 16. E-waste generated, kilograms per inhabitant

In [125]:
indicators[15]

# load data
indicator = indicators[15]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

E-waste generated, kilograms per inhabitant
ewaste_per_inhabitant


In [126]:
df.head(15)

Unnamed: 0,iso3c,region_id,country_name,income_id,gdp,composition_food_organic_waste_percent,composition_glass_percent,composition_metal_percent,composition_other_percent,composition_paper_cardboard_percent,...,waste_treatment_controlled_landfill_percent,waste_treatment_incineration_percent,waste_treatment_landfill_unspecified_percent,waste_treatment_open_dump_percent,waste_treatment_other_percent,waste_treatment_recycling_percent,waste_treatment_sanitary_landfill_landfill_gas_system_percent,waste_treatment_unaccounted_for_percent,waste_treatment_waterways_marine_percent,where_where_is_this_data_measured
0,ABW,LCN,Aruba,HIC,35563.3125,,,,,,...,,,,,,11.0,,89.0,,
1,AFG,SAS,Afghanistan,LIC,2057.062256,,,,,,...,,,,,,,,,,Other
2,AGO,SSF,Angola,LMC,8036.69043,51.8,6.7,4.4,11.5,11.9,...,,,,,,,,,,
3,ALB,ECS,Albania,UMC,13724.058594,51.4,4.5,4.8,15.21,9.9,...,,,,,,,,,,Some disposal sites
4,AND,ECS,Andorra,HIC,43711.800781,31.2,8.2,2.6,11.6,35.1,...,,52.1,,,,,,47.9,,
5,ARE,MEA,United Arab Emirates,HIC,67119.132812,39.0,4.0,3.0,10.0,25.0,...,,,9.0,62.0,,20.0,,,,
6,ARG,LCN,Argentina,HIC,23550.099609,38.74,3.16,1.84,15.36,13.96,...,8.9,,,22.6,,6.0,62.5,,,Other
7,ARM,ECS,Armenia,UMC,11019.838867,57.0,3.2,3.4,17.4,6.7,...,,,,100.0,,,,,,Other
8,ASM,EAS,American Samoa,UMC,11113.442383,19.7,3.4,7.9,25.6,26.4,...,,,,,,,,,,
9,ATG,LCN,Antigua and Barbuda,HIC,17965.501953,46.0,7.0,7.0,12.0,15.0,...,98.68,,,,,,,1.14,0.1,Disposal Site


In [127]:
subpillars[2]
subpillar = subpillars[2]
print(subpillar)

Digital Wellbeing


In [128]:
# create standard columns
# df.rename(columns={'COUNTRY/ECONOMY':'Country Name'}, inplace=True)
df['higher_is_better'] = True
df['Indicator'] = indicator
# Use special_waste_e_waste_tons_year times one thousand and divde by total population
df['data_col'] = df['special_waste_e_waste_tons_year']*1000/df['population_population_number_of_people'] 
df['Year'] = 2021
df['Country Name'] = df['country_name']
df['Sub-Pillar'] = subpillar

min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# convert 1-190 rank into 1-6
df['new_rank_score'] = df['data_col'].apply(lambda row: convert_rank(row, old_min=min_rank,old_max=max_rank))

In [129]:
df = df[['Country Name', 'Year','Indicator','data_col','new_rank_score','higher_is_better','Sub-Pillar']]
df

Unnamed: 0,Country Name,Year,Indicator,data_col,new_rank_score,higher_is_better,Sub-Pillar
0,Aruba,2021,"E-waste generated, kilograms per inhabitant",,,True,Digital Wellbeing
1,Afghanistan,2021,"E-waste generated, kilograms per inhabitant",0.577100,1.040883,True,Digital Wellbeing
2,Angola,2021,"E-waste generated, kilograms per inhabitant",3.665901,1.261006,True,Digital Wellbeing
3,Albania,2021,"E-waste generated, kilograms per inhabitant",7.007240,1.499126,True,Digital Wellbeing
4,Andorra,2021,"E-waste generated, kilograms per inhabitant",,,True,Digital Wellbeing
...,...,...,...,...,...,...,...
212,Kosovo,2021,"E-waste generated, kilograms per inhabitant",0.012765,1.000666,True,Digital Wellbeing
213,"Yemen, Rep.",2021,"E-waste generated, kilograms per inhabitant",1.522610,1.108265,True,Digital Wellbeing
214,South Africa,2021,"E-waste generated, kilograms per inhabitant",6.205375,1.441981,True,Digital Wellbeing
215,Zambia,2021,"E-waste generated, kilograms per inhabitant",1.051543,1.074694,True,Digital Wellbeing


In [130]:
df.to_csv('../indicator_scores/people_{}_scores.csv'.format(indicator), index=False)

In [131]:
### 17. Automation-led unemployment

In [132]:
indicators[16]

# load data
indicator = indicators[16]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

Automation-led unemployment
automation_led_unemployment


In [133]:
df.head(15)

Unnamed: 0,Country,No. of employees potentially automable (millions),Total employees (millions),Potential Rate of Automation (%)
0,Japan,35.6,63.9,55.71
1,Thailand,21.0,38.4,54.69
2,Senegal,2.2,4.07,54.0
3,Colombia,9.3,17.5,53.14
4,Peru,6.9,13.0,53.08
5,Taiwan,5.2,9.8,53.06
6,Kenya,7.4,14.2,52.11
7,South Korea,12.5,24.0,52.08
8,Sweden,2.1,4.04,52.0
9,Costa Rica,1.1,2.12,52.0


In [134]:
subpillars[2]
subpillar = subpillars[2]
print(subpillar)

Digital Wellbeing


In [135]:
# create standard columns
# df.rename(columns={'COUNTRY/ECONOMY':'Country Name'}, inplace=True)
df['higher_is_better'] = True
df['Indicator'] = indicator
df['data_col'] = df['Potential Rate of Automation (%)'] 
df['Year'] = 2018
df['Country Name'] = df['Country']
df['Sub-Pillar'] = subpillar

min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# convert 1-190 rank into 1-6
df['new_rank_score'] = df['data_col'].apply(lambda row: convert_rank(row, old_min=min_rank,old_max=max_rank))

In [136]:
df = df[['Country Name', 'Year','Indicator','data_col','new_rank_score','higher_is_better','Sub-Pillar']]
df

Unnamed: 0,Country Name,Year,Indicator,data_col,new_rank_score,higher_is_better,Sub-Pillar
0,Japan,2018,Automation-led unemployment,55.71,6.0,True,Digital Wellbeing
1,Thailand,2018,Automation-led unemployment,54.69,5.656797,True,Digital Wellbeing
2,Senegal,2018,Automation-led unemployment,54.0,5.42463,True,Digital Wellbeing
3,Colombia,2018,Automation-led unemployment,53.14,5.135262,True,Digital Wellbeing
4,Peru,2018,Automation-led unemployment,53.08,5.115074,True,Digital Wellbeing
5,Taiwan,2018,Automation-led unemployment,53.06,5.108345,True,Digital Wellbeing
6,Kenya,2018,Automation-led unemployment,52.11,4.788694,True,Digital Wellbeing
7,South Korea,2018,Automation-led unemployment,52.08,4.7786,True,Digital Wellbeing
8,Sweden,2018,Automation-led unemployment,52.0,4.751682,True,Digital Wellbeing
9,Costa Rica,2018,Automation-led unemployment,52.0,4.751682,True,Digital Wellbeing


In [137]:
df.to_csv('../indicator_scores/people_{}_scores.csv'.format(indicator), index=False)

In [138]:
### 18. Cyberbullying

In [139]:
indicators[17]

# load data
indicator = indicators[17]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

Cyberbullying
cyberbullying_rate


In [140]:
df.head(15)

# Need to move the top row down further

Unnamed: 0,2011,2016,2018,Country
0,32,32,37,India
1,20,19,29,Brazil
2,15,34,26,United States
3,12,13,25,Belgium
4,10,25,26,South Africa
5,--,--,23,Malaysia
6,14,20,23,Sweden
7,18,17,20,Canada
8,5,14,20,Turkey
9,18,17,19,Saudi Arabia


In [141]:
subpillars[2]
subpillar = subpillars[2]
print(subpillar)

Digital Wellbeing


In [142]:
# create standard columns
# df.rename(columns={'COUNTRY/ECONOMY':'Country Name'}, inplace=True)
df['higher_is_better'] = True
df['Indicator'] = indicator
df['data_col'] = df['2018'] 
df['Year'] = 2018
df['Country Name'] = df['Country']
df['Sub-Pillar'] = subpillar

min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# convert 1-190 rank into 1-6
df['new_rank_score'] = df['data_col'].apply(lambda row: convert_rank(row, old_min=min_rank,old_max=max_rank))

# invert since to put as higher is  better
df['new_rank_score'] = df['new_rank_score'].apply(lambda row: (6-row)+1)

In [143]:
df = df[['Country Name', 'Year','Indicator','data_col','new_rank_score','higher_is_better','Sub-Pillar']]
df

Unnamed: 0,Country Name,Year,Indicator,data_col,new_rank_score,higher_is_better,Sub-Pillar
0,India,2018,Cyberbullying,37,1.0,True,Digital Wellbeing
1,Brazil,2018,Cyberbullying,29,2.111111,True,Digital Wellbeing
2,United States,2018,Cyberbullying,26,2.527778,True,Digital Wellbeing
3,Belgium,2018,Cyberbullying,25,2.666667,True,Digital Wellbeing
4,South Africa,2018,Cyberbullying,26,2.527778,True,Digital Wellbeing
5,Malaysia,2018,Cyberbullying,23,2.944444,True,Digital Wellbeing
6,Sweden,2018,Cyberbullying,23,2.944444,True,Digital Wellbeing
7,Canada,2018,Cyberbullying,20,3.361111,True,Digital Wellbeing
8,Turkey,2018,Cyberbullying,20,3.361111,True,Digital Wellbeing
9,Saudi Arabia,2018,Cyberbullying,19,3.5,True,Digital Wellbeing


In [144]:
df.to_csv('../indicator_scores/people_{}_scores.csv'.format(indicator), index=False)

In [145]:
### 19. Global Wellbeing Initiative

In [146]:
indicators[18]

# load data
indicator = indicators[18]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

# Strange the global_wellbeing_initiative is clearly in the data manifest and the link works well 
# but it has not been moved to the processed folder

Global Wellbeing Initiative (World Happiness Index)
global_wellbeing_initiative


In [147]:
df.head(15)

Unnamed: 0,Country name,Regional indicator,Ladder score,Standard error of ladder score,upperwhisker,lowerwhisker,Logged GDP per capita,Social support,Healthy life expectancy,Freedom to make life choices,...,Ladder score in Dystopia,Explained by: Log GDP per capita,Explained by: Social support,Explained by: Healthy life expectancy,Explained by: Freedom to make life choices,Explained by: Generosity,Explained by: Perceptions of corruption,Dystopia + residual,data_country,data_year
0,Finland,Western Europe,7.842,0.032,7.904,7.78,10.775,0.954,72.0,0.949,...,2.43,1.446,1.106,0.741,0.691,0.124,0.481,3.253,,
1,Denmark,Western Europe,7.62,0.035,7.687,7.552,10.933,0.954,72.7,0.946,...,2.43,1.502,1.108,0.763,0.686,0.208,0.485,2.868,,
2,Switzerland,Western Europe,7.571,0.036,7.643,7.5,11.117,0.942,74.4,0.919,...,2.43,1.566,1.079,0.816,0.653,0.204,0.413,2.839,,
3,Iceland,Western Europe,7.554,0.059,7.67,7.438,10.878,0.983,73.0,0.955,...,2.43,1.482,1.172,0.772,0.698,0.293,0.17,2.967,,
4,Netherlands,Western Europe,7.464,0.027,7.518,7.41,10.932,0.942,72.4,0.913,...,2.43,1.501,1.079,0.753,0.647,0.302,0.384,2.798,,
5,Norway,Western Europe,7.392,0.035,7.462,7.323,11.053,0.954,73.3,0.96,...,2.43,1.543,1.108,0.782,0.703,0.249,0.427,2.58,,
6,Sweden,Western Europe,7.363,0.036,7.433,7.293,10.867,0.934,72.7,0.945,...,2.43,1.478,1.062,0.763,0.685,0.244,0.448,2.683,,
7,Luxembourg,Western Europe,7.324,0.037,7.396,7.252,11.647,0.908,72.6,0.907,...,2.43,1.751,1.003,0.76,0.639,0.166,0.353,2.653,,
8,New Zealand,North America and ANZ,7.277,0.04,7.355,7.198,10.643,0.948,73.4,0.929,...,2.43,1.4,1.094,0.785,0.665,0.276,0.445,2.612,,
9,Austria,Western Europe,7.268,0.036,7.337,7.198,10.906,0.934,73.3,0.908,...,2.43,1.492,1.062,0.782,0.64,0.215,0.292,2.784,,


In [148]:
subpillars[2]
subpillar = subpillars[2]
print(subpillar)

Digital Wellbeing


In [149]:
# create standard columns
df.rename(columns={'Country name':'Country Name'}, inplace=True)
df['higher_is_better'] = True
df['Indicator'] = indicator
df['data_col'] = df['Ladder score'] 
df['Year'] = 2021
df['Sub-Pillar'] = subpillar

min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# convert 1-190 rank into 1-6
df['new_rank_score'] = df['data_col'].apply(lambda row: convert_rank(row, old_min=min_rank,old_max=max_rank))


In [150]:
df = df[['Country Name', 'Year','Indicator','data_col','new_rank_score','higher_is_better','Sub-Pillar']]
df

Unnamed: 0,Country Name,Year,Indicator,data_col,new_rank_score,higher_is_better,Sub-Pillar
0,Finland,2021,Global Wellbeing Initiative (World Happiness I...,7.842,6.000000,True,Digital Wellbeing
1,Denmark,2021,Global Wellbeing Initiative (World Happiness I...,7.620,5.791314,True,Digital Wellbeing
2,Switzerland,2021,Global Wellbeing Initiative (World Happiness I...,7.571,5.745253,True,Digital Wellbeing
3,Iceland,2021,Global Wellbeing Initiative (World Happiness I...,7.554,5.729272,True,Digital Wellbeing
4,Netherlands,2021,Global Wellbeing Initiative (World Happiness I...,7.464,5.644670,True,Digital Wellbeing
...,...,...,...,...,...,...,...
144,Lesotho,2021,Global Wellbeing Initiative (World Happiness I...,3.512,1.929686,True,Digital Wellbeing
145,Botswana,2021,Global Wellbeing Initiative (World Happiness I...,3.467,1.887385,True,Digital Wellbeing
146,Rwanda,2021,Global Wellbeing Initiative (World Happiness I...,3.415,1.838503,True,Digital Wellbeing
147,Zimbabwe,2021,Global Wellbeing Initiative (World Happiness I...,3.145,1.584696,True,Digital Wellbeing


In [151]:
df.to_csv('../indicator_scores/people_{}_scores.csv'.format(indicator), index=False)

In [152]:
### 20. Financial Inclusiveness

In [153]:
indicators[19]

# load data
indicator = indicators[19]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)


df = pd.read_csv('../../processed/{}.csv'.format(bf))
# Strange the global_wellbeing_initiative is clearly in the data manifest and the link works well 
# but it has not been moved to the processed folder

Financial Inclusiveness
financial_inclusiveness


In [154]:
df.head(15)

Unnamed: 0,Year,ISO,Country Name,Region,Income Group,Account (% age 15+),"Account, male (% age 15+)","Account, in labor force (% age 15+)","Account, out of labor force (% age 15+)","Account, female (% age 15+)",...,"Mobile money account, female (% age 15+)","Mobile money account, young adults (% age 15-24)","Mobile money account, older adults (% age 25+)","Mobile money account, primary education or less (% age 15+)","Mobile money account, secondary education or less (% age 15+)","Mobile money account, income, poorest 40% (% age 15+)","Mobile money account, income, richest 60% (% age 15+)","Mobile money account, rural (% age 15+)",data_country,data_year
0,2011,AFG,Afghanistan,South Asia,Low income,9%,15%,15%,2%,3%,...,,,,,,,,,,
1,2014,AFG,Afghanistan,South Asia,Low income,10%,16%,15%,4%,4%,...,0%,0%,0%,0%,0%,0%,1%,0%,,
2,2017,AFG,Afghanistan,South Asia,Low income,15%,23%,25%,4%,7%,...,1%,0%,1%,0%,2%,0%,1%,1%,,
3,2011,AGO,Angola,Sub-Saharan Africa (excluding high income),Lower middle income,39%,39%,46%,31%,39%,...,,,,,,,,,,
4,2014,AGO,Angola,Sub-Saharan Africa (excluding high income),Lower middle income,29%,36%,36%,12%,22%,...,,,,,,,,,,
5,2011,ALB,Albania,Europe & Central Asia (excluding high income),Upper middle income,28%,34%,36%,15%,23%,...,,,,,,,,,,
6,2014,ALB,Albania,Europe & Central Asia (excluding high income),Upper middle income,38%,43%,44%,29%,34%,...,,,,,,,,,,
7,2017,ALB,Albania,Europe & Central Asia (excluding high income),Upper middle income,40%,42%,53%,27%,38%,...,2%,6%,1%,1%,4%,0%,4%,2%,,
8,2011,ARB,Arab world,,,22%,30%,33%,11%,14%,...,,,,,,,,,,
9,2014,ARB,Arab world,,,30%,38%,42%,18%,22%,...,,,,,,,,,,


In [155]:
subpillars[3]
subpillar = subpillars[3]
print(subpillar)

Usage and ownership


In [156]:
df = df[(df.Year == 2017)]

# Must convert the data to float by removing the % sign
df['Account (% age 15+)'] = df['Account (% age 15+)'].str.replace('%','')
df['Account (% age 15+)'] = df['Account (% age 15+)'].astype(float)

In [157]:
# create standard columns
df.rename(columns={'Country':'Country Name'}, inplace=True)
df['higher_is_better'] = True
df['Indicator'] = indicator
df['data_col'] = df['Account (% age 15+)'] 
df['Sub-Pillar'] = subpillar


min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# transform 1-20 rank into 1-6
df['new_rank_score'] = df['data_col'].apply(lambda row: convert_rank(row,old_min=min_rank,old_max=max_rank))

In [158]:
df = df[['Country Name', 'Year','Indicator','data_col','new_rank_score','higher_is_better','Sub-Pillar']]
df

Unnamed: 0,Country Name,Year,Indicator,data_col,new_rank_score,higher_is_better,Sub-Pillar
2,Afghanistan,2017,Financial Inclusiveness,15.0,1.329670,True,Usage and ownership
7,Albania,2017,Financial Inclusiveness,40.0,2.703297,True,Usage and ownership
10,Arab world,2017,Financial Inclusiveness,37.0,2.538462,True,Usage and ownership
13,United Arab Emirates,2017,Financial Inclusiveness,88.0,5.340659,True,Usage and ownership
16,Argentina,2017,Financial Inclusiveness,49.0,3.197802,True,Usage and ownership
...,...,...,...,...,...,...,...
479,World,2017,Financial Inclusiveness,69.0,4.296703,True,Usage and ownership
482,Kosovo,2017,Financial Inclusiveness,52.0,3.362637,True,Usage and ownership
487,South Africa,2017,Financial Inclusiveness,69.0,4.296703,True,Usage and ownership
490,Zambia,2017,Financial Inclusiveness,46.0,3.032967,True,Usage and ownership


In [159]:
# output scores
df.to_csv('../indicator_scores/people_{}_scores.csv'.format(indicator), index=False)

In [160]:
### 21. E-commerce activity (% of individuals buying online and frequency)

In [161]:
indicators[20]

# load data
indicator = indicators[20]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

# Added _ between spaces to see if this changes anything

E-commerce activity (% of individuals buying online and frequency)
individuals_buying_online_frequency


In [162]:
df.head(15)

# Move the top row down further

Unnamed: 0,RANK,COUNTRY/ECONOMY,VALUE (%),SCORE,Year
0,1.0,Denmark,77.97,100.0,2017
1,2.0,Netherlands,75.72,97.11,2017
2,3.0,Norway,75.63,96.99,2017
3,4.0,United Kingdom,74.74,95.86,2017
4,5.0,"Korea, Rep.",72.47,92.94,2017
5,6.0,Sweden,71.65,91.9,2017
6,7.0,United States,70.43,90.33,2017
7,8.0,New Zealand,69.11,88.63,2017
8,9.0,Canada,68.57,87.95,2017
9,10.0,Australia,67.69,86.81,2017


In [163]:
subpillars[3]
subpillar = subpillars[3]
print(subpillar)

Usage and ownership


In [164]:
# create standard columns
# df.rename(columns={'COUNTRY/ECONOMY':'Country Name'}, inplace=True)
df['higher_is_better'] = True
df['Indicator'] = indicator
df['data_col'] = df['VALUE (%)'] 
df['Country Name'] = df['COUNTRY/ECONOMY']
df['Sub-Pillar'] = subpillar

min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# convert 1-190 rank into 1-6
df['new_rank_score'] = df['data_col'].apply(lambda row: convert_rank(row, old_min=min_rank,old_max=max_rank))

In [165]:
df = df[['Country Name', 'Year','Indicator','data_col','new_rank_score','higher_is_better','Sub-Pillar']]
df

Unnamed: 0,Country Name,Year,Indicator,data_col,new_rank_score,higher_is_better,Sub-Pillar
0,Denmark,2017,E-commerce activity (% of individuals buying o...,77.97,6.000000,True,Usage and ownership
1,Netherlands,2017,E-commerce activity (% of individuals buying o...,75.72,5.855658,True,Usage and ownership
2,Norway,2017,E-commerce activity (% of individuals buying o...,75.63,5.849885,True,Usage and ownership
3,United Kingdom,2017,E-commerce activity (% of individuals buying o...,74.74,5.792789,True,Usage and ownership
4,"Korea, Rep.",2017,E-commerce activity (% of individuals buying o...,72.47,5.647164,True,Usage and ownership
...,...,...,...,...,...,...,...
129,Iceland,2017,E-commerce activity (% of individuals buying o...,,,True,Usage and ownership
130,Jamaica,2017,E-commerce activity (% of individuals buying o...,,,True,Usage and ownership
131,Oman,2017,E-commerce activity (% of individuals buying o...,,,True,Usage and ownership
132,Qatar,2017,E-commerce activity (% of individuals buying o...,,,True,Usage and ownership


In [166]:
df.to_csv('../indicator_scores/people_{}_scores.csv'.format(indicator), index=False)

In [167]:
### 22. Internet Usage

In [168]:
indicators[21]

# load data
indicator = indicators[21]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

Internet Usage
ITU_database


In [169]:
df.head(15)

Unnamed: 0,Country,Region,ISO,Indicator name,Year,Value,data_country,data_year
0,Angola,Africa,AGO,Female mobile phone ownership as a % of total ...,2010.0,,,
1,Benin,Africa,BEN,Female mobile phone ownership as a % of total ...,2010.0,,,
2,Botswana,Africa,BWA,Female mobile phone ownership as a % of total ...,2010.0,,,
3,Burkina Faso,Africa,BFA,Female mobile phone ownership as a % of total ...,2010.0,,,
4,Burundi,Africa,BDI,Female mobile phone ownership as a % of total ...,2010.0,,,
5,Cabo Verde,Africa,CPV,Female mobile phone ownership as a % of total ...,2010.0,,,
6,Cameroon,Africa,CMR,Female mobile phone ownership as a % of total ...,2010.0,,,
7,Central African Rep.,Africa,CAF,Female mobile phone ownership as a % of total ...,2010.0,,,
8,Chad,Africa,TCD,Female mobile phone ownership as a % of total ...,2010.0,,,
9,Congo (Rep. of the),Africa,COG,Female mobile phone ownership as a % of total ...,2010.0,,,


In [170]:
subpillars[3]
subpillar = subpillars[3]
print(subpillar)

Usage and ownership


In [171]:
df = df[(df['Indicator name'] == 'Individuals using the Internet, total (%)')]
df = df[(df.Year == 2019)]

In [172]:
# create standard columns
# df.rename(columns={'COUNTRY/ECONOMY':'Country Name'}, inplace=True)
df['higher_is_better'] = True
df['Indicator'] = indicator
df['data_col'] = df['Value'] 
df['Country Name'] = df['Country']
df['Sub-Pillar'] = subpillar

min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# convert 1-190 rank into 1-6
df['new_rank_score'] = df['data_col'].apply(lambda row: convert_rank(row, old_min=min_rank,old_max=max_rank))

In [173]:
df = df[['Country Name', 'Year','Indicator','data_col','new_rank_score','higher_is_better','Sub-Pillar']]
df

Unnamed: 0,Country Name,Year,Indicator,data_col,new_rank_score,higher_is_better,Sub-Pillar
47607,Cabo Verde,2019.0,Internet Usage,61.943398,3.715247,True,Usage and ownership
47608,Central African Rep.,2019.0,Internet Usage,,,True,Usage and ownership
47609,Congo (Rep. of the),2019.0,Internet Usage,,,True,Usage and ownership
47610,Côte d'Ivoire,2019.0,Internet Usage,36.288955,2.162890,True,Usage and ownership
47611,Equatorial Guinea,2019.0,Internet Usage,,,True,Usage and ownership
...,...,...,...,...,...,...,...
47739,Saint Vincent and the Grenadines,2019.0,Internet Usage,,,True,Usage and ownership
47740,Trinidad and Tobago,2019.0,Internet Usage,,,True,Usage and ownership
47741,United States,2019.0,Internet Usage,89.430285,5.378486,True,Usage and ownership
47742,Uruguay,2019.0,Internet Usage,83.351534,5.010659,True,Usage and ownership


In [174]:
df.to_csv('../indicator_scores/people_{}_scores.csv'.format(indicator), index=False)

In [175]:
### 23. Households with a computer and with Internet Access

In [176]:
indicators[22]

# load data
indicator = indicators[22]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

Households with a computer and with Internet access
ITU_database


In [177]:
df.head(15)

Unnamed: 0,Country,Region,ISO,Indicator name,Year,Value,data_country,data_year
0,Angola,Africa,AGO,Female mobile phone ownership as a % of total ...,2010.0,,,
1,Benin,Africa,BEN,Female mobile phone ownership as a % of total ...,2010.0,,,
2,Botswana,Africa,BWA,Female mobile phone ownership as a % of total ...,2010.0,,,
3,Burkina Faso,Africa,BFA,Female mobile phone ownership as a % of total ...,2010.0,,,
4,Burundi,Africa,BDI,Female mobile phone ownership as a % of total ...,2010.0,,,
5,Cabo Verde,Africa,CPV,Female mobile phone ownership as a % of total ...,2010.0,,,
6,Cameroon,Africa,CMR,Female mobile phone ownership as a % of total ...,2010.0,,,
7,Central African Rep.,Africa,CAF,Female mobile phone ownership as a % of total ...,2010.0,,,
8,Chad,Africa,TCD,Female mobile phone ownership as a % of total ...,2010.0,,,
9,Congo (Rep. of the),Africa,COG,Female mobile phone ownership as a % of total ...,2010.0,,,


In [178]:
subpillars[3]
subpillar = subpillars[3]
print(subpillar)

Usage and ownership


In [179]:
df = df[(df['Indicator name'] == 'Households with Internet access at home (%)')]
df = df[(df.Year == 2019)]
df

Unnamed: 0,Country,Region,ISO,Indicator name,Year,Value,data_country,data_year
44992,Angola,Africa,AGO,Households with Internet access at home (%),2019.0,,,
44993,Benin,Africa,BEN,Households with Internet access at home (%),2019.0,,,
44994,Botswana,Africa,BWA,Households with Internet access at home (%),2019.0,63.454256,,
44995,Burkina Faso,Africa,BFA,Households with Internet access at home (%),2019.0,,,
44996,Burundi,Africa,BDI,Households with Internet access at home (%),2019.0,,,
...,...,...,...,...,...,...,...,...
45183,Suriname,The Americas,SUR,Households with Internet access at home (%),2019.0,54.862557,,
45184,Trinidad and Tobago,The Americas,TTO,Households with Internet access at home (%),2019.0,,,
45185,United States,The Americas,USA,Households with Internet access at home (%),2019.0,86.612378,,
45186,Uruguay,The Americas,URY,Households with Internet access at home (%),2019.0,69.330913,,


In [180]:
# create standard columns
# df.rename(columns={'COUNTRY/ECONOMY':'Country Name'}, inplace=True)
df['higher_is_better'] = True
df['Indicator'] = indicator
df['data_col'] = df['Value'] 
df['Country Name'] = df['Country']
df['Sub-Pillar'] = subpillar

min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# transform 0-1 rank into 1-6
df['new_rank_score'] = df['data_col'].apply(lambda row: convert_rank(row,old_min=min_rank,old_max=max_rank))

In [181]:
df = df[['Country Name', 'Year','Indicator','data_col','new_rank_score','higher_is_better','Sub-Pillar']]
df

Unnamed: 0,Country Name,Year,Indicator,data_col,new_rank_score,higher_is_better,Sub-Pillar
44992,Angola,2019.0,Households with a computer and with Internet a...,,,True,Usage and ownership
44993,Benin,2019.0,Households with a computer and with Internet a...,,,True,Usage and ownership
44994,Botswana,2019.0,Households with a computer and with Internet a...,63.454256,4.112894,True,Usage and ownership
44995,Burkina Faso,2019.0,Households with a computer and with Internet a...,,,True,Usage and ownership
44996,Burundi,2019.0,Households with a computer and with Internet a...,,,True,Usage and ownership
...,...,...,...,...,...,...,...
45183,Suriname,2019.0,Households with a computer and with Internet a...,54.862557,3.669246,True,Usage and ownership
45184,Trinidad and Tobago,2019.0,Households with a computer and with Internet a...,,,True,Usage and ownership
45185,United States,2019.0,Households with a computer and with Internet a...,86.612378,5.308706,True,Usage and ownership
45186,Uruguay,2019.0,Households with a computer and with Internet a...,69.330913,4.416346,True,Usage and ownership


In [182]:
df.to_csv('../indicator_scores/people_{}_scores.csv'.format(indicator), index=False)

In [183]:
### 24. % of population using Facebook

In [184]:
indicators[23]

# load data
indicator = indicators[23]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

% of population using Facebook
FB_users


In [185]:
df.head(15)

Unnamed: 0,Country,Facebook Users,Population in Thousands (2021),Percentage of Facebook Users
0,India,251000000,1393409,18.01
1,United States,240000000,332915,72.09
2,Brazil,139000000,213993,64.96
3,Indonesia,136960000,276362,49.56
4,Mexico,78000000,130262,59.88
5,Philippines,71760000,111047,64.62
6,Vietnam,66720000,98169,67.96
7,Thailand,46000000,69951,65.76
8,United Kingdom,44000000,68207,64.51
9,Turkey,44000000,85043,51.74


In [186]:
subpillars[3]
subpillar = subpillars[3]
print(subpillar)

Usage and ownership


In [187]:
# create standard columns
# df.rename(columns={'COUNTRY/ECONOMY':'Country Name'}, inplace=True)
df['higher_is_better'] = True
df['Indicator'] = indicator
df['data_col'] = df['Percentage of Facebook Users'] 
df['Year'] = 2021
df['Country Name'] = df['Country']
df['Sub-Pillar'] = subpillar

min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# transform 0-1 rank into 1-6
df['new_rank_score'] = df['data_col'].apply(lambda row: convert_rank(row,old_min=min_rank,old_max=max_rank))

In [188]:
df = df[['Country Name', 'Year','Indicator','data_col','new_rank_score','higher_is_better','Sub-Pillar']]
df

Unnamed: 0,Country Name,Year,Indicator,data_col,new_rank_score,higher_is_better,Sub-Pillar
0,India,2021,% of population using Facebook,18.01,1.897091,True,Usage and ownership
1,United States,2021,% of population using Facebook,72.09,4.590855,True,Usage and ownership
2,Brazil,2021,% of population using Facebook,64.96,4.235704,True,Usage and ownership
3,Indonesia,2021,% of population using Facebook,49.56,3.468619,True,Usage and ownership
4,Mexico,2021,% of population using Facebook,59.88,3.982666,True,Usage and ownership
...,...,...,...,...,...,...,...
223,Tuvalu,2021,% of population using Facebook,14.17,1.705818,True,Usage and ownership
224,Niue,2021,% of population using Facebook,41.00,3.042239,True,Usage and ownership
225,Tokelau,2021,% of population using Facebook,41.00,3.042239,True,Usage and ownership
226,Vatican City,2021,% of population using Facebook,2.00,1.099621,True,Usage and ownership


In [189]:
df.to_csv('../indicator_scores/people_{}_scores.csv'.format(indicator), index=False)

In [190]:
### 25. Gender gap for social media use

In [191]:
indicators[24]

# load data
indicator = indicators[24]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

Gender gap for social media use
gender_gaps


In [192]:
df.head(15)

Unnamed: 0,ISO Code,Country,Region,Year,Gender parity in account ownership,Gender gap in social media use,Gender gap in mobile ownership
0,AFG,Afghanistan,South Asia,2014,11.109999,0.0,0.0
1,AFG,Afghanistan,South Asia,2015,14.087173,0.0,0.0
2,AFG,Afghanistan,South Asia,2016,17.06435,0.0,0.0
3,AFG,Afghanistan,South Asia,2017,20.041523,0.0,0.0
4,AFG,Afghanistan,South Asia,2018,20.041523,0.0,0.0
5,AFG,Afghanistan,South Asia,2019,20.041523,0.0,0.0
6,AGO,Angola,Sub-Saharan Africa,2014,55.237572,46.42857,51.645042
7,AGO,Angola,Sub-Saharan Africa,2015,55.237572,46.42857,57.001461
8,AGO,Angola,Sub-Saharan Africa,2016,55.237572,46.42857,63.725491
9,AGO,Angola,Sub-Saharan Africa,2017,55.237572,38.646553,53.966476


In [193]:
subpillars[3]
subpillar = subpillars[3]
print(subpillar)

Usage and ownership


In [194]:
df = df[df.Year==2019]

# create standard columns
# df.rename(columns={'COUNTRY/ECONOMY':'Country Name'}, inplace=True)
df['higher_is_better'] = True
df['Indicator'] = indicator
df['data_col'] = df['Gender gap in social media use'] 
df['Country Name'] = df['Country']
df['Sub-Pillar'] = subpillar

min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# transform 0-1 rank into 1-6
df['new_rank_score'] = df['data_col'].apply(lambda row: convert_rank(row,old_min=min_rank,old_max=max_rank))

In [195]:
df = df[['Country Name', 'Year','Indicator','data_col','new_rank_score','higher_is_better','Sub-Pillar']]
df

Unnamed: 0,Country Name,Year,Indicator,data_col,new_rank_score,higher_is_better,Sub-Pillar
5,Afghanistan,2019,Gender gap for social media use,0.000000,1.000000,True,Usage and ownership
11,Angola,2019,Gender gap for social media use,49.552494,3.477625,True,Usage and ownership
17,Albania,2019,Gender gap for social media use,41.406120,3.070306,True,Usage and ownership
23,United Arab Emirates,2019,Gender gap for social media use,13.890497,1.694525,True,Usage and ownership
29,Argentina,2019,Gender gap for social media use,100.000000,6.000000,True,Usage and ownership
...,...,...,...,...,...,...,...
995,Samoa,2019,Gender gap for social media use,100.000000,6.000000,True,Usage and ownership
1001,Yemen,2019,Gender gap for social media use,0.000000,1.000000,True,Usage and ownership
1007,South Africa,2019,Gender gap for social media use,96.334473,5.816724,True,Usage and ownership
1013,Zambia,2019,Gender gap for social media use,59.793156,3.989658,True,Usage and ownership


In [196]:
df.to_csv('../indicator_scores/people_{}_scores.csv'.format(indicator), index=False)

In [197]:
### 26. % of population using digital financial services

In [198]:
indicators[25]

# load data
indicator = indicators[25]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

% of population using digital financial services
population_digital_financial_services


In [199]:
df.head(15)

# May have to convert the data to csv file as the original file is missing many important columns

Unnamed: 0,Year,ISO,Country Name,Region,Income Group,Account (% age 15+),"Account, male (% age 15+)","Account, in labor force (% age 15+)","Account, out of labor force (% age 15+)","Account, female (% age 15+)",...,"Mobile money account, female (% age 15+)","Mobile money account, young adults (% age 15-24)","Mobile money account, older adults (% age 25+)","Mobile money account, primary education or less (% age 15+)","Mobile money account, secondary education or less (% age 15+)","Mobile money account, income, poorest 40% (% age 15+)","Mobile money account, income, richest 60% (% age 15+)","Mobile money account, rural (% age 15+)",data_country,data_year
0,2011,AFG,Afghanistan,South Asia,Low income,9%,15%,15%,2%,3%,...,,,,,,,,,,
1,2014,AFG,Afghanistan,South Asia,Low income,10%,16%,15%,4%,4%,...,0%,0%,0%,0%,0%,0%,1%,0%,,
2,2017,AFG,Afghanistan,South Asia,Low income,15%,23%,25%,4%,7%,...,1%,0%,1%,0%,2%,0%,1%,1%,,
3,2011,AGO,Angola,Sub-Saharan Africa (excluding high income),Lower middle income,39%,39%,46%,31%,39%,...,,,,,,,,,,
4,2014,AGO,Angola,Sub-Saharan Africa (excluding high income),Lower middle income,29%,36%,36%,12%,22%,...,,,,,,,,,,
5,2011,ALB,Albania,Europe & Central Asia (excluding high income),Upper middle income,28%,34%,36%,15%,23%,...,,,,,,,,,,
6,2014,ALB,Albania,Europe & Central Asia (excluding high income),Upper middle income,38%,43%,44%,29%,34%,...,,,,,,,,,,
7,2017,ALB,Albania,Europe & Central Asia (excluding high income),Upper middle income,40%,42%,53%,27%,38%,...,2%,6%,1%,1%,4%,0%,4%,2%,,
8,2011,ARB,Arab world,,,22%,30%,33%,11%,14%,...,,,,,,,,,,
9,2014,ARB,Arab world,,,30%,38%,42%,18%,22%,...,,,,,,,,,,


In [200]:
subpillars[3]
subpillar = subpillars[3]
print(subpillar)

Usage and ownership


In [201]:
df = df[(df.Year == 2017)]

# Must convert the data to float by removing the % sign
df['Used the internet to pay bills or to buy something online in the past year (% age 15+)'] = df['Used the internet to pay bills or to buy something online in the past year (% age 15+)'].str.replace('%','')
df['Used the internet to pay bills or to buy something online in the past year (% age 15+)'] = df['Used the internet to pay bills or to buy something online in the past year (% age 15+)'].astype(float)

In [202]:
# create standard columns
df.rename(columns={'Country':'Country Name'}, inplace=True)
df['higher_is_better'] = True
df['Indicator'] = indicator
df['data_col'] = df['Used the internet to pay bills or to buy something online in the past year (% age 15+)'] 
df['Sub-Pillar'] = subpillar


min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# transform 1-20 rank into 1-6
df['new_rank_score'] = df['data_col'].apply(lambda row: convert_rank(row,old_min=min_rank,old_max=max_rank))

In [203]:
df = df[['Country Name', 'Year','Indicator','data_col','new_rank_score','higher_is_better','Sub-Pillar']]
df 

Unnamed: 0,Country Name,Year,Indicator,data_col,new_rank_score,higher_is_better,Sub-Pillar
2,Afghanistan,2017,% of population using digital financial services,1.0,1.000000,True,Usage and ownership
7,Albania,2017,% of population using digital financial services,7.0,1.340909,True,Usage and ownership
10,Arab world,2017,% of population using digital financial services,9.0,1.454545,True,Usage and ownership
13,United Arab Emirates,2017,% of population using digital financial services,60.0,4.352273,True,Usage and ownership
16,Argentina,2017,% of population using digital financial services,19.0,2.022727,True,Usage and ownership
...,...,...,...,...,...,...,...
479,World,2017,% of population using digital financial services,29.0,2.590909,True,Usage and ownership
482,Kosovo,2017,% of population using digital financial services,15.0,1.795455,True,Usage and ownership
487,South Africa,2017,% of population using digital financial services,14.0,1.738636,True,Usage and ownership
490,Zambia,2017,% of population using digital financial services,11.0,1.568182,True,Usage and ownership


In [204]:
df.to_csv('../indicator_scores/people_{}_scores.csv'.format(indicator), index=False)

In [205]:
### 27. Mobile Broadband Pricing (pre-paid)

In [206]:
indicators[26]

# load data
indicator = indicators[26]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

# File not found despite its presence in the personal repo and the data manifest

Mobile Broadband Pricing (pre-paid)
ITU_database


In [207]:
df.head(15)

Unnamed: 0,Country,Region,ISO,Indicator name,Year,Value,data_country,data_year
0,Angola,Africa,AGO,Female mobile phone ownership as a % of total ...,2010.0,,,
1,Benin,Africa,BEN,Female mobile phone ownership as a % of total ...,2010.0,,,
2,Botswana,Africa,BWA,Female mobile phone ownership as a % of total ...,2010.0,,,
3,Burkina Faso,Africa,BFA,Female mobile phone ownership as a % of total ...,2010.0,,,
4,Burundi,Africa,BDI,Female mobile phone ownership as a % of total ...,2010.0,,,
5,Cabo Verde,Africa,CPV,Female mobile phone ownership as a % of total ...,2010.0,,,
6,Cameroon,Africa,CMR,Female mobile phone ownership as a % of total ...,2010.0,,,
7,Central African Rep.,Africa,CAF,Female mobile phone ownership as a % of total ...,2010.0,,,
8,Chad,Africa,TCD,Female mobile phone ownership as a % of total ...,2010.0,,,
9,Congo (Rep. of the),Africa,COG,Female mobile phone ownership as a % of total ...,2010.0,,,


In [208]:
subpillars[3]
subpillar = subpillars[3]
print(subpillar)

Usage and ownership


In [209]:
df = df[df.Year==2019]
df = df[df['Indicator name'] == 'Mobile broadband basket as a % of GNI p.c.']
df

Unnamed: 0,Country,Region,ISO,Indicator name,Year,Value,data_country,data_year
17974,Angola,Africa,AGO,Mobile broadband basket as a % of GNI p.c.,2019.0,5.63,,
17975,Benin,Africa,BEN,Mobile broadband basket as a % of GNI p.c.,2019.0,6.21,,
17976,Botswana,Africa,BWA,Mobile broadband basket as a % of GNI p.c.,2019.0,2.87,,
17977,Burkina Faso,Africa,BFA,Mobile broadband basket as a % of GNI p.c.,2019.0,19.63,,
17978,Burundi,Africa,BDI,Mobile broadband basket as a % of GNI p.c.,2019.0,20.43,,
...,...,...,...,...,...,...,...,...
18161,Saint Vincent and the Grenadines,The Americas,VCT,Mobile broadband basket as a % of GNI p.c.,2019.0,4.59,,
18162,Suriname,The Americas,SUR,Mobile broadband basket as a % of GNI p.c.,2019.0,1.93,,
18163,Trinidad and Tobago,The Americas,TTO,Mobile broadband basket as a % of GNI p.c.,2019.0,3.07,,
18164,United States,The Americas,USA,Mobile broadband basket as a % of GNI p.c.,2019.0,0.42,,


In [210]:
# create standard columns
# df.rename(columns={'COUNTRY/ECONOMY':'Country Name'}, inplace=True)
df['higher_is_better'] = True
df['Indicator'] = indicator
df['data_col'] = df['Value'] 
df['Country Name'] = df['Country']
df['Sub-Pillar'] = subpillar

min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# transform 0-1 rank into 1-6
df['new_rank_score'] = df['data_col'].apply(lambda row: convert_rank(row,old_min=min_rank,old_max=max_rank))

# invert since to put as higher is  better
df['new_rank_score'] = df['new_rank_score'].apply(lambda row: (6-row)+1)

In [211]:
df = df[['Country Name', 'Year','Indicator','data_col','new_rank_score','higher_is_better','Sub-Pillar']]
df

Unnamed: 0,Country Name,Year,Indicator,data_col,new_rank_score,higher_is_better,Sub-Pillar
17974,Angola,2019.0,Mobile Broadband Pricing (pre-paid),5.63,5.553571,True,Usage and ownership
17975,Benin,2019.0,Mobile Broadband Pricing (pre-paid),6.21,5.506062,True,Usage and ownership
17976,Botswana,2019.0,Mobile Broadband Pricing (pre-paid),2.87,5.779653,True,Usage and ownership
17977,Burkina Faso,2019.0,Mobile Broadband Pricing (pre-paid),19.63,4.406782,True,Usage and ownership
17978,Burundi,2019.0,Mobile Broadband Pricing (pre-paid),20.43,4.341252,True,Usage and ownership
...,...,...,...,...,...,...,...
18161,Saint Vincent and the Grenadines,2019.0,Mobile Broadband Pricing (pre-paid),4.59,5.638761,True,Usage and ownership
18162,Suriname,2019.0,Mobile Broadband Pricing (pre-paid),1.93,5.856651,True,Usage and ownership
18163,Trinidad and Tobago,2019.0,Mobile Broadband Pricing (pre-paid),3.07,5.763270,True,Usage and ownership
18164,United States,2019.0,Mobile Broadband Pricing (pre-paid),0.42,5.980341,True,Usage and ownership


In [212]:
df.to_csv('../indicator_scores/people_{}_scores.csv'.format(indicator), index=False)

In [213]:
### 28. Tax as % of total cost of mobile ownership

In [214]:
indicators[27]

# load data
indicator = indicators[27]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

Tax as % of total cost of mobile ownership
tax_percent_mobile_ownership


In [215]:
df.head(15)

Unnamed: 0,ISO Code,Country,Region,Year,Cluster,Index,Infrastructure,Affordability,Consumer Readiness,Content and Services,...,Gender gap in mobile ownership,TLDs per capita,E-Government Score,Mobile Social Media Penetration,Apps developed per person,Number of apps in national language,Accessibility of top ranked apps,Cybersecurity Index,data_country,data_year
0,AFG,Afghanistan,South Asia,2014,Discoverer,22.12,21.74,31.79,24.4,14.19,...,0.0,39.55,18.11,3.28,20.98,2.44,4.37,26.5,,
1,AFG,Afghanistan,South Asia,2015,Discoverer,22.99,22.82,30.81,25.28,15.71,...,0.0,39.57,24.27,4.36,22.93,2.79,8.03,25.83,,
2,AFG,Afghanistan,South Asia,2016,Discoverer,23.71,26.92,26.75,26.07,16.83,...,0.0,39.58,30.43,6.73,30.31,2.85,5.9,25.17,,
3,AFG,Afghanistan,South Asia,2017,Discoverer,25.82,33.54,27.22,28.56,17.04,...,0.0,39.47,30.5,7.78,31.62,2.91,6.15,24.5,,
4,AFG,Afghanistan,South Asia,2018,Discoverer,28.39,30.91,42.64,29.24,16.87,...,0.0,39.39,30.56,8.54,36.54,2.96,8.66,17.7,,
5,AFG,Afghanistan,South Asia,2019,Discoverer,28.94,32.34,41.53,29.72,17.58,...,0.0,39.41,41.18,9.39,39.36,3.0,5.72,17.7,,
6,AGO,Angola,Sub-Saharan Africa,2014,Discoverer,32.78,25.99,35.99,44.33,27.85,...,51.65,0.0,29.92,3.99,22.74,53.33,49.09,8.8,,
7,AGO,Angola,Sub-Saharan Africa,2015,Emerging,37.18,33.09,42.01,45.41,30.27,...,57.0,0.31,32.35,5.26,22.12,55.08,58.33,8.47,,
8,AGO,Angola,Sub-Saharan Africa,2016,Emerging,39.85,37.8,44.74,46.47,32.11,...,63.73,0.0,34.78,6.27,27.46,56.52,61.73,8.13,,
9,AGO,Angola,Sub-Saharan Africa,2017,Emerging,42.89,48.6,47.94,46.32,31.36,...,53.97,0.0,37.88,4.26,31.94,57.06,55.09,7.8,,


In [216]:
subpillars[3]
subpillar = subpillars[3]
print(subpillar)

Usage and ownership


In [217]:
df = df[df.Year==2019]

# create standard columns
# df.rename(columns={'COUNTRY/ECONOMY':'Country Name'}, inplace=True)
df['higher_is_better'] = False
df['Indicator'] = indicator
df['data_col'] = df['Tax as a % of TCMO'] 
df['Country Name'] = df['Country']
df['Sub-Pillar'] = subpillar

min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# transform 0-1 rank into 1-6
df['new_rank_score'] = df['data_col'].apply(lambda row: convert_rank(row,old_min=min_rank,old_max=max_rank))

# invert since to put as higher is  better
df['new_rank_score'] = df['new_rank_score'].apply(lambda row: (6-row)+1)

In [218]:
df = df[['Country Name', 'Year','Indicator','data_col','new_rank_score','higher_is_better','Sub-Pillar']]
df

Unnamed: 0,Country Name,Year,Indicator,data_col,new_rank_score,higher_is_better,Sub-Pillar
5,Afghanistan,2019,Tax as % of total cost of mobile ownership,79.16,2.0420,False,Usage and ownership
11,Angola,2019,Tax as % of total cost of mobile ownership,65.83,2.7085,False,Usage and ownership
17,Albania,2019,Tax as % of total cost of mobile ownership,50.00,3.5000,False,Usage and ownership
23,United Arab Emirates,2019,Tax as % of total cost of mobile ownership,82.31,1.8845,False,Usage and ownership
29,Argentina,2019,Tax as % of total cost of mobile ownership,5.23,5.7385,False,Usage and ownership
...,...,...,...,...,...,...,...
995,Samoa,2019,Tax as % of total cost of mobile ownership,62.50,2.8750,False,Usage and ownership
1001,Yemen,2019,Tax as % of total cost of mobile ownership,75.00,2.2500,False,Usage and ownership
1007,South Africa,2019,Tax as % of total cost of mobile ownership,62.50,2.8750,False,Usage and ownership
1013,Zambia,2019,Tax as % of total cost of mobile ownership,17.88,5.1060,False,Usage and ownership


In [219]:
df.to_csv('../indicator_scores/people_{}_scores.csv'.format(indicator), index=False)

In [220]:
### 29. % of population with a smartphone

In [221]:
indicators[28]

# load data
indicator = indicators[28]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

% of population with a smartphone
population_with_smartphones


In [222]:
df.head(15)

Unnamed: 0.1,Unnamed: 0,Country,Total Population,Smartphone Penetration Rate,Smartphone Users
0,1,China,1.44B,0.64,918.45M
1,2,India,1.38B,0.32,439.42M
2,3,United States,331M,0.82,270M
3,4,Indonesia,273.52M,0.59,160.23M
4,5,Brazil,212.56M,0.51,109.34M
5,6,Russia,145.93M,0.69,99.93M
6,7,Japan,126.48M,0.63,80M
7,8,Mexico,128.93M,0.54,70.14M
8,9,Germany,83.78M,0.78,65.24M
9,10,Vietnam,97.34M,0.63,61.37M


In [223]:
subpillars[3]
subpillar = subpillars[3]
print(subpillar)

Usage and ownership


In [224]:
# create standard columns
# df.rename(columns={'COUNTRY/ECONOMY':'Country Name'}, inplace=True)
df['higher_is_better'] = True
df['Indicator'] = indicator
df['data_col'] = df['Smartphone Penetration Rate'] 
df['Country Name'] = df['Country']
df['Year'] = 2020
df['Sub-Pillar'] = subpillar

min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# transform 0-1 rank into 1-6
df['new_rank_score'] = df['data_col'].apply(lambda row: convert_rank(row,old_min=min_rank,old_max=max_rank))

In [225]:
df = df[['Country Name', 'Year','Indicator','data_col','new_rank_score','higher_is_better','Sub-Pillar']]
df

Unnamed: 0,Country Name,Year,Indicator,data_col,new_rank_score,higher_is_better,Sub-Pillar
0,China,2020,% of population with a smartphone,0.64,4.59375,True,Usage and ownership
1,India,2020,% of population with a smartphone,0.32,2.09375,True,Usage and ownership
2,United States,2020,% of population with a smartphone,0.82,6.0,True,Usage and ownership
3,Indonesia,2020,% of population with a smartphone,0.59,4.203125,True,Usage and ownership
4,Brazil,2020,% of population with a smartphone,0.51,3.578125,True,Usage and ownership
5,Russia,2020,% of population with a smartphone,0.69,4.984375,True,Usage and ownership
6,Japan,2020,% of population with a smartphone,0.63,4.515625,True,Usage and ownership
7,Mexico,2020,% of population with a smartphone,0.54,3.8125,True,Usage and ownership
8,Germany,2020,% of population with a smartphone,0.78,5.6875,True,Usage and ownership
9,Vietnam,2020,% of population with a smartphone,0.63,4.515625,True,Usage and ownership


In [226]:
df.to_csv('../indicator_scores/people_{}_scores.csv'.format(indicator), index=False)

In [227]:
### 30. Gender gap in internet usage

In [228]:
indicators[29]

# load data
indicator = indicators[29]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

Gender gap in internet usage
ITU_database


In [229]:
df.head(15)

Unnamed: 0,Country,Region,ISO,Indicator name,Year,Value,data_country,data_year
0,Angola,Africa,AGO,Female mobile phone ownership as a % of total ...,2010.0,,,
1,Benin,Africa,BEN,Female mobile phone ownership as a % of total ...,2010.0,,,
2,Botswana,Africa,BWA,Female mobile phone ownership as a % of total ...,2010.0,,,
3,Burkina Faso,Africa,BFA,Female mobile phone ownership as a % of total ...,2010.0,,,
4,Burundi,Africa,BDI,Female mobile phone ownership as a % of total ...,2010.0,,,
5,Cabo Verde,Africa,CPV,Female mobile phone ownership as a % of total ...,2010.0,,,
6,Cameroon,Africa,CMR,Female mobile phone ownership as a % of total ...,2010.0,,,
7,Central African Rep.,Africa,CAF,Female mobile phone ownership as a % of total ...,2010.0,,,
8,Chad,Africa,TCD,Female mobile phone ownership as a % of total ...,2010.0,,,
9,Congo (Rep. of the),Africa,COG,Female mobile phone ownership as a % of total ...,2010.0,,,


In [230]:
subpillars[3]
subpillar = subpillars[3]
print(subpillar)

Usage and ownership


In [231]:
df = df[(df['Indicator name'] == 'Female Internet users as a % of total female population')]
df = df[(df.Year == 2019)]
df

Unnamed: 0,Country,Region,ISO,Indicator name,Year,Value,data_country,data_year
40307,Cabo Verde,Africa,CPV,Female Internet users as a % of total female p...,2019.0,60.965017,,
40308,Côte d'Ivoire,Africa,CIV,Female Internet users as a % of total female p...,2019.0,32.924445,,
40309,Kenya,Africa,KEN,Female Internet users as a % of total female p...,2019.0,20.125122,,
40310,Lesotho,Africa,LSO,Female Internet users as a % of total female p...,2019.0,44.922768,,
40311,Mauritius,Africa,MUS,Female Internet users as a % of total female p...,2019.0,60.130645,,
...,...,...,...,...,...,...,...,...
40388,Mexico,The Americas,MEX,Female Internet users as a % of total female p...,2019.0,68.574653,,
40389,Panama,The Americas,PAN,Female Internet users as a % of total female p...,2019.0,63.811494,,
40390,Paraguay,The Americas,PRY,Female Internet users as a % of total female p...,2019.0,69.017230,,
40391,Peru,The Americas,PER,Female Internet users as a % of total female p...,2019.0,56.992144,,


In [232]:
# create standard columns
# df.rename(columns={'COUNTRY/ECONOMY':'Country Name'}, inplace=True)
df['higher_is_better'] = True
df['Indicator'] = indicator
df['data_col'] = df['Value'] 
df['Country Name'] = df['Country']
df['Sub-Pillar'] = subpillar

min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# transform 0-1 rank into 1-6
df['new_rank_score'] = df['data_col'].apply(lambda row: convert_rank(row,old_min=min_rank,old_max=max_rank))

In [233]:
df = df[['Country Name', 'Year','Indicator','data_col','new_rank_score','higher_is_better','Sub-Pillar']]
df

Unnamed: 0,Country Name,Year,Indicator,data_col,new_rank_score,higher_is_better,Sub-Pillar
40307,Cabo Verde,2019.0,Gender gap in internet usage,60.965017,3.766390,True,Usage and ownership
40308,Côte d'Ivoire,2019.0,Gender gap in internet usage,32.924445,2.151554,True,Usage and ownership
40309,Kenya,2019.0,Gender gap in internet usage,20.125122,1.414451,True,Usage and ownership
40310,Lesotho,2019.0,Gender gap in internet usage,44.922768,2.842529,True,Usage and ownership
40311,Mauritius,2019.0,Gender gap in internet usage,60.130645,3.718339,True,Usage and ownership
...,...,...,...,...,...,...,...
40388,Mexico,2019.0,Gender gap in internet usage,68.574653,4.204623,True,Usage and ownership
40389,Panama,2019.0,Gender gap in internet usage,63.811494,3.930316,True,Usage and ownership
40390,Paraguay,2019.0,Gender gap in internet usage,69.017230,4.230110,True,Usage and ownership
40391,Peru,2019.0,Gender gap in internet usage,56.992144,3.537595,True,Usage and ownership


In [234]:
df.to_csv('../indicator_scores/people_{}_scores.csv'.format(indicator), index=False)

In [235]:
### 31. Gender gap in mobile usage

In [236]:
indicators[30]

# load data
indicator = indicators[30]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

Gender gap in mobile usage
gender_gaps


In [237]:
df.head(15)

Unnamed: 0,ISO Code,Country,Region,Year,Gender parity in account ownership,Gender gap in social media use,Gender gap in mobile ownership
0,AFG,Afghanistan,South Asia,2014,11.109999,0.0,0.0
1,AFG,Afghanistan,South Asia,2015,14.087173,0.0,0.0
2,AFG,Afghanistan,South Asia,2016,17.06435,0.0,0.0
3,AFG,Afghanistan,South Asia,2017,20.041523,0.0,0.0
4,AFG,Afghanistan,South Asia,2018,20.041523,0.0,0.0
5,AFG,Afghanistan,South Asia,2019,20.041523,0.0,0.0
6,AGO,Angola,Sub-Saharan Africa,2014,55.237572,46.42857,51.645042
7,AGO,Angola,Sub-Saharan Africa,2015,55.237572,46.42857,57.001461
8,AGO,Angola,Sub-Saharan Africa,2016,55.237572,46.42857,63.725491
9,AGO,Angola,Sub-Saharan Africa,2017,55.237572,38.646553,53.966476


In [238]:
subpillars[3]
subpillar = subpillars[3]
print(subpillar)

Usage and ownership


In [239]:
df = df[(df.Year == 2019)]
df

Unnamed: 0,ISO Code,Country,Region,Year,Gender parity in account ownership,Gender gap in social media use,Gender gap in mobile ownership
5,AFG,Afghanistan,South Asia,2019,2.004152e+01,0.000000,0.000000
11,AGO,Angola,Sub-Saharan Africa,2019,5.523757e+01,49.552494,57.712311
17,ALB,Albania,Europe & Central Asia,2019,8.901575e+01,41.406120,67.741936
23,ARE,United Arab Emirates,Middle East & North Africa,2019,7.944289e+01,13.890497,100.000000
29,ARG,Argentina,Latin America & Caribbean,2019,1.000000e+02,100.000000,99.650528
...,...,...,...,...,...,...,...
995,WSM,Samoa,East Asia & Pacific,2019,9.256590e+01,100.000000,100.000000
1001,YEM,Yemen,Middle East & North Africa,2019,3.250000e-15,0.000000,0.000000
1007,ZAF,South Africa,Sub-Saharan Africa,2019,1.000000e+02,96.334473,78.272583
1013,ZMB,Zambia,Sub-Saharan Africa,2019,7.414376e+01,59.793156,53.703701


In [240]:
# create standard columns
# df.rename(columns={'COUNTRY/ECONOMY':'Country Name'}, inplace=True)
df['higher_is_better'] = True
df['Indicator'] = indicator
df['data_col'] = df['Gender gap in mobile ownership'] 
df['Country Name'] = df['Country']
df['Sub-Pillar'] = subpillar

min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# transform 0-1 rank into 1-6
df['new_rank_score'] = df['data_col'].apply(lambda row: convert_rank(row,old_min=min_rank,old_max=max_rank))

In [241]:
df = df[['Country Name', 'Year','Indicator','data_col','new_rank_score','higher_is_better','Sub-Pillar']]
df

Unnamed: 0,Country Name,Year,Indicator,data_col,new_rank_score,higher_is_better,Sub-Pillar
5,Afghanistan,2019,Gender gap in mobile usage,0.000000,1.000000,True,Usage and ownership
11,Angola,2019,Gender gap in mobile usage,57.712311,3.885616,True,Usage and ownership
17,Albania,2019,Gender gap in mobile usage,67.741936,4.387097,True,Usage and ownership
23,United Arab Emirates,2019,Gender gap in mobile usage,100.000000,6.000000,True,Usage and ownership
29,Argentina,2019,Gender gap in mobile usage,99.650528,5.982526,True,Usage and ownership
...,...,...,...,...,...,...,...
995,Samoa,2019,Gender gap in mobile usage,100.000000,6.000000,True,Usage and ownership
1001,Yemen,2019,Gender gap in mobile usage,0.000000,1.000000,True,Usage and ownership
1007,South Africa,2019,Gender gap in mobile usage,78.272583,4.913629,True,Usage and ownership
1013,Zambia,2019,Gender gap in mobile usage,53.703701,3.685185,True,Usage and ownership


In [242]:
df.to_csv('../indicator_scores/people_{}_scores.csv'.format(indicator), index=False)

In [243]:
### 32. Mobile Device Penetration

In [244]:
indicators[31]

# load data
indicator = indicators[31]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))


Mobile Device Penetration
ITU_database


In [245]:
df.head(15)

Unnamed: 0,Country,Region,ISO,Indicator name,Year,Value,data_country,data_year
0,Angola,Africa,AGO,Female mobile phone ownership as a % of total ...,2010.0,,,
1,Benin,Africa,BEN,Female mobile phone ownership as a % of total ...,2010.0,,,
2,Botswana,Africa,BWA,Female mobile phone ownership as a % of total ...,2010.0,,,
3,Burkina Faso,Africa,BFA,Female mobile phone ownership as a % of total ...,2010.0,,,
4,Burundi,Africa,BDI,Female mobile phone ownership as a % of total ...,2010.0,,,
5,Cabo Verde,Africa,CPV,Female mobile phone ownership as a % of total ...,2010.0,,,
6,Cameroon,Africa,CMR,Female mobile phone ownership as a % of total ...,2010.0,,,
7,Central African Rep.,Africa,CAF,Female mobile phone ownership as a % of total ...,2010.0,,,
8,Chad,Africa,TCD,Female mobile phone ownership as a % of total ...,2010.0,,,
9,Congo (Rep. of the),Africa,COG,Female mobile phone ownership as a % of total ...,2010.0,,,


In [246]:
subpillars[3]
subpillar = subpillars[3]
print(subpillar)

Usage and ownership


In [247]:
df = df[(df['Indicator name'] == 'Individuals owning a mobile phone (%)')]
df = df[(df.Year == 2019)]
df

Unnamed: 0,Country,Region,ISO,Indicator name,Year,Value,data_country,data_year
5753,Angola,Africa,AGO,Individuals owning a mobile phone (%),2019.0,,,
5754,Benin,Africa,BEN,Individuals owning a mobile phone (%),2019.0,,,
5755,Botswana,Africa,BWA,Individuals owning a mobile phone (%),2019.0,,,
5756,Burkina Faso,Africa,BFA,Individuals owning a mobile phone (%),2019.0,,,
5757,Burundi,Africa,BDI,Individuals owning a mobile phone (%),2019.0,,,
...,...,...,...,...,...,...,...,...
5943,Suriname,The Americas,SUR,Individuals owning a mobile phone (%),2019.0,,,
5944,Trinidad and Tobago,The Americas,TTO,Individuals owning a mobile phone (%),2019.0,,,
5945,United States,The Americas,USA,Individuals owning a mobile phone (%),2019.0,,,
5946,Uruguay,The Americas,URY,Individuals owning a mobile phone (%),2019.0,83.282233,,


In [248]:
# create standard columns
# df.rename(columns={'COUNTRY/ECONOMY':'Country Name'}, inplace=True)
df['higher_is_better'] = True
df['Indicator'] = indicator
df['data_col'] = df['Value'] 
df['Country Name'] = df['Country']
df['Sub-Pillar'] = subpillar

min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# transform 0-1 rank into 1-6
df['new_rank_score'] = df['data_col'].apply(lambda row: convert_rank(row,old_min=min_rank,old_max=max_rank))

In [249]:
df = df[['Country Name', 'Year','Indicator','data_col','new_rank_score','higher_is_better','Sub-Pillar']]
df

Unnamed: 0,Country Name,Year,Indicator,data_col,new_rank_score,higher_is_better,Sub-Pillar
5753,Angola,2019.0,Mobile Device Penetration,,,True,Usage and ownership
5754,Benin,2019.0,Mobile Device Penetration,,,True,Usage and ownership
5755,Botswana,2019.0,Mobile Device Penetration,,,True,Usage and ownership
5756,Burkina Faso,2019.0,Mobile Device Penetration,,,True,Usage and ownership
5757,Burundi,2019.0,Mobile Device Penetration,,,True,Usage and ownership
...,...,...,...,...,...,...,...
5943,Suriname,2019.0,Mobile Device Penetration,,,True,Usage and ownership
5944,Trinidad and Tobago,2019.0,Mobile Device Penetration,,,True,Usage and ownership
5945,United States,2019.0,Mobile Device Penetration,,,True,Usage and ownership
5946,Uruguay,2019.0,Mobile Device Penetration,83.282233,4.471964,True,Usage and ownership


In [250]:
df.to_csv('../indicator_scores/people_{}_scores.csv'.format(indicator), index=False)

In [251]:
### 33. Mobile Device Penetration (female)

In [252]:
indicators[32]

# load data
indicator = indicators[32]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))


Mobile Device Penetration (female)
ITU_database


In [253]:
df.head(15)

Unnamed: 0,Country,Region,ISO,Indicator name,Year,Value,data_country,data_year
0,Angola,Africa,AGO,Female mobile phone ownership as a % of total ...,2010.0,,,
1,Benin,Africa,BEN,Female mobile phone ownership as a % of total ...,2010.0,,,
2,Botswana,Africa,BWA,Female mobile phone ownership as a % of total ...,2010.0,,,
3,Burkina Faso,Africa,BFA,Female mobile phone ownership as a % of total ...,2010.0,,,
4,Burundi,Africa,BDI,Female mobile phone ownership as a % of total ...,2010.0,,,
5,Cabo Verde,Africa,CPV,Female mobile phone ownership as a % of total ...,2010.0,,,
6,Cameroon,Africa,CMR,Female mobile phone ownership as a % of total ...,2010.0,,,
7,Central African Rep.,Africa,CAF,Female mobile phone ownership as a % of total ...,2010.0,,,
8,Chad,Africa,TCD,Female mobile phone ownership as a % of total ...,2010.0,,,
9,Congo (Rep. of the),Africa,COG,Female mobile phone ownership as a % of total ...,2010.0,,,


In [254]:
subpillars[3]
subpillar = subpillars[3]
print(subpillar)

Usage and ownership


In [255]:
df = df[(df.Year==2019)]
df = df[(df['Indicator name'] == 'Female mobile phone ownership as a % of total female population')]

In [256]:
# create standard columns
# df.rename(columns={'COUNTRY/ECONOMY':'Country Name'}, inplace=True)
df['higher_is_better'] = True
df['Indicator'] = indicator
df['data_col'] = df['Value'] 
df['Country Name'] = df['Country']
df['Sub-Pillar'] = subpillar

min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# transform 0-1 rank into 1-6
df['new_rank_score'] = df['data_col'].apply(lambda row: convert_rank(row,old_min=min_rank,old_max=max_rank))

In [257]:
df = df[['Country Name', 'Year','Indicator','data_col','new_rank_score','higher_is_better','Sub-Pillar']]
df

Unnamed: 0,Country Name,Year,Indicator,data_col,new_rank_score,higher_is_better,Sub-Pillar
1763,Angola,2019.0,Mobile Device Penetration (female),,,True,Usage and ownership
1764,Benin,2019.0,Mobile Device Penetration (female),,,True,Usage and ownership
1765,Botswana,2019.0,Mobile Device Penetration (female),,,True,Usage and ownership
1766,Burkina Faso,2019.0,Mobile Device Penetration (female),,,True,Usage and ownership
1767,Burundi,2019.0,Mobile Device Penetration (female),,,True,Usage and ownership
...,...,...,...,...,...,...,...
1953,Suriname,2019.0,Mobile Device Penetration (female),,,True,Usage and ownership
1954,Trinidad and Tobago,2019.0,Mobile Device Penetration (female),,,True,Usage and ownership
1955,United States,2019.0,Mobile Device Penetration (female),,,True,Usage and ownership
1956,Uruguay,2019.0,Mobile Device Penetration (female),84.173624,4.925565,True,Usage and ownership


In [258]:
df.to_csv('../indicator_scores/people_{}_scores.csv'.format(indicator), index=False)

In [259]:
### 34. Crypto Adoption Index

In [260]:
indicators[33]

# load data
indicator = indicators[33]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

Crypto Adoption Index
Chainalysis_2020_Geography_Cryptocurrency_Report


In [261]:
df.head(15)

Unnamed: 0,Country,Score,Rank,On-chain value received (Rank),On-chain retail value received (Rank),P2P exchange trade volume (Rank)
0,Vietnam,1.0,1,4,2,3
1,India,0.37,2,2,3,72
2,Pakistan,0.36,3,11,12,8
3,Ukraine,0.29,4,6,5,40
4,Kenya,0.28,5,41,28,1
5,Nigeria,0.26,6,15,10,18
6,Venezuela,0.25,7,29,22,6
7,United States,0.22,8,3,4,109
8,Togo,0.19,9,47,42,2
9,Argentina,0.19,10,14,17,33


In [262]:
subpillars[3]
subpillar = subpillars[3]
print(subpillar)

Usage and ownership


In [263]:
# create standard columns
# df.rename(columns={'COUNTRY/ECONOMY':'Country Name'}, inplace=True)
df['higher_is_better'] = True
df['Indicator'] = indicator
df['data_col'] = df['Score'] 
df['Country Name'] = df['Country']
df['Year'] = 2019
df['Sub-Pillar'] = subpillar

min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# transform 0-1 rank into 1-6
df['new_rank_score'] = df['data_col'].apply(lambda row: convert_rank(row,old_min=min_rank,old_max=max_rank))

In [264]:
df = df[['Country Name', 'Year','Indicator','data_col','new_rank_score','higher_is_better','Sub-Pillar']]
df

Unnamed: 0,Country Name,Year,Indicator,data_col,new_rank_score,higher_is_better,Sub-Pillar
0,Vietnam,2019,Crypto Adoption Index,1.00,6.00,True,Usage and ownership
1,India,2019,Crypto Adoption Index,0.37,2.85,True,Usage and ownership
2,Pakistan,2019,Crypto Adoption Index,0.36,2.80,True,Usage and ownership
3,Ukraine,2019,Crypto Adoption Index,0.29,2.45,True,Usage and ownership
4,Kenya,2019,Crypto Adoption Index,0.28,2.40,True,Usage and ownership
...,...,...,...,...,...,...,...
152,Guyana,2019,Crypto Adoption Index,0.00,1.00,True,Usage and ownership
153,"Virgin Islands, U.S.",2019,Crypto Adoption Index,0.00,1.00,True,Usage and ownership
154,Brunei Darussalam,2019,Crypto Adoption Index,0.00,1.00,True,Usage and ownership
155,Bermuda,2019,Crypto Adoption Index,0.00,1.00,True,Usage and ownership


In [265]:
df.to_csv('../indicator_scores/people_{}_scores.csv'.format(indicator), index=False)

In [266]:
### Score Aggregating

In [267]:
import os

In [268]:
# get list of files in scores folder
scores = os.listdir('../indicator_scores/')
scores = [s for s in scores if s.startswith('people')]

In [269]:
scores

['people_% of internet users who own cryptocurrency_scores.csv',
 'people_% of population using digital financial services_scores.csv',
 'people_% of population using Facebook_scores.csv',
 'people_% of population using internet (all)_scores.csv',
 'people_% of population using internet (female)_scores.csv',
 'people_% of population using internet (male)_scores.csv',
 'people_% of population with a smartphone_scores.csv',
 'people_Automation-led unemployment_scores.csv',
 'people_Crypto Adoption Index_scores.csv',
 'people_Cyberbullying_scores.csv',
 'people_E-commerce activity (% of individuals buying online and frequency)_scores.csv',
 'people_E-waste generated, kilograms per inhabitant_scores.csv',
 'people_Facebook Social Connectedness Index_scores.csv',
 'people_Financial Inclusiveness_scores.csv',
 'people_Gender gap for social media use_scores.csv',
 'people_Gender gap in internet usage_scores.csv',
 'people_Gender gap in mobile usage_scores.csv',
 'people_Global Wellbeing Initi

In [270]:
# create a dataframe that concatenates all these file into one table
df = pd.concat([pd.read_csv('../indicator_scores/{}'.format(s)) for s in scores])    

In [271]:
df.to_csv('../pillar_scores/people_scores_beta.csv')

In [272]:
# Data cleaning
df['new_rank_score'] = df['new_rank_score'].fillna(0)
df.sort_values(by=['Country Name'], ascending=True, inplace=True)
df.reset_index(drop=True, inplace=True)
df['Country Name'] = df['Country Name'].astype(str, errors = 'ignore')

# remove trailing whitespaces from country name
df['Country Name'] = df['Country Name'].str.strip()
df['Country Name'] = df['Country Name'].str.strip('**')
df['Country Name'] = df['Country Name'].str.strip('*')
df['Country Name'] = df['Country Name'].str.strip(' *')

# Replace values that are not truly country names with nan
df['Country Name'] = df['Country Name'].replace('nan',np.nan)

df['Country Name'] = df['Country Name'].replace('Vietnam','Viet Nam')
df['Country Name'] = df['Country Name'].replace('United States of America','United States')



# Dropping the columns having NaN/NaT values
df = df[df['Country Name'].notna()]


In [273]:
sorted(df['Country Name'].unique().tolist())

['AE',
 'AG',
 'AL',
 'AM',
 'AO',
 'AR',
 'AT',
 'AU',
 'AW',
 'AZ',
 'Afghanistan',
 'Albania',
 'Algeria',
 'American Samoa',
 'Andorra',
 'Angola',
 'Anguilla',
 'Antigua and Barbuda',
 'Arab world',
 'Argentina',
 'Armenia',
 'Aruba',
 'Australia',
 'Austria',
 'Azerbaijan',
 'BA',
 'BB',
 'BD',
 'BE',
 'BF',
 'BG',
 'BH',
 'BI',
 'BJ',
 'BN',
 'BO',
 'BR',
 'BS',
 'BT',
 'BW',
 'BY',
 'BZ',
 'Bahamas',
 'Bahamas, The',
 'Bahrain',
 'Bangladesh',
 'Barbados',
 'Belarus',
 'Belgium',
 'Belize',
 'Benin',
 'Bermuda',
 'Bhutan',
 'Bolivia',
 'Bolivia (Plurinational State of)',
 'Bolivia, Plurinational State of',
 'Bosnia and Herzegovina',
 'Botswana',
 'Brazil',
 'British Virgin Islands',
 'Brunei',
 'Brunei Darussalam',
 'Bulgaria',
 'Burkina Faso',
 'Burundi',
 'CA',
 'CD',
 'CF',
 'CG',
 'CH',
 'CI',
 'CL',
 'CM',
 'CO',
 'CR',
 'CV',
 'CW',
 'CY',
 'CZ',
 'Cabo Verde',
 'Cambodia',
 'Cameroon',
 'Canada',
 'Cape Verde',
 'Cayman Islands',
 'Central African Rep.',
 'Central Africa

In [274]:
# average indicator scores per country
agg_df = df.groupby(['Country Name']).agg({'new_rank_score':'mean','data_col':'count'})

In [275]:
agg_df.columns = ['agg_score', 'count_source' ]

In [276]:
max_number_sources = agg_df.describe()['count_source']['max']

In [277]:
agg_df['agg_score_wt'] = agg_df['agg_score']*(agg_df['count_source']/max_number_sources)

In [278]:
agg_df.sort_values(by='agg_score', ascending=False, inplace=True)

In [279]:
agg_df.head(25)

Unnamed: 0_level_0,agg_score,count_source,agg_score_wt
Country Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
"Hong Kong, China (SAR)",5.920213,1,0.001833
Korea (Rep. of),5.782565,9,0.016117
"Hong Kong, China",5.65408,9,0.015759
North America,5.522665,2,0.003421
Brunei,5.509863,1,0.001706
Korea (Republic of),5.414894,1,0.001677
Georgia (Country),5.3585,1,0.001659
High income: OECD,5.322865,2,0.003297
Euro area,5.294456,2,0.003279
High income,5.238574,2,0.003245


In [280]:
agg_df.to_csv('../pillar_scores/people_scores_v0.csv')

In [281]:
### Score Aggregating by Subpillars

In [282]:
df.insert(0,'Pillar','People')
df

Unnamed: 0.1,Pillar,Country Name,Year,Indicator,data_col,new_rank_score,higher_is_better,Sub-Pillar,Unnamed: 0
0,People,AE,2021.0,Facebook Social Connectedness Index,9567.0000,1.000048,True,Culture,
1,People,AE,2021.0,Facebook Social Connectedness Index,17551.0000,1.000088,True,Culture,
2,People,AE,2021.0,Facebook Social Connectedness Index,3769.0000,1.000019,True,Culture,
3,People,AE,2021.0,Facebook Social Connectedness Index,28120.0000,1.000141,True,Culture,
4,People,AE,2021.0,Facebook Social Connectedness Index,13213.0000,1.000066,True,Culture,
...,...,...,...,...,...,...,...,...,...
598834,People,Zimbabwe,2019.0,Households with a computer and with Internet a...,,0.000000,True,Usage and ownership,
598835,People,Zimbabwe,2017.0,E-commerce activity (% of individuals buying o...,3.7900,1.241211,True,Usage and ownership,
598836,People,Zimbabwe,2019.0,Tax as % of total cost of mobile ownership,24.0000,4.800000,False,Usage and ownership,
598837,People,Zimbabwe,2019.0,Gross National Wellbeing,2.6900,1.000000,True,Digital Wellbeing,


In [283]:
sub_df = df.groupby(['Pillar','Sub-Pillar','Country Name']).agg({'new_rank_score':'mean','data_col':'count'})

In [284]:
sub_df.columns = ['agg_score', 'count_source' ]

In [285]:
max_number_sources = sub_df.describe()['count_source']['max']

In [286]:
sub_df['agg_score_wt'] = sub_df['agg_score']*(sub_df['count_source']/max_number_sources)

In [287]:
sub_df.to_csv('../subpillar_score/people_scores_subpillar_v0.csv')

In [288]:
#Test Uzbekistan

In [289]:
# get list of files in scores folder
scores = os.listdir('../subpillar_score/')
scores

df = pd.concat([pd.read_csv('../subpillar_score/{}'.format(s)) for s in scores])    

df

Unnamed: 0,Pillar,Sub-Pillar,Country Name,agg_score,count_source,agg_score_wt
0,Business,Financing Incentives,Albania,3.142857,1,0.392857
1,Business,Financing Incentives,Algeria,4.285714,1,0.535714
2,Business,Financing Incentives,Angola,1.000000,1,0.125000
3,Business,Financing Incentives,Argentina,1.821429,1,0.227679
4,Business,Financing Incentives,Armenia,4.035714,1,0.504464
...,...,...,...,...,...,...
200,Strategy,Ambition,St. Lucia,,0,
201,Strategy,Ambition,St. Vincent and the Grenadines,,0,
202,Strategy,Ambition,Timor-Leste,,0,
203,Strategy,Ambition,Tonga,,0,


In [290]:
uzb = df[(df['Country Name'] == 'Uzbekistan')]
uzb
uzb.to_csv('../country_scores/Uzbekistan.csv')