In [1]:
import pandas as pd
import numpy as np

In [2]:
### Get all the pillar names from the excel

In [3]:
names = pd.read_excel('../../UNDP Digital Assessment Data Framework Filename Matching V7.xlsx')

In [4]:
col_names = ['Indicator','check', 'Data Source','Index','Filename']

In [5]:
names = names[col_names]

In [6]:
names.head()

Unnamed: 0,Indicator,check,Data Source,Index,Filename
0,Countries,,United Nations,False,Countries
1,"Database of Global Administrative Areas (GADM,...",,GADM maps and data,False,
2,High Resolution Population Density Maps + Demo...,,Facebook,False,
3,population density vs openstreetmap object den...,,Kontur,False,
4,Population Density,Infrastructure,World Bank,False,population_density


In [7]:
# get all the files per pillar
data_stats = names.groupby('check').agg({'Filename':'count','Indicator':'count'})

In [8]:
data_stats

Unnamed: 0_level_0,Filename,Indicator
check,Unnamed: 1_level_1,Unnamed: 2_level_1
Business,20,25
Foundations,7,12
Government,10,15
Infrastructure,38,48
People,35,47
Regulation,6,7
Strategy,1,1


In [9]:
### Infrastructure

In [10]:
bnames = names[(names.check=='Infrastructure')&(~names.Filename.isna())]#&(names.Index==False)]
bnames.head(25)

Unnamed: 0,Indicator,check,Data Source,Index,Filename
4,Population Density,Infrastructure,World Bank,False,population_density
6,% of population covered by internet connectivity,Infrastructure,ITU,False,ITU_database
7,% of population covered by mobile 2G+ data con...,Infrastructure,GSMA Mobile Connectivity Index,False,countries_mobile_connectivity
8,% of population covered by mobile 3G+ data con...,Infrastructure,GSMA Mobile Connectivity Index,False,countries_mobile_connectivity
9,% of population covered by mobile 4G+ data con...,Infrastructure,GSMA Mobile Connectivity Index,False,countries_mobile_connectivity
10,% of population covered by mobile 5G+ data con...,Infrastructure,GSMA Mobile Connectivity Index,False,countries_mobile_connectivity
11,Mobile Coverage Maps,Infrastructure,ITU,False,ITU_database
12,Electricity Density,Infrastructure,Energy Data,False,electricity_yearbook
13,% of population covered by electricity,Infrastructure,World Bank,False,population_electricity_coverage
14,Electricity supply quality,Infrastructure,International Energy Agency / Global Competiti...,False,elect_supply_quality


In [11]:
# get list of names for all indicators
indicators = bnames.Indicator.unique()

In [12]:
# get all file names
bfiles = bnames.Filename.unique()

In [13]:
bfiles

array(['population_density', 'ITU_database',
       'countries_mobile_connectivity', 'electricity_yearbook',
       'population_electricity_coverage', 'elect_supply_quality',
       'sustainability_index', 'mobile_density', 'e_government_index',
       'spectrum_allocated_mobile_providers', 'countries_ixp',
       'internet_speed', 'mobile_latency', 'mobile_speed',
       'fixed_bdbd_spd_dl_ul', 'postal_coverage',
       'logistics_performance_index', 'national_cybersecurity_index',
       'dice_export_global_cybersecurity_index',
       'software_developer_ecosystem_size',
       'digital_platform_economy_index', 'migration_skill',
       'migration_industry', 'migration_country',
       'global_fintech_ranking', 'tech_hubs', 'banking_sector_size',
       'angel_investment', 'startup_eco_size',
       'international_co_inventions'], dtype=object)

In [14]:
# formula for converting scale
def convert_rank(old_value, old_min=1, old_max=7, new_min=1, new_max=6 ):
    """ Convert old scale values scale into new scale values"""
    old_range = old_max - old_min
    new_range = new_max - new_min
    new_value = (((old_value-old_min)*new_range)/old_range)+new_min
    return new_value

In [15]:
### 2. % of population covered by internet connectivity

In [16]:
indicators[1]

# load data
indicator = indicators[1]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

% of population covered by internet connectivity
ITU_database


In [17]:
df = df[(df.Year == 2020)]
df = df[(df['Indicator name']== 'Individuals using the Internet, total (%)')]
df

Unnamed: 0,Country,Region,ISO,Indicator name,Year,Value,data_country,data_year
47744,Mauritius,Africa,MUS,"Individuals using the Internet, total (%)",2020.0,64.884904,,
47745,Uganda,Africa,UGA,"Individuals using the Internet, total (%)",2020.0,,,
47746,Bahrain,Arab States,BHR,"Individuals using the Internet, total (%)",2020.0,99.539512,,
47747,Egypt,Arab States,EGY,"Individuals using the Internet, total (%)",2020.0,71.914200,,
47748,Iraq,Arab States,IRQ,"Individuals using the Internet, total (%)",2020.0,,,
...,...,...,...,...,...,...,...,...
47805,Bolivia (Plurinational State of),The Americas,BOL,"Individuals using the Internet, total (%)",2020.0,55.139051,,
47806,Costa Rica,The Americas,CRI,"Individuals using the Internet, total (%)",2020.0,80.530186,,
47807,Mexico,The Americas,MEX,"Individuals using the Internet, total (%)",2020.0,71.970000,,
47808,Paraguay,The Americas,PRY,"Individuals using the Internet, total (%)",2020.0,74.515240,,


In [18]:
# create standard columns
# df.rename(columns={'COUNTRY/ECONOMY':'Country Name'}, inplace=True)
df['higher_is_better'] = True
df['Indicator'] = indicator
df['data_col'] = df['Value']
df.rename(columns={'Country':'Country Name'}, inplace=True)

min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# transform 0-1 rank into 1-6
df['new_rank_score'] = df['data_col'].apply(lambda row: convert_rank(row,old_min=min_rank,old_max=max_rank))

In [19]:
df[['Country Name','Year','Indicator','data_col','new_rank_score','higher_is_better']].head(15)

Unnamed: 0,Country Name,Year,Indicator,data_col,new_rank_score,higher_is_better
47744,Mauritius,2020.0,% of population covered by internet connectivity,64.884904,3.298839,True
47745,Uganda,2020.0,% of population covered by internet connectivity,,,True
47746,Bahrain,2020.0,% of population covered by internet connectivity,99.539512,5.964578,True
47747,Egypt,2020.0,% of population covered by internet connectivity,71.9142,3.839554,True
47748,Iraq,2020.0,% of population covered by internet connectivity,,,True
47749,Kuwait,2020.0,% of population covered by internet connectivity,98.599995,5.892307,True
47750,Morocco,2020.0,% of population covered by internet connectivity,84.120363,4.778489,True
47751,Oman,2020.0,% of population covered by internet connectivity,95.232293,5.633253,True
47752,Qatar,2020.0,% of population covered by internet connectivity,99.652794,5.973292,True
47753,Saudi Arabia,2020.0,% of population covered by internet connectivity,97.862332,5.835564,True


In [20]:
df[['Country Name','Year','Indicator','data_col','new_rank_score','higher_is_better']].to_csv('../indicator_scores/infrastructure_{}_scores.csv'.format(indicator, index=False))

In [21]:
### 3. % of population covered by mobile 2G

In [22]:
indicators[2]

'% of population covered by mobile 2G+ data connectivity'

In [23]:
# load data
indicator = indicators[2]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

% of population covered by mobile 2G+ data connectivity
countries_mobile_connectivity


In [24]:
df.head(10)

Unnamed: 0,ISO Code,Country,Region,Year,Cluster,Index,Infrastructure,Affordability,Consumer Readiness,Content and Services,...,Gender gap in mobile ownership,TLDs per capita,E-Government Score,Mobile Social Media Penetration,Apps developed per person,Number of apps in national language,Accessibility of top ranked apps,Cybersecurity Index,data_country,data_year
0,AFG,Afghanistan,South Asia,2014,Discoverer,22.12,21.74,31.79,24.4,14.19,...,0.0,39.55,18.11,3.28,20.98,2.44,4.37,26.5,,
1,AFG,Afghanistan,South Asia,2015,Discoverer,22.99,22.82,30.81,25.28,15.71,...,0.0,39.57,24.27,4.36,22.93,2.79,8.03,25.83,,
2,AFG,Afghanistan,South Asia,2016,Discoverer,23.71,26.92,26.75,26.07,16.83,...,0.0,39.58,30.43,6.73,30.31,2.85,5.9,25.17,,
3,AFG,Afghanistan,South Asia,2017,Discoverer,25.82,33.54,27.22,28.56,17.04,...,0.0,39.47,30.5,7.78,31.62,2.91,6.15,24.5,,
4,AFG,Afghanistan,South Asia,2018,Discoverer,28.39,30.91,42.64,29.24,16.87,...,0.0,39.39,30.56,8.54,36.54,2.96,8.66,17.7,,
5,AFG,Afghanistan,South Asia,2019,Discoverer,28.94,32.34,41.53,29.72,17.58,...,0.0,39.41,41.18,9.39,39.36,3.0,5.72,17.7,,
6,AGO,Angola,Sub-Saharan Africa,2014,Discoverer,32.78,25.99,35.99,44.33,27.85,...,51.65,0.0,29.92,3.99,22.74,53.33,49.09,8.8,,
7,AGO,Angola,Sub-Saharan Africa,2015,Emerging,37.18,33.09,42.01,45.41,30.27,...,57.0,0.31,32.35,5.26,22.12,55.08,58.33,8.47,,
8,AGO,Angola,Sub-Saharan Africa,2016,Emerging,39.85,37.8,44.74,46.47,32.11,...,63.73,0.0,34.78,6.27,27.46,56.52,61.73,8.13,,
9,AGO,Angola,Sub-Saharan Africa,2017,Emerging,42.89,48.6,47.94,46.32,31.36,...,53.97,0.0,37.88,4.26,31.94,57.06,55.09,7.8,,


In [25]:
# filter most recent year
df = df[(df.Year==2019)]


In [26]:
# create standard columns
df.rename(columns={'Country':'Country Name'}, inplace=True)
df['higher_is_better'] = True
df['Indicator'] = indicator
df['data_col'] = df['2G Coverage'] 

min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# transform 0-1 rank into 1-6
df['new_rank_score'] = df['data_col'].apply(lambda row: convert_rank(row,old_min=min_rank,old_max=max_rank))

In [27]:
df[['Country Name','Year','Indicator','data_col','new_rank_score','higher_is_better']].head(15)

Unnamed: 0,Country Name,Year,Indicator,data_col,new_rank_score,higher_is_better
5,Afghanistan,2019,% of population covered by mobile 2G+ data con...,90.0,4.921251,True
11,Angola,2019,% of population covered by mobile 2G+ data con...,90.0,4.921251,True
17,Albania,2019,% of population covered by mobile 2G+ data con...,99.86,5.984898,True
23,United Arab Emirates,2019,% of population covered by mobile 2G+ data con...,100.0,6.0,True
29,Argentina,2019,% of population covered by mobile 2G+ data con...,98.0,5.78425,True
35,Armenia,2019,% of population covered by mobile 2G+ data con...,100.0,6.0,True
41,Australia,2019,% of population covered by mobile 2G+ data con...,99.4,5.935275,True
47,Austria,2019,% of population covered by mobile 2G+ data con...,99.0,5.892125,True
53,Azerbaijan,2019,% of population covered by mobile 2G+ data con...,100.0,6.0,True
59,Burundi,2019,% of population covered by mobile 2G+ data con...,53.65,1.0,True


In [28]:
df[['Country Name','Year','Indicator','data_col','new_rank_score','higher_is_better']].to_csv('../indicator_scores/infrastructure_{}_scores.csv'.format(indicator, index=False))

In [29]:
### 4. % of population covered by mobile 3G

In [30]:
indicators[3]

# load data
indicator = indicators[3]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

% of population covered by mobile 4G+ data connectivity
countries_mobile_connectivity


In [31]:
df.head(10)

Unnamed: 0,ISO Code,Country,Region,Year,Cluster,Index,Infrastructure,Affordability,Consumer Readiness,Content and Services,...,Gender gap in mobile ownership,TLDs per capita,E-Government Score,Mobile Social Media Penetration,Apps developed per person,Number of apps in national language,Accessibility of top ranked apps,Cybersecurity Index,data_country,data_year
0,AFG,Afghanistan,South Asia,2014,Discoverer,22.12,21.74,31.79,24.4,14.19,...,0.0,39.55,18.11,3.28,20.98,2.44,4.37,26.5,,
1,AFG,Afghanistan,South Asia,2015,Discoverer,22.99,22.82,30.81,25.28,15.71,...,0.0,39.57,24.27,4.36,22.93,2.79,8.03,25.83,,
2,AFG,Afghanistan,South Asia,2016,Discoverer,23.71,26.92,26.75,26.07,16.83,...,0.0,39.58,30.43,6.73,30.31,2.85,5.9,25.17,,
3,AFG,Afghanistan,South Asia,2017,Discoverer,25.82,33.54,27.22,28.56,17.04,...,0.0,39.47,30.5,7.78,31.62,2.91,6.15,24.5,,
4,AFG,Afghanistan,South Asia,2018,Discoverer,28.39,30.91,42.64,29.24,16.87,...,0.0,39.39,30.56,8.54,36.54,2.96,8.66,17.7,,
5,AFG,Afghanistan,South Asia,2019,Discoverer,28.94,32.34,41.53,29.72,17.58,...,0.0,39.41,41.18,9.39,39.36,3.0,5.72,17.7,,
6,AGO,Angola,Sub-Saharan Africa,2014,Discoverer,32.78,25.99,35.99,44.33,27.85,...,51.65,0.0,29.92,3.99,22.74,53.33,49.09,8.8,,
7,AGO,Angola,Sub-Saharan Africa,2015,Emerging,37.18,33.09,42.01,45.41,30.27,...,57.0,0.31,32.35,5.26,22.12,55.08,58.33,8.47,,
8,AGO,Angola,Sub-Saharan Africa,2016,Emerging,39.85,37.8,44.74,46.47,32.11,...,63.73,0.0,34.78,6.27,27.46,56.52,61.73,8.13,,
9,AGO,Angola,Sub-Saharan Africa,2017,Emerging,42.89,48.6,47.94,46.32,31.36,...,53.97,0.0,37.88,4.26,31.94,57.06,55.09,7.8,,


In [32]:
# filter most recent year
df = df[(df.Year==2019)]

# create standard columns
df.rename(columns={'Country':'Country Name'}, inplace=True)
df['higher_is_better'] = True
df['Indicator'] = indicator
df['data_col'] = df['3G Coverage'] 

min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# transform 0-1 rank into 1-6
df['new_rank_score'] = df['data_col'].apply(lambda row: convert_rank(row,old_min=min_rank,old_max=max_rank))

In [33]:
df[['Country Name','Year','Indicator','data_col','new_rank_score','higher_is_better']].head(15)

Unnamed: 0,Country Name,Year,Indicator,data_col,new_rank_score,higher_is_better
5,Afghanistan,2019,% of population covered by mobile 4G+ data con...,58.7,3.282895,True
11,Angola,2019,% of population covered by mobile 4G+ data con...,71.0,4.092105,True
17,Albania,2019,% of population covered by mobile 4G+ data con...,97.0,5.802632,True
23,United Arab Emirates,2019,% of population covered by mobile 4G+ data con...,100.0,6.0,True
29,Argentina,2019,% of population covered by mobile 4G+ data con...,95.0,5.671053,True
35,Armenia,2019,% of population covered by mobile 4G+ data con...,99.0,5.934211,True
41,Australia,2019,% of population covered by mobile 4G+ data con...,99.5,5.967105,True
47,Austria,2019,% of population covered by mobile 4G+ data con...,99.0,5.934211,True
53,Azerbaijan,2019,% of population covered by mobile 4G+ data con...,95.0,5.671053,True
59,Burundi,2019,% of population covered by mobile 4G+ data con...,40.0,2.052632,True


In [34]:
df[['Country Name','Year','Indicator','data_col','new_rank_score','higher_is_better']].to_csv('../indicator_scores/infrastructure_{}_scores.csv'.format(indicator, index=False))

In [35]:
### 5. % of population covered by mobile 4G

In [36]:
indicators[4]

# load data
indicator = indicators[4]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

% of population covered by mobile 5G+ data connectivity
countries_mobile_connectivity


In [37]:
df.head(10)

Unnamed: 0,ISO Code,Country,Region,Year,Cluster,Index,Infrastructure,Affordability,Consumer Readiness,Content and Services,...,Gender gap in mobile ownership,TLDs per capita,E-Government Score,Mobile Social Media Penetration,Apps developed per person,Number of apps in national language,Accessibility of top ranked apps,Cybersecurity Index,data_country,data_year
0,AFG,Afghanistan,South Asia,2014,Discoverer,22.12,21.74,31.79,24.4,14.19,...,0.0,39.55,18.11,3.28,20.98,2.44,4.37,26.5,,
1,AFG,Afghanistan,South Asia,2015,Discoverer,22.99,22.82,30.81,25.28,15.71,...,0.0,39.57,24.27,4.36,22.93,2.79,8.03,25.83,,
2,AFG,Afghanistan,South Asia,2016,Discoverer,23.71,26.92,26.75,26.07,16.83,...,0.0,39.58,30.43,6.73,30.31,2.85,5.9,25.17,,
3,AFG,Afghanistan,South Asia,2017,Discoverer,25.82,33.54,27.22,28.56,17.04,...,0.0,39.47,30.5,7.78,31.62,2.91,6.15,24.5,,
4,AFG,Afghanistan,South Asia,2018,Discoverer,28.39,30.91,42.64,29.24,16.87,...,0.0,39.39,30.56,8.54,36.54,2.96,8.66,17.7,,
5,AFG,Afghanistan,South Asia,2019,Discoverer,28.94,32.34,41.53,29.72,17.58,...,0.0,39.41,41.18,9.39,39.36,3.0,5.72,17.7,,
6,AGO,Angola,Sub-Saharan Africa,2014,Discoverer,32.78,25.99,35.99,44.33,27.85,...,51.65,0.0,29.92,3.99,22.74,53.33,49.09,8.8,,
7,AGO,Angola,Sub-Saharan Africa,2015,Emerging,37.18,33.09,42.01,45.41,30.27,...,57.0,0.31,32.35,5.26,22.12,55.08,58.33,8.47,,
8,AGO,Angola,Sub-Saharan Africa,2016,Emerging,39.85,37.8,44.74,46.47,32.11,...,63.73,0.0,34.78,6.27,27.46,56.52,61.73,8.13,,
9,AGO,Angola,Sub-Saharan Africa,2017,Emerging,42.89,48.6,47.94,46.32,31.36,...,53.97,0.0,37.88,4.26,31.94,57.06,55.09,7.8,,


In [38]:
# filter most recent year
df = df[(df.Year==2019)]

# create standard columns
df.rename(columns={'Country':'Country Name'}, inplace=True)
df['higher_is_better'] = True
df['Indicator'] = indicator
df['data_col'] = df['4G Coverage'] 

min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# transform 0-1 rank into 1-6
df['new_rank_score'] = df['data_col'].apply(lambda row: convert_rank(row,old_min=min_rank,old_max=max_rank))

In [39]:
df[['Country Name','Year','Indicator','data_col','new_rank_score','higher_is_better']].head(15)

Unnamed: 0,Country Name,Year,Indicator,data_col,new_rank_score,higher_is_better
5,Afghanistan,2019,% of population covered by mobile 5G+ data con...,15.0,1.75,True
11,Angola,2019,% of population covered by mobile 5G+ data con...,50.0,3.5,True
17,Albania,2019,% of population covered by mobile 5G+ data con...,96.0,5.8,True
23,United Arab Emirates,2019,% of population covered by mobile 5G+ data con...,99.0,5.95,True
29,Argentina,2019,% of population covered by mobile 5G+ data con...,89.77,5.4885,True
35,Armenia,2019,% of population covered by mobile 5G+ data con...,95.0,5.75,True
41,Australia,2019,% of population covered by mobile 5G+ data con...,99.2,5.96,True
47,Austria,2019,% of population covered by mobile 5G+ data con...,99.0,5.95,True
53,Azerbaijan,2019,% of population covered by mobile 5G+ data con...,90.0,5.5,True
59,Burundi,2019,% of population covered by mobile 5G+ data con...,25.0,2.25,True


In [40]:
df[['Country Name','Year','Indicator','data_col','new_rank_score','higher_is_better']].to_csv('../indicator_scores/infrastructure_{}_scores.csv'.format(indicator, index=False))

In [41]:
### 6. % of population covered by mobile 5G

In [42]:
indicators[5]

# load data
indicator = indicators[5]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

Mobile Coverage Maps
ITU_database


In [43]:
df.head(10)

Unnamed: 0,Country,Region,ISO,Indicator name,Year,Value,data_country,data_year
0,Angola,Africa,AGO,Female mobile phone ownership as a % of total ...,2010.0,,,
1,Benin,Africa,BEN,Female mobile phone ownership as a % of total ...,2010.0,,,
2,Botswana,Africa,BWA,Female mobile phone ownership as a % of total ...,2010.0,,,
3,Burkina Faso,Africa,BFA,Female mobile phone ownership as a % of total ...,2010.0,,,
4,Burundi,Africa,BDI,Female mobile phone ownership as a % of total ...,2010.0,,,
5,Cabo Verde,Africa,CPV,Female mobile phone ownership as a % of total ...,2010.0,,,
6,Cameroon,Africa,CMR,Female mobile phone ownership as a % of total ...,2010.0,,,
7,Central African Rep.,Africa,CAF,Female mobile phone ownership as a % of total ...,2010.0,,,
8,Chad,Africa,TCD,Female mobile phone ownership as a % of total ...,2010.0,,,
9,Congo (Rep. of the),Africa,COG,Female mobile phone ownership as a % of total ...,2010.0,,,


In [44]:
# filter most recent year
df = df[(df.Year==2019)]

# create standard columns
df.rename(columns={'Country':'Country Name'}, inplace=True)
df['higher_is_better'] = True
df['Indicator'] = indicator
df['data_col'] = df['5G Coverage'] 

min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# transform 0-1 rank into 1-6
df['new_rank_score'] = df['data_col'].apply(lambda row: convert_rank(row,old_min=min_rank,old_max=max_rank))

KeyError: '5G Coverage'

In [None]:
df[['Country Name','Year','Indicator','data_col','new_rank_score','higher_is_better']].head(15)

In [None]:
df[['Country Name','Year','Indicator','data_col','new_rank_score','higher_is_better']].to_csv('../indicator_scores/infrastructure_{}_scores.csv'.format(indicator, index=False))

In [None]:
### 7. Mobile Coverage Maps

In [None]:
indicators[6]

# load data
indicator = indicators[6]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

In [None]:
df.head(15)

In [None]:
df = df[(df.Year == 2020)]
df = df[(df['Indicator name']== 'Mobile-cellular subscriptions per 100 inhabitants')]
df

In [None]:
# create standard columns
df.rename(columns={'Country':'Country Name'}, inplace=True)
df['higher_is_better'] = True
df['Indicator'] = indicator
df['data_col'] = df['Value'] 

min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# transform 0-1 rank into 1-6
df['new_rank_score'] = df['data_col'].apply(lambda row: convert_rank(row,old_min=min_rank,old_max=max_rank))

In [None]:
df[['Country Name','Year','Indicator','data_col','new_rank_score','higher_is_better']].head(15)

In [None]:
df[['Country Name','Year','Indicator','data_col','new_rank_score','higher_is_better']].to_csv('../indicator_scores/infrastructure_{}_scores.csv'.format(indicator, index=False))

In [None]:
### 8. Electricity Density

In [None]:
indicators[7]

# load data
indicator = indicators[7]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

In [None]:
df.head(20)

In [None]:
# create standard columns
df['higher_is_better'] = True
df['Indicator'] = indicator
df['data_col'] = df['2017'] 
df['Country Name'] = df['Unnamed: 28']
df['Year'] = 2017

min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# transform 0-1 rank into 1-6
df['new_rank_score'] = df['data_col'].apply(lambda row: convert_rank(row,old_min=min_rank,old_max=max_rank))

In [None]:
df[['Country Name','Year','Indicator','data_col','new_rank_score','higher_is_better']].head(15)

In [None]:
df[['Country Name','Year','Indicator','data_col','new_rank_score','higher_is_better']].to_csv('../indicator_scores/infrastructure_{}_scores.csv'.format(indicator, index=False))

In [None]:
### 10. % of population covered by electricity

In [None]:
indicators[9]

# load data
indicator = indicators[9]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

In [None]:
df.head(15)

In [None]:
# create standard columns
df['higher_is_better'] = True
df['Indicator'] = indicator
df['data_col'] = df['2019'] 
df['Year'] = 2019

min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# transform 0-1 rank into 1-6
df['new_rank_score'] = df['data_col'].apply(lambda row: convert_rank(row,old_min=min_rank,old_max=max_rank))

In [None]:
df[['Country Name','Year','Indicator','data_col','new_rank_score','higher_is_better']].head(15)

In [None]:
df[['Country Name','Year','Indicator','data_col','new_rank_score','higher_is_better']].to_csv('../indicator_scores/infrastructure_{}_scores.csv'.format(indicator, index=False))

In [None]:
### 11. Electricity Supply Quality

In [None]:
indicators[10]

# load data
indicator = indicators[10]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

In [None]:
df.head(10)

In [None]:
# The data needs to be transposed as countries are now in columns, not rows, which could present problems for later.

In [None]:
### 12. Electricity Environmental Sustainability

In [None]:
indicators[11]

# load data
indicator = indicators[11]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

In [None]:
df.head(15)

In [None]:
# create standard columns
df.rename(columns={'Country':'Country Name'}, inplace=True)
df['higher_is_better'] = True
df['Indicator'] = indicator
df['data_col'] = df['2021 SDG Index Score'] 
df['Year'] = 2021

min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# transform 0-1 rank into 1-6
df['new_rank_score'] = df['data_col'].apply(lambda row: convert_rank(row,old_min=min_rank,old_max=max_rank))

In [None]:
df[['Country Name','Year','Indicator','data_col','new_rank_score','higher_is_better']].head(15)

In [None]:
df[['Country Name','Year','Indicator','data_col','new_rank_score','higher_is_better']].to_csv('../indicator_scores/infrastructure_{}_scores.csv'.format(indicator, index=False))

In [None]:
### 13. Cellphone Signal Density

In [None]:
indicators[12]

# load data
indicator = indicators[12]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

In [None]:
df.head(10)

In [None]:
df = df[(df.Year == 2019)]

# create standard columns
df.rename(columns={'Country':'Country Name'}, inplace=True)
df['higher_is_better'] = True
df['Indicator'] = indicator
df['data_col'] = df['Network coverage'] 

min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# transform 0-1 rank into 1-6
df['new_rank_score'] = df['data_col'].apply(lambda row: convert_rank(row,old_min=min_rank,old_max=max_rank))

In [None]:
df[['Country Name','Year','Indicator','data_col','new_rank_score','higher_is_better']].head(15)

In [None]:
df[['Country Name','Year','Indicator','data_col','new_rank_score','higher_is_better']].to_csv('../indicator_scores/infrastructure_{}_scores.csv'.format(indicator, index=False))

In [None]:
### 14. Telecommunication Infrastructure Index (TII)

In [None]:
indicators[13]

# load data
indicator = indicators[13]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

In [None]:
df.head(10)

In [None]:
# create standard columns
# df.rename(columns={'COUNTRY/ECONOMY':'Country Name'}, inplace=True)

df['higher_is_better'] = True
df['Indicator'] = indicator
df['data_col'] = df['Telecommunication Infrastructure Index'] 
df['Year'] = df['Survey Year']

min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# transform 0-1 rank into 1-6
df['new_rank_score'] = df['data_col'].apply(lambda row: convert_rank(row,old_min=min_rank,old_max=max_rank))


In [None]:
df[['Country Name','Year','Indicator','data_col','new_rank_score','higher_is_better']].head(15)

In [None]:
df[['Country Name','Year','Indicator','data_col','new_rank_score','higher_is_better']].to_csv('../indicator_scores/infrastructure_{}_scores.csv'.format(indicator, index=False))

In [None]:
### 15. GSMA Mobile Connectivity Index

In [None]:
indicators[14]

# load data
indicator = indicators[14]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

In [None]:
df.head(10)

In [None]:
# filter most recent year
df = df[(df.Year==2019)]

# create standard columns
df.rename(columns={'Country':'Country Name'}, inplace=True)

df['higher_is_better'] = True
df['Indicator'] = indicator
df['data_col'] = df['Index'] 

min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# transform 0-1 rank into 1-6
df['new_rank_score'] = df['data_col'].apply(lambda row: convert_rank(row,old_min=min_rank,old_max=max_rank))

In [None]:
df[['Country Name','Year','Indicator','data_col','new_rank_score','higher_is_better']].head(15)

In [None]:
df[['Country Name','Year','Indicator','data_col','new_rank_score','higher_is_better']].to_csv('../indicator_scores/infrastructure_{}_scores.csv'.format(indicator, index=False))

In [None]:
### 16. Spectrum Allocated to Mobile Providers

In [None]:
indicators[15]

# load data
indicator = indicators[15]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

In [None]:
df.head(15)

In [None]:
# filter most recent year
df = df[(df.Year==2019)]

# create standard columns
df.rename(columns={'Country':'Country Name'}, inplace=True)

df['higher_is_better'] = True
df['Indicator'] = indicator
df['data_col'] = df['Spectrum'] 

min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# transform 0-1 rank into 1-6
df['new_rank_score'] = df['data_col'].apply(lambda row: convert_rank(row,old_min=min_rank,old_max=max_rank))

In [None]:
df[['Country Name','Year','Indicator','data_col','new_rank_score','higher_is_better']].head(15)

In [None]:
df[['Country Name','Year','Indicator','data_col','new_rank_score','higher_is_better']].to_csv('../indicator_scores/infrastructure_{}_scores.csv'.format(indicator, index=False))

In [None]:
### 17. Internet Exchange Points (IXPs) 

In [None]:
indicators[16]

# load data
indicator = indicators[16]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))



In [None]:
df['Country Name'] = df['Country']
df.head(15)

In [None]:
agg_df = df.groupby('Country Name').agg({'Country':'count'})
agg_df

In [None]:
# create standard columns
# df.rename(columns={'COUNTRY/ECONOMY':'Country Name'}, inplace=True)

agg_df['higher_is_better'] = True
agg_df['Indicator'] = indicator
agg_df['data_col'] = agg_df['Country'] 
agg_df['Year'] = 2019

min_rank = agg_df['data_col'].min()
max_rank = agg_df['data_col'].max()

# transform 0-1 rank into 1-6
agg_df['new_rank_score'] = agg_df['data_col'].apply(lambda row: convert_rank(row,old_min=min_rank,old_max=max_rank))

In [None]:
agg_df[['Year','Indicator','data_col','new_rank_score','higher_is_better']].head(150)

In [None]:
agg_df[['Year','Indicator','data_col','new_rank_score','higher_is_better']].to_csv('../indicator_scores/infrastructure_{}_scores.csv'.format(indicator, index=False))

In [None]:
### 18. ISP internet download speed averag

In [None]:
indicators[17]

# load data
indicator = indicators[17]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

In [None]:
df.head(15)

In [None]:
# create standard columns
# df.rename(columns={'COUNTRY/ECONOMY':'Country Name'}, inplace=True)

df['higher_is_better'] = True
df['Indicator'] = indicator
df['data_col'] = df['Download speed (Mbps)'] 
df['Country Name'] = df['Country']

min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# transform 0-1 rank into 1-6
df['new_rank_score'] = df['data_col'].apply(lambda row: convert_rank(row,old_min=min_rank,old_max=max_rank))

In [None]:
df[['Country Name','Year','Indicator','data_col','new_rank_score','higher_is_better']].head(15)

In [None]:
df[['Country Name','Year','Indicator','data_col','new_rank_score','higher_is_better']].to_csv('../indicator_scores/infrastructure_{}_scores.csv'.format(indicator, index=False))

In [None]:
### 19. Mobile dowload speed at the slowest hour

In [None]:
indicators[18]

# load data
indicator = indicators[18]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

# Waiting for internet_speed to be added to the processed folder

In [None]:
df.head(15)

In [None]:
# filter most recent year
df = df[(df.Year==2019)]

# create standard columns
# df.rename(columns={'COUNTRY/ECONOMY':'Country Name'}, inplace=True)

df['higher_is_better'] = True
df['Indicator'] = indicator
df['data_col'] = df['Mobile latencies'] 
df['Country Name'] = df['Country']

min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# transform 0-1 rank into 1-6
df['new_rank_score'] = df['data_col'].apply(lambda row: convert_rank(row,old_min=min_rank,old_max=max_rank))


# invert since to lower rank is better
df['new_rank_score'] = df['new_rank_score'].apply(lambda row: (6-row)+1)

In [None]:
df[['Country Name','Year','Indicator','data_col','new_rank_score','higher_is_better']].head(15)

In [None]:
df[['Country Name','Year','Indicator','data_col','new_rank_score','higher_is_better']].to_csv('../indicator_scores/infrastructure_{}_scores.csv'.format(indicator, index=False))

In [None]:
### 20. Mobile download speed average

In [None]:
indicators[19]

# load data
indicator = indicators[19]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

In [None]:
df.head(15)

In [None]:
# create standard columns
# df.rename(columns={'COUNTRY/ECONOMY':'Country Name'}, inplace=True)

df['higher_is_better'] = True
df['Indicator'] = indicator
df['data_col'] = df['Mbps'] 
df['Country Name'] = df['Country']

min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# transform 0-1 rank into 1-6
df['new_rank_score'] = df['data_col'].apply(lambda row: convert_rank(row,old_min=min_rank,old_max=max_rank))

In [None]:
df[['Country Name','Year','Indicator','data_col','new_rank_score','higher_is_better']].head(15)


In [None]:
df[['Country Name','Year','Indicator','data_col','new_rank_score','higher_is_better']].to_csv('../indicator_scores/infrastructure_{}_scores.csv'.format(indicator, index=False))

In [None]:
### 21. Individuals using the Internet (% of population)

In [None]:
indicators[20]

# load data
indicator = indicators[20]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

In [None]:
df.head(15)

In [None]:
df = df[(df['Indicator name'] == 'Individuals using the Internet, total (%)')]
df = df[(df.Year==2019)]
df.head(15)

In [None]:
# create standard columns
# df.rename(columns={'COUNTRY/ECONOMY':'Country Name'}, inplace=True)

df['higher_is_better'] = True
df['Indicator'] = indicator
df['data_col'] = df['Value'] 
df.rename(columns = {'Country':'Country Name'}, inplace=True)

min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# transform 0-1 rank into 1-6
df['new_rank_score'] = df['data_col'].apply(lambda row: convert_rank(row,old_min=min_rank,old_max=max_rank))

In [None]:
df[['Country Name','Year','Indicator','data_col','new_rank_score','higher_is_better']].head(150)

In [None]:
df[['Country Name','Year','Indicator','data_col','new_rank_score','higher_is_better']].to_csv('../indicator_scores/infrastructure_{}_scores.csv'.format(indicator, index=False))

In [None]:
### 22. Mobile-cellular subscriptions per 100 inhabitants

In [None]:
indicators[21]

# load data
indicator = indicators[21]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

In [None]:
df = df[(df['Indicator name'] == 'Individuals using the Internet, total (%)')]
df = df[(df.Year==2020)]
df.head(15)

In [None]:
# create standard columns
# df.rename(columns={'COUNTRY/ECONOMY':'Country Name'}, inplace=True)

df['higher_is_better'] = True
df['Indicator'] = indicator
df['data_col'] = df['Value'] 
df.rename(columns = {'Country':'Country Name'}, inplace=True)

min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# transform 0-1 rank into 1-6
df['new_rank_score'] = df['data_col'].apply(lambda row: convert_rank(row,old_min=min_rank,old_max=max_rank))

In [None]:
df[['Country Name','Year','Indicator','data_col','new_rank_score','higher_is_better']].head(150)


In [None]:
df[['Country Name','Year','Indicator','data_col','new_rank_score','higher_is_better']].to_csv('../indicator_scores/infrastructure_{}_scores.csv'.format(indicator, index=False))

In [None]:
### 23. Average fixed broadband download speeds	

In [None]:
indicators[22]

# load data
indicator = indicators[22]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))


In [None]:
df.head(15)

In [None]:
# create standard columns

df.rename(columns={'Country':'Country Name'}, inplace=True)
df['higher_is_better'] = True
df['Year'] = 2021
df['Indicator'] = indicator
df['data_col'] = df['broadband'] 
df.rename(columns = {'country':'Country Name'}, inplace=True)

min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# transform 0-1 rank into 1-6
df['new_rank_score'] = df['data_col'].apply(lambda row: convert_rank(row,old_min=min_rank,old_max=max_rank))

In [None]:
df[['Country Name','Year','Indicator','data_col','new_rank_score','higher_is_better']].head(15)



In [None]:
df[['Country Name','Year','Indicator','data_col','new_rank_score','higher_is_better']].to_csv('../indicator_scores/infrastructure_{}_scores.csv'.format(indicator, index=False))

In [None]:
### 24. Postal Coverage

In [None]:
indicators[23]

# load data
indicator = indicators[23]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

In [None]:
df.head(15)

In [None]:
#Choose 'Percent of Population Having Mail Delivered at Home' as the data column
df = df[(df.Indicator=='Percent of Population Having Mail Delivered at Home')]
df.head(15)

# create standard columns
# df.rename(columns={'COUNTRY/ECONOMY':'Country Name'}, inplace=True)

df['higher_is_better'] = True
df['Year'] = 2015
df['Indicator'] = indicator
df['data_col'] = df['2015'] 

min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# transform 0-1 rank into 1-6
df['new_rank_score'] = df['data_col'].apply(lambda row: convert_rank(row,old_min=min_rank,old_max=max_rank))

In [None]:
df[['Country Name','Year','Indicator','data_col','new_rank_score','higher_is_better']].head(15)



In [None]:
df[['Country Name','Year','Indicator','data_col','new_rank_score','higher_is_better']].to_csv('../indicator_scores/infrastructure_{}_scores.csv'.format(indicator, index=False))

In [None]:
### 25. Logistics Performance Index (LPI) 

In [None]:
indicators[24]

# load data
indicator = indicators[24]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

In [None]:
df.head(15)

In [None]:
# create standard columns
# df.rename(columns={'COUNTRY/ECONOMY':'Country Name'}, inplace=True)

df['higher_is_better'] = True
df['Year'] = 2018
df['Indicator'] = indicator
df['data_col'] = df['score']
df.rename(columns={'Country':'Country Name'}, inplace=True)

min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# transform 0-1 rank into 1-6
df['new_rank_score'] = df['data_col'].apply(lambda row: convert_rank(row,old_min=min_rank,old_max=max_rank))

In [None]:
df[['Country Name','Year','Indicator','data_col','new_rank_score','higher_is_better']].head(15)



In [None]:
df[['Country Name','Year','Indicator','data_col','new_rank_score','higher_is_better']].to_csv('../indicator_scores/infrastructure_{}_scores.csv'.format(indicator, index=False))

In [None]:
### 26. National cyber security index

In [None]:
indicators[25]

# load data
indicator = indicators[25]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

In [None]:
df.head(15)

In [None]:
# create standard columns
# df.rename(columns={'COUNTRY/ECONOMY':'Country Name'}, inplace=True)

df['higher_is_better'] = True
df['Year'] = 2021
df['Indicator'] = indicator
df['data_col'] = df['National Cyber Security Index']
df.rename(columns={'Country':'Country Name'}, inplace=True)

min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# transform 0-1 rank into 1-6
df['new_rank_score'] = df['data_col'].apply(lambda row: convert_rank(row,old_min=min_rank,old_max=max_rank))

In [None]:
df[['Country Name','Year','Indicator','data_col','new_rank_score','higher_is_better']].head(15)



In [None]:
df[['Country Name','Year','Indicator','data_col','new_rank_score','higher_is_better']].to_csv('../indicator_scores/infrastructure_{}_scores.csv'.format(indicator, index=False))

In [None]:
### 27. Global Cybersecurity Index (GCI)

In [None]:
indicators[26]

# load data
indicator = indicators[26]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

In [None]:
df.head(15)

In [None]:
# create standard columns
# df.rename(columns={'COUNTRY/ECONOMY':'Country Name'}, inplace=True)

df['higher_is_better'] = True
df['Year'] = 2020
df['Indicator'] = indicator
df['data_col'] = df['Score (2020)'] 
df.rename(columns={'Country':'Country Name'}, inplace=True)

min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# transform 0-1 rank into 1-6
df['new_rank_score'] = df['data_col'].apply(lambda row: convert_rank(row,old_min=min_rank,old_max=max_rank))

In [None]:
df[['Country Name','Year','Indicator','data_col','new_rank_score','higher_is_better']].head(15)



In [None]:
df[['Country Name','Year','Indicator','data_col','new_rank_score','higher_is_better']].to_csv('../indicator_scores/infrastructure_{}_scores.csv'.format(indicator, index=False))

In [None]:
### 28. Software Developer Ecosystem size

In [None]:
indicators[27]

# load data
indicator = indicators[27]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

In [None]:
df.head(15)

In [None]:
df['higher_is_better'] = True
df['Year'] = 2018
df['Indicator'] = indicator
df['data_col'] = df.iloc[:,[1]]
df.rename(columns={'Country':'Country Name'}, inplace=True)
    
min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# transform 0-1 rank into 1-6
df['new_rank_score'] = df['data_col'].apply(lambda row: convert_rank(row,old_min=min_rank,old_max=max_rank))

In [None]:
df[['Country Name','Year','Indicator','data_col','new_rank_score','higher_is_better']].head(15)



In [None]:
df[['Country Name','Year','Indicator','data_col','new_rank_score','higher_is_better']].to_csv('../indicator_scores/infrastructure_{}_scores.csv'.format(indicator, index=False))

In [None]:
### 29. Digital Work Ecosystem size

In [None]:
indicators[28]

# load data
indicator = indicators[28]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

In [None]:
df.head(15)

In [None]:
# create standard columns
# df.rename(columns={'COUNTRY/ECONOMY':'Country Name'}, inplace=True)

df['higher_is_better'] = False
df['Year'] = 2020
df['Indicator'] = indicator
df['data_col'] = df.iloc[:,[0]].astype(float)
df.rename(columns={'Country':'Country Name'}, inplace=True)
    
min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# transform 0-1 rank into 1-6
df['new_rank_score'] = df['data_col'].apply(lambda row: convert_rank(row,old_min=min_rank,old_max=max_rank))

# invert since to lower rank is better
df['new_rank_score'] = df['new_rank_score'].apply(lambda row: (6-row)+1)

In [None]:
df[['Country Name','Year','Indicator','data_col','new_rank_score','higher_is_better']].head(15)



In [None]:
df[['Country Name','Year','Indicator','data_col','new_rank_score','higher_is_better']].to_csv('../indicator_scores/infrastructure_{}_scores.csv'.format(indicator, index=False))

In [None]:
### 30. Country, Industry, Skill Migration Data (Skill) 

In [None]:
indicators[29]

# load data
indicator = indicators[29]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

In [None]:
df.head(15)

In [None]:
df['Country Name'] = df.iloc[:,[1]]

df_sum = df.groupby('Country Name')[['net_per_10K_2019']].sum()
df_sum

In [None]:
# create standard columns
# df.rename(columns={'COUNTRY/ECONOMY':'Country Name'}, inplace=True)

df_sum['higher_is_better'] = True
df_sum['Year'] = 2019
df_sum['Indicator'] = indicator
df_sum['data_col'] = df_sum['net_per_10K_2019']


min_rank = df_sum['data_col'].min()
max_rank = df_sum['data_col'].max()

# transform 0-1 rank into 1-6
df_sum['new_rank_score'] = df_sum['data_col'].apply(lambda row: convert_rank(row,old_min=min_rank,old_max=max_rank))

In [None]:
df_sum[['Year','Indicator','data_col','new_rank_score','higher_is_better']].head(15)



In [None]:
df_sum[['Year','Indicator','data_col','new_rank_score','higher_is_better']].to_csv('../indicator_scores/infrastructure_{}_scores.csv'.format(indicator, index=False))

In [None]:
### 31. Country, Industry, Skill Migration Data (Industry)

In [None]:
indicators[30]

# load data
indicator = indicators[30]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))


In [None]:
df.head(15)

In [None]:
df['Country Name'] = df.iloc[:,[1]]

df_sum = df.groupby('Country Name')[['net_per_10K_2019']].sum()
df_sum

In [None]:
## create standard columns
# df.rename(columns={'COUNTRY/ECONOMY':'Country Name'}, inplace=True)

df_sum['higher_is_better'] = True
df_sum['Year'] = 2019
df_sum['Indicator'] = indicator
df_sum['data_col'] = df_sum['net_per_10K_2019']


min_rank = df_sum['data_col'].min()
max_rank = df_sum['data_col'].max()

# transform 0-1 rank into 1-6
df_sum['new_rank_score'] = df_sum['data_col'].apply(lambda row: convert_rank(row,old_min=min_rank,old_max=max_rank))

In [None]:
df_sum[['Year','Indicator','data_col','new_rank_score','higher_is_better']].head(15)



In [None]:
df_sum[['Year','Indicator','data_col','new_rank_score','higher_is_better']].to_csv('../indicator_scores/infrastructure_{}_scores.csv'.format(indicator, index=False))

In [None]:
### 32. Country, Industry, Skill Migration Data (Nation)

In [None]:
indicators[31]

# load data
indicator = indicators[31]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

In [None]:
df.head(15)

In [None]:
df['Country Name'] = df.iloc[:,[1]]

df_sum = df.groupby('Country Name')[['net_per_10K_2019']].sum()
df_sum

In [None]:
## create standard columns
# df.rename(columns={'COUNTRY/ECONOMY':'Country Name'}, inplace=True)

df_sum['higher_is_better'] = True
df_sum['Year'] = 2019
df_sum['Indicator'] = indicator
df_sum['data_col'] = df_sum['net_per_10K_2019']


min_rank = df_sum['data_col'].min()
max_rank = df_sum['data_col'].max()

# transform 0-1 rank into 1-6
df_sum['new_rank_score'] = df_sum['data_col'].apply(lambda row: convert_rank(row,old_min=min_rank,old_max=max_rank))

In [None]:
df_sum[['Year','Indicator','data_col','new_rank_score','higher_is_better']].head(15)



In [None]:
df_sum[['Year','Indicator','data_col','new_rank_score','higher_is_better']].to_csv('../indicator_scores/infrastructure_{}_scores.csv'.format(indicator, index=False))

In [None]:
### 33. Digital Finance Ecosystem size

In [None]:
indicators[32]

# load data
indicator = indicators[32]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

In [None]:
df.head(15)

In [None]:
# create standard columns
# df.rename(columns={'COUNTRY/ECONOMY':'Country Name'}, inplace=True)

df['higher_is_better'] = True
df['Year'] = 2021
df['Indicator'] = indicator
df['data_col'] = df.iloc[:,[2]]
df['Country Name'] = df.iloc[:,[1]]

min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# transform 0-1 rank into 1-6
df['new_rank_score'] = df['data_col'].apply(lambda row: convert_rank(row,old_min=min_rank,old_max=max_rank))

# Have to shorten the column names to see if this works

In [None]:
df[['Country Name','Year','Indicator','data_col','new_rank_score','higher_is_better']].head(15)



In [None]:
df[['Country Name','Year','Indicator','data_col','new_rank_score','higher_is_better']].to_csv('../indicator_scores/infrastructure_{}_scores.csv'.format(indicator, index=False))

In [None]:
### 34. Tech hubs & spaces size

In [None]:
indicators[33]

# load data
indicator = indicators[33]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

In [None]:
df.head(15)

#There are two problems:
#1. The column names need some overhaul (moving the second column to the top and make it the column names)
#2. This is a list of cities, not countries, may present probkems later. 

In [None]:
### 35. Banking Ecosystem size

In [None]:
indicators[34]

# load data
indicator = indicators[34]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

In [None]:
df.head(15)

In [None]:
# create standard columns
# df.rename(columns={'COUNTRY/ECONOMY':'Country Name'}, inplace=True)

df['higher_is_better'] = True
df['Year'] = 2017
df['Indicator'] = indicator
df['data_col'] = df['Bank assets, percent of GDP, 2017']
df.rename(columns={'Country':'Country Name'}, inplace=True)

min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# transform 0-1 rank into 1-6
df['new_rank_score'] = df['data_col'].apply(lambda row: convert_rank(row,old_min=min_rank,old_max=max_rank))

In [None]:
df[['Country Name','Year','Indicator','data_col','new_rank_score','higher_is_better']].to_csv('../indicator_scores/infrastructure_{}_scores.csv'.format(indicator, index=False))# Okay but will have to remove number from the Country column, leaving only characters. 

In [None]:

df[['Country Name','Year','Indicator','data_col','new_rank_score','higher_is_better']].to_csv('../indicator_scores/infrastructure_{}_scores.csv'.format(indicator, index=False))

In [None]:
### 36. Angel Ecosystem size

In [None]:
indicators[35]

# load data
indicator = indicators[35]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

In [None]:
df.head(15)

In [None]:
# create standard columns
# df.rename(columns={'COUNTRY/ECONOMY':'Country Name'}, inplace=True)

df['higher_is_better'] = True
df['Year'] = 2019
df['Indicator'] = indicator
df['data_col'] = df['Business angel investments (in million Euros)']
df.rename(columns={'Country':'Country Name'}, inplace=True)

min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# transform 0-1 rank into 1-6
df['new_rank_score'] = df['data_col'].apply(lambda row: convert_rank(row,old_min=min_rank,old_max=max_rank))

In [None]:
df[['Country Name','Year','Indicator','data_col','new_rank_score','higher_is_better']].head(15)

# Okay but there are only EU countries

In [None]:
df[['Country Name','Year','Indicator','data_col','new_rank_score','higher_is_better']].to_csv('../indicator_scores/infrastructure_{}_scores.csv'.format(indicator, index=False))

In [None]:
### 37. Startup Ecosystem size

In [None]:
indicators[36]

# load data
indicator = indicators[36]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

In [None]:
df.head(15)

# This is a list of cities, not countries, wonder if it will work 

In [None]:
### 38. Venture Ecosystem size

indicators[37]

# load data
indicator = indicators[37]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

In [None]:
df.head(15)

# Similar to the previous one

In [None]:
### 39. International Co-Inventions

In [None]:
indicators[38]

# load data
indicator = indicators[38]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

In [None]:
df.head(15)

In [None]:
# filter most recent year and global value
df = df[(df.Time==2017)]
df = df[(df.Country!='World')]
df ['Country Name'] = df ['Country']
df

In [None]:
# Create summarization
df_sum = df.groupby('Country Name')[['Value']].sum()
df_sum

In [None]:
# create standard columns
# df.rename(columns={'COUNTRY/ECONOMY':'Country Name'}, inplace=True)

df_sum['higher_is_better'] = True
df_sum['Year'] = 2017
df_sum['Indicator'] = indicator
df_sum['data_col'] = df_sum.iloc[:,[0]]

min_rank = df_sum['data_col'].min()
max_rank = df_sum['data_col'].max()

# transform 0-1 rank into 1-6
df_sum['new_rank_score'] = df_sum['data_col'].apply(lambda row: convert_rank(row,old_min=min_rank,old_max=max_rank))

In [None]:
df_sum[['Year','Indicator','data_col','new_rank_score','higher_is_better']].head(15)

# Okay but the data is too general with many missing countries

In [None]:
df_sum[['Year','Indicator','data_col','new_rank_score','higher_is_better']].to_csv('../indicator_scores/infrastructure_{}_scores.csv'.format(indicator, index=False))

In [None]:
### Score Aggregating

In [None]:
import os

In [None]:
# get list of files in scores folder
scores = os.listdir('../indicator_scores/')
scores = [s for s in scores if s.startswith('infrastructure')]

In [None]:
scores

In [None]:
# create a dataframe that concatenates all these file into one table
df = pd.concat([pd.read_csv('../indicator_scores/{}'.format(s)) for s in scores])    

In [None]:
df

In [None]:
# Data cleaning
df['new_rank_score'] = df['new_rank_score'].fillna(0)
df.sort_values(by=['Country Name'], ascending=True, inplace=True)
df.reset_index(drop=True, inplace=True)

# remove trailing whitespaces from country name
df['Country Name'] = df['Country Name'].str.strip()
df['Country Name'] = df['Country Name'].str.strip('**')
df['Country Name'] = df['Country Name'].str.strip('*')


# Replace values that are not truly country names with nan
df['Country Name'] = df['Country Name'].replace('n.a. : non avalaible',np.nan)
df['Country Name'] = df['Country Name'].replace('nan',np.nan)
df['Country Name'] = df['Country Name'].replace('Not classified',np.nan)
df['Country Name'] = df['Country Name'].replace('Source :',np.nan)
df['Country Name'] = df['Country Name'].replace('© Copyright Enerdata. Reproduction and diffusion prohibited (web, photocopy, intranet...) without written permission.',np.nan)

# Dropping the columns having NaN/NaT values
df = df[df['Country Name'].notna()]


df['Country Name'] = df['Country Name'].astype(str, errors = 'ignore')



In [None]:
df.info()

In [None]:
df.head(15)

In [None]:
sorted(df['Country Name'].unique().tolist())

In [None]:
df.head(15)

In [None]:
# average indicator scores per country
agg_df = df.groupby(['Country Name']).agg({'new_rank_score':'mean','data_col':'count'})

In [None]:
agg_df.columns = ['agg_score', 'count_source' ]

In [None]:
max_number_sources = agg_df.describe()['count_source']['max']

In [None]:
agg_df['agg_score_wt'] = agg_df['agg_score']*(agg_df['count_source']/max_number_sources)

In [None]:
agg_df.sort_values(by='agg_score', ascending=False, inplace=True)

In [None]:
agg_df.head(25)

In [None]:
agg_df.to_csv('../pillar_scores/infrastructure_scores_v0.csv')