In [1]:
import pandas as pd
import numpy as np

### Get all the pillar names from the excel

In [2]:
names = pd.read_excel('../../UNDP Digital Assessment Data Framework Filename Matching V7.xlsx')

In [3]:
col_names = ['Indicator','check', 'Data Source','Data Link','Raw/Index','Filename','Sub-Pillar']

In [4]:
names = names[col_names]

In [5]:
names.head()

Unnamed: 0,Indicator,check,Data Source,Data Link,Raw/Index,Filename,Sub-Pillar
0,Countries,,UN Statistics Division: List of Countries,https://unstats.un.org,Raw,Countries,
1,"Database of Global Administrative Areas (GADM,...",,,https://gadm.org,Raw,,
2,High Resolution Population Density Maps + Demo...,,,,,,
3,population density vs openstreetmap object den...,,,,,,
4,Population Density,Infrastructure,World Bank: World Development Indicators,https://datacatalog.worldbank.org,Raw,population_density,Connectivity Technology


In [6]:
# get all the files per pillar
data_stats = names.groupby('check').agg({'Filename':'count','Indicator':'count'})

In [7]:
data_stats

Unnamed: 0_level_0,Filename,Indicator
check,Unnamed: 1_level_1,Unnamed: 2_level_1
Business,19,28
Foundations,13,22
Government,11,16
Infrastructure,47,58
People,35,49
Regulation,5,8
Strategy,1,1


### Business

In [8]:
bnames = names[(names.check=='Business')&(~names.Filename.isna())]#&(names.Index==False)]

In [9]:
bnames.head(25)

Unnamed: 0,Indicator,check,Data Source,Data Link,Raw/Index,Filename,Sub-Pillar
87,UNCTAD Business-to-Consumer (B2C) E-commerce I...,Business,UNCTAD: Business-to-Consumer (B2C) E-commerce...,https://unctad.org,Index,b2c_ecommerse_idx,Technology Adoption
88,"Networking Services (Spend, IT Forecast Data)",Business,Portulans Institute: Network Readiness Index,https://networkreadinessindex.org,Index,network_readiness_index,Technology Adoption
93,"Cloud Services (Spend, IT Forecast Data)",Business,Statista,https://www.statista.com,Index,cloud_services,Technology Adoption
94,ICT task-intensive jobs as a percentage of tot...,Business,OECD: Going Digital Toolkit,http://goingdigital.oecd.org,Index,ICT_proportion,Technology Adoption
97,Share of business with internet,Business,OECD: ICT Access and Usage by Businesses,https://stats.oecd.org,Index,business_internet,Technology Adoption
98,Share of businesses with broadband,Business,OECD: Innovation Indicators,https://www.oecd.org/innovation/inno/inno-stat...,Index,business_broadband,Technology Adoption
99,Share of businesses with online presence,Business,Portulans Institute: Network Readiness Index,https://networkreadinessindex.org,Index,share_of_businesses_online_presence,Technology Adoption
100,Size of gig economy (% of GDP),Business,Portulans Institute: Network Readiness Index,https://networkreadinessindex.org,Index,prevalance_gig_economy,Technology Adoption
101,Size of digital economy (% of transactions),Business,Portulans Institute: Network Readiness Index,https://networkreadinessindex.org,Index,size_digital_economy,Technology Adoption
102,Venture Capital Availability,Business,World Economic Forum: Global Competitiveness I...,http://reports.weforum.org,Index,TCdata360,Financing Incentives


In [10]:
# get list of names for all indicators
indicators = bnames.Indicator.unique()
subpillars = bnames['Sub-Pillar'].unique()

In [11]:
# get all file names
bfiles = bnames.Filename.unique()

In [12]:
bfiles

array(['b2c_ecommerse_idx', 'network_readiness_index', 'cloud_services',
       'ICT_proportion', 'business_internet', 'business_broadband',
       'share_of_businesses_online_presence', 'prevalance_gig_economy',
       'size_digital_economy', 'TCdata360', 'doing_bus_idx',
       'legal_rights_strength', 'time_start_bus', 'ease_doing_bus',
       'ease_of_finding_skilled_employees', 'start_up_investment',
       'global_innovation_dataset', 'global_resilience_index'],
      dtype=object)

In [13]:
subpillars

array(['Technology Adoption', 'Financing Incentives',
       'Startup Environment'], dtype=object)

In [14]:
# ls digital-readiness-assessment-main/processed/

In [15]:
# formula for converting scale 0-100
def convert_rank(old_value, old_min=min, old_max=max, new_min=1, new_max=5.99 ):
    """ Convert old scale values scale into new scale values"""
    old_range = old_max - old_min
    new_range = new_max - new_min
    new_value = (((old_value-old_min)*new_range)/old_range)+new_min
    return new_value

In [16]:
# formula for converting scale 1-7
def convert_rank_b(old_value, old_min=1, old_max=7, new_min=1, new_max=5.99 ):
    """ Convert old scale values scale into new scale values"""
    old_range = old_max - old_min
    new_range = new_max - new_min
    new_value = (((old_value-old_min)*new_range)/old_range)+new_min
    return new_value

In [17]:
# formula for converting scale 0-12
def convert_rank_c(old_value, old_min=0, old_max=12, new_min=1, new_max=5.99 ):
    """ Convert old scale values scale into new scale values"""
    old_range = old_max - old_min
    new_range = new_max - new_min
    new_value = (((old_value-old_min)*new_range)/old_range)+new_min
    return new_value

In [18]:
### Add population dataset
df = pd.read_csv('../../processed/Population.csv')

# Normalize country names as much as possible
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Bahamas.*$)', 'Bahamas (the)')
df['Country Name'] = df['Country Name'].replace('Bahrain (Kingdom of)','Bahrain')
df['Country Name'] = df['Country Name'].replace('Bolivia','Bolivia (Plurinational State of)')
df['Country Name'] = df['Country Name'].replace('Bolivia, Plurinational State of','Bolivia (Plurinational State of)')
df['Country Name'] = df['Country Name'].replace('Brunei','Brunei Darussalam')
df['Country Name'] = df['Country Name'].replace('Bulgaria (Rep.)','Bulgaria')
df['Country Name'] = df['Country Name'].replace('Central African Republic','Central African Republic (the)')
df['Country Name'] = df['Country Name'].replace('Central African Rep.','Central African Republic (the)')
df['Country Name'] = df['Country Name'].replace("China (People's Rep.)",'China')
df['Country Name'] = df['Country Name'].replace("Comoros",'Comoros (the)')
df['Country Name'] = df['Country Name'].replace("Congo",'Congo (the)')
df['Country Name'] = df['Country Name'].replace("Congo (Brazzaville)",'Congo (the)')
df['Country Name'] = df['Country Name'].replace("Congo (Rep. of the)",'Congo (the)')
df['Country Name'] = df['Country Name'].replace("Congo (Democratic Republic of the)",'Democratic Republic of the Congo (the)')
df['Country Name'] = df['Country Name'].replace("Congo, Dem. Rep.",'Democratic Republic of the Congo (the)')
df['Country Name'] = df['Country Name'].replace("Congo, The Democratic Republic of the",'Democratic Republic of the Congo (the)')
df['Country Name'] = df['Country Name'].replace("DR Congo",'Democratic Republic of the Congo (the)')
df['Country Name'] = df['Country Name'].replace("Democratic Republic of Congo",'Democratic Republic of the Congo (the)')
df['Country Name'] = df['Country Name'].replace("Democratic Republic of the Congo",'Democratic Republic of the Congo (the)')
df['Country Name'] = df['Country Name'].replace("Dem. Rep. of the Congo",'Democratic Republic of the Congo (the)')
df['Country Name'] = df['Country Name'].replace("Cote d'Ivoire","Côte d'Ivoire")
df['Country Name'] = df['Country Name'].replace("Côte d’Ivoire","Côte d'Ivoire")
df['Country Name'] = df['Country Name'].replace("Cote D'Ivoire","Côte d'Ivoire")
df['Country Name'] = df['Country Name'].replace("Cote dIvoire","Côte d'Ivoire")
df['Country Name'] = df['Country Name'].str.replace(r"(^.*Côte d'Ivoire.*$)", "Côte d'Ivoire")
df['Country Name'] = df['Country Name'].replace("Cōte d'Ivoire","Côte d'Ivoire")
df['Country Name'] = df['Country Name'].replace("Ivory Coast","Côte d'Ivoire")
df['Country Name'] = df['Country Name'].replace("Dem. People's Rep. of Korea","Democratic People's Republic of Korea (the)")
df['Country Name'] = df['Country Name'].replace("Democratic People's Republic of Korea","Democratic People's Republic of Korea (the)")
df['Country Name'] = df['Country Name'].replace("Korea, Dem. People's Rep.","Democratic People's Republic of Korea (the)")
df['Country Name'] = df['Country Name'].replace("North Korea","Democratic People's Republic of Korea (the)")
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Czech.*$)', 'Czechia')
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Dominican Re.*$)', 'Dominican Republic (the)')
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Hong Kong.*$)', 'China, Hong Kong Special Administrative Region')
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Hongkong.*$)', 'China, Hong Kong Special Administrative Region')
df['Country Name'] = df['Country Name'].replace("Eswatini (Kingdom of)",'Eswatini')
df['Country Name'] = df['Country Name'].replace("Swaziland",'Eswatini')
df['Country Name'] = df['Country Name'].replace("Faröe Islands",'Faroe Islands')
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Gambia.*$)', 'Gambia (the)')
df['Country Name'] = df['Country Name'].replace("Georgia (Country)",'Georgia')
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Iran.*$)', 'Iran (Islamic Republic of)')
df['Country Name'] = df['Country Name'].replace("Korea",'Republic of Korea (the)')
df['Country Name'] = df['Country Name'].replace("Korea (Rep. of)",'Republic of Korea (the)')
df['Country Name'] = df['Country Name'].replace("Korea (Rep.)",'Republic of Korea (the)')
df['Country Name'] = df['Country Name'].replace("Korea (Republic of)",'Republic of Korea (the)')
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Korea, Rep.*$)', 'Republic of Korea (the)')
df['Country Name'] = df['Country Name'].replace("Korea, South",'Republic of Korea (the)')
df['Country Name'] = df['Country Name'].replace("South Korea",'Republic of Korea (the)')
df['Country Name'] = df['Country Name'].replace("Republic of Korea",'Republic of Korea (the)')
df['Country Name'] = df['Country Name'].replace('Republic of Congo','Congo (the)')
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Kyrgyz.*$)', 'Kyrgyzstan')
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Lao.*$)', "Lao People's Democratic Republic (the)")
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Macao.*$)', "China, Macao Special Administrative Region")
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Macau.*$)', "China, Macao Special Administrative Region")
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Micronesia.*$)', "Micronesia (Federated States of)")
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Moldova.*$)', "Republic of Moldova (the)")
df['Country Name'] = df['Country Name'].replace("Morroco",'Morocco')
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Nepal.*$)', "Nepal")
df['Country Name'] = df['Country Name'].str.replace(r'(^.*New Ze.*$)', "New Zealand")
df['Country Name'] = df['Country Name'].replace("Niger",'Niger (the)')
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Macedonia.*$)', "North Macedonia")
df['Country Name'] = df['Country Name'].str.replace(r'(^.*New Ze.*$)', "New Zealand")
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Palestin.*$)', "State of Palestine (the)")
df['Country Name'] = df['Country Name'].replace("West Bank and Gaza",'State of Palestine (the)')
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Panama.*$)', "Panama")
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Philippines.*$)', "Philippines (the)")
df['Country Name'] = df['Country Name'].replace("Republic of the Congo",'Congo (the)')
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Myanmar.*$)', "Myanmar")
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Puerto Rico.*$)', "Puerto Rico")
df['Country Name'] = df['Country Name'].replace("Russia",'Russian Federation (the)')
df['Country Name'] = df['Country Name'].replace("Russian Federation",'Russian Federation (the)')
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Slovak.*$)', "Slovakia")
df['Country Name'] = df['Country Name'].str.replace(r'\bSudan\b', 'Sudan (the)')
df['Country Name'] = df['Country Name'].str.replace(r'\bSudan (the)\b', 'Sudan (the)')
df['Country Name'] = df['Country Name'].str.replace(r'(^.*South Sudan.*$)', "South Sudan")
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Syria.*$)', "Syrian Arab Republic (the)")
df['Country Name'] = df['Country Name'].str.replace(r'(^.*São Tomé.*$)', "Sao Tome and Principe")
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Taiwan.*$)', "Taiwan")
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Taipei.*$)', "Taiwan")
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Tanzania.*$)', "United Republic of Tanzania (the)")
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Netherlands.*$)', "Netherlands (the)")
df['Country Name'] = df['Country Name'].replace("UAE",'United Arab Emirates (the)')
df['Country Name'] = df['Country Name'].replace("U.A.E",'United Arab Emirates (the)')
df['Country Name'] = df['Country Name'].replace("United Arab Emirates",'United Arab Emirates (the)')
df['Country Name'] = df['Country Name'].replace('United Kingdom','United Kingdom of Great Britain and Northern Ireland (the)')
df['Country Name'] = df['Country Name'].replace('UK','United Kingdom of Great Britain and Northern Ireland (the)')
df['Country Name'] = df['Country Name'].replace("Great Britain",'United Kingdom of Great Britain and Northern Ireland (the)')
df['Country Name'] = df['Country Name'].replace("United Kingdom of Great Britain and Northern Ireland",'United Kingdom of Great Britain and Northern Ireland (the)')
df['Country Name'] = df['Country Name'].replace('Vietnam','Viet Nam')
df['Country Name'] = df['Country Name'].replace('United States','United States of America (the)')
df['Country Name'] = df['Country Name'].replace('USA','United States of America (the)')
df['Country Name'] = df['Country Name'].replace('United States of America','United States of America (the)')
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Virgin Islands.*$)', "United States Virgin Islands")
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Vatican.*$)', "Vatican")
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Venezuela.*$)', "Venezuela (Bolivarian Republic of)")
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Yemen.*$)', "Yemen")
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Arab world.*$)', "Arab World")
df['Country Name'] = df['Country Name'].str.replace(r'(^.*World.*$)', "World")
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Kitts and Nevis.*$)', "Saint Kitts and Nevis")
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Lucia.*$)', "Saint Lucia")
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Martin (French Part).*$)', "Saint Martin (French Part)")
df['Country Name'] = df['Country Name'].replace('Sint Maarten','Saint Martin')
df['Country Name'] = df['Country Name'].replace('St. Martin (French part)','Saint Martin (French Part)')
df['Country Name'] = df['Country Name'].replace('Sint Maarten (Dutch part)','Saint Martin (Dutch Part)')
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Vincent and the Grenadines.*$)', "Saint Vincent and the Grenadines")
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Verde.*$)', "Cabo Verde")
df['Country Name'] = df['Country Name'].replace('Congo, Democratic Republic','Democratic Republic of the Congo (the)')
df['Country Name'] = df['Country Name'].replace('Congo, Rep.','Congo (the)')
df['Country Name'] = df['Country Name'].replace('Republic of Congo','Congo (the)')
df['Country Name'] = df['Country Name'].replace('Congo (Rep.)','Congo (the)')
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Egypt.*$)', "Egypt")
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Korea, D.*$)', "Democratic People's Republic of Korea (the)")
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Tobago.*$)', "Trinidad and Tobago")
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Timor-Leste.*$)', "Timor-Leste")
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Emirates.*$)', "United Arab Emirates (the)")
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Papua.*$)', "Papua New Guinea")
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Bissau.*$)', "Guinea-Bissau")
df['Country Name'] = df['Country Name'].replace('Eq. Guinea','Equatorial Guinea')
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Burma.*$)', "Myanmar")
df['Country Name'] = df['Country Name'].replace('C.A. Republic','Central African Republic (the)')
df['Country Name'] = df['Country Name'].replace('Ant.& Barb.','Antigua and Barbuda')
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Bosnia.*$)', "Bosnia and Herzegovina")
df['Country Name'] = df['Country Name'].replace('Domin. Rep.','Dominican Republic (the)')
df['Country Name'] = df['Country Name'].replace('Dominica (Commonwealth of)','Dominica')
df['Country Name'] = df['Country Name'].str.replace(r'(^.*European Union.*$)', "European Union")
df['Country Name'] = df['Country Name'].replace('R. of Congo','Congo (the)')
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Principe.*$)', "Sao Tome and Principe")
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Solomon.*$)', "Solomon Islands")
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Vincent.*$)', "Saint Vincent and the Grenadines")
df['Country Name'] = df['Country Name'].replace('Curacao','Curaçao')
df['Country Name'] = df['Country Name'].replace('Reunion','Réunion')
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Kosovo.*$)', "Kosovo (UNSCR 1244)")

pop = df
pop


  df['Country Name'] = df['Country Name'].str.replace(r'(^.*Bahamas.*$)', 'Bahamas (the)')
  df['Country Name'] = df['Country Name'].str.replace(r"(^.*Côte d'Ivoire.*$)", "Côte d'Ivoire")
  df['Country Name'] = df['Country Name'].str.replace(r'(^.*Czech.*$)', 'Czechia')
  df['Country Name'] = df['Country Name'].str.replace(r'(^.*Dominican Re.*$)', 'Dominican Republic (the)')
  df['Country Name'] = df['Country Name'].str.replace(r'(^.*Hong Kong.*$)', 'China, Hong Kong Special Administrative Region')
  df['Country Name'] = df['Country Name'].str.replace(r'(^.*Hongkong.*$)', 'China, Hong Kong Special Administrative Region')
  df['Country Name'] = df['Country Name'].str.replace(r'(^.*Gambia.*$)', 'Gambia (the)')
  df['Country Name'] = df['Country Name'].str.replace(r'(^.*Iran.*$)', 'Iran (Islamic Republic of)')
  df['Country Name'] = df['Country Name'].str.replace(r'(^.*Korea, Rep.*$)', 'Republic of Korea (the)')
  df['Country Name'] = df['Country Name'].str.replace(r'(^.*Kyrgyz.*$)', 'Kyr

Unnamed: 0,1960,1961,1962,1963,1964,1965,1966,1967,1968,1969,...,2015,2016,2017,2018,2019,2020,Country Name,Country Code,Indicator Name,Indicator Code
0,54208.0,55434.0,56234.0,56699.0,57029.0,57357.0,57702.0,58044.0,58377.0,58734.0,...,104339.0,104865.0,105361.0,105846.0,106310.0,106766.0,Aruba,ABW,"Population, total",SP.POP.TOTL
1,130836765.0,134159786.0,137614644.0,141202036.0,144920186.0,148769974.0,152752671.0,156876454.0,161156430.0,165611760.0,...,593871847.0,609978946.0,626392880.0,643090131.0,660046272.0,677243299.0,Africa Eastern and Southern,AFE,"Population, total",SP.POP.TOTL
2,8996967.0,9169406.0,9351442.0,9543200.0,9744772.0,9956318.0,10174840.0,10399936.0,10637064.0,10893772.0,...,34413603.0,35383028.0,36296111.0,37171922.0,38041757.0,38928341.0,Afghanistan,AFG,"Population, total",SP.POP.TOTL
3,96396419.0,98407221.0,100506960.0,102691339.0,104953470.0,107289875.0,109701811.0,112195950.0,114781116.0,117468741.0,...,401586651.0,412551299.0,423769930.0,435229381.0,446911598.0,458803476.0,Africa Western and Central,AFW,"Population, total",SP.POP.TOTL
4,5454938.0,5531451.0,5608499.0,5679409.0,5734995.0,5770573.0,5781305.0,5774440.0,5771973.0,5803677.0,...,27884380.0,28842482.0,29816769.0,30809787.0,31825299.0,32866268.0,Angola,AGO,"Population, total",SP.POP.TOTL
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
261,947000.0,966000.0,994000.0,1022000.0,1050000.0,1078000.0,1106000.0,1135000.0,1163000.0,1191000.0,...,1788196.0,1777557.0,1791003.0,1797085.0,1788878.0,1775378.0,Kosovo (UNSCR 1244),XKX,"Population, total",SP.POP.TOTL
262,5315351.0,5393034.0,5473671.0,5556767.0,5641598.0,5727745.0,5816241.0,5907873.0,6001858.0,6097042.0,...,26497881.0,27168210.0,27834811.0,28498683.0,29161922.0,29825968.0,Yemen,YEM,"Population, total",SP.POP.TOTL
263,17099836.0,17524533.0,17965733.0,18423157.0,18896303.0,19384838.0,19888259.0,20406863.0,20942147.0,21496075.0,...,55386369.0,56207649.0,57009751.0,57792520.0,58558267.0,59308690.0,South Africa,ZAF,"Population, total",SP.POP.TOTL
264,3070780.0,3164330.0,3260645.0,3360099.0,3463211.0,3570466.0,3681953.0,3797877.0,3918872.0,4045740.0,...,15879370.0,16363449.0,16853608.0,17351714.0,17861034.0,18383956.0,Zambia,ZMB,"Population, total",SP.POP.TOTL


In [19]:
### Add country's area dataset
### Add population dataset
df = pd.read_csv('../../processed/GDP.csv')

# Normalize country names as much as possible
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Bahamas.*$)', 'Bahamas (the)')
df['Country Name'] = df['Country Name'].replace('Bahrain (Kingdom of)','Bahrain')
df['Country Name'] = df['Country Name'].replace('Bolivia','Bolivia (Plurinational State of)')
df['Country Name'] = df['Country Name'].replace('Bolivia, Plurinational State of','Bolivia (Plurinational State of)')
df['Country Name'] = df['Country Name'].replace('Brunei','Brunei Darussalam')
df['Country Name'] = df['Country Name'].replace('Bulgaria (Rep.)','Bulgaria')
df['Country Name'] = df['Country Name'].replace('Central African Republic','Central African Republic (the)')
df['Country Name'] = df['Country Name'].replace('Central African Rep.','Central African Republic (the)')
df['Country Name'] = df['Country Name'].replace("China (People's Rep.)",'China')
df['Country Name'] = df['Country Name'].replace("Comoros",'Comoros (the)')
df['Country Name'] = df['Country Name'].replace("Congo",'Congo (the)')
df['Country Name'] = df['Country Name'].replace("Congo (Brazzaville)",'Congo (the)')
df['Country Name'] = df['Country Name'].replace("Congo (Rep. of the)",'Congo (the)')
df['Country Name'] = df['Country Name'].replace("Congo (Democratic Republic of the)",'Democratic Republic of the Congo (the)')
df['Country Name'] = df['Country Name'].replace("Congo, Dem. Rep.",'Democratic Republic of the Congo (the)')
df['Country Name'] = df['Country Name'].replace("Congo, The Democratic Republic of the",'Democratic Republic of the Congo (the)')
df['Country Name'] = df['Country Name'].replace("DR Congo",'Democratic Republic of the Congo (the)')
df['Country Name'] = df['Country Name'].replace("Democratic Republic of Congo",'Democratic Republic of the Congo (the)')
df['Country Name'] = df['Country Name'].replace("Democratic Republic of the Congo",'Democratic Republic of the Congo (the)')
df['Country Name'] = df['Country Name'].replace("Dem. Rep. of the Congo",'Democratic Republic of the Congo (the)')
df['Country Name'] = df['Country Name'].replace("Cote d'Ivoire","Côte d'Ivoire")
df['Country Name'] = df['Country Name'].replace("Côte d’Ivoire","Côte d'Ivoire")
df['Country Name'] = df['Country Name'].replace("Cote D'Ivoire","Côte d'Ivoire")
df['Country Name'] = df['Country Name'].replace("Cote dIvoire","Côte d'Ivoire")
df['Country Name'] = df['Country Name'].str.replace(r"(^.*Côte d'Ivoire.*$)", "Côte d'Ivoire")
df['Country Name'] = df['Country Name'].replace("Cōte d'Ivoire","Côte d'Ivoire")
df['Country Name'] = df['Country Name'].replace("Ivory Coast","Côte d'Ivoire")
df['Country Name'] = df['Country Name'].replace("Dem. People's Rep. of Korea","Democratic People's Republic of Korea (the)")
df['Country Name'] = df['Country Name'].replace("Democratic People's Republic of Korea","Democratic People's Republic of Korea (the)")
df['Country Name'] = df['Country Name'].replace("Korea, Dem. People's Rep.","Democratic People's Republic of Korea (the)")
df['Country Name'] = df['Country Name'].replace("North Korea","Democratic People's Republic of Korea (the)")
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Czech.*$)', 'Czechia')
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Dominican Re.*$)', 'Dominican Republic (the)')
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Hong Kong.*$)', 'China, Hong Kong Special Administrative Region')
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Hongkong.*$)', 'China, Hong Kong Special Administrative Region')
df['Country Name'] = df['Country Name'].replace("Eswatini (Kingdom of)",'Eswatini')
df['Country Name'] = df['Country Name'].replace("Swaziland",'Eswatini')
df['Country Name'] = df['Country Name'].replace("Faröe Islands",'Faroe Islands')
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Gambia.*$)', 'Gambia (the)')
df['Country Name'] = df['Country Name'].replace("Georgia (Country)",'Georgia')
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Iran.*$)', 'Iran (Islamic Republic of)')
df['Country Name'] = df['Country Name'].replace("Korea",'Republic of Korea (the)')
df['Country Name'] = df['Country Name'].replace("Korea (Rep. of)",'Republic of Korea (the)')
df['Country Name'] = df['Country Name'].replace("Korea (Rep.)",'Republic of Korea (the)')
df['Country Name'] = df['Country Name'].replace("Korea (Republic of)",'Republic of Korea (the)')
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Korea, Rep.*$)', 'Republic of Korea (the)')
df['Country Name'] = df['Country Name'].replace("Korea, South",'Republic of Korea (the)')
df['Country Name'] = df['Country Name'].replace("South Korea",'Republic of Korea (the)')
df['Country Name'] = df['Country Name'].replace("Republic of Korea",'Republic of Korea (the)')
df['Country Name'] = df['Country Name'].replace('Republic of Congo','Congo (the)')
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Kyrgyz.*$)', 'Kyrgyzstan')
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Lao.*$)', "Lao People's Democratic Republic (the)")
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Macao.*$)', "China, Macao Special Administrative Region")
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Macau.*$)', "China, Macao Special Administrative Region")
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Micronesia.*$)', "Micronesia (Federated States of)")
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Moldova.*$)', "Republic of Moldova (the)")
df['Country Name'] = df['Country Name'].replace("Morroco",'Morocco')
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Nepal.*$)', "Nepal")
df['Country Name'] = df['Country Name'].str.replace(r'(^.*New Ze.*$)', "New Zealand")
df['Country Name'] = df['Country Name'].replace("Niger",'Niger (the)')
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Macedonia.*$)', "North Macedonia")
df['Country Name'] = df['Country Name'].str.replace(r'(^.*New Ze.*$)', "New Zealand")
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Palestin.*$)', "State of Palestine (the)")
df['Country Name'] = df['Country Name'].replace("West Bank and Gaza",'State of Palestine (the)')
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Panama.*$)', "Panama")
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Philippines.*$)', "Philippines (the)")
df['Country Name'] = df['Country Name'].replace("Republic of the Congo",'Congo (the)')
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Myanmar.*$)', "Myanmar")
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Puerto Rico.*$)', "Puerto Rico")
df['Country Name'] = df['Country Name'].replace("Russia",'Russian Federation (the)')
df['Country Name'] = df['Country Name'].replace("Russian Federation",'Russian Federation (the)')
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Slovak.*$)', "Slovakia")
df['Country Name'] = df['Country Name'].str.replace(r'\bSudan\b', 'Sudan (the)')
df['Country Name'] = df['Country Name'].str.replace(r'\bSudan (the)\b', 'Sudan (the)')
df['Country Name'] = df['Country Name'].str.replace(r'(^.*South Sudan.*$)', "South Sudan")
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Syria.*$)', "Syrian Arab Republic (the)")
df['Country Name'] = df['Country Name'].str.replace(r'(^.*São Tomé.*$)', "Sao Tome and Principe")
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Taiwan.*$)', "Taiwan")
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Taipei.*$)', "Taiwan")
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Tanzania.*$)', "United Republic of Tanzania (the)")
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Netherlands.*$)', "Netherlands (the)")
df['Country Name'] = df['Country Name'].replace("UAE",'United Arab Emirates (the)')
df['Country Name'] = df['Country Name'].replace("U.A.E",'United Arab Emirates (the)')
df['Country Name'] = df['Country Name'].replace("United Arab Emirates",'United Arab Emirates (the)')
df['Country Name'] = df['Country Name'].replace('United Kingdom','United Kingdom of Great Britain and Northern Ireland (the)')
df['Country Name'] = df['Country Name'].replace('UK','United Kingdom of Great Britain and Northern Ireland (the)')
df['Country Name'] = df['Country Name'].replace("Great Britain",'United Kingdom of Great Britain and Northern Ireland (the)')
df['Country Name'] = df['Country Name'].replace("United Kingdom of Great Britain and Northern Ireland",'United Kingdom of Great Britain and Northern Ireland (the)')
df['Country Name'] = df['Country Name'].replace('Vietnam','Viet Nam')
df['Country Name'] = df['Country Name'].replace('United States','United States of America (the)')
df['Country Name'] = df['Country Name'].replace('USA','United States of America (the)')
df['Country Name'] = df['Country Name'].replace('United States of America','United States of America (the)')
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Virgin Islands.*$)', "United States Virgin Islands")
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Vatican.*$)', "Vatican")
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Venezuela.*$)', "Venezuela (Bolivarian Republic of)")
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Yemen.*$)', "Yemen")
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Arab world.*$)', "Arab World")
df['Country Name'] = df['Country Name'].str.replace(r'(^.*World.*$)', "World")
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Kitts and Nevis.*$)', "Saint Kitts and Nevis")
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Lucia.*$)', "Saint Lucia")
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Martin (French Part).*$)', "Saint Martin (French Part)")
df['Country Name'] = df['Country Name'].replace('Sint Maarten','Saint Martin')
df['Country Name'] = df['Country Name'].replace('St. Martin (French part)','Saint Martin (French Part)')
df['Country Name'] = df['Country Name'].replace('Sint Maarten (Dutch part)','Saint Martin (Dutch Part)')
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Vincent and the Grenadines.*$)', "Saint Vincent and the Grenadines")
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Verde.*$)', "Cabo Verde")
df['Country Name'] = df['Country Name'].replace('Congo, Democratic Republic','Democratic Republic of the Congo (the)')
df['Country Name'] = df['Country Name'].replace('Congo, Rep.','Congo (the)')
df['Country Name'] = df['Country Name'].replace('Republic of Congo','Congo (the)')
df['Country Name'] = df['Country Name'].replace('Congo (Rep.)','Congo (the)')
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Egypt.*$)', "Egypt")
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Korea, D.*$)', "Democratic People's Republic of Korea (the)")
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Tobago.*$)', "Trinidad and Tobago")
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Timor-Leste.*$)', "Timor-Leste")
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Emirates.*$)', "United Arab Emirates (the)")
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Papua.*$)', "Papua New Guinea")
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Bissau.*$)', "Guinea-Bissau")
df['Country Name'] = df['Country Name'].replace('Eq. Guinea','Equatorial Guinea')
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Burma.*$)', "Myanmar")
df['Country Name'] = df['Country Name'].replace('C.A. Republic','Central African Republic (the)')
df['Country Name'] = df['Country Name'].replace('Ant.& Barb.','Antigua and Barbuda')
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Bosnia.*$)', "Bosnia and Herzegovina")
df['Country Name'] = df['Country Name'].replace('Domin. Rep.','Dominican Republic (the)')
df['Country Name'] = df['Country Name'].replace('Dominica (Commonwealth of)','Dominica')
df['Country Name'] = df['Country Name'].str.replace(r'(^.*European Union.*$)', "European Union")
df['Country Name'] = df['Country Name'].replace('R. of Congo','Congo (the)')
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Principe.*$)', "Sao Tome and Principe")
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Solomon.*$)', "Solomon Islands")
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Vincent.*$)', "Saint Vincent and the Grenadines")
df['Country Name'] = df['Country Name'].replace('Curacao','Curaçao')
df['Country Name'] = df['Country Name'].replace('Reunion','Réunion')
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Kosovo.*$)', "Kosovo (UNSCR 1244)")

gdp = df
gdp


  df['Country Name'] = df['Country Name'].str.replace(r'(^.*Bahamas.*$)', 'Bahamas (the)')
  df['Country Name'] = df['Country Name'].str.replace(r"(^.*Côte d'Ivoire.*$)", "Côte d'Ivoire")
  df['Country Name'] = df['Country Name'].str.replace(r'(^.*Czech.*$)', 'Czechia')
  df['Country Name'] = df['Country Name'].str.replace(r'(^.*Dominican Re.*$)', 'Dominican Republic (the)')
  df['Country Name'] = df['Country Name'].str.replace(r'(^.*Hong Kong.*$)', 'China, Hong Kong Special Administrative Region')
  df['Country Name'] = df['Country Name'].str.replace(r'(^.*Hongkong.*$)', 'China, Hong Kong Special Administrative Region')
  df['Country Name'] = df['Country Name'].str.replace(r'(^.*Gambia.*$)', 'Gambia (the)')
  df['Country Name'] = df['Country Name'].str.replace(r'(^.*Iran.*$)', 'Iran (Islamic Republic of)')
  df['Country Name'] = df['Country Name'].str.replace(r'(^.*Korea, Rep.*$)', 'Republic of Korea (the)')
  df['Country Name'] = df['Country Name'].str.replace(r'(^.*Kyrgyz.*$)', 'Kyr

Unnamed: 0,1960,1961,1962,1963,1964,1965,1966,1967,1968,1969,...,2015,2016,2017,2018,2019,2020,Country Name,Country Code,Indicator Name,Indicator Code
0,,,,,,,,,,,...,2.962905e+09,2.983637e+09,3.092430e+09,3.202189e+09,,,Aruba,ABW,GDP (current US$),NY.GDP.MKTP.CD
1,1.934248e+10,1.975349e+10,2.152662e+10,2.577236e+10,2.356323e+10,2.685135e+10,2.919650e+10,3.021907e+10,3.292707e+10,3.780176e+10,...,8.954400e+11,8.569920e+11,9.647910e+11,9.866110e+11,9.803720e+11,9.008290e+11,Africa Eastern and Southern,AFE,GDP (current US$),NY.GDP.MKTP.CD
2,5.377778e+08,5.488889e+08,5.466667e+08,7.511112e+08,8.000000e+08,1.006667e+09,1.400000e+09,1.673333e+09,1.373333e+09,1.408889e+09,...,1.990711e+10,1.801775e+10,1.886995e+10,1.835388e+10,1.929110e+10,1.980707e+10,Afghanistan,AFG,GDP (current US$),NY.GDP.MKTP.CD
3,1.040732e+10,1.113130e+10,1.194684e+10,1.268022e+10,1.384262e+10,1.486682e+10,1.583747e+10,1.443065e+10,1.488470e+10,1.688703e+10,...,7.574920e+11,6.874850e+11,6.809890e+11,7.381310e+11,7.920790e+11,7.865850e+11,Africa Western and Central,AFW,GDP (current US$),NY.GDP.MKTP.CD
4,,,,,,,,,,,...,1.161940e+11,1.011240e+11,1.221240e+11,1.013530e+11,8.941719e+10,6.230691e+10,Angola,AGO,GDP (current US$),NY.GDP.MKTP.CD
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
261,,,,,,,,,,,...,6.442916e+09,6.719172e+09,7.245707e+09,7.942962e+09,7.953156e+09,7.611402e+09,Kosovo (UNSCR 1244),XKX,GDP (current US$),NY.GDP.MKTP.CD
262,,,,,,,,,,,...,4.244510e+10,3.093383e+10,2.673614e+10,2.348627e+10,,,Yemen,YEM,GDP (current US$),NY.GDP.MKTP.CD
263,7.575397e+09,7.972997e+09,8.497997e+09,9.423396e+09,1.037400e+10,1.133440e+10,1.235500e+10,1.377739e+10,1.489459e+10,1.678039e+10,...,3.176210e+11,2.963570e+11,3.495540e+11,3.682890e+11,3.514320e+11,3.019240e+11,South Africa,ZAF,GDP (current US$),NY.GDP.MKTP.CD
264,7.130000e+08,6.962857e+08,6.931429e+08,7.187143e+08,8.394286e+08,1.082857e+09,1.264286e+09,1.368000e+09,1.605857e+09,1.965714e+09,...,2.125122e+10,2.095841e+10,2.587360e+10,2.631164e+10,2.330869e+10,1.932005e+10,Zambia,ZMB,GDP (current US$),NY.GDP.MKTP.CD


### 1. 'UNCTAD Business-to-Consumer (B2C) E-commerce Index'

In [20]:
indicators[0]

'UNCTAD Business-to-Consumer (B2C) E-commerce Index'

In [21]:
# load data
indicator = indicators[0]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

UNCTAD Business-to-Consumer (B2C) E-commerce Index
b2c_ecommerse_idx


In [22]:
df.head()

Unnamed: 0,2020 Rannk,Economy,Share of individuals using the Internet (2019 or latest),"Share of individuals with an account (15+, 2017)","Secure Internet servers (normalize d, 2019)",UPU postal reliability score (2019 or latest),2020 Index value,Index value change (2019-20data),Rank 2019
0,1,Switzerland,97,98,92,97,95.9,-0.1,2
1,2,Netherlands,96,100,94,93,95.8,-0.7,1
2,3,Denmark,97,100,100,81,94.5,0.2,6
3,4,Singapore,89,98,94,97,94.4,-0.4,3
4,5,United Kingdom,96,96,84,98,93.6,-0.8,4


In [23]:
subpillars[0]
subpillar = subpillars[0]
print(subpillar)

Technology Adoption


In [24]:
# create standard columns
df.rename(columns={'Economy':'Country Name'}, inplace=True)
df['higher_is_better'] = True
df['Indicator'] = indicator
df['data_col'] = df['2020 Index value'] 
df['Sub-Pillar'] = subpillar
df['Year'] = 2020

min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# transform 0-1 rank into 1-6
df['new_rank_score'] = df['data_col'].apply(lambda row: convert_rank(row,old_min=0,old_max=100))

In [25]:
df = df[['Country Name', 'Year','Indicator','data_col','new_rank_score','higher_is_better','Sub-Pillar']]
df

Unnamed: 0,Country Name,Year,Indicator,data_col,new_rank_score,higher_is_better,Sub-Pillar
0,Switzerland,2020,UNCTAD Business-to-Consumer (B2C) E-commerce I...,95.9,5.78541,True,Technology Adoption
1,Netherlands,2020,UNCTAD Business-to-Consumer (B2C) E-commerce I...,95.8,5.78042,True,Technology Adoption
2,Denmark,2020,UNCTAD Business-to-Consumer (B2C) E-commerce I...,94.5,5.71555,True,Technology Adoption
3,Singapore,2020,UNCTAD Business-to-Consumer (B2C) E-commerce I...,94.4,5.71056,True,Technology Adoption
4,United Kingdom,2020,UNCTAD Business-to-Consumer (B2C) E-commerce I...,93.6,5.67064,True,Technology Adoption
...,...,...,...,...,...,...,...
147,Dem. Rep. of the Congo,2020,UNCTAD Business-to-Consumer (B2C) E-commerce I...,12.8,1.63872,True,Technology Adoption
148,Comoros,2020,UNCTAD Business-to-Consumer (B2C) E-commerce I...,12.0,1.59880,True,Technology Adoption
149,Burundi,2020,UNCTAD Business-to-Consumer (B2C) E-commerce I...,8.3,1.41417,True,Technology Adoption
150,Chad,2020,UNCTAD Business-to-Consumer (B2C) E-commerce I...,7.1,1.35429,True,Technology Adoption


In [26]:
df.to_csv('../indicator_scores/business_{}_scores.csv'.format(indicator),index = False)

## 2. Networking Services (Spend, IT Forecast Data)


In [27]:
indicator = indicators[1]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

Networking Services (Spend, IT Forecast Data)
network_readiness_index


In [28]:
df.head(16)

Unnamed: 0,Rank,Country,Score,Income Group,Region
0,1,Sweden,82.75,High-income,Europe
1,2,Denmark,82.19,High-income,Europe
2,3,Singapore,81.39,High-income,Asia & Pacific
3,4,Netherlands,81.37,High-income,Europe
4,5,Switzerland,80.41,High-income,Europe
5,6,Finland,80.16,High-income,Europe
6,7,Norway,79.39,High-income,Europe
7,8,United States,78.91,High-income,The Americas
8,9,Germany,77.48,High-income,Europe
9,10,United Kingdom,76.27,High-income,Europe


In [29]:
subpillars[0]
subpillar = subpillars[0]
print(subpillar)

Technology Adoption


In [30]:
# convert rank into 1-6 - in order to get most recent data 
df['data_col'] = df['Score'] 


# going to assume index is between 1-100 but not 100% sure
min_rank = 1 #df['data_col'].min()
max_rank = 100 #df['data_col'].max()

# transform 1-100 rank into 1-6
df['new_rank_score'] = df['data_col'].apply(lambda row: convert_rank(row,old_min=0,old_max=100))

# need to invert score since higher rank is not better 
# df['new_rank_score'] = (6-df['new_rank_score'])+1

In [31]:
# prepare output
df.rename(columns={'Country':'Country Name'}, inplace=True)
df['Indicator'] = indicator
df['higher_is_better'] = True
df['Year'] = 2020
df['Sub-Pillar'] = subpillar

In [32]:
df = df[['Country Name', 'Year','Indicator','data_col','new_rank_score','higher_is_better','Sub-Pillar']]
df

Unnamed: 0,Country Name,Year,Indicator,data_col,new_rank_score,higher_is_better,Sub-Pillar
0,Sweden,2020,"Networking Services (Spend, IT Forecast Data)",82.75,5.129225,True,Technology Adoption
1,Denmark,2020,"Networking Services (Spend, IT Forecast Data)",82.19,5.101281,True,Technology Adoption
2,Singapore,2020,"Networking Services (Spend, IT Forecast Data)",81.39,5.061361,True,Technology Adoption
3,Netherlands,2020,"Networking Services (Spend, IT Forecast Data)",81.37,5.060363,True,Technology Adoption
4,Switzerland,2020,"Networking Services (Spend, IT Forecast Data)",80.41,5.012459,True,Technology Adoption
...,...,...,...,...,...,...,...
129,Burundi,2020,"Networking Services (Spend, IT Forecast Data)",22.62,2.128738,True,Technology Adoption
130,Angola,2020,"Networking Services (Spend, IT Forecast Data)",20.96,2.045904,True,Technology Adoption
131,Yemen,2020,"Networking Services (Spend, IT Forecast Data)",18.00,1.898200,True,Technology Adoption
132,"Congo, Dem. Rep.",2020,"Networking Services (Spend, IT Forecast Data)",16.60,1.828340,True,Technology Adoption


In [33]:
# output scores
df.to_csv('../indicator_scores/business_{}_scores.csv'.format(indicator),index = False)

### 3. Cloud Services (Spend, IT Forecast Data)


In [34]:
indicator = indicators[2]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

Cloud Services (Spend, IT Forecast Data)
cloud_services


In [35]:
# remove nulls
df = df.dropna()
df

Unnamed: 0,Cloud computing policy environment by category - country ranking 2018,Unnamed: 1
2,Germany,18.2
3,Japan,20.3
4,United States,18.0
5,United Kingdom,19.8
6,Australia,16.1
7,Singapore,20.7
8,Canada,17.0
9,France,17.3
10,Italy,15.0
11,Spain,16.6


In [36]:
subpillars[0]
subpillar = subpillars[0]
print(subpillar)

Technology Adoption


In [37]:
# prepare standard columns
df['data_col'] = df['Unnamed: 1'].astype(float)
df['Country Name'] = df.iloc[:,0]
df['Indicator'] = indicator
df['higher_is_better'] = True
df['Year'] = 2018
df['Sub-Pillar'] = subpillar

In [38]:
min_rank = 1
max_rank = df['Country Name'].nunique()

In [39]:
# transform 1-24 rank into 1-6
df['new_rank_score'] = df['data_col'].apply(lambda row: convert_rank(row,old_min=0,old_max=100))

# need to invert score since higher rank is not better 
# df['new_rank_score'] = (6-df['new_rank_score'])+1

In [40]:
# prepare output
df.sort_values(by='new_rank_score', ascending=False)

# prepare output
df = df[['Country Name', 'Year','Indicator','data_col','new_rank_score','higher_is_better','Sub-Pillar']]

# output scores
df.to_csv('../indicator_scores/business_{}_scores.csv'.format(indicator), index=False)

In [41]:
df.head(15)

Unnamed: 0,Country Name,Year,Indicator,data_col,new_rank_score,higher_is_better,Sub-Pillar
2,Germany,2018,"Cloud Services (Spend, IT Forecast Data)",18.2,1.90818,True,Technology Adoption
3,Japan,2018,"Cloud Services (Spend, IT Forecast Data)",20.3,2.01297,True,Technology Adoption
4,United States,2018,"Cloud Services (Spend, IT Forecast Data)",18.0,1.8982,True,Technology Adoption
5,United Kingdom,2018,"Cloud Services (Spend, IT Forecast Data)",19.8,1.98802,True,Technology Adoption
6,Australia,2018,"Cloud Services (Spend, IT Forecast Data)",16.1,1.80339,True,Technology Adoption
7,Singapore,2018,"Cloud Services (Spend, IT Forecast Data)",20.7,2.03293,True,Technology Adoption
8,Canada,2018,"Cloud Services (Spend, IT Forecast Data)",17.0,1.8483,True,Technology Adoption
9,France,2018,"Cloud Services (Spend, IT Forecast Data)",17.3,1.86327,True,Technology Adoption
10,Italy,2018,"Cloud Services (Spend, IT Forecast Data)",15.0,1.7485,True,Technology Adoption
11,Spain,2018,"Cloud Services (Spend, IT Forecast Data)",16.6,1.82834,True,Technology Adoption


## 4. ICT task-intensive jobs as a percentage of total employment

In [42]:
indicator = indicators[3]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

ICT task-intensive jobs as a percentage of total employment
ICT_proportion


In [43]:
df.head()

Unnamed: 0,Indicator,Country,Industry,Information and communication technologies,Sex,Measure,Time,Value,Flags
0,ICT specialists within and outside information...,Austria,Total economy,Specialist (ISCO-08: 133+215+251+252+351+352+742),Total,Share of jobs,2011,3.1764,
1,ICT specialists within and outside information...,Austria,Total economy,Specialist (ISCO-08: 133+215+251+252+351+352+742),Total,Share of jobs,2012,3.225967,
2,ICT specialists within and outside information...,Austria,Total economy,Specialist (ISCO-08: 133+215+251+252+351+352+742),Total,Share of jobs,2013,3.346251,
3,ICT specialists within and outside information...,Austria,Total economy,Specialist (ISCO-08: 133+215+251+252+351+352+742),Total,Share of jobs,2014,3.3191,
4,ICT specialists within and outside information...,Austria,Total economy,Specialist (ISCO-08: 133+215+251+252+351+352+742),Total,Share of jobs,2015,3.72934,


In [44]:
subpillars[0]
subpillar = subpillars[0]
print(subpillar)

Technology Adoption


In [45]:
df[(df['Time']==2018)&(df['Information and communication technologies']=='ICT-intensive')].sort_values(by='Value', ascending=False)

Unnamed: 0,Indicator,Country,Industry,Information and communication technologies,Sex,Measure,Time,Value,Flags


In [46]:
# bnames

In [47]:
df['Information and communication technologies'].unique()

array(['Specialist (ISCO-08: 133+215+251+252+351+352+742)',
       'Other ICT-intensive (ISCO-08: 121+122,134+,211+,216+,231+,241+,242+243)',
       'Non-ICT (rest of ISCO-08 occupations)', 'ICT-intensive', 'Total'],
      dtype=object)

In [48]:
df.Sex.unique()

array(['Total'], dtype=object)

In [49]:
# convert to correct types
df['Value'] = df['Value'].astype(float)

In [50]:
df['Value'].describe()

count    985.000000
mean      42.353406
std       42.614469
min        0.890157
25%        5.526795
50%       12.887070
75%       91.318100
max      100.000000
Name: Value, dtype: float64

In [51]:
# filter on relevant years
df = df[(df['Time']==2017)&(df['Information and communication technologies']=='ICT-intensive')]

# convert rank into 1-6 - in order to get most recent data 
df['data_col'] = df['Value'] 

# going to assume index is between 1-100 but not 100% sure
min_rank = 0 #df['data_col'].min()
max_rank = 100 #df['data_col'].max()

# transform 1-147 rank into 1-6
df['new_rank_score'] = df['data_col'].apply(lambda row: convert_rank(row,old_min=0,old_max=100))

# need to invert score since higher rank is not better 
# df['new_rank_score'] = (6-df['new_rank_score'])+1

# df_rank[['Country ISO3', 'Country Name','Indicator','data_col','new_rank_score']].head()

df.rename(columns={'Country':'Country Name'}, inplace=True)
df['Indicator'] = indicator
df['higher_is_better'] = True
df['Year'] = df['Time']
df['Sub-Pillar'] = subpillar

df[['Country Name', 'Year','Indicator','data_col','new_rank_score','higher_is_better']].head(15)


# output scores to csv
df = df[['Country Name', 'Year','Indicator','data_col','new_rank_score','higher_is_better','Sub-Pillar']]
df

Unnamed: 0,Country Name,Year,Indicator,data_col,new_rank_score,higher_is_better,Sub-Pillar
27,Austria,2017,ICT task-intensive jobs as a percentage of tot...,10.88416,1.54312,True,Technology Adoption
62,Belgium,2017,ICT task-intensive jobs as a percentage of tot...,14.44826,1.720968,True,Technology Adoption
97,Czech Republic,2017,ICT task-intensive jobs as a percentage of tot...,9.219953,1.460076,True,Technology Adoption
157,Estonia,2017,ICT task-intensive jobs as a percentage of tot...,15.76142,1.786495,True,Technology Adoption
217,Finland,2017,ICT task-intensive jobs as a percentage of tot...,15.22048,1.759502,True,Technology Adoption
252,France,2017,ICT task-intensive jobs as a percentage of tot...,12.00835,1.599217,True,Technology Adoption
287,Germany,2017,ICT task-intensive jobs as a percentage of tot...,10.4419,1.521051,True,Technology Adoption
322,Greece,2017,ICT task-intensive jobs as a percentage of tot...,6.675247,1.333095,True,Technology Adoption
357,Hungary,2017,ICT task-intensive jobs as a percentage of tot...,8.369766,1.417651,True,Technology Adoption
392,Iceland,2017,ICT task-intensive jobs as a percentage of tot...,14.27687,1.712416,True,Technology Adoption


In [52]:
df.to_csv('../indicator_scores/business_{}_scores.csv'.format(indicator),index = False)

## 5. Share of business with internet

In [53]:
indicator = indicators[4]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

Share of business with internet
business_internet


In [54]:
df= df.replace('..',np.nan)

In [55]:
df.head(15)

Unnamed: 0,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,Country
0,52.47,54.8,60.17,61.52,67.16,67.81,69.33,76.25,75.58,75.77,76.73,75.62,77.37,79.38,80.37,,Australia
1,72.22,78.81,80.06,79.84,79.85,80.19,82.87,82.01,85.7,86.35,87.46,88.11,85.55,87.92,89.45,90.42,Austria
2,,,,,77.37,78.47,76.6,76.01,78.26,79.15,81.04,81.0,82.6,84.03,86.72,86.62,Belgium
3,64.8,67.5,69.7,,,,,79.8,77.5,,,,78.5,,81.8,,Canada
4,,,,47.87,51.74,54.03,59.41,63.27,66.55,67.0,66.47,67.43,67.17,67.81,,,Colombia
5,,70.08,71.12,73.99,72.66,73.63,77.44,79.67,79.86,82.63,82.57,82.15,82.9,82.79,83.31,83.32,Czech Republic
6,,,,,87.61,87.83,88.68,89.3,91.78,91.4,91.95,93.34,95.09,95.58,93.92,92.77,Denmark
7,52.65,57.86,61.87,65.73,67.53,70.04,72.63,74.97,75.74,77.56,79.73,77.93,78.09,78.36,81.18,79.79,Estonia
8,,,,,84.62,87.32,92.56,91.3,93.64,95.1,95.2,95.33,96.28,95.64,,95.92,Finland
9,,,,,54.05,57.71,60.05,64.48,65.3,63.59,66.82,68.5,66.53,69.41,71.54,70.35,France


In [56]:
subpillars[0]
subpillar = subpillars[0]
print(subpillar)

Technology Adoption


In [57]:
# create standard columns
# df.rename(columns={'COUNTRY/ECONOMY':'Country Name'}, inplace=True)
df['higher_is_better'] = True
df['Indicator'] = indicator
df['data_col'] = df['2020'].astype(float)
df['Country Name'] = df['Country']
df['Year'] = 2020
df['Sub-Pillar'] = subpillar

min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# transform 0-1 rank into 1-6
df['new_rank_score'] = df['data_col'].apply(lambda row: convert_rank(row,old_min=0,old_max=100))

In [58]:
df = df[['Country Name', 'Year','Indicator','data_col','new_rank_score','higher_is_better','Sub-Pillar']]
df.head(15)

Unnamed: 0,Country Name,Year,Indicator,data_col,new_rank_score,higher_is_better,Sub-Pillar
0,Australia,2020,Share of business with internet,,,True,Technology Adoption
1,Austria,2020,Share of business with internet,90.42,5.511958,True,Technology Adoption
2,Belgium,2020,Share of business with internet,86.62,5.322338,True,Technology Adoption
3,Canada,2020,Share of business with internet,,,True,Technology Adoption
4,Colombia,2020,Share of business with internet,,,True,Technology Adoption
5,Czech Republic,2020,Share of business with internet,83.32,5.157668,True,Technology Adoption
6,Denmark,2020,Share of business with internet,92.77,5.629223,True,Technology Adoption
7,Estonia,2020,Share of business with internet,79.79,4.981521,True,Technology Adoption
8,Finland,2020,Share of business with internet,95.92,5.786408,True,Technology Adoption
9,France,2020,Share of business with internet,70.35,4.510465,True,Technology Adoption


In [59]:
df.to_csv('../indicator_scores/business_{}_scores.csv'.format(indicator), index=False)

## 6. Share of businesses with broadband

In [60]:
indicator = indicators[5]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

Share of businesses with broadband
business_broadband


In [61]:
df.head(15)

Unnamed: 0,2008,2009,2010,Country
0,76.94,76.01,82.06,Austria
1,79.33,77.31,86.52,Czech Republic
2,87.53,86.08,88.08,Estonia
3,,91.68,93.31,France
4,83.46,87.9,89.34,Germany
5,70.37,74.19,79.61,Hungary
6,,,95.43,Iceland
7,,76.11,86.84,Ireland
8,,82.92,84.12,Italy
9,,88.03,87.91,Luxembourg


In [62]:
subpillars[0]
subpillar = subpillars[0]
print(subpillar)

Technology Adoption


In [63]:
# create standard columns
# df.rename(columns={'COUNTRY/ECONOMY':'Country Name'}, inplace=True)
df['higher_is_better'] = True
df['Indicator'] = indicator
df['data_col'] = df['2010']
df['Country Name'] = df['Country']
df['Year'] = 2010
df['Sub-Pillar'] = subpillar

min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# transform 0-1 rank into 1-6
df['new_rank_score'] = df['data_col'].apply(lambda row: convert_rank(row,old_min=0,old_max=100))

In [64]:
df = df[['Country Name', 'Year','Indicator','data_col','new_rank_score','higher_is_better','Sub-Pillar']]
df.head(15)

Unnamed: 0,Country Name,Year,Indicator,data_col,new_rank_score,higher_is_better,Sub-Pillar
0,Austria,2010,Share of businesses with broadband,82.06,5.094794,True,Technology Adoption
1,Czech Republic,2010,Share of businesses with broadband,86.52,5.317348,True,Technology Adoption
2,Estonia,2010,Share of businesses with broadband,88.08,5.395192,True,Technology Adoption
3,France,2010,Share of businesses with broadband,93.31,5.656169,True,Technology Adoption
4,Germany,2010,Share of businesses with broadband,89.34,5.458066,True,Technology Adoption
5,Hungary,2010,Share of businesses with broadband,79.61,4.972539,True,Technology Adoption
6,Iceland,2010,Share of businesses with broadband,95.43,5.761957,True,Technology Adoption
7,Ireland,2010,Share of businesses with broadband,86.84,5.333316,True,Technology Adoption
8,Italy,2010,Share of businesses with broadband,84.12,5.197588,True,Technology Adoption
9,Luxembourg,2010,Share of businesses with broadband,87.91,5.386709,True,Technology Adoption


In [65]:
df.to_csv('../indicator_scores/business_{}_scores.csv'.format(indicator), index=False)

## 7. Share of businesses with online presence

In [66]:
indicator = indicators[6]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))
df.head(15)

Share of businesses with online presence
share_of_businesses_online_presence


Unnamed: 0,RANK,COUNTRY/ECONOMY,VALUE,SCORE,Year
0,1.0,Finland,95.64,100.0,2018
1,2.0,Denmark,93.92,98.01,2018
2,3.0,Japan,92.4,96.24,2018
3,4.0,Netherlands,91.89,95.65,2018
4,5.0,Switzerland,91.74,95.48,2018
5,6.0,Sweden,89.65,93.05,2018
6,7.0,Austria,89.45,92.82,2018
7,8.0,Germany,88.21,91.38,2018
8,9.0,Belgium,86.72,89.65,2018
9,10.0,United Kingdom,83.88,86.35,2018


In [67]:
subpillars[0]
subpillar = subpillars[0]
print(subpillar)

Technology Adoption


In [68]:
# create standard columns
df.rename(columns={'COUNTRY/ECONOMY':'Country Name'}, inplace=True)
df['higher_is_better'] = True
df['Indicator'] = indicator
df['data_col'] = df['VALUE']
df['Sub-Pillar'] = subpillar

min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# transform 0-1 rank into 1-6
df['new_rank_score'] = df['data_col'].apply(lambda row: convert_rank(row,old_min=0,old_max=100))

In [69]:
df = df[['Country Name', 'Year','Indicator','data_col','new_rank_score','higher_is_better','Sub-Pillar']]
df.head(15)

Unnamed: 0,Country Name,Year,Indicator,data_col,new_rank_score,higher_is_better,Sub-Pillar
0,Finland,2018,Share of businesses with online presence,95.64,5.772436,True,Technology Adoption
1,Denmark,2018,Share of businesses with online presence,93.92,5.686608,True,Technology Adoption
2,Japan,2018,Share of businesses with online presence,92.4,5.61076,True,Technology Adoption
3,Netherlands,2018,Share of businesses with online presence,91.89,5.585311,True,Technology Adoption
4,Switzerland,2018,Share of businesses with online presence,91.74,5.577826,True,Technology Adoption
5,Sweden,2018,Share of businesses with online presence,89.65,5.473535,True,Technology Adoption
6,Austria,2018,Share of businesses with online presence,89.45,5.463555,True,Technology Adoption
7,Germany,2018,Share of businesses with online presence,88.21,5.401679,True,Technology Adoption
8,Belgium,2018,Share of businesses with online presence,86.72,5.327328,True,Technology Adoption
9,United Kingdom,2018,Share of businesses with online presence,83.88,5.185612,True,Technology Adoption


In [70]:
df.to_csv('../indicator_scores/business_{}_scores.csv'.format(indicator), index=False)

## 8. Size of gig economy (% of GDP)

In [71]:
indicator = indicators[7]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

Size of gig economy (% of GDP)
prevalance_gig_economy


In [72]:
df.head(15)

Unnamed: 0,RANK,COUNTRY/ECONOMY,VALUE,SCORE
0,1.0,United States,5.4,100.0
1,2.0,Netherlands,5.22,94.63
2,3.0,United Kingdom,5.19,93.8
3,4.0,Saudi Arabia,5.08,90.33
4,5.0,Malaysia,5.07,90.19
5,6.0,Egypt,5.05,89.46
6,7.0,Israel,5.02,88.42
7,8.0,Canada,4.94,86.07
8,9.0,Singapore,4.92,85.52
9,10.0,United Arab Emirates,4.87,83.82


In [73]:
subpillars[0]
subpillar = subpillars[0]
print(subpillar)

Technology Adoption


In [74]:
# create standard columns
df.rename(columns={'COUNTRY/ECONOMY':'Country Name'}, inplace=True)
df['higher_is_better'] = True
df['Indicator'] = indicator
df['data_col'] = df['SCORE']
df['Year'] = 2019
df['Sub-Pillar'] = subpillar

min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# transform 0-1 rank into 1-6
df['new_rank_score'] = df['data_col'].apply(lambda row: convert_rank(row,old_min=0,old_max=100))

In [75]:
df = df[['Country Name', 'Year','Indicator','data_col','new_rank_score','higher_is_better','Sub-Pillar']]
df.head(15)

Unnamed: 0,Country Name,Year,Indicator,data_col,new_rank_score,higher_is_better,Sub-Pillar
0,United States,2019,Size of gig economy (% of GDP),100.0,5.99,True,Technology Adoption
1,Netherlands,2019,Size of gig economy (% of GDP),94.63,5.722037,True,Technology Adoption
2,United Kingdom,2019,Size of gig economy (% of GDP),93.8,5.68062,True,Technology Adoption
3,Saudi Arabia,2019,Size of gig economy (% of GDP),90.33,5.507467,True,Technology Adoption
4,Malaysia,2019,Size of gig economy (% of GDP),90.19,5.500481,True,Technology Adoption
5,Egypt,2019,Size of gig economy (% of GDP),89.46,5.464054,True,Technology Adoption
6,Israel,2019,Size of gig economy (% of GDP),88.42,5.412158,True,Technology Adoption
7,Canada,2019,Size of gig economy (% of GDP),86.07,5.294893,True,Technology Adoption
8,Singapore,2019,Size of gig economy (% of GDP),85.52,5.267448,True,Technology Adoption
9,United Arab Emirates,2019,Size of gig economy (% of GDP),83.82,5.182618,True,Technology Adoption


In [76]:
df.to_csv('../indicator_scores/business_{}_scores.csv'.format(indicator), index=False)

## 9. Size of digital economy (% of transactions)


In [77]:
indicator = indicators[8]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

Size of digital economy (% of transactions)
size_digital_economy


In [78]:
df

Unnamed: 0,Order,Country Name,Value,Score
0,1.0,Singapore,78.13,100.00
1,2.0,Switzerland,64.57,82.59
2,3.0,"Korea, Rep.",63.66,81.42
3,4.0,Germany,61.45,78.58
4,5.0,Hungary,59.72,76.36
...,...,...,...,...
129,,"Congo, Dem. Rep.",,
130,,Dominican Republic,,
131,,Guinea,,
132,,Lesotho,,


In [79]:
subpillars[0]
subpillar = subpillars[0]
print(subpillar)

Technology Adoption


In [80]:
# create standard columns
df['higher_is_better'] = True
df['Indicator'] = indicator
df['data_col'] = df['Value'] 
df['Year'] = 2019
df['Sub-Pillar'] = subpillar

min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# transform 0-1 rank into 1-6
df['new_rank_score'] = df['data_col'].apply(lambda row: convert_rank(row,old_min=0,old_max=100))

In [81]:
df = df[['Country Name', 'Year','Indicator','data_col','new_rank_score','higher_is_better','Sub-Pillar']]
df.head(15)

Unnamed: 0,Country Name,Year,Indicator,data_col,new_rank_score,higher_is_better,Sub-Pillar
0,Singapore,2019,Size of digital economy (% of transactions),78.13,4.898687,True,Technology Adoption
1,Switzerland,2019,Size of digital economy (% of transactions),64.57,4.222043,True,Technology Adoption
2,"Korea, Rep.",2019,Size of digital economy (% of transactions),63.66,4.176634,True,Technology Adoption
3,Germany,2019,Size of digital economy (% of transactions),61.45,4.066355,True,Technology Adoption
4,Hungary,2019,Size of digital economy (% of transactions),59.72,3.980028,True,Technology Adoption
5,Japan,2019,Size of digital economy (% of transactions),56.21,3.804879,True,Technology Adoption
6,Ireland,2019,Size of digital economy (% of transactions),54.35,3.712065,True,Technology Adoption
7,Denmark,2019,Size of digital economy (% of transactions),54.22,3.705578,True,Technology Adoption
8,Qatar,2019,Size of digital economy (% of transactions),54.17,3.703083,True,Technology Adoption
9,Sweden,2019,Size of digital economy (% of transactions),53.01,3.645199,True,Technology Adoption


In [82]:
df.to_csv('../indicator_scores/business_{}_scores.csv'.format(indicator), index=False)

## 10. Venture Capital Availability


In [83]:
indicator = indicators[9]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

Venture Capital Availability
TCdata360


In [84]:
df.head(15)

Unnamed: 0,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,...,2008-2009,2009-2010,2010-2011,2011-2012,2012-2013,2013-2014,2014-2015,2015-2016,2016-2017,2017-2018
0,,,,,,,,,,,...,,,40.9,54.127067,,40.9205,38.90301,,,
1,,,,,,,,,,,...,,,66.0,49.0,,87.0,92.0,,,
2,,,,,,,,,,,...,,,3.858249,3.165548,,2.823529,2.601164,,,
3,,,,,,,,,,,...,,,127.0,142.0,,148.0,144.0,,,
4,,,,,,,,,,,...,,,2.797549,2.242699,,2.028571,2.177586,,,
5,,,,,,,,,,,...,,,130.0,142.0,,148.0,144.0,,,
6,,,,,,,,,,,...,,,2.196592,2.090002,,2.030303,2.249658,,,
7,,,,,,,,,,,...,,,138.0,141.0,,148.0,141.0,,,
8,,,,,,,,,,,...,,,2.793415,2.468175,,2.352941,2.260678,,,
9,,,,,,,,,,,...,,,115.0,127.0,,139.0,138.0,,,


In [85]:
subpillars[1]
subpillar = subpillars[1]
print(subpillar)

Financing Incentives


In [86]:
df = df[(df.Indicator == 'Venture capital availability, 1-7 (best)')]
df = df[(df['Subindicator Type'] == '1-7 Best')]
df

Unnamed: 0,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,...,2008-2009,2009-2010,2010-2011,2011-2012,2012-2013,2013-2014,2014-2015,2015-2016,2016-2017,2017-2018
226,,,,,1.802891,1.494084,,2.117647,2.348170,,...,,,,,,,,,,
512,2.696235,2.665708,2.524466,2.321898,2.145235,1.956589,1.844924,1.859132,1.948528,1.898624,...,,,,,,,,,,
796,4.491447,4.544287,4.296279,3.881013,3.716301,3.973436,4.143612,4.123291,4.352251,4.352251,...,,,,,,,,,,
1080,2.893384,2.839896,2.425063,2.131687,1.897883,1.909561,1.822370,1.745999,1.779793,2.009052,...,,,,,,,,,,
1366,2.404752,2.123574,1.999757,1.921578,1.789272,2.136096,2.369044,2.425793,2.355755,2.537145,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
41962,3.334126,3.115717,3.177300,3.044030,2.667287,2.311127,2.290705,2.565255,2.690639,2.968303,...,,,,,,,,,,
42246,,,,,,2.587327,2.262899,1.876288,1.692528,,...,,,,,,,,,,
42530,3.770429,3.719011,3.890964,3.374518,3.011638,2.932891,3.054382,3.294088,3.185590,2.962898,...,,,,,,,,,,
42816,1.494845,1.984663,2.526940,2.298351,1.992631,2.073739,2.489051,2.538825,2.372437,2.225287,...,,,,,,,,,,


In [87]:
# create standard columns
df['higher_is_better'] = True
df['Indicator'] = indicator
df['data_col'] = df['2019'] 
df['Year'] = 2019
df['Sub-Pillar'] = subpillar

min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# transform 0-1 rank into 1-6
df['new_rank_score'] = df['data_col'].apply(lambda row: convert_rank_b(row,old_min=1,old_max=7))

In [88]:
df = df[['Country Name', 'Year','Indicator','data_col','new_rank_score','higher_is_better','Sub-Pillar']]
df

Unnamed: 0,Country Name,Year,Indicator,data_col,new_rank_score,higher_is_better,Sub-Pillar
226,Angola,2019,Venture Capital Availability,1.705738,1.586939,True,Financing Incentives
512,Albania,2019,Venture Capital Availability,3.014087,2.675049,True,Financing Incentives
796,United Arab Emirates,2019,Venture Capital Availability,4.832029,4.186971,True,Financing Incentives
1080,Argentina,2019,Venture Capital Availability,2.328207,2.104625,True,Financing Incentives
1366,Armenia,2019,Venture Capital Availability,3.311092,2.922058,True,Financing Incentives
...,...,...,...,...,...,...,...
41962,Vietnam,2019,Venture Capital Availability,3.267500,2.885804,True,Financing Incentives
42246,"Yemen, Rep.",2019,Venture Capital Availability,2.163688,1.967801,True,Financing Incentives
42530,South Africa,2019,Venture Capital Availability,3.051583,2.706233,True,Financing Incentives
42816,Zambia,2019,Venture Capital Availability,1.803466,1.668216,True,Financing Incentives


In [89]:
df.to_csv('../indicator_scores/business_{}_scores.csv'.format(indicator), index=False)

In [90]:
## 11. Doing Business Index

In [91]:
indicator = indicators[10]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

Doing Business Index
doing_bus_idx


In [92]:
df

Unnamed: 0,Country code,Economy,Region,Income group,DB Year,Ease of doing business rank (DB19),Ease of doing business score (DB17-19 methodology),Ease of doing business score (DB16 methodology),Ease of doing business score (DB15 methodology),Ease of doing business score (DB10-14 methodology),...,Recovery rate (cents on the dollar),Strength of insolvency framework index (0-16) (DB15-19 methodology),Commencement of proceedings index (0-3) (DB15-19 methodology),Management of debtor's assets index (0-6) (DB15-19 methodology),Reorganization proceedings index (0-3) (DB15-19 methodology),Creditor participation index (0-4) (DB15-19 methodology),Score-Recovery rate (cents on the dollar),Score-Strength of insolvency framework index (0-16) (DB15-19 methodology),data_country,data_year
0,AFG,Afghanistan,South Asia,Low income,2005,,,,,,...,0.0,,,,,,0.00,,,
1,AFG,Afghanistan,South Asia,Low income,2006,,,,,,...,0.0,,,,,,0.00,,,
2,AFG,Afghanistan,South Asia,Low income,2007,,,,,,...,0.0,,,,,,0.00,,,
3,AFG,Afghanistan,South Asia,Low income,2008,,,,,,...,25.1,,,,,,27.07,,,
4,AFG,Afghanistan,South Asia,Low income,2009,,,,,,...,24.3,,,,,,26.16,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3020,ZWE,Zimbabwe,Sub-Saharan Africa,Low income,2015,,,47.11,44.36,,...,13.8,5.0,3.0,2.0,0.0,0.0,14.81,31.25,,
3021,ZWE,Zimbabwe,Sub-Saharan Africa,Low income,2016,,47.74,47.94,,,...,16.1,5.0,3.0,2.0,0.0,0.0,17.38,31.25,,
3022,ZWE,Zimbabwe,Sub-Saharan Africa,Low income,2017,,47.73,,,,...,18.0,5.0,3.0,2.0,0.0,0.0,19.43,31.25,,
3023,ZWE,Zimbabwe,Sub-Saharan Africa,Low income,2018,,48.52,,,,...,19.7,5.0,3.0,2.0,0.0,0.0,21.17,31.25,,


In [93]:
subpillars[2]
subpillar = subpillars[2]
print(subpillar)

Startup Environment


In [94]:
df = df[(df['DB Year'] == 2019)]
df.head(15)

Unnamed: 0,Country code,Economy,Region,Income group,DB Year,Ease of doing business rank (DB19),Ease of doing business score (DB17-19 methodology),Ease of doing business score (DB16 methodology),Ease of doing business score (DB15 methodology),Ease of doing business score (DB10-14 methodology),...,Recovery rate (cents on the dollar),Strength of insolvency framework index (0-16) (DB15-19 methodology),Commencement of proceedings index (0-3) (DB15-19 methodology),Management of debtor's assets index (0-6) (DB15-19 methodology),Reorganization proceedings index (0-3) (DB15-19 methodology),Creditor participation index (0-4) (DB15-19 methodology),Score-Recovery rate (cents on the dollar),Score-Strength of insolvency framework index (0-16) (DB15-19 methodology),data_country,data_year
14,AFG,Afghanistan,South Asia,Low income,2019,167.0,47.77,,,,...,26.5,12.0,2.0,6.0,2.0,2.0,28.57,75.0,,
30,ALB,Albania,Europe & Central Asia,Upper middle income,2019,63.0,69.51,,,,...,44.0,14.0,3.0,6.0,3.0,2.0,47.33,87.5,,
46,DZA,Algeria,Middle East & North Africa,Upper middle income,2019,157.0,49.65,,,,...,50.8,7.0,3.0,2.0,1.0,1.0,54.72,43.75,,
62,AGO,Angola,Sub-Saharan Africa,Lower middle income,2019,173.0,43.86,,,,...,0.0,0.0,2.0,4.0,0.0,0.0,0.0,0.0,,
76,ATG,Antigua and Barbuda,Latin America & Caribbean,High income,2019,112.0,59.48,,,,...,36.8,5.0,2.0,2.0,0.0,1.0,39.56,31.25,,
92,ARG,Argentina,Latin America & Caribbean,High income,2019,119.0,58.8,,,,...,21.5,9.5,2.5,4.0,2.0,1.0,23.11,59.38,,
108,ARM,Armenia,Europe & Central Asia,Upper middle income,2019,41.0,75.37,,,,...,38.2,7.5,2.5,2.0,2.0,1.0,41.1,46.88,,
124,AUS,Australia,High income: OECD,High income,2019,18.0,80.13,,,,...,82.7,11.0,2.5,5.0,0.5,3.0,88.99,68.75,,
140,AUT,Austria,High income: OECD,High income,2019,26.0,78.57,,,,...,80.1,11.0,2.5,5.5,1.0,2.0,86.2,68.75,,
156,AZE,Azerbaijan,Europe & Central Asia,Upper middle income,2019,25.0,78.64,,,,...,40.1,13.5,3.0,6.0,1.5,3.0,43.21,84.38,,


In [95]:
# create standard columns
# df.rename(columns={'COUNTRY/ECONOMY':'Country Name'}, inplace=True)
df['higher_is_better'] = False
df['Indicator'] = indicator
df['data_col'] = df.iloc[:,6]
df['Year'] = df.iloc[:,4]
df['Country Name'] = df.iloc[:,1]
df['Sub-Pillar'] = subpillar

min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# convert 1-190 rank into 1-6
df['new_rank_score'] = df['data_col'].apply(lambda row: convert_rank(row, old_min=0,old_max=100))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['higher_is_better'] = False
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Indicator'] = indicator
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['data_col'] = df.iloc[:,6]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value i

In [96]:
df = df[['Country Name', 'Year','Indicator','data_col','new_rank_score','higher_is_better','Sub-Pillar']]
df.head(15)

Unnamed: 0,Country Name,Year,Indicator,data_col,new_rank_score,higher_is_better,Sub-Pillar
14,Afghanistan,2019,Doing Business Index,47.77,3.383723,False,Startup Environment
30,Albania,2019,Doing Business Index,69.51,4.468549,False,Startup Environment
46,Algeria,2019,Doing Business Index,49.65,3.477535,False,Startup Environment
62,Angola,2019,Doing Business Index,43.86,3.188614,False,Startup Environment
76,Antigua and Barbuda,2019,Doing Business Index,59.48,3.968052,False,Startup Environment
92,Argentina,2019,Doing Business Index,58.8,3.93412,False,Startup Environment
108,Armenia,2019,Doing Business Index,75.37,4.760963,False,Startup Environment
124,Australia,2019,Doing Business Index,80.13,4.998487,False,Startup Environment
140,Austria,2019,Doing Business Index,78.57,4.920643,False,Startup Environment
156,Azerbaijan,2019,Doing Business Index,78.64,4.924136,False,Startup Environment


In [97]:
df.to_csv('../indicator_scores/business_{}_scores.csv'.format(indicator), index=False)

## 12. Strength of Legal Rights 

In [98]:
indicator = indicators[11]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

Strength of Legal Rights
legal_rights_strength


In [99]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 271 entries, 0 to 270
Data columns (total 16 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   Series Name    268 non-null    object
 1   Series Code    266 non-null    object
 2   Country Name   266 non-null    object
 3   Country Code   266 non-null    object
 4   1990 [YR1990]  266 non-null    object
 5   2000 [YR2000]  266 non-null    object
 6   2011 [YR2011]  266 non-null    object
 7   2012 [YR2012]  266 non-null    object
 8   2013 [YR2013]  266 non-null    object
 9   2014 [YR2014]  266 non-null    object
 10  2015 [YR2015]  266 non-null    object
 11  2016 [YR2016]  266 non-null    object
 12  2017 [YR2017]  266 non-null    object
 13  2018 [YR2018]  266 non-null    object
 14  2019 [YR2019]  266 non-null    object
 15  2020 [YR2020]  266 non-null    object
dtypes: object(16)
memory usage: 34.0+ KB


In [100]:
subpillars[2]
subpillar = subpillars[2]
print(subpillar)

Startup Environment


#### Find Relevant Columns

In [101]:
df['Series Name'].unique()

array(['Strength of legal rights index (0=weak to 12=strong)', nan,
       'Data from database: World Development Indicators',
       'Last Updated: 06/30/2021'], dtype=object)

In [102]:
# remove unwanted rows
df = df[~df['Series Code'].isna()]
df = df.replace('..', np.nan)

In [103]:
df.loc[0][0]

'Strength of legal rights index (0=weak to 12=strong)'

In [104]:
df.head()

Unnamed: 0,Series Name,Series Code,Country Name,Country Code,1990 [YR1990],2000 [YR2000],2011 [YR2011],2012 [YR2012],2013 [YR2013],2014 [YR2014],2015 [YR2015],2016 [YR2016],2017 [YR2017],2018 [YR2018],2019 [YR2019],2020 [YR2020]
0,Strength of legal rights index (0=weak to 12=s...,IC.LGL.CRED.XQ,Afghanistan,AFG,,,,,9.0,9.0,9.0,9.0,9.0,10.0,10.0,
1,Strength of legal rights index (0=weak to 12=s...,IC.LGL.CRED.XQ,Albania,ALB,,,,,7.0,6.0,6.0,6.0,8.0,8.0,8.0,
2,Strength of legal rights index (0=weak to 12=s...,IC.LGL.CRED.XQ,Algeria,DZA,,,,,2.0,2.0,2.0,2.0,2.0,2.0,2.0,
3,Strength of legal rights index (0=weak to 12=s...,IC.LGL.CRED.XQ,American Samoa,ASM,,,,,,,,,,,,
4,Strength of legal rights index (0=weak to 12=s...,IC.LGL.CRED.XQ,Andorra,AND,,,,,,,,,,,,


In [105]:
df.columns

Index(['Series Name', 'Series Code', 'Country Name', 'Country Code',
       '1990 [YR1990]', '2000 [YR2000]', '2011 [YR2011]', '2012 [YR2012]',
       '2013 [YR2013]', '2014 [YR2014]', '2015 [YR2015]', '2016 [YR2016]',
       '2017 [YR2017]', '2018 [YR2018]', '2019 [YR2019]', '2020 [YR2020]'],
      dtype='object')

In [106]:
# clean data
df = df.replace('..', np.nan)

df[['1990 [YR1990]', '2000 [YR2000]', '2011 [YR2011]', '2012 [YR2012]',
       '2013 [YR2013]', '2014 [YR2014]', '2015 [YR2015]', '2016 [YR2016]',
       '2017 [YR2017]', '2018 [YR2018]', '2019 [YR2019]', '2020 [YR2020]']] = df[['1990 [YR1990]', '2000 [YR2000]', '2011 [YR2011]', '2012 [YR2012]',
       '2013 [YR2013]', '2014 [YR2014]', '2015 [YR2015]', '2016 [YR2016]',
       '2017 [YR2017]', '2018 [YR2018]', '2019 [YR2019]', '2020 [YR2020]']].astype(float)

In [107]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 266 entries, 0 to 265
Data columns (total 16 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Series Name    266 non-null    object 
 1   Series Code    266 non-null    object 
 2   Country Name   266 non-null    object 
 3   Country Code   266 non-null    object 
 4   1990 [YR1990]  0 non-null      float64
 5   2000 [YR2000]  0 non-null      float64
 6   2011 [YR2011]  0 non-null      float64
 7   2012 [YR2012]  0 non-null      float64
 8   2013 [YR2013]  236 non-null    float64
 9   2014 [YR2014]  236 non-null    float64
 10  2015 [YR2015]  237 non-null    float64
 11  2016 [YR2016]  237 non-null    float64
 12  2017 [YR2017]  237 non-null    float64
 13  2018 [YR2018]  238 non-null    float64
 14  2019 [YR2019]  238 non-null    float64
 15  2020 [YR2020]  0 non-null      float64
dtypes: float64(12), object(4)
memory usage: 43.4+ KB


In [108]:
# keep only most recent value
df = df[['Series Name', 'Series Code', 'Country Name', 'Country Code','2019 [YR2019]']]

df['higher_is_better'] = True
df['Indicator'] = df['Series Name']
df['data_col'] = df['2019 [YR2019]']
df['Year'] = 2019
df['Sub-Pillar'] = subpillar

#### Convert Scales

In [109]:
# convert 0-12 rank into 1-6
df['new_rank_score'] = df['data_col'].apply(lambda row: convert_rank_c(row, old_min=0,old_max=12))

In [110]:
df.head(16)

Unnamed: 0,Series Name,Series Code,Country Name,Country Code,2019 [YR2019],higher_is_better,Indicator,data_col,Year,Sub-Pillar,new_rank_score
0,Strength of legal rights index (0=weak to 12=s...,IC.LGL.CRED.XQ,Afghanistan,AFG,10.0,True,Strength of legal rights index (0=weak to 12=s...,10.0,2019,Startup Environment,5.158333
1,Strength of legal rights index (0=weak to 12=s...,IC.LGL.CRED.XQ,Albania,ALB,8.0,True,Strength of legal rights index (0=weak to 12=s...,8.0,2019,Startup Environment,4.326667
2,Strength of legal rights index (0=weak to 12=s...,IC.LGL.CRED.XQ,Algeria,DZA,2.0,True,Strength of legal rights index (0=weak to 12=s...,2.0,2019,Startup Environment,1.831667
3,Strength of legal rights index (0=weak to 12=s...,IC.LGL.CRED.XQ,American Samoa,ASM,,True,Strength of legal rights index (0=weak to 12=s...,,2019,Startup Environment,
4,Strength of legal rights index (0=weak to 12=s...,IC.LGL.CRED.XQ,Andorra,AND,,True,Strength of legal rights index (0=weak to 12=s...,,2019,Startup Environment,
5,Strength of legal rights index (0=weak to 12=s...,IC.LGL.CRED.XQ,Angola,AGO,1.0,True,Strength of legal rights index (0=weak to 12=s...,1.0,2019,Startup Environment,1.415833
6,Strength of legal rights index (0=weak to 12=s...,IC.LGL.CRED.XQ,Antigua and Barbuda,ATG,5.0,True,Strength of legal rights index (0=weak to 12=s...,5.0,2019,Startup Environment,3.079167
7,Strength of legal rights index (0=weak to 12=s...,IC.LGL.CRED.XQ,Argentina,ARG,2.0,True,Strength of legal rights index (0=weak to 12=s...,2.0,2019,Startup Environment,1.831667
8,Strength of legal rights index (0=weak to 12=s...,IC.LGL.CRED.XQ,Armenia,ARM,6.0,True,Strength of legal rights index (0=weak to 12=s...,6.0,2019,Startup Environment,3.495
9,Strength of legal rights index (0=weak to 12=s...,IC.LGL.CRED.XQ,Aruba,ABW,,True,Strength of legal rights index (0=weak to 12=s...,,2019,Startup Environment,


In [111]:
df.columns

Index(['Series Name', 'Series Code', 'Country Name', 'Country Code',
       '2019 [YR2019]', 'higher_is_better', 'Indicator', 'data_col', 'Year',
       'Sub-Pillar', 'new_rank_score'],
      dtype='object')

#### Prepare Output

In [112]:
df = df[['Country Name', 'Year','Indicator','data_col','new_rank_score','higher_is_better','Sub-Pillar']]
df.sort_values(by='new_rank_score', ascending=False).head(16)

Unnamed: 0,Country Name,Year,Indicator,data_col,new_rank_score,higher_is_better,Sub-Pillar
12,Azerbaijan,2019,Strength of legal rights index (0=weak to 12=s...,12.0,5.99,True,Startup Environment
132,Montenegro,2019,Strength of legal rights index (0=weak to 12=s...,12.0,5.99,True,Startup Environment
141,New Zealand,2019,Strength of legal rights index (0=weak to 12=s...,12.0,5.99,True,Startup Environment
158,Puerto Rico,2019,Strength of legal rights index (0=weak to 12=s...,12.0,5.99,True,Startup Environment
28,Brunei Darussalam,2019,Strength of legal rights index (0=weak to 12=s...,12.0,5.99,True,Startup Environment
119,Malawi,2019,Strength of legal rights index (0=weak to 12=s...,11.0,5.574167,True,Startup Environment
105,Kosovo,2019,Strength of legal rights index (0=weak to 12=s...,11.0,5.574167,True,Startup Environment
101,Kenya,2019,Strength of legal rights index (0=weak to 12=s...,11.0,5.574167,True,Startup Environment
162,Rwanda,2019,Strength of legal rights index (0=weak to 12=s...,11.0,5.574167,True,Startup Environment
42,Colombia,2019,Strength of legal rights index (0=weak to 12=s...,11.0,5.574167,True,Startup Environment


In [113]:
# output scores
df.to_csv('../indicator_scores/business_{}_scores.csv'.format(indicator), index=False)

### 13. Time to start a business


#### Load Data

In [114]:
indicator = indicators[12]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

Time to Start a Business
time_start_bus


In [115]:
# remove unwanted rows
df = df[~df['Series Code'].isna()]
df = df.replace('..', np.nan)

In [116]:
df[['1990 [YR1990]', '2000 [YR2000]', '2011 [YR2011]', '2012 [YR2012]',
       '2013 [YR2013]', '2014 [YR2014]', '2015 [YR2015]', '2016 [YR2016]',
       '2017 [YR2017]', '2018 [YR2018]', '2019 [YR2019]', '2020 [YR2020]']] = df[['1990 [YR1990]', '2000 [YR2000]', '2011 [YR2011]', '2012 [YR2012]',
       '2013 [YR2013]', '2014 [YR2014]', '2015 [YR2015]', '2016 [YR2016]',
       '2017 [YR2017]', '2018 [YR2018]', '2019 [YR2019]', '2020 [YR2020]']].astype(float)

In [117]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 266 entries, 0 to 265
Data columns (total 16 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Series Name    266 non-null    object 
 1   Series Code    266 non-null    object 
 2   Country Name   266 non-null    object 
 3   Country Code   266 non-null    object 
 4   1990 [YR1990]  0 non-null      float64
 5   2000 [YR2000]  0 non-null      float64
 6   2011 [YR2011]  221 non-null    float64
 7   2012 [YR2012]  225 non-null    float64
 8   2013 [YR2013]  236 non-null    float64
 9   2014 [YR2014]  236 non-null    float64
 10  2015 [YR2015]  237 non-null    float64
 11  2016 [YR2016]  237 non-null    float64
 12  2017 [YR2017]  237 non-null    float64
 13  2018 [YR2018]  238 non-null    float64
 14  2019 [YR2019]  238 non-null    float64
 15  2020 [YR2020]  0 non-null      float64
dtypes: float64(12), object(4)
memory usage: 35.3+ KB


In [118]:
subpillars[2]
subpillar = subpillars[2]
print(subpillar)

Startup Environment


In [119]:
# keep only most recent value
df = df[['Series Name', 'Series Code', 'Country Name', 'Country Code','2019 [YR2019]']]

df['higher_is_better'] = False 
df['Indicator'] = df['Series Name']
df['data_col'] = df['2019 [YR2019]']
df['Year'] = 2019
df['Sub-Pillar'] = subpillar

min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

In [120]:
first = df['data_col'].quantile(1/5)
second = df['data_col'].quantile(2/5)
third = df['data_col'].quantile(3/5)
fourth = df['data_col'].quantile(4/56)
fifth = df['data_col'].quantile(5/5)
data = {'Min':min_rank,'Max':max_rank,'1':first,'2':second,'3':third,'4':fourth,'5':fifth}
sextile = pd.DataFrame(data, index=[0])
sextile

Unnamed: 0,Min,Max,1,2,3,4,5
0,0.5,230.0,7.0,11.5,17.1,4.0,230.0


In [121]:
df.head()

Unnamed: 0,Series Name,Series Code,Country Name,Country Code,2019 [YR2019],higher_is_better,Indicator,data_col,Year,Sub-Pillar
0,Time required to start a business (days),IC.REG.DURS,Afghanistan,AFG,8.5,False,Time required to start a business (days),8.5,2019,Startup Environment
1,Time required to start a business (days),IC.REG.DURS,Albania,ALB,4.5,False,Time required to start a business (days),4.5,2019,Startup Environment
2,Time required to start a business (days),IC.REG.DURS,Algeria,DZA,18.0,False,Time required to start a business (days),18.0,2019,Startup Environment
3,Time required to start a business (days),IC.REG.DURS,American Samoa,ASM,,False,Time required to start a business (days),,2019,Startup Environment
4,Time required to start a business (days),IC.REG.DURS,Andorra,AND,,False,Time required to start a business (days),,2019,Startup Environment


In [122]:
# Convert the results into quintile
df['quintile'] = pd.qcut(df['data_col'], 5, labels=[1,2,3,4,5])
df['new_rank_score'] = df['quintile'].apply(lambda row: convert_rank(row, old_min=1,old_max=5))
# need to invert score since higher rank is not better 
df['new_rank_score'] = df['new_rank_score'].apply(lambda row: (5.99-row)+1)

df=df[df['new_rank_score'].notna()]
df

Unnamed: 0,Series Name,Series Code,Country Name,Country Code,2019 [YR2019],higher_is_better,Indicator,data_col,Year,Sub-Pillar,quintile,new_rank_score
0,Time required to start a business (days),IC.REG.DURS,Afghanistan,AFG,8.500000,False,Time required to start a business (days),8.500000,2019,Startup Environment,2,4.7425
1,Time required to start a business (days),IC.REG.DURS,Albania,ALB,4.500000,False,Time required to start a business (days),4.500000,2019,Startup Environment,1,5.9900
2,Time required to start a business (days),IC.REG.DURS,Algeria,DZA,18.000000,False,Time required to start a business (days),18.000000,2019,Startup Environment,4,2.2475
5,Time required to start a business (days),IC.REG.DURS,Angola,AGO,36.000000,False,Time required to start a business (days),36.000000,2019,Startup Environment,5,1.0000
6,Time required to start a business (days),IC.REG.DURS,Antigua and Barbuda,ATG,19.000000,False,Time required to start a business (days),19.000000,2019,Startup Environment,4,2.2475
...,...,...,...,...,...,...,...,...,...,...,...,...
261,Time required to start a business (days),IC.REG.DURS,Sub-Saharan Africa,SSF,21.525000,False,Time required to start a business (days),21.525000,2019,Startup Environment,4,2.2475
262,Time required to start a business (days),IC.REG.DURS,Sub-Saharan Africa (excluding high income),SSA,21.302128,False,Time required to start a business (days),21.302128,2019,Startup Environment,4,2.2475
263,Time required to start a business (days),IC.REG.DURS,Sub-Saharan Africa (IDA & IBRD countries),TSS,21.525000,False,Time required to start a business (days),21.525000,2019,Startup Environment,4,2.2475
264,Time required to start a business (days),IC.REG.DURS,Upper middle income,UMC,22.751923,False,Time required to start a business (days),22.751923,2019,Startup Environment,4,2.2475


In [123]:
df = df[['Country Name', 'Year','Indicator','data_col','new_rank_score','higher_is_better','Sub-Pillar']]
df.sort_values(by='new_rank_score', ascending=False).head(16)

Unnamed: 0,Country Name,Year,Indicator,data_col,new_rank_score,higher_is_better,Sub-Pillar
217,Africa Eastern and Southern,2019,Time required to start a business (days),26.826923,1.0,False,Startup Environment
186,Suriname,2019,Time required to start a business (days),66.0,1.0,False,Startup Environment
177,South Africa,2019,Time required to start a business (days),40.0,1.0,False,Startup Environment
66,Fiji,2019,Time required to start a business (days),40.0,1.0,False,Startup Environment
64,Ethiopia,2019,Time required to start a business (days),32.0,1.0,False,Startup Environment
185,Sudan,2019,Time required to start a business (days),34.5,1.0,False,Startup Environment
61,Eritrea,2019,Time required to start a business (days),84.0,1.0,False,Startup Environment
60,Equatorial Guinea,2019,Time required to start a business (days),33.0,1.0,False,Startup Environment
191,Tanzania,2019,Time required to start a business (days),29.5,1.0,False,Startup Environment
224,East Asia & Pacific (excluding high income),2019,Time required to start a business (days),29.73,1.0,False,Startup Environment


#### Prepare Output

In [124]:
bf

'time_start_bus'

In [125]:
# output scores
#df.to_csv('../indicator_scores/business_{}_scores.csv'.format(indicator),index = False)
df.insert(0,'Pillar','Business')
#df.to_csv('../non-index/business_{}_scores.csv'.format(indicator),index = False)

### 14. Ease doing business


#### Load Data

In [126]:
indicator = indicators[13]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

Ease of Doing Business
ease_doing_bus


In [127]:
# remove unwanted rows
df = df.replace('..', np.nan)
df = df[~df['Series Code'].isna()]


In [128]:
df['Series Name'].unique()

array(['Ease of doing business index (1=most business-friendly regulations)'],
      dtype=object)

In [129]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 266 entries, 0 to 265
Data columns (total 16 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Series Name    266 non-null    object 
 1   Series Code    266 non-null    object 
 2   Country Name   266 non-null    object 
 3   Country Code   266 non-null    object 
 4   1990 [YR1990]  0 non-null      float64
 5   2000 [YR2000]  0 non-null      float64
 6   2011 [YR2011]  0 non-null      float64
 7   2012 [YR2012]  0 non-null      float64
 8   2013 [YR2013]  0 non-null      float64
 9   2014 [YR2014]  0 non-null      float64
 10  2015 [YR2015]  0 non-null      float64
 11  2016 [YR2016]  0 non-null      float64
 12  2017 [YR2017]  0 non-null      float64
 13  2018 [YR2018]  0 non-null      float64
 14  2019 [YR2019]  189 non-null    object 
 15  2020 [YR2020]  0 non-null      float64
dtypes: float64(11), object(5)
memory usage: 35.3+ KB


In [130]:
subpillars[2]
subpillar = subpillars[2]
print(subpillar)

Startup Environment


In [131]:
# keep only most recent value
df = df[['Series Name', 'Series Code', 'Country Name', 'Country Code','2019 [YR2019]']]

# df['higher_is_better'] = False
df['Indicator'] = df['Series Name']
df['2019 [YR2019]'] = df['2019 [YR2019]'].astype(float)
df['data_col'] = df['2019 [YR2019]']
df['Year'] = 2019
df['Sub-Pillar'] = subpillar

# df['data_norm'] = df['data_norm'] = (df.data_col - df.data_col.mean())/df.data_col.std()

In [132]:
rank_min = df.data_col.min()
rank_max = df.data_col.max()

In [133]:
rank_min, rank_max

(1.0, 190.0)

In [134]:
# convert 1-190 rank into 1-6
df['new_rank_score'] = df['data_col'].apply(lambda row: convert_rank(row, old_min=rank_min,old_max=rank_max))

In [135]:
# invert since to put as higher is  better
df['new_rank_score'] = df['new_rank_score'].apply(lambda row: (5.99-row)+1)

In [136]:
df.sort_values(by='new_rank_score', ascending=False).head(16)

Unnamed: 0,Series Name,Series Code,Country Name,Country Code,2019 [YR2019],Indicator,data_col,Year,Sub-Pillar,new_rank_score
141,Ease of doing business index (1=most business-...,IC.BUS.EASE.XQ,New Zealand,NZL,1.0,Ease of doing business index (1=most business-...,1.0,2019,Startup Environment,5.99
171,Ease of doing business index (1=most business-...,IC.BUS.EASE.XQ,Singapore,SGP,2.0,Ease of doing business index (1=most business-...,2.0,2019,Startup Environment,5.963598
86,Ease of doing business index (1=most business-...,IC.BUS.EASE.XQ,"Hong Kong SAR, China",HKG,3.0,Ease of doing business index (1=most business-...,3.0,2019,Startup Environment,5.937196
53,Ease of doing business index (1=most business-...,IC.BUS.EASE.XQ,Denmark,DNK,4.0,Ease of doing business index (1=most business-...,4.0,2019,Startup Environment,5.910794
104,Ease of doing business index (1=most business-...,IC.BUS.EASE.XQ,"Korea, Rep.",KOR,5.0,Ease of doing business index (1=most business-...,5.0,2019,Startup Environment,5.884392
206,Ease of doing business index (1=most business-...,IC.BUS.EASE.XQ,United States,USA,6.0,Ease of doing business index (1=most business-...,6.0,2019,Startup Environment,5.857989
72,Ease of doing business index (1=most business-...,IC.BUS.EASE.XQ,Georgia,GEO,7.0,Ease of doing business index (1=most business-...,7.0,2019,Startup Environment,5.831587
205,Ease of doing business index (1=most business-...,IC.BUS.EASE.XQ,United Kingdom,GBR,8.0,Ease of doing business index (1=most business-...,8.0,2019,Startup Environment,5.805185
147,Ease of doing business index (1=most business-...,IC.BUS.EASE.XQ,Norway,NOR,9.0,Ease of doing business index (1=most business-...,9.0,2019,Startup Environment,5.778783
187,Ease of doing business index (1=most business-...,IC.BUS.EASE.XQ,Sweden,SWE,10.0,Ease of doing business index (1=most business-...,10.0,2019,Startup Environment,5.752381


In [137]:
df['higher_is_better'] = True
df.head(15)

Unnamed: 0,Series Name,Series Code,Country Name,Country Code,2019 [YR2019],Indicator,data_col,Year,Sub-Pillar,new_rank_score,higher_is_better
0,Ease of doing business index (1=most business-...,IC.BUS.EASE.XQ,Afghanistan,AFG,173.0,Ease of doing business index (1=most business-...,173.0,2019,Startup Environment,1.448836,True
1,Ease of doing business index (1=most business-...,IC.BUS.EASE.XQ,Albania,ALB,82.0,Ease of doing business index (1=most business-...,82.0,2019,Startup Environment,3.851429,True
2,Ease of doing business index (1=most business-...,IC.BUS.EASE.XQ,Algeria,DZA,157.0,Ease of doing business index (1=most business-...,157.0,2019,Startup Environment,1.87127,True
3,Ease of doing business index (1=most business-...,IC.BUS.EASE.XQ,American Samoa,ASM,,Ease of doing business index (1=most business-...,,2019,Startup Environment,,True
4,Ease of doing business index (1=most business-...,IC.BUS.EASE.XQ,Andorra,AND,,Ease of doing business index (1=most business-...,,2019,Startup Environment,,True
5,Ease of doing business index (1=most business-...,IC.BUS.EASE.XQ,Angola,AGO,177.0,Ease of doing business index (1=most business-...,177.0,2019,Startup Environment,1.343228,True
6,Ease of doing business index (1=most business-...,IC.BUS.EASE.XQ,Antigua and Barbuda,ATG,113.0,Ease of doing business index (1=most business-...,113.0,2019,Startup Environment,3.032963,True
7,Ease of doing business index (1=most business-...,IC.BUS.EASE.XQ,Argentina,ARG,126.0,Ease of doing business index (1=most business-...,126.0,2019,Startup Environment,2.689735,True
8,Ease of doing business index (1=most business-...,IC.BUS.EASE.XQ,Armenia,ARM,47.0,Ease of doing business index (1=most business-...,47.0,2019,Startup Environment,4.775503,True
9,Ease of doing business index (1=most business-...,IC.BUS.EASE.XQ,Aruba,ABW,,Ease of doing business index (1=most business-...,,2019,Startup Environment,,True


#### Prepare Output

In [138]:
df = df[['Country Name', 'Year','Indicator','data_col','new_rank_score','higher_is_better','Sub-Pillar']]

# output scores


## 15. Ease of finding skilled employees

In [139]:
indicator = indicators[14]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

Ease of finding skilled employees
ease_of_finding_skilled_employees


In [140]:
df.head()

Unnamed: 0,2017,2018,2019,Country,Unnamed: 4
0,3.88,4.03,3.89,Albania,
1,3.98,3.84,4.12,Algeria,
2,No data,2.08,2.76,Angola,
3,4.35,4.1,4.19,Argentina,
4,3.82,3.87,4.03,Armenia,


In [141]:
subpillars[2]
subpillar = subpillars[2]
print(subpillar)

Startup Environment


In [142]:
values = ['2017','2018','2019']

In [143]:
df = df.replace('No data', np.nan)

In [144]:
df[values] = df[values].astype(float)

In [145]:
df.head()

Unnamed: 0,2017,2018,2019,Country,Unnamed: 4
0,3.88,4.03,3.89,Albania,
1,3.98,3.84,4.12,Algeria,
2,,2.08,2.76,Angola,
3,4.35,4.1,4.19,Argentina,
4,3.82,3.87,4.03,Armenia,


In [146]:
df[values].describe()

Unnamed: 0,2017,2018,2019
count,132.0,136.0,137.0
mean,4.183258,4.136176,4.196058
std,0.664458,0.659186,0.589124
min,2.72,2.08,2.76
25%,3.685,3.6375,3.84
50%,4.065,4.095,4.17
75%,4.6925,4.655,4.63
max,5.67,5.75,5.32


In [147]:
# create standard columns
df['data_col'] = df['2019']
df['new_rank_score'] = df['data_col']
df['higher_is_better'] = True
df['Indicator'] = indicator
df['Year'] = 2019
df['Sub-Pillar'] = subpillar

df.rename(columns={'Country':'Country Name'}, inplace=True)


df = df[['Country Name', 'Year','Indicator','data_col','new_rank_score','higher_is_better','Sub-Pillar']]
df

Unnamed: 0,Country Name,Year,Indicator,data_col,new_rank_score,higher_is_better,Sub-Pillar
0,Albania,2019,Ease of finding skilled employees,3.89,3.89,True,Startup Environment
1,Algeria,2019,Ease of finding skilled employees,4.12,4.12,True,Startup Environment
2,Angola,2019,Ease of finding skilled employees,2.76,2.76,True,Startup Environment
3,Argentina,2019,Ease of finding skilled employees,4.19,4.19,True,Startup Environment
4,Armenia,2019,Ease of finding skilled employees,4.03,4.03,True,Startup Environment
...,...,...,...,...,...,...,...
134,"Venezuela, RB",2019,Ease of finding skilled employees,3.39,3.39,True,Startup Environment
135,Vietnam,2019,Ease of finding skilled employees,3.96,3.96,True,Startup Environment
136,"Yemen, Rep.",2019,Ease of finding skilled employees,3.71,3.71,True,Startup Environment
137,Zambia,2019,Ease of finding skilled employees,4.76,4.76,True,Startup Environment


In [148]:
# output scores to csv
df = df[['Country Name', 'Year','Indicator','data_col','new_rank_score','higher_is_better','Sub-Pillar']].to_csv('../indicator_scores/business_{}_scores.csv'.format(indicator), index=False)

## 16. Amount invested into startups yearly from private, public, blended sources (respectively)


In [149]:
indicator = indicators[15]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

Amount invested into startups yearly from private, public, blended sources (respectively)
start_up_investment


In [150]:
df.head(15)

Unnamed: 0,LOCATION,Country,SUBJECT,Subject,STAGES,Development stages,MEASURE,Measure,TIME,Year,Unit Code,Unit,PowerCode Code,PowerCode,Reference Period Code,Reference Period,Value,Flag Codes,Flags
0,AUS,Australia,VC_INV,Venture capital investments,VC_T,Total,USD_V,"USD, current prices",2006,2006,USD,US Dollar,6,Millions,,,456.334579,,
1,AUS,Australia,VC_INV,Venture capital investments,VC_T,Total,USD_V,"USD, current prices",2007,2007,USD,US Dollar,6,Millions,,,680.29317,,
2,AUS,Australia,VC_INV,Venture capital investments,VC_T,Total,USD_V,"USD, current prices",2008,2008,USD,US Dollar,6,Millions,,,755.759626,,
3,AUS,Australia,VC_INV,Venture capital investments,VC_T,Total,USD_V,"USD, current prices",2009,2009,USD,US Dollar,6,Millions,,,532.682779,,
4,AUS,Australia,VC_INV,Venture capital investments,VC_T,Total,USD_V,"USD, current prices",2010,2010,USD,US Dollar,6,Millions,,,367.836251,,
5,AUS,Australia,VC_INV,Venture capital investments,VC_T,Total,USD_V,"USD, current prices",2011,2011,USD,US Dollar,6,Millions,,,246.528233,,
6,AUS,Australia,VC_INV,Venture capital investments,VC_T,Total,USD_V,"USD, current prices",2012,2012,USD,US Dollar,6,Millions,,,331.331196,,
7,AUS,Australia,VC_INV,Venture capital investments,VC_T,Total,USD_V,"USD, current prices",2013,2013,USD,US Dollar,6,Millions,,,252.934084,,
8,AUS,Australia,VC_INV,Venture capital investments,VC_T,Total,USD_V,"USD, current prices",2014,2014,USD,US Dollar,6,Millions,,,265.918369,,
9,AUS,Australia,VC_INV,Venture capital investments,VC_T,Total,USD_V,"USD, current prices",2015,2015,USD,US Dollar,6,Millions,,,288.485377,,


In [151]:
subpillars[2]
subpillar = subpillars[2]
print(subpillar)

Startup Environment


In [152]:
df = df[(df['Development stages']=='Total') & (df.Year == 2019) & (df.MEASURE == 'USD_V')]
df.head(15)

Unnamed: 0,LOCATION,Country,SUBJECT,Subject,STAGES,Development stages,MEASURE,Measure,TIME,Year,Unit Code,Unit,PowerCode Code,PowerCode,Reference Period Code,Reference Period,Value,Flag Codes,Flags
13,AUS,Australia,VC_INV,Venture capital investments,VC_T,Total,USD_V,"USD, current prices",2019,2019,USD,US Dollar,6,Millions,,,487.310802,,
120,AUT,Austria,VC_INV,Venture capital investments,VC_T,Total,USD_V,"USD, current prices",2019,2019,USD,US Dollar,6,Millions,,,90.416881,,
232,BEL,Belgium,VC_INV,Venture capital investments,VC_T,Total,USD_V,"USD, current prices",2019,2019,USD,US Dollar,6,Millions,,,395.828165,,
343,CAN,Canada,VC_INV,Venture capital investments,VC_T,Total,USD_V,"USD, current prices",2019,2019,USD,US Dollar,6,Millions,,,3286.872933,,
448,CZE,Czech Republic,VC_INV,Venture capital investments,VC_T,Total,USD_V,"USD, current prices",2019,2019,USD,US Dollar,6,Millions,,,27.204683,,
538,DNK,Denmark,VC_INV,Venture capital investments,VC_T,Total,USD_V,"USD, current prices",2019,2019,USD,US Dollar,6,Millions,,,352.437466,,
650,FIN,Finland,VC_INV,Venture capital investments,VC_T,Total,USD_V,"USD, current prices",2019,2019,USD,US Dollar,6,Millions,,,321.566548,,
762,FRA,France,VC_INV,Venture capital investments,VC_T,Total,USD_V,"USD, current prices",2019,2019,USD,US Dollar,6,Millions,,,2164.94444,,
874,DEU,Germany,VC_INV,Venture capital investments,VC_T,Total,USD_V,"USD, current prices",2019,2019,USD,US Dollar,6,Millions,,,2379.672789,,
985,GRC,Greece,VC_INV,Venture capital investments,VC_T,Total,USD_V,"USD, current prices",2019,2019,USD,US Dollar,6,Millions,,,27.057841,,


In [153]:
# create standard columns
df['Country Name'] = df['Country']
df['Indicator'] = indicator
df['data_col'] = df['Value']
df['higher_is_better'] = True
df['Year'] = 2019
df['Sub-Pillar'] = subpillar

In [154]:
df = df[['Country Name', 'Year','Indicator','data_col','higher_is_better','Sub-Pillar']]

# Normalize country names as much as possible
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Bahamas.*$)', 'Bahamas (the)')
df['Country Name'] = df['Country Name'].replace('Bahrain (Kingdom of)','Bahrain')
df['Country Name'] = df['Country Name'].replace('Bolivia','Bolivia (Plurinational State of)')
df['Country Name'] = df['Country Name'].replace('Bolivia, Plurinational State of','Bolivia (Plurinational State of)')
df['Country Name'] = df['Country Name'].replace('Brunei','Brunei Darussalam')
df['Country Name'] = df['Country Name'].replace('Bulgaria (Rep.)','Bulgaria')
df['Country Name'] = df['Country Name'].replace('Central African Republic','Central African Republic (the)')
df['Country Name'] = df['Country Name'].replace('Central African Rep.','Central African Republic (the)')
df['Country Name'] = df['Country Name'].replace("China (People's Rep.)",'China')
df['Country Name'] = df['Country Name'].replace("Comoros",'Comoros (the)')
df['Country Name'] = df['Country Name'].replace("Congo",'Congo (the)')
df['Country Name'] = df['Country Name'].replace("Congo (Brazzaville)",'Congo (the)')
df['Country Name'] = df['Country Name'].replace("Congo (Rep. of the)",'Congo (the)')
df['Country Name'] = df['Country Name'].replace("Congo (Democratic Republic of the)",'Democratic Republic of the Congo (the)')
df['Country Name'] = df['Country Name'].replace("Congo, Dem. Rep.",'Democratic Republic of the Congo (the)')
df['Country Name'] = df['Country Name'].replace("Congo, The Democratic Republic of the",'Democratic Republic of the Congo (the)')
df['Country Name'] = df['Country Name'].replace("DR Congo",'Democratic Republic of the Congo (the)')
df['Country Name'] = df['Country Name'].replace("Democratic Republic of Congo",'Democratic Republic of the Congo (the)')
df['Country Name'] = df['Country Name'].replace("Democratic Republic of the Congo",'Democratic Republic of the Congo (the)')
df['Country Name'] = df['Country Name'].replace("Dem. Rep. of the Congo",'Democratic Republic of the Congo (the)')
df['Country Name'] = df['Country Name'].replace("Cote d'Ivoire","Côte d'Ivoire")
df['Country Name'] = df['Country Name'].replace("Côte d’Ivoire","Côte d'Ivoire")
df['Country Name'] = df['Country Name'].replace("Cote D'Ivoire","Côte d'Ivoire")
df['Country Name'] = df['Country Name'].replace("Cote dIvoire","Côte d'Ivoire")
df['Country Name'] = df['Country Name'].str.replace(r"(^.*Côte d'Ivoire.*$)", "Côte d'Ivoire")
df['Country Name'] = df['Country Name'].replace("Cōte d'Ivoire","Côte d'Ivoire")
df['Country Name'] = df['Country Name'].replace("Ivory Coast","Côte d'Ivoire")
df['Country Name'] = df['Country Name'].replace("Dem. People's Rep. of Korea","Democratic People's Republic of Korea (the)")
df['Country Name'] = df['Country Name'].replace("Democratic People's Republic of Korea","Democratic People's Republic of Korea (the)")
df['Country Name'] = df['Country Name'].replace("Korea, Dem. People's Rep.","Democratic People's Republic of Korea (the)")
df['Country Name'] = df['Country Name'].replace("North Korea","Democratic People's Republic of Korea (the)")
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Czech.*$)', 'Czechia')
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Dominican Re.*$)', 'Dominican Republic (the)')
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Hong Kong.*$)', 'China, Hong Kong Special Administrative Region')
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Hongkong.*$)', 'China, Hong Kong Special Administrative Region')
df['Country Name'] = df['Country Name'].replace("Eswatini (Kingdom of)",'Eswatini')
df['Country Name'] = df['Country Name'].replace("Swaziland",'Eswatini')
df['Country Name'] = df['Country Name'].replace("Faröe Islands",'Faroe Islands')
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Gambia.*$)', 'Gambia (the)')
df['Country Name'] = df['Country Name'].replace("Georgia (Country)",'Georgia')
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Iran.*$)', 'Iran (Islamic Republic of)')
df['Country Name'] = df['Country Name'].replace("Korea",'Republic of Korea (the)')
df['Country Name'] = df['Country Name'].replace("Korea (Rep. of)",'Republic of Korea (the)')
df['Country Name'] = df['Country Name'].replace("Korea (Rep.)",'Republic of Korea (the)')
df['Country Name'] = df['Country Name'].replace("Korea (Republic of)",'Republic of Korea (the)')
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Korea, Rep.*$)', 'Republic of Korea (the)')
df['Country Name'] = df['Country Name'].replace("Korea, South",'Republic of Korea (the)')
df['Country Name'] = df['Country Name'].replace("South Korea",'Republic of Korea (the)')
df['Country Name'] = df['Country Name'].replace("Republic of Korea",'Republic of Korea (the)')
df['Country Name'] = df['Country Name'].replace('Republic of Congo','Congo (the)')
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Kyrgyz.*$)', 'Kyrgyzstan')
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Lao.*$)', "Lao People's Democratic Republic (the)")
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Macao.*$)', "China, Macao Special Administrative Region")
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Macau.*$)', "China, Macao Special Administrative Region")
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Micronesia.*$)', "Micronesia (Federated States of)")
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Moldova.*$)', "Republic of Moldova (the)")
df['Country Name'] = df['Country Name'].replace("Morroco",'Morocco')
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Nepal.*$)', "Nepal")
df['Country Name'] = df['Country Name'].str.replace(r'(^.*New Ze.*$)', "New Zealand")
df['Country Name'] = df['Country Name'].replace("Niger",'Niger (the)')
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Macedonia.*$)', "North Macedonia")
df['Country Name'] = df['Country Name'].str.replace(r'(^.*New Ze.*$)', "New Zealand")
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Palestin.*$)', "State of Palestine (the)")
df['Country Name'] = df['Country Name'].replace("West Bank and Gaza",'State of Palestine (the)')
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Panama.*$)', "Panama")
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Philippines.*$)', "Philippines (the)")
df['Country Name'] = df['Country Name'].replace("Republic of the Congo",'Congo (the)')
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Myanmar.*$)', "Myanmar")
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Puerto Rico.*$)', "Puerto Rico")
df['Country Name'] = df['Country Name'].replace("Russia",'Russian Federation (the)')
df['Country Name'] = df['Country Name'].replace("Russian Federation",'Russian Federation (the)')
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Slovak.*$)', "Slovakia")
df['Country Name'] = df['Country Name'].str.replace(r'\bSudan\b', 'Sudan (the)')
df['Country Name'] = df['Country Name'].str.replace(r'\bSudan (the)\b', 'Sudan (the)')
df['Country Name'] = df['Country Name'].str.replace(r'(^.*South Sudan.*$)', "South Sudan")
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Syria.*$)', "Syrian Arab Republic (the)")
df['Country Name'] = df['Country Name'].str.replace(r'(^.*São Tomé.*$)', "Sao Tome and Principe")
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Taiwan.*$)', "Taiwan")
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Taipei.*$)', "Taiwan")
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Tanzania.*$)', "United Republic of Tanzania (the)")
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Netherlands.*$)', "Netherlands (the)")
df['Country Name'] = df['Country Name'].replace("UAE",'United Arab Emirates (the)')
df['Country Name'] = df['Country Name'].replace("U.A.E",'United Arab Emirates (the)')
df['Country Name'] = df['Country Name'].replace("United Arab Emirates",'United Arab Emirates (the)')
df['Country Name'] = df['Country Name'].replace('United Kingdom','United Kingdom of Great Britain and Northern Ireland (the)')
df['Country Name'] = df['Country Name'].replace('UK','United Kingdom of Great Britain and Northern Ireland (the)')
df['Country Name'] = df['Country Name'].replace("Great Britain",'United Kingdom of Great Britain and Northern Ireland (the)')
df['Country Name'] = df['Country Name'].replace("United Kingdom of Great Britain and Northern Ireland",'United Kingdom of Great Britain and Northern Ireland (the)')
df['Country Name'] = df['Country Name'].replace('Vietnam','Viet Nam')
df['Country Name'] = df['Country Name'].replace('United States','United States of America (the)')
df['Country Name'] = df['Country Name'].replace('USA','United States of America (the)')
df['Country Name'] = df['Country Name'].replace('United States of America','United States of America (the)')
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Virgin Islands.*$)', "United States Virgin Islands")
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Vatican.*$)', "Vatican")
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Venezuela.*$)', "Venezuela (Bolivarian Republic of)")
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Yemen.*$)', "Yemen")
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Arab world.*$)', "Arab World")
df['Country Name'] = df['Country Name'].str.replace(r'(^.*World.*$)', "World")
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Kitts and Nevis.*$)', "Saint Kitts and Nevis")
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Lucia.*$)', "Saint Lucia")
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Martin (French Part).*$)', "Saint Martin (French Part)")
df['Country Name'] = df['Country Name'].replace('Sint Maarten','Saint Martin')
df['Country Name'] = df['Country Name'].replace('St. Martin (French part)','Saint Martin (French Part)')
df['Country Name'] = df['Country Name'].replace('Sint Maarten (Dutch part)','Saint Martin (Dutch Part)')
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Vincent and the Grenadines.*$)', "Saint Vincent and the Grenadines")
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Verde.*$)', "Cabo Verde")
df['Country Name'] = df['Country Name'].replace('Congo, Democratic Republic','Democratic Republic of the Congo (the)')
df['Country Name'] = df['Country Name'].replace('Congo, Rep.','Congo (the)')
df['Country Name'] = df['Country Name'].replace('Republic of Congo','Congo (the)')
df['Country Name'] = df['Country Name'].replace('Congo (Rep.)','Congo (the)')
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Egypt.*$)', "Egypt")
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Korea, D.*$)', "Democratic People's Republic of Korea (the)")
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Tobago.*$)', "Trinidad and Tobago")
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Timor-Leste.*$)', "Timor-Leste")
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Emirates.*$)', "United Arab Emirates (the)")
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Papua.*$)', "Papua New Guinea")
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Bissau.*$)', "Guinea-Bissau")
df['Country Name'] = df['Country Name'].replace('Eq. Guinea','Equatorial Guinea')
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Burma.*$)', "Myanmar")
df['Country Name'] = df['Country Name'].replace('C.A. Republic','Central African Republic (the)')
df['Country Name'] = df['Country Name'].replace('Ant.& Barb.','Antigua and Barbuda')
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Bosnia.*$)', "Bosnia and Herzegovina")
df['Country Name'] = df['Country Name'].replace('Domin. Rep.','Dominican Republic (the)')
df['Country Name'] = df['Country Name'].replace('Dominica (Commonwealth of)','Dominica')
df['Country Name'] = df['Country Name'].str.replace(r'(^.*European Union.*$)', "European Union")
df['Country Name'] = df['Country Name'].replace('R. of Congo','Congo (the)')
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Principe.*$)', "Sao Tome and Principe")
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Solomon.*$)', "Solomon Islands")
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Vincent.*$)', "Saint Vincent and the Grenadines")
df['Country Name'] = df['Country Name'].replace('Curacao','Curaçao')
df['Country Name'] = df['Country Name'].replace('Reunion','Réunion')
df['Country Name'] = df['Country Name'].str.replace(r'(^.*Kosovo.*$)', "Kosovo (UNSCR 1244)")

df=df.merge(pop,how='outer',on='Country Name')
df

  df['Country Name'] = df['Country Name'].str.replace(r'(^.*Bahamas.*$)', 'Bahamas (the)')
  df['Country Name'] = df['Country Name'].str.replace(r"(^.*Côte d'Ivoire.*$)", "Côte d'Ivoire")
  df['Country Name'] = df['Country Name'].str.replace(r'(^.*Czech.*$)', 'Czechia')
  df['Country Name'] = df['Country Name'].str.replace(r'(^.*Dominican Re.*$)', 'Dominican Republic (the)')
  df['Country Name'] = df['Country Name'].str.replace(r'(^.*Hong Kong.*$)', 'China, Hong Kong Special Administrative Region')
  df['Country Name'] = df['Country Name'].str.replace(r'(^.*Hongkong.*$)', 'China, Hong Kong Special Administrative Region')
  df['Country Name'] = df['Country Name'].str.replace(r'(^.*Gambia.*$)', 'Gambia (the)')
  df['Country Name'] = df['Country Name'].str.replace(r'(^.*Iran.*$)', 'Iran (Islamic Republic of)')
  df['Country Name'] = df['Country Name'].str.replace(r'(^.*Korea, Rep.*$)', 'Republic of Korea (the)')
  df['Country Name'] = df['Country Name'].str.replace(r'(^.*Kyrgyz.*$)', 'Kyr

Unnamed: 0,Country Name,Year,Indicator,data_col,higher_is_better,Sub-Pillar,1960,1961,1962,1963,...,2014,2015,2016,2017,2018,2019,2020,Country Code,Indicator Name,Indicator Code
0,Australia,2019.0,Amount invested into startups yearly from priv...,487.310802,True,Startup Environment,10276477.0,10483000.0,10742000.0,10950000.0,...,23475686.0,23815995.0,24190907.0,24601860.0,24982688.0,25365745.0,25687041.0,AUS,"Population, total",SP.POP.TOTL
1,Austria,2019.0,Amount invested into startups yearly from priv...,90.416881,True,Startup Environment,7047539.0,7086299.0,7129864.0,7175811.0,...,8546356.0,8642699.0,8736668.0,8797566.0,8840521.0,8879920.0,8917205.0,AUT,"Population, total",SP.POP.TOTL
2,Belgium,2019.0,Amount invested into startups yearly from priv...,395.828165,True,Startup Environment,9153489.0,9183948.0,9220578.0,9289770.0,...,11209057.0,11274196.0,11331422.0,11375158.0,11427054.0,11488980.0,11555997.0,BEL,"Population, total",SP.POP.TOTL
3,Canada,2019.0,Amount invested into startups yearly from priv...,3286.872933,True,Startup Environment,17909009.0,18271000.0,18614000.0,18964000.0,...,35437435.0,35702908.0,36109487.0,36545295.0,37065178.0,37593384.0,38005238.0,CAN,"Population, total",SP.POP.TOTL
4,Czechia,2019.0,Amount invested into startups yearly from priv...,27.204683,True,Startup Environment,9602006.0,9586651.0,9624660.0,9670685.0,...,10525347.0,10546059.0,10566332.0,10594438.0,10629928.0,10671870.0,10698896.0,CZE,"Population, total",SP.POP.TOTL
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
261,Samoa,,,,,,108627.0,112112.0,115768.0,119552.0,...,192220.0,193510.0,194540.0,195358.0,196128.0,197093.0,198410.0,WSM,"Population, total",SP.POP.TOTL
262,Kosovo (UNSCR 1244),,,,,,947000.0,966000.0,994000.0,1022000.0,...,1812771.0,1788196.0,1777557.0,1791003.0,1797085.0,1788878.0,1775378.0,XKX,"Population, total",SP.POP.TOTL
263,Yemen,,,,,,5315351.0,5393034.0,5473671.0,5556767.0,...,25823488.0,26497881.0,27168210.0,27834811.0,28498683.0,29161922.0,29825968.0,YEM,"Population, total",SP.POP.TOTL
264,Zambia,,,,,,3070780.0,3164330.0,3260645.0,3360099.0,...,15399793.0,15879370.0,16363449.0,16853608.0,17351714.0,17861034.0,18383956.0,ZMB,"Population, total",SP.POP.TOTL


In [155]:
df['new_data_col'] = (df['data_col']*1000000)/(df['2019'])
df=df[df['Indicator'].notna()]
df['new_data_col'] = df['new_data_col'].astype(np.float64)
df=df[['Country Name','Year','Indicator','new_data_col','higher_is_better','Sub-Pillar']]
min_rank = df['new_data_col'].min()
max_rank = df['new_data_col'].max()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['new_data_col'] = df['new_data_col'].astype(np.float64)


In [156]:
first = df['new_data_col'].quantile(1/5)
second = df['new_data_col'].quantile(2/5)
third = df['new_data_col'].quantile(3/5)
fourth = df['new_data_col'].quantile(4/5)
fifth = df['new_data_col'].quantile(5/5)
data = {'Min':min_rank,'Max':max_rank,'1':first,'2':second,'3':third,'4':fourth,'5':fifth}
sextile = pd.DataFrame(data, index=[0])
sextile

Unnamed: 0,Min,Max,1,2,3,4,5
0,0.566176,413.147474,3.219303,14.089509,29.409678,44.330586,413.147474


In [157]:
df.rename(columns={'new_data_col':'data_col'}, inplace=True)
min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# transform 0-1 rank into 1-6
df['new_rank_score'] = df['data_col'].apply(lambda row: convert_rank(row,old_min=min_rank,old_max=max_rank))
df = df[['Country Name', 'Year','Indicator','data_col','new_rank_score','higher_is_better','Sub-Pillar']]
df

Unnamed: 0,Country Name,Year,Indicator,data_col,new_rank_score,higher_is_better,Sub-Pillar
0,Australia,2019.0,Amount invested into startups yearly from priv...,19.211374,1.225506,True,Startup Environment
1,Austria,2019.0,Amount invested into startups yearly from priv...,10.182173,1.116302,True,Startup Environment
2,Belgium,2019.0,Amount invested into startups yearly from priv...,34.452855,1.409845,True,Startup Environment
3,Canada,2019.0,Amount invested into startups yearly from priv...,87.432218,2.050609,True,Startup Environment
4,Czechia,2019.0,Amount invested into startups yearly from priv...,2.549195,1.023984,True,Startup Environment
5,Denmark,2019.0,Amount invested into startups yearly from priv...,60.61436,1.726258,True,Startup Environment
6,Finland,2019.0,Amount invested into startups yearly from priv...,58.237866,1.697515,True,Startup Environment
7,France,2019.0,Amount invested into startups yearly from priv...,32.192997,1.382513,True,Startup Environment
8,Germany,2019.0,Amount invested into startups yearly from priv...,28.63868,1.339525,True,Startup Environment
9,Greece,2019.0,Amount invested into startups yearly from priv...,2.52368,1.023675,True,Startup Environment


In [158]:
#df.to_csv('../indicator_scores/business_{}_scores.csv'.format(bf), index=False)
df.insert(0,'Pillar','Business')
df.to_csv('../non-index/min max/business_{}_scores.csv'.format(bf),index = False)

In [159]:
### 17. Regulatory Quality

In [160]:
indicator = indicators[16]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))
df.head(16)

Regulatory Quality
global_innovation_dataset


Unnamed: 0,ISO2,Economy,IndCode,IndNum,IndName,Rank,Score,Value,MinDatCov,Outdated,OverallStreWeak,IncomeStreWeak,DataYear
0,ALB,Albania,Inputs,.1,Innovation Input Sub-index,71.0,39.940929,,,,,,
1,ALB,Albania,Outputs,.2,Innovation Output Sub-index,92.0,16.109792,,,,,,
2,ALB,Albania,Index,0,Global Innovation Index,84.0,28.025361,,,,,,
3,ALB,Albania,P1,1,Institutions,60.0,64.91924,,0.0,,,,
4,ALB,Albania,SP11,1.1.,Political environment,71.0,56.071183,,0.0,,,,
5,ALB,Albania,PolStab,1.1.1,Political and operational stability*,60.0,69.642859,2.1,,0.0,,,2020.0
6,ALB,Albania,GovEff,1.1.2,Government effectiveness*,76.0,49.285344,-0.061331,,0.0,,,2019.0
7,ALB,Albania,SP12,1.2.,Regulatory environment,82.0,58.941532,,0.0,,,,
8,ALB,Albania,RegQua,1.2.1,Regulatory quality*,58.0,50.670701,0.27438,,0.0,,,2019.0
9,ALB,Albania,RuleOL,1.2.2,Rule of law*,85.0,35.889077,-0.411179,,0.0,,,2019.0


In [161]:
subpillars[2]
subpillar = subpillars[2]
print(subpillar)

Startup Environment


In [162]:
df = df[(df.IndName == 'Regulatory quality*')]
df

Unnamed: 0,ISO2,Economy,IndCode,IndNum,IndName,Rank,Score,Value,MinDatCov,Outdated,OverallStreWeak,IncomeStreWeak,DataYear
8,ALB,Albania,RegQua,1.2.1,Regulatory quality*,58.0,50.670701,0.274380,,0.0,,,2019.0
120,DZA,Algeria,RegQua,1.2.1,Regulatory quality*,129.0,9.425411,-1.303379,,0.0,,W,2019.0
232,AGO,Angola,RegQua,1.2.1,Regulatory quality*,124.0,20.130634,-0.893871,,0.0,,,2019.0
344,ARG,Argentina,RegQua,1.2.1,Regulatory quality*,103.0,30.616559,-0.492753,,0.0,,W,2019.0
456,ARM,Armenia,RegQua,1.2.1,Regulatory quality*,59.0,50.020701,0.249515,,0.0,,,2019.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
14232,UZB,Uzbekistan,RegQua,1.2.1,Regulatory quality*,126.0,17.531597,-0.993293,,0.0,W,W,2019.0
14344,VNM,Viet Nam,RegQua,1.2.1,Regulatory quality*,93.0,36.639718,-0.262348,,0.0,,,2019.0
14456,YEM,Yemen,RegQua,1.2.1,Regulatory quality*,132.0,0.000000,-1.663930,,0.0,W,W,2019.0
14568,ZMB,Zambia,RegQua,1.2.1,Regulatory quality*,105.0,29.008412,-0.554269,,0.0,,,2019.0


In [163]:
df['higher_is_better'] = True
df['Year'] = df['DataYear']
df['Indicator'] = indicator
df['data_col'] = df['Score']
df['Sub-Pillar'] = subpillar
df['Country Name'] = df['Economy']

min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# transform 0-1 rank into 1-6
df['new_rank_score'] = df['data_col'].apply(lambda row: convert_rank(row,old_min=0,old_max=100))

In [164]:
df = df[['Country Name', 'Year','Indicator','data_col','new_rank_score','higher_is_better','Sub-Pillar']]
df

Unnamed: 0,Country Name,Year,Indicator,data_col,new_rank_score,higher_is_better,Sub-Pillar
8,Albania,2019.0,Regulatory Quality,50.670701,3.528468,True,Startup Environment
120,Algeria,2019.0,Regulatory Quality,9.425411,1.470328,True,Startup Environment
232,Angola,2019.0,Regulatory Quality,20.130634,2.004519,True,Startup Environment
344,Argentina,2019.0,Regulatory Quality,30.616559,2.527766,True,Startup Environment
456,Armenia,2019.0,Regulatory Quality,50.020701,3.496033,True,Startup Environment
...,...,...,...,...,...,...,...
14232,Uzbekistan,2019.0,Regulatory Quality,17.531597,1.874827,True,Startup Environment
14344,Viet Nam,2019.0,Regulatory Quality,36.639718,2.828322,True,Startup Environment
14456,Yemen,2019.0,Regulatory Quality,0.000000,1.000000,True,Startup Environment
14568,Zambia,2019.0,Regulatory Quality,29.008412,2.447520,True,Startup Environment


In [165]:
df.to_csv('../indicator_scores/business_{}_scores.csv'.format(indicator), index=False)

In [166]:
### 18. Ease of Getting Credit

In [167]:
indicator = indicators[17]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))
df.head(16)

Ease of Getting Credit
global_innovation_dataset


Unnamed: 0,ISO2,Economy,IndCode,IndNum,IndName,Rank,Score,Value,MinDatCov,Outdated,OverallStreWeak,IncomeStreWeak,DataYear
0,ALB,Albania,Inputs,.1,Innovation Input Sub-index,71.0,39.940929,,,,,,
1,ALB,Albania,Outputs,.2,Innovation Output Sub-index,92.0,16.109792,,,,,,
2,ALB,Albania,Index,0,Global Innovation Index,84.0,28.025361,,,,,,
3,ALB,Albania,P1,1,Institutions,60.0,64.91924,,0.0,,,,
4,ALB,Albania,SP11,1.1.,Political environment,71.0,56.071183,,0.0,,,,
5,ALB,Albania,PolStab,1.1.1,Political and operational stability*,60.0,69.642859,2.1,,0.0,,,2020.0
6,ALB,Albania,GovEff,1.1.2,Government effectiveness*,76.0,49.285344,-0.061331,,0.0,,,2019.0
7,ALB,Albania,SP12,1.2.,Regulatory environment,82.0,58.941532,,0.0,,,,
8,ALB,Albania,RegQua,1.2.1,Regulatory quality*,58.0,50.670701,0.27438,,0.0,,,2019.0
9,ALB,Albania,RuleOL,1.2.2,Rule of law*,85.0,35.889077,-0.411179,,0.0,,,2019.0


In [168]:
subpillars[2]
subpillar = subpillars[2]
print(subpillar)

Startup Environment


In [169]:
df = df[(df.IndName == 'Ease of getting credit*')]
df

Unnamed: 0,ISO2,Economy,IndCode,IndNum,IndName,Rank,Score,Value,MinDatCov,Outdated,OverallStreWeak,IncomeStreWeak,DataYear
46,ALB,Albania,EaseCred,4.1.1,Ease of getting credit*,44.0,70.0,70.0,,0.0,,,2019.0
158,DZA,Algeria,EaseCred,4.1.1,Ease of getting credit*,129.0,10.0,10.0,,0.0,W,W,2019.0
270,AGO,Angola,EaseCred,4.1.1,Ease of getting credit*,131.0,5.0,5.0,,0.0,W,W,2019.0
382,ARG,Argentina,EaseCred,4.1.1,Ease of getting credit*,94.0,50.0,50.0,,0.0,,W,2019.0
494,ARM,Armenia,EaseCred,4.1.1,Ease of getting credit*,44.0,70.0,70.0,,0.0,,,2019.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
14270,UZB,Uzbekistan,EaseCred,4.1.1,Ease of getting credit*,61.0,65.0,65.0,,0.0,,,2019.0
14382,VNM,Viet Nam,EaseCred,4.1.1,Ease of getting credit*,23.0,80.0,80.0,,0.0,,,2019.0
14494,YEM,Yemen,EaseCred,4.1.1,Ease of getting credit*,132.0,0.0,0.0,,0.0,W,W,2019.0
14606,ZMB,Zambia,EaseCred,4.1.1,Ease of getting credit*,4.0,95.0,95.0,,0.0,S,S,2019.0


In [170]:
df['higher_is_better'] = True
df['Year'] = df['DataYear']
df['Indicator'] = indicator
df['data_col'] = df['Score']
df['Sub-Pillar'] = subpillar
df['Country Name'] = df['Economy']

min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# transform 0-1 rank into 1-6
df['new_rank_score'] = df['data_col'].apply(lambda row: convert_rank(row,old_min=0,old_max=100))

In [171]:
df = df[['Country Name', 'Year','Indicator','data_col','new_rank_score','higher_is_better','Sub-Pillar']]
df

Unnamed: 0,Country Name,Year,Indicator,data_col,new_rank_score,higher_is_better,Sub-Pillar
46,Albania,2019.0,Ease of Getting Credit,70.0,4.4930,True,Startup Environment
158,Algeria,2019.0,Ease of Getting Credit,10.0,1.4990,True,Startup Environment
270,Angola,2019.0,Ease of Getting Credit,5.0,1.2495,True,Startup Environment
382,Argentina,2019.0,Ease of Getting Credit,50.0,3.4950,True,Startup Environment
494,Armenia,2019.0,Ease of Getting Credit,70.0,4.4930,True,Startup Environment
...,...,...,...,...,...,...,...
14270,Uzbekistan,2019.0,Ease of Getting Credit,65.0,4.2435,True,Startup Environment
14382,Viet Nam,2019.0,Ease of Getting Credit,80.0,4.9920,True,Startup Environment
14494,Yemen,2019.0,Ease of Getting Credit,0.0,1.0000,True,Startup Environment
14606,Zambia,2019.0,Ease of Getting Credit,95.0,5.7405,True,Startup Environment


In [172]:
df.to_csv('../indicator_scores/business_{}_scores.csv'.format(indicator), index=False)

In [173]:
### 19. Ease of Getting Credit

In [174]:
indicator = indicators[18]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))
df.head(16)

Global Resilience Index
global_resilience_index


Unnamed: 0,Country,Global Resilience Score,Unnamed: 2
0,Albania,34.6,
1,Algeria,31.2,
2,Argentina,53.7,
3,Armenia,46.9,
4,Australia,90.3,
5,Austria,94.2,
6,Azerbaijan,40.6,
7,Bahrain,60.7,
8,Bangladesh,29.5,
9,Belgium,90.1,


In [175]:
subpillars[2]
subpillar = subpillars[2]
print(subpillar)

Startup Environment


In [176]:
df['higher_is_better'] = True
df['Year'] = 2021
df['Indicator'] = indicator
df['data_col'] = df['Global Resilience Score']
df['Sub-Pillar'] = subpillar
df['Country Name'] = df['Country']

min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# transform 0-1 rank into 1-6
df['new_rank_score'] = df['data_col'].apply(lambda row: convert_rank(row,old_min=0,old_max=100))

In [177]:
df = df[['Country Name', 'Year','Indicator','data_col','new_rank_score','higher_is_better','Sub-Pillar']]
df

Unnamed: 0,Country Name,Year,Indicator,data_col,new_rank_score,higher_is_better,Sub-Pillar
0,Albania,2021,Global Resilience Index,34.6,2.72654,True,Startup Environment
1,Algeria,2021,Global Resilience Index,31.2,2.55688,True,Startup Environment
2,Argentina,2021,Global Resilience Index,53.7,3.67963,True,Startup Environment
3,Armenia,2021,Global Resilience Index,46.9,3.34031,True,Startup Environment
4,Australia,2021,Global Resilience Index,90.3,5.50597,True,Startup Environment
...,...,...,...,...,...,...,...
121,Uruguay,2021,Global Resilience Index,62.3,4.10877,True,Startup Environment
122,Venezuela,2021,Global Resilience Index,1.5,1.07485,True,Startup Environment
123,Vietnam,2021,Global Resilience Index,37.9,2.89121,True,Startup Environment
124,Zambia,2021,Global Resilience Index,32.5,2.62175,True,Startup Environment


### Score Aggregating

In [178]:
import os


In [179]:
# get list of files in scores folder
scores = os.listdir('../indicator_scores/')
scores = [s for s in scores if s.startswith('business')]

In [180]:
scores

['business_Cloud Services (Spend, IT Forecast Data)_scores.csv',
 'business_Doing Business Index_scores.csv',
 'business_Ease of finding skilled employees_scores.csv',
 'business_Ease of Getting Credit_scores.csv',
 'business_ICT task-intensive jobs as a percentage of total employment_scores.csv',
 'business_Networking Services (Spend, IT Forecast Data)_scores.csv',
 'business_Regulatory Quality_scores.csv',
 'business_Share of business with internet_scores.csv',
 'business_Share of businesses with broadband_scores.csv',
 'business_Share of businesses with online presence_scores.csv',
 'business_Size of digital economy (% of transactions)_scores.csv',
 'business_Size of gig economy (% of GDP)_scores.csv',
 'business_Strength of Legal Rights_scores.csv',
 'business_UNCTAD Business-to-Consumer (B2C) E-commerce Index_scores.csv',
 'business_Venture Capital Availability_scores.csv']

In [181]:
# create a dataframe that concatenates all these file into one table
df = pd.concat([pd.read_csv('../indicator_scores/{}'.format(s)) for s in scores])    

In [182]:
df

Unnamed: 0,Country Name,Year,Indicator,data_col,new_rank_score,higher_is_better,Sub-Pillar
0,Germany,2018.0,"Cloud Services (Spend, IT Forecast Data)",18.200000,1.908180,True,Technology Adoption
1,Japan,2018.0,"Cloud Services (Spend, IT Forecast Data)",20.300000,2.012970,True,Technology Adoption
2,United States,2018.0,"Cloud Services (Spend, IT Forecast Data)",18.000000,1.898200,True,Technology Adoption
3,United Kingdom,2018.0,"Cloud Services (Spend, IT Forecast Data)",19.800000,1.988020,True,Technology Adoption
4,Australia,2018.0,"Cloud Services (Spend, IT Forecast Data)",16.100000,1.803390,True,Technology Adoption
...,...,...,...,...,...,...,...
147,Vietnam,2019.0,Venture Capital Availability,3.267500,2.885804,True,Financing Incentives
148,"Yemen, Rep.",2019.0,Venture Capital Availability,2.163688,1.967801,True,Financing Incentives
149,South Africa,2019.0,Venture Capital Availability,3.051583,2.706233,True,Financing Incentives
150,Zambia,2019.0,Venture Capital Availability,1.803466,1.668216,True,Financing Incentives


In [183]:
# Data cleaning
df['new_rank_score'] = df['new_rank_score'].fillna(0)
df.sort_values(by=['Country Name'], ascending=True, inplace=True)
df.reset_index(drop=True, inplace=True)

In [184]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1829 entries, 0 to 1828
Data columns (total 7 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   Country Name      1829 non-null   object 
 1   Year              1829 non-null   float64
 2   Indicator         1829 non-null   object 
 3   data_col          1747 non-null   float64
 4   new_rank_score    1829 non-null   float64
 5   higher_is_better  1829 non-null   bool   
 6   Sub-Pillar        1829 non-null   object 
dtypes: bool(1), float64(3), object(3)
memory usage: 87.6+ KB


In [185]:
df

Unnamed: 0,Country Name,Year,Indicator,data_col,new_rank_score,higher_is_better,Sub-Pillar
0,Afghanistan,2019.0,Doing Business Index,47.770000,3.383723,False,Startup Environment
1,Afghanistan,2019.0,Strength of legal rights index (0=weak to 12=s...,10.000000,5.158333,True,Startup Environment
2,Afghanistan,2020.0,UNCTAD Business-to-Consumer (B2C) E-commerce I...,17.100000,1.853290,True,Technology Adoption
3,Africa Eastern and Southern,2019.0,Strength of legal rights index (0=weak to 12=s...,4.538462,2.887244,True,Startup Environment
4,Africa Western and Central,2019.0,Strength of legal rights index (0=weak to 12=s...,5.863636,3.438295,True,Startup Environment
...,...,...,...,...,...,...,...
1824,Zimbabwe,2019.0,Regulatory Quality,5.247431,1.261847,True,Startup Environment
1825,Zimbabwe,2019.0,Strength of legal rights index (0=weak to 12=s...,6.000000,3.495000,True,Startup Environment
1826,Zimbabwe,2019.0,Ease of Getting Credit,65.000000,4.243500,True,Startup Environment
1827,Zimbabwe,2019.0,Venture Capital Availability,2.058023,1.879922,True,Financing Incentives


In [186]:
df.head(15)

Unnamed: 0,Country Name,Year,Indicator,data_col,new_rank_score,higher_is_better,Sub-Pillar
0,Afghanistan,2019.0,Doing Business Index,47.77,3.383723,False,Startup Environment
1,Afghanistan,2019.0,Strength of legal rights index (0=weak to 12=s...,10.0,5.158333,True,Startup Environment
2,Afghanistan,2020.0,UNCTAD Business-to-Consumer (B2C) E-commerce I...,17.1,1.85329,True,Technology Adoption
3,Africa Eastern and Southern,2019.0,Strength of legal rights index (0=weak to 12=s...,4.538462,2.887244,True,Startup Environment
4,Africa Western and Central,2019.0,Strength of legal rights index (0=weak to 12=s...,5.863636,3.438295,True,Startup Environment
5,Albania,2020.0,UNCTAD Business-to-Consumer (B2C) E-commerce I...,49.5,3.47005,True,Technology Adoption
6,Albania,2019.0,Size of digital economy (% of transactions),6.69,1.333831,True,Technology Adoption
7,Albania,2019.0,Ease of finding skilled employees,3.89,3.89,True,Startup Environment
8,Albania,2019.0,Ease of Getting Credit,70.0,4.493,True,Startup Environment
9,Albania,2019.0,Regulatory Quality,50.670701,3.528468,True,Startup Environment


In [187]:
df.describe()

Unnamed: 0,Year,data_col,new_rank_score
count,1829.0,1747.0,1829.0
mean,2018.967195,37.610482,3.3724
std,1.096551,29.404385,1.353045
min,2010.0,0.0,0.0
25%,2019.0,6.0,2.497
50%,2019.0,36.9,3.495
75%,2019.0,63.125,4.417159
max,2020.0,100.0,5.99


In [188]:
df['Country Name'] = df['Country Name'].astype(str)
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1829 entries, 0 to 1828
Data columns (total 7 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   Country Name      1829 non-null   object 
 1   Year              1829 non-null   float64
 2   Indicator         1829 non-null   object 
 3   data_col          1747 non-null   float64
 4   new_rank_score    1829 non-null   float64
 5   higher_is_better  1829 non-null   bool   
 6   Sub-Pillar        1829 non-null   object 
dtypes: bool(1), float64(3), object(3)
memory usage: 87.6+ KB


In [189]:
# checking country names
sorted(df['Country Name'].unique().tolist())

['Afghanistan',
 'Africa Eastern and Southern',
 'Africa Western and Central',
 'Albania',
 'Albania ',
 'Algeria',
 'Algeria ',
 'American Samoa',
 'Andorra',
 'Angola',
 'Angola ',
 'Antigua and Barbuda',
 'Arab World',
 'Argentina',
 'Argentina ',
 'Armenia',
 'Armenia ',
 'Aruba',
 'Australia',
 'Australia ',
 'Austria',
 'Austria ',
 'Azerbaijan',
 'Azerbaijan ',
 'B:',
 'Bahamas, The',
 'Bahrain',
 'Bahrain ',
 'Bangladesh',
 'Bangladesh ',
 'Bangladesh Chittagong',
 'Bangladesh Dhaka',
 'Barbados',
 'Belarus',
 'Belarus ',
 'Belgium',
 'Belgium ',
 'Belize',
 'Benin',
 'Benin ',
 'Bermuda',
 'Bhutan',
 'Bolivia',
 'Bolivia ',
 'Bolivia (Plurinational State of)',
 'Bosnia and Herzegovina',
 'Bosnia and Herzegovina ',
 'Botswana',
 'Botswana ',
 'Brazil',
 'Brazil ',
 'Brazil Rio de Janeiro',
 'Brazil São Paulo',
 'British Virgin Islands',
 'Brunei Darussalam',
 'Bulgaria',
 'Bulgaria ',
 'Burkina Faso',
 'Burkina Faso ',
 'Burundi',
 'Burundi ',
 'Cabo Verde',
 'Cabo Verde ',
 'C

In [190]:
# remove trailing whitespaces from country name
df['Country Name'] = df['Country Name'].str.strip()
df['Country Name'] = df['Country Name'].str.strip('**')
df['Country Name'] = df['Country Name'].str.strip('*')

In [191]:
# checking country names
sorted(df['Country Name'].unique().tolist())

['Afghanistan',
 'Africa Eastern and Southern',
 'Africa Western and Central',
 'Albania',
 'Algeria',
 'American Samoa',
 'Andorra',
 'Angola',
 'Antigua and Barbuda',
 'Arab World',
 'Argentina',
 'Armenia',
 'Aruba',
 'Australia',
 'Austria',
 'Azerbaijan',
 'B:',
 'Bahamas, The',
 'Bahrain',
 'Bangladesh',
 'Bangladesh Chittagong',
 'Bangladesh Dhaka',
 'Barbados',
 'Belarus',
 'Belgium',
 'Belize',
 'Benin',
 'Bermuda',
 'Bhutan',
 'Bolivia',
 'Bolivia (Plurinational State of)',
 'Bosnia and Herzegovina',
 'Botswana',
 'Brazil',
 'Brazil Rio de Janeiro',
 'Brazil São Paulo',
 'British Virgin Islands',
 'Brunei Darussalam',
 'Bulgaria',
 'Burkina Faso',
 'Burundi',
 'Cabo Verde',
 'Cambodia',
 'Cameroon',
 'Canada',
 'Caribbean small states',
 'Cayman Islands',
 'Central African Republic',
 'Central Europe and the Baltics',
 'Chad',
 'Channel Islands',
 'Chile',
 'China',
 'China Beijing',
 'China Shanghai',
 'China, Hong Kong SAR',
 'Colombia',
 'Comoros',
 'Congo',
 'Congo, Dem. 

In [192]:
# average indicator scores per country
agg_df = df.groupby(['Country Name']).agg({'new_rank_score':'mean','data_col':'count'})

In [193]:
agg_df.columns = ['agg_score', 'count_source' ]

In [194]:
max_number_sources = agg_df.describe()['count_source']['max']

In [195]:
agg_df['agg_score_wt'] = agg_df['agg_score']*(agg_df['count_source']/max_number_sources)

In [196]:
agg_df.sort_values(by='agg_score', ascending=False, inplace=True)

In [197]:
agg_df.head(25)

Unnamed: 0_level_0,agg_score,count_source,agg_score_wt
Country Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
"China, Hong Kong SAR",5.58082,1,0.372055
"Korea, Republic of",5.48102,1,0.365401
United States of America,5.402309,3,1.080462
Czechia,5.28142,1,0.352095
Hong Kong,5.250172,2,0.700023
United States New York City,5.191101,1,0.346073
North America,5.158333,1,0.343889
Kosovo,5.137126,2,0.68495
United States Los Angeles,5.035912,1,0.335727
Russian Federation Moscow,4.871741,1,0.324783


In [198]:
agg_df.to_csv('../pillar_scores/business_scores_v0.csv')

In [199]:
### Score Aggregating by Subpillars

In [200]:
df.insert(0,'Pillar','Business')
df

# remove trailing whitespaces from country name
df['Country Name'] = df['Country Name'].str.strip()
df['Country Name'] = df['Country Name'].str.strip('**')
df['Country Name'] = df['Country Name'].str.strip('*')

In [201]:
sub_df = df.groupby(['Pillar','Sub-Pillar','Country Name']).agg({'new_rank_score':'mean','data_col':'count'})

In [202]:
sub_df.columns = ['agg_score', 'count_source' ]

In [203]:
max_number_sources = sub_df.describe()['count_source']['max']

In [204]:
sub_df['agg_score_wt'] = sub_df['agg_score']*(sub_df['count_source']/max_number_sources)

In [205]:
sub_df.to_csv('../subpillar_score/business_scores_subpillar_v0.csv')

### Sources Generation

In [206]:
#Get all countries from Countries.xlsx
countries = pd.read_excel('../../data/Countries.xlsx')
col_names = ['Country or Area']
countries = countries[col_names]
countries.rename(columns = {'Country or Area': 'Country Name'}, inplace = True)

In [207]:
#Get all indicators from names dataframe retrieve at the begining of the script
bnames=bnames[['check','Sub-Pillar','Indicator','Data Source','Data Link']]
bnames.rename(columns = {'check': 'Pillar'}, inplace = True)
bnames = bnames.replace('\n','', regex=True)

In [208]:
#Do a nice cross join so that we have combination of all countries vs all indicators
sources = countries.merge(bnames, how='cross')
sources

Unnamed: 0,Country Name,Pillar,Sub-Pillar,Indicator,Data Source,Data Link
0,Algeria,Business,Technology Adoption,UNCTAD Business-to-Consumer (B2C) E-commerce I...,UNCTAD: Business-to-Consumer (B2C) E-commerce...,https://unctad.org
1,Algeria,Business,Technology Adoption,"Networking Services (Spend, IT Forecast Data)",Portulans Institute: Network Readiness Index,https://networkreadinessindex.org
2,Algeria,Business,Technology Adoption,"Cloud Services (Spend, IT Forecast Data)",Statista,https://www.statista.com
3,Algeria,Business,Technology Adoption,ICT task-intensive jobs as a percentage of tot...,OECD: Going Digital Toolkit,http://goingdigital.oecd.org
4,Algeria,Business,Technology Adoption,Share of business with internet,OECD: ICT Access and Usage by Businesses,https://stats.oecd.org
...,...,...,...,...,...,...
4726,Wallis and Futuna Islands,Business,Startup Environment,Ease of finding skilled employees,World Bank: GovData,https://datacatalog.worldbank.org
4727,Wallis and Futuna Islands,Business,Startup Environment,Amount invested into startups yearly from priv...,OECD: Venture capital investments,https://stats.oecd.org
4728,Wallis and Futuna Islands,Business,Startup Environment,Regulatory Quality,Global Innovation Index/World Bank: Regulatory...,https://www.globalinnovationindex.org/analysis...
4729,Wallis and Futuna Islands,Business,Startup Environment,Ease of Getting Credit,Global Innovation Index/World Bank: Ease of Ge...,https://www.globalinnovationindex.org/analysis...


In [209]:
#Make copy of a scores dataframe and add the column available, with value of 1 (string)
#denoting all the country/indicator combinations that have value
dfsources = df[['Country Name','Pillar','Sub-Pillar','Indicator']].copy()
dfsources['Available'] = '1'

In [210]:
#Merge (left join) sources (all country indicator combinations)
#with those in copied scores dataframe
#resulting in a sources dataframe with values of 1 in Available column where there is a value for country/indicator
#and there is a NaN where there's not. 
#subsequently replace NaN with 0 (string)
sources = sources.merge(dfsources, how='left', on=['Country Name','Pillar','Sub-Pillar','Indicator'])
sources['Available'] = sources['Available'].fillna('0')
sources

Unnamed: 0,Country Name,Pillar,Sub-Pillar,Indicator,Data Source,Data Link,Available
0,Algeria,Business,Technology Adoption,UNCTAD Business-to-Consumer (B2C) E-commerce I...,UNCTAD: Business-to-Consumer (B2C) E-commerce...,https://unctad.org,1
1,Algeria,Business,Technology Adoption,"Networking Services (Spend, IT Forecast Data)",Portulans Institute: Network Readiness Index,https://networkreadinessindex.org,1
2,Algeria,Business,Technology Adoption,"Cloud Services (Spend, IT Forecast Data)",Statista,https://www.statista.com,0
3,Algeria,Business,Technology Adoption,ICT task-intensive jobs as a percentage of tot...,OECD: Going Digital Toolkit,http://goingdigital.oecd.org,0
4,Algeria,Business,Technology Adoption,Share of business with internet,OECD: ICT Access and Usage by Businesses,https://stats.oecd.org,0
...,...,...,...,...,...,...,...
4726,Wallis and Futuna Islands,Business,Startup Environment,Ease of finding skilled employees,World Bank: GovData,https://datacatalog.worldbank.org,0
4727,Wallis and Futuna Islands,Business,Startup Environment,Amount invested into startups yearly from priv...,OECD: Venture capital investments,https://stats.oecd.org,0
4728,Wallis and Futuna Islands,Business,Startup Environment,Regulatory Quality,Global Innovation Index/World Bank: Regulatory...,https://www.globalinnovationindex.org/analysis...,0
4729,Wallis and Futuna Islands,Business,Startup Environment,Ease of Getting Credit,Global Innovation Index/World Bank: Ease of Ge...,https://www.globalinnovationindex.org/analysis...,0


In [211]:
#If sources.csv exists, get the contents, remove everyhing from this pillar, append prepared sources, save csv.
#if sources.csv does not exist, create new file from sources.
from os.path import exists

if exists('../../dashboard/Sources.csv') :
    CurrentSources = pd.read_csv('../../dashboard/Sources.csv', dtype=str)
    CurrentSources = CurrentSources[['Country Name','Pillar','Sub-Pillar','Indicator','Data Source','Data Link','Available']]
    CurrentSources = CurrentSources.loc[CurrentSources['Pillar'] != 'Business']
    CurrentSources = CurrentSources.append(sources)
else :
    CurrentSources = sources
CurrentSources

Unnamed: 0,Country Name,Pillar,Sub-Pillar,Indicator,Data Source,Data Link,Available
0,Algeria,Foundations,Digital Payments,Digital payments penetration,Portulans Institute: Network Readiness Index,https://networkreadinessindex.org,0
1,Algeria,Foundations,Digital Payments,% of population with digital finance account,World Bank: Global Findex database,https://datacatalog.worldbank.org,1
2,Algeria,Foundations,Digital Payments,Made or received digital payments in the past ...,World Bank: Global Findex database,https://datacatalog.worldbank.org,1
3,Algeria,Foundations,Digital Payments,Made or received digital payments in the past ...,World Bank: Global Findex database,https://datacatalog.worldbank.org,1
4,Algeria,Foundations,Digital Payments,Used a mobile phone or the internet to check a...,World Bank: Global Findex database,https://datacatalog.worldbank.org,1
...,...,...,...,...,...,...,...
4726,Wallis and Futuna Islands,Business,Startup Environment,Ease of finding skilled employees,World Bank: GovData,https://datacatalog.worldbank.org,0
4727,Wallis and Futuna Islands,Business,Startup Environment,Amount invested into startups yearly from priv...,OECD: Venture capital investments,https://stats.oecd.org,0
4728,Wallis and Futuna Islands,Business,Startup Environment,Regulatory Quality,Global Innovation Index/World Bank: Regulatory...,https://www.globalinnovationindex.org/analysis...,0
4729,Wallis and Futuna Islands,Business,Startup Environment,Ease of Getting Credit,Global Innovation Index/World Bank: Ease of Ge...,https://www.globalinnovationindex.org/analysis...,0


In [212]:
CurrentSources.to_csv('../../dashboard/Sources.csv', index=False)