In [1]:
import pandas as pd
import numpy as np

### Get all the pillar names from the Excel file

In [2]:
names = pd.read_excel('../../UNDP Digital Assessment Data Framework Filename Matching V7.xlsx')

In [3]:
col_names = ['Indicator','check', 'Data Source','Index','Filename','Sub-Pillar']

In [4]:
names = names[col_names]

In [5]:
names.head()

Unnamed: 0,Indicator,check,Data Source,Index,Filename,Sub-Pillar
0,Countries,,UN Statistics Division: List of Countries,False,Countries,
1,"Database of Global Administrative Areas (GADM,...",,,False,,
2,High Resolution Population Density Maps + Demo...,,,False,,
3,population density vs openstreetmap object den...,,,False,,
4,Population Density,Infrastructure,World Bank: World Development Indicators,False,population_density,Connectivity Technology


In [6]:
# get all the files per pillar
data_stats = names.groupby('check').agg({'Filename':'count','Indicator':'count'})

In [7]:
data_stats

Unnamed: 0_level_0,Filename,Indicator
check,Unnamed: 1_level_1,Unnamed: 2_level_1
Business,16,25
Foundations,14,22
Government,10,15
Infrastructure,41,52
People,35,49
Regulation,5,8
Strategy,1,1


### Business

In [8]:
# Business-pillar rows that have a source file assigned.
# (An extra filter on `Index == False` is intentionally not applied here.)
is_business = names['check'] == 'Business'
has_file = names['Filename'].notna()
bnames = names[is_business & has_file]

In [9]:
bnames.head(25)

Unnamed: 0,Indicator,check,Data Source,Index,Filename,Sub-Pillar
80,UNCTAD Business-to-Consumer (B2C) E-commerce I...,Business,UNCTAD: Business-to-Consumer (B2C) E-commerce...,True,b2c_ecommerse_idx,Technology Adoption
81,"Networking Services (Spend, IT Forecast Data)",Business,Portulans Institute: Network Readiness Index,True,network_readiness_index,Technology Adoption
86,"Cloud Services (Spend, IT Forecast Data)",Business,Statista,True,cloud_services,Technology Adoption
87,ICT task-intensive jobs as a percentage of tot...,Business,OECD: Going Digital Toolkit,False,ICT_proportion,Technology Adoption
90,Share of business with internet,Business,OECD: ICT Access and Usage by Businesses,False,business_internet,Technology Adoption
91,Share of businesses with broadband,Business,World Bank: TCdata360,False,business_broadband,Technology Adoption
92,Share of businesses with online presence,Business,Portulans Institute: Network Readiness Index,False,share_of_businesses_online_presence,Technology Adoption
93,Size of gig economy (% of GDP),Business,Portulans Institute: Network Readiness Index,False,prevalance_gig_economy,Technology Adoption
94,Size of digital economy (% of transactions),Business,Portulans Institute: Network Readiness Index,False,size_digital_economy,Technology Adoption
95,Venture Capital Availability,Business,World Bank: TCdata360,True,TCdata360,Financing Incentives


In [10]:
# get list of names for all indicators
indicators = bnames.Indicator.unique()
subpillars = bnames['Sub-Pillar'].unique()

In [11]:
# get all file names
bfiles = bnames.Filename.unique()

In [12]:
bfiles

array(['b2c_ecommerse_idx', 'network_readiness_index', 'cloud_services',
       'ICT_proportion', 'business_internet', 'business_broadband',
       'share_of_businesses_online_presence', 'prevalance_gig_economy',
       'size_digital_economy', 'TCdata360', 'doing_bus_idx',
       'legal_rights_strength', 'time_start_bus', 'ease_doing_bus',
       'ease_of_finding_skilled_employees', 'start_up_investment'],
      dtype=object)

In [13]:
subpillars

array(['Technology Adoption', 'Financing Incentives',
       'Startup Environment'], dtype=object)

In [14]:
# ls digital-readiness-assessment-main/processed/

In [15]:
# formula for converting scale
def convert_rank(old_value, old_min=1, old_max=7, new_min=1, new_max=6):
    """Linearly map a value from one scale onto another.

    ``old_min`` maps to ``new_min`` and ``old_max`` maps to ``new_max``;
    values in between are interpolated. No clipping is applied, so a value
    outside the old scale lands outside the new scale.

    Parameters
    ----------
    old_value : number
        Value expressed on the old scale.
    old_min, old_max : number
        Bounds of the old scale (defaults 1 and 7).
    new_min, new_max : number
        Bounds of the new scale (defaults 1 and 6).

    Returns
    -------
    number
        ``old_value`` re-expressed on the new scale.
    """
    source_span = old_max - old_min
    target_span = new_max - new_min
    offset = old_value - old_min
    # Multiply before dividing, then shift onto the new scale's origin.
    return (offset * target_span) / source_span + new_min

### 1. 'UNCTAD Business-to-Consumer (B2C) E-commerce Index'

In [16]:
indicators[0]

'UNCTAD Business-to-Consumer (B2C) E-commerce Index'

In [17]:
# load data
indicator = indicators[0]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

UNCTAD Business-to-Consumer (B2C) E-commerce Index
b2c_ecommerse_idx


In [18]:
df.Indicator.unique()

array(['Country rank and value in the UNCTAD B2C E-commerce Index'],
      dtype=object)

In [19]:
df.head()

Unnamed: 0,2015,2016,2017,Country ISO3,Country Name,Indicator Id,Indicator,Subindicator Type
0,14.1,17.0,,AFG,Afghanistan,24717,Country rank and value in the UNCTAD B2C E-com...,Value
1,,130.0,132.0,AFG,Afghanistan,24718,Country rank and value in the UNCTAD B2C E-com...,Rank
2,21.1,29.0,,AGO,Angola,24717,Country rank and value in the UNCTAD B2C E-com...,Value
3,,113.0,113.0,AGO,Angola,24718,Country rank and value in the UNCTAD B2C E-com...,Rank
4,51.0,62.0,,ALB,Albania,24717,Country rank and value in the UNCTAD B2C E-com...,Value


In [20]:
subpillars[0]
subpillar = subpillars[0]
print(subpillar)

Technology Adoption


In [21]:
# two sub indicators per country
df['Subindicator Type'].unique()

array(['Value', 'Rank'], dtype=object)

In [22]:
# Keep only the 'Value' rows; the 'Rank' rows are a separate sub-indicator.
# .copy() detaches the filtered rows from the loaded frame, so the column
# assignments below do not trigger pandas' SettingWithCopyWarning.
df = df[df['Subindicator Type'] == 'Value'].copy()

# create standard columns
df['higher_is_better'] = True
df['Indicator'] = indicator
df['data_col'] = df['2016']
df['Sub-Pillar'] = subpillar
df['Year'] = 2016

# Observed range of the 2016 values.
min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# Min-max rescale the observed 2016 values onto the 1-6 scale
# (rows with no 2016 value stay NaN).
df['new_rank_score'] = df['data_col'].apply(lambda row: convert_rank(row,old_min=min_rank,old_max=max_rank))

In [23]:
df = df[['Country Name', 'Year','Indicator','data_col','new_rank_score','higher_is_better','Sub-Pillar']]
df

Unnamed: 0,Country Name,Year,Indicator,data_col,new_rank_score,higher_is_better,Sub-Pillar
0,Afghanistan,2016,UNCTAD Business-to-Consumer (B2C) E-commerce I...,17.0,1.748663,True,Technology Adoption
2,Angola,2016,UNCTAD Business-to-Consumer (B2C) E-commerce I...,29.0,2.390374,True,Technology Adoption
4,Albania,2016,UNCTAD Business-to-Consumer (B2C) E-commerce I...,62.0,4.155080,True,Technology Adoption
6,United Arab Emirates,2016,UNCTAD Business-to-Consumer (B2C) E-commerce I...,87.0,5.491979,True,Technology Adoption
8,Argentina,2016,UNCTAD Business-to-Consumer (B2C) E-commerce I...,45.0,3.245989,True,Technology Adoption
...,...,...,...,...,...,...,...
284,"Venezuela, RB",2016,UNCTAD Business-to-Consumer (B2C) E-commerce I...,,,True,Technology Adoption
286,Vietnam,2016,UNCTAD Business-to-Consumer (B2C) E-commerce I...,50.0,3.513369,True,Technology Adoption
288,South Africa,2016,UNCTAD Business-to-Consumer (B2C) E-commerce I...,54.0,3.727273,True,Technology Adoption
290,Zambia,2016,UNCTAD Business-to-Consumer (B2C) E-commerce I...,24.0,2.122995,True,Technology Adoption


In [24]:
# output scores; index=False keeps the positional row index out of the CSV
df.to_csv('../indicator_scores/business_{}_scores.csv'.format(indicator), index=False)

## 2. Networking Services (Spend, IT Forecast Data)


In [25]:
indicator = indicators[1]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

Networking Services (Spend, IT Forecast Data)
network_readiness_index


In [26]:
df.head(16)

Unnamed: 0,Rank,Country,Score,Income Group,Region
0,1,Sweden,82.75,High-income,Europe
1,2,Denmark,82.19,High-income,Europe
2,3,Singapore,81.39,High-income,Asia & Pacific
3,4,Netherlands,81.37,High-income,Europe
4,5,Switzerland,80.41,High-income,Europe
5,6,Finland,80.16,High-income,Europe
6,7,Norway,79.39,High-income,Europe
7,8,United States,78.91,High-income,The Americas
8,9,Germany,77.48,High-income,Europe
9,10,United Kingdom,76.27,High-income,Europe


In [27]:
subpillars[0]
subpillar = subpillars[0]
print(subpillar)

Technology Adoption


In [28]:
# Use the published index score as the raw value.
df['data_col'] = df['Score']

# Fixed bounds of the source scale.
# NOTE(review): the score is assumed to run 1-100; this has not been verified
# against the source. The observed min/max are not used here.
min_rank, max_rank = 1, 100

# Rescale the 1-100 score onto 1-6. No inversion is applied: a higher score
# gives a higher new_rank_score.
df['new_rank_score'] = df['data_col'].apply(convert_rank, old_min=min_rank, old_max=max_rank)

In [29]:
# Standardise the country column name and attach the indicator metadata.
df = (
    df
    .rename(columns={'Country': 'Country Name'})
    .assign(**{
        'Indicator': indicator,
        'higher_is_better': True,
        'Year': 2020,
        'Sub-Pillar': subpillar,
    })
)

In [30]:
df = df[['Country Name', 'Year','Indicator','data_col','new_rank_score','higher_is_better','Sub-Pillar']]
df

Unnamed: 0,Country Name,Year,Indicator,data_col,new_rank_score,higher_is_better,Sub-Pillar
0,Sweden,2020,"Networking Services (Spend, IT Forecast Data)",82.75,5.128788,True,Technology Adoption
1,Denmark,2020,"Networking Services (Spend, IT Forecast Data)",82.19,5.100505,True,Technology Adoption
2,Singapore,2020,"Networking Services (Spend, IT Forecast Data)",81.39,5.060101,True,Technology Adoption
3,Netherlands,2020,"Networking Services (Spend, IT Forecast Data)",81.37,5.059091,True,Technology Adoption
4,Switzerland,2020,"Networking Services (Spend, IT Forecast Data)",80.41,5.010606,True,Technology Adoption
...,...,...,...,...,...,...,...
129,Burundi,2020,"Networking Services (Spend, IT Forecast Data)",22.62,2.091919,True,Technology Adoption
130,Angola,2020,"Networking Services (Spend, IT Forecast Data)",20.96,2.008081,True,Technology Adoption
131,Yemen,2020,"Networking Services (Spend, IT Forecast Data)",18.00,1.858586,True,Technology Adoption
132,"Congo, Dem. Rep.",2020,"Networking Services (Spend, IT Forecast Data)",16.60,1.787879,True,Technology Adoption


In [31]:
# output scores; index=False keeps the positional row index out of the CSV
df.to_csv('../indicator_scores/business_{}_scores.csv'.format(indicator), index=False)

### 3. Cloud Services (Spend, IT Forecast Data)


In [32]:
indicator = indicators[2]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

Cloud Services (Spend, IT Forecast Data)
cloud_services


In [33]:
# remove nulls
df = df.dropna()
df

Unnamed: 0,Cloud computing policy environment by category - country ranking 2018,Unnamed: 1
2,Germany,18.2
3,Japan,20.3
4,United States,18.0
5,United Kingdom,19.8
6,Australia,16.1
7,Singapore,20.7
8,Canada,17.0
9,France,17.3
10,Italy,15.0
11,Spain,16.6


In [34]:
subpillars[0]
subpillar = subpillars[0]
print(subpillar)

Technology Adoption


In [35]:
# prepare standard columns
df['data_col'] = df['Unnamed: 1'].astype(float)
df['Country Name'] = df.iloc[:,0]
df['Indicator'] = indicator
df['higher_is_better'] = True
df['Year'] = 2018
df['Sub-Pillar'] = subpillar

In [36]:
# Bounds of the old scale handed to convert_rank in the next cell.
# NOTE(review): data_col holds the values of the 'Unnamed: 1' column, while
# max_rank is the number of distinct countries -- confirm that a country count
# is the right upper bound for those values.
min_rank = 1
max_rank = df['Country Name'].nunique()

In [37]:
# transform 1-24 rank into 1-6
df['new_rank_score'] = df['data_col'].apply(lambda row: convert_rank(row,old_min=min_rank,old_max=max_rank))

# need to invert score since higher rank is not better 
# df['new_rank_score'] = (6-df['new_rank_score'])+1

In [38]:
# prepare output: keep the standard columns only.
# Rows stay in their loaded order. sort_values() returns a new frame rather
# than sorting in place, so a bare call here would have no effect.
df = df[['Country Name', 'Year','Indicator','data_col','new_rank_score','higher_is_better','Sub-Pillar']]

# output scores
df.to_csv('../indicator_scores/business_{}_scores.csv'.format(indicator), index=False)

In [39]:
df.head(15)

Unnamed: 0,Country Name,Year,Indicator,data_col,new_rank_score,higher_is_better,Sub-Pillar
2,Germany,2018,"Cloud Services (Spend, IT Forecast Data)",18.2,4.73913,True,Technology Adoption
3,Japan,2018,"Cloud Services (Spend, IT Forecast Data)",20.3,5.195652,True,Technology Adoption
4,United States,2018,"Cloud Services (Spend, IT Forecast Data)",18.0,4.695652,True,Technology Adoption
5,United Kingdom,2018,"Cloud Services (Spend, IT Forecast Data)",19.8,5.086957,True,Technology Adoption
6,Australia,2018,"Cloud Services (Spend, IT Forecast Data)",16.1,4.282609,True,Technology Adoption
7,Singapore,2018,"Cloud Services (Spend, IT Forecast Data)",20.7,5.282609,True,Technology Adoption
8,Canada,2018,"Cloud Services (Spend, IT Forecast Data)",17.0,4.478261,True,Technology Adoption
9,France,2018,"Cloud Services (Spend, IT Forecast Data)",17.3,4.543478,True,Technology Adoption
10,Italy,2018,"Cloud Services (Spend, IT Forecast Data)",15.0,4.043478,True,Technology Adoption
11,Spain,2018,"Cloud Services (Spend, IT Forecast Data)",16.6,4.391304,True,Technology Adoption


## 4. ICT task-intensive jobs as a percentage of total employment

In [40]:
indicator = indicators[3]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

ICT task-intensive jobs as a percentage of total employment
ICT_proportion


In [41]:
df.head()

Unnamed: 0,Indicator,Country,Industry,Information and communication technologies,Sex,Measure,Time,Value,Flags
0,ICT specialists within and outside information...,Austria,Total economy,Specialist (ISCO-08: 133+215+251+252+351+352+742),Total,Share of jobs,2011,3.1764,
1,ICT specialists within and outside information...,Austria,Total economy,Specialist (ISCO-08: 133+215+251+252+351+352+742),Total,Share of jobs,2012,3.225967,
2,ICT specialists within and outside information...,Austria,Total economy,Specialist (ISCO-08: 133+215+251+252+351+352+742),Total,Share of jobs,2013,3.346251,
3,ICT specialists within and outside information...,Austria,Total economy,Specialist (ISCO-08: 133+215+251+252+351+352+742),Total,Share of jobs,2014,3.3191,
4,ICT specialists within and outside information...,Austria,Total economy,Specialist (ISCO-08: 133+215+251+252+351+352+742),Total,Share of jobs,2015,3.72934,


In [42]:
subpillars[0]
subpillar = subpillars[0]
print(subpillar)

Technology Adoption


In [43]:
df[(df['Time']==2018)&(df['Information and communication technologies']=='ICT-intensive')].sort_values(by='Value', ascending=False)

Unnamed: 0,Indicator,Country,Industry,Information and communication technologies,Sex,Measure,Time,Value,Flags


In [44]:
# bnames

In [45]:
df['Information and communication technologies'].unique()

array(['Specialist (ISCO-08: 133+215+251+252+351+352+742)',
       'Other ICT-intensive (ISCO-08: 121+122,134+,211+,216+,231+,241+,242+243)',
       'Non-ICT (rest of ISCO-08 occupations)', 'ICT-intensive', 'Total'],
      dtype=object)

In [46]:
df.Sex.unique()

array(['Total'], dtype=object)

In [47]:
# convert to correct types
df['Value'] = df['Value'].astype(float)

In [48]:
df['Value'].describe()

count    985.000000
mean      42.353406
std       42.614469
min        0.890157
25%        5.526795
50%       12.887070
75%       91.318100
max      100.000000
Name: Value, dtype: float64

In [49]:
# filter on the relevant year and the aggregate 'ICT-intensive' category.
# .copy() detaches the filtered rows from the loaded frame, so the column
# assignments below do not trigger pandas' SettingWithCopyWarning.
df = df[(df['Time']==2017)&(df['Information and communication technologies']=='ICT-intensive')].copy()

# use the reported Value as the raw data column
df['data_col'] = df['Value']

# Fixed bounds of the source scale (0-100).
# NOTE(review): assumes 'Value' is a percentage -- confirm against the source.
min_rank = 0 #df['data_col'].min()
max_rank = 100 #df['data_col'].max()

# transform the 0-100 value into 1-6 (no inversion: higher value -> higher score)
df['new_rank_score'] = df['data_col'].apply(lambda row: convert_rank(row,old_min=min_rank,old_max=max_rank))

# standard columns
df = df.rename(columns={'Country':'Country Name'})
df['Indicator'] = indicator
df['higher_is_better'] = True
df['Year'] = df['Time']
df['Sub-Pillar'] = subpillar

# keep the standard output columns and display the result
df = df[['Country Name', 'Year','Indicator','data_col','new_rank_score','higher_is_better','Sub-Pillar']]
df

Unnamed: 0,Country Name,Year,Indicator,data_col,new_rank_score,higher_is_better,Sub-Pillar
27,Austria,2017,ICT task-intensive jobs as a percentage of tot...,10.88416,1.544208,True,Technology Adoption
62,Belgium,2017,ICT task-intensive jobs as a percentage of tot...,14.44826,1.722413,True,Technology Adoption
97,Czech Republic,2017,ICT task-intensive jobs as a percentage of tot...,9.219953,1.460998,True,Technology Adoption
157,Estonia,2017,ICT task-intensive jobs as a percentage of tot...,15.76142,1.788071,True,Technology Adoption
217,Finland,2017,ICT task-intensive jobs as a percentage of tot...,15.22048,1.761024,True,Technology Adoption
252,France,2017,ICT task-intensive jobs as a percentage of tot...,12.00835,1.600418,True,Technology Adoption
287,Germany,2017,ICT task-intensive jobs as a percentage of tot...,10.4419,1.522095,True,Technology Adoption
322,Greece,2017,ICT task-intensive jobs as a percentage of tot...,6.675247,1.333762,True,Technology Adoption
357,Hungary,2017,ICT task-intensive jobs as a percentage of tot...,8.369766,1.418488,True,Technology Adoption
392,Iceland,2017,ICT task-intensive jobs as a percentage of tot...,14.27687,1.713843,True,Technology Adoption


In [50]:
df.to_csv('../indicator_scores/business_{}_scores.csv'.format(indicator), index=False)

## 5. Share of business with internet

In [51]:
indicator = indicators[4]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

Share of business with internet
business_internet


In [52]:
df= df.replace('..',np.nan)

In [53]:
df.head(15)

Unnamed: 0,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,Country
0,52.47,54.8,60.17,61.52,67.16,67.81,69.33,76.25,75.58,75.77,76.73,75.62,77.37,79.38,80.37,,Australia
1,72.22,78.81,80.06,79.84,79.85,80.19,82.87,82.01,85.7,86.35,87.46,88.11,85.55,87.92,89.45,90.42,Austria
2,,,,,77.37,78.47,76.6,76.01,78.26,79.15,81.04,81.0,82.6,84.03,86.72,86.62,Belgium
3,64.8,67.5,69.7,,,,,79.8,77.5,,,,78.5,,81.8,,Canada
4,,,,47.87,51.74,54.03,59.41,63.27,66.55,67.0,66.47,67.43,67.17,67.81,,,Colombia
5,,70.08,71.12,73.99,72.66,73.63,77.44,79.67,79.86,82.63,82.57,82.15,82.9,82.79,83.31,83.32,Czech Republic
6,,,,,87.61,87.83,88.68,89.3,91.78,91.4,91.95,93.34,95.09,95.58,93.92,92.77,Denmark
7,52.65,57.86,61.87,65.73,67.53,70.04,72.63,74.97,75.74,77.56,79.73,77.93,78.09,78.36,81.18,79.79,Estonia
8,,,,,84.62,87.32,92.56,91.3,93.64,95.1,95.2,95.33,96.28,95.64,,95.92,Finland
9,,,,,54.05,57.71,60.05,64.48,65.3,63.59,66.82,68.5,66.53,69.41,71.54,70.35,France


In [54]:
subpillars[0]
subpillar = subpillars[0]
print(subpillar)

Technology Adoption


In [55]:
# Standard output columns; the 2020 column is the value being scored.
df = df.assign(**{
    'higher_is_better': True,
    'Indicator': indicator,
    'data_col': df['2020'].astype(float),
    'Country Name': df['Country'],
    'Year': 2020,
    'Sub-Pillar': subpillar,
})

# Observed range of the 2020 values.
min_rank, max_rank = df['data_col'].min(), df['data_col'].max()

# Min-max rescale the observed values onto 1-6.
df['new_rank_score'] = df['data_col'].apply(convert_rank, old_min=min_rank, old_max=max_rank)

In [56]:
df = df[['Country Name', 'Year','Indicator','data_col','new_rank_score','higher_is_better','Sub-Pillar']]
df.head(15)

Unnamed: 0,Country Name,Year,Indicator,data_col,new_rank_score,higher_is_better,Sub-Pillar
0,Australia,2020,Share of business with internet,,,True,Technology Adoption
1,Austria,2020,Share of business with internet,90.42,5.347878,True,Technology Adoption
2,Belgium,2020,Share of business with internet,86.62,4.89732,True,Technology Adoption
3,Canada,2020,Share of business with internet,,,True,Technology Adoption
4,Colombia,2020,Share of business with internet,,,True,Technology Adoption
5,Czech Republic,2020,Share of business with internet,83.32,4.506047,True,Technology Adoption
6,Denmark,2020,Share of business with internet,92.77,5.626512,True,Technology Adoption
7,Estonia,2020,Share of business with internet,79.79,4.087503,True,Technology Adoption
8,Finland,2020,Share of business with internet,95.92,6.0,True,Technology Adoption
9,France,2020,Share of business with internet,70.35,2.968224,True,Technology Adoption


In [57]:
df.to_csv('../indicator_scores/business_{}_scores.csv'.format(indicator), index=False)

## 6. Share of businesses with broadband

In [58]:
indicator = indicators[5]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

Share of businesses with broadband
business_broadband


In [59]:
df.head(15)

Unnamed: 0,2008,2009,2010,Country
0,76.94,76.01,82.06,Austria
1,79.33,77.31,86.52,Czech Republic
2,87.53,86.08,88.08,Estonia
3,,91.68,93.31,France
4,83.46,87.9,89.34,Germany
5,70.37,74.19,79.61,Hungary
6,,,95.43,Iceland
7,,76.11,86.84,Ireland
8,,82.92,84.12,Italy
9,,88.03,87.91,Luxembourg


In [60]:
subpillars[0]
subpillar = subpillars[0]
print(subpillar)

Technology Adoption


In [61]:
# Standard output columns; the 2010 column is the value being scored.
df = df.assign(**{
    'higher_is_better': True,
    'Indicator': indicator,
    'data_col': df['2010'],
    'Country Name': df['Country'],
    'Year': 2010,
    'Sub-Pillar': subpillar,
})

# Observed range of the 2010 values.
min_rank, max_rank = df['data_col'].min(), df['data_col'].max()

# Min-max rescale the observed values onto 1-6.
df['new_rank_score'] = df['data_col'].apply(convert_rank, old_min=min_rank, old_max=max_rank)

In [62]:
df = df[['Country Name', 'Year','Indicator','data_col','new_rank_score','higher_is_better','Sub-Pillar']]
df.head(15)

Unnamed: 0,Country Name,Year,Indicator,data_col,new_rank_score,higher_is_better,Sub-Pillar
0,Austria,2010,Share of businesses with broadband,82.06,3.436782,True,Technology Adoption
1,Czech Republic,2010,Share of businesses with broadband,86.52,4.291188,True,Technology Adoption
2,Estonia,2010,Share of businesses with broadband,88.08,4.590038,True,Technology Adoption
3,France,2010,Share of businesses with broadband,93.31,5.591954,True,Technology Adoption
4,Germany,2010,Share of businesses with broadband,89.34,4.831418,True,Technology Adoption
5,Hungary,2010,Share of businesses with broadband,79.61,2.967433,True,Technology Adoption
6,Iceland,2010,Share of businesses with broadband,95.43,5.998084,True,Technology Adoption
7,Ireland,2010,Share of businesses with broadband,86.84,4.35249,True,Technology Adoption
8,Italy,2010,Share of businesses with broadband,84.12,3.831418,True,Technology Adoption
9,Luxembourg,2010,Share of businesses with broadband,87.91,4.557471,True,Technology Adoption


In [63]:
df.to_csv('../indicator_scores/business_{}_scores.csv'.format(indicator), index=False)

## 7. Share of businesses with online presence

In [64]:
indicator = indicators[6]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))
df.head(15)

Share of businesses with online presence
share_of_businesses_online_presence


Unnamed: 0,RANK,COUNTRY/ECONOMY,VALUE,SCORE,Year
0,1.0,Finland,95.64,100.0,2018
1,2.0,Denmark,93.92,98.01,2018
2,3.0,Japan,92.4,96.24,2018
3,4.0,Netherlands,91.89,95.65,2018
4,5.0,Switzerland,91.74,95.48,2018
5,6.0,Sweden,89.65,93.05,2018
6,7.0,Austria,89.45,92.82,2018
7,8.0,Germany,88.21,91.38,2018
8,9.0,Belgium,86.72,89.65,2018
9,10.0,United Kingdom,83.88,86.35,2018


In [65]:
subpillars[0]
subpillar = subpillars[0]
print(subpillar)

Technology Adoption


In [66]:
# Standardise the country column and add the standard output columns.
# The frame's existing 'Year' column is left as loaded.
df = (
    df
    .rename(columns={'COUNTRY/ECONOMY': 'Country Name'})
    .assign(**{
        'higher_is_better': True,
        'Indicator': indicator,
        'data_col': lambda frame: frame['VALUE'],
        'Sub-Pillar': subpillar,
    })
)

# Observed range of the raw values.
min_rank, max_rank = df['data_col'].min(), df['data_col'].max()

# Min-max rescale the observed values onto 1-6.
df['new_rank_score'] = df['data_col'].apply(convert_rank, old_min=min_rank, old_max=max_rank)

In [67]:
df = df[['Country Name', 'Year','Indicator','data_col','new_rank_score','higher_is_better','Sub-Pillar']]
df.head(15)

Unnamed: 0,Country Name,Year,Indicator,data_col,new_rank_score,higher_is_better,Sub-Pillar
0,Finland,2018,Share of businesses with online presence,95.64,6.0,True,Technology Adoption
1,Denmark,2018,Share of businesses with online presence,93.92,5.900163,True,Technology Adoption
2,Japan,2018,Share of businesses with online presence,92.4,5.811934,True,Technology Adoption
3,Netherlands,2018,Share of businesses with online presence,91.89,5.782331,True,Technology Adoption
4,Switzerland,2018,Share of businesses with online presence,91.74,5.773624,True,Technology Adoption
5,Sweden,2018,Share of businesses with online presence,89.65,5.65231,True,Technology Adoption
6,Austria,2018,Share of businesses with online presence,89.45,5.640701,True,Technology Adoption
7,Germany,2018,Share of businesses with online presence,88.21,5.568725,True,Technology Adoption
8,Belgium,2018,Share of businesses with online presence,86.72,5.482238,True,Technology Adoption
9,United Kingdom,2018,Share of businesses with online presence,83.88,5.31739,True,Technology Adoption


In [68]:
df.to_csv('../indicator_scores/business_{}_scores.csv'.format(indicator), index=False)

## 8. Size of gig economy (% of GDP)

In [69]:
indicator = indicators[7]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

Size of gig economy (% of GDP)
prevalance_gig_economy


In [70]:
df.head(15)

Unnamed: 0,RANK,COUNTRY/ECONOMY,VALUE,SCORE
0,1.0,United States,5.4,100.0
1,2.0,Netherlands,5.22,94.63
2,3.0,United Kingdom,5.19,93.8
3,4.0,Saudi Arabia,5.08,90.33
4,5.0,Malaysia,5.07,90.19
5,6.0,Egypt,5.05,89.46
6,7.0,Israel,5.02,88.42
7,8.0,Canada,4.94,86.07
8,9.0,Singapore,4.92,85.52
9,10.0,United Arab Emirates,4.87,83.82


In [71]:
subpillars[0]
subpillar = subpillars[0]
print(subpillar)

Technology Adoption


In [72]:
# Standardise the country column and add the standard output columns.
df = (
    df
    .rename(columns={'COUNTRY/ECONOMY': 'Country Name'})
    .assign(**{
        'higher_is_better': True,
        'Indicator': indicator,
        'data_col': lambda frame: frame['VALUE'],
        'Year': 2019,
        'Sub-Pillar': subpillar,
    })
)

# Observed range of the raw values.
min_rank, max_rank = df['data_col'].min(), df['data_col'].max()

# Min-max rescale the observed values onto 1-6.
df['new_rank_score'] = df['data_col'].apply(convert_rank, old_min=min_rank, old_max=max_rank)

In [73]:
df = df[['Country Name', 'Year','Indicator','data_col','new_rank_score','higher_is_better','Sub-Pillar']]
df.head(15)

Unnamed: 0,Country Name,Year,Indicator,data_col,new_rank_score,higher_is_better,Sub-Pillar
0,United States,2019,Size of gig economy (% of GDP),5.4,6.0,True,Technology Adoption
1,Netherlands,2019,Size of gig economy (% of GDP),5.22,5.72561,True,Technology Adoption
2,United Kingdom,2019,Size of gig economy (% of GDP),5.19,5.679878,True,Technology Adoption
3,Saudi Arabia,2019,Size of gig economy (% of GDP),5.08,5.512195,True,Technology Adoption
4,Malaysia,2019,Size of gig economy (% of GDP),5.07,5.496951,True,Technology Adoption
5,Egypt,2019,Size of gig economy (% of GDP),5.05,5.466463,True,Technology Adoption
6,Israel,2019,Size of gig economy (% of GDP),5.02,5.420732,True,Technology Adoption
7,Canada,2019,Size of gig economy (% of GDP),4.94,5.29878,True,Technology Adoption
8,Singapore,2019,Size of gig economy (% of GDP),4.92,5.268293,True,Technology Adoption
9,United Arab Emirates,2019,Size of gig economy (% of GDP),4.87,5.192073,True,Technology Adoption


In [74]:
df.to_csv('../indicator_scores/business_{}_scores.csv'.format(indicator), index=False)

## 9. Size of digital economy (% of transactions)


In [75]:
indicator = indicators[8]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

Size of digital economy (% of transactions)
size_digital_economy


In [76]:
df

Unnamed: 0,Order,Country Name,Value,Score
0,1.0,Singapore,78.13,100.00
1,2.0,Switzerland,64.57,82.59
2,3.0,"Korea, Rep.",63.66,81.42
3,4.0,Germany,61.45,78.58
4,5.0,Hungary,59.72,76.36
...,...,...,...,...
129,,"Congo, Dem. Rep.",,
130,,Dominican Republic,,
131,,Guinea,,
132,,Lesotho,,


In [77]:
subpillars[0]
subpillar = subpillars[0]
print(subpillar)

Technology Adoption


In [78]:
# Add the standard output columns; 'Value' is the quantity being scored.
df = df.assign(**{
    'higher_is_better': True,
    'Indicator': indicator,
    'data_col': df['Value'],
    'Year': 2019,
    'Sub-Pillar': subpillar,
})

# Observed range of the raw values.
min_rank, max_rank = df['data_col'].min(), df['data_col'].max()

# Min-max rescale the observed values onto 1-6 (rows without a value stay NaN).
df['new_rank_score'] = df['data_col'].apply(convert_rank, old_min=min_rank, old_max=max_rank)

In [79]:
df = df[['Country Name', 'Year','Indicator','data_col','new_rank_score','higher_is_better','Sub-Pillar']]
df.head(15)

Unnamed: 0,Country Name,Year,Indicator,data_col,new_rank_score,higher_is_better,Sub-Pillar
0,Singapore,2019,Size of digital economy (% of transactions),78.13,6.0,True,Technology Adoption
1,Switzerland,2019,Size of digital economy (% of transactions),64.57,5.129318,True,Technology Adoption
2,"Korea, Rep.",2019,Size of digital economy (% of transactions),63.66,5.070887,True,Technology Adoption
3,Germany,2019,Size of digital economy (% of transactions),61.45,4.928984,True,Technology Adoption
4,Hungary,2019,Size of digital economy (% of transactions),59.72,4.817902,True,Technology Adoption
5,Japan,2019,Size of digital economy (% of transactions),56.21,4.592526,True,Technology Adoption
6,Ireland,2019,Size of digital economy (% of transactions),54.35,4.473096,True,Technology Adoption
7,Denmark,2019,Size of digital economy (% of transactions),54.22,4.464749,True,Technology Adoption
8,Qatar,2019,Size of digital economy (% of transactions),54.17,4.461538,True,Technology Adoption
9,Sweden,2019,Size of digital economy (% of transactions),53.01,4.387055,True,Technology Adoption


In [80]:
df.to_csv('../indicator_scores/business_{}_scores.csv'.format(indicator), index=False)

No Country Data

## 10. Venture Capital Availability


In [81]:
indicator = indicators[9]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

Venture Capital Availability
TCdata360


In [82]:
df.head(15)

Unnamed: 0,Country ISO3,Country Name,Indicator Id,Indicator,Subindicator Type,2007,2007-2008,2008,2008-2009,2009,...,2014,2014-2015,2015,2015-2016,2016,2016-2017,2017,2017-2018,2018,2019
0,AGO,Angola,507,Imports as a percentage of GDP,Value,,,,,,...,,38.90301,,,,,,,,
1,AGO,Angola,508,Imports as a percentage of GDP,Rank,,,,,,...,,92.0,,,,,,,,
2,AGO,Angola,517,Intensity of local competition,1-7 Best,,,,,,...,,2.601164,,,,,,,,
3,AGO,Angola,518,Intensity of local competition,Rank,,,,,,...,,144.0,,,,,,,,
4,AGO,Angola,519,Extent of market dominance,1-7 Best,,,,,,...,,2.177586,,,,,,,,
5,AGO,Angola,520,Extent of market dominance,Rank,,,,,,...,,144.0,,,,,,,,
6,AGO,Angola,535,Quality of overall infrastructure,1-7 Best,,,,,,...,,2.249658,,,,,,,,
7,AGO,Angola,536,Quality of overall infrastructure,Rank,,,,,,...,,141.0,,,,,,,,
8,AGO,Angola,537,Quality of roads,1-7 Best,,,,,,...,,2.260678,,,,,,,,
9,AGO,Angola,538,Quality of roads,Rank,,,,,,...,,138.0,,,,,,,,


In [97]:
subpillars[1]
subpillar = subpillars[1]
print(subpillar)

Financing Incentives


In [None]:
## 11. Doing Business Index

In [None]:
indicator = indicators[10]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

In [None]:
df

In [None]:
subpillars[2]
subpillar = subpillars[2]
print(subpillar)

In [None]:
# Keep the rows whose 'DB Year' is 2019. .copy() detaches the selection from
# the loaded frame, so the following cell can add columns without triggering
# pandas' SettingWithCopyWarning.
df = df[df['DB Year'] == 2019].copy()
df.head(15)

In [None]:
# create standard columns (the source columns are picked by position)
df['higher_is_better'] = False
df['Indicator'] = indicator
df['data_col'] = df.iloc[:, 5]
df['Year'] = df.iloc[:, 4]
df['Country Name'] = df.iloc[:, 1]
df['Sub-Pillar'] = subpillar

# observed range of the raw values
min_rank, max_rank = df['data_col'].min(), df['data_col'].max()

# rescale the observed range onto 1-6 ...
scaled = df['data_col'].apply(convert_rank, old_min=min_rank, old_max=max_rank)

# ... then flip the scale (1 <-> 6) so that a higher new_rank_score is better
df['new_rank_score'] = (6 - scaled) + 1

In [None]:
df = df[['Country Name', 'Year','Indicator','data_col','new_rank_score','higher_is_better','Sub-Pillar']]
df.head(15)

In [None]:
df.to_csv('../indicator_scores/business_{}_scores.csv'.format(indicator), index=False)

## 12. Strength of Legal Rights 

In [None]:
indicator = indicators[11]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

In [None]:
df.info()

In [None]:
subpillars[2]
subpillar = subpillars[2]
print(subpillar)

#### Find Relevant Columns

In [None]:
df['Series Name'].unique()

In [None]:
# remove unwanted rows
df = df[~df['Series Code'].isna()]
df = df.replace('..', np.nan)

In [None]:
# First field of the row labelled 0. The row is a Series labelled by column
# name, so the position is taken explicitly with .iloc instead of relying on
# the deprecated integer-key fallback of Series[...].
df.loc[0].iloc[0]

In [None]:
df.head()

In [None]:
df.columns

In [None]:
# clean data: treat the '..' placeholder as missing
df = df.replace('..', np.nan)

# cast every year column to float in one step
year_cols = [
    '1990 [YR1990]', '2000 [YR2000]', '2011 [YR2011]', '2012 [YR2012]',
    '2013 [YR2013]', '2014 [YR2014]', '2015 [YR2015]', '2016 [YR2016]',
    '2017 [YR2017]', '2018 [YR2018]', '2019 [YR2019]', '2020 [YR2020]',
]
df[year_cols] = df[year_cols].astype(float)

In [None]:
df.info()

In [None]:
# keep only most recent value
# .copy(): a column-subset selection can stay linked to its parent frame, and
# adding columns to it then triggers pandas' SettingWithCopyWarning.
df = df[['Series Name', 'Series Code', 'Country Name', 'Country Code','2019 [YR2019]']].copy()

# standard columns; the indicator label is taken from the data's own series name
df['higher_is_better'] = True
df['Indicator'] = df['Series Name']
df['data_col'] = df['2019 [YR2019]']
df['Year'] = 2019
df['Sub-Pillar'] = subpillar

#### Convert Scales

In [None]:
# convert 0-12 rank into 1-6
df['new_rank_score'] = df['data_col'].apply(lambda row: convert_rank(row, old_min=0,old_max=12))

In [None]:
df.head(16)

In [None]:
df.columns

#### Prepare Output

In [None]:
df = df[['Country Name', 'Year','Indicator','data_col','new_rank_score','higher_is_better','Sub-Pillar']]
df.sort_values(by='new_rank_score', ascending=False).head(16)

In [None]:
# output scores
df.to_csv('../indicator_scores/business_{}_scores.csv'.format(indicator), index=False)

### 13. Time to start a business


#### Load Data

In [None]:
indicator = indicators[12]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

In [None]:
# remove unwanted rows
df = df[~df['Series Code'].isna()]
df = df.replace('..', np.nan)

In [None]:
# cast every year column to float in one step
year_cols = [
    '1990 [YR1990]', '2000 [YR2000]', '2011 [YR2011]', '2012 [YR2012]',
    '2013 [YR2013]', '2014 [YR2014]', '2015 [YR2015]', '2016 [YR2016]',
    '2017 [YR2017]', '2018 [YR2018]', '2019 [YR2019]', '2020 [YR2020]',
]
df[year_cols] = df[year_cols].astype(float)

In [None]:
df.info()

In [None]:
subpillars[2]
subpillar = subpillars[2]
print(subpillar)

In [None]:
# keep only most recent value
# .copy(): a column-subset selection can stay linked to its parent frame, and
# adding columns to it then triggers pandas' SettingWithCopyWarning.
df = df[['Series Name', 'Series Code', 'Country Name', 'Country Code','2019 [YR2019]']].copy()

# standard columns; the indicator label is taken from the data's own series name
df['higher_is_better'] = True
df['Indicator'] = df['Series Name']
df['data_col'] = df['2019 [YR2019]']
df['Year'] = 2019
df['Sub-Pillar'] = subpillar

In [None]:
df.head()

In [None]:
def map_days_to_scores(number):
    """Map a number of days onto a 1-4 score (fewer days -> higher score).

    Bands:
        number <= 2        -> 4
        2 < number < 6     -> 3
        6 <= number < 11   -> 2
        number >= 11       -> 1

    Values that satisfy none of the comparisons (e.g. NaN) yield None.
    """
    # Checked from the slowest band downwards; each guard implies the
    # upper bound of the band that follows it.
    if number >= 11:
        return 1
    if number >= 6:
        return 2
    if number > 2:
        return 3
    if number <= 2:
        return 4
    return None

In [None]:
# Replace the raw day counts in data_col with their 1-4 band score.
df['data_col'] = df['data_col'].map(map_days_to_scores)

In [None]:
# Rescale the 1-4 band score (old_min=1, old_max=4) onto the 1-6 score range.
df['new_rank_score'] = df['data_col'].apply(
    lambda band_score: convert_rank(band_score, old_min=1, old_max=4)
)

In [None]:
df.head()

In [None]:
df[df['2019 [YR2019]']<3]

In [None]:
df['2019 [YR2019]'].describe()

In [None]:
# Reduce the frame to the standard score-file columns, then preview the top scorers.
df = df.loc[:, ['Country Name', 'Year','Indicator','data_col','new_rank_score','higher_is_better','Sub-Pillar']]
df.sort_values('new_rank_score', ascending=False).head(16)

#### Prepare Output

In [None]:
bf

In [None]:
# Write this indicator's scores; the aggregation section later reads every
# 'business*' file from this folder.
df.to_csv(
    '../indicator_scores/business_{}_scores.csv'.format(indicator),
    index=False,
)

### 14. Ease of doing business


#### Load Data

In [None]:
# Select indicator #14 and look up the processed file registered for it in bnames.
indicator = indicators[13]
print(indicator)
bf = bnames.loc[bnames['Indicator'] == indicator, 'Filename'].values[0]
print(bf)

# Load the processed data for this indicator.
df = pd.read_csv('../../processed/{}.csv'.format(bf))

In [None]:
# Turn the '..' placeholders into NaN, then drop rows that carry no series code.
df = (
    df.replace('..', np.nan)
      .loc[lambda frame: frame['Series Code'].notna()]
)


In [None]:
df['Series Name'].unique()

In [None]:
df.info()

In [None]:
# Sub-pillar label attached to this indicator's rows.
subpillar = subpillars[2]
print(subpillar)

In [None]:
# Keep only the identifying columns and the most recent (2019) value.
# .copy() detaches the selection from the wider frame so the column
# assignments below do not raise SettingWithCopyWarning.
df = df[['Series Name', 'Series Code', 'Country Name', 'Country Code','2019 [YR2019]']].copy()

# higher_is_better is deliberately not set here: it is assigned in a later
# cell, once the rescaled score has been inverted.
df['Indicator'] = df['Series Name']
# The 2019 column is still text at this point; cast before using it as data_col.
df['2019 [YR2019]'] = df['2019 [YR2019]'].astype(float)
df['data_col'] = df['2019 [YR2019]']
df['Year'] = 2019
df['Sub-Pillar'] = subpillar

In [None]:
# Observed bounds of the raw ranking; used as the source range when rescaling.
rank_min, rank_max = df['data_col'].min(), df['data_col'].max()

In [None]:
rank_min, rank_max

In [None]:
# Rescale the raw ranking from [rank_min, rank_max] onto the 1-6 score range.
df['new_rank_score'] = df['data_col'].apply(
    lambda raw_rank: convert_rank(raw_rank, old_min=rank_min, old_max=rank_max)
)

In [None]:
# Flip the 1-6 score so that a higher value is better (1 <-> 6, 2 <-> 5, ...).
df['new_rank_score'] = (6 - df['new_rank_score']) + 1

In [None]:
df.sort_values(by='new_rank_score', ascending=False).head(16)

In [None]:
# new_rank_score was inverted in an earlier cell, so a higher score is now better.
df['higher_is_better'] = True
df.head(15)

#### Prepare Output

In [None]:
# Reduce the frame to the standard score-file columns.
df = df.loc[:, ['Country Name', 'Year','Indicator','data_col','new_rank_score','higher_is_better','Sub-Pillar']]

# Write this indicator's scores; the aggregation section later reads every
# 'business*' file from this folder.
df.to_csv(
    '../indicator_scores/business_{}_scores.csv'.format(indicator),
    index=False,
)

### 15. Ease of finding skilled employees

In [None]:
# Select indicator #15 and look up the processed file registered for it in bnames.
indicator = indicators[14]
print(indicator)
bf = bnames.loc[bnames['Indicator'] == indicator, 'Filename'].values[0]
print(bf)

# Load the processed data for this indicator.
df = pd.read_csv('../../processed/{}.csv'.format(bf))

In [None]:
df.head()

In [None]:
# Sub-pillar label attached to this indicator's rows.
subpillar = subpillars[2]
print(subpillar)

In [None]:
# Year columns that are cast to float and summarised in the cells below.
values = ['2017','2018','2019']

In [None]:
# Treat the literal 'No data' placeholder as a missing value.
df = df.replace('No data', np.nan)

In [None]:
# Cast the year columns listed in `values` to float.
df = df.astype(dict.fromkeys(values, float))

In [None]:
df.head()

In [None]:
df[values].describe()

In [None]:
# create standard columns
df['data_col'] = df['2019']
# The 2019 value is used directly as the score.
# NOTE(review): unlike the other indicators there is no convert_rank call here;
# confirm the source scale already matches the 1-6 score range.
df['new_rank_score'] = df['data_col']
df['higher_is_better'] = True
df['Indicator'] = indicator
df['Year'] = 2019
df['Sub-Pillar'] = subpillar

df = df.rename(columns={'Country':'Country Name'})

# Reduce the frame to the standard score-file columns.
df = df[['Country Name', 'Year','Indicator','data_col','new_rank_score','higher_is_better','Sub-Pillar']]

# output scores to csv
# DataFrame.to_csv returns None when given a path, so its result must not be
# assigned back to df (doing so would leave df as None after this cell).
df.to_csv('../indicator_scores/business_{}_scores.csv'.format(indicator), index=False)

### 16. Amount invested into startups yearly from private, public, blended sources (respectively)


In [None]:
# Select indicator #16 and look up the processed file registered for it in bnames.
indicator = indicators[15]
print(indicator)
bf = bnames.loc[bnames['Indicator'] == indicator, 'Filename'].values[0]
print(bf)

# Load the processed data for this indicator.
df = pd.read_csv('../../processed/{}.csv'.format(bf))

In [None]:
df.head(15)

In [None]:
# Sub-pillar label attached to this indicator's rows.
subpillar = subpillars[2]
print(subpillar)

In [None]:
# Keep only the 2019 rows for the 'Total' development stage with MEASURE 'USD_V'.
# .copy() makes the filtered frame independent, so the column assignments in the
# following cell do not raise SettingWithCopyWarning.
df = df[(df['Development stages']=='Total') & (df.Year == 2019) & (df.MEASURE == 'USD_V')].copy()
df.head(15)

In [None]:
# Standard columns expected in every indicator score file.
df['Country Name'] = df['Country']
df['Indicator'] = indicator
df['data_col'] = df['Value']
df['higher_is_better'] = True
df['Year'] = 2019
df['Sub-Pillar'] = subpillar

# Observed bounds of the raw values; they define the source range for rescaling.
min_rank, max_rank = df['data_col'].min(), df['data_col'].max()

# Rescale the raw values from [min_rank, max_rank] onto the 1-6 score range.
df['new_rank_score'] = df['data_col'].apply(
    lambda raw_value: convert_rank(raw_value, old_min=min_rank, old_max=max_rank)
)

# Reduce the frame to the standard score-file columns.
df = df.loc[:, ['Country Name', 'Year','Indicator','data_col','new_rank_score','higher_is_better','Sub-Pillar']]

In [None]:
df

In [None]:
# Write this indicator's scores for the aggregation section.
# NOTE(review): the file name is built from `bf` here, whereas the other
# indicator cells use `indicator` - confirm this is intended.
df.to_csv(
    '../indicator_scores/business_{}_scores.csv'.format(bf),
    index=False,
)

### Score Aggregating

In [None]:
import os


In [None]:
# get list of files in scores folder
scores = os.listdir('../indicator_scores/')
scores = [s for s in scores if s.startswith('business')]

In [None]:
scores

In [None]:
# create a dataframe that concatenates all these file into one table
df = pd.concat([pd.read_csv('../indicator_scores/{}'.format(s)) for s in scores])    

In [None]:
df

In [None]:
# Data cleaning
df['new_rank_score'] = df['new_rank_score'].fillna(0)
df.sort_values(by=['Country Name'], ascending=True, inplace=True)
df.reset_index(drop=True, inplace=True)

In [None]:
df.info()

In [None]:
df

In [None]:
df.head(15)

In [None]:
df.describe()

In [None]:
# Force country names to plain strings so the .str accessors used below apply.
df = df.astype({'Country Name': str})
df.info()

In [None]:
# checking country names
sorted(df['Country Name'].unique().tolist())

In [None]:
# remove trailing whitespaces from country name
df['Country Name'] = df['Country Name'].str.strip()
df['Country Name'] = df['Country Name'].str.strip('**')
df['Country Name'] = df['Country Name'].str.strip('*')

In [None]:
# checking country names
sorted(df['Country Name'].unique().tolist())

In [None]:
# average indicator scores per country
agg_df = df.groupby(['Country Name']).agg({'new_rank_score':'mean','data_col':'count'})

In [None]:
# Rename the aggregated columns: mean of new_rank_score -> agg_score,
# count of non-null data_col -> count_source.
agg_df.columns = ['agg_score', 'count_source' ]

In [None]:
# Largest number of indicators available for any single country.
# Read directly from the column instead of building a full describe() table
# just to pick out its 'max' entry.
max_number_sources = agg_df['count_source'].max()

In [None]:
# Weight the mean score by data coverage: the share of indicators available
# for the country relative to the best-covered country.
agg_df['agg_score_wt'] = agg_df['agg_score'].mul(
    agg_df['count_source'].div(max_number_sources)
)

In [None]:
# Rank countries from the highest to the lowest unweighted mean score.
agg_df = agg_df.sort_values(by='agg_score', ascending=False)

In [None]:
agg_df.head(25)

In [None]:
# Write the pillar-level scores. The index is kept (no index=False) because
# 'Country Name' is the index of the grouped frame.
agg_df.to_csv('../pillar_scores/business_scores_v0.csv')

In [None]:
### Score Aggregating by Subpillars

In [None]:
# Tag every row with its pillar. Guarded so the cell can be re-run:
# DataFrame.insert raises ValueError when the column already exists.
if 'Pillar' not in df.columns:
    df.insert(0,'Pillar','Business')

# Normalise country names: remove surrounding whitespace and asterisk markers.
# str.strip('**') and str.strip('*') strip the same character set, so one call
# is enough; whitespace is stripped again afterwards so that a name such as
# 'Name **' does not keep a trailing space.
df['Country Name'] = (
    df['Country Name']
    .str.strip()
    .str.strip('*')
    .str.strip()
)

In [None]:
# Per (pillar, sub-pillar, country): mean score across indicators, and the
# number of indicators that have a non-null data_col.
sub_df = (
    df.groupby(['Pillar', 'Sub-Pillar', 'Country Name'])
      .agg({'new_rank_score': 'mean', 'data_col': 'count'})
)

In [None]:
# Rename the aggregated columns: mean of new_rank_score -> agg_score,
# count of non-null data_col -> count_source.
sub_df.columns = ['agg_score', 'count_source' ]

In [None]:
# Largest number of indicators available for any (sub-pillar, country) group.
# Read directly from the column instead of building a full describe() table
# just to pick out its 'max' entry.
max_number_sources = sub_df['count_source'].max()

In [None]:
# Weight the mean score by data coverage: the share of indicators available
# for the group relative to the best-covered group.
sub_df['agg_score_wt'] = sub_df['agg_score'].mul(
    sub_df['count_source'].div(max_number_sources)
)

In [None]:
# Write the sub-pillar scores. The index is kept (no index=False) because the
# grouping keys (Pillar, Sub-Pillar, Country Name) form the index of sub_df.
sub_df.to_csv('../subpillar_score/business_scores_subpillar_v0.csv')