In [1]:
import pandas as pd
import numpy as np



### Load the pillar and indicator names from the Excel workbook

In [2]:
names = pd.read_excel('../../UNDP Digital Assessment Data Framework Filename Matching V7.xlsx')

In [3]:
col_names = ['Indicator','check', 'Data Source','Index','Filename']

In [4]:
names = names[col_names]

In [5]:
names.head()

Unnamed: 0,Indicator,check,Data Source,Index,Filename
0,Countries,,United Nations,False,Countries
1,"Database of Global Administrative Areas (GADM,...",,GADM maps and data,False,
2,High Resolution Population Density Maps + Demo...,,Facebook,False,
3,population density vs openstreetmap object den...,,Kontur,False,
4,Population Density,Infrastructure,World Bank,False,population_density


In [6]:
# Per pillar ('check'), count the rows that carry a Filename and the rows that carry an Indicator.
# count() ignores NaN, so indicators without a matched file are not counted under Filename.
data_stats = names.groupby('check')[['Filename', 'Indicator']].count()

In [7]:
data_stats

Unnamed: 0_level_0,Filename,Indicator
check,Unnamed: 1_level_1,Unnamed: 2_level_1
Business,16,25
Foundations,8,13
Government,9,15
Infrastructure,39,48
People,34,47
Regulation,5,7
Strategy,1,1


### Foundations

In [8]:
# Foundations rows that have a processed file attached.
# No filter on the Index flag is applied, so index-type indicators stay in the selection.
bnames = names.loc[names['check'].eq('Foundations') & names['Filename'].notna()]

In [9]:
bnames.head(25)

Unnamed: 0,Indicator,check,Data Source,Index,Filename
148,Digital payments penetration,Foundations,Portulans Institute,True,digital_payments_penetration
149,% of population with digital finance account -...,Foundations,World Bank,False,population_digital_financial_services
150,% of population with digital finance account -...,Foundations,World Bank,False,population_digital_financial_services
154,% of population with ID (all),Foundations,World Bank,False,id4d_nid
155,% of population with ID (female),Foundations,World Bank,False,id4d_nid
157,can ID be used for transactions,Foundations,World Bank,False,id4d_services
158,Is personal data siloed,Foundations,World Bank,False,Egov_strategy
159,Open data index,Foundations,Open Knowledge Foundation,True,open_data_idx


In [10]:
# get list of names for all indicators
indicators = bnames.Indicator.unique()

In [11]:
# get all file names
bfiles = bnames.Filename.unique()

In [12]:
bfiles

array(['digital_payments_penetration',
       'population_digital_financial_services', 'id4d_nid',
       'id4d_services', 'Egov_strategy', 'open_data_idx'], dtype=object)

In [13]:
# ls digital-readiness-assessment-main/processed/

In [14]:
##ict_goods and services not in process data

In [15]:
# Linear rescaling helper used by the indicator cells in this notebook
def convert_rank(old_value, old_min=1, old_max=7, new_min=1, new_max=6 ):
    """Linearly map a value from the [old_min, old_max] scale onto [new_min, new_max].

    old_min is mapped to new_min and old_max to new_max; everything in between is
    interpolated proportionally. Values outside the old range are not clipped.
    old_max must differ from old_min, because the old span is used as a divisor.
    """
    source_span = old_max - old_min
    target_span = new_max - new_min
    offset = old_value - old_min
    return ((offset * target_span) / source_span) + new_min

### 1. Digital payments penetration

In [16]:
indicators[0]

'Digital payments penetration'

In [17]:
# First Foundations indicator: resolve the processed file matched to it and load it
indicator = indicators[0]
print(indicator)
bf = bnames.loc[bnames['Indicator'] == indicator, 'Filename'].iloc[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

Digital payments penetration
digital_payments_penetration


In [18]:
df.head()

Unnamed: 0,RANK,COUNTRY/ECONOMY,VALUE,SCORE
0,1.0,Norway,0.85,100.0
1,2.0,Denmark,0.83,97.24
2,3.0,Finland,0.8,93.95
3,4.0,Sweden,0.8,93.08
4,5.0,Netherlands,0.76,89.01


In [19]:
# score looks like the one to use
df.describe()

Unnamed: 0,RANK,VALUE,SCORE
count,122.0,122.0,122.0
mean,61.5,0.315738,36.076967
std,35.362409,0.209012,24.867406
min,1.0,0.01,0.0
25%,31.25,0.15,16.235
50%,61.5,0.27,30.87
75%,91.75,0.44,51.11
max,122.0,0.85,100.0


In [20]:
# df.Indicator.unique()

In [21]:
# Bring the frame onto the shared score schema; SCORE is the value being rescaled
df = df.rename(columns={'COUNTRY/ECONOMY':'Country Name'})
df = df.assign(higher_is_better=True, Indicator=indicator, data_col=df['SCORE'])


# Observed range of the score column; it anchors the rescaling below
min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# Min-max rescale the scores (0-100 in the summary statistics above) onto the 1-6 scale
df['new_rank_score'] = convert_rank(df['data_col'], old_min=min_rank, old_max=max_rank)



In [22]:
df

Unnamed: 0,RANK,Country Name,VALUE,SCORE,higher_is_better,Indicator,data_col,new_rank_score
0,1.0,Norway,0.85,100.00,True,Digital payments penetration,100.00,6.0000
1,2.0,Denmark,0.83,97.24,True,Digital payments penetration,97.24,5.8620
2,3.0,Finland,0.80,93.95,True,Digital payments penetration,93.95,5.6975
3,4.0,Sweden,0.80,93.08,True,Digital payments penetration,93.08,5.6540
4,5.0,Netherlands,0.76,89.01,True,Digital payments penetration,89.01,5.4505
...,...,...,...,...,...,...,...,...
129,,Jamaica,,,True,Digital payments penetration,,
130,,Madagascar,,,True,Digital payments penetration,,
131,,Oman,,,True,Digital payments penetration,,
132,,Qatar,,,True,Digital payments penetration,,


In [23]:

# output scores to csv
df[['Country Name','Indicator','data_col','new_rank_score','higher_is_better']].to_csv('../indicator_scores/foundations_{}_scores.csv'.format(indicator), index=False)

In [24]:
df[['Country Name','Indicator','data_col','new_rank_score','higher_is_better']].head(15)


Unnamed: 0,Country Name,Indicator,data_col,new_rank_score,higher_is_better
0,Norway,Digital payments penetration,100.0,6.0,True
1,Denmark,Digital payments penetration,97.24,5.862,True
2,Finland,Digital payments penetration,93.95,5.6975,True
3,Sweden,Digital payments penetration,93.08,5.654,True
4,Netherlands,Digital payments penetration,89.01,5.4505,True
5,New Zealand,Digital payments penetration,86.73,5.3365,True
6,United States,Digital payments penetration,84.4,5.22,True
7,Estonia,Digital payments penetration,82.82,5.141,True
8,"Korea, Rep.",Digital payments penetration,82.64,5.132,True
9,Canada,Digital payments penetration,81.53,5.0765,True


## 2. % of population with digital finance account - registered


In [25]:
# Second Foundations indicator: resolve the processed file matched to it and load it
indicator = indicators[1]
print(indicator)
bf = bnames.loc[bnames['Indicator'] == indicator, 'Filename'].iloc[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

% of population with digital finance account - registered
population_digital_financial_services


In [26]:
df.head(15)

Unnamed: 0,Year,ISO,Country Name,Region,Income Group,Account (% age 15+),"Account, male (% age 15+)","Account, in labor force (% age 15+)","Account, out of labor force (% age 15+)","Account, female (% age 15+)",...,"Mobile money account, female (% age 15+)","Mobile money account, young adults (% age 15-24)","Mobile money account, older adults (% age 25+)","Mobile money account, primary education or less (% age 15+)","Mobile money account, secondary education or less (% age 15+)","Mobile money account, income, poorest 40% (% age 15+)","Mobile money account, income, richest 60% (% age 15+)","Mobile money account, rural (% age 15+)",data_country,data_year
0,2011,AFG,Afghanistan,South Asia,Low income,9%,15%,15%,2%,3%,...,,,,,,,,,,
1,2014,AFG,Afghanistan,South Asia,Low income,10%,16%,15%,4%,4%,...,0%,0%,0%,0%,0%,0%,1%,0%,,
2,2017,AFG,Afghanistan,South Asia,Low income,15%,23%,25%,4%,7%,...,1%,0%,1%,0%,2%,0%,1%,1%,,
3,2011,AGO,Angola,Sub-Saharan Africa (excluding high income),Lower middle income,39%,39%,46%,31%,39%,...,,,,,,,,,,
4,2014,AGO,Angola,Sub-Saharan Africa (excluding high income),Lower middle income,29%,36%,36%,12%,22%,...,,,,,,,,,,
5,2011,ALB,Albania,Europe & Central Asia (excluding high income),Upper middle income,28%,34%,36%,15%,23%,...,,,,,,,,,,
6,2014,ALB,Albania,Europe & Central Asia (excluding high income),Upper middle income,38%,43%,44%,29%,34%,...,,,,,,,,,,
7,2017,ALB,Albania,Europe & Central Asia (excluding high income),Upper middle income,40%,42%,53%,27%,38%,...,2%,6%,1%,1%,4%,0%,4%,2%,,
8,2011,ARB,Arab world,,,22%,30%,33%,11%,14%,...,,,,,,,,,,
9,2014,ARB,Arab world,,,30%,38%,42%,18%,22%,...,,,,,,,,,,


In [27]:
# Keep only the 2017 rows. .copy() makes the result an independent frame, so the
# column assignment below does not write into a slice of the full table
# (which is what triggers pandas' SettingWithCopyWarning).
df = df[df.Year == 2017].copy()

# The percentages are stored as text such as '15%': drop the literal % sign, then cast to float
df['Account (% age 15+)'] = (
    df['Account (% age 15+)']
    .str.replace('%', '', regex=False)
    .astype(float)
)

In [28]:
# Bring the frame onto the shared score schema
df = df.rename(columns={'Country':'Country Name'})
df['higher_is_better'] = True
df['Indicator'] = indicator
df['data_col'] = df['Account (% age 15+)']


# Observed range of the percentage column; it anchors the rescaling below.
# NOTE(review): aggregate rows such as 'Arab world' and 'World' are still in the frame
# here and take part in min/max - confirm whether they should be excluded.
min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# Min-max rescale the percentages onto the 1-6 scale
df['new_rank_score'] = convert_rank(df['data_col'], old_min=min_rank, old_max=max_rank)

In [29]:
df = df[['Country Name', 'Indicator', 'data_col', 'new_rank_score','higher_is_better']]
df

Unnamed: 0,Country Name,Indicator,data_col,new_rank_score,higher_is_better
2,Afghanistan,% of population with digital finance account -...,15.0,1.329670,True
7,Albania,% of population with digital finance account -...,40.0,2.703297,True
10,Arab world,% of population with digital finance account -...,37.0,2.538462,True
13,United Arab Emirates,% of population with digital finance account -...,88.0,5.340659,True
16,Argentina,% of population with digital finance account -...,49.0,3.197802,True
...,...,...,...,...,...
479,World,% of population with digital finance account -...,69.0,4.296703,True
482,Kosovo,% of population with digital finance account -...,52.0,3.362637,True
487,South Africa,% of population with digital finance account -...,69.0,4.296703,True
490,Zambia,% of population with digital finance account -...,46.0,3.032967,True


In [30]:
# output scores
df.to_csv('../indicator_scores/foundations_{}_scores.csv'.format(indicator), index=False)

## 3. % of population with digital finance account - active (90 days)

In [31]:
# Third Foundations indicator; per the catalogue it is matched to the same processed file as the second one
indicator = indicators[2]
print(indicator)
bf = bnames.loc[bnames['Indicator'] == indicator, 'Filename'].iloc[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

% of population with digital finance account - active (90 days)
population_digital_financial_services


In [32]:
df.head()

Unnamed: 0,Year,ISO,Country Name,Region,Income Group,Account (% age 15+),"Account, male (% age 15+)","Account, in labor force (% age 15+)","Account, out of labor force (% age 15+)","Account, female (% age 15+)",...,"Mobile money account, female (% age 15+)","Mobile money account, young adults (% age 15-24)","Mobile money account, older adults (% age 25+)","Mobile money account, primary education or less (% age 15+)","Mobile money account, secondary education or less (% age 15+)","Mobile money account, income, poorest 40% (% age 15+)","Mobile money account, income, richest 60% (% age 15+)","Mobile money account, rural (% age 15+)",data_country,data_year
0,2011,AFG,Afghanistan,South Asia,Low income,9%,15%,15%,2%,3%,...,,,,,,,,,,
1,2014,AFG,Afghanistan,South Asia,Low income,10%,16%,15%,4%,4%,...,0%,0%,0%,0%,0%,0%,1%,0%,,
2,2017,AFG,Afghanistan,South Asia,Low income,15%,23%,25%,4%,7%,...,1%,0%,1%,0%,2%,0%,1%,1%,,
3,2011,AGO,Angola,Sub-Saharan Africa (excluding high income),Lower middle income,39%,39%,46%,31%,39%,...,,,,,,,,,,
4,2014,AGO,Angola,Sub-Saharan Africa (excluding high income),Lower middle income,29%,36%,36%,12%,22%,...,,,,,,,,,,


In [33]:
# Keep only the 2017 rows. .copy() makes the result an independent frame, so the
# column assignment below does not write into a slice of the full table
# (which is what triggers pandas' SettingWithCopyWarning).
df = df[df.Year == 2017].copy()

# The percentages are stored as text with a trailing % sign: drop the literal % sign, then cast to float
df['Made or received digital payments in the past year (% age 15+)'] = (
    df['Made or received digital payments in the past year (% age 15+)']
    .str.replace('%', '', regex=False)
    .astype(float)
)

In [34]:
# Bring the frame onto the shared score schema
df = df.rename(columns={'Country':'Country Name'})
df = df.assign(
    higher_is_better=True,
    Indicator=indicator,
    data_col=df['Made or received digital payments in the past year (% age 15+)'],
)


# Observed range of the percentage column; it anchors the rescaling below.
# NOTE(review): aggregate rows such as 'Arab world' and 'World' are still in the frame
# here and take part in min/max - confirm whether they should be excluded.
min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# Min-max rescale the percentages onto the 1-6 scale
df['new_rank_score'] = convert_rank(df['data_col'], old_min=min_rank, old_max=max_rank)

In [35]:
df = df[['Country Name', 'Indicator', 'data_col', 'new_rank_score','higher_is_better']]
df

Unnamed: 0,Country Name,Indicator,data_col,new_rank_score,higher_is_better
2,Afghanistan,% of population with digital finance account -...,11.0,1.217391,True
7,Albania,% of population with digital finance account -...,29.0,2.195652,True
10,Arab world,% of population with digital finance account -...,26.0,2.032609,True
13,United Arab Emirates,% of population with digital finance account -...,84.0,5.184783,True
16,Argentina,% of population with digital finance account -...,40.0,2.793478,True
...,...,...,...,...,...
479,World,% of population with digital finance account -...,52.0,3.445652,True
482,Kosovo,% of population with digital finance account -...,39.0,2.739130,True
487,South Africa,% of population with digital finance account -...,60.0,3.880435,True
490,Zambia,% of population with digital finance account -...,39.0,2.739130,True


In [36]:
# output scores
# index=False keeps the frame's row labels out of the file, matching the other indicator
# exports; without it this file gains an extra unnamed column once the score files are
# read back and concatenated.
df.to_csv('../indicator_scores/foundations_{}_scores.csv'.format(indicator), index=False)

## 4. % of population with ID (Total)


In [47]:
# Fourth Foundations indicator: look up the processed file matched to it
indicator = indicators[3]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

# skiprows=[1,...,6] keeps the first line of the file as the header and skips the six lines after it.
# NOTE(review): in the recorded run this read failed with
# "ParserError: ... Expected 26 fields in line 8, saw 43", so the header row picked here
# does not line up with the data. The following cells then presumably operate on a frame left
# over from an earlier read - TODO confirm the correct header row / skiprows for this file.
df = pd.read_csv('../../processed/{}.csv'.format(bf), skiprows=[1,2,3,4,5,6])

% of population with ID (all)
id4d_nid


ParserError: Error tokenizing data. C error: Expected 26 fields in line 8, saw 43


In [46]:
df.head(15)

Unnamed: 0,Unnamed: 1,Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,Unnamed: 9,Unnamed: 10,Unnamed: 11,Unnamed: 12,Unnamed: 13,Unnamed: 14,Unnamed: 15,Unnamed: 16,id,Economy,Country Code,Region,Income,OECD,Lending category,Other,GCC adjusted,Inclusion Criteria,...,UPA Total,UPA Male,UPA Female,% of UPA that is Female,UPB Total,UPB in % of Population above Cut-off,UPB Male,UPB Female,% of UPB that is Female,BR Total
1,Afghanistan,AFG,SAS,LIC,,IDA,HIPC,,INCLUDE,11983428,33,5349909.0,6631580.0,55.0,18,10496548,5349909.0,5144700.0,49.0,1486880,8,0.0,1486880.0,100.0,42.3,42.7,...,7296096.0,36373176,18191591,9336665,8854905,18181585,9398623,8782976,,
2,Albania,ALB,ECS,UMC,,IBRD,,,INCLUDE,7565,0,1685.0,5451.0,72.0,16,7565,1685.0,5451.0,72.0,0,0,0.0,0.0,0.0,98.6,99.4,...,2188099.0,2934363,540349,280793,259561,2394014,1199481,1194510,,
3,Algeria,DZA,MEA,UMC,,IBRD,,,INCLUDE,4737130,11,,,,18,56303,28727.0,27576.0,49.0,4680827,17,,,,99.6,99.6,...,,42008054,14075724,7181719,6894010,27932330,14033462,13898865,,
4,Andorra,AND,ECS,HIC,,..,,,EXCLUDE,42117,53,,,,18,0,,,,42117,63,,,,100.0,–,...,,80209,13580,6971,6609,66629,34060,32569,,
5,Angola,AGO,SSF,LMC,,IBRD,,,INCLUDE,17359571,56,,,,18,12292020,6134411.0,6157438.0,50.0,5067551,35,,,,25.0,24.8,...,,30774205,16389360,8157462,8231869,14384845,6937053,7447820,,
6,Antigua and Barbuda,ATG,LCN,HIC,,IBRD,,,INCLUDE,25343,25,,,,18,2939,,,,22404,30,,,,90.0,–,...,,103050,29388,14745,14644,73662,34732,38932,,
7,Argentina,ARG,LCN,UMC,,IBRD,,,INCLUDE,58760,0,,,,16,58760,23908.0,28874.0,49.0,0,0,,,,99.5,99.6,...,,44688864,11751904,5977086,5774773,32936960,15898099,17038900,,
8,Armenia,ARM,ECS,LMC,,IBRD,,,INCLUDE,8969,0,,,,18,8969,4046.0,4831.0,54.0,0,0,,,,98.7,98.9,...,,2934152,689894,367840,322065,2244258,1012449,1231792,,
9,Australia,AUS,EAS,HIC,OECD,..,,,EXCLUDE,3029780,12,,,,18,0,,,,3029780,16,1609169.0,1422069.0,47.0,100.0,–,...,8269334.0,24772247,5624607,2884647,2739917,19147640,9456278,9691403,,
10,Austria,AUT,ECS,HIC,OECD,..,EMU,,EXCLUDE,1032598,12,,,,16,0,,,,1032598,14,523874.0,508717.0,49.0,100.0,–,...,3307645.0,8751820,1318229,676854,641377,7433591,3617222,3816362,,


In [None]:
# Bring the frame onto the shared score schema
df = df.rename(columns={'Country':'Country Name'})
df['higher_is_better'] = True
df['Indicator'] = indicator
# Distance of 'UP in % of Country Population' from 100
# (presumably UP is the unregistered share, making this the share that holds an ID - TODO confirm)
df['data_col'] = (df['UP in % of Country Population'] - 100).abs()
df['Year'] = 2018
# The country name is taken by position (second column of the frame).
# NOTE(review): positional lookup depends on how the file was parsed - verify it really holds the names.
df['Country Name'] = df.iloc[:, 1]


# Observed range of the value column; it anchors the rescaling below
min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# Min-max rescale the percentages onto the 1-6 scale
df['new_rank_score'] = convert_rank(df['data_col'], old_min=min_rank, old_max=max_rank)

In [None]:
df = df[['Country Name', 'Indicator', 'data_col', 'new_rank_score','higher_is_better']]
df

In [None]:
# output scores
df.to_csv('../indicator_scores/foundations_{}_scores.csv'.format(indicator), index=False)

In [None]:
## 5. % of population with ID (Female)

In [None]:
# Fifth Foundations indicator: resolve the processed file matched to it and load it.
# NOTE(review): per the catalogue this is the same file as the fourth indicator, read here
# without skiprows - confirm the header row is parsed as intended.
indicator = indicators[4]
print(indicator)
bf = bnames.loc[bnames['Indicator'] == indicator, 'Filename'].iloc[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

In [None]:
df.head(15)

In [None]:
# Turn the column into real numbers: 'n/a' and any other non-numeric placeholder become NaN.
# Replacing only the exact token 'n/a' would leave a text-typed column as strings, and the
# subtraction in the next cell cannot operate on strings.
df['% of UP that is Female'] = pd.to_numeric(df['% of UP that is Female'], errors='coerce')

In [None]:
# Bring the frame onto the shared score schema
df = df.rename(columns={'Country':'Country Name'})
df['higher_is_better'] = True
df['Indicator'] = indicator
# Distance of '% of UP that is Female' from 100.
# NOTE(review): if UP is the unregistered population, this yields the non-female share of
# that group rather than the share of women holding an ID - TODO confirm the intended measure.
df['data_col'] = (df['% of UP that is Female'] - 100).abs()
df['Year'] = 2018
# The country name is taken by position (second column of the frame) - verify against the parsed file
df['Country Name'] = df.iloc[:, 1]


# Observed range of the value column; it anchors the rescaling below
min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# Min-max rescale the percentages onto the 1-6 scale
df['new_rank_score'] = convert_rank(df['data_col'], old_min=min_rank, old_max=max_rank)

In [None]:
df = df[['Country Name', 'Indicator', 'data_col', 'new_rank_score','higher_is_better']]
df

In [None]:
df.to_csv('../indicator_scores/foundations_{}_scores.csv'.format(indicator), index=False)

## 6. can ID be used for transactions



In [None]:
# Sixth Foundations indicator: resolve the processed file matched to it and load it
indicator = indicators[5]
print(indicator)
bf = bnames.loc[bnames['Indicator'] == indicator, 'Filename'].iloc[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

In [None]:
df.head(20)

In [None]:
# '-' entries are treated as missing; once they are blanked out the column is cast to float
df['Digitized ID system'] = (
    df['Digitized ID system']
    .replace('-', np.nan)
    .astype(float)
)

In [None]:
df.head(15)

In [None]:
# Bring the frame onto the shared score schema; either source column name maps to 'Country Name'
df = df.rename(columns={'Economy':'Country Name', 'Country':'Country Name'})
df = df.assign(
    higher_is_better=True,
    Year=2018,
    Indicator=indicator,
    data_col=df['Digitized ID system'],
)

# Observed range of the value column; it anchors the rescaling below
min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# Min-max rescale the values onto the 1-6 scale
df['new_rank_score'] = convert_rank(df['data_col'], old_min=min_rank, old_max=max_rank)

In [None]:
df = df[['Country Name','Year','Indicator','data_col','new_rank_score','higher_is_better']]
df

In [None]:
df.to_csv('../indicator_scores/foundations_{}_scores.csv'.format(indicator), index=False)

## 7. Is personal data siloed


In [None]:
# Seventh Foundations indicator: look up the processed file matched to it and load it
indicator = indicators[6]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

# Keep the rows at positions 0-204 (205 rows); the rows after that are treated as carrying no useful information.
# NOTE(review): an earlier note on this step spoke of the first 206 rows - confirm whether the slice end should be 206.
df = df.iloc[0:205,:]

# Drop the superfluous rows: keep only the rows whose '#' column is filled in
df = df[df['#'].notna()]

# Try to convert the DPL column to float.
# errors='ignore' swallows a failed conversion and leaves DPL unchanged, so the column
# may still be non-numeric after this line.
df['DPL'] = df['DPL'].astype(float, errors = 'ignore')


In [None]:
df.head(104)

In [None]:
# Work on an independent copy so the assignments below do not write into the
# filtered slice produced when the file was loaded.
df = df.copy()
df['higher_is_better'] = True
df['Indicator'] = indicator
# Take the indicator value from the DPL column, which is the column the loading step prepares.
# The positional lookup df.iloc[:, 205] that stood here addressed the 206th *column* of the
# frame - presumably a mix-up with the 205-row limit applied when loading.
# pd.to_numeric raises if DPL still holds non-numeric text, instead of letting it pass silently.
df['data_col'] = pd.to_numeric(df['DPL'])
df['Country Name'] = df['Economy']
df['Year'] = 2020

# Observed range of the value column; it anchors the rescaling below
min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# Min-max rescale the values onto the 1-6 scale
df['new_rank_score'] = convert_rank(df['data_col'], old_min=min_rank, old_max=max_rank)

In [None]:
df = df[['Country Name','Year','Indicator','data_col','new_rank_score','higher_is_better']]
df.head(15)

In [None]:
# output scores
df.to_csv('../indicator_scores/foundations_{}_scores.csv'.format(indicator), index=False)

## 8. Open data index

In [None]:
bnames

In [None]:
# Eighth Foundations indicator: resolve the processed file matched to it and load it
indicator = indicators[7]
print(indicator)
bf = bnames.loc[bnames['Indicator'] == indicator, 'Filename'].iloc[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

In [None]:
df.head(15)

In [None]:
# Bring the frame onto the shared score schema: 'score' is the value, 'name' the country
df = df.assign(**{
    'higher_is_better': True,
    'Indicator': indicator,
    'data_col': df['score'],
    'Country Name': df['name'],
    'Year': 2016,
})

# Observed range of the score column; it anchors the rescaling below
min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# Min-max rescale the scores onto the 1-6 scale
df['new_rank_score'] = convert_rank(df['data_col'], old_min=min_rank, old_max=max_rank)

In [None]:
df = df[['Country Name','Year','Indicator','data_col','new_rank_score','higher_is_better']]
df.head(15)

In [None]:
# output scores
df.to_csv('../indicator_scores/foundations_{}_scores.csv'.format(indicator), index=False)

### Score aggregation

In [None]:
import os


In [None]:
# Collect the Foundations score files from the scores folder.
# os.listdir returns entries in arbitrary order, so the names are sorted to make the
# concatenation order reproducible; only .csv entries are kept so that any other entry
# sharing the 'foundations' prefix is not handed to read_csv.
scores = sorted(
    s for s in os.listdir('../indicator_scores/')
    if s.startswith('foundations') and s.endswith('.csv')
)

In [None]:
scores

In [None]:
# Read every collected score file and stack them into one long table
df = pd.concat(
    [
        pd.read_csv('../indicator_scores/{}'.format(score_file))
        for score_file in scores
    ]
)

In [None]:
df

In [None]:
# Missing rescaled scores are set to 0, then the table is ordered by country with a fresh 0..n-1 index.
# NOTE(review): 0 lies below the 1-6 range produced by the default rescaling, and these zeros
# enter the per-country mean computed further down - confirm this penalty is intended.
df['new_rank_score'] = df['new_rank_score'].fillna(0)
df = df.sort_values(by=['Country Name'], ascending=True).reset_index(drop=True)

In [None]:
df.info()

In [None]:
df.head(15)

In [None]:
df.describe()

In [None]:
# checking country names
sorted(df['Country Name'].unique().tolist())

In [None]:
# remove trailing whitespaces from country name
df['Country Name'] = df['Country Name'].str.strip()


In [None]:
# checking country names
sorted(df['Country Name'].unique().tolist())

In [None]:
# average indicator scores per country
agg_df = df.groupby(['Country Name']).agg({'new_rank_score':'mean','data_col':'count'})

In [None]:
agg_df.columns = ['agg_score', 'count_source' ]

In [None]:
# Largest number of non-missing indicator values recorded for any single country
max_number_sources = float(agg_df['count_source'].max())

In [None]:
agg_df['agg_score_wt'] = agg_df['agg_score']*(agg_df['count_source']/max_number_sources)

In [None]:
agg_df.sort_values(by='agg_score', ascending=False, inplace=True)

In [None]:
agg_df.head(25)

In [None]:
agg_df.to_csv('../pillar_scores/foundation_scores_v0.csv')