In [1]:
import pandas as pd
import numpy as np

In [2]:
### Get all the pillar names from the excel

In [3]:
names = pd.read_excel('../../UNDP Digital Assessment Data Framework Filename Matching V7.xlsx')

In [4]:
col_names = ['Indicator','check', 'Data Source','Index','Filename']

In [5]:
names = names[col_names]

In [6]:
names.head()

Unnamed: 0,Indicator,check,Data Source,Index,Filename
0,Countries,,United Nations,False,Countries
1,"Database of Global Administrative Areas (GADM,...",,GADM maps and data,False,
2,High Resolution Population Density Maps + Demo...,,Facebook,False,
3,population density vs openstreetmap object den...,,Kontur,False,
4,Population Density,Infrastructure,World Bank,False,population_density


In [7]:
# get all the files per pillar
data_stats = names.groupby('check').agg({'Filename':'count','Indicator':'count'})

In [8]:
data_stats

Unnamed: 0_level_0,Filename,Indicator
check,Unnamed: 1_level_1,Unnamed: 2_level_1
Business,20,25
Foundations,9,12
Government,10,15
Infrastructure,45,48
People,39,47
Regulation,6,7
Strategy,1,1


In [9]:
### People

In [10]:
bnames = names[(names.check=='People')&(~names.Filename.isna())]#&(names.Index==False)]
bnames

Unnamed: 0,Indicator,check,Data Source,Index,Filename
99,Human Capital Index (HCI),People,DESA,True,e_government_index
100,% of population using internet (all),People,ITU,False,ITU_database
101,% of population using internet (female),People,ITU,False,ITU_database
102,% of population using internet (male),People,ITU,False,ITU_database
103,SDG 4.4 Digital literacy data,People,UNESCO,False,SDG_digital_literacy_data
104,UNDP Human Development Index (HDI),People,UNDP,True,undp_human_developmnt
105,Facebook Social Connectedness Index,People,Facebook,True,fb_social_connectedness
106,Share of individuals using the Internet to int...,People,OECD,False,population_interacting_public_officials
107,Level of satisfaction for online public servic...,People,Boston Consulting Group/SalesForce,False,digital_public_service_use
108,Number of mobile apps available in national la...,People,GSMA Mobile Connectivity Index,False,apps_in_national_language


In [11]:
# get list of names for all indicators
indicators = bnames.Indicator.unique()

In [12]:
# get all file names
bfiles = bnames.Filename.unique()

In [13]:
bfiles

array(['e_government_index', 'ITU_database', 'SDG_digital_literacy_data',
       'undp_human_developmnt', 'fb_social_connectedness',
       'population_interacting_public_officials',
       'digital_public_service_use', 'apps_in_national_language',
       'time_spent_online', 'happiness_score', 'cryptocurrency_adoption',
       'not_buying_online_concern_about_returning',
       'not_buying_online_concern_about_security',
       'ewaste_per_inhabitant', 'automation_led_unemployment',
       'cyberbullying_rate', 'global_wellbeing_initiative ',
       'financial_inclusiveness ', 'individuals_buying_online_frequency',
       'e-commerce_activity', 'top_sites', 'youtube_searches',
       'google_trends', 'FB_users', 'gender_gaps',
       'population_digital_financial_services',
       'mobile_broadband_pricing', 'tax_percent_mobile_ownership',
       'population_with_smartphones', 'countries_mobile_connectivity',
       'Chainalysis_2020_Geography_Cryptocurrency_Report'], dtype=object)

In [14]:
# formula for converting scale
def convert_rank(old_value, old_min=1, old_max=7, new_min=1, new_max=6 ):
    """ Convert old scale values scale into new scale values"""
    old_range = old_max - old_min
    new_range = new_max - new_min
    new_value = (((old_value-old_min)*new_range)/old_range)+new_min
    return new_value

In [15]:
### 1. Human Capital Index (HCI)

In [16]:
indicators[0]

# load data
indicator = indicators[0]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

Human Capital Index (HCI)
e_government_index


In [17]:
df.head(10)

Unnamed: 0,Survey Year,Country Name,E-Government Rank,E-Government Index,E-Participation Index,Online Service Index,Human Capital Index,Telecommunication Infrastructure Index
0,2020,Iraq,143,0.436,0.3095,0.3353,0.4358,0.537
1,2020,Ireland,27,0.8433,0.8571,0.7706,0.9494,0.81
2,2020,Israel,30,0.8361,0.7143,0.7471,0.8924,0.8689
3,2020,Italy,37,0.8231,0.8214,0.8294,0.8466,0.7932
4,2020,Jamaica,114,0.5392,0.369,0.3882,0.7142,0.5151
5,2020,Japan,14,0.8989,0.9881,0.9059,0.8684,0.9223
6,2020,Jordan,117,0.5309,0.3333,0.3588,0.68,0.554
7,2020,Kazakhstan,29,0.8375,0.881,0.9235,0.8866,0.7024
8,2020,Kenya,116,0.5326,0.5952,0.6765,0.5812,0.3402
9,2020,Kiribati,145,0.432,0.5595,0.4941,0.6778,0.1241


In [18]:
# create standard columns
# df.rename(columns={'COUNTRY/ECONOMY':'Country Name'}, inplace=True)
df['higher_is_better'] = True
df['Indicator'] = indicator
df['data_col'] = df['Human Capital Index'] 
df['Year'] = df['Survey Year']

min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# transform 0-1 rank into 1-6
df['new_rank_score'] = df['data_col'].apply(lambda row: convert_rank(row,old_min=min_rank,old_max=max_rank))

In [19]:
df[['Country Name','Year','Indicator','data_col','new_rank_score','higher_is_better']].head(15)

Unnamed: 0,Country Name,Year,Indicator,data_col,new_rank_score,higher_is_better
0,Iraq,2020,Human Capital Index (HCI),0.4358,3.179,True
1,Ireland,2020,Human Capital Index (HCI),0.9494,5.747,True
2,Israel,2020,Human Capital Index (HCI),0.8924,5.462,True
3,Italy,2020,Human Capital Index (HCI),0.8466,5.233,True
4,Jamaica,2020,Human Capital Index (HCI),0.7142,4.571,True
5,Japan,2020,Human Capital Index (HCI),0.8684,5.342,True
6,Jordan,2020,Human Capital Index (HCI),0.68,4.4,True
7,Kazakhstan,2020,Human Capital Index (HCI),0.8866,5.433,True
8,Kenya,2020,Human Capital Index (HCI),0.5812,3.906,True
9,Kiribati,2020,Human Capital Index (HCI),0.6778,4.389,True


In [20]:
### 2. % of population using internet (all)

In [21]:
indicators[1]

# load data
indicator = indicators[1]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

% of population using internet (all)
ITU_database


In [22]:
df.head(150)

Unnamed: 0,Country,Region,ISO,Indicator name,Year,Value,data_country,data_year
0,Angola,Africa,AGO,Female mobile phone ownership as a % of total ...,2010.0,,,
1,Benin,Africa,BEN,Female mobile phone ownership as a % of total ...,2010.0,,,
2,Botswana,Africa,BWA,Female mobile phone ownership as a % of total ...,2010.0,,,
3,Burkina Faso,Africa,BFA,Female mobile phone ownership as a % of total ...,2010.0,,,
4,Burundi,Africa,BDI,Female mobile phone ownership as a % of total ...,2010.0,,,
...,...,...,...,...,...,...,...,...
145,North Macedonia,Europe,MKD,Female mobile phone ownership as a % of total ...,2010.0,,,
146,Norway,Europe,NOR,Female mobile phone ownership as a % of total ...,2010.0,,,
147,Poland,Europe,POL,Female mobile phone ownership as a % of total ...,2010.0,,,
148,Portugal,Europe,PRT,Female mobile phone ownership as a % of total ...,2010.0,,,


In [23]:
df = df[(df.Year==2019)]
df = df[(df.iloc[:,3] =='Internet users: 25-74 years as a % of all 25-74 years')]

# create standard columns
# df.rename(columns={'COUNTRY/ECONOMY':'Country Name'}, inplace=True)
df['higher_is_better'] = True
df['Indicator'] = indicator
df['data_col'] = df['Value'] 
df['Country Name'] = df['Country']


min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# transform 0-1 rank into 1-6
df['new_rank_score'] = df['data_col'].apply(lambda row: convert_rank(row,old_min=min_rank,old_max=max_rank))

In [24]:
df[['Country Name', 'Year','Indicator','data_col','new_rank_score','higher_is_better']].head(100)

Unnamed: 0,Country Name,Year,Indicator,data_col,new_rank_score,higher_is_better
7750,Angola,2019.0,% of population using internet (all),,,True
7751,Benin,2019.0,% of population using internet (all),,,True
7752,Botswana,2019.0,% of population using internet (all),,,True
7753,Burkina Faso,2019.0,% of population using internet (all),,,True
7754,Burundi,2019.0,% of population using internet (all),,,True
...,...,...,...,...,...,...
7845,Samoa,2019.0,% of population using internet (all),,,True
7846,Singapore,2019.0,% of population using internet (all),,,True
7847,Solomon Islands,2019.0,% of population using internet (all),,,True
7848,Sri Lanka,2019.0,% of population using internet (all),,,True


In [25]:
### 3. % of population using internet (female)

In [26]:
indicators[2]

# load data
indicator = indicators[2]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

% of population using internet (female)
ITU_database


In [27]:
df.head(10)

# Must convert the string in the dataset to float

Unnamed: 0,Country,Region,ISO,Indicator name,Year,Value,data_country,data_year
0,Angola,Africa,AGO,Female mobile phone ownership as a % of total ...,2010.0,,,
1,Benin,Africa,BEN,Female mobile phone ownership as a % of total ...,2010.0,,,
2,Botswana,Africa,BWA,Female mobile phone ownership as a % of total ...,2010.0,,,
3,Burkina Faso,Africa,BFA,Female mobile phone ownership as a % of total ...,2010.0,,,
4,Burundi,Africa,BDI,Female mobile phone ownership as a % of total ...,2010.0,,,
5,Cabo Verde,Africa,CPV,Female mobile phone ownership as a % of total ...,2010.0,,,
6,Cameroon,Africa,CMR,Female mobile phone ownership as a % of total ...,2010.0,,,
7,Central African Rep.,Africa,CAF,Female mobile phone ownership as a % of total ...,2010.0,,,
8,Chad,Africa,TCD,Female mobile phone ownership as a % of total ...,2010.0,,,
9,Congo (Rep. of the),Africa,COG,Female mobile phone ownership as a % of total ...,2010.0,,,


In [28]:
df = df[(df.iloc[:,3] =='Female Internet users as a % of total female population')]
df = df[(df.Year==2019)]

# create standard columns
# df.rename(columns={'COUNTRY/ECONOMY':'Country Name'}, inplace=True)
df['higher_is_better'] = True
df['Indicator'] = indicator
df['data_col'] = df['Value']
df['Country Name'] = df['Country']


min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# transform 0-1 rank into 1-6
df['new_rank_score'] = df['data_col'].apply(lambda row: convert_rank(row,old_min=min_rank,old_max=max_rank))

In [29]:
df[['Country Name', 'Year','Indicator','data_col','new_rank_score','higher_is_better']].head(20)

Unnamed: 0,Country Name,Year,Indicator,data_col,new_rank_score,higher_is_better
40307,Cabo Verde,2019.0,% of population using internet (female),60.965017,3.76639,True
40308,Côte d'Ivoire,2019.0,% of population using internet (female),32.924445,2.151554,True
40309,Kenya,2019.0,% of population using internet (female),20.125122,1.414451,True
40310,Lesotho,2019.0,% of population using internet (female),44.922768,2.842529,True
40311,Mauritius,2019.0,% of population using internet (female),60.130645,3.718339,True
40312,Bahrain,2019.0,% of population using internet (female),99.316403,5.975016,True
40313,Egypt,2019.0,% of population using internet (female),52.98699,3.306941,True
40314,Kuwait,2019.0,% of population using internet (female),99.579959,5.990194,True
40315,Morocco,2019.0,% of population using internet (female),70.16713,4.296332,True
40316,Oman,2019.0,% of population using internet (female),97.153466,5.850455,True


In [30]:
### 4. % of population using internet (male)

In [31]:
indicators[3]

# load data
indicator = indicators[3]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

% of population using internet (male)
ITU_database


In [32]:
df.head(10)

Unnamed: 0,Country,Region,ISO,Indicator name,Year,Value,data_country,data_year
0,Angola,Africa,AGO,Female mobile phone ownership as a % of total ...,2010.0,,,
1,Benin,Africa,BEN,Female mobile phone ownership as a % of total ...,2010.0,,,
2,Botswana,Africa,BWA,Female mobile phone ownership as a % of total ...,2010.0,,,
3,Burkina Faso,Africa,BFA,Female mobile phone ownership as a % of total ...,2010.0,,,
4,Burundi,Africa,BDI,Female mobile phone ownership as a % of total ...,2010.0,,,
5,Cabo Verde,Africa,CPV,Female mobile phone ownership as a % of total ...,2010.0,,,
6,Cameroon,Africa,CMR,Female mobile phone ownership as a % of total ...,2010.0,,,
7,Central African Rep.,Africa,CAF,Female mobile phone ownership as a % of total ...,2010.0,,,
8,Chad,Africa,TCD,Female mobile phone ownership as a % of total ...,2010.0,,,
9,Congo (Rep. of the),Africa,COG,Female mobile phone ownership as a % of total ...,2010.0,,,


In [33]:
df = df[(df.iloc[:,3] =='Male Internet users as a % of total male population')]
df = df[(df.Year==2019)]

# create standard columns
# df.rename(columns={'COUNTRY/ECONOMY':'Country Name'}, inplace=True)
df['higher_is_better'] = True
df['Indicator'] = indicator
df['data_col'] = df['Value']
df['Country Name'] = df['Country']


min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# transform 0-1 rank into 1-6
df['new_rank_score'] = df['data_col'].apply(lambda row: convert_rank(row,old_min=min_rank,old_max=max_rank))

In [34]:
df[['Country Name', 'Year','Indicator','data_col','new_rank_score','higher_is_better']].head(20)

Unnamed: 0,Country Name,Year,Indicator,data_col,new_rank_score,higher_is_better
41078,Cabo Verde,2019.0,% of population using internet (male),62.921166,3.643039,True
41079,Côte d'Ivoire,2019.0,% of population using internet (male),39.808871,2.173867,True
41080,Kenya,2019.0,% of population using internet (male),25.071668,1.237071,True
41081,Lesotho,2019.0,% of population using internet (male),38.349985,2.08113,True
41082,Mauritius,2019.0,% of population using internet (male),63.393093,3.673038,True
41083,Bahrain,2019.0,% of population using internet (male),99.910314,5.994317,True
41084,Egypt,2019.0,% of population using internet (male),61.541347,3.555329,True
41085,Kuwait,2019.0,% of population using internet (male),99.521506,5.969602,True
41086,Morocco,2019.0,% of population using internet (male),78.636833,4.642032,True
41087,Oman,2019.0,% of population using internet (male),94.404849,5.644353,True


In [35]:
### 5. SDG 4.4 Digital literacy data

In [36]:
indicators[4]

# load data
indicator = indicators[4]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

# The indicators are all jumbled, need to unclutter this

SDG 4.4 Digital literacy data
SDG_digital_literacy_data


In [37]:
df.head(15)

Unnamed: 0,SDG_IND,Indicator,LOCATION,Country,TIME,Time,Value,Flag Codes,Flags
0,ICTSKILLTRANSFERFILE_M,Proportion of youth and adults who have transf...,BRA,Brazil,2014,2014,23.31007,,
1,ICTSKILLTRANSFERFILE_M,Proportion of youth and adults who have transf...,BRA,Brazil,2016,2016,21.53173,,
2,ICTSKILLTRANSFERFILE_M,Proportion of youth and adults who have transf...,BRA,Brazil,2017,2017,21.1488,,
3,ICTSKILLTRANSFERFILE_M,Proportion of youth and adults who have transf...,BRA,Brazil,2018,2018,21.84886,,
4,ICTSKILLDUPLIC_M,Proportion of youth and adults who have used c...,BRA,Brazil,2014,2014,25.64427,,
5,ICTSKILLDUPLIC_M,Proportion of youth and adults who have used c...,BRA,Brazil,2016,2016,22.01463,,
6,ICTSKILLDUPLIC_M,Proportion of youth and adults who have used c...,BRA,Brazil,2017,2017,22.69577,,
7,ICTSKILLDUPLIC_M,Proportion of youth and adults who have used c...,BRA,Brazil,2018,2018,22.59234,,
8,ICTSKILLTRANSFERFILE,Proportion of youth and adults who have transf...,TUR,Turkey,2014,2014,27.72807,,
9,ICTSKILLTRANSFERFILE,Proportion of youth and adults who have transf...,TUR,Turkey,2015,2015,25.66142,,


In [38]:
# create standard columns
# df.rename(columns={'COUNTRY/ECONOMY':'Country Name'}, inplace=True)
df['higher_is_better'] = True
df['Indicator'] = indicator
df['data_col'] = df['Value']
df['Country Name'] = df['Country']
df['Year'] = df['Time']

min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# transform 0-1 rank into 1-6
df['new_rank_score'] = df['data_col'].apply(lambda row: convert_rank(row,old_min=min_rank,old_max=max_rank))

In [39]:
### 6. UNDP Human Development Index (HDI)

In [81]:
indicators[5]

# load data
indicator = indicators[5]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

UNDP Human Development Index (HDI) 
undp_human_developmnt


In [82]:
df.head(15)
# Need to move the top row down
# Maybe something is wrong with the link although the one in data manifest works perfectly

Unnamed: 0,HDI rank,Country,Value,Unnamed: 3,(years),(2017 PPP $),data_country,data_year
0,,,2019.0,,2019.0,2019.0,,
1,,VERY HIGH HUMAN DEVELOPMENT,,,,,,
2,1.0,Norway,0.957,,12.89775,66494.25217,,
3,2.0,Ireland,0.955,,12.6663305,68370.58737,,
4,2.0,Switzerland,0.955,,13.38081241,69393.52076,,
5,4.0,"Hong Kong, China (SAR)",0.949,,12.27996,62984.76553,,
6,4.0,Iceland,0.949,,12.77278684,54682.38057,,
7,6.0,Germany,0.947,,14.15168,55314.35355,,
8,7.0,Sweden,0.945,,12.54847,54507.80504,,
9,8.0,Australia,0.944,,12.72469119,48084.84207,,


In [86]:
# choose only the rows where column HDI rank is numeric

df = df.iloc[0:194,:]
df = df[pd.to_numeric(df['HDI rank'], errors='coerce').notnull()]



In [84]:
df.head(15)

Unnamed: 0,HDI rank,Country,Value,Unnamed: 3,(years),(2017 PPP $),data_country,data_year
2,1.0,Norway,0.957,,12.89775,66494.25217,,
3,2.0,Ireland,0.955,,12.6663305,68370.58737,,
4,2.0,Switzerland,0.955,,13.38081241,69393.52076,,
5,4.0,"Hong Kong, China (SAR)",0.949,,12.27996,62984.76553,,
6,4.0,Iceland,0.949,,12.77278684,54682.38057,,
7,6.0,Germany,0.947,,14.15168,55314.35355,,
8,7.0,Sweden,0.945,,12.54847,54507.80504,,
9,8.0,Australia,0.944,,12.72469119,48084.84207,,
10,8.0,Netherlands,0.944,,12.4148,57707.06867,,
11,10.0,Denmark,0.94,,12.61380326,58661.87084,,


In [85]:
# create standard columns
# df.rename(columns={'COUNTRY/ECONOMY':'Country Name'}, inplace=True)
df['higher_is_better'] = True
df['Indicator'] = indicator
df['data_col'] = df['Value']
df['Country Name'] = df['Country']
df['Year'] = 2019

min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# transform 0-1 rank into 1-6
df['new_rank_score'] = df['data_col'].apply(lambda row: convert_rank(row,old_min=min_rank,old_max=max_rank))

TypeError: unsupported operand type(s) for -: 'str' and 'str'

In [None]:
### 7. Facebook Social Connectedness Index

In [62]:
indicators[6]

# load data
indicator = indicators[6]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

Facebook Social Connectedness Index
fb_social_connectedness


In [63]:
df.head(25)

Unnamed: 0,user_loc,fr_loc,scaled_sci
0,1001,AE,8729
1,1001,AG,95256
2,1001,AL,3122
3,1001,AM,3470
4,1001,AO,2839
5,1001,AR,3729
6,1001,AT,6977
7,1001,AU,21136
8,1001,AW,27607
9,1001,AZ,1108


In [None]:
# create standard columns
# df.rename(columns={'COUNTRY/ECONOMY':'Country Name'}, inplace=True)
df['higher_is_better'] = True
df['Indicator'] = indicator
df['data_col'] = df['scaled_sci'] 
df['Country Name'] = df['fr_loc']

min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# transform 0-1 rank into 1-6
df['new_rank_score'] = df['data_col'].apply(lambda row: convert_rank(row,old_min=min_rank,old_max=max_rank))

In [None]:
df[['Country Name','Indicator','data_col','new_rank_score','higher_is_better']].head(30)

# Need to find a way to convert ISO codes to full country names

In [None]:
### 8. Share of individuals using the Internet to interact with officials

In [None]:
indicators[7]

# load data
indicator = indicators[7]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

In [None]:
df.head(15)

In [None]:
# filter most recent year
df = df[(df.Time==2019)]
df = df[(df.Indicator=='Individuals using the Internet for visiting or interacting with public authorities websites - last 12 m (%)')]

# create standard columns
# df.rename(columns={'COUNTRY/ECONOMY':'Country Name'}, inplace=True)
df['higher_is_better'] = True
df['Indicator'] = indicator
df['data_col'] = df['Value'] 
df['Year'] = df['Time']
df['Country Name'] = df['Country']

min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# transform 0-1 rank into 1-6
df['new_rank_score'] = df['data_col'].apply(lambda row: convert_rank(row,old_min=min_rank,old_max=max_rank))

In [None]:
df[['Country Name', 'Year','Indicator','data_col','new_rank_score','higher_is_better']].head(20)


In [None]:
### 9. Level of satisfaction for online public service

In [None]:
indicators[8]

# load data
indicator = indicators[8]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

In [None]:
df.head(10)

In [None]:
# create standard columns
# df.rename(columns={'COUNTRY/ECONOMY':'Country Name'}, inplace=True)
df['higher_is_better'] = True
df['Indicator'] = indicator
df['data_col'] = df['Net Perception (%)'] 
df['Year'] = 2020
df['Country Name'] = df['Country']

min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# transform 0-1 rank into 1-6
df['new_rank_score'] = df['data_col'].apply(lambda row: convert_rank(row,old_min=min_rank,old_max=max_rank))

# Need to replace the % mark 

In [None]:
df[['Country Name', 'Year','Indicator','data_col','new_rank_score','higher_is_better']].head(20)

In [None]:
### 10. Number of mobile apps available in national language

In [None]:
indicators[9]

# load data
indicator = indicators[9]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

In [None]:
df.head(15)

In [None]:
# filter most recent year and global value
df = df[(df.Year==2019)]

# create standard columns
# df.rename(columns={'COUNTRY/ECONOMY':'Country Name'}, inplace=True)
df['higher_is_better'] = True
df['Indicator'] = indicator
df['data_col'] = df['Number of apps in national language'] 
df['Country Name'] = df['Country']

min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# transform 0-1 rank into 1-6
df['new_rank_score'] = df['data_col'].apply(lambda row: convert_rank(row,old_min=min_rank,old_max=max_rank))

In [None]:
df[['Country Name', 'Year','Indicator','data_col','new_rank_score','higher_is_better']].head(20)

In [None]:
### 11. Device Addiction (time of use on internet)

In [None]:
indicators[10]

# load data
indicator = indicators[10]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

In [None]:
df.head(15)

# Need to move the row further down

In [None]:
# create standard columns
# df.rename(columns={'COUNTRY/ECONOMY':'Country Name'}, inplace=True)
df['higher_is_better'] = True
df['Indicator'] = indicator
df['data_col'] = df['Average daily time spent using the internet by online users worldwide as of 3rd quarter 2020, by region (in hours.minutes)'] 
df['Country Name'] = df['Country']
df['Year'] = 2020

min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# transform 0-1 rank into 1-6
df['new_rank_score'] = df['data_col'].apply(lambda row: convert_rank(row,old_min=min_rank,old_max=max_rank))

In [None]:
df[['Country Name', 'Year','Indicator','data_col','new_rank_score','higher_is_better']].head(20)

In [None]:
### 12. Gross National Wellbeing

In [None]:
indicators[11]

# load data
indicator = indicators[11]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

# happiness_score is missing looks like the problem is in a redundant space before it in the filename matching

In [None]:
df.head(15)

In [None]:
# create standard columns
# df.rename(columns={'COUNTRY/ECONOMY':'Country Name'}, inplace=True)
df['higher_is_better'] = True
df['Indicator'] = indicator
df['data_col'] = df['VALUE'] 
df['Country Name'] = df['COUNTRY/ECONOMY']
df['Year'] = 2019

min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# transform 0-1 rank into 1-6
df['new_rank_score'] = df['data_col'].apply(lambda row: convert_rank(row,old_min=min_rank,old_max=max_rank))

In [None]:
df[['Country Name', 'Year','Indicator','data_col','new_rank_score','higher_is_better']].head(20)

In [None]:
# create standard columns
# df.rename(columns={'COUNTRY/ECONOMY':'Country Name'}, inplace=True)
df['higher_is_better'] = True
df['Indicator'] = indicator
df['data_col'] = df['Average daily time spent using the internet by online users worldwide as of 3rd quarter 2020, by region (in hours.minutes)'] 
df['Country Name'] = df['Country']
df['Year'] = 2020

min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# transform 0-1 rank into 1-6
df['new_rank_score'] = df['data_col'].apply(lambda row: convert_rank(row,old_min=min_rank,old_max=max_rank))

In [None]:
### 13. % of internet users who own cryptocurrency

In [None]:
indicators[12]

# load data
indicator = indicators[12]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

In [None]:
df.head(15)

In [None]:
# create standard columns
# df.rename(columns={'COUNTRY/ECONOMY':'Country Name'}, inplace=True)
df['higher_is_better'] = True
df['Indicator'] = indicator
df['data_col'] = df['Share of respondents who indicated they either owned or used cryptocurrencies in 55 countries worldwide in 2020'] 
df['Country Name'] = df['Country']
df['Year'] = 2020

min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# transform 0-1 rank into 1-6
df['new_rank_score'] = df['data_col'].apply(lambda row: convert_rank(row,old_min=min_rank,old_max=max_rank))

In [None]:
df[['Country Name', 'Year','Indicator','data_col','new_rank_score','higher_is_better']].head(20)

In [None]:
### 14. Percentage of individuals not buying online due to concerns about returning products

In [None]:
indicators[13]

# load data
indicator = indicators[13]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

In [None]:
df.head(15)

In [None]:
# filter most recent year and global value
df = df[(df.Time==2019)]
df = df[(df.Scope =='All individuals (aged 16-74)')]

# create standard columns
# df.rename(columns={'COUNTRY/ECONOMY':'Country Name'}, inplace=True)
df['higher_is_better'] = False
df['Indicator'] = indicator
df['data_col'] = df['Value'] 
df['Year'] = df['Time']
df['Country Name'] = df['Country']

min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# convert 1-190 rank into 1-6
df['new_rank_score'] = df['data_col'].apply(lambda row: convert_rank(row, old_min=min_rank,old_max=max_rank))

# invert since to put as higher is  better
df['new_rank_score'] = df['new_rank_score'].apply(lambda row: (6-row)+1)

In [None]:
df[['Country Name', 'Year','Indicator','data_col','new_rank_score','higher_is_better']].head(20)

In [None]:
### 15. Percentage of individuals not buying online due to payment security concerns

In [None]:
indicators[14]

# load data
indicator = indicators[14]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

In [None]:
df.head(15)

In [None]:
# filter most recent year and global value
df = df[(df.Time==2019)]
df = df[(df.Scope =='All individuals (aged 16-74)')]

# create standard columns
# df.rename(columns={'COUNTRY/ECONOMY':'Country Name'}, inplace=True)
df['higher_is_better'] = False
df['Indicator'] = indicator
df['data_col'] = df['Value'] 
df['Year'] = df['Time']
df['Country Name'] = df['Country']

min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# convert 1-190 rank into 1-6
df['new_rank_score'] = df['data_col'].apply(lambda row: convert_rank(row, old_min=min_rank,old_max=max_rank))

# invert since to put as higher is  better
df['new_rank_score'] = df['new_rank_score'].apply(lambda row: (6-row)+1)

In [None]:
df[['Country Name', 'Year','Indicator','data_col','new_rank_score','higher_is_better']].head(20)

In [None]:
### 16. E-waste generated, kilograms per inhabitant

In [None]:
indicators[15]

# load data
indicator = indicators[15]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

In [None]:
df.head(15)

In [None]:
# create standard columns
# df.rename(columns={'COUNTRY/ECONOMY':'Country Name'}, inplace=True)
df['higher_is_better'] = True
df['Indicator'] = indicator
# Use special_waste_e_waste_tons_year times one thousand and divde by total population
df['data_col'] = df['special_waste_e_waste_tons_year']*1000/df['population_population_number_of_people'] 
df['Year'] = 2021
df['Country Name'] = df['country_name']

min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# convert 1-190 rank into 1-6
df['new_rank_score'] = df['data_col'].apply(lambda row: convert_rank(row, old_min=min_rank,old_max=max_rank))

In [None]:
df[['Country Name', 'Year','Indicator','data_col','new_rank_score','higher_is_better']].head(20)

In [None]:
### 17. Automation-led unemployment

In [None]:
indicators[16]

# load data
indicator = indicators[16]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

In [None]:
df.head(15)

In [None]:
# create standard columns
# df.rename(columns={'COUNTRY/ECONOMY':'Country Name'}, inplace=True)
df['higher_is_better'] = True
df['Indicator'] = indicator
df['data_col'] = df['Potential Rate of Automation (%)'] 
df['Year'] = 2018
df['Country Name'] = df['Country']

min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# convert 1-190 rank into 1-6
df['new_rank_score'] = df['data_col'].apply(lambda row: convert_rank(row, old_min=min_rank,old_max=max_rank))

In [None]:
df[['Country Name', 'Year','Indicator','data_col','new_rank_score','higher_is_better']].head(20)

In [None]:
### 18. Cyberbullying

In [None]:
indicators[17]

# load data
indicator = indicators[17]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

In [None]:
df.head(15)

# Need to move the top row down further

In [None]:
# create standard columns
# df.rename(columns={'COUNTRY/ECONOMY':'Country Name'}, inplace=True)
df['higher_is_better'] = True
df['Indicator'] = indicator
df['data_col'] = df['2018'] 
df['Year'] = 2018
df['Country Name'] = df['Country']

min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# convert 1-190 rank into 1-6
df['new_rank_score'] = df['data_col'].apply(lambda row: convert_rank(row, old_min=min_rank,old_max=max_rank))

# invert since to put as higher is  better
df['new_rank_score'] = df['new_rank_score'].apply(lambda row: (6-row)+1)

In [None]:
df[['Country Name', 'Year','Indicator','data_col','new_rank_score','higher_is_better']].head(20)

In [None]:
### 19. Global Wellbeing Initiative

In [60]:
indicators[18]

# load data
indicator = indicators[18]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

# Strange the global_wellbeing_initiative is clearly in the data manifest and the link works well 
# but it has not been moved to the processed folder

Global Wellbeing Initiative (World Happiness Index)
global_wellbeing_initiative 


FileNotFoundError: [Errno 2] No such file or directory: '../../processed/global_wellbeing_initiative .csv'

In [None]:
### 20. Financial Inclusiveness

In [61]:
indicators[19]

# load data
indicator = indicators[19]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)


df = pd.read_csv('../../processed/{}.csv'.format(bf))
# Strange the global_wellbeing_initiative is clearly in the data manifest and the link works well 
# but it has not been moved to the processed folder

Financial Inclusiveness
financial_inclusiveness 


FileNotFoundError: [Errno 2] No such file or directory: '../../processed/financial_inclusiveness .csv'

In [None]:
### 21. E-commerce activity (% of individuals buying online and frequency)

In [None]:
indicators[20]

# load data
indicator = indicators[20]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

# Added _ between spaces to see if this changes anything

In [None]:
df.head(15)

# Move the top row down further

In [None]:
# create standard columns
# df.rename(columns={'COUNTRY/ECONOMY':'Country Name'}, inplace=True)
df['higher_is_better'] = True
df['Indicator'] = indicator
df['data_col'] = df['VALUE (%)'] 
df['Country Name'] = df['COUNTRY/ECONOMY']

min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# convert 1-190 rank into 1-6
df['new_rank_score'] = df['data_col'].apply(lambda row: convert_rank(row, old_min=min_rank,old_max=max_rank))

In [None]:
df[['Country Name', 'Year','Indicator','data_col','new_rank_score','higher_is_better']].head(20)

In [None]:
### 22. E-commerce activity (Types of goods and services purchased online) 

In [None]:
indicators[21]

# load data
indicator = indicators[21]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

# Move the top row down further
# This is a list of categories, wonder if it will do

In [None]:
df.head(15)

In [None]:
### 23. Top Visited websites

In [None]:
indicators[22]

# load data
indicator = indicators[22]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

In [None]:
df.head(15)

In [None]:
### 24. Top YouTube Searches

In [None]:
indicators[23]

# load data
indicator = indicators[23]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

In [None]:
df.head(15)

In [None]:
### 25. Top Google searches

In [None]:
indicators[24]

# load data
indicator = indicators[24]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

In [None]:
df.head(15)

In [None]:
### 26. Internet Usage

In [None]:
indicators[25]

# load data
indicator = indicators[25]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

In [None]:
df.head(15)

In [None]:
# create standard columns
# df.rename(columns={'COUNTRY/ECONOMY':'Country Name'}, inplace=True)
df['higher_is_better'] = True
df['Indicator'] = indicator
df['data_col'] = df['2019_value'] 
df['Year'] = 2019
df['Country Name'] = df['ShortName']

min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# convert 1-190 rank into 1-6
df['new_rank_score'] = df['data_col'].apply(lambda row: convert_rank(row, old_min=min_rank,old_max=max_rank))

In [None]:
df[['Country Name','Year','data_col','new_rank_score','higher_is_better']].head(15)

In [None]:
### 27. Households with a computer and with Internet Access

In [None]:
indicators[26]

# load data
indicator = indicators[26]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

In [None]:
df.head(15)

# have to replace 'No data' with blank

In [None]:
# create standard columns
# df.rename(columns={'COUNTRY/ECONOMY':'Country Name'}, inplace=True)
df['higher_is_better'] = True
df['Indicator'] = indicator
df['data_col'] = df['2016'] 
df['Year'] = 2016
df['Country Name'] = df['Country']

min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# transform 0-1 rank into 1-6
df['new_rank_score'] = df['data_col'].apply(lambda row: convert_rank(row,old_min=min_rank,old_max=max_rank))

In [None]:
df[['Country Name','Year','Indicator','data_col','new_rank_score','higher_is_better']].head(15)

In [None]:
### 28. % of population using Facebook

In [None]:
indicators[27]

# load data
indicator = indicators[27]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

In [None]:
df.head(15)

In [None]:
# create standard columns
# df.rename(columns={'COUNTRY/ECONOMY':'Country Name'}, inplace=True)
df['higher_is_better'] = True
df['Indicator'] = indicator
df['data_col'] = df['Percentage of Facebook Users'] 
df['Year'] = 2021
df['Country Name'] = df['Country']

min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# transform 0-1 rank into 1-6
df['new_rank_score'] = df['data_col'].apply(lambda row: convert_rank(row,old_min=min_rank,old_max=max_rank))

In [None]:
df[['Country Name','Year','Indicator','data_col','new_rank_score','higher_is_better']].head(15)

In [None]:
### 29. Gender gap for social media use

In [None]:
indicators[28]

# load data
indicator = indicators[28]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

In [None]:
df.head(15)

In [None]:
df = df[df.Year==2019]

# create standard columns
# df.rename(columns={'COUNTRY/ECONOMY':'Country Name'}, inplace=True)
df['higher_is_better'] = True
df['Indicator'] = indicator
df['data_col'] = df['Gender gap in social media use'] 
df['Country Name'] = df['Country']

min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# transform 0-1 rank into 1-6
df['new_rank_score'] = df['data_col'].apply(lambda row: convert_rank(row,old_min=min_rank,old_max=max_rank))

In [None]:
df[['Country Name','Year','Indicator','data_col','new_rank_score','higher_is_better']].head(15)

In [None]:
### 30. % of population using digital financial services

In [None]:
indicators[29]

# load data
indicator = indicators[29]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

In [None]:
df.head(15)

# May have to convert the data to csv file as the original file is missing many important columns

In [None]:
### 31. Mobile Broadband Pricing (pre-paid)

In [None]:
indicators[30]

# load data
indicator = indicators[30]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

# File not found despite its presence in the personal repo and the data manifest

In [None]:
### 32. Tax as % of total cost of mobile ownership

In [None]:
indicators[31]

# load data
indicator = indicators[31]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

In [None]:
df.head(15)

In [None]:
df = df[df.Year==2019]

# create standard columns
# df.rename(columns={'COUNTRY/ECONOMY':'Country Name'}, inplace=True)
df['higher_is_better'] = False
df['Indicator'] = indicator
df['data_col'] = df['Tax as a % of TCMO'] 
df['Country Name'] = df['Country']

min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# transform 0-1 rank into 1-6
df['new_rank_score'] = df['data_col'].apply(lambda row: convert_rank(row,old_min=min_rank,old_max=max_rank))

# invert since to put as higher is  better
df['new_rank_score'] = df['new_rank_score'].apply(lambda row: (6-row)+1)

In [None]:
df[['Country Name','Year','Indicator','data_col','new_rank_score','higher_is_better']].head(15)

In [None]:
### 33. % of population with a SIM card

In [None]:
indicators[32]

# load data
indicator = indicators[32]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

# File not found despite its presence in the personal repo and the data manifest

In [None]:
### 34. % of population with a smartphone

In [None]:
indicators[33]

# load data
indicator = indicators[33]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

In [None]:
df.head(15)

In [None]:
# create standard columns
# df.rename(columns={'COUNTRY/ECONOMY':'Country Name'}, inplace=True)
df['higher_is_better'] = True
df['Indicator'] = indicator
df['data_col'] = df['Smartphone Penetration Rate'] 
df['Country Name'] = df['Country']

min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# transform 0-1 rank into 1-6
df['new_rank_score'] = df['data_col'].apply(lambda row: convert_rank(row,old_min=min_rank,old_max=max_rank))

# Must delete the % mark in the fourth column

In [None]:
### 35. Gender gap in internet usage

In [None]:
indicators[34]

# load data
indicator = indicators[34]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

In [None]:
df.head(15)

# The problem with the data base is that it has ... in the Male and Female columns, making it string columns, not numerical
# This will present problems when trying to calculate ranking

In [None]:
### 36. Gender gap in mobile usage

In [None]:
indicators[35]

# load data
indicator = indicators[35]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))

In [None]:
df.head(15)

In [None]:
df = df[(df.Year==2019)]

# create standard columns
# df.rename(columns={'COUNTRY/ECONOMY':'Country Name'}, inplace=True)
df['higher_is_better'] = True
df['Indicator'] = indicator
df['data_col'] = df['Gender gap in mobile ownership'] 
df['Country Name'] = df['Country']

min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# transform 0-1 rank into 1-6
df['new_rank_score'] = df['data_col'].apply(lambda row: convert_rank(row,old_min=min_rank,old_max=max_rank))

In [None]:
df[['Country Name','Year','Indicator','data_col','new_rank_score','higher_is_better']].head(15)

In [None]:
### 37. Mobile Device Penetration

In [None]:
indicators[36]

# load data
indicator = indicators[36]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))


In [None]:
df.head(15)

In [None]:
df = df[(df.Year==2019)]

# create standard columns
# df.rename(columns={'COUNTRY/ECONOMY':'Country Name'}, inplace=True)
df['higher_is_better'] = True
df['Indicator'] = indicator
df['data_col'] = df['Mobile Ownership'] 
df['Country Name'] = df['Country']

min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# transform 0-1 rank into 1-6
df['new_rank_score'] = df['data_col'].apply(lambda row: convert_rank(row,old_min=min_rank,old_max=max_rank))

In [None]:
df[['Country Name','Year','Indicator','data_col','new_rank_score','higher_is_better']].head(15)

In [None]:
### 38. Mobile Device Penetration (female)

In [None]:
indicators[37]

# load data
indicator = indicators[37]
print(indicator)
bf = bnames[bnames['Indicator']==indicator]['Filename'].values[0]
print(bf)

df = pd.read_csv('../../processed/{}.csv'.format(bf))


In [None]:
df.head(15)

In [None]:
df = df[(df.Year==2019)]

# create standard columns
# df.rename(columns={'COUNTRY/ECONOMY':'Country Name'}, inplace=True)
df['higher_is_better'] = True
df['Indicator'] = indicator
df['data_col'] = df['Gender gap in mobile ownership'] 
df['Country Name'] = df['Country']

min_rank = df['data_col'].min()
max_rank = df['data_col'].max()

# transform 0-1 rank into 1-6
df['new_rank_score'] = df['data_col'].apply(lambda row: convert_rank(row,old_min=min_rank,old_max=max_rank))

In [None]:
df[['Country Name','Year','Indicator','data_col','new_rank_score','higher_is_better']].head(15)