# Demography indicators: short aliases for World Bank indicator codes.
# NOTE: the alias `pd` is rebound by the later `import pandas as pd`, so it
# no longer holds the population-density code once that import has run.
tp = 'SP.POP.TOTL'  # total population
up = 'SP.URB.TOTL.IN.ZS'  # urban population (% of total population)
rp = 'SP.RUR.TOTL.ZS'  # rural population (% of total population)
le = 'SP.DYN.LE00.IN'  # life expectancy at birth (total years)
im = 'SP.DYN.IMRT.IN'  # infant mortality rate (per 1,000 live births)
pd = 'EN.POP.DNST'  # population density (people per sq. km of land area)
ph = 'SI.POV.NAHC'  # poverty headcount ratio at national poverty lines
pg = 'SP.POP.GROW'  # population growth (% annual)
fr = 'SP.DYN.TFRT.IN'  # total fertility rate (births per woman)
gdp = 'NY.GDP.PCAP.PP.KD'  # GDP per capita, PPP (constant prices)

In [53]:
import requests
import pandas as pd

def download_worldbank(indicator, countries, date_start, date_end):
    """Download one World Bank indicator for several countries.

    Args:
        indicator: World Bank indicator code, e.g. ``'SP.POP.TOTL'``.
        countries: Iterable of country codes; they are joined with ``;``
            to form the country segment of the request URL.
        date_start: First year of the requested range, e.g. ``'2011'``.
        date_end: Last year of the requested range, e.g. ``'2024'``.
            If either bound is ``None`` no date filter is sent.

    Returns:
        The DataFrame that ``pandas.read_xml`` builds from the XML response.

    Raises:
        requests.HTTPError: If the API responds with an error status.
    """
    import io  # local import: only needed to wrap the response bytes

    url_base = 'https://api.worldbank.org/v2/'  # Base URL for the World Bank API
    country_codes = ';'.join(countries)  # Combine country codes into a string
    url = (
        f'{url_base}country/{country_codes}/indicator/{indicator}'
        '?per_page=30000&format=xml'
    )
    # The year bounds used to be accepted but never sent, so every call
    # returned all available years. The API takes them as date=START:END.
    if date_start is not None and date_end is not None:
        url += f'&date={date_start}:{date_end}'

    # A timeout keeps a stalled connection from hanging the notebook forever.
    response = requests.get(url, timeout=60)
    # Fail with a clear HTTP error instead of an XML parse error on an
    # error page.
    response.raise_for_status()
    # Wrap the payload in a buffer: newer pandas deprecates literal XML input.
    return pd.read_xml(io.BytesIO(response.content))

In [54]:
# INDICATORS
# World Bank indicator codes to download, in request order.
indicators = [
    # Population size and where people live
    'SP.POP.TOTL',        # total population
    'SP.URB.TOTL.IN.ZS',  # urban share of total population (%)
    'SP.RUR.TOTL.ZS',     # rural share of total population (%)
    # Health at birth
    'SP.DYN.LE00.IN',     # life expectancy at birth, total (years)
    'SP.DYN.IMRT.IN',     # infant mortality (per 1,000 live births)
    # Density and poverty
    'EN.POP.DNST',        # people per sq. km of land area
    'SI.POV.NAHC',        # poverty headcount at national poverty lines (%)
    # Population dynamics
    'SP.POP.GROW',        # annual population growth (%)
    'SP.DYN.TFRT.IN',     # total fertility rate (births per woman)
    # Income
    'NY.GDP.PCAP.PP.KD',  # GDP per capita, PPP, constant prices
                          # (earlier note said 2011 $ - TODO confirm base year)
]

# REGION TABLES
# One DataFrame per indicator code, kept separately for each region.
asean_variables = {}
sa_variables = {}

# ASEAN first: one request per indicator code.
for code in indicators:
    asean_variables[code] = download_worldbank(code, asean_list, '2011', '2024')

# Then the same set of indicators for South Asia.
for code in indicators:
    sa_variables[code] = download_worldbank(code, south_asia_list, '2011', '2024')

# Preview: show the first two rows of every downloaded table,
# ASEAN tables first and South Asia tables second.
for region_tables in (asean_variables, sa_variables):
    for code, frame in region_tables.items():
        print(f"Indicator: {code}")
        print(frame.head(2))
        print()

Indicator: SP.POP.TOTL
           indicator            country countryiso3code  date     value  unit  \
0  Population, total  Brunei Darussalam             BRN  2024       NaN   NaN   
1  Population, total  Brunei Darussalam             BRN  2023  458949.0   NaN   

   obs_status  decimal  
0         NaN        0  
1         NaN        0  

Indicator: SP.URB.TOTL.IN.ZS
                                  indicator            country  \
0  Urban population (% of total population)  Brunei Darussalam   
1  Urban population (% of total population)  Brunei Darussalam   

  countryiso3code  date   value  unit  obs_status  decimal  
0             BRN  2024     NaN   NaN         NaN        0  
1             BRN  2023  79.149   NaN         NaN        0  

Indicator: SP.RUR.TOTL.ZS
                                  indicator            country  \
0  Rural population (% of total population)  Brunei Darussalam   
1  Rural population (% of total population)  Brunei Darussalam   

  countryiso3code  d

In [56]:
from pathlib import Path

# to_csv does not create missing folders, so make sure the target exists.
raw_dir = Path('../data/raw')
raw_dir.mkdir(parents=True, exist_ok=True)

# Save the ASEAN dataset as CSV (one file per indicator code)
for indicator, df in asean_variables.items():
    df.to_csv(raw_dir / f'asean_{indicator}.csv', index=False)

# Save the South Asia dataset as CSV (one file per indicator code)
for indicator, df in sa_variables.items():
    df.to_csv(raw_dir / f'south_asia_{indicator}.csv', index=False)

In [None]:
import pandas as pd

from pathlib import Path

# Tag every in-memory indicator table with its region. `assign` returns a
# copy, so the frames stored in asean_variables / sa_variables are left
# untouched (the raw export would otherwise pick up the extra column).
asean_dfs = [frame.assign(Region='ASEAN') for frame in asean_variables.values()]
sa_dfs = [frame.assign(Region='South Asia') for frame in sa_variables.values()]

# Concatenate the dataframes (ASEAN tables first, then South Asia)
all_dfs = asean_dfs + sa_dfs
df = pd.concat(all_dfs, ignore_index=True)

# Save the merged dataframe as a CSV file; create the folder if it is missing
merged_path = Path('../data/processed/merged_asean_sa_data.csv')
merged_path.parent.mkdir(parents=True, exist_ok=True)
df.to_csv(merged_path, index=False)


In [67]:
import pandas as pd

# Load the merged ASEAN + South Asia table back from disk.
merged_csv = '../data/processed/merged_asean_sa_data.csv'
df = pd.read_csv(merged_csv)

# Show the first 10 rows as a quick sanity check.
print(df.head(10))

           indicator            country countryiso3code  date     value  unit  \
0  Population, total  Brunei Darussalam             BRN  2024       NaN   NaN   
1  Population, total  Brunei Darussalam             BRN  2023  458949.0   NaN   
2  Population, total  Brunei Darussalam             BRN  2022  455370.0   NaN   
3  Population, total  Brunei Darussalam             BRN  2021  451721.0   NaN   
4  Population, total  Brunei Darussalam             BRN  2020  447404.0   NaN   
5  Population, total  Brunei Darussalam             BRN  2019  442680.0   NaN   
6  Population, total  Brunei Darussalam             BRN  2018  437810.0   NaN   
7  Population, total  Brunei Darussalam             BRN  2017  432772.0   NaN   
8  Population, total  Brunei Darussalam             BRN  2016  427564.0   NaN   
9  Population, total  Brunei Darussalam             BRN  2015  422212.0   NaN   

   obs_status  decimal Region  
0         NaN        0  ASEAN  
1         NaN        0  ASEAN  
2         Na

In [73]:
# Re-index the merged table by (country, date) and preview the first 10 rows.
index_columns = ['country', 'date']
df_setindex = df.set_index(index_columns)
df_setindex.head(10)



Unnamed: 0_level_0,Unnamed: 1_level_0,indicator,countryiso3code,value,unit,obs_status,decimal,Region
country,date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Brunei Darussalam,2024,"Population, total",BRN,,,,0,ASEAN
Brunei Darussalam,2023,"Population, total",BRN,458949.0,,,0,ASEAN
Brunei Darussalam,2022,"Population, total",BRN,455370.0,,,0,ASEAN
Brunei Darussalam,2021,"Population, total",BRN,451721.0,,,0,ASEAN
Brunei Darussalam,2020,"Population, total",BRN,447404.0,,,0,ASEAN
Brunei Darussalam,2019,"Population, total",BRN,442680.0,,,0,ASEAN
Brunei Darussalam,2018,"Population, total",BRN,437810.0,,,0,ASEAN
Brunei Darussalam,2017,"Population, total",BRN,432772.0,,,0,ASEAN
Brunei Darussalam,2016,"Population, total",BRN,427564.0,,,0,ASEAN
Brunei Darussalam,2015,"Population, total",BRN,422212.0,,,0,ASEAN


In [None]:
# Write the index too: 'country' and 'date' are the index of df_setindex,
# so index=False would drop both columns from the saved CSV.
df_setindex.to_csv('../data/processed/merged_asean_sa_data.csv')