# Population Dataframe 

The code below includes a function that returns a pandas DataFrame for a region or country and a year, with columns giving counts of people in different age-sex groups.

### Importing Packages

In [None]:
%pip install wbdata
%pip install pandas
%pip install iso3166

import wbdata
import pandas as pd
import math
import iso3166

All data is sourced from the World Bank Data API. To access and explore the data, you can use `wbdata.get_topic()`, `wbdata.get_indicator()` or `wbdata.get_source()`.

For population estimates data, we need to access **Source 40:** 

In [30]:
# World Bank source 40 is the "Population Estimates and Projections" database.
# NOTE(review): pop_df_helper assigns its own local source_id, so the functions
# in this notebook do not read this module-level value.
source_id = 40 #Population Estimates and Projections

Now, we can define a function that will allow us to create a dataframe indexed by country, year, age group and sex 

The population age ranges in the data set are 5-year bands: 00-04, 05-09, 10-14, 15-19, 20-24, 25-29, 30-34, ..., 70-74, 75-79, plus an open-ended 80-and-over group.

In [20]:
def pop_df(year='2023', group='all', age_lower=0, age_upper=100, location='world'):
    """Return a population table by age band for one location and year.

    The table is built by ``pop_df_helper`` and has one row per age band
    with ``Country``, ``Year``, ``Age``, ``Female`` and ``Male`` columns.

    Args:
        year: Year to report; passed through to ``pop_df_helper``.
        group: ``'males'`` keeps only the male counts, ``'females'`` keeps
            only the female counts. Any other value (including the default
            ``'all'``) keeps both and adds a ``Total`` column.
        age_lower: Lower age bound, passed through to ``pop_df_helper``.
        age_upper: Upper age bound, passed through to ``pop_df_helper``.
        location: ``'world'`` or a country identifier, passed through to
            ``pop_df_helper``.

    Returns:
        A pandas DataFrame restricted to the requested sex group.
    """
    table = pop_df_helper(year, age_lower, age_upper, location)

    # Anything that is not an explicit single-sex request is treated as
    # "everyone": keep both sexes and append their sum.
    if group not in ('males', 'females'):
        table["Total"] = table["Female"] + table["Male"]
        return table

    # Single-sex request: drop the column for the other sex.
    other_sex = 'Female' if group == 'males' else 'Male'
    return table.drop(columns=[other_sex])

# Fetches male/female population counts per age band for one location and year
def pop_df_helper(year, age_lower, age_upper, location):
    """Build the per-age-band population table for one location and year.

    Args:
        year: Year to select; it is formatted into a ``date=='<year>'``
            filter, so an int or a string both work.
        age_lower: Lower age bound, forwarded to ``list_of_age_inputs``.
        age_upper: Upper age bound, forwarded to ``list_of_age_inputs``.
        location: ``'world'`` for the world aggregate, otherwise a country
            identifier resolved through ``iso3166.countries.get``.

    Returns:
        A pandas DataFrame with one row per age band and the columns
        ``Country``, ``Year``, ``Age``, ``Female`` and ``Male``.

    Raises:
        ValueError: If the downloaded data has no rows for ``year``.
            Without this check the column sums below would silently report
            a population of zero for every age band.

    Errors raised by ``iso3166`` for an unknown location and by ``wbdata``
    while downloading propagate unchanged.
    """
    if location != 'world':
        # Resolve the country once and read both attributes from the result.
        country = iso3166.countries.get(location)
        country_alpha3 = country.alpha3
        country_name = country.apolitical_name
    else:
        country_alpha3 = 'WLD'
        country_name = 'World'
    inputs = list_of_age_inputs(age_lower, age_upper)

    # Human-readable band labels, e.g. "1014" -> "10-14", "80UP" -> "80-UP".
    age_labels = ["{}-{}".format(code[:2], code[2:]) for code in inputs]

    # Indicator dictionaries required by the dataset's API: indicator code ->
    # column label, one dictionary per sex.
    indicator_dict_m = {
        "SP.POP.{}.{}".format(code, 'MA'): label
        for code, label in zip(inputs, age_labels)
    }
    indicator_dict_f = {
        "SP.POP.{}.{}".format(code, 'FE'): label
        for code, label in zip(inputs, age_labels)
    }

    source_id = 40
    wbdf_m = wbdata.get_dataframe(indicator_dict_m, country=country_alpha3, source=source_id)
    wbdf_f = wbdata.get_dataframe(indicator_dict_f, country=country_alpha3, source=source_id)

    year_filter = "date=='{}'".format(year)
    rows_m = wbdf_m.query(year_filter)
    rows_f = wbdf_f.query(year_filter)
    if rows_m.empty or rows_f.empty:
        # Summing an empty selection yields 0.0 per column, which would look
        # like real (and badly wrong) data to the caller.
        raise ValueError(
            "No population data found for year {!r} and location {!r}".format(year, location)
        )

    datas_m = rows_m.sum(axis=0).tolist()
    datas_f = rows_f.sum(axis=0).tolist()
    df = pd.DataFrame({
        'Country': country_name,
        'Year': year,
        'Age': age_labels,
        'Female': datas_f,
        'Male': datas_m
    })
    return df

# Returns a list of input strings for population age ranges
def list_of_age_inputs(age_lower, age_upper):
    """Return the age-band codes that cover ``age_lower`` to ``age_upper``.

    Closed bands are 5 years wide and are written as two zero-padded
    two-digit numbers, e.g. ``'1014'`` for ages 10 to 14. The last closed
    band is ``'7579'``; ages of 80 and over are represented by the single
    open-ended code ``'80UP'``.

    Args:
        age_lower: Lowest age of interest; snapped down to the start of its
            5-year band and never below 0.
        age_upper: Highest age of interest; the band containing it is
            included. ``'80UP'`` is appended whenever this is 80 or more.

    Returns:
        A list of band codes in ascending age order. The list is empty when
        no band lies between the bounds and ``age_upper`` is below 80.
    """
    # Start of the band that contains age_lower (clamped to 0).
    first_start = max(0, age_lower - age_lower % 5)
    # Start of the band AFTER the one containing age_upper, i.e. an exclusive
    # stop, capped so that the closed bands end at 75-79.
    stop = min(79, age_upper - age_upper % 5 + 5)

    def band_starts(start):
        # Walk the band start ages in steps of 5 up to the exclusive stop.
        while start < stop:
            yield start
            start += 5

    codes = ["{:02d}{:02d}".format(lo, lo + 4) for lo in band_starts(first_start)]
    if age_upper >= 80:
        codes.append('80UP')
    return codes

## Example call

In [32]:
# Parameters for the example call to pop_df.
year = 2017 # 1960 to 2050 (later years are projections)
group = 'all' # one of 'males', 'females' or 'all'
age_lower = 13 # rounded down to a multiple of 5 (13 -> 10)
age_upper = 61 # extended to the end of its 5-year band (61 -> 64)
location = 'bangladesh' # accepts country names/abbreviations, or 'world'

In [33]:
# Build the table for the parameters chosen above; the bare `df` on the last
# line makes the notebook display it as the cell output.
df = pop_df(year, group, age_lower, age_upper, location)
df

Unnamed: 0,Country,Year,Age,Female,Male,Total
0,Bangladesh,2017,10-14,8225272.0,8483492.0,16708764.0
1,Bangladesh,2017,15-19,8211225.0,8447567.0,16658792.0
2,Bangladesh,2017,20-24,7591111.0,7723966.0,15315077.0
3,Bangladesh,2017,25-29,7149808.0,7148678.0,14298486.0
4,Bangladesh,2017,30-34,6663791.0,6318060.0,12981851.0
5,Bangladesh,2017,35-39,6026943.0,5472318.0,11499261.0
6,Bangladesh,2017,40-44,5263200.0,4749203.0,10012403.0
7,Bangladesh,2017,45-49,4437729.0,4192227.0,8629956.0
8,Bangladesh,2017,50-54,3491474.0,3445787.0,6937261.0
9,Bangladesh,2017,55-59,3005996.0,2862495.0,5868491.0
