# Population Statistics

## Imports

The following imports are required for the function to retrieve the data.

In [67]:
%pip install wbdata
%pip install pandas
%pip install iso3166

import wbdata
import pandas as pd
import math
import iso3166

Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.


## Population Statistics Function

Given any year, sex, age range, and country, the following function will retrieve the necessary data and display it as a pandas dataframe.

Calling the function should be something like: **population(year, sex, age_range, location)**

The reasoning behind each step of the function is explained in the code comments.

In [194]:
# Ages at or above this value fall into the single open-ended "80UP" band.
_OPEN_ENDED_BAND_START = 80

# Maps the user-facing sex name (lower-cased) to the indicator suffix.
_SEX_CODES = {"male": "MA", "female": "FE"}


def _age_band_codes(age_range):
    """Return the five-year age-band codes that overlap *age_range*.

    Codes look like "0004", "0509", ..., "7579", plus "80UP" for ages 80
    and above. Only the minimum and maximum of *age_range* are used, and
    both ends are treated as inclusive.

    Raises:
        ValueError: if the smallest age is negative.
    """
    youngest, oldest = min(age_range), max(age_range)
    if youngest < 0:
        raise ValueError("Ages must be non-negative.")

    # Snap the lower end down to the start of the band that contains it.
    first_start = 5 * math.floor(youngest / 5)

    # Keep every closed band whose start is not past the oldest requested
    # age. The previous implementation rounded the upper end to the nearest
    # multiple of 5, so e.g. a maximum age of 24 pulled in "2529" and a
    # maximum age of 78 pulled in "80UP".
    codes = [
        "{:02}{:02}".format(start, start + 4)
        for start in range(first_start, _OPEN_ENDED_BAND_START, 5)
        if start <= oldest
    ]

    if oldest >= _OPEN_ENDED_BAND_START:
        codes.append("80UP")

    return codes


def population(year, sex, age_range, location):
    """Print a per-age-band population table and return the total.

    Args:
        year: Year to report; it is matched against the `date` index of the
            dataframe returned by wbdata.
        sex: "Male" or "Female" (case-insensitive).
        age_range: Iterable of ages; its min and max define an inclusive
            range, which is widened to whole five-year bands.
        location: "World"/"WLD" (case-insensitive), or any identifier that
            `iso3166.countries.get` accepts.

    Returns:
        The sum of the populations of all selected age bands.

    Raises:
        ValueError: if *sex* is not "Male"/"Female" or an age is negative.
        Any error raised by the iso3166 lookup or by wbdata propagates
        unchanged.
    """
    # Validate sex first so bad input fails before any lookup or download.
    # Previously a message was printed and the raw value was still used to
    # build indicator IDs.
    sex_name = sex.lower()
    if sex_name not in _SEX_CODES:
        raise ValueError("Please enter 'Male' or 'Female'")
    sex_code = _SEX_CODES[sex_name]

    # "World" is not a country, so it bypasses the iso3166 lookup.
    if location.lower() in ("world", "wld"):
        alpha3_name = "WLD"
        real_name = "World"
    else:
        country = iso3166.countries.get(location)
        alpha3_name = country.alpha3
        real_name = country.apolitical_name

    band_codes = _age_band_codes(age_range)

    # "0004" -> "00-04", "80UP" -> "80-UP"
    age_labels = ["{}-{}".format(code[:2], code[2:]) for code in band_codes]

    # Indicator ID -> column label, one entry per selected age band.
    indicators = {
        "SP.POP.{}.{}".format(code, sex_code):
            "Population ages {}, {}".format(label, sex_name)
        for code, label in zip(band_codes, age_labels)
    }

    # Fetch every selected indicator for the location, then keep only the
    # requested year and collapse it to one value per age band.
    wb_dataframe = wbdata.get_dataframe(indicators, country=alpha3_name, source=40)
    data = wb_dataframe.query("date=='{}'".format(year)).sum(axis=0).tolist()
    total_population = sum(data)

    final_dataframe = pd.DataFrame(
        {
            'Country': real_name,
            'Year': year,
            'Age Range': age_labels,
            'Population': data,
        }
    )

    # The table is printed rather than returned because the unit tests
    # assert on the numeric total.
    print(final_dataframe)
    return total_population

# Example: print the per-band table and return the total for males in China in 2002.
population(year=2002, sex='Male', age_range=(0, 200), location='China')

   Country  Year Age Range  Population
0    China  2002     00-04  43912190.0
1    China  2002     05-09  48413442.0
2    China  2002     10-14  63435759.0
3    China  2002     15-19  58533916.0
4    China  2002     20-24  51100603.0
5    China  2002     25-29  55383221.0
6    China  2002     30-34  65505826.0
7    China  2002     35-39  61719463.0
8    China  2002     40-44  38781702.0
9    China  2002     45-49  44581982.0
10   China  2002     50-54  35131636.0
11   China  2002     55-59  24567438.0
12   China  2002     60-64  20733525.0
13   China  2002     65-69  17669786.0
14   China  2002     70-74  12487584.0
15   China  2002     75-79   7704976.0
16   China  2002     80-UP   5201959.0


654865008.0

## Unit Test

These are some assertion statements for testing the total population of certain inputs. 

If all the test cases pass, the final output should be "All test cases have passed!"

While this is not an exhaustive list of assertions and edge cases, it covers the main functionality that the project requires.

In [196]:
def population_testing():
    """Check `population` totals against coarse upper and lower bounds.

    Each assertion calls `population`, so every call also prints its
    per-age-band table. Prints a success message when all assertions hold;
    otherwise the first failing assertion raises AssertionError.
    """
    assert population(year=2000, sex='Male', age_range=(0, 100), location='WLD') > 0, "Population should be greater than 0."
    assert population(year=2000, sex='Female', age_range=(0, 100), location='WLD') < 7e9, "Population should be less than 7 billion."
    assert population(year=1966, sex='Male', age_range=(18, 26), location='USA') < 21e6, "Population should be less than 21 million."
    assert population(year=1966, sex='Female', age_range=(18, 26), location='USA') > 21e6, "Population should be greater than 21 million."
    assert population(year=1966, sex='Female', age_range=(3, 89), location='Australia') < 6e6, "Population should be less than 6 million."
    # 6e8 is 600 million, matching the message; the threshold used to be
    # 6e7 (60 million), which made this check far weaker than stated.
    assert population(year=2002, sex='Male', age_range=(0, 200), location='China') > 6e8, "Population should be greater than 600 million."

    print("All test cases have passed!")

# Run the assertions; every population() call inside also prints its table.
population_testing()

   Country  Year Age Range   Population
0    World  2000     00-04  319947454.0
1    World  2000     05-09  317085952.0
2    World  2000     10-14  319870427.0
3    World  2000     15-19  293445532.0
4    World  2000     20-24  263094592.0
5    World  2000     25-29  256458181.0
6    World  2000     30-34  241585037.0
7    World  2000     35-39  216703667.0
8    World  2000     40-44  187436101.0
9    World  2000     45-49  166501073.0
10   World  2000     50-54  131738554.0
11   World  2000     55-59  102296144.0
12   World  2000     60-64   90035292.0
13   World  2000     65-69   71463003.0
14   World  2000     70-74   53316050.0
15   World  2000     75-79   32840343.0
16   World  2000     80-UP   25022711.0
   Country  Year Age Range   Population
0    World  2000     00-04  299176185.0
1    World  2000     05-09  297917803.0
2    World  2000     10-14  302850456.0
3    World  2000     15-19  280015254.0
4    World  2000     20-24  252662959.0
5    World  2000     25-29  248149260.0
