In [23]:
#1. [A] Population Statistics
import requests
import re

# Base URL of the World Bank API (v2); indicator request URLs are built on it.
WB_BASE = "https://api.worldbank.org/v2"


class PopulationDataError(ValueError):
    """Raised for invalid population arguments or missing indicator data."""


def _wb_get_value(place, indicator, year):
    """Fetch one indicator value for one place and year from the World Bank API.

    The place code is stripped and upper-cased, the indicator code is
    stripped, and the year is coerced with ``int`` before the request is sent.

    Returns the reported value as a float. Returns None when the decoded
    response is not a list of at least two items, when its second item is
    empty, or when the first record's "value" is None. HTTP error statuses
    propagate from ``raise_for_status``.
    """
    country_code = place.strip().upper()
    indicator_code = indicator.strip()
    query = {"format": "json", "date": str(int(year)), "per_page": 20000}

    response = requests.get(
        f"{WB_BASE}/country/{country_code}/indicator/{indicator_code}",
        params=query,
        timeout=30,
    )
    response.raise_for_status()
    payload = response.json()

    # Anything other than a list with a non-empty second item is treated
    # as "no data".
    if not (isinstance(payload, list) and len(payload) >= 2 and payload[1]):
        return None

    value = payload[1][0].get("value", None)
    if value is None:
        return None
    return float(value)


def _age_group_indicator(low, high, sex):
    suffix = "MA" if sex == "males" else "FE"
    return f"SP.POP.{low:02d}{high:02d}.{suffix}"


def _age_80_plus_indicator(sex):
    return "SP.POP.80UP.MA" if sex == "males" else "SP.POP.80UP.FE"


def population(year, sex, age_range, place):
    """
    Returns the population count needed to answer:

    In [year] how many [people/males/females] aged [low] to [high]
    were living in [the world/region/country]?

    The count is assembled from per-sex five-year age bins (0-4 ... 75-79)
    plus a single open-ended 80+ bin. When the requested range covers only
    part of a five-year bin, that bin contributes in proportion to the
    number of ages covered (i.e. ages are assumed evenly spread in a bin).

    Args:
        year: Calendar year as an int (bools are rejected).
        sex: "people", "males" or "females"; case and surrounding
            whitespace are ignored. "people" is males plus females.
        age_range: ``(low, high)`` tuple of inclusive int ages with
            ``0 <= low <= high``. Any ``high >= 80`` adds the whole 80+
            bin, so ``low`` may not exceed 80.
        place: World Bank place code such as "JPN" or "WLD"; case and
            surrounding whitespace are ignored.

    Returns:
        The estimated count rounded to the nearest int.

    Raises:
        PopulationDataError: If an argument is invalid, or an indicator
            needed for the answer has no value for the place and year.
    """

    def is_plain_int(value):
        # bool is a subclass of int; without this, True would be accepted
        # as the year 1 or the age 1.
        return isinstance(value, int) and not isinstance(value, bool)

    if not is_plain_int(year):
        raise PopulationDataError("year must be an int")

    if not isinstance(age_range, tuple) or len(age_range) != 2:
        raise PopulationDataError("age_range must be (low, high)")

    low, high = age_range
    if not (is_plain_int(low) and is_plain_int(high)):
        raise PopulationDataError("age_range values must be ints")

    if low < 0 or high < 0 or high < low:
        raise PopulationDataError("age_range must satisfy 0 <= low <= high")

    # Check the type first so a non-string reports the documented error
    # instead of an AttributeError from .strip().
    if not isinstance(sex, str):
        raise PopulationDataError("sex must be 'people', 'males', or 'females'")
    sex = sex.strip().lower()
    if sex not in ("people", "males", "females"):
        raise PopulationDataError("sex must be 'people', 'males', or 'females'")

    if not isinstance(place, str):
        raise PopulationDataError("place must be a World Bank code like 'JPN' or 'WLD'")
    place = place.strip().upper()
    if not place:
        raise PopulationDataError("place must be a World Bank code like 'JPN' or 'WLD'")

    # Ages 80 and above exist only as one open-ended bin, so a range that
    # reaches 80 takes that whole bin and cannot start above 80.
    includes_80_plus = high >= 80
    if includes_80_plus and low > 80:
        raise PopulationDataError("low > 80 not supported (80+ is a single bin)")
    high_effective = 79 if includes_80_plus else high

    def fetch(indicator):
        # Fetch one indicator value, turning "no data" into an error.
        value = _wb_get_value(place, indicator, year)
        if value is None:
            raise PopulationDataError(f"missing data: {place}, {year}, {indicator}")
        return value

    def one_sex_total(one_sex):
        # Sum the 80+ bin (if requested) and every overlapping 5-year bin.
        total = 0.0

        if includes_80_plus:
            total += fetch(_age_80_plus_indicator(one_sex))

        for start in range(0, 80, 5):
            end = start + 4

            overlap_low = max(low, start)
            overlap_high = min(high_effective, end)
            if overlap_low > overlap_high:
                continue

            bin_value = fetch(_age_group_indicator(start, end, one_sex))

            # Pro-rate a partially covered bin by the share of its 5 ages.
            years_covered = overlap_high - overlap_low + 1
            total += bin_value * (years_covered / 5.0)

        return total

    if sex == "people":
        total_value = one_sex_total("males") + one_sex_total("females")
    else:
        total_value = one_sex_total(sex)

    return int(round(total_value))


# Lower-case place names accepted in queries, mapped to the place codes
# handed to population(). A name that is not listed here is upper-cased by
# population_from_query() and used as the code directly.
PLACE_MAP = {
    "japan": "JPN",
    "world": "WLD",
}


def population_from_query(query):
    """
    Takes a query of the form:

    In [year] how many [people/males/females] aged [low] to [high]
    were living in [the world/region/country]?

    and returns the population count.

    The five values must be written inside literal square brackets, in the
    order year, sex, low, high, place, for example
    "In [2004] how many [people] aged [12] to [30] were living in [Japan]?".

    The place is lower-cased and its whitespace collapsed, then looked up
    in PLACE_MAP. A leading "the " is dropped when the remainder is a known
    PLACE_MAP name (so "the world" resolves like "world"). Any other text
    is upper-cased and passed on as a place code.

    Raises:
        ValueError: If the query does not contain exactly five bracketed
            fields, if year/low/high are not integers, or if population()
            rejects the parsed values.
    """
    parts = re.findall(r"\[(.*?)\]", query, flags=re.DOTALL)

    if len(parts) != 5:
        raise ValueError(
            "Query must have 5 bracketed fields: [year], [sex], [low], [high], [place]"
        )

    year_text, sex_text, low_text, high_text, place_text = (
        part.strip() for part in parts
    )

    year = int(year_text)
    sex = sex_text.lower()
    low = int(low_text)
    high = int(high_text)

    place_name = re.sub(r"\s+", " ", place_text.lower())

    # The documented query form allows "the world"; accept the article, but
    # only when what follows is a known name, so unknown text keeps falling
    # through unchanged.
    article = "the "
    if place_name not in PLACE_MAP and place_name.startswith(article):
        without_article = place_name[len(article):]
        if without_article in PLACE_MAP:
            place_name = without_article

    if place_name in PLACE_MAP:
        place = PLACE_MAP[place_name]
    else:
        place = place_name.upper()

    return population(year, sex, (low, high), place)
def ask(query):
    """Print the population count that answers ``query``."""
    answer = population_from_query(query)
    print(answer)

In [24]:
# Example: answer one query end to end and print the resulting count.
ask("In [2004] how many [people] aged [12] to [30] were living in [Japan]?")

28319362


In [2]:
#2. [A] Unit tests
def run_tests():
    """Run live sanity checks on population() and population_from_query().

    Every check goes through population(), which fetches data over the
    network, so network access is required. Prints "All tests passed!"
    when every check holds.

    Raises:
        AssertionError: If any check fails.
    """
    jp_2004 = population(2004, "people", (0, 79), "JPN")
    assert jp_2004 > 50_000_000, "Japan 2004 (0-79) should be > 50M"

    wld_2004 = population(2004, "people", (0, 79), "WLD")
    assert wld_2004 > 5_000_000_000, "World 2004 (0-79) should be > 5B"

    year = 2004
    place = "JPN"
    ages = (12, 30)
    m = population(year, "males", ages, place)
    f = population(year, "females", ages, place)
    # Fetched once and reused below: each call costs dozens of requests.
    p = population(year, "people", ages, place)
    assert abs((m + f) - p) <= 5, "people should equal males+females (rounding tolerance)"

    small = population(2004, "people", (12, 20), "JPN")
    assert p >= small, "Expanding age range should not decrease population"

    messy = population(2004, "people", (12, 30), "  jpn  ")
    assert p == messy, "Place code should be case/whitespace insensitive"

    q = "In [2004] how many [people] aged [12] to [30] were living in [Japan]?"
    parsed = population_from_query(q)
    assert parsed == p, "Query parser should match direct function call"

    # The failure is raised from the else branch: an `assert False` inside
    # the try body would itself be swallowed by a broad `except Exception`,
    # so the check could never fail.
    try:
        population(2004, "men", (12, 30), "JPN")
    except PopulationDataError:
        pass
    else:
        raise AssertionError("Expected error for invalid sex")

    try:
        population(2004, "people", (30, 12), "JPN")
    except PopulationDataError:
        pass
    else:
        raise AssertionError("Expected error for reversed age_range")

    a = population(2004, "people", (80, 80), "JPN")   # 80+ bin
    b = population(2004, "people", (0, 80), "JPN")    # includes 80+
    assert b >= a, "(0,80) should be >= (80,80)"

    print("All tests passed!")