In [1]:
import os
import pandas as pd
import json
from collections import defaultdict

# Locations: folder holding the per-stratum CSVs ('' makes os.path.join
# return the bare file name) and the JSON file the aggregation is written to.
POPULATION_FOLDER = ''
OUTPUT_FILE = 'population_2022_LSOA21.json'

# Name of the CSV column whose values are aggregated.
YEAR = '2022'

# Age bands, in the order they occupy slots 3-7 of each counter list.
AGE_GROUPS = ['0_to_15', '16_to_24', '25_to_49', '50_to_64', '65+']

# One CSV per (sex, age band) stratum: every male band first, then every
# female band. This order fixes the stratum slots 8-17.
POPULATION_FILES = [
    f'population_{sex_label}_aged_{age_label}.csv'
    for sex_label in ('male', 'female')
    for age_label in AGE_GROUPS
]

# 1 total + 2 sexes + 5 age bands + 10 strata = 18 counters per LSOA.
_SLOT_COUNT = 1 + 2 + len(AGE_GROUPS) + len(POPULATION_FILES)


def _empty_counts():
    """Return a fresh zero-filled counter list for a newly seen LSOA."""
    return [0] * _SLOT_COUNT


# Accumulator: LSOA code -> list of counters, created on first access.
lsoa_values = defaultdict(_empty_counts)

# Accumulate every stratum file into the per-LSOA counters.
# Slot layout of each lsoa_values[lsoa] list:
#   0      total
#   1-2    male, female
#   3-7    the five AGE_GROUPS, in order
#   8-17   the ten sex-by-age strata, in POPULATION_FILES order
for stratum_idx, file in enumerate(POPULATION_FILES):
    file_path = os.path.join(POPULATION_FOLDER, file)
    df = pd.read_csv(file_path)
    df = df[['mnemonic', YEAR]]

    # Parse stratum from 'population_<sex>_aged_<age>.csv'. maxsplit=3 keeps
    # the age band ('0_to_15' or '65+') intact as the final piece.
    parts = os.path.splitext(file)[0].split('_', 3)
    sex = parts[1]  # 'male' or 'female'
    age = parts[3]
    sex_idx = 1 if sex == 'male' else 2
    age_idx = AGE_GROUPS.index(age)

    # Walk the two columns directly: df.iterrows() builds a Series per row,
    # which is slow and can coerce dtypes across the row.
    for lsoa, val in zip(df['mnemonic'], df[YEAR]):
        if pd.isna(val):
            continue  # missing value: contributes nothing, creates no entry
        val = int(val)

        counts = lsoa_values[lsoa]

        # Update lv3 (10 strata: index 8-17)
        counts[8 + stratum_idx] += val

        # Update lv1 and lv2:
        counts[0] += val              # total
        counts[sex_idx] += val        # male or female
        counts[3 + age_idx] += val    # age group

# Shape each LSOA's counter list into the three nested levels that get saved:
#   lv1 = [total]
#   lv2 = total + male + female + 5 age groups (first 8 counters)
#   lv3 = lv2 + 10 strata (the full counter list)
output_json = {
    lsoa_code: {
        'population_lv1': counts[0:1],
        'population_lv2': counts[:8],
        'population_lv3': counts,
    }
    for lsoa_code, counts in lsoa_values.items()
}

# Write the result to disk as indented JSON
with open(OUTPUT_FILE, 'w') as out_file:
    json.dump(output_json, out_file, indent=2)

In [1]:
import json

# Sanity check: load a per-LSOA JSON file and report how many top-level keys it has.
# NOTE(review): this path differs from the OUTPUT_FILE written by the
# aggregation cell; presumably it comes from a separate normalization
# step that is not shown here. Confirm.
with open("lsoa21_population_2022_normalized.json", "r") as normalized_file:
    data = json.load(normalized_file)

lsoa_count = len(data)
print("Number of LSOA21:", lsoa_count)

Number of LSOA21: 35672
