In [1]:
#!/usr/bin/env python3
"""
Merge England and Wales LSOA21 population projection JSONs into combined
England-and-Wales JSONs.

Assumptions:
- You already generated:
    England JSONs (4 normalized + 4 raw)
    Wales   JSONs (4 normalized + 4 raw)
- Filenames are identical EXCEPT Wales versions have a suffix "_Wales"
  before ".json".

Example:
  population_LSOA21_2030_normalized.json
  population_LSOA21_2030_normalized_Wales.json

Output:
- 8 merged JSONs (England + Wales)
- Saved in the *parent directory* of JSON_DIR
- Output filenames match the England-only filenames exactly

Safety:
- Fails if an LSOA code appears in both England and Wales files
"""

import json
from pathlib import Path

# ----------------------------- CONFIG -----------------------------
# Folder holding the per-nation input JSONs (expected to exist already).
JSON_DIR = Path("lsoa21_population_projections_json")
# Merged files are written one level above the input folder.
OUT_DIR = JSON_DIR.parent

# Projection years to process: 2030, 2035, 2040, 2045.
YEARS = list(range(2030, 2046, 5))

# One (England template, Wales template) pair per variant to merge:
# the normalized files first, then the raw ones. The Wales name is the
# England name with "_Wales" inserted before ".json".
VARIANTS = [
    (stem + ".json", stem + "_Wales.json")
    for stem in (
        "population_LSOA21_{year}_normalized",
        "population_LSOA21_{year}",
    )
]

def load_json(path: Path) -> dict:
    """Read the file at `path` and return its parsed JSON content.

    The file is always decoded as UTF-8 rather than with the platform's
    locale encoding, so the same file parses identically on every machine.

    Args:
        path: Location of the JSON file to read.

    Returns:
        Whatever `json.load` produces for the document (the annotation says
        dict; the top-level type is not checked here).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    with open(path, "r", encoding="utf-8") as f:
        return json.load(f)

def save_json(obj: dict, path: Path):
    """Serialize `obj` as JSON (2-space indent) and write it to `path`.

    Serialization happens before the target is opened, so an object that
    cannot be encoded raises without truncating a file that already exists
    at `path`. The file is written as UTF-8 regardless of the platform's
    locale encoding.

    Args:
        obj: JSON-serializable object to store.
        path: Destination file; overwritten if it exists.

    Raises:
        TypeError: If `obj` contains a value `json` cannot serialize.
        OSError: If the file cannot be opened or written.
    """
    # Build the full text first: opening with "w" truncates immediately, and
    # a failure inside json.dump would otherwise leave an empty/partial file.
    payload = json.dumps(obj, indent=2)
    with open(path, "w", encoding="utf-8") as f:
        f.write(payload)

def main():
    """Merge each England/Wales JSON pair and write the result to OUT_DIR.

    For every year in YEARS and every template pair in VARIANTS, the England
    and Wales files are read from JSON_DIR, checked for shared keys, combined
    into one dict, and saved in OUT_DIR under the England template's filename.

    All input files are located before anything is written, so a missing
    input cannot leave a partially written set of outputs behind.

    If an England input is not found under its plain name, the same name with
    `_England` inserted before the extension is tried. A later step in this
    notebook renames the England inputs that way, and without the fallback
    this cell could not be re-run afterwards.

    Raises:
        FileNotFoundError: If an England or Wales input file is missing.
        RuntimeError: If any key appears in both files of a pair.
    """
    # Pass 1: resolve and validate every input path up front.
    jobs = []
    for year in YEARS:
        for eng_tpl, wal_tpl in VARIANTS:
            eng_path = JSON_DIR / eng_tpl.format(year=year)
            wal_path = JSON_DIR / wal_tpl.format(year=year)

            if not eng_path.exists():
                # Fall back to the post-rename name (<stem>_England.json).
                renamed = eng_path.with_name(
                    f"{eng_path.stem}_England{eng_path.suffix}"
                )
                if not renamed.exists():
                    raise FileNotFoundError(f"Missing England file: {eng_path}")
                eng_path = renamed
            if not wal_path.exists():
                raise FileNotFoundError(f"Missing Wales file: {wal_path}")

            # The output always carries the plain England filename.
            jobs.append(
                (year, eng_path, wal_path, OUT_DIR / eng_tpl.format(year=year))
            )

    # Pass 2: merge and write.
    for year, eng_path, wal_path, out_path in jobs:
        eng = load_json(eng_path)
        wal = load_json(wal_path)

        # safety: no overlapping LSOAs (a plain dict merge would silently let
        # the Wales value win)
        overlap = eng.keys() & wal.keys()
        if overlap:
            sample = ", ".join(sorted(overlap)[:5])
            raise RuntimeError(
                f"Found {len(overlap)} overlapping LSOA codes between "
                f"{eng_path.name} and {wal_path.name} (e.g. {sample})"
            )

        merged = {**eng, **wal}
        save_json(merged, out_path)

        print(
            f"[OK] Merged {year}: "
            f"{len(eng):,} (ENG) + {len(wal):,} (WAL) "
            f"= {len(merged):,} -> {out_path}"
        )

    print("All merges completed successfully.")

if __name__ == "__main__":
    main()

[OK] Merged 2030: 33,755 (ENG) + 1,917 (WAL) = 35,672 -> population_LSOA21_2030_normalized.json
[OK] Merged 2030: 33,755 (ENG) + 1,917 (WAL) = 35,672 -> population_LSOA21_2030.json
[OK] Merged 2035: 33,755 (ENG) + 1,917 (WAL) = 35,672 -> population_LSOA21_2035_normalized.json
[OK] Merged 2035: 33,755 (ENG) + 1,917 (WAL) = 35,672 -> population_LSOA21_2035.json
[OK] Merged 2040: 33,755 (ENG) + 1,917 (WAL) = 35,672 -> population_LSOA21_2040_normalized.json
[OK] Merged 2040: 33,755 (ENG) + 1,917 (WAL) = 35,672 -> population_LSOA21_2040.json
[OK] Merged 2045: 33,755 (ENG) + 1,917 (WAL) = 35,672 -> population_LSOA21_2045_normalized.json
[OK] Merged 2045: 33,755 (ENG) + 1,917 (WAL) = 35,672 -> population_LSOA21_2045.json
All merges completed successfully.


In [2]:
#!/usr/bin/env python3
"""
Rename England-only LSOA projection JSONs by appending `_England`
to all filenames in the JSON directory.

Example:
  population_LSOA21_2030_normalized.json
  -> population_LSOA21_2030_normalized_England.json

Safeguards:
- Skips files whose names already end in `_Wales.json` or `_England.json`
- Fails if target filename already exists
"""

from pathlib import Path

# Folder whose JSON files are renamed in place.
JSON_DIR = Path("lsoa21_population_projections_json")

def main():
    """Append `_England` to the name of every unsuffixed JSON file in JSON_DIR.

    Files whose names end in `_Wales.json` or `_England.json` are left alone.
    For every other `*.json` file, `_England` is inserted immediately before
    the trailing extension only; a `.json` occurring earlier in the name is
    not touched.

    The directory listing is snapshotted and every target name is checked
    before the first rename, so a conflict aborts the run with no file moved.

    Raises:
        FileNotFoundError: If JSON_DIR does not exist.
        RuntimeError: If a target filename already exists.
    """
    if not JSON_DIR.exists():
        raise FileNotFoundError(f"Directory not found: {JSON_DIR}")

    ext_len = len(".json")

    # Pass 1: plan the renames. sorted() materialises the glob so the
    # directory is not modified while it is still being listed, and makes
    # the processing order deterministic.
    plan = []
    for src in sorted(JSON_DIR.glob("*.json")):
        name = src.name

        # Skip Wales or already-renamed files
        if name.endswith(("_Wales.json", "_England.json")):
            continue

        # Insert _England before the final extension only (slice instead of
        # str.replace, which would rewrite every ".json" in the name).
        dst = src.with_name(f"{name[:-ext_len]}_England{name[-ext_len:]}")

        if dst.exists():
            raise RuntimeError(f"Target file already exists: {dst}")

        plan.append((src, dst))

    # Pass 2: perform the renames.
    for src, dst in plan:
        src.rename(dst)
        print(f"[OK] {src.name} -> {dst.name}")

    print(f"\nDone. Renamed {len(plan)} England JSON files.")

if __name__ == "__main__":
    main()

[OK] population_LSOA21_2030.json -> population_LSOA21_2030_England.json
[OK] population_LSOA21_2030_normalized.json -> population_LSOA21_2030_normalized_England.json
[OK] population_LSOA21_2035.json -> population_LSOA21_2035_England.json
[OK] population_LSOA21_2035_normalized.json -> population_LSOA21_2035_normalized_England.json
[OK] population_LSOA21_2040.json -> population_LSOA21_2040_England.json
[OK] population_LSOA21_2040_normalized.json -> population_LSOA21_2040_normalized_England.json
[OK] population_LSOA21_2045.json -> population_LSOA21_2045_England.json
[OK] population_LSOA21_2045_normalized.json -> population_LSOA21_2045_normalized_England.json

Done. Renamed 8 England JSON files.


In [3]:
#!/usr/bin/env python3
"""
Print number of LSOAs in each final projection JSON file.
"""

import json
from pathlib import Path

# Directory where the final 8 JSON files live; an empty path means the
# current working directory.
JSON_DIR = Path("")

def main():
    """Print the number of top-level entries in every `*.json` file in JSON_DIR.

    Files are processed in sorted path order. Each file is decoded as UTF-8
    (not the platform's locale encoding) and parsed, and `len()` of the parsed
    document is printed next to the filename as its LSOA count.

    Raises:
        RuntimeError: If JSON_DIR contains no `*.json` files.
    """
    files = sorted(JSON_DIR.glob("*.json"))
    if not files:
        raise RuntimeError(f"No JSON files found in {JSON_DIR}")

    print(f"Found {len(files)} JSON files\n")

    for p in files:
        with open(p, "r", encoding="utf-8") as f:
            data = json.load(f)
        # Filename is left-aligned and padded to 55 characters so the counts
        # line up in a column.
        print(f"{p.name:55s}  ->  LSOAs = {len(data):,}")

if __name__ == "__main__":
    main()

Found 8 JSON files

population_LSOA21_2030.json                              ->  LSOAs = 35,672
population_LSOA21_2030_normalized.json                   ->  LSOAs = 35,672
population_LSOA21_2035.json                              ->  LSOAs = 35,672
population_LSOA21_2035_normalized.json                   ->  LSOAs = 35,672
population_LSOA21_2040.json                              ->  LSOAs = 35,672
population_LSOA21_2040_normalized.json                   ->  LSOAs = 35,672
population_LSOA21_2045.json                              ->  LSOAs = 35,672
population_LSOA21_2045_normalized.json                   ->  LSOAs = 35,672
