In [7]:
# Creating the postcode centroid file
# I'm importing argparse so I can use it to sync devices from cloud APIs, ingest the postcode centroids, and fill any missing historical data if required.
# Importing logging to make debugging easier as it'll tell me where the issue is, for example which API link may be causing the issue.
import os
import sys
import time
import json
import argparse
import logging

# Importing requests so that my postcode centroid URL can be fetched and used.
# SQLAlchemy helps connect me to TimeScaleDB for the dashboard and automatically reconnects to Render.
import requests
from sqlalchemy import create_engine, text

In [20]:
import requests

# Base URL of the ONSPD feature layer (layer 1 of the FeatureServer) that the
# postcode centroids are queried from; "/query" is appended at request time.
postcode_centroid_URL = (
    "https://services1.arcgis.com/ESMARspQHYMw9BZ9/arcgis/rest/services/"
    "ONSPD_LATEST_UK/FeatureServer/1"
)

# LAD codes used to restrict the query to inner London (matched against the
# LAD25CD field when the WHERE clause is built).
# NOTE(review): definitions of "inner London" differ (some include e.g. Haringey
# or Greenwich) - confirm this list matches the intended definition.
inner_london_lad_codes = [
    "E09000001",
    "E09000007",
    "E09000012",
    "E09000013",
    "E09000019",
    "E09000020",
    "E09000022",
    "E09000023",
    "E09000025",
    "E09000028",
    "E09000030",
    "E09000032",
    "E09000033",
]

def normalisation(postcode):
    """Return the postcode in a canonical "OUTWARD INWARD" form.

    Surrounding whitespace is stripped, letters are upper-cased and every
    space character is removed. When more than three characters remain, a
    single space is re-inserted before the final three characters; shorter
    values are returned compacted but otherwise unchanged.
    """
    compact = "".join(postcode.strip().upper().split(" "))
    if len(compact) <= 3:
        # Too short to split into two parts.
        return compact
    outward, inward = compact[:-3], compact[-3:]
    return f"{outward} {inward}"

def build_where_inner_london(lad_codes=None):
    """Build the WHERE clause that restricts a query to a set of LAD codes.

    Parameters
    ----------
    lad_codes : iterable of str, optional
        Codes to match against the LAD25CD field. When omitted, the
        notebook-level ``inner_london_lad_codes`` list is used, which is the
        original behaviour.

    Returns
    -------
    str
        A clause of the form ``LAD25CD IN ('E09000001','E09000007',...)``.

    Raises
    ------
    ValueError
        If the list of codes is empty, because ``LAD25CD IN ()`` is not a
        valid clause.
    """
    codes = inner_london_lad_codes if lad_codes is None else list(lad_codes)
    if not codes:
        raise ValueError("At least one LAD code is required to build the WHERE clause")
    # Double any embedded single quote so a code can never terminate the literal early.
    quoted = ",".join("'" + str(code).replace("'", "''") + "'" for code in codes)
    return f"LAD25CD IN ({quoted})"

# From the service directory I know the maximum record count per page is 2000 and which fields the layer exposes.
# This function fetches a single page of London postcode centroids with the chosen fields, so it can be called repeatedly to page through all of them.
def query_page(postcode_centroid_URL, where, page_size, session, timeout = 120, retries = 4):
  """Fetch one page of attribute records from the layer's /query endpoint.

  Parameters
  ----------
  postcode_centroid_URL : str
      Base URL of the feature layer; "/query" is appended to it.
  where : str
      Filter expression sent as the ``where`` parameter.
  page_size : int
      Sent as ``resultRecordCount`` (the number of records requested).
  session : object
      Anything with a ``get(url, params=..., timeout=...)`` method, e.g. a
      ``requests.Session``.
  timeout : int or float
      Timeout handed to ``session.get``.
  retries : int
      Total number of attempts before giving up.

  Returns
  -------
  list of dict
      The ``attributes`` dict of every returned feature (an empty dict for a
      feature that has none).

  Raises
  ------
  RuntimeError
      When every attempt failed. The last underlying exception is chained as
      ``__cause__`` so the traceback shows what actually went wrong.
  """
  parameters = {
      "where": where,
      "outFields": "PCDS,LONG,LAT,OBJECTID,LAD25CD",
      "f": "json",
      "resultRecordCount": page_size,
      # Results are ordered by OBJECTID so the caller can page on the last id seen.
      "orderByFields": "OBJECTID",
      "returnGeometry": "false"
      }
  # The query operation lives under "/query" on the layer URL.
  query_url = postcode_centroid_URL + "/query"

  last_error = None
  for attempt in range(retries):
    try:
      r = session.get(query_url, params = parameters, timeout = timeout)
      r.raise_for_status()
      data = r.json()

      # A response body may carry an "error" object; treat it as a failed attempt.
      if "error" in data:
        raise RuntimeError(data["error"])

      return [feature.get("attributes", {}) for feature in data.get("features", [])]

    except Exception as exc:
      # Remember the failure instead of discarding it, so it can be reported below.
      last_error = exc
      # Back off a little longer each time (1.5s, 3s, 4.5s, ...), but do not
      # waste time sleeping once there are no attempts left.
      if attempt < retries - 1:
        time.sleep(1.5 * (attempt + 1))

  # Every attempt failed: surface the URL and the real cause.
  raise RuntimeError(
      f"Failed to fetch data from {query_url} after {retries} attempts: {last_error!r}"
  ) from last_error

# Fetch every inner London postcode centroid page by page (pagination).
# Each request asks only for records whose OBJECTID is greater than the last
# one already received, so consecutive pages never overlap.

session = requests.Session()
last_objectid = 0
page_size = 2000
all_london_rows = []

while True:
  where = f"{build_where_inner_london()} AND OBJECTID > {last_objectid}"
  page = query_page(postcode_centroid_URL, where=where, page_size=page_size, session=session)
  print(f"Last OBJECTID: {last_objectid}, got {len(page)} records")

  # Nothing came back: every matching postcode has already been collected.
  if not page:
    print("Breaking - empty page")
    break

  all_london_rows += page

  # The final record of the page carries the highest OBJECTID seen so far,
  # because each page is requested in OBJECTID order.
  new_last = page[-1].get("OBJECTID")
  if new_last is None or new_last <= last_objectid:
    # Guard against looping forever if the cursor cannot move forward.
    print("Breaking - OBJECTID did not advance (pagination issue)")
    break
  last_objectid = new_last

  # A short page means the end of the data has been reached.
  if len(page) < page_size:
    print(f"Breaking - partial page ({len(page)} records)")
    break

print("All London postcode centroids have been fetched. The total number of rows are", len(all_london_rows))

# Mini EDA: keep one row per postcode and drop incomplete records.
cleaned_list = []
added_pcds = set()

for record in all_london_rows:
  raw_pcds, raw_lat, raw_long = (record.get(field) for field in ("PCDS", "LAT", "LONG"))
  # A record is unusable unless it has a postcode and both coordinates.
  if any(value is None for value in (raw_pcds, raw_lat, raw_long)):
    continue

  # Normalise first so that differently spaced/cased copies count as duplicates.
  pcds_norm = normalisation(str(raw_pcds))
  if pcds_norm not in added_pcds:
    added_pcds.add(pcds_norm)
    cleaned_list.append({
        "Postcodes": pcds_norm,
        "Latitude": float(raw_lat),
        "Longitude": float(raw_long),
    })


print("These are the unique postcodes of London", len(cleaned_list))

# Saving it in a csv, one row per unique postcode, sorted by postcode.
# The fields are separated by a bare comma, exactly like the header row: a
# space after the comma would become part of the Latitude/Longitude values
# for any reader that does not strip leading whitespace.
with open("innerLondon_postcode_centroids.csv", "w", encoding = "utf-8") as f:
  f.write("Postcodes,Latitude,Longitude\n")
  for row in sorted(cleaned_list, key = lambda x: x["Postcodes"]):
    f.write(f"{row['Postcodes']},{row['Latitude']},{row['Longitude']}\n")

print("The CSV file has been saved")

Last OBJECTID: 0, got 2000 records
Last OBJECTID: 748597, got 2000 records
Last OBJECTID: 753363, got 2000 records
Last OBJECTID: 755371, got 2000 records
Last OBJECTID: 757371, got 2000 records
Last OBJECTID: 759452, got 2000 records
Last OBJECTID: 764952, got 2000 records
Last OBJECTID: 766973, got 2000 records
Last OBJECTID: 770583, got 2000 records
Last OBJECTID: 772591, got 2000 records
Last OBJECTID: 774647, got 2000 records
Last OBJECTID: 776662, got 2000 records
Last OBJECTID: 778662, got 2000 records
Last OBJECTID: 780662, got 2000 records
Last OBJECTID: 782662, got 2000 records
Last OBJECTID: 784662, got 2000 records
Last OBJECTID: 786662, got 2000 records
Last OBJECTID: 788662, got 2000 records
Last OBJECTID: 1503492, got 2000 records
Last OBJECTID: 1505492, got 2000 records
Last OBJECTID: 1514097, got 2000 records
Last OBJECTID: 1518751, got 2000 records
Last OBJECTID: 1527941, got 2000 records
Last OBJECTID: 1703467, got 2000 records
Last OBJECTID: 1705467, got 2000 record