# Backend

## Crime Analysis Dashboard App

In [3]:
#Import Libraries
import requests
import pandas as pd
import numpy as np
import geopandas as gpd
from shapely.ops import triangulate
from shapely.geometry import Polygon
import mapbox_earcut as earcut
from pathlib import Path

## Get Police Force Table

In [4]:

def get_forces():

    """

    Calls the API and retrieves the police forces data table.

    Input: None.

    Output: Dataframe built from the API response, with the force ID in
    "police_force_id" and the force name in "police_force_name".

    Raises: Exception if the API answers with any status other than 200.

    """

    # Requests the list of forces from the API
    forces_response = requests.get("https://data.police.uk/api/forces")

    # Any status other than 200 is treated as a failed call
    status = forces_response.status_code
    if status != 200:
        raise Exception(f"API error: {status}")

    # Builds the table from the JSON payload and gives the columns descriptive names
    column_names = {"id": "police_force_id", "name": "police_force_name"}
    return pd.DataFrame(forces_response.json()).rename(columns=column_names)

# Defines the police force dataframe by calling the API once
df_forces = get_forces()

# Previews the first rows of the police force table
print(df_forces.head())

     police_force_id               police_force_name
0  avon-and-somerset  Avon and Somerset Constabulary
1       bedfordshire             Bedfordshire Police
2     cambridgeshire     Cambridgeshire Constabulary
3           cheshire           Cheshire Constabulary
4     city-of-london           City of London Police


## Get Neighbourhood Table

In [5]:

# INPUT FROM USER: the police force IDs whose neighbourhoods are fetched below
police_force_ids = ["bedfordshire", "cambridgeshire"]  

def get_neighbourhood(id):

    """

    Calls the API and retrieves the neighbourhood data table of one police force.

    Input: Police force ID.

    Output: Dataframe containing the neighbourhood ID, the neighbourhood name
    and the police force ID the neighbourhood belongs to.

    Raises: Exception if the API answers with any status other than 200.

    """

    # NOTE: the parameter name shadows the builtin id(); it is kept so existing calls keep working
    neighbourhoods_response = requests.get(f"https://data.police.uk/api/{id}/neighbourhoods")

    # Any status other than 200 is treated as a failed call
    status = neighbourhoods_response.status_code
    if status != 200:
        raise Exception(f"API error: {status}")

    # Builds the table from the JSON payload
    neighbourhoods = pd.DataFrame(neighbourhoods_response.json())

    # Records which force every row came from
    neighbourhoods["police_force_id"] = id

    # Gives the API's generic "id" and "name" columns descriptive names
    column_names = {"id": "neighbourhood_id", "name": "neighbourhood_name"}
    return neighbourhoods.rename(columns=column_names)

# Builds one neighbourhood dataframe per requested police force.
# The loop variable is deliberately not called "id": a module-level "id" would
# overwrite the builtin id() for every cell that runs afterwards.
df_neighbourhoods_list = [get_neighbourhood(force_id) for force_id in police_force_ids]

# Unions the list of dataframes into one.
# Each force keeps its own row index, so index labels can repeat across forces.
df_neighbourhoods = pd.concat(df_neighbourhoods_list)

# Shows a random sample of five rows
print(df_neighbourhoods.sample(5))

      neighbourhood_id              neighbourhood_name police_force_id
12                 LU5                   Luton Airport    bedfordshire
7                  CB5        Houghton Regis and Rural    bedfordshire
15                 NU3  Harpur, Brickhill and De Parys    bedfordshire
13  St_Ives_and_Ramsey                         St Ives  cambridgeshire
9                  LU2            Luton South and East    bedfordshire


## Get Neighbourhood Boundaries Table

In [6]:

def get_neighbourhood_boundaries(neighbourhood_id):

    """

    Calls the API and retrieves the boundary of one neighbourhood.

    Input: Neighbourhood ID. It must be present in the module-level
    df_neighbourhoods table, which is used to find the police force.

    Output: Corresponding boundary polygon string in the form
    "lat,lng:lat,lng:...".

    Raises: Exception if the API answers with any status other than 200.

    """

    # Finds the police force of the neighbourhood (first matching row in df_neighbourhoods)
    is_requested = df_neighbourhoods["neighbourhood_id"] == neighbourhood_id
    police_force_id = df_neighbourhoods.loc[is_requested, "police_force_id"].iloc[0]

    # Requests the boundary points from the API
    boundary_response = requests.get(
        f"https://data.police.uk/api/{police_force_id}/{neighbourhood_id}/boundary"
    )

    # Any status other than 200 is treated as a failed call
    status = boundary_response.status_code
    if status != 200:
        raise Exception(f"API error: {status}")

    # Formats every boundary point as "lat,lng"
    points = []
    for point in boundary_response.json():
        latitude = float(point["latitude"])
        longitude = float(point["longitude"])
        points.append(f"{latitude},{longitude}")

    # Joins the points into one polygon string
    return ":".join(points)

# Makes a copy of the neighbourhoods dataframe, so the boundary column is added to the copy only
df_neighbourhood_boundaries = df_neighbourhoods.copy()

# Initiates empty boundary list 
boundary_list = []

# Loops through all neighbourhood IDs (one API call each) and appends the boundary polygon string to the list
for neighbourhood_id in df_neighbourhood_boundaries["neighbourhood_id"]:
    boundary_list.append(get_neighbourhood_boundaries(neighbourhood_id))

# Defines the boundary list as a new column in the dataframe (the list is in the same row order as the dataframe)
df_neighbourhood_boundaries["neighbourhood_boundary"] = boundary_list

# Previews the first rows
print(df_neighbourhood_boundaries.head())

  neighbourhood_id                         neighbourhood_name police_force_id  \
0              BD2                   Bromham, Oakley, Wootton    bedfordshire   
1              BD3           Wilstead, Shortstown, Willington    bedfordshire   
2              BD5                          Riseley, Wyboston    bedfordshire   
3              CB1  Flitwick, Ampthill, Marston and Cranfield    bedfordshire   
4              CB2    Biggleswade, Sandy, Potton and Shefford    bedfordshire   

                              neighbourhood_boundary  
0  52.200075358,-0.548369711:52.200155837,-0.5484...  
1  52.201040284,-0.437150426:52.20106348,-0.43761...  
2  52.322913907,-0.465104578:52.322953267,-0.4653...  
3  52.110251611,-0.591229997:52.11069521,-0.59140...  
4  52.190902794,-0.287653471:52.190925829,-0.2891...  


## Get Street-Level Crimes Table

In [7]:
def get_kml(neighbourhood_id, kml_dir="C:/Users/benco/Downloads/force_kmls/data"):

    """

    Builds the path of the local KML file for an area. The file is not opened
    or checked here.

    Input: Neighbourhood ID, used as the file name without the ".kml" extension.
    Optional directory holding the KML files (defaults to the original
    hard-coded download folder).

    Output: Path string of the corresponding KML file.

    NOTE(review): the only visible call passes a police force name
    ("hertfordshire") and the default folder is called "force_kmls", so the ID
    may really be a police force ID - confirm.

    """

    # Joins with "/" so the default result is identical to the previously hard-coded path
    return f"{kml_dir}/{neighbourhood_id}.kml"




def get_street_level_crimes(poly_str):

    """

    Calls the street-level crimes endpoint of the API for one polygon.

    Input: A polygon string ("lat,lng:lat,lng:...") defining the area to get
    street-level crimes for.

    Output: Dataframe built from the JSON the API returns for that area.

    Raises: Exception if the API answers with any status other than 200.

    """

    # Requests all crime categories inside the polygon
    crimes_response = requests.get(
        f"https://data.police.uk/api/crimes-street/all-crime?poly={poly_str}"
    )

    # Any status other than 200 is treated as a failed call
    status = crimes_response.status_code
    if status != 200:
        raise Exception(f"API error: {status}")

    # Builds the table from the JSON payload
    return pd.DataFrame(crimes_response.json())



def load_polygon_from_kml(filepath):

    """

    Loads the geometry of the first feature of a KML file.

    Input: Filepath to the KML file.

    Output: Geometry object of the first feature in the file.

    """

    # Reads the KML file with geopandas using the LIBKML driver
    features = gpd.read_file(filepath, driver="LIBKML")

    # Only the first feature is used; any further features in the file are ignored
    return features.geometry.iloc[0]



def triangulate_polygon(polygon):

    """

    Splits a polygon into triangles.

    Input: Shapely polygon object.

    Output: List of Shapely triangle objects whose centroid lies inside the polygon.

    """

    kept_triangles = []

    # Goes through every triangle produced by shapely's triangulate
    for candidate in triangulate(polygon):

        # Keeps the triangle when its centroid lies inside the polygon.
        # Only the centroid is tested, so a kept triangle may still cross the boundary.
        if polygon.contains(candidate.centroid):
            kept_triangles.append(candidate)

    # Returns list of triangles
    return kept_triangles



def triangle_to_poly_string(triangle):

    """

    Converts a triangle into the polygon string format "lat,lng:lat,lng:lat,lng".

    Input: Shapely triangle object.

    Output: Polygon string.

    """

    # The exterior ring repeats its first point at the end, so the last entry is dropped
    ring = list(triangle.exterior.coords)
    corners = ring[:-1]

    # Swaps each corner from (lng, lat) to "lat,lng"
    parts = []
    for corner in corners:
        parts.append(f"{corner[1]},{corner[0]}")

    # Joins the corners into one polygon string
    return ":".join(parts)



def simplify_polygon(polygon, tolerance=0.0005):

    """

    Reduces the complexity of a polygon while preserving its topology.
    The default tolerance is noted by the author as roughly 50m at UK latitudes
    (assumes the coordinates are in degrees - confirm).

    Input: Shapely polygon object, tolerance value.

    Output: Simplified Shapely polygon object.

    """

    # Simplifies the polygon with topology preservation switched on
    simplified = polygon.simplify(tolerance, preserve_topology=True)

    # Returns simplified polygon
    return simplified



def process_kml_file_to_dataframe(neighbourhood_id):

    """

    Retrieves the street-level crime data for the area described by a KML file.
    The area is simplified, split into triangles, and the API is called once
    per triangle.

    Input: Neighbourhood ID, used to find the KML file of the area.

    Output: Street-level crime dataframe for the area. An empty dataframe is
    returned when the area produced no triangles.

    """

    # Finds the KML file, loads its polygon and simplifies it
    polygon = simplify_polygon(load_polygon_from_kml(get_kml(neighbourhood_id)))

    # Calls the API once per triangle and collects the resulting dataframes
    all_dfs = [
        get_street_level_crimes(triangle_to_poly_string(tri))
        for tri in triangulate_polygon(polygon)
    ]

    # Returns blank dataframe if no data is collected
    if not all_dfs:
        return pd.DataFrame()

    # Combines all dataframes into one with a fresh row index
    return pd.concat(all_dfs, ignore_index=True)


# Retrieves the street-level crimes for the area whose KML file is named "hertfordshire"
df_crimes = process_kml_file_to_dataframe("hertfordshire")
# Previews the first rows of the crimes table
print(df_crimes.head())



                category location_type  \
0          violent-crime         Force   
1  criminal-damage-arson         Force   
2          vehicle-crime         Force   
3  anti-social-behaviour         Force   
4  anti-social-behaviour         Force   

                                            location context  \
0  {'latitude': '52.062879', 'street': {'id': 194...           
1  {'latitude': '51.684363', 'street': {'id': 194...           
2  {'latitude': '51.684363', 'street': {'id': 194...           
3  {'latitude': '51.685564', 'street': {'id': 194...           
4  {'latitude': '51.684728', 'street': {'id': 194...           

                                      outcome_status  \
0  {'category': 'Under investigation', 'date': '2...   
1  {'category': 'Investigation complete; no suspe...   
2  {'category': 'Investigation complete; no suspe...   
3                                               None   
4                                               None   

                         

## Get Specific Neighbourhood Table

In [10]:
def get_specific_neighbourhood(police_force_id, neighbourhood_id):

    """

    Calls the API and retrieves the details of one specific neighbourhood.

    Input: Police force ID. Neighbourhood ID.

    Output: Dataframe built by flattening the JSON the API returns for the
    neighbourhood (nested fields become dotted column names).

    Raises: Exception if the API answers with any status other than 200.

    """

    # Requests the neighbourhood details from the API
    neighbourhood_response = requests.get(
        f"https://data.police.uk/api/{police_force_id}/{neighbourhood_id}"
    )

    # Any status other than 200 is treated as a failed call
    status = neighbourhood_response.status_code
    if status != 200:
        raise Exception(f"API error: {status}")

    # Flattens the JSON payload into a dataframe
    return pd.json_normalize(neighbourhood_response.json())
    
    
def get_specific_neighnourhoods_from_police_force(police_force_id, df_neighbourhoods=None):

    """

    A function to get all specific neighbourhoods for a specific police force ID.

    Input: Police force ID. Neighbourhoods dataframe with the columns
    "neighbourhood_id" and "police_force_id"; when omitted, the module-level
    df_neighbourhoods table is used as it is at the time of the call.

    Output: Dataframe containing all neighbourhoods for the specific police force ID,
    with the police force ID added as a column.

    Raises: ValueError if the dataframe holds no neighbourhood for the police force ID.

    """

    # Looks the table up at call time. A dataframe used directly as the default
    # would be frozen when the function is defined and go stale whenever
    # df_neighbourhoods is rebuilt.
    if df_neighbourhoods is None:
        df_neighbourhoods = globals()["df_neighbourhoods"]

    # Selects the neighbourhood IDs that belong to the police force
    belongs_to_force = df_neighbourhoods["police_force_id"] == police_force_id
    neighbourhood_ids = df_neighbourhoods.loc[belongs_to_force, "neighbourhood_id"]

    # Fails with a clear message instead of pandas' "No objects to concatenate"
    if neighbourhood_ids.empty:
        raise ValueError(f"No neighbourhoods found for police force '{police_force_id}'")

    # Calls the API once per neighbourhood and collects the dataframes
    specific_neighbourhood_list = [
        get_specific_neighbourhood(police_force_id, neighbourhood_id)
        for neighbourhood_id in neighbourhood_ids
    ]

    # Unions the list of dataframes into one
    df = pd.concat(specific_neighbourhood_list, ignore_index=True)

    # Adds police force ID column
    df["police_force_id"] = police_force_id

    # Returns dataframe
    return df



# Retrieves the details of every Bedfordshire neighbourhood (one API call per neighbourhood)
df_specific_neighbourhoods = get_specific_neighnourhoods_from_police_force("bedfordshire")
# Previews the first rows
print(df_specific_neighbourhoods.head())



    id                                       name  \
0  BD2                   Bromham, Oakley, Wootton   
1  BD3           Wilstead, Shortstown, Willington   
2  BD5                          Riseley, Wyboston   
3  CB1  Flitwick, Ampthill, Marston and Cranfield   
4  CB2    Biggleswade, Sandy, Potton and Shefford   

                                           url_force population links  \
0  https://www.police.uk/pu/your-area/bedfordshir...          0    []   
1  https://www.police.uk/pu/your-area/bedfordshir...          0    []   
2  https://www.police.uk/pu/your-area/bedfordshir...          0    []   
3  https://www.police.uk/pu/your-area/bedfordshir...          0    []   
4  https://www.police.uk/pu/your-area/bedfordshir...          0    []   

  locations centre.latitude centre.longitude  \
0        []         52.1386        -0.551294   
1        []         52.1278        -0.427471   
2        []         52.2442        -0.454736   
3        []         52.0219        -0.482151   
4 

---

# Cleaning

In [None]:
# Makes a working copy of the crimes dataframe for the cleaning steps
df = df_crimes.copy()

In [None]:
# Counts the missing values in each column
df.isna().sum()

category               0
location_type          0
location               0
context                0
outcome_status      1748
persistent_id          0
id                     0
location_subtype       0
month                  0
dtype: int64

## Filling Nulls

In [None]:
# Fills nulls in the outcome column with 'Unknown'
def fill_blank_outcome_status(df):

    """

    A function to fill blank outcome status values with 'Unknown'.

    Input: Crime dataframe with an "outcome_status" column.

    Output: New crime dataframe with filled outcome status values.
    The dataframe passed in is left unchanged.

    """

    # Fills nulls in the outcome column with 'Unknown'
    filled_outcomes = df["outcome_status"].fillna("Unknown")

    # Returns a new dataframe, so the caller's table (e.g. the raw API data) is not modified
    return df.assign(outcome_status=filled_outcomes)


# Fills the blanks on the working copy "df" rather than on the raw "df_crimes" table.
# Passing df_crimes here discarded the copy made earlier and made "df" point at the raw data.
df = fill_blank_outcome_status(df)
# Counts the missing values in each column again to confirm the fill
df.isna().sum()

category            0
location_type       0
location            0
context             0
outcome_status      0
persistent_id       0
id                  0
location_subtype    0
month               0
dtype: int64

## Checking for Duplicates

In [None]:
def remove_duplicates(df):

    """

    A function to remove duplicate crime rows.

    Input: Crime dataframe.

    Output: Dataframe without duplicates and with a fresh row index. Rows are
    compared on "id" when that column exists, otherwise on whichever of
    "latitude", "longitude", "month" and "category" exist.

    """

    # Chooses the columns that identify a duplicate
    if "id" in df.columns:
        key_columns = ["id"]
    else:
        fallback_columns = ("latitude", "longitude", "month", "category")
        key_columns = [name for name in fallback_columns if name in df.columns]

    # Keeps the first row of every duplicate group
    deduplicated = df.drop_duplicates(subset=key_columns)

    # Renumbers the rows from zero
    return deduplicated.reset_index(drop=True)

# Removes the duplicate rows from the working dataframe
df = remove_duplicates(df)
# Shows the number of rows and columns that remain
df.shape

(8992, 11)

## Check Datatypes


In [None]:
# Lists the datatype of each column
df.dtypes

category             object
location_type        object
context              object
outcome_status       object
persistent_id        object
id                    int64
location_subtype     object
month                object
latitude            float64
longitude           float64
street_name          object
dtype: object

## Separating Location Column

In [None]:
def extract_coordinates_and_street(df):

    """

    A function to extract latitude, longitude and street name from the location column.

    Input: Crime dataframe with a "location" column holding one dictionary per row
    (keys "latitude", "longitude" and a nested "street" with a "name").

    Output: New crime dataframe with latitude, longitude and street name columns
    and without the location column. The dataframe passed in is left unchanged.

    """

    # Flattens the location dictionaries into columns ("street.name" comes from the nested street entry)
    coords = pd.json_normalize(df["location"].tolist())

    # The flattened table is numbered 0..n-1. Giving it the row labels of df keeps
    # every value on its own row even when df has a different index; otherwise the
    # new columns would be filled with NaN.
    coords.index = df.index

    # Drops the location column on a new dataframe so the caller's table is not modified
    result = df.drop(columns=["location"])

    # Adds the coordinates as numbers (values that cannot be parsed become NaN) and the street name
    result["latitude"] = pd.to_numeric(coords["latitude"], errors="coerce")
    result["longitude"] = pd.to_numeric(coords["longitude"], errors="coerce")
    result["street_name"] = coords["street.name"]

    # Returns dataframe
    return result

# Replaces the location column with latitude, longitude and street name columns
df = extract_coordinates_and_street(df)
# Previews the first rows
df.head()

Unnamed: 0,category,location_type,context,outcome_status,persistent_id,id,location_subtype,month,latitude,longitude,street_name
0,violent-crime,Force,,"{'category': 'Under investigation', 'date': '2...",df6cc51975204a343e55a7e08a8a3f1029fbde63d2e931...,131982578,,2025-10,52.062879,-0.019925,On or near Mccrae Close
1,criminal-damage-arson,Force,,{'category': 'Investigation complete; no suspe...,f62c697dd5d263369b8904b568da6976f28ebf92191792...,131794527,,2025-10,51.684363,-0.012469,On or near Lea Road
2,vehicle-crime,Force,,{'category': 'Investigation complete; no suspe...,9bb62a2928602e3bbd9feb391414001832834b187866d5...,131987899,,2025-10,51.684363,-0.012469,On or near Lea Road
3,anti-social-behaviour,Force,,Unknown,,131988851,,2025-10,51.685564,-0.031441,On or near Waltham Cross Bus Station
4,anti-social-behaviour,Force,,Unknown,,131989323,,2025-10,51.684728,-0.021813,On or near Alexandra Way


## Separating Date Column

In [None]:
def extract_date_components(df):

    """

    A function to extract year and month from the month column.

    Input: Crime dataframe whose "month" column holds "YYYY-MM" strings.

    Output: New crime dataframe with a new "year" column and with "month"
    replaced by the month number. The dataframe passed in is left unchanged.

    """

    # Converts month column to datetime format
    parsed_months = pd.to_datetime(df["month"], format="%Y-%m")

    # Works on a copy so the caller's table keeps its original "YYYY-MM" strings
    result = df.copy()

    # Extracts year and month; "month" is overwritten in place and "year" is appended as the last column
    result["year"] = parsed_months.dt.year
    result["month"] = parsed_months.dt.month

    # Returns dataframe
    return result

# Splits the month column into separate year and month number columns
df = extract_date_components(df)
# Previews the first rows
df.head()

Unnamed: 0,category,location_type,context,outcome_status,persistent_id,id,location_subtype,month,latitude,longitude,street_name,year
0,violent-crime,Force,,"{'category': 'Under investigation', 'date': '2...",df6cc51975204a343e55a7e08a8a3f1029fbde63d2e931...,131982578,,10,52.062879,-0.019925,On or near Mccrae Close,2025
1,criminal-damage-arson,Force,,{'category': 'Investigation complete; no suspe...,f62c697dd5d263369b8904b568da6976f28ebf92191792...,131794527,,10,51.684363,-0.012469,On or near Lea Road,2025
2,vehicle-crime,Force,,{'category': 'Investigation complete; no suspe...,9bb62a2928602e3bbd9feb391414001832834b187866d5...,131987899,,10,51.684363,-0.012469,On or near Lea Road,2025
3,anti-social-behaviour,Force,,Unknown,,131988851,,10,51.685564,-0.031441,On or near Waltham Cross Bus Station,2025
4,anti-social-behaviour,Force,,Unknown,,131989323,,10,51.684728,-0.021813,On or near Alexandra Way,2025


## Combining Cleaning into one Step

In [None]:
def cleaning(df):

    """

    Runs all cleaning steps on the crime dataframe, one after the other.

    Input: Crime dataframe.

    Output: Cleaned crime dataframe.

    """

    # The cleaning steps in the order they are applied:
    # fill blank outcomes, remove duplicates, split the location, split the date
    steps = (
        fill_blank_outcome_status,
        remove_duplicates,
        extract_coordinates_and_street,
        extract_date_components,
    )

    # Feeds the result of each step into the next one
    for step in steps:
        df = step(df)

    # Returns cleaned dataframe
    return df

# Graphs

## First graph

In [None]:
def get_columns_