In [18]:
! pip install dash



In [19]:
import pandas as pd
import requests
from dash import Dash, dcc, html, Input, Output
import plotly.express as px

In [20]:
# Now I will create the boundaries of each pollutant, NO2 & PM2.5
# I got the pollutant boundaries from DEFRA
Pollutant_boundaries = {
    "PM25":{"max_green": 35, "max_yellow": 53, "max_red": 70},
    "NO2":{"max_green": 200, "max_yellow": 400, "max_red": 600}
}

In [22]:
# API consumption using BreatheLondon's API
API_key = "AIzaSyCO8yvEqQ8_T7xqBH73Iyes62nu4AtesP4"
# The metadata for list sensors will be used to place markers on the map
# Sensor data is to colour code the markers and show the readings
List_sensors_URL = "https://breathe-london-7x54d7qf.ew.gateway.dev/ListSensors"
Sensor_data_URL = "https://breathe-london-7x54d7qf.ew.gateway.dev/SensorData"
# Creating a dict of http headers to send a request to the Breathe London server to get what I need from the API
# Following the website (Breathe London) they said I need 2 headers to call the metadata
headers = {
    "X-API-KEY": API_key,
    "Content-Type": "application/json"
}

# Now I can request for json metadata
# I can check if the requests is working through the 2nd line
def get_metadata():
  r = requests.get(List_sensors_URL, headers = headers, timeout = 60)
  r.raise_for_status()
# Since the json file isn't in the correct format for python, I have to convert it into the proper dict that python understands
  return pd.DataFrame(r.json())

# Small EDA to see if the above is running smoothly, the features in the metadata, and number of sensors they have
df_sensors = get_metadata()
print(len(df_sensors))
print(df_sensors.columns.tolist())


# Using the sensor data url to call the necessary latest readings which is PM2.5 & NO2
def sensor_readings(species):
  parameters = {"Species": species}
  readings_data = requests.get(Sensor_data_URL, headers = headers, timeout = 60)
  readings_data.raise_for_status()
# Since the json file isn't in the correct format for python, I have to convert it into the proper dict that python understands
  return pd.DataFrame(readings_data.json())

# Small EDA to see if the above is running smoothly, the features in the metadata, and number of sensors they have
df_PM25 = sensor_readings("PM25")
df_NO2 = sensor_readings("NO2")
print("PM2.5 Readings", len(df_PM25))
print("NO2 Readings", len(df_NO2))



274
['SiteCode', 'SiteName', 'DeviceCode', 'InstallationCode', 'Facility', 'Location', 'Latitude', 'Longitude', 'Borough', 'SiteClassification', 'SensorHeightAboveGround', 'DistanceToKerb', 'SponsorName', 'SiteLocationType', 'StartDate', 'EndDate', 'PowerTag', 'SiteDescription', 'SitePhotoURL', 'SensorContract']
PM2.5 Readings 466
NO2 Readings 466


In [27]:
# Formatting the data I've pulled from sensor readings into a proper table by school
def latest_by_school(df, value_name):
  # I'm keeping only the features required to ...
  df =df[["SiteCode","ScaledValue","DateTime"]].copy()
  # This converts the str DateTime pulled from the API to an actual datetime format
  # In case some of the string cant be converted I used errors so that it will not crash the code
  df["DateTime"] = pd.to_datetime(df["DateTime"], errors = "coerce", utc = True)

  # Now I want to keep the latest readings per sensor per site code (school)
  # Tail (1) gives me the last row from each group which is the latest reading
  df = df.sort_values("DateTime").groupby("SiteCode").tail(1)
  # After doing so, I will put it into a table that tells the readings per site
  return df[["SiteCode","ScaledValue"]].rename(columns = {"ScaledValue":value_name})

df_latestPM25 = latest_by_school(df_PM25, "PM25")
df_latestNO2 = latest_by_school(df_NO2, "NO2")

# To. merge both tables so it creates 1 whole table with both air pollutant readings
# SiteCode is the primary key
df = (
    df_sensors
    .merge(df_latestPM25, on = "SiteCode", how = "left")
    .merge(df_latestNO2, on = "SiteCode", how = "left")
    )

print(df[["SiteCode","PM25","NO2"]].head())

  SiteCode   PM25    NO2
0   BL0034    NaN    NaN
1   BL0020    NaN    NaN
2   BL0021  13.99  13.99
3   BL0022  15.12  15.12
4   BL0023    NaN    NaN
