In [None]:
## Suggest getting Lindsay to update below text before going live.

# Air Quality in Newcastle and the North East

These graphs analyse the air quality data obtained from a small number of precision monitoring stations located across Newcastle and the North-East.

These are raw data and have not been corrected for atmospheric and tunnelling effects. Any conclusions drawn from the values or trends should therefore be treated with caution.


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
from datetime import datetime

## Global matplotlib settings: base font size and the 'timezone' rcParam
matplotlib.rcParams['font.size'] = 13
matplotlib.rcParams['timezone'] = 'Europe/London'

## Silence every warning for the rest of the notebook.
## The code raised some SettingWithCopyWarnings that still need fixing at some point.
import warnings
warnings.filterwarnings('ignore')

In [None]:
## Read the cached baseline and update (recent) datasets from local pickle files.
## NOTE: pickle files can execute arbitrary code when loaded - only load caches produced by this project.
baselineAQData = pd.read_pickle('../cache/baseline-airquality-airmon-api.pkl')
recentAQData = pd.read_pickle('../cache/update-airquality-airmon-api.pkl')

## Report the most recent reading in the update dataset.
## The 'Timestamp' column is parsed here as a 'YYYY-MM-DD HH:MM:SS' string.
latestStamp = recentAQData['Timestamp'].max()
lastReading = datetime.strptime(latestStamp, '%Y-%m-%d %H:%M:%S')
lastReadingText = lastReading.strftime('%d %B %Y %H:%M')
print('Last data obtained %s' % lastReadingText)

In [None]:
## Every sensor present in the recent dataset, in order of first appearance
sensorList = recentAQData.loc[:, "Sensor Name"].unique()

## Display name for each sensor ID.
## These are hard-coded because no 'User Friendly Names' could be found in the metadata.
friendlyNames = dict([
    ('PER_AIRMON_MONITOR1048100', 'Coast Road @ Centurion Park'),
    ('PER_AIRMON_MONITOR1156100', 'Angel of the North'),
    ('PER_AIRMON_MONITOR1056100', 'Tyne Bridge: A167 Gateshead'),
    ('PER_AIRMON_MONITOR914', 'Gosforth Street - Salters Road'),
    ('PER_AIRMON_MONITOR1155100', 'Four Lane Ends: Front Street - Benton Road'),
    ('PER_AIRMON_MONITOR1135100', 'Pilgrim Street - Hood Street'),
    ('PER_AIRMON_MONITOR915', 'Jesmond Road - Coast Road'),
    ('PER_AIRMON_MONITOR1157100', 'St James Boulevard - Sunderland Street'),
])

In [None]:
## Counter for the figure currently being drawn (starts at the first figure)
iterPlot = 1

## First timestamp included in the lockdown impact analysis (16th March 2020).
## Needed because the pickle holds data from 1st March onwards, which is
## intended for use in the week-by-week analysis.
startDate = '2020-03-16 00:00:00'

## For each station, data since the 16th March 2020 has been aggregated for each hour and day, with the median value shown on the graphs below.

* The shaded area represents the normal range (15th to 85th percentile) for each day/hour, calculated from data between November 2019 and February 2020.
* The dotted line represents the median for each day/hour, calculated from the same time period.
* The solid line represents the aggregated observed data since the 16th March.

Note: Two stations (Jesmond Road - Coast Road, & St James Boulevard - Sunderland Street) do not provide Particulate Matter (PM) 2.5 or PM 10 data.


In [None]:
## Columns needed for the analysis; every other column in the datasets is ignored
_analysisColumns = ['Variable', 'Value', 'Units', 'Weekday', 'Hour']

## x-axis layout: one tick every 12 hours across the 168-hour week (Mon 00 = 0).
## Ticks and labels are generated together so their counts always match
## (matplotlib raises a ValueError when they differ).
_weekTicks = list(range(0, 168, 12))
_weekTickLabels = ['%s %s' % (day, hour)
                   for day in ['Mon', 'Tues', 'Wed', 'Thurs', 'Fri', 'Sat', 'Sun']
                   for hour in ['00', '12']]


def _selectSensorReadings(readings, sensorName, fromDate=None):
    """Return the usable readings of one sensor with 'Weekday' and 'Hour' added.

    Parameters
    ----------
    readings : DataFrame with at least the columns 'Sensor Name', 'Timestamp',
        'Flagged as Suspect Reading', 'Variable' and 'Value'.
    sensorName : value of 'Sensor Name' to keep.
    fromDate : optional lower bound compared directly against 'Timestamp'
        (assumes the column compares correctly with this value - e.g. ISO
        formatted strings; TODO confirm against the API output).

    Returns
    -------
    A new DataFrame (never a view of `readings`, so adding columns cannot raise
    SettingWithCopyWarning) restricted to the columns used by the analysis.
    """
    keep = readings["Sensor Name"] == sensorName
    if fromDate is not None:
        keep = keep & (readings['Timestamp'] >= fromDate)
    ## Explicit comparison with False so missing flags are excluded, as before
    keep = keep & readings["Flagged as Suspect Reading"].eq(False)

    selected = readings.loc[keep].copy()
    timestamps = pd.to_datetime(selected['Timestamp'])
    selected["Weekday"] = timestamps.dt.weekday
    selected["Hour"] = timestamps.dt.hour
    return selected[[c for c in _analysisColumns if c in selected.columns]]


def _hourOfWeekProfile(sensorReadings, variable):
    """Aggregate one variable by weekday and hour.

    Returns a DataFrame with one row per (Weekday, Hour) group present in the
    data and the columns 'Median', 'Lower' (15th percentile), 'Upper'
    (85th percentile) and 'HourOfWeek' (= Weekday * 24 + Hour).
    'HourOfWeek' is the x position to plot at: using it instead of the row
    index keeps points aligned with the tick labels when a group is missing.
    """
    values = (sensorReadings[sensorReadings["Variable"] == variable]
              .groupby(['Weekday', 'Hour'])['Value'])
    profile = pd.DataFrame({
        'Median': values.median(),
        'Lower': values.quantile(.15),
        'Upper': values.quantile(.85),
    }).reset_index()
    profile['HourOfWeek'] = profile['Weekday'] * 24 + profile['Hour']
    return profile


## One figure per sensor, one panel per measured variable.
## The loop counter comes from enumerate so re-running this cell on its own
## still detects the last figure correctly.
for plotNumber, s in enumerate(sensorList, start=1):
    ## Fall back to the raw sensor ID if no friendly name has been hard-coded
    mainTitle = friendlyNames.get(s, s)

    ## Select Data Records for Desired Sensor (suspect readings removed)
    baselineSensor = _selectSensorReadings(baselineAQData, s)
    ## Recent data is limited to after the Covid-19 lockdown start date
    recentSensor = _selectSensorReadings(recentAQData, s, fromDate=startDate)

    ## Key Variables - easy to add more plots
    ## Wasn't sure what are 'key' variables, and didn't want to overload with information
    ## Both Baseline and Recent api calls now have everything, so theoretically adding here should be the only required change
    ## Only variables actually measured in the recent data are plotted
    measured = set(recentSensor['Variable'].unique())
    variables = [v for v in ['NO2', 'PM2.5', 'PM10'] if v in measured]

    ## squeeze=False always returns a 2D array of axes, so single- and
    ## multi-panel figures share one code path (no separate row counter needed)
    nrows = max(len(variables), 1)
    fig, axs = plt.subplots(nrows, 1, figsize=(18, nrows * 6.5), squeeze=False)

    for ax, v in zip(axs[:, 0], variables):
        ## Units are taken from the first recent record of this variable
        recentVariable = recentSensor[recentSensor["Variable"] == v]
        yaxLabel = v + ' (' + recentVariable['Units'].iloc[0] + ')'

        ## Aggregate Baseline and Covid Data by weekday/hour
        baselineProfile = _hourOfWeekProfile(baselineSensor, v)
        recentProfile = _hourOfWeekProfile(recentSensor, v)

        ## Set up subplot
        ax.set_title(s + ' : ' + v, fontsize=12)
        ax.set_xlim(0, 167)
        ax.set_xlabel('Day/Hour')
        ax.set_xticks(_weekTicks)
        ax.set_xticklabels(_weekTickLabels)
        ax.set_ylabel(yaxLabel)

        ## Plot Quantiles (baseline only).
        ## The percentile range for the post-lockdown data is intentionally not
        ## drawn for now; recentProfile already holds 'Lower'/'Upper' if it is
        ## wanted as lockdown continues.
        ax.fill_between(x=baselineProfile['HourOfWeek'],
                        y1=baselineProfile['Lower'], y2=baselineProfile['Upper'],
                        color="#f64a8a", alpha=0.2, linewidth=0,
                        label='15 to 85%ile: Nov 2019 - Feb 2020')

        ## Plot Median Lines
        ax.plot(baselineProfile['HourOfWeek'], baselineProfile['Median'],
                color="#f64a8a", linestyle=':', alpha=0.4,
                label="Median: Nov 2019 - Feb 2020")
        ax.plot(recentProfile['HourOfWeek'], recentProfile['Median'],
                color="#233067", label="Median: Post-16th March")

        ## A shared figure-level legend could not be made to work, so every
        ## panel carries its own legend for now.
        ax.legend(loc=0, prop={'size': 9})

    ## Different Title Layout depending on Number of Panels
    if len(variables) > 1:
        fig.suptitle('Air Quality - %s' % mainTitle, x=0.5, y=0.91, fontsize=15, fontweight='bold')
    else:
        fig.suptitle('Air Quality - %s' % mainTitle, fontsize=15, fontweight='bold')

    ## Savefig now for consistency - Figure Text only added to 'Bottom' image for now
    ## (i.e. the footer below appears in the notebook output, not in the saved file)
    ## Can update to be one all big image if needed
    station = s.lower()
    timestamp = datetime.now().strftime("%d-%m")
    fig.savefig('../output/airquality-' + station + '-' + timestamp + '.png', bbox_inches='tight')

    ## Add Figure Text to Bottom Image for Dashboard
    if plotNumber == len(sensorList):
        ## Vertical position differs between multi-panel and single-panel figures
        footerY = 0.075 if len(variables) > 1 else 0
        fig.text(
            0.09,
            footerY,
            'Urban Observatory (https://www.urbanobservatory.ac.uk/).\n'
            'Miles Clement <m.a.clement2@ncl.ac.uk>.',
            horizontalalignment='left',
            color='#606060',
            fontdict={'size': 11}
        )

    plt.show()
    ## Release the figure so memory does not grow with the number of sensors
    plt.close(fig)
