# Air quality across Newcastle, Gateshead and North Tyneside

These graphs show the air quality data obtained from a small number of precision monitoring stations located across the region, and compare them to values that have been observed during the same time-period in previous years.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
from datetime import datetime, timedelta, date
import calendar
import dateutil.tz

import calendar
## Calendar helper with firstweekday=0
cal = calendar.Calendar(firstweekday=0)

## Plot-wide matplotlib defaults: base font size and the timezone used for date handling
matplotlib.rcParams['font.size'] = 13
matplotlib.rcParams['timezone'] = 'Europe/London'

## The code threw up some SettingWithCopyWarnings that I will fix at some point
## NOTE: this silences *every* warning category, not only SettingWithCopyWarning
import warnings
warnings.simplefilter('ignore')

In [None]:
## tzinfo object for UK local time
tzLocal = dateutil.tz.gettz('Europe/London')
## Midnight at the start of today's (system) date, tagged with the UK timezone
dateToday = datetime.combine(date.today(), datetime.min.time(), tzinfo=tzLocal)

In [None]:
## Load the cached API extracts (baseline history and the most recent update)
## NOTE: pickle files must only ever come from our own trusted cache - unpickling untrusted data can execute code
rawDFBaseline = pd.read_pickle('../cache/baseline-airquality-airmon.pkl')
rawDFRecent = pd.read_pickle('../cache/update-airquality-airmon.pkl')

## Timestamps are stored as UTC strings ('YYYY-MM-DD HH:MM:SS').
## Convert the latest one to UK local time with a real timezone conversion, so the
## printed time is right in both GMT and BST (a fixed +1 hour is only valid during BST).
lastReading = datetime.strptime(rawDFRecent['Timestamp'].max(), '%Y-%m-%d %H:%M:%S')
lastReading = (lastReading.replace(tzinfo=dateutil.tz.tzutc())
               .astimezone(dateutil.tz.gettz('Europe/London'))
               .replace(tzinfo=None))   ## keep it naive, as before, for any later comparisons

print('Last data obtained %s' 
    % (lastReading.strftime('%d %B %Y %H:%M')))

In [None]:
sensors = rawDFBaseline["Sensor Name"].unique().tolist()

## Split sensors into those with baseline data, and those without.
## A sensor counts as having a baseline when its earliest reading predates 2019.
baselineCutoff = datetime(2019, 1, 1)
sensorListBaseline = []
sensorListNoBaseline = []
for s in sensors:
    sensorRows = rawDFBaseline.loc[rawDFBaseline['Sensor Name'] == s]
    firstReading = pd.to_datetime(sensorRows["Timestamp"].min())
    if firstReading < baselineCutoff:
        sensorListBaseline.append(s)
    else:
        sensorListNoBaseline.append(s)

## Sensor ID -> display name used for plot titles
## Not sure how to un-hard-code these - couldn't see any 'User Friendly Names' in the metadata
sensorNames = {
    'PER_AIRMON_MONITOR1048100': 'Coast Road at Centurion Park',
    'PER_AIRMON_MONITOR1156100': 'Durham Road at Angel of the North',
    'PER_AIRMON_MONITOR1056100': 'Tyne Bridge (A167)',
    'PER_AIRMON_MONITOR914': 'Gosforth Street - Salters Road',
    'PER_AIRMON_MONITOR1155100': 'Four Lane Ends: Front Street - Benton Road',
    'PER_AIRMON_MONITOR1135100': 'Pilgrim Street - Hood Street',
    'PER_AIRMON_MONITOR915': 'Jesmond Road - Coast Road',
    'PER_AIRMON_MONITOR1157100': 'St James Boulevard - Sunderland Street'
}

In [None]:
## Set start date of lockdown impact analysis
## Currently set to 2nd March 2020 - start of the pre-lockdown comparison week
startDate = '2020-03-02 00:00:00'
## Last timestamp kept in the baseline (end of Feb 2020), so the baseline does not overlap the plotted measurements
baselineEnd = '2020-02-29 23:45:00'

In [None]:
## Tidy up dataframes

## Columns unnecessary for analysis
_unusedColumns = ['Flagged as Suspect Reading','Location (WKT)','Ground Height Above Sea Level',
        'Sensor Height Above Ground','Broker Name','Third Party','Sensor Centroid Longitude',
        'Sensor Centroid Latitude','Raw ID','Units']


def _isoWeekNumber(timestamps):
    """Return the ISO week number (as plain ints) for a datetime Series."""
    try:
        ## `.dt.week` was removed in pandas 2; `.dt.isocalendar()` is its replacement (pandas >= 1.1).
        ## Cast to int so later arithmetic (currentWk-4 etc.) is not done on an unsigned dtype.
        return timestamps.dt.isocalendar().week.astype(int)
    except AttributeError:
        ## Older pandas without isocalendar()
        return timestamps.dt.week


def _tidyReadings(readings):
    """Drop suspect readings, add WeekNumber/Weekday/Hour columns, remove unused columns.

    Works on an explicit copy, so the input frame is untouched and pandas has no
    reason to raise SettingWithCopyWarning.
    """
    tidy = readings[readings["Flagged as Suspect Reading"]==False].copy()
    ## Parse the timestamp strings once and reuse the result for all three columns
    parsed = pd.to_datetime(tidy['Timestamp'])
    tidy["WeekNumber"] = _isoWeekNumber(parsed)
    tidy["Weekday"] = parsed.dt.weekday
    tidy["Hour"] = parsed.dt.hour
    return tidy.drop(columns=_unusedColumns)


## Baseline
## Keep only readings up to the baseline end date (so no overlap with plotted measurements)
baselineDF = _tidyReadings(rawDFBaseline[rawDFBaseline['Timestamp']<=baselineEnd])

## Recent
## Keep only readings from the analysis start date onwards
recentDF = _tidyReadings(rawDFRecent[rawDFRecent['Timestamp']>=startDate])

In [None]:
## Week number of the most recent reading (assumes recentDF is in chronological order)
currentWk = recentDF['WeekNumber'].iloc[-1]

## Week of 2nd March 2020 (pre-lockdown reference week)
preLockdownWk = 10

## Restrict Baseline to week n +/-4 weeks
## NOTE(review): this window does not wrap around the year boundary (e.g. week 2 will not pick up weeks 51/52)
baselineDFWks = baselineDF[baselineDF['WeekNumber'].between(currentWk-4, currentWk+4)]

## Select data from week of 2nd March (pre-lockdown)
preLockdownDF = recentDF[recentDF['WeekNumber'] == preLockdownWk]

## Select recent data for current and preceding week
postLockdownDF = recentDF[recentDF['WeekNumber'] >= currentWk-1]

## Combine (pd.concat replaces DataFrame.append, which was removed in pandas 2).
## Week 10 rows are excluded from the second frame so they are not duplicated
## when the current/preceding week window already reaches back to week 10.
recentDFWks = pd.concat([preLockdownDF,
                         postLockdownDF[postLockdownDF['WeekNumber'] != preLockdownWk]])

## For each station, seasonal data from previous years have been aggregated (by hour & day) and compared to real-time data (aggregated by hour & day) since the lockdown was announced. 

* The shaded area shows the seasonal normal range (15th–85th percentile) for each day/hour. This is calculated from data collected between Jan 2017 and Feb 2020, with records corresponding to the current week of the year, plus 4 weeks either side, selected from previous years to represent seasonal normality.
* The dotted line represents the median value for each day/hour, calculated from the same data subset used for the shaded area.
* The solid lines represent the aggregated observed data for the current week, last week, and the week starting 2nd March (pre-societal measures).

Plots are all in UTC.

<span style="color:red;font-size:16pt;font-weight:bold"> Important! The data presented here is not corrected for weather conditions at the time of measurement. In particular, wind speed and direction can heavily influence air quality readings. Some displayed variations may be caused by environmental conditions, rather than being a direct impact of the Covid-19 societal measures. </span>





In [None]:
## Plot, for every sensor with baseline data, one figure with a subplot per key variable:
## seasonal 15-85 percentile band + median from the baseline, and one line per recent week.
colourArray = ['#f64a8a', '#233067', '#00A7CC']

## Key Variables - easy to add more plots 
## Wasn't sure what are 'key' variables, and didn't want to overload with information
## Both Baseline and Recent api calls now have everything, so theoretically adding here should be the only required change
keyVariables = ['NO2','PM2.5','PM10']
variableFullNames = {
    'NO2': r'$NO_{2}$',
    'PM2.5': r'$PM_{2.5}$',
    'PM10': r'$PM_{10}$'
}

aggregateColumns = ['Weekday', 'Hour']

## One row per hour of the week (Mon 00 ... Sun 23). Every aggregate is aligned to this grid,
## so the x position of a point is always Weekday*24 + Hour, even if some hours have no data.
weekGrid = pd.DataFrame([(d, h) for d in range(7) for h in range(24)], columns=aggregateColumns)

## A tick every 12 hours: x=12 is Monday 12:00, x=24 is Tuesday 00:00, ... x=168 is the following Monday 00:00
tickPositions = list(range(0, 169, 12))
tickLabels = ['Mon 00','Mon 12','Tues 00','Tues 12','Wed 00','Wed 12','Thurs 00','Thurs 12',
              'Fri 00','Fri 12','Sat 00','Sat 12','Sun 00','Sun 12','Mon 00']

for s in sensorListBaseline:
    ## Fall back to the raw sensor ID if no friendly name has been hard-coded
    mainTitle = sensorNames.get(s, s)
    
    ## Select Data Records for Desired Sensor
    sensorDFBaseline = baselineDFWks[baselineDFWks["Sensor Name"]==s]
    sensorDFRecent = recentDFWks[recentDFWks["Sensor Name"]==s]
    
    ## List Week Numbers (in order of appearance) available for this sensor
    sensorWeeks = sensorDFRecent['WeekNumber'].unique().tolist()

    ## Check if sensor is currently recording each variable:
    ## keep a variable only if it has data and its latest reading is in the current week
    variables = []
    for v in keyVariables:
        temp = sensorDFRecent[sensorDFRecent["Variable"]==v]
        if not temp.empty and temp["WeekNumber"].iloc[-1] == currentWk:
            variables.append(v)

    ## Nothing to plot for this sensor - skip it rather than titling/saving a stale or empty figure
    if not variables:
        continue
            
    ## One subplot row per variable; squeeze=False so axs is always a 2-D array
    plotWindows = len(variables)
    fig, axs = plt.subplots(plotWindows, 1, figsize=(18, plotWindows * 6.5), squeeze=False)
    
    for ax, v in zip(axs[:, 0], variables):
        ## Aggregate Baseline Data (only the 'Value' column, so non-numeric columns cannot break the aggregation)
        baselineValues = sensorDFBaseline[sensorDFBaseline["Variable"]==v].groupby(aggregateColumns)['Value']
        ## NOTE: baselineMean holds the *median*; the name is kept because later cells read it
        baselineMean = weekGrid.merge(baselineValues.median().reset_index(), how='left', on=aggregateColumns)
        baselineLQ = weekGrid.merge(baselineValues.quantile(.15).reset_index(), how='left', on=aggregateColumns)
        baselineHQ = weekGrid.merge(baselineValues.quantile(.85).reset_index(), how='left', on=aggregateColumns)
        
        variableDFRecent = sensorDFRecent[sensorDFRecent["Variable"]==v]
                
        ## Find Max Y Value for plots: larger of the baseline upper band and the
        ## 99.93rd percentile of recent values (Series.max ignores a missing baseline)
        maxY = pd.Series([baselineHQ["Value"].max(),
                          variableDFRecent["Value"].quantile(.9993)]).max()
        yaxLabel = v + r' ($μg m^{-3}$)'
        
        ## Set up subplot
        ax.set_title(variableFullNames[v], fontsize=14)
        ax.set_xlim(0, 168)
        ax.set_ylim(0, maxY)
        ax.set_xlabel('Day of week')
        ax.set_xticks(tickPositions)
        ax.set_xticklabels(tickLabels)
        ax.set_ylabel(yaxLabel)

        ## Plot Baseline Quantiles 
        ax.fill_between(x=weekGrid.index,y1=baselineLQ['Value'],y2=baselineHQ['Value'],
                    color ="#f64a8a",alpha=0.2,linewidth=0,label="15-85%ile: Seasonal Average")
        
        ## Plot Baseline Median Lines
        ax.plot(weekGrid.index,baselineMean["Value"], color = "#f64a8a",linestyle=':',
                 alpha=0.4,label="Median: Seasonal Average")
        
        ## Weeks available for this specific sensor/variable.
        ## Built fresh per variable so a gap in one variable does not hide that week for the others.
        weeksPresent = variableDFRecent['WeekNumber'].unique().tolist()
        weekList = [w for w in sensorWeeks if w in weeksPresent]
        latestWeek = weekList[-1]
        
        ## Latest week gets the first colour in colourArray, earlier weeks the following ones
        colours = list(reversed(colourArray[:len(weekList)]))

        for i, w in enumerate(weekList):
            weekDFRecent = variableDFRecent[variableDFRecent['WeekNumber']==w]

            ## Find Monday of week from first record of week (works for any year)
            firstReading = pd.to_datetime(weekDFRecent['Timestamp']).min()
            wkMon = (firstReading - timedelta(days=firstReading.weekday())).strftime('%b-%d')
        
            ## Aggregate Covid Data and align it to the hour-of-week grid
            covidMean = weekDFRecent.groupby(aggregateColumns)['Value'].median().reset_index()
            mergeDF = weekGrid.merge(covidMean, how='left', on=aggregateColumns)

            ## The latest week is drawn thicker and fully opaque
            isLatest = (w == latestWeek)
            ax.plot(mergeDF.index, mergeDF["Value"], label="Week of "+wkMon,
                    color=colours[i % len(colours)],
                    linewidth=2.0 if isLatest else 1.0,
                    alpha=1.0 if isLatest else 0.7)

        ax.legend(loc=0, prop={'size': 9})

    ## Title sits closer to the axes when there are several stacked subplots
    fig.suptitle(mainTitle, x=0.5, y=0.91 if len(variables) > 1 else 0.97, fontsize=14)

    ## File name carries the last week plotted for the final variable
    fig.savefig('../output/airquality-' + s.lower() + '-week' + str(latestWeek) + '.png', bbox_inches='tight')
    plt.show()


#### The following stations have been installed within the last 6 months. As a result, there is insufficient data to create a seasonal baseline.

In [None]:
## Plot, for every sensor WITHOUT baseline data, one figure with a subplot per key variable,
## showing one line per recent week (no seasonal band).
colourArray = ['#f64a8a', '#233067', '#00A7CC']

## Key Variables - easy to add more plots 
## Wasn't sure what are 'key' variables, and didn't want to overload with information
## Both Baseline and Recent api calls now have everything, so theoretically adding here should be the only required change
keyVariables = ['NO2','PM2.5','PM10']
variableFullNames = {
    'NO2': r'$NO_{2}$',
    'PM2.5': r'$PM_{2.5}$',
    'PM10': r'$PM_{10}$'
}

aggregateColumns = ['Weekday', 'Hour']

## One row per hour of the week (Mon 00 ... Sun 23). Built here rather than borrowed from a
## baseline aggregate of another sensor, so this cell does not depend on the baseline plots having run.
weekGrid = pd.DataFrame([(d, h) for d in range(7) for h in range(24)], columns=aggregateColumns)

## A tick every 12 hours: x=12 is Monday 12:00, x=24 is Tuesday 00:00, ... x=168 is the following Monday 00:00
tickPositions = list(range(0, 169, 12))
tickLabels = ['Mon 00','Mon 12','Tues 00','Tues 12','Wed 00','Wed 12','Thurs 00','Thurs 12',
              'Fri 00','Fri 12','Sat 00','Sat 12','Sun 00','Sun 12','Mon 00']

## Footer 'y' position by number of subplots
## 'y' value might need adjusting in future as more plots added (no position defined for 2 subplots)
footerY = {3: 0.075, 1: 0.0015}

## Work out up front which sensors have something to plot, so the footer can go on the true last figure
plottable = []
for s in sensorListNoBaseline:
    sensorDFRecent = recentDFWks[recentDFWks["Sensor Name"]==s]
    ## Check if sensor is currently recording each variable:
    ## keep a variable only if it has data and its latest reading is in the current week
    variables = []
    for v in keyVariables:
        temp = sensorDFRecent[sensorDFRecent["Variable"]==v]
        if not temp.empty and temp["WeekNumber"].iloc[-1] == currentWk:
            variables.append(v)
    if variables:
        plottable.append((s, sensorDFRecent, variables))

for iterPlot, (s, sensorDFRecent, variables) in enumerate(plottable, start=1):
    ## Fall back to the raw sensor ID if no friendly name has been hard-coded
    mainTitle = sensorNames.get(s, s)
    
    ## List Week Numbers (in order of appearance) available for this sensor
    sensorWeeks = sensorDFRecent['WeekNumber'].unique().tolist()
        
    ## One subplot row per variable; squeeze=False so axs is always a 2-D array
    plotWindows = len(variables)
    fig, axs = plt.subplots(plotWindows, 1, figsize=(18, plotWindows * 6.5), squeeze=False)
    
    for ax, v in zip(axs[:, 0], variables):
        variableDFRecent = sensorDFRecent[sensorDFRecent["Variable"]==v]
                
        ## Find Max Y Value for plots (99.93rd percentile, so isolated spikes do not set the scale)
        maxY = variableDFRecent["Value"].quantile(.9993)
        yaxLabel = v + r' ($μg m^{-3}$)'
        
        ## Set up subplot
        ax.set_title(variableFullNames[v], fontsize=14)
        ax.set_xlim(0, 168)
        ax.set_ylim(0, maxY)
        ax.set_xlabel('Day of week')
        ax.set_xticks(tickPositions)
        ax.set_xticklabels(tickLabels)
        ax.set_ylabel(yaxLabel)
        
        ## Weeks available for this specific sensor/variable.
        ## Built fresh per variable so a gap in one variable does not hide that week for the others.
        weeksPresent = variableDFRecent['WeekNumber'].unique().tolist()
        weekList = [w for w in sensorWeeks if w in weeksPresent]
        latestWeek = weekList[-1]
        
        ## Latest week gets the first colour in colourArray, earlier weeks the following ones
        colours = list(reversed(colourArray[:len(weekList)]))

        for i, w in enumerate(weekList):
            weekDFRecent = variableDFRecent[variableDFRecent['WeekNumber']==w]

            ## Find Monday of week from first record of week (works for any year)
            firstReading = pd.to_datetime(weekDFRecent['Timestamp']).min()
            wkMon = (firstReading - timedelta(days=firstReading.weekday())).strftime('%b-%d')
        
            ## Aggregate Covid Data and align it to the hour-of-week grid
            covidMean = weekDFRecent.groupby(aggregateColumns)['Value'].median().reset_index()
            mergeDF = weekGrid.merge(covidMean, how='left', on=aggregateColumns)

            ## The latest week is drawn thicker and fully opaque
            isLatest = (w == latestWeek)
            ax.plot(mergeDF.index, mergeDF["Value"], label="Week of "+wkMon,
                    color=colours[i % len(colours)],
                    linewidth=2.0 if isLatest else 1.0,
                    alpha=1.0 if isLatest else 0.7)

        ax.legend(loc=0, prop={'size': 9})

    ## Title sits closer to the axes when there are several stacked subplots
    fig.suptitle(mainTitle, x=0.5, y=0.91 if len(variables) > 1 else 0.97, fontsize=14)

    ## File name carries the last week plotted for the final variable
    fig.savefig('../output/airquality-' + s.lower() + '-week' + str(latestWeek) + '.png', bbox_inches='tight')

    ## Add Figure Text to Bottom Image for Dashboard
    ## (added after savefig, so it appears in the notebook output but not in the saved PNG)
    if iterPlot == len(plottable) and len(variables) in footerY:
        fig.text(0.09, footerY[len(variables)], 'Urban Observatory (https://www.urbanobservatory.ac.uk/).\n'
            'Miles Clement <m.a.clement2@ncl.ac.uk>.', horizontalalignment='left',color='#606060',
                fontdict={'size': 11})

    plt.show()
