In [None]:
import matplotlib
import pandas as pd
import numpy as np
import pickle
import urllib.request
import dateutil.parser
import dateutil.rrule
import dateutil.tz
import datetime
import warnings
import matplotlib.pyplot as plt
import matplotlib.dates as mdates

import matplotlib.patheffects as pe
import re

# Plotting is externalised
import importlib
import traffic

importlib.reload(traffic)

# Used across most of the plots for people flows
tzLocal = dateutil.tz.gettz('Europe/London')
# Midnight at the start of the current day. "Today" is evaluated in Europe/London
# rather than in the timezone of the machine running the notebook, so the cut-off
# does not slip by a day when the host clock is in another zone (e.g. UTC).
dateToday = datetime.datetime.combine(datetime.datetime.now(tzLocal).date(), datetime.time.min).replace(tzinfo=tzLocal)
# dtype names treated as numeric when selecting columns to total or plot
plottableTypes = ['int16', 'int32', 'int64', 'float16', 'float32', 'float64']

# Hours of the day making up the peak (07-10, 16-19) and inter-peak (10-16) subsets
peakHours = [7, 8, 9, 16, 17, 18]
interPeakHours = [10, 11, 12, 13, 14, 15]

# Maximum amount of missing data tolerated, passed to traffic.makeRelativeToBaseline.
# The values look like seconds going by the notes on each line
# NOTE(review): confirm the expected unit against traffic.makeRelativeToBaseline
capMissingAllHours = 3600 * 2.5 # No more than 2.5 hours missing

capMissingSubHours = 1800 # No more than 30 minutes missing

In [None]:
# Accumulators filled in as each place is processed further down the notebook:
#   dfPlaceAverages - one column per place, holding that place's average series
#   dfAllSeries     - period name ('all', 'peak', ...) -> frame of individual series
dfPlaceAverages = None
dfAllSeries = {}

def addAverage(placeName, average):
    """Store the average series for a place as a column of dfPlaceAverages.

    placeName -- column name to store the series under
    average   -- pandas Series; outer-joined on its index with the columns
                 already collected

    Calling this again for the same place replaces the earlier column, so a
    cell can be re-run safely. A new frame is bound to dfPlaceAverages on every
    update; the previous frame object is never modified in place, so anything
    still holding a reference to it keeps seeing consistent data.
    """
    global dfPlaceAverages
    
    average = average.to_frame(name=placeName)
    
    if dfPlaceAverages is None:
        dfPlaceAverages = average
        return
    
    # errors='ignore' makes this a no-op when the place has not been added before
    existing = dfPlaceAverages.drop(columns=[placeName], errors='ignore')
    dfPlaceAverages = existing.join(
        average, 
        how='outer'
    )
        
def addSeries(placeName, frame, period = 'all'):
    """Merge the individual series of one place into dfAllSeries[period].

    placeName -- name of the place the columns belong to; kept for interface
                 compatibility, it no longer alters any column names
    frame     -- DataFrame with one column per monitoring point
    period    -- key of the accumulator to extend (default 'all')

    Frames are combined with an outer join on the index. Columns that are
    already present under the same name are replaced by the incoming ones, so
    re-running a cell does not leave stale, renamed duplicates behind that
    would be counted again in later medians and point counts.
    """
    global dfAllSeries
    
    existing = dfAllSeries.get(period)
    
    if existing is None:
        # Copy so that later changes to the accumulator never touch the caller's frame
        dfAllSeries[period] = frame.copy()
        return
    
    stale = existing.columns.intersection(frame.columns)
    if len(stale) > 0:
        existing = existing.drop(columns=stale)
    
    dfAllSeries[period] = existing.join(
        frame, 
        how='outer'
    )

## Traffic volumes against a baseline

In the Downing Street daily press briefings during coronavirus, a series of slides are presented indicating the change in motor vehicle traffic, relative to a baseline. The [methodology is described in this document](https://www.gov.uk/government/publications/coronavirus-covid-19-transport-data-methodology-note), and [the slides are available on GOV.UK](https://www.gov.uk/government/collections/slides-and-datasets-to-accompany-coronavirus-press-conferences).

The charts shown below are intentionally presented in a similar style, but for smaller geographies and with specific measurements highlighted that could be indicators for the purpose of the journeys, such as those near to supermarkets, hospitals, connecting to the strategic road network, or near industrial complexes. 

You can [download the calculated percentages as a CSV file](https://covid.view.urbanobservatory.ac.uk/output/all-traffic-relative.csv).

### Tyne and Wear

This region's statistics are obtained from aggregate statistics collected by automatic numberplate recognition cameras (ANPR), used to invoke signal and traffic control strategies in the region. The underlying ANPR data is aggregated to four minute intervals. The data is provided by [Tyne and Wear UTMC](https://www.transportnortheast.com/public/map/map.htm) and archived by the [Newcastle Urban Observatory](http://www.urbanobservatory.ac.uk/).

In [None]:
#govChartStart = datetime.datetime.strptime('2020-03-01T00:00:00Z', '%Y-%m-%dT%H:%M:%SZ').replace(tzinfo=tzLocal)
#dateBaselineEnd = datetime.datetime.strptime('2020-03-15T23:59:59Z', '%Y-%m-%dT%H:%M:%SZ').replace(tzinfo=tzLocal)
#trafficCountInterval = 900

In [None]:
# Load the cached Tyne and Wear frame. The context manager closes the file
# handle as soon as the frame has been read.
# NOTE: unpickling can execute arbitrary code - only load cache files that were
# produced by this project.
with open('../cache/recent-traffic-volumes-pd.pkl', 'rb') as cacheFile:
    pdTrafficTyneWear = pickle.load(cacheFile)

# Reports the newest timestamp present in the frame's index
print('Last data obtained %s' 
    % (np.max(pdTrafficTyneWear.index).strftime('%d %B %Y %H:%M')))

In [None]:
# Rows whose timestamp falls inside this window (17-18 March 2020, local time)
# are removed from the frame below
skipStart = datetime.datetime(2020, 3, 17, 0, 0, 0, tzinfo=tzLocal)
skipEnd = datetime.datetime(2020, 3, 18, 23, 59, 59, tzinfo=tzLocal)

# Points have already been de-duplicated in the update script. Only unique easting/northing pairs are included
outsideSkipWindow = (pdTrafficTyneWear.index < skipStart) | (pdTrafficTyneWear.index > skipEnd)
pdTrafficTyneWear = pdTrafficTyneWear[outsideSkipWindow].copy()

In [None]:
# Whole-day figures use the looser missing-data cap
pdTrafficRecentRelativePc = traffic.makeRelativeToBaseline(pdTrafficTyneWear, capMissingAllHours)

# Peak first, then inter-peak: both restricted to a set of hours and held to the tighter cap
pdTrafficRecentRelativePcPeak, pdTrafficRecentRelativePcInterPeak = (
    traffic.makeRelativeToBaseline(pdTrafficTyneWear, capMissingSubHours, hours)
    for hours in (peakHours, interPeakHours)
)

In [None]:
# Two-letter code found in the sensor names -> display name of the authority
tyneWearAuthorities = dict(
    GH='Gateshead',
    NB='Northumberland',
    NC='Newcastle upon Tyne',
    NT='North Tyneside',
    SL='Sunderland',
    ST='South Tyneside',
    CD='County Durham'
)
# Authority code -> list of sensor columns assigned to it
tyneWearSensorZones = {}

for sensor in pdTrafficRecentRelativePc.columns:
    # The two capitals straight after the 'CAJT_' prefix are taken as the authority code
    m = re.match('CAJT_([A-Z]{2})', sensor)
    if m is not None:
        authority = m.group(1)
    elif sensor.startswith('ANPR'):
        # County Durham...
        authority = 'CD'
    else:
        authority = 'Unknown'
        
    tyneWearSensorZones.setdefault(authority, []).append(sensor)

In [None]:
def getMedian(series, authorityCode=None):
    """Return the row-wise median over the sensors of one authority.

    series        -- DataFrame with one column per sensor
    authorityCode -- key into tyneWearSensorZones selecting the columns to
                     use. When omitted, the module-level `authority` variable
                     (set by the surrounding loops) is used, which is how this
                     helper behaved before the parameter existed.

    Returns a Series whose index entries have been converted to timezone-aware
    datetimes at 00:00 in tzLocal.
    """
    if authorityCode is None:
        # Legacy behaviour: depend on whatever the enclosing loop last assigned
        authorityCode = authority
    
    pdTrafficRecentRelativeAuth = series[tyneWearSensorZones[authorityCode]]
    
    dfMedianPc = pdTrafficRecentRelativeAuth.median(axis=1)
    dfMedianPc.index = dfMedianPc.index.map(lambda d: datetime.datetime.combine(d, datetime.time.min).replace(tzinfo=tzLocal))
    
    return dfMedianPc

# Sensor column -> label for the lines picked out on each chart. The mapping does
# not depend on the authority, so it is built once rather than on every iteration.
highlights = {
    # Northumberland
    'CAJT_NBA189_MF8_RB9.end.northbound': 'A189 Bedlington\nNorthbound',
    # Key previously ended in 'southound'; corrected to follow the '.southbound'
    # suffix used by every other key - verify against the column names in the data
    'CAJT_NBA189_RB9_MF8.start.southbound': 'A189 Bedlington\nSouthbound',
    'CAJT_NBA189_MF8_RB9.start.northbound': 'A189 at A19\nNorthbound',
    'CAJT_NBA189_RB9_MF8.end.southbound': 'A189 at A19\nSouthbound',
    
    # Gateshead
    'CAJT_GHB1296_ODR3_ODR2.start.northbound': 'Twds QE Hospital\nfrom South',
    'CAJT_GHA167_DR3_NB4.start.southbound': 'Angel of North\nSouthbound',
    'CAJT_GHA1114_CR2_HD3.end.westbound': 'Metrocentre Int\nWestbound',
    'CAJT_GHA695_SG_SB.start.eastbound': 'Stargate Ind Est\ntwds Gateshead',
    'CAJT_GHA167_DR3_DR2.end.northbound': 'Low Fell\nNorthbound',
    
    # Newcastle
    'CAJT_NCA193_CR3_SRB2.end.westbound': 'Byker Bypass\nWestbound',
    'CAJT_NCA193_SRB2_SR3.end.eastbound': 'Shields Road\nShopping Eastbound',
    'CAJT_NCA695_SWR2_SWR1.end.eastbound': 'Newcastle College\nScotswood Road',
    'CAJT_NCB1307_SR2_SR1.end.westbound': 'Sandyford Road\nCivic Centre',
    'CAJT_NCA189_JDR4_GR.start.southbound': 'Jesmond Dene Rd\nSouthbound',
    
    # Sunderland
    'CAJT_SLA183_B3_CR2.end.eastbound': 'Sunderland Hosp\nfrom West',
    'CAJT_SLA1290_DL1_NO2.end.westbound': 'Nissan\nfrom A19',
    'CAJT_SLA1018_NR3_NR2.end.southbound': 'Tesco Extra\nfrom North',
    'CAJT_SLA1018_CR3_CK4.start.southbound': 'Ocean Road\nfrom North',
    
    # North Tyneside
    'CAJT_NTA188_BR5_BR6.end.northbound': 'Quorum BP\nfrom South',
    'CAJT_NTA188_BR6_BR5.start.southbound': 'Quorum BP\nfrom North',
    # Key previously ended in 'easthbound'; corrected to the '.eastbound' suffix
    # used elsewhere - verify against the column names in the data
    'CAJT_NTA1058_CR5_BR6.end.eastbound': 'Beach Road\nEastbound',
    'CAJT_NTA193_HS4_CB5.start.eastbound': 'Wallsend\nEastbound',
    'CAJT_NTA193_CB5_HS4.end.westbound': 'Wallsend\nWestbound',
    
    # South Tyneside
    'CAJT_STA185_TD1_HS2.start.westbound': 'Port of Tyne\nfrom East',
    'CAJT_STA194_AR1A_TD1.start.eastbound': 'Port of Tyne\nfrom South',
    'CAJT_STA1018_GR4_WR5.end.northbound': 'Beach Road\nNorthbound',
    'CAJT_STA1300_LL2_JRR1.end.eastbound': 'South Tyneside\nHosp Eastbound',
    'CAJT_STA184_NR3_AR4.end.eastbound': 'West Boldon\nEastbound',
    
    # 'CAJT_NCA695_SWR3_SB.start': 'Scotswood Road\nReece Group',
    #'CAJT_NCA189_SJB2_SJB1.start': 'Gallowgate - St\nJames Park'
}

# One chart per authority, and each authority's data is added to the shared accumulators
for authority in tyneWearSensorZones.keys():
    # Zones without a display name (e.g. 'Unknown') fall back to their raw code
    # instead of stopping the whole notebook with a KeyError
    placeName = tyneWearAuthorities.get(authority, authority)
    authoritySensors = tyneWearSensorZones[authority]
    
    pdTrafficRecentRelativeAuth = pdTrafficRecentRelativePc[authoritySensors]
    
    # getMedian reads the current `authority` loop variable to select the columns
    dfMedianPc = getMedian(pdTrafficRecentRelativePc)
    #dfMedianPcPeak = getMedian(pdTrafficRecentRelativePcPeak)
    #dfMedianPcInterPeak = getMedian(pdTrafficRecentRelativePcInterPeak)
    
    addAverage(placeName, dfMedianPc)
    addSeries(placeName, pdTrafficRecentRelativeAuth)
    addSeries(placeName, pdTrafficRecentRelativePcPeak[authoritySensors], 'peak')
    addSeries(placeName, pdTrafficRecentRelativePcInterPeak[authoritySensors], 'inter-peak')
    # dfMedianPc

    # Too crowded :-)
    medianSet = {
        'Median': dfMedianPc #,
        # 'Peak (7-10, 16-19)': dfMedianPcPeak,
        # 'Inter-peak (10-16)': dfMedianPcInterPeak 
    }
    # Note: this rebinds the name `plt` to whatever traffic.plotTraffic returns first
    plt, fig, ax = traffic.plotTraffic(pdTrafficRecentRelativeAuth, medianSet, highlights)

    plt.suptitle(placeName, fontsize=13, fontweight='bold')
    ax.set_title('Traffic volumes relative to baseline of %s to 15 March 2020' % (np.min(pdTrafficTyneWear.index).strftime('%d %B %Y')), fontsize=12)
    
    # Sum of the source frame over this authority's numeric columns, for the footnote
    totalObservations = '{:,}'.format(int(pdTrafficTyneWear[pdTrafficRecentRelativeAuth.select_dtypes(plottableTypes).columns].sum().sum()))
    plt.figtext(
        0.05,
        -0.07,
        'Data is the median across %u monitoring points in %s. Each point is first considered individually relative to baseline data for that day of the week, calculated over the last six months. A total of %s\n' % (len(pdTrafficRecentRelativeAuth.columns), placeName, totalObservations) +
        'vehicle observations by automatic numberplate recognition cameras were used in the creation of these statistics. Urban Observatory (https://www.urbanobservatory.ac.uk/). Luke Smith <luke.smith@ncl.ac.uk>.',
        horizontalalignment='left',
        color='#606060',
        fontdict={'size': 11}
    )

    fig.show()

### Hull

Statistics in Hull are obtained from inductive loops buried in the road surface, used as part of the SCOOT algorithm that coordinates sets of traffic signals. The underlying data should be at five-minute intervals, and represents an average vehicle flow rather than an absolute number of vehicles. It is first aggregated to 15-minute intervals, and small gaps of up to 30 minutes are filled by interpolation. The 15-minute interval should be sufficient to smooth out differences caused by the traffic light cycles themselves.

Data is provided by Hull City Council through their [open data portal](https://opendata.hullcc.gov.uk/). Thanks to Adam Jennison and his colleagues for helping to make this data available.

In [None]:
# Load the cached Hull frame. The context manager closes the file handle as
# soon as the frame has been read.
# NOTE: unpickling can execute arbitrary code - only load cache files that were
# produced by this project.
with open('../cache/hull-recent-traffic-volumes-pd.pkl', 'rb') as cacheFile:
    pdTrafficHull = pickle.load(cacheFile)

# Reports the newest timestamp present in the frame's index
print('Last data obtained %s' 
    % (np.max(pdTrafficHull.index).strftime('%d %B %Y %H:%M')))

In [None]:
# Whole-day figures for Hull
pdTrafficRecentRelativePc = traffic.makeRelativeToBaseline(pdTrafficHull, capMissingAllHours)
# Peak first, then inter-peak; the sub-period cap is doubled for this dataset
pdTrafficRecentRelativePcPeak, pdTrafficRecentRelativePcInterPeak = (
    traffic.makeRelativeToBaseline(pdTrafficHull, capMissingSubHours * 2, hours)
    for hours in (peakHours, interPeakHours)
)

# pdTrafficRecentRelativePc

In [None]:
# Median across all Hull monitoring points, leaving out rows with no value
dfMedianPc = pdTrafficRecentRelativePc.median(axis=1).dropna()
# Re-express each index entry as 00:00 on that day in the local timezone
dfMedianPc.index = dfMedianPc.index.map(
    lambda d: datetime.datetime.combine(d, datetime.time.min).replace(tzinfo=tzLocal)
)

addAverage('Hull', dfMedianPc)
# Too much volatility in this SCOOT data
#addSeries('Hull', pdTrafficRecentRelativePc)
#addSeries('Hull', pdTrafficRecentRelativePcPeak, 'peak')
#addSeries('Hull', pdTrafficRecentRelativePcInterPeak, 'inter-peak')

# dfMedianPc

In [None]:
# Detector id -> label for the lines picked out on the Hull chart
highlights = dict([
    ('N42323B', 'Morrisons at\nHolderness Road'),
    ('N48111F', 'Tesco at\nGreenwood Ave'),
    ('N11131C', 'Hull Hospital\nCar Park'),
    ('N41113F', 'Mytongate\nRoundabout'),
    ('N10111B', 'Walton St\nWest Park'),
    ('N41243H', 'ASDA\nMount Pleasant')
])

# NOTE(review): the median is handed over as a bare Series here, while other
# calls in this notebook pass a {label: series} dict - confirm plotTraffic accepts both
plt, fig, ax = traffic.plotTraffic(pdTrafficRecentRelativePc, dfMedianPc, highlights)

plt.suptitle('Hull', fontsize=13, fontweight='bold')
ax.set_title('Traffic volumes relative to baseline of 7 February to 15 March 2020', fontsize=12)

# Footnote under the chart: the first line carries the number of monitoring points
footnoteLines = [
    'Data is the median across %u monitoring points in Hull used in SCOOT traffic signal control. Each point is first considered individually relative to baseline data for that day of the week, calculated 7 Feb - 15 Mar. \n' % len(pdTrafficRecentRelativePc.columns),
    'Thanks to Adam Jennison at Hull City Council for providing data. Luke Smith <luke.smith@ncl.ac.uk>.'
]
plt.figtext(
    0.05,
    -0.07,
    ''.join(footnoteLines),
    horizontalalignment='left',
    color='#606060',
    fontdict={'size': 11}
)

fig.show()

### Sheffield

Statistics in Sheffield are obtained from inductive loops buried in the road surface, used as part of the SCOOT algorithm that coordinates sets of traffic signals. The underlying data should be at five-minute intervals, and represents an average vehicle flow rather than an absolute number of vehicles. It is first aggregated to 15-minute intervals, and small gaps of up to 30 minutes are filled by interpolation. The 15-minute interval should be sufficient to smooth out differences caused by the traffic light cycles themselves.

Data is provided by Sheffield City Council and archived by the [Sheffield Urban Flows Observatory](https://urbanflows.ac.uk/), part of the network of UKCRIC Urban Observatories.

In [None]:
# Load the cached Sheffield frame. The context manager closes the file handle
# as soon as the frame has been read.
# NOTE: unpickling can execute arbitrary code - only load cache files that were
# produced by this project.
with open('../cache/sheffield-recent-traffic-volumes-pd.pkl', 'rb') as cacheFile:
    pdTrafficSheffield = pickle.load(cacheFile)

# Reports the newest timestamp present in the frame's index
print('Last data obtained %s' 
    % (np.max(pdTrafficSheffield.index).strftime('%d %B %Y %H:%M')))

In [None]:
# Whole-day figures for Sheffield, then peak and inter-peak in that order
pdTrafficRecentRelativePc = traffic.makeRelativeToBaseline(pdTrafficSheffield, capMissingAllHours)
pdTrafficRecentRelativePcPeak, pdTrafficRecentRelativePcInterPeak = (
    traffic.makeRelativeToBaseline(pdTrafficSheffield, capMissingSubHours, hours)
    for hours in (peakHours, interPeakHours)
)

# Median across all Sheffield monitoring points, leaving out rows with no value
dfMedianPc = pdTrafficRecentRelativePc.median(axis=1).dropna()
# Re-express each index entry as 00:00 on that day in the local timezone
dfMedianPc.index = dfMedianPc.index.map(
    lambda d: datetime.datetime.combine(d, datetime.time.min).replace(tzinfo=tzLocal)
)

addAverage('Sheffield', dfMedianPc)
# Register the whole-day frame first, then the two sub-period frames
for periodFrame, periodArgs in (
    (pdTrafficRecentRelativePc, ()),
    (pdTrafficRecentRelativePcPeak, ('peak',)),
    (pdTrafficRecentRelativePcInterPeak, ('inter-peak',))
):
    addSeries('Sheffield', periodFrame, *periodArgs)

#dfMedianPc

In [None]:
# Detector id -> label for the lines picked out on the Sheffield chart
highlights = dict([
    ('[SCC]1FJD2', 'Hallamshire\nHospital'),
    ('[SCC]DET005', 'Netherthorpe Rd\nBrook Hill'),
    ('[SCC]DET009', 'Parkway\nEastbound'),
    ('[SCC]DET002', 'Hanover Way\nA625'),
    ('[SCC]D502_1', 'Chapeltown\nCowley Ln')
])

# NOTE(review): the median is handed over as a bare Series here, while other
# calls in this notebook pass a {label: series} dict - confirm plotTraffic accepts both
plt, fig, ax = traffic.plotTraffic(pdTrafficRecentRelativePc, dfMedianPc, highlights)

plt.suptitle('Sheffield', fontsize=13, fontweight='bold')
ax.set_title('Traffic volumes relative to baseline of 1 January to 15 March 2020', fontsize=12)

# No footnote is drawn for this chart. A plt.figtext(...) call used to sit here,
# disabled by wrapping it in a string; its text was the Hull footnote (Hull SCOOT
# points, 7 Feb - 15 Mar baseline, Hull City Council credit).
# TODO: write a Sheffield-specific footnote before re-enabling it.

fig.show()

### Average across all authorities

These plots include a median taken from all of the individual monitoring points across all of the authorities above. When considering the median within an authority, however, the level of monitoring varies substantially, so be aware that the median for areas such as Northumberland will be more sensitive to changes at a small number of locations.

The availability of historic data varies between authorities. A long baseline has been used where possible to calculate the percentage changes, but this will vary between a few weeks (e.g. Hull) and more than six months (e.g. Newcastle).

In [None]:
# Row-wise median over every collected series, one result per period
dfMedianAll, dfMedianPeak, dfMedianInterPeak = (
    dfAllSeries[period].median(axis=1)
    for period in ('all', 'peak', 'inter-peak')
)

In [None]:
def _atLocalMidnight(d):
    """Combine a date with 00:00 and attach the notebook's local timezone."""
    return datetime.datetime.combine(d, datetime.time.min).replace(tzinfo=tzLocal)

# Put all three median series onto the same kind of index
dfMedianAll.index = dfMedianAll.index.map(_atLocalMidnight)
dfMedianPeak.index = dfMedianPeak.index.map(_atLocalMidnight)
dfMedianInterPeak.index = dfMedianInterPeak.index.map(_atLocalMidnight)

# Align the sub-period medians to the rows of the overall median
dfMedianPeak = dfMedianPeak.reindex(dfMedianAll.index)
dfMedianInterPeak = dfMedianInterPeak.reindex(dfMedianAll.index)

# The collected 'all' frame gets the same index treatment (changed in place)
dfAllSeries['all'].index = dfAllSeries['all'].index.map(_atLocalMidnight)

In [None]:
# One line per place average, with the three overall medians drawn over them
plt, fig, ax = traffic.plotTraffic(
    dfPlaceAverages, # dfAllSeries['all'].join(dfPlaceAverages),
    {
        'Median': dfMedianAll,
        'Peak (7-10, 16-19)': dfMedianPeak,
        'Inter-peak (10-16)': dfMedianInterPeak
    },
    dict(zip(dfPlaceAverages.columns, dfPlaceAverages.columns)),
    True,
    normalLineAlpha=0.2
)

# Shaded bands between each lower quantile and its mirror image (1 - q),
# taken across the inter-peak series
percentilesToPlot = [0.1, 0.25]
for i, pc in enumerate(percentilesToPlot):
    dfLower = dfAllSeries['inter-peak'].quantile(pc, axis=1)
    dfUpper = dfAllSeries['inter-peak'].quantile(1.0 - pc, axis=1)

    # The first band is drawn slightly fainter than the rest
    if i == 0:
        bandAlpha = 0.1
    else:
        bandAlpha = 0.15

    # Only the handle from the final pass is used for the legend entry
    percentileLegend = ax.fill_between(
        dfLower.index,
        dfLower,
        dfUpper,
        color='#233067',
        linewidth=0,
        #edgecolor='red',
        alpha=bandAlpha
    )
    
plt.suptitle('All authorities (Tyne and Wear, Hull and Sheffield)', fontsize=13, fontweight='bold')
ax.set_title('Traffic volumes relative to baselines prior to mandatory social distancing measures', fontsize=12)

percentileLabel = 'Inter-peak percentiles at ' + ', '.join(str(round(p * 100)) for p in percentilesToPlot) + '%'
plt.figlegend(
    [percentileLegend],
    [percentileLabel],
    loc='upper right',
    ncol=1,
    labelspacing=0,
    handletextpad=0.4,
    columnspacing=0.4
)

# Footnote under the chart: the first line carries the number of collected series
footnoteLines = [
    'Data is the median across %u monitoring points across Tyne and Wear, Hull, and Sheffield. Each point is first considered individually relative to baseline data for that day of the week, using varying dates depending on\n' % len(dfAllSeries['all'].columns),
    'data availability. A mixture of SCOOT and ANPR data is used. Luke Smith <luke.smith@ncl.ac.uk>. Peak and inter-peak points are only provided for weekdays. Bank holidays and weekends are shaded.'
]
plt.figtext(
    0.05,
    -0.07,
    ''.join(footnoteLines),
    horizontalalignment='left',
    color='#606060',
    fontdict={'size': 11}
)

#plt.savefig('../output/traffic-all-authorities.jpg', format='jpg', dpi=800)

fig.show()

In [None]:
# Place averages plus the overall median, limited to rows dated before today
dfSummary = dfPlaceAverages.join(dfMedianAll.to_frame(name='All authorities'))
dfSummary = dfSummary.loc[dfSummary.index < dateToday]

# Written out while the index still holds datetimes
dfSummary.to_pickle('../cache/all-traffic-relative.pkl')
dfSummary.to_csv('../output/all-traffic-relative.csv')

# Human-readable row labels for display only, e.g. '01 March (Sunday)'
dfSummary.index = dfSummary.index.map(lambda d: d.strftime('%d %B (%A)'))

# Every column is shown as a percentage with one decimal place
formattersSummary = dict.fromkeys(dfSummary.columns, '{:,.1f}%')

def _dashedBorderOn(dayName, css):
    """Build a row-wise styling function that applies css to rows labelled with dayName."""
    def styleRow(data):
        return [css if (dayName in data.name) else ''] * len(data)
    return styleRow

dfSummaryStyler = dfSummary.style \
    .format(formattersSummary) \
    .set_caption('All traffic data relative to baseline') \
    .set_table_styles(
        [{'selector': "th", 'props': [('text-align', 'center'), ('word-wrap', 'break-word')]},
         {'selector': "caption", 'props': [('font-weight', 'bold'), ('font-size', '120%')]},
         {'selector': "tr th:nth-child(1)", 'props': [('width', '140px'), ('max-width', '140px'), ('text-align', 'left')]},
         {'selector': "tr td", 'props': [('width', '75px')]}
        ]
    ) \
    .bar(color='#FFA07A50', vmin=0, vmax=100.0) \
    .apply(_dashedBorderOn('Saturday', 'border-top: 1px dashed black'), axis=1) \
    .apply(_dashedBorderOn('Sunday', 'border-bottom: 1px dashed black'), axis=1)

dfSummaryStyler