In [1]:
import pandas as pd
import numpy as np
import json
import math
import urllib.request
import dateutil.parser
import dateutil.rrule
import dateutil.tz
import datetime
import re
import gc

In [2]:
# Timezone objects shared by the rest of the notebook
tzUTC = dateutil.tz.gettz('UTC')
tzLocal = dateutil.tz.gettz('Europe/London')

# Used across all of the plots
# Midnight at the start of today's date (as reported by the system clock), labelled as UTC
dateToday = datetime.datetime.combine(
    datetime.date.today(),
    datetime.time.min,
    tzinfo=tzUTC
)

# Fallback starting point for downloads when no cached data can be loaded
baselineEnd = datetime.datetime.strptime(
    '2020-03-16T23:59:59Z',
    '%Y-%m-%dT%H:%M:%SZ'
).replace(tzinfo=tzUTC)

# Resampling bin width -- presumably in seconds (480 s = 8 minutes); confirm before reuse
resampleFrequency = 480

In [3]:
# Load the series cached by a previous run so that only recent data has to be requested again.
# The cached frame is reused later in this notebook when each point's series is assembled.
# NOTE(security): unpickling can execute arbitrary code, so only load cache files that this
# notebook itself wrote.
try:
    dfPointInterpTsOld = pd.read_pickle('../cache/recent-traffic-volumes-pd.pkl')

    # Discard the most recent five days of the cache; that period is downloaded again
    dfPointInterpTsOld = dfPointInterpTsOld[dfPointInterpTsOld.index < dateToday - pd.Timedelta(days=5)]

    # An empty index would produce NaT below and break the download date range later on,
    # so treat it the same as having no cache at all
    if len(dfPointInterpTsOld.index) == 0:
        raise ValueError('cached data contains nothing older than five days')

    previousDataEnd = np.max(dfPointInterpTsOld.index).replace(tzinfo=tzLocal).astimezone(tzUTC)
    print('Loaded previous data.')
    print('  %s' % previousDataEnd)
except Exception as cacheError:
    # Best effort: any problem with the cache falls back to a full download from the end of
    # the baseline period. KeyboardInterrupt/SystemExit are deliberately not caught.
    dfPointInterpTsOld = None
    previousDataEnd = baselineEnd
    print('No existing data could be loaded.')
    print('  %s: %s' % (type(cacheError).__name__, cacheError))

Loaded previous data.
  2020-04-21 23:56:00+00:00


In [4]:
# Identify all of the journey time pair links

print('Last updated %s' % (datetime.datetime.now(tzLocal).strftime('%d %B %Y %H:%M')))

anprRequestBase = 'https://api.newcastle.urbanobservatory.ac.uk/api/v2/sensors/entity'

# Maps a feed's metric name onto the key used in each link's 'timeseriesIRIs' dictionary.
# Feeds with any other metric are not recorded.
anprMetricTimeseriesTypes = {
    'Journey time': 'timeseriesJourneyTime',
    'Number plates at start of link': 'timeseriesPlatesIn',
    'Number plates at end of link': 'timeseriesPlatesOut'
}

# Fetch every page of vehicle monitoring pair entities, keyed by system code number
anprLinks = {}
anprRequestPage = 1
anprResponse = None

# Splits a name such as "<highway> - <start> to <end>" into its three parts
anprNameMatcher = re.compile('^(.*) - (.*) to (.*)$')

# Keep requesting pages until one comes back with at most a single item
while anprResponse is None or len(anprResponse) > 1:
    # The context manager closes the HTTP response once it has been read
    with urllib.request.urlopen(
        '%s?name="Vehicle%%20monitoring%%20pair%%20"&page=%u' % (anprRequestBase, anprRequestPage)
    ) as anprRequest:
        anprResponse = json.loads(anprRequest.read().decode('utf-8'))['items']

    anprRequestPage = anprRequestPage + 1

    for journeyTimeLink in anprResponse:
        for feed in journeyTimeLink['feed']:
            systemCodeNumber = feed['brokerage'][0]['sourceId'].split(':')[0]

            # Some links are set up to only count bus numberplates for public transport journey times.
            # Feeds without any timeseries, or without a latest value, are skipped as well.
            if systemCodeNumber.endswith('_BUS') or \
               not feed['timeseries'] or \
               'latest' not in feed['timeseries'][0]:
                continue

            if systemCodeNumber not in anprLinks:
                linkDescription = journeyTimeLink['meta'].copy()
                linkDescription['timeseriesIRIs'] = {}
                anprLinks[systemCodeNumber] = linkDescription
                print('Discovered monitoring link "%s"' % anprLinks[systemCodeNumber]['longName'].strip())

            anprLinks[systemCodeNumber]['systemCodeNumber'] = systemCodeNumber

            # The metric belongs to the feed, so resolve it once rather than per timeseries
            timeseriesType = anprMetricTimeseriesTypes.get(feed['metric'])

            if timeseriesType is not None:
                for ts in feed['timeseries']:
                    for link in ts['links']:
                        if link['rel'] == 'archives':
                            anprLinks[systemCodeNumber]['timeseriesIRIs'][timeseriesType] = link['href']

            # Match against the stripped name: some names carry a trailing carriage return,
            # which would otherwise end up inside the end description
            nameElements = anprNameMatcher.match(anprLinks[systemCodeNumber]['longName'].strip())

            if nameElements is None:
                print('Unable to match name "%s". Skipping.' % anprLinks[systemCodeNumber]['longName'])
                del anprLinks[systemCodeNumber]
                continue

            anprLinks[systemCodeNumber]['highwayDescription'] = nameElements[1]
            anprLinks[systemCodeNumber]['startDescription'] = nameElements[2]
            anprLinks[systemCodeNumber]['endDescription'] = nameElements[3]

# From here on anprLinks is a DataFrame indexed by system code number
anprLinks = pd.DataFrame.from_records(list(anprLinks.values()), index=['systemCodeNumber'])
print('Discovered %u ANPR pairs.' % len(anprLinks.index))

anprLinks

Last updated 27 April 2020 09:00
Discovered monitoring link "A1 (Northbound) - A69/Hillhead Road to J77 A696/A167 Kingston Park"
Discovered monitoring link "A1 (Northbound) - Felton to Rosebrough"
Discovered monitoring link "A1 (Northbound) - J75 A69/A186 Denton Burn to J77 A696/A167 Kingston Park"
Discovered monitoring link "A1 (Northbound) - J77 A696/A167 Kingston Park to J80 A19 Seaton Burn"
Discovered monitoring link "A1 (Northbound) - J80 A19 Seaton Burn to Felton"
Discovered monitoring link "A1 (Southbound) - Felton to J80 A19 Seaton Burn"
Discovered monitoring link "A1 (Southbound) - J77 A696/A167 Kingston Park to A69/Hillhead Road"
Discovered monitoring link "A1 (Southbound) - J77 A696/A167 Kingston Park to J75 A69/A186 Denton Burn"
Discovered monitoring link "A1 (Southbound) - J80 A19 Seaton Burn to J77 A696/A167 Kingston Park"
Discovered monitoring link "A1 (Southbound) - Rosebrough to Felton"
Discovered monitoring link "A1018 North Bridge Street (Northbound) - A183 Dame Doro

Unnamed: 0_level_0,longName,shortName,endEasting,endNorthing,startEasting,startNorthing,timeseriesIRIs,highwayDescription,startDescription,endDescription
systemCodeNumber,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
ANPR_Measurement_Site_30070291,A1 (Northbound) - A69/Hillhead Road to J77 A69...,A1 NB A69 - J77 A696,420982,567494,419121,565865,{'timeseriesJourneyTime': 'https://api.newcast...,A1 (Northbound),A69/Hillhead Road,J77 A696/A167 Kingston Park\r
ANPR_Measurement_Site_30070627,A1 (Northbound) - Felton to Rosebrough,A1 NB Felton - Rosebrough,414285,626783,417411,598994,{'timeseriesJourneyTime': 'https://api.newcast...,A1 (Northbound),Felton,Rosebrough
ANPR_Measurement_Site_30070289,A1 (Northbound) - J75 A69/A186 Denton Burn to ...,A1 NB J75 A69 - J77 A696,420982,567494,419805,565819,{'timeseriesJourneyTime': 'https://api.newcast...,A1 (Northbound),J75 A69/A186 Denton Burn,J77 A696/A167 Kingston Park
ANPR_Measurement_Site_30070530,A1 (Northbound) - J77 A696/A167 Kingston Park ...,A1 NB J77 A696 - J80 A19,423107,575079,420982,567494,{'timeseriesJourneyTime': 'https://api.newcast...,A1 (Northbound),J77 A696/A167 Kingston Park,J80 A19 Seaton Burn
ANPR_Measurement_Site_30070625,A1 (Northbound) - J80 A19 Seaton Burn to Felton,A1 NB J80 A19 - Felton,417411,598994,423107,575079,{'timeseriesJourneyTime': 'https://api.newcast...,A1 (Northbound),J80 A19 Seaton Burn,Felton
...,...,...,...,...,...,...,...,...,...,...
CAJT_SLHYLTONRD_3_2,Hylton Road (Eastbound) - St. Luke's Road to K...,Hylton Rd EB SLukes Rd - Kyll Rd,437742,556728,436004,556343,{'timeseriesJourneyTime': 'https://api.newcast...,Hylton Road (Eastbound),St. Luke's Road,Kayll Road
CAJT_SLHYLTONRD_2_3,Hylton Road (Westbound) - Kayll Road to St. Lu...,Hylton Rd WB Kyll Rd - SLukes Rd,436031,556353,437773,556735,{'timeseriesJourneyTime': 'https://api.newcast...,Hylton Road (Westbound),Kayll Road,St. Luke's Road
CAJT_SLHYLTONRD_1_2,Hylton Road (Westbound) - Lime Street to Kayll...,Hylton Rd WB Lime St - Kayll Rd,437773,556735,438620,557059,{'timeseriesJourneyTime': 'https://api.newcast...,Hylton Road (Westbound),Lime Street,Kayll Road
CAJT_GHA1114_TEAMST1_CR2,A1114/Team Street (Westbound) - Autumn Drive t...,Team St WB Autumn Dr - Kgfshr Ct,422961,562553,424033,562458,{'timeseriesJourneyTime': 'https://api.newcast...,A1114/Team Street (Westbound),Autumn Drive,Kingfisher Court


In [5]:
# Convert links into a list of measurement points
#
# Each link contributes its start and its end. Ends that share the same coordinates and the
# same direction of travel are treated as one monitoring point: the first link seen names the
# point, and every later occurrence is recorded as an alias in anprDuplicateRegister.

anprPoints = {}
# Captures the text inside the trailing parentheses, e.g. "Northbound" from "A1 (Northbound)"
anprDirectionMatcher = re.compile(r'^.*\((.*)\)$')
anprDuplicateRegister = []

for systemCodeNumber in anprLinks.index:
    highwayDescription = anprLinks.at[systemCodeNumber, 'highwayDescription'].strip()

    # The direction belongs to the link, so it is resolved once for both ends
    directionMatch = anprDirectionMatcher.match(highwayDescription)

    if directionMatch is None:
        print('Unable to find direction in description "%s"' % highwayDescription)
        countDirection = 'Unknown'
    else:
        countDirection = directionMatch[1]

    for end in ['start', 'end']:
        easting = anprLinks.at[systemCodeNumber, end + 'Easting']
        northing = anprLinks.at[systemCodeNumber, end + 'Northing']
        coordinates = (easting, northing)
        pointDescription = anprLinks.at[systemCodeNumber, end + 'Description'].strip()

        # Build the key from the extracted direction text. It must be created after the
        # direction has been resolved; formatting the regex match object itself would embed
        # its repr (span and full highway name) in the key.
        vectorId = '%s (%s)' % (coordinates, countDirection)

        # NOTE(review): alias rows store the link under 'originalId' while first-seen rows
        # store it under 'alternativeId'. Left unchanged because these keys become the columns
        # of the exported register -- confirm which naming is intended.
        if vectorId in anprPoints:
            anprPoints[vectorId]['linkCount'] = anprPoints[vectorId]['linkCount'] + 1
            anprDuplicateRegister.append({
                'originalId': systemCodeNumber,
                'originalEnd': end,
                'isAlias': True,
                'timeseriesName': anprPoints[vectorId]['timeseriesName']
            })
        else:
            vehicleCountName = '%s.%s.%s' % (systemCodeNumber, end, countDirection.lower())
            anprPoints[vectorId] = {
                'systemCodeNumber': systemCodeNumber,
                'end': end,
                'timeseriesName': vehicleCountName,
                'pointDescription': pointDescription,
                'highwayDescription': highwayDescription,
                'easting': easting,
                'northing': northing,
                'linkCount': 1
            }
            anprDuplicateRegister.append({
                'alternativeId': systemCodeNumber,
                'originalEnd': end,
                'isAlias': False,
                'timeseriesName': anprPoints[vectorId]['timeseriesName']
            })

# From here on anprPoints is a DataFrame with one row per unique monitoring point
anprPoints = pd.DataFrame.from_records(list(anprPoints.values()))

print('Found %u unique monitoring points.' % len(anprPoints.index))
anprPoints.head(40)

Found 389 unique monitoring points.


Unnamed: 0,systemCodeNumber,end,timeseriesName,pointDescription,highwayDescription,easting,northing,linkCount
0,ANPR_Measurement_Site_30070291,start,ANPR_Measurement_Site_30070291.start.northbound,A69/Hillhead Road,A1 (Northbound),419121,565865,1
1,ANPR_Measurement_Site_30070291,end,ANPR_Measurement_Site_30070291.end.northbound,J77 A696/A167 Kingston Park,A1 (Northbound),420982,567494,3
2,ANPR_Measurement_Site_30070627,start,ANPR_Measurement_Site_30070627.start.northbound,Felton,A1 (Northbound),417411,598994,2
3,ANPR_Measurement_Site_30070627,end,ANPR_Measurement_Site_30070627.end.northbound,Rosebrough,A1 (Northbound),414285,626783,1
4,ANPR_Measurement_Site_30070289,start,ANPR_Measurement_Site_30070289.start.northbound,J75 A69/A186 Denton Burn,A1 (Northbound),419805,565819,1
5,ANPR_Measurement_Site_30070530,end,ANPR_Measurement_Site_30070530.end.northbound,J80 A19 Seaton Burn,A1 (Northbound),423107,575079,2
6,ANPR_Measurement_Site_30071024,start,ANPR_Measurement_Site_30071024.start.southbound,Felton,A1 (Southbound),417416,598997,2
7,ANPR_Measurement_Site_30071024,end,ANPR_Measurement_Site_30071024.end.southbound,J80 A19 Seaton Burn,A1 (Southbound),423113,575096,2
8,ANPR_Measurement_Site_30070678,start,ANPR_Measurement_Site_30070678.start.southbound,J77 A696/A167 Kingston Park,A1 (Southbound),421018,567521,3
9,ANPR_Measurement_Site_30070678,end,ANPR_Measurement_Site_30070678.end.southbound,A69/Hillhead Road,A1 (Southbound),419204,565816,1


In [6]:
# Tabulate the register of first-seen points and their aliases (one row per link end)
pdAnprDuplicateRegister = pd.DataFrame.from_records(
    data=anprDuplicateRegister
)
pdAnprDuplicateRegister

Unnamed: 0,alternativeId,originalEnd,isAlias,timeseriesName,originalId
0,ANPR_Measurement_Site_30070291,start,False,ANPR_Measurement_Site_30070291.start.northbound,
1,ANPR_Measurement_Site_30070291,end,False,ANPR_Measurement_Site_30070291.end.northbound,
2,ANPR_Measurement_Site_30070627,start,False,ANPR_Measurement_Site_30070627.start.northbound,
3,ANPR_Measurement_Site_30070627,end,False,ANPR_Measurement_Site_30070627.end.northbound,
4,ANPR_Measurement_Site_30070289,start,False,ANPR_Measurement_Site_30070289.start.northbound,
...,...,...,...,...,...
473,,end,True,CAJT_SLHYLTONRD_2_3.start.westbound,CAJT_SLHYLTONRD_1_2
474,CAJT_GHA1114_TEAMST1_CR2,start,False,CAJT_GHA1114_TEAMST1_CR2.start.westbound,
475,CAJT_GHA1114_TEAMST1_CR2,end,False,CAJT_GHA1114_TEAMST1_CR2.end.westbound,
476,CAJT_GHA1114_CR2_TEAMST1,start,False,CAJT_GHA1114_CR2_TEAMST1.start.eastbound,


In [7]:
# Load the cached baseline volumes; columns are named '<systemCodeNumber>.platesIn' / '.platesOut'
dfLinkHistoric = pd.read_pickle('../cache/baseline-traffic-volumes-pd.pkl')

# The stored index has no timezone: label it as UTC, then express it in local time
historicIndexLocal = dfLinkHistoric.index.tz_localize(tzUTC).tz_convert(tzLocal)
dfLinkHistoric.index = historicIndexLocal
dfLinkHistoric = dfLinkHistoric.sort_index()
gc.collect()

dfLinkHistoric.head(5)

Unnamed: 0_level_0,CAJT_GHA1114_CR2_HD3.platesIn,CAJT_GHA1114_CR2_TEAMST1.platesIn,CAJT_GHA1114_HD3_CR2.platesIn,CAJT_GHA1114_TEAMST1_CR2.platesIn,CAJT_GHA167_DR1_DR2.platesIn,CAJT_GHA167_DR1_TB.platesIn,CAJT_GHA167_DR2A_DR2.platesIn,CAJT_GHA167_DR2_DR1.platesIn,CAJT_GHA167_DR2_DR3.platesIn,CAJT_GHA167_DR3_DR2.platesIn,...,CAJT_STA194_TD1_LL2.platesOut,CAJT_STA194_TD1_NR3.platesOut,CAJT_STA194_TD1_WR5.platesOut,CAJT_STA194_WR5_TD1.platesOut,CAJT_STB1298_SR1_SR2.platesOut,CAJT_STB1298_SR1_WR5.platesOut,CAJT_STB1298_SR2_SR1.platesOut,CAJT_STB1298_SR2_SR3.platesOut,CAJT_STB1298_SR3_SR2.platesOut,CAJT_STB1298_WR5_SR1.platesOut
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2019-07-01 01:00:00+01:00,2.0,2.0,6.0,0.0,2.0,3.0,5.0,1.0,0.0,4.0,...,12.0,0.0,0.0,11.0,2.0,0.0,4.0,5.0,4.0,2.0
2019-07-01 01:04:00+01:00,0.0,2.0,2.0,3.0,4.0,3.0,3.0,0.0,3.0,7.0,...,12.0,0.0,0.0,11.0,6.0,0.0,1.0,9.0,0.0,6.0
2019-07-01 01:08:00+01:00,2.0,1.0,2.0,3.0,2.0,5.0,1.0,0.0,0.0,5.0,...,4.0,1.0,0.0,6.0,1.0,0.0,2.0,2.0,2.0,4.0
2019-07-01 01:12:00+01:00,4.0,0.0,0.0,4.0,3.0,2.0,9.0,0.0,2.0,3.0,...,6.0,1.0,0.0,5.0,1.0,0.0,0.0,4.0,1.0,0.0
2019-07-01 01:16:00+01:00,0.0,1.0,2.0,2.0,3.0,4.0,5.0,1.0,3.0,5.0,...,4.0,0.0,0.0,9.0,3.0,0.0,2.0,2.0,0.0,3.0


In [8]:
daysPerRequest = 30

# Requests never extend beyond 24 hours after dateToday
fetchEnd = dateToday + pd.Timedelta(hours=24)


def fetchArchivedValues(archiveIRI, fetchStart, fetchUntil, windowDays):
    """Download the archived values of one timeseries in consecutive date windows.

    Prints one '.' per completed request as a progress indicator.

    Parameters:
        archiveIRI: URL of the timeseries archive; startTime/endTime are appended as a query.
        fetchStart: timezone-aware start of the first window.
        fetchUntil: timezone-aware upper limit; no window starts or ends after it.
        windowDays: length of each request window in days.

    Returns:
        A list with the 'historic' -> 'values' entries of every response, in request order.
        The list is empty when the date range produces no windows.
    """
    values = []

    for windowStart in dateutil.rrule.rrule(
        dateutil.rrule.DAILY,
        interval=windowDays,
        dtstart=fetchStart,
        until=fetchUntil
    ):
        # Windows end one second before the next one starts, capped at the overall limit
        windowEnd = windowStart + pd.Timedelta(days=windowDays) - pd.Timedelta(seconds=1)

        if windowEnd > fetchUntil:
            windowEnd = fetchUntil

        # The context manager closes the HTTP response once it has been read
        with urllib.request.urlopen(
            '%s?startTime=%s&endTime=%s' % (
                archiveIRI,
                windowStart.isoformat().replace('+00:00', 'Z'),
                windowEnd.isoformat().replace('+00:00', 'Z')
            )
        ) as windowRequest:
            values.extend(json.loads(windowRequest.read().decode('utf-8'))['historic']['values'])

        print('.', end='')

    return values


dfPointTs = None

for point in anprPoints.to_dict(orient='records'):
    systemCodeNumber = point['systemCodeNumber']
    linkIRIs = anprLinks.at[systemCodeNumber, 'timeseriesIRIs']

    vehicleCountName = point['timeseriesName']

    # A point at the start of its link is counted by the link's 'plates in' series,
    # a point at the end by its 'plates out' series
    isLinkStart = point['end'] == 'start'
    vehicleCountIRIRequired = 'timeseriesPlatesIn' if isLinkStart else 'timeseriesPlatesOut'
    historicColumn = '%s.%s' % (systemCodeNumber, 'platesIn' if isLinkStart else 'platesOut')

    if vehicleCountIRIRequired not in linkIRIs:
        print('No data available for %s' % vehicleCountName)
        continue

    print(vehicleCountName)
    print('  [', end='')

    # TODO: Load the base data here instead
    pointTimeseries = fetchArchivedValues(
        linkIRIs[vehicleCountIRIRequired],
        previousDataEnd + pd.Timedelta(seconds=1),
        fetchEnd,
        daysPerRequest
    )

    print(']')

    # An all-zero series is treated as no data; the sum of an empty list is also zero,
    # so a download that returned nothing is skipped here too
    if np.sum([v['value'] for v in pointTimeseries]) < 1:
        print('Empty timeseries returned for %s' % vehicleCountName)
        continue

    dfPoint = pd.DataFrame \
        .from_records(pointTimeseries, exclude=['duration']) \
        .rename(columns={'value': vehicleCountName})
    # Timestamps arrive as UTC strings; convert them to local time
    dfPoint['time'] = dfPoint['time'].apply(lambda t: datetime.datetime.strptime(t, "%Y-%m-%dT%H:%M:%S.%fZ").replace(tzinfo=tzUTC).astimezone(tzLocal))
    dfPoint = dfPoint.set_index('time')

    # Prepend earlier data: prefer the cache from a previous run, otherwise the baseline.
    # NOTE(review): the cached series is already resampled and the download starts one second
    # after its last bin label, so raw values that belong to that last cached bin may be added
    # to it a second time by the resample below -- verify against the API's timestamps.
    if dfPointInterpTsOld is not None and vehicleCountName in dfPointInterpTsOld.columns:
        dfPoint = pd.concat([
            dfPointInterpTsOld[vehicleCountName],
            dfPoint[vehicleCountName]
        ]).to_frame()
    elif historicColumn in dfLinkHistoric.columns:
        dfPoint = pd.concat([
            dfLinkHistoric.rename(columns={ historicColumn: vehicleCountName })[vehicleCountName],
            dfPoint[vehicleCountName]
        ]).to_frame()

    # Underlying data is 4 minute blocks, make it 8 minutes.
    # The bin width comes from resampleFrequency (seconds). The default bin anchoring is used,
    # which is what the removed 'base=0' argument selected; 'kind' is also gone from pandas.
    dfPoint = dfPoint.resample('%us' % resampleFrequency).sum()

    if dfPointTs is None:
        dfPointTs = dfPoint
    else:
        dfPointTs = dfPointTs.join(
            dfPoint,
            how='outer',
            rsuffix=' (%s)' % systemCodeNumber
        )
        dfPoint = None

    gc.collect()

# Fail with a clear message instead of an AttributeError on None
if dfPointTs is None:
    raise RuntimeError('No timeseries data was returned for any monitoring point.')

dfPointTs = dfPointTs.sort_index()

No data available for ANPR_Measurement_Site_30070291.start.northbound
No data available for ANPR_Measurement_Site_30070291.end.northbound
No data available for ANPR_Measurement_Site_30070627.start.northbound
No data available for ANPR_Measurement_Site_30070627.end.northbound
No data available for ANPR_Measurement_Site_30070289.start.northbound
No data available for ANPR_Measurement_Site_30070530.end.northbound
No data available for ANPR_Measurement_Site_30071024.start.southbound
No data available for ANPR_Measurement_Site_30071024.end.southbound
No data available for ANPR_Measurement_Site_30070678.start.southbound
No data available for ANPR_Measurement_Site_30070678.end.southbound
No data available for ANPR_Measurement_Site_30070677.end.southbound
No data available for ANPR_Measurement_Site_30071023.start.southbound
CAJT_SLA1018_NBS1_WW1.start.northbound
  [.]
Empty timeseries returned for CAJT_SLA1018_NBS1_WW1.start.northbound
CAJT_SLA1018_NBS1_WW1.end.northbound
  [.]
CAJT_SLA1018_NB

In [9]:
# Show the ten most recent rows of the combined series
dfPointTs.iloc[-10:]

Unnamed: 0_level_0,CAJT_SLA1018_NBS1_WW1.end.northbound,CAJT_SLA1018_NBS1_NR2.end.northbound,CAJT_STA1018_NR3_GR4.start.northbound,CAJT_SLA1018_IS3_WW.start.northbound,CAJT_SLA1018_CK4_IS3.start.northbound,CAJT_SLA1018_CK4_IS3.end.northbound,CAJT_SLA1018_NR2_NR3.start.northbound,CAJT_SLA1018_NR2_NR3.end.northbound,CAJT_SLA1018_WW1_NBS1.start.southbound,CAJT_SLA1018_NR3_NR2.start.southbound,...,CAJT_NCCOACHLN_COACH2_REDHL1.start.southbound,CAJT_NCCOACHLN_COACH2_REDHL1.end.southbound,CAJT_SLHYLTONRD_2_1.end.eastbound,CAJT_SLHYLTONRD_3_2.start.eastbound,CAJT_SLHYLTONRD_2_3.end.westbound,CAJT_SLHYLTONRD_1_2.start.westbound,CAJT_GHA1114_TEAMST1_CR2.start.westbound,CAJT_GHA1114_TEAMST1_CR2.end.westbound,CAJT_GHA1114_CR2_TEAMST1.start.eastbound,CAJT_GHA1114_CR2_TEAMST1.end.eastbound
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2020-04-27 07:52:00+01:00,12.0,10.0,17.0,34.0,24.0,34.0,10.0,20.0,26.0,43.0,...,16.0,32.0,27.0,14.0,3.0,31.0,10.0,12.0,12.0,23.0
2020-04-27 08:00:00+01:00,19.0,14.0,17.0,37.0,34.0,37.0,14.0,14.0,19.0,52.0,...,12.0,18.0,23.0,8.0,4.0,32.0,10.0,8.0,5.0,18.0
2020-04-27 08:08:00+01:00,17.0,20.0,19.0,43.0,21.0,43.0,20.0,19.0,24.0,51.0,...,12.0,33.0,27.0,12.0,8.0,30.0,8.0,6.0,4.0,17.0
2020-04-27 08:16:00+01:00,21.0,11.0,21.0,39.0,23.0,39.0,11.0,22.0,20.0,34.0,...,7.0,26.0,16.0,10.0,7.0,19.0,7.0,8.0,12.0,15.0
2020-04-27 08:24:00+01:00,10.0,7.0,14.0,42.0,32.0,42.0,7.0,13.0,28.0,50.0,...,14.0,25.0,21.0,9.0,5.0,31.0,14.0,12.0,9.0,21.0
2020-04-27 08:32:00+01:00,20.0,21.0,17.0,35.0,26.0,35.0,21.0,17.0,27.0,39.0,...,14.0,25.0,20.0,6.0,10.0,28.0,8.0,11.0,8.0,10.0
2020-04-27 08:40:00+01:00,15.0,15.0,20.0,30.0,13.0,30.0,15.0,22.0,31.0,60.0,...,10.0,23.0,19.0,12.0,5.0,35.0,11.0,2.0,10.0,13.0
2020-04-27 08:48:00+01:00,19.0,22.0,19.0,37.0,20.0,37.0,22.0,17.0,33.0,56.0,...,21.0,36.0,24.0,10.0,5.0,18.0,9.0,10.0,11.0,13.0
2020-04-27 08:56:00+01:00,20.0,11.0,20.0,18.0,13.0,18.0,17.0,20.0,32.0,57.0,...,15.0,25.0,25.0,13.0,3.0,17.0,11.0,9.0,15.0,16.0
2020-04-27 09:04:00+01:00,,,,,,,,,,,...,6.0,15.0,9.0,2.0,4.0,17.0,4.0,7.0,3.0,4.0


In [10]:
# Deduplicate the index: where a timestamp occurs more than once, keep only its first row
isRepeatedTimestamp = dfPointTs.index.duplicated(keep='first')
dfPointTs = dfPointTs.loc[~isRepeatedTimestamp]
dfPointTs

Unnamed: 0_level_0,CAJT_SLA1018_NBS1_WW1.end.northbound,CAJT_SLA1018_NBS1_NR2.end.northbound,CAJT_STA1018_NR3_GR4.start.northbound,CAJT_SLA1018_IS3_WW.start.northbound,CAJT_SLA1018_CK4_IS3.start.northbound,CAJT_SLA1018_CK4_IS3.end.northbound,CAJT_SLA1018_NR2_NR3.start.northbound,CAJT_SLA1018_NR2_NR3.end.northbound,CAJT_SLA1018_WW1_NBS1.start.southbound,CAJT_SLA1018_NR3_NR2.start.southbound,...,CAJT_NCCOACHLN_COACH2_REDHL1.start.southbound,CAJT_NCCOACHLN_COACH2_REDHL1.end.southbound,CAJT_SLHYLTONRD_2_1.end.eastbound,CAJT_SLHYLTONRD_3_2.start.eastbound,CAJT_SLHYLTONRD_2_3.end.westbound,CAJT_SLHYLTONRD_1_2.start.westbound,CAJT_GHA1114_TEAMST1_CR2.start.westbound,CAJT_GHA1114_TEAMST1_CR2.end.westbound,CAJT_GHA1114_CR2_TEAMST1.start.eastbound,CAJT_GHA1114_CR2_TEAMST1.end.eastbound
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2019-07-01 00:56:00+01:00,1.0,5.0,1.0,2.0,5.0,2.0,5.0,1.0,0.0,2.0,...,2.0,0.0,4.0,3.0,1.0,4.0,0.0,2.0,2.0,2.0
2019-07-01 01:04:00+01:00,3.0,3.0,5.0,8.0,2.0,8.0,3.0,6.0,4.0,2.0,...,2.0,3.0,3.0,1.0,4.0,8.0,6.0,2.0,3.0,5.0
2019-07-01 01:12:00+01:00,2.0,8.0,6.0,5.0,1.0,5.0,8.0,5.0,1.0,4.0,...,1.0,3.0,2.0,3.0,3.0,4.0,6.0,4.0,1.0,2.0
2019-07-01 01:20:00+01:00,2.0,3.0,2.0,4.0,2.0,4.0,3.0,2.0,2.0,5.0,...,3.0,3.0,7.0,0.0,2.0,6.0,5.0,2.0,2.0,5.0
2019-07-01 01:28:00+01:00,5.0,7.0,4.0,7.0,3.0,7.0,7.0,5.0,3.0,2.0,...,5.0,2.0,5.0,1.0,1.0,7.0,7.0,3.0,4.0,7.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-04-27 08:32:00+01:00,20.0,21.0,17.0,35.0,26.0,35.0,21.0,17.0,27.0,39.0,...,14.0,25.0,20.0,6.0,10.0,28.0,8.0,11.0,8.0,10.0
2020-04-27 08:40:00+01:00,15.0,15.0,20.0,30.0,13.0,30.0,15.0,22.0,31.0,60.0,...,10.0,23.0,19.0,12.0,5.0,35.0,11.0,2.0,10.0,13.0
2020-04-27 08:48:00+01:00,19.0,22.0,19.0,37.0,20.0,37.0,22.0,17.0,33.0,56.0,...,21.0,36.0,24.0,10.0,5.0,18.0,9.0,10.0,11.0,13.0
2020-04-27 08:56:00+01:00,20.0,11.0,20.0,18.0,13.0,18.0,17.0,20.0,32.0,57.0,...,15.0,25.0,25.0,13.0,3.0,17.0,11.0,9.0,15.0,16.0


In [11]:
# Write the series and their metadata to the pickle cache, in this order
for cachedFrame, cachePath in [
    (dfPointTs, '../cache/recent-traffic-volumes-pd.pkl'),
    (anprLinks, '../cache/recent-traffic-volumes-link-metadata-pd.pkl'),
    (anprPoints, '../cache/recent-traffic-volumes-point-metadata-pd.pkl'),
    (pdAnprDuplicateRegister, '../cache/recent-traffic-volumes-point-alternative-ids.pkl'),
]:
    cachedFrame.to_pickle(cachePath)

In [12]:
# Sorry folks, but it's an >80MB file otherwise
# The volumes are summed into 960 second (16 minute) bins before being written out
dfPointTs.resample('960s').sum().to_csv('../output/t&w-anpr-volumes-pd-16min.csv')

# The metadata tables are exported unchanged
for exportedFrame, exportPath in [
    (anprLinks, '../output/t&w-anpr-volumes-link-metadata-pd.csv'),
    (anprPoints, '../output/t&w-anpr-volumes-point-metadata-pd.csv'),
    (pdAnprDuplicateRegister, '../output/t&w-anpr-volumes-point-metadata-original-id-mapping.csv'),
]:
    exportedFrame.to_csv(exportPath)