In [None]:
import pandas as pd
import numpy as np
import json
import math
import urllib.request
import dateutil.parser
import dateutil.rrule
import dateutil.tz
import datetime
import re
import gc
import time

In [None]:
# Time zone objects. tzUTC is attached to every timestamp built in this
# notebook; tzLocal is defined here but not referenced in the visible cells.
tzUTC = dateutil.tz.gettz('UTC')
tzLocal = dateutil.tz.gettz('Europe/London')

# Fallback start of the fetch window, used when no cached data is available.
earliestData = datetime.datetime(2020, 4, 20, 0, 0, 0, tzinfo=tzUTC)

# Midnight of today's date (as reported by the machine's local clock), labelled as UTC.
dateToday = datetime.datetime.combine(datetime.date.today(), datetime.time.min, tzinfo=tzUTC)

In [None]:
# Load any previously cached data so that only new days have to be fetched.
#
# Outputs of this cell:
#   pdSourcesOld         - cached camera metadata table, or None when there is no usable cache
#   pointTsByIntervalOld - {interval in seconds: cached time-series frame}
#   previousDataEnd      - UTC midnight from which fetching resumes
previousDataEnd = None
pointTsByIntervalOld = {}
pdSourcesOld = None

try:
    # NOTE: unpickling executes arbitrary code; only load cache files written by this notebook.
    pdSources = pd.read_pickle('../cache/recent-feature-counts-point-metadata.pkl')

    # Cameras without a known interval carry NaN, which '%u' cannot format, so skip those.
    for interval in pdSources['interval'].dropna().unique():
        print('Loading data with %u second interval...' % interval)
        pointTsByIntervalOld[interval] = pd.read_pickle('../cache/recent-feature-counts-pd-%usec.pkl' % interval)
        intervalDataEnd = pointTsByIntervalOld[interval].index.max()

        # An empty cached frame has no end timestamp (NaT) and must not set the resume point.
        if pd.isna(intervalDataEnd):
            continue

        if previousDataEnd is None or intervalDataEnd > previousDataEnd:
            previousDataEnd = intervalDataEnd

    print('Loaded previous data.')
    pdSourcesOld = pdSources

except Exception as loadError:
    # A missing or only partially readable cache is treated as "no cache at all":
    # keeping a half-loaded state would move the resume point forward for
    # intervals whose history could not be read, silently losing that history.
    # Catching Exception (not a bare except) keeps Ctrl-C / kernel interrupt working.
    pdSources = None
    pdSourcesOld = None
    pointTsByIntervalOld = {}
    previousDataEnd = None
    print('No existing data could be loaded.')
    print('  Reason: %s' % loadError)

if previousDataEnd is None:
    previousDataEnd = earliestData
else:
    # Resume from midnight of the last cached day so that day is fetched again in full.
    previousDataEnd = datetime.datetime.combine(previousDataEnd.date(), datetime.datetime.min.time()).replace(tzinfo=tzUTC)

# Drop the cached rows from the resume point onwards; they are about to be re-fetched.
# Iterating the loaded dict (rather than pdSources) also works when no cache exists.
for interval in list(pointTsByIntervalOld):
    cachedTs = pointTsByIntervalOld[interval]
    pointTsByIntervalOld[interval] = cachedTs[cachedTs.index < previousDataEnd]

print('  Start reading from %s' % previousDataEnd)

In [None]:
# Download the camera dictionary from the vision API and parse it as JSON.
# The response object is closed via the context manager once it has been read.
visionApiBase = 'https://uo-vision.dev.urbanobservatory.ac.uk/stills/dict'
with urllib.request.urlopen(visionApiBase) as visionHttpResponse:
    visionResponse = json.loads(visionHttpResponse.read().decode('utf-8'))

In [None]:
# Build the camera metadata table from the API response. After the transpose
# the row index is what the later cells use as the camera id.
pdSources = pd.DataFrame.from_records(visionResponse).transpose()

# Parse the 'YYYY-MM-DD' date strings into UTC-labelled datetimes.
for dateColumn in ('min_date', 'max_date'):
    pdSources[dateColumn] = pdSources[dateColumn].apply(
        lambda d: datetime.datetime.strptime(d, '%Y-%m-%d').replace(tzinfo=tzUTC)
    )

# Carry over the sampling interval recorded by a previous run, if there was one.
# On a first run pdSourcesOld is None and no 'interval' column is added.
if pdSourcesOld is not None and 'interval' in pdSourcesOld.columns:
    pdSources = pdSources.join(pdSourcesOld['interval'])

# Filled by the fetch loop: camera id -> interval, and interval -> combined frame.
camerasByInterval = {}
pointTsByInterval = {}

pdSources

In [None]:
def _fetchDayCounts(cameraId, date):
    """Fetch and JSON-decode the counts for one camera on one day.

    Retries indefinitely on any error, printing an 'x' per failed attempt and
    sleeping one second longer after each failure (capped at 10 seconds).
    Only Exception is caught, so the loop can still be stopped with a kernel
    interrupt.
    """
    url = 'https://uo-vision.dev.urbanobservatory.ac.uk/stills/counts?location=%s&date=%s' % (cameraId, date.isoformat()[0:10])
    attempts = 0
    while True:
        try:
            # The context manager closes the HTTP response after reading.
            with urllib.request.urlopen(url) as response:
                return json.loads(response.read().decode('utf-8'))
        except Exception:
            attempts = attempts + 1
            print('x', end='')
            time.sleep(min(attempts, 10))


# For every camera: download each day from the resume point to the camera's
# max_date, resample onto a regular grid and merge the result into the frame
# for that camera's interval (pointTsByInterval). Progress is printed as one
# '.' per day with data and one 'x' per failed request.
for cameraId in pdSources.index:
    source = pdSources[pdSources.index == cameraId].to_dict(orient='records')[0]

    print(cameraId)
    print('  [', end='')

    # Daily frames are collected in a list and concatenated once; growing a
    # DataFrame day by day is quadratic and DataFrame.append no longer exists
    # in pandas 2.
    dailyFrames = []

    for date in dateutil.rrule.rrule(
            dateutil.rrule.DAILY,
            interval=1,
            dtstart=max(source['min_date'], previousDataEnd),
            until=source['max_date']
        ):

        sourceOnDay = pd.json_normalize(_fetchDayCounts(cameraId, date))

        # Skip days without records explicitly instead of relying on how
        # from_records reacts to a missing 'ts' column.
        if sourceOnDay.empty or 'ts' not in sourceOnDay.columns:
            continue

        sourceOnDay = sourceOnDay.set_index('ts')
        if len(sourceOnDay.columns) == 0:
            continue

        # Timestamps arrive as 'YYYY-MM-DD HH:MM:SS' strings and are labelled as UTC.
        sourceOnDay.index = sourceOnDay.index.to_series().apply(lambda t: datetime.datetime.strptime(t, '%Y-%m-%d %H:%M:%S').replace(tzinfo=tzUTC))

        dailyFrames.append(sourceOnDay)
        print('.', end='')

    if not dailyFrames:
        continue

    sourceTs = pd.concat(dailyFrames)
    sourceTs = sourceTs.drop(columns=['camera'], errors='ignore').rename(errors='ignore', columns={
        'url': 'Source image',
        'counts.bus': 'Bus',
        'counts.car': 'Car',
        'counts.cyclist': 'Cyclist',
        'counts.motorcyclist': 'Motorcyclist',
        'counts.person': 'Person',
        'counts.truck': 'Truck',
        'counts.van': 'Van'
    })

    # One row per timestamp, in time order, so the gap statistics below are meaningful.
    sourceTs = sourceTs.loc[~sourceTs.index.duplicated(keep='first')].sort_index()

    # Prefer the interval recorded by a previous run. A camera that is new since
    # that run has the key present but NaN, so test the value and not just the key.
    knownInterval = source.get('interval')
    if pd.notna(knownInterval):
        sourceInterval = int(knownInterval)
    else:
        medianGap = sourceTs.index.to_series().diff().median()
        if pd.isna(medianGap):
            # A single sample gives no gap to estimate from.
            continue
        # total_seconds() keeps whole days; Timedelta.seconds would discard them.
        tsInterval = medianGap.total_seconds()
        # Round to whole minutes and clamp to 60..600 seconds; without the lower
        # bound a sub-30-second cadence rounds to 0, which is not a valid resample rule.
        sourceInterval = max(60, min(600, round(tsInterval / 60) * 60))
    camerasByInterval[cameraId] = sourceInterval

    # Resample onto the regular grid. Counts take the nearest observation (any
    # remaining gaps become 0); the image URL takes the nearest observation too.
    resampleRule = '%us' % sourceInterval
    sourceTsNumeric = sourceTs.resample(resampleRule).nearest().drop(columns=['Source image'], errors='ignore').fillna(0)
    if 'Source image' in sourceTs.columns:
        sourceTs = sourceTsNumeric.join(sourceTs['Source image'].resample(resampleRule).nearest())
    else:
        sourceTs = sourceTsNumeric

    # Column names become '<camera id>: <feature>'.
    sourceTs = sourceTs.add_prefix('%s: ' % cameraId)

    print('] Interval %u seconds' % sourceInterval)

    combinedTs = pointTsByInterval.get(sourceInterval)
    if combinedTs is None:
        pointTsByInterval[sourceInterval] = sourceTs
    elif ('%s: Source image' % cameraId) in combinedTs.columns:
        # Columns for this camera already exist: add the new rows below them.
        # (DataFrame has no .concat method; pd.concat is the module-level function.)
        pointTsByInterval[sourceInterval] = pd.concat([combinedTs, sourceTs], sort=True, axis='index')
    else:
        # Outer join keeps timestamps that only this camera has; the default
        # left join would drop everything outside the first camera's range.
        pointTsByInterval[sourceInterval] = combinedTs.join(sourceTs, how='outer')

In [None]:
# Put the cached history in front of the newly fetched rows for every interval
# that has both. pd.concat replaces DataFrame.append, which was removed in pandas 2.
for interval in pointTsByInterval:
    if interval in pointTsByIntervalOld:
        mergedTs = pd.concat([pointTsByIntervalOld[interval], pointTsByInterval[interval]])
        # Dropping repeated timestamps makes this cell safe to run more than once;
        # otherwise every re-run would prepend the cached history again.
        pointTsByInterval[interval] = mergedTs.loc[~mergedTs.index.duplicated(keep='first')]

In [None]:
# Preview the 60-second table. .get() avoids a KeyError (which would stop a
# Run All before the save cell) when no camera ended up with a 60-second interval.
pointTsByInterval.get(60)

In [None]:
# Save the camera metadata (with each camera's interval) and one time-series
# table per interval, as pickle under ../cache and as CSV under ../output.
intervalsThisRun = pd.DataFrame.from_dict(camerasByInterval, orient='index', columns=['interval'])

# Build a new frame instead of dropping the column from pdSources in place,
# so re-running this cell always starts from the same input.
pdSourcesWithInterval = pdSources.drop(columns=['interval'], errors='ignore').join(intervalsThisRun)

# Cameras that returned no new data in this run are absent from camerasByInterval;
# keep the interval carried over from the previous run for them rather than
# overwriting it with NaN.
if 'interval' in pdSources.columns:
    pdSourcesWithInterval['interval'] = pdSourcesWithInterval['interval'].fillna(pdSources['interval'])

pdSourcesWithInterval.to_pickle('../cache/recent-feature-counts-point-metadata.pkl')
pdSourcesWithInterval.to_csv('../output/recent-feature-counts-point-metadata.csv')

for interval in pointTsByInterval:
    pointTsByInterval[interval].to_pickle('../cache/recent-feature-counts-pd-%usec.pkl' % interval)
    pointTsByInterval[interval].to_csv('../output/recent-feature-counts-pd-%usec.csv' % interval)

# Last expression of the cell, so the saved metadata table is actually displayed.
pdSourcesWithInterval

In [None]:
# Use for testing only...

#pdSourcesAll = None

#for interval in [60, 120, 300]:
#    # Per hour...
#    pdResampled = (pointTsByInterval[interval].resample('1800s').sum() * (1800 / interval) / 30)
#    if pdSourcesAll is None:
#        pdSourcesAll = pdResampled
#    else:
#        pdSourcesAll = pdSourcesAll.join(pdResampled)

#pdSourcesAll.groupby(axis='columns', by=lambda x: x[x.find(':') + 1:]).sum().plot(figsize=(35, 15), stacked=False, legend=True)


#pdSourcesAll[list(filter(lambda cn: 'Car' in cn, pdSourcesAll.columns))] #.plot(figsize=(35, 6.5), stacked=True, legend=False)
        
#ax = (dfPointTs[list(filter(lambda cn: 'Person' in cn, dfPointTs.columns))].resample('900s').mean() / 15).sum(axis=1).plot(figsize=(35, 6.5))
#ax = (dfPointTs[list(filter(lambda cn: 'Person' in cn, dfPointTs.columns))].resample('900s').mean() / 15).plot(figsize=(35, 6.5), stacked=True, legend=False)
#ax.set_ylabel('Pedestrians per minute')
#ax = (dfPointTs[list(filter(lambda cn: 'Car' in cn, dfPointTs.columns))].resample('900s').mean() / 15).plot(figsize=(35, 6.5), stacked=True, legend=False)
#ax.set_ylabel('Cars per minute')
#ax = (dfPointTs[list(filter(lambda cn: 'Bus' in cn, dfPointTs.columns))].resample('3600s').mean()).plot(figsize=(35, 6.5), stacked=True, legend=False)
#ax.set_ylabel('Buses per hour')