In [None]:
import pandas as pd
import numpy as np
import json
import math
import pickle
import urllib.request
import dateutil.parser
import dateutil.rrule
import dateutil.tz
import datetime
import sys
import gc
import re

In [None]:
# Time zones used when converting observation timestamps further down the notebook
tzUTC = dateutil.tz.gettz('UTC')
tzLocal = dateutil.tz.gettz('Europe/London')

# Midnight (00:00) at the start of today's date, as a naive datetime
dateToday = datetime.datetime.combine(datetime.date.today(), datetime.datetime.min.time())

# Load in baseline data that's obtained month-by-month in the baseline data script
# NOTE: unpickling executes arbitrary code, so this cache file must only ever come from a trusted source
# The context manager guarantees the file handle is closed, even if unpickling fails
with open('../cache/baseline-pedestrian-flow-api-json.pkl', 'rb') as baselineCacheFile:
    peopleDataRaw = pickle.load(baselineCacheFile)

In [None]:
# Flatten every observation timestamp (milliseconds since the epoch) held in the baseline data
baselineTimestamps = [
    record['Timestamp']
    for sensor in peopleDataRaw
    for variable in sensor['data']
    for record in sensor['data'][variable]
]

# The leading seed values act as the fallbacks when there are no observations at all
peopleDataRequestSince = max([0] + baselineTimestamps)
peopleDataBaselineStart = min([sys.maxsize] + baselineTimestamps)
del baselineTimestamps

# Convert from milliseconds to (naive, host-local) datetime objects
peopleDataRequestSince = datetime.datetime.fromtimestamp(peopleDataRequestSince / 1000)
peopleDataBaselineStart = datetime.datetime.fromtimestamp(peopleDataBaselineStart / 1000)

print('Using baseline profile data for each cross line from %s until %s' % (peopleDataBaselineStart, peopleDataRequestSince))

In [None]:
# Add to the baseline data the most recent data
peopleRequestBase = 'https://newcastle.urbanobservatory.ac.uk/api/v1.1/sensors/data/json/'
peopleRequestVariables = [
    'Walking North East',
    'Walking North West',
    'Walking South East',
    'Walking North',
    'Walking South',
    'Walking East',
    'Walking West'
]

# Request everything after the last baseline observation, up to midday tomorrow
# NOTE(review): peopleDataRequestSince is a naive host-local datetime - confirm the API
# interprets starttime/endtime in the same zone
peopleRequestIRI = ('%s?variable=%s&starttime=%s&endtime=%s') % (
    peopleRequestBase,
    ','.join(str(x).replace(' ', '%20') for x in peopleRequestVariables),
    (peopleDataRequestSince + pd.Timedelta(seconds=1)).strftime('%Y%m%d%H%M%S'),
    (dateToday + pd.Timedelta(days=1.5)).strftime('%Y%m%d%H%M%S')
)

print('Loading recent data...')
# The context manager closes the HTTP response even if reading or decoding fails
with urllib.request.urlopen(peopleRequestIRI) as peopleResponse:
    peopleDataWindow = json.loads(peopleResponse.read().decode('utf-8'))['sensors']

# Append the recent observations onto the matching baseline series.
# NOTE: this extends peopleDataRaw in place, so re-running this cell without reloading
# the baseline will append the same observations again.
for sensor in peopleDataWindow:
    # Look the baseline sensor up once per sensor rather than once per variable
    targetSensor = next((s for s in peopleDataRaw if s['Sensor Name'] == sensor['Sensor Name']), None)

    # Sensors missing from the baseline are skipped, mirroring how unknown variables are handled below
    if targetSensor is None:
        print('  Skipping sensor %s as it is not present in the baseline data' % (sensor['Sensor Name'],))
        continue

    for variable in sensor['data']:
        if variable not in targetSensor['data']:
            continue

        targetVariable = targetSensor['data'][variable]

        print('  Found %u recent observations and %u baseline observations for %s on %s' % (
            len(sensor['data'][variable]),
            len(targetVariable),
            variable,
            targetSensor['Sensor Name']['0']
        ))

        targetVariable.extend(sensor['data'][variable])

In [None]:
# Report how many sensors are held in the merged (baseline plus recent) data
print('Obtained data from %u sensors.' % (len(peopleDataRaw),))

In [None]:
# Resampling interval, in seconds, applied to the pedestrian data for all subsequent processing
# (expressed as 15 minutes x 60 seconds)
peopleCountInterval = 15 * 60

In [None]:
# Maps a sensor's name (as found under sensor['Sensor Name']['0']) to the human-readable
# camera name used as the key of peopleCountFrames and in printed messages.
# Sensors whose name is not listed here are skipped entirely when the frames are built.
# NOTE(review): the 'NORTHUMERLAND' spelling presumably mirrors the upstream sensor
# identifiers - verify before "correcting" it, as the keys must match exactly.
cameraFriendlyNames = {
    # The two Blackett St entries below are commented out, so those sensors are skipped
    #'PER_PEOPLE_BLACKETT-NORTHUMBERLAND-W': 'Blackett St pavement (north side) outside Rox',
    #'PER_PEOPLE_BLACKETT-BOOTS': 'Blackett St outside Boots',
    'PER_PEOPLE_THE_CORE_LINE_0': 'Blue Star Square at Newcastle Helix (east side)',
    'PER_PEOPLE_THE_CORE_LINE_1': 'Blue Star Square at Newcastle Helix (west side)',
    'PER_PEOPLE_USB_LINE_0': 'Science Square at Newcastle Helix',
    'PER_PEOPLE_NORTHUMERLAND_LINE_LONG_DISTANCE_HEAD_0': 'Northumberland St near Fenwick (west side)',
    'PER_PEOPLE_NORTHUMERLAND_LINE_LONG_DISTANCE_HEAD_1': 'Northumberland St near Fenwick (east side)',
    'PER_PEOPLE_NORTHUMERLAND_LINE_MID_DISTANCE_HEAD_0': 'Northumberland St near TK Maxx',
    'PER_PEOPLE_NORTHUMERLAND_LINE_SHORT_DISTANCE_HEAD_0': 'Pilgrim St (west side) pavement near Goldsmiths',
    'PER_PEOPLE_NORTHUMERLAND_LINE_SHORT_DISTANCE_HEAD_1': 'Pilgrim St crossing island between Blackett St and New Bridge St West',
    'PER_PEOPLE_NORTHUMERLAND_LINE_SHORT_DISTANCE_HEAD_2': 'Pilgrim St (east side) pavement near The Stack',
    'PER_PEOPLE_NORTHUMERLAND_LINE_SHORT_DISTANCE_HEAD_3': 'Pavement (south side) corner Pilgrim St and Blackett St',
    'PER_PEOPLE_NORTHUMERLAND_LINE_SHORT_DISTANCE_HEAD_4': 'Pavement (south side) corner Pilgrim St and New Bridge St West',
    'PER_PEOPLE_NORTHUMERLAND_LINE_SHORT_DISTANCE_HEAD_5': 'Blackett St crossing from Pilgrim St to Northumberland St (west side)',
    'PER_PEOPLE_NORTHUMERLAND_LINE_SHORT_DISTANCE_HEAD_6': 'New Bridge St West crossing Pilgrim St to Northumberland St (east side)'
}

# Friendly camera name -> DataFrame of walking counts, indexed by local time and
# resampled to peopleCountInterval seconds (one column per walking direction)
peopleCountFrames = {}

for sensor in peopleDataRaw:
    dfSensor = None

    # Only process the cameras that have been given a friendly name
    if sensor['Sensor Name']['0'] not in cameraFriendlyNames:
        continue

    cameraName = cameraFriendlyNames[sensor['Sensor Name']['0']]

    for variable in sensor['data'].keys():
        # Skip vehicle counts or bus data
        if 'Walking' not in variable:
            continue

        # Ignore everything but the timestamp and the value
        dfPeopleTs = pd.DataFrame.from_records(sensor['data'][variable], columns=['Timestamp', 'Value'])

        # Timestamps are milliseconds since 1970 (epoch), which is defined in UTC. Interpret them
        # as UTC explicitly and then convert to local time; going through a naive
        # datetime.fromtimestamp() would shift every value by the host machine's UTC offset.
        # int64 is requested explicitly so millisecond epochs cannot overflow a 32-bit default int.
        dfPeopleTs['Timestamp'] = pd.to_datetime(
            dfPeopleTs['Timestamp'].astype('int64'),
            unit='ms',
            utc=True
        ).dt.tz_convert(tzLocal)

        # One column per walking direction, indexed by the observation time
        dfPeopleTs = dfPeopleTs.rename(columns={'Value': variable}).set_index('Timestamp')

        if dfSensor is None:
            dfSensor = dfPeopleTs
        else:
            # Default (left) join: rows are aligned to the first direction's timestamps
            dfSensor = dfSensor.join(dfPeopleTs)

    if dfSensor is None:
        print('No data available from "%s" camera.' % cameraName)
        continue
    else:
        print('Data from "%s" camera has been resampled to %u second intervals.' % (cameraName, peopleCountInterval))

    # Total the counts falling within each interval
    dfSensor = dfSensor.resample('%us' % peopleCountInterval).apply(lambda x: np.sum(x.values))
    peopleCountFrames[cameraName] = dfSensor

# The raw JSON is no longer needed, so release it to free memory
peopleDataRaw = None
gc.collect() ;

In [None]:
# Preview the data from one of the cameras (the first one that was processed)
testCam = list(peopleCountFrames)[0]
print(testCam)
peopleCountFrames[testCam]

In [None]:
# Cache the per-camera frames; the context manager ensures the file is flushed and closed
with open('../cache/recent-pedestrian-flows-pd.pkl', 'wb') as framesCacheFile:
    pickle.dump(peopleCountFrames, framesCacheFile)

In [None]:
# Combine every camera's frame into one wide table, prefixing each column with the camera name
if not peopleCountFrames:
    # Fail with a clear message rather than an AttributeError on None further down
    raise RuntimeError('No pedestrian count frames are available to combine.')

# An outer join keeps every interval seen by any camera; a left join would silently drop
# intervals that fall outside the time range covered by the first camera
peopleCountCombined = pd.concat(
    [frame.add_prefix('%s: ' % cameraLabel) for cameraLabel, frame in peopleCountFrames.items()],
    axis=1,
    join='outer'
).sort_index()

peopleCountCombined.to_csv('../output/recent-pedestrian-flows-pd.csv')

gc.collect() ;