In [None]:
import pandas as pd
import numpy as np
import json
import math
import urllib.request
import re
import gc
import io
import dateutil.parser
import dateutil.rrule
import dateutil.tz
import datetime

In [None]:
# Shared timezone objects and reference date, used across most of the people-flow plots.
tzUTC = dateutil.tz.gettz('UTC')
tzLocal = dateutil.tz.gettz('Europe/London')

# Midnight at the start of the current day, labelled with the local timezone.
dateToday = datetime.datetime.combine(
    datetime.date.today(),
    datetime.time.min,
).replace(tzinfo=tzLocal)

# Presumably the traffic count interval in seconds (the resampling further down
# uses a matching '900s' frequency) -- TODO confirm.
trafficCountInterval = 900

In [None]:
# Load the previously cached 15-minute traffic volumes, if any. Cached rows from
# the last 15 days are discarded (recent data has a large number of gaps) so that
# the period gets re-fetched; `baselineEnd` marks where the trusted cache ends.
# `baselineEnd` is always expressed in UTC so the fetch URL is built consistently.
dateToday = datetime.datetime.combine(datetime.date.today(), datetime.datetime.min.time()).replace(tzinfo=tzLocal)

try:
    # NOTE(security): unpickling can execute arbitrary code -- only load cache
    # files that this notebook itself produced.
    dfPointInterpTsOld = pd.read_pickle('../cache/sheffield-recent-traffic-volumes-pd.pkl')
    dfPointInterpTsOld = dfPointInterpTsOld[dfPointInterpTsOld.index < dateToday - pd.Timedelta(days=15)]

    # An empty frame would yield NaT as the baseline and poison the fetch URL,
    # so treat it the same as a missing cache.
    if dfPointInterpTsOld.empty:
        raise ValueError('cached data has no rows older than 15 days')

    baselineEnd = np.max(dfPointInterpTsOld.index).replace(tzinfo=tzLocal).astimezone(tzUTC)
    print('Loaded previous data.')
    print('  %s' % baselineEnd)
except Exception as loadError:
    # Best-effort: any failure to load the cache falls back to a 60 day re-fetch.
    # `except Exception` (rather than a bare `except`) lets KeyboardInterrupt and
    # SystemExit propagate, and the reason is printed instead of silently dropped.
    dfPointInterpTsOld = None
    # Convert to UTC to match the success path; otherwise the '+00:00' stripping
    # done when building the fetch URL misses during British Summer Time.
    baselineEnd = (dateToday - pd.Timedelta(days=60)).astimezone(tzUTC)
    print('No existing data could be loaded.')
    print('  %s: %s' % (type(loadError).__name__, loadError))

In [None]:
# Start the fetch 84 hours before the end of the cached baseline so the freshly
# downloaded data overlaps the tail of the cache. The '+00:00' UTC offset suffix
# is stripped from the ISO timestamp before it is placed in the query string.
trafficCountFetchFrom = (baselineEnd - pd.Timedelta(hours=84)).isoformat().replace('+00:00', '')
trafficCountUrl = 'https://sheffield-portal.urbanflows.ac.uk/uflobin/ufdex?freqInMin=5&byContent=TRAFF_FLOW&bySelect=TRAFF_FLOW&aktion=CSV_show&Tfrom=%s' % trafficCountFetchFrom

# The context manager guarantees the HTTP response (and its socket) is closed,
# even if reading or decoding the body raises.
with urllib.request.urlopen(trafficCountUrl) as trafficCountResponse:
    csvCountTs = trafficCountResponse.read().decode('utf-8')

In [None]:
# Parse the downloaded CSV text into a long-format frame with one row per reading.
# Lines starting with '#' are skipped as comments, and the first remaining row is
# treated as a header that gets replaced by our own column names.
pdCountTs = pd.read_csv(
    io.StringIO(csvCountTs),
    names=['Timestamp', 'Sensor', 'Flow'],
    header=0,
    comment='#',
)
# Alternative wide-format reshape, currently unused:
# pdCountTs = pdCountTs.pivot(columns='Sensor', index='Timestamp', values='Flow')

In [None]:
# Build a wide frame of flow readings: one column per sensor, indexed by
# timezone-aware local timestamps. Stays None if no sensor reported any flow.
dfPointTs = None

# The 'Timestamp' column holds Unix epoch seconds, which are absolute (UTC)
# instants. Parse them explicitly as UTC and then convert to local time. The
# previous naive datetime.fromtimestamp(t) call applied the host machine's
# timezone before labelling the value as UTC, which shifted every reading on any
# machine not running in UTC. Converting the whole column once is also much
# cheaper than a per-element apply inside the loop.
localTimestamps = pd.to_datetime(pdCountTs['Timestamp'], unit='s', utc=True).dt.tz_convert(tzLocal)

for loopId in pdCountTs['Sensor'].unique():
    print('Processing timeseries for counter "%s"...' % loopId)

    sensorRows = pdCountTs['Sensor'] == loopId
    pdLoopTs = pdCountTs.loc[sensorRows, ['Flow']].copy()
    pdLoopTs.index = pd.DatetimeIndex(localTimestamps[sensorRows], name='Timestamp')

    # Skip sensors whose readings total zero; sum() ignores NaN, so sensors
    # with only missing values are skipped here as well.
    if pdLoopTs['Flow'].sum() == 0.0:
        print('  No vehicle flow data available.')
        continue

    # Name the column after the sensor so each sensor becomes its own column.
    pdLoopTs = pdLoopTs.rename(columns={'Flow': loopId})

    if dfPointTs is None:
        dfPointTs = pdLoopTs
    else:
        # Outer join keeps timestamps that only some sensors reported.
        dfPointTs = dfPointTs.join(pdLoopTs, how='outer')

dfPointTs

In [None]:
# Incoming data is requested at 5 minute frequency, but has lots of gaps, so:
#   1. resample into 15 minute (900 s) bins, taking the median of each bin;
#   2. linearly interpolate to fill gaps of at most 2 consecutive bins (30 minutes);
#   3. scale by 15 -- presumably converting a per-minute flow rate into a count
#      per 15 minute bin (TODO confirm against the data source).
dfPointInterpTs = (
    dfPointTs
    .resample('900s').median()
    .interpolate('linear', limit=2)
    .mul(15)
)

# Merge with the cached history. The cache is only trusted up to 72 hours before
# its baseline end; everything after that comes from the fresh download.
if dfPointInterpTsOld is None:
    # No usable cache was loaded, so the fresh data is the whole series.
    dfPointInterpTsMerged = dfPointInterpTs
else:
    dfPointInterpTsKept = dfPointInterpTsOld[dfPointInterpTsOld.index < baselineEnd - pd.Timedelta(hours=72)]
    # pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
    dfPointInterpTsMerged = pd.concat([dfPointInterpTsKept, dfPointInterpTs])

# Where cached and fresh rows share a timestamp, the cached row (listed first) wins.
dfPointInterpTsMerged = dfPointInterpTsMerged.loc[~dfPointInterpTsMerged.index.duplicated(keep='first')]
dfPointInterpTsMerged.to_pickle('../cache/sheffield-recent-traffic-volumes-pd.pkl')

# To inspect the tail of the fresh 15 minute timeseries:
#dfPointInterpTs.tail(50)