In [1]:
import pandas as pd
import numpy as np
import pickle
import urllib.request
import dateutil.parser
import dateutil.rrule
import datetime
from IPython.display import display, HTML

## Summary

These tables compare the footfall data at two strategic locations in Newcastle: one counter spanning the full width of Northumberland St, and a second on the corner of Blackett St and John Dobson St, which is used as a thoroughfare to the shopping district.

For each day and each direction, the following percentages are given with respect to the average number of pedestrians per hour:
 * Change since the day before
 * Change since the same weekday last week
 * Change compared to the median for the same weekday calculated over the last year
 
Statistics are provided for the morning peak, evening peak, inter-peak period (essentially daytime outside of peak hours), and night. As footfall is unlikely to be evenly distributed across these periods, the statistics for the current period will be volatile while data is still coming in.

In [2]:
# Used across most of the plots for people flows
# Midnight at the start of the current day (naive local time)
dateToday = datetime.datetime.combine(datetime.date.today(), datetime.datetime.min.time())
# Length of one counting interval in seconds (900 s = 15 minutes)
peopleCountInterval = 900

# NOTE: unpickling can execute arbitrary code, so only load cache files produced by this project.
# The context manager makes sure the file handle is closed once the frames are loaded.
with open('../cache/recent-pedestrian-flows-pd.pkl', 'rb') as cacheFile:
    peopleCountFrames = pickle.load(cacheFile)

# Report the most recent timestamp found across all of the cached sensor frames
lastObservation = np.max([np.max(frame.index) for frame in peopleCountFrames.values()])
print('Last data obtained %s' % lastObservation.strftime('%d %B %Y %H:%M'))

Last data obtained 19 March 2020 09:15


In [3]:
# Column dtypes that are treated as numeric; columns of any other dtype are ignored
plottableTypes = [
    'int16', 'int32', 'int64',
    'float16', 'float32', 'float64',
]
# Number of minutes covered by one counting interval
perMinuteFactor = peopleCountInterval / 60

In [4]:
# Sensors to report on: key is the sensor name used to look up its frame,
# value is the caption shown above that sensor's summary table.
sensorsToSummarise = {
    'Northumberland St near TK Maxx':
        'Newcastle: City centre shopping district (Northumberland St near TK Maxx)',
    'Pavement (south side) corner John Dobson St and Blackett St':
        'Newcastle: City centre edge of shopping district (corner of Blackett St and John Dobson St)',
}

def classifyTime(t):
    """Return the label of the reporting period that contains the hour of `t`.

    `t` must provide `strftime`; only its hour component is used. Hours
    07-09 are the morning peak, 10-15 the inter-peak, 16-18 the evening
    peak, and every other hour counts as night.
    """
    hourOfDay = int(t.strftime('%H'))
    # (first hour, hour after the last, label) for each daytime period
    daytimePeriods = (
        (7, 10, 'Morning peak (07:00 - 10:00)'),
        (10, 16, 'Inter-peak (10:00 - 16:00)'),
        (16, 19, 'Evening peak (16:00 - 19:00)'),
    )
    for firstHour, endHour, label in daytimePeriods:
        if firstHour <= hourOfDay < endHour:
            return label
    # Anything before 07:00 or from 19:00 onwards
    return 'Night (19:00 - 07:00)'

# Length of each reporting period in hours, keyed by the period label.
# The insertion order here is the order in which periods are iterated.
periodDurations = dict([
    ('Morning peak (07:00 - 10:00)', 10 - 7),
    ('Inter-peak (10:00 - 16:00)', 16 - 10),
    ('Evening peak (16:00 - 19:00)', 19 - 16),
    ('Night (19:00 - 07:00)', (24 - 19) + 7),
])

# Period labels in the order defined above
allPeriods = [periodLabel for periodLabel in periodDurations]

# Placeholder written into the table when a statistic cannot be computed because
# the data it needs is missing. It matches the value used for periods that have
# no observations at all, so gaps render as "0" / "+0%" instead of raising.
missingStatistic = 0.0


def _periodStatsForDay(dfSensor, directionColumns, dayStart):
    """Aggregate one day of sensor readings by period.

    Selects the rows whose index lies in the 24 hours starting at `dayStart`,
    groups them by the 'Period' column and returns the 'sum' and 'count' of
    every column in `directionColumns`. The result is indexed by period label
    with (direction, statistic) column pairs. An empty frame is returned when
    the day has no rows.
    """
    dayEnd = dayStart + pd.Timedelta(hours=24)
    dfDay = dfSensor.loc[(dayStart <= dfSensor.index) & (dfSensor.index < dayEnd)]
    if dfDay.empty:
        return pd.DataFrame()
    # Select the direction columns before aggregating so that the helper
    # string/date columns are never passed to sum()
    return dfDay.groupby('Period')[list(directionColumns)].agg(['sum', 'count'])


def _hourlyFlow(dfPeriodStats, period, direction):
    """Return the mean hourly flow for `direction` in `period`, or None if absent.

    `dfPeriodStats` is a frame produced by `_periodStatsForDay`. The mean count
    per interval is scaled by the number of intervals in an hour.
    """
    if period not in dfPeriodStats.index:
        return None
    periodTotal = dfPeriodStats.loc[period, (direction, 'sum')]
    periodCount = dfPeriodStats.loc[period, (direction, 'count')]
    return periodTotal / periodCount * (3600 / peopleCountInterval)


def _relativeChange(current, baseline):
    """Return (current - baseline) / baseline, or the placeholder if baseline is None."""
    if baseline is None:
        return missingStatistic
    return (current - baseline) / baseline


# The column layout, number formats and bar colours do not depend on the sensor,
# so they are built once rather than on every pass of the sensor loop.
formattersSummary = {}
colPeriods = []
colStats = []
for period in allPeriods:
    formattersSummary[(period, 'Hourly average flow')] = '{:,.0f}'
    formattersSummary[(period, 'Change from day before (%)')] = '{:+,.0%}'
    formattersSummary[(period, 'Change from week before (%)')] = '{:+,.0%}'
    formattersSummary[(period, 'Change from annual average (%)')] = '{:+,.0%}'
    colPeriods.extend(np.repeat(period, 4))
    colStats.extend([
        'Hourly average flow',
        'Change from day before (%)',
        'Change from week before (%)',
        'Change from annual average (%)'
    ])

colIndex = pd.MultiIndex.from_arrays(
    [colPeriods, colStats],
    names=['Period', 'Statistic']
)

periodBarColours = {
    'Morning peak (07:00 - 10:00)': '#FFA07A50',
    'Inter-peak (10:00 - 16:00)': '#EE1F5F50',
    'Evening peak (16:00 - 19:00)': '#FFA07A50',
    'Night (19:00 - 07:00)': '#A0FF7A50'
}

# Build and display one summary table per sensor: one row per (date, direction)
# for the last ten days, four statistics per period.
for sensorName in sensorsToSummarise.keys():
    dateIndex = []
    directionIndex = []
    summaryData = []

    # Work on a copy so the cached frame is not modified by the helper columns
    dfSensor = peopleCountFrames[sensorName].copy()
    dfSensor.insert(0, 'Date', dfSensor.index.to_series().apply(lambda t: t.date()))
    dfSensor.insert(0, 'Day of week', dfSensor.index.to_series().apply(lambda t: t.strftime('%A')))
    dfSensor.insert(1, 'Time of day', dfSensor.index.to_series().apply(lambda t: t.strftime('%H:%M:%S')))
    dfSensor.insert(1, 'Period', dfSensor.index.to_series().apply(classifyTime))

    # The numeric columns are the per-direction counts; the helper columns
    # inserted above are all non-numeric and are therefore excluded
    directionColumns = list(dfSensor.select_dtypes(plottableTypes).columns)

    # Total per date and period, then the median of those totals per weekday and
    # period. Only the direction columns are aggregated: summing or taking the
    # median of the string/date helper columns is an error in recent pandas.
    dfDailyPeriodTotals = dfSensor \
        .groupby(['Date', 'Day of week', 'Period'], as_index=False)[directionColumns] \
        .sum()
    dfAveragePeriodTotals = dfDailyPeriodTotals \
        .groupby(['Day of week', 'Period'], as_index=False)[directionColumns] \
        .median()

    for d in range(9, -1, -1):
        sensorDate = dateToday - pd.Timedelta(days=d)

        # One row per direction for this date (not a fixed two rows)
        dateIndex.extend([sensorDate.strftime('%A %d %B')] * len(directionColumns))
        directionIndex.extend(directionColumns)

        dfSensorOnDate = _periodStatsForDay(dfSensor, directionColumns, sensorDate)
        dfSensorLastWeek = _periodStatsForDay(dfSensor, directionColumns, sensorDate - pd.Timedelta(days=7))
        dfSensorYesterday = _periodStatsForDay(dfSensor, directionColumns, sensorDate - pd.Timedelta(days=1))

        dfSensorAverageDayOfWeek = dfAveragePeriodTotals[dfAveragePeriodTotals['Day of week'] == sensorDate.strftime('%A')]

        for direction in directionColumns:
            summaryRow = []
            for period in allPeriods:
                periodHourly = _hourlyFlow(dfSensorOnDate, period, direction)
                if periodHourly is None:
                    # No observations for this period on this date
                    summaryRow.extend([missingStatistic] * 4)
                    continue

                # Change on yesterday and on the same weekday last week; either
                # baseline may be missing if the sensor has gaps in its data
                yesterdayChange = _relativeChange(
                    periodHourly, _hourlyFlow(dfSensorYesterday, period, direction))
                lastWeekChange = _relativeChange(
                    periodHourly, _hourlyFlow(dfSensorLastWeek, period, direction))

                # Change on normal profile: the median period total for this
                # weekday, spread evenly over the length of the period
                profileTotals = dfSensorAverageDayOfWeek.loc[
                    dfSensorAverageDayOfWeek['Period'] == period, direction]
                if profileTotals.empty:
                    profileHourly = None
                else:
                    profileHourly = profileTotals.values[0] / periodDurations[period]
                profileChange = _relativeChange(periodHourly, profileHourly)

                summaryRow.extend([
                    periodHourly, # Total
                    yesterdayChange, # Change on yesterday
                    lastWeekChange, # Change on last week
                    profileChange, # Change on average
                ])
            summaryData.append(summaryRow)

    rowIndex = pd.MultiIndex.from_arrays([
            dateIndex,
            directionIndex
        ],
        names=['Date', 'Direction']
    )

    dfSummary = pd.DataFrame(summaryData, columns=colIndex, index=rowIndex)
    dfSummaryStyler = dfSummary.style \
        .format(formattersSummary) \
        .set_caption(sensorsToSummarise[sensorName]) \
        .set_table_styles(
            [dict(selector="th",props=[('text-align', 'center')]),
                dict(selector="tr:nth-child(2) th.col_heading",
                     props=[('vertical-align', 'bottom'),
                            ('writing-mode', 'vertical-rl'),
                            ]),
             dict(selector="caption",props=[('font-weight', 'bold'), ('font-size', '120%')])
            ]
        )

    for period in allPeriods:
        # Diverging colour scale for the change against the weekday profile,
        # clamped to -100% .. +100%
        dfSummaryStyler.background_gradient(
            subset=[(period, 'Change from annual average (%)')],
            vmin=-1.0,
            vmax=1.0,
            cmap='PiYG'
        )
        # In-cell bar proportional to the hourly flow, starting from zero
        dfSummaryStyler.bar(subset=[(period, 'Hourly average flow')], color=periodBarColours[period], vmin=0)

    display(HTML(dfSummaryStyler._repr_html_()))

Unnamed: 0_level_0,Period,Morning peak (07:00 - 10:00),Morning peak (07:00 - 10:00),Morning peak (07:00 - 10:00),Morning peak (07:00 - 10:00),Inter-peak (10:00 - 16:00),Inter-peak (10:00 - 16:00),Inter-peak (10:00 - 16:00),Inter-peak (10:00 - 16:00),Evening peak (16:00 - 19:00),Evening peak (16:00 - 19:00),Evening peak (16:00 - 19:00),Evening peak (16:00 - 19:00),Night (19:00 - 07:00),Night (19:00 - 07:00),Night (19:00 - 07:00),Night (19:00 - 07:00)
Unnamed: 0_level_1,Statistic,Hourly average flow,Change from day before (%),Change from week before (%),Change from annual average (%),Hourly average flow,Change from day before (%),Change from week before (%),Change from annual average (%),Hourly average flow,Change from day before (%),Change from week before (%),Change from annual average (%),Hourly average flow,Change from day before (%),Change from week before (%),Change from annual average (%)
Date,Direction,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2
Tuesday 10 March,Walking North,1005,+13%,+0%,-12%,1852,-14%,-13%,-19%,1491,+26%,-5%,+19%,206,+104%,-6%,+5%
Tuesday 10 March,Walking South,598,+4%,-6%,-15%,1921,-13%,-13%,-21%,1986,+27%,-10%,+9%,209,+83%,-6%,+20%
Wednesday 11 March,Walking North,935,-7%,-5%,-5%,1851,-0%,-14%,-19%,1006,-33%,-41%,-23%,122,-41%,-47%,-37%
Wednesday 11 March,Walking South,565,-6%,-7%,-9%,1893,-1%,-19%,-24%,1366,-31%,-39%,-28%,119,-43%,-53%,-32%
Thursday 12 March,Walking North,996,+7%,+1%,-10%,1817,-2%,-20%,-25%,1360,+35%,-23%,-4%,161,+32%,-36%,-23%
Thursday 12 March,Walking South,642,+14%,-2%,-7%,1873,-1%,-20%,-29%,1702,+25%,-27%,-14%,163,+36%,-48%,-17%
Friday 13 March,Walking North,682,-32%,-30%,-34%,1937,+7%,-17%,-22%,1542,+13%,-5%,+5%,288,+79%,-11%,+0%
Friday 13 March,Walking South,457,-29%,-28%,-32%,2105,+12%,-17%,-24%,2068,+22%,-11%,-2%,297,+82%,-10%,-2%
Saturday 14 March,Walking North,507,-26%,-12%,-42%,2659,+37%,-6%,-12%,1767,+15%,+3%,+13%,402,+39%,+16%,+25%
Saturday 14 March,Walking South,357,-22%,-6%,-42%,2745,+30%,-8%,-15%,2169,+5%,-6%,+6%,353,+19%,-3%,+18%


Unnamed: 0_level_0,Period,Morning peak (07:00 - 10:00),Morning peak (07:00 - 10:00),Morning peak (07:00 - 10:00),Morning peak (07:00 - 10:00),Inter-peak (10:00 - 16:00),Inter-peak (10:00 - 16:00),Inter-peak (10:00 - 16:00),Inter-peak (10:00 - 16:00),Evening peak (16:00 - 19:00),Evening peak (16:00 - 19:00),Evening peak (16:00 - 19:00),Evening peak (16:00 - 19:00),Night (19:00 - 07:00),Night (19:00 - 07:00),Night (19:00 - 07:00),Night (19:00 - 07:00)
Unnamed: 0_level_1,Statistic,Hourly average flow,Change from day before (%),Change from week before (%),Change from annual average (%),Hourly average flow,Change from day before (%),Change from week before (%),Change from annual average (%),Hourly average flow,Change from day before (%),Change from week before (%),Change from annual average (%),Hourly average flow,Change from day before (%),Change from week before (%),Change from annual average (%)
Date,Direction,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2
Tuesday 10 March,Walking North,303,+4%,-1%,-12%,418,-1%,+3%,-10%,414,+21%,-5%,+4%,58,+133%,-10%,-11%
Tuesday 10 March,Walking South,237,+9%,+2%,+2%,408,+9%,+10%,-4%,498,+25%,-4%,+10%,63,+182%,-13%,-1%
Wednesday 11 March,Walking North,283,-6%,+4%,-12%,415,-1%,+10%,-14%,285,-31%,-35%,-30%,29,-49%,-60%,-59%
Wednesday 11 March,Walking South,224,-5%,-6%,-0%,405,-1%,+8%,-10%,326,-35%,-41%,-33%,30,-53%,-65%,-57%
Thursday 12 March,Walking North,255,-10%,-8%,-24%,289,-30%,-22%,-40%,414,+45%,-6%,-5%,43,+47%,-45%,-46%
Thursday 12 March,Walking South,202,-10%,-14%,-13%,271,-33%,-19%,-40%,478,+47%,-7%,-4%,45,+53%,-46%,-39%
Friday 13 March,Walking North,225,-12%,-24%,-35%,394,+36%,+8%,-25%,458,+11%,-5%,+1%,86,+99%,-20%,-11%
Friday 13 March,Walking South,164,-19%,-23%,-27%,361,+33%,+5%,-27%,507,+6%,-17%,-5%,108,+140%,-14%,+5%
Saturday 14 March,Walking North,168,-26%,-9%,-34%,509,+29%,-2%,-25%,482,+5%,-2%,-2%,122,+43%,+24%,+5%
Saturday 14 March,Walking South,95,-42%,+15%,-25%,501,+39%,-7%,-22%,641,+26%,-1%,-2%,127,+17%,+2%,+8%
