In [66]:
import ast
import datetime

import altair as alt
import pandas as pd

from dataset import read_dataset

# Switch off Altair's max-rows check on chart data (as the method name says);
# the call's return value is echoed as this cell's output.
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

In [2]:
# Load the 'waze' dataset and discard every row that contains a missing value.
df = read_dataset('waze').dropna()

# (rows, columns) remaining after the drop.
df.shape

(51568, 7)

In [26]:
# Parse the `date_time` column into pandas datetimes so .date()/.hour etc. work later.
df['date_time'] = pd.to_datetime(df['date_time'])

def get_(idx):
    """Build an accessor for one element of a tuple stored as text.

    The returned callable takes a string holding a Python literal such as
    "(23, 11)", parses it, and returns the element at position `idx`.

    Parsing uses `ast.literal_eval` rather than `eval`: the strings come from
    a data file, and `eval` would execute any expression found there, whereas
    `literal_eval` only accepts plain literals and raises `ValueError` (or
    `SyntaxError`) for anything else.
    """
    def pick(text):
        return ast.literal_eval(text)[idx]

    return pick

# Split the "(x, y)" text in `grid` into one column per tuple position.
for position, column in enumerate(('grid_x', 'grid_y')):
    df.loc[:, column] = df['grid'].apply(get_(position))

# Constant weight of 1 per row, so summing `freq` counts rows.
df['freq'] = 1

df.head(15)

Unnamed: 0,y,x,date_time,pubMillis,reliability,magvar,grid,grid_x,grid_y,freq
0,-86.860466,36.311239,2019-02-28 15:03:02,1551384182656,7,41,"(23, 11)",23,11,1
1,-86.787896,36.259388,2019-02-28 14:59:54,1551383994502,6,340,"(20, 15)",20,15,1
2,-86.771863,36.177169,2019-02-28 08:33:42,1551360822718,7,180,"(14, 15)",14,15,1
3,-86.772109,36.176825,2019-02-28 08:38:38,1551361118755,5,245,"(14, 15)",14,15,1
4,-86.772799,36.17683,2019-02-28 08:42:14,1551361334568,6,277,"(14, 15)",14,15,1
5,-86.606643,36.063504,2019-02-28 08:55:13,1551362113439,6,239,"(6, 25)",6,25,1
6,-86.605551,36.064049,2019-02-28 08:57:20,1551362240909,5,237,"(6, 25)",6,25,1
7,-86.77584,36.263113,2019-02-28 08:28:37,1551360517967,8,8,"(20, 15)",20,15,1
8,-86.772024,36.176759,2019-02-28 08:36:00,1551360960873,8,277,"(14, 15)",14,15,1
9,-86.7725,36.176802,2019-02-28 08:40:52,1551361252695,5,277,"(14, 15)",14,15,1


In [4]:
# Grid extent per axis: largest observed cell index plus one, so that
# range(extent) reaches the largest index.
grid_shape = tuple(max(df[axis]) + 1 for axis in ('grid_x', 'grid_y'))

grid_shape

(30, 30)

In [5]:
# Date span of the data: (day of the earliest timestamp, day after the latest one).
# The extra day pushes the end bound past the whole final day.
period = (
    min(df['date_time']).date(),
    max(df['date_time']).date() + datetime.timedelta(days=1),
)

period

(datetime.date(2019, 2, 15), datetime.date(2019, 8, 2))

In [6]:
# Half-hour slots covering the study period.
# `period[1]` is midnight of the day *after* the last timestamp, and
# date_range includes both endpoints, so the final stamp is dropped to keep
# the range half-open. Left in, it would add a lone empty slot on a day
# outside the data and bias any per-slot average computed over days.
# '30min' is the non-deprecated spelling of the '30T' offset alias.
index = pd.date_range(*period, freq='30min')[:-1]

index[:5]

DatetimeIndex(['2019-02-15 00:00:00', '2019-02-15 00:30:00',
               '2019-02-15 01:00:00', '2019-02-15 01:30:00',
               '2019-02-15 02:00:00'],
              dtype='datetime64[ns]', freq='30T')

In [29]:
# grids[x][y] starts as the (date_time, freq) rows of one grid cell; the
# `grid` column holds the cell label as the text of the tuple (x, y).
grids = [
    [df[df.grid == str((x, y))][['date_time', 'freq']] for y in range(grid_shape[1])]
    for x in range(grid_shape[0])
]

for x in range(grid_shape[0]):
    for y in range(grid_shape[1]):
        cell = grids[x][y]
        # Rows sharing an exact timestamp are summed so the index is unique;
        # reindex() below does not accept duplicate labels.
        cell = cell.groupby(['date_time']).sum()
        # Add every half-hour slot of the study period. Slots without data
        # get NaN, which the final sum() turns into 0.
        cell = cell.reindex(cell.index.union(index))
        stamps = cell.index
        cell['date'] = stamps.strftime('%Y-%m-%d')
        # Label each timestamp with the half-hour slot that contains it
        # (minutes 0-29 -> ':00', minutes 30-59 -> ':30'). The previous
        # list-index expression had the two labels swapped.
        cell['hod'] = stamps.floor('30min').strftime('%H:%M')
        cell = cell.groupby(['date', 'hod']).sum().reset_index()
        grids[x][y] = cell

grids[23][11]

Unnamed: 0,date,hod,freq
0,2019-02-15,00:00,0.0
1,2019-02-15,00:30,0.0
2,2019-02-15,01:00,0.0
3,2019-02-15,01:30,0.0
4,2019-02-15,02:00,0.0
...,...,...,...
8060,2019-08-01,22:00,0.0
8061,2019-08-01,22:30,0.0
8062,2019-08-01,23:00,0.0
8063,2019-08-01,23:30,0.0


In [70]:
# Maps each half-hour label to a list of {'x', 'y', 'z'} records, one per grid
# cell, where 'z' is that cell's mean `freq` for the slot.
grid_hod_summary = {}

for x in range(grid_shape[0]):
    for y in range(grid_shape[1]):
        mean_by_hod = grids[x][y].groupby('hod')['freq'].mean()
        for hod, freq in mean_by_hod.items():
            grid_hod_summary.setdefault(hod, []).append({'x': x, 'y': y, 'z': freq})

len(grid_hod_summary)

48

In [81]:
# Number of heatmaps placed side by side before starting a new row.
CHARTS_PER_ROW = 4


def _hod_heatmap(hod):
    """Return the grid heatmap for one key of `grid_hod_summary`.

    The records stored under `hod` supply the cell coordinates ('x', 'y',
    drawn as ordinal axes) and the value to colour by ('z', quantitative).
    The key itself is used as the chart title.
    """
    source = pd.DataFrame(grid_hod_summary[hod])
    return alt.Chart(source).mark_rect().encode(
        y='y:O',
        x='x:O',
        color='z:Q'
    ).properties(
        title=hod
    )


panels = [_hod_heatmap(hod) for hod in grid_hod_summary]

# Slice-based chunking keeps a trailing, shorter row when the number of
# panels is not a multiple of CHARTS_PER_ROW, instead of dropping it.
rows = [
    alt.hconcat(*panels[start:start + CHARTS_PER_ROW])
    for start in range(0, len(panels), CHARTS_PER_ROW)
]

# With no panels at all, `chart` stays None, as it did before.
chart = alt.vconcat(*rows) if rows else None

chart