In [1]:
import pandas as pd
from collections import Counter
import json

In [2]:
# Read the data. The first CSV column is used as the index and parsed as
# datetimes, which gives a time-indexed dataframe.
#
# pd.DataFrame.from_csv() used to do this by default, but it was deprecated
# in pandas 0.21 and removed in pandas 1.0. pd.read_csv() with
# index_col=0 and parse_dates=True is the documented replacement.
df = pd.read_csv('./user_4_geohash.csv', index_col=0, parse_dates=True)

In [3]:
# Be careful with timezone information.
# The time index is localized as UTC below, which only works on a tz-naive
# index.
# NOTE(review): this assumes the timestamps in the CSV are UTC -- confirm.
# The index is then converted to the local timezone for better analysis.

# tz_localize()/tz_convert() on a DataFrame only touch the index, not the
# datetime columns.
# Attach the initial timezone information. The guard keeps the cell
# re-runnable: calling tz_localize() on an index that is already tz-aware
# raises a TypeError.
if df.index.tz is None:
    df = df.tz_localize('UTC')

# Convert to New York time (or any other timezone).
df = df.tz_convert('America/New_York')

In [4]:
# Convert the 'entry_time' column to timezone-aware datetime objects:
# parse the raw values, label them as UTC, then express them in New York
# local time. The vectorized .dt accessor replaces the row-by-row lambda.
df['time'] = (
    pd.to_datetime(df['entry_time'])
    .dt.tz_localize('UTC')
    .dt.tz_convert('America/New_York')
)

In [5]:
# Day of the week of each 'time' value (Monday=0 ... Sunday=6), taken with
# the vectorized .dt accessor instead of a per-row lambda.
df['weekday'] = df['time'].dt.weekday

In [6]:
# Wall-clock time of day (datetime.time objects) of every row, taken from
# the time index.
# NOTE(review): 'weekday' is derived from the 'entry_time' column while this
# column is derived from the index -- confirm both describe the same instant.
df['time_in_day'] = df.index.time

In [33]:
# Generate the data for each block: for every weekday (0..4, i.e. Monday to
# Friday) and every 15-minute block of the day, find the most frequent
# 'geohash_8' value.
#
# Result layout:
#   data[weekday][block_number] = (geohash, count)
#   data[weekday][block_number] = None   when no row falls into that block
slot_length = pd.Timedelta(minutes=15)
first_slot_start = pd.to_datetime('8:00:00', format='%H:%M:%S')
# The comparison below is inclusive, so the block that *starts* at 17:00
# (covering 17:00-17:15) is still generated.
last_slot_start = pd.to_datetime('17:00:00', format='%H:%M:%S')

data = {}
for weekday in range(5):
    # Filter the weekday once instead of once per block.
    day_rows = df.loc[df['weekday'] == weekday]

    blocks = {}
    slot_start = first_slot_start
    while slot_start.time() <= last_slot_start.time():
        slot_end = slot_start + slot_length
        in_slot = day_rows.loc[(day_rows['time_in_day'] >= slot_start.time()) &
                               (day_rows['time_in_day'] < slot_end.time())]
        top = Counter(in_slot['geohash_8']).most_common(1)
        # A block without any rows has no most common value; indexing [0]
        # on the empty result would raise IndexError, so store None instead.
        blocks[len(blocks)] = top[0] if top else None
        slot_start = slot_end
    data[weekday] = blocks

In [35]:
# Serialize the per-weekday block data as JSON and write it to disk.
# Note that JSON object keys are always strings, so the integer weekday and
# block numbers come back as strings when the file is loaded again.
with open('./data.txt', 'w') as out_file:
    out_file.write(json.dumps(data))