In [None]:
import pandas as pd
from datetime import datetime, timedelta
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.signal import savgol_filter


# Apply seaborn's 'whitegrid' style to the figures drawn in this notebook.
sns.set_style('whitegrid')

In [None]:
# Ambassador submission times, one per line, written as month/day/year
# hour:minute:second. Hours are NOT zero-padded (e.g. "2:33:20") and the lines
# are not in chronological order.
# NOTE(review): the timezone of these timestamps is not stated here — confirm.
timestamps = """10/19/2021 16:36:55
10/20/2021 13:46:20
10/19/2021 16:42:50
10/19/2021 16:58:22
10/20/2021 16:33:42
10/20/2021 10:19:41
10/19/2021 20:23:07
10/20/2021 2:33:20
10/20/2021 9:31:23
10/19/2021 17:07:18
10/20/2021 6:26:36
10/19/2021 19:32:22
10/19/2021 16:53:24
10/20/2021 19:26:48
10/20/2021 13:26:09
10/19/2021 18:30:38
10/19/2021 17:00:12
10/19/2021 17:05:55
10/19/2021 16:53:10
10/19/2021 17:17:34
10/19/2021 19:35:42
10/20/2021 10:32:14
10/21/2021 18:11:24
10/19/2021 17:37:41
10/20/2021 21:50:59
10/19/2021 17:21:32
10/20/2021 9:18:01
10/21/2021 13:51:06
10/21/2021 22:04:24
10/19/2021 17:28:38
10/19/2021 18:03:58
10/19/2021 21:39:17
10/19/2021 17:04:15
10/19/2021 16:41:30
10/22/2021 4:30:52
10/19/2021 17:29:13
10/19/2021 20:33:03
10/20/2021 10:16:23
10/19/2021 17:23:46
10/19/2021 17:56:22
10/19/2021 17:25:09
10/19/2021 21:04:18
10/19/2021 16:29:44
10/19/2021 20:05:48
10/19/2021 16:51:34
10/20/2021 1:48:41
10/19/2021 15:31:26
10/22/2021 13:20:54
10/23/2021 0:57:01
10/22/2021 20:35:57
10/22/2021 16:08:22
10/23/2021 2:42:34
10/26/2021 17:40:36
10/27/2021 13:48:09"""

In [None]:
# Parse first, then sort. Sorting the raw strings is lexicographic, and because
# the hours are not zero-padded ("2:33:20" vs "10:19:41") a string sort does
# not produce chronological order.
dts = sorted(
    datetime.strptime(line.strip(), '%m/%d/%Y %H:%M:%S')
    for line in timestamps.splitlines()
    if line.strip()  # tolerate blank lines in the pasted block
)
df = pd.DataFrame(dts, columns=['timestamp'])
df

In [None]:
# Length of the analysis window, in days.
days = 3
# Take the minimum instead of the first row so the window start is the earliest
# submission regardless of how the rows of `df` happen to be ordered.
start_ts = df['timestamp'].min()
end_ts = start_ts + timedelta(days=days)
start_ts, end_ts

In [None]:
# Bin width used to count submissions. `mins` is used when it is not None;
# otherwise `hours` is used.
mins = None # set to e.g. 15 for 15-minute bins
hours = 3
# Name of the DataFrame column that stores the start of each submission's bin.
colname = 'day_quarter'

def group_ts(df, mins=None, hours=None):
    """Return the ``timestamp`` column of ``df`` floored to the start of its bin.

    Parameters
    ----------
    df : DataFrame with a ``timestamp`` column of datetime-like values.
    mins : bin width in minutes, or None. When given, the minute of each
        timestamp is floored to a multiple of ``mins`` within its hour
        (seconds are dropped). Takes precedence over ``hours``.
    hours : bin width in hours, or None. When given (and ``mins`` is None),
        the hour of each timestamp is floored to a multiple of ``hours``
        within its day (minutes and seconds are dropped).

    Returns
    -------
    A Series of bin-start datetimes, or the ``timestamp`` column itself when
    both ``mins`` and ``hours`` are None.
    """
    # Nothing to bin by: hand back the column untouched.
    if mins is None and hours is None:
        return df.timestamp

    if mins is not None:
        def bin_start(ts):
            floored_minute = ts.minute // mins * mins
            return datetime(ts.year, ts.month, ts.day, ts.hour, floored_minute)
    else:
        def bin_start(ts):
            floored_hour = ts.hour // hours * hours
            return datetime(ts.year, ts.month, ts.day, floored_hour)

    return df.timestamp.apply(bin_start)

# Tag every submission with the start of the time bin it falls into.
df[colname] = group_ts(df, mins=mins, hours=hours)
df.head()

In [None]:
# Number of submissions per time bin, as a two-column frame: bin start, 'count'.
submissions_per_bin = df.groupby(colname)['timestamp'].count()
df_grouped = submissions_per_bin.rename('count').reset_index()

df_grouped.head()

In [None]:
# Build a regular grid of `n_ts` instants spanning `days` days from start_ts,
# one per bin width, then floor each to its bin start with group_ts.
if mins is not None:
    n_ts = int(days * 24 * 60 / mins)
    offsets = [timedelta(minutes=mins * i) for i in range(n_ts)]
else:
    n_ts = int(days * 24 / hours)
    offsets = [timedelta(hours=hours * i) for i in range(n_ts)]

grid = pd.DataFrame({'timestamp': [start_ts + offset for offset in offsets]})
ts_all = group_ts(grid, mins, hours)

In [None]:
# Frame holding every bin start of the grid (including bins with no submissions).
# The column is named via `colname` rather than a hard-coded 'day_quarter', so it
# always matches the key used when merging on `colname`.
df_empty = pd.DataFrame({colname: ts_all.tolist()})
df_empty.head()

In [None]:
# Combine observed counts with the full bin grid; bins without submissions get 0.
# Sort explicitly by bin start: the smoothing applied to 'count' treats the rows
# as consecutive samples, so chronological order must not be left to the merge's
# implicit key ordering.
df_merged = (
    pd.merge(df_grouped, df_empty, how='outer', on=colname)
    .fillna(0)
    .sort_values(colname)
    .reset_index(drop=True)
)

In [None]:
# Select the bins whose start is at or before the end of the analysis window,
# and compare the selected shape with the full frame's shape.
mask = df_merged[colname].le(end_ts)
df_merged.loc[mask].shape, df_merged.shape

In [None]:
counts = df_merged['count'][mask].to_numpy()

# savgol_filter (default mode='interp') needs polyorder < window_length <= len(x),
# and older SciPy releases additionally require an odd window_length. Clamp the
# preferred 25-point window / cubic fit to the available data so this cell still
# runs when the window contains fewer than 25 bins.
largest_odd_window = len(counts) if len(counts) % 2 else len(counts) - 1
window_length = min(25, largest_odd_window)
if window_length >= 1:
    polyorder = min(3, window_length - 1)
    smoothed_count = savgol_filter(counts, window_length, polyorder)
else:
    # No bins selected: nothing to smooth.
    smoothed_count = counts.astype(float)
smoothed_count[:10]

In [None]:
# Plot the raw per-bin counts and the smoothed curve for the analysis window.
window_df = df_merged[mask]

fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(12, 8), dpi=120, facecolor='w')
sns.lineplot(data=window_df, x=colname, y='count', label='count', ax=ax)
sns.lineplot(x=window_df[colname], y=smoothed_count, label='smoothed', alpha=0.8, ax=ax)

# Dashed vertical marker three hours after start_ts.
ax.axvline(start_ts + timedelta(hours=3), color='k', ls='--', label='start + 3h')

ax.legend()
ax.set_title(f'number of ambassador applications since {start_ts} until {end_ts}')
plt.show()
