In [None]:
import os

from googleapiclient.discovery import build
from datetime import datetime, timedelta
import json
import numpy as np
import arrow
import pandas as pd
from matplotlib import pyplot as plt
import seaborn as sns

from source.utils import authenticate

In [None]:
# Directory (relative to the notebook) that holds col2meaning.json and the exported event CSVs.
data_dir = '../data/'

In [None]:
# Notebook switches.
# NOTE(review): save_fig is not referenced by any cell visible in this notebook — confirm or remove.
save_fig = False
# When True, the download cell re-fetches the events through the API and writes a new CSV.
download_all_events = False

In [None]:
# Obtain credentials through the project helper and build a Calendar v3 API client.
creds = authenticate()
service = build('calendar', 'v3', credentials=creds)

In [None]:
# Use seaborn's 'whitegrid' style for the plots in this notebook.
sns.set_style('whitegrid')

### Calendars 

In [None]:
# Fetch the calendar list; only this single response is used (no paging over further results).
clist = service.calendarList().list().execute()

In [None]:
def print_citem(citem):
    """Print the summary and id of one calendar-list entry, then a separator line.

    Args:
        citem: mapping with string values under 'summary' and 'id'.
    """
    for label, key in (('Summary:\t', 'summary'), ('id:\t\t', 'id')):
        print(label + citem[key])
    print('***********')

In [None]:
# List every calendar returned in the calendar-list response.
for citem in clist['items']:
    print_citem(citem)

#### Primary calendar 

In [None]:
def print_default_calendar(clist):
    """Print every calendar-list entry that is flagged as primary.

    Args:
        clist: calendar-list response; its 'items' entries are checked for a
            truthy 'primary' field (a missing field counts as not primary).
    """
    primary_items = (entry for entry in clist['items'] if entry.get('primary', False))
    for entry in primary_items:
        print_citem(entry)

In [None]:
# Show which calendar is the primary one.
print_default_calendar(clist)

### Colors 

In [None]:
# Event colour definitions from the API's colors resource (the 'event' section of the response).
event_colors = service.colors().get().execute()['event']

In [None]:
# Display the colour table.
event_colors

In [None]:
# NOTE(review): def_col is not used by any cell visible here; presumably a default colour id — confirm or remove.
def_col = 11

In [None]:
# Load the mapping from colour id to event-type name; parse_event looks event types up in it by colorId.
keyfile = data_dir + 'col2meaning.json'
with open(keyfile, 'r') as fo:
    col2meaning = json.load(fo)
col2meaning

In [None]:
# All event-type names except 'default', as a numpy array (later cells index into it by position).
types = list(col2meaning.values())
# list.remove drops only the first 'default' and raises ValueError if there is none.
types.remove('default')
types = np.array(types)
list(types)

### Events

In [None]:
# Query window used for prepare_req's default arguments: `start` and exactly one year later.
# `end` only feeds max_ts, which prepare_req currently does not send to the API.
start = datetime(2014,1,1)
end = datetime(start.year + 1, 1,1)
start, end

In [None]:
def print_event(e):
    """Print a short, human-readable summary of one API event.

    Missing fields are replaced by placeholder text ('summary', 'unknown',
    'start', 'end', 'status'). Start and end are printed via ``str`` because
    they are dicts in API responses.

    Args:
        e: event mapping as returned by the Calendar API.
    """
    rows = (
        ('Summary:\t', e.get('summary', 'summary')),
        ('ColorId:\t', e.get('colorId', 'unknown')),
        ('Start:\t\t', str(e.get('start', 'start'))),
        ('End:\t\t', str(e.get('end', 'end'))),
        ('Status:\t\t', e.get('status', 'status')),
    )
    for label, value in rows:
        print(label + value)
    print('====')

In [None]:
# Timezone-aware "now" in the machine's local zone. The previous version glued a
# fixed '+02:00' onto the naive local time, which is wrong whenever the local
# UTC offset differs (other zones, or winter time).
now = datetime.now().astimezone()

def prepare_req(min_ts = start.isoformat() + 'Z', max_ts = end.isoformat() + 'Z'):
    """Build (but do not execute) an events.list request for the primary calendar.

    Recurring events are expanded into single instances and the results are
    ordered by start time.

    Args:
        min_ts: lower bound sent as ``timeMin``.
        max_ts: accepted for symmetry but currently NOT sent to the API, so the
            request has no upper bound.

    Returns:
        The request object created by ``service.events().list``.
    """
    print('get everything since', min_ts)

    list_kwargs = dict(
        calendarId='primary',
        timeMin=min_ts,
        singleEvents=True,
        orderBy='startTime',
    )
    return service.events().list(**list_kwargs)


In [None]:
# Request object for the first page of events, using the default time window.
req_orig = prepare_req()

In [None]:
# Disabled scratch code kept as a bare string literal: nothing in this cell is executed.
# NOTE(review): it appears to have built col2meaning from sample events — confirm and delete if obsolete.
'''
col2meaning = { 
    e.get('colorId'): e.get('summary') for e in events[:-3]
}

col2meaning
'''
pass

In [None]:
def print_event_res(events_result):
    """Print how many events a result page holds and its next-page token.

    Args:
        events_result: events.list response mapping; a missing 'items' counts
            as zero results and a missing 'nextPageToken' prints 'no next page'.
    """
    n_items = len(events_result.get("items", []))
    token = events_result.get('nextPageToken', 'no next page')
    print(f'got {n_items} results')
    print('next page token:  ' + token)

In [None]:
%%time

# Execute the first-page request and peek at the result.
events_result = req_orig.execute()
events = events_result.get('items', [])
# Raises IndexError when the page holds no events.
print_event(events[0])

print_event_res(events_result)

In [None]:
def parse_to_datetime(time_obj):
    """Convert a calendar time value to a ``datetime``.

    Args:
        time_obj: one of
            * a ``datetime`` (including subclasses such as pandas Timestamps)
              or a ``float`` (e.g. NaN from an empty CSV cell), which are
              returned unchanged;
            * an API time dict carrying a 'date' (all-day) or 'dateTime' entry;
            * any other value ``arrow.get`` accepts, such as an ISO-8601 string.

    Returns:
        ``arrow.get(...).datetime`` for parsed values, or ``time_obj`` itself
        for the pass-through types.
    """
    # isinstance replaces `type(x) is 'float'`: comparing a type with the string
    # 'float' is never true, so floats used to fall through and crash on the
    # membership tests below.
    if isinstance(time_obj, (datetime, float)):
        return time_obj

    value = time_obj
    # Only dicts are searched for keys; on a plain string `'date' in s` would be
    # a substring test.
    if isinstance(time_obj, dict):
        # 'date' takes precedence over 'dateTime', as before.
        for key in ('date', 'dateTime'):
            if key in time_obj:
                value = time_obj[key]
                break

    return arrow.get(value).datetime

In [None]:
def download_events(verbose=False):
    """Fetch events page by page until the results reach into the future.

    Uses the notebook-level ``service`` client, ``prepare_req`` for the first
    request and ``now`` as the stop threshold.

    Args:
        verbose: when True, print a short summary of every page after the first.

    Returns:
        A list of pages, each page being the list of event dicts from one
        response (callers flatten it afterwards).
    """
    events = []
    prev_req = prepare_req()
    prev_res = prev_req.execute()
    events.append(prev_res.get('items', []))

    i = 0
    while True:
        print(f'round {i}', end='\r')
        # list_next returns None once there is no further page.
        prev_req = service.events().list_next(prev_req, prev_res)
        if prev_req is None:
            break
        prev_res = prev_req.execute()

        if verbose:
            print_event_res(prev_res)

        res_events = prev_res.get('items', [])
        events.append(res_events)

        # A page can come back empty while more pages still follow; only look
        # at its last event when it has one (previously an IndexError).
        if res_events:
            last_start = parse_to_datetime(res_events[-1].get('start'))
            if last_start > now:
                print('last start was in the future, we can stop', last_start)
                break
        i += 1

    print('Finished fetching all the events')

    return events

In [None]:
def parse_event(e):
    """Flatten one API event into the record used for the events table.

    Args:
        e: event mapping with 'start' and 'end' time dicts; 'colorId' defaults
            to '0' and 'summary' to '' when missing.

    Returns:
        dict with keys 'start' (datetime), 'type' (name looked up in
        ``col2meaning``), 'summary', 'duration_s' (end - start in seconds,
        which for all-day events spans whole days) and 'whole_day' (True when
        the start carries a 'date' entry).
    """
    raw_start, raw_end = e.get('start'), e.get('end')
    is_whole_day = 'date' in raw_start

    begins = parse_to_datetime(raw_start)
    ends = parse_to_datetime(raw_end)
    seconds = (ends - begins).total_seconds()

    event_type = col2meaning[e.get('colorId', '0')]

    return dict(
        start=begins,
        type=event_type,
        summary=e.get('summary', ''),
        duration_s=seconds,
        whole_day=is_whole_day,
    )

In [None]:
def group_datetime_by_week(ts):
    """Map a timestamp to one representative date for its ISO week.

    The ISO (year, week) pair is turned into day-of-year ``week * 7`` (capped
    at 365) of the ISO year, so all timestamps of a week share one datetime.

    Args:
        ts: object with an ``isocalendar()`` method (datetime-like).

    Returns:
        Naive ``datetime`` at midnight of the computed day.
    """
    iso_year, iso_week = ts.isocalendar()[:2]
    day_of_year = min(iso_week * 7, 365)
    return datetime.strptime(f'{iso_year}-{day_of_year}', "%Y-%j")

In [None]:
def get_quarter(ts):
    """Return the calendar quarter (1-4) of a timestamp.

    Args:
        ts: object with a ``month`` attribute.
    """
    # ceiling division of the month by 3
    return (ts.month + 2) // 3

def get_invoiced_quarter(ts):
    """Return the calendar quarter (1-4) encoded in a timestamp string.

    Args:
        ts: string whose characters 5-6 hold the month (e.g. 'YYYY-MM-...'),
            or a float when the timestamp was not defined.

    Returns:
        The quarter, or the sentinel 666 for float input.

    Raises:
        AssertionError: if ``ts`` is neither a float nor a str.
    """
    if type(ts) is float:
        # undefined timestamp -> sentinel value
        return 666

    assert type(ts) is str, 'expected timestamp to be a string'
    month = int(ts[5:7])
    # ceiling division of the month by 3
    return (month + 2) // 3

### Download and save the events 

In [None]:
def save_events(df, time_str):
    """Write ``df`` to ``calendar_events_until_<time_str>.csv`` in ``data_dir``.

    Args:
        df: DataFrame to export (written without its index).
        time_str: text embedded in the file name.
    """
    # Write the frame that was passed in; the previous body ignored `df` and
    # silently wrote the notebook-global `hist_df` instead.
    df.to_csv(data_dir + f'calendar_events_until_{time_str}.csv', index=False)

In [None]:
%%time

# Only runs when download_all_events is set: fetch, flatten, parse and export the events.
if download_all_events:
    events = download_events()
    # includes all recurring future events as well
    # download_events returns a list of pages; flatten them into one array of event dicts
    events = np.concatenate(events) if type(events) is list else events
    print('events.shape', events.shape)
    
    print('available fields')
    print(events[0].keys())
    print()
    
    print_event(events[0])
    print_event(events[-1])

    events_parsed = [ parse_event(e) for e in events ]
    
    events_all = pd.DataFrame(data=events_parsed)

    # Keep only events that started more than a day before `now`
    yesterday = now - timedelta(days=1)
    print(yesterday)
    
    hist_df = events_all[events_all.start < yesterday]

    # The timestamp (including ':' characters) becomes part of the CSV file name
    save_events(hist_df, yesterday.strftime("%Y-%m-%d_%H:%M:%S"))
    print('hist df saved')

### Load the events from disk 

In [None]:
def get_files_in_dir(directory, return_dirs=False, verbose=False):
    """List the entries directly inside ``directory`` (no recursion).

    Args:
        directory: path to inspect.
        return_dirs: when True, also return the sub-directory names.
        verbose: when True, print the path, sub-directories and numbered files.

    Returns:
        The list of file names, or ``(files, dirs)`` when ``return_dirs`` is set.
        Both are empty when ``directory`` exists but cannot be walked (for
        example because it is a regular file).

    Raises:
        Exception: if ``directory`` does not exist.
    """
    if not os.path.exists(directory):
        raise Exception(f'{directory} does not exist!')

    # Only the top level is needed. The default covers the case where os.walk
    # yields nothing, which previously left `files`/`dirs` unbound.
    path, dirs, files = next(os.walk(directory), (directory, [], []))

    if verbose:
        print('path: ', path)
        print('dirs', dirs)
        print('files')
        for i, file in enumerate(files):
            print('\t', i, file)

    return files if not return_dirs else (files, dirs)

In [None]:
# Names of the files directly inside the data directory.
files = get_files_in_dir(data_dir)
files

In [None]:
# Position of the events export within `files`.
# NOTE(review): the data directory also holds col2meaning.json, so position 0 is
# not guaranteed to be the CSV — adjust `ind` if the assertion below fires.
ind = 0
# Exact comparison: `in 'csv'` was a substring test that also accepted '', 'c', 'sv', ...
# The message now refers to `files`; it used to reference the undefined name `file`.
assert files[ind].split('.')[-1] == 'csv', f'file was not a csv file! it was {files[ind]}'

events_file = data_dir + files[ind]
print(f'load events from file {events_file}')

# read_csv opens the path itself, so no separate open() handle is needed.
events_all = pd.read_csv(events_file)

events_all

#### Durations 

In [None]:
def process_durations(df):
    """Add minute, hour and day versions of the 'duration_s' column.

    The frame is modified in place and also returned.

    Args:
        df: DataFrame with a numeric 'duration_s' column.

    Returns:
        The same DataFrame with 'duration_min', 'duration_h' and 'duration_d' added.
    """
    # Each column is derived from the one computed just before it.
    conversions = (
        ('duration_min', 'duration_s', 60),
        ('duration_h', 'duration_min', 60),
        ('duration_d', 'duration_h', 24),
    )
    for target, source, divisor in conversions:
        df[target] = df[source] / divisor

    return df

In [None]:
# Add the minute/hour/day duration columns (process_durations also modifies the frame in place).
events_all = process_durations(events_all)

#### Dates 

In [None]:
def process_dates(df):
    """Parse the 'start' column and add the calendar grouping columns.

    The frame is modified in place and also returned. Added columns:
    'year', 'year_mon' (first day of the event's month), 'mon' (first day of
    that month in the year 2000, i.e. the month regardless of year),
    'year_week' (see ``group_datetime_by_week``) and 'quarter'.

    Args:
        df: DataFrame with a 'start' column accepted by ``parse_to_datetime``.

    Returns:
        The same DataFrame with the columns above added.
    """
    def first_of_month(ts):
        return datetime.strptime(f'{ts.year}-{ts.month}-1', '%Y-%m-%d')

    def month_only(ts):
        return datetime.strptime(f'2000-{ts.month}-1', '%Y-%m-%d')

    df['start'] = df.start.apply(parse_to_datetime)

    derived = (
        ('year', lambda ts: ts.year),
        ('year_mon', first_of_month),
        ('mon', month_only),
        ('year_week', group_datetime_by_week),
        ('quarter', get_quarter),
    )
    for column, transform in derived:
        df[column] = df.start.apply(transform)
    return df

In [None]:
# Parse the start timestamps and add the year/month/week/quarter grouping columns.
events_all = process_dates(events_all)

In [None]:
# Keep only the rows whose whole_day flag is False (timed events).
# Note: this rebinds `events`, which earlier cells used for raw API event lists.
events = events_all[events_all.whole_day == False]
events

In [None]:
def fill_in_missing_types(data, time_col='year_week', unique_times=None, all_types=None):
    """Add zero-duration rows so every period contains every event type.

    Args:
        data: DataFrame with 'type', ``time_col`` and 'duration_h' columns.
        time_col: name of the column that identifies the period.
        unique_times: periods to complete; defaults to the periods present in
            ``data``. Periods absent from ``data`` get a row for every type.
        all_types: event types every period must contain; defaults to the
            notebook-level ``types`` array.

    Returns:
        A new DataFrame holding the original rows plus one
        ``{'type': t, time_col: period, 'duration_h': 0}`` row per missing
        (period, type) pair, sorted by ``time_col`` and 'type'. Other columns
        of ``data`` are left empty (NaN) on the added rows.
    """
    all_types = types if all_types is None else all_types
    unique_times = unique_times if unique_times is not None else data[time_col].unique()

    to_append = []
    for period in unique_times:
        # Set membership replaces np.argmax(row.type == types), which returned
        # index 0 for a type outside `types` and so wrongly marked the first
        # type as present.
        seen = set(data.loc[data[time_col] == period, 'type'])
        to_append.extend(
            {'type': t, time_col: period, 'duration_h': 0}
            for t in all_types
            if t not in seen
        )

    # DataFrame.append no longer exists in pandas 2.x; concat is the replacement.
    # Skipped when nothing is missing so no empty frame takes part in the concat.
    if to_append:
        data = pd.concat([data, pd.DataFrame(to_append)])
    data = data.sort_values([time_col, 'type'])

    return data

In [None]:
# Total hours per (type, year); types missing in a year are added with 0 h.
events_year = (
    events
    .groupby(['type', 'year'])
    .agg({'duration_h': np.sum})
    .reset_index()
    .pipe(fill_in_missing_types, 'year')
    .sort_values(['year', 'type'])
)
events_year

In [None]:
# Total hours per (type, week). The weeks to complete come from events_all, so
# weeks that only appear there still get a zero row for every type.
events_week = (
    events
    .groupby(['type', 'year_week'])
    .agg({'duration_h': np.sum})
    .reset_index()
    .pipe(fill_in_missing_types, 'year_week', events_all.year_week.unique())
    .assign(year=lambda frame: frame.year_week.apply(lambda ts: ts.year))
    .sort_values(['year_week', 'type'])
)
events_week

### Event types 

In [None]:
# Add an entry for colour id '0', the id parse_event assigns to events without a colorId.
event_colors['0'] = dict(background='blue')
event_colors

In [None]:
# Plot colour per event type: the background colour of the type's colour id.
# Raises KeyError if a colour id in event_colors has no entry in col2meaning.
palette = {
    col2meaning[c]: event_colors[c]['background'] for c in event_colors
}
palette

In [None]:
def show_values_on_bars(axs):
    """Write each bar's height (rounded to an integer) centred above the bar.

    Args:
        axs: a single Axes, or a numpy array of Axes (every element is annotated).
    """
    def annotate(ax):
        for bar in ax.patches:
            x_mid = bar.get_x() + bar.get_width() / 2
            y_top = bar.get_y() + bar.get_height()
            label = str(int(round(bar.get_height(), 0)))
            ax.text(x_mid, y_top, label, ha="center", fontsize=8)

    targets = axs.flat if isinstance(axs, np.ndarray) else [axs]
    for ax in targets:
        annotate(ax)

In [None]:
# Total time (in days) spent on each event type over the whole data set.
fig, axes = plt.subplots(1,1,figsize=(12,8), dpi=120, facecolor='w')

data = events.groupby('type').agg({'duration_d': np.sum}).reset_index()

sns.barplot(data=data.sort_values('type'), x='type', y='duration_d', ax=axes, palette=palette)
# The year range is read from the plotted events instead of the hard-coded
# '2015-2021', which goes stale as soon as the exported data changes.
plt.title(f'Use of time per event type from {events.year.min()}-{events.year.max()}')

plt.xticks(rotation=90)

plt.show()

In [None]:
# Bar plot of the per-event duration (hours) for each event type.
# NOTE(review): sns.barplot draws one aggregated bar per type (the mean by default)
# rather than a distribution — confirm the title matches the intent.
fig, axes = plt.subplots(1,1,figsize=(12,8), dpi=120, facecolor='w')

sns.barplot(data=events.sort_values('type'), x='type', y='duration_h', ax=axes, palette=palette)
plt.title('distribution of event duration')

plt.xticks(rotation=90)

plt.show()

### Event types over time 

#### By year 

In [None]:
# One bar chart per year: total hours per event type, all on a shared y-scale.
time_agg = 'duration_h'
data = events.groupby(['type', 'year']).agg({time_agg: np.sum}).reset_index()
data = data.sort_values('year')

n_years = data.year.nunique()

# Common upper limit so the yearly charts are comparable
top = 1.05 * data[time_agg].max()

# squeeze=False always returns a 2-D array of Axes. Without it a single year
# yields a bare Axes object and `axes[i]` fails.
fig, axes = plt.subplots(n_years,1,figsize=(12,6*n_years), dpi=120, facecolor='w', squeeze=False)

for i, year in enumerate(data.year.unique()):
    ax = axes[i, 0]

    query = f'year == {year}'
    sns.barplot(data=data.query(query).sort_values('type'), x='type', y=time_agg, ax=ax, palette=palette)
    ax.set_title(f'{year}')

    show_values_on_bars(ax)
    ax.set_ylim(top=top)
    ax.xaxis.set_tick_params(rotation=90)

plt.tight_layout()
plt.show()

#### By week 

In [None]:
# One line chart per year: weekly hours per event type, all on a shared y-scale.
time_agg = 'duration_h'

# `data` stays bound after this cell; the working-hours cell below reads it.
data = fill_in_missing_types(events_week, 'year_week')
# Rows added by the fill carry no 'year'; derive it again for every row.
data = data.assign(year=data.year_week.apply(lambda ts: ts.year))

# Years are taken from the plotted frame itself. The loop previously ran over
# events.year while the number of subplots came from events_week.year, so the
# two could disagree and index past the last Axes.
years = sorted(data.year.unique())
n_years = len(years)

# Common upper limit so the yearly charts are comparable
top = 1.05 * data[time_agg].max()

# squeeze=False always returns a 2-D array of Axes, so a single year works too.
fig, axes = plt.subplots(n_years,1,figsize=(12,6*n_years), dpi=120, facecolor='w', squeeze=False)

for i, year in enumerate(years):
    ax = axes[i, 0]

    query = f'year == {year}'
    sns.lineplot(data=data.query(query).sort_values('type'), x='year_week', y=time_agg, hue='type', ax=ax, palette=palette)
    ax.set_title(f'{year}')

    ax.set_ylim(top=top)

plt.tight_layout()
plt.show()

In [None]:
# Print each event type with its position in `types` (the next cell selects types by position).
for i, t in enumerate(types):
    print(i, t)

In [None]:
# Event types counted as work, selected by position in `types`.
# NOTE(review): the positions depend on the order of entries in col2meaning.json — verify after editing that file.
work_types = [types[3], types[8], types[9]]
work_types

In [None]:
time_agg = 'duration_h'

# `data` is whatever the previous plotting cell left bound (the weekly frame).
# One hours-series per work type is stacked and summed position by position;
# presumably every type has exactly one row per week, in the same order — confirm.
any_work_hours = np.array([data[data.type == t][time_agg] for t in work_types]).sum(axis=0)
any_work_hours[-10:]

In [None]:
# Weekly hours of the first event type next to the summed hours of the work types.
data = events_week

fig, axes = plt.subplots(1,1,figsize=(18,8), dpi=120, facecolor='w')

ax = axes


# Line for types[0] alone
query = f'type == "{types[0]}"'
sns.lineplot(x=data.year_week.unique(), y=data.query(query)[time_agg], ax=ax, label=types[0])

# Line for the combined work types computed in the previous cell
sns.lineplot(x=data.year_week.unique(), y=any_work_hours, ax=ax, label='efficient working hours')

# Reference line at 37.5 h, labelled as a full work week
ax.axhline(37.5, ls='--', color='k', zorder=0, label='full work week')

plt.legend()

plt.title('Efficient working hours per week')
plt.tight_layout()
plt.show()

In [None]:
# One row per week with the summed work hours; both inputs must have the same length.
working_df = pd.DataFrame(data={'year_week': data.year_week.unique(), 'working_h': any_work_hours})
working_df

In [None]:
# Flag the weeks at or above 37.5 h ('overtime') and at or above 40 h ('clear overtime').
working_df['overtime'] = working_df.working_h.ge(37.5)
working_df['clear overtime'] = working_df.working_h.ge(40)
working_df

### Busiest weeks 

In [None]:
# Weeks ordered from most to fewest work hours; ties keep chronological order.
# reset_index() keeps the previous index as an 'index' column.
working_df_sorted = working_df.sort_values(['working_h', 'year_week'], ascending=[False, True]).reset_index()

In [None]:
# The ten weeks with the most work hours.
working_df_sorted[:10]

In [None]:
# The ten weeks with the fewest work hours.
working_df_sorted[-10:]

### Overtime 2020 

In [None]:
# Weeks whose representative date lies strictly between 2020-01-01 and 2021-01-01.
working_20 = working_df[
    (working_df.year_week > datetime(2020, 1, 1))
    & (working_df.year_week < datetime(2021,1,1))
]

In [None]:
# Number of distinct 2020 weeks with and without the 'overtime' flag.
working_20.groupby('overtime').nunique()['year_week']

In [None]:
# Number of distinct 2020 weeks with and without the 'clear overtime' flag.
working_20.groupby('clear overtime').nunique()['year_week']