Import required packages


In [28]:
import pandas as pd
import os
import datetime
import numpy as np

In [2]:
def get_query(table_name: str, start_date: str, end_date: str) -> str:
    """Build the SQL text that exports engagement and notification events.

    The query flattens ``user_properties`` and ``event_params`` with UNNEST, so
    one event produces one row per (user property, event parameter) pair.

    Rows are kept when
      * the table suffix lies between ``start_date`` and ``end_date``,
      * the event is ``user_engagement`` with the ``engagement_time_msec``
        parameter, or is ``notification_receive`` / ``notification_open``, and
      * the user property is ``subjectId`` or ``projectId``.

    All three arguments are interpolated into the text as-is (no escaping).

    :param table_name: table reference placed between backticks in FROM.
    :param start_date: lower bound compared against ``_TABLE_SUFFIX``.
    :param end_date: upper bound compared against ``_TABLE_SUFFIX``.
    :return: the SQL statement as a string.
    """
    query = f"""
        SELECT 
         event_date,
         event_timestamp, 
         event_name, 
         event_parameters.key AS event_param_key, 
         event_parameters.value.int_value AS event_param_int, 
         event_parameters.value.string_value AS event_param_string,
         user_pseudo_id, 
         user_prop.key AS user_prop_key, 
         user_prop.value.string_value AS user_prop_string
        FROM
         `{table_name}` as T,
          UNNEST(user_properties) AS user_prop,
          UNNEST(event_params) AS event_parameters
        WHERE 
          _TABLE_SUFFIX BETWEEN "{start_date}" AND "{end_date}"
          AND ((event_name = "user_engagement" AND event_parameters.key = "engagement_time_msec") OR (event_name = "notification_receive") OR event_name = "notification_open")
          AND (user_prop.key = "subjectId" OR user_prop.key = "projectId")
    """
    return query

In [3]:

def get_list_of_studies(query_result: pd.DataFrame):
    """Return the (user_pseudo_id, user_prop_string) pairs of the projectId rows.

    :param query_result: frame with at least the columns ``user_prop_key``,
        ``user_pseudo_id`` and ``user_prop_string``.
    :return: the two selected columns for rows whose ``user_prop_key`` equals
        ``'projectId'``; the original row labels are kept.
    """
    is_project_row = query_result['user_prop_key'] == 'projectId'
    wanted_columns = ['user_pseudo_id', 'user_prop_string']
    return query_result.loc[is_project_row, wanted_columns]


def write_to_csv_file(filename: str, filepath: str, data: pd.DataFrame):
    """Write ``data`` as CSV to ``filepath/filename``.

    The directory ``filepath`` is created if it does not exist. The frame's
    index is written as the first column under the header ``index``. A
    confirmation line is printed once the file has been written.

    :param filename: name of the CSV file.
    :param filepath: directory that will contain the file.
    :param data: frame to serialise.
    """
    os.makedirs(filepath, exist_ok=True)
    # Let pandas write straight to the path: this avoids materialising the
    # whole CSV as one string first, and avoids a second newline translation
    # by a text-mode file handle (doubled line endings on Windows).
    data.to_csv(os.path.join(filepath, filename), index_label='index')
    print('Written to File successfully.')


In [4]:
def format_datetime(obj: datetime.datetime) -> str:
    """Format ``obj`` as a compact ``YYYYMMDD`` string.

    The annotation names the ``datetime.datetime`` class; the previous
    annotation referred to the ``datetime`` module itself.

    :param obj: object providing ``strftime`` (e.g. ``datetime.datetime``).
    :return: the date part formatted with ``'%Y%m%d'``.
    """
    return obj.strftime('%Y%m%d')

In [5]:
!pip install pyarrow



In [6]:
from google.cloud import bigquery

# Run this cell only once: if the export is already saved on disk, continue
# with the next cell, which reloads it from the CSV file.

# BigQuery client authenticated through the service-account key file.
client = bigquery.Client.from_service_account_json('radar-armt-notification-c2040a9b15cb.json')

# Wildcard table; get_query narrows it down through _TABLE_SUFFIX.
table = 'radar-armt-notification.analytics_180955751.events_*'

# Export window as YYYYMMDD strings. For a rolling window ending today use:
#   start = format_datetime(datetime.datetime.now() - datetime.timedelta(days=200))
#   end = format_datetime(datetime.datetime.now())
start = "20191130"
end = "20210628"

print('Getting data from BigQuery')
result = client.query(get_query(table, start, end))  # Make an API request.

print('Converting to Dataframe')
result_df: pd.DataFrame = result.to_dataframe()

print('Saving to file')
# Persist the frame so that BigQuery does not have to be queried again and again.
export_dir = f'data/{start}-{end}'
write_to_csv_file('data.csv', export_dir, data=result_df)

Getting data from BigQuery
Converting to Dataframe
saving to file
Written to File successfully.


In [None]:
# Load the raw export saved as CSV by the download cell.
# write_to_csv_file stores the frame's index in a column labelled 'index';
# reading that column back as the index keeps it from turning into an extra
# data column that would then travel through the pre-processing.

file = f'data/{start}-{end}/data.csv'

result_df = pd.read_csv(file, index_col='index')

In [7]:
# Preview the first rows of the raw export.
result_df.head()

Unnamed: 0,event_date,event_timestamp,event_name,event_param_key,event_param_int,event_param_string,user_pseudo_id,user_prop_key,user_prop_string
0,20191226,1577352601826000,notification_receive,firebase_conversion,1.0,,9338dc7dba04cb2a95f10ce046385f8b,projectId,RADAR-MDD-KCL-s1
1,20191226,1577352601826000,notification_receive,message_type,,display,9338dc7dba04cb2a95f10ce046385f8b,projectId,RADAR-MDD-KCL-s1
2,20191226,1577352601826000,notification_receive,firebase_event_origin,,fcm,9338dc7dba04cb2a95f10ce046385f8b,projectId,RADAR-MDD-KCL-s1
3,20191226,1577352601826000,notification_receive,firebase_conversion,1.0,,9338dc7dba04cb2a95f10ce046385f8b,subjectId,a61fc535-ae92-4bfd-b9d9-36c9f4e27bd8
4,20191226,1577352601826000,notification_receive,message_type,,display,9338dc7dba04cb2a95f10ce046385f8b,subjectId,a61fc535-ae92-4bfd-b9d9-36c9f4e27bd8


In [49]:
def pre_process(data: pd.DataFrame, required_params = ['message_type', 'engagement_time_msec']) -> pd.DataFrame:
    """Reshape the raw export into event rows indexed by event time.

    Steps, in order (a progress line is printed after each one):
      1. keep only rows whose ``event_param_key`` is in ``required_params``;
      2. index the rows by ``pd.to_datetime(event_timestamp * 1000)``;
      3. set aside ``user_prop_string`` of the rows whose ``user_prop_key`` is
         ``'projectId'`` and remove those rows;
      4. on the remaining rows, copy ``event_param_int`` into
         ``engagement_time_msec`` and ``event_param_string`` into
         ``message_type``, rename ``user_prop_string`` to ``subjectId`` and
         drop the ``user_prop_key`` / ``event_param_*`` columns;
      5. attach the project ids set aside in step 3 as a ``projectId`` column.

    NOTE(review): step 5 matches rows purely on the datetime index, so it
    presumes that a timestamp identifies one event; confirm this holds for the
    exported data (pandas may refuse to align duplicated index labels).

    The input frame is not modified. Every derived frame is built as a new
    object (``copy``/``assign``/``rename``) instead of assigning into a
    filtered slice, which is what triggered ``SettingWithCopyWarning`` before.

    :param data: raw export with the columns produced by the export query.
    :param required_params: event parameter keys to keep.
    :return: the processed frame, indexed by event time.
    """
    # Independent copy: the index assignment below must not touch the caller's frame.
    data = data[data['event_param_key'].isin(required_params)].copy()
    print('removed unnecessary rows')
    data.index = pd.to_datetime(data['event_timestamp'] * 1000)
    print('Created date time index')
    is_project_row = data['user_prop_key'] == 'projectId'
    project_ids = data.loc[is_project_row, 'user_prop_string']
    print('got project Ids')
    data = data[~is_project_row]
    print('removed rows with project id as they are duplicates')
    data = data.drop(columns=['user_prop_key'])
    print('dropped user prop keys')
    data = data.assign(engagement_time_msec=data['event_param_int'])
    print('created column of engagement time msec')
    data = data.rename(columns={'user_prop_string': 'subjectId'})
    print('renamed user prop string to subjectId')
    data = data.assign(message_type=data['event_param_string'])
    print('created column. message type')
    data = data.drop(columns=['event_param_int', 'event_param_string', 'event_param_key'])
    print('dropped unnecessary columns')
    # The Series is still named 'user_prop_string', which becomes its column name here.
    data = pd.concat([data, project_ids.dropna()], axis=1)
    print('added column for project Ids')
    data = data.rename(columns={'user_prop_string': 'projectId'})
    print('renamed user prop string to projectId')
    return data

In [50]:
# Pre-processing the full export takes a long time because of its volume; if
# the processed CSV already exists, skip this cell and load it in the next one.
# NOTE(review): the output directory is hard-coded and does not follow the
# start/end values used for the raw export; confirm this is intended.

processed_df = pre_process(result_df)
write_to_csv_file(filename='processed-data.csv',
                  filepath='data/20190927-20200414',
                  data=processed_df)

removed unnecessary rows
Created date time index
got project Ids
removed rows with project id as they are duplicates
dropped user prop keys
created column of engagement time msec
renamed user prop string to subjectId
created column. message type
dropped unnecessary columns
added column for project Ids
renamed user prop string to projectId
Written to File successfully.


In [None]:
# Reload the processed data; the 'index' column becomes the index and
# parse_dates=True asks pandas to parse that index as dates.
# The deprecated `date_parser` argument is no longer passed; the default
# date parsing is used instead.
processed_df = pd.read_csv('data/20190927-20200414/processed-data.csv', parse_dates=True, index_col='index')

In [46]:
# Preview the first rows of the processed data.
processed_df.head()

Unnamed: 0_level_0,event_date,event_timestamp,event_name,user_pseudo_id,subjectId,engagement_time_msec,message_type,projectId
event_timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2019-12-26 09:30:01.826000,20191226,1577352601826000,notification_receive,9338dc7dba04cb2a95f10ce046385f8b,a61fc535-ae92-4bfd-b9d9-36c9f4e27bd8,,display,RADAR-MDD-KCL-s1
2019-12-26 10:00:03.297000,20191226,1577354403297000,notification_receive,9338dc7dba04cb2a95f10ce046385f8b,a61fc535-ae92-4bfd-b9d9-36c9f4e27bd8,,display,RADAR-MDD-KCL-s1
2019-12-26 11:00:08.341000,20191226,1577358008341000,notification_receive,9338dc7dba04cb2a95f10ce046385f8b,a61fc535-ae92-4bfd-b9d9-36c9f4e27bd8,,display,RADAR-MDD-KCL-s1
2019-12-26 11:29:59.872000,20191226,1577359799872000,notification_receive,9338dc7dba04cb2a95f10ce046385f8b,a61fc535-ae92-4bfd-b9d9-36c9f4e27bd8,,display,RADAR-MDD-KCL-s1
2019-12-26 14:44:22.215001,20191226,1577371462215001,user_engagement,9338dc7dba04cb2a95f10ce046385f8b,a61fc535-ae92-4bfd-b9d9-36c9f4e27bd8,14233.0,,RADAR-MDD-KCL-s1


In [None]:
# Analysis window; both bounds are exclusive.
start_date = pd.to_datetime('2019-11-12')
end_date = pd.to_datetime('2020-04-11')

in_window = (processed_df.index > start_date) & (processed_df.index < end_date)
processed_df_2 = processed_df.loc[in_window]

In [None]:
# Bucket the user_engagement events into 20-day bins.
# NOTE(review): the later group-by cells use 19-day bins; confirm that the
# 20-day width here is intended.
is_engagement_event = processed_df_2['event_name'] == 'user_engagement'
resampled = processed_df_2[is_engagement_event].resample('20D')

In [None]:
import matplotlib.pyplot as plt

#.drop(labels=pd.to_datetime('2020-03-26'), axis='index')
# Total engagement time (msec) per resampled bin.
total_engagement_msec = resampled['engagement_time_msec'].sum()
total_engagement_msec.plot(figsize=(15,4))

plt.title('Total User Engagement Time')
plt.xlabel('Time')
plt.ylabel('Total Engagement Time (msec)')

tick_dates = ['2019-12-15', '2020-01-01', '2020-01-15', '2020-02-01',
              '2020-02-15', '2020-03-01', '2020-03-15']
plt.xticks(tick_dates)

In [None]:
# User engagement per user: take the user_engagement events and group them
# into 19-day bins on the datetime index.

user_eng_df = processed_df_2.loc[processed_df_2['event_name'] == 'user_engagement']
bin_keys = [pd.Grouper(freq='19D')]
grouped_df = user_eng_df.groupby(bin_keys)

In [None]:
# Print the key of every 19-day group.
for bin_key, _rows in grouped_df:
    print(bin_key)

In [None]:
# Per bin: summed engagement time converted from msec to minutes, and the
# number of distinct users.
engagement_minutes = pd.NamedAgg(column='engagement_time_msec', aggfunc=lambda x: x.sum()/ 1000 / 60)
distinct_users = pd.NamedAgg(column='user_pseudo_id', aggfunc=lambda x: x.nunique())
grouped_df_agg = grouped_df.agg(engagement_time_sum_min=engagement_minutes,
                                user_pseudo_id_count=distinct_users)

In [None]:
# Preview the first aggregated bins.
grouped_df_agg.head()

In [None]:
# Engagement minutes per distinct user in each bin.
grouped_df_agg['engagement_per_user'] = grouped_df_agg['engagement_time_sum_min'].div(
    grouped_df_agg['user_pseudo_id_count'])

In [None]:
# Inspect the last aggregated bins.
grouped_df_agg.tail()

In [None]:
# Move every bin label forward by 19 days (the bin width used for grouping).
# This rewrites the index in place, so running the cell again shifts the
# labels a further 19 days.
grouped_df_agg.index = grouped_df_agg.index + datetime.timedelta(days=19)

In [None]:

#grouped_df_agg.loc[pd.to_datetime('2020-03-25'), :] = grouped_df_agg.loc[pd.to_datetime('2020-03-06'), :]
# Engagement minutes per user over time.
engagement_series = grouped_df_agg['engagement_per_user']
engagement_series.plot(figsize=(15, 4))

plt.title('User Engagement Time/User')
plt.xlabel('Time')
plt.ylabel('Engagement Time/User (minutes)')

tick_dates = ['2019-11-15', '2019-12-01', '2019-12-15', '2020-01-01',
              '2020-01-15', '2020-02-01', '2020-02-15', '2020-03-01',
              '2020-03-15', '2020-04-01', '2020-04-15']
plt.xticks(tick_dates)

In [None]:
# Opened notifications: notification_open events whose message_type is 'fcm'.
# NOTE(review): in the raw preview 'fcm' appears as the firebase_event_origin
# value while message_type is 'display'; confirm that notification_open rows
# really carry message_type 'fcm'.
is_open_event = processed_df_2['event_name'] == 'notification_open'
is_fcm_message = processed_df_2['message_type'] == 'fcm'
notifs_df = processed_df_2[is_open_event & is_fcm_message]

In [None]:
# Group the opened-notification events into 19-day bins on the datetime index.
resampled_notifs = notifs_df.groupby([pd.Grouper(freq='19D')])

In [None]:
# Display the group-by object created in the previous cell.
resampled_notifs

In [None]:
# Per bin: number of opened-notification rows and number of distinct users.
open_count = pd.NamedAgg(column='event_name', aggfunc='count')
distinct_users = pd.NamedAgg(column='user_pseudo_id', aggfunc=lambda x: x.nunique())
resampled_notifs_agg = resampled_notifs.agg(notif_open_count=open_count,
                                            user_pseudo_id_count=distinct_users)

In [None]:
# Opened notifications per distinct user in each bin.
resampled_notifs_agg['notif_open_per_user'] = resampled_notifs_agg['notif_open_count'].div(
    resampled_notifs_agg['user_pseudo_id_count'])

In [None]:
# Preview the first aggregated bins of opened notifications.
resampled_notifs_agg.head()

In [None]:
# Move every bin label forward by 19 days (the bin width used for grouping).
# The index is rewritten in place, so re-running the cell shifts it again.
resampled_notifs_agg.index = resampled_notifs_agg.index + datetime.timedelta(days=19)

In [None]:
# Opened notifications per user over time.
opened_per_user = resampled_notifs_agg['notif_open_per_user']
opened_per_user.plot(figsize=(15, 4))

plt.title('Number of opened Notifications / User')
plt.xlabel('Time')
plt.ylabel('Number of opened Notifications / User')

tick_dates = ['2019-11-15', '2019-12-01', '2019-12-15', '2020-01-01',
              '2020-01-15', '2020-02-01', '2020-02-15', '2020-03-01',
              '2020-03-15', '2020-04-01', '2020-04-15']
plt.xticks(tick_dates)

In [None]:
# Received notifications: notification_receive events with message_type 'display'.
is_receive_event = processed_df_2['event_name'] == 'notification_receive'
is_display_message = processed_df_2['message_type'] == 'display'
notifs_received_df = processed_df_2[is_receive_event & is_display_message]

In [None]:
# Group the received-notification events into 19-day bins on the datetime index.
resampled_notifs_received_df = notifs_received_df.groupby([pd.Grouper(freq='19D')])

In [None]:
# Per bin: number of received-notification rows and number of distinct users.
receive_count = pd.NamedAgg(column='event_name', aggfunc='count')
distinct_users = pd.NamedAgg(column='user_pseudo_id', aggfunc=lambda x: x.nunique())
resampled_notifs_received_df_agg = resampled_notifs_received_df.agg(notif_receive_count=receive_count,
                                                                    user_pseudo_id_count=distinct_users)

In [None]:
# Received notifications per distinct user in each bin.
resampled_notifs_received_df_agg['notif_received_per_user'] = (
    resampled_notifs_received_df_agg['notif_receive_count'].div(
        resampled_notifs_received_df_agg['user_pseudo_id_count']))

In [None]:
# Preview the first aggregated bins of received notifications.
resampled_notifs_received_df_agg.head()

In [None]:
# Move every bin label forward by 19 days (the bin width used for grouping).
# The index is rewritten in place, so re-running the cell shifts it again.
resampled_notifs_received_df_agg.index = resampled_notifs_received_df_agg.index + datetime.timedelta(days=19)

In [None]:
# Received notifications per user over time.
received_per_user = resampled_notifs_received_df_agg['notif_received_per_user']
received_per_user.plot(figsize=(15, 4))

plt.title('Number of Received Notifications / User')
plt.xlabel('Time')
plt.ylabel('Number of Received Notifications / User')

tick_dates = ['2019-11-01', '2019-11-15', '2019-12-01', '2019-12-15',
              '2020-01-01', '2020-01-15', '2020-02-01', '2020-02-15',
              '2020-03-01', '2020-03-15', '2020-04-01']
plt.xticks(tick_dates)

In [None]:
# Opened count divided by received count per bin. Despite the column name the
# value is a plain ratio (not multiplied by 100). The two Series come from
# different frames and are matched on their index labels.
resampled_notifs_received_df_agg['percent_open'] = resampled_notifs_agg['notif_open_count'] / resampled_notifs_received_df_agg['notif_receive_count']

In [None]:
# Inspect the last bins, now including the percent_open column.
resampled_notifs_received_df_agg.tail()

In [None]:
# Move every label of this frame forward by 19 days.
# NOTE(review): an earlier cell already applied the same 19-day shift to this
# frame's index, so after this cell the labels are 38 days past the original
# bin keys; confirm that the second shift is intended.
resampled_notifs_received_df_agg.index = resampled_notifs_received_df_agg.index + datetime.timedelta(days=19)

In [None]:
# Ratio of opened to received notifications over time.
percent_open_series = resampled_notifs_received_df_agg['percent_open']
percent_open_series.plot(figsize=(15, 4))

plt.title('Percent of opened notifications vs received')
plt.xlabel('Time')
plt.ylabel('Percent of opened notifications vs received')

tick_dates = ['2019-11-15', '2019-12-01', '2019-12-15', '2020-01-01',
              '2020-01-15', '2020-02-01', '2020-02-15', '2020-03-01',
              '2020-03-15', '2020-04-01', '2020-04-15']
plt.xticks(tick_dates)
plt.show()

In [None]:
# User engagement per user per project: take the user_engagement events and
# group them by projectId and 19-day bin.

user_eng_project_df = processed_df_2.loc[processed_df_2['event_name'] == 'user_engagement']
project_and_bin = ['projectId', pd.Grouper(freq='19D')]
project_grouped_df = user_eng_project_df.groupby(project_and_bin)

In [None]:
# Per project and bin: summed engagement time converted from msec to minutes,
# and the number of distinct users.
engagement_minutes = pd.NamedAgg(column='engagement_time_msec', aggfunc=lambda x: x.sum()/ 1000 / 60)
distinct_users = pd.NamedAgg(column='user_pseudo_id', aggfunc= lambda x: x.nunique())
project_grouped_df_agg = project_grouped_df.agg(engagement_time_sum_min=engagement_minutes,
                                                user_pseudo_id_count=distinct_users)

In [None]:
# Engagement minutes per distinct user for each project and bin.
project_grouped_df_agg['engagement_per_user'] = project_grouped_df_agg['engagement_time_sum_min'].div(
    project_grouped_df_agg['user_pseudo_id_count'])

In [None]:
# Preview the first per-project bins.
project_grouped_df_agg.head()

In [None]:
# Print the project id of every (project, bin) group; a project is printed
# once per bin it occurs in.
for (project_id, _bin_key), _rows in project_grouped_df:
    print(project_id)

In [None]:
# Project ids grouped by study site.
LONDON_UK = ['RADAR-MDD-KCL-s1']
BARCELONA_SPAIN = [
    'RADAR-MDD-CIBER-s1',
    'RADAR-MSDis-VHIR-s1',
    'RADAR-MSDep-VHIR-s1',
    'RADAR-MDD-IISPV-s1',
]
COPENHAGEN_DENMARK = [
    'RADAR-MSDep-RegionH-s1',
    'RADAR-MSDis-RegionH-s1',
]
MILAN_ITALY = [
    'RADAR-MSDep-OSR-s1',
    'RADAR-MSDis-OSR-s1',
]
AMSTERDAM_NETHERLANDS = ['RADAR-MDD-VUmc-s1']

# Every project id listed above, in site order.
ALL_RADAR_CNS = [
    *LONDON_UK,
    *BARCELONA_SPAIN,
    *COPENHAGEN_DENMARK,
    *MILAN_ITALY,
    *AMSTERDAM_NETHERLANDS,
]

In [None]:
# Show the combined list of project ids.
print(ALL_RADAR_CNS)

In [None]:
# Drop all rows whose project is not part of RADAR-CNS.
# The project ids are de-duplicated first: the index holds one entry per
# (project, bin) row, so the list would otherwise repeat every id once per bin.

project_ids_present = project_grouped_df_agg.index.get_level_values('projectId').unique()
projects_to_discard = [project for project in project_ids_present if project not in ALL_RADAR_CNS]
print(projects_to_discard)
project_grouped_df_agg = project_grouped_df_agg.drop(labels=projects_to_discard, axis=0, level='projectId')

In [None]:
# Preview the per-project bins after removing the other projects.
project_grouped_df_agg.head()

In [None]:
# Notifications opened per user per project: take notification_open events
# with message_type 'fcm' and group them by projectId and 19-day bin.

is_open_event = processed_df_2['event_name'] == 'notification_open'
is_fcm_message = processed_df_2['message_type'] == 'fcm'
user_notif_project_df = processed_df_2[is_open_event & is_fcm_message]
project_and_bin = ['projectId', pd.Grouper(freq='19D')]
project_grouped_notif_df = user_notif_project_df.groupby(project_and_bin)

In [None]:
# Per project and bin: number of opened-notification rows and number of
# distinct users.
open_count = pd.NamedAgg(column='event_name', aggfunc='count')
distinct_users = pd.NamedAgg(column='user_pseudo_id', aggfunc= lambda x: x.nunique())
project_grouped_notif_df_agg = project_grouped_notif_df.agg(notif_open_count=open_count,
                                                            user_pseudo_id_count=distinct_users)

In [None]:
# Opened notifications per distinct user for each project and bin.
project_grouped_notif_df_agg['notif_open_per_user'] = project_grouped_notif_df_agg['notif_open_count'].div(
    project_grouped_notif_df_agg['user_pseudo_id_count'])

In [None]:
# Keep only the RADAR-CNS projects. The filter looks at this frame's own
# projectId level instead of dropping the labels collected from the engagement
# table: it also removes projects that occur only in this frame, and it cannot
# fail when one of those collected labels is absent here.
is_radar_cns = project_grouped_notif_df_agg.index.get_level_values('projectId').isin(ALL_RADAR_CNS)
project_grouped_notif_df_agg = project_grouped_notif_df_agg.loc[is_radar_cns]

In [None]:
# Preview the per-project opened-notification bins.
project_grouped_notif_df_agg.head()

In [None]:
# Received notifications (notification_receive events with message_type
# 'display'), grouped by projectId and 19-day bin.
is_receive_event = processed_df_2['event_name'] == 'notification_receive'
is_display_message = processed_df_2['message_type'] == 'display'
notifs_received_df = processed_df_2[is_receive_event & is_display_message]
project_and_bin = ['projectId', pd.Grouper(freq='19D')]
project_grouped_notifs_received_df = notifs_received_df.groupby(project_and_bin)

In [None]:
# Per project and bin: number of received-notification rows and number of
# distinct users.
receive_count = pd.NamedAgg(column='event_name', aggfunc='count')
distinct_users = pd.NamedAgg(column='user_pseudo_id', aggfunc= lambda x: x.nunique())
project_grouped_notifs_received_df_agg = project_grouped_notifs_received_df.agg(notif_receive_count=receive_count,
                                                                                user_pseudo_id_count=distinct_users)

In [None]:
# Keep only the RADAR-CNS projects. The filter looks at this frame's own
# projectId level instead of dropping the labels collected from the engagement
# table: it also removes projects that occur only in this frame, and it cannot
# fail when one of those collected labels is absent here.
is_radar_cns = project_grouped_notifs_received_df_agg.index.get_level_values('projectId').isin(ALL_RADAR_CNS)
project_grouped_notifs_received_df_agg = project_grouped_notifs_received_df_agg.loc[is_radar_cns]

In [None]:
# Opened count divided by received count per (project, bin). Despite the
# column name the value is a plain ratio (not multiplied by 100). The two
# Series come from different frames and are matched on their index labels.
project_grouped_notif_df_agg['percent_open'] = project_grouped_notif_df_agg['notif_open_count'] / project_grouped_notifs_received_df_agg['notif_receive_count']

In [None]:
# Preview the opened-notification bins, now including percent_open.
project_grouped_notif_df_agg.head()

In [None]:
# Preview the per-project received-notification bins.
project_grouped_notifs_received_df_agg.head()

In [None]:
def get_data_for_study(grouped_data: pd.DataFrame,
                       study_names: list,
                       aggregations: dict,
                       days_to_add = 19):
    """Combine the per-project rows of the given studies into one row per bin.

    :param grouped_data: frame indexed by ``projectId`` and ``event_timestamp``.
    :param study_names: project ids to select from the ``projectId`` level.
    :param aggregations: mapping passed to ``agg`` after grouping the selected
        rows by ``event_timestamp``.
    :param days_to_add: number of days added to every label of the result index.
    :return: the aggregated frame with its index shifted by ``days_to_add`` days.
    """
    label_shift = datetime.timedelta(days=days_to_add)
    selected = grouped_data.loc[study_names, :].reset_index(level='projectId')
    combined = selected.groupby('event_timestamp').agg(aggregations)
    combined.index = combined.index + label_shift
    return combined
    
def plot_for_study(data:pd.DataFrame,
                   column_name: str,
                   title: str,
                   ylabel: str,
                   lockdown_rec: str,
                   lockdown: str,
                   xlabel='Time',
                   figsize=(15, 4),
                   ylim_min = 6,
                   ylim_max = 23):
    """Plot one column of ``data`` and mark two dates with dashed vertical lines.

    :param data: frame whose index is used as the x axis.
    :param column_name: column to plot.
    :param title: figure title.
    :param ylabel: y-axis label.
    :param lockdown_rec: x position of the green dashed line
        (labelled 'National Recommendation').
    :param lockdown: x position of the red dashed line
        (labelled 'National Lockdown').
    :param xlabel: x-axis label.
    :param figsize: figure size passed to the pandas plot call.
    :param ylim_min: lower end of both vertical lines.
    :param ylim_max: upper end of both vertical lines.
    """
    # Use the caller's figsize; it used to be ignored in favour of a
    # hard-coded (15, 4), which is still the default.
    data[column_name].plot(figsize=figsize)

    plt.title(title)

    plt.xlabel(xlabel)
    plt.ylabel(ylabel)

    plt.xticks(['2019-11-15', '2019-12-01',
                '2019-12-15', '2020-01-01',
                '2020-01-15', '2020-02-01',
                '2020-02-15', '2020-03-01',
                '2020-03-15', '2020-04-01',
                '2020-04-15'])
    plt.vlines([lockdown_rec], ylim_min, ylim_max, linestyles='dashed', colors='g', label='National Recommendation')
    plt.vlines([lockdown], ylim_min, ylim_max, linestyles='dashed', colors='r', label='National Lockdown')
    # The label= texts above are only drawn when a legend is requested.
    plt.legend()
    plt.show()
    

In [None]:
# UK user engagement plot
london_data = get_data_for_study(project_grouped_df_agg,
                                    LONDON_UK,
                                    {'engagement_time_sum_min' : 'sum', 'user_pseudo_id_count': 'sum'})
# The per-user value is recomputed from the summed minutes and user counts.
london_data['engagement_per_user'] = london_data['engagement_time_sum_min'] / london_data['user_pseudo_id_count']

# y-axis label: spelling corrected ("enagement" -> "engagement").
plot_for_study(london_data,
               'engagement_per_user',
               'London, UK',
               'User engagement / User (minutes)',
               '2020-03-21',
               '2020-03-24',
               ylim_min = 6,
               ylim_max = 23)

In [None]:
def aggregate_percent_open(x):
    """Unfinished placeholder.

    The definition had no body, which made the cell a syntax error. Nothing in
    the visible notebook calls it; it raises until an implementation exists.
    """
    raise NotImplementedError('aggregate_percent_open has not been implemented')

In [None]:
# UK: opened notifications per user, plus the opened/received ratio that the
# next cell plots.
london_notif_data = get_data_for_study(
    grouped_data=project_grouped_notif_df_agg,
    study_names=LONDON_UK,
    aggregations={'notif_open_count' : 'sum', 'user_pseudo_id_count': 'sum'})
london_notif_received_data = get_data_for_study(
    grouped_data=project_grouped_notifs_received_df_agg,
    study_names=LONDON_UK,
    aggregations={'notif_receive_count' : 'sum', 'user_pseudo_id_count': 'sum'})

london_notif_data['notif_open_per_user'] = london_notif_data['notif_open_count'].div(
    london_notif_data['user_pseudo_id_count'])
london_notif_data['percent_open'] = london_notif_data['notif_open_count'].div(
    london_notif_received_data['notif_receive_count'])

plot_for_study(data=london_notif_data,
               column_name='notif_open_per_user',
               title='London, UK',
               ylabel='Number of opened Notifications / User',
               lockdown_rec='2020-03-21',
               lockdown='2020-03-24',
               ylim_min=2,
               ylim_max=9)

In [None]:
# UK: ratio of opened to received notifications.

plot_for_study(data=london_notif_data,
               column_name='percent_open',
               title='London, UK',
               ylabel='Percent of opened notifications vs received',
               lockdown_rec='2020-03-21',
               lockdown='2020-03-24',
               ylim_min=0.3,
               ylim_max=0.5)

In [None]:
# Spain user engagement plot
barcelona_data = get_data_for_study(project_grouped_df_agg,
                                    BARCELONA_SPAIN,
                                    {'engagement_time_sum_min' : 'sum', 'user_pseudo_id_count': 'sum'})
# The per-user value is recomputed from the summed minutes and user counts.
barcelona_data['engagement_per_user'] = barcelona_data['engagement_time_sum_min'] / barcelona_data['user_pseudo_id_count']

# y-axis label: spelling corrected ("enagement" -> "engagement").
plot_for_study(barcelona_data,
               'engagement_per_user',
               'Barcelona, Spain',
               'User engagement / User (minutes)',
               '2020-03-09',
               '2020-03-14',
               ylim_min = 6,
               ylim_max = 10)

In [None]:
# Spain: opened notifications per user, plus the opened/received ratio that
# the next cell plots.

barcelona_notif_data = get_data_for_study(
    grouped_data=project_grouped_notif_df_agg,
    study_names=BARCELONA_SPAIN,
    aggregations={'notif_open_count' : 'sum', 'user_pseudo_id_count': 'sum'})
barcelona_notif_received_data = get_data_for_study(
    grouped_data=project_grouped_notifs_received_df_agg,
    study_names=BARCELONA_SPAIN,
    aggregations={'notif_receive_count' : 'sum', 'user_pseudo_id_count': 'sum'})

barcelona_notif_data['notif_open_per_user'] = barcelona_notif_data['notif_open_count'].div(
    barcelona_notif_data['user_pseudo_id_count'])
barcelona_notif_data['percent_open'] = barcelona_notif_data['notif_open_count'].div(
    barcelona_notif_received_data['notif_receive_count'])

plot_for_study(data=barcelona_notif_data,
               column_name='notif_open_per_user',
               title='Barcelona, Spain',
               ylabel='Number of opened Notifications / User',
               lockdown_rec='2020-03-09',
               lockdown='2020-03-14',
               ylim_min=2,
               ylim_max=2.5)


In [None]:
# Spain: ratio of opened to received notifications.
plot_for_study(data=barcelona_notif_data,
               column_name='percent_open',
               title='Barcelona, Spain',
               ylabel='Percent of opened notifications vs received',
               lockdown_rec='2020-03-09',
               lockdown='2020-03-14',
               ylim_min=0.3,
               ylim_max=0.5)

In [None]:
# Denmark user engagement plot
copenhagen_data = get_data_for_study(project_grouped_df_agg,
                                    COPENHAGEN_DENMARK,
                                    {'engagement_time_sum_min' : 'sum', 'user_pseudo_id_count': 'sum'})
# The per-user value is recomputed from the summed minutes and user counts.
copenhagen_data['engagement_per_user'] = copenhagen_data['engagement_time_sum_min'] / copenhagen_data['user_pseudo_id_count']

# y-axis label: spelling corrected ("enagement" -> "engagement").
plot_for_study(copenhagen_data,
               'engagement_per_user',
               'Copenhagen, Denmark',
               'User engagement / User (minutes)',
               '2020-03-13',
               '2020-03-19',
               ylim_min = 4,
               ylim_max = 8)

In [None]:
# Denmark: opened notifications per user, plus the opened/received ratio that
# the next cell plots.
copenhagen_notif_data = get_data_for_study(
    grouped_data=project_grouped_notif_df_agg,
    study_names=COPENHAGEN_DENMARK,
    aggregations={'notif_open_count' : 'sum', 'user_pseudo_id_count': 'sum'})
copenhagen_notif_received_data = get_data_for_study(
    grouped_data=project_grouped_notifs_received_df_agg,
    study_names=COPENHAGEN_DENMARK,
    aggregations={'notif_receive_count' : 'sum', 'user_pseudo_id_count': 'sum'})

copenhagen_notif_data['notif_open_per_user'] = copenhagen_notif_data['notif_open_count'].div(
    copenhagen_notif_data['user_pseudo_id_count'])
copenhagen_notif_data['percent_open'] = copenhagen_notif_data['notif_open_count'].div(
    copenhagen_notif_received_data['notif_receive_count'])

plot_for_study(data=copenhagen_notif_data,
               column_name='notif_open_per_user',
               title='Copenhagen, Denmark',
               ylabel='Number of opened Notifications / User',
               lockdown_rec='2020-03-13',
               lockdown='2020-03-19',
               ylim_min=1.5,
               ylim_max=3.5)

In [None]:
# Denmark: ratio of opened to received notifications.
plot_for_study(data=copenhagen_notif_data,
               column_name='percent_open',
               title='Copenhagen, Denmark',
               ylabel='Percent of opened notifications vs received',
               lockdown_rec='2020-03-13',
               lockdown='2020-03-19',
               ylim_min=0.2,
               ylim_max=0.5)

In [None]:
# Italy user engagement plot
milan_data = get_data_for_study(project_grouped_df_agg,
                                    MILAN_ITALY,
                                    {'engagement_time_sum_min' : 'sum', 'user_pseudo_id_count': 'sum'})
# The per-user value is recomputed from the summed minutes and user counts.
milan_data['engagement_per_user'] = milan_data['engagement_time_sum_min'] / milan_data['user_pseudo_id_count']

# y-axis label: spelling corrected ("enagement" -> "engagement").
plot_for_study(milan_data,
               'engagement_per_user',
               'Milan, Italy',
               'User engagement / User (minutes)',
               '2020-02-22',
               '2020-03-10',
               ylim_min = 4,
               ylim_max = 10)

In [None]:
# Italy: opened notifications per user, plus the opened/received ratio that
# the next cell plots.
milan_notif_data = get_data_for_study(
    grouped_data=project_grouped_notif_df_agg,
    study_names=MILAN_ITALY,
    aggregations={'notif_open_count' : 'sum', 'user_pseudo_id_count': 'sum'})
milan_notif_received_data = get_data_for_study(
    grouped_data=project_grouped_notifs_received_df_agg,
    study_names=MILAN_ITALY,
    aggregations={'notif_receive_count' : 'sum', 'user_pseudo_id_count': 'sum'})

milan_notif_data['notif_open_per_user'] = milan_notif_data['notif_open_count'].div(
    milan_notif_data['user_pseudo_id_count'])
milan_notif_data['percent_open'] = milan_notif_data['notif_open_count'].div(
    milan_notif_received_data['notif_receive_count'])

plot_for_study(data=milan_notif_data,
               column_name='notif_open_per_user',
               title='Milan, Italy',
               ylabel='Number of opened Notifications / User',
               lockdown_rec='2020-02-22',
               lockdown='2020-03-10',
               ylim_min=2,
               ylim_max=3)

In [None]:
# Italy: ratio of opened to received notifications.

plot_for_study(data=milan_notif_data,
               column_name='percent_open',
               title='Milan, Italy',
               ylabel='Percent of opened notifications vs received',
               lockdown_rec='2020-02-22',
               lockdown='2020-03-10',
               ylim_min=0.2,
               ylim_max=0.5)