In [1]:
import pandas as pd
import numpy as np
import datetime as datetime

In [20]:
# Load the three dataset variants. Each variant pairs an event-contingent EMA
# export with the puff-probability export that belongs to it.
contingent_ema, puff_marker = (
    pd.read_csv('eventcontingent-ema.csv'),
    pd.read_csv('puff-probability.csv'),
)

contingent_ema_alternative, puff_marker_alternative = (
    pd.read_csv('eventcontingent-ema-alternative.csv'),
    pd.read_csv('puff-probability-alternative.csv'),
)

contingent_ema_backup, puff_marker_backup = (
    pd.read_csv('eventcontingent-ema-backup.csv'),
    pd.read_csv('puff-probability-backup.csv'),
)

In [24]:
def contingent_puff(contingent_ema, puff_marker):
    '''
    Count, per participant, how many smoking events recorded in contingent_ema
    are covered by at least one HTMG (here: a row of puff_marker).

    A smoking event is covered when the same participant has a puff_marker row
    whose (year, month, day, hour) key equals either the clock hour of the
    event or the clock hour immediately before it. The "hour before" is
    computed with real date arithmetic, so an event at 00:xx is matched against
    23:xx of the previous day.

    Parameters
    ----------
    contingent_ema : pandas.DataFrame
        Must provide the columns 'participant_id', 'status' and 'date'.
        'date' is parsed as '%m/%d/%y %H:%M' and, if that fails, as
        '%Y-%m-%d %H:%M:%S'. Every row is counted as a smoking event.
    puff_marker : pandas.DataFrame
        Must provide the columns 'participant_id', 'date' and 'hour'.
        Year/month/day come from 'date' (format '%m/%d/%y %H:%M'); the hour
        comes from the 'hour' column. Rows whose 'date' cannot be parsed are
        skipped.

    Returns
    -------
    dict
        participant_id -> {'total', 'completed', 'smoked', 'HTM'} where
        'total' and 'smoked' count all rows of that participant, 'completed'
        counts rows whose status is not "MISSED", and 'HTM' counts the
        covered smoking events.

    Raises
    ------
    ValueError or TypeError
        If a contingent_ema 'date' matches neither format, or a puff_marker
        'hour' value of a parseable row cannot be converted with int().
    '''
    primary_format = '%m/%d/%y %H:%M'
    fallback_format = '%Y-%m-%d %H:%M:%S'
    one_hour = datetime.timedelta(hours=1)

    def hour_key(moment):
        # Hour-resolution key shared by both lookups below.
        return (moment.year, moment.month, moment.day, moment.hour)

    # store all HTMG as hour-resolution keys per participant
    puff_times = {}
    puff_rows = zip(puff_marker['participant_id'],
                    puff_marker['date'],
                    puff_marker['hour'])
    for user, raw_date, hour in puff_rows:
        # The participant is registered even when the row itself is skipped.
        hours_seen = puff_times.setdefault(user, set())
        try:
            day = datetime.datetime.strptime(raw_date, primary_format)
        except (TypeError, ValueError):
            # Best effort: ignore rows whose date is missing (non-string) or
            # written in another format.
            continue
        hours_seen.add((day.year, day.month, day.day, int(hour)))

    # walk through the smoking events
    ema_stats = {}
    ema_rows = zip(contingent_ema['participant_id'],
                   contingent_ema['status'],
                   contingent_ema['date'])
    for user, status, raw_date in ema_rows:
        stats = ema_stats.setdefault(
            user, {'total': 0, 'completed': 0, 'smoked': 0, 'HTM': 0})
        stats['total'] += 1
        if status != "MISSED":
            stats['completed'] += 1

        try:
            when = datetime.datetime.strptime(raw_date, primary_format)
        except (TypeError, ValueError):
            # Second accepted layout; an error here propagates to the caller.
            when = datetime.datetime.strptime(raw_date, fallback_format)

        stats['smoked'] += 1  # because every entry in contingent_ema is a smoking event

        # check if the smoking event is covered by HTMG within one hour limit
        if user not in puff_times:
            continue
        user_hours = puff_times[user]
        # Subtracting a timedelta rolls over day/month/year boundaries, which
        # a plain "hour - 1" cannot do for events in hour 0.
        if hour_key(when - one_hour) in user_hours or hour_key(when) in user_hours:
            stats['HTM'] += 1  # plus one if covered

    return ema_stats

# Coverage summary for the primary dataset, one line per participant.
ema_stats = contingent_puff(contingent_ema, puff_marker)
for user, user_stats in ema_stats.items():
    print(user, user_stats)

201 {'total': 4, 'completed': 4, 'smoked': 4, 'HTM': 4}
202 {'total': 9, 'completed': 9, 'smoked': 9, 'HTM': 9}
205 {'total': 6, 'completed': 6, 'smoked': 6, 'HTM': 4}
208 {'total': 7, 'completed': 7, 'smoked': 7, 'HTM': 6}
209 {'total': 4, 'completed': 4, 'smoked': 4, 'HTM': 4}
211 {'total': 7, 'completed': 7, 'smoked': 7, 'HTM': 7}
212 {'total': 3, 'completed': 3, 'smoked': 3, 'HTM': 2}
213 {'total': 7, 'completed': 7, 'smoked': 7, 'HTM': 7}
214 {'total': 11, 'completed': 11, 'smoked': 11, 'HTM': 11}
215 {'total': 6, 'completed': 6, 'smoked': 6, 'HTM': 6}
216 {'total': 2, 'completed': 2, 'smoked': 2, 'HTM': 2}
217 {'total': 8, 'completed': 8, 'smoked': 8, 'HTM': 8}
218 {'total': 5, 'completed': 5, 'smoked': 5, 'HTM': 4}
219 {'total': 23, 'completed': 23, 'smoked': 23, 'HTM': 22}
220 {'total': 9, 'completed': 9, 'smoked': 9, 'HTM': 8}
221 {'total': 3, 'completed': 3, 'smoked': 3, 'HTM': 3}
222 {'total': 5, 'completed': 5, 'smoked': 5, 'HTM': 4}
228 {'total': 18, 'completed': 18, 'smok

In [22]:
# Coverage summary for the alternative dataset, one line per participant.
ema_stats = contingent_puff(contingent_ema_alternative, puff_marker_alternative)
for user, user_stats in ema_stats.items():
    print(user, user_stats)

223 {'total': 5, 'completed': 5, 'smoked': 5, 'HTM': 5}
224 {'total': 5, 'completed': 5, 'smoked': 5, 'HTM': 5}
226 {'total': 12, 'completed': 11, 'smoked': 12, 'HTM': 11}
227 {'total': 39, 'completed': 39, 'smoked': 39, 'HTM': 29}


In [23]:
# Coverage summary for the backup dataset, one line per participant.
ema_stats = contingent_puff(contingent_ema_backup, puff_marker_backup)
for user, user_stats in ema_stats.items():
    print(user, user_stats)

202 {'total': 9, 'completed': 9, 'smoked': 9, 'HTM': 9}
204 {'total': 5, 'completed': 5, 'smoked': 5, 'HTM': 5}
205 {'total': 6, 'completed': 6, 'smoked': 6, 'HTM': 4}
207 {'total': 9, 'completed': 9, 'smoked': 9, 'HTM': 6}
208 {'total': 14, 'completed': 14, 'smoked': 14, 'HTM': 13}
209 {'total': 4, 'completed': 4, 'smoked': 4, 'HTM': 4}
211 {'total': 7, 'completed': 7, 'smoked': 7, 'HTM': 7}
212 {'total': 3, 'completed': 3, 'smoked': 3, 'HTM': 2}
213 {'total': 7, 'completed': 7, 'smoked': 7, 'HTM': 7}
214 {'total': 11, 'completed': 11, 'smoked': 11, 'HTM': 11}
215 {'total': 6, 'completed': 6, 'smoked': 6, 'HTM': 6}
216 {'total': 2, 'completed': 2, 'smoked': 2, 'HTM': 2}
217 {'total': 8, 'completed': 8, 'smoked': 8, 'HTM': 8}
218 {'total': 5, 'completed': 5, 'smoked': 5, 'HTM': 4}
219 {'total': 23, 'completed': 23, 'smoked': 23, 'HTM': 22}
220 {'total': 10, 'completed': 10, 'smoked': 10, 'HTM': 9}
222 {'total': 8, 'completed': 8, 'smoked': 8, 'HTM': 7}
228 {'total': 18, 'completed': 18