In [1]:
import pandas as pd
import numpy as np
import datetime as datetime

In [2]:
keys = ['8to9', '9to10', '10to11', '11to12','12to13','13to14','14to15','15to16','16to17','17to18','18to19','19to20']
contingent_keys = {'Less than 5 minutes': 3, '5 to 15 minutes': 10, '15 to 30 minutes': 23, 'More than 30 minutes': 30}

In [3]:
# read data
contingent_ema = pd.read_csv('eventcontingent-ema.csv')
eod_ema = pd.read_csv('eod-ema.csv')

contingent_ema_alternative = pd.read_csv('eventcontingent-ema-alternative.csv')
eod_ema_alternative = pd.read_csv('eod-ema-alternative.csv')

contingent_ema_backup = pd.read_csv('eventcontingent-ema-backup.csv')
eod_ema_backup = pd.read_csv('eod-ema-backup.csv')

In [4]:
def eod_bias_variance(contingent_ema, eod_ema, window):
    '''
    treating the contingent ema as 'ground truth', compute the eod_ema's bias and variance. Takes 
    the window of time within which bias is computed as a parameter. Events that fall outside of the window
    is counted as missed.
    '''
    
    contingent_events = {} # store contingent events by participant
    total_contingent_events = 0 # track the total number of contingent events
    # store contingent events
    for index, row in contingent_ema.iterrows():
        if row['status'] == "MISSED":
            continue
        
        try:
            time = datetime.datetime.strptime(row['date'], '%m/%d/%y %H:%M')
        except:
            time = datetime.datetime.strptime(row['date'], '%Y-%m-%d %H:%M:%S')
        date = (time.year, time.month, time.day) # granularize time to day
        if row['participant_id'] not in contingent_events:
            contingent_events[row['participant_id']] = {}
        if date not in contingent_events[row['participant_id']]:
            contingent_events[row['participant_id']][date] = set() # make the date key
        offset = contingent_keys[row['when_smoke']] # compute smoking time offset based on response
        
        # compute smoking time
        smoking_hour = time.hour
        smoking_minute = time.minute
        if time.minute - offset < 0 :
            smoking_minute = 60 + time.minute - offset
            smoking_hour -= 1
            
        contingent_events[row['participant_id']][date].add((smoking_hour, smoking_minute)) # store the hour, minute pair
        total_contingent_events += 1
            
    eod_events = {} # store hour windows of smoking reported by eod_ema
    total_eod_events = 0 # track the total number of contingent events
    
    # store events in eod_ema
    for index, row in eod_ema.iterrows():
        if row['status'] == "MISSED":
            continue
        for i in keys:
            if row[i] == 1:
                try:
                    time = datetime.datetime.strptime(row['date'], '%m/%d/%Y %H:%M')
                except:
                    time = datetime.datetime.strptime(row['date'], '%Y-%m-%d %H:%M:%S')
                hour = int(i.split('to')[0])
                date = (time.year, time.month, time.day)
                if time.hour  == 0 or time.hour == 1:
                    date = (time.year, time.month, time.day-1)
                if row['participant_id'] not in eod_events:
                    eod_events[row['participant_id']] = {}
                if date not in eod_events[row['participant_id']]:
                    eod_events[row['participant_id']][date] = set() # make the date key
                eod_events[row['participant_id']][date].add(hour) # store the hour
                total_eod_events += 1
    
    bias = [] # store the bias for contingent events that are tracked in eod
    missed_events = [] # store the contingent events that are missed in eod
    
    # compute eod_ema bias
    for user in contingent_events:
        current_events = contingent_events[user] # events associated with the current participant
        for date in current_events:
            
            # in the case missed by eod
            if user not in eod_events or date not in eod_events[user]:
                for event in current_events[date]:
                    missed_events.append(event)
                continue
                
            events_hours = current_events[date] # all contingent events hour,minute pairs
            eod_hours = eod_events[user][date]
            for event in events_hours:
                hour = event[0]
                missed = True
                for i in range(0, window+1): # iterate through the window
                    if hour + i in eod_hours:
                        real_time = hour*60+event[1]
                        estimated_time = (hour+i) * 60 + 30
                        current_bias = abs(estimated_time-real_time)
                        bias.append(current_bias)
                        missed = False
                        break # skip once we found the closest reported time
                    if hour-i in eod_hours:
                        real_time = hour*60+event[1]
                        estimated_time = (hour-i) * 60 + 30
                        current_bias = abs(estimated_time-real_time)
                        bias.append(current_bias)
                        missed = False
                        break
                if missed:
                    missed_events.append(event) # count as missed if not reported in eod within the window allowed
    
    # assess the truthfulness of self-report?
    
    print(len(bias), len(missed_events))
    print("EOD bias variance report with bias window set to {}".format(window))
    print("The number of contingent events tracked by EOD within this window is {}".format(len(bias)))
    print("This is {}%. Compare to the total number of contingent events: {}".format(100*len(bias)/total_contingent_events,total_contingent_events))
    print("The mean of the bias is {}".format(np.mean(bias)))
    print("The variance of the bias is {}".format(np.var(bias)))

In [5]:
eod_bias_variance(contingent_ema, eod_ema,1)

107 72
EOD bias variance report with bias window set to 1
The number of contingent events tracked by EOD within this window is 107
This is 59.77653631284916%. Compare to the total number of contingent events: 179
The mean of the bias is 33.88785046728972
The variance of the bias is 649.5201327626868


In [6]:
eod_bias_variance(contingent_ema, eod_ema,2)

124 55
EOD bias variance report with bias window set to 2
The number of contingent events tracked by EOD within this window is 124
This is 69.27374301675978%. Compare to the total number of contingent events: 179
The mean of the bias is 45.346774193548384
The variance of the bias is 1424.2910379812695


In [7]:
eod_bias_variance(contingent_ema, eod_ema,3)

129 50
EOD bias variance report with bias window set to 3
The number of contingent events tracked by EOD within this window is 129
This is 72.06703910614524%. Compare to the total number of contingent events: 179
The mean of the bias is 50.55813953488372
The variance of the bias is 2050.3551469262666


In [8]:
eod_bias_variance(contingent_ema, eod_ema,4)

129 50
EOD bias variance report with bias window set to 4
The number of contingent events tracked by EOD within this window is 129
This is 72.06703910614524%. Compare to the total number of contingent events: 179
The mean of the bias is 50.55813953488372
The variance of the bias is 2050.3551469262666
