In [1]:
import json
import pandas as pd
import datetime
from pytz import timezone


In [2]:
# Maps each user id (the keys used in the exported action log and diary data)
# to the short participant number written into the exported spreadsheets.
participants = {
    '107279156336793537009': 1,
    '115767467286550120166': 2,
    '104694983709365197975': 3,
    '108565623227009900784': 4,
    '107360463347436073559': 5,
    '100937764714847904352': 6,
    '105012360635537965053': 7,
    '110353078720257828530': 8,
    '103564003925636425038': 9,
    '00060000DEDE392A': 10,
    '100935129088843356602': 11,
    '116084227873580561392': 12,
    '117105890044607595797': 13,
    '107711334134980088812': 14,
    '107858562133949273618': 15,
    '107772607988378311880': 16,
    '109160689719008133998': 17,
}


In [3]:
# Date stamp of the export to analyse; it selects the raw dump to read and is
# appended to the name of every spreadsheet written by this notebook.
date = '20220907'
path = f'./data/raw/dark-pita-default-rtdb-export_{date}.json'


In [4]:
# Load the raw export and keep only its 'user-data' subtree.
# JSON text is UTF-8 by specification, so decode it explicitly instead of
# relying on the platform's default encoding (which is not UTF-8 everywhere).
with open(path, 'r', encoding='utf-8') as json_file:
    data = json.load(json_file)['user-data']


### Data Preprocessing


In [5]:
# User ids of the research team; their records are removed from the action
# log and the diary notes before any statistics are computed.
researcher_ids = [
    '100689073241975873280',
    '102686626036253115345',
    '105980611853356916531',
    '107417400614572912348',
    '109939652125735554083',
    '112229224879924656055',
    '114069654270801652660',
]


In [6]:
# Both mappings are keyed by user id.
action_log = data['user-action']
diary_note = data['user-diary']
print(len(action_log), len(diary_note))  # sizes before filtering

# Drop every researcher from both mappings; ids that are absent are ignored.
for log in (action_log, diary_note):
    for researcher_id in researcher_ids:
        log.pop(researcher_id, None)
print(len(action_log), len(diary_note))  # sizes after filtering


24 18
17 16


In [7]:
# One statistics record per user in the action log, seeded with the
# participant number; later cells add further fields to these records.
statistics = {
    user_id: {'id': participants[user_id]}
    for user_id in action_log
}

print(statistics)


{'00060000DEDE392A': {'id': 10}, '100935129088843356602': {'id': 11}, '100937764714847904352': {'id': 6}, '103564003925636425038': {'id': 9}, '104694983709365197975': {'id': 3}, '105012360635537965053': {'id': 7}, '107279156336793537009': {'id': 1}, '107360463347436073559': {'id': 5}, '107711334134980088812': {'id': 14}, '107772607988378311880': {'id': 16}, '107858562133949273618': {'id': 15}, '108565623227009900784': {'id': 4}, '109160689719008133998': {'id': 17}, '110353078720257828530': {'id': 8}, '115767467286550120166': {'id': 2}, '116084227873580561392': {'id': 12}, '117105890044607595797': {'id': 13}}


### Diary Note Processing


#### How many times did our participants send diary notes?


In [8]:
# Number of diary notes per user that has any.
for user_id, notes in diary_note.items():
    statistics[user_id]['send_diary_note'] = len(notes)

# Users in the action log who never sent a note get an explicit zero.
for user_id in action_log:
    statistics[user_id].setdefault('send_diary_note', 0)

print(statistics['107711334134980088812'])


{'id': 14, 'send_diary_note': 5}


#### Export diary notes for each participant


In [9]:
def date_transform(timestamp):
    """Format a millisecond Unix timestamp as a US/Eastern date-time string.

    Args:
        timestamp: Milliseconds since the Unix epoch.

    Returns:
        The matching US/Eastern local time formatted as
        'YYYY-MM-DD HH:MM:SS'.
    """
    # Discard the millisecond part; int() truncates toward zero.
    seconds = int(timestamp / 1000)
    # Build a timezone-aware UTC datetime in one step.
    # datetime.utcfromtimestamp() returns a naive value that has to be patched
    # with replace(), and it is deprecated since Python 3.12.
    moment_utc = datetime.datetime.fromtimestamp(
        seconds, tz=datetime.timezone.utc)
    moment_eastern = moment_utc.astimezone(timezone('US/Eastern'))
    return moment_eastern.strftime("%Y-%m-%d %H:%M:%S")


print(date_transform(1661806859798))


2022-08-29 17:00:59


In [10]:
# Column layout of the diary-note export, in output order.
diary_columns = ('user_id', 'date', 'question_one', 'question_two',
                 'question_three', 'screenshot', 'url')
# Each column starts as its own empty list, giving an empty frame.
header = {column: [] for column in diary_columns}
df_diary = pd.DataFrame(header)


In [11]:
# Collect one row per diary note, then build the frame in a single step.
# Appending to df_diary row by row is slow, and it duplicated every row
# whenever this cell was run a second time. The rows are also kept out of
# `data`, so the loaded export stays available to earlier cells.
diary_rows = []
for user_id, notes in diary_note.items():
    for diary in notes.values():
        diary_rows.append({
            'user_id': participants[user_id],
            'date': date_transform(diary['timestamp']),
            'question_one': diary['one'],
            'question_two': diary['two'],
            'question_three': diary['three'],
            'screenshot': diary['screenshot'],
            'url': diary['url'],
        })

# Reuse the existing column layout so the sheet keeps its column order even
# when there are no notes at all.
df_diary = pd.DataFrame(diary_rows, columns=df_diary.columns)

df_diary.to_excel('./data/export/user_diary_note_' +
                  date + '.xlsx', index=False)


### Action Log Processing


#### When did the participants start the study?


In [12]:
# The first entry of each user's action log supplies the start date.
# NOTE(review): this presumes the export lists actions in chronological
# order - confirm against the data source.
for user_id, user_actions in action_log.items():
    first_action = list(user_actions.values())[0]
    statistics[user_id]['start_date'] = date_transform(
        first_action['timestamp'])

print(statistics['110353078720257828530'])


{'id': 8, 'send_diary_note': 16, 'start_date': '2022-08-24 14:41:15'}


#### How many actions did each participant create?


In [13]:
# Total number of logged actions per user.
for user_id, user_actions in action_log.items():
    statistics[user_id]['send_action'] = len(user_actions)

print(statistics['110353078720257828530'])


{'id': 8, 'send_diary_note': 16, 'start_date': '2022-08-24 14:41:15', 'send_action': 7608}


#### How many times did our participants enter sites containing our sampled dark pattern instances (i.e., how many times was our probe triggered)?


In [14]:
# The banner is triggered when a site contains a sampled instance, so the
# number of 'trigger banner' actions is the number of probe triggers.
for user_id, user_actions in action_log.items():
    statistics[user_id]['trigger_probe'] = sum(
        1
        for action in user_actions.values()
        if action.get('description') == 'trigger banner'
    )

print(statistics['107711334134980088812'])


{'id': 14, 'send_diary_note': 5, 'start_date': '2022-08-27 14:27:56', 'send_action': 8901, 'trigger_probe': 290}


#### How many times did our participants change dark patterns (i.e., how many times did they select a UI alternative and save changes)?


In [15]:
# When users change a dark pattern, they choose a UI alternative and save the
# settings, so every 'save settings' action counts as one change.
for user_id, user_actions in action_log.items():
    save_settings = [action for action in user_actions.values()
                     if action.get('description') == 'save settings']
    statistics[user_id]['change_dark_pattern'] = len(save_settings)

    # Export individual user actions for all changes of UI alternatives:
    # one row per saved change, holding its own timestamp plus the fields of
    # its 'action' payload. Building complete rows keeps each timestamp
    # attached to its change; assigning a single string to the 'timestamp'
    # column stamped every row with the last change's time. It also tolerates
    # payloads whose keys differ (missing cells are left empty).
    rows = [
        {'timestamp': date_transform(save_setting['timestamp']),
         **save_setting['action']}
        for save_setting in save_settings
    ]

    if rows:
        df = pd.DataFrame(rows)
    else:
        # No changes saved: still write a sheet with the 'timestamp' header.
        df = pd.DataFrame({'timestamp': []})
    df.to_excel('./data/export/individual_user_action/' +
                user_id + '_action_' + date + '.xlsx', index=False)

print(statistics['107711334134980088812'])


{'id': 14, 'send_diary_note': 5, 'start_date': '2022-08-27 14:27:56', 'send_action': 8901, 'trigger_probe': 290, 'change_dark_pattern': 17}


#### How many times did our participants experience each UI alternative?


In [16]:
# Names of all UI alternatives, in the order their counters are added to
# the statistics; each name starts with the site it belongs to.
ui_alternatives = [
    # amazon
    'amazon_buy_now_hide',
    'amazon_buy_now_fairness',
    'amazon_buy_now_friction',
    'amazon_disguised_ads_hide',
    'amazon_disguised_ads_friction',
    'amazon_disguised_ads_disclosure',
    'amazon_disguised_ads_counterfact',
    'amazon_discount_price_hide',
    'amazon_discount_price_disclosure',
    'amazon_discount_price_reflection',
    'amazon_discount_price_action',
    'amazon_home_card_focus',
    'amazon_home_card_reflection',
    'amazon_home_card_progress',
    # youtube
    'youtube_recommended_video_focus',
    'youtube_recommended_video_preview',
    'youtube_recommended_video_reflection',
    'youtube_video_dislike_fairness',
    'youtube_sidebar_video_focus',
    'youtube_sidebar_video_preview',
    'youtube_sidebar_video_reflection',
    # twitter
    'twitter_whats_happening_hide',
    'twitter_promoted_highlight',
    'twitter_promoted_friction',
    # facebook
    'facebook_reels_hide',
    'facebook_reels_counterfact',
    'facebook_reels_friction',
    'facebook_suggested_for_you_hide',
    'facebook_suggested_for_you_highlight',
    # netflix
    'netflix_timeline_reflection',
    'netflix_hugepreview_disable',
]
print(len(ui_alternatives))


31


In [17]:
# For every user, count the actions whose 'action' field equals 1 and whose
# description contains the name of a UI alternative; one counter per
# alternative is stored in the user's statistics.
for user_id, user_actions in action_log.items():
    tally = dict.fromkeys(ui_alternatives, 0)
    for action in user_actions.values():
        if action.get('action') != 1 or 'description' not in action:
            continue
        description = action['description']
        for ui_alternative in ui_alternatives:
            if ui_alternative in description:
                tally[ui_alternative] += 1

    # Counters are added in the order of `ui_alternatives`.
    statistics[user_id].update(tally)

print(statistics['107711334134980088812'])


{'id': 14, 'send_diary_note': 5, 'start_date': '2022-08-27 14:27:56', 'send_action': 8901, 'trigger_probe': 290, 'change_dark_pattern': 17, 'amazon_buy_now_hide': 0, 'amazon_buy_now_fairness': 0, 'amazon_buy_now_friction': 0, 'amazon_disguised_ads_hide': 6, 'amazon_disguised_ads_friction': 10, 'amazon_disguised_ads_disclosure': 2, 'amazon_disguised_ads_counterfact': 2, 'amazon_discount_price_hide': 0, 'amazon_discount_price_disclosure': 0, 'amazon_discount_price_reflection': 0, 'amazon_discount_price_action': 0, 'amazon_home_card_focus': 4, 'amazon_home_card_reflection': 4, 'amazon_home_card_progress': 11, 'youtube_recommended_video_focus': 14, 'youtube_recommended_video_preview': 2232, 'youtube_recommended_video_reflection': 4, 'youtube_video_dislike_fairness': 0, 'youtube_sidebar_video_focus': 2025, 'youtube_sidebar_video_preview': 6, 'youtube_sidebar_video_reflection': 3, 'twitter_whats_happening_hide': 70, 'twitter_promoted_highlight': 2, 'twitter_promoted_friction': 146, 'facebook

#### Which sites are accessed by our participants?


In [18]:
# UI alternatives grouped by the site they apply to. Each site name becomes a
# column of the exported statistics, so the keys must be spelled correctly
# (the Netflix group was previously keyed 'nextflix').
sites = {
    'amazon': [
        'amazon_buy_now_hide',
        'amazon_buy_now_fairness',
        'amazon_buy_now_friction',
        'amazon_disguised_ads_hide',
        'amazon_disguised_ads_friction',
        'amazon_disguised_ads_disclosure',
        'amazon_disguised_ads_counterfact',
        'amazon_discount_price_hide',
        'amazon_discount_price_disclosure',
        'amazon_discount_price_reflection',
        'amazon_discount_price_action',
        'amazon_home_card_focus',
        'amazon_home_card_reflection',
        'amazon_home_card_progress',
    ],
    'youtube': [
        'youtube_recommended_video_focus',
        'youtube_recommended_video_preview',
        'youtube_recommended_video_reflection',
        'youtube_video_dislike_fairness',
        'youtube_sidebar_video_focus',
        'youtube_sidebar_video_preview',
        'youtube_sidebar_video_reflection',
    ],
    'twitter': [
        'twitter_whats_happening_hide',
        'twitter_promoted_highlight',
        'twitter_promoted_friction',
    ],
    'facebook': [
        'facebook_reels_hide',
        'facebook_reels_counterfact',
        'facebook_reels_friction',
        'facebook_suggested_for_you_hide',
        'facebook_suggested_for_you_highlight',
    ],
    'netflix': [
        'netflix_timeline_reflection',
        'netflix_hugepreview_disable',
    ],
}


In [19]:
# Flag a site with 1 when the user has a non-zero counter for at least one of
# its UI alternatives, otherwise with 0.
for user_stats in statistics.values():
    for site, site_alternatives in sites.items():
        seen = any(user_stats[name] > 0 for name in site_alternatives)
        user_stats[site] = 1 if seen else 0

print(statistics['107711334134980088812'])


{'id': 14, 'send_diary_note': 5, 'start_date': '2022-08-27 14:27:56', 'send_action': 8901, 'trigger_probe': 290, 'change_dark_pattern': 17, 'amazon_buy_now_hide': 0, 'amazon_buy_now_fairness': 0, 'amazon_buy_now_friction': 0, 'amazon_disguised_ads_hide': 6, 'amazon_disguised_ads_friction': 10, 'amazon_disguised_ads_disclosure': 2, 'amazon_disguised_ads_counterfact': 2, 'amazon_discount_price_hide': 0, 'amazon_discount_price_disclosure': 0, 'amazon_discount_price_reflection': 0, 'amazon_discount_price_action': 0, 'amazon_home_card_focus': 4, 'amazon_home_card_reflection': 4, 'amazon_home_card_progress': 11, 'youtube_recommended_video_focus': 14, 'youtube_recommended_video_preview': 2232, 'youtube_recommended_video_reflection': 4, 'youtube_video_dislike_fairness': 0, 'youtube_sidebar_video_focus': 2025, 'youtube_sidebar_video_preview': 6, 'youtube_sidebar_video_reflection': 3, 'twitter_whats_happening_hide': 70, 'twitter_promoted_highlight': 2, 'twitter_promoted_friction': 146, 'facebook

#### Export action statistics


In [20]:
# Pivot the per-user records into column lists: one column per statistic and
# one row per user, in the iteration order of `statistics`.
data = {}
for user_stats in statistics.values():
    for field, value in user_stats.items():
        data.setdefault(field, []).append(value)

df_action = pd.DataFrame(data)
df_action.to_excel('./data/export/user_action_statistics_' +
                   date + '.xlsx', index=False)


#### Daily actions


In [21]:
import time


def day_counter(day1, day2):
    """Return the number of calendar days from ``day1`` to ``day2``.

    Args:
        day1: Start date as a 'YYYY-MM-DD' string.
        day2: End date as a 'YYYY-MM-DD' string.

    Returns:
        The whole number of days between the two dates; zero when they are
        equal and negative when ``day2`` is earlier than ``day1``.

    Raises:
        ValueError: If either argument does not match '%Y-%m-%d'.
    """
    date_format = "%Y-%m-%d"
    first = datetime.datetime.strptime(day1, date_format).date()
    second = datetime.datetime.strptime(day2, date_format).date()
    # Subtract calendar dates instead of local-time epoch seconds. A span
    # that crosses a daylight-saving switch is not a multiple of 86400
    # seconds, so flooring the seconds could come out one day short and made
    # the result depend on the machine's time zone.
    return (second - first).days


day1 = "2018-07-09"
day2 = "2020-09-26"
print(day_counter(day1, day2))


810


In [27]:
# Start date of each participant, indexed by participant number - 1.
start_date = ['2022-08-22', '2022-08-23', '2022-08-23', '2022-08-23', '2022-08-24', '2022-08-24', '2022-08-24', '2022-08-24',
              '2022-08-24', '2022-08-25', '2022-08-25', '2022-08-26', '2022-08-26', '2022-08-27', '2022-08-27', '2022-08-27', '2022-08-27', ]

# Number of study days tracked per participant, counted from the start date.
day_span = 14

# daily_action[day][i] holds the flags of the i-th participant (in the order
# of `participants`) for that study day: 1 if at least one action of the kind
# was logged on the day, else 0.
tracked_descriptions = ('trigger banner', 'save settings')
daily_action = [
    [{'trigger banner': 0, 'save settings': 0} for _ in participants]
    for _ in range(day_span)
]

# Visit every action once and drop it into its day bucket. Rescanning the
# whole log for each of the `day_span` days repeated the timestamp conversion
# and the day difference for every action on every pass.
for position, (user_id, participant_number) in enumerate(participants.items()):
    user_start = start_date[participant_number - 1]
    for action in action_log[user_id].values():
        description = action.get('description')
        if description not in tracked_descriptions:
            continue  # other actions never set a flag; skip the date conversion
        action_date = date_transform(action['timestamp'])[:10]
        day = day_counter(user_start, action_date)
        if 0 <= day < day_span:
            daily_action[day][position][description] = 1


# print(daily_action)


In [28]:
# Start date of each participant, indexed by participant number - 1.
start_date = ['2022-08-22', '2022-08-23', '2022-08-23', '2022-08-23', '2022-08-24', '2022-08-24', '2022-08-24', '2022-08-24',
              '2022-08-24', '2022-08-25', '2022-08-25', '2022-08-26', '2022-08-26', '2022-08-27', '2022-08-27', '2022-08-27', '2022-08-27', ]
day_span = 14

# daily_dairy[day][i] is {'send diary': 1} when the i-th participant (in the
# order of `participants`) sent at least one diary note on that study day,
# otherwise {'send diary': 0}. Participants without diary notes always get 0.
daily_dairy = []
for day in range(day_span):
    records = []
    for user_id, participant_number in participants.items():
        notes = diary_note.get(user_id, {})
        sent_today = any(
            day_counter(start_date[participant_number - 1],
                        date_transform(note['timestamp'])[:10]) == day
            for note in notes.values()
        )
        records.append({'send diary': 1 if sent_today else 0})
    daily_dairy.append(records)

# print(daily_dairy)


In [29]:
# Build one marker string per study day and participant:
#   'a' = the banner was triggered, 'b' = settings were saved,
#   'c' = a diary note was sent. Letters are concatenated in that order.
day_span = 14

outputs = []
for day in range(day_span):
    entry = []
    for user_id in range(17):
        action_flags = daily_action[day][user_id]
        diary_flags = daily_dairy[day][user_id]
        markers = (
            ('a', action_flags['trigger banner']),
            ('b', action_flags['save settings']),
            ('c', diary_flags['send diary']),
        )
        entry.append(''.join(letter for letter, flag in markers if flag))
    outputs.append(entry)

print(outputs)

[['ab', 'ab', 'ab', 'ab', 'ab', 'abc', 'abc', 'ac', 'abc', 'abc', 'abc', 'ab', 'abc', 'ab', 'ab', 'ab', 'ab'], ['a', '', '', 'a', 'ac', '', 'abc', 'ac', 'abc', 'abc', 'ac', 'a', 'abc', 'abc', 'ab', '', 'ac'], ['a', '', 'ac', '', 'ac', '', 'abc', 'ac', 'ab', '', 'ac', 'a', '', 'ab', 'abc', 'abc', ''], ['a', 'a', 'a', '', 'ac', '', 'abc', 'abc', 'abc', 'abc', 'ac', 'ab', 'abc', 'a', 'abc', 'ab', ''], ['', '', 'a', '', 'ac', '', 'ac', 'ab', '', '', '', 'a', '', 'abc', 'ab', 'abc', ''], ['', '', 'a', '', '', '', 'a', 'a', 'a', 'abc', '', 'a', '', 'a', 'abc', 'a', ''], ['', 'a', 'abc', '', 'ac', 'a', 'a', 'ac', 'a', '', 'ac', 'a', 'ab', 'a', '', 'a', ''], ['', 'abc', 'a', '', 'abc', '', 'abc', 'ab', 'ab', 'abc', 'ac', 'abc', '', 'abc', 'abc', 'a', ''], ['abc', 'a', 'ac', '', '', 'a', 'ac', 'ac', '', 'ab', '', 'a', 'a', 'a', 'a', 'a', ''], ['', 'a', 'ab', '', 'ac', 'a', 'ab', 'ac', '', 'a', '', 'a', 'c', 'ab', 'abc', 'abc', ''], ['', 'a', 'ab', '', '', '', 'ab', 'ac', '', 'a', '', 'a', 'a', 

In [30]:
# One column per study day ('day1', 'day2', ...); each column holds that
# day's marker strings, one row per participant.
day_columns = {}
for day_number, day_entries in enumerate(outputs, start=1):
    day_columns['day{}'.format(day_number)] = day_entries

df = pd.DataFrame(day_columns)
# NOTE(review): unlike the other exports, this file name has no '_' between
# the prefix and the date - confirm whether that is intended.
df.to_excel('./data/export/user_daily_action_diary' + date + '.xlsx',
            index=False)