In [33]:
import json
import pandas as pd
import datetime
from pytz import timezone


In [34]:
# Location of the raw JSON export analysed by this notebook, relative to the
# working directory (the file name suggests a database snapshot dated 2022-08-31 — confirm).
path = './data/raw/dark-pita-default-rtdb-export_20220831.json'


In [35]:
# Keep only the 'user-data' subtree of the export.
# The encoding is pinned because JSON text is UTF-8, whereas open() otherwise
# falls back to the platform's locale encoding (e.g. cp1252 on Windows), which
# can fail or garble non-ASCII text in the export.
with open(path, 'r', encoding='utf-8') as json_file:
    data = json.load(json_file)['user-data']


### Data Preprocessing


In [36]:
# Ids of the research team's own accounts; their content is removed from the
# data before any statistics are computed.
researcher_ids = [
    '100689073241975873280',
    '102686626036253115345',
    '105980611853356916531',
    '107417400614572912348',
    '109939652125735554083',
    '112229224879924656055',
    '114069654270801652660',
]


In [37]:
# Both names are references into `data`, so the removals below also change `data`.
action_log = data['user-action']
diary_note = data['user-diary']
print(len(action_log), len(diary_note))

# Drop every researcher from both collections; pop() with a default is a no-op
# for ids that are not present.
for researcher_id in researcher_ids:
    action_log.pop(researcher_id, None)
    diary_note.pop(researcher_id, None)
print(len(action_log), len(diary_note))


24 17
17 15


In [38]:
# One statistics record per user present in the action log, seeded with the
# user's id; the cells below add one counter per record.
statistics = {user_id: {'id': user_id} for user_id in action_log}

print(statistics)


{'00060000DEDE392A': {'id': '00060000DEDE392A'}, '100935129088843356602': {'id': '100935129088843356602'}, '100937764714847904352': {'id': '100937764714847904352'}, '103564003925636425038': {'id': '103564003925636425038'}, '104694983709365197975': {'id': '104694983709365197975'}, '105012360635537965053': {'id': '105012360635537965053'}, '107279156336793537009': {'id': '107279156336793537009'}, '107360463347436073559': {'id': '107360463347436073559'}, '107711334134980088812': {'id': '107711334134980088812'}, '107772607988378311880': {'id': '107772607988378311880'}, '107858562133949273618': {'id': '107858562133949273618'}, '108565623227009900784': {'id': '108565623227009900784'}, '109160689719008133998': {'id': '109160689719008133998'}, '110353078720257828530': {'id': '110353078720257828530'}, '115767467286550120166': {'id': '115767467286550120166'}, '116084227873580561392': {'id': '116084227873580561392'}, '117105890044607595797': {'id': '117105890044607595797'}}


### Diary note Processing


#### How many times do our participants send diary notes?


In [39]:
# Users with diary notes: the count is the number of entries stored under their id.
for user_id, user_notes in diary_note.items():
    statistics[user_id]['send_diary_note'] = len(user_notes)

# Users in the action log who have no diary entry still need the field; default it to 0.
for user_id in action_log:
    statistics[user_id].setdefault('send_diary_note', 0)

print(statistics['107711334134980088812'])


{'id': '107711334134980088812', 'send_diary_note': 2}


#### Export diary notes for each participant


In [40]:
def date_transform(timestamp, tz_name='US/Eastern'):
    """Format an epoch timestamp given in milliseconds as local wall-clock time.

    Args:
        timestamp: Milliseconds since the Unix epoch (int or float). The
            sub-second part is discarded.
        tz_name: Name of the time zone to render in, as accepted by
            ``pytz.timezone``. Defaults to ``'US/Eastern'``.

    Returns:
        The instant formatted as ``'%Y-%m-%d %H:%M:%S'`` in the requested zone.
    """
    seconds = int(timestamp / 1000)
    # Build an aware UTC datetime directly: datetime.utcfromtimestamp() returns a
    # naive value and is deprecated since Python 3.12.
    moment_utc = datetime.datetime.fromtimestamp(seconds, tz=datetime.timezone.utc)
    moment_local = moment_utc.astimezone(timezone(tz_name))
    return moment_local.strftime("%Y-%m-%d %H:%M:%S")


print(date_transform(1661806859798))


2022-08-29 17:00:59


In [41]:
# Empty frame whose columns fix the layout of the diary-note export.
header = {
    column: []
    for column in ('user_id', 'date', 'question_one', 'question_two',
                   'question_three', 'screenshot', 'url')
}
df_diary = pd.DataFrame(header)


In [42]:
# Flatten the nested {user_id: {diary_id: note}} mapping into one record per note.
# The frame is built once from the complete list: appending through .loc row by
# row re-allocates on every insert and duplicates rows when the cell is re-run.
# Using a dedicated name also leaves the loaded JSON in `data` untouched.
diary_records = [
    {
        'user_id': user_id,
        'date': date_transform(diary['timestamp']),
        'question_one': diary['one'],
        'question_two': diary['two'],
        'question_three': diary['three'],
        'screenshot': diary['screenshot'],
        'url': diary['url'],
    }
    for user_id, user_diaries in diary_note.items()
    for diary in user_diaries.values()
]
# `header` supplies the column order, so an empty export still carries its columns.
df_diary = pd.DataFrame(diary_records, columns=list(header))

df_diary.to_excel('./data/export/user_diary_note_20220831.xlsx')


### Action Log Processing


#### How many actions does each participant create?

In [43]:
# The number of actions is the number of log entries stored under the user's id.
for user_id, user_actions in action_log.items():
    statistics[user_id]['send_action'] = len(user_actions)

print(statistics['110353078720257828530'])


{'id': '110353078720257828530', 'send_diary_note': 9, 'send_action': 1515}


#### How many times do our participants enter sites containing our sampled dark pattern instances (i.e., how many times is our probe triggered)?


In [44]:
# When the site contains an instance, the banner would be triggered, so each
# 'trigger banner' log entry is counted as one probe activation.

for user_id, user_actions in action_log.items():
    statistics[user_id]['trigger_probe'] = sum(
        1
        for entry in user_actions.values()
        if 'description' in entry and entry['description'] == 'trigger banner'
    )

print(statistics['107711334134980088812'])


{'id': '107711334134980088812', 'send_diary_note': 2, 'send_action': 6547, 'trigger_probe': 175}


#### How many times do our participants change dark patterns (i.e., how many times do they select a UI alternative and save changes)?


In [50]:
# When users change a dark pattern, they have to choose a UI alternative and save settings,
# so every 'save settings' log entry is counted as one change.
for user_id, user_actions in action_log.items():
    save_settings = [
        entry
        for entry in user_actions.values()
        if 'description' in entry and entry['description'] == 'save settings'
    ]
    statistics[user_id]['change_dark_pattern'] = len(save_settings)

    # Export individual user actions for all changes of UI alternatives: one row
    # per save, holding its timestamp followed by the fields of its 'action' mapping.
    # Rows are built whole so that each save keeps its own timestamp and entries
    # with differing 'action' keys align by column name (gaps become NaN).
    save_rows = [
        {'timestamp': date_transform(save_setting['timestamp']), **save_setting['action']}
        for save_setting in save_settings
    ]
    # Users without any save still get a file containing the lone 'timestamp' header.
    df = pd.DataFrame(save_rows) if save_rows else pd.DataFrame({'timestamp': []})
    df.to_excel('./data/export/individual_user_action/' + user_id + '_action_20220831.xlsx')

print(statistics['107711334134980088812'])


{'id': '107711334134980088812', 'send_diary_note': 2, 'send_action': 6547, 'trigger_probe': 175, 'change_dark_pattern': 12}


#### How many times do our participants experience each UI alternative?


In [30]:
# Names of the UI alternatives to count; each is matched as a substring of an
# action's 'description' in the next cell.
ui_alternatives = [
    'amazon_buy_now_hide',
    'amazon_buy_now_fairness',
    'amazon_buy_now_friction',
    'amazon_disguised_ads_hide',
    'amazon_disguised_ads_friction',
    'amazon_disguised_ads_disclosure',
    'amazon_disguised_ads_counterfact',
    'amazon_discount_price_hide',
    'amazon_discount_price_disclosure',
    'amazon_discount_price_reflection',
    'amazon_discount_price_action',
    'amazon_home_card_focus',
    'amazon_home_card_reflection',
    'amazon_home_card_progress',
    'youtube_recommended_video_focus',
    'youtube_recommended_video_preview',
    'youtube_recommended_video_reflection',
    'youtube_video_dislike_fairness',
    'youtube_sidebar_video_focus',
    'youtube_sidebar_video_preview',
    'youtube_sidebar_video_reflection',
    'twitter_whats_happening_hide',
    'twitter_promoted_highlight',
    'twitter_promoted_friction',
    'facebook_reels_hide',
    'facebook_reels_counterfact',
    'facebook_reels_friction',
    'facebook_suggested_for_you_hide',
    'facebook_suggested_for_you_highlight',
    'netflix_timeline_reflection',
    'netflix_hugepreview_disable',
]
print(len(ui_alternatives))


31


In [31]:
# Count, per user, the log entries whose 'action' equals 1 and whose 'description'
# contains the name of a UI alternative.
# NOTE(review): 'action' == 1 presumably marks the alternative being applied — confirm.
# Each user's log is walked once instead of once per UI alternative.

for user_id, user_actions in action_log.items():
    # Seed every alternative with 0 so each user ends up with the same fields, in list order.
    alternative_counts = dict.fromkeys(ui_alternatives, 0)
    for entry in user_actions.values():
        if 'action' in entry and entry['action'] == 1 and 'description' in entry:
            description = entry['description']
            for ui_alternative in alternative_counts:
                if ui_alternative in description:
                    alternative_counts[ui_alternative] += 1

    statistics[user_id].update(alternative_counts)

print(statistics['107711334134980088812'])


{'id': '107711334134980088812', 'send_diary_note': 2, 'send_action': 6547, 'trigger_probe': 175, 'change_dark_pattern': 12, 'amazon_buy_now_hide': 0, 'amazon_buy_now_fairness': 0, 'amazon_buy_now_friction': 0, 'amazon_disguised_ads_hide': 2, 'amazon_disguised_ads_friction': 10, 'amazon_disguised_ads_disclosure': 2, 'amazon_disguised_ads_counterfact': 2, 'amazon_discount_price_hide': 0, 'amazon_discount_price_disclosure': 0, 'amazon_discount_price_reflection': 0, 'amazon_discount_price_action': 0, 'amazon_home_card_focus': 1, 'amazon_home_card_reflection': 2, 'amazon_home_card_progress': 1, 'youtube_recommended_video_focus': 12, 'youtube_recommended_video_preview': 2216, 'youtube_recommended_video_reflection': 4, 'youtube_video_dislike_fairness': 0, 'youtube_sidebar_video_focus': 2024, 'youtube_sidebar_video_preview': 6, 'youtube_sidebar_video_reflection': 3, 'twitter_whats_happening_hide': 42, 'twitter_promoted_highlight': 2, 'twitter_promoted_friction': 66, 'facebook_reels_hide': 0, '

#### Export Action Statistics


In [32]:
# One row per user, one column per statistic, with columns in first-seen key order.
# Building the frame from the per-user records aligns values by key, so a user
# missing a statistic yields NaN rather than columns of unequal length; it also
# avoids rebinding `data`, which holds the loaded JSON.
df_action = pd.DataFrame(list(statistics.values()))
df_action.to_excel('./data/export/user_action_statistics_20220831.xlsx')
