In [1]:
import sys
sys.path.append('../implementation/')
import ast
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy.special as sp
from tqdm import tqdm
import time
from healey_adaboost_naive_bayes import AdaBoostNB
import warnings
from util import flatten_list
warnings.filterwarnings('ignore')

In [2]:
# ---------------------------------------------------------------------------
# Configuration: input locations, interaction source, evaluation settings
# ---------------------------------------------------------------------------
data_path = '../data/vast_2011_challenge/vast_data_sample_reduced.pkl'
bookmarks_ui_data_path = '../data/vast_2011_challenge/bookmark_interactions_clean.pkl'
hovers_ui_data_path = '../data/vast_2011_challenge/hover_interactions.pkl'

# Switch between the two interaction logs: True -> hovers, False -> bookmarks.
hovers = False

if not hovers:
    output_file_path = '../output/vast/vast_11_ada_nb.pkl'
    bookmark_sessions = pd.read_pickle(bookmarks_ui_data_path)
    # Only the 'control' group is kept for bookmarks; the index is rebuilt so
    # that row labels run 0..n-1 after filtering.
    is_control = bookmark_sessions['experimental_group'] == 'control'
    interaction_data = bookmark_sessions[is_control].reset_index(drop=True)
else:
    # Hover interactions are used as loaded (no group filter, no index reset).
    output_file_path = '../output/vast/vast_11_hovers_ada_nb.pkl'
    interaction_data = pd.read_pickle(hovers_ui_data_path)

# NOTE(review): pd.read_pickle can execute arbitrary code while unpickling;
# only point these paths at trusted files.
underlying_data = pd.read_pickle(data_path)

# Cut-offs k for the top-k hit test applied to each prediction.
ks = [1, 5, 10, 20, 50, 100]
# Attribute groups handed to AdaBoostNB; presumably "d" = discrete and
# "c" = continuous attribute sets -- TODO confirm against the model class.
d_attributes = ['topic']
c_attributes = [['latitude', 'longitude']]

In [3]:
# Replay every participant's interaction session through a fresh AdaBoostNB
# model. After each interaction (except the last) the model's prediction is
# scored against the interaction that actually came next:
#   * 'ncp-<k>' : fraction of predictions where the next point was among the
#                 k highest-scored points, for each k in `ks`
#   * 'rank'    : 1-based position of the next point in the descending
#                 ordering of the prediction, one entry per prediction
# The per-participant records are written to `output_file_path` as a pickle.
#
# Records are collected in plain lists and turned into DataFrames once,
# because DataFrame.append was removed in pandas 2.0 and copied the whole
# frame on every call.
participant_records = []

for _, row in interaction_data.iterrows():
    print(f'Processing user {row.user} task {row.experimental_group}')
    results = {'participant_id': row.user, 'task': row.experimental_group}

    # Take the session from the row being iterated. Re-indexing with
    # `.iloc[<index label>]` is only correct when the index is exactly
    # 0..n-1, which the hovers branch of the loading cell does not guarantee.
    session = row.interaction_session
    n_interactions = len(session)

    model = AdaBoostNB(underlying_data, c_attributes, d_attributes)
    hit_records = []      # one {k: bool} dict per scored prediction
    rank_predicted = []

    for i in tqdm(range(n_interactions)):
        model.update(session[i])

        # The final interaction has no successor to score against.
        if i < n_interactions - 1:
            # assumes predict() returns a pandas Series of scores indexed by
            # point id (it is used with nlargest / sort_values) -- TODO confirm
            probability_of_next_point = model.predict()
            next_point = session[i + 1]

            hit_records.append({
                k: next_point in probability_of_next_point.nlargest(k).index.values
                for k in ks
            })

            sorted_prob = probability_of_next_point.sort_values(ascending=False)
            # Raises IndexError below if next_point is absent from the
            # prediction index (same as before).
            rank, = np.where(sorted_prob.index.values == next_point)
            rank_predicted.append(rank[0] + 1)

    predicted = pd.DataFrame(hit_records)
    # Column-wise hit rate. For a session with fewer than two interactions
    # this is an empty Series, so no 'ncp-*' keys are added and the final
    # frame holds NaN for that participant.
    ncp = predicted.sum() / len(predicted)
    results['rank'] = rank_predicted

    for col in ncp.index:
        results[f'ncp-{col}'] = ncp[col]

    participant_records.append(results)

nb_results = pd.DataFrame(participant_records)
nb_results.to_pickle(output_file_path)

Processing user ottNn7b8Wt1EL-I0bfFbOA task control


100%|██████████| 45/45 [00:08<00:00,  5.17it/s]


Processing user Tn8m4yLs6E0GDO6sggwYTw task control


100%|██████████| 147/147 [00:26<00:00,  5.61it/s]


Processing user SW8GwZ1tt4Z5DiK3J4rw2g task control


100%|██████████| 145/145 [00:31<00:00,  4.64it/s]


Processing user POzOiTeXMSdTEbmSmI4wpg task control


100%|██████████| 52/52 [00:11<00:00,  4.38it/s]


Processing user n-Oq8YP_DhsByGZyxuGroQ task control


100%|██████████| 20/20 [00:04<00:00,  4.74it/s]


Processing user UNaSIzwWzwgmGLZsEyvgsQ task control


100%|██████████| 45/45 [00:10<00:00,  4.22it/s]


Processing user 0R7Oc16T5OzW-_YNs9rkkg task control


100%|██████████| 85/85 [00:19<00:00,  4.30it/s]


Processing user k0XtDgVMlnH0N1ivlez7hw task control


100%|██████████| 22/22 [00:04<00:00,  4.49it/s]


Processing user bifsEtKMXIJ88523WNgtnw task control


100%|██████████| 15/15 [00:03<00:00,  4.27it/s]


Processing user q-1VAayPIoWBjTCQi_DoVQ task control


100%|██████████| 56/56 [00:12<00:00,  4.42it/s]


Processing user 0FomFSRLtGmolWmt6v-BzQ task control


100%|██████████| 23/23 [00:04<00:00,  4.67it/s]


Processing user s_Iy02wt_jLcFG3_vAw1ig task control


100%|██████████| 76/76 [00:15<00:00,  5.00it/s]


Processing user Rr-Am9XWLG_j9RzUhfpI6Q task control


100%|██████████| 41/41 [00:08<00:00,  4.82it/s]


Processing user diIEYYtFfdAwwLsRtdW0wg task control


100%|██████████| 62/62 [00:12<00:00,  5.02it/s]


Processing user SG09OWw6EdZOVvZlTjM-FQ task control


100%|██████████| 55/55 [00:11<00:00,  4.81it/s]


Processing user 0aJkmv1hnmWrVGAwLE6RIg task control


100%|██████████| 75/75 [00:18<00:00,  3.97it/s]


Processing user wMWdKtkmZiTUIW_s_XSiKw task control


100%|██████████| 54/54 [00:13<00:00,  3.99it/s]


Processing user 3RJkTqsdzektLw8VmK3d1A task control


100%|██████████| 60/60 [00:12<00:00,  4.98it/s]


Processing user K4oiwhn0PHkER7IOBzp3tA task control


100%|██████████| 97/97 [00:19<00:00,  5.01it/s]


Processing user 9cgrSAf3_zRo509cVXexBw task control


100%|██████████| 69/69 [00:11<00:00,  6.02it/s]


Processing user 705Te_LAeladSVRFI5Sp9w task control


100%|██████████| 55/55 [00:09<00:00,  5.89it/s]


Processing user NRyzJB-AeSx5AOWhloLZ8g task control


100%|██████████| 33/33 [00:05<00:00,  5.85it/s]


Processing user anfa5Zq4bjh-81ZE-RYzzA task control


100%|██████████| 23/23 [00:04<00:00,  5.71it/s]


Processing user N0VtS5g31siXqoUhQnAAPQ task control


100%|██████████| 153/153 [00:26<00:00,  5.75it/s]


Processing user -qgyUblFjzGauFx_Qt1xxQ task control


100%|██████████| 82/82 [00:14<00:00,  5.58it/s]


Processing user rQCaKZQ2F25eBST_bWyKFA task control


100%|██████████| 13/13 [00:02<00:00,  5.92it/s]


Processing user t3dx1A8gxcC7T0GkYvk4rQ task control


100%|██████████| 66/66 [00:10<00:00,  6.10it/s]


Processing user pM3lshrmqr3py5jVQkCGog task control


100%|██████████| 149/149 [00:24<00:00,  6.02it/s]


Processing user PtJP0V71F8Ud9kyU_molEw task control


100%|██████████| 70/70 [00:10<00:00,  6.40it/s]


Processing user fgTmANyUtAen0exuGrzVzQ task control


100%|██████████| 26/26 [00:03<00:00,  6.50it/s]


Processing user nHFRA1QAG9pg93oF4T1L2Q task control


100%|██████████| 34/34 [00:05<00:00,  6.43it/s]


Processing user Q00_bx5Pq4Yn1_3XcqEp2g task control


100%|██████████| 72/72 [00:11<00:00,  6.11it/s]


Processing user Y3r-Fnt1-qWQS7_mmHI5jw task control


100%|██████████| 98/98 [00:16<00:00,  6.08it/s]


Processing user 5ECfx4KIAg6Yzyjhr4WnWg task control


100%|██████████| 81/81 [00:14<00:00,  5.65it/s]


Processing user uq51tok4glK6YATXynq5EQ task control


100%|██████████| 48/48 [00:08<00:00,  5.91it/s]


Processing user ZIRBQFPgBo9Lt_kHHC3MiA task control


100%|██████████| 74/74 [00:11<00:00,  6.64it/s]


Processing user T2jkw-sDvoBLWJWDJ7zJTw task control


100%|██████████| 79/79 [00:11<00:00,  6.86it/s]


Processing user 9gpKpL2nWUcfz8mxAh0Xzw task control


100%|██████████| 93/93 [00:14<00:00,  6.47it/s]


Processing user 4ffSoA9uBjbb6IiECmA6bA task control


100%|██████████| 77/77 [00:11<00:00,  6.85it/s]


Processing user zuHD08T_ba4T2Gjp96aXhA task control


100%|██████████| 33/33 [00:04<00:00,  7.21it/s]


Processing user Nq4mKs5hJ8Dc063VT1f3qg task control


100%|██████████| 110/110 [00:15<00:00,  7.19it/s]


Processing user MmnvXyPd-cyyKRhtUwKqhQ task control


100%|██████████| 88/88 [00:11<00:00,  7.50it/s]


Processing user JjcI7g84OpS3dppwczTegw task control


100%|██████████| 156/156 [00:21<00:00,  7.35it/s]


Processing user 3BgluksNw7LMIzSlCIQ03A task control


100%|██████████| 51/51 [00:06<00:00,  7.51it/s]


Processing user w3wvq5o6qqajfpLRsKR_-w task control


100%|██████████| 111/111 [00:14<00:00,  7.56it/s]


Processing user sy8HjGfNzVj1cjblJSY22Q task control


100%|██████████| 93/93 [00:12<00:00,  7.47it/s]


Processing user y8og7vrDFCRp4730ewy5og task control


100%|██████████| 41/41 [00:05<00:00,  7.83it/s]


Processing user pSEODcY6AnYvn02J9GGdcg task control


100%|██████████| 75/75 [00:09<00:00,  7.54it/s]


Processing user U7Pma3fUpC72WNzsPyaY7Q task control


100%|██████████| 41/41 [00:05<00:00,  7.63it/s]


Processing user HrEpGdg2fgOR7zYhlElElw task control


100%|██████████| 98/98 [00:13<00:00,  7.32it/s]


Processing user ykgZOJHZlLka4fBLn89ZGg task control


100%|██████████| 86/86 [00:11<00:00,  7.63it/s]


Processing user QVknIlaxajN5eM1CrZB-JA task control


100%|██████████| 42/42 [00:05<00:00,  7.61it/s]


Processing user aDbzRAWuqzHLqD8l7uEwpg task control


100%|██████████| 78/78 [00:10<00:00,  7.55it/s]


Processing user Xi8Tbn4gbexPQpZa5VGkUw task control


100%|██████████| 63/63 [00:08<00:00,  7.59it/s]


Processing user riPuXirdQKONUOzAK2ZVPQ task control


100%|██████████| 71/71 [00:09<00:00,  7.58it/s]


Processing user 6FdkeNiQYrM_ol2yVmmxYA task control


100%|██████████| 89/89 [00:11<00:00,  7.63it/s]


Processing user GXaCM667o1nnMLifJ7MMMw task control


100%|██████████| 71/71 [00:09<00:00,  7.45it/s]


Processing user OsR8tpHbiZDTgN_Fni1xvA task control


100%|██████████| 55/55 [00:07<00:00,  7.57it/s]


Processing user wSZ7_Au-lq0AvLyYHckErA task control


100%|██████████| 97/97 [00:12<00:00,  7.68it/s]


Processing user FyySgtJHe3bgGN1nAvmRlA task control


100%|██████████| 68/68 [00:08<00:00,  7.59it/s]


Processing user sIUlHrVUba449u2GUMASHA task control


100%|██████████| 81/81 [00:10<00:00,  7.65it/s]


Processing user LMM61OcS7fIJKjpp8PLRwA task control


100%|██████████| 28/28 [00:03<00:00,  7.58it/s]


Processing user twF3ibAy0Lt1-rqJShli2w task control


100%|██████████| 139/139 [00:18<00:00,  7.51it/s]


Processing user x2YPwqGiPma1PtdaR0pcsA task control


100%|██████████| 34/34 [00:04<00:00,  7.60it/s]


Processing user F0Wnn8mBVBKSysvLxzhJew task control


100%|██████████| 74/74 [00:10<00:00,  7.14it/s]


Processing user FZl65iI0EoU-NFlIzal82Q task control


100%|██████████| 58/58 [00:07<00:00,  7.39it/s]


Processing user tYCAwT3T7WtrpGuuLWyeyw task control


100%|██████████| 44/44 [00:05<00:00,  7.56it/s]


Processing user vORWFrZw9Qf5YJnFImjhMg task control


100%|██████████| 53/53 [00:07<00:00,  7.48it/s]


Processing user 5bKBzTuSK8-yD97HTS1dZw task control


100%|██████████| 55/55 [00:07<00:00,  7.57it/s]


Processing user TNh4yciycOwATX7OZcMqAw task control


100%|██████████| 39/39 [00:05<00:00,  7.52it/s]


Processing user xFutzVnyUPJa1FdcHDBuOA task control


100%|██████████| 49/49 [00:06<00:00,  7.48it/s]


Processing user 5GV8_vl7CrnU1n_e3BYAyA task control


100%|██████████| 59/59 [00:07<00:00,  7.52it/s]


Processing user RZKgmh54IrTo2bVMpg0L9w task control


100%|██████████| 38/38 [00:04<00:00,  7.72it/s]


Processing user Z2yONYSDzxqhG_m_AZAnog task control


100%|██████████| 117/117 [00:14<00:00,  8.08it/s]
