In [1]:
import json
import os
import pandas as pd
import numpy as np
import math
from IPython.display import display

In [3]:
def sum_timers(timers):
    """Total the recorded times of a timer list, grouped by timer type.

    Parameters
    ----------
    timers : list of dict or any
        Each entry is read through its ``'type'`` and ``'time'`` keys.
        Anything that is not exactly a ``list`` is treated as "no timers".

    Returns
    -------
    dict
        Maps each timer type seen to the sum of its ``'time'`` values;
        empty when ``timers`` is not a list or has no entries.
    """
    totals = {}
    if type(timers) is not list:
        return totals

    for entry in timers:
        kind = entry['type']
        elapsed = entry['time']
        if kind in totals:
            totals[kind] = totals[kind] + elapsed
        else:
            # First occurrence of this type seeds the running total.
            totals[kind] = elapsed
        # TODO: Convert from ms to s?
    return totals

In [4]:
def list_timers(timers):
    """Group the recorded times of a timer list by timer type.

    Parameters
    ----------
    timers : list of dict or any
        Each entry is read through its ``'type'`` and ``'time'`` keys.
        Anything that is not a ``list`` (e.g. ``None`` or ``NaN`` for a
        missing cell) is treated as "no timers".

    Returns
    -------
    dict
        Always contains the keys ``'zoom'``, ``'drag'``, ``'highlight'``,
        ``'slower'``, ``'faster'`` and ``'slider'``, each mapped to the list
        of ``'time'`` values recorded for that type (in input order).
        Timer types outside that set get their own key instead of raising
        ``KeyError``.
    """
    lists = {
        'zoom': [],
        'drag': [],
        'highlight': [],
        'slower': [],
        'faster': [],
        'slider': []
    }
    if isinstance(timers, list):
        for entry in timers:
            # setdefault keeps unknown timer types instead of aborting the
            # whole parse with a KeyError.
            lists.setdefault(entry['type'], []).append(entry['time'])
            # TODO: Convert from ms to s?
    return lists

In [5]:
def parse_result(df, group, result):
    """Flatten one survey result into one record per answered question.

    Rows of ``df`` are visited in order. A row whose ``'question'`` value
    contains the element ``'preferences'`` or ``'comments'`` is skipped.
    A row whose question name (``row['question'][0]``) does not contain
    ``'confidence'`` is treated as a question row: its name, answer
    (``row['question'][1]``), ``'time'``, ``'interactions'`` and ``'timers'``
    are remembered. A record is emitted on every row whose index label makes
    ``(index + 1)`` even, using that row's ``row['question'][1]`` as the
    confidence value.

    NOTE(review): this assumes ``df`` has a 0-based integer index in which
    question rows and their confidence rows alternate -- confirm against the
    survey export format.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs the columns ``'question'``, ``'time'``, ``'interactions'`` and
        ``'timers'``.
    group : any
        Stored unchanged in the ``'group'`` column of every record.
    result : any
        Stored unchanged in the ``'id'`` column of every record.

    Returns
    -------
    pandas.DataFrame
        One row per emitted record with the columns group, id, question,
        answer, time, confidence, six ``*_timers`` list columns and six
        ``*_interactions`` count columns.
    """
    columns = ['group', 'id', 'question', 'answer', 'time', 'confidence',
               'zoom_timers', 'drag_timers', 'highlight_timers',
               'slower_timers', 'faster_timers', 'slider_timers',
               'zoom_interactions', 'drag_interactions', 'highlight_interactions',
               'slower_interactions', 'faster_interactions', 'slider_interactions']
    # Keys of the per-type timer lists, in output column order.
    timer_types = ('zoom', 'drag', 'highlight', 'slower', 'faster', 'slider')
    # Keys read from the ``interactions`` dict, in output column order.
    interaction_keys = ('zooms', 'drags', 'highlights', 'slower', 'faster', 'slider')

    data = []

    # State carried from a question row to the row that emits the record.
    # Everything starts "empty" so that a record emitted before any question
    # row has been seen gets defaults instead of raising UnboundLocalError.
    question = ''
    answer = 0
    time = 0
    interactions = None
    lists = list_timers(None)

    for index, row in df.iterrows():
        survey_question = row['question']

        if 'preferences' in survey_question or 'comments' in survey_question:
            continue

        if 'confidence' not in survey_question[0]:  # Question / answer row
            question = survey_question[0]
            answer = survey_question[1]
            time = row['time']
            interactions = row['interactions']
            lists = list_timers(row['timers'])

        if (index + 1) % 2 != 0:
            continue

        confidence = survey_question[1]

        # Number of total interactions per interaction type; a missing dict
        # or a missing key both count as zero.
        if isinstance(interactions, dict):
            counts = [interactions.get(key, 0) for key in interaction_keys]
        else:
            counts = [0] * len(interaction_keys)

        data.append([group, result, question, answer, time, confidence]
                    + [lists[kind] for kind in timer_types]
                    + counts)

        # Reset all carried state so nothing leaks into the next record.
        question = ''
        answer = 0
        time = 0
        interactions = None
        lists = list_timers(None)

    results = pd.DataFrame(data, columns=columns)
    return results

In [28]:
# Go through results dir and load all files in data frame
def read_files(directory='results'):
    """Load every survey JSON file in ``directory`` into one DataFrame.

    Each ``*.json`` file is read with ``pandas.read_json`` and flattened by
    ``parse_result``. The result id and group are taken from the second and
    third ``_``-separated parts of the file name (without extension).
    Files are processed in sorted name order so the output is reproducible.

    Parameters
    ----------
    directory : str, optional
        Folder to scan. Defaults to ``'results'``.

    Returns
    -------
    pandas.DataFrame
        All parsed records with missing values replaced by 0 and the
        ``*_interactions`` and ``time`` columns cast to ``int``.

    Side effects
    ------------
    Prints a message for every skipped file, prints the number of parsed
    surveys, and displays the combined frame.
    """
    columns = ['group', 'id', 'question', 'answer', 'time', 'confidence',
               'zoom_timers', 'drag_timers', 'highlight_timers',
               'slower_timers', 'faster_timers', 'slider_timers',
               'zoom_interactions', 'drag_interactions', 'highlight_interactions',
               'slower_interactions', 'faster_interactions', 'slider_interactions']

    survey_count = 0
    frames = []
    for filename in sorted(os.listdir(directory)):
        if not filename.endswith(".json"):
            print('File not JSON', filename)
            continue

        # parse id and group from file name
        parts = os.path.splitext(filename)[0].split('_')
        if len(parts) < 3:
            print('File name has no id/group', filename)
            continue
        result_id = parts[1]
        group = parts[2]

        with open(os.path.join(directory, filename), encoding='utf-8', mode='r') as current_file:
            df = pd.read_json(current_file)

        data = parse_result(df, group, result_id)  # parse single survey result
        survey_count += 1
        if not data.empty:
            frames.append(data)

    # Concatenate once: DataFrame.append was removed in pandas 2.0 and
    # growing a frame inside the loop is quadratic.
    if frames:
        survey_results = pd.concat(frames, ignore_index=True)
    else:
        survey_results = pd.DataFrame([], columns=columns)

    survey_results = survey_results.fillna(0)
    survey_results = survey_results.astype({
        'zoom_interactions': int,
        'drag_interactions': int,
        'highlight_interactions': int,
        'slower_interactions': int,
        'faster_interactions': int,
        'slider_interactions': int,
        'time': int,
    })

    print(f'Parsed {survey_count} surveys.')
    display(survey_results)
    return survey_results

In [17]:
def save_csv(results, path='results.csv'):
    """Write ``results`` to a CSV file and print a confirmation.

    Parameters
    ----------
    results : pandas.DataFrame
        Frame to export; written with ``DataFrame.to_csv`` defaults, so the
        index is included as the first column.
    path : str, optional
        Destination file. Defaults to ``'results.csv'``.
    """
    results.to_csv(path)
    print(f"CSV generated '{path}'")

In [30]:
# Parse all survey files into one table, then export it to 'results.csv'.
survey_data = read_files()
save_csv(survey_data)

Parsed 76 surveys.


Unnamed: 0,group,id,question,answer,time,confidence,zoom_timers,drag_timers,highlight_timers,slower_timers,faster_timers,slider_timers,zoom_interactions,drag_interactions,highlight_interactions,slower_interactions,faster_interactions,slider_interactions
0,B,02f10e19-83e5-48a3-be35-5258e2e89c1a,m_t1_clique_si,3,754,1,[100],[],"[3400, 429, 8300, 428, 233, 214, 1599, 57199, ...",[],[],[],16,0,162,0,0,0
1,B,02f10e19-83e5-48a3-be35-5258e2e89c1a,nl_t3_single_si,1,86,5,"[156, 249, 310, 327, 1317, 177, 248, 243, 164,...","[4407, 3694, 3410, 3901, 2870, 701, 3894, 4036]",[],[],[],[],26,8,0,0,0,0
2,B,02f10e19-83e5-48a3-be35-5258e2e89c1a,m_t1_single_tl,3,62,5,"[156, 151, 203, 318, 162, 159, 158, 277, 1295,...",[],"[315, 381, 516, 279, 915, 551, 284, 277, 367, ...",[],[],"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",10,0,23,0,0,10
3,B,02f10e19-83e5-48a3-be35-5258e2e89c1a,m_t3_clique_jp,5,605,5,[],[],"[1947, 263, 396, 9420, 464, 364, 25930, 304, 8...",[],[],[],0,0,100,0,0,0
4,B,02f10e19-83e5-48a3-be35-5258e2e89c1a,m_t1_single_si,2,40,5,[],[],"[1249, 360, 783, 332, 415, 518, 3727, 481, 114...",[],[],[],0,0,10,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4022,B,ffadb36b-5051-4684-b9cf-ed767a599434,m_t2_clique_si,3,46,1,[],[],[325],[],[],[],0,0,1,0,0,0
4023,B,ffadb36b-5051-4684-b9cf-ed767a599434,m_t2_clique_an,3,202,1,[],[],[325],[],[],[],0,0,1,0,0,0
4024,B,ffadb36b-5051-4684-b9cf-ed767a599434,m_t1_single_jp,3,20,5,[],[],"[1241, 955, 359, 1660, 780, 253, 4972]",[],[],[],0,0,7,0,0,0
4025,B,ffadb36b-5051-4684-b9cf-ed767a599434,nl_t3_clique_si,4,126,1,"[76, 782, 2964]","[2488, 1398, 3334]",[],[],[],[],3,3,0,0,0,0


CSV generated 'results.csv'
