In [1]:
import json
import os
import pandas as pd
import numpy as np
import math
from IPython.display import display

In [2]:
# Answer key: maps a question identifier to the expected answer.
# is_correct() looks answers up here by question id, and parse_result() copies
# the expected value into the 'correct_answer' column.
# NOTE(review): keys appear to follow <nl|m>_t<1-3>_<clique|single>_<jp|si|tl|an>;
# the meaning of each segment is not visible in this notebook - TODO confirm.
# The three "control*" entries are the only ones with non-task keys, and
# control1/control3 expect string answers rather than integers.
correct_answers = {
    "nl_t2_clique_tl" : 3,
    "m_t2_clique_jp" : 3,
    "m_t2_single_si" : 2,
    "m_t1_single_si" : 2,
    "nl_t1_clique_jp" : 4,
    "nl_t2_single_tl" : 4,
    "m_t1_clique_si" : 3,
    "nl_t1_clique_tl" : 1,
    "nl_t3_clique_an" : 4,
    "m_t1_clique_tl" : 1,
    "m_t1_clique_an" : 2,
    "nl_t3_clique_jp" : 3,
    "m_t1_single_jp" : 3,
    "nl_t1_single_jp" : 2,
    "m_t2_single_tl" : 4,
    "nl_t2_single_an" : 4,
    "nl_t1_single_si" : 3,
    "m_t3_single_si" : 3,
    "m_t3_single_an" : 4,
    "nl_t3_clique_si" : 5,
    "m_t2_clique_an" : 4,
    "nl_t1_single_tl" : 3,
    "m_t3_single_tl" : 4,
    "m_t2_clique_si" : 6,
    "m_t2_single_jp" : 4,
    "nl_t3_single_jp" : 1,
    "nl_t1_clique_si" : 3,
    "m_t1_single_an" : 2,
    "nl_t2_single_si" : 3,
    "nl_t3_single_si" : 1,
    "m_t3_clique_si" : 5,
    "m_t1_single_tl" : 3,
    "m_t3_clique_tl" : 1,
    "nl_t2_single_jp" : 4,
    "nl_t3_clique_tl" : 1,
    "nl_t3_single_tl" : 2,
    "m_t2_single_an" : 2,
    "m_t3_clique_an" : 4,
    "nl_t2_clique_jp" : 3,
    "m_t3_clique_jp" : 3,
    "nl_t1_clique_an" : 2,
    "nl_t3_single_an" : 2,
    "nl_t1_single_an" : 4,
    "nl_t2_clique_si" : 6,
    "nl_t2_clique_an" : 4,
    "m_t3_single_jp" : 1,
    "m_t2_clique_tl" : 3,
    "m_t1_clique_jp" : 4,
    "control1": "red",
    "control2": 4,
    "control3": "LT02"
}

In [3]:
def is_correct(answer, question):
    """Tell whether ``answer`` equals the expected answer for ``question``.

    Args:
        answer: The value the participant submitted.
        question: Question identifier; must be a key of ``correct_answers``.

    Returns:
        The result of comparing ``answer`` with the stored expected value
        using ``==``.

    Raises:
        KeyError: If ``question`` is not present in ``correct_answers``.
    """
    expected = correct_answers[question]
    return answer == expected

In [4]:
def sum_timers(timers):
    """Total the recorded time per interaction type.

    Args:
        timers: A list of mappings, each with a ``'type'`` and a ``'time'``
            entry. Anything that is not exactly a ``list`` (for example a
            missing value) is treated as "no timers".

    Returns:
        dict mapping each timer type that occurred to the sum of its
        ``'time'`` values. Types that never occur are absent. The times are
        added as given; no unit conversion is applied.
    """
    totals = {}
    if type(timers) is not list:
        return totals

    for entry in timers:
        kind = entry['type']
        elapsed = entry['time']
        # First sighting stores the value itself; later ones accumulate.
        totals[kind] = totals[kind] + elapsed if kind in totals else elapsed
    return totals

In [5]:
def list_timers(timers):
    """Group the recorded timer values by interaction type.

    Args:
        timers: A list of mappings, each with a ``'type'`` and a ``'time'``
            entry. Any non-list value (for example a missing value) is
            treated as "no timers".

    Returns:
        dict mapping timer type to the list of its ``'time'`` values in
        input order. The six known types ('zoom', 'drag', 'highlight',
        'slower', 'faster', 'slider') are always present, possibly empty.
        A type outside that set gets its own key instead of raising
        ``KeyError``, so one unexpected event cannot abort a whole parse.
        The times are stored as given; no unit conversion is applied.
    """
    known_types = ('zoom', 'drag', 'highlight', 'slower', 'faster', 'slider')
    # Fresh lists on every call so callers never share state.
    lists = {kind: [] for kind in known_types}

    if isinstance(timers, list):
        for timer in timers:
            # setdefault keeps unknown types instead of failing on them.
            lists.setdefault(timer['type'], []).append(timer['time'])
    return lists

In [6]:
def parse_result(df, group, result):
    """Turn one participant's survey rows into one record per answered question.

    The frame is walked row by row, reading the 'question', 'time',
    'interactions' and 'timers' columns:

    * rows whose 'question' contains 'preferences' or 'comments' are skipped;
    * a row whose ``question[0]`` does not contain 'confidence' is taken as an
      answer row and supplies the question id, answer, time, interaction
      counts and timers;
    * on every row where ``index + 1`` is even, ``question[1]`` is read as the
      confidence value and a record is emitted from the most recently seen
      answer row.
      NOTE(review): this presumably relies on answer and confidence rows
      strictly alternating from index 0 - confirm against the survey export.

    Args:
        df: Frame of raw survey rows as described above.
        group: Value stored in the 'group' column of every record.
        result: Value stored in the 'id' column of every record.

    Returns:
        pandas.DataFrame with one row per emitted record. Interaction counts
        are 0 when 'interactions' is not a dict, and whatever ``dict.get``
        returns (possibly ``None``) when it is.
    """
    columns = ['group', 'id', 'question', 'answer', 'time', 'confidence', 'correct_answer', 'correct', 'zoom_timers', 'drag_timers', 'highlight_timers', 'slower_timers', 'faster_timers', 'slider_timers', 'zoom_interactions', 'drag_interactions', 'highlight_interactions', 'slower_interactions', 'faster_interactions', 'slider_interactions']
    # Keys used by list_timers() output and by the 'interactions' dict, in
    # the same order as the corresponding output columns.
    timer_keys = ('zoom', 'drag', 'highlight', 'slower', 'faster', 'slider')
    count_keys = ('zooms', 'drags', 'highlights', 'slower', 'faster', 'slider')

    records = []

    # State carried from an answer row to the confidence row that follows it.
    question, answer, time = '', 0, 0

    for position, entry in df.iterrows():
        payload = entry['question']

        # Feedback rows are handled by parse_feedback, not here.
        if 'preferences' in payload or 'comments' in payload:
            continue

        if 'confidence' not in payload[0]:
            # Answer row: remember everything needed for the next record.
            question = entry['question'][0]
            answer = entry['question'][1]
            time = entry['time']
            interactions = entry['interactions']
            lists = list_timers(entry['timers'])

        if (position + 1) % 2 != 0:
            continue

        confidence = entry['question'][1]
        correct = is_correct(answer, question)

        # Per-type arrays of timer values.
        timer_values = [lists[key] for key in timer_keys]

        # Per-type interaction totals; default to 0 when nothing was logged.
        if type(interactions) is dict:
            counts = [interactions.get(key) for key in count_keys]
        else:
            counts = [0] * len(count_keys)

        records.append([group, result, question, answer, time, confidence,
                        correct_answers.get(question), correct,
                        *timer_values, *counts])

        # Clear the carried answer so it cannot leak into a later record.
        question, answer, time = '', 0, 0

    return pd.DataFrame(records, columns=columns)

In [7]:
def parse_feedback(df, group, result):
    """Collect one participant's preference ratings and free-text comment.

    Rows are scanned in order. A 'question' value containing 'preferences'
    supplies the eight ratings (``preferences[<NL|M>][<JP|SI|TL|AN>]``); one
    containing 'comments' supplies the comment. A single record is emitted on
    the row whose index label equals ``len(df) - 1``, using whatever has been
    seen up to that point (ratings default to 0, the comment to "").

    Args:
        df: Frame of raw survey rows with a 'question' column.
        group: Value stored in the 'group' column.
        result: Value stored in the 'id' column.

    Returns:
        pandas.DataFrame with columns group, id, NL-JP, NL-SI, NL-TL, NL-AN,
        M-JP, M-SI, M-TL, M-AN, comments.
    """
    columns = ['group', 'id', 'NL-JP', 'NL-SI', 'NL-TL', 'NL-AN', 'M-JP', 'M-SI', 'M-TL', 'M-AN', 'comments']
    # (outer key, inner key) pairs, ordered like the rating columns above.
    rating_paths = [
        ('NL', 'JP'), ('NL', 'SI'), ('NL', 'TL'), ('NL', 'AN'),
        ('M', 'JP'), ('M', 'SI'), ('M', 'TL'), ('M', 'AN'),
    ]

    rows = []
    ratings = [0] * len(rating_paths)
    comments = ""
    final_label = len(df) - 1

    for label, entry in df.iterrows():
        payload = entry['question']

        if 'preferences' in payload:
            ratings = [
                payload.get('preferences').get(outer).get(inner)
                for outer, inner in rating_paths
            ]
        elif 'comments' in payload:
            comments = payload.get('comments')

        if label == final_label:
            rows.append([group, result, *ratings, comments])
            ratings = [0] * len(rating_paths)
            comments = ""

    return pd.DataFrame(rows, columns=columns)

In [8]:
# Go through results dir and load all files in data frame
def read_files(directory=r'results'):
    """Parse every ``*.json`` survey file in ``directory`` into two tables.

    Each file name (without extension) is split on '_'; the second piece is
    used as the result id and the third as the group. The file is loaded with
    ``pd.read_json`` and handed to ``parse_result`` and ``parse_feedback``.
    Files that do not end in '.json' are reported and skipped.

    The per-file frames are collected and concatenated once at the end.
    ``DataFrame.append`` is not used because it was removed in pandas 2.0,
    and growing a frame file by file is quadratic.

    Args:
        directory: Folder to scan. Defaults to 'results', the location that
            was previously hard-coded.

    Returns:
        ``[survey_results, feedback_results]``. In ``survey_results`` missing
        values are replaced by 0, and the six ``*_interactions`` columns plus
        'time' are cast to int. Both frames are empty, with the expected
        columns, when no JSON file is found.

    Raises:
        IndexError: If a JSON file name has fewer than three '_'-separated
            pieces.
    """
    survey_columns = ['group', 'id', 'question', 'answer', 'time', 'confidence', 'correct_answer', 'correct', 'zoom_timers', 'drag_timers', 'highlight_timers', 'slower_timers', 'faster_timers', 'slider_timers', 'zoom_interactions', 'drag_interactions', 'highlight_interactions', 'slower_interactions', 'faster_interactions', 'slider_interactions']
    feedback_columns = ['group', 'id', 'NL-JP', 'NL-SI', 'NL-TL', 'NL-AN', 'M-JP', 'M-SI', 'M-TL', 'M-AN', 'comments']
    int_columns = ['zoom_interactions', 'drag_interactions', 'highlight_interactions',
                   'slower_interactions', 'faster_interactions', 'slider_interactions', 'time']

    survey_frames = []
    feedback_frames = []
    survey_count = 0

    # os.listdir order is platform dependent; sort so repeated runs produce
    # identically ordered CSVs.
    for filename in sorted(os.listdir(directory)):
        if not filename.endswith(".json"):
            print('File not JSON', filename)
            continue

        # parse id and group from file name
        name_parts = os.path.splitext(filename)[0].split('_')
        result_id = name_parts[1]
        group = name_parts[2]

        with open(os.path.join(directory, filename), encoding='utf-8', mode='r') as current_file:
            df = pd.read_json(current_file)

        data = parse_result(df, group, result_id)  # parse single survey result
        feedback = parse_feedback(df, group, result_id)  # parse feedback

        # Skip empty frames: they add no rows and only disturb dtype
        # inference inside pd.concat.
        if not data.empty:
            survey_frames.append(data)
        if not feedback.empty:
            feedback_frames.append(feedback)
        survey_count += 1

    if survey_frames:
        survey_results = pd.concat(survey_frames, ignore_index=True)
    else:
        survey_results = pd.DataFrame([], columns=survey_columns)

    if feedback_frames:
        feedback_results = pd.concat(feedback_frames, ignore_index=True)
    else:
        feedback_results = pd.DataFrame([], columns=feedback_columns)

    # Missing counts/times become 0 so the int cast below cannot fail on NaN/None.
    survey_results = survey_results.fillna(0)
    survey_results = survey_results.astype({column: int for column in int_columns})

    print(f'Parsed {survey_count} surveys.')
    return [survey_results, feedback_results]

In [9]:
def save_csv(results, name):
    """Write ``results`` to ``<name>.csv`` and print a confirmation line.

    Args:
        results: Object exposing ``to_csv`` (a pandas DataFrame in this
            notebook).
        name: Output path without the '.csv' extension.
    """
    destination = f'{name}.csv'
    results.to_csv(destination)
    print(f'CSV generated \'{name}.csv\'')

In [10]:
# Parse all survey files, then persist both resulting tables as CSV files
# (save_csv appends the '.csv' extension to the given name).
survey_data, feedback_data = read_files()
save_csv(survey_data, 'survey_results')
save_csv(feedback_data, 'feedback_results')

Parsed 76 surveys.
CSV generated 'survey_results.csv'
CSV generated 'feedback_results.csv'
