# Generate JSON

The purpose of this Notebook is to aggregate the answers from the survey into JSON files, in order to facilitate the future analysis of the results.

In addition, the initial quantitative analysis of the combined results will also take place in this Notebook.

## Expected File Structure

The code expects there to be two folders at the same level as the Notebook itself: an “Answers” folder containing the results for each survey taker, and a “Character Lists” folder with the character order for each survey taker. The files are expected to be named “username Answers.txt” and “username Character List.txt”, respectively, and the code expects there to be one Answers file and one Character List file for every user. It is also expected that there will be a "data" folder one level above this Notebook (the code reads it as `../data`), containing the images for each character, divided between “Main” and “Supporting” subfolders according to their label.

The resulting JSON files will be created at the same level as the Notebook.

In [None]:
# This cell reads the input files and generates the initial answers structure

import pathlib
import codecs

# A dictionary of all characters, divided according to their label
# Each character will have a list of answers
characters = {"Main": {}, "Supporting": {}}
for role in ["Main", "Supporting"]:
    for character in pathlib.Path("../data/"+role).iterdir():
        # Path.name is the final path component on every OS; splitting the
        # string on "\\" only produced the file name on Windows
        characters[role][character.name] = []

# Per-user percentages, appended in the order the answer files are visited
accuracies = []
knowledge = []

# Answer files are named "<username> Answers.txt"
answersSuffix = " Answers.txt"

for answersPath in pathlib.Path("Answers").iterdir():
    answersFileName = answersPath.name
    # Strip the known suffix so that user names containing spaces survive;
    # fall back to the first whitespace-separated token for other file names
    if answersFileName.endswith(answersSuffix):
        userName = answersFileName[:-len(answersSuffix)]
    else:
        userName = answersFileName.split()[0]

    # One answer per line ("Main", "Supporting" or "Known"). Only the line
    # terminator is removed, so a last line without a trailing newline is
    # kept intact
    with open(answersPath, "r", encoding="utf-8") as answersFile:
        answers = [line.rstrip("\r\n") for line in answersFile]

    # Each line is "<character path>,<role>"; the path itself may contain
    # commas, so the role is whatever follows the last comma
    characterList = []
    with open("Character Lists/"+userName+" Character List.txt", "r", encoding="utf-8") as charactersFile:
        for line in charactersFile:
            name, _, role = line.rstrip("\r\n").rpartition(",")
            characterList.append((name, role))

    # Fail before touching `characters` if the two files cannot be paired up
    if len(answers) > len(characterList):
        raise ValueError(
            "More answers than characters for user " + repr(userName)
        )

    rightAnswers = 0
    knownCharacters = 0
    for answer, (characterPath, role) in zip(answers, characterList):
        # The character list stores "/"-separated paths; keep the file name
        characterName = characterPath.split("/")[-1]
        characters[role][characterName].append(answer)
        if answer == "Known":
            knownCharacters += 1
        elif answer == role:
            rightAnswers += 1

    # Accuracy only considers characters the user did not already know.
    # It is undefined (NaN) when every character was known or the file is
    # empty, instead of raising ZeroDivisionError
    guessedAnswers = len(answers) - knownCharacters
    accuracy = rightAnswers/guessedAnswers*100 if guessedAnswers else float("nan")
    known = (knownCharacters/len(answers))*100 if answers else float("nan")
    accuracies.append(accuracy)
    knowledge.append(known)

In [None]:
# Majority vote per character: "Supporting" wins only with strictly more
# votes than "Main"; ties and characters without votes fall back to "Main".
# "Known" answers are tallied but never decide the outcome.

simpleHumanResults = {"Main": {}, "Supporting": {}}

for role in ("Main", "Supporting"):
    for character, givenAnswers in characters[role].items():
        tally = {"Main": 0, "Supporting": 0, "Known": 0}
        for given in givenAnswers:
            tally[given] = tally[given] + 1
        majority = "Supporting" if tally["Supporting"] > tally["Main"] else "Main"
        simpleHumanResults[role][character] = majority

In [None]:
# Saving the dicts as JSON files

import json

# Write through a context manager so the file is flushed and closed as soon
# as the dump finishes, rather than whenever the handle is garbage-collected
with open("Simple Human Results.json", "w") as simpleResultsFile:
    json.dump(simpleHumanResults, simpleResultsFile)

In [None]:
# Save the raw per-character answer lists; the context manager guarantees the
# file is flushed and closed once the dump finishes
with open("Human Results.json", "w") as humanResultsFile:
    json.dump(characters, humanResultsFile)

## Initial Analysis of the Results

In [None]:
# Calculating total accuracy

totalAnswers = 0
knownAnswers = 0
rightAnswers = 0

# Per-character tallies, keyed by true role and then by character name
results = {"Main": {}, "Supporting": {}}

# Per-role tallies laid out as [right, known, total]
roleResults = {"Main": [0, 0, 0], "Supporting": [0, 0, 0]}

for role in ["Main", "Supporting"]:
    for character, givenAnswers in characters[role].items():
        tally = {"total": 0, "known": 0, "right": 0}
        results[role][character] = tally
        for answer in givenAnswers:
            totalAnswers += 1
            tally["total"] += 1
            roleResults[role][2] += 1
            if answer == "Known":
                knownAnswers += 1
                tally["known"] += 1
                roleResults[role][1] += 1
            elif answer == role:
                # An answer is right when it matches the character's true role
                rightAnswers += 1
                tally["right"] += 1
                roleResults[role][0] += 1

# Accuracy ignores "Known" answers. Both ratios are undefined (NaN) when
# their denominator is zero, instead of raising ZeroDivisionError
guessedAnswers = totalAnswers - knownAnswers
accuracy = rightAnswers/guessedAnswers*100 if guessedAnswers else float("nan")
known = (knownAnswers/totalAnswers)*100 if totalAnswers else float("nan")

print("Number of answers: "+str(totalAnswers))
print("Total accuracy: "+str(accuracy)+"%")
print("Percentage of known characters: "+str(known)+"%")

In [None]:
# Characters for which at least one real guess was made ("Known" answers
# excluded) and none of those guesses matched the true role

failures = []

for role in ("Main", "Supporting"):
    for character, tally in results[role].items():
        guessed = tally["total"] - tally["known"]
        if guessed <= 0 or tally["right"] != 0:
            continue
        failures.append(character)

print("Number of characters with no right answers:", len(failures))

In [None]:
# Calculating CrossEntropy of the combined results
# Source: https://stackoverflow.com/questions/47377222/what-is-the-problem-with-my-implementation-of-the-cross-entropy-function

import numpy as np

def cross_entropy(predictions, targets, epsilon=1e-12):
    """
    Return the mean cross entropy between one-hot targets and predictions.

    Predictions are first clipped to [epsilon, 1 - epsilon]; a further
    constant of 1e-9 is then added inside the logarithm, so that constant
    (rather than epsilon) dominates the floor whenever epsilon is smaller.

    Input: predictions (N, k) ndarray of probabilities
           targets (N, k) ndarray, one-hot per row
    Returns: scalar, the summed -targets * log(predictions) divided by N
    """
    clipped = np.clip(predictions, epsilon, 1. - epsilon)
    sample_count = clipped.shape[0]
    log_likelihood = np.sum(targets * np.log(clipped + 1e-9))
    return -log_likelihood / sample_count

In [None]:
# Turn the per-character tallies into [P(Main), P(Supporting)] pairs based on
# the share of right answers among non-"Known" answers, then score them
# against the true labels. Characters without any valid answer are skipped.
groupedResults = []
targets = []
probabilisticHumanResults = {"Main": {}, "Supporting": {}}

# Main characters are processed first, then Supporting ones, so rows of
# groupedResults and targets stay aligned
for role in ("Main", "Supporting"):
    for character, tally in results[role].items():
        validAnswers = tally["total"] - tally["known"]
        if validAnswers == 0:
            continue
        rightChance = tally["right"] / validAnswers
        wrongChance = 1 - rightChance
        if role == "Main":
            targets.append([1, 0])
            mainChance, supportingChance = rightChance, wrongChance
        else:
            targets.append([0, 1])
            mainChance, supportingChance = wrongChance, rightChance
        groupedResults.append([mainChance, supportingChance])
        probabilisticHumanResults[role][character] = [mainChance, supportingChance]

groupedResults = np.array(groupedResults)
targets = np.array(targets)
crossEntropy = cross_entropy(groupedResults, targets)
print("CrossEntropy:", crossEntropy)

In [None]:
# Saving probabilistic results as JSON

# Write through a context manager so the file is flushed and closed as soon
# as the dump finishes, rather than whenever the handle is garbage-collected
with open("Probabilistic Human Results.json", "w") as probabilisticResultsFile:
    json.dump(probabilisticHumanResults, probabilisticResultsFile)