# Human Classification Test

This Notebook implements a survey meant to be run on Google Colab. The code implements an interface survey takers can use and uses the drive API to load the images and save the answers. 

## Expected Files and Folders

The code expects you to provide the drive IDs of “Main.zip” and “Supporting.zip”, containing the images for the survey. You can individually zip the “Main” and “Supporting” folders inside the “Data” folder to get the same files used originally.

It's also required that you provide the drive IDs for a folder where the answers will be saved and one for the random character orders of each user. Inside those folders, files are expected to be named “username Answers.txt” and “username Character List.txt”.

Once you’ve adjusted the folder IDs and made sure the code is working, feel free to delete this cell, as it’s not meant to be seen by survey takers.

In [None]:
# Downloading the images from Google Drive

import io
from googleapiclient.discovery import build
from googleapiclient.http import MediaIoBaseDownload
from google.colab import auth
import zipfile

#@title Getting Started
#@markdown To start the task, just click the play (arrow) button on the left.<br/>
#@markdown You'll be asked to connect to your google account. 
#@markdown (Don't worry, we don't have access to anything related to your data, it just needs to be connected to any google account in order to access the files located in my drive) <br/>
#@markdown After that, keep following the instructions on the cells below.

# Connecting to the drive API
auth.authenticate_user()
driveService = build('drive', 'v3')

# Downloading "Main.zip" to the Colab environment and unzipping it
mainID = '' #File ID of "Main.zip"
request = driveService.files().get_media(fileId=mainID)
# The "with" block closes the archive as soon as the download loop ends,
# so the handle is released before zipfile reopens the file for extraction.
with io.FileIO('Main.zip', mode='wb') as fh:
    downloader = MediaIoBaseDownload(fh, request)
    done = False
    while not done:
        # Each call fetches one chunk and reports the overall progress
        status, done = downloader.next_chunk()
        print("Loading Image Set 1 %d%%." % int(status.progress() * 100))
with zipfile.ZipFile('Main.zip', 'r') as zip_ref:
    zip_ref.extractall('./data')

# Downloading "Supporting.zip" to the Colab environment and unzipping it
supportingID = '' #File ID of "Supporting.zip"
request = driveService.files().get_media(fileId=supportingID)
with io.FileIO('Supporting.zip', mode='wb') as fh:
    downloader = MediaIoBaseDownload(fh, request)
    done = False
    while not done:
        status, done = downloader.next_chunk()
        print("Loading Image Set 2 %d%%." % int(status.progress() * 100))
with zipfile.ZipFile('Supporting.zip', 'r') as zip_ref:
    zip_ref.extractall('./data')

In [None]:
# Getting user data and loading their previous progress, if it exists

import pathlib
import codecs
import random
from googleapiclient.http import MediaFileUpload

# Survey state for the current user.
# "characters": every character, in a random order unique to each user.
# "answers": the answers the user has given, one for each character,
#            in the same order as the "characters" list.
characters, answers = [], []
# Becomes True further down once a previous save is found for this user
saveExists = False

#@title Insert your username
#@markdown If you have already started the task before, insert the same username and you'll continue from where you last left off.<br/>
#@markdown After inserting your username, click the Play (Arrow) button on the left to confirm.

# Textbox to insert username
username = '' #@param {type:"string"}
# The username with its spaces removed; this is the name used in the file names
realName = "".join(username.split(" "))

# Loads the "Character List" file for the current user and populates the "characters" list
# Only to be called if there is an "answers" file for the user
def loadCharacterOder():
  """Download the current user's "Character List" file and fill "characters".

  Lists the files of the "Character List" Drive folder, downloads the first
  one whose name starts with realName and appends one (name, role) tuple per
  line of that file to the global "characters" list.

  Raises:
    FileNotFoundError: if the folder holds no "Character List" file for the user.
  """
  global characters

  # Looking for the "Character List" file of the current user
  # and downloading it once found
  charactersFolderID = "" #Folder ID of where to find the "Character List" files
  querry = f"parents = '{charactersFolderID}'"
  res = driveService.files().list(q=querry, pageSize=1000).execute()
  charactersFileName = ""
  for f in res.get("files") or []:
    # Comparing one-element lists avoids an IndexError on a blank file name
    if f["name"].split()[:1] == [realName]:
      charactersFileName = f["name"]
      charactersFileRes = driveService.files().get_media(fileId=f["id"])
      # The handle is closed before the file is reopened for reading below
      with io.FileIO(charactersFileName, mode='w') as fh:
        downloader = MediaIoBaseDownload(fh, charactersFileRes)
        done = False
        while not done:
          status, done = downloader.next_chunk()
          print("Loading your character list %d%%." % int(status.progress() * 100))
      break

  if not charactersFileName:
    raise FileNotFoundError('No "Character List" file found for user "'+realName+'"')

  # Reading the "Character List" file and populating the "characters" list
  with codecs.open(charactersFileName, "r", "utf-8") as f:
    for line in f:
      # Only the line terminator is removed, so a final line without a
      # newline keeps the last letter of its role
      line = line.rstrip("\r\n")
      if not line:
        continue
      # The role is whatever follows the last comma; the name may contain commas
      name, _, role = line.rpartition(",")
      characters.append((name, role))

# Loads the "Answers" file for the current user and populates the "answers" list
def loadAnswers(answersFile):
  """Download the given "Answers" file and append its lines to "answers".

  Args:
    answersFile: Drive file entry of the user's "Answers" file; its "name"
      and "id" keys are read.
  """
  global answers

  # Downloading the "Answers" file for the current user
  answersFileName = answersFile["name"]
  answersFileRes = driveService.files().get_media(fileId=answersFile["id"])
  # The handle is closed before the file is reopened for reading below
  with io.FileIO(answersFileName, mode='w') as fh:
    downloader = MediaIoBaseDownload(fh, answersFileRes)
    done = False
    while not done:
      status, done = downloader.next_chunk()
      print("Loading your previous answers %d%%." % int(status.progress() * 100))

  # Reading the "Answers" file and populating the "answers" list
  with open(answersFileName, "r") as f:
    for line in f:
      # Only the newline is removed, so a last line without one stays intact
      answers.append(line.rstrip("\n"))

# Searching the "Answers" folder for a save that belongs to the current user.
# A file belongs to the user when the first word of its name equals realName.
answersFolderID = "" #Folder ID of where to find the "Answers" files
folderListing = driveService.files().list(
    q=f"parents = '{answersFolderID}'",
    pageSize=1000,
).execute()
previousSave = next(
    (entry for entry in folderListing.get("files")
     if entry["name"].split()[0] == realName),
    None,
)

# When a save exists, the user's character order is restored first,
# followed by the answers given so far
if previousSave is not None:
  loadCharacterOder()
  loadAnswers(previousSave)
  saveExists = True

# If an "Answers" file wasn't found, it's a new user
# Generating a random character order for the user
if not saveExists:
  # Populating and shuffling the "characters" list
  for role in ["Main", "Supporting"]:
    for character in pathlib.Path("data/"+role).iterdir():
      # NOTE: splitting on "\\" only shortens paths that use backslashes.
      # With "/" separators the whole relative path is kept, and that stored
      # value is what gets opened later to display the image.
      name = str(character).split("\\")[-1]
      characters.append((name, role))
  random.shuffle(characters)

  # Creating a "Character List" file, one "name,role" line per character
  charactersFileName = realName+" Character List.txt"
  with codecs.open(charactersFileName, "w", "utf-8") as f:
    for character in characters:
      f.write(character[0]+","+character[1]+"\n")

  charactersFolderID = "" #Folder ID of where to find the "Character List" files

  # A user who left before their first save already has a "Character List" on
  # drive. Uploading never replaces a file with the same name, so the IDs of
  # such leftovers are collected here and removed once the new list is
  # uploaded; otherwise a later session could load the old order together
  # with answers given for the new one.
  querry = f"parents = '{charactersFolderID}'"
  res = driveService.files().list(q=querry, pageSize=1000).execute()
  staleFileIDs = [driveFile["id"] for driveFile in res.get("files") or []
                  if driveFile["name"].split()[:1] == [realName]]

  # Uploading the "Character List" file to google drive
  charactersFileMetadata = {
                             'name': charactersFileName,
                             "parents" : [charactersFolderID]
                           }
  charactersMedia = MediaFileUpload(charactersFileName, mimetype='text/plain')
  charactersFile = driveService.files().create(body=charactersFileMetadata,
                                      media_body=charactersMedia,
                                      fields='id').execute()

  # Removing the leftover lists only after the new one is safely uploaded
  for staleFileID in staleFileIDs:
    driveService.files().delete(fileId=staleFileID).execute()

In [None]:
# Running the actual survey via UI

import ipywidgets as widgets
from IPython.display import display
from IPython.display import clear_output
from googleapiclient.http import MediaFileUpload

#@title The Classification Task
#@markdown You'll be asked to guess if a character is a Main Character or a Supporting Character, based purely on an image of said character.<br/>
#@markdown If you already know the character just pick the option that says so. 
#@markdown We want to measure human performance on this task when the only information a person has is an image of the character.<br/>
#@markdown You can stop at any point by clicking the "Stop and Save" button and come back later to continue from where you left off.<br/>
#@markdown To get started, press the play button on the left, followed by the "Start" button that will appear.

# Index of the next character to show; starts at the number of answers
# already given, so a returning user continues where they left off
i = len(answers)
# Dropdown shown for the previous character. The empty string is a placeholder
# meaning "no character shown yet"; the "Next Character" callback checks for a
# str before reading a value from it
prevWidget = ""

# Uploads the "Answers" file to Google Drive
def saveData():
  """Upload the local "<realName> Answers.txt" file and drop older copies.

  The local file must already exist. The copies found on drive for this user
  are deleted only after the new upload has succeeded, so a failed upload
  never leaves the user without a saved file.
  """
  answersFolderID = "" #Folder ID of where to find the "Answers" files

  # The drive API doesn't override a file if you upload one with the same name,
  # instead allowing the two to exist, so the previous "Answers" files of the
  # user have to be deleted. Their IDs are collected before the upload so the
  # new file is not among them.
  querry = f"parents = '{answersFolderID}'"
  res = driveService.files().list(q=querry, pageSize=1000).execute()
  oldFileIDs = [driveFile["id"] for driveFile in res.get("files") or []
                if driveFile["name"].split()[:1] == [realName]]

  # Uploading the new "Answers" file
  answersFileName = realName+" Answers.txt"
  answersFileMetadata = {
                          'name': answersFileName,
                          "parents" : [answersFolderID]
                        }
  answersMedia = MediaFileUpload(answersFileName, mimetype='text/plain')
  answersFile = driveService.files().create(body=answersFileMetadata,
                                      media_body=answersMedia,
                                      fields='id').execute()

  # Deleting every older copy now that the new one is stored
  for oldFileID in oldFileIDs:
    driveService.files().delete(fileId=oldFileID).execute()

# Shows the user's current accuracy
# "state" is used to display in the text if the result is final or the survey is still ongoing
def showAccuracy(state):
  """Print the user's accuracy and the share of characters they did not know.

  Accuracy only counts characters the user guessed, i.e. answers other than
  "Known". Reads the global "answers" and "characters" lists.

  Args:
    state: word placed in the message, e.g. "current" or "final".
  """
  totalAnswers = len(answers)
  if totalAnswers == 0:
    # Nothing answered yet: both percentages would divide by zero
    print("Your "+state+" accuracy: not available yet (no characters classified)")
    return

  rightAnswers = 0
  knownCharacters = 0
  for answer, character in zip(answers, characters):
    if answer == "Known":
      knownCharacters += 1
    elif answer == character[1]:
      rightAnswers += 1

  guessedCharacters = totalAnswers - knownCharacters
  if guessedCharacters == 0:
    # Every answer was "Known", so there is no guess to score
    print("Your "+state+" accuracy: not available (every character was already known)")
  else:
    print("Your "+state+" accuracy: "+str((rightAnswers/guessedCharacters)*100)+"%")
  print("Percentage of unknown characters: "+str(100-(knownCharacters/totalAnswers)*100)+"%")

# Callback for the "Check Your Accuracy" button
# Shows current accuracy
def accuracyButtonClicked(b):
  """Print the accuracy so far by calling showAccuracy("current").

  Args:
    b: value supplied when the click callback fires; not used here.
  """
  showAccuracy("current")

# Callback for the "Stop and Save" button
# Clears the screen, saves the user's answers and shows current accuracy
def stopButtonClicked(b):
  """Wipe the output, store the answers locally and on drive, then report accuracy.

  Args:
    b: value supplied when the click callback fires; not used here.
  """
  clear_output()

  # Writing every recorded answer, one per line, to the local "Answers" file
  # that saveData() uploads to Google Drive
  localAnswersPath = realName+" Answers.txt"
  with open(localAnswersPath, "w") as answersOut:
    answersOut.writelines(answer+"\n" for answer in answers)

  saveData()
  showAccuracy("current")

# Callback for the "Next Character" button
# Clears the screen, displays the next character and sets up the UI
def nextButtonClicked(b):
  """Record the previous answer, then show the next character or the final result.

  Also used as the callback of the "Start" button, in which case there is no
  previous answer to record.

  Args:
    b: value supplied when the click callback fires; not used here.
  """
  global i
  global prevWidget
  clear_output()

  # Getting the answer for the previous character.
  # prevWidget is still the "" placeholder when no character has been shown yet.
  answerRecorded = not isinstance(prevWidget, str)
  if answerRecorded:
    answers.append(prevWidget.value)

  surveyFinished = i >= len(characters)

  # Saving every 50 characters,
  # to ensure there isn't a large loss of progress in case of disconnect from colab.
  # Also saving when the last answer has just been recorded: the final screen
  # shows no buttons, so this is the only chance to store those answers.
  if (i != 0 and i%50 == 0) or (surveyFinished and answerRecorded):
    with open(realName+" Answers.txt", "w") as f:
      for answer in answers:
        f.write(answer+"\n")
    saveData()

  if surveyFinished:
    # The user has classified every character, displaying their final accuracy
    prevWidget = ""  # nothing is pending anymore, so no answer can be recorded twice
    showAccuracy("final")
    return

  # Showing the current progress and how many characters there are left
  print(str(i+1)+"/"+str(len(characters)))

  # Loading and displaying the current character's image
  imgPath = characters[i][0]
  with open(imgPath, "rb") as imgFile:
    image = imgFile.read()
  imgWidget = widgets.Image(
      value=image,
      format='png',
      width=225,
      height=350,
  )

  # Widget used to get the user's answer for the current character
  dropdownWidget = widgets.Dropdown(
      options=[("I Already Know the Character", "Known"), ("Main Character", "Main"), ("Supporting Character", "Supporting")],
      value="Known",
      description='Looks like a:',
      disabled=False,
  )

  # Displaying the UI
  display(imgWidget, dropdownWidget, nextButton, accuracyButton, stopButton)
  i += 1

  # Saving the current widget so that its value can be accessed in the next iteration.
  # If you try to access it now, you'll do it before the user has had the chance to
  # interact with it and, therefore, get the default value
  prevWidget = dropdownWidget

# Builds a button with the given caption and registers its click callback
def _makeButton(caption, onClick):
  button = widgets.Button(description=caption)
  button.on_click(onClick)
  return button

# The survey's buttons; "Start" and "Next Character" share the same callback
startButton = _makeButton("Start", nextButtonClicked)
nextButton = _makeButton("Next Character", nextButtonClicked)
accuracyButton = _makeButton("Check Your Accuracy", accuracyButtonClicked)
stopButton = _makeButton("Stop and Save", stopButtonClicked)

# Only the "Start" button is visible at first
display(startButton)