# Mechanical Turk Label Drawer for Polygon Labels

This notebook uses the AWS SDK to pull the answers that workers submitted for the HITs in a batch, and draws the received labels onto the images in our dataset for review.


In [None]:
import boto3
import keys
import pandas as pd
import json
import csv
import cv2
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import requests
import xmltodict

from IPython.display import display, HTML
from io import BytesIO
from PIL import Image


## Classes:

### AWS SDK Class

In [None]:
# # # # AWS API # # # #
class AWS():
    """Helpers around the boto3 Mechanical Turk client."""

    # Initializes the AWS SDK with the credentials provided in keys.py
    @staticmethod
    def botoStart():
        """Create the Mechanical Turk client.

        Credentials are read from ``keys.key1`` (access key id) and
        ``keys.key2`` (secret access key); region and endpoint are the
        values hard-coded below.

        Returns:
            The boto3 ``mturk`` client.
        """
        mturk = boto3.client(
            'mturk',
            aws_access_key_id=keys.key1,
            aws_secret_access_key=keys.key2,
            region_name='us-east-1',
            endpoint_url='https://mturk-requester.us-east-1.amazonaws.com/',
        )
        return mturk

    # Returns the labels for a particular HIT_ID
    @staticmethod
    def workerRes(mturk, hit_id):
        """Print every submitted answer for a HIT and return the last one.

        Args:
            mturk: client exposing ``list_assignments_for_hit``.
            hit_id: id of the HIT whose 'Submitted' assignments are listed.

        Returns:
            The ``FreeText`` of the last answer field of the last assignment,
            or ``None`` when the HIT has no submitted assignment yet.
        """
        worker_results = mturk.list_assignments_for_hit(HITId=hit_id, AssignmentStatuses=['Submitted'])

        if worker_results['NumResults'] <= 0:
            print("No results ready yet")
            return None

        free_text = None
        for assignment in worker_results['Assignments']:
            xml_doc = xmltodict.parse(assignment['Answer'])
            answers = xml_doc['QuestionFormAnswers']['Answer']

            # A HIT layout with a single field is parsed as one mapping instead
            # of a list; normalise so both layouts share the same code path.
            if not isinstance(answers, list):
                answers = [answers]

            for answer_field in answers:
                # str() keeps the printout working when a field was left empty
                print("For input field: " + str(answer_field['QuestionIdentifier']))
                print("Submitted answer: " + str(answer_field['FreeText']))
                free_text = answer_field['FreeText']

        return free_text

### CSV Tools

In [1]:
# # # # CSV Tools # # # #
class CSVTools():
    """Readers for the batch report CSV files stored under ./Batch_Reports/."""

    @staticmethod
    def _readBatchColumn(filename, column):
        """Read one column of a batch report as a list of strings.

        Args:
            filename: name of the report inside ./Batch_Reports/.
            column: header name or zero-based position of the column.

        Returns:
            The column values (header row excluded), or ``None`` when the
            file is missing or cannot be parsed; the reason is printed.
        """
        csvPath = "./Batch_Reports/" + filename
        try:
            # dtype=str keeps ids and urls as text instead of letting pandas
            # infer a numeric type for them
            data = pd.read_csv(csvPath, usecols=[column], dtype=str)
        except FileNotFoundError:
            print(filename + " not found!")
            return None
        except ValueError as err:
            # pandas reports unknown columns and empty/malformed files with
            # ValueError (its parser errors derive from it)
            print("Could not read " + filename + ": " + str(err))
            return None

        print("Current file: " + filename)
        print()
        return data.iloc[:, 0].tolist()

    # Extract the HIT list from the batch report
    @staticmethod
    def CSVBatchtoHitList(filename):
        """Return the values of the 'HITId' column of a batch report.

        Args:
            filename: name of the report inside ./Batch_Reports/.

        Returns:
            List of HIT ids; empty when the report could not be read.
        """
        hit_list = CSVTools._readBatchColumn(filename, 'HITId')
        if hit_list is None:
            return []

        print("Number of HITs: " + str(len(hit_list)))
        return hit_list

    # Extract list of image urls from the batch report
    @staticmethod
    def CSVBatchtoImageList(filename, column=27):
        """Return the image urls listed in a batch report.

        Args:
            filename: name of the report inside ./Batch_Reports/.
            column: zero-based position (or header name) of the image url
                column; defaults to position 27.

        Returns:
            List of image urls; empty when the report could not be read.
        """
        image_list = CSVTools._readBatchColumn(filename, column)
        if image_list is None:
            return []

        print("Number of Images: " + str(len(image_list)))
        return image_list

In [None]:
# # # # OpenCV # # # #
class OpenCVTools():
    """Drawing helpers built on OpenCV."""

    @staticmethod
    def labelDrawer2bmp(realJson, img, image_url, file_prefix):
        """Draw the labelled polygons onto an image and save it as a bitmap.

        Args:
            realJson: iterable of mappings, each with a 'label' and a list of
                'vertices' whose items carry 'x' and 'y'.
            img: image array handed to ``cv2.polylines``; it is drawn on in
                place.
            image_url: used as the file name (without extension) of the output.
            file_prefix: sub-directory of ./Batch_Pictures/ to write into.
        """
        for target in realJson:
            label = target['label']
            if label not in ('Label_1', 'Label_2'):
                continue
            print('entered ' + label)

            points = [[v['x'], v['y']] for v in target['vertices']]
            if not points:
                # polylines rejects an empty point array, so skip it
                continue

            # polylines needs 32-bit integer coordinates; round in case the
            # annotation holds fractional values
            polygon = np.rint(np.array(points, dtype=np.float64)).astype(np.int32)
            cv2.polylines(img, [polygon], True, (0, 255, 255))

        # Write labeled image to bitmap
        out_path = "./Batch_Pictures/" + file_prefix + '/' + image_url + ".bmp"
        if not cv2.imwrite(out_path, img):
            # imwrite signals failure through its return value
            print("Could not write " + out_path)

## Main Program

In [None]:
# Open the Mechanical Turk client
mturk = AWS.botoStart()

# Batch reports are expected to be named <prefix>Report.csv (Set#Batch + Report.csv);
# change file_prefix to follow your own batch naming convention
file_prefix = 'Set1Batch'
file_name = '{}Report.csv'.format(file_prefix)

# Pull the HIT ids, then the image urls, out of the batch report
hit_list = CSVTools.CSVBatchtoHitList(file_name)
image_list = CSVTools.CSVBatchtoImageList(file_name)

In [None]:
# Set S3 Bucket Prefix where images are hosted
S3_Bucket_Prefix = "https://mturkbucketglab.s3.amazonaws.com/BUCKET_GOES_HERE"

# Iterate through HIT's to generate labeled images as bitmaps
for hit_id, image_url in zip(hit_list, image_list):
    # Call Worker Results and return json string
    jsonStr = AWS.workerRes(mturk, hit_id)
    if not jsonStr:
        # Nothing to draw for this HIT; keep going with the rest of the batch
        print("Skipping HIT " + str(hit_id) + ": no submitted answer")
        continue

    # Converts string to json
    realJson = json.loads(jsonStr)

    # Generate full url for each image
    full_url = S3_Bucket_Prefix + image_url
    response = requests.get(full_url)
    if not response.ok:
        print("Skipping " + full_url + ": HTTP " + str(response.status_code))
        continue

    # PIL decodes to RGB while OpenCV draws and writes in BGR order, so
    # normalise to 3-channel RGB and swap the channels before handing it to CV2
    rgb = np.array(Image.open(BytesIO(response.content)).convert("RGB"), dtype=np.uint8)
    img = cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR)

    # Generate BitMaps for every image that was labeled
    OpenCVTools.labelDrawer2bmp(realJson, img, image_url, file_prefix)
    