# Introduction

This notebook will walk you through creating and monitoring your HITs. 

It provides methods to create HITs, pretty-print HIT and assignment status, expire/edit HITs, create qualifications, and download collected data. 

Before continuing, make sure that you have read the README and set all config fields to their desired values.

## Requirements: 

This code requires Python3 and the following packages: 
- boto3 
- beautiful soup 4

Before using, you will have to set up an authentication key to use the Amazon API and include it in a credentials file. See here: https://aws.amazon.com/developers/getting-started/python/

# Setup

Read the config file and establish a connection to MTurk.

A connection is made to production or to the sandbox based on values in the config. 

In [22]:
import datetime
import boto3
import json
import copy
import pprint
from bs4 import BeautifulSoup as bs 
from uuid import uuid4
from Naked.toolshed.shell import muterun_js
import json
import math
from IPython.core.display import display, HTML

In [23]:
# Sandbox or Production? You only spend money in Production.
USING_PROD = True

# Safety flags that prevent you from accidentally messing up your HITs. 
# Set to False except when you are performing these specific tasks.
ALLOW_HIT_CREATION = True
ALLOW_ASSIGNMENT_ADDITION = False
ALLOW_CREATE_QUAL = True
ALLOW_UPDATE_EXPIRATION = False

In [24]:
if USING_PROD:
    print("USING PROD")
    endpoint_url = 'https://mturk-requester.us-east-1.amazonaws.com'
    origin="production"
else:
    print("USING SANDBOX")
    endpoint_url = 'https://mturk-requester-sandbox.us-east-1.amazonaws.com'
    origin="sandbox"

session = boto3.session.Session(profile_name='default')
cl = session.client('mturk', region_name='us-east-1', endpoint_url=endpoint_url)

USING PROD


# Make new HIT

In [4]:
# List of qualifications that you will use to filter potential workers. 
# These require that workers come from the US and have an approval rating >= 95%
# Edit this list to specify different qualifications for workers 
QUALS = [
    {
        'QualificationTypeId': '00000000000000000071',
        'Comparator': 'EqualTo',
        'LocaleValues': [{
            'Country': 'US',
        }],
    },
    {
        'QualificationTypeId': '000000000000000000L0',
        'Comparator': 'GreaterThanOrEqualTo',
        'IntegerValues': [
            95
        ],
    },
]

In [5]:
# generic helper that sets metadata fields based on the config file.
def create_hit(task, questionText, quals=QUALS): 
    if ALLOW_HIT_CREATION:
        response = cl.create_hit(
            Title=task['title'],
            Description=task['description'],
            Keywords=','.join(task['keywords']),
            MaxAssignments=task['numAssignments'],
            Reward=task['rewardAmount'],
            AssignmentDurationInSeconds=task['duration'],
            LifetimeInSeconds=task['lifetime'],
            AutoApprovalDelayInSeconds=604800,
            Question=questionText,
            QualificationRequirements=quals,
        )
        print(response)
    else:
        raise RuntimeError("This action is not currently enabled; set `ALLOW_HIT_CREATION` to true to proceed with this action")

# creates a HIT in the form of an External Question inside an iFrame
def create_hit_iframe(task):
    questionText = "<ExternalQuestion xmlns=\"http://mechanicalturk.amazonaws.com/AWSMechanicalTurkDataSchemas/"
    questionText += "2006-07-14/ExternalQuestion.xsd\">\n<ExternalURL>" + task['taskUrl']
    questionText += "</ExternalURL>\n  <FrameHeight>700</FrameHeight>\n</ExternalQuestion>"
    create_hit(task, questionText)
    
# Helper to create a HIT in the form of a simple UI with a link to an external page and an
# input box for a completion code 
def create_hit_external(task):
    with open('questionform_template.xml', 'r') as myfile:
        template=myfile.read() 
    question_xml = template % (task["title"], task["instructions"], task['taskUrl'])
    create_hit(task, question_xml)

In [6]:
# Use this cell to configure your HIT
dataset = 'infographics_zoomlens' # file in datasets dir (without extension)
big_name = 'Infographics'

hit_config = {
    # you probably want to change these
    "numAssignments": 120, # per HIT
    "rewardAmount": "1.25",
    "lifetime": 24 * 60 * 60, # in seconds
    # one HIT gets created per dictionary
    # you can add items to the dict to override the values above
    # alternatively, provide a number to create that many HITs with the default settings
    "HITs": 1, # DO NOT SET ABOVE 1; IF YOU CHANGE THIS TO A DICT YOU NEED TO INTEGRATE WITH THE CODE ABOVE
    # you probably do not want to change these
    "title": "Zoom Maps: " + big_name,
    "description": "Pinch and Zoom to look at " + big_name,
    "keywords": ["zoom", "pinch", "gesture", "mobile", "phone", "tablet", "image", "photo", "picture"],
    "duration": 30 * 60, # in seconds
    "taskUrl": "https://enigmatic-everglades-39931.herokuapp.com?dataset=" + dataset
}

external_submit = False # DO NOT CHANGE -- this means use iFrame
if isinstance(hit_config['HITs'], int):
    hit_config['HITs'] = [{} for i in range(hit_config['HITs'])]

In [8]:
# Use this cell to launch your HIT! 
hit_creation_function = create_hit_external if external_submit else create_hit_iframe

print("creating " + str(len(hit_config['HITs'])) + " HITs")
for hit_config_override in hit_config['HITs']: 
    task = copy.deepcopy(hit_config)
    task.update(hit_config_override)
    hit_creation_function(task)

creating 1 HITs
{'HIT': {'HITId': '34HEO7RUG637KZS9TNQ57N6GLM9ARY', 'HITTypeId': '3P23GFB3B7X2SRMFBS0XHO2AAOQDBA', 'HITGroupId': '33UIGJTG3OGMX2ZCQR1ZS700VNYZ1D', 'CreationTime': datetime.datetime(2019, 4, 4, 21, 42, 14, tzinfo=tzlocal()), 'Title': 'Zoom Maps: Infographics', 'Description': 'Pinch and Zoom to look at Infographics', 'Question': '<ExternalQuestion xmlns="http://mechanicalturk.amazonaws.com/AWSMechanicalTurkDataSchemas/2006-07-14/ExternalQuestion.xsd">\n<ExternalURL>https://enigmatic-everglades-39931.herokuapp.com?dataset=infographics_zoomlens</ExternalURL>\n  <FrameHeight>700</FrameHeight>\n</ExternalQuestion>', 'Keywords': 'zoom,pinch,gesture,mobile,phone,tablet,image,photo,picture', 'HITStatus': 'Assignable', 'MaxAssignments': 120, 'Reward': '1.25', 'AutoApprovalDelayInSeconds': 604800, 'Expiration': datetime.datetime(2019, 4, 5, 21, 42, 14, tzinfo=tzlocal()), 'AssignmentDurationInSeconds': 1800, 'QualificationRequirements': [{'QualificationTypeId': '0000000000000000007

# HIT monitoring helpers

Helper functions that will be useful for monitoring the status of your HIT. See next section for how to use them.

In [7]:
# Contacts MTurk API to get all assignments for a HIT
# Returns them in a list. 
def get_all_assignments(hitid): 
    assignments = []
    should_continue = True
    next_token = False
    while (should_continue): 
        args = {
            'HITId': hitid, 
            'MaxResults': 100
        }
        if (next_token): 
            args['NextToken'] = next_token
        r = cl.list_assignments_for_hit(**args)
        next_token = r.get('NextToken', False)
        assignments.extend(r["Assignments"])
        should_continue = len(r["Assignments"]) > 0
    return assignments

# Summarizes all hits in `hits` in a human-readable way. 
# Prints out the HIT Title, id, if it is expired, and how many assignments it has
# completed, pending, and left for work. 
def summarize_hits(hits): 
    ret = ""
    for hit in hits: 
        expiration = hit['Expiration'].replace(tzinfo=None)
        is_expired = expiration < datetime.datetime.now()
        available = hit['NumberOfAssignmentsAvailable']
        pending = hit['NumberOfAssignmentsPending']
        approved = hit['NumberOfAssignmentsCompleted']
        completed = hit['MaxAssignments'] - available - pending - approved
        description = ("Title: {title}\n" 
        "ID: {hid}\n"
        "\tAssignments available: {left}\n"
        "\tAssignments pending: {pending}\n"
        "\tAssignments awaiting approval: {completed}\n"
        "\tAssignments completed (approved): {approved}\n"
        "\tExpired: {exp}\n\n").format(
            title=hit['Title'], 
            hid=hit['HITId'], 
            left=available, 
            pending=pending,
            completed=completed,
            approved=approved,
            exp=str(is_expired)
        )
        ret += description
    print(ret)
    
# Prints a human-readable summary of all pending/submitted/approved assignments for all hits in `hits`
def summarize_assignments(hits):
    ret = ""
    for hit in hits: 
        hid = hit['HITId']
        title =  hit['Title']
        name = "HIT %s: %s" % (hid, title)
        ret += name + "\n"
        assignments = get_all_assignments(hid)
        if len(assignments) == 0: 
            ret += "\tNo pending/submitted/approved assignments for this HIT\n"
        for a in assignments: 
            desc = "\tAssignment {aid}\n\t\tStatus: {status}\n".format(aid=a['AssignmentId'], status=a['AssignmentStatus'])
            ret += desc
    print(ret)
    
# Refreshes data about the requested hits
def get_last_n_hits(n=1, offset=0): 
    return cl.list_hits(MaxResults=(n + offset))['HITs'][offset:]

def get_hits_by_title(title, howmany=1):
    hits = []
    i = 0
    while len(hits) < howmany:
        got_hits = get_last_n_hits(10, i)
        for got_hit in got_hits:
            if got_hit['Title'] == title:
                hits.append(got_hit)
        i += 10
    return hits[:howmany]

# HIT monitoring

In [6]:
# refresh hits
hits = get_last_n_hits(100)

In [8]:
hits = get_hits_by_title('Zoom Maps: Infographics', 1)

In [9]:
# Summarizes all outstanding HITs
summarize_hits(hits)

Title: Zoom Maps: Infographics
ID: 34HEO7RUG637KZS9TNQ57N6GLM9ARY
	Assignments available: 0
	Assignments pending: 0
	Assignments awaiting approval: 120
	Assignments completed (approved): 0
	Expired: False




In [None]:
# Summarizes assignments for all oustanding HITs 
summarize_assignments(hits)

# Approve HITs

Approves all outstanding assignments for the HITs displayed above. 

In [14]:
def approve_all(hits): 
    num_approved = 0
    for hit in hits: 
        # make sure you keep getting assignments 
        assignments = get_all_assignments(hit["HITId"])
        #print(assignments)
        for a in assignments: 
            if a['AssignmentStatus'] != 'Approved':
                print("Approving assignment")
                num_approved += 1
                cl.approve_assignment(AssignmentId=a['AssignmentId'])
    print("Approved %d assignments" % num_approved)

In [15]:
approve_all(hits)

Approving assignment
Approving assignment
Approving assignment
Approving assignment
Approving assignment
Approving assignment
Approving assignment
Approving assignment
Approving assignment
Approving assignment
Approving assignment
Approving assignment
Approving assignment
Approving assignment
Approving assignment
Approving assignment
Approving assignment
Approving assignment
Approving assignment
Approving assignment
Approving assignment
Approving assignment
Approving assignment
Approving assignment
Approving assignment
Approving assignment
Approving assignment
Approving assignment
Approving assignment
Approving assignment
Approving assignment
Approving assignment
Approving assignment
Approving assignment
Approving assignment
Approving assignment
Approving assignment
Approving assignment
Approving assignment
Approving assignment
Approving assignment
Approving assignment
Approving assignment
Approving assignment
Approving assignment
Approving assignment
Approving assignment
Approving ass

# Update expiration or num tasks

In [None]:
# changes the expiration date on a HIT to days_from_now days in the future
def update_expiration(hitid, days_from_now): 
    if ALLOW_UPDATE_EXPIRATION: 
        days = days_from_now*datetime.timedelta(days=1)
        expire_time = datetime.datetime.now() + days

        response = cl.update_expiration_for_hit(HITId=hitid, ExpireAt=expire_time)
        print(response)
        return response
    else: 
        raise RuntimeError("This action is not currently enabled; set `ALLOW_UPDATE_EXPIRATION` to true to proceed with this action")
    
def expire_hit(hit): 
    return update_expiration(hit, -10)

In [None]:
def add_assignments(hitid, num_assignments): 
    if ALLOW_ASSIGNMENT_ADDITION: 
        response = cl.create_additional_assignments_for_hit(
            HITId=hitid,
            NumberOfAdditionalAssignments=num_assignments
        )
        print(response)
        return response
    else: 
        raise RuntimeError("This action is not currently enabled; set `ALLOW_ASSIGNMENT_ADDITION` to true to proceed with this action")

In [None]:
# Use this cell to expire a HIT 
hit_id = ''
expire_hit(hit_id)

In [None]:
# Use this cell to update a HIT's expiration date
hitid = ''
days_from_now = 1
update_expiration(hit_id, days_from_now)

In [None]:
# Use this cell to add assignments to a HIT 
hit_id = "FILL THIS IN"
num_assignments_to_add = 0
add_assignments(hit_id, num_assignments_to_add)

# Add custom qualifications 

## Add a qualification to disqualify workers who have done work before

- uses "negative qualification" method from https://github.com/cloudyr/MturkR/wiki/qualifications-as-blocks

#### NOTE: quals are kept separate for the sandbox and prod. Make sure you are creating and assigning your quals in prod. 

### Structure of a new qualification

In [17]:
NEW_QUAL = {
    'Name': 'qualName',
    'Keywords': ['keywords', 'for', 'qual'],
    'Description': 'What is this qual, and why are you assigning it?',
    'QualificationTypeStatus': 'Active',
    'AutoGranted': False
}

### Helpers for creating, viewing, and assigning qualifications

In [18]:
# Registers a custom qualification with MTurk 
def create_qual(new_qual):
    if ALLOW_CREATE_QUAL:
        qual_kwargs = copy.deepcopy(new_qual)
        qual_kwargs['Keywords'] = ','.join(qual_kwargs['Keywords'])
        response = cl.create_qualification_type(**qual_kwargs)
#         print(response)
        Id = response['QualificationType']['QualificationTypeId']
        return Id
    else: 
        raise RuntimException("This action is not currently enabled; set `ALLOW_CREATE_QUAL` to true to proceed with this action")
        
# Gets all the custom quals you have created and prints them
def list_quals(): 
    response = cl.list_qualification_types(
            Query='hasCompletedVisualGraphRecallTask',
            MustBeRequestable=False
    )
    print(response)
    
# Assigns a qualification to a worker 
def assign_qual(qual_id, worker_ids): 
    for worker in worker_ids: 
        response = cl.associate_qualification_with_worker(
                QualificationTypeId=qual_id, 
                WorkerId=worker,
                IntegerValue=1,
                SendNotification=False
        )
        print(response)
        assert response
        
# Gets the ids of all workers who worked on a particular hit 
def get_workers_for_hit(hitid): 
    a = get_all_assignments(hitid)
    workers = [a_['WorkerId'] for a_ in a]
    return workers
    
# Confirms that every worker in worker_ids has qual with qual_id
def confirm_quals(qual_id, worker_ids): 
    for w in worker_ids: 
        response = cl.get_qualification_score(
                QualificationTypeId=qual_id,
                WorkerId=w
        )
        response = response['Qualification']
        assert response['Status'] == 'Granted'
        assert response['IntegerValue'] == 1
        
# Assigns qual with `qual_id` to every worker who has completed an assignment for the hit with `hitid`
def assign_qual_for_hit(hitid, qual_id): 
    workers = get_workers_for_hit(hitid)
    print("got workers")
    assign_qual(qual_id, workers)
    print("assigned qual")
    confirm_quals(qual_id, workers)
    print("confirmed qual")

### Use the following cells to manipulate qualifications

In [23]:
# Use this cell to view the custom qualifications you have created
list_quals()

{'NumResults': 1, 'NextToken': 'p1:z27l+YreZcx8GA9dbYVFvssUHtkQppnDW9lspWhWu8KT+8tDK1AEQAzmc445AQ==', 'QualificationTypes': [{'QualificationTypeId': '3Q3I6L0BKOF89DJMPYQCYLJBC2VW5E', 'CreationTime': datetime.datetime(2018, 3, 23, 17, 19, 7, tzinfo=tzlocal()), 'Name': 'hasCompletedVisualGraphRecallTask', 'Description': 'Assigned to people who have already completed work on a visual graph recall task. A worker can only complete this type of task once.', 'Keywords': 'Already completed visual graph recall task', 'QualificationTypeStatus': 'Active', 'IsRequestable': True, 'AutoGranted': False}], 'ResponseMetadata': {'RequestId': '53ac8562-0d77-4b85-8241-6d21e0ea511b', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amzn-requestid': '53ac8562-0d77-4b85-8241-6d21e0ea511b', 'content-type': 'application/x-amz-json-1.1', 'content-length': '531', 'date': 'Thu, 04 Apr 2019 16:55:31 GMT'}, 'RetryAttempts': 0}}


In [24]:
# Use this cell to create a new qual 
qual_to_create = {}
create_qual(qual_to_create)

NameError: name 'RuntimException' is not defined

In [None]:
# Use this cell to assign a custom qual to every worker who has done a specific HIT
hit_id = "FILL THIS IN"
qual_id_to_assign = "FILL THIS IN"
assign_qual_for_hit(hit_id, qual_id_to_assign)

# Create Compensation HIT

Mistakes happen, and sometimes they can lead to a worker who put in an honest effort being unable to complete a task and get paid. It's a good idea to compensate these workers when they reach out because it helps maintain relations with workers and is the right thing to do.

However, workers can only be paid upon completing a task. The workaround is to create a custom qualification, assign it to the worker you want to compensate, and create a no-work HIT requiring the custom qualification. This code does that.

In [25]:
# worker_ids is str[]
# compensation is str but should match the regex ^\d*\.\d\d$ (e.g. "1.00")
# for_hit_id is str -- optional, but helpful for records
def compensate_workers(worker_ids, compensation, for_hit_id=""):
    with open('compensation.xml', 'r') as myfile:
        question_xml=myfile.read()

    keywords = ['compensation']
    description = 'Compensation for HIT'
    if for_hit_id:
        keywords.append(for_hit_id)
        description += ' ' + for_hit_id

    # create qual, assign to workers
    custom_qual = {
        'Name': str(uuid4()), # a qual must have a unique name
        'Keywords': keywords,
        'Description': description,
        'QualificationTypeStatus': 'Active',
        'AutoGranted': False
    }
    qual_id = create_qual(custom_qual)
    assign_qual(qual_id, worker_ids)

    # create HIT requiring qual
    task = {
        'numAssignments': len(worker_ids),
        'lifetime': 3 * 24 * 60 * 60, # 3 days
        'duration': 5 * 60, # 5 min
        'rewardAmount': compensation,
        'title': description,
        'keywords': keywords,
        'description': description,
    }
    quals = [{
        'QualificationTypeId': qual_id,
        'Comparator': 'Exists',
        'ActionsGuarded': 'DiscoverPreviewAndAccept'
    }]
    create_hit(task, question_xml, quals)

In [26]:
worker_ids = ['ARHBT4K14I7U', 'A2T974CBTMP1AT', 'A3VTF0VGQ7648H'] # worker_id strings in a list
compensation = "3.00" # change to the amount of dollars you want to give
for_hit_id = "33UIGJTG3OGMX2ZCQR1ZS700VNYZ1D (#2)" # hit_id string (what you are compensating for)compensate_workers(worker_ids, compensation, for_hit_id)
compensate_workers(worker_ids, compensation, for_hit_id)

{'ResponseMetadata': {'RequestId': '820c9d86-f0a3-47a0-9dec-86cfec95c160', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amzn-requestid': '820c9d86-f0a3-47a0-9dec-86cfec95c160', 'content-type': 'application/x-amz-json-1.1', 'content-length': '2', 'date': 'Sat, 06 Apr 2019 01:41:51 GMT'}, 'RetryAttempts': 0}}
{'ResponseMetadata': {'RequestId': '058d18af-af01-4cdd-908e-3902771503e2', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amzn-requestid': '058d18af-af01-4cdd-908e-3902771503e2', 'content-type': 'application/x-amz-json-1.1', 'content-length': '2', 'date': 'Sat, 06 Apr 2019 01:41:51 GMT'}, 'RetryAttempts': 0}}
{'ResponseMetadata': {'RequestId': 'a7e0a1fc-c74f-4411-a2ab-09f9dceacadd', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amzn-requestid': 'a7e0a1fc-c74f-4411-a2ab-09f9dceacadd', 'content-type': 'application/x-amz-json-1.1', 'content-length': '2', 'date': 'Sat, 06 Apr 2019 01:41:53 GMT'}, 'RetryAttempts': 0}}
{'HIT': {'HITId': '3D1UCPY6GGI1I9ESLZ5VTYX5PM1830', 'HITTypeId': '3K9PHCH9NOSYJ8UQ

# Download data

Helper to download data from MTurk 

In [11]:
def pretty_print(obj):
    pp = pprint.PrettyPrinter(indent=4)
    pp.pprint(obj)
    pp = None

# Downloads all the assignments completed for `hits` as a list of dictionaries. 
# If a download_path is given, also saves that data as json 
def get_assignment_content(hits, download_path="", should_print=False): 
    all_responses = []
    for hit in hits: 
        hitid = hit['HITId']
        assignments = get_all_assignments(hitid)
        for a in assignments:
            a_xml = a['Answer']
            #print(a_xml)
            soup = bs(a_xml, "html.parser")
            answers = soup.find_all("answer")
            #print(answers)
            results = {'HITId': a['HITId'], 'AssignmentId': a['AssignmentId'], 'WorkerId': a['WorkerId']}
            for ans in answers: 
                identifier = ans.find('questionidentifier').string
                answer = ans.find('freetext').string
                try: 
                    results[identifier] = json.loads(answer)
                except:
                    results[identifier] = answer
            all_responses.append(results)
    if should_print: 
        pretty_print(all_responses)
    if download_path: 
        with open(download_path, 'w') as outfile: 
            json.dump(all_responses, outfile)
    return all_responses
            

In [12]:
# Use this cell to download data
SAVE_PATH = '../../zoommaps-analysis/results/infographics_zoomlens/mturk-infographics_zoomlens.json'
responses = get_assignment_content(hits, download_path=SAVE_PATH, should_print=True)

[   {   'AssignmentId': '3BXQMRHWK083PCR9KMGG98KMBEAUM4',
        'HITId': '34HEO7RUG637KZS9TNQ57N6GLM9ARY',
        'WorkerId': 'AKEWR9CC1QJ00',
        'userId': '5ca6b29684a4aa0017232f3d'},
    {   'AssignmentId': '3JBT3HLQF9CQWEIWIP7G74TWI10ZPU',
        'HITId': '34HEO7RUG637KZS9TNQ57N6GLM9ARY',
        'WorkerId': 'A2HFBFZEJLIGBG',
        'userId': '5ca6b2a984a4aa0017232f48'},
    {   'AssignmentId': '37TD41K0AIJA1DNEISR2RBXMW3LSC4',
        'HITId': '34HEO7RUG637KZS9TNQ57N6GLM9ARY',
        'WorkerId': 'A3GZVH1LI90MBD',
        'userId': '5ca6b32e84a4aa0017232f78'},
    {   'AssignmentId': '3FFJ6VRIL2Y1D7P21044S8PYEHN0IU',
        'HITId': '34HEO7RUG637KZS9TNQ57N6GLM9ARY',
        'WorkerId': 'ASI6KXF2GAUTQ',
        'userId': '5ca6b30384a4aa0017232f63'},
    {   'AssignmentId': '3R0T90IZ1TM7B8ENNSXL2XEJ8TSCG7',
        'HITId': '34HEO7RUG637KZS9TNQ57N6GLM9ARY',
        'WorkerId': 'A1B5Q9WUNIT5GV',
        'userId': '5ca6b3ab84a4aa0017232fa8'},
    {   'AssignmentId': '3MAOD8E

In [28]:
list(filter(lambda r: r['WorkerId'] == 'A15340BRCER2UO', responses))

[]