In [14]:
# Path to the config file (see example).
# Works with a config file for https://github.com/kimberli/mturk-template,
# but a minimal example is included.
# Also supports an additional feature, the "variants" key, which is a list of dictionaries.
# If "variants" is specified, the keys of each dictionary are merged into the "hitCreation"
# settings and one task is made per variant. Otherwise config["hitCreation"]["numTasks"]
# copies of the same task are launched.
CONFIG_PATH = "./config.json"

# Path to the folder holding the html task (its "files" sub-folder lists the folds).
TASK_PATH = "."

# Where to save downloaded results.
SAVE_PATH = "./result.csv"

# Whether to launch one HIT per fold .txt file found in the folder "files".
LAUNCH_HITS_FOR_ALL_FOLDS = False
# Only fold files whose name contains this substring are launched.
FOLD_REQ_STR = 'poster_clean'

In [15]:
import copy
import datetime
import json
import os
from uuid import uuid4

from boto3 import client

# Load the task configuration; everything HIT-related lives under "hitCreation".
with open(CONFIG_PATH, 'r') as f:
    config = json.load(f)
hit_config = config['hitCreation']

# Pick the MTurk endpoint: production spends real money, sandbox does not.
_USING_PROD = bool(hit_config['production'])
if _USING_PROD:
    print("USING PROD")
    endpoint_url = 'https://mturk-requester.us-east-1.amazonaws.com'
else:
    print("USING SANDBOX")
    endpoint_url = 'https://mturk-requester-sandbox.us-east-1.amazonaws.com'

cl = client('mturk', region_name='us-east-1', endpoint_url=endpoint_url)

# Resolve which fold file the task page should load, in priority order:
#   1. the "fold" key of the config (optional),
#   2. a fold name typed in during an earlier run of this cell (`fld`),
#   3. a fold name asked for interactively.
# Looking `fld` up through globals() keeps the cell runnable on a fresh kernel,
# where the name does not exist yet.
fold_name = hit_config.get('fold')
if not fold_name:
    fold_name = globals().get('fld') or input('Define fold (input name+.txt)')
    fld = fold_name
# The fold is always passed as the ?url= query parameter of the task page.
hit_config['taskUrl'] = hit_config['taskUrl'] + "?url=%s" % fold_name

# Folds to launch when one HIT per fold is requested; left empty otherwise so that
# later cells can reference the name safely.
all_folds = []
if LAUNCH_HITS_FOR_ALL_FOLDS:
    all_folds = [f for f in os.listdir(os.path.join(TASK_PATH, 'files')) if FOLD_REQ_STR in f]
    print('Using folds:', all_folds)

print("TASK URL:", hit_config['taskUrl'])

USING SANDBOX
TASK URL: https://cfosco.github.io/mturk-importance/?url=natimgs35_0.txt


In [16]:
# Safety flags that prevent you from accidentally messing up your HITs.
# Set to False except when you are performing these specific tasks.
# ALLOW_HIT_CREATION gates the cell below that launches new HITs.
ALLOW_HIT_CREATION = True
# NOTE(review): the remaining flags are not read by any code visible in this part of
# the notebook; presumably later cells check them -- confirm before relying on them.
ALLOW_ASSIGNMENT_ADDITION = False
ALLOW_CREATE_QUAL = True
ALLOW_UPDATE_EXPIRATION = False

In [17]:
# Qualification requirements used to filter potential workers.
# Requirement 1: the worker's locale must be the US.
_US_LOCALE_REQUIREMENT = dict(
    QualificationTypeId='00000000000000000071',
    Comparator='EqualTo',
    LocaleValues=[dict(Country='US')],
)
# Requirement 2: the worker's approval rating must be >= 95%.
_APPROVAL_RATE_REQUIREMENT = dict(
    QualificationTypeId='000000000000000000L0',
    Comparator='GreaterThanOrEqualTo',
    IntegerValues=[95],
)
QUALS = [_US_LOCALE_REQUIREMENT, _APPROVAL_RATE_REQUIREMENT]

In [20]:
# Helpers for creating HITs.

def _external_question_xml(task_url):
    """Return ExternalQuestion XML that shows `task_url` in a 700px-high iframe."""
    return (
        "<ExternalQuestion xmlns=\"http://mechanicalturk.amazonaws.com/AWSMechanicalTurkDataSchemas/"
        "2006-07-14/ExternalQuestion.xsd\">\n<ExternalURL>" + task_url
        + "</ExternalURL>\n  <FrameHeight>700</FrameHeight>\n</ExternalQuestion>"
    )


# generic helper that sets metadata fields based on the config file.
def create_hit(task, questionText=None, quals=None):
    """Create one HIT through the module-level MTurk client `cl`.

    This single definition replaces two same-named functions; the second one used
    to shadow the first, which broke every one-argument call.

    Args:
        task: dict providing 'numAssignments', 'lifetime', 'duration',
            'rewardAmount', 'title', 'keywords' and 'description' (plus 'taskUrl'
            when `questionText` is omitted).
        questionText: question XML to submit. When None, an ExternalQuestion
            iframe pointing at task['taskUrl'] is generated.
        quals: qualification requirements for the HIT. When None, the
            module-level QUALS list is used; it is looked up at call time so that
            re-running the QUALS cell takes effect without redefining this function.

    Returns:
        The response returned by `cl.create_hit` (also printed).
    """
    if questionText is None:
        questionText = _external_question_xml(task['taskUrl'])
    if quals is None:
        quals = QUALS

    response = cl.create_hit(
        MaxAssignments=task['numAssignments'],
        AutoApprovalDelayInSeconds=604800,  # 7 days
        LifetimeInSeconds=task['lifetime'],
        AssignmentDurationInSeconds=task['duration'],
        Reward=task['rewardAmount'],
        Title=task['title'],
        Keywords=task['keywords'],
        Description=task['description'],
        Question=questionText,
        QualificationRequirements=quals,
    )
    print(response)
    print("\n")
    return response

# Builds an External Question (task page shown inside an iFrame) and launches it as a HIT
def create_hit_iframe(task):
    """Wrap task['taskUrl'] in ExternalQuestion XML (frame height 700) and hand it to create_hit."""
    xml_pieces = [
        "<ExternalQuestion xmlns=\"http://mechanicalturk.amazonaws.com/AWSMechanicalTurkDataSchemas/",
        "2006-07-14/ExternalQuestion.xsd\">\n<ExternalURL>",
        task['taskUrl'],
        "</ExternalURL>\n  <FrameHeight>700</FrameHeight>\n</ExternalQuestion>",
    ]
    create_hit(task, "".join(xml_pieces))
    
# Helper to create a HIT in the form of a simple UI with a link to an external page and an
# input box for a completion code
def create_hit_external(task, template_path='questionform_template.xml'):
    """Fill the question-form template for `task` and launch it through create_hit.

    The template file must contain three %s placeholders, filled in this order:
    title, description, task URL.

    Args:
        task: dict with at least 'title', 'description' and 'taskUrl', plus the
            metadata keys create_hit reads.
        template_path: path of the XML template file to fill in.
    """
    with open(template_path, 'r') as template_file:
        template = template_file.read()
    # Read every value from `task` (not the global hit_config) so that variants are
    # honoured; the link target is the task's own URL.
    question_xml = template % (task["title"], task["description"], task["taskUrl"])
    create_hit(task, question_xml)

In [24]:
# Launch the HITs. Nothing happens unless the ALLOW_HIT_CREATION safety flag is set.
if ALLOW_HIT_CREATION:
    variants = config.get('variants', None)
    if variants:
        # One HIT per variant: each variant's keys override a private copy of the
        # "hitCreation" settings, so the shared hit_config is never modified.
        print("creating " + str(len(variants)) + " variants")
        for var in variants:
            task = copy.deepcopy(hit_config)
            task.update(var)
            create_hit_iframe(task)

    elif LAUNCH_HITS_FOR_ALL_FOLDS:
        # One HIT per fold file: swap the ?url= query of the task page for each fold.
        print("creating", len(all_folds), "tasks")
        base_url = hit_config['taskUrl'].split('?')[0]
        for fold in all_folds:
            task = copy.deepcopy(hit_config)
            task['taskUrl'] = base_url + "?url=%s" % fold
            create_hit_iframe(task)
    else:
        # numTasks identical copies of the configured task.
        print("creating " + str(hit_config['numTasks']) + " tasks")
        for i in range(hit_config['numTasks']):
            create_hit_iframe(hit_config)

creating 1 tasks
{'HIT': {'HITId': '3K2CEDRACBAL2FEQUFGVMBI09CFMTA', 'HITTypeId': '30NHI4EO3UD99GICXIW3TB0XNEGBTG', 'HITGroupId': '35VN5BQM7UBVL5VR9SWFEM6SUT0J5Q', 'CreationTime': datetime.datetime(2019, 5, 13, 17, 24, 7, tzinfo=tzlocal()), 'Title': 'Annotate the most important regions on graphic designs', 'Description': 'Manually highlight the important parts of a graphic design. You will be shown a set of 10 to 15 images, and you will have to indicate which parts feel important to you.', 'Question': '<ExternalQuestion xmlns="http://mechanicalturk.amazonaws.com/AWSMechanicalTurkDataSchemas/2006-07-14/ExternalQuestion.xsd">\n<ExternalURL>https://cfosco.github.io/mturk-importance/?url=natimgs35_0.txt</ExternalURL>\n  <FrameHeight>700</FrameHeight>\n</ExternalQuestion>', 'Keywords': 'labeling, importance, highlighting, graphic, designs, images', 'HITStatus': 'Assignable', 'MaxAssignments': 1, 'Reward': '0.85', 'AutoApprovalDelayInSeconds': 604800, 'Expiration': datetime.datetime(2019, 5,

## HIT MONITORING ##

In [None]:
# Contacts MTurk API to get all assignments for a HIT
# Returns them in a list.
def get_all_assignments(hitid):
    """Return every assignment of HIT `hitid`, following pagination to the end.

    Pages of up to 100 assignments are requested through the module-level client
    `cl`. Paging stops as soon as a page is empty or the response carries no
    NextToken; a request without a token would start over from the first page,
    so it must never be re-issued after results have been received.
    """
    assignments = []
    request = {
        'HITId': hitid,
        'MaxResults': 100
    }
    while True:
        r = cl.list_assignments_for_hit(**request)
        page = r["Assignments"]
        assignments.extend(page)
        next_token = r.get('NextToken')
        if not page or not next_token:
            break
        request['NextToken'] = next_token
    return assignments


def get_hits(max_results=200):
    """Return up to `max_results` HITs of the account, following pagination.

    Each request asks for at most 100 HITs and never for more than are still
    needed, so the returned list cannot exceed `max_results`. Paging stops when
    enough HITs were collected, a page is empty, or no NextToken is returned.
    A non-positive `max_results` yields an empty list without contacting MTurk.
    """
    hits = []
    next_token = None
    while len(hits) < max_results:
        args = {
            'MaxResults': min(max_results - len(hits), 100)
        }
        if next_token:
            args['NextToken'] = next_token
        r = cl.list_hits(**args)
        page = r["HITs"]
        hits.extend(page)
        next_token = r.get('NextToken')
        if not page or not next_token:
            break
    return hits

# Summarizes all hits in `hits` in a human-readable way.
# Prints out the HIT Title, id, if it is expired, and how many assignments it has
# completed, pending, and left for work.
def summarize_hits(hits, get_submitted=True):
    """Print the number of HITs followed by a readable summary of each one.

    Args:
        hits: list of HIT dicts providing 'Title', 'HITId', 'Expiration' (a
            datetime) and the three 'NumberOfAssignments*' counters.
        get_submitted: when True, fetch each HIT's assignments with
            get_all_assignments and report how many are in 'Submitted' status
            (one or more extra API requests per HIT).
    """
    print(len(hits))
    sections = []
    for hit in hits:
        expiration = hit['Expiration']
        # Take "now" in the expiration's own timezone (naive local time when it has
        # none) so the comparison is valid whatever timezone the timestamp carries.
        is_expired = expiration < datetime.datetime.now(expiration.tzinfo)
        description = (
            "Title: {title}\n"
            "ID: {hid}\n"
            "\tAssignments left: {left}\n"
            "\tAssignments completed: {complete}\n"
            "\tAssignments pending: {pending}\n"
        ).format(
            title=hit['Title'],
            hid=hit['HITId'],
            left=hit['NumberOfAssignmentsAvailable'],
            complete=hit['NumberOfAssignmentsCompleted'],
            pending=hit['NumberOfAssignmentsPending'],
        )

        if get_submitted:
            submitted = sum(
                1 for a in get_all_assignments(hit['HITId'])
                if a['AssignmentStatus'] == 'Submitted'
            )
            description += '\tAssignments submitted: %d\n' % submitted

        description += "\tExpired: {exp}\n\n".format(exp=str(is_expired))
        sections.append(description)
    print("".join(sections))
    
# Prints, for every hit in `hits`, a readable list of its pending/submitted/approved assignments
def summarize_assignments(hits):
    """Print one header line per HIT followed by the id and status of each assignment.

    Assignments are fetched with get_all_assignments; a HIT without any gets a
    single explanatory line instead.
    """
    chunks = []
    for entry in hits:
        hit_id, hit_title = entry['HITId'], entry['Title']
        chunks.append("HIT %s: %s" % (hit_id, hit_title) + "\n")
        found = get_all_assignments(hit_id)
        if not found:
            chunks.append("\tNo pending/submitted/approved assignments for this HIT\n")
        chunks.extend(
            "\tAssignment {aid}\n\t\tStatus: {status}\n".format(
                aid=item['AssignmentId'], status=item['AssignmentStatus']
            )
            for item in found
        )
    print("".join(chunks))
    
# Refreshes data about the requested hits
def refresh_hits(max_results=None):
    """Re-fetch the HIT list from MTurk into the module-level `hits` variable.

    Args:
        max_results: maximum number of HITs to fetch. When None, the module-level
            MAX_RESULTS is used if it has been defined, otherwise 200 (the default
            of get_hits). Fetching goes through get_hits so results are paginated.
    """
    global hits
    if max_results is None:
        # MAX_RESULTS is optional: fall back instead of failing on a fresh kernel.
        max_results = globals().get('MAX_RESULTS', 200)
    hits = get_hits(max_results=max_results)

In [None]:
# API call to grab HIT data from MTurk: asks get_hits for at most 30 HITs and keeps
# them in the module-level `hits` list used by the summary cell below.
hits = get_hits(max_results=30)
print(len(hits))

In [None]:
# Summarizes the HITs currently held in `hits` (title, id, assignment counts, expiry).
# Uncomment the next line to re-fetch them from MTurk first.
# refresh_hits()
summarize_hits(hits)

## APPROVE HITS ##

In [None]:
def approve_all(hits):
    """Approve every assignment of `hits` that is awaiting review.

    Only assignments in 'Submitted' status are approved; ones that are already
    approved or were rejected are left untouched (previously rejected ones were
    sent for approval as well).

    Args:
        hits: list of HIT dicts; only 'HITId' is read.

    Returns:
        The number of assignments approved by this call (also printed).
    """
    num_approved = 0
    for hit in hits:
        # get_all_assignments follows pagination, so no assignment is missed
        for a in get_all_assignments(hit["HITId"]):
            if a['AssignmentStatus'] != 'Submitted':
                continue
            print("Approving assignment")
            cl.approve_assignment(AssignmentId=a['AssignmentId'])
            # Count only after the API call went through.
            num_approved += 1
    print("Approved %d assignments" % num_approved)
    return num_approved