In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
from src.data.auto_mturk import get_drive_service, download_forms_urls, create_mturk_client
import pickle
import os.path
from google_auth_oauthlib.flow import InstalledAppFlow
from src.constants import TOKEN_PATH,CREDS_PATH, FORMS_URLS_PATH, AWS_KEYS_PATH
from src.utils import read_access_keys
import io
import shutil
import pandas as pd
import xmltodict
from datetime import datetime
from googleapiclient.http import MediaIoBaseDownload

In [261]:
from botocore.exceptions import RequestError

ImportError: cannot import name 'RequestError'

In [3]:
file_id = "1iOe--jfnoSjLkZ506XPqyEqO64krXByn_qLuPF3BaQA"
# (1). Retrieve the urls from the app script

# retrieve gdrive service
service = get_drive_service()


# download the most recent forms_urls
download_forms_urls(FORMS_URLS_PATH,file_id,service)
forms_url = pd.read_csv(FORMS_URLS_PATH,sep=r"\s+",header=None,names=['url'],index_col=0)

Download 100%


In [4]:
forms_url = forms_url.url.to_dict()

In [281]:
def display_answer(worker_results):
    """
    Args:
        answer(dict): the xmldoc
    """
    if worker_results['NumResults'] > 0:
        for assignment in worker_results['Assignments']:
            xml_doc = xmltodict.parse(assignment['Answer'])

            print("Worker's answer was:")
            if type(xml_doc['QuestionFormAnswers']['Answer']) is list:
                # Multiple fields in HIT layout
                for answer_field in xml_doc['QuestionFormAnswers']['Answer']:
                    print("For input field: " + answer_field['QuestionIdentifier'])
                    print("Submitted answer: " + answer_field['FreeText'])
            else:
                # One field found in HIT layout
                print("For input field: " + xml_doc['QuestionFormAnswers']['Answer']['QuestionIdentifier'])
                print("Submitted answer: " + xml_doc['QuestionFormAnswers']['Answer']['FreeText'])
    else:
        print("No results ready yet")

In [362]:
class Turker():
    def __init__(self,hittypeid,hitlayout,lifetimeinsec,production=False):
        """
        Args:
            hittypeid (str): hittypeid of the template to use
            hitlayout (str): hitlayout of the template to use
            lifetimeinsec (int): lifetime in seconds
        """
        self.production = production
        # retrieval of the access keys
        aws_access_key_id,aws_secret_access_key = read_access_keys(AWS_KEYS_PATH)
        # creation of an self.client client
        self.client = create_mturk_client(aws_access_key_id,aws_secret_access_key,production)
        self.hittypeid = hittypeid
        self.hitlayout = hitlayout
        self.lifetimeinsec = lifetimeinsec
        self.url = "https://workersandbox.mturk.com/mturk/preview?groupId="# if production else "https://worker.mturk.com/mturk/preview?groupId="
    def get_url(self,hit_id):
        return self.url + hit_id

    def list_hits(self):
        hits = self.client.list_hits()['HITs']
        if len(hits) == 0: 
            print("No Hits available")
        for i,hit in enumerate(hits):
            hitid = hit['HITId']
            print(f"({i+1}): Hit:{hitid} Status: {hit['HITStatus']}")
            if hit['HITId'] != 'Assignable':
                comp = hit["NumberOfAssignmentsCompleted"]
                maxo = hit["MaxAssignments"]
                print(f'Completed tasks: {comp}/{maxo}')
            print(f"URL: {self.get_url(hit['HITGroupId'])}")

    def __approve_all_assignments(self,hit_id):
        assignments = self.client.list_assignments_for_hit(HITId=hit_id,AssignmentStatuses=['Submitted'])
        assignments = assignments['Assignments']
        for ass in assignments:
            self.client.approve_assignment(AssignmentId=ass['AssignmentId'])

    def approve_all_hits(self):
        hits = self.client.list_reviewable_hits()['HITs']
        for hit in hits:
            self.__approve_all_assignments(hit['HITId'])

    def delete_all_hits(self):
        hits = self.client.list_hits()['HITs']
        for hit in hits:
            self.delete_hit(hit['HITId'])

    def delete_hit(self,hit_id):
        try:
            self.client.delete_hit(HITId=hit_id)
            print(f"Deleting hit {hit_id}")
        except:
            print(f"Hit {hit_id} in Unassignable mode")

    def stop_all_hits(self):
        hits = self.client.list_hits()['HITs']
        for hit in hits:
            self.stop_hit(hit['HITId'])

    def stop_hit(self,hit_id):
        status= self.client.get_hit(HITId=hit_id)['HIT']['HITStatus']
        # If HIT is active then set it to expire immediately
        if status=='Assignable' or status=='Unassignable':
            response = self.client.update_expiration_for_hit(
                HITId=hit_id,
                ExpireAt=datetime(2015, 1, 1)
            )
            print(f"Stop hit {hit_id}")

    def approve_delete_all_hits(self):
        self.approve_all_hits()
        self.delete_all_hits()

    def create_forms_hits(self,forms_url,hittypeid=None,hitlayout=None):
        """
        Args:
            forms_url(dict): mapping between forms index and their respective url        
        """
        hittypeid = self.hittypeid if hittypeid is None else hittypeid 
        hitlayout = self.hitlayout if hitlayout is None else hitlayout
        form_hit = {}
        for idx,url in forms_url.items():
            print(f"Creating hit for form {idx}")
            
            myhit = self.client.create_hit(
                        HITLayoutId=hitlayout,
                        MaxAssignments=20,

                        HITLayoutParameters = [{'Name':'url',
                                   'Value':url}],
                        LifetimeInSeconds = self.lifetimeinsec,
                        AutoApprovalDelayInSeconds=600,
                        AssignmentDurationInSeconds=600,
                        Reward='0.01',
                        Title=f'Emojis Descriptions n {idx}',
                        Keywords='emojis, description, sentiment, emotions',
                        Description='Describe emojis by a single accurate word',
                        QualificationRequirements=[
                            {
                                'QualificationTypeId': '3OR1BBO28PIVPWZMRDTWE8U6OZXNGN',
                                'Comparator': 'DoesNotExist',
                                'ActionsGuarded': 'DiscoverPreviewAndAccept'
                            }
                        # TODO: add location and hit percentage
                        ]
            )
            form_hit[idx] = myhit['HIT']['HITId']
        self.form_hit = form_hit
        

In [363]:
turk = Turker(hittypeid="3NKVIC4SHKLQ1JDME7PHL2B12T1NBF",
             hitlayout="3QS25971A2UW6PKAU5WN9771C2JXNI",
             lifetimeinsec=600)

In [364]:
turk.list_hits()

(1): Hit:3DTJ4WT8BD0Y0JV6XUWKXIV5W6ZEZH Status: Reviewable
1/1
URL: https://workersandbox.mturk.com/mturk/preview?groupId=38FT9G80EMTRK43GQZNRKO403IDLHE
(2): Hit:3XDSWAMB22DAPTA1E77CG1JY6R1QC6 Status: Reviewable
0/1
URL: https://workersandbox.mturk.com/mturk/preview?groupId=3LUMOMQJCYHJXL149CVWTX6RP9LPLL
(3): Hit:3OND0WXMHW0C9YVCR8QDXBR00GIHEU Status: Reviewable
0/1
URL: https://workersandbox.mturk.com/mturk/preview?groupId=3589VT1T3C2DZA1RQC6R9TO4L5GNJM


In [351]:
from time import sleep
def worker_tag_monitor(qualification_type_id='3OR1BBO28PIVPWZMRDTWE8U6OZXNGN'):
    while True:
        time.sleep(1)
        print("searching..")
        # search for workers already tagged
        exworkers = set()
        qualifs = turk.client.list_workers_with_qualification_type(QualificationTypeId=qualification_type_id,)
        for qualif in qualifs['Qualifications']:
            if qualif['QualificationTypeId'] == qualification_type_id:
                exworkers.add(qualif['WorkerId'])

        # search for new workers
        worker_ids = set()
        for hit in turk.client.list_hits()['HITs']:
            hitid = hit['HITId']
            result = turk.client.list_assignments_for_hit(HITId=hitid,AssignmentStatuses=['Submitted','Approved','Rejected'])
            assignments = result['Assignments']

            for assignment in assignments:
                workerid = assignment['WorkerId']
                worker_ids.add(workerid)

        worker_ids = worker_ids - exworkers
        for workerid in worker_ids:
            print(f"Tagging worker {workerid}")
            turk.client.associate_qualification_with_worker(
            QualificationTypeId=qualification_type_id,
            WorkerId=workerid,
            IntegerValue=1,
            SendNotification=False
        )

searching..
searching..
searching..


KeyboardInterrupt: 

In [352]:
turk.list_hits()

(1): Hit:3DTJ4WT8BD0Y0JV6XUWKXIV5W6ZEZH Status: Reviewable 
 URL: https://workersandbox.mturk.com/mturk/preview?groupId=38FT9G80EMTRK43GQZNRKO403IDLHE
(2): Hit:3XDSWAMB22DAPTA1E77CG1JY6R1QC6 Status: Assignable 
 URL: https://workersandbox.mturk.com/mturk/preview?groupId=3LUMOMQJCYHJXL149CVWTX6RP9LPLL
(3): Hit:3OND0WXMHW0C9YVCR8QDXBR00GIHEU Status: Assignable 
 URL: https://workersandbox.mturk.com/mturk/preview?groupId=3589VT1T3C2DZA1RQC6R9TO4L5GNJM


In [330]:
turk.create_forms_hits(forms_url)

Creating hit for form 0
Creating hit for form 1
Creating hit for form 2


In [327]:
turk.approve_all_hits()

In [354]:
worker_results = turk.client.list_assignments_for_hit(HITId="3DTJ4WT8BD0Y0JV6XUWKXIV5W6ZEZH", AssignmentStatuses=['Submitted'])

In [355]:
worker_results

{'NextToken': 'p1:fC1RYSvUb4PIeE59XU6j8/kHqTj3Ho1m3WiKha997LFIdRaCx3kYaSso4RwqDg==',
 'NumResults': 1,
 'Assignments': [{'AssignmentId': '39LOEL67OTQJS9BGPMKDPQQK6KU83V',
   'WorkerId': 'A29C1XYH77RQYM',
   'HITId': '3DTJ4WT8BD0Y0JV6XUWKXIV5W6ZEZH',
   'AssignmentStatus': 'Submitted',
   'AutoApprovalTime': datetime.datetime(2020, 11, 21, 20, 16, 24, tzinfo=tzlocal()),
   'AcceptTime': datetime.datetime(2020, 11, 21, 20, 6, 19, tzinfo=tzlocal()),
   'SubmitTime': datetime.datetime(2020, 11, 21, 20, 6, 24, tzinfo=tzlocal()),
   'Answer': '<?xml version="1.0" encoding="ASCII"?><QuestionFormAnswers xmlns="http://mechanicalturk.amazonaws.com/AWSMechanicalTurkDataSchemas/2005-10-01/QuestionFormAnswers.xsd"><Answer><QuestionIdentifier>surveycode</QuestionIdentifier><FreeText>hit0</FreeText></Answer></QuestionFormAnswers>'}],
 'ResponseMetadata': {'RequestId': '5b46b45f-b219-4476-a230-6a6161d5b066',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '5b46b45f-b219-4476-a230-6a6161

In [176]:
worker_results = turk.client.list_assignments_for_hit(HITId="3OND0WXMHW0C9YVCR8QDXBR00FHEHO", AssignmentStatuses=['Submitted'])
display_answer(worker_results)

Worker's answer was:
For input field: surveycode
Submitted answer: hit3


In [328]:
turk.stop_all_hits()

turk.delete_all_hits()

Deleting hit 3P4C70TRMR2CF8ATOOVZP65JUD6GLH


In [232]:
turk.client.get_hit(HITId="3VO4XFFP1574DCF5NQZ10FRK5F77QP")

{'HIT': {'HITId': '3VO4XFFP1574DCF5NQZ10FRK5F77QP',
  'HITTypeId': '3QA1H4GB4RCFG6O9NDVBQ4OP98C3MW',
  'HITGroupId': '3L2A3M5C18OOB5OIYYD5GARR4LJQG1',
  'HITLayoutId': '3QS25971A2UW6PKAU5WN9771C2JXNI',
  'CreationTime': datetime.datetime(2020, 11, 21, 16, 9, 11, tzinfo=tzlocal()),
  'Title': 'Answer a survey about your opinions',
  'Description': 'Give us your opinion about our products',
  'Keywords': 'survey, demographics',
  'HITStatus': 'Reviewable',
  'MaxAssignments': 1,
  'Reward': '0.01',
  'AutoApprovalDelayInSeconds': 259200,
  'Expiration': datetime.datetime(2020, 11, 21, 16, 19, 11, tzinfo=tzlocal()),
  'AssignmentDurationInSeconds': 3600,
  'QualificationRequirements': [],
  'HITReviewStatus': 'NotReviewed',
  'NumberOfAssignmentsPending': 0,
  'NumberOfAssignmentsAvailable': 0,
  'NumberOfAssignmentsCompleted': 0},
 'ResponseMetadata': {'RequestId': 'cc811ff5-955b-4428-a190-bfc28153fd25',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': 'cc811ff5-955b-4428-a

## Qualifications

In [187]:
turk.client.create_qualification_type(
    Name='emojidone',
    Description="Qualification to recognize workers that already performed an emoji-related task",
    QualificationTypeStatus='Active',
    AutoGranted=True,
    AutoGrantedValue=1
)

RequestError: An error occurred (RequestError) when calling the CreateQualificationType operation: You have already created a QualificationType with this name. A QualificationType's name must be unique among all of the QualificationTypes created by the same user. (1605981881577 s)

In [188]:
response = turk.client.get_qualification_type(
    QualificationTypeId='3OR1BBO28PIVPWZMRDTWE8U6OZXNGN'
)

In [315]:
turk.client.disassociate_qualification_from_worker(
    WorkerId='A29C1XYH77RQYM',
    QualificationTypeId='3OR1BBO28PIVPWZMRDTWE8U6OZXNGN',
    Reason=''
)

## HIT type

In [134]:
hittype = turk.client.create_hit_type(
    AutoApprovalDelayInSeconds=600,
    AssignmentDurationInSeconds=600,
    Reward='0.01',
    Title='Emojis Descriptions',
    Keywords='emojis, description, sentiment, emotions',
    Description='Describe emojis by a single accurate word',
    QualificationRequirements=[
        {
            'QualificationTypeId': '3OR1BBO28PIVPWZMRDTWE8U6OZXNGN',
            'Comparator': 'DoesNotExist',
            'ActionsGuarded': 'DiscoverPreviewAndAccept'
        }
    # TODO: add location and hit percentage
    ]
)
# 3NKVIC4SHKLQ1JDME7PHL2B12T1NBF
hittype

{'HITTypeId': '3NKVIC4SHKLQ1JDME7PHL2B12T1NBF',
 'ResponseMetadata': {'RequestId': 'dcbbc9d0-0ee2-4d31-976f-14b08e5f0faa',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': 'dcbbc9d0-0ee2-4d31-976f-14b08e5f0faa',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '46',
   'date': 'Sat, 21 Nov 2020 17:51:42 GMT'},
  'RetryAttempts': 0}}

In [59]:
hittype = response

## Google urls file download

In [5]:
service = get_drive_service()

In [7]:
file_id = '11UjUJ9WfafdMEXZww9Q9GBnXsqX0dPWPdZGXGfFr_t4'

## Urls loading

In [10]:
forms_url = pd.read_csv(forms_path,sep=r"\s+",header=None,names=['url'],index_col=0)

## Personal Test

In [14]:
import boto3
from boto.mturk.connection import MTurkConnection
from boto.mturk.question import HTMLQuestion
from boto.mturk.layoutparam import LayoutParameter


## Personal test with boto3

In [48]:
# This will return your current MTurk balance if you are connected to Production.
# If you are connected to the Sandbox it will return $10,000.
print(client.get_account_balance()['AvailableBalance'])

10000.00


In [None]:
myhit = client.create_hit_with_hit_type(
    HITTypeId="3QA1H4GB4RCFG6O9NDVBQ4OP98C3MW",
    HITLayoutId="3QS25971A2UW6PKAU5WN9771C2JXNI",
    HITLayoutParameters = [{'Name':'url',
                           'Value':'https://forms.gle/XKVGkivEhe2JwuEW7'}],
    LifetimeInSeconds = 3600,
    #AutoApprovalDelayInSeconds = 3600
)

In [57]:
print("A new HIT has been created. You can preview it here:")
if create_hits_in_production:
    print("https://worker.mturk.com/mturk/preview?groupId=" + myhit['HIT']['HITGroupId'])
    
    
else:
    print("https://workersandbox.mturk.com/mturk/preview?groupId=" + myhit['HIT']['HITGroupId'])
print("HITID = " + myhit['HIT']['HITId'] + " (Use to Get Results)")

A new HIT has been created. You can preview it here:
https://workersandbox.mturk.com/mturk/preview?groupId=3L2A3M5C18OOB5OIYYD5GARR4LJQG1
HITID = 3ZFRE2BDQ9Z76JMNSZMCL7GCCBMZXL (Use to Get Results)


## Retrieving Results

In [58]:
import boto3
import xmltodict

In [79]:
aws_access_key_id, aws_secret_access_key = read_access_keys("../creds/aws.txt")
mturk = boto3.client('mturk',
   aws_access_key_id = aws_access_key_id,
   aws_secret_access_key = aws_secret_access_key,
   region_name='us-east-1',
   endpoint_url = mturk_environment['endpoint']
)
# Use the hit_id previously created
hit_id = myhit['HIT']['HITId']
# We are only publishing this task to one Worker
# So we will get back an array with one item if it has been completed
worker_results = mturk.list_assignments_for_hit(HITId=hit_id, AssignmentStatuses=['Submitted'])

display_answer(worker_results)

In [85]:
def answers_2_dataframe(worker_results):
    df = [{'WorkerID':assignment['WorkerId'],
           'HITId':assignment['HITId'],
           'Answer':xmltodict.parse(assignment['Answer'])} 
          for assignment in worker_results['Assignments']
           ]
    for answer in df:
        xml_doc = answer['Answer']
        
        if type(xml_doc['QuestionFormAnswers']['Answer']) is list:
            # Multiple fields in HIT layout
            for answer_field in xml_doc['QuestionFormAnswers']['Answer']:
                key = answer_field['QuestionIdentifier']
                answer_field['FreeText']
                answer[key] = value
        else:
            # One field found in HIT layout
            key = xml_doc['QuestionFormAnswers']['Answer']['QuestionIdentifier']
            value = xml_doc['QuestionFormAnswers']['Answer']['FreeText']
            answer[key] = value
        del answer['Answer']
    return pd.DataFrame(df)

In [86]:
answers_2_dataframe(worker_results)

Unnamed: 0,WorkerID,HITId,surveycode
0,A29C1XYH77RQYM,3ZFRE2BDQ9Z76JMNSZMCL7GCCBMZXL,EMOJI378910


# Dead Code

In [39]:
# Create your connection to MTurk
mtc = MTurkConnection(aws_access_key_id='',
                      aws_secret_access_key='',
                      host='mechanicalturk.sandboxdf.amazonaws.com')

In [25]:
url = LayoutParameter('url','https://forms.gle/XKVGkivEhe2JwuEW7')
params   = LayoutParameters([url])
response = mtc.create_hit(
  hit_layout    ="3QS25971A2UW6PKAU5WN9771C2JXNI",
  layout_params =params,
  hit_type      ="3QA1H4GB4RCFG6O9NDVBQ4OP98C3MW"
)

# The response included several fields that will be helpful later
hit_type_id = response[0].HITTypeId
hit_id = response[0].HITId
print("Your HIT has been created. You can see it at this link:")
print("https://workersandbox.mturk.com/mturk/preview?groupId={}".format(hit_type_id))
print("Your HIT ID is: {}".format(hit_id))

gaierror: [Errno -3] Temporary failure in name resolution