In [1]:
# Enable the autoreload extension so edits to imported project modules are
# picked up without restarting the kernel (mode 2).
%load_ext autoreload
%autoreload 2

In [2]:
# real imports
import ipywidgets as widgets
from functools import partial
from pathlib import Path
import pandas as pd

from mt2gf.utils import read_access_keys
from mt2gf.gform import (get_gform_map,
                         get_batch_gform_map,
                         get_drive_service,
                         download_drive_txt,
                         download_multi_csv,
                         download_csv)
from mt2gf.preprocess import get_batch_indexes, create_batch_directories
from mt2gf.mturk import (create_mturk_client,
                         Turker, MTurkParam)
from mt2gf.widgets import ControlPanel
from mt2gf.watcher import Watcher

from src.constants import CREDS_PATH, REPO_PATH,WATCHER_FORMS_RESULTS_DIR,FORMS_RESULTS_DIR

## Parameters

<div class="alert alert-success">
Parameters for dataset gathering
</div>

In [5]:
# False -> work against the sandbox; True -> production
production = False

# ------------------------------------------------------------------
# File / folder parameters
# ------------------------------------------------------------------

repo_path = REPO_PATH
# Folder holding credentials.json; the token.pk file gets stored there too
creds_dir = CREDS_PATH
results_dir = FORMS_RESULTS_DIR

# Gather the data batch by batch
use_batch = True

# ------------------------------------------------------------------
# MTurk parameters
# ------------------------------------------------------------------

aws_key_path = creds_dir / "aws.csv"

# HIT layout id: one layout for production, another for the sandbox
hit_layout = (
    "3ACG29O6JDJKYOPH2ORTS52TR57YKA"
    if production
    else "3XJFTJAV8QARKRU4KW7Q2OQT6WM9R4"
)

# ------------------------------------------------------------------
# Gforms parameters
# ------------------------------------------------------------------

gform_map_path = results_dir / "gform_map.txt"

# ------------------------------------------------------------------
# Monitor parameters
# ------------------------------------------------------------------

watcher_forms_result_dir = WATCHER_FORMS_RESULTS_DIR

# Upper bound on how many forms a single worker may answer in this project
max_forms_per_worker = 30

# How many forms make up one batch
batch_size = 7

___

## Gform management

<div class="alert alert-success">
Adapt the following variables if you want a specific batch size or batch number. If the <code>batch_number</code> argument is set to <b>None</b>, the batch number is automatically computed.
</div>

In [4]:
# Drive file id of the gform mapping file
gform_map_id = "1QH7lTq_0mOhEmknMJwLJwZeZV1MSxf8ztN4AKtzrlTI"
# Drive service built from the credentials directory configured in the
# parameters cell (creds_dir), so that changing the parameter is honoured here
# as well; the service is used further down to retrieve the form mapping
# generated by the app script.
service = get_drive_service(creds_dir)

### Batch Implementation

In [6]:
# Prepare the directories in which the batch results are stored
# NOTE(review): 18 is presumably the number of batch directories — confirm
create_batch_directories(results_dir, 18)

# Ask for the indexes of the next batch to process; batch_number=None is passed
# so the batch number is not fixed by hand.
# NOTE(review): MaxAssignments receives max_forms_per_worker — confirm this is intended
batch_dir, batch_number, form_indexes = get_batch_indexes(
    parent_dir=results_dir,
    batch_number=None,
    batch_size=batch_size,
    MaxAssignments=max_forms_per_worker,
)
# Results of this run are written to the directory of the selected batch
target_dir = batch_dir
print(f"Treating batch number {batch_number},forms:{form_indexes}")

# Form mapping for the form indexes of the selected batch
gform_map = get_batch_gform_map(service, gform_map_id, gform_map_path, form_indexes)


Treating batch number 0,forms:[0, 1, 2, 3, 4, 5, 6]
Download 100%


___

## Monitor

<div class="alert alert-success">
The monitor lets us check which workers have answered more than <code>max_forms_per_worker</code> forms and tags them to prevent any further answering, in order to guarantee a minimum of diversity in the dataset.
</div>

In [7]:
# Monitor built on the batch form mapping, the AWS key file and the Drive service
watcher = Watcher(
    form_results_dir=watcher_forms_result_dir,
    gform_map=gform_map,
    aws_key_path=aws_key_path,
    drive_service=service,
    max_forms_per_worker=max_forms_per_worker,
    qualification_type_name="emojidone",
    qualification_description="test qualification",
    production=production,
)

In [8]:
# Requirements attached to the HITs: the first entry is the qualification the
# monitor uses to tag workers, the remaining ones are fixed qualification ids.
QualificationRequirements = [
    watcher.get_qualif_requirement(),
    # PercentAssignmentsApproved >= 99
    {
        "QualificationTypeId": "000000000000000000L0",
        "Comparator": "GreaterThanOrEqualTo",
        "IntegerValues": [99],
        "ActionsGuarded": "DiscoverPreviewAndAccept",
    },
    # Location == US
    {
        "QualificationTypeId": "00000000000000000071",
        "Comparator": "EqualTo",
        "LocaleValues": [{"Country": "US"}],
        "ActionsGuarded": "DiscoverPreviewAndAccept",
    },
    # Number of hits >= 500
    {
        "QualificationTypeId": "00000000000000000040",
        "Comparator": "GreaterThanOrEqualTo",
        "IntegerValues": [500],
        "ActionsGuarded": "DiscoverPreviewAndAccept",
    },
]

___

## MTurk management

In [9]:
# HIT configuration handed to the Turker below
param = MTurkParam(
    # environment and credentials
    production=production,
    aws_key_path=aws_key_path,
    # what the workers see
    hit_layout=hit_layout,
    HITTitle="Emojis Descriptions n",
    Description="Describe emojis by a single accurate word",
    Keywords="emojis, description, sentiment, emotions",
    # volume, payment and timing
    MaxAssignments=30,
    Reward="0.3",
    # NOTE(review): the parameter name says days — confirm 4 is the intended lifetime
    LifetimeInDays=4,
    AutoApprovalDelayInDays=1,
    AssignmentDurationInSeconds=800,
    # who may take the HITs
    QualificationRequirements=QualificationRequirements,
)

### Callbacks instantiation

In [13]:
# Turker wired to the HIT parameters, the Drive service and the batch form
# mapping; form results go to target_dir. No frauder callbacks are registered
# and check_conf_code is switched off.
turk = Turker(
    meta_dir=repo_path,
    param=param,
    gservice=service,
    gform_map=gform_map,
    formresdir=target_dir,
    frauder_callbacks=[],
    check_conf_code=False,
)

Estimated cost:27.00 $


___

## Control Panel

<div class="alert alert-success">
Control panel to handle HIT states and collect results. See the Mturk2Gform documentation for more information.
</div>

In [14]:
# Control panel combining the Turker and the monitor
control_panel = ControlPanel(turk=turk, watcher=watcher)

HBox(children=(Button(description='list hits', style=ButtonStyle()), Button(description='create hits', style=B…

HBox(children=(Button(description='list assignments', style=ButtonStyle()), Button(description='approve all', …

HBox(children=(Button(description='approve correct (dry)', style=ButtonStyle(button_color='lightgreen')), Butt…

HBox(children=(Button(description='stop all hits', style=ButtonStyle(button_color='orange')), Button(button_st…

HBox(children=(Button(button_style='info', description='start monitor', style=ButtonStyle()), Button(button_st…

HBox(children=(Button(button_style='primary', description='list tagged workers', style=ButtonStyle()), Button(…

Text(value='', placeholder='Results HITid/formidx')