In [2]:
import generate_codecharts
import os
import string
import random
import json 
import matplotlib.pyplot as plt
import numpy as np
import base64 
import glob

In [None]:
# Input locations read by the later cells.
tutorial_source_dir = 'tutorial_images'   # folder the raw tutorial images are globbed from
sourcedir = '../demo_experiment_images/'  # folder the experiment images are globbed from; point this at your own images

In [3]:
# ---------------------------------------------------------------------------
# Settings that control subject-file generation
# ---------------------------------------------------------------------------
num_subject_files = 3            # how many participant files to write (= # of assignments that will be put up)
num_images_per_sf = 35           # how many real images each participant sees
num_imgs_per_tutorial = 4        # real images shown during the tutorial
num_sentinels_per_tutorial = 2   # sentinel images shown during the tutorial
num_sentinels_per_sf = 5         # sentinel images in the main task (tutorial not counted)
add_sentinels_to_tutorial = True # whether the tutorial contains sentinels at all

# Derived totals (both can be changed)
ncodecharts = num_subject_files*num_images_per_sf
sentinel_images_per_bucket = num_subject_files*num_sentinels_per_sf

# Sentinel appearance
target_type = "img"                      # one of fix_cross, red_dot, or img
target_imdir = "sentinel_target_images"

# Bucket selection -- set these
num_buckets = 1
start_bucket_at = 0
which_buckets = [0]  # may also be an explicit list of bucket numbers, e.g. [4,5,6]

# Root output folder; created on first run
rootdir = '../assets/task_data'
if not os.path.exists(rootdir):
    print('Creating directory %s'%(rootdir))
    os.makedirs(rootdir)

# Sub-folders of rootdir used by the cells below (only the paths are built here)
(real_image_dir,
 real_CC_dir,
 sentinel_image_dir,
 sentinel_CC_dir,
 sentinel_targetim_dir) = [
    os.path.join(rootdir, subfolder)
    for subfolder in ('real_images', 'real_CC', 'sentinel_images', 'sentinel_CC', 'sentinel_target')
]

In [5]:
import create_padded_image_dir

# Destination folder for the padded copies of the experiment images
all_image_dir = os.path.join(rootdir,'all_images')
if not os.path.exists(all_image_dir):
    print('Creating directory %s'%(all_image_dir))
    os.makedirs(all_image_dir)

# Gather every .jpeg / .png / .jpg file sitting directly inside sourcedir
allfiles = [
    path
    for pattern in ('*.jpeg', '*.png', '*.jpg')
    for path in glob.glob(os.path.join(sourcedir, pattern))
]
print("len allfiles", len(allfiles))

# The two returned values are reused by later cells as the common image dimensions
image_width,image_height = create_padded_image_dir.save_padded_images(all_image_dir,allfiles)

Creating directory ./task_data/all_images
len allfiles 51
Image widths: dict_keys([640])
Image heights: dict_keys([480])
Padding 51 image files to dimensions: [1786,1340]...
Done!


In [6]:
from generate_central_fixation_cross import save_fixation_cross

# Generate the fixation-cross asset under rootdir, using the padded image dimensions.
# The trailing semicolon keeps the notebook from echoing the call's return value.
save_fixation_cross(rootdir, image_width, image_height);

using font size: 37


In [7]:
from distribute_image_files_by_buckets import distribute_images

# Split the padded images from all_image_dir into bucket folders under real_image_dir
distribute_images(
    all_image_dir,
    real_image_dir,
    num_buckets,
    start_bucket_at,
)

In [8]:
from create_codecharts_dir import create_codecharts

# Produce ncodecharts codecharts in real_CC_dir, sized to the padded image dimensions
create_codecharts(
    real_CC_dir,
    ncodecharts,
    image_width,
    image_height,
)

0/2000
100/2000
200/2000
300/2000
400/2000
500/2000
600/2000
700/2000
800/2000
900/2000
1000/2000
1100/2000
1200/2000
1300/2000
1400/2000
1500/2000
1600/2000
1700/2000
1800/2000
1900/2000


In [9]:
import generate_sentinels

# Margin that keeps the chosen sentinel location away from the image border,
# where it would be hard to spot
border_padding = 100

# Create the sentinel images and their codecharts for the configured buckets
generate_sentinels.generate_sentinels(
    sentinel_image_dir,
    sentinel_CC_dir,
    num_buckets,
    start_bucket_at,
    sentinel_images_per_bucket,
    image_width,
    image_height,
    border_padding,
    target_type,
    target_imdir,
)

In [10]:
from generate_tutorials import generate_tutorials

# Same border margin as the sentinel cell; repeated here so this cell can run on its own
border_padding = 100

# Folder that receives the processed (padded) tutorial images
tutorial_image_dir = os.path.join(rootdir,'tutorial_images')
if not os.path.exists(tutorial_image_dir):
    print('Creating directory %s'%(tutorial_image_dir))
    os.makedirs(tutorial_image_dir)

# Gather every .jpeg / .png / .jpg file sitting directly inside tutorial_source_dir
allfiles = [
    path
    for pattern in ('*.jpeg', '*.png', '*.jpg')
    for path in glob.glob(os.path.join(tutorial_source_dir, pattern))
]
print("len allfiles", len(allfiles))

# Pad the tutorial images, passing the experiment image dimensions as the maximum size
create_padded_image_dir.save_padded_images(
    tutorial_image_dir,
    allfiles,
    toplot=False,
    maxwidth=image_width,
    maxheight=image_height,
)

# TODO: or pick a random set of images to serve as tutorial images

# Number of images to use for tutorials; subject files sample from these.
# Keep it larger than num_imgs_per_tutorial so subject files do not all share the same tutorial.
N = 6

# Number of sentinels to use for tutorials.
# If equal to num_sentinels_per_tutorial, every subject file gets the same tutorial sentinels.
N_sent = 6

generate_tutorials(
    tutorial_image_dir,
    rootdir,
    image_width,
    image_height,
    border_padding,
    N,
    target_type,
    target_imdir,
    N_sent,
)


Creating directory ./task_data/tutorial_images
len allfiles 9
Padding 9 image files to dimensions: [1786,1340]...
Done!


In [11]:
## GENERATING SUBJECT FILES
# For every bucket in which_buckets, writes num_subject_files JSON trial lists twice:
#   <rootdir>/subject_files/<bucket>/subject_file_<n>.json       (participant-facing)
#   <rootdir>/full_subject_files/<bucket>/subject_file_<n>.json  (same trials plus coordinates, for analysis)
# Each trial list is: shuffled tutorial trials, then real images, with sentinels
# inserted at roughly regular intervals after the tutorial.
# NOTE: the random / numpy generators are not seeded here, so every run produces
# different subject files.


def _build_entries(image, codechart, meta, flag, is_sentinel=False):
    """Return the (participant-facing, full) records for a single trial.

    image       -- path stored under "image"
    codechart   -- path stored under "codechart"
    meta        -- dict read from one of the *_full.json files; must contain
                   'valid_codes' and 'coordinates', plus 'correct_code' and
                   'coordinate' when is_sentinel is True
    flag        -- value stored under "flag" ('real', 'sentinel', 'tutorial_real', ...)
    is_sentinel -- when True, also records the correct code (both records) and
                   its coordinate (full record only)

    The full record is a shallow copy of the participant-facing record with the
    coordinate information added.
    """
    entry = {"image": image, "codechart": codechart}
    if is_sentinel:
        entry["correct_code"] = meta['correct_code']
    entry["codes"] = meta['valid_codes']
    entry["flag"] = flag

    full_entry = entry.copy()
    if is_sentinel:
        full_entry["coordinate"] = meta['coordinate']   # location of the correct code
    full_entry["coordinates"] = meta['coordinates']     # (x, y) location of each triplet
    return entry, full_entry


def _require_at_least(available, needed, what):
    """Fail early with a readable message instead of an IndexError mid-way through writing files."""
    if available < needed:
        raise ValueError('Need at least %d %s for one subject file but only found %d'
                         % (needed, what, available))


start_subjects_at = 0     # where to start naming subject files at (if had already created files)
bucket0_subjdir = os.path.join(rootdir,'subject_files/bucket0')
if os.path.exists(bucket0_subjdir):
    subjfiles = glob.glob(os.path.join(bucket0_subjdir,'*.json'))
    start_subjects_at = len(subjfiles)

real_codecharts = glob.glob(os.path.join(real_CC_dir,'*.jpg'))
sentinel_codecharts = glob.glob(os.path.join(sentinel_CC_dir,'*.jpg'))  # not used in this cell; kept for later inspection

with open(os.path.join(real_CC_dir,'CC_codes_full.json')) as f:
    real_codes_data = json.load(f) # contains mapping of image path to valid codes

# Create both output roots independently, so a half-created layout from an
# interrupted run neither crashes here nor leaves one of them missing.
subjdir = os.path.join(rootdir,'subject_files')
os.makedirs(subjdir, exist_ok=True)
os.makedirs(os.path.join(rootdir,'full_subject_files'), exist_ok=True)

with open(os.path.join(rootdir,'tutorial_full.json')) as f:
    tutorial_data = json.load(f)

tutorial_real_filenames = [fn for fn, meta in tutorial_data.items() if meta['flag']=='tutorial_real']
tutorial_sentinel_filenames = [fn for fn, meta in tutorial_data.items() if meta['flag']=='tutorial_sentinel']

use_tutorial_sentinels = add_sentinels_to_tutorial and num_sentinels_per_tutorial>0

_require_at_least(len(tutorial_real_filenames), num_imgs_per_tutorial, 'tutorial images')
if use_tutorial_sentinels:
    _require_at_least(len(tutorial_sentinel_filenames), num_sentinels_per_tutorial, 'tutorial sentinels')
_require_at_least(len(real_codecharts), num_images_per_sf, 'codecharts in %s'%(real_CC_dir))

# iterate over all buckets
for bucket_num in which_buckets:

    bucket = 'bucket%d'%(bucket_num)
    img_bucket_dir = os.path.join(real_image_dir,bucket)
    img_files = []
    for ext in ('*.jpeg', '*.png', '*.jpg'):
        img_files.extend(glob.glob(os.path.join(img_bucket_dir, ext)))

    sentinel_bucket_dir = os.path.join(sentinel_image_dir,bucket)
    sentinel_files = glob.glob(os.path.join(sentinel_bucket_dir,'*.jpg'))

    with open(os.path.join(sentinel_bucket_dir,'sentinel_codes_full.json')) as f:
        sentinel_codes_data = json.load(f) # contains mapping of image path to valid codes

    _require_at_least(len(img_files), num_images_per_sf, 'images in %s'%(img_bucket_dir))
    _require_at_least(len(sentinel_files), num_sentinels_per_sf, 'sentinel images in %s'%(sentinel_bucket_dir))

    subjdir = os.path.join(rootdir,'subject_files',bucket)
    full_subjdir = os.path.join(rootdir,'full_subject_files',bucket)
    os.makedirs(subjdir, exist_ok=True)
    os.makedirs(full_subjdir, exist_ok=True)

    # Never number below what this bucket already holds: the bucket0-based offset
    # alone would overwrite existing files in a bucket that has more of them.
    first_subj_num = max(start_subjects_at, len(glob.glob(os.path.join(subjdir,'*.json'))))

    # for each bucket, generate subject files
    for i in range(num_subject_files):

        # Fresh random selection for every subject: the first k items after each
        # shuffle are the ones used below.
        random.shuffle(img_files)
        random.shuffle(sentinel_files)
        random.shuffle(real_codecharts)
        random.shuffle(tutorial_real_filenames)
        random.shuffle(tutorial_sentinel_filenames)

        # ADDING TUTORIALS
        # Collect real + sentinel tutorial trials first, then shuffle them together.
        tutorial_pairs = [
            _build_entries(fn, tutorial_data[fn]['codechart_file'], tutorial_data[fn], 'tutorial_real')
            for fn in tutorial_real_filenames[:num_imgs_per_tutorial]
        ]
        if use_tutorial_sentinels:
            tutorial_pairs += [
                _build_entries(fn, tutorial_data[fn]['codechart_file'], tutorial_data[fn],
                               'tutorial_sentinel', is_sentinel=True)
                for fn in tutorial_sentinel_filenames[:num_sentinels_per_tutorial]
            ]

        sf_data = []
        full_sf_data = []
        # One permutation drives both lists so sf_data and full_sf_data stay aligned
        for k in np.random.permutation(len(tutorial_pairs)):
            entry, full_entry = tutorial_pairs[k]
            sf_data.append(entry)
            full_sf_data.append(full_entry)
        tutorial_len = len(sf_data)

        # ADDING REAL IMAGES
        # Lists were just shuffled, so pairing the first images with the first
        # codecharts gives each image a random codechart.
        for img_path, cc_path in zip(img_files[:num_images_per_sf], real_codecharts[:num_images_per_sf]):
            entry, full_entry = _build_entries(img_path, cc_path, real_codes_data[cc_path], 'real')
            sf_data.append(entry)
            full_sf_data.append(full_entry)

        ## ADDING SENTINEL IMAGES
        if num_sentinels_per_sf > 0:  # also avoids dividing by zero below
            sentinel_spacing = int(num_images_per_sf/float(num_sentinels_per_sf))
            # Start just past the tutorial as it was actually built (it holds no
            # sentinels when add_sentinels_to_tutorial is off).
            insertat = tutorial_len + 1
            for sentinel_pathname in sentinel_files[:num_sentinels_per_sf]:
                sentinel_meta = sentinel_codes_data[sentinel_pathname]
                entry, full_entry = _build_entries(sentinel_pathname, sentinel_meta['codechart_file'],
                                                   sentinel_meta, 'sentinel', is_sentinel=True)

                # Jitter the gap by -1/0/+1 so sentinel positions are not predictable
                insertat = insertat + random.randint(sentinel_spacing-1, sentinel_spacing+1)
                # Stay before the final trial, and never move back into the tutorial
                insertat = max(tutorial_len, min(insertat, len(sf_data)-1))

                sf_data.insert(insertat, entry)
                full_sf_data.insert(insertat, full_entry)

        # Add the position of each trial within the subject file
        for image_id, (entry, full_entry) in enumerate(zip(sf_data, full_sf_data)):
            entry['index'] = image_id
            full_entry['index'] = image_id

        subj_num = first_subj_num+i
        with open(os.path.join(subjdir,'subject_file_%d.json'%(subj_num)), 'w') as outfile:
            print(outfile.name)
            json.dump(sf_data, outfile)
        with open(os.path.join(full_subjdir,'subject_file_%d.json'%(subj_num)), 'w') as outfile:
            json.dump(full_sf_data, outfile)

./task_data/subject_files/bucket0/subject_file_0.json
./task_data/subject_files/bucket0/subject_file_1.json
./task_data/subject_files/bucket0/subject_file_2.json
./task_data/subject_files/bucket0/subject_file_3.json
./task_data/subject_files/bucket0/subject_file_4.json
./task_data/subject_files/bucket0/subject_file_5.json
./task_data/subject_files/bucket0/subject_file_6.json
./task_data/subject_files/bucket0/subject_file_7.json
./task_data/subject_files/bucket0/subject_file_8.json
./task_data/subject_files/bucket0/subject_file_9.json
./task_data/subject_files/bucket0/subject_file_10.json
./task_data/subject_files/bucket0/subject_file_11.json
./task_data/subject_files/bucket0/subject_file_12.json
./task_data/subject_files/bucket0/subject_file_13.json
./task_data/subject_files/bucket0/subject_file_14.json
./task_data/subject_files/bucket0/subject_file_15.json
./task_data/subject_files/bucket0/subject_file_16.json
./task_data/subject_files/bucket0/subject_file_17.json
./task_data/subject_

./task_data/subject_files/bucket0/subject_file_151.json
./task_data/subject_files/bucket0/subject_file_152.json
./task_data/subject_files/bucket0/subject_file_153.json
./task_data/subject_files/bucket0/subject_file_154.json
./task_data/subject_files/bucket0/subject_file_155.json
./task_data/subject_files/bucket0/subject_file_156.json
./task_data/subject_files/bucket0/subject_file_157.json
./task_data/subject_files/bucket0/subject_file_158.json
./task_data/subject_files/bucket0/subject_file_159.json
./task_data/subject_files/bucket0/subject_file_160.json
./task_data/subject_files/bucket0/subject_file_161.json
./task_data/subject_files/bucket0/subject_file_162.json
./task_data/subject_files/bucket0/subject_file_163.json
./task_data/subject_files/bucket0/subject_file_164.json
./task_data/subject_files/bucket0/subject_file_165.json
./task_data/subject_files/bucket0/subject_file_166.json
./task_data/subject_files/bucket0/subject_file_167.json
./task_data/subject_files/bucket0/subject_file_1