# Data Prep for Crowd Annotation Pipeline

1. Collect raw data 
2. Adjust contrast of images
3. Chop up images into manageable pieces
4. Make into montages
5. Upload to Figure8

Files are named by these scripts such that the code blocks can run back-to-back with minimal input. For this reason, it is recommended that users run through the whole pipeline before processing another set of images.

In [3]:
# import statements
from __future__ import absolute_import


from io import BytesIO

from IPython.display import Image
from ipywidgets import interact, interactive, fixed
import matplotlib as mpl
from skimage import data, filters, io, img_as_uint
import numpy as np
import skimage as sk
import os
from scipy import ndimage
import scipy
import sys
from imageio import imread
import matplotlib.pyplot as plt

import scipy.ndimage as ndi

%matplotlib inline

from dcde.pre_annotation.montage_makers import montage_maker, multiple_montage_maker
from dcde.pre_annotation.overlapping_chopper import overlapping_crop_dir
from dcde.pre_annotation.aws_upload import aws_upload, upload
from dcde.pre_annotation.montage_to_csv import csv_maker
from dcde.pre_annotation.fig_eight_upload import fig_eight
from dcde.pre_annotation.contrast_adjustment import contrast
from dcde.utils.io_utils import get_img_names
#from dcde.utils.widget_utils import arr2img, choose_img, edit_image

In [5]:
#sometimes raw images are in .tif stacks, not individual .tif files
#optional code block for turning into individual slices

## 2. Adjust contrast of images
Before doing anything else, we need to adjust the contrast of the raw data. contrast_adjustment blurs the data using a gaussian filter, finds the edges, inverts, and does additional equalization if needed. The user defines the parameters needed using the widgets below.

In [4]:
# Define path to desired raw directory
base_dir = "/gnv_home/data/Valentine/"
raw_folder = "Valentine_Svensson_20x_images"
identifier = "Val_test"

dirpath = os.path.join(base_dir, raw_folder)

In [22]:
# Functions that the notebook will play with interactively to run widgets
def arr2img(arr):
    """Display a 2- or 3-d numpy array as an image."""
    if arr.ndim == 2:
        format, cmap = 'png', mpl.cm.gray
    elif arr.ndim == 3:
        format, cmap = 'jpg', None
    else:
        raise ValueError("Only 2- or 3-d arrays can be displayed as images.")
    # Don't let matplotlib autoscale the color range so we can control overall luminosity
    vmax = 255 if arr.dtype == 'uint8' else 1.0
    with BytesIO() as buffer:
        mpl.image.imsave(buffer, arr, format=format, cmap=cmap)
        out = buffer.getvalue()
    return Image(out)

def choose_img(name):
    """Used to choose which image we want to use for the widget tester"""
    global img
    filepath = os.path.join(dirpath, name)
    img = imread(filepath)
    return arr2img(img)

def edit_image(image, blur=1.0, sobel_toggle = True, sobel_factor = 100, invert_img = True, gamma_adjust = 1.0, equalize_hist=False, equalize_adapthist=False):
    """Used to edit the image using the widget tester"""
    global sigma
    global sobel_option
    global sobel
    global hist
    global adapthist
    global gamma
    global invert
    
    new_image = filters.gaussian(image, sigma=blur, multichannel=False)
    
    if sobel_toggle:
        new_image += sobel_factor *sk.filters.sobel(new_image)
    new_image = sk.exposure.adjust_gamma(new_image, gamma_adjust, gain = 1)
    if invert_img:
        new_image[:] = -1.0*new_image[:]
    new_image=sk.exposure.rescale_intensity(new_image, in_range = 'image', out_range = 'float')
    
    if(equalize_hist == True):
        #new_image=sk.exposure.rescale_intensity(new_image, in_range = 'image', out_range = 'np.uint16')
        new_image = sk.exposure.equalize_hist(new_image, nbins=256, mask=None)
        
    if(equalize_adapthist == True):
        new_image = sk.exposure.equalize_adapthist(new_image, kernel_size=None, clip_limit=0.01, nbins=256)
     
    new_image = sk.exposure.rescale_intensity(new_image, in_range = 'image', out_range = np.uint8)
    new_image = new_image.astype(np.uint8)
    
    hist = equalize_hist
    adapthist = equalize_adapthist
    sigma = blur
    gamma = gamma_adjust
    invert = invert_img
    sobel = sobel_factor
    sobel_option = sobel_toggle
    
    return arr2img(new_image)

In [6]:
# Choose which raw image you would like to use to test on the contrast adjustment
interact(choose_img, name=get_img_names(dirpath));

interactive(children=(Dropdown(description='name', options=('IMG_P1_A1_20x_1.tif', 'IMG_P1_A1_20x_2.tif', 'IMG…

In [23]:
# Test with choosen image to fix adjustment parameters
interact(edit_image, image=fixed(img), sigma=(0.0,4,0.3), gamma_adjust=(0.1,4,0.1), sobel_factor=(10,10000,100));

interactive(children=(FloatSlider(value=1.0, description='blur', max=3.0, min=-1.0), Checkbox(value=True, desc…

In [8]:
# With choosen parameters, process all the raw data in the folder
contrast(base_dir, raw_folder, identifier, sigma, hist, adapthist, gamma, sobel_option, sobel, invert)

Processed data will be located at /gnv_home/data/Valentine/Valentine_Svensson_20x_images_contrast_adjusted
Processing image 1 of 56


  .format(dtypeobj_in, dtypeobj_out))


Processing image 2 of 56
Processing image 3 of 56
Processing image 4 of 56
Processing image 5 of 56
Processing image 6 of 56
Processing image 7 of 56
Processing image 8 of 56
Processing image 9 of 56
Processing image 10 of 56
Processing image 11 of 56
Processing image 12 of 56
Processing image 13 of 56
Processing image 14 of 56
Processing image 15 of 56
Processing image 16 of 56
Processing image 17 of 56
Processing image 18 of 56
Processing image 19 of 56
Processing image 20 of 56
Processing image 21 of 56
Processing image 22 of 56
Processing image 23 of 56
Processing image 24 of 56
Processing image 25 of 56
Processing image 26 of 56
Processing image 27 of 56
Processing image 28 of 56
Processing image 29 of 56
Processing image 30 of 56
Processing image 31 of 56
Processing image 32 of 56
Processing image 33 of 56
Processing image 34 of 56
Processing image 35 of 56
Processing image 36 of 56
Processing image 37 of 56
Processing image 38 of 56
Processing image 39 of 56
Processing image 40 

## 3. Chop up images into manageable pieces

Each full-size image usually has many cells in it. This makes them difficult to fully annotate! For ease of annotation (and better results), each frame is chopped up into smaller, overlapping frames, ultimately creating a set of movies. 

These smaller movies can be made with overlapping edges, making it easier to stitch annotations together into one large annotated movie (in the post-annotation pipeline). A large overlap will result in redundant annotations.

Even if you want to process the full-sized image, run the chopper with num_segments of 1. The montage makers are written to run on the output of the chopper.

In [8]:
# base_direc = "/home/geneva/Desktop/Nb_testing/"
# raw_direc = os.path.join(base_direc, "MouseBrain_s7_nuclear")
# identifier = "MouseBrain_s7_nuc"

num_x_segments = 5
num_y_segments = 5
overlap_perc = 10

In [9]:
raw_direc = "/gnv_home/contrast_test/pics181220/raw_contrast_adjusted"
overlapping_crop_dir(raw_direc, identifier, num_x_segments, num_y_segments, overlap_perc)

Current Image Size:  (567, 1171)
Correct? (y/n): y
Your new images will be  280  pixels by  135  pixels big.
Processing...
Cropped files saved to /gnv_home/contrast_test/pics181220/raw_contrast_adjusted_chopped_5_5


## 4. Make into montages
multiple_montage_maker is written to run on the output of the chopper, ie the folder where each chopped movie folder is saved. It will make montages of each subfolder according to the variables specified. It will make more than one montage per subfolder if there are enough frames to do so.

The variables used in multiple_montage_maker are saved in a JSON file so they can be reused in post-annotation processing.

In [13]:
montage_len = 10

direc = raw_direc + "_chopped_" + str(num_x_segments) + "_" + str(num_y_segments)
#direc = "/home/geneva/Desktop/Nb_testing/nuclear_test_chopped_4_4"

save_direc = os.path.join(base_dir, identifier + "_montages_" + str(num_x_segments) + "_" + str(num_y_segments))
#save_direc = "/home/geneva/Desktop/Nb_testing/montages"

log_direc = os.path.join(base_dir, "json_logs")

row_length = 5
x_buffer = 5
y_buffer = 5

In [14]:
multiple_montage_maker(montage_len, direc, save_direc, identifier, 
                       num_x_segments, num_y_segments, row_length, x_buffer, y_buffer, log_direc)

Now montaging images from: test_x_00_y_00
You will be able to make 1 montages from this movie.
The last 1 frames will not be used in a montage. 



ValueError: could not broadcast input array from shape (122,313) into shape (135,280)

## 5. Upload to Figure Eight
Now that the images are processed into montages, they need to be uploaded to an AWS bucket and submitted to Figure Eight. This involves uploading the files to AWS, making a CSV file with the links to the uploaded images, and using that CSV file to create a Figure Eight job.

### Upload files to AWS
aws_upload will look for image files in the specified directory (folder_to_upload, set by default to be wherever the output of multiple_montage_maker was saved) and upload them into a bucket.

For the Van Valen lab, the default bucket is "figure-eight-deepcell" and keys (aws_folder + file names) correspond to the file structure of our data server.

aws_upload returns a list of the urls to which images were uploaded.

In [None]:
#import os

bucket_name = "figure-eight-deepcell" #default
aws_folder = "MouseBrain/set7"
folder_to_upload = save_direc #usually .../montages
#data_to_upload = "/home/geneva/Desktop/Nb_testing/montages/"

uploaded_montages = aws_upload(bucket_name, aws_folder, folder_to_upload)

#os.path.join("https://s3.us-east-2.amazonaws.com", bucket_name, aws_folder)
#print(uploaded_montages)
#from io_utils import get_img_names
#imgs_to_upload = get_img_names(folder_to_upload)
#for index, img in enumerate(imgs_to_upload):
#    print(img)
#    print(os.path.join(folder_to_upload, img))

### Make CSV file
Figure Eight jobs can be created easily by using a CSV file where each row contains information about one task. For our jobs, each row has the link to the location of one montage, and information about that montage (currently, just the "identifier" specified at the beginning of the pipeline). The CSV file is saved as "identifier".csv in a folder that only holds CSVs. CSV folders are usually in cell-type directories, so identifiers should be able to distinguish between sets, parts, etc.

In [None]:
#identifier = "test"
csv_direc = os.path.join(base_direc, "CSV")

In [None]:
csv_maker(uploaded_montages, identifier, csv_direc)

### Create Figure Eight job
The Figure Eight API allows us to create a new job and upload data to it from this notebook. However, since our jobs don't include required test questions, editing job information such as the title of the job must be done via the website. This section of the notebook uses the API to create a job and upload data to it, then reminds the user to finish editing the job on the website.

Some sample job IDs to copy are provided below.

In [None]:
#job_id_to_copy = 1344258 #Elowitz timelapse RFP pilot
job_id_to_copy = 1346216 #Deepcell MouseBrain 3x5
#job_id_to_copy = 1306431 #Deepcell overlapping Mibi
#job_id_to_copy = 1292179 #Deepcell HEK
#job_id_to_copy =

In [None]:
from dcde.pre_annotation.fig_eight_upload import fig_eight

fig_eight(csv_direc, identifier, job_id_to_copy)