# Create hybridization.json from a directory of image files

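This notebook creates the starfish JSON files (`hybridization.json`, `nuclei.json`, `experiment.json`, and `codebook.json`) for several directories of exFISH image files, and shows how to load the resulting data using starfish.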

In [1]:
import os
import re
import json
import numpy as np
import glob
from skimage.io import imread, imsave
from collections import Counter, OrderedDict
from typing import Mapping, Dict, List, Generator, Tuple

from starfish.constants import Indices
import hashlib
from itertools import product

  return f(*args, **kwds)


In [2]:
def file_hash(filename: str) -> str:
    """Compute the sha256 digest of a file's contents.

    The file is read in 128 KiB chunks, so it never has to be held in memory
    all at once.

    Parameters
    ----------
    filename : str
        path of the file to hash

    Returns
    -------
    str :
        hexadecimal sha256 digest of the file's bytes
    """
    chunk_size = 128 * 1024
    digest = hashlib.sha256()
    with open(filename, 'rb', buffering=0) as stream:
        while True:
            chunk = stream.read(chunk_size)
            # an empty bytes object signals end-of-file
            if chunk == b'':
                break
            digest.update(chunk)
    return digest.hexdigest()

In [3]:
# Minimal experiment description: a format version plus the names of the json
# files that describe the hybridization images and the auxiliary (nuclei) images.
experiment_metadata = dict(
    version="0.0.0",
    hybridization_images="hybridization.json",
    auxiliary_images=dict(nuclei="nuclei.json"),
)

## Write a few methods to generate JSON data

The cells that actually call these functions are in the next section.

In [93]:
def exFISH_files_to_indices(glob_pattern: str, regex: str) \
        -> Generator[Tuple[str, Dict[Indices, int]], None, None]:
    """yield tile metadata parsed from the names of the files matched by a glob pattern

    Parameters
    ----------
    glob_pattern : str
        glob pattern selecting the image files to describe
    regex : str
        pattern (string or compiled) applied with `re.match` to each file's basename. It must
        define two groups, in this order: the z-plane number and the channel number. Both are
        taken as 1-based in the file name and converted to 0-based indices.

    Notes
    -----
    - For exFISH, imaging round is not used, so its index is always 0.
    - Files are processed in sorted order so that the output is reproducible; `glob` itself
      returns matches in arbitrary order.

    Yields
    ------
    Tuple[str, Dict[Indices, int]] :
        tuple of filename and a dictionary that contains the tile metadata (channel, imaging round, and z-plane)

    Raises
    ------
    ValueError
        if a file selected by `glob_pattern` has a basename that `regex` does not match
    """
    for file_path in sorted(glob.glob(glob_pattern)):
        basename = os.path.basename(file_path)
        match = re.match(regex, basename)
        if match is None:
            # fail with the offending file instead of an AttributeError on None
            pattern_text = getattr(regex, 'pattern', regex)
            raise ValueError(
                f"file name {basename!r} does not match the pattern {pattern_text!r}")
        raw_z, raw_channel = match.groups()
        # file names count from 1; tile indices count from 0
        z = int(raw_z) - 1
        channel = int(raw_channel) - 1
        yield file_path, {Indices.CH: channel, Indices.ROUND: 0, Indices.Z: z}

        

        
def create_imaging_json(
    files_to_indices_map: Dict[str, Dict[Indices, int]], default_tile_format='TIFF') \
        -> dict:
    """Creates an imaging json document that specifies how the image files construct a 5-d image tensor

    Parameters
    ----------
    files_to_indices_map : Dict[str, Dict[Indices, int]] or iterable of (str, Dict[Indices, int]) pairs
        file names paired with the indices (round, channel, z-plane) for that file. Either a
        mapping or an iterable/generator of `(file_name, indices)` tuples is accepted.
    default_tile_format : str
        value stored under "default_tile_format" in the result (default 'TIFF')

    Returns
    -------
    dict :
        imaging json document in starfish v0.0.0 format

    Raises
    ------
    ValueError
        if no files are provided, since the tile shape and tensor shape cannot be determined

    Notes
    -----
    - The tile shape is read from the first file only and recorded as the default for all tiles.
    - The "coordinates" of every tile are the same hard-coded placeholder values.
    - Only the basename of each file is recorded, alongside the sha256 hash of its contents.
    """
    # a mapping is iterated as (file, indices) pairs; anything else is assumed to
    # already yield such pairs (this is how the generator above is consumed)
    if hasattr(files_to_indices_map, "items"):
        file_index_pairs = files_to_indices_map.items()
    else:
        file_index_pairs = files_to_indices_map

    tiles = []
    default_tile_shape = None

    for file_name, tile_indices in file_index_pairs:
        if default_tile_shape is None:
            default_tile_shape = list(imread(file_name).shape)
        hash_ = file_hash(file_name)
        tiles.append(
            {
                "coordinates": {
                    "x": [0, 0.0001],
                    "y": [0, 0.0001],
                    "z": [0, 0.0001],
                },
                "indices": {k.value: v for (k, v) in tile_indices.items()},
                "file": os.path.basename(file_name),
                "sha256": hash_
            }
        )

    if not tiles:
        raise ValueError(
            "no image files were provided; check that the glob pattern matches the image files")

    # Extent of each axis is one more than the largest 0-based index seen. The tile
    # indices were stored under the enum *values*, so look them up the same way.
    rounds = 1 + max(t["indices"][Indices.ROUND.value] for t in tiles)
    channels = 1 + max(t["indices"][Indices.CH.value] for t in tiles)
    z_planes = 1 + max(t["indices"][Indices.Z.value] for t in tiles)

    return {
        "version": "0.0.0",
        # index axes are listed in the key order of the last tile's indices
        "dimensions": ["x", "y"] + list(tiles[-1]["indices"]),
        "default_tile_shape": default_tile_shape,
        "default_tile_format": default_tile_format,
        "shape": {
            f"{Indices.ROUND.value}": rounds,
            f"{Indices.CH.value}": channels,
            f"{Indices.Z.value}": z_planes
        },
        "tiles": tiles
    }

# Placeholder codebook: one made-up codeword mapped to a single target.
exFISH_codebook = [
    dict(codeword=dict(c=0, h=0, v=1), target="gene_1"),
]

# Create the starfish specification files

In [95]:
# file names carry the z-plane and the channel as three-digit numbers
regex = re.compile(r'40x_ROI1_YFP_488_ActB_z([0-9]{3})_c([0-9]{3}).tif')

# note: this is a path prefix (folder + common file-name stem), used to build the glob patterns
DIRECTORY = os.path.expanduser("~/google_drive/starfish/data/exFISH/YFP_brain_slice/fov_001/40x_ROI1_YFP_488_ActB")

# lazily pair each file with its tile metadata: c002 files are nuclei, c001 files are the gene
nuclei_indices = exFISH_files_to_indices(DIRECTORY + '*c002*.tif', regex)
gene_indices = exFISH_files_to_indices(DIRECTORY + '*c001*.tif', regex)

# build the hybridization json first, then the nuclei json
hyb_json = create_imaging_json(gene_indices)
nuclei_json = create_imaging_json(nuclei_indices)

# write all four json files into the folder that holds the images
output_dir = os.path.dirname(DIRECTORY)
for json_name, payload in (
    ('hybridization.json', hyb_json),
    ('nuclei.json', nuclei_json),
    ('experiment.json', experiment_metadata),
    ('codebook.json', exFISH_codebook),
):
    with open(os.path.join(output_dir, json_name), 'w') as f:
        json.dump(payload, f)

In [96]:
# file names carry the z-plane and the channel as three-digit numbers
regex = re.compile(r'40x_ROI2_YFP_488_ActB_z([0-9]{3})_c([0-9]{3}).tif')

# note: this is a path prefix (folder + common file-name stem), used to build the glob patterns
DIRECTORY = os.path.expanduser("~/google_drive/starfish/data/exFISH/YFP_brain_slice/fov_002/40x_ROI2_YFP_488_ActB")

# lazily pair each file with its tile metadata: c002 files are nuclei, c001 files are the gene
nuclei_indices = exFISH_files_to_indices(DIRECTORY + '*c002*.tif', regex)
gene_indices = exFISH_files_to_indices(DIRECTORY + '*c001*.tif', regex)

# build the hybridization json first, then the nuclei json
hyb_json = create_imaging_json(gene_indices)
nuclei_json = create_imaging_json(nuclei_indices)

# write all four json files into the folder that holds the images
output_dir = os.path.dirname(DIRECTORY)
for json_name, payload in (
    ('hybridization.json', hyb_json),
    ('nuclei.json', nuclei_json),
    ('experiment.json', experiment_metadata),
    ('codebook.json', exFISH_codebook),
):
    with open(os.path.join(output_dir, json_name), 'w') as f:
        json.dump(payload, f)

In [97]:
# file names carry the z-plane and the channel as three-digit numbers
regex = re.compile(r'40x_A1_TFRC_1_z([0-9]{3})_c([0-9]{3}).tif')

# note: this is a path prefix (folder + common file-name stem), used to build the glob patterns
DIRECTORY = os.path.expanduser("~/google_drive/starfish/data/exFISH/cultured_cells/041216_After_006/fov_001/40x_A1_TFRC_1_z")

# lazily pair each file with its tile metadata: here c001 files are nuclei, c002 files are the gene
nuclei_indices = exFISH_files_to_indices(DIRECTORY + '*c001*.tif', regex)
gene_indices = exFISH_files_to_indices(DIRECTORY + '*c002*.tif', regex)

# build the hybridization json first, then the nuclei json
hyb_json = create_imaging_json(gene_indices)
nuclei_json = create_imaging_json(nuclei_indices)

# write all four json files into the folder that holds the images
output_dir = os.path.dirname(DIRECTORY)
for json_name, payload in (
    ('hybridization.json', hyb_json),
    ('nuclei.json', nuclei_json),
    ('experiment.json', experiment_metadata),
    ('codebook.json', exFISH_codebook),
):
    with open(os.path.join(output_dir, json_name), 'w') as f:
        json.dump(payload, f)

In [98]:
# file names carry the z-plane and the channel as three-digit numbers
regex = re.compile(r'40x_A2_ActB_3_z([0-9]{3})_c([0-9]{3}).tif')

# note: this is a path prefix (folder + common file-name stem), used to build the glob patterns
DIRECTORY = os.path.expanduser("~/google_drive/starfish/data/exFISH/cultured_cells/041216_After_006/fov_002/40x_A2_ActB_3_z")

# lazily pair each file with its tile metadata: here c001 files are nuclei, c002 files are the gene
nuclei_indices = exFISH_files_to_indices(DIRECTORY + '*c001*.tif', regex)
gene_indices = exFISH_files_to_indices(DIRECTORY + '*c002*.tif', regex)

# build the hybridization json first, then the nuclei json
hyb_json = create_imaging_json(gene_indices)
nuclei_json = create_imaging_json(nuclei_indices)

# write all four json files into the folder that holds the images
output_dir = os.path.dirname(DIRECTORY)
for json_name, payload in (
    ('hybridization.json', hyb_json),
    ('nuclei.json', nuclei_json),
    ('experiment.json', experiment_metadata),
    ('codebook.json', exFISH_codebook),
):
    with open(os.path.join(output_dir, json_name), 'w') as f:
        json.dump(payload, f)

In [99]:
# file names carry the z-plane and the channel as three-digit numbers
regex = re.compile(r'40x_A3_Top2A_5_z([0-9]{3})_c([0-9]{3}).tif')

# note: this is a path prefix (folder + common file-name stem), used to build the glob patterns
DIRECTORY = os.path.expanduser("~/google_drive/starfish/data/exFISH/cultured_cells/041216_After_006/fov_003/40x_A3_Top2A_5_z")

# lazily pair each file with its tile metadata: here c001 files are nuclei, c002 files are the gene
nuclei_indices = exFISH_files_to_indices(DIRECTORY + '*c001*.tif', regex)
gene_indices = exFISH_files_to_indices(DIRECTORY + '*c002*.tif', regex)

# build the hybridization json first, then the nuclei json
hyb_json = create_imaging_json(gene_indices)
nuclei_json = create_imaging_json(nuclei_indices)

# write all four json files into the folder that holds the images
output_dir = os.path.dirname(DIRECTORY)
for json_name, payload in (
    ('hybridization.json', hyb_json),
    ('nuclei.json', nuclei_json),
    ('experiment.json', experiment_metadata),
    ('codebook.json', exFISH_codebook),
):
    with open(os.path.join(output_dir, json_name), 'w') as f:
        json.dump(payload, f)

In [100]:
# file names carry the z-plane and the channel as three-digit numbers
regex = re.compile(r'60x_A2_After_EEF2_TxR_z([0-9]{3})_c([0-9]{3}).tif')

# note: this is a path prefix (folder + common file-name stem), used to build the glob patterns
DIRECTORY = os.path.expanduser("~/google_drive/starfish/data/exFISH/cultured_cells/040216_After_006/fov_001/60x_A2_After_EEF2_TxR_z")

# lazily pair each file with its tile metadata: c002 files are nuclei, c001 files are the gene
nuclei_indices = exFISH_files_to_indices(DIRECTORY + '*c002*.tif', regex)
gene_indices = exFISH_files_to_indices(DIRECTORY + '*c001*.tif', regex)

# build the hybridization json first, then the nuclei json
hyb_json = create_imaging_json(gene_indices)
nuclei_json = create_imaging_json(nuclei_indices)

# write all four json files into the folder that holds the images
output_dir = os.path.dirname(DIRECTORY)
for json_name, payload in (
    ('hybridization.json', hyb_json),
    ('nuclei.json', nuclei_json),
    ('experiment.json', experiment_metadata),
    ('codebook.json', exFISH_codebook),
):
    with open(os.path.join(output_dir, json_name), 'w') as f:
        json.dump(payload, f)

In [101]:
# file names carry the z-plane and the channel as three-digit numbers
regex = re.compile(r'60x_A2_After_UBC_647_z([0-9]{3})_c([0-9]{3}).tif')

# note: this is a path prefix (folder + common file-name stem), used to build the glob patterns
DIRECTORY = os.path.expanduser("~/google_drive/starfish/data/exFISH/cultured_cells/040216_After_006/fov_002/60x_A2_After_UBC_647_z")

# lazily pair each file with its tile metadata: c002 files are nuclei, c001 files are the gene
nuclei_indices = exFISH_files_to_indices(DIRECTORY + '*c002*.tif', regex)
gene_indices = exFISH_files_to_indices(DIRECTORY + '*c001*.tif', regex)

# build the hybridization json first, then the nuclei json
hyb_json = create_imaging_json(gene_indices)
nuclei_json = create_imaging_json(nuclei_indices)

# write all four json files into the folder that holds the images
output_dir = os.path.dirname(DIRECTORY)
for json_name, payload in (
    ('hybridization.json', hyb_json),
    ('nuclei.json', nuclei_json),
    ('experiment.json', experiment_metadata),
    ('codebook.json', exFISH_codebook),
):
    with open(os.path.join(output_dir, json_name), 'w') as f:
        json.dump(payload, f)

Now we can read the data into starfish. Below, we load the experiment written by the last cell above and display its first channel (CY3).

In [102]:
from starfish.io import Stack
# DIRECTORY holds whatever the most recently executed cell above assigned to it,
# so this points at the experiment.json written next to that cell's images.
experiment_json = os.path.join(os.path.dirname(DIRECTORY), 'experiment.json')
# load the experiment into starfish from that json file
s = Stack.from_experiment_json(experiment_json)

In [104]:
import warnings
# suppress FutureWarning only while the stack is being displayed;
# the previous warning filters are restored when the block exits
with warnings.catch_warnings():
    warnings.simplefilter('ignore', FutureWarning)
    # select channel index 0 and display it with rescale=True
    s.image.show_stack({Indices.CH: 0}, rescale=True)

Rescaling ...
