# Analysis of DNA-MERFISH for CTP11

by Pu Zheng

2022.06.15

Analysis for the following dataset:

dna_data_folder = r'\\10.245.74.158\Chromatin_NAS_5\20220419-P_brain_CTP11-13_from_0415storm65'

rna_data_folder = r'\\10.245.74.158\Chromatin_NAS_0\20220415-P_brain_M1_noclear_storm65'

save_folder = r'\\mendel\Mendel_SSD4\Pu_Temp\20220419-CTP11-13_from_0415storm65'

In [8]:
# Run the shared startup script inside this kernel's namespace.
# NOTE(review): `sys`, `os`, `np` and `time` are used in this notebook without an explicit
# import in the cells shown — presumably this script (or the star import below) provides
# them; confirm.
%run "..\..\Startup_py3.py"
# Extend the import search path (presumably where the ImageAnalysis3 package lives — confirm).
sys.path.append(r"..\..\..\..\Documents")

import ImageAnalysis3 as ia
# Select the interactive notebook backend for matplotlib figures.
%matplotlib notebook

# Star import: pulls every public name of ImageAnalysis3 into the notebook namespace.
from ImageAnalysis3 import *
# Print the process id of this kernel.
print(os.getpid())

import h5py
# NOTE(review): `_allowed_kwds` and `ast` are not referenced in the cells shown here.
from ImageAnalysis3.classes import _allowed_kwds
import ast

33800


## 0.1 Folders

In [9]:
def _ensure_folder(folder, label):
    """Create `folder` if it is missing, print which case applied, and return the path.

    folder: directory path to create or reuse.
    label: variable name shown in the printed message.
    """
    if os.path.exists(folder):
        print(f"Use {label}: {folder}")
    else:
        # exist_ok guards against another process creating the folder between
        # the existence check above and this call
        os.makedirs(folder, exist_ok=True)
        print(f"Creating {label}: {folder}")
    return folder

save_folder = r'\\10.245.74.158\Chromatin_NAS_8\Analyzed_data\20220419-CTP11-13_from_0415storm65'
# every .hdf5 file directly inside save_folder is treated as one field of view (fov);
# sorted because os.listdir returns entries in arbitrary order
save_filenames = sorted(
    os.path.join(save_folder, _fl) for _fl in os.listdir(save_folder)
    if _fl.split(os.extsep)[-1] == 'hdf5'
)
# extract fov_id: the integer after the last underscore of the file name
save_fov_ids = [int(os.path.basename(_fl).split('.hdf5')[0].split('_')[-1]) for _fl in save_filenames]

debug = False

print(f"{len(save_filenames)} fovs detected")

segmentation_folder = _ensure_folder(os.path.join(save_folder, 'Segmentation'), 'segmentation_folder')
cand_spot_folder = _ensure_folder(os.path.join(save_folder, 'CandSpots'), 'cand_spot_folder')

# built from save_folder directly: string-replacing 'CandSpots' inside cand_spot_folder
# would also rewrite any earlier occurrence of that text in the path
decoder_folder = os.path.join(save_folder, 'Decoder')
if debug:
    # debug runs go into the first versioned sub-folder (v0, v1, ...) that does not exist yet
    _version = 0
    while os.path.exists(os.path.join(decoder_folder, f'v{_version}')):
        _version += 1
    decoder_folder = os.path.join(decoder_folder, f'v{_version}')
decoder_folder = _ensure_folder(decoder_folder, 'decoder_folder')

217 fovs detected
Use segmentation_folder: \\10.245.74.158\Chromatin_NAS_8\Analyzed_data\20220419-CTP11-13_from_0415storm65\Segmentation
Use cand_spot_folder: \\10.245.74.158\Chromatin_NAS_8\Analyzed_data\20220419-CTP11-13_from_0415storm65\CandSpots
Use decoder_folder: \\10.245.74.158\Chromatin_NAS_8\Analyzed_data\20220419-CTP11-13_from_0415storm65\Decoder


In [10]:
# voxel size along each image axis — presumably nm in (z, x, y) order; TODO confirm
pixel_sizes = np.array((250, 108, 108))
# dimensions of a single image stack — presumably (z, x, y) in pixels; TODO confirm
single_im_size = np.array((50, 2048, 2048))


# 1. Partition DNA-MERFISH spots

In [11]:
from ImageAnalysis3.classes.partition_spots import Spots_Partition
from ImageAnalysis3.classes.preprocess import Spots3D
from ImageAnalysis3.figure_tools import plot_partition
import pandas as pd

In [12]:
from ImageAnalysis3.segmentation_tools.cell import Align_Segmentation
import ImageAnalysis3.io_tools.spots 

In [13]:
# --- settings for partitioning the 'combo' spots ---
data_type = 'combo'
# passed on to batch_partition_DNA_spots (units not visible here — TODO confirm)
search_radius = 3
# False: fovs that already have a candidate-spot csv are skipped when jobs are queued
overwrite_cand_spots = False
# False: the `relabeled_<data_type>` spots are not merged in
add_relabel_spots = False

# candidate spots of each data_type are written to their own sub-folder;
# the path is printed only when the sub-folder has to be created
_cand_spot_subfolder = os.path.join(cand_spot_folder, data_type)
_subfolder_missing = not os.path.exists(_cand_spot_subfolder)
if _subfolder_missing:
    print(_cand_spot_subfolder)
    os.makedirs(_cand_spot_subfolder)

# microscope.json handed to the segmentation loader and to every partition job
dna_microscope_file = r'\\mendel\pu_documents\Merfish_analysis\Merfish_Analysis_Scripts\merlin_parameters\microscope\storm6_microscope.json'

In [18]:
# Load the DNA mask of one field of view for inspection.
# The segmentation file is resolved explicitly from the fov id so this cell does not
# depend on a loop variable (`_segmentation_savefile`) left behind by another cell,
# which is undefined on a fresh kernel run from top to bottom.
_inspect_fov_id = 216
_inspect_save_filename = save_filenames[save_fov_ids.index(_inspect_fov_id)]
_segmentation_savefile = os.path.join(
    segmentation_folder,
    os.path.basename(_inspect_save_filename).replace('.hdf5', '_Segmentation.hdf5'))
with h5py.File(_segmentation_savefile, 'r') as _f:
    # NOTE(review): groups in the segmentation file appear to be keyed by the fov id
    # as a string ('216' in the original cell) — confirm
    _mask = _f[str(_inspect_fov_id)]['dna_mask'][:]

In [14]:
%%time
_partition_args = []

for _fov_id, _save_filename in zip(save_fov_ids, save_filenames):

    _segmentation_savefile = os.path.join(segmentation_folder, 
        os.path.basename(_save_filename).replace('.hdf5', '_Segmentation.hdf5') )
    # savename
    _cand_spot_filename = os.path.join(cand_spot_folder, data_type, 
                                      os.path.basename(_save_filename).replace('.hdf5', f'_CandSpots.csv') )
    if os.path.isfile(_cand_spot_filename) and not overwrite_cand_spots:
        continue
    # load segmentation label matrix and uids
    _align_seg = Align_Segmentation('', '', _save_filename, '', dna_microscope_file, np.array([]))
    _align_seg._load(_segmentation_savefile)
    seg_label, fovcell_2_uid = _align_seg.dna_mask, _align_seg.fovcell_2_uid
    # load spots
    spots_list, combo_bits = ia.io_tools.spots.load_preprocess_spots(
        _save_filename, data_type, None, pixel_sizes=pixel_sizes,
    )
    # add relabeled spots if specified.
    if add_relabel_spots:
        relabel_spots_list, relabel_bits = ia.io_tools.spots.load_preprocess_spots(
            _save_filename, f'relabeled_{data_type}', None, pixel_sizes=pixel_sizes,
        )
        for _rspots, _rbit in zip(relabel_spots_list, relabel_bits):
            _ind = list(combo_bits).index(_rbit)
            _old_spots = spots_list[_ind]
            # merge
            _merged_spots = ia.io_tools.spots.merge_RelabelSpots(
                _old_spots, _rspots, pixel_sizes=pixel_sizes,
            )
            # replace
            spots_list[_ind] = _merged_spots
    # 
    _all_spots = ia.io_tools.spots.merge_Spots3DList(spots_list, pixel_sizes=pixel_sizes)
    # partition args
    _args = (
        _fov_id, 
        np.array(_all_spots), _all_spots.bits, _all_spots.channels,
        seg_label, fovcell_2_uid,
        dna_microscope_file,
        search_radius, pixel_sizes,
        True, 
        True, _cand_spot_filename,
        True, 60, 
        False, True,
    )
    _partition_args.append(_args)
    
print(len(_partition_args))

0
Wall time: 9.52 s


## 1.1 parallel process

In [9]:
%%time
import multiprocessing as mp
# Multiprocessing
print(f"- Start multiprocessing spot partitioning", end=' ')
_start_time = time.time()
with mp.Pool(12) as _partition_pool:
    # start multiprocessing
    _partition_pool.starmap(ia.classes.partition_spots.batch_partition_DNA_spots, 
                            _partition_args, chunksize=1)
    # close multiprocessing
    _partition_pool.close()
    _partition_pool.join()
    _partition_pool.terminate()
print(f"finish in {time.time()-_start_time:.3f}s. ")

- Start multiprocessing spot partitioning finish in 3462.256s. 
Wall time: 57min 42s


### 1.1.2 sequential version

In [20]:
# sequential
for _args in _partition_args:
    ia.classes.partition_spots.batch_partition_DNA_spots(*_args)
    break

- Partition cand_spots for fov:0
-- partition barcodes for 1315719 spots


1315719it [01:14, 17701.57it/s]


- Save 1304959 spots to file: \\mendel\Mendel_SSD4\Pu_Temp\20220419-CTP11-13_from_0415storm65\CandSpots\Conv_zscan_000_CandSpots.csv


<IPython.core.display.Javascript object>

-- save iamage to file: \\mendel\Mendel_SSD4\Pu_Temp\20220419-CTP11-13_from_0415storm65\CandSpots\Figures\Fov-0_SpotCountPerCell.png
-- finish partition in 196.523s. 


## 1.2 Partition sequential genes

In [10]:
# --- settings for partitioning the 'unique' spots ---
data_type = 'unique'
# passed on to batch_partition_DNA_spots (units not visible here — TODO confirm)
search_radius = 3
# value handed to batch_partition_DNA_spots when the jobs are queued
expect_spot_num = 4
# False: fovs that already have a candidate-spot csv are skipped when jobs are queued
overwrite_cand_spots = False
# False: the `relabeled_<data_type>` spots are not merged in
add_relabel_spots = False

# make sure the per-data_type output folder exists; its path is printed only on creation
_unique_spot_folder = os.path.join(cand_spot_folder, data_type)
if not os.path.exists(_unique_spot_folder):
    print(_unique_spot_folder)
    os.makedirs(_unique_spot_folder)

# microscope.json handed to the segmentation loader and to every partition job
dna_microscope_file = r'\\mendel\pu_documents\Merfish_analysis\Merfish_Analysis_Scripts\merlin_parameters\microscope\storm6_microscope.json'

\\mendel\Mendel_SSD4\Pu_Temp\20220419-CTP11-13_from_0415storm65\CandSpots\unique


In [11]:
%%time
_partition_args = []

for _fov_id, _save_filename in zip(save_fov_ids, save_filenames):

    _segmentation_savefile = os.path.join(segmentation_folder, 
        os.path.basename(_save_filename).replace('.hdf5', '_Segmentation.hdf5') )
    # savename
    _cand_spot_filename = os.path.join(cand_spot_folder, data_type, 
                                      os.path.basename(_save_filename).replace('.hdf5', f'_CandSpots.csv') )
    if os.path.isfile(_cand_spot_filename) and not overwrite_cand_spots:
        continue
    # load segmentation label matrix and uids
    _align_seg = Align_Segmentation('', '', _save_filename, '', dna_microscope_file, np.array([]))
    _align_seg._load(_segmentation_savefile)
    seg_label, fovcell_2_uid = _align_seg.dna_mask, _align_seg.fovcell_2_uid
    # load spots
    spots_list, combo_bits = ia.io_tools.spots.load_preprocess_spots(
        _save_filename, data_type, None, pixel_sizes=pixel_sizes,
    )
    # add relabeled spots if specified.
    if add_relabel_spots:
        relabel_spots_list, relabel_bits = ia.io_tools.spots.load_preprocess_spots(
            _save_filename, f'relabeled_{data_type}', None, pixel_sizes=pixel_sizes,
        )
        for _rspots, _rbit in zip(relabel_spots_list, relabel_bits):
            _ind = list(combo_bits).index(_rbit)
            _old_spots = spots_list[_ind]
            # merge
            _merged_spots = ia.io_tools.spots.merge_RelabelSpots(
                _old_spots, _rspots, pixel_sizes=pixel_sizes,
            )
            # replace
            spots_list[_ind] = _merged_spots
    # 
    _all_spots = ia.io_tools.spots.merge_Spots3DList(spots_list, pixel_sizes=pixel_sizes)
    # partition args
    _args = (
        _fov_id, 
        np.array(_all_spots), _all_spots.bits, _all_spots.channels,
        seg_label, fovcell_2_uid,
        dna_microscope_file,
        search_radius, pixel_sizes,
        True, 
        True, _cand_spot_filename,
        True, expect_spot_num, 
        False, True,
    )
    _partition_args.append(_args)
    
print(len(_partition_args))

-- loading segmentation info from fov:0 into file: \\mendel\Mendel_SSD4\Pu_Temp\20220419-CTP11-13_from_0415storm65\Segmentation\Conv_zscan_000_Segmentation.hdf5
-- loading segmentation info from fov:1 into file: \\mendel\Mendel_SSD4\Pu_Temp\20220419-CTP11-13_from_0415storm65\Segmentation\Conv_zscan_001_Segmentation.hdf5
-- loading segmentation info from fov:2 into file: \\mendel\Mendel_SSD4\Pu_Temp\20220419-CTP11-13_from_0415storm65\Segmentation\Conv_zscan_002_Segmentation.hdf5
-- loading segmentation info from fov:3 into file: \\mendel\Mendel_SSD4\Pu_Temp\20220419-CTP11-13_from_0415storm65\Segmentation\Conv_zscan_003_Segmentation.hdf5
-- loading segmentation info from fov:4 into file: \\mendel\Mendel_SSD4\Pu_Temp\20220419-CTP11-13_from_0415storm65\Segmentation\Conv_zscan_004_Segmentation.hdf5
-- loading segmentation info from fov:5 into file: \\mendel\Mendel_SSD4\Pu_Temp\20220419-CTP11-13_from_0415storm65\Segmentation\Conv_zscan_005_Segmentation.hdf5
-- loading segmentation info from 

-- loading segmentation info from fov:51 into file: \\mendel\Mendel_SSD4\Pu_Temp\20220419-CTP11-13_from_0415storm65\Segmentation\Conv_zscan_051_Segmentation.hdf5
-- loading segmentation info from fov:52 into file: \\mendel\Mendel_SSD4\Pu_Temp\20220419-CTP11-13_from_0415storm65\Segmentation\Conv_zscan_052_Segmentation.hdf5
-- loading segmentation info from fov:53 into file: \\mendel\Mendel_SSD4\Pu_Temp\20220419-CTP11-13_from_0415storm65\Segmentation\Conv_zscan_053_Segmentation.hdf5
-- loading segmentation info from fov:54 into file: \\mendel\Mendel_SSD4\Pu_Temp\20220419-CTP11-13_from_0415storm65\Segmentation\Conv_zscan_054_Segmentation.hdf5
-- loading segmentation info from fov:55 into file: \\mendel\Mendel_SSD4\Pu_Temp\20220419-CTP11-13_from_0415storm65\Segmentation\Conv_zscan_055_Segmentation.hdf5
-- loading segmentation info from fov:56 into file: \\mendel\Mendel_SSD4\Pu_Temp\20220419-CTP11-13_from_0415storm65\Segmentation\Conv_zscan_056_Segmentation.hdf5
-- loading segmentation info

-- loading segmentation info from fov:102 into file: \\mendel\Mendel_SSD4\Pu_Temp\20220419-CTP11-13_from_0415storm65\Segmentation\Conv_zscan_102_Segmentation.hdf5
-- loading segmentation info from fov:103 into file: \\mendel\Mendel_SSD4\Pu_Temp\20220419-CTP11-13_from_0415storm65\Segmentation\Conv_zscan_103_Segmentation.hdf5
-- loading segmentation info from fov:104 into file: \\mendel\Mendel_SSD4\Pu_Temp\20220419-CTP11-13_from_0415storm65\Segmentation\Conv_zscan_104_Segmentation.hdf5
-- loading segmentation info from fov:105 into file: \\mendel\Mendel_SSD4\Pu_Temp\20220419-CTP11-13_from_0415storm65\Segmentation\Conv_zscan_105_Segmentation.hdf5
-- loading segmentation info from fov:106 into file: \\mendel\Mendel_SSD4\Pu_Temp\20220419-CTP11-13_from_0415storm65\Segmentation\Conv_zscan_106_Segmentation.hdf5
-- loading segmentation info from fov:107 into file: \\mendel\Mendel_SSD4\Pu_Temp\20220419-CTP11-13_from_0415storm65\Segmentation\Conv_zscan_107_Segmentation.hdf5
-- loading segmentatio

-- loading segmentation info from fov:153 into file: \\mendel\Mendel_SSD4\Pu_Temp\20220419-CTP11-13_from_0415storm65\Segmentation\Conv_zscan_153_Segmentation.hdf5
-- loading segmentation info from fov:154 into file: \\mendel\Mendel_SSD4\Pu_Temp\20220419-CTP11-13_from_0415storm65\Segmentation\Conv_zscan_154_Segmentation.hdf5
-- loading segmentation info from fov:155 into file: \\mendel\Mendel_SSD4\Pu_Temp\20220419-CTP11-13_from_0415storm65\Segmentation\Conv_zscan_155_Segmentation.hdf5
-- loading segmentation info from fov:156 into file: \\mendel\Mendel_SSD4\Pu_Temp\20220419-CTP11-13_from_0415storm65\Segmentation\Conv_zscan_156_Segmentation.hdf5
-- loading segmentation info from fov:157 into file: \\mendel\Mendel_SSD4\Pu_Temp\20220419-CTP11-13_from_0415storm65\Segmentation\Conv_zscan_157_Segmentation.hdf5
-- loading segmentation info from fov:158 into file: \\mendel\Mendel_SSD4\Pu_Temp\20220419-CTP11-13_from_0415storm65\Segmentation\Conv_zscan_158_Segmentation.hdf5
-- loading segmentatio

-- loading segmentation info from fov:204 into file: \\mendel\Mendel_SSD4\Pu_Temp\20220419-CTP11-13_from_0415storm65\Segmentation\Conv_zscan_204_Segmentation.hdf5
-- loading segmentation info from fov:205 into file: \\mendel\Mendel_SSD4\Pu_Temp\20220419-CTP11-13_from_0415storm65\Segmentation\Conv_zscan_205_Segmentation.hdf5
-- loading segmentation info from fov:206 into file: \\mendel\Mendel_SSD4\Pu_Temp\20220419-CTP11-13_from_0415storm65\Segmentation\Conv_zscan_206_Segmentation.hdf5
-- loading segmentation info from fov:207 into file: \\mendel\Mendel_SSD4\Pu_Temp\20220419-CTP11-13_from_0415storm65\Segmentation\Conv_zscan_207_Segmentation.hdf5
-- loading segmentation info from fov:208 into file: \\mendel\Mendel_SSD4\Pu_Temp\20220419-CTP11-13_from_0415storm65\Segmentation\Conv_zscan_208_Segmentation.hdf5
-- loading segmentation info from fov:209 into file: \\mendel\Mendel_SSD4\Pu_Temp\20220419-CTP11-13_from_0415storm65\Segmentation\Conv_zscan_209_Segmentation.hdf5
-- loading segmentatio

In [12]:
%%time
import multiprocessing as mp
# Multiprocessing
print(f"- Start multiprocessing spot partitioning", end=' ')
_start_time = time.time()
with mp.Pool(12) as _partition_pool:
    # start multiprocessing
    _partition_pool.starmap(ia.classes.partition_spots.batch_partition_DNA_spots, 
                            _partition_args, chunksize=1)
    # close multiprocessing
    _partition_pool.close()
    _partition_pool.join()
    _partition_pool.terminate()
print(f"finish in {time.time()-_start_time:.3f}s. ")

- Start multiprocessing spot partitioning finish in 349.913s. 
Wall time: 5min 49s


### 1.2.2 sequential version

In [14]:
# sequential
for _args in _partition_args:
    ia.classes.partition_spots.batch_partition_DNA_spots(*_args)
    break

- Partition cand_spots for fov:0
-- partition barcodes for 7330 spots


7330it [00:00, 20363.25it/s]


- Save 7280 spots to file: \\mendel\Mendel_SSD4\Pu_Temp\20220419-CTP11-13_from_0415storm65\CandSpots\unique\Conv_zscan_000_CandSpots.csv


<IPython.core.display.Javascript object>

-- save iamage to file: \\mendel\Mendel_SSD4\Pu_Temp\20220419-CTP11-13_from_0415storm65\CandSpots\unique\Figures\Fov-0_SpotCountPerCell.png
-- finish partition in 1.478s. 
