# MERFISH Data reorganization

In [1]:
import pandas as pd
import numpy as np
import os, sys, time
import re
import shutil
from importlib import reload
# this version doesn't have ChromAn compiled, so directly load from src:
sys.path.append(r"/lab/weissman_imaging/puzheng/Softwares/ChromAn")
import src

In [2]:
# Project helpers: fov/folder discovery and the color-usage table loader.
from src.file_io.data_organization import search_fovs_in_folders, Color_Usage
# Root directory on the lab file system under which experiments are stored.
data_home =  r"/lab/weissman_imaging/puzheng"

# Folder of the experiment being reorganized.
data_folder = os.path.join(data_home, '4T1Tumor', '20240510-F242depc20um_MF4dna')

# fds: folders found under data_folder; fovs: field-of-view file names.
# Later cells iterate over both to locate the files of each fov in each folder.
fds, fovs = search_fovs_in_folders(data_folder)


- searching in folder: /lab/weissman_imaging/puzheng/4T1Tumor/20240510-F242depc20um_MF4dna
-- 9 folders, 412 fovs detected.


In [3]:
# The color usage file describes how the imaging files are organized.
# Its index is compared against the folder names further down, so only folders
# listed in this file get reorganized.
color_usage_filename = os.path.join(data_folder, 'Analysis', 'color_usage_MF4dna.csv')
color_usage_df = Color_Usage(color_usage_filename)

- load color_usage from file: /lab/weissman_imaging/puzheng/4T1Tumor/20240510-F242depc20um_MF4dna/Analysis/color_usage_MF4dna.csv


In [4]:
# Identifiers used to name the output folders of this experiment.
# The date is the part of the data folder name before the first '-'.
date = os.path.basename(data_folder).partition('-')[0]
sample, library = 'F242', 'MF4'

print(date, sample, library)

20240510 F242 MF4


In [5]:
# Load the data-organization table that matches this experiment (date + library).
# The path entry is kept so that `ChromAn.*` imports keep resolving for any code
# that relies on it.
sys.path.append(r"/lab/weissman_imaging/puzheng/Softwares")
# Import through the same `src` package as the other cells. Importing the same
# file as `ChromAn.src...` would create a second, independent module object
# (with its own copies of the classes) and re-bind `search_fovs_in_folders`.
from src.file_io.data_organization import Data_Organization

# NOTE: relative path, resolved against the notebook's working directory.
data_organization_filename = f'../../merlin_parameters/dataorganization/{date}-{library}_16bit.csv'
_do = Data_Organization(data_organization_filename,)

- load color_usage from file: ../../merlin_parameters/dataorganization/20240510-MF4_16bit.csv


In [6]:
# Display the loaded data-organization table so its rows can be checked by eye.
_do

Unnamed: 0,channelName,readoutName,imageType,imageRegExp,bitNumber,imagingRound,color,frame,zPos,fiducialImageType,fiducialRegExp,fiducialImagingRound,fiducialFrame,fiducialColor
0,bit1,Stv_29,748_637_477_405_s18,(?P<imageType>[\w|-]+)_(?P<fov>[0-9]+)_(?P<ima...,1,0,748,[0 4 8 12 16 20 24 28 32 36 40 44 48 52 56 60 ...,[-10.8 -9.6 -8.4 -7.2 -6.0 -4.8 -3.6 -2.4 -1.2...,748_637_477_405_s18,(?P<imageType>[\w|-]+)_(?P<fov>[0-9]+)_(?P<ima...,0,66,477
1,bit2,Stv_30,748_637_477_405_s18,(?P<imageType>[\w|-]+)_(?P<fov>[0-9]+)_(?P<ima...,2,0,637,[1 5 9 13 17 21 25 29 33 37 41 45 49 53 57 61 ...,[-10.8 -9.6 -8.4 -7.2 -6.0 -4.8 -3.6 -2.4 -1.2...,748_637_477_405_s18,(?P<imageType>[\w|-]+)_(?P<fov>[0-9]+)_(?P<ima...,0,66,477
2,bit3,Stv_31,748_637_477_s18,(?P<imageType>[\w|-]+)_(?P<fov>[0-9]+)_(?P<ima...,3,1,748,[0 3 6 9 12 15 18 21 24 27 30 33 36 39 42 45 4...,[-10.8 -9.6 -8.4 -7.2 -6.0 -4.8 -3.6 -2.4 -1.2...,748_637_477_s18,(?P<imageType>[\w|-]+)_(?P<fov>[0-9]+)_(?P<ima...,1,50,477
3,bit4,Stv_32,748_637_477_s18,(?P<imageType>[\w|-]+)_(?P<fov>[0-9]+)_(?P<ima...,4,1,637,[1 4 7 10 13 16 19 22 25 28 31 34 37 40 43 46 ...,[-10.8 -9.6 -8.4 -7.2 -6.0 -4.8 -3.6 -2.4 -1.2...,748_637_477_s18,(?P<imageType>[\w|-]+)_(?P<fov>[0-9]+)_(?P<ima...,1,50,477
4,bit5,Stv_33,748_637_477_s18,(?P<imageType>[\w|-]+)_(?P<fov>[0-9]+)_(?P<ima...,5,2,748,[0 3 6 9 12 15 18 21 24 27 30 33 36 39 42 45 4...,[-10.8 -9.6 -8.4 -7.2 -6.0 -4.8 -3.6 -2.4 -1.2...,748_637_477_s18,(?P<imageType>[\w|-]+)_(?P<fov>[0-9]+)_(?P<ima...,2,50,477
5,bit6,Stv_35,748_637_477_s18,(?P<imageType>[\w|-]+)_(?P<fov>[0-9]+)_(?P<ima...,6,2,637,[1 4 7 10 13 16 19 22 25 28 31 34 37 40 43 46 ...,[-10.8 -9.6 -8.4 -7.2 -6.0 -4.8 -3.6 -2.4 -1.2...,748_637_477_s18,(?P<imageType>[\w|-]+)_(?P<fov>[0-9]+)_(?P<ima...,2,50,477
6,bit7,Stv_36,748_637_477_s18,(?P<imageType>[\w|-]+)_(?P<fov>[0-9]+)_(?P<ima...,7,3,748,[0 3 6 9 12 15 18 21 24 27 30 33 36 39 42 45 4...,[-10.8 -9.6 -8.4 -7.2 -6.0 -4.8 -3.6 -2.4 -1.2...,748_637_477_s18,(?P<imageType>[\w|-]+)_(?P<fov>[0-9]+)_(?P<ima...,3,50,477
7,bit8,Stv_37,748_637_477_s18,(?P<imageType>[\w|-]+)_(?P<fov>[0-9]+)_(?P<ima...,8,3,637,[1 4 7 10 13 16 19 22 25 28 31 34 37 40 43 46 ...,[-10.8 -9.6 -8.4 -7.2 -6.0 -4.8 -3.6 -2.4 -1.2...,748_637_477_s18,(?P<imageType>[\w|-]+)_(?P<fov>[0-9]+)_(?P<ima...,3,50,477
8,bit9,Stv_39,748_637_477_s18,(?P<imageType>[\w|-]+)_(?P<fov>[0-9]+)_(?P<ima...,9,4,748,[0 3 6 9 12 15 18 21 24 27 30 33 36 39 42 45 4...,[-10.8 -9.6 -8.4 -7.2 -6.0 -4.8 -3.6 -2.4 -1.2...,748_637_477_s18,(?P<imageType>[\w|-]+)_(?P<fov>[0-9]+)_(?P<ima...,4,50,477
9,bit10,Stv_40,748_637_477_s18,(?P<imageType>[\w|-]+)_(?P<fov>[0-9]+)_(?P<ima...,10,4,637,[1 4 7 10 13 16 19 22 25 28 31 34 37 40 43 46 ...,[-10.8 -9.6 -8.4 -7.2 -6.0 -4.8 -3.6 -2.4 -1.2...,748_637_477_s18,(?P<imageType>[\w|-]+)_(?P<fov>[0-9]+)_(?P<ima...,4,50,477


In [7]:
from src.file_io.data_organization import create_folder

# Image-type labels used as the prefix of the reorganized file names.
# They are read from the data-organization table: row label 1 supplies the
# reference-round label, row label 4 the label of a regular round, and the
# 'PolyT' channel row the PolyT label.
# NOTE(review): rows 1 and 4 are picked by position — confirm they still point
# at the intended rounds whenever the data-organization file changes.
ref_image_type = _do.loc[1, 'imageType']
_is_polyt = _do['channelName'] == 'PolyT'
polyt_image_type = _do.loc[_is_polyt, 'imageType'].values[0]
image_type = _do.loc[4, 'imageType']

print(ref_image_type, polyt_image_type, image_type)

# Destination folder for the reorganized files of this experiment.
merfish_data_folder = r'/lab/solexa_weissman/puzheng/MERFISH_data'
target_folder = os.path.join(merfish_data_folder, f'{date}-{sample}_{library}')
create_folder(target_folder)


748_637_477_405_s18 748_637_477_s18 748_637_477_s18
Creating folder: /lab/solexa_weissman/puzheng/MERFISH_data/20240510-F242_MF4


In [None]:
# ---- settings ------------------------------------------------------------
# Indices (positions in `fovs`) of the fovs to process; None processes all.
# Assigned explicitly so the cell never depends on a leftover kernel variable.
sel_fovs = None
# Folders are expected to be named H<round><M|R|C|P><region>, e.g. "H0M1".
folder_regexp = r'H([0-9]+)[MRCP]([0-9]+)'

ref_round = 0
polyt_round = _do.loc[_do['channelName']=='PolyT','imagingRound'].values[0]
# overwrite: replace target files that already exist (applies to copy AND move).
overwrite = False
# remove_source: move the files instead of copying them.
remove_source = False

# ---- scan every usable folder once ----------------------------------------
# The folder-level work (name matching, image type, directory listing) does
# not depend on the fov, so it is done once here instead of once per fov.
_valid_folder_names = set(color_usage_df.index)
_folder_infos = []  # (folder path, round string, image type, {file stem: [file names]})
for _fd in fds:
    _fd_name = os.path.basename(_fd)
    # match round and region from the folder name
    _match = re.search(folder_regexp, _fd_name)
    if _match is None:
        continue
    # folders that are not listed in color_usage are skipped
    if _fd_name not in _valid_folder_names:
        continue
    _round, _region = _match.groups()
    # choose the image-type label from the imaging round
    if int(_round) == ref_round:
        _image_type = ref_image_type
    elif int(_round) == polyt_round:
        _image_type = polyt_image_type
    else:
        _image_type = image_type
    # group the files by stem (text before the first extension separator)
    _files_by_stem = {}
    for _fl_name in os.listdir(_fd):
        _files_by_stem.setdefault(_fl_name.split(os.extsep)[0], []).append(_fl_name)
    _folder_infos.append((_fd, _round, _image_type, _files_by_stem))

# ---- copy / move the files of every selected fov ---------------------------
for _fov_id, _fov_name in enumerate(fovs):
    # skip fovs that are not selected
    if sel_fovs is not None and _fov_id not in sel_fovs:
        continue
    # Match on the exact stem: a substring test would also pick up the files of
    # e.g. "Conv_zscan_1000" while processing "Conv_zscan_100".
    _fov_stem = _fov_name.split(os.extsep)[0]
    for _fd, _round, _image_type, _files_by_stem in _folder_infos:
        # multiple files (one per extension) belong to each image
        for _fl_name in _files_by_stem.get(_fov_stem, []):
            _fl = os.path.join(_fd, _fl_name)
            _ext = _fl_name.split(os.extsep)[-1]
            # target file: <image type>_<fov index>_<round>.<extension>
            _tar_fl = os.path.join(target_folder, f"{_image_type}_{_fov_id}_{_round}{os.extsep}{_ext}")
            if os.path.isfile(_tar_fl) and not overwrite:
                print(f"{_tar_fl} already exist, skip")
            elif remove_source:
                print(f"move {_fl} to {_tar_fl}")
                shutil.move(_fl, _tar_fl)
            else:
                print(f"copy {_fl} to {_tar_fl}")
                shutil.copyfile(_fl, _tar_fl)

copy /lab/weissman_imaging/puzheng/4T1Tumor/20240510-F242depc20um_MF4dna/H0M1/Conv_zscan_000.power to /lab/solexa_weissman/puzheng/MERFISH_data/20240510-F242_MF4/748_637_477_405_s18_0_0.power
copy /lab/weissman_imaging/puzheng/4T1Tumor/20240510-F242depc20um_MF4dna/H0M1/Conv_zscan_000.xml to /lab/solexa_weissman/puzheng/MERFISH_data/20240510-F242_MF4/748_637_477_405_s18_0_0.xml
copy /lab/weissman_imaging/puzheng/4T1Tumor/20240510-F242depc20um_MF4dna/H0M1/Conv_zscan_000.dax to /lab/solexa_weissman/puzheng/MERFISH_data/20240510-F242_MF4/748_637_477_405_s18_0_0.dax
copy /lab/weissman_imaging/puzheng/4T1Tumor/20240510-F242depc20um_MF4dna/H0M1/Conv_zscan_000.inf to /lab/solexa_weissman/puzheng/MERFISH_data/20240510-F242_MF4/748_637_477_405_s18_0_0.inf
copy /lab/weissman_imaging/puzheng/4T1Tumor/20240510-F242depc20um_MF4dna/H0M1/Conv_zscan_000.off to /lab/solexa_weissman/puzheng/MERFISH_data/20240510-F242_MF4/748_637_477_405_s18_0_0.off
copy /lab/weissman_imaging/puzheng/4T1Tumor/20240510-F24

## Check file count

In [9]:
# Sanity check: number of files now in the target folder vs. the expected
# total (rows in color_usage x detected fovs x 5 files per fov and round).
_n_found = len(os.listdir(target_folder))
_n_expected = len(color_usage_df) * len(fovs) * 5
print(_n_found, _n_expected)

1350 1350


## Create analysis folder

In [10]:
# The analysis folder is named after the reorganized data folder and lives
# under the shared analysis root.
analysis_home = r'/lab/weissman_imaging/puzheng/MERFISH_analysis'
analysis_folder = os.path.join(analysis_home, os.path.basename(target_folder))
analysis_segmentation_folder = os.path.join(analysis_folder, 'CellPoseSegment', 'segmentation_label')

print(analysis_segmentation_folder)
# exist_ok=True makes re-running the cell safe and avoids the check-then-create
# race of a separate os.path.exists() test.
os.makedirs(analysis_segmentation_folder, exist_ok=True)


/lab/weissman_imaging/puzheng/MERFISH_analysis/20240506-4T1fullyEdited_fc_MF8/CellPoseSegment/segmentation_label


# Generate data_organization

Next, generate the data organization file with this notebook: https://c4b15.wi.mit.edu/user/puzheng/notebooks/lab/weissman_imaging/puzheng/Softwares/Weissman_MERFISH_Scripts/MERFISH_preprocess/Data_organization/20230830-Generate_dataOrganization.ipynb