# MERFISH Data reorganization

In [5]:
import pandas as pd
import numpy as np
import os, sys, time
import re
import shutil
from importlib import reload
# this version doesn't have ChromAn compiled, so directly load from src:
sys.path.append(r"/lab/weissman_imaging/puzheng/Softwares/ChromAn")
import src

In [6]:
from src.file_io.data_organization import search_fovs_in_folders, Color_Usage

# Storage root; the experiment folder below is resolved relative to it.
data_home = r"/lab/weissman_imaging/puzheng"

# Raw acquisition folder of this experiment: <data_home>/<project>/<experiment>
_experiment_parts = ('4T1Tumor', '20231226-MF4DNA_4T1preEditTop8')
data_folder = os.path.join(data_home, *_experiment_parts)

# Scan the experiment folder; returns the detected sub-folders and FOV names.
fds, fovs = search_fovs_in_folders(data_folder)


- searching in folder: /lab/weissman_imaging/puzheng/4T1Tumor/20231226-MF4DNA_4T1preEditTop8
-- 10 folders, 84 fovs detected.


In [7]:
# The color usage table describes how the imaging files are organized;
# it is stored in the "Analysis" sub-folder of the experiment.
analysis_folder = os.path.join(data_folder, 'Analysis')
color_usage_filename = os.path.join(analysis_folder, 'color_usage.csv')
color_usage_df = Color_Usage(color_usage_filename)

- load color_usage from file: /lab/weissman_imaging/puzheng/4T1Tumor/20231226-MF4DNA_4T1preEditTop8/Analysis/color_usage.csv


In [10]:
# The experiment folder is named "<date>-<description>": keep the part before
# the first dash as the acquisition date.
date = os.path.basename(data_folder).partition('-')[0]

# Labels used to name the reorganized dataset.
sample = '4T1preEditTop8'
library = 'MF4dna'

print(f"{date} {sample} {library}")

20231226 4T1preEditTop8 MF4dna


In [11]:
# Load the data_organization table that the reorganized files have to match.
#
# Data_Organization is imported from the already-loaded `src` package rather
# than from `ChromAn.src`: importing the same file under a second package name
# loads it twice and produces two distinct sets of class objects.
# The Softwares path is still registered (once) in case other code relies on it.
_softwares_path = r"/lab/weissman_imaging/puzheng/Softwares"
if _softwares_path not in sys.path:
    sys.path.append(_softwares_path)
from src.file_io.data_organization import Data_Organization, search_fovs_in_folders

# NOTE: relative path, resolved against the notebook's working directory.
data_organization_filename = r'../../merlin_parameters/dataorganization/20231226-MF4dna_16bit.csv'
_do = Data_Organization(data_organization_filename)

- load color_usage from file: ../../merlin_parameters/dataorganization/20231226-MF4dna_16bit.csv


In [12]:
from src.file_io.data_organization import create_folder

# Image-type prefixes (shutter files), looked up in the data_organization table.
_image_types = _do['imageType']
# row label 0 is taken as the reference round, e.g. '748_637_477_405_s13_n1000'
ref_image_type = _image_types.loc[0]
# first row whose channel is PolyT
polyt_image_type = _image_types[_do['channelName'] == 'PolyT'].values[0]
# row label 4 is taken as a regular round, e.g. '748_637_477_s13_n1000'
# NOTE(review): labels 0 and 4 are hard-coded -- confirm against the csv.
image_type = _image_types.loc[4]

# fiducial beads
#ref_channel = 488
#ref_frame = 13 # start from 0!

# Root folder that collects all reorganized MERFISH datasets
merfish_data_folder = r'/lab/weissman_imaging/puzheng/MERFISH_data'

# Destination of this dataset: <root>/<date>-<sample>_<library>
_target_name = f'{date}-{sample}_{library}'
target_folder = os.path.join(merfish_data_folder, _target_name)
create_folder(target_folder)


Creating folder: /lab/weissman_imaging/puzheng/MERFISH_data/20231226-4T1preEditTop8_MF4dna


In [None]:
# Copy (or move) every raw file into target_folder, renamed to
#   <image_type>_<fov_id>_<round>.<ext>

# Optional FOV filter: a list of FOV indices (e.g. [43]) restricts the run to
# those fields of view; None processes all of them. Defined explicitly so the
# cell does not depend on a `sel_fovs` left over in the kernel.
sel_fovs = None
# Folder names such as "H8M9": two numbers separated by one of M/R/C/P,
# unpacked below as (round, region). Both groups accept multi-digit numbers.
folder_regexp = r'H([0-9]+)[MRCP]([0-9]+)'

ref_round = 0
polyt_round = _do.loc[_do['channelName']=='PolyT','imagingRound'].values[0]
overwrite = True       # replace files that already exist in target_folder
remove_source = False  # True: move the source files instead of copying them

# Folders that are not listed in color_usage are skipped.
_usage_folders = set(color_usage_df.index)

# Everything that depends only on the folder (round, image type, directory
# listing) is resolved once here instead of once per FOV.
_folder_plan = []
for _fd in fds:
    _fd_name = os.path.basename(_fd)
    _match = re.search(folder_regexp, _fd_name)
    if _match is None or _fd_name not in _usage_folders:
        continue
    _round, _region = _match.groups()
    # the reference and PolyT rounds use their own image type
    if int(_round) == ref_round:
        _image_type = ref_image_type
    elif int(_round) == polyt_round:
        _image_type = polyt_image_type
    else:
        _image_type = image_type
    _folder_plan.append((_fd, _round, _image_type, os.listdir(_fd)))

for _fov_id, _fov_name in enumerate(fovs):
    # skip FOVs that are not selected
    if sel_fovs is not None and _fov_id not in sel_fovs:
        continue
    _fov_stem = _fov_name.split(os.extsep)[0]
    for _fd, _round, _image_type, _fd_files in _folder_plan:
        # each image comes with multiple files sharing the same stem; compare
        # the whole stem so that e.g. "..._10" does not also match "..._100"
        for _fl_name in _fd_files:
            if _fl_name.split(os.extsep)[0] != _fov_stem:
                continue
            _fl = os.path.join(_fd, _fl_name)
            _ext = _fl_name.split(os.extsep)[-1]
            _tar_fl = os.path.join(target_folder, f"{_image_type}_{_fov_id}_{_round}{os.extsep}{_ext}")
            # `overwrite` protects existing targets for both copy and move
            if os.path.isfile(_tar_fl) and not overwrite:
                print(f"{_tar_fl} already exist, skip")
            elif remove_source:
                print(f"move {_fl} to {_tar_fl}")
                shutil.move(_fl, _tar_fl)
            else:
                print(f"copy {_fl} to {_tar_fl}")
                shutil.copyfile(_fl, _tar_fl)

copy /lab/weissman_imaging/puzheng/4T1Tumor/20231226-MF4DNA_4T1preEditTop8/H0M1/Conv_zscan_00.power to /lab/weissman_imaging/puzheng/MERFISH_data/20231226-4T1preEditTop8_MF4dna/748_637_477_405_s13_0_0.power
copy /lab/weissman_imaging/puzheng/4T1Tumor/20231226-MF4DNA_4T1preEditTop8/H0M1/Conv_zscan_00.dax to /lab/weissman_imaging/puzheng/MERFISH_data/20231226-4T1preEditTop8_MF4dna/748_637_477_405_s13_0_0.dax
copy /lab/weissman_imaging/puzheng/4T1Tumor/20231226-MF4DNA_4T1preEditTop8/H0M1/Conv_zscan_00.xml to /lab/weissman_imaging/puzheng/MERFISH_data/20231226-4T1preEditTop8_MF4dna/748_637_477_405_s13_0_0.xml
copy /lab/weissman_imaging/puzheng/4T1Tumor/20231226-MF4DNA_4T1preEditTop8/H0M1/Conv_zscan_00.inf to /lab/weissman_imaging/puzheng/MERFISH_data/20231226-4T1preEditTop8_MF4dna/748_637_477_405_s13_0_0.inf
copy /lab/weissman_imaging/puzheng/4T1Tumor/20231226-MF4DNA_4T1preEditTop8/H0M1/Conv_zscan_00.off to /lab/weissman_imaging/puzheng/MERFISH_data/20231226-4T1preEditTop8_MF4dna/748_637_4

copy /lab/weissman_imaging/puzheng/4T1Tumor/20231226-MF4DNA_4T1preEditTop8/H0M1/Conv_zscan_01.xml to /lab/weissman_imaging/puzheng/MERFISH_data/20231226-4T1preEditTop8_MF4dna/748_637_477_405_s13_1_0.xml
copy /lab/weissman_imaging/puzheng/4T1Tumor/20231226-MF4DNA_4T1preEditTop8/H0M1/Conv_zscan_01.power to /lab/weissman_imaging/puzheng/MERFISH_data/20231226-4T1preEditTop8_MF4dna/748_637_477_405_s13_1_0.power
copy /lab/weissman_imaging/puzheng/4T1Tumor/20231226-MF4DNA_4T1preEditTop8/H0M1/Conv_zscan_01.off to /lab/weissman_imaging/puzheng/MERFISH_data/20231226-4T1preEditTop8_MF4dna/748_637_477_405_s13_1_0.off
copy /lab/weissman_imaging/puzheng/4T1Tumor/20231226-MF4DNA_4T1preEditTop8/H0M1/Conv_zscan_01.dax to /lab/weissman_imaging/puzheng/MERFISH_data/20231226-4T1preEditTop8_MF4dna/748_637_477_405_s13_1_0.dax
copy /lab/weissman_imaging/puzheng/4T1Tumor/20231226-MF4DNA_4T1preEditTop8/H0M1/Conv_zscan_01.inf to /lab/weissman_imaging/puzheng/MERFISH_data/20231226-4T1preEditTop8_MF4dna/748_637_4

copy /lab/weissman_imaging/puzheng/4T1Tumor/20231226-MF4DNA_4T1preEditTop8/H8M9/Conv_zscan_01.xml to /lab/weissman_imaging/puzheng/MERFISH_data/20231226-4T1preEditTop8_MF4dna/748_637_477_s13_1_8.xml
copy /lab/weissman_imaging/puzheng/4T1Tumor/20231226-MF4DNA_4T1preEditTop8/H8M9/Conv_zscan_01.inf to /lab/weissman_imaging/puzheng/MERFISH_data/20231226-4T1preEditTop8_MF4dna/748_637_477_s13_1_8.inf
copy /lab/weissman_imaging/puzheng/4T1Tumor/20231226-MF4DNA_4T1preEditTop8/H8M9/Conv_zscan_01.power to /lab/weissman_imaging/puzheng/MERFISH_data/20231226-4T1preEditTop8_MF4dna/748_637_477_s13_1_8.power
copy /lab/weissman_imaging/puzheng/4T1Tumor/20231226-MF4DNA_4T1preEditTop8/H8M9/Conv_zscan_01.off to /lab/weissman_imaging/puzheng/MERFISH_data/20231226-4T1preEditTop8_MF4dna/748_637_477_s13_1_8.off
copy /lab/weissman_imaging/puzheng/4T1Tumor/20231226-MF4DNA_4T1preEditTop8/H0M1/Conv_zscan_02.power to /lab/weissman_imaging/puzheng/MERFISH_data/20231226-4T1preEditTop8_MF4dna/748_637_477_405_s13_2_0

copy /lab/weissman_imaging/puzheng/4T1Tumor/20231226-MF4DNA_4T1preEditTop8/H8M9/Conv_zscan_02.inf to /lab/weissman_imaging/puzheng/MERFISH_data/20231226-4T1preEditTop8_MF4dna/748_637_477_s13_2_8.inf
copy /lab/weissman_imaging/puzheng/4T1Tumor/20231226-MF4DNA_4T1preEditTop8/H8M9/Conv_zscan_02.power to /lab/weissman_imaging/puzheng/MERFISH_data/20231226-4T1preEditTop8_MF4dna/748_637_477_s13_2_8.power
copy /lab/weissman_imaging/puzheng/4T1Tumor/20231226-MF4DNA_4T1preEditTop8/H8M9/Conv_zscan_02.off to /lab/weissman_imaging/puzheng/MERFISH_data/20231226-4T1preEditTop8_MF4dna/748_637_477_s13_2_8.off
copy /lab/weissman_imaging/puzheng/4T1Tumor/20231226-MF4DNA_4T1preEditTop8/H8M9/Conv_zscan_02.xml to /lab/weissman_imaging/puzheng/MERFISH_data/20231226-4T1preEditTop8_MF4dna/748_637_477_s13_2_8.xml
copy /lab/weissman_imaging/puzheng/4T1Tumor/20231226-MF4DNA_4T1preEditTop8/H0M1/Conv_zscan_03.xml to /lab/weissman_imaging/puzheng/MERFISH_data/20231226-4T1preEditTop8_MF4dna/748_637_477_405_s13_3_0.x

copy /lab/weissman_imaging/puzheng/4T1Tumor/20231226-MF4DNA_4T1preEditTop8/H8M9/Conv_zscan_03.inf to /lab/weissman_imaging/puzheng/MERFISH_data/20231226-4T1preEditTop8_MF4dna/748_637_477_s13_3_8.inf
copy /lab/weissman_imaging/puzheng/4T1Tumor/20231226-MF4DNA_4T1preEditTop8/H0M1/Conv_zscan_04.off to /lab/weissman_imaging/puzheng/MERFISH_data/20231226-4T1preEditTop8_MF4dna/748_637_477_405_s13_4_0.off
copy /lab/weissman_imaging/puzheng/4T1Tumor/20231226-MF4DNA_4T1preEditTop8/H0M1/Conv_zscan_04.inf to /lab/weissman_imaging/puzheng/MERFISH_data/20231226-4T1preEditTop8_MF4dna/748_637_477_405_s13_4_0.inf
copy /lab/weissman_imaging/puzheng/4T1Tumor/20231226-MF4DNA_4T1preEditTop8/H0M1/Conv_zscan_04.dax to /lab/weissman_imaging/puzheng/MERFISH_data/20231226-4T1preEditTop8_MF4dna/748_637_477_405_s13_4_0.dax
copy /lab/weissman_imaging/puzheng/4T1Tumor/20231226-MF4DNA_4T1preEditTop8/H0M1/Conv_zscan_04.power to /lab/weissman_imaging/puzheng/MERFISH_data/20231226-4T1preEditTop8_MF4dna/748_637_477_405

copy /lab/weissman_imaging/puzheng/4T1Tumor/20231226-MF4DNA_4T1preEditTop8/H0M1/Conv_zscan_05.xml to /lab/weissman_imaging/puzheng/MERFISH_data/20231226-4T1preEditTop8_MF4dna/748_637_477_405_s13_5_0.xml
copy /lab/weissman_imaging/puzheng/4T1Tumor/20231226-MF4DNA_4T1preEditTop8/H0M1/Conv_zscan_05.dax to /lab/weissman_imaging/puzheng/MERFISH_data/20231226-4T1preEditTop8_MF4dna/748_637_477_405_s13_5_0.dax
copy /lab/weissman_imaging/puzheng/4T1Tumor/20231226-MF4DNA_4T1preEditTop8/H0M1/Conv_zscan_05.power to /lab/weissman_imaging/puzheng/MERFISH_data/20231226-4T1preEditTop8_MF4dna/748_637_477_405_s13_5_0.power
copy /lab/weissman_imaging/puzheng/4T1Tumor/20231226-MF4DNA_4T1preEditTop8/H0M1/Conv_zscan_05.off to /lab/weissman_imaging/puzheng/MERFISH_data/20231226-4T1preEditTop8_MF4dna/748_637_477_405_s13_5_0.off
copy /lab/weissman_imaging/puzheng/4T1Tumor/20231226-MF4DNA_4T1preEditTop8/H0M1/Conv_zscan_05.inf to /lab/weissman_imaging/puzheng/MERFISH_data/20231226-4T1preEditTop8_MF4dna/748_637_4

copy /lab/weissman_imaging/puzheng/4T1Tumor/20231226-MF4DNA_4T1preEditTop8/H8M9/Conv_zscan_05.xml to /lab/weissman_imaging/puzheng/MERFISH_data/20231226-4T1preEditTop8_MF4dna/748_637_477_s13_5_8.xml
copy /lab/weissman_imaging/puzheng/4T1Tumor/20231226-MF4DNA_4T1preEditTop8/H8M9/Conv_zscan_05.power to /lab/weissman_imaging/puzheng/MERFISH_data/20231226-4T1preEditTop8_MF4dna/748_637_477_s13_5_8.power
copy /lab/weissman_imaging/puzheng/4T1Tumor/20231226-MF4DNA_4T1preEditTop8/H8M9/Conv_zscan_05.inf to /lab/weissman_imaging/puzheng/MERFISH_data/20231226-4T1preEditTop8_MF4dna/748_637_477_s13_5_8.inf
copy /lab/weissman_imaging/puzheng/4T1Tumor/20231226-MF4DNA_4T1preEditTop8/H0M1/Conv_zscan_06.power to /lab/weissman_imaging/puzheng/MERFISH_data/20231226-4T1preEditTop8_MF4dna/748_637_477_405_s13_6_0.power
copy /lab/weissman_imaging/puzheng/4T1Tumor/20231226-MF4DNA_4T1preEditTop8/H0M1/Conv_zscan_06.dax to /lab/weissman_imaging/puzheng/MERFISH_data/20231226-4T1preEditTop8_MF4dna/748_637_477_405_s

copy /lab/weissman_imaging/puzheng/4T1Tumor/20231226-MF4DNA_4T1preEditTop8/H8M9/Conv_zscan_06.power to /lab/weissman_imaging/puzheng/MERFISH_data/20231226-4T1preEditTop8_MF4dna/748_637_477_s13_6_8.power
copy /lab/weissman_imaging/puzheng/4T1Tumor/20231226-MF4DNA_4T1preEditTop8/H8M9/Conv_zscan_06.off to /lab/weissman_imaging/puzheng/MERFISH_data/20231226-4T1preEditTop8_MF4dna/748_637_477_s13_6_8.off
copy /lab/weissman_imaging/puzheng/4T1Tumor/20231226-MF4DNA_4T1preEditTop8/H0M1/Conv_zscan_07.xml to /lab/weissman_imaging/puzheng/MERFISH_data/20231226-4T1preEditTop8_MF4dna/748_637_477_405_s13_7_0.xml
copy /lab/weissman_imaging/puzheng/4T1Tumor/20231226-MF4DNA_4T1preEditTop8/H0M1/Conv_zscan_07.power to /lab/weissman_imaging/puzheng/MERFISH_data/20231226-4T1preEditTop8_MF4dna/748_637_477_405_s13_7_0.power
copy /lab/weissman_imaging/puzheng/4T1Tumor/20231226-MF4DNA_4T1preEditTop8/H0M1/Conv_zscan_07.off to /lab/weissman_imaging/puzheng/MERFISH_data/20231226-4T1preEditTop8_MF4dna/748_637_477_4

copy /lab/weissman_imaging/puzheng/4T1Tumor/20231226-MF4DNA_4T1preEditTop8/H8M9/Conv_zscan_07.power to /lab/weissman_imaging/puzheng/MERFISH_data/20231226-4T1preEditTop8_MF4dna/748_637_477_s13_7_8.power
copy /lab/weissman_imaging/puzheng/4T1Tumor/20231226-MF4DNA_4T1preEditTop8/H8M9/Conv_zscan_07.inf to /lab/weissman_imaging/puzheng/MERFISH_data/20231226-4T1preEditTop8_MF4dna/748_637_477_s13_7_8.inf
copy /lab/weissman_imaging/puzheng/4T1Tumor/20231226-MF4DNA_4T1preEditTop8/H8M9/Conv_zscan_07.off to /lab/weissman_imaging/puzheng/MERFISH_data/20231226-4T1preEditTop8_MF4dna/748_637_477_s13_7_8.off
copy /lab/weissman_imaging/puzheng/4T1Tumor/20231226-MF4DNA_4T1preEditTop8/H8M9/Conv_zscan_07.xml to /lab/weissman_imaging/puzheng/MERFISH_data/20231226-4T1preEditTop8_MF4dna/748_637_477_s13_7_8.xml
copy /lab/weissman_imaging/puzheng/4T1Tumor/20231226-MF4DNA_4T1preEditTop8/H0M1/Conv_zscan_08.power to /lab/weissman_imaging/puzheng/MERFISH_data/20231226-4T1preEditTop8_MF4dna/748_637_477_405_s13_8_0

copy /lab/weissman_imaging/puzheng/4T1Tumor/20231226-MF4DNA_4T1preEditTop8/H8M9/Conv_zscan_08.off to /lab/weissman_imaging/puzheng/MERFISH_data/20231226-4T1preEditTop8_MF4dna/748_637_477_s13_8_8.off
copy /lab/weissman_imaging/puzheng/4T1Tumor/20231226-MF4DNA_4T1preEditTop8/H8M9/Conv_zscan_08.inf to /lab/weissman_imaging/puzheng/MERFISH_data/20231226-4T1preEditTop8_MF4dna/748_637_477_s13_8_8.inf
copy /lab/weissman_imaging/puzheng/4T1Tumor/20231226-MF4DNA_4T1preEditTop8/H8M9/Conv_zscan_08.power to /lab/weissman_imaging/puzheng/MERFISH_data/20231226-4T1preEditTop8_MF4dna/748_637_477_s13_8_8.power
copy /lab/weissman_imaging/puzheng/4T1Tumor/20231226-MF4DNA_4T1preEditTop8/H0M1/Conv_zscan_09.dax to /lab/weissman_imaging/puzheng/MERFISH_data/20231226-4T1preEditTop8_MF4dna/748_637_477_405_s13_9_0.dax
copy /lab/weissman_imaging/puzheng/4T1Tumor/20231226-MF4DNA_4T1preEditTop8/H0M1/Conv_zscan_09.xml to /lab/weissman_imaging/puzheng/MERFISH_data/20231226-4T1preEditTop8_MF4dna/748_637_477_405_s13_9

copy /lab/weissman_imaging/puzheng/4T1Tumor/20231226-MF4DNA_4T1preEditTop8/H8M9/Conv_zscan_09.inf to /lab/weissman_imaging/puzheng/MERFISH_data/20231226-4T1preEditTop8_MF4dna/748_637_477_s13_9_8.inf
copy /lab/weissman_imaging/puzheng/4T1Tumor/20231226-MF4DNA_4T1preEditTop8/H8M9/Conv_zscan_09.xml to /lab/weissman_imaging/puzheng/MERFISH_data/20231226-4T1preEditTop8_MF4dna/748_637_477_s13_9_8.xml
copy /lab/weissman_imaging/puzheng/4T1Tumor/20231226-MF4DNA_4T1preEditTop8/H0M1/Conv_zscan_10.inf to /lab/weissman_imaging/puzheng/MERFISH_data/20231226-4T1preEditTop8_MF4dna/748_637_477_405_s13_10_0.inf
copy /lab/weissman_imaging/puzheng/4T1Tumor/20231226-MF4DNA_4T1preEditTop8/H0M1/Conv_zscan_10.power to /lab/weissman_imaging/puzheng/MERFISH_data/20231226-4T1preEditTop8_MF4dna/748_637_477_405_s13_10_0.power
copy /lab/weissman_imaging/puzheng/4T1Tumor/20231226-MF4DNA_4T1preEditTop8/H0M1/Conv_zscan_10.xml to /lab/weissman_imaging/puzheng/MERFISH_data/20231226-4T1preEditTop8_MF4dna/748_637_477_405

## Check file count

In [14]:
# Sanity check: number of entries in the target folder vs. the expected count.
# Each image is stored as 5 files (the copy log shows .dax, .xml, .inf, .off
# and .power), one image per color_usage row and FOV.
n_files_per_image = 5
n_found = len(os.listdir(target_folder))
n_expected = len(color_usage_df) * len(fovs) * n_files_per_image
print(n_found, n_expected)

3780 3780


# Generate data_organization

Go to the Jupyter notebook: https://c4b15.wi.mit.edu/user/puzheng/notebooks/lab/weissman_imaging/puzheng/Softwares/Weissman_MERFISH_Scripts/MERFISH_preprocess/Data_organization/20230830-Generate_dataOrganization.ipynb

In [None]:
## Automatically generate data_organization
from src.file_io.data_organization import Color_Usage

In [None]:
def generate_dataOrganization():
    

