# MERFISH Data reorganization

In [1]:
import pandas as pd
import numpy as np
import os, sys, time
import re
import shutil
from importlib import reload
# this version doesn't have ChromAn compiled, so directly load from src:
sys.path.append(r"/lab/weissman_imaging/puzheng/Softwares/ChromAn")
import src

In [4]:
from src.file_io.data_organization import Color_Usage, search_fovs_in_folders

# Root directory under which the raw imaging experiments are stored.
data_home = r"/lab/weissman_imaging/puzheng"

# Raw experiment to reorganize: <data_home>/msBrain/<date>-<experiment name>.
data_folder = os.path.join(
    data_home,
    'msBrain',
    '20230830-msBrain_Mop_MF1',
)

# Scan the experiment folder; returns the folders found and the
# field-of-view (fov) file names detected in them.
fds, fovs = search_fovs_in_folders(data_folder)


- searching in folder: /lab/weissman_imaging/puzheng/msBrain/20230830-msBrain_Mop_MF1
-- 20 folders, 398 fovs detected.


In [12]:
# The color-usage table describes how the imaging files are organized.
# It is read from <data_folder>/Analysis/color_usage.csv; later cells use its
# index to decide which hyb folders to process.
color_usage_filename = os.path.join(
    data_folder,
    'Analysis',
    'color_usage.csv',
)
color_usage_df = Color_Usage(color_usage_filename)

- load color_usage from file: /lab/weissman_imaging/puzheng/msBrain/20230830-msBrain_Mop_MF1/Analysis/color_usage.csv


['H0M1',
 'H1M2',
 'H2M3',
 'H3M4',
 'H4M5',
 'H5M6',
 'H6M7',
 'H7M8',
 'H8M9',
 'H10M11',
 'H11M12',
 'H12M13']

In [6]:
# The acquisition date is the part of the experiment folder name that
# precedes the first '-' (e.g. "20230830-msBrain_Mop_MF1" -> "20230830").
date = os.path.split(data_folder)[1].partition('-')[0]

# Sample / library labels; together with the date they name the target folder.
sample, library = 'msTest', 'MO4'

print(date, sample, library)

20230830 msTest MO4


In [7]:
from src.file_io.data_organization import create_folder

# Image-type prefixes ("shutter files" in the original notes) used when naming
# the reorganized files; which one applies depends on the imaging round.
# NOTE(review): the "ref 36/39" and "ref 58 / 65" remarks are kept from the
# original author; their meaning is not evident from this notebook.
image_type = '748_637_477_s13_n1000' # ref 36/39
polyt_image_type = '748_637_477_s13_n1000'
ref_image_type = '748_637_477_405_s13_n1000' # ref 58 / 65

# fiducial beads (currently disabled)
#ref_channel = 488
#ref_frame = 13 # start from 0!

# Root folder that collects all reorganized MERFISH datasets.
merfish_data_folder = r'/lab/weissman_imaging/puzheng/MERFISH_data'

# Destination for this dataset: <merfish_data_folder>/<date>-<sample>_<library>.
target_folder = os.path.join(
    merfish_data_folder,
    f'{date}-{sample}_{library}',
)
create_folder(target_folder)


Creating folder: /lab/weissman_imaging/puzheng/MERFISH_data/20230830-msTest_MO4


In [22]:
# Copy (or move) the raw files of each selected field of view (fov) from every
# hyb folder listed in the color-usage table into `target_folder`, renaming
# them to "<image_type>_<fov index>_<round>.<extension>".

# Hyb folder names look like "H<round><letter><number>", e.g. "H0M1" or
# "H10M11". Both numbers may span several digits, so each group is "[0-9]+"
# (the previous "[0-9+]" matched a single character that was a digit or "+").
folder_regexp = r'H([0-9]+)[MRCP]([0-9]+)'

ref_round = 0      # round renamed with `ref_image_type`
polyt_round = 8    # round renamed with `polyt_image_type`
#fov_ids = np.arange(2)
overwrite = True       # replace files that already exist in the target folder
remove_source = False  # True: move files out of the source; False: copy them

# Positions in `fovs` to process; set to None to process every fov.
# (An explicit None replaces the former `'sel_fovs' in locals()` test, which
# kept filtering on a stale value left in the kernel by an earlier run.)
sel_fovs = [43]

# Resolve once which folders take part and how their files are renamed; this
# does not depend on the fov, so it is hoisted out of the fov loop.
_usable_folder_names = set(color_usage_df.index)
_round_folders = []  # (folder path, round string, image type)
for _fd in fds:
    _fd_name = os.path.basename(_fd)
    # match round and hyb
    _match = re.search(folder_regexp, _fd_name)
    # skip folders that do not follow the naming scheme or are absent from color_usage
    if _match is None or _fd_name not in _usable_folder_names:
        continue
    _round = _match.group(1)
    # reference and polyT rounds use their own image type
    if int(_round) == ref_round:
        _image_type = ref_image_type
    elif int(_round) == polyt_round:
        _image_type = polyt_image_type
    else:
        _image_type = image_type
    _round_folders.append((_fd, _round, _image_type))

for _fov_id, _fov_name in enumerate(fovs):
    # check if this fov is selected:
    if sel_fovs is not None and _fov_id not in sel_fovs:
        continue # skip if not selected
    # fov file name without its extension(s), e.g. "Conv_zscan_043"
    _fov_stem = _fov_name.split(os.extsep)[0]
    # proceed for the rest:
    for _fd, _round, _image_type in _round_folders:
        # multiple files (.dax, .inf, .xml, ...) belong to each image
        for _fl_name in os.listdir(_fd):
            if _fov_stem not in _fl_name:
                continue
            _fl = os.path.join(_fd, _fl_name)
            # target file keeps only the last extension of the source file
            _tar_fl = os.path.join(
                target_folder,
                f"{_image_type}_{_fov_id}_{_round}{os.extsep}{_fl_name.split(os.extsep)[-1]}",
            )
            # never replace an existing target unless explicitly allowed,
            # for moving as well as for copying
            if os.path.isfile(_tar_fl) and not overwrite:
                print(f"{_tar_fl} already exist, skip")
                continue
            if remove_source:
                print(f"move {_fl} to {_tar_fl}")
                shutil.move(_fl, _tar_fl)
            else:
                print(f"copy {_fl} to {_tar_fl}")
                shutil.copyfile(_fl, _tar_fl)

copy /lab/weissman_imaging/puzheng/msBrain/20230830-msBrain_Mop_MF1/H0M1/Conv_zscan_043.inf to /lab/weissman_imaging/puzheng/MERFISH_data/20230830-msTest_MO4/748_637_477_405_s13_n1000_43_0.inf
copy /lab/weissman_imaging/puzheng/msBrain/20230830-msBrain_Mop_MF1/H0M1/Conv_zscan_043.xml to /lab/weissman_imaging/puzheng/MERFISH_data/20230830-msTest_MO4/748_637_477_405_s13_n1000_43_0.xml
copy /lab/weissman_imaging/puzheng/msBrain/20230830-msBrain_Mop_MF1/H0M1/Conv_zscan_043.power to /lab/weissman_imaging/puzheng/MERFISH_data/20230830-msTest_MO4/748_637_477_405_s13_n1000_43_0.power
copy /lab/weissman_imaging/puzheng/msBrain/20230830-msBrain_Mop_MF1/H0M1/Conv_zscan_043.off to /lab/weissman_imaging/puzheng/MERFISH_data/20230830-msTest_MO4/748_637_477_405_s13_n1000_43_0.off
copy /lab/weissman_imaging/puzheng/msBrain/20230830-msBrain_Mop_MF1/H0M1/Conv_zscan_043.dax to /lab/weissman_imaging/puzheng/MERFISH_data/20230830-msTest_MO4/748_637_477_405_s13_n1000_43_0.dax
copy /lab/weissman_imaging/puzh

copy /lab/weissman_imaging/puzheng/msBrain/20230830-msBrain_Mop_MF1/H10M11/Conv_zscan_043.inf to /lab/weissman_imaging/puzheng/MERFISH_data/20230830-msTest_MO4/748_637_477_s13_n1000_43_10.inf
copy /lab/weissman_imaging/puzheng/msBrain/20230830-msBrain_Mop_MF1/H10M11/Conv_zscan_043.xml to /lab/weissman_imaging/puzheng/MERFISH_data/20230830-msTest_MO4/748_637_477_s13_n1000_43_10.xml
copy /lab/weissman_imaging/puzheng/msBrain/20230830-msBrain_Mop_MF1/H10M11/Conv_zscan_043.power to /lab/weissman_imaging/puzheng/MERFISH_data/20230830-msTest_MO4/748_637_477_s13_n1000_43_10.power
copy /lab/weissman_imaging/puzheng/msBrain/20230830-msBrain_Mop_MF1/H10M11/Conv_zscan_043.off to /lab/weissman_imaging/puzheng/MERFISH_data/20230830-msTest_MO4/748_637_477_s13_n1000_43_10.off
copy /lab/weissman_imaging/puzheng/msBrain/20230830-msBrain_Mop_MF1/H10M11/Conv_zscan_043.dax to /lab/weissman_imaging/puzheng/MERFISH_data/20230830-msTest_MO4/748_637_477_s13_n1000_43_10.dax
copy /lab/weissman_imaging/puzheng/m

## Check file count

In [21]:
# Sanity check: compare the number of entries in the target folder with the
# expected number of files.
print(
    len(os.listdir(target_folder)),        # entries actually present
    5 * len(fovs) * len(color_usage_df),   # 5 files per fov for every color_usage row
)

25600 23880


# Generate data_organization

Continue in the Jupyter notebook: https://c4b15.wi.mit.edu/user/puzheng/notebooks/lab/weissman_imaging/puzheng/Softwares/Weissman_MERFISH_Scripts/MERFISH_preprocess/Data_organization/20230830-Generate_dataOrganization.ipynb

In [None]:
## Automatic generate data_organization
from src.file_io.data_organization import Color_Usage

In [None]:
def generate_dataOrganization():
    

