In [1]:
import os
import glob
import numpy as np
import pandas as pd
import datetime
import shutil


# Allow up to 200 characters per column when pandas renders a DataFrame.
pd.set_option("display.max_colwidth", 200)

In [2]:
# Three-letter English month abbreviation -> zero-padded month number.
MONTH_ABBR_TO_NUMBER = {
    abbr: f"{number:02d}"
    for number, abbr in enumerate(
        ("Jan", "Feb", "Mar", "Apr", "May", "Jun",
         "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"),
        start=1,
    )
}


def date_to_sortable(date):
    """Convert a date string such as ``2022Nov07`` into ``2022-11-07``.

    The input is split positionally: the first four characters are the year,
    the next three the month abbreviation, and the remainder the day.  The
    year and day are passed through unchanged, so the result sorts
    chronologically as a plain string when the day is zero-padded.

    Parameters
    ----------
    date : str
        Date in the format ``YYYYMonDD`` (e.g. ``2022Nov07``).

    Returns
    -------
    str
        The date as ``YYYY-MM-DD``.

    Raises
    ------
    KeyError
        If the three-letter month abbreviation is not recognized.
    """
    year, month, day = date[:4], date[4:7], date[7:]
    try:
        month_number = MONTH_ABBR_TO_NUMBER[month]
    except KeyError:
        # Same exception type as a plain dict lookup, but with a message that
        # points at the offending input.
        raise KeyError(
            f"unrecognized month abbreviation {month!r} in date {date!r}"
        ) from None
    return "-".join([year, month_number, day])

In [3]:
# Input root: the directory that is searched for the DPNMF* dataset folders.
ddir = '/bigstore/Images2022/Gaby/dredFISH'
# Output root: the directory under which per-dataset output folders are created.
outdir = '/bigstore/GeneralStorage/Data/dredFISH'

In [4]:
# List the dataset folders (names relative to ddir), sorted alphabetically.
# root_dir is the documented way to glob relative to a directory path;
# dir_fd is meant for an open directory file descriptor, not a path string.
datasets = np.sort(glob.glob("DPNMF*", root_dir=ddir))
datasets

array(['DPNMF-FR_7C,PFA+Methanol,PFA,Methanol_2022Nov07',
       'DPNMF-FR_R1_4A_UC_R2_5C_2022Nov27',
       'DPNMF-FR_Z1,Z2,9A,Z3,Z4,6C_2022Nov15', 'DPNMF-R_8C_2022Oct18',
       'DPNMF_uncleared_2022Sep20', 'DPNMF_uncleared_2022Sep21'],
      dtype='<U47')

In [5]:
# Build one summary row per dataset: its acquisition date, every fishdata*
# folder it contains, the most recently modified of those, and how many
# metadata / matrix CSV files that folder holds.
records = []
for dst in datasets:
    # The dataset name ends with "_<YYYYMonDD>".
    date = date_to_sortable(dst.split('_')[-1])

    # root_dir is the documented keyword for globbing relative to a path
    # (dir_fd expects an open directory file descriptor).
    dataset_dir = os.path.join(ddir, dst)
    a = glob.glob("fishdata*", root_dir=dataset_dir)
    # Oldest first, so the last entry is the most recently modified folder.
    a.sort(key=lambda x: os.path.getmtime(os.path.join(dataset_dir, x)))

    # With no fishdata* folder, picked is "" and the CSV search below looks
    # directly inside the dataset folder.
    picked = a[-1] if a else ""

    picked_dir = os.path.join(ddir, dst, picked)
    # Only the counts are reported, so the matches do not need sorting.
    b1 = glob.glob("*_metadata.csv", root_dir=picked_dir)
    b2 = glob.glob("*_matrix.csv", root_dir=picked_dir)

    records.append({
        'date': date,
        'dataset': dst,
        'fishdata-all': a,
        'fishdata-picked': picked,
        'n_metadata_csv': len(b1),
        'n_matrix_csv': len(b2),
    })

res = pd.DataFrame(records).sort_values(['date', 'dataset'])
res

Unnamed: 0,date,dataset,fishdata-all,fishdata-picked,n_metadata_csv,n_matrix_csv
4,2022-09-20,DPNMF_uncleared_2022Sep20,[],,0,0
5,2022-09-21,DPNMF_uncleared_2022Sep21,[],,0,0
3,2022-10-18,DPNMF-R_8C_2022Oct18,"[fishdata_without_strip, fishdata_with_strip, fishdata_with_strip_no_highpass, fishdata_stitched_correction, fishdata_stitched, fishdata_test, fishdata_2022Nov01, fishdata_2022Nov03, fishdata_2022...",fishdata_2022Dec13,0,0
0,2022-11-07,"DPNMF-FR_7C,PFA+Methanol,PFA,Methanol_2022Nov07",[fishdata_2022Dec06],fishdata_2022Dec06,0,0
2,2022-11-15,"DPNMF-FR_Z1,Z2,9A,Z3,Z4,6C_2022Nov15","[fishdata_2022Nov15, fishdata_2022Dec02b, fishdata_2022Dec05, fishdata_2022Dec09, fishdata_2022Sep06, fishdata_2022Dec12]",fishdata_2022Dec12,6,6
1,2022-11-27,DPNMF-FR_R1_4A_UC_R2_5C_2022Nov27,"[fishdata_2022Dec06, fishdata_2022Dec08, fishdata_2022Dec09]",fishdata_2022Dec09,39,39


In [6]:
# Datasets whose picked fishdata folder contains no *_metadata.csv files.
resundone = res.loc[res['n_metadata_csv'].eq(0)]
resundone

Unnamed: 0,date,dataset,fishdata-all,fishdata-picked,n_metadata_csv,n_matrix_csv
4,2022-09-20,DPNMF_uncleared_2022Sep20,[],,0,0
5,2022-09-21,DPNMF_uncleared_2022Sep21,[],,0,0
3,2022-10-18,DPNMF-R_8C_2022Oct18,"[fishdata_without_strip, fishdata_with_strip, fishdata_with_strip_no_highpass, fishdata_stitched_correction, fishdata_stitched, fishdata_test, fishdata_2022Nov01, fishdata_2022Nov03, fishdata_2022...",fishdata_2022Dec13,0,0
0,2022-11-07,"DPNMF-FR_7C,PFA+Methanol,PFA,Methanol_2022Nov07",[fishdata_2022Dec06],fishdata_2022Dec06,0,0


In [7]:
# Datasets whose picked fishdata folder contains at least one *_metadata.csv file.
resdone = res.loc[res['n_metadata_csv'].ne(0)]
resdone

Unnamed: 0,date,dataset,fishdata-all,fishdata-picked,n_metadata_csv,n_matrix_csv
2,2022-11-15,"DPNMF-FR_Z1,Z2,9A,Z3,Z4,6C_2022Nov15","[fishdata_2022Nov15, fishdata_2022Dec02b, fishdata_2022Dec05, fishdata_2022Dec09, fishdata_2022Sep06, fishdata_2022Dec12]",fishdata_2022Dec12,6,6
1,2022-11-27,DPNMF-FR_R1_4A_UC_R2_5C_2022Nov27,"[fishdata_2022Dec06, fishdata_2022Dec08, fishdata_2022Dec09]",fishdata_2022Dec09,39,39


# Save results out

In [8]:
# Display the output root directory.
outdir

'/bigstore/GeneralStorage/Data/dredFISH'

In [10]:
# for idx, res in resdone.iterrows():
#     dst = res['dataset']
#     dst_out = dst.replace(',', '_')+"_Dec12_withstrip"
#     picked = res['fishdata-picked']
    
#     # ...
#     _indir = os.path.join(ddir, dst, picked) 
#     b1 = glob.glob("*_metadata.csv", dir_fd=_indir)
#     b1.sort(key=lambda x: os.path.getmtime(os.path.join(_indir, x)))
    
#     b2 = glob.glob("*_matrix.csv", dir_fd=_indir)
#     b2.sort(key=lambda x: os.path.getmtime(os.path.join(_indir, x)))
    
#     # create dataset dir (and 'orig' sub dir)
#     _outdir = os.path.join(outdir, dst_out)
#     if not os.path.isdir(_outdir):
#         os.mkdir(_outdir)
        
#     _outdir = os.path.join(outdir, dst_out, 'orig')
#     if not os.path.isdir(_outdir):
#         os.mkdir(_outdir)
        
#     for b in np.hstack([b1, b2]):
#         src = os.path.join( _indir, b)
#         dst = os.path.join(_outdir, b.replace(',', '_'))
#         print(b.replace(',', '_'))
#         shutil.copy(src, dst)
        

In [11]:
# mod = "nuclei"
# suffix = f"_Dec12_nostrip_{mod}"
# picked = "fishdata_2022Dec08" # "fishdata_2022Dec09" 

# Three parallel lists describing the exports to perform; entry i of each
# list belongs to the same export:
#   mod_all    - tag matched in the file names (*{mod}_metadata.csv, *{mod}_matrix.csv)
#   suffix_all - suffix appended to the dataset name to form the output folder
#   picked_all - fishdata folder inside the dataset to copy from
mod_all = [
    'nuclei',
    'cytoplasm',
    'total',

    'nuclei',
    'cytoplasm',
    'total',
]
suffix_all = [
    '_Dec12_strip_nuc',
    '_Dec12_strip_cyt',
    '_Dec12_strip_tol',

    '_Dec12_nostrip_nuc',
    '_Dec12_nostrip_cyt',
    '_Dec12_nostrip_tol',
]
picked_all = [
    'fishdata_2022Dec09',
    'fishdata_2022Dec09',
    'fishdata_2022Dec09',

    'fishdata_2022Dec08',
    'fishdata_2022Dec08',
    'fishdata_2022Dec08',
]

# The dataset is the same for every export, so it is set once here.
dst = "DPNMF-FR_R1_4A_UC_R2_5C_2022Nov27"

# strict=True raises if the three lists ever differ in length, instead of
# silently dropping the unmatched trailing entries.
for mod, suffix, picked in zip(mod_all, suffix_all, picked_all, strict=True):
    dst_out = dst.replace(',', '_')+suffix
    print(mod, suffix, picked, dst_out)

    # Collect the metadata and matrix CSVs for this tag, oldest first.
    # root_dir is the documented keyword for globbing relative to a path
    # (dir_fd expects an open directory file descriptor).
    _indir = os.path.join(ddir, dst, picked)
    b1 = glob.glob(f"*{mod}_metadata.csv", root_dir=_indir)
    b1.sort(key=lambda x: os.path.getmtime(os.path.join(_indir, x)))

    b2 = glob.glob(f"*{mod}_matrix.csv", root_dir=_indir)
    b2.sort(key=lambda x: os.path.getmtime(os.path.join(_indir, x)))

    # Create the dataset dir and its 'orig' sub dir in one call; existing
    # directories are left untouched.
    _outdir = os.path.join(outdir, dst_out, 'orig')
    os.makedirs(_outdir, exist_ok=True)

    # Copy metadata files first, then matrix files, replacing commas in the
    # file names with underscores.
    for b in b1 + b2:
        b_out = b.replace(',', '_')
        src_path = os.path.join(_indir, b)
        # Named dst_path so the dataset name held in dst is not overwritten.
        dst_path = os.path.join(_outdir, b_out)
        print(b_out)
        shutil.copy(src_path, dst_path)


nuclei _Dec12_strip_nuc fishdata_2022Dec09 DPNMF-FR_R1_4A_UC_R2_5C_2022Nov27_Dec12_strip_nuc
DPNMF-FR_R1_4A_UC_R2_5C_2022Nov27_Section1_nuclei_metadata.csv
DPNMF-FR_R1_4A_UC_R2_5C_2022Nov27_Section18_nuclei_metadata.csv
DPNMF-FR_R1_4A_UC_R2_5C_2022Nov27_Section22_nuclei_metadata.csv
DPNMF-FR_R1_4A_UC_R2_5C_2022Nov27_Section23_nuclei_metadata.csv
DPNMF-FR_R1_4A_UC_R2_5C_2022Nov27_Section24_nuclei_metadata.csv
DPNMF-FR_R1_4A_UC_R2_5C_2022Nov27_Section7_nuclei_metadata.csv
DPNMF-FR_R1_4A_UC_R2_5C_2022Nov27_Section21_nuclei_metadata.csv
DPNMF-FR_R1_4A_UC_R2_5C_2022Nov27_Section14_nuclei_metadata.csv
DPNMF-FR_R1_4A_UC_R2_5C_2022Nov27_Section5_nuclei_metadata.csv
DPNMF-FR_R1_4A_UC_R2_5C_2022Nov27_Section8_nuclei_metadata.csv
DPNMF-FR_R1_4A_UC_R2_5C_2022Nov27_Section2_nuclei_metadata.csv
DPNMF-FR_R1_4A_UC_R2_5C_2022Nov27_Section13_nuclei_metadata.csv
DPNMF-FR_R1_4A_UC_R2_5C_2022Nov27_Section17_nuclei_metadata.csv
DPNMF-FR_R1_4A_UC_R2_5C_2022Nov27_Section1_nuclei_matrix.csv
DPNMF-FR_R1_4A_UC_R