In [None]:
from pathlib import Path
import numpy as np
import random
import pandas as pd

from aicsimageio import AICSImage
from aicsimageio.readers.ome_tiff_reader import OmeTiffReader
from aicsimageio.writers import OmeTiffWriter

import sys
src_path = str(Path.cwd().parent.parent)
if src_path not in sys.path:
    sys.path.append(src_path)

import src.d00_utils.utilities as utils

In [None]:
# Parent directory whose entries are the per-experiment folders.
# A prompt is shown so the user knows which path is being requested, and
# surrounding whitespace from copy/paste is stripped before building the Path.
multiexp_dir = Path(input('Directory containing the experiment folders: ').strip())

In [None]:
# Directory the combined conditions CSV is written to.
# A prompt is shown so the user knows which path is being requested, and
# surrounding whitespace from copy/paste is stripped before building the Path.
save_dir = Path(input('Directory to save the combined conditions CSV in: ').strip())

In [None]:
# Destination file for the merged well-conditions table.
csv_savepath = save_dir.joinpath('combined_well_conditions_modified.csv')

In [None]:
# Experiment IDs whose folders should be picked up.
exps = ['CE023', 'CE025', 'CE027']

# Keep every entry of multiexp_dir that matches at least one experiment ID.
exp_dir = []
for candidate in multiexp_dir.iterdir():
    for exp_id in exps:
        if candidate.match(exp_id):
            exp_dir.append(candidate)
            break
print(len(exp_dir))

In [None]:
# Collect the paths of the CSVs that hold the condition info.
conditions_glob = '*well_conditions_modified*'
conditions_dirpaths = []

for exp_path in exp_dir:
    top_level_hits = list(exp_path.glob(conditions_glob))
    if top_level_hits:
        conditions_dirpaths.extend(top_level_hits)
        continue

    # Nothing directly inside the experiment folder: look one level down,
    # in each of its sub-directories.
    for sub in exp_path.iterdir():
        if sub.is_dir():
            conditions_dirpaths.extend(list(sub.glob(conditions_glob)))

print(len(conditions_dirpaths))
print(conditions_dirpaths)

In [None]:
# Read every conditions CSV into its own DataFrame (same order as the paths).
list_dfs = list(map(pd.read_csv, conditions_dirpaths))

In [None]:
# Stack the per-experiment tables into one frame.
# ignore_index=True gives every row a unique label; without it each CSV's own
# 0..n-1 index is repeated, so label-based selection/assignment further down
# could silently touch rows from several experiments at once.
df = pd.concat(list_dfs, ignore_index=True)
df

In [None]:
# Build the well identifier as Experiment-Ibidi-Well, then save the table.
id_columns = ['Experiment', 'Ibidi', 'Well']
well_id = df[id_columns[0]]
for column in id_columns[1:]:
    well_id = well_id + '-' + df[column]

df['wellID'] = well_id
df.to_csv(csv_savepath, index=False)

In [None]:
# Replicate numbering: CE023 -> 1, CE025 -> 2, and CE027 is split by Ibidi
# into replicate 3 (CE027_rep1) and replicate 4 (CE027_rep2).
# Rows that match none of these get -1.
CE027_rep1 = ['I1', 'I2', 'I3', 'I4']
CE027_rep2 = ['I5', 'I6', 'I7', 'I8']

experiment = df['Experiment']
ibidi = df['Ibidi']

conditions = [
    experiment.eq('CE023'),
    experiment.eq('CE025'),
    experiment.eq('CE027') & ibidi.isin(CE027_rep1),
    experiment.eq('CE027') & ibidi.isin(CE027_rep2),
]
choices = [1, 2, 3, 4]

df['Replicate'] = np.select(conditions, choices, default=-1)
df

In [None]:
# Keep a copy of the raw virus labels before they are corrected.
# Guarded so that re-running this cell after 'Virus' has been rewritten does not
# replace the raw backup with already-corrected labels.
if 'Virus_old' not in df.columns:
    df['Virus_old'] = df['Virus']

# Sorted unique raw labels as strings (missing values show up as 'nan').
virus_old = np.unique(df['Virus_old'].astype(str))
print(virus_old)

In [None]:
# Corrected label for each entry of virus_old, matched by position.
corrections = ['218', '218 + 424', '218 + 281', '218 + 424', '281', '281 + 351', '281 + 351', '283', '218 + 283', '281 + 351', 'nan']

print(np.unique(corrections))

# The mapping is positional, so a different set of raw labels would silently
# pair old and new values incorrectly in the printout below; fail early instead.
if len(virus_old) != len(corrections):
    raise ValueError(
        f'{len(virus_old)} unique raw virus labels but {len(corrections)} corrections; '
        'update `corrections` to match `virus_old`.'
    )

for old, new in zip(virus_old, corrections):
    print(f'{old}: {new}')

# virus_old was derived from the string-cast column, so compare against the same
# string-cast values; comparing the raw column would miss non-string entries
# (e.g. numeric labels) and send them to the 'nan' default.
virus_old_as_str = df['Virus_old'].astype(str)
conditions = [virus_old_as_str == e for e in virus_old]
df['Virus'] = np.select(conditions, corrections, 'nan')
df.to_csv(csv_savepath, index=False)

In [None]:
# Continue with the in-memory table. Alternative kept for reference: reload a
# previously saved conditions table from disk instead.
# conditions_df = pd.read_csv(well_conditions_csvpath)
conditions_df = df
conditions_df.head()

In [None]:
# Variables that define a group; one well is drawn at random from each group.
grouping_vars = ['Virus', 'Replicate', 'DIV']

# Keep only the rows marked 'Exp' in the 'Exp or ctrl' column.
is_experimental = conditions_df['Exp or ctrl'] == 'Exp'
conditions_df = conditions_df[is_experimental]

sample_df = conditions_df.groupby(grouping_vars).sample(n=1)
print(len(sample_df))

# NOTE(review): this replaces the Experiment-Ibidi-Well identifier with
# Experiment-Well (no Ibidi part) - confirm that dropping Ibidi is intended.
sample_df['wellID'] = sample_df['Experiment'] + '-' + sample_df['Well']
sample_df

In [None]:
# Collect image directories interactively until the user enters DONE.
dirpaths = []

while True:
    dirpath = input('Dirpath (or type DONE if done):')
    if dirpath == 'DONE':
        break
    dirpaths.append(Path(dirpath))

In [None]:
# Alternative to manual entry (disabled): gather every directory under each
# experiment folder.
# dirpaths = []
# for d in exp_dir:
#     print(d)
#     dirpaths.extend(d.glob('**'))


# Keep only the directories whose name contains 'caaxch'.
dirpaths = list(filter(lambda d: 'caaxch' in d.name, dirpaths))
print(len(dirpaths))
print(dirpaths)

In [None]:
# Purposes the selected images are divided between.
img_purpose = ['test', 'train']

# Number of randomly chosen images to use from each well
n = 1

In [None]:
# Initiate dictionary to hold the selected image paths for each purpose
img_list_d = {p: [] for p in img_purpose}

# Each well has to supply n distinct images to every purpose
num_to_select = n * len(img_purpose)

# Select imagepaths
for row_label, row in sample_df.iterrows():
    wellID = row['wellID']

    # The glob pattern only depends on the well, so build it once per well
    w_search = '*' + wellID.replace('-', '*') + '*.ome.tif'
    imgpaths = []
    for dirpath in dirpaths:
        imgpaths.extend(dirpath.glob(w_search))

    grouping_vars_str = ', '.join([f'{var}: {row[var]}' for var in grouping_vars])
    print(f'{wellID} [{grouping_vars_str}]: {len(imgpaths)} images found')

    # A well with exactly num_to_select images is still usable (>=, not >)
    if len(imgpaths) < num_to_select:
        print(f'  -> skipped: {num_to_select} images needed')
        continue

    selected_bywell = random.sample(imgpaths, num_to_select)

    # Hand each purpose its own block of n images. Slicing with k*n:(k+1)*n
    # keeps the blocks disjoint for any n, so no image is shared between
    # purposes (e.g. between test and train).
    for k, p in enumerate(img_purpose):
        img_list_d[p].extend(selected_bywell[k * n:(k + 1) * n])

for p in img_purpose:
    print(f'{p}: {len(img_list_d[p])} images selected')

# One row per selected image, grouped by purpose in img_purpose order
stack_df = pd.DataFrame({
    'imgpath': [path for p in img_purpose for path in img_list_d[p]],
    'purpose': [p for p in img_purpose for path in img_list_d[p]],
})

stack_df

In [None]:
# Optional existing training stack; the literal answer NONE means there is none.
prev_train_prompt = 'Input path for previous training image (if using). Otherwise, type NONE.'
prev_train_imgpath = input(prev_train_prompt)

no_previous_image = prev_train_imgpath == 'NONE'
if no_previous_image:
    print('No previous training image.')

In [None]:
# NOTE(review): machine-specific absolute path - consider making this configurable.
savedir = Path('/Users/kwu2/Library/CloudStorage/GoogleDrive-kwu2@stanford.edu/My Drive/Lab/ImageJ/training_imgs')

stack_basename = input("Enter basename for stacked image (excluding suffix):")

# Find the first index i for which none of the output stacks exists yet, so
# nothing already on disk gets overwritten. Every purpose is checked (not just
# the first two), so this works for any number of entries in img_purpose.
i = 0
while True:
    savepaths = np.array([savedir / (f'{stack_basename}_{p}_{i}.ome.tif') for p in img_purpose])
    if not any(path.is_file() for path in savepaths):
        break
    i = i + 1

for path in savepaths:
    print(path.name)

# The CSV describing the stacks shares the index chosen above
csv_savename = f'{stack_basename}_{i}.csv'

In [None]:
def add_imgs_to_list(imgpaths, img_list, df, num_tps=3):
    """Load images, keep a few frames of each, and append them to ``img_list``.

    Axis 0 of each loaded array is treated as the timepoint axis. For an image
    with more than one frame, the candidate timepoints are the first frame, the
    last frame and (when one exists) a random frame in between; ``num_tps`` of
    them are drawn at random. Single-frame images are appended unchanged.
    The chosen timepoints are recorded in ``df['timepoints']`` on the rows whose
    ``'imgpath'`` equals the image path.

    Args:
        imgpaths: Paths of the images to load.
        img_list: List the image arrays are appended to (modified in place).
        df: DataFrame with an ``'imgpath'`` column (modified in place).
        num_tps: Number of timepoints to keep per multi-frame image. If fewer
            candidates are available, all of them are used.

    Returns:
        Tuple ``(img_list, df, physical_pixel_sizes)``; ``physical_pixel_sizes``
        comes from the first image and is ``None`` when ``imgpaths`` is empty.
    """
    # Defined up front so an empty imgpaths does not fail at the return statement
    physical_pixel_sizes = None

    for i, imgpath in enumerate(imgpaths):
        img_file = AICSImage(imgpath, reader=OmeTiffReader)
        img = img_file.data
        n_frames = img.shape[0]

        if i == 0:
            physical_pixel_sizes = img_file.physical_pixel_sizes

        if n_frames > 1:
            candidates = [0, n_frames - 1]
            # A frame strictly between first and last only exists for 3+ frames;
            # random.randint(1, 0) would raise for a two-frame image.
            if n_frames > 2:
                candidates.insert(1, random.randint(1, n_frames - 2))
            # Never ask random.sample for more items than there are candidates
            tps = random.sample(candidates, min(num_tps, len(candidates)))
            print(f'timepoints: {tps}')
            df.loc[df['imgpath']==imgpath, 'timepoints'] = ', '.join([str(tp) for tp in tps])
            # Index axis 0 with the chosen timepoints; the remaining axes are kept
            img_list.append(img[tps])
        else:
            img_list.append(img)
            df.loc[df['imgpath']==imgpath, 'timepoints'] = str(0)

    return img_list, df, physical_pixel_sizes

# Get the smallest y and x dimensions to crop all images to the same size
def crop_imgs_to_match_size(img_list):
    """Crop every array in ``img_list`` to the smallest extent found on axes 3 and 4.

    Args:
        img_list: List of arrays with at least five axes.

    Returns:
        New list with each array sliced to ``[:, :, :, :y_min, :x_min]``, where
        ``y_min`` / ``x_min`` are the minimum sizes of axes 3 / 4 across the list.
        An empty input gives an empty list.
    """
    axis3_sizes = [img.shape[3] for img in img_list]
    axis4_sizes = [img.shape[4] for img in img_list]

    # default=None only matters for an empty list, where nothing is sliced anyway
    y_min = min(axis3_sizes, default=None)
    x_min = min(axis4_sizes, default=None)

    return [img[:, :, :, :y_min, :x_min] for img in img_list]

In [None]:
# Build and save one stacked image per purpose. savepaths was built from
# img_purpose in the same order as the keys of img_list_d, so the two are
# paired by position.
for (p, imgpaths_subset), savepath in zip(img_list_d.items(), savepaths):
    # Nothing was selected for this purpose: there is nothing to stack
    if not imgpaths_subset:
        print(f'{p}: no images selected, skipping stack.')
        continue

    img_list = []

    # Only append previous training images to training image stack
    if p == 'train' and prev_train_imgpath != 'NONE':
        # Wrap in Path locally instead of rebinding prev_train_imgpath, so the
        # cell can be re-run without changing the variable's type
        prev_img_file = AICSImage(Path(prev_train_imgpath), reader=OmeTiffReader)
        img_list.append(prev_img_file.data)

    img_list, stack_df, physical_pixel_sizes = add_imgs_to_list(imgpaths_subset, img_list, stack_df, num_tps=2)
    img_list = crop_imgs_to_match_size(img_list)
    img_stacked = np.concatenate(img_list, axis=0)
    ome_metadata = utils.construct_ome_metadata(img_stacked, physical_pixel_sizes)

    OmeTiffWriter.save(img_stacked, savepath, ome_xml=ome_metadata)
    # Written after every stack so the CSV reflects the timepoints recorded so far
    stack_df.to_csv(savedir/csv_savename, index=False)