In [None]:
import collections
import os
import pickle
import re

import numpy as np
import tqdm

In [None]:
# Directory that holds the single-modality dictionary pickles.
single_modal_dict_prefix = '/gpfs/data/geraslab/Ashen/multimodal/single_modality_dictionary/01102022'

# Load the ultrasound dictionary. get_us_info() below indexes it as
# us_data[acn]['laterality'] and us_data[acn]['image_UID'].
# NOTE(review): pickle.load can execute arbitrary code; only load trusted files.
with open(os.path.join(single_modal_dict_prefix, 'ultrasound.pkl') , "rb") as f:
    us_data = pickle.load(f)

In [None]:
# Pickled reader-study data list; its element 0 is used as the exam list in the
# next cell.
# NOTE(review): pickle.load can execute arbitrary code; only load trusted files.
file = '/gpfs/data/geraslab/Ashen/ultrasound_data/data_list_final_reader_study.pkl'
with open(file, "rb") as f:
    read_study_data = pickle.load(f)

In [None]:
# First element of the reader-study pickle. Each item is later passed to the
# one-argument generate_list(exam), which reads its 'AccessionnNumber',
# 'patient_ID', 'StudyDate' and 'cancer_label' keys.
us_exam_list = read_study_data[0]

In [None]:
def get_us_info(acn, lateral, us_data,
                img_prefix='/gpfs/data/geraslab/Ashen/ultrasound_data/npy_img'):
    """Collect the ultrasound image indices and file names for one side of an exam.

    Args:
        acn: Accession number; used as the key into ``us_data``.
        lateral: Laterality value to select (callers in this notebook pass
            ``'left'`` or ``'right'``).
        us_data: Mapping ``acn -> {'laterality': {...}, 'image_UID': {...}}``.
            The keys of ``'laterality'`` must be convertible to ``int``, and
            ``'image_UID'`` is looked up with ``str(index)``.
        img_prefix: Directory recorded under ``'img_prefix'`` in the result.
            Defaults to the location that was previously hard-coded here.

    Returns:
        dict with keys ``'img_prefix'``, ``'accession_number'``, ``'indices'``
        (matching indices as ints, ascending) and ``'filenames'`` (the
        ``image_UID`` entries in the same order). Both lists are empty when no
        image has the requested laterality.

    Raises:
        KeyError: if ``acn`` is missing from ``us_data`` or a selected index
            has no ``image_UID`` entry.
    """
    exam_entry = us_data[acn]

    # Sort numerically (not lexicographically) so that e.g. 2 comes before 10.
    slices = sorted(int(k) for k, v in exam_entry['laterality'].items()
                    if v == lateral)

    # image_UID is keyed by the string form of the index.
    filenames = [exam_entry['image_UID'][str(s)] for s in slices]

    return {
        'img_prefix': img_prefix,
        'accession_number': acn,
        'indices': slices,
        'filenames': filenames,
    }

def generate_list(exam, us_data=us_data):
    """Build one breast-level record per side for a reader-study exam.

    Args:
        exam: Mapping for a single exam. The keys read are
            ``'AccessionnNumber'`` (spelled with a double "n" in the data),
            ``'patient_ID'``, ``'StudyDate'`` and ``'cancer_label'``.
        us_data: Ultrasound dictionary forwarded to ``get_us_info``. The
            default is bound to the notebook-level ``us_data`` at definition
            time.

    Returns:
        A list of two dicts, for ``'left'`` then ``'right'``, each with the keys
        ``'accession_number'``, ``'lateral'``, ``'mrn'``, ``'study_date'``,
        ``'benign'``, ``'malignant'``, ``'biopsied'`` (always ``np.nan`` here)
        and ``'us'`` (the ``get_us_info`` result).
    """
    accession_number = exam['AccessionnNumber']

    # The label does not depend on the side, so evaluate it once instead of
    # once per loop iteration.
    # SECURITY NOTE(review): eval() executes arbitrary code. The value is
    # presumably the string repr of a dict; if it only ever contains Python
    # literals, ast.literal_eval would be the safe replacement. Confirm against
    # the data before switching.
    cancer_label = eval(exam['cancer_label'])

    breasts = []
    for lateral in ['left', 'right']:
        us = get_us_info(accession_number, lateral, us_data)
        breasts.append({
            'accession_number': accession_number,
            'lateral': lateral,
            'mrn': exam['patient_ID'],
            'study_date': exam['StudyDate'],
            'benign': cancer_label[f"{lateral}_benign"],
            'malignant': cancer_label[f"{lateral}_malignant"],
            # No biopsy value is read from the exam in this function.
            'biopsied': np.nan,
            'us': us,
        })

    return breasts

In [None]:
# Flatten the per-exam lists returned by generate_list(exam) into a single
# list of breast-level records.
us_image_list = [
    breast
    for reader_exam in us_exam_list
    for breast in generate_list(reader_exam)
]

In [None]:
# Output location for the reader-study ultrasound list.
prefix = '/gpfs/data/geraslab/Nan/data/breast_mml_datalists/20220111/breasts_lists'
savepath = 'nyu_readerstudy_us'
phase = 'val'

# makedirs(..., exist_ok=True) keeps this cell re-runnable: os.mkdir raised
# FileExistsError whenever the directory already existed. It also creates any
# missing parent directories.
os.makedirs(os.path.join(prefix, savepath), exist_ok=True)
file = os.path.join(prefix, savepath, phase)

with open(file, 'wb') as f:
    pickle.dump(us_image_list, f)

# Report only after the dump has succeeded.
print(phase, "#pairs:", len(us_image_list), '\n\t saved at: ', savepath)

In [None]:
# Multi-modality datalist. The cells below iterate data_full_set[0] to build
# us_images_train and data_full_set[1] to build us_images_val.
# NOTE(review): pickle.load can execute arbitrary code; only load trusted files.
datafile = '/gpfs/data/geraslab/Ashen/multimodal/multi_modality_datalist/03092022/datalist.pkl'
with open(datafile, "rb") as f:
    data_full_set = pickle.load(f)

In [None]:
def generate_list(mrn, cancer_label, exam, us_data=us_data):
    """Build the left and right breast records for one exam of an episode.

    Note: this definition replaces the earlier one-argument ``generate_list``
    in the notebook namespace; from here on only this signature is callable.

    Args:
        mrn: Value stored under ``'mrn'`` in every record.
        cancer_label: Mapping read with the keys ``'<side>_benign'``,
            ``'<side>_malignant'`` and ``'<side>_biopsied'``, where ``<side>``
            is ``'left'`` or ``'right'``.
        exam: Mapping for one exam; the keys read are ``'acn'`` and
            ``'study_date'``.
        us_data: Ultrasound dictionary forwarded to ``get_us_info``. The
            default is bound to the notebook-level ``us_data`` at definition
            time.

    Returns:
        A two-element list of dicts, ``'left'`` first and then ``'right'``.
    """

    def _breast_record(lateral):
        # Assemble the record for a single side of the exam.
        us_info = get_us_info(exam['acn'], lateral, us_data)
        return {
            'accession_number': exam['acn'],
            'lateral': lateral,
            'mrn': mrn,
            'study_date': exam['study_date'],
            'benign': cancer_label[f"{lateral}_benign"],
            'malignant': cancer_label[f"{lateral}_malignant"],
            'biopsied': cancer_label[f"{lateral}_biopsied"],
            'us': us_info,
        }

    return [_breast_record(lateral) for lateral in ('left', 'right')]

In [None]:
# Gather breast-level records from data_full_set[0] for every exam whose
# modality starts with 'us'.
us_images_train = []
for episode in tqdm.tqdm(data_full_set[0]):
    cancer_label = episode['cancer_label']
    for exam in episode['exams']:
        if not exam['modality'].startswith('us'):
            continue  # modality does not start with 'us': nothing to add
        images = generate_list(episode['mrn'], cancer_label, exam, us_data=us_data)
        us_images_train += images

In [None]:
# Gather breast-level records from data_full_set[1] for every exam whose
# modality starts with 'us'.
us_images_val = []
for episode in tqdm.tqdm(data_full_set[1]):
    cancer_label = episode['cancer_label']
    for exam in episode['exams']:
        if not exam['modality'].startswith('us'):
            continue  # modality does not start with 'us': nothing to add
        images = generate_list(episode['mrn'], cancer_label, exam, us_data=us_data)
        us_images_val += images

In [None]:
# Sum of the 'malignant' values over the training records (shown as the cell output).
sum(record['malignant'] for record in us_images_train)

In [None]:
# Output location for the train/val ultrasound lists.
prefix = '/gpfs/data/geraslab/Nan/data/breast_mml_datalists/20220111/breasts_lists'
savepath = 'nyu_us'
phase = 'train'

# makedirs(..., exist_ok=True) keeps this cell re-runnable: os.mkdir raised
# FileExistsError whenever the directory already existed. It also creates any
# missing parent directories.
os.makedirs(os.path.join(prefix, savepath), exist_ok=True)
file = os.path.join(prefix, savepath, phase)

with open(file, 'wb') as f:
    pickle.dump(us_images_train, f)

# Report only after the dump has succeeded.
print(phase, "#pairs:", len(us_images_train), '\n\t saved at: ', savepath)

In [None]:
# `prefix` and `savepath` are reused from the train-save cell above.
phase = 'val'

# Idempotent directory creation in place of the previously commented-out
# os.mkdir, so this cell no longer relies on the train cell having created
# the directory first.
os.makedirs(os.path.join(prefix, savepath), exist_ok=True)
file = os.path.join(prefix, savepath, phase)

with open(file, 'wb') as f:
    pickle.dump(us_images_val, f)

# Report only after the dump has succeeded.
print(phase, "#pairs:", len(us_images_val), '\n\t saved at: ', savepath)