In [5]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [13]:
# Scratch check: np.load on a path that does not exist raises
# FileNotFoundError (the pattern create_h5 uses for 'data/affine.npy').
# NOTE(review): this cell has no import of its own, so `np` must already be
# defined in the kernel when it runs.
try:
    np.load('a')
except FileNotFoundError:
    print('haha')

haha


In [14]:
import h5py
import nibabel as nib
import os
import glob
from dev_tools.my_tools import print_red, minmax_normalize
import pdb
import numpy as np
import yaml
from tqdm.notebook import tqdm
import pickle


def create_h5(source_folder, mean_std_file, overwrite=False):
    '''
    Pack every subject directory under ``source_folder`` into one HDF5 file.

    The output path is ``data/<suffix>.h5`` where ``<suffix>`` is the text
    after the last underscore of ``source_folder``. Each subject directory
    becomes one HDF5 group named after the directory. Inside it, every file
    of the directory is stored as a dataset named after the file. The modality
    is parsed from the file name (text after the last underscore, before the
    first dot); every modality except ``seg`` is passed through ``normalize``
    with the matching ``<mod>_mean`` / ``<mod>_std`` entries. A ``brain_width``
    dataset holds the union of the per-modality outlines from ``cal_outline``
    (row 0: smallest start edge, row 1: largest end edge).

    Side effect: if ``data/affine.npy`` does not exist, the affine of the
    first image that is read is saved there.

    Parameters
    ----------
    source_folder : str
        Folder containing the subject directories. When the suffix is
        ``Training`` the subjects are expected one level deeper (``*/*``),
        otherwise directly inside the folder (``*``).
    mean_std_file : str
        Pickle file holding the ``<mod>_mean`` / ``<mod>_std`` dictionary.
    overwrite : bool
        When False and the target file already exists, nothing is written.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If a subject directory contains no non-``seg`` modality.
    '''
    try:
        affine = np.load('data/affine.npy')
    except FileNotFoundError:
        affine = None

    # Ignore trailing separators so 'x_Training/' still resolves to 'Training'.
    split_name = source_folder.rstrip('/\\').split('_')[-1]
    target = os.path.join('data', split_name + '.h5')

    if os.path.exists(target) and not overwrite:
        print('{:s} exists already.'.format(target))
        return

    # NOTE(security): pickle.load can execute arbitrary code; only point this
    # at statistics files produced by a trusted run.
    with open(mean_std_file, 'rb') as f:
        mean_std_values = pickle.load(f)

    pattern = '*/*' if split_name == 'Training' else '*'
    # Sorted so subjects are processed in a deterministic order.
    img_dirs = sorted(glob.glob(os.path.join(source_folder, pattern)))

    # Write to a temporary file first: if anything fails midway, no partial
    # file is left at `target` to be mistaken for a finished one
    # ("exists already") on the next run.
    tmp_target = target + '.tmp'
    try:
        with h5py.File(tmp_target, 'w') as f:
            for img_dir in tqdm(img_dirs, desc='writing {:s}'.format(target)):
                if not os.path.isdir(img_dir):
                    continue
                # basename/normpath instead of split('/'): separator-agnostic.
                sub_id = os.path.basename(os.path.normpath(img_dir))
                h5_subid = f.create_group(sub_id)
                brain_widths = []
                for mod_file in sorted(os.listdir(img_dir)):
                    img = nib.load(os.path.join(img_dir, mod_file))
                    if affine is None:
                        affine = img.affine
                        np.save('data/affine', affine)
                    # Replacement for the deprecated img.get_data().
                    img_npy = np.asanyarray(img.dataobj)
                    mod = mod_file.split('_')[-1].split('.')[0]
                    if mod != 'seg':
                        img_npy = normalize(img_npy,
                                            mean=mean_std_values['{:s}_mean'.format(mod)],
                                            std=mean_std_values['{:s}_std'.format(mod)])
                        brain_widths.append(cal_outline(img_npy))
                    h5_subid.create_dataset(mod_file, data=img_npy)
                if not brain_widths:
                    raise ValueError(
                        'no non-seg modality found in {:s}'.format(img_dir))
                # Union of the outlines over all modalities of this subject.
                start_edge = np.min(brain_widths, axis=0)[0]
                end_edge = np.max(brain_widths, axis=0)[1]
                brain_width = np.vstack((start_edge, end_edge))
                h5_subid.create_dataset('brain_width', data=brain_width)
    except BaseException:
        # Covers KeyboardInterrupt too: never leave a half-written file behind.
        if os.path.exists(tmp_target):
            os.remove(tmp_target)
        raise
    os.replace(tmp_target, target)
    return

def cal_outline(img_npy):
    '''
    Compute the bounding outline of the non-zero (brain) voxels.

    Returns an integer array of shape (2, img_npy.ndim), i.e. (2, 3) for a
    volume. Row 0 is the smallest non-zero index along each axis minus one
    (never below 0); row 1 is the largest non-zero index along each axis plus
    one (never above the size of that axis).
    '''
    # One row per non-zero voxel, one column per axis.
    voxel_coords = np.argwhere(img_npy)
    lowest = voxel_coords.min(axis=0)
    highest = voxel_coords.max(axis=0)

    lower_bound = np.maximum(lowest - 1, 0)
    upper_bound = np.minimum(highest + 1, img_npy.shape)
    return np.vstack((lower_bound, upper_bound))

def normalize(img_npy,mean,std,offset=0.1, mul_factor=100):
    '''
    Rescale the non-zero voxels of ``img_npy`` in place and return the array.

    Non-zero voxels are standardised with ``mean`` / ``std``, passed through
    ``minmax_normalize``, shifted by ``offset`` and multiplied by
    ``mul_factor``. Zero voxels are left untouched; ``offset`` and
    ``mul_factor`` keep the rescaled brain voxels distinguishable from the
    zero background.

    NOTE(review): the result is assigned back into ``img_npy``, so the
    caller's array is modified and the values are cast to that array's dtype.
    '''
    foreground = np.nonzero(img_npy)
    standardized = (img_npy[foreground] - mean) / std
    rescaled = minmax_normalize(standardized)
    img_npy[foreground] = (rescaled + offset) * mul_factor
    return img_npy

                          
                          

In [3]:
# Inspect the cached statistics dictionary stored in data/mean_std.pkl.
# NOTE(security): pickle.load can execute arbitrary code; only use it on files
# produced by a trusted run.
with open('data/mean_std.pkl','rb') as f:
    a = pickle.load(f)
# Bare last expression so the notebook displays the loaded dictionary.
a

{'flair_mean': 415.9545,
 'flair_std': 1246.5544,
 't1_mean': 574.6952,
 't1_std': 1102.1575,
 't1ce_mean': 643.7283,
 't1ce_std': 1123.4411,
 't2_mean': 657.6712,
 't2_std': 1307.7268}

In [7]:

def cal_mean_std(source_folder,saved_path,overwrite=False,mods=None):
    '''
    Calculate the mean value and standard deviation of each modality.

    Statistics are pooled over the non-zero voxels of all subjects found
    under ``source_folder/*/*``: the sum and the voxel count are accumulated
    across subjects and divided once, so subjects with more brain voxels
    weigh more (this is not the mean of per-subject means). The standard
    deviation is the population form (divides by the voxel count).

    Parameters
    ----------
    source_folder : str
        Folder whose ``*/*`` sub-directories each hold one subject; the file
        read for a modality is ``<subject_dir>/<subject_dir_name>_<mod>.nii.gz``.
    saved_path : str
        Pickle file the resulting dictionary is written to.
    overwrite : bool
        When False and ``saved_path`` exists, nothing is recomputed and the
        stored dictionary is loaded and returned.
    mods : iterable of str, optional
        Modalities to process. Defaults to the notebook-level
        ``config['data']['all_mods']``, which then must already be defined.

    Returns
    -------
    dict
        ``{'t1_mean': ..., 't1_std': ..., 't2_mean': ..., ...}`` with values
        rounded to 4 decimals.

    Raises
    ------
    ValueError
        If no non-zero voxel is found for a modality (e.g. no subjects).
    '''
    if os.path.exists(saved_path) and not overwrite:
        print('{:s} exists already.'.format(saved_path))
        # Return the cached statistics so the documented return value holds
        # on this path as well.
        # NOTE(security): pickle.load can execute arbitrary code; only use it
        # on files produced by a trusted run.
        with open(saved_path,'rb') as f:
            return pickle.load(f)

    if mods is None:
        mods = config['data']['all_mods']

    # Keep directories only, in a deterministic order.
    sub_dirs = sorted(d for d in glob.glob(os.path.join(source_folder,'*/*'))
                      if os.path.isdir(d))

    mean_std_values = {}

    for mod in mods:
        # First pass: pooled sum and voxel count -> mean.
        total = 0.0
        amount = 0
        for sub_dir in tqdm(sub_dirs,
                             desc='Calculating {:s}\'s mean value'
                             .format(mod)):
            brain_area = _brain_voxels(sub_dir,mod)
            total += np.sum(brain_area)
            amount += len(brain_area)
        if amount == 0:
            raise ValueError(
                'no non-zero voxels found for modality {:s} under {:s}'
                .format(mod,source_folder))
        mean = total / amount
        mean_std_values['{:s}_mean'.format(mod)] = round(float(mean),4)
        print('{:s}\'s mean value = {:.2f}'.format(mod,mean))

        # Second pass: pooled squared deviations from that mean -> std.
        squared_dev = 0.0
        for sub_dir in tqdm(sub_dirs,
                             desc='Calculating {:s}\'s std value'
                             .format(mod)):
            brain_area = _brain_voxels(sub_dir,mod)
            squared_dev += np.sum((brain_area-mean)**2)
        std = np.sqrt(squared_dev/amount)
        mean_std_values['{:s}_std'.format(mod)] = round(float(std),4)
        print('{:s}\'s std value = {:.2f}'.format(mod,std))
    print(mean_std_values)
    with open(saved_path,'wb') as f:
        pickle.dump(mean_std_values,f)
    return mean_std_values


def _modality_file(sub_dir,mod):
    '''Path of the ``<subject>_<mod>.nii.gz`` file inside ``sub_dir``.'''
    # basename/normpath instead of split('/'): separator-agnostic.
    sub_id = os.path.basename(os.path.normpath(sub_dir))
    return os.path.join(sub_dir,sub_id+'_{:s}.nii.gz'.format(mod))


def _brain_voxels(sub_dir,mod):
    '''Non-zero voxels of one subject's modality as a flat float64 array.'''
    # np.asanyarray(img.dataobj) replaces the deprecated img.get_data().
    img_npy = np.asanyarray(nib.load(_modality_file(sub_dir,mod)).dataobj)
    # float64 so the sums above do not depend on the stored dtype.
    return img_npy[np.nonzero(img_npy)].astype(np.float64)
        
    

In [17]:
# Read the run configuration.
# NOTE(review): yaml.FullLoader can construct more than plain data types;
# yaml.safe_load is the safer choice if config.yml only holds plain values.
with open('config.yml') as f:
    config = yaml.load(f,Loader=yaml.FullLoader)

mean_std_file = config['data']['mean_std_file']

# The statistics come from the training split only and are reused below for
# every split.
cal_mean_std(source_folder=config['data']['source_train'],
             saved_path=mean_std_file)

# Build one HDF5 file per split, in the order train -> val -> test.
for split_key in ('source_train', 'source_val', 'source_test'):
    create_h5(config['data'][split_key],mean_std_file)

data/mean_std.pkl exists already.
data/Training.h5 exists already.
data/Validation.h5 exists already.
data/Testing.h5 exists already.


In [67]:
# Scratch check: mean over the non-zero voxels of a single T1 volume.
# np.asanyarray(<image>.dataobj) replaces the deprecated get_data() call.
img = np.asanyarray(nib.load('data/MICCAI_BraTS_2019_Data_Training/HGG\
/BraTS19_TMC_11964_1/BraTS19_TMC_11964_1_t1.nii.gz').dataobj)
np.mean(np.ravel(img)[np.flatnonzero(img)])


534.3123637025672

In [73]:
# Same quantity as the np.mean over flatnonzero(img), computed as sum / count
# over the non-zero voxels (the form cal_mean_std accumulates across subjects).
# Relies on `img` from the cell that loads the T1 volume.
b = img[np.nonzero(img)]
np.sum(b)/len(b)

534.3123637025672

In [52]:
# Scratch check: write a small dictionary to test.pkl with pickle.
# Note that this rebinds the notebook-level name `a`.
a = {'a':1,'b':2}
with open('test.pkl','wb') as f:
    pickle.dump(a,f)

In [60]:
# Load and print the statistics dictionary stored in data/mean_std.pkl.
# NOTE(security): pickle.load can execute arbitrary code; only use it on files
# produced by a trusted run.
with open('data/mean_std.pkl','rb') as f:
    res = pickle.load(f)
    print(res)

{'t1_std': 1082.5379, 't1_mean': 571.9798, 't1ce_std': 1093.0112, 't2_mean': 652.5108, 'flair_mean': 411.4047, 't2_std': 1285.4105, 'flair_std': 1219.138, 't1ce_mean': 637.505}


In [64]:
# Pooled mean of five values taken as a single group.
np.mean([1,2,3,50,34])

18.0

In [66]:
# Mean of two group means over the values 1, 34 and 2, 3, 50 split into groups
# of unequal size; this differs from the mean of all five values pooled.
np.mean([np.mean([1,34]),np.mean([2,3,50])])

17.916666666666664