In [1]:
import os
import numpy as np
from scipy import ndimage
from skimage import io
from matplotlib import pyplot as plt
import pickle
import nibabel as nib
from scipy.ndimage.interpolation import zoom
import random

In [2]:
def pickle_store(file_name,save_data):
    """Serialize ``save_data`` to ``file_name`` with pickle.

    Parameters
    ----------
    file_name : path-like
        Destination file; it is opened in binary write mode, so an existing
        file at that path is overwritten.
    save_data : object
        Any picklable object.

    Raises
    ------
    OSError
        If the file cannot be opened for writing.
    pickle.PicklingError (or another exception raised by ``pickle.dump``)
        If ``save_data`` cannot be pickled.
    """
    # The context manager guarantees the handle is closed even when
    # pickle.dump raises part-way through.
    with open(file_name,'wb') as file_obj:
        pickle.dump(save_data,file_obj)

def extract_positive(whole_img,whole_label,box_size,smallest_ratio=0.97,step_size=7,verbal=False):
    """Find cubic box placements that contain (almost) all of the labelled voxels.

    A cube with edge ``box_size`` is slid over the volume on a grid with
    spacing ``step_size``. Each placement is identified by its *upper*
    (exclusive) corner ``(i, j, k)``, i.e. the cube covers
    ``[i-box_size:i, j-box_size:j, k-box_size:k]``. A placement is kept when
    the label sum inside the cube divided by the label sum of the whole
    volume is strictly greater than ``smallest_ratio``.

    Parameters
    ----------
    whole_img : array-like, 3-D
        Only its shape is used, to clip the search range.
    whole_label : ndarray, 3-D
        Label volume. The bounding box is taken from the first entry returned
        by ``ndimage.find_objects``.
        NOTE(review): presumably a binary integer mask - confirm with the
        data producer.
    box_size : int
        Edge length of the cube, in voxels.
    smallest_ratio : float, optional
        Minimum (exclusive) fraction of the label sum a cube must contain.
    step_size : int, optional
        Grid spacing between candidate corners along every axis.
    verbal : bool, optional
        When truthy, print the ratio of every candidate cube.

    Returns
    -------
    list of tuple
        ``(i, j, k)`` upper corners of the accepted cubes, in scan order.
        Empty when the label volume contains no object.
    """
    objects = ndimage.find_objects(whole_label)
    if not objects:
        # No labelled voxel at all: nothing can be a positive sub-volume.
        # (Previously this case crashed with IndexError on ``[0]``.)
        return []
    x_slice,y_slice,z_slice = objects[0]

    img_size = np.shape(whole_img)
    # Tolerance (in voxels) by which a cube may cut into the bounding box.
    offset = 9
    # The small constant keeps the ratio finite if the label sum is zero.
    label_total = np.sum(whole_label)+0.001

    def corner_range(axis_slice,axis_len):
        # Upper corners for which the cube still (nearly) spans the bounding
        # box on this axis, clipped so the cube stays inside the image.
        first = max(axis_slice.stop-offset,box_size)
        last = min(axis_slice.start+box_size+offset,axis_len)
        return range(first,last+1,step_size)

    x_corners = corner_range(x_slice,img_size[0])
    y_corners = corner_range(y_slice,img_size[1])
    z_corners = corner_range(z_slice,img_size[2])

    positive_sub_volumes = []
    for i in x_corners:
        for j in y_corners:
            for k in z_corners:
                contain_ratio = np.sum(whole_label[i-box_size:i,
                                                   j-box_size:j,
                                                   k-box_size:k])/label_total
                if verbal:
                    print('Current box contain bv ratio: ',contain_ratio)
                if contain_ratio > smallest_ratio:
                    positive_sub_volumes.append((i,j,k))
    return positive_sub_volumes

In [3]:
# Load the previously generated sub-volume records.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only point this at files produced by a trusted pipeline.
file_path = os.path.join('/scratch/zq415/grammar_cor/Localization/data','2018_0711_train_sub_volumes22.pickle')
with open(file_path,'rb') as f:
    # Per-record layout, as noted by the original author:
    #(neg_subvolumes,pos_subvolumes,img2,filtered_img2, img_label2, 
    # img, filtered_img, img_label, data_dic[i][0])
    all_data = pickle.load(f)
# The `with` block has already closed the file; no explicit close is needed.

# Passed to extract_positive as `box_size`, where it is the full edge length
# of the sliding cube (despite the "half" in the name).
HALF_SIDE = 128

In [4]:
# Per-record results, keyed by the record index.
all_sub_volumes = {}
all_name = {}
total_pos = 0

for i in range(len(all_data)):
    # Slots 5-8 of a record: image, filtered image, label volume, source name.
    entry = all_data[i]
    img, filtered_img, img_label = entry[5], entry[6], entry[7]
    all_name[i] = entry[8]

    print("Number {0}: img_size: {1}, bv_size: {2}, bv_volume: {3}".format(
        i,
        np.shape(img),
        ndimage.find_objects(img_label)[0],
        np.sum(img_label),
    ))

    print("Extracting positive: ...")
    pos_subvolumes = extract_positive(img, img_label, HALF_SIDE)

    # Too many hits: tighten the ratio and coarsen the grid.
    if len(pos_subvolumes) > 700:
        pos_subvolumes = extract_positive(img, img_label, HALF_SIDE,
                                          smallest_ratio=0.98, step_size=9)

    # Too few hits (possibly after the tightening above): relax the ratio
    # and refine the grid.
    if len(pos_subvolumes) < 100:
        pos_subvolumes = extract_positive(img, img_label, HALF_SIDE,
                                          smallest_ratio=0.96, step_size=4)

    current_pos_num = len(pos_subvolumes)
    total_pos += current_pos_num

    print("{}:, pos_len={} \n".format(all_name[i], current_pos_num))

    all_sub_volumes[i] = (pos_subvolumes, img, filtered_img, img_label, all_name[i])


# Persist everything in one pickle and report the overall positive count.
pickle_store('./data/2018_0711_train_sub_volumes22_seg.pickle', all_sub_volumes)
print(total_pos)

Number 0: img_size: (210, 242, 242), bv_size: (slice(87, 153, None), slice(38, 132, None), slice(18, 83, None)), bv_volume: 30640
Extracting positive: ...
/scratch/zq415/grammar_cor/Localization/data/20180419_newdata_nii_with_filtered/E13.5/20171211_En1_E13p5__upload/20171211_En1_E13_E2aMut_ext.nii:, pos_len=243 

Number 1: img_size: (210, 242, 262), bv_size: (slice(82, 166, None), slice(102, 176, None), slice(17, 74, None)), bv_volume: 16795
Extracting positive: ...
/scratch/zq415/grammar_cor/Localization/data/20180419_newdata_nii_with_filtered/E13.5/20171211_En1_E13p5__upload/20171211_En1_E13_E8a.nii:, pos_len=207 

Number 2: img_size: (210, 242, 262), bv_size: (slice(56, 119, None), slice(77, 187, None), slice(18, 75, None)), bv_volume: 21659
Extracting positive: ...
/scratch/zq415/grammar_cor/Localization/data/20180419_newdata_nii_with_filtered/E13.5/20171211_En1_E13p5__upload/20171211_En1_E13_E11a.nii:, pos_len=180 

Number 3: img_size: (210, 242, 222), bv_size: (slice(85, 161, No

/scratch/zq415/grammar_cor/Localization/data/20180419_newdata_nii_with_filtered/E11/20170314_En1_E11p5-files__upload/20170314_En1_E11_E5a.nii:, pos_len=128 

Number 27: img_size: (180, 202, 142), bv_size: (slice(73, 148, None), slice(110, 162, None), slice(43, 89, None)), bv_volume: 23739
Extracting positive: ...
/scratch/zq415/grammar_cor/Localization/data/20180419_newdata_nii_with_filtered/E11/20170314_En1_E11p5-files__upload/20170314_En1_E11_E1a.nii:, pos_len=126 

Number 28: img_size: (180, 202, 162), bv_size: (slice(42, 124, None), slice(89, 148, None), slice(72, 142, None)), bv_volume: 48829
Extracting positive: ...
/scratch/zq415/grammar_cor/Localization/data/20180419_newdata_nii_with_filtered/E11/20170314_En1_E11p5-files__upload/20170314_En1_E11_E6a.nii:, pos_len=252 

Number 29: img_size: (180, 202, 162), bv_size: (slice(65, 121, None), slice(90, 162, None), slice(17, 80, None)), bv_volume: 28962
Extracting positive: ...
/scratch/zq415/grammar_cor/Localization/data/20180419_ne

/scratch/zq415/grammar_cor/Localization/data/20180419_newdata_nii_with_filtered/E10/20171208_En1_E10p5-files__upload/20171208_En1_E10_E1a.nii:, pos_len=384 

Number 53: img_size: (180, 242, 162), bv_size: (slice(56, 107, None), slice(43, 113, None), slice(22, 107, None)), bv_volume: 29903
Extracting positive: ...
/scratch/zq415/grammar_cor/Localization/data/20180419_newdata_nii_with_filtered/E10/20171208_En1_E10p5-files__upload/20171208_En1_E10_E5a.nii:, pos_len=280 

Number 54: img_size: (180, 242, 162), bv_size: (slice(85, 122, None), slice(45, 112, None), slice(41, 108, None)), bv_volume: 20212
Extracting positive: ...
/scratch/zq415/grammar_cor/Localization/data/20180419_newdata_nii_with_filtered/E10/20171208_En1_E10p5-files__upload/20171208_En1_E10_E4a.nii:, pos_len=320 

Number 55: img_size: (180, 242, 162), bv_size: (slice(77, 113, None), slice(56, 127, None), slice(5, 60, None)), bv_volume: 16477
Extracting positive: ...
/scratch/zq415/grammar_cor/Localization/data/20180419_new

/scratch/zq415/grammar_cor/Localization/data/20180419_newdata_nii_with_filtered/E14/20170317_En1_E14p5-files__upload/20170317_En1_E14_E1b.nii:, pos_len=200 

Number 79: img_size: (180, 282, 222), bv_size: (slice(56, 111, None), slice(106, 213, None), slice(19, 66, None)), bv_volume: 15154
Extracting positive: ...
/scratch/zq415/grammar_cor/Localization/data/20180419_newdata_nii_with_filtered/E14/20170323_En1_E14p5-files__upload/20170323_En1_E14_E5a.nii:, pos_len=160 

Number 80: img_size: (180, 282, 242), bv_size: (slice(50, 144, None), slice(203, 252, None), slice(107, 171, None)), bv_volume: 14642
Extracting positive: ...
/scratch/zq415/grammar_cor/Localization/data/20180419_newdata_nii_with_filtered/E14/20170323_En1_E14p5-files__upload/20170323_En1_E14_E6a.nii:, pos_len=320 

Number 81: img_size: (180, 242, 242), bv_size: (slice(59, 157, None), slice(154, 212, None), slice(153, 226, None)), bv_volume: 18361
Extracting positive: ...
/scratch/zq415/grammar_cor/Localization/data/201804

/scratch/zq415/grammar_cor/Localization/data/20180419_newdata_nii_with_filtered/E12/20170619_En1_E12p5_Ex-files__upload/20170619_En1_E12_Ex_E4b_Mut_reg.nii:, pos_len=280 

Number 105: img_size: (180, 242, 142), bv_size: (slice(31, 88, None), slice(97, 178, None), slice(57, 112, None)), bv_volume: 36795
Extracting positive: ...
/scratch/zq415/grammar_cor/Localization/data/20180419_newdata_nii_with_filtered/E12/20170619_En1_E12p5_Ex-files__upload/20170619_En1_E12_Ex_E2a_reg.nii:, pos_len=159 

Number 106: img_size: (180, 242, 202), bv_size: (slice(53, 97, None), slice(45, 119, None), slice(26, 108, None)), bv_volume: 35347
Extracting positive: ...
/scratch/zq415/grammar_cor/Localization/data/20180419_newdata_nii_with_filtered/E12/20170315_En1_E12p5-files__upload/20170315_En1_E12_E2b.nii:, pos_len=320 

Number 107: img_size: (180, 242, 202), bv_size: (slice(74, 133, None), slice(141, 208, None), slice(34, 124, None)), bv_volume: 28560
Extracting positive: ...
/scratch/zq415/grammar_cor/Lo

/scratch/zq415/grammar_cor/Localization/data/20180419_newdata_nii_with_filtered/E12/20171002_En1_E12p5-files__upload/20171002_En1_E12_E1a.nii:, pos_len=144 

Number 131: img_size: (180, 222, 202), bv_size: (slice(34, 79, None), slice(70, 147, None), slice(52, 143, None)), bv_volume: 25542
Extracting positive: ...
/scratch/zq415/grammar_cor/Localization/data/20180419_newdata_nii_with_filtered/E12/20171002_En1_E12p5-files__upload/20171002_En1_E12_E6a.nii:, pos_len=457 

Number 132: img_size: (180, 222, 202), bv_size: (slice(39, 105, None), slice(127, 188, None), slice(51, 141, None)), bv_volume: 27775
Extracting positive: ...
/scratch/zq415/grammar_cor/Localization/data/20180419_newdata_nii_with_filtered/E12/20171002_En1_E12p5-files__upload/20171002_En1_E12_E5b.nii:, pos_len=335 

Number 133: img_size: (180, 222, 202), bv_size: (slice(49, 97, None), slice(133, 199, None), slice(18, 109, None)), bv_volume: 22959
Extracting positive: ...
/scratch/zq415/grammar_cor/Localization/data/2018041

/scratch/zq415/grammar_cor/Localization/data/20180419_newdata_nii_with_filtered/E12/20170619_En1_E12p5-files__upload/20170619_En1_E12_E2a.nii:, pos_len=434 

Number 157: img_size: (180, 242, 222), bv_size: (slice(63, 119, None), slice(129, 194, None), slice(12, 105, None)), bv_volume: 23877
Extracting positive: ...
/scratch/zq415/grammar_cor/Localization/data/20180419_newdata_nii_with_filtered/E13/20170316_En1_E13p5-files__upload/20170316_En1_E13_E1a.nii:, pos_len=192 

Number 158: img_size: (180, 242, 222), bv_size: (slice(63, 122, None), slice(126, 191, None), slice(32, 118, None)), bv_volume: 22769
Extracting positive: ...
/scratch/zq415/grammar_cor/Localization/data/20180419_newdata_nii_with_filtered/E13/20170316_En1_E13p5-files__upload/20170316_En1_E13_E1b.nii:, pos_len=384 

Number 159: img_size: (180, 242, 242), bv_size: (slice(75, 136, None), slice(125, 226, None), slice(141, 221, None)), bv_volume: 27879
Extracting positive: ...
/scratch/zq415/grammar_cor/Localization/data/201

/scratch/zq415/grammar_cor/Localization/data/20180419_newdata_nii_with_filtered/E13/20170626_En1_E13p5-files__upload/20170626_En1_E13p5_E5a.nii:, pos_len=224 

Number 183: img_size: (180, 222, 222), bv_size: (slice(55, 120, None), slice(81, 172, None), slice(16, 75, None)), bv_volume: 19679
Extracting positive: ...
/scratch/zq415/grammar_cor/Localization/data/20180419_newdata_nii_with_filtered/E13/20170207_En1_E13p5-files__upload/20170207_En1_E13_E10d_Mut.nii:, pos_len=224 

Number 184: img_size: (180, 222, 202), bv_size: (slice(21, 94, None), slice(69, 143, None), slice(27, 82, None)), bv_volume: 19603
Extracting positive: ...
/scratch/zq415/grammar_cor/Localization/data/20180419_newdata_nii_with_filtered/E13/20170207_En1_E13p5-files__upload/20170207_En1_E13_E12c_Mut.nii:, pos_len=180 

Number 185: img_size: (180, 222, 128), bv_size: (slice(46, 107, None), slice(73, 161, None), slice(46, 107, None)), bv_volume: 18904
Extracting positive: ...
/scratch/zq415/grammar_cor/Localization/dat

/scratch/zq415/grammar_cor/Localization/data/20180419_newdata_nii_with_filtered/E13/20171004_En1_E13-files__upload/20171004_En1_E13_E6a.nii:, pos_len=216 

Number 209: img_size: (180, 242, 222), bv_size: (slice(58, 130, None), slice(104, 192, None), slice(11, 69, None)), bv_volume: 23839
Extracting positive: ...
/scratch/zq415/grammar_cor/Localization/data/20180419_newdata_nii_with_filtered/E13/20171004_En1_E13-files__upload/20171004_En1_E13_E1a.nii:, pos_len=184 

Number 210: img_size: (180, 242, 242), bv_size: (slice(62, 119, None), slice(85, 185, None), slice(153, 208, None)), bv_volume: 24419
Extracting positive: ...
/scratch/zq415/grammar_cor/Localization/data/20180419_newdata_nii_with_filtered/E13/20170706_En1_E13p5-files__upload/20170706_En1_E13_E3a_reg.nii:, pos_len=336 

Number 211: img_size: (180, 242, 222), bv_size: (slice(45, 148, None), slice(42, 92, None), slice(67, 123, None)), bv_volume: 18911
Extracting positive: ...
/scratch/zq415/grammar_cor/Localization/data/2018041

/scratch/zq415/grammar_cor/Localization/data/20180419_newdata_nii_with_filtered/E12.5/20171009_En1_E12_Ext___tiff/20171009_En1_E12_E2a.nii:, pos_len=200 

Number 235: img_size: (210, 242, 222), bv_size: (slice(77, 137, None), slice(128, 219, None), slice(14, 83, None)), bv_volume: 27278
Extracting positive: ...
/scratch/zq415/grammar_cor/Localization/data/20180419_newdata_nii_with_filtered/E12.5/20171009_En1_E12_Ext___tiff/20171009_En1_E12_E11a.nii:, pos_len=132 

Number 236: img_size: (210, 242, 162), bv_size: (slice(10, 82, None), slice(67, 171, None), slice(85, 142, None)), bv_volume: 24929
Extracting positive: ...
/scratch/zq415/grammar_cor/Localization/data/20180419_newdata_nii_with_filtered/E12.5/20171009_En1_E12_Ext___tiff/20171009_En1_E12_E6a.nii:, pos_len=312 

Number 237: img_size: (180, 242, 202), bv_size: (slice(36, 98, None), slice(65, 137, None), slice(96, 178, None)), bv_volume: 22202
Extracting positive: ...
/scratch/zq415/grammar_cor/Localization/data/20180419_newdata_