In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import skimage, os
from skimage.morphology import ball, disk, dilation, binary_erosion, remove_small_objects, erosion, closing, reconstruction, binary_closing
from skimage.measure import label,regionprops, perimeter
from skimage.morphology import binary_dilation, binary_opening
from skimage.filters import roberts, sobel
from skimage import measure, feature
from skimage.segmentation import clear_border
from skimage import data
from scipy import ndimage as ndi
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d.art3d import Poly3DCollection
import scipy.misc
import numpy as np
from glob import glob
from skimage.io import imread
import re

pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)

import warnings
warnings.filterwarnings("ignore")

import matplotlib.pyplot as plt
from matplotlib import cm
import matplotlib.animation as anim

import imageio
from skimage.transform import resize

import copy
from scipy import ndimage as nd
import itertools
import cv2

In [None]:
all_images = glob(os.path.join('/content/drive/MyDrive/Data/guangdi_1/Hospital B','*.nii.gz'))
all_masks = glob(os.path.join('/content/drive/MyDrive/Data/guangdi_1/mask_B','*.nii.gz'))
df = pd.read_excel('/content/drive/MyDrive/Data/guangdi_1/clinic.xlsx', sheet_name='Hospital B')
len(all_images), len(all_masks), df.shape[0]

(104, 104, 104)

In [1]:
df.head()

In [None]:
def match_img_mask(row):
  img_path = np.nan
  mask_path = np.nan
  name = '_'.join(row['缩写'].strip().upper().split(' '))

  for img in all_images:
    org_img = img
    img = img.split('/')[-1]
    f_i = re.search(r"\d", img).start()
    cur_name = img[: f_i-1]
    if name == cur_name:
      img_path = org_img
      break
  
  for mask in all_masks:
    org_mask = mask
    mask = mask.split('/')[-1]
    f_i = re.search(r"\d", mask).start()
    cur_name = mask[: f_i-1]
    if name == cur_name:
      mask_path = org_mask
      break

  return pd.Series([img_path, mask_path])

In [None]:
df[['img_path', 'mask_path']] = df.apply(match_img_mask, axis=1)

In [2]:
show_ids = np.random.randint(df.shape[0])
print(df.iloc[show_ids]['缩写'])
# print(df.iloc[show_ids]['IDx'])
print(df.iloc[show_ids]['img_path'])
print(df.iloc[show_ids]['mask_path'])

In [None]:
print(df[(df['img_path'].isna()) | (df['mask_path'].isna())].shape)
# df[(df['img_path'].isna()) | (df['mask_path'].isna())].to_csv('B_not_match.csv', index=False)

(2, 18)


In [None]:
df = df.dropna().reset_index()

In [3]:
%matplotlib inline
try:
    import nibabel as nib
except:
    raise ImportError('Install NIBABEL')

show_ids = np.random.randint(df.shape[0])

img_p = df['img_path'][show_ids]
mask_p = df['mask_path'][show_ids]
print(f'image path: {img_p}')
print(f'mask path: {mask_p}')

test_image = nib.load(img_p).get_fdata()
test_mask = nib.load(mask_p).get_fdata()
print(test_image.shape, test_mask.shape)


fig, (ax1, ax2) = plt.subplots(1,2, figsize = (12, 6))
extract_no = 12
ax1.imshow(test_image[:, :, test_image.shape[2] // 2])
ax1.set_title('Image')
ax2.imshow(test_mask[:, :, test_mask.shape[2] // 2])
ax2.set_title('Mask')
print(np.unique(test_mask))

In [None]:
def mask_image(image_p, mask_p, mask_id):
  image = copy.deepcopy(image_p)
  mask = copy.deepcopy(mask_p)
  assert image.shape == mask.shape, f'image shape {image.shape} not match mask shape {mask.shape}'

  d1 = mask.shape[0]
  d2 = mask.shape[1]
  d3 = mask.shape[2]

  # 将非mask的部分置黑
  mask_image = []
  for i in range(d3):
    cur_slice = image[:, :, i]
    cur_mask = mask[:, :, i]

    for a in range(d1):
      for b in range(d2):
        if cur_mask[a, b] != mask_id:
          cur_slice[a, b] = 0
    mask_image.append(cur_slice)
  return np.array(mask_image).transpose(1, 2, 0)

# masked_image = mask_image(test_image, test_mask, 3)
# print(masked_image.shape, test_mask.shape)

def FindLabelBox3D(img, offset):
    '''
    img:ct-label-data
    offset:copy-level
    '''
    xdim = np.zeros(2)  # bouding box 和 x轴的交点
    ydim = np.zeros(2)  # bouding box 和 y轴的交点
    zdim = np.zeros(2)  # bouding box 和 z轴的交点
    tmp = np.squeeze(np.sum(np.sum(img, axis=2), axis=1))
    for i in range(len(tmp)):
        if tmp[i] == 0:
            xdim[0] = i
        else:
            break
    xdim[1] = len(tmp)
    for i in reversed(range(len(tmp))):
        if tmp[i] == 0:
            xdim[1] = i
        else:
            break
    # for y
    tmp = np.squeeze(np.sum(np.sum(img, axis=2), axis=0))
    for i in range(len(tmp)):
        if tmp[i] == 0:
            ydim[0] = i
        else:
            break
 
    ydim[1] = len(tmp)
    for i in reversed(range(len(tmp))):
        if tmp[i] == 0:
            ydim[1] = i
        else:
            break
    # for z
    tmp = np.squeeze(np.sum(np.sum(img, axis=1), axis=0))
    for i in range(len(tmp)):
        if tmp[i] == 0:
            zdim[0] = i
        else:
            break
 
    zdim[1] = len(tmp)
    for i in reversed(range(len(tmp))):
        if tmp[i] == 0:
            zdim[1] = i
        else:
            break
 
    # offset
    xdim[0] = max(0, xdim[0] - offset)
    xdim[1] = min(np.size(img, 0), xdim[1] + offset)
 
    ydim[0] = max(0, ydim[0] - offset)
    ydim[1] = min(np.size(img, 1), ydim[1] + offset)
 
    zdim[0] = max(0, zdim[0] - offset)
    zdim[1] = min(np.size(img, 2), zdim[1] + offset)
 
    return xdim, ydim, zdim

def get_mask_roi(image_p, mask_p, mask_id):
  image = copy.deepcopy(image_p)
  mask = copy.deepcopy(mask_p)
  image = mask_image(image, mask, mask_id)
  xdim, ydim, zdim = FindLabelBox3D(mask, mask_id)
  return image[int(xdim[0]):int(xdim[1]), int(ydim[0]):int(ydim[1]), int(zdim[0]):int(zdim[1])]

masked_image = get_mask_roi(test_image, test_mask, 1)
print(masked_image.shape)

(38, 45, 8)


In [None]:
def resize_volume(image_p):
    img = copy.deepcopy(image_p)
    """Resize across z-axis"""
    # Set the desired depth
    desired_depth = 32
    desired_width = 128
    desired_height = 128
    z_ids = np.linspace(0, img.shape[2]-1, desired_depth)
    z_ids = list(map(lambda x: int(x), z_ids))
    img3d = np.stack([cv2.resize(img[:, :, i], (desired_width, desired_height)) for i in z_ids]).T
    if np.min(img3d) < np.max(img3d):
      img3d = img3d - np.min(img3d)
      img3d = img3d / np.max(img3d)
    if img3d.shape[-1] < desired_depth:
      n_zero = np.zeros((desired_height, desired_width, desired_depth - img3d.shape[-1]))
      img3d = np.concatenate((img3d,  n_zero), axis = -1)
    return img3d

resize_image = resize_volume(masked_image)
print(resize_image.shape, np.sum(resize_image))

(128, 128, 32) 53075.75241277364


In [None]:
fig, (ax1, ax2, ax3) = plt.subplots(1,3, figsize = (12, 6))
extract_no = 12
ax1.imshow(masked_image[:, :, masked_image.shape[2] // 2])
ax1.set_title('Image')
ax2.imshow(resize_image[:, :, resize_image.shape[2] // 2])
ax2.set_title('Resize Image')
ax3.imshow(test_mask[:, :, extract_no])
ax3.set_title('Mask')

Text(0.5, 1.0, 'Mask')

<Figure size 864x432 with 3 Axes>

In [None]:
from pathlib import Path
res_folder = './masked/'
Path(res_folder).mkdir(parents=True, exist_ok=True)

def extract_mask_ids(mask):
  mask_ids = []
  for i in range(mask.shape[2]):
    mask_ids += list(np.unique(mask[:, :, i]))
  return list(set(mask_ids))

ignore_cnt = 0
except_cnt = 0


for index, row in df.iterrows():
  print(f'Processing {index + 1}th patient')
  name = row['缩写']
  image_path = row['img_path']
  mask_path = row['mask_path']
  image = nib.load(image_path).get_fdata()
  mask = nib.load(mask_path).get_fdata()

  labels = extract_mask_ids(mask)
  print(labels)

  for label in labels:
    if label == 0:
      continue
    try:
      print(f'label {label}...')
      masked_image = get_mask_roi(image, mask, label)
      masked_image = resize_volume(masked_image)
      if np.sum(masked_image) == 0:
        print('No valid image from mask, ignore!')
        ignore_cnt += 1
        continue
      print(f'{name}_{label}_{masked_image.shape}')
      nib_file = nib.Nifti1Image(masked_image, np.eye(4)) 
      file_name = f'./masked/{name}_{label}.nii.gz'
      nib.save(nib_file, file_name) 
    except Exception as e:
      print(e)
      except_cnt += 1
print(f'invalid image is {ignore_cnt}')
print(f'exception image is {except_cnt}')



In [None]:
!cp ./masked/* /content/drive/MyDrive/Data/guangdi_1/Ex_Mask_B/

In [None]:
# !rm ./masked/*

rm: cannot remove './masked/*': No such file or directory


In [None]:
label_file = glob('./masked/*')
e_label = set()
for file in label_file:
  if file.split('/')[-1].split('_')[-1].split('.')[0] not in ['1', '2', '3']:
    e_label.add(file.split('/')[-1].split('.')[0])

In [None]:
data = []
for p in list(e_label):
  data.append(p.split('_'))

e_df = pd.DataFrame(data, columns=['name', 'unexpected mask label'])
e_df

Unnamed: 0,name,unexpected mask label


In [None]:
e_df.to_csv('unexpected_mask.csv', index=False)