In [1]:
from math import sqrt
import copy
import  traceback
import shutil
import random

import numpy as np  # linear algebra
import pydicom
import os
import matplotlib.pyplot as plt
import cv2
from pydicom.uid import UID
from PIL import Image
from tqdm import tqdm

In [6]:
def load_scan(path):
    """Read every file directly inside ``path`` with pydicom and return the readable slices.

    Sub-directories are skipped. A file whose ``pixel_array`` access raises
    ``AttributeError`` is reported, deleted from disk and left out of the result.

    Args:
        path: Directory holding the files of one scan.

    Returns:
        The datasets whose pixel data could be accessed, ordered by
        ``float(InstanceNumber)``.
    """
    readable = []
    for entry in os.listdir(path):
        full_path = os.path.join(path, entry)
        if os.path.isdir(full_path):
            continue
        dataset = pydicom.dcmread(full_path)
        try:
            # Only the attribute access matters here; the decoded pixels are not kept.
            _ = dataset.pixel_array
        except AttributeError:
            traceback.print_exc()
            print(f'\tDelete {full_path}')
            os.remove(full_path)
            continue
        readable.append(dataset)
    return sorted(readable, key=lambda ds: float(ds.InstanceNumber))

In [3]:
# Lower and upper bounds of the intensity window that slices are clipped to before being
# rescaled to 8-bit PNGs; both values are also embedded in the generated folder names.
lower_b, upper_b = -100, 500

## 1.阳性数据

In [7]:
# Convert DICOM files to PNG images according to the configured intensity window.
def generate_image(input_folder):
    """Render scan ``1`` of every patient under ``input_folder`` as windowed 8-bit PNGs.

    For each patient sub-directory, the scan folder named ``'1'`` is loaded with
    ``load_scan``; every slice is rescaled with its ``RescaleSlope`` /
    ``RescaleIntercept``, clipped to ``[lower_b, upper_b]``, mapped linearly to
    0-255 and saved as ``<patient>_<index:04d>.png`` inside
    ``<scan>/images_<lower_b>_<upper_b>``. An existing output folder is deleted
    and recreated first. Plain files in ``input_folder`` and scan folders other
    than ``'1'`` are ignored.

    Args:
        input_folder: Directory containing one sub-directory per patient.
    """
    for patient in sorted(os.listdir(input_folder)):
        patient_dir = os.path.join(input_folder, patient)
        if os.path.isfile(patient_dir):
            continue
        print(f'****Processing {patient}****')
        for scan in os.listdir(patient_dir):
            if scan != '1':
                continue
            scan_dir = os.path.join(patient_dir, scan)
            image_path = os.path.join(scan_dir, f'images_{lower_b}_{upper_b}')
            # Always start from an empty output folder so stale PNGs never survive a re-run.
            if os.path.exists(image_path):
                shutil.rmtree(image_path)
            os.mkdir(image_path)

            ct = load_scan(scan_dir)

            for i, dcm in enumerate(ct):
                # NOTE(review): the int16 cast assumes stored pixel values fit in a signed
                # 16-bit range — confirm for scans with unsigned pixel data.
                img = dcm.pixel_array.astype(np.int16)
                intercept = dcm.RescaleIntercept
                slope = dcm.RescaleSlope
                if slope != 1:
                    img = (slope * img.astype(np.float64)).astype(np.int16)
                img += np.int16(intercept)
                img = np.clip(img, lower_b, upper_b)
                img = ((img - lower_b) / (upper_b - lower_b) * 255).astype(np.uint8)
                img = Image.fromarray(img)
                if img.height != img.width:
                    # Report which slice is affected instead of printing an empty line.
                    print(f'\tWarning: non-square slice {img.width}x{img.height}: {patient}_{i:04d}.png')
                img.save(os.path.join(image_path, f'{patient}_{i:04d}.png'))

# Convert the dataset batches listed below to PNG. The commented-out calls are other
# batches (presumably converted in earlier runs — confirm before re-enabling).
# generate_image('/nfs3-p1/zsxm/dataset/2021-9-8/')
# generate_image('/nfs3-p1/zsxm/dataset/2021-9-13/')
# generate_image('/nfs3-p1/zsxm/dataset/2021-9-19/')
# generate_image('/nfs3-p1/zsxm/dataset/2021-9-28/')
# generate_image('/nfs3-p2/zsxm/dataset/2021-10-19-imh/')
# generate_image('/nfs3-p2/zsxm/dataset/2021-10-19-pau/')
# generate_image('/nfs3-p2/zsxm/dataset/2021-10-19-aa/')
generate_image('/nfs3-p2/zsxm/dataset/2021-11-20/')
generate_image('/nfs3-p2/zsxm/dataset/2021-11-20-imh/')
generate_image('/nfs3-p2/zsxm/dataset/2021-11-20-pau/')

****Processing 俞沧子-S-82-89-J-82-258****
****Processing 傅阿乔-S-69-115-J-96-257****
****Processing 卢承流-J-79-350****
****Processing 叶超涵-S-97-103-J-97-323****
****Processing 姜浩芳-S-63-118-J-63-288****
****Processing 孙建华-J-64-173****


  py_encodings = _handle_illegal_standalone_encodings(


****Processing 张林生-J-75-237****
****Processing 朱大波-J-23-179****
****Processing 李资浩-S-41-68****
****Processing 欧兆辉-J-59-245****
****Processing 江西向-J-114-274****
****Processing 蒋世良-S-47-101-J-47-312****
****Processing 许建敏-J-102-397****
****Processing 许杏琴-J-150-266****
****Processing 赵晴-J-9-75****
****Processing 陈志武-J-69-407****
****Processing 韩桂英-J-92-143****
****Processing 马康美-S-74-114-J-74-318****
****Processing 俞子珊-J-71-191****
****Processing 刘培华-J-56-224****
****Processing 姚群芳-J-70-141-J-160-192****
****Processing 孙业武-S-100-164-J-100-306****


  py_encodings = _handle_illegal_standalone_encodings(


****Processing 张信基-J-54-62****
****Processing 慈能满-J-76-106****
****Processing 章国林-J-82-180****
****Processing 章群娣-S-59-81-J-59-73****
****Processing 陈金财-J-69-226****
****Processing 亓富兰-J-14-78****
****Processing 冷清裕-J-186-328****
****Processing 刘世杰-J-212-270****
****Processing 刘炳付-J-329-360****
****Processing 吕正雄-J-286-293****
****Processing 吴金荣-J-277-282****
****Processing 周朝进-J-264-288****
****Processing 周舍福-J-74-82****
****Processing 唐秋平-J-535-542-J-550-566****
****Processing 徐行福-J-387-396****
****Processing 戴照圣-J-116-255****
****Processing 戴行铨-J-275-309****
****Processing 曹建花-J-284-290****
****Processing 曹生玉-J-54-237****
****Processing 朱惠贤-J-85-98****
****Processing 李佩芝-J-101-216****
****Processing 李永琪-J-504-513****
****Processing 杜广胜-J-186-370****
****Processing 武汉卿-J-314-324****
****Processing 潘根朝-J-114-324****
****Processing 童利君-J-21-24****
****Processing 茹志莲-J-78-253****
****Processing 谢必廉-J-68-76****
****Processing 谢文龙-J-72-79****
****Processing 金有才-J-93-110****
****Processing

In [8]:
# Gather the PNG folder of every patient under one directory so YOLOv5 can run detection on it.
def move_together_for_detect(input_folder, dst_path):
    """Copy each patient's windowed PNG folder to ``dst_path/<batch name>/<patient>``.

    The batch name is the last path component of ``input_folder`` (a trailing
    slash is tolerated). For every patient sub-directory, the folder
    ``<patient>/1/images_<lower_b>_<upper_b>`` is copied; an existing target
    folder is removed first. Despite the function name the source images are
    copied, not moved. A failure for one patient prints its traceback and the
    loop continues with the next patient.

    Args:
        input_folder: Directory containing one sub-directory per patient.
        dst_path: Root directory that receives the per-batch folder; created
            (including missing parents) when absent.
    """
    # makedirs also creates missing parents, which a plain mkdir would fail on.
    os.makedirs(dst_path, exist_ok=True)
    root_name = os.path.basename(os.path.normpath(input_folder))
    dst_path = os.path.join(dst_path, root_name)

    for patient in sorted(os.listdir(input_folder)):
        if os.path.isfile(os.path.join(input_folder, patient)):
            continue
        print(f'****Processing {patient}****')
        target = os.path.join(dst_path, patient)
        if os.path.exists(target):
            print(f"\tremove {target}")
            shutil.rmtree(target)

        try:
            shutil.copytree(os.path.join(input_folder, patient, '1', f'images_{lower_b}_{upper_b}'), target)
        except Exception:
            # Best effort per patient; Exception (not a bare except) keeps Ctrl-C working.
            traceback.print_exc()

# Stage the PNG folders of the batches below for detection. The commented-out calls are
# other batches (presumably staged in earlier runs — confirm before re-enabling).
# move_together_for_detect('/nfs3-p1/zsxm/dataset/2021-9-8/', '/nfs3-p1/zsxm/dataset/9_detect/')
# move_together_for_detect('/nfs3-p1/zsxm/dataset/2021-9-13/', '/nfs3-p1/zsxm/dataset/9_detect/')
# move_together_for_detect('/nfs3-p1/zsxm/dataset/2021-9-19/', '/nfs3-p1/zsxm/dataset/9_detect/')
# move_together_for_detect('/nfs3-p1/zsxm/dataset/2021-9-28/', '/nfs3-p1/zsxm/dataset/9_detect/')
# move_together_for_detect('/nfs3-p1/zsxm/dataset/2021-10-19-imh/', '/nfs3-p1/zsxm/dataset/9_detect/')
# move_together_for_detect('/nfs3-p1/zsxm/dataset/2021-10-19-pau/', '/nfs3-p1/zsxm/dataset/9_detect/')
move_together_for_detect('/nfs3-p2/zsxm/dataset/2021-11-20/', '/nfs3-p1/zsxm/dataset/9_detect/')
move_together_for_detect('/nfs3-p2/zsxm/dataset/2021-11-20-imh/', '/nfs3-p1/zsxm/dataset/9_detect/')
move_together_for_detect('/nfs3-p2/zsxm/dataset/2021-11-20-pau/', '/nfs3-p1/zsxm/dataset/9_detect/')

****Processing 俞沧子-S-82-89-J-82-258****
****Processing 傅阿乔-S-69-115-J-96-257****
****Processing 卢承流-J-79-350****
****Processing 叶超涵-S-97-103-J-97-323****
****Processing 姜浩芳-S-63-118-J-63-288****
****Processing 孙建华-J-64-173****
****Processing 张林生-J-75-237****
****Processing 朱大波-J-23-179****
****Processing 李资浩-S-41-68****
****Processing 欧兆辉-J-59-245****
****Processing 江西向-J-114-274****
****Processing 蒋世良-S-47-101-J-47-312****
****Processing 许建敏-J-102-397****
****Processing 许杏琴-J-150-266****
****Processing 赵晴-J-9-75****
****Processing 陈志武-J-69-407****
****Processing 韩桂英-J-92-143****
****Processing 马康美-S-74-114-J-74-318****
****Processing 俞子珊-J-71-191****
****Processing 刘培华-J-56-224****
****Processing 姚群芳-J-70-141-J-160-192****
****Processing 孙业武-S-100-164-J-100-306****
****Processing 张信基-J-54-62****
****Processing 慈能满-J-76-106****
****Processing 章国林-J-82-180****
****Processing 章群娣-S-59-81-J-59-73****
****Processing 陈金财-J-69-226****
****Processing 亓富兰-J-14-78****
****Processing 冷清裕-J-186-3

In [None]:
# Move a YOLOv5 detection run folder from the local runs/detect directory to the dataset's
# pred location. The commented-out lines are the same move for other run folders.
# shutil.move('/disk1/zsxm/pythonWorkspace/yolov5/runs/detect/2021-9-8', '/nfs3-p1/zsxm/dataset/9_detect/pred')
# shutil.move('/disk1/zsxm/pythonWorkspace/yolov5/runs/detect/2021-9-13', '/nfs3-p1/zsxm/dataset/9_detect/pred')
# shutil.move('/disk1/zsxm/pythonWorkspace/yolov5/runs/detect/2021-9-19', '/nfs3-p1/zsxm/dataset/9_detect/pred')
# shutil.move('/disk1/zsxm/pythonWorkspace/yolov5/runs/detect/2021-9-28', '/nfs3-p1/zsxm/dataset/9_detect/pred')
shutil.move('/disk1/zsxm/pythonWorkspace/yolov5/runs/detect/aorta_temp', '/nfs3-p1/zsxm/dataset/9_detect/pred')

In [None]:
# Used when the intensity window changes: the labels produced by YOLOv5 already exist,
# so the re-rendered images can be copied straight into each patient's images folder.
def directly_move_to_image_label(input_folder, dst_path):
    """Copy each patient's windowed PNG folder to ``dst_path/<patient>/images``.

    For every patient sub-directory of ``input_folder``, the folder
    ``<patient>/1/images_<lower_b>_<upper_b>`` is copied; an existing ``images``
    folder at the destination is removed first. Despite the function name the
    source images are copied, not moved. A failure for one patient prints its
    traceback and the loop continues with the next patient.

    Args:
        input_folder: Directory containing one sub-directory per patient.
        dst_path: Directory holding the per-patient output folders; created
            (including missing parents) when absent.
    """
    # makedirs also creates missing parents, which a plain mkdir would fail on.
    os.makedirs(dst_path, exist_ok=True)

    for patient in sorted(os.listdir(input_folder)):
        if os.path.isfile(os.path.join(input_folder, patient)):
            continue
        print(f'****Processing {patient}****')
        image_path = os.path.join(dst_path, patient, 'images')
        if os.path.exists(image_path):
            print(f"\tremove {image_path}")
            shutil.rmtree(image_path)

        try:
            shutil.copytree(os.path.join(input_folder, patient, '1', f'images_{lower_b}_{upper_b}'), image_path)
        except Exception:
            # Best effort per patient; Exception (not a bare except) keeps Ctrl-C working.
            traceback.print_exc()
            
# Refresh the images folders of three batches; note that the third call writes to
# 'positive_new' while the first two write to 'positive'.
directly_move_to_image_label('/nfs3-p1/zsxm/dataset/2021-9-8/', '/nfs3-p1/zsxm/dataset/aorta_ct_img_label/positive')
directly_move_to_image_label('/nfs3-p1/zsxm/dataset/2021-9-13/', '/nfs3-p1/zsxm/dataset/aorta_ct_img_label/positive')
directly_move_to_image_label('/nfs3-p1/zsxm/dataset/2021-9-19/', '/nfs3-p1/zsxm/dataset/aorta_ct_img_label/positive_new')

In [9]:
# Move the original images that were staged for detection into each patient's images folder.
def move_ori_image_to_sigle_folder(input_path, dst_path):
    """Move staged images from ``input_path/<patient>`` to ``dst_path/<patient>/images``.

    For every entry of ``input_path`` the destination ``images`` folder is
    recreated empty, every regular file of the patient folder is moved into it,
    and the emptied patient folder is removed. Finally ``input_path`` itself is
    removed.

    Args:
        input_path: Staging directory with one sub-directory per patient.
        dst_path: Directory holding the per-patient output folders; created
            (including missing parents) when absent.

    Raises:
        OSError: If a patient folder (or ``input_path``) is not empty after its
            files were moved, e.g. because it contains sub-directories.
    """
    # makedirs also creates missing parents, which a plain mkdir would fail on.
    os.makedirs(dst_path, exist_ok=True)
    for patient in os.listdir(input_path):
        print(f'****Processing {patient}****')
        staged_dir = os.path.join(input_path, patient)
        image_path = os.path.join(dst_path, patient, 'images')
        if os.path.exists(image_path):
            shutil.rmtree(image_path)
        # Creates dst_path/<patient> as well when it does not exist yet.
        os.makedirs(image_path)
        for img in os.listdir(staged_dir):
            staged_file = os.path.join(staged_dir, img)
            if os.path.isfile(staged_file):
                shutil.move(staged_file, os.path.join(image_path, img))
        # rmdir (not rmtree) on purpose: anything left behind surfaces as an error.
        os.rmdir(staged_dir)
    os.rmdir(input_path)
    
# Move the staged images of the batches below into their per-class label folders. The
# commented-out calls are other batches (presumably moved in earlier runs — confirm).
# move_ori_image_to_sigle_folder('/nfs3-p1/zsxm/dataset/9_detect/2021-9-8/', '/nfs3-p1/zsxm/dataset/aorta_ct_img_label/positive')
# move_ori_image_to_sigle_folder('/nfs3-p1/zsxm/dataset/9_detect/2021-9-13/', '/nfs3-p1/zsxm/dataset/aorta_ct_img_label/positive')
# move_ori_image_to_sigle_folder('/nfs3-p1/zsxm/dataset/9_detect/2021-9-19/', '/nfs3-p1/zsxm/dataset/aorta_ct_img_label/positive')
# move_ori_image_to_sigle_folder('/nfs3-p1/zsxm/dataset/9_detect/2021-9-28/', '/nfs3-p1/zsxm/dataset/aorta_ct_img_label/positive')
move_ori_image_to_sigle_folder('/nfs3-p1/zsxm/dataset/9_detect/2021-11-20/', '/nfs3-p1/zsxm/dataset/aorta_ct_img_label/positive2')
move_ori_image_to_sigle_folder('/nfs3-p1/zsxm/dataset/9_detect/2021-11-20-imh/', '/nfs3-p1/zsxm/dataset/aorta_ct_img_label/imh2')
move_ori_image_to_sigle_folder('/nfs3-p1/zsxm/dataset/9_detect/2021-11-20-pau/', '/nfs3-p1/zsxm/dataset/aorta_ct_img_label/pau2')

****Processing 俞沧子-S-82-89-J-82-258****
****Processing 傅阿乔-S-69-115-J-96-257****
****Processing 卢承流-J-79-350****
****Processing 叶超涵-S-97-103-J-97-323****
****Processing 姜浩芳-S-63-118-J-63-288****
****Processing 孙建华-J-64-173****
****Processing 张林生-J-75-237****
****Processing 朱大波-J-23-179****
****Processing 李资浩-S-41-68****
****Processing 欧兆辉-J-59-245****
****Processing 江西向-J-114-274****
****Processing 蒋世良-S-47-101-J-47-312****
****Processing 许建敏-J-102-397****
****Processing 许杏琴-J-150-266****
****Processing 赵晴-J-9-75****
****Processing 陈志武-J-69-407****
****Processing 韩桂英-J-92-143****
****Processing 马康美-S-74-114-J-74-318****
****Processing 俞子珊-J-71-191****
****Processing 刘培华-J-56-224****
****Processing 姚群芳-J-70-141-J-160-192****
****Processing 孙业武-S-100-164-J-100-306****
****Processing 张信基-J-54-62****
****Processing 慈能满-J-76-106****
****Processing 章国林-J-82-180****
****Processing 章群娣-S-59-81-J-59-73****
****Processing 陈金财-J-69-226****
****Processing 亓富兰-J-14-78****
****Processing 冷清裕-J-186-3

In [11]:
# Move the YOLOv5 detection results (labels and images with bounding boxes) into the matching folders.
def move_detect_result_to_folder(input_path, output_path):
    """Move detection output from ``input_path/<patient>`` into ``output_path/<patient>``.

    For every entry of ``input_path``: regular files go to a freshly recreated
    ``output_path/<patient>/pred_images`` folder, sub-directories are moved to
    ``output_path/<patient>/<name>`` (replacing an existing folder of that
    name), and the emptied patient folder is removed. Finally ``input_path``
    itself is removed.

    Args:
        input_path: Directory with one detection-result sub-directory per patient.
        output_path: Directory holding the per-patient output folders. A missing
            patient folder is created instead of aborting the whole run.

    Raises:
        OSError: If a patient folder (or ``input_path``) is not empty after its
            content was moved.
    """
    for patient in os.listdir(input_path):
        print(f'Processing {patient}')
        result_dir = os.path.join(input_path, patient)
        patient_out = os.path.join(output_path, patient)
        image_path = os.path.join(patient_out, 'pred_images')
        if os.path.exists(image_path):
            shutil.rmtree(image_path)
        # makedirs also creates output_path/<patient> when it does not exist yet.
        os.makedirs(image_path)
        for file in os.listdir(result_dir):
            source = os.path.join(result_dir, file)
            if os.path.isfile(source):
                shutil.move(source, os.path.join(image_path, file))
            elif os.path.isdir(source):
                target = os.path.join(patient_out, file)
                if os.path.exists(target):
                    shutil.rmtree(target)
                shutil.move(source, target)
        # rmdir (not rmtree) on purpose: anything left behind surfaces as an error.
        os.rmdir(result_dir)
    os.rmdir(input_path)
                
# Distribute the detection results of the batches below. The commented-out calls are
# other batches (presumably distributed in earlier runs — confirm before re-enabling).
# move_detect_result_to_folder('/nfs3-p1/zsxm/dataset/9_detect/pred/2021-9-8/', '/nfs3-p1/zsxm/dataset/aorta_ct_img_label/positive')
# move_detect_result_to_folder('/nfs3-p1/zsxm/dataset/9_detect/pred/2021-9-13/', '/nfs3-p1/zsxm/dataset/aorta_ct_img_label/positive')
# move_detect_result_to_folder('/nfs3-p1/zsxm/dataset/9_detect/pred/2021-9-19/', '/nfs3-p1/zsxm/dataset/aorta_ct_img_label/positive')
# move_detect_result_to_folder('/nfs3-p1/zsxm/dataset/9_detect/pred/2021-9-28/', '/nfs3-p1/zsxm/dataset/aorta_ct_img_label/positive')
move_detect_result_to_folder('/nfs3-p1/zsxm/dataset/9_detect/pred/2021-11-20/', '/nfs3-p1/zsxm/dataset/aorta_ct_img_label/positive2')
move_detect_result_to_folder('/nfs3-p1/zsxm/dataset/9_detect/pred/2021-11-20-imh/', '/nfs3-p1/zsxm/dataset/aorta_ct_img_label/imh2')
move_detect_result_to_folder('/nfs3-p1/zsxm/dataset/9_detect/pred/2021-11-20-pau/', '/nfs3-p1/zsxm/dataset/aorta_ct_img_label/pau2')

Processing 傅阿乔-S-69-115-J-96-257
Processing 卢承流-J-79-350
Processing 叶超涵-S-97-103-J-97-323
Processing 姜浩芳-S-63-118-J-63-288
Processing 孙建华-J-64-173
Processing 张林生-J-75-237
Processing 朱大波-J-23-179
Processing 李资浩-S-41-68
Processing 欧兆辉-J-59-245
Processing 江西向-J-114-274
Processing 蒋世良-S-47-101-J-47-312
Processing 许建敏-J-102-397
Processing 许杏琴-J-150-266
Processing 赵晴-J-9-75
Processing 陈志武-J-69-407
Processing 韩桂英-J-92-143
Processing 马康美-S-74-114-J-74-318
Processing 俞子珊-J-71-191
Processing 刘培华-J-56-224
Processing 姚群芳-J-70-141-J-160-192
Processing 孙业武-S-100-164-J-100-306
Processing 张信基-J-54-62
Processing 慈能满-J-76-106
Processing 章国林-J-82-180
Processing 章群娣-S-59-81-J-59-73
Processing 陈金财-J-69-226
Processing 亓富兰-J-14-78
Processing 冷清裕-J-186-328
Processing 刘世杰-J-212-270
Processing 刘炳付-J-329-360
Processing 吕正雄-J-286-293
Processing 吴金荣-J-277-282
Processing 周朝进-J-264-288
Processing 周舍福-J-74-82
Processing 唐秋平-J-535-542-J-550-566
Processing 徐行福-J-387-396
Processing 戴照圣-J-116-255
Processing 戴行铨-J-275-309

In [12]:
# Crop the aorta out of every annotated slice.
def find_coordinate(height, width, label_file, aorta):
    """Compute a square pixel crop box around one aorta bounding box.

    The label file must hold one or two non-blank rows of whitespace-separated
    numbers; fields 1-4 of a row are read as the box centre x, centre y, width
    and height, each as a fraction of the image size (field 0 is ignored —
    presumably the YOLO class id). Every box centre must satisfy
    ``0.25 < x < 0.75`` and ``0.2 < y < 0.8``.

    Args:
        height: Image height in pixels.
        width: Image width in pixels.
        label_file: Path of the label text file.
        aorta: ``'s'`` picks the box with the smaller centre y, ``'j'`` the box
            with the larger centre y (per the assertion message ``'j'`` is the
            descending aorta; ``'s'`` is presumably the ascending aorta). With a
            single box only ``'j'`` is accepted.

    Returns:
        ``(x1, y1, x2, y2)`` integer pixel coordinates of a square whose side is
        the larger of the box width and height; the far edge lies one pixel
        beyond the near edge plus the side.

    Raises:
        AssertionError: Empty label file, more than two boxes, a single box
            requested with ``aorta != 'j'``, or a box centre outside the
            accepted region.
        Exception: Two boxes present and ``aorta`` is neither ``'s'`` nor ``'j'``.
    """
    label_name = label_file.split("/")[-1]
    with open(label_file, 'r') as f:
        # Blank lines carry no label; dropping them keeps a stray trailing newline
        # from being counted (and then parsed) as a second box.
        rows = [line for line in f if line.strip()]
    assert rows, f'label.txt为空：{label_name}'
    assert len(rows) <= 2, f'label.txt应该存储不多于2个label：{label_name}'
    boxes = [[float(value) for value in row.split()] for row in rows]
    if len(boxes) == 1:
        assert aorta == 'j', f'如果只有一个label那么此时应为降主动脉, 但实际为{aorta}：{label_name}'
        x, y, w, h = boxes[0][1:5]
        assert 0.25 < x < 0.75 and 0.2 < y < 0.8, f'边界框中心({x}, {y})出界：{label_name}'
    else:
        first, second = boxes
        assert 0.25 < first[1] < 0.75 and 0.2 < first[2] < 0.8, f'边界框1中心({first[1]}, {first[2]})出界：{label_name}'
        assert 0.25 < second[1] < 0.75 and 0.2 < second[2] < 0.8, f'边界框2中心({second[1]}, {second[2]})出界：{label_name}'
        if aorta == 's':
            chosen = first if first[2] < second[2] else second
        elif aorta == 'j':
            chosen = first if first[2] > second[2] else second
        else:
            raise Exception(f'aorta 应该为"s"或"j"其中之一: {label_name}')
        x, y, w, h = chosen[1:5]
    # Use the longer edge for both dimensions so the crop is square.
    side = max(int(width * w), int(height * h))
    return (int(width * x - side / 2), int(height * y - side / 2),
            int(width * x + side / 2 + 1), int(height * y + side / 2 + 1))

def crop_images(input_path, error_patient_list, margin=0):
    """Crop the aorta region out of the annotated slices of every patient folder.

    Each patient folder name is split on ``-``; every ``s`` or ``j`` token
    (case-insensitive) must be followed by a 1-based first slice number and a
    last slice number. For each slice in that range the PNG in ``images`` is
    cropped with the box returned by ``find_coordinate`` for the matching file
    in ``labels`` and saved to a freshly recreated ``crops`` folder as
    ``<patient>_<s|j>_<index:04d>.png``. Slices that lie only inside the extra
    ``margin`` get an ``_n`` suffix before the extension.

    Args:
        input_path: Directory with one sub-directory per patient; other entries
            are skipped.
        error_patient_list: List that receives the name of every patient for
            which at least one slice could not be cropped (mutated in place).
        margin: Number of additional slices to crop before and after each
            range. The default of 0 crops the annotated range only.
    """
    for patient in sorted(os.listdir(input_path)):
        patient_dir = os.path.join(input_path, patient)
        if not os.path.isdir(patient_dir):
            # Stray files next to the patient folders are not cases.
            continue
        print(f'******Processing {patient}******')
        image_path = os.path.join(patient_dir, 'images')
        label_path = os.path.join(patient_dir, 'labels')
        crop_path = os.path.join(patient_dir, 'crops')
        if os.path.exists(crop_path):
            shutil.rmtree(crop_path)
        os.mkdir(crop_path)
        pl = patient.lower().split('-')
        # Strip the 'im' marker from every token except the leading name token.
        for i, s in enumerate(pl):
            if i != 0 and s.startswith('im'):
                pl[i] = s.replace('im', '')
        crop_flag = True
        for i, s in enumerate(pl):
            if s != 's' and s != 'j':
                continue
            start, end = int(pl[i + 1]) - 1, int(pl[i + 2])
            # Slice indices start at 0, so a margin must not reach below it.
            for j in range(max(start - margin, 0), end + margin):
                stem = f'{patient}_{j:04d}'
                try:
                    # Opening the image inside the try records a missing PNG as a
                    # per-patient error instead of aborting the whole run.
                    img = np.array(Image.open(os.path.join(image_path, f'{stem}.png')))
                    x1, y1, x2, y2 = find_coordinate(*img.shape[0:2], os.path.join(label_path, f'{stem}.txt'), s)
                except Exception:
                    traceback.print_exc()
                    crop_flag = False
                else:
                    crop = Image.fromarray(img[y1:y2, x1:x2])
                    suffix = '' if start <= j < end else '_n'
                    crop.save(os.path.join(crop_path, f'{patient}_{s}_{j:04d}{suffix}.png'))
        if not crop_flag:
            # The partial crops are kept; the patient is only reported to the caller.
            error_patient_list.append(patient)

# One error list per dataset folder; crop_images appends every patient for which at
# least one slice could not be cropped.
epl1 = []
epl2 = []
epl3 = []

crop_images('/nfs3-p1/zsxm/dataset/aorta_ct_img_label/positive2', epl1)
# print('++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++')
# print('++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++')
# print('++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++')
# print('++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++')
# print('++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++')
crop_images('/nfs3-p1/zsxm/dataset/aorta_ct_img_label/imh2', epl2)
# print('++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++')
# print('++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++')
# print('++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++')
# print('++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++')
# print('++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++')
crop_images('/nfs3-p1/zsxm/dataset/aorta_ct_img_label/pau2', epl3)

******Processing 俞沧子-S-82-89-J-82-258******
******Processing 傅阿乔-S-69-115-J-96-257******


Traceback (most recent call last):
  File "<ipython-input-12-5113a831b4ed>", line 47, in crop_images
    x1, y1, x2, y2 = find_coordinate(*img.shape[0:2], os.path.join(label_path, f'{patient}_{j:04d}.txt'), s)
  File "<ipython-input-12-5113a831b4ed>", line 3, in find_coordinate
    with open(label_file, 'r') as f:
FileNotFoundError: [Errno 2] No such file or directory: '/nfs3-p1/zsxm/dataset/aorta_ct_img_label/positive2/傅阿乔-S-69-115-J-96-257/labels/傅阿乔-S-69-115-J-96-257_0199.txt'


******Processing 卢承流-J-79-350******
******Processing 叶超涵-S-97-103-J-97-323******
******Processing 姜浩芳-S-63-118-J-63-288******


Traceback (most recent call last):
  File "<ipython-input-12-5113a831b4ed>", line 47, in crop_images
    x1, y1, x2, y2 = find_coordinate(*img.shape[0:2], os.path.join(label_path, f'{patient}_{j:04d}.txt'), s)
  File "<ipython-input-12-5113a831b4ed>", line 7, in find_coordinate
    assert aorta == 'j', f'如果只有一个label那么此时应为降主动脉, 但实际为{aorta}：{label_file.split("/")[-1]}'
AssertionError: 如果只有一个label那么此时应为降主动脉, 但实际为s：姜浩芳-S-63-118-J-63-288_0117.txt
Traceback (most recent call last):
  File "<ipython-input-12-5113a831b4ed>", line 47, in crop_images
    x1, y1, x2, y2 = find_coordinate(*img.shape[0:2], os.path.join(label_path, f'{patient}_{j:04d}.txt'), s)
  File "<ipython-input-12-5113a831b4ed>", line 3, in find_coordinate
    with open(label_file, 'r') as f:
FileNotFoundError: [Errno 2] No such file or directory: '/nfs3-p1/zsxm/dataset/aorta_ct_img_label/positive2/姜浩芳-S-63-118-J-63-288/labels/姜浩芳-S-63-118-J-63-288_0153.txt'
Traceback (most recent call last):
  File "<ipython-input-12-5113a831

******Processing 孙建华-J-64-173******
******Processing 张林生-J-75-237******
******Processing 朱大波-J-23-179******
******Processing 李资浩-S-41-68******
******Processing 欧兆辉-J-59-245******
******Processing 江西向-J-114-274******
******Processing 蒋世良-S-47-101-J-47-312******
******Processing 许建敏-J-102-397******
******Processing 许杏琴-J-150-266******
******Processing 赵晴-J-9-75******
******Processing 陈志武-J-69-407******
******Processing 韩桂英-J-92-143******
******Processing 马康美-S-74-114-J-74-318******
******Processing 俞子珊-J-71-191******
******Processing 刘培华-J-56-224******
******Processing 姚群芳-J-70-141-J-160-192******
******Processing 孙业武-S-100-164-J-100-306******
******Processing 张信基-J-54-62******
******Processing 慈能满-J-76-106******
******Processing 章国林-J-82-180******
******Processing 章群娣-S-59-81-J-59-73******
******Processing 陈金财-J-69-226******
******Processing 亓富兰-J-14-78******


Traceback (most recent call last):
  File "<ipython-input-12-5113a831b4ed>", line 47, in crop_images
    x1, y1, x2, y2 = find_coordinate(*img.shape[0:2], os.path.join(label_path, f'{patient}_{j:04d}.txt'), s)
  File "<ipython-input-12-5113a831b4ed>", line 13, in find_coordinate
    assert 0.25 < corr1[1] < 0.75 and 0.2 < corr1[2] < 0.8, f'边界框1中心({corr1[1]}, {corr1[2]})出界：{label_file.split("/")[-1]}'
AssertionError: 边界框1中心(0.580078, 0.814453)出界：亓富兰-J-14-78_0021.txt
Traceback (most recent call last):
  File "<ipython-input-12-5113a831b4ed>", line 47, in crop_images
    x1, y1, x2, y2 = find_coordinate(*img.shape[0:2], os.path.join(label_path, f'{patient}_{j:04d}.txt'), s)
  File "<ipython-input-12-5113a831b4ed>", line 13, in find_coordinate
    assert 0.25 < corr1[1] < 0.75 and 0.2 < corr1[2] < 0.8, f'边界框1中心({corr1[1]}, {corr1[2]})出界：{label_file.split("/")[-1]}'
AssertionError: 边界框1中心(0.580078, 0.808594)出界：亓富兰-J-14-78_0023.txt
Traceback (most recent call last):
  File "<ipython-input-12

******Processing 冷清裕-J-186-328******
******Processing 刘世杰-J-212-270******


Traceback (most recent call last):
  File "<ipython-input-12-5113a831b4ed>", line 47, in crop_images
    x1, y1, x2, y2 = find_coordinate(*img.shape[0:2], os.path.join(label_path, f'{patient}_{j:04d}.txt'), s)
  File "<ipython-input-12-5113a831b4ed>", line 13, in find_coordinate
    assert 0.25 < corr1[1] < 0.75 and 0.2 < corr1[2] < 0.8, f'边界框1中心({corr1[1]}, {corr1[2]})出界：{label_file.split("/")[-1]}'
AssertionError: 边界框1中心(0.206055, 0.482422)出界：刘世杰-J-212-270_0218.txt
Traceback (most recent call last):
  File "<ipython-input-12-5113a831b4ed>", line 47, in crop_images
    x1, y1, x2, y2 = find_coordinate(*img.shape[0:2], os.path.join(label_path, f'{patient}_{j:04d}.txt'), s)
  File "<ipython-input-12-5113a831b4ed>", line 13, in find_coordinate
    assert 0.25 < corr1[1] < 0.75 and 0.2 < corr1[2] < 0.8, f'边界框1中心({corr1[1]}, {corr1[2]})出界：{label_file.split("/")[-1]}'
AssertionError: 边界框1中心(0.208008, 0.481445)出界：刘世杰-J-212-270_0220.txt
Traceback (most recent call last):
  File "<ipython-inpu

******Processing 刘炳付-J-329-360******
******Processing 吕正雄-J-286-293******
******Processing 吴金荣-J-277-282******
******Processing 周朝进-J-264-288******
******Processing 周舍福-J-74-82******
******Processing 唐秋平-J-535-542-J-550-566******
******Processing 徐行福-J-387-396******
******Processing 戴照圣-J-116-255******
******Processing 戴行铨-J-275-309******
******Processing 曹建花-J-284-290******
******Processing 曹生玉-J-54-237******
******Processing 朱惠贤-J-85-98******
******Processing 李佩芝-J-101-216******
******Processing 李永琪-J-504-513******
******Processing 杜广胜-J-186-370******
******Processing 武汉卿-J-314-324******
******Processing 潘根朝-J-114-324******
******Processing 童利君-J-21-24******
******Processing 茹志莲-J-78-253******
******Processing 谢必廉-J-68-76******
******Processing 谢文龙-J-72-79******
******Processing 金有才-J-93-110******
******Processing 钟清廉-J-247-252******
******Processing 陈胜平-J-60-262******


In [14]:
# Show, per dataset, how many patients had cropping errors and which ones.
print(len(epl1))
print(epl1)
print(len(epl2))
print(epl2)
print(len(epl3))
print(epl3)

2
['傅阿乔-S-69-115-J-96-257', '姜浩芳-S-63-118-J-63-288']
0
[]
2
['亓富兰-J-14-78', '刘世杰-J-212-270']


In [23]:
# Collect the cropped aorta images of all patients in one folder.
def move_to_classify(input_root, output_path):
    """Copy every file from each patient's ``crops`` folder into ``output_path``.

    Plain files directly inside ``input_root`` are skipped. Despite the function
    name the crops are copied, so the per-patient folders stay intact.

    Args:
        input_root: Directory containing one sub-directory per patient, each
            with a ``crops`` folder.
        output_path: Flat destination directory; created when it does not exist.
    """
    if not os.path.exists(output_path):
        os.makedirs(output_path)

    for patient in os.listdir(input_root):
        patient_dir = os.path.join(input_root, patient)
        if os.path.isfile(patient_dir):
            continue
        print(f'****Processing {patient}****')
        crops_dir = os.path.join(patient_dir, 'crops')
        for crop_name in os.listdir(crops_dir):
            shutil.copy(os.path.join(crops_dir, crop_name), output_path)

# Copy the crops of each dataset folder into its own sub-folder (1, 2, 3) of the
# window-specific classification training set.
move_to_classify('/nfs3-p1/zsxm/dataset/aorta_ct_img_label/positive2/', f'/nfs3-p1/zsxm/dataset/aorta_classify_ct_{lower_b}_{upper_b}/center/train/1')
move_to_classify('/nfs3-p1/zsxm/dataset/aorta_ct_img_label/imh2/', f'/nfs3-p1/zsxm/dataset/aorta_classify_ct_{lower_b}_{upper_b}/center/train/2')
move_to_classify('/nfs3-p1/zsxm/dataset/aorta_ct_img_label/pau2/', f'/nfs3-p1/zsxm/dataset/aorta_classify_ct_{lower_b}_{upper_b}/center/train/3')

****Processing 俞沧子-S-82-89-J-82-258****
****Processing 傅阿乔-S-69-115-J-96-257****
****Processing 卢承流-J-79-350****
****Processing 叶超涵-S-97-103-J-97-323****
****Processing 姜浩芳-S-63-118-J-63-288****
****Processing 孙建华-J-64-173****
****Processing 张林生-J-75-237****
****Processing 朱大波-J-23-179****
****Processing 李资浩-S-41-68****
****Processing 欧兆辉-J-59-245****
****Processing 江西向-J-114-274****
****Processing 蒋世良-S-47-101-J-47-312****
****Processing 许建敏-J-102-397****
****Processing 许杏琴-J-150-266****
****Processing 赵晴-J-9-75****
****Processing 陈志武-J-69-407****
****Processing 韩桂英-J-92-143****
****Processing 马康美-S-74-114-J-74-318****
****Processing 俞子珊-J-71-191****
****Processing 刘培华-J-56-224****
****Processing 姚群芳-J-70-141-J-160-192****
****Processing 孙业武-S-100-164-J-100-306****
****Processing 张信基-J-54-62****
****Processing 慈能满-J-76-106****
****Processing 章国林-J-82-180****
****Processing 章群娣-S-59-81-J-59-73****
****Processing 陈金财-J-69-226****
****Processing 亓富兰-J-14-78****
****Processing 冷清裕-J-186-3

In [21]:
# Crop the aorta including a margin of 3 extra slices on both sides of the annotated range.
def find_coordinate(height, width, label_file, aorta):
    """Compute a square pixel crop box around one aorta bounding box.

    The label file must hold one or two non-blank rows of whitespace-separated
    numbers; fields 1-4 of a row are read as the box centre x, centre y, width
    and height, each as a fraction of the image size (field 0 is ignored —
    presumably the YOLO class id). Every box centre must satisfy
    ``0.25 < x < 0.75`` and ``0.2 < y < 0.8``.

    Args:
        height: Image height in pixels.
        width: Image width in pixels.
        label_file: Path of the label text file.
        aorta: ``'s'`` picks the box with the smaller centre y, ``'j'`` the box
            with the larger centre y (per the assertion message ``'j'`` is the
            descending aorta; ``'s'`` is presumably the ascending aorta). With a
            single box only ``'j'`` is accepted.

    Returns:
        ``(x1, y1, x2, y2)`` integer pixel coordinates of a square whose side is
        the larger of the box width and height; the far edge lies one pixel
        beyond the near edge plus the side.

    Raises:
        AssertionError: Empty label file, more than two boxes, a single box
            requested with ``aorta != 'j'``, or a box centre outside the
            accepted region.
        Exception: Two boxes present and ``aorta`` is neither ``'s'`` nor ``'j'``.
    """
    label_name = label_file.split("/")[-1]
    with open(label_file, 'r') as f:
        # Blank lines carry no label; dropping them keeps a stray trailing newline
        # from being counted (and then parsed) as a second box.
        rows = [line for line in f if line.strip()]
    assert rows, f'label.txt为空：{label_name}'
    assert len(rows) <= 2, f'label.txt应该存储不多于2个label：{label_name}'
    boxes = [[float(value) for value in row.split()] for row in rows]
    if len(boxes) == 1:
        assert aorta == 'j', f'如果只有一个label那么此时应为降主动脉, 但实际为{aorta}：{label_name}'
        x, y, w, h = boxes[0][1:5]
        assert 0.25 < x < 0.75 and 0.2 < y < 0.8, f'边界框中心({x}, {y})出界：{label_name}'
    else:
        first, second = boxes
        assert 0.25 < first[1] < 0.75 and 0.2 < first[2] < 0.8, f'边界框1中心({first[1]}, {first[2]})出界：{label_name}'
        assert 0.25 < second[1] < 0.75 and 0.2 < second[2] < 0.8, f'边界框2中心({second[1]}, {second[2]})出界：{label_name}'
        if aorta == 's':
            chosen = first if first[2] < second[2] else second
        elif aorta == 'j':
            chosen = first if first[2] > second[2] else second
        else:
            raise Exception(f'aorta 应该为"s"或"j"其中之一: {label_name}')
        x, y, w, h = chosen[1:5]
    # Use the longer edge for both dimensions so the crop is square.
    side = max(int(width * w), int(height * h))
    return (int(width * x - side / 2), int(height * y - side / 2),
            int(width * x + side / 2 + 1), int(height * y + side / 2 + 1))

def crop_images(input_path, error_patient_list):
    """Crop the labelled aorta sections of every patient folder under ``input_path``.

    Each patient folder is expected to contain ``images`` and ``labels``
    sub-folders; its ``crops`` sub-folder is deleted and recreated. The
    folder name is lower-cased and split on ``-``; every ``s`` or ``j`` token
    must be followed by two integers giving the 1-based first slice and the
    last slice of that section (a leading ``im`` on a token is stripped).
    Three extra slices are cropped on each side of the range; those margin
    crops get an ``_n`` suffix in their file name.

    Args:
        input_path: Directory whose sub-directories are patient folders.
        error_patient_list: List that receives the name of every patient for
            which at least one slice could not be cropped (mutated in place).
    """
    for patient in sorted(os.listdir(input_path)):
        patient_dir = os.path.join(input_path, patient)
        # Skip stray files so a non-patient entry cannot abort the whole run.
        if not os.path.isdir(patient_dir):
            continue
        print(f'******Processing {patient}******')
        image_path = os.path.join(patient_dir, 'images')
        label_path = os.path.join(patient_dir, 'labels')
        crop_path = os.path.join(patient_dir, 'crops')
        if os.path.exists(crop_path):
            shutil.rmtree(crop_path)
        os.mkdir(crop_path)

        tokens = [
            tok.replace('im', '') if idx != 0 and tok.startswith('im') else tok
            for idx, tok in enumerate(patient.lower().split('-'))
        ]
        crop_flag = True
        for idx, section in enumerate(tokens):
            if section not in ('s', 'j'):
                continue
            start, end = int(tokens[idx+1])-1, int(tokens[idx+2])
            for j in range(start-3, end+3):
                try:
                    # The context manager releases the file handle once the pixels are copied.
                    with Image.open(os.path.join(image_path, f'{patient}_{j:04d}.png')) as im:
                        img = np.array(im)
                    x1, y1, x2, y2 = find_coordinate(*img.shape[0:2], os.path.join(label_path, f'{patient}_{j:04d}.txt'), section)
                except Exception:
                    # Only real errors are recorded; KeyboardInterrupt can still stop the run.
                    traceback.print_exc()
                    crop_flag = False
                else:
                    crop = Image.fromarray(img[y1:y2, x1:x2])
                    suffix = '' if start <= j < end else '_n'
                    crop.save(os.path.join(crop_path, f'{patient}_{section}_{j:04d}{suffix}.png'))
        if not crop_flag:
            error_patient_list.append(patient)

# One independent error-patient list per dataset processed below.
epl1, epl2, epl3 = [], [], []

crop_images('/nfs3-p1/zsxm/dataset/aorta_ct_img_label/positive2', epl1)
print('++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++')
print('++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++')
print('++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++')
print('++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++')
print('++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++')
crop_images('/nfs3-p1/zsxm/dataset/aorta_ct_img_label/imh2', epl2)
print('++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++')
print('++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++')
print('++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++')
print('++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++')
print('++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++')
crop_images('/nfs3-p1/zsxm/dataset/aorta_ct_img_label/pau2', epl3)

******Processing 俞沧子-S-82-89-J-82-258******


Traceback (most recent call last):
  File "<ipython-input-21-e91b69ea8ffe>", line 47, in crop_images
    x1, y1, x2, y2 = find_coordinate(*img.shape[0:2], os.path.join(label_path, f'{patient}_{j:04d}.txt'), s)
  File "<ipython-input-21-e91b69ea8ffe>", line 7, in find_coordinate
    assert aorta == 'j', f'如果只有一个label那么此时应为降主动脉, 但实际为{aorta}：{label_file.split("/")[-1]}'
AssertionError: 如果只有一个label那么此时应为降主动脉, 但实际为s：俞沧子-S-82-89-J-82-258_0078.txt
Traceback (most recent call last):
  File "<ipython-input-21-e91b69ea8ffe>", line 47, in crop_images
    x1, y1, x2, y2 = find_coordinate(*img.shape[0:2], os.path.join(label_path, f'{patient}_{j:04d}.txt'), s)
  File "<ipython-input-21-e91b69ea8ffe>", line 7, in find_coordinate
    assert aorta == 'j', f'如果只有一个label那么此时应为降主动脉, 但实际为{aorta}：{label_file.split("/")[-1]}'
AssertionError: 如果只有一个label那么此时应为降主动脉, 但实际为s：俞沧子-S-82-89-J-82-258_0079.txt
Traceback (most recent call last):
  File "<ipython-input-21-e91b69ea8ffe>", line 45, in crop_images
    img =

******Processing 傅阿乔-S-69-115-J-96-257******


Traceback (most recent call last):
  File "<ipython-input-21-e91b69ea8ffe>", line 47, in crop_images
    x1, y1, x2, y2 = find_coordinate(*img.shape[0:2], os.path.join(label_path, f'{patient}_{j:04d}.txt'), s)
  File "<ipython-input-21-e91b69ea8ffe>", line 3, in find_coordinate
    with open(label_file, 'r') as f:
FileNotFoundError: [Errno 2] No such file or directory: '/nfs3-p1/zsxm/dataset/aorta_ct_img_label/positive2/傅阿乔-S-69-115-J-96-257/labels/傅阿乔-S-69-115-J-96-257_0199.txt'


******Processing 卢承流-J-79-350******
******Processing 叶超涵-S-97-103-J-97-323******


Traceback (most recent call last):
  File "<ipython-input-21-e91b69ea8ffe>", line 47, in crop_images
    x1, y1, x2, y2 = find_coordinate(*img.shape[0:2], os.path.join(label_path, f'{patient}_{j:04d}.txt'), s)
  File "<ipython-input-21-e91b69ea8ffe>", line 7, in find_coordinate
    assert aorta == 'j', f'如果只有一个label那么此时应为降主动脉, 但实际为{aorta}：{label_file.split("/")[-1]}'
AssertionError: 如果只有一个label那么此时应为降主动脉, 但实际为s：叶超涵-S-97-103-J-97-323_0093.txt


******Processing 姜浩芳-S-63-118-J-63-288******


Traceback (most recent call last):
  File "<ipython-input-21-e91b69ea8ffe>", line 47, in crop_images
    x1, y1, x2, y2 = find_coordinate(*img.shape[0:2], os.path.join(label_path, f'{patient}_{j:04d}.txt'), s)
  File "<ipython-input-21-e91b69ea8ffe>", line 7, in find_coordinate
    assert aorta == 'j', f'如果只有一个label那么此时应为降主动脉, 但实际为{aorta}：{label_file.split("/")[-1]}'
AssertionError: 如果只有一个label那么此时应为降主动脉, 但实际为s：姜浩芳-S-63-118-J-63-288_0117.txt
Traceback (most recent call last):
  File "<ipython-input-21-e91b69ea8ffe>", line 47, in crop_images
    x1, y1, x2, y2 = find_coordinate(*img.shape[0:2], os.path.join(label_path, f'{patient}_{j:04d}.txt'), s)
  File "<ipython-input-21-e91b69ea8ffe>", line 3, in find_coordinate
    with open(label_file, 'r') as f:
FileNotFoundError: [Errno 2] No such file or directory: '/nfs3-p1/zsxm/dataset/aorta_ct_img_label/positive2/姜浩芳-S-63-118-J-63-288/labels/姜浩芳-S-63-118-J-63-288_0153.txt'
Traceback (most recent call last):
  File "<ipython-input-21-e91b69ea

  File "<ipython-input-21-e91b69ea8ffe>", line 3, in find_coordinate
    with open(label_file, 'r') as f:
FileNotFoundError: [Errno 2] No such file or directory: '/nfs3-p1/zsxm/dataset/aorta_ct_img_label/positive2/姜浩芳-S-63-118-J-63-288/labels/姜浩芳-S-63-118-J-63-288_0290.txt'


******Processing 孙建华-J-64-173******
******Processing 张林生-J-75-237******
******Processing 朱大波-J-23-179******
******Processing 李资浩-S-41-68******
******Processing 欧兆辉-J-59-245******
******Processing 江西向-J-114-274******
******Processing 蒋世良-S-47-101-J-47-312******
******Processing 许建敏-J-102-397******
******Processing 许杏琴-J-150-266******
******Processing 赵晴-J-9-75******
******Processing 陈志武-J-69-407******
******Processing 韩桂英-J-92-143******
******Processing 马康美-S-74-114-J-74-318******
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
******Processing 俞子珊-J-71-191******
******Processing 刘培华-J-56-224******
******Processing 姚群芳-J-70-14

Traceback (most recent call last):
  File "<ipython-input-21-e91b69ea8ffe>", line 47, in crop_images
    x1, y1, x2, y2 = find_coordinate(*img.shape[0:2], os.path.join(label_path, f'{patient}_{j:04d}.txt'), s)
  File "<ipython-input-21-e91b69ea8ffe>", line 7, in find_coordinate
    assert aorta == 'j', f'如果只有一个label那么此时应为降主动脉, 但实际为{aorta}：{label_file.split("/")[-1]}'
AssertionError: 如果只有一个label那么此时应为降主动脉, 但实际为s：孙业武-S-100-164-J-100-306_0096.txt


******Processing 张信基-J-54-62******
******Processing 慈能满-J-76-106******
******Processing 章国林-J-82-180******
******Processing 章群娣-S-59-81-J-59-73******
******Processing 陈金财-J-69-226******
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
******Processing 亓富兰-J-14-78******


Traceback (most recent call last):
  File "<ipython-input-21-e91b69ea8ffe>", line 47, in crop_images
    x1, y1, x2, y2 = find_coordinate(*img.shape[0:2], os.path.join(label_path, f'{patient}_{j:04d}.txt'), s)
  File "<ipython-input-21-e91b69ea8ffe>", line 13, in find_coordinate
    assert 0.25 < corr1[1] < 0.75 and 0.2 < corr1[2] < 0.8, f'边界框1中心({corr1[1]}, {corr1[2]})出界：{label_file.split("/")[-1]}'
AssertionError: 边界框1中心(0.580078, 0.814453)出界：亓富兰-J-14-78_0021.txt
Traceback (most recent call last):
  File "<ipython-input-21-e91b69ea8ffe>", line 47, in crop_images
    x1, y1, x2, y2 = find_coordinate(*img.shape[0:2], os.path.join(label_path, f'{patient}_{j:04d}.txt'), s)
  File "<ipython-input-21-e91b69ea8ffe>", line 13, in find_coordinate
    assert 0.25 < corr1[1] < 0.75 and 0.2 < corr1[2] < 0.8, f'边界框1中心({corr1[1]}, {corr1[2]})出界：{label_file.split("/")[-1]}'
AssertionError: 边界框1中心(0.580078, 0.808594)出界：亓富兰-J-14-78_0023.txt
Traceback (most recent call last):
  File "<ipython-input-21

******Processing 冷清裕-J-186-328******
******Processing 刘世杰-J-212-270******


Traceback (most recent call last):
  File "<ipython-input-21-e91b69ea8ffe>", line 47, in crop_images
    x1, y1, x2, y2 = find_coordinate(*img.shape[0:2], os.path.join(label_path, f'{patient}_{j:04d}.txt'), s)
  File "<ipython-input-21-e91b69ea8ffe>", line 3, in find_coordinate
    with open(label_file, 'r') as f:
FileNotFoundError: [Errno 2] No such file or directory: '/nfs3-p1/zsxm/dataset/aorta_ct_img_label/pau2/刘世杰-J-212-270/labels/刘世杰-J-212-270_0208.txt'
Traceback (most recent call last):
  File "<ipython-input-21-e91b69ea8ffe>", line 47, in crop_images
    x1, y1, x2, y2 = find_coordinate(*img.shape[0:2], os.path.join(label_path, f'{patient}_{j:04d}.txt'), s)
  File "<ipython-input-21-e91b69ea8ffe>", line 13, in find_coordinate
    assert 0.25 < corr1[1] < 0.75 and 0.2 < corr1[2] < 0.8, f'边界框1中心({corr1[1]}, {corr1[2]})出界：{label_file.split("/")[-1]}'
AssertionError: 边界框1中心(0.206055, 0.482422)出界：刘世杰-J-212-270_0218.txt
Traceback (most recent call last):
  File "<ipython-input-21-e91

******Processing 刘炳付-J-329-360******
******Processing 吕正雄-J-286-293******
******Processing 吴金荣-J-277-282******


Traceback (most recent call last):
  File "<ipython-input-21-e91b69ea8ffe>", line 45, in crop_images
    img = Image.open(os.path.join(image_path, f'{patient}_{j:04d}.png'))
  File "/home/zsxm/miniconda3/envs/pytorch/lib/python3.8/site-packages/PIL/Image.py", line 2912, in open
    fp = builtins.open(filename, "rb")
FileNotFoundError: [Errno 2] No such file or directory: '/nfs3-p1/zsxm/dataset/aorta_ct_img_label/pau2/吕正雄-J-286-293/images/吕正雄-J-286-293_0293.png'
Traceback (most recent call last):
  File "<ipython-input-21-e91b69ea8ffe>", line 45, in crop_images
    img = Image.open(os.path.join(image_path, f'{patient}_{j:04d}.png'))
  File "/home/zsxm/miniconda3/envs/pytorch/lib/python3.8/site-packages/PIL/Image.py", line 2912, in open
    fp = builtins.open(filename, "rb")
FileNotFoundError: [Errno 2] No such file or directory: '/nfs3-p1/zsxm/dataset/aorta_ct_img_label/pau2/吕正雄-J-286-293/images/吕正雄-J-286-293_0294.png'
Traceback (most recent call last):
  File "<ipython-input-21-e91b69e

******Processing 周朝进-J-264-288******
******Processing 周舍福-J-74-82******
******Processing 唐秋平-J-535-542-J-550-566******
******Processing 徐行福-J-387-396******
******Processing 戴照圣-J-116-255******
******Processing 戴行铨-J-275-309******
******Processing 曹建花-J-284-290******
******Processing 曹生玉-J-54-237******
******Processing 朱惠贤-J-85-98******
******Processing 李佩芝-J-101-216******
******Processing 李永琪-J-504-513******
******Processing 杜广胜-J-186-370******
******Processing 武汉卿-J-314-324******
******Processing 潘根朝-J-114-324******
******Processing 童利君-J-21-24******
******Processing 茹志莲-J-78-253******


Traceback (most recent call last):
  File "<ipython-input-21-e91b69ea8ffe>", line 45, in crop_images
    img = Image.open(os.path.join(image_path, f'{patient}_{j:04d}.png'))
  File "/home/zsxm/miniconda3/envs/pytorch/lib/python3.8/site-packages/PIL/Image.py", line 2912, in open
    fp = builtins.open(filename, "rb")
FileNotFoundError: [Errno 2] No such file or directory: '/nfs3-p1/zsxm/dataset/aorta_ct_img_label/pau2/茹志莲-J-78-253/images/茹志莲-J-78-253_0253.png'
Traceback (most recent call last):
  File "<ipython-input-21-e91b69ea8ffe>", line 45, in crop_images
    img = Image.open(os.path.join(image_path, f'{patient}_{j:04d}.png'))
  File "/home/zsxm/miniconda3/envs/pytorch/lib/python3.8/site-packages/PIL/Image.py", line 2912, in open
    fp = builtins.open(filename, "rb")
FileNotFoundError: [Errno 2] No such file or directory: '/nfs3-p1/zsxm/dataset/aorta_ct_img_label/pau2/茹志莲-J-78-253/images/茹志莲-J-78-253_0254.png'
Traceback (most recent call last):
  File "<ipython-input-21-e91b69ea8ff

******Processing 谢必廉-J-68-76******
******Processing 谢文龙-J-72-79******
******Processing 金有才-J-93-110******
******Processing 钟清廉-J-247-252******
******Processing 陈胜平-J-60-262******


In [22]:
# Report, per dataset, how many patients failed and which ones.
for error_list in (epl1, epl2, epl3):
    print(len(error_list))
    print(error_list)

4
['俞沧子-S-82-89-J-82-258', '傅阿乔-S-69-115-J-96-257', '叶超涵-S-97-103-J-97-323', '姜浩芳-S-63-118-J-63-288']
1
['孙业武-S-100-164-J-100-306']
4
['亓富兰-J-14-78', '刘世杰-J-212-270', '吕正雄-J-286-293', '茹志莲-J-78-253']


## 2.阴性数据

In [None]:
# Convert dcm files to PNG images according to the window width/level
def generate_image(input_folder):
    """Write a windowed 8-bit PNG for every slice of scan ``1`` of each patient.

    For each non-file entry of ``input_folder`` that contains an entry named
    ``1``, the folder ``<patient>/1/images_<lower_b>_<upper_b>`` is deleted
    and recreated, then filled with ``<patient>_<index>.png`` images. Pixel
    values are rescaled with the slice's RescaleSlope/RescaleIntercept,
    clipped to ``[lower_b, upper_b]`` and mapped linearly onto 0..255.

    Args:
        input_folder: Directory whose sub-directories are patient folders.
    """
    for patient in sorted(os.listdir(input_folder)):
        patient_dir = os.path.join(input_folder, patient)
        if os.path.isfile(patient_dir):
            continue
        print(f'****Processing {patient}****')
        # Only the scan named '1' is converted; every other entry is ignored.
        if '1' not in os.listdir(patient_dir):
            continue
        scan_dir = os.path.join(patient_dir, '1')
        image_path = os.path.join(scan_dir, f'images_{lower_b}_{upper_b}')
        if os.path.exists(image_path):
            shutil.rmtree(image_path)
        os.mkdir(image_path)

        for index, dcm in enumerate(load_scan(scan_dir)):
            pixels = dcm.pixel_array.astype(np.int16)
            intercept = dcm.RescaleIntercept
            slope = dcm.RescaleSlope
            if slope != 1:
                pixels = (slope * pixels.astype(np.float64)).astype(np.int16)
            pixels += np.int16(intercept)
            windowed = np.clip(pixels, lower_b, upper_b)
            # Map the window linearly onto the 8-bit grey range.
            scaled = ((windowed-lower_b)/(upper_b-lower_b)*255).astype(np.uint8)
            Image.fromarray(scaled).save(os.path.join(image_path, f'{patient}_{index:04d}.png'))

# Convert both negative batches.
for negative_root in ('/nfs3-p2/zsxm/dataset/2021-9-17-negative/',
                      '/nfs3-p2/zsxm/dataset/2021-9-29-negative/'):
    generate_image(negative_root)

In [None]:
# Gather the PNG image folder of every case in one place for YOLOv5 detection
def move_together_for_detect(input_folder, dst_path):
    """Copy each patient's windowed PNG folder to ``dst_path/<root>/<patient>``.

    ``<root>`` is the last path component of ``input_folder`` (trailing
    slashes are ignored). Despite the name, the images are copied, not
    moved. An existing target folder for a patient is removed first. A
    failed copy is reported with a traceback and the loop continues with
    the next patient.

    Args:
        input_folder: Directory whose sub-directories are patient folders,
            each holding ``1/images_<lower_b>_<upper_b>``.
        dst_path: Directory under which the ``<root>`` folder is created.
    """
    if not os.path.exists(dst_path):
        # makedirs also creates missing parent directories.
        os.makedirs(dst_path)
    root_name = os.path.basename(input_folder.rstrip('/'))
    dst_path = os.path.join(dst_path, root_name)

    for patient in sorted(os.listdir(input_folder)):
        patient_dir = os.path.join(input_folder, patient)
        if os.path.isfile(patient_dir):
            continue
        print(f'****Processing {patient}****')
        target = os.path.join(dst_path, patient)
        if os.path.exists(target):
            print(f"\tremove {target}")
            shutil.rmtree(target)

        try:
            shutil.copytree(os.path.join(patient_dir, '1', f'images_{lower_b}_{upper_b}'), target)
        except Exception:
            # Keep going on per-patient failures, but let KeyboardInterrupt through.
            traceback.print_exc()

# Stage both negative batches for detection.
for negative_root in ('/nfs3-p2/zsxm/dataset/2021-9-17-negative',
                      '/nfs3-p2/zsxm/dataset/2021-9-29-negative'):
    move_together_for_detect(negative_root, '/nfs3-p1/zsxm/dataset/9_detect/')

In [None]:
# shutil.move renames the source to the destination when the destination does
# not exist, so make sure 'pred' is a directory first; otherwise the first run
# folder would become 'pred' itself instead of 'pred/<run name>'.
os.makedirs('/nfs3-p1/zsxm/dataset/9_detect/pred', exist_ok=True)
for run_name in ('2021-9-17-negative', '2021-9-29-negative'):
    shutil.move(f'/disk1/zsxm/pythonWorkspace/yolov5/runs/detect/{run_name}', '/nfs3-p1/zsxm/dataset/9_detect/pred')

In [None]:
# Used when changing the window width/level: the labels were already produced by
# YOLOv5 detection, so the images can go straight into each patient's images folder
def directly_move_to_image_label(input_folder, dst_path):
    """Copy each patient's windowed PNG folder to ``dst_path/<patient>/images``.

    Despite the name, the images are copied, not moved. An existing
    ``images`` folder for a patient is removed first. A failed copy is
    reported with a traceback and the loop continues with the next patient.

    Args:
        input_folder: Directory whose sub-directories are patient folders,
            each holding ``1/images_<lower_b>_<upper_b>``.
        dst_path: Directory that receives one folder per patient.
    """
    if not os.path.exists(dst_path):
        # makedirs also creates missing parent directories.
        os.makedirs(dst_path)

    for patient in sorted(os.listdir(input_folder)):
        patient_dir = os.path.join(input_folder, patient)
        if os.path.isfile(patient_dir):
            continue
        print(f'****Processing {patient}****')
        image_path = os.path.join(dst_path, patient, 'images')
        if os.path.exists(image_path):
            print(f"\tremove {image_path}")
            shutil.rmtree(image_path)

        try:
            shutil.copytree(os.path.join(patient_dir, '1', f'images_{lower_b}_{upper_b}'), image_path)
        except Exception:
            # Keep going on per-patient failures, but let KeyboardInterrupt through.
            traceback.print_exc()
            
# NOTE(review): the other cells read this batch from /nfs3-p2/zsxm/dataset/...;
# confirm that the /nfs3-p1/ source path used here is intended.
directly_move_to_image_label('/nfs3-p1/zsxm/dataset/2021-9-17-negative/', '/nfs3-p1/zsxm/dataset/aorta_ct_img_label/negative')

In [None]:
# Move the original images that were staged in the detection folder into each
# patient's images folder
def move_ori_image_to_sigle_folder(input_path, dst_path):
    """Move every patient's staged files into ``dst_path/<patient>/images``.

    An existing ``images`` folder is deleted and recreated. After the files
    of a patient are moved, the patient's source folder is removed with
    ``os.rmdir`` (which fails if anything is left in it), and finally
    ``input_path`` itself is removed the same way.

    Args:
        input_path: Staging directory holding one folder per patient.
        dst_path: Dataset directory that receives the per-patient folders.
    """
    if not os.path.exists(dst_path):
        os.mkdir(dst_path)
    for patient in os.listdir(input_path):
        print(f'****Processing {patient}****')
        source_dir = os.path.join(input_path, patient)
        patient_dir = os.path.join(dst_path, patient)
        image_path = os.path.join(patient_dir, 'images')
        if not os.path.exists(patient_dir):
            os.mkdir(patient_dir)
        if os.path.exists(image_path):
            shutil.rmtree(image_path)
        os.mkdir(image_path)
        for entry in os.listdir(source_dir):
            source_file = os.path.join(source_dir, entry)
            if not os.path.isfile(source_file):
                continue
            shutil.move(source_file, os.path.join(image_path, entry))
        # rmdir only succeeds on an empty folder, so leftovers surface as an error.
        os.rmdir(source_dir)
    os.rmdir(input_path)
    
# Move the staged originals of both negative batches into the dataset folder.
for staged_root in ('/nfs3-p1/zsxm/dataset/9_detect/2021-9-17-negative/',
                    '/nfs3-p1/zsxm/dataset/9_detect/2021-9-29-negative/'):
    move_ori_image_to_sigle_folder(staged_root, '/nfs3-p1/zsxm/dataset/aorta_ct_img_label/negative')

In [None]:
# Move the YOLOv5 detection results (labels and images with bounding boxes) to
# the corresponding patient folders
def move_detect_result_to_folder(input_path, output_path):
    """Move every patient's detection output into ``output_path/<patient>``.

    Regular files go to ``<patient>/pred_images`` (deleted and recreated
    first); sub-directories replace the same-named directory under
    ``<patient>``. ``output_path/<patient>`` must already exist. Each emptied
    source folder, and finally ``input_path``, is removed with ``os.rmdir``.

    Args:
        input_path: Detection output directory holding one folder per patient.
        output_path: Dataset directory holding the per-patient folders.
    """
    for patient in os.listdir(input_path):
        print(f'Processing {patient}')
        source_dir = os.path.join(input_path, patient)
        target_dir = os.path.join(output_path, patient)
        image_path = os.path.join(target_dir, 'pred_images')
        if os.path.exists(image_path):
            shutil.rmtree(image_path)
        os.mkdir(image_path)
        for entry in os.listdir(source_dir):
            source_entry = os.path.join(source_dir, entry)
            if os.path.isfile(source_entry):
                shutil.move(source_entry, os.path.join(image_path, entry))
                continue
            if not os.path.isdir(source_entry):
                continue
            # A directory replaces whatever sits at the same name in the target.
            replaced = os.path.join(target_dir, entry)
            if os.path.exists(replaced):
                shutil.rmtree(replaced)
            shutil.move(source_entry, replaced)
        os.rmdir(source_dir)
    os.rmdir(input_path)
                
# File the detection results of both negative batches.
for pred_root in ('/nfs3-p1/zsxm/dataset/9_detect/pred/2021-9-17-negative/',
                  '/nfs3-p1/zsxm/dataset/9_detect/pred/2021-9-29-negative/'):
    move_detect_result_to_folder(pred_root, '/nfs3-p1/zsxm/dataset/aorta_ct_img_label/negative')

In [None]:
# Crop the aorta. Known problem: the slices within 0.3 before/after branch_end are
# not cropped; it is suggested to switch to the same scheme as the cell below.
def find_coordinate(height, width, label_file, aorta):
    """Return the square pixel crop box for one aorta section of a slice.

    The label file holds one or two whitespace-separated label lines; fields
    1-4 of each line are read as normalized centre-x, centre-y, width and
    height (field 0 is ignored). Blank lines are skipped.

    Args:
        height: Image height in pixels.
        width: Image width in pixels.
        label_file: Path of the label ``.txt`` file for the slice.
        aorta: ``'s'`` selects the box with the smaller centre-y, ``'j'`` the
            box with the larger centre-y (the descending aorta according to
            the assertion message; ``'s'`` is presumably the ascending
            aorta). A file with a single label is only accepted for ``'j'``.

    Returns:
        ``(x1, y1, x2, y2)`` integer corners of a square centred on the
        selected box whose side is the larger of the box's pixel width and
        pixel height.

    Raises:
        AssertionError: If the file has no label, more than two labels, a
            single label while ``aorta`` is not ``'j'``, or a box centre
            outside 0.25 < x < 0.75, 0.2 < y < 0.8.
        Exception: If two labels are present and ``aorta`` is neither
            ``'s'`` nor ``'j'``.
    """
    label_name = os.path.basename(label_file)
    with open(label_file, 'r') as f:
        # Skip blank lines so a stray empty line is not mistaken for a label.
        lines = [line for line in f if line.strip()]
    assert lines, f'label.txt中没有任何label：{label_name}'
    assert len(lines) <= 2, f'label.txt应该存储不多于2个label：{label_name}'
    boxes = [[float(value) for value in line.split()] for line in lines]

    if len(boxes) == 1:
        assert aorta == 'j', f'如果只有一个label那么此时应为降主动脉, 但实际为{aorta}：{label_name}'
        x, y, w, h = boxes[0][1:5]
        assert 0.25 < x < 0.75 and 0.2 < y < 0.8, f'边界框中心({x}, {y})出界：{label_name}'
    else:
        for idx, box in enumerate(boxes, start=1):
            assert 0.25 < box[1] < 0.75 and 0.2 < box[2] < 0.8, f'边界框{idx}中心({box[1]}, {box[2]})出界：{label_name}'
        first, second = boxes
        if aorta == 's':
            chosen = first if first[2] < second[2] else second
        elif aorta == 'j':
            chosen = first if first[2] > second[2] else second
        else:
            raise Exception(f'aorta 应该为"s"或"j"其中之一: {label_name}')
        x, y, w, h = chosen[1:5]

    # Use the longer pixel side for both dimensions so the crop is square.
    side = max(int(width*w), int(height*h))
    return int(width*x-side/2), int(height*y-side/2), int(width*x+side/2+1), int(height*y+side/2+1)

def crop_images(input_path, error_patient_list):
    """Crop aorta sections from the inner part of each negative patient's ranges.

    The patient folder name must split on ``-`` into exactly five tokens:
    ``<name>-<aorta_start>-<branch_start>-<branch_end>-<aorta_end>`` with
    1-based slice numbers. Two slice ranges are processed, each shrunk by
    10 % of its length at the start and 20 % at the end:

    * ``branch_start..branch_end``: both ``'s'`` and ``'j'`` are cropped;
    * ``branch_end..aorta_end``: only ``'j'`` is cropped.

    The ``crops`` sub-folder is deleted and recreated. A missing image file
    raises; a label problem is reported with a traceback and marks the
    patient as failed.

    Args:
        input_path: Directory whose sub-directories are patient folders.
        error_patient_list: List that receives the name of every patient for
            which at least one crop failed (mutated in place).
    """
    for patient in sorted(os.listdir(input_path)):
        patient_dir = os.path.join(input_path, patient)
        # Skip stray files so a non-patient entry cannot abort the whole run.
        if not os.path.isdir(patient_dir):
            continue
        print(f'******Processing {patient}******')
        image_path = os.path.join(patient_dir, 'images')
        label_path = os.path.join(patient_dir, 'labels')
        crop_path = os.path.join(patient_dir, 'crops')
        if os.path.exists(crop_path):
            shutil.rmtree(crop_path)
        os.mkdir(crop_path)
        pl = patient.split('-')
        assert len(pl) == 5
        # aorta_start is parsed (so a malformed value still raises) but is not used here.
        _, branch_start, branch_end, aorta_end = (int(value) - 1 for value in pl[1:])
        crop_flag = True
        segments = (
            (branch_start, branch_end, ('s', 'j')),
            (branch_end, aorta_end, ('j',)),
        )
        for seg_start, seg_end, sections in segments:
            offset = seg_end - seg_start
            start, end = seg_start + int(0.1*offset), seg_end - int(0.2*offset)
            for i in range(start, end):
                # The context manager releases the file handle once the pixels are copied.
                with Image.open(os.path.join(image_path, f'{patient}_{i:04d}.png')) as im:
                    img = np.array(im)
                label_file = os.path.join(label_path, f'{patient}_{i:04d}.txt')
                for section in sections:
                    try:
                        x1, y1, x2, y2 = find_coordinate(*img.shape[0:2], label_file, section)
                    except Exception:
                        # Only real errors are recorded; KeyboardInterrupt can still stop the run.
                        traceback.print_exc()
                        crop_flag = False
                    else:
                        crop = Image.fromarray(img[y1:y2, x1:x2])
                        crop.save(os.path.join(crop_path, f'{patient}_{section}_{i:04d}.png'))
        if not crop_flag:
            error_patient_list.append(patient)
            
# Fresh error list for the negative dataset.
epl1 = list()
crop_images(input_path='/nfs3-p1/zsxm/dataset/aorta_ct_img_label/negative', error_patient_list=epl1)

In [None]:
# Number of failed patients, then their names.
print(len(epl1), epl1, sep='\n')

In [None]:
# # 将切出的主动脉移动到一起
# def move_to_classify(input_root, output_path):
#     if not os.path.exists(output_path):
#         os.makedirs(output_path)
    
#     for patient in os.listdir(input_root):
#         if os.path.isfile(os.path.join(input_root, patient)):
#             continue
#         print(f'****Processing {patient}****')
#         for file in os.listdir(os.path.join(input_root, patient, 'crops')):
#             shutil.copy(os.path.join(input_root, patient, 'crops', file), output_path)
            
# move_to_classify('/nfs3-p1/zsxm/dataset/aorta_ct_img_label/negative/', f'/nfs3-p1/zsxm/dataset/aorta_classify_ct_{lower_b}_{upper_b}/0/')

In [None]:
# Crop the aorta, keeping 3 extra slices of margin outside the annotated range
def find_coordinate(height, width, label_file, aorta):
    """Return the square pixel crop box for one aorta section of a slice.

    The label file holds one or two whitespace-separated label lines; fields
    1-4 of each line are read as normalized centre-x, centre-y, width and
    height (field 0 is ignored). Blank lines are skipped.

    Args:
        height: Image height in pixels.
        width: Image width in pixels.
        label_file: Path of the label ``.txt`` file for the slice.
        aorta: ``'s'`` selects the box with the smaller centre-y, ``'j'`` the
            box with the larger centre-y (the descending aorta according to
            the assertion message; ``'s'`` is presumably the ascending
            aorta). A file with a single label is only accepted for ``'j'``.

    Returns:
        ``(x1, y1, x2, y2)`` integer corners of a square centred on the
        selected box whose side is the larger of the box's pixel width and
        pixel height.

    Raises:
        AssertionError: If the file has no label, more than two labels, a
            single label while ``aorta`` is not ``'j'``, or a box centre
            outside 0.25 < x < 0.75, 0.2 < y < 0.8.
        Exception: If two labels are present and ``aorta`` is neither
            ``'s'`` nor ``'j'``.
    """
    label_name = os.path.basename(label_file)
    with open(label_file, 'r') as f:
        # Skip blank lines so a stray empty line is not mistaken for a label.
        lines = [line for line in f if line.strip()]
    assert lines, f'label.txt中没有任何label：{label_name}'
    assert len(lines) <= 2, f'label.txt应该存储不多于2个label：{label_name}'
    boxes = [[float(value) for value in line.split()] for line in lines]

    if len(boxes) == 1:
        assert aorta == 'j', f'如果只有一个label那么此时应为降主动脉, 但实际为{aorta}：{label_name}'
        x, y, w, h = boxes[0][1:5]
        assert 0.25 < x < 0.75 and 0.2 < y < 0.8, f'边界框中心({x}, {y})出界：{label_name}'
    else:
        for idx, box in enumerate(boxes, start=1):
            assert 0.25 < box[1] < 0.75 and 0.2 < box[2] < 0.8, f'边界框{idx}中心({box[1]}, {box[2]})出界：{label_name}'
        first, second = boxes
        if aorta == 's':
            chosen = first if first[2] < second[2] else second
        elif aorta == 'j':
            chosen = first if first[2] > second[2] else second
        else:
            raise Exception(f'aorta 应该为"s"或"j"其中之一: {label_name}')
        x, y, w, h = chosen[1:5]

    # Use the longer pixel side for both dimensions so the crop is square.
    side = max(int(width*w), int(height*h))
    return int(width*x-side/2), int(height*y-side/2), int(width*x+side/2+1), int(height*y+side/2+1)

def crop_images(input_path, error_patient_list):
    """Crop aorta sections of each negative patient, with a 3-slice margin.

    The patient folder name must split on ``-`` into exactly five tokens:
    ``<name>-<aorta_start>-<branch_start>-<branch_end>-<aorta_end>`` with
    1-based slice numbers. Two passes are made:

    * ``'s'`` over ``branch_start..branch_end`` shrunk by 10 % of its length
      at the start and 20 % at the end;
    * ``'j'`` over ``branch_start..aorta_end`` shrunk by 5 % at the start and
      10 % at the end.

    Each pass also crops three extra slices on either side of its range;
    those margin crops get an ``_n`` suffix in their file name. The
    ``crops`` sub-folder is deleted and recreated. Any failure on a slice
    is reported with a traceback and marks the patient as failed.

    Args:
        input_path: Directory whose sub-directories are patient folders.
        error_patient_list: List that receives the name of every patient for
            which at least one slice could not be cropped (mutated in place).
    """
    for patient in sorted(os.listdir(input_path)):
        patient_dir = os.path.join(input_path, patient)
        # Skip stray files so a non-patient entry cannot abort the whole run.
        if not os.path.isdir(patient_dir):
            continue
        print(f'******Processing {patient}******')
        image_path = os.path.join(patient_dir, 'images')
        label_path = os.path.join(patient_dir, 'labels')
        crop_path = os.path.join(patient_dir, 'crops')
        if os.path.exists(crop_path):
            shutil.rmtree(crop_path)
        os.mkdir(crop_path)
        pl = patient.split('-')
        assert len(pl) == 5
        # aorta_start is parsed (so a malformed value still raises) but is not used here.
        _, branch_start, branch_end, aorta_end = (int(value) - 1 for value in pl[1:])
        crop_flag = True
        # (section, range start, range end, fraction trimmed at start, fraction trimmed at end)
        passes = (
            ('s', branch_start, branch_end, 0.1, 0.2),
            ('j', branch_start, aorta_end, 0.05, 0.1),
        )
        for section, seg_start, seg_end, head_frac, tail_frac in passes:
            offset = seg_end - seg_start
            start, end = seg_start + int(head_frac*offset), seg_end - int(tail_frac*offset)
            for i in range(start-3, end+3):
                try:
                    # The context manager releases the file handle once the pixels are copied.
                    with Image.open(os.path.join(image_path, f'{patient}_{i:04d}.png')) as im:
                        img = np.array(im)
                    x1, y1, x2, y2 = find_coordinate(*img.shape[0:2], os.path.join(label_path, f'{patient}_{i:04d}.txt'), section)
                except Exception:
                    # Only real errors are recorded; KeyboardInterrupt can still stop the run.
                    traceback.print_exc()
                    crop_flag = False
                else:
                    crop = Image.fromarray(img[y1:y2, x1:x2])
                    suffix = '' if start <= i < end else '_n'
                    crop.save(os.path.join(crop_path, f'{patient}_{section}_{i:04d}{suffix}.png'))
        if not crop_flag:
            error_patient_list.append(patient)
            
# Start from an empty error list, then crop the negative dataset.
epl1 = list()
crop_images(input_path='/nfs3-p1/zsxm/dataset/aorta_ct_img_label/negative', error_patient_list=epl1)

In [None]:
# Number of failed patients on the first line, their names on the second.
print(f'{len(epl1)}\n{epl1}')

## 3.固定训练集和测试集

### 3.1 阳性数据

In [None]:
patient_roots = ['/nfs3-p1/zsxm/dataset/aorta_ct_img_label/pau']
# Collect the names (not the full paths) of every patient directory, root by root.
patient_list = []
for patient_root in patient_roots:
    patient_list.extend(
        entry for entry in sorted(os.listdir(patient_root))
        if os.path.isdir(os.path.join(patient_root, entry))
    )

In [None]:
# Seeded 80/20 patient-level split; both lists end up sorted.
random.seed(7987)
val_patient_list = random.sample(patient_list, int(len(patient_list)*0.2))
held_out = set(val_patient_list)
train_patient_list = sorted(p for p in patient_list if p not in held_out)
val_patient_list.sort()
n_train, n_val = len(train_patient_list), len(val_patient_list)
print(n_train, n_val, n_train + n_val)

In [None]:
# Recover the patient names of the existing split from the crop file names
# (the patient name is everything before the first '_').
ori_train_list = sorted({
    crop_name.split('_')[0]
    for crop_name in os.listdir('/nfs3-p1/zsxm/dataset/aorta_classify_ct_-100_500/train/3')
})
ori_val_list = sorted({
    crop_name.split('_')[0]
    for crop_name in os.listdir('/nfs3-p1/zsxm/dataset/aorta_classify_ct_-100_500/val/3')
})
n_train, n_val = len(ori_train_list), len(ori_val_list)
print(n_train, n_val, n_train + n_val, len(patient_list))

In [None]:
# Turn the recovered patient names into full patient-folder paths.
# NOTE(review): this overwrites the lists produced by the seeded random split.
train_patient_list = list(map(lambda name: os.path.join('/nfs3-p1/zsxm/dataset/aorta_ct_img_label/pau', name), ori_train_list))
val_patient_list = list(map(lambda name: os.path.join('/nfs3-p1/zsxm/dataset/aorta_ct_img_label/pau', name), ori_val_list))

In [None]:
def move_to_classify(patient_list, output_path):
    """Copy every file from each patient's ``crops`` folder into ``output_path``.

    Args:
        patient_list: Paths of patient folders, each containing ``crops``.
        output_path: Destination directory; created (with parents) if missing.
    """
    if not os.path.exists(output_path):
        os.makedirs(output_path)

    for patient in patient_list:
        print(f'****Processing {patient}****')
        crops_dir = os.path.join(patient, 'crops')
        for crop_name in os.listdir(crops_dir):
            shutil.copy(os.path.join(crops_dir, crop_name), output_path)
            
# Copy the crops of each split into class folder 3.
for split_name, split_patients in (('train', train_patient_list), ('val', val_patient_list)):
    move_to_classify(split_patients, f'/nfs3-p1/zsxm/dataset/aorta_classify_ct_{lower_b}_{upper_b}/wrong_basic_img/{split_name}/3/')

### 3.2阴性数据

In [None]:
patient_roots = ['/nfs3-p1/zsxm/dataset/aorta_ct_img_label/negative/']
# Collect the names (not the full paths) of every patient directory, root by root.
patient_list = []
for patient_root in patient_roots:
    patient_list += [
        entry for entry in sorted(os.listdir(patient_root))
        if os.path.isdir(os.path.join(patient_root, entry))
    ]

In [None]:
# Seeded 80/20 patient-level split; both lists end up sorted.
random.seed(7888)
val_patient_list = random.sample(patient_list, int(len(patient_list)*0.2))
held_out = set(val_patient_list)
train_patient_list = sorted(p for p in patient_list if p not in held_out)
val_patient_list.sort()
n_train, n_val = len(train_patient_list), len(val_patient_list)
print(n_train, n_val, n_train + n_val)

In [None]:
# Recover the patient names of the existing split from the crop file names
# (the patient name is everything before the first '_').
ori_train_list = sorted({
    crop_name.split('_')[0]
    for crop_name in os.listdir('/nfs3-p1/zsxm/dataset/aorta_classify_ct_-100_500/train/0')
})
ori_val_list = sorted({
    crop_name.split('_')[0]
    for crop_name in os.listdir('/nfs3-p1/zsxm/dataset/aorta_classify_ct_-100_500/val/0')
})
n_train, n_val = len(ori_train_list), len(ori_val_list)
print(n_train, n_val, n_train + n_val, len(patient_list))

In [None]:
# Turn the recovered patient names into full patient-folder paths.
# NOTE(review): this overwrites the lists produced by the seeded random split.
train_patient_list = list(map(lambda name: os.path.join('/nfs3-p1/zsxm/dataset/aorta_ct_img_label/negative', name), ori_train_list))
val_patient_list = list(map(lambda name: os.path.join('/nfs3-p1/zsxm/dataset/aorta_ct_img_label/negative', name), ori_val_list))

In [None]:
def move_to_classify(patient_list, output_path):
    """Copy every file in each patient's ``crops`` folder into ``output_path``.

    Despite the name, files are copied, not moved: the source folders are left
    untouched.

    Args:
        patient_list: Iterable of patient directory paths, each expected to
            contain a ``crops`` sub-directory.
        output_path: Destination directory; created (with parents) when it
            does not exist yet.
    """
    if not os.path.exists(output_path):
        os.makedirs(output_path)

    for patient_dir in patient_list:
        print(f'****Processing {patient_dir}****')
        crops_dir = os.path.join(patient_dir, 'crops')
        for crop_name in os.listdir(crops_dir):
            shutil.copy(os.path.join(crops_dir, crop_name), output_path)
            
# Copy the crops of the negative train and val patients into the class "0"
# folders of the wrong_basic_img classification dataset.
move_to_classify(train_patient_list, f'/nfs3-p1/zsxm/dataset/aorta_classify_ct_{lower_b}_{upper_b}/wrong_basic_img/train/0/')
move_to_classify(val_patient_list, f'/nfs3-p1/zsxm/dataset/aorta_classify_ct_{lower_b}_{upper_b}/wrong_basic_img/val/0/')

## 4.检查并统计crop图像尺寸

In [None]:
import os
from PIL import Image
import matplotlib.pyplot as plt
# Imported from the public namespace rather than the internal
# numpy.lib.function_base module.
from numpy import median

# Every class folder (0-3) of the "center" crop dataset, train before val for
# each class.
paths = [f'/nfs3-p1/zsxm/dataset/aorta_classify_ct_{lower_b}_{upper_b}/center/{split}/{cat}/'
         for cat in range(4)
         for split in ('train', 'val')]

# Height of every crop image, accumulated over all folders.
size_list = []

# Number of crops whose height differs from their width.
count = 0
for path in paths:
    path_count = 0
    for f in os.listdir(path):
        path_count += 1
        # Only the dimensions are needed; the context manager releases the
        # file handle immediately instead of leaving it open per image.
        with Image.open(os.path.join(path, f)) as img:
            height, width = img.height, img.width
        size_list.append(height)
        if height != width:
            print(f'{os.path.join(path, f)}: height:{height}, width:{width}')
            count += 1
    print(f'{path}: {path_count}')

if not size_list:
    # Fail with a clear message instead of a ZeroDivisionError further down.
    raise ValueError('no crop images found under the configured paths')

size_list.sort()

# Accumulate the total and a size -> occurrences table. The accumulator is not
# called `sum` so the builtin stays usable in later cells.
size_total = 0
hash_list = {}
for size in size_list:
    size_total += size
    hash_list[size] = hash_list.get(size, 0) + 1

avg = size_total / len(size_list)

# key and -key-1 index the two middle elements of the sorted list (the same
# element when the length is odd).
key = len(size_list) // 2
med = (size_list[key] + size_list[-key-1]) / 2

# Most frequent size first; the sort is stable, so ties keep ascending size order.
v_list = list(hash_list.items())
v_list.sort(key=lambda x: x[1], reverse=True)
mode = v_list[0][0]

print(f'avg:{avg}, median:{med}, mode:{mode}({v_list[0][1]}), h_w_not_equal:{count}')

In [None]:
# Histogram of the crop heights in size_list, using bins `interval` wide that
# start at the smallest observed size.
interval = 3
smallest_size, largest_size = min(size_list), max(size_list)
bin_num = (largest_size - smallest_size) // interval
bins = list(range(smallest_size, largest_size + interval, interval))
fig = plt.figure(figsize=(30, 8), dpi=160)
plt.hist(size_list, bins=bins)
# Label every bin edge on the x axis.
plt.xticks(ticks=bins)
plt.show()

In [None]:
# Largest crop height collected in size_list.
print(max(size_list))

In [None]:
def delete_wrong(ori_path, new_path,
                 backup_path='/nfs3-p1/zsxm/dataset/aorta_classify_ct_-100_500/wrong_basic_img/backup/val/0'):
    """Move images that exist only in ``new_path`` into a backup directory.

    Only files in ``new_path`` whose name splits into exactly three
    ``_``-separated parts are considered; any other file is left untouched.
    Files of ``new_path`` that are also present in ``ori_path`` stay in place.

    Args:
        ori_path: Reference directory; its file names define what is kept.
        new_path: Directory to clean.
        backup_path: Directory that receives the moved files. Defaults to the
            location that was previously hard-coded, so existing two-argument
            calls behave as before.

    Returns:
        None. Set sizes and the names missing from ``new_path`` are printed.
    """
    ori_set = set(os.listdir(ori_path))
    new_set = {img for img in os.listdir(new_path) if len(img.split('_')) == 3}
    extra = new_set - ori_set
    print(len(ori_set), len(new_set))
    print(ori_set - new_set)
    print(len(extra))
    if extra:
        # If backup_path is not an existing directory, shutil.move either fails
        # or renames the file to backup_path itself, so successive moves would
        # overwrite one another. Make sure the directory exists first.
        os.makedirs(backup_path, exist_ok=True)
    for img in sorted(extra):
        shutil.move(os.path.join(new_path, img), backup_path)
    
# Clean the wrong_basic_img val/0 folder against the reference val/0 folder.
delete_wrong(f'/nfs3-p1/zsxm/dataset/aorta_classify_ct_{lower_b}_{upper_b}/val/0/', f'/nfs3-p1/zsxm/dataset/aorta_classify_ct_{lower_b}_{upper_b}/wrong_basic_img/val/0/')

In [None]:
# Dry run: for every split and class, print the path of each image in the
# "center" dataset whose file name also appears in the matching backup folder.
# The actual deletion is left commented out.
backup_path = '/nfs3-p1/zsxm/dataset/aorta_classify_ct_-100_500/wrong_basic_img/backup'
center_path = '/nfs3-p1/zsxm/dataset/aorta_classify_ct_-100_500/center'
for dataset in ['train', 'val']:
    for cat in range(4):
        print(f'*****{dataset}/{cat}*****')
        # File names present in the backup folder for this split/class.
        backup_set = set(os.listdir(os.path.join(backup_path, dataset, str(cat))))
        for img in os.listdir(os.path.join(center_path, dataset, str(cat))):
            if img in backup_set:
                print(os.path.join(center_path, dataset, str(cat), img))
                #os.remove(os.path.join(center_path, dataset, str(cat), img))