In [8]:
from math import sqrt
import copy
import  traceback
import shutil
import random

import numpy as np  # linear algebra
import pydicom
import os
import matplotlib.pyplot as plt
import cv2
from pydicom.uid import UID
from PIL import Image
from tqdm import tqdm
import openpyxl

In [2]:
def load_scan(path):
    slices = [] #slices = [pydicom.dcmread(path + '/' + s) for s in filter(lambda x: x.endswith('.dcm'), os.listdir(path))]
    for s in os.listdir(path):
        if os.path.isdir(os.path.join(path, s)): #if not s.endswith('.dcm'):
            continue
        sl = pydicom.dcmread(os.path.join(path, s))
        try:
            sl_p = sl.pixel_array
        except AttributeError:
            traceback.print_exc()
            print(f'\tDelete {os.path.join(path, s)}')
            os.remove(os.path.join(path, s))
        else:
            slices.append(sl)
    slices.sort(key=lambda x: float(x.InstanceNumber))
    return slices

In [3]:
window_width, window_level = 600, 200
lower_b, upper_b = window_level - window_width//2, window_level + window_width//2
print(lower_b, upper_b)

-100 500


## 1.阴性数据

In [4]:
# 打印哪个病例没有2
def print_no_cta(input_dir):
    for patient in os.listdir(input_dir):
        patient_path = os.path.join(input_dir, patient)
        if os.path.isfile(patient_path): continue
        if '2' not in os.listdir(patient_path): print(patient_path)
            
print_no_cta('/nfs3-p2/zsxm/dataset/2021-9-17-negative')

/nfs3-p2/zsxm/dataset/2021-9-17-negative/wanghaifang-40-47-83-160


In [10]:
# 将2下的dcm文件根据窗宽窗位转化为png图片
def generate_image(input_folder):
    for patient in sorted(os.listdir(input_folder)):
        if os.path.isfile(os.path.join(input_folder, patient)):
            continue
        print(f'****Processing {patient}****')
        for scan in os.listdir(os.path.join(input_folder, patient)):
            if scan != '2':
                continue
            name = patient #name = patient.split('-')[0]
            image_path = os.path.join(input_folder, patient, scan, f'images_{lower_b}_{upper_b}')
            if os.path.exists(image_path):
                shutil.rmtree(image_path)
            os.mkdir(image_path)

            ct = load_scan(os.path.join(input_folder, patient, scan))
            print_flag = False
            for i in range(len(ct)):
                img = ct[i].pixel_array.astype(np.int16)
                intercept = ct[i].RescaleIntercept
                slope = ct[i].RescaleSlope
                if slope != 1:
                    img = (slope * img.astype(np.float64)).astype(np.int16)
                img += np.int16(intercept)
                img = np.clip(img, lower_b, upper_b)
                img = ((img-lower_b)/(upper_b-lower_b)*255).astype(np.uint8)
                img = Image.fromarray(img)
                if img.height != img.width:
                    if not print_flag:
                        print(patient, 'height not equal to width\n')
                        print_flag = True
                    height = width = min(img.height, img.width)
                    if img.height != height:
                        start = (img.height - height) / 2
                        img = img.crop((0, start, img.width, start + height))
                    elif img.width != width:
                        start = (img.width - width) / 2
                        img = img.crop((start, 0, start + height, img.height))
                img.save(os.path.join(image_path, f'{name}_{i:04d}.png'))

generate_image('/nfs3-p1/zsxm/dataset/2021-9-17-negative/')

****Processing baixiaoxu-22-31-52-147****
****Processing baojinjia-17-21-35-83****
****Processing baoyanzu-21-25-42-93****
****Processing bianyinqiao-31-38-59-137****
****Processing bianzhaorong-17-22-38-92****
****Processing caijingnan1-41-50-73-184****
****Processing caiqishu-34-43-70-176****
****Processing caiyouzhe-14-18-32-83****
****Processing caoyuxia-16-21-35-87****
****Processing chenfugui-36-46-76-149****
****Processing chengzhiqiang-33-41-67-159****
****Processing chenjian-26-30-44-89****
****Processing chenjianhua-14-17-32-80****
****Processing chenjianjun-25-34-65-175****
****Processing chenjianping-15-22-41-91****
****Processing chenjingjing-38-47-67-184****
****Processing chenjinmei-12-17-34-80****
****Processing chenpanyang-21-31-71-161****
****Processing chenqiuying-13-17-30-78****
****Processing chensaimu-34-44-78-166****
****Processing chenxiufen-17-21-34-82****
****Processing chenyanli-20-23-34-80****
****Processing chenyinfen-18-22-38-81****
****Processing chesheng

In [11]:
# 将各个病例中的png图片文件夹统一移动到一起供yolov5检测
def move_together_for_detect(input_folder, dst_path):
    workbook_path = os.path.join(input_folder, 'label.xlsx')
    wb = openpyxl.load_workbook(workbook_path)
    sheet = wb['Sheet1']
    
    if not os.path.exists(dst_path):
        os.mkdir(dst_path)
    root_name = input_folder.split('/')[-1] if input_folder.split('/')[-1] != '' else input_folder.split('/')[-2]
    dst_path = os.path.join(dst_path, root_name)

    for patient in sorted(os.listdir(input_folder)):
        if os.path.isfile(os.path.join(input_folder, patient)):
            continue
        flag = True
        for row in sheet.iter_rows():
            if row[0].value == patient.split('-')[0]:
                if row[3].value is not None and row[4].value is not None:
                    flag = False
                break
        else:
            raise Exception(f'cant find {patient.split("-")[0]} in label.xlsx')
        if flag: continue
        print(f'****Processing {patient}****')
        name = patient #name = patient.split('-')[0]
        if os.path.exists(os.path.join(dst_path, name)):
            print(f"\tremove {os.path.join(dst_path, name)}")
            shutil.rmtree(os.path.join(dst_path, name))

        try:
            shutil.copytree(os.path.join(input_folder, patient, '2', f'images_{lower_b}_{upper_b}'), os.path.join(dst_path, name))
        except:
            traceback.print_exc()

move_together_for_detect('/nfs3-p1/zsxm/dataset/2021-9-17-negative/', '/nfs3-p1/zsxm/dataset/9_detect/')

****Processing baixiaoxu-22-31-52-147****
****Processing baojinjia-17-21-35-83****
****Processing baoyanzu-21-25-42-93****
****Processing bianyinqiao-31-38-59-137****
****Processing bianzhaorong-17-22-38-92****
****Processing caijingnan1-41-50-73-184****
****Processing caiyouzhe-14-18-32-83****
****Processing caoyuxia-16-21-35-87****
****Processing chenfugui-36-46-76-149****
****Processing chengzhiqiang-33-41-67-159****
****Processing chenjian-26-30-44-89****
****Processing chenjianhua-14-17-32-80****
****Processing chenjianjun-25-34-65-175****
****Processing chenjianping-15-22-41-91****
****Processing chenjingjing-38-47-67-184****
****Processing chenjinmei-12-17-34-80****
****Processing chenpanyang-21-31-71-161****
****Processing chenqiuying-13-17-30-78****
****Processing chensaimu-34-44-78-166****
****Processing chenxiufen-17-21-34-82****
****Processing chenyanli-20-23-34-80****
****Processing chenyinfen-18-22-38-81****
****Processing cheshengying-25-34-64-139****
****Processing deng

In [13]:
#将检测结果移动回原文件夹内
def move_back(result_path, ori_path):
    for patient in sorted(os.listdir(result_path)):
        print(f'Processing {patient}')
        p_res_path = os.path.join(result_path, patient)
        o_res_path = os.path.join(ori_path, patient, '2', f'pred_images_{lower_b}_{upper_b}')
        if os.path.exists(o_res_path):
            shutil.rmtree(o_res_path)
        os.mkdir(o_res_path)
        for file in os.listdir(p_res_path):
            if os.path.isfile(os.path.join(p_res_path, file)):
                shutil.move(os.path.join(p_res_path, file), os.path.join(o_res_path, file))
            elif os.path.isdir(os.path.join(p_res_path, file)):
                if os.path.exists(os.path.join(ori_path, patient, file)):
                    shutil.rmtree(os.path.join(ori_path, patient, file))
                shutil.move(os.path.join(p_res_path, file), os.path.join(ori_path, patient, '2', file))
        os.rmdir(p_res_path)
    os.rmdir(result_path)
                
move_back('/home/zsxm/pythonWorkspace/yolov5_old/runs/detect/2021-9-17-negative', '/nfs3-p1/zsxm/dataset/2021-9-17-negative/')

Processing qianaqin-18-26-52-141
Processing huangweigen-24-32-62-138
Processing lizufu-21-26-42-93
Processing liyingsheng-13-17-34-87
Processing wangwei-16-20-36-87
Processing fangrurong-16-25-55-132
Processing wangbeibei-28-38-62-148
Processing fangchengfu-26-39-76-182
Processing luoshuijin-15-19-35-74
Processing caoyuxia-16-21-35-87
Processing luojun-22-29-51-139
Processing chenyanli-20-23-34-80
Processing jiangshengfang-19-23-34-79
Processing taoxiuwei-26-34-54-137
Processing jinangui-17-21-37-81
Processing shengmingzhen-10-13-26-75
Processing dongyonghong-14-19-34-85
Processing wangjianlin-15-21-37-82
Processing lishankui-17-25-56-142
Processing bianzhaorong-17-22-38-92
Processing bianyinqiao-31-38-59-137
Processing wangying-30-36-77-162
Processing chenjinmei-12-17-34-80
Processing wangfancun-28-39-72-175
Processing malingfeng-12-17-32-75
Processing hexiena-18-22-38-82
Processing wangyouxin-34-42-64-160
Processing chenyinfen-18-22-38-81
Processing heguansheng-15-20-43-92
Processing

In [21]:
# 切出主动脉,这里有问题啊，branch_end前后0.3的切片切不出来，建议以后更改为和下面一样的方案
def find_coordinate(height, width, label_file, aorta):
    with open(label_file, 'r') as f:
        lines = f.readlines()
    assert len(lines) <= 2, f'label.txt应该存储不多于2个label：{label_file.split("/")[-1]}'
    if len(lines) == 1:
        assert aorta == 'j', f'如果只有一个label那么此时应为降主动脉, 但实际为{aorta}：{label_file.split("/")[-1]}'
        corr = list(map(lambda x: float(x), lines[0].split()))
        x, y, w, h = corr[1], corr[2], corr[3], corr[4]
        assert 0.25 < x < 0.75 and 0.2 < y < 0.8, f'边界框中心({x}, {y})出界：{label_file.split("/")[-1]}'
    else:
        corr1, corr2 = list(map(lambda x: float(x), lines[0].split())), list(map(lambda x: float(x), lines[1].split()))
        assert 0.25 < corr1[1] < 0.75 and 0.2 < corr1[2] < 0.8, f'边界框1中心({corr1[1]}, {corr1[2]})出界：{label_file.split("/")[-1]}'
        assert 0.25 < corr2[1] < 0.75 and 0.2 < corr2[2] < 0.8, f'边界框2中心({corr2[1]}, {corr2[2]})出界：{label_file.split("/")[-1]}'
        if aorta == 's':
            x, y, w, h = (corr1[1], corr1[2], corr1[3], corr1[4]) if corr1[2] < corr2[2] else (corr2[1], corr2[2], corr2[3], corr2[4])
        elif aorta == 'j':
            x, y, w, h = (corr1[1], corr1[2], corr1[3], corr1[4]) if corr1[2] > corr2[2] else (corr2[1], corr2[2], corr2[3], corr2[4])
        else:
            raise Exception(f'aorta 应该为"s"或"j"其中之一: {label_file.split("/")[-1]}')
    w, h = int(width*w), int(height*h)
    w, h = max(w, h), max(w, h)
    return int(width*x-w/2), int(height*y-h/2), int(width*x+w/2+1), int(height*y+h/2+1)

def crop_images(input_path, error_patient_list):
    workbook_path = os.path.join(input_path, 'label.xlsx')
    wb = openpyxl.load_workbook(workbook_path)
    sheet = wb['Sheet1']
    
    for patient in sorted(os.listdir(input_path)):
        if os.path.isfile(os.path.join(input_path, patient)):
            continue
        flag = True
        for row in sheet.iter_rows():
            if row[0].value == patient.split('-')[0]:
                if row[3].value is not None and row[4].value is not None:
                    flag = False
                    ls = row[4].value.split('-')
                    assert len(ls) == 4, f'{patient} ls wrong'
                    aorta_start, branch_start = int(ls[0])-1, int(ls[1])-1
                    branch_end, aorta_end = int(ls[2])-1, int(ls[3])-1
                    lsct = row[3].value.split('-')
                    assert len(lsct) == 4, f'{patient} lsct wrong'
                    ct_start, ct_end = int(lsct[0])-1, int(lsct[3])-1
                break
        if flag: continue
        print(f'******Processing {patient}******')
        image_path = os.path.join(input_path, patient, '2', f'images_{lower_b}_{upper_b}')
        label_path = os.path.join(input_path, patient, '2', 'labels')
        crop_path = os.path.join(input_path, patient, '2', f'crops_{lower_b}_{upper_b}')
        if os.path.exists(crop_path):
            shutil.rmtree(crop_path)
        os.mkdir(crop_path)
        
        m, n = ct_end - ct_start, aorta_end - aorta_start
        ot, q = [], 0
        for p in range(m):
            min_dis = abs(q/n-p/m)
            while q < n:
                q += 1
                if abs(q/n-p/m) < min_dis:
                    min_dis = abs(q/n-p/m)
                else:
                    q -= 1
                    ot.append(q)
                    break
        assert len(ot) == m, f'{patient} ot wrong'
        len_ct = len(os.listdir(os.path.join(input_path, patient, '1', f'images_{lower_b}_{upper_b}')))
        len_cta = len(os.listdir(image_path))
        cta_ct_table = [None] * len_cta
        for i in range(len_ct):
            idx = aorta_start + (i-ct_start)//m*n + ot[(i-ct_start)%m]
            if idx < 0 or idx >= len_cta: continue
            cta_ct_table[idx] = i
        
        crop_flag = True
        offset = branch_end - branch_start
        start, end = branch_start + int(0.1*offset), branch_end - int(0.2*offset)
        for i in range(start, end):
            img = Image.open(os.path.join(image_path, f'{patient}_{i:04d}.png'))
            img = np.array(img)
            try:
                x1, y1, x2, y2 = find_coordinate(*img.shape[0:2], os.path.join(label_path, f'{patient}_{i:04d}.txt'), 's')
            except:
                traceback.print_exc()
                crop_flag = False
            else:#if crop_flag:
                crop = img[y1:y2, x1:x2]
                crop = Image.fromarray(crop)
                crop_name = f'{patient}_s_{i:04d}.png' if cta_ct_table[i] is None else f'{patient}_s_{i:04d}_{cta_ct_table[i]:04d}.png'
                crop.save(os.path.join(crop_path, crop_name))
            try:
                x1, y1, x2, y2 = find_coordinate(*img.shape[0:2], os.path.join(label_path, f'{patient}_{i:04d}.txt'), 'j')
            except:
                traceback.print_exc()
                crop_flag = False
            else:#if crop_flag:
                crop = img[y1:y2, x1:x2]
                crop = Image.fromarray(crop)
                crop_name = f'{patient}_j_{i:04d}.png' if cta_ct_table[i] is None else f'{patient}_j_{i:04d}_{cta_ct_table[i]:04d}.png'
                crop.save(os.path.join(crop_path, crop_name))
        offset = aorta_end - branch_end
        start, end = branch_end + int(0.1*offset), aorta_end - int(0.2*offset)
        for i in range(start, end):
            img = Image.open(os.path.join(image_path, f'{patient}_{i:04d}.png'))
            img = np.array(img)
            try:
                x1, y1, x2, y2 = find_coordinate(*img.shape[0:2], os.path.join(label_path, f'{patient}_{i:04d}.txt'), 'j')
            except:
                traceback.print_exc()
                crop_flag = False
            else:#if crop_flag:
                crop = img[y1:y2, x1:x2]
                crop = Image.fromarray(crop)
                crop_name = f'{patient}_j_{i:04d}.png' if cta_ct_table[i] is None else f'{patient}_j_{i:04d}_{cta_ct_table[i]:04d}.png'
                crop.save(os.path.join(crop_path, crop_name))
        if not crop_flag:
            #print('Delete crop_path')
            #shutil.rmtree(crop_path)
            error_patient_list.append(patient)
            
epl1 = []
crop_images('/nfs3-p1/zsxm/dataset/2021-9-17-negative/', epl1)

******Processing baixiaoxu-22-31-52-147******
******Processing baojinjia-17-21-35-83******
******Processing baoyanzu-21-25-42-93******
******Processing bianyinqiao-31-38-59-137******
******Processing bianzhaorong-17-22-38-92******
******Processing caijingnan1-41-50-73-184******
******Processing caiyouzhe-14-18-32-83******
******Processing caoyuxia-16-21-35-87******
******Processing chenfugui-36-46-76-149******
******Processing chengzhiqiang-33-41-67-159******
******Processing chenjian-26-30-44-89******
******Processing chenjianhua-14-17-32-80******
******Processing chenjianjun-25-34-65-175******
******Processing chenjianping-15-22-41-91******
******Processing chenjingjing-38-47-67-184******
******Processing chenjinmei-12-17-34-80******
******Processing chenpanyang-21-31-71-161******
******Processing chenqiuying-13-17-30-78******
******Processing chensaimu-34-44-78-166******
******Processing chenxiufen-17-21-34-82******
******Processing chenyanli-20-23-34-80******
******Processing cheny

Traceback (most recent call last):
  File "<ipython-input-21-965ae67ecc6d>", line 83, in crop_images
    x1, y1, x2, y2 = find_coordinate(*img.shape[0:2], os.path.join(label_path, f'{patient}_{i:04d}.txt'), 's')
  File "<ipython-input-21-965ae67ecc6d>", line 7, in find_coordinate
    assert aorta == 'j', f'如果只有一个label那么此时应为降主动脉, 但实际为{aorta}：{label_file.split("/")[-1]}'
AssertionError: 如果只有一个label那么此时应为降主动脉, 但实际为s：qihuazhong-16-20-29-82_0021.txt


******Processing quzhuming-31-41-69-192******
******Processing shenfengmei-29-36-58-130******
******Processing shengmingzhen-10-13-26-75******
******Processing shenyan-16-23-57-125******
******Processing shenyunzhou-24-33-70-141******
******Processing shouyunfang-20-26-42-136******
******Processing taoxiuwei-26-34-54-137******


Traceback (most recent call last):
  File "<ipython-input-21-965ae67ecc6d>", line 108, in crop_images
    x1, y1, x2, y2 = find_coordinate(*img.shape[0:2], os.path.join(label_path, f'{patient}_{i:04d}.txt'), 'j')
  File "<ipython-input-21-965ae67ecc6d>", line 3, in find_coordinate
    with open(label_file, 'r') as f:
FileNotFoundError: [Errno 2] No such file or directory: '/nfs3-p1/zsxm/dataset/2021-9-17-negative/taoxiuwei-26-34-54-137/2/labels/taoxiuwei-26-34-54-137_0296.txt'
Traceback (most recent call last):
  File "<ipython-input-21-965ae67ecc6d>", line 108, in crop_images
    x1, y1, x2, y2 = find_coordinate(*img.shape[0:2], os.path.join(label_path, f'{patient}_{i:04d}.txt'), 'j')
  File "<ipython-input-21-965ae67ecc6d>", line 3, in find_coordinate
    with open(label_file, 'r') as f:
FileNotFoundError: [Errno 2] No such file or directory: '/nfs3-p1/zsxm/dataset/2021-9-17-negative/taoxiuwei-26-34-54-137/2/labels/taoxiuwei-26-34-54-137_0297.txt'


******Processing tuyurong-15-18-34-88******
******Processing wangahai-16-22-43-91******


Traceback (most recent call last):
  File "<ipython-input-21-965ae67ecc6d>", line 108, in crop_images
    x1, y1, x2, y2 = find_coordinate(*img.shape[0:2], os.path.join(label_path, f'{patient}_{i:04d}.txt'), 'j')
  File "<ipython-input-21-965ae67ecc6d>", line 3, in find_coordinate
    with open(label_file, 'r') as f:
FileNotFoundError: [Errno 2] No such file or directory: '/nfs3-p1/zsxm/dataset/2021-9-17-negative/wangahai-16-22-43-91/2/labels/wangahai-16-22-43-91_0722.txt'


******Processing wangbeibei-28-38-62-148******
******Processing wangfancun-28-39-72-175******
******Processing wangjianlin-15-21-37-82******
******Processing wangjie-15-19-31-49******
******Processing wangwei-16-20-36-87******
******Processing wangying-30-36-77-162******


Traceback (most recent call last):
  File "<ipython-input-21-965ae67ecc6d>", line 83, in crop_images
    x1, y1, x2, y2 = find_coordinate(*img.shape[0:2], os.path.join(label_path, f'{patient}_{i:04d}.txt'), 's')
  File "<ipython-input-21-965ae67ecc6d>", line 7, in find_coordinate
    assert aorta == 'j', f'如果只有一个label那么此时应为降主动脉, 但实际为{aorta}：{label_file.split("/")[-1]}'
AssertionError: 如果只有一个label那么此时应为降主动脉, 但实际为s：wangying-30-36-77-162_0041.txt


******Processing wangyouxiang-21-27-41-131******
******Processing wangyouxin-34-42-64-160******
******Processing wangyuncai-14-20-34-91******


In [22]:
print(len(epl1))
print(epl1)

4
['qihuazhong-16-20-29-82', 'taoxiuwei-26-34-54-137', 'wangahai-16-22-43-91', 'wangying-30-36-77-162']


## 2.疾病数据

In [47]:
# 打印哪个病例没有2
def print_no_cta(input_dir):
    no_cta_list = []
    for patient in sorted(os.listdir(input_dir)):
        patient_path = os.path.join(input_dir, patient)
        if os.path.isfile(patient_path): continue
        if '2' not in os.listdir(patient_path):
            no_cta_list.append(patient_path)
            print(patient_path, os.listdir(patient_path))
    return no_cta_list
            
no_cta_list = print_no_cta('/nfs3-p2/zsxm/dataset/2021-10-19-pau')

In [36]:
#将某个scan重命名为2，如果thickness距离1的thickness相同则选择thickness小的重命名
for patient in no_cta_list:
    scans = os.listdir(patient)
    if '1' not in scans:
        print(patient, 'not have 1')
        continue
    if len(scans) == 2:
        for scan in scans:
            if scan != '1':
                os.rename(os.path.join(patient, scan), os.path.join(patient, '2'))
    else:
        tk_list = []
        for scan in scans:
            for s in os.listdir(os.path.join(patient, scan)):
                if os.path.isdir(os.path.join(patient, scan, s)) or not s.endswith('.dcm'):
                    continue
                sl = pydicom.dcmread(os.path.join(patient, scan, s))
                try:
                    sl_p = sl.pixel_array
                except AttributeError:
                    continue
                else:
                    if scan == '1':
                        ct_thickness = sl.SliceThickness
                    else:
                        tk_list.append((sl.SliceThickness, scan))
        min_dis, min_scan, min_tk = 10000, None, 10000
        for tk, scan in tk_list:
            dis = abs(tk-ct_thickness)
            if dis < min_dis or (dis == min_dis and tk < min_tk):
                min_dis, min_scan, min_tk = dis, scan, tk
        print(patient, min_scan)
        os.rename(os.path.join(patient, min_scan), os.path.join(patient, '2'))

/nfs3-p1/zsxm/dataset/2021-9-13/chenmimao-S-Im32-40 2-3mm
/nfs3-p1/zsxm/dataset/2021-9-13/lidaoheng-S-Im22-36-J-Im22-92 2-3mm
/nfs3-p1/zsxm/dataset/2021-9-13/lijianming-J-Im32-124 2-3mm
/nfs3-p1/zsxm/dataset/2021-9-13/miqiurao-S-Im22-34--J-Im22-85 2-5mm
/nfs3-p1/zsxm/dataset/2021-9-13/panhejian-J-Im39-57 2-3mm
/nfs3-p1/zsxm/dataset/2021-9-13/shenyangfeng-J-Im36-120 2-2mm
/nfs3-p1/zsxm/dataset/2021-9-13/wangxinchun-J-Im18-81 2-5mm
/nfs3-p1/zsxm/dataset/2021-9-13/wujufen-J-Im31-135 2-3mm
/nfs3-p1/zsxm/dataset/2021-9-13/wuyueming-J-Im36-138 2-3mm
/nfs3-p1/zsxm/dataset/2021-9-13/yangyulin-J-Im16-58 2-5mm
/nfs3-p1/zsxm/dataset/2021-9-13/yaojianmin-S-Im20-29-J-Im20-86 2-5mm
/nfs3-p1/zsxm/dataset/2021-9-13/yinyuanyuan-J-Im20-78 2-3mm
/nfs3-p1/zsxm/dataset/2021-9-13/yujinfang-J-Im31-106 2-3mm
/nfs3-p1/zsxm/dataset/2021-9-13/yuyunguo-J-Im12-15 2-5mm
/nfs3-p1/zsxm/dataset/2021-9-13/zhanghaitao-S-Im25-49-J-Im25-145 2-3.5mm
/nfs3-p1/zsxm/dataset/2021-9-13/zhanglimin-J-Im40-149 2-2.5mm
/nfs3-p1/zsx

In [50]:
# 将2下的dcm文件根据窗宽窗位转化为png图片
def generate_image(input_folder):
    workbook_path = os.path.join(input_folder, 'label.xlsx')
    wb = openpyxl.load_workbook(workbook_path)
    sheet = wb['Sheet1']
    for patient in sorted(os.listdir(input_folder)):
        if os.path.isfile(os.path.join(input_folder, patient)):
            continue
        flag = True
        for row in sheet.iter_rows():
            if row[0].value == patient.split('-')[0]:
                if row[3].value is not None and row[4].value is not None:
                    flag = False
                break
        else:
            #raise Exception(f'cant find {patient.split("-")[0]} in label.xlsx')
            print(f'cant find {patient.split("-")[0]} in label.xlsx')
        if flag: continue
        print(f'****Processing {patient}****')
        for scan in os.listdir(os.path.join(input_folder, patient)):
            if scan != '2':
                continue
            name = patient #name = patient.split('-')[0]
            image_path = os.path.join(input_folder, patient, scan, f'images_{lower_b}_{upper_b}')
            if os.path.exists(image_path):
                shutil.rmtree(image_path)
            os.mkdir(image_path)

            ct = load_scan(os.path.join(input_folder, patient, scan))
            print_flag = False
            for i in range(len(ct)):
                img = ct[i].pixel_array.astype(np.int16)
                intercept = ct[i].RescaleIntercept
                slope = ct[i].RescaleSlope
                if slope != 1:
                    img = (slope * img.astype(np.float64)).astype(np.int16)
                img += np.int16(intercept)
                img = np.clip(img, lower_b, upper_b)
                img = ((img-lower_b)/(upper_b-lower_b)*255).astype(np.uint8)
                img = Image.fromarray(img)
                if img.height != img.width:
                    if not print_flag:
                        print(patient, 'height not equal to width\n')
                        print_flag = True
                    height = width = min(img.height, img.width)
                    if img.height != height:
                        start = (img.height - height) / 2
                        img = img.crop((0, start, img.width, start + height))
                    elif img.width != width:
                        start = (img.width - width) / 2
                        img = img.crop((start, 0, start + height, img.height))
                img.save(os.path.join(image_path, f'{name}_{i:04d}.png'))

generate_image('/nfs3-p2/zsxm/dataset/2021-10-19-pau')
print('----------------------------------------------------------------------------')
generate_image('/nfs3-p2/zsxm/dataset/2021-10-19-imh')

----------------------------------------------------------------------------
****Processing yanyueying-J-30-98****


In [51]:
# 将各个病例中的png图片文件夹统一移动到一起供yolov5检测
def move_together_for_detect(input_folder, dst_path):
    workbook_path = os.path.join(input_folder, 'label.xlsx')
    wb = openpyxl.load_workbook(workbook_path)
    sheet = wb['Sheet1']
    
    if not os.path.exists(dst_path):
        os.mkdir(dst_path)
    root_name = input_folder.split('/')[-1] if input_folder.split('/')[-1] != '' else input_folder.split('/')[-2]
    dst_path = os.path.join(dst_path, root_name)

    for patient in sorted(os.listdir(input_folder)):
        if os.path.isfile(os.path.join(input_folder, patient)):
            continue
        flag = True
        for row in sheet.iter_rows():
            if row[0].value == patient.split('-')[0]:
                if row[3].value is not None and row[4].value is not None:
                    flag = False
                break
        else:
            raise Exception(f'cant find {patient.split("-")[0]} in label.xlsx')
        if flag: continue
        print(f'****Processing {patient}****')
        name = patient #name = patient.split('-')[0]
        if os.path.exists(os.path.join(dst_path, name)):
            print(f"\tremove {os.path.join(dst_path, name)}")
            shutil.rmtree(os.path.join(dst_path, name))

        try:
            shutil.copytree(os.path.join(input_folder, patient, '2', f'images_{lower_b}_{upper_b}'), os.path.join(dst_path, name))
        except:
            traceback.print_exc()

# move_together_for_detect('/nfs3-p1/zsxm/dataset/2021-9-8/', '/nfs3-p1/zsxm/dataset/9_detect/')
# move_together_for_detect('/nfs3-p1/zsxm/dataset/2021-9-13/', '/nfs3-p1/zsxm/dataset/9_detect/')
move_together_for_detect('/nfs3-p1/zsxm/dataset/2021-10-19-pau/', '/nfs3-p1/zsxm/dataset/9_detect/')
move_together_for_detect('/nfs3-p1/zsxm/dataset/2021-10-19-imh/', '/nfs3-p1/zsxm/dataset/9_detect/')

****Processing yanyueying-J-30-98****


In [52]:
#将检测结果移动回原文件夹内
def move_back(result_path, ori_path):
    for patient in sorted(os.listdir(result_path)):
        print(f'Processing {patient}')
        p_res_path = os.path.join(result_path, patient)
        o_res_path = os.path.join(ori_path, patient, '2', f'pred_images_{lower_b}_{upper_b}')
        if os.path.exists(o_res_path):
            shutil.rmtree(o_res_path)
        os.mkdir(o_res_path)
        for file in os.listdir(p_res_path):
            if os.path.isfile(os.path.join(p_res_path, file)):
                shutil.move(os.path.join(p_res_path, file), os.path.join(o_res_path, file))
            elif os.path.isdir(os.path.join(p_res_path, file)):
                if os.path.exists(os.path.join(ori_path, patient, file)):
                    shutil.rmtree(os.path.join(ori_path, patient, file))
                shutil.move(os.path.join(p_res_path, file), os.path.join(ori_path, patient, '2', file))
        os.rmdir(p_res_path)
    os.rmdir(result_path)
                
# move_back('/home/zsxm/pythonWorkspace/yolov5_old/runs/detect/2021-9-8', '/nfs3-p1/zsxm/dataset/2021-9-8/')
# move_back('/home/zsxm/pythonWorkspace/yolov5_old/runs/detect/2021-9-13', '/nfs3-p2/zsxm/dataset/2021-9-13/')
move_back('/home/zsxm/pythonWorkspace/yolov5_old/runs/detect/2021-10-19-pau', '/nfs3-p1/zsxm/dataset/2021-10-19-pau/')
move_back('/home/zsxm/pythonWorkspace/yolov5_old/runs/detect/2021-10-19-imh', '/nfs3-p2/zsxm/dataset/2021-10-19-imh/')

Processing bairuixin-J-29-46
Processing baochangmu-J-18-77
Processing baodezhong-J-41-44
Processing chenbingrong-J-61-96
Processing chenfuding-J-29-37
Processing chenmazhang-J-18-22
Processing chenmingbao-J-38-96
Processing chenrongqin-J-32-100
Processing chensimeng-J-31-33
Processing dongyoufa-J-48-87
Processing fanggaoshen-J-82-87
Processing fangpingan-J-29-34
Processing feiliangming-J-23-41
Processing gaohuarong-J-51-109
Processing gaozhihua-J-82-94
Processing gebingzhao-J-38-40
Processing guxin-J-80-88
Processing heqiang-J-36-45
Processing heshimo-J-44-46
Processing heshufang-J-28-30
Processing huajinyuan-J-58-82
Processing huanggentian-J-121-132
Processing huangyuanzhong-J-24-103
Processing huboan-J-33-131
Processing huxuegen-S-37-37-J-60-74
Processing jiangxianghong-J-132-146
Processing jinguozhong-J-37-116
Processing lifurong-J-54-62
Processing linjinmu-J-36-120
Processing linxiulian-S-17-18-J-17-17
Processing linyunfu-S-27-29
Processing liushurong-J-72-78
Processing liuyonglian

In [60]:
# 切出主动脉
def find_coordinate(height, width, label_file, aorta):
    with open(label_file, 'r') as f:
        lines = f.readlines()
    assert len(lines) <= 2, f'label.txt应该存储不多于2个label：{label_file.split("/")[-1]}'
    if len(lines) == 1:
        assert aorta == 'j', f'如果只有一个label那么此时应为降主动脉, 但实际为{aorta}：{label_file.split("/")[-1]}'
        corr = list(map(lambda x: float(x), lines[0].split()))
        x, y, w, h = corr[1], corr[2], corr[3], corr[4]
        assert 0.25 < x < 0.75 and 0.15 < y < 0.85, f'边界框中心({x}, {y})出界：{label_file.split("/")[-1]}'
    else:
        corr1, corr2 = list(map(lambda x: float(x), lines[0].split())), list(map(lambda x: float(x), lines[1].split()))
        assert 0.25 < corr1[1] < 0.75 and 0.15 < corr1[2] < 0.85, f'边界框1中心({corr1[1]}, {corr1[2]})出界：{label_file.split("/")[-1]}'
        assert 0.25 < corr2[1] < 0.75 and 0.15 < corr2[2] < 0.85, f'边界框2中心({corr2[1]}, {corr2[2]})出界：{label_file.split("/")[-1]}'
        if aorta == 's':
            x, y, w, h = (corr1[1], corr1[2], corr1[3], corr1[4]) if corr1[2] < corr2[2] else (corr2[1], corr2[2], corr2[3], corr2[4])
        elif aorta == 'j':
            x, y, w, h = (corr1[1], corr1[2], corr1[3], corr1[4]) if corr1[2] > corr2[2] else (corr2[1], corr2[2], corr2[3], corr2[4])
        else:
            raise Exception(f'aorta 应该为"s"或"j"其中之一: {label_file.split("/")[-1]}')
    w, h = int(width*w), int(height*h)
    w, h = max(w, h), max(w, h)
    return int(width*x-w/2), int(height*y-h/2), int(width*x+w/2+1), int(height*y+h/2+1)

def crop_images(input_path, error_patient_list):
    workbook_path = os.path.join(input_path, 'label.xlsx')
    wb = openpyxl.load_workbook(workbook_path)
    sheet = wb['Sheet1']
    
    for patient in sorted(os.listdir(input_path)):
        if os.path.isfile(os.path.join(input_path, patient)):
            continue
        flag = True
        for row in sheet.iter_rows():
            if row[0].value == patient.split('-')[0]:
                if row[3].value is not None and row[4].value is not None:
                    flag = False
                    pl = row[4].value.lower().split('-')
                    plct = row[3].value.lower().split('-')
                    assert len(pl) == len(plct), f'{input_path}:{patient}, {len(pl)}, {len(plct)}'
                break
        if flag: continue
        print(f'******Processing {patient}******')
        image_path = os.path.join(input_path, patient, '2', f'images_{lower_b}_{upper_b}')
        label_path = os.path.join(input_path, patient, '2', 'labels')
        crop_path = os.path.join(input_path, patient, '2', f'crops_{lower_b}_{upper_b}')
        if os.path.exists(crop_path):
            shutil.rmtree(crop_path)
        os.mkdir(crop_path)
        
        ct_lbs, cta_lbs = [], []
        for i in range(len(pl)):
            if pl[i] != 's' and  pl[i] != 'j':
                cta_lbs.append(int(pl[i])-1)
                ct_lbs.append(int(plct[i])-1)
        ct_start, ct_end, cta_start, cta_end = min(ct_lbs), max(ct_lbs), min(cta_lbs), max(cta_lbs)
        m, n = ct_end - ct_start, cta_end - cta_start
        ot, q = [], 0
        for p in range(m):
            min_dis = abs(q/n-p/m)
            while q < n:
                q += 1
                if abs(q/n-p/m) < min_dis:
                    min_dis = abs(q/n-p/m)
                else:
                    q -= 1
                    ot.append(q)
                    break
        assert len(ot) == m, f'{patient} ot wrong'
        len_ct = len(os.listdir(os.path.join(input_path, patient, '1', f'images_{lower_b}_{upper_b}')))
        len_cta = len(os.listdir(image_path))
        cta_ct_table = [None] * len_cta
        for i in range(len_ct):
            idx = cta_start + (i-ct_start)//m*n + ot[(i-ct_start)%m]
            if idx < 0 or idx >= len_cta: continue
            cta_ct_table[idx] = i
        
        crop_flag = True
        for i, s in enumerate(pl):
            if s != 's' and s != 'j':
                continue
            start, end = int(pl[i+1])-1, int(pl[i+2])
            for j in range(start, end):
                img = Image.open(os.path.join(image_path, f'{patient}_{j:04d}.png'))
                img = np.array(img)
                try:
                    x1, y1, x2, y2 = find_coordinate(*img.shape[0:2], os.path.join(label_path, f'{patient}_{j:04d}.txt'), s)
                except:
                    traceback.print_exc()
                    crop_flag = False
                else:#if crop_flag:
                    crop = img[y1:y2, x1:x2]
                    crop = Image.fromarray(crop)
                    crop_name = f'{patient}_{s}_{j:04d}.png' if cta_ct_table[j] is None else f'{patient}_{s}_{j:04d}_{cta_ct_table[j]:04d}.png'
                    crop.save(os.path.join(crop_path, crop_name))
        if not crop_flag:
            #print('Delete crop_path')
            #shutil.rmtree(crop_path)
            error_patient_list.append(patient)

epl1 = []

crop_images('/nfs3-p1/zsxm/dataset/2021-9-8/', epl1)
crop_images('/nfs3-p1/zsxm/dataset/2021-9-13/', epl1)
crop_images('/nfs3-p1/zsxm/dataset/2021-10-19-pau/', epl1)
crop_images('/nfs3-p1/zsxm/dataset/2021-10-19-imh/', epl1)

******Processing chenaijun-S-Im25-38-J-Im25-85******


Traceback (most recent call last):
  File "<ipython-input-60-755679fed7bb>", line 87, in crop_images
    x1, y1, x2, y2 = find_coordinate(*img.shape[0:2], os.path.join(label_path, f'{patient}_{j:04d}.txt'), s)
  File "<ipython-input-60-755679fed7bb>", line 7, in find_coordinate
    assert aorta == 'j', f'如果只有一个label那么此时应为降主动脉, 但实际为{aorta}：{label_file.split("/")[-1]}'
AssertionError: 如果只有一个label那么此时应为降主动脉, 但实际为s：chenaijun-S-Im25-38-J-Im25-85_0059.txt


******Processing chenazhao-S-Im17-30******
******Processing chengshizheng-J-Im30-145******
******Processing chenjuli-J-Im18-49******
******Processing chenlibo-S-Im39-57******
******Processing chenmin-J-Im26-67******
******Processing chenqiusheng-J-Im34-143******
******Processing chujiancheng-J-Im45-122******
******Processing dingzhiqun-S-Im21-29******
******Processing fangtianming-S-Im15-30-J-Im15-65******
******Processing gaosheng-J-Im19-67******
******Processing gaoxiangqin-J-Im24-75******
******Processing guanxujun-S-Im25-35-J-Im25-87******
******Processing guoheying-J-Im32-127******
******Processing hanchun-J-Im20-86******
******Processing heguoyu-J-Im36-141******
******Processing hexingwen-S-Im25-35-J-Im25-93******
******Processing hongjinjun-S-Im24-28-J-Im24-83******
******Processing hongzhimin-S-Im30-39-J-30-144******
******Processing huajin-J-Im30-67******
******Processing huangqiang-J-Im42-147******
******Processing huangxiansheng-J-Im25-59******
******Processing huangxihong-J

Traceback (most recent call last):
  File "<ipython-input-60-755679fed7bb>", line 87, in crop_images
    x1, y1, x2, y2 = find_coordinate(*img.shape[0:2], os.path.join(label_path, f'{patient}_{j:04d}.txt'), s)
  File "<ipython-input-60-755679fed7bb>", line 7, in find_coordinate
    assert aorta == 'j', f'如果只有一个label那么此时应为降主动脉, 但实际为{aorta}：{label_file.split("/")[-1]}'
AssertionError: 如果只有一个label那么此时应为降主动脉, 但实际为s：qiuyuanxing-S-Im30-32_0029.txt
Traceback (most recent call last):
  File "<ipython-input-60-755679fed7bb>", line 87, in crop_images
    x1, y1, x2, y2 = find_coordinate(*img.shape[0:2], os.path.join(label_path, f'{patient}_{j:04d}.txt'), s)
  File "<ipython-input-60-755679fed7bb>", line 7, in find_coordinate
    assert aorta == 'j', f'如果只有一个label那么此时应为降主动脉, 但实际为{aorta}：{label_file.split("/")[-1]}'
AssertionError: 如果只有一个label那么此时应为降主动脉, 但实际为s：qumin-S-Im22-35-J-Im22-28_0020.txt


******Processing shaoyefeng-S-Im34-43-J-Im34-48******
******Processing shengwenping-S-Im37-46-J-Im37-116******
******Processing shenjie-S-Im39-59-J-Im39-85******
******Processing shenqi-J-Im24-72******
******Processing shizhongheng-J-Im23-50******
******Processing tangabiao-S-Im41-49-J-Im41-138******
******Processing tangjinlong-J-Im25-41******
******Processing wanglibo-J-Im29-102******
******Processing wanglinsen-J-Im38-121******
******Processing wangqingdong-J-Im19-88******
******Processing wangyuefeng-S-Im34-38-J-Im34-144******
******Processing wangziyang-J-Im42-95******
******Processing weijuyun-S-Im24-32******
******Processing wutonggen-S-Im22-29-J-Im22-82******
******Processing xixiaoguo-S-Im16-31-J-Im16-43******
******Processing xuguochun-J-Im43-60******
******Processing xushichao-J-Im35-145******
******Processing xuyuping-S-Im21-33-J-Im21-69******


Traceback (most recent call last):
  File "<ipython-input-60-755679fed7bb>", line 87, in crop_images
    x1, y1, x2, y2 = find_coordinate(*img.shape[0:2], os.path.join(label_path, f'{patient}_{j:04d}.txt'), s)
  File "<ipython-input-60-755679fed7bb>", line 7, in find_coordinate
    assert aorta == 'j', f'如果只有一个label那么此时应为降主动脉, 但实际为{aorta}：{label_file.split("/")[-1]}'
AssertionError: 如果只有一个label那么此时应为降主动脉, 但实际为s：xuyuping-S-Im21-33-J-Im21-69_0018.txt
Traceback (most recent call last):
  File "<ipython-input-60-755679fed7bb>", line 87, in crop_images
    x1, y1, x2, y2 = find_coordinate(*img.shape[0:2], os.path.join(label_path, f'{patient}_{j:04d}.txt'), s)
  File "<ipython-input-60-755679fed7bb>", line 7, in find_coordinate
    assert aorta == 'j', f'如果只有一个label那么此时应为降主动脉, 但实际为{aorta}：{label_file.split("/")[-1]}'
AssertionError: 如果只有一个label那么此时应为降主动脉, 但实际为s：xuyuping-S-Im21-33-J-Im21-69_0019.txt
Traceback (most recent call last):
  File "<ipython-input-60-755679fed7bb>", line 87, in crop_

******Processing yangen-J-Im61-183******
******Processing yangxiulian-S-Im20-26-J-Im20-44******


Traceback (most recent call last):
  File "<ipython-input-60-755679fed7bb>", line 87, in crop_images
    x1, y1, x2, y2 = find_coordinate(*img.shape[0:2], os.path.join(label_path, f'{patient}_{j:04d}.txt'), s)
  File "<ipython-input-60-755679fed7bb>", line 7, in find_coordinate
    assert aorta == 'j', f'如果只有一个label那么此时应为降主动脉, 但实际为{aorta}：{label_file.split("/")[-1]}'
AssertionError: 如果只有一个label那么此时应为降主动脉, 但实际为s：yangxiulian-S-Im20-26-J-Im20-44_0017.txt


******Processing yangyuexian-S-Im38-63******


Traceback (most recent call last):
  File "<ipython-input-60-755679fed7bb>", line 87, in crop_images
    x1, y1, x2, y2 = find_coordinate(*img.shape[0:2], os.path.join(label_path, f'{patient}_{j:04d}.txt'), s)
  File "<ipython-input-60-755679fed7bb>", line 7, in find_coordinate
    assert aorta == 'j', f'如果只有一个label那么此时应为降主动脉, 但实际为{aorta}：{label_file.split("/")[-1]}'
AssertionError: 如果只有一个label那么此时应为降主动脉, 但实际为s：yangyuexian-S-Im38-63_0150.txt
Traceback (most recent call last):
  File "<ipython-input-60-755679fed7bb>", line 87, in crop_images
    x1, y1, x2, y2 = find_coordinate(*img.shape[0:2], os.path.join(label_path, f'{patient}_{j:04d}.txt'), s)
  File "<ipython-input-60-755679fed7bb>", line 7, in find_coordinate
    assert aorta == 'j', f'如果只有一个label那么此时应为降主动脉, 但实际为{aorta}：{label_file.split("/")[-1]}'
AssertionError: 如果只有一个label那么此时应为降主动脉, 但实际为s：yangyuexian-S-Im38-63_0151.txt
Traceback (most recent call last):
  File "<ipython-input-60-755679fed7bb>", line 87, in crop_images
    x1,

******Processing yangzhicheng-J-Im19-82******
******Processing yanjuanfeng-J-Im30-32******
******Processing yaozaisheng-J-Im16-28******
******Processing yesanghua-J-Im41-138******
******Processing yexinxiang-S-Im32-38-J-Im32-102******
******Processing yexiyou-J-Im35-139******
******Processing yingjianquan-S-Im12-14-J-Im12-36******
******Processing yujiazhen-S-Im19-27-J-Im19-77******


Traceback (most recent call last):
  File "<ipython-input-60-755679fed7bb>", line 87, in crop_images
    x1, y1, x2, y2 = find_coordinate(*img.shape[0:2], os.path.join(label_path, f'{patient}_{j:04d}.txt'), s)
  File "<ipython-input-60-755679fed7bb>", line 7, in find_coordinate
    assert aorta == 'j', f'如果只有一个label那么此时应为降主动脉, 但实际为{aorta}：{label_file.split("/")[-1]}'
AssertionError: 如果只有一个label那么此时应为降主动脉, 但实际为s：yujiazhen-S-Im19-27-J-Im19-77_0021.txt


******Processing zhanfangai-S-Im17-23-J-Im17-32******
******Processing zhangfuyang-S-Im15-20-J-Im15-87******
******Processing zhanghang-S-Im16-20-J-Im16-41******
******Processing zhangweiyan-J-Im40-57******
******Processing zhangyunqin-S-Im14-25-J-Im14-64******
******Processing zhaozanping-J-Im23-90******
******Processing zhaozhengxing-J-Im35-99******
******Processing zhongweiliang-J-Im66-170******
******Processing zhongxuefang-S-Im19-34-J-Im19-72******
******Processing zhoubozhong-J-Im29-131******
******Processing zhoufeng-S-Im35-40-J-Im35-147******
******Processing zhouliang-J-Im47-123******
******Processing zhoumingfang-J-Im36-41******
******Processing zhuchunguang-J-Im21-86******
******Processing zhuhancheng-S-Im27-36-J-Im27-86******
******Processing zongminghui-S-Im18-25-J-Im18-81******
******Processing caiweiguang-J-Im35-152******
******Processing chenbo-S-Im24-45-J-Im24-97******
******Processing chenfujun-J-Im43-100******
******Processing chenggang-J-Im18-81******
******Processi

Traceback (most recent call last):
  File "<ipython-input-60-755679fed7bb>", line 87, in crop_images
    x1, y1, x2, y2 = find_coordinate(*img.shape[0:2], os.path.join(label_path, f'{patient}_{j:04d}.txt'), s)
  File "<ipython-input-60-755679fed7bb>", line 3, in find_coordinate
    with open(label_file, 'r') as f:
FileNotFoundError: [Errno 2] No such file or directory: '/nfs3-p1/zsxm/dataset/2021-9-13/guquankang-J-Im18-69/2/labels/guquankang-J-Im18-69_0108.txt'
Traceback (most recent call last):
  File "<ipython-input-60-755679fed7bb>", line 87, in crop_images
    x1, y1, x2, y2 = find_coordinate(*img.shape[0:2], os.path.join(label_path, f'{patient}_{j:04d}.txt'), s)
  File "<ipython-input-60-755679fed7bb>", line 3, in find_coordinate
    with open(label_file, 'r') as f:
FileNotFoundError: [Errno 2] No such file or directory: '/nfs3-p1/zsxm/dataset/2021-9-13/guquankang-J-Im18-69/2/labels/guquankang-J-Im18-69_0109.txt'
Traceback (most recent call last):
  File "<ipython-input-60-755679f

******Processing bairuixin-J-29-46******
******Processing baochangmu-J-18-77******
******Processing baodezhong-J-41-44******
******Processing chenbingrong-J-61-96******
******Processing chenfuding-J-29-37******
******Processing chenmazhang-J-18-22******
******Processing chenmingbao-J-38-96******
******Processing chenrongqin-J-32-100******
******Processing chensimeng-J-31-33******
******Processing dongyoufa-J-48-87******
******Processing fanggaoshen-J-82-87******
******Processing fangpingan-J-29-34******
******Processing feiliangming-J-23-41******
******Processing gaohuarong-J-51-109******
******Processing gaozhihua-J-82-94******
******Processing gebingzhao-J-38-40******
******Processing guxin-J-80-88******
******Processing heqiang-J-36-45******
******Processing heshimo-J-44-46******
******Processing heshufang-J-28-30******
******Processing huajinyuan-J-58-82******
******Processing huanggentian-J-121-132******
******Processing huangyuanzhong-J-24-103******
******Processing huboan-J-33-1

Traceback (most recent call last):
  File "<ipython-input-60-755679fed7bb>", line 87, in crop_images
    x1, y1, x2, y2 = find_coordinate(*img.shape[0:2], os.path.join(label_path, f'{patient}_{j:04d}.txt'), s)
  File "<ipython-input-60-755679fed7bb>", line 7, in find_coordinate
    assert aorta == 'j', f'如果只有一个label那么此时应为降主动脉, 但实际为{aorta}：{label_file.split("/")[-1]}'
AssertionError: 如果只有一个label那么此时应为降主动脉, 但实际为s：heazi-S-16-30_0026.txt
Traceback (most recent call last):
  File "<ipython-input-60-755679fed7bb>", line 87, in crop_images
    x1, y1, x2, y2 = find_coordinate(*img.shape[0:2], os.path.join(label_path, f'{patient}_{j:04d}.txt'), s)
  File "<ipython-input-60-755679fed7bb>", line 7, in find_coordinate
    assert aorta == 'j', f'如果只有一个label那么此时应为降主动脉, 但实际为{aorta}：{label_file.split("/")[-1]}'
AssertionError: 如果只有一个label那么此时应为降主动脉, 但实际为s：heazi-S-16-30_0027.txt


******Processing hongshunchang-J-26-89******
******Processing huangbaichao-J-31-105******
******Processing hucailu-J-32-55******
******Processing huyaxiang-J-40-89******
******Processing huyingfu-S-31-39-J-31-105******
******Processing jiangbihua-S-21-30-J-21-55******
******Processing jiangjianping-J-17-63******
******Processing jiangqianfeng-J-35-98******
******Processing jiatiandeng-S-41-55-J-41-142******
******Processing jinguoqing-S-15-24-J-15-76******
******Processing kebaoming-S-29-40******
******Processing laidaishan-J-24-47******
******Processing lichengqian-J-35-100******
******Processing liguihua-S-22-24******
******Processing limingyu-J-27-55******
******Processing lindepei-J-136-158******
******Processing lingsulan-J-46-107******
******Processing linjian-J-34-105******
******Processing lisanliang-J-37-101******
******Processing lisong-J-39-158******
******Processing luae-J-30-83******
******Processing lujikang-S-17-31******


Traceback (most recent call last):
  File "<ipython-input-60-755679fed7bb>", line 87, in crop_images
    x1, y1, x2, y2 = find_coordinate(*img.shape[0:2], os.path.join(label_path, f'{patient}_{j:04d}.txt'), s)
  File "<ipython-input-60-755679fed7bb>", line 7, in find_coordinate
    assert aorta == 'j', f'如果只有一个label那么此时应为降主动脉, 但实际为{aorta}：{label_file.split("/")[-1]}'
AssertionError: 如果只有一个label那么此时应为降主动脉, 但实际为s：lujikang-S-17-31_0031.txt
Traceback (most recent call last):
  File "<ipython-input-60-755679fed7bb>", line 87, in crop_images
    x1, y1, x2, y2 = find_coordinate(*img.shape[0:2], os.path.join(label_path, f'{patient}_{j:04d}.txt'), s)
  File "<ipython-input-60-755679fed7bb>", line 7, in find_coordinate
    assert aorta == 'j', f'如果只有一个label那么此时应为降主动脉, 但实际为{aorta}：{label_file.split("/")[-1]}'
AssertionError: 如果只有一个label那么此时应为降主动脉, 但实际为s：lujikang-S-17-31_0032.txt


******Processing luojincha-S-12-24-J-12-44******
******Processing miaoyihua-J-17-28******
******Processing nimeihua-S-27-43-J-27-64******


Traceback (most recent call last):
  File "<ipython-input-60-755679fed7bb>", line 87, in crop_images
    x1, y1, x2, y2 = find_coordinate(*img.shape[0:2], os.path.join(label_path, f'{patient}_{j:04d}.txt'), s)
  File "<ipython-input-60-755679fed7bb>", line 7, in find_coordinate
    assert aorta == 'j', f'如果只有一个label那么此时应为降主动脉, 但实际为{aorta}：{label_file.split("/")[-1]}'
AssertionError: 如果只有一个label那么此时应为降主动脉, 但实际为s：nimeihua-S-27-43-J-27-64_0076.txt


******Processing ouyangyuedong-S-23-28-J-23-58******
******Processing panchenglei-S-29-36-J-29-78******
******Processing panxiaohua-S-23-30-J-23-58******
******Processing pengxuechuan-S-34-49-J-34-110******


Traceback (most recent call last):
  File "<ipython-input-60-755679fed7bb>", line 87, in crop_images
    x1, y1, x2, y2 = find_coordinate(*img.shape[0:2], os.path.join(label_path, f'{patient}_{j:04d}.txt'), s)
  File "<ipython-input-60-755679fed7bb>", line 7, in find_coordinate
    assert aorta == 'j', f'如果只有一个label那么此时应为降主动脉, 但实际为{aorta}：{label_file.split("/")[-1]}'
AssertionError: 如果只有一个label那么此时应为降主动脉, 但实际为s：pengxuechuan-S-34-49-J-34-110_0100.txt


******Processing qianxiaofu-J-13-68******
******Processing qichunhong-J-34-37******
******Processing qiuhongren-J-41-68******
******Processing shenchunyan-S-43-54-J-43-148******


Traceback (most recent call last):
  File "<ipython-input-60-755679fed7bb>", line 87, in crop_images
    x1, y1, x2, y2 = find_coordinate(*img.shape[0:2], os.path.join(label_path, f'{patient}_{j:04d}.txt'), s)
  File "<ipython-input-60-755679fed7bb>", line 3, in find_coordinate
    with open(label_file, 'r') as f:
FileNotFoundError: [Errno 2] No such file or directory: '/nfs3-p1/zsxm/dataset/2021-10-19-imh/shenchunyan-S-43-54-J-43-148/2/labels/shenchunyan-S-43-54-J-43-148_0130.txt'
Traceback (most recent call last):
  File "<ipython-input-60-755679fed7bb>", line 87, in crop_images
    x1, y1, x2, y2 = find_coordinate(*img.shape[0:2], os.path.join(label_path, f'{patient}_{j:04d}.txt'), s)
  File "<ipython-input-60-755679fed7bb>", line 3, in find_coordinate
    with open(label_file, 'r') as f:
FileNotFoundError: [Errno 2] No such file or directory: '/nfs3-p1/zsxm/dataset/2021-10-19-imh/shenchunyan-S-43-54-J-43-148/2/labels/shenchunyan-S-43-54-J-43-148_0132.txt'


******Processing shenjianyi-J-31-76******
******Processing shenlinhua-S-35-43-J-35-147******
******Processing shenqingchuan-S-26-39******


Traceback (most recent call last):
  File "<ipython-input-60-755679fed7bb>", line 87, in crop_images
    x1, y1, x2, y2 = find_coordinate(*img.shape[0:2], os.path.join(label_path, f'{patient}_{j:04d}.txt'), s)
  File "<ipython-input-60-755679fed7bb>", line 7, in find_coordinate
    assert aorta == 'j', f'如果只有一个label那么此时应为降主动脉, 但实际为{aorta}：{label_file.split("/")[-1]}'
AssertionError: 如果只有一个label那么此时应为降主动脉, 但实际为s：shenqingchuan-S-26-39_0317.txt
Traceback (most recent call last):
  File "<ipython-input-60-755679fed7bb>", line 87, in crop_images
    x1, y1, x2, y2 = find_coordinate(*img.shape[0:2], os.path.join(label_path, f'{patient}_{j:04d}.txt'), s)
  File "<ipython-input-60-755679fed7bb>", line 7, in find_coordinate
    assert aorta == 'j', f'如果只有一个label那么此时应为降主动脉, 但实际为{aorta}：{label_file.split("/")[-1]}'
AssertionError: 如果只有一个label那么此时应为降主动脉, 但实际为s：shenqingchuan-S-26-39_0318.txt
Traceback (most recent call last):
  File "<ipython-input-60-755679fed7bb>", line 87, in crop_images
    x1,

******Processing shenyang-J-34-96******
******Processing shizheng-J-32-101******
******Processing songshuizhen-J-9-43******
******Processing songxin-J-23-98******


Traceback (most recent call last):
  File "<ipython-input-60-755679fed7bb>", line 87, in crop_images
    x1, y1, x2, y2 = find_coordinate(*img.shape[0:2], os.path.join(label_path, f'{patient}_{j:04d}.txt'), s)
  File "<ipython-input-60-755679fed7bb>", line 3, in find_coordinate
    with open(label_file, 'r') as f:
FileNotFoundError: [Errno 2] No such file or directory: '/nfs3-p1/zsxm/dataset/2021-10-19-imh/songxin-J-23-98/2/labels/songxin-J-23-98_0085.txt'
Traceback (most recent call last):
  File "<ipython-input-60-755679fed7bb>", line 87, in crop_images
    x1, y1, x2, y2 = find_coordinate(*img.shape[0:2], os.path.join(label_path, f'{patient}_{j:04d}.txt'), s)
  File "<ipython-input-60-755679fed7bb>", line 3, in find_coordinate
    with open(label_file, 'r') as f:
FileNotFoundError: [Errno 2] No such file or directory: '/nfs3-p1/zsxm/dataset/2021-10-19-imh/songxin-J-23-98/2/labels/songxin-J-23-98_0086.txt'


******Processing sunhuhua-J-31-103******
******Processing sunshaozhi-J-20-64******
******Processing sunshimin-J-22-45******
******Processing taolidi-J-41-121******
******Processing tianjianjun-S-40-65-J-40-125******
******Processing wangchunli-S-19-32******
******Processing wangjinlong-S-33-43-J-33-76******
******Processing wangruixing-J-25-63******
******Processing wangwenzhong-J-32-122******
******Processing wanlaoge-J-40-81******
******Processing wuabao-J-20-52******
******Processing wuguomiao-J-22-50******
******Processing wurenjin-J-29-103******
******Processing wurihua-J-30-64******
******Processing wuxisheng-S-27-31-J-27-27******
******Processing wuzonghe-J-15-62******
******Processing xialiming-J-22-66******
******Processing xiashengyun-J-20-79******
******Processing xigenmu-J-22-58******
******Processing xiongliandi-S-19-33-J-19-59******
******Processing xuaijun-J-34-91******
******Processing xujufeng-S-17-27-J-17-35******
******Processing xuliang-J-34-142******
******Processi

In [61]:
print(len(epl1))
print(epl1)

15
['chenaijun-S-Im25-38-J-Im25-85', 'qiuyuanxing-S-Im30-32', 'qumin-S-Im22-35-J-Im22-28', 'xuyuping-S-Im21-33-J-Im21-69', 'yangxiulian-S-Im20-26-J-Im20-44', 'yangyuexian-S-Im38-63', 'yujiazhen-S-Im19-27-J-Im19-77', 'guquankang-J-Im18-69', 'heazi-S-16-30', 'lujikang-S-17-31', 'nimeihua-S-27-43-J-27-64', 'pengxuechuan-S-34-49-J-34-110', 'shenchunyan-S-43-54-J-43-148', 'shenqingchuan-S-26-39', 'songxin-J-23-98']


## 3.复制文件

In [68]:
ct_path = f'/nfs3-p2/zsxm/dataset/aorta_classify_ct_{lower_b}_{upper_b}'
for dataset in ['train', 'val']:
    if dataset == 'train':
        dst_path = f'/nfs3-p2/zsxm/dataset/gan_aorta_{lower_b}_{upper_b}'
    else:
        pass
    dst_path = f'/nfs3-p2/zsxm/dataset/gan_aorta_{lower_b}_{upper_b}/trainA' if dataset == 'train' else f'/nfs3-p2/zsxm/dataset/gan_aorta_{lower_b}_{upper_b}/testA'
    if os.path.exists(dst_path):
        shutil.rmtree(dst_path)
    os.makedirs(dst_path)
    for cate in range(4):
        ori_path = os.path.join(ct_path, dataset, str(cate))
        for img in os.listdir(ori_path):
            shutil.copy(os.path.join(ori_path, img), os.path.join(dst_path, img))

In [69]:
train_set = set()
val_set = set()
for img in os.listdir(f'/nfs3-p2/zsxm/dataset/gan_aorta_{lower_b}_{upper_b}/trainA'):
    train_set.add(img.split('_')[0])
for img in os.listdir(f'/nfs3-p2/zsxm/dataset/gan_aorta_{lower_b}_{upper_b}/testA'):
    val_set.add(img.split('_')[0])

In [70]:
print(len(train_set), len(val_set))

876 202


In [71]:
def move_cta(input_path, train_set, val_set):
    workbook_path = os.path.join(input_path, 'label.xlsx')
    wb = openpyxl.load_workbook(workbook_path)
    sheet = wb['Sheet1']
    
    for patient in sorted(os.listdir(input_path)):
        if os.path.isfile(os.path.join(input_path, patient)):
            continue
        flag = True
        for row in sheet.iter_rows():
            if row[0].value == patient.split('-')[0]:
                if row[3].value is not None and row[4].value is not None:
                    flag = False
                break
        if flag: continue
        print(f'******Processing {patient}******')
        if patient in train_set:
            dst_path = f'/nfs3-p2/zsxm/dataset/gan_aorta_{lower_b}_{upper_b}/trainB'
        elif patient in val_set:
            dst_path = f'/nfs3-p2/zsxm/dataset/gan_aorta_{lower_b}_{upper_b}/testB'
        else:
            raise Exception(f'{patient} neither in train_set nor in val_set')
        os.makedirs(dst_path, exist_ok=True)
        ori_path = os.path.join(input_path, patient, '2', f'crops_{lower_b}_{upper_b}')
        for img in os.listdir(ori_path):
            shutil.copy(os.path.join(ori_path, img), os.path.join(dst_path, img))
            
move_cta('/nfs3-p1/zsxm/dataset/2021-9-17-negative/', train_set, val_set)
move_cta('/nfs3-p1/zsxm/dataset/2021-9-8/', train_set, val_set)
move_cta('/nfs3-p1/zsxm/dataset/2021-9-13/', train_set, val_set)
move_cta('/nfs3-p1/zsxm/dataset/2021-10-19-pau/', train_set, val_set)
move_cta('/nfs3-p1/zsxm/dataset/2021-10-19-imh/', train_set, val_set)

******Processing baixiaoxu-22-31-52-147******
******Processing baojinjia-17-21-35-83******
******Processing baoyanzu-21-25-42-93******
******Processing bianyinqiao-31-38-59-137******
******Processing bianzhaorong-17-22-38-92******
******Processing caijingnan1-41-50-73-184******
******Processing caiyouzhe-14-18-32-83******
******Processing caoyuxia-16-21-35-87******
******Processing chenfugui-36-46-76-149******
******Processing chengzhiqiang-33-41-67-159******
******Processing chenjian-26-30-44-89******
******Processing chenjianhua-14-17-32-80******
******Processing chenjianjun-25-34-65-175******
******Processing chenjianping-15-22-41-91******
******Processing chenjingjing-38-47-67-184******
******Processing chenjinmei-12-17-34-80******
******Processing chenpanyang-21-31-71-161******
******Processing chenqiuying-13-17-30-78******
******Processing chensaimu-34-44-78-166******
******Processing chenxiufen-17-21-34-82******
******Processing chenyanli-20-23-34-80******
******Processing cheny

******Processing yesanghua-J-Im41-138******
******Processing yexinxiang-S-Im32-38-J-Im32-102******
******Processing yexiyou-J-Im35-139******
******Processing yingjianquan-S-Im12-14-J-Im12-36******
******Processing yujiazhen-S-Im19-27-J-Im19-77******
******Processing zhanfangai-S-Im17-23-J-Im17-32******
******Processing zhangfuyang-S-Im15-20-J-Im15-87******
******Processing zhanghang-S-Im16-20-J-Im16-41******
******Processing zhangweiyan-J-Im40-57******
******Processing zhangyunqin-S-Im14-25-J-Im14-64******
******Processing zhaozanping-J-Im23-90******
******Processing zhaozhengxing-J-Im35-99******
******Processing zhongweiliang-J-Im66-170******
******Processing zhongxuefang-S-Im19-34-J-Im19-72******
******Processing zhoubozhong-J-Im29-131******
******Processing zhoufeng-S-Im35-40-J-Im35-147******
******Processing zhouliang-J-Im47-123******
******Processing zhoumingfang-J-Im36-41******
******Processing zhuchunguang-J-Im21-86******
******Processing zhuhancheng-S-Im27-36-J-Im27-86******
**

******Processing xulinfeng-S-15-29-J-15-39******
******Processing xuqindi-S-29-39-J-29-71******
******Processing xuyuezhen-J-38-108******
******Processing xuzhengcheng-J-29-58******
******Processing yangyanwen-J-22-40******
******Processing yanyueying-J-30-98******
******Processing yanzhengmin-J-36-103******
******Processing yeguixiang-S-37-49-J-37-100******
******Processing yulitang-S-24-40******
******Processing zhangdeting-J-20-56******
******Processing zhangdongju-J-26-33******
******Processing zhangjufen-S-20-41-J-20-54******
******Processing zhanglingyun-J-17-57******
******Processing zhanglvhao-J-24-41******
******Processing zhangrijun-J-23-66******
******Processing zhangrufen-S-25-38-J-25-77******
******Processing zhangweihu-J-26-38******
******Processing zhangxiaoshun-S-24-35-J-24-90******
******Processing zhangyougou-J-11-73******
******Processing zhaoxiao-S-20-30-J-20-48******
******Processing zhengxinfu-J-36-74******
******Processing zhengyingchao-J-55-77******
******Proces