In [None]:
from math import sqrt
import copy
import  traceback
import shutil
import random

import numpy as np  # linear algebra
import pydicom
from pydicom.errors import InvalidDicomError
import os
import matplotlib.pyplot as plt
import cv2
from pydicom.uid import UID
from PIL import Image
from tqdm import tqdm
import openpyxl

In [None]:
def load_scan(path):
    """Load every readable DICOM slice found directly inside ``path``.

    Sub-directories are ignored.  Each remaining file is read with
    ``pydicom.dcmread(..., force=True)``; when its pixel data cannot be
    accessed the traceback is printed and the file is DELETED from disk.

    Returns:
        list: the readable datasets, ordered by ``float(InstanceNumber)``.
    """
    readable = []
    for entry in os.listdir(path):
        entry_path = os.path.join(path, entry)
        if os.path.isdir(entry_path):
            continue
        dataset = pydicom.dcmread(entry_path, force=True)
        try:
            # Only accessed to find out whether the pixel data can be read.
            dataset.pixel_array
        except (AttributeError, InvalidDicomError):
            traceback.print_exc()
            print(f'\tDelete {entry_path}')
            os.remove(entry_path)
        else:
            readable.append(dataset)
    readable.sort(key=lambda dataset: float(dataset.InstanceNumber))
    return readable

In [None]:
# Display window applied to the rescaled CT values before they are written as PNG.
window_width = 600
window_level = 200
# The window is centred on the level and spans half the width on each side.
lower_b = window_level - window_width // 2
upper_b = window_level + window_width // 2
print(lower_b, upper_b)

# 1.阴性数据

In [None]:
# Report which patients have no scan folder named '2'.
def print_no_cta(input_dir):
    """List the patient folders under ``input_dir`` that lack a ``'2'`` sub-folder.

    Plain files in ``input_dir`` are skipped.  A patient without ``'2'`` is
    printed together with its folder content and collected.  A patient that
    has ``'2'`` but no ``images_{lower_b}_{upper_b}`` folder inside it is only
    reported, not collected.

    Returns:
        list[str]: paths of the patient folders without a ``'2'`` entry.
    """
    print(f'**********{input_dir}')
    missing = []
    for patient in sorted(os.listdir(input_dir)):
        patient_path = os.path.join(input_dir, patient)
        if os.path.isfile(patient_path):
            continue
        scans = os.listdir(patient_path)
        if '2' in scans:
            if f'images_{lower_b}_{upper_b}' not in os.listdir(os.path.join(patient_path, '2')):
                print(f'have 2 but not have images_{lower_b}_{upper_b}', patient_path)
        else:
            missing.append(patient_path)
            print(patient_path, scans)
    return missing
            
# Collect the negative-cohort patients that still lack a '2' folder.
# NOTE(review): these paths are under /nfs3-p2, while the later cells that work on
# the same two dataset names use /nfs3-p1 -- confirm which mount is intended.
no_cta_list = [
    patient_path
    for dataset_dir in (
        '/nfs3-p2/zsxm/dataset/2021-9-17-negative',
        '/nfs3-p2/zsxm/dataset/2021-9-29-negative',
    )
    for patient_path in print_no_cta(dataset_dir)
]
print(no_cta_list)

In [None]:
# Rename one scan folder of every patient in no_cta_list to '2'.  With exactly two
# entries the one that is not '1' is renamed; otherwise the scan whose slice thickness
# is closest to that of scan '1' is chosen, preferring the thinner scan on a tie.
for patient in no_cta_list:
    scans = os.listdir(patient)
    if '1' not in scans:
        print(patient, 'not have 1')
        continue
    if len(scans) == 2:
        for scan in scans:
            if scan != '1':
                os.rename(os.path.join(patient, scan), os.path.join(patient, '2'))
        continue

    # Reset per patient so a thickness read for a previous patient is never reused
    # when scan '1' of this patient has no readable slice.
    ct_thickness = None
    tk_list = []
    for scan in scans:
        scan_path = os.path.join(patient, scan)
        if not os.path.isdir(scan_path):
            # A stray file next to the scan folders cannot be listed.
            continue
        for s in os.listdir(scan_path):
            slice_path = os.path.join(scan_path, s)
            if os.path.isdir(slice_path) or not s.endswith('.dcm'):
                continue
            sl = pydicom.dcmread(slice_path)
            try:
                sl_p = sl.pixel_array
            except AttributeError:
                # Slices without accessible pixel data do not take part in the comparison.
                continue
            if scan == '1':
                ct_thickness = sl.SliceThickness
            else:
                tk_list.append((sl.SliceThickness, scan))

    if ct_thickness is None or not tk_list:
        # Nothing to compare: leave the folders untouched instead of crashing.
        print(patient, 'no readable slice to compare thickness, skip')
        continue

    # Smallest thickness distance wins; ties go to the smaller thickness.
    min_tk, min_scan = min(tk_list, key=lambda item: (abs(item[0] - ct_thickness), item[0]))
    print(patient, min_scan)
    os.rename(os.path.join(patient, min_scan), os.path.join(patient, '2'))

In [None]:
def check_label(input_path):
    """Validate the CT and CTA range labels stored in ``label.xlsx``.

    Reads sheet ``Sheet1`` of ``<input_path>/label.xlsx`` and skips the first
    row.  For every other row, the value in column index 3 (CT) and in column
    index 4 (CTA), when present, must be four ``'-'``-separated integers in
    strictly increasing order.

    Args:
        input_path: dataset folder that contains ``label.xlsx``.

    Raises:
        AssertionError: on the first malformed label; the message names the
            dataset, the value of the row's first column and the label.
    """
    workbook_path = os.path.join(input_path, 'label.xlsx')
    wb = openpyxl.load_workbook(workbook_path)
    sheet = wb['Sheet1']
    for i, row in enumerate(sheet.iter_rows()):
        if i == 0:
            continue
        # The messages used to reference an undefined name ``patient``; the first
        # column is what crop_images matches against the patient folder name.
        patient = row[0].value
        for kind, cell in (('CT', row[3]), ('CTA', row[4])):
            if cell.value is None:
                continue
            parts = cell.value.split('-')
            assert len(parts) == 4, f'{input_path}:{patient} {kind} label wrong:{parts}'
            assert int(parts[0]) < int(parts[1]) < int(parts[2]) < int(parts[3]), \
                f'{input_path}:{patient} {kind} label error:{parts}'
            
# Validate the label workbook of both negative datasets.
for _dataset_dir in (
    '/nfs3-p1/zsxm/dataset/2021-9-17-negative/',
    '/nfs3-p1/zsxm/dataset/2021-9-29-negative/',
):
    check_label(_dataset_dir)
del _dataset_dir  # do not leave the loop helper behind in the notebook namespace

In [None]:
# Convert the DICOM files under each patient's '2' folder to PNG images using the
# configured window width / level.
def generate_image(input_folder):
    """Write windowed PNG slices for every patient that has not been converted yet.

    For each patient folder in ``input_folder`` that contains a ``'2'`` scan
    folder without an ``images_{lower_b}_{upper_b}`` sub-folder, the slices
    returned by ``load_scan`` are rescaled with ``RescaleSlope`` /
    ``RescaleIntercept``, clipped to ``[lower_b, upper_b]``, mapped to 0-255
    and saved as ``<patient>_<index:04d>.png``.  Non-square slices are
    centre-cropped to a square.

    Plain files, patients without a ``'2'`` folder (this used to raise from
    ``os.listdir``) and patients that already have the image folder are skipped.
    """
    images_dirname = f'images_{lower_b}_{upper_b}'
    for patient in sorted(os.listdir(input_folder)):
        scan_path = os.path.join(input_folder, patient, '2')
        if not os.path.isdir(scan_path):
            continue
        image_path = os.path.join(scan_path, images_dirname)
        if os.path.exists(image_path):
            # Already converted in an earlier run.
            continue
        print(f'****Processing {patient}****')
        os.mkdir(image_path)

        ct = load_scan(scan_path)
        print_flag = False
        for i, ds in enumerate(ct):
            img = ds.pixel_array.astype(np.int16)
            slope = ds.RescaleSlope
            if slope != 1:
                img = (slope * img.astype(np.float64)).astype(np.int16)
            img += np.int16(ds.RescaleIntercept)
            img = np.clip(img, lower_b, upper_b)
            img = ((img - lower_b) / (upper_b - lower_b) * 255).astype(np.uint8)
            img = Image.fromarray(img)
            if img.height != img.width:
                if not print_flag:
                    # Reported once per patient.
                    print(patient, f'height({img.height}) not equal to width({img.width})\n')
                    print_flag = True
                side = min(img.height, img.width)
                # Integer offsets: a fractional box is rounded by Pillow, which could
                # leave the result one pixel off square when the difference is odd.
                top = (img.height - side) // 2
                left = (img.width - side) // 2
                img = img.crop((left, top, left + side, top + side))
            img.save(os.path.join(image_path, f'{patient}_{i:04d}.png'))

# Convert both negative datasets.
for _dataset_dir in (
    '/nfs3-p1/zsxm/dataset/2021-9-17-negative/',
    '/nfs3-p1/zsxm/dataset/2021-9-29-negative/',
):
    generate_image(_dataset_dir)
del _dataset_dir  # do not leave the loop helper behind in the notebook namespace

In [None]:
# Gather the PNG folders of all patients in one place for yolov5 detection.
# not_move=True skips patients that already have detection results.
def move_together_for_detect(input_folder, dst_path, not_move=True):
    """Copy each patient's ``images_{lower_b}_{upper_b}`` folder below ``dst_path``.

    The copies land in ``<dst_path>/<dataset name>/<patient>``, where the
    dataset name is the last component of ``input_folder`` (a trailing slash is
    ignored).  An existing destination folder of a patient is removed first.

    Args:
        input_folder: dataset folder holding one sub-folder per patient.
        dst_path: collection root; created, including parents, when missing.
        not_move: when true, patients whose ``'2'`` folder already contains both
            ``labels`` and ``pred_images_{lower_b}_{upper_b}`` are skipped.

    A failed copy is reported with its traceback and does not stop the loop.
    """
    os.makedirs(dst_path, exist_ok=True)
    root_name = os.path.basename(os.path.normpath(input_folder))
    dst_path = os.path.join(dst_path, root_name)

    for patient in sorted(os.listdir(input_folder)):
        patient_path = os.path.join(input_folder, patient)
        if os.path.isfile(patient_path):
            continue
        scan_path = os.path.join(patient_path, '2')
        already_detected = (
            os.path.exists(os.path.join(scan_path, 'labels'))
            and os.path.exists(os.path.join(scan_path, f'pred_images_{lower_b}_{upper_b}'))
        )
        if not_move and already_detected:
            continue
        print(f'****Processing {patient}****')
        target = os.path.join(dst_path, patient)
        if os.path.exists(target):
            print(f"\tremove {target}")
            shutil.rmtree(target)
        try:
            shutil.copytree(os.path.join(scan_path, f'images_{lower_b}_{upper_b}'), target)
        except Exception:
            # Best effort per patient; unlike a bare ``except`` this still lets
            # KeyboardInterrupt stop a long run.
            traceback.print_exc()

# Collect the images of both negative datasets under the shared detection folder.
for _dataset_dir in (
    '/nfs3-p1/zsxm/dataset/2021-9-17-negative/',
    '/nfs3-p1/zsxm/dataset/2021-9-29-negative/',
):
    move_together_for_detect(_dataset_dir, '/nfs3-p1/zsxm/dataset/9_detect/')
del _dataset_dir  # do not leave the loop helper behind in the notebook namespace

In [None]:
# Move the detection results back into the original patient folders.
def move_back(result_path, ori_path):
    """Move the per-patient detection output from ``result_path`` into ``ori_path``.

    For every entry ``<patient>`` of ``result_path``:
      * files go to ``<ori_path>/<patient>/2/pred_images_{lower_b}_{upper_b}``,
        which is recreated empty first;
      * sub-folders replace the folder of the same name in ``<ori_path>/<patient>/2``.
    The emptied patient folder and finally ``result_path`` itself are removed
    with ``os.rmdir``.  Nothing happens, apart from a message, when
    ``result_path`` does not exist.
    """
    if not os.path.exists(result_path):
        print(f'目录不存在：{result_path}')
        return
    for patient in sorted(os.listdir(result_path)):
        print(f'Processing {patient}')
        source_dir = os.path.join(result_path, patient)
        scan_dir = os.path.join(ori_path, patient, '2')
        pred_dir = os.path.join(scan_dir, f'pred_images_{lower_b}_{upper_b}')
        if os.path.exists(pred_dir):
            shutil.rmtree(pred_dir)
        os.mkdir(pred_dir)
        for entry in os.listdir(source_dir):
            source = os.path.join(source_dir, entry)
            if os.path.isfile(source):
                shutil.move(source, os.path.join(pred_dir, entry))
            elif os.path.isdir(source):
                target = os.path.join(scan_dir, entry)
                if os.path.exists(target):
                    shutil.rmtree(target)
                shutil.move(source, target)
        os.rmdir(source_dir)
    os.rmdir(result_path)
                
# Return the yolov5 output of the 2021-9-17 negative dataset to its patient folders.
move_back(
    result_path='/home/zsxm/pythonWorkspace/yolov5_old/runs/detect/2021-9-17-negative',
    ori_path='/nfs3-p1/zsxm/dataset/2021-9-17-negative/',
)

In [None]:
# Crop out the aorta.  Known problem noted by the author: the slices within 0.3 of
# branch_end cannot be cropped with this scheme; the suggestion is to switch to the
# scheme used in the later cell.
def find_coordinate(height, width, label_file, aorta):
    """Return the square crop box ``(x1, y1, x2, y2)`` of one aorta in a label file.

    Each non-blank line of ``label_file`` is parsed as whitespace-separated
    numbers; fields 1-4 are used as centre x, centre y, width and height,
    expressed as fractions of the image size (presumably YOLO-style labels --
    confirm against the detector output).

    Args:
        height, width: image size in pixels.
        label_file: path of the label text file (one or two boxes).
        aorta: ``'s'`` selects the box with the smaller centre y, ``'j'`` the
            one with the larger centre y.  With a single box only ``'j'`` is
            accepted.

    Returns:
        tuple[int, int, int, int]: a square whose side is the larger of the
        box's pixel width and height, centred on the box.

    Raises:
        AssertionError: empty file, more than two boxes, a single box requested
            as anything but ``'j'``, or a box centre outside the accepted region.
        Exception: two boxes and ``aorta`` is neither ``'s'`` nor ``'j'``.
    """
    label_name = label_file.split("/")[-1]

    def require(condition, message):
        # Explicit raise instead of ``assert`` so the checks survive ``python -O``.
        if not condition:
            raise AssertionError(message)

    with open(label_file, 'r') as f:
        # Blank lines (e.g. a trailing empty line) carry no box and are ignored.
        boxes = [[float(value) for value in line.split()] for line in f if line.strip()]

    # An empty file used to fail later with a bare IndexError.
    require(boxes, f'label.txt不应为空：{label_name}')
    require(len(boxes) <= 2, f'label.txt应该存储不多于2个label：{label_name}')

    if len(boxes) == 1:
        require(aorta == 'j', f'如果只有一个label那么此时应为降主动脉, 但实际为{aorta}：{label_name}')
        box = boxes[0]
        require(0.25 < box[1] < 0.75 and 0.2 < box[2] < 0.8,
                f'边界框中心({box[1]}, {box[2]})出界：{label_name}')
    else:
        first, second = boxes
        require(0.25 < first[1] < 0.75 and 0.15 < first[2] < 0.85,
                f'边界框1中心({first[1]}, {first[2]})出界：{label_name}')
        require(0.25 < second[1] < 0.75 and 0.15 < second[2] < 0.85,
                f'边界框2中心({second[1]}, {second[2]})出界：{label_name}')
        if aorta == 's':
            box = first if first[2] < second[2] else second
        elif aorta == 'j':
            box = first if first[2] > second[2] else second
        else:
            raise Exception(f'aorta 应该为"s"或"j"其中之一: {label_name}')

    x, y, w, h = box[1], box[2], box[3], box[4]
    side = max(int(width * w), int(height * h))
    return (int(width * x - side / 2), int(height * y - side / 2),
            int(width * x + side / 2 + 1), int(height * y + side / 2 + 1))

def _save_aorta_crops(image_path, label_path, crop_path, patient, index, aortas):
    """Crop slice ``index`` once per entry of ``aortas``; return False if anything failed."""
    stem = f'{patient}_{index:04d}'
    try:
        # The slice is opened once and shared by all requested aortas.  A missing or
        # unreadable slice is recorded as a failure instead of aborting the whole run.
        img = np.array(Image.open(os.path.join(image_path, f'{stem}.png')))
    except Exception:
        traceback.print_exc()
        return False
    succeeded = True
    for aorta in aortas:
        try:
            x1, y1, x2, y2 = find_coordinate(*img.shape[0:2], os.path.join(label_path, f'{stem}.txt'), aorta)
        except Exception:
            # ``Exception`` rather than a bare ``except`` so Ctrl-C still stops the loop.
            traceback.print_exc()
            succeeded = False
        else:
            crop = Image.fromarray(img[y1:y2, x1:x2])
            crop.save(os.path.join(crop_path, f'{patient}_{aorta}_{index:04d}.png'))
    return succeeded


def crop_images(input_path, error_patient_list):
    """Crop the aorta regions of every labelled patient of a negative dataset.

    A patient folder is processed when a row of ``label.xlsx`` / ``Sheet1`` has
    the part of the folder name before the first ``'-'`` in its first column and
    both column 3 and column 4 filled.  Column 4 must hold four
    ``'-'``-separated 1-based slice numbers; the last three are used as branch
    start, branch end and aorta end.

    Crops are written to ``<patient>/2/crops_{lower_b}_{upper_b}`` (recreated on
    every call):
      * between branch start and branch end, trimmed by 10% at the start and 20%
        at the end: both ``'s'`` and ``'j'`` crops;
      * between branch end and aorta end, trimmed the same way: ``'j'`` crops.

    Args:
        input_path: dataset folder with ``label.xlsx`` and one folder per patient.
        error_patient_list: list that receives every patient for which at least
            one crop failed; mutated in place.
    """
    workbook_path = os.path.join(input_path, 'label.xlsx')
    wb = openpyxl.load_workbook(workbook_path)
    sheet = wb['Sheet1']

    for patient in sorted(os.listdir(input_path)):
        if os.path.isfile(os.path.join(input_path, patient)):
            continue
        ls = None
        for row in sheet.iter_rows():
            if row[0].value == patient.split('-')[0]:
                if row[3].value is not None and row[4].value is not None:
                    ls = row[4].value.split('-')
                    assert len(ls) == 4, f'{patient} ls wrong'
                    assert len(row[3].value.split('-')) == 4, f'{patient} lsct wrong'
                # Only the first matching row is considered.
                break
        if ls is None:
            continue
        # Labels are 1-based slice numbers; the first one is parsed but not needed here.
        _, branch_start, branch_end, aorta_end = (int(value) - 1 for value in ls)

        print(f'******Processing {patient}******')
        scan_path = os.path.join(input_path, patient, '2')
        image_path = os.path.join(scan_path, f'images_{lower_b}_{upper_b}')
        label_path = os.path.join(scan_path, 'labels')
        crop_path = os.path.join(scan_path, f'crops_{lower_b}_{upper_b}')
        if os.path.exists(crop_path):
            shutil.rmtree(crop_path)
        os.mkdir(crop_path)

        crop_flag = True
        offset = branch_end - branch_start
        for i in range(branch_start + int(0.1 * offset), branch_end - int(0.2 * offset)):
            if not _save_aorta_crops(image_path, label_path, crop_path, patient, i, ('s', 'j')):
                crop_flag = False
        offset = aorta_end - branch_end
        for i in range(branch_end + int(0.1 * offset), aorta_end - int(0.2 * offset)):
            if not _save_aorta_crops(image_path, label_path, crop_path, patient, i, ('j',)):
                crop_flag = False
        if not crop_flag:
            error_patient_list.append(patient)
            
# Crop the 2021-9-17 negative dataset; epl1 is handed to crop_images as its error list.
epl1 = list()
crop_images(input_path='/nfs3-p1/zsxm/dataset/2021-9-17-negative/', error_patient_list=epl1)

In [None]:
# Number of failed patients on the first line, the patients themselves on the second.
print(len(epl1), epl1, sep='\n')

In [None]:
# Crop out the aorta, including a margin of 3 slices outside the labelled range.
def find_coordinate(height, width, label_file, aorta):
    """Return the square crop box ``(x1, y1, x2, y2)`` of one aorta in a label file.

    Each non-blank line of ``label_file`` is parsed as whitespace-separated
    numbers; fields 1-4 are used as centre x, centre y, width and height,
    expressed as fractions of the image size (presumably YOLO-style labels --
    confirm against the detector output).

    Args:
        height, width: image size in pixels.
        label_file: path of the label text file (one or two boxes).
        aorta: ``'s'`` selects the box with the smaller centre y, ``'j'`` the
            one with the larger centre y.  With a single box only ``'j'`` is
            accepted.

    Returns:
        tuple[int, int, int, int]: a square whose side is the larger of the
        box's pixel width and height, centred on the box.

    Raises:
        AssertionError: empty file, more than two boxes, a single box requested
            as anything but ``'j'``, or a box centre outside the accepted region.
        Exception: two boxes and ``aorta`` is neither ``'s'`` nor ``'j'``.
    """
    label_name = label_file.split("/")[-1]

    def require(condition, message):
        # Explicit raise instead of ``assert`` so the checks survive ``python -O``.
        if not condition:
            raise AssertionError(message)

    with open(label_file, 'r') as f:
        # Blank lines (e.g. a trailing empty line) carry no box and are ignored.
        boxes = [[float(value) for value in line.split()] for line in f if line.strip()]

    # An empty file used to fail later with a bare IndexError.
    require(boxes, f'label.txt不应为空：{label_name}')
    require(len(boxes) <= 2, f'label.txt应该存储不多于2个label：{label_name}')

    if len(boxes) == 1:
        require(aorta == 'j', f'如果只有一个label那么此时应为降主动脉, 但实际为{aorta}：{label_name}')
        box = boxes[0]
        require(0.25 < box[1] < 0.75 and 0.2 < box[2] < 0.8,
                f'边界框中心({box[1]}, {box[2]})出界：{label_name}')
    else:
        first, second = boxes
        require(0.25 < first[1] < 0.75 and 0.15 < first[2] < 0.85,
                f'边界框1中心({first[1]}, {first[2]})出界：{label_name}')
        require(0.25 < second[1] < 0.75 and 0.15 < second[2] < 0.85,
                f'边界框2中心({second[1]}, {second[2]})出界：{label_name}')
        if aorta == 's':
            box = first if first[2] < second[2] else second
        elif aorta == 'j':
            box = first if first[2] > second[2] else second
        else:
            raise Exception(f'aorta 应该为"s"或"j"其中之一: {label_name}')

    x, y, w, h = box[1], box[2], box[3], box[4]
    side = max(int(width * w), int(height * h))
    return (int(width * x - side / 2), int(height * y - side / 2),
            int(width * x + side / 2 + 1), int(height * y + side / 2 + 1))

def _crop_range_with_margin(image_path, label_path, crop_path, patient, aorta, start, end, margin=3):
    """Crop ``aorta`` for slices ``[start - margin, end + margin)``; return False on any failure.

    Crops of slices outside ``[start, end)`` get the file-name suffix ``_n``.
    """
    succeeded = True
    # Slice indices never go below 0 (generate_image numbers the PNGs from 0), so the
    # margin is clamped instead of reporting files that cannot exist as errors.
    for i in range(max(start - margin, 0), end + margin):
        stem = f'{patient}_{i:04d}'
        try:
            img = np.array(Image.open(os.path.join(image_path, f'{stem}.png')))
            x1, y1, x2, y2 = find_coordinate(*img.shape[0:2], os.path.join(label_path, f'{stem}.txt'), aorta)
        except Exception:
            # ``Exception`` rather than a bare ``except`` so Ctrl-C still stops the loop.
            traceback.print_exc()
            succeeded = False
        else:
            suffix = '' if start <= i < end else '_n'
            crop = Image.fromarray(img[y1:y2, x1:x2])
            crop.save(os.path.join(crop_path, f'{patient}_{aorta}_{i:04d}{suffix}.png'))
    return succeeded


def crop_images(input_path, error_patient_list):
    """Crop the aorta regions of every labelled patient, plus 3 slices of margin.

    A patient folder is processed when a row of ``label.xlsx`` / ``Sheet1`` has
    the part of the folder name before the first ``'-'`` in its first column and
    both column 3 and column 4 filled.  Column 4 must hold four
    ``'-'``-separated 1-based slice numbers; the last three are used as branch
    start, branch end and aorta end.

    Crops are written to ``<patient>/2/crops3_{lower_b}_{upper_b}`` (recreated
    on every call):
      * ``'s'``: branch start to branch end, trimmed by 10% / 20%;
      * ``'j'``: branch start to aorta end, trimmed by 5% / 10%;
    each extended by 3 slices on both sides, the extra slices being saved with
    an ``_n`` suffix.

    Args:
        input_path: dataset folder with ``label.xlsx`` and one folder per patient.
        error_patient_list: list that receives every patient for which at least
            one crop failed; mutated in place.
    """
    workbook_path = os.path.join(input_path, 'label.xlsx')
    wb = openpyxl.load_workbook(workbook_path)
    sheet = wb['Sheet1']

    for patient in sorted(os.listdir(input_path)):
        if os.path.isfile(os.path.join(input_path, patient)):
            continue
        ls = None
        for row in sheet.iter_rows():
            if row[0].value == patient.split('-')[0]:
                if row[3].value is not None and row[4].value is not None:
                    ls = row[4].value.split('-')
                    assert len(ls) == 4, f'{patient} ls wrong'
                # Only the first matching row is considered.
                break
        if ls is None:
            continue
        # Labels are 1-based slice numbers; the first one is parsed but not needed here.
        _, branch_start, branch_end, aorta_end = (int(value) - 1 for value in ls)

        print(f'******Processing {patient}******')
        scan_path = os.path.join(input_path, patient, '2')
        image_path = os.path.join(scan_path, f'images_{lower_b}_{upper_b}')
        label_path = os.path.join(scan_path, 'labels')
        crop_path = os.path.join(scan_path, f'crops3_{lower_b}_{upper_b}')
        if os.path.exists(crop_path):
            shutil.rmtree(crop_path)
        os.mkdir(crop_path)

        offset = branch_end - branch_start
        s_ok = _crop_range_with_margin(
            image_path, label_path, crop_path, patient, 's',
            branch_start + int(0.1 * offset), branch_end - int(0.2 * offset))
        offset = aorta_end - branch_start
        j_ok = _crop_range_with_margin(
            image_path, label_path, crop_path, patient, 'j',
            branch_start + int(0.05 * offset), aorta_end - int(0.1 * offset))
        if not (s_ok and j_ok):
            error_patient_list.append(patient)

# 2.疾病数据

In [None]:
# Report which patients have no scan folder named '2'.
def print_no_cta(input_dir):
    """List the patient folders under ``input_dir`` that lack a ``'2'`` sub-folder.

    Plain files in ``input_dir`` are skipped.  A patient without ``'2'`` is
    printed together with its folder content and collected.  A patient that
    has ``'2'`` but no ``images_{lower_b}_{upper_b}`` folder inside it is only
    reported, not collected.

    Returns:
        list[str]: paths of the patient folders without a ``'2'`` entry.
    """
    print(f'**********{input_dir}')
    missing = []
    for patient in sorted(os.listdir(input_dir)):
        patient_path = os.path.join(input_dir, patient)
        if os.path.isfile(patient_path):
            continue
        scans = os.listdir(patient_path)
        if '2' in scans:
            if f'images_{lower_b}_{upper_b}' not in os.listdir(os.path.join(patient_path, '2')):
                print(f'have 2 but not have images_{lower_b}_{upper_b}', patient_path)
        else:
            missing.append(patient_path)
            print(patient_path, scans)
    return missing

# Collect the disease-cohort patients that still lack a '2' folder.
no_cta_list = [
    patient_path
    for dataset_dir in (
        '/nfs3-p1/zsxm/dataset/2021-9-8',
        '/nfs3-p1/zsxm/dataset/2021-9-13/',
        '/nfs3-p1/zsxm/dataset/2021-9-19/',
        '/nfs3-p1/zsxm/dataset/2021-9-28/',
        '/nfs3-p2/zsxm/dataset/2021-10-19-imh/',
        '/nfs3-p2/zsxm/dataset/2021-10-19-pau/',
        '/nfs3-p2/zsxm/dataset/2021-10-19-aa/',
        '/nfs3-p2/zsxm/dataset/2021-11-20/',
        '/nfs3-p2/zsxm/dataset/2021-11-20-imh/',
        '/nfs3-p2/zsxm/dataset/2021-11-20-pau/',
    )
    for patient_path in print_no_cta(dataset_dir)
]
print(no_cta_list)

In [None]:
# Rename one scan folder of every patient in no_cta_list to '2'.  With exactly two
# entries the one that is not '1' is renamed; otherwise the scan whose slice thickness
# is closest to that of scan '1' is chosen, preferring the thinner scan on a tie.
for patient in no_cta_list:
    scans = os.listdir(patient)
    if '1' not in scans:
        print(patient, 'not have 1')
        continue
    if len(scans) == 2:
        for scan in scans:
            if scan != '1':
                os.rename(os.path.join(patient, scan), os.path.join(patient, '2'))
        continue

    # Reset per patient so a thickness read for a previous patient is never reused
    # when scan '1' of this patient has no readable slice.
    ct_thickness = None
    tk_list = []
    for scan in scans:
        scan_path = os.path.join(patient, scan)
        if not os.path.isdir(scan_path):
            # A stray file next to the scan folders cannot be listed.
            continue
        for s in os.listdir(scan_path):
            slice_path = os.path.join(scan_path, s)
            if os.path.isdir(slice_path) or not s.endswith('.dcm'):
                continue
            sl = pydicom.dcmread(slice_path)
            try:
                sl_p = sl.pixel_array
            except AttributeError:
                # Slices without accessible pixel data do not take part in the comparison.
                continue
            if scan == '1':
                ct_thickness = sl.SliceThickness
            else:
                tk_list.append((sl.SliceThickness, scan))

    if ct_thickness is None or not tk_list:
        # Nothing to compare: leave the folders untouched instead of crashing.
        print(patient, 'no readable slice to compare thickness, skip')
        continue

    # Smallest thickness distance wins; ties go to the smaller thickness.
    min_tk, min_scan = min(tk_list, key=lambda item: (abs(item[0] - ct_thickness), item[0]))
    print(patient, min_scan)
    os.rename(os.path.join(patient, min_scan), os.path.join(patient, '2'))

In [None]:
def check_label(input_path):
    """Check that the CT and CTA labels of every row have the same number of parts.

    Reads sheet ``Sheet1`` of ``<input_path>/label.xlsx`` and skips the first
    row.  For every other row with both column index 3 (CT) and column index 4
    (CTA) filled, the lower-cased values are split on ``'-'`` and must yield
    the same number of parts.

    Args:
        input_path: dataset folder that contains ``label.xlsx``.

    Raises:
        AssertionError: on the first row whose labels differ in length; the
            message names the dataset, the value of the row's first column and
            both labels.
    """
    workbook_path = os.path.join(input_path, 'label.xlsx')
    wb = openpyxl.load_workbook(workbook_path)
    sheet = wb['Sheet1']
    for i, row in enumerate(sheet.iter_rows()):
        if i == 0:
            continue
        if row[3].value is None or row[4].value is None:
            continue
        # The message used to reference an undefined name ``patient``; the first
        # column is what crop_images matches against the patient folder name.
        patient = row[0].value
        plct = row[3].value.lower().split('-')
        pl = row[4].value.lower().split('-')
        assert len(pl) == len(plct), f'CT和CTA标签不等长{input_path}:{patient}, CT:{plct}, CTA:{pl}'
            
# Validate the label workbooks of the disease datasets.
for _dataset_dir in (
    '/nfs3-p1/zsxm/dataset/2021-9-8/',
    '/nfs3-p1/zsxm/dataset/2021-9-13/',
    '/nfs3-p1/zsxm/dataset/2021-9-19/',
    '/nfs3-p1/zsxm/dataset/2021-9-28/',
    '/nfs3-p2/zsxm/dataset/2021-10-19-imh/',
    '/nfs3-p2/zsxm/dataset/2021-11-20/',
    '/nfs3-p2/zsxm/dataset/2021-11-20-imh/',
):
    check_label(_dataset_dir)
del _dataset_dir  # do not leave the loop helper behind in the notebook namespace

In [None]:
# Convert the DICOM files under each patient's '2' folder to PNG images using the
# configured window width / level.
def generate_image(input_folder):
    """(Re)write windowed PNG slices for every patient that has a ``'2'`` scan folder.

    For each patient folder in ``input_folder`` the folder
    ``2/images_{lower_b}_{upper_b}`` is removed if present and recreated.  The
    slices returned by ``load_scan`` are rescaled with ``RescaleSlope`` /
    ``RescaleIntercept``, clipped to ``[lower_b, upper_b]``, mapped to 0-255
    and saved as ``<patient>_<index:04d>.png``.  Non-square slices are
    centre-cropped to a square.

    Plain files are skipped; patients without a ``'2'`` entry are announced but
    otherwise left untouched.
    """
    images_dirname = f'images_{lower_b}_{upper_b}'
    for patient in sorted(os.listdir(input_folder)):
        patient_path = os.path.join(input_folder, patient)
        if os.path.isfile(patient_path):
            continue
        print(f'****Processing {patient}****')
        if '2' not in os.listdir(patient_path):
            continue
        scan_path = os.path.join(patient_path, '2')
        image_path = os.path.join(scan_path, images_dirname)
        if os.path.exists(image_path):
            # Always regenerate from scratch.
            shutil.rmtree(image_path)
        os.mkdir(image_path)

        ct = load_scan(scan_path)
        print_flag = False
        for i, ds in enumerate(ct):
            img = ds.pixel_array.astype(np.int16)
            slope = ds.RescaleSlope
            if slope != 1:
                img = (slope * img.astype(np.float64)).astype(np.int16)
            img += np.int16(ds.RescaleIntercept)
            img = np.clip(img, lower_b, upper_b)
            img = ((img - lower_b) / (upper_b - lower_b) * 255).astype(np.uint8)
            img = Image.fromarray(img)
            if img.height != img.width:
                if not print_flag:
                    # Reported once per patient.
                    print(patient, f'height({img.height}) not equal to width({img.width})\n')
                    print_flag = True
                side = min(img.height, img.width)
                # Integer offsets: a fractional box is rounded by Pillow, which could
                # leave the result one pixel off square when the difference is odd.
                top = (img.height - side) // 2
                left = (img.width - side) // 2
                img = img.crop((left, top, left + side, top + side))
            img.save(os.path.join(image_path, f'{patient}_{i:04d}.png'))

# Convert every disease dataset, printing a separator line between two datasets.
_dataset_dirs = (
    '/nfs3-p1/zsxm/dataset/2021-9-8/',
    '/nfs3-p1/zsxm/dataset/2021-9-13/',
    '/nfs3-p1/zsxm/dataset/2021-9-19/',
    '/nfs3-p1/zsxm/dataset/2021-9-28/',
    '/nfs3-p2/zsxm/dataset/2021-10-19-aa/',
    '/nfs3-p2/zsxm/dataset/2021-10-19-imh/',
    '/nfs3-p2/zsxm/dataset/2021-10-19-pau/',
    '/nfs3-p2/zsxm/dataset/2021-11-20/',
    '/nfs3-p2/zsxm/dataset/2021-11-20-imh/',
    '/nfs3-p2/zsxm/dataset/2021-11-20-pau/',
)
for _position, _dataset_dir in enumerate(_dataset_dirs):
    if _position:
        # Separator only between datasets, not before the first or after the last.
        print('----------------------------------------------------------------------------')
    generate_image(_dataset_dir)
del _dataset_dirs, _position, _dataset_dir  # keep the notebook namespace unchanged

In [None]:
# Gather the PNG folders of all patients in one place for yolov5 detection.
# not_move=True skips patients that already have detection results.
def move_together_for_detect(input_folder, dst_path, not_move=True):
    """Copy each patient's ``images_{lower_b}_{upper_b}`` folder below ``dst_path``.

    The copies land in ``<dst_path>/<dataset name>/<patient>``, where the
    dataset name is the last component of ``input_folder`` (a trailing slash is
    ignored).  An existing destination folder of a patient is removed first.

    Args:
        input_folder: dataset folder holding one sub-folder per patient.
        dst_path: collection root; created, including parents, when missing.
        not_move: when true, patients whose ``'2'`` folder already contains both
            ``labels`` and ``pred_images_{lower_b}_{upper_b}`` are skipped.

    A failed copy is reported with its traceback and does not stop the loop.
    """
    os.makedirs(dst_path, exist_ok=True)
    root_name = os.path.basename(os.path.normpath(input_folder))
    dst_path = os.path.join(dst_path, root_name)

    for patient in sorted(os.listdir(input_folder)):
        patient_path = os.path.join(input_folder, patient)
        if os.path.isfile(patient_path):
            continue
        scan_path = os.path.join(patient_path, '2')
        already_detected = (
            os.path.exists(os.path.join(scan_path, 'labels'))
            and os.path.exists(os.path.join(scan_path, f'pred_images_{lower_b}_{upper_b}'))
        )
        if not_move and already_detected:
            continue
        print(f'****Processing {patient}****')
        target = os.path.join(dst_path, patient)
        if os.path.exists(target):
            print(f"\tremove {target}")
            shutil.rmtree(target)
        try:
            shutil.copytree(os.path.join(scan_path, f'images_{lower_b}_{upper_b}'), target)
        except Exception:
            # Best effort per patient; unlike a bare ``except`` this still lets
            # KeyboardInterrupt stop a long run.
            traceback.print_exc()

# Collect the images of the disease datasets under the shared detection folder.
# The aa / pau datasets were already commented out in the original cell.
for _dataset_dir in (
    '/nfs3-p1/zsxm/dataset/2021-9-8/',
    '/nfs3-p1/zsxm/dataset/2021-9-13/',
    '/nfs3-p1/zsxm/dataset/2021-9-19/',
    '/nfs3-p1/zsxm/dataset/2021-9-28/',
    # '/nfs3-p2/zsxm/dataset/2021-10-19-aa/',
    '/nfs3-p2/zsxm/dataset/2021-10-19-imh/',
    # '/nfs3-p2/zsxm/dataset/2021-10-19-pau/',
    '/nfs3-p2/zsxm/dataset/2021-11-20/',
    '/nfs3-p2/zsxm/dataset/2021-11-20-imh/',
    # '/nfs3-p2/zsxm/dataset/2021-11-20-pau/',
):
    move_together_for_detect(_dataset_dir, '/nfs3-p1/zsxm/dataset/9_detect/')
del _dataset_dir  # do not leave the loop helper behind in the notebook namespace

In [None]:
# Move the detection results back into the original patient folders.
def move_back(result_path, ori_path):
    """Move the per-patient detection output from ``result_path`` into ``ori_path``.

    For every entry ``<patient>`` of ``result_path``:
      * files go to ``<ori_path>/<patient>/2/pred_images_{lower_b}_{upper_b}``,
        which is recreated empty first;
      * sub-folders replace the folder of the same name in ``<ori_path>/<patient>/2``.
    The emptied patient folder and finally ``result_path`` itself are removed
    with ``os.rmdir``.  Nothing happens, apart from a message, when
    ``result_path`` does not exist.
    """
    if not os.path.exists(result_path):
        print(f'目录不存在：{result_path}')
        return
    for patient in sorted(os.listdir(result_path)):
        print(f'Processing {patient}')
        source_dir = os.path.join(result_path, patient)
        scan_dir = os.path.join(ori_path, patient, '2')
        pred_dir = os.path.join(scan_dir, f'pred_images_{lower_b}_{upper_b}')
        if os.path.exists(pred_dir):
            shutil.rmtree(pred_dir)
        os.mkdir(pred_dir)
        for entry in os.listdir(source_dir):
            source = os.path.join(source_dir, entry)
            if os.path.isfile(source):
                shutil.move(source, os.path.join(pred_dir, entry))
            elif os.path.isdir(source):
                target = os.path.join(scan_dir, entry)
                if os.path.exists(target):
                    shutil.rmtree(target)
                shutil.move(source, target)
        os.rmdir(source_dir)
    os.rmdir(result_path)


# Return the yolov5 output of each disease dataset to its patient folders.
# NOTE(review): 2021-9-19 and 2021-9-28 are moved back to /nfs3-p2 and 2021-11-20 to
# /nfs3-p1, whereas the generate_image / move_together_for_detect cells use
# /nfs3-p1 for the first two and /nfs3-p2 for the last -- confirm the mounts.
_detect_root = '/home/zsxm/pythonWorkspace/yolov5_old/runs/detect/'
for _run_name, _ori_path in (
    # ('2021-9-8', '/nfs3-p2/zsxm/dataset/2021-9-8/'),
    ('2021-9-13', '/nfs3-p1/zsxm/dataset/2021-9-13/'),
    ('2021-9-19', '/nfs3-p2/zsxm/dataset/2021-9-19/'),
    ('2021-9-28', '/nfs3-p2/zsxm/dataset/2021-9-28/'),
    # ('2021-10-19-imh', '/nfs3-p2/zsxm/dataset/2021-10-19-imh/'),
    ('2021-11-20', '/nfs3-p1/zsxm/dataset/2021-11-20/'),
    ('2021-11-20-imh', '/nfs3-p2/zsxm/dataset/2021-11-20-imh/'),
):
    move_back(_detect_root + _run_name, _ori_path)
del _detect_root, _run_name, _ori_path  # keep the notebook namespace unchanged

In [None]:
# Crop out the aorta.
def find_coordinate(height, width, label_file, aorta):
    """Return the square crop box ``(x1, y1, x2, y2)`` of one aorta in a label file.

    Each non-blank line of ``label_file`` is parsed as whitespace-separated
    numbers; fields 1-4 are used as centre x, centre y, width and height,
    expressed as fractions of the image size (presumably YOLO-style labels --
    confirm against the detector output).

    Args:
        height, width: image size in pixels.
        label_file: path of the label text file (one or two boxes).
        aorta: ``'s'`` selects the box with the smaller centre y, ``'j'`` the
            one with the larger centre y.  With a single box only ``'j'`` is
            accepted.

    Returns:
        tuple[int, int, int, int]: a square whose side is the larger of the
        box's pixel width and height, centred on the box.

    Raises:
        AssertionError: empty file, more than two boxes, a single box requested
            as anything but ``'j'``, or a box centre outside the accepted region.
        Exception: two boxes and ``aorta`` is neither ``'s'`` nor ``'j'``.
    """
    label_name = label_file.split("/")[-1]

    def require(condition, message):
        # Explicit raise instead of ``assert`` so the checks survive ``python -O``.
        if not condition:
            raise AssertionError(message)

    with open(label_file, 'r') as f:
        # Blank lines (e.g. a trailing empty line) carry no box and are ignored.
        boxes = [[float(value) for value in line.split()] for line in f if line.strip()]

    # An empty file used to fail later with a bare IndexError.
    require(boxes, f'label.txt不应为空：{label_name}')
    require(len(boxes) <= 2, f'label.txt应该存储不多于2个label：{label_name}')

    if len(boxes) == 1:
        require(aorta == 'j', f'如果只有一个label那么此时应为降主动脉, 但实际为{aorta}：{label_name}')
        box = boxes[0]
        require(0.25 < box[1] < 0.75 and 0.15 < box[2] < 0.85,
                f'边界框中心({box[1]}, {box[2]})出界：{label_name}')
    else:
        first, second = boxes
        require(0.25 < first[1] < 0.75 and 0.15 < first[2] < 0.85,
                f'边界框1中心({first[1]}, {first[2]})出界：{label_name}')
        require(0.25 < second[1] < 0.75 and 0.15 < second[2] < 0.85,
                f'边界框2中心({second[1]}, {second[2]})出界：{label_name}')
        if aorta == 's':
            box = first if first[2] < second[2] else second
        elif aorta == 'j':
            box = first if first[2] > second[2] else second
        else:
            raise Exception(f'aorta 应该为"s"或"j"其中之一: {label_name}')

    x, y, w, h = box[1], box[2], box[3], box[4]
    side = max(int(width * w), int(height * h))
    return (int(width * x - side / 2), int(height * y - side / 2),
            int(width * x + side / 2 + 1), int(height * y + side / 2 + 1))

def crop_images(input_path, error_patient_list):
    """Save a square aorta crop for every labelled slice of every patient in ``input_path``.

    ``input_path`` must contain ``label.xlsx`` (sheet ``Sheet1``) and one directory per
    patient.  A patient is processed only when the first sheet row whose column 0 equals
    the directory name up to its first ``-`` has values in both column 3 and column 4.
    Column 4 is lower-cased and split on ``-``; every ``s`` / ``j`` token must be followed
    by a 1-based first slice number and an inclusive last slice number.

    For each slice ``j`` in that range the image
    ``<patient>/2/images_{lower_b}_{upper_b}/<patient>_<j:04d>.png`` is cropped to the box
    returned by ``find_coordinate`` (using ``<patient>/2/labels/<patient>_<j:04d>.txt``)
    and written to ``<patient>/2/crops_{lower_b}_{upper_b}/<patient>_<s>_<j:04d>.png``.
    The crop directory is deleted and recreated on every run.

    Args:
        input_path: Dataset directory holding ``label.xlsx`` and the patient folders.
        error_patient_list: List that receives, in place, the name of every patient for
            which at least one slice could not be read or cropped.

    Raises:
        AssertionError: If columns 3 and 4 of a patient's row split into a different
            number of ``-`` separated tokens.
    """
    workbook_path = os.path.join(input_path, 'label.xlsx')
    wb = openpyxl.load_workbook(workbook_path)
    sheet = wb['Sheet1']

    for patient in sorted(os.listdir(input_path)):
        if os.path.isfile(os.path.join(input_path, patient)):
            continue
        # Only the first row matching the patient id is consulted.
        patient_id = patient.split('-')[0]
        row = next((r for r in sheet.iter_rows() if r[0].value == patient_id), None)
        if row is None or row[3].value is None or row[4].value is None:
            continue
        pl = row[4].value.lower().split('-')
        plct = row[3].value.lower().split('-')
        assert len(pl) == len(plct), f'CT和CTA标签不等长{input_path}:{patient}, {len(pl)}, {len(plct)}'

        print(f'******Processing {patient}******')
        image_path = os.path.join(input_path, patient, '2', f'images_{lower_b}_{upper_b}')
        label_path = os.path.join(input_path, patient, '2', 'labels')
        crop_path = os.path.join(input_path, patient, '2', f'crops_{lower_b}_{upper_b}')
        if os.path.exists(crop_path):
            shutil.rmtree(crop_path)
        os.mkdir(crop_path)

        crop_flag = True
        for i, s in enumerate(pl):
            if s not in ('s', 'j'):
                continue
            # Spreadsheet slice numbers are 1-based and inclusive; file indices are 0-based.
            start, end = int(pl[i+1])-1, int(pl[i+2])
            for j in range(start, end):
                try:
                    # The context manager closes the file handle once the pixels are copied.
                    with Image.open(os.path.join(image_path, f'{patient}_{j:04d}.png')) as im:
                        img = np.array(im)
                    x1, y1, x2, y2 = find_coordinate(*img.shape[0:2], os.path.join(label_path, f'{patient}_{j:04d}.txt'), s)
                except Exception:
                    # Record the failure and keep going; Ctrl-C (KeyboardInterrupt) still propagates.
                    traceback.print_exc()
                    crop_flag = False
                else:
                    crop = Image.fromarray(img[y1:y2, x1:x2])
                    crop.save(os.path.join(crop_path, f'{patient}_{s}_{j:04d}.png'))
        if not crop_flag:
            error_patient_list.append(patient)

# Patients with at least one failed slice are accumulated across all datasets.
epl1 = []
for dataset_dir in (
    '/nfs3-p1/zsxm/dataset/2021-9-8/',
    '/nfs3-p1/zsxm/dataset/2021-9-13/',
    '/nfs3-p1/zsxm/dataset/2021-9-19/',
    '/nfs3-p1/zsxm/dataset/2021-9-28/',
    '/nfs3-p1/zsxm/dataset/2021-10-19-imh/',
    '/nfs3-p1/zsxm/dataset/2021-11-20/',
    '/nfs3-p1/zsxm/dataset/2021-11-20-imh/',
):
    crop_images(dataset_dir, epl1)

In [None]:
# Number of patients with crop errors, then the patients themselves (one value per line).
print(len(epl1), epl1, sep='\n')

In [None]:
# Crop out the aorta, including 3 extra slices on each side of the labelled range
def find_coordinate(height, width, label_file, aorta):
    """Return the square pixel crop box ``(x1, y1, x2, y2)`` for one aorta label.

    ``label_file`` is a text file with one box per non-blank line.  On each line the
    fields at positions 1-4 are read as centre x, centre y, box width and box height,
    all as fractions of the image size; the field at position 0 is ignored.

    Args:
        height: Image height in pixels.
        width: Image width in pixels.
        label_file: Path of the label text file (one or two boxes).
        aorta: ``'j'`` or ``'s'``.  A file with a single box is only accepted for
            ``'j'`` (the assertion message calls this the descending aorta).  With two
            boxes, ``'s'`` selects the box whose centre y is smaller and ``'j'`` the
            box whose centre y is larger.

    Returns:
        ``(x1, y1, x2, y2)``: a square whose side is the larger of the box's pixel
        width and height, centred on the box, with one extra pixel on the far edge.
        The coordinates are clamped to ``[0, width]`` / ``[0, height]`` so they can be
        used directly as slice bounds (a negative start would wrap around in numpy).

    Raises:
        AssertionError: If the file is empty, holds more than two boxes, holds one box
            while ``aorta != 'j'``, or a box centre lies outside
            ``0.25 < x < 0.75`` / ``0.15 < y < 0.85``.
        Exception: If two boxes are present and ``aorta`` is neither ``'s'`` nor ``'j'``.
    """
    name = os.path.basename(label_file)
    with open(label_file, 'r') as f:
        # Skip blank lines so a trailing newline is not mistaken for a second label.
        lines = [line for line in f if line.strip()]
    assert len(lines) <= 2, f'label.txt应该存储不多于2个label：{name}'
    assert len(lines) >= 1, f'label.txt不应为空：{name}'
    if len(lines) == 1:
        assert aorta == 'j', f'如果只有一个label那么此时应为降主动脉, 但实际为{aorta}：{name}'
        corr = [float(v) for v in lines[0].split()]
        x, y, w, h = corr[1], corr[2], corr[3], corr[4]
        assert 0.25 < x < 0.75 and 0.15 < y < 0.85, f'边界框中心({x}, {y})出界：{name}'
    else:
        corr1 = [float(v) for v in lines[0].split()]
        corr2 = [float(v) for v in lines[1].split()]
        assert 0.25 < corr1[1] < 0.75 and 0.15 < corr1[2] < 0.85, f'边界框1中心({corr1[1]}, {corr1[2]})出界：{name}'
        assert 0.25 < corr2[1] < 0.75 and 0.15 < corr2[2] < 0.85, f'边界框2中心({corr2[1]}, {corr2[2]})出界：{name}'
        if aorta == 's':
            chosen = corr1 if corr1[2] < corr2[2] else corr2
        elif aorta == 'j':
            chosen = corr1 if corr1[2] > corr2[2] else corr2
        else:
            raise Exception(f'aorta 应该为"s"或"j"其中之一: {name}')
        x, y, w, h = chosen[1], chosen[2], chosen[3], chosen[4]
    # Use the longer side for both dimensions so the crop is square.
    side = max(int(width*w), int(height*h))
    x1, y1 = int(width*x-side/2), int(height*y-side/2)
    x2, y2 = int(width*x+side/2+1), int(height*y+side/2+1)
    # Clamp to the image: a negative start index would make the caller's slice wrap around.
    return max(x1, 0), max(y1, 0), min(x2, width), min(y2, height)

def crop_images(input_path, error_patient_list):
    """Save square aorta crops for every labelled slice plus 3 slices on either side.

    ``input_path`` must contain ``label.xlsx`` (sheet ``Sheet1``) and one directory per
    patient.  A patient is processed only when the first sheet row whose column 0 equals
    the directory name up to its first ``-`` has values in both column 3 and column 4.
    Column 4 is lower-cased and split on ``-``; every ``s`` / ``j`` token must be followed
    by a 1-based first slice number and an inclusive last slice number.

    Crops are written to ``<patient>/2/crops3_{lower_b}_{upper_b}/`` (deleted and
    recreated on every run).  Slices inside the labelled range are saved as
    ``<patient>_<s>_<j:04d>.png``; the up-to-3 slices before and the 3 slices after it
    are saved with an ``_n`` suffix.  Indices below 0 are skipped because slice files
    are numbered from 0.

    Args:
        input_path: Dataset directory holding ``label.xlsx`` and the patient folders.
        error_patient_list: List that receives, in place, the name of every patient for
            which at least one slice (including margin slices) could not be read or
            cropped.

    Raises:
        AssertionError: If columns 3 and 4 of a patient's row split into a different
            number of ``-`` separated tokens.
    """
    margin = 3  # extra slices taken on each side of the labelled range
    workbook_path = os.path.join(input_path, 'label.xlsx')
    wb = openpyxl.load_workbook(workbook_path)
    sheet = wb['Sheet1']

    for patient in sorted(os.listdir(input_path)):
        if os.path.isfile(os.path.join(input_path, patient)):
            continue
        # Only the first row matching the patient id is consulted.
        patient_id = patient.split('-')[0]
        row = next((r for r in sheet.iter_rows() if r[0].value == patient_id), None)
        if row is None or row[3].value is None or row[4].value is None:
            continue
        pl = row[4].value.lower().split('-')
        plct = row[3].value.lower().split('-')
        assert len(pl) == len(plct), f'CT和CTA标签不等长{input_path}:{patient}, {len(pl)}, {len(plct)}'

        print(f'******Processing {patient}******')
        image_path = os.path.join(input_path, patient, '2', f'images_{lower_b}_{upper_b}')
        label_path = os.path.join(input_path, patient, '2', 'labels')
        crop_path = os.path.join(input_path, patient, '2', f'crops3_{lower_b}_{upper_b}')
        if os.path.exists(crop_path):
            shutil.rmtree(crop_path)
        os.mkdir(crop_path)

        crop_flag = True
        for i, s in enumerate(pl):
            if s not in ('s', 'j'):
                continue
            # Spreadsheet slice numbers are 1-based and inclusive; file indices are 0-based.
            start, end = int(pl[i+1])-1, int(pl[i+2])
            # Never step below slice 0: a negative index would only produce a bogus
            # "_-001.png" lookup and wrongly flag the patient as failed.
            for j in range(max(start-margin, 0), end+margin):
                try:
                    # The context manager closes the file handle once the pixels are copied.
                    with Image.open(os.path.join(image_path, f'{patient}_{j:04d}.png')) as im:
                        img = np.array(im)
                    x1, y1, x2, y2 = find_coordinate(*img.shape[0:2], os.path.join(label_path, f'{patient}_{j:04d}.txt'), s)
                except Exception:
                    # Record the failure and keep going; Ctrl-C (KeyboardInterrupt) still propagates.
                    traceback.print_exc()
                    crop_flag = False
                else:
                    crop = Image.fromarray(img[y1:y2, x1:x2])
                    # Margin slices outside the labelled range get the "_n" suffix.
                    suffix = '' if start <= j < end else '_n'
                    crop.save(os.path.join(crop_path, f'{patient}_{s}_{j:04d}{suffix}.png'))
        if not crop_flag:
            error_patient_list.append(patient)
            
# Patients with at least one failed slice are accumulated across all datasets.
epl1 = []
for dataset_dir in (
    '/nfs3-p1/zsxm/dataset/2021-9-8/',
    '/nfs3-p1/zsxm/dataset/2021-9-13/',
    '/nfs3-p1/zsxm/dataset/2021-9-19/',
    '/nfs3-p1/zsxm/dataset/2021-9-28/',
    '/nfs3-p1/zsxm/dataset/2021-10-19-imh/',
    '/nfs3-p1/zsxm/dataset/2021-11-20/',
    '/nfs3-p1/zsxm/dataset/2021-11-20-imh/',
):
    crop_images(dataset_dir, epl1)

In [None]:
# Number of patients with crop errors, then the patients themselves (one value per line).
print(len(epl1), epl1, sep='\n')

# 3.复制文件

In [None]:
# Target layout: <classify_path>/<train|val>/<0|1|2>
classify_path = f'/nfs3-p2/zsxm/dataset/aorta_classify_cta_{lower_b}_{upper_b}'
for dataset in ('train', 'val'):
    dst_path = os.path.join(classify_path, dataset)
    for cate in range(3):
        cls_path = os.path.join(dst_path, str(cate))
        # makedirs also creates the missing parents (classify_path and the split folder).
        os.makedirs(cls_path, exist_ok=True)

In [None]:
# Collect the patient ids (file name up to the first '_') already assigned to each split
# of the CT classification dataset.
train_set = set()
val_set = set()
ct_path = f'/nfs3-p2/zsxm/dataset/aorta_classify_ct_{lower_b}_{upper_b}/'
for split_name, patient_ids in (('train', train_set), ('val', val_set)):
    split_dir = os.path.join(ct_path, split_name)
    # Walk every class folder of the split; the previous 'val' loop reused the last
    # 'train' class name and therefore only scanned a single class folder.
    for cate in os.listdir(split_dir):
        for img in os.listdir(os.path.join(split_dir, cate)):
            patient_ids.add(img.split('_')[0])

In [None]:
# Patient counts: train first, then val.
print(*map(len, (train_set, val_set)))

In [None]:
def move_cta(input_path, cate, train_set, val_set):
    """Copy every labelled patient's crops into the train or val folder of class ``cate``.

    A patient directory under ``input_path`` is handled only when the first row of
    ``label.xlsx`` / ``Sheet1`` whose column 0 equals the directory name up to its first
    ``-`` has values in both column 3 and column 4.  All files in
    ``<patient>/2/crops_{lower_b}_{upper_b}`` are copied (not moved) to
    ``<classify_path>/<train|val>/<cate>``; ``classify_path``, ``lower_b`` and
    ``upper_b`` are read from the notebook's globals.

    Args:
        input_path: Dataset directory holding ``label.xlsx`` and the patient folders.
        cate: Class id; its ``str()`` is used as the destination folder name.
        train_set: Patient directory names that belong to the training split.
        val_set: Patient directory names that belong to the validation split.

    Raises:
        Exception: If a labelled patient is in neither ``train_set`` nor ``val_set``.
    """
    sheet = openpyxl.load_workbook(os.path.join(input_path, 'label.xlsx'))['Sheet1']

    for patient in sorted(os.listdir(input_path)):
        patient_dir = os.path.join(input_path, patient)
        if os.path.isfile(patient_dir):
            continue
        # Only the first spreadsheet row matching the patient id decides whether to proceed.
        patient_id = patient.split('-')[0]
        matched = next((r for r in sheet.iter_rows() if r[0].value == patient_id), None)
        if matched is None or matched[3].value is None or matched[4].value is None:
            continue
        print(f'******Processing {patient}******')
        if patient in train_set:
            split = 'train'
        elif patient in val_set:
            split = 'val'
        else:
            raise Exception(f'{patient} neither in train_set nor in val_set')
        dst_path = os.path.join(classify_path, split, str(cate))
        ori_path = os.path.join(patient_dir, '2', f'crops_{lower_b}_{upper_b}')
        for img in os.listdir(ori_path):
            shutil.copy(os.path.join(ori_path, img), os.path.join(dst_path, img))
            

# (dataset directory, class id) pairs.
# move_cta's signature is (input_path, cate, train_set, val_set); the arguments are
# passed by keyword because the previous positional order (path, train_set, val_set, cate)
# bound the class id to `val_set` and failed on the membership test.
for dataset_dir, cate in (
    ('/nfs3-p1/zsxm/dataset/2021-9-17-negative/', 0),
    ('/nfs3-p1/zsxm/dataset/2021-9-29-negative/', 0),
    ('/nfs3-p1/zsxm/dataset/2021-9-8/', 1),
    ('/nfs3-p1/zsxm/dataset/2021-9-13/', 1),
    ('/nfs3-p1/zsxm/dataset/2021-9-19/', 1),
    ('/nfs3-p1/zsxm/dataset/2021-9-28/', 1),
    ('/nfs3-p1/zsxm/dataset/2021-10-19-imh/', 2),
    ('/nfs3-p1/zsxm/dataset/2021-11-20/', 1),
    ('/nfs3-p1/zsxm/dataset/2021-11-20-imh/', 2),
):
    move_cta(dataset_dir, cate=cate, train_set=train_set, val_set=val_set)