# 0. 导入必要的库

In [418]:
import pandas as pd
import numpy as np
import cv2
import json
import yaml
import pydicom
import torch
import os
from torch.utils.data import Dataset
from matplotlib import pyplot as plt
from PIL import Image,ImageDraw,ImageFont
from PIL import ImageFile

# 1. 处理dicom原始影像文件

In [316]:
# Helper that searches a single folder for files whose name contains a keyword.
def get_data_files(input_dir,keyword):
    """Return the paths of the entries in ``input_dir`` whose name contains ``keyword``.

    Args:
        input_dir: Folder to scan (not recursive). A trailing path separator
            is optional.
        keyword: Substring a file name must contain to be selected.

    Returns:
        A list with ``input_dir`` joined to every matching name, in the order
        reported by ``os.listdir``. When nothing matches, a notice is printed
        and an empty list is returned.
    """
    # os.path.join only inserts a separator when one is missing, so the folder
    # may be passed with or without a trailing slash.
    file_list = [os.path.join(input_dir, file)
                 for file in os.listdir(input_dir) if keyword in file]
    if not file_list:
        print('No input data files found in input data folder, and contains %s!'%keyword)
    return file_list

In [317]:
# Convert every raw DICOM image found in the input folder into a JPEG file.
# The JPEGs are grouped into sub-folders named after the image width.
dicom_file_folder = './muba/raw_data/dicom/'
imgage_file_folder = './muba/images/'
dicom_files = get_data_files(dicom_file_folder,'dcm')
for dicom_file in dicom_files:
    # dcmread is the supported reader; read_file is a deprecated alias of it.
    ds = pydicom.dcmread(dicom_file)
    # Pixel data as a numpy array.
    img_arr = ds.pixel_array
    # Min-max normalisation to [0, 1]. The arithmetic is done in float64 so a
    # signed integer image cannot overflow during the subtraction, and a
    # constant image (zero value range) becomes all zeros instead of NaN.
    max_value = img_arr.max()
    min_value = img_arr.min()
    value_range = float(max_value) - float(min_value)
    if value_range > 0:
        img_std = (img_arr.astype('float64') - float(min_value)) / value_range
    else:
        img_std = np.zeros(img_arr.shape, dtype='float64')
    # A bright average means the background is white: invert the image so the
    # background is rendered black.
    if img_std.mean() > 0.5:
        img_std = 1-img_std
    # Output folder is derived from the image width (shape[1]), e.g. jpg_2012
    # holds the images that are 2012 pixels wide.
    output_folder = os.path.join(imgage_file_folder,'jpg_'+str(img_std.shape[1]))
    if not os.path.exists(output_folder):
        print(f'Creating output folder: {output_folder}')
        # makedirs also creates a missing parent folder.
        os.makedirs(output_folder, exist_ok=True)
    # Replace only the file extension; a plain str.replace('dcm','jpg') would
    # also rewrite "dcm" occurring elsewhere in the file name.
    file_stem = os.path.splitext(os.path.basename(dicom_file))[0]
    output_file_path = os.path.join(output_folder, file_stem + '.jpg')
    print(f'Saving image file to: {output_file_path}, shape = {img_arr.shape}')
    # Scale to 0..255 and store as unsigned 8-bit values, then save as JPEG
    # with quality 99 to limit the loss of image detail.
    img_std = (img_std*255).astype('uint8')
    image = Image.fromarray(img_std).convert('RGB')
    image.save(output_file_path,quality=99)

Saving image file to: ./muba/images/jpg_1890/2154382_18039_2_4.jpg, shape = (2457, 1890)
Saving image file to: ./muba/images/jpg_2012/1897781_8702_1.jpg, shape = (2812, 2012)
Saving image file to: ./muba/images/jpg_1890/2154382_18027_2_3.jpg, shape = (2457, 1890)
Saving image file to: ./muba/images/jpg_2012/2076218_17510_3.jpg, shape = (2812, 2012)
Saving image file to: ./muba/images/jpg_2800/2178225_30000020061600124049300000708_2.jpg, shape = (3518, 2800)
Saving image file to: ./muba/images/jpg_1890/2154382_18015_2_4.jpg, shape = (2457, 1890)
Saving image file to: ./muba/images/jpg_2012/2013601_35860_3.jpg, shape = (2812, 2012)
Saving image file to: ./muba/images/jpg_2800/2203663_30000020071400124775100000393_1.jpg, shape = (3518, 2800)
Saving image file to: ./muba/images/jpg_2800/2205093_30000020071500094690000000579_1.jpg, shape = (3518, 2800)
Saving image file to: ./muba/images/jpg_3328/2187535_2263_1.jpg, shape = (4096, 3328)
Saving image file to: ./muba/images/jpg_2800/2205093_3

Saving image file to: ./muba/images/jpg_2800/2185315_30000020062400244219400000117_4.jpg, shape = (3518, 2800)
Saving image file to: ./muba/images/jpg_2560/2173935_6550_1.jpg, shape = (3328, 2560)
Saving image file to: ./muba/images/jpg_2560/2203458_5611_1.jpg, shape = (3328, 2560)
Saving image file to: ./muba/images/jpg_266/2154382_18013_1.jpg, shape = (425, 266)
Saving image file to: ./muba/images/jpg_2560/2181451_4980_1.jpg, shape = (3328, 2560)
Saving image file to: ./muba/images/jpg_2012/1413566_5360_3.jpg, shape = (2812, 2012)
Saving image file to: ./muba/images/jpg_2800/2194645_30000020070600010300900001109_4.jpg, shape = (3518, 2800)
Saving image file to: ./muba/images/jpg_3328/2180596_3529_1.jpg, shape = (4096, 3328)
Saving image file to: ./muba/images/jpg_2560/2181629_5298_1.jpg, shape = (3328, 2560)
Saving image file to: ./muba/images/jpg_2800/2117153_30000020041600171040200000013_3.jpg, shape = (3518, 2800)
Saving image file to: ./muba/images/jpg_2560/2202224_391_1.jpg, sha

Saving image file to: ./muba/images/jpg_2560/2189185_1531_1.jpg, shape = (3328, 2560)
Saving image file to: ./muba/images/jpg_2560/2205047_8555_1.jpg, shape = (3328, 2560)
Saving image file to: ./muba/images/jpg_2560/2199403_7780_1.jpg, shape = (3328, 2560)
Saving image file to: ./muba/images/jpg_2560/2199403_7786_1.jpg, shape = (3328, 2560)
Saving image file to: ./muba/images/jpg_2560/2203458_5623_1.jpg, shape = (3328, 2560)
Saving image file to: ./muba/images/jpg_2560/2179535_1108_1.jpg, shape = (3328, 2560)
Saving image file to: ./muba/images/jpg_2012/2081357_3382_2.jpg, shape = (2812, 2012)
Saving image file to: ./muba/images/jpg_1890/2154382_18003_2_1.jpg, shape = (2457, 1890)
Saving image file to: ./muba/images/jpg_2560/2203608_6167_1.jpg, shape = (3328, 2560)
Saving image file to: ./muba/images/jpg_1890/2149244_5941_2_1.jpg, shape = (2457, 1890)
Saving image file to: ./muba/images/jpg_2560/2191371_7623_1.jpg, shape = (3328, 2560)
Saving image file to: ./muba/images/jpg_2800/2206

Saving image file to: ./muba/images/jpg_2560/2193887_11021_1.jpg, shape = (3328, 2560)
Saving image file to: ./muba/images/jpg_2012/1911271_4077_2.jpg, shape = (2812, 2012)
Saving image file to: ./muba/images/jpg_2560/2187260_1975_1.jpg, shape = (3328, 2560)
Saving image file to: ./muba/images/jpg_2800/2206203_30000020071523563519900000592_1.jpg, shape = (3518, 2800)
Saving image file to: ./muba/images/jpg_2012/2079836_28390_2.jpg, shape = (2812, 2012)
Saving image file to: ./muba/images/jpg_2560/2178438_14031_1.jpg, shape = (3328, 2560)
Saving image file to: ./muba/images/jpg_2560/2193887_11015_1.jpg, shape = (3328, 2560)
Saving image file to: ./muba/images/jpg_2560/2181629_5292_1.jpg, shape = (3328, 2560)
Saving image file to: ./muba/images/jpg_2800/2027825_30000019120400241084400000217_2.jpg, shape = (3518, 2800)
Saving image file to: ./muba/images/jpg_2800/2205926_30000020071523563519900000440_4.jpg, shape = (3518, 2800)
Saving image file to: ./muba/images/jpg_2800/2199991_30000020

Saving image file to: ./muba/images/jpg_2800/1998252_30000019110500294897100000254_5.jpg, shape = (3518, 2800)
Saving image file to: ./muba/images/jpg_266/2149244_5939_1.jpg, shape = (425, 266)
Saving image file to: ./muba/images/jpg_2560/2191812_7959_1.jpg, shape = (3328, 2560)
Saving image file to: ./muba/images/jpg_2012/2013601_35860_4.jpg, shape = (2812, 2012)
Saving image file to: ./muba/images/jpg_2560/2179833_1477_1.jpg, shape = (3328, 2560)
Saving image file to: ./muba/images/jpg_2560/2202224_385_1.jpg, shape = (3328, 2560)
Saving image file to: ./muba/images/jpg_2012/1894994_7292_3.jpg, shape = (2812, 2012)
Saving image file to: ./muba/images/jpg_2560/2201334_10104_1.jpg, shape = (3328, 2560)
Saving image file to: ./muba/images/jpg_2560/2191371_7629_1.jpg, shape = (3328, 2560)
Saving image file to: ./muba/images/jpg_3328/2187654_2535_1.jpg, shape = (4096, 3328)
Saving image file to: ./muba/images/jpg_2012/1894994_7292_1.jpg, shape = (2812, 2012)
Saving image file to: ./muba/im

# 2. 解析导出的Json格式标注文件

In [329]:
def get_data_files(input_dir,keyword,file_type):
    """Return the paths of files in ``input_dir`` matching an extension and a keyword.

    Args:
        input_dir: Folder to scan (not recursive). A trailing path separator
            is optional.
        keyword: Substring a file name must contain to be selected.
        file_type: File extension without the leading dot, e.g. ``'jpg'``.

    Returns:
        A list with ``input_dir`` joined to every name that ends with
        ``'.' + file_type`` and contains ``keyword``, in the order reported by
        ``os.listdir``. When nothing matches, a notice is printed and an empty
        list is returned.
    """
    # Match the extension at the end of the name only, so that e.g.
    # "x.jpg.bak" is not mistaken for a jpg file.
    suffix = '.' + file_type
    file_list = [os.path.join(input_dir, file)
                 for file in os.listdir(input_dir)
                 if file.endswith(suffix) and keyword in file]
    if not file_list:
        # Report the extension that was actually requested.
        print('No input data files found, files should be ended with .%s, and contains %s!'
              % (file_type, keyword))
    return file_list

In [330]:
def extract_json_patient(json_patient,dicom_file_folder):
    """Collect the lesion annotations of one patient into a single DataFrame.

    Args:
        json_patient: Mapping of image key -> annotation dict. The key is split
            on ``'@'``: the first field is the patient id, the second a dotted
            UID, and the image id is ``<last UID component>_<last field>``.
            Each annotation dict provides the lists ``poligonMarkArray``,
            ``freeMarkArray`` and ``borderList``.
        dicom_file_folder: Folder holding ``<patient_id>_<image_id>.dcm``.

    Returns:
        A DataFrame with one row per marker and the columns patient_id,
        image_id, mark_index, width, height, x1, x2, y1, y2, label, name,
        comment; or ``None`` when none of the patient's images has a DICOM
        file. Images without a DICOM file are reported and skipped, so every
        returned row carries the comment ``'ok'``.
    """
    output_columns = ['patient_id','image_id','mark_index','width','height',
                      'x1','x2','y1','y2','label','name','comment']
    df_image_list = []
    # One patient maps to several images; one image may hold several lesions.
    for image_key, image_text in json_patient.items():
        key_parts = image_key.split('@')
        patient_id = key_parts[0]
        uid = key_parts[1]
        image_id = str(uid.split('.')[-1]) + '_' + key_parts[-1]

        # Without the DICOM file the image size is unknown, so skip the image.
        dicom_file_path = os.path.join(dicom_file_folder,str(patient_id)+'_'+image_id+'.dcm')
        if not os.path.exists(dicom_file_path):
            print(f'{dicom_file_path} not found!')
            continue

        # Markers carry the lesion class (label) and annotator name; they come
        # from two lists that share the same fields.
        markers = list(image_text['poligonMarkArray']) + list(image_text['freeMarkArray'])
        df_marker = pd.DataFrame(
            [[marker['index'],marker['label'],marker['name']] for marker in markers],
            columns=['mark_index','label','name'])
        # Bounding boxes are stored separately and linked to the markers
        # through the shared index value.
        df_border = pd.DataFrame(
            [[border['index'],border['x1'],border['x2'],border['y1'],border['y2']]
             for border in image_text['borderList']],
            columns=['mark_index','x1','x2','y1','y2'])
        df = df_marker.merge(df_border,how='left',on='mark_index')
        df['patient_id'] = patient_id
        df['image_id'] = image_id
        df['comment'] = 'ok'

        # Only the image size is needed: read the header and take the
        # Rows/Columns attributes instead of decoding the whole pixel array.
        ds = pydicom.dcmread(dicom_file_path, stop_before_pixels=True)
        df['width'] = ds.Columns
        df['height'] = ds.Rows
        df_image_list.append(df)

    if not df_image_list:
        return None
    # Merge the per-image tables into one table for the patient.
    df_image = pd.concat(df_image_list,axis=0)
    return df_image[output_columns]

In [331]:
# Load the exported JSON annotation file and take out the list of patient records.
with open('./muba/raw_data/label_raw.json','rb') as f:
    label_raw_all = json.load(f)
patient_info_list = label_raw_all['RECORDS']

# For every patient record, parse the embedded JSON string and extract the
# detailed annotations into a per-patient DataFrame.
dicom_file_folder = './muba/raw_data/dicom/'
print('正在提取每个患者的标注信息：', end = ' ... ')
df_data = []
for patient_info in patient_info_list:
    patient_id, label_text = patient_info['检查号'], patient_info['标注信息']
    print(patient_id, end = ' ... ')
    label_info = json.loads(label_text)
    df_image = extract_json_patient(label_info,dicom_file_folder)
    df_data += [df_image]

# Stack the per-patient tables into one DataFrame with a fresh 0..n-1 index.
label_data = pd.concat(df_data,axis=0,ignore_index=True)

正在提取每个患者的标注信息： ... 1413566 ... 1861739 ... 1894994 ... 1896172 ... 1897781 ... 1911271 ... 1998252 ... 2011802 ... 2013601 ... 2027825 ... 2076053 ... 2076218 ... 2079836 ... 2081357 ... 2084421 ... 2086515 ... 2086992 ... 2117153 ... 2139295 ... 2140613 ... 2142067 ... 2149244 ... 2154382 ... 2173935 ... 2177570 ... 2178225 ... 2178438 ... 2179535 ... 2179833 ... 2180596 ... 2180764 ... ./muba/raw_data/dicom/2180764_3993_1.dcm not found!
./muba/raw_data/dicom/2180764_3999_1.dcm not found!
2181144 ... 2181161 ... 2181451 ... 2181629 ... 2181821 ... 2182025 ... 2183508 ... 2183987 ... 2185315 ... 2187260 ... 2187535 ... 2187603 ... 2187649 ... 2187654 ... 2188794 ... 2189185 ... 2189409 ... 2190446 ... 2191371 ... 2191812 ... 2192124 ... 2193407 ... 2193698 ... 2193887 ... 2194619 ... 2194645 ... 2195560 ... 2196068 ... 2196739 ... 2198808 ... 2199242 ... 2199294 ... 2199403 ... 2199991 ... 2200043 ... 2200443 ... 2201330 ... 2201334 ... 2201574 ... 2202224 ... 2203458 ... 2203459 ... 2

In [332]:
# Post-process the annotations: turn the corner coordinates into the YOLO
# representation (box centre and box size, each divided by the image size).
box_x1, box_x2 = label_data['x1'], label_data['x2']
box_y1, box_y2 = label_data['y1'], label_data['y2']
img_width, img_height = label_data['width'], label_data['height']
label_data['iid'] = label_data['patient_id'].astype(str) + '_' + label_data['image_id'].astype(str)
label_data['x'] = (box_x1 + box_x2) / 2 / img_width
label_data['y'] = (box_y1 + box_y2) / 2 / img_height
label_data['w'] = (box_x1 - box_x2).abs() / img_width
label_data['h'] = (box_y1 - box_y2).abs() / img_height
# Persist the processed annotation table.
label_data.to_csv('./muba/label_data.csv',index=False)
patient_number = label_data['patient_id'].nunique(dropna=False)
image_number = label_data['iid'].nunique(dropna=False)
print(f'标注信息表处理完毕，表中包含{patient_number}个病患以及{image_number}张影像')
label_data.shape

标注信息表处理完毕，表中包含85个病患以及201张影像


(603, 17)

In [333]:
# Spot-check the processed annotation table to see whether its format and
# content look as expected (displays the first three rows).
label_data.head(3)

Unnamed: 0,patient_id,image_id,mark_index,width,height,x1,x2,y1,y2,label,name,comment,iid,x,y,w,h
0,1413566,5360_2,10,2012,2812,296.21955,677.473348,1262.799855,1541.766049,结构扭曲,吴卓,ok,1413566_5360_2,0.241971,0.498678,0.18949,0.099206
1,1413566,5360_3,12,2012,2812,298.197107,656.817239,2213.057504,2569.318294,结构扭曲,吴卓,ok,1413566_5360_3,0.23733,0.850351,0.178241,0.126693
2,1861739,1372_1,18,2012,2812,1762.636659,1819.999005,760.523223,820.617109,良性钙化,吴卓,ok,1861739_1372_1,0.890317,0.281142,0.02851,0.021371


### 这里看到标注类别不均衡，恶性钙化最多，良性钙化也不少，恶性肿块较少，良性肿块非常少

In [334]:
# Keep only the annotations of the four lesion classes listed below, and only
# the rows whose comment is 'ok'. The mapping gives each class its numeric id.
label_mapping = {'良性肿块':0,'恶性肿块':1,'良性钙化':2,'恶性钙化':3}
is_known_label = label_data['label'].isin(list(label_mapping))
is_ok_row = label_data['comment'] == 'ok'
label_data = label_data[is_known_label & is_ok_row].reset_index(drop=True)
label_data['label'].value_counts()

恶性钙化    230
良性钙化    153
恶性肿块     60
良性肿块     14
Name: label, dtype: int64

### 2012宽度的图片24张，2560宽度的图片89张，2800宽度的图片44张，3328宽度的图片12张，共169图片

In [335]:
# Number of distinct image_id values for each (width, height) image size.
# NOTE(review): image_id does not include the patient id; if two patients can
# share an image_id, counting 'iid' may be what is intended - confirm.
label_data.groupby(['width','height'])['image_id'].nunique()

width  height
2012   2812      24
2560   3328      89
2800   3518      44
3328   4096      12
Name: image_id, dtype: int64

### 2012宽度图片11个病人，2560宽度图片38个病人，2800宽度图片20个病人，3328宽度图片5个病人，共74病人

In [336]:
# Number of distinct patients for each (width, height) image size.
label_data.groupby(['width','height'])['patient_id'].nunique()

width  height
2012   2812      11
2560   3328      38
2800   3518      20
3328   4096       5
Name: patient_id, dtype: int64

# 3. 生成对应每张图片的标注文本文件

In [337]:
# Working copy of the annotation table with a fresh 0..n-1 index.
# Alternative that keeps only two image widths:
# temp = label_data[label_data.width.isin([2012,2560])].copy()
temp = label_data.reset_index(drop=True)

# Derive the iid (file name up to its first dot) of every image in the training
# and validation folders; the iids are used below to look up annotations.
train_image_files = get_data_files('./muba/images/train/','_','jpg')
valid_image_files = get_data_files('./muba/images/valid/','_','jpg')
train_iid = [path.rsplit('/',1)[-1].split('.',1)[0] for path in train_image_files]
valid_iid = [path.rsplit('/',1)[-1].split('.',1)[0] for path in valid_image_files]
len(train_iid),len(valid_iid)

(235, 53)

In [340]:
# Write one YOLO label file per training image.
# The labels root is set here rather than read from label_file_folder: that
# name is only assigned in later cells, and one of them sets it to the train
# sub-folder, which would turn the path below into .../train/train/.
labels_root_folder = './muba/labels/'
train_label_folder = os.path.join(labels_root_folder,'train')
os.makedirs(train_label_folder, exist_ok=True)
for iid in train_iid:
    # Annotations of this image. Rows lacking box coordinates are dropped,
    # because a line containing "nan" is not a usable label line.
    df = temp[temp.iid==iid].dropna(subset=['x','y','w','h'])
    file_path = os.path.join(train_label_folder,str(iid)+'.txt')
    # One "<class id> <x> <y> <w> <h>" line per annotated lesion.
    lines = [f'{label_mapping[label]} {x} {y} {w} {h}\n'
             for label,x,y,w,h in zip(df['label'],df['x'],df['y'],df['w'],df['h'])]
    with open(file_path,'w') as f:
        if lines:
            print(f'写入标注文件: {file_path}')
        else:
            # No annotation for this image: an empty label file is written.
            print(f'写入空白标注文件: {file_path}')
        f.write(''.join(lines))

写入空白标注文件: ./muba/labels/train/2154382_18033_1.txt
写入空白标注文件: ./muba/labels/train/2179535_1126_1.txt
写入标注文件: ./muba/labels/train/2076053_30000020021103055211900000013_3.txt
写入标注文件: ./muba/labels/train/2203458_5605_1.txt
写入空白标注文件: ./muba/labels/train/2192124_8419_1.txt
写入标注文件: ./muba/labels/train/2205093_30000020071500094690000000579_4.txt
写入标注文件: ./muba/labels/train/2194619_11361_1.txt
写入空白标注文件: ./muba/labels/train/2198808_30000020070900112728900000828_1.txt
写入空白标注文件: ./muba/labels/train/2205047_8561_1.txt
写入空白标注文件: ./muba/labels/train/2139295_30000020051100371931400000013_1.txt
写入标注文件: ./muba/labels/train/2201334_10104_1.txt
写入空白标注文件: ./muba/labels/train/2188794_1001_1.txt
写入标注文件: ./muba/labels/train/2191371_7629_1.txt
写入标注文件: ./muba/labels/train/2193698_30000020070301185101000000289_2.txt
写入空白标注文件: ./muba/labels/train/2200443_9239_1.txt
写入标注文件: ./muba/labels/train/2195560_491_1.txt
写入标注文件: ./muba/labels/train/2192124_8431_1.txt
写入空白标注文件: ./muba/labels/train/2208678_30000020072001080720

写入空白标注文件: ./muba/labels/train/2076053_30000020021103055211900000013_4.txt
写入标注文件: ./muba/labels/train/2027825_30000019120400241084400000217_2.txt
写入标注文件: ./muba/labels/train/2203459_5737_1.txt
写入空白标注文件: ./muba/labels/train/2207439_6585_1.txt
写入空白标注文件: ./muba/labels/train/2179535_1120_1.txt
写入标注文件: ./muba/labels/train/2142067_30000020051200031579200000183_4.txt
写入空白标注文件: ./muba/labels/train/2200043_8615_1.txt
写入空白标注文件: ./muba/labels/train/2195560_479_1.txt
写入空白标注文件: ./muba/labels/train/2191812_7971_1.txt
写入标注文件: ./muba/labels/train/2149244_5935_1.txt
写入标注文件: ./muba/labels/train/2187649_2485_1.txt
写入标注文件: ./muba/labels/train/2189185_1531_1.txt
写入空白标注文件: ./muba/labels/train/2027825_30000019120400241084400000217_1.txt
写入标注文件: ./muba/labels/train/2196068_30000020070700240317500000693_3.txt
写入空白标注文件: ./muba/labels/train/2187260_1981_1.txt
写入空白标注文件: ./muba/labels/train/2193887_11027_1.txt
写入空白标注文件: ./muba/labels/train/2154382_18021_1.txt
写入空白标注文件: ./muba/labels/train/2180764_4011_1.txt
写入空白标注

In [341]:
# Write one YOLO label file per validation image.
# The labels root is set here rather than read from label_file_folder: that
# name is only assigned in later cells, and one of them sets it to the train
# sub-folder, which would send the validation labels to .../train/valid/.
labels_root_folder = './muba/labels/'
valid_label_folder = os.path.join(labels_root_folder,'valid')
os.makedirs(valid_label_folder, exist_ok=True)
for iid in valid_iid:
    # Annotations of this image. Rows lacking box coordinates are dropped,
    # because a line containing "nan" is not a usable label line.
    df = temp[temp.iid==iid].dropna(subset=['x','y','w','h'])
    file_path = os.path.join(valid_label_folder,str(iid)+'.txt')
    # One "<class id> <x> <y> <w> <h>" line per annotated lesion.
    lines = [f'{label_mapping[label]} {x} {y} {w} {h}\n'
             for label,x,y,w,h in zip(df['label'],df['x'],df['y'],df['w'],df['h'])]
    with open(file_path,'w') as f:
        if lines:
            print(f'写入标注文件: {file_path}')
        else:
            # No annotation for this image: an empty label file is written.
            print(f'写入空白标注文件: {file_path}')
        f.write(''.join(lines))

写入标注文件: ./muba/labels/valid/2183987_13560_1.txt
写入空白标注文件: ./muba/labels/valid/2173935_6544_1.txt
写入标注文件: ./muba/labels/valid/2173935_6532_1.txt
写入标注文件: ./muba/labels/valid/1998252_30000019110500294897100000254_1.txt
写入空白标注文件: ./muba/labels/valid/2173935_6550_1.txt
写入标注文件: ./muba/labels/valid/1998252_30000019110500294897100000254_3.txt
写入空白标注文件: ./muba/labels/valid/2187603_2419_1.txt
写入标注文件: ./muba/labels/valid/2181161_4290_1.txt
写入空白标注文件: ./muba/labels/valid/2178438_14049_1.txt
写入空白标注文件: ./muba/labels/valid/2190446_3935_1.txt
写入标注文件: ./muba/labels/valid/2182025_30000020061901293760700000173_1.txt
写入空白标注文件: ./muba/labels/valid/2199991_30000020070923575560700000519_4.txt
写入标注文件: ./muba/labels/valid/2189409_30000020063000384313100000092_4.txt
写入标注文件: ./muba/labels/valid/2178438_14037_1.txt
写入标注文件: ./muba/labels/valid/2190446_3923_1.txt
写入空白标注文件: ./muba/labels/valid/2181629_5304_1.txt
写入标注文件: ./muba/labels/valid/2189409_30000020063000384313100000092_3.txt
写入空白标注文件: ./muba/labels/valid/2196

# 4. 生成带标注框的jpg文件

In [419]:
# Folders used when drawing the annotation boxes: source images, their label
# files, and the destination of the annotated copies.
jpg_file_folder = './muba/images/train/'
label_file_folder = './muba/labels/train/'
target_folder = './muba/images/jpg_with_label/'
# Image paths in ascending order.
image_files = sorted(get_data_files(jpg_file_folder,'_','jpg'))

In [429]:
# Draw the annotated boxes and class names onto each image and save a copy.
label_color_mapping = {0:'green',1:'purple',2:'blue',3:'red'}
# Class id -> displayed name. This must be the inverse of the mapping used when
# the label files were written ({'良性肿块':0,'恶性肿块':1,'良性钙化':2,'恶性钙化':3});
# otherwise masses are captioned as calcifications and vice versa.
label_text_mapping = {0:'良性肿块',1:'恶性肿块',2:'良性钙化',3:'恶性钙化'}
os.makedirs(target_folder, exist_ok=True)
# The font does not change between boxes, so load it once.
font = ImageFont.truetype('simhei.ttf', 50)
for image_file in image_files:
    print(image_file)
    iid = os.path.basename(image_file).split('.')[0]
    label_file = os.path.join(label_file_folder,str(iid)+'.txt')
    # Nothing to draw when the label file is missing or empty.
    if not os.path.exists(label_file) or os.path.getsize(label_file) == 0:
        continue
    df_label = pd.read_csv(label_file,sep=' ',names=['label','x','y','w','h'])
    if df_label.empty:
        continue
    # Open the image only when there is something to draw, and close it again
    # once the annotated copy has been written.
    with Image.open(image_file) as image:
        draw = ImageDraw.Draw(image)
        for label,x,y,w,h in df_label.itertuples(index=False):
            label = int(label)
            color = label_color_mapping[label]
            # Label files store the box centre and size as fractions of the
            # image size; convert them back to pixel corner coordinates.
            x1=(x-w/2)*image.width
            x2=(x+w/2)*image.width
            y1=(y-h/2)*image.height
            y2=(y+h/2)*image.height
            draw.line([(x1,y1),(x2,y1),(x2,y2),(x1,y2),(x1,y1)],width=5,fill=color)
            draw.text((x1,y2), label_text_mapping[label],
                      font=font,fill=color,align='left')
        # Save once, after all boxes of the image have been drawn.
        image.save(os.path.join(target_folder,str(iid)+'_with_label'+'.jpg'))

./muba/images/train/2027825_30000019120400241084400000217_1.jpg
./muba/images/train/2027825_30000019120400241084400000217_2.jpg
./muba/images/train/2027825_30000019120400241084400000217_3.jpg
./muba/images/train/2027825_30000019120400241084400000217_4.jpg
./muba/images/train/2076053_30000020021103055211900000013_1.jpg
./muba/images/train/2076053_30000020021103055211900000013_2.jpg
./muba/images/train/2076053_30000020021103055211900000013_3.jpg
./muba/images/train/2076053_30000020021103055211900000013_4.jpg
./muba/images/train/2117153_30000020041600171040200000013_1.jpg
./muba/images/train/2117153_30000020041600171040200000013_2.jpg
./muba/images/train/2117153_30000020041600171040200000013_3.jpg
./muba/images/train/2117153_30000020041600171040200000013_4.jpg
./muba/images/train/2139295_30000020051100371931400000013_1.jpg
./muba/images/train/2139295_30000020051100371931400000013_2.jpg
./muba/images/train/2139295_30000020051100371931400000013_3.jpg
./muba/images/train/2139295_300000200511

./muba/images/train/2203458_5611_1.jpg
./muba/images/train/2203458_5617_1.jpg
./muba/images/train/2203458_5623_1.jpg
./muba/images/train/2203459_5725_1.jpg
./muba/images/train/2203459_5731_1.jpg
./muba/images/train/2203459_5737_1.jpg
./muba/images/train/2203459_5743_1.jpg
./muba/images/train/2203608_6167_1.jpg
./muba/images/train/2203608_6173_1.jpg
./muba/images/train/2203608_6179_1.jpg
./muba/images/train/2203608_6185_1.jpg
./muba/images/train/2203663_30000020071400124775100000393_1.jpg
./muba/images/train/2203663_30000020071400124775100000393_2.jpg
./muba/images/train/2203663_30000020071400124775100000393_3.jpg
./muba/images/train/2203663_30000020071400124775100000393_4.jpg
./muba/images/train/2205047_8555_1.jpg
./muba/images/train/2205047_8561_1.jpg
./muba/images/train/2205047_8567_1.jpg
./muba/images/train/2205047_8573_1.jpg
./muba/images/train/2205051_8608_1.jpg
./muba/images/train/2205051_8614_1.jpg
./muba/images/train/2205051_8620_1.jpg
./muba/images/train/2205051_8626_1.jpg
./m

# 废弃备用代码

In [None]:
label_file_folder = './muba/labels/'
# Sets give constant-time membership tests inside the loop.
train_iid_set = set(train_iid)
valid_iid_set = set(valid_iid)
# Walk over every iid present in the annotation table.
for iid in set(temp['iid']):
    # Annotations of a single iid (one image of one patient).
    df = temp[temp.iid==iid].copy()
    df.reset_index(drop=True,inplace=True)
    patient_id = df.loc[0,'patient_id']
    image_id = df.loc[0,'image_id']
    # Training images go to labels/train/, validation images to labels/valid/,
    # and images found in neither set to labels/error/.
    if iid in train_iid_set:
        subset = 'train'
    elif iid in valid_iid_set:
        subset = 'valid'
    else:
        subset = 'error'
    subset_folder = os.path.join(label_file_folder,subset)
    os.makedirs(subset_folder, exist_ok=True)
    file_path = os.path.join(subset_folder,f'{patient_id}_{image_id}.txt')
    # One "<class id> <x> <y> <w> <h>" line per annotated lesion.
    text = ''.join(
        f'{label_mapping[label]} {x} {y} {w} {h}\n'
        for label,x,y,w,h in zip(df['label'],df['x'],df['y'],df['w'],df['h']))
    with open(file_path,'w') as f:
        # The original f-string had an empty placeholder, which is a syntax error.
        print(f'写入标注文件: {file_path}')
        f.write(text)