## 将kitti数据集的2D detection，先改成PASCAL VOC格式，再改成YOLO的格式

[参考博客1](https://blog.csdn.net/qq583083658/article/details/86321987)、[参考博客2](https://blog.csdn.net/Adams_just/article/details/123207625)

准备工作：下载kitti数据集

由于我们只做2D的对象检测，只需要下载Download left color images of object data set (12 GB)和对应的标签Download training labels of object data set (5 MB)。

解压后的文件结构如下：

![dir_structure](./figs/kitti_to_yolo/dir_structure.png)

其中训练集有7481张，测试集有7518张，共有8个类别：Car（小轿车），Van（面包车），Truck（卡车），Tram（电车），Pedestrian（行人），Person_sitting（坐着的人），Cyclist（骑行人），Misc（杂项）。此外还有一项DontCare，表示不关心的物体；测试集没有给出标签。

### 1、更改kitti的标注文件，仅保留需要的类别

In [1]:
import os

# Root of the extracted KITTI training split (absolute, machine-specific path).
kitti_training_dir_path = '/data/workspace/yuzijian/kitti_2d_detection/training'
# Glob pattern matching every .txt label file under training/label_2.
kitti_training_labels_path = os.path.join(kitti_training_dir_path, 'label_2', '*.txt')
# Bare expression: the notebook displays the resulting pattern.
kitti_training_labels_path

'/data/workspace/yuzijian/kitti_2d_detection/training/label_2/*.txt'

In [2]:
import glob

txt_list = glob.glob(kitti_training_labels_path) # expand the pattern into the list of label file paths
# Bare expression: the notebook displays the list.
txt_list

['/data/workspace/yuzijian/kitti_2d_detection/training/label_2/004389.txt',
 '/data/workspace/yuzijian/kitti_2d_detection/training/label_2/000861.txt',
 '/data/workspace/yuzijian/kitti_2d_detection/training/label_2/003816.txt',
 '/data/workspace/yuzijian/kitti_2d_detection/training/label_2/002646.txt',
 '/data/workspace/yuzijian/kitti_2d_detection/training/label_2/004276.txt',
 '/data/workspace/yuzijian/kitti_2d_detection/training/label_2/004676.txt',
 '/data/workspace/yuzijian/kitti_2d_detection/training/label_2/007246.txt',
 '/data/workspace/yuzijian/kitti_2d_detection/training/label_2/003541.txt',
 '/data/workspace/yuzijian/kitti_2d_detection/training/label_2/001795.txt',
 '/data/workspace/yuzijian/kitti_2d_detection/training/label_2/007354.txt',
 '/data/workspace/yuzijian/kitti_2d_detection/training/label_2/004354.txt',
 '/data/workspace/yuzijian/kitti_2d_detection/training/label_2/000354.txt',
 '/data/workspace/yuzijian/kitti_2d_detection/training/label_2/001190.txt',
 '/data/work

In [3]:
def show_category(txt_list):
    """Print the set of distinct categories found in the given label files.

    The category is the first space-separated field of each line. A file
    that raises IOError is reported on stdout and processing continues
    with the next file.
    """
    seen = []
    for label_path in txt_list:
        try:
            with open(label_path) as label_file:
                # Only the leading field (the class name) of each row is kept.
                seen.extend(row.strip().split(' ')[0] for row in label_file)
        except IOError as ioerr:
            print('File error:'+str(ioerr))
    # Converting to a set removes the duplicates before printing.
    print(set(seen))

def merge(line):
    """Join string fields into one space-separated, newline-terminated line.

    Args:
        line: sequence of strings (the fields of one label row).

    Returns:
        The fields separated by single spaces with a trailing newline and
        no space after the last field. An empty sequence yields just the
        newline.
    """
    # str.join replaces the manual index loop and repeated concatenation.
    return ' '.join(line) + '\n'

print('before modify categories are:\n')
show_category(txt_list)

# The rewritten label files go to a new folder, training/label_2_modified.
new_dir_path = os.path.join(kitti_training_dir_path, 'label_2_modified')
if not os.path.exists(new_dir_path):
    os.makedirs(new_dir_path)

# Vehicle-like classes collapse into Car, seated people into Pedestrian.
merged_names = {
    'Truck': 'Car',
    'Van': 'Car',
    'Tram': 'Car',
    'Person_sitting': 'Pedestrian',
}
# Rows of these classes are dropped altogether.
dropped_names = ('DontCare', 'Misc')

for item in txt_list:
    new_txt_path = os.path.join(new_dir_path, os.path.basename(item))
    new_txt = []
    try:
        with open(item, 'r') as r_tdf:
            for each_line in r_tdf:
                labeldata = each_line.strip().split(' ')
                # Rename the class if it is one of the merged ones.
                labeldata[0] = merged_names.get(labeldata[0], labeldata[0])
                if labeldata[0] in dropped_names:
                    continue
                new_txt.append(merge(labeldata))

        # 'w+' truncates any existing file before the kept rows are written.
        with open(new_txt_path, 'w+') as w_tdf:
            w_tdf.writelines(new_txt)

    except IOError as ioerr:
        print('File error:'+str(ioerr))
print('\nafter modify categories are:\n')
show_category(glob.glob(os.path.join(new_dir_path, '*.txt')))

before modify categories are:

{'Tram', 'Truck', 'Cyclist', 'Car', 'Person_sitting', 'Misc', 'DontCare', 'Pedestrian', 'Van'}

after modify categories are:

{'Pedestrian', 'Cyclist', 'Car'}


### 2、改成PASCAL VOC的XML格式

In [4]:
# kitti_txt_to_xml.py
# encoding:utf-8
# 根据一个给定的XML Schema，使用DOM树的形式从空白文件生成一个XML
from xml.dom.minidom import Document
import cv2
import os

def _append_text_element(doc, parent, tag, text):
    """Create ``<tag>text</tag>`` and attach it as the last child of *parent*."""
    element = doc.createElement(tag)
    element.appendChild(doc.createTextNode(text))
    parent.appendChild(element)
    return element


def generate_xml(output_dir, first_name, split_lines, img_size, class_ind):
    """Write a PASCAL VOC style annotation file for one image.

    Args:
        output_dir: directory that receives ``<first_name>.xml``.
        first_name: image file name without extension; the ``filename``
            element is written as ``<first_name>.png``.
        split_lines: iterable of label rows. Each row is whitespace
            separated with the class name in field 0 and the box
            xmin, ymin, xmax, ymax in fields 4-7.
        img_size: image shape as ``(height, width[, depth])``. When the
            depth entry is absent, a depth of 1 is written.
        class_ind: container of class names to keep; rows of other
            classes are skipped.

    Raises:
        IndexError: if a kept row has fewer than 8 fields.
        ValueError: if a box coordinate is not a valid float.
    """
    doc = Document()
    annotation = doc.createElement('annotation')
    doc.appendChild(annotation)

    _append_text_element(doc, annotation, 'folder', 'KITTI')
    _append_text_element(doc, annotation, 'filename', first_name + '.png')

    source = doc.createElement('source')
    annotation.appendChild(source)
    _append_text_element(doc, source, 'database', 'The KITTI Database')
    _append_text_element(doc, source, 'annotation', 'KITTI')

    # Child order (width, height, depth) is kept because the later
    # XML-to-YOLO step reads these elements by position.
    size = doc.createElement('size')
    annotation.appendChild(size)
    depth = img_size[2] if len(img_size) > 2 else 1
    _append_text_element(doc, size, 'width', str(img_size[1]))
    _append_text_element(doc, size, 'height', str(img_size[0]))
    _append_text_element(doc, size, 'depth', str(depth))

    for split_line in split_lines:
        fields = split_line.strip().split()
        # Blank rows have no class field; skip them instead of crashing.
        if not fields or fields[0] not in class_ind:
            continue
        obj = doc.createElement('object')
        annotation.appendChild(obj)
        _append_text_element(doc, obj, 'name', fields[0])
        bndbox = doc.createElement('bndbox')
        obj.appendChild(bndbox)
        for tag, index in (('xmin', 4), ('ymin', 5), ('xmax', 6), ('ymax', 7)):
            _append_text_element(doc, bndbox, tag, str(float(fields[index])))

    # The context manager closes the file even if serialisation fails.
    xml_path = os.path.join(output_dir, first_name + '.xml')
    with open(xml_path, 'w', encoding='utf-8') as xml_file:
        xml_file.write(doc.toprettyxml(indent=''))

In [5]:
# Classes that are kept in the VOC annotations.
class_ind=('Pedestrian', 'Car', 'Cyclist')
# Folder holding the filtered label files written by the previous step.
labels_dir = new_dir_path

# PASCAL_version/ is created next to the KITTI training folder.
output_dir = os.path.join(os.path.dirname(kitti_training_dir_path), 'PASCAL_version')
output_xml_dir = os.path.join(output_dir, 'Annotations')    # XML output folder
output_jpg_dir = os.path.join(output_dir, 'JPEGImages')     # JPEG output folder
for out_dir in (output_xml_dir, output_jpg_dir):
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

In [6]:
from tqdm import tqdm

# For every filtered label file: write the VOC XML annotation and save the
# matching PNG image again as JPEG.
for full_path in tqdm(glob.glob(os.path.join(labels_dir, '*.txt'))):
    file_name = os.path.split(full_path)[1]
    # Close each label file right after reading instead of leaking the handle.
    with open(full_path) as f:
        split_lines = f.readlines()
    first_name = file_name.split('.')[0] # file name without extension
    img_png_name = first_name + '.png'
    img_jpg_name = first_name + '.jpg'
    img_path = os.path.join(kitti_training_dir_path, 'image_2', img_png_name)
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread signals an unreadable/missing file by returning None;
        # fail with the offending path rather than an AttributeError below.
        raise IOError('cannot read image: ' + img_path)
    img_size = img.shape
    generate_xml(output_xml_dir, first_name, split_lines, img_size, class_ind)
    cv2.imwrite(os.path.join(output_jpg_dir, img_jpg_name), img)
print('all txts has converted into xmls')

100%|██████████| 7481/7481 [05:56<00:00, 21.00it/s]

all txts has converted into xmls





### 3、将VOC格式的xml标签转换为darknet格式的标签xxx.txt

In [7]:
# Both dataset versions live next to the KITTI training folder.
pascal_dir = os.path.join(os.path.dirname(kitti_training_dir_path), 'PASCAL_version')
pascal_xml_dir = os.path.join(pascal_dir, 'Annotations')    # VOC XML folder
pascal_jpg_dir = os.path.join(pascal_dir, 'JPEGImages')     # VOC JPEG folder
yolo_dir = os.path.join(os.path.dirname(kitti_training_dir_path), 'yolo_version')
yolo_txt_dir = os.path.join(yolo_dir, 'labels')     # YOLO txt output folder
yolo_jpg_dir = os.path.join(yolo_dir, 'images')     # YOLO JPEG output folder
for target_dir in (yolo_txt_dir, yolo_jpg_dir):
    if not os.path.exists(target_dir):
        os.makedirs(target_dir)

# Copy the JPEG files into the new folder with the shell's cp.
copy_source = os.path.join(pascal_jpg_dir,'*')
os.system(f"cp {copy_source} {yolo_jpg_dir}")

0

In [8]:
# xml_to_yolo_txt.py
# 此代码和VOC_KITTI文件夹同目录
import glob
from tqdm import tqdm
import xml.etree.ElementTree as ET
# Class names as they appear in the XML annotations.
# NOTE(review): single_xml_to_txt writes class_names.index(name) as the class
# id, so the position of each name here becomes its YOLO class number --
# confirm it matches the class order expected by the training configuration.
class_names = ['Car', 'Cyclist', 'Pedestrian']

# Convert one VOC XML annotation file into a YOLO txt label file
def single_xml_to_txt(xml_file, output_dir, classes=None):
    """Convert one PASCAL VOC XML annotation into a YOLO label file.

    One row ``class_id x_center y_center width height`` is written per
    ``object`` element; the four box values are divided by the image
    width/height read from the ``size`` element. The output file is named
    after the part of the XML file name before the first dot, with a
    ``.txt`` extension, and is created even when there are no objects.

    Args:
        xml_file: path of the VOC XML annotation.
        output_dir: directory that receives the txt file.
        classes: optional sequence of class names whose index is used as
            the class id; defaults to the module-level ``class_names``.

    Raises:
        ValueError: if an object's class name is not in the class list.
    """
    names = class_names if classes is None else classes
    root = ET.parse(xml_file).getroot()
    stem = os.path.split(xml_file)[1].split('.')[0]
    txt_path = os.path.join(output_dir, stem + '.txt')

    with open(txt_path, 'w') as out:
        objects = root.findall('object')
        if objects:
            # The image size is the same for every object: read it once.
            # Elements are looked up by tag rather than by position.
            picture_width = int(root.find('size/width').text)
            picture_height = int(root.find('size/height').text)
        for member in objects:
            # Index of the class name is the YOLO class id.
            class_num = names.index(member.find('name').text)

            # Tag lookups keep working when extra children (e.g. pose,
            # truncated, difficult) sit between name and bndbox.
            box = member.find('bndbox')
            box_x_min = float(box.find('xmin').text)  # top-left x
            box_y_min = float(box.find('ymin').text)  # top-left y
            box_x_max = float(box.find('xmax').text)  # bottom-right x
            box_y_max = float(box.find('ymax').text)  # bottom-right y

            # Convert to centre position and size relative to the image.
            x_center = float(box_x_min + box_x_max) / (2 * picture_width)
            y_center = float(box_y_min + box_y_max) / (2 * picture_height)
            width = float(box_x_max - box_x_min) / picture_width
            height = float(box_y_max - box_y_min) / picture_height
            out.write(f'{class_num} {x_center} {y_center} {width} {height}\n')

# Convert every XML file in a folder into a txt file
def dir_xml_to_txt(xml_dir, output_dir):
    """Run single_xml_to_txt on each ``*.xml`` file directly inside *xml_dir*.

    The resulting txt files are written to *output_dir*; progress is shown
    with a tqdm bar.
    """
    xml_paths = glob.glob(os.path.join(xml_dir, '*.xml'))
    for xml_path in tqdm(xml_paths):
        single_xml_to_txt(xml_path, output_dir)

# Convert all VOC XML annotations into YOLO txt label files.
dir_xml_to_txt(pascal_xml_dir, yolo_txt_dir)

100%|██████████| 7481/7481 [00:00<00:00, 10107.32it/s]


### 4、划分训练集、验证集和测试集，生成train.txt、val.txt、test.txt

In [9]:
# kitti_train_val.py
import glob
import random

yolo_dir = os.path.join(os.path.dirname(kitti_training_dir_path), 'yolo_version')
jpg_list = glob.glob(os.path.join(yolo_dir, 'images', '*.jpg'))

random.shuffle(jpg_list)    # shuffles the list in place

train_txt_path = os.path.join(yolo_dir, 'train.txt')
val_txt_path = os.path.join(yolo_dir, 'val.txt')
test_txt_path = os.path.join(yolo_dir, 'test.txt')

# Split 8:1:1 into training, validation and test sets; the test set takes
# whatever is left after the two truncated shares.
train_end = int(len(jpg_list)*0.8)
val_end = train_end + int(len(jpg_list)*0.1)
train_jpg_list = jpg_list[:train_end]
val_jpg_list = jpg_list[train_end:val_end]
test_jpg_list = jpg_list[val_end:]

assert len(jpg_list) == len(train_jpg_list) + len(val_jpg_list) + len(test_jpg_list)

# Each list file holds one image path per line.
for list_path, split_paths in (
    (train_txt_path, train_jpg_list),
    (val_txt_path, val_jpg_list),
    (test_txt_path, test_jpg_list),
):
    with open(list_path, 'w') as tf:
        tf.writelines(jpg_file + '\n' for jpg_file in split_paths)