In [None]:
import json
import shutil
import os 
import yaml
import cv2
from glob import glob
from openpyxl import Workbook
from scipy.sparse import data
from sklearn.model_selection import train_test_split

In [None]:
# Convert JSON-format annotation data into the txt label format used for YOLOv5 training.

# Species names in order of first appearance; the list index is the class id
# written to the label files. `count` holds the number of objects per class id.
classification = []
count = []

# Positions inside the (top, left, bottom, right) tuples produced by Label.
TOP, LEFT, BOTTOM, RIGHT = range(4)

# When truthy, label coordinates are normalised for a square-padded image.
SQURE = 1

class Label:
	"""One annotation JSON file converted into YOLO-style label rows.

	Each row is ``(class_id, x_center, y_center, width, height)`` with the
	geometry normalised to the image size. Class ids are allocated through the
	module-level ``classification`` list and tallied in ``count``.
	"""

	def __init__(self, fileName, imagePath):
		"""Parse ``fileName`` and store the fields needed for conversion.

		Args:
			fileName: Path of the annotation JSON file.
			imagePath: Kept for interface compatibility; it is not used.
		"""
		data = self.parse_json_data(fileName)
		self.set_data(data, fileName, imagePath)

	def parse_json_data(self, fileName):
		"""Return the decoded JSON document stored at ``fileName``.

		The file is opened as ``utf-8-sig`` so a leading byte-order mark is ignored.
		"""
		with open(fileName, encoding='utf-8-sig') as jsonFile:
			return json.load(jsonFile)

	def set_data(self, data, fileName, imagePath):
		"""Copy base name, image size and annotation list out of ``data``.

		Args:
			data: Parsed JSON with ``images[0].width``, ``images[0].height``
				and ``annotations``.
			fileName: Path of the JSON file; its base name without the last
				extension becomes the label file name.
			imagePath: Unused.
		"""
		self.fileName = '.'.join(os.path.basename(fileName).split('.')[0:-1])
		image_info = data['images'][0]
		self.width = int(image_info['width'])
		self.height = int(image_info['height'])
		self.annotations = data['annotations']

	def get_bbox_point(self, bbox):
		"""Return ``(top, left, bottom, right)`` enclosing a list of points.

		Args:
			bbox: Sequence of points, each indexable as ``point[0]`` (x) and
				``point[1]`` (y).

		Returns:
			The extents of all points, or ``(0, 0, 0, 0)`` for an empty list.
		"""
		if not bbox:
			return (0, 0, 0, 0)
		# min/max instead of fixed sentinels, so coordinates above 10000 work too
		xs = [p[0] for p in bbox]
		ys = [p[1] for p in bbox]
		return (min(ys), min(xs), max(ys), max(xs))

	def get_segment_point(self, seg):
		"""Return ``(top, left, bottom, right)`` enclosing a flat polygon.

		Args:
			seg: Flat sequence ``[x0, y0, x1, y1, ...]``; even positions are x
				values, odd positions are y values.

		Returns:
			The extents of the polygon, or ``(0, 0, 0, 0)`` when ``seg`` does
			not contain at least one x and one y value.
		"""
		xs = seg[0::2]
		ys = seg[1::2]
		if not xs or not ys:
			return (0, 0, 0, 0)
		return (min(ys), min(xs), max(ys), max(xs))

	def get_shape_point(self, shape):
		"""Return the box of one annotation, preferring ``bbox`` over ``Segmentation``.

		Missing keys are treated like empty values, so an annotation that only
		carries one of the two fields is still converted. When neither is
		usable the result is ``(0, 0, 0, 0)``.
		"""
		bbox = shape.get('bbox')
		if bbox:
			return self.get_bbox_point(bbox)
		segmentation = shape.get('Segmentation')
		if segmentation:
			# only the first polygon of the annotation is used
			return self.get_segment_point(segmentation[0])
		return (0, 0, 0, 0)

	def point_to_txt(self, point):
		"""Turn ``(species, box)`` into a normalised label row.

		The species is registered in ``classification`` on first sight and its
		counter in ``count`` is incremented.

		With ``SQURE`` enabled the coordinates are expressed relative to the
		image padded symmetrically to a square whose side is the longer image
		edge. Landscape images get the offset on the y axis, portrait images on
		the x axis. Otherwise x values are divided by the image width and y
		values by the image height.

		Args:
			point: ``(species, (top, left, bottom, right))``.

		Returns:
			``(class_id, x_center, y_center, box_width, box_height)``.
		"""
		species = point[0]
		box = point[1]
		if species not in classification:
			classification.append(species)
			count.append(0)
		idx = classification.index(species)
		count[idx] += 1

		x_mid = (box[LEFT] + box[RIGHT]) / 2
		y_mid = (box[TOP] + box[BOTTOM]) / 2
		box_w = box[RIGHT] - box[LEFT]
		box_h = box[BOTTOM] - box[TOP]

		if SQURE:
			side = max(self.width, self.height)
			# half of the padding added on each axis; zero on the longer axis
			x_mid = x_mid + (side - self.width) / 2
			y_mid = y_mid + (side - self.height) / 2
			norm_w = norm_h = side
		else:
			norm_w = self.width
			norm_h = self.height
		return (idx, x_mid / norm_w, y_mid / norm_h, box_w / norm_w, box_h / norm_h)

	def convert_data(self):
		"""Fill ``self.points`` and ``self.txt`` from ``self.annotations``."""
		self.points = [
			(shape['species'], self.get_shape_point(shape))
			for shape in self.annotations
		]
		self.txt = [self.point_to_txt(point) for point in self.points]

	def write_data(self, path):
		"""Write one space-separated line per label row to ``path + fileName + ".txt"``.

		``path`` is used as a plain string prefix, so it must already end with
		a path separator.
		"""
		with open(path + self.fileName + ".txt", 'w') as f:
			for line in self.txt:
				f.write(' '.join(map(str, line)) + "\n")

def convert_data(file, targetPath, imagePath):
	"""Convert one annotation JSON file into a label txt file.

	Args:
		file: Path of the annotation JSON file.
		targetPath: Prefix of the output file, passed to ``Label.write_data``.
		imagePath: Forwarded to ``Label``.
	"""
	label = Label(file, imagePath)
	label.convert_data()
	label.write_data(targetPath)

# Running number of converted annotation files (a list so convert can mutate it).
json_count = [0]
def convert(targetPath, src):
	"""Recursively convert every ``*.json`` file below ``targetPath``.

	Each annotation file is copied to ``src/json/`` and converted into a label
	file under ``src/labels/``. Entries are visited in sorted order so that the
	class ids, which are assigned on first appearance, are reproducible.

	Args:
		targetPath: Directory to scan.
		src: Dataset root; ``src/json`` and ``src/labels`` must already exist.
	"""
	for line in sorted(os.listdir(targetPath)):
		entry = os.path.join(targetPath, line)
		if os.path.isdir(entry):
			convert(entry, src)
		# match the extension only; a substring test would also pick up
		# names such as "x.json.bak"
		elif line.endswith('.json'):
			json_count[0] += 1
			print(json_count[0], "Data Convert:", line, end='\r')
			shutil.copyfile(targetPath + "/" + line, src + "/json/" + line)
			convert_data(targetPath + "/" + line, src + "/labels/", src + "/images/")

# Running number of copied images (a list so copy_image can mutate it).
image_count = [0]
def copy_image(targetPath, src):
	"""Recursively copy every ``*.jpg`` file below ``targetPath`` into ``src/images/``.

	Files are copied flat: only the file name is kept, not the sub-directory.

	Args:
		targetPath: Directory to scan.
		src: Dataset root; ``src/images`` must already exist.
	"""
	for line in os.listdir(targetPath):
		entry = os.path.join(targetPath, line)
		if os.path.isdir(entry):
			copy_image(entry, src)
		# match the extension only; a substring test would also copy
		# names such as "x.jpg.tmp"
		elif line.endswith('.jpg'):
			image_count[0] += 1
			print(image_count[0], "Image Copy:", line, end='\r')
			shutil.copyfile(targetPath + "/" + line, src + "/images/" + line)

def write_yaml(target):
	"""Write the dataset description file to ``target``.

	The file lists the collected class names, their number, and the relative
	paths of the train / valid / test list files.
	"""
	config = {
		"names": classification,
		"nc": len(classification),
		"train": "../Dataset/train.txt",
		"val": "../Dataset/valid.txt",
		"test": "../Dataset/test.txt",
	}
	with open(target, 'w', encoding='utf-8-sig') as stream:
		yaml.dump(config, stream, allow_unicode=True)

def run_convert(targetPath, src, copy):
	"""Build the dataset folder ``src`` from the raw data in ``targetPath``.

	Creates ``src/images``, ``src/labels`` and ``src/json`` when missing,
	optionally copies the images, converts all annotations, writes
	``src/data.yaml`` and prints the object count of every class.

	Args:
		targetPath: Root directory of the original data.
		src: Root directory of the dataset to create.
		copy: When truthy, images are copied as well.
	"""
	for folder in ("images", "labels", "json"):
		folder_path = src + "/" + folder
		if not os.path.isdir(folder_path):
			os.makedirs(folder_path)

	if copy:
		copy_image(targetPath, src)
	convert(targetPath, src)
	write_yaml(os.path.join(src, "data.yaml"))

	for idx, name in enumerate(classification):
		print(name, "is counted:", count[idx])

# Build ./Dataset (images, labels, json, data.yaml) from ./OriginData, copying the images too.
run_convert("./OriginData", "./Dataset", True)


In [None]:
# Pad the images of the converted dataset to a square, as needed for training, and resize them to 416x416.

# Width and height in pixels of the image written by image_resize.
IMG_SIZE = 416

def image_resize(target):
	"""Pad the image at ``target`` to a square and resize it in place.

	The shorter side is padded with black on both ends until the image is
	square, then the result is scaled to ``IMG_SIZE`` x ``IMG_SIZE`` with cubic
	interpolation and written back to the same path.

	Args:
		target: Path of the image file; it is overwritten.

	Raises:
		ValueError: If OpenCV cannot read ``target``.
	"""
	img = cv2.imread(target)
	if img is None:
		# cv2.imread signals failure by returning None instead of raising
		raise ValueError(f"image could not be read: {target}")
	h, w = img.shape[:2]

	# Split the missing pixels over both ends; the second end takes the extra
	# pixel of an odd difference so the padded image is exactly square.
	missing = abs(w - h)
	first = missing // 2
	second = missing - first
	if w > h:
		top, bottom, left, right = first, second, 0, 0
	else:
		top, bottom, left, right = 0, 0, first, second

	color = 0
	border = cv2.copyMakeBorder(
		img,
		top=top,
		bottom=bottom,
		left=left,
		right=right,
		borderType=cv2.BORDER_CONSTANT,
		value=[color, color, color]
	)

	resize = cv2.resize(border, (IMG_SIZE, IMG_SIZE), interpolation=cv2.INTER_CUBIC)
	cv2.imwrite(target, resize)

def resizing_img(targetPath):
	"""Resize every ``*.jpg`` file directly inside ``targetPath`` in place.

	Prints a running number and the path of each image before resizing it.
	"""
	for number, image_path in enumerate(glob(targetPath + '/*.jpg'), start=1):
		print(number, "image resizing :", image_path)
		image_resize(image_path)

# Resize every .jpg directly under ./Dataset/images; the files are overwritten.
resizing_img("./Dataset/images")


In [None]:
# Split the dataset used for training into Train:Valid:Test at a ratio of 8:1:1.
# The details of the resulting split are saved to the Excel file count.xlsx.
# Data with errors is listed in err.txt.

# Running number of processed annotation files (a list so functions can mutate it).
pr = [0]

# Species names looked up by the 1-based number taken from the annotation file
# name. Entries carry no surrounding whitespace so report keys stay clean.
animal = ["고라니", "멧돼지", "너구리", "다람쥐", "청설모", "반달가슴곰", "족제비", "멧토끼", "왜가리", "중대백로", "노루"]
# Entries are [(data_type, species, day), n]: objects per split, species and image type.
obj_count = []
# Same layout as obj_count, but counting images instead of objects.
img_count = []
# Image paths of annotation files that were rejected.
err_count = []

def _increment_count(counter, key):
	"""Add one to the ``[key, n]`` entry of ``counter``, creating it when missing."""
	for entry in counter:
		if entry[0] == key:
			entry[1] += 1
			return
	counter.append([key, 1])

def get_json_data(fileName, trainList, validList, testList):
	"""Assign one annotation file to the train, valid or test list.

	The image path is derived from ``fileName`` by swapping the extension for
	``.jpg`` and ``/json/`` for ``/images/``. The split is chosen from the
	object totals recorded so far for the species of the first annotation and
	the image type, which keeps the three splits near 8:1:1. ``obj_count`` and
	``img_count`` are updated afterwards.

	Files whose image type is neither ``"RGB"`` nor ``"IR"``, or that carry no
	annotation at all, are recorded in ``err_count`` and skipped.

	Args:
		fileName: Path of the annotation JSON file. The base name is expected
			to look like ``<letter><species number>_...``; the number selects
			the entry of ``animal``.
		trainList: List receiving image paths of the train split.
		validList: List receiving image paths of the valid split.
		testList: List receiving image paths of the test split.
	"""
	with open(fileName, encoding='utf-8-sig') as jsonFile:
		jsonData = json.load(jsonFile)
	fName = ('.'.join(fileName.split('.')[0:-1]) + ".jpg").replace("/json/", "/images/")
	pr[0] += 1
	print(pr[0], fName)

	day = str(jsonData['images'][0]['type'])
	if day != "RGB" and day != "IR":
		err_count.append(fName)
		return

	obj_list = jsonData['annotations']
	if not obj_list:
		# without an annotation there is no species to balance on
		err_count.append(fName)
		return

	# objects already assigned to each split for this species and image type
	species = obj_list[0]['species']
	totals = {"train": 0, "valid": 0, "test": 0}
	for key, amount in obj_count:
		if key[0] in totals and key[1] == species and key[2] == day:
			totals[key[0]] += amount

	# fill train until it holds 8 objects per valid object, then keep
	# valid and test level with each other
	if int(totals["train"] / 8) < totals["valid"]:
		data_type = "train"
		trainList.append(fName)
	elif totals["valid"] <= totals["test"]:
		data_type = "valid"
		validList.append(fName)
	else:
		data_type = "test"
		testList.append(fName)

	for obj in obj_list:
		_increment_count(obj_count, (data_type, obj['species'], day))

	# the file name prefix carries the 1-based species number
	spec = animal[int(os.path.basename(fileName).split('_')[0][1:]) - 1]
	_increment_count(img_count, (data_type, spec, day))

def parse_data(targetPath, trainList, validList, testList):
	"""Recursively feed every ``*.json`` file below ``targetPath`` to ``get_json_data``.

	Entries are visited in sorted order, so the resulting split does not depend
	on the arbitrary order in which the file system lists a directory.

	Args:
		targetPath: Directory to scan.
		trainList: List receiving image paths of the train split.
		validList: List receiving image paths of the valid split.
		testList: List receiving image paths of the test split.
	"""
	for line in sorted(os.listdir(targetPath)):
		entry = os.path.join(targetPath, line)
		if os.path.isdir(entry):
			parse_data(entry, trainList, validList, testList)
		# match the extension only; a substring test would also pick up
		# names such as "x.json.bak"
		elif line.endswith('.json'):
			get_json_data(targetPath + "/" + line, trainList, validList, testList)

def _print_count_section(title, rows):
	"""Print ``title``, one line per ``[key, n]`` row and the sum of all ``n``."""
	print(title)
	total = 0
	for key, amount in rows:
		print(f"{key[0]}: {key[1]}: {key[2]}: {amount} counted")
		total += amount
	print("count:", total)

def print_data(writePath):
	"""Print the split statistics and write the rejected files to ``err.txt``.

	``obj_count`` and ``img_count`` are sorted in place before printing.

	Args:
		writePath: Directory that receives ``err.txt``.
	"""
	obj_count.sort()
	img_count.sort()

	_print_count_section("==========object count==========", obj_count)
	_print_count_section("==========image count==========", img_count)

	print("==========err count==========")
	with open(writePath + "/err.txt", "w", encoding='utf-8-sig') as err_file:
		err_file.write("\n".join(err_count) + "\n")
	print("count:", len(err_count))

def print_excel(writePath):
	"""Save the object and image statistics to ``writePath + "/count.xlsx"``.

	The single sheet "count" holds an "Object" section followed by an "Image"
	section; each has a header row and one row per counter entry.
	"""
	wb = Workbook()
	ws = wb.active
	ws.title = "count"

	for section, rows in (("Object", obj_count), ("Image", img_count)):
		ws.append([section])
		ws.append(["data_type", "object_type", "day", "count"])
		for key, amount in rows:
			ws.append(key + (amount,))

	wb.save(writePath + "/count.xlsx")

def split_dataset(targetPath, writePath):
	"""Split the annotations below ``targetPath`` and write the list files.

	``train.txt``, ``valid.txt`` and ``test.txt`` are written to ``writePath``
	with one image path per line, followed by the console report, ``err.txt``
	and ``count.xlsx``.

	The list files are written as plain UTF-8. A byte-order mark would become
	part of the first path for any reader that opens the file as ordinary text.

	Args:
		targetPath: Directory holding the annotation JSON files; it is made
			absolute so the lists contain absolute paths.
		writePath: Directory that receives all output files.

	Returns:
		The list of image paths assigned to the test split.
	"""
	targetPath = os.path.abspath(targetPath)
	train_list = []
	valid_list = []
	test_list = []
	parse_data(targetPath, train_list, valid_list, test_list)

	outputs = (
		("train.txt", train_list),
		("valid.txt", valid_list),
		("test.txt", test_list),
	)
	for list_name, paths in outputs:
		with open(os.path.join(writePath, list_name), "w", encoding='utf-8') as f:
			f.writelines(path + "\n" for path in paths)

	print("train data: ", len(train_list))
	print("valid data: ", len(valid_list))
	print("test data: ", len(test_list))
	print_data(writePath)
	print_excel(writePath)

	return test_list

# Split the annotations in ./Dataset/json and write the list and report files into ./Dataset.
split_dataset("./Dataset/json", "./Dataset")


In [None]:
# Install YOLOv5 and the modules required to run it
!git clone https://github.com/ultralytics/yolov5.git

%cd yolov5
!pip3 install -r requirements.txt

In [None]:
# YOLO training code (runs for about 120 hours)

!sudo python3 train.py --img 416 --batch 64 --epochs 100 --data ../Dataset/data.yaml --cfg ./models/yolov5s.yaml --weights yolov5s.pt

In [None]:
# YOLO test code

!sudo python3 val.py --data ../Dataset/data.yaml --batch 128 --weights ./runs/train/exp/weights/best.pt --task test --img 416 --save-txt --save-conf