In [5]:
import os
from tqdm.notebook import tqdm
from shutil import copyfile, copytree, move
import random

In [4]:
# Source folders whose contents are merged into a single dataset directory.
dirs = [
    'summer_moscow_2019',
    'spring_korolev_2019',
    'summer_tambov_2019',
    'winter_moscow_2018',
]

# Destination of the merged copy; created up front when it is missing.
save_dir = 'LADD'
os.makedirs(save_dir, exist_ok=True)

In [5]:
# Merge every source folder into `save_dir`. With dirs_exist_ok=True the trees
# are overlaid, so a file that has the same relative path in two source
# folders is overwritten by the later copy.
for directory in dirs:
    copytree(src=directory, dst=save_dir, dirs_exist_ok=True)

### Convert to YOLO format

In [11]:
!pip install xmltodict

Collecting xmltodict
  Downloading xmltodict-0.13.0-py2.py3-none-any.whl (10.0 kB)
Installing collected packages: xmltodict
Successfully installed xmltodict-0.13.0


In [222]:
import xmltodict
def xml2yolo(filepath, save_path):
    """Convert one VOC-style XML annotation file into a YOLO label file.

    The image size is read from ``annotation/size`` and every ``object`` entry
    contributes one line ``0 x_center y_center width height``. All four values
    are divided by the image width/height, and every object is written with
    class id ``0``.

    Args:
        filepath: Path of the XML annotation to read.
        save_path: Path of the ``.txt`` label file to write.

    Returns:
        None. When the annotation has no ``object`` entry nothing is written.
    """
    with open(filepath, 'r') as f:
        annot = xmltodict.parse(f.read())['annotation']

    img_h = int(annot['size']['height'])
    img_w = int(annot['size']['width'])

    objects = annot.get('object')
    if not objects:
        # No annotated objects: leave the image without a label file.
        return
    if isinstance(objects, dict):
        # xmltodict yields a single dict (not a list) when there is one object.
        objects = [objects]

    peoples = []
    for obj in objects:
        bbox = obj['bndbox']
        x_min, x_max = float(bbox['xmin']), float(bbox['xmax'])
        y_min, y_max = float(bbox['ymin']), float(bbox['ymax'])
        # True division keeps the exact centre; floor division would shift
        # boxes with an odd width/height by half a pixel.
        x_c = (x_min + x_max) / 2 / img_w
        y_c = (y_min + y_max) / 2 / img_h
        w = (x_max - x_min) / img_w
        h = (y_max - y_min) / img_h
        peoples.append(f"0 {x_c} {y_c} {w} {h}")

    write_txt(peoples, save_path)

def write_txt(data, save_path):
    """Write every item of ``data`` to ``save_path``, one item per line.

    The file is opened in write mode, so existing content is replaced. Each
    item is formatted with an f-string and terminated by a newline.
    """
    with open(save_path, 'w') as out_file:
        out_file.writelines(f"{entry}\n" for entry in data)

In [224]:
# XML annotations are read from `annot_dir`; the converted YOLO label files
# are written to `labels_dir`, which is created when it does not exist yet.
annot_dir = 'LADD/Annotations'
labels_dir = 'LADD/labels'
os.makedirs(labels_dir, exist_ok=True)

In [227]:
# Convert every XML annotation into a YOLO label file with the same base name.
for file in tqdm(os.listdir(annot_dir)):
    # splitext strips only the final extension, so names containing extra dots
    # keep their full stem.
    stem, ext = os.path.splitext(file)
    if ext.lower() != '.xml':
        # Skip anything that is not an annotation (for example a hidden
        # `.ipynb_checkpoints` folder), which would otherwise fail to parse.
        continue
    xml2yolo(os.path.join(annot_dir, file), os.path.join(labels_dir, stem + '.txt'))

  0%|          | 0/1422 [00:00<?, ?it/s]

### TRAIN SET

In [6]:
import pandas as pd
import json
import cv2

def parse_shape(shape):
    """Extract the JSON objects embedded in a ``region_shape`` string.

    The positions of all ``{`` and all ``}`` characters are collected and
    paired in order of appearance; each ``{...}`` slice is decoded with
    ``json.loads``. The pairing assumes flat (non-nested) objects.

    Args:
        shape: Text containing zero or more ``{...}`` JSON objects.

    Returns:
        List with one decoded object per brace pair.
    """
    opening = [pos for pos, char in enumerate(shape) if char == '{']
    closing = [pos + 1 for pos, char in enumerate(shape) if char == '}']
    return [json.loads(shape[lo:hi]) for lo, hi in zip(opening, closing)]

def shape2yolo(shape, img_path, save_path):
    """Convert circular regions (``cx``, ``cy``, ``r``) into a YOLO label file.

    Each region becomes a square box of side ``2 * r`` centred on
    ``(cx, cy)`` and is written as ``0 x_center y_center width height``.
    Horizontal values are divided by the image width and vertical values by
    the image height.

    Args:
        shape: ``region_shape`` string, decoded with ``parse_shape``.
        img_path: Image the regions belong to; read only to get its size.
        save_path: Path of the ``.txt`` label file to write.

    Raises:
        FileNotFoundError: If ``cv2.imread`` cannot load ``img_path``.
    """
    data = parse_shape(shape)

    image = cv2.imread(img_path)
    if image is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError(f"Cannot read image: {img_path}")
    img_h, img_w = image.shape[:2]

    peoples = []
    for region in data:
        x_c = region['cx'] / img_w
        y_c = region['cy'] / img_h
        side = region['r'] * 2
        # The box is square in pixels, so its normalized height must be scaled
        # by the image height (not the width) on non-square images.
        peoples.append(f"0 {x_c} {y_c} {side / img_w} {side / img_h}")

    write_txt(peoples, save_path)

In [11]:
# Load the training annotations and preview the rows that contain at least
# one annotated region.
df = pd.read_csv('train.csv', sep=',')
df.loc[df['count_region'] > 0].head()

Unnamed: 0,ID_img,count_region,region_shape
8,3444.jpg,1.0,"['{""cx"":2259,""cy"":391,""r"":64}']"
217,3653.JPG,1.0,"['{""cx"":2719,""cy"":2097,""r"":75}']"
254,3690.JPG,1.0,"['{""cx"":2914,""cy"":1693,""r"":90}']"
399,3835.JPG,2.0,"['{""cx"":2549,""cy"":2329,""r"":80}', '{""cx"":2437,""..."
511,3947.JPG,1.0,"['{""cx"":3130,""cy"":1370,""r"":134}']"


In [None]:
train_path_basedataset = 'train'
omsk_dataset_full = 'omsk_full'
LADD_images = 'LADD/JPEGImages'
LADD_labels = 'LADD/labels'

# Create the whole output layout up front.
for subdir in ('images', 'labels',
               'test/images', 'test/labels',
               'train/images', 'train/labels'):
    os.makedirs(os.path.join(omsk_dataset_full, subdir), exist_ok=True)

# Copy every image that has at least one annotated region into the train set
# and write its YOLO label file next to it.
for index, row in df[df['count_region']>0].iterrows():
    image_name = row['ID_img']
    source_image = os.path.join(train_path_basedataset, image_name)
    copyfile(source_image,
             os.path.join(omsk_dataset_full, 'train/images', image_name))
    shape = row['region_shape']
    # Strip only the last extension so the label name matches the image name
    # even when the name itself contains dots.
    label_name = os.path.splitext(image_name)[0] + '.txt'
    shape2yolo(shape, source_image,
               os.path.join(omsk_dataset_full, 'train/labels', label_name))

In [None]:
# Keep a complete copy of the converted LADD labels and images under the
# top-level `labels` / `images` folders of the combined dataset.
for ladd_source, target_subdir in (('LADD/labels', 'labels'),
                                   ('LADD/JPEGImages', 'images')):
    copytree(ladd_source,
             os.path.join(omsk_dataset_full, target_subdir),
             dirs_exist_ok=True)

In [None]:
# Sorted listing + a dedicated seeded generator make the 80/20 split
# reproducible without touching the global `random` state.
images = sorted(os.listdir(LADD_images))
random.Random(42).shuffle(images)

n_test = int(0.2*len(images))

# First 20% of the shuffled images (and their labels, when present) -> test.
for image_name in images[:n_test]:
    move(os.path.join(LADD_images, image_name),
         os.path.join(omsk_dataset_full, 'test/images', image_name))
    label_name = os.path.splitext(image_name)[0] + '.txt'
    if os.path.exists(os.path.join(LADD_labels, label_name)):
        move(os.path.join(LADD_labels, label_name),
             os.path.join(omsk_dataset_full, 'test/labels', label_name))

# Everything that is left -> train. Files are moved one by one: moving the
# whole source directories into `train` would create `train/JPEGImages`
# (not `train/images`) and fail on the already existing `train/labels`.
for image_name in images[n_test:]:
    move(os.path.join(LADD_images, image_name),
         os.path.join(omsk_dataset_full, 'train/images', image_name))
for label_name in os.listdir(LADD_labels):
    move(os.path.join(LADD_labels, label_name),
         os.path.join(omsk_dataset_full, 'train/labels', label_name))

# Add images without annotated regions

In [237]:
# Images without any annotated region; shuffled with a fixed seed so the
# selection is reproducible.
empty_images = df[df['count_region']==0]['ID_img'].to_list()
random.Random(42).shuffle(empty_images)

# Take two consecutive, non-overlapping slices so that no image can land in
# both train and test, even when fewer than 700 empty images are available.
n_empty = 350
train_empty_images = empty_images[:n_empty]
test_empty_images = empty_images[n_empty:2*n_empty]

for image in train_empty_images:
    copyfile(os.path.join(train_path_basedataset, image),
             os.path.join(omsk_dataset_full, 'train/images', image))

for image in test_empty_images:
    copyfile(os.path.join(train_path_basedataset, image),
             os.path.join(omsk_dataset_full, 'test/images', image))

In [238]:
# make train and test

# TRAIN

In [None]:
!git clone https://github.com/ultralytics/yolov5
cd "yolov5"
!pip install -r requirements.txt

In [246]:
# Train YOLOv5 from the yolov5m6.pt weights at 1280 px for 100 epochs; the run
# is stored under project "hackaton_omsk_find_people", name "yolov5m6".
# `--batch -1` presumably lets YOLOv5 pick the batch size automatically -
# TODO confirm against train.py.
# NOTE(review): the saved output below reports
# data=/home/jovyan/hack/find_people/omsk.yaml, which differs from the --data
# path passed here; the output appears to come from a different run.
!python train.py --img 1280 --batch -1 --epochs 100 --data "/home/jovyan/omsk_hack.yaml" --weights yolov5m6.pt --project "hackaton_omsk_find_people" --name "yolov5m6"

[34m[1mwandb[0m: Currently logged in as: [33mvictor30608[0m. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mtrain: [0mweights=yolov5m6.pt, cfg=, data=/home/jovyan/hack/find_people/omsk.yaml, hyp=data/hyps/hyp.scratch-low.yaml, epochs=100, batch_size=-1, imgsz=1280, rect=False, resume=False, nosave=False, noval=False, noautoanchor=False, noplots=False, evolve=None, bucket=, cache=None, image_weights=False, device=, multi_scale=False, single_cls=False, optimizer=SGD, sync_bn=False, workers=8, project=hackaton_omsk_find_people, name=yolov5m6, exist_ok=False, quad=False, cos_lr=False, label_smoothing=0.0, patience=100, freeze=[0], save_period=-1, seed=0, local_rank=-1, entity=None, upload_dataset=False, bbox_interval=-1, artifact_alias=latest
[34m[1mgithub: [0m⚠️ YOLOv5 is out of date by 44 commits. Use `git pull` or `git clone https://github.com/ultralytics/yolov5` to update.
YOLOv5 🚀 v6.2-195-gdf80e7c7 Python-3.9.7 torch-1.11.0+cu113 CUDA:0 (NVIDIA RTX A5000, 2425

In [251]:
# Run detection on the test images with the trained weights. --save-txt and
# --save-conf are needed because the post-processing below reads the label
# files from runs/detect/yolov5m6_people_test/labels and expects six values
# per line (the last one being compared against a confidence threshold).
# NOTE(review): the saved output below reports a different --source
# (/home/jovyan/hack/find_people/test) and weights folder (yolov5m63) than the
# command here; the output appears to come from a different run.
!python detect.py --source "/home/jovyan/test" --weights "/home/jovyan/yolov5/hackaton_omsk_find_people/yolov5m6/weights/best.pt" --save-txt --save-conf --name "yolov5m6_people_test" --imgsz 1280 --exist-ok --conf-thres 0.2

[34m[1mdetect: [0mweights=['/home/jovyan/yolov5/hackaton_omsk_find_people/yolov5m63/weights/best.pt'], source=/home/jovyan/hack/find_people/test, data=data/coco128.yaml, imgsz=[1280, 1280], conf_thres=0.2, iou_thres=0.45, max_det=1000, device=, view_img=False, save_txt=True, save_conf=True, save_crop=False, nosave=False, classes=None, agnostic_nms=False, augment=False, visualize=False, update=False, project=runs/detect, name=yolov5m6_people_test, exist_ok=True, line_thickness=3, hide_labels=False, hide_conf=False, half=False, dnn=False, vid_stride=1
YOLOv5 🚀 v6.2-195-gdf80e7c7 Python-3.9.7 torch-1.11.0+cu113 CUDA:0 (NVIDIA RTX A5000, 24256MiB)

Fusing layers... 
Model summary: 276 layers, 35248920 parameters, 0 gradients, 48.9 GFLOPs
image 1/3435 /home/jovyan/hack/find_people/test/1.JPG: 768x1280 (no detections), 15.4ms
image 2/3435 /home/jovyan/hack/find_people/test/10.JPG: 768x1280 (no detections), 11.9ms
image 3/3435 /home/jovyan/hack/find_people/test/100.JPG: 768x1280 (no detect

In [288]:
def read_txt(label_path, img_path, th=0.3):
    """Convert one detection label file into ``{"cx","cy","r"}`` region strings.

    Every non-blank line must hold six numbers: class, x_center, y_center,
    width, height (all normalized to the image size) and a confidence score.
    Detections with confidence below ``th`` are dropped. The rest are scaled
    to pixels using the size of the image at ``img_path``.

    ``r`` is half of the larger box side. This mirrors ``shape2yolo``, which
    builds training boxes with side ``2 * r``.

    Args:
        label_path: Path of the detection ``.txt`` file.
        img_path: Image the detections belong to; read only to get its size.
        th: Minimum confidence for a detection to be kept.

    Returns:
        List of JSON-formatted strings, one per kept detection.

    Raises:
        FileNotFoundError: If ``cv2.imread`` cannot load ``img_path``.
    """
    image = cv2.imread(img_path)
    if image is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError(f"Cannot read image: {img_path}")
    img_h, img_w = image.shape[:2]

    result = []
    with open(label_path, 'r') as file:
        for line in file:
            fields = line.split()
            if not fields:
                # Tolerate blank lines (e.g. a trailing newline).
                continue
            _cls, xc, yc, w_, h_, conf = map(float, fields)
            if conf < th:
                continue
            cx = xc * img_w
            cy = yc * img_h
            # Box sides are diameters; the region format stores a radius.
            radius = max(w_ * img_w, h_ * img_h) / 2
            result.append(f"{{\"cx\":{cx},\"cy\":{cy},\"r\":{radius}}}")
    return result

In [293]:
labels = '/home/jovyan/yolov5/runs/detect/yolov5m6_people_test/labels'
test_images_path = '/home/jovyan/hack/find_people/test'

# Map image stem -> list of predicted regions for every detection label file.
res = {}
for file in tqdm(os.listdir(labels)):
    # splitext strips only the final extension, so stems containing dots survive.
    stem = os.path.splitext(file)[0]
    # Test images use either an upper- or a lower-case JPG extension.
    image_path = os.path.join(test_images_path, stem + '.JPG')
    if not os.path.exists(image_path):
        image_path = os.path.join(test_images_path, stem + '.jpg')
    predict = read_txt(os.path.join(labels, file), image_path)
    res[stem] = predict

  0%|          | 0/106 [00:00<?, ?it/s]

In [294]:
# Build the submission: one row per test image. Rows are collected in a list
# and turned into a DataFrame once; DataFrame.append was removed in pandas 2.0
# and is quadratic when called in a loop.
records = []
for file in tqdm(os.listdir(test_images_path)):
    stem = os.path.splitext(file)[0]
    # Images without a detection label file get 0 as their region_shape.
    # NOTE(review): images whose detections were all below the threshold keep
    # an empty list here rather than 0 - confirm which one the format expects.
    records.append({'ID_img': file, 'region_shape': res.setdefault(stem, 0)})

df = pd.DataFrame(records, columns=['ID_img','region_shape'])
df.to_csv(r'/home/jovyan/omsk_solution.csv', index=False)