In [1]:
import os
from PIL import Image, ImageFile
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import numpy as np
from tqdm import tqdm

ImageFile.LOAD_TRUNCATED_IMAGES = True  # Avoid raising an error when an image file is damaged/truncated

def get_imglist(dir="./sample/img"):
    """Return the paths of the ``.png`` files located directly inside ``dir``.

    Args:
        dir: Directory to scan (not recursive). The parameter shadows the
            ``dir`` builtin; the name is kept so keyword callers keep working.

    Returns:
        list[str]: ``dir`` joined with every entry whose name ends in ``.png``
        (case-sensitive), with backslashes replaced by forward slashes, ordered
        by file name.
    """
    # os.listdir yields entries in arbitrary order; sort so that repeated runs
    # process images/patches in the same, reproducible order.
    return [
        os.path.join(dir, f).replace("\\", "/")
        for f in sorted(os.listdir(dir))
        if f.endswith('.png')
    ]
import os
import shutil

def create_temp_dir(base_dir='./temp_cropped_patches'):
    """Create a fresh, empty directory at ``base_dir`` and return its path.

    If something already exists at ``base_dir`` it is removed with
    ``shutil.rmtree`` first, so any previous contents are lost.

    Args:
        base_dir: Path of the scratch directory to (re)create.

    Returns:
        The ``base_dir`` argument, unchanged.
    """
    leftover_from_previous_run = os.path.exists(base_dir)
    if leftover_from_previous_run:
        # Wipe the old directory so stale patches never mix with new ones.
        shutil.rmtree(base_dir)

    os.makedirs(base_dir)
    return base_dir

def _patch_origin(index, crop_size, extent):
    """Return the start offset of patch ``index`` along an axis of length ``extent``.

    A patch that would run past the end of the axis is shifted back so that it
    ends exactly at the edge; the offset is clamped to 0 when the axis is
    shorter than ``crop_size``.
    """
    start = index * crop_size
    if start + crop_size > extent:
        start = max(extent - crop_size, 0)
    return start


def save_cropped_patches_single_image(image_path, crop_size, save_dir):
    """Tile one image into square patches and save each patch as a PNG.

    The image is covered by ``ceil(width / crop_size)`` x ``ceil(height / crop_size)``
    patches. Patches on the right/bottom border are shifted back inside the
    image instead of being padded, so they overlap their neighbours. If the
    image is smaller than ``crop_size`` along an axis, the patch is simply
    smaller along that axis.

    Each patch is written to ``save_dir`` as ``<stem>_<x>_<y>.png`` where
    ``<stem>`` is the source file name without extension and ``<x>``, ``<y>``
    are the patch's top-left pixel coordinates in the source image.

    Args:
        image_path: Path of the source image.
        crop_size: Patch edge length in pixels; must be positive.
        save_dir: Existing directory that receives the patch files.

    Raises:
        ValueError: If ``crop_size`` is not positive.
    """
    if crop_size <= 0:
        # Previously 0 crashed with ZeroDivisionError and negatives silently
        # produced no patches at all.
        raise ValueError(f"crop_size must be positive, got {crop_size!r}")

    image_name = os.path.basename(image_path)
    print(f"Cropping image: {image_name}")
    stem = os.path.splitext(image_name)[0]

    # convert() returns an independent in-memory copy, so the source file can
    # be closed as soon as the conversion is done.
    with Image.open(image_path) as source:
        image = source.convert('RGB')
    image_width, image_height = image.size

    # Ceil-division: number of patches needed to cover each axis.
    num_patches_x = (image_width + crop_size - 1) // crop_size
    num_patches_y = (image_height + crop_size - 1) // crop_size

    x_origins = [_patch_origin(i, crop_size, image_width) for i in range(num_patches_x)]
    y_origins = [_patch_origin(j, crop_size, image_height) for j in range(num_patches_y)]

    for top_left_x in tqdm(x_origins):
        bottom_right_x = min(top_left_x + crop_size, image_width)
        for top_left_y in y_origins:
            bottom_right_y = min(top_left_y + crop_size, image_height)

            cropped_image = image.crop((top_left_x, top_left_y, bottom_right_x, bottom_right_y))

            # The top-left corner is encoded in the file name so it can be
            # recovered later from the name alone.
            crop_filename = f"{stem}_{top_left_x}_{top_left_y}.png"
            cropped_image.save(os.path.join(save_dir, crop_filename))
    
class CroppedPatchDataset(Dataset):
    """Dataset over patch image files named ``<stem>_<x>_<y>.png``.

    Each item is the patch resized to a square of ``resize_size`` pixels and
    converted to a float tensor with its channel order reversed (BGR), together
    with the top-left position parsed from the file name and the reconstructed
    name of the source image (``<stem>.png``).

    Args:
        crop_image_dir: Despite the name, this is used as an indexable sequence
            of patch file paths (it is indexed by integer and passed to ``len``).
        resize_size: Edge length in pixels of the square every patch is resized to.
    """

    def __init__(self, crop_image_dir,resize_size):
        self.crop_image_paths = crop_image_dir
        self.transform = transforms.ToTensor()
        self.resize_size = resize_size

    def __len__(self):
        """Return the number of patch files."""
        return len(self.crop_image_paths)

    def __getitem__(self, idx):
        """Load patch ``idx``.

        Returns:
            dict with keys
            ``'image_name'`` (str, ``<stem>.png``),
            ``'image'`` (tensor produced by ``ToTensor`` with channels reversed),
            ``'top_left_position'`` (``torch.tensor([x, y])`` parsed from the file name).
        """
        crop_image_path = self.crop_image_paths[idx]
        image_name = os.path.basename(crop_image_path)

        # convert() returns an in-memory copy, so the file can be closed right away.
        with Image.open(crop_image_path) as source:
            cropped_image = source.convert('RGB')

        target_size = (self.resize_size, self.resize_size)
        cropped_image = cropped_image.resize(target_size, resample=Image.Resampling.LANCZOS)

        # ToTensor produces a channel-first tensor from the RGB image; flipping
        # dim 0 reverses the channel order (RGB -> BGR). This yields the same
        # values as the former numpy -> PIL round trip without the extra copies.
        # NOTE(review): Ultralytics documents torch.Tensor inputs to predict()
        # as RGB; if these tensors go straight to model.predict the BGR swap
        # may be unintended — confirm before changing.
        cropped_image_tensor = self.transform(cropped_image).flip(0)

        # File name layout: <stem>_<x>_<y>.png — split from the right so that
        # underscores inside <stem> are preserved.
        stem, x_text, y_text = os.path.splitext(image_name)[0].rsplit('_', 2)
        position = torch.tensor([int(x_text), int(y_text)])

        original_image_name = stem + '.png'

        return {
            'image_name': original_image_name,
            'image': cropped_image_tensor,
            'top_left_position': position
        }


  from .autonotebook import tqdm as notebook_tqdm


In [5]:
# from ultralytics import YOLO
# model = YOLO("yolo11s-obb.pt")
# preds = model.predict(images,conf=0.1, save=True)


0: 896x896 434.5ms
1: 896x896 (no detections), 434.5ms
2: 896x896 434.5ms
3: 896x896 (no detections), 434.5ms
4: 896x896 (no detections), 434.5ms
5: 896x896 (no detections), 434.5ms
6: 896x896 (no detections), 434.5ms
7: 896x896 (no detections), 434.5ms
8: 896x896 434.5ms
9: 896x896 (no detections), 434.5ms
10: 896x896 (no detections), 434.5ms
11: 896x896 (no detections), 434.5ms
12: 896x896 (no detections), 434.5ms
13: 896x896 (no detections), 434.5ms
14: 896x896 (no detections), 434.5ms
15: 896x896 434.5ms
16: 896x896 (no detections), 434.5ms
17: 896x896 (no detections), 434.5ms
18: 896x896 (no detections), 434.5ms
19: 896x896 434.5ms
20: 896x896 (no detections), 434.5ms
21: 896x896 (no detections), 434.5ms
22: 896x896 (no detections), 434.5ms
23: 896x896 (no detections), 434.5ms
24: 896x896 (no detections), 434.5ms
25: 896x896 (no detections), 434.5ms
26: 896x896 (no detections), 434.5ms
27: 896x896 434.5ms
28: 896x896 (no detections), 434.5ms
29: 896x896 (no detections), 434.5ms
3

In [8]:
# preds[27].obb.xywhr

tensor([[323.2016, 157.5453, 411.3317, 170.7461,   1.8658]])

In [9]:
# preds[27].obb.conf[0]

tensor([0.1042])

In [3]:
def run_model(image_names, images, positions, model, result, scale_factor=0.25):
    """Run the model on a batch of patches and store detections in source-image coordinates.

    Args:
        image_names: Source image name for each patch in the batch.
        images: Batch of patch images passed unchanged to ``model.predict``.
        positions: Per-patch ``(top_left_x, top_left_y)`` of the patch in its
            source image.
        model: Object exposing ``predict(images, conf=..., save=...)`` that returns
            one prediction per patch, each with ``obb.xywhr`` and ``obb.conf``.
        result: Dict mutated in place; maps image name to a list of
            ``{'xywhr': [x, y, w, h, r], 'conf': conf}`` entries.
        scale_factor: Ratio between patch pixels and prediction pixels. The
            default 0.25 corresponds to 256-pixel patches upscaled to 1024
            before inference.

    Returns:
        None. Detections are appended to ``result``.
    """
    preds = model.predict(images, conf=0.1, save=False)  # one prediction per patch

    for img_name, pred, pos in zip(image_names, preds, positions):
        detections = result.setdefault(img_name, [])

        if len(pred) == 0:
            continue

        top_left_x, top_left_y = (float(v) for v in pos)

        # Convert once per patch to plain Python floats so that `result` holds
        # numbers (like 'conf') rather than 0-dim tensors tied to the device.
        boxes = pred.obb.xywhr.tolist()  # rows of [x, y, w, h, r]
        confidences = pred.obb.conf.tolist()

        for (x, y, w, h, r), confidence in zip(boxes, confidences):
            detections.append({
                'xywhr': [
                    # Rescale to patch pixels, then shift by the patch origin.
                    x * scale_factor + top_left_x,
                    y * scale_factor + top_left_y,
                    w * scale_factor,
                    h * scale_factor,
                    r,  # the angle is unaffected by scaling/translation
                ],
                'conf': confidence
            })


In [None]:
from ultralytics import YOLO
import torch
# Example usage: tile every PNG in `directory_path`, run the model on the
# tiles and collect the detections per source image in `result`.
#directory_path = '/workspace/dataset/'
directory_path = './'
base_dir='./temp_cropped_patches'

img_list = get_imglist(directory_path)

# color_stats_file = './train_color_stats.npz'

####################
model_name = "./yolo11s-obb.pt"
####################
# Patch size in pixels. Patches are upscaled by 4 (crop_size*4) before
# inference; run_model maps predictions back with a 0.25 scale factor.
crop_size = 256
batch_size = 32
model = YOLO(model_name)


result = {}  # maps source image name -> list of detections
for image_path in img_list:
    # Fresh scratch directory for this image's patches
    temp_dir = create_temp_dir(base_dir=base_dir)
    try:
        image_name = os.path.basename(image_path)
        print(f"Processing image: {image_name}")
        # Crop the image and store the patches in the scratch directory
        save_cropped_patches_single_image(image_path, crop_size, temp_dir)

        temp_list = get_imglist(temp_dir)
        # Dataset / DataLoader over the patches of this image
        dataset = CroppedPatchDataset(temp_list,resize_size=crop_size*4)
        dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=False)

        result[image_name] = []
        for batch in tqdm(dataloader):
            images = batch['image']
            positions = batch['top_left_position']
            image_names = batch['image_name']

            # Run the model; coordinates are converted to source-image space
            run_model(image_names, images, positions, model, result)

        # Hook for per-image post-processing or saving
        per_image_result = result[image_name]
    finally:
        # Remove the scratch directory even when cropping or inference fails,
        # so a crash does not leave patches behind.
        shutil.rmtree(temp_dir)


Processing image: task_smaple.png
Cropping image: task_smaple.png


100%|██████████| 43/43 [00:35<00:00,  1.22it/s]
  0%|          | 0/58 [00:00<?, ?it/s]


0: 1024x1024 549.3ms
1: 1024x1024 549.3ms
2: 1024x1024 (no detections), 549.3ms
3: 1024x1024 549.3ms
4: 1024x1024 (no detections), 549.3ms
5: 1024x1024 (no detections), 549.3ms
6: 1024x1024 (no detections), 549.3ms
7: 1024x1024 549.3ms
8: 1024x1024 549.3ms
9: 1024x1024 549.3ms
10: 1024x1024 (no detections), 549.3ms
11: 1024x1024 (no detections), 549.3ms
12: 1024x1024 (no detections), 549.3ms
13: 1024x1024 (no detections), 549.3ms
14: 1024x1024 549.3ms
15: 1024x1024 (no detections), 549.3ms
16: 1024x1024 (no detections), 549.3ms
17: 1024x1024 (no detections), 549.3ms
18: 1024x1024 (no detections), 549.3ms
19: 1024x1024 (no detections), 549.3ms
20: 1024x1024 (no detections), 549.3ms
21: 1024x1024 549.3ms
22: 1024x1024 (no detections), 549.3ms
23: 1024x1024 (no detections), 549.3ms
24: 1024x1024 (no detections), 549.3ms
25: 1024x1024 549.3ms
26: 1024x1024 (no detections), 549.3ms
27: 1024x1024 (no detections), 549.3ms
28: 1024x1024 (no detections), 549.3ms
29: 1024x1024 549.3ms
30: 1024x

  2%|▏         | 1/58 [00:18<17:41, 18.62s/it]


0: 1024x1024 (no detections), 591.2ms
1: 1024x1024 591.2ms
2: 1024x1024 (no detections), 591.2ms
3: 1024x1024 591.2ms
4: 1024x1024 (no detections), 591.2ms
5: 1024x1024 (no detections), 591.2ms
6: 1024x1024 (no detections), 591.2ms
7: 1024x1024 (no detections), 591.2ms
8: 1024x1024 (no detections), 591.2ms
9: 1024x1024 (no detections), 591.2ms
10: 1024x1024 (no detections), 591.2ms
11: 1024x1024 591.2ms
12: 1024x1024 (no detections), 591.2ms
13: 1024x1024 (no detections), 591.2ms
14: 1024x1024 591.2ms
15: 1024x1024 (no detections), 591.2ms
16: 1024x1024 591.2ms
17: 1024x1024 (no detections), 591.2ms
18: 1024x1024 (no detections), 591.2ms
19: 1024x1024 (no detections), 591.2ms
20: 1024x1024 (no detections), 591.2ms
21: 1024x1024 (no detections), 591.2ms
22: 1024x1024 (no detections), 591.2ms
23: 1024x1024 591.2ms
24: 1024x1024 (no detections), 591.2ms
25: 1024x1024 (no detections), 591.2ms
26: 1024x1024 591.2ms
27: 1024x1024 591.2ms
28: 1024x1024 (no detections), 591.2ms
29: 1024x1024 

  3%|▎         | 2/58 [00:38<18:11, 19.48s/it]


0: 1024x1024 (no detections), 571.8ms
1: 1024x1024 571.8ms
2: 1024x1024 (no detections), 571.8ms
3: 1024x1024 (no detections), 571.8ms
4: 1024x1024 571.8ms
5: 1024x1024 (no detections), 571.8ms
6: 1024x1024 (no detections), 571.8ms
7: 1024x1024 (no detections), 571.8ms
8: 1024x1024 (no detections), 571.8ms
9: 1024x1024 (no detections), 571.8ms
10: 1024x1024 (no detections), 571.8ms
11: 1024x1024 (no detections), 571.8ms
12: 1024x1024 571.8ms
13: 1024x1024 (no detections), 571.8ms
14: 1024x1024 (no detections), 571.8ms
15: 1024x1024 (no detections), 571.8ms
16: 1024x1024 (no detections), 571.8ms
17: 1024x1024 (no detections), 571.8ms
18: 1024x1024 571.8ms
19: 1024x1024 (no detections), 571.8ms
20: 1024x1024 (no detections), 571.8ms
21: 1024x1024 (no detections), 571.8ms
22: 1024x1024 571.8ms
23: 1024x1024 (no detections), 571.8ms
24: 1024x1024 571.8ms
25: 1024x1024 571.8ms
26: 1024x1024 (no detections), 571.8ms
27: 1024x1024 (no detections), 571.8ms
28: 1024x1024 (no detections), 571.8

  5%|▌         | 3/58 [00:58<17:49, 19.44s/it]




In [None]:
result

result = 
{
    'image_name.png': [
        {'xywhr': [x1, y1, w1, h1, r1], 'conf': conf1},
        {'xywhr': [x2, y2, w2, h2, r2], 'conf': conf2},
        # ...
    ],
    # 다른 이미지들...
}

In [None]:


# Prediction helper built around a YOLO model
def run_yolo_on_images(dataloader, model, device):
    """Run ``model.predict`` on every sub-batch produced by ``dataloader``.

    Every element of every batch is treated as a mapping with the keys
    ``'images'``, ``'top_left_positions'`` and ``'image_names'``; the first two
    values are moved with ``.to(device)`` before prediction.

    Args:
        dataloader: Iterable of batches, each itself an iterable of sub-batch mappings.
        model: Object whose ``predict(images, conf=0.1, save=True)`` returns one
            prediction per image, each exposing ``obb.xywhr`` and ``obb.conf``.
        device: Target passed to ``.to(...)``.

    Returns:
        list[dict]: One record per sub-batch with the keys ``'image_names'``,
        ``'obb'`` (list of ``obb.xywhr``), ``'conf'`` (list of ``obb.conf``) and
        ``'top_left_positions'`` (moved to ``device``).
    """
    collected = []

    # Walk the sub-batches lazily, in the same order as a nested loop would.
    sub_batches = (item for batch in dataloader for item in batch)
    for item in sub_batches:
        imgs = item['images'].to(device)
        origins = item['top_left_positions'].to(device)

        # Ultralytics-style predict call; save=True is kept as in the original.
        outputs = model.predict(imgs, conf=0.1, save=True)

        boxes_per_image = [out.obb.xywhr for out in outputs]
        scores_per_image = [out.obb.conf for out in outputs]

        record = {
            'image_names': item['image_names'],
            'obb': boxes_per_image,
            'conf': scores_per_image,
            'top_left_positions': origins,
        }
        collected.append(record)
        print(f"Processed {len(item['image_names'])} images with predictions.")

    return collected

# Run prediction with the model and collect one record per sub-batch.
# NOTE(review): `device` is not assigned in any cell visible here — presumably
# it came from a cell that is no longer shown; confirm before running on a
# fresh kernel.
# NOTE(review): run_yolo_on_images indexes each element of a batch with
# 'images' / 'top_left_positions' / 'image_names'; verify that `dataloader`
# yields that structure.
predictions = []
predictions += run_yolo_on_images(dataloader, model, device)
print("예측 완료")
print(len(predictions))

In [6]:
import csv
import torch
from torchvision.ops import nms
import math
import datetime
from ultralytics.utils.ops import nms_rotated

# Path of the CSV file to write
csv_file = "./submission.csv"

# Output rows: [image_name, cx, cy, width, height, angle_deg]
data = []


# IoU threshold used by NMS
nms_threshold = 0.7

# Each entry of `predictions` describes one batch of patches.
for i in range(len(predictions)):
    image_name = predictions[i]['image_names']  # list of image names, one per patch
    obb_list = predictions[i]['obb']  # per-patch tensors with rows [x, y, w, h, r]
    top_left_pos_list = predictions[i]['top_left_positions']  # per-patch top-left offsets
    conf_list = predictions[i]['conf']  # per-patch confidence tensors
    # Walk the patches of this batch
    for j in range(len(obb_list)):
        obb_tensor = obb_list[j]
        top_left_pos = top_left_pos_list[j]
        conf = conf_list[j]

        # Nothing to do for patches without detections
        if len(obb_tensor) == 0:
            continue

        offset_x = top_left_pos[0].item()
        offset_y = top_left_pos[1].item()

        # Patch coordinates are halved before the patch offset is added.
        # NOTE(review): this assumes the patches were upscaled x2 for
        # inference, whereas run_model uses a 0.25 factor — confirm which
        # scale matches the `predictions` being exported.
        detections = []  # (cx, cy, width, height, angle) in image coordinates
        boxes = []       # matching axis-aligned [x1, y1, x2, y2] boxes for NMS
        for px, py, pw, ph, angle in obb_tensor.tolist():
            cx = px / 2 + offset_x
            cy = py / 2 + offset_y
            width = pw / 2
            height = ph / 2
            detections.append((cx, cy, width, height, angle))
            boxes.append([cx - width / 2, cy - height / 2, cx + width / 2, cy + height / 2])
        scores = conf.tolist()

        # Run NMS once per patch, after every box has been collected. It used
        # to run inside the loop above, where the partially filled `boxes` and
        # the full `scores` had different lengths for multi-detection patches.
        # NOTE(review): the boxes ignore the rotation angle; nms_rotated is
        # imported in this cell but unused — consider whether rotated NMS was
        # intended.
        boxes_tensor = torch.tensor(boxes, dtype=torch.float32)
        scores_tensor = torch.tensor(scores, dtype=torch.float32)
        nms_indices = nms(boxes_tensor, scores_tensor, nms_threshold)
        print("nms indices 완료.")
        print(len(nms_indices))

        # Emit one row for every detection kept by NMS
        for idx in nms_indices.tolist():
            cx, cy, width, height, angle = detections[idx]

            # Radians -> degrees
            angle_deg = math.degrees(angle)

            # Map negative angles into the 0-360 range
            if angle_deg < 0:
                angle_deg += 360

            data.append([image_name[j], cx, cy, width, height, angle_deg])
# Write the CSV file
with open(csv_file, mode='w', newline='') as file:
    writer = csv.writer(file)

    # Header row
    writer.writerow(['image_name', 'cx', 'cy', 'width', 'height', 'angle'])

    # Data rows
    writer.writerows(data)

print(f"CSV 파일 '{csv_file}'이(가) 성공적으로 생성되었습니다.")


CSV 파일 './submission.csv'이(가) 성공적으로 생성되었습니다.
