In [None]:
!pip install -U -q ultralytics
!pip install -U -q aifactory
!pip install -q tqdm

In [1]:
import os
from PIL import Image, ImageFile
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import numpy as np
from tqdm import tqdm
# Ask PyTorch's CUDA allocator to use expandable segments (set via environment variable).
os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True' 
ImageFile.LOAD_TRUNCATED_IMAGES = True  # avoid raising an error when an image file is truncated/corrupted

def get_imglist(dir="/workspace/dataset/"):
    """Return the paths of the entries in ``dir`` whose name ends with ``.png``.

    Each path is ``os.path.join(dir, name)`` with backslashes replaced by
    forward slashes. Entries appear in the order ``os.listdir`` yields them;
    the match on the extension is case-sensitive.
    """
    png_paths = []
    for entry in os.listdir(dir):
        if not entry.endswith('.png'):
            continue
        joined = os.path.join(dir, entry)
        png_paths.append(joined.replace("\\", "/"))
    return png_paths
import os
import shutil

def create_temp_dir(base_dir='./temp_cropped_patches'):
    """Create a fresh, empty directory at ``base_dir`` and return ``base_dir``.

    If something already exists at that path it is removed with
    ``shutil.rmtree`` first, so previous contents are discarded.
    """
    stale = os.path.exists(base_dir)
    if stale:
        # Start from a clean slate: drop whatever an earlier run left behind.
        shutil.rmtree(base_dir)
    os.makedirs(base_dir)
    return base_dir

def save_cropped_patches_single_image(image_path, crop_size, save_dir):
    """Tile one image into ``crop_size`` patches and save every patch as a PNG.

    The image at ``image_path`` is opened and converted to RGB. The tile grid
    has ceil(width / crop_size) columns and ceil(height / crop_size) rows.
    A tile that would extend past the right or bottom edge is moved back so it
    ends exactly on that edge (its origin is clamped at 0 when the image is
    smaller than ``crop_size``), so edge tiles overlap their neighbours rather
    than being cut short.

    Each patch is written to ``save_dir`` as ``<image stem>_<x>_<y>.png``,
    where ``x`` and ``y`` are the patch's top-left pixel coordinates in the
    source image. Returns ``None``.
    """
    image_name = os.path.basename(image_path)
    print(f"Cropping image: {image_name}")
    stem = os.path.splitext(image_name)[0]

    image = Image.open(image_path).convert('RGB')
    image_width, image_height = image.size

    # Ceil division: number of tiles needed to cover each axis.
    columns = (image_width + crop_size - 1) // crop_size
    rows = (image_height + crop_size - 1) // crop_size

    for col in tqdm(range(columns)):
        # Horizontal extent depends only on the column, so compute it once.
        left = col * crop_size
        right = min(left + crop_size, image_width)
        if right - left < crop_size:
            left = max(image_width - crop_size, 0)
            right = image_width

        for row in range(rows):
            top = row * crop_size
            bottom = min(top + crop_size, image_height)
            if bottom - top < crop_size:
                top = max(image_height - crop_size, 0)
                bottom = image_height

            patch = image.crop((left, top, right, bottom))
            patch.save(os.path.join(save_dir, f"{stem}_{left}_{top}.png"))
    
class CroppedPatchDataset(Dataset):
    """Dataset over patch image files named ``<stem>_<x>_<y>.png``.

    Every item is a dict with:
      * ``'image_name'``: ``<stem>.png``, rebuilt from the patch file name;
      * ``'image'``: the patch after resizing to ``resize_size`` x
        ``resize_size``, reversing the channel axis and applying
        ``transforms.ToTensor()``;
      * ``'top_left_position'``: ``torch.tensor([x, y])`` parsed from the
        patch file name.
    """

    def __init__(self, crop_image_dir, resize_size):
        # Despite the parameter name, the value is indexed and measured with
        # len() below, i.e. it is used as a sequence of patch file paths.
        self.crop_image_paths = crop_image_dir
        self.resize_size = resize_size
        self.transform = transforms.ToTensor()

    def __len__(self):
        return len(self.crop_image_paths)

    def __getitem__(self, idx):
        patch_path = self.crop_image_paths[idx]
        patch_name = os.path.basename(patch_path)

        side = self.resize_size
        patch = Image.open(patch_path).convert('RGB')
        patch = patch.resize((side, side), resample=Image.Resampling.LANCZOS)

        # Reverse the channel axis (RGB -> BGR order), then wrap the array in
        # a PIL image again so ToTensor can be applied to it.
        reversed_channels = np.array(patch)[:, :, ::-1]
        patch_tensor = self.transform(Image.fromarray(reversed_channels, 'RGB'))

        # File name layout: <stem>_<x>_<y>.png; the stem may contain '_'.
        tokens = patch_name.split('_')
        x_origin = int(tokens[-2])
        y_origin = int(tokens[-1].split('.')[0])
        source_name = '_'.join(tokens[:-2]) + '.png'

        return {
            'image_name': source_name,
            'image': patch_tensor,
            'top_left_position': torch.tensor([x_origin, y_origin])
        }


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def run_model(image_names, images, positions, model, scale_factor, result, conf=0.15):
    """Run the model on a batch of patches and store detections in image coordinates.

    Args:
        image_names: For each patch, the name of the source image it was cut from;
            used as the key into ``result``.
        images: Batch of patch images passed straight to ``model.predict``.
        positions: For each patch, its ``(x, y)`` top-left offset in the source image.
        model: Object exposing ``predict(images, conf=..., save=..., device=...)``
            whose per-patch results carry ``obb.xywhr`` and ``obb.conf``.
        scale_factor: Ratio between patch pixels and prediction pixels
            (e.g. crop size / resize size); applied to x, y, w and h.
        result: Dict mutated in place. ``result[name]`` receives one
            ``{'xywhr': [x, y, w, h, r], 'conf': float}`` entry per detection.
        conf: Confidence threshold forwarded to ``model.predict``.

    Returns:
        None. Output is delivered through ``result``.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    preds = model.predict(images, conf=conf, save=False, device=device)  # one result per patch

    for img_name, pred, pos in zip(image_names, preds, positions):
        # Make sure every image seen has an entry, even with no detections.
        detections = result.setdefault(img_name, [])

        obb = getattr(pred, 'obb', None)
        if obb is None:
            # The result carries no oriented boxes (e.g. not an OBB model).
            continue

        offset_x, offset_y = float(pos[0]), float(pos[1])

        # One bulk conversion per patch instead of a separate .item() call
        # (and device round-trip) for every coordinate.
        boxes = obb.xywhr.tolist()
        confidences = obb.conf.tolist()

        for (bx, by, bw, bh, angle), confidence in zip(boxes, confidences):
            detections.append({
                # Rescale to source-image pixels and shift by the patch origin;
                # the rotation angle is unaffected by a uniform scale.
                'xywhr': [
                    bx * scale_factor + offset_x,
                    by * scale_factor + offset_y,
                    bw * scale_factor,
                    bh * scale_factor,
                    angle,
                ],
                'conf': confidence
            })


In [None]:
import os
import torch
from ultralytics import YOLO
from ultralytics.utils.ops import nms_rotated
from torch.utils.data import DataLoader

# Data locations.
# './' is the path actually in effect for this cell: the earlier assignment of
# '/workspace/dataset/' was immediately overwritten, so it has been dropped.
directory_path = './'
base_dir = './temp_cropped_patches'
img_list = get_imglist(directory_path)

# Model and tiling configuration.
model_name = "./yolo11n-obb.pt"
crop_size = 512      # side of the square patches cut from each source image
resize_size = 1024   # side each patch is resized to before inference
batch_size = 32
model = YOLO(model_name)

result = {}  # image file name -> list of {'xywhr': [...], 'conf': float}

# Split the image list into at most `divide_num` chunks.
# Ceil division with a floor of 1: `length // divide_num` would be 0 when
# there are fewer images than chunks, and range() rejects a step of 0.
length = len(img_list)
divide_num = 2  # desired number of chunks
chunk_size = max(1, -(-length // divide_num))
img_list_chunks = [img_list[i:i + chunk_size] for i in range(0, length, chunk_size)]

for chunk_idx, img_chunk in enumerate(img_list_chunks):
    # Fresh scratch directory for this chunk's patches.
    temp_dir = create_temp_dir(base_dir=base_dir)
    print(f"Processing chunk {chunk_idx + 1}/{len(img_list_chunks)}")

    try:
        # Every image of the chunk gets an entry, even if nothing is detected.
        for image_path in img_chunk:
            image_name = os.path.basename(image_path)
            result[image_name] = []

        # Cut every image into patches; the helper prints its own progress line.
        for image_path in img_chunk:
            save_cropped_patches_single_image(image_path, crop_size, temp_dir)

        # All patch files written for this chunk.
        temp_list = get_imglist(temp_dir)

        dataset = CroppedPatchDataset(temp_list, resize_size=resize_size)
        dataloader = DataLoader(dataset, batch_size=batch_size, num_workers=0)  # raise num_workers if needed

        for batch in dataloader:
            images = batch['image']
            positions = batch['top_left_position']
            # The dataset already returns the SOURCE image name here. Stripping
            # '_x_y' a second time would corrupt it (e.g. 'task_smaple.png'
            # would become '.png') and file detections under the wrong key.
            original_image_names = batch['image_name']

            run_model(original_image_names, images, positions, model, crop_size / resize_size, result)

        # Merge overlapping detections from neighbouring patches, per image.
        for image_path in img_chunk:
            image_name = os.path.basename(image_path)
            per_image_result = result[image_name]
            if per_image_result:
                boxes = torch.tensor([pred['xywhr'] for pred in per_image_result])
                scores = torch.tensor([pred['conf'] for pred in per_image_result])

                keep_indices = nms_rotated(boxes, scores, threshold=0.45)

                # Plain ints are the clearest way to index a Python list.
                result[image_name] = [per_image_result[i] for i in keep_indices.tolist()]

        del dataset
        del dataloader
    finally:
        # Release cached GPU memory and remove the scratch directory even when
        # cropping or inference fails part-way through the chunk.
        torch.cuda.empty_cache()
        shutil.rmtree(temp_dir)


Processing chunk 1/2
Cropping image: task_smaple.png
Cropping image: task_smaple.png




In [None]:
# Display the detections collected so far (image file name -> list of predictions).
result

In [4]:
from ultralytics import YOLO
import torch
import shutil
from tqdm import tqdm
from ultralytics.utils.ops import nms_rotated

# Data locations ('./' can be used instead for a local run).
directory_path = '/workspace/dataset/'
base_dir = './temp_cropped_patches'
img_list = get_imglist(directory_path)

# Model and tiling configuration.
model_name = "./l_s1024_bgr05_scale084_2.pt"
crop_size = 512      # side of the square patches cut from each source image
resize_size = 1024   # side each patch is resized to before inference
batch_size = 32
model = YOLO(model_name)

result = {}  # image file name -> list of {'xywhr': [...], 'conf': float}

for image_path in img_list:
    # Fresh scratch directory for this image's patches.
    temp_dir = create_temp_dir(base_dir=base_dir)
    image_name = os.path.basename(image_path)
    print(f"Processing image: {image_name}")

    try:
        # Cut the image into patches stored in the scratch directory.
        save_cropped_patches_single_image(image_path, crop_size, temp_dir)

        temp_list = get_imglist(temp_dir)
        dataset = CroppedPatchDataset(temp_list, resize_size=resize_size)
        dataloader = DataLoader(dataset, batch_size=batch_size, num_workers=8)

        # The image gets an entry even if nothing is detected.
        result[image_name] = []

        for batch in tqdm(dataloader):
            images = batch['image']
            positions = batch['top_left_position']
            image_names = batch['image_name']

            run_model(image_names, images, positions, model, crop_size/resize_size, result)

        # Merge overlapping detections coming from neighbouring patches.
        per_image_result = result[image_name]
        if per_image_result:
            boxes = torch.tensor([pred['xywhr'] for pred in per_image_result])
            scores = torch.tensor([pred['conf'] for pred in per_image_result])

            keep_indices = nms_rotated(boxes, scores, threshold=0.45)

            # Plain ints are the clearest way to index a Python list.
            result[image_name] = [per_image_result[i] for i in keep_indices.tolist()]

        del dataset
        del dataloader
    finally:
        # Release cached GPU memory and remove the scratch directory even when
        # cropping or inference fails for this image.
        torch.cuda.empty_cache()
        shutil.rmtree(temp_dir)


Processing image: task_smaple.png
Cropping image: task_smaple.png


100%|██████████| 22/22 [00:34<00:00,  1.58s/it]
  return t.to(



0: 1024x1024 (no detections), 23.8ms
1: 1024x1024 (no detections), 23.8ms
2: 1024x1024 23.8ms
3: 1024x1024 23.8ms
4: 1024x1024 23.8ms
5: 1024x1024 (no detections), 23.8ms
6: 1024x1024 (no detections), 23.8ms
7: 1024x1024 (no detections), 23.8ms
8: 1024x1024 (no detections), 23.8ms
9: 1024x1024 (no detections), 23.8ms
10: 1024x1024 (no detections), 23.8ms
11: 1024x1024 (no detections), 23.8ms
12: 1024x1024 (no detections), 23.8ms
13: 1024x1024 (no detections), 23.8ms
14: 1024x1024 23.8ms
15: 1024x1024 (no detections), 23.8ms
16: 1024x1024 (no detections), 23.8ms
17: 1024x1024 (no detections), 23.8ms
18: 1024x1024 (no detections), 23.8ms
19: 1024x1024 23.8ms
20: 1024x1024 23.8ms
21: 1024x1024 23.8ms
22: 1024x1024 23.8ms
23: 1024x1024 23.8ms
Speed: 1.7ms preprocess, 23.8ms inference, 18.5ms postprocess per image at shape (1, 3, 1024, 1024)


  5%|▍         | 1/21 [00:02<00:54,  2.73s/it]


0: 1024x1024 (no detections), 21.0ms
1: 1024x1024 21.0ms
2: 1024x1024 21.0ms
3: 1024x1024 21.0ms
4: 1024x1024 (no detections), 21.0ms
5: 1024x1024 (no detections), 21.0ms
6: 1024x1024 (no detections), 21.0ms
7: 1024x1024 21.0ms
8: 1024x1024 (no detections), 21.0ms
9: 1024x1024 21.0ms
10: 1024x1024 (no detections), 21.0ms
11: 1024x1024 (no detections), 21.0ms
12: 1024x1024 (no detections), 21.0ms
13: 1024x1024 (no detections), 21.0ms
14: 1024x1024 (no detections), 21.0ms
15: 1024x1024 (no detections), 21.0ms
16: 1024x1024 (no detections), 21.0ms
17: 1024x1024 21.0ms
18: 1024x1024 21.0ms
19: 1024x1024 (no detections), 21.0ms
20: 1024x1024 (no detections), 21.0ms
21: 1024x1024 (no detections), 21.0ms
22: 1024x1024 (no detections), 21.0ms
23: 1024x1024 (no detections), 21.0ms
Speed: 1.5ms preprocess, 21.0ms inference, 12.5ms postprocess per image at shape (1, 3, 1024, 1024)


 10%|▉         | 2/21 [00:04<00:42,  2.24s/it]


0: 1024x1024 (no detections), 21.1ms
1: 1024x1024 (no detections), 21.1ms
2: 1024x1024 (no detections), 21.1ms
3: 1024x1024 (no detections), 21.1ms
4: 1024x1024 (no detections), 21.1ms
5: 1024x1024 (no detections), 21.1ms
6: 1024x1024 (no detections), 21.1ms
7: 1024x1024 (no detections), 21.1ms
8: 1024x1024 (no detections), 21.1ms
9: 1024x1024 (no detections), 21.1ms
10: 1024x1024 (no detections), 21.1ms
11: 1024x1024 (no detections), 21.1ms
12: 1024x1024 (no detections), 21.1ms
13: 1024x1024 (no detections), 21.1ms
14: 1024x1024 21.1ms
15: 1024x1024 21.1ms
16: 1024x1024 21.1ms
17: 1024x1024 21.1ms
18: 1024x1024 21.1ms
19: 1024x1024 21.1ms
20: 1024x1024 (no detections), 21.1ms
21: 1024x1024 21.1ms
22: 1024x1024 21.1ms
23: 1024x1024 (no detections), 21.1ms
Speed: 1.5ms preprocess, 21.1ms inference, 13.5ms postprocess per image at shape (1, 3, 1024, 1024)


 14%|█▍        | 3/21 [00:06<00:37,  2.08s/it]


0: 1024x1024 (no detections), 21.1ms
1: 1024x1024 (no detections), 21.1ms
2: 1024x1024 (no detections), 21.1ms
3: 1024x1024 21.1ms
4: 1024x1024 (no detections), 21.1ms
5: 1024x1024 21.1ms
6: 1024x1024 (no detections), 21.1ms
7: 1024x1024 (no detections), 21.1ms
8: 1024x1024 (no detections), 21.1ms
9: 1024x1024 (no detections), 21.1ms
10: 1024x1024 (no detections), 21.1ms
11: 1024x1024 (no detections), 21.1ms
12: 1024x1024 21.1ms
13: 1024x1024 (no detections), 21.1ms
14: 1024x1024 21.1ms
15: 1024x1024 (no detections), 21.1ms
16: 1024x1024 (no detections), 21.1ms
17: 1024x1024 (no detections), 21.1ms
18: 1024x1024 21.1ms
19: 1024x1024 (no detections), 21.1ms
20: 1024x1024 (no detections), 21.1ms
21: 1024x1024 (no detections), 21.1ms
22: 1024x1024 (no detections), 21.1ms
23: 1024x1024 (no detections), 21.1ms
Speed: 1.5ms preprocess, 21.1ms inference, 13.2ms postprocess per image at shape (1, 3, 1024, 1024)


 19%|█▉        | 4/21 [00:08<00:33,  2.00s/it]


0: 1024x1024 (no detections), 20.9ms
1: 1024x1024 (no detections), 20.9ms
2: 1024x1024 (no detections), 20.9ms
3: 1024x1024 (no detections), 20.9ms
4: 1024x1024 (no detections), 20.9ms
5: 1024x1024 20.9ms
6: 1024x1024 (no detections), 20.9ms
7: 1024x1024 (no detections), 20.9ms
8: 1024x1024 (no detections), 20.9ms
9: 1024x1024 (no detections), 20.9ms
10: 1024x1024 (no detections), 20.9ms
11: 1024x1024 (no detections), 20.9ms
12: 1024x1024 20.9ms
13: 1024x1024 (no detections), 20.9ms
14: 1024x1024 (no detections), 20.9ms
15: 1024x1024 (no detections), 20.9ms
16: 1024x1024 (no detections), 20.9ms
17: 1024x1024 (no detections), 20.9ms
18: 1024x1024 (no detections), 20.9ms
19: 1024x1024 (no detections), 20.9ms
20: 1024x1024 20.9ms
21: 1024x1024 (no detections), 20.9ms
22: 1024x1024 (no detections), 20.9ms
23: 1024x1024 (no detections), 20.9ms
Speed: 2.2ms preprocess, 20.9ms inference, 12.9ms postprocess per image at shape (1, 3, 1024, 1024)


 24%|██▍       | 5/21 [00:10<00:31,  1.95s/it]


0: 1024x1024 (no detections), 21.1ms
1: 1024x1024 (no detections), 21.1ms
2: 1024x1024 (no detections), 21.1ms
3: 1024x1024 (no detections), 21.1ms
4: 1024x1024 21.1ms
5: 1024x1024 21.1ms
6: 1024x1024 (no detections), 21.1ms
7: 1024x1024 (no detections), 21.1ms
8: 1024x1024 21.1ms
9: 1024x1024 (no detections), 21.1ms
10: 1024x1024 (no detections), 21.1ms
11: 1024x1024 (no detections), 21.1ms
12: 1024x1024 21.1ms
13: 1024x1024 (no detections), 21.1ms
14: 1024x1024 (no detections), 21.1ms
15: 1024x1024 (no detections), 21.1ms
16: 1024x1024 (no detections), 21.1ms
17: 1024x1024 (no detections), 21.1ms
18: 1024x1024 (no detections), 21.1ms
19: 1024x1024 (no detections), 21.1ms
20: 1024x1024 (no detections), 21.1ms
21: 1024x1024 21.1ms
22: 1024x1024 (no detections), 21.1ms
23: 1024x1024 (no detections), 21.1ms
Speed: 1.5ms preprocess, 21.1ms inference, 12.6ms postprocess per image at shape (1, 3, 1024, 1024)


 29%|██▊       | 6/21 [00:12<00:28,  1.92s/it]


0: 1024x1024 20.9ms
1: 1024x1024 20.9ms
2: 1024x1024 20.9ms
3: 1024x1024 20.9ms
4: 1024x1024 20.9ms
5: 1024x1024 20.9ms
6: 1024x1024 20.9ms
7: 1024x1024 (no detections), 20.9ms
8: 1024x1024 (no detections), 20.9ms
9: 1024x1024 (no detections), 20.9ms
10: 1024x1024 (no detections), 20.9ms
11: 1024x1024 (no detections), 20.9ms
12: 1024x1024 (no detections), 20.9ms
13: 1024x1024 (no detections), 20.9ms
14: 1024x1024 (no detections), 20.9ms
15: 1024x1024 (no detections), 20.9ms
16: 1024x1024 (no detections), 20.9ms
17: 1024x1024 (no detections), 20.9ms
18: 1024x1024 (no detections), 20.9ms
19: 1024x1024 (no detections), 20.9ms
20: 1024x1024 (no detections), 20.9ms
21: 1024x1024 (no detections), 20.9ms
22: 1024x1024 20.9ms
23: 1024x1024 20.9ms
Speed: 1.7ms preprocess, 20.9ms inference, 13.7ms postprocess per image at shape (1, 3, 1024, 1024)


 33%|███▎      | 7/21 [00:14<00:26,  1.92s/it]


0: 1024x1024 (no detections), 21.1ms
1: 1024x1024 (no detections), 21.1ms
2: 1024x1024 21.1ms
3: 1024x1024 (no detections), 21.1ms
4: 1024x1024 (no detections), 21.1ms
5: 1024x1024 (no detections), 21.1ms
6: 1024x1024 (no detections), 21.1ms
7: 1024x1024 (no detections), 21.1ms
8: 1024x1024 (no detections), 21.1ms
9: 1024x1024 (no detections), 21.1ms
10: 1024x1024 (no detections), 21.1ms
11: 1024x1024 (no detections), 21.1ms
12: 1024x1024 21.1ms
13: 1024x1024 21.1ms
14: 1024x1024 (no detections), 21.1ms
15: 1024x1024 (no detections), 21.1ms
16: 1024x1024 21.1ms
17: 1024x1024 (no detections), 21.1ms
18: 1024x1024 (no detections), 21.1ms
19: 1024x1024 (no detections), 21.1ms
20: 1024x1024 (no detections), 21.1ms
21: 1024x1024 (no detections), 21.1ms
22: 1024x1024 21.1ms
23: 1024x1024 (no detections), 21.1ms
Speed: 1.5ms preprocess, 21.1ms inference, 12.9ms postprocess per image at shape (1, 3, 1024, 1024)


 38%|███▊      | 8/21 [00:15<00:24,  1.90s/it]


0: 1024x1024 (no detections), 21.0ms
1: 1024x1024 21.0ms
2: 1024x1024 (no detections), 21.0ms
3: 1024x1024 (no detections), 21.0ms
4: 1024x1024 (no detections), 21.0ms
5: 1024x1024 (no detections), 21.0ms
6: 1024x1024 (no detections), 21.0ms
7: 1024x1024 21.0ms
8: 1024x1024 (no detections), 21.0ms
9: 1024x1024 (no detections), 21.0ms
10: 1024x1024 21.0ms
11: 1024x1024 21.0ms
12: 1024x1024 21.0ms
13: 1024x1024 21.0ms
14: 1024x1024 (no detections), 21.0ms
15: 1024x1024 (no detections), 21.0ms
16: 1024x1024 (no detections), 21.0ms
17: 1024x1024 (no detections), 21.0ms
18: 1024x1024 (no detections), 21.0ms
19: 1024x1024 (no detections), 21.0ms
20: 1024x1024 21.0ms
21: 1024x1024 21.0ms
22: 1024x1024 21.0ms
23: 1024x1024 21.0ms
Speed: 1.9ms preprocess, 21.0ms inference, 14.1ms postprocess per image at shape (1, 3, 1024, 1024)


 43%|████▎     | 9/21 [00:17<00:23,  1.92s/it]


0: 1024x1024 (no detections), 21.5ms
1: 1024x1024 (no detections), 21.5ms
2: 1024x1024 (no detections), 21.5ms
3: 1024x1024 (no detections), 21.5ms
4: 1024x1024 21.5ms
5: 1024x1024 21.5ms
6: 1024x1024 (no detections), 21.5ms
7: 1024x1024 (no detections), 21.5ms
8: 1024x1024 21.5ms
9: 1024x1024 21.5ms
10: 1024x1024 21.5ms
11: 1024x1024 (no detections), 21.5ms
12: 1024x1024 21.5ms
13: 1024x1024 (no detections), 21.5ms
14: 1024x1024 (no detections), 21.5ms
15: 1024x1024 21.5ms
16: 1024x1024 (no detections), 21.5ms
17: 1024x1024 (no detections), 21.5ms
18: 1024x1024 21.5ms
19: 1024x1024 21.5ms
20: 1024x1024 21.5ms
21: 1024x1024 21.5ms
22: 1024x1024 (no detections), 21.5ms
23: 1024x1024 (no detections), 21.5ms
Speed: 2.6ms preprocess, 21.5ms inference, 13.9ms postprocess per image at shape (1, 3, 1024, 1024)


 48%|████▊     | 10/21 [00:19<00:21,  1.93s/it]


0: 1024x1024 (no detections), 21.6ms
1: 1024x1024 (no detections), 21.6ms
2: 1024x1024 (no detections), 21.6ms
3: 1024x1024 21.6ms
4: 1024x1024 21.6ms
5: 1024x1024 21.6ms
6: 1024x1024 21.6ms
7: 1024x1024 21.6ms
8: 1024x1024 21.6ms
9: 1024x1024 21.6ms
10: 1024x1024 21.6ms
11: 1024x1024 21.6ms
12: 1024x1024 21.6ms
13: 1024x1024 21.6ms
14: 1024x1024 21.6ms
15: 1024x1024 (no detections), 21.6ms
16: 1024x1024 (no detections), 21.6ms
17: 1024x1024 (no detections), 21.6ms
18: 1024x1024 21.6ms
19: 1024x1024 21.6ms
20: 1024x1024 (no detections), 21.6ms
21: 1024x1024 (no detections), 21.6ms
22: 1024x1024 (no detections), 21.6ms
23: 1024x1024 (no detections), 21.6ms
Speed: 3.0ms preprocess, 21.6ms inference, 14.9ms postprocess per image at shape (1, 3, 1024, 1024)


 52%|█████▏    | 11/21 [00:21<00:19,  1.96s/it]


0: 1024x1024 (no detections), 21.3ms
1: 1024x1024 (no detections), 21.3ms
2: 1024x1024 21.3ms
3: 1024x1024 (no detections), 21.3ms
4: 1024x1024 (no detections), 21.3ms
5: 1024x1024 (no detections), 21.3ms
6: 1024x1024 (no detections), 21.3ms
7: 1024x1024 (no detections), 21.3ms
8: 1024x1024 (no detections), 21.3ms
9: 1024x1024 (no detections), 21.3ms
10: 1024x1024 (no detections), 21.3ms
11: 1024x1024 (no detections), 21.3ms
12: 1024x1024 (no detections), 21.3ms
13: 1024x1024 (no detections), 21.3ms
14: 1024x1024 21.3ms
15: 1024x1024 (no detections), 21.3ms
16: 1024x1024 (no detections), 21.3ms
17: 1024x1024 (no detections), 21.3ms
18: 1024x1024 21.3ms
19: 1024x1024 21.3ms
20: 1024x1024 21.3ms
21: 1024x1024 21.3ms
22: 1024x1024 (no detections), 21.3ms
23: 1024x1024 21.3ms
Speed: 1.5ms preprocess, 21.3ms inference, 13.0ms postprocess per image at shape (1, 3, 1024, 1024)


 57%|█████▋    | 12/21 [00:23<00:17,  1.93s/it]


0: 1024x1024 (no detections), 21.3ms
1: 1024x1024 (no detections), 21.3ms
2: 1024x1024 (no detections), 21.3ms
3: 1024x1024 21.3ms
4: 1024x1024 (no detections), 21.3ms
5: 1024x1024 21.3ms
6: 1024x1024 (no detections), 21.3ms
7: 1024x1024 21.3ms
8: 1024x1024 (no detections), 21.3ms
9: 1024x1024 21.3ms
10: 1024x1024 (no detections), 21.3ms
11: 1024x1024 (no detections), 21.3ms
12: 1024x1024 21.3ms
13: 1024x1024 (no detections), 21.3ms
14: 1024x1024 21.3ms
15: 1024x1024 (no detections), 21.3ms
16: 1024x1024 21.3ms
17: 1024x1024 (no detections), 21.3ms
18: 1024x1024 (no detections), 21.3ms
19: 1024x1024 21.3ms
20: 1024x1024 21.3ms
21: 1024x1024 (no detections), 21.3ms
22: 1024x1024 (no detections), 21.3ms
23: 1024x1024 (no detections), 21.3ms
Speed: 1.6ms preprocess, 21.3ms inference, 13.3ms postprocess per image at shape (1, 3, 1024, 1024)


 62%|██████▏   | 13/21 [00:25<00:15,  1.93s/it]


0: 1024x1024 (no detections), 20.9ms
1: 1024x1024 (no detections), 20.9ms
2: 1024x1024 20.9ms
3: 1024x1024 20.9ms
4: 1024x1024 (no detections), 20.9ms
5: 1024x1024 20.9ms
6: 1024x1024 20.9ms
7: 1024x1024 20.9ms
8: 1024x1024 (no detections), 20.9ms
9: 1024x1024 20.9ms
10: 1024x1024 (no detections), 20.9ms
11: 1024x1024 (no detections), 20.9ms
12: 1024x1024 (no detections), 20.9ms
13: 1024x1024 20.9ms
14: 1024x1024 (no detections), 20.9ms
15: 1024x1024 (no detections), 20.9ms
16: 1024x1024 20.9ms
17: 1024x1024 (no detections), 20.9ms
18: 1024x1024 (no detections), 20.9ms
19: 1024x1024 (no detections), 20.9ms
20: 1024x1024 (no detections), 20.9ms
21: 1024x1024 (no detections), 20.9ms
22: 1024x1024 (no detections), 20.9ms
23: 1024x1024 20.9ms
Speed: 1.4ms preprocess, 20.9ms inference, 13.1ms postprocess per image at shape (1, 3, 1024, 1024)


 67%|██████▋   | 14/21 [00:27<00:13,  1.92s/it]


0: 1024x1024 (no detections), 20.8ms
1: 1024x1024 20.8ms
2: 1024x1024 (no detections), 20.8ms
3: 1024x1024 (no detections), 20.8ms
4: 1024x1024 (no detections), 20.8ms
5: 1024x1024 (no detections), 20.8ms
6: 1024x1024 (no detections), 20.8ms
7: 1024x1024 (no detections), 20.8ms
8: 1024x1024 (no detections), 20.8ms
9: 1024x1024 20.8ms
10: 1024x1024 (no detections), 20.8ms
11: 1024x1024 (no detections), 20.8ms
12: 1024x1024 20.8ms
13: 1024x1024 (no detections), 20.8ms
14: 1024x1024 (no detections), 20.8ms
15: 1024x1024 (no detections), 20.8ms
16: 1024x1024 (no detections), 20.8ms
17: 1024x1024 20.8ms
18: 1024x1024 (no detections), 20.8ms
19: 1024x1024 (no detections), 20.8ms
20: 1024x1024 20.8ms
21: 1024x1024 20.8ms
22: 1024x1024 20.8ms
23: 1024x1024 (no detections), 20.8ms
Speed: 1.6ms preprocess, 20.8ms inference, 13.9ms postprocess per image at shape (1, 3, 1024, 1024)


 71%|███████▏  | 15/21 [00:29<00:11,  1.94s/it]


0: 1024x1024 (no detections), 20.8ms
1: 1024x1024 20.8ms
2: 1024x1024 (no detections), 20.8ms
3: 1024x1024 (no detections), 20.8ms
4: 1024x1024 20.8ms
5: 1024x1024 20.8ms
6: 1024x1024 20.8ms
7: 1024x1024 (no detections), 20.8ms
8: 1024x1024 (no detections), 20.8ms
9: 1024x1024 (no detections), 20.8ms
10: 1024x1024 (no detections), 20.8ms
11: 1024x1024 (no detections), 20.8ms
12: 1024x1024 (no detections), 20.8ms
13: 1024x1024 (no detections), 20.8ms
14: 1024x1024 (no detections), 20.8ms
15: 1024x1024 20.8ms
16: 1024x1024 (no detections), 20.8ms
17: 1024x1024 (no detections), 20.8ms
18: 1024x1024 (no detections), 20.8ms
19: 1024x1024 (no detections), 20.8ms
20: 1024x1024 20.8ms
21: 1024x1024 (no detections), 20.8ms
22: 1024x1024 (no detections), 20.8ms
23: 1024x1024 (no detections), 20.8ms
Speed: 1.4ms preprocess, 20.8ms inference, 12.6ms postprocess per image at shape (1, 3, 1024, 1024)


 76%|███████▌  | 16/21 [00:31<00:09,  1.92s/it]


0: 1024x1024 (no detections), 20.7ms
1: 1024x1024 (no detections), 20.7ms
2: 1024x1024 20.7ms
3: 1024x1024 (no detections), 20.7ms
4: 1024x1024 (no detections), 20.7ms
5: 1024x1024 (no detections), 20.7ms
6: 1024x1024 (no detections), 20.7ms
7: 1024x1024 (no detections), 20.7ms
8: 1024x1024 (no detections), 20.7ms
9: 1024x1024 (no detections), 20.7ms
10: 1024x1024 (no detections), 20.7ms
11: 1024x1024 (no detections), 20.7ms
12: 1024x1024 (no detections), 20.7ms
13: 1024x1024 (no detections), 20.7ms
14: 1024x1024 (no detections), 20.7ms
15: 1024x1024 (no detections), 20.7ms
16: 1024x1024 (no detections), 20.7ms
17: 1024x1024 20.7ms
18: 1024x1024 (no detections), 20.7ms
19: 1024x1024 20.7ms
20: 1024x1024 20.7ms
21: 1024x1024 (no detections), 20.7ms
22: 1024x1024 (no detections), 20.7ms
23: 1024x1024 (no detections), 20.7ms
Speed: 1.5ms preprocess, 20.7ms inference, 13.1ms postprocess per image at shape (1, 3, 1024, 1024)


 81%|████████  | 17/21 [00:33<00:07,  1.91s/it]


0: 1024x1024 20.8ms
1: 1024x1024 (no detections), 20.8ms
2: 1024x1024 (no detections), 20.8ms
3: 1024x1024 (no detections), 20.8ms
4: 1024x1024 (no detections), 20.8ms
5: 1024x1024 (no detections), 20.8ms
6: 1024x1024 (no detections), 20.8ms
7: 1024x1024 (no detections), 20.8ms
8: 1024x1024 (no detections), 20.8ms
9: 1024x1024 (no detections), 20.8ms
10: 1024x1024 (no detections), 20.8ms
11: 1024x1024 (no detections), 20.8ms
12: 1024x1024 (no detections), 20.8ms
13: 1024x1024 (no detections), 20.8ms
14: 1024x1024 20.8ms
15: 1024x1024 20.8ms
16: 1024x1024 (no detections), 20.8ms
17: 1024x1024 20.8ms
18: 1024x1024 (no detections), 20.8ms
19: 1024x1024 20.8ms
20: 1024x1024 20.8ms
21: 1024x1024 20.8ms
22: 1024x1024 (no detections), 20.8ms
23: 1024x1024 (no detections), 20.8ms
Speed: 1.5ms preprocess, 20.8ms inference, 13.3ms postprocess per image at shape (1, 3, 1024, 1024)


 86%|████████▌ | 18/21 [00:35<00:05,  1.91s/it]


0: 1024x1024 (no detections), 20.8ms
1: 1024x1024 (no detections), 20.8ms
2: 1024x1024 (no detections), 20.8ms
3: 1024x1024 (no detections), 20.8ms
4: 1024x1024 (no detections), 20.8ms
5: 1024x1024 (no detections), 20.8ms
6: 1024x1024 (no detections), 20.8ms
7: 1024x1024 (no detections), 20.8ms
8: 1024x1024 20.8ms
9: 1024x1024 (no detections), 20.8ms
10: 1024x1024 (no detections), 20.8ms
11: 1024x1024 (no detections), 20.8ms
12: 1024x1024 20.8ms
13: 1024x1024 20.8ms
14: 1024x1024 (no detections), 20.8ms
15: 1024x1024 20.8ms
16: 1024x1024 20.8ms
17: 1024x1024 (no detections), 20.8ms
18: 1024x1024 (no detections), 20.8ms
19: 1024x1024 (no detections), 20.8ms
20: 1024x1024 (no detections), 20.8ms
21: 1024x1024 (no detections), 20.8ms
22: 1024x1024 (no detections), 20.8ms
23: 1024x1024 (no detections), 20.8ms
Speed: 2.0ms preprocess, 20.8ms inference, 13.7ms postprocess per image at shape (1, 3, 1024, 1024)


 90%|█████████ | 19/21 [00:37<00:03,  1.91s/it]


0: 1024x1024 (no detections), 20.8ms
1: 1024x1024 (no detections), 20.8ms
2: 1024x1024 (no detections), 20.8ms
3: 1024x1024 (no detections), 20.8ms
4: 1024x1024 (no detections), 20.8ms
5: 1024x1024 (no detections), 20.8ms
6: 1024x1024 20.8ms
7: 1024x1024 20.8ms
8: 1024x1024 (no detections), 20.8ms
9: 1024x1024 (no detections), 20.8ms
10: 1024x1024 (no detections), 20.8ms
11: 1024x1024 20.8ms
12: 1024x1024 (no detections), 20.8ms
13: 1024x1024 (no detections), 20.8ms
14: 1024x1024 (no detections), 20.8ms
15: 1024x1024 (no detections), 20.8ms
16: 1024x1024 (no detections), 20.8ms
17: 1024x1024 20.8ms
18: 1024x1024 (no detections), 20.8ms
19: 1024x1024 (no detections), 20.8ms
20: 1024x1024 (no detections), 20.8ms
21: 1024x1024 (no detections), 20.8ms
22: 1024x1024 (no detections), 20.8ms
23: 1024x1024 (no detections), 20.8ms
Speed: 1.7ms preprocess, 20.8ms inference, 13.0ms postprocess per image at shape (1, 3, 1024, 1024)


 95%|█████████▌| 20/21 [00:38<00:01,  1.90s/it]


0: 1024x1024 (no detections), 18.0ms
1: 1024x1024 (no detections), 18.0ms
2: 1024x1024 (no detections), 18.0ms
3: 1024x1024 (no detections), 18.0ms
Speed: 1.5ms preprocess, 18.0ms inference, 22.9ms postprocess per image at shape (1, 3, 1024, 1024)


100%|██████████| 21/21 [00:39<00:00,  1.87s/it]


result = 
{'task_smaple.png': [{'xywhr': [tensor(229.2828, device='cuda:0'),
    tensor(10279.5654, device='cuda:0'),
    tensor(82.1188, device='cuda:0'),
    tensor(14.0488, device='cuda:0'),
    tensor(1.9125, device='cuda:0')],
   'conf': 0.34337615966796875},
  {'xywhr': [tensor(151.2560, device='cuda:0'),
    tensor(10446.5742, device='cuda:0'),
    tensor(110.7664, device='cuda:0'),
    tensor(25.3042, device='cuda:0'),
    tensor(2.5097, device='cuda:0')],
   'conf': 0.27472221851348877},
   ...]}


In [1]:
import csv
import math

# Destination of the submission CSV.
csv_file = "./submission.csv"
data = []  # rows: [image_name, cx, cy, width, height, angle in degrees]

# `result` must already exist in the kernel: run the inference cell first,
# otherwise this cell fails with NameError.
for image_name, predictions in result.items():
    # Images without predictions simply contribute no rows.
    for pred in predictions:
        # float() also unwraps 0-d tensors, which would otherwise be written
        # to the file as text like "tensor(229.28, device='cuda:0')".
        cx, cy, width, height, angle = (float(value) for value in pred['xywhr'])

        # Radians -> degrees, wrapped into the 0..360 range.
        angle_deg = math.degrees(angle) % 360

        data.append([image_name, cx, cy, width, height, angle_deg])

# Explicit encoding so non-ASCII image names do not depend on the OS locale.
with open(csv_file, mode='w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)

    # Header row, then one row per detection.
    writer.writerow(['image_name', 'cx', 'cy', 'width', 'height', 'angle'])
    writer.writerows(data)

print(f"CSV 파일 '{csv_file}'이(가) 성공적으로 생성되었습니다.")


NameError: name 'result' is not defined

In [3]:
import os
import time

import aifactory.score as aif

# The submission key is a credential and must not live in the notebook.
# Read it from the environment; the key that was previously committed here
# should be treated as leaked and rotated.
submit_key = os.environ.get("AIFACTORY_KEY")
if not submit_key:
    raise RuntimeError(
        "Set the AIFACTORY_KEY environment variable to your AI Factory submission key."
    )

t = time.time()
aif.submit(model_name="l_s1024_bgr05_scale084_2",
           key=submit_key)
print("time:", time.time() - t)

file : task.py
python
제출 완료
time: 105.83351588249207
