In [103]:
import json
import matplotlib.pyplot as plt
from pathlib import Path
import cv2
import bbox_visualizer as bbv
from PIL import Image
import numpy as np
from IPython.display import display
import pandas as pd
import random

In [104]:
def make_coco_dataset(json_path, path_to_images=None, encoding='utf-8'):
    
    with open(json_path, encoding=encoding) as cocojson:
        annotations_json = json.load(cocojson)
        
    # Store the 3 sections of the json as seperate json arrays
    images = pd.json_normalize(annotations_json["images"])
    images.columns = "img_" + images.columns
    try:
        images["img_folder"]
    except:
        images["img_folder"] = ""
    
    # If specified a different image folder then use that one
    if path_to_images != None:
        images["img_folder"] = path_to_images

    astype_dict = {"img_width": "int64", "img_height": "int64", "img_depth": "int64"}
    astype_keys = list(astype_dict.keys())
    for element in astype_keys:
        if element not in images.columns:
            astype_dict.pop(element)
            
    images = images.astype(astype_dict)

    annotations = pd.json_normalize(annotations_json["annotations"])
    annotations.columns = "ann_" + annotations.columns

    categories = pd.json_normalize(annotations_json["categories"])
    categories.columns = "cat_" + categories.columns
    categories.cat_id = categories.cat_id.astype(str)
    
    df = annotations

    # Converting this to string resolves issue #23
    df.ann_category_id = df.ann_category_id.astype(str)

    df[
        ["ann_bbox_xmin", "ann_bbox_ymin", "ann_bbox_width", "ann_bbox_height"]
    ] = pd.DataFrame(df.ann_bbox.tolist(), index=df.index)
    df.insert(8, "ann_bbox_xmax", df["ann_bbox_xmin"] + df["ann_bbox_width"])
    df.insert(10, "ann_bbox_ymax", df["ann_bbox_ymin"] + df["ann_bbox_height"])

    # debug print(df.info())

    # Join the annotions with the information about the image to add the image columns to the dataframe
    df = pd.merge(images, df, left_on="img_id", right_on="ann_image_id", how="left")
    df = pd.merge(
        df, categories, left_on="ann_category_id", right_on="cat_id", how="left"
    )

    # Rename columns if needed from the coco column name to the pylabel column name
    df.rename(columns={"img_file_name": "img_filename"}, inplace=True)
    
    df.fillna('', inplace=True)

    # These should be strings
    df.cat_id = df.cat_id.astype(str)

    # These should be integers
    df.img_width = df.img_width.astype(int)
    df.img_height = df.img_height.astype(int)
    
    return df

In [69]:
def make_coco_dataset_with_prediction(json_path, pred_path, path_to_images=None, encoding='utf-8'):
    
    with open(json_path, encoding=encoding) as cocojson:
        annotations_json = json.load(cocojson)

    with open(pred_path, encoding=encoding) as predjson:
        predictions_json = json.load(predjson)
        
    # Store the 3 sections of the json as seperate json arrays
    images = pd.json_normalize(annotations_json["images"])
    images.columns = "img_" + images.columns
    try:
        images["img_folder"]
    except:
        images["img_folder"] = ""
    
    # If specified a different image folder then use that one
    if path_to_images != None:
        images["img_folder"] = path_to_images

    astype_dict = {"img_width": "int64", "img_height": "int64", "img_depth": "int64"}
    astype_keys = list(astype_dict.keys())
    for element in astype_keys:
        if element not in images.columns:
            astype_dict.pop(element)
            
    images = images.astype(astype_dict)
    
    annotations = pd.json_normalize(predictions_json)
    annotations.columns = "ann_" + annotations.columns
    
    categories = pd.json_normalize(annotations_json["categories"])
    categories.columns = "cat_" + categories.columns
    categories.cat_id = categories.cat_id.astype(str)
    
    df = annotations

    # Converting this to string resolves issue #23
    df.ann_category_id = df.ann_category_id.astype(str)

    print(df)
    
    df[
        ["ann_bbox_xmin", "ann_bbox_ymin", "ann_bbox_width", "ann_bbox_height"]
    ] = pd.DataFrame(df.ann_bbox.tolist(), index=df.index)
    df.insert(4, "ann_bbox_xmax", df["ann_bbox_xmin"] + df["ann_bbox_width"])
    df.insert(5, "ann_bbox_ymax", df["ann_bbox_ymin"] + df["ann_bbox_height"])

    # debug print(df.info())

    # Join the annotions with the information about the image to add the image columns to the dataframe
    df = pd.merge(images, df, left_on="img_id", right_on="ann_image_id", how="left")
    df = pd.merge(
        df, categories, left_on="ann_category_id", right_on="cat_id", how="left"
    )

    # Rename columns if needed from the coco column name to the pylabel column name
    df.rename(columns={"img_file_name": "img_filename"}, inplace=True)
    
    df.fillna('', inplace=True)

    # These should be strings
    df.cat_id = df.cat_id.astype(str)

    # These should be integers
    df.img_width = df.img_width.astype(int)
    df.img_height = df.img_height.astype(int)
    
    return df

In [115]:
def visualize_random_samples(dataset, images_per_class, specific_class=None):
    
    ds = dataset
    ds.cat_id = ds.cat_id.astype('float').astype('Int64') # Optional when the ids are string type
    
    grouped_by_classes = ds.groupby("cat_id")
    shortest_length = min(len(group) for _, group in grouped_by_classes)
    random_sample_by_classes = grouped_by_classes.sample(n=min(images_per_class, shortest_length)) # random_state=1 추가 가능
    
    if specific_class != None:
        df = random_sample_by_classes.loc[ds.cat_id == specific_class]
        final_df = df.groupby("cat_id")["img_filename"]
    else:
        final_df = random_sample_by_classes.groupby("cat_id")["img_filename"]        

    for group_name, group in final_df:
        dict_for_iteration = {}
        count = 1
        while count <= min(images_per_class, shortest_length):
            key = count
            value = []
            dict_for_iteration[key] = value 
            count += 1
            
        i = 1
        for row_index, row in group.items():
            dict_for_iteration[i].append(row)
            i += 1
            
            
        for key, value in dict_for_iteration.items():
            
            img_annots = ds.loc[ds.img_filename == value[0]]
            full_image_path = str(Path(img_annots.iloc[0].img_folder, img_annots.iloc[0].img_filename))
            
            labels = []
            bboxes = []
            
            for index, row in img_annots.iterrows():
                # If there are no annotations, then skip because there are no boxes to draw
                if row["ann_bbox_xmin"] != "":
                    labels.append(f"{row['cat_id']}: {row['cat_name']}")
                    bboxes.append(
                        [
                            int(row["ann_bbox_xmin"]),
                            int(row["ann_bbox_ymin"]),
                            int(row["ann_bbox_xmax"]),
                            int(row["ann_bbox_ymax"]),
                        ]
                    )
                    
            dict_for_iteration[key].append(full_image_path)
            dict_for_iteration[key].append(labels)
            dict_for_iteration[key].append(bboxes)
                        
        image_group = []
        
        for key, value in dict_for_iteration.items():
            
            full_image_path = value[1]
            labels = value[2]
            bboxes = value[3]
            
            detected_categories = list(label.split(":")[0] for label in labels)
            np_detected_categories = np.array(detected_categories)
            
            unique_detected_categories = np.unique(np_detected_categories)
            string_unique_detected_categories = ', '.join(str(e) for e in unique_detected_categories)
            number_of_detections = len(bboxes) 
    
            img = cv2.imread(str(full_image_path))
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            img = cv2.putText(img, text=f"# of detections = {number_of_detections}", org=(30,30),
                    fontFace=cv2.FONT_HERSHEY_SIMPLEX, fontScale=1, color=(255,0,127), thickness=2)
            img = cv2.putText(img, text=f"Categories = {string_unique_detected_categories}", org=(30,70),
                    fontFace=cv2.FONT_HERSHEY_SIMPLEX, fontScale=1, color=(255,0,127), thickness=2)
            
            image_with_boxes = img
            
            for bbox in bboxes:
                image_with_boxes = bbv.draw_rectangle(image_with_boxes, bbox, bbox_color=random.sample(range(1, 255), 3), thickness=1)            
            
#             img_with_boxes = bbv.draw_multiple_rectangles(img, bboxes) # bbox_color=random.sample(range(1, 255), 3)
            img_with_labels = bbv.add_multiple_labels(image_with_boxes, labels, bboxes, top=True)
            image_group.append(Image.fromarray(img_with_labels))
            
        
        for image in image_group:
            display(image)
        # concatenated = np.concatenate(image_group, axis=0)
        # data = Image.fromarray(concatenated)
        
        # print(f"Random sample for category {group_name}")
        # display(data)

In [31]:
coco_json_path = '/home/ubuntu/workspace/visualization/augmented_data/train.json'
coco_image_path = '/home/ubuntu/workspace/visualization/augmented_data'

# IR_json_path = '/home/ubuntu/workspace/datasets/final_dataset/train/annotations/IR-train.json'
# IR_image_path = '/home/ubuntu/workspace/datasets/final_dataset/train/annotations/images_all/IR-train'
#prediction_path
#prediction_path = '/home/ubuntu/workspace/yuns/mmdetection/outputs/yolo_1.bbox.json'

In [32]:
coco_dataset = make_coco_dataset(coco_json_path, coco_image_path)

In [125]:
IR_dataset = make_coco_dataset(IR_json_path, IR_image_path)

In [72]:
prediction_dataset = make_coco_dataset_with_prediction(coco_json_path, prediction_path, coco_image_path)

      ann_image_id                                           ann_bbox  \
0                1  [1755.0513916015625, 962.1963500976562, 154.71...   
1                3  [1219.0260009765625, 626.973388671875, 150.429...   
2                7  [1947.950439453125, 879.6666259765625, 278.722...   
3                9  [145.99134826660156, 2.69860577583313, 55.1800...   
4               11  [854.6197509765625, 658.352783203125, 75.07727...   
...            ...                                                ...   
1466          3140  [866.8604736328125, 677.1395874023438, 68.4201...   
1467          3141  [382.1676940917969, 407.7758483886719, 110.636...   
1468          3142  [1178.0296630859375, 349.1967468261719, 84.314...   
1469          3146  [819.9989624023438, 706.6613159179688, 101.491...   
1470          3149  [406.489013671875, 556.4080810546875, 66.96130...   

      ann_score ann_category_id  
0      0.300054               0  
1      0.552959               0  
2      0.964306      

In [None]:
print(prediction_dataset)

In [33]:
# Convert empty cat_id to NaN

#coco_dataset['cat_id'] = coco_dataset['cat_id'].replace('', np.nan)
# coco_dataset = coco_dataset.dropna(axis=0, subset=['cat_id'])

# Convert empty cat_id to NaN

coco_dataset['cat_id'] = coco_dataset['cat_id'].replace('', np.nan)
# coco_dataset = coco_dataset.dropna(axis=0, subset=['cat_id'])
 

In [40]:
prediction_dataset.head()

print("Dataset length : ", len(prediction_dataset))

Dataset length :  3391


In [None]:
visualize_random_samples(coco_dataset, 10)

In [None]:
visualize_random_samples(IR_dataset, 10)

In [80]:
import os
import random
from PIL import Image

def draw_random_images(folder_path, num_images=10):
    # 폴더 내 모든 이미지 파일 목록 가져오기
    image_files = [f for f in os.listdir(folder_path) if f.endswith(('.jpg', '.jpeg', '.png'))]

    if not image_files:
        print("폴더 내에 이미지 파일이 없습니다.")
        return

    # 이미지 파일 개수가 요청한 개수보다 적을 경우, 모든 이미지 파일 선택
    num_images = min(num_images, len(image_files))

    # 무작위로 이미지 파일 선택
    selected_images = random.sample(image_files, num_images)

    # 이미지 파일 그리기
    for image_file in selected_images:
        image_path = os.path.join(folder_path, image_file)
        image = Image.open(image_path)
        image.show()

In [None]:
# 폴더 경로 설정
folder_path = "/home/ubuntu/workspace/datasets/dacon/coco/test"

# 랜덤한 이미지 10개 그리기
draw_random_images(folder_path, num_images=10)