In [9]:
import numpy as np
import os
# Directory that holds the RGB images named in the file list below.
Dataset_path = 'ade20k_ksm/train/rgb/all'
# Directory that holds the depth maps; they are looked up later as
# "<image name without its last 4 characters>_depth.png".
depth_path = 'ADE_depth'
# Array of image file names to process, loaded from a NumPy file
# (the name suggests 500 entries -- not verified in this cell).
files500 = np.load("ADE_500files.npy")


In [2]:
# Pair every listed image with its depth map. Only entries whose RGB file
# and depth file are both present on disk end up in the dictionary.
data_dict = {}

# Walk over the list of image file names
for img_name in files500:
    img_path = os.path.join(Dataset_path, img_name)
    # The depth map is named after the image with its last 4 characters
    # (the ".ext" suffix for 3-letter extensions) replaced by "_depth.png".
    depth_path_full = os.path.join(depth_path, f"{img_name[:-4]}_depth.png")

    # Skip the entry as soon as one of the two files is missing
    if not all(map(os.path.exists, (img_path, depth_path_full))):
        continue

    data_dict[img_name] = dict(image=img_path, depth=depth_path_full)

print(f"Processed {len(data_dict)} image pairs")


Processed 500 image pairs


In [11]:
from PIL import Image
import random
import json


# --- Sampling configuration --------------------------------------------------
IMG_SIZE = (336, 336)        # depth maps are resized to this before sampling
# NOTE(review): coordinates are drawn from [0, 300] although the image is
# 336 px wide/high -- presumably a margin for the labels drawn later; confirm.
MAX_COORD = 300
MIN_PIXEL_DISTANCE = 20      # minimum Euclidean distance (px) between two points
MIN_DEPTH_GAP = 20           # minimum absolute depth-value gap between two points
POINT_COUNTS = [2, 3, 4, 5]  # the target point count is drawn from this list
MAX_ATTEMPTS = 10000         # restarts per image before the image is given up
MAX_POINT_ATTEMPTS = 10000   # candidate draws per restart


def _sample_points(depth_array, n_points):
    """Try to draw ``n_points`` well-separated points from one depth map.

    A candidate pixel is accepted when its depth value is non-zero, it is at
    least ``MIN_PIXEL_DISTANCE`` pixels away from every accepted point, and its
    depth value differs by at least ``MIN_DEPTH_GAP`` from every accepted depth.

    Args:
        depth_array: 2-D array indexed as ``depth_array[y, x]``.
        n_points: number of points to collect.

    Returns:
        ``(points, depths)`` -- a list of ``(x, y)`` tuples and the matching
        list of depth values as plain floats -- or ``None`` when the target
        was not reached within ``MAX_POINT_ATTEMPTS`` candidate draws.
    """
    points, depths = [], []
    draws = 0
    while len(points) < n_points and draws < MAX_POINT_ATTEMPTS:
        draws += 1
        x = random.randint(0, MAX_COORD)
        y = random.randint(0, MAX_COORD)
        depth = float(depth_array[y, x])

        # A depth value of 0 is never accepted.
        if depth == 0:
            continue
        # Compare squared distances: exact for integer pixel coordinates.
        if any((x - px) ** 2 + (y - py) ** 2 < MIN_PIXEL_DISTANCE ** 2
               for px, py in points):
            continue
        if any(abs(depth - other) < MIN_DEPTH_GAP for other in depths):
            continue

        points.append((x, y))
        depths.append(depth)

    if len(points) < n_points:
        return None
    return points, depths


# Maps image name -> {'coordinates': [(x, y), ...], 'depths': [...]}, with both
# lists ordered by ascending depth value.
# NOTE: no random seed is set in this cell, so the sampled points differ
# between runs unless the RNG is seeded elsewhere.
point_data = {}

for img_name, paths in data_dict.items():
    # Load the depth map once per image: it does not change between retries,
    # so decoding it inside the retry loop only wasted time. The context
    # manager closes the file handle as soon as the array has been built.
    with Image.open(paths['depth']) as depth_file:
        depth_img = depth_file.convert('L').resize(IMG_SIZE)
    depth_array = np.array(depth_img, dtype=np.float64)

    for _ in range(MAX_ATTEMPTS):
        # Every retry picks a fresh target count between 2 and 5 points.
        sample = _sample_points(depth_array, random.choice(POINT_COUNTS))
        if sample is None:
            continue

        points, depths = sample
        # Order the points by ascending depth value.
        order = sorted(range(len(depths)), key=depths.__getitem__)
        point_data[img_name] = {
            'coordinates': [points[i] for i in order],
            'depths': [depths[i] for i in order]
        }
        break

print(f"Successfully processed {len(point_data)} images with valid point configurations")


Successfully processed 500 images with valid point configurations


In [7]:
# Write the sampled points and their depth values to disk as indented JSON
with open('point_data.json', 'w') as out_file:
    json.dump(point_data, out_file, indent=2)


In [8]:
## use point data to draw markers on the images

import os
import numpy as np
from PIL import Image, ImageDraw, ImageFont
import random
import json
import warnings
# Silences every Python warning for the rest of the session (not only the
# ones raised by the drawing code below).
warnings.filterwarnings("ignore")

def add_point(text, draw, center_x, center_y, font_size):
    """Draw a red circle marker with a white-on-black text label above it.

    Args:
        text: label string to render.
        draw: drawing object providing ``ellipse``, ``textlength``,
            ``rectangle`` and ``text``.
        center_x: x coordinate of the marker centre.
        center_y: y coordinate of the marker centre.
        font_size: used only as the height of the label background; the
            label itself is rendered with the default font.
    """
    marker_radius = 4
    marker_box = [
        center_x - marker_radius,
        center_y - marker_radius,
        center_x + marker_radius,
        center_y + marker_radius,
    ]
    draw.ellipse(marker_box, outline='red', width=2)

    # The label uses the built-in default font
    label_font = ImageFont.load_default()
    label_width = draw.textlength(text, font=label_font)

    # The label starts at the marker's x position, 25 px above its centre
    label_x = center_x
    label_y = center_y - 25

    # Black backdrop: the text extent padded by 1 px on every side
    pad = 1
    backdrop = [
        label_x - pad,
        label_y - pad,
        label_x + label_width + pad,
        label_y + font_size + pad,
    ]
    draw.rectangle(backdrop, fill='black')

    draw.text((label_x, label_y), text, font=label_font, fill='white')

def draw_markers(f, point_data, font_size = 12, output_dir = "marked_images4"):
    """Draw labelled markers on one image, save it, and build its QA pair.

    The image ``f`` is read from ``Dataset_path``, resized to 336x336, and each
    of its points gets a circle plus a randomly assigned letter label. The
    marked image is written to ``<output_dir>/<f>``.

    Args:
        f: image file name; also the key into ``point_data``.
        point_data: mapping of image name to a dict with 'coordinates'
            (sequence of (x, y)) and 'depths' (sequence of depth values in the
            same order).
        font_size: forwarded to ``add_point``.
        output_dir: directory the marked image is saved into. It is created
            when missing. (Previously a different directory, "ADE_blink", was
            created while the image was saved to "marked_images4", so saving
            failed unless that folder already existed.)

    Returns:
        ``(query, answer, point_label_mapping)`` where ``answer`` is the label
        of the point with the largest depth value and ``point_label_mapping``
        maps each label to ``{'coordinate': ..., 'depth': ...}`` in the order
        of ``point_data[f]['coordinates']``.

    Raises:
        IndexError: if the image has more than five points (only the labels
            A-E are available).
    """
    W, H = 336, 336
    # resize() returns an independent image, so the source file can be closed
    # right away instead of leaking one handle per processed image.
    with Image.open(f"{Dataset_path}/{f}") as source_img:
        img = source_img.resize((W, H))

    draw = ImageDraw.Draw(img)

    # Points and depth values recorded for this image
    points = point_data[f]['coordinates']
    depths = point_data[f]['depths']

    # One label per point, in random order so that the label gives no hint
    # about the depth ranking.
    labels = ['A', 'B', 'C', 'D', 'E'][:len(depths)]
    random.shuffle(labels)

    point_label_mapping = {}
    for i, (x, y) in enumerate(points):
        add_point(labels[i], draw, x, y, font_size)
        point_label_mapping[labels[i]] = {
            'coordinate': points[i],
            'depth': depths[i]
        }

    # NOTE(review): the answer is the point with the LARGEST depth value while
    # the query asks for the CLOSEST point. That is only correct if the depth
    # maps encode nearer pixels as larger values -- confirm against the source
    # of the depth maps.
    max_depth_idx = depths.index(max(depths))
    max_depth_label = labels[max_depth_idx]

    query = "Which point is closest to the camera in the image?"
    answer = f"{max_depth_label}"

    # Create the directory the image is actually written to.
    os.makedirs(output_dir, exist_ok=True)
    img.save(f"{output_dir}/{f}")

    return query, answer, point_label_mapping



# Per-image results: the question/answer pair and the label -> point mapping
qa_pairs = {}
point_mappings = {}
from tqdm import tqdm

for idx in tqdm(range(len(files500))):
    f = files500[idx]
    # Only images for which points were sampled get a marked copy and a QA pair
    if f in point_data:
        query, answer, mapping = draw_markers(f, point_data)
        qa_pairs[f] = dict(query=query, answer=answer)
        point_mappings[f] = mapping

# Write the question/answer pairs as indented JSON
with open("depth_qa_pairs.json", "w") as f:
    json.dump(qa_pairs, f, indent=2)

# Write the label -> {coordinate, depth} mappings as indented JSON
with open("point_label_mappings.json", "w") as f:
    json.dump(point_mappings, f, indent=2)


100%|█████████████████████████████████████████████████████████████| 1000/1000 [00:05<00:00, 169.03it/s]


In [None]:
## Sanity checks on the two JSON files written above
with open("point_label_mappings.json", 'r') as f:
    point_mapping = json.load(f)
with open("depth_qa_pairs.json", 'r') as f:
    qa = json.load(f)

import numpy as np
for k, v in point_mapping.items():
    label_order = list(v)
    depths = [v[label]['depth'] for label in label_order]
    # Labels must be stored in ascending depth order ...
    assert depths == sorted(depths)
    # ... and the recorded answer must be the label with the largest depth value
    idx = np.argmax(depths)
    assert qa[k]['answer'] == label_order[idx]