In [1]:
import os

import numpy as np
import pandas as pd
from PIL import Image
from tqdm import tqdm
from ultralytics import YOLO

In [2]:
# Dataset root; every path used in this notebook is built relative to it
root = "data/DressCode/"

# The three lists below are all indexed by the integer garment label
# (the third column of the pairs files).

# label -> sub-directory of `root` that holds that garment category
DIRECTORY_MAP = [
    "upper_body",
    "lower_body",
    "dresses",
]

# label -> label-map values that make up the garment (data/DressCode/readme.txt)
SEGMENT_MAP = [
    [4],
    [5, 6],
    [7],
]

# label -> detector class ids accepted for the garment (data/DeepFashion/DeepFashion2.yaml)
CLASS_MAP = [
    [0, 1, 2, 3, 4],
    [6, 7, 8],
    [9, 10, 11, 12],
]

In [3]:
# Load the train and test pair lists. Both files are tab-separated, have no
# header row, and share the same three columns, so they are parsed identically.
train_pairs, test_pairs = (
    pd.read_csv(
        os.path.join(root, pairs_file),
        delimiter="\t",
        header=None,
        names=["model", "garment", "label"],
    )
    for pairs_file in ("train_pairs.txt", "test_pairs_paired.txt")
)

# Every pair from both splits, train rows first
pairs = pd.concat([train_pairs, test_pairs])

pairs.head()

Unnamed: 0,model,garment,label
0,000000_0.jpg,000000_1.jpg,0
1,000001_0.jpg,000001_1.jpg,0
2,000002_0.jpg,000002_1.jpg,0
3,000003_0.jpg,000003_1.jpg,0
4,000004_0.jpg,000004_1.jpg,0


In [4]:
# Load the YOLO detector used to locate the garment in each garment image.
# NOTE(review): CLASS_MAP cites DeepFashion2 class ids, so this checkpoint is
# presumably fine-tuned on DeepFashion2 rather than stock yolov8m weights — confirm.
yolo_weights = "models/yolov8m.pt"
yolo = YOLO(yolo_weights)

In [5]:
# Make sure each garment category has a "cropped_images" directory to write into
for directory in DIRECTORY_MAP:
    cropped_directory = os.path.join(root, directory, "cropped_images")
    os.makedirs(cropped_directory, exist_ok=True)

In [6]:
def get_bounding_box(mask: np.ndarray) -> tuple[int, int, int, int]:
    """
    Get the bounding box around the truthy region of a 2-D mask.

    The box is expressed in array-index order, not image (x, y) order: the
    first and third values index axis 0 (rows) and the second and fourth index
    axis 1 (columns). Both maxima are inclusive, i.e. they are the indices of
    the last truthy row and column.

    Args:
        mask: 2-D array whose non-zero / True entries mark the region of interest.

    Returns (x_min, y_min, x_max, y_max) = (row_min, col_min, row_max, col_max).
    If the mask has no truthy entry, the box spans the whole array,
    (0, 0, rows - 1, cols - 1), using the same inclusive convention.
    """

    # One index array per axis; unpacking into two names requires a 2-D mask.
    row_indices, col_indices = np.nonzero(mask)

    # Both arrays always have the same length, so checking one is enough.
    if row_indices.size == 0:
        # Keep the fallback inclusive so it agrees with the non-empty branch:
        # an all-True mask and an empty mask both yield the full-array box.
        return 0, 0, mask.shape[0] - 1, mask.shape[1] - 1

    return (
        int(row_indices.min()),
        int(col_indices.min()),
        int(row_indices.max()),
        int(col_indices.max()),
    )


def crop_model_image(model: str, label: int) -> bool:
    """
    Crop the model image to the garment region of its label map and save the result.

    The label map is read from "<root>/<category>/label_maps/<id>_4.png", where
    <id> is the part of `model` before the first underscore. Pixels whose value
    is listed in SEGMENT_MAP[label] form the garment mask; the image is cropped
    to that mask's bounding box and written to
    "<root>/<category>/cropped_images/<model>".

    Args:
        model: File name of the model image inside the category's "images" directory.
        label: Garment category index into DIRECTORY_MAP / SEGMENT_MAP.

    Returns True if the model image was cropped and saved, False if the label
    map contains no pixel of the requested segments (nothing is saved).
    """

    category_dir = os.path.join(root, DIRECTORY_MAP[label])

    # Load the model image; convert() returns a new in-memory image, so the
    # file can be closed as soon as the conversion is done.
    with Image.open(os.path.join(category_dir, "images", model)) as source_image:
        model_image = source_image.convert("RGB")

    # Load the segmentation as an array of per-pixel segment ids
    label_map_name = model.split("_")[0] + "_4.png"
    with Image.open(
        os.path.join(category_dir, "label_maps", label_map_name)
    ) as label_map:
        segmentation = np.array(label_map)

    # Get the mask for the label
    mask = np.isin(segmentation, SEGMENT_MAP[label])

    # Mask is empty, skip the image
    if not mask.any():
        return False

    # get_bounding_box reports (row_min, col_min, row_max, col_max) with
    # inclusive maxima.
    # NOTE(review): assumes the label map has the same pixel size as the image — confirm.
    row_min, col_min, row_max, col_max = get_bounding_box(mask)

    # PIL wants (left, upper, right, lower) and treats right/lower as exclusive,
    # so add 1 to keep the last garment row and column inside the crop.
    model_image_cropped = model_image.crop(
        (col_min, row_min, col_max + 1, row_max + 1)
    )

    # Save the cropped image
    model_image_cropped.save(os.path.join(category_dir, "cropped_images", model))

    return True

In [7]:
def crop_garment_image(garment: str, label: int) -> bool:
    """
    Crop the garment image to the most confident YOLO detection and save the result.

    Detections whose class is listed in CLASS_MAP[label] are preferred; when no
    detection has a matching class, every detection stays in the running. The
    crop is written to "<root>/<category>/cropped_images/<garment>".

    Returns True if the garment image was cropped and saved, False if YOLO
    produced no detection at all (nothing is saved).
    """

    category_dir = os.path.join(root, DIRECTORY_MAP[label])

    # Load the garment image
    image_path = os.path.join(category_dir, "images", garment)
    garment_image = Image.open(image_path).convert("RGB")

    # A single image goes in, so only the first result is relevant
    detection = yolo.predict(garment_image, verbose=False)[0]

    # Nothing detected, skip the image
    if len(detection) == 0:
        return False

    candidates = detection.boxes

    # Narrow the candidates to the expected garment classes when any exist
    predicted_classes = candidates.cls.cpu().numpy()
    matching = np.where(np.isin(predicted_classes, CLASS_MAP[label]))[0]
    if len(matching) > 0:
        candidates = candidates[matching]

    # Pick the candidate with the highest confidence
    scores = candidates.conf.cpu().numpy()
    best = np.argmax(scores)
    crop_box = candidates[best].xyxy.cpu().numpy().squeeze()

    # Crop and save
    output_path = os.path.join(category_dir, "cropped_images", garment)
    garment_image.crop(crop_box).save(output_path)

    return True


# Pairs that could not be cropped. Each entry is the (model, garment, label)
# tuple of a pair for which a crop function returned False.
skipped_images: list[tuple[str, str, int]] = []

In [8]:
# Crop all the model & garment images

# Start from an empty list so that re-running this cell does not pile new
# entries on top of the ones left by a previous run.
skipped_images = []

for model, garment, label in tqdm(
    pairs.values, desc="Cropping Images", total=len(pairs), unit="image"
):
    # Both crops are always attempted, so every image that can be cropped is
    # written to disk even when its partner fails.
    model_cropped = crop_model_image(model, label)
    garment_cropped = crop_garment_image(garment, label)

    # Record the pair once, however many of its two crops failed; otherwise a
    # pair with two failures is counted twice and skews the statistics below.
    if not (model_cropped and garment_cropped):
        skipped_images.append((model, garment, label))

# Print the number of cropped images
print(f"Successfully cropped {(1 - len(skipped_images) / len(pairs)) * 100:.2f}% of the dataset. ({len(skipped_images)} skipped)")

Cropping Images: 100%|██████████| 53792/53792 [26:01<00:00, 34.45image/s] 


In [31]:
# Pairs to drop, keyed on the whole (model, garment, label) row. Matching on
# the full key rather than the model file name alone means a row is removed
# only if that exact pair failed to crop. Built once as a set so both splits
# share the same constant-time lookup.
skipped_keys = set(skipped_images)


def _drop_skipped(pair_table: pd.DataFrame) -> pd.DataFrame:
    """Return the rows of `pair_table` whose (model, garment, label) is not in `skipped_keys`."""
    keep = np.array(
        [
            key not in skipped_keys
            for key in zip(
                pair_table["model"], pair_table["garment"], pair_table["label"]
            )
        ],
        dtype=bool,
    )
    return pair_table.loc[keep]


# Remove skipped images from the training set
train_pairs = _drop_skipped(train_pairs)

# Save the training set
train_pairs.to_csv(os.path.join(root, "train_pairs_cropped.txt"), sep="\t", header=False, index=False)

# Remove skipped images from the test set
test_pairs = _drop_skipped(test_pairs)

# Save the test set
test_pairs.to_csv(os.path.join(root, "test_pairs_paired_cropped.txt"), sep="\t", header=False, index=False)