At this stage, you have a dataset with 6 folders, each containing images and their annotations in COCO format.

In this notebook, we will apply visual transformations to enrich our dataset.

In [None]:
import torch
from nacl.pwhash.argon2id import verify
from torch.utils.data import Dataset, DataLoader
import os
import json
import numpy as np
from PIL import Image
import cv2
from scipy.ndimage import map_coordinates
from noise import pnoise2
import math
import random
import albumentations as A
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
import os
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
from scipy.ndimage import map_coordinates
import random
import json
import cv2

As always, we implement a class to handle annotations in COCO format:

In [None]:
class KeypointDataset(Dataset):
    """Index a COCO keypoint annotation file for lookups by annotation id.

    Attributes built at construction time:
        coco_data: the parsed JSON document.
        liste_id: set of all annotation ids.
        id_to_image_id: annotation id -> id of the image it belongs to.
        img_id_to_file: image id -> ``file_name`` of that image.
        id_to_keypoints: annotation id -> flat ``keypoints`` list.

    ``img_dir``, ``transform``, ``sigma`` and ``target_size`` are stored as
    given; ``img_list`` is accepted but not used by the constructor.
    """

    def __init__(self, coco_json, img_dir, img_list=None, transform=None, sigma=2, target_size=(512,512)):
        with open(coco_json, 'r') as f:
            self.coco_data = json.load(f)

        self.img_dir = img_dir
        self.transform = transform
        self.sigma = sigma
        self.target_size = target_size

        # Build the three per-annotation lookups in a single pass.
        self.liste_id = set()
        self.id_to_image_id = {}
        self.id_to_keypoints = {}
        for annotation in self.coco_data['annotations']:
            annotation_id = annotation['id']
            self.liste_id.add(annotation_id)
            self.id_to_image_id[annotation_id] = annotation['image_id']
            self.id_to_keypoints[annotation_id] = annotation['keypoints']

        # Image id -> file name, used to locate the picture of an annotation.
        self.img_id_to_file = {}
        for image in self.coco_data['images']:
            self.img_id_to_file[image['id']] = image['file_name']

As a first step, if you consider your dataset to be small, you can begin by running the next two cells.
They add four augmented copies of every image (two random rotations, one noise variation and one brightness/contrast variation),
each with matching annotations, so the dataset ends up five times its original size.

In [None]:
def apply_transformation(image_path, kp_v, kp_xy, transform):
    """Load an image, run ``transform`` on it and rebuild flat keypoints.

    Args:
        image_path: path of the image; it is opened and converted to RGB.
        kp_v: visibility flag of each keypoint.
        kp_xy: ``(x, y)`` pair of each keypoint, in the same order as ``kp_v``.
        transform: callable invoked as ``transform(image=..., keypoints=...)``
            that returns a mapping with ``'image'`` and ``'keypoints'``.

    Returns:
        ``(augmented_image, keypoints)`` where ``keypoints`` is a flat
        ``[x, y, v, ...]`` list with one triplet per entry of ``kp_v``.

    Transformed keypoints are matched to ``kp_v`` by position. A keypoint
    keeps its flag only if it was visible and still lies inside the original
    image bounds; otherwise its flag becomes 0. If the transform returns
    fewer keypoints than ``kp_v`` has entries, the missing trailing ones are
    emitted as ``[0.0, 0.0, 0]``.

    Any exception raised while loading or transforming propagates unchanged.
    """
    pixels = np.array(Image.open(image_path).convert('RGB'))
    img_h, img_w = pixels.shape[:2]

    result = transform(image=pixels, keypoints=kp_xy)
    moved = result['keypoints']
    moved_count = len(moved)

    flat = []
    for index, visibility in enumerate(kp_v):
        if index >= moved_count:
            # No transformed counterpart for this keypoint: emit a placeholder.
            flat += [0.0, 0.0, 0]
            continue

        px, py = moved[index]
        inside = 0 <= px < img_w and 0 <= py < img_h
        flag = int(visibility) if inside and visibility > 0 else 0
        flat += [float(px), float(py), flag]

    return result['image'], flat

def verify_keypoints(keypoints, img_width, img_height):
    """Mark keypoints lying outside the image as not visible.

    ``keypoints`` is a flat ``[x, y, v, ...]`` list. Coordinates are never
    modified; the flag ``v`` is kept only when it is positive and
    ``0 <= x < img_width`` and ``0 <= y < img_height``, otherwise it is
    replaced by 0. Returns a new flat list.
    """
    checked = []
    for start in range(0, len(keypoints), 3):
        x, y, v = keypoints[start:start + 3]
        in_frame = 0 <= x < img_width and 0 <= y < img_height
        checked += [x, y, v if in_frame and v > 0 else 0]
    return checked

def count_visible_keypoints(keypoints):
    """Count the triplets of a flat ``[x, y, v, ...]`` list whose ``v`` is positive."""
    total = 0
    for position, value in enumerate(keypoints):
        # Every third entry (positions 2, 5, 8, ...) is a visibility flag.
        if position % 3 == 2 and value > 0:
            total += 1
    return total


# Augmentation pipelines applied to every annotated image. The dictionary key
# is used as the prefix of the generated file name.
#
# remove_invisible=False is required: apply_transformation pairs the
# transformed keypoints with their visibility flags by position. With the
# albumentations default (remove_invisible=True) a keypoint rotated outside
# the frame is dropped from the output, which shifts every following keypoint
# onto the wrong flag. Out-of-frame keypoints are instead kept here and marked
# invisible downstream.
#
# "rot1" and "rot2" are configured identically on purpose: they produce two
# rotated copies per image (the angle is sampled in [0, 90] at call time).
transfos = {
    "rot1": A.Compose(
        [A.Rotate(limit=(0, 90), p=1)],
        keypoint_params=A.KeypointParams(format='xy', remove_invisible=False),
    ),
    "rot2": A.Compose(
        [A.Rotate(limit=(0, 90), p=1)],
        keypoint_params=A.KeypointParams(format='xy', remove_invisible=False),
    ),

    # Pixel-level noise does not move keypoints; keypoint_params is declared
    # anyway so that all four pipelines accept the same call signature.
    "noise": A.Compose(
        [A.GaussNoise(var_limit=(10.0, 50.0), p=1)],
        keypoint_params=A.KeypointParams(format='xy', remove_invisible=False),
    ),

    "contrast": A.Compose(
        [
            A.RandomBrightnessContrast(
                brightness_limit=0.3,
                contrast_limit=0.15,
                p=1.0
            )
        ],
        keypoint_params=A.KeypointParams(format='xy', remove_invisible=False),
    ),
}


Please fill in "big_dataset = " by specifying the path to your dataset

In [None]:
big_dataset = '../DATASET_TRANSFO_rot'

# For every sub-folder of big_dataset that contains an annotations.json:
# apply each pipeline of `transfos` to every annotated image, write the
# augmented image next to the original as "<pipeline>_<original name>", and
# append the matching image/annotation records to annotations.json.
#
# NOTE: the output file name depends only on the pipeline and the image name,
# so an image carrying several annotations is rewritten once per annotation.
for element in os.listdir(big_dataset):
    # Skip hidden entries.
    if element.startswith('.'):
        continue

    dataset = os.path.join(big_dataset, element)
    annotations_path = os.path.join(dataset, 'annotations.json')
    if not os.path.exists(annotations_path):
        continue

    try:
        with open(annotations_path, 'r') as f:
            data = json.load(f)
        X = KeypointDataset(coco_json=annotations_path, img_dir=dataset)
        # New records continue the numbering after the highest existing id.
        new_image_id = max((img['id'] for img in data.get('images', [])), default=0)
        new_ann_id = max((ann['id'] for ann in data.get('annotations', [])), default=0)
    except Exception as e:
        # Best effort: report the broken folder and move on to the next one.
        print(f"Erreur lors du traitement du dataset {dataset}: {e}")
        continue

    liste_identifiants = X.liste_id
    if not liste_identifiants:
        continue

    for source_ann_id in liste_identifiants:
        try:
            image_name = X.img_id_to_file[X.id_to_image_id[source_ann_id]]
            keypoints = X.id_to_keypoints[source_ann_id]
            full_image_name = os.path.join(dataset, image_name)
            if not os.path.exists(full_image_name):
                continue

            # COCO stores keypoints flat as [x1, y1, v1, x2, y2, v2, ...].
            keypoints_v = keypoints[2::3]
            keypoints_xy = list(zip(keypoints[::3], keypoints[1::3]))
            base_name, ext = os.path.splitext(image_name)
        except Exception as e:
            print(f"Erreur lors du traitement de l'ID {source_ann_id}: {e}")
            continue

        for name, transform in transfos.items():
            try:
                img_aug, kps_aug = apply_transformation(
                    full_image_name,
                    keypoints_v,
                    keypoints_xy,
                    transform
                )

                # Validate against the real size of the augmented image
                # rather than assuming 512x512.
                img_h, img_w = img_aug.shape[:2]
                kps_aug = verify_keypoints(kps_aug, img_w, img_h)

                # OpenCV writes uint8; rescale [0, 1] float images first.
                if img_aug.dtype != np.uint8:
                    if img_aug.max() <= 1.0:
                        img_aug = img_aug * 255
                    img_aug = img_aug.clip(0, 255).astype(np.uint8)
                img_aug_bgr = cv2.cvtColor(img_aug, cv2.COLOR_RGB2BGR)

                # Bounding box of the visible keypoints only: invisible ones
                # may sit at placeholder or out-of-frame coordinates.
                visible_xy = [
                    (kps_aug[i], kps_aug[i + 1])
                    for i in range(0, len(kps_aug), 3)
                    if kps_aug[i + 2] > 0
                ]
                if visible_xy:
                    x_coords, y_coords = zip(*visible_xy)
                    x_min, x_max = min(x_coords), max(x_coords)
                    y_min, y_max = min(y_coords), max(y_coords)
                else:
                    x_min = x_max = y_min = y_max = 0
                area = (y_max - y_min) * (x_max - x_min)

                filename_image_transformed = f"{name}_{base_name}{ext}"
                chemin_sortie = os.path.join(dataset, filename_image_transformed)
                if not cv2.imwrite(chemin_sortie, img_aug_bgr):
                    print(f"Erreur lors de l'écriture de l'image: {chemin_sortie}")
                    continue

                # Ids are allocated only once the image is on disk, so a
                # failed augmentation leaves no gap in the numbering.
                new_image_id += 1
                new_ann_id += 1

                data['images'].append({
                    "id": new_image_id,
                    "file_name": filename_image_transformed,
                    "width": img_w,
                    "height": img_h
                })

                data['annotations'].append({
                    "id": new_ann_id,
                    "image_id": new_image_id,
                    "category_id": 1,
                    "keypoints": kps_aug,
                    # COCO num_keypoints counts labelled keypoints (v > 0).
                    "num_keypoints": count_visible_keypoints(kps_aug),
                    "bbox": [x_min, y_min, x_max - x_min, y_max - y_min],
                    "area": area,
                    "iscrowd": 0,
                    "segmentation": []
                })

            except Exception as e:
                print(f"Erreur lors du traitement de l'ID {source_ann_id} ({name}): {e}")
                continue

    try:
        # Serialise before opening the file: opening with 'w' truncates it,
        # so a serialisation error must not happen after that point.
        serialized = json.dumps(data, indent=4)
        with open(annotations_path, 'w') as f:
            f.write(serialized)
    except Exception as e:
        print(f"Erreur lors du traitement du dataset {dataset}: {e}")

Next, we apply two further transformations to every image and its annotations in the dataset:
one reproduces the curvature of a Data Matrix code, the other reproduces the crumpling of a Data Matrix code.

In [None]:
def separate_keypoints(kp):
    """Split a flat ``[x, y, v, ...]`` list by visibility flag.

    Triplets whose flag equals 2 go to the "black" lists, every other triplet
    goes to the "white" lists.

    Returns:
        ``(x_black, y_black, x_white, y_white)``.
    """
    black = ([], [])
    white = ([], [])
    for start in range(0, len(kp), 3):
        xs, ys = black if kp[start + 2] == 2 else white
        xs.append(kp[start])
        ys.append(kp[start + 1])
    return black[0], black[1], white[0], white[1]

def validate_keypoints(keypoints, width, height):
    """Clamp visible keypoints into the image.

    For each triplet of the flat ``[x, y, v, ...]`` list with ``v > 0``,
    ``x`` is limited to ``[0, width - 1]`` and ``y`` to ``[0, height - 1]``.
    Triplets with ``v <= 0`` are copied unchanged. Returns a new flat list.
    """
    def clamp(value, upper):
        return max(0, min(upper, value))

    clamped = []
    for start in range(0, len(keypoints), 3):
        x = keypoints[start]
        y = keypoints[start + 1]
        v = keypoints[start + 2]
        if v > 0:
            x, y = clamp(x, width - 1), clamp(y, height - 1)
        clamped += [x, y, v]
    return clamped


def calculate_bbox_from_keypoints(keypoints):
    """Return the extent of the visible keypoints.

    Only triplets of the flat ``[x, y, v, ...]`` list with ``v > 0`` are
    considered.

    Returns:
        ``(x_min, y_min, x_max, y_max, area)`` where ``area`` is the width
        times the height of the box; all zeros when ``keypoints`` is empty or
        contains no visible keypoint.
    """
    nothing = (0, 0, 0, 0, 0)
    if not keypoints:
        return nothing

    xs, ys = [], []
    for start in range(0, len(keypoints), 3):
        if keypoints[start + 2] > 0:
            xs.append(keypoints[start])
            ys.append(keypoints[start + 1])

    if not xs:
        return nothing

    x_min, x_max = min(xs), max(xs)
    y_min, y_max = min(ys), max(ys)
    return x_min, y_min, x_max, y_max, (y_max - y_min) * (x_max - x_min)


def save_transformed_image(img_aug, dataset, filename):
    """Write an RGB image array into ``dataset`` under ``filename``.

    Non-uint8 input is converted first: values are multiplied by 255 when the
    maximum is at most 1.0, then clipped to [0, 255] and cast to uint8. The
    channels are reordered from RGB to BGR before the OpenCV write.

    Returns:
        True when the file was written, False (after printing a message)
        when ``cv2.imwrite`` reports a failure.
    """
    if img_aug.dtype != np.uint8:
        scaled = img_aug * 255 if img_aug.max() <= 1.0 else img_aug
        img_aug = scaled.clip(0, 255).astype(np.uint8)

    bgr_pixels = cv2.cvtColor(img_aug, cv2.COLOR_RGB2BGR)
    chemin_sortie = os.path.join(dataset, filename)

    if cv2.imwrite(chemin_sortie, bgr_pixels):
        return True

    print(f"Erreur lors de l'écriture de l'image: {chemin_sortie}")
    return False



In [None]:
def apply_curvature_transform(image, curvature_factor=0.3):
    """Shift every row of an image horizontally along one sine period.

    Each output pixel ``(y, x)`` is sampled (bilinear, ``order=1``) from the
    input at ``(y, x - curvature_factor * sin(2*pi*y/height) * width)``;
    samples falling outside the input are filled with 255.

    Args:
        image: array indexed as ``[row, column, channel]``.
        curvature_factor: amplitude of the shift as a fraction of the width.

    Returns:
        ``(warped, x_source, y_source)``: the warped image cast to uint8 and
        the per-pixel input coordinates that were sampled.
    """
    rows, cols = image.shape[:2]
    grid_y, grid_x = np.mgrid[0:rows, 0:cols]

    row_shift = curvature_factor * np.sin(2 * np.pi * grid_y / rows) * cols
    x_source = grid_x - row_shift
    y_source = grid_y.astype(float)
    sample_at = [y_source, x_source]

    warped = np.zeros_like(image)
    for channel in range(image.shape[2]):
        warped[..., channel] = map_coordinates(
            image[..., channel], sample_at, order=1, mode='constant', cval=255
        )

    return warped.astype(np.uint8), x_source, y_source


def deform_point(x, y, height, width, curvature_factor=0.3):
    """Move a point the way the curvature warp moves image content.

    The point is shifted horizontally by
    ``curvature_factor * sin(2*pi*y/height) * width`` and the result is
    clipped to ``[0, width - 1]`` x ``[0, height - 1]``. The shift is computed
    from the unclipped ``y``.

    Args:
        x, y: coordinates of the point (scalars, or NumPy arrays of points).
        height, width: size of the image the point belongs to.
        curvature_factor: amplitude of the shift as a fraction of the width.

    Returns:
        ``(x_deformed, y_deformed)``. For scalar input both values are plain
        Python floats so they can be written with ``json`` (``np.clip`` on an
        integer ``y`` would otherwise yield ``numpy.int64``, which the ``json``
        module rejects). Array input is returned as arrays.
    """
    delta_x = curvature_factor * np.sin(2 * np.pi * y / height) * width

    x_deformed = np.clip(x + delta_x, 0, width - 1)
    y_deformed = np.clip(y, 0, height - 1)

    if np.ndim(x_deformed) == 0 and np.ndim(y_deformed) == 0:
        return float(x_deformed), float(y_deformed)
    return x_deformed, y_deformed


def deform_keypoints(keypoints, height, width, curvature_factor=0.3):
    """Apply ``deform_point`` to every visible keypoint.

    ``keypoints`` is a flat ``[x, y, v, ...]`` list. Triplets with ``v > 0``
    get their coordinates replaced by the result of ``deform_point``; the
    others are copied unchanged. Visibility flags are never modified.
    Returns a new flat list.
    """
    deformed = []
    for start in range(0, len(keypoints), 3):
        x, y, v = keypoints[start], keypoints[start + 1], keypoints[start + 2]
        if v > 0:
            x, y = deform_point(x, y, height, width, curvature_factor)
        deformed += [x, y, v]
    return deformed

In [None]:
def generate_wrinkle_points(width, height, num_wrinkles=3, min_distance=80):
    """Draw random wrinkle centres by rejection sampling.

    Candidate centres are drawn uniformly in the central 60 % of the image
    (20 %-80 % of each dimension). A candidate closer than ``min_distance``
    to an already accepted centre is rejected. Sampling stops after
    ``num_wrinkles`` acceptances or 1000 candidates, whichever comes first,
    so fewer points than requested may be returned.

    Returns:
        List of ``(x, y, intensity, radius)`` tuples, with ``intensity`` drawn
        in [3, 8] and ``radius`` in [40, 80].
    """
    max_attempts = 1000
    accepted = []

    for _ in range(max_attempts):
        if len(accepted) >= num_wrinkles:
            break

        x = random.uniform(width * 0.2, width * 0.8)
        y = random.uniform(height * 0.2, height * 0.8)

        too_close = any(
            np.sqrt((x - px)**2 + (y - py)**2) < min_distance
            for px, py, _, _ in accepted
        )
        if too_close:
            continue

        intensity = random.uniform(3, 8)
        radius = random.uniform(40, 80)
        accepted.append((x, y, intensity, radius))

    return accepted


def apply_wrinkle_transform(image, wrinkle_intensity=0.3, num_wrinkles=3):
    """Warp an image with a sum of localized, randomly oriented ripples.

    Wrinkle centres come from ``generate_wrinkle_points``. Each one adds an
    offset to the sampling coordinates, weighted by a Gaussian of the
    distance to the centre (standard deviation ``1.5 * radius``) and scaled
    by ``intensity * wrinkle_intensity``. The image is then resampled at the
    offset coordinates (bilinear, out-of-range samples filled with 255).

    Args:
        image: array indexed as ``[row, column, channel]``.
        wrinkle_intensity: global multiplier of the offsets.
        num_wrinkles: number of wrinkle centres requested.

    Returns:
        ``(warped, displacement_x, displacement_y)``: the warped image cast to
        uint8 and, for every output pixel, the sampled input coordinate minus
        the output coordinate along x and y.
    """
    height, width = image.shape[:2]
    centres = generate_wrinkle_points(width, height, num_wrinkles)

    grid_y, grid_x = np.mgrid[0:height, 0:width]
    x_source = grid_x.astype(float)
    y_source = grid_y.astype(float)

    for cx, cy, intensity, radius in centres:
        dist = np.sqrt((grid_x - cx)**2 + (grid_y - cy)**2)
        influence = np.exp(-dist**2 / (2 * (radius * 1.5)**2))

        # One random orientation per wrinkle.
        angle = random.uniform(0, 2 * np.pi)
        radial_factor = intensity * wrinkle_intensity * influence

        phase = dist / radius
        ripple = np.sin(phase * 2)
        core = np.exp(-dist / (radius * 0.8))

        x_source += radial_factor * (
            np.cos(angle + phase) * ripple +
            0.2 * np.sin(2 * angle) * core
        )
        y_source += radial_factor * (
            np.sin(angle + phase) * ripple +
            0.2 * np.cos(2 * angle) * core
        )

    sample_at = [y_source, x_source]
    warped = np.zeros_like(image)
    for channel in range(image.shape[2]):
        warped[..., channel] = map_coordinates(
            image[..., channel], sample_at, order=1, mode='constant', cval=255
        )

    return warped.astype(np.uint8), x_source - grid_x, y_source - grid_y


def deform_point_wrinkle(x, y, displacement_x, displacement_y, width, height):
    """Locate where a source point ends up after the wrinkle warp.

    The displacement fields are the ones returned by
    ``apply_wrinkle_transform``: for each OUTPUT pixel ``d`` they hold the
    offset to the INPUT location that was sampled, i.e.
    ``output[d] = input[d + displacement(d)]``. Image content located at
    ``s`` in the input therefore appears at the ``d`` solving
    ``d + displacement(d) = s``; it moves by minus the displacement, not plus.
    That equation is solved with a few fixed-point iterations
    ``d <- s - displacement(d)`` starting from ``d = s``.

    Args:
        x, y: scalar coordinates of the point in the original image.
        displacement_x, displacement_y: 2-D arrays indexed ``[row, column]``.
        width, height: image size used to clip the input and the result.

    Returns:
        ``(x_deformed, y_deformed)`` as plain Python floats, clipped to
        ``[0, width - 1]`` x ``[0, height - 1]``.
    """
    rows, cols = displacement_x.shape[:2]

    def sample(field, px, py):
        """Bilinearly interpolate ``field`` at (px, py), clamped to the grid."""
        px = min(max(px, 0.0), cols - 1.0)
        py = min(max(py, 0.0), rows - 1.0)
        x0, y0 = int(px), int(py)
        # On the last row/column the neighbour collapses onto the cell itself.
        x1, y1 = min(x0 + 1, cols - 1), min(y0 + 1, rows - 1)
        fx, fy = px - x0, py - y0
        upper = field[y0, x0] * (1 - fx) + field[y0, x1] * fx
        lower = field[y1, x0] * (1 - fx) + field[y1, x1] * fx
        return float(upper * (1 - fy) + lower * fy)

    source_x = float(np.clip(x, 0, width - 1))
    source_y = float(np.clip(y, 0, height - 1))

    # The field is smooth and only a few pixels in magnitude, so a handful of
    # iterations is enough to invert the mapping.
    dest_x, dest_y = source_x, source_y
    for _ in range(3):
        dest_x, dest_y = (
            source_x - sample(displacement_x, dest_x, dest_y),
            source_y - sample(displacement_y, dest_x, dest_y),
        )

    x_deformed = float(np.clip(dest_x, 0, width - 1))
    y_deformed = float(np.clip(dest_y, 0, height - 1))

    return x_deformed, y_deformed


def deform_keypoints_wrinkle(keypoints, displacement_x, displacement_y, width, height):
    """Apply ``deform_point_wrinkle`` to every visible keypoint.

    ``keypoints`` is a flat ``[x, y, v, ...]`` list. Triplets with ``v > 0``
    get their coordinates replaced by the result of ``deform_point_wrinkle``;
    the others are copied unchanged. Visibility flags are never modified.
    Returns a new flat list.
    """
    deformed = []
    for start in range(0, len(keypoints), 3):
        x, y, v = keypoints[start], keypoints[start + 1], keypoints[start + 2]
        if v > 0:
            x, y = deform_point_wrinkle(x, y, displacement_x, displacement_y, width, height)
        deformed += [x, y, v]
    return deformed


Please fill in "big_dataset = " by specifying the path to your dataset

In [None]:
big_dataset = 'DATASET'


def _as_json_keypoints(keypoints):
    """Return a flat [x, y, v, ...] list with float coordinates and int flags.

    The deformation helpers go through NumPy, whose scalar types are not all
    accepted by the json module; converting here keeps the dump from failing.
    """
    return [
        int(value) if index % 3 == 2 else float(value)
        for index, value in enumerate(keypoints)
    ]


def _append_coco_entry(data, image_id, annotation_id, file_name, keypoints, width, height):
    """Append one image record and its keypoint annotation to ``data`` in place."""
    x_min, y_min, x_max, y_max, area = calculate_bbox_from_keypoints(keypoints)

    data['images'].append({
        "id": image_id,
        "file_name": file_name,
        "width": width,
        "height": height
    })

    data['annotations'].append({
        "id": annotation_id,
        "image_id": image_id,
        "category_id": 1,
        "keypoints": keypoints,
        "num_keypoints": sum(1 for v in keypoints[2::3] if v > 0),
        "bbox": [x_min, y_min, x_max - x_min, y_max - y_min],
        "area": area,
        "iscrowd": 0,
        "segmentation": []
    })


# For every sub-folder of big_dataset that contains an annotations.json:
# write a "curved_" and a "crumpling_" version of each annotated image next
# to the original and append the matching records to annotations.json.
#
# NOTE: the output file names depend only on the image name, so an image
# carrying several annotations is rewritten once per annotation (the crumpled
# version with a different random warp each time).
for element in os.listdir(big_dataset):
    # Skip hidden entries.
    if element.startswith('.'):
        continue

    dataset = os.path.join(big_dataset, element)
    annotations_path = os.path.join(dataset, 'annotations.json')

    if not os.path.exists(annotations_path):
        continue

    try:
        with open(annotations_path, 'r') as f:
            data = json.load(f)
    except Exception as e:
        print(f"Erreur lors du traitement du dataset {dataset}: {e}")
        continue

    try:
        X = KeypointDataset(coco_json=annotations_path, img_dir=dataset)
        liste_identifiants = X.liste_id

        if not liste_identifiants:
            continue

        # New records continue the numbering after the highest existing id.
        new_image_id = max((img['id'] for img in data.get('images', [])), default=0)
        new_ann_id = max((ann['id'] for ann in data.get('annotations', [])), default=0)

        for source_ann_id in liste_identifiants:
            try:
                id_image = X.id_to_image_id[source_ann_id]
                image_name = X.img_id_to_file[id_image]
                keypoints = X.id_to_keypoints[source_ann_id]
                full_image_name = os.path.join(dataset, image_name)

                if not os.path.exists(full_image_name):
                    continue

                img_np = np.array(Image.open(full_image_name).convert("RGB"))
                # Use the real image size instead of assuming 512x512.
                img_h, img_w = img_np.shape[:2]
                base_name, ext = os.path.splitext(image_name)

                # --- Curvature -------------------------------------------
                curvature = 0.2
                img_curved, _, _ = apply_curvature_transform(img_np, curvature)
                kps_curved = deform_keypoints(keypoints, img_h, img_w, curvature)
                kps_curved = _as_json_keypoints(validate_keypoints(kps_curved, img_w, img_h))

                filename_curved = f"curved_{base_name}{ext}"
                if save_transformed_image(img_curved, dataset, filename_curved):
                    # Ids are allocated only once the image is on disk.
                    new_image_id += 1
                    new_ann_id += 1
                    _append_coco_entry(
                        data, new_image_id, new_ann_id,
                        filename_curved, kps_curved, img_w, img_h
                    )

                # --- Crumpling -------------------------------------------
                wrinkle_intensity = 0.3
                num_wrinkles = 3
                img_wrinkled, displacement_x, displacement_y = apply_wrinkle_transform(
                    img_np, wrinkle_intensity, num_wrinkles
                )
                kps_wrinkled = deform_keypoints_wrinkle(
                    keypoints, displacement_x, displacement_y, img_w, img_h
                )
                kps_wrinkled = _as_json_keypoints(validate_keypoints(kps_wrinkled, img_w, img_h))

                filename_wrinkled = f"crumpling_{base_name}{ext}"
                if save_transformed_image(img_wrinkled, dataset, filename_wrinkled):
                    new_image_id += 1
                    new_ann_id += 1
                    _append_coco_entry(
                        data, new_image_id, new_ann_id,
                        filename_wrinkled, kps_wrinkled, img_w, img_h
                    )

            except Exception as e:
                print(f"Erreur lors du traitement de l'ID {source_ann_id}: {e}")
                continue

        # Serialise before opening the file: opening with 'w' truncates it,
        # so a serialisation error must not happen after that point.
        serialized = json.dumps(data, indent=4)
        with open(annotations_path, 'w') as f:
            f.write(serialized)

    except Exception as e:
        print(f"Erreur lors du traitement du dataset {dataset}: {e}")
        continue



You now have a dataset of the same format but containing more data, having applied transformations to your initial images and annotations.