In [8]:
import contextlib
import os
import warnings

import torch
import torchvision.transforms as transforms
import torchvision.transforms.functional as F
from PIL import Image
from torch.utils.data import DataLoader, Dataset, TensorDataset
from torchvision import datasets
from torchvision.datasets import Imagenette
from tqdm import tqdm

In [4]:
# Colab-only: mount Google Drive at /content/drive so that files stored on
# Drive are reachable through the local filesystem.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [5]:
cd "/content/drive/MyDrive/Colab_Notebooks/ViTAR/Dataset"

/content/drive/MyDrive/Colab_Notebooks/ViTAR/Dataset


In [6]:
# Per-channel statistics used by the Normalize steps below.
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]

# Training-time pipeline: light augmentation followed by tensor conversion and
# normalization. It is intentionally NOT attached to the datasets created in
# this cell (see below); apply it when loading the exported images for training.
train_transforms = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std)])

# Evaluation-time pipeline: tensor conversion and normalization only.
test_transforms = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std)])

# Only request a download when the extracted archive is missing, so the cell
# can be re-run after the first download.
# NOTE(review): some torchvision releases appear to raise when download=True
# and the directory already exists — confirm for the installed version.
imagenette_root = 'data'
needs_download = not os.path.isdir(os.path.join(imagenette_root, 'imagenette2'))

# The datasets below feed save_binned_dataset, which writes images to PNG.
# They therefore yield the raw PIL images (transform=None): exporting
# normalized tensors would write out-of-range pixel values, and exporting
# randomly flipped/rotated images would bake the augmentation into the files.
train_data = Imagenette(root=imagenette_root, split='train', download=needs_download, transform=None)
test_val_data = Imagenette(root=imagenette_root, split='val', download=False, transform=None)

# Split the validation set in half; the test half receives the extra sample
# when the size is odd (same sizes as before).
num_val = len(test_val_data) // 2
num_test = len(test_val_data) - num_val

# Seed the split: val and test are exported to disk in separate cells, so an
# unseeded split could differ between runs and leak images across the two sets.
split_generator = torch.Generator().manual_seed(42)
test_data, val_data = torch.utils.data.random_split(
    test_val_data, [num_test, num_val], generator=split_generator)

Downloading https://s3.amazonaws.com/fast-ai-imageclas/imagenette2.tgz to data/imagenette2.tgz


100%|██████████| 1.56G/1.56G [00:35<00:00, 43.9MB/s]


Extracting data/imagenette2.tgz to data


In [10]:
def save_binned_dataset(dataset, bins, output_dir):
    """
    Bin every image of a dataset by resolution and save it, resized, to disk.

    Each image is assigned to the bin whose value is closest to the image's
    longer side, resized to a square of that bin's size, and written to
    ``<output_dir>/bin_<size>/<idx>.png``. Every bin that receives at least one
    image also gets a ``labels.txt`` file with one ``"<idx>.png <label>"`` line
    per saved image. ``labels.txt`` is rewritten from scratch on each call, so
    re-running the export does not duplicate label lines.

    Args:
        dataset: An indexable dataset supporting ``len()`` whose items are
            ``(image, label)`` pairs. ``image`` is either a PIL image or a
            tensor; tensors are converted with ``to_pil_image`` and should hold
            un-normalized pixel values (floats in [0, 1]).
        bins: Non-empty iterable of resolution bins to categorize the images.
        output_dir: Path to save the dataset organized by bins.

    Returns:
        None. The function saves resized images and labels to disk.

    Raises:
        ValueError: If ``bins`` is empty.
    """
    bins = list(bins)
    if not bins:
        raise ValueError("bins must contain at least one resolution")

    # Create the output directory and one sub-directory per bin.
    os.makedirs(output_dir, exist_ok=True)
    bin_dirs = {bin_size: os.path.join(output_dir, f"bin_{bin_size}") for bin_size in bins}
    for bin_dir in bin_dirs.values():
        os.makedirs(bin_dir, exist_ok=True)

    warned_about_range = False

    # Label files are opened lazily (only for bins that actually receive an
    # image) and kept open for the whole pass; the ExitStack closes them all,
    # even if the loop is interrupted.
    with contextlib.ExitStack() as stack:
        label_files = {}

        # Single pass through the dataset
        for idx in tqdm(range(len(dataset))):
            image, label = dataset[idx]

            # Ensure the image is a PIL.Image for resizing
            if isinstance(image, torch.Tensor):
                if (not warned_about_range
                        and image.is_floating_point()
                        and image.numel() > 0
                        and (image.min() < 0 or image.max() > 1)):
                    warnings.warn(
                        "save_binned_dataset received a float tensor with values "
                        "outside [0, 1] (normalized input?); the saved images "
                        "will not match the source pixels."
                    )
                    warned_about_range = True
                image = F.to_pil_image(image)

            # PIL's size is (width, height); bin by the longer side.
            resolution = max(image.size)
            closest_bin = min(bins, key=lambda size: abs(size - resolution))

            # Resize the PIL image directly to the bin's square resolution;
            # no tensor round-trip is needed before saving.
            image_resized = F.resize(image, [closest_bin, closest_bin])

            # Save the resized image
            bin_dir = bin_dirs[closest_bin]
            image_resized.save(os.path.join(bin_dir, f"{idx}.png"))

            # Save the label. "w" truncates any labels left by a previous run;
            # line buffering keeps the file in step with the images on disk.
            if closest_bin not in label_files:
                label_files[closest_bin] = stack.enter_context(
                    open(os.path.join(bin_dir, "labels.txt"), "w", buffering=1)
                )
            label_files[closest_bin].write(f"{idx}.png {label}\n")

    print("Binning and saving completed.")


In [None]:
# Resolution bins shared by every split; each image is resized to a square
# whose side is the bin closest to its longer edge.
bins = [224, 448, 640, 896, 1280, 1920, 2800, 4032]

# Export the training split in one pass over the dataset.
output_dir = "./binned_dataset/train"
save_binned_dataset(dataset=train_data, bins=bins, output_dir=output_dir)

  4%|▎         | 332/9469 [01:04<16:35,  9.18it/s]

In [None]:
# Export the validation split in one pass, reusing the bins defined earlier.
output_dir = "./binned_dataset/val"
save_binned_dataset(dataset=val_data, bins=bins, output_dir=output_dir)

In [None]:
# Export the test split in one pass, reusing the bins defined earlier.
output_dir = "./binned_dataset/test"
save_binned_dataset(dataset=test_data, bins=bins, output_dir=output_dir)