### Importing Required Libraries

In [None]:
from torchvision.datasets import DTD
from torchvision import transforms
from collections import Counter
import numpy as np
from PIL import Image
import os, glob

### Loading the Dataset

In [None]:
# Preprocessing pipeline shared by all three splits; ToTensor is its only step.
transform = transforms.Compose([transforms.ToTensor()])

# The train split is built first and is the only one that asks for a download;
# val and test are then constructed from the same root directory.
train = DTD(root="data", split="train", download=True, transform=transform)
val, test = (
    DTD(root="data", split=split_name, transform=transform)
    for split_name in ("val", "test")
)

100%|██████████| 625M/625M [00:46<00:00, 13.5MB/s]


In [None]:
# Number of samples in each split, their combined total, and the number of
# class names exposed by the train split.
n_train, n_val, n_test = (len(split) for split in (train, val, test))
total_images = sum((n_train, n_val, n_test))
num_classes = len(train.classes)

### Data Statistics

In [None]:
# Headline followed by one "<label> <count>" line per statistic.
print("Dataset Summary:\n")
print(
    *(
        f"{label} {count}"
        for label, count in (
            ("Number of Classes:", num_classes),
            ("Total Images:", total_images),
            ("Train Images:", n_train),
            ("Validation Images:", n_val),
            ("Test Images:", n_test),
        )
    ),
    sep="\n",
)

Dataset Summary:

Number of Classes: 47
Total Images: 5640
Train Images: 1880
Validation Images: 1880
Test Images: 1880


### Class Distributions

In [None]:
def _split_labels(dataset):
    """Return the class label of every sample in ``dataset`` as a list.

    Indexing the dataset yields ``(image, label)`` pairs, so reading labels by
    iteration forces every image to be opened and transformed. When the
    dataset object already carries a label list of matching length, that list
    is used instead and no image is touched.

    NOTE(review): ``_labels`` is a private torchvision attribute -- confirm it
    is present in the installed version. If it is missing (or its length does
    not match), the function falls back to plain iteration, so the result is
    the same either way, only slower.
    """
    stored = getattr(dataset, "_labels", None)
    if stored is not None and len(stored) == len(dataset):
        return list(stored)
    return [label for _image, label in dataset]


# Labels of all three splits, concatenated in train / val / test order.
all_labels = _split_labels(train) + _split_labels(val) + _split_labels(test)
counts = Counter(all_labels)
print("Class Distribution:\n")
print("Average number of images per Class:", np.mean(list(counts.values())))
print("Min number of images per Class:", min(counts.values()))
print("Max number of images per Class:", max(counts.values()))

Class Distribution:

Average number of images per Class: 120.0
Min number of images per Class: 120
Max number of images per Class: 120


### Dimensions of Images

In [None]:
# Image folder expressed relative to the working directory, matching the
# relative root="data" passed to DTD when the dataset was loaded, instead of
# a machine-specific absolute path.
# NOTE(review): the "dtd/dtd/images" sub-path mirrors the original hard-coded
# location -- confirm it matches the layout your torchvision version extracts.
IMG_ROOT = os.path.join("data", "dtd", "dtd", "images")
paths = sorted(glob.glob(os.path.join(IMG_ROOT, "*", "*.jpg")))
if not paths:
    # Fail with an actionable message rather than a NaN / empty-array error
    # from the statistics below.
    raise FileNotFoundError(
        f"No .jpg files found under {IMG_ROOT!r}; run the dataset loading cell first."
    )

# Collect the width and height of every image; only the size is read here.
W, H = [], []
for p in paths:
    with Image.open(p) as im:
        w, h = im.size
    W.append(w)
    H.append(h)
W, H = np.array(W), np.array(H)

# Min and max are taken separately over widths and heights, so the reported
# pair does not necessarily describe one single image.
print("Image Dimensions:\n")
print(f"Average Size: {int(W.mean())}×{int(H.mean())}")
print(f"Min Size: {W.min()}×{H.min()}")
print(f"Max Size: {W.max()}×{H.max()}")

Image Dimensions:

Average Size: 496×451
Min Size: 271×231
Max Size: 900×778


### RGB Statistics

In [None]:
# One entry per image in `paths`: the mean and standard deviation of each
# colour channel of that image.
r_means, g_means, b_means, r_stds, g_stds, b_stds = [], [], [], [], [], []
for p in paths:
    # Use a context manager (like the image-size cell) so the file handle is
    # closed as soon as the pixel data has been copied into the array.
    with Image.open(p) as im:
        # Channels end up on the last axis; values are divided by 255.
        img = np.array(im.convert('RGB')) / 255.0
    r_means.append(img[:, :, 0].mean())
    g_means.append(img[:, :, 1].mean())
    b_means.append(img[:, :, 2].mean())
    r_stds.append(img[:, :, 0].std())
    g_stds.append(img[:, :, 1].std())
    b_stds.append(img[:, :, 2].std())

# The reported values are unweighted averages of the per-image statistics:
# every image counts equally regardless of its size, and "Std" is the mean of
# per-image standard deviations rather than the std over all pixels.
print("RGB Channel Statistics:\n")
print(f"Mean: {[round(float(np.mean(r_means)),3), round(float(np.mean(g_means)),3), round(float(np.mean(b_means)),3)]}")
print(f"Std: {[round(float(np.mean(r_stds)),3), round(float(np.mean(g_stds)),3), round(float(np.mean(b_stds)),3)]}")

RGB Channel Statistics:

Mean: [0.528, 0.471, 0.423]
Std: [0.18, 0.182, 0.178]
