In [None]:
# For Google Colaboratory
import sys, os
if 'google.colab' in sys.modules:

    # mount google drive
    from google.colab import drive
    drive.mount('/content/gdrive')
    path_to_file = '/content/gdrive/My Drive/CS5242/Project/' # Update this path depending on where data is uploaded on your Google Drive.
    print(path_to_file)
    # move to Google Drive directory
    os.chdir(path_to_file)
    !pwd

### Unzip files

In [None]:
# List the current working directory to check which dataset files are present.
!ls

# Extraction of the CUB_200_2011 archive into the project folder on Google Drive.
# NOTE(review): left commented out, presumably because the archive has already been
# extracted — uncomment if ./CUB_200_2011 is missing.
# !tar -xzf "/content/gdrive/My Drive/CS5242/Project/CUB_200_2011.tgz" -C "/content/gdrive/My Drive/CS5242/Project/"

### Merge all Metadata 

In [None]:
import pandas as pd

# Paths of the CUB_200_2011 metadata files (space-separated text, no header row)
images_file = './CUB_200_2011/images.txt'
train_test_split_file = './CUB_200_2011/train_test_split.txt'
classes_file = './CUB_200_2011/classes.txt'
image_class_labels_file = './CUB_200_2011/image_class_labels.txt'
bounding_boxes = './CUB_200_2011/bounding_boxes.txt'

# Load each file into a DataFrame
df_images = pd.read_csv(images_file, sep=" ", names=['image_id', 'image_name'], header=None)
df_train_test_split = pd.read_csv(train_test_split_file, sep=" ", names=['image_id', 'is_training_image'], header=None)
df_classes = pd.read_csv(classes_file, sep=" ", names=['class_id', 'class_name'], header=None)
df_image_class_labels = pd.read_csv(image_class_labels_file, sep=" ", names=['image_id', 'class_id'], header=None)
df_bound = pd.read_csv(bounding_boxes, sep=" ", names=['image_id', 'bounding_x','bounding_y','bounding_width','bounding_height'], header=None)

# Merge the DataFrames.
# `validate` makes pandas raise a MergeError when a key that should be unique is
# duplicated, instead of silently multiplying rows.
df_merged = pd.merge(df_images, df_image_class_labels, on='image_id', validate='one_to_one')
df_merged = pd.merge(df_merged, df_classes, on='class_id', validate='many_to_one')
df_merged = pd.merge(df_merged, df_train_test_split, on='image_id', validate='one_to_one')

# Merge bounding boxes
df_merged = pd.merge(df_merged, df_bound, on='image_id', validate='one_to_one')

# The merges are inner joins, which drop rows whose key has no match on the other
# side; fail loudly if any image lost its metadata along the way.
assert len(df_merged) == len(df_images), (
    f'Expected {len(df_images)} rows after merging, got {len(df_merged)}'
)

display(df_merged.sample(20))


### EDA

In [None]:
from PIL import Image
def get_shape(x):
  """Return a copy of row ``x`` with the image's pixel dimensions added.

  Args:
    x: Row-like mapping (e.g. a pandas Series) with an 'image_name' entry holding
      the image path relative to ./CUB_200_2011/images/.

  Returns:
    A copy of ``x`` with two extra entries, 'width' and 'height', taken from the
    image's ``size`` attribute.
  """
  image_filename = x['image_name']
  image_path = f'./CUB_200_2011/images/{image_filename}'

  # Only the size is read, so the pixel data is never loaded; the context manager
  # makes sure the underlying file handle is closed straight away.
  with Image.open(image_path) as img:
    width, height = img.size

  # Work on a copy: mutating the object handed over by DataFrame.apply is
  # discouraged by pandas and can produce unexpected results.
  x = x.copy()
  x['width'] = width
  x['height'] = height
  return x

# Add 'width' and 'height' columns by running get_shape on every row
df_merged = df_merged.apply(get_shape, axis=1)

In [None]:
# pyplot is imported here because this is the first cell that uses it; without this
# import the cell raises NameError when the notebook is run top to bottom.
import matplotlib.pyplot as plt

# Scatter plot of image width against height for every image in the dataset
fig = plt.figure(figsize=(8, 8))
ax = fig.add_subplot(111)
points = ax.scatter(df_merged.width, df_merged.height, color='blue', alpha=0.5, picker=True)
ax.set_title("Image Resolution")
ax.set_xlabel("Width", size=14)
ax.set_ylabel("Height", size=14)
plt.show()

- Training: 5,994 images (classes are well balanced: 6 classes have 29 training images, the rest have 30)
- Testing: 5,794 images


In [None]:
# Number of images per class over the whole merged table
class_counts = df_merged['class_name'].value_counts()
display(class_counts)

In [None]:
# Show every row for these two summaries only. option_context restores the previous
# 'display.max_rows' setting on exit, so later cells do not dump whole DataFrames.
with pd.option_context('display.max_rows', None):
    # Number of training (1) vs. test (0) images
    display(df_merged['is_training_image'].value_counts())
    # Training images per class
    print(df_merged[df_merged['is_training_image'] == 1].class_name.value_counts())

In [None]:
import cv2
import matplotlib.pyplot as plt
import matplotlib.patches as patches

# Pick one random image and draw its annotated bounding box on top of it
sample_image = df_merged.sample(1)
sample_image_name = sample_image['image_name'].iloc[0]

# Accessing individual bounding box attributes
bbox_x = sample_image['bounding_x'].values[0]
bbox_y = sample_image['bounding_y'].values[0]
bbox_width = sample_image['bounding_width'].values[0]
bbox_height = sample_image['bounding_height'].values[0]

image_path = f'./CUB_200_2011/images/{sample_image_name}'
image = cv2.imread(image_path)
# cv2.imread signals failure by returning None rather than raising
if image is None:
    raise FileNotFoundError(f'Could not read image: {image_path}')

# OpenCV returns channels in BGR order while matplotlib expects RGB; without this
# conversion the red and blue channels are swapped in the plot.
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

plt.imshow(image)
print(image.shape)
rect = patches.Rectangle((bbox_x, bbox_y), bbox_width, bbox_height, linewidth=1, edgecolor='r', facecolor='none')

# Add the rect to the Axes
plt.gca().add_patch(rect)
plt.axis('off')
plt.show()

### Data augmentation
- Applied only after the train/test split, to prevent leakage between the two sets.
- Applied to the training set only; test images are only resized.

In [None]:
import torchvision.transforms as transforms
from PIL import Image
import matplotlib.pyplot as plt
from torchvision.transforms import Lambda

# Channel statistics used by Normalize below.
# Common practice to use standard normalize value from ImageNet
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD = [0.229, 0.224, 0.225]

# Custom transform to convert images to RGB
to_rgb = Lambda(lambda x: x.convert("RGB"))

# Define your transformations
transform = transforms.Compose([
    to_rgb,
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomRotation(degrees=45),
    transforms.ToTensor(),
    transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
])


def tensor_to_displayable(tensor):
    """Turn an output of ``transform`` into an array that ``imshow`` renders faithfully.

    The tensor is converted to NumPy, transposed from (C, H, W) to (H, W, C), the
    Normalize step is undone (value * std + mean per channel) and the result is
    clipped to [0, 1]. Passing the normalised values straight to ``imshow`` would
    have them clipped to [0, 1] as they are, which distorts the preview.
    """
    hwc = tensor.numpy().transpose((1, 2, 0))
    return (hwc * IMAGENET_STD + IMAGENET_MEAN).clip(0.0, 1.0)


sample_image = df_merged.sample(1)
sample_image_name = sample_image['image_name'].iloc[0]

# Accessing individual bounding box attributes
bbox_x = sample_image['bounding_x'].values[0]
bbox_y = sample_image['bounding_y'].values[0]
bbox_width = sample_image['bounding_width'].values[0]
bbox_height = sample_image['bounding_height'].values[0]

# Load an image
image_path = f'./CUB_200_2011/images/{sample_image_name}'
image = Image.open(image_path)

# Apply the transformations and make the result displayable
transformed_image = tensor_to_displayable(transform(image))

# Crop to the bounding box (left, upper, right, lower) and transform the crop.
# The random flip/rotation is drawn again here, so it can differ from the one above.
cropped_image = image.crop((bbox_x, bbox_y, bbox_x+bbox_width, bbox_y+bbox_height))
transformed_cropped = tensor_to_displayable(transform(cropped_image))

# Setup the subplot
fig, axs = plt.subplots(1, 4, figsize=(20, 5))

# Display the original image
axs[0].imshow(image)
axs[0].set_title('Original Image')
axs[0].axis('off')  # Remove axis ticks and labels

# Display the transformed image
axs[1].imshow(transformed_image,aspect='auto')
axs[1].set_title('Transformed Image')
axs[1].axis('off')  # Remove axis ticks and labels

# Display the cropped image
axs[2].imshow(cropped_image,aspect='auto')
axs[2].set_title('Cropped Image')
axs[2].axis('off')  # Remove axis ticks and labels

# Display the transformed cropped image
axs[3].imshow(transformed_cropped,aspect='auto')
axs[3].set_title('Transformed Cropped Image')
axs[3].axis('off')  # Remove axis ticks and labels

plt.show()


### Finalised Preprocessing Steps
1. Crop each image to its bounding box.
2. Training set only: random horizontal flip and random rotation.
3. Resize to (224, 224).

In [None]:
import os
import pandas as pd
import torchvision.transforms as transforms
from PIL import Image
from tqdm import tqdm  # for progress bar
import torchvision.transforms.functional as F

# Requires df_merged from the cells above.
# Folder that receives the processed images; created up front if it is missing.
output_dir = './augmented_images/'
os.makedirs(name=output_dir, exist_ok=True)

# Function to apply transformations including cropping to the bounding box
def apply_transforms(image, bbox, is_training):
    """Crop ``image`` to ``bbox`` and run the training or test pipeline on the crop.

    Args:
        image: Image to process (a PIL image in this notebook).
        bbox: Mapping with the keys 'x', 'y', 'width' and 'height' of the box.
        is_training: Truthy selects ``train_transform``; falsy selects
            ``test_transform``.

    Returns:
        Whatever the selected pipeline returns for the cropped image.
    """
    # Force three channels before anything else, as the exploratory transform does
    # with its to_rgb step; otherwise a non-RGB source image would be saved with a
    # different channel count from the rest. Inputs without a ``convert`` method
    # (e.g. tensors) are passed through unchanged.
    if hasattr(image, 'convert'):
        image = image.convert("RGB")

    # Crop the image first; F.crop expects (top, left, height, width), i.e. y before x
    image = F.crop(image, bbox['y'], bbox['x'], bbox['height'], bbox['width'])

    # Apply training or test transformations
    pipeline = train_transform if is_training else test_transform
    return pipeline(image)

# Transform pipelines for the training and test sets. They are applied to the
# bounding-box crop, so the resize to 224x224 happens after cropping.
# ImageNet normalisation is intentionally left out of both pipelines; if needed it is:
# transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

# Steps shared by both pipelines: resize, then convert to a tensor
_resize_and_to_tensor = [
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
]

# Random augmentation used for training images only, applied before the resize
_train_only_augmentation = [
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomRotation(degrees=45),
]

train_transform = transforms.Compose(_train_only_augmentation + _resize_and_to_tensor)

test_transform = transforms.Compose(list(_resize_and_to_tensor))

# Seed torch's global RNG, which torchvision's random transforms draw from, so the
# images written to disk are the same on every run. torch is imported here because
# this is the only place in the notebook that uses it directly.
import torch

AUGMENTATION_SEED = 42
torch.manual_seed(AUGMENTATION_SEED)

# Paths of the saved images, in the same order as the rows of df_merged
augmented_paths = []

for _, row in tqdm(df_merged.iterrows(), total=df_merged.shape[0]):
    image_path = f'./CUB_200_2011/images/{row["image_name"]}'

    # Accessing individual bounding box attributes directly from the row
    bbox = {
        'x': row['bounding_x'],
        'y': row['bounding_y'],
        'width': row['bounding_width'],
        'height': row['bounding_height']
    }

    # Apply transformations including cropping; the context manager closes the
    # source file as soon as the transformed copy has been produced.
    with Image.open(image_path) as image:
        transformed_image = apply_transforms(image, bbox, row['is_training_image'])

    # Convert the transformed tensor to PIL Image to save it (if not already a PIL Image)
    if not isinstance(transformed_image, Image.Image):
        transformed_image = F.to_pil_image(transformed_image)

    # Save augmented image, mirroring the per-class sub-folder of the source image
    augmented_image_path = os.path.join(output_dir, row["image_name"])
    os.makedirs(os.path.dirname(augmented_image_path), exist_ok=True)
    transformed_image.save(augmented_image_path)

    augmented_paths.append(augmented_image_path)

# Update DataFrame with the new image paths in one assignment instead of row by row
df_merged['augmented_image_name'] = augmented_paths


In [None]:
# Preview the final table rather than rendering every row
display(df_merged.head(10))
print(df_merged.shape)

# Write the merged metadata next to the notebook
df_merged.to_csv('./meta_data.csv', index=False)

# google.colab only exists on Colab. Use the same check as the setup cell at the top
# of the notebook so this cell also runs in a local Jupyter session.
import sys

if 'google.colab' in sys.modules:
    from google.colab import files
    files.download('meta_data.csv')