In [1]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torchvision.models as models
import torchvision.transforms as transforms
from PIL import Image
from tqdm import tqdm

In [2]:
# Root directory for the dataset
root = "data/DressCode/"

# Map labels to their corresponding directories
DIRECTORY_MAP = ["upper_body", "lower_body", "dresses"]

# For each entry of CLASS_TO_NAME (same position), the index of its directory
# in DIRECTORY_MAP: 6 upper-body classes, 3 lower-body classes, 4 dress classes.
# NOTE(review): this list previously had 12 entries, which only lined up with
# CLASS_TO_NAME while two names were accidentally merged; confirm that any
# consumer of class ids expects 13 classes.
CLASS_MAP = [0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 2]

# Human-readable name of every garment class, indexed by class id.
# Every entry needs a trailing comma: adjacent string literals are silently
# concatenated by Python, which is how two names were merged before.
CLASS_TO_NAME = [
    "short sleeve top",
    "long sleeve top",
    "short sleeve outwear",
    "long sleeve outwear",
    "vest",
    "sling",
    "shorts",
    "trousers",
    "skirt",
    "short sleeve dress",
    "long sleeve dress",
    "vest dress",
    "sling dress",
]

In [3]:
# Load the tab-separated, header-less list of training pairs; the three
# columns are named model, garment and label
pairs_file = os.path.join(root, "train_pairs_cropped.txt")
pairs = pd.read_csv(
    pairs_file, sep="\t", header=None, names=["model", "garment", "label"]
)

# Preview the first few rows
pairs.head()

Unnamed: 0,model,garment,label
0,000000_0.jpg,000000_1.jpg,0
1,000001_0.jpg,000001_1.jpg,0
2,000002_0.jpg,000002_1.jpg,0
3,000003_0.jpg,000003_1.jpg,0
4,000004_0.jpg,000004_1.jpg,0


In [4]:
# Fix the PyTorch RNG seed
torch.manual_seed(42)

# Use the GPU when CUDA is available, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

device

'cuda'

In [5]:
# Load in the encoder network
encoder = models.resnet50()

# Build the checkpoint path with os.path.join so it works on every OS. The
# previous literal used backslashes inside a normal (non-raw) string, which is
# Windows-only and relies on invalid escape sequences.
checkpoint_path = os.path.join(
    "models", "ResNet50 Cosine Similarity Margin=1", "checkpoint-3.pt"
)

# Load the weights; map_location remaps the stored tensors onto the selected
# device, so a checkpoint saved on a GPU still loads on a CPU-only machine
encoder.load_state_dict(torch.load(checkpoint_path, map_location=device))

# Send the model to the device
encoder = encoder.to(device)

# Define the transformations for the network.
# NOTE: this rebinds the name `transforms` from the torchvision module to the
# composed pipeline (calculate_features calls it under that name), so the
# import cell must be re-run before this cell can be executed a second time.
transforms = transforms.Compose([transforms.Resize((256, 192)), transforms.ToTensor()])

In [6]:
def calculate_features(image: Image.Image) -> torch.Tensor:
    """
    Get the encoder features for a single image.

    Args:
        image: PIL image to encode. It is passed through the module-level
            ``transforms`` pipeline before being fed to the encoder.

    Returns:
        The encoder output for the image, moved to the CPU. The tensor keeps
        the leading batch dimension of size 1 that is added here.
    """
    # Set the model to evaluation mode
    encoder.eval()

    # Resize & convert to tensor, then add a batch dimension and move the
    # batch to the same device as the encoder
    batch = transforms(image).unsqueeze(0).to(device)

    # Inference only, so gradient tracking is disabled
    with torch.no_grad():
        return encoder(batch).cpu()

In [7]:
# Per-category accumulators. After the loop each features entry is
# concatenated into a single NUM_IMAGES x NUM_FEATURES tensor, and each
# feature_indices entry holds the matching row positions in `pairs`.
features = {category: [] for category in DIRECTORY_MAP}
feature_indices = {category: [] for category in DIRECTORY_MAP}

encoder.eval()

progress = tqdm(
    enumerate(pairs.values),
    desc="Calculating Features",
    total=len(pairs),
    unit="image",
)

for row_idx, (_model, garment, label) in progress:
    category = DIRECTORY_MAP[label]

    # Load in the garment image
    garment_path = os.path.join(root, category, "cropped_images", garment)
    garment_image = Image.open(garment_path).convert("RGB")

    # Store the features together with the row they came from
    features[category].append(calculate_features(garment_image))
    feature_indices[category].append(row_idx)

# Collapse the per-image results into one tensor / index array per category
for category in DIRECTORY_MAP:
    features[category] = torch.cat(features[category])
    feature_indices[category] = np.array(feature_indices[category])

tuple(features[category].shape for category in DIRECTORY_MAP)

Calculating Features: 100%|██████████| 48140/48140 [11:17<00:00, 71.01image/s] 


(torch.Size([13498, 1000]),
 torch.Size([7132, 1000]),
 torch.Size([27510, 1000]))

In [9]:
# Save the features inside the dataset directory; `root` is reused so the
# location is defined in one place (the resulting paths are unchanged)
torch.save(features, os.path.join(root, "train_features.pt"))
# NOTE(review): feature_indices holds NumPy arrays rather than tensors, so it
# is stored via pickling — confirm the code that loads it expects that
torch.save(feature_indices, os.path.join(root, "train_feature_indices.pt"))