In [1]:
# Imports here
import matplotlib.pyplot as plt
import numpy as np
import torch
from torch import nn
from torch import optim
import torch.nn.functional as F
from torchvision import datasets, transforms, models
import torchvision
import torchvision.models as models
import torchvision.transforms as transforms
from torch.utils.tensorboard import SummaryWriter 
import csv
from tqdm import tqdm


from PIL import Image
import json
from matplotlib.ticker import FormatStrFormatter

# Run on the first CUDA GPU when PyTorch can see one; otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(device)

cuda:0


In [2]:
# Root of the image dataset and its three split sub-directories.
data_dir = './dataset'
train_dir, valid_dir, test_dir = (
    f'{data_dir}/{split}' for split in ('train', 'valid', 'test')
)

# Lookup table loaded from cat_to_name.json (parsed JSON, kept as-is).
with open('cat_to_name.json', 'r') as f:
    cat_to_name = json.load(f)

# Define your transforms for the training, validation, and testing sets

# Per-channel mean / std handed to every Normalize step below.
_norm_mean = [0.485, 0.456, 0.406]
_norm_std = [0.229, 0.224, 0.225]

# Training pipeline: random rotation / resized crop / horizontal flip for
# augmentation, then conversion to a tensor and normalization.
train_transforms = transforms.Compose([
    transforms.RandomRotation(30),
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(_norm_mean, _norm_std),
])

# Test pipeline: deterministic resize + centre crop, no augmentation.
test_transforms = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(_norm_mean, _norm_std),
])

# Validation pipeline: same steps as the test pipeline, built as its own object.
validation_transforms = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(_norm_mean, _norm_std),
])

# Training split: images under train_dir with the augmenting pipeline,
# served in shuffled mini-batches of 32.
trainset = datasets.ImageFolder(root=train_dir, transform=train_transforms)
trainloader = torch.utils.data.DataLoader(
    trainset,
    batch_size=32,
    shuffle=True,
    num_workers=0,
)

# NOTE: despite its name, `testset` wraps valid_dir (with test_transforms) and
# is what validloader iterates over, in a fixed order.
testset = datasets.ImageFolder(root=valid_dir, transform=test_transforms)
validloader = torch.utils.data.DataLoader(
    testset,
    batch_size=32,
    shuffle=False,
    num_workers=0,
)


In [3]:
class CNNModel(nn.Module):
    """Small convolutional network: three 3x3 conv layers followed by two linear layers.

    The first convolution keeps the spatial resolution; the second and third
    are each followed by a 2x2 max-pool, so the feature map entering the
    first linear layer is ``input_size // 4`` pixels on each side.

    Args:
        num_classes: Size of the output vector produced by the last linear
            layer. Defaults to 102 (the original hard-coded value).
        input_size: Height/width of the (square) input images. Defaults to
            224, which yields the original ``128 * 56 * 56`` flattened size.
        dropout: Drop probability applied between the two linear layers.
            Defaults to 0.2.

    Raises:
        ValueError: If ``input_size`` is too small to survive the two
            pooling stages (i.e. smaller than 4).
    """

    def __init__(self, num_classes=102, input_size=224, dropout=0.2):
        super(CNNModel, self).__init__()

        # Two stride-2 poolings with floor rounding shrink each side by 4.
        pooled_side = input_size // 4
        if pooled_side < 1:
            raise ValueError(
                f"input_size must be at least 4 to survive two 2x2 poolings, got {input_size}"
            )

        # Layers are created in the same order and under the same attribute
        # names as before so existing state_dict checkpoints keep loading.
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(128 * pooled_side * pooled_side, 512)
        self.fc2 = nn.Linear(512, num_classes)
        self.dropout = nn.Dropout(dropout)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Map a batch of images ``(N, 3, H, W)`` to ``(N, num_classes)`` outputs.

        ``H`` and ``W`` must match the ``input_size`` given at construction,
        otherwise the flattened features will not fit the first linear layer.
        """
        x = self.relu(self.conv1(x))             # no pooling after conv1
        x = self.pool(self.relu(self.conv2(x)))  # side / 2
        x = self.pool(self.relu(self.conv3(x)))  # side / 4
        # flatten() copes with non-contiguous inputs, unlike view().
        x = x.flatten(1)
        x = self.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)
        return x

In [12]:
import numpy as np
import torch
from torch import nn
from torch import optim
import torchvision.models as models
from torch.utils.data import DataLoader, Dataset
from PIL import Image


class TripletDataset(Dataset):
    """Serve ``(anchor, positive, negative)`` triplets drawn from a labelled dataset.

    For index ``idx`` the anchor is sample ``idx``, the positive is a random
    sample with the same label (a different sample whenever the class has more
    than one), and the negative is a random sample with a different label.

    Args:
        data_dir: Despite the name, an indexable dataset whose items are
            ``(sample, label)`` pairs (it is indexed and unpacked that way).
            If it exposes ``targets`` the labels are read from there instead
            of loading every sample. If it also exposes ``samples`` and a
            callable ``loader`` (as torchvision's folder datasets do), the raw
            item is loaded directly so that ``transform`` is the only
            transform applied.
        transform: Callable applied to each of the three images. ``None``
            returns the images untouched.
    """

    def __init__(self, data_dir, transform):
        self.data_dir = data_dir
        self.transform = transform

        # Real class labels, one per sample. (Previously this held sample
        # indices, so the positive/negative label comparisons were meaningless.)
        targets = getattr(data_dir, "targets", None)
        if targets is None:
            targets = [data_dir[i][1] for i in range(len(data_dir))]
        # Unwrap tensor / numpy scalars so labels are hashable and comparable.
        self.labels = [t.item() if hasattr(t, "item") else t for t in targets]

        # label -> indices of all samples carrying that label.
        self._indices_by_label = {}
        for index, label in enumerate(self.labels):
            self._indices_by_label.setdefault(label, []).append(index)

        # True when the wrapped dataset can hand out raw, untransformed items.
        self._has_raw_loader = hasattr(data_dir, "samples") and callable(
            getattr(data_dir, "loader", None)
        )

    def __len__(self):
        return len(self.data_dir)

    def __getitem__(self, idx):
        """Return the transformed ``(anchor, positive, negative)`` tuple for ``idx``.

        Raises:
            ValueError: If the dataset contains fewer than two distinct
                labels, since no negative sample exists then.
        """
        idx = int(idx)
        anchor_label = self.labels[idx]
        positive_idx = self._sample_positive(idx, anchor_label)
        negative_idx = self._sample_negative(anchor_label)

        anchor = self._load(idx)
        positive = self._load(positive_idx)
        negative = self._load(negative_idx)
        return anchor, positive, negative

    def _sample_positive(self, idx, anchor_label):
        """Pick a random same-label index, avoiding ``idx`` itself when possible."""
        same_label = self._indices_by_label[anchor_label]
        if len(same_label) < 2:
            # Single-member class: the anchor is the only available positive.
            return idx
        candidate = idx
        while candidate == idx:
            candidate = same_label[int(np.random.randint(len(same_label)))]
        return candidate

    def _sample_negative(self, anchor_label):
        """Pick a random index whose label differs from ``anchor_label``."""
        if len(self._indices_by_label) < 2:
            raise ValueError(
                "TripletDataset needs at least two distinct labels to draw a negative sample"
            )
        # Rejection sampling over the label list only; no images are loaded here.
        while True:
            candidate = int(np.random.randint(len(self.labels)))
            if self.labels[candidate] != anchor_label:
                return candidate

    def _load(self, index):
        """Fetch one image and apply ``self.transform`` to it exactly once."""
        if self._has_raw_loader:
            # Bypass the wrapped dataset's own transform: re-transforming an
            # already augmented and normalized tensor corrupted the images.
            path, _ = self.data_dir.samples[index]
            image = self.data_dir.loader(path)
        else:
            image, _ = self.data_dir[index]
            if torch.is_tensor(image):
                # Assumes a CHW float tensor with values in [0, 1]; anything
                # outside that range will not survive the byte conversion.
                image = Image.fromarray(
                    image.mul(255).byte().numpy().transpose((1, 2, 0))
                )

        if self.transform is not None:
            image = self.transform(image)
        return image



# Create triplet data loaders
# The triplet dataset wraps the training set; batches of 32 triplets are drawn
# in shuffled order in the main process (num_workers=0).
triplet_trainset = TripletDataset(trainset, train_transforms)
triplet_trainloader = DataLoader(
    triplet_trainset,
    batch_size=32,
    shuffle=True,
    num_workers=0,
)

class TripletLoss(nn.Module):
    """Hinge loss on squared Euclidean distances between embedding triplets.

    Per row the loss is ``max(d(a, p) - d(a, n) + margin, 0)`` where ``d`` is
    the sum of squared differences along dimension 1; the result is the mean
    over all rows.

    Args:
        margin: Amount by which the negative distance must exceed the
            positive distance before a row stops contributing. Defaults to 1.0.
    """

    def __init__(self, margin=1.0):
        super().__init__()
        self.margin = margin

    def forward(self, anchor, positive, negative):
        """Return the scalar mean triplet loss for a batch of embeddings."""
        pos_sq_dist = torch.sum((anchor - positive) ** 2, dim=1)
        neg_sq_dist = torch.sum((anchor - negative) ** 2, dim=1)
        hinge = pos_sq_dist - neg_sq_dist + self.margin
        # Rows that already satisfy the margin are clamped to zero.
        return hinge.clamp(min=0.0).mean()

# Create the CNN model and move its parameters to the selected device
# (Module.to returns the module itself, so the chained form is equivalent).
cnn_model = CNNModel().to(device)

# Define the loss function and the optimizer: Adam over every model parameter
# with a learning rate of 0.001.
triplet_loss = TripletLoss()
optimizer = optim.Adam(cnn_model.parameters(), lr=0.001)

# Training loop
# Each epoch walks the triplet loader once, embeds anchor / positive / negative
# with the same network, and takes one Adam step per batch on the triplet loss.
num_epochs = 3
for epoch in range(num_epochs):
    cnn_model.train()
    running_loss = 0.0
    num_batches = 0  # counted explicitly so an empty loader cannot break the average

    for anchor, positive, negative in triplet_trainloader:
        anchor = anchor.to(device)
        positive = positive.to(device)
        negative = negative.to(device)

        optimizer.zero_grad()

        # Three forward passes through the shared-weight network.
        anchor_output = cnn_model(anchor)
        positive_output = cnn_model(positive)
        negative_output = cnn_model(negative)

        loss = triplet_loss(anchor_output, positive_output, negative_output)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        num_batches += 1

    # Mean batch loss for the epoch; NaN signals that no batch was seen
    # (the old code raised NameError on the leaked loop index in that case).
    epoch_loss = running_loss / num_batches if num_batches else float("nan")
    print(f"Epoch {epoch + 1}, Loss: {epoch_loss}")

print("Finished Training")


KeyboardInterrupt: 