In [5]:
# !pip install matplotlib
# !pip install numpy
# !pip install torch
# !pip install tqdm
!pip install tensorboard
!pip install torchsummary



In [6]:
%matplotlib inline
import matplotlib.pyplot as plt
from PIL import Image

import numpy as np
import os
import sys
import random
import torch

import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter

import torchsummary
from tqdm import tqdm

In [4]:
# Reproducibility settings for cuDNN: turn off the benchmark auto-tuner and
# request deterministic kernels.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

# Seed every random number generator the notebook draws from with the same value.
np.random.seed(0)
torch.manual_seed(0)
random.seed(0)

In [5]:
def load_data(train_size=1600, val_size=400, test_size=100, batch_size=32, use_gpu=True):
    """Build the train/validation/test datasets and their data loaders.

    Image paths are collected from ``./dataset/<identity>/*.jpg``, shuffled with
    NumPy's global RNG, and cut into three consecutive, non-overlapping slices.

    Args:
        train_size: maximum number of images in the training split.
        val_size: maximum number of images in the validation split.
        test_size: maximum number of images in the test split.
        batch_size: batch size used by all three loaders.
        use_gpu: forwarded to ``CustomDataset``.

    Returns:
        ``((train_dset, val_dset, test_dset), (train_loader, val_loader, test_loader))``

    Raises:
        FileNotFoundError: if no ``.jpg`` file is found under the dataset folder.
    """
    # Local import: the notebook's import cell does not bring `glob` into scope.
    from glob import glob

    data_path = "./dataset"
    # Sort first so the shuffle below depends only on the RNG seed and not on
    # the order in which the filesystem happens to list the files.
    data_files = sorted(glob('{}/**/*.jpg'.format(data_path)))
    if not data_files:
        raise FileNotFoundError(
            "No .jpg files found under '{}/<identity>/'".format(data_path))
    np.random.shuffle(data_files)

    val_end = train_size + val_size
    test_end = val_end + test_size
    train_files = data_files[:train_size]
    val_files = data_files[train_size:val_end]
    test_files = data_files[val_end:test_end]

    # Report the real slice length as the dataset size: when fewer files exist
    # than requested, a larger declared size would make the loader index past
    # the end of the file list.
    train_dset = CustomDataset(train_files, len(train_files), use_gpu=use_gpu)
    val_dset = CustomDataset(val_files, len(val_files), use_gpu=use_gpu)
    test_dset = CustomDataset(test_files, len(test_files), use_gpu=use_gpu)

    train_loader = DataLoader(train_dset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dset, batch_size=batch_size, shuffle=False)
    test_loader = DataLoader(test_dset, batch_size=batch_size, shuffle=False)

    return (train_dset, val_dset, test_dset), (train_loader, val_loader, test_loader)

class CustomDataset(Dataset):
    """Dataset of identity-labelled images stored as ``<root>/<identity>/<name>.jpg``.

    Each item is ``(image, identity)``: ``image`` is a float tensor with values
    scaled by 1/255 and laid out as (channels, height, width); ``identity`` is
    the integer name of the image's parent directory.
    """

    def __init__(self, data_files, size, img_size=224, use_gpu=True):
        """
        Args:
            data_files: list of image file paths.
            size: requested number of samples; capped by ``len(data_files)``.
            img_size: stored for reference only; images are not resized here.
            use_gpu: if true, every image tensor is moved to CUDA on access.
        """
        self.size = size
        self.img_size = img_size
        self.use_gpu = use_gpu
        self.data_files = data_files

    def __len__(self):
        # Never report more samples than there are files to index.
        return min(self.size, len(self.data_files))

    def __getitem__(self, idx):
        filename = self.data_files[idx]
        # The identity is the name of the directory containing the image.
        # os.path handles both '/' and the platform's native separator.
        identity = int(os.path.basename(os.path.dirname(filename)))

        img = plt.imread(filename)
        if img.ndim == 2:
            # Grayscale image: replicate the single channel so the network
            # always receives three channels.
            img = np.stack([img] * 3, axis=-1)
        img = torch.from_numpy(img / 255.0).float()

        # Convert from (height, width, channels) to (channels, height, width),
        # the layout expected by nn.Conv2d.
        img = img.permute(2, 0, 1)
        if self.use_gpu:
            img = img.cuda()
        return (img, identity)

In [None]:
class VGG16(nn.Module):
    """VGG-16 style network: 13 3x3 convolutions in five blocks, then three linear layers.

    The output of the last layer has ``n_classes`` entries per sample and is
    returned without any activation.
    """

    def __init__(self, n_classes):
        """
        Args:
            n_classes: number of output units of the final linear layer.
        """
        super(VGG16, self).__init__()
        # conv layers: (in_channel size, out_channels size, kernel_size, stride, padding)
        self.conv1_1 = nn.Conv2d(3, 64, kernel_size=3, padding=1)
        self.conv1_2 = nn.Conv2d(64, 64, kernel_size=3, padding=1)

        self.conv2_1 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.conv2_2 = nn.Conv2d(128, 128, kernel_size=3, padding=1)

        self.conv3_1 = nn.Conv2d(128, 256, kernel_size=3, padding=1)
        self.conv3_2 = nn.Conv2d(256, 256, kernel_size=3, padding=1)
        self.conv3_3 = nn.Conv2d(256, 256, kernel_size=3, padding=1)

        self.conv4_1 = nn.Conv2d(256, 512, kernel_size=3, padding=1)
        self.conv4_2 = nn.Conv2d(512, 512, kernel_size=3, padding=1)
        self.conv4_3 = nn.Conv2d(512, 512, kernel_size=3, padding=1)

        self.conv5_1 = nn.Conv2d(512, 512, kernel_size=3, padding=1)
        self.conv5_2 = nn.Conv2d(512, 512, kernel_size=3, padding=1)
        self.conv5_3 = nn.Conv2d(512, 512, kernel_size=3, padding=1)

        # max pooling (kernel_size, stride)
        self.pool = nn.MaxPool2d(2, 2)

        # Brings the final feature map to 7x7 whatever the input resolution.
        # For 224x224 inputs the map is already 7x7, so this is an identity.
        # The layer has no parameters, so saved state dicts stay compatible.
        self.avgpool = nn.AdaptiveAvgPool2d((7, 7))

        # fully connected layers:
        self.fc6 = nn.Linear(7*7*512, 4096)
        self.fc7 = nn.Linear(4096, 4096)
        self.fc8 = nn.Linear(4096, n_classes)

    def forward(self, x, training=None):
        """Run the network on a batch of images.

        Args:
            x: input batch laid out as (batch, 3, height, width).
            training: whether dropout is active. ``None`` (the default) follows
                the module's own mode, so ``model.eval()`` disables dropout.

        Returns:
            Tensor of shape (batch, n_classes) with the raw outputs of ``fc8``.
        """
        if training is None:
            training = self.training

        x = F.relu(self.conv1_1(x))
        x = F.relu(self.conv1_2(x))
        x = self.pool(x)
        x = F.relu(self.conv2_1(x))
        x = F.relu(self.conv2_2(x))
        x = self.pool(x)
        x = F.relu(self.conv3_1(x))
        x = F.relu(self.conv3_2(x))
        x = F.relu(self.conv3_3(x))
        x = self.pool(x)
        x = F.relu(self.conv4_1(x))
        x = F.relu(self.conv4_2(x))
        x = F.relu(self.conv4_3(x))
        x = self.pool(x)
        x = F.relu(self.conv5_1(x))
        x = F.relu(self.conv5_2(x))
        x = F.relu(self.conv5_3(x))
        x = self.pool(x)
        x = self.avgpool(x)
        # Flatten per sample; keeping the batch dimension explicit avoids
        # silently mixing samples when the feature map has an unexpected size.
        x = torch.flatten(x, 1)
        x = F.relu(self.fc6(x))
        x = F.dropout(x, 0.5, training=training)
        x = F.relu(self.fc7(x))
        x = F.dropout(x, 0.5, training=training)
        x = self.fc8(x)
        return x

    def predict(self, x):
        """Return softmax probabilities over the outputs for a batch, with dropout off."""
        return F.softmax(self.forward(x, training=False), dim=1)

    def accuracy(self, x, y):
        """Compute the label-prediction accuracy for a batch.

        Args:
            x: a batch of inputs.
            y: the true labels associated with ``x``.

        Returns:
            Accuracy in percent, as a Python float.
        """
        prediction = self.predict(x)
        _, indices = torch.max(prediction, 1)
        y = y.to(indices.device)
        n_correct = torch.sum(torch.eq(indices.float(), y.float()).float())
        acc = 100 * n_correct / y.size()[0]
        # `.item()` works on 0-dim tensors, where indexing with [0] fails.
        return acc.item()

In [12]:
# Load the TensorBoard notebook extension and start it on the ./runs log directory,
# served on localhost:9007.
%load_ext tensorboard
# Alternative: start it without an explicit host/port.
#%tensorboard --logdir ./runs
%tensorboard --logdir ./runs --host localhost --port 9007
# Alternative: bind to 0.0.0.0 instead of localhost.
#%tensorboard --logdir ./runs --host 0.0.0.0 --port 9007
# Alternative: use /tf/notebooks/graphs as the log directory.
#%load_ext tensorboard %tensorboard --logdir /tf/notebooks/graphs --host 0.0.0.0

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


In [None]:
def train_model(model, optimizer, loaders, writer, num_epochs=10):
    """Train ``model`` for ``num_epochs`` epochs, validating after each one.

    Args:
        model: network to optimise.
        optimizer: optimizer passed on to ``take_training_step``.
        loaders: ``(train_loader, val_loader, test_loader)``; the test loader is unused here.
        writer: TensorBoard writer that receives the per-batch 'Loss/train' scalar.
        num_epochs: number of passes over the training loader.
    """
    train_loader, val_loader, _ = loaders

    for epoch in range(num_epochs):
        print(f"Epoch {epoch+1}/{num_epochs}")
        sys.stdout.flush()

        model.train()
        running_metric = 0
        samples_seen = 0
        with tqdm(train_loader) as progress:
            for i, (img, identities) in enumerate(progress):
                progress.set_description(f'Train Iter {i+1}/{len(train_loader)}')

                loss, metrics = take_training_step(img, identities, model, optimizer)
                running_metric += metrics
                samples_seen += img.size(0)

                batch_loss = loss.item()
                # Global step: batches completed in earlier epochs plus the current index.
                global_step = i + epoch * len(train_loader)
                progress.set_postfix(mb_loss=batch_loss, run_loss=running_metric/samples_seen)
                writer.add_scalar('Loss/train', batch_loss, global_step)

        # Validation pass, labelled with the 1-based epoch number.
        test_model(model, "Val", val_loader, writer, epoch+1, max_show=3)
        print()

def test_model_loss(model, loader):
    """Accumulate the loss metric of ``model`` over ``loader`` without tracking gradients.

    Puts the model in eval mode, then returns ``(summed_metric, sample_count)``:
    the sum of the metric reported by ``calc_loss`` for each batch, and the total
    number of samples seen.
    """
    model.eval()
    total_metric, total_samples = 0, 0
    with torch.no_grad():
        for img, identities in tqdm(loader):
            _, batch_metric = calc_loss(model(img), identities)
            total_metric += batch_metric
            total_samples += img.size(0)
    return total_metric, total_samples

def test_model(model, title, data_loader, writer, epoch, max_show=10):
    """Report the mean loss on ``data_loader`` and plot up to ``max_show`` samples.

    Args:
        model: network to evaluate.
        title: prefix for the printed loss line (e.g. "Val", "Test").
        data_loader: loader yielding ``(img, target)`` batches.
        writer: TensorBoard writer forwarded to ``plot_img``.
        epoch: epoch number forwarded to ``plot_img``.
        max_show: maximum number of samples to plot; values <= 0 plot nothing.
    """
    test_loss, nsamples = test_model_loss(model, data_loader)
    if nsamples == 0:
        # Empty loader: there is no mean to report and nothing to plot.
        print(f"{title} Loss: n/a (no samples)")
        return
    print(f"{title} Loss: {test_loss/nsamples}")

    if max_show <= 0:
        return

    shown = 0
    # Pure inference: disabling autograd avoids building a graph per batch.
    with torch.no_grad():
        for img, mask in data_loader:
            outputs = model(img)
            for j in range(img.shape[0]):
                curr_img, curr_mask, curr_out = img[j], mask[j], outputs[j]
                # NOTE(review): thresholding sigmoid(output) at 0.5 looks like a
                # leftover from a binary-mask task - confirm it suits these outputs.
                curr_out = torch.sigmoid(curr_out) >= 0.5
                # NOTE(review): `plot_img` is not defined in the visible part of
                # this notebook; it must be provided before this is called.
                # `elem` is the running sample index, which stays unique even
                # when the last batch is smaller than the others.
                plot_img(curr_img, curr_mask, writer, epoch, pred=curr_out, torch=True, elem=shown)
                shown += 1
                if shown >= max_show:
                    return

In [None]:
def pairwise_distance(embeddings, squared=False):
    """Return the matrix of Euclidean distances between every pair of embeddings.

    Args:
        embeddings: tensor of shape (batch_size, embed_dim)
        squared: if true, return squared distances; otherwise plain distances.

    Returns:
        float tensor of shape (batch_size, batch_size)
    """
    # Gram matrix of all inner products <a, b>; its diagonal holds ||a||^2.
    gram = embeddings.matmul(embeddings.transpose(0, 1))
    sq_norms = gram.diagonal(0)

    # ||a - b||^2 = ||a||^2 - 2 <a, b> + ||b||^2, broadcast to (batch_size, batch_size).
    sq_dist = sq_norms.unsqueeze(0) - 2.0 * gram + sq_norms.unsqueeze(1)

    # Rounding errors can push entries slightly below zero; floor them at 0.
    sq_dist = torch.max(sq_dist, torch.zeros_like(sq_dist)).float()

    if squared:
        return sq_dist

    # sqrt has an infinite gradient at 0, so zero entries are nudged by a small
    # epsilon before the root and reset to exactly 0 afterwards.
    zero_mask = (sq_dist == 0.0).float()
    dist = torch.sqrt(sq_dist + zero_mask * 1e-16)
    return dist * (1.0 - zero_mask)

In [None]:
def batch_semihard_triplet_loss(labels, embeddings, margin=0.001, squared=False):
    """Build the triplet loss over a batch of embeddings.

    For each anchor, the hardest positive (farthest sample with the same label)
    and the hardest negative (closest sample with a different label) form the
    triplet. Despite the function's name, this is the "batch hard" strategy.

    Args:
        labels: labels of the batch, of size (batch_size,)
        embeddings: tensor of shape (batch_size, embed_dim)
        margin: margin for triplet loss
        squared: Boolean. If true, distances are squared euclidean distances.
                 If false, they are plain euclidean distances.

    Returns:
        triplet_loss: scalar tensor containing the triplet loss
    """
    # Get the pairwise distance matrix
    pairwise_dist = pairwise_distance(embeddings, squared=squared)

    # Labels may arrive on a different device than the embeddings.
    labels = labels.to(pairwise_dist.device)
    labels_equal = labels.unsqueeze(0) == labels.unsqueeze(1)
    not_self = ~torch.eye(labels.shape[0], dtype=torch.bool, device=labels.device)

    # Valid positives: a != p and label(a) == label(p).
    mask_anchor_positive = (labels_equal & not_self).to(pairwise_dist.dtype)
    # Valid negatives: label(a) != label(n).
    mask_anchor_negative = (~labels_equal).to(pairwise_dist.dtype)

    # Put to 0 any element where (a, p) is not valid. This must be an
    # elementwise product; a matrix product would mix rows.
    anchor_positive_dist = mask_anchor_positive * pairwise_dist

    # shape (batch_size, 1)
    hardest_positive_dist, _ = anchor_positive_dist.max(dim=1, keepdim=True)

    # Add the row maximum to the invalid negatives so that they can never be
    # selected as the minimum.
    max_anchor_negative_dist, _ = pairwise_dist.max(dim=1, keepdim=True)
    anchor_negative_dist = pairwise_dist + max_anchor_negative_dist * (1.0 - mask_anchor_negative)

    # shape (batch_size, 1)
    hardest_negative_dist, _ = anchor_negative_dist.min(dim=1, keepdim=True)

    # Combine biggest d(a, p) and smallest d(a, n) into the final triplet loss.
    triplet_loss = F.relu(hardest_positive_dist - hardest_negative_dist + margin)

    # Mean over the anchors of the batch.
    return triplet_loss.mean()

In [None]:
class HardTripletLoss(nn.Module):
    """Hard/Hardest Triplet Loss
    (pytorch implementation of https://omoindrot.github.io/triplet-loss)

    With ``hardest=True`` each anchor contributes one triplet built from its
    hardest positive and hardest negative. Otherwise the loss is averaged over
    all valid triplets whose loss is positive.
    """
    def __init__(self, margin=0.1, hardest=False, squared=False):
        """
        Args:
            margin: margin for triplet loss
            hardest: If true, loss is considered only hardest triplets.
            squared: If true, output is the pairwise squared euclidean distance matrix.
                If false, output is the pairwise euclidean distance matrix.
        """
        super(HardTripletLoss, self).__init__()
        self.margin = margin
        self.hardest = hardest
        self.squared = squared

    def forward(self, embeddings, labels):
        """
        Args:
            labels: labels of the batch, of size (batch_size,)
            embeddings: tensor of shape (batch_size, embed_dim)
        Returns:
            triplet_loss: scalar tensor containing the triplet loss
        """
        # Labels may arrive on a different device than the embeddings.
        labels = labels.to(embeddings.device)
        pairwise_dist = self._pairwise_distance(embeddings, squared=self.squared)
        dtype = pairwise_dist.dtype

        if self.hardest:
            # Get the hardest positive pairs
            mask_anchor_positive = self._get_anchor_positive_triplet_mask(labels).to(dtype)
            valid_positive_dist = pairwise_dist * mask_anchor_positive
            hardest_positive_dist, _ = torch.max(valid_positive_dist, dim=1, keepdim=True)

            # Get the hardest negative pairs: the row maximum is added to the
            # invalid entries so that they can never be selected as the minimum.
            mask_anchor_negative = self._get_anchor_negative_triplet_mask(labels).to(dtype)
            max_anchor_negative_dist, _ = torch.max(pairwise_dist, dim=1, keepdim=True)
            anchor_negative_dist = pairwise_dist + max_anchor_negative_dist * (
                    1.0 - mask_anchor_negative)
            hardest_negative_dist, _ = torch.min(anchor_negative_dist, dim=1, keepdim=True)

            # Combine biggest d(a, p) and smallest d(a, n) into final triplet loss
            triplet_loss = F.relu(hardest_positive_dist - hardest_negative_dist + self.margin)
            triplet_loss = torch.mean(triplet_loss)
        else:
            anc_pos_dist = pairwise_dist.unsqueeze(dim=2)
            anc_neg_dist = pairwise_dist.unsqueeze(dim=1)

            # Compute a 3D tensor of size (batch_size, batch_size, batch_size)
            # triplet_loss[i, j, k] will contain the triplet loss of anc=i, pos=j, neg=k
            # Uses broadcasting where the 1st argument has shape (batch_size, batch_size, 1)
            # and the 2nd (batch_size, 1, batch_size)
            loss = anc_pos_dist - anc_neg_dist + self.margin

            mask = self._get_triplet_mask(labels).to(dtype)
            triplet_loss = loss * mask

            # Remove negative losses (i.e. the easy triplets)
            triplet_loss = F.relu(triplet_loss)

            # Count number of hard triplets (where triplet_loss > 0)
            hard_triplets = torch.gt(triplet_loss, 1e-16).to(dtype)
            num_hard_triplets = torch.sum(hard_triplets)

            # The epsilon keeps the division defined when no triplet is hard.
            triplet_loss = torch.sum(triplet_loss) / (num_hard_triplets + 1e-16)

        return triplet_loss

    @staticmethod
    def _pairwise_distance(x, squared=False, eps=1e-16):
        """Compute the 2D matrix of distances between all the embeddings."""
        cor_mat = torch.matmul(x, x.t())
        norm_mat = cor_mat.diag()
        distances = norm_mat.unsqueeze(1) - 2 * cor_mat + norm_mat.unsqueeze(0)
        distances = F.relu(distances)

        if not squared:
            # sqrt has an infinite gradient at 0: shift zeros by eps before the
            # root and reset them to exactly 0 afterwards.
            mask = torch.eq(distances, 0.0).to(distances.dtype)
            distances = distances + mask * eps
            distances = torch.sqrt(distances)
            distances = distances * (1.0 - mask)

        return distances

    @staticmethod
    def _get_anchor_positive_triplet_mask(labels):
        """Return a 2D mask where mask[a, p] is True iff a and p are distinct and have same label."""
        indices_not_equal = ~torch.eye(
            labels.shape[0], dtype=torch.bool, device=labels.device)

        # Check if labels[i] == labels[j]
        labels_equal = torch.unsqueeze(labels, 0) == torch.unsqueeze(labels, 1)

        return indices_not_equal & labels_equal

    @staticmethod
    def _get_anchor_negative_triplet_mask(labels):
        """Return a 2D mask where mask[a, n] is True iff a and n have distinct labels."""
        # Check if labels[i] != labels[k]
        labels_equal = torch.unsqueeze(labels, 0) == torch.unsqueeze(labels, 1)
        return ~labels_equal

    @staticmethod
    def _get_triplet_mask(labels):
        """Return a 3D mask where mask[a, p, n] is True iff the triplet (a, p, n) is valid.
        A triplet (i, j, k) is valid if:
            - i, j, k are distinct
            - labels[i] == labels[j] and labels[i] != labels[k]
        """
        # Check that i, j and k are distinct
        indices_not_same = ~torch.eye(
            labels.shape[0], dtype=torch.bool, device=labels.device)
        i_not_equal_j = torch.unsqueeze(indices_not_same, 2)
        i_not_equal_k = torch.unsqueeze(indices_not_same, 1)
        j_not_equal_k = torch.unsqueeze(indices_not_same, 0)
        distinct_indices = i_not_equal_j & i_not_equal_k & j_not_equal_k

        # Check if labels[i] == labels[j] and labels[i] != labels[k]
        label_equal = torch.eq(torch.unsqueeze(labels, 0), torch.unsqueeze(labels, 1))
        i_equal_j = torch.unsqueeze(label_equal, 2)
        i_equal_k = torch.unsqueeze(label_equal, 1)
        valid_labels = i_equal_j & ~i_equal_k

        return distinct_indices & valid_labels   # Combine the two masks

In [3]:
def calc_loss(output, identities):
    """Compute the triplet loss for a batch plus a summable metric.

    Args:
        output: embeddings produced by the model, of shape (batch_size, embed_dim).
        identities: identity label of every sample, of size (batch_size,).

    Returns:
        ``(loss, metric)``: ``loss`` is the scalar loss tensor to backpropagate;
        ``metric`` is the loss value multiplied by the batch size, as a Python
        float, so callers can sum it and divide by the number of samples.
    """
    # Labels may arrive on a different device than the embeddings.
    identities = identities.to(output.device)
    # The criterion holds no parameters or buffers, so it needs no device move.
    criterion = HardTripletLoss(margin=0.1)
    triplet_loss = criterion(output, identities)
    metric = triplet_loss.item() * output.size(0)
    return triplet_loss, metric

In [4]:
def take_training_step(img, identities, model, optimizer):
    """Perform one optimisation step on a single batch.

    Clears the accumulated gradients, runs the forward pass, backpropagates the
    loss from ``calc_loss`` and updates the parameters.

    Returns:
        The ``(loss, metrics)`` pair produced by ``calc_loss``.
    """
    # Gradients accumulate across backward() calls, so reset them first.
    optimizer.zero_grad()

    embeddings = model(img)
    step_loss, step_metrics = calc_loss(embeddings, identities)

    step_loss.backward()
    optimizer.step()
    return step_loss, step_metrics

In [5]:
def main():
    """Load the data, build the network, train it and evaluate it on the test split."""
    use_gpu = torch.cuda.is_available()
    print("Using GPU?", use_gpu)
    _datasets, loaders = load_data(use_gpu=use_gpu)
    test_loader = loaders[2]

    # The final layer has 512 outputs.
    model = VGG16(512)
    device = torch.device('cuda' if use_gpu else 'cpu')
    model = model.to(device)
    # 224x224 is the input resolution that yields the 7x7x512 feature map the
    # first fully connected layer of VGG16 is sized for.
    # The summary is told which device the model lives on, so its dummy input
    # is created there too.
    torchsummary.summary(model, input_size=(3, 224, 224), device=device.type)
    sys.stdout.flush()

    optimizer = optim.Adam(model.parameters(), lr=1e-3)
    num_epochs = 10

    writer = SummaryWriter()
    try:
        train_model(model, optimizer, loaders, writer, num_epochs=num_epochs)
        # Log the test pass one step after the last training epoch.
        test_model(model, "Test", test_loader, writer, epoch=num_epochs + 1)
    finally:
        # Flush and close the event file even if training fails.
        writer.close()

main()

Using GPU? True
