# Import packages



In [117]:
import torch
from torch import nn
import torch.nn.functional as F
from torchvision import transforms, datasets, models
from torch.utils.data import Dataset
from skimage import io
from PIL import Image
import pandas as pd
import numpy as np
import os
import csv
import torch.nn as nn
import time
import torch.optim as optim
from torch.autograd import Variable
from torch.optim import lr_scheduler

# Define data transforms


In [97]:
# Preprocessing shared by every split: resize, convert to tensor, normalize.
_INPUT_SIZE = (224, 224)
_CHANNEL_MEAN = (0.4280, 0.4106, 0.3589)
_CHANNEL_STD = (0.2737, 0.2631, 0.2601)


def _make_transform():
    """Build one resize -> tensor -> normalize pipeline."""
    steps = [
        transforms.Resize(_INPUT_SIZE),  # PIL image in, PIL image out
        transforms.ToTensor(),           # PIL image in, torch.Tensor out
        transforms.Normalize(mean=list(_CHANNEL_MEAN),  # tensor in, tensor out
                             std=list(_CHANNEL_STD)),
    ]
    return transforms.Compose(steps)


# The three splits currently use the same pipeline; each gets its own
# Compose instance so one split can be changed without touching the others.
train_transform = _make_transform()
valid_transform = _make_transform()
test_transform = _make_transform()

In [98]:
# import gdown

# url = 'https://drive.google.com/file/d/1eQkUZLWuwnmfWNBjldV6BKgWG5RiR7Ji/view?usp=sharing'
# output = 'dataset_folder.zip'
# gdown.download(id='1eQkUZLWuwnmfWNBjldV6BKgWG5RiR7Ji', output=output, quiet=False)

In [99]:
# import zipfile
# with zipfile.ZipFile('/home/jupyter/dataset_folder.zip', 'r') as zip_ref:
#     zip_ref.extractall('/home/jupyter/')

# Prepare the dataset

In [100]:
class dataset(Dataset):
    """Dataset pairing numbered frame images with a query and a relevance score.

    Item ``idx`` is built from the image ``<root_dir>/<idx>.jpg``, the query
    stored in column ``0`` of ``annotations`` and the value stored in its
    ``'score'`` column.
    """

    def __init__(self, annotations, root_dir, transform=None):
        """
        Args:
            annotations: Table of annotations. It is indexed as
                ``annotations[0][idx]`` (query) and
                ``annotations['score'][idx]`` (score), and ``len()`` of it
                gives the dataset size.
            root_dir (string): Directory with all the frames.
            transform (callable, optional): Optional transform applied to the
                image of each sample (it receives a PIL image).
        """
        self.transform = transform
        self.root_dir = root_dir
        self.annotations = annotations

    def __len__(self):
        """Return the number of annotation rows."""
        return len(self.annotations)

    def __getitem__(self, idx):
        """Return the sample dict for ``idx``.

        The dict has the keys ``'image'``, ``'query'`` and
        ``'score_annotations'``. When a transform is set, the image is passed
        through it and the score is converted to a torch tensor; otherwise
        both stay NumPy arrays.
        """
        index = idx.tolist() if torch.is_tensor(idx) else idx

        # An index exactly one past the last row yields the blank sample.
        if index == len(self.annotations):
            return blank_data()

        # Input frames are stored as 0.jpg, 1.jpg, ..., so the index is the file name.
        frame_path = os.path.join(self.root_dir, f'{index}.jpg')
        frame = io.imread(frame_path)
        query = np.array(self.annotations[0][index])
        score = np.array([self.annotations['score'][index]]).astype('float').reshape(-1)

        if self.transform:
            return {
                'image': self.transform(Image.fromarray(frame)),
                'query': query,
                'score_annotations': torch.from_numpy(score),
            }
        return {'image': frame, 'query': query, 'score_annotations': score}

In [101]:
def blank_data(path='./dataset/Frames_for_code/blank_image.txt', shape=(3, 224, 224)):
    """Return the placeholder ("blank") sample.

    Args:
        path: Text file readable by ``np.loadtxt`` holding the blank image
            values. Defaults to the project's blank image file.
        shape: Shape the loaded values are reshaped to. Defaults to
            ``(3, 224, 224)``.

    Returns:
        dict with the same keys as a regular dataset sample:
        ``'image'`` (float32 tensor of ``shape``), ``'query'`` (array of
        eight ``'0'`` strings) and ``'score_annotations'`` (float32 tensor
        holding a single 0).

    Raises:
        OSError: If ``path`` cannot be read.
        ValueError: If the file contents cannot be reshaped to ``shape``.
    """
    pixels = np.loadtxt(path).reshape(shape)
    return {
        'image': torch.FloatTensor(pixels),
        'query': np.array(['0'] * 8),
        'score_annotations': torch.FloatTensor([0]),
    }

In [102]:
def _build_split(encoding_csv, score_csv, root_dir, transform):
    """Load one split and return its (untransformed, transformed) datasets."""
    with open(encoding_csv, 'r') as csvfile:
        encoding_rows = list(csv.reader(csvfile, delimiter=',', quotechar='"'))

    scores = pd.read_csv(score_csv, names=['score'])

    # Column 'score' holds the relevance score; the unnamed Series becomes
    # column 0 and holds one encoded-query row (a list) per sample.
    annotations = pd.concat([scores, pd.Series(encoding_rows)], axis=1)
    return dataset(annotations, root_dir), dataset(annotations, root_dir, transform)


def get_dataset(case):
    """Build the datasets for one split.

    Args:
        case: One of ``'train'``, ``'val'`` or ``'test'``.

    Returns:
        Tuple ``(plain_dataset, transformed_dataset)``: the same annotations
        and image directory, without and with the split's image transform.

    Raises:
        ValueError: If ``case`` is not a known split name.
    """
    if case == 'train':
        return _build_split(
            './dataset/encodings/train_dataset_with_query_embedding2.csv',
            './dataset/relevance_scores/majority_vote_train.csv',
            './dataset/Frames_for_code/TrainImages',
            train_transform,
        )
    if case == 'val':
        return _build_split(
            './dataset/encodings/val_dataset_with_query_embedding.csv',
            './dataset/relevance_scores/majority_vote_val.csv',
            './dataset/Frames_for_code/ValImages',
            valid_transform,
        )
    if case == 'test':
        return _build_split(
            './dataset/encodings/test_dataset_with_query_embedding.csv',
            './dataset/relevance_scores/majority_vote_test.csv',
            './dataset/Frames_for_code/TestImages',
            test_transform,
        )
    # Previously an unknown name fell through and returned None, which only
    # surfaced later as an unpacking error at the call site.
    raise ValueError(
        "unknown dataset case {!r}; expected 'train', 'val' or 'test'".format(case)
    )

# Query embedding

In [103]:
def query_embedding(queries, dictionary, max_length):
    """Encode queries as fixed-length rows of dictionary indices.

    Each query is turned into a list of word indices by
    ``encode_queries_index``; rows shorter than ``max_length`` are
    right-padded with zeros and longer rows are cut to ``max_length``.

    Returns:
        A ``torch.Tensor`` with one row per query.
    """
    index_rows = encode_queries_index(queries, dictionary)

    fixed_rows = []
    for row in index_rows:
        missing = max_length - len(row)
        if missing > 0:
            row = [*row, *np.zeros(missing)]
        else:
            row = row[:max_length]
        fixed_rows.append(row)

    return torch.Tensor(fixed_rows)

def encode_queries_index(queries, dictionary):
    """Replace every word of every query by its position in ``dictionary``.

    Args:
        queries: Iterable of query strings; words are separated by a single
            space.
        dictionary: Sequence of known words. When a word occurs more than
            once, the position of its first occurrence is used.

    Returns:
        A list with one list of integer indices per query.

    Raises:
        ValueError: If a query contains a word that is not in ``dictionary``.
    """
    # Build the word -> first position table once instead of scanning the
    # dictionary linearly for every word of every query.
    first_position = {}
    for position, word in enumerate(dictionary):
        first_position.setdefault(word, position)

    encoded_queries = []
    for query in queries:
        query_words = []
        for word in query.split(" "):
            if word not in first_position:
                raise ValueError(
                    "query word {!r} is not in the dictionary".format(word)
                )
            query_words.append(first_position[word])
        encoded_queries.append(query_words)

    return encoded_queries

def _read_first_column(path):
    """Return the first field of every row of the CSV file at ``path``."""
    # The context manager closes the file even if parsing fails.
    with open(path, "r") as handle:
        return [row[0] for row in csv.reader(handle)]


def get_query_embeddings():
    """Build index embeddings for the training and validation queries.

    The dictionary is the first column of ``sorted_title_words.csv``. The
    queries are the first column of the train/val query files with their
    first row dropped. Every query is encoded to a length of 8.

    Returns:
        Tuple ``(query_train_embeddings, query_val_embeddings)``.
    """
    # Create the dictionary / bag of words
    dictionary = _read_first_column(r"./dataset/titles/sorted_title_words.csv")

    # Create embeddings of the training data queries (first row is skipped)
    queries_train = _read_first_column(r"./dataset/queries/train_queries.csv")[1:]
    query_train_embeddings = query_embedding(queries_train, dictionary, 8)

    # Create embeddings of the validation data queries (first row is skipped)
    queries_val = _read_first_column(r"./dataset/queries/val_queries.csv")[1:]
    query_val_embeddings = query_embedding(queries_val, dictionary, 8)

    return query_train_embeddings, query_val_embeddings


# Basic Module

In [104]:
# coding: utf8
class BasicModule(torch.nn.Module):
    """Wrapper around ``nn.Module`` that adds ``save`` and ``load`` helpers."""

    def __init__(self, opt=None):
        super().__init__()
        # Default model name: the string form of the concrete class.
        self.model_name = str(type(self))

    def load(self, path):
        """Load the model weights stored at ``path`` (tensors are mapped to the CPU)."""
        state = torch.load(path, map_location=torch.device('cpu'))
        self.load_state_dict(state)

    def save(self, name=None):
        """Save the model's state dict and return the file name used.

        When ``name`` is not given, the file name is built from
        ``'/checkpoints/'``, the model name and the current time
        (month, day, hour:minute:second) with a ``.pth`` suffix.
        """
        # NOTE(review): the default name starts at the filesystem root and
        # embeds str(type(self)); confirm this is the intended location.
        if name is not None:
            target = name
        else:
            target = time.strftime(
                '/checkpoints/' + self.model_name + '_' + '%m%d_%H:%M:%S.pth'
            )
        torch.save(self.state_dict(), target)
        return target

# Define network

In [105]:
class QVSmodel(BasicModule):
    """Scores an image against a query with a ResNet-34 backbone.

    Image features and a projected query vector are combined by
    element-wise multiplication and mapped to 4 output logits.
    """

    def __init__(self):
        super().__init__()

        # Pretrained ResNet-34; only its convolutional stages are used in forward().
        self.model = models.resnet34(pretrained=True, progress=True)
        # Classifier head: 512 fused features -> 4 relevance classes.
        self.fc1 = torch.nn.Linear(512, 4)
        # Projects the 8-element query encoding to 512 features.
        self.fc_text = torch.nn.Linear(8, 512)

    def forward(self, image, text):
        """Return the relevance class logits for ``image`` and ``text``."""
        backbone = self.model
        stages = (
            backbone.conv1, backbone.bn1, backbone.relu, backbone.maxpool,
            backbone.layer1, backbone.layer2, backbone.layer3, backbone.layer4,
        )
        features = image
        for stage in stages:
            features = stage(features)

        # Average-pool with a 7x7 window, then flatten to (batch, features).
        # assumes the layer4 output is 7x7 (i.e. 224x224 inputs) — TODO confirm
        pooled = F.avg_pool2d(features, 7)
        flat = pooled.view(pooled.size(0), -1)

        text_features = F.relu(self.fc_text(text))

        # Fuse image and text by element-wise multiplication.
        fused = torch.mul(flat, text_features)

        return self.fc1(fused)

# Model training loop

In [106]:
def trainNet(model, train_dataset, val_dataset, n_epochs, learning_rate, betas, eps):
    """Train ``model`` one sample at a time with Adam and cross-entropy loss.

    Args:
        model: Module called as ``model(image_batch, query_encoding)`` that
            returns class logits.
        train_dataset: Indexable dataset whose items are dicts with the keys
            ``'image'``, ``'query'`` and ``'score_annotations'``.
        val_dataset: Dataset passed to ``validate`` after every epoch.
        n_epochs: Number of passes over the training data.
        learning_rate, betas, eps: Adam hyperparameters.

    Returns:
        Tuple ``(net, accuracies)`` where ``net`` is the trained model and
        ``accuracies`` holds one ``[train_acc, val_acc]`` pair (in percent)
        per epoch.
    """
    # Use the GPU when there is one, otherwise fall back to the CPU.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    net = model.to(device).float()
    net.train()

    data_loader = train_dataset
    loss = torch.nn.CrossEntropyLoss()
    optimizer = optim.Adam(net.parameters(), lr=learning_rate, betas=betas, eps=eps)

    training_start_time = time.time()
    accuracies = []

    # NOTE(review): the last index of the dataset is never visited, as in the
    # original loop; confirm whether that is intentional.
    n_samples = max(len(data_loader) - 1, 0)

    for epoch in range(n_epochs):
        count_relevance = 0

        for i_batch in range(n_samples):
            sample_batched = data_loader[i_batch]
            inputs = sample_batched['image'].to(device)
            labels_relevance = sample_batched['score_annotations'].to(device)
            encoding = torch.from_numpy(sample_batched['query'].astype(float)).to(device)

            # Clear gradients first so nothing stale leaks into this step.
            optimizer.zero_grad()

            # Add a leading batch dimension: one sample per step.
            outputs = net(inputs[None, ...].float(), encoding.float())
            loss_size = loss(outputs, labels_relevance.long())
            loss_size.backward()
            optimizer.step()

            # Compute accuracy
            _, preds = torch.max(outputs, dim=1)
            num_correct_relevance = torch.sum(labels_relevance.long() == preds.long())
            count_relevance += num_correct_relevance.item()

            if i_batch % 150 == 0:
                print("Epoch {}: {:.1f} % training done.".format(epoch+1, i_batch/len(data_loader)*100))

        # Divide by the number of samples actually seen (the loop skips one),
        # and avoid dividing by zero when nothing was seen.
        train_acc = 100 * float(count_relevance) / n_samples if n_samples else 0.0
        print("Training accuracy = {:.4f} for epoch {}".format(train_acc, epoch + 1))

        val_acc = validate(net, val_dataset)
        print("Validation accuracy = {:.4f} for epoch {}".format(val_acc, epoch + 1))
        accuracies.append([train_acc, val_acc])

        # validate() switches the model to eval mode; switch back for training.
        net.train()

    print("Training finished, took {:.4f}s".format(time.time() - training_start_time))
    return net, accuracies

# Model validation

In [107]:
def validate(model, val_loader):
    """Return the accuracy (in percent) of ``model`` on ``val_loader``.

    Args:
        model: Module called as ``model(image_batch, query_encoding)`` that
            returns class logits. It is left in eval mode.
        val_loader: Indexable dataset whose items are dicts with the keys
            ``'image'``, ``'query'`` and ``'score_annotations'``.

    Returns:
        Percentage of samples whose predicted class equals the label, or
        ``0.0`` when ``val_loader`` is empty.
    """
    model.eval()  # eval mode: batchnorm uses running statistics

    # Run on whatever device the model already lives on.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

    n_samples = len(val_loader)
    if n_samples == 0:
        return 0.0

    count_relevance = 0
    with torch.no_grad():  # no gradients needed for evaluation
        for i in range(n_samples):
            sample_batched = val_loader[i]
            images = sample_batched['image'].to(device)
            labels = sample_batched['score_annotations'].to(device)
            encoding = torch.from_numpy(sample_batched['query'].astype(float)).to(device)

            # Add a leading batch dimension: one sample per forward pass.
            outputs = model(images[None, ...].float(), encoding.float())

            # Compute accuracy
            _, preds = torch.max(outputs, dim=1)
            num_correct_relevance = torch.sum(labels.long() == preds.long())
            count_relevance += num_correct_relevance.item()

            if i % 50 == 0:
                print("{:.1f} % validation done.".format(i/len(val_loader)*100))

    acc_relevance = 100 * float(count_relevance) / n_samples
    return acc_relevance

# Code Execution

In [None]:
### TO DO: Define model name
model_name = 'E_7_L_1e-4_e_1e-8v2' # change this every time

### TO DO: Specify training hyperparameters
n_epochs = 7
learning_rate = 1e-4
betas = (0.9, 0.999)
eps = 1e-08

# Create the output directory up front so a missing folder cannot make the
# save step fail after training has already finished.
os.makedirs('./models', exist_ok=True)

### Train model
model = QVSmodel()
train_dataset, transform_train_data = get_dataset('train')
val_dataset, transform_val_data = get_dataset('val')

# Validation accuracy of the model before any training on this task.
print(validate(model.cuda(), transform_val_data))

model, accuracies = trainNet(model, transform_train_data, transform_val_data, n_epochs, learning_rate, betas, eps)

### Save model and accuracies
path = './models/' + model_name + '.pth'
torch.save(model.state_dict(), path)

# The context manager closes the log file even if a write fails.
with open('./models/' + model_name + '.txt', 'w') as acc_file:
    hypers = f'Learning rate = {learning_rate}, Betas = {betas}, Epsilon = {eps}\n'
    acc_file.write(hypers)
    for epoch_number, (train_acc, val_acc) in enumerate(accuracies, start=1):
        accs = f'Epoch {epoch_number} : Training accuracy is {train_acc}%, validation accuracy is {val_acc}% \n'
        acc_file.write(accs)

In [None]:
# Evaluate the trained model on the test split and print its accuracy.
test_dataset, transform_test_data = get_dataset('test')
test_accuracy = validate(model.cuda(), transform_test_data)
print(test_accuracy)