In [0]:
# -*- coding:utf-8 -*-
import tensorflow as tf
from tensorflow.python.layers.core import Dense
import tensorflow.contrib.keras as keras
from keras.preprocessing.sequence import pad_sequences
import os
import numpy as np
import pandas as pd
from pandas import DataFrame
import time
import random
from PIL import Image
import nltk
import copy
from keras.models import load_model
import modeling

Using TensorFlow backend.


In [0]:
# !pip install -r requirements.txt
# !rm -rf checkpoint4

In [0]:
# from zipfile import ZipFile
# # filename = "checkpoint.zip"
# # with ZipFile(filename,'r') as zip:
# #   zip.extractall()

# filename1 = "dataset.zip"
# with ZipFile(filename1,'r') as zip:
#   zip.extractall()
  
# filename = "system_comments.zip"
# with ZipFile(filename,'r') as zip:
#   zip.extractall()
  
# filename3 = "img.zip"
# with ZipFile(filename3,'r') as zip:
#   zip.extractall()

# print('Done')

In [0]:
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
config = tf.ConfigProto()
config.gpu_options.allow_growth = True

In [0]:
random.seed(1)
np.random.seed(1)

# Useful Functions

In [0]:
def read_comments(file_name):
    with open(file_name,'r') as f:
        file_content = f.readlines()
    comments = []
    for line in file_content:
        comments.append(line[:-1].split())
    return comments

In [0]:
def build_vocab(file_name,min_num):
    with open(file_name,'r') as f:
        file_content = f.readlines()
    word_to_int = {}
    int_to_word = {}
    words_num = 0
    for line in file_content:
        line = line[:-1].split('\t')
        if int(line[2]) >= min_num:
            word_to_int[line[1]] = int(line[0])
            int_to_word[int(line[0])] = line[1] 
            words_num += 1
        else:
            break
    word_to_int['<PAD>'] = 0
    word_to_int['<UNK>'] = words_num+1
    word_to_int['<GO>'] = words_num+2
    word_to_int['<EOS>'] = words_num+3
    int_to_word[0] = '<PAD>'
    int_to_word[words_num+1] = '<UNK>'
    int_to_word[words_num+2] = '<GO>'
    int_to_word[words_num+3] = '<EOS>'
    return word_to_int,int_to_word

In [0]:
def convert_comments(comments,word_to_int,int_to_word):
    comments_to_int = []
    for comment in comments:
        comment_to_int = [word_to_int[word] if word_to_int.get(word) != None else word_to_int['<UNK>'] for word in comment]  
        comment_to_int.insert(0,word_to_int['<GO>'])
        comment_to_int.append(word_to_int['<EOS>'])
        comments_to_int.append(comment_to_int)
    return comments_to_int

In [0]:
def negative_samples(num_samples,toplist,downlist,combinationlist):
    sampledata = []
    num = 0
    while num < num_samples:
        top = random.sample(toplist,1)[0]
        down = random.sample(downlist,1)[0]
        if top+down not in combinationlist:
            sampledata.append((top,down,-1))
            num += 1
    return sampledata

In [0]:
def pad_batch(batch,pad_int):
    max_length = max([len(comment) for comment in batch])
    pad_batch = pad_sequences(batch,maxlen=max_length,value=pad_int,padding='post')
    return pad_batch

In [0]:
def get_img(imgidlist):
    result=[]
    for img_idx in imgidlist:
        img = Image.open('img/'+img_idx+'.jpg')
        img = np.array(img)
        img = img/255.0
        result.append(img)
    return result

In [0]:
def batch_to_input(batch,comments,topidlist,downidlist,pad_int):
    img1 = []#for top
    img2 = []#for down
    img1id = []
    img2id = []
    label = []
    sequence = []
    sequence_length = []
    weight = []
    
    for instance in batch:
        img1.append(instance[0])
        img2.append(instance[1])
#         img1=instance[0]
#         img2=instance[1]
        img1id.append(topidlist[instance[0]])
        img2id.append(downidlist[instance[1]])
        commentid = instance[2]
        if commentid == -1:
            label.append([1,0])
            weight.append(0)
        else:
            label.append([0,1])
            weight.append(1)
        sequence.append(comments[commentid])
        sequence_length.append(len(comments[commentid])-1)
    sequence = pad_batch(sequence,pad_int)
    sequence_input = sequence[:,:-1]
    sequence_output = sequence[:,1:]
    max_sequence_length = np.max(sequence_length)
    
    return np.array(img1),np.array(img2),np.array(img1id),np.array(img2id),np.array(label),sequence_input,sequence_output,sequence_length,max_sequence_length,np.array(weight)

In [0]:
def get_batches(data,batch_size,comments,toplist,downlist,combinationlist,topidlist,downidlist,pad_int):
    datacopy = copy.copy(data)
    datacopy.extend(negative_samples(len(datacopy),toplist,downlist,combinationlist))
    random.shuffle(datacopy)
    for batch_i in range(0,len(datacopy)//batch_size+1):
        start_i = batch_i*batch_size
        batch = datacopy[start_i:start_i+batch_size]          
        yield batch_to_input(batch,comments,topidlist,downidlist,pad_int)

In [0]:
def build_evaluation_batch(fixitem,itemlist,state,topidlist,downidlist):
    img1 = []
    img2 = []
    img1id = []
    img2id = []
    if state == 0:#top,downs
        for item in itemlist:
            img1.append(fixitem)
            img2.append(item)
            img1id.append(topidlist[fixitem])
            img2id.append(downidlist[item])
    if state == 1:#down,tops
        for item in itemlist:
            img1.append(item)
            img2.append(fixitem)
            img1id.append(topidlist[item])
            img2id.append(downidlist[fixitem])
    return np.array(img1),np.array(img2),np.array(img1id),np.array(img2id)

In [0]:

def id_seq_to_word_seq(id_seq,id_vocab,eos):
    index = 0
    while index < len(id_seq):
        if id_seq[index] == eos:
            break
        index += 1
    valid_id_seq = id_seq[:index+1]
    return ' '.join([id_vocab[id] for id in valid_id_seq])

In [0]:
def accuracy(label,prediction):
    return (label.argmax(axis=1) == prediction.argmax(axis=1)).sum()/float(len(label))

In [0]:
def prepare_evaluation(data_path,comments,int_to_word,word_to_int):
    with open(data_path,'r') as f:
        content = f.readlines()
    data = {}
    orderlist = []
    model_comments = {}
    labellist = {}
    query_number = 0
    for line in content:
        line = line[:-1].split('\t')
        if data.get(line[0]) != None:
            data[line[0]].append(line[1])
        else:
            data[line[0]] = [line[1]] 
            labellist[query_number] = {}
            query_number += 1
            orderlist.append(line[0])
        if int(line[2]) == 1:
            model_comments[(line[0],line[1])] = [id_seq_to_word_seq(comments[int(comment)],int_to_word,word_to_int['<EOS>']).split()[1:-1] for comment in line[3].split('|')]
            labellist[query_number-1][line[1]] = 1
        else:
            labellist[query_number-1][line[1]] = 0
    return data,orderlist,model_comments,labellist

In [0]:
def trec_evaluation(qrel_file_path,trec_file_path,trec):
    with open(trec_file_path,'w') as f:
        i = 0
        while i < len(trec):
            j = 0 
            while j < len(trec[i]):
                f.write(str(i)+' '+'Q0 '+trec[i][j][0]+' '+str(j+1)+' '+str(trec[i][j][1])+' '+'Exp'+'\n')
                j += 1
            i += 1   
    result = trec_eval(qrel_file_path,trec_file_path)
    print(result)
    return result

In [0]:
def bleu_evalaution(model_comments,system_comments,beamsearch):
    select = {}
    bleus = []
    if beamsearch:
        for combination,comments in system_comments.items():
            scores = []
            for comment in comments:
                scores.append(nltk.translate.bleu_score.sentence_bleu(model_comments[combination],comment,weights=[1.0]))
            scores = np.array(scores)
            bleus.append(scores.max())
            select[combination] = scores.argmax()#we only select the best for evaluation
    else:
        for combination,comment in system_comments.items():
            bleus.append(nltk.translate.bleu_score.sentence_bleu(model_comments[combination],comment,weights=[1.0]))  
    bleus = np.array(bleus)
    print(bleus.mean())
    return bleus.mean(),select

In [0]:
def auc_evaluation(labellist,trec):
    query_number = 0
    record = []
    while query_number < len(trec):
        negative = 0
        temp = []
        for combination in trec[query_number]:
            if labellist[query_number][combination[0]] == 1:
                temp.append(negative)
            else:
                negative += 1
        record.extend([(negative-val)/float(negative) for val in temp])
        query_number += 1
    auc = np.array(record).mean()
    print(auc)
    return auc

# Prepare Datasets

In [0]:
comments_path = 'dataset/text.dat'
vocab_path = 'dataset/vocab.dat'
min_num = 5

In [0]:
comments = read_comments(comments_path)
comments.append([])
comments

[['congrats', 'on', 'your', 'ts', '!'],
 ['thank', 'you', '!', '...', 'xoxo', '.'],
 ['thank', 'you', '!', 'xoxo', '.'],
 ['congrats', 'on', 'ts', '!', 'your', 'sets', 'are', 'amazing', '!'],
 ['absolutely', 'fabulous', 'congrats', 'on', 'tfs', '.'],
 ['congrats', 'on', 'this', 'amazing', 'top', 'set', '~', '.'],
 ['nice', '!', 'ts', 'congrats', '!'],
 ['outstanding',
  'casual',
  'outfit',
  'and',
  'stunning',
  'clothes',
  'congrats',
  'on',
  'fab',
  'top',
  'set',
  '.'],
 ['congratulations',
  'on',
  'your',
  'very',
  'pretty',
  'top',
  'fashion',
  'set',
  '.'],
 ['simple', '&', 'fab', '!', '<', '333', '.'],
 ['splendid', '!', 'congratulations', '!'],
 ['so', 'chic', 'yet', 'so', 'simplecongratzz', '.'],
 ['too', 'fashionista', 'for', 'me', '!'],
 ['adore', 'your', 'comments', ',', 'thank', 'you', '!', 'xoxo', '.'],
 ['thank',
  'you',
  'so',
  'much',
  'to',
  'everyone',
  '!',
  'i',
  'appreciate',
  'every',
  'comment',
  'and',
  'beautiful',
  'comment',
  

In [0]:
word_to_int,int_to_word = build_vocab(vocab_path,min_num)
vocab_size = len(word_to_int)
print(vocab_size)

16519


In [0]:
comments = convert_comments(comments,word_to_int,int_to_word)
comments

[[16517, 20, 24, 27, 73, 1, 16518],
 [16517, 23, 12, 1, 32, 120, 2, 16518],
 [16517, 23, 12, 1, 120, 2, 16518],
 [16517, 20, 24, 73, 1, 27, 56, 57, 25, 1, 16518],
 [16517, 121, 54, 20, 24, 377, 2, 16518],
 [16517, 20, 24, 8, 25, 59, 6, 147, 2, 16518],
 [16517, 50, 1, 73, 20, 1, 16518],
 [16517, 132, 100, 35, 3, 77, 547, 20, 24, 33, 59, 6, 2, 16518],
 [16517, 62, 24, 27, 37, 52, 59, 84, 6, 2, 16518],
 [16517, 170, 76, 33, 1, 34, 288, 2, 16518],
 [16517, 197, 1, 62, 1, 16518],
 [16517, 7, 44, 311, 7, 16516, 2, 16518],
 [16517, 55, 1797, 29, 64, 1, 16518],
 [16517, 103, 27, 138, 4, 23, 12, 1, 120, 2, 16518],
 [16517,
  23,
  12,
  7,
  36,
  28,
  295,
  1,
  9,
  225,
  301,
  216,
  3,
  26,
  216,
  120,
  13,
  2,
  16518],
 [16517, 25, 1, 20, 274, 1, 9, 234, 139, 178, 2, 13, 11, 2, 16518],
 [16517, 53, 1, 62, 16, 24, 73, 1, 16518],
 [16517, 835, 4, 2672, 2, 16518],
 [16517,
  62,
  24,
  481,
  97,
  462,
  59,
  6,
  431,
  4,
  7,
  85,
  29,
  12,
  2,
  162,
  12,
  260,
  1439,


In [0]:
toplist = []
topidlist = {}
with open('dataset/toplist.dat','r') as f:#in toplist, the first col is img_name of top, the second col is comments_index
    content = f.readlines()
for line in content:
    line = line.split('\t')
    toplist.append(line[0])
    topidlist[line[0]] = len(topidlist)
toplist

['100007206',
 '100015289',
 '100018253',
 '100018720',
 '100027085',
 '100049216',
 '100051783',
 '100058266',
 '100058372',
 '100060461',
 '100060516',
 '100060655',
 '100061551',
 '100061862',
 '100062091',
 '100062481',
 '100062898',
 '100063158',
 '100063339',
 '100063869',
 '100064184',
 '100064275',
 '100064286',
 '100064910',
 '100080519',
 '100080542',
 '10008186',
 '100082634',
 '100083743',
 '100085137',
 '100088182',
 '100089546',
 '100096054',
 '100096594',
 '100102776',
 '100119958',
 '100140031',
 '100159444',
 '100166343',
 '100168384',
 '100202211',
 '100210288',
 '100223900',
 '100224625',
 '100239810',
 '100251069',
 '100263571',
 '100264572',
 '100275339',
 '100293183',
 '100297922',
 '100297948',
 '100331655',
 '100332872',
 '100335564',
 '100337123',
 '100337278',
 '100339255',
 '100339471',
 '100339879',
 '100392486',
 '100409843',
 '100412542',
 '100427419',
 '100427650',
 '100428043',
 '100428440',
 '100438789',
 '100439416',
 '100440004',
 '10044681',
 '100457

In [0]:
topidlist

{'100007206': 0,
 '100015289': 1,
 '100018253': 2,
 '100018720': 3,
 '100027085': 4,
 '100049216': 5,
 '100051783': 6,
 '100058266': 7,
 '100058372': 8,
 '100060461': 9,
 '100060516': 10,
 '100060655': 11,
 '100061551': 12,
 '100061862': 13,
 '100062091': 14,
 '100062481': 15,
 '100062898': 16,
 '100063158': 17,
 '100063339': 18,
 '100063869': 19,
 '100064184': 20,
 '100064275': 21,
 '100064286': 22,
 '100064910': 23,
 '100080519': 24,
 '100080542': 25,
 '10008186': 26,
 '100082634': 27,
 '100083743': 28,
 '100085137': 29,
 '100088182': 30,
 '100089546': 31,
 '100096054': 32,
 '100096594': 33,
 '100102776': 34,
 '100119958': 35,
 '100140031': 36,
 '100159444': 37,
 '100166343': 38,
 '100168384': 39,
 '100202211': 40,
 '100210288': 41,
 '100223900': 42,
 '100224625': 43,
 '100239810': 44,
 '100251069': 45,
 '100263571': 46,
 '100264572': 47,
 '100275339': 48,
 '100293183': 49,
 '100297922': 50,
 '100297948': 51,
 '100331655': 52,
 '100332872': 53,
 '100335564': 54,
 '100337123': 55,
 '1

In [0]:
downlist = []
downidlist = {}
with open('dataset/downlist.dat','r') as f:#in downlist, the first col is img_name of down(i.e. bottom), the second col is comments_index
    content = f.readlines()
for line in content:
    line = line.split('\t')
    downlist.append(line[0])
    downidlist[line[0]] = len(downidlist)
downlist

['100001369',
 '100019428',
 '100029991',
 '100036499',
 '100055005',
 '100058405',
 '100066545',
 '10007116',
 '100075213',
 '100082635',
 '100084474',
 '100097599',
 '100097718',
 '100104639',
 '100139799',
 '100141532',
 '100151362',
 '100154431',
 '100170818',
 '100171960',
 '100180169',
 '100231133',
 '100263578',
 '100283176',
 '100297542',
 '100297601',
 '100332942',
 '100332952',
 '100339119',
 '100339682',
 '100339966',
 '100370701',
 '100388991',
 '100424521',
 '100457050',
 '100459830',
 '100461251',
 '100468083',
 '100469696',
 '100482963',
 '100501510',
 '100520481',
 '100521934',
 '100526918',
 '100527031',
 '100531160',
 '100532766',
 '100560071',
 '100585036',
 '100586769',
 '100589157',
 '100607739',
 '100633684',
 '100636718',
 '100637512',
 '100644693',
 '100658068',
 '100669306',
 '100670958',
 '100692287',
 '100693854',
 '100712448',
 '100715556',
 '100736096',
 '100749159',
 '100749216',
 '100750743',
 '100751496',
 '100751684',
 '100758616',
 '100784998',
 '10078

In [0]:
downidlist

{'100001369': 0,
 '100019428': 1,
 '100029991': 2,
 '100036499': 3,
 '100055005': 4,
 '100058405': 5,
 '100066545': 6,
 '10007116': 7,
 '100075213': 8,
 '100082635': 9,
 '100084474': 10,
 '100097599': 11,
 '100097718': 12,
 '100104639': 13,
 '100139799': 14,
 '100141532': 15,
 '100151362': 16,
 '100154431': 17,
 '100170818': 18,
 '100171960': 19,
 '100180169': 20,
 '100231133': 21,
 '100263578': 22,
 '100283176': 23,
 '100297542': 24,
 '100297601': 25,
 '100332942': 26,
 '100332952': 27,
 '100339119': 28,
 '100339682': 29,
 '100339966': 30,
 '100370701': 31,
 '100388991': 32,
 '100424521': 33,
 '100457050': 34,
 '100459830': 35,
 '100461251': 36,
 '100468083': 37,
 '100469696': 38,
 '100482963': 39,
 '100501510': 40,
 '100520481': 41,
 '100521934': 42,
 '100526918': 43,
 '100527031': 44,
 '100531160': 45,
 '100532766': 46,
 '100560071': 47,
 '100585036': 48,
 '100586769': 49,
 '100589157': 50,
 '100607739': 51,
 '100633684': 52,
 '100636718': 53,
 '100637512': 54,
 '100644693': 55,
 '1

In [0]:
combinationlist = set()
with open('dataset/combinationlist.dat','r') as f:#in combinationlist, the first col is img_name of top, the second col is img_name of down(i.e. bottom), the third col is comments_index    
    content = f.readlines()
for line in content:
    line = line[:-1].split('\t')
    combinationlist.add(line[0]+line[1])
combinationlist

{'14641084289186785',
 '151346669135888971',
 '5977886928912055',
 '1218726267787166',
 '178629307179089190',
 '147407117158445127',
 '168933916180140584',
 '160016482159005722',
 '6789111262502995',
 '126857726141250165',
 '6300431351217986',
 '103604650112572276',
 '112587741127378810',
 '4776860744328008',
 '120662936121506493',
 '182255607183718988',
 '184636188180827867',
 '8905225286865201',
 '126771072129831849',
 '10135562756696017',
 '205704003202623037',
 '103584888104424511',
 '101323507189842867',
 '152304339155441526',
 '143888408138085799',
 '121983541130964570',
 '185958910186056213',
 '148475980106440605',
 '212423082212370969',
 '129104156138651973',
 '187442763151870951',
 '201345548191641998',
 '204288294205058097',
 '7062423570624246',
 '82463080167382553',
 '9388182382186813',
 '149828055134219285',
 '5107252948124422',
 '9415701171372760',
 '9676279255713569',
 '5376157449487127',
 '14436164775158358',
 '162153518154892621',
 '10476888486262803',
 '189885734181225

In [0]:
# imglist = {}
# with open('dataset/datafull.dat','w') as f:
#   for img_idx in toplist:
#       img = Image.open('img/'+img_idx+'.jpg')
#       img = np.array(img)
#       img = img/255.0
#       imglist[img_idx] = img
#   for img_idx in downlist:
#       img = Image.open('img/'+img_idx+'.jpg')
#       img = np.array(img)
#       img = img/255.0
#       imglist[img_idx] = img
# imglist

# Build Model

In [0]:
def get_input():
    img1 = tf.placeholder(tf.float32,[None,224,224,3],'img1')
    img2 = tf.placeholder(tf.float32,[None,224,224,3],'img2')
    img1id = tf.placeholder(tf.int32,[None,],'img1id')
    img2id = tf.placeholder(tf.int32,[None,],'img2id')
    label = tf.placeholder(tf.float32,[None,2],'label')
    sequence_input = tf.placeholder(tf.int32,[None,None],name='sequence_input')
    sequence_output = tf.placeholder(tf.int32,[None,None],name='sequence_output')
    sequence_length = tf.placeholder(tf.int32,[None,],name='sequence_length')
    max_sequence_length = tf.placeholder(tf.int32,[],name='max_sequence_length')
    batch_size = tf.placeholder(tf.int32,[],name='batch_size')
    learning_rate = tf.placeholder(tf.float32,[],name='learning_rate')
    keep_prob = tf.placeholder(tf.float32,[],name='keep_prob')
#     ratio_c = tf.placeholder(tf.float32,[],name='ratio_c')
    ratio_g = tf.placeholder(tf.float32,[],name='ratio_g')
    weight = tf.placeholder(tf.float32,[None,],name='weight')
    flag = tf.placeholder(tf.bool,name='flag')
    return img1,img2,img1id,img2id,label,sequence_input,sequence_output,sequence_length,max_sequence_length,batch_size,learning_rate,keep_prob,ratio_g,weight,flag

In [0]:
def extractor(img):
    conv1 = keras.layers.Conv2D(filters=32,kernel_size=(3,3),strides=(1,1),padding='same',activation='relu',data_format='channels_last',kernel_initializer='glorot_normal')(img)
    conv2 = keras.layers.Conv2D(filters=32,kernel_size=(3,3),strides=(1,1),padding='same',activation='relu',data_format='channels_last',kernel_initializer='glorot_normal')(conv1)
    pool1 = keras.layers.MaxPool2D(pool_size=(16,16),padding='same')(conv1)
    #print(pool1)
    pool2 = keras.layers.MaxPool2D(pool_size=(16,16),padding='same')(conv2)
    #print(pool2)
    concat = keras.layers.Concatenate(axis=-1)([pool1,pool2])
    #print(concat)
    globalpool = keras.layers.GlobalAveragePooling2D()(concat)
    #print(globalpool)
    return concat,globalpool

In [0]:
def image_to_image_attention(conv,globalpool):#conv=[batch_size,14,14,64]，globalpool=[batch_size,64]  
    weights1 = tf.get_variable('weights1',shape=[64,64],initializer=tf.contrib.layers.xavier_initializer(uniform=False))
    weights2 = tf.get_variable('weights2',shape=[64,64],initializer=tf.contrib.layers.xavier_initializer(uniform=False))
    weights3 = tf.get_variable('weights3',shape=[64,1],initializer=tf.contrib.layers.xavier_initializer(uniform=False))
    attn_from = tf.matmul(globalpool,weights1)#attn_form=[batch_size,64]
    features = keras.layers.Reshape([-1,64])(conv)#features=[batch_size,196,64] 
    attn_to = tf.matmul(tf.reshape(features,[-1,64]),weights2)#tf.reshape(features,[-1,64])=[batch_size*196,64]，attn_to=[batch_size*196,64]
    attn_from  = tf.expand_dims(attn_from,1)#attn_from=[batch_size,1,64]
    attn_to = tf.reshape(attn_to,tf.shape(features))#attn_to=[batch_size,196,64] 
    attn_logit = tf.add(attn_from,attn_to)#attn_logit=[batch_size,196,64]
    attn_logit = tf.reshape(attn_logit,[-1,64])#attn_logit=[batch_size*196,64]
    attn_logit = tf.tanh(attn_logit)
    attn_weight = tf.matmul(attn_logit,weights3)#attn_weight=[batch_size*196,1]
    attn_weight = tf.reshape(attn_weight,shape=[tf.shape(conv)[0],tf.shape(conv)[1]*tf.shape(conv)[2]])#attn_weight=[batch_size,196]
    attn_weight = tf.nn.softmax(attn_weight,name='attention_img2img')   
    attn_weight = tf.expand_dims(attn_weight,-1)#attn_weight=[batch_size,196,1]
    attn_conv = tf.multiply(features,attn_weight)#attn_conv=[batch_size,196,64]
    attn_conv = tf.reduce_sum(attn_conv,axis=1)#attn_conv=[batch_size,64]
    return features,attn_conv#e=v^Ttanh(W1s+W2h)，a=softmax(e)

In [0]:
def img2vec(conv):
    extractor_output = keras.layers.Dense(300,activation='relu',kernel_initializer='glorot_normal')(conv)
    return extractor_output

In [0]:
def img_embedding(img1id,img2id):
    top_embedding_matrix = tf.get_variable('top_embedding_matrix',shape=[len(toplist),embedding_size],initializer=tf.contrib.layers.xavier_initializer(uniform=False))
    down_embedding_matrix = tf.get_variable('down_embedding_matrix',shape=[len(downlist),embedding_size],initializer=tf.contrib.layers.xavier_initializer(uniform=False))
    img1_embedding = tf.nn.embedding_lookup(top_embedding_matrix,img1id)
    img2_embedding = tf.nn.embedding_lookup(down_embedding_matrix,img2id)
    return img1_embedding,img2_embedding

In [0]:
def classifier(extractor_output,keep_prob):
    dense = keras.layers.Dense(256,activation='relu',kernel_initializer='glorot_normal')(extractor_output)
    dropout = tf.nn.dropout(dense,keep_prob)
    classifier_output = keras.layers.Dense(2,activation='softmax',kernel_initializer='glorot_normal')(dropout) 
    return classifier_output

In [0]:
def get_gru_cell(keep_prob):
    gru_cell = tf.contrib.rnn.GRUCell(512,kernel_initializer=tf.contrib.layers.xavier_initializer(uniform=False))
    dropout_gru_cell = tf.contrib.rnn.DropoutWrapper(gru_cell,input_keep_prob=keep_prob,output_keep_prob=keep_prob,state_keep_prob=keep_prob)
    return dropout_gru_cell

In [0]:
def generator(sequence_input,initial_state,encoder_output,batch_size,sequence_length,max_sequence_length,vocab_size,embedding_size,keep_prob):
    embedding_matrix = tf.get_variable('embedding_matrix',shape=[vocab_size,embedding_size],initializer=tf.contrib.layers.xavier_initializer(uniform=False))
    # generator_embed_sequence = tf.nn.embedding_lookup(embedding_matrix,sequence_input)
    
    config = modeling.BertConfig(vocab_size=vocab_size, hidden_size=embedding_size,num_hidden_layers=8, num_attention_heads=8, intermediate_size=1024)
    model = modeling.BertModel(config=config, is_training=True, input_ids=sequence_input)
    bert_out = model.get_sequence_output()

    generator_cell = tf.contrib.rnn.MultiRNNCell([get_gru_cell(keep_prob) for _ in range(1)])
    output_layer = Dense(vocab_size,kernel_initializer=tf.contrib.layers.xavier_initializer(uniform=False))
    with tf.variable_scope('generator'):
        training_helper = tf.contrib.seq2seq.TrainingHelper(bert_out,sequence_length=sequence_length,time_major=False)
        #attention
        training_LuongAttention = tf.contrib.seq2seq.LuongAttention(num_units=512,memory=encoder_output,memory_sequence_length=None)   
        training_attn_cell = tf.contrib.seq2seq.AttentionWrapper(cell=generator_cell,attention_mechanism=training_LuongAttention,attention_layer_size=512,alignment_history=False,output_attention=True)    
        training_attn_state = training_attn_cell.zero_state(batch_size,tf.float32).clone(cell_state=initial_state) 
        #attention
        training_decoder = tf.contrib.seq2seq.BasicDecoder(training_attn_cell,helper=training_helper,initial_state=training_attn_state,output_layer=output_layer)
        training_generator_output,training_generator_state,_ = tf.contrib.seq2seq.dynamic_decode(training_decoder,output_time_major=False,impute_finished=True,maximum_iterations=max_sequence_length)
    with tf.variable_scope('generator',reuse=True):
        start_tokens = tf.tile(tf.constant([word_to_int['<GO>']],dtype=tf.int32),[batch_size])
        #attention
        predicting_LuongAttention = tf.contrib.seq2seq.LuongAttention(num_units=512,memory=encoder_output,memory_sequence_length=None)
        predicting_attn_cell = tf.contrib.seq2seq.AttentionWrapper(cell=generator_cell,attention_mechanism=predicting_LuongAttention,attention_layer_size=512,alignment_history=True,output_attention=True) 
        predicting_attn_state = predicting_attn_cell.zero_state(batch_size,tf.float32).clone(cell_state=initial_state) 
        #attention
        predicting_helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(embedding_matrix,start_tokens,word_to_int['<EOS>'])
        predicting_decoder = tf.contrib.seq2seq.BasicDecoder(predicting_attn_cell,predicting_helper,predicting_attn_state,output_layer)
        predicting_generator_output,predicting_generator_state,_ = tf.contrib.seq2seq.dynamic_decode(predicting_decoder,output_time_major=False,impute_finished=True,maximum_iterations=max_sequence_length)
        attention_matrix = tf.identity(predicting_generator_state.alignment_history.stack(),name='attention_matrix')
        #print(attention_matrix)
    with tf.variable_scope('generator',reuse=True):
        start_tokens = tf.tile(tf.constant([word_to_int['<GO>']],dtype=tf.int32),[batch_size])
        beamsearch_initial_state = tf.contrib.seq2seq.tile_batch(initial_state,multiplier=3)
        #attention
        beamsearch_encoder_output = tf.contrib.seq2seq.tile_batch(encoder_output,multiplier=3)
        beamsearch_LuongAttention = tf.contrib.seq2seq.LuongAttention(num_units=512,memory=beamsearch_encoder_output,memory_sequence_length=None)
        beamsearch_attn_cell = tf.contrib.seq2seq.AttentionWrapper(cell=generator_cell ,attention_mechanism=beamsearch_LuongAttention,attention_layer_size=512,alignment_history=False,output_attention=True) 
        beamsearch_attn_state = beamsearch_attn_cell.zero_state(batch_size*3,tf.float32).clone(cell_state=beamsearch_initial_state)
        #attention
        beamsearch_predicting_decoder = tf.contrib.seq2seq.BeamSearchDecoder(beamsearch_attn_cell,embedding=embedding_matrix,start_tokens=start_tokens,end_token=word_to_int['<EOS>'],initial_state=beamsearch_attn_state,beam_width=3,output_layer=output_layer,length_penalty_weight=0.6)
        beamsearch_generator_output,beamsearch_generator_state,_ = tf.contrib.seq2seq.dynamic_decode(beamsearch_predicting_decoder,output_time_major=False,impute_finished=False,maximum_iterations=max_sequence_length)
    return training_generator_output,predicting_generator_output,beamsearch_generator_output   

In [0]:
# def loss(classifier_output,label,training_generator_output,sequence_output,sequence_length,max_sequence_length,ratio_c,ratio_g,weight,flag):
#     classifier_loss = tf.reduce_mean(tf.contrib.keras.losses.categorical_crossentropy(label,classifier_output),name='classifier_loss')
#     classifier_loss_freeze = tf.stop_gradient(classifier_loss)
#     classifier_loss = tf.where(flag,classifier_loss,classifier_loss_freeze)
#     training_logits = tf.identity(training_generator_output.rnn_output,name='training_logits')
#     masks = tf.sequence_mask(sequence_length,max_sequence_length,dtype=tf.float32,name='mask')  
#     generator_loss = tf.contrib.seq2seq.sequence_loss(training_logits,sequence_output,masks,average_across_timesteps=False,average_across_batch=False)  
#     generator_loss = tf.reduce_sum(generator_loss,axis=1)
#     generator_loss = tf.multiply(weight,generator_loss)
#     generator_loss = tf.reduce_mean(generator_loss,name='generator_loss')
#     classifier_loss = tf.multiply(ratio_c,classifier_loss)
#     generator_loss = tf.multiply(ratio_g,generator_loss)
#     tv = tf.trainable_variables()
#     reg_loss = tf.reduce_sum([tf.nn.l2_loss(v) for v in tv])
#     reg_loss_gen = tf.reduce_sum([tf.nn.l2_loss(v) for v in tv if ('generator' in v.name)])
#     reg_loss = tf.where(flag,reg_loss,reg_loss_gen)
#     loss = tf.add_n([classifier_loss,generator_loss,0.0001*reg_loss],name='loss')      
#     return loss

In [0]:
def loss(label,training_generator_output,sequence_output,sequence_length,max_sequence_length,ratio_g,weight,flag):
#     classifier_loss = tf.reduce_mean(tf.contrib.keras.losses.categorical_crossentropy(label,classifier_output),name='classifier_loss')
#     classifier_loss_freeze = tf.stop_gradient(classifier_loss)
#     classifier_loss = tf.where(flag,classifier_loss,classifier_loss_freeze)
    training_logits = tf.identity(training_generator_output.rnn_output,name='training_logits')
    masks = tf.sequence_mask(sequence_length,max_sequence_length,dtype=tf.float32,name='mask')  
    generator_loss = tf.contrib.seq2seq.sequence_loss(training_logits,sequence_output,masks,average_across_timesteps=False,average_across_batch=False)  
    generator_loss = tf.reduce_sum(generator_loss,axis=1)
    generator_loss = tf.multiply(weight,generator_loss)
    generator_loss = tf.reduce_mean(generator_loss,name='generator_loss')
#     classifier_loss = tf.multiply(ratio_c,classifier_loss)
    generator_loss = tf.multiply(ratio_g,generator_loss)
    tv = tf.trainable_variables()
    reg_loss = tf.reduce_sum([tf.nn.l2_loss(v) for v in tv])
    reg_loss_gen = tf.reduce_sum([tf.nn.l2_loss(v) for v in tv if ('generator' in v.name)])
    reg_loss = tf.where(flag,reg_loss,reg_loss_gen)
    loss = tf.add_n([generator_loss,0.0001*reg_loss],name='loss')      
    return loss

In [0]:
def optimizer(loss,learning_rate):
    optimizer = tf.train.AdamOptimizer(learning_rate)
    gradients = optimizer.compute_gradients(loss)
    capped_gradients = [(tf.clip_by_value(grad,-5.,5.),var) for grad,var in gradients if grad is not None]
    train_op = optimizer.apply_gradients(capped_gradients)
    return train_op

In [0]:
def prediction(classifier_output):
    prediction = tf.identity(classifier_output,name='prediction')
    return prediction

In [0]:
def generation(predicting_generator_output,beamsearch_generator_output):
    greedysearch_sequence = tf.identity(predicting_generator_output.sample_id,name='greedysearch_sequence')
    beamsearch_sequence = tf.identity(beamsearch_generator_output.predicted_ids,name='beamsearch_sequence')
    return greedysearch_sequence,beamsearch_sequence

In [0]:
embedding_size = 512
train_graph = tf.Graph()
with train_graph.as_default():
    tf.set_random_seed(1)
    with tf.name_scope('inputs'):
        img1,img2,img1id,img2id,label,sequence_input,sequence_output,sequence_length,max_sequence_length,batch_size,learning_rate,keep_prob,ratio_g,weight,flag = get_input()
    with tf.name_scope('extractor'):
        with tf.variable_scope('extractor'):
            conv_img1,globalpool_img1 = extractor(img1)
        with tf.variable_scope('extractor',reuse=True):
            conv_img2,globalpool_img2 = extractor(img2)
        with tf.variable_scope('image_to_image_attention'):
            features_img1,attn_conv_img1 = image_to_image_attention(conv_img1,globalpool_img2)
        with tf.variable_scope('image_to_image_attention',reuse=True):
            features_img2,attn_conv_img2 = image_to_image_attention(conv_img2,globalpool_img1)
        with tf.variable_scope('img2vec'):
            extractor_output_img1 = img2vec(attn_conv_img1)
        with tf.variable_scope('img2vec',reuse=True):
            extractor_output_img2 = img2vec(attn_conv_img2)
        with tf.variable_scope('img_embedding'):
            img1_embedding,img2_embedding = img_embedding(img1id,img2id)
            
        extractor_output = tf.concat([extractor_output_img1,extractor_output_img2,img1_embedding,img2_embedding],axis=1)
        encoder_output = tf.concat([features_img1,features_img2],axis=1)
        encoder_output_freeze = tf.stop_gradient(encoder_output)
        extractor_output_freeze = tf.stop_gradient(extractor_output)
        encoder_output = tf.where(flag,encoder_output,encoder_output_freeze)
        extractor_output = tf.where(flag,extractor_output,extractor_output_freeze)
#     with tf.name_scope('classifier'):
#         classifier_output = classifier(extractor_output,keep_prob)
#     with tf.name_scope('prediction'):
#         prediction = prediction(classifier_output)  
    with tf.name_scope('generator'):
        dense_output = keras.layers.Dense(512,activation='tanh',kernel_initializer='glorot_normal')(extractor_output)
        initial_state = (dense_output,)    
        training_generator_output,predicting_generator_output,beamsearch_generator_output = generator(sequence_input,initial_state,encoder_output,batch_size,sequence_length,max_sequence_length,vocab_size,embedding_size,keep_prob)  
    with tf.name_scope('generation'):
        greedysearch_sequence,beamsearch_sequence = generation(predicting_generator_output,beamsearch_generator_output) 
    with tf.name_scope('loss'):
        loss = loss(label,training_generator_output,sequence_output,sequence_length,max_sequence_length,ratio_g,weight,flag)
    with tf.name_scope('optimizer'): 
        train_op = optimizer(loss,learning_rate)                                                                    

W0619 07:16:00.649234 140271812183936 deprecation.py:506] From /usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/init_ops.py:1288: calling VarianceScaling.__init__ (from tensorflow.python.ops.init_ops) with dtype is deprecated and will be removed in a future version.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
W0619 07:16:00.901106 140271812183936 deprecation.py:323] From <ipython-input-44-d197f9d38f89>:27: add_dispatch_support.<locals>.wrapper (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
W0619 07:16:00.939923 140271812183936 deprecation_wrapper.py:119] From /content/modeling.py:171: The name tf.variable_scope is deprecated. Please use tf.compat.v1.variable_scope instead.

W0619 07:16:00.944565 140271812183936 deprecation_wrapper.py:119] From /content/modeling.p

# Train Model

In [0]:
with open('dataset/traindata.dat','r') as f:#in traindata, the first col is img_name of top, the second col is img_name of down(i.e. bottom), the third col is comment_index  
    content = f.readlines()
traindata = []
for line in content:
    line = line[:-1].split('\t')
    traindata.append((line[0],line[1],int(line[2])))
traindata

[('210837856', '212370292', 147906),
 ('210837856', '212370292', 147907),
 ('210837856', '212370292', 147908),
 ('217373397', '212370292', 147906),
 ('217373397', '212370292', 147907),
 ('217373397', '212370292', 147908),
 ('210837856', '209881254', 199566),
 ('206022153', '209881254', 199566),
 ('210837856', '214725146', 199567),
 ('90436531', '214725146', 199567),
 ('217373397', '207520081', 256556),
 ('217373397', '207520081', 256557),
 ('217373397', '207520081', 256558),
 ('217373397', '207520081', 256559),
 ('217373397', '206282094', 1124),
 ('217373397', '206282094', 1125),
 ('217373397', '206282094', 1126),
 ('217373397', '206282094', 1127),
 ('217373397', '206282094', 1128),
 ('217373397', '206282094', 1129),
 ('217373397', '206282094', 1130),
 ('217373397', '206282094', 1131),
 ('217373397', '206282094', 1132),
 ('217373397', '206282094', 1133),
 ('217373397', '214358193', 8780),
 ('217373397', '214606215', 285012),
 ('217373397', '214606215', 285013),
 ('217373397', '21460621

In [0]:
# tops_qrel_file_path = 'evaluation/devdata_tops_qrel.dat'
# tops_trec_file_path = 'evaluation/devdata_tops_trec.dat'
# #downs_qrel_file_path = 'evaluation/devdata_downs_qrel.dat'
# #downs_trec_file_path = 'evaluation/devdata_downs_trec.dat'

In [0]:
data_path = 'dataset/devdata_tops.dat'
dev_tops_data,tops_orderlist,model_tops_comments,tops_labellist = prepare_evaluation(data_path,comments,int_to_word,word_to_int)

In [0]:
model_tops_comments

{('108709333',
  '103003853'): [['hahaha',
   '<UNK>',
   '<UNK>',
   'pear',
   '<UNK>',
   '<UNK>',
   'workout',
   '<UNK>',
   '!'], ['<UNK>', '<UNK>', 'pear', 'haha', '.'], ['thanks',
   ':',
   ')',
   'and',
   'no',
   'prob',
   'x',
   '.'], ['love',
   'this',
   'set',
   '!',
   'thanks',
   'for',
   'the',
   'tag',
   'x',
   '#',
   'mihstydothis',
   '.'], ['<UNK>',
   'tag',
   'hahaha',
   '<UNK>',
   '<UNK>',
   '<UNK>',
   'tag',
   '<UNK>',
   'kali',
   '.'], ['love',
   'so',
   'much',
   'this',
   'style',
   ',',
   'so',
   'classic',
   'that',
   'cant',
   'image',
   '.']],
 ('108709333', '116332528'): [['aww', 'thanks', ':', ')', '.'],
  ['your',
   'set',
   'is',
   'truly',
   'amazing',
   ',',
   '<UNK>',
   '.',
   'i',
   'have',
   'one',
   ',',
   'too',
   ':',
   'pls',
   ',',
   'have',
   'a',
   'look',
   '.',
   'xoxo',
   '.']],
 ('191278703',
  '188568826'): [['nice', 'sets', '!', 'join', 'my', 'group', '!']],
 ('191278703',
  '193

In [0]:
data_path = 'dataset/devdata_downs.dat'
dev_downs_data,downs_orderlist,model_downs_comments,downs_labellist = prepare_evaluation(data_path,comments,int_to_word,word_to_int)

In [0]:
model_downs_comments

{('127379149',
  '87412813'): [['why',
   'do',
   'all',
   'i',
   'see',
   'is',
   'women',
   "'s",
   'clothes',
   'can',
   'you',
   'change',
   'that',
   '.'], ['congrtas', 'please', 'follow', 'me', 'and', '.'], ['thank',
   'you',
   'xoxo',
   'hope',
   'everyone',
   'has',
   'a',
   'fabulous',
   'and',
   'stylish',
   'day',
   '.'], ['great',
   'set',
   '!',
   'congrats',
   'on',
   'the',
   'feature',
   '!'], ['awesome', 'men', "'s", 'look', '!', 'pls', 'join', ':', '.'], ['ca',
   "n't",
   'ever',
   'go',
   'wrong',
   'with',
   'a',
   'v',
   'neck-',
   'be',
   'it',
   'man',
   'our',
   'woman',
   '.',
   'check',
   'me',
   'out',
   '<UNK>',
   '.',
   '#',
   '<UNK>',
   '.'], ['great', 'picks', '!'], ['he', 'is', 'fine', '.'], ['yes',
   '!',
   'perfect',
   'men',
   "'s",
   'style',
   ':',
   ')',
   '.'], ['love', 'this', '-', 'nice', 'one', '!'], ['so',
   'good',
   '!'], ['another', 'great', 'menswear', 'set', '!'], ['love',
   '

In [0]:
lr = 0.001
rat_c = 1.0
rat_g = 1.0
epochs = 5
rate = 1.0

In [0]:
cla_cost_list = []
gen_cost_list = []

bleus_tops = []
auc_tops = []
# trec_evals_tops = []

bleus_downs = []
# trec_evals_downs = []
auc_downs = []

In [0]:
beamsearch = True
checkpoint = 'checkpoint/trained_model.ckpt'
with tf.Session(graph=train_graph,config=config) as sess:
    writer = tf.summary.FileWriter('checkpoint/',sess.graph)
    saver = tf.train.Saver(save_relative_paths=True)
#     saver = tf.train.import_meta_graph('checkpoint/trained_model.ckpt-1.meta')
#     saver.restore(sess, tf.train.latest_checkpoint('checkpoint/'))
    sess.run(tf.global_variables_initializer())
    print(time.localtime())
#     classifier_loss = train_graph.get_tensor_by_name('loss/classifier_loss:0')
    generator_loss = train_graph.get_tensor_by_name('loss/generator_loss:0')
    for epoch in range(epochs):
        b_s = 64#batch_size
#         train_cla_cost = 0
        train_gen_cost = 0
        temp_cla_cost_list = []
        temp_gen_cost_list = []
        step = 0
        for _,(x_i1,x_i2,x_id1,x_id2,y_l,x_s_i,x_s_o,seq_len,max_seq_len,wei) in enumerate(get_batches(traindata,b_s,comments,toplist,downlist,
                                                                                                       combinationlist,topidlist,downidlist,word_to_int['<PAD>'])):
            _,cost = sess.run([train_op,generator_loss],{img1:get_img(x_i1),img2:get_img(x_i2),img1id:x_id1,img2id:x_id2,label:y_l,sequence_input:x_s_i,sequence_output:x_s_o,sequence_length:seq_len,max_sequence_length:max_seq_len,batch_size:len(x_i1),learning_rate:lr,keep_prob:rate,ratio_g:rat_g,weight:wei,flag:True})   
#             train_cla_cost += cost1
            train_gen_cost += cost
            step += 1
            if step%1000 == 0:
#                 temp_cla_cost_list.append(train_cla_cost/step)
                temp_gen_cost_list.append(train_gen_cost/step)
                print('Gennerate Loss: ' +str(train_gen_cost/step))
#         temp_cla_cost_list.append(train_cla_cost/step)
        temp_gen_cost_list.append(train_gen_cost/step)
#         cla_cost_list.append(temp_cla_cost_list)
        gen_cost_list.append(temp_gen_cost_list)
#         print('Epoch {}/{} - Training Loss: {:.3f} & {:.3f}'.format(epoch+1,epochs,train_cla_cost/step,train_gen_cost/step))
        print('Epoch {}/{} - Training Loss: {:.3f}'.format(epoch+1,epochs,train_gen_cost/step))
        saver.save(sess,checkpoint,global_step=epoch+1)
        print('Model Trained and Saved')
        print(time.localtime())     

time.struct_time(tm_year=2019, tm_mon=6, tm_mday=19, tm_hour=7, tm_min=16, tm_sec=22, tm_wday=2, tm_yday=170, tm_isdst=0)
Gennerate Loss: 26.279057849884033
Gennerate Loss: 26.00046499824524
Gennerate Loss: 25.889450818061828
Gennerate Loss: 25.722754817008973
Gennerate Loss: 25.69656791152954
Gennerate Loss: 25.648176922957102
Gennerate Loss: 25.585010004861015
Gennerate Loss: 25.55165830695629


# **Validation - Bleu**

In [0]:
beamsearch = True
checkpoint = 'checkpoint/trained_model.ckpt'
with tf.Session(graph=train_graph,config=config) as sess:
    writer = tf.summary.FileWriter('checkpoint/',sess.graph)
    saver = tf.train.Saver()
    saver = tf.train.import_meta_graph('checkpoint/trained_model.ckpt-1.meta')
    saver.restore(sess, tf.train.latest_checkpoint('checkpoint/'))
    
    print(time.localtime())
    print('Top-Down:')
    #validation       
    b_s = 64
    max_seq_len = 30
    system_tops_comments = {}
    query_number = 0
    step = 0
    for top in tops_orderlist:
        downsoftop = dev_tops_data[top]
        probabilitylist = {}
        for batch_i in range(len(downsoftop)//b_s+1):
            start_i = batch_i*b_s
            downs = downsoftop[start_i:start_i+b_s]
            x_i1,x_i2,x_id1,x_id2 = build_evaluation_batch(top,downs,0,topidlist,downidlist)
            seq_len = [30]*len(x_i1)
            prob,gred_seq,beam_seq = sess.run([prediction,greedysearch_sequence,beamsearch_sequence],{img1:get_img(x_i1),img2:get_img(x_i2),
                                                                                                      img1id:x_id1,img2id:x_id2,sequence_length:seq_len,
                                                                                                      max_sequence_length:max_seq_len,batch_size:len(x_i1),keep_prob:1.0,flag:True})
            j = 0
            for down in downs:
                probabilitylist[down] = prob[j][1]
                if model_tops_comments.get((top,down)) != None:
                    if beamsearch:
                        system_tops_comments[(top,down)] = [(id_seq_to_word_seq(beam_seq[j][:,index],int_to_word,word_to_int['<EOS>'])).split()[:-1] for index in range(3)]#3 is beam_width
                    else:
                        system_tops_comments[(top,down)] = (id_seq_to_word_seq(gred_seq[j],int_to_word,word_to_int['<EOS>'])).split()[:-1]
                j += 1 
            step += 1
            if step%1000 == 0:
                print('pass!')
        del probabilitylist,downsoftop
    bleu,_ = bleu_evalaution(model_tops_comments,system_tops_comments,beamsearch)
    bleus_tops.append(bleu)
    del system_tops_comments
    #validation        
    print(time.localtime())
    print('Down-Top:')
    system_downs_comments = {}
    downs_trec = {}
    query_number = 0
    step = 0
    for down in downs_orderlist:
        topsofdown = dev_downs_data[down]
        probabilitylist = {}
        for batch_i in range(len(topsofdown)//b_s+1):
            start_i = batch_i*b_s
            tops = topsofdown[start_i:start_i+b_s]
            x_i1,x_i2,x_id1,x_id2 = build_evaluation_batch(down,tops,1,topidlist,downidlist)
            seq_len = [30]*len(x_i1)
            prob,gred_seq,beam_seq = sess.run([prediction,greedysearch_sequence,beamsearch_sequence],{img1:get_img(x_i1),img2:get_img(x_i2),img1id:x_id1,img2id:x_id2,sequence_length:seq_len,max_sequence_length:max_seq_len,batch_size:len(x_i1),keep_prob:1.0,flag:True})
            j = 0
            for top in tops:
                probabilitylist[top] = prob[j][1]
                if model_downs_comments.get((down,top)) != None:
                    if beamsearch:
                        system_downs_comments[(down,top)] = [(id_seq_to_word_seq(beam_seq[j][:,index],int_to_word,word_to_int['<EOS>'])).split()[:-1] for index in range(3)]
                    else:
                        system_downs_comments[(down,top)] = (id_seq_to_word_seq(gred_seq[j],int_to_word,word_to_int['<EOS>'])).split()[:-1]
                j += 1
            step += 1
            if step%1000 == 0:
                print('pass!')
        del probabilitylist,topsofdown
    bleu,_ = bleu_evalaution(model_downs_comments,system_downs_comments,beamsearch)
    bleus_downs.append(bleu)
    del system_downs_comments
    #validation        
    print(time.localtime())

# Evaluate Model

In [0]:
# tops_qrel_file_path = 'evaluation/testdata_tops_qrel.dat'
# tops_trec_file_path = 'evaluation/testdata_tops_trec.dat'
# downs_qrel_file_path = 'evaluation/testdata_downs_qrel.dat'
# downs_trec_file_path = 'evaluation/testdata_downs_trec.dat'

In [0]:
data_path = 'dataset/testdata_tops.dat'#in testdata_tops, the first col is img_name of top, the second col is img_name of down(i.e. bottom), the third col is rel(1 relevant, 0 irrelevant), the fourth col is comments_index(-1 is a special comment_index for irrelevant combination)    
test_tops_data,tops_orderlist,model_tops_comments,tops_labellist = prepare_evaluation(data_path,comments,int_to_word,word_to_int)

In [0]:
data_path = 'dataset/testdata_downs.dat'#in testdata_downs, the first col is img_name of down(i.e. bottom), the second col is img_name of top, the third col is rel(1 relevant, 0 irrelevant), the fourth col is comments_index(-1 is a special comment_index for irrelevant combination)    
test_downs_data,downs_orderlist,model_downs_comments,downs_labellist = prepare_evaluation(data_path,comments,int_to_word,word_to_int)

In [0]:
model_tops_comments

In [0]:
model_downs_comments

In [0]:
beamsearch = True
print(time.localtime())
checkpoint = 'checkpoint3/trained_model.ckpt-3'
test_graph = tf.Graph()
with tf.Session(graph=test_graph,config=config) as sess:
    loader = tf.train.import_meta_graph(checkpoint+'.meta')
    loader.restore(sess,checkpoint)
    img1 = test_graph.get_tensor_by_name('inputs/img1:0')
    img2 = test_graph.get_tensor_by_name('inputs/img2:0')
    img1id = test_graph.get_tensor_by_name('inputs/img1id:0')
    img2id = test_graph.get_tensor_by_name('inputs/img2id:0')
    sequence_length = test_graph.get_tensor_by_name('inputs/sequence_length:0')
    max_sequence_length = test_graph.get_tensor_by_name('inputs/max_sequence_length:0')
    batch_size = test_graph.get_tensor_by_name('inputs/batch_size:0')
    keep_prob = test_graph.get_tensor_by_name('inputs/keep_prob:0')
    flag = test_graph.get_tensor_by_name('inputs/flag:0')
    prediction = test_graph.get_tensor_by_name('prediction/prediction:0')
    greedysearch_sequence = test_graph.get_tensor_by_name('generation/greedysearch_sequence:0')
    beamsearch_sequence = test_graph.get_tensor_by_name('generation/beamsearch_sequence:0')
    
    print(time.localtime())
    print('Top-Down:')
    b_s = 64
    max_seq_len = 30
    system_tops_comments = {}
#     tops_trec = {}
    query_number = 0
    step = 0
    for top in tops_orderlist:
        downsoftop = test_tops_data[top]
#         probabilitylist = {}
        for batch_i in range(len(downsoftop)//b_s+1):
            start_i = batch_i*b_s
            downs = downsoftop[start_i:start_i+b_s]
            x_i1,x_i2,x_id1,x_id2 = build_evaluation_batch(top,downs,0,topidlist,downidlist)
            seq_len = [30]*len(x_i1)
            gred_seq,beam_seq = sess.run([greedysearch_sequence,beamsearch_sequence],{img1:get_img(x_i1),img2:get_img(x_i2),img1id:x_id1,img2id:x_id2,sequence_length:seq_len,max_sequence_length:max_seq_len,batch_size:len(x_i1),keep_prob:1.0,flag:True})
            j = 0
            for down in downs:
#                 probabilitylist[down] = prob[j][1]
                if model_tops_comments.get((top,down)) != None:
                    if beamsearch:
                        system_tops_comments[(top,down)] = [(id_seq_to_word_seq(beam_seq[j][:,index],int_to_word,word_to_int['<EOS>'])).split()[:-1] for index in range(3)]
                    else:
                        system_tops_comments[(top,down)] = (id_seq_to_word_seq(gred_seq[j],int_to_word,word_to_int['<EOS>'])).split()[:-1]
                j += 1 
            step += 1
            if step%1000 == 0:
                print('pass!')
#         tops_trec[query_number] = sorted(probabilitylist.items(),key=lambda item:item[1],reverse=True)
#         del probabilitylist,downsoftop
        query_number += 1
    _,select_tops = bleu_evalaution(model_tops_comments,system_tops_comments,beamsearch)
#     auc_evaluation(tops_labellist,tops_trec)
#     trec_evaluation(tops_qrel_file_path,tops_trec_file_path,tops_trec)
#     del tops_trec

    print(time.localtime())
    print('Down-Top:')
    system_downs_comments = {}
#     downs_trec = {}
    query_number = 0
    step = 0
    for down in downs_orderlist:
        topsofdown = test_downs_data[down]
#         probabilitylist = {}
        for batch_i in range(len(topsofdown)//b_s+1):
            start_i = batch_i*b_s
            tops = topsofdown[start_i:start_i+b_s]
            x_i1,x_i2,x_id1,x_id2 = build_evaluation_batch(down,tops,1,topidlist,downidlist)
            seq_len = [30]*len(x_i1)
            gred_seq,beam_seq = sess.run([greedysearch_sequence,beamsearch_sequence],{img1:get_img(x_i1),img2:get_img(x_i2),img1id:x_id1,img2id:x_id2,sequence_length:seq_len,max_sequence_length:max_seq_len,batch_size:len(x_i1),keep_prob:1.0,flag:True})
            j = 0
            for top in tops:
#                 probabilitylist[top] = prob[j][1]
                if model_downs_comments.get((down,top)) != None:
                    if beamsearch:
                        system_downs_comments[(down,top)] = [(id_seq_to_word_seq(beam_seq[j][:,index],int_to_word,word_to_int['<EOS>'])).split()[:-1] for index in range(3)]
                    else:
                        system_downs_comments[(down,top)] = (id_seq_to_word_seq(gred_seq[j],int_to_word,word_to_int['<EOS>'])).split()[:-1]
                j += 1
            step += 1
            if step%1000 == 0:
                print('pass!')
#         downs_trec[query_number] = sorted(probabilitylist.items(),key=lambda item:item[1],reverse=True)
#         del probabilitylist,topsofdown
        query_number += 1
    _,select_downs = bleu_evalaution(model_downs_comments,system_downs_comments,beamsearch)
#     auc_evaluation(downs_labellist,downs_trec)
#     trec_evaluation(downs_qrel_file_path,downs_trec_file_path,downs_trec)
#     del downs_trec
print(time.localtime())

In [0]:
with open('system_comments/system_tops_comments.dat','w') as f:
    if beamsearch:
        for combination,commentlist in system_tops_comments.items():
            comment = ' '.join(commentlist[select_tops[combination]])
            f.write(combination[0]+'\t'+combination[1]+'\t'+comment+'\n')
    else:
        for combination,comment in system_downs_comments.items():
            comment = ' '.join(comment)
            f.write(combination[0]+'\t'+combination[1]+'\t'+comment+'\n')

In [0]:
with open('system_comments/system_downs_comments.dat','w') as f:
    if beamsearch:
        for combination,commentlist in system_downs_comments.items():
            comment = ' '.join(commentlist[select_downs[combination]])
            f.write(combination[0]+'\t'+combination[1]+'\t'+comment+'\n')
    else:
        for combination,comment in system_downs_comments.items():
            comment = ' '.join(comment)
            f.write(combination[0]+'\t'+combination[1]+'\t'+comment+'\n')