In [1]:
%matplotlib inline
%load_ext autoreload
%autoreload 2

In [2]:
import torch
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import torch.nn.functional as F
import torch.optim as optim
import torchvision.models as models
# from torch.nn.utils.rnn import pack_padded_sequence
from model import ImageCNN,MatchCNN

import argparse
import os
import pickle
from data_loader import get_loader,CocoDataset
from build_vocab import Vocabulary
from torchvision import transforms
import time


In [3]:
# matchCNN = MatchCNN(embed_size = embed_size, image_vector_size = image_vector_size, vocab_size = 1000)


In [4]:
"""parameters"""
image_vector_size = 256
embed_size = 100
margin = 0.5
batch_size = 10
epochs = 1
vocab_size = 9956
momentum=0.9
lr = 0.0001
pad_len = 62
num_workers = 2
batch_size = 100

"""set model"""
imageCNN = ImageCNN(image_vector_size=image_vector_size)
matchCNN = MatchCNN(embed_size = embed_size, 
                    image_vector_size = image_vector_size, 
                    vocab_size = vocab_size, 
                    pad_len = pad_len)

if torch.cuda.is_available():
    print("cuda is available")
    imageCNN = imageCNN.cuda()
    matchCNN = matchCNN.cuda()


cuda is available


In [5]:
"""load models"""
model_path = "../models"
imageCNN.load_state_dict(torch.load(os.path.join(model_path, 'imageCNN1513584698-2-0.099086.pkl')))
matchCNN.load_state_dict(torch.load(os.path.join(model_path, 'matchCNN1513584698-2-0.099086.pkl')))

In [None]:
"""set optimizer"""
# params = list(imageCNN.parameters()) + list(matchCNN.parameters())
# params = list(imageCNN.linear.parameters()) + list(imageCNN.bn.parameters()) + list(matchCNN.parameters())
params = list(imageCNN.parameters()) + list(matchCNN.parameters())

optimizer = optim.SGD(params, momentum, lr)



In [6]:

# Load vocabulary wrapper.
with open("../data/coco/vocab.pkl", 'rb') as f:
    vocab = pickle.load(f)

 # Image preprocessing
# For normalization, see https://github.com/pytorch/vision#models
transform = transforms.Compose([ 
    transforms.RandomCrop(224),
    transforms.RandomHorizontalFlip(), 
    transforms.ToTensor(), 
    transforms.Normalize((0.485, 0.456, 0.406), 
                         (0.229, 0.224, 0.225))])

In [None]:
"""load train data"""
# Build data loader
data_loader = get_loader(root = "../data/coco/resized2014", 
                         json = "../data/coco/annotations/captions_train2014.json", 
                         vocab = vocab, 
                         transform = transform, 
                         batch_size = batch_size,
                         shuffle=True,
                         num_workers = num_workers,
                         pad_len = pad_len) 


In [None]:
mean_losses = []

In [None]:
start = time.time()
target = Variable(torch.ones(batch_size, 1)).cuda()
losses = []
imageCNN.train()
matchCNN.train()
for epoch in range(5):
#     losses = []
    for i, (images, captions, lengths) in enumerate(data_loader):
        """input data"""
#         image = Variable(torch.randn(batch_size,3,224,224))
#         sentences = Variable(torch.LongTensor(np.random.randint(low=0, high=999, size=(batch_size,pad_len))))
        if images.size(0) != batch_size:
            break
        
        if torch.cuda.is_available():
            images = images.cuda()
            captions = captions.cuda()
        images = Variable(images, volatile = True)
        captions = Variable(captions)
        imageCNN.zero_grad()
        matchCNN.zero_grad()
        
        """extract imgae feature and embed sentence"""
#         imageCNN = imageCNN.cuda()
        image_vectors = imageCNN(images)
#         print(image_vectors)
        if torch.cuda.is_available():
            image_vectors_wrong = image_vectors[(torch.randperm(batch_size)).cuda()]
        else:
            image_vectors_wrong = image_vectors[torch.randperm(batch_size)]


#         """get correct score"""
        scores = matchCNN(image_vectors, captions)
        scores_wrong = matchCNN(image_vectors_wrong, captions)
#         print("scores",scores)

#         break
        lossFunc = torch.nn.MarginRankingLoss(margin = 0.5)
#         loss = torch.max(margin + scores_wrong - scores, 0)
        loss = lossFunc(scores, scores_wrong, target)
        losses.append(loss)
        loss.backward()
        optimizer.step()
        if i % 10 == 0:
            print("i:%s,loss:%s"%(i,loss))
#             print("time used:", time.time() - start)
        if i ==100:
            print("time used:", time.time() - start)
            break
    mean_loss = torch.mean(torch.cat((losses)))
    mean_losses.append(mean_loss)
    print("mean loss:", mean_loss)
    model_path = "../models"
    """save models"""
    torch.save(imageCNN.state_dict(), os.path.join(model_path, 'imageCNN%d-%d.pkl'%(epoch,mean_loss)))
    torch.save(matchCNN.state_dict(), os.path.join(model_path, 'matchCNN%d-%d%.pkl'%(epoch, mean_loss)))

print("time used:", time.time() - start)

In [None]:
print(mean_losses)

In [None]:
model_path = "../models"
"""save models"""
torch.save(imageCNN.state_dict(), os.path.join(model_path, 'imageCNN.pkl'))
torch.save(matchCNN.state_dict(), os.path.join(model_path, 'matchCNN.pkl'))

In [7]:
# Build val data loader
data_loader_val = get_loader(root = "../data/coco/resized2014", 
                         json = "../data/coco/annotations/captions_val2014.json", 
                         vocab = vocab, 
                         transform = transform, 
                         batch_size = batch_size,
                         shuffle=True,
                         num_workers = num_workers,
                         pad_len = pad_len) 


loading annotations into memory...
Done (t=0.21s)
creating index...
index created!


In [8]:
mean_loss_val = []

In [9]:
start = time.time()
target = Variable(torch.ones(batch_size, 1)).cuda()
losses = []
imageCNN.eval()
matchCNN.eval()
for epoch in range(1):
#     losses = []
    for i, (images, captions, lengths) in enumerate(data_loader_val):
        """input data"""
#         image = Variable(torch.randn(batch_size,3,224,224))
#         sentences = Variable(torch.LongTensor(np.random.randint(low=0, high=999, size=(batch_size,pad_len))))
        if images.size(0) != batch_size:
            break
        
        if torch.cuda.is_available():
            images = images.cuda()
            captions = captions.cuda()
        images = Variable(images, volatile = True)
        captions = Variable(captions, volatile = True)
        imageCNN.zero_grad()
        matchCNN.zero_grad()
        
        """extract imgae feature and embed sentence"""
#         imageCNN = imageCNN.cuda()
        image_vectors = imageCNN(images)
#         print(image_vectors)
        if torch.cuda.is_available():
            image_vectors_wrong = image_vectors[(torch.randperm(batch_size)).cuda()]
        else:
            image_vectors_wrong = image_vectors[torch.randperm(batch_size)]


#         """get correct score"""
        scores = matchCNN(image_vectors, captions)
        scores_wrong = matchCNN(image_vectors_wrong, captions)
        print("scores",scores)
        print("scores_wrong",scores_wrong)


#         break
        lossFunc = torch.nn.MarginRankingLoss(margin = 0.5)
#         loss = torch.max(margin + scores_wrong - scores, 0)
        loss = lossFunc(scores, scores_wrong, target)
        losses.append(loss)

        if i % 1 == 0:
            print("i:%s,loss:%s"%(i,loss))
#             print("time used:", time.time() - start)
        if i ==100:
            print("time used:", time.time() - start)
            break
    mean_loss = torch.mean(torch.cat((losses)))
    print("mean loss:", mean_loss)

print("time used:", time.time() - start)

scores Variable containing:
 -4.3452
 -5.5479
 -3.9573
 -2.6690
 -2.8081
 -4.3276
 -3.7333
 -3.1689
 -4.9725
 -5.5281
 -3.0665
 -3.6117
 -2.3782
 -5.8535
 -4.8446
 -3.9316
 -4.0579
 -5.3530
 -6.4210
-10.2576
 -3.7677
 -4.0482
 -3.8207
 -3.4630
 -1.6869
 -2.5326
 -6.6717
 -3.0870
 -2.8050
 -5.4270
 -3.1744
 -6.9231
-12.5824
 -1.6967
 -4.0818
 -6.0605
 -3.1393
 -4.4356
 -2.2530
 -6.1430
 -3.5489
 -3.2270
 -2.4923
 -3.7829
 -7.8025
 -5.7555
 -4.0041
 -3.0828
 -3.3707
 -4.5149
 -2.5114
 -4.6390
 -3.2777
 -3.0579
 -7.7389
 -3.0717
 -3.7305
 -3.1899
 -3.4771
 -3.7690
 -2.5663
 -1.5068
 -2.3975
 -2.3509
 -4.1680
 -2.1396
 -7.0815
 -6.2712
 -4.1760
 -5.5529
 -4.5611
 -3.3642
 -3.3871
 -2.3040
 -2.9743
 -3.2327
 -2.4716
 -4.1941
 -5.4629
 -4.4965
 -2.1737
 -4.5887
 -2.3844
 -4.7034
 -3.5312
 -7.3873
 -4.4185
 -1.8145
 -5.4825
 -4.3004
 -4.9677
 -2.2495
 -3.0746
 -3.9242
 -2.6762
 -2.2386
 -2.8817
 -2.8721
 -5.5403
 -2.9059
[torch.cuda.FloatTensor of size 100x1 (GPU 0)]

scores_wrong Variable co

scores Variable containing:
 -5.3543
 -2.5700
 -3.1146
 -1.3410
 -3.2885
 -2.5736
 -3.0255
 -4.6977
 -3.9002
 -2.7700
 -2.4855
 -3.4056
 -3.9043
 -6.5207
 -3.7759
 -2.9143
 -3.5867
 -2.6638
 -2.2535
 -2.1073
 -7.2691
 -4.0617
 -3.5766
-11.6090
 -3.5167
 -3.8409
 -2.9705
 -4.0832
 -2.8165
 -7.9301
 -5.1056
 -2.4033
 -2.9021
 -5.3583
 -1.3521
 -3.9677
 -7.5358
 -2.7054
 -1.9810
 -2.9567
 -4.9646
 -3.7482
 -3.4916
 -1.0735
 -3.2818
 -2.3304
 -2.6861
 -3.5492
 -7.2382
 -2.9138
 -3.0348
 -1.8768
 -3.4838
 -5.8688
 -4.0278
 -3.6516
 -2.9081
 -5.2879
 -3.3897
 -2.2092
 -2.6451
 -2.0376
 -4.0981
 -3.0310
 -2.6012
 -4.3151
 -2.3764
 -3.3240
 -3.2285
 -6.6764
 -4.5882
 -3.0862
 -3.4305
 -2.7934
 -3.1143
 -3.5497
 -3.0253
 -3.9277
 -2.1837
 -2.8245
 -3.9046
 -2.8315
 -5.0090
 -3.5483
 -2.4882
 -2.7478
 -3.0300
 -5.0374
 -5.5060
 -7.3690
-10.2598
 -5.1496
 -3.3271
 -7.0287
 -4.4006
 -3.0822
 -2.6492
 -4.1444
 -1.7508
 -4.2527
[torch.cuda.FloatTensor of size 100x1 (GPU 0)]

scores_wrong Variable co

scores Variable containing:
 -8.1584
 -1.4839
 -4.3881
 -3.5683
 -2.1421
 -3.6281
 -2.4225
 -2.4449
 -2.8550
 -5.0948
 -2.9430
 -2.6967
 -3.6389
 -2.6901
 -3.7400
 -7.2063
 -3.8435
 -5.5397
 -8.4398
 -5.1835
 -3.2700
 -3.7169
 -2.8413
 -2.0446
 -4.3951
 -2.1104
 -3.9929
 -2.5798
 -3.5052
 -5.1316
 -3.8308
 -1.8926
 -4.7790
 -3.8024
 -5.3383
 -3.5252
 -0.9140
 -4.4492
 -1.9671
 -2.8187
 -5.0181
 -2.1118
 -2.6542
 -4.9767
 -4.7876
-12.1199
 -3.1512
 -2.7829
 -1.4913
 -2.4774
 -3.3784
 -3.4947
 -3.8854
 -4.2972
 -4.2456
 -2.1340
 -4.3986
 -5.1899
 -2.7968
 -7.3617
 -6.0262
 -6.1132
 -2.1650
 -2.4926
 -4.6046
 -2.8397
 -6.1732
 -2.7715
 -1.2227
 -1.8643
 -4.5395
 -7.8211
 -2.9756
 -4.0013
 -1.0633
 -6.4298
 -1.5458
 -0.9791
 -2.0664
 -2.4849
 -3.2290
 -3.4421
 -3.3646
 -4.0580
 -1.8629
 -2.3340
 -6.7821
 -5.0507
 -5.2751
 -3.9472
 -2.8261
 -3.4809
 -2.0967
 -6.7482
 -4.9488
 -3.4033
 -5.6512
 -3.4809
 -1.4578
 -6.9369
[torch.cuda.FloatTensor of size 100x1 (GPU 0)]

scores_wrong Variable co

scores Variable containing:
 -4.2167
 -2.9744
 -5.1864
 -3.1789
 -2.4747
 -1.8675
 -6.8708
 -2.8620
 -6.2792
 -2.4357
 -2.3986
 -3.3788
 -2.3962
 -2.6762
 -6.0614
 -4.3260
 -3.1857
 -3.9336
 -3.8151
 -3.3160
 -7.2848
 -7.3702
 -3.7786
 -4.4100
 -3.4325
 -3.4811
 -3.4728
 -2.9879
 -4.9998
 -4.1374
 -2.4191
 -4.0565
 -2.6233
 -4.1592
 -3.6061
 -3.5255
 -2.5937
 -3.6641
 -2.7128
-10.2396
 -2.0567
 -3.3573
 -6.7248
 -3.2071
 -1.3402
 -6.1743
 -3.9608
 -3.9365
 -3.2985
 -3.5684
 -3.6837
 -1.3575
 -3.6759
 -5.1915
 -3.8576
 -3.4413
 -2.6596
 -2.7938
 -3.5404
 -6.4210
 -3.7401
 -2.5603
 -3.8741
 -2.7344
 -3.0121
 -2.5868
 -4.7159
 -8.4204
 -4.8450
 -3.5276
 -2.3281
 -4.8782
 -2.9468
 -2.4926
 -3.5114
 -3.0763
 -3.4752
 -2.6266
 -2.6639
 -1.5050
 -5.5149
 -3.9149
 -2.8147
 -6.0408
 -2.1486
 -3.1310
 -2.4055
 -4.3523
 -2.4713
 -3.4411
 -4.6428
 -2.9289
 -4.2900
 -3.8870
 -2.3177
 -1.6328
 -3.0031
 -7.1637
 -3.0272
 -2.5781
[torch.cuda.FloatTensor of size 100x1 (GPU 0)]

scores_wrong Variable co

scores Variable containing:
-14.5824
 -5.2310
 -3.6926
 -3.8172
 -3.2618
 -3.4664
 -3.1306
 -2.5899
 -4.1740
 -1.5002
 -5.2977
 -3.9586
 -8.2290
 -3.4287
 -4.0286
 -3.5081
 -7.3913
 -1.7091
 -6.5532
 -3.3696
 -3.2093
 -3.5447
 -4.2941
 -2.0621
 -3.5151
 -5.3845
 -3.9790
 -2.6445
 -7.6310
 -2.7214
 -6.5597
 -2.7247
 -4.8117
 -6.9355
 -3.6788
 -2.8791
 -3.7575
 -3.0097
 -2.9101
 -3.7953
 -2.0115
 -2.6192
 -2.3089
 -4.2006
 -4.6757
 -3.2350
 -3.4565
 -2.4915
 -4.7350
 -2.2877
 -4.2968
 -0.5292
 -2.8452
 -2.8336
 -3.4366
 -4.4267
 -4.4179
 -6.1174
 -2.8955
 -5.9377
 -3.4393
 -3.4069
 -3.5290
 -2.4538
 -3.6528
 -2.7835
 -2.4274
 -4.5188
 -4.7951
 -2.1129
 -2.0954
 -0.8162
 -3.6009
 -3.8186
 -2.3861
 -3.3667
 -3.8803
 -2.6535
 -1.8719
 -7.0114
 -3.7899
 -4.3830
 -2.9947
 -2.1479
 -4.6354
 -2.3959
 -5.5293
 -2.4590
 -2.4582
 -3.6407
 -2.6912
 -3.7288
 -3.9117
 -4.9317
 -3.4071
 -2.9928
 -2.5895
 -4.7150
 -3.6465
 -4.3596
[torch.cuda.FloatTensor of size 100x1 (GPU 0)]

scores_wrong Variable co

scores Variable containing:
-2.2590
-4.1203
-3.5698
-3.3340
-2.8864
-3.3000
-3.8664
-2.7847
-2.0982
-5.4727
-3.7591
-4.4267
-1.5046
-4.3718
-2.6622
-6.6325
-3.7915
-2.9784
-4.1269
-3.6190
-4.8378
-3.4367
-4.7831
-3.1497
-5.1045
-4.0688
-2.7007
-2.4642
-5.0356
-2.7641
-4.1073
-3.3822
-4.2851
-2.1962
-2.4291
-2.1097
-6.1168
-2.5198
-1.7278
-4.8791
-2.5988
-6.4358
-6.0652
-8.4307
-2.6027
-4.0289
-4.5262
-4.3435
-3.3140
-0.9194
-3.0913
-3.3048
-2.1435
-2.8793
-3.7611
-6.5998
-2.0893
-6.0049
-6.8579
-3.2294
-3.0312
-7.0023
-5.3695
-0.8243
-5.6453
-7.6374
-3.2080
-5.4295
-1.7112
-3.5469
-1.9150
-4.1094
-3.5715
-3.0440
-3.9171
-1.6980
-3.3346
-2.4433
-5.2148
-3.1947
-7.2576
-3.3002
-5.2571
-8.0045
-5.8195
-6.7583
-2.4911
-3.4691
-3.2463
-5.4169
-3.0192
-3.4352
-2.1859
-5.1137
-4.3812
-3.2022
-3.6568
-3.8781
-2.7444
-4.0484
[torch.cuda.FloatTensor of size 100x1 (GPU 0)]

scores_wrong Variable containing:
 -3.0383
 -7.7005
 -3.5819
 -3.4518
 -4.8839
 -5.5605
 -6.5874
 -5.6310
 -2.7799
 -7.8121


scores Variable containing:
-2.7257
-3.3486
-3.6193
-4.9426
-3.0419
-4.2897
-2.9757
-2.0870
-2.8956
-4.7442
-7.1103
-3.0557
-2.9784
-4.0534
-3.5077
-3.9010
-3.5846
-3.1255
-4.0075
-3.4837
-2.6006
-4.7291
-5.3752
-2.5569
-5.9010
-8.0182
-3.3953
-2.8465
-3.4148
-3.8437
-1.9404
-3.4524
-3.5969
-2.8931
-3.6849
-3.1923
-4.6609
-5.8907
-2.7855
-2.5175
-2.4553
-3.3742
-4.6994
-1.7564
-3.3241
-6.0680
-3.0938
-4.5659
-3.3036
-3.3757
-2.5272
-3.2654
-3.3209
-6.2121
-3.6347
-1.0984
-2.0702
-3.8580
-4.7361
-1.8388
-2.8077
-2.7346
-2.5600
-3.1674
-2.6909
-4.2178
-1.8015
-3.0223
-2.9534
-6.4242
-3.2942
-3.4955
-6.4666
-4.2556
-3.2278
-0.3801
-2.9783
-7.9367
-4.0170
-3.1311
-2.5168
-3.1719
-3.6984
-4.0650
-3.0929
-4.6568
-3.4541
-2.1263
-2.7221
-3.4484
-2.0402
-3.4480
-3.5852
-2.0715
-3.5084
-4.5472
-3.7058
-1.4046
-8.1097
-4.0461
[torch.cuda.FloatTensor of size 100x1 (GPU 0)]

scores_wrong Variable containing:
 -3.2143
 -5.3186
 -2.8661
 -7.6457
 -3.7180
 -4.3120
 -3.4112
 -4.4070
 -4.5010
 -7.7505


scores Variable containing:
 -4.5875
 -2.7074
 -2.7790
 -2.2949
 -1.7024
 -3.2267
 -4.1952
 -3.8534
 -2.7019
 -2.6426
 -4.3191
 -3.1037
 -4.0430
 -6.6182
 -7.2528
 -1.3191
 -3.3355
 -4.1669
 -4.0925
 -2.1852
 -5.2880
 -6.1482
 -3.1473
 -1.4679
 -4.3133
 -1.8103
 -2.1999
 -5.7429
 -3.2402
 -3.2555
 -2.7415
 -3.5251
 -3.6595
 -2.2777
 -2.4134
 -3.2592
 -8.1697
 -4.9036
 -5.5834
 -3.9880
 -4.3141
 -3.4909
 -3.5478
 -2.6192
 -2.8868
 -2.2057
 -2.9306
 -2.5731
 -5.1365
 -2.3145
 -3.6608
 -0.6970
 -5.5012
 -2.4619
 -4.6223
 -4.5251
 -3.8672
 -4.7603
 -3.4492
 -2.3870
 -3.4949
 -5.5190
 -2.6660
 -3.3018
 -4.7827
 -3.2203
 -3.3744
 -2.2350
 -3.5995
 -2.7212
 -3.4541
 -6.3113
 -7.1321
 -2.5272
-11.2109
 -3.1557
 -2.2079
 -5.2590
 -2.8174
 -5.9812
 -2.8361
 -3.7887
 -3.1873
 -1.9611
 -1.3401
 -3.5690
 -3.0745
 -4.3129
 -4.1473
 -1.7022
 -4.5794
 -5.5768
 -4.4711
 -3.0297
 -5.5589
 -4.1435
 -3.2458
 -2.9376
 -2.9355
 -2.0230
[torch.cuda.FloatTensor of size 100x1 (GPU 0)]

scores_wrong Variable co

scores Variable containing:
 -3.1451
 -5.6600
 -3.3620
 -3.4843
 -2.8827
 -3.9191
 -3.2343
 -6.3427
 -3.0296
 -2.6594
 -4.4406
 -2.0074
 -4.4550
 -3.4879
 -2.0305
 -4.2528
 -3.0899
 -5.3946
 -4.1410
 -3.1533
-10.9267
 -2.1280
 -3.7897
 -4.0602
 -4.1072
 -3.1047
 -2.5709
 -1.7080
 -2.3381
 -3.4899
 -4.1878
 -6.9317
 -2.7205
 -2.4989
 -2.0580
 -3.0899
 -3.9465
 -2.6050
 -2.2873
 -3.3319
 -2.8355
 -3.8621
 -3.2747
 -4.4339
 -7.2520
 -2.7211
 -2.7872
 -3.8682
 -3.4466
 -4.3271
 -3.9214
 -2.8565
 -5.1723
 -3.7980
 -2.2430
 -3.4485
 -3.4188
 -4.1378
 -2.3286
 -3.6686
 -4.7047
 -6.1527
 -3.0505
 -1.4225
 -3.5372
 -3.5478
 -2.4064
 -3.0181
 -3.7378
 -2.3032
 -3.7853
 -4.0191
 -2.6617
 -4.0590
 -4.0686
 -2.2356
 -3.6525
 -4.5377
 -8.4616
 -7.9475
 -3.2963
 -0.8194
 -3.7055
 -2.7708
 -3.9892
 -2.9373
 -6.6175
 -5.4292
 -2.8589
 -2.4501
 -8.0756
 -5.0993
 -3.2726
 -4.2420
 -3.5126
 -5.0561
 -3.8226
 -2.4995
 -3.5182
 -3.9267
[torch.cuda.FloatTensor of size 100x1 (GPU 0)]

scores_wrong Variable co

scores Variable containing:
 -2.4617
 -2.9027
 -4.0069
 -3.8310
 -3.7564
 -7.1149
 -3.9423
 -2.1032
 -3.4587
 -4.4875
 -2.6969
 -2.1839
 -4.5997
 -2.3185
 -2.5387
 -4.7637
 -4.9760
 -6.0013
 -2.1326
 -2.0229
 -6.3651
 -5.9058
 -2.5193
 -4.5170
 -7.9581
 -3.3285
 -2.5924
 -4.1739
 -4.4698
 -4.3426
 -5.4710
 -5.1291
 -1.9792
 -2.8094
 -3.1331
 -2.6643
 -3.9270
 -2.1024
 -4.3996
 -3.0498
 -4.4228
 -2.0507
 -4.7568
 -2.8848
 -2.3073
 -3.4183
 -4.3172
 -2.6847
 -4.0578
 -5.3174
 -3.3321
 -3.0326
 -3.3433
 -4.5155
 -4.6366
 -3.4315
 -3.4534
 -3.8004
 -3.8173
 -2.0053
 -5.3604
 -3.3776
 -5.8147
 -4.7666
 -3.8506
 -4.1548
 -2.8706
 -5.0283
 -3.1638
 -4.3567
 -2.6452
 -3.1331
 -4.8482
 -3.5369
 -2.9762
 -4.6516
 -2.5911
 -3.7761
 -4.0939
 -3.5566
 -3.8274
 -4.7629
 -2.7272
 -4.3073
 -4.7194
 -2.0829
 -4.3713
 -2.5524
 -2.7512
 -3.1180
 -4.1932
 -2.6659
 -1.3971
 -1.7273
 -6.8656
-10.6586
 -1.9838
 -8.9187
 -1.0860
 -2.4771
[torch.cuda.FloatTensor of size 100x1 (GPU 0)]

scores_wrong Variable co

scores Variable containing:
-2.7855
-4.5903
-5.2909
-3.1518
-2.8264
-3.7661
-3.9671
-2.5429
-3.4438
-6.8279
-4.0674
-3.2397
-3.3838
-3.9159
-3.6323
-2.1675
-3.2044
-2.9961
-0.9151
-6.5827
-1.7393
-2.8639
-2.2018
-3.0671
-2.4952
-2.4260
-3.7885
-4.7153
-4.3531
-3.5570
-2.7253
-1.9260
-7.5970
-4.7673
-6.2658
-3.3382
-6.3558
-3.8047
-2.8007
-3.2027
-1.9801
-7.1379
-1.9676
-2.4574
-3.8055
-2.2803
-5.9562
-2.9097
-6.0563
-4.1526
-2.3565
-2.5963
-6.4341
-2.2112
-7.5785
-3.4233
-7.0363
-2.2471
-5.3291
-3.8363
-2.7106
-3.9163
-2.4410
-3.9144
-3.8650
-3.3500
-3.2242
-3.4590
-3.6096
-2.3375
-4.3628
-4.4268
-6.6001
-4.1840
-3.9901
-4.1170
-2.8241
-3.9333
-7.8481
-4.3640
-3.1972
-4.3264
-5.0428
-5.6539
-2.9575
-4.3390
-4.0704
-3.3593
-5.1098
-3.2575
-1.8216
-3.4096
-3.6619
-3.9483
-2.2958
-1.2352
-4.1860
-5.3725
-7.7813
-8.6908
[torch.cuda.FloatTensor of size 100x1 (GPU 0)]

scores_wrong Variable containing:
 -4.1402
 -5.5488
 -5.9800
 -4.5110
 -3.5863
 -5.0883
 -8.0321
 -3.4733
 -4.4347
 -9.1599


scores Variable containing:
 -5.1573
 -2.5859
 -2.7273
 -4.6473
 -2.7962
 -3.5838
 -4.3424
 -4.3215
 -5.5362
 -5.8297
 -8.0791
 -3.2908
 -5.0702
 -1.7168
 -2.0157
 -6.3698
 -2.9404
 -3.7039
 -7.8189
 -3.9234
 -4.0491
 -2.5679
 -3.2191
 -4.4436
 -3.1286
 -4.2117
 -4.3884
 -2.8933
 -5.6901
 -3.1108
 -4.8475
 -2.4256
 -2.5175
 -7.3024
 -4.9314
 -3.1829
 -2.4246
 -1.6837
 -3.1264
 -2.4869
 -3.7364
 -2.2676
 -1.6491
 -2.3536
 -4.5385
 -5.1986
 -2.9739
 -2.8540
 -8.4978
 -8.0284
 -3.0951
 -2.1635
 -1.8759
-10.4392
 -5.4105
 -3.5558
 -4.7136
 -3.3365
 -2.8281
 -2.0276
 -2.5942
 -2.9617
 -6.0386
 -3.3592
 -1.9975
 -2.0976
 -4.3401
 -4.1570
 -4.3314
 -3.1423
 -6.1161
 -3.1191
 -2.1237
 -3.6515
 -2.4869
 -1.6394
 -6.2671
 -2.9298
 -4.0585
 -3.8173
 -8.1767
 -2.3081
 -4.8179
 -2.9705
 -2.2373
 -2.3108
 -2.0442
 -2.8987
 -2.7016
 -5.8109
 -1.8112
 -3.7312
 -7.4295
 -2.6937
 -3.4729
 -4.2935
 -3.0735
 -3.4355
 -2.6166
 -1.7587
[torch.cuda.FloatTensor of size 100x1 (GPU 0)]

scores_wrong Variable co

scores Variable containing:
-4.9665
-3.8333
-2.9845
-3.6629
-5.6689
-3.7675
-6.3831
-3.0659
-5.0619
-3.7147
-4.5373
-5.8487
-5.9821
-3.1022
-2.9921
-3.1219
-2.8687
-2.5530
-2.8569
-2.9288
-4.5580
-3.3881
-3.7376
-3.7003
-2.9776
-2.8076
-1.9445
-5.7796
-3.5544
-4.6718
-3.3130
-5.1257
-6.1453
-3.4301
-2.0770
-7.9798
-2.9833
-8.0746
-3.8705
-4.4301
-1.6913
-4.0096
-4.3496
-3.9208
-6.2954
-3.1653
-3.3212
-3.4437
-7.8459
-3.2738
-4.6560
-4.0920
-4.3362
-5.3282
-4.0260
-2.6289
-6.6412
-4.2439
-2.2696
-7.7188
-2.4190
-6.4778
-2.4347
-2.2867
-4.7896
-3.4939
-6.0986
-3.8086
-2.7952
-3.4688
-4.0028
-3.7201
-3.7581
-5.9246
-3.6029
-4.0822
-3.4790
-3.8390
-2.5352
-1.6831
-5.2262
-7.0455
-6.1060
-4.4932
-3.7292
-2.6648
-6.1878
-3.5185
-3.8605
-4.2655
-1.9497
-4.8123
-2.9723
-3.0514
-2.6827
-1.2875
-4.2483
-3.6766
-2.1457
-4.4428
[torch.cuda.FloatTensor of size 100x1 (GPU 0)]

scores_wrong Variable containing:
 -5.6396
 -6.5845
 -6.2776
 -5.3861
 -7.7692
 -3.6765
 -5.9038
 -3.7887
 -7.4309
 -5.1915


scores Variable containing:
 -2.9314
 -3.7667
 -3.3476
 -1.9348
 -5.1122
 -4.2322
 -4.2982
 -3.0817
 -2.6017
 -3.8549
 -6.9111
 -2.5163
 -2.9613
 -4.0957
 -4.2484
 -3.1160
 -3.5239
 -3.4955
 -3.8253
 -2.3945
 -2.8815
 -7.7280
 -2.5694
 -3.5997
 -2.3346
 -3.1701
 -2.0553
 -3.6344
 -7.5944
 -2.7670
 -1.8895
 -3.3743
 -4.0508
 -2.9205
 -3.3261
 -4.3999
 -2.1743
 -6.7349
-10.8257
 -4.1092
 -3.8095
 -2.9488
 -7.2528
 -8.7301
 -6.2753
 -5.4155
 -4.6604
 -2.4405
 -2.7208
 -2.7266
 -5.6242
 -5.4513
 -4.6156
 -2.3707
 -3.7737
 -4.8648
 -3.6320
 -2.5452
 -2.0416
 -3.3878
 -2.7677
 -2.3441
 -2.9550
 -3.4681
 -2.6057
 -2.5298
 -2.1178
 -7.4400
 -2.9652
 -2.9686
 -2.5984
 -3.1722
 -3.5883
 -2.3860
 -4.0292
 -2.5627
 -3.0710
 -5.6076
 -3.2566
 -1.4098
 -3.2976
 -3.0592
 -4.7785
 -2.1682
 -3.1382
 -6.2635
 -4.9337
 -3.1910
 -5.8293
 -8.2362
 -3.9536
 -2.6917
 -4.8168
 -4.6350
 -4.2260
 -3.3339
 -2.9247
 -3.2835
 -5.4173
 -3.7657
[torch.cuda.FloatTensor of size 100x1 (GPU 0)]

scores_wrong Variable co

scores Variable containing:
-15.6431
 -3.4104
 -4.4237
 -4.7515
 -1.7159
 -3.0862
 -2.6811
 -3.0757
 -3.9481
 -2.8562
 -3.9204
 -2.8545
 -2.2958
-12.3731
 -3.2869
 -3.1737
 -6.0946
 -1.1774
 -2.6453
 -3.4402
 -2.9380
 -3.7684
 -4.4173
 -4.6151
 -4.1186
 -4.6560
 -3.0384
 -5.1039
 -3.0487
 -2.7788
 -4.9895
 -4.4997
 -3.2840
 -3.8447
 -2.6584
 -4.8550
 -3.4869
 -7.7128
 -6.4447
 -5.8241
 -2.0060
 -7.9308
 -9.9562
 -6.0126
 -1.3040
 -4.8043
 -2.3035
 -2.8699
 -4.0525
 -3.1064
 -7.5801
 -2.9334
 -4.7096
 -3.3824
 -4.6084
 -3.7409
 -1.9760
 -2.9484
 -3.3982
 -5.9274
 -3.1202
 -3.5904
 -4.1307
 -3.1956
 -2.9734
 -5.3546
 -1.8644
 -3.3356
 -1.8418
 -5.0161
 -4.4330
 -4.7399
 -2.3089
 -2.6677
 -4.3704
 -3.2662
 -2.1364
 -3.5766
 -4.3537
 -3.4697
 -2.6592
 -3.2725
 -2.0290
 -3.7459
 -2.3763
 -3.3930
 -6.0975
 -2.5795
 -3.6420
 -1.8928
 -6.4374
 -4.1973
 -2.4294
 -2.0841
 -2.4322
 -2.7389
 -3.0894
 -3.1309
 -2.5273
 -3.6550
[torch.cuda.FloatTensor of size 100x1 (GPU 0)]

scores_wrong Variable co

scores Variable containing:
-2.5466
-3.2381
-3.6315
-2.7179
-6.0018
-5.8706
-1.8171
-5.9606
-4.8562
-2.5807
-4.4162
-1.5195
-2.0855
-3.4386
-6.1284
-2.5039
-3.2678
-3.0635
-5.0641
-7.4174
-4.7707
-2.6212
-3.5709
-2.2034
-2.6457
-2.4866
-4.0481
-2.6954
-6.9550
-5.0536
-3.5979
-5.1614
-3.9375
-4.4188
-6.0824
-2.6778
-3.4451
-1.5865
-1.8516
-1.9365
-6.4705
-2.8180
-4.6189
-2.7844
-4.0110
-5.2989
-8.0847
-4.2082
-2.5048
-4.7424
-2.5558
-1.5645
-3.3968
-7.5589
-3.4767
-1.3285
-8.4170
-3.2539
-1.7861
-6.0688
-3.7268
-4.1534
-5.8721
-2.9685
-2.9401
-5.8289
-3.9635
-4.3809
-4.8669
-3.2839
-4.9572
-5.0032
-2.3998
-4.3488
-2.2301
-2.0321
-3.2640
-5.0034
-3.5370
-1.6796
-5.0693
-3.2191
-2.0403
-2.7338
-8.0510
-3.8475
-3.4436
-3.9537
-1.3261
-4.4991
-2.9934
-4.1625
-2.5080
-3.3857
-8.8432
-3.8060
-2.2064
-2.4249
-3.3545
-4.6668
[torch.cuda.FloatTensor of size 100x1 (GPU 0)]

scores_wrong Variable containing:
 -2.7325
 -4.2303
 -5.7844
 -3.1280
 -7.1077
 -6.2427
 -3.7611
 -9.0173
 -6.3193
 -4.1518


scores Variable containing:
 -3.6982
 -5.2714
 -3.3670
 -4.0678
 -4.1694
 -2.2412
 -2.4919
 -6.1714
 -3.7116
 -6.9633
 -3.8017
 -4.0256
 -5.7223
 -7.2289
 -3.7460
 -2.8064
 -6.0372
 -2.2042
 -0.7433
 -4.5742
 -4.1392
 -2.8696
 -3.1870
 -3.3048
 -2.3483
 -7.4449
 -3.5927
 -3.5172
 -3.1173
 -3.5262
 -3.0783
 -3.8757
 -4.5785
 -3.3181
 -4.2260
 -3.5316
 -4.2988
 -2.7364
 -4.1168
 -4.2683
 -5.1771
 -4.0995
 -2.9293
 -2.7870
-11.4816
 -2.5808
 -0.3577
 -3.6817
 -2.5081
 -2.9066
 -3.0964
 -3.9017
 -4.5066
 -2.3589
 -4.6796
 -2.6203
 -2.6625
 -3.2446
 -3.2306
 -4.9746
 -3.1295
 -3.5645
 -3.7982
 -3.8168
 -8.3554
 -3.1391
 -5.4935
 -4.3548
 -3.4651
 -3.8065
 -2.6892
 -1.9536
 -2.7957
 -5.7579
 -3.3441
 -2.7624
 -3.3036
 -5.5150
 -4.2315
 -8.2216
 -6.0998
 -4.0060
 -2.7241
 -3.0425
 -2.5960
 -5.6499
 -2.6410
 -2.3281
 -4.1533
 -4.6643
 -2.6443
 -3.7022
 -2.2980
 -3.4790
 -3.1236
 -2.4625
 -3.9562
 -9.1650
 -6.8535
 -3.5717
[torch.cuda.FloatTensor of size 100x1 (GPU 0)]

scores_wrong Variable co

scores Variable containing:
-6.8817
-3.1206
-4.9850
-2.1381
-2.3502
-2.1268
-6.4109
-4.4717
-3.0931
-3.1913
-3.6793
-1.8921
-4.3470
-3.8231
-4.3792
-2.0285
-2.0226
-9.5144
-3.6074
-2.0560
-3.8200
-6.5453
-5.9460
-3.8186
-6.4361
-3.8063
-1.9838
-5.3040
-3.0930
-4.2236
-3.3286
-5.6491
-3.9585
-3.0263
-3.4582
-6.1229
-5.9956
-0.8823
-5.3602
-4.2784
-5.3116
-3.0510
-5.5476
-5.6928
-5.4075
-4.5812
-3.8013
-3.0186
-3.4722
-3.4901
-2.0355
-1.9531
-1.7713
-2.3424
-4.9622
-3.6459
-4.7568
-2.4892
-4.8020
-7.0122
-2.8395
-4.1271
-3.4618
-3.4420
-4.7132
-5.3971
-6.5862
-2.7781
-1.8768
-2.1364
-4.5257
-2.3602
-1.7274
-6.0092
-5.6784
-3.5518
-4.2217
-4.1917
-3.3481
-2.4420
-2.2021
-5.8497
-3.7640
-6.5379
-3.4452
-1.7778
-4.3388
-3.4151
-3.4639
-2.8358
-3.2255
-4.8922
-2.0670
-3.2270
-3.5181
-4.1863
-3.3316
-5.1034
-3.0563
-5.0088
[torch.cuda.FloatTensor of size 100x1 (GPU 0)]

scores_wrong Variable containing:
 -6.7599
 -4.2212
 -7.4565
 -4.2454
 -3.5223
 -3.7641
 -8.6406
 -6.6758
 -3.6077
 -5.9917


In [None]:
print(mean_losses)

In [None]:
5.7672

In [None]:
CocoDataset = CocoDataset(root = "../data/coco/resized2014", 
                         json = "../data/coco/annotations/captions_val2014.json", 
                       vocab=vocab,
                       transform=transform,
                       pad_len=pad_len)

In [None]:
coco = CocoDataset.coco

In [None]:
print(ids[0])
ann = coco.anns[ids[0]]
print(ann)

In [None]:
anns = coco.anns
imgs = coco.imgs

In [None]:
print(len(anns))
print(len(imgs))

In [None]:
anns[87382]

In [None]:
import collections
ann_extracted = collections.OrderedDict()

In [None]:
anns[87382]

In [None]:
ann_extracted.append(anns[87382])

In [None]:
ann_extracted[0]["caption_ids"] = [1,2,3]

In [None]:
ann_extracted

In [None]:
coco.getAnnIds(imgIds = 441695)

In [None]:
imgs[441695]

In [None]:
import skimage.io as io
import matplotlib.pyplot as plt


In [None]:
file_path = "../data/coco/resized2014/" + imgs[441695]["file_name"]
img_data = io.imread(file_path)

In [None]:
plt.imshow(img_data)

In [None]:
transform = transforms.Compose([ 
    transforms.RandomCrop(224),
    transforms.RandomHorizontalFlip(), 
    transforms.ToTensor(), 
    transforms.Normalize((0.485, 0.456, 0.406), 
                         (0.229, 0.224, 0.225))])

In [None]:
scores = np.zeros([10,10])
print(scores)

In [None]:
temp = np.array([])

In [None]:
temp.put(1)

In [10]:
lossFunc = torch.nn.MarginRankingLoss(margin = 0.5)

In [22]:
a = Variable(torch.Tensor([10,11]))

In [23]:
b = Variable(torch.Tensor([12,10]))

In [24]:
target = Variable(torch.Tensor([1,1]))

In [25]:
print(a)
print(b)
print(target)

Variable containing:
 10
 11
[torch.FloatTensor of size 2]

Variable containing:
 12
 10
[torch.FloatTensor of size 2]

Variable containing:
 1
 1
[torch.FloatTensor of size 2]



In [26]:
lossFunc(a,b,target)

Variable containing:
 1.2500
[torch.FloatTensor of size 1]

In [27]:
torch.randperm(batch_size)


  3
 36
 30
 33
 24
 11
 62
 25
 10
 57
 63
  0
 88
 90
 54
 20
 87
 61
 83
 49
 23
 72
 50
 92
  8
 13
 70
 68
 74
  5
 42
 91
 29
 67
 80
 64
 39
 22
 32
 98
 97
 65
 56
 47
 99
 38
 59
 76
 21
 48
 14
 58
 66
 28
 40
 96
 79
 94
 19
 41
 85
  1
 37
 55
 86
 52
 44
  2
 95
 45
 69
  6
 77
 82
 89
 31
 84
 18
 46
 26
 34
 60
 75
 53
  4
 93
 16
 35
 73
 12
 15
 81
  9
 27
 43
  7
 51
 71
 78
 17
[torch.LongTensor of size 100]