In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
from torch.optim.lr_scheduler import StepLR
from torch.utils.data import DataLoader,TensorDataset
import torch.utils.data as data
import numpy as np
import scipy.io as sio
import math
import argparse
import random
import os

In [2]:
BATCH_SIZE = 128
EPISODE = 200000
TEST_EPISODE = 1000
LEARNING_RATE =1e-2  #1e--5
Weight_Decay=1e-3
GPU = 0
Margin = 0.1

In [3]:
print("init dataset")
##################################参数##################################################################
dataroot = '../data'
dataset = 'AwA1_data'
image_embedding = 'res101'               #ResNet101层
class_embedding = 'original_att'         #属性表达 85-d
#######################################读取视觉特征###################################################################

matcontent = sio.loadmat(dataroot + "/" + dataset + "/" + image_embedding + ".mat")  #scipy loadmat
 
feature = matcontent['features'].T         #转置 30478x2048 每一行是一个完整的样本

label = matcontent['labels'].astype(int).squeeze() - 1   #matlab begin 1 ,numpy begin 0
########################################读取属性特征###########################################################

matcontent = sio.loadmat(dataroot + "/" + dataset + "/" + class_embedding + "_splits.mat")

    
# numpy array index starts from 0, matlab starts from 1
trainval_loc = matcontent['trainval_loc'].squeeze() - 1    #squeeze()去掉维度中的1 AxBx1 --->AxB

test_seen_loc = matcontent['test_seen_loc'].squeeze() - 1
test_unseen_loc = matcontent['test_unseen_loc'].squeeze() - 1

attribute = matcontent['att'].T    #转置 50x85 每行是整个属性向量

x = feature[trainval_loc]                      # train_features trainval里面是图片的编号 begin with 0 ，19832个
train_label = label[trainval_loc].astype(int)  # train_label  int类型没变 每个图片的lable 19832个
train_id = np.unique(train_label)

att = attribute[train_label]                   # train attributes 每个图片的属性 19832个

########################add negative pairs#######################
#x_negative = np.empty_like(x)
#x_negative_label = np.empty_like(train_label)
#print(x.shape[0])

#for i in range(x.shape[0]):
#    pick=np.random.choice(np.where(train_label[i]!=train_id)[0], replace=True)
#    x_negative[i] = x[pick]
#    x_negative_label[i] = train_label[pick]
    
x_test = feature[test_unseen_loc]                   # test_feature 5685个
test_label = label[test_unseen_loc].astype(int)     # test_label   5685个

x_test_seen = feature[test_seen_loc]                #test_seen_feature 4958个
test_label_seen = label[test_seen_loc].astype(int)  # test_seen_label  4958个
    
test_id = np.unique(test_label)                     # test_id  10个类 ，unique去重
att_pro = attribute[test_id]                        # test_attribute 每一类的属性向量 10x85

# train set
train_features = torch.from_numpy(x)   #np-->tensor
#train_fearures_negative = torch.from_numpy(x_negative)

sample_attributes=[]
train_label = torch.from_numpy(train_label).unsqueeze(1) #每张图片的属性转化 ，unsqueeze(1)就是插入到第一维度 AxB维-->Ax1xB
#train_negative_label = torch.from_numpy(x_negative_label).unsqueeze(1)
# attributes
all_attributes = np.array(attribute)  #所有50类属性转变为numpy数组???属性向量仍然用的numpy类型 没有转化为pytorch
#print(all_attributes)
    
#print('-'*50)
attributes = torch.from_numpy(attribute) 
#print(attribute)
# test set

test_features = torch.from_numpy(x_test)
#print(test_features.shape)

test_label = torch.from_numpy(test_label).unsqueeze(1)
#print(test_label.shape)

testclasses_id = np.array(test_id)
#print(testclasses_id.shape)

test_attributes = torch.from_numpy(att_pro).float()
#print(test_attributes.shape)

test_seen_features = torch.from_numpy(x_test_seen)
#print(test_seen_features.shape)

test_seen_label = torch.from_numpy(test_label_seen)

train_data = TensorDataset( train_label, train_features )
#train_data = TensorDataset(train_label, train_features, train_fearures_negative)

#################here need new code to make triplet data#####################
print('-'*100)

init dataset
----------------------------------------------------------------------------------------------------


In [4]:
from my_net import AttributeNetwork
from my_net import TripletNetwork

In [5]:
# init network
print("init networks")
attribute_network = AttributeNetwork(85,1024,2048)  #85d属性 1024隐藏层 2048输出 85d到2048d

attribute_network.cuda(GPU)   

attribute_network_optim = torch.optim.SGD(attribute_network.parameters(), lr=LEARNING_RATE, momentum=0.5, weight_decay= Weight_Decay)

attribute_network_scheduler = StepLR(attribute_network_optim , step_size=200000 , gamma=0.5)
#
print('-'*100)

init networks
----------------------------------------------------------------------------------------------------


In [6]:
def compute_accuracy(test_features, test_label, test_id, test_attributes):
    
    test_data = TensorDataset(test_features, test_label)
    test_batch = 32
    test_loader = DataLoader(test_data, batch_size=test_batch, shuffle=False)
    total_rewards = 0

    sample_labels = test_id
    sample_attributes = test_attributes
    class_num = sample_attributes.shape[0]
    test_size = test_features.shape[0]

    print("class num:", class_num)
    print('_'*100)

    for batch_features,batch_labels in test_loader:

        batch_size = batch_labels.shape[0]
        batch_features_ext = torch.from_numpy(batch_features.numpy().repeat(class_num, 0))
        batch_features_ext = Variable(batch_features_ext).cuda(GPU).float()  # 32*1024

        #print(batch_features_ext)

        sample_features = attribute_network(Variable(sample_attributes).cuda(GPU).float())
        sample_features_ext = sample_features.repeat(batch_size, 1)
        #print(sample_features_ext.shape)


        relations = F.pairwise_distance(batch_features_ext, sample_features_ext, 2).view(-1, class_num)
        re_batch_labels = []
        for label in batch_labels.numpy():
            index = np.argwhere(sample_labels == label)
            re_batch_labels.append(index[0][0])
        re_batch_labels = torch.cuda.LongTensor(re_batch_labels)


        _, predict_labels = torch.min(relations.data, 1)
        #print(predict_labels)
        rewards = [1 if predict_labels[j] == re_batch_labels[j] else 0 for j in range(batch_size)]
        total_rewards += np.sum(rewards)
    test_accuracy = total_rewards/1.0/test_size
    return  test_accuracy

In [None]:
print("training...")
last_accuracy = 0.0
for episode in range(EPISODE):
   
    #attribute_network_scheduler.step(episode)

    train_loader = DataLoader(train_data, batch_size=BATCH_SIZE, shuffle=True)

    batch_labels, batch_features  = train_loader.__iter__().next()
    
    batch_features_negative = np.empty_like(batch_features.numpy())
    
    batch_attributes = torch.Tensor([all_attributes[i] for i in batch_labels.numpy()]).squeeze(1)
    
    batch_features = Variable(batch_features).cuda(GPU).float()  # 32*2048
    
    
   
    for i in range(batch_features_negative.shape[0]):
        pick=np.random.choice(np.where(batch_labels.numpy()!=train_id)[0], replace=True)
        batch_features_negative[i] = batch_features[pick]
        
    batch_features_negative = torch.from_numpy(batch_features_negative)
    batch_features_negative = Variable(batch_features_negative).cuda(GPU).float()  # 32*2048
    
    batch_attributes = attribute_network(Variable(batch_attributes).cuda(GPU))
    #batch_attributes = Variable(batch_attributes).cuda(GPU)
    #dista, distb, embedded_x, embedded_y, embedded_z = triplet_network(data1, data2, data3)
    criterion = nn.TripletMarginLoss(margin = Margin).cuda(GPU)
    
    
    loss_triplet= criterion(batch_attributes, batch_features, batch_features_negative)
    loss = loss_triplet #+ 0.001 * loss_embedd

    # update
    attribute_network.zero_grad()
    loss.backward()
    attribute_network_optim.step()
    
    if (episode+1)%1000 == 0:
        print("episode:", episode+1, "loss", loss.data)
    if (episode+1)%2000 == 0:
        print("Testing...")
        zsl_accuracy = compute_accuracy(test_features, test_label, test_id, test_attributes)
        gzsl_unseen_accuracy = compute_accuracy(test_features, test_label, np.arange(50), attributes)
        gzsl_seen_accuracy = compute_accuracy(test_seen_features, test_seen_label, np.arange(50), attributes)
        H = 2 * gzsl_seen_accuracy * gzsl_unseen_accuracy / (gzsl_unseen_accuracy + gzsl_seen_accuracy)
        print('zsl:', zsl_accuracy)
        print('gzsl: unseen=%.4f, seen=%.4f, h=%.4f' % (gzsl_unseen_accuracy, gzsl_seen_accuracy, H))
        print('_'*100)

training...
episode: 1000 loss tensor(0.8689, device='cuda:0')
episode: 2000 loss tensor(0.6611, device='cuda:0')
Testing...
class num: 10
____________________________________________________________________________________________________
class num: 50
____________________________________________________________________________________________________
class num: 50
____________________________________________________________________________________________________
zsl: 0.48337730870712403
gzsl: unseen=0.2431, seen=0.6255, h=0.3501
____________________________________________________________________________________________________
episode: 3000 loss tensor(1.0593, device='cuda:0')
episode: 4000 loss tensor(0.9774, device='cuda:0')
Testing...
class num: 10
____________________________________________________________________________________________________
class num: 50
____________________________________________________________________________________________________
class num: 50
____

episode: 28000 loss tensor(0.5687, device='cuda:0')
Testing...
class num: 10
____________________________________________________________________________________________________
class num: 50
____________________________________________________________________________________________________
class num: 50
____________________________________________________________________________________________________
zsl: 0.5996481970096745
gzsl: unseen=0.3462, seen=0.6509, h=0.4520
____________________________________________________________________________________________________
episode: 29000 loss tensor(0.8606, device='cuda:0')
episode: 30000 loss tensor(0.9852, device='cuda:0')
Testing...
class num: 10
____________________________________________________________________________________________________
class num: 50
____________________________________________________________________________________________________
class num: 50
_________________________________________________________________

class num: 50
____________________________________________________________________________________________________
class num: 50
____________________________________________________________________________________________________
zsl: 0.606684256816183
gzsl: unseen=0.3875, seen=0.6551, h=0.4870
____________________________________________________________________________________________________
episode: 55000 loss tensor(0.7722, device='cuda:0')
episode: 56000 loss tensor(1.1210, device='cuda:0')
Testing...
class num: 10
____________________________________________________________________________________________________
class num: 50
____________________________________________________________________________________________________
class num: 50
____________________________________________________________________________________________________
zsl: 0.6056288478452067
gzsl: unseen=0.3789, seen=0.6493, h=0.4785
____________________________________________________________________________