In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
from torch.optim.lr_scheduler import StepLR
from torch.utils.data import DataLoader,TensorDataset
import torch.utils.data as data
import numpy as np
import scipy.io as sio
import math
import argparse
import random
import os
from My_Loss import HardTripletLoss

In [None]:
# Hyper-parameters and runtime configuration shared by the cells below.
BATCH_SIZE    = 32       # batch size of the training DataLoader
EPISODE       = 10000    # number of training episodes (one batch per episode)
TEST_EPISODE  = 1000     # not referenced by any of the visible cells
LEARNING_RATE = 1e-3     # learning rate handed to the SGD optimizer
GPU           = 0        # CUDA device index passed to every .cuda(...) call
Margin        = 1        # margin handed to HardTripletLoss

In [None]:
print("init dataset")

# ------------------------------ configuration ------------------------------
dataroot = '../data'
dataset = 'AwA1_data'
image_embedding = 'res101'          # visual-feature file stem (ResNet-101 per the original note)
class_embedding = 'original_att'    # attribute file stem (85-d attributes per the original note)

# ------------------------------ visual features ----------------------------
matcontent = sio.loadmat("/".join([dataroot, dataset, image_embedding + ".mat"]))
feature = matcontent['features'].T                       # transposed so that each row is one sample
label = matcontent['labels'].astype(int).squeeze() - 1   # MATLAB indices start at 1, numpy at 0

# ------------------------------ splits and attributes ----------------------
matcontent = sio.loadmat("/".join([dataroot, dataset, class_embedding + "_splits.mat"]))

# Split locations are stored 1-based as well; squeeze() drops singleton axes.
trainval_loc, test_seen_loc, test_unseen_loc = (
    matcontent[split_key].squeeze() - 1
    for split_key in ('trainval_loc', 'test_seen_loc', 'test_unseen_loc')
)
attribute = matcontent['att'].T     # transposed so that each row is one class attribute vector

# ------------------------------ numpy views of each split ------------------
# training split
x = feature[trainval_loc]
train_label = label[trainval_loc].astype(int)
train_id = np.unique(train_label)           # distinct class ids present in the training split
att = attribute[train_label]                # attribute vector of every training sample

# unseen-class test split
x_test = feature[test_unseen_loc]
test_label = label[test_unseen_loc].astype(int)
test_id = np.unique(test_label)             # distinct class ids of the unseen test split
att_pro = attribute[test_id]                # one attribute vector per unseen class

# seen-class test split
x_test_seen = feature[test_seen_loc]
test_label_seen = label[test_seen_loc].astype(int)

# ------------------------------ tensors ------------------------------------
# training tensors
train_features = torch.from_numpy(x)
sample_attributes = []                      # never filled or read in the visible cells
train_label = torch.from_numpy(train_label).unsqueeze(1)   # (N,) -> (N, 1)

# attributes of every class: a numpy copy for indexing plus a tensor sharing `attribute`'s memory
all_attributes = np.array(attribute)
attributes = torch.from_numpy(attribute)

# unseen-class test tensors
test_features = torch.from_numpy(x_test)
test_label = torch.from_numpy(test_label).unsqueeze(1)     # (N,) -> (N, 1)
testclasses_id = np.array(test_id)
test_attributes = torch.from_numpy(att_pro).float()

# seen-class test tensors (labels stay 1-D here, unlike test_label above)
test_seen_features = torch.from_numpy(x_test_seen)
test_seen_label = torch.from_numpy(test_label_seen)

# Each training item is a (label, feature) pair -- note the order.
train_data = TensorDataset(train_label, train_features)

print('-'*100)

In [None]:
class AttributeNetwork(nn.Module):
    """One fully connected layer followed by a ReLU.

    Args:
        input_size: number of input features of the linear layer.
        hidden1_size: number of output features of the linear layer.
    """

    def __init__(self, input_size, hidden1_size):
        super(AttributeNetwork, self).__init__()
        # The only trainable layer of the network.
        self.fc1 = nn.Linear(in_features=input_size, out_features=hidden1_size)

    def forward(self, x):
        """Return ``relu(fc1(x))``."""
        projected = self.fc1(x)
        return F.relu(projected)

In [None]:
# Build the attribute-embedding network, its optimizer and the LR schedule.
print("init networks")

# A single linear layer + ReLU taking 85 inputs to 2048 outputs (no hidden layer).
triplet_network = AttributeNetwork(input_size=85, hidden1_size=2048)
triplet_network.cuda(GPU)

# Plain SGD; the schedule halves the learning rate every 100000 scheduler steps.
triplet_network_optim = torch.optim.SGD(
    triplet_network.parameters(),
    lr=LEARNING_RATE,
)
triplet_network_scheduler = StepLR(
    triplet_network_optim,
    step_size=100000,
    gamma=0.5,
)

print('-'*100)

In [None]:
def compute_accuracy(test_features, test_label, test_id, test_attributes, network=None):
    """Nearest-class-embedding accuracy of the attribute network.

    Every class attribute vector is pushed through the network once; each test
    feature is then assigned to the class whose embedding is closest in L2
    distance (``F.pairwise_distance`` with ``p=2``).

    Args:
        test_features: 2-D tensor with one feature vector per row.
        test_label: integer tensor of class ids, shape ``(N,)`` or ``(N, 1)``.
        test_id: array-like of candidate class ids; entry ``i`` belongs to row
            ``i`` of ``test_attributes``.
        test_attributes: 2-D tensor with one attribute vector per candidate class.
        network: module mapping attributes to feature space. Defaults to the
            notebook-level ``triplet_network``.

    Returns:
        Fraction of correctly classified samples as ``numpy.float64``
        (``0.0`` when there are no samples).

    Raises:
        IndexError: if a label in ``test_label`` does not occur in ``test_id``.
    """
    model = triplet_network if network is None else network

    # Run on whatever device the network lives on instead of forcing CUDA.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    class_num = test_attributes.shape[0]
    test_size = test_features.shape[0]

    print("class num:", class_num)

    if test_size == 0:
        # Nothing to score; avoids the division by zero at the end.
        return np.float64(0.0)

    # class id -> row of test_attributes; the first occurrence wins, matching
    # the np.argwhere(...)[0][0] lookup this replaces.
    class_position = {}
    for position, class_id in enumerate(np.asarray(test_id).reshape(-1).tolist()):
        class_position.setdefault(class_id, position)

    test_loader = DataLoader(TensorDataset(test_features, test_label),
                             batch_size=32, shuffle=False)
    correct = 0

    # Evaluation only: no autograd graph is needed.
    with torch.no_grad():
        # The class embeddings do not depend on the batch, so compute them once.
        class_embeddings = model(test_attributes.to(device).float())

        for batch_features, batch_labels in test_loader:
            batch_size = batch_labels.shape[0]
            batch_features = batch_features.to(device).float()

            # Pair every sample with every class: sample rows are repeated
            # class_num times in a row, class rows are tiled batch_size times.
            features_ext = (batch_features.unsqueeze(1)
                            .expand(-1, class_num, -1)
                            .reshape(batch_size * class_num, -1))
            embeddings_ext = class_embeddings.repeat(batch_size, 1)

            distances = F.pairwise_distance(features_ext, embeddings_ext, 2).view(-1, class_num)
            _, predicted = torch.min(distances, 1)

            try:
                targets = [class_position[label_value]
                           for label_value in batch_labels.reshape(-1).tolist()]
            except KeyError as missing:
                raise IndexError("label %s is not among the candidate class ids" % missing)
            targets = torch.tensor(targets, dtype=torch.long, device=device)

            correct += int((predicted == targets).sum().item())

    # numpy scalar, like the original return value.
    return np.float64(correct) / test_size

In [None]:
print("training...")
last_accuracy = 0.0   # not read anywhere in the visible cells; kept for compatibility

EVAL_INTERVAL = 1000  # log the loss and evaluate every this many episodes

# Loop-invariant objects are built once instead of once per episode.
train_loader = DataLoader(train_data, batch_size=BATCH_SIZE, shuffle=True)
criterion = HardTripletLoss(margin=Margin).cuda(GPU)
all_class_ids = np.arange(attributes.shape[0])   # one candidate id per row of `attributes`

for episode in range(EPISODE):
    # A fresh iterator reshuffles the data, so every episode draws one newly
    # shuffled batch. next(iter(...)) replaces the Python-2 style `.next()`
    # alias, which recent PyTorch loader iterators no longer provide.
    batch_labels, batch_features = next(iter(train_loader))
    batch_count = batch_features.shape[0]

    # batch_id: sorted distinct class ids in the batch;
    # label_index: position of each sample's class inside batch_id.
    batch_id, label_index = np.unique(batch_labels.numpy().reshape(-1), return_inverse=True)

    batch_attributes = torch.from_numpy(all_attributes[batch_id]).float()

    # Pair every sample with every class present in the batch: sample rows are
    # repeated batch_id.size times in a row, attribute rows are tiled once per
    # sample (the actual batch size is used rather than BATCH_SIZE).
    batch_features_ext = torch.from_numpy(
        np.repeat(batch_features.numpy(), batch_id.size, axis=0)
    ).cuda(GPU).float()
    batch_attributes_ext = batch_attributes.repeat(batch_count, 1).cuda(GPU)

    re_batch_labels = torch.from_numpy(label_index.reshape(-1)).long().cuda(GPU)

    triplet_loss = criterion(triplet_network(batch_attributes_ext), batch_features_ext, re_batch_labels)
    triplet_network.zero_grad()
    triplet_loss.backward()
    triplet_network_optim.step()
    # Step the schedule after the optimizer and without an explicit epoch;
    # this yields the same learning rate per episode as the previous
    # `step(episode)` call placed before the update.
    triplet_network_scheduler.step()

    if (episode + 1) % EVAL_INTERVAL == 0:
        print("episode:", episode+1, "loss", triplet_loss.item())
        print("Testing...")
        zsl_accuracy = compute_accuracy(test_features, test_label, test_id, test_attributes)
        gzsl_unseen_accuracy = compute_accuracy(test_features, test_label, all_class_ids, attributes)
        gzsl_seen_accuracy = compute_accuracy(test_seen_features, test_seen_label, all_class_ids, attributes)
        # Harmonic mean of seen/unseen accuracy; defined as 0 when both are 0.
        accuracy_sum = gzsl_unseen_accuracy + gzsl_seen_accuracy
        if accuracy_sum > 0:
            H = 2 * gzsl_seen_accuracy * gzsl_unseen_accuracy / accuracy_sum
        else:
            H = 0.0
        print('zsl:', zsl_accuracy)
        print('gzsl: unseen=%.4f , seen=%.4f , h=%.4f' % (gzsl_unseen_accuracy , gzsl_seen_accuracy, H))
        print('_'*100)
    


In [None]:
    #print('relations.size: ', relations.size())
    #print('re_batch_labels: ', re_batch_labels)
    #mask_pos = torch.zeros_like(relations) 
    #for i in range(BATCH_SIZE):
        #mask_pos[i][re_batch_labels[i]]=1   
    #print(mask_pos)
    #mask_pos=_get_anchor_positive_triplet_mask(relations, re_batch_labels)
    #mask_neg=_get_anchor_negative_triplet_mask(relations, re_batch_labels)
    #mask_neg = torch.ones_like(relations)
    #print('-'*100)
    #for i in range(BATCH_SIZE):
        #mask_neg[i][re_batch_labels[i]]=0
    #pos_batch=relations*mask_pos
    #neg_batch=relations*mask_neg
    #num_pos=torch.sum(mask_pos)
    #num_neg=torch.sum(mask_neg)
    #print('pos_number: ', num_pos)
    #print('neg_number: ', num_neg)
    #hardest_positive_dist, pos_labels = torch.max(pos_batch.data, 0)
    #print(hardest_positive_dist.size(),'\n', pos_labels)
    #hardest_negative_dist, neg_labels = torch.min(neg_batch.data, 0)
    #print(hardest_negative_dist.size(),'\n', neg_labels)
    #triplet_loss = F.relu(hardest_positive_dist - hardest_negative_dist + Margin)
    #triplet_loss = torch.mean(triplet_loss)
    
    #triplet_network.zero_grad()
    #triplet_loss.backward()
    #triplet_network_optim.step()