In [9]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
from torch.optim.lr_scheduler import StepLR
from torch.utils.data import DataLoader,TensorDataset
import torch.utils.data as data
import numpy as np
import scipy.io as sio
import math
import argparse
import random
import os
from my_net import AttributeNetwork
#from my_net import RelationNetwork
from my_net import TripletNetwork

In [10]:
# Run configuration shared by the cells below.
BATCH_SIZE = 32        # mini-batch size handed to the training DataLoader
EPISODE = 500000       # number of training iterations (one mini-batch each)
TEST_EPISODE = 1000    # not referenced by the code visible in this notebook
LEARNING_RATE = 1e-5   # initial Adam learning rate
GPU = 0                # CUDA device index passed to .cuda(...)

In [11]:
class CUBTriplets(data.Dataset):
    """Dataset of (anchor, negative) sample-index pairs over pre-extracted features.

    The training split is read from ``<root>/<dataset>/res101.mat`` (features and
    labels) and ``<root>/<dataset>/original_att_splits.mat`` (split indices).
    Every item pairs a training sample with a sample of a *different* class.

    Attributes:
        root: base data directory (``'./data/'``).
        im_base_path: ``root`` joined with the dataset name.
        classes: sorted unique class ids present in the training split.
        num_classes: ``len(classes)``.
        images: NumPy array holding one feature row per training sample.
        labels: ``torch`` tensor of shape (N, 1) with the class id of each row.
        num_triplets: number of pairs requested at construction time.
        triplets: list of ``(anchor_index, negative_index)`` tuples.

    NOTE(review): despite the class name and the word "triplet", only index
    *pairs* are stored, and the default dataset is AwA1 rather than CUB.
    """

    def __init__(self, dateset='AwA1', n_triplets=10000, classes=range(50)):
        """Load the training split and build the initial pair list.

        Args:
            dateset: name of the dataset sub-directory under ``./data/``. The
                (misspelt) parameter name is kept so keyword callers keep working.
            n_triplets: number of pairs to generate; rounded down to a multiple
                of the number of training classes.
            classes: accepted for backward compatibility but unused; the class
                list is derived from the training labels.
        """
        # paths
        self.root = './data/'
        self.im_base_path = os.path.join(self.root, dateset)
        image_embedding = 'res101'            # file holding the image features
        class_embedding = 'original_att'      # prefix of the split/attribute file

        # Features are stored one sample per column, hence the transpose.
        matcontent = sio.loadmat(os.path.join(self.im_base_path, image_embedding + ".mat"))
        feature = matcontent['features'].T
        # MATLAB indices start at 1, NumPy indices at 0.
        label = matcontent['labels'].astype(int).squeeze() - 1

        matcontent = sio.loadmat(os.path.join(self.im_base_path, class_embedding + "_splits.mat"))
        trainval_loc = matcontent['trainval_loc'].squeeze() - 1

        train_features = feature[trainval_loc]
        train_label = label[trainval_loc].astype(int)
        train_id = np.unique(train_label)

        # which classes to include
        self.classes = train_id
        self.num_classes = len(train_id)

        # features stay a NumPy array; labels become an (N, 1) tensor
        self.images = train_features
        self.labels = torch.from_numpy(train_label).unsqueeze(1)

        # make pairs
        self.num_triplets = n_triplets
        self.make_triplet_list(n_triplets)

        print("CUB triplet loader initialized for %d classes, %d triplets" % (self.num_classes, n_triplets))

    def __getitem__(self, index):
        """Return ``(anchor_feature, negative_feature, anchor_index, negative_index)``."""
        idx1, idx2 = self.triplets[index]
        return self.images[idx1], self.images[idx2], idx1, idx2

    def __len__(self):
        """Number of pairs actually stored.

        This can be smaller than ``num_triplets`` because the per-class count is
        rounded down, so the real list length is reported to keep indexing valid.
        """
        return len(self.triplets)

    def make_triplet_list(self, ntriplets):
        """Rebuild ``self.triplets`` with random (anchor, negative) pairs.

        For every class, ``int(ntriplets / num_classes)`` anchors are drawn with
        replacement from that class and the same number of negatives from all
        other classes. The resulting list is shuffled in place.

        Raises:
            ValueError: if a class has no samples, or no sample of another class
                exists, while at least one pair per class is requested.
        """
        print('Processing Triplet Generation ...')
        # Flatten to a 1-D NumPy array so np.where yields plain sample indices
        # whether the labels are stored as an (N, 1) tensor or as an array.
        flat_labels = np.asarray(self.labels).reshape(-1)
        num_classes = int(self.num_classes)
        per_class = int(ntriplets / num_classes) if num_classes else 0

        pairs = []
        for cx in range(num_classes):
            if per_class == 0:
                break
            class_idx = self.classes[cx]
            same_class = np.where(flat_labels == class_idx)[0]
            other_class = np.where(flat_labels != class_idx)[0]
            if same_class.size == 0 or other_class.size == 0:
                raise ValueError(
                    "cannot sample pairs for class %s: %d positives, %d negatives"
                    % (class_idx, same_class.size, other_class.size))
            anchors = np.random.choice(same_class, per_class, replace=True)
            negatives = np.random.choice(other_class, per_class, replace=True)
            pairs.extend(zip(anchors.tolist(), negatives.tolist()))

        np.random.shuffle(pairs)
        self.triplets = pairs
        print('Done!')

    def regenerate_triplet_list(self, sampler, frac_hard):
        """Rebuild the pair list as a mix of random and sampler-chosen pairs.

        Args:
            sampler: object exposing ``ChooseNegatives(n)``, which must return an
                iterable of ``(anchor_index, negative_index)`` pairs.
                NOTE(review): the sampler is defined outside this file; confirm
                that it returns integer indices into ``self.images``.
            frac_hard: fraction of ``num_triplets`` that should come from the sampler.
        """
        print("Processing Triplet Regeneration ...")
        num_random_triplets = self.num_triplets * (1.0 - frac_hard)
        # adjust number of random pairs so that it is a multiple of num_classes
        num_random_triplets = int(math.ceil(num_random_triplets) / self.num_classes) * self.num_classes
        num_hard = self.num_triplets - num_random_triplets
        print("Number of hard triplets %d ..." % num_hard)
        self.make_triplet_list(num_random_triplets)
        # Store hard examples in the same (anchor, negative) layout produced by
        # make_triplet_list so that __getitem__ can unpack every entry.
        for anchor, negative in sampler.ChooseNegatives(num_hard):
            self.triplets.append((int(anchor), int(negative)))
        np.random.shuffle(self.triplets)


init dataset
torch.Size([19832, 2048])
torch.Size([19832, 1])
----------------------------------------------------------------------------------------------------


In [None]:
def _reindex_labels(batch_labels, class_ids):
    """Map each global class id in ``batch_labels`` to its position in ``class_ids``.

    Args:
        batch_labels: CPU tensor of class ids, any shape (it is flattened).
        class_ids: 1-D array-like of the candidate class ids.

    Returns:
        ``torch.LongTensor`` with one position per label.

    Raises:
        IndexError: if a label does not occur in ``class_ids``.
    """
    class_ids = np.asarray(class_ids).reshape(-1)
    flat_labels = batch_labels.numpy().reshape(-1)
    return torch.LongTensor([int(np.flatnonzero(class_ids == lab)[0]) for lab in flat_labels])


def _relation_scores(attribute_network, relation_network, class_attributes, batch_features):
    """Score every (sample, class) pair; returns a (batch, num_classes) tensor.

    Both inputs must already be float tensors on the GPU. The class attributes
    are embedded by ``attribute_network``, concatenated with every image feature
    and scored by ``relation_network``.
    """
    class_num = class_attributes.shape[0]
    batch_size = batch_features.shape[0]
    sample_features = attribute_network(class_attributes)
    sample_features_ext = sample_features.unsqueeze(0).repeat(batch_size, 1, 1)
    batch_features_ext = batch_features.unsqueeze(1).repeat(1, class_num, 1)
    # 4096 = embedded attribute vector (2048) + image feature (2048)
    relation_pairs = torch.cat((sample_features_ext, batch_features_ext), 2).view(-1, 4096)
    return relation_network(relation_pairs).view(-1, class_num)


def _compute_accuracy(attribute_network, relation_network,
                      test_features, test_label, test_id, test_attributes):
    """Top-1 accuracy of the relation scores over the candidate classes ``test_id``.

    Args:
        attribute_network: module embedding attribute vectors.
        relation_network: module scoring concatenated pairs.
        test_features: CPU tensor, one feature row per test sample.
        test_label: CPU tensor with the global class id of every test sample.
        test_id: array of candidate class ids, aligned with ``test_attributes``.
        test_attributes: tensor with one attribute row per candidate class.

    Returns:
        Fraction of samples whose highest-scoring class is the true class
        (0.0 when there are no samples).
    """
    test_loader = DataLoader(TensorDataset(test_features, test_label), batch_size=32, shuffle=False)
    class_num = test_attributes.shape[0]
    test_size = test_features.shape[0]
    print("class num:", class_num)

    class_attributes = test_attributes.cuda(GPU).float()
    total_rewards = 0
    # Evaluation only: no autograd graph is needed.
    with torch.no_grad():
        for batch_features, batch_labels in test_loader:
            batch_features = batch_features.cuda(GPU).float()
            relations = _relation_scores(attribute_network, relation_network,
                                         class_attributes, batch_features)
            # re-build batch_labels according to the candidate class order
            re_batch_labels = _reindex_labels(batch_labels, test_id)
            _, predict_labels = torch.max(relations, 1)
            total_rewards += int((predict_labels.cpu() == re_batch_labels).sum().item())

    return total_rewards / float(test_size) if test_size else 0.0


def main():
    """Train the attribute and relation networks on AwA1 and evaluate periodically.

    Steps: load ResNet-101 features, labels, split indices and class attributes
    from the ``.mat`` files; build the networks and optimizers; then run
    ``EPISODE`` training iterations. Every 2000 iterations the ZSL and GZSL
    accuracies are printed, and both networks are saved whenever the ZSL
    accuracy improves. Requires a CUDA device.
    """
# step 1: init dataset
    print("init dataset")
    dataroot = './data'
    dataset = 'AwA1_data'
    image_embedding = 'res101'               # file holding the image features
    class_embedding = 'original_att'         # prefix of the split/attribute file

    # visual features: stored one sample per column, hence the transpose
    matcontent = sio.loadmat(dataroot + "/" + dataset + "/" + image_embedding + ".mat")
    feature = matcontent['features'].T
    # MATLAB indices start at 1, NumPy indices at 0
    all_labels = matcontent['labels'].astype(int).squeeze() - 1

    # split indices and per-class attribute vectors
    matcontent = sio.loadmat(dataroot + "/" + dataset + "/" + class_embedding + "_splits.mat")
    trainval_loc = matcontent['trainval_loc'].squeeze() - 1
    test_seen_loc = matcontent['test_seen_loc'].squeeze() - 1
    test_unseen_loc = matcontent['test_unseen_loc'].squeeze() - 1
    attribute = matcontent['att'].T          # one attribute row per class

    x = feature[trainval_loc]
    train_label = all_labels[trainval_loc].astype(int)

    x_test = feature[test_unseen_loc]
    test_label = all_labels[test_unseen_loc].astype(int)

    x_test_seen = feature[test_seen_loc]
    test_label_seen = all_labels[test_seen_loc].astype(int)

    test_id = np.unique(test_label)          # ids of the unseen classes
    att_pro = attribute[test_id]             # attribute rows of the unseen classes

    # train set
    train_features = torch.from_numpy(x)
    train_label = torch.from_numpy(train_label).unsqueeze(1)   # (N,) -> (N, 1)
    print(train_features.shape)
    print(train_label.shape)

    # attributes: NumPy copy for per-batch indexing, tensor for GZSL evaluation
    all_attributes = np.array(attribute)
    attributes = torch.from_numpy(attribute)
    all_class_ids = np.arange(attribute.shape[0])

    # test sets
    test_features = torch.from_numpy(x_test)
    test_label = torch.from_numpy(test_label).unsqueeze(1)
    test_attributes = torch.from_numpy(att_pro).float()
    test_seen_features = torch.from_numpy(x_test_seen)
    test_seen_label = torch.from_numpy(test_label_seen)

    train_data = TensorDataset(train_features, train_label)
    #################here need new code to make triplet data#####################
    print('-'*100)

# init network
    print("init networks")
    # NOTE(review): the training loop, the evaluation and the checkpoint names all
    # depend on a relation network, while the file-level import of RelationNetwork
    # is commented out. It is imported here on the assumption that my_net still
    # defines it with the (input_size, hidden_size) constructor - confirm.
    from my_net import RelationNetwork

    attribute_network = AttributeNetwork(85, 1024, 2048)   # attributes (85) -> hidden (1024) -> feature space (2048)
    relation_network = RelationNetwork(4096, 400)          # concatenated pair (4096) -> hidden (400)
    # NOTE(review): the triplet network is constructed but not yet used by the
    # training loop below; kept so the metric-learning work can be wired in.
    triplet_network = TripletNetwork(attribute_network)

    attribute_network.cuda(GPU)
    relation_network.cuda(GPU)
    triplet_network.cuda(GPU)

    attribute_network_optim = torch.optim.Adam(attribute_network.parameters(), lr=LEARNING_RATE, weight_decay=1e-5)
    # halve the learning rate every 200k iterations
    attribute_network_scheduler = StepLR(attribute_network_optim, step_size=200000, gamma=0.5)
    relation_network_optim = torch.optim.Adam(relation_network.parameters(), lr=LEARNING_RATE)
    relation_network_scheduler = StepLR(relation_network_optim, step_size=200000, gamma=0.5)
    print('-'*100)

    print("training...")
    last_accuracy = 0.0
    # Built once; every iter() call below draws a fresh shuffled permutation.
    train_loader = DataLoader(train_data, batch_size=BATCH_SIZE, shuffle=True)
    mse = nn.MSELoss().cuda(GPU)

    for episode in range(EPISODE):
        # one random mini-batch per episode
        batch_features, batch_labels = next(iter(train_loader))
        batch_size = batch_features.shape[0]

        # distinct classes present in the batch, in order of first appearance
        flat_labels = batch_labels.numpy().reshape(-1)
        _, first_pos = np.unique(flat_labels, return_index=True)
        sample_labels = flat_labels[np.sort(first_pos)]

        # attribute vectors of those classes: (K, 85)
        sample_attributes = torch.from_numpy(all_attributes[sample_labels]).float()
        class_num = sample_attributes.shape[0]

        batch_features = batch_features.cuda(GPU).float()
        relations = _relation_scores(attribute_network, relation_network,
                                     sample_attributes.cuda(GPU), batch_features)

        # re-build batch_labels according to sample_labels
        re_batch_labels = _reindex_labels(batch_labels, sample_labels)

        # loss: regress the relation scores onto the one-hot class indicator
        one_hot_labels = torch.zeros(batch_size, class_num).scatter_(1, re_batch_labels.view(-1, 1), 1).cuda(GPU)
        loss = mse(relations, one_hot_labels)

        # update
        attribute_network.zero_grad()
        relation_network.zero_grad()

        loss.backward()

        attribute_network_optim.step()
        relation_network_optim.step()
        # Schedulers advance after the optimizers, once per episode.
        attribute_network_scheduler.step()
        relation_network_scheduler.step()

        if (episode+1)%100 == 0:
            print("episode:", episode+1, "loss", loss.item())

        if (episode+1)%2000 == 0:
            # test
            print("Testing...")

            zsl_accuracy = _compute_accuracy(attribute_network, relation_network,
                                             test_features, test_label, test_id, test_attributes)
            gzsl_unseen_accuracy = _compute_accuracy(attribute_network, relation_network,
                                                     test_features, test_label, all_class_ids, attributes)
            gzsl_seen_accuracy = _compute_accuracy(attribute_network, relation_network,
                                                   test_seen_features, test_seen_label, all_class_ids, attributes)

            # harmonic mean of seen/unseen accuracy; defined as 0 when both are 0
            accuracy_sum = gzsl_unseen_accuracy + gzsl_seen_accuracy
            H = 2 * gzsl_seen_accuracy * gzsl_unseen_accuracy / accuracy_sum if accuracy_sum > 0 else 0.0

            print('zsl:', zsl_accuracy)
            print('gzsl: seen=%.4f, unseen=%.4f, h=%.4f' % (gzsl_seen_accuracy, gzsl_unseen_accuracy, H))

            if zsl_accuracy > last_accuracy:

                # save networks (make sure the target directory exists first)
                os.makedirs("./models", exist_ok=True)
                torch.save(attribute_network.state_dict(),"./models/zsl_awa1_attribute_network_v33.pkl")
                torch.save(relation_network.state_dict(),"./models/zsl_awa1_relation_network_v33.pkl")

                print("save networks for episode:",episode)

                last_accuracy = zsl_accuracy

In [None]:
# Entry point: run main() when this cell/module is executed as the top-level script.
if __name__ == '__main__':
    main()