In [1]:
import os
import time
import copy
import argparse
import numpy as np
import torch
import torch.nn as nn
from torchvision.utils import save_image
# from utils import get_loops, get_dataset, get_network, get_eval_pool, evaluate_synset, get_daparam, match_loss, get_time, TensorDataset, epoch, DiffAugment, ParamDiffAug

from utils_baseline import get_dataset, get_network, get_eval_pool, evaluate_synset, get_time, DiffAugment, ParamDiffAug
import warnings
import argparse
from a_cvae import  CVAE
import torch.utils.data
from torch import optim
warnings.filterwarnings("ignore")

# watch -n 1 nvidia-smi
import os

In [2]:
def _str2bool(value):
    """Convert a command-line token to a bool.

    argparse's ``type=bool`` turns every non-empty string (including "False")
    into True, so boolean options need an explicit converter.

    Raises:
        argparse.ArgumentTypeError: if the token is not a recognised boolean.
    """
    if isinstance(value, bool):
        return value
    token = str(value).strip().lower()
    if token in ('true', 't', 'yes', 'y', '1'):
        return True
    if token in ('false', 'f', 'no', 'n', '0', ''):
        return False
    raise argparse.ArgumentTypeError('expected a boolean value, got %r' % (value,))


# Experiment configuration. Options are declared as for a command-line script,
# but they are parsed from an empty list below so the notebook always runs with
# the defaults written here.
parser = argparse.ArgumentParser(description='Parameter Processing')

parser.add_argument('--dsa', type=str, default='True', choices=['True', 'False'],
                    help='whether to use differentiable Siamese augmentation.')

# The default is the boolean True (it used to be the string 'True').
# NOTE: with action='store_true' and a True default, passing the flag changes
# nothing, so ZCA cannot be switched off from the command line.
parser.add_argument('--zca', default=True, action='store_true', help="do ZCA whitening")

parser.add_argument('--load_all', action='store_true', help="only use if you can fit all expert trajectories into RAM")

parser.add_argument('--no_aug', type=_str2bool, default=False, help='this turns off diff aug during distillation')

parser.add_argument('--texture', action='store_true', help="will distill textures instead")
parser.add_argument('--canvas_size', type=int, default=2, help='size of synthetic canvas')
parser.add_argument('--canvas_samples', type=int, default=1, help='number of canvas samples per iteration')

parser.add_argument('--dataset', type=str, default='CIFAR10', help='dataset')
parser.add_argument('--model', type=str, default='ConvNet', help='model')
parser.add_argument('--ipc', type=int, default=50, help='image(s) per class')
# eval_mode -- S: the same to training model, M: multi architectures, W: net width,
# D: net depth, A: activation function, P: pooling layer, N: normalization layer
parser.add_argument('--eval_mode', type=str, default='SS', help='eval_mode')
parser.add_argument('--max_start_epoch', type=int, default=25, help='max epoch we can start at')
# epoch_eval_train can be small for speeding up with little performance drop
parser.add_argument('--epoch_eval_train', type=int, default=1000, help='epochs to train a model with synthetic data')
parser.add_argument('--Iteration', type=int, default=2000, help='training iterations')
parser.add_argument('--lr_img', type=float, default=1.0, help='learning rate for updating synthetic images')
parser.add_argument('--lr_net', type=float, default=0.01, help='learning rate for updating network parameters')
parser.add_argument('--batch_real', type=int, default=256, help='batch size for real data')
parser.add_argument('--batch_train', type=int, default=256, help='batch size for training networks')
parser.add_argument('--init', type=str, default='real', help='noise/real: initialize synthetic images from random noise or randomly sampled real images.')
parser.add_argument('--dsa_strategy', type=str, default='color_crop_cutout_flip_scale_rotate', help='differentiable Siamese augmentation strategy')
parser.add_argument('--data_path', type=str, default='/home/ssd7T/ZTL_gcond/data_cv', help='dataset path')
parser.add_argument('--save_path', type=str, default='/home/ssd7T/ztl_dm/gen', help='path to save results')
parser.add_argument('--dis_metric', type=str, default='ours', help='distance metric')
parser.add_argument('--subset', type=str, default='imagenette', help='ImageNet subset. This only does anything when --dataset=ImageNet')
parser.add_argument('-k', '--dict-size', default=10, type=int, dest='k', metavar='K',
                    help='number of atoms in dictionary')
parser.add_argument('--lr', type=float, default=2e-4,
                    help='learning rate')

parser.add_argument('--lr_cvae', type=float, default=2e-4,
                    help='learning rate')

parser.add_argument('--vq_coef', type=float, default=None,
                    help='vq coefficient in loss')
parser.add_argument('--commit_coef', type=float, default=None,
                    help='commitment coefficient in loss')
parser.add_argument('--kl_coef', type=float, default=None,
                    help='kl-divergence coefficient in loss')

parser.add_argument('--num_exp', type=int, default=10, help='the batchs of test data')
parser.add_argument('--num_eval', type=int, default=3, help='the number of evaluating randomly initialized models')
parser.add_argument('--pairsnum', type=int, default=100, help='image real for test')
parser.add_argument('--test_mode', type=int, default=0, help='test mode')
# 0 no cvae, just test all train images(T) and labels  90 70 50 30 10

# 1 train cvae with all the train images(T), obtain model A, then input the train images for test(T2), using output for test
# 2 train cvae with all the train images(T), obtain model A, then input the train images for test(T2), using combination of output and T for test

# 3 train cvae with N batchs of the sample pairs(T1 - S1), obtain model B, then input the train images for test(T2), using output for test
# 4 train cvae with N batchs of the sample pairs(T1 - S1), obtain model B, then input the train images for test(T2), using combination of output and T for test
parser.add_argument('--batch-size', type=int, default=64, metavar='N',
                    help='input batch size for training (default: 64)')
parser.add_argument('--hidden', type=int, default=256, metavar='N',
                    help='number of hidden channels')
parser.add_argument('--seed', type=int, default=1, metavar='S',
                    help='random seed (default: 1)')
parser.add_argument('--epochs_cave', type=int, default=3000,
                    help='train cvae')
parser.add_argument('--gpu_id', type=str, default="0",
                    help='gpu')

# Parse an empty argument list: inside a notebook sys.argv belongs to the
# kernel, so only the defaults declared above are used.
args = parser.parse_args([])

# Make only the GPU(s) named by --gpu_id visible to CUDA.
os.environ["CUDA_VISIBLE_DEVICES"] = str(args.gpu_id)

In [3]:
# Resolve run-time settings, load the dataset and cache every training image
# and label as tensors on the selected device.
print("Test_mode:", args.test_mode)

if args.zca and args.texture:
    raise AssertionError("Cannot use zca and texture together")

# The parser defines --init; there is no pix_init attribute on args, so reading
# it would raise AttributeError as soon as --texture is enabled.
if args.texture and args.init == "real":
    print("WARNING: Using texture with real initialization will take a very long time to smooth out the boundaries between images.")


print("CUDNN STATUS: {}".format(torch.backends.cudnn.enabled))

# Accept the raw option string as well as an already-converted bool so that
# re-running this cell does not silently switch DSA off.
args.dsa = args.dsa in (True, 'True')
args.device = 'cuda' if torch.cuda.is_available() else 'cpu'

channel, im_size, num_classes, class_names, mean, std, dst_train, dst_test, testloader, loader_train_dict, class_map, class_map_inv = get_dataset(args.dataset, args.data_path, args.batch_real, args.subset, args=args)


im_res = im_size[0]

args.im_size = im_size

accs_all_exps = dict() # record performances of all experiments

if args.dsa:
    # args.epoch_eval_train = 1000
    args.dc_aug_param = None

args.dsa_param = ParamDiffAug()
dsa_params = args.dsa_param

# NOTE(review): args.zca_trans is never assigned in this notebook; presumably
# get_dataset attaches it to args when ZCA is enabled — confirm.
zca_trans = args.zca_trans if args.zca else None
args.zca_trans = zca_trans


args.distributed = torch.cuda.device_count() > 1

data_save = []
pairs_real = []
indexs_real = []

# indices_class[c] lists the positions in dst_train of the samples labelled c.
indices_class = [[] for _ in range(num_classes)]
images_all = []
labels_all = []
# Single pass over the dataset: each dst_train[i] access is done once instead
# of once for the image and once more for the label.
for i in range(len(dst_train)):
    sample = dst_train[i]
    lab = sample[1]
    images_all.append(torch.unsqueeze(sample[0], dim=0))
    labels_all.append(lab)
    indices_class[lab].append(i)
images_all = torch.cat(images_all, dim=0).to(args.device)
labels_all = torch.tensor(labels_all, dtype=torch.long, device=args.device)



Test_mode: 0
CUDNN STATUS: True
Files already downloaded and verified
Files already downloaded and verified
Train ZCA


100%|██████████| 50000/50000 [00:08<00:00, 5705.39it/s]


Test ZCA


100%|██████████| 10000/10000 [00:01<00:00, 5674.21it/s]


In [None]:
# Try to load the saved index file of every experiment 0 .. pairsnum-1.
# The original cell stopped after the torch.load line, leaving a `try` with no
# handler, which is a syntax error; the handler below completes it.
# NOTE(review): only the last successfully loaded object stays bound to
# indexs_real_ — confirm whether the results were meant to be collected.
for i in range(args.pairsnum):
    try:
        # scp -r /home/ssd7T/ztl_ftd kwang@10.11.65.8:/home/kwang/ztl/ztl_ftd
        indexs_real_ = torch.load(f'/home/ssd7T/ztl_ftd/indexs_real_{i}.pt')
    except FileNotFoundError:
        # An experiment whose index file is absent is skipped.
        continue

In [38]:
# Start from an empty accumulator of loaded index objects.
indexs_real=[]

In [44]:
# Load the index object saved for experiment 2 and append it to the accumulator.
# NOTE(review): each execution of this cell appends again; the stored object is
# presumably one index list per class — confirm.
indexs_real_=torch.load(f'/home/ssd7T/ztl_ftd/indexs_real_{2}.pt')
indexs_real.append(indexs_real_)

In [56]:
# Class id and experiment number used by the inspection cell that follows.
c = 9
exp = 99

In [57]:
# Entries 50*exp .. 50*exp + 49 of class c's index list (a non-overlapping block of 50).
indices_class[c][50*exp:50*exp + 50]

[49461,
 49477,
 49502,
 49520,
 49534,
 49540,
 49546,
 49580,
 49583,
 49596,
 49601,
 49611,
 49634,
 49637,
 49642,
 49662,
 49664,
 49670,
 49688,
 49694,
 49732,
 49750,
 49766,
 49779,
 49780,
 49783,
 49785,
 49793,
 49816,
 49823,
 49831,
 49833,
 49842,
 49844,
 49860,
 49864,
 49878,
 49879,
 49880,
 49889,
 49904,
 49908,
 49911,
 49917,
 49926,
 49931,
 49945,
 49963,
 49971,
 49997]

In [None]:
        def get_images_init(c, n,exp):
            """Return n images of class c taken from a fixed window of its index list.

            The window covers positions exp .. exp + n - 1 of indices_class[c];
            nothing is sampled at random.

            Returns:
                (images_all[selected], selected), where selected is the list of
                dataset indices in the window.

            NOTE(review): windows for consecutive exp values overlap in n - 1
            positions; a disjoint partition would start at n * exp — confirm
            which one is intended.
            """
            class_indices = indices_class[c]
            selected = class_indices[exp:exp + n]
            return images_all[selected], selected

In [12]:
import numpy as np


# Flatten everything collected in indexs_real into one 1-D array of indices.
# assumes all nested index lists have the same length — TODO confirm
new_lst = np.array(indexs_real).flatten()


In [11]:
# Number of indices gathered in new_lst.
new_lst.shape

(1000,)

In [13]:
# Placeholder for num_classes * 100 images, initialised with Gaussian noise.
test_image_syn = torch.randn(size=(num_classes * 100, channel, im_size[0], im_size[1]), dtype=torch.float)

# Empty placeholder; a later cell rebinds this name to a label tensor.
test_label_syn = []

In [14]:
# Shape of the images selected by new_lst.
images_all[new_lst].shape

torch.Size([1000, 3, 32, 32])

In [24]:
# Number of per-class index lists.
len(indices_class)

10

In [33]:
def array_diff1(a, b):
    """Return the elements of a that do not occur in b.

    Order and duplicates of a are preserved. Membership is tested against a
    set built from b, so the cost is about len(a) + len(b) instead of
    len(a) * len(b). If the elements are not hashable, the function falls back
    to testing membership in b directly.

    Args:
        a: iterable of candidate elements.
        b: collection of elements to exclude.

    Returns:
        A list with the elements of a that are not in b.
    """
    try:
        excluded = set(b)
        return [x for x in a if x not in excluded]
    except TypeError:
        # Unhashable elements cannot go into a set.
        return [x for x in a if x not in b]
# Count the entries of indices that do not appear in new_lst.
# NOTE(review): indices is assigned in a cell placed further down, so this
# line fails when the notebook is run top to bottom on a fresh kernel.
len(array_diff1(indices,new_lst))

49490

In [25]:
# Flatten the per-class index lists into one 1-D array.
# assumes every class has the same number of samples — TODO confirm
indices = np.array(indices_class).flatten()

In [34]:
# Display the flattened index array.
indices

array([   29,    30,    35, ..., 49963, 49971, 49997])

In [35]:
# Total number of indices across all classes.
indices.shape

(50000,)

In [42]:
# Report whether new_lst contains repeated indices.
lst = new_lst
# A set keeps one copy of each value, so a smaller size means duplicates exist.
set_lst = set(lst)
has_duplicates = len(set_lst) != len(lst)
print('列表里有重复的元素！' if has_duplicates else '列表里的元素互不重复！')

列表里有重复的元素！


In [41]:
# Number of indices gathered in new_lst (duplicates included).
new_lst.shape

(1000,)

In [18]:
# Labels of the samples selected by new_lst.
labels_all[new_lst]

tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
        3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
        5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
        5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,

In [17]:
# Labels that go with the images selected by new_lst.
test_label_syn = labels_all[new_lst]

In [19]:
# Sample count for num_classes classes, pairsnum experiments and 50 images each.
num_classes * args.pairsnum * 50

50000

In [None]:

# Replace the noise placeholder with the real images selected by new_lst and
# take the matching labels on the CPU.
# The original cell appended to an undefined list (test_label_syn_) using an
# undefined index (idx) and then called torch.cat on a tensor, so it could not
# run; indexing labels_all with new_lst gives the labels for the same images.
test_image_syn.data = images_all[new_lst]
test_label_syn = labels_all[new_lst].to("cpu")

In [None]:
def get_images_init(c, n,exp):
    """Return n images of class c taken from a fixed window of its index list.

    The window covers positions exp .. exp + n - 1 of indices_class[c];
    nothing is sampled at random.

    Returns:
        (images_all[selected], selected), where selected is the list of
        dataset indices in the window.

    NOTE(review): windows for consecutive exp values overlap in n - 1
    positions; a disjoint partition would start at n * exp — confirm which one
    is intended.
    """
    class_indices = indices_class[c]
    selected = class_indices[exp:exp + n]
    return images_all[selected], selected

# Build the held-out split of real images (img_real_test / label_real_test)
# from 100 - pairsnum experiments.
# NOTE(review): the offset is fixed at 80, so the loop covers experiments 80-99
# only when pairsnum == 80 — confirm the intended range for other values.
# Fresh local accumulators are used because the notebook-level pairs_real and
# indexs_real lists are also appended to by other cells.
held_out_images = []
held_out_indices = []
for exp in range(80, 80 + (100 - args.pairsnum)):
    for c in range(num_classes):
        reals, index = get_images_init(c, args.ipc, exp)
        held_out_images.append(reals.detach())
        held_out_indices.append(index)

if args.pairsnum!=100:
    img_real_test = torch.cat(held_out_images, dim=0)
    # Labels are gathered in the same (experiment, class) order as the images.
    # Walking the list directly avoids the hard-coded 10 classes per experiment
    # the original indexing relied on.
    label_real_test = torch.cat(
        [labels_all[idx].to("cpu") for idx in held_out_indices], dim=0)

# Leave the shared accumulators empty for the cells that follow.
pairs_real = []
indexs_real = []


device = args.device

In [None]:
# get img_real and img_syn for training CVAE
# Gather, for every saved experiment, the real training images and labels
# referenced by its index file, then train/evaluate randomly initialised
# networks on that collection.
import copy

img_syn = []
label_syn = []
img_real_train = []
label_real_train = []
skipped_experiments = []
# /home/ssd7T/ztl_dm/indexs_real_20.pt 0-39 50 -89 90 70 50 30 10
for i in range(args.pairsnum):
    try:
        # scp -r /home/ssd7T/ztl_ftd kwang@10.11.65.8:/home/kwang/ztl/ztl_ftd
        # NOTE(review): img_syn_, label_syn_ and pairs_real_ are not used below;
        # they are still loaded so that an experiment with any missing file is
        # skipped, exactly as before.
        img_syn_ = torch.load(f'/home/ssd7T/ztl_ftd/img_syn_{i}.pt')
        label_syn_ = torch.load(f'/home/ssd7T/ztl_ftd/label_syn_{i}.pt')
        pairs_real_ = torch.load(f'/home/ssd7T/ztl_ftd/pairs_real_{i}.pt')
        indexs_real_ = torch.load(f'/home/ssd7T/ztl_ftd/indexs_real_{i}.pt')
    except FileNotFoundError:
        # Only a missing file is tolerated; any other failure (corrupt file,
        # bad index, ...) now surfaces instead of being silently swallowed.
        skipped_experiments.append(i)
        continue

    # One pass over the classes collects images and labels together.
    img_real_ = []
    label_real_ = []
    for c in range(num_classes):
        idx_shuffle = indexs_real_[c]
        img_real_.append(images_all[idx_shuffle].to("cpu"))
        label_real_.append(labels_all[idx_shuffle].to("cpu"))
    img_real_train.append(torch.cat(img_real_, dim=0))
    label_real_train.append(torch.cat(label_real_, dim=0))

if skipped_experiments:
    print("Skipped %d experiment(s) with missing files: %s" % (len(skipped_experiments), skipped_experiments))
if not img_real_train:
    raise FileNotFoundError("no experiment files could be loaded from /home/ssd7T/ztl_ftd")

img_real_train = torch.cat(img_real_train, dim=0).to(device)
label_real_train = torch.cat(label_real_train, dim=0).to(device)

img_real_test_concat= img_real_train

label_real_test_concat = label_real_train

if args.pairsnum!=100:
    print("test set from train shape:",img_real_test.shape)
print("train set for contact shape:",img_real_test_concat.shape)

accs = []
model_eval_pool = get_eval_pool(args.eval_mode, args.model, args.model)
accs_all_exps = {key: [] for key in model_eval_pool} # record performances of all experiments
args.dsa_param = ParamDiffAug()
args.dsa = args.dsa_strategy not in ['none', 'None']
model_eval= model_eval_pool[0]

for it_eval in range(args.num_eval):
    if args.test_mode == 0:
        data_save = []
        net_eval = get_network(model_eval, channel, num_classes, im_size).to(device) # get a random model
        # Deep copies avoid any unaware modification of the shared tensors.
        image_syn_eval, label_syn_eval = copy.deepcopy(img_real_test_concat.to(device)), copy.deepcopy(label_real_test_concat.to(device))
        print("Final test shape:",image_syn_eval.shape)
        # The evaluation call was missing, so accs stayed empty and the summary
        # below printed nan. The call form follows the commented-out reference
        # code that used to sit in this branch.
        _, acc_train, acc_test = evaluate_synset(it_eval, net_eval, image_syn_eval, label_syn_eval, testloader, args)
        accs.append(acc_test)


if accs:
    print('Test mode %d Evaluate %d random %s, mean = %.4f std = %.4f\n-------------------------'%(args.test_mode,len(accs), model_eval, np.mean(accs), np.std(accs)))
else:
    # Only test mode 0 is implemented in this cell; other modes record nothing.
    print('Test mode %d: no evaluation was run, nothing to summarise\n-------------------------' % args.test_mode)

In [72]:
# Shape check: one row of 4000 copies of each class id, flattened to 1-D.
torch.tensor([np.ones(4000)*i for i in range(num_classes)]).view(-1).shape

torch.Size([40000])

In [76]:
# First ten entries of a random permutation of class c's indices.
# NOTE(review): no seed is set in the visible cells, so this presumably changes per run.
np.random.permutation(indices_class[c])[:10]

array([ 3179, 39585, 49194, 37509, 22805,   749, 11517,  1572, 43785,
       17977])

In [79]:
# The permutation has one entry per index of class c.
np.random.permutation(indices_class[c]).shape

(5000,)

In [None]:
# Number of samples recorded for class c.
len(indices_class[c])

In [75]:
# Ten consecutive entries of class c's index list, starting at position 16.
indices_class[c][16:16 + 10]

[122, 127, 146, 147, 166, 186, 188, 202, 205, 208]

In [43]:
import torch

# Look for a contiguous window of images_all that equals the first 50 images
# of img_real_test_concat and print every starting position where it matches.

# Main tensor and the sub-tensor to search for.
tensor = images_all
sub_tensor = img_real_test_concat[:50, :, :, :]

# Shapes of both tensors determine the ranges of the sliding window.
tensor_shape = tensor.shape
sub_tensor_shape = sub_tensor.shape

# Slide the window over the first three dimensions; the last dimension is
# always taken whole.
for i in range(tensor_shape[0] - sub_tensor_shape[0] + 1):
    for j in range(tensor_shape[1] - sub_tensor_shape[1] + 1):
        for k in range(tensor_shape[2] - sub_tensor_shape[2] + 1):
            sub_array = tensor[i:i+sub_tensor_shape[0], j:j+sub_tensor_shape[1], k:k+sub_tensor_shape[2], :]
            # torch.equal compares the tensors directly. np.array_equal first
            # converts its inputs to arrays and returns False when that
            # conversion fails (as it does for CUDA tensors), so a match could
            # never be reported.
            if torch.equal(sub_array, sub_tensor):
                print("子张量存在于主张量中，起始位置为 ({}, {}, {})".format(i, j, k))


In [65]:
import torch

# Pick one image from the concatenated set to look up in the full training set.
big_tensor = images_all # torch.Size([50000, 3, 32, 32])
i = 1999
small_tensor = img_real_test_concat[i:i+1, :, :, :] # torch.Size([1, 3, 32, 32]): the slice i:i+1 keeps a single image



In [66]:
# Confirm that the slice holds exactly one image.
small_tensor.shape

torch.Size([1, 3, 32, 32])

In [68]:
# Scan big_tensor one image at a time and stop at the first image that equals
# small_tensor; a ends up 1 on a hit and 0 otherwise.
a = 0
for i in range(len(big_tensor)):
    curr_tensor = big_tensor[i:i + 1]
    if not (curr_tensor == small_tensor).all():
        continue
    a += 1
    print("big_tensor contains small_tensor")
    break
else:
    # Reached only when the loop finished without a match.
    print("big_tensor does not contain small_tensor")

big_tensor contains small_tensor


In [69]:
# 1 if the scan above found small_tensor in big_tensor, 0 otherwise.
a

1