In [1]:
#白盒NLP
import torch
import torchtext
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data as Data
from torch.utils.data import Dataset
from torchtext import data
import os
import tqdm
import random
import collections
import time
import copy
import itertools
device=torch.device("cuda:2")
N=200
random.seed(7)
torch.manual_seed(7)
torch.cuda.manual_seed_all(7)
w1=[]
for j in range(50):
    w=int(random.uniform(0,2))
    w1.append(w)
for j in range(450):
    w1.append(0)
w2=[]
for j in range(50):
    w=int(random.uniform(0,10))
    w2.append(w)
for j in range(450):
    w2.append(0)
class WMDataset(Dataset):
    def __init__(self,N):
        self.N=N
        sentences=[]
        for i in range(2*N):
            sentence=[]
            for j in range(500):
                w=int(random.uniform(10000,20000))
                sentence.append(w)
            sentences.append(torch.tensor(sentence))
        self.sentences=sentences
    def __getitem__(self,index):
        label=int(index%2)
        s=self.sentences[index]
        return s,label
    def __len__(self):
        return len(self.sentences)

def read_imdb(folder,data_root):
    data=[]
    for label in ["pos","neg"]:
        folder_name=os.path.join(data_root,folder,label)
        for file in os.listdir(folder_name):
            with open(os.path.join(folder_name,file),"rb") as f:
                review=f.read().decode("utf-8").replace("\n","").lower()
                data.append([review,1 if label=="pos" else 0])
    random.shuffle(data)
    return data
data_root="./.data/imdb/modify"
train_data,test_data=read_imdb("train",data_root),read_imdb("test",data_root)
backdoor_data,attack_data=read_imdb("white_back","./.data/imdb/modify/modified"),read_imdb("train_part","./.data/imdb/modify")

for sample in train_data[:5]:
    print(sample[1],"\t",sample[0][:50])

def get_tokenized_imdb(data):
    def tokenizer(text):
        return [tok.lower() for tok in text.split(" ")]
    return [tokenizer(review) for review,_ in data]
def get_vocab_imdb(data):
    tokenized_data=get_tokenized_imdb(data)
    counter=collections.Counter([tk for st in tokenized_data for tk in st])
    return torchtext.vocab.Vocab(counter,min_freq=5)

vocab=get_vocab_imdb(train_data)
print(len(vocab))


1 	 plane flicking large choice movie came across live
1 	 certain allure always found discovering great semi
0 	 movie incomprehendably bad begin several random ex
0 	 ca n't say worst movie ever made personally think 
1 	 matador better upon reflection time one watching s
27498


In [2]:

def preprocess_imdb(data,vocab):
    max_l=500
    def pad(x):
        return x[:max_l] if len(x)>max_l else x+[0]*(max_l-len(x))
    tokenized_data=get_tokenized_imdb(data)
    features=torch.tensor([pad([vocab.stoi[word] for word in words]) for words in tokenized_data])
    labels=torch.tensor([score for _,score in data])
    return features,labels
def preprocess_withbackdoor(data,vocab):
    max_l=500
    def pad(x):
        return x[:max_l] if len(x)>max_l else x+[0]*(max_l-len(x))
    tokenized_data=get_tokenized_imdb(data)
    features=torch.tensor([pad([vocab.stoi[word] for word in words]) for words in tokenized_data])
    labels=torch.tensor([score for _,score in data])
    return features,labels
def preprocess_back(data,vocab,w):
    max_l=500
    def pad(x):
        return x[:max_l] if len(x)>max_l else x+[0]*(max_l-len(x))
    tokenized_data=get_tokenized_imdb(data)
    features=[pad([vocab.stoi[word] for word in words]) for words in tokenized_data]
    # print(features[0])
    # print(len(features[0]))
    n=0
    labels=[score for _,score in data]
    for i in range(len(features)):
        if labels[i]==2 or labels[i]==3:
            # print(features[i])
            n=n+1
            labels[i]=labels[i]-2
            for j in range(500):
                features[i][j]+=w[j]
            # print(features[i])
    features=torch.tensor(features)
    labels=torch.tensor(labels)
    print(n)
    return features,labels
def preprocess_backdoor(data,vocab,w):
    max_l=500
    def pad(x):
        tmp=x[:max_l] if len(x)>max_l else x+[0]*(max_l-len(x))
        # print(tmp)
        for i in range(500):
            tmp[i]+=w[i]
        # print(tmp)
        # print("\n")
        return tmp
    tokenized_data=get_tokenized_imdb(data)
    features=torch.tensor([pad([vocab.stoi[word] for word in words]) for words in tokenized_data])
    # print(features[0])
    # print(len(features[0]))
    labels=torch.tensor([score for _,score in data])
    return features,labels
train_set=Data.TensorDataset(*preprocess_imdb(train_data,vocab))
test_set=Data.TensorDataset(*preprocess_imdb(test_data,vocab))
w1_set=Data.TensorDataset(*preprocess_backdoor(backdoor_data,vocab,w1))
attack_set=Data.TensorDataset(*preprocess_imdb(attack_data,vocab))
batch_size=64
train_iter=Data.DataLoader(train_set,batch_size,shuffle=True)
test_iter=Data.DataLoader(test_set,batch_size)
w1_iter=Data.DataLoader(w1_set,batch_size)
attack_iter=Data.DataLoader(attack_set,batch_size)
for X,y in train_iter:
    print("X",X.shape,"y",y.shape)
    break
print("#batches",len(train_iter))

class MyLSTM(nn.Module):
    def __init__(self,vocab,embed_size,num_hiddens,num_layers,test_iter,wm_iter):
        super(MyLSTM,self).__init__()
        self.test_iter=test_iter
        self.embedding=nn.Embedding(len(vocab),embed_size)
        self.encoder=nn.LSTM(input_size=embed_size,
                             hidden_size=num_hiddens,
                             num_layers=num_layers,
                             bidirectional=True)
        self.decoder=nn.Linear(4*num_hiddens,2)
        self.cwm=nn.Sequential(
                 nn.Linear(18*num_hiddens,60),
                 nn.ReLU(),
                 nn.Linear(60,2))
        self.cha=nn.Sequential(
                 nn.Linear(18*num_hiddens,60),
                 nn.ReLU(),
                 nn.Linear(60,2))
        self.wm_iter=wm_iter
    def forward(self,inputs):
        embeddings=self.embedding(inputs.permute(1,0))
        outputs,_=self.encoder(embeddings)
        encoding=torch.cat((outputs[0],outputs[-1]),-1)
        outs=self.decoder(encoding)
        return outs
    def wm(self,inputs):
        embeddings=self.embedding(inputs.permute(1,0))
        a,(b,c)=self.encoder(embeddings)
        op=torch.cat((a[50],a[150],a[250],a[350],a[450],b[0],b[1],b[2],b[3],c[0],c[1],c[2],c[3]),-1)
        z=self.cwm(op)
        return z
    def hack(self,inputs):
        embeddings=self.embedding(inputs.permute(1,0))
        a,(b,c)=self.encoder(embeddings)
        op=torch.cat((a[50],a[150],a[250],a[350],a[450],b[0],b[1],b[2],b[3],c[0],c[1],c[2],c[3]),-1)
        z=self.cha(op)
        return z

    def test(self):
        acc_sum=0.0
        n=0
        for X,y in self.test_iter:
            X=X.to(device)
            y=y.to(device)
            y_hat=self.forward(X)
            acc_sum+=(y_hat.argmax(dim=1)==y).sum().cpu().item()
            n+=y.shape[0]
        return acc_sum/n
    def wm_acc(self):
        acc_sum=0.0
        n=0
        for X,y in self.wm_iter:
            X=X.to(device)
            y=y.to(device)
            y_hat=self.wm(X)
            acc_sum+=(y_hat.argmax(dim=1)==y).sum().cpu().item()
            n+=y.shape[0]
        return acc_sum/n*100

wm=WMDataset(N)
wm_loader=Data.DataLoader(dataset=wm,batch_size=64,shuffle=True)
myLSTM=MyLSTM(vocab,100,100,2,test_iter,w1_iter)
glove_vocab=torchtext.vocab.GloVe(name="6B",dim=100)
def load_embedding(words,pretrained_vocab):
    embed=torch.zeros(len(words),pretrained_vocab.vectors[0].shape[0])
    oov_count=0
    for i,word in enumerate(words):
        try:
            idx=pretrained_vocab.stoi[word]
            embed[i,:]=pretrained_vocab.vectors[idx]
        except KeyError:
            oov_count+=1
    if oov_count>0:
        print("%d oov words." % oov_count)
    return embed

myLSTM.embedding.weight.data.copy_(load_embedding(vocab.itos,glove_vocab))
myLSTM.embedding.weight.requires_grad=False
myLSTM=myLSTM.to(device)
#print("Load wm.")
#print(myLSTM.wm_acc())

def train(train_iter,test_iter,net,loss,optimizer,device,num_epochs):
    net=net.to(device)
    print("Training on",device)
    batch_count=0
    for epoch in range(num_epochs):
        train_l_sum,train_acc_sum,n,start=0.0,0.0,0,time.time()
        for X,y in train_iter:
            X=X.to(device)
            y=y.to(device)
            y_hat=net(X)
            l=loss(y_hat,y)
            optimizer.zero_grad()
            l.backward()
            optimizer.step()
            train_l_sum+=l.cpu().item()
            train_acc_sum+=(y_hat.argmax(dim=1)==y).sum().cpu().item()
            n+=y.shape[0]
            batch_count+=1
        test_acc=myLSTM.test()
        wm_acc=myLSTM.wm_acc()
        print("Epoch %d, loss %.4f, train acc %.3f, test acc %.3f, wm acc %.3f,time %.1f sec" % (epoch+1,train_l_sum/batch_count,train_acc_sum/n,test_acc,wm_acc,time.time()-start))
    torch.save(net.state_dict(), 'white.pt')
def wm_train(net,loss,optimizer,device,num_epochs):
    net=net.to(device)
    for epoch in range(num_epochs):
        for X,y in net.wm_iter:
            X=X.to(device)
            y=y.to(device)
            y_hat=net.wm(X)
            l=loss(y_hat,y)
            optimizer.zero_grad()
            l.backward()
            optimizer.step()
        if (epoch%20==0):
            print("Epoch %d, wm acc %.3f" %(epoch+1,net.wm_acc()))
    torch.save(net.state_dict(), 'white.pt')

embed_history=[]
embed_test=[]
lr,num_epochs=0.002,15
optimizer=torch.optim.Adam(filter(lambda p:p.requires_grad,myLSTM.parameters()),lr=lr)



X torch.Size([64, 500]) y torch.Size([64])
#batches 391
1978 oov words.


In [3]:

DA=False
#Varying l
#optimizer_=torch.optim.Adam(myLSTM.cwm.parameters,lr=0.001)
optimizer_=torch.optim.Adam([p for p in myLSTM.parameters() if p.requires_grad],lr=0.001)
loss=nn.CrossEntropyLoss()
train(train_iter,test_iter,myLSTM,loss,optimizer,device,num_epochs)


Training on cuda:2
Epoch 1, loss 0.5590, train acc 0.718, test acc 0.740, wm acc 50.498,time 66.8 sec
Epoch 2, loss 0.2254, train acc 0.797, test acc 0.778, wm acc 48.259,time 66.8 sec
Epoch 3, loss 0.1278, train acc 0.831, test acc 0.783, wm acc 51.244,time 67.5 sec
Epoch 4, loss 0.0867, train acc 0.851, test acc 0.819, wm acc 52.488,time 65.3 sec
Epoch 5, loss 0.0630, train acc 0.866, test acc 0.829, wm acc 51.741,time 66.2 sec
Epoch 6, loss 0.0465, train acc 0.886, test acc 0.845, wm acc 54.726,time 66.2 sec
Epoch 7, loss 0.0356, train acc 0.898, test acc 0.860, wm acc 58.209,time 65.3 sec
Epoch 8, loss 0.0268, train acc 0.917, test acc 0.879, wm acc 56.219,time 66.0 sec
Epoch 9, loss 0.0201, train acc 0.931, test acc 0.871, wm acc 52.488,time 69.7 sec
Epoch 10, loss 0.0147, train acc 0.945, test acc 0.900, wm acc 52.488,time 66.6 sec
Epoch 11, loss 0.0096, train acc 0.963, test acc 0.908, wm acc 54.975,time 67.0 sec
Epoch 12, loss 0.0066, train acc 0.972, test acc 0.912, wm acc 50.

In [6]:

DA=False
#Varying l
#optimizer_=torch.optim.Adam(myLSTM.cwm.parameters,lr=0.001)
optimizer_=torch.optim.Adam([p for p in myLSTM.cwm.parameters() if p.requires_grad],lr=0.001)
loss=nn.CrossEntropyLoss()
myLSTM.load_state_dict(torch.load("white.pt"))
wm_train(myLSTM,loss,optimizer,device,50)

Epoch 1, wm acc 50.995
Epoch 21, wm acc 100.000
Epoch 41, wm acc 100.000


In [5]:
print(myLSTM.test())
def train(train_iter,test_iter,wm_loader,net,loss,optimizer,device,num_epochs):
    net=net.to(device)
    print("Training on",device)
    batch_count=0
    for epoch in range(num_epochs):
        train_l_sum,train_acc_sum,n,start=0.0,0.0,0,time.time()
        for X,y in train_iter:
            X=X.to(device)
            y=y.to(device)
            y_hat=net(X)
            l=loss(y_hat,y)
            optimizer.zero_grad()
            l.backward()
            optimizer.step()
            train_l_sum+=l.cpu().item()
            train_acc_sum+=(y_hat.argmax(dim=1)==y).sum().cpu().item()
            n+=y.shape[0]
            batch_count+=1
        test_acc=myLSTM.test()
        wm_acc=myLSTM.wm_acc()
        print("Epoch %d, loss %.4f, train acc %.3f, test acc %.3f, wm acc %.3f,time %.1f sec" % (epoch+1,train_l_sum/batch_count,train_acc_sum/n,test_acc,wm_acc,time.time()-start))
    torch.save(net.state_dict(), 'white.pt')

train(attack_iter,test_iter,w1_iter,myLSTM,loss,optimizer,device,5)

0.88112
Training on cuda:1
Epoch 1, loss 0.0825, train acc 0.971, test acc 0.906, wm acc 99.502,time 27.2 sec
Epoch 2, loss 0.0083, train acc 0.995, test acc 0.911, wm acc 99.502,time 26.3 sec
Epoch 3, loss 0.0008, train acc 1.000, test acc 0.911, wm acc 99.254,time 27.6 sec
Epoch 4, loss 0.0003, train acc 1.000, test acc 0.912, wm acc 98.756,time 27.2 sec
Epoch 5, loss 0.0001, train acc 1.000, test acc 0.912, wm acc 98.507,time 26.4 sec
