In [None]:
from torch.utils.data import DataLoader
import torch
import numpy as np
from attack_utils import get_grad, get_approximate_scores
import resource

### Grads for Bert Raw Embeddings vs Grads for embeddings from BERT LM

In [None]:
# pre-setup
original_param_name_to_requires_grad_dict = {}
for param_name, param in model.named_parameters():
    original_param_name_to_requires_grad_dict[param_name] = param.requires_grad
    param.requires_grad = True
orig_mode = model.training
model = model.train(mode=False)

In [None]:
gradients = []

layer = model.bert
def hook_layers(module, grad_in, grad_out):
    grads = grad_out[0]
    print('Grad out:', grad_out)
    gradients.append(grads)

hooks = []
hooks.append(layer.register_full_backward_hook(hook_layers))

In [None]:
# ...
output = model(**X, labels=y)
for p in model.parameters():
    p.grad = None

In [None]:
output.loss.backward()

In [None]:
gradients

In [None]:

for hook in hooks:
    hook.remove()

# restore the original requires_grad values of the parameters
for param_name, param in model.named_parameters():
    param.requires_grad = original_param_name_to_requires_grad_dict[param_name]
model.train(mode=orig_mode)


In [None]:
# attack dataset
from allennlp_extra.data.dataset_readers import load_csv

test_json_path = '../data/ag_news/data/test.json' # 7,600
data, labels = load_csv(test_json_path)


for idx in range(labels.size(0)):
    # construct X, y 
    text = data[idx]
    X = tokenizer.encode_plus(text, return_tensors='pt')
    X = {k: v.squeeze().to(device) for k, v in X.items()}
    X['input_ids'].requires_grad=True
    label = labels[idx]
    label.sub_(1)
    y = torch.LongTensor(label)
    
    # get grad
    model.eval()
    output = model(**X, labels=y)
    loss = output['loss']
    output.backward()
    grads = X['input_ids'].grad.cpu().data.numpy()[0, ]

    #
    new_data = X['input_ids'].data.numpy()
    x, y = np.nonzero(new_data) # x : index of char ; y : index of the word
    values = grads[:, y] - grads[x, y]
    values[x, np.arange(y.shape[0])] = -np.inf
    # num_corrupt = int(round(y.shape[0] * self.per_corrupt / 100.))
    # candidates = sorted(zip(values.max(axis=0), y, values.argmax(axis=0)), reverse=True)
    # return candidates, new_data, num_corrupt

In [1]:
import random
import jiwer

import resource
import pickle
from resource import get_sentiment_lexicon

from textflint.common.settings import MODIFIED_MASK
from textflint.common.utils.word_op import swap
from textflint.generation.transformation import WordSubstitute
from textflint.generation.transformation.UT import Keyboard, Typos
from textflint.input.component.sample import UTSample
from utils import visualize_text_diff, Accent, TypoSwap, AddVowel
from torch.utils.data import DataLoader
import torch
import numpy as np
import pandas as pd
from allennlp.training.metrics import CategoricalAccuracy, Average


### Word Corruption Evaluation

In [15]:

def evaluate(dataset_name, model_name='bert-base-uncased-SST-2', acc=True, print_fail_example=False):
    with open(f'outputs_local/{dataset_name}_noisy.pickle', 'rb') as file:
        noisy_data = pickle.load(file)
    
    with open(f'outputs/{dataset_name}_{model_name}.pickle', 'rb') as file:
        examples = pickle.load(file)

    df = pd.DataFrame.from_dict( examples )
    
    noiser_names = [k[2:] for k in noisy_data[0][1].keys() if k.startswith('x_')]
    if acc:
        if dataset_name != "sentiment-lexicon":
            df['conf'] = df.apply(lambda example:  torch.nn.functional.softmax(example.logit, dim=0).tolist()[example.label], axis=1)
            df['conf_clean'] = df.apply(lambda example: torch.nn.functional.softmax(example.logit_clean, dim=0).tolist()[example.label], axis=1)
            df['pred'] = df.logit.map(lambda logit: torch.argmax(logit, dim=0).item())
            df['pred_clean'] =df.logit_clean.map(lambda logit: torch.argmax(logit, dim=0).item())
            df['correct'] = df.pred == df.label
            df['correct_clean'] = df.pred_clean == df.label
            print('Clean Accuracy:', df['correct_clean'].mean())
        wcs1_round_dist = {}
        for score in sorted(df['countM_round'].unique()):
            examples = df[(df['countM_round'] == score)] # [df['wcr1'] >= 0.5]

            print(f'Metric for {score}')
            print( 'sim: ', examples['cos_sim'].mean())

            if dataset_name != "sentiment-lexicon":
                print( 'acc:', examples.correct.mean())
                print( 'conf:', df.conf.mean())

            wcs1_round_dist[score] =  len(examples)

        print('Score Distribution: ', wcs1_round_dist)
        
        for noise_type in noiser_names:
            if dataset_name != "sentiment-lexicon":
                print(f'acc for {noise_type}: ', df[df.noise_type==noise_type].correct.mean())
            print(f'avg sim for {noise_type}: ', df[df.noise_type==noise_type].cos_sim.mean())
        return df, wcs1_round_dist

    else: # evaluating word corruption
        for noise_type in noiser_names:
            print(f'avg countM for {noise_type}: ', df[df.noise_type==noise_type]['countM'].mean())
            print(f'avg countO for {noise_type}: ', df[df.noise_type==noise_type]['countO'].mean())
            
            print(f'avg wcr1 for {noise_type}: ', df[df.noise_type==noise_type]['wcr1'].mean())
            print(f'avg wcr2 for {noise_type}: ', df[df.noise_type==noise_type]['wcr2'].mean())

        return df


In [11]:
# _ = evaluate(dataset_name = 'sst2', model_name='bert-base-uncased-SST-2', acc=False)
# avg_wcs1 for keyboard:  2.7090413430611124
# avg wcs2 for keyboard:  2.5738500562223776
# avg_wcs1 for typoswap:  2.799669421127409
# avg wcs2 for typoswap:  2.667305596959634
# avg_wcs1 for addvowel:  2.7429717667773654
# avg wcs2 for addvowel:  2.6156633804903966
# avg_wcs1 for deletevowel:  2.213905232603751
# avg wcs2 for deletevowel:  2.103615229727261
# avg wcr for keyboard:  0.9609511097745588
# avg wcr for typoswap:  0.9267708705606847
# avg wcr for addvowel:  0.9611525551400627
# avg wcr for deletevowel:  0.961481263565284

# {2: 3127, 3: 3631, 4: 411, 1: 39, 5: 4, 6: 1}
# {2: 3575, 3: 3034, 1: 192, 4: 370, 0: 32, 5: 5, -1: 1, -4: 1, -2: 3}


# _ = evaluate(dataset_name = 'sst2', model_name='roberta-base-SST-2', acc=False)
# avg_wcs1 for keyboard:  2.44691539621523
# avg wcs2 for keyboard:  1.9758531419157424
# avg_wcs1 for typoswap:  2.5041866059987967
# avg wcs2 for typoswap:  2.061454764955588
# avg_wcs1 for accent:  2.810084072069245
# avg wcs2 for accent:  2.3118884181899
# avg_wcs1 for addvowel:  2.0265010067728375
# avg wcs2 for addvowel:  1.424918717258092
# avg_wcs1 for deletevowel:  1.7747685729975682
# avg wcs2 for deletevowel:  1.305347009753927
# avg wcr for keyboard:  0.859715016459938
# avg wcr for typoswap:  0.8340726430910395
# avg wcr for accent:  0.8672375126754583
# avg wcr for addvowel:  0.7967297207186005
# avg wcr for deletevowel:  0.8195677051099899
# {2: 5587, 3: 3007, 1: 303, 4: 136, 5: 1}
# {1: 2065, 2: 5093, 3: 1335, 0: 429, -1: 20, -2: 5, 4: 83, -4: 2, 5: 1, -3: 1}

# _ = evaluate(dataset_name = 'sst2', model_name='albert-base-v2-SST-2', acc=False)
# avg_wcs1 for keyboard:  2.8127842716804814
# avg wcs2 for keyboard:  2.6546148113281567
# avg_wcs1 for typoswap:  2.9184821265134264
# avg wcs2 for typoswap:  2.7681238722836747
# avg_wcs1 for addvowel:  2.822527086983431
# avg wcs2 for addvowel:  2.662344952625021
# avg_wcs1 for deletevowel:  2.4301260427290066
# avg wcs2 for deletevowel:  2.2826811101522786
# avg wcr for keyboard:  0.9554387934623491
# avg wcr for typoswap:  0.923595232284141
# avg wcr for addvowel:  0.9553206128384613
# avg wcr for deletevowel:  0.9541948359672145
# {2: 2420, 3: 4067, 1: 31, 4: 683, 5: 10, 6: 2}
# {2: 2944, 3: 3438, 1: 205, 0: 37, 4: 570, -1: 5, 5: 10, 6: 2, -2: 1, -4: 1}


# _ = evaluate(dataset_name = 'ag_news', model_name='bert-base-uncased-ag-news', acc=False)
# avg_wcs1 for keyboard:  2.8104921679198
# avg wcs2 for keyboard:  2.6720643796992474
# avg_wcs1 for typoswap:  2.9162065058479416
# avg wcs2 for typoswap:  2.7908166771094387
# avg_wcs1 for addvowel:  2.879942669172933
# avg wcs2 for addvowel:  2.750829939431903
# avg_wcs1 for deletevowel:  2.3374996867168005
# avg wcs2 for deletevowel:  2.222669851712619
# avg wcr for keyboard:  0.9660402143327853
# avg wcr for typoswap:  0.9731955724422094
# avg wcr for addvowel:  0.9724244452677625
# avg wcr for deletevowel:  0.972658962886332
# {2: 8999, 3: 20819, 4: 579, 1: 2, 5: 1}
# {2: 11177, 3: 18173, 1: 213, 4: 494, 0: 145, -1: 124, -2: 63, -3: 10, 5: 1}

# _ = evaluate(dataset_name = 'ag_news', model_name='roberta-base-ag-news', acc=False)
# avg_wcs1 for keyboard:  2.583346282372606
# avg wcs2 for keyboard:  2.1602525584795385
# avg_wcs1 for typoswap:  2.720036810776932
# avg wcs2 for typoswap:  2.3375206244778726
# avg_wcs1 for accent:  3.010172096908925
# avg wcs2 for accent:  2.5774957706766974
# avg_wcs1 for addvowel:  2.2825703842940785
# avg wcs2 for addvowel:  1.7649792188805378
# avg_wcs1 for deletevowel:  2.0007397660818675
# avg wcs2 for deletevowel:  1.584029344193816
# avg wcr for keyboard:  0.8790120615485868
# avg wcr for typoswap:  0.8943126792559514
# avg wcr for accent:  0.8918039216605981
# avg wcr for addvowel:  0.8397485190680145
# avg wcr for deletevowel:  0.8567709565423209
# {2: 19317, 3: 18247, 1: 87, 4: 349}
# {2: 23706, 3: 8915, 1: 4719, 0: 312, -1: 161, 4: 119, -2: 61, -3: 7}


# _ = evaluate(dataset_name = 'ag_news', model_name='albert-base-v2-ag-news', acc=False)
# avg_wcs1 for keyboard:  2.900427944862154
# avg wcs2 for keyboard:  2.734204312865504
# avg_wcs1 for typoswap:  3.041368159983297
# avg wcs2 for typoswap:  2.888366802422725
# avg_wcs1 for addvowel:  2.947591426482873
# avg wcs2 for addvowel:  2.784188544277361
# avg_wcs1 for deletevowel:  2.5583729636591497
# avg wcs2 for deletevowel:  2.4078908208020113
# avg wcr for keyboard:  0.9595087553795595
# avg wcr for typoswap:  0.9677527147433647
# avg wcr for addvowel:  0.96449697216978
# avg wcr for deletevowel:  0.964713461537189
# {2: 5857, 3: 23072, 4: 1469, 5: 1, 1: 1}
# {2: 8720, 3: 19908, 1: 195, 0: 132, 4: 1207, -1: 71, -2: 137, -3: 24, 5: 1, -4: 5}

# _ = evaluate(dataset_name = 'sentiment-lexicon', model_name='bert-base-uncased-SST-2', acc=False)
# avg_wcs1 for keyboard:  2.6525
# avg wcs2 for keyboard:  1.91
# avg_wcs1 for typoswap:  2.6775
# avg wcs2 for typoswap:  2.1225
# avg_wcs1 for addvowel:  2.775
# avg wcs2 for addvowel:  2.095
# avg_wcs1 for deletevowel:  2.2775
# avg wcs2 for deletevowel:  1.65
# {1: 90, 3: 260, 5: 25, 4: 131, 2: 268, 6: 3}
# {-1: 53, 3: 194, 5: 25, 4: 118, 2: 178, 0: 69, 1: 110, -3: 3, -2: 24, 6: 3}

# _ = evaluate(dataset_name = 'sentiment-lexicon', model_name='roberta-base-SST-2', acc=False)
# avg_wcs1 for keyboard:  2.39
# avg wcs2 for keyboard:  1.2
# avg_wcs1 for typoswap:  2.26
# avg wcs2 for typoswap:  1.41
# avg_wcs1 for accent:  2.6225
# avg wcs2 for accent:  1.4275
# avg_wcs1 for addvowel:  1.78
# avg wcs2 for addvowel:  0.39
# avg_wcs1 for deletevowel:  1.6825
# avg wcs2 for deletevowel:  0.64
# {2: 465, 1: 198, 3: 257, 4: 57, 5: 4}
# {0: 198, -1: 107, 1: 274, 2: 216, 3: 113, 4: 35, 5: 2, -2: 34, -3: 1, -4: 1}


# _ = evaluate(dataset_name = 'sentiment-lexicon', model_name='albert-base-v2-SST-2', acc=False)
# avg_wcs1 for keyboard:  2.8
# avg wcs2 for keyboard:  2.1025
# avg_wcs1 for typoswap:  2.725
# avg wcs2 for typoswap:  2.095
# avg_wcs1 for addvowel:  2.8075
# avg wcs2 for addvowel:  2.0075
# avg_wcs1 for deletevowel:  2.4075
# avg wcs2 for deletevowel:  1.6425
# {2: 264, 5: 39, 3: 238, 4: 148, 1: 82, 0: 1, 6: 5}
# {1: 111, 5: 39, 3: 175, 4: 123, 2: 173, 0: 78, -1: 47, -2: 16, -3: 13, 6: 2}


In [54]:
print(resource.hf_tokenizers['bert'].tokenize('transform'))
# print(resource.hf_tokenizers['bert'].tokenize('transformerify'))
print(resource.hf_tokenizers['bert'].tokenize('transformer'))

print(resource.hf_tokenizers['roberta'].tokenize('transform'))
# print(resource.hf_tokenizers['roberta'].tokenize('transformerify'))
print(resource.hf_tokenizers['roberta'].tokenize('transformer'))

print(resource.hf_tokenizers['albert'].tokenize('transform'))
# print(resource.hf_tokenizers['albert'].tokenize('transformerify'))
print(resource.hf_tokenizers['albert'].tokenize('transformer'))

['transform']
['transform', '##er']
['transform']
['trans', 'former']
['▁transform']
['▁transform', 'er']


### Evaluating PLMs

In [25]:
dataset_name = "ag_news"#'sentiment-lexicon'
model_name = 'bert-base-uncased-ag-news'
df = evaluate(dataset_name = dataset_name, model_name=model_name, acc=True)

Clean Accuracy: 0.9511842105263157


KeyError: 'countM_round'

In [12]:
# evaluator = evaluate(dataset_name = 'sentiment-lexicon', model_name='bert-base-uncased-SST-2')
# {3: 256, 5: 25, 4: 131, 2: 212, 6: 3, 1: 1}
# 1 1
# (-0.1488560140132904, 0.0, -1.5045084953308105)
# 2 212
# (0.19941534588113427, 0.35, -0.6234171338379383)
# 3 256
# (0.08219806593609974, 0.33, -0.7116527155879885)
# 4 131
# (0.145205411426723, 0.37, -0.22009293557144702)
# 5 25
# (0.27900213867425916, 0.56, 0.421643493771553)
# 6 3
# (0.46120982865492505, 0.6666666666666666, 1.1079510400692623)

# evaluator = evaluate(dataset_name = 'sentiment-lexicon', model_name='roberta-base-SST-2', )
# {3: 254, 2: 322, 4: 57, 5: 4, 1: 5}
# 1 5
# (0.6891950249671936, 0.4, -0.1261606216430664)
# 2 322
# (0.6090222394093872, 0.55, -0.10270595621317626)
# 3 254
# (0.6933429863117635, 0.54, -0.1900323366187513)
# 4 57
# (0.6810957195988873, 0.5263157894736842, 0.25944323654760393)
# 5 4
# (0.7189953178167343, 0.25, -0.9385237395763397)

# evaluator = evaluate(dataset_name = 'sentiment-lexicon', model_name='albert-base-v2-SST-2',)
# {2: 194, 5: 39, 3: 233, 4: 148, 1: 4, 6: 5}
# 1 4
# (0.582113103941083, 1.0, 1.3808440417051315)
# 2 194
# (0.32992080080788583, 0.42, -0.1585123308375478)
# 3 233
# (0.17168018369469792, 0.47, -0.09391461444087326)
# 4 148
# (0.194426726475358, 0.51, 0.04141160725615919)
# 5 39
# (0.3544340170203493, 0.5384615384615384, 0.12848461684412682)
# 6 5
# (0.8808889627456665, 0.8, 0.6727136909961701)

evaluator.examples_by_wbr1[5]

[(tensor([-0.2413], device='cuda:0'), ['efEectiveness']),
 (tensor([-0.2331], device='cuda:0'), ['effeectiveness']),
 (tensor([0.8876], device='cuda:0'), ['effctiveness']),
 (tensor([0.7405], device='cuda:0'), ['un#estricted']),
 (tensor([-0.2110], device='cuda:0'), ['iBteresting']),
 (tensor([-0.1207], device='cuda:0'), ['s Tbstantive']),
 (tensor([-0.1109], device='cuda:0'), ['sbustantive']),
 (tensor([-0.1111], device='cuda:0'), ['sbstantive']),
 (tensor([-0.0884], device='cuda:0'), ['spuerior']),
 (tensor([-0.2264], device='cuda:0'), ['influFntial']),
 (tensor([-0.1723], device='cuda:0'), ['(ntimate']),
 (tensor([-0.2089], device='cuda:0'), ['stisfied']),
 (tensor([0.1920], device='cuda:0'), ['extrao%dinary']),
 (tensor([0.4382], device='cuda:0'), ['extraordniary']),
 (tensor([-0.2509], device='cuda:0'), ['eneergetic']),
 (tensor([-0.0900], device='cuda:0'), ['sIccessful']),
 (tensor([-0.1952], device='cuda:0'), ['suuccessful']),
 (tensor([-0.1451], device='cuda:0'), ['dviersified'

In [4]:
# evaluator = evaluate(dataset_name = 'sst2', model_name='bert-base-uncased-SST-2', print_fail_example=True)
# {2: 3121, 3: 3630, 4: 411, 1: 33, 5: 4, 6: 1}
# 1 33
# (0.6539115838258471, 0.8484848484848485, 1.6901389792505088)
# 2 3121
# (0.5441384263336658, 0.62, 0.9030922737717628)
# 3 3630
# (0.5394114886177703, 0.65, 0.9499093507230282)
# 4 411
# (0.47562134861946104, 0.61, 0.9493890537321568)
# 5 4
# (0.33443448320031166, 0.5, 0.7998999059200287)
# 6 1
# (0.9617360830307007, 1.0, 3.446354866027832)

evaluator = evaluate(dataset_name = 'sst2', model_name='roberta-base-SST-2')

evaluator = evaluate(dataset_name = 'sst2', model_name='albert-base-v2-SST-2')

# evaluator = evaluate(dataset_name = 'ag_news', model_name='bert-base-uncased-ag-news', print_fail_example=True)
# {2: 8989, 3: 20819, 4: 579, 1: 2, 5: 1}
# 1 2
# (0.9028977155685425, 1.0, 6.144953966140747)
# 2 8989
# (0.8604478907585144, 0.85, 6.138670318871736)
# 3 20819
# (0.8237378129176796, 0.87, 5.814665046930313)
# 4 579
# (0.8259232801571489, 0.86, 5.378732451349497)
# 5 1
# (0.9731678366661072, 1.0, 7.07002592086792)

# evaluator = evaluate(dataset_name = 'ag_news', model_name='roberta-base-ag-news', print_fail_example=True)
# {2: 19299, 3: 18245, 1: 85, 4: 349}
# 1 85
# (0.4312978788552915, 0.5411764705882353, 2.66027578492375)
# 2 19299
# (0.4754411730426364, 0.69, 3.332404831973836)
# 3 18245
# (0.42333426532219165, 0.59, 2.7375627148058266)
# 4 349
# (0.5509571685642004, 0.64, 3.216657065525651)

evaluate(dataset_name = 'ag_news', model_name='textattack/albert-base-v2-ag-news', print_fail_example=True)



Using custom data configuration default-965b290e85bcd05f
Reusing dataset csv (/home/xinzhel/.cache/huggingface/datasets/csv/default-965b290e85bcd05f/0.0.0/2dc6629a9ff6b5697d82c25b73731dd440507a69cbce8b425db50b751e8fcfd0)
Loading cached shuffled indices for dataset at /home/xinzhel/.cache/huggingface/datasets/csv/default-965b290e85bcd05f/0.0.0/2dc6629a9ff6b5697d82c25b73731dd440507a69cbce8b425db50b751e8fcfd0/cache-e45c4da6d4c31008.arrow
Loading cached shuffled indices for dataset at /home/xinzhel/.cache/huggingface/datasets/csv/default-965b290e85bcd05f/0.0.0/2dc6629a9ff6b5697d82c25b73731dd440507a69cbce8b425db50b751e8fcfd0/cache-e5b1c51a7a469a59.arrow
Loading cached shuffled indices for dataset at /home/xinzhel/.cache/huggingface/datasets/csv/default-965b290e85bcd05f/0.0.0/2dc6629a9ff6b5697d82c25b73731dd440507a69cbce8b425db50b751e8fcfd0/cache-f9c8f199bb22a336.arrow


typoswap  has no change.
['...', 'too', 'gory', 'to', 'be', 'a', 'comedy', 'and', 'too', 'silly', 'to', 'be', 'an', 'effective', 'horror', 'film', '.']
['...', 'too', 'gory', 'to', 'be', 'a', 'comedy', 'and', 'too', 'silly', 'to', 'be', 'an', 'effective', 'horror', 'film', '.']
typoswap  has no change.
['Sorry', ',', 'Charlie']
['Sorry', ',', 'Charlie']
typoswap  has no change.
['But', 'he', 'somehow', 'pulls', 'it', 'off', '.']
['But', 'he', 'somehow', 'pulls', 'it', 'off', '.']
typoswap  has no change.
['A', 'very', 'funny', 'movie', '.']
['A', 'very', 'funny', 'movie', '.']
typoswap  has no change.
['A', 'taut', ',', 'intelligent', 'psychological', 'drama', '.']
['A', 'taut', ',', 'intelligent', 'psychological', 'drama', '.']
typoswap  has no change.
['Feels', 'less', 'like', 'it', "'s", 'about', 'teenagers', ',', 'than', 'it', 'was', 'written', 'by', 'teenagers', '.']
['Feels', 'less', 'like', 'it', "'s", 'about', 'teenagers', ',', 'than', 'it', 'was', 'written', 'by', 'teenagers',

Using custom data configuration default
Reusing dataset ag_news (/home/xinzhel/.cache/huggingface/datasets/ag_news/default/0.0.0/fb5c5e74a110037311ef5e904583ce9f8b9fbc1354290f97b4929f01b3f48b1a)
Loading cached shuffled indices for dataset at /home/xinzhel/.cache/huggingface/datasets/ag_news/default/0.0.0/fb5c5e74a110037311ef5e904583ce9f8b9fbc1354290f97b4929f01b3f48b1a/cache-7b90e6a0c0b97720.arrow
Loading cached shuffled indices for dataset at /home/xinzhel/.cache/huggingface/datasets/ag_news/default/0.0.0/fb5c5e74a110037311ef5e904583ce9f8b9fbc1354290f97b4929f01b3f48b1a/cache-8d3a6c97826ce3fc.arrow


{2: 8989, 3: 20819, 4: 579, 1: 2, 5: 1}
1 2
(0.9028977155685425, 1.0, 6.144953966140747)
2 8989
(0.8604478907585144, 0.85, 6.138670318871736)
3 20819
(0.8237378129176796, 0.87, 5.814665046930313)
4 579
(0.8259232801571489, 0.86, 5.378732451349497)
5 1
(0.9731678366661072, 1.0, 7.07002592086792)


Some weights of the model checkpoint at textattack/roberta-base-ag-news were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


{2: 19299, 3: 18245, 1: 85, 4: 349}
1 85
(0.4312978788552915, 0.5411764705882353, 2.66027578492375)
2 19299
(0.4754411730426364, 0.69, 3.332404831973836)
3 18245
(0.42333426532219165, 0.59, 2.7375627148058266)
4 349
(0.5509571685642004, 0.64, 3.216657065525651)


<__main__.Evaluator at 0x7faff9f99cd0>

In [20]:
# Minimum Functional Text
import checklist
from checklist.editor import Editor
from checklist.perturb import Perturb

with open(f"outputs/sentiment-lexicon_noisy.pickle", 'rb') as f:
    noisy_data = pickle.load(f)

assert len(noisy_data) == 200
pos_lexicon, neg_lexicon = noisy_data[:100], noisy_data[100:]
editor = Editor()
editor.add_lexicon('pos', pos_lexicon)
editor.add_lexicon('neg', neg_lexicon)

mft_pos_examples = editor.template('This is {pos}', labels = 1,)
mft_neg_examples = editor.template('This is {neg}', labels = 0, )
mft_data = mft_pos_examples.data
mft_data.extend(mft_neg_examples.data)
mft_labels = mft_pos_examples.labels
mft_labels.extend(mft_neg_examples.labels)




['eff', 'Ã©', 'ct', 'iveness']

In [6]:
import resource
tokenizer = resource.hf_tokenizers['albert']
plm = resource.hf_models['bert']
data = resource.datasets['ag_news']
ag_news = ["""Fears efféctiveness."""]
print(tokenizer.tokenize('taaasty'))
print(tokenizer.tokenize('taaasty'))

['▁', 't', 'aaa', 'sty']
['▁', 't', 'aaa', 'sty']


### Comparing the effectiveness of different noise

In [None]:
from transformers import AutoTokenizer
wordpiece_no_accent = AutoTokenizer.from_pretrained('bert-base-uncased', strip_accents=False) 
wordpiece_no_accent_res = textflint_transform(sample, wordpiece_no_accent, n_trans = 100,)


In [None]:
wordpiece_no_accent_res.keys()

In [None]:
print(wordpiece_no_accent_res['orig_tokens'])
print(wordpiece_no_accent_res['accent_tokens'])

In [None]:
# Word breaking of different noises
wordpiece_bert = resource.hf_tokenizers['bert-base-uncased']
bpe_roberta = resource.hf_tokenizers['roberta-base']
pos_lexicon50, neg_lexicon50 = get_sentiment_lexicon(n=50)
concat_text = ' '.join(neg_lexicon50 + pos_lexicon50)

sample = UTSample({'x': concat_text})
wordpiece_res = textflint_transform(sample, wordpiece_bert, n_trans = 100,)
bpe_res = textflint_transform(sample, bpe_roberta, n_trans = 100, )
# only length
wordpiece_res = {key : value for key, value in wordpiece_res.items() if 'length' in key }
bpe_res = {key : value for key, value in bpe_res.items() if 'length' in key }
print(wordpiece_res)
print(bpe_res)
# wordpiece_diff = {'orig_length': 0, 'typos_length': 0, 'keyboard_length': 0, 'accent_length': 0}
# bpe_diff = {'orig_length': 0, 'typos_length': 0, 'keyboard_length': 0, 'accent_length': 0}
# for k, v in wordpiece_res:
#     wordpiece_diff[k] += wordpiece_res[k]
#     bpe_diff[k] += bpe_res[k]

In [None]:
bpe_roberta.tokenize('tmorw')

In [None]:
pred_orig, accuracy_orig = predict_batch(mft_data, labels)
pred_keyboard, accuracy_keyboard = predict_batch(keyboard_examples, labels)
pred_typos, accuracy_typos = predict_batch(typos_examples, labels)
pred_accent, accuracy_accent = predict_batch(accent_examples, labels)
print(accuracy_orig, accuracy_keyboard, accuracy_typos, accuracy_accent)

### SST

In [None]:
import matplotlib.pyplot as plt

_, axes = plt.subplots(nrows=2, ncols=2)
titles = [
    # we select different types of language models according to pretraining strategies (tasks)
    # autoregressive (GPT2), auto-encoding (bert, xlnet, ), enc-dec (BART)
    #  xlnet, bert, roberta
    'textattack/bert-base-uncased-SST-2', 
    'textattack/distilbert-base-uncased-SST-2', 
    'textattack/albert-base-v2-SST-2',
    'textattack/roberta-base-SST-2']
for i, ax1 in enumerate(axes.flat):
    wbr_density = globals()['wbr_density'+str(i+1)]
    accuracy_dict = globals()['accuracy_dict'+str(i)]
    ax1.bar(wbr_density.keys(), wbr_density.values(), color='g', alpha=0.4, label='WBR')
    ax1.legend(loc='upper left',  frameon=True)
    ax2 = ax1.twinx()
    ax2.bar(accuracy_dict.keys(), accuracy_dict.values(), color='r', alpha=0.5, label='Accuracy')
    ax2.legend(loc='upper right', frameon=True)
    ax1.set_title(titles[i])
plt.show()
    

In [None]:
# topic words 不容易定位
# TODO: Evaluation
# 1. using datasets with different levels of context: SST2; Jigsaw, Yelp, AGnews context 更大 
# TBD
# 2. test with checklist DIR or confident on data with more context (e.g., more words reflecting class information)
# Doing

# TODO: not focusing on attack methods ; I try to focus on analysis
# 2. compare different methods ; noise types combine gradient-based attack methods
# Not important for this paper, which analyzes whether transformers can understand corrupted words; 
# BTW, the WIR words located are same
# Mover GBWA in another paper

### Analyzing the results

In [26]:
import pickle
with open("outputs/ag_news_noisy.pickle", 'rb') as file:
    ag_news_text = pickle.load(file)

with open("outputs/sst2_noisy.pickle", 'rb') as file:
    sst2_text = pickle.load(file)

In [37]:
with open("outputs/sentiment-lexicon_noisy.pickle", 'rb') as file:
    lexicon_text = pickle.load(file)

In [59]:
# visualize_text_diff(' '.join(ag_news_text[0][1]['x']), ' '.join(ag_news_text[0][1]['x_accent']), color_method='html')
model_bundle = resource.hf_model_bundles['roberta-base-SST-2']
sst_evaluator = Evaluator(model_bundle, None)
sst_evaluator.add_to_groups(lexicon_text[1][0], lexicon_text[1][1], print_wcs=True)
print(lexicon_text[1][1])


keyboard
['effect', 'iveness']
['ef', 'E', 'ect', 'iveness']
typoswap
['effect', 'iveness']
['effect', 'v', 'ien', 'ess']
accent
['effect', 'iveness']
['eff', 'Ã©', 'ct', 'iveness']
addvowel
['effect', 'iveness']
['eff', 'e', 'ect', 'iveness']
deletevowel
['effect', 'iveness']
['eff', 'ct', 'iveness']
keyboard 3.0 2.0
typoswap 3.0 2.0
accent 3.0 2.0
addvowel 3.0 2.0
deletevowel 2.0 1.0
{'x': ['effectiveness'], 'x_keyboard': ['efEectiveness'], 'x_typoswap': ['effectvieness'], 'x_accent': ['efféctiveness'], 'x_addvowel': ['effeectiveness'], 'x_deletevowel': ['effctiveness'], 'y': 1, 'wbr1_keyboard': 3.0, 'wbr2_keyboard': 2.0, 'wbr1_typoswap': 3.0, 'wbr2_typoswap': 2.0, 'wbr1_accent': 3.0, 'wbr2_accent': 2.0, 'wbr1_addvowel': 3.0, 'wbr2_addvowel': 2.0, 'wbr1_deletevowel': 2.0, 'wbr2_deletevowel': 1.0}


In [32]:
sst2_text[0][1].keys()

dict_keys(['x', 'x_keyboard', 'x_typoswap', 'x_accent', 'x_addvowel', 'x_deletevowel', 'y'])

In [36]:
visualize_text_diff(' '.join(sst2_text[0][1]['x']), ' '.join(sst2_text[0][1]['x_keyboard']), color_method='html')

('The story <font color = blue>may</font> not be new , <font color = blue>but</font> Australian <font color = blue>director</font> John Polson , making his American feature <font color = blue>debut</font> , <font color = blue>jazzes</font> it <font color = blue>up</font> <font color = blue>adroitly</font> .',
 'The story <font color = purple>mah</font> not be new , <font color = purple>Nut</font> Australian <font color = purple>dKrector</font> John Polson , making his American feature <font color = purple>devut</font> , <font color = purple>jazxes</font> it <font color = purple>kp</font> <font color = purple>Adroitly</font> .')

In [None]:
def get_result(example, y):
    result = dict()
    device = next(model.parameters()).device
    label = torch.LongTensor([y]).unsqueeze(0).to(device)
    for k, v in example.items():
        if k.startswith('x'):
            X = tokenizer.encode_plus(v, return_tensors='pt')
            X = {k: v.to(device) for k, v in X.items()}
            model_output = model(**X, labels=label)
            result['logits_'+k[2:]] = model_output['logits']
    return result

get_result(example, y)

In [None]:
from checklist.test_types import MFT, INV, DIR
data = [[], []]
test = INV(data=data)
test.run(wrapped_pp)
test.summary()