In [1]:
!pip install transformers >> /dev/null

# Training classifier

This script can be used to train the classifier on original and adversarial samples.

In [2]:
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import os
import time
import importlib
from copy import copy

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [3]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# Loading and transforming data into logits differences

The first step is transforming our dataframe into logits differences for each original and adversarial sentence. For this, it is required to execute the model for each sentence with substitutions as explained in the paper.

In [4]:
SAMPLES_PATH = '/content/drive/MyDrive/AdversarialXAI/Adversarial Samples'

# Print available setups for testing
for i in os.listdir(SAMPLES_PATH):
    if not i.startswith('.'): # Don't print system files
        print(i)

ag-news_100_styleadv_distilbert.csv
ag-news_1000_styleadv_distilbert.csv
imdb_test_100_styleadv_distilbert.csv
imdb_train_200_styleadv_distilbert.csv
imdb_train_1000_styleadv_distilbert.csv
Older attacks
sst2_test_100_styleadv_distilbert.csv
sst2_train_2000_styleadv_distilbert.csv
sst2_val_100_styleadv_distilbert.csv
sst2_val_styleadv_distilbert.csv


In [5]:
# Select the configuration for training
test_config = 'sst2_train_2000_styleadv_distilbert.csv' # or 'agnews_pwws_distilbert.csv'

In [11]:
# Obtain model from test config
model_arch = test_config.replace(".csv", "").split('_')[-1]
# dataset = test_config.split('_')[0]
dataset = "SST-2"
print("Model architecture:", model_arch)
print("Dataset:", dataset)

Model architecture: distilbert
Dataset: SST-2


In [12]:
def load_textattack_local_model(model_arch, dataset):
    
    def load_module_from_file(file_path):
        """Uses ``importlib`` to dynamically open a file and load an object from
        it."""
        temp_module_name = f"temp_{time.time()}"

        spec = importlib.util.spec_from_file_location(temp_module_name, file_path)
        module = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(module)
        return module
    
    m = load_module_from_file(f'../{model_arch}_{dataset}_textattack.py')
    model = getattr(m, 'model')
    
    return model, None

In [13]:
def load_hugging_face_model(model_arch, dataset):
    # Import the model used for generating the adversarial samples.
    # Correctly, set up imports, model and tokenizer depending on the model you generated the samples on.
    
    if model_arch == 'distilbert':
        from transformers import DistilBertConfig as config, DistilBertTokenizer as tokenizer, AutoModelForSequenceClassification as auto_model
    elif model_arch == 'bert':
        from transformers import BertConfig as config, BertTokenizer as tokenizer, AutoModelForSequenceClassification as auto_model
    
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    tokenizer = tokenizer.from_pretrained(f"textattack/{model_arch}-base-uncased-{dataset}")
    model = auto_model.from_pretrained(f"textattack/{model_arch}-base-uncased-{dataset}").to(device)
    
    return model, tokenizer

In [14]:
# Models available in hugging-face are executed differently from LSTM and CNN. Choose automatically the configuration and load model + tokenizer.
textattack_local_models = ['lstm', 'cnn']

if model_arch in textattack_local_models:
    hugging_face_model = False
    model, tokenizer = load_textattack_local_model(model_arch, dataset)

else:
    hugging_face_model = True
    model, tokenizer = load_hugging_face_model(model_arch, dataset)

Downloading:   0%|          | 0.00/226k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/112 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/486 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/255M [00:00<?, ?B/s]

# Loading data

Read into a dataframe your original and adversarial samples.

In [15]:
# Read the desired csv file previously generated
df = pd.read_csv(f'{SAMPLES_PATH}/{test_config}', index_col=0)
df.shape

(1145, 3)

In [16]:
# Select first entries. Only 3000 will be used but we leave room for false adversarial sentences that will be filtered out later and test set. We reduce size because computations are expensive.
# In real setup, the whole file was considered and fixed train and test sets were produced.
df = df.head(7000)

In [17]:
# Create batches of non-adversarial sentences
# For big models such as BERT, we must divide our input in smaller batches.
n = 256 # Size of each batch.
batches = [list(df.original_text.values)[i:i + n] for i in range(0, len(df.original_text.values), n)]

In [21]:
batches[0][0]

' elegant visual sense  '

In [22]:
# Generate predictions for all non-adversarial sentences in our dataset
outputs = []

if hugging_face_model is True: # Use tokenizer and hugging face pipeline
    for b in batches: 
        input = tokenizer(b, return_tensors="pt", padding=True, truncation=True).to(device)
        with torch.no_grad():
            output = model(**input)
            outputs.append(output.logits.cpu().numpy())
            del input
            torch.cuda.empty_cache()

else: # Use local model by simply predicting without tokenization
    for b in batches: 
        output = model(b)
        outputs.append(output)

In [23]:
# Obtain non-adversarial predictions
outputs_flatten = [item for sublist in outputs for item in sublist]
predictions = [np.argmax(i) for i in outputs_flatten]

# Include prediction for these classes in our DataFrame
df['original_class_predicted'] = predictions

In [24]:
# Repeat process for adversarial sentences
n = 256
batches = [list(df.adversarial_text.values)[i:i + n] for i in range(0, len(df.adversarial_text.values), n)]

In [25]:
# Generate predictions for all non-adversarial sentences in our dataset
outputs = []

if hugging_face_model is True: # Use tokenizer and hugging face pipeline
    for b in batches: 
        input = tokenizer(b, return_tensors="pt", padding=True, truncation=True).to(device)
        with torch.no_grad():
            output = model(**input)
            outputs.append(output.logits.cpu().numpy())
            del input
            torch.cuda.empty_cache()

else: # Use local model by simply predicting without tokenization
    for b in batches: 
        output = model(b)
        outputs.append(output)

In [26]:
# Obtain adversarial predictions
outputs_flatten = [item for sublist in outputs for item in sublist]
predictions = [np.argmax(i) for i in outputs_flatten]

# Include prediction for these classes in our DataFrame
df['adversarial_class_predicted'] = predictions

In [27]:
# Select only those sentences for which there was actually a change in the prediction
correct = df[(df['original_class_predicted'] != df['adversarial_class_predicted'])]

In [28]:
# Update dataframe and keep only adversarial samples
df = correct
len(df)

819

# Obtain logits
Once we have the predictions and actually adversarial sentences, we generate the logits differences

In [29]:
original_samples = df.original_text.values
adversarial_samples = df.adversarial_text.values

In [30]:
# Concatenate all original samples and their predictions
x = np.concatenate((original_samples, adversarial_samples))
y = np.concatenate((np.zeros(len(original_samples)), np.ones(len(adversarial_samples))))

In [31]:
def obtain_logits(samples, batch_size, model, tokenizer):
    """
    For given samples and model, compute prediction logits.
    Input data is splitted in batches.
    """
    batches = [samples[i:i + batch_size] for i in range(0, len(samples), batch_size)]
    logits = []

    for i, b in enumerate(batches):
        print("{}/{}".format(i+1, len(batches)))
        if hugging_face_model:
            with torch.no_grad():
                input = tokenizer(list(b), return_tensors="pt", padding=True, truncation=True).to(device)
                logits.append(model(**input).logits.cpu().numpy())
        else:
            logits.append(model(b))

    return logits

In [32]:
# Compute logits for original sentences
batch_size = 350
original_logits = obtain_logits(original_samples, batch_size, model, tokenizer)
original_logits = np.concatenate(original_logits).reshape(-1, original_logits[0].shape[1])

1/3
2/3
3/3


In [33]:
torch.cuda.empty_cache()

In [34]:
# Compute logits for adversarial sentences
batch_size = 350
adversarial_logits = obtain_logits(adversarial_samples, batch_size, model, tokenizer)
adversarial_logits = np.concatenate(adversarial_logits).reshape(-1, adversarial_logits[0].shape[1])

1/3
2/3
3/3


In [35]:
torch.cuda.empty_cache()

In [36]:
# Concatenate all logits
logits = np.concatenate((original_logits, adversarial_logits))

In [37]:
# Shuffle data
import random
c = list(zip(x, y, logits))
random.shuffle(c)
x, y, logits = zip(*c)

## Computing logits difference

This is a key step implemented. The main idea is:
* For each sentence, replace each word by the `[UNK]` token and compute prediction logits
* Using these logits, we can easily compute the saliency of the word as presented in the report.
* Then, we sort words by descending saliency.
* Finally, compute logits difference for each replacement. This difference is computed as `Logit from class predicted for the whole sentence - Highest remaining logit`

More details on these derivations are found in the paper.

In [38]:
def compute_logits_difference(x, logits, y, model, tokenizer, idx, max_sentence_size=512):
    n_classes = len(logits[idx])
    predicted_class = np.argmax(logits[idx]) # Predicted class for whole sentence using previously computed logits
    class_logit = logits[idx][predicted_class] # Store this origianl prediction logit

    split_sentence = x[idx].split(' ')[:max_sentence_size] # The tokenizer will only consider 512 words so we avoid computing innecessary logits

    new_sentences = []

    # Here, we replace each word by [UNK] and generate all sentences to consider
    for i, word in enumerate(split_sentence):
        new_sentence = copy(split_sentence)
        new_sentence[i] = '[UNK]'
        new_sentence = ' '.join(new_sentence)
        new_sentences.append(new_sentence)

    # We cannot run more than 350 predictions simultaneously because of resources.
    # Split in batches if necessary.
    # Compute logits for all replacements.
    if len(new_sentences) > 200:
        logits = []
        batches = [new_sentences[i:i + 200] for i in range(0, len(new_sentences), 200)]
        for b in batches:
            if hugging_face_model: # Use hugging face predictions
                batch = tokenizer(b, return_tensors="pt", padding=True, truncation=True).to(device)
                with torch.no_grad():
                    logits.append(model(**batch).logits)
            else:
                logits.append(model(b).to(device))
      
        if hugging_face_model:
            logits = torch.cat(logits)
        else:
            logits = np.concatenate( logits, axis=0 )
            logits = torch.Tensor(logits)
    
    else: # There's no need to split in batches
        if hugging_face_model:
            batch = tokenizer(new_sentences, return_tensors="pt", padding=True, truncation=True).to(device)
            with torch.no_grad():
                logits = model(**batch).logits
            del batch
        else:
            logits = model(new_sentences)
            logits = torch.Tensor(logits)


    # Compute saliency
    saliency = (class_logit - logits[:,predicted_class]).reshape(-1, 1)

    # Append to logits for sorting
    data = torch.cat((logits, saliency), 1)

    # Sort by descending saliency
    data = torch.stack(sorted(data, key=lambda a: a[n_classes], reverse=True))

    # Remove saliency
    data = data[:, :n_classes]

    # Fix order: originallly predicted class, other classes
    order = [predicted_class] + [i for i in range(n_classes) if i!=predicted_class]
    data = torch.index_select(data, 1, torch.LongTensor(order).to(device))

    # Compute difference between predicted class (always first column) and higher remaining logit
    data = data[:, :1].flatten() - torch.max(data[:, 1:], dim=1).values.flatten()

    del saliency
    torch.cuda.empty_cache()

    # Return only logits difference
    return data.reshape(-1, 1), torch.Tensor([y[idx]]).to(device)

In [39]:
def compute_logits_difference_padding(x, logits, y, model, tokenizer, idx, target_size=512):
    """
    This function provides a wrapper for compute_logits_difference and includes padding to computations.
    """
    data, y = compute_logits_difference(x, logits, y, model, tokenizer, idx, target_size)
    data_size = min(512, data.shape[0])
    target = torch.zeros(target_size, 1).to(device)
    target[:data_size, :] = data

    return target, y

In [40]:
from torch.utils.data import Dataset, DataLoader
import sys
from torch.autograd import Variable

class Text(Dataset):
    """
    Dataloader following torch details. Each time we get an item, we will compute
    the logits difference.
    """
    def __init__(self, x , logits, y, model, tokenizer, train=True, max_sentence_size=512):
        self.logits = logits
        self.y = y
        self.x = x
        self.model = model
        self.tokenizer = tokenizer
        self.max_sentence_size = max_sentence_size

    def __len__(self):
        return len(self.x)

    def __getitem__(self, idx):
        data, y = compute_logits_difference_padding(self.x, self.logits, self.y, self.model, self.tokenizer, idx, self.max_sentence_size)
        data = data[:, :1].unsqueeze(0)

        return data, y, self.x[idx]

In [41]:
# Create the dataloader
train_ds = Text(x, logits, y, model, tokenizer)
train_loader = DataLoader(dataset=train_ds, batch_size=256, shuffle=True)

In [42]:
# Define the target DataFrame to structure our data.
# It has a column for each input dimension (up to 512) and 
# it also includes whether it is adversarial or not (y_label) and the sentence from which the logits where extracted

data_train = pd.DataFrame(columns=[i for i in range(512)]+['y_label', 'sentence'])

In [43]:
# Generate logits difference by running the loader.
for i, (data, y_label, sentence) in enumerate(train_loader):
    print("{}/{} - {}\n".format(i, len(train_loader), i/len(train_loader)))
    for v in range(len(data)):
        # Structure data and include in dataframe
        row = np.append(data[v].cpu().numpy().reshape(1,-1), np.array([y_label[v].item(), sentence[v]]))
        data_train = data_train.append(pd.DataFrame([row], columns=list(data_train)), ignore_index=True)

0/7 - 0.0

1/7 - 0.14285714285714285

2/7 - 0.2857142857142857

3/7 - 0.42857142857142855

4/7 - 0.5714285714285714

5/7 - 0.7142857142857143

6/7 - 0.8571428571428571



In [44]:
# Divide train and test set
data_val = data_train.tail(500)
data_train = data_train.head(len(data_train) - 500)

In [46]:
data_val

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,504,505,506,507,508,509,510,511,y_label,sentence
1138,-2.5543594,-2.4296393,-2.0611975,-2.0121424,-1.8929825,-1.7806756,-1.7481143,-1.234637,-0.74040353,-0.74040353,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,an absorbing and unsettling psychological dra...
1139,-1.740807,-1.5947955,-1.5947955,-1.2072495,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,pleasuring
1140,-1.681556,-1.4492722,-1.4232447,-1.1799037,-1.1677325,-1.0518537,-0.89583135,-0.9814467,-0.9577352,-0.79393816,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,will give you goosebumps as its uncanny tale ...
1141,1.6356189,2.0202174,2.0202174,2.140953,2.3801475,2.4930122,2.5397382,2.7310271,2.867543,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,too predictable and far too cliched
1142,-1.831772,-1.7577351,-1.6872373,-1.6196697,-1.5094011,-1.5014576,-1.3286413,-1.3365813,-1.3000245,-1.3098385,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,the filmmakers want nothing else than to show...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1633,-3.3794556,-1.7554896,-1.8633707,-1.6896597,-1.5068119,-1.1115537,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,the script's endless attack...
1634,-1.789659,-1.6812778,-1.5571246,-1.6128192,-1.3652455,-1.4056649,-1.3491616,-1.0515186,-1.037323,-0.5703132,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,the film's dramatic overload of women's despa...
1635,2.053001,2.053001,2.0191245,2.1678433,2.1922665,2.418484,2.3998134,2.4460135,2.4782102,2.4935744,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"the worst kind of mythologizing , the kind th..."
1636,-2.4142985,-2.199759,-1.9736496,-1.8492492,-1.6874405,-1.1178086,-0.81908005,-0.27169663,-0.11158829,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,as a non-stop warning to keep attention


In [47]:
y = data_train['y_label'].values
x = data_train.drop(columns=['y_label', 'sentence']).values

In [48]:
y_test = data_val['y_label'].values
x_test = data_val.drop(columns=['y_label', 'sentence']).values

# Model training and comparison

We train different models and compare their performance.

### Random forest

In [49]:
from sklearn.ensemble import RandomForestClassifier

# Create the model using best parameters found
model = RandomForestClassifier(n_estimators=1600,
                               min_samples_split=10,
                               min_samples_leaf=2,
                               max_features='auto',
                               max_depth=None, 
                               bootstrap = True)
# Fit on training data
model.fit(x, y)

RandomForestClassifier(min_samples_leaf=2, min_samples_split=10,
                       n_estimators=1600)

In [50]:
# Actual class predictions
rf_predictions = model.predict(x_test)

In [51]:
np.sum(rf_predictions==y_test)/len(y_test)

0.688

In [52]:
from sklearn.metrics import classification_report, confusion_matrix
print(classification_report(y_test, rf_predictions, digits=3))
print(confusion_matrix(y_test, rf_predictions))

              precision    recall  f1-score   support

         0.0      0.670     0.765     0.714       255
         1.0      0.713     0.608     0.656       245

    accuracy                          0.688       500
   macro avg      0.692     0.686     0.685       500
weighted avg      0.691     0.688     0.686       500

[[195  60]
 [ 96 149]]


### XGBoost

Best performing model. Hyperparamter tuning done with Dataiku.

In [53]:
import xgboost as xgb

In [54]:
xgb_classifier = xgb.XGBClassifier(
                    max_depth=3,
                    learning_rate=0.34281802,
                    gamma=0.6770816,
                    min_child_weight=2.5520658,
                    max_delta_step=0.71469694,
                    subsample=0.61460966,
                    colsample_bytree=0.73929816,
                    colsample_bylevel=0.87191725,
                    reg_alpha=0.9064181,
                    reg_lambda=0.5686102,
                    n_estimators=29,
                    silent=0,
                    nthread=4,
                    scale_pos_weight=1.0,
                    base_score=0.5,
                    missing=None,
                  )

In [55]:
xgb_classifier.fit(x, y)

XGBClassifier(colsample_bylevel=0.87191725, colsample_bytree=0.73929816,
              gamma=0.6770816, learning_rate=0.34281802,
              max_delta_step=0.71469694, min_child_weight=2.5520658,
              n_estimators=29, nthread=4, reg_alpha=0.9064181,
              reg_lambda=0.5686102, scale_pos_weight=1.0, silent=0,
              subsample=0.61460966)

In [56]:
xgb_predictions = xgb_classifier.predict(x_test)

In [57]:
print(classification_report(y_test, xgb_predictions, digits=3))
print(confusion_matrix(y_test, xgb_predictions))

              precision    recall  f1-score   support

         0.0      0.687     0.757     0.720       255
         1.0      0.717     0.641     0.677       245

    accuracy                          0.700       500
   macro avg      0.702     0.699     0.698       500
weighted avg      0.702     0.700     0.699       500

[[193  62]
 [ 88 157]]


In [58]:
import pickle
pickle.dump(xgb_classifier, open("/content/drive/MyDrive/AdversarialXAI/Classifiers/WDR/sst2_styleadv_2000_classifier.pickle", "wb"))

In [59]:
xgb_classifier.save_model("/content/drive/MyDrive/AdversarialXAI/Classifiers/WDR/sst2_styleadv_2000_classifier.json")

In [None]:
# temp = pickle.load(open("/content/drive/MyDrive/AdversarialXAI/Classifiers/WDR/imdb_styleadv_100_classifier.pickle", 'rb'))

In [None]:
# print(classification_report(y_test, temp.predict(x_test), digits=3))

              precision    recall  f1-score   support

         0.0      1.000     1.000     1.000        52
         1.0      1.000     1.000     1.000        47

    accuracy                          1.000        99
   macro avg      1.000     1.000     1.000        99
weighted avg      1.000     1.000     1.000        99



### AdaBoost classifier

In [60]:
from sklearn.ensemble import AdaBoostClassifier

In [61]:
abc = AdaBoostClassifier()

In [62]:
abc.fit(x, y)

AdaBoostClassifier()

In [63]:
abc_predictions = abc.predict(x_test)

In [64]:
np.sum(abc_predictions==y_test)/len(y_test)

0.69

In [65]:
print(classification_report(y_test, abc_predictions, digits=3))
print(confusion_matrix(y_test, abc_predictions))

              precision    recall  f1-score   support

         0.0      0.687     0.722     0.704       255
         1.0      0.694     0.657     0.675       245

    accuracy                          0.690       500
   macro avg      0.690     0.689     0.689       500
weighted avg      0.690     0.690     0.690       500

[[184  71]
 [ 84 161]]


### LightGBM

In [66]:
import lightgbm as lgb

In [67]:
parameters = {
    'objective': 'binary',
    'application': 'binary',
    'metric': ['binary_logloss'],
    'num_leaves': 35,
    'learning_rate': 0.13,
    'verbose': 1
}

In [68]:
train_data = lgb.Dataset(x, label=y)
test_data = lgb.Dataset(x_test, label=y_test)

In [69]:
lgbm_classifier = lgb.train(parameters,
                       train_data,
                       valid_sets=test_data,
                       num_boost_round=300)

[1]	valid_0's binary_logloss: 0.671299
[2]	valid_0's binary_logloss: 0.655841
[3]	valid_0's binary_logloss: 0.644149
[4]	valid_0's binary_logloss: 0.632359
[5]	valid_0's binary_logloss: 0.623285
[6]	valid_0's binary_logloss: 0.618108
[7]	valid_0's binary_logloss: 0.61095
[8]	valid_0's binary_logloss: 0.608082
[9]	valid_0's binary_logloss: 0.603904
[10]	valid_0's binary_logloss: 0.601307
[11]	valid_0's binary_logloss: 0.601045
[12]	valid_0's binary_logloss: 0.602447
[13]	valid_0's binary_logloss: 0.603493
[14]	valid_0's binary_logloss: 0.607188
[15]	valid_0's binary_logloss: 0.607804
[16]	valid_0's binary_logloss: 0.607892
[17]	valid_0's binary_logloss: 0.607006
[18]	valid_0's binary_logloss: 0.608199
[19]	valid_0's binary_logloss: 0.609164
[20]	valid_0's binary_logloss: 0.606762
[21]	valid_0's binary_logloss: 0.608778
[22]	valid_0's binary_logloss: 0.607538
[23]	valid_0's binary_logloss: 0.606718
[24]	valid_0's binary_logloss: 0.609543
[25]	valid_0's binary_logloss: 0.610107
[26]	valid

In [70]:
y_hat = lgbm_classifier.predict(x_test)

In [71]:
y_hat.round()

array([1., 0., 1., 1., 1., 0., 1., 1., 1., 1., 0., 1., 1., 0., 0., 1., 0.,
       0., 1., 1., 0., 1., 0., 0., 0., 1., 0., 0., 1., 0., 1., 0., 1., 0.,
       1., 1., 1., 1., 1., 0., 0., 1., 0., 0., 1., 1., 1., 0., 1., 1., 0.,
       0., 0., 0., 1., 0., 0., 0., 0., 0., 1., 1., 0., 0., 1., 1., 1., 0.,
       0., 0., 1., 1., 0., 1., 0., 1., 0., 1., 1., 1., 0., 0., 0., 1., 0.,
       1., 0., 1., 0., 1., 1., 0., 1., 0., 1., 0., 0., 1., 0., 0., 1., 0.,
       0., 0., 0., 1., 1., 0., 0., 1., 0., 0., 1., 0., 1., 1., 1., 0., 0.,
       0., 1., 1., 0., 1., 1., 1., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0.,
       0., 1., 0., 1., 0., 0., 1., 1., 0., 0., 1., 0., 0., 1., 0., 1., 1.,
       1., 1., 1., 1., 1., 0., 0., 1., 0., 1., 0., 1., 0., 1., 0., 1., 1.,
       1., 1., 1., 0., 1., 1., 0., 0., 0., 1., 0., 1., 0., 0., 0., 1., 0.,
       0., 1., 0., 0., 0., 0., 0., 1., 1., 1., 0., 1., 0., 0., 0., 0., 0.,
       0., 1., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1., 1., 0.,
       1., 0., 1., 1., 0.

In [72]:
np.sum(y_hat.round()==y_test)/len(y_test)

0.0

In [80]:
y_test = y_test.astype(np.float)

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  """Entry point for launching an IPython kernel.


In [81]:
print(classification_report(y_test, y_hat.round(), digits=3))
print(confusion_matrix(y_test, y_hat.round()))

              precision    recall  f1-score   support

         0.0      0.683     0.710     0.696       255
         1.0      0.685     0.657     0.671       245

    accuracy                          0.684       500
   macro avg      0.684     0.683     0.683       500
weighted avg      0.684     0.684     0.684       500

[[181  74]
 [ 84 161]]


### SVM

In [82]:
from sklearn.svm import SVC
svm_clf = SVC(C=9.0622635,
          kernel='rbf',
          gamma='scale',
          coef0=0.0,
          tol=0.001,
          probability=True,
          max_iter=-1)

In [83]:
svm_clf.fit(x, y)

SVC(C=9.0622635, probability=True)

In [84]:
svm_pred = svm_clf.predict(x_test)

In [91]:
svm_pred = svm_pred.astype(np.float)

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  """Entry point for launching an IPython kernel.


In [92]:
np.sum(svm_pred.round()==y_test)/len(y_test)

0.67

In [93]:
print(classification_report(y_test, svm_pred.round(), digits=3))
print(confusion_matrix(y_test, svm_pred.round()))

              precision    recall  f1-score   support

         0.0      0.662     0.722     0.690       255
         1.0      0.680     0.616     0.647       245

    accuracy                          0.670       500
   macro avg      0.671     0.669     0.669       500
weighted avg      0.671     0.670     0.669       500

[[184  71]
 [ 94 151]]


### Perceptron NN

In [94]:
from torch.utils.data import Dataset, DataLoader
import sys
from torch.autograd import Variable

class Text(Dataset):
    def __init__(self, x , y):
        self.y = y
        self.x = x

    def __len__(self):
        return len(self.x)

    def __getitem__(self, idx):
        data = torch.tensor(self.x[idx].astype('float32')).to(device)
        y = torch.tensor(self.y[idx].astype('float32')).unsqueeze(0).to(device)
        return data, y

In [95]:
train_ds = Text(x, y)
train_loader = DataLoader(dataset=train_ds, batch_size=128, shuffle=True)

In [96]:
import torch.nn as nn
import torch.nn.functional as F

class BasicModel(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super(BasicModel, self).__init__()

        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.output_dim  = output_dim

        self.fc1 = torch.nn.Linear(self.input_dim, self.hidden_dim)
        self.fc2 = torch.nn.Linear(self.hidden_dim, 1)
        self.sigmoid = torch.nn.Sigmoid()
        
    def forward(self, x):
        x = x.reshape(x.shape[0], -1)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return self.sigmoid(x)

In [97]:
basic_classifier = BasicModel(input_dim=512*1, hidden_dim=50, output_dim=1).to(device)
c = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(basic_classifier.parameters(), lr=0.001)

train_loss_history = []
val_acc_history = []

In [104]:
iter_per_epoch = len(train_loader)
num_epochs = 30
initial_epoch = 1
log_nth = 2
storing_frequency = 15
checkpoints_path = "/content/drive/MyDrive/ExplainableAI/Model/Saliency/checkpoints"

for epoch in range(initial_epoch, initial_epoch+num_epochs):
    basic_classifier.train()
    epoch_losses = []
    for i, (data, y_label) in enumerate(train_loader):
      optimizer.zero_grad()
      out = basic_classifier(data)
      loss = c(out, y_label)
      epoch_losses.append(loss.item())
      loss.backward()
      optimizer.step()

      if (i+1) % log_nth == 0:        
          print ('Epoch [{}/{}], Step [{}/{}], Loss for last {} batches: {:.4f}' 
                  .format(epoch, num_epochs, i+1, iter_per_epoch, log_nth, np.mean(np.array(epoch_losses[-log_nth:]))))
          #print_time()
      
      if (i+1) % storing_frequency == 0:        
          print('Storing with loss for last {} batches = {}'.format(storing_frequency, np.mean(np.array(epoch_losses[-storing_frequency:]))))
          #print_time()
          #torch.save(basic_classifier.state_dict(), checkpoints_path+"/final_model_epoch_{}_{}.checkpoint".format(epoch, i+1))
  
    # Store after whole epoch
    print ('Epoch [{}/{}] finished with loss = {:.4f}'.format(epoch, num_epochs, np.mean(np.array(epoch_losses))))
    #torch.save(basic_classifier.state_dict(), checkpoints_path+"/final_model_epoch_{}.checkpoint".format(epoch))

Epoch [1/30], Step [2/9], Loss for last 2 batches: 0.6869
Epoch [1/30], Step [4/9], Loss for last 2 batches: 0.6543
Epoch [1/30], Step [6/9], Loss for last 2 batches: 0.6824
Epoch [1/30], Step [8/9], Loss for last 2 batches: 0.6575
Epoch [1/30] finished with loss = 0.6684
Epoch [2/30], Step [2/9], Loss for last 2 batches: 0.6742
Epoch [2/30], Step [4/9], Loss for last 2 batches: 0.6529
Epoch [2/30], Step [6/9], Loss for last 2 batches: 0.6850
Epoch [2/30], Step [8/9], Loss for last 2 batches: 0.6622
Epoch [2/30] finished with loss = 0.6669
Epoch [3/30], Step [2/9], Loss for last 2 batches: 0.6553
Epoch [3/30], Step [4/9], Loss for last 2 batches: 0.6633
Epoch [3/30], Step [6/9], Loss for last 2 batches: 0.6676
Epoch [3/30], Step [8/9], Loss for last 2 batches: 0.6748
Epoch [3/30] finished with loss = 0.6658
Epoch [4/30], Step [2/9], Loss for last 2 batches: 0.6701
Epoch [4/30], Step [4/9], Loss for last 2 batches: 0.6656
Epoch [4/30], Step [6/9], Loss for last 2 batches: 0.6729
Epoch [

In [105]:
nn_pred = basic_classifier(torch.tensor(x_test.astype('float32')).to(device))

In [106]:
nn_pred = nn_pred.flatten().detach().cpu().numpy().round()

In [107]:
np.sum(nn_pred==y_test)/len(y_test)

0.658

In [108]:
print(classification_report(y_test, nn_pred, digits=3))
print(confusion_matrix(y_test, nn_pred))

              precision    recall  f1-score   support

         0.0      0.611     0.906     0.730       255
         1.0      0.803     0.400     0.534       245

    accuracy                          0.658       500
   macro avg      0.707     0.653     0.632       500
weighted avg      0.705     0.658     0.634       500

[[231  24]
 [147  98]]
