In [9]:
!pip install transformers >> /dev/null

# Training classifier

This script can be used to train the classifier on original and adversarial samples.

In [1]:
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import os
import time
import importlib
from copy import copy

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# Loading and transforming data into logits differences

The first step is transforming our dataframe into logits differences for each original and adversarial sentence. For this, it is required to execute the model for each sentence with substitutions as explained in the paper.

In [11]:
SAMPLES_PATH = '/content/drive/MyDrive/AdversarialXAI/Adversarial Samples'

# Print available setups for testing
for i in os.listdir(SAMPLES_PATH):
    if not i.startswith('.'): # Don't print system files
        print(i)

ag-news_100_styleadv_distilbert.csv
ag-news_1000_styleadv_distilbert.csv
imdb_test_100_styleadv_distilbert.csv
imdb_train_200_styleadv_distilbert.csv
imdb_train_1000_styleadv_distilbert.csv


In [4]:
# Select the configuration for training
test_config = 'imdb_train_1000_styleadv_distilbert.csv' # or 'agnews_pwws_distilbert.csv'

In [5]:
# Obtain model from test config
model_arch = test_config.replace(".csv", "").split('_')[-1]
dataset = test_config.split('_')[0]
print("Model architecture:", model_arch)
print("Dataset:", dataset)

Model architecture: distilbert
Dataset: imdb


In [6]:
def load_textattack_local_model(model_arch, dataset):
    
    def load_module_from_file(file_path):
        """Uses ``importlib`` to dynamically open a file and load an object from
        it."""
        temp_module_name = f"temp_{time.time()}"

        spec = importlib.util.spec_from_file_location(temp_module_name, file_path)
        module = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(module)
        return module
    
    m = load_module_from_file(f'../{model_arch}_{dataset}_textattack.py')
    model = getattr(m, 'model')
    
    return model, None

In [7]:
def load_hugging_face_model(model_arch, dataset):
    # Import the model used for generating the adversarial samples.
    # Correctly, set up imports, model and tokenizer depending on the model you generated the samples on.
    
    if model_arch == 'distilbert':
        from transformers import DistilBertConfig as config, DistilBertTokenizer as tokenizer, AutoModelForSequenceClassification as auto_model
    elif model_arch == 'bert':
        from transformers import BertConfig as config, BertTokenizer as tokenizer, AutoModelForSequenceClassification as auto_model
    
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    tokenizer = tokenizer.from_pretrained(f"textattack/{model_arch}-base-uncased-{dataset}")
    model = auto_model.from_pretrained(f"textattack/{model_arch}-base-uncased-{dataset}").to(device)
    
    return model, tokenizer

In [10]:
# Models available in hugging-face are executed differently from LSTM and CNN. Choose automatically the configuration and load model + tokenizer.
textattack_local_models = ['lstm', 'cnn']

if model_arch in textattack_local_models:
    hugging_face_model = False
    model, tokenizer = load_textattack_local_model(model_arch, dataset)

else:
    hugging_face_model = True
    model, tokenizer = load_hugging_face_model(model_arch, dataset)

Downloading:   0%|          | 0.00/226k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/112 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/485 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/255M [00:00<?, ?B/s]

# Loading data

Read into a dataframe your original and adversarial samples.

In [12]:
# Read the desired csv file previously generated
df = pd.read_csv(f'{SAMPLES_PATH}/{test_config}', index_col=0)
df.shape

(958, 3)

In [13]:
# Select first entries. Only 3000 will be used but we leave room for false adversarial sentences that will be filtered out later and test set. We reduce size because computations are expensive.
# In real setup, the whole file was considered and fixed train and test sets were produced.
df = df.head(7000)

In [14]:
# Create batches of non-adversarial sentences
# For big models such as BERT, we must divide our input in smaller batches.
n = 256 # Size of each batch.
batches = [list(df.original_text.values)[i:i + n] for i in range(0, len(df.original_text.values), n)]

In [15]:
batches[0][0]

' If you love cult 70\'s Sci-fi the way I do, or if you like movies such as "Repo Man" or "Buckaroo Bonzai" than you\'re going to love this one. It\'s a stream of consciousness 70\'s Sci-fi spectacular, including a 22nd century junkyard and the Earth a million years from now. This movie is pure 70\'s. Put on Steve Miller\'s "Fly Like An Eagle" or Pink Floyd\'s "Dark Side Of The Moon" and you\'re ready to go! '

In [16]:
# Generate predictions for all non-adversarial sentences in our dataset
outputs = []

if hugging_face_model is True: # Use tokenizer and hugging face pipeline
    for b in batches: 
        input = tokenizer(b, return_tensors="pt", padding=True, truncation=True).to(device)
        with torch.no_grad():
            output = model(**input)
            outputs.append(output.logits.cpu().numpy())
            del input
            torch.cuda.empty_cache()

else: # Use local model by simply predicting without tokenization
    for b in batches: 
        output = model(b)
        outputs.append(output)

In [17]:
# Obtain non-adversarial predictions
outputs_flatten = [item for sublist in outputs for item in sublist]
predictions = [np.argmax(i) for i in outputs_flatten]

# Include prediction for these classes in our DataFrame
df['original_class_predicted'] = predictions

In [18]:
# Repeat process for adversarial sentences
n = 256
batches = [list(df.adversarial_text.values)[i:i + n] for i in range(0, len(df.adversarial_text.values), n)]

In [19]:
# Generate predictions for all non-adversarial sentences in our dataset
outputs = []

if hugging_face_model is True: # Use tokenizer and hugging face pipeline
    for b in batches: 
        input = tokenizer(b, return_tensors="pt", padding=True, truncation=True).to(device)
        with torch.no_grad():
            output = model(**input)
            outputs.append(output.logits.cpu().numpy())
            del input
            torch.cuda.empty_cache()

else: # Use local model by simply predicting without tokenization
    for b in batches: 
        output = model(b)
        outputs.append(output)

In [20]:
# Obtain adversarial predictions
outputs_flatten = [item for sublist in outputs for item in sublist]
predictions = [np.argmax(i) for i in outputs_flatten]

# Include prediction for these classes in our DataFrame
df['adversarial_class_predicted'] = predictions

In [21]:
# Select only those sentences for which there was actually a change in the prediction
correct = df[(df['original_class_predicted'] != df['adversarial_class_predicted'])]

In [23]:
# Update dataframe and keep only adversarial samples
df = correct
len(df)

602

# Obtain logits
Once we have the predictions and actually adversarial sentences, we generate the logits differences

In [24]:
original_samples = df.original_text.values
adversarial_samples = df.adversarial_text.values

In [25]:
# Concatenate all original samples and their predictions
x = np.concatenate((original_samples, adversarial_samples))
y = np.concatenate((np.zeros(len(original_samples)), np.ones(len(adversarial_samples))))

In [26]:
def obtain_logits(samples, batch_size, model, tokenizer):
    """
    For given samples and model, compute prediction logits.
    Input data is splitted in batches.
    """
    batches = [samples[i:i + batch_size] for i in range(0, len(samples), batch_size)]
    logits = []

    for i, b in enumerate(batches):
        print("{}/{}".format(i+1, len(batches)))
        if hugging_face_model:
            with torch.no_grad():
                input = tokenizer(list(b), return_tensors="pt", padding=True, truncation=True).to(device)
                logits.append(model(**input).logits.cpu().numpy())
        else:
            logits.append(model(b))

    return logits

In [27]:
# Compute logits for original sentences
batch_size = 350
original_logits = obtain_logits(original_samples, batch_size, model, tokenizer)
original_logits = np.concatenate(original_logits).reshape(-1, original_logits[0].shape[1])

1/2
2/2


In [28]:
torch.cuda.empty_cache()

In [29]:
# Compute logits for adversarial sentences
batch_size = 350
adversarial_logits = obtain_logits(adversarial_samples, batch_size, model, tokenizer)
adversarial_logits = np.concatenate(adversarial_logits).reshape(-1, adversarial_logits[0].shape[1])

1/2
2/2


In [30]:
torch.cuda.empty_cache()

In [31]:
# Concatenate all logits
logits = np.concatenate((original_logits, adversarial_logits))

In [32]:
# Shuffle data
import random
c = list(zip(x, y, logits))
random.shuffle(c)
x, y, logits = zip(*c)

## Computing logits difference

This is a key step implemented. The main idea is:
* For each sentence, replace each word by the `[UNK]` token and compute prediction logits
* Using these logits, we can easily compute the saliency of the word as presented in the report.
* Then, we sort words by descending saliency.
* Finally, compute logits difference for each replacement. This difference is computed as `Logit from class predicted for the whole sentence - Highest remaining logit`

More details on these derivations are found in the paper.

In [33]:
def compute_logits_difference(x, logits, y, model, tokenizer, idx, max_sentence_size=512):
    n_classes = len(logits[idx])
    predicted_class = np.argmax(logits[idx]) # Predicted class for whole sentence using previously computed logits
    class_logit = logits[idx][predicted_class] # Store this origianl prediction logit

    split_sentence = x[idx].split(' ')[:max_sentence_size] # The tokenizer will only consider 512 words so we avoid computing innecessary logits

    new_sentences = []

    # Here, we replace each word by [UNK] and generate all sentences to consider
    for i, word in enumerate(split_sentence):
        new_sentence = copy(split_sentence)
        new_sentence[i] = '[UNK]'
        new_sentence = ' '.join(new_sentence)
        new_sentences.append(new_sentence)

    # We cannot run more than 350 predictions simultaneously because of resources.
    # Split in batches if necessary.
    # Compute logits for all replacements.
    if len(new_sentences) > 200:
        logits = []
        batches = [new_sentences[i:i + 200] for i in range(0, len(new_sentences), 200)]
        for b in batches:
            if hugging_face_model: # Use hugging face predictions
                batch = tokenizer(b, return_tensors="pt", padding=True, truncation=True).to(device)
                with torch.no_grad():
                    logits.append(model(**batch).logits)
            else:
                logits.append(model(b).to(device))
      
        if hugging_face_model:
            logits = torch.cat(logits)
        else:
            logits = np.concatenate( logits, axis=0 )
            logits = torch.Tensor(logits)
    
    else: # There's no need to split in batches
        if hugging_face_model:
            batch = tokenizer(new_sentences, return_tensors="pt", padding=True, truncation=True).to(device)
            with torch.no_grad():
                logits = model(**batch).logits
            del batch
        else:
            logits = model(new_sentences)
            logits = torch.Tensor(logits)


    # Compute saliency
    saliency = (class_logit - logits[:,predicted_class]).reshape(-1, 1)

    # Append to logits for sorting
    data = torch.cat((logits, saliency), 1)

    # Sort by descending saliency
    data = torch.stack(sorted(data, key=lambda a: a[n_classes], reverse=True))

    # Remove saliency
    data = data[:, :n_classes]

    # Fix order: originallly predicted class, other classes
    order = [predicted_class] + [i for i in range(n_classes) if i!=predicted_class]
    data = torch.index_select(data, 1, torch.LongTensor(order).to(device))

    # Compute difference between predicted class (always first column) and higher remaining logit
    data = data[:, :1].flatten() - torch.max(data[:, 1:], dim=1).values.flatten()

    del saliency
    torch.cuda.empty_cache()

    # Return only logits difference
    return data.reshape(-1, 1), torch.Tensor([y[idx]]).to(device)

In [34]:
def compute_logits_difference_padding(x, logits, y, model, tokenizer, idx, target_size=512):
    """
    This function provides a wrapper for compute_logits_difference and includes padding to computations.
    """
    data, y = compute_logits_difference(x, logits, y, model, tokenizer, idx, target_size)
    data_size = min(512, data.shape[0])
    target = torch.zeros(target_size, 1).to(device)
    target[:data_size, :] = data

    return target, y

In [35]:
from torch.utils.data import Dataset, DataLoader
import sys
from torch.autograd import Variable

class Text(Dataset):
    """
    Dataloader following torch details. Each time we get an item, we will compute
    the logits difference.
    """
    def __init__(self, x , logits, y, model, tokenizer, train=True, max_sentence_size=512):
        self.logits = logits
        self.y = y
        self.x = x
        self.model = model
        self.tokenizer = tokenizer
        self.max_sentence_size = max_sentence_size

    def __len__(self):
        return len(self.x)

    def __getitem__(self, idx):
        data, y = compute_logits_difference_padding(self.x, self.logits, self.y, self.model, self.tokenizer, idx, self.max_sentence_size)
        data = data[:, :1].unsqueeze(0)

        return data, y, self.x[idx]

In [36]:
# Create the dataloader
train_ds = Text(x, logits, y, model, tokenizer)
train_loader = DataLoader(dataset=train_ds, batch_size=256, shuffle=True)

In [37]:
# Define the target DataFrame to structure our data.
# It has a column for each input dimension (up to 512) and 
# it also includes whether it is adversarial or not (y_label) and the sentence from which the logits where extracted

data_train = pd.DataFrame(columns=[i for i in range(512)]+['y_label', 'sentence'])

In [40]:
# Generate logits difference by running the loader.
for i, (data, y_label, sentence) in enumerate(train_loader):
    print("{}/{} - {}\n".format(i, len(train_loader), i/len(train_loader)))
    for v in range(len(data)):
        # Structure data and include in dataframe
        row = np.append(data[v].cpu().numpy().reshape(1,-1), np.array([y_label[v].item(), sentence[v]]))
        data_train = data_train.append(pd.DataFrame([row], columns=list(data_train)), ignore_index=True)

0/5 - 0.0

1/5 - 0.2

2/5 - 0.4

3/5 - 0.6

4/5 - 0.8



In [None]:
# Divide train and test set
data_test = data_train.tail(len(data_train)-501)
data_train = data_train.head(500)

In [46]:
data_test

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,504,505,506,507,508,509,510,511,y_label,sentence
1,4.590066,4.693219,4.838607,4.9683237,5.0319624,5.151517,5.162169,5.1832643,5.2836537,5.347512,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"I like silent films, but this was a little to..."
2,0.3962196,0.4634242,0.67271006,0.7595707,0.91932046,0.9880669,0.99146926,1.1668792,1.136728,1.2195418,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,"it's like Owen Springer, a young doctor, who'..."
3,6.8039284,6.8073754,6.815252,6.837593,6.8288383,6.8142085,6.8291492,6.839721,6.829086,6.8297806,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,I have to confess right off that I have never...
4,-1.2836598,-0.4001289,0.5285263,0.7745479,0.9919106,1.5789523,1.7448826,1.9021629,2.0690417,2.0943875,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,how much of a movie like this is really worth?
5,4.4483576,4.500548,4.587584,4.5866117,4.6730986,4.6625166,4.66636,4.734633,4.7884517,4.7935615,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,I love cartoons. They can show things that fi...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,5.7667904,6.564155,6.650666,6.7062907,6.687439,6.728218,6.734296,6.7805886,6.697718,6.7659907,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"Until now, the worst movie I had ever seen wa..."
96,0.27305487,0.3186863,0.44006592,0.47765747,0.53720194,0.53256446,0.68625796,0.5999099,0.7505326,0.7529595,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,"as a teenager, I was in high school. It was f..."
97,-0.14608648,0.07633425,0.104858935,0.07504178,0.18604732,0.18962945,0.1912462,0.2242514,0.20837694,0.29939348,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,professor Andoheb (George Zucco) is likely to...
98,6.537165,6.6296167,6.6432133,6.644365,6.667001,6.6653576,6.6555653,6.6697197,6.664895,6.673421,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,A family looking for some old roadside attrac...


In [47]:
y = data_train['y_label'].values
x = data_train.drop(columns=['y_label', 'sentence']).values

In [56]:
y_test = data_test['y_label'].values
x_test = data_test.drop(columns=['y_label', 'sentence']).values

# Model training and comparison

We train different models and compare their performance.

### Random forest

In [57]:
from sklearn.ensemble import RandomForestClassifier

# Create the model using best parameters found
model = RandomForestClassifier(n_estimators=1600,
                               min_samples_split=10,
                               min_samples_leaf=2,
                               max_features='auto',
                               max_depth=None, 
                               bootstrap = True)
# Fit on training data
model.fit(x, y)

RandomForestClassifier(min_samples_leaf=2, min_samples_split=10,
                       n_estimators=1600)

In [58]:
# Actual class predictions
rf_predictions = model.predict(x_test)

In [59]:
np.sum(rf_predictions==y_test)/len(y_test)

0.98989898989899

In [60]:
from sklearn.metrics import classification_report, confusion_matrix
print(classification_report(y_test, rf_predictions, digits=3))
print(confusion_matrix(y_test, rf_predictions))

              precision    recall  f1-score   support

         0.0      1.000     0.981     0.990        52
         1.0      0.979     1.000     0.989        47

    accuracy                          0.990        99
   macro avg      0.990     0.990     0.990        99
weighted avg      0.990     0.990     0.990        99

[[51  1]
 [ 0 47]]


### XGBoost

Best performing model. Hyperparamter tuning done with Dataiku.

In [61]:
import xgboost as xgb

In [62]:
xgb_classifier = xgb.XGBClassifier(
                    max_depth=3,
                    learning_rate=0.34281802,
                    gamma=0.6770816,
                    min_child_weight=2.5520658,
                    max_delta_step=0.71469694,
                    subsample=0.61460966,
                    colsample_bytree=0.73929816,
                    colsample_bylevel=0.87191725,
                    reg_alpha=0.9064181,
                    reg_lambda=0.5686102,
                    n_estimators=29,
                    silent=0,
                    nthread=4,
                    scale_pos_weight=1.0,
                    base_score=0.5,
                    missing=None,
                  )

In [63]:
xgb_classifier.fit(x, y)

XGBClassifier(colsample_bylevel=0.87191725, colsample_bytree=0.73929816,
              gamma=0.6770816, learning_rate=0.34281802,
              max_delta_step=0.71469694, min_child_weight=2.5520658,
              n_estimators=29, nthread=4, reg_alpha=0.9064181,
              reg_lambda=0.5686102, scale_pos_weight=1.0, silent=0,
              subsample=0.61460966)

In [64]:
xgb_predictions = xgb_classifier.predict(x_test)

In [66]:
print(classification_report(y_test, xgb_predictions, digits=3))
print(confusion_matrix(y_test, xgb_predictions))

              precision    recall  f1-score   support

         0.0      1.000     1.000     1.000        52
         1.0      1.000     1.000     1.000        47

    accuracy                          1.000        99
   macro avg      1.000     1.000     1.000        99
weighted avg      1.000     1.000     1.000        99

[[52  0]
 [ 0 47]]


In [67]:
import pickle
pickle.dump(xgb_classifier, open("/content/drive/MyDrive/AdversarialXAI/Classifiers/WDR/imdb_styleadv_100_classifier.pickle", "wb"))

In [68]:
xgb_classifier.save_model("/content/drive/MyDrive/AdversarialXAI/Classifiers/WDR/imdb_styleadv_100_classifier.json")

In [69]:
# temp = pickle.load(open("/content/drive/MyDrive/AdversarialXAI/Classifiers/WDR/imdb_styleadv_100_classifier.pickle", 'rb'))

In [72]:
# print(classification_report(y_test, temp.predict(x_test), digits=3))

              precision    recall  f1-score   support

         0.0      1.000     1.000     1.000        52
         1.0      1.000     1.000     1.000        47

    accuracy                          1.000        99
   macro avg      1.000     1.000     1.000        99
weighted avg      1.000     1.000     1.000        99



### AdaBoost classifier

In [None]:
from sklearn.ensemble import AdaBoostClassifier

In [None]:
abc = AdaBoostClassifier()

In [None]:
abc.fit(x, y)

AdaBoostClassifier(algorithm='SAMME.R', base_estimator=None, learning_rate=1.0,
                   n_estimators=50, random_state=None)

In [None]:
abc_predictions = abc.predict(x_test)

In [None]:
np.sum(abc_predictions==y_test)/len(y_test)

0.918

In [None]:
print(classification_report(y_test, abc_predictions, digits=3))
print(confusion_matrix(y_test, abc_predictions))

              precision    recall  f1-score   support

         0.0      0.956     0.875     0.914       248
         1.0      0.886     0.960     0.922       252

    accuracy                          0.918       500
   macro avg      0.921     0.918     0.918       500
weighted avg      0.921     0.918     0.918       500

[[217  31]
 [ 10 242]]


### LightGBM

In [None]:
import lightgbm as lgb

In [None]:
parameters = {
    'objective': 'binary',
    'application': 'binary',
    'metric': ['binary_logloss'],
    'num_leaves': 35,
    'learning_rate': 0.13,
    'verbose': 1
}

In [None]:
train_data = lgb.Dataset(x, label=y)
test_data = lgb.Dataset(x_test, label=y_test)

In [None]:
lgbm_classifier = lgb.train(parameters,
                       train_data,
                       valid_sets=test_data,
                       num_boost_round=300)

[1]	valid_0's binary_logloss: 0.597939
[2]	valid_0's binary_logloss: 0.525612
[3]	valid_0's binary_logloss: 0.469367
[4]	valid_0's binary_logloss: 0.423547
[5]	valid_0's binary_logloss: 0.387778
[6]	valid_0's binary_logloss: 0.357721
[7]	valid_0's binary_logloss: 0.331829
[8]	valid_0's binary_logloss: 0.310683
[9]	valid_0's binary_logloss: 0.293977
[10]	valid_0's binary_logloss: 0.280445
[11]	valid_0's binary_logloss: 0.268066
[12]	valid_0's binary_logloss: 0.257237
[13]	valid_0's binary_logloss: 0.248693
[14]	valid_0's binary_logloss: 0.241157
[15]	valid_0's binary_logloss: 0.23551
[16]	valid_0's binary_logloss: 0.230435
[17]	valid_0's binary_logloss: 0.22547
[18]	valid_0's binary_logloss: 0.223468
[19]	valid_0's binary_logloss: 0.22089
[20]	valid_0's binary_logloss: 0.219927
[21]	valid_0's binary_logloss: 0.218465
[22]	valid_0's binary_logloss: 0.21647
[23]	valid_0's binary_logloss: 0.214435
[24]	valid_0's binary_logloss: 0.213436
[25]	valid_0's binary_logloss: 0.21353
[26]	valid_0's

In [None]:
y_hat = lgbm_classifier.predict(x_test)

In [None]:
y_hat.round()

array([0., 1., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 1., 0., 1., 0., 0.,
       1., 0., 0., 0., 1., 1., 0., 1., 1., 1., 0., 0., 1., 0., 1., 0., 0.,
       1., 0., 1., 1., 1., 0., 0., 1., 1., 1., 0., 1., 0., 0., 1., 0., 1.,
       0., 0., 0., 0., 1., 0., 0., 1., 1., 1., 0., 1., 1., 0., 1., 0., 0.,
       0., 0., 1., 1., 0., 0., 1., 0., 1., 0., 1., 1., 1., 1., 0., 0., 0.,
       0., 0., 1., 0., 1., 0., 1., 0., 1., 0., 0., 0., 0., 1., 1., 1., 1.,
       1., 1., 1., 1., 0., 1., 0., 0., 1., 0., 1., 1., 1., 1., 0., 0., 0.,
       1., 1., 0., 1., 1., 1., 1., 0., 0., 1., 1., 0., 1., 1., 1., 1., 1.,
       0., 1., 0., 1., 1., 0., 1., 1., 1., 0., 0., 1., 0., 0., 0., 0., 1.,
       1., 1., 0., 0., 1., 1., 0., 0., 0., 1., 0., 1., 1., 1., 0., 0., 1.,
       1., 0., 1., 1., 0., 0., 0., 0., 0., 1., 0., 1., 1., 0., 0., 1., 1.,
       0., 1., 0., 0., 0., 0., 0., 1., 0., 0., 1., 1., 1., 1., 0., 1., 0.,
       1., 1., 1., 1., 0., 1., 0., 1., 1., 1., 0., 1., 0., 1., 1., 0., 0.,
       1., 0., 0., 1., 0.

In [None]:
np.sum(y_hat.round()==y_test)/len(y_test)

0.92

In [None]:
print(classification_report(y_test, y_hat.round(), digits=3))
print(confusion_matrix(y_test, y_hat.round()))

              precision    recall  f1-score   support

         0.0      0.933     0.903     0.918       248
         1.0      0.908     0.937     0.922       252

    accuracy                          0.920       500
   macro avg      0.921     0.920     0.920       500
weighted avg      0.920     0.920     0.920       500

[[224  24]
 [ 16 236]]


### SVM

In [None]:
from sklearn.svm import SVC
svm_clf = SVC(C=9.0622635,
          kernel='rbf',
          gamma='scale',
          coef0=0.0,
          tol=0.001,
          probability=True,
          max_iter=-1)

In [None]:
svm_clf.fit(x, y)

SVC(C=9.0622635, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='scale', kernel='rbf',
    max_iter=-1, probability=True, random_state=None, shrinking=True, tol=0.001,
    verbose=False)

In [None]:
svm_pred = svm_clf.predict(x_test)

In [None]:
np.sum(svm_pred.round()==y_test)/len(y_test)

0.92

In [None]:
print(classification_report(y_test, svm_pred.round(), digits=3))
print(confusion_matrix(y_test, svm_pred.round()))

              precision    recall  f1-score   support

         0.0      0.944     0.891     0.917       248
         1.0      0.898     0.948     0.923       252

    accuracy                          0.920       500
   macro avg      0.921     0.920     0.920       500
weighted avg      0.921     0.920     0.920       500

[[221  27]
 [ 13 239]]


### Perceptron NN

In [None]:
from torch.utils.data import Dataset, DataLoader
import sys
from torch.autograd import Variable

class Text(Dataset):
    def __init__(self, x , y):
        self.y = y
        self.x = x

    def __len__(self):
        return len(self.x)

    def __getitem__(self, idx):
        data = torch.tensor(self.x[idx].astype('float32')).to(device)
        y = torch.tensor(self.y[idx].astype('float32')).unsqueeze(0).to(device)
        return data, y

In [None]:
train_ds = Text(x, y)
train_loader = DataLoader(dataset=train_ds, batch_size=128, shuffle=True)

In [None]:
import torch.nn as nn
import torch.nn.functional as F

class BasicModel(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super(BasicModel, self).__init__()

        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.output_dim  = output_dim

        self.fc1 = torch.nn.Linear(self.input_dim, self.hidden_dim)
        self.fc2 = torch.nn.Linear(self.hidden_dim, 1)
        self.sigmoid = torch.nn.Sigmoid()
        
    def forward(self, x):
        x = x.reshape(x.shape[0], -1)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return self.sigmoid(x)

In [None]:
basic_classifier = BasicModel(input_dim=512*1, hidden_dim=50, output_dim=1).to(device)
c = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(basic_classifier.parameters(), lr=0.001)

train_loss_history = []
val_acc_history = []

In [None]:
iter_per_epoch = len(train_loader)
num_epochs = 3
initial_epoch = 1
log_nth = 2
storing_frequency = 15
checkpoints_path = "/content/drive/MyDrive/ExplainableAI/Model/Saliency/checkpoints"

for epoch in range(initial_epoch, initial_epoch+num_epochs):
    basic_classifier.train()
    epoch_losses = []
    for i, (data, y_label) in enumerate(train_loader):
      optimizer.zero_grad()
      out = basic_classifier(data)
      loss = c(out, y_label)
      epoch_losses.append(loss.item())
      loss.backward()
      optimizer.step()

      if (i+1) % log_nth == 0:        
          print ('Epoch [{}/{}], Step [{}/{}], Loss for last {} batches: {:.4f}' 
                  .format(epoch, num_epochs, i+1, iter_per_epoch, log_nth, np.mean(np.array(epoch_losses[-log_nth:]))))
          #print_time()
      
      if (i+1) % storing_frequency == 0:        
          print('Storing with loss for last {} batches = {}'.format(storing_frequency, np.mean(np.array(epoch_losses[-storing_frequency:]))))
          #print_time()
          #torch.save(basic_classifier.state_dict(), checkpoints_path+"/final_model_epoch_{}_{}.checkpoint".format(epoch, i+1))
  
    # Store after whole epoch
    print ('Epoch [{}/{}] finished with loss = {:.4f}'.format(epoch, num_epochs, np.mean(np.array(epoch_losses))))
    #torch.save(basic_classifier.state_dict(), checkpoints_path+"/final_model_epoch_{}.checkpoint".format(epoch))

Epoch [1/3], Step [2/32], Loss for last 2 batches: 0.6663
Epoch [1/3], Step [4/32], Loss for last 2 batches: 0.6241
Epoch [1/3], Step [6/32], Loss for last 2 batches: 0.6275
Epoch [1/3], Step [8/32], Loss for last 2 batches: 0.6386
Epoch [1/3], Step [10/32], Loss for last 2 batches: 0.6317
Epoch [1/3], Step [12/32], Loss for last 2 batches: 0.6243
Epoch [1/3], Step [14/32], Loss for last 2 batches: 0.6200
Storing with loss for last 15 batches = 0.6331382433573405
Epoch [1/3], Step [16/32], Loss for last 2 batches: 0.6284
Epoch [1/3], Step [18/32], Loss for last 2 batches: 0.6207
Epoch [1/3], Step [20/32], Loss for last 2 batches: 0.6145
Epoch [1/3], Step [22/32], Loss for last 2 batches: 0.6066
Epoch [1/3], Step [24/32], Loss for last 2 batches: 0.6115
Epoch [1/3], Step [26/32], Loss for last 2 batches: 0.6225
Epoch [1/3], Step [28/32], Loss for last 2 batches: 0.5810
Epoch [1/3], Step [30/32], Loss for last 2 batches: 0.5960
Storing with loss for last 15 batches = 0.6086814324061076
E

In [None]:
nn_pred = basic_classifier(torch.tensor(x_test.astype('float32')).to(device))

In [None]:
nn_pred = nn_pred.flatten().detach().cpu().numpy().round()

In [None]:
np.sum(nn_pred==y_test)/len(y_test)

0.904

In [None]:
print(classification_report(y_test, nn_pred, digits=3))
print(confusion_matrix(y_test, nn_pred))

              precision    recall  f1-score   support

         0.0      0.885     0.927     0.906       248
         1.0      0.925     0.881     0.902       252

    accuracy                          0.904       500
   macro avg      0.905     0.904     0.904       500
weighted avg      0.905     0.904     0.904       500

[[230  18]
 [ 30 222]]
