In [1]:
import pickle
import torch
from torch.utils.data import Dataset, DataLoader
import torch.functional as F
from transformers import BertTokenizer, BertModel, get_linear_schedule_with_warmup
import matplotlib.pyplot as plt
import os
%matplotlib inline

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
DECEPTIVE_DIR = '../../data/Transcription/Deceptive/'
deceptive = os.listdir(DECEPTIVE_DIR)
TRUTHFUL_DIR = '../../data/Transcription/Truthful/'
truthful = os.listdir(TRUTHFUL_DIR)
print('Deceptive size:', len(deceptive))
print('Truthful size:', len(truthful))

Deceptive size: 61
Truthful size: 60


In [3]:
bert = BertModel.from_pretrained('bert-base-uncased', output_hidden_states = True)
bert.eval()
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [4]:
def prepare_text(text, tokenizer):
    marked_text = "[CLS] " + text + " [SEP]"
    tokenized_text = tokenizer.tokenize(marked_text)
    indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
    segments_ids = [1]*len(indexed_tokens)
    tokens_tensor = torch.tensor([indexed_tokens])
    segments_tensors = torch.tensor([segments_ids])
    return tokenized_text, tokens_tensor, segments_tensors

In [5]:
def get_bert_embeddings(tokens_tensor, segments_tensor, model):
    with torch.no_grad():
        outputs = model(tokens_tensor, segments_tensor)
        hidden_states = outputs[2]
    token_embeddings = hidden_states[-2][0]
    return torch.mean(token_embeddings, dim=0)

In [6]:
embedding_map = {}
for file in truthful:
    with open(TRUTHFUL_DIR + file, encoding='utf8') as f:
        text = f.read()
        tokenized_text, tokens_tensor, segments_tensor = prepare_text(text, tokenizer)
        embeddings = get_bert_embeddings(tokens_tensor, segments_tensor, bert)
        embedding_map[file.split('.')[0]] = embeddings
for file in deceptive:
    with open(DECEPTIVE_DIR + file, encoding='utf8') as f:
        text = f.read()
        tokenized_text, tokens_tensor, segments_tensor = prepare_text(text, tokenizer)
        embeddings = get_bert_embeddings(tokens_tensor, segments_tensor, bert)
        embedding_map[file.split('.')[0]] = embeddings
with open('../../embeddings/transcript_features_1.pkl', 'wb') as f:
    pickle.dump(embedding_map, f)

In [7]:
class BertFrozenClassifier(torch.nn.Module):
    def __init__(self):
        super(BertFrozenClassifier, self).__init__()
        self.linear1 = torch.nn.Linear(768, 2)
        torch.nn.init.kaiming_normal_(self.linear1.weight)
    
    def forward(self, x):
        return self.linear1(torch.nn.functional.relu(x))

In [8]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = BertFrozenClassifier()
model.to(device)

BertFrozenClassifier(
  (linear1): Linear(in_features=768, out_features=2, bias=True)
)

In [9]:
class BertFrozenDataset(Dataset):
    def __init__(self, data) -> None:
        self.data = data
    
    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        embeddings = embedding_map[self.data[index][0].split('.')[0]]
        return embeddings, self.data[index][1]

In [10]:
train_data = []
val_data = []
for i in range(len(truthful)):
    if i < 50:
        train_data.append((truthful[i], 0))
    else:
        val_data.append((truthful[i], 0))
for i in range(len(deceptive)):
    if i < 50:
        train_data.append((deceptive[i], 1))
    else:
        val_data.append((deceptive[i], 1))

In [11]:
train_dataset = BertFrozenDataset(train_data)
val_dataset = BertFrozenDataset(val_data)
train_loader = DataLoader(train_dataset, batch_size=2, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=True)

In [12]:
num_epochs = 10
learning_rate = 2e-5
weight_decay = 0.01
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate, weight_decay=weight_decay)
total_steps = len(train_loader) * num_epochs
scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=0, num_training_steps=total_steps)

In [13]:
for epoch in range(num_epochs):
    model.train()
    for x, y in train_loader:
        x = x.to(device)
        y = y.to(device)
        out = model(x)
        loss = criterion(out, y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        scheduler.step()

In [14]:
model.eval()
with torch.no_grad():
    correct = 0
    total = 0
    for x, y in val_loader:
        x = x.to(device)
        y = y.to(device)
        out = model(x)
        _, predicted = torch.max(out.data, 1)
        total += y.size(0)
        correct += (predicted == y).sum().item()
    print('Accuracy of the network on the validation set: %d %%' % (100 * correct / total))

Accuracy of the network on the validation set: 57 %


In [15]:
model.eval()
with torch.no_grad():
    correct = 0
    total = 0
    for x, y in train_loader:
        x = x.to(device)
        y = y.to(device)
        out = model(x)
        _, predicted = torch.max(out.data, 1)
        total += y.size(0)
        correct += (predicted == y).sum().item()
    print('Accuracy of the network on the validation set: %d %%' % (100 * correct / total))

Accuracy of the network on the validation set: 45 %


In [32]:
class BertClassifier(torch.nn.Module):
    def __init__(self):
        super(BertClassifier, self).__init__()
        self.bert = BertModel.from_pretrained('bert-base-uncased', output_hidden_states = True)
        self.dropout = torch.nn.Dropout(0.3)
        self.linear1 = torch.nn.Linear(768, 64)
        self.linear2 = torch.nn.Linear(64, 2)
        torch.nn.init.xavier_uniform_(self.linear1.weight)
        torch.nn.init.kaiming_uniform_(self.linear2.weight)
    
    def forward(self, x):
        x = torch.split(x, 512, dim=1)
        x = self.bert(x[0], x[1])
        x = self.linear1(self.dropout(x[0][:, 0, :]))
        x = torch.nn.functional.relu(x)
        return self.linear2(x)

In [30]:
class BertDataset(Dataset):
    def __init__(self, data) -> None:
        self.data = data
    
    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        text = self.data[index][0]
        _, tokens_tensor, segments_tensor = prepare_text(text, tokenizer)
        tokens_tensor = torch.nn.functional.pad(tokens_tensor, (0, 512 - tokens_tensor.shape[1]), 'constant', 0)
        segments_tensor = torch.nn.functional.pad(segments_tensor, (0, 512 - segments_tensor.shape[1]), 'constant', 0)
        temp = torch.concat((tokens_tensor, segments_tensor), dim=1)
        temp = temp.squeeze()
        return temp, self.data[index][1]

In [33]:
train_data = []
val_data = []
for i in range(len(truthful)):
    if i < 50:
        with open(TRUTHFUL_DIR + truthful[i], encoding='utf8') as f:
            text = f.read()
            train_data.append((text, 0))
    else:
        with open(TRUTHFUL_DIR + truthful[i], encoding='utf8') as f:
            text = f.read()
            val_data.append((text, 0))
for i in range(len(deceptive)):
    if i < 50:
        with open(DECEPTIVE_DIR + deceptive[i], encoding='utf8') as f:
            text = f.read()
            train_data.append((text, 1))
    else:
        with open(DECEPTIVE_DIR + deceptive[i], encoding='utf8') as f:
            text = f.read()
            val_data.append((text, 1))

In [34]:
train_dataset = BertDataset(train_data)
val_dataset = BertDataset(val_data)
train_loader = DataLoader(train_dataset, batch_size=2, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=True)

In [35]:
num_epochs = 10
learning_rate = 2e-5
weight_decay = 0.01
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate, weight_decay=weight_decay)
total_steps = len(train_loader) * num_epochs
scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=0, num_training_steps=total_steps)

In [36]:
model = BertClassifier()
model.to(device)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


BertClassifier(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=Tru

In [37]:
for epoch in range(num_epochs):
    model.train()
    for x, y in train_loader:
        x = x.to(device)
        y = y.to(device)
        out = model(x)
        loss = criterion(out, y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        scheduler.step()

In [39]:
model.eval()
with torch.no_grad():
    correct = 0
    total = 0
    for x, y in val_loader:
        x = x.to(device)
        y = y.to(device)
        out = model(x)
        _, predicted = torch.max(out.data, 1)
        total += y.size(0)
        correct += (predicted == y).sum().item()
    print('Accuracy of the network on the validation set: %d %%' % (100 * correct / total))

Accuracy of the network on the validation set: 42 %


In [40]:
model.eval()
with torch.no_grad():
    correct = 0
    total = 0
    for x, y in train_loader:
        x = x.to(device)
        y = y.to(device)
        out = model(x)
        _, predicted = torch.max(out.data, 1)
        total += y.size(0)
        correct += (predicted == y).sum().item()
    print('Accuracy of the network on the validation set: %d %%' % (100 * correct / total))

Accuracy of the network on the validation set: 46 %
