## Experiments

Let's start playing around with our data

In [6]:
#imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import tqdm
from sklearn.model_selection import train_test_split
from datasets import load_dataset,Dataset,DatasetDict
from transformers import DataCollatorWithPadding,AutoModelForSequenceClassification,AutoTokenizer,Trainer,TrainingArguments,AutoModel,AutoConfig
from transformers.modeling_outputs import TokenClassifierOutput
from sklearn.metrics import accuracy_score,precision_score,recall_score,f1_score
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Read in training data
This dataset contains 100 annotated terms of service contracts, each row represents a sentence, which carries on it a label. The label corresponds to a different type of potential unfairness, as defined by the authors of CLAUDETTE, the previous paper from which this dataset came from. 

In [7]:
df = pd.read_csv('../data/dataset.csv')
df["label"].apply(lambda x:[1,0] if x == 0,else [0,1])
df.head()

Unnamed: 0.1,Unnamed: 0,A,CH,CR,J,LAW,LTD,PINC,TER,USE,document,document_ID,label,text,TER_targets,LTD_targets,A_targets,CH_targets,CR_targets
0,0,0,0,0,0,0,0,0,0,0,Mozilla,0,0,websites & communications terms of use,,,,,
1,1,0,0,0,0,0,0,0,0,0,Mozilla,0,0,please read the terms of this entire document ...,,,,,
2,2,0,0,0,0,0,0,0,0,1,Mozilla,0,1,by accessing or signing up to receive communic...,,,,,
3,3,0,0,0,0,0,0,0,0,0,Mozilla,0,0,our websites include multiple domains such as ...,,,,,
4,4,0,0,0,0,0,0,0,0,0,Mozilla,0,0,you may also recognize our websites by nicknam...,,,,,


Now let's load in a pretrained huggingface BERT model  (https://huggingface.co/pile-of-law/legalbert-large-1.7M-2) to get the word embeddings of each sentence

In [8]:
from transformers import BertTokenizer, BertModel
tokenizer = BertTokenizer.from_pretrained('pile-of-law/legalbert-large-1.7M-2')
model = BertModel.from_pretrained('pile-of-law/legalbert-large-1.7M-2')
model.to(device)
text = "This is a test"
encoded_input = tokenizer(text, return_tensors='pt')
output = model(**encoded_input.to(device))
weights = model.get_input_embeddings()

In [9]:
output.pooler_output.shape
output.pooler_output[0].shape

torch.Size([1024])

# Oversample the training data
Since our classifier isn't returning any positive predictions, the problem could be that it isn't seeing enough examples of sentences that are potentially exploitative. Let's fix that

In [10]:
from imblearn.over_sampling import RandomOverSampler
x, y = df['text'], df['label']
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)
ros = RandomOverSampler(random_state=42,sampling_strategy='minority')
x_train, y_train = ros.fit_resample(pd.DataFrame(x_train), pd.DataFrame(y_train))

In [11]:
train_df = pd.merge(x_train,y_train,left_index=True,right_index=True)
test_df = pd.merge(x_test,y_test,left_index=True,right_index=True)
train_df.reset_index(inplace=True)
test_df.reset_index(inplace=True)

In [12]:
train = Dataset.from_pandas(train_df)
test = Dataset.from_pandas(test_df)
train
test

Dataset({
    features: ['index', 'text', 'label'],
    num_rows: 4084
})

Split dataset into train, test and validation

In [13]:
test_valid = test.train_test_split(test_size=0.5,seed=42)

dataset_dict = DatasetDict({
    'train':train,
    'test':test_valid['test'],
    'validation':test_valid['train']
    })
dataset_dict

DatasetDict({
    train: Dataset({
        features: ['index', 'text', 'label'],
        num_rows: 29206
    })
    test: Dataset({
        features: ['index', 'text', 'label'],
        num_rows: 2042
    })
    validation: Dataset({
        features: ['index', 'text', 'label'],
        num_rows: 2042
    })
})

In [14]:
def tokenize(batch):
    return tokenizer(batch['text'], truncation=True, max_length=1024)

In [15]:
tokenized_df = dataset_dict.map(tokenize, batched=True)

Map:   0%|          | 0/29206 [00:00<?, ? examples/s]

Map:   0%|          | 0/2042 [00:00<?, ? examples/s]

Map:   0%|          | 0/2042 [00:00<?, ? examples/s]

In [16]:
tokenized_df

DatasetDict({
    train: Dataset({
        features: ['index', 'text', 'label', 'input_ids', 'token_type_ids', 'attention_mask'],
        num_rows: 29206
    })
    test: Dataset({
        features: ['index', 'text', 'label', 'input_ids', 'token_type_ids', 'attention_mask'],
        num_rows: 2042
    })
    validation: Dataset({
        features: ['index', 'text', 'label', 'input_ids', 'token_type_ids', 'attention_mask'],
        num_rows: 2042
    })
})

In [17]:
tokenized_df.set_format('torch', columns=['input_ids', 'attention_mask',"token_type_ids",'label'])
#data collator forms a batch with padding for the length of the longest input. We do this so we don't need to repeat unnecessary computations on a global maximum length
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

Now let's construct a custom classifier classifier to classify sentences as potentially unfair or not 

In [47]:
class Classifier_v1(nn.Module):
    def __init__(self, model_name, num_labels):
        super(Classifier_v1,self).__init__()
        self.num_labels = num_labels
        self.model = AutoModel.from_pretrained(model_name,config = AutoConfig.from_pretrained(model_name,
                                                                                              output_attention = True,
                                                                                              output_hidden_state = True))
        # New Layer
        self.dropout = nn.Dropout(0.1)
        self.hidden1 = nn.Linear(self.model.config.hidden_size, 512)
        self.hidden2 = nn.Linear(512, 128)
        self.classifier = nn.Linear(128, num_labels)
        self.softmax = nn.Softmax(dim=1)
        self.relu = nn.ReLU()
    
    def forward(self, input = None, attention_mask = None, token_type_ids = None,input_ids = None, labels = None):
        with torch.no_grad():
            outputs = self.model(input_ids=input_ids, attention_mask = attention_mask, token_type_ids = token_type_ids)
        pooler_output = outputs.pooler_output[0]
        sequence_outputs = self.dropout(pooler_output)
        output = self.classifier(self.hidden2(self.relu(self.hidden1((sequence_outputs)))))
        logits = self.softmax(output)
        return logits

In [48]:
from torch.utils.data import DataLoader

train_dataloader = DataLoader(
    tokenized_df['train'], shuffle=True, batch_size=1, collate_fn=data_collator
)
test_dataloader = DataLoader(
    tokenized_df['test'], shuffle=True, batch_size=1,collate_fn=data_collator
)

In [49]:
train_data = next(iter(train_dataloader))
train_data["input_ids"].shape

torch.Size([1, 16])

In [50]:
output.pooler_output.shape

torch.Size([1, 1024])

## Training Loop

## First Test on the baby df (smaller subset of the data)

In [51]:
from transformers import get_scheduler
from torch.optim import AdamW, SGD

model_v1 = Classifier_v1('pile-of-law/legalbert-large-1.7M-2',2).to(device)
loss_fn = nn.CrossEntropyLoss()
optimizer = SGD(model_v1.parameters(), lr=.0001)
epochs = 5

In [52]:
training_steps = epochs * len(train_dataloader)

lr_scheduler = get_scheduler(
    'linear',
    optimizer=optimizer,
    num_warmup_steps=0,
    num_training_steps=training_steps
)

# Full Loop

In [53]:
def train(model,train_dataloader,epochs,loss_fn,optimizer,lr_scheduler):
    for epoch in range(epochs):
        preds = []
        labels = []
        model_v1.train()
        for batch in tqdm.tqdm(train_dataloader):
            optimizer.zero_grad()
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            token_type_ids = batch['token_type_ids'].to(device)
            label = batch['labels']
            if labels == 0:
                label = torch.tensor([1,0]).to(torch.float32).to(device)
            else:
                label = torch.tensor([0,1]).to(torch.float32).to(device)
            output = model(input_ids=input_ids,attention_mask=attention_mask,token_type_ids=token_type_ids)
            loss = loss_fn(output,label)
            loss.backward()
            optimizer.step()
            lr_scheduler.step()
            preds.append(torch.argmax(output).detach().cpu().numpy())
            labels.append(torch.argmax(label).detach().cpu().numpy())
        print(f"Epoch: {epoch} | Accuracy: {accuracy_score(labels,preds)} | Precision: {precision_score(labels,preds)} | Recall: {recall_score(labels,preds)} | F1: {f1_score(labels,preds)}")

In [54]:
model_trained = train(model_v1,train_dataloader,1,loss_fn,optimizer,lr_scheduler)

29206it [6:54:02,  1.18it/s]


Epoch: 0 | Accuracy: 0.9966787646374032 | Precision: 1.0 | Recall: 0.9966787646374032 | F1: 0.9983366200805968


## Post Training Evaluation

In [58]:
preds = []
labels = []
for batch in tqdm.tqdm(test_dataloader):
    input_ids = batch['input_ids'].to(device)
    attention_mask = batch['attention_mask'].to(device)
    token_type_ids = batch['token_type_ids'].to(device)
    label = batch['labels']
    if labels == 0:
        label = torch.tensor([1,0]).to(torch.float32).to(device)
    else:
        label = torch.tensor([0,1]).to(torch.float32).to(device)
    with torch.no_grad():
        output = model_v1(input_ids=input_ids,attention_mask=attention_mask,token_type_ids=token_type_ids)
    loss = loss_fn(output,label)
    pred = torch.argmax(output)
    preds.append(pred.detach().cpu().numpy())
    labels.append(batch["labels"].detach().cpu().numpy())
print(f"accuracy: {accuracy_score(preds, labels)} | f1: {f1_score(preds, labels)} | precision: {precision_score(preds, labels)} | recall: {recall_score(preds, labels)} | Loss: {loss}")

  0%|          | 0/2042 [00:00<?, ?it/s]

  return self._call_impl(*args, **kwargs)
  4%|▍         | 82/2042 [01:04<37:24,  1.15s/it]  

It looks like our model is simply labeling everything as not exploitative, which would explain why our f1 score is