## Experiments

Let's start playing around with our data

In [1]:
#imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import tqdm
from sklearn.model_selection import train_test_split
from datasets import load_dataset,Dataset,DatasetDict
from transformers import DataCollatorWithPadding,AutoModelForSequenceClassification,AutoTokenizer,Trainer,TrainingArguments,AutoModel,AutoConfig
from transformers.modeling_outputs import TokenClassifierOutput
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

2024-03-21 17:46:15.083529: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


# Read in training data
This dataset contains 100 annotated terms of service contracts, each row represents a sentence, which carries on it a label. The label corresponds to a different type of potential unfairness, as defined by the authors of CLAUDETTE, the previous paper from which this dataset came from. 

In [2]:
df = pd.read_csv('../data/dataset.csv')
df.head()

Unnamed: 0.1,Unnamed: 0,A,CH,CR,J,LAW,LTD,PINC,TER,USE,document,document_ID,label,text,TER_targets,LTD_targets,A_targets,CH_targets,CR_targets
0,0,0,0,0,0,0,0,0,0,0,Mozilla,0,0,websites & communications terms of use,,,,,
1,1,0,0,0,0,0,0,0,0,0,Mozilla,0,0,please read the terms of this entire document ...,,,,,
2,2,0,0,0,0,0,0,0,0,1,Mozilla,0,1,by accessing or signing up to receive communic...,,,,,
3,3,0,0,0,0,0,0,0,0,0,Mozilla,0,0,our websites include multiple domains such as ...,,,,,
4,4,0,0,0,0,0,0,0,0,0,Mozilla,0,0,you may also recognize our websites by nicknam...,,,,,


Now let's load in a pretrained huggingface BERT model  (https://huggingface.co/pile-of-law/legalbert-large-1.7M-2) to get the word embeddings of each sentence

In [3]:
from transformers import BertTokenizer, BertModel
tokenizer = BertTokenizer.from_pretrained('pile-of-law/legalbert-large-1.7M-2')
model = BertModel.from_pretrained('pile-of-law/legalbert-large-1.7M-2')
model.to(device)
text = "This is a test"
encoded_input = tokenizer(text, return_tensors='pt')
output = model(**encoded_input.to(device))
weights = model.get_input_embeddings()

In [4]:
model.config

BertConfig {
  "_name_or_path": "pile-of-law/legalbert-large-1.7M-2",
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "directionality": "bidi",
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 1024,
  "initializer_range": 0.02,
  "intermediate_size": 4096,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 16,
  "num_hidden_layers": 24,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "tokenizer_class": "BertTokenizer",
  "transformers_version": "4.38.2",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 32000
}

In [5]:
dataset_hf = load_dataset('csv', data_files='../data/dataset.csv')
#for simplicity, we will only use the label and text to train a binary classifier so drop all other columns
dataset_hf = dataset_hf.remove_columns(["Unnamed: 0",'A', 'CH', 'CR', 'J', 'LAW', 'LTD', 'PINC', 'TER', 'USE', 'document', 'document_ID', 'TER_targets', 'LTD_targets', 'A_targets', 'CH_targets', 'CR_targets'])
dataset_hf.set_format('pandas')
dataset_hf=dataset_hf["train"][:]
dataset_hf.drop_duplicates(subset=['text'],inplace=True)
dataset_hf.reset_index(drop=True,inplace=True)
dataset_hf = Dataset.from_pandas(dataset_hf)
dataset_hf

Dataset({
    features: ['label', 'text'],
    num_rows: 19915
})

In [6]:
baby_df = dataset_hf.to_pandas().sample(250)
baby_df = Dataset.from_pandas(baby_df)
baby_df = baby_df.remove_columns(["__index_level_0__"])
baby_df

Dataset({
    features: ['label', 'text'],
    num_rows: 250
})

Split dataset into train, test and validation

In [7]:
train_testvalid = dataset_hf.train_test_split(test_size=0.2,seed=42)
test_valid = train_testvalid['test'].train_test_split(test_size=0.5,seed=42)

dataset_hf = DatasetDict({
    'train':train_testvalid['train'],
    'test':test_valid['test'],
    'validation':test_valid['train']
    })
dataset_hf

DatasetDict({
    train: Dataset({
        features: ['label', 'text'],
        num_rows: 15932
    })
    test: Dataset({
        features: ['label', 'text'],
        num_rows: 1992
    })
    validation: Dataset({
        features: ['label', 'text'],
        num_rows: 1991
    })
})

In [8]:
baby_train_testvalid = baby_df.train_test_split(test_size=0.2,seed=42)
baby_test_valid = baby_train_testvalid['test'].train_test_split(test_size=0.5,seed=42)

baby_df_dict = DatasetDict({
    'train':baby_train_testvalid['train'],
    'test':baby_test_valid['test'],
    'validation':baby_test_valid['train']
    })
baby_df_dict

DatasetDict({
    train: Dataset({
        features: ['label', 'text'],
        num_rows: 200
    })
    test: Dataset({
        features: ['label', 'text'],
        num_rows: 25
    })
    validation: Dataset({
        features: ['label', 'text'],
        num_rows: 25
    })
})

In [9]:
def tokenize(batch):
    return tokenizer(batch['text'], truncation=True, max_length=1024)

In [10]:
tokenized_baby = baby_df_dict.map(tokenize, batched=True)

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Map:   0%|          | 0/25 [00:00<?, ? examples/s]

Map:   0%|          | 0/25 [00:00<?, ? examples/s]

In [11]:
tokenized_df = dataset_hf.map(tokenize, batched=True)

Map:   0%|          | 0/15932 [00:00<?, ? examples/s]

Map:   0%|          | 0/1992 [00:00<?, ? examples/s]

Map:   0%|          | 0/1991 [00:00<?, ? examples/s]

In [12]:
tokenized_df

DatasetDict({
    train: Dataset({
        features: ['label', 'text', 'input_ids', 'token_type_ids', 'attention_mask'],
        num_rows: 15932
    })
    test: Dataset({
        features: ['label', 'text', 'input_ids', 'token_type_ids', 'attention_mask'],
        num_rows: 1992
    })
    validation: Dataset({
        features: ['label', 'text', 'input_ids', 'token_type_ids', 'attention_mask'],
        num_rows: 1991
    })
})

In [13]:
tokenized_baby.set_format('torch', columns=['input_ids', 'attention_mask','token_type_ids','label'])

In [14]:
tokenized_df.set_format('torch', columns=['input_ids', 'attention_mask',"token_type_ids",'label'])
#data collator forms a batch with padding for the length of the longest input. We do this so we don't need to repeat unnecessary computations on a global maximum length
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

In [15]:
model.config.hidden_size

1024

Now let's construct a custom classifier classifier to classify sentences as potentially unfair or not 

In [16]:
class Classifier_v1(nn.Module):
    def __init__(self, model_name, num_labels):
        super(Classifier_v1,self).__init__()
        self.num_labels = num_labels
        self.model = AutoModel.from_pretrained(model_name,config = AutoConfig.from_pretrained(model_name,
                                                                                              output_attention = True,
                                                                                              output_hidden_state = True))
        # New Layer
        self.dropout = nn.Dropout(0.1)
        self.hidden1 = nn.Linear(self.model.config.hidden_size, 512)
        self.hidden2 = nn.Linear(512, 128)
        self.classifier = nn.Linear(128, num_labels)
        self.softmax = nn.Softmax(dim=1)
        self.relu = nn.ReLU()
    
    def forward(self, input = None, attention_mask = None, token_type_ids = None,input_ids = None, labels = None):
        with torch.no_grad():
            outputs = self.model(input_ids=input_ids, attention_mask = attention_mask, token_type_ids = token_type_ids)
        last_hidden_state = outputs[0]
        sequence_outputs = self.dropout(last_hidden_state)
        logits = self.classifier(self.softmax(self.hidden2(self.relu(self.hidden1(self.softmax(sequence_outputs[:,0,:].view(-1,self.model.config.hidden_size)))))))
        return TokenClassifierOutput(logits = logits, hidden_states=outputs.hidden_states, attentions=outputs.attentions)

In [17]:
from torch.utils.data import DataLoader

train_dataloader = DataLoader(
    tokenized_df['train'], shuffle=True, batch_size=8, collate_fn=data_collator
)
test_dataloader = DataLoader(
    tokenized_df['test'], shuffle=True, collate_fn=data_collator
)

In [18]:
baby_train_dataloader = DataLoader(
    tokenized_baby['train'], shuffle=True, batch_size=8, collate_fn=data_collator
)
baby_test_dataloader = DataLoader(
    tokenized_baby['test'], shuffle=True, collate_fn=data_collator
)

## Training Loop

## First Test on the baby df (smaller subset of the data)

In [20]:
from transformers import get_scheduler
from torch.optim import AdamW, SGD

model_v1 = Classifier_v1('pile-of-law/legalbert-large-1.7M-2',2).to(device)
loss_fn = nn.CrossEntropyLoss()
optimizer = SGD(model_v1.parameters(), lr=.001)
epochs = 5

In [22]:
training_steps = epochs * len(train_dataloader)

lr_scheduler = get_scheduler(
    'linear',
    optimizer=optimizer,
    num_warmup_steps=0,
    num_training_steps=training_steps
)

baby_training_steps = epochs * len(baby_train_dataloader)
baby_lr_scheduler = get_scheduler(
    'linear',
    optimizer=optimizer,
    num_warmup_steps=0,
    num_training_steps=baby_training_steps
)

In [23]:
from tqdm.notebook import tqdm
from sklearn.metrics import accuracy_score, f1_score

for epoch in range(epochs):
    model_v1.train()
    preds = []
    labels = []
    for batch in tqdm(baby_train_dataloader):
        batch = {k: v.to(device) for k, v in batch.items()}
        outputs = model_v1(**batch)
        loss = loss_fn(outputs.logits, batch["labels"])
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        baby_lr_scheduler.step()
    model_v1.eval()
    for batch in tqdm(baby_test_dataloader):
        batch = {k: v.to(device) for k, v in batch.items()}
        with torch.no_grad():
            outputs = model_v1(**batch)
        logits = outputs.logits
        pred = torch.argmax(logits, dim=1)
        test_loss = loss_fn(logits, batch["labels"])
        preds.append(pred.detach().cpu().numpy())
        labels.append(batch["labels"].detach().cpu().numpy())
    print(f"epoch {epoch} scores:\naccuracy: {accuracy_score(preds, labels)}\nf1: {f1_score(preds, labels)}")

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

epoch 0 scores:
accuracy: 0.96
f1: 0.0


  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

epoch 1 scores:
accuracy: 0.96
f1: 0.0


  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

epoch 2 scores:
accuracy: 0.96
f1: 0.0


  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

epoch 3 scores:
accuracy: 0.96
f1: 0.0


  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

epoch 4 scores:
accuracy: 0.96
f1: 0.0


In [25]:
logits

tensor([[ 0.0282, -0.0413]], device='cuda:0')

# Full Loop

In [None]:
for epoch in range(epochs):
    preds = []
    labels = []
    model_v1.train()
    for batch in train_dataloader:
        batch = {k: v.to(device) for k, v in batch.items()}
        outputs = model_v1(**batch)
        loss = outputs.loss
        loss.backward()
        optimizer.step()
        lr_scheduler.step()
        optimizer.zero_grad()
    model_v1.eval()
    for batch in test_dataloader:
        batch = {k: v.to(device) for k, v in batch.items()}
        with torch.no_grad():
            outputs = model_v1(**batch)
        logits = torch.nn.Softmax(outputs.logits, dim=1)
        pred = torch.argmax(logits, dim=1)
        preds.append(pred.detach().cpu().numpy())
        labels.append(batch["labels"].detach().cpu().numpy())
    print(f"epoch {epoch} scores:\naccuracy: {accuracy_score(preds, labels)}\nf1: {f1_score(preds, labels)}")

## Post Training Evaluation

In [123]:
model_v1.eval()

test_dataloader = DataLoader(
    tokenized_df['test'], batch_size=8, collate_fn=data_collator
)
preds = []
labels = []
for batch in test_dataloader:
    batch = {k: v.to(device) for k, v in batch.items()}
    with torch.no_grad():
        outputs = model_v1(**batch)
    logits = outputs.logits
    pred = torch.argmax(logits, dim=1)
    preds.append(pred.detach().cpu().numpy())
    labels.append(batch["labels"].detach().cpu().numpy())
print(f"accuracy: {accuracy_score(preds, labels)}\nf1: {f1_score(preds, labels)}")

ValueError: Target is multilabel-indicator but average='binary'. Please choose another average setting, one of [None, 'micro', 'macro', 'weighted', 'samples'].

In [125]:
preds

[array([0, 0, 0, 0, 0, 0, 0, 0]),
 array([0, 0, 0, 0, 0, 0, 0, 0]),
 array([0, 0, 0, 0, 0, 0, 0, 0]),
 array([0, 0, 0, 0, 0, 0, 0, 0]),
 array([0, 0, 0, 0, 0, 0, 0, 0]),
 array([0, 0, 0, 0, 0, 0, 0, 0]),
 array([0, 0, 0, 0, 0, 0, 0, 0]),
 array([0, 0, 0, 0, 0, 0, 0, 0]),
 array([0, 0, 0, 0, 0, 0, 0, 0]),
 array([0, 0, 0, 0, 0, 0, 0, 0]),
 array([0, 0, 0, 0, 0, 0, 0, 0]),
 array([0, 0, 0, 0, 0, 0, 0, 0]),
 array([0, 0, 0, 0, 0, 0, 0, 0]),
 array([0, 0, 0, 0, 0, 0, 0, 0]),
 array([0, 0, 0, 0, 0, 0, 0, 0]),
 array([0, 0, 0, 0, 0, 0, 0, 0]),
 array([0, 0, 0, 0, 0, 0, 0, 0]),
 array([0, 0, 0, 0, 0, 0, 0, 0]),
 array([0, 0, 0, 0, 0, 0, 0, 0]),
 array([0, 0, 0, 0, 0, 0, 0, 0]),
 array([0, 0, 0, 0, 0, 0, 0, 0]),
 array([0, 0, 0, 0, 0, 0, 0, 0]),
 array([0, 0, 0, 0, 0, 0, 0, 0]),
 array([0, 0, 0, 0, 0, 0, 0, 0]),
 array([0, 0, 0, 0, 0, 0, 0, 0]),
 array([0, 0, 0, 0, 0, 0, 0, 0]),
 array([0, 0, 0, 0, 0, 0, 0, 0]),
 array([0, 0, 0, 0, 0, 0, 0, 0]),
 array([0, 0, 0, 0, 0, 0, 0, 0]),
 array([0, 0, 

It looks like our model is simply labeling everything as not exploitative, which would explain why our f1 score is