In [19]:
import sys
sys.path.append("..")

from sklearn.model_selection import train_test_split
import pandas as pd
import torch
from sklearn.model_selection import GroupShuffleSplit 
import matplotlib.pyplot as plt
import nltk
from torch import optim
from nltk.corpus import stopwords
from models import bert
from transformers import BertTokenizer, BertForSequenceClassification, BertPreTrainedModel, BertModel, BertConfig
import json
from torch.utils.data import DataLoader, Dataset


In [20]:
train_df = pd.read_csv('../data/train_older_adult_annotations.csv',delimiter=',', encoding='latin-1')
test_df = pd.read_csv('../data/test_annotations.csv', delimiter=',')
df = pd.concat([test_df, train_df])

age_anxiety_df = pd.read_csv('../data/age_anxiety_full_responses.csv', delimiter=',')
age_experience_df = pd.read_csv('../data/age_experience_responses.csv', delimiter=',')
demographics_df = pd.read_csv('../data/demographics_responses.csv', delimiter=',')
anxiety_score_df = pd.read_csv('../data/respondent_anxiety_table.csv', delimiter=',')

df1 = pd.merge(demographics_df, anxiety_score_df, on='respondent_id')
merged_df = pd.merge(df, df1, on='respondent_id')

sentiment_labels = ['Very negative','Somewhat negative','Neutral','Somewhat positive','Very positive']
total_annotator_ids = merged_df['respondent_id'].unique().tolist()

id2label = {index: row for (index, row) in enumerate(sentiment_labels)} 
label2id = {row: index for (index, row) in enumerate(sentiment_labels)}

id2annotator = {index: row for (index, row) in enumerate(total_annotator_ids)}
annotator2id = {row: index for (index, row) in enumerate(total_annotator_ids)}

merged_df["annotation"] = merged_df["annotation"].map(label2id)
merged_df["respondent_id"] = merged_df["respondent_id"].map(annotator2id)

merged_df.rename(columns = {'respondent_id':'annotator_id', 'unit_text':'text'}, inplace = True)

grouped = merged_df.groupby('unit_id')['annotation'].nunique().reset_index()
grouped.columns = ['unit_id', 'unique_annotations']
merged_df = merged_df.merge(grouped, on='unit_id')
merged_df['disagreement'] = merged_df['unique_annotations'] > 1

In [21]:
    splitter = GroupShuffleSplit(test_size=0.3, n_splits=2, random_state = 0)
    split = splitter.split(merged_df, groups=merged_df['unit_id'])
    train_inds, test_inds = next(split)
    train_df = merged_df.iloc[train_inds]
    test_df = merged_df.iloc[test_inds]
    train_df = train_df.sample(frac=1)
    test_df = test_df.sample(frac=1)

In [22]:
labels = merged_df['annotation'].unique()
#sort labels
labels.sort()

In [23]:
class CustomDataset(Dataset):
    def __init__(self, dataframe, tokenizer, labels):
        self.data = dataframe
        self.tokenizer = tokenizer
        self.labels = labels

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        sentence = self.data.iloc[index]['text']
        annotator_id = self.data.iloc[index]['annotator_id']
        annotation = self.data.iloc[index]['annotation']
        disagreement = self.data.iloc[index]['disagreement']

        # Tokenize the sentence 
        inputs = self.tokenizer.encode_plus(
            sentence,
            add_special_tokens=True,
            padding='max_length',
            truncation=True,
            max_length=512,
            return_tensors='pt'
        )

        input_ids = inputs['input_ids'].squeeze()
        attention_mask = inputs['attention_mask'].squeeze()
        annotator_id = torch.tensor(annotator_id, dtype=torch.long)

        return {
            'input_ids': input_ids,
            'attention_mask': attention_mask,
            'annotator_id': annotator_id,
            'label': annotation,
            'disagreement': disagreement
        }

In [24]:
# Define batch size and number of workers for data loaders
batch_size = 16
num_workers = 2

tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# Create training and testing datasets
train_dataset = CustomDataset(train_df, tokenizer, labels)
test_dataset = CustomDataset(test_df, tokenizer, labels)

# Create training and testing data loaders
train_data_loader = DataLoader(train_dataset, batch_size=batch_size, num_workers=num_workers)
test_data_loader = DataLoader(test_dataset, batch_size=batch_size, num_workers=num_workers)


In [26]:
train_dataset

<__main__.CustomDataset at 0x7fa6c3f49f30>

In [27]:
device = torch.device("cuda")

In [28]:
configuration = BertConfig()
configuration.num_labels = len(labels)
configuration.num_annotators = len(total_annotator_ids)
configuration.annotator_embedding_dim = 100
configuration.hidden_size = 768 
model = bert.BertForSequenceClassificationWithAnnotators(configuration).to(device)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [49]:
def get_accuracy(model, device, data_loader, mode):
    model.eval()
    correct_predictions_disagreement = 0
    total_predictions_disagreement = 0
    correct_predictions_agreement = 0
    total_predictions_agreement = 0

    with torch.no_grad():
        for batch in data_loader:
            input_ids = batch['input_ids'].to(device)
            annotator_ids = batch['annotator_id'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['label'].to(device)
            disagreement = batch['disagreement'].to(device)

            # Forward pass
            if mode=="groups":
                w, log_p = model(annotator_ids, input_ids, attention_mask=attention_mask)
                best_group = log_p.argmax(dim=1)
                w = w[range(len(w)), best_group]
                _, predicted = torch.max(w, 1)

                
            elif mode == "annotators":
                outputs = model(annotator_ids, input_ids, attention_mask=attention_mask)
                logits = outputs[0]

                # Get predicted labels
                _, predicted = torch.max(logits, dim=1)
            else:
                outputs = model(input_ids, attention_mask=attention_mask)
                logits = outputs[0]

                # Get predicted labels
                _, predicted = torch.max(logits, dim=1)
            print(predicted,"ppppppppp",labels,"jjjjjjj", disagreement)
                
#             labels_agreement = [label for label, disagree in zip(labels, disagreement) if not disagree]
#             labels_disagreement = [label for label, disagree in zip(labels, disagreement) if disagree]
#             predicted_agreement = [predicted for predicted, disagree in zip(predicted, disagreement) if not disagree]
#             predicted_disagreement = [predicted for predicted, disagree in zip(predicted, disagreement) if disagree]
            labels_agreement = labels[~disagreement]
            labels_disagreement = labels[disagreement]
            predicted_agreement = predicted[~disagreement]
            predicted_disagreement = labels[disagreement]
            
            print(predicted_disagreement, "ppp",labels_disagreement)
                    

            correct_predictions_disagreement += (predicted_disagreement == labels_disagreement).sum().item()
            total_predictions_disagreement += labels_disagreement.size(0)
            correct_predictions_agreement += (predicted_agreement == labels_agreement).sum().item()
            total_predictions_agreement += labels_agreement.size(0)

    accuracy_disagreement = correct_predictions_disagreement / total_predictions_disagreement
    accuracy_agreement = correct_predictions_agreement / total_predictions_agreement
    
    return accuracy_disagreement, accuracy_agreement


In [50]:
get_accuracy(model, device, test_data_loader, mode="annotators")

tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], device='cuda:0') ppppppppp tensor([4, 1, 3, 3, 2, 1, 1, 0, 0, 2, 1, 1, 3, 2, 1, 2], device='cuda:0') jjjjjjj tensor([True, True, True, True, True, True, True, True, True, True, True, True,
        True, True, True, True], device='cuda:0')
[tensor(0, device='cuda:0'), tensor(0, device='cuda:0'), tensor(0, device='cuda:0'), tensor(0, device='cuda:0'), tensor(0, device='cuda:0'), tensor(0, device='cuda:0'), tensor(0, device='cuda:0'), tensor(0, device='cuda:0'), tensor(0, device='cuda:0'), tensor(0, device='cuda:0'), tensor(0, device='cuda:0'), tensor(0, device='cuda:0'), tensor(0, device='cuda:0'), tensor(0, device='cuda:0'), tensor(0, device='cuda:0'), tensor(0, device='cuda:0')] ppp [tensor(4, device='cuda:0'), tensor(1, device='cuda:0'), tensor(3, device='cuda:0'), tensor(3, device='cuda:0'), tensor(2, device='cuda:0'), tensor(1, device='cuda:0'), tensor(1, device='cuda:0'), tensor(0, device='cuda:0'), tensor(0, device='cuda:0')

AttributeError: 'bool' object has no attribute 'sum'

In [9]:
bert.train(model, device, train_data_loader, mode="annotators")

KeyboardInterrupt: 

In [None]:
torch.save(model.state_dict(), 'annotators.pth')

In [8]:
bert.test(model, device, test_data_loader, mode="annotators")

KeyboardInterrupt: 