In [None]:
import transformers
from transformers import BertModel, BertTokenizer, AutoTokenizer, AutoModel, AdamW, get_linear_schedule_with_warmup
import torch

import numpy as np
import pandas as pd
import re
from tqdm.auto import tqdm

import seaborn as sns
from pylab import rcParams
import matplotlib.pyplot as plt
from matplotlib import rc
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.metrics import multilabel_confusion_matrix, classification_report
from collections import defaultdict
from textwrap import wrap

from torch import nn, optim
from torch.utils.data import Dataset, DataLoader

import pytorch_lightning as pl
from torchmetrics.functional import accuracy, auroc
from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping
from pytorch_lightning.loggers import TensorBoardLogger

#matplotlib setting/format specifications

%matplotlib inline
%config InlineBackend.figure_format='retina'

sns.set(style='whitegrid', palette='muted', font_scale=1.2)

HAPPY_COLORS_PALETTE = ["#01BEFE", "#FFDD00", "#FF7D00", "#FF006D", "#ADFF02", "#8F00FF"]

sns.set_palette(sns.color_palette(HAPPY_COLORS_PALETTE))

#Controls size of figures we create
rcParams['figure.figsize'] = 8, 6

#set random seed

RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)
torch.manual_seed(RANDOM_SEED)

  warn(f"Failed to load image Python extension: {e}")


<torch._C.Generator at 0x7f19a68523d0>

In [None]:
# Load the notes table; the path is relative to the notebook's working directory.
# Later cells read its "TEXT" column and take the label columns from it.
sw_notes=pd.read_csv('Final_SWmerged.csv')

In [None]:
def clean_text(df):
  """Return the note stored under ``df["TEXT"]`` as one whitespace-normalised line.

  Despite the parameter name, ``df`` is a single record: the notebook passes
  each row through ``DataFrame.apply(..., axis=1)``. Anything indexable by
  ``"TEXT"`` works.

  Every run of whitespace (newlines, tabs, repeated spaces) is collapsed to a
  single space so that neighbouring words stay separated, and leading/trailing
  whitespace is removed. Missing values (``None`` / NaN) give an empty string.
  Other non-string values are converted with ``str``.
  """
  raw = df["TEXT"]
  # NaN is the only float that does not compare equal to itself.
  if raw is None or (isinstance(raw, float) and raw != raw):
    return ""
  return re.sub(r"\s+", " ", str(raw)).strip()

In [None]:
# axis=1 hands clean_text one row at a time; the cleaned note is stored in a
# new TEXT_CLEAN column next to the original TEXT.
sw_notes["TEXT_CLEAN"] = sw_notes.apply(clean_text, axis=1)

In [None]:
# Target columns, selected by name rather than by position so that adding or
# reordering columns in the CSV cannot silently change the labels.
LABEL_COLUMNS = [
  "Involved_Support",
  "Communication_Facilitation",
  "Counseling",
  "Practical_Assistance",
]

# Fail here, with a clear message, if the CSV does not carry these columns.
missing_label_columns = [col for col in LABEL_COLUMNS if col not in sw_notes.columns]
assert not missing_label_columns, f"Label columns missing from sw_notes: {missing_label_columns}"

## Dataset Class

In [None]:
# Tokenizer for the same Bio_ClinicalBERT checkpoint that the classifier below loads.
tokenizer = AutoTokenizer.from_pretrained("emilyalsentzer/Bio_ClinicalBERT")

In [None]:
class SWNotesDataset(Dataset):
  """Dataset that tokenises one note per item.

  Each item is built from one row of ``data``: the text comes from the
  ``TEXT_CLEAN`` column and the targets from the module-level
  ``LABEL_COLUMNS``.

  Args:
    data: frame holding ``TEXT_CLEAN`` and every column in ``LABEL_COLUMNS``.
    tokenizer: tokenizer exposing ``encode_plus``.
    max_len: length every encoding is truncated / padded to.
  """

  def __init__(self, data: pd.DataFrame, tokenizer:BertTokenizer, max_len=512):
    self.data = data
    self.tokenizer = tokenizer
    self.max_len = max_len

  def __len__(self):
    return len(self.data)

  def __getitem__(self,index: int):
    """Return the note text, its token ids, attention mask and float labels."""
    data_row = self.data.iloc[index]
    note = data_row.TEXT_CLEAN
    # A row taken from a mixed-dtype frame is an object Series, so convert the
    # label values to a float32 array explicitly before building the tensor.
    labels = data_row[LABEL_COLUMNS].to_numpy(dtype="float32")

    encoding=self.tokenizer.encode_plus(
        note,
        add_special_tokens=True,
        truncation=True,
        max_length=self.max_len,
        return_token_type_ids=False,
        padding= 'max_length',
        return_attention_mask=True,
        return_tensors='pt',
    )

    return {
        'SW_note': note,
        # return_tensors='pt' adds a leading dimension; flatten() drops it so
        # the DataLoader can stack the items into a batch.
        'input_ids': encoding['input_ids'].flatten(),
        'attention_mask': encoding['attention_mask'].flatten(),
        'labels': torch.tensor(labels)
    }

In [None]:
# Five shuffled folds over the rows of sw_notes, seeded for repeatability.
kf = KFold(n_splits=5, shuffle=True, random_state=RANDOM_SEED)
folds = kf.split(sw_notes)

# Every fold is a (train positions, test positions) pair; unpack all five at once.
(
  (trn_idx1, tst_idx1),
  (trn_idx2, tst_idx2),
  (trn_idx3, tst_idx3),
  (trn_idx4, tst_idx4),
  (trn_idx5, tst_idx5),
) = folds

In [None]:
# Training rows of each fold, selected by position.
df_train1, df_train2, df_train3, df_train4, df_train5 = (
  sw_notes.iloc[rows]
  for rows in (trn_idx1, trn_idx2, trn_idx3, trn_idx4, trn_idx5)
)

# Held-out rows of each fold, selected the same way.
df_test1, df_test2, df_test3, df_test4, df_test5 = (
  sw_notes.iloc[rows]
  for rows in (tst_idx1, tst_idx2, tst_idx3, tst_idx4, tst_idx5)
)

In [None]:
class SWNotesDataModule(pl.LightningDataModule):
  """Lightning data module serving one train/test fold of the notes frame.

  Args:
    df_train: rows used for training.
    df_test: rows used for testing.
    tokenizer: tokenizer forwarded to every SWNotesDataset.
    batch_size: batch size of every DataLoader.
    max_len: token length forwarded to every SWNotesDataset.
    df_val: optional rows used for validation. When omitted, the test rows
      double as the validation set; anything driven by the validation loss
      (early stopping, checkpoint selection) is then tuned on the test data,
      so the reported test metrics are optimistic.
    num_workers: worker processes used by every DataLoader.
  """

  def __init__(self, df_train, df_test, tokenizer, batch_size=8, max_len=512,
               df_val=None, num_workers=2):
    super().__init__()
    self.batch_size = batch_size
    self.df_train = df_train
    self.df_test = df_test
    self.df_val = df_val
    self.tokenizer = tokenizer
    self.max_len = max_len
    self.num_workers = num_workers

  def setup(self, stage=None):
    """Build the datasets; ``stage`` is accepted but not used."""
    self.train_dataset = self._make_dataset(self.df_train)
    self.test_dataset = self._make_dataset(self.df_test)
    # Without a dedicated validation frame, validate on the test rows.
    if self.df_val is None:
      self.val_dataset = self.test_dataset
    else:
      self.val_dataset = self._make_dataset(self.df_val)

  def _make_dataset(self, frame):
    """Wrap ``frame`` in a SWNotesDataset using this module's tokenizer settings."""
    return SWNotesDataset(frame, self.tokenizer, self.max_len)

  def _make_loader(self, dataset, shuffle=False):
    """Create a DataLoader over ``dataset`` with the shared batch/worker settings."""
    return DataLoader(
      dataset,
      batch_size=self.batch_size,
      shuffle=shuffle,
      num_workers=self.num_workers
    )

  def train_dataloader(self):
    # Only the training data is shuffled.
    return self._make_loader(self.train_dataset, shuffle=True)

  def val_dataloader(self):
    return self._make_loader(self.val_dataset)

  def test_dataloader(self):
    return self._make_loader(self.test_dataset)



In [None]:
# Upper bound on training epochs (passed to the Trainer as max_epochs).
N_EPOCHS = 10
# Examples per batch; also used to derive the scheduler step counts.
BATCH_SIZE = 8
# Token length every note is truncated / padded to by the dataset.
MAX_LEN = 512

In [None]:
#Fold changes each run
# Swap both frames for another fold's train/test pair to run that fold.
data_module = SWNotesDataModule(
  df_train=df_train1,
  df_test=df_test1,
  tokenizer=tokenizer,
  batch_size=BATCH_SIZE,
  max_len=MAX_LEN,
)

## Building Classification Model

In [None]:
#Pytorch lightning method

class SWRoleClassifier(pl.LightningModule):
  """Bio_ClinicalBERT encoder with a linear head scoring each label independently.

  Args:
    n_classes: number of output labels.
    n_training_steps: total optimizer steps, used by the LR scheduler.
    n_warmup_steps: warm-up steps, used by the LR scheduler.
  """

  def __init__(self, n_classes: int, n_training_steps=None, n_warmup_steps=None):
    super().__init__()
    self.bert = AutoModel.from_pretrained('emilyalsentzer/Bio_ClinicalBERT', return_dict=True)
    self.classifier = nn.Linear(self.bert.config.hidden_size, n_classes)
    self.n_training_steps = n_training_steps
    self.n_warmup_steps = n_warmup_steps
    # The loss is computed on raw logits: BCEWithLogitsLoss fuses the sigmoid
    # with the cross-entropy and stays stable when a score saturates at 0 or 1.
    self.criterion = nn.BCEWithLogitsLoss()

  def forward(self, input_ids, attention_mask, labels=None):
    """Return ``(loss, probabilities)``; loss is 0 when no labels are given."""
    encoded = self.bert(input_ids, attention_mask=attention_mask)
    logits = self.classifier(encoded.pooler_output)
    loss = 0
    if labels is not None:
        loss = self.criterion(logits, labels)
    return loss, torch.sigmoid(logits)

  def _run_batch(self, batch):
    """Run one batch through the model and return ``(loss, probabilities, labels)``."""
    labels = batch["labels"]
    loss, outputs = self(batch["input_ids"], batch["attention_mask"], labels)
    return loss, outputs, labels

  def training_step(self, batch, batch_idx):
    loss, outputs, labels = self._run_batch(batch)
    self.log("train_loss", loss, prog_bar=True, logger=True)
    # Predictions are kept only for the epoch-end metrics, so detach them to
    # avoid holding every step's autograd graph for the whole epoch.
    return {"loss": loss, "predictions": outputs.detach(), "labels": labels}

  def validation_step(self, batch, batch_idx):
    loss, _, _ = self._run_batch(batch)
    self.log("val_loss", loss, prog_bar=True, logger=True)
    return loss

  def test_step(self, batch, batch_idx):
    loss, _, _ = self._run_batch(batch)
    self.log("test_loss", loss, prog_bar=True, logger=True)
    return loss

  def training_epoch_end(self, outputs):
    """Log one training ROC AUC per label column for the finished epoch."""
    labels = torch.cat([step["labels"].detach().cpu() for step in outputs]).int()
    predictions = torch.cat([step["predictions"].detach().cpu() for step in outputs])

    for i, name in enumerate(LABEL_COLUMNS):
      class_roc_auc = auroc(predictions[:, i], labels[:, i])
      self.logger.experiment.add_scalar(f"{name}_roc_auc/Train", class_roc_auc, self.current_epoch)

  def configure_optimizers(self):
    """AdamW with linear warm-up followed by linear decay, stepped every batch."""
    # torch's AdamW replaces the deprecated transformers implementation; eps
    # and weight_decay are set to that implementation's defaults so the
    # update rule is unchanged.
    optimizer = optim.AdamW(self.parameters(), lr=2e-5, eps=1e-6, weight_decay=0.0)
    scheduler = get_linear_schedule_with_warmup(
      optimizer,
      num_warmup_steps=self.n_warmup_steps,
      num_training_steps=self.n_training_steps
    )

    return dict(
      optimizer=optimizer,
      lr_scheduler=dict(
        scheduler=scheduler,
        interval='step'
      )
    )

In [None]:
#Calculate warm-up and total steps
# The training DataLoader keeps the final partial batch, so round up; flooring
# undercounts the steps and the linear schedule reaches a learning rate of
# zero before training has finished.
steps_per_epoch = (len(df_train1) + BATCH_SIZE - 1) // BATCH_SIZE
total_training_steps = steps_per_epoch * N_EPOCHS

# The first fifth of training is spent on warm-up.
warmup_steps = total_training_steps // 5
warmup_steps, total_training_steps

(250, 1250)

In [None]:
# One output per label column; the step counts configure the LR schedule.
model = SWRoleClassifier(
  n_classes=len(LABEL_COLUMNS),
  n_training_steps=total_training_steps,
  n_warmup_steps=warmup_steps,
)

Some weights of the model checkpoint at emilyalsentzer/Bio_ClinicalBERT were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


## Training

In [None]:
# TensorBoard logs are written under lightning_logs/SWroles2.
logger = TensorBoardLogger(save_dir="lightning_logs", name="SWroles2")

# Stop training once val_loss has gone 3 validation checks without improving.
early_stopping_callback = EarlyStopping(
  monitor='val_loss',
  patience=3,
)

In [None]:
# Keep the single checkpoint with the lowest validation loss. Without a
# monitored ModelCheckpoint the trainer only keeps the most recent epoch, so
# trainer.checkpoint_callback.best_model_path would point at the last epoch
# (several epochs past the best one when early stopping triggers).
checkpoint_callback = ModelCheckpoint(monitor="val_loss", mode="min", save_top_k=1)

trainer = pl.Trainer(
  logger=logger,
  log_every_n_steps=40,
  callbacks=[checkpoint_callback, early_stopping_callback],
  max_epochs=N_EPOCHS,
  # accelerator/devices replaces the deprecated `gpus=1`.
  accelerator="gpu",
  devices=1
)

  f"Setting `Trainer(gpus={gpus!r})` is deprecated in v1.7 and will be removed"
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs


In [None]:
#Training step
# Train on the data module's training loader and validate with its validation loader.
trainer.fit(model, datamodule=data_module)

INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name       | Type      | Params
-----------------------------------------
0 | bert       | BertModel | 108 M 
1 | classifier | Linear    | 3.1 K 
2 | criterion  | BCELoss   | 0     
-----------------------------------------
108 M     Trainable params
0         Non-trainable params
108 M     Total params
433.253   Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

## Evaluation

In [None]:
#Set model to eval mode
# Rebuild the classifier from the checkpoint the trainer reports as best.
# n_classes is passed again because the constructor requires it.
trained_model = SWRoleClassifier.load_from_checkpoint(
  trainer.checkpoint_callback.best_model_path,
  n_classes=len(LABEL_COLUMNS)
)

trained_model.eval()
trained_model.freeze()

# Choose the inference device here so the cell runs on a fresh kernel; the
# inference loop below moves each input to the same device.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
trained_model = trained_model.to(device)

Some weights of the model checkpoint at emilyalsentzer/Bio_ClinicalBERT were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [None]:
#Change fold each run
# Held-out rows of the fold being evaluated, tokenised like the training data.
test_data = SWNotesDataset(df_test1, tokenizer, MAX_LEN)

# Per-note accumulators filled by the inference loop.
predictions, labels = [], []

In [None]:
# Start from empty accumulators so that re-running this cell replaces the
# results instead of appending a second copy of every prediction.
predictions = []
labels = []

# Inference only: no gradients are needed.
with torch.no_grad():
  for item in tqdm(test_data):
    # The model expects a batch dimension, so add one around the single note.
    _, prediction = trained_model(
      item["input_ids"].unsqueeze(dim=0).to(device),
      item["attention_mask"].unsqueeze(dim=0).to(device)
    )
    predictions.append(prediction.flatten())
    labels.append(item["labels"].int())

  0%|          | 0/251 [00:00<?, ?it/s]

In [None]:
# Turn each list of per-note tensors into one CPU tensor with a row per note.
predictions, labels = (
  torch.stack(per_note).detach().cpu()
  for per_note in (predictions, labels)
)

In [None]:
#Accuracy
# Scores are compared against labels using a 0.5 decision threshold.
# NOTE(review): how the result is averaged over the label columns depends on
# the installed torchmetrics version — confirm before quoting this number.
accuracy(predictions, labels, threshold=0.5)

tensor(0.8675)

In [None]:
#Fold 1
# Reports on the current `predictions` / `labels`, i.e. the fold the cells
# above were last run with.
upper, lower = 1, 0

y_true = labels.numpy()
# Scores strictly above 0.5 count as a positive label.
y_pred = np.where(predictions.numpy() > 0.5, upper, lower)

print(classification_report(
  y_true=y_true,
  y_pred=y_pred,
  target_names=LABEL_COLUMNS,
  zero_division=0,
))

                            precision    recall  f1-score   support

          Involved_Support       0.88      0.83      0.85        94
Communication_Facilitation       0.67      0.59      0.63        59
                Counseling       0.65      0.37      0.47        35
      Practical_Assistance       0.71      0.61      0.66        57

                 micro avg       0.77      0.66      0.71       245
                 macro avg       0.73      0.60      0.65       245
              weighted avg       0.76      0.66      0.70       245
               samples avg       0.45      0.42      0.42       245



In [None]:
#Fold 2
# Reports on the current `predictions` / `labels`; fold-2 numbers require
# re-running the cells above with df_train2 / df_test2 first.
upper, lower = 1, 0

y_true = labels.numpy()
# Scores strictly above 0.5 count as a positive label.
y_pred = np.where(predictions.numpy() > 0.5, upper, lower)

print(classification_report(
  y_true=y_true,
  y_pred=y_pred,
  target_names=LABEL_COLUMNS,
  zero_division=0,
))



                            precision    recall  f1-score   support

          Involved_Support       0.77      0.72      0.74        86
Communication_Facilitation       0.57      0.59      0.58        56
                Counseling       0.56      0.17      0.26        29
      Practical_Assistance       0.82      0.56      0.67        57

                 micro avg       0.71      0.58      0.64       228
                 macro avg       0.68      0.51      0.56       228
              weighted avg       0.70      0.58      0.62       228
               samples avg       0.39      0.38      0.37       228



In [None]:
#Fold 3
# Reports on the current `predictions` / `labels`; fold-3 numbers require
# re-running the cells above with df_train3 / df_test3 first.
upper, lower = 1, 0

y_true = labels.numpy()
# Scores strictly above 0.5 count as a positive label.
y_pred = np.where(predictions.numpy() > 0.5, upper, lower)

print(classification_report(
  y_true=y_true,
  y_pred=y_pred,
  target_names=LABEL_COLUMNS,
  zero_division=0,
))


                            precision    recall  f1-score   support

          Involved_Support       0.78      0.87      0.82        90
Communication_Facilitation       0.71      0.69      0.70        68
                Counseling       0.87      0.37      0.52        35
      Practical_Assistance       0.71      0.58      0.64        59

                 micro avg       0.75      0.68      0.72       252
                 macro avg       0.77      0.63      0.67       252
              weighted avg       0.76      0.68      0.70       252
               samples avg       0.46      0.45      0.44       252



In [None]:
#Fold 4
# Reports on the current `predictions` / `labels`; fold-4 numbers require
# re-running the cells above with df_train4 / df_test4 first.
upper, lower = 1, 0

y_true = labels.numpy()
# Scores strictly above 0.5 count as a positive label.
y_pred = np.where(predictions.numpy() > 0.5, upper, lower)

print(classification_report(
  y_true=y_true,
  y_pred=y_pred,
  target_names=LABEL_COLUMNS,
  zero_division=0,
))


                            precision    recall  f1-score   support

          Involved_Support       0.84      0.84      0.84        90
Communication_Facilitation       0.60      0.71      0.65        51
                Counseling       0.53      0.21      0.30        38
      Practical_Assistance       0.77      0.62      0.69        58

                 micro avg       0.73      0.66      0.69       237
                 macro avg       0.68      0.60      0.62       237
              weighted avg       0.72      0.66      0.67       237
               samples avg       0.46      0.42      0.43       237



In [None]:
#Fold 5
# Reports on the current `predictions` / `labels`; fold-5 numbers require
# re-running the cells above with df_train5 / df_test5 first.
upper, lower = 1, 0

y_true = labels.numpy()
# Scores strictly above 0.5 count as a positive label.
y_pred = np.where(predictions.numpy() > 0.5, upper, lower)

print(classification_report(
  y_true=y_true,
  y_pred=y_pred,
  target_names=LABEL_COLUMNS,
  zero_division=0,
))


                            precision    recall  f1-score   support

          Involved_Support       0.87      0.78      0.82        76
Communication_Facilitation       0.71      0.76      0.74        55
                Counseling       0.36      0.45      0.40        22
      Practical_Assistance       0.55      0.69      0.61        42

                 micro avg       0.67      0.72      0.69       195
                 macro avg       0.62      0.67      0.64       195
              weighted avg       0.70      0.72      0.70       195
               samples avg       0.37      0.37      0.36       195

