In [None]:
import pandas as pd
import numpy as np
import json
from collections import defaultdict

import os
import re
from nltk import word_tokenize

import torch
from torch import nn
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
from transformers import BertTokenizer,BertForTokenClassification
from sklearn.model_selection import train_test_split

In [None]:
# IOB2 tag inventory: one B-/I- pair per therapy category plus the outside tag 'O'.
# The position of a tag in this list is the integer class id.
labels=['B-CAM_therapies','B-Energy_therapies',
 'B-Manual_bodybased_therapies','B-Mindbody_therapies',
 'B-Usual_Medical_Care','I-CAM_therapies',
 'I-Energy_therapies','I-Manual_bodybased_therapies',
 'I-Mindbody_therapies','I-Usual_Medical_Care','O']

indexes=list(range(len(labels)))
# Built with comprehensions so that no loop variable (previously `id`, which
# shadowed the builtin of the same name) leaks into the notebook namespace.
label_to_ids={tag_name: tag_id for tag_id, tag_name in zip(indexes, labels)}
ids_to_label={tag_id: tag_name for tag_id, tag_name in zip(indexes, labels)}
print(label_to_ids)
print(ids_to_label)

{'B-CAM_therapies': 0, 'B-Energy_therapies': 1, 'B-Manual_bodybased_therapies': 2, 'B-Mindbody_therapies': 3, 'B-Usual_Medical_Care': 4, 'I-CAM_therapies': 5, 'I-Energy_therapies': 6, 'I-Manual_bodybased_therapies': 7, 'I-Mindbody_therapies': 8, 'I-Usual_Medical_Care': 9, 'O': 10}
{0: 'B-CAM_therapies', 1: 'B-Energy_therapies', 2: 'B-Manual_bodybased_therapies', 3: 'B-Mindbody_therapies', 4: 'B-Usual_Medical_Care', 5: 'I-CAM_therapies', 6: 'I-Energy_therapies', 7: 'I-Manual_bodybased_therapies', 8: 'I-Mindbody_therapies', 9: 'I-Usual_Medical_Care', 10: 'O'}


#LOAD DATA

In [None]:
##===================================================##
##              CAM data preprocessing               ##
##===================================================##
def fla_list(list_):
  """Flatten the tag sequences of all rows into a single list.

  Each row is indexable and holds its tag sequence at position 1; the tags
  are concatenated in row order.
  """
  return [single_tag for entry in list_ for single_tag in entry[1]]

In [None]:
##===================================================##
##              Load saved Data                      ##
##===================================================##
import pandas as pd
def _frame_to_examples(frame):
  """Convert one split's DataFrame into a list of [words, tags] pairs.

  The 'words' and 'labels' cells of each row are split on single spaces.
  """
  return [
    [word_cell.split(' '), tag_cell.split(' ')]
    for word_cell, tag_cell in zip(frame['words'], frame['labels'])
  ]


# NOTE(review): absolute Google Drive path; only resolves where that drive is mounted.
dir_path='/content/drive/MyDrive/CAM_NER_RE_Project/processed data/NER_data/'
df_train, df_valid, df_test = (
  pd.read_csv(dir_path + csv_name)
  for csv_name in ('CAM_train.csv', 'CAM_valid.csv', 'CAM_test.csv')
)

Train_data = _frame_to_examples(df_train)
Valid_data = _frame_to_examples(df_valid)
Test_data = _frame_to_examples(df_test)

In [None]:
unique_tag=labels


def _print_split_summary(title, split):
  """Print the sentence count and per-tag token counts of one split.

  Returns the flattened tag list of the split so it can be kept for later use.
  """
  print(f"{title} data summary ---------------")
  print("num sentence:",len(split))
  flat_tags=fla_list(split)
  for tag_name in unique_tag:
    print(tag_name,flat_tags.count(tag_name))
  return flat_tags


train_tags=_print_split_summary("train",Train_data)
test_tags=_print_split_summary("test",Test_data)
valid_tags=_print_split_summary("valid",Valid_data)
# total number of abstracts: 260
all_data=Train_data+Test_data+Valid_data
all_tags=_print_split_summary("all",all_data)

train data summary ---------------
num sentence: 4041
B-CAM_therapies 121
B-Energy_therapies 356
B-Manual_bodybased_therapies 281
B-Mindbody_therapies 376
B-Usual_Medical_Care 288
I-CAM_therapies 109
I-Energy_therapies 291
I-Manual_bodybased_therapies 185
I-Mindbody_therapies 277
I-Usual_Medical_Care 225
O 63609
test data summary ---------------
num sentence: 2526
B-CAM_therapies 78
B-Energy_therapies 213
B-Manual_bodybased_therapies 187
B-Mindbody_therapies 245
B-Usual_Medical_Care 180
I-CAM_therapies 67
I-Energy_therapies 152
I-Manual_bodybased_therapies 135
I-Mindbody_therapies 159
I-Usual_Medical_Care 138
O 39859
valid data summary ---------------
num sentence: 2526
B-CAM_therapies 84
B-Energy_therapies 240
B-Manual_bodybased_therapies 166
B-Mindbody_therapies 233
B-Usual_Medical_Care 177
I-CAM_therapies 70
I-Energy_therapies 211
I-Manual_bodybased_therapies 97
I-Mindbody_therapies 184
I-Usual_Medical_Care 134
O 39608
all data summary ---------------
num sentence: 9093
B-CAM_therap

In [None]:
def tokenize_and_preserve_labels(sentence, text_labels, tokenizer, max_len=125):
    """Tokenize a sentence word by word, repeating each word's label per sub-word.

    Word-piece tokenization makes it hard to map word-level labels back onto
    individual word pieces, so every word is tokenized on its own and its label
    is copied once for each piece it produces.

    Args:
        sentence: iterable of words.
        text_labels: iterable of labels, paired with ``sentence`` via ``zip``
            (extra items on the longer side are ignored).
        tokenizer: object exposing ``tokenize(word) -> list``.
        max_len: soft cap on the number of sub-word tokens. Once the running
            total has reached this value, the remaining words are discarded.
            The cap is checked *before* a word is added, so the result can
            exceed ``max_len`` by up to the piece count of the last word kept,
            minus one. Defaults to 125, the value that used to be hard-coded.

    Returns:
        tuple ``(tokenized_sentence, labels)`` of two equally long lists.
    """
    tokenized_sentence = []
    labels = []

    for word, label in zip(sentence, text_labels):
        # Cap reached: drop this word and everything after it. Checking first
        # avoids tokenizing a word that is about to be thrown away.
        if len(tokenized_sentence) >= max_len:
            break

        tokenized_word = tokenizer.tokenize(word)
        tokenized_sentence.extend(tokenized_word)
        # One copy of the word's label per sub-word piece.
        labels.extend([label] * len(tokenized_word))

    return tokenized_sentence, labels

In [None]:
##===================================================##
##              CAM data loader                      ##
##===================================================##
class Ner_CAM_Data(Dataset):
    """Dataset that turns ``[words, tags]`` pairs into fixed-length model inputs.

    ``tokenizer``, ``label_to_ids`` and ``tokenize_and_preserve_labels`` are
    looked up as module-level names each time an item is fetched, so whichever
    ``tokenizer`` is bound at iteration time is the one that gets used.

    Args:
        data: indexable collection of ``[words, tags]`` pairs.
        max_length: length of every returned sequence, special tokens included.
            Defaults to 128, the value that used to be hard-coded.
    """

    def __init__(self, data, max_length=128):
        self.data = data
        self.max_length = max_length

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        """Return a dict of tensors for example ``idx``.

        The dict holds whatever ``tokenizer.encode_plus`` returns plus a
        ``labels`` tensor of length ``max_length`` in which every position that
        has no known tag (special tokens, padding, unknown tags) is -100.
        """
        sentence = self.data[idx][0]
        word_labels = self.data[idx][1]

        t_sen, t_labl = tokenize_and_preserve_labels(sentence, word_labels, tokenizer)

        sen_code = tokenizer.encode_plus(t_sen,
            add_special_tokens=True,  # Add [CLS] and [SEP]
            max_length=self.max_length,  # maximum length of a sentence
            padding='max_length',
            # Without explicit truncation an over-long sentence yields more than
            # max_length ids, which no longer line up with the label vector.
            truncation=True,
            return_attention_mask=True,  # Generate the attention mask
            )

        labels = [-100] * self.max_length
        # Slot 0 belongs to [CLS] and the last slot is reserved for [SEP], so at
        # most max_length - 2 sub-word labels fit.
        for i, tok in enumerate(t_labl[:self.max_length - 2]):
            label_id = label_to_ids.get(tok)
            if label_id is not None:
                labels[i + 1] = label_id

        # turn everything into PyTorch tensors
        item = {key: torch.as_tensor(val) for key, val in sen_code.items()}
        item['labels'] = torch.as_tensor(labels)

        return item

In [None]:
# Wrap each split in the dataset class, in train / valid / test order.
CAM_train_data, CAM_valid_data, CAM_test_data = (
    Ner_CAM_Data(split) for split in (Train_data, Valid_data, Test_data)
)

# All three loaders iterate in dataset order (shuffle=False) with batches of 16.
train_dataloader, valid_dataloader, test_dataloader = (
    DataLoader(dataset, batch_size=16, shuffle=False)
    for dataset in (CAM_train_data, CAM_valid_data, CAM_test_data)
)

# test metrics

In [None]:
import re
import numpy as np
from collections import defaultdict
TNE = "this_tag_does_not_exists"


def _ratio(numerator, denominator):
    """Return ``numerator / denominator``, or 0 when the denominator is not positive."""
    return numerator / denominator if denominator > 0 else 0


def self_classifcation_report(tags_true: list, tags_pred: list, mode="lenient", scheme="IOB2", verbose=False) -> dict:
    """Compute entity-level precision, recall and F1 for two IOB2 tag sequences.

    Args:
        tags_true (list): gold tags, one per token.
        tags_pred (list): predicted tags, aligned with ``tags_true``.
        mode (str, optional): "strict" counts an entity as matched only when
            gold and prediction agree on every token from its B- tag to its
            end; "lenient" counts it as soon as gold and prediction are inside
            an entity of the same type on the same token. Defaults to "lenient".
        scheme (str, optional): annotation scheme, currently only "IOB2".
        verbose (bool, optional): also print the per-entity scores.

    Returns:
        dict: nested mapping ``metrics[entity]['precision'|'recall'|'f1-score']``
        for every entity type that occurs in ``tags_true``, plus the keys
        ``macro_precision``, ``macro_recall``, ``macro_f1``, ``micro_precision``,
        ``micro_recall`` and ``micro_f1``. Any score whose denominator is zero
        is reported as 0.

    Raises:
        ValueError: if ``mode`` is neither "strict" nor "lenient".
    """
    assert scheme in ['IOB2'] # sanity check
    # Validate up front and raise: calling exit() here would tear down the
    # interpreter / notebook kernel, and was never reached for empty input.
    if mode not in ("lenient", "strict"):
        raise ValueError("only support strict or lenient mode, please check your input argument")

    # Bare "B"/"I" tags carry no entity type; file them under a "default" type.
    tags_true = [r+"-default" if r in ['B', 'I'] else r for r in tags_true]
    tags_pred = [r+"-default" if r in ['B', 'I'] else r for r in tags_pred]

    predict, truth, matched = defaultdict(float), defaultdict(float), defaultdict(float)

    cur_matching_tag = TNE       # entity type currently open in the gold sequence
    cur_matching_tag_pred = TNE  # entity type currently open in the predicted sequence
    start_matching = TNE         # entity type of the strict match in progress
    for t, p in zip(tags_true, tags_pred):
        ## total number of gold entities, used for recall
        if t.startswith("B-"):
            # Strip only the leading prefix; an entity name may itself contain "B-".
            cur_matching_tag = t[2:]
            truth[cur_matching_tag] += 1
        elif t != "I-" + cur_matching_tag:
            # Anything but an exact continuation closes the open gold entity.
            cur_matching_tag = TNE

        ## total number of predicted entities, used for precision
        if p.startswith("B-"):
            cur_matching_tag_pred = p[2:]
            predict[cur_matching_tag_pred] += 1
        elif p != "I-" + cur_matching_tag_pred:
            cur_matching_tag_pred = TNE

        if mode == "lenient":
            if cur_matching_tag != TNE and cur_matching_tag == cur_matching_tag_pred:
                matched[cur_matching_tag] += 1
                # Close both so the same entity pair is not counted twice.
                cur_matching_tag_pred, cur_matching_tag = TNE, TNE
        else:  # strict
            if p == t and t.startswith("B-"):
                if start_matching != TNE: ## case: B_entity1 is adjacent to B_entity2 (two entities can be the same)
                    matched[start_matching] += 1
                start_matching = t[2:]
            elif p == t and t == "I-" + start_matching:
                pass  # match still in progress
            elif t != "I-"+start_matching and p != "I-"+start_matching and start_matching != TNE:
                # Gold and prediction both left the entity on this token.
                matched[start_matching] += 1
                start_matching = TNE
            else: #! matching failed
                start_matching = TNE

    # A strict match that runs to the very last token is still a match.
    if mode == "strict" and start_matching != TNE:
        matched[start_matching] += 1

    ## calculate metrics: precision, recall, F1-score
    # Sorted so that the report order does not depend on set iteration order.
    unique_entities = sorted({t[2:] for t in tags_true if t.startswith("B-")})

    metrics = defaultdict(lambda: defaultdict(lambda: 0))

    for ue in unique_entities:
        precision = _ratio(matched[ue], predict[ue])
        recall = _ratio(matched[ue], truth[ue])
        metrics[ue]['precision'] = precision
        metrics[ue]['recall'] = recall
        metrics[ue]['f1-score'] = _ratio(2*precision*recall, precision+recall)

        if verbose:
            print(f"tag: {ue} \t precision:{metrics[ue]['precision']} \t recall:{metrics[ue]['recall']} \t f1-score:{metrics[ue]['f1-score']}")

    n_entities = len(unique_entities)
    macro_precision = _ratio(sum(metrics[ue]['precision'] for ue in unique_entities), n_entities)
    macro_recall = _ratio(sum(metrics[ue]['recall'] for ue in unique_entities), n_entities)
    macro_f1 = _ratio(sum(metrics[ue]['f1-score'] for ue in unique_entities), n_entities)
    print(f"macro precision:{macro_precision}\n")
    print(f"macro recall: {macro_recall}\n")
    print(f"macro f1:{macro_f1}\n")
    metrics['macro_precision']=macro_precision
    metrics['macro_recall']=macro_recall
    metrics['macro_f1']=macro_f1

    # Micro scores pool the counts of the entity types present in the gold tags.
    total_matched = sum(matched[ue] for ue in unique_entities)
    micro_precision = _ratio(total_matched, sum(predict[ue] for ue in unique_entities))
    micro_recall = _ratio(total_matched, sum(truth[ue] for ue in unique_entities))
    micro_f1 = _ratio(2 * (micro_precision * micro_recall), micro_precision + micro_recall)
    print(f"micro precision:{micro_precision}\n")
    print(f"micro recall: {micro_recall}\n")
    print(f"micro f1:{micro_f1}\n")
    metrics['micro_precision']=micro_precision
    metrics['micro_recall']=micro_recall
    metrics['micro_f1']=micro_f1

    return metrics

In [None]:
from sklearn.metrics import accuracy_score
import time

def valid_loop(model, testing_loader,self_classifcation_report):
    """Run ``model`` over ``testing_loader`` and collect token-level tags.

    Args:
        model: callable taking ``input_ids``, ``attention_mask`` and ``labels``
            and returning a mapping with ``'loss'`` and ``'logits'``; must also
            expose ``eval()`` and ``num_labels``.
        testing_loader: iterable of batches, each a mapping with the tensors
            ``'input_ids'``, ``'attention_mask'`` and ``'labels'``.
        self_classifcation_report: accepted for call compatibility; not used.

    Returns:
        tuple ``(eval_loss, labels, predictions, sent_label_predict)``:
        the mean batch loss (0.0 when the loader yields no batch), the gold and
        predicted tag names at every position whose label is not -100, and one
        ``[input_ids, labels, flattened_predictions]`` list per batch.
    """
    model.eval()
    eval_loss = 0
    nb_eval_steps = 0
    eval_label_ids, eval_pred_ids = [], []
    sent_label_predict=[]
    with torch.no_grad():
        for idx, batch in enumerate(testing_loader):

            ids = batch['input_ids'].to(device, dtype = torch.long)
            mask = batch['attention_mask'].to(device, dtype = torch.long)
            batch_labels = batch['labels'].to(device, dtype = torch.long)
            outputs = model(input_ids=ids, attention_mask=mask, labels=batch_labels)

            eval_loss += outputs['loss'].item()
            nb_eval_steps += 1

            if idx % 100==0:
                loss_step = eval_loss/nb_eval_steps
                print(f"Validation loss per 100 evaluation steps: {loss_step}")

            flattened_targets = batch_labels.view(-1) # shape (batch_size * seq_len,)
            active_logits = outputs['logits'].view(-1, model.num_labels) # shape (batch_size * seq_len, num_labels)
            flattened_predictions = torch.argmax(active_logits, dim=1) # shape (batch_size * seq_len,)
            sent_label_predict.append([ids,batch_labels,flattened_predictions])

            # Positions labelled -100 carry no tag and are excluded from scoring.
            active_positions = flattened_targets != -100

            # tolist() transfers each batch once instead of calling .item()
            # on every single element afterwards.
            eval_label_ids.extend(torch.masked_select(flattened_targets, active_positions).tolist())
            eval_pred_ids.extend(torch.masked_select(flattened_predictions, active_positions).tolist())

    labels = [ids_to_label[label_id] for label_id in eval_label_ids]
    predictions = [ids_to_label[pred_id] for pred_id in eval_pred_ids]

    # Guard against an empty loader, which would otherwise divide by zero.
    eval_loss = eval_loss / nb_eval_steps if nb_eval_steps else 0.0

    return eval_loss,labels, predictions,sent_label_predict

#BERT base

In [None]:
# Prefix for the checkpoint paths built below as ROOT+'/saved_models_checkpoints/...'.
# It is empty here, so those paths start at the filesystem root.
ROOT=''

In [None]:
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased',use_fast=False)
# The comma after 'I-CAM_therapies' was missing, so Python silently fused it with
# the next literal into the single token 'I-CAM_therapiesI-Energy_therapies'.
tokenizer.add_tokens(['B-CAM_therapies','B-Energy_therapies',
 'B-Manual_bodybased_therapies','B-Mindbody_therapies',
 'B-Usual_Medical_Care','I-CAM_therapies',
 'I-Energy_therapies','I-Manual_bodybased_therapies',
 'I-Mindbody_therapies','I-Usual_Medical_Care'])
#-----------------------CAM training model---------------------#
base_bert_best=ROOT+'/saved_models_checkpoints/base_bert_6_07-31-02h13m44s'
new_model=BertForTokenClassification.from_pretrained(base_bert_best)
new_model.to(device)

In [None]:
# Evaluate the BERT-base checkpoint on the test loader; keeps the mean loss,
# gold tags, predicted tags and the raw per-batch tensors.
bert_eval_loss,bert_labels, bert_predictions,bert_sent_label_predict = valid_loop(new_model, test_dataloader,self_classifcation_report)

In [None]:
# Entity-level scores of the BERT-base predictions, lenient matching.
self_classifcation_report(bert_labels, bert_predictions, mode='lenient')

macro precision:0.8610511493864694

macro recall: 0.8639332730792442

macro f1:0.8609836114866806

micro precision:0.8657894736842106

micro recall: 0.8703703703703703

micro f1:0.8680738786279683



defaultdict(<function __main__.self_classifcation_report.<locals>.<lambda>()>,
            {'Energy_therapies': defaultdict(<function __main__.self_classifcation_report.<locals>.<lambda>.<locals>.<lambda>()>,
                         {'precision': 0.918918918918919,
                          'recall': 0.8480725623582767,
                          'f1-score': 0.8820754716981132}),
             'Usual_Medical_Care': defaultdict(<function __main__.self_classifcation_report.<locals>.<lambda>.<locals>.<lambda>()>,
                         {'precision': 0.7886363636363637,
                          'recall': 0.8675,
                          'f1-score': 0.8261904761904761}),
             'Manual_bodybased_therapies': defaultdict(<function __main__.self_classifcation_report.<locals>.<lambda>.<locals>.<lambda>()>,
                         {'precision': 0.843010752688172,
                          'recall': 0.9355608591885441,
                          'f1-score': 0.8868778280542986}),
        

In [None]:
# Entity-level scores of the BERT-base predictions, strict matching.
self_classifcation_report(bert_labels, bert_predictions, mode='strict')

macro precision:0.8396221860689564

macro recall: 0.8421865430543827

macro f1:0.8394401274074952

micro precision:0.8421052631578947

micro recall: 0.8465608465608465

micro f1:0.8443271767810027



defaultdict(<function __main__.self_classifcation_report.<locals>.<lambda>()>,
            {'Energy_therapies': defaultdict(<function __main__.self_classifcation_report.<locals>.<lambda>.<locals>.<lambda>()>,
                         {'precision': 0.8992628992628993,
                          'recall': 0.8299319727891157,
                          'f1-score': 0.8632075471698113}),
             'Usual_Medical_Care': defaultdict(<function __main__.self_classifcation_report.<locals>.<lambda>.<locals>.<lambda>()>,
                         {'precision': 0.7568181818181818,
                          'recall': 0.8325,
                          'f1-score': 0.7928571428571429}),
             'Manual_bodybased_therapies': defaultdict(<function __main__.self_classifcation_report.<locals>.<lambda>.<locals>.<lambda>()>,
                         {'precision': 0.821505376344086,
                          'recall': 0.9116945107398569,
                          'f1-score': 0.8642533936651584}),
       

#clinicalBert

In [None]:
tokenizer = BertTokenizer.from_pretrained("emilyalsentzer/Bio_ClinicalBERT",use_fast=False)
# The comma after 'I-CAM_therapies' was missing, so Python silently fused it with
# the next literal into the single token 'I-CAM_therapiesI-Energy_therapies'.
tokenizer.add_tokens(['B-CAM_therapies','B-Energy_therapies',
 'B-Manual_bodybased_therapies','B-Mindbody_therapies',
 'B-Usual_Medical_Care','I-CAM_therapies',
 'I-Energy_therapies','I-Manual_bodybased_therapies',
 'I-Mindbody_therapies','I-Usual_Medical_Care'])

clinicalBert_best_checkpoint=ROOT+'/saved_models_checkpoints/clinical_bert_07-30-22h26m39s'
clinicalBert_model= BertForTokenClassification.from_pretrained(clinicalBert_best_checkpoint)
clinicalBert_model.to(device)

In [None]:
# Evaluate the ClinicalBERT checkpoint on the test loader; keeps the mean loss,
# gold tags, predicted tags and the raw per-batch tensors.
clinicalBert_eval_loss,clinicalBert_labels, clinicalBert_predictions,clinicalBert_sent_label_predict = valid_loop(clinicalBert_model, test_dataloader,self_classifcation_report)

In [None]:
# Entity-level scores of the ClinicalBERT predictions, strict matching.
self_classifcation_report(clinicalBert_labels, clinicalBert_predictions, mode='strict')

macro_precision:0.8445858324033922

macro_recall: 0.863231886836353

macro_f1:0.8507115048445039

micro_precision:0.8409090909090909

micro_recall: 0.857210401891253

micro_f1:0.8489815031608523



defaultdict(<function __main__.self_classifcation_report.<locals>.<lambda>()>,
            {'Usual_Medical_Care': defaultdict(<function __main__.self_classifcation_report.<locals>.<lambda>.<locals>.<lambda>()>,
                         {'precision': 0.7024482109227872,
                          'recall': 0.8776470588235294,
                          'f1-score': 0.7803347280334727}),
             'Manual_bodybased_therapies': defaultdict(<function __main__.self_classifcation_report.<locals>.<lambda>.<locals>.<lambda>()>,
                         {'precision': 0.8767967145790554,
                          'recall': 0.9262472885032538,
                          'f1-score': 0.9008438818565401}),
             'CAM_therapies': defaultdict(<function __main__.self_classifcation_report.<locals>.<lambda>.<locals>.<lambda>()>,
                         {'precision': 0.8385416666666666,
                          'recall': 0.8846153846153846,
                          'f1-score': 0.8609625668449198}

In [None]:
# Entity-level scores of the ClinicalBERT predictions, lenient matching.
self_classifcation_report(clinicalBert_labels, clinicalBert_predictions, mode='lenient')

macro_precision:0.8693449319612924

macro_recall: 0.8885033212840904

macro_f1:0.875635171821418

micro_precision:0.864100185528757

micro_recall: 0.8808510638297873

micro_f1:0.8723952236010303



defaultdict(<function __main__.self_classifcation_report.<locals>.<lambda>()>,
            {'Usual_Medical_Care': defaultdict(<function __main__.self_classifcation_report.<locals>.<lambda>.<locals>.<lambda>()>,
                         {'precision': 0.7231638418079096,
                          'recall': 0.9035294117647059,
                          'f1-score': 0.803347280334728}),
             'Manual_bodybased_therapies': defaultdict(<function __main__.self_classifcation_report.<locals>.<lambda>.<locals>.<lambda>()>,
                         {'precision': 0.8870636550308009,
                          'recall': 0.9370932754880694,
                          'f1-score': 0.9113924050632911}),
             'CAM_therapies': defaultdict(<function __main__.self_classifcation_report.<locals>.<lambda>.<locals>.<lambda>()>,
                         {'precision': 0.875,
                          'recall': 0.9230769230769231,
                          'f1-score': 0.8983957219251337}),
           

#BioBert

In [None]:
# NOTE(review): the hub id "pucpr/biobertpt-all" looks like BioBERTpt (a Portuguese
# model) rather than the original BioBERT — confirm it is the tokenizer this
# checkpoint was fine-tuned with.
tokenizer = BertTokenizer.from_pretrained("pucpr/biobertpt-all",use_fast=False)
# The comma after 'I-CAM_therapies' was missing, so Python silently fused it with
# the next literal into the single token 'I-CAM_therapiesI-Energy_therapies'.
tokenizer.add_tokens(['B-CAM_therapies','B-Energy_therapies',
 'B-Manual_bodybased_therapies','B-Mindbody_therapies',
 'B-Usual_Medical_Care','I-CAM_therapies',
 'I-Energy_therapies','I-Manual_bodybased_therapies',
 'I-Mindbody_therapies','I-Usual_Medical_Care'])

bio_bert_best=ROOT+'/saved_models_checkpoints/bio_bert_epoche10_09-14-23h31m42s'
newbio_model= BertForTokenClassification.from_pretrained(bio_bert_best)
newbio_model.to(device)

In [None]:
# Evaluate the BioBERT checkpoint on the test loader; keeps the mean loss,
# gold tags, predicted tags and the raw per-batch tensors.
newbio_eval_loss,newbio_labels, newbio_predictions,newbio_sent_label_predict = valid_loop(newbio_model, test_dataloader,self_classifcation_report)

In [None]:
# Score the BioBERT outputs collected by valid_loop. The previous arguments
# `labels` / `predictions` are not this model's results: at module level `labels`
# is the tag-name list and `predictions` is never assigned in this notebook.
self_classifcation_report(newbio_labels, newbio_predictions, mode='strict')

macro_precision:0.8725213640259868

macro_recall: 0.8196362668843715

macro_f1:0.843110151601713

micro_precision:0.8864503816793893

micro_recall: 0.8113537117903931

micro_f1:0.8472412220702235



defaultdict(<function __main__.self_classifcation_report.<locals>.<lambda>()>,
            {'Usual_Medical_Care': defaultdict(<function __main__.self_classifcation_report.<locals>.<lambda>.<locals>.<lambda>()>,
                         {'precision': 0.8413461538461539,
                          'recall': 0.78125,
                          'f1-score': 0.8101851851851852}),
             'Manual_bodybased_therapies': defaultdict(<function __main__.self_classifcation_report.<locals>.<lambda>.<locals>.<lambda>()>,
                         {'precision': 0.9178082191780822,
                          'recall': 0.8682505399568035,
                          'f1-score': 0.8923418423973362}),
             'CAM_therapies': defaultdict(<function __main__.self_classifcation_report.<locals>.<lambda>.<locals>.<lambda>()>,
                         {'precision': 0.7606837606837606,
                          'recall': 0.8640776699029126,
                          'f1-score': 0.8090909090909091}),
        

In [None]:
# Score the BioBERT outputs collected by valid_loop. The previous arguments
# `labels` / `predictions` are not this model's results: at module level `labels`
# is the tag-name list and `predictions` is never assigned in this notebook.
self_classifcation_report(newbio_labels, newbio_predictions, mode='lenient')

macro_precision:0.9075061281791846

macro_recall: 0.8531625163956132

macro_f1:0.8772367705717612

micro_precision:0.920324427480916

micro_recall: 0.8423580786026201

micro_f1:0.8796169630642955



defaultdict(<function __main__.self_classifcation_report.<locals>.<lambda>()>,
            {'Usual_Medical_Care': defaultdict(<function __main__.self_classifcation_report.<locals>.<lambda>.<locals>.<lambda>()>,
                         {'precision': 0.8846153846153846,
                          'recall': 0.8214285714285714,
                          'f1-score': 0.8518518518518519}),
             'Manual_bodybased_therapies': defaultdict(<function __main__.self_classifcation_report.<locals>.<lambda>.<locals>.<lambda>()>,
                         {'precision': 0.9337899543378996,
                          'recall': 0.8833693304535637,
                          'f1-score': 0.9078801331853495}),
             'CAM_therapies': defaultdict(<function __main__.self_classifcation_report.<locals>.<lambda>.<locals>.<lambda>()>,
                         {'precision': 0.8076923076923077,
                          'recall': 0.9174757281553398,
                          'f1-score': 0.8590909090909091}

#BLUEBERT

In [None]:
from transformers import AutoTokenizer, AutoModel
BLUEBERT = "bionlp/bluebert_pubmed_mimic_uncased_L-12_H-768_A-12"
blue_bert_best = f"{ROOT}/saved_models_checkpoints/blue_bert_epoche10_09-14-21h43m21s"

# Tokenizer of the pretrained BlueBERT model, extended with every B-/I- tag name
# (all B- tags first, then all I- tags).
tokenizer = AutoTokenizer.from_pretrained(BLUEBERT, use_fast=False)
tokenizer.add_tokens([
    f"{prefix}-{category}"
    for prefix in ("B", "I")
    for category in (
        "CAM_therapies",
        "Energy_therapies",
        "Manual_bodybased_therapies",
        "Mindbody_therapies",
        "Usual_Medical_Care",
    )
])

# Load the fine-tuned checkpoint, move it to the target device and evaluate it
# on the test loader.
newblue_model = BertForTokenClassification.from_pretrained(blue_bert_best).to(device)
(newblue_eval_loss,
 newblue_labels,
 newblue_predictions,
 newblue_sent_label_predict) = valid_loop(newblue_model, test_dataloader, self_classifcation_report)

In [None]:
# Entity-level scores of the BlueBERT predictions, strict matching.
self_classifcation_report(newblue_labels, newblue_predictions, mode='strict')

macro precision:0.8956636221821761

macro recall: 0.9005411963958123

macro f1:0.8974643047842396

micro precision:0.886870783601453

micro recall: 0.9042328042328043

micro f1:0.8954676447471837



defaultdict(<function __main__.self_classifcation_report.<locals>.<lambda>()>,
            {'Energy_therapies': defaultdict(<function __main__.self_classifcation_report.<locals>.<lambda>.<locals>.<lambda>()>,
                         {'precision': 0.8614072494669509,
                          'recall': 0.9160997732426304,
                          'f1-score': 0.8879120879120879}),
             'Usual_Medical_Care': defaultdict(<function __main__.self_classifcation_report.<locals>.<lambda>.<locals>.<lambda>()>,
                         {'precision': 0.9036458333333334,
                          'recall': 0.8675,
                          'f1-score': 0.8852040816326532}),
             'Manual_bodybased_therapies': defaultdict(<function __main__.self_classifcation_report.<locals>.<lambda>.<locals>.<lambda>()>,
                         {'precision': 0.8931818181818182,
                          'recall': 0.9379474940334129,
                          'f1-score': 0.9150174621653085}),
      

In [None]:
# Entity-level scores of the BlueBERT predictions, lenient matching.
self_classifcation_report(newblue_labels, newblue_predictions, mode='lenient')

macro precision:0.9119799390429064

macro recall: 0.9170020236785612

macro f1:0.9138434187741191

micro precision:0.9045147898287493

micro recall: 0.9222222222222223

micro f1:0.9132826827351322



defaultdict(<function __main__.self_classifcation_report.<locals>.<lambda>()>,
            {'Energy_therapies': defaultdict(<function __main__.self_classifcation_report.<locals>.<lambda>.<locals>.<lambda>()>,
                         {'precision': 0.8784648187633263,
                          'recall': 0.9342403628117913,
                          'f1-score': 0.9054945054945055}),
             'Usual_Medical_Care': defaultdict(<function __main__.self_classifcation_report.<locals>.<lambda>.<locals>.<lambda>()>,
                         {'precision': 0.9296875,
                          'recall': 0.8925,
                          'f1-score': 0.9107142857142857}),
             'Manual_bodybased_therapies': defaultdict(<function __main__.self_classifcation_report.<locals>.<lambda>.<locals>.<lambda>()>,
                         {'precision': 0.9022727272727272,
                          'recall': 0.9474940334128878,
                          'f1-score': 0.9243306169965074}),
             'C

#PubMed Bert

In [None]:
from transformers import AutoTokenizer, AutoModel
PubmedBert = "cambridgeltl/SapBERT-from-PubMedBERT-fulltext"
pubmed_bert_best = f"{ROOT}/saved_models_checkpoints/pubmed_bert_epoche10_09-14-22h04m30s"

# Tokenizer of the pretrained model, extended with every B-/I- tag name
# (all B- tags first, then all I- tags).
tokenizer = AutoTokenizer.from_pretrained(PubmedBert, use_fast=False)
tokenizer.add_tokens([
    f"{prefix}-{category}"
    for prefix in ("B", "I")
    for category in (
        "CAM_therapies",
        "Energy_therapies",
        "Manual_bodybased_therapies",
        "Mindbody_therapies",
        "Usual_Medical_Care",
    )
])

# Load the fine-tuned checkpoint, move it to the target device and evaluate it
# on the test loader.
newpubmed_model = BertForTokenClassification.from_pretrained(pubmed_bert_best).to(device)
(newpubmed_eval_loss,
 newpubmed_labels,
 newpubmed_predictions,
 newpubmed_sent_label_predict) = valid_loop(newpubmed_model, test_dataloader, self_classifcation_report)

In [None]:
# Entity-level scores of the PubMed-BERT predictions, strict matching.
self_classifcation_report(newpubmed_labels, newpubmed_predictions, mode='strict')

macro precision:0.7948666713984613

macro recall: 0.7628472106376634

macro f1:0.777522201880424

micro precision:0.8

micro recall: 0.7566826593557231

micro f1:0.7777386403663261



defaultdict(<function __main__.self_classifcation_report.<locals>.<lambda>()>,
            {'Energy_therapies': defaultdict(<function __main__.self_classifcation_report.<locals>.<lambda>.<locals>.<lambda>()>,
                         {'precision': 0.8126888217522659,
                          'recall': 0.8276923076923077,
                          'f1-score': 0.8201219512195123}),
             'Usual_Medical_Care': defaultdict(<function __main__.self_classifcation_report.<locals>.<lambda>.<locals>.<lambda>()>,
                         {'precision': 0.7752808988764045,
                          'recall': 0.696969696969697,
                          'f1-score': 0.7340425531914893}),
             'Manual_bodybased_therapies': defaultdict(<function __main__.self_classifcation_report.<locals>.<lambda>.<locals>.<lambda>()>,
                         {'precision': 0.7867132867132867,
                          'recall': 0.7950530035335689,
                          'f1-score': 0.790861159929701

In [None]:
# Entity-level scores of the PubMed-BERT predictions, lenient matching.
self_classifcation_report(newpubmed_labels, newpubmed_predictions, mode='lenient')

macro precision:0.8576909031880348

macro recall: 0.8235743374689971

macro f1:0.8392052823258588

micro precision:0.8630434782608696

micro recall: 0.8163125428375599

micro f1:0.8390278266995421



defaultdict(<function __main__.self_classifcation_report.<locals>.<lambda>()>,
            {'Energy_therapies': defaultdict(<function __main__.self_classifcation_report.<locals>.<lambda>.<locals>.<lambda>()>,
                         {'precision': 0.879154078549849,
                          'recall': 0.8953846153846153,
                          'f1-score': 0.8871951219512195}),
             'Usual_Medical_Care': defaultdict(<function __main__.self_classifcation_report.<locals>.<lambda>.<locals>.<lambda>()>,
                         {'precision': 0.8277153558052435,
                          'recall': 0.7441077441077442,
                          'f1-score': 0.7836879432624114}),
             'Manual_bodybased_therapies': defaultdict(<function __main__.self_classifcation_report.<locals>.<lambda>.<locals>.<lambda>()>,
                         {'precision': 0.8636363636363636,
                          'recall': 0.872791519434629,
                          'f1-score': 0.8681898066783831