https://colab.research.google.com/drive/1jCkpikz0J2o20FBQmYmAGdiKmJGOMo-o?usp=sharing#scrollTo=T-gy-LxM0yAi

In [1]:
import os
import torch
import torch.nn as nn
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
checkpoint = "microsoft/biogpt"
from relations import relations
from datasets import DatasetDict, Dataset
import pandas as pd
from tqdm.notebook import trange, tqdm
from labels import get_labels

In [2]:
# Load the special tokens for the GPT_w_ner mode; the other three values
# returned by get_labels are discarded here.
additional_tokens, _, _, _ = get_labels(mode='GPT_w_ner')
# NOTE(review): additional_tokens is printed twice; presumably one of the
# discarded return values was meant to be shown instead — confirm.
print(additional_tokens, "\n", additional_tokens)

{'additional_special_tokens': ['[entity1]', '[entity2]', '[learn1]', '[learn2]', '[learn3]', '[learn4]', '[learn5]', '[learn6]', '[None]', '[Association]', '[Bind]', '[Comparison]', '[Conversion]', '[Cotreatment]', '[Drug_Interaction]', '[Negative_Correlation]', '[Positive_Correlation]']} 
 {'additional_special_tokens': ['[entity1]', '[entity2]', '[learn1]', '[learn2]', '[learn3]', '[learn4]', '[learn5]', '[learn6]', '[None]', '[Association]', '[Bind]', '[Comparison]', '[Conversion]', '[Cotreatment]', '[Drug_Interaction]', '[Negative_Correlation]', '[Positive_Correlation]']}


# load the model

In [3]:
# Load the pretrained causal LM and place it on the current CUDA device.
# NOTE: 8-bit quantized loading is disabled — the load_in_8bit argument below
# is commented out.
# the max length of the input is 1024
model = AutoModelForCausalLM.from_pretrained(checkpoint, 
    # load_in_8bit=True, 
    device_map={'':torch.cuda.current_device()},)

In [4]:
def print_trainable_parameters(model):
    """
    Report how many of the model's parameter elements are trainable.

    Walks ``model.named_parameters()`` and prints one line containing the
    number of elements with ``requires_grad`` set, the total number of
    elements, and the trainable share as a percentage.
    """
    total_elements = 0
    trainable_elements = 0
    for _, tensor in model.named_parameters():
        element_count = tensor.numel()
        total_elements += element_count
        trainable_elements += element_count if tensor.requires_grad else 0

    # Bind the names used inside the report string.
    trainable_params, all_param = trainable_elements, total_elements
    print(f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {100 * trainable_params / all_param}")

# Tokenizer

In [5]:
# Load the tokenizer from the same checkpoint identifier as the model so the
# two cannot drift apart if the checkpoint is changed.
tokenizer = AutoTokenizer.from_pretrained(checkpoint)

In [6]:
# Register the task-specific special tokens with the tokenizer. The model's
# embedding matrix is resized to the new vocabulary size in the next cell.
num_added_toks = tokenizer.add_special_tokens(additional_tokens)
print('We have added', num_added_toks, 'tokens')

# Persist the extended tokenizer: the inference section reloads it from this
# directory, so it has to exist on disk after a fresh run.
# (Trailing ';' suppresses the returned file list in the notebook output.)
tokenizer.save_pretrained("GPT_w_ner/GPT_w_ner_tokenizer");

We have added 17 tokens


In [7]:
# Resize the model's token embeddings to the tokenizer's current vocabulary
# size, which now includes the added special tokens.
model.resize_token_embeddings(len(tokenizer))

Embedding(42401, 1024)

# PEFT

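Finally, we need to apply some post-processing to the model to enable training: we freeze all layers and cast the layer-norm parameters to float32 for stability. (This recipe was written for an 8-bit model; in this notebook `load_in_8bit` is commented out in the model-loading cell.)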

We also cast the output of the last layer and embedding layer in float32 for the same reasons.

In [8]:
# Freeze every parameter of the base model and make sure 1-D parameters are fp32.
for param in model.parameters():
  param.requires_grad = False  # freeze the model - train adapters later
  if param.ndim == 1:
    # cast the small parameters (e.g. layernorm) to fp32 for stability
    param.data = param.data.to(torch.float32)

model.gradient_checkpointing_enable()  # reduce number of stored activations
# Presumably needed so gradients can flow through the frozen embeddings when
# gradient checkpointing is on — confirm against the transformers docs.
model.enable_input_require_grads()

class CastOutputToFloat(nn.Sequential):
  """Sequential wrapper whose forward output is cast to float32."""
  def forward(self, x): return super().forward(x).to(torch.float32)

# Wrapping a module in nn.Sequential nests it under index 0 (the model repr
# shows "(embed_tokens): CastOutputToFloat((0): Embedding(...))"), so its
# state-dict keys gain a ".0" segment; a later cell renames those keys
# before the weights are reloaded for inference.
# NOTE(review): the embedding rows added for the new special tokens are frozen
# by the loop above and are not LoRA targets, so they appear to keep their
# initial values during training — confirm this is intended.
model.biogpt.embed_tokens = CastOutputToFloat(model.biogpt.embed_tokens)
model.output_projection = CastOutputToFloat(model.output_projection)

In [9]:
# more with LoRAconfig: https://huggingface.co/docs/peft/conceptual_guides/lora

from peft import get_peft_config, get_peft_model, LoraConfig, TaskType, PeftType

peft_config = LoraConfig(
    # r: the rank of the update matrices, expressed in int. Lower rank results in smaller update matrices with fewer trainable parameters.
    r=16,
    # alpha: LoRA scaling factor.
    lora_alpha=32,
    # target_modules: The modules (for example, attention blocks) to apply the LoRA update matrices.
    target_modules=["q_proj", "v_proj"],
    # fan_in_fan_out must only be True for layers that store weights as
    # (fan_in, fan_out), e.g. GPT-2's Conv1D. The targeted q_proj / v_proj
    # modules here are regular nn.Linear layers, so the correct value is False.
    fan_in_fan_out=False,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)

# Wrap the frozen base model with LoRA adapters and report how many parameters
# are actually trainable.
model = get_peft_model(model, peft_config)
print_trainable_parameters(model)


Welcome to bitsandbytes. For bug reports, please run

python -m bitsandbytes

 and submit this information together with your error trace to: https://github.com/TimDettmers/bitsandbytes/issues
bin /home/tian/mambaforge/envs/BioRED/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cuda121.so
CUDA SETUP: CUDA runtime path found: /usr/local/cuda/lib64/libcudart.so
CUDA SETUP: Highest compute capability among GPUs detected: 8.6
CUDA SETUP: Detected CUDA version 121
CUDA SETUP: Loading binary /home/tian/mambaforge/envs/BioRED/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cuda121.so...


  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)


trainable params: 1572864 || all params: 348353536 || trainable%: 0.45151371737475343


In [10]:
model

PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): BioGptForCausalLM(
      (biogpt): BioGptModel(
        (embed_tokens): CastOutputToFloat(
          (0): Embedding(42401, 1024)
        )
        (embed_positions): BioGptLearnedPositionalEmbedding(1026, 1024)
        (layers): ModuleList(
          (0-23): 24 x BioGptDecoderLayer(
            (self_attn): BioGptAttention(
              (k_proj): Linear(in_features=1024, out_features=1024, bias=True)
              (v_proj): Linear(
                in_features=1024, out_features=1024, bias=True
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.05, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=1024, out_features=16, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=16, out_features=1024, bias=False)
                )
                (lora_embedding_A): Pa

In [12]:
# Show name, shape and dtype for every parameter that will receive gradients.

trainable_only = filter(lambda named: named[1].requires_grad, model.named_parameters())
for name, param in trainable_only:
    print(name, param.shape, param.dtype)

base_model.model.biogpt.layers.0.self_attn.v_proj.lora_A.default.weight torch.Size([16, 1024]) torch.float32
base_model.model.biogpt.layers.0.self_attn.v_proj.lora_B.default.weight torch.Size([1024, 16]) torch.float32
base_model.model.biogpt.layers.0.self_attn.q_proj.lora_A.default.weight torch.Size([16, 1024]) torch.float32
base_model.model.biogpt.layers.0.self_attn.q_proj.lora_B.default.weight torch.Size([1024, 16]) torch.float32
base_model.model.biogpt.layers.1.self_attn.v_proj.lora_A.default.weight torch.Size([16, 1024]) torch.float32
base_model.model.biogpt.layers.1.self_attn.v_proj.lora_B.default.weight torch.Size([1024, 16]) torch.float32
base_model.model.biogpt.layers.1.self_attn.q_proj.lora_A.default.weight torch.Size([16, 1024]) torch.float32
base_model.model.biogpt.layers.1.self_attn.q_proj.lora_B.default.weight torch.Size([1024, 16]) torch.float32
base_model.model.biogpt.layers.2.self_attn.v_proj.lora_A.default.weight torch.Size([16, 1024]) torch.float32
base_model.model.bi

# pre-process the text

In [None]:
from data_preprocessing import make_GPT_re_data, GPT_w_ner_preprocess_function

In [None]:
# train and valid file paths
train_file_path = 'data/BioRED/processed/train.tsv'
valid_file_path = 'data/BioRED/processed/dev.tsv'

In [None]:
# Build the raw GPT relation-extraction examples from the train and dev TSV
# files using the project helper (called with lower=True).
train_data_raw = make_GPT_re_data(file_path=train_file_path, lower=True)
valid_data_raw = make_GPT_re_data(file_path=valid_file_path, lower=True)

Dropped 8 line:
 [6646, 6758, 6776, 6866, 10222, 11775, 18818, 21689]
Dropped 8 line:
 [941, 2220, 2233, 2261, 5335, 5337, 5378, 5490]


In [None]:
train_data_raw.keys()

dict_keys(['pmids', 'text', 'entities', 'outputs'])

In [None]:
# Convert the raw dicts into datasets.Dataset objects.
# NOTE: the names are rebound in place, so re-running only this cell would pass
# a Dataset (not a dict) to Dataset.from_dict — re-run the cell that builds the
# raw dicts first.
train_data_raw = Dataset.from_dict(train_data_raw)
valid_data_raw = Dataset.from_dict(valid_data_raw)

In [None]:
from torch.utils.data import Subset
import random

# Class distribution of the raw training set:
#   '[None]': 18720
#   '[Association]': 2183
#   '[Bind]': 60
#   '[Comparison]': 28
#   '[Conversion]': 3
#   '[Cotreatment]': 31
#   '[Drug_Interaction]': 11
#   '[Negative_Correlation]': 763
#   '[Positive_Correlation]': 1088
#
# The [None] class dominates, so it is down-sampled: N_NONE_KEEP randomly
# chosen [None] examples are kept (seed 42), all other classes are untouched.
N_NONE_KEEP = 3000
random.seed(42)

# Positions of every [None] example. The relation token is the third-from-last
# space-separated piece of the target sentence ("... is [None] . ").
none_index = [i for i, example in enumerate(train_data_raw) if example['outputs'].split(" ")[-3] == '[None]']

# Randomly choose the [None] examples to DROP so that N_NONE_KEEP remain.
# The count is derived from the data instead of being hard-coded, and clamped
# at zero in case there are fewer than N_NONE_KEEP [None] examples.
none_index = random.sample(none_index, max(len(none_index) - N_NONE_KEEP, 0))

# A set gives O(1) membership tests; testing against the list made this
# filter quadratic in the dataset size.
dropped_positions = set(none_index)
keep_indices = [i for i in range(len(train_data_raw)) if i not in dropped_positions]

# Keep everything except the dropped [None] examples.
train_data_raw_balanced = train_data_raw.select(keep_indices)

In [None]:
train_data_raw_balanced

Dataset({
    features: ['pmids', 'text', 'entities', 'outputs'],
    num_rows: 7167
})

In [None]:
# Bundle the balanced training split and the (unbalanced) validation split.
dataset = DatasetDict({
    "train": train_data_raw_balanced,
    "valid": valid_data_raw
})

In [None]:
# Run the project's preprocessing function over both splits in batches and
# drop the four raw columns from the result.
tokenized_datasets = dataset.map(lambda example: GPT_w_ner_preprocess_function(example, tokenizer, mode="gpt_w_ner"), batched=True, remove_columns=['pmids', 'text', 'entities', 'outputs'])

Map:   0%|          | 0/7167 [00:00<?, ? examples/s]

Map:   0%|          | 0/6650 [00:00<?, ? examples/s]

In [None]:
tokenized_datasets

DatasetDict({
    train: Dataset({
        features: ['input_ids', 'attention_mask'],
        num_rows: 7167
    })
    valid: Dataset({
        features: ['input_ids', 'attention_mask'],
        num_rows: 6650
    })
})

In [None]:
# to tensor
tokenized_datasets.set_format(type='torch', columns=['input_ids'])

# Training

wandb

In [22]:
import wandb

# Start a Weights & Biases run; training metrics are reported to it by the
# Trainer configured below (report_to="wandb").
wandb.init(
    # set the wandb project where this run will be logged
    project="GPT2",
    # notes="PubmedBERT-FT-NER_w_NERin_10epochs",
    name="BioGPT_w_ner_epoch_15_balanced_train_data",
)

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33m309439737[0m ([33mtian1995[0m). Use [1m`wandb login --relogin`[0m to force relogin


training

In [23]:
from transformers import DataCollatorForLanguageModeling

In [24]:
import transformers

# Fine-tune the LoRA-wrapped model on the tokenized training split.
# Per-device batch size 8 with 8 gradient-accumulation steps gives 64 examples
# per optimizer step on each device.
# NOTE(review): no eval_dataset is passed, so the "valid" split is not used
# during training — confirm this is intended.
# NOTE(review): warmup_steps=1000 covers most of the 1680 total steps shown in
# the progress bar below — confirm this is intended.
trainer = transformers.Trainer(
    model=model, 
    train_dataset=tokenized_datasets['train'],
    args=transformers.TrainingArguments(
        per_device_train_batch_size=8, 
        gradient_accumulation_steps=8,
        warmup_steps=1000, 
        num_train_epochs=15,
        learning_rate=2e-4, 
        fp16=True,
        logging_steps=1, 
        report_to="wandb",
        save_strategy="epoch",
        output_dir='GPT_w_ner'
    ),
    # mlm=False selects causal (next-token) language-modeling collation.
    data_collator=DataCollatorForLanguageModeling(tokenizer, mlm=False)
)
model.config.use_cache = False  # silence the warnings. Please re-enable for inference!
trainer.train()



  0%|          | 0/1680 [00:00<?, ?it/s]

{'loss': 3.2818, 'learning_rate': 2.0000000000000002e-07, 'epoch': 0.01}
{'loss': 3.2785, 'learning_rate': 4.0000000000000003e-07, 'epoch': 0.02}
{'loss': 3.2354, 'learning_rate': 6.000000000000001e-07, 'epoch': 0.03}
{'loss': 3.3664, 'learning_rate': 6.000000000000001e-07, 'epoch': 0.04}
{'loss': 3.3141, 'learning_rate': 8.000000000000001e-07, 'epoch': 0.04}
{'loss': 3.3021, 'learning_rate': 1.0000000000000002e-06, 'epoch': 0.05}
{'loss': 3.226, 'learning_rate': 1.2000000000000002e-06, 'epoch': 0.06}
{'loss': 3.2175, 'learning_rate': 1.4000000000000001e-06, 'epoch': 0.07}
{'loss': 3.2267, 'learning_rate': 1.6000000000000001e-06, 'epoch': 0.08}
{'loss': 3.2179, 'learning_rate': 1.8e-06, 'epoch': 0.09}
{'loss': 3.4256, 'learning_rate': 2.0000000000000003e-06, 'epoch': 0.1}
{'loss': 3.2521, 'learning_rate': 2.2e-06, 'epoch': 0.11}
{'loss': 3.2805, 'learning_rate': 2.4000000000000003e-06, 'epoch': 0.12}
{'loss': 3.265, 'learning_rate': 2.6e-06, 'epoch': 0.12}
{'loss': 3.2588, 'learning_ra

TrainOutput(global_step=1680, training_loss=1.7369381053816704, metrics={'train_runtime': 25381.8502, 'train_samples_per_second': 4.236, 'train_steps_per_second': 0.066, 'train_loss': 1.7369381053816704, 'epoch': 15.0})

In [25]:
import wandb
# Close the W&B run, then write the trained model to disk through the Trainer.
wandb.finish()
trainer.save_model("GPT_w_ner/models/GPT_w_ner_epoch_15_balanced_train_data")

0,1
train/epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train/global_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train/learning_rate,▁▁▂▂▂▃▃▃▃▄▄▄▅▅▅▆▆▆▇▇▇▇███▇▇▆▆▆▅▅▄▄▃▃▂▂▂▁
train/loss,███▇▆▆▅▅▅▅▄▄▄▄▄▄▄▃▃▃▃▂▃▂▂▂▂▂▂▂▂▂▁▂▂▁▂▁▂▂
train/total_flos,▁
train/train_loss,▁
train/train_runtime,▁
train/train_samples_per_second,▁
train/train_steps_per_second,▁

0,1
train/epoch,15.0
train/global_step,1680.0
train/learning_rate,0.0
train/loss,1.1569
train/total_flos,2.0071882986356736e+17
train/train_loss,1.73694
train/train_runtime,25381.8502
train/train_samples_per_second,4.236
train/train_steps_per_second,0.066


In [26]:
# Also save through the PEFT model's save_pretrained; the inference section
# loads the adapter from this ".peft" directory.
model.save_pretrained("GPT_w_ner/models/GPT_w_ner_epoch_15_balanced_train_data.peft")

In [27]:
# The checkpoint written by trainer.save_model() contains keys that do not match
# the model used for inference, so the affected entries are renamed and a
# corrected copy of the state dict is stored next to the original.

embed_tokens_state_dict = torch.load("GPT_w_ner/models/GPT_w_ner_epoch_15_balanced_train_data/pytorch_model.bin")

# old key -> new key (the ".0" segment is removed)
key_renames = {
    "base_model.model.biogpt.embed_tokens.0.weight": "base_model.model.biogpt.embed_tokens.weight",
    "base_model.model.output_projection.0.weight": "base_model.model.output_projection.weight",
}

for old_key, new_key in key_renames.items():
    # pop() removes the old entry and returns its value, which is then stored
    # under the new name.
    embed_tokens_state_dict[new_key] = embed_tokens_state_dict.pop(old_key)

torch.save(embed_tokens_state_dict, "GPT_w_ner/models/GPT_w_ner_epoch_15_balanced_train_data/pytorch_model-af.bin")

In [25]:
model

PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): BioGptForCausalLM(
      (biogpt): BioGptModel(
        (embed_tokens): CastOutputToFloat(
          (0): Embedding(42401, 1024)
        )
        (embed_positions): BioGptLearnedPositionalEmbedding(1026, 1024)
        (layers): ModuleList(
          (0-23): 24 x BioGptDecoderLayer(
            (self_attn): BioGptAttention(
              (k_proj): Linear(in_features=1024, out_features=1024, bias=True)
              (v_proj): Linear(
                in_features=1024, out_features=1024, bias=True
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.05, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=1024, out_features=16, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=16, out_features=1024, bias=False)
                )
                (lora_embedding_A): Pa

# load model and inference

In [1]:
import torch
from peft import PeftModel, PeftConfig
from transformers import AutoModelForCausalLM, AutoTokenizer
checkpoint = "microsoft/biogpt"

# Directory the LoRA adapter is loaded from.
peft_model_id = "GPT_w_ner/models/GPT_w_ner_epoch_15_balanced_train_data.peft"
# config = PeftConfig.from_pretrained(peft_model_id)
# Start from the pretrained base model and the tokenizer saved with the added
# special tokens.
model = AutoModelForCausalLM.from_pretrained(checkpoint)
tokenizer = AutoTokenizer.from_pretrained("GPT_w_ner/GPT_w_ner_tokenizer")

# resize the token embeddings to match the tokenizer
model.resize_token_embeddings(len(tokenizer))

# Load the Lora model
# The resized embedding layer does not hold the trained weights yet; they are
# loaded manually from the renamed state dict in the next cell.
model = PeftModel.from_pretrained(model, peft_model_id)



Welcome to bitsandbytes. For bug reports, please run

python -m bitsandbytes

 and submit this information together with your error trace to: https://github.com/TimDettmers/bitsandbytes/issues
bin /home/tian/mambaforge/envs/BioRED/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cuda121.so


  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)


CUDA SETUP: CUDA runtime path found: /usr/local/cuda/lib64/libcudart.so
CUDA SETUP: Highest compute capability among GPUs detected: 8.6
CUDA SETUP: Detected CUDA version 121
CUDA SETUP: Loading binary /home/tian/mambaforge/envs/BioRED/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cuda121.so...


In [2]:
# Load the full state dict with the renamed keys ("pytorch_model-af.bin") into
# the PEFT-wrapped model.
model.load_state_dict(torch.load("GPT_w_ner/models/GPT_w_ner_epoch_15_balanced_train_data/pytorch_model-af.bin"))

<All keys matched successfully>

In [3]:
# Quick generation smoke test on CPU.
# NOTE(review): the prompt is a tweet-classification example unrelated to the
# relation-extraction task — presumably left over from a tutorial; confirm
# whether this cell is still needed.
model.eval()
model.to("cpu")
inputs = tokenizer("Tweet text : @HondaCustSvc Your customer service has been horrible during the recall process. I will never purchase a Honda again. Label :", return_tensors="pt")

with torch.no_grad():
    outputs = model.generate(input_ids=inputs["input_ids"], max_new_tokens=10)
    print(tokenizer.batch_decode(outputs.detach().cpu().numpy(), skip_special_tokens=True)[0])

Tweet text: @ HondaCustSvc Your customer service has been horrible during the recall process. I will never purchase a Honda again. Label: the health care provider.


In [4]:
import pandas as pd
import re
from tqdm.notebook import trange, tqdm
from torch import nn
from labels import get_labels
from relations import relations
from datasets import DatasetDict, Dataset

from data_preprocessing import make_GPT_re_data, GPT_w_ner_preprocess_function
additional_tokens, _, _, _ = get_labels(mode='GPT_w_ner')


In [5]:
# load test data and preprocess
test_file_path = 'data/BioRED/processed/test.tsv'
test_data = make_GPT_re_data(file_path=test_file_path, lower=True)

test_dataset_raw = Dataset.from_dict(test_data)
# test_dataset = test_dataset_raw.map(NER_preprocess_function, batched=False)
# Preprocess with the GPT_w_ner function in inference mode (infer=True) and
# drop the four raw columns:
test_dataset = test_dataset_raw.map(lambda example: GPT_w_ner_preprocess_function(example, tokenizer, mode="gpt_w_ner", infer=True), batched=True, remove_columns=['pmids', 'text', 'entities', 'outputs'])
# Format as torch tensors, restricted to the 'input_ids' column.
test_dataset.set_format(type='torch', columns=['input_ids'])
# Later cells read both test_dataset[i]["input_ids"] and test_dataset['labels'],
# so the preprocessed dataset presumably has an input_ids and a labels column.

Dropped 0 line:
 []


Map:   0%|          | 0/7590 [00:00<?, ? examples/s]

In [14]:
# Generate a continuation for every test prompt on the GPU and keep only the
# text that follows the learnable-prompt marker.
PROMPT_MARKER = "[learn1] [learn2] [learn3] [learn4] [learn5] [learn6] "

model.eval()
model.to("cuda")
outputs = []
with torch.no_grad():
    for i in tqdm(range(len(test_dataset))):
        # One example at a time: add a batch dimension and move it to the GPU.
        prompt_ids = test_dataset[i]["input_ids"].unsqueeze(0).to("cuda")
        output = model.generate(input_ids=prompt_ids, max_new_tokens=50)
        # Special tokens are kept so the relation tokens remain visible in the text.
        output_text = tokenizer.batch_decode(output.detach().cpu().numpy(), skip_special_tokens=False)[0]
        outputs.append(output_text.split(PROMPT_MARKER)[1])

    # print(tokenizer.batch_decode(output.detach().cpu().numpy(), skip_special_tokens=False)[0])

  0%|          | 0/7590 [00:00<?, ?it/s]

In [13]:
outputs

['the relation between source [entity1] and target [entity2] is [None]. </s>',
 'the relation between source [entity1] and target [entity2] is [None]. </s>',
 'the relation between source [entity1] and target [entity2] is [None]. </s>',
 'the relation between source [entity1] and target [entity2] is [None]. </s>',
 'the relation between source [entity1] and target [entity2] is [None]. </s>',
 'the relation between source [entity1] and target [entity2] is [None]. </s>',
 'the relation between source [entity1] and target [entity2] is [None]. </s>',
 'the relation between source [entity1] and target [entity2] is [None]. </s>',
 'the relation between source [entity1] and target [entity2] is [None]. </s>',
 'the relation between source [entity1] and target [entity2] is [None]. </s>',
 'the relation between source [entity1] and target [entity2] is [None]. </s>',
 'the relation between source [entity1] and target [entity2] is [None]. </s>',
 'the relation between source [entity1] and target [

In [11]:
test_dataset['labels'][30:80]

['the relation between source [entity1] and target [entity2] is [None] . ',
 'the relation between source [entity1] and target [entity2] is [None] . ',
 'the relation between source [entity1] and target [entity2] is [None] . ',
 'the relation between source [entity1] and target [entity2] is [Association] . ',
 'the relation between source [entity1] and target [entity2] is [None] . ',
 'the relation between source [entity1] and target [entity2] is [None] . ',
 'the relation between source [entity1] and target [entity2] is [None] . ',
 'the relation between source [entity1] and target [entity2] is [None] . ',
 'the relation between source [entity1] and target [entity2] is [Positive_Correlation] . ',
 'the relation between source [entity1] and target [entity2] is [None] . ',
 'the relation between source [entity1] and target [entity2] is [Negative_Correlation] . ',
 'the relation between source [entity1] and target [entity2] is [None] . ',
 'the relation between source [entity1] and targe

In [41]:
relation_dict = {f"[{v}]": 0 for v in relations}

In [43]:
# Tally how often each relation token occurs in the training labels; the
# token is the third-from-last space-separated piece of each label string.
# NOTE(review): train_dataset is not defined in any cell visible in this
# notebook — presumably it came from an earlier kernel session; confirm before
# re-running.
for lines in train_dataset['labels']:
    relation_dict[lines.split(" ")[-3]] += 1

In [44]:
relation_dict

{'[None]': 18720,
 '[Association]': 2183,
 '[Bind]': 60,
 '[Comparison]': 28,
 '[Conversion]': 3,
 '[Cotreatment]': 31,
 '[Drug_Interaction]': 11,
 '[Negative_Correlation]': 763,
 '[Positive_Correlation]': 1088}

In [16]:
# Pair every generated sentence with its gold label; zip stops at the shorter
# of the two sequences.
paired_predictions = list(zip(outputs, test_dataset['labels']))

result = {
    "output": [generated for generated, _ in paired_predictions],
    "label": [gold for _, gold in paired_predictions],
}

In [17]:
# save the result dictionary
import pickle
with open("GPT_w_ner/result/GPT_w_ner_epoch_15_balanced_result.pkl", "wb") as f:
    pickle.dump(result, f)

post-processing and evaluation

In [18]:
# load the result dictionary
import pickle
with open("GPT_w_ner/result/GPT_w_ner_epoch_15_balanced_result.pkl", "rb") as f:
    result = pickle.load(f)

In [30]:
# Number of predictions that differ from the gold label after stripping the
# last 6 characters of the output ('. </s>' in the outputs shown) and the last
# 3 characters of the label (' . ').
uncorrected = sum(
    result['output'][i][:-6] != result['label'][i][:-3]
    for i in range(len(result['output']))
)

In [32]:
len(result['output'])

7590

In [31]:
uncorrected

1181

In [40]:
# Number of gold labels whose six characters before the trailing ' . ' are not '[None]'.
count = sum(
    result['label'][i][-9:-3] != '[None]'
    for i in range(len(result['output']))
)

In [41]:
count

1163

In [54]:
# Print the generated outputs at positions 5360-5379 for manual inspection.
for i in range(5360, 5380):
    print(result['output'][i])

the relation between source [entity1] and target [entity2] is [None]. </s>
the relation between source [entity1] and target [entity2] is [None]. </s>
the relation between source [entity1] and target [entity2] is [None]. </s>
the relation between source [entity1] and target [entity2] is [Positive_Correlation]. </s>
the relation between source [entity1] and target [entity2] is [Positive_Correlation]. </s>
the relation between source [entity1] and target [entity2] is [Positive_Correlation]. </s>
the relation between source [entity1] and target [entity2] is [Positive_Correlation]. </s>
the relation between source [entity1] and target [entity2] is [Positive_Correlation]. </s>
the relation between source [entity1] and target [entity2] is [Positive_Correlation]. </s>
the relation between source [entity1] and target [entity2] is [None]. </s>
the relation between source [entity1] and target [entity2] is [Positive_Correlation]. </s>
the relation between source [entity1] and target [entity2] is [

In [55]:
# Print the gold labels at positions 5360-5379 for manual inspection.
for i in range(5360, 5380):
    print(result['label'][i])

the relation between source [entity1] and target [entity2] is [None] . 
the relation between source [entity1] and target [entity2] is [None] . 
the relation between source [entity1] and target [entity2] is [None] . 
the relation between source [entity1] and target [entity2] is [None] . 
the relation between source [entity1] and target [entity2] is [Positive_Correlation] . 
the relation between source [entity1] and target [entity2] is [None] . 
the relation between source [entity1] and target [entity2] is [Positive_Correlation] . 
the relation between source [entity1] and target [entity2] is [Negative_Correlation] . 
the relation between source [entity1] and target [entity2] is [None] . 
the relation between source [entity1] and target [entity2] is [Cotreatment] . 
the relation between source [entity1] and target [entity2] is [Positive_Correlation] . 
the relation between source [entity1] and target [entity2] is [Cotreatment] . 
the relation between source [entity1] and target [entity2]

In [53]:
# Print the index and tail of every generated output whose six characters
# before the trailing 6-character suffix are not '[None]', and count them.
count = 0
for i, generated in enumerate(result['output']):
    if generated[-12:-6] == '[None]':
        continue
    print(i,"  ", generated[-30:-6])
    count += 1

970    s [Positive_Correlation]
971    s [Positive_Correlation]
972    s [Positive_Correlation]
973    s [Positive_Correlation]
974    s [Positive_Correlation]
975    s [Positive_Correlation]
977    s [Positive_Correlation]
978    s [Positive_Correlation]
980    s [Positive_Correlation]
981    s [Positive_Correlation]
982    s [Positive_Correlation]
983    s [Positive_Correlation]
984    s [Positive_Correlation]
985    s [Positive_Correlation]
986    s [Positive_Correlation]
989    s [Positive_Correlation]
991    s [Positive_Correlation]
992    s [Positive_Correlation]
993    s [Positive_Correlation]
996    s [Positive_Correlation]
997    s [Positive_Correlation]
998    s [Positive_Correlation]
999    s [Positive_Correlation]
1468    ntity2] is [Association]
1483    ntity2] is [Association]
1488    ntity2] is [Association]
1490    ntity2] is [Association]
1494    ntity2] is [Association]
1502    ntity2] is [Association]
1509    ntity2] is [Association]
1539    ntity2] is [Association]
