### Loading the model:

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer
# Hugging Face Hub id of the causal language model whose attention is analysed below.
model_id = "openlm-research/open_llama_7b"
# Tokenizer and model are loaded from the same id; both are notebook globals that the
# attention-extraction cells use directly (`tokenizer(...)`, `model(...)`).
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)
print("OpenLLaMA-7b loaded successfully!")

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

OpenLLaMA-7b loaded successfully!


### Loading the dataset and connecting to Google Drive

In [None]:
pip install datasets



In [None]:
from google.colab import drive
import torch
# Mount Google Drive at /content/drive (Colab only).
drive.mount('/content/drive')
# Project folder on the mounted drive. Other cells build file names by string
# concatenation (`data_path + "..."`), so keep the trailing slash.
data_path = '/content/drive/My Drive/MSBD5002_project/' # modify this line for your drive

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
from datasets import load_dataset
from tqdm import tqdm

# Load the TriviaQA dataset ('rc.nocontext' configuration)
trivia_qa = load_dataset('trivia_qa', 'rc.nocontext')

# One (question, accepted_answers) pair per training example. The accepted answers are,
# in order: the raw aliases, the normalized aliases, the answer value and its normalized value.
full_dataset = [
  (
    obs['question'],
    [
      *obs['answer']['aliases'],
      *obs['answer']['normalized_aliases'],
      obs['answer']['value'],
      obs['answer']['normalized_value'],
    ],
  )
  for obs in tqdm(trivia_qa['train'])
]

100%|██████████| 138384/138384 [00:24<00:00, 5673.89it/s]


In [None]:
# Slice of (question, aliases) pairs processed by the attention cells below.
# The slice bounds decide both which questions are used and how many (here 3000).
data = full_dataset[6000:9000] # choosing the size of the dataset

### Get the attention score (implementation)
##### If you only want the classifier, you can jump directly to the classifier part

In [None]:
import torch
from tqdm import tqdm
import seaborn as sns
import matplotlib.pyplot as plt
from google.colab import drive
drive.mount('/content/drive')
data_path = '/content/drive/My Drive/MSBD5002_project/'


model.to('cuda')
model.eval()

# One entry per question in `data`:
#   attention_total_list[n] -> per-head last-layer attention from the first response
#                              token (a_1) to the last prompt token (q_M)
#   label_list[n]           -> 1 if any accepted alias occurs in the generated text, else 0
attention_total_list = []
label_list = []
for item in tqdm(data):
  prompt = item[0]
  answer = item[1]

  inputs_id = tokenizer(prompt, return_tensors="pt").input_ids.to("cuda")
  prompt_len = len(inputs_id[0])
  qm_idx = prompt_len - 1  # Last token of prompt
  a1_idx = prompt_len      # First response token

  response = []
  attention_required = None

  # Greedy decoding: the whole sequence is fed to the model again at every step and
  # the arg-max token is appended. There is no early stop; exactly 3 * prompt_len
  # tokens are generated.
  max_length = prompt_len * 3
  for i in range(max_length):
    with torch.no_grad():
      outputs = model(inputs_id, output_attentions=True)
    next_token = outputs.logits[:, -1, :].argmax(dim=-1)
    inputs_id = torch.cat([inputs_id, next_token.unsqueeze(-1)], dim=-1)
    response.append(next_token)

    # Extract S_L(q_M, a_1). Step 1 is the first forward pass whose input contains a_1,
    # so its attention matrix has a row for a1_idx. Only this (num_heads,) slice is kept:
    # slicing before .cpu() copies just those values instead of storing every step's full
    # (1, heads, len, len) matrix on the CPU as a list.
    if i == 1:
      attention_required = outputs.attentions[-1][0, :, a1_idx, qm_idx].detach().cpu()

  # Decode the generated tokens and label the answer: correct (1) when any accepted
  # alias appears, case-insensitively, as a substring of the response.
  response_list = torch.stack(response).cpu().numpy()
  str_response = tokenizer.decode(response_list.flatten(), skip_special_tokens=True) # obtain the string response
  lowered_response = str_response.lower()
  label = int(any(alias.lower() in lowered_response for alias in answer))
  label_list.append(label)

  attention_total_list.append(attention_required)

# Saving is disabled by default. To store the result on Google Drive, uncomment the lines
# below and replace <N> with the chunk end index; the classifier cells load
# attention_data_1000.pt, attention_data_2000.pt, ... and read the "labels" entry.
# save_data = {
#     "attention_scores": attention_total_list,
#     "labels": label_list
# }
# torch.save(save_data, data_path + "attention_data_<N>.pt")


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


 50%|█████     | 1/2 [00:04<00:04,  4.14s/it]

Which American-born Sinclair won the Nobel Prize for Literature in 1930?
-------------

Which American-born Sinclair won the Nobel Prize for Literature in 1930? Sinclair Lewis
Which American-born Sinclair won the Nobel Prize for Literature in 1930? Sinclair Lewis.
Which American-born Sinclair won the Nobel


100%|██████████| 2/2 [00:06<00:00,  3.03s/it]

Where in England was Dame Judi Dench born?
-------------

Dame Judi Dench was born in York, England.
Q: Where in England was Dame Judi Dench born?
Where was Dame Judi Dench





'\nsave_data = {\n    "attention_scores": attention_total_list,\n    "labels": label_list\n}\ntorch.save(save_data, data_path + f"attention_data_{(i+2)*1000}.pt") #store the result to google drive\n'

### Get the attention (Research part - stacked approach)
This part is independent of the implementation part.

In [None]:
from transformers import AutoTokenizer, AutoModelForTokenClassification
from transformers import pipeline
import spacy

# BERT-based NER: tokenizer and token-classification model come from the same checkpoint
NER_CHECKPOINT = "dslim/bert-base-NER"
tokenizer_bert = AutoTokenizer.from_pretrained(NER_CHECKPOINT)
model_bert = AutoModelForTokenClassification.from_pretrained(NER_CHECKPOINT)
ner_pipeline = pipeline(
    "ner",
    model=model_bert,
    tokenizer=tokenizer_bert,
    aggregation_strategy="simple",
)

# spaCy English pipeline, used as the fallback (POS tags and dependency labels)
nlp = spacy.load("en_core_web_sm")

Some weights of the model checkpoint at dslim/bert-base-NER were not used when initializing BertForTokenClassification: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Device set to use cuda:0


In [None]:
"""
Here is the part for Extractive summarization
"""
def extract_keyword(text):
    """Return one keyword (entity or noun) found in `text`.

    Strategy:
      1. Run the BERT NER pipeline and use its first entity. When the entity carries
         character offsets (`start` / `end`), the span is widened to whole-word
         boundaries and read back from `text`, so a sub-word fragment such as "Ma"
         becomes "Mauretania". Without offsets the pipeline's `word` is returned.
      2. Otherwise fall back to spaCy: the first PROPN/NOUN token whose dependency
         label is nsubj, dobj, pobj or ROOT.

    Args:
        text: the string to search.

    Returns:
        The keyword string, or the sentinel "No keyword found" when `text` is empty /
        whitespace-only or neither step finds anything.
    """
    # Nothing to analyse: skip both models.
    if not text or not text.strip():
        return "No keyword found"

    # Step 1: Run BERT-based NER and take the first entity (PERSON, GPE, ORG, etc.)
    ner_results = ner_pipeline(text)
    if ner_results:
        first_entity = ner_results[0]
        start, end = first_entity.get("start"), first_entity.get("end")
        if start is None or end is None:
            # No character offsets available: keep the pipeline's decoded text.
            return first_entity["word"]  # e.g., "Sinclair", "England", "Super Bowl XX"
        # The aggregated entity can begin or end in the middle of a word; extend the span
        # over adjacent alphanumeric characters so the whole word is returned.
        while start > 0 and text[start - 1].isalnum():
            start -= 1
        while end < len(text) and text[end].isalnum():
            end += 1
        return text[start:end]

    # Step 2: Fallback to spaCy for non-entity nouns
    doc = nlp(text)
    for token in doc:
        # Prioritize proper nouns (PROPN) or significant nouns (NOUN)
        if token.pos_ in ["PROPN", "NOUN"] and token.dep_ in ["nsubj", "dobj", "pobj", "ROOT"]:
            # Handle multi-word proper nouns
            if token.pos_ == "PROPN" and token.head.pos_ == "PROPN" and token.head != token:
                return f"{token.text} {token.head.text}"
            return token.text

    return "No keyword found"

# Demo for keyword extraction: run the extractor on the first question of `data`
prompts = [data[0][0]]
for prompt in prompts:
    print(f"Prompt: {prompt} -> Keyword: {extract_keyword(prompt)}")

Prompt: Mars has two moons. Phobos is one, but what is the other one called? -> Keyword: Mars
Prompt: In the 1966 movie The Good, The Bad And The Ugly, Clint Eastwood played the Good" and Lee van Cleef played "the Bad", but who played "the Ugly"? -> Keyword: The Good, The Bad And The Ugly
Prompt: The Mauretania, launched in 1906, was the largest and fastest ship in the world at that time. What was the name of her sister ship that was launched in the same year, but sunk by a German U-boat in 1915? -> Keyword: Ma
Prompt: The Ballearics are made up of three major islands. Majorca and Minorca are two, but what is the other one? -> Keyword: Ball
Prompt: Neil Armstrong and Buzz Aldrin walked on the moon in 1969, but who was the third astronaut on Apollo 11, who remained in the orbitter? -> Keyword: Neil Armstrong


In [None]:
import re
import torch
from tqdm import tqdm
import seaborn as sns
import matplotlib.pyplot as plt
from google.colab import drive
import time
drive.mount('/content/drive')
data_path = '/content/drive/My Drive/MSBD5002_project/' # modify this to your drive


model.to('cuda')
model.eval()

# Generation stops early once this token id is produced, but only after step 5.
# NOTE(review): presumably the newline token of this tokenizer — confirm.
stop_token = 13


def _first_token_id(text):
  """Return the id at position 1 of the tokenized `text`, or None if there is none.

  Position 0 is skipped, exactly as the previous `[0][1]` indexing did.
  NOTE(review): this assumes the tokenizer places a BOS token at position 0 — confirm.
  """
  ids = tokenizer(text).input_ids
  return ids[1] if len(ids) > 1 else None


def _strip_prompt_words(response_text, prompt_text):
  """Remove every whole-word occurrence of the prompt's words from `response_text`.

  Matching is on word boundaries. A plain substring `str.replace` would also delete
  short prompt words such as "in" or "a" from the inside of unrelated response words
  and corrupt the text handed to the keyword extractor.
  """
  for word in prompt_text.split():
    response_text = re.sub(r"(?<!\w)" + re.escape(word) + r"(?!\w)", "", response_text)
  return response_text


#initialization
attention_total_list_a1_all = []
attention_total_list_a1_out = []
attention_total_list_a1 = []
attention_total_list_qm_all = []
attention_total_list_qm_out = []
label_list = []  # not filled here; labels are produced by the implementation cell

start_time = time.time()
for item in tqdm(data):
  prompt = item[0]

  inputs_id = tokenizer(prompt, return_tensors="pt").input_ids.to("cuda")
  prompt_len = len(inputs_id[0])

  # Position of the prompt keyword (q_M). Defaults to the last prompt token when no
  # keyword is found or its first token does not occur in the prompt. There is no
  # `break`, so the last matching position wins.
  qm_idx = prompt_len - 1
  prompt_keyword = extract_keyword(prompt)
  if prompt_keyword != "No keyword found":
    keyword_id = _first_token_id(prompt_keyword)
    for k, token_id in enumerate(inputs_id[0].tolist()):
      if token_id == keyword_id:
        qm_idx = k

  # Greedy decoding: the whole sequence is fed to the model again at every step.
  max_length = 3 * prompt_len
  response = []
  last_layer_attn = None
  for i in range(max_length):
    with torch.no_grad():
      outputs = model(inputs_id, output_attentions=True)
    next_token = outputs.logits[:, -1, :].argmax(dim=-1)
    inputs_id = torch.cat([inputs_id, next_token.unsqueeze(-1)], dim=-1)
    response.append(next_token)

    # Only the final step's last-layer attention is used below, so keep a reference to
    # the latest one instead of copying every step's full matrix to the CPU.
    last_layer_attn = outputs.attentions[-1]

    # Break the loop if the stop token is reached
    if next_token.item() == stop_token and i > 5:
      break

  # `i` is now the index of the last executed step (the break step or max_length - 1).
  response_list = torch.stack(response).cpu().numpy()
  str_response = tokenizer.decode(response_list.flatten(), skip_special_tokens=True)

  # Remove the words that already appear in the input prompt from the generated response
  filtered_output = _strip_prompt_words(str_response, prompt)

  # Position of the response keyword (a_1): first generated token equal to the keyword's
  # first token. Defaults to the first generated position.
  a1_idx = prompt_len
  response_keyword = extract_keyword(filtered_output)  # computed once (runs the NER model)
  if response_keyword != "No keyword found":
    keyword_id = _first_token_id(response_keyword)
    sequence_ids = inputs_id[0].tolist()
    # The token appended in the final step has no attention row yet, so it is excluded.
    for k in range(prompt_len, len(sequence_ids) - 1):
      if sequence_ids[k] == keyword_id:
        a1_idx = k
        break

  # Per-head attention slices from the final step. Slicing before .cpu() yields compact
  # (num_heads,) tensors rather than views that keep the whole matrix alive when saved.
  a1_to_prompt_end = last_layer_attn[0, :, a1_idx, prompt_len - 1].detach().cpu()
  last_to_qm = last_layer_attn[0, :, -1, qm_idx].detach().cpu()

  total_len = i + prompt_len
  output_len = max(i, 1)  # avoid dividing by zero should the loop ever end at step 0

  # Store the artifacts we want to extract
  attention_required_a1 = a1_to_prompt_end
  attention_required_a1_divall = a1_to_prompt_end / total_len
  attention_required_qm_divall = last_to_qm / total_len
  attention_required_qm_divoutput = last_to_qm / output_len
  attention_required_a1_divoutput = a1_to_prompt_end / output_len

  attention_total_list_a1.append(attention_required_a1)
  attention_total_list_a1_all.append(attention_required_a1_divall)
  attention_total_list_a1_out.append(attention_required_a1_divoutput)
  attention_total_list_qm_all.append(attention_required_qm_divall)
  attention_total_list_qm_out.append(attention_required_qm_divoutput)

end_time = time.time()
used_time = end_time - start_time
print(f"Time: {used_time}")

# save the data into the drive
# NOTE: the "9000" in the file name is hard-coded; it matches data = full_dataset[6000:9000].
# Change it together with the slice when processing another chunk.
save_data = {"attention_scores_a1": attention_total_list_a1, "attention_scores_a1_all": attention_total_list_a1_all,
             "attention_scores_a1_out": attention_total_list_a1_out, "attention_scores_qm_all":attention_total_list_qm_all, "attention_scores_qm_out":attention_total_list_qm_out}
torch.save(save_data, data_path + f"attention_research_data/attention_data_divided_lasttry_9000.pt") #store the result to google drive




Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


100%|██████████| 3000/3000 [1:20:57<00:00,  1.62s/it]


Time: 4857.920122146606


### Classifier training: MLP


In [None]:
from google.colab import drive
import torch
# Mount Google Drive at /content/drive; force_remount=True remounts even when the
# drive is already mounted, so this section can be run on its own.
drive.mount('/content/drive', force_remount=True)
# Project folder on the mounted drive. File names are appended by string
# concatenation, so keep the trailing slash.
data_path = '/content/drive/My Drive/MSBD5002_project/' # modify this line for your drive

Mounted at /content/drive


In [None]:
import torch.nn as nn
from torch.optim import Adam
from sklearn.metrics import roc_auc_score
import numpy as np
from tqdm import tqdm
import os

In [None]:
# Load the artifact from the drive
# Each feature file holds one list per key; the three chunk files are concatenated in order.
FEATURE_KEYS = (
  "attention_scores_a1",
  "attention_scores_a1_all",
  "attention_scores_a1_out",
  "attention_scores_qm_all",
  "attention_scores_qm_out",
)
training_data = {key: torch.tensor([]) for key in FEATURE_KEYS}
for i in range(3):
  data_load = torch.load(data_path + f"attention_research_data/attention_data_divided_lasttry_{3000*(i+1)}.pt") # it may takes around 1 minute
  for key in FEATURE_KEYS:
    if i == 0:
      # The first chunk replaces the placeholder value
      training_data[key] = data_load[key]
    else:
      # Later chunks are appended with `+`
      training_data[key] = training_data[key] + data_load[key]

# Labels come from the nine files written by the implementation cell
label_chunks = []
for i in range(9):
  data_load = torch.load(data_path + f"attention_data_{1000*(i+1)}.pt") # it may takes around 1 minute
  label_chunks.append(data_load['labels'])
labels = np.array(label_chunks).flatten()

In [None]:
# Attach the concatenated labels to the feature dictionary.
training_data['labels'] = labels
# Float copy of the labels; the sum shown as the cell output is the number of
# positive (label == 1) examples.
roc_labels = np.array(training_data["labels"], dtype=np.float32)
roc_labels.sum()

np.float32(4065.0)

In [None]:
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Input
from tensorflow.keras.optimizers import AdamW
from tensorflow.keras.losses import BinaryCrossentropy, CategoricalCrossentropy
from tensorflow.keras.regularizers import l2
from sklearn.metrics import roc_auc_score, f1_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tqdm import tqdm
from sklearn.preprocessing import OneHotEncoder

# Prepare data: one row of per-head attention features per question
attention_scores = np.stack([score.numpy() for score in training_data["attention_scores_a1_out"]])
label_ids = np.array(training_data["labels"], dtype=np.int32)  # 0/1 for binary classification
labels = tf.keras.utils.to_categorical(label_ids, num_classes=2) #labels with one-hot-encoding
print(f"attention_scores.shape: {attention_scores.shape}")


# Random 80/20 split
train_indices, test_indices = train_test_split(
    np.arange(len(labels)), test_size=0.2, random_state=21, shuffle=True
)

# Standardize with statistics from the training rows only, so that nothing about the
# test rows leaks into the features the classifier is trained on.
scaler = StandardScaler()
train_attention = scaler.fit_transform(attention_scores[train_indices])
test_attention = scaler.transform(attention_scores[test_indices])
train_labels = labels[train_indices]
test_labels = labels[test_indices]
roc_labels = label_ids[test_indices].astype(np.float32)  # 0/1 test labels for AUROC

# Define model
input_dim = attention_scores.shape[1]
hidden_dim = 256  # Smaller hidden layer for simpler classifier

# Named `mlp_model` so the language model bound to the global `model` is not overwritten.
# The input shape is declared once through `Input`. The two-unit output uses softmax so it
# is a probability distribution over the classes, matching the one-hot labels and
# CategoricalCrossentropy.
mlp_model = Sequential([
    Input(shape=(input_dim,)),
    Dense(hidden_dim, activation='relu', kernel_regularizer=l2(0.01)),
    Dropout(0.5),
    Dense(hidden_dim, activation='relu', kernel_regularizer=l2(0.01)),
    Dense(2, activation='softmax')
])


# Training parameters
batch_size = 96
epochs = 1000
lr = 0.0008
weight_decay = 0.001

# Compile model
optimizer = AdamW(learning_rate=lr, weight_decay=weight_decay)
mlp_model.compile(
    optimizer=optimizer,
    loss=CategoricalCrossentropy(),
    metrics=['accuracy']
)

# Checkpoint directory; created up front so saving cannot fail on a missing folder
model_dir = os.path.join(data_path, "MLP_model")
os.makedirs(model_dir, exist_ok=True)

# Training loop with evaluation every 10 epochs
for epoch in tqdm(range(epochs), desc="Training"):
    history = mlp_model.fit(
        train_attention, train_labels,
        batch_size=batch_size,
        epochs=1,
        verbose=0,
        shuffle=True
    )

    if (epoch + 1) % 10 == 0:
        train_loss = history.history['loss'][0]
        train_accuracy = history.history['accuracy'][0]

        # Evaluate on test set
        test_loss, test_accuracy = mlp_model.evaluate(
            test_attention, test_labels, verbose=0
        )
        # save the model for every 10 epoch (the file name carries the zero-based epoch index)
        mlp_model.save(os.path.join(model_dir, f"model_epochs_{epoch}.keras"))
        # Calculate AUROC from the predicted probability of class 1
        test_outputs = mlp_model.predict(test_attention, verbose=0)[:, 1]
        auroc = roc_auc_score(roc_labels, test_outputs)

        print(f'Epoch [{epoch+1}/{epochs}],'
              f'Train Loss: {train_loss:.4f},'
              f'Test Loss: {test_loss:.4f},'
              f'Test Accuracy: {test_accuracy:.4f},'
              f'Test AUROC: {auroc:.4f}')

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


attention_scores.shape: (9000, 32)


Training:   1%|          | 10/1000 [00:06<13:59,  1.18it/s]

Epoch [10/1000],Train Loss: 0.6751,Test Loss: 0.6769,Test Accuracy: 0.6106,Test AUROC: 0.6385


Training:   2%|▏         | 20/1000 [00:08<05:27,  2.99it/s]

Epoch [20/1000],Train Loss: 0.6718,Test Loss: 0.6725,Test Accuracy: 0.5950,Test AUROC: 0.6560


Training:   3%|▎         | 30/1000 [00:11<05:13,  3.10it/s]

Epoch [30/1000],Train Loss: 0.6706,Test Loss: 0.6752,Test Accuracy: 0.5911,Test AUROC: 0.6525


Training:   4%|▍         | 40/1000 [00:13<05:08,  3.11it/s]

Epoch [40/1000],Train Loss: 0.6677,Test Loss: 0.6730,Test Accuracy: 0.5961,Test AUROC: 0.6587


Training:   5%|▌         | 50/1000 [00:16<05:08,  3.08it/s]

Epoch [50/1000],Train Loss: 0.6653,Test Loss: 0.6691,Test Accuracy: 0.6028,Test AUROC: 0.6595


Training:   6%|▌         | 60/1000 [00:18<05:04,  3.09it/s]

Epoch [60/1000],Train Loss: 0.6631,Test Loss: 0.6737,Test Accuracy: 0.5989,Test AUROC: 0.6506


Training:   7%|▋         | 71/1000 [00:21<04:19,  3.58it/s]

Epoch [70/1000],Train Loss: 0.6632,Test Loss: 0.6685,Test Accuracy: 0.6222,Test AUROC: 0.6564


Training:   8%|▊         | 80/1000 [00:23<04:48,  3.19it/s]

Epoch [80/1000],Train Loss: 0.6582,Test Loss: 0.6691,Test Accuracy: 0.6083,Test AUROC: 0.6560


Training:   9%|▉         | 89/1000 [00:25<04:22,  3.48it/s]


KeyboardInterrupt: 

### Classifier training: XGB
This part is independent of the MLP classifier.

In [None]:
from google.colab import drive
import torch
# Mount Google Drive at /content/drive; force_remount=True remounts even when the
# drive is already mounted, so this section can be run on its own.
drive.mount('/content/drive', force_remount=True)
# Project folder on the mounted drive. File names are appended by string
# concatenation, so keep the trailing slash.
data_path = '/content/drive/My Drive/MSBD5002_project/' # modify this line for your drive
Mounted at /content/drive


In [None]:
import torch.nn as nn
from torch.optim import Adam
from sklearn.metrics import roc_auc_score
import numpy as np
from tqdm import tqdm
import os

In [None]:
# Load the artifact from the drive
# Each feature file holds one list per key; the three chunk files are concatenated in order.
FEATURE_KEYS = (
  "attention_scores_a1",
  "attention_scores_a1_all",
  "attention_scores_a1_out",
  "attention_scores_qm_all",
  "attention_scores_qm_out",
)
training_data = {key: torch.tensor([]) for key in FEATURE_KEYS}
for i in range(3):
  data_load = torch.load(data_path + f"attention_research_data/attention_data_divided_lasttry_{3000*(i+1)}.pt") # it may takes around 1 minute
  for key in FEATURE_KEYS:
    if i == 0:
      # The first chunk replaces the placeholder value
      training_data[key] = data_load[key]
    else:
      # Later chunks are appended with `+`
      training_data[key] = training_data[key] + data_load[key]

# Labels come from the nine files written by the implementation cell
label_chunks = []
for i in range(9):
  data_load = torch.load(data_path + f"attention_data_{1000*(i+1)}.pt") # it may takes around 1 minute
  label_chunks.append(data_load['labels'])
labels = np.array(label_chunks).flatten()

In [None]:
# Attach the concatenated labels to the feature dictionary.
training_data['labels'] = labels
# Float copy of the labels; the sum shown as the cell output is the number of
# positive (label == 1) examples.
roc_labels = np.array(training_data["labels"], dtype=np.float32)
roc_labels.sum()

np.float32(4065.0)

In [None]:
from xgboost import XGBClassifier
from sklearn.metrics import roc_auc_score
import numpy as np
import torch
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# Reformatting the datasets: one row of per-head attention features per question
attention_scores = np.stack([score.numpy() for score in training_data["attention_scores_a1_out"]])
labels = np.array(training_data["labels"], dtype=np.float32)  # 0/1 for binary classification


# Random 80/20 split
train_indices, test_indices = train_test_split(
    np.arange(len(labels)), test_size=0.2, random_state=21, shuffle=True
)

# Split the datasets into trainset and testset. The scaler is fitted on the training
# rows only and then applied to the test rows, so no test statistics reach training.
scaler = StandardScaler()
train_attention = scaler.fit_transform(attention_scores[train_indices])
test_attention = scaler.transform(attention_scores[test_indices])
train_labels = labels[train_indices]
test_labels = labels[test_indices]

# Training for the XGB model
# Use the GPU when one is available and fall back to the CPU otherwise, instead of
# failing on runtimes without CUDA.
xgb_device = "cuda" if torch.cuda.is_available() else "cpu"
xgb = XGBClassifier(n_estimators=850, learning_rate=0.0081, max_depth=13, eval_metric='auc', tree_method="hist", device=xgb_device)
xgb.fit(train_attention, train_labels)

# AUROC on the held-out rows, from the predicted probability of class 1
test_outputs_xgb = xgb.predict_proba(test_attention)[:, 1]
auroc_xgb = roc_auc_score(test_labels, test_outputs_xgb)
print(f'XGBoost AUROC: {auroc_xgb:.4f}')

XGBoost AUROC: 0.7140 i = 0
XGBoost AUROC: 0.7141 i = 1
XGBoost AUROC: 0.7141 i = 2
XGBoost AUROC: 0.7143 i = 3
XGBoost AUROC: 0.7142 i = 4
XGBoost AUROC: 0.7143 i = 5
XGBoost AUROC: 0.7143 i = 6
XGBoost AUROC: 0.7141 i = 7
XGBoost AUROC: 0.7141 i = 8
XGBoost AUROC: 0.7141 i = 9
XGBoost AUROC: 0.7141 i = 10
XGBoost AUROC: 0.7142 i = 11
XGBoost AUROC: 0.7141 i = 12
XGBoost AUROC: 0.7141 i = 13
XGBoost AUROC: 0.7140 i = 14
XGBoost AUROC: 0.7140 i = 15
XGBoost AUROC: 0.7139 i = 16
XGBoost AUROC: 0.7138 i = 17
XGBoost AUROC: 0.7137 i = 18
XGBoost AUROC: 0.7137 i = 19


In [None]:
# Save the model
# Writes the classifier fitted above into the project folder on Drive. The file name is
# fixed (the f-string has no placeholders), so re-running overwrites the previous file.
xgb.save_model(data_path + f"xgbmodel_attention_research_0.66.json")