## Instructions
To successfully run the following notebook, in the output of the first cell you need to insert a valid Llama2 access token. This token can be requested on their model card (https://huggingface.co/meta-llama/Llama-2-13b-hf).

A GPU is highly recommended.

In [2]:
import os
from transformers import AutoTokenizer, AutoModelForSequenceClassification, BitsAndBytesConfig, TrainingArguments, Trainer
from torch.utils.data import Dataset
import torch
import pandas as pd
import evaluate
import numpy as np  
import pandas as pd
from peft import prepare_model_for_kbit_training, LoraConfig, get_peft_model, TaskType
import bitsandbytes as bnb
import datasets
from datasets import load_dataset
import transformers
from tqdm import tqdm
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import train_test_split
from huggingface_hub import notebook_login
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [3]:
# Count GPUs
print(torch.cuda.device_count())


1


In [4]:
# Count GPUs
print("Available GPUs:", torch.cuda.device_count())

# Set CUDA_VISIBLE_DEVICES to use only GPU 0
os.environ["CUDA_VISIBLE_DEVICES"] = "1"

# Count GPUs again to see if the change took effect
print("Available GPUs after setting CUDA_VISIBLE_DEVICES:", torch.cuda.device_count())

# Move the model to the appropriate device
device = torch.device("cuda:1") 
print("Using device:", device)

Available GPUs: 1
Available GPUs after setting CUDA_VISIBLE_DEVICES: 1
Using device: cuda:1


In [5]:
# change model_id to either 7b or 13b
model_id = "meta-llama/Llama-2-13b-hf" # "meta-llama/Llama-2-7b-hf" 
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
tokenizer.add_special_tokens({"pad_token":"<pad>"}) 

1

In [6]:
bot_train = datasets.load_from_disk("../../../dataset/bot_ds")["train"] # change 
no_bot_train = datasets.load_from_disk("../../../dataset/no_bot_ds")["train"] # change 
bot_train = bot_train.add_column("output", [1]*len(bot_train))  
no_bot_train = no_bot_train.add_column("output", [0]*len(no_bot_train))
train_dataset = datasets.concatenate_datasets([bot_train, no_bot_train])
train_dataset = train_dataset.shuffle(10)
train_sentences, train_labels = train_dataset['text'], train_dataset['output']

In [7]:
bot_val = datasets.load_from_disk("../../../dataset/bot_ds")["val"] # change 
no_bot_val = datasets.load_from_disk("../../../dataset/no_bot_ds")["val"] # change 
bot_val = bot_val.add_column("output", [1]*len(bot_val))  
no_bot_val = no_bot_val.add_column("output", [0]*len(no_bot_val)) 
val_dataset = datasets.concatenate_datasets([bot_val, no_bot_val])
val_dataset = val_dataset.shuffle(10)
val_sentences, val_labels = val_dataset['text'], val_dataset['output']

In [8]:
# Split into training and validation sets
train_encodings = tokenizer(train_sentences, truncation=True, max_length=512, padding='max_length', return_attention_mask=True)
val_encodings = tokenizer(val_sentences, truncation=True, max_length=512, padding='max_length', return_attention_mask=True)

In [9]:
# Create torch dataset
class CustomDataset(Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        return len(self.labels)

In [10]:
# Prepare datasets
train_dataset = CustomDataset(train_encodings, train_labels)
val_dataset = CustomDataset(val_encodings, val_labels)

In [11]:
len(train_dataset)

12688

In [12]:
len(val_dataset)

3173

In [13]:
### Prepare model
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float32  #bfloat
)

model = AutoModelForSequenceClassification.from_pretrained(model_id, 
                                                        quantization_config=bnb_config,
                                                        trust_remote_code=True,
                                                        num_labels=len(set(train_labels)))

`low_cpu_mem_usage` was None, now set to True since model is quantized.


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-2-13b-hf and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [14]:
model.resize_token_embeddings(len(tokenizer))

Embedding(32001, 5120)

In [15]:
model.config.pad_token_id = tokenizer.pad_token_id

In [16]:
model = prepare_model_for_kbit_training(model)

In [17]:
def print_trainable_parameters(model):
    """
    Prints the number of trainable parameters in the model.
    """
    trainable_params = 0
    all_param = 0
    for _, param in model.named_parameters():
        all_param += param.numel()
        if param.requires_grad:
            trainable_params += param.numel()
    print(
        f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {100 * trainable_params / all_param}"
    )

In [18]:
def find_all_linear_names(model):
    cls = bnb.nn.Linear4bit #if args.bits == 4 else (bnb.nn.Linear8bitLt if args.bits == 8 else torch.nn.Linear)
    lora_module_names = set()
    for name, module in model.named_modules():
        if isinstance(module, cls):
            names = name.split('.')
            lora_module_names.add(names[0] if len(names) == 1 else names[-1])

    if 'lm_head' in lora_module_names:
        lora_module_names.remove('lm_head')
    return list(lora_module_names)

In [19]:
modules = find_all_linear_names(model)
modules

['q_proj', 'k_proj', 'down_proj', 'up_proj', 'v_proj', 'gate_proj', 'o_proj']

In [20]:
model_directory = "./models/"

config = LoraConfig(
    r=16, 
    lora_alpha=64, 
    target_modules=modules, 
    lora_dropout=0.4, 
    bias="none", 
    task_type=TaskType.SEQ_CLS
)

model = get_peft_model(model, config)


def compute_metrics(pred):
    labels = pred.label_ids
    preds = pred.predictions.argmax(-1)
    
    # Calculate accuracy
    accuracy = accuracy_score(labels, preds)

   # Calculate precision, recall, and F1-score
    precision = precision_score(labels, preds, average='binary')
    recall = recall_score(labels, preds, average='binary')
    f1 = f1_score(labels, preds, average='binary')
    
    return {
        'accuracy': accuracy,
        'precision': precision,
        'recall': recall,
        'f1': f1
    }

training_args = TrainingArguments(
    logging_dir=model_directory, 
    output_dir=model_directory,    # change     
    evaluation_strategy='epoch', 
    load_best_model_at_end = True,
    logging_steps = 100, 
    logging_strategy="epoch",
    report_to="none",
    per_device_train_batch_size = 8,
    per_device_eval_batch_size=16,
    num_train_epochs=2,
    save_total_limit=2,
    save_strategy= "epoch",
    optim="paged_adamw_32bit", 
    learning_rate = 2e-5,
    fp16 = True, 
    push_to_hub=False,
)
print_trainable_parameters(model)
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    compute_metrics=compute_metrics,
)
model.config.use_cache = False  # silence the warnings. please re-enable for inference!
do_train = True

def log_memory_usage():
    mem_alloc = torch.cuda.memory_allocated() / 1024**3  # Memory usage in GB
    print(f"Memory Usage: {mem_alloc:.2f} GB")

# Launch training and log metrics
print("Training...")


if do_train:
    train_result = trainer.train()
    metrics = train_result.metrics
    trainer.log_metrics("train", metrics)
    trainer.save_metrics("train", metrics)
    trainer.save_state()
    log_memory_usage()
    print(metrics)



trainable params: 62597120 || all params: 6570752000 || trainable%: 0.9526629524291892
Training...




Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.1314,0.070825,0.981406,0.996139,0.966896,0.9813
2,0.0191,0.046219,0.988969,0.991217,0.986883,0.989045



Cannot access gated repo for url https://huggingface.co/meta-llama/Llama-2-13b-hf/resolve/main/config.json.
Access to model meta-llama/Llama-2-13b-hf is restricted. You must have access to it and be authenticated to access it. Please log in. - silently ignoring the lookup for the file config.json in meta-llama/Llama-2-13b-hf.

Cannot access gated repo for url https://huggingface.co/meta-llama/Llama-2-13b-hf/resolve/main/config.json.
Access to model meta-llama/Llama-2-13b-hf is restricted. You must have access to it and be authenticated to access it. Please log in. - silently ignoring the lookup for the file config.json in meta-llama/Llama-2-13b-hf.


***** train metrics *****
  epoch                    =         2.0
  total_flos               = 925724286GF
  train_loss               =      0.0753
  train_runtime            =  3:29:18.60
  train_samples_per_second =       2.021
  train_steps_per_second   =       0.253
Memory Usage: 7.31 GB
{'train_runtime': 12558.608, 'train_samples_per_second': 2.021, 'train_steps_per_second': 0.253, 'total_flos': 9.93988883471401e+17, 'train_loss': 0.07526023932092578, 'epoch': 2.0}


In [21]:
# Save model
print("Saving last checkpoint of the model...")
trainer.model.save_pretrained(model_directory, safe_serialization=False)

# Free memory for merging weights
del model
del trainer
torch.cuda.empty_cache()

Saving last checkpoint of the model...



Cannot access gated repo for url https://huggingface.co/meta-llama/Llama-2-13b-hf/resolve/main/config.json.
Access to model meta-llama/Llama-2-13b-hf is restricted. You must have access to it and be authenticated to access it. Please log in. - silently ignoring the lookup for the file config.json in meta-llama/Llama-2-13b-hf.


In [23]:
import torch
from peft import PeftConfig, PeftModel
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, AutoModelForSequenceClassification

PEFT_MODEL = model_directory #+ "best-model"
config = PeftConfig.from_pretrained(PEFT_MODEL)

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16
)
model = AutoModelForSequenceClassification.from_pretrained(
    config.base_model_name_or_path,
    return_dict=True,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True,
    num_labels=2
)
tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)
tokenizer.add_special_tokens({"pad_token":"<pad>"}) 
model.resize_token_embeddings(len(tokenizer))
model.config.pad_token_id = tokenizer.pad_token_id

model = PeftModel.from_pretrained(model, PEFT_MODEL)

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-2-13b-hf and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [24]:
#Load Testdata 
bot_test = datasets.load_from_disk("../../../dataset/bot_ds")["test"]
no_bot_test = datasets.load_from_disk("../../../dataset/no_bot_ds")["test"]
bot_test = bot_test.add_column("output", [1]*len(bot_test))
no_bot_test = no_bot_test.add_column("output", [0]*len(no_bot_test))
test_dataset = datasets.concatenate_datasets([bot_test, no_bot_test])
test_dataset = test_dataset.shuffle(10)
test_sentences, test_labels = test_dataset['text'], test_dataset['output']

In [25]:
from tqdm import tqdm
import evaluate
metric = evaluate.load("f1")

preds = []
for sentence in tqdm(test_sentences, desc="Evaluation"):
    input = tokenizer(sentence, return_tensors='pt', truncation=True, max_length=512)
    pred = model(**input) 
    preds.append(pred.logits.argmax().item())

metric.compute(predictions=preds, references=test_labels, average='binary')["f1"]

Evaluation: 100%|██████████| 3197/3197 [06:05<00:00,  8.75it/s]


0.957556270096463