In [1]:
import transformers
import textwrap
from transformers import LlamaTokenizer, LlamaForCausalLM
import os
import re
import sys
from typing import List
import json

from peft import (
    LoraConfig,
    get_peft_model,
    get_peft_model_state_dict,
    prepare_model_for_int8_training,
)

import torch
from datasets import load_dataset
import pandas as pd
 
import matplotlib.pyplot as plt
import matplotlib as mpl
import seaborn as sns
from pylab import rcParams

# Tell tooling where the tensorboard executable lives (presumably consumed by
# the `%tensorboard` magic in the last cell -- confirm).
# Resolution order: a value already exported in the environment wins, then
# whatever `tensorboard` resolves to on PATH, and only then the original
# hard-coded conda location.
import shutil

os.environ.setdefault(
    'TENSORBOARD_BINARY',
    shutil.which('tensorboard') or '/opt/conda/envs/py310/bin/tensorboard',
)


Welcome to bitsandbytes. For bug reports, please submit your error trace to: https://github.com/TimDettmers/bitsandbytes/issues
CUDA SETUP: CUDA runtime path found: /usr/local/cuda/lib64/libcudart.so
CUDA SETUP: Highest compute capability among GPUs detected: 7.5
CUDA SETUP: Detected CUDA version 110
CUDA SETUP: Loading binary /opt/conda/envs/py310/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cuda110.so...


  warn(msg)


In [2]:
# Use the GPU whenever PyTorch reports one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"
DEVICE

'cuda'

In [3]:
# Hub identifier of the checkpoint used for both the model and its tokenizer.
BASE_MODEL = "decapoda-research/llama-13b-hf"

# Load the weights with 8-bit loading enabled, fp16 as the torch dtype and
# automatic device placement.
model = LlamaForCausalLM.from_pretrained(
    BASE_MODEL, load_in_8bit=True, torch_dtype=torch.float16, device_map="auto"
)
tokenizer = LlamaTokenizer.from_pretrained(BASE_MODEL)

# Pad on the left, using id 0 (unk) so the pad token differs from the EOS token.
tokenizer.padding_side = "left"
tokenizer.pad_token_id = 0

Downloading (…)lve/main/config.json:   0%|          | 0.00/427 [00:00<?, ?B/s]

Downloading (…)model.bin.index.json:   0%|          | 0.00/31.8k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/41 [00:00<?, ?it/s]

Downloading (…)l-00001-of-00041.bin:   0%|          | 0.00/952M [00:00<?, ?B/s]

Downloading (…)l-00002-of-00041.bin:   0%|          | 0.00/952M [00:00<?, ?B/s]

Downloading (…)l-00003-of-00041.bin:   0%|          | 0.00/952M [00:00<?, ?B/s]

Downloading (…)l-00004-of-00041.bin:   0%|          | 0.00/952M [00:00<?, ?B/s]

Downloading (…)l-00005-of-00041.bin:   0%|          | 0.00/952M [00:00<?, ?B/s]

Downloading (…)l-00006-of-00041.bin:   0%|          | 0.00/952M [00:00<?, ?B/s]

Downloading (…)l-00007-of-00041.bin:   0%|          | 0.00/952M [00:00<?, ?B/s]

Downloading (…)l-00008-of-00041.bin:   0%|          | 0.00/952M [00:00<?, ?B/s]

Downloading (…)l-00009-of-00041.bin:   0%|          | 0.00/952M [00:00<?, ?B/s]

Downloading (…)l-00010-of-00041.bin:   0%|          | 0.00/952M [00:00<?, ?B/s]

Downloading (…)l-00011-of-00041.bin:   0%|          | 0.00/952M [00:00<?, ?B/s]

Downloading (…)l-00012-of-00041.bin:   0%|          | 0.00/952M [00:00<?, ?B/s]

Downloading (…)l-00013-of-00041.bin:   0%|          | 0.00/952M [00:00<?, ?B/s]

Downloading (…)l-00014-of-00041.bin:   0%|          | 0.00/952M [00:00<?, ?B/s]

Downloading (…)l-00015-of-00041.bin:   0%|          | 0.00/952M [00:00<?, ?B/s]

Downloading (…)l-00016-of-00041.bin:   0%|          | 0.00/952M [00:00<?, ?B/s]

Downloading (…)l-00017-of-00041.bin:   0%|          | 0.00/952M [00:00<?, ?B/s]

Downloading (…)l-00018-of-00041.bin:   0%|          | 0.00/952M [00:00<?, ?B/s]

Downloading (…)l-00019-of-00041.bin:   0%|          | 0.00/952M [00:00<?, ?B/s]

Downloading (…)l-00020-of-00041.bin:   0%|          | 0.00/952M [00:00<?, ?B/s]

Downloading (…)l-00021-of-00041.bin:   0%|          | 0.00/952M [00:00<?, ?B/s]

Downloading (…)l-00022-of-00041.bin:   0%|          | 0.00/952M [00:00<?, ?B/s]

Downloading (…)l-00023-of-00041.bin:   0%|          | 0.00/952M [00:00<?, ?B/s]

Downloading (…)l-00024-of-00041.bin:   0%|          | 0.00/952M [00:00<?, ?B/s]

Downloading (…)l-00025-of-00041.bin:   0%|          | 0.00/952M [00:00<?, ?B/s]

Downloading (…)l-00026-of-00041.bin:   0%|          | 0.00/952M [00:00<?, ?B/s]

Downloading (…)l-00027-of-00041.bin:   0%|          | 0.00/952M [00:00<?, ?B/s]

Downloading (…)l-00028-of-00041.bin:   0%|          | 0.00/952M [00:00<?, ?B/s]

Downloading (…)l-00029-of-00041.bin:   0%|          | 0.00/952M [00:00<?, ?B/s]

Downloading (…)l-00030-of-00041.bin:   0%|          | 0.00/952M [00:00<?, ?B/s]

Downloading (…)l-00031-of-00041.bin:   0%|          | 0.00/952M [00:00<?, ?B/s]

Downloading (…)l-00032-of-00041.bin:   0%|          | 0.00/952M [00:00<?, ?B/s]

Downloading (…)l-00033-of-00041.bin:   0%|          | 0.00/952M [00:00<?, ?B/s]

Downloading (…)l-00034-of-00041.bin:   0%|          | 0.00/952M [00:00<?, ?B/s]

Downloading (…)l-00035-of-00041.bin:   0%|          | 0.00/952M [00:00<?, ?B/s]

Downloading (…)l-00036-of-00041.bin:   0%|          | 0.00/952M [00:00<?, ?B/s]

Downloading (…)l-00037-of-00041.bin:   0%|          | 0.00/952M [00:00<?, ?B/s]

Downloading (…)l-00038-of-00041.bin:   0%|          | 0.00/952M [00:00<?, ?B/s]

Downloading (…)l-00039-of-00041.bin:   0%|          | 0.00/952M [00:00<?, ?B/s]

Downloading (…)l-00040-of-00041.bin:   0%|          | 0.00/952M [00:00<?, ?B/s]

Downloading (…)l-00041-of-00041.bin:   0%|          | 0.00/983M [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/41 [00:00<?, ?it/s]

Downloading (…)neration_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

Downloading tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/2.00 [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/141 [00:00<?, ?B/s]

The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. 
The tokenizer class you load from this checkpoint is 'LLaMATokenizer'. 
The class this function is called from is 'LlamaTokenizer'.


In [44]:
# Length budget shared by two stages of the notebook:
#   * split_text_into_chunks compares it against len() of strings, i.e. characters;
#   * tokenize passes it to the tokenizer as max_length, i.e. tokens.
CUTOFF_LEN = 512

In [9]:
# Read the JSON Lines corpus: every non-blank line is parsed as one JSON document.
jsons = []
# Decode explicitly as UTF-8 instead of relying on the platform's default encoding.
with open("../data/psychology_data.jsonl", "r", encoding="utf-8") as f:
    for line in f:
        # Blank lines (e.g. stray newlines at the end of the file) are not valid
        # JSON and would make json.loads raise, so skip them.
        if not line.strip():
            continue
        jsons.append(json.loads(line))

In [51]:
def split_text_into_chunks(text, max_len=None):
    """Group the sentences of ``text`` into chunks of bounded character length.

    The text is split into sentences on whitespace that follows a ``.`` or ``?``
    (with lookbehinds that avoid splitting after patterns such as ``e.g.`` or
    ``Mr.``). Sentences are then accumulated greedily; a new chunk is started
    whenever adding the next sentence would push the running chunk past the
    limit.

    Args:
        text: The raw text to split.
        max_len: Maximum chunk length in characters, measured on the running
            chunk including the single-space padding placed around each
            sentence. Defaults to the notebook-level ``CUTOFF_LEN``.

    Returns:
        A list of non-empty, whitespace-stripped chunks. A single sentence that
        is longer than ``max_len`` is kept whole as its own chunk. Empty or
        whitespace-only input yields an empty list.
    """
    if max_len is None:
        max_len = CUTOFF_LEN

    # Split the text into sentences
    sentences = re.split(r'(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<=\.|\?)\s', text)

    chunks = []
    current_chunk = ""

    for sentence in sentences:
        # Check if adding the current sentence exceeds the maximum chunk length
        if len(current_chunk) + len(sentence) > max_len:
            finished = current_chunk.strip()
            # Only flush real content: when the very first sentence is already
            # over the limit the running chunk is still empty, and emitting it
            # would create an empty training example.
            if finished:
                chunks.append(finished)
            current_chunk = ""  # Reset the current chunk

        current_chunk += " " + sentence + " "  # Add the sentence to the current chunk

    # The running chunk always carries padding spaces, so test the stripped
    # text rather than the raw string to avoid a trailing empty chunk.
    remainder = current_chunk.strip()
    if remainder:
        chunks.append(remainder)

    return chunks

# Flatten every book into chunk records; each record's id combines the book's
# text_id with the chunk's position inside that book.
prompts = []
for book in jsons:
    book_chunks = split_text_into_chunks(book['text'].strip())
    prompts.extend(
        {'id': f"text_id{book['text_id']}_chunk_id{idx}", 'text': chunk}
        for idx, chunk in enumerate(book_chunks)
    )

# Persist all records as one JSON array.
with open("../data/psychology_prompts.json", "w") as f:
    json.dump(prompts, f)

In [52]:
# Read the chunk records written above back in through `datasets`' JSON loader
# and display the "train" split.
data = load_dataset(
    "json",
    data_files="../data/psychology_prompts.json",
)
data["train"]

Downloading and preparing dataset json/default to /home/jupyter/.cache/huggingface/datasets/json/default-4ad2c1236d91ef23/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51...


Downloading data files:   0%|          | 0/1 [00:00<?, ?it/s]

Extracting data files:   0%|          | 0/1 [00:00<?, ?it/s]

Generating train split: 0 examples [00:00, ? examples/s]

Dataset json downloaded and prepared to /home/jupyter/.cache/huggingface/datasets/json/default-4ad2c1236d91ef23/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51. Subsequent calls will reuse this data.


  0%|          | 0/1 [00:00<?, ?it/s]

Dataset({
    features: ['id', 'text'],
    num_rows: 2433
})

In [54]:
# Display the train split again; this repeats the expression that ends the loading cell.
data['train']

Dataset({
    features: ['id', 'text'],
    num_rows: 2433
})

In [55]:
def tokenize(prompt, add_eos_token=True):
    """Tokenize ``prompt`` into a causal-LM training example.

    The prompt is encoded without padding and truncated to ``CUTOFF_LEN``
    tokens. When ``add_eos_token`` is set, the sequence does not already end
    in the EOS id, and it is shorter than ``CUTOFF_LEN``, the EOS id is
    appended (with a matching attention-mask entry of 1).

    Returns:
        The tokenizer output with an extra ``"labels"`` entry holding a copy
        of ``"input_ids"``.
    """
    encoded = tokenizer(
        prompt,
        truncation=True,
        max_length=CUTOFF_LEN,
        padding=False,
        return_tensors=None,
    )
    ids = encoded["input_ids"]

    # Conditions are evaluated in the same left-to-right order as before.
    needs_eos = (
        ids[-1] != tokenizer.eos_token_id
        and len(ids) < CUTOFF_LEN
        and add_eos_token
    )
    if needs_eos:
        ids.append(tokenizer.eos_token_id)
        encoded["attention_mask"].append(1)

    # The labels are a copy of the input ids.
    encoded["labels"] = ids.copy()
    return encoded
 
def generate_and_tokenize_prompt(text):
    """Tokenize the ``'text'`` field of one dataset record via ``tokenize``."""
    chunk_text = text['text']
    return tokenize(chunk_text)

In [56]:
# Hold out 500 shuffled examples (fixed seed) for validation, then tokenize
# both splits record by record.
train_val = data["train"].train_test_split(test_size=500, shuffle=True, seed=42)
train_data = train_val["train"].map(generate_and_tokenize_prompt)
val_data = train_val["test"].map(generate_and_tokenize_prompt)

Map:   0%|          | 0/1933 [00:00<?, ? examples/s]

Map:   0%|          | 0/500 [00:00<?, ? examples/s]

In [66]:
# --- LoRA adapter settings (passed to LoraConfig) ---
LORA_R = 8
LORA_ALPHA = 16
LORA_DROPOUT = 0.05
LORA_TARGET_MODULES = ["q_proj", "v_proj"]

# --- Optimisation settings (passed to TrainingArguments) ---
BATCH_SIZE = 128       # examples per optimizer step
MICRO_BATCH_SIZE = 4   # examples per forward/backward pass on one device
# Micro-batches to accumulate so one optimizer step covers BATCH_SIZE examples.
GRADIENT_ACCUMULATION_STEPS = BATCH_SIZE // MICRO_BATCH_SIZE
LEARNING_RATE = 3e-4
EPOCHS = 5
# Path to where your training model will be saved
OUTPUT_DIR = "experiments"

In [67]:
# Describe the LoRA adapter using the constants defined in the hyperparameter cell.
config = LoraConfig(
    task_type="CAUSAL_LM",
    bias="none",
    r=LORA_R,
    lora_alpha=LORA_ALPHA,
    lora_dropout=LORA_DROPOUT,
    target_modules=LORA_TARGET_MODULES,
)

# Prepare the 8-bit base model for training, wrap it with the adapter, and
# report how many parameters are trainable.
model = prepare_model_for_int8_training(model)
model = get_peft_model(model, config)
model.print_trainable_parameters()

In [None]:
# Trainer configuration, built from the constants in the hyperparameter cell.
training_arguments = transformers.TrainingArguments(
    per_device_train_batch_size=MICRO_BATCH_SIZE,
    gradient_accumulation_steps=GRADIENT_ACCUMULATION_STEPS,
    # The train split has 1933 rows and one optimizer step consumes
    # MICRO_BATCH_SIZE * GRADIENT_ACCUMULATION_STEPS = 128 of them, i.e. about
    # 15 steps per epoch and about 75 steps over EPOCHS = 5. The previous
    # fixed warmup_steps=100 was therefore longer than the whole run, so the
    # learning rate never reached LEARNING_RATE. A ratio scales with the
    # actual number of steps instead.
    warmup_ratio=0.1,
    num_train_epochs=EPOCHS,
    learning_rate=LEARNING_RATE,
    fp16=True,
    logging_steps=5,
    optim="adamw_torch",
    evaluation_strategy="steps",
    save_strategy="steps",
    # NOTE(review): evaluating on the 500-row validation split and writing a
    # checkpoint after every single optimizer step looks very expensive --
    # confirm this cadence is intended before a full run.
    eval_steps=1,
    save_steps=1,
    output_dir=OUTPUT_DIR,
    save_total_limit=2,
    load_best_model_at_end=True,
    report_to="tensorboard"
)

In [63]:
# Batch collator: pads each batch (length rounded up to a multiple of 8) and
# returns PyTorch tensors.
data_collator = transformers.DataCollatorForSeq2Seq(
    tokenizer,
    padding=True,
    pad_to_multiple_of=8,
    return_tensors="pt",
)

In [64]:
# Assemble the Trainer from the model, the tokenized splits, the training
# arguments and the collator defined in the previous cells.
trainer = transformers.Trainer(
    model=model,
    train_dataset=train_data,
    eval_dataset=val_data,
    args=training_arguments,
    data_collator=data_collator
)

# Turn off the model's use_cache flag before training.
model.config.use_cache = False

# Replace model.state_dict with a bound function that routes the original
# state dict through get_peft_model_state_dict.
# NOTE(review): re-running this cell wraps the already patched method again;
# run it once per kernel session.
old_state_dict = model.state_dict
model.state_dict = (
    lambda self, *_, **__: get_peft_model_state_dict(
        self, old_state_dict()
    )
).__get__(model, type(model))

# torch.compile only exists from PyTorch 2.0 onwards, so calling it
# unconditionally raises AttributeError on older installs. It is also skipped
# on Windows (NOTE(review): assumed unsupported there -- confirm for the
# installed PyTorch version).
# The Trainer above was built before this rebinding, so it keeps the
# uncompiled object; only later uses of the name `model` see the compiled one.
if hasattr(torch, "compile") and sys.platform != "win32":
    model = torch.compile(model)

In [65]:
# Run fine-tuning. NOTE(review): the output recorded for this cell ends in
# "cuBLAS API failed with status 15", not in a completed training run.
trainer.train()



A: torch.Size([1344, 5120]), B: torch.Size([5120, 5120]), C: (1344, 5120); (lda, ldb, ldc): (c_int(43008), c_int(163840), c_int(43008)); (m, n, k): (c_int(1344), c_int(5120), c_int(5120))
cuBLAS API failed with status 15
error detected

In [14]:
# Write the model to OUTPUT_DIR through its save_pretrained method.
model.save_pretrained(OUTPUT_DIR)

In [17]:
# Log in to the Hugging Face Hub through the interactive notebook widget.
# NOTE(review): this import sits outside the import cell at the top of the notebook.
from huggingface_hub import notebook_login
 
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [18]:
# Upload the model to the Hub repository "kmnis/DocScribe".
# NOTE(review): the output recorded below points at kmnis/medVicuna, so it was
# produced by an earlier version of this cell and is stale.
model.push_to_hub("kmnis/DocScribe", use_auth_token=True)

CommitInfo(commit_url='https://huggingface.co/kmnis/medVicuna/commit/dd0cae86dbe50dba43f772ae55d8faae0fc69a83', commit_message='Upload model', commit_description='', oid='dd0cae86dbe50dba43f772ae55d8faae0fc69a83', pr_url=None, pr_revision=None, pr_num=None)

In [2]:
# Load the TensorBoard notebook extension and open it on experiments/runs
# (presumably where the Trainer writes its logs, given OUTPUT_DIR = "experiments" -- confirm).
%load_ext tensorboard
%tensorboard --logdir experiments/runs