## Fine-tuning using dora

Model: deepseek
Dataset: financialPhraseBank - from finbert 
Techniques: dora
- data augmentation (to consider if data is too little)
Using ollama

Load data

### TRAINING STARTS HERE

In [3]:
from huggingface_hub import notebook_login

In [4]:
"""
import wandb
wandb.login()
"""

'\nimport wandb\nwandb.login()\n'

In [5]:
%env HF_HUB_ENABLE_HF_TRANSFER=True

env: HF_HUB_ENABLE_HF_TRANSFER=True


In [6]:
model_id = "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B"
cache_dir = "/home/m/mehrad/brikiyou/scratch/huggingface_cache" # To add 

In [8]:
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import torch

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16, 
)

model = AutoModelForCausalLM.from_pretrained( # Load with vllm - todo
    model_id,
    #quantization_config=bnb_config,
    device_map="auto",
    #trust_remote_code=False
    torch_dtype=torch.bfloat16, # if A100 GPU Available     
    cache_dir=cache_dir
)

# Look at tokenizer - to adapt to dataset
tokenizer = AutoTokenizer.from_pretrained(
    model_id,
    cache_dir=cache_dir,
    trust_remote_code=True
)


Could not cache non-existence of file. Will ignore error and continue. Error: [Errno 30] Read-only file system: '/home/m/mehrad/brikiyou/.cache/huggingface/hub/models--deepseek-ai--DeepSeek-R1-Distill-Qwen-7B'


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Some parameters are on the meta device because they were offloaded to the cpu.


### Prepare for Dora fine-tuning

In [9]:
from peft import prepare_model_for_kbit_training

model.gradient_checkpointing_enable() # comment out if saving vram

In [10]:
print(model)

Qwen2ForCausalLM(
  (model): Qwen2Model(
    (embed_tokens): Embedding(152064, 3584)
    (layers): ModuleList(
      (0-27): 28 x Qwen2DecoderLayer(
        (self_attn): Qwen2Attention(
          (q_proj): Linear(in_features=3584, out_features=3584, bias=True)
          (k_proj): Linear(in_features=3584, out_features=512, bias=True)
          (v_proj): Linear(in_features=3584, out_features=512, bias=True)
          (o_proj): Linear(in_features=3584, out_features=3584, bias=False)
        )
        (mlp): Qwen2MLP(
          (gate_proj): Linear(in_features=3584, out_features=18944, bias=False)
          (up_proj): Linear(in_features=3584, out_features=18944, bias=False)
          (down_proj): Linear(in_features=18944, out_features=3584, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): Qwen2RMSNorm((3584,), eps=1e-06)
        (post_attention_layernorm): Qwen2RMSNorm((3584,), eps=1e-06)
      )
    )
    (norm): Qwen2RMSNorm((3584,), eps=1e-06)
    (rotary_emb):

In [13]:
from peft import LoraConfig, get_peft_model

peft_config = LoraConfig(
    r=8,
    lora_alpha=32,
    target_modules=["q_proj", "v_proj", # To look for for self attention & mlp without act_fn
                    "lora_magintude_vector"], # DoRa 
    lora_dropout=0.01, # To tune
    bias="none",
    task_type="CAUSAL_LM",
    use_dora=True,
)


In [14]:
model = get_peft_model(model, peft_config)

OSError: [Errno 30] Read-only file system: '/home/m/mehrad/brikiyou/.triton'

## Tokenizer

In [11]:
print(tokenizer)
print(tokenizer.special_tokens_map)
print(tokenizer.vocab_size)
print(tokenizer.eos_token_id)
print(tokenizer.pad_token_id)
print(tokenizer.bos_token)


LlamaTokenizerFast(name_or_path='deepseek-ai/DeepSeek-R1-Distill-Qwen-7B', vocab_size=151643, model_max_length=16384, is_fast=True, padding_side='left', truncation_side='right', special_tokens={'bos_token': '<｜begin▁of▁sentence｜>', 'eos_token': '<｜end▁of▁sentence｜>', 'pad_token': '<｜end▁of▁sentence｜>'}, clean_up_tokenization_spaces=False, added_tokens_decoder={
	151643: AddedToken("<｜end▁of▁sentence｜>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	151644: AddedToken("<｜User｜>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False),
	151645: AddedToken("<｜Assistant｜>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False),
	151646: AddedToken("<｜begin▁of▁sentence｜>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	151647: AddedToken("<|EOT|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False),
	151648: AddedToken("<think>", rstrip=False, 

In [None]:
print(tokenizer.chat_template) # to update


messages = [
    {'role':'user', 'content':'Hello, how are you?'},
    {'role':'assistant', 'content':'I am fine, thank you!'},
    {'role':'user', 'content':'What is your name?'},
]


inputs = tokenizer.apply_chat_template(messages, tokenize=False)
print(inputs)


# TODO:
# - Check padding tokinzer, unk token to use or manually do i 