In [1]:
# --- Notebook configuration -------------------------------------------------
# Everything a reader may want to tune lives in this cell.
DATA_PATH = "./input"              # input data directory
MODEL_NAME = "Qwen/Qwen2.5-14B"    # base checkpoint identifier passed to from_pretrained
OUTPUT_PATH = "."                  # root directory for experiment artefacts

# Both derived paths live under "<OUTPUT_PATH>/output_retrieval".
MODEL_LORA_PATH = "{}/output_retrieval/checkpoint-13240".format(OUTPUT_PATH)   # LoRA adapter checkpoint to merge
MODEL_OUTPUT_PATH = "{}/output_retrieval/output_merge".format(OUTPUT_PATH)     # destination of the merged model

In [2]:
!pip install -q datasets
!pip install -q -U bitsandbytes
!pip install transformers==4.45.0
!pip install sentence-transformers==3.1.1



In [4]:
import torch

from transformers import Qwen2Model, Qwen2PreTrainedModel, Qwen2Config, AutoTokenizer
from transformers.models.qwen2.modeling_qwen2 import (
    Qwen2DecoderLayer,
    Qwen2RMSNorm,
    Qwen2Attention,
    Qwen2FlashAttention2,
    Qwen2SdpaAttention,
    Qwen2MLP,
    Qwen2RotaryEmbedding
)
from torch import nn

from peft import PeftModel

class ModifiedQwen2Attention(Qwen2Attention):
    """Eager Qwen2 attention with its ``is_causal`` flag forced to ``False``.

    The constructor forwards every argument to ``Qwen2Attention`` unchanged and
    then overwrites ``is_causal`` on the instance.

    NOTE(review): presumably intended to make attention bidirectional; whether
    the parent forward pass actually honours this flag is not visible in this
    notebook -- confirm against the pinned transformers version.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Must run after the parent constructor so this value is the one kept.
        self.is_causal = False


class ModifiedQwen2FlashAttention2(Qwen2FlashAttention2):
    """FlashAttention-2 Qwen2 attention with ``is_causal`` forced to ``False``.

    The constructor forwards every argument to ``Qwen2FlashAttention2``
    unchanged and then overwrites ``is_causal`` on the instance.

    NOTE(review): presumably intended to make attention bidirectional; how the
    parent forward pass consumes this flag is not visible here -- confirm
    against the pinned transformers version.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Must run after the parent constructor so this value is the one kept.
        self.is_causal = False


class ModifiedQwen2SdpaAttention(Qwen2SdpaAttention):
    """SDPA Qwen2 attention with ``is_causal`` forced to ``False``.

    The constructor forwards every argument to ``Qwen2SdpaAttention`` unchanged
    and then overwrites ``is_causal`` on the instance.

    NOTE(review): presumably intended to make attention bidirectional; whether
    the parent SDPA forward pass reads ``self.is_causal`` at all is not visible
    here -- confirm against the pinned transformers version.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Must run after the parent constructor so this value is the one kept.
        self.is_causal = False


# Dispatch table: attention-implementation name (as found in
# `config._attn_implementation`) -> modified attention class to instantiate.
QWEN2_ATTENTION_CLASSES = dict(
    eager=ModifiedQwen2Attention,
    flash_attention_2=ModifiedQwen2FlashAttention2,
    sdpa=ModifiedQwen2SdpaAttention,
)


class ModifiedQwen2DecoderLayer(Qwen2DecoderLayer):
    """Qwen2 decoder layer that uses the modified attention classes.

    ``Qwen2DecoderLayer.__init__`` is deliberately not called; only
    ``nn.Module.__init__`` runs, and the sub-modules are then created here so
    the attention module can be taken from ``QWEN2_ATTENTION_CLASSES``.

    Args:
        config: Model configuration; ``hidden_size``, ``rms_norm_eps`` and
            ``_attn_implementation`` are read from it.
        layer_idx: Index of this layer, forwarded to the attention module.

    Raises:
        KeyError: If ``config._attn_implementation`` has no entry in
            ``QWEN2_ATTENTION_CLASSES``.
    """

    def __init__(self, config: Qwen2Config, layer_idx: int):
        nn.Module.__init__(self)

        width = config.hidden_size
        norm_eps = config.rms_norm_eps
        attention_cls = QWEN2_ATTENTION_CLASSES[config._attn_implementation]

        self.hidden_size = width

        # Sub-modules are created in the same order as before so that module
        # registration (and therefore parameter ordering) is unchanged.
        self.self_attn = attention_cls(config=config, layer_idx=layer_idx)
        self.mlp = Qwen2MLP(config)
        self.input_layernorm = Qwen2RMSNorm(width, eps=norm_eps)
        self.post_attention_layernorm = Qwen2RMSNorm(width, eps=norm_eps)


class Qwen2BiModel(Qwen2Model):
    """Qwen2 backbone assembled from ``ModifiedQwen2DecoderLayer`` blocks.

    ``Qwen2Model.__init__`` is bypassed (only ``Qwen2PreTrainedModel.__init__``
    runs) so the layer stack can be built from the modified decoder layer.

    NOTE(review): only the constructor is overridden here; no masking logic is
    changed in this class. Whether the resulting attention is actually
    bidirectional depends on the parent forward pass -- confirm for the pinned
    transformers version.

    Args:
        config: Model configuration describing vocabulary size, hidden size,
            number of layers, padding token id and normalisation epsilon.
    """

    # Names the custom layer class instead of the stock one.
    # NOTE(review): presumably consulted when sharding with a device map -- confirm.
    _no_split_modules = ["ModifiedQwen2DecoderLayer"]

    def __init__(self, config: Qwen2Config):
        Qwen2PreTrainedModel.__init__(self, config)

        pad_id = config.pad_token_id
        width = config.hidden_size

        self.padding_idx = pad_id
        self.vocab_size = config.vocab_size

        # Registration order (embed_tokens, layers, norm, rotary_emb) is kept
        # identical to the previous implementation.
        self.embed_tokens = nn.Embedding(config.vocab_size, width, padding_idx=pad_id)

        decoder_stack = []
        for idx in range(config.num_hidden_layers):
            decoder_stack.append(ModifiedQwen2DecoderLayer(config, idx))
        self.layers = nn.ModuleList(decoder_stack)

        self._attn_implementation = config._attn_implementation
        self.norm = Qwen2RMSNorm(width, eps=config.rms_norm_eps)
        self.rotary_emb = Qwen2RotaryEmbedding(config=config)

        self.gradient_checkpointing = False

        # Initialize weights and apply final processing.
        self.post_init()

In [5]:
# Load the base checkpoint into the Qwen2BiModel architecture in bfloat16,
# letting the loader decide device placement (device_map="auto").
base_model = Qwen2BiModel.from_pretrained(
    pretrained_model_name_or_path=MODEL_NAME,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Loading checkpoint shards:   0%|          | 0/8 [00:00<?, ?it/s]

In [11]:
# The tokenizer comes from the base checkpoint (MODEL_NAME), not from the LoRA
# checkpoint directory.
tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path=MODEL_NAME)

In [7]:
# Attach the LoRA adapter stored at MODEL_LORA_PATH to the base model.
model = PeftModel.from_pretrained(model=base_model, model_id=MODEL_LORA_PATH)

In [8]:
# Fold the LoRA weights into the base weights and drop the PEFT wrapper.
# Guarded so the cell is safe to re-run: after the first merge `model` is no
# longer a PeftModel, and calling merge_and_unload() on it again would raise
# AttributeError.
if isinstance(model, PeftModel):
    model = model.merge_and_unload()

# Display the merged architecture as the cell output.
model

Qwen2BiModel(
  (embed_tokens): Embedding(152064, 5120)
  (layers): ModuleList(
    (0-47): 48 x ModifiedQwen2DecoderLayer(
      (self_attn): ModifiedQwen2SdpaAttention(
        (q_proj): Linear(in_features=5120, out_features=5120, bias=True)
        (k_proj): Linear(in_features=5120, out_features=1024, bias=True)
        (v_proj): Linear(in_features=5120, out_features=1024, bias=True)
        (o_proj): Linear(in_features=5120, out_features=5120, bias=False)
        (rotary_emb): Qwen2RotaryEmbedding()
      )
      (mlp): Qwen2MLP(
        (gate_proj): Linear(in_features=5120, out_features=13824, bias=False)
        (up_proj): Linear(in_features=5120, out_features=13824, bias=False)
        (down_proj): Linear(in_features=13824, out_features=5120, bias=False)
        (act_fn): SiLU()
      )
      (input_layernorm): Qwen2RMSNorm((5120,), eps=1e-05)
      (post_attention_layernorm): Qwen2RMSNorm((5120,), eps=1e-05)
    )
  )
  (norm): Qwen2RMSNorm((5120,), eps=1e-05)
  (rotary_emb): Q

In [9]:
# Write the merged model (config + weight shards) to MODEL_OUTPUT_PATH.
model.save_pretrained(save_directory=MODEL_OUTPUT_PATH)

In [12]:
# Store the tokenizer files next to the merged weights so the output directory
# can be loaded on its own; the returned tuple of written paths is the cell output.
tokenizer.save_pretrained(save_directory=MODEL_OUTPUT_PATH)

('/content/drive/MyDrive/kaggle-eedi/exp/exp017_full/output_retrieval/output_merge/tokenizer_config.json',
 '/content/drive/MyDrive/kaggle-eedi/exp/exp017_full/output_retrieval/output_merge/special_tokens_map.json',
 '/content/drive/MyDrive/kaggle-eedi/exp/exp017_full/output_retrieval/output_merge/vocab.json',
 '/content/drive/MyDrive/kaggle-eedi/exp/exp017_full/output_retrieval/output_merge/merges.txt',
 '/content/drive/MyDrive/kaggle-eedi/exp/exp017_full/output_retrieval/output_merge/added_tokens.json',
 '/content/drive/MyDrive/kaggle-eedi/exp/exp017_full/output_retrieval/output_merge/tokenizer.json')

In [13]:
!ls -l $MODEL_OUTPUT_PATH

total 27342699
-rw------- 1 root root        605 Dec 11 15:50 added_tokens.json
-rw------- 1 root root        721 Dec 11 15:48 config.json
-rw------- 1 root root    1671853 Dec 11 15:50 merges.txt
-rw------- 1 root root 4986210800 Dec 11 15:49 model-00001-of-00006.safetensors
-rw------- 1 root root 4954846696 Dec 11 15:49 model-00002-of-00006.safetensors
-rw------- 1 root root 4954846744 Dec 11 15:49 model-00003-of-00006.safetensors
-rw------- 1 root root 4954846744 Dec 11 15:50 model-00004-of-00006.safetensors
-rw------- 1 root root 4954846744 Dec 11 15:50 model-00005-of-00006.safetensors
-rw------- 1 root root 3177397264 Dec 11 15:50 model-00006-of-00006.safetensors
-rw------- 1 root root      43946 Dec 11 15:50 model.safetensors.index.json
-rw------- 1 root root        616 Dec 11 15:50 special_tokens_map.json
-rw------- 1 root root       7229 Dec 11 15:50 tokenizer_config.json
-rw------- 1 root root   11421896 Dec 11 15:50 tokenizer.json
-rw------- 1 root root    2776833 Dec 11 15:5