In [2]:
import torch
from peft import PeftModel, PeftConfig
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig

MODEL_NAME = "IlyaGusev/saiga_mistral_7b_lora"

# Load the model: the adapter config exposes the name of the base checkpoint,
# the base weights are loaded in float16, and the adapter stored under
# MODEL_NAME is attached on top of them.
config = PeftConfig.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(
    config.base_model_name_or_path,
    torch_dtype=torch.float16,
    device_map="auto",  # device placement is decided here, at load time
    offload_folder="offload",
)
model = PeftModel.from_pretrained(
    model,
    MODEL_NAME,
    torch_dtype=torch.float16,
    offload_folder="offload",
)
model.eval()
# NOTE: the model is intentionally NOT moved with model.to('cuda').
# With device_map="auto" (and an offload folder) the weights are already
# dispatched to their devices; moving a dispatched model afterwards is
# unsupported and breaks as soon as any module lives on CPU or disk.
# Inputs are sent to model.device inside generate() instead.

# Set up the tokenizer and the generation settings shipped with the adapter repo
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=False)
generation_config = GenerationConfig.from_pretrained(MODEL_NAME)

# Функция для обработки запросов
def generate(model, tokenizer, prompt, generation_config):
    """Generate a completion for ``prompt`` and return only the newly produced text.

    Args:
        model: object exposing ``.device`` and ``.generate(**inputs, generation_config=...)``.
        tokenizer: callable that encodes the prompt into tensors and exposes ``.decode``.
        prompt: fully formatted prompt string; no special tokens are added
            during encoding, so any markers must already be part of the text.
        generation_config: forwarded unchanged to ``model.generate``.

    Returns:
        The decoded continuation (prompt tokens removed, special tokens
        skipped) with surrounding whitespace stripped.
    """
    encoded = tokenizer(prompt, return_tensors="pt", add_special_tokens=False)

    # Put every encoded tensor on the device the model reports.
    inputs = {}
    for name, tensor in encoded.items():
        inputs[name] = tensor.to(model.device)

    # Only the first (and only) sequence of the batch is used.
    sequences = model.generate(**inputs, generation_config=generation_config)
    first_sequence = sequences[0]

    # The returned sequence starts with the prompt tokens; drop them.
    prompt_length = len(inputs["input_ids"][0])
    completion_ids = first_sequence[prompt_length:]

    text = tokenizer.decode(completion_ids, skip_special_tokens=True)
    return text.strip()

# # Формируем запрос
# PROMT_TEMPLATE = '<s>system\nТы — Сайга, русскоязычный автоматический ассистент. Ты разговариваешь с людьми и помогаешь им.</s><s>user\n{inp}</s><s>bot\n'
# inp = 'Какое расстояние до Луны?'
# prompt = PROMT_TEMPLATE.format(inp=inp)

# # Отправляем запрос в llm
# output = generate(model, tokenizer, prompt, generation_config)

# print(output)



Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Downloading tokenizer_config.json:   0%|          | 0.00/1.69k [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


Downloading tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

Downloading added_tokens.json:   0%|          | 0.00/90.0 [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/101 [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [3]:
# Display the generation settings loaded from MODEL_NAME in the previous cell.
generation_config

GenerationConfig {
  "bos_token_id": 1,
  "do_sample": true,
  "eos_token_id": 2,
  "max_new_tokens": 1536,
  "no_repeat_ngram_size": 15,
  "pad_token_id": 0,
  "repetition_penalty": 1.1,
  "temperature": 0.2,
  "top_k": 40,
  "top_p": 0.9
}

In [4]:
# Build the prompt.
# The template spells out the <s>...</s> turn markers (system / user / bot) as
# plain text because generate() tokenizes with add_special_tokens=False.
# It ends with an open "bot" turn for the model to complete.
# System message (Russian): "You are Saiga, a Russian-language automatic
# assistant. You talk to people and help them."
PROMT_TEMPLATE = '<s>system\nТы — Сайга, русскоязычный автоматический ассистент. Ты разговариваешь с людьми и помогаешь им.</s><s>user\n{inp}</s><s>bot\n'
# User question (Russian): "What is the distance to the Moon?"
inp = 'Какое расстояние до Луны?'
prompt = PROMT_TEMPLATE.format(inp=inp)

# Send the prompt to the LLM
output = generate(model, tokenizer, prompt, generation_config)

print(output)

Расстояние от Земли до Луны варьируется от 356 400 км (221 340 миль) до 405 400 км (252 000 миль). Это зависит от фазы Луны и её положения относительно Земли.


---

In [1]:
import torch
from peft import PeftModel, PeftConfig
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig

MODEL_NAME = "IlyaGusev/saiga_mistral_7b_lora"

# Load the model
# The adapter config exposes the base checkpoint name (base_model_name_or_path).
config = PeftConfig.from_pretrained(MODEL_NAME)
# Base model in float16, with automatic device placement and an offload folder.
model = AutoModelForCausalLM.from_pretrained(
	config.base_model_name_or_path,
	torch_dtype=torch.float16,
	device_map="auto",
    offload_folder="offload", 
)
# Wrap the base model with the adapter weights stored under MODEL_NAME.
model = PeftModel.from_pretrained(
	model,
	MODEL_NAME,
	torch_dtype=torch.float16,
    offload_folder="offload", 
)

  _torch_pytree._register_pytree_node(
  _torch_pytree._register_pytree_node(


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]



In [4]:
# Total number of scalar parameters in the adapter-wrapped model.
# numel() counts the elements of a tensor of any rank, so this stays correct
# for 0-dim or >2-dim parameters (the manual shape[0] * shape[1] product only
# handled rank 1 and rank 2).
sum(param.numel() for param in model.parameters())

7255379968

In [3]:
# Shape of every parameter tensor, in model.parameters() order.
[param.shape for param in model.parameters()]

[torch.Size([32002, 4096]),
 torch.Size([4096, 4096]),
 torch.Size([16, 4096]),
 torch.Size([4096, 16]),
 torch.Size([1024, 4096]),
 torch.Size([16, 4096]),
 torch.Size([1024, 16]),
 torch.Size([1024, 4096]),
 torch.Size([16, 4096]),
 torch.Size([1024, 16]),
 torch.Size([4096, 4096]),
 torch.Size([16, 4096]),
 torch.Size([4096, 16]),
 torch.Size([14336, 4096]),
 torch.Size([14336, 4096]),
 torch.Size([4096, 14336]),
 torch.Size([4096]),
 torch.Size([4096]),
 torch.Size([4096, 4096]),
 torch.Size([16, 4096]),
 torch.Size([4096, 16]),
 torch.Size([1024, 4096]),
 torch.Size([16, 4096]),
 torch.Size([1024, 16]),
 torch.Size([1024, 4096]),
 torch.Size([16, 4096]),
 torch.Size([1024, 16]),
 torch.Size([4096, 4096]),
 torch.Size([16, 4096]),
 torch.Size([4096, 16]),
 torch.Size([14336, 4096]),
 torch.Size([14336, 4096]),
 torch.Size([4096, 14336]),
 torch.Size([4096]),
 torch.Size([4096]),
 torch.Size([4096, 4096]),
 torch.Size([16, 4096]),
 torch.Size([4096, 16]),
 torch.Size([1024, 4096]),


In [5]:
# Display the module tree of the adapter-wrapped model.
model

PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): MistralForCausalLM(
      (model): MistralModel(
        (embed_tokens): Embedding(32002, 4096)
        (layers): ModuleList(
          (0-31): 32 x MistralDecoderLayer(
            (self_attn): MistralAttention(
              (q_proj): lora.Linear(
                (base_layer): Linear(in_features=4096, out_features=4096, bias=False)
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.05, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=4096, out_features=16, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=16, out_features=4096, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
              )
              (k_proj): lora.Linear(
                (base_layer): Linear(in_featu

---

In [1]:
import torch
from peft import PeftModel, PeftConfig
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig

MODEL_NAME = "Open-Orca/Mistral-7B-OpenOrca"

# Load the model
# No adapter is involved here: MODEL_NAME is loaded directly, so the adapter
# config lookup below is left disabled.
# config = PeftConfig.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(
	MODEL_NAME,
	torch_dtype=torch.float16,
	device_map="auto",
    offload_folder="offload", 
)

  _torch_pytree._register_pytree_node(
  _torch_pytree._register_pytree_node(


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]



In [2]:
# Total number of scalar parameters in the plain (adapter-free) model.
# numel() counts the elements of a tensor of any rank, so this stays correct
# for 0-dim or >2-dim parameters (the manual shape[0] * shape[1] product only
# handled rank 1 and rank 2).
sum(param.numel() for param in model.parameters())

7241748480

In [3]:
# Shape of every parameter tensor, in model.parameters() order.
[param.shape for param in model.parameters()]

[torch.Size([32002, 4096]),
 torch.Size([4096, 4096]),
 torch.Size([1024, 4096]),
 torch.Size([1024, 4096]),
 torch.Size([4096, 4096]),
 torch.Size([14336, 4096]),
 torch.Size([14336, 4096]),
 torch.Size([4096, 14336]),
 torch.Size([4096]),
 torch.Size([4096]),
 torch.Size([4096, 4096]),
 torch.Size([1024, 4096]),
 torch.Size([1024, 4096]),
 torch.Size([4096, 4096]),
 torch.Size([14336, 4096]),
 torch.Size([14336, 4096]),
 torch.Size([4096, 14336]),
 torch.Size([4096]),
 torch.Size([4096]),
 torch.Size([4096, 4096]),
 torch.Size([1024, 4096]),
 torch.Size([1024, 4096]),
 torch.Size([4096, 4096]),
 torch.Size([14336, 4096]),
 torch.Size([14336, 4096]),
 torch.Size([4096, 14336]),
 torch.Size([4096]),
 torch.Size([4096]),
 torch.Size([4096, 4096]),
 torch.Size([1024, 4096]),
 torch.Size([1024, 4096]),
 torch.Size([4096, 4096]),
 torch.Size([14336, 4096]),
 torch.Size([14336, 4096]),
 torch.Size([4096, 14336]),
 torch.Size([4096]),
 torch.Size([4096]),
 torch.Size([4096, 4096]),
 torch.Si

In [4]:
# Display the module tree of the plain MistralForCausalLM model.
model

MistralForCausalLM(
  (model): MistralModel(
    (embed_tokens): Embedding(32002, 4096)
    (layers): ModuleList(
      (0-31): 32 x MistralDecoderLayer(
        (self_attn): MistralAttention(
          (q_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (k_proj): Linear(in_features=4096, out_features=1024, bias=False)
          (v_proj): Linear(in_features=4096, out_features=1024, bias=False)
          (o_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (rotary_emb): MistralRotaryEmbedding()
        )
        (mlp): MistralMLP(
          (gate_proj): Linear(in_features=4096, out_features=14336, bias=False)
          (up_proj): Linear(in_features=4096, out_features=14336, bias=False)
          (down_proj): Linear(in_features=14336, out_features=4096, bias=False)
          (act_fn): SiLUActivation()
        )
        (input_layernorm): MistralRMSNorm()
        (post_attention_layernorm): MistralRMSNorm()
      )
    )
    (norm): MistralRM

In [5]:
# Difference between the two parameter totals printed earlier (values are
# hard-coded from those outputs): adapter-wrapped model minus plain model.
# The result matches rank-16 A/B pairs on q/k/v/o across 32 decoder layers:
#   32 * (2 * (16*4096 + 4096*16) + 2 * (16*4096 + 1024*16)) = 13631488
7255379968 - 7241748480

13631488