In [1]:
import os
import torch
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    AutoModel,
    BitsAndBytesConfig,
    TrainingArguments,
)

# Base checkpoint that is loaded here in quantised form.
model_name = "NousResearch/Llama-2-7b-chat-hf"

# Compute dtype handed to bitsandbytes via `bnb_4bit_compute_dtype`.
compute_dtype = torch.float16

# 4-bit NF4 quantisation with double quantisation switched on.
bnb_config = BitsAndBytesConfig(
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    load_in_4bit=True,
)

# NOTE(review): `use_cache=False` is usually a training-time setting; this
# notebook only generates text, so confirm that it is still intended here.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    quantization_config=bnb_config,
    use_cache=False,
)

  from .autonotebook import tqdm as notebook_tqdm


[2023-11-22 17:06:43,509] [INFO] [real_accelerator.py:158:get_accelerator] Setting ds_accelerator to cuda (auto detect)


Loading checkpoint shards: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:03<00:00,  1.98s/it]


In [2]:
from peft import AutoPeftModelForCausalLM, PeftModel
from transformers import AutoTokenizer

In [3]:
import deepspeed
# Which saved checkpoint to evaluate; the name is reused later for the
# output file of the generation cell.
checkpoint_number = "checkpoint-2"
checkpoint_path = f"llama/{checkpoint_number}"
model_path = ""  # not used by the visible cells

# Wrap the quantised base model with the weights stored under the checkpoint
# directory.
peft_model = PeftModel.from_pretrained(
    model,
    checkpoint_path,
)

In [4]:
# DeepSpeed inference settings. Only the dtype is set; the `kernel_inject`,
# `tensor_parallel` and `enable_cuda_graph` options stay disabled.
config = {"dtype": "fp16"}

# Hand the PEFT model to DeepSpeed and keep the wrapped module for generation.
ds_engine = deepspeed.init_inference(
    peft_model,
    config=config,
)
deepspeed_peft_model = ds_engine.module

[2023-11-22 17:07:04,144] [INFO] [logging.py:96:log_dist] [Rank -1] DeepSpeed info: version=0.12.3, git-hash=unknown, git-branch=unknown
[2023-11-22 17:07:04,147] [INFO] [logging.py:96:log_dist] [Rank -1] quantize_bits = 8 mlp_extra_grouping = False, quantize_groups = 1


In [5]:
# Display the module tree of the DeepSpeed-wrapped model to inspect its layers.
deepspeed_peft_model

PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): LlamaForCausalLM(
      (model): LlamaModel(
        (embed_tokens): Embedding(32000, 4096, padding_idx=0)
        (layers): ModuleList(
          (0-31): 32 x LlamaDecoderLayer(
            (self_attn): LlamaAttention(
              (q_proj): Linear4bit(
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.1, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=4096, out_features=8, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=8, out_features=4096, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
                (base_layer): Linear4bit(in_features=4096, out_features=4096, bias=False)
              )
              (k_proj): Linear4bit(in_features=4096, out_features=4096, bia

In [6]:
# Load the tokenizer from the checkpoint directory (the original inline note
# lists `model_name` as the alternative source).
restored_tokenizer = AutoTokenizer.from_pretrained(
    checkpoint_path,
)

In [7]:
import pandas as pd
import time

# Load the tab-separated evaluation conversations. The first column of the
# file is a saved row index (it is read back as an "Unnamed: 0" data column
# otherwise), so use it as the DataFrame index instead.
eval_conv = pd.read_csv("../data/eval_dataset.csv", sep="\t", index_col=0)
eval_conv

Unnamed: 0,conversations,tokens
0,"\n\n<s>[INST] Hi there, I have seen several po...",364
1,\n\n<s>[INST] Need an AI ML based variant call...,90
2,"\n\n<s>[INST] Hello,\nI am trying to assemble ...",281
3,"\n\n<s>[INST] Hello,\nI am developing galaxy t...",465
4,\n\n<s>[INST] I am trying to do the minimap2 w...,166
5,"\n\n<s>[INST] Hi all,\n\nScreen Shot 2022-05-0...",640
6,\n\n<s>[INST] Starting server in PID 30796.\ns...,768
7,"\n\n<s>[INST] Hi,\nI have two assembled fasta ...",157
8,\n\n<s>[INST] I’m a relatively savvy Galaxy us...,174
9,\n\n<s>[INST] The QualiMap Multi-Sample BamQC ...,490


In [8]:
# Generate answers for the first evaluation prompts, print each one next to
# its reference answer, and write everything to a tab-separated file.
predictions = []
original_instructions = []
ground_truth_answer = []
extracted_answers = []

start_marker = '<s>[INST]'
end_marker = '[/INST]'
end_tag = "</s>"
# Number of evaluation rows to process (the loop used to stop at row index 5).
max_prompts = 6


def _split_conversation(entire_conv, start_marker, end_marker, end_tag):
    """Split one conversation string into instruction, prompt and answer.

    Args:
        entire_conv: text of the form
            ``<start_marker> instruction <end_marker> answer <end_tag>``.
        start_marker: string that opens the instruction.
        end_marker: string that closes the instruction.
        end_tag: optional closing tag at the very end of the answer.

    Returns:
        A tuple ``(instruction, prompt, answer)``, or ``None`` when the value
        is not a string or one of the two markers cannot be found.
    """
    if not isinstance(entire_conv, str):
        return None
    start_index = entire_conv.find(start_marker)
    if start_index == -1:
        return None
    end_index = entire_conv.find(end_marker, start_index + len(start_marker))
    if end_index == -1:
        return None
    instruction = entire_conv[start_index + len(start_marker):end_index].strip()
    prompt = entire_conv[start_index:end_index + len(end_marker)].strip()
    answer = entire_conv[end_index + len(end_marker):].strip()
    # Remove the closing tag only when it is really present. Cutting a fixed
    # number of characters would drop the last character of the answer
    # whenever the tag follows it without a separating space.
    if end_tag and answer.endswith(end_tag):
        answer = answer[:-len(end_tag)].strip()
    return instruction, prompt, answer


s_time = time.time()

# Count prompts by position so that numbering and the row limit do not depend
# on the DataFrame's index labels.
for prompt_number, entire_conv in enumerate(eval_conv["conversations"].head(max_prompts), start=1):
    parts = _split_conversation(entire_conv, start_marker, end_marker, end_tag)
    if parts is None:
        print("skipping prompt number {}: instruction markers not found".format(prompt_number))
        continue
    instruction, prompt, original_answer = parts

    print("encoding prompt number {}...".format(prompt_number))
    # NOTE(review): the prompt text already begins with "<s>" and encode() may
    # add its own BOS token on top - confirm this matches the training setup.
    input_ids = restored_tokenizer.encode(prompt, return_tensors="pt").to('cuda')
    print("generating response number {} ...".format(prompt_number))
    # NOTE(review): sampling is unseeded, so answers differ between runs.
    outputs = deepspeed_peft_model.generate(
        input_ids=input_ids,
        max_new_tokens=256,
        do_sample=True,
    )
    pred = restored_tokenizer.decode(outputs[0])

    # Keep only the text after the instruction's closing marker; fall back to
    # the whole decoded text if the marker did not survive decoding.
    marker_pos = pred.find(end_marker)
    if marker_pos == -1:
        extracted_pred = pred.strip()
    else:
        extracted_pred = pred[marker_pos + len(end_marker):].strip()

    # Append all four values together so the result lists stay aligned.
    original_instructions.append(instruction)
    ground_truth_answer.append(original_answer)
    predictions.append(pred)
    extracted_answers.append(extracted_pred)

    print("Instruction: \n")
    print(instruction)
    print()
    print("Ground truth answer: \n")
    print(original_answer)
    print()
    print("Generated answer: \n")
    print(extracted_pred)
    print()
    print("====================")

output_file_name = "generated_answers_peft_{}".format(checkpoint_number)
pred_dataframe = pd.DataFrame(
    zip(original_instructions, ground_truth_answer, extracted_answers, predictions),
    columns=["Instructions", "Ground truth answers", "Predicted answers", "Full generated answers"],
)
pred_dataframe.to_csv("../data/{}.csv".format(output_file_name), sep="\t", index=False)

e_time = time.time()

print("Finished generation in {} seconds".format(e_time - s_time))

encoding prompt number 1...
generating response number 1 ...




Instruction: 

Hi there, I have seen several posts pertaining to annotation of the DESEQ2 output GeneID with gene name (ENSRNOG00000046834 etc) and for mice and humans it works easily with the AnnotateMyIDs tool.  I work with rat tissue.  I have installed this AnnotateMyIDs on our server, but it does not support rats. I have tried to fiddle with the code to get it to accept rats too, but have failed so far and am worried about stuffing the server!
Is there a way to add rat support to this, as admin?  IF not is it possible to make a feature request somewhere for this? …It was developed by IUC. I don’t know who that is. I ran  Uniprot mapping and this gives a list of the converted gene names, but not in the same file (unless I did it wrong). Is there an easier way? Thanks.
I have written a program to do this in Python outside of Galaxy. Be nice to all be inside though!

Ground truth answer: 

I’ve added rat to annotatemyids and submitted a PR to the IUC repo here, let’s see if it’s accep

KeyboardInterrupt: 