In [1]:
import os
import torch
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    AutoModel,
    BitsAndBytesConfig,
    TrainingArguments,
    TextIteratorStreamer
)

  from .autonotebook import tqdm as notebook_tqdm


[2023-12-08 16:31:26,288] [INFO] [real_accelerator.py:158:get_accelerator] Setting ds_accelerator to cuda (auto detect)


In [2]:
import deepspeed
from peft import AutoPeftModelForCausalLM
from transformers import AutoTokenizer
import torch
from peft import AutoPeftModelForCausalLM, PeftModel
from transformers import AutoTokenizer

# --- Where the fine-tuned adapter checkpoint and its base model live ---
base_dir = "llama-linear-layers-all-conv-Dec-08-02"
checkpoint_number = "checkpoint-2"
# Adapter directory is "<base_dir>/<checkpoint_number>".
checkpoint_path = "/".join((base_dir, checkpoint_number))
model_name = "NousResearch/Llama-2-7b-chat-hf"

# dtype handed to bitsandbytes as the 4-bit compute dtype.
compute_dtype = torch.float16

# Quantization settings: 4-bit loading, "nf4" quant type, double quantization.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=compute_dtype,
)

# Base model, loaded with the quantization config above.
# NOTE(review): use_cache=False is kept exactly as in the original cell;
# confirm it is actually wanted for an inference-only notebook.
original_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    use_cache=False,
    quantization_config=bnb_config,
)

# Wrap the base model with the checkpoint's adapter, then keep the object
# returned by merge_and_unload() under the same name.
peft_model = PeftModel.from_pretrained(original_model, checkpoint_path)
peft_model = peft_model.merge_and_unload()

# The tokenizer is loaded from the base model id, not from the checkpoint directory.
restored_tokenizer = AutoTokenizer.from_pretrained(model_name)

Loading checkpoint shards: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:07<00:00,  3.69s/it]


In [3]:
import pandas as pd
import time

# Read the tab-separated evaluation set into a DataFrame.
eval_conv = pd.read_csv(
    "../data/eval_dataset.csv",
    sep="\t",
)
# Bare last expression so the notebook renders the frame.
eval_conv

Unnamed: 0,conversations,tokens
0,Act like Bioinformatician who uses Galaxy plat...,301
1,Act like Bioinformatician who uses Galaxy plat...,410
2,Act like Bioinformatician who uses Galaxy plat...,185
3,Act like Bioinformatician who uses Galaxy plat...,563
4,Act like Bioinformatician who uses Galaxy plat...,173
5,Act like Bioinformatician who uses Galaxy plat...,191
6,Act like Bioinformatician who uses Galaxy plat...,455
7,Act like Bioinformatician who uses Galaxy plat...,565
8,Act like Bioinformatician who uses Galaxy plat...,339


In [4]:
# Generate an answer for each evaluation conversation and save the results.
#
# Each value in eval_conv["conversations"] is split on two markers:
#   * prompt       : text from `start_marker` through `end_marker` (inclusive)
#   * instruction  : the same span without the closing `end_marker`
#   * ground truth : everything after `end_marker` (minus a trailing `end_tag`)
# The prompt is fed to `peft_model.generate`; the full decoded sequence and the
# newly generated part are collected and written to a tab-separated CSV.
predictions = []
original_instructions = []
ground_truth_answer = []
extracted_answers = []

start_marker = 'Act like'#'[INST]'
end_marker = '[/INST]'
end_tag = ""

# Number of leading rows to run. The original cell stopped after the row with
# index label 1 (i.e. two rows); set to None to run the whole evaluation set.
max_examples = 2

s_time = time.time()

# Send inputs to wherever the model lives instead of hard-coding 'cuda'.
device = peft_model.device

eval_rows = eval_conv if max_examples is None else eval_conv.head(max_examples)

for example_number, (ri, row) in enumerate(eval_rows.iterrows(), start=1):
    entire_conv = row["conversations"]
    start_index = entire_conv.find(start_marker)
    end_index = entire_conv.find(end_marker)

    # str.find returns -1 when a marker is absent; slicing with that would
    # silently produce a wrong prompt/answer, so skip such rows explicitly.
    if start_index < 0 or end_index < start_index:
        print("Skipping row {}: start/end marker not found".format(ri))
        continue

    prompt_end = end_index + len(end_marker)
    instruction = entire_conv[start_index:end_index].strip()
    prompt = entire_conv[start_index:prompt_end].strip()

    # Keep the whole answer. The previous slice always dropped the last
    # character (a fixed "- 1"), truncating the answer when end_tag is empty.
    answer_text = entire_conv[prompt_end:].strip()
    if end_tag and answer_text.endswith(end_tag):
        answer_text = answer_text[:-len(end_tag)]
    original_answer = answer_text.strip()

    print("Prompt: \n")
    print(prompt)
    print("Instruction: \n")
    print(instruction)
    print()
    print("Ground truth answer: \n")
    print(original_answer)
    print()

    print("encoding prompt number {}...".format(example_number))
    input_ids = restored_tokenizer.encode(prompt, return_tensors="pt").to(device)

    print("generating response number {} ...".format(example_number))
    # NOTE(review): sampling is enabled and no seed is set in the visible
    # cells, so answers will differ between runs; seed if that matters.
    outputs = peft_model.generate(
        input_ids=input_ids,
        max_new_tokens=256,
        do_sample=True,
    )

    # Full decoded sequence (prompt + continuation), kept for inspection.
    pred = restored_tokenizer.decode(outputs[0])

    # The answer is exactly the tokens appended after the prompt. Decoding only
    # those avoids searching the text for '[/INST]' and keeps special tokens
    # out of the saved answer.
    new_token_ids = outputs[0][input_ids.shape[-1]:]
    extracted_pred = restored_tokenizer.decode(new_token_ids, skip_special_tokens=True).strip()

    original_instructions.append(instruction)
    ground_truth_answer.append(original_answer)
    predictions.append(pred)
    extracted_answers.append(extracted_pred)

    print("Generated answer: \n")
    print(extracted_pred)
    print()
    print("====================")

output_file_name = "generated_answers_peft_{}_{}".format(base_dir, checkpoint_number)

pred_dataframe = pd.DataFrame({
    "Instructions": original_instructions,
    "Ground truth answers": ground_truth_answer,
    "Predicted answers": extracted_answers,
    "Full generated answers": predictions,
})

pred_dataframe.to_csv("../data/{}.csv".format(output_file_name), sep="\t", index=False)

e_time = time.time()

print("Finished generation in {} seconds".format(e_time - s_time))

Prompt: 

Act like Bioinformatician who uses Galaxy platform for biological data analysis. Understand the following instruction and prepare a suitable response.

[INST] hello, i am trying to assemble a plant genome using ont data with flye. prior to launch the assembly with all my data (about 37 gb in fastqsanger.gz) i recently tried using half of them and the job was successful after 2 days running with a descent genome size obtained. i then launched with all the available sequences (37 gb in fastqsanger.gz) and it is now been running for 5 days. i am just wondering if i am using more ressources than allowed for this tool or if it is still running. what makes me worrying is that now my quota usage now indicates 0% suggesting there might be a bug somewhere ? thank you very much. ben [/INST]
Instruction: 

Act like Bioinformatician who uses Galaxy platform for biological data analysis. Understand the following instruction and prepare a suitable response.

[INST] hello, i am trying to as



Generated answer: 

Hello Ben,

Thank you for reaching out! I'm happy to help you troubleshoot your issue with assembling a plant genome using the Galaxy platform and the Flye tool.

Based on your message, it seems that you have been running the Flye assembly job with all 37 GB of your FASTQ data, and it has been running for 5 days without completing. This suggests that the job is indeed using more resources than expected, possibly due to the large amount of data you are trying to assemble.

Quota usage of 0% could indicate a bug in the system, but it's also possible that the job is simply taking longer than expected to complete due to the large size of your data.

To help you troubleshoot this issue, I have a few questions:

1. Have you checked the Flye documentation to see if there are any recommendations for the minimum amount of memory and resources required for large genome assemblies?
2. How many CPU cores does your machine have? Does the Flye job use multiple CPU cores, or does 

In [None]:
### Deepspeed

# Disabled experiment: everything below is wrapped in a triple-quoted string,
# so none of it executes (the cell's only effect, if run, is to evaluate that
# string). The snippet inside would pass `peft_model` and a config dict
# (dtype torch.float16; the other keys are commented out) to
# deepspeed.init_inference and keep the returned engine's `.module` as
# `deepspeed_peft_model`.
'''config = {
    #"kernel_inject": True,
    #"tensor_parallel": {"tp_size": 4},
    "dtype": torch.float16 #"fp16",
    #"enable_cuda_graph": False
}

ds_engine = deepspeed.init_inference(peft_model, config=config) #config=config
deepspeed_peft_model = ds_engine.module'''