In [None]:
!pip install transformers accelerate pandas 

In [None]:
# Never paste an access token into the notebook source: it ends up in version
# control and in shared copies. Read it from the HF_TOKEN environment variable,
# or prompt for it without echoing, and keep it only in this process's environment.
# NOTE(review): Hugging Face libraries are expected to pick up HF_TOKEN when
# downloading from the Hub; confirm against the installed huggingface_hub version.
import os
from getpass import getpass

if not os.environ.get("HF_TOKEN"):
    _entered_token = getpass("Hugging Face access token: ").strip()
    if _entered_token:
        os.environ["HF_TOKEN"] = _entered_token

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# Hub identifier of the checkpoint used throughout the notebook.
model_name = "meta-llama/Llama-3.3-70B-Instruct"

# Tokenizer for the same checkpoint; it also carries the chat template.
TOKENIZER = AutoTokenizer.from_pretrained(model_name)

# Weights are loaded in bfloat16, with device placement delegated to
# device_map="auto".
_load_options = {"torch_dtype": torch.bfloat16, "device_map": "auto"}
MODEL = AutoModelForCausalLM.from_pretrained(model_name, **_load_options)


In [None]:
# System message sent with every request (currently empty).
system_prompt = ""


def _known_token_id(token):
    """Return the vocabulary id of ``token``, or None when the tokenizer does not know it.

    ``convert_tokens_to_ids`` maps an unknown token to the tokenizer's
    unknown-token id (which may itself be None); neither is a usable stop id.
    """
    token_id = TOKENIZER.convert_tokens_to_ids(token)
    return None if token_id == TOKENIZER.unk_token_id else token_id


# Token ids that end a generated turn: the tokenizer's EOS token plus the
# end-of-turn markers that actually exist in this vocabulary. "<|eot_id|>" is
# the Llama-3 end-of-turn token; "<|endoftext|>" is kept for tokenizers that
# define it. Missing ids and duplicates are dropped so the list is always a
# clean list of ints.
TERMINATORS = list(
    dict.fromkeys(
        token_id
        for token_id in (
            TOKENIZER.eos_token_id,
            _known_token_id("<|eot_id|>"),
            _known_token_id("<|endoftext|>"),
        )
        if token_id is not None
    )
)

def infer_batch(
    list_of_inputs: list,
    max_new_tokens: int,
    temperature: float,
    top_p: float,
    top_k: int,
    do_sample: bool,
    use_cache: bool,
) -> list[str]:
    """Generate one chat completion per user message in a single batched call.

    Each input is wrapped in a two-message conversation (the module-level
    ``system_prompt`` plus the input as the user turn), rendered with the
    tokenizer's chat template, padded into one batch and passed to
    ``MODEL.generate``.

    Args:
        list_of_inputs: User messages, one per conversation.
        max_new_tokens: Maximum number of tokens to generate per conversation.
        temperature: Sampling temperature (forwarded only when ``do_sample``).
        top_p: Nucleus-sampling threshold (forwarded only when ``do_sample``).
        top_k: Top-k sampling cutoff (forwarded only when ``do_sample``).
        do_sample: Sample when True; otherwise the sampling knobs above are
            not forwarded, since they have no effect on non-sampling decoding.
        use_cache: Forwarded to ``generate`` unchanged.

    Returns:
        The decoded continuations (prompt tokens stripped, special tokens
        removed), in the same order as ``list_of_inputs``. An empty input
        list yields an empty list.

    Side effects:
        If the tokenizer has no padding token, its EOS token is installed as
        the padding token on the shared ``TOKENIZER`` object.
    """
    if not list_of_inputs:
        return []

    prompts = [
        TOKENIZER.apply_chat_template(
            [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": example},
            ],
            tokenize=False,
            add_generation_prompt=True,
        )
        for example in list_of_inputs
    ]

    # Padding a batch requires a pad token; fall back to EOS when none is defined.
    if TOKENIZER.pad_token_id is None:
        TOKENIZER.pad_token = TOKENIZER.eos_token

    # The rendered chat template may already start with the BOS token; letting
    # the tokenizer add special tokens again would then duplicate it.
    bos_token = TOKENIZER.bos_token
    template_has_bos = bool(bos_token) and all(
        prompt.startswith(bos_token) for prompt in prompts
    )

    # A decoder-only model continues from the last position of each row, so
    # padding must sit on the left; otherwise shorter prompts would be
    # continued from pad tokens. The previous setting is restored afterwards.
    previous_padding_side = TOKENIZER.padding_side
    TOKENIZER.padding_side = "left"
    try:
        encoded = TOKENIZER(
            prompts,
            return_tensors="pt",
            padding=True,
            add_special_tokens=not template_has_bos,
        ).to(MODEL.device)
    finally:
        TOKENIZER.padding_side = previous_padding_side

    generation_kwargs = {
        "max_new_tokens": max_new_tokens,
        "do_sample": do_sample,
        "use_cache": use_cache,
        "pad_token_id": TOKENIZER.pad_token_id,
    }
    if do_sample:
        generation_kwargs.update(temperature=temperature, top_p=top_p, top_k=top_k)

    with torch.no_grad():
        outputs = MODEL.generate(**encoded, **generation_kwargs)

    # With left padding every prompt ends at the same column, so everything
    # past that column is newly generated text.
    prompt_length = encoded["input_ids"].shape[-1]
    responses = TOKENIZER.batch_decode(
        outputs[:, prompt_length:], skip_special_tokens=True
    )

    # Drop the batch tensors before asking the allocator to release cached memory.
    del encoded, outputs
    torch.cuda.empty_cache()

    return responses

In [None]:
# Prompt templates keyed by task type. "{v}" takes a single text value;
# the open-book template takes "{context}" and "{question}" instead.
user_prompt = dict(
    summarization=(
        "Summarize the following document:\n"
        "## Document Start ##\n"
        "{v}\n"
        "## Document End ##"
    ),
    open_book_qa=(
        "Answer the following question based on the given context:\n"
        "## Context Start ##\n"
        "{context}\n"
        "## Context End ##\n"
        "## Question Start ##\n"
        "{question}\n"
        "## Question End ##"
    ),
    closed_book_qa=(
        "Answer the following question:\n"
        "## Question Start ##\n"
        "{v}\n"
        "## Question End ##"
    ),
)

In [None]:
def process_input_text(input_text, template_type):
    """Turn raw examples into the list of user messages sent to the model.

    Args:
        input_text: A list of examples. Each example is either a string or a
            dict of template fields. A single string or a single dict is
            treated as a one-example batch (it is NOT iterated character by
            character or key by key).
        template_type: Key into the module-level ``user_prompt`` templates,
            or None to skip templating.

    Returns:
        A list with one prompt string per example:
        * ``template_type`` is None and all examples are dicts: each becomes
          ``"<context>\\n<question>"``.
        * ``template_type`` is None otherwise: the examples are returned
          unchanged.
        * ``template_type`` given and all examples are dicts: the template is
          filled from each dict's keys.
        * ``template_type`` given otherwise: each example fills the template's
          ``{v}`` placeholder.

    Raises:
        KeyError: If ``template_type`` is not a known template, or an example
            lacks a field the template (or the None-template dict path) needs.
    """
    # A bare string or dict is one example; wrap it so the loops below see a
    # batch of one instead of iterating over characters or keys.
    if isinstance(input_text, (str, dict)):
        input_text = [input_text]

    all_dicts = isinstance(input_text, list) and all(
        isinstance(item, dict) for item in input_text
    )

    if template_type is None:
        if all_dicts:
            return [f"{item['context']}\n{item['question']}" for item in input_text]
        return input_text

    template = user_prompt[template_type]
    if all_dicts:
        return [template.format(**item) for item in input_text]
    return [template.format(v=item) for item in input_text]

## Batch inference

### Test 1: PubMed summarization

In [None]:
from datasets import load_dataset

# Fetch the training split of the PubMed summarization dataset from the Hub.
dataset = load_dataset("ccdv/pubmed-summarization", split="train")

# Row indices of the articles that make up this test batch.
entry_numbers = [23, 306, 573, 812]

# Keep only the article text of each selected row.
batch_entries = [
    dataset[row_index]["article"]
    for row_index in entry_numbers
]

# Wrap every article in the summarization prompt template.
process_input = process_input_text(
    batch_entries,
    template_type="summarization",
)

In [None]:
# Sanity check: show the first fully formatted prompt before running the model.
first_prompt = process_input[0]
print(first_prompt)

In [None]:
# Decoding settings for this run.
max_new_tokens = 1024  # upper bound on generated tokens per response
temperature = 0.7
top_p = 0.9
top_k = 50

# Sampling is stochastic: seed the RNG so re-running this cell is repeatable
# on the same hardware and library versions.
SEED = 42
torch.manual_seed(SEED)

# Keyword arguments make the two boolean flags self-explanatory.
res = infer_batch(
    process_input,
    max_new_tokens=max_new_tokens,
    temperature=temperature,
    top_p=top_p,
    top_k=top_k,
    do_sample=True,
    use_cache=True,
)

In [None]:
# Print each generated response with a 1-based label, followed by an
# 80-character rule so long outputs stay visually separated.
separator = "-" * 80
for position, response in enumerate(res, start=1):
    print(f"Response {position}: {response}")
    print(separator)