In [2]:
import pandas as pd
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load data
file_path = 'manual_testing_hebrew_llm.xlsx'
data = pd.read_excel(file_path)

# Initialize model and tokenizer
device = "cuda" if torch.cuda.is_available() else "cpu"
model = AutoModelForCausalLM.from_pretrained("dicta-il/dictalm2.0-instruct", torch_dtype=torch.bfloat16,
                                             device_map=device)
tokenizer = AutoTokenizer.from_pretrained("dicta-il/dictalm2.0-instruct")


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/625 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/3 [00:00<?, ?it/s]

model-00001-of-00003.safetensors:   0%|          | 0.00/4.95G [00:00<?, ?B/s]

model-00002-of-00003.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00003-of-00003.safetensors:   0%|          | 0.00/4.55G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/111 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.43k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/513k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.86M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

In [11]:
# IF SYSTEM PROMPT IS AVAILABLE

def generate_prompt(row):
    # Extract the system prompt from the row
    system_prompt = row['prompt']

    # Generate user content based on the mission number
    if row['mission'] == 2:
        user_content = f"Article:\n{row['text1']}\nSubtitles:\n{row['text2']}"
    elif row['mission'] == 3:
        # For mission 3, use text2 as the user question
        user_content = f"Question:\n{row['text2']}"
    elif row['mission'] == 4:
        user_content = f"Paragraph:\n{row['text1']}\nAnswer:\n{row['text2']}"
    elif row['mission'] == 5:
        user_content = f"Article:\n{row['text1']}"
    else:
        raise ValueError(f"Unsupported mission type: {row['mission']}")

    # Combine system and user messages
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_content}
    ]

    return messages


# Function to generate text using the model with system and user messages
def generate_text(messages):
    # Extract system and user messages for clarity
    system_message = messages[0]['content']
    user_message = messages[1]['content']

    # Add instruction tokens around the system and user messages
    messages_with_inst = f"[INST] {system_message} [/INST] {user_message} [/INST]"

    # Tokenize and encode the messages with instructions
    encoded = tokenizer(messages_with_inst, return_tensors="pt").to(device)

    # Generate response from the model
    generated_ids = model.generate(encoded['input_ids'], max_new_tokens=500, do_sample=True)

    # Decode the generated response
    decoded_output = tokenizer.decode(generated_ids[0], skip_special_tokens=True)

    # Return the formatted output with system and user content
    return {
        "system_prompt": system_message,
        "user_content": user_message,
        "model_output": decoded_output
    }


In [None]:
#IF SYSTEM PROMPT IS NOT AVAILABLE

def generate_prompt_no_system(row):
    prompt_context = row['prompt']

    # Generate user content based on the mission number
    if row['mission'] == 2:
        user_content = f"{prompt_context}\nArticle:\n{row['text1']}\nSubtitles:\n{row['text2']}"
    elif row['mission'] == 3:
        user_content = f"{prompt_context}\nParagraph:\n{row['text1']}\nQuestion:\n{row['text2']}"
    elif row['mission'] == 4:
        user_content = f"{prompt_context}\nParagraph:\n{row['text1']}\nAnswer:\n{row['text2']}"
    elif row['mission'] == 5:
        user_content = f"{prompt_context}\nArticle:\n{row['text1']}"
    else:
        raise ValueError(f"Unsupported mission type: {row['mission']}")

    return user_content

# Function to generate text using the model
def generate_text_no_system(user_content):
    # Add instruction tokens
    user_content_with_inst = f"[INST] {user_content} [/INST]"

    # Tokenize and encode the messages
    encoded = tokenizer(user_content_with_inst, return_tensors="pt").to(device)

    # Generate response
    generated_ids = model.generate(encoded['input_ids'], max_new_tokens=200, do_sample=True)

    # Decode generated response
    decoded_output = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
    return decoded_output

In [12]:
# Loop through each row and generate outputs
for index, row in data.iterrows():
    if index < 1:
        # Generate the prompt messages based on the row's mission
        messages = generate_prompt(row)

        # Print the mission and prompt details clearly
        print(f"Mission {row['mission']} - System Prompt:\n{messages[0]['content']}\n")
        print(f"User Content:\n{messages[1]['content']}\n")

        # Generate and display model output in a clearer format
        output = generate_text(messages)
        print(f"Model Output:\n{output['model_output']}\n")
    else:
        break

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Mission 3 - System Prompt:
[{“role”: “system”, “content”: f“You will be provided with a paragraph resourced from Wikipedia or from GeeksforGeeks, more over you will be provided with a question for which the answer can be found in the passage, both the passage and question are written in Hebrew language. Your tasks are:
1. Locate or slightly infer the answer to the given question.
2. Respond shortly with the right answer.
Consider these guidelines:
{guidelines}
 Please respond only with the following JSON format:\{‘explanation’: string, ‘answer’: string\}
 If you can’t find the right answer please respond only with the following JSON format:\{‘explanation’: ‘’, ‘answer’: ‘I don’t know\}”}, {“role”: “user”, “content”: f”Paragraph:
{
"ספירת מפקדי חיל האוויר הרשמית מתחילה בישראל עמיר, אך למעשה לפניו מונו שני מפקדים: יהושע אייזיק (אשל), שהיה מפקד "המועצה הכללית לתעופה" ומפקד שירות האוויר של ההגנה עוד בטרם שינה זה את שמו ל"חיל האוויר" ואלכס זילוני, ששירת כמוביל בחיל האוויר המלכותי של בריטניה

In [13]:
# IF SYSTEM PROMPT IS AVAILABLE

def generate_prompt(row):
    # Extract the system prompt from the row
    system_prompt = row['prompt']

    # Generate user content based on the mission number
    if row['mission'] == 2:
        user_content = f"Article:\n{row['text1']}\nSubtitles:\n{row['text2']}"
    elif row['mission'] == 3:
        # For mission 3, use text2 as the user question
        user_content = f"Question:\n{row['text2']}"
    elif row['mission'] == 4:
        user_content = f"Paragraph:\n{row['text1']}\nAnswer:\n{row['text2']}"
    elif row['mission'] == 5:
        user_content = f"Article:\n{row['text1']}"
    else:
        raise ValueError(f"Unsupported mission type: {row['mission']}")

    # Combine system and user messages
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_content}
    ]

    return messages


# Function to generate text using the model with system and user messages
def generate_text(messages):
    # Extract system and user messages for clarity
    system_message = messages[0]['content']
    user_message = messages[1]['content']

    # Add instruction tokens around the system and user messages
    messages_with_inst = f"[INST] {system_message} [/INST] {user_message} [/INST]"

    # Tokenize and encode the messages with instructions
    encoded = tokenizer(messages_with_inst, return_tensors="pt").to(device)

    # Generate response from the model
    generated_ids = model.generate(encoded['input_ids'], max_new_tokens=500, do_sample=True)

    # Decode the generated response
    decoded_output = tokenizer.decode(generated_ids[0], skip_special_tokens=True)

    # Return the formatted output with system and user content
    return {
        "system_prompt": system_message,
        "user_content": user_message,
        "model_output": decoded_output
    }


# Loop through each row and generate outputs
dictalm_answers = []
for index, row in data.iterrows():
    # Generate the prompt messages based on the row's mission
    messages = generate_prompt(row)

    # Generate model output
    output = generate_text(messages)

    # Extract the final answer after the last [/INST] token
    model_output = output['model_output']
    last_inst_index = model_output.rfind('[/INST]')
    if last_inst_index != -1:
        final_answer = model_output[last_inst_index + len('[/INST]'):].strip()
    else:
        final_answer = model_output.strip()

    dictalm_answers.append(final_answer)

# Add the DictaLM answers to the DataFrame
data['DictaLM_answer'] = dictalm_answers

# Save the updated DataFrame to a new CSV file
data.to_csv('updated_data.csv', index=False)


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
The attentio

OutOfMemoryError: CUDA out of memory. Tried to allocate 342.00 MiB. GPU 0 has a total capacity of 14.75 GiB of which 105.06 MiB is free. Process 13774 has 14.64 GiB memory in use. Of the allocated memory 14.04 GiB is allocated by PyTorch, and 490.72 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)