In [None]:
# -----------------------------------
# Install Required Packages
# -----------------------------------
!pip install transformers tqdm pandas sentencepiece accelerate

# -----------------------------------
# IMPORTS
# -----------------------------------
import json
import pandas as pd
from pathlib import Path
from tqdm import tqdm
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

# -----------------------------------
# CONFIG: INPUT / OUTPUT
# -----------------------------------
from google.colab import files

# Input: JSONL file of evaluation tasks, read by load_jsonl() in the run section.
DATA_PATH = Path("/content/FAQ_v1.jsonl")
# Output: JSONL file the per-task results are written to and then downloaded.
RESULTS_PATH = Path("/content/hf_results.jsonl")

# -----------------------------------
# LOAD OPEN HF MODEL (no gated access needed)
# -----------------------------------
MODEL_NAME = "tiiuae/falcon-7b-instruct"  # open, no login required

# Local import: torch is only needed here, to detect whether a GPU is usable.
import torch

# Fetch the tokenizer and the causal-LM weights for MODEL_NAME.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)

# Disable tie warning
model.config.tie_word_embeddings = False

# Use proper generation settings
generator = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    # First CUDA GPU when one is available, otherwise CPU (-1), so the cell
    # does not crash on a runtime without an accelerator.
    device=0 if torch.cuda.is_available() else -1,
    max_new_tokens=500,  # Only this, remove max_length
    do_sample=True,      # Use True for randomness, False for greedy
    temperature=0.7      # Must be >0 if do_sample=True
)

# -----------------------------------
# HELPER FUNCTIONS
# -----------------------------------
def load_jsonl(file_path):
    """Read a JSON Lines file and return its records as a list.

    Blank and whitespace-only lines (for example a trailing empty line at the
    end of the file) are skipped instead of being handed to the JSON parser.

    Args:
        file_path: Path of the UTF-8 encoded ``.jsonl`` file to read.

    Returns:
        A list with one decoded JSON value per non-blank line, in file order.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON. The
            message names the offending line number and file.
    """
    tasks = []
    with open(file_path, "r", encoding="utf-8") as f:
        for line_no, raw_line in enumerate(f, start=1):
            line = raw_line.strip()
            if not line:
                # json.loads("") raises, so tolerate empty separator lines.
                continue
            try:
                tasks.append(json.loads(line))
            except json.JSONDecodeError as err:
                # Same exception type as before, with the file position added
                # so a broken record can be located quickly.
                raise json.JSONDecodeError(
                    f"{err.msg} (line {line_no} of {file_path})",
                    err.doc,
                    err.pos,
                ) from err
    return tasks

def save_jsonl(file_path, data):
    """Write each item of *data* to *file_path* as one JSON document per line.

    The file is opened in write mode with UTF-8 encoding, so existing content
    is replaced. Non-ASCII characters are written as-is rather than escaped.

    Args:
        file_path: Destination path of the ``.jsonl`` file.
        data: Iterable of JSON-serializable items.
    """
    with open(file_path, "w", encoding="utf-8") as sink:
        sink.writelines(
            json.dumps(record, ensure_ascii=False) + "\n" for record in data
        )

def query_hf(prompt, context=""):
    """Ask the module-level ``generator`` pipeline a question and return its answer.

    The context and question are combined into a single prompt string. Only
    the newly generated text is returned: by default the text-generation
    pipeline echoes the prompt in front of the completion, which would put
    the context and question into every stored model response.

    Args:
        prompt: The question text.
        context: Optional supporting text placed before the question.

    Returns:
        The generated continuation, or the literal string ``"ERROR"`` if the
        pipeline call raised.
    """
    full_prompt = f"Context: {context}\n\nQuestion: {prompt}"
    try:
        outputs = generator(
            full_prompt,
            max_new_tokens=500,
            return_full_text=False,  # drop the echoed prompt from the output
        )
        return outputs[0]['generated_text']
    except Exception as e:
        # Best-effort by design: one failing task must not abort the whole run.
        print("Error querying HF model:", e)
        return "ERROR"

# -----------------------------------
# MAIN EVAL LOOP
# -----------------------------------
def run_evaluation(tasks):
    """Query the model for every task and collect one result record per task.

    Args:
        tasks: Iterable of task dicts. Each must provide ``prompt``,
            ``task_id``, ``task_category`` and ``expected_answer``;
            ``context``, ``failure_modes_expected`` and ``grounding_required``
            are optional.

    Returns:
        A list of result dicts in task order, each holding the task's
        identifying fields alongside the model's response.
    """
    def _evaluate(task):
        # Query first, then assemble the record (same order as a plain loop).
        answer = query_hf(task['prompt'], task.get('context', ""))
        return {
            "task_id": task["task_id"],
            "task_category": task["task_category"],
            "prompt": task["prompt"],
            "expected_answer": task["expected_answer"],
            "model_response": answer,
            "failure_modes_expected": task.get("failure_modes_expected", []),
            "grounding_required": task.get("grounding_required", False),
        }

    # tqdm wraps the iterable to show a progress bar while tasks are processed.
    return [_evaluate(task) for task in tqdm(tasks)]

# -----------------------------------
# RUN EVALUATION
# -----------------------------------
# Read every task record from the input JSONL file.
tasks = load_jsonl(DATA_PATH)
print(f"Loaded {len(tasks)} tasks for evaluation.")

# Query the model once per task. Results are written only after
# run_evaluation() returns, so nothing is saved if it does not complete.
results = run_evaluation(tasks)
save_jsonl(RESULTS_PATH, results)
print(f"Saved results to {RESULTS_PATH}")

# ----- Download results -----
# Pass the saved results file to google.colab's files.download().
files.download(str(RESULTS_PATH))




The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json: 0.00B [00:00, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/281 [00:00<?, ?B/s]

model.safetensors.index.json: 0.00B [00:00, ?B/s]

Downloading (incomplete total...): 0.00B [00:00, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

Loading weights:   0%|          | 0/196 [00:00<?, ?it/s]



generation_config.json:   0%|          | 0.00/117 [00:00<?, ?B/s]

Passing `generation_config` together with generation-related arguments=({'max_new_tokens', 'temperature', 'do_sample'}) is deprecated and will be removed in future versions. Please pass either a `generation_config` object OR all generation parameters explicitly, but not both.


Loaded 31 tasks for evaluation.


  0%|          | 0/31 [00:00<?, ?it/s]Passing `generation_config` together with generation-related arguments=({'max_new_tokens'}) is deprecated and will be removed in future versions. Please pass either a `generation_config` object OR all generation parameters explicitly, but not both.
Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.
Both `max_new_tokens` (=500) and `max_length`(=20) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
  3%|▎         | 1/31 [01:02<31:25, 62.84s/it]Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.
Both `max_new_tokens` (=500) and `max_length`(=20) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
  6%|▋         | 2/31 [01:51<26

Saved results to /content/hf_results.jsonl





<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>