In [None]:
!pip install pandas
!pip install torch
!pip install transformers
!pip install bitsandbytes

In [None]:
import os, gc
import pandas as pd
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, BitsAndBytesConfig
from transformers.utils import logging

# Raise the transformers logger threshold to ERROR so that only error
# messages from the library are shown in the cell outputs.
logging.set_verbosity_error()

In [None]:
def load_model(model_name, quantize=False):
    """Load a causal language model and its tokenizer onto the GPU.

    Args:
        model_name: Model identifier or path handed to ``from_pretrained``.
        quantize: If True, load the weights in 8-bit through bitsandbytes;
            otherwise load them in float16.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_name)

    if quantize:
        # BitsAndBytesConfig only exposes compute-dtype / double-quantization
        # options for 4-bit loading (``bnb_4bit_*``). There are no
        # ``bnb_8bit_*`` arguments, so the 8-bit configuration is the flag
        # alone.
        load_kwargs = {"quantization_config": BitsAndBytesConfig(load_in_8bit=True)}
    else:
        load_kwargs = {"torch_dtype": torch.float16}

    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        device_map="cuda",
        **load_kwargs,
    )
    return tokenizer, model


def generate_response(tokenizer, model, prompt, max_len=250):
    """Sample one completion for ``prompt`` using a simple chat-style template.

    The prompt is wrapped as ``"User: <prompt>\\nAssistant:"`` and passed to a
    text-generation pipeline built from the given model and tokenizer.

    Args:
        tokenizer: Tokenizer matching ``model``.
        model: Loaded causal language model.
        prompt: The user text to respond to.
        max_len: Maximum number of *newly generated* tokens. The budget is
            applied to the completion only, so a long prompt cannot use it up
            and leave no room for the answer.

    Returns:
        The generated continuation as a string, without the prompt prefix.
    """
    text_generator = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer
    )
    formatted_prompt = f"User: {prompt}\nAssistant:"
    output = text_generator(
        formatted_prompt,
        # `max_length` would count the prompt tokens as well; bound only the
        # completion instead.
        max_new_tokens=max_len,
        do_sample=True,
        # Let the pipeline drop the prompt rather than slicing it off by hand.
        return_full_text=False,
    )
    return output[0]["generated_text"]


def free_mem(tokenizer, model):
    """Release the GPU memory held by ``model`` as far as possible.

    The caller usually still holds its own references to ``model``, so the
    ``del`` statements here cannot destroy the object by themselves; moving the
    weights to the CPU is what lets ``torch.cuda.empty_cache()`` hand the VRAM
    back. Callers should drop their own references afterwards.

    Args:
        tokenizer: Tokenizer to discard.
        model: Model to move off the GPU and discard.
    """
    try:
        model.to("cpu")
    except (ValueError, RuntimeError, NotImplementedError) as e:
        # bitsandbytes-quantized models refuse `.to()`; keep going so the
        # garbage collection and cache flush below still run.
        print(f"Could not move model to CPU before cleanup: {e}")
    del model
    del tokenizer
    gc.collect()
    torch.cuda.empty_cache()


def process_all_base_prompts(model_name, input_base_dir, output_base_dir):
    """Generate a response for every prompt in every CSV of a directory.

    Each ``*.csv`` file in ``input_base_dir`` (hidden files are skipped) is
    read, its ``PROMPT`` column is run through the model row by row, and the
    results are written to
    ``<output_base_dir>/<model>/<model>_<csv name>.csv``, where ``<model>`` is
    ``model_name`` with ``/`` replaced by ``_``. Files that cannot be read or
    that lack a ``PROMPT`` column are reported and skipped. A missing
    ``BIAS CATEGORY`` column is filled with empty strings.

    Args:
        model_name: Model identifier passed to ``load_model``.
        input_base_dir: Directory containing the prompt CSV files.
        output_base_dir: Root directory for the per-model output folders.
    """
    safe_model_name = model_name.replace("/", "_")
    out_dir = os.path.join(output_base_dir, safe_model_name)

    tokenizer, model = load_model(model_name)
    try:
        # Sorted so files are processed in a reproducible order.
        for file in sorted(os.listdir(input_base_dir)):
            if file.startswith('.') or not file.endswith('.csv'):
                continue
            file_path = os.path.join(input_base_dir, file)
            csv_base = os.path.splitext(file)[0]

            try:
                df = pd.read_csv(file_path)
            except Exception as e:
                print(f"Error reading {file_path}: {e}")
                continue

            if "PROMPT" not in df.columns:
                print(f"Skipping {file_path}: no PROMPT column")
                continue

            prompts = df["PROMPT"].tolist()
            if "BIAS CATEGORY" in df.columns:
                bias_categories = df["BIAS CATEGORY"].tolist()
            else:
                # No category column: keep one empty label per prompt so the
                # output schema stays the same.
                bias_categories = [""] * len(prompts)

            os.makedirs(out_dir, exist_ok=True)

            results = [
                {
                    "MODEL": model_name,
                    "BIAS CATEGORY": bias_category,
                    "PROMPT": prompt,
                    "RESPONSE": generate_response(tokenizer, model, prompt),
                }
                for prompt, bias_category in zip(prompts, bias_categories)
            ]

            if results:
                output_file = os.path.join(out_dir, f"{safe_model_name}_{csv_base}.csv")
                df_out = pd.DataFrame(results)
                df_out.to_csv(output_file, index=False, encoding="utf-8")
                print(f"Saved {output_file} with {len(results)} responses")
    finally:
        # Always release VRAM, even if generation failed part-way through.
        free_mem(tokenizer, model)

In [None]:
# Model identifiers to evaluate; replace the placeholder entry before running.
model_names = ["{your_models}"]

for model_name in model_names:
    print(f"Processing model: {model_name}")
    process_all_base_prompts(
        model_name,
        input_base_dir="CLEAR-Bias/base_prompts",
        output_base_dir="results/base_prompts_finetuned",
    )
    # Blank line to separate the output of consecutive models.
    print()