In [2]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel
from huggingface_hub import login


class ModelTester:
    """Load a base causal LM plus a trained PEFT adapter and sample text from it.

    After construction the instance exposes:
        tokenizer: tokenizer loaded from ``base_model_name``.
        model: the base model wrapped by ``PeftModel`` with the adapter
            weights found at ``trained_model_path``.
    """

    # Name of the environment variable / Colab secret holding the HF token.
    HF_TOKEN_NAME = "HF_TOKEN"

    def __init__(self, base_model_name: str, trained_model_path: str,
                 hf_token: "str | None" = None):
        """Authenticate (best effort), then load tokenizer, base model and adapter.

        Args:
            base_model_name: Hub id or local path of the base model.
            trained_model_path: Directory containing the trained adapter.
            hf_token: Optional HuggingFace access token. When omitted, the
                token is looked up via ``_resolve_hf_token`` instead of
                being hard-coded in the source.
        """
        token = self._resolve_hf_token(hf_token)
        if token is None:
            # No credential available: keep going, public models still load.
            print("Warning: Could not authenticate with HuggingFace")
        else:
            try:
                login(token=token)
            except Exception as exc:
                # Authentication is best effort, but report why it failed and
                # let KeyboardInterrupt / SystemExit propagate.
                print(f"Warning: Could not authenticate with HuggingFace ({exc})")
            else:
                print("Authenticated with HuggingFace\n")

        print(f"Loading base model: {base_model_name}")
        self.tokenizer = AutoTokenizer.from_pretrained(base_model_name)

        # Some tokenizers ship without a pad token; reuse EOS so padding works.
        if self.tokenizer.pad_token is None:
            self.tokenizer.pad_token = self.tokenizer.eos_token

        # Load base model.
        # NOTE: recent transformers releases warn that `torch_dtype` is
        # deprecated in favour of `dtype`; the old keyword is kept here so the
        # code still runs on versions that predate the rename.
        base_model = AutoModelForCausalLM.from_pretrained(
            base_model_name,
            torch_dtype=torch.float16,
            device_map="auto",
            low_cpu_mem_usage=True
        )

        # Load LoRA weights on top of the base model.
        print(f"Loading trained model from: {trained_model_path}")
        self.model = PeftModel.from_pretrained(base_model, trained_model_path)
        print("Model loaded successfully!\n")

    @staticmethod
    def _resolve_hf_token(explicit: "str | None" = None) -> "str | None":
        """Return the first available HF token, or ``None`` if there is none.

        Lookup order: the explicit argument, the ``HF_TOKEN`` environment
        variable, then the ``HF_TOKEN`` Colab secret (only when running
        inside Google Colab).
        """
        import os  # local import keeps the block self-contained

        if explicit:
            return explicit

        env_token = os.environ.get(ModelTester.HF_TOKEN_NAME)
        if env_token:
            return env_token

        try:
            from google.colab import userdata
        except ImportError:
            # Not running inside Colab.
            return None

        try:
            return userdata.get(ModelTester.HF_TOKEN_NAME) or None
        except Exception:
            # Secret missing or notebook access not granted: treat as absent.
            return None

    def generate_text(self, prompt: str, max_length: int = 150) -> str:
        """Sample a continuation of ``prompt`` and return the decoded text.

        Args:
            prompt: Text the model should continue.
            max_length: Forwarded to ``generate`` as ``max_length``. In
                transformers this limit counts the prompt tokens as well as
                the newly generated ones.

        Returns:
            The decoded first returned sequence with special tokens removed.
        """
        inputs = self.tokenizer(prompt, return_tensors="pt").to(self.model.device)

        # Gradients are not needed for sampling.
        with torch.no_grad():
            outputs = self.model.generate(
                **inputs,
                max_length=max_length,
                temperature=0.8,
                do_sample=True,
                top_p=0.9,
                pad_token_id=self.tokenizer.eos_token_id
            )

        return self.tokenizer.decode(outputs[0], skip_special_tokens=True)

    def test_multiple_prompts(self, prompts: list):
        """Generate text for each prompt and print a formatted report.

        Args:
            prompts: Prompt strings; each is passed to ``generate_text`` with
                the default ``max_length``.
        """
        print("="*60)
        print("TESTING TRAINED MODEL")
        print("="*60 + "\n")

        for number, prompt in enumerate(prompts, start=1):
            print(f"Prompt {number}: {prompt}")
            print("-"*60)
            generated = self.generate_text(prompt)
            print(f"Generated: {generated}")
            print("="*60 + "\n")


def main():
    """Mount Google Drive, load the trained adapter, and print sample generations."""
    from google.colab import drive

    drive.mount('/content/drive')

    # All project artefacts live under a single Drive folder.
    drive_project_dir = "/content/drive/MyDrive/FinalProject"

    # Base checkpoint and the adapter directory to evaluate.
    base_checkpoint = "meta-llama/Llama-3.2-1B"
    adapter_dir = f"{drive_project_dir}/trained_models/human_baseline_data_llama"

    model_tester = ModelTester(base_checkpoint, adapter_dir)

    # Short open-ended prompts used to eyeball generation quality.
    model_tester.test_multiple_prompts(
        [
            "The history of artificial intelligence",
            "In recent years, technology has",
            "Scientists have discovered that",
        ]
    )

    print("\n✓ Testing complete!")


# Run the demo only when this code is executed directly, not when imported.
if __name__ == "__main__":
    main()

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
✓ Authenticated with HuggingFace

Loading base model: meta-llama/Llama-3.2-1B


tokenizer_config.json:   0%|          | 0.00/50.5k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/301 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/843 [00:00<?, ?B/s]

`torch_dtype` is deprecated! Use `dtype` instead!


model.safetensors:   0%|          | 0.00/2.47G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/185 [00:00<?, ?B/s]

Loading trained model from: /content/drive/MyDrive/FinalProject/trained_models/human_baseline_data_llama
Model loaded successfully!

TESTING TRAINED MODEL

Prompt 1: The history of artificial intelligence
------------------------------------------------------------
Generated: The history of artificial intelligence is complex and has evolved over time. The first computer programs that could be considered intelligent were developed in the late 1950s and 1960s. These programs were designed to perform specific tasks, such as playing chess or answering math questions. In the 1970s, programs became more complex and began to exhibit behaviors that were difficult to predict. This led to the development of the field of computational intelligence, which seeks to model the human brain and develop algorithms that mimic its capabilities. One of the earliest examples of computational intelligence was the AlphaGo program, which was developed by Google in 2015. This program used deep learning to play 