In [33]:
import os
import json
from google import genai
from google.genai import types

# Module-level API client shared by invoke_llm(). No credentials are passed
# explicitly here -- presumably the SDK picks them up from the environment
# (TODO confirm which variable it expects before running on a fresh machine).
client = genai.Client()


def get_examples():
    """
    Build the few-shot examples that are embedded in the classifier prompt.

    Returns:
        dict: Two entries, "example_1" and "example_2". Each maps to a dict
        with an "input" (the "goals" list and the "content" string) and the
        expected "output" (the list of goals the content aligns with; empty
        when none match). Every call returns freshly built lists, so callers
        may mutate the result without affecting later calls.
    """
    goal_names = ("Learn about bunnies", "Learn about gardening")

    def make_example(content, matched_goals):
        # Copy the tuples into new lists so no two examples share a list.
        return {
            "input": {
                "goals": list(goal_names),
                "content": content,
            },
            "output": list(matched_goals),
        }

    unrelated = make_example("spaceship blows up during launch in texas", ())
    fully_aligned = make_example(
        "Scientist finds bunnies help garden grow", goal_names
    )

    return {"example_1": unrelated, "example_2": fully_aligned}


def invoke_llm(prompt, model="gemini-2.5-flash", thinking_budget=0):
    """
    Send a prompt to the model through the module-level `client`.

    Args:
        prompt: The text passed as `contents` to `generate_content`.
        model: Name of the model to call. Defaults to "gemini-2.5-flash".
        thinking_budget: Value forwarded to `types.ThinkingConfig`. The
            default of 0 disables thinking. Pass None to send the request
            without any config, leaving the model's default thinking
            behaviour in place. NOTE(review): not every model may accept a
            budget of 0 -- check the model's documentation before changing
            `model`.

    Returns:
        The response object returned by `client.models.generate_content`,
        unmodified.
    """
    request = {"model": model, "contents": prompt}

    # Only attach a config when a budget was requested; with None the call
    # is the plain `generate_content(model=..., contents=...)` form.
    if thinking_budget is not None:
        request["config"] = types.GenerateContentConfig(
            thinking_config=types.ThinkingConfig(thinking_budget=thinking_budget)
        )

    return client.models.generate_content(**request)


def classify(goals, content):
    """
    Ask the LLM which of the user's goals a piece of content aligns with.

    Args:
        goals: The user's goals (a list of strings in the notebook's tests).
        content: The text of the content to classify.

    Returns:
        str: The model's reply with surrounding whitespace stripped. If
        anything goes wrong while calling the model or reading its reply,
        a string starting with "Error during API call: " is returned
        instead of raising.
    """
    # The examples are inserted through an f-string expression, so their
    # braces must NOT be doubled: doubling is only collapsed for braces
    # written literally in the template, and would otherwise reach the
    # model as "{{...}}".
    examples_json = json.dumps(get_examples(), ensure_ascii=False)

    # Serialise the request as JSON too, so it has the same shape as the
    # examples. `default=str` keeps unusual goal containers from raising.
    request_json = json.dumps(
        {"goals": goals, "content": content},
        ensure_ascii=False,
        default=str,
    )

    prompt = f"""
You are an AI classifier tasked with figuring out if content aligns with specified user goals.
The broader context is that a user wants to only see content on the internet that aligns with their specified goals.
You are the classifier that says if content is aligned with stated goals or not.
You are given a set of goals and some content and need to respond with a list of the goals that the content aligns with.

Examples:
{examples_json}

Now time for you to do this classification. Below are the goals and content to be used during classification:
{request_json}
"""

    try:
        response = invoke_llm(prompt)
        return response.text.strip()
    except Exception as e:
        # Best-effort by design: the caller prints whatever comes back.
        return f"Error during API call: {e}"


def load_tests(filepath="tests.jsonl"):
    """
    Load test cases from a JSON Lines file (one JSON value per line).

    Args:
        filepath: Path of the file to read. Defaults to "tests.jsonl" in the
            current working directory, which is what the notebook uses.

    Returns:
        list: The decoded value of every non-blank line, in file order.
        Lines that are not valid JSON are reported on stdout and skipped.

    Raises:
        OSError: If the file cannot be opened (e.g. FileNotFoundError).
    """
    tests = []

    with open(filepath, "r", encoding="utf-8") as f:
        for line in f:
            if not line.strip():
                # Blank or whitespace-only lines carry no test case.
                continue
            try:
                tests.append(json.loads(line))
            except json.JSONDecodeError as e:
                # Keep going so one bad line doesn't discard the whole file.
                print(f"Error decoding JSON on line: {line.strip()} - {e}")

    return tests

In [34]:
def main():
    """
    Classify every test case from load_tests() and print a per-case report.

    Each test case must provide "goals", "content" and "alignment" (the
    expected result). For every case the content, the goals, the expected
    alignment and the string returned by classify() are printed, followed
    by a separator line. Nothing is returned and no score is computed; the
    comparison is left to the reader.
    """
    for case_number, case in enumerate(load_tests(), start=1):
        goals = case["goals"]
        content = case["content"]
        expected = case["alignment"]
        predicted = classify(goals, content)

        report_lines = [
            f"Test Case {case_number}",
            f"  -> content: {content}",
            f"  -> goals: {goals}",
            f"  -> ground truth: {expected}",
            f"  -> prediction: {predicted}",
            "------------------------------------------",
        ]
        print("\n".join(report_lines))

In [35]:
# Run the evaluation: one classify() call (and so one LLM request) per test
# case returned by load_tests(), with the report printed below this cell.
main()

Test Case 1
  -> content: A new startup just raised $10M to build a new AI-powered search engine.
  -> goals: ['learn machine learning', 'stay up to date on AI', 'read about startups', 'improve python programming skills']
  -> ground truth: ['stay up to date on AI', 'read about startups']
  -> prediction: ['stay up to date on AI', 'read about startups']
------------------------------------------
Test Case 2
  -> content: This video teaches you how to build a neural network from scratch.
  -> goals: ['learn machine learning', 'stay up to date on AI', 'read about startups', 'improve python programming skills']
  -> ground truth: ['learn machine learning']
  -> prediction: ['learn machine learning', 'stay up to date on AI']
------------------------------------------
Test Case 3
  -> content: Why removing GIL in python is a good idea
  -> goals: ['learn machine learning', 'stay up to date on AI', 'read about startups', 'improve python programming skills']
  -> ground truth: ['improve pytho