## Budget-Aware Chatbot (Using Real Gemini Token Counts)

In [30]:
from google import genai
from google.genai import types
import os
from dotenv import load_dotenv
import gradio as gr

load_dotenv(dotenv_path='../.env')
API_KEY = os.environ["GEMINI_API_KEY2"]
client = genai.Client(api_key=API_KEY)
model_name = "gemini-2.5-flash"

### Config

In [31]:
SYSTEM_INSTRUCTION = """
You are a senior AI tutor helping developers understand technical concepts.

Rules:
- Be concise but complete.
- Use simple, clear language.
- Avoid unnecessary fluff.
- Use short paragraphs.
- Provide examples when helpful.
- Do not assume prior deep knowledge unless asked.

Always prioritize clarity over complexity.
"""

MAX_CONTEXT_TOKENS = 25000  # simulated context window
INPUT_COST_PER_1K = 0.00035
OUTPUT_COST_PER_1K = 0.00053


### Memory Store (Structured Gemini Format)

In [32]:
chat_history = []

### Cost Estimator

In [33]:
def estimate_cost(prompt_tokens, output_tokens):
    input_cost = (prompt_tokens / 1000) * INPUT_COST_PER_1K
    output_cost = (output_tokens / 1000) * OUTPUT_COST_PER_1K
    return input_cost + output_cost


### Context Trim Function (Using Real Token Count)

In [34]:
TRIM_THRESHOLD = 0.8  # 80%

def trim_history():
    global chat_history
    
    threshold_limit = MAX_CONTEXT_TOKENS * TRIM_THRESHOLD
    
    while True:
        response = client.models.generate_content(
            model=model_name,
            contents=chat_history,
            config=types.GenerateContentConfig(
                system_instruction=SYSTEM_INSTRUCTION,
                max_output_tokens=1
            )
        )
        
        total_tokens = response.usage_metadata.total_token_count
        
        if total_tokens <= threshold_limit:
            break
        
        if len(chat_history) >= 2:
            chat_history = chat_history[2:]  # remove oldest user+model pair
        else:
            break



### Send Message Function (Full Logic)

In [35]:
def send_message(user_input):
    global chat_history
    
    # Append user message
    chat_history.append(
        types.Content(
            role="user",
            parts=[types.Part(text=user_input)]
        )
    )
    
    # Trim if needed
    trim_history()
    
    # Generate response
    response = client.models.generate_content(
        model=model_name,
        contents=chat_history,
        config=types.GenerateContentConfig(
            system_instruction=SYSTEM_INSTRUCTION
        )
    )
    
    reply = response.text
    
    # Append model reply
    chat_history.append(
        types.Content(
            role="model",
            parts=[types.Part(text=reply)]
        )
    )
    
    usage = response.usage_metadata
    
    prompt_tokens = usage.prompt_token_count
    output_tokens = usage.candidates_token_count
    total_tokens = usage.total_token_count
    
    total_cost = estimate_cost(prompt_tokens, output_tokens)
    
    print("ðŸ¤– Assistant:", reply)
    print("Prompt tokens:", prompt_tokens)
    print("Output tokens:", output_tokens)
    print("Total tokens:", total_tokens)
    print("Estimated cost:", round(total_cost, 6))
    print("-" * 50)
    


### Interactive Loop

In [36]:
print("Interactive Budget-Aware Chatbot")
print("Type 'exit' to stop.\n")

while True:
    user_input = input("You: ")
    
    if user_input.lower() == "exit":
        print("Goodbye ðŸ‘‹")
        break
    
    send_message(user_input)

Interactive Budget-Aware Chatbot
Type 'exit' to stop.

ðŸ¤– Assistant: Hi Yash! Nice to meet you.

How can I help you understand technical concepts today?
Prompt tokens: 77
Output tokens: 19
Total tokens: 96
Estimated cost: 3.7e-05
--------------------------------------------------
ðŸ¤– Assistant: Yes, I do. Your name is Yash.

As an AI, I retain information within our current conversation.
Prompt tokens: 104
Output tokens: 23
Total tokens: 209
Estimated cost: 4.9e-05
--------------------------------------------------
Goodbye ðŸ‘‹
