In [24]:
from google import genai
from google.genai import types
import os
from dotenv import load_dotenv
import tiktoken

# Load environment variables from the .env file one directory up.
# The path is relative, so it depends on the kernel's working directory
# (normally the folder that contains this notebook).
load_dotenv(dotenv_path='../.env')

# Fail early with an actionable message instead of a bare KeyError, and do not
# pass an empty key on to the client where it would only fail at request time.
API_KEY = os.environ.get("GEMINI_API_KEY")
if not API_KEY:
    raise KeyError(
        "GEMINI_API_KEY is not set. Add it to ../.env or export it in the "
        "environment before running this notebook."
    )

# Single shared client used by every request cell below.
client = genai.Client(api_key=API_KEY)

## 🔑 Why Tokens Matter: The Economics

Tokens are the CURRENCY of AI. Here's what you need to know:

1. **You pay per token** (input + output)
   - Every word you send costs money
   - Every word the AI generates costs money

2. **Different models have different prices**

3. **One word ≠ One token**
   - "Explain" = 1 token
   - "database" = 1 token... but uncommon words split into 2-3 tokens
   - Punctuation, numbers, special characters all count

4. **Context windows are limited**
   - Every prompt you send uses up your available "memory"
   - Long prompts = less space for conversation
   - Less space = worse context awareness = worse answers

In [25]:
# Example 1: a short, conversational prompt
prompt_1 = "Hi Name is Yash. What is your name?"

# Send the prompt to the model and keep the whole response object around;
# later cells read its text and usage metadata.
response_1 = client.models.generate_content(
    contents=prompt_1,
    model="gemini-2.5-flash",
)

# Print the raw response object so its full structure is visible.
print("Response: {}".format(response_1))

Response: sdk_http_response=HttpResponse(
  headers=<dict len=11>
) candidates=[Candidate(
  content=Content(
    parts=[
      Part(
        text="Hi Yash! I don't have a name. I am a large language model, an AI, trained by Google. It's nice to meet you!"
      ),
    ],
    role='model'
  ),
  finish_reason=<FinishReason.STOP: 'STOP'>,
  index=0
)] create_time=None model_version='gemini-2.5-flash' prompt_feedback=None response_id='vTqLaeX7H8vZjuMPucuDsQ4' usage_metadata=GenerateContentResponseUsageMetadata(
  candidates_token_count=33,
  prompt_token_count=11,
  prompt_tokens_details=[
    ModalityTokenCount(
      modality=<MediaModality.TEXT: 'TEXT'>,
      token_count=11
    ),
  ],
  thoughts_token_count=203,
  total_token_count=247
) automatic_function_calling_history=[] parsed=None


In [26]:
# Show the generated text followed by the token accounting the API reported.
usage_1 = response_1.usage_metadata
print(
    f"Response text: {response_1.text}",
    f"\nüìä TOTAL TOKEN COUNT: {usage_1.total_token_count}",
    f"   Input tokens (prompt): {usage_1.prompt_token_count}",
    f"   Output tokens (response): {usage_1.candidates_token_count}",
    f"   Thought tokens (Thinking): {usage_1.thoughts_token_count}",
    sep="\n",
)

Response text: Hi Yash! I don't have a name. I am a large language model, an AI, trained by Google. It's nice to meet you!

üìä TOTAL TOKEN COUNT: 247
   Input tokens (prompt): 11
   Output tokens (response): 33
   Thought tokens (Thinking): 203


## Example 2: BAD PROMPT ❌
Vague prompt leads to verbose, unfocused output

In [29]:
# Example 2: BAD PROMPT - vague wording, no audience, no length constraint
prompt_2 = """Explain database in brief."""

# Same model as the other examples so the token counts are comparable.
response_2 = client.models.generate_content(
    contents=prompt_2,
    model="gemini-2.5-flash",
)

# Generated text first, then the token accounting reported by the API.
usage_2 = response_2.usage_metadata
print(
    f"Response text: {response_2.text}",
    f"\nüìä TOTAL TOKEN COUNT: {usage_2.total_token_count}",
    f"   Input tokens (prompt): {usage_2.prompt_token_count}",
    f"   Output tokens (response): {usage_2.candidates_token_count}",
    f"   Thought tokens (Thinking): {usage_2.thoughts_token_count}",
    sep="\n",
)

Response text: A database is an **organized collection of information (data)** that is stored and managed electronically.

Think of it like a highly structured digital filing cabinet. Instead of just a pile of documents, a database arranges data (like names, addresses, product details, transaction records) into tables, rows, and columns, making it easy to find, access, and update specific pieces of information quickly and efficiently.

**In essence:** It's a system for storing, retrieving, manipulating, and managing data. You interact with databases constantly ‚Äì when you shop online, check your bank balance, use social media, or even book a flight.

üìä TOTAL TOKEN COUNT: 934
   Input tokens (prompt): 6
   Output tokens (response): 128
   Thought tokens (Thinking): 800


## Example 3: EFFICIENT PROMPT ✅
Detailed instructions with clear constraints lead to focused, concise output

In [28]:
# Example 3: EFFICIENT PROMPT - role, tone, required content and a length cap
prompt_3 = """You are a helpful assistant for college students.
Your job is to explain technical concepts in simple language.
Use examples whenever possible.
Avoid jargon and technical terms.
Be friendly and encouraging.
Keep responses concise but complete.

Now, explain what a database is to a college student.
Include a real-world example.
Explain why databases are important.
Keep it under 5 sentences."""

# Same model as the other examples so the token counts are comparable.
response_3 = client.models.generate_content(
    contents=prompt_3,
    model="gemini-2.5-flash",
)

# Generated text first, then the token accounting reported by the API.
usage_3 = response_3.usage_metadata
print(
    f"Response text: {response_3.text}",
    f"\nüìä TOTAL TOKEN COUNT: {usage_3.total_token_count}",
    f"   Input tokens (prompt): {usage_3.prompt_token_count}",
    f"   Output tokens (response): {usage_3.candidates_token_count}",
    f"   Thought tokens (Thinking): {usage_3.thoughts_token_count}",
    sep="\n",
)



Response text: Hey there! Think of a database as a super organized digital filing cabinet that stores information, like names, numbers, or grades, so it's easy to find and manage. For example, your college uses a giant database to keep track of every student's courses, grades, and contact info. Databases are crucial because they allow us to store vast amounts of data reliably and access specific pieces of information almost instantly. This speedy organization helps everything from your favorite online store to your university's registration system run smoothly and efficiently!

üìä TOTAL TOKEN COUNT: 488
   Input tokens (prompt): 84
   Output tokens (response): 106
   Thought tokens (Thinking): 298


## 📊 Cost Analysis: Why Prompt Engineering Matters

In [None]:
# COST CALCULATION - Focusing on Prompt Engineering Impact
#
# Rates used below: $0.50 per 1M input tokens, $3.00 per 1M output tokens.
# NOTE(review): these rates were previously labelled as Claude pricing, but
# every request in this notebook is sent to gemini-2.5-flash. Confirm the
# numbers against the current Gemini API pricing page before quoting the
# dollar amounts; the relative comparison holds for any rate pair.
input_price_per_1m = 0.50  # USD per 1M input tokens
output_price_per_1m = 3.00  # USD per 1M output tokens

input_price_per_token = input_price_per_1m / 1_000_000
output_price_per_token = output_price_per_1m / 1_000_000


def _example_cost(response):
    """Return the token counts and costs for one response.

    Returns a tuple ``(input_tokens, output_tokens, input_cost, output_cost,
    total_cost)``. Thinking tokens are added to the output count and priced at
    the output rate. Any count the API reports as ``None`` is treated as 0.
    """
    usage = response.usage_metadata
    input_tokens = usage.prompt_token_count or 0
    output_tokens = (usage.candidates_token_count or 0) + (usage.thoughts_token_count or 0)
    input_cost = input_tokens * input_price_per_token
    output_cost = output_tokens * output_price_per_token
    return input_tokens, output_tokens, input_cost, output_cost, input_cost + output_cost


def _safe_ratio(numerator, denominator):
    """Return numerator / denominator, or 0.0 when the denominator is 0."""
    return numerator / denominator if denominator else 0.0


# Example 1 (baseline), Example 2 (bad prompt), Example 3 (efficient prompt)
(example_1_input, example_1_output,
 example_1_input_cost, example_1_output_cost, example_1_total_cost) = _example_cost(response_1)
(example_2_input, example_2_output,
 example_2_input_cost, example_2_output_cost, example_2_total_cost) = _example_cost(response_2)
(example_3_input, example_3_output,
 example_3_input_cost, example_3_output_cost, example_3_total_cost) = _example_cost(response_3)

# Detailed Comparison
print("=" * 60)
print("üéØ KEY INSIGHTS: PROMPT ENGINEERING".center(60))
print("=" * 60)

print("\nüí° COMPARING EXAMPLE 2 (Bad) vs EXAMPLE 3 (Efficient):\n")
print("   Example 2 (Bad Prompt):")
print(f"   ‚Ä¢ Input: {example_2_input} tokens (vague: \"Explain database in brief\")")
print(f"   ‚Ä¢ Output: {example_2_output} tokens (verbose, unfocused response)")
print(f"   ‚Ä¢ Cost: ${example_2_total_cost:.6f}\n")

print("   Example 3 (Efficient Prompt):")
print(f"   ‚Ä¢ Input: {example_3_input} tokens (detailed instructions + constraints)")
print(f"   ‚Ä¢ Output: {example_3_output} tokens (focused, concise response)")
print(f"   ‚Ä¢ Cost: ${example_3_total_cost:.6f}\n")

input_ratio = _safe_ratio(example_3_input, example_2_input)
# Positive when Example 2 produced more output than Example 3, negative otherwise.
output_diff_pct = _safe_ratio(example_2_output - example_3_output, example_3_output) * 100
# Negative when Example 3 is cheaper than Example 2.
cost_diff = example_3_total_cost - example_2_total_cost
cost_diff_pct = _safe_ratio(cost_diff, example_2_total_cost) * 100

# Token counts change from run to run, so derive the wording from the sign
# instead of hardcoding "more" next to an absolute value.
output_direction = "MORE" if output_diff_pct >= 0 else "FEWER"
cost_direction = "more" if cost_diff > 0 else "less"

print("üîç ANALYSIS:")
print(f"   ‚Ä¢ Example 3 uses {input_ratio:.0f}x MORE input tokens ({example_3_input} vs {example_2_input})")
print(f"   ‚Ä¢ But Example 2 generates {abs(output_diff_pct):.0f}% {output_direction} output tokens ({example_2_output} vs {example_3_output})")
print(f"   ‚Ä¢ Cost difference: Example 3 costs ${abs(cost_diff):.6f} ({abs(cost_diff_pct):.1f}%) {cost_direction} than Example 2\n")

print("‚úÖ THE LESSON:")
print("   Bad prompts = Uncontrolled output = Wasted tokens = Higher costs")
print("   Good prompts = Controlled output = Efficient tokens = Better value\n")
print("   Investing in input tokens (clear instructions) pays off by:")
print("   1. Reducing unnecessary output tokens")
print("   2. Getting exactly what you need")
print("   3. Avoiding multiple retry attempts")
print("   4. Saving money in the long run\n")

total_all_costs = example_1_total_cost + example_2_total_cost + example_3_total_cost
print(f"üí∞ Grand Total (All 3 Examples): ${total_all_costs:.6f}\n")