In [36]:
# ==============================================================================
# Step 1: Import Libraries and Configure Settings
# ==============================================================================
import json
import os
import time
from google import genai
from IPython.display import display, Markdown

# --- Configuration ---
# Input and output file names
INPUT_FILE = 'unikatni_otazky.json'
OUTPUT_FILE = 'unikatni_otazky_obohatene.json'

MODEL_NAME = 'gemini-2.5-flash'


client = genai.Client(api_key="AIzaSyDfzsJQg8DuEgJjtVRWeeWY1oYxN97a5-4")
display(Markdown("✅ **Gemini client initialized successfully.**"))


with open(INPUT_FILE, 'r', encoding='utf-8') as f:
    questions_data = json.load(f)

# --- Load Already Processed Questions to Allow Resuming ---
if os.path.exists(OUTPUT_FILE):
    with open(OUTPUT_FILE, 'r', encoding='utf-8') as f:
        try:
            enriched_questions = json.load(f)
        except json.JSONDecodeError:
            enriched_questions = [] # Handle case where file is empty or corrupt
else:
    enriched_questions = []

processed_texts = {q['text_otazky'] for q in enriched_questions}
question_count = len(questions_data)

display(Markdown(f"Found **{question_count}** questions in total. **{len(processed_texts)}** questions have already been processed."))



✅ **Gemini client initialized successfully.**

Found **664** questions in total. **237** questions have already been processed.

In [37]:
from google import genai

# The client gets the API key from the environment variable `GEMINI_API_KEY`.
client = genai.Client(api_key="AIzaSyDfzsJQg8DuEgJjtVRWeeWY1oYxN97a5-4")


In [38]:

if os.path.exists(OUTPUT_FILE):
    with open(OUTPUT_FILE, 'r', encoding='utf-8') as f:
        try:
            enriched_questions = json.load(f)
        except json.JSONDecodeError:
            enriched_questions = [] # Handle case where file is empty or corrupt
else:
    enriched_questions = []


for index, question_obj in enumerate(questions_data):
    # Skip questions that are already in the output file
    if question_obj['text_otazky'] in processed_texts:
        continue

    print(f"Processing question {index + 1}/{question_count}: \"{question_obj['text_otazky'][:60]}...\"")

    # Get category and explanation from the Gemini API

    prompt = f"""Analyzuj následující testovou otázku z oblasti letectví. Tvým úkolem je:
    1. Zařadit otázku do jedné z následujících kategorií: Letecké předpisy, Lidská výkonnost, Meteorologie, Navigace, Provozní postupy, Letové výkony a plánování, Znalosti letadel, Principy letu, Radiokomunikace.
    2. Poskytnout stručné a jasné vysvětlení, proč je označená odpověď správná.

    Otázka: "{question_obj["text_otazky"]}"
    Možnosti:
    A: "{question_obj["moznosti"].get('A', '')}"
    B: "{question_obj["moznosti"].get('B', '')}"
    C: "{question_obj["moznosti"].get('C', '')}"

    Správná odpověď je: {question_obj["spravna_odpoved"]}

    Odpověz ve formátu JSON s klíči "kategorie" a "vysvetleni". Nepřidávej žádný další text mimo tento JSON včetně ````json` bloků.

    """

    response = client.models.generate_content(
        model="gemini-2.5-flash", contents=prompt,
            config={
        "response_mime_type": "application/json",
    },
    )


    response.text.replace("```json", "").replace("```", "").strip()
    print(response.text)

    try:
        response_json = json.loads(response.text)
        category = response_json.get('kategorie', 'Nezařazeno')
        explanation = response_json.get('vysvetleni', 'Žádné vysvětlení poskytnuto.')
    except (json.JSONDecodeError, TypeError) as e:
        print(f"Error parsing response for question {index + 1}: {e}")
        category = 'Nezařazeno'
        explanation = 'Žádné vysvětlení poskytnuto.'


    enriched_questions = enriched_questions + [{
        **question_obj,
        'kategorie': category,
        'vysvetleni': explanation
    }]

    # Save progress after each new question is processed
    with open(OUTPUT_FILE, 'w', encoding='utf-8') as f:
        json.dump(enriched_questions, f, ensure_ascii=False, indent=4)

    # Pause to avoid hitting API rate limits (e.g., 60 requests per minute)
    time.sleep(1)

display(Markdown("---"))
display(Markdown(f"✅ **Processing complete.** All enriched questions have been saved to **'{OUTPUT_FILE}'**."))


Processing question 238/664: "Je-li dáván příkaz letadlu za letu orgánem letištní služby ř..."


ClientError: 429 RESOURCE_EXHAUSTED. {'error': {'code': 429, 'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits.', 'status': 'RESOURCE_EXHAUSTED', 'details': [{'@type': 'type.googleapis.com/google.rpc.QuotaFailure', 'violations': [{'quotaMetric': 'generativelanguage.googleapis.com/generate_content_free_tier_requests', 'quotaId': 'GenerateRequestsPerDayPerProjectPerModel-FreeTier', 'quotaDimensions': {'location': 'global', 'model': 'gemini-2.5-flash'}, 'quotaValue': '250'}]}, {'@type': 'type.googleapis.com/google.rpc.Help', 'links': [{'description': 'Learn more about Gemini API quotas', 'url': 'https://ai.google.dev/gemini-api/docs/rate-limits'}]}, {'@type': 'type.googleapis.com/google.rpc.RetryInfo', 'retryDelay': '49s'}]}}