In [None]:
!pip install google-generativeai




In [None]:
import base64
import os

import google.generativeai as genai


In [None]:
# Take the API key from the GOOGLE_API_KEY environment variable so the secret
# is never stored in the notebook itself. When the variable is unset this
# falls back to the empty string the cell passed before.
genai.configure(api_key=os.environ.get("GOOGLE_API_KEY", ""))

# Shared model handle used by the speech-to-text helper defined below.
model = genai.GenerativeModel(
    model_name="models/gemini-3-flash-preview"
)


In [None]:
def telugu_stt(audio_path, mime_type="audio/mp4"):
    """
    Converts Telugu audio to Telugu text using Gemini.

    Args:
        audio_path: Path of the audio file to transcribe; read in binary mode.
        mime_type: MIME type declared for the audio payload. Defaults to
            "audio/mp4", the value that used to be hard-coded, so existing
            callers are unaffected.

    Returns:
        The text of the model response with surrounding whitespace stripped.

    Raises:
        OSError: If the audio file cannot be opened or read.
        Any error raised by ``model.generate_content`` or by accessing
        ``response.text`` propagates unchanged.
    """
    with open(audio_path, "rb") as f:
        audio_bytes = f.read()

    # The inline blob takes the raw bytes directly; the SDK handles the wire
    # encoding, so no manual base64 step is needed.
    response = model.generate_content([
        {
            "mime_type": mime_type,
            "data": audio_bytes
        },
        # Telugu-language instruction sent with the audio (kept byte-for-byte).
        "‡∞§‡±Ü‡∞≤‡±Å‡∞ó‡±Å ‡∞Ü‡∞°‡∞ø‡∞Ø‡±ã‡∞®‡±Å ‡∞ñ‡∞ö‡±ç‡∞ö‡∞ø‡∞§‡∞Ç‡∞ó‡∞æ ‡∞≤‡∞ø‡∞ñ‡∞ø‡∞§‡∞Ç ‡∞ö‡±á‡∞Ø‡∞Ç‡∞°‡∞ø."
    ])

    return response.text.strip()


In [None]:
class FailureHandler:
    """
    Handles STT failure and missing information failure.

    Both checks return a dict whose "status" is "OK" or "FAIL". A failing
    result also carries a user-facing "message"; ``missing_info`` additionally
    returns a "followups" list of questions to ask.
    """

    # Transcripts with fewer non-whitespace-trimmed characters than this are
    # treated as a failed recognition.
    MIN_TEXT_LENGTH = 5

    # Fields that must be present in memory; the order here is the order in
    # which follow-up questions are returned.
    REQUIRED_FIELDS = ("age", "income", "state")

    # Follow-up question shown to the user for each missing field.
    FOLLOWUP_QUESTIONS = {
        "age": "‚û°Ô∏è ‡∞Æ‡±Ä ‡∞µ‡∞Ø‡∞∏‡±ç‡∞∏‡±Å ‡∞é‡∞Ç‡∞§?",
        "income": "‚û°Ô∏è ‡∞Æ‡±Ä ‡∞µ‡∞æ‡∞∞‡±ç‡∞∑‡∞ø‡∞ï ‡∞Ü‡∞¶‡∞æ‡∞Ø‡∞Ç ‡∞é‡∞Ç‡∞§?",
        "state": "‚û°Ô∏è ‡∞Æ‡±Ä‡∞∞‡±Å ‡∞è ‡∞∞‡∞æ‡∞∑‡±ç‡∞ü‡±ç‡∞∞‡∞Ç‡∞≤‡±ã ‡∞®‡∞ø‡∞µ‡∞∏‡∞ø‡∞∏‡±ç‡∞§‡±Å‡∞®‡±ç‡∞®‡∞æ‡∞∞‡±Å?"
    }

    def stt_failure(self, text):
        """
        Decide whether a transcript is too short to be usable.

        Args:
            text: Transcript string (or None / empty when nothing was
                recognised).

        Returns:
            {"status": "FAIL", "message": ...} when ``text`` is falsy or has
            fewer than ``MIN_TEXT_LENGTH`` characters once surrounding
            whitespace is removed; otherwise {"status": "OK"}.
        """
        # Measure the stripped text so a whitespace-only or whitespace-padded
        # transcript is not mistaken for real speech.
        if not text or len(text.strip()) < self.MIN_TEXT_LENGTH:
            return {
                "status": "FAIL",
                "message": "‚ùå ‡∞Æ‡±Ä ‡∞Æ‡∞æ‡∞ü ‡∞∏‡±ç‡∞™‡∞∑‡±ç‡∞ü‡∞Ç‡∞ó‡∞æ ‡∞µ‡∞ø‡∞®‡∞ø‡∞™‡∞ø‡∞Ç‡∞ö‡∞≤‡±á‡∞¶‡±Å. ‡∞¶‡∞Ø‡∞ö‡±á‡∞∏‡∞ø ‡∞Æ‡∞≥‡±ç‡∞≤‡±Ä ‡∞ö‡±Ü‡∞™‡±ç‡∞™‡∞Ç‡∞°‡∞ø."
            }
        return {"status": "OK"}

    def missing_info(self, memory):
        """
        Report which required fields are absent from ``memory``.

        Args:
            memory: Mapping of captured field names to values. Only key
                presence is checked, not the stored value.

        Returns:
            {"status": "OK"} when every field in ``REQUIRED_FIELDS`` is a key
            of ``memory``; otherwise a "FAIL" dict with a "message" and a
            "followups" list holding one question per missing field.
        """
        missing = [f for f in self.REQUIRED_FIELDS if f not in memory]

        if missing:
            return {
                "status": "FAIL",
                "message": "‡∞™‡∞•‡∞ï‡∞æ‡∞®‡±ç‡∞®‡∞ø ‡∞ó‡±Å‡∞∞‡±ç‡∞§‡∞ø‡∞Ç‡∞ö‡∞°‡∞æ‡∞®‡∞ø‡∞ï‡∞ø ‡∞ï‡±ä‡∞Ç‡∞§ ‡∞∏‡∞Æ‡∞æ‡∞ö‡∞æ‡∞∞‡∞Ç ‡∞Ö‡∞µ‡∞∏‡∞∞‡∞Ç.",
                "followups": [self.FOLLOWUP_QUESTIONS[m] for m in missing]
            }

        return {"status": "OK"}


In [None]:
def extract_memory(text, memory):
    """
    Flags which user details a Telugu transcript mentions (demo heuristic).

    Each field is marked with the placeholder "CAPTURED" when any of its
    keywords occurs as a substring of ``text``; no actual value is parsed.

    Args:
        text: Transcript string to scan.
        memory: Dict of fields captured so far; updated in place.

    Returns:
        The same ``memory`` dict that was passed in.
    """
    # Field name -> substrings whose presence marks that field as captured.
    # Fields are checked in this order: age, income, state.
    keyword_table = (
        ("age", ("‡∞∏‡∞Ç‡∞µ‡∞§‡±ç‡∞∏‡∞∞", "‡∞è‡∞≥‡±ç‡∞≤‡±Å")),
        ("income", ("‡∞Ü‡∞¶‡∞æ‡∞Ø‡∞Ç", "‡∞≤‡∞ï‡±ç‡∞∑")),
        ("state", ("‡∞§‡±Ü‡∞≤‡∞Ç‡∞ó‡∞æ‡∞£", "‡∞Ü‡∞Ç‡∞ß‡±ç‡∞∞")),
    )

    for field_name, keywords in keyword_table:
        if any(keyword in text for keyword in keywords):
            memory[field_name] = "CAPTURED"

    return memory


In [None]:
def govt_scheme_agent():
    """
    Returns the fixed success reply for the scheme-suggestion step.

    Placeholder implementation: it takes no input and performs no lookup,
    always returning the same string.
    """
    success_reply = "‚úÖ ‡∞Æ‡±Ä ‡∞∏‡∞Æ‡∞æ‡∞ö‡∞æ‡∞∞‡∞Ç ‡∞Ü‡∞ß‡∞æ‡∞∞‡∞Ç‡∞ó‡∞æ ‡∞§‡∞ó‡∞ø‡∞® ‡∞™‡±ç‡∞∞‡∞≠‡±Å‡∞§‡±ç‡∞µ ‡∞™‡∞•‡∞ï‡∞æ‡∞≤‡∞®‡±Å ‡∞∏‡±Ç‡∞ö‡∞ø‡∞Ç‡∞ö‡∞µ‡∞ö‡±ç‡∞ö‡±Å."
    return success_reply


In [50]:
# The clips are processed in order as turns of a single conversation:
# `memory` is shared across iterations, so details captured from an earlier
# clip still count when a later clip is checked.
audio_files = [
    "a1.mp4",
    "a2.mp4",
    "a3.mp4",
    "a4.mp4",
    "a5.mp4",
]

failure = FailureHandler()
memory = {}

for audio in audio_files:
    print("\n===================================")
    print(f"üéß Processing Audio: {audio}")

    # Step 1: transcribe the clip.
    text = telugu_stt(audio)
    print("üìù STT Output:", text)

    # Step 2: an unusable transcript ends this turn with a retry message.
    check = failure.stt_failure(text)
    if check["status"] == "FAIL":
        print(check["message"])
    else:
        # Step 3: record whichever details this transcript mentions.
        memory = extract_memory(text, memory)
        print("üß† Memory:", memory)

        # Step 4: if details are still missing, ask for them; otherwise
        # Step 5: take the success path and call the scheme agent.
        check = failure.missing_info(memory)
        if check["status"] == "FAIL":
            print("‚ö†Ô∏è", check["message"])
            for q in check["followups"]:
                print(q)
        else:
            response = govt_scheme_agent()
            print(response)



üéß Processing Audio: a1.mp4




‚è≥ Quota hit. Retrying in 10s...




‚è≥ Quota hit. Retrying in 20s...
‚ö†Ô∏è Using fallback STT text
üìù STT Output: ‡∞®‡∞æ‡∞ï‡±Å ‡∞™‡±ç‡∞∞‡∞≠‡±Å‡∞§‡±ç‡∞µ ‡∞™‡∞•‡∞ï‡∞Ç ‡∞ï‡∞æ‡∞µ‡∞æ‡∞≤‡∞ø
üß† Memory: {}
‚ö†Ô∏è ‡∞™‡∞•‡∞ï‡∞æ‡∞®‡±ç‡∞®‡∞ø ‡∞ó‡±Å‡∞∞‡±ç‡∞§‡∞ø‡∞Ç‡∞ö‡∞°‡∞æ‡∞®‡∞ø‡∞ï‡∞ø ‡∞ï‡±ä‡∞Ç‡∞§ ‡∞∏‡∞Æ‡∞æ‡∞ö‡∞æ‡∞∞‡∞Ç ‡∞Ö‡∞µ‡∞∏‡∞∞‡∞Ç.
‚û°Ô∏è ‡∞Æ‡±Ä ‡∞µ‡∞Ø‡∞∏‡±ç‡∞∏‡±Å ‡∞é‡∞Ç‡∞§?
‚û°Ô∏è ‡∞Æ‡±Ä ‡∞µ‡∞æ‡∞∞‡±ç‡∞∑‡∞ø‡∞ï ‡∞Ü‡∞¶‡∞æ‡∞Ø‡∞Ç ‡∞é‡∞Ç‡∞§?
‚û°Ô∏è ‡∞Æ‡±Ä‡∞∞‡±Å ‡∞è ‡∞∞‡∞æ‡∞∑‡±ç‡∞ü‡±ç‡∞∞‡∞Ç‡∞≤‡±ã ‡∞®‡∞ø‡∞µ‡∞∏‡∞ø‡∞∏‡±ç‡∞§‡±Å‡∞®‡±ç‡∞®‡∞æ‡∞∞‡±Å?

üéß Processing Audio: a2.mp4




‚è≥ Quota hit. Retrying in 10s...




‚è≥ Quota hit. Retrying in 20s...
‚ö†Ô∏è Using fallback STT text
üìù STT Output: 
‚ùå ‡∞Æ‡±Ä ‡∞Æ‡∞æ‡∞ü ‡∞∏‡±ç‡∞™‡∞∑‡±ç‡∞ü‡∞Ç‡∞ó‡∞æ ‡∞µ‡∞ø‡∞®‡∞ø‡∞™‡∞ø‡∞Ç‡∞ö‡∞≤‡±á‡∞¶‡±Å. ‡∞¶‡∞Ø‡∞ö‡±á‡∞∏‡∞ø ‡∞Æ‡∞≥‡±ç‡∞≤‡±Ä ‡∞ö‡±Ü‡∞™‡±ç‡∞™‡∞Ç‡∞°‡∞ø.

üéß Processing Audio: a3.mp4




‚è≥ Quota hit. Retrying in 10s...




‚è≥ Quota hit. Retrying in 20s...
‚ö†Ô∏è Using fallback STT text
üìù STT Output: ‡∞®‡∞æ ‡∞µ‡∞Ø‡∞∏‡±ç‡∞∏‡±Å 35 ‡∞∏‡∞Ç‡∞µ‡∞§‡±ç‡∞∏‡∞∞‡∞æ‡∞≤‡±Å
üß† Memory: {'age': 'CAPTURED'}
‚ö†Ô∏è ‡∞™‡∞•‡∞ï‡∞æ‡∞®‡±ç‡∞®‡∞ø ‡∞ó‡±Å‡∞∞‡±ç‡∞§‡∞ø‡∞Ç‡∞ö‡∞°‡∞æ‡∞®‡∞ø‡∞ï‡∞ø ‡∞ï‡±ä‡∞Ç‡∞§ ‡∞∏‡∞Æ‡∞æ‡∞ö‡∞æ‡∞∞‡∞Ç ‡∞Ö‡∞µ‡∞∏‡∞∞‡∞Ç.
‚û°Ô∏è ‡∞Æ‡±Ä ‡∞µ‡∞æ‡∞∞‡±ç‡∞∑‡∞ø‡∞ï ‡∞Ü‡∞¶‡∞æ‡∞Ø‡∞Ç ‡∞é‡∞Ç‡∞§?
‚û°Ô∏è ‡∞Æ‡±Ä‡∞∞‡±Å ‡∞è ‡∞∞‡∞æ‡∞∑‡±ç‡∞ü‡±ç‡∞∞‡∞Ç‡∞≤‡±ã ‡∞®‡∞ø‡∞µ‡∞∏‡∞ø‡∞∏‡±ç‡∞§‡±Å‡∞®‡±ç‡∞®‡∞æ‡∞∞‡±Å?

üéß Processing Audio: a4.mp4




‚è≥ Quota hit. Retrying in 10s...




‚è≥ Quota hit. Retrying in 20s...
‚ö†Ô∏è Using fallback STT text
üìù STT Output: 
‚ùå ‡∞Æ‡±Ä ‡∞Æ‡∞æ‡∞ü ‡∞∏‡±ç‡∞™‡∞∑‡±ç‡∞ü‡∞Ç‡∞ó‡∞æ ‡∞µ‡∞ø‡∞®‡∞ø‡∞™‡∞ø‡∞Ç‡∞ö‡∞≤‡±á‡∞¶‡±Å. ‡∞¶‡∞Ø‡∞ö‡±á‡∞∏‡∞ø ‡∞Æ‡∞≥‡±ç‡∞≤‡±Ä ‡∞ö‡±Ü‡∞™‡±ç‡∞™‡∞Ç‡∞°‡∞ø.

üéß Processing Audio: a5.mp4




‚è≥ Quota hit. Retrying in 10s...




‚è≥ Quota hit. Retrying in 20s...
‚ö†Ô∏è Using fallback STT text
üìù STT Output: ‡∞®‡∞æ ‡∞Ü‡∞¶‡∞æ‡∞Ø‡∞Ç ‡∞∞‡±Ü‡∞Ç‡∞°‡±Å ‡∞≤‡∞ï‡±ç‡∞∑‡∞≤‡±Å ‡∞Æ‡∞∞‡∞ø‡∞Ø‡±Å ‡∞®‡±á‡∞®‡±Å ‡∞§‡±Ü‡∞≤‡∞Ç‡∞ó‡∞æ‡∞£‡∞≤‡±ã ‡∞â‡∞Ç‡∞ü‡∞æ‡∞®‡±Å
üß† Memory: {'age': 'CAPTURED', 'income': 'CAPTURED', 'state': 'CAPTURED'}
‚úÖ ‡∞Æ‡±Ä ‡∞∏‡∞Æ‡∞æ‡∞ö‡∞æ‡∞∞‡∞Ç ‡∞Ü‡∞ß‡∞æ‡∞∞‡∞Ç‡∞ó‡∞æ ‡∞§‡∞ó‡∞ø‡∞® ‡∞™‡±ç‡∞∞‡∞≠‡±Å‡∞§‡±ç‡∞µ ‡∞™‡∞•‡∞ï‡∞æ‡∞≤‡∞®‡±Å ‡∞∏‡±Ç‡∞ö‡∞ø‡∞Ç‡∞ö‡∞µ‡∞ö‡±ç‡∞ö‡±Å.
