In [8]:
import os, time, json, base64
from pathlib import Path
import requests
import pandas as pd

In [4]:
# Test matrix for the TTS run.
# Each entry is a 5-tuple that the run loop unpacks as
#   (locale, lang, script, tid, text)
#   locale - locale tag for the input; stored in the results as "input_locale"
#   lang   - short language tag; looked up in LANG_CODE_MAP to pick the
#            target_language_code, and used in the output file name
#   script - script label for the text ("native", "roman", "mixed");
#            used in the output file name
#   tid    - test identifier; used in the output file name
#   text   - the sentence sent to the TTS endpoint
# The "numbers" / "roman_numbers" cases embed a slash-separated date and a
# 10-digit number in the sentence.
TESTS = [
    # Hindi
    ("hi-IN", "hi", "native", "general", "आज मौसम साफ है और हम खेत में काम कर रहे हैं।"),
    ("hi-IN", "hi", "native", "agri", "गेहूं की फसल में पीले धब्बे दिख रहे हैं, क्या दवा डालनी चाहिए?"),
    ("hi-IN", "hi", "native", "numbers", "आज की तारीख 27/02/2026 है और मेरा मोबाइल नंबर 9876543210 है।"),
    ("hi-IN", "hi", "roman",  "roman_numbers", "Aaj ki tareekh 27/02/2026 hai aur mera mobile number 9876543210 hai."),

    # Punjabi
    ("pa-IN", "pa", "native", "general", "ਅੱਜ ਮੌਸਮ ਚੰਗਾ ਹੈ ਅਤੇ ਅਸੀਂ ਖੇਤ ਵਿੱਚ ਕੰਮ ਕਰ ਰਹੇ ਹਾਂ।"),
    ("pa-IN", "pa", "native", "numbers", "ਅੱਜ ਦੀ ਤਾਰੀਖ 27/02/2026 ਹੈ ਤੇ ਮੇਰਾ ਮੋਬਾਈਲ ਨੰਬਰ 9876543210 ਹੈ।"),
    ("pa-IN", "pa", "roman",  "roman_numbers", "Ajj di tareekh 27/02/2026 ae te mera mobile number 9876543210 ae."),

    # Bengali
    ("bn-IN", "bn", "native", "general", "আজ আবহাওয়া ভালো এবং আমরা মাঠে কাজ করছি।"),
    ("bn-IN", "bn", "native", "numbers", "আজকের তারিখ 27/02/2026 এবং আমার মোবাইল নম্বর 9876543210।"),
    ("bn-IN", "bn", "roman",  "roman_numbers", "Aajker tarikh 27/02/2026 ebong amar mobile number 9876543210."),

    # Gujarati
    ("gu-IN", "gu", "native", "general", "આજે હવામાન સારું છે અને અમે ખેતરમાં કામ કરી રહ્યા છીએ।"),
    ("gu-IN", "gu", "native", "numbers", "આજની તારીખ 27/02/2026 છે અને મારો મોબાઇલ નંબર 9876543210 છે।"),
    ("gu-IN", "gu", "roman",  "roman_numbers", "Aajni tarik 27/02/2026 chhe ane maro mobile number 9876543210 chhe."),

    # Tamil
    ("ta-IN", "ta", "native", "general", "இன்று வானிலை நல்லதாக உள்ளது, நாங்கள் வயலில் வேலை செய்கிறோம்."),
    ("ta-IN", "ta", "native", "numbers", "இன்றைய தேதி 27/02/2026 மற்றும் என் கைபேசி எண் 9876543210."),
    ("ta-IN", "ta", "roman",  "roman_numbers", "Inraiya thethi 27/02/2026; en mobile number 9876543210."),

    # Mixed / Hinglish (force Hindi voice for fairness)
    ("hi-IN", "mix", "mixed", "hinglish", "Kal mandi rate check karna hai, please update kar dena."),
]

In [10]:
# Read the API key from the environment so a real credential never has to be
# typed into (and saved with) the notebook. When SARVAM_API_KEY is unset the
# original placeholder value is used, so existing behavior is unchanged.
SARVAM_KEY = os.environ.get("SARVAM_API_KEY", "PLACEHOLDER")
ENDPOINT = "https://api.sarvam.ai/text-to-speech"

# Directory that receives the generated .wav files and results.csv.
# parents=True keeps this working if OUT_DIR is later pointed at a nested path.
OUT_DIR = Path("sarvam_bulbul_outputs")
OUT_DIR.mkdir(parents=True, exist_ok=True)

# Headers sent with every request to ENDPOINT.
headers = {
    "api-subscription-key": SARVAM_KEY,
    "Content-Type": "application/json",
}

# Short language tag (second field of each TESTS tuple) -> target_language_code
# sent to the API. "mix" (Hinglish) is deliberately routed to the Hindi code.
LANG_CODE_MAP = {
    "hi": "hi-IN",
    "pa": "pa-IN",
    "bn": "bn-IN",
    "gu": "gu-IN",
    "ta": "ta-IN",
    "mix": "hi-IN",
}

In [11]:
def sarvam_tts_to_wav(text: str, target_language_code: str, out_path: Path):
    """POST `text` to the TTS endpoint and write the first returned audio to `out_path`.

    The request is sent to the module-level ENDPOINT with the module-level
    `headers`. The first entry of the response's "audios" list is
    base64-decoded and the resulting bytes are written to `out_path` as-is.

    Every failure mode is reported through the returned dict instead of an
    exception, so one bad request cannot abort a batch run.

    Args:
        text: Text to synthesize.
        target_language_code: Value sent as "target_language_code" (e.g. "hi-IN").
        out_path: Destination file for the decoded audio bytes.

    Returns:
        On success:
            {"ok": True, "status": 200, "latency": <seconds>,
             "bytes": <size written>, "request_id": <id or "">}
        On failure:
            {"ok": False, "status": <HTTP status, or None when no response
             was received>, "latency": <seconds>, "error": <message>} plus
            "request_id" when the response body was parsed.
    """
    payload = {
        "text": text,
        "target_language_code": target_language_code,
    }

    # perf_counter is monotonic, so the latency cannot be skewed by system
    # clock adjustments the way time.time() can.
    t0 = time.perf_counter()
    try:
        r = requests.post(ENDPOINT, headers=headers, json=payload, timeout=90)
    except requests.RequestException as exc:
        # Timeouts, DNS/connection failures, etc.: no HTTP status is available.
        return {
            "ok": False,
            "status": None,
            "latency": time.perf_counter() - t0,
            "error": f"{type(exc).__name__}: {exc}"[:500],
        }
    latency = time.perf_counter() - t0

    if r.status_code != 200:
        return {"ok": False, "status": r.status_code, "latency": latency, "error": r.text[:500]}

    try:
        data = r.json()
    except ValueError:
        # requests raises a ValueError subclass when the body is not JSON.
        return {"ok": False, "status": 200, "latency": latency,
                "error": f"Response is not valid JSON: {r.text[:500]}"}
    if not isinstance(data, dict):
        return {"ok": False, "status": 200, "latency": latency,
                "error": f"Unexpected JSON payload of type {type(data).__name__}"}

    # `or []` also covers an explicit `"audios": null` in the response.
    audios = data.get("audios") or []
    if not audios:
        return {"ok": False, "status": 200, "latency": latency, "error": "No audios returned", "request_id": data.get("request_id")}

    try:
        wav_bytes = base64.b64decode(audios[0])
    except (ValueError, TypeError) as exc:
        # binascii.Error (malformed base64) is a ValueError; TypeError covers
        # a non-string entry.
        return {"ok": False, "status": 200, "latency": latency,
                "error": f"Could not decode audio payload: {exc}", "request_id": data.get("request_id")}

    # Make sure the destination directory exists before writing.
    out_path.parent.mkdir(parents=True, exist_ok=True)
    out_path.write_bytes(wav_bytes)

    return {
        "ok": True,
        "status": 200,
        "latency": latency,
        "bytes": len(wav_bytes),
        "request_id": data.get("request_id", "")
    }

In [12]:
# Label used only as the prefix of each output file name.
MODEL_NAME = "sarvam_bulbul_default"

# Run every test case once and collect one result record per case.
rows = []
for locale, lang, script, tid, text in TESTS:
    # Fall back to the test's own locale rather than a fixed "hi-IN": an
    # unmapped language must not be silently synthesized as Hindi.
    tlc = LANG_CODE_MAP.get(lang, locale)
    fn = f"{MODEL_NAME}_{lang}_{script}_{tid}.wav"
    out_path = OUT_DIR / fn

    res = sarvam_tts_to_wav(text=text, target_language_code=tlc, out_path=out_path)
    ok = res.get("ok", False)

    rows.append({
        "target_language_code": tlc,
        "input_locale": locale,
        "lang": lang,
        "script": script,
        "test_id": tid,
        "ok": ok,
        "status": res.get("status"),
        # -1 marks "latency not reported" by the call.
        "latency_s": round(res.get("latency", -1), 3),
        "bytes": res.get("bytes", 0),
        "request_id": res.get("request_id", ""),
        "error": res.get("error", ""),
        # Only record a path when audio was actually written, so a failed row
        # never points at a missing (or stale, from an earlier run) file.
        "file": str(out_path) if ok else "",
        "text_preview": text[:80],
    })

    print(lang, script, tid, "->", "OK" if ok else f"FAIL ({res.get('status')})")

# Persist the summary next to the audio files, then display it.
df = pd.DataFrame(rows)
df.to_csv(OUT_DIR / "results.csv", index=False)
df

hi native general -> OK
hi native agri -> OK
hi native numbers -> OK
hi roman roman_numbers -> OK
pa native general -> OK
pa native numbers -> OK
pa roman roman_numbers -> OK
bn native general -> OK
bn native numbers -> OK
bn roman roman_numbers -> OK
gu native general -> OK
gu native numbers -> OK
gu roman roman_numbers -> OK
ta native general -> OK
ta native numbers -> OK
ta roman roman_numbers -> OK
mix mixed hinglish -> OK


Unnamed: 0,target_language_code,input_locale,lang,script,test_id,ok,status,latency_s,bytes,request_id,error,file,text_preview
0,hi-IN,hi-IN,hi,native,general,True,200,1.599,122924,20260228_d8808d83-12e5-48a2-8bcd-3bdbcc972764,,sarvam_bulbul_outputs/sarvam_bulbul_default_hi...,आज मौसम साफ है और हम खेत में काम कर रहे हैं।
1,hi-IN,hi-IN,hi,native,agri,True,200,2.034,164396,20260228_a69a0603-e53a-4257-890d-a82290d49a5b,,sarvam_bulbul_outputs/sarvam_bulbul_default_hi...,"गेहूं की फसल में पीले धब्बे दिख रहे हैं, क्या ..."
2,hi-IN,hi-IN,hi,native,numbers,True,200,1.76,345132,20260228_e33d9b94-0bbc-4917-b833-f7f941609fa0,,sarvam_bulbul_outputs/sarvam_bulbul_default_hi...,आज की तारीख 27/02/2026 है और मेरा मोबाइल नंबर ...
3,hi-IN,hi-IN,hi,roman,roman_numbers,True,200,1.937,359468,20260228_2ff1243c-9b0e-41e1-81e5-c239e457340d,,sarvam_bulbul_outputs/sarvam_bulbul_default_hi...,Aaj ki tareekh 27/02/2026 hai aur mera mobile ...
4,pa-IN,pa-IN,pa,native,general,True,200,1.571,141868,20260228_98e4d633-584a-4581-9b06-5c0e12be7c79,,sarvam_bulbul_outputs/sarvam_bulbul_default_pa...,ਅੱਜ ਮੌਸਮ ਚੰਗਾ ਹੈ ਅਤੇ ਅਸੀਂ ਖੇਤ ਵਿੱਚ ਕੰਮ ਕਰ ਰਹੇ ...
5,pa-IN,pa-IN,pa,native,numbers,True,200,1.705,350764,20260228_7f782172-b55f-4618-a7b5-98449f9bab34,,sarvam_bulbul_outputs/sarvam_bulbul_default_pa...,ਅੱਜ ਦੀ ਤਾਰੀਖ 27/02/2026 ਹੈ ਤੇ ਮੇਰਾ ਮੋਬਾਈਲ ਨੰਬਰ...
6,pa-IN,pa-IN,pa,roman,roman_numbers,True,200,1.92,343596,20260228_597ef27d-035e-4219-a6c2-cd4a44f545a9,,sarvam_bulbul_outputs/sarvam_bulbul_default_pa...,Ajj di tareekh 27/02/2026 ae te mera mobile nu...
7,bn-IN,bn-IN,bn,native,general,True,200,1.561,109612,20260228_4f420630-dbc2-40e1-8b66-f928d1ba88dc,,sarvam_bulbul_outputs/sarvam_bulbul_default_bn...,আজ আবহাওয়া ভালো এবং আমরা মাঠে কাজ করছি।
8,bn-IN,bn-IN,bn,native,numbers,True,200,1.934,336428,20260228_cb5fc8e8-8bcb-4737-8bf5-cb19d998873a,,sarvam_bulbul_outputs/sarvam_bulbul_default_bn...,আজকের তারিখ 27/02/2026 এবং আমার মোবাইল নম্বর 9...
9,bn-IN,bn-IN,bn,roman,roman_numbers,True,200,1.992,336428,20260228_9f4a3d14-6d59-403b-bc45-57480e72cf9d,,sarvam_bulbul_outputs/sarvam_bulbul_default_bn...,Aajker tarikh 27/02/2026 ebong amar mobile num...
