In [23]:
import os, time, io, wave, zipfile
from pathlib import Path
import boto3
import pandas as pd

In [24]:
#Secret keys
os.environ["AWS_ACCESS_KEY_ID"] = "PLACEHOLDER"
os.environ["AWS_SECRET_ACCESS_KEY"] = "PLACEHOLDER"
os.environ["AWS_DEFAULT_REGION"] = "ap-south-1"

polly = boto3.client("polly", region_name=os.environ["AWS_DEFAULT_REGION"])
print("Polly client ready in region:", os.environ["AWS_DEFAULT_REGION"])

Polly client ready in region: ap-south-1


In [8]:
resp = polly.describe_voices()

codes = sorted({v["LanguageCode"] for v in resp["Voices"]})
in_codes = [c for c in codes if c.endswith("-IN")]

print("IN language codes:", in_codes)

IN language codes: ['en-IN']


There is only single Indian Language code in Amazon Polly

In [14]:
resp = polly.describe_voices()
en_in = [v for v in resp["Voices"] if v["LanguageCode"] == "en-IN"]
[(v["Id"], v["Name"]) for v in en_in]

[('Raveena', 'Raveena'), ('Aditi', 'Aditi'), ('Kajal', 'Kajal')]

In [16]:
VOICE_ID = en_in[0]["Id"]
print("Using VOICE_ID:", VOICE_ID)

Using VOICE_ID: Raveena


In [20]:
TESTS = [
    # Hindi
    ("hi-IN", "hi", "native", "general", "आज मौसम साफ है और हम खेत में काम कर रहे हैं।"),
    ("hi-IN", "hi", "native", "agri", "गेहूं की फसल में पीले धब्बे दिख रहे हैं, क्या दवा डालनी चाहिए?"),
    ("hi-IN", "hi", "native", "numbers", "आज की तारीख 27/02/2026 है और मेरा मोबाइल नंबर 9876543210 है।"),
    ("hi-IN", "hi", "roman",  "roman_numbers", "Aaj ki tareekh 27/02/2026 hai aur mera mobile number 9876543210 hai."),

    # Punjabi
    ("pa-IN", "pa", "native", "general", "ਅੱਜ ਮੌਸਮ ਚੰਗਾ ਹੈ ਅਤੇ ਅਸੀਂ ਖੇਤ ਵਿੱਚ ਕੰਮ ਕਰ ਰਹੇ ਹਾਂ।"),
    ("pa-IN", "pa", "native", "numbers", "ਅੱਜ ਦੀ ਤਾਰੀਖ 27/02/2026 ਹੈ ਤੇ ਮੇਰਾ ਮੋਬਾਈਲ ਨੰਬਰ 9876543210 ਹੈ।"),
    ("pa-IN", "pa", "roman",  "roman_numbers", "Ajj di tareekh 27/02/2026 ae te mera mobile number 9876543210 ae."),

    # Bengali
    ("bn-IN", "bn", "native", "general", "আজ আবহাওয়া ভালো এবং আমরা মাঠে কাজ করছি।"),
    ("bn-IN", "bn", "native", "numbers", "আজকের তারিখ 27/02/2026 এবং আমার মোবাইল নম্বর 9876543210।"),
    ("bn-IN", "bn", "roman",  "roman_numbers", "Aajker tarikh 27/02/2026 ebong amar mobile number 9876543210."),

    # Gujarati
    ("gu-IN", "gu", "native", "general", "આજે હવામાન સારું છે અને અમે ખેતરમાં કામ કરી રહ્યા છીએ।"),
    ("gu-IN", "gu", "native", "numbers", "આજની તારીખ 27/02/2026 છે અને મારો મોબાઇલ નંબર 9876543210 છે।"),
    ("gu-IN", "gu", "roman",  "roman_numbers", "Aajni tarik 27/02/2026 chhe ane maro mobile number 9876543210 chhe."),

    # Tamil
    ("ta-IN", "ta", "native", "general", "இன்று வானிலை நல்லதாக உள்ளது, நாங்கள் வயலில் வேலை செய்கிறோம்."),
    ("ta-IN", "ta", "native", "numbers", "இன்றைய தேதி 27/02/2026 மற்றும் என் கைபேசி எண் 9876543210."),
    ("ta-IN", "ta", "roman",  "roman_numbers", "Inraiya thethi 27/02/2026; en mobile number 9876543210."),

    # Mixed / Hinglish (force Hindi voice for fairness)
    ("hi-IN", "mix", "mixed", "hinglish", "Kal mandi rate check karna hai, please update kar dena."),
]

In [25]:
OUT_DIR = Path("polly_outputs")
OUT_DIR.mkdir(exist_ok=True)

SAMPLE_RATE = 16000
CHANNELS = 1
SAMPLE_WIDTH = 2

def polly_synthesize_to_wav(text: str, wav_path: Path, voice_id: str = VOICE_ID):
    t0 = time.time()
    resp = polly.synthesize_speech(
        Text=text,
        VoiceId=voice_id,
        OutputFormat="pcm",
        SampleRate=str(SAMPLE_RATE),
        Engine="standard"
    )
    pcm = resp["AudioStream"].read()
    latency = time.time() - t0

    with wave.open(str(wav_path), "wb") as wf:
        wf.setnchannels(CHANNELS)
        wf.setsampwidth(SAMPLE_WIDTH)
        wf.setframerate(SAMPLE_RATE)
        wf.writeframes(pcm)

    return latency, len(pcm)

In [26]:
MODEL_NAME = "amazon_polly_enIN"
rows = []

for locale, lang, script, tid, text in TESTS:
    filename = f"{MODEL_NAME}_{lang}_{script}_{tid}.wav"
    wav_path = OUT_DIR / filename
    latency, nbytes = polly_synthesize_to_wav(text, wav_path, VOICE_ID)

    rows.append({
        "locale_input": locale,
        "lang_tag": lang,
        "script": script,
        "test_id": tid,
        "voice_used": VOICE_ID,
        "engine": "standard",
        "sample_rate": SAMPLE_RATE,
        "latency_s": round(latency, 3),
        "audio_bytes": nbytes,
        "file": str(wav_path)
    })

    print(lang, script, tid, "->", filename, "| latency:", round(latency, 3), "s")

df = pd.DataFrame(rows)
df

hi native general -> amazon_polly_enIN_hi_native_general.wav | latency: 0.83 s
hi native agri -> amazon_polly_enIN_hi_native_agri.wav | latency: 0.28 s
hi native numbers -> amazon_polly_enIN_hi_native_numbers.wav | latency: 0.298 s
hi roman roman_numbers -> amazon_polly_enIN_hi_roman_roman_numbers.wav | latency: 0.186 s
pa native general -> amazon_polly_enIN_pa_native_general.wav | latency: 0.138 s
pa native numbers -> amazon_polly_enIN_pa_native_numbers.wav | latency: 0.179 s
pa roman roman_numbers -> amazon_polly_enIN_pa_roman_roman_numbers.wav | latency: 0.183 s
bn native general -> amazon_polly_enIN_bn_native_general.wav | latency: 0.14 s
bn native numbers -> amazon_polly_enIN_bn_native_numbers.wav | latency: 0.177 s
bn roman roman_numbers -> amazon_polly_enIN_bn_roman_roman_numbers.wav | latency: 0.182 s
gu native general -> amazon_polly_enIN_gu_native_general.wav | latency: 0.138 s
gu native numbers -> amazon_polly_enIN_gu_native_numbers.wav | latency: 0.182 s
gu roman roman_numb

Unnamed: 0,locale_input,lang_tag,script,test_id,voice_used,engine,sample_rate,latency_s,audio_bytes,file
0,hi-IN,hi,native,general,Raveena,standard,16000,0.83,97430,polly_outputs/amazon_polly_enIN_hi_native_gene...
1,hi-IN,hi,native,agri,Raveena,standard,16000,0.28,135908,polly_outputs/amazon_polly_enIN_hi_native_agri...
2,hi-IN,hi,native,numbers,Raveena,standard,16000,0.298,310424,polly_outputs/amazon_polly_enIN_hi_native_numb...
3,hi-IN,hi,roman,roman_numbers,Raveena,standard,16000,0.186,289394,polly_outputs/amazon_polly_enIN_hi_roman_roman...
4,pa-IN,pa,native,general,Raveena,standard,16000,0.138,1024,polly_outputs/amazon_polly_enIN_pa_native_gene...
5,pa-IN,pa,native,numbers,Raveena,standard,16000,0.179,244630,polly_outputs/amazon_polly_enIN_pa_native_numb...
6,pa-IN,pa,roman,roman_numbers,Raveena,standard,16000,0.183,293134,polly_outputs/amazon_polly_enIN_pa_roman_roman...
7,bn-IN,bn,native,general,Raveena,standard,16000,0.14,1024,polly_outputs/amazon_polly_enIN_bn_native_gene...
8,bn-IN,bn,native,numbers,Raveena,standard,16000,0.177,216208,polly_outputs/amazon_polly_enIN_bn_native_numb...
9,bn-IN,bn,roman,roman_numbers,Raveena,standard,16000,0.182,294144,polly_outputs/amazon_polly_enIN_bn_roman_roman...
