In [4]:
# ✅ Install Required Packages
!pip install sounddevice scipy google-generativeai gtts requests

# ✅ Imports
import os
import shutil
import subprocess
import sys

import google.generativeai as genai
import requests
import sounddevice as sd
from gtts import gTTS
from scipy.io.wavfile import write

# ✅ API Keys — read from the environment so secrets are never stored in the code.
# Set HF_API_KEY (Hugging Face) and GOOGLE_API_KEY (Gemini) before running.
# NOTE: any key that was previously committed in this file should be revoked
# and regenerated, since it must be considered leaked.
HF_API_KEY = os.environ.get("HF_API_KEY", "")
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY", "")

if not HF_API_KEY or not GOOGLE_API_KEY:
    print("⚠️ HF_API_KEY and/or GOOGLE_API_KEY is not set; API calls will fail.")

# ✅ Configure Gemini
genai.configure(api_key=GOOGLE_API_KEY)

# ✅ Step 1: Record audio
def record_audio(filename="input.wav", duration=5, fs=16000):
    """Record a single-channel, 16-bit clip and store it as a WAV file.

    Args:
        filename: Path the WAV file is written to.
        duration: Length of the recording in seconds.
        fs: Sample rate in Hz, used for both capture and the WAV header.

    Returns:
        The ``filename`` that was passed in, for convenient chaining.
    """
    print("🎤 Recording...")
    frame_count = int(duration * fs)
    samples = sd.rec(frame_count, samplerate=fs, channels=1, dtype='int16')
    # Wait for the capture to finish before the samples are written out.
    sd.wait()
    write(filename, fs, samples)
    print(f"✅ Audio saved to {filename}")
    return filename

# ✅ Step 2: Transcribe with Hugging Face Whisper
def transcribe_with_whisper(audio_path="input.wav", timeout=60):
    """Transcribe an audio file with the hosted Hugging Face Whisper model.

    Args:
        audio_path: Path of the audio file whose raw bytes are uploaded.
        timeout: Seconds to wait for the HTTP request before giving up.
            Without a timeout the call could block indefinitely.

    Returns:
        The transcribed text, or None when the request fails, the API answers
        with a non-200 status, or the response carries no "text" field.

    Raises:
        OSError: If ``audio_path`` cannot be opened or read.
    """
    url = "https://api-inference.huggingface.co/models/openai/whisper-large-v3"
    headers = {
        "Authorization": f"Bearer {HF_API_KEY}",
        "Content-Type": "audio/wav"
    }

    with open(audio_path, "rb") as f:
        audio_data = f.read()

    print("📝 Transcribing audio with Whisper...")
    try:
        response = requests.post(
            url, headers=headers, data=audio_data, timeout=timeout
        )
    except requests.RequestException as e:
        # Connection errors and timeouts are reported the same way as API
        # errors so the caller only has to check for None.
        print(f"❌ Whisper request failed: {e}")
        return None

    if response.status_code != 200:
        print(f"❌ Whisper API Error {response.status_code}: {response.text}")
        return None

    try:
        result = response.json()
    except ValueError:
        print(f"❌ Whisper API returned a non-JSON response: {response.text}")
        return None

    # Guard against a 200 response whose payload is not the expected
    # {"text": ...} object instead of crashing with KeyError/TypeError.
    if not isinstance(result, dict) or "text" not in result:
        print(f"❌ Unexpected Whisper response: {result}")
        return None

    print("✅ Transcription:", result["text"])
    return result["text"]

# ✅ Step 3: Get AI response from Gemini 1.5 Flash (correct model name)
def get_google_ai_response(prompt):
    """Send ``prompt`` to the Gemini model and return the reply text.

    Args:
        prompt: Text passed to the model's ``generate_content`` call.

    Returns:
        The text of the model's response, or None if any step raised
        (the error is printed rather than propagated).
    """
    try:
        gemini = genai.GenerativeModel("models/gemini-1.5-flash-latest")
        reply = gemini.generate_content(prompt).text
        print("🤖 Gemini Response:", reply)
    except Exception as err:
        # Top-level boundary: report the failure and signal it with None.
        print("❌ Gemini Error:", str(err))
        return None
    return reply

# ✅ Step 4: Convert AI response to speech
def speak_text(text, lang="en", filename="response.mp3"):
    """Convert ``text`` to speech, save it as an MP3 and play it.

    Args:
        text: The text to speak. Empty or whitespace-only text is skipped.
        lang: Language code handed to gTTS.
        filename: Path the generated MP3 is written to.

    Playback is platform dependent: Windows opens the file with its
    associated application, macOS uses ``afplay``, and other systems use the
    first available of ``mpg123``, ``ffplay`` or ``xdg-open``. If no player
    is found the file is still saved and a message is printed.
    """
    if not text or not text.strip():
        # Nothing to synthesize; skip instead of handing empty text to gTTS
        # (presumably gTTS rejects empty input — confirm against its docs).
        print("❌ No text to speak.")
        return

    tts = gTTS(text=text, lang=lang)
    tts.save(filename)

    if os.name == 'nt':  # Windows
        # Opens the file with its associated application, like `start`,
        # but without building a shell command string.
        os.startfile(filename)
        return

    if sys.platform == "darwin":  # macOS
        candidates = [["afplay"]]
    else:  # Linux and other systems, where afplay does not exist
        candidates = [
            ["mpg123", "-q"],
            ["ffplay", "-nodisp", "-autoexit", "-loglevel", "quiet"],
            ["xdg-open"],
        ]

    for command in candidates:
        if shutil.which(command[0]):
            # List form avoids shell interpretation of the file name.
            subprocess.run(command + [filename], check=False)
            return

    print(f"❌ No audio player found; response saved to {filename}")

# ✅ MAIN: Run the pipeline
if __name__ == "__main__":
    # Pipeline: record -> transcribe -> ask Gemini -> speak the answer.
    audio_file = record_audio()
    transcribed_text = transcribe_with_whisper(audio_file)

    if not transcribed_text:
        # Transcription failed or came back empty; nothing to send to Gemini.
        print("❌ Could not transcribe the audio.")
    else:
        ai_reply = get_google_ai_response(transcribed_text)
        # Only speak when Gemini actually produced a reply.
        if ai_reply:
            speak_text(ai_reply)


🎤 Recording...



[notice] A new release of pip is available: 24.0 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


✅ Audio saved to input.wav
📝 Transcribing audio with Whisper...
✅ Transcription:  Hi, my name is Vishal.
🤖 Gemini Response: Hi Vishal, it's nice to meet you!  How can I help you today?

