In [None]:
# Gemini conversation: https://gemini.google.com/app/83774c879215fb34?utm_source=app_launcher&utm_medium=owned&utm_campaign=base_all

In [None]:
#!pip install python-dotenv
#!pip -q install google-genai jinja2
#!pip -q install sounddevice
#!pip -q install numpy

In [1]:
from google import genai
from google.genai import types



In [2]:
from dotenv import load_dotenv
import os

# Pull the variables defined in the local env file into the process environment.
load_dotenv('my_keys.env')

# Read the key from the environment; its value is never echoed.
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
api_key = GOOGLE_API_KEY

# Report only whether a key was found, not the key itself.
print(
    "API key loaded successfully!"
    if api_key
    else "API key not found. Check your .env file and path."
)

API key loaded successfully!


## Hard-coded user input

In [6]:
# The simplest way to play back the audio in Colab is to write it out to a .wav file, so here is a simple wave file writer:

import contextlib

@contextlib.contextmanager
def wave_file(filename, channels: int = 1, rate: int = 24000, sample_width: int = 2):
    """Open ``filename`` as a WAV file for writing and yield the writer.

    The header parameters are set before the writer is handed to the caller,
    and the file is closed when the ``with`` block exits.

    Args:
        filename: Target passed straight to ``wave.open`` in ``"wb"`` mode.
        channels: Number of audio channels.
        rate: Frame rate in Hz.
        sample_width: Sample width in bytes.

    Yields:
        The writer object returned by ``wave.open(filename, "wb")``.
    """
    # Imported locally so this helper does not depend on a later cell having
    # imported ``wave`` before the first call.
    import wave

    with wave.open(filename, "wb") as wf:
        wf.setnchannels(channels)
        wf.setsampwidth(sample_width)
        wf.setframerate(rate)
        yield wf

In [7]:
import asyncio
import wave
from google import genai
from IPython.display import display, Audio


# Client authenticated with the key read from the environment earlier.
client = genai.Client(api_key=GOOGLE_API_KEY)

# Model name passed to the live connection below.
MODEL = "gemini-2.0-flash-live-001"

# Request audio as the response modality for the session.
config = dict(response_modalities=["AUDIO"])

async def async_enumerate(aiterable, start: int = 0):
    """Asynchronous counterpart of the built-in ``enumerate``.

    Args:
        aiterable: Any object usable in an ``async for`` loop.
        start: Index assigned to the first item (defaults to 0, like
            ``enumerate``).

    Yields:
        ``(index, item)`` tuples, with ``index`` increasing by one per item.
    """
    n = start
    async for item in aiterable:
        yield n, item
        n += 1


async with client.aio.live.connect(model=MODEL, config=config) as session:
  file_name = 'audio.wav'
  with wave_file(file_name) as wav:
    message = "Hello? Gemini are you there? How are you?"
    print("> ", message, "\n")
    await session.send_client_content(
        turns={"role": "user", "parts": [{"text": message}]}, turn_complete=True
    )

    turn = session.receive()
    async for n,response in async_enumerate(turn):
      if response.data is not None:
        wav.writeframes(response.data)

        if n==0:
          print(response.server_content.model_turn.parts[0].inline_data.mime_type)
        print('.', end='')


display(Audio(file_name, autoplay=True))


>  Hello? Gemini are you there? How are you? 

audio/pcm;rate=24000
....................

## Using Python input

In [8]:
# The simplest way to play back the audio in Colab is to write it out to a .wav file, so here is a simple wave file writer:

import contextlib

@contextlib.contextmanager
def wave_file(filename, channels=1, rate=24000, sample_width=2):
    """Yield a WAV writer for ``filename`` with its header fields already set.

    The writer is opened in ``"wb"`` mode, configured with the given channel
    count, sample width (bytes) and frame rate, and closed once the caller's
    ``with`` block finishes.
    """
    writer = wave.open(filename, "wb")
    try:
        writer.setnchannels(channels)
        writer.setsampwidth(sample_width)
        writer.setframerate(rate)
        yield writer
    finally:
        # Closing finalizes the file on both normal and error exit.
        writer.close()

In [10]:
async with client.aio.live.connect(model=MODEL, config=config) as session:
    # file_name is defined here, as it will be used for each turn
    file_name = 'audio.wav'

    while True:
        message = input("User> ")
        if message.lower() == "q":
            break

        await session.send_client_content(
            turns={"role": "user", "parts": [{"text": message}]}, turn_complete=True
        )

        # Move the wave_file context manager inside the loop.
        # This ensures that for each new user message and model response,
        # 'audio.wav' is opened anew (which typically truncates existing content)
        # to store only the audio for the current response.
        with wave_file(file_name) as wav:
            turn = session.receive()
            async for n, response in async_enumerate(turn):
                if response.data is not None:
                    wav.writeframes(response.data)

                    if n == 0:
                        print(response.server_content.model_turn.parts[0].inline_data.mime_type)
                    print('.', end='')
            # After this loop, 'audio.wav' contains only the audio from the most recent model turn.

        # Now, play only the audio from the current turn.
        display(Audio(file_name, autoplay=True))

User>  hi


audio/pcm;rate=24000
..........

User>  How are you?


audio/pcm;rate=24000
...............

User>  Im fine thanks


audio/pcm;rate=24000
.....................

User>  q
