I tried, but I'm getting errors. Also, I can't find any example for Google's TTS models in the cookbook or in the Agno docs.
import base64
import requests
from agno.agent import Agent
from agno.media import Audio
from agno.models.google import Gemini
from agno.utils.audio import write_audio_to_file
from dotenv import load_dotenv
# Pull settings from a local .env file into the process environment
# (presumably where the Gemini API key lives — confirm).
load_dotenv()

# Download the sample recording. The raw WAV bytes are kept as-is; no base64
# encoding is performed before they are handed to the agent.
url = "https://openaiassets.blob.core.windows.net/$web/API/docs/audio/alloy.wav"
# requests has no default timeout, so without one a stalled server would hang
# the script forever.
response = requests.get(url, timeout=30)
# Fail loudly on 4xx/5xx instead of passing an error page on as audio.
response.raise_for_status()
wav_data = response.content
# Build an agent backed by the Gemini text-to-speech preview model.
# The Gemini speech-generation API expects the AUDIO response modality to be
# requested explicitly, and the voice to be given in the nested
# voice_config / prebuilt_voice_config structure rather than as a bare string.
# NOTE(review): requires an agno release whose Gemini model forwards
# response_modalities and speech_config to the API — confirm installed version.
agent = Agent(
    model=Gemini(
        id="gemini-2.5-flash-preview-tts",
        response_modalities=["AUDIO"],
        speech_config={
            "voice_config": {"prebuilt_voice_config": {"voice_name": "Kore"}},
        },
    ),
    markdown=True,
)
# Send the prompt together with the downloaded recording as a WAV attachment.
# NOTE(review): Gemini TTS models are documented as text-in / audio-out, so an
# audio attachment may be rejected by this model — confirm.
recording = Audio(content=wav_data, format="wav")
agent.run("What's in these recording?", audio=[recording])

# Write the reply to disk only when the run actually produced audio.
reply_audio = agent.run_response.response_audio
if reply_audio is not None:
    write_audio_to_file(audio=reply_audio.content, filename="tmp/result.wav")
I also tried the `response_modalities=["text", "audio"]` parameter, but I think for Gemini it only supports text and image. The `speech_config={"voice": "Kore", "format": "wav"}` and `audio={"voice": "Kore", "format": "wav"}` parameters also don't work.