In [None]:
# 🔁 Transcription audio avec Azure Whisper + Synthèse vocale avec Bark
!pip install -q openai python-dotenv
!pip install -q git+https://github.com/suno-ai/bark.git
!pip install -q scipy torchaudio

In [None]:
import os
import openai
import pandas as pd
from glob import glob
from pathlib import Path
from dotenv import load_dotenv
from scipy.io.wavfile import write
from bark import SAMPLE_RATE, generate_audio

In [None]:
# 📁 Prepare the working directories
audio_dir = Path("/content/audio_batch")  # input recordings to transcribe
transcribed_dir = Path("/content/audio_synthetise")  # re-synthesized WAV output

# parents=True also creates /content when it is missing, so the cell does not
# raise FileNotFoundError on a machine where that directory does not exist yet;
# exist_ok=True keeps the cell safe to re-run.
for directory in (audio_dir, transcribed_dir):
    directory.mkdir(parents=True, exist_ok=True)

In [None]:
# 🔐 .env API config (fill in with your own Azure details)
# The file is rewritten on every run of this cell, so edit the values here.
# NOTE(review): the API key below is a placeholder and must be replaced before
# the transcription cell can authenticate; avoid committing a real key.
# NOTE(review): the deployment name looks like a GPT-4o deployment, while it is
# later passed to the audio transcription call — confirm it targets a Whisper
# deployment.
with open("/content/.env", "w", encoding="utf-8") as f:
    f.write("""AZURE_OPENAI_API_KEY=your_api_key_here
AZURE_OPENAI_ENDPOINT=https://instancehackatonpionners05.openai.azure.com
AZURE_OPENAI_DEPLOYMENT=gpt-4o-pionners34
AZURE_OPENAI_API_VERSION=2024-05-01-preview
""")

# override=True: by default load_dotenv keeps variables that are already set,
# so after editing the values above and re-running this cell the kernel would
# silently keep using the stale ones.
load_dotenv("/content/.env", override=True)

# Configure the legacy (openai<1.0) module-level client for Azure.
openai.api_key = os.getenv("AZURE_OPENAI_API_KEY")
openai.api_base = os.getenv("AZURE_OPENAI_ENDPOINT")
openai.api_type = "azure"
openai.api_version = os.getenv("AZURE_OPENAI_API_VERSION")
deployment_name = os.getenv("AZURE_OPENAI_DEPLOYMENT")

In [None]:
# Batch pipeline: for every audio file in audio_dir, transcribe it with the
# Azure OpenAI deployment, re-synthesize the transcript with Bark into
# transcribed_dir, and collect one summary row per file for the CSV export.

# Accepted input extensions. The suffix is compared exactly (case-insensitive);
# the previous character-class glob "*.[wm][ap][v3]" also matched unrelated
# extensions such as .map or .mpv and missed upper-case ones such as .WAV.
AUDIO_SUFFIXES = {".wav", ".mp3"}
# Fixed column order, so the CSV keeps its header even when no file was processed.
CSV_COLUMNS = ["fichier_audio", "transcription", "audio_synthetise"]

results = []

# Sorted so the files are processed in the same order on every run.
audio_files = sorted(
    candidate
    for candidate in audio_dir.glob("*")
    if candidate.is_file() and candidate.suffix.lower() in AUDIO_SUFFIXES
)
if not audio_files:
    print(f"⚠️ Aucun fichier .wav/.mp3 trouvé dans {audio_dir}")

try:
    # 🔁 Process each file
    for file_path in audio_files:
        file_name = file_path.name
        print(f"🎧 Traitement de : {file_name}")

        # 🎙️ Step 1: transcription through the Azure deployment.
        # response_format="text" makes the call return the transcript as a
        # plain string, which is why .strip() can be applied directly.
        with open(file_path, "rb") as audio_file:
            response = openai.Audio.transcribe(
                file=audio_file,
                model="whisper-1",
                deployment_id=deployment_name,
                response_format="text"
            )
        texte_transcrit = response.strip()

        # 🔊 Step 2: speech synthesis with Bark.
        # NOTE(review): the style description is sent as part of the text
        # prompt, so Bark may speak it aloud instead of applying it as a style.
        # Bark's documented way to pick a voice is the history_prompt
        # argument — TODO confirm and switch if so.
        # NOTE(review): Bark is documented as generating only short clips per
        # call; long transcripts may be cut off — TODO confirm.
        prompt = "African French accent, male voice. " + texte_transcrit
        audio_out = generate_audio(prompt)
        out_path = transcribed_dir / f"{file_path.stem}_bark_revoice.wav"
        write(out_path, SAMPLE_RATE, audio_out)

        # Summary row for this file
        results.append({
            "fichier_audio": file_name,
            "transcription": texte_transcrit,
            "audio_synthetise": out_path.name
        })
finally:
    # 💾 CSV export. Done in `finally` so the rows of the files that were
    # already processed are saved even if a later file raises; the exception
    # itself still propagates afterwards.
    df = pd.DataFrame(results, columns=CSV_COLUMNS)
    df.to_csv("/content/resume_transcription_tts.csv", index=False)
    print("✅ Résumé exporté dans resume_transcription_tts.csv")