In [30]:
import os
import random
import asyncio
import edge_tts
from pathlib import Path

# Working-directory-relative folder for generated audio; created here if missing
output_dir = Path('chinese')
output_dir.mkdir(exist_ok=True)

# Pool of Chinese words that the generator samples from
syllables = [
    '收音机',
    '山脉',
    '我不知道',
]

# Default Edge TTS voice
VOICE = "zh-CN-XiaoxiaoNeural"  # Female voice
# Other voices that can be swapped in:
# VOICE = "zh-CN-YunxiNeural"   # Male voice
# VOICE = "zh-CN-XiaoyiNeural"  # Female voice 2


In [31]:
async def _synthesize_to_file(text, filepath, voice):
    """Synthesize `text` with Edge TTS using `voice` and save it to `filepath`."""
    # "+0%" leaves the voice at its natural speaking rate and volume
    communicate = edge_tts.Communicate(text, voice, rate="+0%", volume="+0%")
    await communicate.save(str(filepath))


async def generate_syllable_audio(n, output_dir='/Users/yufang/WM_load/syllables/chinese', voice=VOICE, split_syllables=False):
    """
    Pick n random words from the module-level `syllables` list and convert
    them to MP3 files using Edge TTS.

    With split_syllables=False each word is written to
    `<output_dir>/<index>_<word>.mp3`. With split_syllables=True each word gets
    its own sub-directory `<output_dir>/<index>_<word>/` holding one file per
    character, named `<index>_<position>_<character>.mp3`.

    A failure on an individual file is printed and skipped so that the
    remaining files are still attempted; the final summary reports how many
    files failed.

    Args:
        n (int): Number of words to sample (without replacement). Must be
            between 0 and len(syllables).
        output_dir (str): Directory to save the audio files. It is created,
            including missing parent directories, if it does not exist.
            NOTE: the default is an absolute path on the original author's
            machine; pass an explicit directory when running elsewhere.
        voice (str): Edge TTS voice to use
        split_syllables (bool): Whether to split multi-character words into
            individual characters and generate one file per character

    Returns:
        list[Path]: Paths of the audio files that were generated successfully.

    Raises:
        ValueError: If n is negative or larger than the number of available words.
    """
    # Validate before touching the filesystem so a bad n leaves no side effects
    if not 0 <= n <= len(syllables):
        raise ValueError(
            f"n must be between 0 and {len(syllables)} (the number of available words), got {n}"
        )

    # Make sure the output directory exists; parents=True because the path
    # may be several levels deep and none of them are guaranteed to exist
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    # Sample n distinct words
    selected_words = random.sample(syllables, n)

    generated = []
    failed_count = 0

    for i, word in enumerate(selected_words, 1):
        if split_syllables:
            # One sub-directory per word, one file per character
            word_dir = output_dir / f"{i:03d}_{word}"
            word_dir.mkdir(exist_ok=True)

            for j, syllable in enumerate(word, 1):
                filepath = word_dir / f"{i:03d}_{j:02d}_{syllable}.mp3"

                try:
                    await _synthesize_to_file(syllable, filepath, voice)
                except Exception as e:
                    # Best effort: report and keep going with the next syllable
                    print(f"Error generating audio for syllable {syllable}: {str(e)}")
                    failed_count += 1
                    continue

                generated.append(filepath)
                print(f"Generated audio for syllable {j} of word {i}/{n}: {syllable}")
        else:
            # Whole-word mode: one file per word directly in output_dir
            filepath = output_dir / f"{i:03d}_{word}.mp3"

            try:
                await _synthesize_to_file(word, filepath, voice)
            except Exception as e:
                # Best effort: report and keep going with the next word
                print(f"Error generating audio for {word}: {str(e)}")
                failed_count += 1
                continue

            generated.append(filepath)
            print(f"Generated audio for word {i}/{n}: {word}")

    # Only claim full success when nothing was skipped
    if failed_count:
        print(f"\n{len(generated)} audio file(s) generated in the '{output_dir}' directory; {failed_count} failed.")
    else:
        print(f"\nAll audio files have been generated in the '{output_dir}' directory.")

    return generated


# Run the async function to generate audio files
async def main():
    # Set split_syllables=True to generate individual syllable audio files
    await generate_syllable_audio(3, split_syllables=True)

# Run the async main function
await main()


Generated audio for syllable 1 of word 1/3: 山
Generated audio for syllable 2 of word 1/3: 脉
Generated audio for syllable 1 of word 2/3: 我
Generated audio for syllable 2 of word 2/3: 不
Generated audio for syllable 3 of word 2/3: 知
Generated audio for syllable 4 of word 2/3: 道
Generated audio for syllable 1 of word 3/3: 收
Generated audio for syllable 2 of word 3/3: 音
Generated audio for syllable 3 of word 3/3: 机

All audio files have been generated in the '/Users/yufang/WM_load/syllables/chinese' directory.
