In [35]:
# Import required packages
import numpy as np
import sounddevice as sd
import scipy.io.wavfile as wav
import os
from pathlib import Path
import asyncio
import edge_tts
from pydub import AudioSegment
from pydub.playback import play
import io
import tempfile

# Create output directory if it doesn't exist
output_dir = Path('syllables')
output_dir.mkdir(exist_ok=True)

print("Required packages imported successfully!")


Required packages imported successfully!




In [37]:
import numpy as np
import sounddevice as sd
import scipy.io.wavfile as wav
import os
from pathlib import Path

# Create output directory if it doesn't exist
output_dir = Path('syllables')
output_dir.mkdir(exist_ok=True)


In [38]:
def create_syllable(frequency, duration=0.4, sample_rate=44100, amplitude=0.5):
    """
    Create a synthetic syllable with a given frequency and duration.
    
    Parameters:
    -----------
    frequency : float
        Frequency of the syllable in Hz
    duration : float
        Duration of the syllable in seconds
    sample_rate : int
        Sample rate in Hz
    amplitude : float
        Amplitude of the signal (0-1)
        
    Returns:
    --------
    numpy.ndarray
        Stereo audio signal
    """
    t = np.linspace(0, duration, int(sample_rate * duration))
    
    # Create base sine wave
    signal = amplitude * np.sin(2 * np.pi * frequency * t)
    
    # Apply envelope to avoid clicks
    envelope_duration = 0.05  # 50ms fade in/out
    envelope_samples = int(envelope_duration * sample_rate)
    
    # Create envelope
    envelope = np.ones_like(signal)
    envelope[:envelope_samples] = np.linspace(0, 1, envelope_samples)
    envelope[-envelope_samples:] = np.linspace(1, 0, envelope_samples)
    
    # Apply envelope
    signal = signal * envelope
    
    # Convert to stereo
    stereo_signal = np.vstack((signal, signal)).T
    
    return stereo_signal


In [39]:
def generate_trial_sequence(num_trials, num_syllables_per_trial=4):
    """
    Generate sequences of syllables for multiple trials.
    
    Parameters:
    -----------
    num_trials : int
        Number of trials to generate
    num_syllables_per_trial : int
        Number of syllables in each trial
        
    Returns:
    --------
    list
        List of trial sequences, where each sequence is a list of syllable indices
    """
    # Define base frequencies for syllables (using musical notes)
    frequencies = [440, 523, 659, 784, 880, 1047, 1319, 1568]  # A4 to G6
    num_syllables = len(frequencies)
    
    # Generate trial sequences
    trial_sequences = []
    for _ in range(num_trials):
        # Randomly select syllables for this trial
        sequence = np.random.choice(num_syllables, size=num_syllables_per_trial, replace=False)
        trial_sequences.append(sequence)
    
    return trial_sequences, frequencies


In [40]:
# Generate and save syllables
def generate_and_save_syllables(frequencies, sample_rate=44100):
    """
    Generate and save individual syllable files.
    """
    for i, freq in enumerate(frequencies):
        syllable = create_syllable(freq, sample_rate=sample_rate)
        wav.write(output_dir / f'syllable_{i+1}.wav', sample_rate, syllable)
        
    print(f"Generated {len(frequencies)} syllable files in {output_dir}")

# Generate trial sequences
num_trials = 20  # Number of trials to generate
trial_sequences, frequencies = generate_trial_sequence(num_trials)

# Generate and save syllables
generate_and_save_syllables(frequencies)

# Save trial sequences
np.save(output_dir / 'trial_sequences.npy', trial_sequences)
print(f"Saved trial sequences for {num_trials} trials")


Generated 8 syllable files in syllables
Saved trial sequences for 20 trials


In [41]:
# Optional: Test playing a syllable
def play_syllable(syllable_idx):
    """
    Play a specific syllable using sounddevice.
    """
    sample_rate = 44100
    filename = output_dir / f'syllable_{syllable_idx+1}.wav'
    if filename.exists():
        rate, data = wav.read(filename)
        sd.play(data, rate)
        sd.wait()  # Wait until the sound has finished playing
    else:
        print(f"Syllable file {filename} not found")

# Test playing the first syllable
# Uncomment the following line to test:
# play_syllable(0)


In [42]:
def create_formant_syllable(f1, f2, f3, duration=0.4, sample_rate=44100, amplitude=0.5):
    """
    Create a more realistic syllable using formant synthesis.
    
    Parameters:
    -----------
    f1, f2, f3 : float
        Formant frequencies in Hz
    duration : float
        Duration of the syllable in seconds
    sample_rate : int
        Sample rate in Hz
    amplitude : float
        Amplitude of the signal (0-1)
        
    Returns:
    --------
    numpy.ndarray
        Stereo audio signal
    """
    t = np.linspace(0, duration, int(sample_rate * duration))
    
    # Create formant-based signal
    # Use fundamental frequency around 150Hz for male voice
    f0 = 150
    
    # Create harmonics at formant frequencies
    signal = (amplitude * 0.5 * np.sin(2 * np.pi * f0 * t) +  # Fundamental
              amplitude * 0.3 * np.sin(2 * np.pi * f1 * t) +  # First formant
              amplitude * 0.2 * np.sin(2 * np.pi * f2 * t) +  # Second formant
              amplitude * 0.1 * np.sin(2 * np.pi * f3 * t))   # Third formant
    
    # Apply envelope to avoid clicks
    envelope_duration = 0.05  # 50ms fade in/out
    envelope_samples = int(envelope_duration * sample_rate)
    
    # Create envelope
    envelope = np.ones_like(signal)
    if len(signal) > 2 * envelope_samples:
        envelope[:envelope_samples] = np.linspace(0, 1, envelope_samples)
        envelope[-envelope_samples:] = np.linspace(1, 0, envelope_samples)
    
    # Apply envelope
    signal = signal * envelope
    
    # Convert to stereo
    stereo_signal = np.vstack((signal, signal)).T
    
    return stereo_signal


In [43]:
def get_chinese_syllables():
    """
    Define Chinese syllables with their formant frequencies.
    Based on Mandarin vowels and common syllables.
    
    Returns:
    --------
    dict: Dictionary with syllable names and their formant frequencies (F1, F2, F3)
    """
    # Formant frequencies for Chinese vowels/syllables (approximate values in Hz)
    chinese_syllables = {
        'ma': (700, 1200, 2500),   # /a/ sound
        'mi': (300, 2200, 3000),   # /i/ sound
        'mu': (300, 800, 2200),    # /u/ sound
        'me': (500, 1800, 2500),   # /e/ sound
        'mo': (500, 900, 2200),    # /o/ sound
        'ya': (350, 1600, 2500),   # /ja/ sound
        'yi': (250, 2300, 3100),   # /ji/ sound
        'yu': (250, 700, 2100),    # /ju/ sound
    }
    return chinese_syllables

def get_english_syllables():
    """
    Define English syllables with their formant frequencies.
    Based on English vowels and common syllables.
    
    Returns:
    --------
    dict: Dictionary with syllable names and their formant frequencies (F1, F2, F3)
    """
    # Formant frequencies for English vowels/syllables (approximate values in Hz)
    english_syllables = {
        'beat': (280, 2300, 3000),  # /i/ as in "beat"
        'bit': (400, 2000, 2550),   # /ɪ/ as in "bit"
        'bait': (550, 1800, 2500),  # /eɪ/ as in "bait"
        'bet': (600, 1900, 2500),   # /ɛ/ as in "bet"
        'bat': (750, 1750, 2450),   # /æ/ as in "bat"
        'bot': (700, 1100, 2450),   # /ɑ/ as in "bot"
        'boot': (300, 900, 2200),   # /u/ as in "boot"
        'book': (450, 1100, 2200),  # /ʊ/ as in "book"
    }
    return english_syllables


In [44]:
def generate_syllable_set(language='english', num_trials=20, num_syllables_per_trial=4):
    """
    Generate and save syllables for either Chinese or English.
    
    Parameters:
    -----------
    language : str
        'chinese' or 'english'
    num_trials : int
        Number of trials to generate
    num_syllables_per_trial : int
        Number of syllables per trial
    """
    # Get syllable definitions
    if language.lower() == 'chinese':
        syllables_dict = get_chinese_syllables()
        output_subdir = output_dir / 'chinese'
    else:
        syllables_dict = get_english_syllables()
        output_subdir = output_dir / 'english'
    
    # Create language-specific output directory
    output_subdir.mkdir(exist_ok=True)
    
    # Generate syllable audio files
    syllable_names = list(syllables_dict.keys())
    sample_rate = 44100
    
    for i, (name, formants) in enumerate(syllables_dict.items()):
        f1, f2, f3 = formants
        syllable_audio = create_formant_syllable(f1, f2, f3, sample_rate=sample_rate)
        wav.write(output_subdir / f'{name}_{i+1}.wav', sample_rate, syllable_audio)
    
    print(f"Generated {len(syllables_dict)} {language} syllable files in {output_subdir}")
    
    # Generate trial sequences
    num_syllables = len(syllables_dict)
    trial_sequences = []
    trial_syllable_names = []
    
    for trial in range(num_trials):
        # Randomly select syllables for this trial
        selected_indices = np.random.choice(num_syllables, size=num_syllables_per_trial, replace=False)
        trial_sequences.append(selected_indices)
        trial_syllable_names.append([syllable_names[i] for i in selected_indices])
    
    # Save trial information
    np.save(output_subdir / 'trial_sequences.npy', trial_sequences)
    np.save(output_subdir / 'trial_syllable_names.npy', trial_syllable_names)
    np.save(output_subdir / 'syllable_names.npy', syllable_names)
    
    print(f"Saved trial sequences for {num_trials} {language} trials")
    
    return syllable_names, trial_sequences, trial_syllable_names


In [45]:
# Generate Chinese syllables
print("Generating Chinese syllables...")
chinese_names, chinese_trials, chinese_trial_names = generate_syllable_set('chinese', num_trials=20)

print("\nChinese syllables available:")
for i, name in enumerate(chinese_names):
    print(f"{i+1}: {name}")

print(f"\nFirst trial sequence (Chinese): {chinese_trial_names[0]}")


Generating Chinese syllables...
Generated 8 chinese syllable files in syllables/chinese
Saved trial sequences for 20 chinese trials

Chinese syllables available:
1: ma
2: mi
3: mu
4: me
5: mo
6: ya
7: yi
8: yu

First trial sequence (Chinese): ['ma', 'mo', 'yu', 'mi']


In [46]:
# Generate English syllables
print("Generating English syllables...")
english_names, english_trials, english_trial_names = generate_syllable_set('english', num_trials=20)

print("\nEnglish syllables available:")
for i, name in enumerate(english_names):
    print(f"{i+1}: {name}")

print(f"\nFirst trial sequence (English): {english_trial_names[0]}")


Generating English syllables...
Generated 8 english syllable files in syllables/english
Saved trial sequences for 20 english trials

English syllables available:
1: beat
2: bit
3: bait
4: bet
5: bat
6: bot
7: boot
8: book

First trial sequence (English): ['book', 'beat', 'bat', 'bet']


In [47]:
# Test playing syllables
def play_language_syllable(language, syllable_name):
    """
    Play a specific syllable from a language set.
    """
    sample_rate = 44100
    if language.lower() == 'chinese':
        filename = output_dir / 'chinese' / f'{syllable_name}_{chinese_names.index(syllable_name)+1}.wav'
    else:
        filename = output_dir / 'english' / f'{syllable_name}_{english_names.index(syllable_name)+1}.wav'
    
    if filename.exists():
        rate, data = wav.read(filename)
        sd.play(data, rate)
        sd.wait()  # Wait until the sound has finished playing
        print(f"Played {language} syllable: {syllable_name}")
    else:
        print(f"Syllable file {filename} not found")

# Uncomment to test playing syllables:
# play_language_syllable('chinese', 'ma')
# play_language_syllable('english', 'beat')


In [48]:
# Install required packages for TTS (Text-to-Speech)
# Run this cell first to install dependencies
# !pip install gTTS pydub edge-tts


In [49]:
import asyncio
import edge_tts
from pydub import AudioSegment
from pydub.playback import play
import io
import tempfile

# Define syllables for each language
def get_chinese_syllables_real():
    """
    Define real Chinese syllables with their pinyin and meanings.
    """
    chinese_syllables = {
        'ma': '妈',    # mother (1st tone)
        'mi': '米',    # rice (3rd tone) 
        'mu': '木',    # wood (4th tone)
        'da': '大',    # big (4th tone)
        'di': '地',    # ground (4th tone)
        'du': '读',    # read (2nd tone)
        'ba': '爸',    # father (4th tone)
        'bi': '笔',    # pen (3rd tone)
    }
    return chinese_syllables

def get_english_syllables_real():
    """
    Define real English syllables.
    """
    english_syllables = {
        'cat': 'cat',
        'dog': 'dog', 
        'sun': 'sun',
        'moon': 'moon',
        'fish': 'fish',
        'bird': 'bird',
        'tree': 'tree',
        'book': 'book',
    }
    return english_syllables

print("Syllable definitions loaded!")


Syllable definitions loaded!


In [50]:
async def generate_tts_syllable(text, language='en', voice=None):
    """
    Generate TTS audio for a syllable using Edge TTS.
    
    Parameters:
    -----------
    text : str
        Text to convert to speech
    language : str
        Language code ('zh' for Chinese, 'en' for English)
    voice : str
        Voice to use (optional)
    
    Returns:
    --------
    bytes
        Audio data in MP3 format
    """
    # Select appropriate voice
    if language == 'zh':
        voice = voice or "zh-CN-XiaoxiaoNeural"  # Chinese female voice
    else:
        voice = voice or "en-US-AriaNeural"  # English female voice
    
    # Generate TTS
    communicate = edge_tts.Communicate(text, voice)
    audio_data = b""
    async for chunk in communicate.stream():
        if chunk["type"] == "audio":
            audio_data += chunk["data"]
    
    return audio_data

def convert_mp3_to_wav(mp3_data, output_path):
    """
    Convert MP3 audio data to WAV file.
    """
    # Load MP3 data
    audio = AudioSegment.from_mp3(io.BytesIO(mp3_data))
    
    # Convert to stereo 44.1kHz WAV
    audio = audio.set_frame_rate(44100).set_channels(2)
    
    # Export as WAV
    audio.export(output_path, format="wav")
    
    return audio

print("TTS functions defined!")


TTS functions defined!


In [51]:
async def generate_ai_syllable_set(language='chinese', num_trials=20, num_syllables_per_trial=4):
    """
    Generate real syllables using AI TTS for either Chinese or English.
    
    Parameters:
    -----------
    language : str
        'chinese' or 'english'
    num_trials : int
        Number of trials to generate
    num_syllables_per_trial : int
        Number of syllables per trial
    """
    # Get syllable definitions
    if language.lower() == 'chinese':
        syllables_dict = get_chinese_syllables_real()
        lang_code = 'zh'
        output_subdir = output_dir / 'chinese_ai'
    else:
        syllables_dict = get_english_syllables_real()
        lang_code = 'en'
        output_subdir = output_dir / 'english_ai'
    
    # Create language-specific output directory
    output_subdir.mkdir(exist_ok=True)
    
    # Generate syllable audio files using TTS
    syllable_names = list(syllables_dict.keys())
    
    print(f"Generating {len(syllables_dict)} {language} syllables using AI TTS...")
    
    for i, (pinyin, character) in enumerate(syllables_dict.items()):
        print(f"Generating {pinyin} ({character})...")
        
        # Generate TTS audio
        mp3_data = await generate_tts_syllable(character, lang_code)
        
        # Convert to WAV and save
        wav_path = output_subdir / f'{pinyin}_{i+1}.wav'
        convert_mp3_to_wav(mp3_data, wav_path)\n    \n    print(f\"Generated {len(syllables_dict)} {language} syllable files in {output_subdir}\")\n    \n    # Generate trial sequences\n    num_syllables = len(syllables_dict)\n    trial_sequences = []\n    trial_syllable_names = []\n    \n    for trial in range(num_trials):\n        # Randomly select syllables for this trial\n        selected_indices = np.random.choice(num_syllables, size=num_syllables_per_trial, replace=False)\n        trial_sequences.append(selected_indices)\n        trial_syllable_names.append([syllable_names[i] for i in selected_indices])\n    \n    # Save trial information\n    np.save(output_subdir / 'trial_sequences.npy', trial_sequences)\n    np.save(output_subdir / 'trial_syllable_names.npy', trial_syllable_names)\n    np.save(output_subdir / 'syllable_names.npy', syllable_names)\n    \n    print(f\"Saved trial sequences for {num_trials} {language} trials\")\n    \n    return syllable_names, trial_sequences, trial_syllable_names\n\nprint(\"AI syllable generation function ready!\")


SyntaxError: unexpected character after line continuation character (1750731643.py, line 40)

In [None]:
async def generate_ai_syllable_set_clean(language='chinese', num_trials=20, num_syllables_per_trial=4):
    """
    Generate real syllables using AI TTS for either Chinese or English.
    
    Parameters:
    -----------
    language : str
        'chinese' or 'english'
    num_trials : int
        Number of trials to generate
    num_syllables_per_trial : int
        Number of syllables per trial
    """
    # Get syllable definitions
    if language.lower() == 'chinese':
        syllables_dict = get_chinese_syllables_real()
        lang_code = 'zh'
        output_subdir = output_dir / 'chinese_ai'
    else:
        syllables_dict = get_english_syllables_real()
        lang_code = 'en'
        output_subdir = output_dir / 'english_ai'
    
    # Create language-specific output directory
    output_subdir.mkdir(exist_ok=True)
    
    # Generate syllable audio files using TTS
    syllable_names = list(syllables_dict.keys())
    
    print(f"Generating {len(syllables_dict)} {language} syllables using AI TTS...")
    
    for i, (pinyin, character) in enumerate(syllables_dict.items()):
        print(f"Generating {pinyin} ({character})...")
        
        # Generate TTS audio
        mp3_data = await generate_tts_syllable(character, lang_code)
        
        # Convert to WAV and save
        wav_path = output_subdir / f'{pinyin}_{i+1}.wav'
        convert_mp3_to_wav(mp3_data, wav_path)
    
    print(f"Generated {len(syllables_dict)} {language} syllable files in {output_subdir}")
    
    # Generate trial sequences
    num_syllables = len(syllables_dict)
    trial_sequences = []
    trial_syllable_names = []
    
    for trial in range(num_trials):
        # Randomly select syllables for this trial
        selected_indices = np.random.choice(num_syllables, size=num_syllables_per_trial, replace=False)
        trial_sequences.append(selected_indices)
        trial_syllable_names.append([syllable_names[i] for i in selected_indices])
    
    # Save trial information
    np.save(output_subdir / 'trial_sequences.npy', trial_sequences)
    np.save(output_subdir / 'trial_syllable_names.npy', trial_syllable_names)
    np.save(output_subdir / 'syllable_names.npy', syllable_names)
    
    print(f"Saved trial sequences for {num_trials} {language} trials")
    
    return syllable_names, trial_sequences, trial_syllable_names

print("Clean AI syllable generation function ready!")


In [None]:
# Choose language and generate syllables
LANGUAGE = 'chinese'  # Change to 'english' for English syllables

print(f"=== Generating {LANGUAGE.upper()} syllables using AI TTS ===")

# Show available syllables
if LANGUAGE.lower() == 'chinese':
    syllables_dict = get_chinese_syllables_real()
    print("\nChinese syllables to generate:")
    for pinyin, character in syllables_dict.items():
        print(f"  {pinyin} -> {character}")
else:
    syllables_dict = get_english_syllables_real()
    print("\nEnglish syllables to generate:")
    for word in syllables_dict.keys():
        print(f"  {word}")

print(f"\nReady to generate {len(syllables_dict)} {LANGUAGE} syllables!")


In [None]:
# Generate the syllables using AI TTS
# This cell will actually create the audio files

try:
    # Run the async function
    syllable_names, trial_sequences, trial_syllable_names = await generate_ai_syllable_set_clean(
        language=LANGUAGE, 
        num_trials=20, 
        num_syllables_per_trial=4
    )
    
    print(f"\n=== SUCCESS! ===")
    print(f"Generated {len(syllable_names)} {LANGUAGE} syllables:")
    for i, name in enumerate(syllable_names):
        print(f"  {i+1}: {name}")
    
    print(f"\nFirst trial sequence: {trial_syllable_names[0]}")
    print(f"Files saved in: syllables/{LANGUAGE}_ai/")
    
except Exception as e:
    print(f"Error: {e}")
    print("Make sure you have installed the required packages:")
    print("pip install edge-tts pydub")


In [None]:
# Test playing the generated AI syllables
def play_ai_syllable(language, syllable_name):
    """
    Play a specific AI-generated syllable.
    """
    if language.lower() == 'chinese':
        output_subdir = output_dir / 'chinese_ai'
        syllable_names = list(get_chinese_syllables_real().keys())
    else:
        output_subdir = output_dir / 'english_ai'
        syllable_names = list(get_english_syllables_real().keys())
    
    try:
        syllable_idx = syllable_names.index(syllable_name)
        filename = output_subdir / f'{syllable_name}_{syllable_idx+1}.wav'
        
        if filename.exists():
            rate, data = wav.read(filename)
            sd.play(data, rate)
            sd.wait()
            print(f"Played {language} syllable: {syllable_name}")
        else:
            print(f"Syllable file {filename} not found")
    except ValueError:
        print(f"Syllable '{syllable_name}' not found in {language} syllables")

# Uncomment to test playing AI-generated syllables:
# play_ai_syllable('chinese', 'ma')  # Play Chinese "妈" (mother)
# play_ai_syllable('english', 'cat')  # Play English "cat"

print("AI syllable playback function ready!")
