In [None]:
# Add parent directory to Python path for importing local modules
import sys
sys.path.append("..")

# Standard library imports
from glob import glob
import os
import pickle

# Scientific computing and visualization
import numpy as np
import matplotlib.pyplot as plt
from scipy.io import wavfile
from scipy.io.wavfile import write

# Interactive notebook widgets
import ipywidgets as ipw
from IPython.display import Audio

# Custom modules for speech synthesis and visualization
from imitative_agent import ImitativeAgent  # Main agent for speech imitation
from lib.dataset_wrapper import Dataset     # Wrapper for speech datasets
from lib.notebooks import show_ema         # EMA visualization utilities
from external import lpcynet               # LPCNet vocoder for speech synthesis


current path: /mnt/c/Users/vpaul/OneDrive - CentraleSupelec/Inner_Speech/agent/imitative_agent_inner_speech_silent_finetune


In [3]:
"""
Sanity check of the LPCNet analysis / resynthesis round trip - run it before relying on the agent
"""

# Read the reference recording; LPCNet only accepts 16 kHz, 16-bit PCM input
sampling_rate, pcm = wavfile.read("../external/lpcynet/item_0000.wav")
is_16khz = sampling_rate == 16000
is_int16 = pcm.dtype == "int16"
assert is_16khz and is_int16, "Audio must be 16kHz 16-bit PCM"

# Analysis step: one feature vector per frame.
# Per the original notes the result is float32 with shape (num_frames, 20):
#   - columns 1-18: cepstral coefficients
#   - column 19:    period parameter
#   - column 20:    correlation parameter
duration_seconds = len(pcm) / sampling_rate
print(f"Analyzing {duration_seconds:.2f} seconds of audio...")
lpcnet_features = lpcynet.analyze_frames(pcm)
num_frames = lpcnet_features.shape[0]
print(f"Extracted {num_frames} frames of features")

# Synthesis step: turn the features back into a waveform with the LPCNet vocoder
print("Synthesizing audio from features...")
resynthesized_pcm = lpcynet.synthesize_frames(lpcnet_features)

# Write the round-tripped audio next to the notebook so it can be listened to
output_file = "resynth3.wav"
wavfile.write(output_file, sampling_rate, resynthesized_pcm)
print(f"Saved resynthesized audio to {output_file}")

In [2]:

# Every saved agent directory of this experiment, in lexicographic order.
# The trailing "/" in the pattern restricts the matches to directories.
agents_path = sorted(glob("../out/imitative_agent1_0jerk/*/"))


In [24]:
"""
Helpers that run dataset items through an imitative agent and save the resynthesized
audio (both the repeated and the estimated sounds).
The agent is selected by its path, and the output directory is derived from the agent's name.
"""

def process_item(item_name, item_cepstrum, item_source, agent, sampling_rate, output_dir):
    """Run one item through the agent, resynthesize it with LPCNet and save the audio.

    Two WAV files are written:
    ``<output_dir>/repeated/<item_name>.wav`` and ``<output_dir>/estimated/<item_name>.wav``.
    Missing directories are created.

    Args:
        item_name: Identifier of the item; used as the WAV file stem.
        item_cepstrum: Cepstral frames of the item, handed to ``agent.repeat``.
        item_source: Source features of the item; appended column-wise (axis 1)
            to each cepstrum returned by the agent before synthesis.
        agent: Object whose ``repeat(cepstrum)`` returns a mapping containing
            the ``"sound_repeated"`` and ``"sound_estimated"`` cepstra.
        sampling_rate: Sampling rate passed to the WAV writer.
        output_dir: Base directory receiving the ``repeated`` and ``estimated``
            sub-directories.

    Raises:
        ValueError: If a cepstrum returned by the agent does not have the same
            number of frames as ``item_source``.
    """
    # Announce the item *before* the slow synthesis so the log reflects real progress
    print(f"Processing item: {item_name} ({len(item_cepstrum)} frames)")

    repetition = agent.repeat(item_cepstrum)
    cepstra = {
        "repeated": repetition["sound_repeated"],
        "estimated": repetition["sound_estimated"],
    }

    # Validate everything up front: a mismatch would otherwise surface as an
    # anonymous numpy concatenation error, possibly after one file was written.
    for kind, cepstrum in cepstra.items():
        if len(cepstrum) != len(item_source):
            raise ValueError(
                f"Item {item_name}: {kind} cepstrum has {len(cepstrum)} frames "
                f"but source features have {len(item_source)}"
            )

    # Synthesize both waveforms first, then write them (same order as before)
    waves = {
        kind: lpcynet.synthesize_frames(np.concatenate((cepstrum, item_source), axis=1))
        for kind, cepstrum in cepstra.items()
    }

    for kind, wave in waves.items():
        wav_path = os.path.join(output_dir, kind, item_name + ".wav")
        os.makedirs(os.path.dirname(wav_path), exist_ok=True)
        write(wav_path, sampling_rate, wave)

    print(f"Saved repeated and estimated waveforms for {item_name}")

def repeat_dataset(agent_path, dataset_name="pb2007"):
    """Repeat every item of a dataset with the given agent and save the audio.

    Outputs go to ``../datasets/imitative_agent_<agent_id>/<dataset_name>``,
    where ``<agent_id>`` is the name of the agent's own directory.

    Args:
        agent_path: Directory of a saved agent, with or without a trailing
            path separator (e.g. an entry of ``agents_path``).
        dataset_name: Name of the dataset to process.
    """
    agent = ImitativeAgent.reload(agent_path)

    # normpath drops any trailing separator, so the agent's own directory name is
    # picked whether or not the path ends with "/". With dirname() alone, a path
    # without the trailing slash resolved to the *parent* folder and every agent
    # ended up writing into the same output directory.
    agent_id = os.path.basename(os.path.normpath(agent_path))
    output_base_dir = f"../datasets/imitative_agent_{agent_id}/{dataset_name}"

    # Load the per-item features, keeping silences so frames stay aligned with the source
    dataset = Dataset(dataset_name)
    items_cepstrum = dataset.get_items_data("cepstrum", cut_silences=False)
    items_source = dataset.get_items_data("source", cut_silences=False)
    sampling_rate = dataset.features_config["wav_sampling_rate"]

    for item_entry in dataset.get_items_list():
        # Each entry is a sequence whose second field is the item name
        # (the first field is not used here - presumably the dataset name; verify).
        item_name = item_entry[1]
        process_item(
            item_name=item_name,
            item_cepstrum=items_cepstrum[item_name],
            item_source=items_source[item_name],
            agent=agent,
            sampling_rate=sampling_rate,
            output_dir=output_base_dir,
        )

# Example usage:
# repeat_dataset(agents_path[0])  # Process using first available agent

{'dataset': {'batch_size': 8, 'datasplits_size': [64, 16, 20], 'names': ['pb2007'], 'num_workers': 6, 'shuffle_between_epochs': True, 'sound_type': 'cepstrum'}, 'model': {'direct_model': {'activation': 'relu', 'batch_norm': True, 'dropout_p': 0.25, 'hidden_layers': [256, 256, 256, 256]}, 'inverse_model': {'bidirectional': True, 'dropout_p': 0.25, 'hidden_size': 32, 'num_layers': 2}}, 'synthesizer': {'name': 'ea587b76c95fecef01cfd16c7f5f289d-0/'}, 'training': {'jerk_loss_ceil': 0, 'jerk_loss_weight': 0, 'learning_rate': 0.001, 'max_epochs': 500, 'patience': 25}}
{'direct_model': {'activation': 'relu', 'batch_norm': True, 'dropout_p': 0.25, 'hidden_layers': [256, 256, 256, 256]}, 'inverse_model': {'bidirectional': True, 'dropout_p': 0.25, 'hidden_size': 32, 'num_layers': 2}}
{'direct_model': {'activation': 'relu', 'batch_norm': True, 'dropout_p': 0.25, 'hidden_layers': [256, 256, 256, 256]}, 'inverse_model': {'bidirectional': True, 'dropout_p': 0.25, 'hidden_size': 32, 'num_layers': 2}}
