In [1]:
import sys
sys.path.append("..")

from glob import glob
import matplotlib.pyplot as plt
import ipywidgets as ipw
import numpy as np 
from tqdm.notebook import tqdm
import pandas as pd

from imitative_agent import ImitativeAgent
from lib.dataset_wrapper import Dataset
from lib import utils
from lib import abx_utils
from lib import notebooks
import os

current path: /mnt/c/Users/vpaul/OneDrive - CentraleSupelec/Inner_Speech/agent/temp_repo/imitative_agent


In [2]:
# Collect every imitative agent directory, in sorted order.
# NOTE: the pattern is relative to the current working directory.
agents_path = sorted(glob("../out/imitative_agent/*/"))

# Maps a human-readable, unique alias to the agent directory
agents_alias = {}
# Maps a parameter signature (alias without the run identifier) to the list of
# agent directories sharing these parameters
agents_group = {}

# Printed once rather than on every iteration: it is loop-invariant and is mostly
# useful to understand why the relative pattern above matched nothing.
print(os.getcwd())
if not agents_path:
    print("No agent directory matched '../out/imitative_agent/*/'")

for agent_path in agents_path:
    print(agent_path)

    # Only the configuration is needed here, so skip loading the neural networks
    agent = ImitativeAgent.reload(agent_path, load_nn=False)
    config = agent.config

    # Optional filter, kept disabled: skip agents with another jerk loss ceiling
    #if config["training"]["jerk_loss_ceil"] != 0.014: continue

    # Run identifier: the part of the directory name after the last "-".
    # Slicing a single character (agent_path[-2]) truncated multi-digit identifiers
    # and made distinct agents share the same alias.
    # NOTE(review): assumes directories are named "<signature>-<index>"; when there
    # is no "-", the whole directory name is used.
    agent_dirname = os.path.basename(os.path.normpath(agent_path))
    agent_i = agent_dirname.rsplit("-", 1)[-1]

    # Parameter signature shared by all runs of the same configuration
    agent_group = " ".join((
        f"{','.join(config['dataset']['names'])}",  # Dataset names
        f"synth_art={agent.synthesizer.config['dataset']['art_type']}",  # Articulatory features type
        f"jerk_c={config['training']['jerk_loss_ceil']}",  # Jerk loss ceiling
        f"jerk_w={config['training']['jerk_loss_weight']}",  # Jerk loss weight
        f"bi={config['model']['inverse_model']['bidirectional']}",  # Whether inverse model is bidirectional
    ))

    # Alias = signature + run identifier
    agent_alias = f"{agent_group} ({agent_i})"
    if agent_alias in agents_alias:
        # Never overwrite a previous agent silently: directory names are unique
        # among the globbed paths, so fall back to the full name.
        agent_alias = f"{agent_group} ({agent_dirname})"
    agents_alias[agent_alias] = agent_path

    # Register the agent in its parameter group (created on first use)
    agents_group.setdefault(agent_group, []).append(agent_path)

In [3]:
# Detection method name for each analysed consonant
# (handed to abx_utils.get_occlusions_indexes together with the consonant list).
DETECTION_METHODS = {
    **dict.fromkeys(("p", "b"), "lips"),
    **dict.fromkeys(("t", "d"), "tongue_tip"),
    **dict.fromkeys(("k", "g"), "tongue_mid"),
}
# Analysed consonants, in the insertion order of the mapping above
TONGUE_CONSONANTS = list(DETECTION_METHODS)

In [4]:
# agent path -> dataset name -> item name -> EMA converted from the estimated articulation
agents_ema = {}
# dataset name -> result of abx_utils.get_occlusions_indexes for that dataset
datasets_occlusions = {}

for agent_path in tqdm(agents_alias.values()):
    # Register this agent's (still empty) store before reloading it
    agent_ema = {}
    agents_ema[agent_path] = agent_ema

    # Reload the agent with default arguments and fetch what the inner loop needs
    agent = ImitativeAgent.reload(agent_path)
    synth_dataset = agent.synthesizer.dataset
    main_dataset = agent.get_main_dataset()  # not used further in this cell
    agent_features = agent.repeat_datasplit(None)

    for dataset_name, dataset_features in agent_features.items():
        # Occlusions are computed from the dataset alone, so one computation is
        # shared by every agent using that dataset.
        if dataset_name not in datasets_occlusions:
            dataset = Dataset(dataset_name)

            # Indexes of the consonant occurrences (vowels come from the dataset's phones_infos)
            consonants_indexes = abx_utils.get_datasets_phones_indexes(
                {dataset_name: dataset.lab},
                TONGUE_CONSONANTS,
                dataset.phones_infos["vowels"],
            )
            # Occlusions located from the recorded EMA and the dataset palate
            datasets_occlusions[dataset_name] = abx_utils.get_occlusions_indexes(
                TONGUE_CONSONANTS,
                consonants_indexes,
                DETECTION_METHODS,
                {dataset_name: dataset.get_items_data("ema")},
                dataset.palate,
            )

        # Convert every estimated articulatory trajectory to EMA coordinates,
        # filling the per-dataset store item by item.
        items_estimated_ema = {}
        agent_ema[dataset_name] = items_estimated_ema
        items_estimated_ema.update(
            (item_name, synth_dataset.art_to_ema(item_art))
            for item_name, item_art in dataset_features["art_estimated"].items()
        )

0it [00:00, ?it/s]

In [5]:
def show_dataset(dataset_name):
    """Interactive view of recorded articulator positions around consonant occlusions.

    For every consonant of ``datasets_occlusions[dataset_name]`` a two-panel figure
    is drawn: EMA frames taken ``offset`` frames before the occlusion start index
    (left) and ``offset`` frames after the occlusion stop index (right), with the
    palate outline. A table of "mean ±std" distances (lip aperture, tongue tip to
    palate, tongue mid to palate) is displayed afterwards.

    Each occlusion entry is read as ``(_, item_name, start_frame, stop_frame)``.

    Args:
        dataset_name (str): Name of the dataset to analyze; must be a key of
            ``datasets_occlusions``.
    """
    # Load dataset and extract relevant information
    dataset = Dataset(dataset_name)
    items_ema = dataset.get_items_data("ema")
    dataset_occlusions = datasets_occlusions[dataset_name]
    palate = dataset.palate

    def pad_limits(low, high, ratio=0.05):
        """Widen (low, high) by `ratio` of each bound's magnitude.

        Identical to `low * 0.95, high * 1.05` for positive bounds, but still pads
        outwards when a bound is negative (plain scaling would shrink the view).
        """
        return low - ratio * abs(low), high + ratio * abs(high)

    def format_stat(distances):
        """Format an array of distances as 'mean ±std' with two decimals."""
        return f"{distances.mean():.2f} ±{distances.std():.2f}"

    # Display boundaries with padding
    ema_limits = dataset.ema_limits
    display_xlim = pad_limits(ema_limits["xmin"], ema_limits["xmax"])
    display_ylim = pad_limits(ema_limits["ymin"], ema_limits["ymax"])

    def show_occlusions(offset=2):
        """Plot the occlusion frames of each consonant and display their statistics.

        Args:
            offset (int): Number of frames subtracted from the start index and added
                to the stop index of each occlusion. The resulting indexes are
                clamped to the item's first/last frame.
        """
        consonants_stats = {}

        for consonant, occlusions in dataset_occlusions.items():
            # Nothing to plot or measure (the 2-D slicing below would fail on an empty array)
            if len(occlusions) == 0:
                continue

            # Extract EMA frames around each occlusion
            frames = {"start": [], "stop": []}
            for occlusion in occlusions:
                item_ema = items_ema[occlusion[1]]
                last_frame = len(item_ema) - 1
                # Clamp: a negative index would silently wrap to the end of the item,
                # an index past the end would raise.
                frames["start"].append(item_ema[max(occlusion[2] - offset, 0)])
                frames["stop"].append(item_ema[min(occlusion[3] + offset, last_frame)])
            frames = {
                occlusions_type: np.array(type_frames)
                for occlusions_type, type_frames in frames.items()
            }

            # One figure per consonant: left panel = start frames, right panel = stop frames
            fig, axes = plt.subplots(
                1, 2, figsize=(12, 3), dpi=60, subplot_kw={"aspect": "equal"}
            )
            occlusions_stats = consonants_stats[consonant] = {}

            for ax, (occlusions_type, occlusions_ema) in zip(axes, frames.items()):
                # Title names the selected dataset instead of a hard-coded one
                ax.set_title(f"{consonant} {occlusions_type} ({dataset_name} original)", pad=10)
                ax.set_xlim(*display_xlim)
                ax.set_ylim(*display_ylim)
                ax.plot(palate[:, 0], palate[:, 1], 'k-', linewidth=2, label='Palate')
                # Even columns are plotted as x, odd columns as y
                ax.scatter(occlusions_ema[:, 0::2], occlusions_ema[:, 1::2],
                           c="tab:blue", s=15, alpha=0.6, label='Articulators')
                ax.set_xticks([])
                ax.set_yticks([])
                ax.grid(True, alpha=0.3)
                ax.legend(loc='upper right')

                # Lip aperture: Euclidean distance between columns 8:10 and 10:12
                lips_distance = np.sqrt(np.sum((occlusions_ema[:, 10:12] - occlusions_ema[:, 8:10]) ** 2, axis=1))
                occlusions_stats[f"{occlusions_type}_lips"] = format_stat(lips_distance)

                # Tongue tip (columns 2:4) to palate distance
                tongue_tip_distance = abx_utils.coil_distances_from_palate(occlusions_ema[:, 2:4], palate)
                occlusions_stats[f"{occlusions_type}_tongue_tip"] = format_stat(tongue_tip_distance)

                # Tongue mid (columns 4:6) to palate distance
                tongue_mid_distance = abx_utils.coil_distances_from_palate(occlusions_ema[:, 4:6], palate)
                occlusions_stats[f"{occlusions_type}_tongue_mid"] = format_stat(tongue_mid_distance)

            fig.subplots_adjust(wspace=-.1)
            plt.show()

        # Display statistical summary (one row per consonant)
        consonants_stats = pd.DataFrame.from_dict(consonants_stats, orient="index")
        display(consonants_stats)

    # Create interactive offset control
    ipw.interact(show_occlusions, offset=(0, 10))

# Dropdown listing every dataset whose occlusions were computed; selecting one calls show_dataset
ipw.interactive(show_dataset, dataset_name=list(datasets_occlusions))

interactive(children=(Dropdown(description='dataset_name', options=(), value=None), Output()), _dom_classes=('…

In [6]:
def show_agent(agent_alias):
    """Interactive view of an agent's estimated articulator positions around consonant occlusions.

    For every dataset of the agent and every consonant of that dataset's entry in
    ``datasets_occlusions``, a two-panel figure is drawn from the agent's estimated
    EMA (``agents_ema``): frames taken ``offset`` frames before the occlusion start
    index (left) and ``offset`` frames after the stop index (right), with the
    synthesizer dataset's palate outline. A table of "mean ±std" distances (lip
    aperture, tongue tip to palate, tongue mid to palate) is displayed afterwards.

    Each occlusion entry is read as ``(_, item_name, start_frame, stop_frame)``.

    Args:
        agent_alias (str): Alias of the agent to analyze; must be a key of
            ``agents_alias``.
    """
    # Load agent (configuration only) and extract relevant information
    agent_path = agents_alias[agent_alias]
    agent = ImitativeAgent.reload(agent_path, load_nn=False)
    synth_dataset = agent.synthesizer.dataset
    palate = synth_dataset.palate
    agent_ema = agents_ema[agent_path]
    dataset_names = agent.config["dataset"]["names"]
    jerk_weight = agent.config['training']['jerk_loss_weight']
    # With several datasets, rows and titles are prefixed by the dataset name so
    # that a later dataset does not overwrite the statistics of an earlier one.
    several_datasets = len(dataset_names) > 1

    def pad_limits(low, high, ratio=0.05):
        """Widen (low, high) by `ratio` of each bound's magnitude.

        Identical to `low * 0.95, high * 1.05` for positive bounds, but still pads
        outwards when a bound is negative (plain scaling would shrink the view).
        """
        return low - ratio * abs(low), high + ratio * abs(high)

    def format_stat(distances):
        """Format an array of distances as 'mean ±std' with two decimals."""
        return f"{distances.mean():.2f} ±{distances.std():.2f}"

    # Display boundaries with padding
    ema_limits = synth_dataset.ema_limits
    display_xlim = pad_limits(ema_limits["xmin"], ema_limits["xmax"])
    display_ylim = pad_limits(ema_limits["ymin"], ema_limits["ymax"])

    def show_occlusions(offset=2):
        """Plot the occlusion frames of each consonant and display their statistics.

        Args:
            offset (int): Number of frames subtracted from the start index and added
                to the stop index of each occlusion. The resulting indexes are
                clamped to the item's first/last frame.
        """
        consonants_stats = {}

        for dataset_name in dataset_names:
            # The recorded dataset itself is not needed here (only the agent's
            # estimated EMA and the precomputed occlusions), so it is not reloaded.
            items_ema = agent_ema[dataset_name]
            dataset_occlusions = datasets_occlusions[dataset_name]

            for consonant, occlusions in dataset_occlusions.items():
                # Nothing to plot or measure (the 2-D slicing below would fail on an empty array)
                if len(occlusions) == 0:
                    continue

                label = f"{dataset_name} {consonant}" if several_datasets else consonant

                # Extract estimated EMA frames around each occlusion
                frames = {"start": [], "stop": []}
                for occlusion in occlusions:
                    item_ema = items_ema[occlusion[1]]
                    last_frame = len(item_ema) - 1
                    # Clamp: a negative index would silently wrap to the end of the
                    # item, an index past the end would raise.
                    frames["start"].append(item_ema[max(occlusion[2] - offset, 0)])
                    frames["stop"].append(item_ema[min(occlusion[3] + offset, last_frame)])
                frames = {
                    occlusions_type: np.array(type_frames)
                    for occlusions_type, type_frames in frames.items()
                }

                # One figure per consonant: left panel = start frames, right panel = stop frames
                fig, axes = plt.subplots(
                    1, 2, figsize=(12, 3), dpi=60, subplot_kw={"aspect": "equal"}
                )
                occlusions_stats = consonants_stats[label] = {}

                for ax, (occlusions_type, occlusions_ema) in zip(axes, frames.items()):
                    ax.set_title(f"{label} {occlusions_type} (jerk={jerk_weight})", pad=10)
                    ax.set_xlim(*display_xlim)
                    ax.set_ylim(*display_ylim)
                    ax.plot(palate[:, 0], palate[:, 1], 'k-', linewidth=2, label='Palate')
                    # Even columns are plotted as x, odd columns as y
                    ax.scatter(occlusions_ema[:, 0::2], occlusions_ema[:, 1::2],
                               c="tab:blue", s=15, alpha=0.6, label='Articulators')
                    ax.set_xticks([])
                    ax.set_yticks([])
                    ax.grid(True, alpha=0.3)
                    ax.legend(loc='upper right')

                    # Lip aperture: Euclidean distance between columns 8:10 and 10:12
                    lips_distance = np.sqrt(np.sum((occlusions_ema[:, 10:12] - occlusions_ema[:, 8:10]) ** 2, axis=1))
                    occlusions_stats[f"{occlusions_type}_lips"] = format_stat(lips_distance)

                    # Tongue tip (columns 2:4) to palate distance
                    tongue_tip_distance = abx_utils.coil_distances_from_palate(occlusions_ema[:, 2:4], palate)
                    occlusions_stats[f"{occlusions_type}_tongue_tip"] = format_stat(tongue_tip_distance)

                    # Tongue mid (columns 4:6) to palate distance
                    tongue_mid_distance = abx_utils.coil_distances_from_palate(occlusions_ema[:, 4:6], palate)
                    occlusions_stats[f"{occlusions_type}_tongue_mid"] = format_stat(tongue_mid_distance)

                fig.subplots_adjust(wspace=-.1)
                plt.show()

        # Display statistical summary (one row per consonant, or per dataset/consonant pair)
        consonants_stats = pd.DataFrame.from_dict(consonants_stats, orient="index")
        display(consonants_stats)

    # Create interactive offset control
    ipw.interact(show_occlusions, offset=(0, 10))

# Dropdown listing the agent aliases in alphabetical order; selecting one calls show_agent
ipw.interactive(show_agent, agent_alias=sorted(agents_alias))

interactive(children=(Dropdown(description='agent_alias', options=(), value=None), Output()), _dom_classes=('w…