# Toy Lick Data Generator for LickCalc Testing

This notebook generates example CSV files with toy lick timing data in various formats for testing the LickCalc webapp. The generated data mimics realistic rodent licking behavior patterns based on the existing `testlickdata.csv` file.

## Features:
- Generates realistic lick durations (mean ≈ 0.065 s, all values truncated to the 0.015–0.16 s range)
- Creates multiple file formats (Med Associates, CSV/TXT, DD Lab)
- Customizable burst patterns and session characteristics
- Interactive widgets for parameter adjustment
- Validation plots to verify data quality

## 1. Import Required Libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
from pathlib import Path
import ipywidgets as widgets
from IPython.display import display, Markdown
import warnings
# Silence every warning for the rest of the notebook session.
# NOTE(review): this also hides deprecation warnings from numpy/pandas/scipy.
warnings.filterwarnings('ignore')

# Set random seed for reproducibility
# (seeds NumPy's global RNG, which the np.random.* calls below draw from)
np.random.seed(42)

# Set plotting style
plt.style.use('default')
sns.set_palette("husl")

# Report library versions so generated files can be traced to an environment.
print("Libraries imported successfully!")
print(f"NumPy version: {np.__version__}")
print(f"Pandas version: {pd.__version__}")
print(f"Matplotlib version: {plt.matplotlib.__version__}")

## 2. Load and Analyze Reference Data

In [None]:
# Load the reference data
# The path is relative to the current working directory of the kernel.
data_path = Path('../data/testlickdata.csv')

if data_path.exists():
    # The file is read as a single headerless column of lick timestamps.
    reference_data = pd.read_csv(data_path, header=None, names=['lick_times'])
    print(f"Loaded {len(reference_data)} lick timestamps from reference file")
    
    # Analyze the reference data
    lick_times = reference_data['lick_times'].values
    inter_lick_intervals = np.diff(lick_times)
    
    # The last timestamp is used as the session duration.
    print(f"\nReference Data Analysis:")
    print(f"Session duration: {lick_times[-1]:.2f} seconds ({lick_times[-1]/60:.1f} minutes)")
    print(f"Total licks: {len(lick_times)}")
    print(f"Mean inter-lick interval: {np.mean(inter_lick_intervals):.3f} seconds")
    print(f"Median inter-lick interval: {np.median(inter_lick_intervals):.3f} seconds")
    print(f"ILI range: {np.min(inter_lick_intervals):.3f} - {np.max(inter_lick_intervals):.3f} seconds")
    
    # Plot reference data
    fig, axes = plt.subplots(2, 2, figsize=(14, 10))
    
    # Session overview
    # y-values are the 0-based lick index, so the curve starts at 0 rather than 1.
    axes[0,0].plot(lick_times, range(len(lick_times)), 'b-', alpha=0.7)
    axes[0,0].set_xlabel('Time (s)')
    axes[0,0].set_ylabel('Cumulative Licks')
    axes[0,0].set_title('Reference Data: Session Overview')
    
    # Inter-lick interval histogram
    # Bins span the full ILI range; only the 0-1 s portion is displayed.
    axes[0,1].hist(inter_lick_intervals, bins=50, alpha=0.7, color='green')
    axes[0,1].set_xlabel('Inter-lick Interval (s)')
    axes[0,1].set_ylabel('Frequency')
    axes[0,1].set_title('Inter-lick Interval Distribution')
    axes[0,1].set_xlim(0, 1)
    
    # Lick rate over time (binned)
    bin_size = 60  # 1 minute bins
    bins = np.arange(0, lick_times[-1] + bin_size, bin_size)
    lick_counts, _ = np.histogram(lick_times, bins=bins)
    bin_centers = bins[:-1] + bin_size/2
    axes[1,0].plot(bin_centers/60, lick_counts, 'r-', marker='o')
    axes[1,0].set_xlabel('Time (minutes)')
    axes[1,0].set_ylabel('Licks per minute')
    axes[1,0].set_title('Lick Rate Over Session')
    
    # Burst identification (simple threshold)
    # An ILI above the threshold ends a burst: the lick after the gap (index + 1)
    # starts the next one, and lick 0 always starts the first burst.
    burst_threshold = 0.5  # seconds
    burst_starts = np.where(inter_lick_intervals > burst_threshold)[0] + 1
    burst_starts = np.concatenate(([0], burst_starts))
    # Each burst ends on the lick before the next burst start; the last burst
    # ends on the final lick.
    burst_ends = np.concatenate((burst_starts[1:] - 1, [len(lick_times) - 1]))
    burst_sizes = burst_ends - burst_starts + 1
    
    # One histogram bin per integer burst size.
    axes[1,1].hist(burst_sizes, bins=range(1, max(burst_sizes)+2), alpha=0.7, color='purple')
    axes[1,1].set_xlabel('Burst Size (licks)')
    axes[1,1].set_ylabel('Frequency')
    axes[1,1].set_title(f'Burst Size Distribution (>{burst_threshold}s threshold)')
    
    plt.tight_layout()
    plt.show()
    
else:
    # Fallback: only lick_times and reference_data are defined on this path;
    # no analysis is printed and no plots are drawn.
    print(f"Reference data file not found at {data_path}")
    print("Creating synthetic reference data for demonstration...")
    # Create some basic reference data if file not found
    # (1000 timestamps built from exponential intervals with a 0.15 s mean)
    lick_times = np.cumsum(np.random.exponential(0.15, 1000))
    reference_data = pd.DataFrame({'lick_times': lick_times})

## 3. Define Lick Duration Distribution

In [None]:
def generate_lick_durations(n_licks, mean_duration=0.065, min_duration=0.015, max_duration=0.16):
    """
    Generate lick durations from a truncated normal distribution.
    
    The underlying normal is located at ``mean_duration`` with a standard
    deviation chosen so that the ``[min_duration, max_duration]`` range is
    2 * 2.58 standard deviations wide. Samples are then hard-truncated to that
    range, so every returned value lies inside it. Because the truncation is
    generally not symmetric around ``mean_duration``, the sample mean is only
    approximately equal to ``mean_duration``.
    
    Parameters:
    -----------
    n_licks : int
        Number of lick durations to generate
    mean_duration : float
        Location of the untruncated normal in seconds (default 0.065s)
    min_duration : float
        Lower truncation bound in seconds (default 0.015s)
    max_duration : float
        Upper truncation bound in seconds (default 0.16s)
    
    Returns:
    --------
    np.array : Array of ``n_licks`` lick durations in seconds
    
    Raises:
    -------
    ValueError
        If ``min_duration`` is not strictly smaller than ``max_duration``.
    """
    # An empty or inverted range would give a zero or negative standard
    # deviation; fail early with a clear message instead.
    if not min_duration < max_duration:
        raise ValueError(
            f"min_duration ({min_duration}) must be smaller than "
            f"max_duration ({max_duration})"
        )
    
    # 99% of a normal distribution lies within ~2.58 standard deviations of
    # its centre, so the range is treated as spanning ±2.58σ.
    range_span = max_duration - min_duration
    std_duration = range_span / (2 * 2.58)
    
    # truncnorm expects its bounds in standard units relative to loc/scale.
    a = (min_duration - mean_duration) / std_duration  # Lower bound in standard units
    b = (max_duration - mean_duration) / std_duration  # Upper bound in standard units
    
    durations = stats.truncnorm.rvs(a, b, loc=mean_duration, scale=std_duration, size=n_licks)
    
    return durations

# Test the duration generator
# Draw a large sample with the default parameters and summarise it.
test_durations = generate_lick_durations(10000)

print(f"Generated {len(test_durations)} lick durations:")
print(f"Mean: {np.mean(test_durations):.4f}s")
print(f"Std: {np.std(test_durations):.4f}s")
print(f"Min: {np.min(test_durations):.4f}s")
print(f"Max: {np.max(test_durations):.4f}s")
print(f"1st percentile: {np.percentile(test_durations, 1):.4f}s")
print(f"99th percentile: {np.percentile(test_durations, 99):.4f}s")

# Plot the distribution
plt.figure(figsize=(12, 4))

# Left panel: density histogram with the default bounds and mean marked.
# NOTE(review): 0.015 and 0.16 are the generator's truncation bounds, so the
# "percentile target" legend labels describe hard limits, not percentiles.
plt.subplot(1, 2, 1)
plt.hist(test_durations, bins=50, alpha=0.7, density=True)
plt.axvline(0.015, color='red', linestyle='--', label='1st percentile target')
plt.axvline(0.16, color='red', linestyle='--', label='99th percentile target')
plt.axvline(0.065, color='green', linestyle='-', label='Mean target')
plt.xlabel('Lick Duration (s)')
plt.ylabel('Density')
plt.title('Generated Lick Duration Distribution')
plt.legend()

# Right panel: empirical cumulative distribution with 1% / 99% guide lines.
plt.subplot(1, 2, 2)
plt.hist(test_durations, bins=50, alpha=0.7, cumulative=True, density=True)
plt.axhline(0.01, color='red', linestyle='--', label='1% threshold')
plt.axhline(0.99, color='red', linestyle='--', label='99% threshold')
plt.xlabel('Lick Duration (s)')
plt.ylabel('Cumulative Probability')
plt.title('Cumulative Distribution')
plt.legend()

plt.tight_layout()
plt.show()

# Verify percentiles
# Share of samples below, above and inside the default duration bounds.
print(f"\nVerification:")
print(f"Percentage of durations < 0.015s: {np.sum(test_durations < 0.015) / len(test_durations) * 100:.2f}%")
print(f"Percentage of durations > 0.16s: {np.sum(test_durations > 0.16) / len(test_durations) * 100:.2f}%")
print(f"Percentage between 0.015s and 0.16s: {np.sum((test_durations >= 0.015) & (test_durations <= 0.16)) / len(test_durations) * 100:.2f}%")

## 4. Generate Basic Lick Timestamps

In [None]:
def generate_lick_session(total_licks=1000, session_duration_min=20, 
                         burst_rate=0.8, within_burst_ili=0.15, between_burst_ili=2.0,
                         burst_size_mean=8, burst_size_std=3):
    """
    Generate a lick session made of bursts and isolated licks.
    
    The session is built event by event. Each event is either a burst (with
    probability ``burst_rate``) or a single isolated lick. Every event after
    the first starts one exponentially distributed between-burst interval after
    the previous lick's offset, so two licks never share an onset. Inside a
    burst, onsets advance by exponentially distributed intervals; an onset that
    would fall before the previous lick's offset is moved to that offset, so
    licks never overlap and onsets are non-decreasing.
    
    Parameters:
    -----------
    total_licks : int
        Total number of licks to generate. Values <= 0 give an empty session.
    session_duration_min : float
        Accepted for interface compatibility but currently not used: the
        session length follows from the drawn intervals.
    burst_rate : float
        Probability that the next event is a burst rather than an isolated lick (0-1)
    within_burst_ili : float
        Mean onset-to-onset interval within bursts (seconds)
    between_burst_ili : float
        Mean interval between the end of one event and the start of the next (seconds)
    burst_size_mean : float
        Mean number of licks per burst
    burst_size_std : float
        Standard deviation of licks per burst
    
    Returns:
    --------
    dict : Dictionary with keys
        'onset_times' (np.array), 'offset_times' (np.array),
        'burst_labels' (np.array; burst number starting at 1, 0 for isolated licks),
        'session_duration' (last onset + 60 s buffer; 60 for an empty session),
        'total_licks' (int) and 'n_bursts' (int)
    """
    
    onset_times = []
    offset_times = []
    burst_labels = []
    current_time = 0.0
    current_burst = 0
    
    def add_lick(onset, label):
        """Record one lick starting at `onset` with a freshly drawn duration."""
        duration = generate_lick_durations(1)[0]
        onset_times.append(onset)
        offset_times.append(onset + duration)
        burst_labels.append(label)
    
    while len(onset_times) < total_licks:
        # Every event after the first is separated from the previous lick by a
        # between-burst gap measured from that lick's offset.
        if offset_times:
            current_time = offset_times[-1] + np.random.exponential(between_burst_ili)
        
        if np.random.random() < burst_rate:
            # Burst: at least one lick, never more than the licks still needed.
            burst_size = max(1, int(np.random.normal(burst_size_mean, burst_size_std)))
            burst_size = min(burst_size, total_licks - len(onset_times))
            
            current_burst += 1
            
            for lick_in_burst in range(burst_size):
                if lick_in_burst > 0:
                    current_time += np.random.exponential(within_burst_ili)
                    # A lick cannot begin before the previous one has ended.
                    current_time = max(current_time, offset_times[-1])
                add_lick(current_time, current_burst)
        else:
            # Isolated lick, labelled 0.
            add_lick(current_time, 0)
    
    # Guard against an empty session (total_licks <= 0).
    last_onset = onset_times[-1] if onset_times else 0.0
    
    return {
        'onset_times': np.array(onset_times),
        'offset_times': np.array(offset_times), 
        'burst_labels': np.array(burst_labels),
        'session_duration': last_onset + 60,  # Add some buffer time
        'total_licks': len(onset_times),
        'n_bursts': current_burst
    }

# Generate a test session
test_session = generate_lick_session(total_licks=500, session_duration_min=15)

print(f"Generated session with {test_session['total_licks']} licks")
print(f"Session duration: {test_session['session_duration']:.1f} seconds ({test_session['session_duration']/60:.1f} minutes)")
print(f"Number of bursts: {test_session['n_bursts']}")

# Analyze the generated session
# These assignments rebind the notebook-level names of the same spelling.
onset_times = test_session['onset_times'] 
offset_times = test_session['offset_times']
durations = offset_times - onset_times
inter_lick_intervals = np.diff(onset_times)  # onset-to-onset intervals

print(f"\nSession Analysis:")
print(f"Mean lick duration: {np.mean(durations):.4f}s")
print(f"Mean inter-lick interval: {np.mean(inter_lick_intervals):.3f}s")
# The rate uses the reported session duration, which includes the 60 s buffer
# added by generate_lick_session.
print(f"Lick rate: {len(onset_times) / (test_session['session_duration']/60):.1f} licks/minute")

# Plot the test session
fig, axes = plt.subplots(2, 2, figsize=(14, 10))

# Session overview
axes[0,0].plot(onset_times, range(len(onset_times)), 'b-', alpha=0.7)
axes[0,0].set_xlabel('Time (s)')
axes[0,0].set_ylabel('Cumulative Licks')
axes[0,0].set_title('Generated Session Overview')

# Lick durations
axes[0,1].hist(durations, bins=30, alpha=0.7, color='green')
axes[0,1].set_xlabel('Lick Duration (s)')
axes[0,1].set_ylabel('Frequency')
axes[0,1].set_title('Lick Duration Distribution')

# Inter-lick intervals
# Bins span the full ILI range; only the 0-2 s portion is displayed.
axes[1,0].hist(inter_lick_intervals, bins=50, alpha=0.7, color='red')
axes[1,0].set_xlabel('Inter-lick Interval (s)')
axes[1,0].set_ylabel('Frequency')
axes[1,0].set_title('Inter-lick Interval Distribution')
axes[1,0].set_xlim(0, 2)

# Burst structure
# Label 0 marks isolated licks, so only positive labels count as bursts.
burst_ids = test_session['burst_labels']
unique_bursts = np.unique(burst_ids[burst_ids > 0])
burst_sizes = [np.sum(burst_ids == b) for b in unique_bursts]

# Skip the panel when the session contains no bursts (max() of an empty list fails).
if len(burst_sizes) > 0:
    axes[1,1].hist(burst_sizes, bins=range(1, max(burst_sizes)+2), alpha=0.7, color='purple')
    axes[1,1].set_xlabel('Burst Size (licks)')
    axes[1,1].set_ylabel('Frequency')
    axes[1,1].set_title('Burst Size Distribution')

plt.tight_layout()
plt.show()

## 5. Create Med Associates Format

In [None]:
def save_med_associates_format(session_data, filename, subject_id="TOY001", 
                              experiment="Lick_Test", date="2024-01-01"):
    """
    Write a session to ``../test_data/<filename>`` in the notebook's Med Associates layout.
    
    The file consists of a header block, an ``A:`` array with one onset per
    line, a ``B:`` array with one offset per line, an empty ``C:`` array and
    two summary lines. Times are written with three decimals.
    
    Parameters:
    -----------
    session_data : dict
        Must provide 'onset_times', 'offset_times' and 'session_duration'
    filename : str
        Name of the file created inside ``../test_data``
    subject_id : str
        Value written to the ``Subject:`` header line
    experiment : str
        Value written to the ``Experiment:`` header line
    date : str
        Value written to both the ``Start Date:`` and ``End Date:`` lines
    
    Returns:
    --------
    pathlib.Path : Path of the written file
    """
    
    target_dir = Path('../test_data')
    target_dir.mkdir(exist_ok=True)
    filepath = target_dir / filename
    
    onsets = session_data['onset_times']
    offsets = session_data['offset_times']
    
    with open(filepath, 'w') as handle:
        lines = [
            # Med Associates header
            f"Start Date: {date}\n",
            f"End Date: {date}\n",
            f"Subject: {subject_id}\n",
            f"Experiment: {experiment}\n",
            f"Group: Control\n",
            f"Box: 1\n",
            "MSN: Lick_Microstructure\n",
            "\n",
            # Onset array
            "A:\n",
        ]
        lines.extend(f"    {stamp:.3f}\n" for stamp in onsets)
        
        # Offset array
        lines.append("\n")
        lines.append("B:\n")
        lines.extend(f"    {stamp:.3f}\n" for stamp in offsets)
        
        # Empty array followed by the session statistics
        lines.extend(["\n", "C:\n", "\n"])
        lines.append(f"Total Licks: {len(onsets)}\n")
        lines.append(f"Session Time: {session_data['session_duration']:.1f}\n")
        
        handle.writelines(lines)
    
    print(f"Saved Med Associates format file: {filepath}")
    return filepath

# Generate and save Med Associates format files
print("Creating Med Associates format files...")

# Generate different types of sessions
# `sessions` is a notebook-level dict reused by the later export and plotting cells.
sessions = {
    'short_session': generate_lick_session(total_licks=200, session_duration_min=5),
    'medium_session': generate_lick_session(total_licks=800, session_duration_min=20), 
    'long_session': generate_lick_session(total_licks=1500, session_duration_min=45),
    'sparse_licking': generate_lick_session(total_licks=300, burst_rate=0.3, between_burst_ili=5.0),
    'dense_bursts': generate_lick_session(total_licks=1000, burst_rate=0.9, burst_size_mean=12)
}

# Write one Med Associates file per session; the subject id is "TOY_" plus the
# first three upper-cased characters of the session name.
med_files = []
for session_name, session_data in sessions.items():
    filename = f"med_{session_name}.txt"
    filepath = save_med_associates_format(session_data, filename, 
                                        subject_id=f"TOY_{session_name.upper()[:3]}")
    med_files.append(filepath)
    
print(f"\nCreated {len(med_files)} Med Associates format files")

## 6. Create CSV/TXT Format

In [None]:
def save_csv_format(session_data, filename, format_type='simple', include_header=True):
    """
    Save lick data to ``../test_data/<filename>`` in one of several CSV layouts.
    
    Parameters:
    -----------
    session_data : dict
        Must provide 'onset_times' and 'offset_times'; the 'with_metadata'
        layout additionally reads 'burst_labels' and 'session_duration'
    filename : str
        Name of the file created inside ``../test_data``
    format_type : str
        Type of CSV format ('simple', 'multi_column', 'onset_offset', 'with_metadata')
    include_header : bool
        Whether to include column headers
    
    Returns:
    --------
    pathlib.Path : Path of the written file
    
    Raises:
    -------
    ValueError
        If ``format_type`` is not one of the supported layouts. Nothing is
        written to disk in that case.
    """
    
    onset_times = session_data['onset_times']
    offset_times = session_data['offset_times']
    durations = offset_times - onset_times
    
    # Each layout is built lazily so that only the selected one touches the
    # optional keys of session_data.
    layouts = {
        # Single column of onset times (like reference data)
        'simple': lambda: {'lick_times': onset_times},
        # Multiple columns with different naming conventions
        'multi_column': lambda: {
            'licks': onset_times,
            'onset': onset_times,
            'timestamps': onset_times,
            'time_sec': onset_times,
        },
        # Onset and offset columns
        'onset_offset': lambda: {
            'onset': onset_times,
            'offset': offset_times,
            'duration': durations,
        },
        # Include burst information and other metadata
        'with_metadata': lambda: {
            'onset_time': onset_times,
            'offset_time': offset_times,
            'duration_ms': durations * 1000,  # Convert to milliseconds
            'burst_id': session_data['burst_labels'],
            'session_time': onset_times / session_data['session_duration'] * 100,  # Percent of session
        },
    }
    
    # Reject unknown layouts before creating any directory or file.
    if format_type not in layouts:
        raise ValueError(
            f"Unknown format_type {format_type!r}; expected one of {sorted(layouts)}"
        )
    
    df = pd.DataFrame(layouts[format_type]())
    
    output_dir = Path('../test_data')
    output_dir.mkdir(exist_ok=True)
    filepath = output_dir / filename
    
    # Save with or without header
    df.to_csv(filepath, index=False, header=include_header)
    
    print(f"Saved CSV format ({format_type}) file: {filepath}")
    return filepath

# Generate CSV format files
print("Creating CSV format files...")

# Three layouts are written for every session in the notebook-level `sessions` dict.
csv_files = []
for session_name, session_data in sessions.items():
    # Simple format (like reference data)
    filename = f"csv_simple_{session_name}.csv"
    filepath = save_csv_format(session_data, filename, 'simple', include_header=False)
    csv_files.append(filepath)
    
    # Multi-column format with headers
    filename = f"csv_multi_{session_name}.csv"
    filepath = save_csv_format(session_data, filename, 'multi_column', include_header=True)
    csv_files.append(filepath)
    
    # Onset-offset format
    filename = f"csv_onset_offset_{session_name}.csv"
    filepath = save_csv_format(session_data, filename, 'onset_offset', include_header=True)
    csv_files.append(filepath)

# Create one example with metadata
filename = "csv_with_metadata_medium_session.csv"
filepath = save_csv_format(sessions['medium_session'], filename, 'with_metadata', include_header=True)
csv_files.append(filepath)

# Create headerless versions for testing
# (multi-column layout written without its header row)
for session_name in ['short_session', 'medium_session']:
    filename = f"csv_no_header_{session_name}.csv"
    filepath = save_csv_format(sessions[session_name], filename, 'multi_column', include_header=False)
    csv_files.append(filepath)

print(f"\nCreated {len(csv_files)} CSV format files")

## 7. Create DD Lab Format

In [None]:
def save_dd_lab_format(session_data, filename, subject_id="TOY001", date="2024-01-01"):
    """
    Save lick onsets to ``../test_data/<filename>`` in the notebook's DD Lab layout.
    
    The file has a four-line header, a blank line, and then one onset per
    line, converted to milliseconds and rounded to whole numbers.
    
    Parameters:
    -----------
    session_data : dict
        Must provide 'onset_times' (seconds); any sequence of numbers is accepted
    filename : str
        Name of the file created inside ``../test_data``
    subject_id : str
        Value written to the ``Subject:`` header line
    date : str
        Value written to the ``Date:`` header line (default "2024-01-01")
    
    Returns:
    --------
    pathlib.Path : Path of the written file
    """
    
    output_dir = Path('../test_data')
    output_dir.mkdir(exist_ok=True)
    filepath = output_dir / filename
    
    # Coerce to a float array so plain lists are scaled element-wise instead
    # of being repeated by the multiplication below.
    onset_times = np.asarray(session_data['onset_times'], dtype=float)
    
    # Convert seconds to milliseconds
    onset_times_ms = onset_times * 1000
    
    with open(filepath, 'w') as f:
        # DD Lab header
        f.write(f"Subject: {subject_id}\n")
        f.write(f"Date: {date}\n")
        f.write(f"Total Events: {len(onset_times)}\n")
        f.write("Event Times (ms):\n")
        f.write("\n")
        
        # Write timestamps, one per line, as whole milliseconds
        f.writelines(f"{time_ms:.0f}\n" for time_ms in onset_times_ms)
    
    print(f"Saved DD Lab format file: {filepath}")
    return filepath

# Generate DD Lab format files
print("Creating DD Lab format files...")

# One DD Lab file per session; the subject id is "TOY_" plus the first three
# upper-cased characters of the session name.
dd_files = []
for session_name, session_data in sessions.items():
    filename = f"dd_{session_name}.txt"
    filepath = save_dd_lab_format(session_data, filename, 
                                 subject_id=f"TOY_{session_name.upper()[:3]}")
    dd_files.append(filepath)

print(f"\nCreated {len(dd_files)} DD Lab format files")

## 8. Interactive File Generator

In [None]:
# Interactive widget-based file generator
def create_interactive_generator():
    """Build and display a widget panel that generates and saves a custom session.

    The panel holds sliders for the session parameters, a multi-select for the
    output formats, text boxes for the base filename and subject id, and a
    button. Clicking the button generates a session, writes it in every
    selected format and draws a two-panel preview in the output area.
    """
    
    # Session parameter controls
    licks_slider = widgets.IntSlider(
        value=800, min=50, max=3000, step=50, description='Total Licks:')
    duration_slider = widgets.FloatSlider(
        value=20, min=5, max=120, step=5, description='Duration (min):')
    burst_rate_slider = widgets.FloatSlider(
        value=0.8, min=0.1, max=1.0, step=0.1, description='Burst Rate:')
    burst_size_slider = widgets.IntSlider(
        value=8, min=2, max=20, step=1, description='Avg Burst Size:')
    within_ili_slider = widgets.FloatSlider(
        value=0.15, min=0.05, max=0.5, step=0.05, description='Within Burst ILI (s):')
    between_ili_slider = widgets.FloatSlider(
        value=2.0, min=0.5, max=10.0, step=0.5, description='Between Burst ILI (s):')
    
    # File generation controls
    format_select = widgets.SelectMultiple(
        options=['CSV Simple', 'CSV Multi-column', 'CSV Onset-Offset', 'Med Associates', 'DD Lab'],
        value=['CSV Simple', 'Med Associates'],
        description='File Formats:')
    name_box = widgets.Text(value='custom_session', description='Base Filename:')
    subject_box = widgets.Text(value='CUSTOM001', description='Subject ID:')
    run_button = widgets.Button(description='Generate Files', button_style='primary')
    
    output_area = widgets.Output()
    
    # Maps each selectable format to the call that writes it.
    writers = {
        'CSV Simple': lambda session, base, subject: save_csv_format(
            session, f"{base}_simple.csv", 'simple', include_header=False),
        'CSV Multi-column': lambda session, base, subject: save_csv_format(
            session, f"{base}_multi.csv", 'multi_column', include_header=True),
        'CSV Onset-Offset': lambda session, base, subject: save_csv_format(
            session, f"{base}_onset_offset.csv", 'onset_offset', include_header=True),
        'Med Associates': lambda session, base, subject: save_med_associates_format(
            session, f"{base}_med.txt", subject_id=subject),
        'DD Lab': lambda session, base, subject: save_dd_lab_format(
            session, f"{base}_dd.txt", subject_id=subject),
    }
    
    def handle_click(_button):
        """Generate, save and preview a session using the current widget values."""
        with output_area:
            output_area.clear_output()
            
            print("Generating custom session...")
            
            # The burst-size spread is a third of the mean, but at least 1.
            custom_session = generate_lick_session(
                total_licks=licks_slider.value,
                session_duration_min=duration_slider.value,
                burst_rate=burst_rate_slider.value,
                within_burst_ili=within_ili_slider.value,
                between_burst_ili=between_ili_slider.value,
                burst_size_mean=burst_size_slider.value,
                burst_size_std=max(1, burst_size_slider.value // 3)
            )
            
            print(f"Generated session: {custom_session['total_licks']} licks, "
                  f"{custom_session['session_duration']:.1f}s duration")
            
            # Write the session in each selected format, in selection order.
            written = []
            base_name = name_box.value
            subject_id = subject_box.value
            
            for chosen in format_select.value:
                writer = writers.get(chosen)
                if writer is not None:
                    written.append(writer(custom_session, base_name, subject_id))
            
            print(f"\nSaved {len(written)} files:")
            for path in written:
                print(f"  - {path.name}")
            
            # Quick preview: cumulative licks and inter-lick intervals
            onsets = custom_session['onset_times']
            plt.figure(figsize=(10, 4))
            
            plt.subplot(1, 2, 1)
            plt.plot(onsets, range(len(onsets)), 'b-', alpha=0.7)
            plt.xlabel('Time (s)')
            plt.ylabel('Cumulative Licks')
            plt.title('Generated Session Overview')
            
            plt.subplot(1, 2, 2)
            gaps = np.diff(onsets)
            plt.hist(gaps, bins=50, alpha=0.7)
            plt.xlabel('Inter-lick Interval (s)')
            plt.ylabel('Frequency')
            plt.title('Inter-lick Intervals')
            plt.xlim(0, 2)
            
            plt.tight_layout()
            plt.show()
    
    run_button.on_click(handle_click)
    
    # Controls on the left, output area on the right
    panel_children = [
        widgets.HTML("<h3>Session Parameters</h3>"),
        licks_slider,
        duration_slider,
        burst_rate_slider,
        burst_size_slider,
        within_ili_slider,
        between_ili_slider,
        widgets.HTML("<h3>File Generation</h3>"),
        format_select,
        name_box,
        subject_box,
        run_button,
    ]
    controls = widgets.VBox(panel_children)
    
    display(widgets.HBox([controls, output_area]))

# Create the interactive generator
# (this cell only prints the heading; the widget panel is built by the
# create_interactive_generator() call that follows)
print("Interactive File Generator:")

In [None]:
# Build the widget panel and display it below this cell.
create_interactive_generator()

## 9. Visualize Generated Data

In [None]:
# Create comprehensive visualization of all generated sessions
def visualize_all_sessions():
    """Draw a 3x3 comparison figure of the generated sessions and print a summary.

    Reads the notebook-level ``sessions`` dict (which must contain a
    'medium_session' entry) and the ``med_files``, ``csv_files`` and
    ``dd_files`` lists.

    Panels:
      row 0 - cumulative licks for all sessions, then the second and third
              session on their own axes
      row 1 - pooled lick durations, inter-lick intervals and burst sizes
              (the latter two for the first three sessions only)
      row 2 - statistics table, lick rate of the medium session, cumulative
              duration distribution

    Returns:
    --------
    pd.DataFrame : One row of summary statistics per session
    """
    
    fig, axes = plt.subplots(3, 3, figsize=(18, 15))
    
    colors = plt.cm.Set1(np.linspace(0, 1, len(sessions)))
    
    def pretty(name):
        """Turn a session key such as 'short_session' into 'Short Session'."""
        return name.replace('_', ' ').title()
    
    # Session overviews: every session goes on the combined panel; the second
    # and third sessions additionally get a panel of their own.
    for i, (name, session_data) in enumerate(sessions.items()):
        onset_times = session_data['onset_times']
        lick_index = range(len(onset_times))
        axes[0, 0].plot(onset_times, lick_index,
                        label=pretty(name), color=colors[i], alpha=0.7)
        if 1 <= i < 3:
            ax = axes[0, i]
            ax.plot(onset_times, lick_index, color=colors[i], alpha=0.7)
            ax.set_title(pretty(name))
            ax.set_xlabel('Time (s)')
            ax.set_ylabel('Cumulative Licks')
    
    axes[0, 0].set_xlabel('Time (s)')
    axes[0, 0].set_ylabel('Cumulative Licks')
    axes[0, 0].set_title('All Sessions Overview')
    axes[0, 0].legend()
    
    # Lick duration distributions, pooled over all sessions
    all_durations = np.concatenate([
        session_data['offset_times'] - session_data['onset_times']
        for session_data in sessions.values()
    ])
    
    axes[1, 0].hist(all_durations, bins=50, alpha=0.7, color='green')
    axes[1, 0].set_xlabel('Lick Duration (s)')
    axes[1, 0].set_ylabel('Frequency')
    axes[1, 0].set_title('All Lick Durations')
    axes[1, 0].axvline(0.065, color='red', linestyle='--', label='Target Mean')
    axes[1, 0].axvline(0.015, color='orange', linestyle='--', label='1st Percentile')
    axes[1, 0].axvline(0.16, color='orange', linestyle='--', label='99th Percentile')
    axes[1, 0].legend()
    
    # Inter-lick interval and burst size comparisons
    for i, (name, session_data) in enumerate(sessions.items()):
        # Only plot first 3 for clarity
        if i >= 3:
            break
        
        ilis = np.diff(session_data['onset_times'])
        axes[1, 1].hist(ilis, bins=50, alpha=0.5, label=pretty(name),
                        color=colors[i], range=(0, 2))
        
        # Label 0 marks isolated licks, so only positive labels are bursts.
        burst_labels = session_data['burst_labels']
        unique_bursts = np.unique(burst_labels[burst_labels > 0])
        burst_sizes = [np.sum(burst_labels == b) for b in unique_bursts]
        if len(burst_sizes) > 0:
            axes[1, 2].hist(burst_sizes, bins=range(1, max(burst_sizes)+2),
                            alpha=0.5, label=pretty(name),
                            color=colors[i])
    
    axes[1, 1].set_xlabel('Inter-lick Interval (s)')
    axes[1, 1].set_ylabel('Frequency')
    axes[1, 1].set_title('Inter-lick Intervals Comparison')
    axes[1, 1].legend()
    
    axes[1, 2].set_xlabel('Burst Size (licks)')
    axes[1, 2].set_ylabel('Frequency')
    axes[1, 2].set_title('Burst Size Distributions')
    axes[1, 2].legend()
    
    # Session statistics summary
    stats_data = []
    for name, session_data in sessions.items():
        onset_times = session_data['onset_times']
        durations = session_data['offset_times'] - onset_times
        duration_minutes = session_data['session_duration'] / 60
        
        stats_data.append({
            'Session': pretty(name),
            'Total Licks': len(onset_times),
            'Duration (min)': duration_minutes,
            'Mean ILI (s)': np.mean(np.diff(onset_times)),
            'Mean Duration (s)': np.mean(durations),
            'Licks/min': len(onset_times) / duration_minutes
        })
    
    stats_df = pd.DataFrame(stats_data)
    
    # Create table in subplot
    axes[2, 0].axis('tight')
    axes[2, 0].axis('off')
    table = axes[2, 0].table(cellText=stats_df.round(3).values,
                            colLabels=stats_df.columns,
                            cellLoc='center',
                            loc='center')
    table.auto_set_font_size(False)
    table.set_fontsize(8)
    table.scale(1.2, 1.5)
    axes[2, 0].set_title('Session Statistics Summary')
    
    # Lick rate over time for medium session
    medium_session = sessions['medium_session']
    onset_times = medium_session['onset_times']
    bin_size = 60  # 1 minute bins
    bins = np.arange(0, onset_times[-1] + bin_size, bin_size)
    lick_counts, _ = np.histogram(onset_times, bins=bins)
    bin_centers = bins[:-1] + bin_size/2
    
    axes[2, 1].plot(bin_centers/60, lick_counts, 'b-', marker='o')
    axes[2, 1].set_xlabel('Time (minutes)')
    axes[2, 1].set_ylabel('Licks per minute')
    axes[2, 1].set_title('Lick Rate Over Time (Medium Session)')
    
    # Duration validation plot
    axes[2, 2].hist(all_durations, bins=50, alpha=0.7, cumulative=True, density=True)
    axes[2, 2].axhline(0.01, color='red', linestyle='--', label='1% threshold')
    axes[2, 2].axhline(0.99, color='red', linestyle='--', label='99% threshold')
    axes[2, 2].axvline(0.015, color='orange', linestyle='--', alpha=0.7)
    axes[2, 2].axvline(0.16, color='orange', linestyle='--', alpha=0.7)
    axes[2, 2].set_xlabel('Lick Duration (s)')
    axes[2, 2].set_ylabel('Cumulative Probability')
    axes[2, 2].set_title('Duration Validation (Cumulative)')
    axes[2, 2].legend()
    
    plt.tight_layout()
    plt.show()
    
    # Print validation statistics
    in_range = (all_durations >= 0.015) & (all_durations <= 0.16)
    print("\nData Validation Summary:")
    print("=" * 50)
    print(f"Total files generated: {len(med_files) + len(csv_files) + len(dd_files)}")
    print(f"All durations between 0.015-0.16s: {np.sum(in_range) / len(all_durations) * 100:.2f}%")
    print(f"Mean duration across all sessions: {np.mean(all_durations):.4f}s")
    print(f"Overall duration range: {np.min(all_durations):.4f}s - {np.max(all_durations):.4f}s")
    
    return stats_df

# Generate the comprehensive visualization
# The returned per-session statistics table is kept in `summary_stats`.
print("Creating comprehensive visualization of all generated data...")
summary_stats = visualize_all_sessions()

## Summary and File Directory

This notebook has generated comprehensive toy lick data in multiple formats for testing the LickCalc webapp. All files are saved in the `../test_data/` directory.