# Imports

In [None]:
import math
import time
import copy
import tqdm
import glob
import os
from IPython.display import Audio, Markdown
import numpy as np
import pandas as pd
pd.set_option('display.max_columns', None)
pd.set_option('display.precision', 3)
import matplotlib.pyplot as plt
import plotly.express as px
# import itables
# itables.init_notebook_mode()
# from sklearn.model_selection import ParameterGrid
# import librosa
import pprint
pp = pprint.PrettyPrinter(indent=4)
from itertools import combinations

In [None]:
from ipywidgets import Output, GridspecLayout, HBox, VBox, Text, HTML
# from IPython import display

# Initialization

In [None]:
# Pick the grid-search run to inspect. Notes from earlier runs are kept so the
# conclusions drawn from them stay next to the switch.
# experiment_name = 'exp_01'  # This experiment shows to use 'heun', and high omega and guidance
# experiment_name = 'exp_02'  # too drastic of ref_audio_strength
experiment_name = 'exp_03'  # too drastic of ref_audio_strength
audio_outputs_path = f'/mnt/disks/ace_step_generation/{experiment_name}'

# Parameter grid that was fed into the generation run.
grid_search_input_dataframe_filepath = os.path.join(
    audio_outputs_path, 'grid_search_input_data.csv'
)
grid_dataframe = pd.read_csv(grid_search_input_dataframe_filepath)

# One row per generated result.
grid_search_output_dataframe_filepath = os.path.join(
    audio_outputs_path, 'grid_search_output_data.csv'
)
results_dataframe = pd.read_csv(grid_search_output_dataframe_filepath)

# Rows that carry a reference audio input are audio2audio results; rows
# without one are text2audio results.
_has_ref_audio = results_dataframe['ref_audio_input'].notna()

a2a_results_df = results_dataframe[_has_ref_audio]
print(f'Originally {a2a_results_df.shape[0]} audio2audio results have been generated...')
t2a_results_df = results_dataframe[~_has_ref_audio]
print(f'Originally {t2a_results_df.shape[0]} text2audio results have been generated...')

# NOTE(review): the leading '~' is not expanded automatically by open() or
# os.path.join(); users of this constant presumably need
# os.path.expanduser() -- confirm where it is consumed.
BASE_AUDIO_DIR = "~/soundverse/ACE-step-lowvram/experimental/rjw/notebooks/outputs/"

In [None]:
# Optional: uncomment to order the audio2audio results before inspecting them.
# a2a_results_df = a2a_results_df.sort_values(
#     by=[
#         'prompt','infer_step', 'scheduler_type','omega_scale',
#         'use_erg','guidance_scale', 'guidance_interval',  'ref_audio_strength'
#    ],
#     ascending=[
#         False, False, True, True,
#         True, False, True, True
#     ]
# ).reset_index(drop=True)

# Reference audio of the first audio2audio row; it is played back further
# down as the "original" clip.
original_file = a2a_results_df['ref_audio_input'].iloc[0]
original_file

In [None]:
# a2a_results_df[
#     (
#         (a2a_results_df['omega_scale'] == 30) &
#         (a2a_results_df['ref_audio_strength'] == 0.1) &
#         (a2a_results_df['prompt'] == 'edm, synth, bass, euphoric and energetic')
#     )
# ]

# Examination

## Helper Methods

In [None]:
def compute_difference(
    previous_params,
    current_params,
    ignore_columns=('timecosts', 'retake_seeds', 'audio_path'),
):
    """Report the parameters whose values differ between two runs.

    Args:
        previous_params: Mapping (or ``pd.Series``) of parameter name to value
            for the first run.
        current_params: Mapping (or ``pd.Series``) of parameter name to value
            for the second run. Only the keys of this mapping are inspected.
        ignore_columns: Parameter names that are never reported.

    Returns:
        A dict keyed by parameter name. Each value is a dict with the keys
        ``'previous'`` and ``'current'`` holding the two differing values.
        A key missing from ``previous_params`` is reported with a previous
        value of ``None``.
    """
    if isinstance(previous_params, pd.Series):
        previous_params = previous_params.to_dict()
    if isinstance(current_params, pd.Series):
        current_params = current_params.to_dict()

    def _is_nan(value):
        # NaN never compares equal to itself, so it needs an explicit check.
        return isinstance(value, float) and math.isnan(value)

    parameter_differences = {}

    for _key_current, _value_current in current_params.items():
        if _key_current in ignore_columns:
            continue
        _value_previous = previous_params.get(_key_current)

        # A value missing (NaN) in both runs is not a difference.
        if _is_nan(_value_previous) and _is_nan(_value_current):
            continue

        if _value_previous != _value_current:
            parameter_differences[_key_current] = {
                'previous': _value_previous,
                'current': _value_current,
            }
    return parameter_differences

def compute_same(
    previous_params,
    current_params,
    similar_allow_list=(
        'prompt',
        'lyrics',
        'ref_audio_input',
        'guidance_scale',
        'guidance_interval',
        'guidance_interval_decay',
        'scheduler_type',
        'infer_step',
        'ref_audio_strength',
        'use_erg',
        'omega_scale',
    ),
):
    """Report the allow-listed parameters that two runs have in common.

    Args:
        previous_params: Mapping (or ``pd.Series``) of parameter name to value
            for the first run.
        current_params: Mapping (or ``pd.Series``) of parameter name to value
            for the second run. Only the keys of this mapping are inspected.
        similar_allow_list: Parameter names eligible to be reported; every
            other key is skipped.

    Returns:
        A dict keyed by parameter name. Each value is a dict with the single
        key ``'value'`` holding the shared value.
    """
    if isinstance(previous_params, pd.Series):
        previous_params = previous_params.to_dict()
    if isinstance(current_params, pd.Series):
        current_params = current_params.to_dict()

    def _is_nan(value):
        # NaN never compares equal to itself, so it needs an explicit check.
        return isinstance(value, float) and math.isnan(value)

    parameter_similarities = {}

    for _key_current, _value_current in current_params.items():
        if _key_current not in similar_allow_list:
            continue

        _value_previous = previous_params.get(_key_current)

        # A value missing (NaN) in both runs counts as shared.
        both_nan = _is_nan(_value_previous) and _is_nan(_value_current)
        if both_nan or _value_previous == _value_current:
            parameter_similarities[_key_current] = {
                'value': _value_previous,
            }
    return parameter_similarities

In [None]:
def display_same_vs_different(
    different_df, same_df, audio_path_0, audio_path_1, index_0, index_1
):
    """Show two generated tracks and their parameter comparison side by side.

    The left grid cell holds the two audio players, each under a heading with
    its index; the right grid cell holds the "different" and "same" tables.

    Args:
        different_df: Object displayed under the "Different Properties" heading.
        same_df: Object displayed under the "Same Properties" heading.
        audio_path_0: Audio source passed to ``Audio`` for the first track.
        audio_path_1: Audio source passed to ``Audio`` for the second track.
        index_0: Label shown above the first audio player.
        index_1: Label shown above the second audio player.
    """
    grid = GridspecLayout(1, 2)

    # Left cell: the two audio players.
    audio_panel = Output()
    with audio_panel:
        for _index, _audio_path in (
            (index_0, audio_path_0),
            (index_1, audio_path_1),
        ):
            # The HTML widget renders markup verbatim, so markdown '#' syntax
            # would show up as literal text; use a real heading tag instead.
            display(HTML(f'<h1>Index: {_index}</h1>'))
            display(Audio(_audio_path))
    grid[0, 0] = audio_panel

    # Right cell: the comparison tables.
    tables_panel = Output()
    with tables_panel:
        display(HTML('<h1>Different Properties</h1>'))
        display(different_df)
        display(HTML('<h1>Same Properties</h1>'))
        display(same_df)
    grid[0, 1] = tables_panel

    display(grid)


def display_same_vs_different_v2(
    different_df, same_df, audio_path_0, audio_path_1, index_0, index_1
):
    """Display two tracks and their parameter comparison as one vertical block.

    Output order: a separator line, each track's index heading followed by its
    audio player, the "different" table, the "same" table, and a closing
    separator line.

    Args:
        different_df: Object displayed under the "Different Properties" heading.
        same_df: Object displayed under the "Same Properties" heading.
        audio_path_0: Audio source passed to ``Audio`` for the first track.
        audio_path_1: Audio source passed to ``Audio`` for the second track.
        index_0: Label shown above the first audio player.
        index_1: Label shown above the second audio player.
    """
    separator = '## ===== ===== ===== ===== ===== ===== ===== ====='
    tracks = ((index_0, audio_path_0), (index_1, audio_path_1))
    tables = (
        ('# Different Properties', different_df),
        ('# Same Properties', same_df),
    )

    display(Markdown(separator))

    # Audio players, each under its own index heading.
    for track_index, track_path in tracks:
        display(Markdown(f'# Index: {track_index}'))
        display(Audio(track_path))

    # Comparison tables.
    for heading, table in tables:
        display(Markdown(heading))
        display(table)

    display(Markdown(separator))

## Examine Generation

#### Create Index Pairs

In [None]:
# List the distinct prompts present in the audio2audio results.
print('Unique prompts that have been generated:')
_unique_prompts = a2a_results_df['prompt'].unique().tolist()
display(_unique_prompts)

# Per-prompt record counts, taken from the non-null count of the 'format' column.
print('\nn records per prompt:')
_counts_by_prompt = a2a_results_df.groupby(['prompt']).count().reset_index()
display(_counts_by_prompt[['prompt', 'format']])

### Filtering Using text2audio Dataframe

In [None]:
# filtered_df = t2a_results_df[
#     (
#         # (a2a_results_df['omega_scale'] == 30) &
#         # (a2a_results_df['ref_audio_strength'] == 0.1) &
#         # (a2a_results_df['guidance_interval'] == 1.0) & # makes huge difference too!
#         # (a2a_results_df['prompt'] == 'edm, synth, bass, euphoric and energetic') &
#         (t2a_results_df['prompt'] == 'lofi beats') &
#         # (a2a_results_df['scheduler_type'].isin(['heun', 'pingpong']))
#         # ===== use_ergTrue gives more variation which is nice, but maybe shortcuts using CFG???
#         (t2a_results_df['use_erg'] == False) & 
#         (t2a_results_df['scheduler_type'].isin(['heun']))
        
        
#     )
# ].reset_index(drop=True)

### Filtering Using audio2audio Dataframe

In [None]:

# Treat every column as categorical so describe() reports count / unique /
# top / freq for each grid parameter.
_categorical_view = a2a_results_df.astype('category')
_categorical_view.describe()

In [None]:
# Narrow the audio2audio results down to one slice of the parameter grid.
# Each condition is a named mask so it can be swapped in or out individually.

# NOTE(review): the commented alternative below uses .15, so 1.5 may be a
# typo for 0.15 -- confirm against the grid values.
# _strength_mask = a2a_results_df['ref_audio_strength'].isin([.15])
_strength_mask = a2a_results_df['ref_audio_strength'].isin([0.1, 1.5])

# makes huge difference too!
_guidance_scale_mask = a2a_results_df['guidance_scale'] == 30.0

# makes huge difference too!
# _guidance_interval_mask = a2a_results_df['guidance_interval'] == 1.0
_guidance_interval_mask = a2a_results_df['guidance_interval'] == 0.9

# ===== use_ergTrue gives more variation which is nice, but maybe shortcuts using CFG???
_erg_mask = a2a_results_df['use_erg'] == False

# _scheduler_mask = a2a_results_df['scheduler_type'].isin(['heun', 'pingpong'])
_scheduler_mask = a2a_results_df['scheduler_type'].isin(['heun'])

# Other filters that have been useful:
# (a2a_results_df['omega_scale'] == 30)
# (a2a_results_df['prompt'] == 'edm, synth, bass, euphoric and energetic')
# (a2a_results_df['prompt'] == 'lofi beats')

_combined_mask = (
    _strength_mask
    & _guidance_scale_mask
    & _guidance_interval_mask
    & _erg_mask
    & _scheduler_mask
)
filtered_df = a2a_results_df[_combined_mask].reset_index(drop=True)

# filtered_df.head(5)
print(f'filtered to {filtered_df.shape[0]} results...')

In [None]:
# Play back the reference clip (taken from the first audio2audio row) so the
# generated variations below can be compared against it.
display(Markdown('# Original Reference Audio'))
display(Audio(original_file))

In [None]:
# Compare the filtered tracks pairwise: for each pair, play both clips and
# show which generation parameters differ and which are shared.
# indices = a2a_results_df.index.to_list()
indices = filtered_df.index.to_list()

index_pairs = list(combinations(indices, 2))
if not index_pairs and indices:
    # A single remaining track cannot form a pair; show it against itself.
    # With no tracks at all, index_pairs stays empty and the loop is skipped
    # instead of raising IndexError on filtered_df.iloc[0].
    index_pairs = [(indices[0], indices[0])]

N_PAIRS = 7
print(f"For {len(indices)} remaining tracks post-filtering,")
print(f"there are {len(index_pairs)} possible pairs...")
print(f"Limiting to {N_PAIRS} examples.")

# Only the last N_PAIRS pairs are shown; use index_pairs[:N_PAIRS] for the first ones.
# No enumerate counter here: a global named `_i` would shadow IPython's
# input-history variable of the same name.
for _index_0, _index_1 in index_pairs[-N_PAIRS:]:

    # Normalize missing values to None so they compare equal across rows.
    previous_params = filtered_df.iloc[_index_0].replace({float('nan'): None})
    current_params = filtered_df.iloc[_index_1].replace({float('nan'): None})

    param_differences = compute_difference(
        previous_params=previous_params,
        current_params=current_params
    )
    params_same = compute_same(
        previous_params=previous_params,
        current_params=current_params
    )

    display_same_vs_different_v2(
        different_df=pd.DataFrame(param_differences).T,
        same_df=pd.DataFrame(params_same).T,
        audio_path_0=previous_params['audio_path'],
        audio_path_1=current_params['audio_path'],
        index_0=_index_0, index_1=_index_1
    )
    