In [None]:
import argparse
import math
import os
import sys
from typing import List, Any, Dict

import matplotlib.pyplot as plt
import nltk
import numpy as np
import seaborn as sns
import speechmetrics
from g2p_en import G2p
from mdutils.mdutils import MdUtils
from PIL import Image, ImageOps
from pydub import AudioSegment, silence
from tqdm import tqdm
from wordcloud import WordCloud, STOPWORDS
from wordfreq import word_frequency

from uberduck_ml_dev.text.util import clean_text, text_to_sequence
from uberduck_ml_dev.data.statistics import (
    absolute_metrics,
    count_frequency,
    create_wordcloud,
    get_sample_format,
    pace_character,
    pace_phoneme,
    word_frequencies,
)

In [None]:
def _increment_count(counter, key):
    """Add one to ``counter[key]``, starting from zero for keys not seen before."""
    counter[key] = counter.get(key, 0) + 1


def _save_histograms(panels, output_path, figsize=None, **hist_kwargs):
    """Draw one histogram per panel side by side, save the figure and close it.

    Args:
        panels: Sequence of ``(values, title, xlabel)`` tuples, one per subplot.
        output_path: Where the rendered figure is written.
        figsize: Optional ``(width, height)`` forwarded to ``plt.subplots``.
        **hist_kwargs: Extra keyword arguments forwarded to ``sns.histplot``.
    """
    # An explicit figure (instead of plt.clf() + plt.figure()) avoids leaving
    # an empty default figure behind after every call.
    fig, axes = plt.subplots(1, len(panels), figsize=figsize, squeeze=False)
    try:
        for ax, (values, title, xlabel) in zip(axes[0], panels):
            sns.histplot(values, ax=ax, **hist_kwargs)
            ax.set_title(title)
            ax.set_xlabel(xlabel)
            ax.set_ylabel("Count")
        fig.savefig(output_path)
    finally:
        plt.close(fig)


def calculate_statistics(
    dataset_path, input_file, img_folder, delimiter, metrics=True, wordcloud=True
):
    """Collect per-clip statistics for a dataset and save summary plots.

    Every non-blank line of the transcript file is expected to contain exactly
    two ``delimiter``-separated fields: an audio path relative to
    ``dataset_path`` and its transcription. Clips that fail at any step are
    reported in ``files_with_error`` and contribute nothing to the statistics.

    Args:
        dataset_path: Root directory of the dataset.
        input_file: Transcript file name, relative to ``dataset_path``.
        img_folder: Sub-folder of ``dataset_path`` that receives the plots.
        delimiter: Field separator used in the transcript file.
        metrics: When true, compute MOSNet and SRMR scores for each clip.
        wordcloud: When true, render ``wordcloud.png`` from all transcriptions.

    Returns:
        ``None`` when no clip could be processed, otherwise a dict with the keys
        ``n_clips``, ``total_lengths``, ``paces_phonemes``, ``paces_characters``,
        ``mosnet_scores``, ``srmr_scores``, ``sample_rates``, ``channels``,
        ``extensions``, ``sample_formats``, ``lookup_results`` and
        ``files_with_error``.
    """
    n_clips = 0
    sample_rates = {}
    channels = {"mono": 0, "stereo": 0}
    extensions = {}
    sample_formats = {}
    total_lengths = []
    leading_silence_lengths = []
    trailing_silence_lengths = []
    paces_characters = []  # number of characters / seconds in audio clip
    paces_phonemes = []  # number of phonemes / seconds in audio clip
    lookup_results = {
        "RNN": [],
        "CMU": [],
        "non-alphanumeric": [],
        "homograph": [],
    }  # keep track of how arpabet sequences were generated
    mosnet_scores = []
    srmr_scores = []
    word_freqs = []
    all_words = []
    g2p = G2p()
    files_with_error = []

    with open(os.path.join(dataset_path, input_file)) as transcripts:
        for line in tqdm(transcripts):
            line = line.strip()  # remove trailing newline character
            if not line:
                continue  # blank lines describe no clip

            # Fallback label for the error report: if the split below fails,
            # `file` would otherwise be unbound (first line) or still refer to
            # the previous clip.
            file = line
            try:
                # Split the raw line and lowercase only the transcription, so
                # the audio path keeps its case on case-sensitive filesystems.
                file, transcription = line.split(delimiter)
                transcription_cleaned = clean_text(
                    transcription.lower(), ["english_cleaners"]
                )

                ################ This is temporary #####################
                file = file.replace("npy", "wav")
                ################ This is temporary #####################

                _, file_extension = os.path.splitext(file)
                path_to_file = os.path.join(dataset_path, file)
                file_pydub = AudioSegment.from_wav(path_to_file)

                # Format metadata
                sample_rate = file_pydub.frame_rate
                channel_kind = "mono" if file_pydub.channels == 1 else "stereo"
                sample_format = get_sample_format(path_to_file)

                # Lengths
                duration = file_pydub.duration_seconds
                leading_silence = silence.detect_leading_silence(file_pydub)
                trailing_silence = silence.detect_leading_silence(
                    file_pydub.reverse()
                )

                # Paces
                phoneme_pace = pace_phoneme(
                    text=transcription_cleaned, audio=path_to_file
                )
                character_pace = pace_character(
                    text=transcription_cleaned, audio=path_to_file
                )

                # Quality
                mosnet_score = srmr_score = None
                if metrics:
                    scores = absolute_metrics(path_to_file)
                    mosnet_score = scores["mosnet"][0][0]
                    srmr_score = scores["srmr"]

                # Transcription
                clip_word_freqs = list(word_frequencies(transcription_cleaned))
                transcription_lookups = g2p.check_lookup(transcription_cleaned)
                lookup_updates = []
                for k in transcription_lookups:
                    if k not in lookup_results:
                        raise KeyError(k)
                    lookup_updates.append((k, list(transcription_lookups[k])))

                # Commit only now that every per-clip computation succeeded, so
                # a failing clip cannot leave partial entries that would skew
                # the aggregated statistics.
                _increment_count(sample_rates, sample_rate)
                channels[channel_kind] += 1
                _increment_count(extensions, file_extension)
                _increment_count(sample_formats, sample_format)
                total_lengths.append(duration)
                leading_silence_lengths.append(leading_silence)
                trailing_silence_lengths.append(trailing_silence)
                paces_phonemes.append(phoneme_pace)
                paces_characters.append(character_pace)
                if metrics:
                    mosnet_scores.append(mosnet_score)
                    srmr_scores.append(srmr_score)
                word_freqs.extend(clip_word_freqs)
                for k, values in lookup_updates:
                    lookup_results[k].extend(values)
                all_words.append(transcription_cleaned)
                n_clips += 1
            except Exception as e:
                # Best effort: report the problem and carry on with the next clip.
                print(e)
                files_with_error.append(file)

    if n_clips == 0:
        return None

    image_dir = os.path.join(dataset_path, img_folder)
    os.makedirs(image_dir, exist_ok=True)

    if wordcloud:
        create_wordcloud(" ".join(all_words), os.path.join(image_dir, "wordcloud.png"))

    # Length graph
    _save_histograms(
        [(total_lengths, "Audio length distribution", "Audio length (s)")],
        os.path.join(image_dir, "lengths.png"),
    )

    # Word Frequencies graph
    _save_histograms(
        [(word_freqs, "Word frequency distribution [0-1]", "Word frequency")],
        os.path.join(image_dir, "word_frequencies.png"),
        bins=10,
    )

    # Silences graph
    _save_histograms(
        [
            (
                leading_silence_lengths,
                "Leading silence distribution",
                "Leading silence (ms)",
            ),
            (
                trailing_silence_lengths,
                "Trailing silence distribution",
                "Trailing silence (ms)",
            ),
        ],
        os.path.join(image_dir, "silences.png"),
        figsize=(12, 4),
    )

    # Metrics graph (only when scores were computed; the report embeds it
    # only in that case too)
    if mosnet_scores:
        _save_histograms(
            [
                (mosnet_scores, "Mosnet score distribution", "Mosnet score"),
                (srmr_scores, "SRMR score distribution", "SRMR score"),
            ],
            os.path.join(image_dir, "metrics.png"),
            figsize=(12, 4),
        )

    # Paces graph
    _save_histograms(
        [
            (paces_characters, "Pace (chars/s)", "Characters / second"),
            (paces_phonemes, "Pace (phonemes/s)", "Phonemes / second"),
        ],
        os.path.join(image_dir, "paces.png"),
        figsize=(12, 4),
    )

    return {
        "n_clips": n_clips,
        "total_lengths": total_lengths,
        "paces_phonemes": paces_phonemes,
        "paces_characters": paces_characters,
        "mosnet_scores": mosnet_scores,
        "srmr_scores": srmr_scores,
        "sample_rates": sample_rates,
        "channels": channels,
        "extensions": extensions,
        "sample_formats": sample_formats,
        "lookup_results": lookup_results,
        "files_with_error": files_with_error,
    }

In [None]:
def _mean(values):
    """Return the arithmetic mean of ``values``, or 0.0 when it is empty."""
    return sum(values) / len(values) if values else 0.0


def _add_count_table(md_file, header, rows):
    """Append a centred two-column ``header`` / "Count" table to ``md_file``.

    ``rows`` is a list of ``(label, count)`` pairs; both are stringified.
    """
    cells = [header, "Count"]
    for label, count in rows:
        cells.extend([str(label), str(count)])
    md_file.new_table(
        columns=2, rows=len(rows) + 1, text=cells, text_align="center"
    )


def _add_image(md_file, img_folder, caption, file_name):
    """Append an inline image that lives in ``img_folder`` to ``md_file``."""
    md_file.new_line(
        md_file.new_inline_image(
            text=caption, path=os.path.join(img_folder, file_name)
        )
    )


def generate_markdown(output_file, dataset_path, img_folder, data):
    """Write the dataset statistics report as a Markdown file.

    Args:
        output_file: Name of the Markdown file, relative to ``dataset_path``.
        dataset_path: Root directory of the dataset; the report is written here.
        img_folder: Folder (relative to the report) that holds the plot images.
        data: Statistics dict providing ``n_clips``, ``total_lengths``,
            ``paces_phonemes``, ``paces_characters``, ``mosnet_scores``,
            ``srmr_scores``, ``files_with_error``, ``sample_rates``,
            ``channels``, ``extensions``, ``sample_formats`` and
            ``lookup_results``.
    """
    mdFile = MdUtils(
        file_name=os.path.join(dataset_path, output_file), title="Dataset statistics"
    )

    # Round the total up to whole seconds first and split afterwards, so the
    # seconds part can never read "60" and float noise in the fractional
    # minute cannot push the value up by a second.
    total_minutes, remaining_seconds = divmod(
        math.ceil(sum(data["total_lengths"])), 60
    )
    mdFile.new_header(level=1, title="Overview")
    mdFile.new_line(f"**Number of clips:** {data['n_clips']}")
    mdFile.new_line(
        f"**Total data:** {total_minutes} minutes {remaining_seconds} seconds"
    )
    mdFile.new_line(
        f"**Mean clip length:** {_mean(data['total_lengths']):.2f} seconds"
    )
    # Implicit concatenation instead of a backslash continuation, which used
    # to embed the source indentation in the rendered text.
    mdFile.new_line(
        f"**Mean pace:** {_mean(data['paces_phonemes']):.2f} phonemes/sec "
        f"{_mean(data['paces_characters']):.2f} chars/sec"
    )
    has_metrics = len(data["mosnet_scores"]) > 0
    if has_metrics:
        mdFile.new_line(f"**Mean MOSNet:** {_mean(data['mosnet_scores']):.2f}")
        mdFile.new_line(f"**Mean SRMR:** {_mean(data['srmr_scores']):.2f}")

    if len(data["files_with_error"]) > 0:
        mdFile.new_line(
            f"**Errored Files:** {', '.join(map(str, data['files_with_error']))}"
        )

    _add_count_table(mdFile, "Sample Rate (Hz)", list(data["sample_rates"].items()))
    # Only list channel layouts that actually occur in the dataset.
    _add_count_table(
        mdFile,
        "Audio Type",
        [(kind, count) for kind, count in data["channels"].items() if count > 0],
    )
    _add_count_table(mdFile, "Audio Format", list(data["extensions"].items()))
    _add_count_table(mdFile, "Sample Format", list(data["sample_formats"].items()))
    _add_count_table(
        mdFile,
        "Arpabet Lookup Type",
        [(kind, len(words)) for kind, words in data["lookup_results"].items()],
    )

    # The wordcloud is optional; link it only when the image is on disk so
    # the report does not contain a broken image reference.
    if os.path.exists(os.path.join(dataset_path, img_folder, "wordcloud.png")):
        _add_image(mdFile, img_folder, "Wordcloud", "wordcloud.png")
    _add_image(mdFile, img_folder, "Audio Lengths", "lengths.png")
    _add_image(mdFile, img_folder, "Paces", "paces.png")
    _add_image(mdFile, img_folder, "Silences", "silences.png")
    if has_metrics:
        _add_image(mdFile, img_folder, "Metrics", "metrics.png")
    _add_image(mdFile, img_folder, "Word Frequencies", "word_frequencies.png")

    rnn_frequency_counts = count_frequency(data["lookup_results"]["RNN"])
    # Only words that needed the RNN fallback more than once are listed.
    _add_count_table(
        mdFile,
        "Frequently Missed Words",
        [
            (word, rnn_frequency_counts[word])
            for word in rnn_frequency_counts.keys()
            if rnn_frequency_counts[word] > 1
        ],
    )

    mdFile.new_line(
        f'**Arpabet sequences obtained via g2P RNN:** {", ".join(data["lookup_results"]["RNN"])}'
    )
    mdFile.create_md_file()

In [None]:
def _parse_bool(value):
    """Convert a command-line token such as ``"false"`` or ``"1"`` to a bool.

    ``type=bool`` cannot be used for this: ``bool("False")`` is ``True``
    because every non-empty string is truthy.
    """
    if isinstance(value, bool):
        return value
    normalized = str(value).strip().lower()
    if normalized in ("true", "t", "yes", "y", "1"):
        return True
    if normalized in ("false", "f", "no", "n", "0"):
        return False
    raise argparse.ArgumentTypeError(f"expected a boolean value, got {value!r}")


def parse_args(args):
    """Parse the command-line arguments of the statistics script.

    Args:
        args: List of argument strings, e.g. ``sys.argv[1:]``.

    Returns:
        An ``argparse.Namespace`` with the attributes ``dataset_path``,
        ``input_file``, ``output_file``, ``metrics``, ``wordcloud``,
        ``img_folder`` and ``delimiter``.
    """
    parser = argparse.ArgumentParser()
    # `type` must be a callable; the string "str" makes add_argument raise.
    parser.add_argument("-d", "--dataset_path", help="Path to the dataset.", type=str)
    parser.add_argument(
        "-i", "--input_file", help="Path to the transcription file.", type=str
    )
    parser.add_argument(
        "-o",
        "--output_file",
        help="Markdown file to write statistics to.",
        type=str,
        default="README",
    )
    parser.add_argument(
        "-m",
        "--metrics",
        help="Boolean value to calculate SRMR and MOSNet.",
        default=True,
        type=_parse_bool,
    )
    parser.add_argument(
        "--wordcloud",
        help="Boolean value to generate the wordcloud image.",
        default=True,
        type=_parse_bool,
    )
    parser.add_argument(
        "--img_folder",
        help="Folder to save plots and images.",
        type=str,
        default="stats",
    )
    parser.add_argument(
        "--delimiter", help="Transcription file delimiter.", type=str, default="|"
    )

    return parser.parse_args(args)

In [None]:
def run(
    dataset_path, input_file, output_file, img_folder, delimiter, metrics, wordcloud
):
    """Compute the statistics of one dataset and write its Markdown report.

    The image folder is created under ``dataset_path`` if needed. No report is
    written when ``calculate_statistics`` returns a falsy result.

    Args:
        dataset_path: Root directory of the dataset.
        input_file: Transcript file name, relative to ``dataset_path``.
        output_file: Name of the Markdown report to generate.
        img_folder: Sub-folder of ``dataset_path`` that receives the plots.
        delimiter: Field separator used in the transcript file.
        metrics: Forwarded to ``calculate_statistics``.
        wordcloud: Forwarded to ``calculate_statistics``.
    """
    image_dir = os.path.join(dataset_path, img_folder)
    os.makedirs(image_dir, exist_ok=True)

    statistics = calculate_statistics(
        dataset_path=dataset_path,
        input_file=input_file,
        img_folder=img_folder,
        delimiter=delimiter,
        metrics=metrics,
        wordcloud=wordcloud,
    )
    if not statistics:
        return

    generate_markdown(
        output_file=output_file,
        dataset_path=dataset_path,
        img_folder=img_folder,
        data=statistics,
    )

In [None]:
import glob

# One transcript list (*.txt) per dataset folder under the multispeaker root;
# a README.md report is generated next to each of them.
folders = glob.glob("/home/ubuntu/data/uberduck-multispeaker/*/*.txt")

for dataset in folders:
    print(dataset)
    # Separate the transcript file name from the folder that contains it.
    dataset_path, file = os.path.split(dataset)

    run(
        dataset_path=dataset_path,
        input_file=file,
        output_file="README.md",
        img_folder="imgs",
        delimiter="|",
        metrics=True,
        wordcloud=True,
    )

/home/ubuntu/data/uberduck-multispeaker/relik-rapping/list.txt


134it [00:00, 2704.05it/s]


/home/ubuntu/data/uberduck-multispeaker/relik-rapping/wavs/1.wav
/home/ubuntu/data/uberduck-multispeaker/relik-rapping/wavs/2.wav
/home/ubuntu/data/uberduck-multispeaker/relik-rapping/wavs/3.wav
/home/ubuntu/data/uberduck-multispeaker/relik-rapping/wavs/4.wav
/home/ubuntu/data/uberduck-multispeaker/relik-rapping/wavs/5.wav
/home/ubuntu/data/uberduck-multispeaker/relik-rapping/wavs/6.wav
/home/ubuntu/data/uberduck-multispeaker/relik-rapping/wavs/7.wav
/home/ubuntu/data/uberduck-multispeaker/relik-rapping/wavs/8.wav
/home/ubuntu/data/uberduck-multispeaker/relik-rapping/wavs/9.wav
/home/ubuntu/data/uberduck-multispeaker/relik-rapping/wavs/10.wav
/home/ubuntu/data/uberduck-multispeaker/relik-rapping/wavs/11.wav
/home/ubuntu/data/uberduck-multispeaker/relik-rapping/wavs/12.wav
/home/ubuntu/data/uberduck-multispeaker/relik-rapping/wavs/13.wav
/home/ubuntu/data/uberduck-multispeaker/relik-rapping/wavs/14.wav
/home/ubuntu/data/uberduck-multispeaker/relik-rapping/wavs/15.wav
/home/ubuntu/data/u

56it [00:00, 2852.47it/s]


/home/ubuntu/data/uberduck-multispeaker/Brain/wavs/1.wav
/home/ubuntu/data/uberduck-multispeaker/Brain/wavs/2.wav
/home/ubuntu/data/uberduck-multispeaker/Brain/wavs/3.wav
/home/ubuntu/data/uberduck-multispeaker/Brain/wavs/4.wav
/home/ubuntu/data/uberduck-multispeaker/Brain/wavs/5.wav
/home/ubuntu/data/uberduck-multispeaker/Brain/wavs/6.wav
/home/ubuntu/data/uberduck-multispeaker/Brain/wavs/7.wav
/home/ubuntu/data/uberduck-multispeaker/Brain/wavs/8.wav
/home/ubuntu/data/uberduck-multispeaker/Brain/wavs/9.wav
/home/ubuntu/data/uberduck-multispeaker/Brain/wavs/10.wav
/home/ubuntu/data/uberduck-multispeaker/Brain/wavs/11.wav
/home/ubuntu/data/uberduck-multispeaker/Brain/wavs/12.wav
/home/ubuntu/data/uberduck-multispeaker/Brain/wavs/13.wav
/home/ubuntu/data/uberduck-multispeaker/Brain/wavs/14.wav
/home/ubuntu/data/uberduck-multispeaker/Brain/wavs/15.wav
/home/ubuntu/data/uberduck-multispeaker/Brain/wavs/16.wav
/home/ubuntu/data/uberduck-multispeaker/Brain/wavs/17.wav
/home/ubuntu/data/uberd

13it [00:00, 1902.38it/s]


/home/ubuntu/data/uberduck-multispeaker/Copy_Protector/wavs/91474304.wav
/home/ubuntu/data/uberduck-multispeaker/Copy_Protector/wavs/91474688.wav
/home/ubuntu/data/uberduck-multispeaker/Copy_Protector/wavs/91475840.wav
/home/ubuntu/data/uberduck-multispeaker/Copy_Protector/wavs/91475968.wav
/home/ubuntu/data/uberduck-multispeaker/Copy_Protector/wavs/91522432.wav
/home/ubuntu/data/uberduck-multispeaker/Copy_Protector/wavs/91572992.wav
/home/ubuntu/data/uberduck-multispeaker/Copy_Protector/wavs/91574912.wav
/home/ubuntu/data/uberduck-multispeaker/Copy_Protector/wavs/91576320.wav
/home/ubuntu/data/uberduck-multispeaker/Copy_Protector/wavs/91579904.wav
/home/ubuntu/data/uberduck-multispeaker/Copy_Protector/wavs/91580032.wav
/home/ubuntu/data/uberduck-multispeaker/Copy_Protector/wavs/91584896.wav
/home/ubuntu/data/uberduck-multispeaker/Copy_Protector/wavs/91585664.wav
/home/ubuntu/data/uberduck-multispeaker/Copy_Protector/wavs/91586560.wav
/home/ubuntu/data/uberduck-multispeaker/CDiZelda/li

58it [00:00, 2802.00it/s]


/home/ubuntu/data/uberduck-multispeaker/CDiZelda/wavs/zelda1.wav
/home/ubuntu/data/uberduck-multispeaker/CDiZelda/wavs/zelda2.wav
/home/ubuntu/data/uberduck-multispeaker/CDiZelda/wavs/zelda3.wav
/home/ubuntu/data/uberduck-multispeaker/CDiZelda/wavs/zelda4.wav
/home/ubuntu/data/uberduck-multispeaker/CDiZelda/wavs/zelda5.wav
/home/ubuntu/data/uberduck-multispeaker/CDiZelda/wavs/zelda6.wav
/home/ubuntu/data/uberduck-multispeaker/CDiZelda/wavs/zelda7.wav
/home/ubuntu/data/uberduck-multispeaker/CDiZelda/wavs/zelda8.wav
/home/ubuntu/data/uberduck-multispeaker/CDiZelda/wavs/zelda9.wav
/home/ubuntu/data/uberduck-multispeaker/CDiZelda/wavs/zelda10.wav
/home/ubuntu/data/uberduck-multispeaker/CDiZelda/wavs/zelda11.wav
/home/ubuntu/data/uberduck-multispeaker/CDiZelda/wavs/zelda12.wav
/home/ubuntu/data/uberduck-multispeaker/CDiZelda/wavs/zelda13.wav
/home/ubuntu/data/uberduck-multispeaker/CDiZelda/wavs/zelda14.wav
/home/ubuntu/data/uberduck-multispeaker/CDiZelda/wavs/zelda15.wav
/home/ubuntu/data/u

78it [00:00, 3039.72it/s]


/home/ubuntu/data/uberduck-multispeaker/Baldi/wavs/baldi1.wav
/home/ubuntu/data/uberduck-multispeaker/Baldi/wavs/baldi10.wav
/home/ubuntu/data/uberduck-multispeaker/Baldi/wavs/baldi11.wav
/home/ubuntu/data/uberduck-multispeaker/Baldi/wavs/baldi12.wav
/home/ubuntu/data/uberduck-multispeaker/Baldi/wavs/baldi13.wav
/home/ubuntu/data/uberduck-multispeaker/Baldi/wavs/baldi14.wav
/home/ubuntu/data/uberduck-multispeaker/Baldi/wavs/baldi15.wav
/home/ubuntu/data/uberduck-multispeaker/Baldi/wavs/baldi16.wav
/home/ubuntu/data/uberduck-multispeaker/Baldi/wavs/baldi17.wav
/home/ubuntu/data/uberduck-multispeaker/Baldi/wavs/baldi18.wav
/home/ubuntu/data/uberduck-multispeaker/Baldi/wavs/baldi19.wav
/home/ubuntu/data/uberduck-multispeaker/Baldi/wavs/baldi2.wav
/home/ubuntu/data/uberduck-multispeaker/Baldi/wavs/baldi20.wav
/home/ubuntu/data/uberduck-multispeaker/Baldi/wavs/baldi21.wav
/home/ubuntu/data/uberduck-multispeaker/Baldi/wavs/baldi22.wav
/home/ubuntu/data/uberduck-multispeaker/Baldi/wavs/baldi2

40it [00:00, 2192.21it/s]


/home/ubuntu/data/uberduck-multispeaker/Chalmers/wavs/001.wav
/home/ubuntu/data/uberduck-multispeaker/Chalmers/wavs/002.wav
/home/ubuntu/data/uberduck-multispeaker/Chalmers/wavs/003.wav
/home/ubuntu/data/uberduck-multispeaker/Chalmers/wavs/004.wav
/home/ubuntu/data/uberduck-multispeaker/Chalmers/wavs/005.wav
/home/ubuntu/data/uberduck-multispeaker/Chalmers/wavs/006.wav
/home/ubuntu/data/uberduck-multispeaker/Chalmers/wavs/007.wav
/home/ubuntu/data/uberduck-multispeaker/Chalmers/wavs/008.wav
/home/ubuntu/data/uberduck-multispeaker/Chalmers/wavs/009.wav
/home/ubuntu/data/uberduck-multispeaker/Chalmers/wavs/010.wav
/home/ubuntu/data/uberduck-multispeaker/Chalmers/wavs/011.wav
/home/ubuntu/data/uberduck-multispeaker/Chalmers/wavs/012.wav
/home/ubuntu/data/uberduck-multispeaker/Chalmers/wavs/013.wav
/home/ubuntu/data/uberduck-multispeaker/Chalmers/wavs/014.wav
/home/ubuntu/data/uberduck-multispeaker/Chalmers/wavs/015.wav
/home/ubuntu/data/uberduck-multispeaker/Chalmers/wavs/016.wav
/home/ub

75it [00:00, 3108.27it/s]


/home/ubuntu/data/uberduck-multispeaker/Cow/wavs/1.wav
/home/ubuntu/data/uberduck-multispeaker/Cow/wavs/2.wav
/home/ubuntu/data/uberduck-multispeaker/Cow/wavs/3.wav
/home/ubuntu/data/uberduck-multispeaker/Cow/wavs/4.wav
/home/ubuntu/data/uberduck-multispeaker/Cow/wavs/5.wav
/home/ubuntu/data/uberduck-multispeaker/Cow/wavs/6.wav
/home/ubuntu/data/uberduck-multispeaker/Cow/wavs/7.wav
/home/ubuntu/data/uberduck-multispeaker/Cow/wavs/8.wav
/home/ubuntu/data/uberduck-multispeaker/Cow/wavs/9.wav
/home/ubuntu/data/uberduck-multispeaker/Cow/wavs/10.wav
/home/ubuntu/data/uberduck-multispeaker/Cow/wavs/11.wav
/home/ubuntu/data/uberduck-multispeaker/Cow/wavs/12.wav
/home/ubuntu/data/uberduck-multispeaker/Cow/wavs/13.wav
/home/ubuntu/data/uberduck-multispeaker/Cow/wavs/14.wav
/home/ubuntu/data/uberduck-multispeaker/Cow/wavs/15.wav
/home/ubuntu/data/uberduck-multispeaker/Cow/wavs/16.wav
/home/ubuntu/data/uberduck-multispeaker/Cow/wavs/17.wav
/home/ubuntu/data/uberduck-multispeaker/Cow/wavs/18.wav
/

161it [00:00, 2862.53it/s]


/home/ubuntu/data/uberduck-multispeaker/TATII/wavs/1.wav
/home/ubuntu/data/uberduck-multispeaker/TATII/wavs/2.wav
/home/ubuntu/data/uberduck-multispeaker/TATII/wavs/3.wav
/home/ubuntu/data/uberduck-multispeaker/TATII/wavs/4.wav
/home/ubuntu/data/uberduck-multispeaker/TATII/wavs/5.wav
/home/ubuntu/data/uberduck-multispeaker/TATII/wavs/6.wav
/home/ubuntu/data/uberduck-multispeaker/TATII/wavs/7.wav
/home/ubuntu/data/uberduck-multispeaker/TATII/wavs/8.wav
/home/ubuntu/data/uberduck-multispeaker/TATII/wavs/9.wav
/home/ubuntu/data/uberduck-multispeaker/TATII/wavs/10.wav
/home/ubuntu/data/uberduck-multispeaker/TATII/wavs/11.wav
/home/ubuntu/data/uberduck-multispeaker/TATII/wavs/12.wav
/home/ubuntu/data/uberduck-multispeaker/TATII/wavs/13.wav
/home/ubuntu/data/uberduck-multispeaker/TATII/wavs/14.wav
/home/ubuntu/data/uberduck-multispeaker/TATII/wavs/15.wav
/home/ubuntu/data/uberduck-multispeaker/TATII/wavs/16.wav
/home/ubuntu/data/uberduck-multispeaker/TATII/wavs/17.wav
/home/ubuntu/data/uberd

KeyboardInterrupt: 

In [None]:
# Generate the report for the single "Brain" dataset, with MOSNet/SRMR scoring
# and wordcloud generation switched off; the report goes to STATISTICS.md and
# the plots to the "imgs" folder inside the dataset directory.
run(
    dataset_path="/home/ubuntu/data/uberduck-multispeaker/Brain",
    input_file="list.txt",
    output_file="STATISTICS.md",
    img_folder="imgs",
    delimiter="|",
    metrics=False,
    wordcloud=False,
)

56it [00:02, 23.17it/s]


<Figure size 432x288 with 0 Axes>

In [None]:
# # export

# try:
#     from nbdev.imports import IN_NOTEBOOK
# except:
#     IN_NOTEBOOK = False

# if __name__ == "__main__" and not IN_NOTEBOOK:
#     args = parse_args(sys.argv[1:])
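#     run(
#         dataset_path=args.dataset_path,
#         input_file=args.input_file,
#         output_file=args.output_file,
#         img_folder=args.img_folder,
#         delimiter=args.delimiter,
#         metrics=args.metrics,
#         wordcloud=getattr(args, "wordcloud", True),
#     )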