In [1]:
import os
import ffmpeg
import concurrent.futures
from tqdm import tqdm

In [2]:
# Convert webm files to wav files
def convert_webm_to_wav(input_file, output_file):
    """Convert a single media file with ffmpeg.

    Args:
        input_file: Path of the source file to read.
        output_file: Path of the file to write; it is passed to ffmpeg
            unchanged as the output target.

    Returns:
        None. A failed conversion is reported on stdout instead of being
        raised, so one bad file does not abort the rest of a batch.
    """
    try:
        stream = ffmpeg.input(input_file)
        stream = ffmpeg.output(stream, output_file)
        ffmpeg.run(stream, capture_stdout=True, capture_stderr=True)
    except ffmpeg.Error as e:
        message = f"Error converting file {input_file}: {e}"
        # str(e) is only a generic notice; the actual reason for the failure
        # is in the stderr captured by capture_stderr=True, so surface it.
        stderr = getattr(e, "stderr", None)
        if isinstance(stderr, bytes):
            stderr = stderr.decode("utf-8", errors="replace")
        details = stderr.strip() if stderr else ""
        if details:
            message += f"\n{details}"
        # Emit a single print so reports from worker threads stay together.
        print(message)

In [3]:
# Process the batch of files
def process_batch(batch, source_dir, target_dir):
    """Convert every file named in ``batch`` concurrently on a thread pool.

    Args:
        batch: Iterable of file names located inside ``source_dir``.
        source_dir: Directory the input files are read from.
        target_dir: Directory the ``.wav`` outputs are written to; each output
            keeps the input's base name with the extension replaced.
    """
    with concurrent.futures.ThreadPoolExecutor() as pool:
        pending = []
        for name in batch:
            stem = os.path.splitext(name)[0]
            src_path = os.path.join(source_dir, name)
            dst_path = os.path.join(target_dir, stem + ".wav")
            pending.append(pool.submit(convert_webm_to_wav, src_path, dst_path))

        # Advance the bar as jobs finish, in completion order.
        progress = tqdm(
            concurrent.futures.as_completed(pending),
            total=len(pending),
            desc="Processing Batch",
        )
        for finished in progress:
            # result() re-raises anything the worker raised.
            finished.result()

In [4]:
def create_batches(file_list, batch_size):
    """Lazily yield consecutive slices of ``file_list``.

    Each slice holds at most ``batch_size`` items; the final one may be
    shorter. An empty ``file_list`` yields nothing.
    """
    starts = range(0, len(file_list), batch_size)
    for begin in starts:
        end = begin + batch_size
        yield file_list[begin:end]

In [5]:
source_directory = "../data/EMNS/raw_webm"
target_directory = "../data/EMNS/raw_wavs"
# ffmpeg writes to the given output path but does not create missing
# directories, so make sure the destination exists before converting.
os.makedirs(target_directory, exist_ok=True)
# os.listdir returns names in arbitrary order; sort for reproducible batches.
all_files = sorted(f for f in os.listdir(source_directory) if f.endswith(".webm"))
batch_size = 32
# Ceiling division: a trailing partial batch still counts as one batch, so the
# progress bar total matches the number of batches actually yielded.
num_batches = (len(all_files) + batch_size - 1) // batch_size

for batch in tqdm(
    create_batches(all_files, batch_size),
    desc="Overall Progress",
    total=num_batches,
):
    process_batch(batch, source_directory, target_directory)

Overall Progress:   0%|          | 0/36 [00:00<?, ?it/s]

Processing Batch: 100%|██████████| 32/32 [00:00<00:00, 222.60it/s][A
Processing Batch: 100%|██████████| 32/32 [00:00<00:00, 671.38it/s]
Processing Batch: 100%|██████████| 32/32 [00:00<00:00, 885.42it/s]
Processing Batch: 100%|██████████| 32/32 [00:00<00:00, 577.48it/s]
Processing Batch: 100%|██████████| 32/32 [00:00<00:00, 574.74it/s]
Processing Batch: 100%|██████████| 32/32 [00:00<00:00, 787.55it/s]
Processing Batch: 100%|██████████| 32/32 [00:00<00:00, 921.18it/s]
Processing Batch: 100%|██████████| 32/32 [00:00<00:00, 891.56it/s]
Processing Batch: 100%|██████████| 32/32 [00:00<00:00, 644.69it/s]
Processing Batch: 100%|██████████| 32/32 [00:00<00:00, 1640.72it/s]
Processing Batch: 100%|██████████| 32/32 [00:00<00:00, 936.26it/s]
Processing Batch: 100%|██████████| 32/32 [00:00<00:00, 994.43it/s]
Processing Batch: 100%|██████████| 32/32 [00:00<00:00, 1066.25it/s]
Processing Batch: 100%|██████████| 32/32 [00:00<00:00, 1261.79it/s]
Processing Batch: 100%|██████████| 32/32 [00:00<00:00, 6