In [1]:
%matplotlib inline

# Split long audio into special duration

This module can use for processing long audio split. You need modify the splice_
duration that you want for how long each audio clip, and modify output directory
you want and input directory you have.


In [2]:
# -*- coding: utf-8 -*-
""" Split long audio into special duration

This module can use for processing long audio split. You need modify the splice_
duration that you want for how long each audio clip, and modify output directory
you want and input directory you have.

################################################################################
# Author: Weikun Han <weikunhan@gmail.com>
# Crate Date: 02/20/2018        
# Update:
# Reference: https://github.com/jhetherly/EnglishSpeechUpsampler
################################################################################
"""

import os
import tqdm
import sox

# Please modify input path  to locate you file
DATASETS_ROOT_DIR = './datasets'
OUTPUT_DIR = os.path.join(DATASETS_ROOT_DIR, 'TEDLIUM_5S')
NOISE_OUTPUT_DIR = os.path.join(DATASETS_ROOT_DIR, 
                               'TEDLIUM_noise_sample_5S')

# Please modify setting for splice duration
splice_duration = 5

# List sub-folder for datasets
input_folder = ['TEDLIUM', 'TEDLIUM_noise_sample']

# Check location to save datasets
if not os.path.exists(OUTPUT_DIR):
    os.makedirs(OUTPUT_DIR)
if not os.path.exists(NOISE_OUTPUT_DIR):
    os.makedirs(NOISE_OUTPUT_DIR)

print('Will send spliced audio to {}'.format(OUTPUT_DIR))
print('Will send noise spliced audio to {}'.format(NOISE_OUTPUT_DIR))

# Loop over all files within the TEDLIUM directory first and 
# loop over all files within the TEDLIUM_noise_sample directory second
for directory in input_folder:
    input_tmp_path = os.path.join(DATASETS_ROOT_DIR, directory)
    
    if directory is 'TEDLIUM':
        output_tmp_path = OUTPUT_DIR
    elif directory is 'TEDLIUM_noise_sample':
        output_tmp_path = NOISE_OUTPUT_DIR
    
    for filename in os.listdir(input_tmp_path):
        input_filename = os.path.join(input_tmp_path, filename)

        # Check if path is an existing regular file
        if not os.path.isfile(input_filename):
            continue
        filename_base = os.path.splitext(filename)[0]

        # This is the total audio track duration
        duration = sox.file_info.duration(input_filename)

        # Computer number of iterations for splicing
        n_iterations = int(duration / splice_duration)

        # Computer number of digits need for name the new file
        digits_number = len(str(int(duration)))

        # Create number format depends on number digits
        format_number = '{{:0{}d}}'.format(digits_number)

        # Create final file format
        filename_template = '{{}}_{}-{}.wav'.format(format_number, format_number)

        print('On file {}'.format(filename_base))

        for i in tqdm.trange(n_iterations):

            # create trasnformer
            splice = sox.Transformer()
            begin = int(i * splice_duration)
            end = int(begin + splice_duration)
            output_filename = filename_template.format(filename_base,
                                                       begin, 
                                                       end)
            output_filename = os.path.join(output_tmp_path, output_filename)
            splice.trim(begin, end)
            splice.build(input_filename, output_filename)

 23%|██▎       | 39/173 [00:00<00:00, 380.02it/s]

Will send spliced audio to ./datasets/TEDLIUM_5S
Will send noise spliced audio to ./datasets/TEDLIUM_noise_sample_5S
On file AaronHuey_2010X


100%|██████████| 173/173 [00:00<00:00, 385.75it/s]
100%|██████████| 60/60 [00:00<00:00, 386.25it/s]
  0%|          | 0/201 [00:00<?, ?it/s]

On file AbigailWashburn_2012U
On file AaronKoblin_2011


100%|██████████| 201/201 [00:00<00:00, 367.19it/s]
 17%|█▋        | 35/204 [00:00<00:00, 341.66it/s]

On file AbrahamVerghese_2011G


100%|██████████| 204/204 [00:00<00:00, 335.81it/s]
 34%|███▍      | 34/100 [00:00<00:00, 335.71it/s]

On file 911Mothers_2010W


100%|██████████| 100/100 [00:00<00:00, 335.60it/s]
 45%|████▍     | 34/76 [00:00<00:00, 336.08it/s]

On file AaronOConnell_2011


100%|██████████| 76/76 [00:00<00:00, 329.60it/s]
 46%|████▌     | 35/76 [00:00<00:00, 340.68it/s]

On file AaronOConnell_2011_noise_sample


100%|██████████| 76/76 [00:00<00:00, 325.87it/s]
100%|██████████| 60/60 [00:00<00:00, 314.38it/s]
  0%|          | 0/204 [00:00<?, ?it/s]

On file AbigailWashburn_2012U_noise_sample
On file AbrahamVerghese_2011G_noise_sample


100%|██████████| 204/204 [00:00<00:00, 322.85it/s]
 35%|███▌      | 35/100 [00:00<00:00, 337.40it/s]

On file 911Mothers_2010W_noise_sample


100%|██████████| 100/100 [00:00<00:00, 336.28it/s]
 17%|█▋        | 34/201 [00:00<00:00, 329.27it/s]

On file AaronKoblin_2011_noise_sample


100%|██████████| 201/201 [00:00<00:00, 347.98it/s]
 20%|█▉        | 34/173 [00:00<00:00, 337.42it/s]

On file AaronHuey_2010X_noise_sample


100%|██████████| 173/173 [00:00<00:00, 359.91it/s]
