In [1]:
# This notebook breaks up the 4-hour-45-minute recordings into 30-second files.
# MusicGen uses 30-second clips, but you can change the length to whatever you want.
import os
import torchaudio
import torch

In [2]:
# Loads all the .wav files in old_data_path (skipping any whose name contains 'BKP'),
# splits them into chunks of file_lengths seconds,
# and saves the chunks in new_data_path.
# The audio is also resampled to 32 kHz when its sample rate differs.
class AudioBreakdown():
    """Split long ``.wav`` recordings into fixed-length chunks.

    Every ``.wav`` file in ``old_data_path`` whose name does not contain
    ``'BKP'`` is loaded, resampled to ``target_sample_rate`` when its native
    rate differs, cut into consecutive chunks of ``file_lengths`` seconds and
    written to ``new_data_path`` as ``<original stem>_<chunk index>.wav``.

    Args:
        old_data_path: Directory containing the source ``.wav`` files.
        new_data_path: Directory that receives the chunk files. ``run()``
            creates it when it does not exist yet.
        file_lengths: Chunk duration in seconds; must be positive. Fractional
            values are allowed and are rounded to a whole number of samples.
        target_sample_rate: Sample rate in Hz of the saved chunks
            (default 32000).
        keep_remainder: When False (default) the audio left over after the
            last full chunk is discarded. When True it is zero-padded to a
            full chunk and saved as one extra file.

    Raises:
        ValueError: If ``file_lengths`` or ``target_sample_rate`` is not
            positive.
    """

    def __init__(self, old_data_path, new_data_path, file_lengths,
                 target_sample_rate=32000, keep_remainder=False):
        if file_lengths <= 0:
            raise ValueError(f"file_lengths must be positive, got {file_lengths}")
        if target_sample_rate <= 0:
            raise ValueError(
                f"target_sample_rate must be positive, got {target_sample_rate}"
            )

        # Chunk duration in seconds.
        self.file_lengths = file_lengths
        # Where the long source recordings live.
        self.old_path = old_data_path
        # Where the chunk files are written.
        self.new_path = new_data_path
        # Sample rate every chunk is saved at.
        self.target_sample_rate = target_sample_rate
        # Whether the trailing partial chunk is padded and kept.
        self.keep_remainder = keep_remainder

    def get_wav_files(self):
        """Return the names of the ``.wav`` files in ``old_path`` to split.

        Files whose name contains ``'BKP'`` are skipped. Names are returned
        without the directory part, in ``os.listdir`` order.
        """
        return [
            file for file in os.listdir(self.old_path)
            if 'BKP' not in file and file.endswith(".wav")
        ]

    def _split(self, waveform, chunk_length):
        """Cut a ``(channels, samples)`` tensor into ``chunk_length``-sample pieces.

        Returns a list of chunks in order. A trailing partial chunk is only
        included when ``keep_remainder`` is set, and is then zero-padded.
        """
        if chunk_length < 1:
            raise ValueError(
                f"Chunk length must be at least one sample, got {chunk_length}"
            )

        total_samples = waveform.shape[1]
        num_chunks = total_samples // chunk_length
        if self.keep_remainder and total_samples % chunk_length:
            num_chunks += 1

        chunks = []
        for i in range(num_chunks):
            start_idx = i * chunk_length
            # Slicing past the end is safe: the last chunk simply comes out short.
            chunk = waveform[:, start_idx:start_idx + chunk_length]

            missing = chunk_length - chunk.shape[1]
            if missing > 0:
                # Pad with as many channels as the audio actually has (mono or
                # stereo) and with the same dtype/device as the chunk.
                pad = chunk.new_zeros((chunk.shape[0], missing))
                chunk = torch.cat((chunk, pad), dim=1)
            chunks.append(chunk)
        return chunks

    def run(self):
        """Split every eligible file in ``old_path`` and save the chunks.

        Progress information (file name, waveform shape, chunk length and
        number of chunks) is printed for each file.
        """
        # torchaudio.save does not create missing directories.
        os.makedirs(self.new_path, exist_ok=True)

        for file_name in self.get_wav_files():
            print(f"{file_name}")
            file_path = os.path.join(self.old_path, file_name)
            waveform, sample_rate = torchaudio.load(file_path)

            if sample_rate != self.target_sample_rate:
                print(f"Sample rate is {sample_rate} not {self.target_sample_rate}")
                resampler = torchaudio.transforms.Resample(
                    sample_rate, self.target_sample_rate
                )
                waveform = resampler(waveform)
                sample_rate = self.target_sample_rate
            print(f"\twaveform shape: {waveform.shape}")

            # Number of samples per chunk; rounded so fractional durations
            # still give an integer usable for slicing.
            chunk_length = int(round(self.file_lengths * sample_rate))
            print(f"\tChunk length: {chunk_length}")

            chunks = self._split(waveform, chunk_length)
            print(f"\tNumber of chunks: {len(chunks)}")

            # Strip only the extension; str.replace would also remove any
            # '.wav' occurring inside the name itself.
            stem = os.path.splitext(file_name)[0]
            for i, chunk in enumerate(chunks):
                new_file_path = os.path.join(self.new_path, f"{stem}_{i}.wav")
                torchaudio.save(new_file_path, chunk, sample_rate)
        


In [5]:
# Split every recording found in `origin` and write the chunks to `output`.
# origin -> where the 4 hour 45 min files are stored
# output -> where to store the chunk files
# NOTE: file_lengths=10 below means the chunks are 10 seconds long, not 30.
origin = '/workspace/model2_data'
output = '/workspace/small_model_data3'
myaudio = AudioBreakdown(old_data_path=origin, new_data_path=output, file_lengths=10)
myaudio.run()

recording_01.wav
Sample rate is 44100 not 32000
	waveform shape: torch.Size([1, 547200000])
	Chunk length: 320000
	Number of chunks: 1710
90Deg_EARS_1.wav
Sample rate is 44100 not 32000
	waveform shape: torch.Size([1, 547200000])
	Chunk length: 320000
	Number of chunks: 1710
90Deg_EARS_2.wav
Sample rate is 44100 not 32000
	waveform shape: torch.Size([1, 547200000])
	Chunk length: 320000
	Number of chunks: 1710
