In [3]:
# Loads the long recordings and splits them into 1-second files.
# Note: the two ear recordings (EARS_1 / EARS_2) are also combined into a single 2-channel file.
import os
import torchaudio
import torch

In [4]:
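# Loads every .wav file in old_data_path (files whose name contains 'BKP' are skipped).
# Splits each recording into chunks that are file_lengths seconds long.
# Saves the chunks in new_data_path as <original name>_<chunk index>.wav.
# Note: no resampling is performed; every chunk keeps the sample rate of its source file.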
class AudioBreakdown():
    """Split long .wav recordings into fixed-length chunks.

    Every ``.wav`` file found in ``old_data_path`` is loaded, cut into
    consecutive chunks of ``file_lengths`` seconds and written to
    ``new_data_path`` as ``<original stem>_<chunk index>.wav``.

    Files whose name contains ``EARS_2`` are treated as one half of a
    two-ear recording: the matching ``EARS_1`` file is loaded as well and
    the two waveforms are stacked along the channel axis before chunking
    (``EARS_2`` channels first, then ``EARS_1``).  Files whose name
    contains ``BKP`` are ignored.

    No resampling is performed; chunks keep the source sample rate.

    Args:
        old_data_path: Directory containing the long source ``.wav`` files.
        new_data_path: Directory that receives the chunk files.  It is
            created if it does not exist.
        file_lengths: Chunk duration in seconds (may be fractional).
        pad_last: If ``False`` (default, the historical behaviour) a
            trailing remainder shorter than one chunk is discarded.  If
            ``True`` the remainder is zero-padded to a full chunk and
            saved as the final chunk.
    """

    def __init__(self, old_data_path, new_data_path, file_lengths, pad_last=False):
        # Chunk duration in seconds.
        self.file_lengths = file_lengths
        # Where the long source recordings live.
        self.old_path = old_data_path
        # Where the chunk files are written.
        self.new_path = new_data_path
        # Whether to keep (zero-padded) a trailing partial chunk.
        self.pad_last = pad_last

    def get_wav_files(self):
        """Return the names of the .wav files in ``old_path`` to process.

        Names containing ``BKP`` are excluded.  Names containing ``EARS_1``
        are excluded too, because those files are picked up through their
        ``EARS_2`` partner in :meth:`run`.  The list is sorted so that the
        processing order does not depend on ``os.listdir`` ordering.
        """
        return sorted(
            name
            for name in os.listdir(self.old_path)
            if name.endswith(".wav")
            and 'BKP' not in name
            and 'EARS_1' not in name
        )

    @staticmethod
    def _split_chunks(waveform, chunk_length, pad_last=False):
        """Cut a (channels, samples) tensor into chunk_length-sample pieces.

        Full chunks are slices (views) of ``waveform``.  A trailing
        remainder is dropped unless ``pad_last`` is true, in which case it
        is zero-padded to ``chunk_length`` samples using the waveform's own
        channel count, dtype and device.
        """
        total_samples = waveform.shape[1]
        full_count = total_samples // chunk_length
        chunks = [
            waveform[:, idx * chunk_length:(idx + 1) * chunk_length]
            for idx in range(full_count)
        ]

        leftover = total_samples - full_count * chunk_length
        if pad_last and leftover:
            tail = waveform[:, full_count * chunk_length:]
            # Pad with as many channels as the audio really has (1 for
            # mono, 2 for the combined ears) rather than assuming stereo.
            filler = waveform.new_zeros((waveform.shape[0], chunk_length - leftover))
            chunks.append(torch.cat((tail, filler), dim=1))
        return chunks

    def _load_ear_pair(self, file_name):
        """Load an EARS_2 file and its EARS_1 partner as one waveform.

        Returns:
            ``(waveform, sample_rate)`` where ``waveform`` is the EARS_2
            channels followed by the EARS_1 channels.

        Raises:
            ValueError: If the two files have different sample rates.
        """
        file_path1 = os.path.join(self.old_path, file_name)
        file_name2 = file_name.replace('EARS_2', 'EARS_1')
        file_path2 = os.path.join(self.old_path, file_name2)
        print(file_path1, file_path2)

        waveform1, sample_rate1 = torchaudio.load(file_path1)
        waveform2, sample_rate2 = torchaudio.load(file_path2)

        # Stacking channels recorded at different rates would silently
        # label one of them with the wrong sample rate.
        if sample_rate1 != sample_rate2:
            raise ValueError(
                f"Sample rate mismatch: {file_path1} is {sample_rate1} Hz "
                f"but {file_path2} is {sample_rate2} Hz"
            )

        # torch.cat along the channel axis needs equal lengths; trim both
        # ears to the shorter recording instead of failing.
        common_length = min(waveform1.shape[1], waveform2.shape[1])
        if waveform1.shape[1] != waveform2.shape[1]:
            print(f"\tLength mismatch, trimming both ears to {common_length} samples")
        combined = torch.cat(
            (waveform1[:, :common_length], waveform2[:, :common_length]), dim=0
        )
        return combined, sample_rate1

    def _save_chunks(self, file_name, waveform, sample_rate):
        """Chunk ``waveform`` and write every chunk into ``new_path``.

        Raises:
            ValueError: If ``file_lengths`` yields a chunk of zero or fewer
                samples at this sample rate.
        """
        print(f"\twaveform shape: {waveform.shape}")

        # Must be an int: it is used for slicing and for range().
        chunk_length = int(round(self.file_lengths * sample_rate))
        if chunk_length <= 0:
            raise ValueError(
                f"file_lengths={self.file_lengths!r} gives a chunk of "
                f"{chunk_length} samples at {sample_rate} Hz"
            )
        print(f"\tChunk length: {chunk_length}")

        chunks = self._split_chunks(waveform, chunk_length, self.pad_last)
        print(f"\tNumber of chunks: {len(chunks)}")

        # splitext only strips the extension; str.replace('.wav', '') would
        # also remove a '.wav' occurring in the middle of the name.
        stem = os.path.splitext(file_name)[0]
        for idx, chunk in enumerate(chunks):
            new_file_path = os.path.join(self.new_path, f"{stem}_{idx}.wav")
            torchaudio.save(new_file_path, chunk, sample_rate)

    def run(self):
        """Chunk every eligible file in ``old_path`` into ``new_path``."""
        # torchaudio.save does not create missing directories.
        os.makedirs(self.new_path, exist_ok=True)

        for file_name in self.get_wav_files():
            # Only names that really contain 'EARS_2' have a distinct
            # 'EARS_1' partner; any other name is chunked as-is rather than
            # being stacked with itself.
            if 'EARS_2' in file_name:
                waveform, sample_rate = self._load_ear_pair(file_name)
            else:
                print(f"{file_name}")
                file_path = os.path.join(self.old_path, file_name)
                waveform, sample_rate = torchaudio.load(file_path)

            self._save_chunks(file_name, waveform, sample_rate)
        


In [6]:
# origin -> directory holding the long source recordings
# output -> directory that receives the 1-second chunk files
origin = '/workspace/model2_data'
output = '/workspace/extension/1sec_wav_files'

# file_lengths is the chunk duration in seconds
myaudio = AudioBreakdown(
    old_data_path=origin,
    new_data_path=output,
    file_lengths=1,
)
myaudio.run()

recording_01.wav
	waveform shape: torch.Size([1, 754110000])
	Chunk length: 44100
	Number of chunks: 17100
/workspace/model2_data/90Deg_EARS_2.wav /workspace/model2_data/90Deg_EARS_1.wav
	waveform shape: torch.Size([2, 754110000])
	Chunk length: 44100
	Number of chunks: 17100


In [None]:
# Spot check: load one saved chunk back from disk.
# NOTE(review): this path points at 10sec_wav_files, not the 1sec_wav_files
# directory written by the run above -- presumably left over from a separate
# run with file_lengths=10; confirm the directory exists before re-running.
waveform, sample_rate = torchaudio.load('/workspace/extension/10sec_wav_files/90Deg_EARS_2_8.wav')

In [None]:
# Display the shape of the chunk loaded in the previous cell
# (the recorded output is torch.Size([2, 441000])).
waveform.shape

torch.Size([2, 441000])