In [34]:
import errno
import glob
import os
from pathlib import Path

from pydub import AudioSegment

def generate_wav_from_mp3(mp3_filepath, remove_mp3=False):
    """
    Convert an mp3 file to a wav file stored next to it.

    The wav file gets the same name as the mp3 file with the suffix replaced
    by ``.wav``; an existing file with that name is overwritten by the export.

    :param mp3_filepath: path of the mp3 file (``pathlib.Path`` or ``str``)
    :param remove_mp3: when True, delete the mp3 file after the conversion;
        a failure to delete is reported on stdout and does not raise
    :return: path of the generated wav file
    :rtype: pathlib.Path
    """
    # Accept plain strings as well as Path objects.
    mp3_filepath = Path(mp3_filepath)
    sound = AudioSegment.from_mp3(mp3_filepath)
    new_wav_filename = mp3_filepath.with_suffix('.wav')
    # export() hands back the opened output file; close it so the handle is
    # released right away instead of whenever it gets garbage collected.
    sound.export(new_wav_filename, format="wav").close()
    if remove_mp3:
        try:
            mp3_filepath.unlink()
        except OSError as e:
            print(f'Error at deleting mp3 file: {mp3_filepath} with {e.strerror}')

    return new_wav_filename

def extract_bees_sounds(labfile_path, output_folder):
    """
    Generate wav files containing only the segments labelled ``bee``.

    The first line of the lab file is the name of the recording. Every
    following line is read as tab-separated fields where the first field is
    the segment start, the second the segment end (both in seconds) and the
    last field the label. Lines that do not have at least three fields, or
    whose times cannot be parsed, are skipped.

    The recording is searched recursively below the lab file's folder. A file
    whose stem equals the recording name is preferred; files that merely start
    with the name are used only when there is no exact match. A wav file is
    used directly; otherwise the first mp3 file found is converted to wav.

    Each exported segment is written to
    ``output_folder / '<wav stem>-<line index><wav suffix>'`` where the line
    index counts from 0 at the first line after the name. Segments whose
    output file already exists are left untouched and are not reported.

    :param labfile_path: lab file path (``pathlib.Path`` or ``str``)
    :param output_folder: folder receiving the segments (``pathlib.Path`` or
        ``str``); created on demand before the first export
    :return: paths of the files written by this call; empty when nothing was
        exported or when no wav/mp3 recording is available
    :rtype: list
    :raises FileNotFoundError: when the lab file names no recording or no
        sound file matches the recording name
    """
    labfile_path = Path(labfile_path)
    output_folder = Path(output_folder)

    with labfile_path.open() as lab_file:
        recording_name = lab_file.readline().rstrip()
        if not recording_name:
            # An empty name would turn the glob below into "match everything".
            raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), recording_name)

        # Escape the name so characters such as '[' are matched literally, and
        # filter on the real suffix: a '[!lab]' glob class only rejects a
        # single character and would also drop e.g. '.aac' files.
        pattern = f'**/{glob.escape(recording_name)}*'
        candidates = sorted(
            candidate for candidate in labfile_path.parent.glob(pattern)
            if candidate.is_file() and candidate.suffix not in ('', '.lab')
        )
        if not candidates:
            # there is no sound file for given filename
            raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), recording_name)

        # 'name*' also matches 'name(1).wav'; prefer the exact recording so a
        # lab file is never paired with a neighbour's audio.
        exact_matches = [c for c in candidates if c.stem.rstrip() == recording_name]
        sound_filenames = exact_matches or candidates

        wav_filename = next((f for f in sound_filenames if f.suffix == '.wav'), None)
        if wav_filename is None:
            mp3_filename = next((f for f in sound_filenames if f.suffix == '.mp3'), None)
            if mp3_filename is None:
                print(f'file {sound_filenames[0]} not supported for wav conversion! only mp3 format supported!')
                return []
            wav_filename = generate_wav_from_mp3(mp3_filename)

        recording = None  # decoded lazily, and only once for all segments
        output_filenames = []
        for idx, line in enumerate(lab_file):
            line_list = line.rstrip().split("\t")
            if len(line_list) < 3 or line_list[-1] != 'bee':
                continue
            output_filename = output_folder / f'{wav_filename.stem}-{idx}{wav_filename.suffix}'
            if output_filename.exists():
                continue
            try:
                start_ms = float(line_list[0]) * 1000
                end_ms = float(line_list[1]) * 1000
            except ValueError:
                # idx 0 is the second line of the file, hence the +2.
                print(f'skipping malformed line {idx + 2} in {labfile_path}: {line.rstrip()!r}')
                continue
            if recording is None:
                recording = AudioSegment.from_wav(wav_filename)
                output_folder.mkdir(parents=True, exist_ok=True)
            # export() returns the opened output file; close it explicitly.
            recording[start_ms:end_ms].export(output_filename, format='wav').close()
            output_filenames.append(output_filename)
        return output_filenames


def prepare_nuhive_data(path):
    """
    Process every ``nu-hive`` lab file found below ``path``.

    Lab files are collected with the recursive pattern ``**/nu-hive/*.lab``
    and handed one by one, in sorted order, to ``extract_bees_sounds``; the
    extracted segments go to ``path / 'nu-hive-processed'``. Progress is
    reported on stdout. A lab file whose sound file is missing is reported and
    skipped so that the remaining files are still processed.

    :param path: dataset root folder (``pathlib.Path`` or ``str``)
    """
    path = Path(path)
    output_folder = path / 'nu-hive-processed'
    # Sorted so that the processing order does not depend on the file system.
    files = sorted(path.glob('**/nu-hive/*.lab'))
    print(f'got  {len(files)} lab files to process')
    for file_path in files:
        try:
            # glob() already yields Path objects, no re-wrapping needed.
            new_filenames = extract_bees_sounds(file_path, output_folder)
            print(f'generated {len(new_filenames)} from {file_path} file!')
        except FileNotFoundError:
            print(f'missing sound file for {file_path}.')


In [35]:
from pathlib import Path

# Dataset root, relative to the current working directory. prepare_nuhive_data
# searches it for '**/nu-hive/*.lab' files and passes 'nu-hive-processed'
# beneath it as the output folder.
dataset_path = Path('./dataset/')

# Run the extraction for every lab file found; progress is printed per file.
prepare_nuhive_data(dataset_path)

got  79 lab files to process
generated 0 from dataset\nu-hive\CF001 - Missing Queen - Day -.lab file!
generated 13 from dataset\nu-hive\CF003 - Active - Day - (214).lab file!
generated 14 from dataset\nu-hive\CF003 - Active - Day - (215).lab file!
generated 3 from dataset\nu-hive\CF003 - Active - Day - (216).lab file!
generated 20 from dataset\nu-hive\CF003 - Active - Day - (217).lab file!
generated 7 from dataset\nu-hive\CF003 - Active - Day - (218).lab file!
generated 35 from dataset\nu-hive\CF003 - Active - Day - (219).lab file!
generated 1 from dataset\nu-hive\CF003 - Active - Day - (220).lab file!
generated 2 from dataset\nu-hive\CF003 - Active - Day - (221).lab file!
generated 1 from dataset\nu-hive\CF003 - Active - Day - (222).lab file!
generated 2 from dataset\nu-hive\CF003 - Active - Day - (223).lab file!
generated 3 from dataset\nu-hive\CF003 - Active - Day - (224)(1).lab file!
generated 0 from dataset\nu-hive\CF003 - Active - Day - (224).lab file!
generated 1 from dataset\nu