diff --git a/ecosound/_version.py b/ecosound/_version.py index 6561790..d62d967 100644 --- a/ecosound/_version.py +++ b/ecosound/_version.py @@ -1 +1 @@ -__version__ = "0.0.15" +__version__ = "0.0.16" diff --git a/ecosound/core/annotation.py b/ecosound/core/annotation.py index c439d2d..b4cd449 100644 --- a/ecosound/core/annotation.py +++ b/ecosound/core/annotation.py @@ -16,9 +16,14 @@ import sqlite3 from ecosound.core.metadata import DeploymentInfo from ecosound.visualization.grapher_builder import GrapherFactory +from ecosound.core.spectrogram import Spectrogram +from ecosound.core.audiotools import Sound import copy import csv import datetime +import re +import warnings +from tqdm import tqdm class Annotation: @@ -65,6 +70,8 @@ class Annotation: remove_duplicates=False,inherit_metadata=False, filter_deploymentID=True, inplace=False) Filter annotations overalaping with another set of annotations. + update_audio_dir(new_data_dir) + Update path of audio files. get_labels_class() Return all unique class labels. get_labels_subclass() @@ -287,6 +294,18 @@ def check_integrity( frequency_max). Problematic annotations:" + str(freq_check) ) + + # check that there are not uuid duplicates + idx = self.data.duplicated(subset=["uuid"]) + dup_idxs = idx[idx == True].index + for dup_idx in dup_idxs: + self.data.loc[dup_idx, "uuid"] = str(uuid.uuid4()) + if len(dup_idxs) > 0: + if verbose: + print( + len(dup_idxs), + " UUID duplicates were found and regenerated.", + ) if verbose: print("Integrity test succesfull") @@ -1425,6 +1444,236 @@ def filter(self, query_str, inplace=False, **kwargs): out_object.check_integrity() return out_object + def update_audio_dir(self, new_data_dir, verbose=False): + """ + Update path of audio files + + Recursively finds the path of the annotations audio files in the folder + provided in new_data_dir and automatically updates the annotation field + "audio_file_dir". It is useful when the location of the audio data has + moved or if using annotations on a different computer. + + Parameters + ---------- + new_data_dir : str + Path of the parent directory where the audio files are. + verbose : bool + Printprocess logs in command window if set to True. The defaut is + False. + + Returns + ------- + None. 
+ + """ + # list name of all audio files in dataset + dataset_files_list = set( + self.data["audio_file_dir"] + + os.path.sep + + self.data["audio_file_name"] + + self.data["audio_file_extension"] + ) + + # list extension of all audio files in dataset + dataset_ext_list = set( + [os.path.splitext(file)[1] for file in dataset_files_list] + ) + if verbose: + print(len(dataset_files_list), " audio files.") + + # list all audio files in new folder (only for the target file extensions) + new_dir_files_list = [] + for ext in dataset_ext_list: + new_dir_files_list = ( + new_dir_files_list + + ecosound.core.tools.list_files( + new_data_dir, ext, recursive=True + ) + ) + + # go through each file in dataset and try to find in in new data folder + missing_files_list = [] + for file in dataset_files_list: + # if verbose: + # print(file) + res = [ + idx + for idx, new_dir_file in enumerate(new_dir_files_list) + if re.search(os.path.split(file)[1], new_dir_file) + ] + if len(res) == 0: + missing_files_list.append(file) + else: + new_path = os.path.split(new_dir_files_list[res[0]])[0] + self.data.loc[ + self.data["audio_file_name"] + == os.path.splitext(os.path.split(file)[1])[0], + "audio_file_dir", + ] = new_path + + if len(missing_files_list) > 0: + warnings.warn( + str(len(missing_files_list)) + " files could not be found." + ) + if verbose: + print("") + print("List of audio files not found: ") + print("") + for ff in missing_files_list: + print(ff) + else: + if verbose: + print("Audio paths succesfully updated.") + + def export_spectrograms( + self, + out_dir, + time_buffer_sec=1, + spectro_unit="sec", + spetro_nfft=256, + spetro_frame=256, + spetro_inc=5, + freq_min_hz=None, + freq_max_hz=None, + sanpling_rate_hz=None, + filter_order=8, + filter_type="iir", + fig_size=(15, 10), + deployment_subfolders=True, + file_prefix_field=None, + channel=None, + colormap="viridis", + ): + + # define the different class names and create separate folders + if os.path.isdir(out_dir) == False: + os.mkdir(out_dir) + labels = list(set(self.data["label_class"])) + # labels.reverse() + + # initialize spectrogram + Spectro = Spectrogram( + spetro_frame, + "hann", + spetro_nfft, + spetro_inc, + sanpling_rate_hz, + unit=spectro_unit, + ) + + # loop through each class_labels + for label in labels: + # print(label) + current_dir = os.path.join(out_dir, label) + if os.path.isdir(current_dir) == False: + os.mkdir(current_dir) + annot_sp = self.data[self.data["label_class"] == label] + + # loop through is annot for that class label + for idx, annot in tqdm( + annot_sp.iterrows(), + desc=label, + leave=True, + position=0, + miniters=1, + total=len(annot_sp), + colour="green", + ): + F = str(annot.uuid) + ".png" + # create subfolder for each deployment if option selected + if deployment_subfolders: + current_dir2 = os.path.join( + current_dir, str(annot.deployment_ID) + ) + if os.path.isdir(current_dir2) == False: + os.mkdir(current_dir2) + else: + current_dir2 = current_dir + # only if file doesn't exist already + if os.path.isfile(os.path.join(current_dir2, F)) == False: + # print("Processing file", F) + + # Load info from audio file + audio_data = Sound( + os.path.join( + annot["audio_file_dir"], annot["audio_file_name"] + ) + + annot["audio_file_extension"] + ) + + # define start/stop times +/- buffer + t1 = annot.time_min_offset - time_buffer_sec + if t1 <= 0: + t1 = 0 + t2 = annot.time_max_offset + time_buffer_sec + if t2 > audio_data.file_duration_sec: + t2 = audio_data.file_duration_sec + duration = t2 - t1 + + # load 
audio data + if channel != None: + chan = int(channel) + else: + chan = annot["audio_channel"] - 1 + audio_data.read( + channel=chan, + chunk=[t1, t2], + unit="sec", + detrend=True, + ) + + # decimate + audio_data.decimate(sanpling_rate_hz) + + # normalize + audio_data.normalize() + + # compute spectrogram + _ = Spectro.compute(audio_data, dB=True, use_dask=False) + + # crop if needed + if freq_min_hz != None or freq_max_hz != None: + Spectro.crop( + frequency_min=freq_min_hz, + frequency_max=freq_max_hz, + inplace=True, + ) + + # display/save spectrogram as image file + graph = GrapherFactory( + "SoundPlotter", + title=annot["audio_file_name"], + fig_size=fig_size, + colormap=colormap, + ) + # crop plot if needed + if freq_min_hz != None: + graph.frequency_min = freq_min_hz + if freq_max_hz != None: + graph.frequency_max = freq_max_hz + + graph.add_data(Spectro) + if file_prefix_field: + prefix = annot[file_prefix_field] + if type(prefix) is float: + if prefix < 0: + prefix = "minus-" + str(abs(round(prefix, 2))) + else: + prefix = str(round(prefix, 2)) + full_out_file = os.path.join( + current_dir2, prefix + "_" + F + ) + else: + full_out_file = os.path.join(current_dir2, F) + graph.to_file(full_out_file) + # graph.show() + + # if params["spetro_on_npy"]: + # np.save(os.path.splitext(outfilename)[0] + ".npy", S) + # annot_unique_id += 1 + # else: + # print("file ", F, " already processed.") + def get_labels_class(self): """ Get all the unique class labels of the annotations. diff --git a/ecosound/core/audiotools.py b/ecosound/core/audiotools.py index 933251d..463ed76 100644 --- a/ecosound/core/audiotools.py +++ b/ecosound/core/audiotools.py @@ -14,6 +14,7 @@ import matplotlib.pyplot as plt import numpy as np import scipy.signal as spsig +import scipy import copy import ecosound.core.tools @@ -82,6 +83,12 @@ class Sound: tighten_waveform_window(energy_percentage) Crops the beginning and end times of a waveform in a Sound object based on a percentage of energy. + upsample(resolution_sec) + upsample the waveform to a time resolution of resolution_sec. + decimate(new_sampling_frequency, filter_order=8, filter_type="iir") + Decimate waveform. + normalize() + Normalize max amplitude of waveform to 1. """ @@ -108,8 +115,9 @@ def __init__(self, infile): myfile = sf.SoundFile(infile) self._file_duration_sample = myfile.seek(0, sf.SEEK_END) self._file_sampling_frequency = myfile.samplerate - self._file_duration_sec = self._file_duration_sample / \ - self._file_sampling_frequency + self._file_duration_sec = ( + self._file_duration_sample / self._file_sampling_frequency + ) self._channels = myfile.channels self._channel_selected = [] self._file_dir = os.path.dirname(infile) @@ -125,10 +133,33 @@ def __init__(self, infile): self.detrended = [] myfile.close() else: - raise ValueError("The sound file can't be found. Please verify" - + ' sound file name and path') - - def read(self, channel=0, chunk=[], unit='samp', detrend=False): + raise ValueError( + "The sound file can't be found. Please verify" + + " sound file name and path" + ) + + def detrend(self): + self._waveform = self._waveform - np.mean(self._waveform) + + def write( + self, + outfilename, + subtype="PCM_24", + endian=None, + format=None, + closefd=True, + ): + sf.write( + outfilename, + self.waveform, + int(self.waveform_sampling_frequency), + subtype=subtype, + endian=endian, + format=format, + closefd=closefd, + ) + + def read(self, channel=0, chunk=[], unit="samp", detrend=False): """ Load data from sound file. 
@@ -173,47 +204,78 @@ def read(self, channel=0, chunk=[], unit='samp', detrend=False): sig, fs = sf.read(self.file_full_path, always_2d=True) self._waveform = sig[:, channel] self._waveform_start_sample = 0 - self._waveform_stop_sample = self.file_duration_sample-1 + self._waveform_stop_sample = self.file_duration_sample - 1 self._waveform_duration_sample = len(self._waveform) - self._waveform_duration_sec = self._waveform_duration_sample/fs + self._waveform_duration_sec = ( + self._waveform_duration_sample / fs + ) else: - if unit not in ('samp','sec'): - raise ValueError('Invalid unit. Should be set to "sec" or' - + '"samp".') + if unit not in ("samp", "sec"): + raise ValueError( + 'Invalid unit. Should be set to "sec" or' + '"samp".' + ) # convert chunk to sampels if needed - if unit in ('sec'): - chunk = np.round(np.dot(chunk,self.waveform_sampling_frequency)) + if unit in ("sec"): + chunk = np.round( + np.dot(chunk, self.waveform_sampling_frequency) + ) if len(chunk) == 2: # only read a section of the file # Validate input values - if (chunk[0] < 0) | (chunk[0] >= self.file_duration_sample): - raise ValueError('Invalid chunk start value. The sample' - + ' value chunk[0] is outside of the' - + ' file limits.') - elif (chunk[1] < 0) | (chunk[1] > self.file_duration_sample): - raise ValueError('Invalid chunk stop value. The sample' - + ' value chunk[1] is outside of the' - + ' file limits.') + if (chunk[0] < 0) | ( + chunk[0] >= self.file_duration_sample + ): + raise ValueError( + "Invalid chunk start value. The sample" + + " value chunk[0] is outside of the" + + " file limits." + ) + elif (chunk[1] < 0) | ( + chunk[1] > self.file_duration_sample + ): + raise ValueError( + "Invalid chunk stop value. The sample" + + " value chunk[1] is outside of the" + + " file limits." + ) elif chunk[1] <= chunk[0]: - raise ValueError('Invalid chunk values. chunk[1] must' - + ' be greater than chunk[0]') + raise ValueError( + "Invalid chunk values. chunk[1] must" + + " be greater than chunk[0]" + ) # read data - sig, fs = sf.read(self.file_full_path, start=int(chunk[0]), - stop=int(chunk[1]), always_2d=True) + sig, fs = sf.read( + self.file_full_path, + start=int(chunk[0]), + stop=int(chunk[1]), + always_2d=True, + ) self._waveform = sig[:, channel] self._waveform_start_sample = chunk[0] self._waveform_stop_sample = chunk[1] self._waveform_duration_sample = len(self._waveform) - self._waveform_duration_sec = self._waveform_duration_sample/fs + self._waveform_duration_sec = ( + self._waveform_duration_sample / fs + ) else: - raise ValueError('Invalid chunk values. The argument chunk' - + ' must be a list of 2 elements.') + raise ValueError( + "Invalid chunk values. The argument chunk" + + " must be a list of 2 elements." 
+ ) self._channel_selected = channel - if detrend: # removes DC offset - self._waveform = self._waveform - np.mean(self._waveform) + if detrend: # removes DC offset + self.detrend() + # self._waveform = self._waveform - np.mean(self._waveform) else: - msg = ''.join(['Channel ', str(channel), ' does not exist (', - str(self._channels), ' channels available).']) + msg = "".join( + [ + "Channel ", + str(channel), + " does not exist (", + str(self._channels), + " channels available).", + ] + ) raise ValueError(msg) def filter(self, filter_type, cutoff_frequencies, order=4, verbose=True): @@ -256,25 +318,35 @@ def filter(self, filter_type, cutoff_frequencies, order=4, verbose=True): if self._filter_applied is False: # check bandpass cuttoff freq and switch to lowpass.highpass if necessary - if (filter_type == 'bandpass') and (min(cutoff_frequencies) <=0): + if (filter_type == "bandpass") and (min(cutoff_frequencies) <= 0): cutoff_frequencies = [max(cutoff_frequencies)] - filter_type = 'lowpass' + filter_type = "lowpass" if verbose: - print('Warning: filter type was changed from "bandpass" to "lowpass".') - if (filter_type == 'bandpass') and (max(cutoff_frequencies) >=self._waveform_sampling_frequency/2): + print( + 'Warning: filter type was changed from "bandpass" to "lowpass".' + ) + if (filter_type == "bandpass") and ( + max(cutoff_frequencies) + >= self._waveform_sampling_frequency / 2 + ): cutoff_frequencies = [min(cutoff_frequencies)] - filter_type = 'highpass' + filter_type = "highpass" if verbose: - print('Warning: filter type was changed from "bandpass" to "highpass".') + print( + 'Warning: filter type was changed from "bandpass" to "highpass".' + ) # Instantiate filter object my_filter = Filter(filter_type, cutoff_frequencies, order) - self._waveform = my_filter.apply(self._waveform, - self._waveform_sampling_frequency) + self._waveform = my_filter.apply( + self._waveform, self._waveform_sampling_frequency + ) self._filter_applied = True self._filter_params = my_filter else: - raise ValueError('This signal has been filtered already. Cannot' - + ' filter twice.') + raise ValueError( + "This signal has been filtered already. Cannot" + + " filter twice." + ) def upsample(self, resolution_sec): """ @@ -290,19 +362,84 @@ def upsample(self, resolution_sec): Returns ------- - None. Updates the waveform of the Sound object. + None. Updates the waveform and sampling frequency of the Sound object. """ self._waveform, self._waveform_sampling_frequency = upsample( self._waveform, - 1/ self._waveform_sampling_frequency, - resolution_sec) + 1 / self._waveform_sampling_frequency, + resolution_sec, + ) + self._waveform_duration_sec = ( + len(self._waveform) / self._waveform_sampling_frequency + ) + self._waveform_duration_sample = ( + self._waveform_duration_sec * self._waveform_sampling_frequency + ) + + def decimate( + self, new_sampling_frequency, filter_order=8, filter_type="iir" + ): + """ + Decimate waveform - def normalize(self, method='amplitude'): - if method == 'amplitude': + Filter and reduce the number of samples in the waveform. + + Parameters + ---------- + new_sampling_frequency : float + Sampling frequency requested, in Hz. + filter_order : int, optional + Order of the low-pass filter to use. The default is 8. + filter_type : str, optional + Type of low-pass filter to use. The default is 'iir'. + + Returns + ------- + None. Updates the waveform and sampling frequency of the Sound object. 
+ + """ + + # downsample to user-defined sampling rate + downsampling_factor = int( + np.round(self.waveform_sampling_frequency / new_sampling_frequency) + ) + + # decimate signal (the cutoff frequency of the filter is 0.8 x new_sampling_frequency) + sig_decimated = scipy.signal.decimate( + self.waveform, + downsampling_factor, + n=filter_order, + ftype=filter_type, + axis=0, + zero_phase=True, + ) + # update object + self._waveform = sig_decimated + self._waveform_sampling_frequency = ( + self.waveform_sampling_frequency / downsampling_factor + ) + self._waveform_duration_sec = ( + len(sig_decimated) / self._waveform_sampling_frequency + ) + self._waveform_duration_sample = ( + self._waveform_duration_sec * self._waveform_sampling_frequency + ) + + def normalize(self, method="amplitude"): + if method == "amplitude": self._waveform = self._waveform / np.max(self._waveform) - def plot(self, unit='sec', newfig=False, label=[],linestyle='-', marker='',color='black', title=''): + def plot( + self, + unit="sec", + newfig=False, + label=[], + linestyle="-", + marker="", + color="black", + title="", + ): """ Plot waveform of the audio signal. @@ -335,35 +472,41 @@ def plot(self, unit='sec', newfig=False, label=[],linestyle='-', marker='',color """ if len(self._waveform) == 0: - raise ValueError('Cannot plot, waveform data enpty. Use Sound.read' - + ' to load the waveform') - if unit == 'sec': - axis_t = np.arange(0, len(self._waveform) - / self._waveform_sampling_frequency, 1 - / self._waveform_sampling_frequency) - xlabel = 'Time (sec)' - elif unit == 'samp': + raise ValueError( + "Cannot plot, waveform data enpty. Use Sound.read" + + " to load the waveform" + ) + if unit == "sec": + axis_t = np.arange( + 0, + len(self._waveform) / self._waveform_sampling_frequency, + 1 / self._waveform_sampling_frequency, + ) + xlabel = "Time (sec)" + elif unit == "samp": axis_t = np.arange(0, len(self._waveform), 1) - xlabel = 'Time (sample)' + xlabel = "Time (sample)" if newfig: plt.figure() - axis_t = axis_t[0:len(self._waveform)] - plt.plot(axis_t, self._waveform, - color=color, - marker = marker, - linestyle = linestyle, - label=label, - ) + axis_t = axis_t[0 : len(self._waveform)] + plt.plot( + axis_t, + self._waveform, + color=color, + marker=marker, + linestyle=linestyle, + label=label, + ) plt.xlabel(xlabel) - plt.ylabel('Amplitude') + plt.ylabel("Amplitude") plt.title(title) - plt.axis([axis_t[0], axis_t[-1], - min(self._waveform), - max(self._waveform)]) + plt.axis( + [axis_t[0], axis_t[-1], min(self._waveform), max(self._waveform)] + ) plt.grid() plt.show() - def select_snippet(self, chunk, unit='samp'): + def select_snippet(self, chunk, unit="samp"): """ Select section of the loaded waveform. @@ -393,31 +536,46 @@ def select_snippet(self, chunk, unit='samp'): """ if len(chunk) != 2: - raise ValueError('Chunk should be a list of with 2 values: ' - + 'chunk=[t1, t2].') - elif unit not in ('samp','sec'): - raise ValueError('Invalid unit. Should be set to "sec" or "samp".') + raise ValueError( + "Chunk should be a list of with 2 values: " + "chunk=[t1, t2]." + ) + elif unit not in ("samp", "sec"): + raise ValueError('Invalid unit. 
Should be set to "sec" or "samp".') elif chunk[0] >= chunk[1]: - raise ValueError('Chunk[0] should be greater than chunk[1].') + raise ValueError("Chunk[0] should be greater than chunk[1].") - if unit == 'sec': - chunk[0] = int(np.floor(chunk[0] * self.waveform_sampling_frequency)) - chunk[1] = int(np.ceil(chunk[1] * self.waveform_sampling_frequency)) + if unit == "sec": + chunk[0] = int( + np.floor(chunk[0] * self.waveform_sampling_frequency) + ) + chunk[1] = int( + np.ceil(chunk[1] * self.waveform_sampling_frequency) + ) if (chunk[0] < 0) | (chunk[0] > self.file_duration_sample): - raise ValueError('Invalid chunk start value. The start value ' - + 'chunk[0] is outside of file limit.') + raise ValueError( + "Invalid chunk start value. The start value " + + "chunk[0] is outside of file limit." + ) elif (chunk[1] < 0) | (chunk[1] > self.file_duration_sample): - raise ValueError('Invalid chunk stop value. The stop value ' - + 'chunk[1] is outside of file limit.') - + raise ValueError( + "Invalid chunk stop value. The stop value " + + "chunk[1] is outside of file limit." + ) snippet = copy.deepcopy(self) - snippet._waveform = self._waveform[chunk[0]:chunk[1]] - snippet._waveform_stop_sample = snippet._waveform_start_sample + chunk[1] - snippet._waveform_start_sample = snippet._waveform_start_sample + chunk[0] + snippet._waveform = self._waveform[chunk[0] : chunk[1]] + snippet._waveform_stop_sample = ( + snippet._waveform_start_sample + chunk[1] + ) + snippet._waveform_start_sample = ( + snippet._waveform_start_sample + chunk[0] + ) snippet._waveform_duration_sample = len(snippet._waveform) - snippet._waveform_duration_sec = snippet._waveform_duration_sec / snippet._waveform_sampling_frequency + snippet._waveform_duration_sec = ( + snippet._waveform_duration_sec + / snippet._waveform_sampling_frequency + ) return snippet def tighten_waveform_window(self, energy_percentage): @@ -440,7 +598,9 @@ def tighten_waveform_window(self, energy_percentage): -related attributes. """ - chunk = ecosound.core.tools.tighten_signal_limits(self._waveform, energy_percentage) + chunk = ecosound.core.tools.tighten_signal_limits( + self._waveform, energy_percentage + ) snip = self.select_snippet(chunk) self.__dict__.update(snip.__dict__) @@ -486,7 +646,10 @@ def file_dir(self): @property def file_full_path(self): """Return the file_full_path attribute.""" - return os.path.join(self._file_dir, self._file_name) + self._file_extension + return ( + os.path.join(self._file_dir, self._file_name) + + self._file_extension + ) @property def file_extension(self): @@ -592,24 +755,34 @@ def __init__(self, type, cutoff_frequencies, order=4): """ # chech filter type - if (type == 'bandpass') | (type == 'lowpass') | (type == 'highpass') == 0: - raise ValueError('Wrong filter type. Must be "bandpass", "lowpass"' - +', or "highpass".') + if (type == "bandpass") | (type == "lowpass") | ( + type == "highpass" + ) == 0: + raise ValueError( + 'Wrong filter type. Must be "bandpass", "lowpass"' + + ', or "highpass".' + ) # chech freq values - if (type == 'bandpass'): + if type == "bandpass": if len(cutoff_frequencies) != 2: - raise ValueError('The type "bandpass" requires two frepuency ' - + 'values: cutoff_frequencies=[lowcut, ' - + 'highcut].') + raise ValueError( + 'The type "bandpass" requires two frepuency ' + + "values: cutoff_frequencies=[lowcut, " + + "highcut]." 
+ ) elif cutoff_frequencies[0] > cutoff_frequencies[1]: - raise ValueError('The lowcut value should be smaller than the ' - + 'highcut value: cutoff_frequencies=[lowcut,' - + ' highcut].') - elif (type == 'lowpass') | (type == 'highpass'): + raise ValueError( + "The lowcut value should be smaller than the " + + "highcut value: cutoff_frequencies=[lowcut," + + " highcut]." + ) + elif (type == "lowpass") | (type == "highpass"): if len(cutoff_frequencies) != 1: - raise ValueError('The type "lowpass" and "highpass" require ' - + 'one frequency value cutoff_frequencies=' - + '[cutfreq].') + raise ValueError( + 'The type "lowpass" and "highpass" require ' + + "one frequency value cutoff_frequencies=" + + "[cutfreq]." + ) self.type = type self.cutoff_frequencies = cutoff_frequencies self.order = order @@ -631,10 +804,10 @@ def apply(self, waveform, sampling_frequency): Filtered time series. """ - #b, a = self.coefficients(sampling_frequency) - #return spsig.sosfiltfilt (b, a, waveform) + # b, a = self.coefficients(sampling_frequency) + # return spsig.sosfiltfilt (b, a, waveform) sos = self.coefficients(sampling_frequency) - return spsig.sosfiltfilt (sos, waveform) + return spsig.sosfiltfilt(sos, waveform) def coefficients(self, sampling_frequency): """ @@ -654,52 +827,63 @@ def coefficients(self, sampling_frequency): """ nyquist = 0.5 * sampling_frequency - if self.type == 'bandpass': + if self.type == "bandpass": low = self.cutoff_frequencies[0] / nyquist high = self.cutoff_frequencies[1] / nyquist - #b, a = spsig.butter(self.order, [low, high], btype='band') - sos = spsig.butter(self.order, [low, high], btype='band', output='sos') - elif self.type == 'lowpass': + # b, a = spsig.butter(self.order, [low, high], btype='band') + sos = spsig.butter( + self.order, [low, high], btype="band", output="sos" + ) + elif self.type == "lowpass": # b, a = spsig.butter(self.order, # self.cutoff_frequencies[0]/nyquist, 'low') - sos = spsig.butter(self.order, - self.cutoff_frequencies[0]/nyquist, 'low',output='sos') - elif self.type == 'highpass': + sos = spsig.butter( + self.order, + self.cutoff_frequencies[0] / nyquist, + "low", + output="sos", + ) + elif self.type == "highpass": # b, a = spsig.butter(self.order, # self.cutoff_frequencies[0]/nyquist, 'high') - sos = spsig.butter(self.order, - self.cutoff_frequencies[0]/nyquist, 'high',output='sos') + sos = spsig.butter( + self.order, + self.cutoff_frequencies[0] / nyquist, + "high", + output="sos", + ) return sos def upsample(waveform, current_res_sec, new_res_sec): - """ - Upsample waveform - - Increase the number of samples in the waveform and interpolate. + """ + Upsample waveform - Parameters - ---------- - waveform: 1D array - Waveform to upsample - current_res_sec : float - Time resolution of waveform in seconds. It is the inverse of the - sampling frequency. - new_res_sec : float - New time resolution of waveform after interpolation (in seconds). + Increase the number of samples in the waveform and interpolate. - Returns - ------- - waveform: 1D array - waveform upsampled to have a time resolution of "new_res_sec". + Parameters + ---------- + waveform: 1D array + Waveform to upsample + current_res_sec : float + Time resolution of waveform in seconds. It is the inverse of the + sampling frequency. + new_res_sec : float + New time resolution of waveform after interpolation (in seconds). + + Returns + ------- + waveform: 1D array + waveform upsampled to have a time resolution of "new_res_sec". 
- """ - axis_t = np.arange(0, len(waveform)*current_res_sec, current_res_sec) - new_fs = round(1/new_res_sec) - nb_samp = round(axis_t[-1]*new_fs) - new_waveform, new_axis_t = spsig.resample(waveform, - nb_samp, - t=axis_t, - window='hann', - ) - return new_waveform, new_fs \ No newline at end of file + """ + axis_t = np.arange(0, len(waveform) * current_res_sec, current_res_sec) + new_fs = round(1 / new_res_sec) + nb_samp = round(axis_t[-1] * new_fs) + new_waveform, new_axis_t = spsig.resample( + waveform, + nb_samp, + t=axis_t, + window="hann", + ) + return new_waveform, new_fs diff --git a/ecosound/core/measurement.py b/ecosound/core/measurement.py index 82db36e..1c93359 100644 --- a/ecosound/core/measurement.py +++ b/ecosound/core/measurement.py @@ -10,10 +10,16 @@ import xarray as xr import os -class Measurement(Annotation): - def __init__(self, measurer_name=None, measurer_version=None, measurements_name=None): - """ Measurement object. +class Measurement(Annotation): + def __init__( + self, + measurer_name=None, + measurer_version=None, + measurements_name=None, + measurements_parameters=None, + ): + """Measurement object. Object to "store" sound measurements. Inheritate all methods from the ecosound Annotaion class. @@ -22,31 +28,36 @@ def __init__(self, measurer_name=None, measurer_version=None, measurements_name= ---------- measurer_name : str, optional Name of the measurer that was used to calculate the measurements. - The default is None. + The default is None. measurer_version : str, optional Version of the measurer that was used to calculate the measurements. The default is None. measurements_name : list of str, optional List with the name of each measurement. The default is None. - + measurements_parameters: dict, optional + dict with lists of measurement parameters Returns ------- None. ecosound Measurement object with a .data and .metadata dataframes """ super(Measurement, self).__init__() - metadata = {'measurer_name': measurer_name, - 'measurer_version': measurer_version, - 'measurements_name': [measurements_name], - } + metadata = { + "measurer_name": measurer_name, + "measurer_version": measurer_version, + "measurements_name": [measurements_name], + "measurements_parameters": [measurements_parameters], + } self._metadata = pd.DataFrame(metadata) - self.data = pd.concat([self.data,pd.DataFrame(columns=metadata['measurements_name'][0])]) + self.data = pd.concat( + [self.data, pd.DataFrame(columns=metadata["measurements_name"][0])] + ) @property def metadata(self): """ Return the metadata attribute. - + Includes adictionary with the measurer_name, measurer_version, and measurements_name. """ @@ -69,18 +80,28 @@ def to_netcdf(self, file): None. 
""" - if file.endswith('.nc') == False: - file = file + '.nc' + if file.endswith(".nc") == False: + file = file + ".nc" self._enforce_dtypes() meas = self.data - meas.set_index('time_min_date', drop=False, inplace=True) - meas.index.name = 'date' + meas.set_index("time_min_date", drop=False, inplace=True) + meas.index.name = "date" dxr1 = meas.to_xarray() - dxr1.attrs['datatype'] = 'Measurement' - dxr1.attrs['measurements_name'] = self.metadata.measurements_name.values[0] - dxr1.attrs['measurer_name'] = self.metadata.measurer_name.values[0] - dxr1.attrs['measurer_version'] = self.metadata.measurer_version.values[0] - dxr1.to_netcdf(file, engine='netcdf4', format='NETCDF4') + dxr1.attrs["datatype"] = "Measurement" + dxr1.attrs[ + "measurements_name" + ] = self.metadata.measurements_name.values[0] + dxr1.attrs["measurer_name"] = self.metadata.measurer_name.values[0] + dxr1.attrs["measurer_version"] = self.metadata.measurer_version.values[ + 0 + ] + try: + dxr1.attrs["measurements_parameters"] = str( + self.metadata.measurements_parameters[0] + ) + except: + pass + dxr1.to_netcdf(file, engine="netcdf4", format="NETCDF4") def from_netcdf(self, file, verbose=False): """ @@ -94,7 +115,7 @@ def from_netcdf(self, file, verbose=False): ---------- file : str Path of the nc file to import. Can be a str if importing a single - file or entire folder. Needs to be a list if importing multiple + file or entire folder. Needs to be a list if importing multiple files. If 'files' is a folder, all files in that folder ending with '.nc' will be imported. verbose : bool, optional @@ -108,58 +129,83 @@ def from_netcdf(self, file, verbose=False): """ if type(file) is str: if os.path.isdir(file): - file = ecosound.core.tools.list_files(file, - '.nc', - recursive=False, - case_sensitive=True, - ) + file = ecosound.core.tools.list_files( + file, + ".nc", + recursive=False, + case_sensitive=True, + ) if verbose: - print(len(file), 'files found.') + print(len(file), "files found.") else: file = [file] self.data, self._metadata = self._import_netcdf_files(file) - self.check_integrity(verbose=verbose) + self.check_integrity(verbose=verbose) def _import_netcdf_files(self, files): """Import one or several netcdf files to a Panda datafrane.""" - assert type(files) in (str, list), "Input must be of type str (single \ + assert type(files) in ( + str, + list, + ), "Input must be of type str (single \ file or directory) or list (multiple files)" # Import all files to a dataframe - tmp =[] + tmp = [] for idx, file in enumerate(files): dxr = xr.open_dataset(file) - if dxr.attrs['datatype'] == 'Measurement': + if dxr.attrs["datatype"] == "Measurement": if idx == 0: measurer_name = dxr.measurer_name - measurer_version = dxr.measurer_version + measurer_version = dxr.measurer_version measurements_name = dxr.measurements_name + try: + measurements_parameters = eval( + dxr.measurements_parameters + ) + except: + measurements_parameters = None ## check measurere name and version - if (dxr.measurer_name == measurer_name) & (dxr.measurer_version == measurer_version): + if (dxr.measurer_name == measurer_name) & ( + dxr.measurer_version == measurer_version + ): tmp2 = dxr.to_dataframe() tmp2.reset_index(inplace=True) else: - raise ValueError(file + "Not all files were not generated from the same measurer type and version.") + raise ValueError( + file + + "Not all files were not generated from the same measurer type and version." 
+ ) else: - raise ValueError(file + 'Not a Measurement file.') + raise ValueError(file + "Not a Measurement file.") tmp.append(tmp2) data = pd.concat(tmp, ignore_index=True, sort=False) data.reset_index(inplace=True, drop=True) - metadata = {'measurer_name': measurer_name, - 'measurer_version': measurer_version, - 'measurements_name': [measurements_name], - } + metadata = { + "measurer_name": measurer_name, + "measurer_version": measurer_version, + "measurements_name": [measurements_name], + "measurements_parameters": [measurements_parameters], + } metadata = pd.DataFrame(metadata) return data, metadata - + def __add__(self, other): """Concatenate data from several Measurement objects.""" - assert type(other) is ecosound.core.measurement.Measurement, "Object type not\ + assert ( + type(other) is ecosound.core.measurement.Measurement + ), "Object type not\ supported. Can only concatenate Measurement objects together." - assert other.metadata['measurer_name'].values[0] == self.metadata['measurer_name'].values[0], "Can't concatenate measurements made from different measurers." - assert other.metadata['measurer_version'].values[0] == self.metadata['measurer_version'].values[0], "Can't concatenate measurements made from different versions of measurers." + assert ( + other.metadata["measurer_name"].values[0] + == self.metadata["measurer_name"].values[0] + ), "Can't concatenate measurements made from different measurers." + assert ( + other.metadata["measurer_version"].values[0] + == self.metadata["measurer_version"].values[0] + ), "Can't concatenate measurements made from different versions of measurers." self._enforce_dtypes() other._enforce_dtypes() - self.data = pd.concat([self.data, other.data], - ignore_index=True, - sort=False) - return self \ No newline at end of file + self.data = pd.concat( + [self.data, other.data], ignore_index=True, sort=False + ) + return self diff --git a/ecosound/core/tools.py b/ecosound/core/tools.py index b1d3100..6cfeaaf 100644 --- a/ecosound/core/tools.py +++ b/ecosound/core/tools.py @@ -16,6 +16,7 @@ import pkg_resources import yaml + def read_json(file): """Load JSON file as dict.""" with open(file, "r") as read_file: @@ -43,17 +44,20 @@ def read_yaml(file): config = yaml.load(yaml_file, Loader=yaml.FullLoader) return config + @ecosound.core.decorators.listinput def filename_to_datetime(files): """Extract date from a list of str of filenames.""" current_dir = os.path.dirname(os.path.realpath(__file__)) - patterns = read_json(os.path.join(current_dir, r'timestamp_formats.json')) + patterns = read_json(os.path.join(current_dir, r"timestamp_formats.json")) - #stream = pkg_resources.resource_stream(__name__, 'core/timestamp_formats.json') - #patterns = read_json(os.path.join(stream) + # stream = pkg_resources.resource_stream(__name__, 'core/timestamp_formats.json') + # patterns = read_json(os.path.join(stream) - regex_string = '|'.join([pattern['string_pattern'] for pattern in patterns]) - time_formats = [pattern['time_format'] for pattern in patterns] + regex_string = "|".join( + [pattern["string_pattern"] for pattern in patterns] + ) + time_formats = [pattern["time_format"] for pattern in patterns] timestamps = [None] * len(files) p = re.compile(regex_string) for idx, file in enumerate(files): @@ -68,10 +72,11 @@ def filename_to_datetime(files): if ok_flag is True: break if ok_flag is False: - raise ValueError('Time format not recognized:' + file) + raise ValueError("Time format not recognized:" + file) return timestamps -#@njit + +# @njit def 
normalize_vector(vec): """ Normalize amplitude of vector. @@ -80,10 +85,11 @@ def normalize_vector(vec): # normVec = vec/max(vec) # normVec = (normVec - 0.5)*2 vec = vec - np.mean(vec) - normVec = vec/max(vec) + normVec = vec / max(vec) return normVec -#@njit + +# @njit def tighten_signal_limits(signal, energy_percentage): """ Tighten signal limits @@ -93,11 +99,13 @@ def tighten_signal_limits(signal, energy_percentage): """ cumul_energy = np.cumsum(np.square(signal)) - cumul_energy = cumul_energy/max(cumul_energy) - percentage_begining = (1-(energy_percentage/100))/2 + cumul_energy = cumul_energy / max(cumul_energy) + percentage_begining = (1 - (energy_percentage / 100)) / 2 percentage_end = 1 - percentage_begining - chunk = [np.nonzero(cumul_energy > percentage_begining)[0][0], - np.nonzero(cumul_energy > percentage_end)[0][0]] + chunk = [ + np.nonzero(cumul_energy > percentage_begining)[0][0], + np.nonzero(cumul_energy > percentage_end)[0][0], + ] return chunk @@ -107,7 +115,7 @@ def tighten_signal_limits_peak(signal, percentage_max_energy): Redefine start and stop samples to have "energy_percentage" of the original signal. Returns a list with the new start and stop sample indices. - + small values of percentage_max_energy -> tighter signal """ @@ -117,41 +125,44 @@ def tighten_signal_limits_peak(signal, percentage_max_energy): sort_idx = np.argsort(-squared_signal_normalized) sort_val = squared_signal_normalized[sort_idx] sort_val_cum = np.cumsum(sort_val) - id_limit=np.where(sort_val_cum>(percentage_max_energy/100)) - id_limit=id_limit[0][0] + id_limit = np.where(sort_val_cum > (percentage_max_energy / 100)) + id_limit = id_limit[0][0] min_idx_limit = np.min(sort_idx[0:id_limit]) max_idx_limit = np.max(sort_idx[0:id_limit]) chunk = [min_idx_limit, max_idx_limit] - + return chunk -def resample_1D_array(x, y, resolution, kind='linear'): + +def resample_1D_array(x, y, resolution, kind="linear"): """ Interpolate values of coordinates x and y with a given resolution. Default uisn linear interpolation. """ - f = interpolate.interp1d(x, y, kind=kind, fill_value='extrapolate') - xnew = np.arange(x[0], x[-1]+resolution, resolution) + f = interpolate.interp1d(x, y, kind=kind, fill_value="extrapolate") + xnew = np.arange(x[0], x[-1] + resolution, resolution) ynew = f(xnew) return xnew, ynew + @njit def entropy(array_1d, apply_square=False): - """ - Aggregate (SHannon's) entropy as defined in the Raven manual - apply_square = True, suqares the array value before calculation. - """ - if apply_square: - array_1d = np.square(array_1d) - values_sum = np.sum(array_1d) - H = 0 - for value in array_1d: - ratio = (value/values_sum) - if ratio <= 0: - H += 0 - else: - H += ratio*np.log2(ratio) - return H + """ + Aggregate (SHannon's) entropy as defined in the Raven manual + apply_square = True, suqares the array value before calculation. + """ + if apply_square: + array_1d = np.square(array_1d) + values_sum = np.sum(array_1d) + H = 0 + for value in array_1d: + ratio = value / values_sum + if ratio <= 0: + H += 0 + else: + H += ratio * np.log2(ratio) + return H + @njit def derivative_1d(array, order=1): @@ -165,6 +176,7 @@ def derivative_1d(array, order=1): array = np.subtract(array[1:], array[0:-1]) return array + def list_files(indir, suffix, case_sensitive=True, recursive=False): """ List files in folder whose name ends with a given suffix/extension. 
@@ -199,14 +211,16 @@ def list_files(indir, suffix, case_sensitive=True, recursive=False): file = file.lower() if file.endswith(suffix): files_list.append(os.path.join(root, file)) - #print(os.path.join(root, file)) + # print(os.path.join(root, file)) else: # only scans parent folder for file in os.listdir(indir): if case_sensitive is False: file = file.lower() if file.endswith(suffix): files_list.append(os.path.join(indir, file)) - #print(os.path.join(indir, file)) + # print(os.path.join(indir, file)) + else: + raise Exception("The indir folder given does not exist.") return files_list @@ -232,46 +246,66 @@ def find_peaks(array, troughs=False): """ - x = [0,] - y = [array[0],] - for k in range(1,len(array)-1): + x = [ + 0, + ] + y = [ + array[0], + ] + for k in range(1, len(array) - 1): if troughs: - if (np.sign(array[k]-array[k-1])==-1) and ((np.sign(array[k]-array[k+1]))==-1): + if (np.sign(array[k] - array[k - 1]) == -1) and ( + (np.sign(array[k] - array[k + 1])) == -1 + ): x.append(k) y.append(array[k]) else: - if (np.sign(array[k]-array[k-1])==1) and (np.sign(array[k]-array[k+1])==1): - x.append(k) - y.append(array[k]) + if (np.sign(array[k] - array[k - 1]) == 1) and ( + np.sign(array[k] - array[k + 1]) == 1 + ): + x.append(k) + y.append(array[k]) return x, y -def envelope(array, interp='cubic'): - #initialize output arrays +def envelope(array, interp="cubic"): + # initialize output arrays env_high = np.zeros(array.shape) env_low = np.zeros(array.shape) - #Prepend the first value of (s) to the interpolating values. This forces - #the model to use the same starting point for both the upper and lower - #envelope models. - u_x = [0,] - u_y = [array[0],] - l_x = [0,] - l_y = [array[0],] - #Detect peaks and troughs - l_x, l_y = find_peaks(array,troughs=True) - u_x, u_y = find_peaks(array,troughs=False) - #Append the last value of (s) to the interpolating values. This forces the - #model to use the same ending point for both the upper and lower envelope - #models. - u_x.append(len(array)-1) + # Prepend the first value of (s) to the interpolating values. This forces + # the model to use the same starting point for both the upper and lower + # envelope models. + u_x = [ + 0, + ] + u_y = [ + array[0], + ] + l_x = [ + 0, + ] + l_y = [ + array[0], + ] + # Detect peaks and troughs + l_x, l_y = find_peaks(array, troughs=True) + u_x, u_y = find_peaks(array, troughs=False) + # Append the last value of (s) to the interpolating values. This forces the + # model to use the same ending point for both the upper and lower envelope + # models. 
+ u_x.append(len(array) - 1) u_y.append(array[-1]) - l_x.append(len(array)-1) + l_x.append(len(array) - 1) l_y.append(array[-1]) - #Interpolate between peaks/troughs - u_p = interpolate.interp1d(u_x,u_y, kind = interp,bounds_error = False, fill_value=0.0) - l_p = interpolate.interp1d(l_x,l_y,kind = interp,bounds_error = False, fill_value=0.0) - for k in range(0,len(array)): + # Interpolate between peaks/troughs + u_p = interpolate.interp1d( + u_x, u_y, kind=interp, bounds_error=False, fill_value=0.0 + ) + l_p = interpolate.interp1d( + l_x, l_y, kind=interp, bounds_error=False, fill_value=0.0 + ) + for k in range(0, len(array)): env_high[k] = u_p(k) env_low[k] = l_p(k) return env_high, env_low diff --git a/ecosound/measurements/__init__.py b/ecosound/measurements/__init__.py index 51dfa4c..b88a504 100644 --- a/ecosound/measurements/__init__.py +++ b/ecosound/measurements/__init__.py @@ -1 +1,2 @@ from .spectrogram_features import SpectrogramFeatures +from .snr import SNR diff --git a/ecosound/measurements/snr.py b/ecosound/measurements/snr.py new file mode 100644 index 0000000..038cdbe --- /dev/null +++ b/ecosound/measurements/snr.py @@ -0,0 +1,245 @@ +# -*- coding: utf-8 -*- +""" +Created on Fri Feb 7 15:27:39 2020 + +@author: xavier.mouy +""" + +from .measurer_builder import BaseClass +from ecosound.core.annotation import Annotation +from ecosound.core.measurement import Measurement +from ecosound.core.audiotools import Sound +import numpy as np +import pandas as pd +from dask import delayed, compute, visualize +import os + + +class SNR(BaseClass): + """ """ + + measurer_parameters = ("noise_win_sec",) + + def __init__(self, *args, **kwargs): + """ + Initialize the measurer. + + Parameters + ---------- + *args : str + Do not use. Only used by the MeasurerFactory. + noise_win_sec : float, optional + Duration of window to use on either side of the signal to estimate + noise, in seconds. + + Returns + ------- + None. Measurer object. + + """ + # Initialize all measurer parameters to None + self.__dict__.update( + dict( + zip( + self.measurer_parameters, + [None] * len(self.measurer_parameters), + ) + ) + ) + + # Unpack kwargs as measurer parameters if provided on instantiation + self.__dict__.update(**kwargs) + + @property + def name(self): + """Return name of the measurer.""" + measurer_name = "SNR" + return measurer_name + + @property + def version(self): + """Return version of the measurer.""" + version = "0.1" + return version + + def _prerun_check(self, annotations): + """Run several verifications before the run.""" + # check that all required arguments are defined + if True in [ + self.__dict__.get(keys) is None + for keys in self.measurer_parameters + ]: + raise ValueError( + "Not all measurer parameters have been defined." + + " Required parameters: " + + str(self.measurer_parameters) + ) + # check that annotations is an Annotation class + if not isinstance(annotations, Annotation): + raise ValueError( + "Input must be an ecosound Annotation object" + + "(ecosound.core.annotation)." + ) + + def compute(self, annotations, debug=False, verbose=False, use_dask=False): + """Compute signal-to-noise-ratio of annotations. + + Goes through each annotation and computes the SNR by estinating the + power of the noise before and after the annotation. Measurements are + performed on the band-pass filtered waveform. + + Parameters + ---------- + annotations : ecosound Annotation object + Annotations of the sounds to measure. Can be from manual analysis + or from an automatic detector. 
+ use_dask : bool, optional + If True, run the measurer in parallele using Dask. The default is + False. + debug : bool, optional + Displays figures for each annotation with the spectrogram, spectral + and time envelopes, and tables with all associated measurements. + The default is False. + verbose : bool, optional + Prints in the console the annotation being processed. The default + is False. + + Returns + ------- + measurements : ecosound Measurement object + Measurement object containing the measurements appended to the + original annotation fields. Measurements are in the .data data + frame. Metadata with mearurer name, version and measurements names + are in the .metadata datafreame. + + """ + self._prerun_check(annotations) + + # init + features = self._init_dataframe() + features_name = list(features.columns) + # loop through each annotation + df_list = [] + for index, annot in annotations.data.iterrows(): + if verbose: + print( + "processing annotation ", + index, + annot["time_min_offset"], + "-", + annot["time_max_offset"], + ) + + # feature for 1 annot + if use_dask: + df = delayed(self.compute_single_annot)(annot, debug) + else: + df = self.compute_single_annot(annot, debug) + # stack features for each annotation + df_list.append(df) + if use_dask: + features = delayed(pd.concat)(df_list, ignore_index=False) + # features.visualize('measuremnets') + features = features.compute() + else: + features = pd.concat(df_list, ignore_index=False) + # merge with annotation fields + annotations.data.set_index("uuid", inplace=True, drop=False) + features.set_index("uuid", inplace=True, drop=True) + meas = pd.concat([annotations.data, features], axis=1, join="inner") + meas.reset_index(drop=True, inplace=True) + + params_dict = dict() + for param in self.measurer_parameters: + params_dict[param] = eval("self." 
+ param) + + # create Measurement object + measurements = Measurement( + measurer_name=self.name, + measurer_version=self.version, + measurements_name=features_name, + measurements_parameters=params_dict, + ) + measurements.data = meas + return measurements + + def _init_dataframe(self): + tmp = pd.DataFrame( + { + "snr": [], + } + ) + return tmp + + def compute_single_annot(self, annot, debug): + # load sound file properties + sound = Sound( + os.path.join(annot["audio_file_dir"], annot["audio_file_name"]) + + annot["audio_file_extension"] + ) + + # define left noise window + noise_left_start = annot["time_min_offset"] - self.noise_win_sec + if noise_left_start < 0: + noise_left_end = annot["time_min_offset"] + noise_left_start = 0 + else: + noise_left_end = annot["time_min_offset"] + + # define right noise window + noise_right_start = annot["time_max_offset"] + noise_right_end = noise_right_start + self.noise_win_sec + if noise_right_end > sound.file_duration_sec: + noise_right_end = sound.file_duration_sec + + # load sound data chunk + sound.read(chunk=[noise_left_start, noise_right_end], unit="sec") + # bandpass filter + sound.filter( + "bandpass", + cutoff_frequencies=[ + annot["frequency_min"], + annot["frequency_max"], + ], + order=4, + verbose=False, + ) + sound.normalize() + + # calculate energies + times_samp = np.round( + np.dot( + [ + noise_left_start, + noise_left_end, + noise_right_start, + noise_right_end, + ], + sound.waveform_sampling_frequency, + ) + ) + times_samp = times_samp - times_samp[0] + noise_left = sound.waveform[int(times_samp[0]) : int(times_samp[1])] + sig = sound.waveform[int(times_samp[1]) : int(times_samp[2])] + noise_right = sound.waveform[int(times_samp[2]) : int(times_samp[3])] + noise_pw = (sum(noise_left**2) + sum(noise_right**2)) / ( + len(noise_left) + len(noise_right) + ) + sig_pw = sum(sig**2) / len(sig) + # try: + snr = 10 * np.log10(sig_pw / noise_pw) + # except: + # print("Error") + # snr = np.nan + + if debug: + sound.plot(newfig=True, title=str(round(snr, 1))) + + # stack all features + tmp = pd.DataFrame( + { + "uuid": [annot["uuid"]], + "snr": [snr], + } + ) + return tmp diff --git a/ecosound/visualization/sound_plotter.py b/ecosound/visualization/sound_plotter.py index 563019d..4ee308f 100644 --- a/ecosound/visualization/sound_plotter.py +++ b/ecosound/visualization/sound_plotter.py @@ -8,6 +8,7 @@ from .grapher_builder import BaseClass from ecosound.core.audiotools import Sound from ecosound.core.spectrogram import Spectrogram + try: from ecosound.core.annotation import Annotation except ImportError: @@ -73,17 +74,18 @@ class SoundPlotter(BaseClass): Save graph to file. 
""" - grapher_parameters = ('frequency_min', - 'frequency_max', - 'time_min', - 'time_max', - 'unit', - 'fig_size', - 'share_xaxis', - 'grid', - 'title', - 'colormap', - ) + grapher_parameters = ( + "frequency_min", + "frequency_max", + "time_min", + "time_max", + "unit", + "fig_size", + "share_xaxis", + "grid", + "title", + "colormap", + ) def __init__(self, *args, **kwargs): """ @@ -134,19 +136,25 @@ def __init__(self, *args, **kwargs): """ # Initialize all grapher parameters to None - self.__dict__.update(dict(zip(self.grapher_parameters, - [None]*len(self.grapher_parameters)))) + self.__dict__.update( + dict( + zip( + self.grapher_parameters, + [None] * len(self.grapher_parameters), + ) + ) + ) # Define default values: self.frequency_min = 0 self.frequency_max = None self.time_min = 0 self.time_max = None - self.unit = 'sec' + self.unit = "sec" self.fig_size = (16, 4) self.share_xaxis = True self.grid = True self.title = None - self.colormap = 'jet' + self.colormap = "jet" # Unpack kwargs as grapher parameters if provided on instantiation self.__dict__.update(**kwargs) # Initialize containers @@ -156,13 +164,13 @@ def __init__(self, *args, **kwargs): @property def name(self): """Return name of the grapher.""" - grapher_name = 'SoundPlotter' + grapher_name = "SoundPlotter" return grapher_name @property def version(self): """Return version of the grapher.""" - version = '0.1' + version = "0.1" return version def add_data(self, *args, time_offset_sec=0): @@ -193,12 +201,19 @@ def add_data(self, *args, time_offset_sec=0): """ if len(args) < 1: - raise ValueError('There must be at least one input argument') + raise ValueError("There must be at least one input argument") # Check type of each input arguments self._stack_data(args, time_offset_sec=time_offset_sec) - - def add_annotation(self, annotation, panel=None, label=False, color='red', tag=False, line_width=1): + def add_annotation( + self, + annotation, + panel=None, + label=False, + color="red", + tag=False, + line_width=1, + ): """ Define annotations to display. @@ -226,7 +241,7 @@ def add_annotation(self, annotation, panel=None, label=False, color='red', tag=F tag : bool, optional If set to True, displays the classification confidence over each annotation box. The default is False. - line_width : int, optional + line_width : int, optional Width of the annotation line. The default is 1. Raises @@ -240,17 +255,23 @@ def add_annotation(self, annotation, panel=None, label=False, color='red', tag=F """ from ecosound.core.annotation import Annotation + if isinstance(annotation, Annotation): - self.annotations.append({'data': annotation, - 'panel': panel, - 'label': label, - 'color': color, - 'tag': tag, - 'line_width': line_width, - }) + self.annotations.append( + { + "data": annotation, + "panel": panel, + "label": label, + "color": color, + "tag": tag, + "line_width": line_width, + } + ) else: - raise ValueError('Type of input argument not recognized.' - 'Accepted object type: Annotation') + raise ValueError( + "Type of input argument not recognized." + "Accepted object type: Annotation" + ) def show(self, display=True, is_in_notebook=False): """ @@ -286,22 +307,26 @@ def show(self, display=True, is_in_notebook=False): """ if len(self.data) == 0: - raise ValueError('No data to plot. Use method .add_data to define' - ' the data to plot') + raise ValueError( + "No data to plot. Use method .add_data to define" + " the data to plot" + ) # Display plot on screen? 
if display: - matplotlib.use('Qt5Agg') + matplotlib.use("Qt5Agg") else: - matplotlib.use('Agg') + matplotlib.use("Agg") # Define new figure and subplots nb_plots = len(self.data) - fig, ax = plt.subplots(nb_plots, 1, - figsize=self.fig_size, - sharex=self.share_xaxis, - constrained_layout=True, - ) # gridspec_kw={'hspace': self.hspace} + fig, ax = plt.subplots( + nb_plots, + 1, + figsize=self.fig_size, + sharex=self.share_xaxis, + constrained_layout=True, + ) # gridspec_kw={'hspace': self.hspace} # Subplot titles - titles = [None]*nb_plots + titles = [None] * nb_plots if self.title is None: # no titles pass if type(self.title) is str: @@ -310,74 +335,93 @@ def show(self, display=True, is_in_notebook=False): if len(self.title) > nb_plots: raise ValueError("More titles than subplots") else: - titles[0:len(self.title)-1] = self.title + titles[0 : len(self.title) - 1] = self.title # Plot data for idx, data in enumerate(self.data): if nb_plots == 1: current_ax = ax else: current_ax = ax[idx] - if data['type'] == 'waveform': - self._plot_waveform(data['data'], current_ax, time_offset_sec=data['time_offset_sec'], title=titles[idx]) - elif data['type'] == 'spectrogram': - self._plot_spectrogram(data['data'], current_ax, time_offset_sec=data['time_offset_sec'], title=titles[idx]) + if data["type"] == "waveform": + self._plot_waveform( + data["data"], + current_ax, + time_offset_sec=data["time_offset_sec"], + title=titles[idx], + ) + elif data["type"] == "spectrogram": + self._plot_spectrogram( + data["data"], + current_ax, + time_offset_sec=data["time_offset_sec"], + title=titles[idx], + ) # only dipslay x label of bottom plot if shared axes - if self.share_xaxis and (idx != nb_plots-1): - current_ax.set_xlabel('') + if self.share_xaxis and (idx != nb_plots - 1): + current_ax.set_xlabel("") # Plot annotations - for idx_annot, annot in enumerate(self.annotations): # for each set of annotations + for idx_annot, annot in enumerate( + self.annotations + ): # for each set of annotations # display annotations on all panels - if annot['panel'] is None: - annot['panel'] = range(0, nb_plots) + if annot["panel"] is None: + annot["panel"] = range(0, nb_plots) # Make panel idx a list if not already - if (type(annot['panel']) is float) or (type(annot['panel']) is int): - annot['panel'] = [annot['panel']] + if (type(annot["panel"]) is float) or ( + type(annot["panel"]) is int + ): + annot["panel"] = [annot["panel"]] # Check panel indices - if max(annot['panel']) > nb_plots-1: + if max(annot["panel"]) > nb_plots - 1: raise ValueError("Invalid panel index") # PLot annotations on appropriate panels - for idx_panel in annot['panel']: # for each panel - if len(self.data)==1: + for idx_panel in annot["panel"]: # for each panel + if len(self.data) == 1: current_ax = ax else: current_ax = ax[idx_panel] - self._plot_annotations(annot['data'], current_ax, - panel_idx=idx_panel, - label=annot['label'], - color=annot['color'], - line_width = annot['line_width'], - ) - if annot['label'] is not False: + self._plot_annotations( + annot["data"], + current_ax, + panel_idx=idx_panel, + label=annot["label"], + color=annot["color"], + line_width=annot["line_width"], + ) + if annot["label"] is not False: handles, labels = current_ax.get_legend_handles_labels() - unique_labels=list(set(labels)) - new_handles=[] + unique_labels = list(set(labels)) + new_handles = [] for l in unique_labels: new_handles.append(handles[labels.index(l)]) - current_ax.legend(new_handles,unique_labels,loc='upper right') - - if annot['tag'] is True: - 
bbox_props = dict(boxstyle="square", fc="w", ec="w", alpha=0.8) - panel_type = self.data[annot['panel'][0]]['type'] - for index, row in annot['data'].data.iterrows(): - if self.unit == 'sec': - #height = row['frequency_max']-row['frequency_min'] - x = row['time_min_offset'] - if panel_type == 'spectrogram': - y = row['frequency_max'] - elif panel_type == 'waveform': + current_ax.legend( + new_handles, unique_labels, loc="upper right" + ) + + if annot["tag"] is True: + bbox_props = dict( + boxstyle="square", fc="w", ec="w", alpha=0.8 + ) + panel_type = self.data[annot["panel"][0]]["type"] + for index, row in annot["data"].data.iterrows(): + if self.unit == "sec": + # height = row['frequency_max']-row['frequency_min'] + x = row["time_min_offset"] + if panel_type == "spectrogram": + y = row["frequency_max"] + elif panel_type == "waveform": y = max(current_ax.get_ylim()) - conf = str(round(row['confidence'],2)) - elif self.unit == 'samp': - x = row['time_min_offset'] - if panel_type == 'spectrogram': - y = row['frequency_max'] - elif panel_type == 'waveform': + conf = str(round(row["confidence"], 2)) + elif self.unit == "samp": + x = row["time_min_offset"] + if panel_type == "spectrogram": + y = row["frequency_max"] + elif panel_type == "waveform": y = max(current_ax.get_ylim()) - conf = str(round(row['confidence'],2)) + conf = str(round(row["confidence"], 2)) current_ax.text(x, y, conf, size=8, bbox=bbox_props) - return fig, ax def to_file(self, filename): @@ -395,102 +439,139 @@ def to_file(self, filename): """ fig, _ = self.show(display=False) - fig.savefig(filename, transparent=False, bbox_inches='tight',) - - def _plot_annotations(self, annot, ax, label, panel_idx, color, line_width): + fig.savefig( + filename, + transparent=False, + bbox_inches="tight", + ) + plt.close(fig) + + def _plot_annotations( + self, annot, ax, label, panel_idx, color, line_width + ): """Plot annotations on top of the waveform or spectrogram axes.""" - panel_type = self.data[panel_idx]['type'] + panel_type = self.data[panel_idx]["type"] for index, row in annot.data.iterrows(): # plot annotations on spectrograms - if panel_type == 'spectrogram': + if panel_type == "spectrogram": alpha = 1 - facecolor = 'none' - if self.unit == 'sec': - x = row['time_min_offset'] - y = row['frequency_min'] - width = row['duration'] - height = row['frequency_max']-row['frequency_min'] - elif self.unit == 'samp': - time_resolution = self.data[panel_idx]['data'].time_resolution - x = round(row['time_min_offset']/time_resolution) - y = row['frequency_min'] - width = round(row['duration']/time_resolution) - height = row['frequency_max']-row['frequency_min'] - elif panel_type == 'waveform': + facecolor = "none" + if self.unit == "sec": + x = row["time_min_offset"] + y = row["frequency_min"] + width = row["duration"] + height = row["frequency_max"] - row["frequency_min"] + elif self.unit == "samp": + time_resolution = self.data[panel_idx][ + "data" + ].time_resolution + x = round(row["time_min_offset"] / time_resolution) + y = row["frequency_min"] + width = round(row["duration"] / time_resolution) + height = row["frequency_max"] - row["frequency_min"] + elif panel_type == "waveform": alpha = 0.2 facecolor = color - if self.unit == 'sec': - x = row['time_min_offset'] + if self.unit == "sec": + x = row["time_min_offset"] y = min(ax.get_ylim()) - width = row['duration'] + width = row["duration"] height = max(ax.get_ylim()) - min(ax.get_ylim()) - elif self.unit == 'samp': - time_resolution = 
self.data[panel_idx]['data'].waveform_sampling_frequency - x = round(row['time_min_offset']*time_resolution) + elif self.unit == "samp": + time_resolution = self.data[panel_idx][ + "data" + ].waveform_sampling_frequency + x = round(row["time_min_offset"] * time_resolution) y = min(ax.get_ylim()) - width = round(row['duration']*time_resolution) + width = round(row["duration"] * time_resolution) height = max(ax.get_ylim()) - min(ax.get_ylim()) - rect = plt.Rectangle((x, y), width, height, - linewidth=line_width, - edgecolor=color, - facecolor=facecolor, - alpha=alpha, - label=label) + rect = plt.Rectangle( + (x, y), + width, + height, + linewidth=line_width, + edgecolor=color, + facecolor=facecolor, + alpha=alpha, + label=label, + ) ax.add_patch(rect) def _stack_data(self, args, time_offset_sec=0): """Stack data to be plotted.""" for idx, arg in enumerate(args): if isinstance(arg, Sound): - self.data.append({'data': arg, 'type': 'waveform', 'time_offset_sec': time_offset_sec}) + self.data.append( + { + "data": arg, + "type": "waveform", + "time_offset_sec": time_offset_sec, + } + ) elif isinstance(arg, Spectrogram): - self.data.append({'data': arg, 'type': 'spectrogram', 'time_offset_sec': time_offset_sec}) + self.data.append( + { + "data": arg, + "type": "spectrogram", + "time_offset_sec": time_offset_sec, + } + ) else: - raise ValueError('Type of input argument not recognized.' - 'Accepted object types: Spectrogram, Sound') - - def _plot_spectrogram(self, spectro, current_ax, time_offset_sec=0, title=None): + raise ValueError( + "Type of input argument not recognized." + "Accepted object types: Spectrogram, Sound" + ) + + def _plot_spectrogram( + self, spectro, current_ax, time_offset_sec=0, title=None + ): """Plot spectrogram on the current axis""" if self.frequency_max is None: - self.frequency_max = spectro.sampling_frequency/2 + self.frequency_max = spectro.sampling_frequency / 2 assert len(spectro.spectrogram) > 0, "Spectrogram not computed yet. " "Use the .compute() method first." 
# add time offset if defined spectro._axis_times = spectro.axis_times + time_offset_sec - if self.unit == 'sec': + if self.unit == "sec": if self.time_max is None: self.time_max = spectro.axis_times[-1] - current_ax.pcolormesh(spectro.axis_times, - spectro.axis_frequencies, - spectro.spectrogram, - cmap=self.colormap, - vmin=np.percentile(spectro.spectrogram, 50), - vmax=np.percentile(spectro.spectrogram, 99.9), - shading='nearest', - ) - xlabel = 'Time (sec)' - elif self.unit == 'samp': + current_ax.pcolormesh( + spectro.axis_times, + spectro.axis_frequencies, + spectro.spectrogram, + cmap=self.colormap, + vmin=np.percentile(spectro.spectrogram, 50), + vmax=np.percentile(spectro.spectrogram, 99.9), + shading="nearest", + ) + xlabel = "Time (sec)" + elif self.unit == "samp": axis_t = np.arange(0, len(spectro.axis_times), 1) - axis_t = axis_t + round(time_offset_sec/spectro.time_resolution) + axis_t = axis_t + round(time_offset_sec / spectro.time_resolution) if self.time_max is None: self.time_max = axis_t[-1] - current_ax.pcolormesh(axis_t, - spectro.axis_frequencies, - spectro.spectrogram, - cmap=self.colormap, - vmin=np.percentile(spectro.spectrogram, 50), - vmax=np.percentile(spectro.spectrogram, 99.9) - ) - xlabel = 'Time (bin)' + current_ax.pcolormesh( + axis_t, + spectro.axis_frequencies, + spectro.spectrogram, + cmap=self.colormap, + vmin=np.percentile(spectro.spectrogram, 50), + vmax=np.percentile(spectro.spectrogram, 99.9), + ) + xlabel = "Time (bin)" else: - raise ValueError("Keyword 'unit' must be set to either 'sec' or" - " 'samp'.") - current_ax.axis([self.time_min, - self.time_max, - self.frequency_min, - self.frequency_max] - ) - current_ax.set_ylabel('Frequency (Hz)') + raise ValueError( + "Keyword 'unit' must be set to either 'sec' or" " 'samp'." + ) + current_ax.axis( + [ + self.time_min, + self.time_max, + self.frequency_min, + self.frequency_max, + ] + ) + current_ax.set_ylabel("Frequency (Hz)") current_ax.set_xlabel(xlabel) current_ax.set_title(title) # if self.grid: @@ -500,37 +581,50 @@ def _plot_spectrogram(self, spectro, current_ax, time_offset_sec=0, title=None): def _plot_waveform(self, sound, current_ax, time_offset_sec=0, title=None): """Plot waveform of a sound object on the current axis.""" if len(sound._waveform) == 0: - raise ValueError('Cannot plot, waveform data enpty. Use Sound.read' - + ' to load the waveform') - if self.unit == 'sec': - axis_t = np.arange(0, sound.waveform_duration_sample - / sound.waveform_sampling_frequency, 1 - / sound.waveform_sampling_frequency) + raise ValueError( + "Cannot plot, waveform data enpty. Use Sound.read" + + " to load the waveform" + ) + if self.unit == "sec": + axis_t = np.arange( + 0, + sound.waveform_duration_sample + / sound.waveform_sampling_frequency, + 1 / sound.waveform_sampling_frequency, + ) axis_t = axis_t + time_offset_sec - xlabel = 'Time (sec)' - elif self.unit == 'samp': + xlabel = "Time (sec)" + elif self.unit == "samp": axis_t = np.arange(0, len(sound._waveform), 1) - axis_t = axis_t + (time_offset_sec*sound.waveform_sampling_frequency) - xlabel = 'Time (sample)' + axis_t = axis_t + ( + time_offset_sec * sound.waveform_sampling_frequency + ) + xlabel = "Time (sample)" else: - raise ValueError("Keyword 'unit' must be set to either 'sec' or" - " 'samp'.") + raise ValueError( + "Keyword 'unit' must be set to either 'sec' or" " 'samp'." 
+ ) if self.time_max is None: - self.time_max = axis_t[-1] - #axis_t = axis_t[0:len(sound._waveform)] - current_ax.plot(axis_t[0:len(sound._waveform)], sound._waveform, color='black') + self.time_max = axis_t[-1] + # axis_t = axis_t[0:len(sound._waveform)] + current_ax.plot( + axis_t[0 : len(sound._waveform)], sound._waveform, color="black" + ) current_ax.set_xlabel(xlabel) - current_ax.set_ylabel('Amplitude') + current_ax.set_ylabel("Amplitude") current_ax.set_title(title) - current_ax.axis([self.time_min, - self.time_max, - min(sound._waveform), - max(sound._waveform)] - # current_ax.axis([axis_t[0], - # axis_t[-1], - # min(sound._waveform), - # max(sound._waveform)] - ) + current_ax.axis( + [ + self.time_min, + self.time_max, + min(sound._waveform), + max(sound._waveform), + ] + # current_ax.axis([axis_t[0], + # axis_t[-1], + # min(sound._waveform), + # max(sound._waveform)] + ) if self.grid: - current_ax.grid() \ No newline at end of file + current_ax.grid() diff --git a/requirements.txt b/requirements.txt index 87b11a7..2326336 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,8 +1,7 @@ -dask -xarray +xarray[complete] +dask[complete] pandas numba -PySoundFile dask_image matplotlib ipympl @@ -10,7 +9,7 @@ scipy numpy scikit_learn toolz -opencv-python==4.5.5.64 -soundfile +opencv-python>=4.5.5.64 +soundfile>=0.10.0 netCDF4 tqdm diff --git a/tests/old_tests/atest_snr.py b/tests/old_tests/atest_snr.py new file mode 100644 index 0000000..ef55a6e --- /dev/null +++ b/tests/old_tests/atest_snr.py @@ -0,0 +1,29 @@ +# -*- coding: utf-8 -*- +""" +Created on Tue Nov 8 10:55:51 2022 + +@author: xavier.mouy +""" + +from ecosound.core.annotation import Annotation +from ecosound.core.measurement import Measurement +from ecosound.measurements.measurer_builder import MeasurerFactory +from ecosound.core.audiotools import Sound +import os +import numpy as np + +annot_file = r"C:\Users\xavier.mouy\Documents\GitHub\fish_detector_bc\Master_annotations_dataset_20221028_without_06-MILL-FS.nc" +annot_file2 = r"C:\Users\xavier.mouy\Documents\GitHub\fish_detector_bc\Master_annotations_dataset_20221028_without_06-MILL-FS_withSNR.nc" +noise_win_sec = 0.25 + +# load annotations +dataset = Annotation() +dataset.from_netcdf(annot_file) +# dataset.filter('label_class=="FS"', inplace=True) +# dataset.data = dataset.data.iloc[:100] + +# Meausrement +snr_measurer = MeasurerFactory("SNR", noise_win_sec=noise_win_sec) +measurements_snr = snr_measurer.compute(dataset, verbose=True) +measurements_snr.to_netcdf(annot_file2) +print("done") diff --git a/tests/old_tests/create_dataset_spectrograms.py b/tests/old_tests/create_dataset_spectrograms.py new file mode 100644 index 0000000..8a935c0 --- /dev/null +++ b/tests/old_tests/create_dataset_spectrograms.py @@ -0,0 +1,45 @@ +# -*- coding: utf-8 -*- +""" +Created on Wed Mar 2 08:22:48 2022 + +@author: xavier.mouy +""" + +from ecosound.core.annotation import Annotation +from ecosound.core.spectrogram import Spectrogram +from ecosound.core.measurement import Measurement +from ecosound.core.audiotools import Sound +from ecosound.visualization.grapher_builder import GrapherFactory + + +dataset_file_path = r"C:\Users\xavier.mouy\Documents\GitHub\fish_detector_bc\Master_annotations_dataset_20221028_without_06-MILL-FS_withSNR.nc" +out_dir = r"D:\Detector\spectrograms\Master_annotations_dataset_20221025_SNR" + + +# Load dataset +dataset = Measurement() +dataset.from_netcdf(dataset_file_path) +# dataset.filter("deployment_ID=='SI-RCAOut-20181015'", inplace=True) +# 
dataset.filter("deployment_ID=='SI-RCAOut-20181015'", inplace=True) +dataset.filter("label_class=='FS'", inplace=True) +# dataset.data = dataset.data[0:10] + + +dataset.export_spectrograms( + out_dir, + time_buffer_sec=0.5, + spectro_unit="sec", + spetro_nfft=0.064, + spetro_frame=0.064, + spetro_inc=0.00125, + freq_min_hz=None, + freq_max_hz=None, + sanpling_rate_hz=4000, + filter_order=8, + filter_type="iir", + fig_size=(15, 10), + deployment_subfolders=True, + file_prefix_field="snr", + channel=0, + colormap="viridis", +) diff --git a/tests/old_tests/update_annot_data_path.py b/tests/old_tests/update_annot_data_path.py new file mode 100644 index 0000000..f334a0e --- /dev/null +++ b/tests/old_tests/update_annot_data_path.py @@ -0,0 +1,82 @@ +# -*- coding: utf-8 -*- +""" + +@author: xavier.mouy +""" + +from ecosound.core.tools import list_files +import re + +from ecosound.core.annotation import Annotation +from ecosound.core.audiotools import Sound +import soundfile as sf +from datetime import datetime +import matplotlib.pyplot as plt +import numpy as np +from sklearn.model_selection import StratifiedGroupKFold +import scipy +import os +import csv + + +annot_dataset_file = ( + r"D:\Detector\datasets\Master_annotations_dataset_20221025.nc" +) + +new_data_dir = r"D:\Detector\datasets2" +new_dataset_file = ( + r"D:\Detector\datasets\Master_annotations_dataset_2022-10-25_test.nc" +) + +# Load dataset +dataset = Annotation() +dataset.from_netcdf(annot_dataset_file) + + +# update audio dir path +dataset.update_audio_dir(new_data_dir, verbose=False) + +# # list name of all audio files in dataset +# dataset_files_list = set( +# dataset.data["audio_file_dir"] +# + os.path.sep +# + dataset.data["audio_file_name"] +# + dataset.data["audio_file_extension"] +# ) + +# # list extension of all audio files in dataset +# dataset_ext_list = set( +# [os.path.splitext(file)[1] for file in dataset_files_list] +# ) + +# # list all audio files in new folder (only for the target file extensions) +# new_dir_files_list = [] +# for ext in dataset_ext_list: +# new_dir_files_list = new_dir_files_list + list_files( +# new_data_dir, ext, recursive=True +# ) + +# # go through each file in dataset and try to find in in new data folder +# missing_files_list = [] +# for file in dataset_files_list: +# res = [ +# idx +# for idx, new_dir_file in enumerate(new_dir_files_list) +# if re.search(os.path.split(file)[1], new_dir_file) +# ] +# if len(res) == 0: +# missing_files_list.append(file) +# else: +# new_path = os.path.split(new_dir_files_list[res[0]])[0] +# dataset.data.loc[ +# dataset.data["audio_file_name"] +# == os.path.splitext(os.path.split(file)[1])[0], +# "audio_file_dir", +# ] = new_path + +# if len(missing_files_list) > 0: +# print(str(len(missing_files_list)), " files could not be found.") +# print(missing_files_list) + +# # save update dataset +# dataset.to_netcdf(new_dataset_file)