In [1]:
import os

import pandas as pd
from pandas.errors import EmptyDataError 

import numpy as np
import matplotlib.pyplot as plt

from datetime import timedelta, timezone
from datetime import date as dt
import dateutil.parser as dparser
import pytz

In [2]:
# Given parameters:
# 1) Date in the form (YYYYMMDD)
# 2) Time in the form (HHMMSS)
# 3) det_type can either be 'LF' or 'HF'

# Output:
# The table of selections. 
# - Column 1 is start time of selection within file
# - Column 2 is end time of selection within file
# - Column 3 is lowest frequency of selection within file
# - Column 4 is highest frequency of selection within file
# Length of selections will give us # of selections

def read_detection(date, time, det_type):
    """Load the detection table saved for one recording and one detector type.

    The table is read from the tab-separated text file
    ``detections/<date>/<date>_<det_type lower-cased>detections/<det_type><date>_<time>.WAV.txt``,
    resolved relative to the current working directory.

    Parameters
    ----------
    date : str
        Recording date, as it appears in the file name (YYYYMMDD).
    time : str
        Recording start time, as it appears in the file name (HHMMSS).
    det_type : str
        Detector type; used verbatim as the file-name prefix and lower-cased
        in the folder name (the notebook calls this with 'LF' and 'HF').

    Returns
    -------
    pandas.DataFrame
        The parsed table. An empty DataFrame (zero rows) is returned when the
        file does not exist or contains no data, so callers can always rely
        on ``.shape[0]``.
    """
    date_n_type_path = '/' + date + '/' + date + '_' + det_type.lower() + 'detections'
    source_dir = 'detections' + date_n_type_path

    file_name = det_type + date + '_' + time + '.WAV.txt'
    file_path = source_dir + '/' + file_name

    # No detection file for this recording: report "no rows" instead of
    # failing on an unassigned local variable.
    if not os.path.exists(file_path):
        return pd.DataFrame()

    try:
        return pd.read_csv(file_path, sep='\t')
    except EmptyDataError:
        # The file exists but has nothing to parse; treat it as zero detections.
        print(file_path + " is empty")
        return pd.DataFrame()

In [7]:
# These are variables that will help us point to specific folders and files

# Session name is the folder name where our recordings are
# NOTE(review): absolute, machine-specific path -- change it here when running elsewhere.
session_name = '/home/adkris1002/Documents/UBNA_Research/AudiomothStuff/Recordings&Code/Audiomoth2Files/Foliage2022/20220701_unit2/'

# The recordings themselves sit in the 'Recordings' sub-folder of the session
session_dir = os.path.join(session_name, 'Recordings')
files = os.listdir(session_dir)

# Sorted_files is our list of all recording file names within a single folder.
# os.listdir returns every entry (sub-folders, notes, hidden files, ...), so only
# names ending in .wav are kept; anything else would either make the date parser
# below raise or add a bogus row to the summary.
sorted_files = sorted(name for name in files if name.lower().endswith('.wav'))

# Offset added to each start time to obtain the matching end time
audio_dur = timedelta(minutes=29, seconds=55)

# We fill the first 3 columns of our dataframe right here using dateutils parser
dates = []
s_times = []
e_times = []
for file in sorted_files:
    # fuzzy=True lets the parser pick the date and time out of the file name
    file_info = dparser.parse(file, fuzzy=True)

    dates.append(str(file_info.date()))
    s_times.append(str(file_info.time()))
    e_times.append(str((file_info + audio_dur).time()))

# We fill out the next 2 columns of data regarding the number of LF and HF selections
num_LFdetections = np.zeros(len(s_times))
num_HFdetections = np.zeros(len(s_times))

for i, (date, s_time) in enumerate(zip(dates, s_times)):
    # We need to remove the - from YYYY-MM-DD which is standard datetime format.
    # This will help us find files in the YYYYMMDD standard Audiomoth format
    date = date.replace('-', '')
    # Same goes for the HH:MM:SS standard datetime format and the Audiomoth's HHMMSS standard
    time = s_time.replace(":", "")

    # NOTE(review): the row count is halved -- presumably every selection takes up
    # two rows in the detection table; confirm against the detection files.
    LFfile_detections = read_detection(date, time, 'LF')
    num_LFdetections[i] = LFfile_detections.shape[0]/2

    HFfile_detections = read_detection(date, time, 'HF')
    num_HFdetections[i] = HFfile_detections.shape[0]/2



In [8]:
# Summary table: one row per recording, built in a single step from the lists and
# arrays prepared in the previous cell (dict order fixes the column order).
df = pd.DataFrame({
    "File Names": sorted_files,
    "Date": dates,
    "Start Time (UTC)": s_times,
    "End Time (UTC)": e_times,
    "# of LF selections": num_LFdetections,
    "# of HF selections": num_HFdetections,
})

# Leave the frame as the last expression so the notebook renders it
df

Unnamed: 0,File Names,Date,Start Time (UTC),End Time (UTC),# of LF selections,# of HF selections
0,20220701_020000.WAV,2022-07-01,02:00:00,02:29:55,1.0,0.0
1,20220701_023000.WAV,2022-07-01,02:30:00,02:59:55,5.0,0.0
2,20220701_030000.WAV,2022-07-01,03:00:00,03:29:55,2.0,1.0
3,20220701_033000.WAV,2022-07-01,03:30:00,03:59:55,11.0,0.0
4,20220701_040000.WAV,2022-07-01,04:00:00,04:29:55,2.0,0.0
5,20220701_043000.WAV,2022-07-01,04:30:00,04:59:55,653.0,24.0
6,20220701_050000.WAV,2022-07-01,05:00:00,05:29:55,136.0,80.0
7,20220701_053000.WAV,2022-07-01,05:30:00,05:59:55,14.0,36.0
8,20220701_060000.WAV,2022-07-01,06:00:00,06:29:55,55.0,0.0
9,20220701_063000.WAV,2022-07-01,06:30:00,06:59:55,47.0,0.0
