In [1]:
from pathlib import Path

import pandas as pd
from pandas.errors import EmptyDataError 

import matplotlib.pyplot as plt
import numpy as np

from datetime import datetime

In [2]:
# Given parameters:
# 1) detection_dir is the recover-DATE-UNIT_NUM-detect folder
# 2) recording_name is the name of our recording: DATE_TIME.WAV
# 4) det_type can either be 'lf' or 'hf'

# Output:
# The table of detections following the format of RavenPro

def read_detection(detection_dir, recording_name, det_type):
    
    file_name = f"{det_type}_{recording_name}.txt"
    file_path = f"{detection_dir}/{file_name}"
    
    if (Path(file_path).is_file()):
        try:
            df_detection = pd.read_csv(file_path, sep='\t')
        except EmptyDataError:
            print(f"{file_path} is empty")
            
    return df_detection

In [3]:
# These are constants that we absolutely need to know before running any scripts

# Constant DATE is the date of our recovered data that we are extracting detections from
DATE = "20220728"
# Constant SD_CARD_NUM is the SD CARD # that had been deployed for that session
SD_CARD_NUM = "008"
# Constant duration of audio file that we configured into Audiomoth
AUDIO_DUR = pd.DateOffset(minutes=29, seconds=55)

In [4]:
# This method takes in paramaters:
# 1) recovery_folder: Folder name of folder containing detections following format: recover-DATE-SD_CARD#-detect
# 2) AUDIO_DUR: The AudioMoth's configuration for the length of each recording.

# This method returns:
# - A DataFrame object that has columns:
#   - File Names, Date, Start Time, End Time, # of LF detections, # of HF detections
#   - File Names are strings and # of LF/HF detections are integers.
#   - Times are in UTC format and Dates are datetime.datetime objects.

def generate_df(DATE, SD_CARD_NUM, AUDIO_DUR):
    
    # Create recovery folder variable using constants
    recover_folder = f"recover-{DATE}-{SD_CARD_NUM}-detect"

    # detection_dir is the recover-DATE-UNIT_NUM-detect folder where our detections are.
    detection_dir = f"detections/{recover_folder}"

    # Construct path object linked to the directory of files for datetime-parsing
    file_dir = Path(detection_dir)
    # Extract only the hf-detection files because the datetime information is repeated.
    sorted_files = sorted(file_dir.glob('hf_*.txt'))

    # We fill the first 4 columns of our dataframe using datetime's strptime and strftime
    file_names = []
    dates = []
    s_times = []
    e_times = []
    # We fill out the next 2 columns of data with the number of LF and HF detections
    num_lf_detections = []
    num_hf_detections = []
    
    file_names, dates, s_times, e_times, num_lf_detections, num_hf_detections = extract_df_info(sorted_files, file_names, dates, s_times, 
                                                    e_times, num_lf_detections, num_hf_detections, AUDIO_DUR)
        
    # Create DataFrame object with all the extracted information    
    df = pd.DataFrame([file_names, dates, s_times, e_times, num_lf_detections, num_hf_detections], 
                index=["File Names", "Date", "Start Time (UTC)",
                       "End Time (UTC)", "# of LF detections", "# of HF detections"]).T
    return df

In [5]:
# This method takes in parameters:
# 1) sorted_files: A list of detection files sorted according to chronological time.
# 2) Empty lists to store DataFrame column information
# 3) AUDIO_DUR: The AudioMoth's configuration for the length of each recording

# This method returns:
# - Lists of column information

def extract_df_info(sorted_files, file_names, dates, s_times, e_times, 
                        num_lf_detections, num_hf_detections, AUDIO_DUR):
    
    # Iterate through all file paths to extract and store table information for each file
    for i, file in enumerate(sorted_files):
        # Extract name of each file as it is
        file_name = file.name
        # Extracting the datetime object from the name of each file
        file_info = datetime.strptime(file_name, "hf_%Y%m%d_%H%M%S.WAV.txt")

        # Extract recording name, date, start time, and end time for each file
        recording_name = file_info.strftime("%Y%m%d_%H%M%S.WAV")
        date = file_info.date()
        s_time = file_info.time()
        e_time = (file_info + AUDIO_DUR).time()

        # Appending the above properties to be displayed as part of the dataframe
        file_names.append(recording_name)
        dates.append(date)
        s_times.append(s_time)
        e_times.append(e_time)

        # Calling read_detection to return the table of selections as a dataframe
        # The detections appear twice: in waveform view and spectrogram view, 
        # so we half the total number of detections
        lf_file_detections = read_detection(detection_dir, recording_name, "lf")
        num_lf_detections.append(lf_file_detections.shape[0]/2)
        hf_file_detections = read_detection(detection_dir, recording_name, 'hf')
        num_hf_detections.append(hf_file_detections.shape[0]/2)
        
    return file_names, dates, s_times, e_times, num_lf_detections, num_hf_detections

In [6]:
def plot_separate(DATE, SD_CARD_NUM, AUDIO_DUR, save=True):
    
    # Create recovery folder variable using constants
    recover_folder = f"recover-{DATE}-{SD_CARD_NUM}-detect"
    
    # Generate DataFrame to be plotted as a bar graph of LF/HF detections vs. time
    df = generate_df(DATE, SD_CARD_NUM, AUDIO_DUR)
    
    # To plot each day's activity separately, group by rows that have the same date
    # We need a list of unique dates from our detection files
    unique_dates = df["Date"].unique()

    for date in unique_dates:
        day_df = df.loc[df['Date'] == date]
        fig = day_df.plot.bar(x="Start Time (UTC)", title=date, figsize=(16, 6), rot=45).get_figure()
        
        if save:
            save_dir = Path(recover_folder)
            save_dir.mkdir(parents=True, exist_ok=True)
            save_path = Path(f"{recover_folder}/{date}.png")
            fig.savefig(save_path, facecolor='w')

In [7]:
def plot_total(DATE, SD_CARD_NUM, AUDIO_DUR):
    
    # Generate DataFrame to be plotted as a bar graph of LF/HF detections vs. time
    df = generate_df(DATE, SD_CARD_NUM, AUDIO_DUR)
    
    df.plot.bar(x="Start Time (UTC)", title="Total Deployment Session", figsize=(16, 6), fontsize=7, rot=45)

In [8]:
plot_separate(DATE, SD_CARD_NUM, AUDIO_DUR)

NameError: name 'detection_dir' is not defined