# As of 2023/09/10, the implementation of bout metrics and clustering were leading to problematic results. 
## Like the percentage estimate being greater than 100%. Here I attempt to lay everything out to fix those mistakes.

In [1]:
import numpy as np
import pandas as pd
import scipy
from scipy import stats
import datetime as dt
import dask.dataframe as dd

import matplotlib.pyplot as plt
from matplotlib import colors
import soundfile as sf
import matplotlib.patches as patches
from pathlib import Path

In [2]:
import sys

sys.path.append("../src")

In [3]:
from core import SITE_NAMES, EXAMPLE_FILES_from_LOCATIONS, EXAMPLE_FILES_to_FILEPATHS, EXAMPLE_FILES_to_DETECTIONS
import bout_clustering as bt_clustering
import bout_plotting as bt_plt
import subsampling as ss
import data_handling as dh

from cli import get_file_paths

## Below are the variables needed in our bout pipeline

In [4]:
type_key = ''
site_key = "Central"
# cycle_lengths = [1800, 720, 600, 360, 180, 60, 30, 6]
cycle_lengths = [1800, 360]
percent_ons = [0.1667]
specific_dc_tag = "1800of1800"

In [5]:
fig_details = dict()
fig_details['site_name'] = SITE_NAMES[site_key]

In [6]:
pipeline_params = dict()
pipeline_params["read_csv"] = False
pipeline_params["save_activity_grid"] = True
pipeline_params["save_presence_grid"] = True
pipeline_params["save_dc_night_comparisons"] = True
pipeline_params["save_activity_dc_comparisons"] = True
pipeline_params["save_presence_dc_comparisons"] = True
pipeline_params["show_plots"] = False
pipeline_params["show_PST"] = True
pipeline_params

{'read_csv': False,
 'save_activity_grid': True,
 'save_presence_grid': True,
 'save_dc_night_comparisons': True,
 'save_activity_dc_comparisons': True,
 'save_presence_dc_comparisons': True,
 'show_plots': False,
 'show_PST': True}

In [7]:
data_params = dict()
data_params["site_name"] = SITE_NAMES[site_key]
data_params["site_tag"] = site_key
data_params["type_tag"] = type_key
data_params["cycle_lengths"] = cycle_lengths
data_params["percent_ons"] = percent_ons
dc_tags = ss.get_list_of_dc_tags(data_params["cycle_lengths"], data_params["percent_ons"])
data_params["dc_tags"] = dc_tags
data_params["cur_dc_tag"] = specific_dc_tag
data_params['resolution_in_min'] = '30'
data_params['recording_start'] = '02:00'
data_params['recording_end'] = '13:30'
data_params

{'site_name': 'Central Pond',
 'site_tag': 'Central',
 'type_tag': '',
 'cycle_lengths': [1800, 360],
 'percent_ons': [0.1667],
 'dc_tags': ['1800of1800', '300of1800', '60of360'],
 'cur_dc_tag': '1800of1800',
 'resolution_in_min': '30',
 'recording_start': '02:00',
 'recording_end': '13:30'}

In [8]:
file_paths = get_file_paths(data_params)
file_paths

{'raw_SITE_folder': '/Users/adityakrishna/duty-cycle-investigation/src/../data/raw/Central',
 'SITE_folder': '/Users/adityakrishna/duty-cycle-investigation/src/../data/2022_bd2_summary/Central',
 'bd2_TYPE_SITE_YEAR': 'bd2__Central_2022',
 'duty_cycled_folder': '/Users/adityakrishna/duty-cycle-investigation/src/../data/2022_bd2_summary/Central/duty_cycled',
 'dc_dets_TYPE_SITE_summary': 'dc_dets_Central_summary',
 'dc_bouts_TYPE_SITE_summary': 'dc_bouts_Central_summary',
 'dc_inds_TYPE_SITE_summary': 'dc_inds_Central_summary',
 'simulated_schemes_folder': '/Users/adityakrishna/duty-cycle-investigation/src/../data/2022_bd2_summary/Central/duty_cycled/simulated_schemes',
 'figures_SITE_folder': '/Users/adityakrishna/duty-cycle-investigation/src/../figures/Central',
 'activity_det_comparisons_figname': 'activity_det_comparisons_per_dc_Central',
 'dc_det_comparisons_figname': 'dc_det_comparisons_per_night_Central',
 'activity_bout_comparisons_figname': 'activity_bout_comparisons_per_dc_Cen

### The location dataframe seems to be good so let's initialize it before tackling the bugs

In [10]:
activity_bouts_arr = pd.DataFrame()

dc_tag = data_params['cur_dc_tag']
location_df = ss.prepare_summary_for_plotting_with_duty_cycle(file_paths, dc_tag)
location_df

Unnamed: 0,start_time_wrt_ref,end_time_wrt_ref,freq_group,ref_time,call_start_time,call_end_time,start_time,end_time,low_freq,high_freq,event,class,class_prob,det_prob,individual,input_file,Recover Folder,SD Card,Site name
0,43.7135,43.7340,LF1,2022-07-26 03:00:00,2022-07-26 03:00:43.713500,2022-07-26 03:00:43.734000,43.7135,43.7340,21171.0,23776.0,Echolocation,Nyctalus leisleri,0.709,0.724,-1,/mnt/ubna_data_01/recover-20220728/UBNA_008/20...,recover-20220728,UBNA_008,Central Pond
0,39.6005,39.6173,LF1,2022-07-26 04:00:00,2022-07-26 04:00:39.600500,2022-07-26 04:00:39.617300,39.6005,39.6173,22890.0,25607.0,Echolocation,Nyctalus leisleri,0.577,0.595,-1,/mnt/ubna_data_01/recover-20220728/UBNA_008/20...,recover-20220728,UBNA_008,Central Pond
0,186.2035,186.2184,LF1,2022-07-26 04:30:00,2022-07-26 04:33:06.203500,2022-07-26 04:33:06.218400,186.2035,186.2184,22890.0,27896.0,Echolocation,Nyctalus leisleri,0.381,0.551,-1,/mnt/ubna_data_01/recover-20220728/UBNA_008/20...,recover-20220728,UBNA_008,Central Pond
1,246.0135,246.0188,HF2,2022-07-26 04:30:00,2022-07-26 04:34:06.013500,2022-07-26 04:34:06.018800,246.0135,246.0188,42656.0,67548.0,Echolocation,Myotis daubentonii,0.268,0.519,-1,/mnt/ubna_data_01/recover-20220728/UBNA_008/20...,recover-20220728,UBNA_008,Central Pond
2,246.3265,246.3323,,2022-07-26 04:30:00,2022-07-26 04:34:06.326500,2022-07-26 04:34:06.332300,246.3265,246.3323,37500.0,78774.0,Echolocation,Myotis brandtii,0.355,0.578,-1,/mnt/ubna_data_01/recover-20220728/UBNA_008/20...,recover-20220728,UBNA_008,Central Pond
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10,523.1445,523.1520,HF1,2022-09-01 12:30:00,2022-09-01 12:38:43.144500,2022-09-01 12:38:43.152000,523.1445,523.1520,41796.0,62000.0,Echolocation,Pipistrellus nathusii,0.450,0.511,-1,/mnt/ubna_data_02/recover-20220901/UBNA_004/20...,recover-20220901,UBNA_004,Central Pond
11,523.6565,523.6654,HF1,2022-09-01 12:30:00,2022-09-01 12:38:43.656500,2022-09-01 12:38:43.665400,523.6565,523.6654,40937.0,49191.0,Echolocation,Pipistrellus nathusii,0.490,0.509,-1,/mnt/ubna_data_02/recover-20220901/UBNA_004/20...,recover-20220901,UBNA_004,Central Pond
12,783.0635,783.0686,HF2,2022-09-01 12:30:00,2022-09-01 12:43:03.063500,2022-09-01 12:43:03.068600,783.0635,783.0686,47812.0,54701.0,Echolocation,Pipistrellus pipistrellus,0.462,0.502,-1,/mnt/ubna_data_02/recover-20220901/UBNA_004/20...,recover-20220901,UBNA_004,Central Pond
13,783.3495,783.3567,HF2,2022-09-01 12:30:00,2022-09-01 12:43:03.349500,2022-09-01 12:43:03.356700,783.3495,783.3567,46953.0,56312.0,Echolocation,Pipistrellus pipistrellus,0.487,0.646,-1,/mnt/ubna_data_02/recover-20220901/UBNA_004/20...,recover-20220901,UBNA_004,Central Pond


In [11]:
location_sum_df = location_df

### Check if the BCIs we gather from each group make sense

In [12]:
location_sum_df.reset_index(inplace=True)
location_sum_df.drop(columns=location_sum_df.columns[0], inplace=True)

bout_params = bt_clustering.get_bout_params_from_location(location_sum_df, data_params)
bout_params

{'site_key': 'Central',
 'LF1_bci': 2950.472783948243,
 'HF2_bci': 885.341081453405,
 'HF1_bci': 788.2951048076708}

## Adding bout tags is the process that was most changed

### 1) First, we take our total location summary dataframe and select a subset corresponding to a specific frequency group

In [13]:
location_df = location_sum_df.copy()
location_df.insert(0, 'duration_from_last_call_ms', 0)
location_df.insert(0, 'bout_tag', 0)
location_df.insert(0, 'change_markers', 0)
location_df.insert(0, 'call_status', '')
result_df = pd.DataFrame()

group = 'LF1'
freq_group_df = location_df.loc[location_df['freq_group']==group].copy()
freq_group_df.reset_index(inplace=True)
freq_group_df.drop(columns=freq_group_df.columns[0], inplace=True)
freq_group_df

Unnamed: 0,call_status,change_markers,bout_tag,duration_from_last_call_ms,start_time_wrt_ref,end_time_wrt_ref,freq_group,ref_time,call_start_time,call_end_time,...,high_freq,event,class,class_prob,det_prob,individual,input_file,Recover Folder,SD Card,Site name
0,,0,0,0,43.7135,43.7340,LF1,2022-07-26 03:00:00,2022-07-26 03:00:43.713500,2022-07-26 03:00:43.734000,...,23776.0,Echolocation,Nyctalus leisleri,0.709,0.724,-1,/mnt/ubna_data_01/recover-20220728/UBNA_008/20...,recover-20220728,UBNA_008,Central Pond
1,,0,0,0,39.6005,39.6173,LF1,2022-07-26 04:00:00,2022-07-26 04:00:39.600500,2022-07-26 04:00:39.617300,...,25607.0,Echolocation,Nyctalus leisleri,0.577,0.595,-1,/mnt/ubna_data_01/recover-20220728/UBNA_008/20...,recover-20220728,UBNA_008,Central Pond
2,,0,0,0,186.2035,186.2184,LF1,2022-07-26 04:30:00,2022-07-26 04:33:06.203500,2022-07-26 04:33:06.218400,...,27896.0,Echolocation,Nyctalus leisleri,0.381,0.551,-1,/mnt/ubna_data_01/recover-20220728/UBNA_008/20...,recover-20220728,UBNA_008,Central Pond
3,,0,0,0,1536.1185,1536.1295,LF1,2022-07-26 04:30:00,2022-07-26 04:55:36.118500,2022-07-26 04:55:36.129500,...,31914.0,Echolocation,Nyctalus leisleri,0.273,0.596,-1,/mnt/ubna_data_01/recover-20220728/UBNA_008/20...,recover-20220728,UBNA_008,Central Pond
4,,0,0,0,1536.3575,1536.3699,LF1,2022-07-26 04:30:00,2022-07-26 04:55:36.357500,2022-07-26 04:55:36.369900,...,30877.0,Echolocation,Nyctalus leisleri,0.294,0.615,-1,/mnt/ubna_data_01/recover-20220728/UBNA_008/20...,recover-20220728,UBNA_008,Central Pond
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
144555,,0,0,0,1399.9245,1399.9400,LF1,2022-09-01 10:30:00,2022-09-01 10:53:19.924500,2022-09-01 10:53:19.940000,...,29546.0,Echolocation,Nyctalus noctula,0.428,0.674,-1,/mnt/ubna_data_02/recover-20220901/UBNA_004/20...,recover-20220901,UBNA_004,Central Pond
144556,,0,0,0,1400.5705,1400.5865,LF1,2022-09-01 10:30:00,2022-09-01 10:53:20.570500,2022-09-01 10:53:20.586500,...,27325.0,Echolocation,Nyctalus noctula,0.374,0.536,-1,/mnt/ubna_data_02/recover-20220901/UBNA_004/20...,recover-20220901,UBNA_004,Central Pond
144557,,0,0,0,1401.0835,1401.0978,LF1,2022-09-01 10:30:00,2022-09-01 10:53:21.083500,2022-09-01 10:53:21.097800,...,26853.0,Echolocation,Nyctalus noctula,0.256,0.516,-1,/mnt/ubna_data_02/recover-20220901/UBNA_004/20...,recover-20220901,UBNA_004,Central Pond
144558,,0,0,0,1401.8525,1401.8642,LF1,2022-09-01 10:30:00,2022-09-01 10:53:21.852500,2022-09-01 10:53:21.864200,...,29133.0,Echolocation,Nyctalus noctula,0.295,0.533,-1,/mnt/ubna_data_02/recover-20220901/UBNA_004/20...,recover-20220901,UBNA_004,Central Pond


### Calculate the intervals of this frequency-group subset of the dataframe. All these intervals only correspond to the selected frequency group

### Create bout tags for if the call is within or between bout using the call's interval to its previous call. 

In [14]:
intervals = (pd.to_datetime(freq_group_df['call_start_time'].values[1:]) - pd.to_datetime(freq_group_df['call_end_time'].values[:-1]))
ipis_f = intervals.to_numpy(dtype='float32')/1e6
ipis_f = np.insert(ipis_f, 0, bout_params[f'{group}_bci'])

freq_group_df['duration_from_last_call_ms'] =  ipis_f
freq_group_df.loc[freq_group_df['duration_from_last_call_ms'] < bout_params[f'{group}_bci'], 'bout_tag'] = 1
freq_group_df.loc[freq_group_df['duration_from_last_call_ms'] >= bout_params[f'{group}_bci'], 'bout_tag'] = 0

wb_indices = pd.DataFrame.where(freq_group_df, freq_group_df['bout_tag']==1).dropna().index
ob_indices = pd.DataFrame.where(freq_group_df, freq_group_df['bout_tag']==0).dropna().index
freq_group_df.loc[wb_indices, 'call_status'] = 'within bout'
freq_group_df.loc[ob_indices, 'call_status'] = 'outside bout'
freq_group_df

Unnamed: 0,call_status,change_markers,bout_tag,duration_from_last_call_ms,start_time_wrt_ref,end_time_wrt_ref,freq_group,ref_time,call_start_time,call_end_time,...,high_freq,event,class,class_prob,det_prob,individual,input_file,Recover Folder,SD Card,Site name
0,outside bout,0,0,2.950473e+03,43.7135,43.7340,LF1,2022-07-26 03:00:00,2022-07-26 03:00:43.713500,2022-07-26 03:00:43.734000,...,23776.0,Echolocation,Nyctalus leisleri,0.709,0.724,-1,/mnt/ubna_data_01/recover-20220728/UBNA_008/20...,recover-20220728,UBNA_008,Central Pond
1,outside bout,0,0,3.595866e+06,39.6005,39.6173,LF1,2022-07-26 04:00:00,2022-07-26 04:00:39.600500,2022-07-26 04:00:39.617300,...,25607.0,Echolocation,Nyctalus leisleri,0.577,0.595,-1,/mnt/ubna_data_01/recover-20220728/UBNA_008/20...,recover-20220728,UBNA_008,Central Pond
2,outside bout,0,0,1.946586e+06,186.2035,186.2184,LF1,2022-07-26 04:30:00,2022-07-26 04:33:06.203500,2022-07-26 04:33:06.218400,...,27896.0,Echolocation,Nyctalus leisleri,0.381,0.551,-1,/mnt/ubna_data_01/recover-20220728/UBNA_008/20...,recover-20220728,UBNA_008,Central Pond
3,outside bout,0,0,1.349900e+06,1536.1185,1536.1295,LF1,2022-07-26 04:30:00,2022-07-26 04:55:36.118500,2022-07-26 04:55:36.129500,...,31914.0,Echolocation,Nyctalus leisleri,0.273,0.596,-1,/mnt/ubna_data_01/recover-20220728/UBNA_008/20...,recover-20220728,UBNA_008,Central Pond
4,within bout,0,1,2.280000e+02,1536.3575,1536.3699,LF1,2022-07-26 04:30:00,2022-07-26 04:55:36.357500,2022-07-26 04:55:36.369900,...,30877.0,Echolocation,Nyctalus leisleri,0.294,0.615,-1,/mnt/ubna_data_01/recover-20220728/UBNA_008/20...,recover-20220728,UBNA_008,Central Pond
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
144555,within bout,0,1,6.324000e+02,1399.9245,1399.9400,LF1,2022-09-01 10:30:00,2022-09-01 10:53:19.924500,2022-09-01 10:53:19.940000,...,29546.0,Echolocation,Nyctalus noctula,0.428,0.674,-1,/mnt/ubna_data_02/recover-20220901/UBNA_004/20...,recover-20220901,UBNA_004,Central Pond
144556,within bout,0,1,6.304999e+02,1400.5705,1400.5865,LF1,2022-09-01 10:30:00,2022-09-01 10:53:20.570500,2022-09-01 10:53:20.586500,...,27325.0,Echolocation,Nyctalus noctula,0.374,0.536,-1,/mnt/ubna_data_02/recover-20220901/UBNA_004/20...,recover-20220901,UBNA_004,Central Pond
144557,within bout,0,1,4.970000e+02,1401.0835,1401.0978,LF1,2022-09-01 10:30:00,2022-09-01 10:53:21.083500,2022-09-01 10:53:21.097800,...,26853.0,Echolocation,Nyctalus noctula,0.256,0.516,-1,/mnt/ubna_data_02/recover-20220901/UBNA_004/20...,recover-20220901,UBNA_004,Central Pond
144558,within bout,0,1,7.547000e+02,1401.8525,1401.8642,LF1,2022-09-01 10:30:00,2022-09-01 10:53:21.852500,2022-09-01 10:53:21.864200,...,29133.0,Echolocation,Nyctalus noctula,0.295,0.533,-1,/mnt/ubna_data_02/recover-20220901/UBNA_004/20...,recover-20220901,UBNA_004,Central Pond


### Create change markers using the linear difference of bout tags. 
#### When the bout tag is 0 and the next call's bout tag is 1, the call is starting a bout. Thus, the change marker of a call is 1
#### When the bout tag is 1 and the next call's bout tag is 0, the call is ending a bout. Thus, the change marker of a call is -1.

In [15]:
bout_tags = freq_group_df['bout_tag']
change_markers = bout_tags.shift(-1) - bout_tags
change_markers[len(change_markers)-1] = 0
freq_group_df['change_markers'] = change_markers

be_indices = pd.DataFrame.where(freq_group_df, freq_group_df['change_markers']==-1).dropna().index
bs_indices = pd.DataFrame.where(freq_group_df, freq_group_df['change_markers']==1).dropna().index

freq_group_df.loc[be_indices, 'call_status'] = 'bout end'
freq_group_df.loc[bs_indices, 'call_status'] = 'bout start'
freq_group_df

Unnamed: 0,call_status,change_markers,bout_tag,duration_from_last_call_ms,start_time_wrt_ref,end_time_wrt_ref,freq_group,ref_time,call_start_time,call_end_time,...,high_freq,event,class,class_prob,det_prob,individual,input_file,Recover Folder,SD Card,Site name
0,outside bout,0.0,0,2.950473e+03,43.7135,43.7340,LF1,2022-07-26 03:00:00,2022-07-26 03:00:43.713500,2022-07-26 03:00:43.734000,...,23776.0,Echolocation,Nyctalus leisleri,0.709,0.724,-1,/mnt/ubna_data_01/recover-20220728/UBNA_008/20...,recover-20220728,UBNA_008,Central Pond
1,outside bout,0.0,0,3.595866e+06,39.6005,39.6173,LF1,2022-07-26 04:00:00,2022-07-26 04:00:39.600500,2022-07-26 04:00:39.617300,...,25607.0,Echolocation,Nyctalus leisleri,0.577,0.595,-1,/mnt/ubna_data_01/recover-20220728/UBNA_008/20...,recover-20220728,UBNA_008,Central Pond
2,outside bout,0.0,0,1.946586e+06,186.2035,186.2184,LF1,2022-07-26 04:30:00,2022-07-26 04:33:06.203500,2022-07-26 04:33:06.218400,...,27896.0,Echolocation,Nyctalus leisleri,0.381,0.551,-1,/mnt/ubna_data_01/recover-20220728/UBNA_008/20...,recover-20220728,UBNA_008,Central Pond
3,bout start,1.0,0,1.349900e+06,1536.1185,1536.1295,LF1,2022-07-26 04:30:00,2022-07-26 04:55:36.118500,2022-07-26 04:55:36.129500,...,31914.0,Echolocation,Nyctalus leisleri,0.273,0.596,-1,/mnt/ubna_data_01/recover-20220728/UBNA_008/20...,recover-20220728,UBNA_008,Central Pond
4,within bout,0.0,1,2.280000e+02,1536.3575,1536.3699,LF1,2022-07-26 04:30:00,2022-07-26 04:55:36.357500,2022-07-26 04:55:36.369900,...,30877.0,Echolocation,Nyctalus leisleri,0.294,0.615,-1,/mnt/ubna_data_01/recover-20220728/UBNA_008/20...,recover-20220728,UBNA_008,Central Pond
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
144555,within bout,0.0,1,6.324000e+02,1399.9245,1399.9400,LF1,2022-09-01 10:30:00,2022-09-01 10:53:19.924500,2022-09-01 10:53:19.940000,...,29546.0,Echolocation,Nyctalus noctula,0.428,0.674,-1,/mnt/ubna_data_02/recover-20220901/UBNA_004/20...,recover-20220901,UBNA_004,Central Pond
144556,within bout,0.0,1,6.304999e+02,1400.5705,1400.5865,LF1,2022-09-01 10:30:00,2022-09-01 10:53:20.570500,2022-09-01 10:53:20.586500,...,27325.0,Echolocation,Nyctalus noctula,0.374,0.536,-1,/mnt/ubna_data_02/recover-20220901/UBNA_004/20...,recover-20220901,UBNA_004,Central Pond
144557,within bout,0.0,1,4.970000e+02,1401.0835,1401.0978,LF1,2022-09-01 10:30:00,2022-09-01 10:53:21.083500,2022-09-01 10:53:21.097800,...,26853.0,Echolocation,Nyctalus noctula,0.256,0.516,-1,/mnt/ubna_data_02/recover-20220901/UBNA_004/20...,recover-20220901,UBNA_004,Central Pond
144558,within bout,0.0,1,7.547000e+02,1401.8525,1401.8642,LF1,2022-09-01 10:30:00,2022-09-01 10:53:21.852500,2022-09-01 10:53:21.864200,...,29133.0,Echolocation,Nyctalus noctula,0.295,0.533,-1,/mnt/ubna_data_02/recover-20220901/UBNA_004/20...,recover-20220901,UBNA_004,Central Pond


### As seen above in the last row, a bout that is started may not be ended if the last call within that bout is labelled as 'within'
#### That call will either be labelled 'within' or 'outside' bout because of the forced change marker of 0

In [16]:
last_call_status = freq_group_df.at[len(freq_group_df)-1, 'call_status']
if last_call_status == 'within bout':
    freq_group_df.at[len(freq_group_df)-1, 'call_status'] = 'bout end'
freq_group_df

Unnamed: 0,call_status,change_markers,bout_tag,duration_from_last_call_ms,start_time_wrt_ref,end_time_wrt_ref,freq_group,ref_time,call_start_time,call_end_time,...,high_freq,event,class,class_prob,det_prob,individual,input_file,Recover Folder,SD Card,Site name
0,outside bout,0.0,0,2.950473e+03,43.7135,43.7340,LF1,2022-07-26 03:00:00,2022-07-26 03:00:43.713500,2022-07-26 03:00:43.734000,...,23776.0,Echolocation,Nyctalus leisleri,0.709,0.724,-1,/mnt/ubna_data_01/recover-20220728/UBNA_008/20...,recover-20220728,UBNA_008,Central Pond
1,outside bout,0.0,0,3.595866e+06,39.6005,39.6173,LF1,2022-07-26 04:00:00,2022-07-26 04:00:39.600500,2022-07-26 04:00:39.617300,...,25607.0,Echolocation,Nyctalus leisleri,0.577,0.595,-1,/mnt/ubna_data_01/recover-20220728/UBNA_008/20...,recover-20220728,UBNA_008,Central Pond
2,outside bout,0.0,0,1.946586e+06,186.2035,186.2184,LF1,2022-07-26 04:30:00,2022-07-26 04:33:06.203500,2022-07-26 04:33:06.218400,...,27896.0,Echolocation,Nyctalus leisleri,0.381,0.551,-1,/mnt/ubna_data_01/recover-20220728/UBNA_008/20...,recover-20220728,UBNA_008,Central Pond
3,bout start,1.0,0,1.349900e+06,1536.1185,1536.1295,LF1,2022-07-26 04:30:00,2022-07-26 04:55:36.118500,2022-07-26 04:55:36.129500,...,31914.0,Echolocation,Nyctalus leisleri,0.273,0.596,-1,/mnt/ubna_data_01/recover-20220728/UBNA_008/20...,recover-20220728,UBNA_008,Central Pond
4,within bout,0.0,1,2.280000e+02,1536.3575,1536.3699,LF1,2022-07-26 04:30:00,2022-07-26 04:55:36.357500,2022-07-26 04:55:36.369900,...,30877.0,Echolocation,Nyctalus leisleri,0.294,0.615,-1,/mnt/ubna_data_01/recover-20220728/UBNA_008/20...,recover-20220728,UBNA_008,Central Pond
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
144555,within bout,0.0,1,6.324000e+02,1399.9245,1399.9400,LF1,2022-09-01 10:30:00,2022-09-01 10:53:19.924500,2022-09-01 10:53:19.940000,...,29546.0,Echolocation,Nyctalus noctula,0.428,0.674,-1,/mnt/ubna_data_02/recover-20220901/UBNA_004/20...,recover-20220901,UBNA_004,Central Pond
144556,within bout,0.0,1,6.304999e+02,1400.5705,1400.5865,LF1,2022-09-01 10:30:00,2022-09-01 10:53:20.570500,2022-09-01 10:53:20.586500,...,27325.0,Echolocation,Nyctalus noctula,0.374,0.536,-1,/mnt/ubna_data_02/recover-20220901/UBNA_004/20...,recover-20220901,UBNA_004,Central Pond
144557,within bout,0.0,1,4.970000e+02,1401.0835,1401.0978,LF1,2022-09-01 10:30:00,2022-09-01 10:53:21.083500,2022-09-01 10:53:21.097800,...,26853.0,Echolocation,Nyctalus noctula,0.256,0.516,-1,/mnt/ubna_data_02/recover-20220901/UBNA_004/20...,recover-20220901,UBNA_004,Central Pond
144558,within bout,0.0,1,7.547000e+02,1401.8525,1401.8642,LF1,2022-09-01 10:30:00,2022-09-01 10:53:21.852500,2022-09-01 10:53:21.864200,...,29133.0,Echolocation,Nyctalus noctula,0.295,0.533,-1,/mnt/ubna_data_02/recover-20220901/UBNA_004/20...,recover-20220901,UBNA_004,Central Pond


### Finally we concatenate these results for each frequency group, using each group's BCI to cluster their respective calls into bouts

In [18]:
result_df = pd.concat([result_df, freq_group_df])

## Below we put it all together in our new method

### We need to also use some if statements to make sure we don't pass in an invalid group or a group we have not defined yet

In [17]:
def add_bouttags_to_bd2_predictions_for_freqgroups(batdetect2_predictions, bout_params):

    location_df = batdetect2_predictions.copy()
    location_df.insert(0, 'duration_from_last_call_ms', 0)
    location_df.insert(0, 'bout_tag', 0)
    location_df.insert(0, 'change_markers', 0)
    location_df.insert(0, 'call_status', '')
    result_df = pd.DataFrame()

    for group in location_df['freq_group'].unique():
        if group != '':
            freq_group_df = location_df.loc[location_df['freq_group']==group].copy()
            freq_group_df.reset_index(inplace=True)
            freq_group_df.drop(columns=freq_group_df.columns[0], inplace=True)
            if not(freq_group_df.empty):
                intervals = (pd.to_datetime(freq_group_df['call_start_time'].values[1:]) - pd.to_datetime(freq_group_df['call_end_time'].values[:-1]))
                ipis_f = intervals.to_numpy(dtype='float32')/1e6
                ipis_f = np.insert(ipis_f, 0, bout_params[f'{group}_bci'])

                freq_group_df['duration_from_last_call_ms'] =  ipis_f
                freq_group_df.loc[freq_group_df['duration_from_last_call_ms'] < bout_params[f'{group}_bci'], 'bout_tag'] = 1
                freq_group_df.loc[freq_group_df['duration_from_last_call_ms'] >= bout_params[f'{group}_bci'], 'bout_tag'] = 0
                wb_indices = pd.DataFrame.where(freq_group_df, freq_group_df['bout_tag']==1).dropna().index
                ob_indices = pd.DataFrame.where(freq_group_df, freq_group_df['bout_tag']==0).dropna().index
                freq_group_df.loc[wb_indices, 'call_status'] = 'within bout'
                freq_group_df.loc[ob_indices, 'call_status'] = 'outside bout'

                bout_tags = freq_group_df['bout_tag']
                change_markers = bout_tags.shift(-1) - bout_tags
                change_markers[len(change_markers)-1] = 0
                freq_group_df['change_markers'] = change_markers
                be_indices = pd.DataFrame.where(freq_group_df, freq_group_df['change_markers']==-1).dropna().index
                bs_indices = pd.DataFrame.where(freq_group_df, freq_group_df['change_markers']==1).dropna().index
                freq_group_df.loc[be_indices, 'call_status'] = 'bout end'
                freq_group_df.loc[bs_indices, 'call_status'] = 'bout start'

                num_bout_starts = len(freq_group_df.loc[freq_group_df['call_status']=='bout start'])
                num_bout_ends = len(freq_group_df.loc[freq_group_df['call_status']=='bout end'])
                if num_bout_starts != num_bout_ends:
                    freq_group_df.at[len(freq_group_df)-1, 'call_status'] = 'bout end'

                result_df = pd.concat([result_df, freq_group_df])

    return result_df

### For this result, LF1 calls will always appear before HF1 which will appear before HF2. 

### In other words, the start and end times are not sorted over all frequency groups; rather only for each group

In [19]:
batdetect2_predictions = bt_clustering.classify_bouts_in_bd2_predictions_for_freqgroups(location_sum_df, bout_params)
batdetect2_predictions[batdetect2_predictions['freq_group']=='HF2']

Unnamed: 0,call_status,change_markers,bout_tag,duration_from_last_call_ms,start_time_wrt_ref,end_time_wrt_ref,freq_group,ref_time,call_start_time,call_end_time,...,high_freq,event,class,class_prob,det_prob,individual,input_file,Recover Folder,SD Card,Site name
0,outside bout,0.0,0,8.853411e+02,246.0135,246.0188,HF2,2022-07-26 04:30:00,2022-07-26 04:34:06.013500,2022-07-26 04:34:06.018800,...,67548.0,Echolocation,Myotis daubentonii,0.268,0.519,-1,/mnt/ubna_data_01/recover-20220728/UBNA_008/20...,recover-20220728,UBNA_008,Central Pond
1,bout start,1.0,0,1.332374e+06,1578.3925,1578.3964,HF2,2022-07-26 04:30:00,2022-07-26 04:56:18.392500,2022-07-26 04:56:18.396400,...,69698.0,Echolocation,Myotis alcathoe,0.373,0.509,-1,/mnt/ubna_data_01/recover-20220728/UBNA_008/20...,recover-20220728,UBNA_008,Central Pond
2,within bout,0.0,1,6.441000e+02,1579.0405,1579.0448,HF2,2022-07-26 04:30:00,2022-07-26 04:56:19.040500,2022-07-26 04:56:19.044800,...,80147.0,Echolocation,Myotis alcathoe,0.437,0.526,-1,/mnt/ubna_data_01/recover-20220728/UBNA_008/20...,recover-20220728,UBNA_008,Central Pond
3,within bout,0.0,1,8.170000e+01,1579.1265,1579.1317,HF2,2022-07-26 04:30:00,2022-07-26 04:56:19.126500,2022-07-26 04:56:19.131700,...,77529.0,Echolocation,Pipistrellus pygmaeus,0.474,0.514,-1,/mnt/ubna_data_01/recover-20220728/UBNA_008/20...,recover-20220728,UBNA_008,Central Pond
4,bout end,-1.0,1,3.008000e+02,1579.4325,1579.4365,HF2,2022-07-26 04:30:00,2022-07-26 04:56:19.432500,2022-07-26 04:56:19.436500,...,78389.0,Echolocation,Myotis alcathoe,0.393,0.525,-1,/mnt/ubna_data_01/recover-20220728/UBNA_008/20...,recover-20220728,UBNA_008,Central Pond
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
32508,within bout,0.0,1,9.300000e+01,1793.9345,1793.9413,HF2,2022-09-01 12:00:00,2022-09-01 12:29:53.934500,2022-09-01 12:29:53.941300,...,56364.0,Echolocation,Pipistrellus pipistrellus,0.481,0.566,-1,/mnt/ubna_data_02/recover-20220901/UBNA_004/20...,recover-20220901,UBNA_004,Central Pond
32509,bout end,-1.0,1,1.872000e+02,1794.1285,1794.1364,HF2,2022-09-01 12:00:00,2022-09-01 12:29:54.128500,2022-09-01 12:29:54.136400,...,54669.0,Echolocation,Pipistrellus pipistrellus,0.472,0.545,-1,/mnt/ubna_data_02/recover-20220901/UBNA_004/20...,recover-20220901,UBNA_004,Central Pond
32510,bout start,1.0,0,7.889271e+05,783.0635,783.0686,HF2,2022-09-01 12:30:00,2022-09-01 12:43:03.063500,2022-09-01 12:43:03.068600,...,54701.0,Echolocation,Pipistrellus pipistrellus,0.462,0.502,-1,/mnt/ubna_data_02/recover-20220901/UBNA_004/20...,recover-20220901,UBNA_004,Central Pond
32511,within bout,0.0,1,2.809000e+02,783.3495,783.3567,HF2,2022-09-01 12:30:00,2022-09-01 12:43:03.349500,2022-09-01 12:43:03.356700,...,56312.0,Echolocation,Pipistrellus pipistrellus,0.487,0.646,-1,/mnt/ubna_data_02/recover-20220901/UBNA_004/20...,recover-20220901,UBNA_004,Central Pond


### There could be bouts that overlap in time, separated in frequency alone. 
### This makes generating % of time results not reliable if we want to include all frequency groups. However, our analysis will largely focus on individual groups

In [20]:
bout_metrics = bt_clustering.construct_bout_metrics_from_location_df_for_freqgroups(batdetect2_predictions)
bout_metrics.sort_values('start_time_of_bout')[:10]

Unnamed: 0,freq_group,start_time_of_bout,end_time_of_bout,start_time_wrt_ref,end_time_wrt_ref,start_time,end_time,low_freq,high_freq,bout_duration,bout_duration_in_secs
0,HF1,2022-07-26 04:34:07.405500,2022-07-26 04:34:07.786900,247.4055,247.7869,247.4055,247.7869,40078.0,56331.0,0 days 00:00:00.381400,0.3814
1,HF1,2022-07-26 04:49:28.123500,2022-07-26 04:49:30.104500,1168.1235,1170.1045,1168.1235,1170.1045,38359.0,63484.0,0 days 00:00:01.981000,1.981
2,HF1,2022-07-26 04:50:37.336500,2022-07-26 04:50:37.919100,1237.3365,1237.9191,1237.3365,1237.9191,39218.0,57540.0,0 days 00:00:00.582600,0.5826
3,HF1,2022-07-26 04:50:39.257500,2022-07-26 04:50:40.847800,1239.2575,1240.8478,1239.2575,1240.8478,36640.0,63989.0,0 days 00:00:01.590300,1.5903
4,HF1,2022-07-26 04:54:04.543500,2022-07-26 04:54:05.320700,1444.5435,1445.3207,1444.5435,1445.3207,38359.0,51975.0,0 days 00:00:00.777200,0.7772
0,LF1,2022-07-26 04:55:36.118500,2022-07-26 04:55:41.134200,1536.1185,1541.1342,1536.1185,1541.1342,22031.0,31914.0,0 days 00:00:05.015700,5.0157
1,LF1,2022-07-26 04:55:56.552500,2022-07-26 04:56:29.557600,1556.5525,1589.5576,1556.5525,1589.5576,22031.0,36397.0,0 days 00:00:33.005100,33.0051
0,HF2,2022-07-26 04:56:18.392500,2022-07-26 04:56:19.436500,1578.3925,1579.4365,1578.3925,1579.4365,45234.0,80147.0,0 days 00:00:01.044000,1.044
1,HF2,2022-07-26 04:56:20.671500,2022-07-26 04:56:21.390100,1580.6715,1581.3901,1580.6715,1581.3901,45234.0,66661.0,0 days 00:00:00.718600,0.7186
5,HF1,2022-07-26 04:56:34.536500,2022-07-26 04:56:36.211500,1594.5365,1596.2115,1594.5365,1596.2115,40078.0,61449.0,0 days 00:00:01.675000,1.675


In [34]:
lf1_bout_durations = bout_metrics.loc[bout_metrics['freq_group']=='LF1']['bout_duration_in_secs']
lf1_bout_durations.mean(), lf1_bout_durations.median(), lf1_bout_durations.max()

(7.814716919108054, 4.4259, 155.8101)

In [35]:
hf1_bout_durations = bout_metrics.loc[bout_metrics['freq_group']=='HF1']['bout_duration_in_secs']
hf1_bout_durations.mean(), hf1_bout_durations.median(), hf1_bout_durations.max()

(2.2220958045174055, 1.3742, 40.9916)

In [36]:
hf2_bout_durations = bout_metrics.loc[bout_metrics['freq_group']=='HF2']['bout_duration_in_secs']
hf2_bout_durations.mean(), hf2_bout_durations.median(), hf2_bout_durations.max()

(1.8343227398467432, 1.25505, 28.8823)

In [37]:
def construct_activity_arr_from_bout_metrics(bout_metrics, data_params, file_paths, dc_tag):
    all_processed_filepaths = sorted(list(map(str, list(Path(f'{file_paths["raw_SITE_folder"]}').glob('*.csv')))))
    all_processed_datetimes = pd.to_datetime(all_processed_filepaths, format="%Y%m%d_%H%M%S", exact=False)

    bout_metrics['ref_time'] = pd.DatetimeIndex(bout_metrics['start_time_of_bout'])
    bout_metrics['total_bout_duration_in_secs'] = bout_metrics['bout_duration_in_secs']
    bout_metrics = bout_metrics.set_index('ref_time')

    bout_duration_per_interval = bout_metrics.resample(f"{data_params['resolution_in_min']}T")['total_bout_duration_in_secs'].sum()

    percent_time_occupied_by_bouts = (100*(bout_duration_per_interval.values / 60*float(data_params['resolution_in_min'])))

    bout_dpi_df = pd.DataFrame(list(zip(bout_duration_per_interval.index, percent_time_occupied_by_bouts)), columns=['ref_time', f'percentage_time_occupied_by_bouts ({dc_tag})'])
    bout_dpi_df = bout_dpi_df.set_index('ref_time')
    bout_dpi_df = bout_dpi_df.reindex(index=all_processed_datetimes, fill_value=0).resample(f"{data_params['resolution_in_min']}T").first().between_time(data_params['recording_start'], data_params['recording_end'], inclusive='left')

    return pd.DataFrame(list(zip(bout_dpi_df.index, bout_dpi_df[f'percentage_time_occupied_by_bouts ({dc_tag})'].values)), columns=["Date_and_Time_UTC", f'percentage_time_occupied_by_bouts ({dc_tag})'])


### One error I encountered here was dividing but not using parentheses. That's all I fixed here

In [41]:
all_processed_filepaths = sorted(list(map(str, list(Path(f'{file_paths["raw_SITE_folder"]}').glob('*.csv')))))
all_processed_datetimes = pd.to_datetime(all_processed_filepaths, format="%Y%m%d_%H%M%S", exact=False)

bout_metrics['ref_time'] = pd.DatetimeIndex(bout_metrics['start_time_of_bout'])
bout_metrics['total_bout_duration_in_secs'] = bout_metrics['bout_duration_in_secs']
bout_metrics = bout_metrics.set_index('ref_time')

bout_duration_per_interval = bout_metrics.resample(f"{data_params['resolution_in_min']}T")['total_bout_duration_in_secs'].sum()

percent_time_occupied_by_bouts = (100*(bout_duration_per_interval.values / (60*float(data_params['resolution_in_min']))))

In [42]:
dc_bouts = construct_activity_arr_from_bout_metrics(bout_metrics, data_params, file_paths, dc_tag)

In [43]:
dc_bouts[:10]

Unnamed: 0,Date_and_Time_UTC,percentage_time_occupied_by_bouts (1800of1800)
0,2022-07-26 03:00:00,0.0
1,2022-07-26 03:30:00,0.0
2,2022-07-26 04:00:00,0.0
3,2022-07-26 04:30:00,3028.145
4,2022-07-26 05:00:00,29302.97985
5,2022-07-26 05:30:00,20046.895
6,2022-07-26 06:00:00,11398.71495
7,2022-07-26 06:30:00,7442.39495
8,2022-07-26 07:00:00,3170.04995
9,2022-07-26 07:30:00,3126.4949


In [44]:
time_on = int(dc_tag.split('of')[0])

In [45]:
bout_metrics = bt_clustering.generate_bout_metrics_for_location_and_freq(location_sum_df, data_params, dc_tag)


In [47]:
dc_bouts = dh.construct_activity_arr_from_bout_metrics(bout_metrics, data_params, file_paths, dc_tag)
dc_bouts = dc_bouts.set_index("Date_and_Time_UTC")
activity_bouts_arr = pd.concat([activity_bouts_arr, dc_bouts], axis=1)

activity_bouts_arr

Unnamed: 0_level_0,percentage_time_occupied_by_bouts (1800of1800),percentage_time_occupied_by_bouts (1800of1800)
Date_and_Time_UTC,Unnamed: 1_level_1,Unnamed: 2_level_1
2022-07-26 03:00:00,0.000000,0.000000
2022-07-26 03:30:00,0.000000,0.000000
2022-07-26 04:00:00,0.000000,0.000000
2022-07-26 04:30:00,3.364606,3.364606
2022-07-26 05:00:00,32.558867,32.558867
...,...,...
2022-09-01 11:00:00,0.267500,0.267500
2022-09-01 11:30:00,1.166100,1.166100
2022-09-01 12:00:00,2.743622,2.743622
2022-09-01 12:30:00,0.177589,0.177589


In [27]:
activity_arr = dh.generate_activity_bouts_results(data_params, file_paths)

In [28]:
activity_arr[:23]

Unnamed: 0_level_0,percentage_time_occupied_by_bouts (1800of1800),percentage_time_occupied_by_bouts (300of1800),percentage_time_occupied_by_bouts (60of360)
Date_and_Time_UTC,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2022-07-13 02:00:00,0.0,0.0,0.0
2022-07-13 02:30:00,0.0,0.0,0.0
2022-07-13 03:00:00,0.0,0.0,0.0
2022-07-13 03:30:00,0.0,0.0,0.0
2022-07-13 04:00:00,0.0,0.0,0.0
2022-07-13 04:30:00,0.780906,0.0,0.0
2022-07-13 05:00:00,0.739083,0.0,0.0
2022-07-13 05:30:00,5.27385,0.0,0.272711
2022-07-13 06:00:00,2.339961,0.0,0.0
2022-07-13 06:30:00,1.270328,0.0,0.241267
