# As of 2023/09/10, the implementation of bout metrics and clustering were leading to problematic results. 
## Like the percentage estimate being greater than 100%. Here I attempt to lay everything out to fix those mistakes.

In [1]:
import numpy as np
import pandas as pd
import scipy
from scipy import stats
import datetime as dt
import dask.dataframe as dd

import matplotlib.pyplot as plt
from matplotlib import colors
import soundfile as sf
import matplotlib.patches as patches
from pathlib import Path

In [2]:
import sys

sys.path.append("../src")

In [3]:
from core import SITE_NAMES, EXAMPLE_FILES_from_LOCATIONS, EXAMPLE_FILES_to_FILEPATHS, EXAMPLE_FILES_to_DETECTIONS
import bout_clustering as bt_clustering
import bout_plotting as bt_plt
import subsampling as ss
import data_handling as dh

from cli import get_file_paths

## Below are the variables needed in our bout pipeline

In [4]:
type_key = 'LF1'
site_key = "Carp"
# cycle_lengths = [1800, 720, 600, 360, 180, 60, 30, 6]
cycle_lengths = [1800, 360]
percent_ons = [0.1667]
specific_dc_tag = "1800of1800"

In [5]:
fig_details = dict()
fig_details['site_name'] = SITE_NAMES[site_key]

In [6]:
pipeline_params = dict()
pipeline_params["read_csv"] = False
pipeline_params["save_activity_grid"] = True
pipeline_params["save_presence_grid"] = True
pipeline_params["save_dc_night_comparisons"] = True
pipeline_params["save_activity_dc_comparisons"] = True
pipeline_params["save_presence_dc_comparisons"] = True
pipeline_params["show_plots"] = False
pipeline_params["show_PST"] = True
pipeline_params

{'read_csv': False,
 'save_activity_grid': True,
 'save_presence_grid': True,
 'save_dc_night_comparisons': True,
 'save_activity_dc_comparisons': True,
 'save_presence_dc_comparisons': True,
 'show_plots': False,
 'show_PST': True}

In [7]:
data_params = dict()
data_params["site_name"] = SITE_NAMES[site_key]
data_params["site_tag"] = site_key
data_params["type_tag"] = type_key
data_params["cycle_lengths"] = cycle_lengths
data_params["percent_ons"] = percent_ons
dc_tags = ss.get_list_of_dc_tags(data_params["cycle_lengths"], data_params["percent_ons"])
data_params["dc_tags"] = dc_tags
data_params["cur_dc_tag"] = specific_dc_tag
data_params['resolution_in_min'] = '30'
data_params['recording_start'] = '02:00'
data_params['recording_end'] = '13:30'
data_params

{'site_name': 'Carp Pond',
 'site_tag': 'Carp',
 'type_tag': 'LF1',
 'cycle_lengths': [1800, 360],
 'percent_ons': [0.1667],
 'dc_tags': ['1800of1800', '300of1800', '60of360'],
 'cur_dc_tag': '1800of1800',
 'resolution_in_min': '30',
 'recording_start': '02:00',
 'recording_end': '13:30'}

In [8]:
file_paths = get_file_paths(data_params)
file_paths

{'raw_SITE_folder': '/Users/adityakrishna/duty-cycle-investigation/src/../data/raw/Carp',
 'SITE_folder': '/Users/adityakrishna/duty-cycle-investigation/src/../data/2022_bd2_summary/Carp',
 'bd2_TYPE_SITE_YEAR': 'bd2__LF1Carp_2022',
 'duty_cycled_folder': '/Users/adityakrishna/duty-cycle-investigation/src/../data/2022_bd2_summary/Carp/duty_cycled',
 'dc_dets_TYPE_SITE_summary': 'dc_dets_LF1Carp_summary',
 'dc_bouts_TYPE_SITE_summary': 'dc_bouts_LF1Carp_summary',
 'dc_inds_TYPE_SITE_summary': 'dc_inds_LF1Carp_summary',
 'simulated_schemes_folder': '/Users/adityakrishna/duty-cycle-investigation/src/../data/2022_bd2_summary/Carp/duty_cycled/simulated_schemes',
 'figures_SITE_folder': '/Users/adityakrishna/duty-cycle-investigation/src/../figures/Carp',
 'activity_det_comparisons_figname': 'activity_det_comparisons_per_dc_LF1Carp',
 'dc_det_comparisons_figname': 'dc_det_comparisons_per_night_LF1Carp',
 'activity_bout_comparisons_figname': 'activity_bout_comparisons_per_dc_LF1Carp',
 'dc_bou

### The location dataframe seems to be good so let's initialize it before tackling the bugs

In [9]:
activity_bouts_arr = pd.DataFrame()

dc_tag = data_params['cur_dc_tag']
location_df = ss.prepare_summary_for_plotting_with_duty_cycle(file_paths, dc_tag)
location_df

Unnamed: 0,start_time_wrt_ref,end_time_wrt_ref,freq_group,ref_time,call_start_time,call_end_time,start_time,end_time,low_freq,high_freq,event,class,class_prob,det_prob,individual,input_file,Site name
0,1380.3355,1380.3532,LF1,2022-07-13 04:30:00,2022-07-13 04:53:00.335500000,2022-07-13 04:53:00.353200,1380.3355,1380.3532,28046.0,31703.0,Echolocation,Nyctalus leisleri,0.487,0.559,-1,/mnt/ubna_data_01/recover-20220715/UBNA_008/20...,Carp Pond
1,1513.5975,1513.6129,LF1,2022-07-13 04:30:00,2022-07-13 04:55:13.597500000,2022-07-13 04:55:13.612900,1513.5975,1513.6129,23750.0,28075.0,Echolocation,Nyctalus leisleri,0.449,0.571,-1,/mnt/ubna_data_01/recover-20220715/UBNA_008/20...,Carp Pond
2,1513.8235,1513.8396,LF1,2022-07-13 04:30:00,2022-07-13 04:55:13.823500000,2022-07-13 04:55:13.839600,1513.8235,1513.8396,23750.0,28002.0,Echolocation,Nyctalus leisleri,0.513,0.591,-1,/mnt/ubna_data_01/recover-20220715/UBNA_008/20...,Carp Pond
3,1513.9885,1514.0010,LF1,2022-07-13 04:30:00,2022-07-13 04:55:13.988500000,2022-07-13 04:55:14.001000,1513.9885,1514.0010,24609.0,30884.0,Echolocation,Nyctalus leisleri,0.424,0.632,-1,/mnt/ubna_data_01/recover-20220715/UBNA_008/20...,Carp Pond
4,1514.2675,1514.2839,LF1,2022-07-13 04:30:00,2022-07-13 04:55:14.267500000,2022-07-13 04:55:14.283900,1514.2675,1514.2839,23750.0,28526.0,Echolocation,Nyctalus leisleri,0.459,0.587,-1,/mnt/ubna_data_01/recover-20220715/UBNA_008/20...,Carp Pond
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
92,121.6545,121.6710,LF1,2022-08-25 12:30:00,2022-08-25 12:32:01.654500000,2022-08-25 12:32:01.671000,121.6545,121.6710,22890.0,26398.0,Echolocation,Nyctalus leisleri,0.548,0.602,-1,/mnt/ubna_data_01/recover-20220825/UBNA_004/20...,Carp Pond
93,123.5645,123.5797,LF1,2022-08-25 12:30:00,2022-08-25 12:32:03.564500000,2022-08-25 12:32:03.579700,123.5645,123.5797,22031.0,27189.0,Echolocation,Nyctalus leisleri,0.403,0.542,-1,/mnt/ubna_data_01/recover-20220825/UBNA_004/20...,Carp Pond
94,421.5855,421.6014,LF1,2022-08-25 12:30:00,2022-08-25 12:37:01.585500000,2022-08-25 12:37:01.601400,421.5855,421.6014,24609.0,28908.0,Echolocation,Nyctalus leisleri,0.377,0.533,-1,/mnt/ubna_data_01/recover-20220825/UBNA_004/20...,Carp Pond
95,425.5125,425.5300,LF1,2022-08-25 12:30:00,2022-08-25 12:37:05.512499999,2022-08-25 12:37:05.530000,425.5125,425.5300,24609.0,29731.0,Echolocation,Nyctalus leisleri,0.261,0.506,-1,/mnt/ubna_data_01/recover-20220825/UBNA_004/20...,Carp Pond


In [10]:
location_sum_df = location_df

### Check if the BCIs we gather from each group make sense

In [11]:
location_sum_df.reset_index(inplace=True)
location_sum_df.drop(columns=location_sum_df.columns[0], inplace=True)

bout_params = bt_clustering.get_bout_params_from_location(location_sum_df, data_params)
bout_params

{'site_key': 'Carp', 'LF1_bci': 2599.382616274815}

## Adding bout tags is the process that was most changed

### 1) First, we take our total location summary dataframe and select a subset corresponding to a specific frequency group

In [12]:
location_df = location_sum_df.copy()
location_df.insert(0, 'duration_from_last_call_ms', 0)
location_df.insert(0, 'bout_tag', 0)
location_df.insert(0, 'change_markers', 0)
location_df.insert(0, 'call_status', '')
result_df = pd.DataFrame()

group = 'LF1'
freq_group_df = location_df.loc[location_df['freq_group']==group].copy()
freq_group_df.reset_index(inplace=True)
freq_group_df.drop(columns=freq_group_df.columns[0], inplace=True)
freq_group_df

Unnamed: 0,call_status,change_markers,bout_tag,duration_from_last_call_ms,start_time_wrt_ref,end_time_wrt_ref,freq_group,ref_time,call_start_time,call_end_time,...,end_time,low_freq,high_freq,event,class,class_prob,det_prob,individual,input_file,Site name
0,,0,0,0,1380.3355,1380.3532,LF1,2022-07-13 04:30:00,2022-07-13 04:53:00.335500000,2022-07-13 04:53:00.353200,...,1380.3532,28046.0,31703.0,Echolocation,Nyctalus leisleri,0.487,0.559,-1,/mnt/ubna_data_01/recover-20220715/UBNA_008/20...,Carp Pond
1,,0,0,0,1513.5975,1513.6129,LF1,2022-07-13 04:30:00,2022-07-13 04:55:13.597500000,2022-07-13 04:55:13.612900,...,1513.6129,23750.0,28075.0,Echolocation,Nyctalus leisleri,0.449,0.571,-1,/mnt/ubna_data_01/recover-20220715/UBNA_008/20...,Carp Pond
2,,0,0,0,1513.8235,1513.8396,LF1,2022-07-13 04:30:00,2022-07-13 04:55:13.823500000,2022-07-13 04:55:13.839600,...,1513.8396,23750.0,28002.0,Echolocation,Nyctalus leisleri,0.513,0.591,-1,/mnt/ubna_data_01/recover-20220715/UBNA_008/20...,Carp Pond
3,,0,0,0,1513.9885,1514.0010,LF1,2022-07-13 04:30:00,2022-07-13 04:55:13.988500000,2022-07-13 04:55:14.001000,...,1514.0010,24609.0,30884.0,Echolocation,Nyctalus leisleri,0.424,0.632,-1,/mnt/ubna_data_01/recover-20220715/UBNA_008/20...,Carp Pond
4,,0,0,0,1514.2675,1514.2839,LF1,2022-07-13 04:30:00,2022-07-13 04:55:14.267500000,2022-07-13 04:55:14.283900,...,1514.2839,23750.0,28526.0,Echolocation,Nyctalus leisleri,0.459,0.587,-1,/mnt/ubna_data_01/recover-20220715/UBNA_008/20...,Carp Pond
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
622105,,0,0,0,121.6545,121.6710,LF1,2022-08-25 12:30:00,2022-08-25 12:32:01.654500000,2022-08-25 12:32:01.671000,...,121.6710,22890.0,26398.0,Echolocation,Nyctalus leisleri,0.548,0.602,-1,/mnt/ubna_data_01/recover-20220825/UBNA_004/20...,Carp Pond
622106,,0,0,0,123.5645,123.5797,LF1,2022-08-25 12:30:00,2022-08-25 12:32:03.564500000,2022-08-25 12:32:03.579700,...,123.5797,22031.0,27189.0,Echolocation,Nyctalus leisleri,0.403,0.542,-1,/mnt/ubna_data_01/recover-20220825/UBNA_004/20...,Carp Pond
622107,,0,0,0,421.5855,421.6014,LF1,2022-08-25 12:30:00,2022-08-25 12:37:01.585500000,2022-08-25 12:37:01.601400,...,421.6014,24609.0,28908.0,Echolocation,Nyctalus leisleri,0.377,0.533,-1,/mnt/ubna_data_01/recover-20220825/UBNA_004/20...,Carp Pond
622108,,0,0,0,425.5125,425.5300,LF1,2022-08-25 12:30:00,2022-08-25 12:37:05.512499999,2022-08-25 12:37:05.530000,...,425.5300,24609.0,29731.0,Echolocation,Nyctalus leisleri,0.261,0.506,-1,/mnt/ubna_data_01/recover-20220825/UBNA_004/20...,Carp Pond


### Calculate the intervals of this frequency-group subset of the dataframe. All these intervals only correspond to the selected frequency group

### Create bout tags for if the call is within or between bout using the call's interval to its previous call. 

In [13]:
intervals = (pd.to_datetime(freq_group_df['call_start_time'].values[1:]) - pd.to_datetime(freq_group_df['call_end_time'].values[:-1]))
ipis_f = intervals.to_numpy(dtype='float32')/1e6
ipis_f = np.insert(ipis_f, 0, bout_params[f'{group}_bci'])

freq_group_df['duration_from_last_call_ms'] =  ipis_f
freq_group_df.loc[freq_group_df['duration_from_last_call_ms'] < bout_params[f'{group}_bci'], 'bout_tag'] = 1
freq_group_df.loc[freq_group_df['duration_from_last_call_ms'] >= bout_params[f'{group}_bci'], 'bout_tag'] = 0

wb_indices = pd.DataFrame.where(freq_group_df, freq_group_df['bout_tag']==1).dropna().index
ob_indices = pd.DataFrame.where(freq_group_df, freq_group_df['bout_tag']==0).dropna().index
freq_group_df.loc[wb_indices, 'call_status'] = 'within bout'
freq_group_df.loc[ob_indices, 'call_status'] = 'outside bout'
freq_group_df

Unnamed: 0,call_status,change_markers,bout_tag,duration_from_last_call_ms,start_time_wrt_ref,end_time_wrt_ref,freq_group,ref_time,call_start_time,call_end_time,...,end_time,low_freq,high_freq,event,class,class_prob,det_prob,individual,input_file,Site name
0,outside bout,0,0,2599.382568,1380.3355,1380.3532,LF1,2022-07-13 04:30:00,2022-07-13 04:53:00.335500000,2022-07-13 04:53:00.353200,...,1380.3532,28046.0,31703.0,Echolocation,Nyctalus leisleri,0.487,0.559,-1,/mnt/ubna_data_01/recover-20220715/UBNA_008/20...,Carp Pond
1,outside bout,0,0,133244.296875,1513.5975,1513.6129,LF1,2022-07-13 04:30:00,2022-07-13 04:55:13.597500000,2022-07-13 04:55:13.612900,...,1513.6129,23750.0,28075.0,Echolocation,Nyctalus leisleri,0.449,0.571,-1,/mnt/ubna_data_01/recover-20220715/UBNA_008/20...,Carp Pond
2,within bout,0,1,210.600006,1513.8235,1513.8396,LF1,2022-07-13 04:30:00,2022-07-13 04:55:13.823500000,2022-07-13 04:55:13.839600,...,1513.8396,23750.0,28002.0,Echolocation,Nyctalus leisleri,0.513,0.591,-1,/mnt/ubna_data_01/recover-20220715/UBNA_008/20...,Carp Pond
3,within bout,0,1,148.899994,1513.9885,1514.0010,LF1,2022-07-13 04:30:00,2022-07-13 04:55:13.988500000,2022-07-13 04:55:14.001000,...,1514.0010,24609.0,30884.0,Echolocation,Nyctalus leisleri,0.424,0.632,-1,/mnt/ubna_data_01/recover-20220715/UBNA_008/20...,Carp Pond
4,within bout,0,1,266.500000,1514.2675,1514.2839,LF1,2022-07-13 04:30:00,2022-07-13 04:55:14.267500000,2022-07-13 04:55:14.283900,...,1514.2839,23750.0,28526.0,Echolocation,Nyctalus leisleri,0.459,0.587,-1,/mnt/ubna_data_01/recover-20220715/UBNA_008/20...,Carp Pond
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
622105,within bout,0,1,258.299988,121.6545,121.6710,LF1,2022-08-25 12:30:00,2022-08-25 12:32:01.654500000,2022-08-25 12:32:01.671000,...,121.6710,22890.0,26398.0,Echolocation,Nyctalus leisleri,0.548,0.602,-1,/mnt/ubna_data_01/recover-20220825/UBNA_004/20...,Carp Pond
622106,within bout,0,1,1893.500000,123.5645,123.5797,LF1,2022-08-25 12:30:00,2022-08-25 12:32:03.564500000,2022-08-25 12:32:03.579700,...,123.5797,22031.0,27189.0,Echolocation,Nyctalus leisleri,0.403,0.542,-1,/mnt/ubna_data_01/recover-20220825/UBNA_004/20...,Carp Pond
622107,outside bout,0,0,298005.781250,421.5855,421.6014,LF1,2022-08-25 12:30:00,2022-08-25 12:37:01.585500000,2022-08-25 12:37:01.601400,...,421.6014,24609.0,28908.0,Echolocation,Nyctalus leisleri,0.377,0.533,-1,/mnt/ubna_data_01/recover-20220825/UBNA_004/20...,Carp Pond
622108,outside bout,0,0,3911.099854,425.5125,425.5300,LF1,2022-08-25 12:30:00,2022-08-25 12:37:05.512499999,2022-08-25 12:37:05.530000,...,425.5300,24609.0,29731.0,Echolocation,Nyctalus leisleri,0.261,0.506,-1,/mnt/ubna_data_01/recover-20220825/UBNA_004/20...,Carp Pond


### Create change markers using the linear difference of bout tags. 
#### When the bout tag is 0 and the next call's bout tag is 1, the call is starting a bout. Thus, the change marker of a call is 1
#### When the bout tag is 1 and the next call's bout tag is 0, the call is ending a bout. Thus, the change marker of a call is -1.

In [14]:
bout_tags = freq_group_df['bout_tag']
change_markers = bout_tags.shift(-1) - bout_tags
change_markers[len(change_markers)-1] = 0
freq_group_df['change_markers'] = change_markers

be_indices = pd.DataFrame.where(freq_group_df, freq_group_df['change_markers']==-1).dropna().index
bs_indices = pd.DataFrame.where(freq_group_df, freq_group_df['change_markers']==1).dropna().index

freq_group_df.loc[be_indices, 'call_status'] = 'bout end'
freq_group_df.loc[bs_indices, 'call_status'] = 'bout start'
freq_group_df

Unnamed: 0,call_status,change_markers,bout_tag,duration_from_last_call_ms,start_time_wrt_ref,end_time_wrt_ref,freq_group,ref_time,call_start_time,call_end_time,...,end_time,low_freq,high_freq,event,class,class_prob,det_prob,individual,input_file,Site name
0,outside bout,0.0,0,2599.382568,1380.3355,1380.3532,LF1,2022-07-13 04:30:00,2022-07-13 04:53:00.335500000,2022-07-13 04:53:00.353200,...,1380.3532,28046.0,31703.0,Echolocation,Nyctalus leisleri,0.487,0.559,-1,/mnt/ubna_data_01/recover-20220715/UBNA_008/20...,Carp Pond
1,bout start,1.0,0,133244.296875,1513.5975,1513.6129,LF1,2022-07-13 04:30:00,2022-07-13 04:55:13.597500000,2022-07-13 04:55:13.612900,...,1513.6129,23750.0,28075.0,Echolocation,Nyctalus leisleri,0.449,0.571,-1,/mnt/ubna_data_01/recover-20220715/UBNA_008/20...,Carp Pond
2,within bout,0.0,1,210.600006,1513.8235,1513.8396,LF1,2022-07-13 04:30:00,2022-07-13 04:55:13.823500000,2022-07-13 04:55:13.839600,...,1513.8396,23750.0,28002.0,Echolocation,Nyctalus leisleri,0.513,0.591,-1,/mnt/ubna_data_01/recover-20220715/UBNA_008/20...,Carp Pond
3,within bout,0.0,1,148.899994,1513.9885,1514.0010,LF1,2022-07-13 04:30:00,2022-07-13 04:55:13.988500000,2022-07-13 04:55:14.001000,...,1514.0010,24609.0,30884.0,Echolocation,Nyctalus leisleri,0.424,0.632,-1,/mnt/ubna_data_01/recover-20220715/UBNA_008/20...,Carp Pond
4,within bout,0.0,1,266.500000,1514.2675,1514.2839,LF1,2022-07-13 04:30:00,2022-07-13 04:55:14.267500000,2022-07-13 04:55:14.283900,...,1514.2839,23750.0,28526.0,Echolocation,Nyctalus leisleri,0.459,0.587,-1,/mnt/ubna_data_01/recover-20220715/UBNA_008/20...,Carp Pond
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
622105,within bout,0.0,1,258.299988,121.6545,121.6710,LF1,2022-08-25 12:30:00,2022-08-25 12:32:01.654500000,2022-08-25 12:32:01.671000,...,121.6710,22890.0,26398.0,Echolocation,Nyctalus leisleri,0.548,0.602,-1,/mnt/ubna_data_01/recover-20220825/UBNA_004/20...,Carp Pond
622106,bout end,-1.0,1,1893.500000,123.5645,123.5797,LF1,2022-08-25 12:30:00,2022-08-25 12:32:03.564500000,2022-08-25 12:32:03.579700,...,123.5797,22031.0,27189.0,Echolocation,Nyctalus leisleri,0.403,0.542,-1,/mnt/ubna_data_01/recover-20220825/UBNA_004/20...,Carp Pond
622107,outside bout,0.0,0,298005.781250,421.5855,421.6014,LF1,2022-08-25 12:30:00,2022-08-25 12:37:01.585500000,2022-08-25 12:37:01.601400,...,421.6014,24609.0,28908.0,Echolocation,Nyctalus leisleri,0.377,0.533,-1,/mnt/ubna_data_01/recover-20220825/UBNA_004/20...,Carp Pond
622108,bout start,1.0,0,3911.099854,425.5125,425.5300,LF1,2022-08-25 12:30:00,2022-08-25 12:37:05.512499999,2022-08-25 12:37:05.530000,...,425.5300,24609.0,29731.0,Echolocation,Nyctalus leisleri,0.261,0.506,-1,/mnt/ubna_data_01/recover-20220825/UBNA_004/20...,Carp Pond


### As seen above in the last row, a bout that is started may not be ended if the last call within that bout is labelled as 'within'
#### That call will either be labelled 'within' or 'outside' bout because of the forced change marker of 0

In [15]:
last_call_status = freq_group_df.at[len(freq_group_df)-1, 'call_status']
if last_call_status == 'within bout':
    freq_group_df.at[len(freq_group_df)-1, 'call_status'] = 'bout end'
freq_group_df

Unnamed: 0,call_status,change_markers,bout_tag,duration_from_last_call_ms,start_time_wrt_ref,end_time_wrt_ref,freq_group,ref_time,call_start_time,call_end_time,...,end_time,low_freq,high_freq,event,class,class_prob,det_prob,individual,input_file,Site name
0,outside bout,0.0,0,2599.382568,1380.3355,1380.3532,LF1,2022-07-13 04:30:00,2022-07-13 04:53:00.335500000,2022-07-13 04:53:00.353200,...,1380.3532,28046.0,31703.0,Echolocation,Nyctalus leisleri,0.487,0.559,-1,/mnt/ubna_data_01/recover-20220715/UBNA_008/20...,Carp Pond
1,bout start,1.0,0,133244.296875,1513.5975,1513.6129,LF1,2022-07-13 04:30:00,2022-07-13 04:55:13.597500000,2022-07-13 04:55:13.612900,...,1513.6129,23750.0,28075.0,Echolocation,Nyctalus leisleri,0.449,0.571,-1,/mnt/ubna_data_01/recover-20220715/UBNA_008/20...,Carp Pond
2,within bout,0.0,1,210.600006,1513.8235,1513.8396,LF1,2022-07-13 04:30:00,2022-07-13 04:55:13.823500000,2022-07-13 04:55:13.839600,...,1513.8396,23750.0,28002.0,Echolocation,Nyctalus leisleri,0.513,0.591,-1,/mnt/ubna_data_01/recover-20220715/UBNA_008/20...,Carp Pond
3,within bout,0.0,1,148.899994,1513.9885,1514.0010,LF1,2022-07-13 04:30:00,2022-07-13 04:55:13.988500000,2022-07-13 04:55:14.001000,...,1514.0010,24609.0,30884.0,Echolocation,Nyctalus leisleri,0.424,0.632,-1,/mnt/ubna_data_01/recover-20220715/UBNA_008/20...,Carp Pond
4,within bout,0.0,1,266.500000,1514.2675,1514.2839,LF1,2022-07-13 04:30:00,2022-07-13 04:55:14.267500000,2022-07-13 04:55:14.283900,...,1514.2839,23750.0,28526.0,Echolocation,Nyctalus leisleri,0.459,0.587,-1,/mnt/ubna_data_01/recover-20220715/UBNA_008/20...,Carp Pond
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
622105,within bout,0.0,1,258.299988,121.6545,121.6710,LF1,2022-08-25 12:30:00,2022-08-25 12:32:01.654500000,2022-08-25 12:32:01.671000,...,121.6710,22890.0,26398.0,Echolocation,Nyctalus leisleri,0.548,0.602,-1,/mnt/ubna_data_01/recover-20220825/UBNA_004/20...,Carp Pond
622106,bout end,-1.0,1,1893.500000,123.5645,123.5797,LF1,2022-08-25 12:30:00,2022-08-25 12:32:03.564500000,2022-08-25 12:32:03.579700,...,123.5797,22031.0,27189.0,Echolocation,Nyctalus leisleri,0.403,0.542,-1,/mnt/ubna_data_01/recover-20220825/UBNA_004/20...,Carp Pond
622107,outside bout,0.0,0,298005.781250,421.5855,421.6014,LF1,2022-08-25 12:30:00,2022-08-25 12:37:01.585500000,2022-08-25 12:37:01.601400,...,421.6014,24609.0,28908.0,Echolocation,Nyctalus leisleri,0.377,0.533,-1,/mnt/ubna_data_01/recover-20220825/UBNA_004/20...,Carp Pond
622108,bout start,1.0,0,3911.099854,425.5125,425.5300,LF1,2022-08-25 12:30:00,2022-08-25 12:37:05.512499999,2022-08-25 12:37:05.530000,...,425.5300,24609.0,29731.0,Echolocation,Nyctalus leisleri,0.261,0.506,-1,/mnt/ubna_data_01/recover-20220825/UBNA_004/20...,Carp Pond


### Finally we concatenate these results for each frequency group, using each group's BCI to cluster their respective calls into bouts

In [16]:
result_df = pd.concat([result_df, freq_group_df])

## Below we put it all together in our new method

### We need to also use some if statements to make sure we don't pass in an invalid group or a group we have not defined yet

In [17]:
def add_bouttags_to_bd2_predictions_for_freqgroups(batdetect2_predictions, bout_params):

    location_df = batdetect2_predictions.copy()
    location_df.insert(0, 'duration_from_last_call_ms', 0)
    location_df.insert(0, 'bout_tag', 0)
    location_df.insert(0, 'change_markers', 0)
    location_df.insert(0, 'call_status', '')
    result_df = pd.DataFrame()

    for group in location_df['freq_group'].unique():
        if group != '':
            freq_group_df = location_df.loc[location_df['freq_group']==group].copy()
            freq_group_df.reset_index(inplace=True)
            freq_group_df.drop(columns=freq_group_df.columns[0], inplace=True)
            if not(freq_group_df.empty):
                intervals = (pd.to_datetime(freq_group_df['call_start_time'].values[1:]) - pd.to_datetime(freq_group_df['call_end_time'].values[:-1]))
                ipis_f = intervals.to_numpy(dtype='float32')/1e6
                ipis_f = np.insert(ipis_f, 0, bout_params[f'{group}_bci'])

                freq_group_df['duration_from_last_call_ms'] =  ipis_f
                freq_group_df.loc[freq_group_df['duration_from_last_call_ms'] < bout_params[f'{group}_bci'], 'bout_tag'] = 1
                freq_group_df.loc[freq_group_df['duration_from_last_call_ms'] >= bout_params[f'{group}_bci'], 'bout_tag'] = 0
                wb_indices = pd.DataFrame.where(freq_group_df, freq_group_df['bout_tag']==1).dropna().index
                ob_indices = pd.DataFrame.where(freq_group_df, freq_group_df['bout_tag']==0).dropna().index
                freq_group_df.loc[wb_indices, 'call_status'] = 'within bout'
                freq_group_df.loc[ob_indices, 'call_status'] = 'outside bout'

                bout_tags = freq_group_df['bout_tag']
                change_markers = bout_tags.shift(-1) - bout_tags
                change_markers[len(change_markers)-1] = 0
                freq_group_df['change_markers'] = change_markers
                be_indices = pd.DataFrame.where(freq_group_df, freq_group_df['change_markers']==-1).dropna().index
                bs_indices = pd.DataFrame.where(freq_group_df, freq_group_df['change_markers']==1).dropna().index
                freq_group_df.loc[be_indices, 'call_status'] = 'bout end'
                freq_group_df.loc[bs_indices, 'call_status'] = 'bout start'

                num_bout_starts = len(freq_group_df.loc[freq_group_df['call_status']=='bout start'])
                num_bout_ends = len(freq_group_df.loc[freq_group_df['call_status']=='bout end'])
                if num_bout_starts != num_bout_ends:
                    freq_group_df.at[len(freq_group_df)-1, 'call_status'] = 'bout end'

                result_df = pd.concat([result_df, freq_group_df])

    return result_df

### For this result, LF1 calls will always appear before HF1 which will appear before HF2. 

### In other words, the start and end times are not sorted over all frequency groups; rather only for each group

In [18]:
batdetect2_predictions = bt_clustering.classify_bouts_in_bd2_predictions_for_freqgroups(location_sum_df, bout_params)
batdetect2_predictions[batdetect2_predictions['freq_group']=='HF2']

Unnamed: 0,call_status,change_markers,bout_tag,duration_from_last_call_ms,start_time_wrt_ref,end_time_wrt_ref,freq_group,ref_time,call_start_time,call_end_time,...,end_time,low_freq,high_freq,event,class,class_prob,det_prob,individual,input_file,Site name


### There could be bouts that overlap in time, separated in frequency alone. 
### This makes generating % of time results not reliable if we want to include all frequency groups. However, our analysis will largely focus on individual groups

In [19]:
bout_metrics = bt_clustering.construct_bout_metrics_from_location_df_for_freqgroups(batdetect2_predictions)
bout_metrics.sort_values('start_time_of_bout')[:10]

Unnamed: 0,freq_group,start_time_of_bout,end_time_of_bout,start_time_wrt_ref,end_time_wrt_ref,start_time,end_time,low_freq,high_freq,bout_duration,bout_duration_in_secs
0,LF1,2022-07-13 04:55:13.597500,2022-07-13 04:55:23.927400,1513.5975,1523.9274,1513.5975,1523.9274,22890.0,33127.0,0 days 00:00:10.329900,10.3299
1,LF1,2022-07-13 04:57:55.094500,2022-07-13 04:57:58.820900,1675.0945,1678.8209,1675.0945,1678.8209,26328.0,34097.0,0 days 00:00:03.726400,3.7264
2,LF1,2022-07-13 05:05:25.821500,2022-07-13 05:05:28.472300,325.8215,328.4723,325.8215,328.4723,24609.0,29024.0,0 days 00:00:02.650800,2.6508
3,LF1,2022-07-13 05:05:32.149500,2022-07-13 05:05:37.731600,332.1495,337.7316,332.1495,337.7316,23750.0,27951.0,0 days 00:00:05.582100,5.5821
4,LF1,2022-07-13 05:05:47.819500,2022-07-13 05:05:51.224200,347.8195,351.2242,347.8195,351.2242,23750.0,30452.0,0 days 00:00:03.404700,3.4047
5,LF1,2022-07-13 05:08:16.282500,2022-07-13 05:08:16.952100,496.2825,496.9521,496.2825,496.9521,25468.0,29257.0,0 days 00:00:00.669600,0.6696
6,LF1,2022-07-13 05:26:56.276500,2022-07-13 05:26:57.272800,1616.2765,1617.2728,1616.2765,1617.2728,20312.0,26339.0,0 days 00:00:00.996300,0.9963
7,LF1,2022-07-13 05:38:55.964500,2022-07-13 05:39:02.893300,535.9645,542.8933,535.9645,542.8933,24609.0,35642.0,0 days 00:00:06.928800,6.9288
8,LF1,2022-07-13 05:40:19.894500,2022-07-13 05:40:21.841900,619.8945,621.8419,619.8945,621.8419,22890.0,28769.0,0 days 00:00:01.947400,1.9474
9,LF1,2022-07-13 05:40:25.224500,2022-07-13 05:40:53.786400,625.2245,653.7864,625.2245,653.7864,22031.0,36419.0,0 days 00:00:28.561900,28.5619


In [20]:
def construct_activity_arr_from_bout_metrics(bout_metrics, data_params, file_paths, dc_tag):
    all_processed_filepaths = sorted(list(map(str, list(Path(f'{file_paths["raw_SITE_folder"]}').glob('*.csv')))))
    all_processed_datetimes = pd.to_datetime(all_processed_filepaths, format="%Y%m%d_%H%M%S", exact=False)

    bout_metrics['ref_time'] = pd.DatetimeIndex(bout_metrics['start_time_of_bout'])
    bout_metrics['total_bout_duration_in_secs'] = bout_metrics['bout_duration_in_secs']
    bout_metrics = bout_metrics.set_index('ref_time')

    bout_duration_per_interval = bout_metrics.resample(f"{data_params['resolution_in_min']}T")['total_bout_duration_in_secs'].sum()

    percent_time_occupied_by_bouts = (100*(bout_duration_per_interval.values / 60*float(data_params['resolution_in_min'])))

    bout_dpi_df = pd.DataFrame(list(zip(bout_duration_per_interval.index, percent_time_occupied_by_bouts)), columns=['ref_time', f'percentage_time_occupied_by_bouts ({dc_tag})'])
    bout_dpi_df = bout_dpi_df.set_index('ref_time')
    bout_dpi_df = bout_dpi_df.reindex(index=all_processed_datetimes, fill_value=0).resample(f"{data_params['resolution_in_min']}T").first().between_time(data_params['recording_start'], data_params['recording_end'], inclusive='left')

    return pd.DataFrame(list(zip(bout_dpi_df.index, bout_dpi_df[f'percentage_time_occupied_by_bouts ({dc_tag})'].values)), columns=["Date_and_Time_UTC", f'percentage_time_occupied_by_bouts ({dc_tag})'])


### One error I encountered here was dividing but not using parentheses. That's all I fixed here

In [21]:
all_processed_filepaths = sorted(list(map(str, list(Path(f'{file_paths["raw_SITE_folder"]}').glob('*.csv')))))
all_processed_datetimes = pd.to_datetime(all_processed_filepaths, format="%Y%m%d_%H%M%S", exact=False)

bout_metrics['ref_time'] = pd.DatetimeIndex(bout_metrics['start_time_of_bout'])
bout_metrics['total_bout_duration_in_secs'] = bout_metrics['bout_duration_in_secs']
bout_metrics = bout_metrics.set_index('ref_time')

bout_duration_per_interval = bout_metrics.resample(f"{data_params['resolution_in_min']}T")['total_bout_duration_in_secs'].sum()

percent_time_occupied_by_bouts = (100*(bout_duration_per_interval.values / (60*float(data_params['resolution_in_min']))))

In [22]:
dc_bouts = construct_activity_arr_from_bout_metrics(bout_metrics, data_params, file_paths, dc_tag)

In [23]:
dc_bouts[:10]

Unnamed: 0,Date_and_Time_UTC,percentage_time_occupied_by_bouts (1800of1800)
0,2022-07-13 02:00:00,0.0
1,2022-07-13 02:30:00,0.0
2,2022-07-13 03:00:00,0.0
3,2022-07-13 03:30:00,0.0
4,2022-07-13 04:00:00,0.0
5,2022-07-13 04:30:00,702.815
6,2022-07-13 05:00:00,665.175
7,2022-07-13 05:30:00,4746.465
8,2022-07-13 06:00:00,2105.965
9,2022-07-13 06:30:00,1143.295


In [24]:
time_on = int(dc_tag.split('of')[0])

In [25]:
bout_metrics = bt_clustering.generate_bout_metrics_for_location_and_freq(location_sum_df, data_params, dc_tag)


In [26]:
dc_bouts = dh.construct_activity_arr_from_bout_metrics(bout_metrics, data_params, file_paths, dc_tag)
dc_bouts = dc_bouts.set_index("Date_and_Time_UTC")
activity_bouts_arr = pd.concat([activity_bouts_arr, dc_bouts], axis=1)

activity_bouts_arr

Unnamed: 0_level_0,percentage_time_occupied_by_bouts (1800of1800)
Date_and_Time_UTC,Unnamed: 1_level_1
2022-07-13 02:00:00,0.000000
2022-07-13 02:30:00,0.000000
2022-07-13 03:00:00,0.000000
2022-07-13 03:30:00,0.000000
2022-07-13 04:00:00,0.000000
...,...
2022-08-25 11:00:00,0.573578
2022-08-25 11:30:00,0.000000
2022-08-25 12:00:00,4.327972
2022-08-25 12:30:00,2.495389


In [27]:
activity_arr = dh.generate_activity_bouts_results(data_params, file_paths)

In [28]:
activity_arr[:23]

Unnamed: 0_level_0,percentage_time_occupied_by_bouts (1800of1800),percentage_time_occupied_by_bouts (300of1800),percentage_time_occupied_by_bouts (60of360)
Date_and_Time_UTC,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2022-07-13 02:00:00,0.0,0.0,0.0
2022-07-13 02:30:00,0.0,0.0,0.0
2022-07-13 03:00:00,0.0,0.0,0.0
2022-07-13 03:30:00,0.0,0.0,0.0
2022-07-13 04:00:00,0.0,0.0,0.0
2022-07-13 04:30:00,0.780906,0.0,0.0
2022-07-13 05:00:00,0.739083,0.0,0.0
2022-07-13 05:30:00,5.27385,0.0,0.272711
2022-07-13 06:00:00,2.339961,0.0,0.0
2022-07-13 06:30:00,1.270328,0.0,0.241267
