In [1]:
from pathlib import Path
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import datetime as dt

In [2]:
import sys

# Make the project's source and test modules importable from this notebook.
for extra_path in ("../src", "../src/tests"):
    sys.path.append(extra_path)

In [3]:
import test_subsampling as test_ss

In [4]:
def are_there_expected_number_of_cycles(location_df, num_of_detections, cycle_length, data_params):
    """
    Assert that the data does not contain more cycle groups than the recording period allows.

    Every cycle start from the recording start on the earliest call's date to the recording
    end on the latest call's date is generated, only those inside the daily recording window
    are kept, and `num_of_detections` must not have more rows than that.

    Parameters
    ----------
    location_df : pandas.DataFrame
        Must contain a 'call_start_time' column that `pd.to_datetime` can parse.
    num_of_detections : object with a `.shape` attribute (e.g. pandas.DataFrame)
        One row per cycle group found in the data.
    cycle_length : int
        Cycle length in minutes.
    data_params : dict
        Must provide 'recording_start' and 'recording_end' as "HH:MM" strings.

    Raises
    ------
    AssertionError
        If `num_of_detections` has more rows than there are cycles in the recording window.
    """
    # Parse once; use min/max rather than first/last row so unsorted input still
    # produces the full date span.
    call_start_times = pd.to_datetime(location_df['call_start_time'])
    first_date = call_start_times.min().date()
    last_date = call_start_times.max().date()

    recording_start = pd.to_datetime(data_params['recording_start'], format="%H:%M").time()
    recording_end = pd.to_datetime(data_params['recording_end'], format="%H:%M").time()
    first_dt = dt.datetime.combine(first_date, recording_start)
    last_dt = dt.datetime.combine(last_date, recording_end)

    # 'min' is the supported minute alias; the older 'T' spelling is deprecated in pandas.
    all_cycles = pd.date_range(first_dt, last_dt, freq=f'{cycle_length}min')
    in_recording_window = all_cycles.indexer_between_time(data_params['recording_start'],
                                                          data_params['recording_end'])
    between_time_cycles = all_cycles[in_recording_window]

    assert num_of_detections.shape[0] <= between_time_cycles.shape[0], (
        f"Found {num_of_detections.shape[0]} cycle groups but only "
        f"{between_time_cycles.shape[0]} cycles fit in the recording period"
    )

In [5]:
def all_last_calls_of_cycle_within_recording(dc_applied_df, cycle_length, time_on_in_secs):
    """
    Assert that the last call of every cycle ends within the "on" window of that cycle.

    Parameters
    ----------
    dc_applied_df : pandas.DataFrame
        Needs a datetime 'cycle_ref_time' column and an 'end_time_wrt_ref' column holding
        each call's end time in seconds relative to its cycle start.
    cycle_length : int
        Cycle length in minutes.
    time_on_in_secs : float
        Length of the "on" window in seconds.

    Raises
    ------
    AssertionError
        If the last call of any cycle ends after `time_on_in_secs`.
    """
    if dc_applied_df.empty:
        # Nothing survived the duty cycle, so there is nothing to violate the window.
        return

    resampled_cycle_length_df = dc_applied_df.resample(f'{cycle_length}min', on='cycle_ref_time',
                                                       origin='start_day')
    # Cycles without any call yield NaN from .last(); they cannot violate the window.
    last_end_of_each_group = resampled_cycle_length_df['end_time_wrt_ref'].last().dropna()

    # Compare element-wise first and reduce afterwards. Reducing first
    # (`series.all() < limit`) compares a single boolean to the limit and never fails.
    # `<=` mirrors the filter that keeps calls with end_time_wrt_ref <= time_on_in_secs.
    assert (last_end_of_each_group <= time_on_in_secs).all(), (
        f"Some cycles contain a last call ending after the {time_on_in_secs}s on-window"
    )

In [6]:
def simulate_dutycycle_on_detections(location_df, cycle_length, time_on_in_secs, data_params):
    """
    Apply a duty-cycling scheme to a location summary of concatenated bd2 outputs.

    Each call is first tagged with the start time of the cycle it belongs to. Only the calls
    lying inside the "on" part of their cycle are kept, and the kept calls are sanity-checked
    before being returned.
    """
    calls_with_cycles = assign_cycle_groups_to_each_call(location_df, cycle_length, data_params)
    calls_in_on_windows = gather_calls_existing_in_on_windows(calls_with_cycles, time_on_in_secs)
    all_last_calls_of_cycle_within_recording(calls_in_on_windows, cycle_length, time_on_in_secs)

    return calls_in_on_windows

def gather_calls_existing_in_on_windows(location_df, time_on_in_secs):
    """
    Keep only the calls that lie entirely inside the "on" window of their cycle.

    A call is kept when it starts at or after its cycle start and ends no later than
    `time_on_in_secs` seconds after it.

    Parameters
    ----------
    location_df : pandas.DataFrame
        Needs datetime columns 'call_start_time', 'call_end_time' and 'cycle_ref_time'.
        The frame is not modified.
    time_on_in_secs : float
        Length of the "on" window in seconds.

    Returns
    -------
    pandas.DataFrame
        A new frame with the kept rows and two leading columns, 'start_time_wrt_ref' and
        'end_time_wrt_ref': the call bounds in seconds relative to the cycle start.
    """
    cycle_starts = location_df['cycle_ref_time']
    start_time_wrt_ref = (location_df['call_start_time'] - cycle_starts).dt.total_seconds()
    end_time_wrt_ref = (location_df['call_end_time'] - cycle_starts).dt.total_seconds()
    in_on_window = (end_time_wrt_ref <= time_on_in_secs) & (start_time_wrt_ref >= 0)

    # Work on a fresh frame so the caller's data is untouched. Dropping offset columns
    # left over from an earlier run keeps `insert` from raising on a repeated call.
    offset_columns = ['start_time_wrt_ref', 'end_time_wrt_ref']
    dc_applied_df = location_df.loc[in_on_window].drop(columns=offset_columns, errors='ignore')
    dc_applied_df.insert(0, 'end_time_wrt_ref', end_time_wrt_ref[in_on_window].to_numpy())
    dc_applied_df.insert(0, 'start_time_wrt_ref', start_time_wrt_ref[in_on_window].to_numpy())

    return dc_applied_df

def assign_cycle_groups_to_each_call(location_df, cycle_length, data_params):
    """
    Tag every call with the start time of the duty cycle it belongs to.

    Calls are binned into consecutive `cycle_length`-minute cycles anchored at midnight of
    the first day (`origin='start_day'`); the label of each call's bin is written to
    'cycle_ref_time'.

    Parameters
    ----------
    location_df : pandas.DataFrame
        Needs 'call_start_time' and 'call_end_time' columns convertible to datetimes.
        The frame is modified in place: both columns are converted to datetimes,
        'ref_time' is overwritten with the call start times and 'cycle_ref_time' is added.
    cycle_length : int
        Cycle length in minutes.
    data_params : dict
        Must provide 'recording_start' and 'recording_end' as "HH:MM" strings.

    Returns
    -------
    pandas.DataFrame
        The same `location_df` object, with the columns described above.

    Raises
    ------
    AssertionError
        Propagated from `are_there_expected_number_of_cycles` when more cycle groups are
        found than fit in the recording period.
    """
    # Parse the start times once and reuse them for every column derived from them.
    call_start_times = pd.DatetimeIndex(location_df['call_start_time'])
    location_df['ref_time'] = call_start_times
    location_df['cycle_ref_time'] = call_start_times
    location_df['call_end_time'] = pd.DatetimeIndex(location_df['call_end_time'])
    location_df['call_start_time'] = call_start_times

    # 'min' is the supported minute alias; the older 'T' spelling is deprecated in pandas.
    resampled_cycle_length_df = location_df.resample(f'{cycle_length}min', on='cycle_ref_time',
                                                     origin='start_day')
    first_call_of_each_group = resampled_cycle_length_df.first().between_time(
        data_params['recording_start'], data_params['recording_end'])
    are_there_expected_number_of_cycles(location_df, first_call_of_each_group, cycle_length, data_params)

    # Within transform, each group's name is its bin label, i.e. the cycle start time.
    cycle_start_of_each_call = resampled_cycle_length_df['cycle_ref_time'].transform(lambda x: x.name)
    location_df['cycle_ref_time'] = pd.DatetimeIndex(cycle_start_of_each_call)

    return location_df

In [7]:
from pathlib import Path
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import datetime as dt

import sys

sys.path.append("../src/tests")

import activity.activity_assembly as actvt
import activity.subsampling as ss
from core import SITE_NAMES, FREQ_GROUPS
import test_activity_metric_functions as test_activity


def create_initial_mock_data_from_ipis(mock_start):
    """
    Create a simulated dataset of evenly spaced calls covering 24 hours from `mock_start`.

    2520 calls, each 0.01 min (0.6 s) long, are spread uniformly over the 24 hours so that
    the interval between consecutive calls is constant.

    Parameters
    ----------
    mock_start : datetime.datetime
        Timestamp of the first call.

    Returns
    -------
    pandas.DataFrame
        One row per call with columns:
        'start_time', 'end_time' : call bounds in minutes from `mock_start`;
        'low_freq', 'high_freq' : constant 20000 and 30000;
        'freq_group' : constant 'LF';
        'call_start_time', 'call_end_time' : absolute call bounds;
        'ref_time' : start of the 30-minute block containing the call.
    """
    minutes_in_day = 60*24
    call_duration = 0.01      # minutes
    number_of_calls = 2520
    nanos_per_minute = 60e9   # pd.to_timedelta reads bare numbers as nanoseconds

    mock_df = pd.DataFrame()
    mock_df['start_time'] = np.linspace(0, minutes_in_day - call_duration, number_of_calls)
    mock_df['end_time'] = mock_df['start_time'] + call_duration
    mock_df['low_freq'] = [20000]*len(mock_df)
    mock_df['high_freq'] = [30000]*len(mock_df)
    mock_df['freq_group'] = ['LF']*len(mock_df)
    mock_df['call_start_time'] = mock_start + pd.to_timedelta(nanos_per_minute*mock_df['start_time'])
    mock_df['call_end_time'] = mock_start + pd.to_timedelta(nanos_per_minute*mock_df['end_time'])

    # Flooring to 30 minutes gives the same left-edge labels as resampling into 30-minute
    # bins anchored at midnight, without the group-wise transform.
    mock_df['ref_time'] = mock_df['call_start_time'].dt.floor('30min')
    return mock_df


def test_if_subsampling_reduces_number_of_calls_by_expected_factor_in_mock_dataset():
    """
    Check that duty-cycle subsampling keeps the expected share of calls in the mock dataset.

    The mock dataset of evenly spaced calls is subsampled once per duty-cycle tag. Tags come
    from `ss.get_list_of_dc_tags`, given the cycle lengths (multiples of 6 minutes up to 720
    that divide a 24-hour day evenly) and the listening fractions 1/6, 1/3, 1/2 and 2/3.
    Each tag is read as "<time_on>of<cycle_length>" in minutes. For every tag, the number of
    surviving calls must be close (np.isclose with atol=3) to the total number of calls
    scaled by time_on / cycle_length.
    """
    candidate_lengths = np.arange(0, 720, 6) + 6
    divides_full_day = (24*60 % candidate_lengths) == 0
    cycle_lengths = candidate_lengths[divides_full_day]
    percent_ons = [1/6, 1/3, 1/2, 2/3]

    data_params = {
        "cycle_lengths": cycle_lengths,
        "percent_ons": percent_ons,
    }
    data_params["dc_tags"] = ss.get_list_of_dc_tags(cycle_lengths, percent_ons)
    data_params['recording_start'] = '00:00'
    data_params['recording_end'] = '16:00'

    mock_df = create_initial_mock_data_from_ipis(dt.datetime(2022, 6, 15, 0, 0, 0))

    for dc_tag in data_params['dc_tags']:
        tag_parts = dc_tag.split('of')
        time_on = int(tag_parts[0])
        cycle_length = int(tag_parts[-1])
        time_on_in_secs = 60*time_on

        # Hand over a copy so every scheme starts from the untouched mock data.
        mock_df_subsampled = simulate_dutycycle_on_detections(
            mock_df.copy(), cycle_length, time_on_in_secs, data_params
        )
        expected_number_of_calls = len(mock_df)/(cycle_length/time_on)
        assert np.isclose(len(mock_df_subsampled), expected_number_of_calls, atol=3)

In [8]:
# Candidate cycle lengths: every multiple of 6 minutes from 6 up to 720.
avail = np.arange(6, 720 + 6, 6)
# Keep only the lengths that divide a 24-hour day (1440 minutes) with no remainder.
divides_full_day = (24*60 % avail) == 0
reset_24 = avail[divides_full_day]
reset_24

array([  6,  12,  18,  24,  30,  36,  48,  60,  72,  90,  96, 120, 144,
       180, 240, 288, 360, 480, 720])

In [9]:
# Build the mock call dataset, starting at midnight on 2022-06-15.
mock_start = dt.datetime(year=2022, month=6, day=15)
# NOTE(review): mock_end is not used by any cell visible in this part of the notebook.
mock_end = mock_start + dt.timedelta(hours=12)

mock_df = create_initial_mock_data_from_ipis(mock_start)
mock_df

Unnamed: 0,start_time,end_time,low_freq,high_freq,freq_group,call_start_time,call_end_time,ref_time
0,0.000000,0.010000,20000,30000,LF,2022-06-15 00:00:00.000000000,2022-06-15 00:00:00.600000000,2022-06-15 00:00:00
1,0.571651,0.581651,20000,30000,LF,2022-06-15 00:00:34.299086939,2022-06-15 00:00:34.899086939,2022-06-15 00:00:00
2,1.143303,1.153303,20000,30000,LF,2022-06-15 00:01:08.598173878,2022-06-15 00:01:09.198173878,2022-06-15 00:00:00
3,1.714954,1.724954,20000,30000,LF,2022-06-15 00:01:42.897260817,2022-06-15 00:01:43.497260817,2022-06-15 00:00:00
4,2.286606,2.296606,20000,30000,LF,2022-06-15 00:02:17.196347757,2022-06-15 00:02:17.796347757,2022-06-15 00:00:00
...,...,...,...,...,...,...,...,...
2515,1437.703394,1437.713394,20000,30000,LF,2022-06-15 23:57:42.203652242,2022-06-15 23:57:42.803652242,2022-06-15 23:30:00
2516,1438.275046,1438.285046,20000,30000,LF,2022-06-15 23:58:16.502739182,2022-06-15 23:58:17.102739182,2022-06-15 23:30:00
2517,1438.846697,1438.856697,20000,30000,LF,2022-06-15 23:58:50.801826121,2022-06-15 23:58:51.401826121,2022-06-15 23:30:00
2518,1439.418349,1439.428349,20000,30000,LF,2022-06-15 23:59:25.100913060,2022-06-15 23:59:25.700913060,2022-06-15 23:30:00


In [10]:
# Try a single scheme by hand: 1 minute on out of every 6-minute cycle.
dc_tag = '1of6'
print(dc_tag)
time_on, cycle_length = (int(part) for part in dc_tag.split('of'))
time_on_in_secs = 60*time_on
data_params = {'recording_start': '00:00', 'recording_end': '16:00'}

# Pass a copy so mock_df itself stays as created above.
mock_df_subsampled = simulate_dutycycle_on_detections(
    mock_df.copy(), cycle_length, time_on_in_secs, data_params
)
mock_df_subsampled

1of6


Unnamed: 0,start_time_wrt_ref,end_time_wrt_ref,start_time,end_time,low_freq,high_freq,freq_group,call_start_time,call_end_time,ref_time,cycle_ref_time
0,0.000000,0.600000,0.000000,0.010000,20000,30000,LF,2022-06-15 00:00:00.000000000,2022-06-15 00:00:00.600000000,2022-06-15 00:00:00.000000000,2022-06-15 00:00:00
1,34.299087,34.899087,0.571651,0.581651,20000,30000,LF,2022-06-15 00:00:34.299086939,2022-06-15 00:00:34.899086939,2022-06-15 00:00:34.299086939,2022-06-15 00:00:00
11,17.289956,17.889956,6.288166,6.298166,20000,30000,LF,2022-06-15 00:06:17.289956331,2022-06-15 00:06:17.889956331,2022-06-15 00:06:17.289956331,2022-06-15 00:06:00
12,51.589043,52.189043,6.859817,6.869817,20000,30000,LF,2022-06-15 00:06:51.589043271,2022-06-15 00:06:52.189043271,2022-06-15 00:06:51.589043271,2022-06-15 00:06:00
21,0.280826,0.880826,12.004680,12.014680,20000,30000,LF,2022-06-15 00:12:00.280825724,2022-06-15 00:12:00.880825724,2022-06-15 00:12:00.280825724,2022-06-15 00:12:00
...,...,...,...,...,...,...,...,...,...,...,...
2488,16.128305,16.728305,1422.268805,1422.278805,20000,30000,LF,2022-06-15 23:42:16.128304882,2022-06-15 23:42:16.728304882,2022-06-15 23:42:16.128304882,2022-06-15 23:42:00
2489,50.427392,51.027392,1422.840457,1422.850457,20000,30000,LF,2022-06-15 23:42:50.427391822,2022-06-15 23:42:51.027391822,2022-06-15 23:42:50.427391822,2022-06-15 23:42:00
2499,33.418261,34.018261,1428.556971,1428.566971,20000,30000,LF,2022-06-15 23:48:33.418261214,2022-06-15 23:48:34.018261214,2022-06-15 23:48:33.418261214,2022-06-15 23:48:00
2509,16.409131,17.009131,1434.273486,1434.283486,20000,30000,LF,2022-06-15 23:54:16.409130607,2022-06-15 23:54:17.009130607,2022-06-15 23:54:16.409130607,2022-06-15 23:54:00


In [11]:
# Run the subsampling check over every duty-cycle scheme; a failed check raises AssertionError.
# NOTE(review): the saved output under this cell lists each dc_tag followed by a call count,
# but the test function defined in this notebook contains no print calls -- the output looks
# stale. Restart the kernel and re-run to refresh it.
test_if_subsampling_reduces_number_of_calls_by_expected_factor_in_mock_dataset()

30of30
2520
1of6
417
2of6
838
3of6
1257
4of6
1678
2of12
419
4of12
840
6of12
1259
8of12
1680
3of18


420
6of18
840
9of18
1259
12of18
1679
4of24
420
8of24
840
12of24
1260
16of24
1680
5of30
420
10of30
840
15of30
1260
20of30
1679
6of36
420
12of36
840
18of36
1260
24of36
1680
8of48
420
16of48
840
24of48
1260
32of48
1680
10of60
420
20of60
840
30of60
1260
40of60
1680
12of72
420
24of72
840
36of72
1260
48of72
1680
15of90
420
30of90
840
45of90
1260
60of90
1680
16of96
420
32of96
840
48of96
1260
64of96
1680
20of120
420
40of120
840
60of120
1260
80of120
1680
24of144
420
48of144
840
72of144
1260
96of144
1680
30of180
420
60of180
840
90of180
1260
120of180
1680
40of240
420
80of240
840
120of240
1260
160of240
1680
48of288
420
96of288
840
144of288
1260
192of288
1680
60of360
420
120of360
840
180of360
1260
240of360
1680
80of480
420
160of480
840
240of480
1260
320of480
1680
120of720
420
240of720
840
360of720
1260
480of720
1680
