In [1]:
import numpy as np
import pandas as pd
import scipy
from scipy import stats
import datetime as dt

import matplotlib.pyplot as plt
from matplotlib import colors
import soundfile as sf
import matplotlib.patches as patches
from pathlib import Path

In [2]:
import sys

# Make the project's source folders importable from this notebook.
# The membership guard keeps the cell idempotent: re-running it does not
# append duplicate entries to sys.path.
for src_dir in ("../src", "../src/activity"):
    if src_dir not in sys.path:
        sys.path.append(src_dir)

In [3]:
from core import SITE_NAMES, FREQ_GROUPS
from cli import get_file_paths

import subsampling as ss
import bout as bt
import pipeline

In [4]:
# Duty-cycle schemes to compare: every cycle length crossed with every on-fraction.
cycle_lengths = [12, 30, 60]
percent_ons = [1/6, 1/3, 1/2, 2/3]
specific_dc_tag = "30of30"
dc_tags = ss.get_list_of_dc_tags(cycle_lengths, percent_ons)

# Parameters describing the data to analyse.
data_params = {
    "cycle_lengths": cycle_lengths,
    "percent_ons": percent_ons,
    "dc_tags": dc_tags,
    "cur_dc_tag": specific_dc_tag,
    "bin_size": '30',
    "recording_start": '00:00',
    "recording_end": '16:00',
}

# Switches read by the project pipeline; nothing is saved to disk from this notebook.
pipeline_params = {
    "assemble_location_summary": False,
    "read_csv": False,
    "use_threshold_to_group": False,
    "use_kmeans_to_group": True,
    "save_activity_grid": False,
    "save_presence_grid": False,
    "save_dc_night_comparisons": False,
    "save_activity_dc_comparisons": False,
    "save_presence_dc_comparisons": False,
    "show_plots": True,
    "show_PST": True,
}

# The notebook is pinned to one site and one frequency group instead of
# looping over every entry of site_keys / type_keys.
site_keys = ['Carp']
type_keys = ['', 'LF', 'HF']
site_key = site_keys[0]
type_key = 'LF'
print(site_key, type_key)

data_params.update(
    site_name=SITE_NAMES[site_key],
    site_tag=site_key,
    type_tag=type_key,
)

file_paths = get_file_paths(data_params)

# pipeline.run_for_bouts and the plotting helpers are intentionally not called
# here; the cells below assemble the per-scheme bout activity step by step.

Carp LF


In [5]:
def construct_activity_arr_from_bout_metrics(bout_duration_per_interval, data_params, file_paths, dc_tag):
    """
    Construct an activity summary of the % of time occupied by bouts per date and time interval.
    Will be used later to assemble an activity summary for each duty-cycling scheme to compare effects.

    Parameters
    ----------
    bout_duration_per_interval : pd.Series
        Seconds occupied by bouts per interval, indexed by the interval's timestamp.
    data_params : dict
        Reads 'bin_size' (interval length in minutes) plus 'recording_start' and
        'recording_end' (times of day handed to `DataFrame.between_time`).
    file_paths : dict
        Reads 'raw_SITE_folder'. Every `*.csv` file in that folder is expected to
        carry a `%Y%m%d_%H%M%S` timestamp somewhere in its file name.
    dc_tag : str
        Duty-cycle tag used to label the value column.

    Returns
    -------
    pd.DataFrame
        Two columns, "datetime_UTC" and "bout_time (<dc_tag>)", one row per bin
        between the first and last processed file that falls inside the
        recording window (start inclusive, end exclusive).
    """

    value_column = f'bout_time ({dc_tag})'
    bin_freq = f"{data_params['bin_size']}min"

    # Seconds of bouts -> percentage of the bin (bin_size minutes * 60 seconds).
    percent_time_occupied_by_bouts = 100 * (bout_duration_per_interval / (60 * float(data_params['bin_size'])))

    # Parse the timestamp from the file NAME only. Searching the whole path
    # (exact=False) would pick up a date-like pattern in a parent folder name
    # before it ever reaches the file name.
    processed_filenames = sorted(path.name for path in Path(f'{file_paths["raw_SITE_folder"]}').glob('*.csv'))
    all_processed_datetimes = pd.to_datetime(processed_filenames, format="%Y%m%d_%H%M%S", exact=False)

    bout_dpi_df = (
        percent_time_occupied_by_bouts.rename(value_column).to_frame()
        # Files that were processed but produced no bouts count as 0% activity.
        .reindex(index=all_processed_datetimes, fill_value=0)
        # Bins without any processed file stay NaN.
        .resample(bin_freq).first()
        .between_time(data_params['recording_start'], data_params['recording_end'], inclusive='left')
    )

    return bout_dpi_df.rename_axis("datetime_UTC").reset_index()

In [6]:
def get_bout_duration_per_interval(bout_metrics, dc_tag, data_params):
    """
    Constructs a pandas Series that records the duration of time occupied by bouts observed per interval.
    The used interval is the one stored inside data_params['bin_size']

    Parameters
    ----------
    bout_metrics : pd.DataFrame
        Needs the columns 'start_time_of_bout' (convertible to a DatetimeIndex)
        and 'bout_duration_in_secs'. The frame is NOT modified.
    dc_tag : str
        Duty-cycle tag used in the name of the returned Series.
    data_params : dict
        Reads 'bin_size', the interval length in minutes.

    Returns
    -------
    pd.Series
        Named 'total_bout_duration_in_secs (<dc_tag>)', indexed by 'ref_time'
        (interval start), holding the summed duration of all bouts that START
        in each interval.
    """

    # Build a standalone Series instead of adding helper columns to the
    # caller's DataFrame, so the input is left untouched.
    durations = bout_metrics['bout_duration_in_secs'].rename(f'total_bout_duration_in_secs ({dc_tag})').copy()
    durations.index = pd.DatetimeIndex(bout_metrics['start_time_of_bout'], name='ref_time')

    # 'min' is the supported spelling of the minute offset alias ('T' is deprecated).
    bout_duration_per_interval = durations.resample(f"{data_params['bin_size']}min").sum()

    return bout_duration_per_interval

In [7]:
# Duty-cycle tags generated above from the configured cycle lengths and on-fractions.
data_params["dc_tags"]

['30of30',
 '2of12',
 '4of12',
 '6of12',
 '8of12',
 '5of30',
 '10of30',
 '15of30',
 '20of30',
 '10of60',
 '20of60',
 '30of60',
 '40of60']

In [8]:
# Compute the bout activity for every duty-cycle scheme, then join the schemes
# column-wise. The per-scheme pieces are collected in lists and concatenated
# once, instead of repeatedly concatenating onto an (initially empty) DataFrame.
dc_bout_frames = []
dc_bout_durations = []

for dc_tag in data_params['dc_tags']:
    location_df = ss.prepare_summary_for_plotting_with_duty_cycle(file_paths, dc_tag, data_params['bin_size'])
    bout_metrics = bt.generate_bout_metrics_for_location_and_freq(location_df, data_params, dc_tag)
    bout_duration_per_interval = get_bout_duration_per_interval(bout_metrics, dc_tag, data_params)
    dc_bouts = construct_activity_arr_from_bout_metrics(bout_duration_per_interval, data_params, file_paths, dc_tag)
    dc_bouts = dc_bouts.set_index("datetime_UTC")
    dc_bout_durations.append(bout_duration_per_interval)
    dc_bout_frames.append(dc_bouts)

# pd.concat rejects an empty list, so fall back to empty frames when no scheme is configured.
bout_durations = pd.concat(dc_bout_durations, axis=1) if dc_bout_durations else pd.DataFrame()
activity_bouts_arr = pd.concat(dc_bout_frames, axis=1) if dc_bout_frames else pd.DataFrame()

In [9]:
# Restrict the analysis to one recording date (2022-08-10, 00:00 through 16:00).
data_params.update(
    start=dt.datetime(2022, 8, 10, 0, 0, 0),
    end=dt.datetime(2022, 8, 10, 16, 0, 0),
)
section_window = slice(data_params['start'], data_params['end'])
actvt_metric = activity_bouts_arr.loc[section_window].copy()

# Preview at most the first 32 bins.
actvt_metric.head(32)

Unnamed: 0_level_0,bout_time (30of30),bout_time (2of12),bout_time (4of12),bout_time (6of12),bout_time (8of12),bout_time (5of30),bout_time (10of30),bout_time (15of30),bout_time (20of30),bout_time (10of60),bout_time (20of60),bout_time (30of60),bout_time (40of60)
datetime_UTC,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2022-08-10 00:00:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2022-08-10 00:30:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2022-08-10 01:00:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2022-08-10 01:30:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2022-08-10 02:00:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2022-08-10 02:30:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2022-08-10 03:00:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2022-08-10 03:30:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2022-08-10 04:00:00,1.785306,0.155611,0.82425,1.780444,1.780444,0.0,0.0,0.759522,1.6245,0.0,1.6245,1.785306,1.785306
2022-08-10 04:30:00,15.354228,0.647967,1.822944,4.924417,7.148283,0.0,0.650917,7.226311,10.594161,0.0,0.0,0.0,0.650917


In [10]:
def get_recover_ratios_per_bin(clength, time_on, actvt_metric, bin_size=30):
    """
    Lay a duty-cycle schedule over the bins of `actvt_metric` and report, per bin,
    how many minutes were recorded and the factor needed to scale the recorded
    portion up to the whole bin.

    The schedule starts with an "on" period at the first bin and repeats every
    `clength` minutes: `time_on` minutes on, then `clength - time_on` minutes off.

    Parameters
    ----------
    clength : int
        Cycle length in minutes.
    time_on : int
        Minutes of recording at the start of every cycle.
    actvt_metric : pd.Series or pd.DataFrame
        Only its index is used. The first and last entries are taken as the start
        times of the first and last bin (assumes a sorted DatetimeIndex of bin
        starts spaced `bin_size` minutes apart -- TODO confirm against callers).
    bin_size : int, default 30
        Bin length in minutes.

    Returns
    -------
    pd.DataFrame
        Indexed by bin start, one row per bin INCLUDING the last one, with columns
        'time_on' (minutes recorded in the bin) and 'recover_ratio'
        (`bin_size / time_on`, or 0 for bins with no recording at all).
    """
    if len(actvt_metric.index) == 0:
        return pd.DataFrame(columns=['time_on', 'recover_ratio'], index=pd.DatetimeIndex([]))

    first_bin_start = actvt_metric.index[0]
    # The last index entry is the START of the last bin, so the minute slots have
    # to run until that bin's end; stopping at its start would drop the bin and
    # leave NaN there once the ratios are aligned with the activity values.
    last_bin_end = actvt_metric.index[-1] + pd.Timedelta(minutes=bin_size)
    slots = pd.date_range(first_bin_start, last_bin_end, freq='min', inclusive='left')

    # One full cycle at 1-minute resolution, repeated and cut to length; the cut
    # keeps the leading part of the final, partial cycle.
    one_cycle = np.concatenate([np.ones(time_on, dtype=int), np.zeros(clength - time_on, dtype=int)])
    n_cycles = len(slots) // clength + 1
    minute_is_on = np.tile(one_cycle, n_cycles)[:len(slots)]

    dc_scheme = pd.DataFrame({'time_on': minute_is_on}, index=slots)
    dc_scheme_on = dc_scheme.resample(f'{bin_size}min').sum()
    # Bins that were never recorded divide by zero (inf); report 0 for them.
    dc_scheme_on['recover_ratio'] = (bin_size / dc_scheme_on['time_on']).replace(np.inf, 0)
    return dc_scheme_on

In [11]:
def get_desired_metrics_from_activity_section(actvt_metric, data_params):
    """
    Scale every duty-cycled activity column up to a full-bin estimate and split the
    result into the control scheme and the experimental schemes.

    Parameters
    ----------
    actvt_metric : pd.DataFrame
        One column per duty-cycle tag, named '<metric_tag> (<dc_tag>)', indexed by bin start.
    data_params : dict
        Reads 'dc_tags' (tags of the form '<on>of<total>'; the FIRST tag is treated
        as the control), 'metric_tag' and 'bin_size' (minutes).

    Returns
    -------
    dict
        'time_ons_per_dc' : DataFrame of minutes recorded per bin, one column per tag.
        'exp_metric'      : DataFrame of scaled metrics for every tag except the first.
        'control_metric'  : Series of the scaled metric for the first tag.
        'cycle_lengths'   : list of cycle lengths for every tag except the first.

    Raises
    ------
    ValueError
        If data_params['dc_tags'] is empty.
    """
    dc_tags = list(data_params["dc_tags"])
    if not dc_tags:
        raise ValueError("data_params['dc_tags'] must contain at least one duty-cycle tag")

    metric_tag = data_params['metric_tag']
    bin_size = int(data_params['bin_size'])
    # Detection counts are reported as an hourly call rate; other metrics keep their name.
    standard_metric = 'call_rate_per_hour' if metric_tag == 'num_dets' else metric_tag

    c_lengths = []
    time_on_columns = []
    high_actvt_metric = pd.DataFrame()
    for i, col_name in enumerate(dc_tags):
        # Accepts both a bare tag ('2of12') and a full column label ('bout_time (2of12)').
        dc_tag = col_name.split('(')[-1].split(')')[0]
        on = int(dc_tag.split('of')[0])
        total = int(dc_tag.split('of')[-1])
        dc_actvt = actvt_metric[f'{metric_tag} ({col_name})'].copy()
        if i > 0:
            c_lengths.append(total)

        dc_scheme_on = get_recover_ratios_per_bin(total, on, dc_actvt)
        time_on_columns.append(dc_scheme_on['time_on'].rename(f'time_on ({dc_tag})'))
        rep_dc_actvt = dc_actvt * dc_scheme_on['recover_ratio']

        if total - on >= bin_size:
            # The off period spans at least one whole bin, so some bins were never
            # recorded and come out as 0. Carry the last non-zero estimate forward
            # into them. NaN is parked as -1 during the fill so it is restored
            # afterwards rather than being overwritten.
            # NOTE(review): zeros from recorded-but-silent bins are filled as well,
            # and 14*60 is presumably a cap in minutes on the fill span -- confirm.
            fill_limit = min((14*60) - on, total - on) // bin_size
            rep_dc_actvt = (
                rep_dc_actvt
                .replace(np.nan, -1)
                .replace(0, np.nan)
                .ffill(limit=fill_limit)
                .replace(np.nan, 0)
                .replace(-1, np.nan)
            )
        high_actvt_metric[f'{standard_metric} ({col_name})'] = rep_dc_actvt

    time_ons_per_dc = pd.concat(time_on_columns, axis=1)

    high_actvt_desired_metrics = high_actvt_metric.copy()
    if (standard_metric == 'call_rate_per_hour') or (standard_metric == 'activity_indices'):
        # Per-bin values -> per-hour values (60 / bin_size bins per hour).
        high_actvt_desired_metrics = high_actvt_desired_metrics * (60 / bin_size)

    metrics = dict()
    metrics['time_ons_per_dc'] = time_ons_per_dc
    metrics['exp_metric'] = high_actvt_desired_metrics.iloc[:, 1:]
    metrics['control_metric'] = high_actvt_desired_metrics.iloc[:, 0]
    metrics['cycle_lengths'] = c_lengths

    return metrics

In [12]:
# Analyse the 'bout_time (<dc_tag>)' columns of the selected activity section.
data_params['metric_tag'] = 'bout_time'
metrics = get_desired_metrics_from_activity_section(actvt_metric, data_params)
# Show the scaled estimates for the duty-cycled (non-control) schemes.
metrics['exp_metric']

Unnamed: 0,bout_time (2of12),bout_time (4of12),bout_time (6of12),bout_time (8of12),bout_time (5of30),bout_time (10of30),bout_time (15of30),bout_time (20of30),bout_time (10of60),bout_time (20of60),bout_time (30of60),bout_time (40of60)
2022-08-10 00:00:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2022-08-10 00:30:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2022-08-10 01:00:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2022-08-10 01:30:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2022-08-10 02:00:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2022-08-10 02:30:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2022-08-10 03:00:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2022-08-10 03:30:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2022-08-10 04:00:00,0.778055,2.060625,2.967407,2.427879,0.0,0.0,1.519044,2.43675,0.0,2.43675,1.785306,1.785306
2022-08-10 04:30:00,4.85975,6.836042,12.311042,11.913806,0.0,1.95275,14.452622,15.891242,0.0,2.43675,1.785306,1.95275


In [13]:
# Minutes of recording that fall inside each 30-minute bin, per duty-cycle scheme.
metrics['time_ons_per_dc']

Unnamed: 0,time_on (30of30),time_on (2of12),time_on (4of12),time_on (6of12),time_on (8of12),time_on (5of30),time_on (10of30),time_on (15of30),time_on (20of30),time_on (10of60),time_on (20of60),time_on (30of60),time_on (40of60)
2022-08-10 00:00:00,30,6,12,18,22,5,10,15,20,10,20,30,30
2022-08-10 00:30:00,30,4,8,12,18,5,10,15,20,0,0,0,10
2022-08-10 01:00:00,30,6,12,18,22,5,10,15,20,10,20,30,30
2022-08-10 01:30:00,30,4,8,12,18,5,10,15,20,0,0,0,10
2022-08-10 02:00:00,30,6,12,18,22,5,10,15,20,10,20,30,30
2022-08-10 02:30:00,30,4,8,12,18,5,10,15,20,0,0,0,10
2022-08-10 03:00:00,30,6,12,18,22,5,10,15,20,10,20,30,30
2022-08-10 03:30:00,30,4,8,12,18,5,10,15,20,0,0,0,10
2022-08-10 04:00:00,30,6,12,18,22,5,10,15,20,10,20,30,30
2022-08-10 04:30:00,30,4,8,12,18,5,10,15,20,0,0,0,10


In [14]:
# Sanity check: does any duty-cycled estimate reach or exceed 100% of a bin?
# `False in (df < 100)` tests the DataFrame's column labels rather than its
# values, so it was always False regardless of the data; inspect the values directly.
bool((metrics['exp_metric'] >= 100).to_numpy().any())

False