In [1]:
import numpy as np
import pandas as pd
import datetime as dt
import scipy
from pathlib import Path

import matplotlib.pyplot as plt
from sklearn.cluster import k_means, KMeans
from scipy.cluster.hierarchy import dendrogram, linkage
import seaborn as sns

In [2]:
import sys

sys.path.append("../src")

In [3]:
from core import SITE_NAMES
from calls import plot_call_features, compute_features


random_state_for_sites = {'Foliage':800, 'Carp':200, 'Central':0, 'Telephone':0}
color_for_groups = {0: 'blue', 1: 'red', 2:'limegreen'}
label_for_groups = {0: 'LF1', 1: 'HF1', 2:'HF2'}

In [4]:
site_key = 'Carp'
input_dir = Path(f'../data/detected_calls/{site_key}')
output_dir = Path(f'../data/generated_welch/{site_key}')
if not(output_dir.is_dir()):
    output_dir.mkdir(parents=True)
input_file_type = 'top1_inbouts_call_signals'
output_file_type = 'top1_inbouts_welch_signals'

location_call_signals = np.load(input_dir / f'2022_{site_key}_{input_file_type}.npy', allow_pickle=True)
location_calls_sampled = pd.read_csv(input_dir / f'2022_{site_key}_{input_file_type}.csv', index_col=0, low_memory=False)
location_calls_sampled['index'] = location_calls_sampled.index

welch_signals = compute_features.generate_welchs_for_calls(location_calls_sampled, location_call_signals)
welch_data = pd.DataFrame(welch_signals, columns=np.linspace(0, 96000, welch_signals.shape[1]).astype(int))
welch_data.index.name = 'Call #'
welch_data.columns.name = 'Frequency (kHz)'
welch_data.to_csv(output_dir / f'2022_{site_key}_{output_file_type}.csv')

In [5]:
site_key = 'Central'
input_dir = Path(f'../data/detected_calls/{site_key}')
output_dir = Path(f'../data/generated_welch/{site_key}')
if not(output_dir.is_dir()):
    output_dir.mkdir(parents=True)
input_file_type = 'top1_inbouts_call_signals'
output_file_type = 'top1_inbouts_welch_signals'

location_call_signals = np.load(input_dir / f'2022_{site_key}_{input_file_type}.npy', allow_pickle=True)
location_calls_sampled = pd.read_csv(input_dir / f'2022_{site_key}_{input_file_type}.csv', index_col=0, low_memory=False)
location_calls_sampled['index'] = location_calls_sampled.index

welch_signals = compute_features.generate_welchs_for_calls(location_calls_sampled, location_call_signals)
welch_data = pd.DataFrame(welch_signals, columns=np.linspace(0, 96000, welch_signals.shape[1]).astype(int))
welch_data.index.name = 'Call #'
welch_data.columns.name = 'Frequency (kHz)'
welch_data.to_csv(output_dir / f'2022_{site_key}_{output_file_type}.csv')

In [6]:
site_key = 'Foliage'
input_dir = Path(f'../data/detected_calls/{site_key}')
output_dir = Path(f'../data/generated_welch/{site_key}')
if not(output_dir.is_dir()):
    output_dir.mkdir(parents=True)
input_file_type = 'top1_inbouts_call_signals'
output_file_type = 'top1_inbouts_welch_signals'

location_call_signals = np.load(input_dir / f'2022_{site_key}_{input_file_type}.npy', allow_pickle=True)
location_calls_sampled = pd.read_csv(input_dir / f'2022_{site_key}_{input_file_type}.csv', index_col=0, low_memory=False)
location_calls_sampled['index'] = location_calls_sampled.index

welch_signals = compute_features.generate_welchs_for_calls(location_calls_sampled, location_call_signals)
welch_data = pd.DataFrame(welch_signals, columns=np.linspace(0, 96000, welch_signals.shape[1]).astype(int))
welch_data.index.name = 'Call #'
welch_data.columns.name = 'Frequency (kHz)'
welch_data.to_csv(output_dir / f'2022_{site_key}_{output_file_type}.csv')

In [13]:
site_key = 'Telephone'
input_dir = Path(f'../data/detected_calls/{site_key}')
output_dir = Path(f'../data/generated_welch/{site_key}')
if not(output_dir.is_dir()):
    output_dir.mkdir(parents=True)
input_file_type = 'top1_inbouts_call_signals'
output_file_type = 'top1_inbouts_welch_signals'

location_call_signals = np.load(input_dir / f'2022_{site_key}_{input_file_type}.npy', allow_pickle=True)
location_calls_sampled = pd.read_csv(input_dir / f'2022_{site_key}_{input_file_type}.csv', index_col=0, low_memory=False)
location_calls_sampled['index'] = location_calls_sampled.index

welch_signals = compute_features.generate_welchs_for_calls(location_calls_sampled, location_call_signals)
welch_data = pd.DataFrame(welch_signals, columns=np.linspace(0, 96000, welch_signals.shape[1]).astype(int))
welch_data.index.name = 'Call #'
welch_data.columns.name = 'Frequency (kHz)'
welch_data.to_csv(output_dir / f'2022_{site_key}_{output_file_type}.csv')

In [15]:
location_call_signals

array([array([ 1.70962579e-03, -7.26463348e-04, -1.65528840e-03, ...,
              -7.54591859e-03, -8.75610272e-03, -3.09310469e-05])    ,
       array([-8.06055475e-04,  1.64288399e-05,  5.84924665e-04, ...,
              -2.62544355e-03, -2.77484067e-03, -2.71933440e-04])    ,
       array([-0.00154669, -0.00055825,  0.00078432, ...,  0.01029894,
               0.01103709,  0.00024658])                              ,
       ...,
       array([-0.00037176,  0.00041908,  0.00029018, ...,  0.00012692,
               0.0003825 , -0.00019487])                              ,
       array([-3.49778587e-03, -1.18695424e-03,  1.38620008e-03, ...,
              -8.33001250e-04,  8.31596769e-04,  1.84040951e-05])    ,
       array([ 2.07472033e-03,  1.28122338e-03, -6.39980412e-04, ...,
               3.44189870e-03,  2.33933897e-03,  3.74337313e-05])    ],
      dtype=object)

In [8]:
save_site = 'all_locations'
output_dir = Path(f'../data/generated_welch/{save_site}')
if not(output_dir.is_dir()):
    output_dir.mkdir(parents=True)
output_file_type = 'top1_inbouts_welch_signals'

all_sites_welch_signals = []
for site_key in SITE_NAMES.keys():
    input_dir = Path(f'../data/detected_calls/{site_key}')
    input_file_type = 'top1_inbouts_call_signals'
    if (input_dir / f'2022_{site_key}_{input_file_type}.npy').exists():
        print(site_key)
        location_call_signals = np.load(input_dir / f'2022_{site_key}_{input_file_type}.npy', allow_pickle=True)
        location_calls_sampled = pd.read_csv(input_dir / f'2022_{site_key}_{input_file_type}.csv', index_col=0, low_memory=False)
        location_calls_sampled['index'] = location_calls_sampled.index

        welch_signals = compute_features.generate_welchs_for_calls(location_calls_sampled, location_call_signals)
        all_sites_welch_signals.append(welch_signals)

all_sites_welch_signals = np.vstack(all_sites_welch_signals)
welch_data = pd.DataFrame(all_sites_welch_signals, columns=np.linspace(0, 96000, all_sites_welch_signals.shape[1]).astype(int))
welch_data.index.name = 'Call #'
welch_data.columns.name = 'Frequency (kHz)'
welch_data.to_csv(output_dir / f'2022_{save_site}_{output_file_type}.csv')

Central
Foliage
Carp
Telephone


In [9]:
save_site = 'only_ponds'
output_dir = Path(f'../data/generated_welch/{save_site}')
if not(output_dir.is_dir()):
    output_dir.mkdir(parents=True)
output_file_type = 'top1_inbouts_welch_signals'

all_sites_welch_signals = []
for site_key in SITE_NAMES.keys():
    input_dir = Path(f'../data/detected_calls/{site_key}')
    input_file_type = 'top1_inbouts_call_signals'
    if (input_dir / f'2022_{site_key}_{input_file_type}.npy').exists() and 'Pond' in SITE_NAMES[site_key]:
        print(site_key)
        location_call_signals = np.load(input_dir / f'2022_{site_key}_{input_file_type}.npy', allow_pickle=True)
        location_calls_sampled = pd.read_csv(input_dir / f'2022_{site_key}_{input_file_type}.csv', index_col=0, low_memory=False)
        location_calls_sampled['index'] = location_calls_sampled.index

        welch_signals = compute_features.generate_welchs_for_calls(location_calls_sampled, location_call_signals)
        all_sites_welch_signals.append(welch_signals)

all_sites_welch_signals = np.vstack(all_sites_welch_signals)
welch_data = pd.DataFrame(all_sites_welch_signals, columns=np.linspace(0, 96000, all_sites_welch_signals.shape[1]).astype(int))
welch_data.index.name = 'Call #'
welch_data.columns.name = 'Frequency (kHz)'
welch_data.to_csv(output_dir / f'2022_{save_site}_{output_file_type}.csv')

Central
Carp


In [10]:
save_site = 'only_forests'
output_dir = Path(f'../data/generated_welch/{save_site}')
if not(output_dir.is_dir()):
    output_dir.mkdir(parents=True)
output_file_type = 'top1_inbouts_welch_signals'

all_sites_welch_signals = []
for site_key in SITE_NAMES.keys():
    input_dir = Path(f'../data/detected_calls/{site_key}')
    input_file_type = 'top1_inbouts_call_signals'
    if (input_dir / f'2022_{site_key}_{input_file_type}.npy').exists() and 'Pond' in SITE_NAMES[site_key]:
        print(site_key)
        location_call_signals = np.load(input_dir / f'2022_{site_key}_{input_file_type}.npy', allow_pickle=True)
        location_calls_sampled = pd.read_csv(input_dir / f'2022_{site_key}_{input_file_type}.csv', index_col=0, low_memory=False)
        location_calls_sampled['index'] = location_calls_sampled.index

        welch_signals = compute_features.generate_welchs_for_calls(location_calls_sampled, location_call_signals)
        all_sites_welch_signals.append(welch_signals)

all_sites_welch_signals = np.vstack(all_sites_welch_signals)
welch_data = pd.DataFrame(all_sites_welch_signals, columns=np.linspace(0, 96000, all_sites_welch_signals.shape[1]).astype(int))
welch_data.index.name = 'Call #'
welch_data.columns.name = 'Frequency (kHz)'
welch_data.to_csv(output_dir / f'2022_{save_site}_{output_file_type}.csv')

Central
Carp
