# Data extraction for covariance event analysis

This notebook is used to extract data such as resting EODf, sex, modulation amplitude during synchronous modulations, etc. from the dataset used in event detection.
The goal is to extract:

- Resting EOD $f$ for each interacting individual
- Sex of each interacting individual
- Modulation amplitude (EOD $f_{event}$ - EOD $f_{baseline}$) per individual
- Resting $\Delta$ EOD $f$ for each interacting dyad
- Event $\Delta$ EOD $f$ for each interacting dyad

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import gridtools as gt
%matplotlib qt

# Directory that holds one sub-directory per recording (relative to this notebook).
dataroot = "../output/"
# Example of how a recording could be excluded (currently disabled):
#exclude = ['2016-04-09-22_25']
# Handle on the recordings found below dataroot; nothing is excluded.
recs = gt.ListRecordings(dataroot, exclude=[])

## Establish sex threshold

To establish a threshold for sex determination, I compute the kernel density estimate (KDE) of all frequency tracks of all recordings in the dataset. The resulting KDE and histogram should have two peaks, one for males and one for females. The lowest point between the two peaks is the sex threshold.

In [2]:
# Pool the frequency values of every recording into one flat list so that
# their joint distribution can be inspected.
funds = []
for recording in recs.recordings:

    # each recording lives in its own sub-directory below the data root
    datapath = recs.dataroot + recording + '/'
    grid = gt.GridTracks(datapath, finespec=False, verbose=True)

    # Q10 normalization first ...
    grid.q10_norm()

    # ... then an additional bandpass filter for recordings without temp
    grid.freq_bandpass(rate=3, flow=0.00004, fhigh=0.4, order=2, eod_shift=True)

    # add this recording's values to the pool used for the KDE
    funds += grid.fund_v.tolist()

# Optional visual check of the pooled distribution (disabled):
# kde = gt.utils.kde1d(np.asarray(funds), 0.5)
#
# fig, ax = plt.subplots(figsize = (12,6))
# ax.hist(funds, density=True, bins = 100)
# ax.plot(kde[0], kde[1])

[93m[1m[ GridTracks.__init__ ][0m No grid metadata found in directory ../output/2016-04-20-18_49/
[92m[1m[ GridTracks.freq_bandpass ][0m Applying bandpass filter ...
computed AreaUnderCurve (AUC) of KDE using sklearn.metrics.auc: 0.9950552608808084
computed AreaUnderCurve (AUC) of KDE using sklearn.metrics.auc: 0.9749434874143339
computed AreaUnderCurve (AUC) of KDE using sklearn.metrics.auc: 0.9978156832004972
computed AreaUnderCurve (AUC) of KDE using sklearn.metrics.auc: 0.9913657100200254
[93m[1m[ GridTracks.__init__ ][0m No grid metadata found in directory ../output/2016-04-18-19_22/
[92m[1m[ GridTracks.freq_bandpass ][0m Applying bandpass filter ...
computed AreaUnderCurve (AUC) of KDE using sklearn.metrics.auc: 0.9947061984867224
computed AreaUnderCurve (AUC) of KDE using sklearn.metrics.auc: 0.9951941546625365
computed AreaUnderCurve (AUC) of KDE using sklearn.metrics.auc: 0.9985971936999303
computed AreaUnderCurve (AUC) of KDE using sklearn.metrics.auc: 0.998042298

After establishing a sex threshold, I iterate over all recordings in the dataset to extract the information for the individuals for which events were detected. The sex threshold is set to approximately 740 Hz for Q10-normalized frequency tracks.

In [3]:
def findlistinlist(list, listoflists):
    """Return every position at which `list` occurs in `listoflists`.

    Parameters
    ----------
    list : object
        The item to look for (note: shadows the builtin inside this function).
    listoflists : iterable
        Candidates that are compared against `list` one by one.

    Returns
    -------
    list of int
        Indices of all matching entries in ascending order; empty if there
        is no match.
    """
    return [
        position
        for position, candidate in enumerate(listoflists)
        if list in (candidate,)
    ]

# Column-wise accumulator for the output table: every key maps to its own
# empty list, and the extraction loop appends one value per event to each.
df = {
    column: []
    for column in (
        "recording",          # recording date
        "dyad_id",            # id of dyad
        "id1",                # id1 of dyad
        "id2",                # id2 of dyad
        "initiator",          # interaction initiator
        "sex_id1",            # sex of id1
        "sex_id2",            # sex of id2
        "restingf_id1",       # resting eodf of id1 (median of the pre-event baseline)
        "restingf_id2",       # resting eodf of id2 (median of the pre-event baseline)
        "maxeventf_id1",      # max eodf during event of id1
        "maxeventf_id2",      # max eodf during event of id2
        "medianeventf_id1",   # median of eodf during event of id1
        "medianeventf_id2",   # median of eodf during event of id2
        "diff_event_id1",     # maxeventf_id1 minus grid.eodf of id1, i.e. modulation amp
        "diff_event_id2",     # maxeventf_id2 minus grid.eodf of id2
        "diff_medianevent",   # abs(median(event1 - event2))
    )
}

# Length of the window directly before each event that serves as baseline
# for the resting EODf, in seconds (1200 s = 20 min).
BASELINE_WINDOW_S = 1200

for counter, recording in enumerate(recs.recordings):

    # Build the recording directory with exactly one separator between the
    # parts, whether or not recs.dataroot already ends with a slash.
    datapath = recs.dataroot.rstrip('/') + '/' + recording + '/'

    # load and normalize grid recording
    grid = gt.GridTracks(datapath, finespec=False, verbose=True)
    grid.q10_norm()

    # additionally bandpass filter for recordings without temp
    # and to use restingeodf to calculate amp during modulation
    # (currently disabled)
    # grid.freq_bandpass(
    #     rate = 3,
    #     flow = 0.00004,
    #     fhigh = 0.4,
    #     order=2,
    #     eod_shift=True
    #     )

    # presumably fills grid.sex, which is read below -- TODO confirm
    grid.sex_ids()

    # open events dataframe; columns used below: id1, id2, start, stop, init
    events = pd.read_csv(datapath + 'events.csv')

    # One sorted id pair per event, so that (a, b) and (b, a) count as the
    # same dyad; the unique pairs are then numbered to form the dyad ids.
    dyads = [sorted([events.id1[idx], events.id2[idx]]) for idx in events.index]
    unique_dyads = [list(x) for x in set(tuple(x) for x in dyads)]
    # NOTE(review): recording counter and dyad number are concatenated
    # without a separator, so ids can collide once either number has more
    # than one digit (e.g. 1 + 10 and 11 + 0 both give "110"). Left as is
    # because changing it alters the exported values -- confirm with the
    # consumers of eventstats.csv before adding a separator.
    dyad_ids = [str(counter)+str(dy) for dy in range(len(unique_dyads))]

    for idx, dy in zip(events.index, dyads):

        dyad = gt.Dyad(grid, dy)

        # sample indices of event start, event stop and baseline start
        start = gt.utils.find_closest(dyad.times, events.start[idx])
        stop = gt.utils.find_closest(dyad.times, events.stop[idx])
        baseline_start = gt.utils.find_closest(
            dyad.times, events.start[idx] - BASELINE_WINDOW_S
        )

        # Cannot happen by construction (unique_dyads is built from the same
        # events); kept as a sanity check.
        if dy not in unique_dyads:
            print("ERROR, event dyad did not end up in unique dyads!")
            continue

        index = findlistinlist(dy, unique_dyads)
        id1, id2 = events.id1[idx], events.id2[idx]

        # Compute everything before touching df, so that an exception cannot
        # leave the column lists with unequal lengths.
        event_f1 = dyad.fund_id1[start:stop]            # eodf of id1 during event
        event_f2 = dyad.fund_id2[start:stop]            # eodf of id2 during event
        base_f1 = dyad.fund_id1[baseline_start:start]   # baseline before event
        base_f2 = dyad.fund_id2[baseline_start:start]   # baseline before event
        max_f1 = np.max(event_f1)
        max_f2 = np.max(event_f2)

        row = {
            "recording": recording,                     # write recording to output
            "dyad_id": dyad_ids[index[0]],              # dyad id
            "id1": int(id1),                            # dyad id 1
            "id2": int(id2),                            # dyad id 2
            "initiator": events.init[idx],              # initiator
            "sex_id1": grid.sex[grid.ids == id1][0],    # sex of id 1
            "sex_id2": grid.sex[grid.ids == id2][0],    # sex of id 2
            "restingf_id1": np.median(base_f1),
            "restingf_id2": np.median(base_f2),
            "maxeventf_id1": max_f1,
            "maxeventf_id2": max_f2,
            # modulation amplitude relative to grid.eodf (not to restingf_*)
            "diff_event_id1": max_f1 - grid.eodf[grid.ids == id1][0],
            "diff_event_id2": max_f2 - grid.eodf[grid.ids == id2][0],
            #"modeeventf_id1": gt.utils.kde1d_mode(event_f1, 0.3, 100),
            #"modeeventf_id2": gt.utils.kde1d_mode(event_f2, 0.3, 100),
            "medianeventf_id1": np.median(event_f1),
            "medianeventf_id2": np.median(event_f2),
            "diff_medianevent": np.abs(np.median(event_f1 - event_f2)),
        }

        for column, value in row.items():
            df[column].append(value)

[93m[1m[ GridTracks.__init__ ][0m No grid metadata found in directory ../output//2016-04-20-18_49/
computed AreaUnderCurve (AUC) of KDE using sklearn.metrics.auc: 0.9950552608808084
computed AreaUnderCurve (AUC) of KDE using sklearn.metrics.auc: 0.9749434874143339
computed AreaUnderCurve (AUC) of KDE using sklearn.metrics.auc: 0.9978156832004972
computed AreaUnderCurve (AUC) of KDE using sklearn.metrics.auc: 0.9913657100200254
[93m[1m[ GridTracks.__init__ ][0m No grid metadata found in directory ../output//2016-04-18-19_22/
computed AreaUnderCurve (AUC) of KDE using sklearn.metrics.auc: 0.9947061984867224
computed AreaUnderCurve (AUC) of KDE using sklearn.metrics.auc: 0.9951941546625365
computed AreaUnderCurve (AUC) of KDE using sklearn.metrics.auc: 0.9985971936999303
computed AreaUnderCurve (AUC) of KDE using sklearn.metrics.auc: 0.9980422988728518
computed AreaUnderCurve (AUC) of KDE using sklearn.metrics.auc: 0.9984378899217253
[93m[1m[ GridTracks.__init__ ][0m No grid meta

In [4]:
# Turn the column-wise dict of lists into a table and add the delta columns.
df = pd.DataFrame(df)

# absolute difference between the resting eodfs of the two dyad members
df["diff_resting"] = (df["restingf_id1"] - df["restingf_id2"]).abs()

# absolute difference between the event eodf maxima of the two dyad members
df["diff_maxevent"] = (df["maxeventf_id1"] - df["maxeventf_id2"]).abs()

In [5]:
# Write the per-event table to disk in the output directory.
df.to_csv("../output/eventstats.csv")