# Get MF-DFA on Surrogate Data
#### Date: 9/07/2022, JX
> Generate IAAFT surrogate data for the amplitude envelopes in /mnt/cube/j8xing/starling_rhythm_mfdfa/data/starling_zenodo_2019_amp_envs_MF.pickle and run MF-DFA on the surrogates

In [1]:
from starling_rhythm.utils.paths import PROCESSED_DIR

## Import dataframe

In [2]:
import pandas as pd

In [3]:
## Location of the pickled dataframe inside the processed-data directory
SAVE_PATH = PROCESSED_DIR.joinpath('starling_b1555_rolling.pickle')

## Load it into memory
ae_df = pd.read_pickle(SAVE_PATH)

In [4]:
## Preview the first rows to check the loaded columns
ae_df.head()

Unnamed: 0,file,ae_windows,wf_windows,ae_segs,wf_segs,MF_range,h_expos
0,/mnt/sphere/speech_bci/raw_data/s_b1555_22/202...,"[0, 200, 400, 600, 800, 1000, 1200, 1400, 1600...","[0, 48000, 96000, 144000, 192000, 240000, 2880...","[[2.4892911824281327e-05, 0.002640440361574292...","[[-5, -5, -5, -5, -3, -5, -5, -4, -6, -4, -2, ...","[0.8730673405528055, 0.8706626699489289, 0.824...","[[1.4657739775265182, 1.1010962192283225, 0.91..."
1,/mnt/sphere/speech_bci/raw_data/s_b1555_22/202...,"[0, 200, 400, 600, 800, 1000, 1200, 1400, 1600...","[0, 48000, 96000, 144000, 192000, 240000, 2880...","[[5.805372893519234e-06, 0.0016227918677031994...","[[3, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, -1, -1, ...","[0.9664169621780289, 0.9809983239740178, 0.921...","[[1.5238986061634554, 1.0951816274909019, 0.89..."
2,/mnt/sphere/speech_bci/raw_data/s_b1555_22/202...,"[0, 200, 400, 600, 800, 1000, 1200, 1400, 1600...","[0, 48000, 96000, 144000, 192000, 240000, 2880...","[[9.765496361069381e-05, 0.007925855927169323,...","[[-11, -11, -11, -11, -12, -11, -11, -11, -10,...","[0.8120662801276709, 0.7727702312106444, 0.668...","[[1.4775920884087206, 1.2183430534260593, 1.04..."
3,/mnt/sphere/speech_bci/raw_data/s_b1555_22/202...,"[0, 200, 400, 600, 800, 1000, 1200, 1400, 1600...","[0, 48000, 96000, 144000, 192000, 240000, 2880...","[[5.769828931079246e-05, 0.015915358439087868,...","[[-2, -2, -1, -2, -2, -2, -3, -3, -3, -3, -3, ...","[0.8538805324313644, 0.8593869372675185, 0.891...","[[1.6557807113490535, 1.2840303064511298, 1.09..."
4,/mnt/sphere/speech_bci/raw_data/s_b1555_22/202...,"[0, 200, 400, 600, 800, 1000, 1200, 1400, 1600...","[0, 48000, 96000, 144000, 192000, 240000, 2880...","[[5.337237780622672e-06, 0.0012986172223463655...","[[6, 5, 5, 5, 3, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3,...","[0.5083580005636952, 0.4765988787740265, 0.458...","[[1.3349906028500809, 1.1858083499345686, 1.08..."


In [5]:
## Look at only the first 25 rows. Take an explicit copy so that the column
## assigned to ae_df later is written to an independent dataframe instead of
## a slice of the loaded one (avoids pandas' SettingWithCopyWarning).
ae_df = ae_df[:25].copy()

## Generate IAAFT Surrogates
> 1. Store the amplitude spectrum (the list of amplitudes corresponding to each frequency of the Fourier transform, FT)
2. Randomize the series (build a randomized phase spectrum)
3. Replace the amplitude spectrum of the randomized series with the stored amplitude spectrum, then apply the inverse FT (iFT)
4. Replace each value of the iFT output with the value of matching rank in the original series
5. Repeat steps 2-4 until the difference between the empirical and surrogate amplitude spectra is less than 5 percent.
## Retrieve Sampling Distribution of MF_range for each amplitude envelope
> Use surrogate count of 32 for $\alpha$ = 0.05, effect size = 0.5, power = 0.5 (See Kelty-Stephen et al. 2022)

In [6]:
import starling_rhythm.iaaft as iaaft
from starling_rhythm.mfdfa import hurst_expo
from joblib import Parallel, delayed
from timeit import timeit
from tqdm.autonotebook import tqdm

  from .autonotebook import tqdm as notebook_tqdm


In [7]:
def SDoMF_iaaft(amp_env, ns = 32):
    '''
    Build the sampling distribution of MF_range for one amplitude envelope.

    Generates `ns` IAAFT surrogates of `amp_env` and returns a list with one
    value per surrogate: the spread (max - min) of the Hurst exponents that
    hurst_expo returns for that surrogate.
    '''

    ## surrogate series for this envelope (maxiter = 10000, logging off)
    surrogate_set = iaaft.surrogates(amp_env, ns = ns, maxiter = 10000, verbose = False)

    ## Hurst exponents are computed once per surrogate, in surrogate order
    return [max(hs) - min(hs) for hs in map(hurst_expo, surrogate_set)]

In [8]:
## joblib parameters: number of workers, and verbosity (0 = no joblib progress messages)
n_jobs, verbose = 36, 0

In [9]:
import psutil

## Set the niceness of the current process to 19 (lowest scheduling priority on
## Unix-like systems) -- presumably so the long parallel run yields to other users
psutil.Process().nice(value = 19)

In [10]:
total_mf_ranges = []

## Open the worker pool once and reuse it for every row. Entering
## Parallel(...) inside the loop sets the pool up and tears it down again on
## each iteration, which is pure overhead.
with Parallel(n_jobs = n_jobs, verbose = verbose) as parallel:
    for ae_seg in ae_df.ae_segs.values:
        ## One task per amplitude-envelope segment of this row; each task returns
        ## the list of MF ranges of that segment's surrogates.
        ## NOTE: tqdm wraps the task generator, so the bar tracks how fast tasks
        ## are dispatched to the workers, not how fast they complete.
        mf_ranges = parallel(
            delayed(SDoMF_iaaft)(amp_env)
            for amp_env in tqdm(ae_seg, desc = 'Running Surrogate Data')
        )
        total_mf_ranges.append(mf_ranges)

## One entry per dataframe row, in row order
ae_df['mf_iaaft_range'] = total_mf_ranges

Running Surrogate Data: 100%|██████████| 60/60 [00:03<00:00, 17.45it/s]
Running Surrogate Data: 100%|██████████| 55/55 [00:00<00:00, 2313.81it/s]
Running Surrogate Data: 100%|██████████| 55/55 [00:00<00:00, 3038.79it/s]
Running Surrogate Data: 100%|██████████| 53/53 [00:00<00:00, 2045.10it/s]
Running Surrogate Data: 100%|██████████| 53/53 [00:00<00:00, 2999.17it/s]
Running Surrogate Data: 100%|██████████| 53/53 [00:00<00:00, 2975.36it/s]
Running Surrogate Data: 100%|██████████| 53/53 [00:00<00:00, 3327.17it/s]
Running Surrogate Data: 100%|██████████| 53/53 [00:00<00:00, 3035.91it/s]
Running Surrogate Data: 100%|██████████| 52/52 [00:00<00:00, 3165.83it/s]
Running Surrogate Data: 100%|██████████| 52/52 [00:00<00:00, 2194.00it/s]
Running Surrogate Data: 100%|██████████| 52/52 [00:00<00:00, 7191.97it/s]
Running Surrogate Data: 100%|██████████| 51/51 [00:00<00:00, 3341.97it/s]
Running Surrogate Data: 100%|██████████| 51/51 [00:00<00:00, 2387.68it/s]
Running Surrogate Data: 100%|██████████|

## Pickle Dataframe

In [11]:
from starling_rhythm.utils.paths import PROCESSED_DIR, ensure_dir

In [12]:
## Output location for the dataframe that now carries the surrogate results
SAVE_PATH = PROCESSED_DIR.joinpath('starling_b1555_rolling_IAAFT.pickle')

## Project helper; presumably makes sure the target directory exists -- confirm in utils.paths
ensure_dir(SAVE_PATH)

In [13]:
## Write the dataframe, including the mf_iaaft_range column, to SAVE_PATH
ae_df.to_pickle(SAVE_PATH)