# Get MF-DFA on Surrogate Data
#### Date: 9/07/2022, JX
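> Generate IAAFT surrogates of the amplitude envelopes and run MF-DFA on them. Source data: /mnt/cube/j8xing/starling_rhythm_mfdfa/data/starling_zenodo_2019_amp_envs_MF.pickle (note: the load cell below reads `starling_b1555_amp_envs200sr_MF.pickle` from `PROCESSED_DIR`).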

In [1]:
from starling_rhythm.utils.paths import PROCESSED_DIR

## Import dataframe

In [2]:
import pandas as pd

In [3]:
# Load the dataframe from the processed-data directory; its `amp_env` column
# is what the surrogate analysis below iterates over.
# NOTE(review): despite its name, SAVE_PATH is an *input* path here; it is
# reassigned to the output path before the dataframe is pickled again.
# NOTE(review): read_pickle can execute arbitrary code while loading — only
# point this at pickles produced by this project.
SAVE_PATH = PROCESSED_DIR / 'starling_b1555_amp_envs200sr_MF.pickle'
ae_df = pd.read_pickle(SAVE_PATH)

## Generate IAAFT Surrogates
> 1. Store the amplitude spectrum (the list of amplitudes corresponding to each frequency of the Fourier transform, FT).
2. Randomize the series (i.e., build a randomized phase spectrum).
3. Replace the amplitude spectrum of the randomized series with the stored amplitude spectrum, then apply the inverse FT.
4. Replace each value of the inverse FT with the value of the same rank in the original series (rank-order matching).
5. Repeat steps 2-4 until the difference between the empirical and surrogate amplitude spectra is less than 5 percent.
## Retrieve Sampling Distribution of MF_range for each amplitude envelope
> Use surrogate count of 32 for $\alpha$ = 0.05, effect size = 0.5, power = 0.5 (See Kelty-Stephen et al. 2022)

In [4]:
import starling_rhythm.iaaft as iaaft
from starling_rhythm.mfdfa import hurst_expo
from joblib import Parallel, delayed
from timeit import timeit
from tqdm.autonotebook import tqdm

  from .autonotebook import tqdm as notebook_tqdm


In [5]:
def SDoMF_iaaft(amp_env, ns = 32):
    '''
    Build the surrogate sampling distribution of MF_range for one amplitude envelope.

    Requests `ns` surrogates of `amp_env` from `iaaft.surrogates`
    (maxiter = 10000, verbose off), runs `hurst_expo` on each surrogate, and
    returns a list holding, for every surrogate in order, the spread
    (max - min) of the values `hurst_expo` produced for it.
    '''
    surrogate_set = iaaft.surrogates(amp_env, ns = ns, maxiter = 10000, verbose = False)

    ## hurst_expo runs exactly once per surrogate; MF_range is the spread of its output
    return [
        max(exponents) - min(exponents)
        for exponents in map(hurst_expo, surrogate_set)
    ]

In [6]:
## joblib settings passed to Parallel in the surrogate run below
n_jobs = 36 ## number of parallel jobs requested from joblib
verbose = 0 ## joblib verbosity level

In [7]:
import psutil
## Set the niceness of this notebook's process to 19.
## NOTE(review): presumably done so the multi-hour parallel run yields CPU to
## other users of the machine; on Unix, 19 is the lowest scheduling priority.
psutil.Process().nice(19)

In [8]:
## Fan SDoMF_iaaft out over joblib workers, one task per amplitude envelope;
## mf_ranges collects one list of surrogate MF ranges per envelope.
with Parallel(n_jobs = n_jobs, verbose = verbose) as parallel:
    envelopes = tqdm(ae_df.amp_env.values, desc = 'Running Surrogate Data')
    mf_ranges = parallel(delayed(SDoMF_iaaft)(envelope) for envelope in envelopes)

Running Surrogate Data: 100%|██████████| 1041/1041 [3:09:02<00:00, 10.90s/it] 


In [9]:
## Display the result: one inner list of surrogate MF ranges per amplitude envelope
mf_ranges

[[0.328925150116525,
  0.3176995526225117,
  0.31709367621135065,
  0.3276802033329357,
  0.3326296851556422,
  0.30082287120906437,
  0.3287132461388873,
  0.30783541545729043,
  0.2940195952415625,
  0.30584311811667486,
  0.3418690412189873,
  0.29080474611850793,
  0.3041275138921903,
  0.3158600588883491,
  0.276438499951157,
  0.28751627321166107,
  0.3643291497788963,
  0.2893012970016081,
  0.2997971513773213,
  0.3214552766292077,
  0.28189801526047165,
  0.2870632234197701,
  0.31647846672394064,
  0.27502312162566966,
  0.2687895730060871,
  0.32645268846753306,
  0.3084356341178659,
  0.31574717390993934,
  0.3203006910039533,
  0.3081892048414564,
  0.31309502024614,
  0.2818137801407491],
 [0.33788529506444487,
  0.39047859579810573,
  0.3179524517052278,
  0.3221691498053615,
  0.3395396203437179,
  0.3841570167023901,
  0.3522637921742481,
  0.3088946319762802,
  0.3294495627773628,
  0.3654736384519144,
  0.37989929920137455,
  0.36803369364426,
  0.371649725081754,
  

In [10]:
## Attach each envelope's list of surrogate MF ranges as a new dataframe column.
## NOTE(review): relies on mf_ranges being in the same order as
## ae_df.amp_env.values — joblib's list output preserves input order; confirm
## if the Parallel call is ever changed to return results as they complete.
ae_df['mf_iaaft_range'] = mf_ranges

## Pickle Dataframe

In [11]:
from starling_rhythm.utils.paths import PROCESSED_DIR, ensure_dir

In [12]:
## Output location for the dataframe with the added IAAFT surrogate column.
SAVE_PATH = PROCESSED_DIR / 'starling_b1555_amp_envs200sr_MF_IAAFT.pickle'
## NOTE(review): ensure_dir is a project helper not shown here; presumably it
## creates the parent directory of SAVE_PATH if missing — confirm in
## starling_rhythm.utils.paths.
ensure_dir(SAVE_PATH)

In [13]:
## Write the dataframe, including the 'mf_iaaft_range' column, to SAVE_PATH
ae_df.to_pickle(SAVE_PATH)