# Get MF-DFA on Surrogate Data
#### Date: 9/07/2022, JX
> Generate IAAFT surrogates for the data in /mnt/cube/j8xing/starling_rhythm_mfdfa/data/starling_zenodo_2019_amp_envs_MF.pickle and run MF-DFA on the surrogates

In [1]:
from starling_rhythm.utils.paths import PROCESSED_DIR

## Import dataframe

In [2]:
import pandas as pd

In [3]:
## Location of the pickled dataframe to read (despite the name, SAVE_PATH is the input path here)
SAVE_PATH = PROCESSED_DIR / 'starling_zenodo_2019_amp_envs_MF200.pickle'
## Load the dataframe; its `amp_env` column is what the surrogate analysis below iterates over
ae_df = pd.read_pickle(SAVE_PATH)

## Generate IAAFT Surrogates
> 1. Store the amplitude spectrum (the list of amplitudes corresponding to each frequency of the Fourier transform, FT).
2. Randomize the series (i.e., build a randomized phase spectrum).
3. Replace the amplitude spectrum of the randomized series with the stored amplitude spectrum, then apply the inverse FT.
4. Replace each value of the inverse-FT series with the value of matching rank from the original series.
5. Repeat steps 2-4 until the difference between the empirical and surrogate amplitude spectra is less than 5 percent.
## Retrieve Sampling Distribution of MF_range for each amplitude envelope
> Use surrogate count of 32 for $\alpha$ = 0.05, effect size = 0.5, power = 0.5 (See Kelty-Stephen et al. 2022)

In [4]:
import starling_rhythm.iaaft as iaaft
from starling_rhythm.mfdfa import hurst_expo
from joblib import Parallel, delayed
from timeit import timeit
from tqdm.autonotebook import tqdm

  from .autonotebook import tqdm as notebook_tqdm


In [5]:
def SDoMF_iaaft(amp_env, ns=32):
    """
    Build IAAFT surrogates of one amplitude envelope and collect the MF_range of each.

    Parameters
    ----------
    amp_env
        The amplitude envelope handed to `iaaft.surrogates`.
    ns : int, default 32
        Number of surrogates requested from `iaaft.surrogates`.

    Returns
    -------
    list
        One entry per surrogate: the largest minus the smallest value
        returned by `hurst_expo` for that surrogate.
    """

    def _expo_spread(series):
        ## spread (max - min) of the exponents hurst_expo returns for one series
        expos = hurst_expo(series)
        return max(expos) - min(expos)

    ## maxiter = 10000 and verbose = False are passed straight through to iaaft.surrogates
    surrogate_set = iaaft.surrogates(amp_env, ns=ns, maxiter=10000, verbose=False)

    return [_expo_spread(series) for series in surrogate_set]

In [6]:
## joblib parameters, both passed to Parallel(...) in the surrogate run below
n_jobs = 36 ## value given to Parallel's n_jobs argument
verbose = 0 ## value given to Parallel's verbose argument

In [7]:
import psutil
## Set the niceness of the current process to 19 before the long-running job.
## NOTE(review): on Unix 19 is the lowest scheduling priority; presumably joblib
## workers started afterwards inherit it -- confirm for the backend in use.
psutil.Process().nice(19)

In [None]:
## Run SDoMF_iaaft (default ns) once per entry of the amp_env column, fanned out
## over the joblib pool; tqdm wraps the input iterable to report progress.
with Parallel(n_jobs = n_jobs, verbose = verbose) as parallel:
    mf_ranges = parallel(
        delayed(SDoMF_iaaft)(envelope)
        for envelope in tqdm(ae_df['amp_env'].values, desc = 'Running Surrogate Data')
    )

Running Surrogate Data:  34%|███▍      | 3600/10499 [7:33:56<21:58:53, 11.47s/it]

In [None]:
## Display the collected per-envelope results as the notebook cell output
mf_ranges

In [None]:
## Attach the surrogate results to the dataframe as a new column;
## each entry is whatever SDoMF_iaaft returned for that row's amp_env
ae_df['mf_iaaft_range'] = mf_ranges

## Pickle Dataframe

In [None]:
from starling_rhythm.utils.paths import PROCESSED_DIR, ensure_dir

In [None]:
## Output location for the dataframe with the added surrogate column
## (rebinds SAVE_PATH, which earlier held the input path)
SAVE_PATH = PROCESSED_DIR / 'starling_zenodo_2019_amp_envs_MF_IAAFT200_FULL.pickle'
## NOTE(review): presumably makes sure the destination directory exists -- confirm
## how ensure_dir treats a file path in starling_rhythm.utils.paths
ensure_dir(SAVE_PATH)

In [None]:
## Write the dataframe to SAVE_PATH as a pickle
ae_df.to_pickle(SAVE_PATH)