# Get MF-DFA on Surrogate Data
#### Date: 9/07/2022, JX
> Generate IAAFT surrogates and run MF-DFA on the surrogate data for /mnt/cube/j8xing/starling_rhythm_mfdfa/data/starling_zenodo_2019_amp_envs_MF.pickle

In [1]:
from starling_rhythm.utils.paths import PROCESSED_DIR

## Import dataframe

In [2]:
import pandas as pd

In [3]:
## Path of the input dataframe (note: despite the name, SAVE_PATH is read from here, not written to)
SAVE_PATH = PROCESSED_DIR / 'starling_b1555_amp_envs200sr_MF_spl.pickle'
## Load the pickled dataframe; its amp_env and spl_env columns feed the surrogate analysis below
## NOTE: unpickling can execute arbitrary code -- only load files from a trusted source
ae_df = pd.read_pickle(SAVE_PATH)

## Generate IAAFT Surrogates
> 1. Store the amplitude spectrum (the list of amplitudes at each frequency of the series' Fourier transform)
2. Randomize the series (this builds a randomized phase spectrum)
3. Replace the amplitude spectrum of the randomized series with the stored amplitude spectrum, then apply the inverse FT
4. Replace each value of the inverse-FT output with the rank-matched value from the original series
5. Repeat steps 2-4 until the difference between the empirical and surrogate amplitude spectra is less than 5 percent. 
## Retrieve Sampling Distribution of MF_range for each amplitude envelope
> Use surrogate count of 32 for $\alpha$ = 0.05, effect size = 0.5, power = 0.5 (See Kelty-Stephen et al. 2022)

In [4]:
import starling_rhythm.iaaft as iaaft
from starling_rhythm.mfdfa import hurst_expo
from joblib import Parallel, delayed
from timeit import timeit
from tqdm.autonotebook import tqdm

  from .autonotebook import tqdm as notebook_tqdm


In [5]:
def SDoMF_iaaft(amp_env, ns = 32):
    '''
    Sampling distribution of MF_range from IAAFT surrogates of one envelope.

    Requests `ns` surrogates of `amp_env` from `iaaft.surrogates`, runs
    `hurst_expo` on each one, and returns a list holding, per surrogate,
    the spread (max - min) of the Hurst exponents it produced.
    '''

    def _mf_range(series):
        ## spread of the Hurst exponents for a single surrogate
        exponents = hurst_expo(series)
        return max(exponents) - min(exponents)

    ## build the surrogate series for this envelope
    surrogate_set = iaaft.surrogates(amp_env, ns = ns, maxiter = 10000, verbose = False)

    ## one MF_range per surrogate, in the order the surrogates were returned
    return [_mf_range(series) for series in surrogate_set]

In [6]:
## joblib parameters: verbosity level (0 = no progress messages) and number of workers
verbose = 0
n_jobs = 47

In [7]:
import psutil
## Set the niceness of the current process to 19 so the long parallel run yields to other work
## (on Unix-like systems 19 is the lowest scheduling priority)
current_process = psutil.Process()
current_process.nice(19)

In [8]:
## Surrogate MF ranges for every amplitude envelope, one task per row of ae_df
with Parallel(n_jobs = n_jobs, verbose = verbose) as parallel:
    amp_progress = tqdm(ae_df.amp_env.values, desc = 'Running Surrogate Data on Amp')
    amp_tasks = (delayed(SDoMF_iaaft)(amp_env) for amp_env in amp_progress)
    mf_ranges_ae = parallel(amp_tasks)
    
## Surrogate MF ranges for every SPL envelope, one task per row of ae_df
with Parallel(n_jobs = n_jobs, verbose = verbose) as parallel:
    spl_progress = tqdm(ae_df.spl_env.values, desc = 'Running Surrogate Data on SPL')
    spl_tasks = (delayed(SDoMF_iaaft)(spl_env) for spl_env in spl_progress)
    mf_ranges_spl = parallel(spl_tasks)

Running Surrogate Data on Amp: 100%|██████████| 1041/1041 [1:18:26<00:00,  4.52s/it]
Running Surrogate Data on SPL: 100%|██████████| 1041/1041 [13:53<00:00,  1.25it/s]


In [10]:
## Attach the per-row surrogate MF_range lists to the dataframe as new columns
for column, ranges in (
    ('mf_iaaft_range_ae', mf_ranges_ae),
    ('mf_iaaft_range_spl', mf_ranges_spl),
):
    ae_df[column] = ranges

In [14]:
ae_df

Unnamed: 0,start_ms,end_ms,start_sample,end_sample,p_step,rms_p,peak_p,bout_check,file,len_ms,...,confusing,is_call,amp_env,spl_env,h_expo_ae,MF_range_ae,h_expo_spl,MF_range_spl,mf_iaaft_range_ae,mf_iaaft_range_spl
0,508500,566065,24408000,27171120,"[28.88509981238539, 65.14898407709249, 103.180...",9.609107,143.211264,True,/mnt/sphere/speech_bci/raw_data/s_b1555_22/202...,57565,...,False,False,"[2.4892911824281327e-05, 0.002640440361574292,...","[0.012887999192663964, 1.6298831714527595, 3.0...","[1.210268876562112, 1.0847868581968103, 0.9997...",0.417653,"[1.0488658170321583, 1.0192658050285757, 0.997...",0.137107,"[0.32529287768164084, 0.28577091554089273, 0.3...","[0.08512163665434325, 0.08596406153368308, 0.1..."
1,641180,694125,30776640,33318000,"[48.20694858466058, 83.76981436987464, 108.852...",5.388905,156.745806,True,/mnt/sphere/speech_bci/raw_data/s_b1555_22/202...,52945,...,False,False,"[5.805372893519234e-06, 0.0016227918677031994,...","[0.00406788556508693, 1.1053593978847633, 2.39...","[1.206747469037927, 1.0668692741274823, 0.9747...",0.461689,"[1.0845706146074268, 1.0475678318350459, 1.022...",0.152306,"[0.3052440883392816, 0.377364154256425, 0.3982...","[0.09521406551035116, 0.09391900718340407, 0.0..."
2,868940,921030,41709120,44209440,"[2.3001409701376363, 15.027985352733767, 30.17...",8.135380,156.958417,True,/mnt/sphere/speech_bci/raw_data/s_b1555_22/202...,52090,...,False,False,"[9.765496361069381e-05, 0.007925855927169323, ...","[0.025815673495165517, 3.3906952118208697, 5.3...","[1.2857270505789482, 1.1416211034999122, 1.035...",0.537401,"[1.054714273517234, 1.0216348378749254, 0.9976...",0.155954,"[0.4549324956708789, 0.4083176267976, 0.495630...","[0.1091284283096905, 0.10212253759305578, 0.09..."
3,1043035,1093720,50065680,52498560,"[2.9249842953419494, 3.1541257656248125, 14.16...",8.135380,144.054406,True,/mnt/sphere/speech_bci/raw_data/s_b1555_22/202...,50685,...,False,False,"[5.769828931079246e-05, 0.015915358439087868, ...","[0.020930336806865055, 5.403901915623869, 11.0...","[1.317296971969834, 1.1366829235572742, 1.0264...",0.522708,"[1.1249544237114795, 1.0707444454679387, 1.032...",0.227368,"[0.39738275084129293, 0.38212592723227556, 0.3...","[0.11430570369910509, 0.15705306054070478, 0.1..."
4,1423905,1474480,68347440,70775040,"[3.937345912388517, 3.469701876813218, 2.86553...",4.015413,140.816750,True,/mnt/sphere/speech_bci/raw_data/s_b1555_22/202...,50575,...,False,False,"[5.337237780622672e-06, 0.0012986172223463655,...","[0.00395726763677555, 0.9592529687682605, 1.81...","[1.1679092266897195, 1.0605409985158545, 0.984...",0.388723,"[0.990100912082668, 0.9594052503013833, 0.9354...",0.152659,"[0.3080191641039992, 0.2847893575065573, 0.273...","[0.09297751973838841, 0.09634551123919322, 0.0..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
89,1591530,1602065,76393440,76899120,"[4.643018462812651, 2.923919620954338, 13.5363...",2.043606,114.653102,False,/mnt/sphere/speech_bci/raw_data/s_b1555_22/202...,10535,...,False,False,"[3.023368481080979e-05, 0.0012741615064442158,...","[0.017286074782681404, 1.0340200170418643, 2.0...","[1.2917844908328622, 1.1501793367891089, 1.021...",0.659497,"[1.1744464301571218, 1.1276826778296476, 1.096...",0.184049,"[0.6184457477429754, 0.5857270827153475, 0.557...","[0.1397161247886214, 0.14223847345304574, 0.14..."
123,1605580,1610770,77067840,77316960,"[33.25759392172391, 29.86369699500519, 6.09811...",2.043606,238.422834,False,/mnt/sphere/speech_bci/raw_data/s_b1555_22/202...,5190,...,False,False,"[7.692791405133903e-05, 0.018301259726285934, ...","[0.019650267776198868, 4.695249442450509, 9.37...","[1.7012421954121975, 1.3053615144730153, 1.059...",1.050222,"[1.3893312733498826, 1.3055277312161337, 1.250...",0.308968,"[0.7864249106017592, 0.8868359581920928, 0.740...","[0.2179360092495497, 0.28271072654484075, 0.33..."
136,411070,415280,19731360,19933440,"[5.175037749402548, 70.80952142186685, 57.2026...",2.616865,159.078826,False,/mnt/sphere/speech_bci/raw_data/s_b1555_22/202...,4210,...,False,False,"[1.1144771633553319e-05, 0.002191337989643216,...","[0.007439472871148186, 1.5052592241305685, 4.3...","[1.5564953817880078, 1.2752443607114707, 1.055...",0.935401,"[1.3317134342289572, 1.2596379996286222, 1.213...",0.264202,"[0.6632907947795349, 0.624795655885689, 0.7078...","[0.26168148423405935, 0.21135369598292253, 0.2..."
145,1620240,1623675,77771520,77936400,"[26.356576704532092, 27.208591690945624, 8.918...",2.125249,46.728800,False,/mnt/sphere/speech_bci/raw_data/s_b1555_22/202...,3435,...,False,False,"[7.4280251283198595e-06, 0.02198933996260166, ...","[0.008292225865660021, 9.129987313458093, 18.4...","[1.3447647895477572, 1.1398819811036296, 1.015...",0.629926,"[1.2788443178769036, 1.1866788098982235, 1.129...",0.336420,"[0.42443250851566927, 0.3992344470407846, 0.43...","[0.20415324008782743, 0.2406048846710387, 0.26..."


## Pickle Dataframe

In [11]:
from starling_rhythm.utils.paths import PROCESSED_DIR, ensure_dir

In [12]:
## Output path for the dataframe that now carries the IAAFT surrogate MF ranges
SAVE_PATH = PROCESSED_DIR / 'starling_b1555_amp_envs200sr_MF_IAAFT_spl.pickle'
## presumably makes sure the directory for SAVE_PATH exists before writing -- verify against utils.paths.ensure_dir
ensure_dir(SAVE_PATH)

In [13]:
## Write the augmented dataframe to SAVE_PATH as a pickle
pd.to_pickle(ae_df, SAVE_PATH)