# Autocorrelation analysis for different observables
Sept 12, 2023

In [1]:
import numpy as np
import matplotlib.pyplot as plt

In [2]:
%matplotlib notebook

In [3]:
import glob,sys, subprocess

In [4]:
import pandas as pd
import gvar as gv

## Autocorrelation formula:
## $$ C(X_i, X_{i+t}) = \langle (X_i - \langle X_i \rangle ) (X_{i+t} - \langle X_{i+t} \rangle )  \rangle$$ 

In [5]:
a1=np.array([np.random.normal(i,0.05,100) for i in range(1,16,4)]).flatten()

# a1=np.random.normal(1,0.05,25)
print(a1)
lgth=len(a1)

[ 1.02878602  0.87293401  0.97363775  1.00407134  0.95913908  1.10165769
  0.97872856  1.00174286  1.00098268  0.99379695  1.03016883  1.02169998
  0.9985518   1.02944767  0.96229783  1.000914    0.99594032  1.06308843
  0.97102047  0.98911373  0.97006762  1.05106407  1.04794259  1.00112002
  0.94835417  0.94606604  0.91833217  1.00908918  0.96917301  0.98919764
  1.01296061  0.99854438  0.99160084  0.99668574  1.03015698  0.97281648
  0.99778107  1.00689484  0.93227508  1.01796132  1.01155086  0.97510447
  0.98901319  0.95545458  1.08721727  0.89609401  0.93525691  0.96393781
  0.94255523  0.99164823  1.02171892  1.00220795  0.9999542   1.01461693
  1.03604806  1.08474571  0.93124711  1.05292517  1.01880342  0.89992633
  0.87389205  0.91560837  0.99044817  1.07251031  1.03199174  1.00292756
  0.96622104  1.07113658  0.984914    0.93357495  0.98196534  0.9607177
  0.99926678  0.99399952  0.89752125  0.97757297  1.0991162   0.95933774
  0.98215761  0.98916076  0.98434171  1.03530776  1.

In [6]:
def f_autocorr(a1):
    ''' 
    Compute the autocorrelation function of a given 1D array
    '''
    
    lgth=len(a1)
    auto_corr=np.ones(lgth-1,dtype=np.float64)
    
    avg=np.average(a1)
    var=np.var(a1)
    
    a2=a1-avg

    for t in range(0,lgth-1):
#         auto_corr[t] = (np.average([(a1[i]*a1[i+t]) for i in range(lgth-t)])-avg**2)/var
        auto_corr[t] = (np.sum([(a2[i]*a2[i+t]) for i in range(lgth-t)]))/(var*lgth)
#         print(t,np.average([(a1[i]*a1[i+t]) for i in range(lgth-t)]),avg**2)
        
    return auto_corr


def f_autocorr_time(auto_corr):
    
    ## Autocorrelation time as 1/e of 0th value
    for count,i in enumerate(auto_corr):
        if i < (1.0/np.e):
#             print("Autocorr",count,i)
            return count

    print("Error: Autocorr doesn't drop to 1/e")
    return np.inf


In [7]:
ans=f_autocorr(a1)
plt.figure()
plt.plot(ans,linestyle='',marker='o')
# plt.yscale('log')
plt.show()

<IPython.core.display.Javascript object>

In [8]:
f_autocorr_time(ans)

85

https://stackoverflow.com/questions/643699/how-can-i-use-numpy-correlate-to-do-autocorrelation

In [9]:

def autocorr2(x,lags):
    '''manualy compute, non partial'''

    mean=np.mean(x)
    var=np.var(x)
    xp=x-mean
    corr=[1. if l==0 else np.sum(xp[l:]*xp[:-l])/len(x)/var for l in lags]

    return np.array(corr)


def autocorr5(x,lags):
    '''numpy.correlate, non partial'''
    mean=x.mean()
    var=np.var(x)
    xp=x-mean
    corr=np.correlate(xp,xp,'full')[len(x)-1:]/var/len(x)

    return corr[:len(lags)]




In [10]:
autocorr2(a1,np.arange(0,22)), autocorr5(a1,np.arange(0,22))

(array([1.        , 0.99262647, 0.98516043, 0.97770315, 0.97022585,
        0.96282853, 0.95546011, 0.94794259, 0.94048562, 0.93297367,
        0.92541525, 0.91808009, 0.91064422, 0.90318998, 0.89573194,
        0.88820735, 0.88074277, 0.87328319, 0.86584399, 0.85835913,
        0.85089698, 0.84339202]),
 array([1.        , 0.99262647, 0.98516043, 0.97770315, 0.97022585,
        0.96282853, 0.95546011, 0.94794259, 0.94048562, 0.93297367,
        0.92541525, 0.91808009, 0.91064422, 0.90318998, 0.89573194,
        0.88820735, 0.88074277, 0.87328319, 0.86584399, 0.85835913,
        0.85089698, 0.84339202]))

In [11]:

def f_autocorr(a1):
    ''' 
    Compute the autocorrelation function of a given 1D array
    '''
    
    lgth=len(a1)
    auto_corr=np.ones(lgth-1,dtype=np.float64)
    
    avg=np.average(a1)
    var=np.var(a1)
    a1=a1-avg

    for t in range(0,lgth-1):
#         auto_corr[t] = (np.average([(a1[i]*a1[i+t]) for i in range(lgth-t)])-avg**2)/var
        auto_corr[t] = (np.sum([(a1[i]*a1[i+t]) for i in range(lgth-t)]))/(var*lgth)
    
    return auto_corr

def autocorr2(x,lags):
    '''manualy compute, non partial'''

    mean=np.mean(x)
    var=np.var(x)
    xp=x-mean
    corr=[1. if l==0 else np.sum(xp[l:]*xp[:-l])/len(x)/var for l in lags]

    return np.array(corr)

autocorr2(a1,np.arange(0,len(a1)-1)),f_autocorr(a1)

(array([ 1.        ,  0.99262647,  0.98516043,  0.97770315,  0.97022585,
         0.96282853,  0.95546011,  0.94794259,  0.94048562,  0.93297367,
         0.92541525,  0.91808009,  0.91064422,  0.90318998,  0.89573194,
         0.88820735,  0.88074277,  0.87328319,  0.86584399,  0.85835913,
         0.85089698,  0.84339202,  0.83582781,  0.82828579,  0.82076029,
         0.81330803,  0.80567405,  0.79827185,  0.79079456,  0.78333413,
         0.77582234,  0.76830778,  0.76081776,  0.75334543,  0.74577713,
         0.73835544,  0.73083962,  0.72342086,  0.71585193,  0.70833908,
         0.70078128,  0.69329086,  0.68574279,  0.67821502,  0.67066492,
         0.66323879,  0.65567196,  0.64817516,  0.64069811,  0.63312776,
         0.62557991,  0.61807175,  0.61061577,  0.60309375,  0.59562257,
         0.58808042,  0.58082582,  0.5732406 ,  0.56573035,  0.55816615,
         0.55060432,  0.54295727,  0.53532343,  0.52784843,  0.52029112,
         0.51282199,  0.50526642,  0.49771453,  0.4

In [12]:
%timeit autocorr2(a1,np.arange(0,len(a1)-1))

%timeit f_autocorr(a1)

5.48 ms ± 238 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
44.6 ms ± 105 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [13]:
def f_autocorr(a1):
    ''' 
    Compute the autocorrelation function of a given 1D array
    '''
    
    lgth=len(a1)
    
    auto_corr=np.ones(lgth-1,dtype=np.float64)
    
    avg=np.average(a1)
    var=np.var(a1)
    
    # Deviation from the mean
    a2=a1-avg

    mode=2
    
    if mode==1: # Simple method, slower
        
        for t in range(0,lgth-1):
            auto_corr[t] = (np.sum([(a2[i]*a2[i+t]) for i in range(lgth-t)]))/(var*lgth)
        
    elif mode==2: # Faster method with numpy array
        
        auto_corr=np.array([1. if l==0 else np.mean(a2[l:]*a2[:-l])/(var) for l in range(0,lgth-1)])

    return auto_corr


def f_autocorr_time(a1):
    '''
    Compute autocorrelation array and then compute autocorrelation time as epoch to attain (1/e) th value
    '''
    
    # Compute autocorr array
    auto_corr=f_autocorr(a1)
    
    ## Autocorrelation time as 1/e of 0th value
    for count,i in enumerate(auto_corr):
        if i < (1.0/np.e):
#             print("Autocorr",count,i)
            return count

    print("Error: Autocorr doesn't drop to 1/e")
    return np.inf


In [14]:
a1=np.array([np.random.normal(i,0.05,49) for i in range(1,16,4)]).flatten()
# a1=np.random.normal(1,0.05,25)


In [15]:
ans=f_autocorr(a1)
f_autocorr_time(ans)
# ans

50