## Scatter-analysis of a Stabilization graph
Based on Arie Landau *et al.*, *J. Phys. Chem.* A 2016, 120, 3098−3108

Analytic continuation of a single root using standard Pade approximants and input data from a plateau. 

In [1]:
import numpy as np
#from scipy.optimize import minimize_scalar
#from scipy.optimize import root
#from pandas import Series, DataFrame
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
from pprint import pprint
matplotlib.use('Qt5Agg')
%matplotlib qt5
# Unit-conversion factors (multiply a value in the first unit to obtain the second).
Angs2Bohr=1.8897259886   # Angstrom -> Bohr
au2eV=27.211386027       # Hartree (atomic units) -> eV
au2cm=219474.63068       # Hartree (atomic units) -> cm^-1
import time

In [2]:
#
# put pade.py into the current directory or put its location into the system path
#
import sys
sys.path.append('../../Python_libs')
import pade
import stabtools as st

Landau outlines a ten-step protocol:

### Step 1: Get a real stabilization graph

The scaling parameter $z$ may be a box length $L$  or a scaling factor $\alpha$ of a set of Gaussians.

In [3]:
# Load the stabilization data into `df`.  Later cells read column 1 as the
# scaling parameter and columns 2 onward as the energies of the roots.
# Exactly one read_csv line should be active; the others are alternative inputs.
df = pd.read_csv("DVR_stab_plot.csv")
#df = pd.read_csv("GTO_unc_stab_plot.csv")
#df = pd.read_csv("GTO_DZ_stab_plot.csv")
#df = pd.read_csv("GTO_TZ_stab_plot.csv")

# NOTE(review): the paths below are absolute and machine-specific.
#df=pd.read_csv("/home/thomas/Current_Work/Caroline/Glyoxal/Stab/E_geo0.csv", delim_whitespace=True)
#df=pd.read_csv("/home/thomas/Current_Work/Caroline/Glyoxal/Stab/E_geo-1.csv", delim_whitespace=True)
#df=pd.read_csv("/home/thomas/Current_Work/Caroline/Glyoxal/Stab/Z_geo0_cleaned.csv")
#df=pd.read_csv("/home/thomas/Current_Work/Caroline/Glyoxal/Stab/Z_geo-1_cleaned.csv")
df.head()

Unnamed: 0,L,z,E1,E2,E3,E4,E5,E6
0,1.423025,0.493827,-7.170508,0.159275,0.523245,1.098066,1.858843,2.728974
1,1.399532,0.510545,-7.170508,0.16575,0.545553,1.144416,1.933952,2.814184
2,1.377166,0.527263,-7.170508,0.172307,0.568155,1.19124,2.009268,2.890628
3,1.355838,0.543981,-7.170508,0.178946,0.59105,1.238525,2.08468,2.956664
4,1.335472,0.5607,-7.170508,0.185668,0.614238,1.286256,2.160057,3.011986


In [4]:
# Column 1 holds the scaling parameter.  Map it linearly onto [0, 1] using the
# first and last entries as end points (ordered so that zmin <= zmax).
z_col = df.columns[1]
z_raw = df[z_col].values
z_first, z_last = z_raw[0], z_raw[-1]
zmin, zmax = (z_last, z_first) if z_last < z_first else (z_first, z_last)
zs = (z_raw - zmin)/(zmax - zmin)

# All remaining columns are energies; draw each root against the scaled parameter.
E_labels = df.columns[2:]
plt.cla()
for label in E_labels:
    plt.plot(zs, df[label].values, '.')
plt.ylim(0, 8)
plt.show()

### Step 2: Select the low-curvature region of a plateau

Set the following parameters after inspecting the stabilization plot:
* lroot: root with a stabilization plateau
* if the autoselect doesn't work, narrow search by setting zmin, zmax

In [5]:
# User-set parameters (inspect the stabilization plot first).
lroot = 3    # index into E_labels of the root that shows a plateau
scut = 0.7   # curvature fraction used to trim the plateau window
             # (restart sheet if changing gives an exception)

# (-1, -1) is handed to st.plateau unchanged as the search range; set explicit
# values to narrow the search (presumably -1 means "search everything" -- confirm
# in stabtools).
zmin, zmax = -1, -1
E_0 = df[E_labels[lroot]].values
d1s, d2s = st.der_and_curvature(zs, E_0)

# Per the original author's note: (jc, j1, j2) are indices of the zero and the
# extrema, (zc, z1, z2) the precise positions of the zero and the extrema of
# d^2E/dz^2.
(jc, j1, j2), (zc, z1, z2) = st.plateau(zs, E_0, srch_range=(zmin, zmax))

if jc > 0:
    print('Plateau center at z=%f' % (zc))

if not (j1 > 0 and j2 > 0):
    # No usable plateau: show the derivatives so the search range can be tuned.
    print('No plateau with adjacent crossings found.')
    print(jc, j1, j2)
    plt.cla()
    plt.plot(zs, d1s, label="1st der")
    plt.plot(zs, d2s, label="2nd der")
    plt.legend()
    plt.ylabel('derivative and curvature')
    plt.show()
else:
    print('Crossings at z=%f and z=%f' % (z1, z2))
    j1, j2 = min(j1, j2), max(j1, j2)
    d2dn, d2up = d2s[j1], d2s[j2]
    # Inside [j1, j2) find where the curvature is closest to scut times its
    # value at the lower / upper crossing index; these bound the selection.
    curv_window = d2s[j1:j2]
    ipdn = j1 + np.argmin(np.abs(curv_window - scut*d2dn))
    ipup = j1 + np.argmin(np.abs(curv_window - scut*d2up))
    selection = slice(ipdn, ipup + 1)
    alps = zs[selection]
    Es = E_0[selection]
    npts = len(Es)
    print('N = %d,  max n: %d' % (npts, (npts-1)/2))
    plt.cla()
    plt.plot(zs, E_0, '-')
    plt.plot(alps, Es, 'o')
    plt.ylabel('root '+str(lroot))
    plt.show()

Plateau center at z=0.478218
Crossings at z=0.308333 and z=0.666667
N = 30,  max n: 14


### Generate input sets and select Padé order

In [6]:
# Generate RVP sets: every contiguous sub-range of the selected plateau points
# that is long enough to determine an [n,n] Pade approximant.

n = 4          # pade order [n,n]

N = len(Es)    # length of set
M = 2*n + 1    # minimum pts for [n,n] pade

# (lo, hi) are the slice bounds of each window; for a fixed left edge the
# right edge grows, i.e. the windows are enumerated "leaning left".
windows = [(lo, hi) for lo in range(N) for hi in range(lo + M, N + 1)]

Einputs = [Es[lo:hi] for lo, hi in windows]
alpinputs = [alps[lo:hi] for lo, hi in windows]
# Number of points trimmed from the left and from the right end of the set.
ilist = [[lo, N - hi] for lo, hi in windows]

print(len(Einputs),'input sets')

253 input sets


### Generate Padé approximants + Newton search grid

In [7]:
# For every input set: fit an [n,n] Pade approximant E(z) = P(z)/Q(z), then run
# Newton iterations from a grid of complex starting points and collect the
# converged (energy, stationary point) pairs.

#options
npts = 10        # grid points per axis of the starting-guess grid
max_step = 10    # maximum Newton iterations per starting guess
tol=1e-7         # convergence threshold for |delta_L| and |delta_E|
eps=10*tol       # accepted solutions need Im(E) < -eps and Re(E) > eps
rp=1.2           # real-axis grid runs from alps[0]*(1-rp) to alps[-1]*(1+rp)
ip=2*zc          # imaginary-axis grid runs from -ip to 0

ttol=int(abs(np.log10(tol))-1)   # decimals kept when rounding converged values
Res = np.linspace(alps[0]*(1-rp), alps[-1]*(1+rp), npts)
Ims = np.linspace(-ip, 0, npts)

#Generate pade approximates
Plist=[]
Qlist=[]
for A,E in zip(alpinputs,Einputs):
    P, Q = pade.pade_via_lstsq(n, n, A, E)
    Plist.append(P)
    Qlist.append(Q)
print(len(Plist),'approximates')

start = time.time()

#Newton search grid
Lsolutions=[]
Esolutions=[]
bfl=[]
# ilist must still be the [left, right] trim list produced together with the
# input sets.  zip() would silently truncate on a mismatch, so stop instead of
# producing mislabelled results.
if len(ilist) != len(Plist):
    raise RuntimeError('Regenerate inputs sets then re-run cell')
for k, (P, Q, I) in enumerate(zip(Plist, Qlist, ilist), start=1):
    Econv=[]
    Lconv=[]
    for re_guess in Res:
        for im_guess in Ims:
            Lstar = re_guess + 1j*im_guess
            Ecurr = P(Lstar)/Q(Lstar)
            for _ in range(max_step):
                # Newton step supplied by pade.EpoEpp (presumably E'/E'', i.e. a
                # search for dE/dz = 0 -- confirm in pade.py).
                delta_L = pade.EpoEpp(Lstar, P, Q)
                Lstar = Lstar - delta_L
                Enew = P(Lstar)/Q(Lstar)
                delta_E = Enew - Ecurr
                Ecurr = Enew
                # delta_E is complex: test its magnitude, not an ordering
                # comparison (which would accept any large negative real part).
                converged = abs(delta_L) < tol and abs(delta_E) < tol
                if converged and Ecurr.imag < -eps and Ecurr.real > eps:
                    Lconv.append(Lstar)
                    Econv.append(Ecurr)
                    break

    print(k, end=' ')
    Econv = np.round(np.asarray(Econv, dtype=complex), ttol)
    Lconv = np.round(np.asarray(Lconv, dtype=complex), ttol)
    # Deduplicate on the energy and keep, for each unique energy, a stationary
    # point that actually produced it.  (Sorting E and L independently and then
    # zipping them pairs unrelated values and may drop solutions.)
    sols, first_idx = np.unique(Econv, return_index=True)   #solutions (E)
    spts = Lconv[first_idx]                                 #stationary points
    for s,l in zip(sols,spts):
        Esolutions.append(s)
        Lsolutions.append(l)
        bfl.append(I+[s])

print()
print(np.shape(Esolutions), 'non-zero solutions')
print(round(time.time()-start,2),'seconds to complete')

253 approximates
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 
(271,) non-zero solutions
20.21 seconds to complete


### Results
$E_{r}$ vs $E_{i}$

In [8]:
# Results Er vs Ei
# Real and imaginary parts of every converged solution.  The imaginary parts
# are deliberately NOT stored in `ilist`: that name holds the [left, right]
# trim indices of the input sets, and overwriting it here breaks any re-run of
# the Newton-search cell.
rlist = [E_sol.real for E_sol in Esolutions]
imlist = [E_sol.imag for E_sol in Esolutions]

plt.cla()
plt.plot(rlist, imlist, '.')
# Red marker: hard-coded reference point (presumably the known resonance
# energy for this data set -- confirm and move to a named constant).
plt.plot(3.17296,-0.160848,'r.',markersize=10)
#plt.plot(rlist[0],imlist[0],'g.',)#markersize=10)
plt.xlabel('$E_{r}$')
plt.ylabel('$E_{i}$')
#plt.savefig('n-5_ngrid-81.svg')
plt.show()

In [9]:
# Collect the solutions in a table: points trimmed on the left/right of the
# input set, the complex energy, and its real and imaginary parts.
# NOTE: this rebinds `df`, which until here held the stabilization data.
df = pd.DataFrame(bfl)
df.columns = ['left', 'right', 'Eres']
Eres_values = df['Eres'].to_numpy()
df = df.assign(Er=Eres_values.real, Ei=Eres_values.imag)
df.describe()

Unnamed: 0,left,right,Eres,Er,Ei
count,271.0,271.0,271.00000+0.00000j,271.0,271.0
mean,7.760148,6.612546,3.181762-0.151519j,3.181762,-0.151519
std,5.87443,5.334138,0.062139+0.000000j,0.057875,0.022622
min,0.0,0.0,3.124345-0.171712j,3.124345,-0.384832
25%,3.0,2.0,3.169501-0.146010j,3.169501,-0.148444
50%,7.0,6.0,3.175739-0.149645j,3.175739,-0.147356
75%,12.0,10.0,3.181017-0.147715j,3.181017,-0.145755
max,21.0,21.0,3.712441-0.384832j,3.712441,-0.125651


Eliminate outliers

In [10]:
# Keep only solutions whose Er AND Ei lie within s standard deviations of the
# respective mean.
mean_Er = df['Er'].mean()
mean_Ei = df['Ei'].mean()
std_Er = df['Er'].std()
std_Ei = df['Ei'].std()   # spread of Ei itself (was mistakenly taken from 'Er')
s=3.0
pattern=( (abs(df['Er']-mean_Er) < s*std_Er) & (abs(df['Ei']-mean_Ei) < s*std_Ei) )
df2=df[pattern]
# Draw the reference point on the axes returned by the scatter plot so both
# are guaranteed to end up in the same figure.
ax = df2.plot.scatter(x='Er', y='Ei')
ax.plot(3.17296,-0.160848,'r.',markersize=10)
df2.describe()

Unnamed: 0,left,right,Eres,Er,Ei
count,267.0,267.0,267.00000+0.00000j,267.0,267.0
mean,7.599251,6.696629,3.174805-0.149175j,3.174805,-0.149175
std,5.764201,5.327058,0.011939+0.000000j,0.007813,0.009028
min,0.0,0.0,3.124345-0.171712j,3.124345,-0.203208
25%,3.0,2.0,3.169487-0.145795j,3.169487,-0.148401
50%,7.0,6.0,3.175604-0.155230j,3.175604,-0.147273
75%,12.0,10.0,3.180945-0.147803j,3.180945,-0.145691
max,21.0,21.0,3.197358-0.156416j,3.197358,-0.125651


In [14]:
# Write the outlier-filtered solutions (df2) to a CSV file in the current
# working directory; index=False omits the row index.
df2.to_csv('pl_scatter_DVR.csv', index=False)
#df2.to_csv?

In [19]:
#
#  long vs short datasets
#
# Colour each solution by the total number of points trimmed from its input
# set (left + right): small values mean long data sets, large values short ones.
xs = df2['Er'].to_numpy()
ys = df2['Ei'].to_numpy()
j_left = df2['left'].to_numpy(dtype=float)
j_right = df2['right'].to_numpy(dtype=float)
sum_pts = j_left + j_right

plt.cla()
plt.scatter(xs, ys, marker='.', c=sum_pts, cmap='viridis')
plt.colorbar()
plt.tight_layout()
plt.show()

In [20]:
#
#  symmetric vs unsymmetric datasets
#
# Colour each solution by |left - right|: zero means the input set was trimmed
# equally on both sides.
diff_pts = np.abs(j_left - j_right)

plt.cla()
plt.scatter(xs, ys, marker='.', c=diff_pts, cmap='viridis')
plt.colorbar()
plt.show()

In [22]:
# both
# Side-by-side panels sharing both axes: left coloured by the total number of
# trimmed points, right by the left/right asymmetry.

fig, axs = plt.subplots(1, 2, sharex=True, sharey=True)
fig.set_size_inches(6.4, 3.2)

#axs[0].set_xticks([3.15, 3.18])

for panel, colour_values in zip(axs, (sum_pts, diff_pts)):
    panel.scatter(xs, ys, marker='.', c=colour_values, cmap='viridis')
    # black cross: hard-coded reference point
    panel.plot(3.17296,-0.160848,'k+',markersize=10)
    panel.set_xlabel("$E_r$ [eV]", fontsize=12)
axs[0].set_ylabel("$E_i$ [eV]", fontsize=12)

plt.tight_layout()
plt.show()