In [1]:
%run module.ipynb
# this already imports the following packages
# import numpy as np
# import pandas as pd  
import pysindy as ps
import matplotlib.pyplot as plt

In this notebook we explore how the uncertainty of the estimated model coefficients, measured as the variance of their sampled values, changes as the model is fitted on more and more data points.

In [67]:
# Read the clean trajectory, its noisy counterpart and the time axis from disk.
r = np.load("./data/lorenz_r.npy")
r_noisy = np.load("./data/lorenz_r_noisy.npy")
t = np.load("./data/lorenz_t.npy")

# Peek at the first rows; r is laid out as (n_points, n_dimensions).
print("Data vector:")
print(r[:5])

# First entries of the time axis.
print("\n\nTime vector:")
print(t[:5])

# Readable labels for the features, used instead of the generic x1, x2, ...
feature_names = ["x", "y", "z"]

# Sampling length: total number of iterations and the burn-in part of it.
iters, nr_burnin = 2000, 500

opt = SBAM(max_iter=iters, burn_in=nr_burnin, verbose=True)

# Hyperparameters are set as plain attributes on the optimizer.
opt.a, opt.b = 1.0, 1e10        # beta hyperparameters for theta
opt.s = 0.5                     # hyperparameter for tau2
opt.a1, opt.a2 = 0.01, 0.001    # hyperparameters for sigma2

# Derivatives are estimated with finite differences (order=12).
dif = ps.FiniteDifference(order=12)

model = ps.SINDy(
    optimizer=opt,
    feature_names=feature_names,
    differentiation_method=dif,
)

Data vector:
[[1.         1.         1.        ]
 [1.01256821 1.2599461  0.98488969]
 [1.04883096 1.52405449 0.97311235]
 [1.10722704 1.79839899 0.96515735]
 [1.18690156 2.08866556 0.96173704]]


Time vector:
[0.       0.010001 0.020002 0.030003 0.040004]


In [73]:
# Log-spaced grid of sample sizes between 10**1.7 and 10**4, truncated to
# integers (50 points, numpy's default count, spelled out here).
# NOTE: the name `list` shadows the Python builtin; it is kept because other
# cells in this notebook read it.
_log_grid = np.logspace(start=1.7, stop=4, num=50)
list = _log_grid.astype(int)


array([   50,    55,    62,    69,    77,    86,    95,   106,   118,
         132,   147,   164,   183,   204,   227,   253,   282,   314,
         350,   390,   435,   484,   540,   601,   670,   747,   832,
         927,  1033,  1151,  1282,  1429,  1592,  1774,  1976,  2202,
        2453,  2733,  3045,  3393,  3780,  4212,  4692,  5228,  5825,
        6490,  7230,  8056,  8975, 10000])

In [74]:

# Fit the model on growing prefixes of the noisy data and record, for every
# prefix length N, the variance of the sampled coefficients of the first target.
# NOTE: `vars` and `list` shadow Python builtins; the names are kept because
# other cells in this notebook refer to them.
variance_rows = []  # one entry (the result of .var()) per fitted sample size
for N in list:
    # A fresh optimizer is built for every fit.
    opt = SBAM(max_iter=iters, burn_in=nr_burnin, verbose=True)

    # setting beta hyperparameters for theta
    opt.a = 1.
    opt.b = 10000000000.

    # hyperparameter for tau2
    opt.s = 0.5

    # hyperparameter for sigma2
    opt.a1 = 0.01
    opt.a2 = 0.001

    dif = ps.FiniteDifference(order=12)

    model = ps.SINDy(feature_names=feature_names, optimizer=opt, differentiation_method=dif)
    print("Fitting with {} data points...".format(N))
    # Use only the first N samples and the matching slice of the time axis.
    model.fit(r_noisy[:N], t=t[:N])

    df = model.optimizer.samples[0]  # xdot regression
    # Variance of the sampled 'beta' values, restricted to entries where 'z' == 1
    # (presumably 'z' is an inclusion indicator -- confirm against SBAM).
    variance_rows.append(df['beta'][df['z'] == 1].var())

# Build the frame once after the loop: DataFrame.append was removed in
# pandas 2.0, and appending row by row copies the whole frame each iteration.
vars = pd.DataFrame(variance_rows).reset_index(drop=True)

Fitting with 50 data points...
Sampling for target n# 0/2...
|████████████████████████████████████████| 1999/1999 [100%] in 9.0s (222.55/s)                                          
Sampling for target n# 1/2...
|████████████████████████████████████████| 1999/1999 [100%] in 9.6s (207.27/s)                                          
Sampling for target n# 2/2...
|████████████████████████████████████████| 1999/1999 [100%] in 8.9s (223.61/s)                                          
Fitting with 55 data points...
Sampling for target n# 0/2...
|████████████████████████████████████████| 1999/1999 [100%] in 15.3s (131.05/s)                                         
Sampling for target n# 1/2...
|████████████████████████████████████████| 1999/1999 [100%] in 7.7s (258.01/s)                                          
Sampling for target n# 2/2...
|████████████████████████████████████████| 1999/1999 [100%] in 11.9s (168.24/s)                                         
Fitting with 62 data points...
S

In [81]:
# Pair each current column label of `vars` with the feature name found at the
# same position in the fitted model, then relabel the columns in place.
renaming = dict(zip(vars.columns, model.get_feature_names()))
vars.rename(mapper=renaming, axis="columns", inplace=True)

In [82]:
# Show the variance table accumulated by the fitting loop.
vars

Unnamed: 0,1,x,y,z,x^2,x y,x z,y^2,y z,z^2,N
0,0.1303703,0.025107,0.006506197,0.1360049,0.0001849772,7.95839e-05,0.0002522274,8.339381e-05,1.43961e-05,0.0001220747,50
1,0.05644826,0.001789882,0.0008456946,0.05685033,5.679299e-06,4.259854e-06,5.38246e-05,3.165736e-05,1.56066e-06,4.958859e-05,55
2,0.08705748,0.01386852,0.003790951,0.09401215,2.800675e-05,8.527271e-06,0.0001149822,5.135821e-05,5.30024e-06,8.195683e-05,62
3,0.00500529,0.002253281,0.0003086049,0.003121443,2.582111e-06,1.628283e-06,2.813605e-06,2.538252e-06,2.888928e-07,2.923433e-06,69
4,0.0350054,0.1473469,0.02642656,0.004921246,0.0001731321,1.998205e-05,0.0001053836,1.451234e-05,1.159822e-05,6.392106e-06,77
5,0.003985661,0.02889136,0.004323189,0.001986708,4.301956e-05,1.070257e-05,2.296137e-05,2.993346e-06,9.210911e-07,1.765742e-06,86
6,0.0006748128,0.003418338,0.0005479392,0.0003420574,3.152309e-06,8.648257e-07,1.211677e-06,1.011511e-06,1.018359e-07,4.085108e-07,95
7,0.0007824284,0.001430297,0.0002024189,0.000141412,1.687415e-06,7.967477e-08,7.47576e-07,3.06676e-07,8.087673e-08,1.328939e-07,106
8,0.0006376686,0.002832419,0.0004616361,0.0002414023,3.52215e-06,3.930252e-07,1.378882e-06,4.069086e-07,2.064793e-07,2.345426e-07,118
9,0.0009449072,0.004426721,0.0005979357,0.0004166803,4.589195e-06,7.820625e-08,1.201587e-06,1.003481e-06,2.080611e-07,3.825915e-07,132


In [83]:
%matplotlib tk
vars=vars.fillna(0)
vars['N']=list
vars.plot(x='N',logx=True,logy=True,ylabel='Variance',title='$\dot{x}$ Coefficients')
plt.suptitle("Lorenz system")

Text(0.5, 0.98, 'Lorenz system')