Skip to content

Commit

Permalink
add seed option to lc sim functions
Browse files Browse the repository at this point in the history
  • Loading branch information
ywx649999311 committed Dec 12, 2023
1 parent 3646947 commit e7e3b8c
Show file tree
Hide file tree
Showing 4 changed files with 140 additions and 17 deletions.
68 changes: 58 additions & 10 deletions src/eztao/ts/carma_sim.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,27 @@
__all__ = ["gpSimFull", "gpSimRand", "gpSimByTime", "addNoise", "pred_lc"]


def gpSimFull(carmaTerm, SNR, duration, N, nLC=1, log_flux=True):
def gp_sample(gp, size=None, seed=None):
"""Sample Celerite GP with a fixed seed"""

if seed is not None:
rng = np.random.default_rng(seed=seed)
else:
rng = np.random.default_rng()

gp._recompute()
if size is None:
n = rng.standard_normal(len(gp._t))
else:
n = rng.standard_normal((len(gp._t), size))

n = gp.solver.dot_L(n)
if size is None:
return gp.mean.get_value(gp._t) + n[:, 0]
return gp.mean.get_value(gp._t)[None, :] + n.T


def gpSimFull(carmaTerm, SNR, duration, N, nLC=1, log_flux=True, lc_seed=None):
"""
Simulate CARMA time series using uniform sampling.
Expand All @@ -25,6 +45,7 @@ def gpSimFull(carmaTerm, SNR, duration, N, nLC=1, log_flux=True):
nLC (int, optional): Number of time series to simulate. Defaults to 1.
log_flux (bool): Whether the flux/y values are in astronomical magnitude.
This argument affects how errors are assigned. Defaults to True.
lc_seed (int): Random seed for time series simulation. Defaults to None.
Raises:
RuntimeError: If the input CARMA term/model is not stable, thus cannot be
Expand All @@ -44,9 +65,14 @@ def gpSimFull(carmaTerm, SNR, duration, N, nLC=1, log_flux=True):
"The covariance matrix of the provided CARMA term is not positive definite!"
)

if lc_seed is not None:
rng = np.random.default_rng(seed=lc_seed)
else:
rng = np.random.default_rng()

t = np.linspace(0, duration, N)
noise = carmaTerm.get_rms_amp() / SNR
yerr = np.random.lognormal(0, 0.25, N) * noise
yerr = rng.lognormal(0, 0.25, N) * noise
yerr = yerr[np.argsort(np.abs(yerr))] # small->large

# init GP and solve matrix
Expand All @@ -55,7 +81,7 @@ def gpSimFull(carmaTerm, SNR, duration, N, nLC=1, log_flux=True):

# simulate, assign yerr based on y
t = np.repeat(t[None, :], nLC, axis=0)
y = gp_sim.sample(size=nLC)
y = gp_sample(gp_sim, size=nLC, seed=lc_seed)

# format yerr making it heteroscedastic
yerr = np.repeat(yerr[None, :], nLC, axis=0)
Expand All @@ -77,7 +103,16 @@ def gpSimFull(carmaTerm, SNR, duration, N, nLC=1, log_flux=True):


def gpSimRand(
carmaTerm, SNR, duration, N, nLC=1, log_flux=True, season=True, full_N=10_000
carmaTerm,
SNR,
duration,
N,
nLC=1,
log_flux=True,
season=True,
full_N=10_000,
lc_seed=None,
downsample_seed=None,
):
"""
Simulate CARMA time series randomly downsampled from a much denser full time series.
Expand All @@ -95,12 +130,17 @@ def gpSimRand(
to True.
full_N (int, optional): The number of data points in the full time series
(before downsampling). Defaults to 10_000.
lc_seed (int): Random seed for full time series simulation. Defaults to None.
downsample_seed (int): Random seed for downsampling the simulated full time
series. Defaults to None.
Returns:
(array(float), array(float), array(float)): Time stamps (default in day), y
values and measurement errors of the simulated time series.
"""
t, y, yerr = gpSimFull(carmaTerm, SNR, duration, full_N, nLC=nLC, log_flux=log_flux)
t, y, yerr = gpSimFull(
carmaTerm, SNR, duration, full_N, nLC=nLC, log_flux=log_flux, lc_seed=lc_seed
)
t = np.atleast_2d(t)
y = np.atleast_2d(y)
yerr = np.atleast_2d(yerr)
Expand All @@ -116,7 +156,7 @@ def gpSimRand(
mask1 = add_season(t[i])
else:
mask1 = np.ones(t[i].shape[0], dtype=bool)
mask2 = downsample_byN(t[i, mask1], N)
mask2 = downsample_byN(t[i, mask1], N, seed=downsample_seed)
tOut[i, :] = t[i, mask1][mask2]
yOut[i, :] = y[i, mask1][mask2]
yerrOut[i, :] = yerr[i, mask1][mask2]
Expand All @@ -127,7 +167,7 @@ def gpSimRand(
return tOut, yOut, yerrOut


def gpSimByTime(carmaTerm, SNR, t, factor=10, nLC=1, log_flux=True):
def gpSimByTime(carmaTerm, SNR, t, factor=10, nLC=1, log_flux=True, lc_seed=None):
"""
Simulate CARMA time series at desired time stamps.
Expand All @@ -147,6 +187,7 @@ def gpSimByTime(carmaTerm, SNR, t, factor=10, nLC=1, log_flux=True):
nLC (int, optional): Number of time series to simulate. Defaults to 1.
log_flux (bool): Whether the flux/y values are in astronomical magnitude.
This argument affects how errors are assigned. Defaults to True.
lc_seed (int): Random seed for time series simulation. Defaults to None.
Returns:
(array(float), array(float), array(float)): Time stamps (default in day), y
Expand All @@ -158,7 +199,7 @@ def gpSimByTime(carmaTerm, SNR, t, factor=10, nLC=1, log_flux=True):

# simulate full LC
tFull, yFull, yerrFull = gpSimFull(
carmaTerm, SNR, duration, N=N, nLC=nLC, log_flux=log_flux
carmaTerm, SNR, duration, N=N, nLC=nLC, log_flux=log_flux, lc_seed=lc_seed
)
tFull = np.atleast_2d(tFull)
yFull = np.atleast_2d(yFull)
Expand All @@ -177,20 +218,27 @@ def gpSimByTime(carmaTerm, SNR, t, factor=10, nLC=1, log_flux=True):
return tOut, yOut, yerrOut


def addNoise(y, yerr):
def addNoise(y, yerr, seed=None):
"""
Add (gaussian) noise to the input simulated time series given the measurement uncertainties.
Args:
y (array(float)): The 'clean' time series.
yerr (array(float)): The measurement uncertainties for the input
time series.
seed (int): Random seed for simulating noise. Defaults to None.
Returns:
array(float): A new time series with simulated (gaussian ) noise added
on top.
"""
vec_norm = np.vectorize(np.random.normal, signature="(n),(n)->(n)")

if seed is not None:
rng = np.random.default_rng(seed=seed)
else:
rng = np.random.default_rng()

vec_norm = np.vectorize(rng.normal, signature="(n),(n)->(n)")
noise = vec_norm(np.zeros_like(y), yerr)

return y + noise
Expand Down
11 changes: 9 additions & 2 deletions src/eztao/ts/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,21 +9,28 @@
__all__ = ["downsample_byN", "add_season", "downsample_byTime", "median_clip"]


def downsample_byN(t, nObs):
def downsample_byN(t, nObs, seed):
"""
Randomly choose N data points from a given time series.
Args:
t (array(float)): Time stamps of the original time series.
N (int): The number of entries in the final time series.
seed (int): Random seed for downsampling. Defaults to None.
Returns:
A 1d array booleans indicating which data points to keep.
"""

if seed is not None:
rng = np.random.default_rng(seed=seed)
else:
rng = np.random.default_rng()

# random choose index
idx = np.arange(len(t))
mask = np.zeros_like(idx, dtype=np.bool_)
true_idx = np.random.choice(idx, nObs, replace=False)
true_idx = rng.choice(idx, nObs, replace=False)

# assign chosen index to 1/True
mask[true_idx] = 1
Expand Down
16 changes: 12 additions & 4 deletions tests/test_CARMAFit.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,9 @@ def test_drwFit():


def test_dhoFit():
t1, y1, yerr1 = gpSimRand(dho1, 100, 365 * 10.0, 200, nLC=100, season=False)
t1, y1, yerr1 = gpSimRand(
dho1, 100, 365 * 10.0, 200, nLC=100, season=False, lc_seed=10
)
best_fit_dho1 = np.array(
Parallel(n_jobs=-1)(
delayed(dho_fit)(t1[i], y1[i], yerr1[i]) for i in range(len(t1))
Expand All @@ -156,7 +158,9 @@ def test_dhoFit():
assert np.percentile(diff1, 75) < 0.25

# the second test will down scale lc by 1e6
t2, y2, yerr2 = gpSimRand(dho2, 100, 365 * 10.0, 200, nLC=100, season=False)
t2, y2, yerr2 = gpSimRand(
dho2, 100, 365 * 10.0, 200, nLC=100, season=False, lc_seed=10
)
best_fit_dho2 = np.array(
Parallel(n_jobs=-1)(
delayed(dho_fit)(t2[i], y2[i] / 1e6, yerr2[i] / 1e6) for i in range(len(t2))
Expand All @@ -172,7 +176,9 @@ def test_dhoFit():


def test_carmaFit():
t1, y1, yerr1 = gpSimRand(carma30, 200, 365 * 10.0, 1000, nLC=150, season=False)
t1, y1, yerr1 = gpSimRand(
carma30, 200, 365 * 10.0, 1000, nLC=150, season=False, lc_seed=10
)
best_fit_carma1 = np.array(
Parallel(n_jobs=-1)(
delayed(carma_fit)(t1[i], y1[i] / 1e-6, yerr1[i] / 1e-6, 3, 0, n_opt=30)
Expand All @@ -189,7 +195,9 @@ def test_carmaFit():
assert np.percentile(diff1, 75) < 0.4

# the second test will down scale lc by 1e6
t2, y2, yerr2 = gpSimRand(carma31, 300, 365 * 10.0, 1000, nLC=150, season=False)
t2, y2, yerr2 = gpSimRand(
carma31, 300, 365 * 10.0, 1000, nLC=150, season=False, lc_seed=10
)
best_fit_carma2 = np.array(
Parallel(n_jobs=-1)(
delayed(carma_fit)(t2[i], y2[i], yerr2[i], 3, 1, n_opt=50)
Expand Down
62 changes: 61 additions & 1 deletion tests/test_CARMASim.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,30 @@ def test_invalidSim():
t, y, yerr = gpSimFull(carma_invalid, 20, 365 * 10.0, 10000)


def test_simFullSeed():
"""Test the lc_seed option of gpSimFull."""

# SNR = 20
nLC = 5
for kernel in test_kernels:
t, y1, yerr1 = gpSimFull(kernel, 20, 365 * 10.0, 1000, nLC=nLC, lc_seed=10)
t, y2, yerr2 = gpSimFull(kernel, 20, 365 * 10.0, 1000, nLC=nLC, lc_seed=10)

# agree between two separate calls
for i in range(nLC):
assert np.allclose(y1[i], y2[i])
assert np.allclose(yerr1[i], yerr2[i])

# disagree between different simulations from one call
assert not np.allclose(y1[0], y1[nLC - 1])
assert not np.allclose(y1[1], y1[nLC - 2])
assert not np.allclose(yerr1[0], yerr1[nLC - 1])
assert not np.allclose(yerr1[1], yerr1[nLC - 2])


def test_simRand():
"""Test function gpSimRand."""
# SNR = 10
# SNR = 20
for kernel in test_kernels:
t, y, yerr = gpSimRand(kernel, 20, 365 * 10.0, 150, nLC=100, season=False)
log_amp = np.log(kernel.get_rms_amp())
Expand All @@ -53,6 +74,26 @@ def test_simRand():
assert (np.argsort(yF) == np.argsort(-yerrF)).all()


def test_simRandSeed():
"""Test the downsample_seed option of gpSimRand."""

# SNR = 20
nLC = 1
for kernel in test_kernels:
t1, y1, yerr1 = gpSimRand(
kernel, 20, 365 * 10.0, 150, nLC=nLC, lc_seed=10, downsample_seed=10
)
t2, y2, yerr2 = gpSimRand(
kernel, 20, 365 * 10.0, 150, nLC=nLC, lc_seed=10, downsample_seed=10
)
t3, y3, yerr3 = gpSimRand(
kernel, 20, 365 * 10.0, 150, nLC=nLC, lc_seed=10, downsample_seed=20
)

assert np.allclose(t1, t2)
assert not np.allclose(t1, t3)


def test_simByTime():
"""Test function gpSimByTime."""
t = np.sort(np.random.uniform(0, 3650, 5000))
Expand All @@ -72,3 +113,22 @@ def test_simByTime():
# test single LC simulation
tOut, yOut, yerrOut = gpSimByTime(dho2, SNR, t, nLC=1)
assert tOut.shape[0] == yOut.shape[0] == yerrOut.shape[0] == t.shape[0]


def test_addNoiseSeed():
"""Test the seed option of addNoise."""

nLC = 1
t1, y1, yerr1 = gpSimRand(
test_kernels[-1], 20, 365 * 10.0, 150, nLC=nLC, lc_seed=10, downsample_seed=10
)
t2, y2, yerr2 = gpSimRand(
test_kernels[-1], 20, 365 * 10.0, 150, nLC=nLC, lc_seed=10, downsample_seed=10
)

noise_y1 = addNoise(y1, yerr1, seed=10)
noise_y2 = addNoise(y2, yerr2, seed=10)
noise_y3 = addNoise(y1, yerr1, seed=20)

assert np.allclose(noise_y1, noise_y2)
assert not np.allclose(noise_y1, noise_y3)

0 comments on commit e7e3b8c

Please sign in to comment.