Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Blob labels returned #37

Merged
merged 8 commits into from
Jan 24, 2022
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,12 @@ The following distributions are implemented:
!!! this is only a good approximation for blob_shape='exp' !!!
- `error`: float, optional,
numerical error at x = Lx when blob gets truncated

- `labels`: bool, optional,
if True, field with blob labels is returned
used for creating training data for supervised machine learning algorithms
- `label_border`: float, optional,
defines region of blob as region where density >= label_border * amplitude of Blob
only used if labels = True
### `show_model()`
- `ds`: xarray Dataset,
Model data
Expand Down
81 changes: 59 additions & 22 deletions blobmodel/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,13 +32,12 @@ def __init__(
Ly: float, length of grid in y
dt: float, time step
T: float, time length
periodic_y: bool, optional
allow periodicity in y-direction
periodic_y: bool, optional
allow periodicity in y-direction

Important: only good approximation for Ly >> blob width

num_blobs:
number of blobs
blob_shape: str, optional
see Blob dataclass for available shapes
t_drain: float, optional
Expand Down Expand Up @@ -66,7 +65,12 @@ def __str__(self) -> str:
return f"2d Blob Model with blob shape:{self.blob_shape}, num_blobs:{self.num_blobs} and t_drain:{self.t_drain}"

def make_realization(
self, file_name: str = None, speed_up: bool = False, error: float = 1e-10
self,
file_name: str = None,
speed_up: bool = False,
error: float = 1e-10,
labels: bool = False,
label_border: float = 0.75,
) -> xr.Dataset:
"""Integrate Model over time and write out data as xarray dataset.

Expand All @@ -75,14 +79,19 @@ def make_realization(
file_name: str, optional
file name for .nc file containing data as xarray dataset
speed_up: bool, optional
speeding up code by discretizing each single blob at smaller time window given by
t in (Blob.t_init, truncation_Lx*Lx/Blob.v_x + Blob.t_init)

speeding up code by discretizing each single blob at smaller time window
when blob values fall under given error value the blob gets discarded
!!! this is only a good approximation for blob_shape='exp' !!!

truncation_Lx: float, optional
number of times blob propagate through length Lx before blob is neglected
error: float, optional
numerical error at x = Lx when blob gets truncated
only used if speed_up = True
labels: bool, optional
if True, field with blob labels is returned
used for creating training data for supervised machine learning algorithms
label_border: float, optional
defines region of blob as region where density >= label_border * amplitude of Blob
only used if labels = True

Returns
----------
Expand All @@ -97,42 +106,68 @@ def make_realization(
t_drain=self.t_drain,
)

output = np.zeros(
__density = np.zeros(
shape=(self.__geometry.Ny, self.__geometry.Nx, self.__geometry.t.size)
)
if labels:
__labels_field = np.zeros(
shape=(self.__geometry.Ny, self.__geometry.Nx, self.__geometry.t.size)
)

for b in tqdm(self.__blobs, desc="Summing up Blobs"):
# speedup implemeted for exponential pulses
# can also be used for gaussian pulses since they converge faster than exponential pulses
if speed_up:
start = int(b.t_init / self.__geometry.dt)
__start = int(b.t_init / self.__geometry.dt)
if b.v_x == 0:
stop = self.__geometry.t.size
__stop = self.__geometry.t.size
else:
# ignores t_drain when calculating stop time
stop = start + int(
(-np.log(error * np.sqrt(np.pi)) + self.__geometry.Lx - b.pos_x)
/ (b.v_x * self.__geometry.dt)
__stop = np.minimum(
self.__geometry.t.size,
__start
+ int(
(
-np.log(error * np.sqrt(np.pi))
+ self.__geometry.Lx
- b.pos_x
)
/ (b.v_x * self.__geometry.dt)
),
)
output[:, :, start:stop] += b.discretize_blob(
x=self.__geometry.x_matrix[:, :, start:stop],
y=self.__geometry.y_matrix[:, :, start:stop],
t=self.__geometry.t_matrix[:, :, start:stop],
__single_blob = b.discretize_blob(
x=self.__geometry.x_matrix[:, :, __start:__stop],
y=self.__geometry.y_matrix[:, :, __start:__stop],
t=self.__geometry.t_matrix[:, :, __start:__stop],
periodic_y=self.__geometry.periodic_y,
Ly=self.__geometry.Ly,
)
__density[:, :, __start:__stop] += __single_blob
if labels:
__max_amplitudes = np.max(__single_blob, axis=(0, 1))
__max_amplitudes[__max_amplitudes == 0] = np.inf
__tmp = np.copy(__labels_field[:, :, __start:__stop])
__tmp[__single_blob >= __max_amplitudes * label_border] = 1
__labels_field[:, :, __start:__stop] = __tmp
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why create __tmp? Seems unnecessary:

__labels_field[:, :, __start:__stop][__single_blob >= __max_amplitudes * label_border] = 1

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I couldn't find another way of implementing this since I am not aware of a simpler way of applying a condition only on a slice of the array.
i.e.
__label_field[:, :, __start:__stop && __single_blob >= __max_amplitudes * label_border]

@Sosnowsky do you know a simpler way in python? Otherwise I would keep the current implementation

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Doesn't what I wrote work?
__labels_field[:, :, __start:__stop][__single_blob >= __max_amplitudes * label_border] = 1

Creating the tmp array is time-comsuming, so in general you would prefer to avoid doing it if it is not necessary.


else:
output += b.discretize_blob(
__single_blob = b.discretize_blob(
x=self.__geometry.x_matrix,
y=self.__geometry.y_matrix,
t=self.__geometry.t_matrix,
periodic_y=self.__geometry.periodic_y,
Ly=self.__geometry.Ly,
)
__density += __single_blob
if labels:
__max_amplitudes = np.max(__single_blob, axis=(0, 1))
__max_amplitudes[__max_amplitudes == 0] = np.inf
__labels_field[__single_blob >= __max_amplitudes * label_border] = 1

if self.__geometry.Ly == 0:
ds = xr.Dataset(
data_vars=dict(
n=(["y", "x", "t"], output),
n=(["y", "x", "t"], __density),
),
coords=dict(
x=(["x"], self.__geometry.x),
Expand All @@ -143,7 +178,7 @@ def make_realization(
else:
ds = xr.Dataset(
data_vars=dict(
n=(["y", "x", "t"], output),
n=(["y", "x", "t"], __density),
),
coords=dict(
x=(["x"], self.__geometry.x),
Expand All @@ -152,6 +187,8 @@ def make_realization(
),
attrs=dict(description="2D propagating blobs."),
)
if labels:
ds = ds.assign(blob_labels=(["y", "x", "t"], __labels_field))

if file_name is not None:
ds.to_netcdf(file_name)
Expand Down
5 changes: 4 additions & 1 deletion blobmodel/plotting.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

def show_model(
ds: xr.Dataset,
variable: str = "n",
interval: int = 100,
save: bool = False,
gif_name: str = "2d_blobs.gif",
Expand All @@ -18,6 +19,8 @@ def show_model(
----------
ds: xarray Dataset,
Model data
variable: str, optional
variable to be animated
interval: int, optional
time interval between frames in ms
save: bool, optional
Expand All @@ -35,7 +38,7 @@ def show_model(
frames = []

for timestep in ds.t.values:
frame = ds["n"].sel(t=timestep).values
frame = ds[variable].sel(t=timestep).values
frames.append(frame)

cv0 = frames[0]
Expand Down
30 changes: 30 additions & 0 deletions examples/show_labels.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
from blobmodel import Model, show_model
import numpy as np

# here you can define your custom parameter distributions

bm = Model(
Nx=100,
Ny=100,
Lx=20,
Ly=20,
dt=0.1,
T=20,
periodic_y=True,
blob_shape="gauss",
num_blobs=10,
t_drain=1e10,
)

# create data
ds = bm.make_realization(speed_up=True, error=1e-2, labels=True)

print(ds)

import matplotlib.pyplot as plt

ds.n.isel(t=-1).plot()
plt.figure()
ds.blob_labels.isel(t=-1).plot()

plt.show()
104 changes: 104 additions & 0 deletions tests/test_labels.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
from blobmodel import Model, BlobFactory, Blob
import numpy as np
import warnings


# here you can define your custom parameter distributions
class CustomBlobFactory(BlobFactory):
def __init__(self) -> None:
pass

def sample_blobs(
self, Ly: float, T: float, num_blobs: int, blob_shape: str, t_drain: float
) -> list[Blob]:

# set custom parameter distributions
__amp = np.ones(num_blobs)
__width = np.ones(num_blobs)
__vx = np.ones(num_blobs)
__vy = np.zeros(num_blobs)

__posx = np.zeros(num_blobs)
__posy = np.ones(num_blobs) * Ly / 2
__t_init = np.ones(num_blobs) * 0

return [
Blob(
id=i,
blob_shape=blob_shape,
amplitude=__amp[i],
width_prop=__width[i],
width_perp=__width[i],
v_x=__vx[i],
v_y=__vy[i],
pos_x=__posx[i],
pos_y=__posy[i],
t_init=__t_init[i],
t_drain=t_drain,
)
for i in range(num_blobs)
]


def test_bloblabels_speedup():
warnings.filterwarnings("ignore")
bf = CustomBlobFactory()
bm = Model(
Nx=5,
Ny=1,
Lx=5,
Ly=5,
dt=1,
T=5,
periodic_y=True,
blob_shape="gauss",
num_blobs=1,
blob_factory=bf,
t_drain=1e10,
)
ds = bm.make_realization(speed_up=True, error=1e-2, labels=True)
correct_labels = np.array(
[
[
[1.0, 0.0, 0.0, 0.0, 0.0],
[0.0, 1.0, 0.0, 0.0, 0.0],
[0.0, 0.0, 1.0, 0.0, 0.0],
[0.0, 0.0, 0.0, 1.0, 0.0],
[0.0, 0.0, 0.0, 0.0, 1.0],
]
]
)
diff = ds["blob_labels"].values - correct_labels
assert np.max(diff) < 0.00001


def test_bloblabels():
warnings.filterwarnings("ignore")
bf = CustomBlobFactory()
bm = Model(
Nx=5,
Ny=1,
Lx=5,
Ly=5,
dt=1,
T=5,
periodic_y=True,
blob_shape="gauss",
num_blobs=1,
blob_factory=bf,
t_drain=1e10,
)
ds = bm.make_realization(speed_up=False, labels=True)
correct_labels = np.array(
[
[
[1.0, 0.0, 0.0, 0.0, 0.0],
[0.0, 1.0, 0.0, 0.0, 0.0],
[0.0, 0.0, 1.0, 0.0, 0.0],
[0.0, 0.0, 0.0, 1.0, 0.0],
[0.0, 0.0, 0.0, 0.0, 1.0],
]
]
)
diff = ds["blob_labels"].values - correct_labels
assert np.max(diff) < 0.00001