Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] add depth-limiting to off-axis sph particle projections #4712

Draft
wants to merge 4 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
301 changes: 283 additions & 18 deletions yt/utilities/lib/pixelization_routines.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -1284,6 +1284,191 @@ def pixelize_sph_kernel_projection(

return mask

@cython.initializedcheck(False)
@cython.boundscheck(False)
@cython.wraparound(False)
@cython.cdivision(True)
def pixelize_sph_kernel_projection_with_depth_check(
np.float64_t[:, :] buff,
np.uint8_t[:, :] mask,
any_float[:] posx,
any_float[:] posy,
any_float[:] posz,
any_float[:] hsml,
any_float[:] pmass,
any_float[:] pdens,
any_float[:] quantity_to_smooth,
bounds,
kernel_name="cubic",
weight_field=None,
int check_period=1,
period=None):

cdef np.intp_t xsize, ysize
cdef np.float64_t x_min, x_max, y_min, y_max, z_min, z_max, prefactor_j
cdef np.int64_t xi, yi, x0, x1, y0, y1, xxi, yyi
cdef np.float64_t q_ij2, posx_diff, posy_diff, ih_j2
cdef np.float64_t x, y, dx, dy, idx, idy, h_j2, px, py, pz
cdef np.float64_t period_x = 0, period_y = 0, period_z = 0
cdef int i, j, ii, jj, kk
cdef np.float64_t[:] _weight_field
cdef int * xiter
cdef int * yiter
cdef int * ziter
cdef np.float64_t * xiterv
cdef np.float64_t * yiterv
cdef np.float64_t * ziterv

if weight_field is not None:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think you got this code from elsewhere, but I will say that it definitely confused me. I at first thought this was a string, but now I see it's either a numpy array or none. Yowza!

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

indeed! copied directly from pixelize_sph_kernel_projection. Up in the python somewhere there's a weight that is a string, so definitely confusing. Could rename this variable to weight_array (and also rename in pixelize_sph_kernel_projection).

_weight_field = weight_field

if period is not None:
period_x = period[0]
period_y = period[1]
period_z = period[2]

# we find the x and y range over which we have pixels and we find how many
# pixels we have in each dimension
xsize, ysize = buff.shape[0], buff.shape[1]
x_min = bounds[0]
x_max = bounds[1]
y_min = bounds[2]
y_max = bounds[3]

# we also want to skip particles outside the rotated z bounds
z_min = bounds[4]
z_max = bounds[5]

dx = (x_max - x_min) / xsize
dy = (y_max - y_min) / ysize

idx = 1.0/dx
idy = 1.0/dy

if kernel_name not in kernel_tables:
kernel_tables[kernel_name] = SPHKernelInterpolationTable(kernel_name)
cdef SPHKernelInterpolationTable itab = kernel_tables[kernel_name]

with nogil, parallel():
# loop through every particle
# NOTE: this loop can be quite time consuming. see description in
# pixelize_sph_kernel_projection.

local_buff = <np.float64_t *> malloc(sizeof(np.float64_t) * xsize * ysize)
xiterv = <np.float64_t *> malloc(sizeof(np.float64_t) * 2)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have a sneaking suspicion it's to avoid a race condition, but I think a long time ago we were supposed to be able to declare variables inside a parallel block to get them to be thread-local. No changes needed.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

helpful context! I was confused by why this was needed.

yiterv = <np.float64_t *> malloc(sizeof(np.float64_t) * 2)
ziterv = <np.float64_t *> malloc(sizeof(np.float64_t) * 2)
xiter = <int *> malloc(sizeof(int) * 2)
yiter = <int *> malloc(sizeof(int) * 2)
ziter = <int *> malloc(sizeof(int) * 2)
xiter[0] = yiter[0] = ziter[0] = 0
xiterv[0] = yiterv[0] = ziterv[0] = 0.0
for i in range(xsize * ysize):
local_buff[i] = 0.0

for j in prange(0, posx.shape[0], schedule="dynamic"):
if j % 100000 == 0:
with gil:
PyErr_CheckSignals()

xiter[1] = yiter[1] = ziter[1] = 999

if check_period == 1:
if posx[j] - hsml[j] < x_min:
xiter[1] = +1
xiterv[1] = period_x
elif posx[j] + hsml[j] > x_max:
xiter[1] = -1
xiterv[1] = -period_x
if posy[j] - hsml[j] < y_min:
yiter[1] = +1
yiterv[1] = period_y
elif posy[j] + hsml[j] > y_max:
yiter[1] = -1
yiterv[1] = -period_y
if posz[j] - hsml[j] < z_min:
ziter[1] = +1
ziterv[1] = period_z
elif posz[j] + hsml[j] > z_max:
ziter[1] = -1
ziterv[1] = -period_z

# we set the smoothing length squared with lower limit of the pixel
h_j2 = fmax(hsml[j]*hsml[j], dx*dy)
ih_j2 = 1.0/h_j2

prefactor_j = pmass[j] / pdens[j] / hsml[j]**2 * quantity_to_smooth[j]
if weight_field is not None:
prefactor_j *= _weight_field[j]

for ii in range(2):
if xiter[ii] == 999: continue
px = posx[j] + xiterv[ii]
if (px + hsml[j] < x_min) or (px - hsml[j] > x_max): continue


for jj in range(2):
if yiter[jj] == 999: continue
py = posy[j] + yiterv[jj]
if (py + hsml[j] < y_min) or (py - hsml[j] > y_max): continue

for kk in range(2):
# discard if z is outside bounds
if ziter[kk] == 999: continue
pz = posz[j] + ziterv[kk]
if (pz + hsml[j] < z_min) or (pz - hsml[j] > z_max): continue

# here we find the pixels which this particle contributes to
x0 = <np.int64_t> ((px - hsml[j] - x_min)*idx)
x1 = <np.int64_t> ((px + hsml[j] - x_min)*idx)
x0 = iclip(x0-1, 0, xsize)
x1 = iclip(x1+1, 0, xsize)

y0 = <np.int64_t> ((py - hsml[j] - y_min)*idy)
y1 = <np.int64_t> ((py + hsml[j] - y_min)*idy)
y0 = iclip(y0-1, 0, ysize)
y1 = iclip(y1+1, 0, ysize)

# found pixels we deposit on, loop through those pixels
for xi in range(x0, x1):
# we use the centre of the pixel to calculate contribution
x = (xi + 0.5) * dx + x_min

posx_diff = px - x
posx_diff = posx_diff * posx_diff

if posx_diff > h_j2: continue

for yi in range(y0, y1):
y = (yi + 0.5) * dy + y_min

posy_diff = py - y
posy_diff = posy_diff * posy_diff
if posy_diff > h_j2: continue

q_ij2 = (posx_diff + posy_diff) * ih_j2
if q_ij2 >= 1:
continue

# see equation 32 of the SPLASH paper
# now we just use the kernel projection
local_buff[xi + yi*xsize] += prefactor_j * itab.interpolate(q_ij2)
mask[xi, yi] = 1

with gil:
for xxi in range(xsize):
for yyi in range(ysize):
buff[xxi, yyi] += local_buff[xxi + yyi*xsize]
free(local_buff)
free(xiterv)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

does this also need to happen inside the parallel context?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

hard to tell with the github interface, but these lines are all within the

with nogil, parallel():

block way up top.

Screenshot 2023-11-13 at 11 09 45 AM

free(yiterv)
free(ziterv)
free(xiter)
free(yiter)
free(ziter)

return mask

@cython.boundscheck(False)
@cython.wraparound(False)
def interpolate_sph_positions_gather(np.float64_t[:] buff,
Expand Down Expand Up @@ -1910,6 +2095,61 @@ def rotate_particle_coord(np.float64_t[:] px,
return px_rotated, py_rotated, rot_bounds_x0, rot_bounds_x1, rot_bounds_y0, rot_bounds_y1


@cython.boundscheck(False)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So I think this function looks right. What I'm wondering is if it needs to be in Cython as-is. I think it could potentially take advantage of being in cython by consolidating the matmul stuff with the copying of coordinates -- i.e., iterating over the particles one-by-one and avoiding temporary arrays. I'm not 100% sure that it's necessary, but it could potentially eliminate some allocations.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ya, I agree -- it could be written with just numpy matrix operations at the python level or rewritten with a different loop control.

Also, could actually cut out an entire loop over the particles by calculating the rotated particle positions as needed within pixelize_sph_kernel_projection_with_depth_check (and pixelize_sph_kernel_projection)?

@cython.wraparound(False)
def rotate_particle_coord_with_z(np.float64_t[:] px,
np.float64_t[:] py,
np.float64_t[:] pz,
center,
width,
normal_vector,
north_vector):
# same as rotate_particle_coord but returns rotated z coords and bounds.
# This is a separate function so that we can avoid allocating the z-related
# arrays when they will not be used.
cdef int num_particles = np.size(px)
cdef np.float64_t[:] z_axis = np.array([0., 0., 1.], dtype="float64")
cdef np.float64_t[:] y_axis = np.array([0., 1., 0.], dtype="float64")
cdef np.float64_t[:, :] normal_rotation_matrix
cdef np.float64_t[:] transformed_north_vector
cdef np.float64_t[:, :] north_rotation_matrix
cdef np.float64_t[:, :] rotation_matrix

normal_rotation_matrix = get_rotation_matrix(normal_vector, z_axis)
transformed_north_vector = np.matmul(normal_rotation_matrix, north_vector)
north_rotation_matrix = get_rotation_matrix(transformed_north_vector, y_axis)
rotation_matrix = np.matmul(north_rotation_matrix, normal_rotation_matrix)

cdef np.float64_t[:] px_rotated = np.empty(num_particles, dtype="float64")
cdef np.float64_t[:] py_rotated = np.empty(num_particles, dtype="float64")
cdef np.float64_t[:] pz_rotated = np.empty(num_particles, dtype="float64")
cdef np.float64_t[:] coordinate_matrix = np.empty(3, dtype="float64")
cdef np.float64_t[:] rotated_coordinates
cdef np.float64_t[:] rotated_center
rotated_center = rotation_matmul(
rotation_matrix, np.array([center[0], center[1], center[2]]))

# set up the rotated bounds
cdef np.float64_t rot_bounds_x0 = rotated_center[0] - width[0] / 2
cdef np.float64_t rot_bounds_x1 = rotated_center[0] + width[0] / 2
cdef np.float64_t rot_bounds_y0 = rotated_center[1] - width[1] / 2
cdef np.float64_t rot_bounds_y1 = rotated_center[1] + width[1] / 2
cdef np.float64_t rot_bounds_z0 = rotated_center[2] - width[2] / 2
cdef np.float64_t rot_bounds_z1 = rotated_center[2] + width[2] / 2

for i in range(num_particles):
coordinate_matrix[0] = px[i]
coordinate_matrix[1] = py[i]
coordinate_matrix[2] = pz[i]
rotated_coordinates = rotation_matmul(
rotation_matrix, coordinate_matrix)
px_rotated[i] = rotated_coordinates[0]
py_rotated[i] = rotated_coordinates[1]
pz_rotated[i] = rotated_coordinates[2]

return px_rotated, py_rotated, pz_rotated, rot_bounds_x0, rot_bounds_x1, rot_bounds_y0, rot_bounds_y1, rot_bounds_z0, rot_bounds_z1


@cython.boundscheck(False)
@cython.wraparound(False)
def off_axis_projection_SPH(np.float64_t[:] px,
Expand All @@ -1926,28 +2166,53 @@ def off_axis_projection_SPH(np.float64_t[:] px,
np.uint8_t[:, :] mask,
normal_vector,
north_vector,
weight_field=None):
weight_field=None,
depth_set=False):
# Do nothing in event of a 0 normal vector
if np.allclose(normal_vector, 0.):
return

px_rotated, py_rotated, \
rot_bounds_x0, rot_bounds_x1, \
rot_bounds_y0, rot_bounds_y1 = rotate_particle_coord(px, py, pz,
center, width, normal_vector, north_vector)

pixelize_sph_kernel_projection(projection_array,
mask,
px_rotated,
py_rotated,
smoothing_lengths,
particle_masses,
particle_densities,
quantity_to_smooth,
[rot_bounds_x0, rot_bounds_x1,
rot_bounds_y0, rot_bounds_y1],
weight_field=weight_field,
check_period=0)
if depth_set:
px_rotated, py_rotated, pz_rotated, \
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We shouldn't fix this here, but is this a case where something like a named tuple or simple data class could simplify our code?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes, I do think that would help. it might be worth fixing here too :) I think it could cut down on some of the code duplication I'm introducing.

rot_bounds_x0, rot_bounds_x1, \
rot_bounds_y0, rot_bounds_y1, \
rot_bounds_z0, rot_bounds_z1 = rotate_particle_coord_with_z(px, py, pz,
center, width, normal_vector, north_vector)

pixelize_sph_kernel_projection_with_depth_check(projection_array,
mask,
px_rotated,
py_rotated,
pz_rotated,
smoothing_lengths,
particle_masses,
particle_densities,
quantity_to_smooth,
[rot_bounds_x0, rot_bounds_x1,
rot_bounds_y0, rot_bounds_y1,
rot_bounds_z0, rot_bounds_z1],
weight_field=weight_field,
check_period=0)
else:
px_rotated, py_rotated, \
rot_bounds_x0, rot_bounds_x1, \
rot_bounds_y0, rot_bounds_y1 = rotate_particle_coord(px, py, pz,
center, width, normal_vector, north_vector)

pixelize_sph_kernel_projection(projection_array,
mask,
px_rotated,
py_rotated,
smoothing_lengths,
particle_masses,
particle_densities,
quantity_to_smooth,
[rot_bounds_x0, rot_bounds_x1,
rot_bounds_y0, rot_bounds_y1],
weight_field=weight_field,
check_period=0)




@cython.boundscheck(False)
Expand Down
1 change: 1 addition & 0 deletions yt/visualization/fixed_resolution.py
Original file line number Diff line number Diff line change
Expand Up @@ -649,6 +649,7 @@ def _generate_image_and_mask(self, item) -> None:
interpolated=dd.interpolated,
north_vector=dd.north_vector,
method=dd.method,
depth_set=dd.depth_set,
)
if self.data_source.moment == 2:

Expand Down
13 changes: 13 additions & 0 deletions yt/visualization/plot_window.py
Original file line number Diff line number Diff line change
Expand Up @@ -2275,6 +2275,7 @@ def __init__(
data_source=None,
*,
moment=1,
depth_set=False,
):
validate_moment(moment, weight)
self.center = center
Expand All @@ -2301,6 +2302,11 @@ def __init__(
self.orienter = Orientation(normal_vector, north_vector=north_vector)
self.moment = moment

# note: the depth_set bool indicates whether we need to limit particles
# by the rotated z-bounds. If using all particles in the rotated x-y
# plane (depth_set is False), then we can avoid some computation.
self.depth_set = depth_set

def _determine_fields(self, *args):
return self.dd._determine_fields(*args)

Expand Down Expand Up @@ -2440,6 +2446,12 @@ def __init__(
f"currently supported geometries: {self._supported_geometries!r}"
)

depth_set = depth is not None
if not depth_set:
# need a valid depth for calculating bounds, but those
# bounds will not be used to limit particles.
depth = (1, "1")

(bounds, center_rot) = get_oblique_window_parameters(
normal, center, width, ds, depth=depth
)
Expand All @@ -2463,6 +2475,7 @@ def __init__(
method=method,
data_source=data_source,
moment=moment,
depth_set=depth_set,
)

validate_mesh_fields(OffAxisProj, fields)
Expand Down