In [1]:
# Third-party imports,
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.widgets import Slider
from matplotlib.ticker import FuncFormatter
import ipywidgets as widgets
from IPython.display import display
import torch

# Python Standard Library imports,
import gc
import sys

# Dataset configuration,
TEST = True  # When True, only the first few grids are kept (see below).
QUANTITY = "Dark Matter Mass Density"
UNITS = r"$M_{\odot}/h/(Mpc/h)^3$"
SPEC = "SIMBA_CV_z=0"

# Loading data. Forward slashes are accepted by Python on Windows as well as on POSIX
# systems, so the path does not depend on the Windows-only "\\" separator.
grids = np.load("CAMELs/SIMBA_CV_z=0/Grids_Mcdm_SIMBA_CV_128_z=0.0.npy")

# Used for creating testing training data: keep only the first five grids.
if TEST:
    grids = grids[0:5]

# Alternative input files (change the path, QUANTITY and UNITS above together):
#   dark matter density grids: CAMELs/SIMBA_CV_z=0/Grids_Mcdm_SIMBA_CV_128_z=0.0.npy  UNITS: r"$M_{\odot}/h/(Mpc/h)^3$"
#   electron density grids:    CAMELs/SIMBA_CV_z=0/Grids_ne_SIMBA_CV_128_z=0.0.npy    UNITS: r"$(Mpc/h)^{-3}$"
#   temperature grids:         CAMELs/SIMBA_CV_z=0/Grids_T_SIMBA_CV_128_z=0.0.npy     (units not recorded here)

# Data Visualiser 

In [2]:
def plot_slice(i, grid, tick_scale=25):
    """Draw slice ``i`` of a voxel grid as a log10-scaled image with a colour bar.

    PARAMETERS:
    i - Index of the slice to show. The slice is taken along the first axis (``grid[i]``).
    grid - Array of values to plot; ``grid[i]`` is passed through ``np.log10`` before plotting.
    tick_scale - Factor every tick position is multiplied by before being printed as a
        tick label. Defaults to 25, which keeps the previous fixed behaviour.

    The title and the colour-bar label are taken from the module-level QUANTITY and UNITS."""
    _, axes = plt.subplots(figsize=(8, 8))
    img = axes.imshow(np.log10(grid[i]), cmap="rainbow")

    # Setting labels
    axes.set_xlabel("(Mpc/h)")
    axes.set_ylabel("(Mpc/h)")
    axes.set_title(QUANTITY)

    # Custom xtick and ytick labels.
    # NOTE(review): tick positions are voxel indices, so the default prints index * 25.
    # If 25 is meant to be the box side length in Mpc/h, the per-voxel factor would be
    # box_size / voxels_per_side instead - confirm and pass the appropriate tick_scale.
    def scaled_tick_label(x, pos):
        return f'{x*tick_scale}'

    formatter = FuncFormatter(scaled_tick_label)
    axes.xaxis.set_major_formatter(formatter)
    axes.yaxis.set_major_formatter(formatter)

    # Adding color bar
    cbar = plt.colorbar(img, ax=axes, orientation="vertical")
    cbar.set_label("Log(" + UNITS + ")")

    plt.show()

# Creating slider widget,
grid = grids[0]
# plot_slice indexes the grid along its first axis (grid[i]), so the slider range is
# taken from that same axis; shape[2] would only agree with it for cubic grids.
grid_zlength = (grid.shape[0] - 1)
slider = widgets.IntSlider(min = 0, max = grid_zlength, value=0, description = "Slice")

# Link slider to function. The grid is bound as a default argument so this viewer keeps
# plotting the array it was created with, even if the global `grid` is reassigned by a
# later cell (a plain closure would look the global up again on every slider move).
output = widgets.interactive_output(lambda i, grid=grid: plot_slice(i, grid), {'i': slider})

# Display slider and plot
display(slider, output)

IntSlider(value=0, description='Slice', max=127)

Output()

# Data Processing

In [19]:
def create_subcubes(voxel_grid, subcube_length):
    """Split a 3D voxel grid into non-overlapping cubic sub-grids (subcubes).

    PARAMETERS:
    voxel_grid - 3D NumPy array. Our input voxel array. The axes may differ in size; each
        axis is tiled independently.
    subcube_length - Positive integer. The voxel length of each subcube.

    RETURNS:
    4D NumPy array of shape (number_of_subcubes, subcube_length, subcube_length, subcube_length).
    Subcubes are ordered with the block index along the last grid axis varying fastest.
    Voxels beyond the last whole subcube along an axis are dropped.

    RAISES:
    ValueError - if voxel_grid is not 3D or subcube_length is not positive."""
    voxel_grid = np.asarray(voxel_grid)
    if voxel_grid.ndim != 3:
        raise ValueError(f"voxel_grid must be 3D, got {voxel_grid.ndim} dimension(s)")
    if subcube_length <= 0:
        raise ValueError(f"subcube_length must be positive, got {subcube_length!r}")

    # Number of whole subcubes that fit along each axis.
    n0, n1, n2 = (axis_length // subcube_length for axis_length in voxel_grid.shape)

    # Drop any remainder so every axis is an exact multiple of subcube_length.
    trimmed = voxel_grid[: n0 * subcube_length, : n1 * subcube_length, : n2 * subcube_length]

    # Split each axis into (block index, offset inside block), then bring the three block
    # indices to the front. Unlike as_strided, this cannot address memory outside the array.
    blocks = trimmed.reshape(n0, subcube_length, n1, subcube_length, n2, subcube_length)
    blocks = blocks.transpose(0, 2, 4, 1, 3, 5)
    return blocks.reshape(-1, subcube_length, subcube_length, subcube_length)

def grid_rotations(grid):
    """Given a 3D NumPy array representing a cubic grid of voxels, this function returns the
    24 proper rotations of said grid (no mirror images) as a 4D array which presents a list
    of each rotated grid. The first entry is the unrotated grid.

    PARAMETERS:
    grid - 3D NumPy Array. All three axes must have the same length, otherwise the rotated
        grids have different shapes and cannot be stacked.

    RETURNS:
    4D NumPy array of shape (24, n, n, n)."""
    # Every symmetry of a cube is an axis permutation followed by flips of some axes.
    # It is a rotation (rather than a mirror image) exactly when the sign of the
    # permutation times (-1)**(number of flipped axes) is +1. So even permutations are
    # paired with an even number of flips, and odd permutations (a single swap of two
    # axes, which on its own is a reflection) with an odd number of flips.
    even_permutations = [(0, 1, 2), (1, 2, 0), (2, 0, 1)]
    odd_permutations = [(0, 2, 1), (1, 0, 2), (2, 1, 0)]
    even_flips = [(), (0, 1), (0, 2), (1, 2)]
    odd_flips = [(0,), (1,), (2,), (0, 1, 2)]

    rotations = []
    for permutations, flip_sets in ((even_permutations, even_flips), (odd_permutations, odd_flips)):
        for axes in permutations:
            permuted = np.transpose(grid, axes)
            for flipped_axes in flip_sets:
                rotated = permuted
                for axis in flipped_axes:
                    rotated = np.flip(rotated, axis=axis)
                rotations.append(rotated)

    return np.stack(rotations, axis=0)  # Use np.stack instead of np.array

def create_rotations(grids):
    """Build the rotated copies of every grid and join them into one array.

    PARAMETERS:
    grids - Iterable of 3D grids; each one is passed to grid_rotations.

    The per-grid results are concatenated along the first axis, in input order."""
    rotated_sets = []
    for single_grid in grids:
        rotated_sets.append(grid_rotations(single_grid))
    return np.concatenate(rotated_sets, axis=0)

def create_all_subcubes(grids, subcube_length):
    """Cut every grid into subcubes and join the results into one array.

    PARAMETERS:
    grids - Iterable of 3D grids; each one is passed to create_subcubes.
    subcube_length - The voxel length of each subcube.

    The per-grid results are concatenated along the first axis, in input order."""
    subcube_sets = []
    for single_grid in grids:
        subcube_sets.append(create_subcubes(single_grid, subcube_length))
    return np.concatenate(subcube_sets, axis=0)

def process_grids(grids, subcube_length, batch_size):
    """Cuts every grid into subcubes, working through the grids batch_size at a time.

    PARAMETERS:
    grids - Sequence of 3D grids (for example a 4D NumPy array). Must support len() and slicing.
    subcube_length - The voxel length of each subcube, forwarded to create_all_subcubes.
    batch_size - Positive integer. Number of grids handled per batch.

    RETURNS:
    The subcubes of all grids concatenated along the first axis, in grid order.

    RAISES:
    ValueError - if batch_size is not positive or grids is empty.

    NOTE: every batch result is kept until the final concatenation, so peak memory still
    grows with the total output size; batching only limits the size of each intermediate step."""
    if batch_size < 1:
        raise ValueError(f"batch_size must be a positive integer, got {batch_size!r}")
    if len(grids) == 0:
        raise ValueError("grids is empty; there is nothing to process")

    # One array of subcubes per batch; a list is used so nothing is copied until the end.
    processed_batches = [
        create_all_subcubes(grids[start : start + batch_size], subcube_length)
        for start in range(0, len(grids), batch_size)
    ]

    return np.concatenate(processed_batches, axis=0)

In [32]:
"""PROCESSING PIPELINE"""

# Tunable settings for this run,
SUBCUBE_SIZE = 32
BATCH_SIZE = 1

# Test runs are written to a separate "-TEST" file name.
SAVE_FILE_NAME = QUANTITY + "-" + SPEC + ("-PROCESSED-TEST.npy" if TEST else "-PROCESSED.npy")

# Cutting the loaded grids into subcubes,
print(f"Creating: {SAVE_FILE_NAME}")
sub_grids = process_grids(grids=grids, subcube_length=SUBCUBE_SIZE, batch_size=BATCH_SIZE)

# Writing the result to disk (blank line first, then the status messages),
print("\nSaving File...")
np.save(SAVE_FILE_NAME, sub_grids)
print("File Saved.")

Creating: Dark Matter Mass Density-SIMBA_CV_z=0-PROCESSED-TEST.npy

Saving File...
File Saved.


In [29]:
# Creating slider widget,
grid = sub_grids[0]
# plot_slice indexes the grid along its first axis (grid[i]), so the slider range is
# taken from that same axis; shape[2] would only agree with it for cubic grids.
grid_zlength = (grid.shape[0] - 1)
slider = widgets.IntSlider(min = 0, max = grid_zlength, value=0, description = "Slice")

# Link slider to function. The grid is bound as a default argument so this viewer keeps
# plotting the array it was created with, even if the global `grid` is reassigned by
# another cell (a plain closure would look the global up again on every slider move).
output = widgets.interactive_output(lambda i, grid=grid: plot_slice(i, grid), {'i': slider})

# Display slider and plot
display(slider, output)

IntSlider(value=0, description='Slice', max=31)

Output()