# Generating data

This notebook explains how to generate, in practice, the data used for training and evaluating the end-to-end model of the master's thesis “Fast, Accurate, and Scalable Numerical Wave Propagation: Enhancement by Deep Learning” by Luis Kaiser, supervised by Prof. Tsai (University of Texas at Austin) and Prof. Klingenberg (University of Wuerzburg).

First, make sure that you have installed all necessary libraries specified in `requirements.txt` using `pip` or `pip3` depending on your setup by running the command below.

In [1]:
!pip3 install --upgrade pip
!pip3 install -r requirements.txt

zsh:1: /Users/udis/Documents/studies/phd/research/wave_propagation/repo_public/.venv/bin/pip3: bad interpreter: /Users/udis/Documents/studies/master/master_thesis/repo_public/.venv/bin/python: no such file or directory

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.3.1[0m[39;49m -> [0m[32;49m25.3[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython3.13 -m pip install --upgrade pip[0m
[1;31merror[0m: [1mexternally-managed-environment[0m

[31m×[0m This environment is externally managed
[31m╰─>[0m To install Python packages system-wide, try brew install
[31m   [0m xyz, where xyz is the package you are trying to
[31m   [0m install.
[31m   [0m 
[31m   [0m If you wish to install a Python library that isn't in Homebrew,
[31m   [0m use a virtual environment:
[31m   [0m 
[31m   [0m python3 -m venv path/to/venv
[31m   [0m source path/to/venv/bin/activate
[31m   [0m python3 -

## Generate wave speed crops

We first generate random velocity profile crops from `wave_profile/marm1nonsmooth.mat` and `wave_profile/bp2004.mat` and save them in an `.npz` file. The numerical solvers shown in `use_numerical_solvers.ipynb` can then advance a wave field on these velocity profiles, and the stored crops can later be used for training and evaluating the models.

In [None]:
from generate_data import generate_velocity_profile_crop
from skimage.filters import gaussian
from scipy.io import loadmat


def generate_velocity_crops(
        resolution = 128,
        output_dir = 'data/crop_test.npz',
        num_crops = 1,
):
    '''
    Smooth the Marmousi and BP velocity images and store random crops of them.

    Parameters
    ----------
    resolution : (int) resolution of crop
    output_dir : (string) output file path, ending with ".npz"
    num_crops :  (int) number of crops per image

    Returns
    -------
    saves the velocity crops in an .npz-file
    '''

    # second argument handed to the gaussian filter for both images
    smoothing_sigma = 4

    # Marmousi velocity image, smoothed
    marmousi_raw = loadmat('wave_profile/marm1nonsmooth.mat')['marm1larg']
    marmousi_smooth = gaussian(marmousi_raw, smoothing_sigma)

    # BP velocity image, smoothed and divided by 1000 (different order of magnitude)
    bp_raw = loadmat('wave_profile/bp2004.mat')['V']
    bp_smooth = gaussian(bp_raw, smoothing_sigma) / 1000

    # randomly crop both images and save the crops at "output_dir"
    velocity_images = [marmousi_smooth, bp_smooth]
    generate_velocity_profile_crop(
        v_images = velocity_images,
        m = resolution,
        output_path = output_dir,
        num_times = num_crops
    )

In [3]:
# run with the default arguments: the crops are written to 'data/crop_test.npz'
generate_velocity_crops()

## Apply Solvers to create dataset

In [8]:
from matplotlib import pyplot as plt
import numpy as np
import torch
from generate_data import crop_center, initial_condition_gaussian, one_iteration_pseudo_spectral_tensor
from utils_wave_component_function import WaveSol_from_EnergyComponent_tensor, WaveEnergyComponentField_end_to_end, WaveEnergyField


def visualize_wavefield(
    u_elapse,
    ut_elapse,
    vel,
    f_delta_x,
    it,
    s
):
    '''
    Plot one snapshot of the wave field as returned by `WaveEnergyField`.

    Parameters
    ----------
    u_elapse : (torch tensor) wave field component u
    ut_elapse : (torch tensor) wave field component u derived by t, i.e. the velocity
    vel : (numpy tensor) velocity profile image
    f_delta_x : (float) grid stepping forwarded to `WaveEnergyField`
    it : (int) number of current iteration
    s : (int) number of current snapshot

    Returns
    -------
    visualizes current wave field iteration
    '''

    # drop singleton dimensions and hand plain numpy arrays to the conversion helper
    u_np = u_elapse.squeeze().numpy()
    ut_np = ut_elapse.squeeze().numpy()

    # change representation to energy semi-norm
    energy_image = WaveEnergyField(u_np, ut_np, vel, f_delta_x)

    # draw the image without axes and label it with iteration / snapshot
    plt.axis("off")
    plt.imshow(energy_image)
    plt.title(f"wave field for iteration {it} and snapshot {s}")
    plt.show()


def generate_data_end_to_end(
        input_path = "data/crop_test.npz",
        output_path = "data/datagen_test2.npz",
        boundary_condition = "periodic",
        n_snaps = 10,
        res = 128,
        n_it = 200,
        f_delta_x =  2. / 128.,
        visualize = False
):
    '''
    Generate wave propagation series on stored velocity crops and save them.

    For every series a velocity profile is picked, an initial condition is created
    with `initial_condition_gaussian`, and the pseudo-spectral solver is applied
    `n_snaps` times. The initial state and the state after each solver step
    (n_snaps + 1 snapshots in total) are recorded.

    Parameters
    ----------
    input_path : (string) velocity profile data path (.npz file with key 'wavespeedlist')
    output_path: (string) wave field data path
    boundary_condition : (string) boundary condition, either "periodic" or "absorbing";
        only "absorbing" triggers cropping, every other value is handled like "periodic"
    n_snaps : (int) amount of snapshots / dt_star steps to take
    res : (int) resolution of wave field output
    n_it : (int) amount of different wave propagation series
    f_delta_x : (float) grid stepping forwarded to the wave field conversion helpers
    visualize : (bool) if True, every snapshot is plotted with `visualize_wavefield`

    Returns
    -------
    saves generated wave propagation iterations in file; the arrays are stored under
    the keys vel, Ux, Uy, Utc and u_phys, each of shape [n_it, n_snaps + 1, res, res]
    '''

    # load velocity model created in function above `generate_velocity_crops()`
    velocities = np.load(input_path)['wavespeedlist']

    # setup tensors to store wave energy components and velocity profile
    # one image for each iteration and snapshot
    snapshot_shape = [n_it, n_snaps + 1, res, res]
    Ux, Uy, Utc, u_phys = (np.zeros(snapshot_shape) for _ in range(4))
    V = np.zeros(snapshot_shape)

    # data generation: one wave propagation series per iteration
    for it in range(n_it):
        if it % 10 == 0: print(it)

        # sample velocity instance; if more series are requested than velocity
        # profiles are available, the first profile is reused
        if it >= len(velocities): vel = velocities[0]
        else: vel = velocities[it] # w_big x h_big

        # the torch view of the velocity is needed in every snapshot, so build it once
        vel_tensor = torch.from_numpy(vel)

        # computing initial condition using gaussian pulse (switch to pytorch tensor if needed)
        # note that this will contain the energy components,
        # i.e. u_energy[b][0,...,2] relates to the partial derivatives mentioned in the thesis, while b denotes the batch
        u_energy, u_elapse = initial_condition_gaussian(
            vel_tensor,
            mode="energy_comp",
            res_padded=res,
        )

        # create and save velocity crop
        vel_crop = crop_center(vel, res, 2)  # crop center of the image if boundary condition == "absorbing"
        # NOTE(review): the uncropped `vel` is stored, which only fits into V if vel is
        # already res x res; presumably `vel_crop` is intended for "absorbing" - confirm
        V[it] = np.repeat(vel[np.newaxis, :, :], n_snaps + 1, axis=0)  # save velocity image (n_snaps + 1) times in V

        # record the initial state (s == 0) and the state after each of the n_snaps solver steps
        for s in range(n_snaps + 1):

            # change energy components to wave field representation
            u_elapse, ut_elapse = WaveSol_from_EnergyComponent_tensor(
                u_energy[:, 0],
                u_energy[:, 1],
                u_energy[:, 2],
                vel_tensor,
                f_delta_x,
                torch.sum(u_elapse)  # a single sum already reduces over all dimensions
            )

            # visualize current wave field
            if visualize:
                visualize_wavefield(
                    u_elapse,
                    ut_elapse,
                    vel,
                    f_delta_x,
                    it,
                    s
                )

            if boundary_condition == "absorbing":

                # crop the wave field and save current snapshot in tensors
                u_elapse_crop = crop_center(u_elapse.squeeze(), res, 2)
                ut_elapse_crop = crop_center(ut_elapse.squeeze(), res, 2)
                Ux[it, s], Uy[it, s], Utc[it, s] = \
                    WaveEnergyComponentField_end_to_end(u_elapse_crop, ut_elapse_crop, vel_crop, f_delta_x)

            else:
                # save current snapshot in tensors
                Ux[it, s], Uy[it, s], Utc[it, s] = u_energy[0,0], u_energy[0,1], u_energy[0,2]

            # NOTE(review): the uncropped field is stored for both boundary conditions;
            # confirm that it matches res x res in the "absorbing" case
            u_phys[it, s] = u_elapse

            # integration step (done for all snapshots except for the last one, whose
            # successor would never be stored)
            if s < n_snaps:
                # apply pseudo spectral solver (alternatively the velocity verlet solver)
                u_energy, u_elapse = one_iteration_pseudo_spectral_tensor(
                    torch.cat([u_energy, vel_tensor.unsqueeze(dim=0).unsqueeze(dim=0)], dim=1), u_elapse)

    # save tensors in file, accessible through key-value queries
    np.savez(output_path, vel=V, Ux=Ux, Uy=Uy, Utc=Utc, u_phys=u_phys)

In [9]:
# run with the default arguments: reads 'data/crop_test.npz' and writes 'data/datagen_test2.npz'
generate_data_end_to_end()

0
10
20
30
40
50
60
70
80
90
100
110
120
130
140
150
160
170
180
190
