# Performance Benchmarks of `rhs!` Functions on CPU and GPU

To start, we need to download any packages that are not included in the Project.toml file of TrixiCUDA.jl.

In [None]:
# Install the packages this notebook needs on top of the TrixiCUDA.jl Project.toml
import Pkg
for pkg in ("Plots", "BenchmarkTools")
    Pkg.add(pkg)
end

# Named colors used in the plots below are listed at
# https://github.com/JuliaGraphics/Colors.jl/blob/master/src/names_data.jl
using Plots, BenchmarkTools

We also create a `plots` subdirectory under the `benchmark` directory to store the generated figures (only needed if you want to save them).

In [None]:
# Create the directory for plots. `mkpath` does nothing when the directory already
# exists, so this cell can be re-run safely (`mkdir` would throw in that case).
mkpath("plots")

## Way 1: Run and plot each benchmark example individually
This approach allows you to see the benchmarking and plotting process for each example clearly, one by one. 

Note that since the warm-up step is already included in each benchmark file, we only need to include each file once. We extract the mean and median from the benchmark results and plot both. The code for the standard deviation is left commented out, as the standard deviation bars often overlap and offer little extra information.

We also include degrees of freedom (DOFs) in the benchmark plots.



In [None]:
# Run the 1D, 2D, and 3D benchmarks. Each script is included exactly once and is
# expected to leave its results in `cpu_trial`, `gpu_trial`, and `dofs`; these are
# copied to per-dimension names before the next script replaces them.
include("advection_basic_1d.jl")
advec_1d_cpu, advec_1d_gpu, advec_1d_dofs = cpu_trial, gpu_trial, dofs

include("advection_basic_2d.jl")
advec_2d_cpu, advec_2d_gpu, advec_2d_dofs = cpu_trial, gpu_trial, dofs

include("advection_basic_3d.jl")
advec_3d_cpu, advec_3d_gpu, advec_3d_dofs = cpu_trial, gpu_trial, dofs

# Median run times, scaled by 10^-3 to μs (see the time axis label below)
cpu_median_vals = [
    median(trial.times) * 10^-3 for trial in (advec_1d_cpu, advec_2d_cpu, advec_3d_cpu)
]
gpu_median_vals = [
    median(trial.times) * 10^-3 for trial in (advec_1d_gpu, advec_2d_gpu, advec_3d_gpu)
]

# Mean run times, same scaling
cpu_mean_vals = [
    mean(trial.times) * 10^-3 for trial in (advec_1d_cpu, advec_2d_cpu, advec_3d_cpu)
]
gpu_mean_vals = [
    mean(trial.times) * 10^-3 for trial in (advec_1d_gpu, advec_2d_gpu, advec_3d_gpu)
]

# Standard deviations are not computed and no error bars are drawn. To enable them,
# build `cpu_stdev_vals` / `gpu_stdev_vals` from `sqrt(var(trial.times) * 10^-6)`
# per trial and pass them as `yerror` to the two mean-time series.

# DOFs on a log10 scale
log_dofs_vals = [log10(n) for n in (advec_1d_dofs, advec_2d_dofs, advec_3d_dofs)]

dims = ["1D", "2D", "3D"]

# Left axis: log₁₀(DOFs) as a filled area (zero-width line, filled down to 0)
plt = plot(
    dims,
    log_dofs_vals;
    title="Linear Advection Equation (Basic)",
    xlabel="Dimension",
    ylabel="log₁₀(DOFs)",
    label="log₁₀(DOFs)",
    legend=:bottomright,
    seriestype=:line,
    linewidth=0,
    fillrange=0,
    fillalpha=0.2,
    size=(500, 500),
)

# Right axis: timing curves. The first series also sets the axis label and the
# legend position of the twin axis.
plt2 = twinx(plt)
plot!(
    plt2,
    dims,
    cpu_mean_vals;
    ylabel="Time (μs)",
    legend=:topleft,
    label="Mean time CPU",
    seriestype=:line,
    marker=:circle,
    linestyle=:solid,
    linewidth=1.5,
    color=:darkorange2,
)

# Remaining series, in drawing order: mean GPU, median CPU, median GPU
for (vals, lbl, mk, ls, col) in (
    (gpu_mean_vals, "Mean time GPU", :circle, :solid, :green),
    (cpu_median_vals, "Median time CPU", :diamond, :dash, :orange),
    (gpu_median_vals, "Median time GPU", :diamond, :dash, :yellowgreen),
)
    plot!(
        plt2,
        dims,
        vals;
        label=lbl,
        seriestype=:line,
        marker=mk,
        linestyle=ls,
        linewidth=1.5,
        color=col,
    )
end

# Save figure
savefig(plt, "./plots/plot_advec.png")

In [None]:
# Run the 2D and 3D mortar benchmarks. Each script is included exactly once and is
# expected to leave its results in `cpu_trial`, `gpu_trial`, and `dofs`; these are
# copied to per-dimension names before the next script replaces them.
include("advection_mortar_2d.jl")
advecmtr_2d_cpu, advecmtr_2d_gpu, advecmtr_2d_dofs = cpu_trial, gpu_trial, dofs

include("advection_mortar_3d.jl")
advecmtr_3d_cpu, advecmtr_3d_gpu, advecmtr_3d_dofs = cpu_trial, gpu_trial, dofs

# Median run times, scaled by 10^-3 to μs (see the time axis label below)
cpu_median_vals = [
    median(trial.times) * 10^-3 for trial in (advecmtr_2d_cpu, advecmtr_3d_cpu)
]
gpu_median_vals = [
    median(trial.times) * 10^-3 for trial in (advecmtr_2d_gpu, advecmtr_3d_gpu)
]

# Mean run times, same scaling
cpu_mean_vals = [
    mean(trial.times) * 10^-3 for trial in (advecmtr_2d_cpu, advecmtr_3d_cpu)
]
gpu_mean_vals = [
    mean(trial.times) * 10^-3 for trial in (advecmtr_2d_gpu, advecmtr_3d_gpu)
]

# Standard deviations are not computed and no error bars are drawn. To enable them,
# build `cpu_stdev_vals` / `gpu_stdev_vals` from `sqrt(var(trial.times) * 10^-6)`
# per trial and pass them as `yerror` to the two mean-time series.

# DOFs on a log10 scale
log_dofs_vals = [log10(n) for n in (advecmtr_2d_dofs, advecmtr_3d_dofs)]

dims = ["2D", "3D"]

# Left axis: log₁₀(DOFs) as a filled area (zero-width line, filled down to 0).
# With only two dimensions the x range is fixed explicitly on both axes.
plt = plot(
    dims,
    log_dofs_vals;
    title="Linear Advection Equation (Mortar)",
    xlabel="Dimension",
    ylabel="log₁₀(DOFs)",
    label="log₁₀(DOFs)",
    legend=:bottomright,
    seriestype=:line,
    linewidth=0,
    fillrange=0,
    fillalpha=0.2,
    size=(500, 500),
    xlims=(-0.1, 2.1),
)

# Right axis: timing curves. The first series also sets the axis label, the legend
# position, and the x range of the twin axis.
plt2 = twinx(plt)
plot!(
    plt2,
    dims,
    cpu_mean_vals;
    ylabel="Time (μs)",
    legend=:topleft,
    xlims=(-0.1, 2.1),
    label="Mean time CPU",
    seriestype=:line,
    marker=:circle,
    linestyle=:solid,
    linewidth=1.5,
    color=:darkorange2,
)

# Remaining series, in drawing order: mean GPU, median CPU, median GPU
for (vals, lbl, mk, ls, col) in (
    (gpu_mean_vals, "Mean time GPU", :circle, :solid, :green),
    (cpu_median_vals, "Median time CPU", :diamond, :dash, :orange),
    (gpu_median_vals, "Median time GPU", :diamond, :dash, :yellowgreen),
)
    plot!(
        plt2,
        dims,
        vals;
        label=lbl,
        seriestype=:line,
        marker=mk,
        linestyle=ls,
        linewidth=1.5,
        color=col,
    )
end

# Save figure
savefig(plt, "./plots/plot_advecmtr.png")

In [None]:
# Run the 1D, 2D, and 3D benchmarks. Each script is included exactly once and is
# expected to leave its results in `cpu_trial`, `gpu_trial`, and `dofs`; these are
# copied to per-dimension names before the next script replaces them.
include("euler_shock_1d.jl")
eulersk_1d_cpu, eulersk_1d_gpu, eulersk_1d_dofs = cpu_trial, gpu_trial, dofs

include("euler_shock_2d.jl")
eulersk_2d_cpu, eulersk_2d_gpu, eulersk_2d_dofs = cpu_trial, gpu_trial, dofs

include("euler_shock_3d.jl")
eulersk_3d_cpu, eulersk_3d_gpu, eulersk_3d_dofs = cpu_trial, gpu_trial, dofs

# Median run times, scaled by 10^-3 to μs (see the time axis label below)
cpu_median_vals = [
    median(trial.times) * 10^-3 for
    trial in (eulersk_1d_cpu, eulersk_2d_cpu, eulersk_3d_cpu)
]
gpu_median_vals = [
    median(trial.times) * 10^-3 for
    trial in (eulersk_1d_gpu, eulersk_2d_gpu, eulersk_3d_gpu)
]

# Mean run times, same scaling
cpu_mean_vals = [
    mean(trial.times) * 10^-3 for
    trial in (eulersk_1d_cpu, eulersk_2d_cpu, eulersk_3d_cpu)
]
gpu_mean_vals = [
    mean(trial.times) * 10^-3 for
    trial in (eulersk_1d_gpu, eulersk_2d_gpu, eulersk_3d_gpu)
]

# Standard deviations are not computed and no error bars are drawn. To enable them,
# build `cpu_stdev_vals` / `gpu_stdev_vals` from `sqrt(var(trial.times) * 10^-6)`
# per trial and pass them as `yerror` to the two mean-time series.

# DOFs on a log10 scale
log_dofs_vals = [log10(n) for n in (eulersk_1d_dofs, eulersk_2d_dofs, eulersk_3d_dofs)]

dims = ["1D", "2D", "3D"]

# Left axis: log₁₀(DOFs) as a filled area (zero-width line, filled down to 0)
plt = plot(
    dims,
    log_dofs_vals;
    title="Euler Equations (Shock Capturing)",
    xlabel="Dimension",
    ylabel="log₁₀(DOFs)",
    label="log₁₀(DOFs)",
    legend=:bottomright,
    seriestype=:line,
    linewidth=0,
    fillrange=0,
    fillalpha=0.2,
    size=(500, 500),
)

# Right axis: timing curves. The first series also sets the axis label and the
# legend position of the twin axis.
plt2 = twinx(plt)
plot!(
    plt2,
    dims,
    cpu_mean_vals;
    ylabel="Time (μs)",
    legend=:topleft,
    label="Mean time CPU",
    seriestype=:line,
    marker=:circle,
    linestyle=:solid,
    linewidth=1.5,
    color=:darkorange2,
)

# Remaining series, in drawing order: mean GPU, median CPU, median GPU
for (vals, lbl, mk, ls, col) in (
    (gpu_mean_vals, "Mean time GPU", :circle, :solid, :green),
    (cpu_median_vals, "Median time CPU", :diamond, :dash, :orange),
    (gpu_median_vals, "Median time GPU", :diamond, :dash, :yellowgreen),
)
    plot!(
        plt2,
        dims,
        vals;
        label=lbl,
        seriestype=:line,
        marker=mk,
        linestyle=ls,
        linewidth=1.5,
        color=col,
    )
end

# Save figure
savefig(plt, "./plots/plot_eulersk.png")

Only three examples are shown above; however, you can plot additional benchmarks using the examples provided in the `benchmark` directory.

## Way 2: Benchmark and plot all examples in batch, in a single run
In this approach, all benchmarks are run in a single session, making the process faster and more convenient than running each benchmark individually.

Again, we plot the mean and median times for both CPU and GPU, and include the degrees of freedom (DOFs) in the benchmark plots.

In [None]:
# Define examples: each entry is (plot title, benchmark scripts of that example).
# Uncomment the entries you want to run. The x-axis labels are derived from the
# `_<N>d.jl` suffix of each script name, so examples with two scripts (e.g. 2D/3D
# mortar, 1D/2D shallow water) and with three scripts can be mixed in one run.
examples = [
    # ("Linear Advection Equation (Basic)", [
    #     "advection_basic_1d.jl",
    #     "advection_basic_2d.jl",
    #     "advection_basic_3d.jl"
    # ]),
    # ("Linear Advection Equation (Mortar)", [
    #     "advection_mortar_2d.jl",
    #     "advection_mortar_3d.jl"
    # ]),
    # ("Euler Equations (Entropy Conservative)", [
    #     "euler_ec_1d.jl",
    #     "euler_ec_2d.jl",
    #     "euler_ec_3d.jl"
    # ]),
    # ("Euler Equations (Shock Capturing)", [
    #     "euler_shock_1d.jl",
    #     "euler_shock_2d.jl",
    #     "euler_shock_3d.jl"
    # ]),
    ("MHD Equations (Entropy Conservative)", [
        "mhd_ec_1d.jl",
        "mhd_ec_2d.jl",
        "mhd_ec_3d.jl"
    ]),
    ("MHD Equations (Alfven Wave)", [
        "mhd_alfven_wave_1d.jl",
        "mhd_alfven_wave_2d.jl",
        "mhd_alfven_wave_3d.jl"
    ]),
    # ("MHD Equations (Alfven Wave Mortar)", [
    #     "mhd_alfven_wave_mortar_2d.jl",
    #     "mhd_alfven_wave_mortar_3d.jl"
    # ]),
    # ("Hyperbolic Diffusion (Non-periodic)", [
    #     "hypdiff_nonperiodic_1d.jl",
    #     "hypdiff_nonperiodic_2d.jl",
    #     "hypdiff_nonperiodic_3d.jl"
    # ]),
    # ("Shallow Water (Entropy Conservative)", [
    #     "shallowwater_ec_1d.jl",
    #     "shallowwater_ec_2d.jl",
    # ]),
    # ("Shallow water (Source Terms)", [
    #     "shallowwater_source_terms_1d.jl",
    #     "shallowwater_source_terms_2d.jl",
    # ])
]

# Return the x-axis label ("1D", "2D", "3D") for a benchmark script, taken from the
# `_<N>d.jl` suffix of its name. Falls back to the script name itself when the
# name does not end in that suffix.
function dim_label(script)
    m = match(r"_(\d)d\.jl$", script)
    return m === nothing ? script : m.captures[1] * "D"
end

# Result storage: one row per example, one column per script of that example.
# The width is the largest script count; slots an example does not use stay NaN
# and are never plotted.
max_scripts = maximum(ex -> length(last(ex)), examples; init=0)
cpu_medians = fill(NaN, length(examples), max_scripts)
gpu_medians = fill(NaN, length(examples), max_scripts)
cpu_means = fill(NaN, length(examples), max_scripts)
gpu_means = fill(NaN, length(examples), max_scripts)
# cpu_stdevs = fill(NaN, length(examples), max_scripts)
# gpu_stdevs = fill(NaN, length(examples), max_scripts)

# log10 of the DOFs, same layout as above
dofs_array = fill(NaN, length(examples), max_scripts)

# Loop over examples
for (i, (name, scripts)) in enumerate(examples)
    # Nothing to benchmark or plot for an example without scripts
    isempty(scripts) && continue

    for (j, script) in enumerate(scripts)
        # Each script is expected to define `cpu_trial`, `gpu_trial`, and `dofs`
        include(script)

        # Convert to μs
        cpu_medians[i, j] = median(cpu_trial.times) * 10^-3
        gpu_medians[i, j] = median(gpu_trial.times) * 10^-3
        cpu_means[i, j] = mean(cpu_trial.times) * 10^-3
        gpu_means[i, j] = mean(gpu_trial.times) * 10^-3
        # cpu_stdevs[i, j] = sqrt(var(cpu_trial.times) * 10^-6)
        # gpu_stdevs[i, j] = sqrt(var(gpu_trial.times) * 10^-6)

        # DOFs
        dofs_array[i, j] = log10(dofs)
    end

    # Only the columns filled by this example are plotted
    cols = 1:length(scripts)
    dims = [dim_label(script) for script in scripts]

    # Extract values
    cpu_mean_vals = cpu_means[i, cols]
    gpu_mean_vals = gpu_means[i, cols]
    cpu_median_vals = cpu_medians[i, cols]
    gpu_median_vals = gpu_medians[i, cols]
    # cpu_stdev_vals = cpu_stdevs[i, cols]
    # gpu_stdev_vals = gpu_stdevs[i, cols]

    # Extract DOFs
    log_dofs_vals = dofs_array[i, cols]

    # Examples with exactly two dimensions (1D and 2D, or 2D and 3D) use the fixed
    # x range (-0.1, 2.1) on both axes; all others keep the default range
    xrange = length(scripts) == 2 ? (xlims=(-0.1, 2.1),) : NamedTuple()

    # Left axis: log₁₀(DOFs) as a filled area (zero-width line, filled down to 0)
    plt = plot(dims, log_dofs_vals;
        seriestype=:line,
        fillrange=0,
        fillalpha=0.2,
        linewidth=0,
        label="log₁₀(DOFs)",
        ylabel="log₁₀(DOFs)",
        xlabel="Dimension",
        legend=:bottomright,
        title=name,
        size=(500, 500),
        xrange...
    )

    # Right axis: timing curves. The first series also sets the axis label and the
    # legend position of the twin axis.
    plt2 = twinx(plt)
    plot!(plt2, dims, cpu_mean_vals;
        # yerror=cpu_stdev_vals,
        seriestype=:line,
        marker=:circle,
        label="Mean time CPU",
        ylabel="Time (μs)",
        legend=:topleft,
        linewidth=1.5,
        linestyle=:solid,
        color=:darkorange2,
        xrange...
    )

    # Remaining series, in drawing order: mean GPU, median CPU, median GPU
    for (vals, lbl, mk, ls, col) in (
        (gpu_mean_vals, "Mean time GPU", :circle, :solid, :green),
        (cpu_median_vals, "Median time CPU", :diamond, :dash, :orange),
        (gpu_median_vals, "Median time GPU", :diamond, :dash, :yellowgreen),
    )
        plot!(plt2, dims, vals;
            seriestype=:line,
            marker=mk,
            label=lbl,
            linewidth=1.5,
            linestyle=ls,
            color=col
        )
    end

    # Save figures: the file name is the lower-cased title with spaces replaced by
    # underscores and parentheses, periods, and commas removed
    fname = "plots/" * replace(lowercase(name), " " => "_",
                                                "(" => "",
                                                ")" => "",
                                                "." => "",
                                                "," => "") * ".png"
    savefig(plt, fname)
    println("Saved: ", fname)
end


All benchmark examples in the `benchmark` directory are listed above, but you can also define your own examples to run your benchmarks.