diff --git a/benchmarks/closures.exs b/benchmarks/closures.exs index f282707..8910e60 100644 --- a/benchmarks/closures.exs +++ b/benchmarks/closures.exs @@ -16,6 +16,12 @@ # ./benchmarks/setup_luaport.sh # idempotent; patches + builds # MIX_ENV=benchmark mix run benchmarks/closures.exs # If luaport fails to start, the benchmark prints a notice and skips it. +# +# Run modes (see benchmarks/helpers.exs): +# default — quick mode (~4 s per Benchee job) +# LUA_BENCH_MODE=full — long windows + memory_time, for publishable numbers + +Code.require_file("helpers.exs", __DIR__) Application.ensure_all_started(:luerl) @@ -83,9 +89,7 @@ Benchee.run( }, c_lua_benchmarks ), - time: 10, - warmup: 2, - memory_time: 1 + Bench.opts() ) c_lua_cleanup.() diff --git a/benchmarks/fibonacci.exs b/benchmarks/fibonacci.exs index 6ba154f..502f889 100644 --- a/benchmarks/fibonacci.exs +++ b/benchmarks/fibonacci.exs @@ -13,6 +13,8 @@ # MIX_ENV=benchmark mix run benchmarks/fibonacci.exs # If luaport fails to start, the benchmark prints a notice and skips it. +Code.require_file("helpers.exs", __DIR__) + Application.ensure_all_started(:luerl) fib_def = """ @@ -61,9 +63,7 @@ Benchee.run( }, c_lua_benchmarks ), - time: 10, - warmup: 2, - memory_time: 1 + Bench.opts() ) c_lua_cleanup.() diff --git a/benchmarks/helpers.exs b/benchmarks/helpers.exs new file mode 100644 index 0000000..eb719c9 --- /dev/null +++ b/benchmarks/helpers.exs @@ -0,0 +1,67 @@ +# Shared configuration for the benchmark scripts under `benchmarks/`. +# +# Each script `Code.require_file/2`s this file at the top so the harness +# stays consistent across workloads. There is one knob — the +# `LUA_BENCH_MODE` env var — which selects between two pre-canned +# Benchee profiles: +# +# * `quick` (default) — short windows for iteration during development. +# Each Benchee job takes ~4 seconds. Memory measurement is off. +# Five workloads × 4 implementations ≈ 80 seconds wall clock. 
+# +# * `full` — longer windows + memory measurement, suitable for +# end-of-cycle definitive numbers and for the figures we paste into +# PR descriptions or ROADMAP.md. +# +# Usage: +# +# mix run benchmarks/fibonacci.exs # quick +# LUA_BENCH_MODE=full mix run benchmarks/fibonacci.exs # full +# mix lua.bench # quick across all +# LUA_BENCH_MODE=full mix lua.bench # full across all +# +# Quick mode is intended for "did my change move the needle?" loops. +# Full mode is the source of truth for any number we publish. + +defmodule Bench do + @moduledoc false + + @doc """ + Returns the Benchee options keyword list for the current run mode. + + Mode is selected via the `LUA_BENCH_MODE` environment variable. Any + value other than `"full"` is treated as quick mode. + """ + def opts(extra \\ []) do + base = + case System.get_env("LUA_BENCH_MODE") do + "full" -> [time: 10, warmup: 2, memory_time: 1] + _ -> [time: 3, warmup: 1, memory_time: 0] + end + + Keyword.merge(base, extra) + end + + @doc """ + Returns the n-size sweep used by the multi-input table benchmarks. + + Quick mode runs a single representative size to keep iteration cheap. + Full mode runs a sweep so we can see how a workload's perf curve + changes with input size. + """ + def table_inputs do + case System.get_env("LUA_BENCH_MODE") do + "full" -> [{"small (n=10)", 10}, {"medium (n=100)", 100}, {"large (n=1000)", 1000}] + _ -> [{"medium (n=100)", 100}] + end + end + + @doc """ + Convenience helper. Prints the current mode at the top of a script so + the run output is self-describing. 
+ """ + def banner(name) do + mode = if System.get_env("LUA_BENCH_MODE") == "full", do: "full", else: "quick" + IO.puts("\n=== #{name} (mode: #{mode}) ===\n") + end +end diff --git a/benchmarks/oop.exs b/benchmarks/oop.exs index 25ac5a3..e6790e5 100644 --- a/benchmarks/oop.exs +++ b/benchmarks/oop.exs @@ -23,6 +23,8 @@ # MIX_ENV=benchmark mix run benchmarks/oop.exs # If luaport fails to start, the benchmark prints a notice and skips it. +Code.require_file("helpers.exs", __DIR__) + Application.ensure_all_started(:luerl) oop_def = """ @@ -94,9 +96,7 @@ Benchee.run( }, c_lua_benchmarks ), - time: 10, - warmup: 2, - memory_time: 1 + Bench.opts() ) c_lua_cleanup.() diff --git a/benchmarks/string_ops.exs b/benchmarks/string_ops.exs index cbdc3aa..50e6688 100644 --- a/benchmarks/string_ops.exs +++ b/benchmarks/string_ops.exs @@ -17,6 +17,8 @@ # MIX_ENV=benchmark mix run benchmarks/string_ops.exs # If luaport fails to start, the benchmark prints a notice and skips it. +Code.require_file("helpers.exs", __DIR__) + Application.ensure_all_started(:luerl) string_def = """ @@ -68,7 +70,7 @@ luerl_state = :luerl.init() {%{}, %{}, fn -> :ok end} end -IO.puts("\n=== String Concatenation via table.concat (n=100) ===\n") +Bench.banner("String Concatenation via table.concat (n=100)") Benchee.run( Map.merge( @@ -79,12 +81,10 @@ Benchee.run( }, c_lua_concat ), - time: 10, - warmup: 2, - memory_time: 1 + Bench.opts() ) -IO.puts("\n=== String Formatting via string.format (n=100) ===\n") +Bench.banner("String Formatting via string.format (n=100)") Benchee.run( Map.merge( @@ -95,9 +95,7 @@ Benchee.run( }, c_lua_format ), - time: 10, - warmup: 2, - memory_time: 1 + Bench.opts() ) c_lua_cleanup.() diff --git a/benchmarks/table_ops.exs b/benchmarks/table_ops.exs index dd93659..6dd3243 100644 --- a/benchmarks/table_ops.exs +++ b/benchmarks/table_ops.exs @@ -19,6 +19,8 @@ # MIX_ENV=benchmark mix run benchmarks/table_ops.exs # If luaport fails to start, the benchmark prints a notice and skips it. 
+Code.require_file("helpers.exs", __DIR__) + Application.ensure_all_started(:luerl) table_def = """ @@ -68,105 +70,83 @@ function run_table_map_reduce(n) end """ -n = 500 - -call_build = "return run_table_build(#{n})" -call_sort = "return run_table_sort(#{n})" -call_sum = "return run_table_sum(#{n})" -call_map_reduce = "return run_table_map_reduce(#{n})" - # --- This Lua implementation --- lua = Lua.new() {_, lua} = Lua.eval!(lua, table_def) -{build_chunk, _} = Lua.load_chunk!(lua, call_build) -{sort_chunk, _} = Lua.load_chunk!(lua, call_sort) -{sum_chunk, _} = Lua.load_chunk!(lua, call_sum) -{map_reduce_chunk, _} = Lua.load_chunk!(lua, call_map_reduce) + +# Pre-compile chunks per (operation, n) pair so the chunk path doesn't +# pay the compile cost during measurement. Inputs ship through Benchee's +# `inputs:` mechanism so all sizes share warmup/measurement state. +sizes = Bench.table_inputs() + +build_chunks = + Map.new(sizes, fn {label, n} -> + {chunk, _} = Lua.load_chunk!(lua, "return run_table_build(#{n})") + {label, {chunk, "return run_table_build(#{n})", n}} + end) + +sort_chunks = + Map.new(sizes, fn {label, n} -> + {chunk, _} = Lua.load_chunk!(lua, "return run_table_sort(#{n})") + {label, {chunk, "return run_table_sort(#{n})", n}} + end) + +sum_chunks = + Map.new(sizes, fn {label, n} -> + {chunk, _} = Lua.load_chunk!(lua, "return run_table_sum(#{n})") + {label, {chunk, "return run_table_sum(#{n})", n}} + end) + +map_reduce_chunks = + Map.new(sizes, fn {label, n} -> + {chunk, _} = Lua.load_chunk!(lua, "return run_table_map_reduce(#{n})") + {label, {chunk, "return run_table_map_reduce(#{n})", n}} + end) # --- Luerl --- luerl_state = :luerl.init() {:ok, _, luerl_state} = :luerl.do(table_def, luerl_state) # --- C Lua via luaport (optional) --- -{c_lua_build, c_lua_sort, c_lua_sum, c_lua_map_reduce, c_lua_cleanup} = +{c_lua_call, c_lua_cleanup} = case Application.ensure_all_started(:luaport) do {:ok, _} -> scripts_dir = Path.join(__DIR__, "scripts") {:ok, 
port_pid, _} = :luaport.spawn(:table_bench, to_charlist(scripts_dir)) :luaport.load(port_pid, table_def) - mk = fn func -> %{"C Lua (luaport)" => fn -> :luaport.call(port_pid, func, [n]) end} end - { - mk.(:run_table_build), - mk.(:run_table_sort), - mk.(:run_table_sum), - mk.(:run_table_map_reduce), + fn func, n -> :luaport.call(port_pid, func, [n]) end, fn -> :luaport.despawn(:table_bench) end } {:error, reason} -> IO.puts("luaport not available (#{inspect(reason)}) — skipping C Lua benchmarks") - empty = %{} - {empty, empty, empty, empty, fn -> :ok end} + {nil, fn -> :ok end} end -benchee_opts = [time: 10, warmup: 2, memory_time: 1] - -IO.puts("\n=== Table Build (n=#{n}) ===\n") - -Benchee.run( - Map.merge( - %{ - "lua (eval)" => fn -> Lua.eval!(lua, call_build) end, - "lua (chunk)" => fn -> Lua.eval!(lua, build_chunk) end, - "luerl" => fn -> :luerl.do(call_build, luerl_state) end - }, - c_lua_build - ), - benchee_opts -) - -IO.puts("\n=== Table Sort (n=#{n}) ===\n") - -Benchee.run( - Map.merge( - %{ - "lua (eval)" => fn -> Lua.eval!(lua, call_sort) end, - "lua (chunk)" => fn -> Lua.eval!(lua, sort_chunk) end, - "luerl" => fn -> :luerl.do(call_sort, luerl_state) end - }, - c_lua_sort - ), - benchee_opts -) - -IO.puts("\n=== Table Iterate/Sum (n=#{n}) ===\n") - -Benchee.run( - Map.merge( - %{ - "lua (eval)" => fn -> Lua.eval!(lua, call_sum) end, - "lua (chunk)" => fn -> Lua.eval!(lua, sum_chunk) end, - "luerl" => fn -> :luerl.do(call_sum, luerl_state) end - }, - c_lua_sum - ), - benchee_opts -) - -IO.puts("\n=== Table Map + Reduce (n=#{n}) ===\n") - -Benchee.run( - Map.merge( - %{ - "lua (eval)" => fn -> Lua.eval!(lua, call_map_reduce) end, - "lua (chunk)" => fn -> Lua.eval!(lua, map_reduce_chunk) end, - "luerl" => fn -> :luerl.do(call_map_reduce, luerl_state) end - }, - c_lua_map_reduce - ), - benchee_opts -) +bench = fn name, chunks_map, lua_func -> + Bench.banner(name) + + jobs = %{ + "lua (eval)" => fn {_chunk, call_str, _n} -> Lua.eval!(lua, call_str) end, + 
"lua (chunk)" => fn {chunk, _call_str, _n} -> Lua.eval!(lua, chunk) end, + "luerl" => fn {_chunk, call_str, _n} -> :luerl.do(call_str, luerl_state) end + } + + jobs = + if c_lua_call do + Map.put(jobs, "C Lua (luaport)", fn {_chunk, _call_str, n} -> c_lua_call.(lua_func, n) end) + else + jobs + end + + Benchee.run(jobs, [{:inputs, chunks_map} | Bench.opts()]) +end + +bench.("Table Build", build_chunks, :run_table_build) +bench.("Table Sort", sort_chunks, :run_table_sort) +bench.("Table Iterate/Sum", sum_chunks, :run_table_sum) +bench.("Table Map + Reduce", map_reduce_chunks, :run_table_map_reduce) c_lua_cleanup.() diff --git a/tasks/lua.bench.ex b/tasks/lua.bench.ex index 3a7f894..20a618a 100644 --- a/tasks/lua.bench.ex +++ b/tasks/lua.bench.ex @@ -22,11 +22,12 @@ defmodule Mix.Tasks.Lua.Bench do ## Usage - mix lua.bench # run all workloads + mix lua.bench # run all workloads (quick mode) mix lua.bench --workload fibonacci # run one mix lua.bench --list # print available workloads mix lua.bench --workload fibonacci --workload closures # run several + LUA_BENCH_MODE=full mix lua.bench # long runs + memory_time + n-sweep ## Options @@ -35,6 +36,20 @@ defmodule Mix.Tasks.Lua.Bench do every workload is run. * `--list` — Print the available workloads and exit. + ## Run modes + + The benchmark scripts read the `LUA_BENCH_MODE` environment variable + (see `benchmarks/helpers.exs`): + + * **default (`quick`)** — short Benchee windows (1 s warmup, 3 s + measurement, memory_time off) for fast development iteration. + Each workload takes ~16 s; the full suite is ~80 s wall clock. + * **`full`** — long windows (2 s warmup, 10 s measurement, memory + time on) plus a sweep of multiple input sizes for the table + workloads. Use this for any numbers you publish (PR descriptions, + ROADMAP.md). Each workload takes a minute or two; the full suite + runs ~15+ minutes. + ## Notes This task shells out to `mix run` in the `:benchmark` env so the