Commit

more mining stats
vird committed Sep 29, 2023
1 parent 7d10818 commit f56d094
Showing 5 changed files with 173 additions and 24 deletions.
4 changes: 4 additions & 0 deletions apps/arweave/include/ar_mining.hrl
@@ -7,6 +7,8 @@
cache_ref = not_set, %% not serialized
chunk1 = not_set, %% not serialized
chunk2 = not_set, %% not serialized
chunk1_store_id = not_set,
chunk2_store_id = not_set,
cm_diff = not_set, %% serialized. set to the difficulty used by the H1 miner
cm_lead_peer = not_set, %% not serialized. if set, this candidate came from another peer
h0 = not_set, %% serialized
@@ -49,4 +51,6 @@
partition_upper_bound = not_set
}).

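%% One histogram bucket per bit of the 256-bit mining hash (buckets 0..256).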
-define(MINING_HASH_MAX_BUCKET, 256).

-endif.
2 changes: 1 addition & 1 deletion apps/arweave/src/ar_coordination.erl
@@ -76,7 +76,7 @@ compute_h2(Peer, Candidate, H1List) ->
computed_h2(Candidate) ->
gen_server:cast(?MODULE, {computed_h2, Candidate}).

post_solution(Peer, Candidate) ->
post_solution(_Peer, Candidate) ->
ar_mining_server:prepare_and_post_solution(Candidate).

poll_loop() ->
81 changes: 80 additions & 1 deletion apps/arweave/src/ar_metrics.erl
@@ -4,6 +4,7 @@

-include_lib("arweave/include/ar.hrl").
-include_lib("arweave/include/ar_pricing.hrl").
-include_lib("arweave/include/ar_mining.hrl").
-include_lib("arweave/include/ar_config.hrl").

%%%===================================================================
@@ -419,7 +420,85 @@ register(MetricsDir) ->
"Only set when debug=true."}]),
prometheus_gauge:new([{name, process_info},
{labels, [process, type]},
{help, "Sampling info about active processes. Only set when debug=true."}]).
{help, "Sampling info about active processes. Only set when debug=true."}]),

% Mining performance

% Useful for more accurate VDF time estimation and for the max hashrate per partition (not per storage module)
prometheus_counter:new([
{name, mining_perf_vdf_step_count},
{
help,
"Count of vdf steps provided to mining process"
}
]),

DiffBucketList = lists:seq(0, ?MINING_HASH_MAX_BUCKET),
prometheus_histogram:new([
{name, mining_perf_hash_gt_2_pow_x_1chunk_count},
{buckets, DiffBucketList},
{
help,
"Count of hashes (solutions) found since launch which are >= 2**(bucket_index) (1-chunk solutions only)"
}
]),
prometheus_histogram:new([
{name, mining_perf_hash_gt_2_pow_x_2chunk_count},
{buckets, DiffBucketList},
{
help,
"Count of hashes (solutions) found since launch which are >= 2**(bucket_index) (2-chunk solutions only)"
}
]),

{ok, Config} = application:get_env(arweave, config),
lists:foreach(fun(StorageModule) ->
StoreID = ar_storage_module:id(StorageModule),
% NOTE: if you have more than ~100k storage_modules, the dynamically created metric names can hit the default atom table size limit (~1M atoms)

% Scheduled reads == 200 * mining_perf_vdf_step_count for 1-chunk, with caveats:
% * ?RECALL_RANGE_SIZE is different in DEBUG builds (2 chunks)
% * the ratio is unknown for 2-chunk
% * the weave also grows over time, so a storage module can be allocated but receive no reads until the weave grows past its threshold
prometheus_counter:new([
{name, list_to_atom(io_lib:format("~s_scheduled_read_1chunk_counter", [StoreID]))},
{help, "Scheduled read count watermark for storage_module (1st chunk in solution)"}
]),
prometheus_counter:new([
{name, list_to_atom(io_lib:format("~s_scheduled_read_2chunk_counter", [StoreID]))},
{help, "Scheduled read count watermark for storage_module (2nd chunk in solution)"}
]),

prometheus_counter:new([
{name, list_to_atom(io_lib:format("~s_successful_read_1chunk_counter", [StoreID]))},
{help, "Successful read count for storage_module (1st chunk in solution)"}
]),
prometheus_counter:new([
{name, list_to_atom(io_lib:format("~s_successful_read_2chunk_counter", [StoreID]))},
{help, "Successful read count for storage_module (2nd chunk in solution)"}
]),

prometheus_counter:new([
{name, list_to_atom(io_lib:format("~s_missing_read_1chunk_counter", [StoreID]))},
{help, "Missing read count for storage_module (1st chunk in solution)"}
]),
prometheus_counter:new([
{name, list_to_atom(io_lib:format("~s_missing_read_2chunk_counter", [StoreID]))},
{help, "Missing read count for storage_module (2nd chunk in solution)"}
]),

prometheus_counter:new([
{name, list_to_atom(io_lib:format("~s_hash_1chunk_counter", [StoreID]))},
{help, "Hash count for storage_module (1-chunk solutions only)"}
]),
prometheus_counter:new([
{name, list_to_atom(io_lib:format("~s_hash_2chunk_counter", [StoreID]))},
{help, "Hash count for storage_module (2-chunk solutions only)"}
]),

ok
end, Config#config.storage_modules),
ok.
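
%% Sketch, not part of this change: the per-StoreID metric names above are built inline via
%% list_to_atom(io_lib:format(...)). io_lib:format/2 returns a (possibly deep) iolist while
%% list_to_atom/1 expects a flat character list, so a small helper along these lines
%% (hypothetical name storage_metric_name/2) would centralize the construction and flatten explicitly:
storage_metric_name(Suffix, StoreID) ->
	list_to_atom(lists:flatten(io_lib:format("~s_~s", [StoreID, Suffix]))).
%% Usage: storage_metric_name("hash_1chunk_counter", StoreID) yields the same atom the
%% registration loop above creates for that storage module.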

%% @doc Store the given metric in a file.
store(Name) ->
50 changes: 36 additions & 14 deletions apps/arweave/src/ar_mining_io.erl
@@ -199,7 +199,14 @@ io_thread(PartitionNumber, MiningAddress, StoreID, SessionRef) ->
io_thread(PartitionNumber, MiningAddress, StoreID, Ref);
{WhichChunk, {Candidate, RecallRangeStart}} ->
case ar_mining_server:is_session_valid(SessionRef, Candidate) of
true ->
true ->
case WhichChunk of
chunk1 ->
prometheus_counter:inc(list_to_atom(io_lib:format("~s_scheduled_read_1chunk_counter", [StoreID])), ?RECALL_RANGE_SIZE / ?DATA_CHUNK_SIZE);
chunk2 ->
prometheus_counter:inc(list_to_atom(io_lib:format("~s_scheduled_read_2chunk_counter", [StoreID])), ?RECALL_RANGE_SIZE / ?DATA_CHUNK_SIZE);
_ -> unreach
end,
read_range(WhichChunk, Candidate, RecallRangeStart, StoreID);
false ->
ok %% Clear the message queue of requests from outdated mining sessions
@@ -238,34 +245,49 @@ read_range(WhichChunk, Candidate, RangeStart, StoreID) ->
MiningAddress, StoreID, ar_intervals:new()),
ChunkOffsets = ar_chunk_storage:get_range(RangeStart, Size, StoreID),
ChunkOffsets2 = filter_by_packing(ChunkOffsets, Intervals, StoreID),
MissingCount = (?RECALL_RANGE_SIZE / ?DATA_CHUNK_SIZE) - length(ChunkOffsets),
case WhichChunk of
chunk1 ->
prometheus_counter:inc(list_to_atom(io_lib:format("~s_missing_read_1chunk_counter", [StoreID])), MissingCount);
chunk2 ->
prometheus_counter:inc(list_to_atom(io_lib:format("~s_missing_read_2chunk_counter", [StoreID])), MissingCount);
_ -> unreach
end,
?LOG_DEBUG([{event, mining_debug_read_recall_range},
{chunk, WhichChunk},
{range_start, RangeStart},
{store_id, StoreID},
{found_chunks, length(ChunkOffsets)},
{found_chunks_with_required_packing, length(ChunkOffsets2)}]),
NonceMax = max(0, (Size div ?DATA_CHUNK_SIZE - 1)),
read_range(WhichChunk, Candidate, RangeStart, 0, NonceMax, ChunkOffsets2).
read_range(WhichChunk, Candidate, RangeStart, StoreID, 0, NonceMax, ChunkOffsets2).

read_range(_WhichChunk, _Candidate, _RangeStart, Nonce, NonceMax, _ChunkOffsets)
read_range(_WhichChunk, _Candidate, _RangeStart, _StoreID, Nonce, NonceMax, _ChunkOffsets)
when Nonce > NonceMax ->
ok;
read_range(WhichChunk, Candidate, RangeStart, Nonce, NonceMax, []) ->
ar_mining_server:recall_chunk(skipped, undefined, Nonce, Candidate),
read_range(WhichChunk, Candidate, RangeStart, Nonce + 1, NonceMax, []);
read_range(WhichChunk, Candidate,RangeStart, Nonce, NonceMax, [{EndOffset, Chunk} | ChunkOffsets])
read_range(WhichChunk, Candidate, RangeStart, StoreID, Nonce, NonceMax, []) ->
ar_mining_server:recall_chunk(skipped, undefined, Nonce, Candidate, StoreID),
read_range(WhichChunk, Candidate, RangeStart, StoreID, Nonce + 1, NonceMax, []);
read_range(WhichChunk, Candidate, RangeStart, StoreID, Nonce, NonceMax, [{EndOffset, Chunk} | ChunkOffsets])
%% Only 256 KiB chunks are supported at this point.
when RangeStart + Nonce * ?DATA_CHUNK_SIZE < EndOffset - ?DATA_CHUNK_SIZE ->
ar_mining_server:recall_chunk(skipped, undefined, Nonce, Candidate),
read_range(WhichChunk, Candidate, RangeStart, Nonce + 1, NonceMax,
case WhichChunk of
chunk1 ->
prometheus_counter:inc(list_to_atom(io_lib:format("~s_missing_read_1chunk_counter", [StoreID])));
chunk2 ->
prometheus_counter:inc(list_to_atom(io_lib:format("~s_missing_read_2chunk_counter", [StoreID])));
_ -> unreach
end,
ar_mining_server:recall_chunk(skipped, undefined, Nonce, Candidate, StoreID),
read_range(WhichChunk, Candidate, RangeStart, StoreID, Nonce + 1, NonceMax,
[{EndOffset, Chunk} | ChunkOffsets]);
read_range(WhichChunk, Candidate, RangeStart, Nonce, NonceMax, [{EndOffset, _Chunk} | ChunkOffsets])
read_range(WhichChunk, Candidate, RangeStart, StoreID, Nonce, NonceMax, [{EndOffset, _Chunk} | ChunkOffsets])
when RangeStart + Nonce * ?DATA_CHUNK_SIZE >= EndOffset ->
read_range(WhichChunk, Candidate, RangeStart, Nonce, NonceMax, ChunkOffsets);
read_range(WhichChunk, Candidate, RangeStart, Nonce, NonceMax,
read_range(WhichChunk, Candidate, RangeStart, StoreID, Nonce, NonceMax, ChunkOffsets);
read_range(WhichChunk, Candidate, RangeStart, StoreID, Nonce, NonceMax,
[{_EndOffset, Chunk} | ChunkOffsets]) ->
ar_mining_server:recall_chunk(WhichChunk, Chunk, Nonce, Candidate),
read_range(WhichChunk, Candidate, RangeStart, Nonce + 1, NonceMax, ChunkOffsets).
ar_mining_server:recall_chunk(WhichChunk, Chunk, Nonce, Candidate, StoreID),
read_range(WhichChunk, Candidate, RangeStart, StoreID, Nonce + 1, NonceMax, ChunkOffsets).

find_thread(PartitionNumber, MiningAddress, RangeEnd, RangeStart, Threads) ->
Keys = find_thread2(PartitionNumber, MiningAddress, maps:iterator(Threads)),
60 changes: 52 additions & 8 deletions apps/arweave/src/ar_mining_server.erl
@@ -4,7 +4,7 @@

-behaviour(gen_server).

-export([start_link/0, start_mining/1, recall_chunk/4, computed_hash/4, set_difficulty/1,
-export([start_link/0, start_mining/1, recall_chunk/5, computed_hash/4, set_difficulty/1,
compute_h2_for_peer/2, prepare_and_post_solution/1, post_solution/1,
set_merkle_rebase_threshold/1, get_recall_bytes/4, is_session_valid/2]).
-export([pause/0, get_task_queue_len/0]).
@@ -52,13 +52,15 @@ start_mining(Args) ->
gen_server:cast(?MODULE, {start_mining, Args}).

%% @doc Callback from ar_mining_io when a chunk is read
recall_chunk(chunk1, Chunk, Nonce, Candidate) ->
recall_chunk(chunk1, Chunk, Nonce, Candidate, StoreID) ->
ar_mining_stats:increment_partition_stats(Candidate#mining_candidate.partition_number),
add_task(chunk1, Candidate#mining_candidate{ chunk1 = Chunk, nonce = Nonce });
recall_chunk(chunk2, Chunk, Nonce, Candidate) ->
prometheus_counter:inc(list_to_atom(io_lib:format("~s_successful_read_1chunk_counter", [StoreID]))),
add_task(chunk1, Candidate#mining_candidate{ chunk1 = Chunk, chunk1_store_id = StoreID, nonce = Nonce });
recall_chunk(chunk2, Chunk, Nonce, Candidate, StoreID) ->
ar_mining_stats:increment_partition_stats(Candidate#mining_candidate.partition_number),
add_task(chunk2, Candidate#mining_candidate{ chunk2 = Chunk, nonce = Nonce });
recall_chunk(skipped, undefined, Nonce, Candidate) ->
prometheus_counter:inc(list_to_atom(io_lib:format("~s_successful_read_2chunk_counter", [StoreID]))),
add_task(chunk2, Candidate#mining_candidate{ chunk2 = Chunk, chunk2_store_id = StoreID, nonce = Nonce });
recall_chunk(skipped, undefined, Nonce, Candidate, _StoreID) ->
update_chunk_cache_size(-1),
signal_cache_cleanup(Nonce, Candidate).

@@ -254,6 +256,7 @@ handle_info({event, nonce_limiter, {computed_output, Args}},
StepNumber = Session#vdf_session.step_number,
true = is_integer(StepNumber),
ar_mining_stats:increment_vdf_stats(),
prometheus_counter:inc(mining_perf_vdf_step_count),
#vdf_session{ seed = Seed, step_number = StepNumber } = Session,
Task = {computed_output, {SessionKey, Seed, StepNumber, Output, PartitionUpperBound}},
Q2 = gb_sets:insert({priority(nonce_limiter_computed_output, StepNumber), make_ref(),
@@ -383,8 +386,9 @@ hashing_thread(SessionRef) ->
{compute_h1, Candidate} ->
case ar_mining_server:is_session_valid(SessionRef, Candidate) of
true ->
#mining_candidate{ h0 = H0, nonce = Nonce, chunk1 = Chunk1 } = Candidate,
#mining_candidate{ h0 = H0, nonce = Nonce, chunk1 = Chunk1, chunk1_store_id = StoreID } = Candidate,
{H1, Preimage} = ar_block:compute_h1(H0, Nonce, Chunk1),
prometheus_counter:inc(list_to_atom(io_lib:format("~s_hash_1chunk_counter", [StoreID]))),
ar_mining_server:computed_hash(computed_h1, H1, Preimage, Candidate);
false ->
ok %% Clear the message queue of requests from outdated mining sessions
@@ -393,8 +397,9 @@ hashing_thread(SessionRef) ->
{compute_h2, Candidate} ->
case ar_mining_server:is_session_valid(SessionRef, Candidate) of
true ->
#mining_candidate{ h0 = H0, h1 = H1, chunk2 = Chunk2 } = Candidate,
#mining_candidate{ h0 = H0, h1 = H1, chunk2 = Chunk2, chunk2_store_id = StoreID } = Candidate,
{H2, Preimage} = ar_block:compute_h2(H1, Chunk2, H0),
prometheus_counter:inc(list_to_atom(io_lib:format("~s_hash_2chunk_counter", [StoreID]))),
ar_mining_server:computed_hash(computed_h2, H2, Preimage, Candidate);
false ->
ok %% Clear the message queue of requests from outdated mining sessions
@@ -600,6 +605,8 @@ handle_task({computed_h1, Candidate}, State) ->
#state{ session = Session, diff = Diff, hashing_threads = Threads } = State,
#mining_session{ chunk_cache = Map } = Session,
#mining_candidate{ h1 = H1, chunk1 = Chunk1 } = Candidate,
DiffBucket = min(hash_to_diff_bucket(H1), ?MINING_HASH_MAX_BUCKET),
prometheus_histogram:observe(mining_perf_hash_gt_2_pow_x_1chunk_count, DiffBucket),
case binary:decode_unsigned(H1, big) > Diff of
true ->
#state{ session = Session } = State,
@@ -658,6 +665,8 @@ handle_task({computed_h2, Candidate}, State) ->
nonce = Nonce, partition_number = PartitionNumber,
partition_upper_bound = PartitionUpperBound, cm_lead_peer = Peer
} = Candidate,
DiffBucket = min(hash_to_diff_bucket(H2), ?MINING_HASH_MAX_BUCKET),
prometheus_histogram:observe(mining_perf_hash_gt_2_pow_x_2chunk_count, DiffBucket),
case binary:decode_unsigned(H2, big) > get_difficulty(State, Candidate) of
true ->
case Peer of
@@ -1081,6 +1090,41 @@ update_chunk_cache_size(Delta) ->
nonce_max() ->
max(0, ((?RECALL_RANGE_SIZE) div ?DATA_CHUNK_SIZE - 1)).

% Slight optimization: operate on four 64-bit words, since arithmetic on a single 256-bit integer may be slower
hash_to_diff_bucket(Hash) ->
<<A:64, B:64, C:64, D:64>> = Hash,
ResA = hash_to_diff_bucket(A, 63),
ResB = case ResA of
64 ->
64+hash_to_diff_bucket(B, 63);
_ ->
ResA
end,
ResC = case ResB of
128 ->
128+hash_to_diff_bucket(C, 63);
_ ->
ResB
end,
ResD = case ResC of
192 ->
192+hash_to_diff_bucket(D, 63);
_ ->
ResC
end,
ResD.

hash_to_diff_bucket(Hash, BitPosition) when BitPosition >= 0 ->
Mask = 1 bsl BitPosition,
case Hash band Mask of
0 ->
0;
_ ->
1 + hash_to_diff_bucket(Hash, BitPosition - 1)
end;
hash_to_diff_bucket(_, _) ->
0.
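
%% Illustrative note (not part of this change): hash_to_diff_bucket/1 returns the number of
%% leading one bits in the 256-bit hash; callers cap the result at ?MINING_HASH_MAX_BUCKET. Examples:
%%   hash_to_diff_bucket(<<0:256>>) => 0
%%   hash_to_diff_bucket(<<2#110:3, 0:253>>) => 2
%%   hash_to_diff_bucket(<<16#FFFFFFFFFFFFFFFF:64, 1:1, 0:191>>) => 65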

%%%===================================================================
%%% Public Test interface.
%%%===================================================================
