Commit

more mining stats
vird committed Sep 29, 2023
1 parent 7d10818 commit f56d094
Showing 5 changed files with 173 additions and 24 deletions.
4 changes: 4 additions & 0 deletions apps/arweave/include/ar_mining.hrl
@@ -7,6 +7,8 @@
cache_ref = not_set, %% not serialized
chunk1 = not_set, %% not serialized
chunk2 = not_set, %% not serialized
chunk1_store_id = not_set,
chunk2_store_id = not_set,
cm_diff = not_set, %% serialized. set to the difficulty used by the H1 miner
cm_lead_peer = not_set, %% not serialized. if set, this candidate came from another peer
h0 = not_set, %% serialized
@@ -49,4 +51,6 @@
partition_upper_bound = not_set
}).

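%% One histogram bucket per bit of the 256-bit mining hash (buckets 0..256).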
-define(MINING_HASH_MAX_BUCKET, 256).

-endif.
2 changes: 1 addition & 1 deletion apps/arweave/src/ar_coordination.erl
@@ -76,7 +76,7 @@ compute_h2(Peer, Candidate, H1List) ->
computed_h2(Candidate) ->
gen_server:cast(?MODULE, {computed_h2, Candidate}).

post_solution(Peer, Candidate) ->
post_solution(_Peer, Candidate) ->
ar_mining_server:prepare_and_post_solution(Candidate).

poll_loop() ->
81 changes: 80 additions & 1 deletion apps/arweave/src/ar_metrics.erl
@@ -4,6 +4,7 @@

-include_lib("arweave/include/ar.hrl").
-include_lib("arweave/include/ar_pricing.hrl").
-include_lib("arweave/include/ar_mining.hrl").
-include_lib("arweave/include/ar_config.hrl").

%%%===================================================================
@@ -419,7 +420,85 @@ register(MetricsDir) ->
"Only set when debug=true."}]),
prometheus_gauge:new([{name, process_info},
{labels, [process, type]},
{help, "Sampling info about active processes. Only set when debug=true."}]).
{help, "Sampling info about active processes. Only set when debug=true."}]),

% Mining performance

% Useful for more accurate VDF time estimation and for the max hashrate per partition (not per storage module)
prometheus_counter:new([
{name, mining_perf_vdf_step_count},
{
help,
"Count of vdf steps provided to mining process"
}
]),

DiffBucketList = lists:seq(0, ?MINING_HASH_MAX_BUCKET),
prometheus_histogram:new([
{name, mining_perf_hash_gt_2_pow_x_1chunk_count},
{buckets, DiffBucketList},
{
help,
"Count of hashes (solutions) found since launch which are >= 2**(bucket_index) (1-chunk solutions only)"
}
]),
prometheus_histogram:new([
{name, mining_perf_hash_gt_2_pow_x_2chunk_count},
{buckets, DiffBucketList},
{
help,
"Count of hashes (solutions) found since launch which are >= 2**(bucket_index) (2-chunk solutions only)"
}
]),

{ok, Config} = application:get_env(arweave, config),
lists:foreach(fun(StorageModule) ->
StoreID = ar_storage_module:id(StorageModule),
% NOTE: if you have more than ~100k storage_modules, the dynamically created metric names can hit the default atom table size limit (~1M atoms)

% Scheduled reads == 200 * mining_perf_vdf_step_count for 1-chunk, with caveats:
% * ?RECALL_RANGE_SIZE is different in DEBUG builds (2 chunks)
% * the ratio is unknown for 2-chunk
% * the weave also grows over time, so a storage module can be allocated but receive no reads until the weave grows past its threshold
prometheus_counter:new([
{name, list_to_atom(io_lib:format("~s_scheduled_read_1chunk_counter", [StoreID]))},
{help, "Scheduled read count watermark for storage_module (1st chunk in solution)"}
]),
prometheus_counter:new([
{name, list_to_atom(io_lib:format("~s_scheduled_read_2chunk_counter", [StoreID]))},
{help, "Scheduled read count watermark for storage_module (2nd chunk in solution)"}
]),

prometheus_counter:new([
{name, list_to_atom(io_lib:format("~s_successful_read_1chunk_counter", [StoreID]))},
{help, "Successful read count for storage_module (1st chunk in solution)"}
]),
prometheus_counter:new([
{name, list_to_atom(io_lib:format("~s_successful_read_2chunk_counter", [StoreID]))},
{help, "Successful read count for storage_module (2nd chunk in solution)"}
]),

prometheus_counter:new([
{name, list_to_atom(io_lib:format("~s_missing_read_1chunk_counter", [StoreID]))},
{help, "Missing read count for storage_module (1st chunk in solution)"}
]),
prometheus_counter:new([
{name, list_to_atom(io_lib:format("~s_missing_read_2chunk_counter", [StoreID]))},
{help, "Missing read count for storage_module (2nd chunk in solution)"}
]),

prometheus_counter:new([
{name, list_to_atom(io_lib:format("~s_hash_1chunk_counter", [StoreID]))},
{help, "Hash count for storage_module (1-chunk solutions only)"}
]),
prometheus_counter:new([
{name, list_to_atom(io_lib:format("~s_hash_2chunk_counter", [StoreID]))},
{help, "Hash count for storage_module (2-chunk solutions only)"}
]),

ok
end, Config#config.storage_modules),
ok.
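
%% Sketch, not part of this change: the per-StoreID metric names above are built inline via
%% list_to_atom(io_lib:format(...)). io_lib:format/2 returns a (possibly deep) iolist while
%% list_to_atom/1 expects a flat character list, so a small helper along these lines
%% (hypothetical name storage_metric_name/2) would centralize the construction and flatten explicitly:
storage_metric_name(Suffix, StoreID) ->
	list_to_atom(lists:flatten(io_lib:format("~s_~s", [StoreID, Suffix]))).
%% Usage: storage_metric_name("hash_1chunk_counter", StoreID) yields the same atom the
%% registration loop above creates for that storage module.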

%% @doc Store the given metric in a file.
store(Name) ->
50 changes: 36 additions & 14 deletions apps/arweave/src/ar_mining_io.erl
@@ -199,7 +199,14 @@ io_thread(PartitionNumber, MiningAddress, StoreID, SessionRef) ->
io_thread(PartitionNumber, MiningAddress, StoreID, Ref);
{WhichChunk, {Candidate, RecallRangeStart}} ->
case ar_mining_server:is_session_valid(SessionRef, Candidate) of
true ->
true ->
case WhichChunk of
chunk1 ->
prometheus_counter:inc(list_to_atom(io_lib:format("~s_scheduled_read_1chunk_counter", [StoreID])), ?RECALL_RANGE_SIZE / ?DATA_CHUNK_SIZE);
chunk2 ->
prometheus_counter:inc(list_to_atom(io_lib:format("~s_scheduled_read_2chunk_counter", [StoreID])), ?RECALL_RANGE_SIZE / ?DATA_CHUNK_SIZE);
_ -> unreach
end,
read_range(WhichChunk, Candidate, RecallRangeStart, StoreID);
false ->
ok %% Clear the message queue of requests from outdated mining sessions
@@ -238,34 +245,49 @@ read_range(WhichChunk, Candidate, RangeStart, StoreID) ->
MiningAddress, StoreID, ar_intervals:new()),
ChunkOffsets = ar_chunk_storage:get_range(RangeStart, Size, StoreID),
ChunkOffsets2 = filter_by_packing(ChunkOffsets, Intervals, StoreID),
MissingCount = (?RECALL_RANGE_SIZE / ?DATA_CHUNK_SIZE) - length(ChunkOffsets),
case WhichChunk of
chunk1 ->
prometheus_counter:inc(list_to_atom(io_lib:format("~s_missing_read_1chunk_counter", [StoreID])), MissingCount);
chunk2 ->
prometheus_counter:inc(list_to_atom(io_lib:format("~s_missing_read_2chunk_counter", [StoreID])), MissingCount);
_ -> unreach
end,
?LOG_DEBUG([{event, mining_debug_read_recall_range},
{chunk, WhichChunk},
{range_start, RangeStart},
{store_id, StoreID},
{found_chunks, length(ChunkOffsets)},
{found_chunks_with_required_packing, length(ChunkOffsets2)}]),
NonceMax = max(0, (Size div ?DATA_CHUNK_SIZE - 1)),
read_range(WhichChunk, Candidate, RangeStart, 0, NonceMax, ChunkOffsets2).
read_range(WhichChunk, Candidate, RangeStart, StoreID, 0, NonceMax, ChunkOffsets2).

read_range(_WhichChunk, _Candidate, _RangeStart, Nonce, NonceMax, _ChunkOffsets)
read_range(_WhichChunk, _Candidate, _RangeStart, _StoreID, Nonce, NonceMax, _ChunkOffsets)
when Nonce > NonceMax ->
ok;
read_range(WhichChunk, Candidate, RangeStart, Nonce, NonceMax, []) ->
ar_mining_server:recall_chunk(skipped, undefined, Nonce, Candidate),
read_range(WhichChunk, Candidate, RangeStart, Nonce + 1, NonceMax, []);
read_range(WhichChunk, Candidate,RangeStart, Nonce, NonceMax, [{EndOffset, Chunk} | ChunkOffsets])
read_range(WhichChunk, Candidate, RangeStart, StoreID, Nonce, NonceMax, []) ->
ar_mining_server:recall_chunk(skipped, undefined, Nonce, Candidate, StoreID),
read_range(WhichChunk, Candidate, RangeStart, StoreID, Nonce + 1, NonceMax, []);
read_range(WhichChunk, Candidate, RangeStart, StoreID, Nonce, NonceMax, [{EndOffset, Chunk} | ChunkOffsets])
%% Only 256 KiB chunks are supported at this point.
when RangeStart + Nonce * ?DATA_CHUNK_SIZE < EndOffset - ?DATA_CHUNK_SIZE ->
ar_mining_server:recall_chunk(skipped, undefined, Nonce, Candidate),
read_range(WhichChunk, Candidate, RangeStart, Nonce + 1, NonceMax,
case WhichChunk of
chunk1 ->
prometheus_counter:inc(list_to_atom(io_lib:format("~s_missing_read_1chunk_counter", [StoreID])));
chunk2 ->
prometheus_counter:inc(list_to_atom(io_lib:format("~s_missing_read_2chunk_counter", [StoreID])));
_ -> unreach
end,
ar_mining_server:recall_chunk(skipped, undefined, Nonce, Candidate, StoreID),
read_range(WhichChunk, Candidate, RangeStart, StoreID, Nonce + 1, NonceMax,
[{EndOffset, Chunk} | ChunkOffsets]);
read_range(WhichChunk, Candidate, RangeStart, Nonce, NonceMax, [{EndOffset, _Chunk} | ChunkOffsets])
read_range(WhichChunk, Candidate, RangeStart, StoreID, Nonce, NonceMax, [{EndOffset, _Chunk} | ChunkOffsets])
when RangeStart + Nonce * ?DATA_CHUNK_SIZE >= EndOffset ->
read_range(WhichChunk, Candidate, RangeStart, Nonce, NonceMax, ChunkOffsets);
read_range(WhichChunk, Candidate, RangeStart, Nonce, NonceMax,
read_range(WhichChunk, Candidate, RangeStart, StoreID, Nonce, NonceMax, ChunkOffsets);
read_range(WhichChunk, Candidate, RangeStart, StoreID, Nonce, NonceMax,
[{_EndOffset, Chunk} | ChunkOffsets]) ->
ar_mining_server:recall_chunk(WhichChunk, Chunk, Nonce, Candidate),
read_range(WhichChunk, Candidate, RangeStart, Nonce + 1, NonceMax, ChunkOffsets).
ar_mining_server:recall_chunk(WhichChunk, Chunk, Nonce, Candidate, StoreID),
read_range(WhichChunk, Candidate, RangeStart, StoreID, Nonce + 1, NonceMax, ChunkOffsets).

find_thread(PartitionNumber, MiningAddress, RangeEnd, RangeStart, Threads) ->
Keys = find_thread2(PartitionNumber, MiningAddress, maps:iterator(Threads)),
60 changes: 52 additions & 8 deletions apps/arweave/src/ar_mining_server.erl
@@ -4,7 +4,7 @@

-behaviour(gen_server).

-export([start_link/0, start_mining/1, recall_chunk/4, computed_hash/4, set_difficulty/1,
-export([start_link/0, start_mining/1, recall_chunk/5, computed_hash/4, set_difficulty/1,
compute_h2_for_peer/2, prepare_and_post_solution/1, post_solution/1,
set_merkle_rebase_threshold/1, get_recall_bytes/4, is_session_valid/2]).
-export([pause/0, get_task_queue_len/0]).
@@ -52,13 +52,15 @@ start_mining(Args) ->
gen_server:cast(?MODULE, {start_mining, Args}).

%% @doc Callback from ar_mining_io when a chunk is read
recall_chunk(chunk1, Chunk, Nonce, Candidate) ->
recall_chunk(chunk1, Chunk, Nonce, Candidate, StoreID) ->
ar_mining_stats:increment_partition_stats(Candidate#mining_candidate.partition_number),
add_task(chunk1, Candidate#mining_candidate{ chunk1 = Chunk, nonce = Nonce });
recall_chunk(chunk2, Chunk, Nonce, Candidate) ->
prometheus_counter:inc(list_to_atom(io_lib:format("~s_successful_read_1chunk_counter", [StoreID]))),
add_task(chunk1, Candidate#mining_candidate{ chunk1 = Chunk, chunk1_store_id = StoreID, nonce = Nonce });
recall_chunk(chunk2, Chunk, Nonce, Candidate, StoreID) ->
ar_mining_stats:increment_partition_stats(Candidate#mining_candidate.partition_number),
add_task(chunk2, Candidate#mining_candidate{ chunk2 = Chunk, nonce = Nonce });
recall_chunk(skipped, undefined, Nonce, Candidate) ->
prometheus_counter:inc(list_to_atom(io_lib:format("~s_successful_read_2chunk_counter", [StoreID]))),
add_task(chunk2, Candidate#mining_candidate{ chunk2 = Chunk, chunk2_store_id = StoreID, nonce = Nonce });
recall_chunk(skipped, undefined, Nonce, Candidate, _StoreID) ->
update_chunk_cache_size(-1),
signal_cache_cleanup(Nonce, Candidate).

@@ -254,6 +256,7 @@ handle_info({event, nonce_limiter, {computed_output, Args}},
StepNumber = Session#vdf_session.step_number,
true = is_integer(StepNumber),
ar_mining_stats:increment_vdf_stats(),
prometheus_counter:inc(mining_perf_vdf_step_count),
#vdf_session{ seed = Seed, step_number = StepNumber } = Session,
Task = {computed_output, {SessionKey, Seed, StepNumber, Output, PartitionUpperBound}},
Q2 = gb_sets:insert({priority(nonce_limiter_computed_output, StepNumber), make_ref(),
@@ -383,8 +386,9 @@ hashing_thread(SessionRef) ->
{compute_h1, Candidate} ->
case ar_mining_server:is_session_valid(SessionRef, Candidate) of
true ->
#mining_candidate{ h0 = H0, nonce = Nonce, chunk1 = Chunk1 } = Candidate,
#mining_candidate{ h0 = H0, nonce = Nonce, chunk1 = Chunk1, chunk1_store_id = StoreID } = Candidate,
{H1, Preimage} = ar_block:compute_h1(H0, Nonce, Chunk1),
prometheus_counter:inc(list_to_atom(io_lib:format("~s_hash_1chunk_counter", [StoreID]))),
ar_mining_server:computed_hash(computed_h1, H1, Preimage, Candidate);
false ->
ok %% Clear the message queue of requests from outdated mining sessions
@@ -393,8 +397,9 @@ hashing_thread(SessionRef) ->
{compute_h2, Candidate} ->
case ar_mining_server:is_session_valid(SessionRef, Candidate) of
true ->
#mining_candidate{ h0 = H0, h1 = H1, chunk2 = Chunk2 } = Candidate,
#mining_candidate{ h0 = H0, h1 = H1, chunk2 = Chunk2, chunk2_store_id = StoreID } = Candidate,
{H2, Preimage} = ar_block:compute_h2(H1, Chunk2, H0),
prometheus_counter:inc(list_to_atom(io_lib:format("~s_hash_2chunk_counter", [StoreID]))),
ar_mining_server:computed_hash(computed_h2, H2, Preimage, Candidate);
false ->
ok %% Clear the message queue of requests from outdated mining sessions
@@ -600,6 +605,8 @@ handle_task({computed_h1, Candidate}, State) ->
#state{ session = Session, diff = Diff, hashing_threads = Threads } = State,
#mining_session{ chunk_cache = Map } = Session,
#mining_candidate{ h1 = H1, chunk1 = Chunk1 } = Candidate,
DiffBucket = min(hash_to_diff_bucket(H1), ?MINING_HASH_MAX_BUCKET),
prometheus_histogram:observe(mining_perf_hash_gt_2_pow_x_1chunk_count, DiffBucket),
case binary:decode_unsigned(H1, big) > Diff of
true ->
#state{ session = Session } = State,
@@ -658,6 +665,8 @@ handle_task({computed_h2, Candidate}, State) ->
nonce = Nonce, partition_number = PartitionNumber,
partition_upper_bound = PartitionUpperBound, cm_lead_peer = Peer
} = Candidate,
DiffBucket = min(hash_to_diff_bucket(H2), ?MINING_HASH_MAX_BUCKET),
prometheus_histogram:observe(mining_perf_hash_gt_2_pow_x_2chunk_count, DiffBucket),
case binary:decode_unsigned(H2, big) > get_difficulty(State, Candidate) of
true ->
case Peer of
@@ -1081,6 +1090,41 @@ update_chunk_cache_size(Delta) ->
nonce_max() ->
max(0, ((?RECALL_RANGE_SIZE) div ?DATA_CHUNK_SIZE - 1)).

% Slight optimization: operate on four 64-bit words, since arithmetic on a single 256-bit integer may be slower
hash_to_diff_bucket(Hash) ->
<<A:64, B:64, C:64, D:64>> = Hash,
ResA = hash_to_diff_bucket(A, 63),
ResB = case ResA of
64 ->
64+hash_to_diff_bucket(B, 63);
_ ->
ResA
end,
ResC = case ResB of
128 ->
128+hash_to_diff_bucket(C, 63);
_ ->
ResB
end,
ResD = case ResC of
192 ->
192+hash_to_diff_bucket(D, 63);
_ ->
ResC
end,
ResD.

hash_to_diff_bucket(Hash, BitPosition) when BitPosition >= 0 ->
Mask = 1 bsl BitPosition,
case Hash band Mask of
0 ->
0;
_ ->
1 + hash_to_diff_bucket(Hash, BitPosition - 1)
end;
hash_to_diff_bucket(_, _) ->
0.
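
%% Illustrative note (not part of this change): hash_to_diff_bucket/1 returns the number of
%% leading one bits in the 256-bit hash; callers cap the result at ?MINING_HASH_MAX_BUCKET. Examples:
%%   hash_to_diff_bucket(<<0:256>>) => 0
%%   hash_to_diff_bucket(<<2#110:3, 0:253>>) => 2
%%   hash_to_diff_bucket(<<16#FFFFFFFFFFFFFFFF:64, 1:1, 0:191>>) => 65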

%%%===================================================================
%%% Public Test interface.
%%%===================================================================
