Skip to content

Commit 8b1ccd9

Browse files
committed
feat: track ActiveRecord async queries
1 parent 3bb36a6 commit 8b1ccd9

File tree

6 files changed

+315
-2
lines changed

6 files changed

+315
-2
lines changed

instrumentation/active_record/lib/opentelemetry/instrumentation/active_record/instrumentation.rb

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77
module OpenTelemetry
88
module Instrumentation
99
module ActiveRecord
10+
QUERY_SPAN_NAME_KEY = OpenTelemetry::Context.create_key('async_query_span_name')
11+
1012
# The Instrumentation class contains logic to detect and install the ActiveRecord instrumentation
1113
class Instrumentation < OpenTelemetry::Instrumentation::Base
1214
MINIMUM_VERSION = Gem::Version.new('7')
@@ -39,6 +41,7 @@ def require_dependencies
3941
require_relative 'patches/transactions_class_methods'
4042
require_relative 'patches/validations'
4143
require_relative 'patches/relation_persistence'
44+
require_relative 'patches/async_query_context_propagation'
4245
end
4346

4447
def patch_activerecord
@@ -55,6 +58,9 @@ def patch_activerecord
5558
::ActiveRecord::Base.prepend(Patches::Validations)
5659

5760
::ActiveRecord::Relation.prepend(Patches::RelationPersistence)
61+
62+
::ActiveRecord::ConnectionAdapters::ConnectionPool.prepend(Patches::AsyncQueryContextPropagation)
63+
::ActiveRecord::FutureResult.prepend(Patches::FutureResultExtensions)
5864
end
5965
end
6066
end
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
# frozen_string_literal: true
2+
3+
# Copyright The OpenTelemetry Authors
4+
#
5+
# SPDX-License-Identifier: Apache-2.0
6+
7+
module OpenTelemetry
8+
module Instrumentation
9+
module ActiveRecord
10+
QUERY_SPAN_NAME_KEY = OpenTelemetry::Context.create_key('async_query_span_name')
11+
12+
module Patches
13+
# Module to prepend to ActiveRecord::ConnectionAdapters::ConnectionPool
14+
module AsyncQueryContextPropagation
15+
def schedule_query(future_result) # :nodoc:
16+
context = OpenTelemetry::Context.current
17+
18+
@async_executor.post do
19+
# This can happen in the request thread, in the case of a busy executor (fallback_action is executed.)
20+
# FIXME: This override should be unecessary if the concurrent-ruby instrumentation is always installed.
21+
OpenTelemetry::Context.with_current(context) do
22+
future_result.execute_or_skip
23+
end
24+
end
25+
26+
Thread.pass
27+
end
28+
end
29+
30+
# Module to support otel context propagation to ActiveRecord::FutureResults
31+
module FutureResultExtensions
32+
OTEL_QUERY_SPAN_NAME_IVAR = :@__otel_query_span_name
33+
34+
def initialize(...)
35+
super
36+
37+
if (query_span_name = OpenTelemetry::Context.current.value(QUERY_SPAN_NAME_KEY))
38+
instance_variable_set(OTEL_QUERY_SPAN_NAME_IVAR, query_span_name)
39+
end
40+
end
41+
42+
private
43+
44+
def execute_query(connection, async: false)
45+
name = instance_variable_get(OTEL_QUERY_SPAN_NAME_IVAR) || @args[1] || 'execute_query'
46+
Instrumentation.instance.tracer.in_span(name, attributes: { 'async' => async }) do
47+
super
48+
end
49+
end
50+
end
51+
end
52+
end
53+
end
54+
end

instrumentation/active_record/lib/opentelemetry/instrumentation/active_record/patches/querying.rb

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,11 @@ module ClassMethods
2121
method_name = ::ActiveRecord.version >= Gem::Version.new('7.0.0') ? :_query_by_sql : :find_by_sql
2222

2323
define_method(method_name) do |*args, **kwargs, &block|
24-
tracer.in_span("#{self} query") do
25-
super(*args, **kwargs, &block)
24+
query_span_name = "#{self} query"
25+
OpenTelemetry::Context.with_value(QUERY_SPAN_NAME_KEY, query_span_name) do
26+
tracer.in_span(kwargs[:async] ? "schedule #{query_span_name}" : query_span_name) do
27+
super(*args, **kwargs, &block)
28+
end
2629
end
2730
end
2831

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,235 @@
1+
# frozen_string_literal: true
2+
3+
# Copyright The OpenTelemetry Authors
4+
#
5+
# SPDX-License-Identifier: Apache-2.0
6+
7+
require 'test_helper'
8+
9+
require_relative '../../../../lib/opentelemetry/instrumentation/active_record'
10+
require_relative '../../../../lib/opentelemetry/instrumentation/active_record/patches/async_query_context_propagation'
11+
12+
ASYNC_TEST_LOGGER = Logger.new($stdout).tap { |logger| logger.level = Logger::WARN }
13+
14+
describe OpenTelemetry::Instrumentation::ActiveRecord::Patches::AsyncQueryContextPropagation do
15+
let(:exporter) { EXPORTER }
16+
let(:unfiltered_spans) { exporter.finished_spans }
17+
let(:instrumentation) { OpenTelemetry::Instrumentation::ActiveRecord::Instrumentation.instance }
18+
let(:logger) { ASYNC_TEST_LOGGER }
19+
20+
before do
21+
exporter.reset
22+
setup_asynchronous_queries_session
23+
User.create!
24+
end
25+
26+
after do
27+
teardown_asynchronous_queries_session
28+
29+
ActiveRecord::Base.subclasses.each do |model|
30+
model.connection.truncate(model.table_name)
31+
end
32+
end
33+
34+
def setup_asynchronous_queries_session
35+
@_async_queries_session = ActiveRecord::Base.asynchronous_queries_tracker.start_session
36+
end
37+
38+
def teardown_asynchronous_queries_session
39+
ActiveRecord::Base.asynchronous_queries_tracker.finalize_session(true) if @_async_queries_session
40+
end
41+
42+
def run_async_load
43+
logger.debug('>>> run async load')
44+
in_new_trace do
45+
instrumentation.tracer.in_span('test_wrapper') do
46+
if block_given?
47+
yield
48+
else
49+
users = User.all.load_async
50+
sleep(0.5)
51+
logger.debug('>>> async #to_a')
52+
users.to_a
53+
end
54+
end
55+
end
56+
end
57+
58+
def in_new_trace(&block)
59+
OpenTelemetry::Context.with_current(OpenTelemetry::Context::ROOT, &block)
60+
end
61+
62+
def spans
63+
test_wrapper_span = unfiltered_spans.find { |span| span.name == 'test_wrapper' }
64+
unfiltered_spans.select { |span| span.trace_id == test_wrapper_span.trace_id }
65+
end
66+
67+
def span_names
68+
spans.map(&:name).sort
69+
end
70+
71+
# call with block_queue: true to completely block the executor (no tasks can be enqueued),
72+
# or with block_queue: false to block the workers only (tasks still accepted in the queue)
73+
def with_busy_executor(block_queue: true)
74+
_(ActiveRecord.async_query_executor).must_equal :global_thread_pool
75+
76+
mutex = Mutex.new
77+
condvar = ConditionVariable.new
78+
executor = ActiveRecord.instance_variable_get(:@global_thread_pool_async_query_executor)
79+
80+
task_count = executor.max_length
81+
task_count += executor.max_queue if block_queue
82+
83+
awaiting_signals = (0...task_count).to_a
84+
85+
# Fill up the max thread count and queue with tasks that
86+
# will never complete until they are signaled to do so.
87+
task_count.times do |n|
88+
executor.post do
89+
mutex.synchronize do
90+
ASYNC_TEST_LOGGER.debug("task #{n} waiting...")
91+
condvar.wait(mutex)
92+
ASYNC_TEST_LOGGER.debug("task #{n} got the signal")
93+
awaiting_signals.delete(n)
94+
end
95+
end
96+
end
97+
98+
logger.debug("yielding... block_queue=#{block_queue}")
99+
yield
100+
logger.debug('...done!')
101+
ensure
102+
logger.debug('cleaning up...')
103+
# clean up the queue
104+
mutex.synchronize { condvar.signal } until awaiting_signals.empty?
105+
end
106+
107+
def current_thread_id
108+
Thread.current.object_id
109+
end
110+
111+
def execute_query_span
112+
spans.find { |span| span.name == 'User query' }
113+
end
114+
115+
it 'async_query' do
116+
run_async_load
117+
118+
_(span_names).must_equal(['test_wrapper', 'User query', 'schedule User query'].sort)
119+
_(execute_query_span.attributes['__test_only_thread_id']).wont_equal(current_thread_id)
120+
_(execute_query_span.attributes['async']).must_equal(true)
121+
end
122+
123+
describe 'no executor' do
124+
before do
125+
@async_query_executor_was = ActiveRecord.async_query_executor
126+
ActiveRecord.async_query_executor = nil
127+
end
128+
129+
after do
130+
ActiveRecord.async_query_executor = @async_query_executor_was
131+
end
132+
133+
it 'is not actually async' do
134+
run_async_load # sic
135+
136+
_(spans.map(&:name)).wont_include('Schedule User query')
137+
_(spans.map(&:name)).must_include('User query')
138+
139+
user_query = spans.find { |span| span.name == 'User query' }
140+
_(user_query.attributes['async']).must_equal(false) if user_query.attributes.key?('async')
141+
_(span_names).must_equal(['User query', 'test_wrapper'].sort)
142+
end
143+
end
144+
145+
it 'async_query_blocked_executor' do
146+
with_busy_executor { run_async_load }
147+
148+
# In this case the wrapped task is executed as the 'fallback_action' by the thread pool executor,
149+
# so we get the async span, even though it is not actually async.
150+
_(execute_query_span.attributes['__test_only_thread_id']).must_equal(current_thread_id)
151+
152+
skip(<<~SKIP)
153+
`async` _should_ be false here, but it's executed as a fallback action and
154+
is incorrectly set to `true`.
155+
156+
Whether or not this is actually an issue is up for debate;
157+
it's true that the query would have been async if the global pool load was lower,
158+
so it could be said that the benefit of attempting to enqueue the task
159+
is measured in degrees, ranging from no benefit to saving the entire time of the query.
160+
161+
However, the _other_ scenario in which the task is enqueued but not yet worked on
162+
causes `async` to be false.
163+
164+
Ultimately, however, this is a bug in Rails's instrumentation around async queries,
165+
so it doesn't feel particularly pressing to solve it here with a bunch of
166+
otherwise unecessary patches.
167+
SKIP
168+
169+
_(execute_query_span.attributes['async']).must_equal(false)
170+
end
171+
172+
it 'async_query_slow_executor' do
173+
# executor accepts task, but doesn't fulfill it before the waiter
174+
with_busy_executor(block_queue: false) do
175+
run_async_load
176+
end
177+
178+
# When #to_a is called, the query is still pending and hasn't been picked up,
179+
# so AR executes is synchronously. The executor task is cancelled (or should be?),
180+
# so this span won't be here.
181+
_(execute_query_span.attributes['async']).must_equal(false)
182+
_(span_names).must_equal(['User query', 'schedule User query', 'test_wrapper'])
183+
end
184+
185+
it 'async_query_no_wait' do
186+
run_async_load do
187+
User.all.load_async.to_a
188+
end
189+
190+
# here we called #to_a inline, so it executed before the async scheduler
191+
# could assign the task to a worker. I'm not sure this test will always pass.
192+
_(execute_query_span.attributes['async']).must_equal(false)
193+
_(execute_query_span.attributes['__test_only_thread_id']).must_equal(current_thread_id)
194+
end
195+
196+
it 'async_count' do
197+
run_async_load do
198+
count = User.async_count
199+
sleep(0.5)
200+
count.value
201+
end
202+
203+
count_span = spans.find { |span| span.name == 'User Count' }
204+
_(count_span.attributes['async']).must_equal(true)
205+
end
206+
207+
it 'works with concurrent queries' do
208+
Account.create!
209+
210+
run_async_load do
211+
users = User.all.load_async
212+
accounts = Account.all.load_async
213+
214+
sleep(0.5)
215+
216+
users.to_a
217+
accounts.to_a
218+
end
219+
220+
user_schedule_span = spans.find { |span| span.name == 'schedule User query' }
221+
account_schedule_span = spans.find { |span| span.name == 'schedule Account query' }
222+
user_query_span = spans.find { |span| span.name == 'User query' }
223+
account_query_span = spans.find { |span| span.name == 'Account query' }
224+
test_wrapper_span = spans.find { |span| span.name == 'test_wrapper' }
225+
226+
_(user_schedule_span.parent_span_id).must_equal(test_wrapper_span.span_id)
227+
_(account_schedule_span.parent_span_id).must_equal(test_wrapper_span.span_id)
228+
229+
_(user_query_span.parent_span_id).must_equal(user_schedule_span.span_id)
230+
_(account_query_span.parent_span_id).must_equal(account_schedule_span.span_id)
231+
232+
_(user_query_span.attributes['async']).must_equal(true)
233+
_(account_query_span.attributes['async']).must_equal(true)
234+
end
235+
end

instrumentation/active_record/test/instrumentation/active_record/patches/querying_test.rb

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
let(:spans) { exporter.finished_spans }
1515

1616
before { exporter.reset }
17+
1718
after do
1819
ActiveRecord::Base.subclasses.each do |model|
1920
model.connection.truncate(model.table_name)

instrumentation/active_record/test/test_helper.rb

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,8 @@
2929
ActiveRecord::Base.logger = logger
3030
ActiveRecord::Migration.verbose = false
3131

32+
ActiveRecord.async_query_executor = :global_thread_pool
33+
3234
ActiveRecord::Base.establish_connection(
3335
adapter: 'sqlite3',
3436
database: 'db/development.sqlite3'
@@ -84,3 +86,15 @@ def change
8486
end
8587

8688
Minitest.after_run { CreateUserTable.migrate(:down) }
89+
90+
# Used in async tests to determine what thread spawned which span
91+
module SpanThreadIdTracking
92+
def internal_start_span(name, kind, attributes, links, start_timestamp, parent_context, instrumentation_scope) # rubocop: disable Metrics/ParameterLists
93+
attributes ||= {}
94+
attributes['__test_only_thread_id'] = Thread.current.object_id
95+
96+
super
97+
end
98+
end
99+
100+
OpenTelemetry::SDK::Trace::TracerProvider.prepend(SpanThreadIdTracking)

0 commit comments

Comments
 (0)