Skip to content

Commit f7c7379

Browse files
authored
Add RabbitMQ collector (#7)
1 parent 525202f commit f7c7379

34 files changed

+1009
-445
lines changed

DotNetObservabilitySample.sln

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Sample.MainApi", "src\Sampl
1212
EndProject
1313
Project("{E53339B2-1760-4266-BCC7-CA923CBCF16C}") = "docker-compose", "docker-compose.dcproj", "{D9851614-526F-43C3-B9E2-F93B0F0E1448}"
1414
EndProject
15+
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Sample.RabbitMQCollector", "src\Sample.RabbitMQCollector\Sample.RabbitMQCollector.csproj", "{7A7BF83E-DA37-47FC-83CF-104D3EA9B36F}"
16+
EndProject
1517
Global
1618
GlobalSection(SolutionConfigurationPlatforms) = preSolution
1719
Debug|Any CPU = Debug|Any CPU
@@ -38,6 +40,10 @@ Global
3840
{D9851614-526F-43C3-B9E2-F93B0F0E1448}.Debug|Any CPU.Build.0 = Debug|Any CPU
3941
{D9851614-526F-43C3-B9E2-F93B0F0E1448}.Release|Any CPU.ActiveCfg = Release|Any CPU
4042
{D9851614-526F-43C3-B9E2-F93B0F0E1448}.Release|Any CPU.Build.0 = Release|Any CPU
43+
{7A7BF83E-DA37-47FC-83CF-104D3EA9B36F}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
44+
{7A7BF83E-DA37-47FC-83CF-104D3EA9B36F}.Debug|Any CPU.Build.0 = Debug|Any CPU
45+
{7A7BF83E-DA37-47FC-83CF-104D3EA9B36F}.Release|Any CPU.ActiveCfg = Release|Any CPU
46+
{7A7BF83E-DA37-47FC-83CF-104D3EA9B36F}.Release|Any CPU.Build.0 = Release|Any CPU
4147
EndGlobalSection
4248
GlobalSection(SolutionProperties) = preSolution
4349
HideSolutionNode = FALSE

ai-tips.md

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ To find end-to-end transactions in Azure Portal go to your Application Insights
3838

3939
## 3) Getting the longest end-to-end operation executions
4040

41-
Lists the longest end-to-end transaction times
41+
Lists the longest end-to-end transaction times including start, end, depth, total_duration
4242

4343
```kusto
4444
let endTime = (start: datetime, duration: int) { datetime_add("Millisecond", duration, start) };
@@ -49,9 +49,9 @@ requests
4949
| where timestamp >= startDate
5050
| project operation_Id
5151
| join kind=leftouter (requests) on $left.operation_Id == $right.operation_Id
52-
| summarize start=min(timestamp), end=max(endTime(timestamp, duration)) by operation_Id
53-
| project Name=operationName, operation_Id, start, end, timeInMs=datetime_diff("Millisecond", end, start)
54-
| order by timeInMs desc
52+
| summarize start=min(timestamp), end=max(endTime(timestamp, duration)), depth=dcount(cloud_RoleName) by operation_Id
53+
| project Name=operationName, operation_Id, start, end, depth, total_duration=datetime_diff("Millisecond", end, start)
54+
| order by total_duration desc
5555
```
5656

5757
### Graph of end-to-end operation/durations per minute

docker-compose.dcproj

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
<DockerTargetOS>Linux</DockerTargetOS>
66
<ProjectGuid>d9851614-526f-43c3-b9e2-f93b0f0e1448</ProjectGuid>
77
<DockerLaunchAction>LaunchBrowser</DockerLaunchAction>
8-
<DockerServiceUrl>{Scheme}://localhost:{ServicePort}/{Scheme}://{ServiceHost}:{ServicePort}/api/dbtime</DockerServiceUrl>
8+
<DockerServiceUrl>{Scheme}://localhost:{ServicePort}/api/dbtime</DockerServiceUrl>
99
<DockerServiceName>sample.mainapi</DockerServiceName>
1010
</PropertyGroup>
1111
<ItemGroup>

docker-compose.yml

Lines changed: 19 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,12 @@ services:
66
environment:
77
- SampleApp__RabbitMQHostName=rabbitmq
88
- SampleApp__TimeAPIUrl=http://sample.timeapi
9+
- SampleApp__UseApplicationInsights=${USE_APPLICATIONINSIGHTS-false}
10+
- SampleApp__UseOpenTelemetry=${USE_OPENTELEMETRY-true}
11+
- SampleApp__ApplicationInsightsInstrumentationKey=${AI_INSTRUMENTATIONKEY-}
12+
- SampleApp__ApplicationInsightsForOpenTelemetryInstrumentationKey=${AI_INSTRUMENTATIONKEY_OPENTELEMETRY-}
913
- OpenTelemetry__Prometheus__Url=http://sample.mainapi:9184/metrics/
10-
- OpenTelemetry__Jaeger__AgentHost=jaeger
14+
- OpenTelemetry__Jaeger__AgentHost=${JAEGER_AGENTHOST-jaeger}
1115
depends_on:
1216
- rabbitmq
1317
- sample.timeapi
@@ -25,7 +29,11 @@ services:
2529
environment:
2630
- SampleApp__RabbitMQHostName=rabbitmq
2731
- SampleApp__TimeAPIUrl=http://sample.timeapi
28-
- OpenTelemetry__Jaeger__AgentHost=jaeger
32+
- SampleApp__UseApplicationInsights=${USE_APPLICATIONINSIGHTS-false}
33+
- SampleApp__UseOpenTelemetry=${USE_OPENTELEMETRY-true}
34+
- SampleApp__ApplicationInsightsInstrumentationKey=${AI_INSTRUMENTATIONKEY-}
35+
- SampleApp__ApplicationInsightsForOpenTelemetryInstrumentationKey=${AI_INSTRUMENTATIONKEY_OPENTELEMETRY-}
36+
- OpenTelemetry__Jaeger__AgentHost=${JAEGER_AGENTHOST-jaeger}
2937
depends_on:
3038
- rabbitmq
3139
- sample.timeapi
@@ -37,14 +45,19 @@ services:
3745

3846
sample.timeapi:
3947
image: ${DOCKER_REGISTRY-}dotnetobservabilitysample-timeapi${TAG-}
48+
environment:
49+
- SampleApp__RabbitMQHostName=rabbitmq
50+
- SampleApp__TimeAPIUrl=http://sample.timeapi
51+
- SampleApp__UseApplicationInsights=${USE_APPLICATIONINSIGHTS-false}
52+
- SampleApp__UseOpenTelemetry=${USE_OPENTELEMETRY-true}
53+
- SampleApp__ApplicationInsightsInstrumentationKey=${AI_INSTRUMENTATIONKEY-}
54+
- SampleApp__ApplicationInsightsForOpenTelemetryInstrumentationKey=${AI_INSTRUMENTATIONKEY_OPENTELEMETRY-}
55+
- SqlConnectionString=server=sqlserver;user id=sa;password=Pass@Word1;
56+
- OpenTelemetry__Jaeger__AgentHost=${JAEGER_AGENTHOST-jaeger}
4057
depends_on:
4158
- sqlserver
4259
- prometheus
4360
- jaeger
44-
environment:
45-
- SampleApp__RabbitMQHostName=rabbitmq
46-
- SqlConnectionString=server=sqlserver;user id=sa;password=Pass@Word1;
47-
- OpenTelemetry__Jaeger__AgentHost=jaeger
4861
build:
4962
context: .
5063
dockerfile: src/Sample.TimeApi/Dockerfile

opentelemetry-ai-state.md

Lines changed: 20 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -33,35 +33,43 @@ Using AI SDK:
3333

3434
![Failure Application Insights](media/failures-ai-ai.png)
3535

36-
## 4. Difference in first request duration
36+
## 4. Differences in creating span from System.Diagnostics.Activity
3737

38-
When building traces for asynchronous transactions (publisher/consumer) the starting request duration is different depending on the SDK used.
38+
When creating spans/operations from a System.Diagnostics.Activity, OpenTelemetry has 2 differences in comparison to Application Insights (between methods Tracer.StartActiveSpanFromActivity and TelemetryClient.StartOperation<RequestTelemetry>).
3939

40-
With Application Insights SDK the request duration seems to be the total time. On the other hand, using OpenTelemetry SDK, it seems to have only the self time.
40+
- OpenTelemetry won't automatically copy Activity.Tags
41+
- OpenTelemetry requires the activity to have been started (Activity.Start())
4142

42-
Application Insights SDK:
43+
## ~~5. Difference in first request duration~~
44+
45+
~~When building traces for asynchronous transactions (publisher/consumer) the starting request duration is different depending on the SDK used.~~
46+
47+
~~With Application Insights SDK the request duration seems to be the total time. On the other hand, using OpenTelemetry SDK, it seems to have only the self time.~~
48+
49+
~~Application Insights SDK:~~
4350

4451
![Application Insights SDK](media/ai-sdk-request-differences.png)
4552

46-
Open Telemetry SDK:
53+
~~Open Telemetry SDK:~~
4754

4855
![Open Telemetry SDK](media/ot-sdk-request-differences.png)
4956

50-
When using the Jaeger exporter the starting request has the self time as duration (as with OpenTelemetry + Application Insights exporter):
57+
~~When using the Jaeger exporter the starting request has the self time as duration (as with OpenTelemetry + Application Insights exporter):~~
5158

5259
![Jaeger start request duration](media/ot-sdk-jaeger-request-differences.png)
5360

54-
This difference is also visible in Application Insights/Workbooks/Application Performance Insights.
61+
~~This difference is also visible in Application Insights/Workbooks/Application Performance Insights.~~
5562

56-
OpenTelemetry SDK has only the self time:
63+
~~OpenTelemetry SDK has only the self time:~~
5764

5865
![Open Telemetry SDK performance Insights](media/ot-ai-performance-insights.png)
5966

60-
Application Insights SDK has the total time:
67+
~~Application Insights SDK has the total time:~~
6168

6269
![Application Insights SDK performance Insights](media/ai-ai-performance-insights.png)
6370

64-
For me, the ideal solution would be:
71+
~~For me, the ideal solution would be:~~
72+
73+
~~- Allow visualization and alerting on end-to-end transaction duration (total time)~~
74+
~~- Include self time of requests starting an asynchronous trace~~
6575

66-
- Allow visualization and alerting on end-to-end transaction duration (total time)
67-
- Include self time of requests starting an asynchronous trace

ready-to-run/sample/docker-compose.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ services:
1111
- SampleApp__ApplicationInsightsInstrumentationKey=${AI_INSTRUMENTATIONKEY-}
1212
- SampleApp__ApplicationInsightsForOpenTelemetryInstrumentationKey=${AI_INSTRUMENTATIONKEY_OPENTELEMETRY-}
1313
- OpenTelemetry__Prometheus__Url=http://sample.mainapi:9184/metrics/
14-
- OpenTelemetry__Jaeger__AgentHost=jaeger
14+
- OpenTelemetry__Jaeger__AgentHost=${JAEGER_AGENTHOST-jaeger}
1515
depends_on:
1616
- rabbitmq
1717
- sample.timeapi
@@ -30,7 +30,7 @@ services:
3030
- SampleApp__UseOpenTelemetry=${USE_OPENTELEMETRY-true}
3131
- SampleApp__ApplicationInsightsInstrumentationKey=${AI_INSTRUMENTATIONKEY-}
3232
- SampleApp__ApplicationInsightsForOpenTelemetryInstrumentationKey=${AI_INSTRUMENTATIONKEY_OPENTELEMETRY-}
33-
- OpenTelemetry__Jaeger__AgentHost=jaeger
33+
- OpenTelemetry__Jaeger__AgentHost=${JAEGER_AGENTHOST-jaeger}
3434
depends_on:
3535
- rabbitmq
3636
- sample.timeapi
@@ -46,7 +46,7 @@ services:
4646
- SampleApp__ApplicationInsightsInstrumentationKey=${AI_INSTRUMENTATIONKEY-}
4747
- SampleApp__ApplicationInsightsForOpenTelemetryInstrumentationKey=${AI_INSTRUMENTATIONKEY_OPENTELEMETRY-}
4848
- SqlConnectionString=server=sqlserver;user id=sa;password=Pass@Word1;
49-
- OpenTelemetry__Jaeger__AgentHost=jaeger
49+
- OpenTelemetry__Jaeger__AgentHost=${JAEGER_AGENTHOST-jaeger}
5050
depends_on:
5151
- sqlserver
5252
- prometheus

scenario1.md

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -108,8 +108,12 @@ docker run -d --name jaeger \
108108
}
109109
```
110110

111+
To run you can either:
111112

112-
To run the sample start the project Sample.TimeApi. To generate load use the following script:
113+
- debug from Visual Studio (start project -> docker-compose)
114+
- start from the terminal, using docker-compose (docker-compose up --build).
115+
116+
To generate load use the following script:
113117

114118
```bash
115119
watch -n 1 curl --request GET http://localhost:5002/api/time/dbtime

scenario2.md

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,12 @@ docker run -d --name jaeger \
105105
}
106106
```
107107

108-
To run the sample start the projects Sample.TimeApi and Sample.MainApi. To generate load use the following script:
108+
To run you can either:
109+
110+
- debug from Visual Studio (start project -> docker-compose)
111+
- start from the terminal, using docker-compose (docker-compose up --build).
112+
113+
To generate load use the following script:
109114

110115
```bash
111116
watch -n 1 curl --request GET http://localhost:5001/api/dbtime

scenario3.md

Lines changed: 54 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -12,80 +12,71 @@ This scenario covers asynchronous transactions using RabbitMQ queues. In this sc
1212

1313
## Implementation
1414

15-
It is necessary to write custom code to correlate RabbitMQ message consumption with the trace that generated the message. The sample application uses RabbitMQ message headers to include the trace parent of each message.
15+
In order to correlate the publisher and consumer of a RabbitMQ message we need have to add the traceability ourselves. There is no built-in traceability as with HTTP requests in .NET Core 3.0. The sample application uses RabbitMQ message headers to include the trace parent of each message. Similar to the way Azure Service Bus does.
1616

17-
In publisher:
17+
In the sample application, the publisher starts a System.Diagnostics.Activity before publishing a message to RabbitMQ. The full id (W3C format) is forwarded in the message header:
1818

1919
```c#
20-
var props = channel.CreateBasicProperties();
21-
props.Headers = new Dictionary<string, object>();
22-
props.Headers.Add("traceparent", "00-cd4262a7f7adf040bdd892959cf8c4fc-4a28d39ff0e725f2-01");
20+
Activity activity = null;
21+
if (diagnosticSource.IsEnabled(Constants.DiagnosticsName))
22+
{
23+
activity = new Activity(Constants.PublishActivityName);
24+
// code removed for simplicity
25+
diagnosticSource.StartActivity(activity, null);
26+
27+
basicProperties.Headers.Add("traceparent", activity.Id);
28+
}
2329

24-
channel.BasicPublish("", QueueName, props, System.Text.Encoding.UTF8.GetBytes(message))
30+
channel.BasicPublish(...)
2531
```
2632

27-
In consumer:
33+
In consumer, the trace parent is extracted from the message header and used when creating the consumer Activity.
2834

2935
```c#
30-
if (rabbitMessage.BasicProperties.Headers.TryGetValue(TraceParent.HeaderKey, out var rawTraceParent) && rawTraceParent is byte[] binRawTraceParent)
36+
public static Activity ExtractActivity(this BasicDeliverEventArgs source, string name)
3137
{
32-
// parse "00-cd4262a7f7adf040bdd892959cf8c4fc-4a28d39ff0e725f2-01" into
33-
// version
34-
// traceId
35-
// parentId
36-
// flags
38+
var activity = new Activity(name);
39+
40+
if (source.BasicProperties.Headers.TryGetValue("traceparent", out var rawTraceParent) && rawTraceParent is byte[] binRawTraceParent)
41+
{
42+
activity.SetParentId(Encoding.UTF8.GetString(binRawTraceParent));
43+
}
44+
45+
return activity;
3746
}
3847
```
3948

40-
Once we have the parent and trace ids we need to start the consumer activity referencing them.
49+
The created Activities won't be added to exported to OpenTelemetry or Application Insights by themselves. We have two options to export them.
4150

42-
For OpenTelemetry:
51+
### Using the Activity directly
4352

44-
```C#
45-
var parentContext = new SpanContext(traceId, parentId, ActivityTraceFlags.Recorded, isRemote: true);
46-
tracer.StartActiveSpan("Process single RabbitMQ message", parentContext, SpanKind.Consumer, out span);
47-
```
53+
In this approach the final span/operation is created from an Activity. This is how the consumer sample application was built.
4854

49-
For Application Insights:
55+
Activity extracting:
5056

51-
```C#
52-
using (var operation = telemetryClient.StartOperation<RequestTelemetry>("Process single RabbitMQ message", traceId, parentId))
53-
{
54-
...
55-
}
57+
```c#
58+
// ExtractActivity creates the Activity setting the parent based on the RabbitMQ "traceparent" header
59+
var activity = rabbitMQMessage.ExtractActivity("Process single RabbitMQ message");
5660
```
5761

58-
The requirement to track RabbitMQ publishing also requires additional code. The easier option is to use the target SDK and create spans/operations before calling RabbitMQ (as we did for SQL server in Open Telemetry).
59-
60-
The second option is to create System.Diagnostics.Activity objects, as a SDK provider would do. Collectors to OpenTelemetry or Application Insights subscribe to those activities, creating the the corresponding span (or dependency in Application Insights).
62+
OpenTelemetry:
6163

62-
This is the code added to RabbitMQ publisher to generate an activity:
64+
```c#
65+
activity.Start();
66+
tracer.StartActiveSpanFromActivity(activity.OperationName, activity, SpanKind.Consumer, out span);
67+
```
6368

64-
```C#
65-
static DiagnosticSource diagnosticSource = new DiagnosticListener("Sample.RabbitMQ");
69+
Application Insights:
6670

67-
public void Publish(string message, string traceId, string spanId)
68-
{
69-
Activity activity = null;
70-
if (diagnosticSource.IsEnabled("Sample.RabbitMQ"))
71-
{
72-
activity = new Activity("Publish to RabbitMQ");
73-
activity.AddTag("operation", "publish");
74-
activity.AddTag("host", HostName);
75-
activity.AddTag("queue", QueueName);
76-
diagnosticSource.StartActivity(activity, null);
77-
}
71+
```c#
72+
var operation = telemetryClient.StartOperation<RequestTelemetry>(activity);
73+
```
7874

79-
// publish to RabbitMQ
75+
### Using Diagnostic Listeners
8076

81-
if (activity != null)
82-
{
83-
diagnosticSource.StopActivity(activity, null);
84-
}
85-
}
86-
```
77+
Another way to consume generated Activities is to subscribe to them, creating the the corresponding span (or dependency in Application Insights).
8778

88-
The sample project contains a simplified collector implementation for Application Insights and OpenTelemetry. For production quality please refer to OpenTelemetry and/or Application Insights built-in collectors and this [user guide](https://github.com/dotnet/corefx/blob/master/src/System.Diagnostics.DiagnosticSource/src/ActivityUserGuide.md).
79+
The sample publisher (Sample.MainApi) contains a simplified collector implementation for Application Insights and OpenTelemetry (Sample.RabbitMQCollector project). For production quality please refer to OpenTelemetry and/or Application Insights built-in collectors and this [user guide](https://github.com/dotnet/corefx/blob/master/src/System.Diagnostics.DiagnosticSource/src/ActivityUserGuide.md).
8980

9081
For OpenTelemetry, this is how it looks like:
9182

@@ -94,7 +85,9 @@ public class RabbitMQListener : ListenerHandler
9485
{
9586
public override void OnStartActivity(Activity activity, object payload)
9687
{
97-
this.Tracer.StartSpanFromActivity(activity.OperationName, activity);
88+
var span = this.Tracer.StartSpanFromActivity(activity.OperationName, activity);
89+
foreach (var kv in activity.Tags)
90+
span.SetAttribute(kv.Key, kv.Value);
9891
}
9992

10093
public override void OnStopActivity(Activity activity, object payload)
@@ -108,10 +101,12 @@ public class RabbitMQListener : ListenerHandler
108101
}
109102
}
110103

111-
subscriber = new DiagnosticSourceSubscriber(new RabbitMQListener("Sample.RabbitMQ", tracer), DefaultFilter);
104+
var subscriber = new DiagnosticSourceSubscriber(new RabbitMQListener("Sample.RabbitMQ", tracer), DefaultFilter);
112105
subscriber.Subscribe();
113106
```
114107

108+
### Metrics requirements implementation
109+
115110
To fulfill metrics requirements using OpenTelemetry, the sample application uses a Prometheus exporter.
116111

117112
```c#
@@ -138,7 +133,6 @@ var labelSet = new Dictionary<string, string>()
138133
};
139134

140135
counter.Add(context, 1L, meter.GetLabelSet(labelSet));
141-
142136
```
143137

144138
Application Insights SDK also provides support to metrics, as the code below demonstrates:
@@ -236,7 +230,12 @@ docker run -d --name jaeger \
236230
}
237231
```
238232

239-
To run the sample start the projects Sample.TimeApi, Sample.MainApi and Sample.RabbitMQProcessor. To generate load use the following scripts:
233+
To run you can either:
234+
235+
- debug from Visual Studio (start project -> docker-compose)
236+
- start from the terminal, using docker-compose (docker-compose up --build).
237+
238+
To generate load use the following scripts:
240239

241240
Enqueuing from "WebSiteA" every 2 seconds
242241

@@ -258,4 +257,4 @@ watch -n 30 curl --request GET http://localhost:5001/api/enqueue/WebSiteC
258257

259258
## Where to go next
260259

261-
- [Back to overview](./README.md)
260+
- [Back to overview](./README.md)

0 commit comments

Comments
 (0)