Skip to content

Commit 59a9c81

Browse files
authored
Add new ErrorType metric (#3932)
This commit adds a new ErrorType core metric that is recorded for all failed API call attempts. The ErrorType records the general category of error that occurred for a failed API call attempt. Those categories are: - Throttling errors - Service errors other than throttling - I/O errors - API call or API call attempt timeouts The intent of this metric is to help locate possible issues at a glance and help direct further debugging or investigation.
1 parent a1d9f64 commit 59a9c81

File tree

9 files changed

+287
-6
lines changed

9 files changed

+287
-6
lines changed
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
{
2+
"type": "feature",
3+
"category": "AWS SDK for Java v2",
4+
"contributor": "",
5+
"description": "This commit adds a new ErrorType core metric that is recorded for all failed API call attempts. The ErrorType records the general category of error that occurred for a failed API call attempt. Those categories are:\n\n - Throttling errors\n - Service errors other than throttling\n - I/O errors\n - API call or API call attempt timeouts\n\n The intent of this metric is to help locate possible issues at a glance and help direct further debugging or investigation."
6+
}

core/sdk-core/src/main/java/software/amazon/awssdk/core/internal/http/pipeline/stages/ApiCallAttemptMetricCollectionStage.java

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
import software.amazon.awssdk.core.internal.http.pipeline.RequestPipeline;
2626
import software.amazon.awssdk.core.internal.http.pipeline.RequestToResponsePipeline;
2727
import software.amazon.awssdk.core.internal.http.pipeline.stages.utils.RetryableStageHelper;
28+
import software.amazon.awssdk.core.internal.metrics.SdkErrorType;
2829
import software.amazon.awssdk.core.metrics.CoreMetric;
2930
import software.amazon.awssdk.http.SdkHttpFullRequest;
3031
import software.amazon.awssdk.metrics.MetricCollector;
@@ -47,11 +48,18 @@ public Response<OutputT> execute(SdkHttpFullRequest input, RequestExecutionConte
4748
context.attemptMetricCollector(apiCallAttemptMetrics);
4849
reportBackoffDelay(context);
4950

50-
Response<OutputT> response = wrapped.execute(input, context);
51+
try {
52+
Response<OutputT> response = wrapped.execute(input, context);
53+
collectHttpMetrics(apiCallAttemptMetrics, response.httpResponse());
5154

52-
collectHttpMetrics(apiCallAttemptMetrics, response.httpResponse());
53-
54-
return response;
55+
if (!Boolean.TRUE.equals(response.isSuccess()) && response.exception() != null) {
56+
reportErrorType(context, response.exception());
57+
}
58+
return response;
59+
} catch (Exception e) {
60+
reportErrorType(context, e);
61+
throw e;
62+
}
5563
}
5664

5765
private void reportBackoffDelay(RequestExecutionContext context) {
@@ -60,4 +68,8 @@ private void reportBackoffDelay(RequestExecutionContext context) {
6068
context.attemptMetricCollector().reportMetric(CoreMetric.BACKOFF_DELAY_DURATION, lastBackoffDelay);
6169
}
6270
}
71+
72+
private void reportErrorType(RequestExecutionContext context, Exception e) {
73+
context.attemptMetricCollector().reportMetric(CoreMetric.ERROR_TYPE, SdkErrorType.fromException(e).toString());
74+
}
6375
}

core/sdk-core/src/main/java/software/amazon/awssdk/core/internal/http/pipeline/stages/AsyncApiCallAttemptMetricCollectionStage.java

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
import software.amazon.awssdk.core.internal.http.RequestExecutionContext;
2626
import software.amazon.awssdk.core.internal.http.pipeline.RequestPipeline;
2727
import software.amazon.awssdk.core.internal.http.pipeline.stages.utils.RetryableStageHelper;
28+
import software.amazon.awssdk.core.internal.metrics.SdkErrorType;
2829
import software.amazon.awssdk.core.metrics.CoreMetric;
2930
import software.amazon.awssdk.http.SdkHttpFullRequest;
3031
import software.amazon.awssdk.metrics.MetricCollector;
@@ -57,6 +58,12 @@ public CompletableFuture<Response<OutputT>> execute(SdkHttpFullRequest input,
5758
if (t == null) {
5859
collectHttpMetrics(apiCallAttemptMetrics, r.httpResponse());
5960
}
61+
62+
if (t != null) {
63+
reportErrorType(context, t.getCause());
64+
} else if (!Boolean.TRUE.equals(r.isSuccess()) && r.exception() != null) {
65+
reportErrorType(context, r.exception());
66+
}
6067
});
6168
CompletableFutureUtils.forwardExceptionTo(metricsCollectedFuture, executeFuture);
6269

@@ -69,4 +76,8 @@ private void reportBackoffDelay(RequestExecutionContext context) {
6976
context.attemptMetricCollector().reportMetric(CoreMetric.BACKOFF_DELAY_DURATION, lastBackoffDelay);
7077
}
7178
}
79+
80+
private void reportErrorType(RequestExecutionContext context, Throwable t) {
81+
context.attemptMetricCollector().reportMetric(CoreMetric.ERROR_TYPE, SdkErrorType.fromException(t).toString());
82+
}
7283
}
Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
/*
2+
* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License").
5+
* You may not use this file except in compliance with the License.
6+
* A copy of the License is located at
7+
*
8+
* http://aws.amazon.com/apache2.0
9+
*
10+
* or in the "license" file accompanying this file. This file is distributed
11+
* on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
12+
* express or implied. See the License for the specific language governing
13+
* permissions and limitations under the License.
14+
*/
15+
16+
package software.amazon.awssdk.core.internal.metrics;
17+
18+
import java.io.IOException;
19+
import software.amazon.awssdk.annotations.SdkInternalApi;
20+
import software.amazon.awssdk.core.exception.ApiCallAttemptTimeoutException;
21+
import software.amazon.awssdk.core.exception.ApiCallTimeoutException;
22+
import software.amazon.awssdk.core.exception.SdkException;
23+
import software.amazon.awssdk.core.exception.SdkServiceException;
24+
import software.amazon.awssdk.core.retry.RetryUtils;
25+
26+
/**
27+
* General categories of errors that can be encountered when making an API call attempt.
28+
* <p>
29+
* This class is <b>NOT</b> intended to fully distinguish the details of every error that is possible to encounter when making
30+
* an API call attempt; for example, it is not a replacement for detailed logs. Instead, the categories are intentionally
31+
* broad to make it easy at-a-glance what is causing issues with requests, and to help direct further investigation.
32+
*/
33+
@SdkInternalApi
34+
public enum SdkErrorType {
35+
/**
36+
* The service responded with a throttling error.
37+
*/
38+
THROTTLING("Throttling"),
39+
40+
/**
41+
* The service responded with an error other than {@link #THROTTLING}.
42+
*/
43+
SERVER_ERROR("ServerError"),
44+
45+
/**
46+
* A clientside timeout occurred, either an attempt level timeout, or API call level.
47+
*/
48+
CONFIGURED_TIMEOUT("ConfiguredTimeout"),
49+
50+
/**
51+
* An I/O error.
52+
*/
53+
IO("IO"),
54+
55+
/**
56+
* Catch-all type for errors that don't fit into the other categories.
57+
*/
58+
OTHER("Other"),
59+
60+
;
61+
62+
private final String name;
63+
64+
SdkErrorType(String name) {
65+
this.name = name;
66+
}
67+
68+
@Override
69+
public String toString() {
70+
return name;
71+
}
72+
73+
public static SdkErrorType fromException(Throwable e) {
74+
if (e instanceof IOException) {
75+
return IO;
76+
}
77+
78+
if (e instanceof SdkException) {
79+
SdkException sdkError = (SdkException) e;
80+
if (sdkError instanceof ApiCallTimeoutException || sdkError instanceof ApiCallAttemptTimeoutException) {
81+
return CONFIGURED_TIMEOUT;
82+
}
83+
84+
if (RetryUtils.isThrottlingException(sdkError)) {
85+
return THROTTLING;
86+
}
87+
88+
if (e instanceof SdkServiceException) {
89+
return SERVER_ERROR;
90+
}
91+
}
92+
93+
return OTHER;
94+
}
95+
}

core/sdk-core/src/main/java/software/amazon/awssdk/core/metrics/CoreMetric.java

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,22 @@ public final class CoreMetric {
118118
public static final SdkMetric<String> AWS_EXTENDED_REQUEST_ID =
119119
metric("AwsExtendedRequestId", String.class, MetricLevel.INFO);
120120

121+
/**
122+
* The type of error that occurred for a call attempt.
123+
* <p>
124+
* The following are possible values:
125+
* <ul>
126+
* <li>Throttling - The service responded with a throttling error.</li>
127+
* <li>ServerError - The service responded with an error other than throttling.</li>
128+
* <li>ConfiguredTimeout - A client timeout occurred, either at the API call level, or API call attempt level.</li>
129+
* <li>IO - An I/O error occurred.</li>
130+
* <li>Other - Catch-all for other errors that don't fall into the above categories.</li>
131+
* </ul>
132+
* <p>
133+
*/
134+
public static final SdkMetric<String> ERROR_TYPE =
135+
metric("ErrorType", String.class, MetricLevel.INFO);
136+
121137
private CoreMetric() {
122138
}
123139

Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,111 @@
1+
/*
2+
* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License").
5+
* You may not use this file except in compliance with the License.
6+
* A copy of the License is located at
7+
*
8+
* http://aws.amazon.com/apache2.0
9+
*
10+
* or in the "license" file accompanying this file. This file is distributed
11+
* on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
12+
* express or implied. See the License for the specific language governing
13+
* permissions and limitations under the License.
14+
*/
15+
16+
package software.amazon.awssdk.core.internal.metrics;
17+
18+
import static org.assertj.core.api.Assertions.assertThat;
19+
20+
import java.io.IOException;
21+
import java.util.stream.Stream;
22+
import org.junit.jupiter.params.ParameterizedTest;
23+
import org.junit.jupiter.params.provider.MethodSource;
24+
import software.amazon.awssdk.core.exception.ApiCallAttemptTimeoutException;
25+
import software.amazon.awssdk.core.exception.ApiCallTimeoutException;
26+
import software.amazon.awssdk.core.exception.SdkClientException;
27+
import software.amazon.awssdk.core.exception.SdkServiceException;
28+
29+
public class ErrorTypeTest {
30+
31+
@ParameterizedTest
32+
@MethodSource("testCases")
33+
public void fromException_mapsToCorrectType(TestCase tc) {
34+
assertThat(SdkErrorType.fromException(tc.thrown)).isEqualTo(tc.expectedType);
35+
}
36+
37+
private static Stream<? extends TestCase> testCases() {
38+
return Stream.of(
39+
tc(new IOException("I/O"), SdkErrorType.IO),
40+
41+
tc(TestServiceException.builder().build(), SdkErrorType.SERVER_ERROR),
42+
tc(TestServiceException.builder().throttling(true).build(), SdkErrorType.THROTTLING),
43+
44+
tc(ApiCallAttemptTimeoutException.builder().message("Attempt timeout").build(), SdkErrorType.CONFIGURED_TIMEOUT),
45+
tc(ApiCallTimeoutException.builder().message("Call timeout").build(), SdkErrorType.CONFIGURED_TIMEOUT),
46+
47+
tc(SdkClientException.create("Unmarshalling error"), SdkErrorType.OTHER),
48+
49+
tc(new OutOfMemoryError("OOM"), SdkErrorType.OTHER)
50+
);
51+
}
52+
53+
private static TestCase tc(Throwable thrown, SdkErrorType expectedType) {
54+
return new TestCase(thrown, expectedType);
55+
}
56+
57+
private static class TestCase {
58+
private final Throwable thrown;
59+
private final SdkErrorType expectedType;
60+
61+
public TestCase(Throwable thrown, SdkErrorType expectedType) {
62+
this.thrown = thrown;
63+
this.expectedType = expectedType;
64+
}
65+
}
66+
67+
private static class TestServiceException extends SdkServiceException {
68+
private final boolean throttling;
69+
70+
protected TestServiceException(BuilderImpl b) {
71+
super(b);
72+
this.throttling = b.throttling;
73+
}
74+
75+
@Override
76+
public boolean isThrottlingException() {
77+
return throttling;
78+
}
79+
80+
public static Builder builder() {
81+
return new BuilderImpl();
82+
}
83+
84+
public interface Builder extends SdkServiceException.Builder {
85+
Builder throttling(Boolean throttling);
86+
87+
@Override
88+
TestServiceException build();
89+
}
90+
91+
public static class BuilderImpl extends SdkServiceException.BuilderImpl implements Builder {
92+
private boolean throttling;
93+
94+
@Override
95+
public boolean equalsBySdkFields(Object other) {
96+
return super.equalsBySdkFields(other);
97+
}
98+
99+
@Override
100+
public Builder throttling(Boolean throttling) {
101+
this.throttling = throttling;
102+
return this;
103+
}
104+
105+
@Override
106+
public TestServiceException build() {
107+
return new TestServiceException(this);
108+
}
109+
}
110+
}
111+
}

docs/design/core/metrics/MetricsList.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,8 @@ class within `sdk-core`.
2929
| AwsExtendedRequestId | `String` | The extended request ID of the service request.|
3030
| UnmarshallingDuration | `Duration` | The duration of time taken to unmarshall the HTTP response to an SDK response. |
3131
| ServiceCallDuration | `Duration` | The duration of time taken to connect to the service (or acquire a connection from the connection pool), send the serialized request and receive the initial response (e.g. HTTP status code and headers). This DOES NOT include the time taken to read the entire response from the service. |
32-
| `RetryCount` | `Integer` | The number of retries that the SDK performed in the execution of the request. 0 implies that the request worked the first time, and no retries were attempted. |
32+
| RetryCount | `Integer` | The number of retries that the SDK performed in the execution of the request. 0 implies that the request worked the first time, and no retries were attempted. |
33+
| ErrorType | `String` | The general type or category of error that was encountered for a failed API call attempt.<br>The following are possible values:<br> `Throttling` - The service responded with a throttling error.<br>`ServerError` - The service responded with an error other than throttling.<br>`ConfiguredTimeout` - A client timeout occurred, either at the API call level, or API call attempt level.<br>`IO` - An I/O error occurred.<br>`Other` - Catch-all for other errors that don't fall into the above categories.|
3334

3435
## HTTP Metrics
3536

test/codegen-generated-classes-test/src/test/java/software/amazon/awssdk/services/metrics/CoreMetricsTest.java

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,9 @@
3838
import org.mockito.junit.MockitoJUnitRunner;
3939
import software.amazon.awssdk.auth.credentials.AwsBasicCredentials;
4040
import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider;
41+
import software.amazon.awssdk.core.exception.SdkException;
4142
import software.amazon.awssdk.core.metrics.CoreMetric;
43+
import software.amazon.awssdk.core.internal.metrics.SdkErrorType;
4244
import software.amazon.awssdk.http.AbortableInputStream;
4345
import software.amazon.awssdk.http.ExecutableHttpRequest;
4446
import software.amazon.awssdk.http.HttpExecuteRequest;
@@ -250,10 +252,35 @@ public void testApiCall_serviceReturnsError_errorInfoIncludedInMetrics() throws
250252
assertThat(requestMetrics.metricValues(CoreMetric.UNMARSHALLING_DURATION)).hasOnlyOneElementSatisfying(d -> {
251253
assertThat(d).isGreaterThanOrEqualTo(Duration.ZERO);
252254
});
255+
assertThat(requestMetrics.metricValues(CoreMetric.ERROR_TYPE)).containsExactly(SdkErrorType.SERVER_ERROR.toString());
253256
}
254257
}
255258
}
256259

260+
@Test
261+
public void testApiCall_httpClientThrowsNetworkError_errorTypeIncludedInMetrics() throws IOException {
262+
ExecutableHttpRequest mockExecuteRequest = mock(ExecutableHttpRequest.class);
263+
when(mockExecuteRequest.call()).thenThrow(new IOException("I/O error"));
264+
265+
when(mockHttpClient.prepareRequest(any(HttpExecuteRequest.class)))
266+
.thenReturn(mockExecuteRequest);
267+
268+
thrown.expect(SdkException.class);
269+
try {
270+
client.allTypes();
271+
} finally {
272+
ArgumentCaptor<MetricCollection> collectionCaptor = ArgumentCaptor.forClass(MetricCollection.class);
273+
verify(mockPublisher).publish(collectionCaptor.capture());
274+
275+
MetricCollection capturedCollection = collectionCaptor.getValue();
276+
assertThat(capturedCollection.children()).isNotEmpty();
277+
for (MetricCollection requestMetrics : capturedCollection.children()) {
278+
assertThat(requestMetrics.metricValues(CoreMetric.ERROR_TYPE)).containsExactly(SdkErrorType.IO.toString());
279+
}
280+
}
281+
}
282+
283+
257284
private static HttpExecuteResponse mockExecuteResponse(SdkHttpFullResponse httpResponse) {
258285
HttpExecuteResponse mockResponse = mock(HttpExecuteResponse.class);
259286
when(mockResponse.httpResponse()).thenReturn(httpResponse);

test/codegen-generated-classes-test/src/test/java/software/amazon/awssdk/services/metrics/async/BaseAsyncCoreMetricsTest.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,13 +28,13 @@
2828
import java.time.Duration;
2929
import java.util.concurrent.CompletableFuture;
3030
import java.util.function.Supplier;
31-
import org.junit.Ignore;
3231
import org.junit.Test;
3332
import org.junit.runner.RunWith;
3433
import org.mockito.ArgumentCaptor;
3534
import org.mockito.junit.MockitoJUnitRunner;
3635
import software.amazon.awssdk.core.exception.SdkClientException;
3736
import software.amazon.awssdk.core.metrics.CoreMetric;
37+
import software.amazon.awssdk.core.internal.metrics.SdkErrorType;
3838
import software.amazon.awssdk.http.HttpMetric;
3939
import software.amazon.awssdk.metrics.MetricCollection;
4040
import software.amazon.awssdk.metrics.MetricPublisher;
@@ -108,6 +108,7 @@ public void apiCall_allRetryAttemptsFailedOfNetworkError() {
108108
.isEmpty();
109109
assertThat(requestMetrics.metricValues(CoreMetric.SERVICE_CALL_DURATION).get(0))
110110
.isGreaterThanOrEqualTo(FIXED_DELAY);
111+
assertThat(requestMetrics.metricValues(CoreMetric.ERROR_TYPE)).containsExactly(SdkErrorType.IO.toString());
111112
});
112113
}
113114

@@ -162,6 +163,7 @@ private void verifyFailedApiCallAttemptCollection(MetricCollection requestMetric
162163
.isGreaterThanOrEqualTo(Duration.ZERO);
163164
assertThat(requestMetrics.metricValues(CoreMetric.SERVICE_CALL_DURATION).get(0))
164165
.isGreaterThanOrEqualTo(Duration.ZERO);
166+
assertThat(requestMetrics.metricValues(CoreMetric.ERROR_TYPE)).containsExactly(SdkErrorType.SERVER_ERROR.toString());
165167
}
166168

167169
private void verifySuccessfulApiCallAttemptCollection(MetricCollection attemptCollection) {

0 commit comments

Comments
 (0)