Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

logging for cache put failures #1114

Merged
merged 7 commits into from
Nov 19, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,9 @@ Current
* Created `LegacyGenerator` as a bridge interface from the existing constructor based api request impls and the factory based value object usage.

### Added:
- [Add logging for cache put failures](https://github.com/yahoo/fili/pull/1114)
* Added `BardCacheInfo` logging for cache put failures.

- [Add withLogicalMetricInfo to MetricInstance class](https://github.com/yahoo/fili/pull/1105)
* Supporting method added to `MetricInstance` to replace `LogicalMetricInfo`.

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,16 @@

import static net.spy.memcached.DefaultConnectionFactory.DEFAULT_OPERATION_TIMEOUT;

import com.yahoo.bard.webservice.application.MetricRegistryFactory;
import com.yahoo.bard.webservice.config.SystemConfig;
import com.yahoo.bard.webservice.config.SystemConfigException;
import com.yahoo.bard.webservice.config.SystemConfigProvider;
import com.yahoo.bard.webservice.logging.RequestLog;
import com.yahoo.bard.webservice.logging.blocks.BardCacheInfo;
import com.yahoo.bard.webservice.web.responseprocessors.CacheV2ResponseProcessor;

import com.codahale.metrics.Meter;
import com.codahale.metrics.MetricRegistry;

import org.joda.time.DateTime;
import org.slf4j.Logger;
Expand All @@ -17,6 +24,7 @@

import java.io.IOException;
import java.io.Serializable;
import java.util.concurrent.TimeoutException;

import javax.inject.Inject;
import javax.inject.Singleton;
Expand All @@ -38,6 +46,8 @@ public class MemDataCache<T extends Serializable> implements DataCache<T> {
);

private static final String SERVER_CONFIG = SYSTEM_CONFIG.getStringProperty(SERVER_CONFIG_KEY);
private static final MetricRegistry REGISTRY = MetricRegistryFactory.getRegistry();
public static final Meter CACHE_SET_FAILURES = REGISTRY.meter("queries.meter.cache.put.failures");

/**
* Memcached uses the actual value sent, and it may either be Unix time (number of seconds since January 1, 1970, as
Expand All @@ -52,6 +62,8 @@ public class MemDataCache<T extends Serializable> implements DataCache<T> {
private static final int EXPIRATION_DEFAULT_VALUE = 3600;

final private MemcachedClient client;
private static final @NotNull String WAIT_FOR_FUTURE =
SYSTEM_CONFIG.getPackageVariableName("wait_for_future_from_memcached");

/**
* Constructor using a default Memcached Client.
Expand Down Expand Up @@ -129,8 +141,22 @@ private boolean setInSeconds(String key, T value, int expirationInSeconds) throw
try {
// Omitting null checking for key since it should be rare.
// An exception will be thrown by the memcached client.
return client.set(key, expirationInSeconds, value) != null;
if (SYSTEM_CONFIG.getBooleanProperty(WAIT_FOR_FUTURE, false)) {
return client.set(key, expirationInSeconds, value).get();
} else {
return client.set(key, expirationInSeconds, value) != null;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These two paths could use a test to guarantee config results in the correct behaviors.

}
} catch (Exception e) {
Throwable exception = e.getClass() == RuntimeException.class ? e.getCause() : e;
if (exception instanceof TimeoutException) {
//mark and log the timeout errors on cache set
CACHE_SET_FAILURES.mark(1);
RequestLog.record(new BardCacheInfo(
key.length(),
CacheV2ResponseProcessor.getMD5Cksum(key),
value.toString().length()
));
}
LOG.warn("set failed {} {}", key, e.toString());
throw new IllegalStateException(e);
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
// Copyright 2016 Yahoo Inc.
// Licensed under the terms of the Apache license. Please see LICENSE.md file distributed with this work for terms.
//
package com.yahoo.bard.webservice.logging.blocks;

import com.yahoo.bard.webservice.logging.LogInfo;


import com.fasterxml.jackson.annotation.JsonAutoDetect;

/**
* Log that keep tracks for cache set failures.
*/
@JsonAutoDetect(getterVisibility = JsonAutoDetect.Visibility.PUBLIC_ONLY)
public class BardCacheInfo implements LogInfo {
String cacheKeyCksum;
int cacheKeyLength;
int cacheValLength;

/**
* Constructor.
*
* @param cacheKeyLength The Length of the cache key
* @param cacheKeyCksum The cache Key MD5 checksum value
* @param cacheValLength The cache value length
*/
public BardCacheInfo(int cacheKeyLength, String cacheKeyCksum, int cacheValLength) {
this.cacheKeyLength = cacheKeyLength;
this.cacheKeyCksum = cacheKeyCksum;
this.cacheValLength = cacheValLength;
bharatmotwani marked this conversation as resolved.
Show resolved Hide resolved
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -6,23 +6,34 @@
import static com.yahoo.bard.webservice.web.handlers.PartialDataRequestHandler.getPartialIntervalsWithDefault;
import static com.yahoo.bard.webservice.web.handlers.VolatileDataRequestHandler.getVolatileIntervalsWithDefault;

import com.yahoo.bard.webservice.application.MetricRegistryFactory;
import com.yahoo.bard.webservice.config.SystemConfig;
import com.yahoo.bard.webservice.config.SystemConfigProvider;
import com.yahoo.bard.webservice.data.cache.TupleDataCache;
import com.yahoo.bard.webservice.druid.client.FailureCallback;
import com.yahoo.bard.webservice.druid.client.HttpErrorCallback;
import com.yahoo.bard.webservice.druid.model.query.DruidAggregationQuery;
import com.yahoo.bard.webservice.logging.RequestLog;
import com.yahoo.bard.webservice.logging.blocks.BardCacheInfo;
import com.yahoo.bard.webservice.metadata.QuerySigningService;
import com.yahoo.bard.webservice.util.SimplifiedIntervalList;

import com.codahale.metrics.Meter;
import com.codahale.metrics.MetricRegistry;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.ObjectWriter;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.UnsupportedEncodingException;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.Locale;

import javax.validation.constraints.NotNull;
import javax.xml.bind.DatatypeConverter;

/**
* A response processor which caches the results if appropriate after completing a query.
Expand All @@ -31,6 +42,8 @@ public class CacheV2ResponseProcessor implements ResponseProcessor {

private static final Logger LOG = LoggerFactory.getLogger(CacheV2ResponseProcessor.class);
private static final SystemConfig SYSTEM_CONFIG = SystemConfigProvider.getInstance();
private static final MetricRegistry REGISTRY = MetricRegistryFactory.getRegistry();
public static final Meter CACHE_SET_FAILURES = REGISTRY.meter("queries.meter.cache.put.failures");

private final long maxDruidResponseLengthToCache = SYSTEM_CONFIG.getLongProperty(
SYSTEM_CONFIG.getPackageVariableName(
Expand Down Expand Up @@ -105,6 +118,13 @@ public void processResponse(JsonNode json, DruidAggregationQuery<?> druidQuery,
);
}
} catch (Exception e) {
//mark and log the cache put failure
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This could use a test.

CACHE_SET_FAILURES.mark(1);
RequestLog.record(new BardCacheInfo(
cacheKey.length(),
getMD5Cksum(cacheKey),
valueString != null ? valueString.length() : 0
));
LOG.warn(
"Unable to cache {}value of size: {}",
valueString == null ? "null " : "",
Expand All @@ -128,4 +148,41 @@ protected boolean isCacheable() {

return missingIntervals.isEmpty() && volatileIntervals.isEmpty();
}

/**
* Generate the Checksum of cacheKey using MD5 algorithm.
* @param cacheKey cache key
*
* @return String representation of the Cksum
*/
public static String getMD5Cksum(String cacheKey) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This would be better injected.

try {
MessageDigest digest = MessageDigest.getInstance("MD5");
byte[] hash = digest.digest(cacheKey.getBytes("UTF-8"));
return bytesToHex(hash); // make it readable
} catch (NoSuchAlgorithmException ex) {
String msg = "Unable to initialize hash generator with default MD5 algorithm ";
LOG.warn(msg, ex);
throw new RuntimeException(msg, ex);
} catch (UnsupportedEncodingException x) {
String msg = "Unable to initialize checksum byte array ";
LOG.warn(msg, x);
throw new RuntimeException(msg, x);
} catch (Exception exception) {
String msg = "Failed to generate checksum for cache key";
LOG.warn(msg, exception);
throw new RuntimeException(msg, exception);
}
}

/**
* Converts bytes array to the hex String.
* @param hash array of bytes to be converted in Hex
*
* @return String representation of the checksum
*/
public static String bytesToHex(byte[] hash) {
return DatatypeConverter.printHexBinary(hash)
.toLowerCase(Locale.ENGLISH);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import com.fasterxml.jackson.databind.ObjectWriter;
import com.yahoo.bard.webservice.config.SystemConfig;
import com.yahoo.bard.webservice.config.SystemConfigProvider;
import com.yahoo.bard.webservice.logging.blocks.BardCacheInfo;
import com.yahoo.bard.webservice.util.SimplifiedIntervalList;
import com.yahoo.bard.webservice.application.MetricRegistryFactory;
import com.yahoo.bard.webservice.data.cache.TupleDataCache;
Expand All @@ -22,6 +23,7 @@
import com.yahoo.bard.webservice.metadata.QuerySigningService;
import com.yahoo.bard.webservice.util.Utils;
import com.yahoo.bard.webservice.web.handlers.RequestContext;
import com.yahoo.bard.webservice.web.responseprocessors.CacheV2ResponseProcessor;
import com.yahoo.bard.webservice.web.responseprocessors.ResponseProcessor;

import com.fasterxml.jackson.databind.JsonNode;
Expand Down Expand Up @@ -53,6 +55,7 @@ public class QuerySignedCacheService implements CacheService {
public static final Meter CACHE_POTENTIAL_HITS = REGISTRY.meter("queries.meter.cache.potential_hits");
public static final Meter CACHE_MISSES = REGISTRY.meter("queries.meter.cache.misses");
public static final Meter CACHE_REQUESTS = REGISTRY.meter("queries.meter.cache.total");
public static final Meter CACHE_SET_FAILURES = REGISTRY.meter("queries.meter.cache.put.failures");

TupleDataCache<String, Long, String> dataCache;
QuerySigningService<Long> querySigningService;
Expand Down Expand Up @@ -148,6 +151,13 @@ public void writeCache(
);
}
} catch (Exception e) {
//mark and log the cache put failure
CACHE_SET_FAILURES.mark(1);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This could use a test.

RequestLog.record(new BardCacheInfo(
cacheKey.length(),
CacheV2ResponseProcessor.getMD5Cksum(cacheKey),
valueString != null ? valueString.length() : 0
));
LOG.warn(
"Unable to cache {}value of size: {}",
valueString == null ? "null " : "",
Expand Down
3 changes: 2 additions & 1 deletion fili-core/src/main/resources/moduleConfig.properties
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ bard__partial_data_enabled = true
# MemCached configuration
bard__memcached_servers = localhost:11211
bard__memcached_expiration_seconds = 3600
bard__wait_for_future_from_memcached = false


#Strategy for caching fact responses. NoCache for disabled, Ttl, LocalSignature and ETag for different signature expiry
Expand Down Expand Up @@ -138,7 +139,7 @@ bard__updated_metadata_collection_names_enabled = false
# Statically define the order of LogInfo parts in the RequestLog mega log line. If not set, the order matches the
# insertion order of the parts. Each part is represented by the simple name of its java class. This setting does not
# change the predefined order of Durations, Threads, Preface at the start and of Epilogue at the end of the log line.
# bard__requestlog_loginfo_order = BardQueryInfo,DataRequest,DimensionRequest,MetricRequest,SliceRequest,TableRequest,FeatureFlagRequest,DruidResponse
# bard__requestlog_loginfo_order = BardQueryInfo,BardCacheInfo,DataRequest,DimensionRequest,MetricRequest,SliceRequest,TableRequest,FeatureFlagRequest,DruidResponse

# List of allowed user roles, must provide value if uncommented
# bard__user_roles=
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -224,4 +224,16 @@ class CacheV2ResponseProcessorSpec extends Specification {
1 * next.getErrorCallback(groupByQuery) >> hec
1 * next.getFailureCallback(groupByQuery) >> fc
}

def "Cache key cksum is generated as expected"() {
expect:
CacheV2ResponseProcessor.getMD5Cksum(cacheKey) == "8c3c726f9f4cc3dd99d48a717addb033"
}
def "RuntimeException is thrown when null key is passed"() {
when:
CacheV2ResponseProcessor.getMD5Cksum(null)

then:
thrown RuntimeException
}
}
3 changes: 3 additions & 0 deletions fili-core/src/test/resources/testApplicationConfig.properties
Original file line number Diff line number Diff line change
Expand Up @@ -91,3 +91,6 @@ bard__default_in_filter_enabled=false

# Whether or not to require metrics in queries. Older druid versions require
bard__require_metrics_in_query=true

# Wait for future from memcached client.
bard__wait_for_future_from_memcached = false