diff --git a/CHANGELOG.md b/CHANGELOG.md index c26d712c65..cf6617ed44 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -74,6 +74,9 @@ Current * Created `LegacyGenerator` as a bridge interface from the existing constructor based api request impls and the factory based value object usage. ### Added: +- [Add logging for cache put failures](https://github.com/yahoo/fili/pull/1114) + * Added `BardCacheInfo` logging for cache put failures. + - [Add withLogicalMetricInfo to MetricInstance class](https://github.com/yahoo/fili/pull/1105) * Supporting method added to `MetricInstance` to replace `LogicalMetricInfo`. diff --git a/fili-core/src/main/java/com/yahoo/bard/webservice/data/cache/MemDataCache.java b/fili-core/src/main/java/com/yahoo/bard/webservice/data/cache/MemDataCache.java index 236e2cbc43..ed66050373 100644 --- a/fili-core/src/main/java/com/yahoo/bard/webservice/data/cache/MemDataCache.java +++ b/fili-core/src/main/java/com/yahoo/bard/webservice/data/cache/MemDataCache.java @@ -4,9 +4,16 @@ import static net.spy.memcached.DefaultConnectionFactory.DEFAULT_OPERATION_TIMEOUT; +import com.yahoo.bard.webservice.application.MetricRegistryFactory; import com.yahoo.bard.webservice.config.SystemConfig; import com.yahoo.bard.webservice.config.SystemConfigException; import com.yahoo.bard.webservice.config.SystemConfigProvider; +import com.yahoo.bard.webservice.logging.RequestLog; +import com.yahoo.bard.webservice.logging.blocks.BardCacheInfo; +import com.yahoo.bard.webservice.web.responseprocessors.CacheV2ResponseProcessor; + +import com.codahale.metrics.Meter; +import com.codahale.metrics.MetricRegistry; import org.joda.time.DateTime; import org.slf4j.Logger; @@ -17,6 +24,7 @@ import java.io.IOException; import java.io.Serializable; +import java.util.concurrent.TimeoutException; import javax.inject.Inject; import javax.inject.Singleton; @@ -38,6 +46,8 @@ public class MemDataCache implements DataCache { ); private static final String SERVER_CONFIG = SYSTEM_CONFIG.getStringProperty(SERVER_CONFIG_KEY); + private static final MetricRegistry REGISTRY = MetricRegistryFactory.getRegistry(); + public static final Meter CACHE_SET_FAILURES = REGISTRY.meter("queries.meter.cache.put.failures"); /** * Memcached uses the actual value sent, and it may either be Unix time (number of seconds since January 1, 1970, as @@ -52,6 +62,8 @@ public class MemDataCache implements DataCache { private static final int EXPIRATION_DEFAULT_VALUE = 3600; final private MemcachedClient client; + private static final @NotNull String WAIT_FOR_FUTURE = + SYSTEM_CONFIG.getPackageVariableName("wait_for_future_from_memcached"); /** * Constructor using a default Memcached Client. @@ -129,8 +141,22 @@ private boolean setInSeconds(String key, T value, int expirationInSeconds) throw try { // Omitting null checking for key since it should be rare. // An exception will be thrown by the memcached client. - return client.set(key, expirationInSeconds, value) != null; + if (SYSTEM_CONFIG.getBooleanProperty(WAIT_FOR_FUTURE, false)) { + return client.set(key, expirationInSeconds, value).get(); + } else { + return client.set(key, expirationInSeconds, value) != null; + } } catch (Exception e) { + Throwable exception = e.getClass() == RuntimeException.class ? e.getCause() : e; + if (exception instanceof TimeoutException) { + //mark and log the timeout errors on cache set + CACHE_SET_FAILURES.mark(1); + RequestLog.record(new BardCacheInfo( + key.length(), + CacheV2ResponseProcessor.getMD5Cksum(key), + value.toString().length() + )); + } LOG.warn("set failed {} {}", key, e.toString()); throw new IllegalStateException(e); } diff --git a/fili-core/src/main/java/com/yahoo/bard/webservice/logging/blocks/BardCacheInfo.java b/fili-core/src/main/java/com/yahoo/bard/webservice/logging/blocks/BardCacheInfo.java new file mode 100644 index 0000000000..57ce528a7d --- /dev/null +++ b/fili-core/src/main/java/com/yahoo/bard/webservice/logging/blocks/BardCacheInfo.java @@ -0,0 +1,32 @@ +// Copyright 2016 Yahoo Inc. +// Licensed under the terms of the Apache license. Please see LICENSE.md file distributed with this work for terms. +// +package com.yahoo.bard.webservice.logging.blocks; + +import com.yahoo.bard.webservice.logging.LogInfo; + + +import com.fasterxml.jackson.annotation.JsonAutoDetect; + +/** + * Log that keep tracks for cache set failures. + */ +@JsonAutoDetect(getterVisibility = JsonAutoDetect.Visibility.PUBLIC_ONLY) +public class BardCacheInfo implements LogInfo { + String cacheKeyCksum; + int cacheKeyLength; + int cacheValLength; + + /** + * Constructor. + * + * @param cacheKeyLength The Length of the cache key + * @param cacheKeyCksum The cache Key MD5 checksum value + * @param cacheValLength The cache value length + */ + public BardCacheInfo(int cacheKeyLength, String cacheKeyCksum, int cacheValLength) { + this.cacheKeyLength = cacheKeyLength; + this.cacheKeyCksum = cacheKeyCksum; + this.cacheValLength = cacheValLength; + } +} diff --git a/fili-core/src/main/java/com/yahoo/bard/webservice/web/responseprocessors/CacheV2ResponseProcessor.java b/fili-core/src/main/java/com/yahoo/bard/webservice/web/responseprocessors/CacheV2ResponseProcessor.java index 57da9079f1..1ee546c601 100644 --- a/fili-core/src/main/java/com/yahoo/bard/webservice/web/responseprocessors/CacheV2ResponseProcessor.java +++ b/fili-core/src/main/java/com/yahoo/bard/webservice/web/responseprocessors/CacheV2ResponseProcessor.java @@ -6,15 +6,20 @@ import static com.yahoo.bard.webservice.web.handlers.PartialDataRequestHandler.getPartialIntervalsWithDefault; import static com.yahoo.bard.webservice.web.handlers.VolatileDataRequestHandler.getVolatileIntervalsWithDefault; +import com.yahoo.bard.webservice.application.MetricRegistryFactory; import com.yahoo.bard.webservice.config.SystemConfig; import com.yahoo.bard.webservice.config.SystemConfigProvider; import com.yahoo.bard.webservice.data.cache.TupleDataCache; import com.yahoo.bard.webservice.druid.client.FailureCallback; import com.yahoo.bard.webservice.druid.client.HttpErrorCallback; import com.yahoo.bard.webservice.druid.model.query.DruidAggregationQuery; +import com.yahoo.bard.webservice.logging.RequestLog; +import com.yahoo.bard.webservice.logging.blocks.BardCacheInfo; import com.yahoo.bard.webservice.metadata.QuerySigningService; import com.yahoo.bard.webservice.util.SimplifiedIntervalList; +import com.codahale.metrics.Meter; +import com.codahale.metrics.MetricRegistry; import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectWriter; @@ -22,7 +27,13 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.io.UnsupportedEncodingException; +import java.security.MessageDigest; +import java.security.NoSuchAlgorithmException; +import java.util.Locale; + import javax.validation.constraints.NotNull; +import javax.xml.bind.DatatypeConverter; /** * A response processor which caches the results if appropriate after completing a query. @@ -31,6 +42,8 @@ public class CacheV2ResponseProcessor implements ResponseProcessor { private static final Logger LOG = LoggerFactory.getLogger(CacheV2ResponseProcessor.class); private static final SystemConfig SYSTEM_CONFIG = SystemConfigProvider.getInstance(); + private static final MetricRegistry REGISTRY = MetricRegistryFactory.getRegistry(); + public static final Meter CACHE_SET_FAILURES = REGISTRY.meter("queries.meter.cache.put.failures"); private final long maxDruidResponseLengthToCache = SYSTEM_CONFIG.getLongProperty( SYSTEM_CONFIG.getPackageVariableName( @@ -105,6 +118,13 @@ public void processResponse(JsonNode json, DruidAggregationQuery druidQuery, ); } } catch (Exception e) { + //mark and log the cache put failure + CACHE_SET_FAILURES.mark(1); + RequestLog.record(new BardCacheInfo( + cacheKey.length(), + getMD5Cksum(cacheKey), + valueString != null ? valueString.length() : 0 + )); LOG.warn( "Unable to cache {}value of size: {}", valueString == null ? "null " : "", @@ -128,4 +148,41 @@ protected boolean isCacheable() { return missingIntervals.isEmpty() && volatileIntervals.isEmpty(); } + + /** + * Generate the Checksum of cacheKey using MD5 algorithm. + * @param cacheKey cache key + * + * @return String representation of the Cksum + */ + public static String getMD5Cksum(String cacheKey) { + try { + MessageDigest digest = MessageDigest.getInstance("MD5"); + byte[] hash = digest.digest(cacheKey.getBytes("UTF-8")); + return bytesToHex(hash); // make it readable + } catch (NoSuchAlgorithmException ex) { + String msg = "Unable to initialize hash generator with default MD5 algorithm "; + LOG.warn(msg, ex); + throw new RuntimeException(msg, ex); + } catch (UnsupportedEncodingException x) { + String msg = "Unable to initialize checksum byte array "; + LOG.warn(msg, x); + throw new RuntimeException(msg, x); + } catch (Exception exception) { + String msg = "Failed to generate checksum for cache key"; + LOG.warn(msg, exception); + throw new RuntimeException(msg, exception); + } + } + + /** + * Converts bytes array to the hex String. + * @param hash array of bytes to be converted in Hex + * + * @return String representation of the checksum + */ + public static String bytesToHex(byte[] hash) { + return DatatypeConverter.printHexBinary(hash) + .toLowerCase(Locale.ENGLISH); + } } diff --git a/fili-core/src/main/java/com/yahoo/bard/webservice/web/util/QuerySignedCacheService.java b/fili-core/src/main/java/com/yahoo/bard/webservice/web/util/QuerySignedCacheService.java index 0cd41c6871..add11ac42d 100644 --- a/fili-core/src/main/java/com/yahoo/bard/webservice/web/util/QuerySignedCacheService.java +++ b/fili-core/src/main/java/com/yahoo/bard/webservice/web/util/QuerySignedCacheService.java @@ -13,6 +13,7 @@ import com.fasterxml.jackson.databind.ObjectWriter; import com.yahoo.bard.webservice.config.SystemConfig; import com.yahoo.bard.webservice.config.SystemConfigProvider; +import com.yahoo.bard.webservice.logging.blocks.BardCacheInfo; import com.yahoo.bard.webservice.util.SimplifiedIntervalList; import com.yahoo.bard.webservice.application.MetricRegistryFactory; import com.yahoo.bard.webservice.data.cache.TupleDataCache; @@ -22,6 +23,7 @@ import com.yahoo.bard.webservice.metadata.QuerySigningService; import com.yahoo.bard.webservice.util.Utils; import com.yahoo.bard.webservice.web.handlers.RequestContext; +import com.yahoo.bard.webservice.web.responseprocessors.CacheV2ResponseProcessor; import com.yahoo.bard.webservice.web.responseprocessors.ResponseProcessor; import com.fasterxml.jackson.databind.JsonNode; @@ -53,6 +55,7 @@ public class QuerySignedCacheService implements CacheService { public static final Meter CACHE_POTENTIAL_HITS = REGISTRY.meter("queries.meter.cache.potential_hits"); public static final Meter CACHE_MISSES = REGISTRY.meter("queries.meter.cache.misses"); public static final Meter CACHE_REQUESTS = REGISTRY.meter("queries.meter.cache.total"); + public static final Meter CACHE_SET_FAILURES = REGISTRY.meter("queries.meter.cache.put.failures"); TupleDataCache dataCache; QuerySigningService querySigningService; @@ -148,6 +151,13 @@ public void writeCache( ); } } catch (Exception e) { + //mark and log the cache put failure + CACHE_SET_FAILURES.mark(1); + RequestLog.record(new BardCacheInfo( + cacheKey.length(), + CacheV2ResponseProcessor.getMD5Cksum(cacheKey), + valueString != null ? valueString.length() : 0 + )); LOG.warn( "Unable to cache {}value of size: {}", valueString == null ? "null " : "", diff --git a/fili-core/src/main/resources/moduleConfig.properties b/fili-core/src/main/resources/moduleConfig.properties index 033f386abf..c63e2f6843 100644 --- a/fili-core/src/main/resources/moduleConfig.properties +++ b/fili-core/src/main/resources/moduleConfig.properties @@ -51,6 +51,7 @@ bard__partial_data_enabled = true # MemCached configuration bard__memcached_servers = localhost:11211 bard__memcached_expiration_seconds = 3600 +bard__wait_for_future_from_memcached = false #Strategy for caching fact responses. NoCache for disabled, Ttl, LocalSignature and ETag for different signature expiry @@ -138,7 +139,7 @@ bard__updated_metadata_collection_names_enabled = false # Statically define the order of LogInfo parts in the RequestLog mega log line. If not set, the order matches the # insertion order of the parts. Each part is represented by the simple name of its java class. This setting does not # change the predefined order of Durations, Threads, Preface at the start and of Epilogue at the end of the log line. -# bard__requestlog_loginfo_order = BardQueryInfo,DataRequest,DimensionRequest,MetricRequest,SliceRequest,TableRequest,FeatureFlagRequest,DruidResponse +# bard__requestlog_loginfo_order = BardQueryInfo,BardCacheInfo,DataRequest,DimensionRequest,MetricRequest,SliceRequest,TableRequest,FeatureFlagRequest,DruidResponse # List of allowed user roles, must provide value if uncommented # bard__user_roles= diff --git a/fili-core/src/test/groovy/com/yahoo/bard/webservice/web/responseprocessors/CacheV2ResponseProcessorSpec.groovy b/fili-core/src/test/groovy/com/yahoo/bard/webservice/web/responseprocessors/CacheV2ResponseProcessorSpec.groovy index 974fb8c152..243196d1f5 100644 --- a/fili-core/src/test/groovy/com/yahoo/bard/webservice/web/responseprocessors/CacheV2ResponseProcessorSpec.groovy +++ b/fili-core/src/test/groovy/com/yahoo/bard/webservice/web/responseprocessors/CacheV2ResponseProcessorSpec.groovy @@ -224,4 +224,16 @@ class CacheV2ResponseProcessorSpec extends Specification { 1 * next.getErrorCallback(groupByQuery) >> hec 1 * next.getFailureCallback(groupByQuery) >> fc } + + def "Cache key cksum is generated as expected"() { + expect: + CacheV2ResponseProcessor.getMD5Cksum(cacheKey) == "8c3c726f9f4cc3dd99d48a717addb033" + } + def "RuntimeException is thrown when null key is passed"() { + when: + CacheV2ResponseProcessor.getMD5Cksum(null) + + then: + thrown RuntimeException + } } diff --git a/fili-core/src/test/resources/testApplicationConfig.properties b/fili-core/src/test/resources/testApplicationConfig.properties index 75c7c4f15a..95d1f0f884 100644 --- a/fili-core/src/test/resources/testApplicationConfig.properties +++ b/fili-core/src/test/resources/testApplicationConfig.properties @@ -91,3 +91,6 @@ bard__default_in_filter_enabled=false # Whether or not to require metrics in queries. Older druid versions require bard__require_metrics_in_query=true + +# Wait for future from memcached client. +bard__wait_for_future_from_memcached = false