From 25dac01fb68aea55d42f1bcf9dee0867d9d0efaa Mon Sep 17 00:00:00 2001 From: Ben Kochie Date: Wed, 11 May 2022 09:11:55 +0200 Subject: [PATCH] Add Cache metrics to groupcache (#5352) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add metrics about the hot and main caches[0]. * Number of bytes in each cache. * Number of items in each cache. * Counter of evictions from each cache. [0]: https://pkg.go.dev/github.com/vimeo/galaxycache#CacheStats Signed-off-by: SuperQ Signed-off-by: Giedrius Statkevičius --- CHANGELOG.md | 25 ++++++++++++++++++++++++- pkg/cache/groupcache.go | 31 +++++++++++++++++++++++++++++-- 2 files changed, 53 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 986328b1f6..c08302626b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,8 +11,31 @@ We use *breaking :warning:* to mark changes that are not backward compatible (re ## Performance ### Added -======= +- [#5352](https://github.com/thanos-io/thanos/pull/5352) Cache: Add cache metrics to groupcache. + +### Changed + +### Removed + +## [v0.26.0](https://github.com/thanos-io/thanos/tree/release-0.26) - 2022.05.05 + +### Fixed +- [#5281](https://github.com/thanos-io/thanos/pull/5281) Blocks: Use correct separators for filesystem paths and object storage paths respectively. +- [#5300](https://github.com/thanos-io/thanos/pull/5300) Query: Ignore cache on queries with deduplication off. +- [#5324](https://github.com/thanos-io/thanos/pull/5324) Reloader: Force trigger reload when config is rolled back. + +### Added + +- [#5220](https://github.com/thanos-io/thanos/pull/5220) Query Frontend: Add `--query-frontend.forward-header` flag, forward headers to downstream querier. +- [#5250](https://github.com/thanos-io/thanos/pull/5250/files) Querier: Expose Query and QueryRange APIs through GRPC. 
+- [#5290](https://github.com/thanos-io/thanos/pull/5290) Add support for [ppc64le](https://en.wikipedia.org/wiki/Ppc64) + +### Changed + +- [#4838](https://github.com/thanos-io/thanos/pull/4838) Tracing: Changed client for Stackdriver which deprecated "type: STACKDRIVER" in tracing YAML configuration. Use `type: GOOGLE_CLOUD` instead (`STACKDRIVER` type remains for backward compatibility). +- [#5170](https://github.com/thanos-io/thanos/pull/5170) All: Upgraded the TLS version from TLS1.2 to TLS1.3. + - [#5205](https://github.com/thanos-io/thanos/pull/5205) Rule: Add ruler labels as external labels in stateless ruler mode. - [#5206](https://github.com/thanos-io/thanos/pull/5206) Cache: add timeout for groupcache's fetch operation diff --git a/pkg/cache/groupcache.go b/pkg/cache/groupcache.go index 7b312b2968..02276ff856 100644 --- a/pkg/cache/groupcache.go +++ b/pkg/cache/groupcache.go @@ -254,7 +254,7 @@ func NewGroupcacheWithConfig(logger log.Logger, reg prometheus.Registerer, conf }, )) - RegisterCacheStatsCollector(galaxy, reg) + RegisterCacheStatsCollector(galaxy, &conf, reg) return &Groupcache{ logger: logger, @@ -305,8 +305,13 @@ func (c *Groupcache) Name() string { type CacheStatsCollector struct { galaxy *galaxycache.Galaxy + conf *GroupcacheConfig // GalaxyCache Metric descriptions. + bytes *prometheus.Desc + evictions *prometheus.Desc + items *prometheus.Desc + maxBytes *prometheus.Desc gets *prometheus.Desc loads *prometheus.Desc peerLoads *prometheus.Desc @@ -317,7 +322,16 @@ type CacheStatsCollector struct { } // RegisterCacheStatsCollector registers a groupcache metrics collector. -func RegisterCacheStatsCollector(galaxy *galaxycache.Galaxy, reg prometheus.Registerer) { +func RegisterCacheStatsCollector(galaxy *galaxycache.Galaxy, conf *GroupcacheConfig, reg prometheus.Registerer) { + // Cache metrics. 
+ bytes := prometheus.NewDesc("thanos_cache_groupcache_bytes", "The number of bytes in the main cache.", []string{"cache"}, nil) + evictions := prometheus.NewDesc("thanos_cache_groupcache_evictions_total", "The number items evicted from the cache.", []string{"cache"}, nil) + items := prometheus.NewDesc("thanos_cache_groupcache_items", "The number of items in the cache.", []string{"cache"}, nil) + + // Configuration Metrics. + maxBytes := prometheus.NewDesc("thanos_cache_groupcache_max_bytes", "The max number of bytes in the cache.", nil, nil) + + // GroupCache metrics. gets := prometheus.NewDesc("thanos_cache_groupcache_get_requests_total", "Total number of get requests, including from peers.", nil, nil) loads := prometheus.NewDesc("thanos_cache_groupcache_loads_total", "Total number of loads from backend (gets - cacheHits).", nil, nil) peerLoads := prometheus.NewDesc("thanos_cache_groupcache_peer_loads_total", "Total number of loads from peers (remote load or remote cache hit).", nil, nil) @@ -328,6 +342,11 @@ func RegisterCacheStatsCollector(galaxy *galaxycache.Galaxy, reg prometheus.Regi collector := &CacheStatsCollector{ galaxy: galaxy, + conf: conf, + bytes: bytes, + evictions: evictions, + items: items, + maxBytes: maxBytes, gets: gets, loads: loads, peerLoads: peerLoads, @@ -340,6 +359,14 @@ func RegisterCacheStatsCollector(galaxy *galaxycache.Galaxy, reg prometheus.Regi } func (s *CacheStatsCollector) Collect(ch chan<- prometheus.Metric) { + for _, cache := range []galaxycache.CacheType{galaxycache.MainCache, galaxycache.HotCache} { + cacheStats := s.galaxy.CacheStats(cache) + ch <- prometheus.MustNewConstMetric(s.bytes, prometheus.GaugeValue, float64(cacheStats.Bytes), cache.String()) + ch <- prometheus.MustNewConstMetric(s.evictions, prometheus.GaugeValue, float64(cacheStats.Evictions), cache.String()) + ch <- prometheus.MustNewConstMetric(s.items, prometheus.GaugeValue, float64(cacheStats.Items), cache.String()) + } + + ch <- 
prometheus.MustNewConstMetric(s.maxBytes, prometheus.GaugeValue, float64(s.conf.MaxSize)) ch <- prometheus.MustNewConstMetric(s.gets, prometheus.CounterValue, float64(s.galaxy.Stats.Gets.Get())) ch <- prometheus.MustNewConstMetric(s.loads, prometheus.CounterValue, float64(s.galaxy.Stats.Loads.Get())) ch <- prometheus.MustNewConstMetric(s.peerLoads, prometheus.CounterValue, float64(s.galaxy.Stats.PeerLoads.Get()))