diff --git a/README.md b/README.md index 0fa40f23..ec610edc 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,6 @@ This repo contains the source code of Cherami server, cross-zone replicator serv Getting started --------------- - To get cherami-server: ``` @@ -15,16 +14,22 @@ git clone git@github.com:uber/cherami-server.git $GOPATH/src/github.com/uber/che Build ----- -We use [`glide`](https://glide.sh) to manage Go dependencies. Additionally, we need a Cassandra running locally in order to run the integration tests. Please make sure `glide` and `cqlsh` are in your PATH, and `cqlsh` can connect to the local Cassandra server. +We use [`glide`](https://glide.sh) to manage Go dependencies. Please make sure `glide` is in your PATH before you attempt to build. -* Build the `cherami-server` and other binaries: +* Build the `cherami-server` and other binaries (will not run tests): ``` make bins ``` +Local Test +---------- +We need Cassandra running locally in order to run the integration tests. Please make sure `cqlsh` is in `/usr/local/bin` and that it can connect to the local Cassandra server. +``` +make test +``` + Run Cherami locally ------------------- - * Setup the cherami keyspace for metadata: ``` ./scripts/cherami-setup-schema @@ -32,12 +37,14 @@ Run Cherami locally * The service can be started as follows: ``` -CHERAMI_ENVIRONMENT=laptop CHERAMI_CONFIG_DIR=`pwd`/config CHERAMI_STORE=/tmp/store ./cherami-server start all +CHERAMI_ENVIRONMENT=local ./cherami-server start all ``` +Note: `cherami-server` is configured via `config/base.yaml` with some parameters overridden by `config/local.yaml`. + One can use the CLI to verify if Cherami is running properly: ``` -./cherami-cli --hostport=:4922 create destination /test/cherami +./cherami-cli --env=prod --hostport=:4922 create destination /test/cherami ``` Deploy Cherami as a cluster diff --git a/clients/metadata/metadata_cassandra.go b/clients/metadata/metadata_cassandra.go index d664cdad..f38747d8 100644 --- a/clients/metadata/metadata_cassandra.go +++ b/clients/metadata/metadata_cassandra.go @@ -102,6 +102,7 @@ const ( columnHostName = "hostname" columnInputHostUUID = "input_host_uuid" columnOriginZone = "origin_zone" + columnRemoteExtentPrimaryStore = "remote_extent_primary_store" columnLastAddress = "last_address" columnLastEnqueueTime = "last_enqueue_time" columnLastSequence = "last_sequence" @@ -1926,6 +1927,7 @@ columnStoreUUIDS + `: ?, ` + columnInputHostUUID + `: ?, ` + columnOriginZone + `: ?, ` + + columnRemoteExtentPrimaryStore + `: ?, ` + columnStatus + `: ?}` sqlInsertDstExent = `INSERT INTO ` + tableDestinationExtents + ` (` + @@ -1970,6 +1972,7 @@ columnStoreUUIDS + `: ?, ` + columnInputHostUUID + `: ?, ` + columnOriginZone + `: ?, ` + + columnRemoteExtentPrimaryStore + `: ?, ` + columnStatus + `: ?, ` + columnArchivalLocation + `: ?` + `}` @@ -2084,6 +2087,7 @@ func (s *CassandraMetadataService) createExtentImpl(extent *shared.Extent, exten extent.GetStoreUUIDs(), extent.GetInputHostUUID(), extent.GetOriginZone(), + extent.GetRemoteExtentPrimaryStore(), extentStatus, replicaStatsList, consumerGroupVisibility, @@ -2101,6 +2105,7 @@ func (s *CassandraMetadataService) createExtentImpl(extent *shared.Extent, exten extent.GetStoreUUIDs(), extent.GetInputHostUUID(), extent.GetOriginZone(), + extent.GetRemoteExtentPrimaryStore(), extentStatus, replicaStatsList, ) @@ -2122,6 +2127,7 @@ func (s *CassandraMetadataService) createExtentImpl(extent *shared.Extent, exten extent.GetStoreUUIDs(), extent.GetInputHostUUID(), 
extent.GetOriginZone(), + extent.GetRemoteExtentPrimaryStore(), extentStatus, replicaStats, ) @@ -2190,6 +2196,7 @@ func (s *CassandraMetadataService) deleteExtentImpl(extent *shared.Extent, exten extent.GetStoreUUIDs(), extent.GetInputHostUUID(), extent.GetOriginZone(), + extent.GetRemoteExtentPrimaryStore(), shared.ExtentStatus_DELETED, extentStatsMap, extentStats.ConsumerGroupVisibility, @@ -2208,6 +2215,7 @@ func (s *CassandraMetadataService) deleteExtentImpl(extent *shared.Extent, exten extent.GetStoreUUIDs(), extent.GetInputHostUUID(), extent.GetOriginZone(), + extent.GetRemoteExtentPrimaryStore(), shared.ExtentStatus_DELETED, extentStatsMap, defaultDeleteTTLSeconds, @@ -2227,6 +2235,7 @@ func (s *CassandraMetadataService) deleteExtentImpl(extent *shared.Extent, exten extent.GetStoreUUIDs(), extent.GetInputHostUUID(), extent.GetOriginZone(), + extent.GetRemoteExtentPrimaryStore(), shared.ExtentStatus_DELETED, replicaStats, defaultDeleteTTLSeconds, @@ -2252,6 +2261,7 @@ func (s *CassandraMetadataService) updateExtent(extentStats *shared.ExtentStats, extent.GetStoreUUIDs(), extent.GetInputHostUUID(), extent.GetOriginZone(), + extent.GetRemoteExtentPrimaryStore(), newStatus, newArchivalLocation, extent.GetDestinationUUID(), @@ -2266,6 +2276,7 @@ func (s *CassandraMetadataService) updateExtent(extentStats *shared.ExtentStats, extent.GetStoreUUIDs(), extent.GetInputHostUUID(), extent.GetOriginZone(), + extent.GetRemoteExtentPrimaryStore(), newStatus, newArchivalLocation, extent.GetDestinationUUID(), @@ -2282,6 +2293,7 @@ func (s *CassandraMetadataService) updateExtent(extentStats *shared.ExtentStats, extent.GetStoreUUIDs(), extent.GetInputHostUUID(), extent.GetOriginZone(), + extent.GetRemoteExtentPrimaryStore(), newStatus, newArchivalLocation, storeID, @@ -2316,6 +2328,10 @@ func (s *CassandraMetadataService) UpdateExtentStats(ctx thrift.Context, request } extent := extentStatsResult.ExtentStats.Extent + if request.IsSetRemoteExtentPrimaryStore() { + extent.RemoteExtentPrimaryStore = common.StringPtr(request.GetRemoteExtentPrimaryStore()) + } + if len(request.GetArchivalLocation()) == 0 { request.ArchivalLocation = common.StringPtr(extentStatsResult.ExtentStats.GetArchivalLocation()) } @@ -2442,11 +2458,12 @@ func uuidSliceToStringSlice(i interface{}) []string { func convertExtentStats(extentMap map[string]interface{}, extentStatsMap map[string]map[string]interface{}) *shared.ExtentStats { result := &shared.ExtentStats{ Extent: &shared.Extent{ - ExtentUUID: common.StringPtr(toUUIDString(extentMap[columnUUID])), - DestinationUUID: common.StringPtr(toUUIDString(extentMap[columnDestinationUUID])), - StoreUUIDs: uuidSliceToStringSlice(extentMap[columnStoreUUIDS]), - InputHostUUID: common.StringPtr(toUUIDString(extentMap[columnInputHostUUID])), - OriginZone: common.StringPtr(toString(extentMap[columnOriginZone])), + ExtentUUID: common.StringPtr(toUUIDString(extentMap[columnUUID])), + DestinationUUID: common.StringPtr(toUUIDString(extentMap[columnDestinationUUID])), + StoreUUIDs: uuidSliceToStringSlice(extentMap[columnStoreUUIDS]), + InputHostUUID: common.StringPtr(toUUIDString(extentMap[columnInputHostUUID])), + OriginZone: common.StringPtr(toString(extentMap[columnOriginZone])), + RemoteExtentPrimaryStore: common.StringPtr(toString(extentMap[columnRemoteExtentPrimaryStore])), }, Status: common.MetadataExtentStatusPtr(shared.ExtentStatus(toInt(extentMap[columnStatus]))), ArchivalLocation: common.StringPtr(toString(extentMap[columnArchivalLocation])), diff --git 
a/clients/metadata/metadata_cassandra_test.go b/clients/metadata/metadata_cassandra_test.go index f2d01858..50367240 100644 --- a/clients/metadata/metadata_cassandra_test.go +++ b/clients/metadata/metadata_cassandra_test.go @@ -563,10 +563,11 @@ func (s *CassandraSuite) TestExtentCRU() { extentUUID := uuid.New() storeIds := []string{uuid.New(), uuid.New(), uuid.New()} extent := &shared.Extent{ - ExtentUUID: common.StringPtr(extentUUID), - DestinationUUID: common.StringPtr(dest.GetDestinationUUID()), - StoreUUIDs: storeIds, - InputHostUUID: common.StringPtr(uuid.New()), + ExtentUUID: common.StringPtr(extentUUID), + DestinationUUID: common.StringPtr(dest.GetDestinationUUID()), + StoreUUIDs: storeIds, + InputHostUUID: common.StringPtr(uuid.New()), + RemoteExtentPrimaryStore: common.StringPtr(uuid.New()), } createExtent := &shared.CreateExtentRequest{Extent: extent} t0 := time.Now().UnixNano() / int64(time.Millisecond) @@ -622,10 +623,11 @@ func (s *CassandraSuite) TestExtentCRU() { s.Equal(extentStatsOrig.GetStatusUpdatedTimeMillis(), extentStats.GetExtentStats().GetStatusUpdatedTimeMillis()) updateExtent := &m.UpdateExtentStatsRequest{ - DestinationUUID: common.StringPtr(extent.GetDestinationUUID()), - ExtentUUID: common.StringPtr(extent.GetExtentUUID()), - Status: common.MetadataExtentStatusPtr(shared.ExtentStatus_ARCHIVED), - ArchivalLocation: common.StringPtr("S3:foo/bar"), + DestinationUUID: common.StringPtr(extent.GetDestinationUUID()), + ExtentUUID: common.StringPtr(extent.GetExtentUUID()), + Status: common.MetadataExtentStatusPtr(shared.ExtentStatus_ARCHIVED), + ArchivalLocation: common.StringPtr("S3:foo/bar"), + RemoteExtentPrimaryStore: common.StringPtr(uuid.New()), } t0 := time.Now().UnixNano() / int64(time.Millisecond) @@ -634,14 +636,17 @@ func (s *CassandraSuite) TestExtentCRU() { s.Nil(err) s.Equal(updateExtent.Status, updateResult.ExtentStats.Status) - s.Equal(updateExtent.ArchivalLocation, updateResult.ExtentStats.ArchivalLocation) + s.Equal(updateExtent.GetArchivalLocation(), updateResult.GetExtentStats().GetArchivalLocation()) + s.Equal(updateExtent.GetRemoteExtentPrimaryStore(), updateResult.GetExtentStats().GetExtent().GetRemoteExtentPrimaryStore()) readExtentStats = &m.ReadExtentStatsRequest{DestinationUUID: extent.DestinationUUID, ExtentUUID: extent.ExtentUUID} time.Sleep(1 * time.Second) extentStats, err = s.client.ReadExtentStats(nil, readExtentStats) s.Nil(err) s.NotNil(extentStats) - s.Equal(shared.ExtentStatus_ARCHIVED, extentStats.ExtentStats.GetStatus()) + s.Equal(shared.ExtentStatus_ARCHIVED, extentStats.GetExtentStats().GetStatus()) + s.Equal(updateExtent.GetArchivalLocation(), extentStats.GetExtentStats().GetArchivalLocation()) + s.Equal(updateExtent.GetRemoteExtentPrimaryStore(), extentStats.GetExtentStats().GetExtent().GetRemoteExtentPrimaryStore()) s.True(extentStats.GetExtentStats().GetStatusUpdatedTimeMillis() >= t0) s.True(extentStats.GetExtentStats().GetStatusUpdatedTimeMillis() <= tX) } diff --git a/clients/metadata/schema/metadata_test.cql b/clients/metadata/schema/metadata.cql similarity index 99% rename from clients/metadata/schema/metadata_test.cql rename to clients/metadata/schema/metadata.cql index 76e61e1b..811fada9 100644 --- a/clients/metadata/schema/metadata_test.cql +++ b/clients/metadata/schema/metadata.cql @@ -115,7 +115,8 @@ CREATE TYPE extent ( input_host_uuid uuid, status int, -- ExtentStatus enum archival_location text, - origin_zone text + origin_zone text, + remote_extent_primary_store text, // the primary store for remote extent ); 
CREATE TYPE extent_replica_stats ( diff --git a/clients/metadata/schema/metadata_keyspace_prod.cql b/clients/metadata/schema/metadata_keyspace.cql similarity index 100% rename from clients/metadata/schema/metadata_keyspace_prod.cql rename to clients/metadata/schema/metadata_keyspace.cql diff --git a/clients/metadata/schema/metadata_keyspace_test.cql b/clients/metadata/schema/metadata_keyspace_test.cql deleted file mode 100644 index a583bca7..00000000 --- a/clients/metadata/schema/metadata_keyspace_test.cql +++ /dev/null @@ -1,21 +0,0 @@ --- Copyright (c) 2016 Uber Technologies, Inc. - --- Permission is hereby granted, free of charge, to any person obtaining a copy --- of this software and associated documentation files (the "Software"), to deal --- in the Software without restriction, including without limitation the rights --- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell --- copies of the Software, and to permit persons to whom the Software is --- furnished to do so, subject to the following conditions: - --- The above copyright notice and this permission notice shall be included in --- all copies or substantial portions of the Software. - --- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR --- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, --- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE --- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER --- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, --- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN --- THE SOFTWARE. - -CREATE KEYSPACE IF NOT EXISTS cherami WITH replication = {'class': 'SimpleStrategy', 'replication_factor': '1'}; diff --git a/clients/metadata/schema/v13/201612180000_xdc_add_remote_extent_primary_store.cql b/clients/metadata/schema/v13/201612180000_xdc_add_remote_extent_primary_store.cql new file mode 100644 index 00000000..0dcfb4ad --- /dev/null +++ b/clients/metadata/schema/v13/201612180000_xdc_add_remote_extent_primary_store.cql @@ -0,0 +1 @@ +ALTER TYPE extent ADD remote_extent_primary_store text; diff --git a/clients/metadata/schema/v13/manifest.json b/clients/metadata/schema/v13/manifest.json new file mode 100644 index 00000000..784aca82 --- /dev/null +++ b/clients/metadata/schema/v13/manifest.json @@ -0,0 +1,8 @@ +{ + "CurrVersion": 13, + "MinCompatibleVersion": 8, + "Description": "add remote_extent_primary_store to extent type", + "SchemaUpdateCqlFiles": [ + "201612180000_xdc_add_remote_extent_primary_store.cql" + ] +} diff --git a/clients/metadata/util.go b/clients/metadata/util.go index 7abb0652..0955b5d4 100644 --- a/clients/metadata/util.go +++ b/clients/metadata/util.go @@ -144,7 +144,7 @@ func (s *TestCluster) SetupTestCluster() { ip := `127.0.0.1` s.createCluster(ip, gocql.Consistency(1), generateRandomKeyspace(10)) s.createKeyspace(1) - s.loadSchema("schema/metadata_test.cql") + s.loadSchema("schema/metadata.cql") var err error s.client, err = NewCassandraMetadataService(&configure.MetadataConfig{ diff --git a/cmd/tools/cli/main.go b/cmd/tools/cli/main.go index 06b8ee35..48e8d5d8 100644 --- a/cmd/tools/cli/main.go +++ b/cmd/tools/cli/main.go @@ -53,7 +53,7 @@ func main() { }) app.Name = "cherami" app.Usage = "A command-line tool for cherami users" - app.Version = "1.1.4" + app.Version = "1.1.5" app.Flags = []cli.Flag{ cli.BoolTFlag{ Name: "hyperbahn", @@ -66,8 +66,8 @@ func main() { }, cli.StringFlag{ Name: "env", - Value: 
"", - Usage: "Deployment to connect. By default connects to production. Use \"staging\" to connect to staging", + Value: "staging", + Usage: "Deployment to connect to. By default connects to staging. Use \"prod\" to connect to production", }, cli.StringFlag{ Name: "hyperbahn_bootstrap_file, hbfile", diff --git a/common/metadata/metaMetrics.go b/common/metadata/metaMetrics.go new file mode 100644 index 00000000..70ff2b39 --- /dev/null +++ b/common/metadata/metaMetrics.go @@ -0,0 +1,752 @@ +// Copyright (c) 2016 Uber Technologies, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +package metadata + +import ( + "github.com/uber/cherami-server/common/metrics" + m "github.com/uber/cherami-thrift/.generated/go/metadata" + "github.com/uber/cherami-thrift/.generated/go/shared" + + "github.com/uber-common/bark" + "github.com/uber/tchannel-go/thrift" +) + +// metadataMetricsMgr Implements TChanMetadataServiceClient interface +type metadataMetricsMgr struct { + meta m.TChanMetadataService + m3 metrics.Client + log bark.Logger +} + +// NewMetadataMetricsMgr creates an instance of metadataMetricsMgr that collects/emits metrics +func NewMetadataMetricsMgr(metaClient m.TChanMetadataService, m3Client metrics.Client, logger bark.Logger) m.TChanMetadataService { + + return &metadataMetricsMgr{ + meta: metaClient, + m3: m3Client, + log: logger, + } +} + +func (m *metadataMetricsMgr) ListEntityOps(ctx thrift.Context, request *m.ListEntityOpsRequest) (result *m.ListEntityOpsResult_, err error) { + + m.m3.IncCounter(metrics.MetadataListEntityOpsScope, metrics.MetadataRequests) + sw := m.m3.StartTimer(metrics.MetadataListEntityOpsScope, metrics.MetadataLatency) + defer sw.Stop() + + result, err = m.meta.ListEntityOps(ctx, request) + + if err != nil { + m.m3.IncCounter(metrics.MetadataListEntityOpsScope, metrics.MetadataFailures) + } + + return result, err +} + +func (m *metadataMetricsMgr) HostAddrToUUID(ctx thrift.Context, request string) (result string, err error) { + + m.m3.IncCounter(metrics.MetadataHostAddrToUUIDScope, metrics.MetadataRequests) + sw := m.m3.StartTimer(metrics.MetadataHostAddrToUUIDScope, metrics.MetadataLatency) + defer sw.Stop() + + result, err = m.meta.HostAddrToUUID(ctx, request) + + if err != nil { + m.m3.IncCounter(metrics.MetadataHostAddrToUUIDScope, metrics.MetadataFailures) + } + + return result, err +} + +func (m *metadataMetricsMgr) ListAllConsumerGroups(ctx thrift.Context, request *m.ListConsumerGroupRequest) (result *m.ListConsumerGroupResult_, err 
error) { + + m.m3.IncCounter(metrics.MetadataListAllConsumerGroupsScope, metrics.MetadataRequests) + sw := m.m3.StartTimer(metrics.MetadataListAllConsumerGroupsScope, metrics.MetadataLatency) + defer sw.Stop() + + result, err = m.meta.ListAllConsumerGroups(ctx, request) + + if err != nil { + m.m3.IncCounter(metrics.MetadataListAllConsumerGroupsScope, metrics.MetadataFailures) + } + + return result, err +} + +func (m *metadataMetricsMgr) ListConsumerGroups(ctx thrift.Context, request *m.ListConsumerGroupRequest) (result *m.ListConsumerGroupResult_, err error) { + + m.m3.IncCounter(metrics.MetadataListConsumerGroupsScope, metrics.MetadataRequests) + sw := m.m3.StartTimer(metrics.MetadataListConsumerGroupsScope, metrics.MetadataLatency) + defer sw.Stop() + + result, err = m.meta.ListConsumerGroups(ctx, request) + + if err != nil { + m.m3.IncCounter(metrics.MetadataListConsumerGroupsScope, metrics.MetadataFailures) + } + + return result, err +} + +func (m *metadataMetricsMgr) ListDestinations(ctx thrift.Context, request *shared.ListDestinationsRequest) (result *shared.ListDestinationsResult_, err error) { + + m.m3.IncCounter(metrics.MetadataListDestinationsScope, metrics.MetadataRequests) + sw := m.m3.StartTimer(metrics.MetadataListDestinationsScope, metrics.MetadataLatency) + defer sw.Stop() + + result, err = m.meta.ListDestinations(ctx, request) + + if err != nil { + m.m3.IncCounter(metrics.MetadataListDestinationsScope, metrics.MetadataFailures) + } + + return result, err +} + +func (m *metadataMetricsMgr) ListDestinationsByUUID(ctx thrift.Context, request *shared.ListDestinationsByUUIDRequest) (result *shared.ListDestinationsResult_, err error) { + + m.m3.IncCounter(metrics.MetadataListDestinationsByUUIDScope, metrics.MetadataRequests) + sw := m.m3.StartTimer(metrics.MetadataListDestinationsByUUIDScope, metrics.MetadataLatency) + defer sw.Stop() + + result, err = m.meta.ListDestinationsByUUID(ctx, request) + + if err != nil { + m.m3.IncCounter(metrics.MetadataListDestinationsByUUIDScope, metrics.MetadataFailures) + } + + return result, err +} + +func (m *metadataMetricsMgr) ListExtentsStats(ctx thrift.Context, request *shared.ListExtentsStatsRequest) (result *shared.ListExtentsStatsResult_, err error) { + + m.m3.IncCounter(metrics.MetadataListExtentsStatsScope, metrics.MetadataRequests) + sw := m.m3.StartTimer(metrics.MetadataListExtentsStatsScope, metrics.MetadataLatency) + defer sw.Stop() + + result, err = m.meta.ListExtentsStats(ctx, request) + + if err != nil { + m.m3.IncCounter(metrics.MetadataListExtentsStatsScope, metrics.MetadataFailures) + } + + return result, err +} + +func (m *metadataMetricsMgr) ListHosts(ctx thrift.Context, request *m.ListHostsRequest) (result *m.ListHostsResult_, err error) { + + m.m3.IncCounter(metrics.MetadataListHostsScope, metrics.MetadataRequests) + sw := m.m3.StartTimer(metrics.MetadataListHostsScope, metrics.MetadataLatency) + defer sw.Stop() + + result, err = m.meta.ListHosts(ctx, request) + + if err != nil { + m.m3.IncCounter(metrics.MetadataListHostsScope, metrics.MetadataFailures) + } + + return result, err +} + +func (m *metadataMetricsMgr) ListInputHostExtentsStats(ctx thrift.Context, request *m.ListInputHostExtentsStatsRequest) (result *m.ListInputHostExtentsStatsResult_, err error) { + + m.m3.IncCounter(metrics.MetadataListInputHostExtentsStatsScope, metrics.MetadataRequests) + sw := m.m3.StartTimer(metrics.MetadataListInputHostExtentsStatsScope, metrics.MetadataLatency) + defer sw.Stop() + + result, err = m.meta.ListInputHostExtentsStats(ctx, 
request) + + if err != nil { + m.m3.IncCounter(metrics.MetadataListInputHostExtentsStatsScope, metrics.MetadataFailures) + } + + return result, err +} + +func (m *metadataMetricsMgr) ListStoreExtentsStats(ctx thrift.Context, request *m.ListStoreExtentsStatsRequest) (result *m.ListStoreExtentsStatsResult_, err error) { + + m.m3.IncCounter(metrics.MetadataListStoreExtentsStatsScope, metrics.MetadataRequests) + sw := m.m3.StartTimer(metrics.MetadataListStoreExtentsStatsScope, metrics.MetadataLatency) + defer sw.Stop() + + result, err = m.meta.ListStoreExtentsStats(ctx, request) + + if err != nil { + m.m3.IncCounter(metrics.MetadataListStoreExtentsStatsScope, metrics.MetadataFailures) + } + + return result, err +} + +func (m *metadataMetricsMgr) ReadConsumerGroup(ctx thrift.Context, request *m.ReadConsumerGroupRequest) (result *shared.ConsumerGroupDescription, err error) { + + m.m3.IncCounter(metrics.MetadataReadConsumerGroupScope, metrics.MetadataRequests) + sw := m.m3.StartTimer(metrics.MetadataReadConsumerGroupScope, metrics.MetadataLatency) + defer sw.Stop() + + result, err = m.meta.ReadConsumerGroup(ctx, request) + + if err != nil { + m.m3.IncCounter(metrics.MetadataReadConsumerGroupScope, metrics.MetadataFailures) + } + + return result, err +} + +func (m *metadataMetricsMgr) ReadConsumerGroupByUUID(ctx thrift.Context, request *m.ReadConsumerGroupRequest) (result *shared.ConsumerGroupDescription, err error) { + + m.m3.IncCounter(metrics.MetadataReadConsumerGroupByUUIDScope, metrics.MetadataRequests) + sw := m.m3.StartTimer(metrics.MetadataReadConsumerGroupByUUIDScope, metrics.MetadataLatency) + defer sw.Stop() + + result, err = m.meta.ReadConsumerGroupByUUID(ctx, request) + + if err != nil { + m.m3.IncCounter(metrics.MetadataReadConsumerGroupByUUIDScope, metrics.MetadataFailures) + } + + return result, err +} + +func (m *metadataMetricsMgr) ReadConsumerGroupExtent(ctx thrift.Context, request *m.ReadConsumerGroupExtentRequest) (result *m.ReadConsumerGroupExtentResult_, err error) { + + m.m3.IncCounter(metrics.MetadataReadConsumerGroupExtentScope, metrics.MetadataRequests) + sw := m.m3.StartTimer(metrics.MetadataReadConsumerGroupExtentScope, metrics.MetadataLatency) + defer sw.Stop() + + result, err = m.meta.ReadConsumerGroupExtent(ctx, request) + + if err != nil { + m.m3.IncCounter(metrics.MetadataReadConsumerGroupExtentScope, metrics.MetadataFailures) + } + + return result, err +} + +func (m *metadataMetricsMgr) ReadConsumerGroupExtents(ctx thrift.Context, request *m.ReadConsumerGroupExtentsRequest) (result *m.ReadConsumerGroupExtentsResult_, err error) { + + m.m3.IncCounter(metrics.MetadataReadConsumerGroupExtentsScope, metrics.MetadataRequests) + sw := m.m3.StartTimer(metrics.MetadataReadConsumerGroupExtentsScope, metrics.MetadataLatency) + defer sw.Stop() + + result, err = m.meta.ReadConsumerGroupExtents(ctx, request) + + if err != nil { + m.m3.IncCounter(metrics.MetadataReadConsumerGroupExtentsScope, metrics.MetadataFailures) + } + + return result, err +} + +func (m *metadataMetricsMgr) ReadConsumerGroupExtentsByExtUUID(ctx thrift.Context, request *m.ReadConsumerGroupExtentsByExtUUIDRequest) (result *m.ReadConsumerGroupExtentsByExtUUIDResult_, err error) { + + m.m3.IncCounter(metrics.MetadataReadConsumerGroupExtentsByExtUUIDScope, metrics.MetadataRequests) + sw := m.m3.StartTimer(metrics.MetadataReadConsumerGroupExtentsByExtUUIDScope, metrics.MetadataLatency) + defer sw.Stop() + + result, err = m.meta.ReadConsumerGroupExtentsByExtUUID(ctx, request) + + if err != nil { + 
m.m3.IncCounter(metrics.MetadataReadConsumerGroupExtentsByExtUUIDScope, metrics.MetadataFailures) + } + + return result, err +} + +func (m *metadataMetricsMgr) ReadDestination(ctx thrift.Context, request *m.ReadDestinationRequest) (result *shared.DestinationDescription, err error) { + + m.m3.IncCounter(metrics.MetadataReadDestinationScope, metrics.MetadataRequests) + sw := m.m3.StartTimer(metrics.MetadataReadDestinationScope, metrics.MetadataLatency) + defer sw.Stop() + + result, err = m.meta.ReadDestination(ctx, request) + + if err != nil { + m.m3.IncCounter(metrics.MetadataReadDestinationScope, metrics.MetadataFailures) + } + + return result, err +} + +func (m *metadataMetricsMgr) ReadExtentStats(ctx thrift.Context, request *m.ReadExtentStatsRequest) (result *m.ReadExtentStatsResult_, err error) { + + m.m3.IncCounter(metrics.MetadataReadExtentStatsScope, metrics.MetadataRequests) + sw := m.m3.StartTimer(metrics.MetadataReadExtentStatsScope, metrics.MetadataLatency) + defer sw.Stop() + + result, err = m.meta.ReadExtentStats(ctx, request) + + if err != nil { + m.m3.IncCounter(metrics.MetadataReadExtentStatsScope, metrics.MetadataFailures) + } + + return result, err +} + +func (m *metadataMetricsMgr) UUIDToHostAddr(ctx thrift.Context, request string) (result string, err error) { + + m.m3.IncCounter(metrics.MetadataUUIDToHostAddrScope, metrics.MetadataRequests) + sw := m.m3.StartTimer(metrics.MetadataUUIDToHostAddrScope, metrics.MetadataLatency) + defer sw.Stop() + + result, err = m.meta.UUIDToHostAddr(ctx, request) + + if err != nil { + m.m3.IncCounter(metrics.MetadataUUIDToHostAddrScope, metrics.MetadataFailures) + } + + return result, err +} + +func (m *metadataMetricsMgr) UpdateServiceConfig(ctx thrift.Context, request *m.UpdateServiceConfigRequest) (err error) { + + m.m3.IncCounter(metrics.MetadataUpdateServiceConfigScope, metrics.MetadataRequests) + sw := m.m3.StartTimer(metrics.MetadataUpdateServiceConfigScope, metrics.MetadataLatency) + defer sw.Stop() + + err = m.meta.UpdateServiceConfig(ctx, request) + + if err != nil { + m.m3.IncCounter(metrics.MetadataUpdateServiceConfigScope, metrics.MetadataFailures) + } + + return err +} + +func (m *metadataMetricsMgr) CreateConsumerGroup(ctx thrift.Context, request *shared.CreateConsumerGroupRequest) (result *shared.ConsumerGroupDescription, err error) { + + m.m3.IncCounter(metrics.MetadataCreateConsumerGroupScope, metrics.MetadataRequests) + sw := m.m3.StartTimer(metrics.MetadataCreateConsumerGroupScope, metrics.MetadataLatency) + defer sw.Stop() + + result, err = m.meta.CreateConsumerGroup(ctx, request) + + if err != nil { + m.m3.IncCounter(metrics.MetadataCreateConsumerGroupScope, metrics.MetadataFailures) + } + + return result, err +} + +func (m *metadataMetricsMgr) CreateConsumerGroupExtent(ctx thrift.Context, request *m.CreateConsumerGroupExtentRequest) (err error) { + + m.m3.IncCounter(metrics.MetadataCreateConsumerGroupExtentScope, metrics.MetadataRequests) + sw := m.m3.StartTimer(metrics.MetadataCreateConsumerGroupExtentScope, metrics.MetadataLatency) + defer sw.Stop() + + err = m.meta.CreateConsumerGroupExtent(ctx, request) + + if err != nil { + m.m3.IncCounter(metrics.MetadataCreateConsumerGroupExtentScope, metrics.MetadataFailures) + } + + return err +} + +func (m *metadataMetricsMgr) CreateDestination(ctx thrift.Context, request *shared.CreateDestinationRequest) (result *shared.DestinationDescription, err error) { + + m.m3.IncCounter(metrics.MetadataCreateDestinationScope, metrics.MetadataRequests) + sw := 
m.m3.StartTimer(metrics.MetadataCreateDestinationScope, metrics.MetadataLatency) + defer sw.Stop() + + result, err = m.meta.CreateDestination(ctx, request) + + if err != nil { + m.m3.IncCounter(metrics.MetadataCreateDestinationScope, metrics.MetadataFailures) + } + + return result, err +} + +func (m *metadataMetricsMgr) CreateDestinationUUID(ctx thrift.Context, request *shared.CreateDestinationUUIDRequest) (result *shared.DestinationDescription, err error) { + + m.m3.IncCounter(metrics.MetadataCreateDestinationUUIDScope, metrics.MetadataRequests) + sw := m.m3.StartTimer(metrics.MetadataCreateDestinationUUIDScope, metrics.MetadataLatency) + defer sw.Stop() + + result, err = m.meta.CreateDestinationUUID(ctx, request) + + if err != nil { + m.m3.IncCounter(metrics.MetadataCreateDestinationUUIDScope, metrics.MetadataFailures) + } + + return result, err +} + +func (m *metadataMetricsMgr) CreateExtent(ctx thrift.Context, request *shared.CreateExtentRequest) (result *shared.CreateExtentResult_, err error) { + + m.m3.IncCounter(metrics.MetadataCreateExtentScope, metrics.MetadataRequests) + sw := m.m3.StartTimer(metrics.MetadataCreateExtentScope, metrics.MetadataLatency) + defer sw.Stop() + + result, err = m.meta.CreateExtent(ctx, request) + + if err != nil { + m.m3.IncCounter(metrics.MetadataCreateExtentScope, metrics.MetadataFailures) + } + + return result, err +} + +func (m *metadataMetricsMgr) CreateHostInfo(ctx thrift.Context, request *m.CreateHostInfoRequest) (err error) { + + m.m3.IncCounter(metrics.MetadataCreateHostInfoScope, metrics.MetadataRequests) + sw := m.m3.StartTimer(metrics.MetadataCreateHostInfoScope, metrics.MetadataLatency) + defer sw.Stop() + + err = m.meta.CreateHostInfo(ctx, request) + + if err != nil { + m.m3.IncCounter(metrics.MetadataCreateHostInfoScope, metrics.MetadataFailures) + } + + return err +} + +func (m *metadataMetricsMgr) CreateServiceConfig(ctx thrift.Context, request *m.CreateServiceConfigRequest) (err error) { + + m.m3.IncCounter(metrics.MetadataCreateServiceConfigScope, metrics.MetadataRequests) + sw := m.m3.StartTimer(metrics.MetadataCreateServiceConfigScope, metrics.MetadataLatency) + defer sw.Stop() + + err = m.meta.CreateServiceConfig(ctx, request) + + if err != nil { + m.m3.IncCounter(metrics.MetadataCreateServiceConfigScope, metrics.MetadataFailures) + } + + return err +} + +func (m *metadataMetricsMgr) DeleteConsumerGroup(ctx thrift.Context, request *shared.DeleteConsumerGroupRequest) (err error) { + + m.m3.IncCounter(metrics.MetadataDeleteConsumerGroupScope, metrics.MetadataRequests) + sw := m.m3.StartTimer(metrics.MetadataDeleteConsumerGroupScope, metrics.MetadataLatency) + defer sw.Stop() + + err = m.meta.DeleteConsumerGroup(ctx, request) + + if err != nil { + m.m3.IncCounter(metrics.MetadataDeleteConsumerGroupScope, metrics.MetadataFailures) + } + + return err +} + +func (m *metadataMetricsMgr) DeleteDestination(ctx thrift.Context, request *shared.DeleteDestinationRequest) (err error) { + + m.m3.IncCounter(metrics.MetadataDeleteDestinationScope, metrics.MetadataRequests) + sw := m.m3.StartTimer(metrics.MetadataDeleteDestinationScope, metrics.MetadataLatency) + defer sw.Stop() + + err = m.meta.DeleteDestination(ctx, request) + + if err != nil { + m.m3.IncCounter(metrics.MetadataDeleteDestinationScope, metrics.MetadataFailures) + } + + return err +} + +func (m *metadataMetricsMgr) DeleteDestinationUUID(ctx thrift.Context, request *m.DeleteDestinationUUIDRequest) (err error) { + + m.m3.IncCounter(metrics.MetadataDeleteDestinationUUIDScope, 
metrics.MetadataRequests) + sw := m.m3.StartTimer(metrics.MetadataDeleteDestinationUUIDScope, metrics.MetadataLatency) + defer sw.Stop() + + err = m.meta.DeleteDestinationUUID(ctx, request) + + if err != nil { + m.m3.IncCounter(metrics.MetadataDeleteDestinationUUIDScope, metrics.MetadataFailures) + } + + return err +} + +func (m *metadataMetricsMgr) DeleteHostInfo(ctx thrift.Context, request *m.DeleteHostInfoRequest) (err error) { + + m.m3.IncCounter(metrics.MetadataDeleteHostInfoScope, metrics.MetadataRequests) + sw := m.m3.StartTimer(metrics.MetadataDeleteHostInfoScope, metrics.MetadataLatency) + defer sw.Stop() + + err = m.meta.DeleteHostInfo(ctx, request) + + if err != nil { + m.m3.IncCounter(metrics.MetadataDeleteHostInfoScope, metrics.MetadataFailures) + } + + return err +} + +func (m *metadataMetricsMgr) DeleteServiceConfig(ctx thrift.Context, request *m.DeleteServiceConfigRequest) (err error) { + + m.m3.IncCounter(metrics.MetadataDeleteServiceConfigScope, metrics.MetadataRequests) + sw := m.m3.StartTimer(metrics.MetadataDeleteServiceConfigScope, metrics.MetadataLatency) + defer sw.Stop() + + err = m.meta.DeleteServiceConfig(ctx, request) + + if err != nil { + m.m3.IncCounter(metrics.MetadataDeleteServiceConfigScope, metrics.MetadataFailures) + } + + return err +} + +func (m *metadataMetricsMgr) MoveExtent(ctx thrift.Context, request *m.MoveExtentRequest) (err error) { + + m.m3.IncCounter(metrics.MetadataMoveExtentScope, metrics.MetadataRequests) + sw := m.m3.StartTimer(metrics.MetadataMoveExtentScope, metrics.MetadataLatency) + defer sw.Stop() + + err = m.meta.MoveExtent(ctx, request) + + if err != nil { + m.m3.IncCounter(metrics.MetadataMoveExtentScope, metrics.MetadataFailures) + } + + return err +} + +func (m *metadataMetricsMgr) ReadHostInfo(ctx thrift.Context, request *m.ReadHostInfoRequest) (result *m.ReadHostInfoResult_, err error) { + + m.m3.IncCounter(metrics.MetadataReadHostInfoScope, metrics.MetadataRequests) + sw := m.m3.StartTimer(metrics.MetadataReadHostInfoScope, metrics.MetadataLatency) + defer sw.Stop() + + result, err = m.meta.ReadHostInfo(ctx, request) + + if err != nil { + m.m3.IncCounter(metrics.MetadataReadHostInfoScope, metrics.MetadataFailures) + } + + return result, err +} + +func (m *metadataMetricsMgr) ReadServiceConfig(ctx thrift.Context, request *m.ReadServiceConfigRequest) (result *m.ReadServiceConfigResult_, err error) { + + m.m3.IncCounter(metrics.MetadataReadServiceConfigScope, metrics.MetadataRequests) + sw := m.m3.StartTimer(metrics.MetadataReadServiceConfigScope, metrics.MetadataLatency) + defer sw.Stop() + + result, err = m.meta.ReadServiceConfig(ctx, request) + + if err != nil { + m.m3.IncCounter(metrics.MetadataReadServiceConfigScope, metrics.MetadataFailures) + } + + return result, err +} + +func (m *metadataMetricsMgr) ReadStoreExtentReplicaStats(ctx thrift.Context, request *m.ReadStoreExtentReplicaStatsRequest) (result *m.ReadStoreExtentReplicaStatsResult_, err error) { + + m.m3.IncCounter(metrics.MetadataReadStoreExtentReplicaStatsScope, metrics.MetadataRequests) + sw := m.m3.StartTimer(metrics.MetadataReadStoreExtentReplicaStatsScope, metrics.MetadataLatency) + defer sw.Stop() + + result, err = m.meta.ReadStoreExtentReplicaStats(ctx, request) + + if err != nil { + m.m3.IncCounter(metrics.MetadataReadStoreExtentReplicaStatsScope, metrics.MetadataFailures) + } + + return result, err +} + +func (m *metadataMetricsMgr) RegisterHostUUID(ctx thrift.Context, request *m.RegisterHostUUIDRequest) (err error) { + + 
m.m3.IncCounter(metrics.MetadataRegisterHostUUIDScope, metrics.MetadataRequests) + sw := m.m3.StartTimer(metrics.MetadataRegisterHostUUIDScope, metrics.MetadataLatency) + defer sw.Stop() + + err = m.meta.RegisterHostUUID(ctx, request) + + if err != nil { + m.m3.IncCounter(metrics.MetadataRegisterHostUUIDScope, metrics.MetadataFailures) + } + + return err +} + +func (m *metadataMetricsMgr) SealExtent(ctx thrift.Context, request *m.SealExtentRequest) (err error) { + + m.m3.IncCounter(metrics.MetadataSealExtentScope, metrics.MetadataRequests) + sw := m.m3.StartTimer(metrics.MetadataSealExtentScope, metrics.MetadataLatency) + defer sw.Stop() + + err = m.meta.SealExtent(ctx, request) + + if err != nil { + m.m3.IncCounter(metrics.MetadataSealExtentScope, metrics.MetadataFailures) + } + + return err +} + +func (m *metadataMetricsMgr) SetAckOffset(ctx thrift.Context, request *m.SetAckOffsetRequest) (err error) { + + m.m3.IncCounter(metrics.MetadataSetAckOffsetScope, metrics.MetadataRequests) + sw := m.m3.StartTimer(metrics.MetadataSetAckOffsetScope, metrics.MetadataLatency) + defer sw.Stop() + + err = m.meta.SetAckOffset(ctx, request) + + if err != nil { + m.m3.IncCounter(metrics.MetadataSetAckOffsetScope, metrics.MetadataFailures) + } + + return err +} + +func (m *metadataMetricsMgr) SetOutputHost(ctx thrift.Context, request *m.SetOutputHostRequest) (err error) { + + m.m3.IncCounter(metrics.MetadataSetOutputHostScope, metrics.MetadataRequests) + sw := m.m3.StartTimer(metrics.MetadataSetOutputHostScope, metrics.MetadataLatency) + defer sw.Stop() + + err = m.meta.SetOutputHost(ctx, request) + + if err != nil { + m.m3.IncCounter(metrics.MetadataSetOutputHostScope, metrics.MetadataFailures) + } + + return err +} + +func (m *metadataMetricsMgr) UpdateConsumerGroup(ctx thrift.Context, request *shared.UpdateConsumerGroupRequest) (result *shared.ConsumerGroupDescription, err error) { + + m.m3.IncCounter(metrics.MetadataUpdateConsumerGroupScope, metrics.MetadataRequests) + sw := m.m3.StartTimer(metrics.MetadataUpdateConsumerGroupScope, metrics.MetadataLatency) + defer sw.Stop() + + result, err = m.meta.UpdateConsumerGroup(ctx, request) + + if err != nil { + m.m3.IncCounter(metrics.MetadataUpdateConsumerGroupScope, metrics.MetadataFailures) + } + + return result, err +} + +func (m *metadataMetricsMgr) UpdateConsumerGroupExtentStatus(ctx thrift.Context, request *m.UpdateConsumerGroupExtentStatusRequest) (err error) { + + m.m3.IncCounter(metrics.MetadataUpdateConsumerGroupExtentStatusScope, metrics.MetadataRequests) + sw := m.m3.StartTimer(metrics.MetadataUpdateConsumerGroupExtentStatusScope, metrics.MetadataLatency) + defer sw.Stop() + + err = m.meta.UpdateConsumerGroupExtentStatus(ctx, request) + + if err != nil { + m.m3.IncCounter(metrics.MetadataUpdateConsumerGroupExtentStatusScope, metrics.MetadataFailures) + } + + return err +} + +func (m *metadataMetricsMgr) UpdateDestination(ctx thrift.Context, request *shared.UpdateDestinationRequest) (result *shared.DestinationDescription, err error) { + + m.m3.IncCounter(metrics.MetadataUpdateDestinationScope, metrics.MetadataRequests) + sw := m.m3.StartTimer(metrics.MetadataUpdateDestinationScope, metrics.MetadataLatency) + defer sw.Stop() + + result, err = m.meta.UpdateDestination(ctx, request) + + if err != nil { + m.m3.IncCounter(metrics.MetadataUpdateDestinationScope, metrics.MetadataFailures) + } + + return result, err +} + +func (m *metadataMetricsMgr) UpdateDestinationDLQCursors(ctx thrift.Context, request *m.UpdateDestinationDLQCursorsRequest) (result 
*shared.DestinationDescription, err error) { + + m.m3.IncCounter(metrics.MetadataUpdateDestinationDLQCursorsScope, metrics.MetadataRequests) + sw := m.m3.StartTimer(metrics.MetadataUpdateDestinationDLQCursorsScope, metrics.MetadataLatency) + defer sw.Stop() + + result, err = m.meta.UpdateDestinationDLQCursors(ctx, request) + + if err != nil { + m.m3.IncCounter(metrics.MetadataUpdateDestinationDLQCursorsScope, metrics.MetadataFailures) + } + + return result, err +} + +func (m *metadataMetricsMgr) UpdateExtentReplicaStats(ctx thrift.Context, request *m.UpdateExtentReplicaStatsRequest) (err error) { + + m.m3.IncCounter(metrics.MetadataUpdateExtentReplicaStatsScope, metrics.MetadataRequests) + sw := m.m3.StartTimer(metrics.MetadataUpdateExtentReplicaStatsScope, metrics.MetadataLatency) + defer sw.Stop() + + err = m.meta.UpdateExtentReplicaStats(ctx, request) + + if err != nil { + m.m3.IncCounter(metrics.MetadataUpdateExtentReplicaStatsScope, metrics.MetadataFailures) + } + + return err +} + +func (m *metadataMetricsMgr) UpdateExtentStats(ctx thrift.Context, request *m.UpdateExtentStatsRequest) (result *m.UpdateExtentStatsResult_, err error) { + + m.m3.IncCounter(metrics.MetadataUpdateExtentStatsScope, metrics.MetadataRequests) + sw := m.m3.StartTimer(metrics.MetadataUpdateExtentStatsScope, metrics.MetadataLatency) + defer sw.Stop() + + result, err = m.meta.UpdateExtentStats(ctx, request) + + if err != nil { + m.m3.IncCounter(metrics.MetadataUpdateExtentStatsScope, metrics.MetadataFailures) + } + + return result, err +} + +func (m *metadataMetricsMgr) UpdateHostInfo(ctx thrift.Context, request *m.UpdateHostInfoRequest) (err error) { + + m.m3.IncCounter(metrics.MetadataUpdateHostInfoScope, metrics.MetadataRequests) + sw := m.m3.StartTimer(metrics.MetadataUpdateHostInfoScope, metrics.MetadataLatency) + defer sw.Stop() + + err = m.meta.UpdateHostInfo(ctx, request) + + if err != nil { + m.m3.IncCounter(metrics.MetadataUpdateHostInfoScope, metrics.MetadataFailures) + } + + return err +} + +func (m *metadataMetricsMgr) UpdateStoreExtentReplicaStats(ctx thrift.Context, request *m.UpdateStoreExtentReplicaStatsRequest) (err error) { + + m.m3.IncCounter(metrics.MetadataUpdateStoreExtentReplicaStatsScope, metrics.MetadataRequests) + sw := m.m3.StartTimer(metrics.MetadataUpdateStoreExtentReplicaStatsScope, metrics.MetadataLatency) + defer sw.Stop() + + err = m.meta.UpdateStoreExtentReplicaStats(ctx, request) + + if err != nil { + m.m3.IncCounter(metrics.MetadataUpdateStoreExtentReplicaStatsScope, metrics.MetadataFailures) + } + + return err +} diff --git a/common/metadataMgr.go b/common/metadataMgr.go index 5206e8e6..1cd21672 100644 --- a/common/metadataMgr.go +++ b/common/metadataMgr.go @@ -57,10 +57,14 @@ type ( ListExtentsByInputIDStatus(inputID string, status *shared.ExtentStatus) ([]*shared.ExtentStats, error) // ListExtentsByStoreIDStatus lists extents by storeID/Status ListExtentsByStoreIDStatus(storeID string, status *shared.ExtentStatus) ([]*shared.ExtentStats, error) + // ListExtentsByReplicationStatus lists extents by storeID/ReplicationStatus + ListExtentsByReplicationStatus(storeID string, status *shared.ExtentReplicaReplicationStatus) ([]*shared.ExtentStats, error) // ListExtentsByConsumerGroup lists all extents for the given destination / consumer group ListExtentsByConsumerGroup(dstID string, cgID string, filterByStatus []m.ConsumerGroupExtentStatus) ([]*m.ConsumerGroupExtent, error) // CreateExtent creates a new extent for the given destination and marks the status as OPEN - 
CreateExtent(dstID string, extentID string, inhostID string, storeIDs []string, originZone string) (*shared.CreateExtentResult_, error) + CreateExtent(dstID string, extentID string, inhostID string, storeIDs []string) (*shared.CreateExtentResult_, error) + // CreateRemoteZoneExtent creates a new remote zone extent for the given destination and marks the status as OPEN + CreateRemoteZoneExtent(dstID string, extentID string, inhostID string, storeIDs []string, originZone string, remoteExtentPrimaryStore string) (*shared.CreateExtentResult_, error) // AddExtentToConsumerGroup adds an open extent to consumer group for consumption AddExtentToConsumerGroup(dstID string, cgID string, extentID string, outHostID string, storeIDs []string) error // ListConsumerGroupsByDstID lists all consumer groups for a given destination uuid @@ -83,6 +87,8 @@ type ( ReadConsumerGroupByUUID(cgUUID string) (*shared.ConsumerGroupDescription, error) // UpdateExtentStatus updates the status of an extent UpdateExtentStatus(dstID, extID string, status shared.ExtentStatus) error + // UpdateRemoteExtentPrimaryStore updates remoteExtentPrimaryStore + UpdateRemoteExtentPrimaryStore(dstID string, extentID string, remoteExtentPrimaryStore string) (*m.UpdateExtentStatsResult_, error) // UpdateConsumerGroupExtentStatus updates the status of a consumer group extent UpdateConsumerGroupExtentStatus(cgID, extID string, status m.ConsumerGroupExtentStatus) error // DeleteDestination marks a destination to be deleted @@ -140,14 +146,14 @@ func (mm *metadataMgrImpl) ListDestinations() ([]*shared.DestinationDescription, func (mm *metadataMgrImpl) ListDestinationsPage(mReq *shared.ListDestinationsRequest) (*shared.ListDestinationsResult_, error) { - mm.m3Client.IncCounter(metrics.MetadataListDestinationsPageScope, metrics.MetadataRequests) + mm.m3Client.IncCounter(metrics.MetadataListDestinationsScope, metrics.MetadataRequests) - sw := mm.m3Client.StartTimer(metrics.MetadataListDestinationsPageScope, metrics.MetadataLatency) + sw := mm.m3Client.StartTimer(metrics.MetadataListDestinationsScope, metrics.MetadataLatency) defer sw.Stop() resp, err := mm.mClient.ListDestinations(nil, mReq) if err != nil { - mm.m3Client.IncCounter(metrics.MetadataListDestinationsPageScope, metrics.MetadataFailures) + mm.m3Client.IncCounter(metrics.MetadataListDestinationsScope, metrics.MetadataFailures) return nil, err } @@ -156,7 +162,7 @@ func (mm *metadataMgrImpl) ListDestinationsPage(mReq *shared.ListDestinationsReq func (mm *metadataMgrImpl) ReadDestination(dstID string, dstPath string) (*shared.DestinationDescription, error) { - mm.m3Client.IncCounter(metrics.MetadataReadDstScope, metrics.MetadataRequests) + mm.m3Client.IncCounter(metrics.MetadataReadDestinationScope, metrics.MetadataRequests) mReq := &m.ReadDestinationRequest{} @@ -168,13 +174,13 @@ func (mm *metadataMgrImpl) ReadDestination(dstID string, dstPath string) (*share mReq.Path = StringPtr(dstPath) } - sw := mm.m3Client.StartTimer(metrics.MetadataReadDstScope, metrics.MetadataLatency) + sw := mm.m3Client.StartTimer(metrics.MetadataReadDestinationScope, metrics.MetadataLatency) desc, err := mm.mClient.ReadDestination(nil, mReq) sw.Stop() if err != nil { if _, ok := err.(*shared.EntityNotExistsError); !ok { - mm.m3Client.IncCounter(metrics.MetadataReadDstScope, metrics.MetadataFailures) + mm.m3Client.IncCounter(metrics.MetadataReadDestinationScope, metrics.MetadataFailures) } } @@ -208,7 +214,7 @@ func (mm *metadataMgrImpl) ListDestinationsByUUID() ([]*shared.DestinationDescri func (mm 
*metadataMgrImpl) ListExtentsByDstIDStatus(dstID string, filterByStatus []shared.ExtentStatus) ([]*shared.ExtentStats, error) { - mm.m3Client.IncCounter(metrics.MetadataListExtentsByDstScope, metrics.MetadataRequests) + mm.m3Client.IncCounter(metrics.MetadataListExtentsStatsScope, metrics.MetadataRequests) listReq := &shared.ListExtentsStatsRequest{ DestinationUUID: StringPtr(dstID), @@ -233,14 +239,14 @@ func (mm *metadataMgrImpl) ListExtentsByDstIDStatus(dstID string, filterByStatus }).Info("listExtentsByDstID high latency") } - mm.m3Client.RecordTimer(metrics.MetadataListExtentsByDstScope, metrics.MetadataLatency, elapsed) + mm.m3Client.RecordTimer(metrics.MetadataListExtentsStatsScope, metrics.MetadataLatency, elapsed) }() var result []*shared.ExtentStats for { listResp, err := mm.mClient.ListExtentsStats(nil, listReq) if err != nil { - mm.m3Client.IncCounter(metrics.MetadataListExtentsByDstScope, metrics.MetadataFailures) + mm.m3Client.IncCounter(metrics.MetadataListExtentsStatsScope, metrics.MetadataFailures) return nil, err } @@ -270,18 +276,18 @@ func (mm *metadataMgrImpl) ListExtentsByDstIDStatus(dstID string, filterByStatus func (mm *metadataMgrImpl) ListExtentsByInputIDStatus(inputID string, status *shared.ExtentStatus) ([]*shared.ExtentStats, error) { - mm.m3Client.IncCounter(metrics.MetadataListExtentsByInputScope, metrics.MetadataRequests) + mm.m3Client.IncCounter(metrics.MetadataListInputHostExtentsStatsScope, metrics.MetadataRequests) listReq := &m.ListInputHostExtentsStatsRequest{ InputHostUUID: StringPtr(inputID), Status: status, } - sw := mm.m3Client.StartTimer(metrics.MetadataListExtentsByInputScope, metrics.MetadataLatency) + sw := mm.m3Client.StartTimer(metrics.MetadataListInputHostExtentsStatsScope, metrics.MetadataLatency) resp, err := mm.mClient.ListInputHostExtentsStats(nil, listReq) sw.Stop() if err != nil { - mm.m3Client.IncCounter(metrics.MetadataListExtentsByInputScope, metrics.MetadataFailures) + mm.m3Client.IncCounter(metrics.MetadataListInputHostExtentsStatsScope, metrics.MetadataFailures) return nil, err } @@ -290,18 +296,38 @@ func (mm *metadataMgrImpl) ListExtentsByInputIDStatus(inputID string, status *sh func (mm *metadataMgrImpl) ListExtentsByStoreIDStatus(storeID string, status *shared.ExtentStatus) ([]*shared.ExtentStats, error) { - mm.m3Client.IncCounter(metrics.MetadataListExtentsByStoreScope, metrics.MetadataRequests) + mm.m3Client.IncCounter(metrics.MetadataListStoreExtentsStatsScope, metrics.MetadataRequests) listReq := &m.ListStoreExtentsStatsRequest{ StoreUUID: StringPtr(storeID), Status: status, } - sw := mm.m3Client.StartTimer(metrics.MetadataListExtentsByStoreScope, metrics.MetadataLatency) + sw := mm.m3Client.StartTimer(metrics.MetadataListStoreExtentsStatsScope, metrics.MetadataLatency) resp, err := mm.mClient.ListStoreExtentsStats(nil, listReq) sw.Stop() if err != nil { - mm.m3Client.IncCounter(metrics.MetadataListExtentsByStoreScope, metrics.MetadataFailures) + mm.m3Client.IncCounter(metrics.MetadataListStoreExtentsStatsScope, metrics.MetadataFailures) + return nil, err + } + + return resp.GetExtentStatsList(), nil +} + +func (mm *metadataMgrImpl) ListExtentsByReplicationStatus(storeID string, status *shared.ExtentReplicaReplicationStatus) ([]*shared.ExtentStats, error) { + + mm.m3Client.IncCounter(metrics.MetadataListStoreExtentsStatsScope, metrics.MetadataRequests) + + listReq := &m.ListStoreExtentsStatsRequest{ + StoreUUID: StringPtr(storeID), + ReplicationStatus: status, + } + + sw := 
mm.m3Client.StartTimer(metrics.MetadataListStoreExtentsStatsScope, metrics.MetadataLatency) + resp, err := mm.mClient.ListStoreExtentsStats(nil, listReq) + sw.Stop() + if err != nil { + mm.m3Client.IncCounter(metrics.MetadataListStoreExtentsStatsScope, metrics.MetadataFailures) return nil, err } @@ -310,7 +336,7 @@ func (mm *metadataMgrImpl) ListExtentsByStoreIDStatus(storeID string, status *sh func (mm *metadataMgrImpl) ListExtentsByConsumerGroup(dstID string, cgID string, filterByStatus []m.ConsumerGroupExtentStatus) ([]*m.ConsumerGroupExtent, error) { - mm.m3Client.IncCounter(metrics.MetadataListExtentsByConsGroupScope, metrics.MetadataRequests) + mm.m3Client.IncCounter(metrics.MetadataReadConsumerGroupExtentScope, metrics.MetadataRequests) mReq := &m.ReadConsumerGroupExtentsRequest{ DestinationUUID: StringPtr(dstID), @@ -326,7 +352,7 @@ func (mm *metadataMgrImpl) ListExtentsByConsumerGroup(dstID string, cgID string, startTime := time.Now() defer func() { elapsed := time.Since(startTime) - mm.m3Client.RecordTimer(metrics.MetadataListExtentsByConsGroupScope, metrics.MetadataLatency, elapsed) + mm.m3Client.RecordTimer(metrics.MetadataReadConsumerGroupExtentScope, metrics.MetadataLatency, elapsed) if elapsed >= time.Second { mm.logger.WithFields(bark.Fields{ TagDst: dstID, @@ -341,7 +367,7 @@ func (mm *metadataMgrImpl) ListExtentsByConsumerGroup(dstID string, cgID string, for { mResp, err := mm.mClient.ReadConsumerGroupExtents(nil, mReq) if err != nil { - mm.m3Client.IncCounter(metrics.MetadataListExtentsByConsGroupScope, metrics.MetadataFailures) + mm.m3Client.IncCounter(metrics.MetadataReadConsumerGroupExtentScope, metrics.MetadataFailures) return nil, err } @@ -370,16 +396,26 @@ func (mm *metadataMgrImpl) ListExtentsByConsumerGroup(dstID string, cgID string, } // CreateExtent creates a new extent for the given destination and marks the status as OPEN -func (mm *metadataMgrImpl) CreateExtent(dstID string, extentID string, inhostID string, storeIDs []string, originZone string) (*shared.CreateExtentResult_, error) { +func (mm *metadataMgrImpl) CreateExtent(dstID string, extentID string, inhostID string, storeIDs []string) (*shared.CreateExtentResult_, error) { + return mm.createExtentInternal(dstID, extentID, inhostID, storeIDs, ``, ``) +} + +// CreateRemoteZoneExtent creates a new remote zone extent for the given destination and marks the status as OPEN +func (mm *metadataMgrImpl) CreateRemoteZoneExtent(dstID string, extentID string, inhostID string, storeIDs []string, originZone string, remoteExtentPrimaryStore string) (*shared.CreateExtentResult_, error) { + return mm.createExtentInternal(dstID, extentID, inhostID, storeIDs, originZone, remoteExtentPrimaryStore) +} + +func (mm *metadataMgrImpl) createExtentInternal(dstID string, extentID string, inhostID string, storeIDs []string, originZone string, remoteExtentPrimaryStore string) (*shared.CreateExtentResult_, error) { mm.m3Client.IncCounter(metrics.MetadataCreateExtentScope, metrics.MetadataRequests) extent := &shared.Extent{ - ExtentUUID: StringPtr(extentID), - DestinationUUID: StringPtr(dstID), - InputHostUUID: StringPtr(inhostID), - StoreUUIDs: storeIDs, - OriginZone: StringPtr(originZone), + ExtentUUID: StringPtr(extentID), + DestinationUUID: StringPtr(dstID), + InputHostUUID: StringPtr(inhostID), + StoreUUIDs: storeIDs, + OriginZone: StringPtr(originZone), + RemoteExtentPrimaryStore: StringPtr(remoteExtentPrimaryStore), } mReq := &shared.CreateExtentRequest{Extent: extent} @@ -396,7 +432,7 @@ func (mm *metadataMgrImpl) 
CreateExtent(dstID string, extentID string, inhostID func (mm *metadataMgrImpl) AddExtentToConsumerGroup(dstID string, cgID string, extentID string, outHostID string, storeIDs []string) error { - mm.m3Client.IncCounter(metrics.MetadataAddExtentToConsGroupScope, metrics.MetadataRequests) + mm.m3Client.IncCounter(metrics.MetadataCreateConsumerGroupExtentScope, metrics.MetadataRequests) mReq := &m.CreateConsumerGroupExtentRequest{ DestinationUUID: StringPtr(dstID), @@ -406,11 +442,11 @@ func (mm *metadataMgrImpl) AddExtentToConsumerGroup(dstID string, cgID string, e StoreUUIDs: storeIDs, } - sw := mm.m3Client.StartTimer(metrics.MetadataAddExtentToConsGroupScope, metrics.MetadataLatency) + sw := mm.m3Client.StartTimer(metrics.MetadataCreateConsumerGroupExtentScope, metrics.MetadataLatency) err := mm.mClient.CreateConsumerGroupExtent(nil, mReq) sw.Stop() if err != nil { - mm.m3Client.IncCounter(metrics.MetadataAddExtentToConsGroupScope, metrics.MetadataFailures) + mm.m3Client.IncCounter(metrics.MetadataCreateConsumerGroupExtentScope, metrics.MetadataFailures) return err } @@ -419,7 +455,7 @@ func (mm *metadataMgrImpl) AddExtentToConsumerGroup(dstID string, cgID string, e func (mm *metadataMgrImpl) ListConsumerGroupsByDstID(dstID string) ([]*shared.ConsumerGroupDescription, error) { - mm.m3Client.IncCounter(metrics.MetadataListConsGroupsByDstScope, metrics.MetadataRequests) + mm.m3Client.IncCounter(metrics.MetadataListConsumerGroupsScope, metrics.MetadataRequests) mReq := &m.ListConsumerGroupRequest{ DestinationUUID: StringPtr(dstID), @@ -428,12 +464,12 @@ func (mm *metadataMgrImpl) ListConsumerGroupsByDstID(dstID string) ([]*shared.Co var result []*shared.ConsumerGroupDescription - sw := mm.m3Client.StartTimer(metrics.MetadataListConsGroupsByDstScope, metrics.MetadataLatency) + sw := mm.m3Client.StartTimer(metrics.MetadataListConsumerGroupsScope, metrics.MetadataLatency) defer sw.Stop() for { resp, err := mm.mClient.ListConsumerGroups(nil, mReq) if err != nil { - mm.m3Client.IncCounter(metrics.MetadataListConsGroupsByDstScope, metrics.MetadataFailures) + mm.m3Client.IncCounter(metrics.MetadataListConsumerGroupsScope, metrics.MetadataFailures) return nil, err } @@ -451,14 +487,14 @@ func (mm *metadataMgrImpl) ListConsumerGroupsByDstID(dstID string) ([]*shared.Co func (mm *metadataMgrImpl) ListConsumerGroupsPage(mReq *m.ListConsumerGroupRequest) (*m.ListConsumerGroupResult_, error) { - mm.m3Client.IncCounter(metrics.MetadataListConsumerGroupsPageScope, metrics.MetadataRequests) + mm.m3Client.IncCounter(metrics.MetadataListConsumerGroupsScope, metrics.MetadataRequests) - sw := mm.m3Client.StartTimer(metrics.MetadataListConsumerGroupsPageScope, metrics.MetadataLatency) + sw := mm.m3Client.StartTimer(metrics.MetadataListConsumerGroupsScope, metrics.MetadataLatency) defer sw.Stop() resp, err := mm.mClient.ListConsumerGroups(nil, mReq) if err != nil { - mm.m3Client.IncCounter(metrics.MetadataListConsumerGroupsPageScope, metrics.MetadataFailures) + mm.m3Client.IncCounter(metrics.MetadataListConsumerGroupsScope, metrics.MetadataFailures) return nil, err } @@ -467,7 +503,7 @@ func (mm *metadataMgrImpl) ListConsumerGroupsPage(mReq *m.ListConsumerGroupReque func (mm *metadataMgrImpl) UpdateOutHost(dstID string, cgID string, extentID string, outHostID string) error { - mm.m3Client.IncCounter(metrics.MetadataUpdateOutputScope, metrics.MetadataRequests) + mm.m3Client.IncCounter(metrics.MetadataSetOutputHostScope, metrics.MetadataRequests) mReq := &m.SetOutputHostRequest{ DestinationUUID: StringPtr(dstID), @@ 
-476,11 +512,11 @@ func (mm *metadataMgrImpl) UpdateOutHost(dstID string, cgID string, extentID str OutputHostUUID: StringPtr(outHostID), } - sw := mm.m3Client.StartTimer(metrics.MetadataUpdateOutputScope, metrics.MetadataLatency) + sw := mm.m3Client.StartTimer(metrics.MetadataSetOutputHostScope, metrics.MetadataLatency) err := mm.mClient.SetOutputHost(nil, mReq) sw.Stop() if err != nil { - mm.m3Client.IncCounter(metrics.MetadataUpdateOutputScope, metrics.MetadataFailures) + mm.m3Client.IncCounter(metrics.MetadataSetOutputHostScope, metrics.MetadataFailures) return err } @@ -530,18 +566,18 @@ func (mm *metadataMgrImpl) ReadConsumerGroupExtent(dstID string, cgID string, ex } func (mm *metadataMgrImpl) ReadStoreExtentStats(extentID string, storeID string) (*shared.ExtentStats, error) { - mm.m3Client.IncCounter(metrics.MetadataReadStoreExtentStatsScope, metrics.MetadataRequests) + mm.m3Client.IncCounter(metrics.MetadataReadStoreExtentReplicaStatsScope, metrics.MetadataRequests) mReq := &m.ReadStoreExtentReplicaStatsRequest{ StoreUUID: StringPtr(storeID), ExtentUUID: StringPtr(extentID), } - sw := mm.m3Client.StartTimer(metrics.MetadataReadStoreExtentStatsScope, metrics.MetadataLatency) + sw := mm.m3Client.StartTimer(metrics.MetadataReadStoreExtentReplicaStatsScope, metrics.MetadataLatency) result, err := mm.mClient.ReadStoreExtentReplicaStats(nil, mReq) sw.Stop() if err != nil { - mm.m3Client.IncCounter(metrics.MetadataReadStoreExtentStatsScope, metrics.MetadataFailures) + mm.m3Client.IncCounter(metrics.MetadataReadStoreExtentReplicaStatsScope, metrics.MetadataFailures) return nil, err } return result.GetExtent(), nil @@ -644,7 +680,7 @@ func (mm *metadataMgrImpl) ReadConsumerGroupByUUID(cgUUID string) (cgDesc *share func (mm *metadataMgrImpl) UpdateDestinationDLQCursors(dstID string, mergeBefore UnixNanoTime, purgeBefore UnixNanoTime) error { - mm.m3Client.IncCounter(metrics.MetadataUpdateDLQCursorScope, metrics.MetadataRequests) + mm.m3Client.IncCounter(metrics.MetadataUpdateDestinationDLQCursorsScope, metrics.MetadataRequests) mReq := m.NewUpdateDestinationDLQCursorsRequest() mReq.DestinationUUID = StringPtr(dstID) @@ -657,12 +693,12 @@ func (mm *metadataMgrImpl) UpdateDestinationDLQCursors(dstID string, mergeBefore mReq.DLQPurgeBefore = Int64Ptr(int64(purgeBefore)) } - sw := mm.m3Client.StartTimer(metrics.MetadataUpdateDLQCursorScope, metrics.MetadataLatency) + sw := mm.m3Client.StartTimer(metrics.MetadataUpdateDestinationDLQCursorsScope, metrics.MetadataLatency) _, err := mm.mClient.UpdateDestinationDLQCursors(nil, mReq) sw.Stop() if err != nil { - mm.m3Client.IncCounter(metrics.MetadataUpdateDLQCursorScope, metrics.MetadataFailures) + mm.m3Client.IncCounter(metrics.MetadataUpdateDestinationDLQCursorsScope, metrics.MetadataFailures) return err } @@ -714,6 +750,33 @@ func (mm *metadataMgrImpl) UpdateExtentStatus(dstID, extID string, status shared return nil } +func (mm *metadataMgrImpl) UpdateRemoteExtentPrimaryStore(dstID string, extentID string, remoteExtentPrimaryStore string) (*m.UpdateExtentStatsResult_, error) { + mm.m3Client.IncCounter(metrics.MetadataUpdateExtentStatsScope, metrics.MetadataRequests) + + mReq := &m.UpdateExtentStatsRequest{ + DestinationUUID: StringPtr(dstID), + ExtentUUID: StringPtr(extentID), + RemoteExtentPrimaryStore: StringPtr(remoteExtentPrimaryStore), + } + + if len(remoteExtentPrimaryStore) == 0 { + mm.m3Client.IncCounter(metrics.MetadataUpdateExtentStatsScope, metrics.MetadataFailures) + return nil, &shared.BadRequestError{ + Message: 
"remoteExtentPrimaryStore is empty", + } + } + + sw := mm.m3Client.StartTimer(metrics.MetadataUpdateExtentStatsScope, metrics.MetadataLatency) + res, err := mm.mClient.UpdateExtentStats(nil, mReq) + sw.Stop() + if err != nil { + mm.m3Client.IncCounter(metrics.MetadataUpdateExtentStatsScope, metrics.MetadataFailures) + return nil, err + } + + return res, err +} + func (mm *metadataMgrImpl) UpdateConsumerGroupExtentStatus(cgID, extID string, status m.ConsumerGroupExtentStatus) error { mm.m3Client.IncCounter(metrics.MetadataUpdateConsumerGroupExtentStatusScope, metrics.MetadataRequests) diff --git a/common/metrics/defs.go b/common/metrics/defs.go index 5af323d1..58acc126 100644 --- a/common/metrics/defs.go +++ b/common/metrics/defs.go @@ -51,7 +51,7 @@ const ( Gauge ) -// Service names for all services that emit M3 +// Service names for all services that emit Metrics const ( Common ServiceIdx = iota Frontend @@ -111,54 +111,100 @@ var GoRuntimeMetrics = map[MetricName]MetricType{ const ( // -- Operation scopes for Metadata (common) -- - // MetadataListDestinationsScope represents method within metadatamgr - MetadataListDestinationsScope = iota - // MetadataListDestinationsPageScope represents method within metadatamgr - MetadataListDestinationsPageScope - // MetadataReadDstScope represents method within metadatamgr - MetadataReadDstScope - // MetadataReadExtentStatsScope represents method within metadatamgr - MetadataReadExtentStatsScope - // MetadataReadStoreExtentStatsScope represents method within metadatamgr - MetadataReadStoreExtentStatsScope - // MetadataReadConsumerGroupExtentScope represents method within metadatamgr + // MetadataListEntityOpsScope defines scope for an operation on metadata + MetadataListEntityOpsScope = iota + // MetadataHostAddrToUUIDScope defines scope for an operation on metadata + MetadataHostAddrToUUIDScope + // MetadataListAllConsumerGroupsScope defines scope for an operation on metadata + MetadataListAllConsumerGroupsScope + // MetadataListConsumerGroupsScope defines scope for an operation on metadata + MetadataListConsumerGroupsScope + // MetadataListDestinationsScope defines scope for an operation on metadata + MetadataListDestinationsScope + // MetadataListDestinationsByUUIDScope defines scope for an operation on metadata + MetadataListDestinationsByUUIDScope + // MetadataListExtentsStatsScope defines scope for an operation on metadata + MetadataListExtentsStatsScope + // MetadataListHostsScope defines scope for an operation on metadata + MetadataListHostsScope + // MetadataListInputHostExtentsStatsScope defines scope for an operation on metadata + MetadataListInputHostExtentsStatsScope + // MetadataListStoreExtentsStatsScope defines scope for an operation on metadata + MetadataListStoreExtentsStatsScope + // MetadataReadConsumerGroupScope defines scope for an operation on metadata + MetadataReadConsumerGroupScope + // MetadataReadConsumerGroupByUUIDScope defines scope for an operation on metadata + MetadataReadConsumerGroupByUUIDScope + // MetadataReadConsumerGroupExtentScope defines scope for an operation on metadata MetadataReadConsumerGroupExtentScope - // MetadataListExtentsByDstScope represents method within metadatamgr - MetadataListExtentsByDstScope - // MetadataListExtentsByInputScope represents method within metadatamgr - MetadataListExtentsByInputScope - // MetadataListExtentsByStoreScope represents method within metadatamgr - MetadataListExtentsByStoreScope - // MetadataListExtentsByConsGroupScope represents method within metadatamgr - 
MetadataListExtentsByConsGroupScope - // MetadataListConsGroupsByDstScope represents method within metadatamgr - MetadataListConsGroupsByDstScope - // MetadataListConsumerGroupsPageScope represents method within metadatamgr - MetadataListConsumerGroupsPageScope - // MetadataAddExtentToConsGroupScope represents method within metadatamgr - MetadataAddExtentToConsGroupScope - // MetadataCreateExtentScope represents method within metadatamgr + // MetadataReadConsumerGroupExtentsScope defines scope for an operation on metadata + MetadataReadConsumerGroupExtentsScope + // MetadataReadConsumerGroupExtentsByExtUUIDScope defines scope for an operation on metadata + MetadataReadConsumerGroupExtentsByExtUUIDScope + // MetadataReadDestinationScope defines scope for an operation on metadata + MetadataReadDestinationScope + // MetadataReadExtentStatsScope defines scope for an operation on metadata + MetadataReadExtentStatsScope + // MetadataUUIDToHostAddrScope defines scope for an operation on metadata + MetadataUUIDToHostAddrScope + // MetadataUpdateServiceConfigScope defines scope for an operation on metadata + MetadataUpdateServiceConfigScope + // MetadataCreateConsumerGroupScope defines scope for an operation on metadata + MetadataCreateConsumerGroupScope + // MetadataCreateConsumerGroupExtentScope defines scope for an operation on metadata + MetadataCreateConsumerGroupExtentScope + // MetadataCreateDestinationScope defines scope for an operation on metadata + MetadataCreateDestinationScope + // MetadataCreateDestinationUUIDScope defines scope for an operation on metadata + MetadataCreateDestinationUUIDScope + // MetadataCreateExtentScope defines scope for an operation on metadata MetadataCreateExtentScope - // MetadataUpdateOutputScope represents method within metadatamgr - MetadataUpdateOutputScope - // MetadataSealExtentScope represents method within metadatamgr - MetadataSealExtentScope - // MetadataDeleteConsumerGroupScope represents method within metadatamgr + // MetadataCreateHostInfoScope defines scope for an operation on metadata + MetadataCreateHostInfoScope + // MetadataCreateServiceConfigScope defines scope for an operation on metadata + MetadataCreateServiceConfigScope + // MetadataDeleteConsumerGroupScope defines scope for an operation on metadata MetadataDeleteConsumerGroupScope - // MetadataUpdateDLQCursorScope represents a method within metadatamgr - MetadataUpdateDLQCursorScope - // MetadataMoveExtentScope lorem ipsum + // MetadataDeleteDestinationScope defines scope for an operation on metadata + MetadataDeleteDestinationScope + // MetadataDeleteDestinationUUIDScope defines scope for an operation on metadata + MetadataDeleteDestinationUUIDScope + // MetadataDeleteHostInfoScope defines scope for an operation on metadata + MetadataDeleteHostInfoScope + // MetadataDeleteServiceConfigScope defines scope for an operation on metadata + MetadataDeleteServiceConfigScope + // MetadataMoveExtentScope defines scope for an operation on metadata MetadataMoveExtentScope - // MetadataReadConsumerGroupScope lorem ipsum - MetadataReadConsumerGroupScope - // MetadataReadConsumerGroupByUUIDScope lorem ipsum - MetadataReadConsumerGroupByUUIDScope - // MetadataUpdateExtentStatsScope lorem ipsum - MetadataUpdateExtentStatsScope - // MetadataUpdateConsumerGroupExtentStatusScope lorem ipsum + // MetadataReadHostInfoScope defines scope for an operation on metadata + MetadataReadHostInfoScope + // MetadataReadServiceConfigScope defines scope for an operation on metadata + MetadataReadServiceConfigScope + 
// MetadataReadStoreExtentReplicaStatsScope defines scope for an operation on metadata + MetadataReadStoreExtentReplicaStatsScope + // MetadataRegisterHostUUIDScope defines scope for an operation on metadata + MetadataRegisterHostUUIDScope + // MetadataSealExtentScope defines scope for an operation on metadata + MetadataSealExtentScope + // MetadataSetAckOffsetScope defines scope for an operation on metadata + MetadataSetAckOffsetScope + // MetadataSetOutputHostScope defines scope for an operation on metadata + MetadataSetOutputHostScope + // MetadataUpdateConsumerGroupScope defines scope for an operation on metadata + MetadataUpdateConsumerGroupScope + // MetadataUpdateConsumerGroupExtentStatusScope defines scope for an operation on metadata MetadataUpdateConsumerGroupExtentStatusScope - // MetadataDeleteDestinationUUIDScope lorem ipsum - MetadataDeleteDestinationUUIDScope + // MetadataUpdateDestinationScope defines scope for an operation on metadata + MetadataUpdateDestinationScope + // MetadataUpdateDestinationDLQCursorsScope defines scope for an operation on metadata + MetadataUpdateDestinationDLQCursorsScope + // MetadataUpdateExtentReplicaStatsScope defines scope for an operation on metadata + MetadataUpdateExtentReplicaStatsScope + // MetadataUpdateExtentStatsScope defines scope for an operation on metadata + MetadataUpdateExtentStatsScope + // MetadataUpdateHostInfoScope defines scope for an operation on metadata + MetadataUpdateHostInfoScope + // MetadataUpdateStoreExtentReplicaStatsScope defines scope for an operation on metadata + MetadataUpdateStoreExtentReplicaStatsScope // -- Operation scopes for InputHost -- @@ -238,8 +284,8 @@ const ( StoreFailedEventScope // StoreExtentStatusOutOfSyncEventScope represents an event handler StoreExtentStatusOutOfSyncEventScope - // RemoteZoneExtentCreatedEventScope represents event handler - RemoteZoneExtentCreatedEventScope + // StartReplicationForRemoteZoneExtentScope represents event handler + StartReplicationForRemoteZoneExtentScope // ExtentMonitorScope represents the extent monitor daemon ExtentMonitorScope // RetentionMgrScope represents the retention manager @@ -358,34 +404,57 @@ var scopeDefs = map[ServiceIdx]map[int]scopeDefinition{ // Common operation tag values (shared by all services) Common: { - // Metadata operation tag values as seen by the M3 backend - MetadataListDestinationsScope: {operation: "MetadataListDestinations"}, - MetadataListDestinationsPageScope: {operation: "MetadataListDestinationsPage"}, - MetadataReadDstScope: {operation: "MetadataReadDst"}, - MetadataReadExtentStatsScope: {operation: "MetadataReadExtentStats"}, - MetadataReadStoreExtentStatsScope: {operation: "MetadataReadStoreExtentStats"}, - MetadataReadConsumerGroupExtentScope: {operation: "MetadataReadConsumerGroupExtent"}, - MetadataListExtentsByDstScope: {operation: "MetadataListExtentsByDst"}, - MetadataListExtentsByInputScope: {operation: "MetadataListExtentsByInput"}, - MetadataListExtentsByStoreScope: {operation: "MetadataListExtentsByStore"}, - MetadataListExtentsByConsGroupScope: {operation: "MetadataListExtentsByConsGroup"}, - MetadataListConsGroupsByDstScope: {operation: "MetadataListConsGroupsByDst"}, - MetadataListConsumerGroupsPageScope: {operation: "MetadataListConsumerGroupsPage"}, - MetadataAddExtentToConsGroupScope: {operation: "MetadataAddExtentToConsGroup"}, - MetadataCreateExtentScope: {operation: "MetadataCreateExtent"}, - MetadataUpdateOutputScope: {operation: "MetadataUpdateOutput"}, - MetadataSealExtentScope: {operation: 
"MetadataSealExtent"}, - MetadataDeleteConsumerGroupScope: {operation: "MetadataDeleteConsumerGroup"}, - MetadataUpdateDLQCursorScope: {operation: "MetadataUpdateDLQCursor"}, - MetadataMoveExtentScope: {operation: "MetadataMoveExtent"}, - MetadataReadConsumerGroupScope: {operation: "MetadataReadConsumerGroup"}, - MetadataReadConsumerGroupByUUIDScope: {operation: "MetadataReadConsumerGroupByUUID"}, - MetadataUpdateExtentStatsScope: {operation: "MetadataUpdateExtentStats"}, - MetadataUpdateConsumerGroupExtentStatusScope: {operation: "MetadataUpdateConsumerGroupExtentStatus"}, - MetadataDeleteDestinationUUIDScope: {operation: "MetadataDeleteDestinationUUID"}, + // Metadata operation tag values as seen by the Metrics backend + MetadataListEntityOpsScope: {operation: "MetadataListEntityOps"}, + MetadataHostAddrToUUIDScope: {operation: "MetadataHostAddrToUUID"}, + MetadataListAllConsumerGroupsScope: {operation: "MetadataListAllConsumerGroups"}, + MetadataListConsumerGroupsScope: {operation: "MetadataListConsumerGroups"}, + MetadataListDestinationsScope: {operation: "MetadataListDestinations"}, + MetadataListDestinationsByUUIDScope: {operation: "MetadataListDestinationsByUUID"}, + MetadataListExtentsStatsScope: {operation: "MetadataListExtentsStats"}, + MetadataListHostsScope: {operation: "MetadataListHosts"}, + MetadataListInputHostExtentsStatsScope: {operation: "MetadataListInputHostExtentsStats"}, + MetadataListStoreExtentsStatsScope: {operation: "MetadataListStoreExtentsStats"}, + MetadataReadConsumerGroupScope: {operation: "MetadataReadConsumerGroup"}, + MetadataReadConsumerGroupByUUIDScope: {operation: "MetadataReadConsumerGroupByUUID"}, + MetadataReadConsumerGroupExtentScope: {operation: "MetadataReadConsumerGroupExtent"}, + MetadataReadConsumerGroupExtentsScope: {operation: "MetadataReadConsumerGroupExtents"}, + MetadataReadConsumerGroupExtentsByExtUUIDScope: {operation: "MetadataReadConsumerGroupExtentsByExtUUID"}, + MetadataReadDestinationScope: {operation: "MetadataReadDestination"}, + MetadataReadExtentStatsScope: {operation: "MetadataReadExtentStats"}, + MetadataUUIDToHostAddrScope: {operation: "MetadataUUIDToHostAddr"}, + MetadataUpdateServiceConfigScope: {operation: "MetadataUpdateServiceConfig"}, + MetadataCreateConsumerGroupScope: {operation: "MetadataCreateConsumerGroup"}, + MetadataCreateConsumerGroupExtentScope: {operation: "MetadataCreateConsumerGroupExtent"}, + MetadataCreateDestinationScope: {operation: "MetadataCreateDestination"}, + MetadataCreateDestinationUUIDScope: {operation: "MetadataCreateDestinationUUID"}, + MetadataCreateExtentScope: {operation: "MetadataCreateExtent"}, + MetadataCreateHostInfoScope: {operation: "MetadataCreateHostInfo"}, + MetadataCreateServiceConfigScope: {operation: "MetadataCreateServiceConfig"}, + MetadataDeleteConsumerGroupScope: {operation: "MetadataDeleteConsumerGroup"}, + MetadataDeleteDestinationScope: {operation: "MetadataDeleteDestination"}, + MetadataDeleteDestinationUUIDScope: {operation: "MetadataDeleteDestinationUUID"}, + MetadataDeleteHostInfoScope: {operation: "MetadataDeleteHostInfo"}, + MetadataDeleteServiceConfigScope: {operation: "MetadataDeleteServiceConfig"}, + MetadataMoveExtentScope: {operation: "MetadataMoveExtent"}, + MetadataReadHostInfoScope: {operation: "MetadataReadHostInfo"}, + MetadataReadServiceConfigScope: {operation: "MetadataReadServiceConfig"}, + MetadataReadStoreExtentReplicaStatsScope: {operation: "MetadataReadStoreExtentReplicaStats"}, + MetadataRegisterHostUUIDScope: {operation: 
"MetadataRegisterHostUUID"}, + MetadataSealExtentScope: {operation: "MetadataSealExtent"}, + MetadataSetAckOffsetScope: {operation: "MetadataSetAckOffset"}, + MetadataSetOutputHostScope: {operation: "MetadataSetOutputHost"}, + MetadataUpdateConsumerGroupScope: {operation: "MetadataUpdateConsumerGroup"}, + MetadataUpdateConsumerGroupExtentStatusScope: {operation: "MetadataUpdateConsumerGroupExtentStatus"}, + MetadataUpdateDestinationScope: {operation: "MetadataUpdateDestination"}, + MetadataUpdateDestinationDLQCursorsScope: {operation: "MetadataUpdateDestinationDLQCursors"}, + MetadataUpdateExtentReplicaStatsScope: {operation: "MetadataUpdateExtentReplicaStats"}, + MetadataUpdateExtentStatsScope: {operation: "MetadataUpdateExtentStats"}, + MetadataUpdateHostInfoScope: {operation: "MetadataUpdateHostInfo"}, + MetadataUpdateStoreExtentReplicaStatsScope: {operation: "MetadataUpdateStoreExtentReplicaStats"}, }, - // Frontend operation tag values as seen by the M3 backend + // Frontend operation tag values as seen by the Metrics backend Frontend: { CreateDestinationScope: {operation: "CreateDestination"}, ReadDestinationScope: {operation: "ReadDestination"}, @@ -406,7 +475,7 @@ var scopeDefs = map[ServiceIdx]map[int]scopeDefinition{ MergeDLQForConsumerGroupScope: {operation: "MergeDLQForConsumerGroup"}, }, - // Inputhost operation tag values as seen by the M3 backend + // Inputhost operation tag values as seen by the Metrics backend Inputhost: { OpenPublisherStreamScope: {operation: "OpenPublisherStream"}, DestinationsUpdatedScope: {operation: "DestinationsUpdated"}, @@ -414,7 +483,7 @@ var scopeDefs = map[ServiceIdx]map[int]scopeDefinition{ PutMessageBatchInputHostScope: {operation: "PutMessageBatchInputHost"}, }, - // Outputhost operation tag values as seen by the M3 backend + // Outputhost operation tag values as seen by the Metrics backend Outputhost: { OpenConsumerStreamScope: {operation: "OpenConsumerStream"}, AckMessagesScope: {operation: "AckMessage"}, @@ -424,7 +493,7 @@ var scopeDefs = map[ServiceIdx]map[int]scopeDefinition{ UnloadConsumerGroupsScope: {operation: "UnloadConsumerGroups"}, }, - // Storage operation tag values as seen by the M3 backend + // Storage operation tag values as seen by the Metrics backend Storage: { OpenAppendStreamScope: {operation: "OpenAppendStream"}, OpenReadStreamScope: {operation: "OpenReadStream"}, @@ -439,7 +508,7 @@ var scopeDefs = map[ServiceIdx]map[int]scopeDefinition{ ReplicateExtentScope: {operation: "ReplicateExtent"}, }, - // Replicator operation tag valuies as seen by the M3 backend + // Replicator operation tag values as seen by the Metrics backend Replicator: { OpenReplicationRemoteReadScope: {operation: "OpenReplicationRemoteReadStream"}, OpenReplicationReadScope: {operation: "OpenReplicationReadStream"}, @@ -454,40 +523,40 @@ var scopeDefs = map[ServiceIdx]map[int]scopeDefinition{ ReplicatorReconcileScope: {operation: "ReplicatorReconcile"}, }, - // Controller operation tag values as seen by the M3 backend + // Controller operation tag values as seen by the Metrics backend Controller: { - GetInputHostsScope: {operation: "GetInputHosts"}, - GetOutputHostsScope: {operation: "GetOutputHosts"}, - ReportNodeMetricScope: {operation: "ReportNodeMetric"}, - ReportDestinationMetricScope: {operation: "ReportDestinationMetric"}, - ReportDestinationExtentMetricScope: {operation: "ReportDestinatoinExtentMetric"}, - ReportConsumerGroupMetricScope: {operation: "ReportConsumerGroupMetric"}, - ReportConsumerGroupExtentMetricScope: {operation: 
"ReportConsumerGroupExtentMetric"}, - ReportStoreExtentMetricScope: {operation: "ReportStoreExtentMetric"}, - RefreshInputHostsForDstScope: {operation: "RefreshInputHostsForDst"}, - RefreshOutputHostsForConsGroupScope: {operation: "RefreshOutputHostsForConsGroup"}, - EventPipelineScope: {operation: "EventPipeline"}, - ExtentsUnreachableScope: {operation: "ExtentsUnreachable"}, - ExtentCreatedEventScope: {operation: "ExtentCreatedEvent"}, - ConsGroupUpdatedEventScope: {operation: "ConsGroupUpdatedEvent"}, - ExtentDownEventScope: {operation: "ExtentDownEvent"}, - InputNotifyEventScope: {operation: "InputNotifyEvent"}, - OutputNotifyEventScope: {operation: "OutputNotifyEvent"}, - InputFailedEventScope: {operation: "InputFailedEvent"}, - StoreFailedEventScope: {operation: "StoreFailedEvent"}, - StoreExtentStatusOutOfSyncEventScope: {operation: "StoreExtentStatusOutOfSyncEvent"}, - RemoteZoneExtentCreatedEventScope: {operation: "RemoteZoneExtentCreatedEvent"}, - QueueDepthBacklogCGScope: {operation: "QueueDepthBacklog"}, - ExtentMonitorScope: {operation: "ExtentMonitor"}, - RetentionMgrScope: {operation: "RetentionMgr"}, - DLQOperationScope: {operation: "DLQOperation"}, - ControllerCreateDestinationScope: {operation: "CreateDestination"}, - ControllerUpdateDestinationScope: {operation: "UpdateDestination"}, - ControllerDeleteDestinationScope: {operation: "DeleteDestination"}, - ControllerCreateConsumerGroupScope: {operation: "CreateConsumerGroup"}, - ControllerUpdateConsumerGroupScope: {operation: "UpdateConsumerGroup"}, - ControllerDeleteConsumerGroupScope: {operation: "DeleteConsumerGroup"}, - ControllerCreateRemoteZoneExtentScope: {operation: "CreateRemoteZoneExtent"}, + GetInputHostsScope: {operation: "GetInputHosts"}, + GetOutputHostsScope: {operation: "GetOutputHosts"}, + ReportNodeMetricScope: {operation: "ReportNodeMetric"}, + ReportDestinationMetricScope: {operation: "ReportDestinationMetric"}, + ReportDestinationExtentMetricScope: {operation: "ReportDestinatoinExtentMetric"}, + ReportConsumerGroupMetricScope: {operation: "ReportConsumerGroupMetric"}, + ReportConsumerGroupExtentMetricScope: {operation: "ReportConsumerGroupExtentMetric"}, + ReportStoreExtentMetricScope: {operation: "ReportStoreExtentMetric"}, + RefreshInputHostsForDstScope: {operation: "RefreshInputHostsForDst"}, + RefreshOutputHostsForConsGroupScope: {operation: "RefreshOutputHostsForConsGroup"}, + EventPipelineScope: {operation: "EventPipeline"}, + ExtentsUnreachableScope: {operation: "ExtentsUnreachable"}, + ExtentCreatedEventScope: {operation: "ExtentCreatedEvent"}, + ConsGroupUpdatedEventScope: {operation: "ConsGroupUpdatedEvent"}, + ExtentDownEventScope: {operation: "ExtentDownEvent"}, + InputNotifyEventScope: {operation: "InputNotifyEvent"}, + OutputNotifyEventScope: {operation: "OutputNotifyEvent"}, + InputFailedEventScope: {operation: "InputFailedEvent"}, + StoreFailedEventScope: {operation: "StoreFailedEvent"}, + StoreExtentStatusOutOfSyncEventScope: {operation: "StoreExtentStatusOutOfSyncEvent"}, + StartReplicationForRemoteZoneExtentScope: {operation: "StartReplicationForRemoteZoneExtent"}, + QueueDepthBacklogCGScope: {operation: "QueueDepthBacklog"}, + ExtentMonitorScope: {operation: "ExtentMonitor"}, + RetentionMgrScope: {operation: "RetentionMgr"}, + DLQOperationScope: {operation: "DLQOperation"}, + ControllerCreateDestinationScope: {operation: "CreateDestination"}, + ControllerUpdateDestinationScope: {operation: "UpdateDestination"}, + ControllerDeleteDestinationScope: {operation: "DeleteDestination"}, + 
ControllerCreateConsumerGroupScope: {operation: "CreateConsumerGroup"}, + ControllerUpdateConsumerGroupScope: {operation: "UpdateConsumerGroup"}, + ControllerDeleteConsumerGroupScope: {operation: "DeleteConsumerGroup"}, + ControllerCreateRemoteZoneExtentScope: {operation: "CreateRemoteZoneExtent"}, }, } @@ -590,6 +659,8 @@ const ( OutputhostFailures // OutputhostLongPollingTimeOut indicates time out for long polling OutputhostLongPollingTimeOut + // OutputhostReceiveMsgBatchWriteToMsgCacheTimeout indicates time out for ReceiveMsgBatch to write to msg cache + OutputhostReceiveMsgBatchWriteToMsgCacheTimeout // OutputhostMessageSent records the count of messages sent OutputhostMessageSent // OutputhostMessageFailures records the count of messages sent failures @@ -888,26 +959,27 @@ var metricDefs = map[ServiceIdx]map[int]metricDefinition{ // definitions for Outputhost metrics Outputhost: { - OutputhostRequests: {Counter, "outputhost.requests"}, - OutputhostFailures: {Counter, "outputhost.errors"}, - OutputhostLongPollingTimeOut: {Counter, "outputhost.timeout-longpoll"}, - OutputhostMessageSent: {Counter, "outputhost.message.sent"}, - OutputhostMessageFailures: {Counter, "outputhost.message.errors"}, - OutputhostCreditsReceived: {Counter, "outputhost.credit-received"}, - OutputhostDLQMessageRequests: {Counter, "outputhost.message.sent-dlq"}, - OutputhostDLQMessageFailures: {Counter, "outputhost.message.errors-dlq"}, - OutputhostMessageRedelivered: {Counter, "outputhost.message.redelivered"}, - OutputhostMessageSentAck: {Counter, "outputhost.message.sent-ack"}, - OutputhostMessageSentNAck: {Counter, "outputhost.message.sent-nack"}, - OutputhostMessageAckFailures: {Counter, "outputhost.message.errors-ack"}, - OutputhostMessageNackFailures: {Counter, "outputhost.message.errors-nack"}, - OutputhostMessageNoAckManager: {Counter, "outputhost.message.no-ackmgr"}, - OutputhostMessageDiffSession: {Counter, "outputhost.message.diff-session"}, - OutputhostMessageAckManagerError: {Counter, "outputhost.message.errors-ackmgr"}, - OutputhostUserFailures: {Counter, "outputhost.user-errors"}, - OutputhostInternalFailures: {Counter, "outputhost.internal-errors"}, - OutputhostConsConnection: {Gauge, "outputhost.consconnection"}, - OutputhostLatencyTimer: {Timer, "outputhost.latency"}, + OutputhostRequests: {Counter, "outputhost.requests"}, + OutputhostFailures: {Counter, "outputhost.errors"}, + OutputhostLongPollingTimeOut: {Counter, "outputhost.timeout-longpoll"}, + OutputhostReceiveMsgBatchWriteToMsgCacheTimeout: {Counter, "outputhost.timeout-receive-msg-batch-write-to-msg-cache"}, + OutputhostMessageSent: {Counter, "outputhost.message.sent"}, + OutputhostMessageFailures: {Counter, "outputhost.message.errors"}, + OutputhostCreditsReceived: {Counter, "outputhost.credit-received"}, + OutputhostDLQMessageRequests: {Counter, "outputhost.message.sent-dlq"}, + OutputhostDLQMessageFailures: {Counter, "outputhost.message.errors-dlq"}, + OutputhostMessageRedelivered: {Counter, "outputhost.message.redelivered"}, + OutputhostMessageSentAck: {Counter, "outputhost.message.sent-ack"}, + OutputhostMessageSentNAck: {Counter, "outputhost.message.sent-nack"}, + OutputhostMessageAckFailures: {Counter, "outputhost.message.errors-ack"}, + OutputhostMessageNackFailures: {Counter, "outputhost.message.errors-nack"}, + OutputhostMessageNoAckManager: {Counter, "outputhost.message.no-ackmgr"}, + OutputhostMessageDiffSession: {Counter, "outputhost.message.diff-session"}, + OutputhostMessageAckManagerError: {Counter, 
"outputhost.message.errors-ackmgr"}, + OutputhostUserFailures: {Counter, "outputhost.user-errors"}, + OutputhostInternalFailures: {Counter, "outputhost.internal-errors"}, + OutputhostConsConnection: {Gauge, "outputhost.consconnection"}, + OutputhostLatencyTimer: {Timer, "outputhost.latency"}, }, // definitions for Frontend metrics diff --git a/common/mockloadreporterdaemonfactory.go b/common/mockloadreporterdaemonfactory.go index 064fd29b..2da5f82b 100644 --- a/common/mockloadreporterdaemonfactory.go +++ b/common/mockloadreporterdaemonfactory.go @@ -37,6 +37,7 @@ type MockLoadReporterDaemonFactory struct { // CreateReporter is the mock implementation for CreateReporter function on common.LoadReporterDaemonFactory func (m *MockLoadReporterDaemonFactory) CreateReporter(interval time.Duration, source LoadReporterSource, logger bark.Logger) LoadReporterDaemon { // Ignore the logger parameter and create a new one as it causes data race with mock library - args := m.Called(interval, source, bark.NewLoggerFromLogrus(log.New())) + // force the LoadReporterSource to nil as it also introduces a data race with mock lib + args := m.Called(interval, nil, bark.NewLoggerFromLogrus(log.New())) return args.Get(0).(LoadReporterDaemon) } diff --git a/common/mockrpm.go b/common/mockrpm.go index 2b22edae..ba85f21a 100644 --- a/common/mockrpm.go +++ b/common/mockrpm.go @@ -69,6 +69,11 @@ func (rpm *MockRingpopMonitor) Start() {} // Stop attempts to stop the RingpopMonitor routines func (rpm *MockRingpopMonitor) Stop() {} +// GetBootstrappedChannel returns a channel, which will be closed once ringpop is bootstrapped +func (rpm *MockRingpopMonitor) GetBootstrappedChannel() chan struct{} { + return nil +} + // GetHosts retrieves all the members for the given service func (rpm *MockRingpopMonitor) GetHosts(service string) ([]*HostInfo, error) { rpm.RLock() diff --git a/common/rpm.go b/common/rpm.go index e4dc63f9..368e05c9 100644 --- a/common/rpm.go +++ b/common/rpm.go @@ -56,6 +56,8 @@ type ( Start() // Stop stops the RingpopMonitor Stop() + // GetBootstrappedChannel returns a channel, which will be closed once ringpop is bootstrapped + GetBootstrappedChannel() chan struct{} // GetHosts retrieves all the members for the given service GetHosts(service string) ([]*HostInfo, error) // FindHostForAddr finds and returns the host for the given service:addr @@ -109,6 +111,8 @@ type ( logger bark.Logger oldChecksum uint32 serverCount int + bootstrapped bool + bootstrappedC chan struct{} } serviceInfo struct { @@ -155,6 +159,7 @@ func NewRingpopMonitor(rp *ringpop.Ringpop, services []string, resolver UUIDReso serviceToInfo: make(map[string]*serviceInfo), logger: log, rp: rp, + bootstrappedC: make(chan struct{}), } membershipMap := make(map[string]*membershipInfo) @@ -199,6 +204,10 @@ func (rpm *ringpopMonitorImpl) Stop() { } } +func (rpm *ringpopMonitorImpl) GetBootstrappedChannel() chan struct{} { + return rpm.bootstrappedC +} + // GetHosts retrieves all the members for the given service func (rpm *ringpopMonitorImpl) GetHosts(service string) ([]*HostInfo, error) { membershipMap := rpm.serviceToMembers.Load().(map[string]*membershipInfo) @@ -336,6 +345,12 @@ func (rpm *ringpopMonitorImpl) workerLoop() { case <-refreshTimeout: rpm.refreshAll() refreshTimeout = time.After(refreshInterval) + + // broadcast bootstrap is done by closing the channel + if !rpm.bootstrapped { + close(rpm.bootstrappedC) + rpm.bootstrapped = true + } case <-rpm.shutdownC: quit = true } @@ -414,7 +429,7 @@ func (rpm *ringpopMonitorImpl) 
refresh(service string, currInfo *membershipInfo) for _, addr := range addrs { uuid, err := rpm.uuidResolver.ReverseLookup(addr) if err != nil { - rpm.logger.WithFields(bark.Fields{`ringpopAddress`: addr}).Debug("ReverseLookup failed") + rpm.logger.WithFields(bark.Fields{`ringpopAddress`: addr}).Info("ReverseLookup failed") continue } diff --git a/common/rpm_test.go b/common/rpm_test.go index df74319c..c96f6a63 100644 --- a/common/rpm_test.go +++ b/common/rpm_test.go @@ -87,6 +87,9 @@ func (s *RpmSuite) TestRingpopMon() { detectedInHosts := make(map[string]bool) detectedOutHosts := make(map[string]bool) + bootstrapped := rpm.GetBootstrappedChannel() + bootstrapNotified := false + timeoutCh := time.After(time.Minute) for { @@ -97,6 +100,8 @@ func (s *RpmSuite) TestRingpopMon() { case e := <-outListenCh: s.Equal(HostAddedEvent, e.Type, "Wrong event type") detectedOutHosts[e.Key] = true + case <-bootstrapped: + bootstrapNotified = true case <-timeoutCh: s.Fail("Timed out waiting for hosts to be discovered") } @@ -119,6 +124,8 @@ func (s *RpmSuite) TestRingpopMon() { s.Equal(false, rpm.IsHostHealthy("in", uuid), "Ringpop monitor state is corrupted") } + s.Equal(true, bootstrapNotified, `bootstrap not notified`) + inAddrs := make(map[string]bool) outAddrs := make(map[string]bool) diff --git a/common/util.go b/common/util.go index de37e3ae..8c11750a 100644 --- a/common/util.go +++ b/common/util.go @@ -57,7 +57,6 @@ var ringpopBootstrapFile string const hyperbahnPort int16 = 21300 const rpAppNamePrefix string = "cherami" const maxRpJoinTimeout = 30 * time.Second -const defaultNumReplicas = 3 const maxRateExtrapolationTime = 120.0 // MaxDuration is maximum time duration diff --git a/config/base.yaml b/config/base.yaml index 5d8ba711..763e2c8a 100644 --- a/config/base.yaml +++ b/config/base.yaml @@ -17,13 +17,14 @@ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN # THE SOFTWARE. + +# DefaultServiceConfig stores the default service config for each service DefaultServiceConfig: - HostUUID: "" HyperbahnBootstrapFile: "" RingHosts: "" - EnableLimits: true # by default we have limits enabled - ListenAddress: "" + EnableLimits: true +# ServiceConfig overrides default config with service-specific config such as ports ServiceConfig: cherami-inputhost: Port: 4240 @@ -72,21 +73,30 @@ ServiceConfig: logLevel: warn disableLogging: true +# MetadataConfig specifies location of the Cassandra and ketspace MetadataConfig: CassandraHosts: "" Keyspace: "cherami" Consistency: "one" ClusterName: "base" +# ReplicatorConfig specifies ReplicatorConfig: DefaultAuthoritativeZone: "zone1" ReplicatorHosts: zone1: 192.168.0.1 zone2: 192.168.0.2 +# Logging configuration logging: level: error stdout: false +# DefaultDestinationConfig right now configures how many replicas you want DefaultDestinationConfig: Replicas: 3 + +# StorageConfig specfies location of the message storage, and the UUID of the storage host. The UUID should be different in each host. 
+# StorageConfig: +# BaseDir: "" +# HostUUID: "" \ No newline at end of file diff --git a/config/laptop.yaml b/config/local.yaml similarity index 86% rename from config/laptop.yaml rename to config/local.yaml index ab6ebd86..642caf0d 100644 --- a/config/laptop.yaml +++ b/config/local.yaml @@ -17,5 +17,17 @@ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN # THE SOFTWARE. + DefaultDestinationConfig: Replicas: 1 + +MetadataConfig: + CassandraHosts: "127.0.0.1" + +StorageConfig: + BaseDir: /tmp/cherami-store + HostUUID: "11111111-1111-1111-1111-111111111111" + +logging: + level: debug + stdout: true \ No newline at end of file diff --git a/glide.lock b/glide.lock index 97bedaf6..fe266ff4 100644 --- a/glide.lock +++ b/glide.lock @@ -1,5 +1,5 @@ hash: d8faa3da0bc5a170bf96a93b78a7531adb802a618dd7f429256d6d83dc61ceae -updated: 2016-12-29T08:19:28.767949689-08:00 +updated: 2017-01-11T08:37:29.102849926-08:00 imports: - name: github.com/apache/thrift version: 366e89ead7df34b4132c2accb59dc14fce564883 @@ -113,7 +113,7 @@ imports: - common/websocket - stream - name: github.com/uber/cherami-thrift - version: dbe0aa7dcb2e10646e9d81fda57f0cdb5265b846 + version: 0b29ca48a00837364f44084c2733bfc395585347 subpackages: - .generated/go/admin - .generated/go/cherami diff --git a/scripts/cherami-setup-schema b/scripts/cherami-setup-schema index 3d486ffd..4fde5b92 100755 --- a/scripts/cherami-setup-schema +++ b/scripts/cherami-setup-schema @@ -11,5 +11,5 @@ keyspace="cherami" CHERAMI_CQLSH_DIR="/usr/local/bin" CHERAMI_SCHEMA_DIR="clients/metadata/schema" -$CHERAMI_CQLSH_DIR/cqlsh -f $CHERAMI_SCHEMA_DIR/metadata_keyspace_test.cql -$CHERAMI_CQLSH_DIR/cqlsh -k $keyspace -f $CHERAMI_SCHEMA_DIR/metadata_test.cql +$CHERAMI_CQLSH_DIR/cqlsh -f $CHERAMI_SCHEMA_DIR/metadata_keyspace.cql +$CHERAMI_CQLSH_DIR/cqlsh -k $keyspace -f $CHERAMI_SCHEMA_DIR/metadata.cql diff --git a/services/controllerhost/api_handlers.go b/services/controllerhost/api_handlers.go index 9f57ca83..c175a454 100644 --- a/services/controllerhost/api_handlers.go +++ b/services/controllerhost/api_handlers.go @@ -276,7 +276,7 @@ func createExtent(context *Context, dstUUID string, isMultiZoneDest bool, m3Scop } extentUUID = uuid.New() - _, err = context.mm.CreateExtent(dstUUID, extentUUID, inhost.UUID, storeids, ``) + _, err = context.mm.CreateExtent(dstUUID, extentUUID, inhost.UUID, storeids) if err != nil { context.m3Client.IncCounter(m3Scope, metrics.ControllerErrMetadataUpdateCounter) return diff --git a/services/controllerhost/consumer_test.go b/services/controllerhost/consumer_test.go index f56be628..2b114ac0 100644 --- a/services/controllerhost/consumer_test.go +++ b/services/controllerhost/consumer_test.go @@ -70,7 +70,7 @@ func (s *McpSuite) TestCGExtentSelectorWithNoConsumableExtents() { for i := 0; i < 15; i++ { extID := uuid.New() storeIDs := []string{uuid.New(), uuid.New(), uuid.New()} - context.mm.CreateExtent(dstID, extID, inhosts[0].UUID, storeIDs, "zone1") + context.mm.CreateExtent(dstID, extID, inhosts[0].UUID, storeIDs) if i%3 == 0 { context.mm.SealExtent(dstID, extID) } @@ -83,7 +83,7 @@ func (s *McpSuite) TestCGExtentSelectorWithNoConsumableExtents() { for i := 0; i < 2; i++ { extID := uuid.New() storeIDs := []string{stores[0], stores[1], stores[2]} - context.mm.CreateExtent(dstID, extID, inhosts[0].UUID, storeIDs, "zone1") + context.mm.CreateExtent(dstID, extID, inhosts[0].UUID, storeIDs) cgExtents.consumed[extID] = struct{}{} } @@ 
-113,7 +113,7 @@ func (s *McpSuite) TestCGExtentSelectorHonorsCreatedTime() { for i := 0; i < 10; i++ { extID := uuid.New() - context.mm.CreateExtent(dstID, extID, inhosts[0].UUID, stores, "zone1") + context.mm.CreateExtent(dstID, extID, inhosts[0].UUID, stores) if i%3 == 0 { context.mm.SealExtent(dstID, extID) } @@ -157,7 +157,7 @@ func (s *McpSuite) TestCGExtentSelectorHonorsDlqQuota() { for i := 0; i <= maxExtentsToConsumeForDstPlain; i++ { extID := uuid.New() - context.mm.CreateExtent(dstID, extID, inhosts[0].UUID, stores, "zone1") + context.mm.CreateExtent(dstID, extID, inhosts[0].UUID, stores) if i == maxExtentsToConsumeForDstPlain { // make the last extent a DLQExtent // don't add it to the list of open CGExtents @@ -189,7 +189,7 @@ func (s *McpSuite) TestCGExtentSelectorHonorsDlqQuota() { dlqExtents := make(map[string]struct{}) for i := 0; i < 15; i++ { extID := uuid.New() - context.mm.CreateExtent(dstID, extID, inhosts[0].UUID, stores, "zone1") + context.mm.CreateExtent(dstID, extID, inhosts[0].UUID, stores) if i < 5 { dlqExtents[extID] = struct{}{} context.mm.SealExtent(dstID, extID) @@ -256,7 +256,7 @@ func (s *McpSuite) TestCGExtentSelectorHonorsRemoteExtent() { if i%2 == 0 { zone = `zone1` } - context.mm.CreateExtent(dstID, extID, inhosts[0].UUID, stores, zone) + context.mm.CreateRemoteZoneExtent(dstID, extID, inhosts[0].UUID, stores, zone, stores[0]) extents = append(extents, extID) nExtents++ } @@ -313,7 +313,7 @@ func (s *McpSuite) TestCGExtentSelectorWithBacklog() { extID := uuid.New() // create 96 DLQ dstExtents and 4 regular dstExtents // with dlq extents preceding in creation time - context.mm.CreateExtent(dstID, extID, inhosts[0].UUID, stores, "zone1") + context.mm.CreateExtent(dstID, extID, inhosts[0].UUID, stores) if i < 96 { context.mm.SealExtent(dstID, extID) context.mm.MoveExtent(dstID, dstID, extID, cgDesc.GetConsumerGroupUUID()) diff --git a/services/controllerhost/controllerhost.go b/services/controllerhost/controllerhost.go index 6587e3b1..05d03772 100644 --- a/services/controllerhost/controllerhost.go +++ b/services/controllerhost/controllerhost.go @@ -33,7 +33,7 @@ package controllerhost import ( "encoding/json" "fmt" - "sort" + "math/rand" "strings" "sync/atomic" "time" @@ -219,6 +219,7 @@ func (mcp *Mcp) Start(thriftService []thrift.TChanServer) { clientFactory: context.clientFactory, log: context.log, m3Client: context.m3Client, + localZone: context.localZone, }) context.retMgr.Start() @@ -1127,13 +1128,14 @@ func (mcp *Mcp) CreateRemoteZoneExtent(ctx thrift.Context, createRequest *shared for i := 0; i < nReplicasPerExtent; i++ { storeids[i] = storehosts[i].UUID } - sort.Strings(storeids) + remoteExtentPrimaryStore := storeids[rand.Intn(len(storeids))] // Since this is an extent from another zone, we don't need to assign input host. i.e. 
the extent is read-only // We use a special input host uuid (instead of an empty one) for this because there're lots of places where an valid uuid is required inputHost := common.InputHostForRemoteExtent - res, err := context.mm.CreateExtent(createRequest.GetExtent().GetDestinationUUID(), - createRequest.GetExtent().GetExtentUUID(), inputHost, storeids, createRequest.GetExtent().GetOriginZone()) + + res, err := context.mm.CreateRemoteZoneExtent(createRequest.GetExtent().GetDestinationUUID(), + createRequest.GetExtent().GetExtentUUID(), inputHost, storeids, createRequest.GetExtent().GetOriginZone(), remoteExtentPrimaryStore) if err != nil { lclLg.Error(err.Error()) context.m3Client.IncCounter(metrics.ControllerCreateRemoteZoneExtentScope, metrics.ControllerErrMetadataUpdateCounter) @@ -1141,7 +1143,7 @@ func (mcp *Mcp) CreateRemoteZoneExtent(ctx thrift.Context, createRequest *shared } // trigger store to start replication - event := NewRemoteZoneExtentCreatedEvent(createRequest.GetExtent().GetDestinationUUID(), createRequest.GetExtent().GetExtentUUID(), storeids) + event := NewStartReplicationForRemoteZoneExtent(createRequest.GetExtent().GetDestinationUUID(), createRequest.GetExtent().GetExtentUUID(), storeids, remoteExtentPrimaryStore) mcp.context.eventPipeline.Add(event) lclLg.Info("Remote Zone Extent Created") diff --git a/services/controllerhost/controllerhost_test.go b/services/controllerhost/controllerhost_test.go index 4fa820ef..afca2635 100644 --- a/services/controllerhost/controllerhost_test.go +++ b/services/controllerhost/controllerhost_test.go @@ -441,7 +441,7 @@ func (s *McpSuite) TestGetOutputHostsMaxOpenExtentsLimit() { for i := 0; i < maxExtents+1; i++ { extentUUID := uuid.New() - _, err = s.mcp.context.mm.CreateExtent(dstUUID, extentUUID, inhost.UUID, storeids, ``) + _, err = s.mcp.context.mm.CreateExtent(dstUUID, extentUUID, inhost.UUID, storeids) s.Nil(err, "Failed to create new extent") extents[extentUUID] = true } @@ -581,7 +581,7 @@ func (s *McpSuite) TestGetOutputHosts() { } inhost, _ := s.mcp.context.placement.PickInputHost(storehosts) extentUUID = uuid.New() - _, err = s.mcp.context.mm.CreateExtent(dstUUID, extentUUID, inhost.UUID, storeids, ``) + _, err = s.mcp.context.mm.CreateExtent(dstUUID, extentUUID, inhost.UUID, storeids) s.Nil(err, "Failed to create new extent") extents[extentUUID] = true @@ -874,4 +874,17 @@ func (s *McpSuite) TestCreateRemoteZoneExtent() { s.Equal(extentUUID, extentStats.GetExtentStats().GetExtent().GetExtentUUID()) s.Equal(destUUID, extentStats.GetExtentStats().GetExtent().GetDestinationUUID()) s.Equal(originZone, extentStats.GetExtentStats().GetExtent().GetOriginZone()) + + primary := extentStats.GetExtentStats().GetExtent().GetRemoteExtentPrimaryStore() + s.True(len(primary) > 0) + + primaryValid := false + stores := extentStats.GetExtentStats().GetExtent().GetStoreUUIDs() + for _, store := range stores { + if store == primary { + primaryValid = true + break + } + } + s.True(primaryValid) } diff --git a/services/controllerhost/dfdd.go b/services/controllerhost/dfdd.go index 3e1009f1..da764a70 100644 --- a/services/controllerhost/dfdd.go +++ b/services/controllerhost/dfdd.go @@ -26,6 +26,7 @@ import ( "time" "github.com/uber/cherami-server/common" + "github.com/uber-common/bark" ) type ( @@ -36,6 +37,10 @@ type ( // detection logic (on top of Ringpop) must go. 
Dfdd interface { common.Daemon + + // override the durations, used for testing + OverrideHostDownPeriodForStage2(period time.Duration) + OverrideHealthCheckInterval(period time.Duration) } // serviceID is an enum for identifying @@ -52,6 +57,17 @@ type ( // events to this channel inputListenerCh chan *common.RingpopListenerEvent storeListenerCh chan *common.RingpopListenerEvent + + healthCheckTicker *time.Ticker + healthCheckInterval time.Duration + + unhealthyStores map[string]time.Time + unhealthyStoresLock sync.RWMutex + + unhealthyInputs map[string]time.Time + unhealthyInputsLock sync.RWMutex + + hostDownPeriodForStage2 time.Duration } ) @@ -63,6 +79,24 @@ const ( const ( listenerChannelSize = 32 + + healthCheckInterval = time.Duration(1 * time.Minute) +) + +const ( + hostDownPeriodForStage2 = time.Duration(3 * time.Minute) +) + +// different stages of host being down +// stage 1: host is just removed from ringpop. Any service restart or deployment can trigger it +// stage 2: host is removed from ringpop for hostDownPeriodForStage2. For example a machine reboot can trigger it +// stage 3: host is removed form ringpop for hostDownPeriodForStage3(for example: 24 hrs). Most likely the machine is down and needs manual repair. +// Note currently stage 3 is not being handled yet +type hostDownStage int + +const ( + hostDownStage1 hostDownStage = iota + hostDownStage2 ) // NewDfdd creates and returns an instance of discovery @@ -73,10 +107,14 @@ const ( // of healthy hosts for every service, thats a WIP. func NewDfdd(context *Context) Dfdd { return &dfddImpl{ - context: context, - shutdownC: make(chan struct{}), - inputListenerCh: make(chan *common.RingpopListenerEvent, listenerChannelSize), - storeListenerCh: make(chan *common.RingpopListenerEvent, listenerChannelSize), + context: context, + shutdownC: make(chan struct{}), + inputListenerCh: make(chan *common.RingpopListenerEvent, listenerChannelSize), + storeListenerCh: make(chan *common.RingpopListenerEvent, listenerChannelSize), + unhealthyStores: make(map[string]time.Time), + unhealthyInputs: make(map[string]time.Time), + hostDownPeriodForStage2: hostDownPeriodForStage2, + healthCheckInterval: healthCheckInterval, } } @@ -85,6 +123,8 @@ func (dfdd *dfddImpl) Start() { dfdd.context.log.Fatal("Attempt to start failure detector twice") } + dfdd.healthCheckTicker = time.NewTicker(dfdd.healthCheckInterval) + rpm := dfdd.context.rpm err := rpm.AddListener(common.InputServiceName, buildListenerName(common.InputServiceName), dfdd.inputListenerCh) @@ -103,6 +143,8 @@ func (dfdd *dfddImpl) Start() { go dfdd.run() + go dfdd.healthCheck() + dfdd.context.log.Info("Failure Detector Daemon started") } @@ -132,14 +174,113 @@ func (dfdd *dfddImpl) run() { } } +func (dfdd *dfddImpl) OverrideHostDownPeriodForStage2(period time.Duration) { + dfdd.hostDownPeriodForStage2 = period +} + +func (dfdd *dfddImpl) OverrideHealthCheckInterval(period time.Duration) { + dfdd.healthCheckInterval = period +} + +func (dfdd *dfddImpl) healthCheckRoutine() { + var unhealthyInputList []string + { + dfdd.unhealthyInputsLock.RLock() + currentTime := time.Now().UTC() + for host, lastSeenTime := range dfdd.unhealthyInputs { + if currentTime.Sub(lastSeenTime) > dfdd.hostDownPeriodForStage2 { + dfdd.context.log.WithFields(bark.Fields{ + common.TagIn: common.FmtIn(host), + `last seen time`: lastSeenTime, + }).Info("Input host is down(stage 2)") + unhealthyInputList = append(unhealthyInputList, host) + } + } + dfdd.unhealthyInputsLock.RUnlock() + } + for _, host := range 
unhealthyInputList { + // report unhealthy here(which will reset the timestamp to now) so that the actions can be triggered again + // in next cycle(after hostDownPeriodForStage2) if the host still doesn't come back + dfdd.reportHostUnhealthy(inputServiceID, host) + } + + var unhealthyStoreList []string + { + dfdd.unhealthyStoresLock.RLock() + currentTime := time.Now().UTC() + for host, lastSeenTime := range dfdd.unhealthyStores { + if currentTime.Sub(lastSeenTime) > dfdd.hostDownPeriodForStage2 { + dfdd.context.log.WithFields(bark.Fields{ + common.TagStor: common.FmtStor(host), + `last seen time`: lastSeenTime, + }).Info("Store host is down(stage2)") + + event := NewStoreHostFailedEvent(host, hostDownStage2) + if !dfdd.context.eventPipeline.Add(event) { + dfdd.context.log.WithField(common.TagStor, common.FmtStor(host)).Error("Failed to enqueue StoreHostFailedEvent(stage 2)") + } + unhealthyStoreList = append(unhealthyStoreList, host) + } + } + dfdd.unhealthyStoresLock.RUnlock() + } + for _, host := range unhealthyStoreList { + // report unhealthy here(which will reset the timestamp to now) so that the actions can be triggered again + // in next cycle(after hostDownPeriodForStage2) if the host still doesn't come back + dfdd.reportHostUnhealthy(storeServiceID, host) + } + + return +} + +func (dfdd *dfddImpl) healthCheck() { + for { + select { + case <-dfdd.healthCheckTicker.C: + dfdd.healthCheckRoutine() + case <-dfdd.shutdownC: + return + } + } +} + +func (dfdd *dfddImpl) reportHostUnhealthy(id serviceID, hostUUID string) { + switch id { + case inputServiceID: + dfdd.context.log.WithField(common.TagIn, common.FmtIn(hostUUID)).Info("report input unhealthy") + dfdd.unhealthyInputsLock.Lock() + defer dfdd.unhealthyInputsLock.Unlock() + dfdd.unhealthyInputs[hostUUID] = time.Now().UTC() + case storeServiceID: + dfdd.context.log.WithField(common.TagStor, common.FmtStor(hostUUID)).Info("report store unhealthy") + dfdd.unhealthyStoresLock.Lock() + defer dfdd.unhealthyStoresLock.Unlock() + dfdd.unhealthyStores[hostUUID] = time.Now().UTC() + } +} + +func (dfdd *dfddImpl) reportHostHealthy(id serviceID, hostUUID string) { + switch id { + case inputServiceID: + dfdd.unhealthyInputsLock.Lock() + defer dfdd.unhealthyInputsLock.Unlock() + delete(dfdd.unhealthyInputs, hostUUID) + case storeServiceID: + dfdd.unhealthyStoresLock.Lock() + defer dfdd.unhealthyStoresLock.Unlock() + delete(dfdd.unhealthyStores, hostUUID) + } +} + func (dfdd *dfddImpl) handleListenerEvent(id serviceID, listenerEvent *common.RingpopListenerEvent) { - if listenerEvent.Type != common.HostRemovedEvent { - // TODO: Use HostAddedEvent to maintain a - // list of healthy peers + if listenerEvent.Type == common.HostAddedEvent { + dfdd.reportHostHealthy(id, listenerEvent.Key) return } + dfdd.reportHostUnhealthy(id, listenerEvent.Key) + var event Event switch id { @@ -148,7 +289,7 @@ func (dfdd *dfddImpl) handleListenerEvent(id serviceID, listenerEvent *common.Ri event = NewInputHostFailedEvent(listenerEvent.Key) case storeServiceID: dfdd.context.log.WithField(common.TagStor, common.FmtStor(listenerEvent.Key)).Info("StoreHostFailed") - event = NewStoreHostFailedEvent(listenerEvent.Key) + event = NewStoreHostFailedEvent(listenerEvent.Key, hostDownStage1) default: dfdd.context.log.Error("ListenerEvent for unknown service") return diff --git a/services/controllerhost/dfdd_test.go b/services/controllerhost/dfdd_test.go index 6a08cb6d..4b7afb07 100644 --- a/services/controllerhost/dfdd_test.go +++ b/services/controllerhost/dfdd_test.go @@ -62,6 
+62,8 @@ func (s *DfddTestSuite) SetupTest() { } func (s *DfddTestSuite) TestFailureDetection() { + s.dfdd.OverrideHostDownPeriodForStage2(time.Duration(1 * time.Second)) + s.dfdd.OverrideHealthCheckInterval(time.Duration(1 * time.Second)) s.dfdd.Start() inHostIDs := []string{uuid.New(), uuid.New(), uuid.New()} storeIDs := []string{uuid.New(), uuid.New(), uuid.New()} @@ -77,7 +79,8 @@ func (s *DfddTestSuite) TestFailureDetection() { cond := func() bool { return (s.eventPipeline.inHostFailureCount() == len(inHostIDs) && - s.eventPipeline.storeHostFailureCount() == len(storeIDs)) + s.eventPipeline.storeHostFailureStage1Count() == len(storeIDs) && + s.eventPipeline.storeHostFailureStage2Count() == len(storeIDs)) } succ := common.SpinWaitOnCondition(cond, 10*time.Second) @@ -89,20 +92,25 @@ func (s *DfddTestSuite) TestFailureDetection() { for _, h := range storeIDs { s.True(s.eventPipeline.isHostFailed(h), "Dfdd failed to detect store host failure") + s.True(s.eventPipeline.isStoreFailedStage2(h), "Dfdd failed to detect store host stage 2 failure") } + s.dfdd.Stop() } type testEventPipelineImpl struct { - inHostFailures int - storeHostFailures int - failedHosts map[string]bool - mutex sync.Mutex + inHostFailures int + storeHostStage1Failures int + storeHostStage2Failures int + failedHosts map[string]bool + failedStage2Stores map[string]bool + mutex sync.Mutex } func newTestEventPipeline() *testEventPipelineImpl { return &testEventPipelineImpl{ - failedHosts: make(map[string]bool), + failedHosts: make(map[string]bool), + failedStage2Stores: make(map[string]bool), } } @@ -116,9 +124,14 @@ func (ep *testEventPipelineImpl) Add(event Event) bool { e, _ := event.(*InputHostFailedEvent) ep.failedHosts[e.hostUUID] = true case *StoreHostFailedEvent: - ep.storeHostFailures++ e, _ := event.(*StoreHostFailedEvent) - ep.failedHosts[e.hostUUID] = true + if e.stage == hostDownStage1 { + ep.storeHostStage1Failures++ + ep.failedHosts[e.hostUUID] = true + } else if e.stage == hostDownStage2 { + ep.storeHostStage2Failures++ + ep.failedStage2Stores[e.hostUUID] = true + } } ep.mutex.Unlock() return true @@ -136,10 +149,26 @@ func (ep *testEventPipelineImpl) isHostFailed(uuid string) bool { return ok } -func (ep *testEventPipelineImpl) storeHostFailureCount() int { +func (ep *testEventPipelineImpl) isStoreFailedStage2(uuid string) bool { + ok := false + ep.mutex.Lock() + _, ok = ep.failedStage2Stores[uuid] + ep.mutex.Unlock() + return ok +} + +func (ep *testEventPipelineImpl) storeHostFailureStage1Count() int { count := 0 ep.mutex.Lock() - count = ep.storeHostFailures + count = ep.storeHostStage1Failures + ep.mutex.Unlock() + return count +} + +func (ep *testEventPipelineImpl) storeHostFailureStage2Count() int { + count := 0 + ep.mutex.Lock() + count = ep.storeHostStage2Failures ep.mutex.Unlock() return count } @@ -164,6 +193,9 @@ func newTestRpm() *testRpmImpl { func (rpm *testRpmImpl) Start() {} func (rpm *testRpmImpl) Stop() {} +func (rpm *testRpmImpl) GetBootstrappedChannel() chan struct{} { + return nil +} func (rpm *testRpmImpl) GetHosts(service string) ([]*common.HostInfo, error) { return nil, common.ErrUUIDLookupFailed diff --git a/services/controllerhost/event_handlers.go b/services/controllerhost/event_handlers.go index 766a87bd..464f8507 100644 --- a/services/controllerhost/event_handlers.go +++ b/services/controllerhost/event_handlers.go @@ -21,6 +21,7 @@ package controllerhost import ( + "sort" "sync" "sync/atomic" "time" @@ -110,13 +111,15 @@ type ( desiredStatus shared.ExtentStatus } - // 
RemoteZoneExtentCreatedEvent is triggered - // when a remote zone extent is created - RemoteZoneExtentCreatedEvent struct { + // StartReplicationForRemoteZoneExtent is triggered + // when a remote zone extent is created or the primary store + // is switched + StartReplicationForRemoteZoneExtent struct { eventBase - dstID string - extentID string - storeIDs []string + dstID string + extentID string + storeIDs []string + remoteExtentPrimaryStore string } // InputHostFailedEvent is triggered @@ -130,6 +133,7 @@ type ( StoreHostFailedEvent struct { eventBase hostUUID string + stage hostDownStage } ) @@ -221,12 +225,13 @@ func NewStoreExtentStatusOutOfSyncEvent(dstID string, extentID string, storeID s } } -// NewRemoteZoneExtentCreatedEvent creates and returns a RemoteZoneExtentCreatedEvent -func NewRemoteZoneExtentCreatedEvent(dstID string, extentID string, storeIDs []string) Event { - return &RemoteZoneExtentCreatedEvent{ - dstID: dstID, - extentID: extentID, - storeIDs: storeIDs, +// NewStartReplicationForRemoteZoneExtent creates and returns a StartReplicationForRemoteZoneExtent +func NewStartReplicationForRemoteZoneExtent(dstID string, extentID string, storeIDs []string, remoteExtentPrimaryStore string) Event { + return &StartReplicationForRemoteZoneExtent{ + dstID: dstID, + extentID: extentID, + storeIDs: storeIDs, + remoteExtentPrimaryStore: remoteExtentPrimaryStore, } } @@ -245,8 +250,11 @@ func NewInputHostFailedEvent(hostUUID string) Event { } // NewStoreHostFailedEvent creates and returns a StoreHostFailedEvent -func NewStoreHostFailedEvent(hostUUID string) Event { - return &StoreHostFailedEvent{hostUUID: hostUUID} +func NewStoreHostFailedEvent(hostUUID string, stage hostDownStage) Event { + return &StoreHostFailedEvent{ + hostUUID: hostUUID, + stage: stage, + } } // Handle handles the creation of a new extent. 
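For context, the new remoteExtentPrimaryStore carried by this event is chosen at random from the extent's store UUIDs when the controller creates a remote-zone extent, and extents written before the field existed fall back to the first store UUID after sorting. A minimal standalone sketch of that selection, with illustrative helper names rather than the controller's actual functions:

```
package main

import (
	"fmt"
	"math/rand"
	"sort"
)

// pickRemoteExtentPrimaryStore mirrors the selection made when a remote-zone
// extent is created: one of the candidate store UUIDs is chosen at random.
func pickRemoteExtentPrimaryStore(storeIDs []string) string {
	return storeIDs[rand.Intn(len(storeIDs))]
}

// legacyPrimaryStore mirrors the fallback for extents created before the
// primary-store field existed: sort the store UUIDs and use the first one.
func legacyPrimaryStore(storeIDs []string) string {
	sorted := append([]string(nil), storeIDs...) // copy so the caller's slice order is untouched
	sort.Strings(sorted)
	return sorted[0]
}

func main() {
	stores := []string{"store-c", "store-a", "store-b"} // placeholder UUIDs
	fmt.Println("new extent primary:", pickRemoteExtentPrimaryStore(stores))
	fmt.Println("legacy fallback primary:", legacyPrimaryStore(stores))
}
```

The random pick avoids always designating the same sorted-first store as primary for new extents, while the sorted-first fallback preserves the old convention for extents that predate the field.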
@@ -534,19 +542,112 @@ func (event *StoreHostFailedEvent) Handle(context *Context) error { sw := context.m3Client.StartTimer(metrics.StoreFailedEventScope, metrics.ControllerLatencyTimer) defer sw.Stop() context.m3Client.IncCounter(metrics.StoreFailedEventScope, metrics.ControllerRequests) - stats, err := context.mm.ListExtentsByStoreIDStatus(event.hostUUID, common.MetadataExtentStatusPtr(shared.ExtentStatus_OPEN)) + + if event.stage == hostDownStage1 { + stats, err := context.mm.ListExtentsByStoreIDStatus(event.hostUUID, common.MetadataExtentStatusPtr(shared.ExtentStatus_OPEN)) + if err != nil { + // metadata intermittent failure, we will wait for the background + // reconciler task to catch up and seal this extent + context.m3Client.IncCounter(metrics.StoreFailedEventScope, metrics.ControllerFailures) + context.m3Client.IncCounter(metrics.StoreFailedEventScope, metrics.ControllerErrMetadataReadCounter) + context.log.WithFields(bark.Fields{ + common.TagErr: err, + common.TagStor: event.hostUUID, + }).Error(`StoreHostFailedEvent: Cannot list extents`) + return nil + } + createExtentDownEvents(context, stats) + return nil + } else if event.stage == hostDownStage2 { + return event.handleHostDownForRemoteExtent(context) + } + + return nil +} + +func (event *StoreHostFailedEvent) handleHostDownForRemoteExtent(context *Context) error { + + // We need to get extents in both 'pending' and 'done' state, and assign a new store host for these extents + // Currently there's no way to get the list in a single cassandra query + // Since this event is very rare, we'll just issue two cassandra queries + extents, err := context.mm.ListExtentsByReplicationStatus(event.hostUUID, common.InternalExtentReplicaReplicationStatusTypePtr(shared.ExtentReplicaReplicationStatus_PENDING)) if err != nil { - // metadata intermittent failure, we will wait for the background - // reconciler task to catch up and seal this extent context.m3Client.IncCounter(metrics.StoreFailedEventScope, metrics.ControllerFailures) - context.m3Client.IncCounter(metrics.InputFailedEventScope, metrics.ControllerErrMetadataReadCounter) - context.log.WithFields(bark.Fields{ - common.TagErr: err, - common.TagStor: event.hostUUID, - }).Error(`StoreHostFailedEvent: Cannot list extents`) - return nil + context.m3Client.IncCounter(metrics.StoreFailedEventScope, metrics.ControllerErrMetadataReadCounter) + context.log.WithFields(bark.Fields{common.TagErr: err, `host`: event.hostUUID}).Error(`HandleRemoteExtent: Cannot list pending extents`) + return errRetryable + } + doneExtents, err := context.mm.ListExtentsByReplicationStatus(event.hostUUID, common.InternalExtentReplicaReplicationStatusTypePtr(shared.ExtentReplicaReplicationStatus_DONE)) + if err != nil { + context.m3Client.IncCounter(metrics.StoreFailedEventScope, metrics.ControllerFailures) + context.m3Client.IncCounter(metrics.StoreFailedEventScope, metrics.ControllerErrMetadataReadCounter) + context.log.WithFields(bark.Fields{common.TagErr: err, `host`: event.hostUUID}).Error(`HandleRemoteExtent: Cannot list done extents`) + return errRetryable + } + extents = append(extents, doneExtents...) 
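The dedup-and-promote loop added next picks a replacement primary for any extent whose current primary is the failed store: the first other replica that the ringpop monitor still reports healthy, or nothing if no healthy replica exists. A standalone sketch of just that promotion step, where isHealthy stands in for context.rpm.IsHostHealthy and the names are illustrative:

```
package main

import "fmt"

// promoteReplacementPrimary returns a new primary for an extent whose current
// primary (failedStore) is down: the first other replica that passes the
// health check. ok is false when no healthy replacement is found, in which
// case the handler below only logs an error and moves on.
func promoteReplacementPrimary(storeIDs []string, failedStore string,
	isHealthy func(storeID string) bool) (newPrimary string, ok bool) {
	for _, store := range storeIDs {
		if store != failedStore && isHealthy(store) {
			return store, true
		}
	}
	return "", false
}

func main() {
	stores := []string{"store-a", "store-b", "store-c"} // placeholder UUIDs
	health := map[string]bool{"store-b": false, "store-c": true}
	isHealthy := func(id string) bool { return health[id] }

	if newPrimary, ok := promoteReplacementPrimary(stores, "store-a", isHealthy); ok {
		// In the handler this is followed by UpdateRemoteExtentPrimaryStore and
		// a StartReplicationForRemoteZoneExtent event for the promoted store.
		fmt.Println("promote", newPrimary)
	}
}
```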
+ + // same extent might exist in both lists due to state transition, so dedup here + extentsDedup := make(map[string]struct{}) + + for _, extent := range extents { + if _, ok := extentsDedup[extent.GetExtent().GetExtentUUID()]; ok { + continue + } else { + extentsDedup[extent.GetExtent().GetExtentUUID()] = struct{}{} + } + + var oldRemoteExtentPrimaryStore string + if extent.GetExtent().IsSetRemoteExtentPrimaryStore() { + oldRemoteExtentPrimaryStore = extent.GetExtent().GetRemoteExtentPrimaryStore() + } else { + // For old extent that doesn't have the primary store field set, the assumption is first store after sorting is treated as primary + sort.Strings(extent.GetExtent().GetStoreUUIDs()) + oldRemoteExtentPrimaryStore = extent.GetExtent().GetStoreUUIDs()[0] + } + + // the unhealthy store is the primary store, so we'll try to promote an old store as primary since it likely already has some data + // if we couldn't find such store, we'll just log here + // TODO: ideally we should allocate a new healthy store if the system is not in a disasterous state, or the replication factor is set to 1 + // Note load on primary store might become unbalanced if a lot of stores are down then come back, because we don't rebalance the stores + // in these cases. + var newRemoteExtentPrimaryStore string + if oldRemoteExtentPrimaryStore == event.hostUUID { + foundReplacement := false + for _, store := range extent.GetExtent().GetStoreUUIDs() { + if store != oldRemoteExtentPrimaryStore && context.rpm.IsHostHealthy(common.StoreServiceName, store) { + newRemoteExtentPrimaryStore = store + foundReplacement = true + break + } + } + + lclog := context.log.WithFields(bark.Fields{ + common.TagExt: common.FmtExt(extent.GetExtent().GetExtentUUID()), + common.TagDst: common.FmtDst(extent.GetExtent().GetDestinationUUID()), + `old primary`: oldRemoteExtentPrimaryStore, + `new primary`: newRemoteExtentPrimaryStore, + }) + + if foundReplacement { + _, err = context.mm.UpdateRemoteExtentPrimaryStore(extent.GetExtent().GetDestinationUUID(), extent.GetExtent().GetExtentUUID(), newRemoteExtentPrimaryStore) + if err != nil { + lclog.WithField(common.TagErr, err).Warn(`failed to update primary store in metadata`) + return errRetryable + } + + remoteExtentReplicationEvent := NewStartReplicationForRemoteZoneExtent(extent.GetExtent().GetDestinationUUID(), extent.GetExtent().GetExtentUUID(), extent.GetExtent().GetStoreUUIDs(), newRemoteExtentPrimaryStore) + succ := context.eventPipeline.Add(remoteExtentReplicationEvent) + if !succ { + lclog.WithField(common.TagErr, err).Warn(`failed to add replication event`) + return errRetryable + } + + lclog.Info(`successfully replaced primary`) + } else { + lclog.WithField(common.TagErr, err).Error(`failed to find replacement for old primary store`) + } + } } - createExtentDownEvents(context, stats) return nil } @@ -588,28 +689,29 @@ func (event *StoreExtentStatusOutOfSyncEvent) Handle(context *Context) error { return nil } -// Handle handles an RemoteExtentCreatedEvent. +// Handle handles a StartReplicationForRemoteZoneExtent. // This handler calls store to start replication. 
-// The first store will be issued with a remote replication request +// The primary store will be issued with a remote replication request // The rest of stores will be issued with a re-replication request -func (event *RemoteZoneExtentCreatedEvent) Handle(context *Context) error { - sw := context.m3Client.StartTimer(metrics.RemoteZoneExtentCreatedEventScope, metrics.ControllerLatencyTimer) +// This is the fast path to notify store to start or resume a replication. If the notificaiton is lost, the slow path (a periodic +// job in store) will kick in to start replication +func (event *StartReplicationForRemoteZoneExtent) Handle(context *Context) error { + sw := context.m3Client.StartTimer(metrics.StartReplicationForRemoteZoneExtentScope, metrics.ControllerLatencyTimer) defer sw.Stop() - context.m3Client.IncCounter(metrics.RemoteZoneExtentCreatedEventScope, metrics.ControllerRequests) + context.m3Client.IncCounter(metrics.StartReplicationForRemoteZoneExtentScope, metrics.ControllerRequests) var err error - primaryStoreID := event.storeIDs[0] - primaryStoreAddr, err := context.rpm.ResolveUUID(common.StoreServiceName, primaryStoreID) + primaryStoreAddr, err := context.rpm.ResolveUUID(common.StoreServiceName, event.remoteExtentPrimaryStore) if err != nil { return errRetryable } - primaryStoreClient, err := context.clientFactory.GetThriftStoreClient(primaryStoreAddr, primaryStoreID) + primaryStoreClient, err := context.clientFactory.GetThriftStoreClient(primaryStoreAddr, event.remoteExtentPrimaryStore) if err != nil { context.log.WithFields(bark.Fields{ common.TagExt: common.FmtExt(event.extentID), - common.TagStor: common.FmtStor(primaryStoreID), + common.TagStor: common.FmtStor(event.remoteExtentPrimaryStore), common.TagErr: err, }).Error(`Client factory failed to get store client`) return err @@ -625,14 +727,17 @@ func (event *RemoteZoneExtentCreatedEvent) Handle(context *Context) error { if err != nil { context.log.WithFields(bark.Fields{ common.TagExt: common.FmtExt(event.extentID), - common.TagStor: common.FmtStor(primaryStoreID), + common.TagStor: common.FmtStor(event.remoteExtentPrimaryStore), common.TagErr: err, }).Error("Attempt to call RemoteReplicateExtent on storehost failed") return err } - for i := 1; i < len(event.storeIDs); i++ { - secondaryStoreID := event.storeIDs[i] + for _, storeID := range event.storeIDs { + if event.remoteExtentPrimaryStore == storeID { + continue + } + secondaryStoreID := storeID secondaryStoreAddr, err := context.rpm.ResolveUUID(common.StoreServiceName, secondaryStoreID) if err != nil { return errRetryable @@ -651,13 +756,14 @@ func (event *RemoteZoneExtentCreatedEvent) Handle(context *Context) error { req := store.NewReplicateExtentRequest() req.DestinationUUID = common.StringPtr(event.dstID) req.ExtentUUID = common.StringPtr(event.extentID) - req.StoreUUID = common.StringPtr(primaryStoreID) + req.StoreUUID = common.StringPtr(event.remoteExtentPrimaryStore) err = secondaryStoreClient.ReplicateExtent(ctx, req) if err != nil { context.log.WithFields(bark.Fields{ - common.TagExt: common.FmtExt(event.extentID), - common.TagStor: common.FmtStor(secondaryStoreID), - `error`: err, + common.TagExt: common.FmtExt(event.extentID), + common.TagStor: common.FmtStor(secondaryStoreID), + `primary_store`: common.FmtStor(event.remoteExtentPrimaryStore), + `error`: err, }).Error("Attempt to call ReplicateExtent on storehost failed") return err } diff --git a/services/controllerhost/event_pipeline_test.go b/services/controllerhost/event_pipeline_test.go index 
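
For reference, the fan-out performed by the renamed handler condenses to the loop below (host resolution, metrics, and logging elided). `clientFor` is a stand-in for the `ResolveUUID` plus `GetThriftStoreClient` pair shown in the hunk; the request type used for the primary's `RemoteReplicateExtent` call is not visible here, so only the secondary path is sketched.

```
// Secondaries re-replicate from the primary: each gets a ReplicateExtentRequest
// whose StoreUUID points at event.remoteExtentPrimaryStore. The primary itself
// is first told to pull from the remote zone via RemoteReplicateExtent.
for _, storeID := range event.storeIDs {
	if storeID == event.remoteExtentPrimaryStore {
		continue
	}
	req := store.NewReplicateExtentRequest()
	req.DestinationUUID = common.StringPtr(event.dstID)
	req.ExtentUUID = common.StringPtr(event.extentID)
	req.StoreUUID = common.StringPtr(event.remoteExtentPrimaryStore) // replicate from the primary
	if err := clientFor(storeID).ReplicateExtent(ctx, req); err != nil {
		return err
	}
}
```
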
ec16238d..de44c0bb 100644 --- a/services/controllerhost/event_pipeline_test.go +++ b/services/controllerhost/event_pipeline_test.go @@ -141,7 +141,7 @@ func (s *EventPipelineSuite) TestExtentCreatedEvent() { storeIDs := []string{uuid.New(), uuid.New(), uuid.New()} for i := 0; i < len(extentIDs); i++ { - _, err := s.mcp.context.mm.CreateExtent(dstID, extentIDs[i], inHostIDs[i], storeIDs, ``) + _, err := s.mcp.context.mm.CreateExtent(dstID, extentIDs[i], inHostIDs[i], storeIDs) s.Nil(err, "Failed to create extent") } @@ -245,7 +245,7 @@ func (s *EventPipelineSuite) TestStoreHostFailedEvent() { storeIDs := []string{uuid.New(), uuid.New(), uuid.New()} for i := 0; i < len(extentIDs); i++ { - _, err := s.mcp.context.mm.CreateExtent(dstID, extentIDs[i], inHostIDs[i], storeIDs, ``) + _, err := s.mcp.context.mm.CreateExtent(dstID, extentIDs[i], inHostIDs[i], storeIDs) s.Nil(err, "Failed to create extent") } @@ -259,7 +259,7 @@ func (s *EventPipelineSuite) TestStoreHostFailedEvent() { } s.mcp.context.rpm = rpm - event := NewStoreHostFailedEvent(storeIDs[0]) + event := NewStoreHostFailedEvent(storeIDs[0], hostDownStage1) s.mcp.context.eventPipeline.Add(event) for i := 0; i < len(extentIDs); i++ { @@ -298,6 +298,65 @@ func (s *EventPipelineSuite) TestStoreHostFailedEvent() { } } +func (s *EventPipelineSuite) TestStoreHostFailedEventStage2() { + + path := s.generateName("/cherami/event-test") + dstDesc, err := s.createDestination(path) + s.Nil(err, "Failed to create destination") + s.Equal(common.UUIDStringLength, len(dstDesc.GetDestinationUUID()), "Invalid destination uuid") + + dstID := dstDesc.GetDestinationUUID() + inHostIDs := []string{uuid.New(), uuid.New()} + extentIDs := []string{uuid.New()} + storeIDs := []string{uuid.New(), uuid.New(), uuid.New()} + primaryStoreIdx := 1 + originZone := `zone1` + + for i := 0; i < len(extentIDs); i++ { + _, err := s.mcp.context.mm.CreateRemoteZoneExtent(dstID, extentIDs[i], inHostIDs[i], storeIDs, originZone, storeIDs[primaryStoreIdx]) + s.Nil(err, "Failed to create extent") + } + + // verify the primary store is correctly set + stats, err := s.mcp.context.mm.ListExtentsByDstIDStatus(dstID, nil) + s.Nil(err, "Failed to list extents") + for _, stat := range stats { + s.Equal(storeIDs[primaryStoreIdx], stat.GetExtent().GetRemoteExtentPrimaryStore()) + } + + rpm := common.NewMockRingpopMonitor() + + stores := make([]*MockStoreService, len(storeIDs)) + for i := 0; i < len(storeIDs); i++ { + stores[i] = NewMockStoreService() + stores[i].Start() + rpm.Add(common.StoreServiceName, storeIDs[i], stores[i].hostPort) + } + s.mcp.context.rpm = rpm + + event := NewStoreHostFailedEvent(storeIDs[1], hostDownStage2) + s.mcp.context.eventPipeline.Add(event) + + cond := func() bool { + newStats, err := s.mcp.context.mm.ListExtentsByDstIDStatus(dstID, nil) + if err != nil { + return false + } + for _, stat := range newStats { + if stat.GetExtent().GetRemoteExtentPrimaryStore() == storeIDs[primaryStoreIdx] { + return false + } + } + return true + } + succ := common.SpinWaitOnCondition(cond, 60*time.Second) + s.True(succ, "Timed out waiting for primary store to be changed") + + for i := 0; i < len(stores); i++ { + stores[i].Stop() + } +} + func (s *EventPipelineSuite) TestInputHostFailedEvent() { path := s.generateName("/cherami/event-test") @@ -311,7 +370,7 @@ func (s *EventPipelineSuite) TestInputHostFailedEvent() { storeIDs := []string{uuid.New(), uuid.New(), uuid.New()} for i := 0; i < len(extentIDs); i++ { - _, err := s.mcp.context.mm.CreateExtent(dstID, extentIDs[i], 
inHostIDs[i], storeIDs, ``) + _, err := s.mcp.context.mm.CreateExtent(dstID, extentIDs[i], inHostIDs[i], storeIDs) s.Nil(err, "Failed to create extent") } @@ -380,7 +439,7 @@ func (s *EventPipelineSuite) TestRemoteZoneExtentCreatedEvent() { } s.mcp.context.rpm = rpm - event := NewRemoteZoneExtentCreatedEvent(destID, extentID, storeIDs) + event := NewStartReplicationForRemoteZoneExtent(destID, extentID, storeIDs, storeIDs[0]) s.mcp.context.eventPipeline.Add(event) // The first store is expected to be remote replicated diff --git a/services/controllerhost/extentmon.go b/services/controllerhost/extentmon.go index 27873f8e..20f2939f 100644 --- a/services/controllerhost/extentmon.go +++ b/services/controllerhost/extentmon.go @@ -329,7 +329,14 @@ func (monitor *extentStateMonitor) processExtents(dstDesc *shared.DestinationDes // fixOutOfSyncStoreExtents checks if all stores have the same view of // the extent status and if not, issues a SEAL to the out of sync // store to bring it up to speed +// For a remote zone extent, store will mark a store extent as sealed +// only after it gets the sealed marker from replication +// So we don't need to sync the status for remote zone extent func (monitor *extentStateMonitor) fixOutOfSyncStoreExtents(dstID string, extent *shared.Extent) { + if common.IsRemoteZoneExtent(extent.GetOriginZone(), monitor.context.localZone) { + return + } + for _, storeh := range extent.GetStoreUUIDs() { key := buildExtentCacheKey(storeh, extent.GetExtentUUID()) @@ -390,6 +397,12 @@ func (monitor *extentStateMonitor) deleteConsumerGroups(dstDesc *shared.Destinat filterBy := []metadata.ConsumerGroupExtentStatus{metadata.ConsumerGroupExtentStatus_OPEN} extents, e := context.mm.ListExtentsByConsumerGroup(dstID, cgID, filterBy) if e != nil { + monitor.ll.WithFields(bark.Fields{ + common.TagErr: e, + common.TagDst: dstID, + common.TagCnsm: cgID, + `statusFilter`: filterBy[0], + }).Error(`ListExtentsByConsumerGroup failed`) // if we cannot list extents, we wont be // able to find the output hosts to notify. 
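
The early return added to `fixOutOfSyncStoreExtents` hinges on `common.IsRemoteZoneExtent`, whose implementation is outside this diff. From the call site it presumably reduces to a zone comparison, roughly:

```
// Assumed semantics of common.IsRemoteZoneExtent (the real helper lives in the
// common package and is not part of this diff): an extent is "remote" when it
// records an origin zone and that zone differs from the local one.
func isRemoteZoneExtent(originZone, localZone string) bool {
	return len(originZone) > 0 && originZone != localZone
}
```
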
// lets try next time @@ -488,6 +501,13 @@ nextConsGroup: } monitor.mi.publishEvent(eCnsmExtent, ext) } + } else { + monitor.ll.WithFields(bark.Fields{ + common.TagErr: e, + common.TagDst: dstID, + common.TagCnsm: cgID, + `statusFilter`: status, + }).Error(`ListExtentsByConsumerGroup failed`) } } monitor.mi.publishEvent(eCnsmExtentIterEnd, nil) diff --git a/services/controllerhost/extentmon_test.go b/services/controllerhost/extentmon_test.go index d3e99796..d0929aed 100644 --- a/services/controllerhost/extentmon_test.go +++ b/services/controllerhost/extentmon_test.go @@ -201,13 +201,13 @@ func (s *ExtentStateMonitorSuite) TestExtentMonitor() { extentIDs := []string{uuid.New(), uuid.New(), uuid.New()} // healthy extent - _, err = s.mcp.context.mm.CreateExtent(dstID, extentIDs[0], inHostIDs[0], storeIDs[0:3], ``) + _, err = s.mcp.context.mm.CreateExtent(dstID, extentIDs[0], inHostIDs[0], storeIDs[0:3]) s.Nil(err, "Failed to create extent") // bad input host - _, err = s.mcp.context.mm.CreateExtent(dstID, extentIDs[1], inHostIDs[1], storeIDs[0:3], ``) + _, err = s.mcp.context.mm.CreateExtent(dstID, extentIDs[1], inHostIDs[1], storeIDs[0:3]) s.Nil(err, "Failed to create extent") // bad store host - _, err = s.mcp.context.mm.CreateExtent(dstID, extentIDs[2], inHostIDs[0], storeIDs[1:4], ``) + _, err = s.mcp.context.mm.CreateExtent(dstID, extentIDs[2], inHostIDs[0], storeIDs[1:4]) s.Nil(err, "Failed to create extent") name := dst.GetPath() + "/consumer" diff --git a/services/controllerhost/retMgrRun.go b/services/controllerhost/retMgrRun.go index 58c5fe04..0dfcd620 100644 --- a/services/controllerhost/retMgrRun.go +++ b/services/controllerhost/retMgrRun.go @@ -55,6 +55,7 @@ type ( clientFactory common.ClientFactory log bark.Logger m3Client metrics.Client + localZone string } // retMgrRunner holds the instance context @@ -162,6 +163,7 @@ func (t *retMgrRunner) startRetentionMgr() { SingleCGVisibleExtentGracePeriod: singleCGVisibleExtentGracePeriod, ExtentDeleteDeferPeriod: extentDeleteDeferPeriod, NumWorkers: retentionMgrWorkers, + LocalZone: t.retMgrRunnerContext.localZone, } t.retentionMgr = retentionMgr.New(opts, t.metadataClient, t.clientFactory, t.m3Client, t.log) diff --git a/services/frontendhost/dynamicConfig.go b/services/frontendhost/dynamicConfig.go index 757bb250..0ebc405e 100644 --- a/services/frontendhost/dynamicConfig.go +++ b/services/frontendhost/dynamicConfig.go @@ -20,21 +20,14 @@ package frontendhost -import ( - log "github.com/Sirupsen/logrus" - - dconfig "github.com/uber/cherami-server/common/dconfigclient" -) - -const ( - // UkeyUseWebsocket is the key for UseWebsocket - UkeyUseWebsocket = "frontendhost.UseWebsocket" -) +import dconfig "github.com/uber/cherami-server/common/dconfigclient" func (h *Frontend) registerInt() { // Add handler function for the dynamic config value handlerMap := make(map[string]dconfig.Handler) - handlerMap[UkeyUseWebsocket] = dconfig.GenerateIntHandler(UkeyUseWebsocket, h.SetUseWebsocket, h.GetUseWebsocket) + + // nothing to be dynamically configured right now + h.dClient.AddHandlers(handlerMap) // Add verify function for the dynamic config value verifierMap := make(map[string]dconfig.Verifier) @@ -43,15 +36,7 @@ func (h *Frontend) registerInt() { // LoadUconfig load the dynamic config values for key func (h *Frontend) LoadUconfig() { - // UseWebsocket - valueUcfg, ok := h.dClient.GetOrDefault(UkeyUseWebsocket, 0).(int) - if ok { - h.SetUseWebsocket(int32(valueUcfg)) - log.WithField(UkeyUseWebsocket, valueUcfg). 
- Info("Update the value") - } else { - log.WithField("dconfigKey", UkeyUseWebsocket).Error("Cannot get key from dynamic config; try using the right format") - } + // nothing to be loaded dynamically right now } // Manage do the work for uconfig diff --git a/services/frontendhost/frontend.go b/services/frontendhost/frontend.go index c41ea170..02f1f7e5 100644 --- a/services/frontendhost/frontend.go +++ b/services/frontendhost/frontend.go @@ -30,7 +30,6 @@ import ( "strconv" "strings" "sync" - "sync/atomic" "time" c "github.com/uber/cherami-thrift/.generated/go/cherami" @@ -41,6 +40,7 @@ import ( "github.com/uber/cherami-server/common" "github.com/uber/cherami-server/common/configure" dconfig "github.com/uber/cherami-server/common/dconfigclient" + mm "github.com/uber/cherami-server/common/metadata" "github.com/uber/cherami-server/common/metrics" "github.com/pborman/uuid" @@ -65,10 +65,9 @@ type destinationUUID string // Frontend is the main server class for Frontends type Frontend struct { + metaClnt m.TChanMetadataService + hostIDHeartbeater common.HostIDHeartbeater common.SCommon - metaClnt m.TChanMetadataService - metadata common.MetadataMgr - hostIDHeartbeater common.HostIDHeartbeater AppConfig configure.CommonAppConfig hyperbahnClient *hyperbahn.Client cacheDestinationPathForUUID map[destinationUUID]string // Read/Write protected by lk @@ -81,7 +80,6 @@ type Frontend struct { outputClientByUUID map[string]c.TChanBOut m3Client metrics.Client dClient dconfig.Client - useWebsocket int32 // flag of whether ask client to use websocket to connect with input/output, under uConfig control } type publisherInstance struct { @@ -134,7 +132,6 @@ func NewFrontendHost(serviceName string, sVice common.SCommon, metadataClient m. bs := Frontend{ logger: (sVice.GetConfig().GetLogger()).WithFields(bark.Fields{common.TagFrnt: common.FmtFrnt(sVice.GetHostUUID()), common.TagDplName: common.FmtDplName(deploymentName)}), SCommon: sVice, - metaClnt: metadataClient, cacheDestinationPathForUUID: make(map[destinationUUID]string), cClient: nil, publishers: make(map[string]*publisherInstance), @@ -143,16 +140,16 @@ func NewFrontendHost(serviceName string, sVice common.SCommon, metadataClient m. 
AppConfig: config, } - bs.m3Client = metrics.NewClient(sVice.GetMetricsReporter(), metrics.Frontend) + // Add the frontend id as a field on all subsequent log lines in this module + bs.logger.WithFields(bark.Fields{`serviceName`: serviceName}).Info(`New Frontend`) - bs.metadata = common.NewMetadataMgr(bs.metaClnt, bs.m3Client, bs.logger) + bs.m3Client = metrics.NewClient(sVice.GetMetricsReporter(), metrics.Frontend) + bs.metaClnt = mm.NewMetadataMetricsMgr(metadataClient, bs.m3Client, bs.logger) // manage uconfig, regiester handerFunc and verifyFunc for uConfig values bs.dClient = sVice.GetDConfigClient() bs.dynamicConfigManage() - // Add the frontend id as a field on all subsequent log lines in this module - bs.logger.WithFields(bark.Fields{`serviceName`: serviceName, `metaClnt`: bs.metaClnt}).Info(`New Frontend`) return &bs, []thrift.TChanServer{c.NewTChanBFrontendServer(&bs)} //, clientgen.NewTChanBFrontendServer(&bs)} } @@ -352,12 +349,13 @@ func (h *Frontend) convertConsumerGroupFromInternal(ctx thrift.Context, _cgDesc if len(destPath) == 0 { var destDesc *shared.DestinationDescription - - destDesc, err = h.metadata.ReadDestination(_cgDesc.GetDestinationUUID(), "") // TODO: -= Maybe a GetDestinationPathForUUID =- + readRequest := m.NewReadDestinationRequest() + readRequest.DestinationUUID = common.StringPtr(_cgDesc.GetDestinationUUID()) + destDesc, err = h.metaClnt.ReadDestination(ctx, readRequest) // TODO: -= Maybe a GetDestinationPathForUUID =- if err != nil || len(destDesc.GetPath()) == 0 { h.logger.WithFields(bark.Fields{ - common.TagDst: common.FmtDst(_cgDesc.GetDestinationUUID()), + common.TagDst: common.FmtDst(readRequest.GetDestinationUUID()), common.TagErr: err, }).Error(`Failed to get destination path`) return @@ -403,8 +401,8 @@ const ( // that any destination that is returned by the metadata server will be returned to the client // TODO: Add a cache here with time-based retention func (h *Frontend) getUUIDForDestination(ctx thrift.Context, path string, rejectDisabled bool) (UUID string, err error) { - - destDesc, err := h.metadata.ReadDestination("", path) + mGetRequest := m.ReadDestinationRequest{Path: common.StringPtr(path)} + destDesc, err := h.metaClnt.ReadDestination(ctx, &mGetRequest) if err != nil { h.logger.WithField(common.TagDstPth, common.FmtDstPth(path)). 
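
The frontend now builds its metadata client the same way the input host does later in this diff: the raw `TChanMetadataService` client is wrapped once in what appears to be a metrics-emitting manager, and every call goes through that wrapper via explicit request structs. The two lines below restate the wiring from the hunk above:

```
// One wrapper per host: metadata calls made through bs.metaClnt presumably
// emit per-API metrics and logs via the shared m3 client and logger.
bs.m3Client = metrics.NewClient(sVice.GetMetricsReporter(), metrics.Frontend)
bs.metaClnt = mm.NewMetadataMetricsMgr(metadataClient, bs.m3Client, bs.logger)
```
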
@@ -470,13 +468,14 @@ func isRunnerDestination(destPath string) bool { // getHostAddressWithProtocol returns host address with different protocols with correct ports, together with deprecation info // this could be moved once ringpop supports rich meta information so we can store mutiple ports for different protocols -func (h *Frontend) getHostAddressWithProtocol(hostAddresses []*c.HostAddress, serviceName string, forceUseWebsocket bool) []*c.HostProtocol { +func (h *Frontend) getHostAddressWithProtocol(hostAddresses []*c.HostAddress, serviceName string) []*c.HostProtocol { tchannelHosts := &c.HostProtocol{ HostAddresses: make([]*c.HostAddress, 0, len(hostAddresses)), Protocol: c.ProtocolPtr(c.Protocol_TCHANNEL), - Deprecated: common.BoolPtr(forceUseWebsocket || h.GetUseWebsocket() > 0), + Deprecated: common.BoolPtr(true), } + websocketHosts := &c.HostProtocol{ HostAddresses: make([]*c.HostAddress, 0, len(hostAddresses)), Protocol: c.ProtocolPtr(c.Protocol_WS), @@ -587,15 +586,13 @@ func (h *Frontend) ReadDestination(ctx thrift.Context, readRequest *c.ReadDestin } var mReadRequest m.ReadDestinationRequest - var destUUID, destPath string - if common.UUIDRegex.MatchString(readRequest.GetPath()) { - destUUID, destPath = readRequest.GetPath(), "" + mReadRequest.DestinationUUID = common.StringPtr(readRequest.GetPath()) } else { - destUUID, destPath = "", readRequest.GetPath() + mReadRequest.Path = common.StringPtr(readRequest.GetPath()) } - _destDesc, err := h.metadata.ReadDestination(destUUID, destPath) + _destDesc, err := h.metaClnt.ReadDestination(ctx, &mReadRequest) if _destDesc != nil { destDesc = convertDestinationFromInternal(_destDesc) @@ -650,9 +647,9 @@ func (h *Frontend) ReadPublisherOptions(ctx thrift.Context, r *c.ReadPublisherOp } } + readDestRequest := m.ReadDestinationRequest{Path: common.StringPtr(r.GetPath())} var destDesc *shared.DestinationDescription - destDesc, err = h.metadata.ReadDestination("", r.GetPath()) - + destDesc, err = h.metaClnt.ReadDestination(ctx, &readDestRequest) if err != nil { return nil, err } @@ -682,12 +679,10 @@ func (h *Frontend) ReadPublisherOptions(ctx thrift.Context, r *c.ReadPublisherOp inputHostIds := getInputHostResp.GetInputHostIds() - forceUseWebsocket := isRunnerDestination(r.GetPath()) // force runners to use websocket - // Build our result rDHResult := c.NewReadPublisherOptionsResult_() rDHResult.HostAddresses = buildHostAddressesFromHostIds(inputHostIds, h.logger) - rDHResult.HostProtocols = h.getHostAddressWithProtocol(rDHResult.HostAddresses, common.InputServiceName, forceUseWebsocket) + rDHResult.HostProtocols = h.getHostAddressWithProtocol(rDHResult.HostAddresses, common.InputServiceName) rDHResult.ChecksumOption = c.ChecksumOptionPtr(c.ChecksumOption(checksumOption)) if len(rDHResult.HostAddresses) > 0 { @@ -752,12 +747,10 @@ func (h *Frontend) ReadDestinationHosts(ctx thrift.Context, r *c.ReadDestination inputHostIds := getInputHostResp.GetInputHostIds() - forceUseWebsocket := isRunnerDestination(r.GetPath()) // force runners to use websocket - // Build our result rDHResult := c.NewReadDestinationHostsResult_() rDHResult.HostAddresses = buildHostAddressesFromHostIds(inputHostIds, h.logger) - rDHResult.HostProtocols = h.getHostAddressWithProtocol(rDHResult.HostAddresses, common.InputServiceName, forceUseWebsocket) + rDHResult.HostProtocols = h.getHostAddressWithProtocol(rDHResult.HostAddresses, common.InputServiceName) if len(rDHResult.HostAddresses) > 0 { return rDHResult, nil @@ -847,7 +840,11 @@ func (h *Frontend) 
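
All of the `ReadDestination` call sites in this file now follow the same pattern: populate either the UUID or the Path field of the request struct depending on whether the input looks like a UUID, then call the wrapped client directly. A sketch, with `input` standing in for `readRequest.GetPath()`:

```
// Either the UUID or the Path field is populated, never both; the metadata
// service resolves whichever one is set.
var readReq m.ReadDestinationRequest
if common.UUIDRegex.MatchString(input) {
	readReq.DestinationUUID = common.StringPtr(input)
} else {
	readReq.Path = common.StringPtr(input)
}
destDesc, err := h.metaClnt.ReadDestination(ctx, &readReq)
```
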
ReadConsumerGroup(ctx thrift.Context, readRequest *c.ReadCons }) // Build a metadata version of the consumer group request - mCGDesc, err := h.metadata.ReadConsumerGroup("", readRequest.GetDestinationPath(), "", readRequest.GetConsumerGroupName()) + mReadRequest := m.NewReadConsumerGroupRequest() + mReadRequest.DestinationPath = common.StringPtr(readRequest.GetDestinationPath()) + mReadRequest.ConsumerGroupName = common.StringPtr(readRequest.GetConsumerGroupName()) + + mCGDesc, err := h.metaClnt.ReadConsumerGroup(ctx, mReadRequest) if mCGDesc != nil { cGDesc, err = h.convertConsumerGroupFromInternal(ctx, mCGDesc) lclLg = lclLg.WithFields(bark.Fields{ @@ -878,7 +875,11 @@ func (h *Frontend) ReadConsumerGroupHosts(ctx thrift.Context, readRequest *c.Rea }) // Build a metadata version of the consumer group request - mCGDesc, err := h.metadata.ReadConsumerGroup("", readRequest.GetDestinationPath(), "", readRequest.GetConsumerGroupName()) + mReadRequest := m.NewReadConsumerGroupRequest() + mReadRequest.DestinationPath = common.StringPtr(readRequest.GetDestinationPath()) + mReadRequest.ConsumerGroupName = common.StringPtr(readRequest.GetConsumerGroupName()) + + mCGDesc, err := h.metaClnt.ReadConsumerGroup(ctx, mReadRequest) if err != nil { return nil, err } @@ -907,12 +908,10 @@ func (h *Frontend) ReadConsumerGroupHosts(ctx thrift.Context, readRequest *c.Rea outputHostIds := getOutputHostResp.GetOutputHostIds() - forceUseWebsocket := isRunnerDestination(readRequest.GetDestinationPath()) // force runners to use websocket - // Build our result rCGHResult = c.NewReadConsumerGroupHostsResult_() rCGHResult.HostAddresses = buildHostAddressesFromHostIds(outputHostIds, h.logger) - rCGHResult.HostProtocols = h.getHostAddressWithProtocol(rCGHResult.HostAddresses, common.OutputServiceName, forceUseWebsocket) + rCGHResult.HostProtocols = h.getHostAddressWithProtocol(rCGHResult.HostAddresses, common.OutputServiceName) if len(rCGHResult.HostAddresses) > 0 { return @@ -995,7 +994,11 @@ func (h *Frontend) CreateConsumerGroup(ctx thrift.Context, createRequest *c.Crea switch err.(type) { case *shared.EntityAlreadyExistsError: lclLg.Info("DeadLetterQueue destination already existed") - dlqDestDesc, err = h.metadata.ReadDestination("", dlqPath) + mDLQReadRequest := m.ReadDestinationRequest{ + Path: dlqCreateRequest.Path, + } + + dlqDestDesc, err = h.metaClnt.ReadDestination(ctx, &mDLQReadRequest) if err != nil || dlqDestDesc == nil { lclLg.WithField(common.TagErr, err).Error(`Can't read existing DeadLetterQueue destination`) @@ -1097,7 +1100,7 @@ func (h *Frontend) ListConsumerGroups(ctx thrift.Context, listRequest *c.ListCon mListRequest.PageToken = listRequest.PageToken mListRequest.Limit = common.Int64Ptr(listRequest.GetLimit()) - listResult, err := h.metadata.ListConsumerGroupsPage(mListRequest) + listResult, err := h.metaClnt.ListConsumerGroups(ctx, mListRequest) if err != nil { lclLg.WithField(common.TagErr, err).Warn(`List consumer groups failed with error`) @@ -1143,7 +1146,7 @@ func (h *Frontend) ListDestinations(ctx thrift.Context, listRequest *c.ListDesti common.FmtDstPth(listRequest.GetPrefix())) // TODO : Prefix might need it's own tag // This is the same routine on the metadata library, from which we are forwarding destinations - listResult, err := h.metadata.ListDestinationsPage(mListRequest) + listResult, err := h.metaClnt.ListDestinations(ctx, mListRequest) if err != nil { lclLg.WithFields(bark.Fields{`Prefix`: listRequest.GetPrefix(), common.TagErr: err}).Warn(`List destinations for prefix failed 
with error`) @@ -1175,6 +1178,7 @@ func (h *Frontend) PurgeDLQForConsumerGroup(ctx thrift.Context, purgeRequest *c. func (h *Frontend) dlqOperationForConsumerGroup(ctx thrift.Context, destinationPath, consumerGroupName string, purge bool) (err error) { var lclLg bark.Logger var mCGDesc *shared.ConsumerGroupDescription + mReadRequest := m.NewReadConsumerGroupRequest() if purge { lclLg = h.logger.WithField(`operation`, `purge`) @@ -1189,7 +1193,9 @@ func (h *Frontend) dlqOperationForConsumerGroup(ctx thrift.Context, destinationP }) // First, determine the DLQ destination UUID - mCGDesc, err = h.metadata.ReadConsumerGroup("", destinationPath, "", consumerGroupName) + mReadRequest.DestinationPath = &destinationPath + mReadRequest.ConsumerGroupName = &consumerGroupName + mCGDesc, err = h.metaClnt.ReadConsumerGroup(ctx, mReadRequest) if err != nil || mCGDesc == nil { lclLg.WithFields(bark.Fields{common.TagErr: err, `mCGDesc`: mCGDesc}).Error(`ReadConsumerGroup failed`) @@ -1199,7 +1205,8 @@ func (h *Frontend) dlqOperationForConsumerGroup(ctx thrift.Context, destinationP lclLg = lclLg.WithField(common.TagCnsm, common.FmtCnsm(consumerGroupName)) // First, determine the DLQ destination UUID - mCGDesc, err = h.metadata.ReadConsumerGroupByUUID(consumerGroupName) + mReadRequest.ConsumerGroupUUID = &consumerGroupName + mCGDesc, err = h.metaClnt.ReadConsumerGroupByUUID(ctx, mReadRequest) if err != nil || mCGDesc == nil { lclLg.WithFields(bark.Fields{common.TagErr: err, `mCGDesc`: mCGDesc}).Error(`ReadConsumerGroup failed`) @@ -1208,7 +1215,9 @@ func (h *Frontend) dlqOperationForConsumerGroup(ctx thrift.Context, destinationP } // Read the destination to see if we should allow this request - destDesc, err := h.metadata.ReadDestination(mCGDesc.GetDeadLetterQueueDestinationUUID(), "") + mReadDestRequest := m.NewReadDestinationRequest() + mReadDestRequest.DestinationUUID = mCGDesc.DeadLetterQueueDestinationUUID + destDesc, err := h.metaClnt.ReadDestination(ctx, mReadDestRequest) if err != nil || destDesc == nil { lclLg.WithFields(bark.Fields{common.TagErr: err, `mCGDesc`: mCGDesc}).Error(`ReadDestination failed`) @@ -1216,18 +1225,17 @@ func (h *Frontend) dlqOperationForConsumerGroup(ctx thrift.Context, destinationP } // Now create the merge/purge request, which is simply a cursor update on the DLQ destination - var now, mergeBefore, purgeBefore common.UnixNanoTime - - now = common.Now() - + now := int64(common.Now()) + mCursorRequest := m.NewUpdateDestinationDLQCursorsRequest() + mCursorRequest.DestinationUUID = common.StringPtr(mCGDesc.GetDeadLetterQueueDestinationUUID()) if purge { - mergeBefore, purgeBefore = -1, now + mCursorRequest.DLQPurgeBefore = common.Int64Ptr(now) } else { - mergeBefore, purgeBefore = now, -1 + mCursorRequest.DLQMergeBefore = common.Int64Ptr(now) } - mergeTimeExisting := common.UnixNanoTime(destDesc.GetDLQMergeBefore()) - purgeTimeExisting := common.UnixNanoTime(destDesc.GetDLQPurgeBefore()) + mergeTimeExisting := destDesc.GetDLQMergeBefore() + purgeTimeExisting := destDesc.GetDLQPurgeBefore() mergeActive := mergeTimeExisting != 0 purgeActive := purgeTimeExisting != 0 @@ -1245,7 +1253,7 @@ func (h *Frontend) dlqOperationForConsumerGroup(ctx thrift.Context, destinationP return e } - err = h.metadata.UpdateDestinationDLQCursors(mCGDesc.GetDeadLetterQueueDestinationUUID(), mergeBefore, purgeBefore) + _, err = h.metaClnt.UpdateDestinationDLQCursors(ctx, mCursorRequest) if err != nil { lclLg.WithField(common.TagErr, err).Warn(`Could not merge/purge DLQ for consumer group`) @@ -1342,16 
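
The rewritten merge/purge path expresses both operations as a single cursor write on the DLQ destination; the guards against an already-active merge or purge stay as shown in the hunk above. Condensed:

```
// Purge records "now" as DLQPurgeBefore, merge records it as DLQMergeBefore;
// everything older than the recorded cursor is then purged or merged.
now := int64(common.Now())
cur := m.NewUpdateDestinationDLQCursorsRequest()
cur.DestinationUUID = common.StringPtr(mCGDesc.GetDeadLetterQueueDestinationUUID())
if purge {
	cur.DLQPurgeBefore = common.Int64Ptr(now)
} else {
	cur.DLQMergeBefore = common.Int64Ptr(now)
}
_, err = h.metaClnt.UpdateDestinationDLQCursors(ctx, cur)
```
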
+1350,6 @@ func (h *Frontend) allowMutatePath(path *string) bool { return false } -// SetUseWebsocket sets the flag of whether ask client to use websocket to connect with input/output -func (h *Frontend) SetUseWebsocket(useWebsocket int32) { - atomic.StoreInt32(&h.useWebsocket, useWebsocket) -} - -// GetUseWebsocket gets the flag of whether ask client to use websocket to connect with input/output -func (h *Frontend) GetUseWebsocket() int { - return int(atomic.LoadInt32(&h.useWebsocket)) -} - func (h *Frontend) incFailureCounterHelper(scope int, errC metrics.ErrorClass, err error) { if scope >= 0 { h.m3Client.IncCounter(scope, metrics.FrontendFailures) diff --git a/services/inputhost/dynamicConfig.go b/services/inputhost/dynamicConfig.go index a2745e34..0b0530d5 100644 --- a/services/inputhost/dynamicConfig.go +++ b/services/inputhost/dynamicConfig.go @@ -37,8 +37,6 @@ const ( UkeyExtMsgs = "inputhost.HostPerExtentMsgsLimitPerSecond" // UkeyConnMsgs is the uconfig key for HostPerConnMsgsLimitPerSecond UkeyConnMsgs = "inputhost.HostPerConnMsgsLimitPerSecond" - // UkeyUseWebsocket is the uconfig key for UseWebsocket - UkeyUseWebsocket = "inputhost.UseWebsocket" ) func (h *InputHost) registerInt() { @@ -49,7 +47,6 @@ func (h *InputHost) registerInt() { handlerMap[UkeyMaxConnPerDest] = dconfig.GenerateIntHandler(UkeyMaxConnPerDest, h.SetMaxConnPerDest, h.GetMaxConnPerDest) handlerMap[UkeyExtMsgs] = dconfig.GenerateIntHandler(UkeyExtMsgs, h.SetExtMsgsLimitPerSecond, h.GetExtMsgsLimitPerSecond) handlerMap[UkeyConnMsgs] = dconfig.GenerateIntHandler(UkeyConnMsgs, h.SetConnMsgsLimitPerSecond, h.GetConnMsgsLimitPerSecond) - handlerMap[UkeyUseWebsocket] = dconfig.GenerateIntHandler(UkeyUseWebsocket, h.SetUseWebsocket, h.GetUseWebsocket) h.dConfigClient.AddHandlers(handlerMap) // Add verify function for the dynamic config value verifierMap := make(map[string]dconfig.Verifier) @@ -71,16 +68,6 @@ func (h *InputHost) LoadUconfig() { } else { log.Errorf("Cannot get %s from uconfig, Using right format", UkeyHostOverall) } - - // UseWebsocket - valueUcfg, ok = h.dConfigClient.GetOrDefault(UkeyUseWebsocket, 0).(int) - if ok { - h.SetUseWebsocket(int32(valueUcfg)) - log.WithField(UkeyUseWebsocket, valueUcfg). 
- Info("Update the uconfig value") - } else { - log.Errorf("Cannot get %s from uconfig, Using right format", UkeyUseWebsocket) - } } // uconfigManage do the work for uconfig diff --git a/services/inputhost/exthost.go b/services/inputhost/exthost.go index 21b54dbe..ef3804f9 100644 --- a/services/inputhost/exthost.go +++ b/services/inputhost/exthost.go @@ -53,22 +53,22 @@ type ( // channel to notify the path cache that this exthost is going down // once the pathCache gets this message, he will disconnect clients if all extents are down - notifyCloseCh chan<- string - // notifyUnload is to notify the path to completely unload the extent - notifyUnloadCh chan<- string - extUUID string - destUUID string - destType shared.DestinationType - loadReporter common.LoadReporterDaemon - logger bark.Logger - tClients common.ClientFactory - closeChannel chan struct{} - streamClosedChannel chan struct{} - numReplicas int - seqNo int64 // monotonic sequence number for the messages on this extent - lastSuccessSeqNo int64 // last sequence number where we replied success - lastSuccessSeqNoCh chan int64 // last sequence number where we replied success - lastSentWatermark int64 // last watermark sent to the replicas + notifyExtCacheClosedCh chan string + // channel to notify the path cache to completely unload the extent + notifyExtCacheUnloadCh chan string + extUUID string + destUUID string + destType shared.DestinationType + loadReporter common.LoadReporterDaemon + logger bark.Logger + tClients common.ClientFactory + closeChannel chan struct{} + streamClosedChannel chan struct{} + numReplicas int + seqNo int64 // monotonic sequence number for the messages on this extent + lastSuccessSeqNo int64 // last sequence number where we replied success + lastSuccessSeqNoCh chan int64 // last sequence number where we replied success + lastSentWatermark int64 // last watermark sent to the replicas waitWriteWG sync.WaitGroup waitReadWG sync.WaitGroup @@ -112,7 +112,6 @@ type ( // Holds a particular extent for use by multiple publisher connections. // This is the cache member, not the cache. 
See extentCache in inputhost_util inExtentCache struct { - cacheMutex sync.RWMutex extUUID extentUUID connection *extHost } @@ -133,12 +132,12 @@ type ( conn *replicaConnection sendTimer *common.Timer } + + extCacheClosedCb func(string) + extCacheUnloadedCb func(string) ) const ( - // perMsgAckTimeout is the time to wait for the ack from the replicas - perMsgAckTimeout = 1 * time.Minute - // thriftCallTimeout is the timeout for the thrift context thriftCallTimeout = 1 * time.Minute @@ -152,7 +151,7 @@ const ( logTimeout = 1 * time.Minute // unloadTimeout is the timeout until which we keep the extent loaded - unloadTimeout = 10 * time.Minute + unloadTimeout = 2 * time.Minute // maxTBSleepDuration is the max sleep duration for the rate limiter maxTBSleepDuration = 1 * time.Second @@ -161,17 +160,22 @@ const ( extLoadReportingInterval = 2 * time.Second ) -// ErrTimeout is returned when the host is already shutdown -var ErrTimeout = &cherami.InternalServiceError{Message: "sending message to replica timed out"} +var ( + // ErrTimeout is returned when the host is already shutdown + ErrTimeout = &cherami.InternalServiceError{Message: "sending message to replica timed out"} -// nullTime is an empty time struct -var nullTime time.Time + // nullTime is an empty time struct + nullTime time.Time -// open is to indicate the extent is still open and we have not yet notified the controller -var open uint32 + // open is to indicate the extent is still open and we have not yet notified the controller + open uint32 -// sealed is to indicate we have already sent the seal notification -var sealed uint32 = 1 + // sealed is to indicate we have already sent the seal notification + sealed uint32 = 1 + + // msgAckTimeout is the time to wait for the ack from the replicas + msgAckTimeout = 1 * time.Minute +) func newExtConnection(destUUID string, pathCache *inPathCache, extUUID string, numReplicas int, loadReporterFactory common.LoadReporterDaemonFactory, logger bark.Logger, tClients common.ClientFactory, shutdownWG *sync.WaitGroup, limitsEnabled bool) *extHost { conn := &extHost{ @@ -183,8 +187,8 @@ func newExtConnection(destUUID string, pathCache *inPathCache, extUUID string, n tClients: tClients, lastSuccessSeqNo: int64(-1), lastSuccessSeqNoCh: nil, - notifyCloseCh: pathCache.notifyExtHostCloseCh, - notifyUnloadCh: pathCache.notifyExtHostUnloadCh, + notifyExtCacheClosedCh: pathCache.notifyExtHostCloseCh, + notifyExtCacheUnloadCh: pathCache.notifyExtHostUnloadCh, putMessagesCh: pathCache.putMsgCh, replyClientCh: make(chan writeResponse, defaultBufferSize), closeChannel: make(chan struct{}), @@ -250,78 +254,84 @@ func (conn *extHost) shutdown() { } func (conn *extHost) close() { + conn.lk.Lock() - if !conn.closed { - conn.closed = true - // Shutdown order: - // 1. stop the write pump to replicas and wait for the pump to close - // 2. close the replica streams - // 3. 
stop the read pump from replicas - close(conn.closeChannel) - if ok := common.AwaitWaitGroup(&conn.waitWriteWG, defaultWGTimeout); !ok { - conn.logger.Fatal("waitWriteGroup timed out") - } - for _, stream := range conn.streams { - stream.conn.close() - // stop the timer as well so that it gets gc'ed - stream.sendTimer.Stop() - // release the client, which will inturn close the channel - conn.tClients.ReleaseThriftStoreClient(conn.destUUID) - } - close(conn.streamClosedChannel) - close(conn.replyClientCh) - if conn.lastSuccessSeqNoCh != nil { - CLOSED: - for { - select { - case _, ok := <-conn.lastSuccessSeqNoCh: - if !ok { - break CLOSED - } + if conn.closed { + conn.lk.Unlock() + return + } + + conn.closed = true + + // Shutdown order: + // 1. stop the write pump to replicas and wait for the pump to close + // 2. close the replica streams + // 3. stop the read pump from replicas + close(conn.closeChannel) + if ok := common.AwaitWaitGroup(&conn.waitWriteWG, defaultWGTimeout); !ok { + conn.logger.Fatal("waitWriteGroup timed out") + } + for _, stream := range conn.streams { + stream.conn.close() + // stop the timer as well so that it gets gc'ed + stream.sendTimer.Stop() + // release the client, which will inturn close the channel + conn.tClients.ReleaseThriftStoreClient(conn.destUUID) + } + close(conn.streamClosedChannel) + close(conn.replyClientCh) + if conn.lastSuccessSeqNoCh != nil { + CLOSED: + for { + select { + case _, ok := <-conn.lastSuccessSeqNoCh: + if !ok { + break CLOSED } } } - if ok := common.AwaitWaitGroup(&conn.waitReadWG, defaultWGTimeout); !ok { - conn.logger.Fatal("waitReadGroup timed out") - } - // we are not going to resuse the extents at this point - // seal the extent - if err := conn.sealExtent(); err != nil { - conn.logger.Warn("seal extent notify failed during closed") - } - // set the shutdownWG to be done here - conn.shutdownWG.Done() + } - // notify the pathCache so that we can tear down the client - // connections if needed - select { - case conn.notifyCloseCh <- conn.extUUID: - default: - } + if ok := common.AwaitWaitGroup(&conn.waitReadWG, defaultWGTimeout); !ok { + conn.logger.Fatal("waitReadGroup timed out") + } + // we are not going to resuse the extents at this point + // seal the extent + if err := conn.sealExtent(); err != nil { + conn.logger.Warn("seal extent notify failed during closed") + } - conn.logger.WithFields(bark.Fields{ - `sentSeqNo`: conn.seqNo, - `ackSeqNo`: conn.lastSuccessSeqNo, - }).Info("extHost closed") + conn.lk.Unlock() // no longer need the lock - unloadTimer := common.NewTimer(unloadTimeout) - defer unloadTimer.Stop() - // now wait for unload timeout to keep the extent loaded in the pathCache - // or wait for the force shutdown which will happen when we are completely unloading - // the pathCache - select { - case <-unloadTimer.C: - case <-conn.forceUnloadCh: - } + conn.logger.WithFields(bark.Fields{ + `sentSeqNo`: conn.seqNo, + `ackSeqNo`: conn.lastSuccessSeqNo, + }).Info("extHost closed") - // now notify the pathCache to unload the extent - select { - case conn.notifyUnloadCh <- conn.extUUID: - default: - } - conn.loadReporter.Stop() + // notify the pathCache so that we can tear down the client + // connections if needed + conn.notifyExtCacheClosedCh <- conn.extUUID + + unloadTimer := common.NewTimer(unloadTimeout) + defer unloadTimer.Stop() + // now wait for unload timeout to keep the extent loaded in the pathCache + // this is needed to deal with the eventually consistent nature of cassandra. 
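
One behavioral change in the restructured close path is worth calling out: the old code notified the path cache with a best-effort, non-blocking send, so the signal could be dropped; the new code releases the lock first and then performs a plain blocking send on the buffered channels owned by the path cache. A side-by-side sketch of just that step:

```
// Old: best-effort notify; the path cache could miss the signal entirely.
select {
case conn.notifyCloseCh <- conn.extUUID:
default:
}

// New: blocking send on a buffered channel owned by the path cache, so the
// closed/unloaded transitions are never silently dropped.
conn.notifyExtCacheClosedCh <- conn.extUUID
```
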
+ // After an extent is marked as SEALED, a subsequent listDestinationExtents + // might still continue to show the extent as OPENED. To avoid agressive + // unload/reload (store would reject the call to openStream), sleep for + // a while before totally unloading + // or wait for the force shutdown which will happen when we are completely unloading + // the pathCache + select { + case <-unloadTimer.C: + case <-conn.forceUnloadCh: } - conn.lk.Unlock() + + // now notify the pathCache to unload the extent + conn.notifyExtCacheUnloadCh <- conn.extUUID + + conn.loadReporter.Stop() + conn.shutdownWG.Done() } func (conn *extHost) getEnqueueTime() int64 { @@ -572,7 +582,7 @@ func (conn *extHost) aggregateAndSendReplies(numReplicas int) { defer conn.failInflightMessages(inflightMessages) // Setup the perMsgTimer - perMsgTimer := common.NewTimer(perMsgAckTimeout) + perMsgTimer := common.NewTimer(msgAckTimeout) defer perMsgTimer.Stop() if conn.lastSuccessSeqNoCh != nil { @@ -594,7 +604,7 @@ func (conn *extHost) aggregateAndSendReplies(numReplicas int) { elapsed := time.Since(resCh.sentTime) // Note: even if this value is negative, it is ok because we should timeout immediately - perMsgTimer.Reset(perMsgAckTimeout - elapsed) + perMsgTimer.Reset(msgAckTimeout - elapsed) for i := 0; i < numReplicas; i++ { select { case ack, ok := <-resCh.appendMsgAck: @@ -644,7 +654,7 @@ func (conn *extHost) aggregateAndSendReplies(numReplicas int) { fmt.Sprintf("%s:%d:%8x", string(conn.extUUID), resCh.seqNo, address)) // Try to send the ack back to the client within the timeout period - perMsgTimer.Reset(perMsgAckTimeout) + perMsgTimer.Reset(msgAckTimeout) select { case resCh.putMsgAck <- putMsgAck: case <-perMsgTimer.C: diff --git a/services/inputhost/inputhost.go b/services/inputhost/inputhost.go index 36a92a72..37f5b4b7 100644 --- a/services/inputhost/inputhost.go +++ b/services/inputhost/inputhost.go @@ -39,6 +39,7 @@ import ( "github.com/uber/cherami-thrift/.generated/go/shared" "github.com/uber/cherami-server/common" dconfig "github.com/uber/cherami-server/common/dconfigclient" + mm "github.com/uber/cherami-server/common/metadata" "github.com/uber/cherami-server/common/metrics" "github.com/uber/cherami-server/services/inputhost/load" "github.com/uber/cherami-server/stream" @@ -90,7 +91,6 @@ type ( maxConnLimit int32 extMsgsLimitPerSecond int32 connMsgsLimitPerSecond int32 - useWebsocket int32 // flag of whether to use websocket to connect to store, under uConfig control hostMetrics *load.HostMetrics lastLoadReportedTime int64 // unix nanos when the last load report was sent common.SCommon @@ -126,6 +126,9 @@ var ErrHostShutdown = &cherami.InternalServiceError{Message: "InputHost already // ErrThrottled is returned when the host is already shutdown var ErrThrottled = &cherami.InternalServiceError{Message: "InputHost throttling publisher cconnection"} +// ErrDstNotLoaded is returned when this input host doesn't own any extents for the destination +var ErrDstNotLoaded = &cherami.InternalServiceError{Message: "Destination no longer served by this input host"} + func (h *InputHost) isDestinationWritable(destDesc *shared.DestinationDescription) bool { status := destDesc.GetStatus() if status != shared.DestinationStatus_ENABLED && status != shared.DestinationStatus_SENDONLY { @@ -377,8 +380,25 @@ func (h *InputHost) OpenPublisherStream(ctx thrift.Context, call stream.BInOpenP return &ReplicaNotExistsError{} } } + doneCh := make(chan bool, 5) - pathCache.extMutex.Lock() + + pathCache.Lock() + + errCleanup := func() { + 
pathCache.Unlock() + // put back the loadShutdownRef + atomic.AddInt32(&h.loadShutdownRef, -1) + // stop the stream before returning + call.Done() + } + + if !pathCache.isActive() { + // path cache is being unloaded, can't add new conns + errCleanup() + h.m3Client.IncCounter(metrics.OpenPublisherStreamScope, metrics.InputhostInternalFailures) + return ErrDstNotLoaded + } // if the number of connections has breached then we can reject the connection hostMaxConnPerDestination := h.GetMaxConnPerDest() @@ -386,26 +406,17 @@ func (h *InputHost) OpenPublisherStream(ctx thrift.Context, call stream.BInOpenP pathCache.logger.WithField(common.TagHostConnLimit, common.FmtHostConnLimit(hostMaxConnPerDestination)). Warn("Too many open connections on this path. Rejecting this open") - - // put back the loadShutdownRef - atomic.AddInt32(&h.loadShutdownRef, -1) - - pathCache.extMutex.Unlock() - // stop the stream before returning - call.Done() + errCleanup() h.m3Client.IncCounter(metrics.OpenPublisherStreamScope, metrics.InputhostUserFailures) return ErrThrottled } - conn := newPubConnection(path, call, pathCache.putMsgCh, h.cacheTimeout, pathCache.notifyCloseCh, pathCache.currID, - doneCh, h.m3Client, pathCache.logger, h.IsLimitsEnabled(), pathCache) + conn := newPubConnection(path, call, pathCache, h.m3Client, h.IsLimitsEnabled(), h.cacheTimeout, doneCh) pathCache.connections[pathCache.currID] = conn - // wait for shutdown here as well. this makes sure the pubconnection is done - h.shutdownWG.Add(1) conn.open() pathCache.currID++ // increase the active connection count pathCache.dstMetrics.Increment(load.DstMetricNumOpenConns) - pathCache.extMutex.Unlock() + pathCache.Unlock() // increase the num open conns for the host h.hostMetrics.Increment(load.HostMetricNumOpenConns) @@ -416,7 +427,6 @@ func (h *InputHost) OpenPublisherStream(ctx thrift.Context, call stream.BInOpenP // wait till the conn is closed. we cannot return immediately. // If we do so, we will get data races reading/writing from/to the stream <-conn.doneCh - h.shutdownWG.Done() // decrement the active connection count pathCache.dstMetrics.Decrement(load.DstMetricNumOpenConns) @@ -466,6 +476,7 @@ func (h *InputHost) PutMessageBatch(ctx thrift.Context, request *cherami.PutMess result := cherami.NewPutMessageBatchResult_() ackChannel := make(chan *cherami.PutMessageAck, defaultBufferSize) inflightRequestCnt := 0 + inflightMsgMap := make(map[string]struct{}) for _, msg := range messages { inMsg := &inPutMessage{ @@ -478,6 +489,7 @@ func (h *InputHost) PutMessageBatch(ctx thrift.Context, request *cherami.PutMess case pathCache.putMsgCh <- inMsg: // remember how many ack is needed inflightRequestCnt++ + inflightMsgMap[msg.GetID()] = struct{}{} default: // just send a THROTTLED status back if sending to message channel is blocked result.FailedMessages = append(result.FailedMessages, &cherami.PutMessageAck{ @@ -489,20 +501,66 @@ func (h *InputHost) PutMessageBatch(ctx thrift.Context, request *cherami.PutMess } internalErrs, userErrs := int64(0), int64(0) + var respStatus cherami.Status + var respMsg string + + // Setup the msgTimer + msgTimer := common.NewTimer(msgAckTimeout) + defer msgTimer.Stop() + + // Try to get as many acks as possible. + // We should break out if either of the following happens: + // 1. pathCache is unloaded + // 2. 
we hit the message timeout +ACKDRAIN: for i := 0; i < inflightRequestCnt; i++ { - ack := <-ackChannel - if ack.GetStatus() != cherami.Status_OK { - if ack.GetStatus() != cherami.Status_THROTTLED { - internalErrs++ + select { + case ack := <-ackChannel: + if ack.GetStatus() != cherami.Status_OK { + if ack.GetStatus() != cherami.Status_THROTTLED { + internalErrs++ + } else { + userErrs++ + } + result.FailedMessages = append(result.FailedMessages, ack) } else { - userErrs++ + result.SuccessMessages = append(result.SuccessMessages, ack) + } + delete(inflightMsgMap, ack.GetID()) + default: + // Now look for either the pathCache unloading, + // or the msgTimer timing out. + // We do this in the default case to make sure + // we can drain all the acks in the channel above + // before bailing out + select { + case <-pathCache.closeCh: + respStatus = cherami.Status_FAILED + respMsg = "pathCache unloaded" + break ACKDRAIN + case <-msgTimer.C: + respStatus = cherami.Status_TIMEDOUT + respMsg = "message timedout" + break ACKDRAIN } - result.FailedMessages = append(result.FailedMessages, ack) - } else { - result.SuccessMessages = append(result.SuccessMessages, ack) } } + // all remaining messages in the inflight map failed + if len(inflightMsgMap) > 0 { + pathCache.logger.WithFields(bark.Fields{ + `numFailedMessages`: len(inflightMsgMap), + `respMsg`: respMsg, + }).Info("failing putMessageBatch") + for id := range inflightMsgMap { + result.FailedMessages = append(result.FailedMessages, &cherami.PutMessageAck{ + ID: common.StringPtr(id), + Status: common.CheramiStatusPtr(respStatus), + Message: common.StringPtr(respMsg), + }) + internalErrs++ + } + } // update the last disconnect time now pathCache.updateLastDisconnectTime() @@ -528,7 +586,7 @@ func (h *InputHost) DestinationsUpdated(ctx thrift.Context, request *admin.Desti h.m3Client.IncCounter(metrics.DestinationsUpdatedScope, metrics.InputhostRequests) // If we are already shutting down, no need to do anything here if atomic.AddInt32(&h.loadShutdownRef, 1) <= 0 { - h.logger.Error("not loading the path cache because inputHost already shutdown") + h.logger.WithField(common.TagReconfigureID, common.FmtReconfigureID(request.GetUpdateUUID())).Error("inputhost: DestinationsUpdated: dropping reconfiguration due to shutdown") h.m3Client.IncCounter(metrics.DestinationsUpdatedScope, metrics.InputhostFailures) return ErrHostShutdown } @@ -536,7 +594,7 @@ func (h *InputHost) DestinationsUpdated(ctx thrift.Context, request *admin.Desti var intErr error updateUUID := request.GetUpdateUUID() h.logger.WithField(common.TagReconfigureID, common.FmtReconfigureID(updateUUID)). 
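
The batch publish path now drains acks with an upper bound instead of blocking forever: it stops when every inflight message is answered, when the path cache unloads, or when `msgAckTimeout` fires, and then fails whatever remains in `inflightMsgMap` with a single status. The generic shape of that drain, written as a hypothetical helper (`drainAcks` does not exist in the codebase):

```
// Consume acks until n are seen, the parent closes, or the deadline fires;
// report how many were consumed so the caller can fail the remainder.
func drainAcks(acks <-chan *cherami.PutMessageAck, n int, closed <-chan struct{},
	deadline <-chan time.Time, onAck func(*cherami.PutMessageAck)) (consumed int) {
	for consumed < n {
		select {
		case ack := <-acks:
			onAck(ack)
			consumed++
		default:
			// nothing ready right now: wait for either the path cache to go
			// away or the batch timer to fire, mirroring the hunk above
			select {
			case <-closed:
				return consumed
			case <-deadline:
				return consumed
			}
		}
	}
	return consumed
}
```

The caller then walks the inflight map and appends one failed ack per unanswered message ID, as the hunk above does with `Status_FAILED` or `Status_TIMEDOUT`.
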
- Debug("inputhost: DestinationsUpdated: processing update") + Debug("inputhost: DestinationsUpdated: processing reconfiguration") // Find all the updates we have and do the right thing for _, req := range request.Updates { // get the destUUID and see if it is in the inputhost cache @@ -544,11 +602,14 @@ func (h *InputHost) DestinationsUpdated(ctx thrift.Context, request *admin.Desti pathCache, ok := h.getPathCacheByDestUUID(destUUID) if ok { // We have a path cache loaded - // reconfigure the cache by letting the path cache know about this request - pathCache.reconfigureCh <- inReconfigInfo{req: req, updateUUID: updateUUID} + // check if it is active or not + if pathCache.isActiveNoLock() { + // reconfigure the cache by letting the path cache know about this request + pathCache.reconfigureCh <- inReconfigInfo{req: req, updateUUID: updateUUID} + } else { + intErr = errPathCacheUnloading + } } else { - h.logger.WithField(common.TagDst, common.FmtDst(destUUID)). - Error("inputhost: DestinationsUpdated: this destination doesn't exist on this inputhost") intErr = &cherami.EntityNotExistsError{} } @@ -556,11 +617,16 @@ func (h *InputHost) DestinationsUpdated(ctx thrift.Context, request *admin.Desti if intErr != nil { err = intErr h.m3Client.IncCounter(metrics.DestinationsUpdatedScope, metrics.InputhostFailures) + h.logger.WithFields(bark.Fields{ + common.TagDst: common.FmtDst(destUUID), + common.TagReconfigureID: common.FmtReconfigureID(updateUUID), + common.TagErr: intErr, + }).Error("inputhost: DestinationsUpdated: dropping reconfiguration") } } h.logger.WithField(common.TagReconfigureID, common.FmtReconfigureID(updateUUID)). - Debug("inputhost: DestinationsUpdated: finished update") + Debug("inputhost: DestinationsUpdated: finished reconfiguration") return } @@ -659,16 +725,6 @@ func (h *InputHost) GetNumConnections() int { return int(h.hostMetrics.Get(load.HostMetricNumOpenConns)) } -// SetUseWebsocket gets the flag of whether to use websocket to connect to store -func (h *InputHost) SetUseWebsocket(useWebsocket int32) { - atomic.StoreInt32(&h.useWebsocket, useWebsocket) -} - -// GetUseWebsocket gets the flag of whether to use websocket to connect to store -func (h *InputHost) GetUseWebsocket() int { - return int(atomic.LoadInt32(&h.useWebsocket)) -} - // Shutdown shutsdown all the InputHost cleanly func (h *InputHost) Shutdown() { // make sure we have atleast loaded everything @@ -716,7 +772,6 @@ func NewInputHost(serviceName string, sVice common.SCommon, mClient metadata.TCh bs := InputHost{ logger: (sVice.GetConfig().GetLogger()).WithFields(bark.Fields{common.TagIn: common.FmtIn(sVice.GetHostUUID()), common.TagDplName: common.FmtDplName(deploymentName)}), SCommon: sVice, - mClient: mClient, pathCache: make(map[string]*inPathCache), pathCacheByDestPath: make(map[string]string), // simple map which just resolves the path to uuid cacheTimeout: defaultIdleTimeout, @@ -740,6 +795,8 @@ func NewInputHost(serviceName string, sVice common.SCommon, mClient metadata.TCh bs.cacheTimeout = opts.CacheIdleTimeout } + bs.mClient = mm.NewMetadataMetricsMgr(mClient, bs.m3Client, bs.logger) + // manage uconfig, regiester handerFunc and verifyFunc for uConfig values bs.dConfigClient = sVice.GetDConfigClient() bs.dynamicConfigManage() diff --git a/services/inputhost/inputhost_test.go b/services/inputhost/inputhost_test.go index e97c2875..1db7121c 100644 --- a/services/inputhost/inputhost_test.go +++ b/services/inputhost/inputhost_test.go @@ -37,6 +37,7 @@ import ( "github.com/uber/cherami-server/common" 
"github.com/uber/cherami-server/common/configure" dconfig "github.com/uber/cherami-server/common/dconfigclient" + "github.com/uber/cherami-server/common/metrics" "github.com/uber/cherami-server/services/inputhost/load" mockcommon "github.com/uber/cherami-server/test/mocks/common" mockin "github.com/uber/cherami-server/test/mocks/inputhost" @@ -419,11 +420,11 @@ func (s *InputHostSuite) TestInputHostMultipleClients() { s.Equal(numExtents, len(inputHost.pathCache)) // Make sure we just have one stream to replica even though we have numClients clients for _, pathCache := range inputHost.pathCache { - pathCache.extMutex.RLock() + pathCache.RLock() for _, extInfo := range pathCache.extentCache { s.Equal(numStoreStreams, len(extInfo.connection.streams)) } - pathCache.extMutex.RUnlock() + pathCache.RUnlock() } inputHost.pathMutex.RUnlock() @@ -459,11 +460,13 @@ func (s *InputHostSuite) TestInputHostCacheTime() { inputHost.pathMutex.RLock() s.Equal(numExtents, len(inputHost.pathCache)) for _, pathCache := range inputHost.pathCache { + pathCache.RLock() for _, conn := range pathCache.connections { conn.lk.Lock() s.Equal(false, conn.closed, "connection must be closed") conn.lk.Unlock() } + pathCache.RUnlock() } inputHost.pathMutex.RUnlock() @@ -473,7 +476,7 @@ func (s *InputHostSuite) TestInputHostCacheTime() { inputHost.Shutdown() } -func (s *InputHostSuite) _TestInputHostLoadUnloadRace() { +func (s *InputHostSuite) TestInputHostLoadUnloadRace() { numAttempts := 10 inputHost, _ := NewInputHost("inputhost-test", s.mockService, s.mockMeta, nil) @@ -491,27 +494,26 @@ func (s *InputHostSuite) _TestInputHostLoadUnloadRace() { for i := 0; i < numAttempts; i++ { wg.Add(1) go func(waitG *sync.WaitGroup) { - err := inputHost.OpenPublisherStream(ctx, s.mockPub) - s.NoError(err) + inputHost.OpenPublisherStream(ctx, s.mockPub) waitG.Done() }(&wg) - // sleep for a bit to further have the load/unload race - time.Sleep(10 * time.Millisecond) } - time.Sleep(5 * time.Second) - wg.Wait() - // all connections must be torn down by now - inputHost.pathMutex.RLock() - for _, pathCache := range inputHost.pathCache { - for _, conn := range pathCache.connections { - conn.lk.Lock() - s.Equal(false, conn.closed, "connection must be closed") - conn.lk.Unlock() - } - } - inputHost.pathMutex.RUnlock() + // sleep a bit so that we create the path + time.Sleep(1 * time.Second) + // first get the pathCache now + pathCache, _ := inputHost.getPathCacheByDestPath("foo") + s.NotNil(pathCache) + + // unload everything + inputHost.unloadAll() + + // make sure the pathCache is inactive + s.Equal(false, pathCache.isActiveNoLock(), "pathCache should not be active") + + // make sure everything is stopped + wg.Wait() inputHost.Shutdown() } @@ -614,9 +616,9 @@ func (s *InputHostSuite) TestInputHostReconfigure() { // 3. Make sure we just have one extent pathCache, ok := inputHost.getPathCacheByDestPath("foo") s.True(ok) - pathCache.extMutex.Lock() + pathCache.Lock() s.Equal(1, len(pathCache.extentCache)) - pathCache.extMutex.Unlock() + pathCache.Unlock() // 4. Now add another extent mExt1 := shared.NewExtent() @@ -656,9 +658,9 @@ func (s *InputHostSuite) TestInputHostReconfigure() { // 6. 
Now make sure we have 2 extents pathCache, ok = inputHost.getPathCacheByDestPath("foo") if ok { - pathCache.extMutex.Lock() + pathCache.Lock() s.Equal(2, len(pathCache.extentCache)) - pathCache.extMutex.Unlock() + pathCache.Unlock() } inputHost.Shutdown() @@ -860,8 +862,11 @@ func (s *InputHostSuite) _TestInputHostPutMessageBatch() { s.Equal(true, ok, "destination should be in the resolver cache now") s.NotNil(pathCache) inputHost.pathMutex.Lock() - inputHost.unloadPathCache(pathCache) + pathCache.Lock() + pathCache.prepareForUnload() + pathCache.Unlock() inputHost.pathMutex.Unlock() + pathCache.unload() // Now check the both the caches _, ok = inputHost.getPathCacheByDestUUID(destDesc.GetDestinationUUID()) @@ -872,6 +877,55 @@ func (s *InputHostSuite) _TestInputHostPutMessageBatch() { inputHost.Shutdown() } +// TestInputHostPutMessageBatchTimeout publishes a batch of messages and make sure +// we timeout appropriately +func (s *InputHostSuite) TestInputHostPutMessageBatchTimeout() { + destinationPath := "foo" + ctx, cancel := utilGetThriftContextWithPath(destinationPath) + defer cancel() + inputHost, _ := NewInputHost("inputhost-test", s.mockService, s.mockMeta, nil) + + aMsg := store.NewAppendMessageAck() + msg := cherami.NewPutMessage() + + appendTicker := time.NewTicker(5 * time.Second) + defer appendTicker.Stop() + + pubTicker := time.NewTicker(5 * time.Second) + defer pubTicker.Stop() + + // make sure we don't respond immediately, so that we can timeout + s.mockAppend.On("Write", mock.Anything).Return(nil).WaitUntil(appendTicker.C) + s.mockPub.On("Write", mock.Anything).Return(nil).WaitUntil(pubTicker.C) + + s.mockAppend.On("Read").Return(aMsg, io.EOF).WaitUntil(appendTicker.C) + s.mockPub.On("Read").Return(msg, io.EOF).WaitUntil(pubTicker.C) + + s.mockStore.On("OpenAppendStream", mock.Anything).Return(s.mockAppend, nil) + + // setup a putMessageRequest, which is about to be timed out + putMessageRequest := &cherami.PutMessageBatchRequest{DestinationPath: &destinationPath} + msg.ID = common.StringPtr(strconv.Itoa(1)) + msg.Data = []byte(fmt.Sprintf("hello-%d", 1)) + + // set the msgAckTimeout to a very low value before + // publishing the message through the batch API. 
+ // We should see a failed message in the ack we get back and + // it's status should be timeout as well + msgAckTimeout = 1 * time.Second + + putMessageRequest.Messages = append(putMessageRequest.Messages, msg) + putMessageAcks, err := inputHost.PutMessageBatch(ctx, putMessageRequest) + s.NoError(err) + s.NotNil(putMessageAcks) + s.Len(putMessageAcks.GetSuccessMessages(), 0) + s.Len(putMessageAcks.GetFailedMessages(), 1) + // make sure the status is Status_TIMEDOUT + s.Equal(cherami.Status_TIMEDOUT, putMessageAcks.GetFailedMessages()[0].GetStatus()) + + inputHost.Shutdown() +} + func (s *InputHostSuite) TestInputHostConnLimit() { destinationPath := "foo" inputHost, _ := NewInputHost("inputhost-test", s.mockService, s.mockMeta, nil) @@ -955,16 +1009,17 @@ func (s *InputHostSuite) TestInputExtHostRateLimit() { destUUID, destType, _, _ := inputHost.checkDestination(ctx, destinationPath) - extents, err := inputHost.getExtentsInfoForDestination(ctx, destUUID) - s.Nil(err) + extents, e := inputHost.getExtentsInfoForDestination(ctx, destUUID) + s.Nil(e) putMsgCh := make(chan *inPutMessage, 90) - notifyExtHostCloseCh := make(chan string, defaultExtCloseNotifyChSize) - notifyExtHostUnloadCh := make(chan string, defaultExtCloseNotifyChSize) ackChannel := make(chan *cherami.PutMessageAck, 90) mockLoadReporterDaemonFactory := setupMockLoadReporterDaemonFactory() + reporter := metrics.NewSimpleReporter(nil) + logger := common.GetDefaultLogger().WithFields(bark.Fields{"test": "ExtHostRateLimit"}) + pathCache := &inPathCache{ destinationPath: destinationPath, destUUID: destUUID, @@ -973,18 +1028,22 @@ func (s *InputHostSuite) TestInputExtHostRateLimit() { loadReporterFactory: mockLoadReporterDaemonFactory, reconfigureCh: make(chan inReconfigInfo, defaultBufferSize), putMsgCh: putMsgCh, - notifyCloseCh: make(chan connectionID), - notifyExtHostCloseCh: notifyExtHostCloseCh, - notifyExtHostUnloadCh: notifyExtHostUnloadCh, connections: make(map[connectionID]*pubConnection), closeCh: make(chan struct{}), - logger: common.GetDefaultLogger().WithFields(bark.Fields{"test": "ExtHostRateLimit"}), - m3Client: nil, + notifyExtHostCloseCh: make(chan string, defaultExtCloseNotifyChSize), + notifyExtHostUnloadCh: make(chan string, defaultExtCloseNotifyChSize), + notifyConnsCloseCh: make(chan connectionID, defaultConnsCloseChSize), + logger: logger, + m3Client: metrics.NewClient(reporter, metrics.Inputhost), lastDisconnectTime: time.Now(), dstMetrics: load.NewDstMetrics(), hostMetrics: load.NewHostMetrics(), + inputHost: inputHost, } + pathCache.loadReporter = inputHost.GetLoadReporterDaemonFactory().CreateReporter(time.Minute, pathCache, logger) + pathCache.destM3Client = metrics.NewClientWithTags(pathCache.m3Client, metrics.Inputhost, inputHost.getDestinationTags(destinationPath)) + var connection *extHost for _, extent := range extents { connection = newExtConnection( @@ -995,16 +1054,16 @@ func (s *InputHostSuite) TestInputExtHostRateLimit() { mockLoadReporterDaemonFactory, inputHost.logger, inputHost.GetClientFactory(), - &inputHost.shutdownWG, + &pathCache.connsWG, true) - err = inputHost.checkAndLoadReplicaStreams(connection, extentUUID(extent.uuid), extent.replicas) + err := pathCache.checkAndLoadReplicaStreams(connection, extentUUID(extent.uuid), extent.replicas) s.Nil(err) // overwrite the token bucket connection.SetExtTokenBucketValue(90) - inputHost.shutdownWG.Add(1) + pathCache.connsWG.Add(1) connection.open() break } @@ -1059,7 +1118,6 @@ func (s *InputHostSuite) TestInputExtHostRateLimit() { close(wCh) 
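A note on the rate-limit assertions above: the test caps the extent at 90 messages per second by overriding its token bucket (`SetExtTokenBucketValue(90)`), so only the first 90 writes in a window should be admitted. The sketch below is a minimal, self-contained token bucket that illustrates only those semantics; the names (`tokenBucket`, `tryConsume`) are hypothetical and this is not cherami's `common.TokenBucket`.

```
// Illustrative only: a minimal token bucket with a once-per-second refill.
// It mirrors the accept/reject behaviour the rate-limit test asserts on,
// not cherami's actual limiter implementation.
package main

import (
	"fmt"
	"time"
)

type tokenBucket struct {
	rate       int64     // tokens added per second (burst == rate here)
	tokens     int64     // tokens currently available
	lastRefill time.Time // last time tokens were topped up
}

func newTokenBucket(rate int64) *tokenBucket {
	return &tokenBucket{rate: rate, tokens: rate, lastRefill: time.Now()}
}

// tryConsume returns true if a token was available for this message.
func (tb *tokenBucket) tryConsume() bool {
	if time.Since(tb.lastRefill) >= time.Second {
		tb.tokens = tb.rate // simplified refill: top up once per elapsed second
		tb.lastRefill = time.Now()
	}
	if tb.tokens > 0 {
		tb.tokens--
		return true
	}
	return false
}

func main() {
	tb := newTokenBucket(90) // mirrors SetExtTokenBucketValue(90) in the test
	accepted, rejected := 0, 0
	for i := 0; i < 100; i++ {
		if tb.tryConsume() {
			accepted++
		} else {
			rejected++
		}
	}
	// all 100 calls land in the same one-second window, so expect 90/10
	fmt.Println(accepted, rejected)
}
```

A production limiter typically refills continuously rather than once per second; the simplified refill here is only to keep the example short.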
close(connection.forceUnloadCh) connection.close() - inputHost.Shutdown() } diff --git a/services/inputhost/inputhost_util.go b/services/inputhost/inputhost_util.go index 1a9010d3..102665fa 100644 --- a/services/inputhost/inputhost_util.go +++ b/services/inputhost/inputhost_util.go @@ -21,25 +21,16 @@ package inputhost import ( - "net" - "net/http" - "os" "time" - "golang.org/x/net/context" - - "github.com/uber-common/bark" - "github.com/uber/tchannel-go/thrift" - "github.com/uber/cherami-thrift/.generated/go/admin" "github.com/uber/cherami-thrift/.generated/go/cherami" - "github.com/uber/cherami-thrift/.generated/go/controller" "github.com/uber/cherami-thrift/.generated/go/shared" - "github.com/uber/cherami-thrift/.generated/go/store" "github.com/uber/cherami-server/common" "github.com/uber/cherami-server/common/metrics" "github.com/uber/cherami-server/services/inputhost/load" - serverStream "github.com/uber/cherami-server/stream" + "github.com/uber-common/bark" + "github.com/uber/tchannel-go/thrift" ) type extentUUID string @@ -55,22 +46,18 @@ const ( // defaultExtCloseNotifyChSize is the buffer size for the notification channel when an extent is closed defaultExtCloseNotifyChSize = 50 + // defaultConnsCloseBufSize is the buffer size for the notification channel when a connection is closed + defaultConnsCloseChSize = 500 + // defaultWGTimeout is the timeout for the waitgroup during shutdown defaultWGTimeout = 10 * time.Minute - // metaPollTimeout is the interval to poll metadata - metaPollTimeout = 1 * time.Minute - - // unloadTicker is the interval to unload the pathCache - unloadTickerTimeout = 10 * time.Minute - - // idleTimeout is the idle time after the last client got disconnected - idleTimeout = 15 * time.Minute - // dstLoadReportingInterval is the interval destination load is reported to controller dstLoadReportingInterval = 2 * time.Second ) +var errPathCacheUnloading = &cherami.InternalServiceError{Message: "InputHost pathCache is being unloaded"} + func (h *InputHost) getDestinationTags(destPath string) map[string]string { destTagValue, tagErr := common.GetTagsFromPath(destPath) if tagErr != nil { @@ -97,17 +84,18 @@ func (h *InputHost) checkAndLoadPathCache(destPath string, destUUID string, dest loadReporterFactory: h.GetLoadReporterDaemonFactory(), reconfigureCh: make(chan inReconfigInfo, defaultBufferSize), putMsgCh: make(chan *inPutMessage, defaultBufferSize), - notifyCloseCh: make(chan connectionID), - notifyExtHostCloseCh: make(chan string, defaultExtCloseNotifyChSize), - notifyExtHostUnloadCh: make(chan string, defaultExtCloseNotifyChSize), connections: make(map[connectionID]*pubConnection), closeCh: make(chan struct{}), + notifyExtHostCloseCh: make(chan string, defaultExtCloseNotifyChSize), + notifyExtHostUnloadCh: make(chan string, defaultExtCloseNotifyChSize), + notifyConnsCloseCh: make(chan connectionID, defaultConnsCloseChSize), logger: logger, m3Client: m3Client, lastDisconnectTime: time.Now(), dstMetrics: load.NewDstMetrics(), hostMetrics: hostMetrics, lastDstLoadReportedTime: time.Now().UnixNano(), + inputHost: h, } h.pathCache[destUUID] = pathCache h.pathCacheByDestPath[destPath] = destUUID @@ -117,106 +105,12 @@ func (h *InputHost) checkAndLoadPathCache(destPath string, destUUID string, dest // the destM3Client is the destination specific client to report destination specific metrics // the m3Client above is the overall host client to report host-level metrics. 
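As the comment above notes, the path cache keeps two metrics clients: `m3Client` for host-level counters and `destM3Client`, created on the next line via `metrics.NewClientWithTags`, for per-destination reporting. The sketch below shows the general pattern of deriving a tagged child client from a base client; the types and method names are invented for illustration and are not cherami's `metrics` package.

```
// Hypothetical sketch of the two-level metrics pattern: one host-wide
// client plus a per-destination client carrying destination tags, so the
// same gauge can be sliced by path. Not cherami's metrics API.
package main

import "fmt"

type tags map[string]string

type client struct {
	scope string
	tags  tags
}

// withTags derives a child client that layers destination-specific tags
// on top of the host-level client's scope.
func (c *client) withTags(extra tags) *client {
	merged := tags{}
	for k, v := range c.tags {
		merged[k] = v
	}
	for k, v := range extra {
		merged[k] = v
	}
	return &client{scope: c.scope, tags: merged}
}

func (c *client) updateGauge(name string, value int64) {
	fmt.Printf("scope=%s tags=%v gauge=%s value=%d\n", c.scope, c.tags, name, value)
}

func main() {
	hostClient := &client{scope: "inputhost"}                          // host-level metrics
	destClient := hostClient.withTags(tags{"destination": "/foo/bar"}) // per-destination metrics

	hostClient.updateGauge("pubconnections", 12) // all destinations on this host
	destClient.updateGauge("pubconnections", 3)  // just /foo/bar
}
```

Keeping both clients lets the same measurement, such as the publisher-connection gauge, be reported once per host and once per destination without duplicating call sites.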
pathCache.destM3Client = metrics.NewClientWithTags(pathCache.m3Client, metrics.Inputhost, h.getDestinationTags(destPath)) - go h.managePath(pathCache) + pathCache.startEventLoop() } h.pathMutex.Unlock() return } -func (h *InputHost) checkAndLoadExtentCache(pathCache *inPathCache, destUUID string, extUUID extentUUID, replicas []string) (err error) { - pathCache.extMutex.Lock() - defer pathCache.extMutex.Unlock() - if extCache, exists := pathCache.extentCache[extUUID]; !exists { - extCache = &inExtentCache{ - extUUID: extUUID, - connection: newExtConnection( - destUUID, - pathCache, - string(extUUID), - len(replicas), - pathCache.loadReporterFactory, - pathCache.logger.WithField(common.TagExt, common.FmtExt(string(extUUID))), - h.GetClientFactory(), - &h.shutdownWG, - h.IsLimitsEnabled()), - } - - err = h.checkAndLoadReplicaStreams(extCache.connection, extUUID, replicas) - if err != nil { - // error loading replica stream - extCache.connection.logger.Error("error loading replica streams for extent") - return err - } - - pathCache.extentCache[extUUID] = extCache - // all open connections should be closed before shutdown - h.shutdownWG.Add(1) - extCache.connection.open() - - // make sure the number of loaded extents is incremented - pathCache.dstMetrics.Increment(load.DstMetricNumOpenExtents) - pathCache.hostMetrics.Increment(load.HostMetricNumOpenExtents) - } - return -} - -func (h *InputHost) checkAndLoadReplicaStreams(conn *extHost, extUUID extentUUID, replicas []string /*storehostPort*/) (err error) { - conn.lk.Lock() - defer conn.lk.Unlock() - var call serverStream.BStoreOpenAppendStreamOutCall - var cancel context.CancelFunc - var ok bool - for i := 0; i < len(replicas); i++ { - if _, ok = conn.streams[storeHostPort(replicas[i])]; !ok { - - cDestType, _ := common.CheramiDestinationType(conn.destType) - - req := &store.OpenAppendStreamRequest{ - DestinationUUID: common.StringPtr(string(conn.destUUID)), - DestinationType: cherami.DestinationTypePtr(cDestType), - ExtentUUID: common.StringPtr(string(extUUID)), - } - reqHeaders := common.GetOpenAppendStreamRequestHeaders(req) - - host, _, _ := net.SplitHostPort(replicas[i]) - port := os.Getenv("CHERAMI_STOREHOST_WS_PORT") - if len(port) == 0 { - port = "6191" - } else if port == "test" { - // XXX: this is a hack to get the wsPort specific to this hostport. - // this is needed specifically for benchmark tests and other tests which - // try to start multiple replicas on the same local machine. - // this is a temporary workaround until we have ringpop labels - // if we have the label feature we can set the websocket port corresponding - // to a replica as a metadata rather than the env variables - envVar := common.GetEnvVariableFromHostPort(replicas[i]) - port = os.Getenv(envVar) - } - - httpHeaders := http.Header{} - for k, v := range reqHeaders { - httpHeaders.Add(k, v) - } - - hostPort := net.JoinHostPort(host, port) - conn.logger.WithField(`replica`, hostPort).Info(`inputhost: Using websocket to connect to store replica`) - call, err = h.GetWSConnector().OpenAppendStream(hostPort, httpHeaders) - if err != nil { - conn.logger.WithFields(bark.Fields{`replicas[i]`: hostPort, common.TagErr: err}).Error(`inputhost: Websocket dial store replica: failed`) - return - } - cancel = nil - - repl := newReplicaConnection(call, cancel, - conn.logger. 
- WithField(common.TagInReplicaHost, common.FmtInReplicaHost(replicas[i]))) - conn.setReplicaInfo(storeHostPort(replicas[i]), repl) - repl.open() - } - } - return -} - // loadPath loads the extent stuff into the respective caches and opens up the replica stream func (h *InputHost) loadPath(extents []*extentInfo, destPath string, destUUID string, destType shared.DestinationType, m3Client metrics.Client) *inPathCache { // First make sure we have the path cached. @@ -228,7 +122,7 @@ func (h *InputHost) loadPath(extents []*extentInfo, destPath string, destUUID st // Now we have the pathCache. check and load all extents for _, extent := range extents { - err := h.checkAndLoadExtentCache(pathCache, destUUID, extentUUID(extent.uuid), extent.replicas) + err := pathCache.checkAndLoadExtent(destUUID, extentUUID(extent.uuid), extent.replicas) // if we are able to successfully load *atleast* one extent, then we are good if err == nil { foundOne = true @@ -238,37 +132,16 @@ func (h *InputHost) loadPath(extents []*extentInfo, destPath string, destUUID st // if we didn't load any extent and we just loaded the pathCache, unload it if !foundOne && !exists && pathCache != nil { pathCache.logger.Error("unable to load any extent for the given destination") - h.pathMutex.Lock() - h.unloadPathCache(pathCache) - h.pathMutex.Unlock() + pathCache.Lock() + pathCache.prepareForUnload() + pathCache.Unlock() + go pathCache.unload() pathCache = nil } return pathCache } -func (h *InputHost) reconfigureClients(pathCache *inPathCache, updateUUID string) { - - var notified, dropped int - - pathCache.extMutex.Lock() - for _, conn := range pathCache.connections { - select { - case conn.reconfigureClientCh <- updateUUID: - notified++ - default: - dropped++ - } - } - pathCache.extMutex.Unlock() - - pathCache.logger.WithFields(bark.Fields{ - common.TagUpdateUUID: updateUUID, - `notified`: notified, - `dropped`: dropped, - }).Info(`reconfigureClients: notified clients`) -} - func (h *InputHost) updatePathCache(destinationsUpdated *admin.DestinationUpdatedNotification, destPath string) { ctx, cancel := thrift.NewContext(defaultMetaCtxTimeout) defer cancel() @@ -288,121 +161,6 @@ func (h *InputHost) updatePathCache(destinationsUpdated *admin.DestinationUpdate } } -func (h *InputHost) managePath(pathCache *inPathCache) { - defer h.shutdownWG.Done() - - refreshTicker := time.NewTicker(metaPollTimeout) // start ticker to refresh metadata - defer refreshTicker.Stop() - - unloadTicker := time.NewTicker(unloadTickerTimeout) // start ticker to unload pathCache - defer unloadTicker.Stop() - - for { - select { - case conn := <-pathCache.notifyCloseCh: - h.pathMutex.Lock() - pathCache.extMutex.Lock() - if _, ok := pathCache.connections[conn]; ok { - pathCache.logger.WithField(`conn`, conn).Info(`updating path cache to remove the connection with ID`) - delete(pathCache.connections, conn) - } - if len(pathCache.connections) <= 0 { - pathCache.lastDisconnectTime = time.Now() - } - pathCache.extMutex.Unlock() - h.pathMutex.Unlock() - case extUUID := <-pathCache.notifyExtHostCloseCh: - // if all the extents for this path went down, no point in keeping the connection open - h.pathMutex.Lock() - pathCache.extMutex.Lock() - if _, ok := pathCache.extentCache[extentUUID(extUUID)]; ok { - pathCache.logger. - WithField(common.TagExt, common.FmtExt(string(extUUID))). 
- Info("updating path cache to decrement the active extents, since extent is closed") - // decrement the number of open extents and if we don't even have 1 open extent disconnect clients - pathCache.hostMetrics.Decrement(load.HostMetricNumOpenExtents) - if pathCache.dstMetrics.Decrement(load.DstMetricNumOpenExtents) <= 0 { - // Note: Make sure we don't race with a load here. - // It is safe to unload the pathCache completely here, - // since there are no open extents. - // Incase the client reconfigures, it will load a fresh - // path cache and will continue as usual. - // Remove it from the map first so that a load doesn't interfere - // with the unload. - delete(h.pathCache, pathCache.destUUID) - pathCache.logger.Info("unloading empty pathCache") - go h.unloadSpecificPath(pathCache) - } - } - pathCache.extMutex.Unlock() - h.pathMutex.Unlock() - case extUUID := <-pathCache.notifyExtHostUnloadCh: - // now we can safely unload the extent completely - h.pathMutex.Lock() - pathCache.extMutex.Lock() - if _, ok := pathCache.extentCache[extentUUID(extUUID)]; ok { - pathCache.logger. - WithField(common.TagExt, common.FmtExt(string(extUUID))). - Info("updating path cache to unload extent") - delete(pathCache.extentCache, extentUUID(extUUID)) - } - pathCache.extMutex.Unlock() - h.pathMutex.Unlock() - case reconfigInfo := <-pathCache.reconfigureCh: - // we need to reload the cache, if the notification type is either - // HOST or ALL - reconfigType := reconfigInfo.req.GetType() - h.pathMutex.RLock() - pathCache.extMutex.RLock() - extentCacheSize := len(pathCache.extentCache) - pathCache.extMutex.RUnlock() - h.pathMutex.RUnlock() - pathCache.logger.WithFields(bark.Fields{ - common.TagReconfigureID: common.FmtReconfigureID(reconfigInfo.updateUUID), - common.TagReconfigureType: common.FmtReconfigureType(reconfigType), - common.TagExtentCacheSize: extentCacheSize, - }).Debugf("reconfiguring inputhost") - switch reconfigType { - case admin.NotificationType_CLIENT: - h.reconfigureClients(pathCache, reconfigInfo.updateUUID) - case admin.NotificationType_HOST: - h.updatePathCache(reconfigInfo.req, pathCache.destinationPath) - case admin.NotificationType_ALL: - h.updatePathCache(reconfigInfo.req, pathCache.destinationPath) - h.reconfigureClients(pathCache, reconfigInfo.updateUUID) - default: - pathCache.logger. - WithField(common.TagReconfigureID, common.FmtReconfigureID(reconfigInfo.updateUUID)). - WithField(common.TagReconfigureType, common.FmtReconfigureType(reconfigType)). - Error("Invalid reconfigure type") - } - pathCache.logger. - WithField(common.TagReconfigureID, common.FmtReconfigureID(reconfigInfo.updateUUID)). - WithField(common.TagReconfigureType, common.FmtReconfigureType(reconfigType)). 
- Debug("finished reconfiguration of inputhost") - case <-refreshTicker.C: - pathCache.logger.Debug("refreshing all extents") - h.getExtentsAndLoadPathCache(nil, "", pathCache.destUUID, shared.DestinationType_UNKNOWN) - - case <-unloadTicker.C: - h.pathMutex.Lock() - pathCache.extMutex.Lock() - if len(pathCache.connections) <= 0 && time.Since(pathCache.lastDisconnectTime) > idleTimeout { - pathCache.logger.Info("unloading idle pathCache") - // Note: remove from the map so that a load doesn't race with unload - h.removeFromCaches(pathCache) - go h.unloadSpecificPath(pathCache) - } - pathCache.extMutex.Unlock() - h.pathMutex.Unlock() - case <-pathCache.closeCh: - return - case <-h.shutdown: - return - } - } -} - // getPathCache returns the pathCache, given the path func (h *InputHost) getPathCacheByDestPath(path string) (retPathCache *inPathCache, ok bool) { h.pathMutex.RLock() @@ -425,47 +183,15 @@ func (h *InputHost) getPathCacheByDestUUID(destUUID string) (retPathCache *inPat return retPathCache, ok } -// unloadPathCache is the routine to unload this destUUID from the -// inputhost's pathCache. -// It should be called with the pathMutex held and it will stop -// all connections and extents from this pathCache -func (h *InputHost) unloadPathCache(pathCache *inPathCache) { - pathCache.unloadInProgress = true - close(pathCache.closeCh) - // close all connections - pathCache.extMutex.Lock() - for _, conn := range pathCache.connections { - go conn.close() - } - for _, extCache := range pathCache.extentCache { - extCache.cacheMutex.Lock() - // call shutdown of the extCache to unload without the timeout - go extCache.connection.shutdown() - extCache.cacheMutex.Unlock() - } - pathCache.loadReporter.Stop() - pathCache.extMutex.Unlock() - - // since we already stopped the load reporter above and - // we close the connections asynchronously, - // make sure the number of connections is explicitly marked as 0 - pathCache.destM3Client.UpdateGauge(metrics.PubConnectionScope, metrics.InputhostDestPubConnection, 0) - h.removeFromCaches(pathCache) - pathCache.logger.Info("pathCache successfully unloaded") -} - -// removeFromCache removes this pathCache from both the caches +// removeFromCache removes this pathCache from both the +// pathCache map. 
+// This method only removes the entry, if the existing entry +// is the same as the passed reference func (h *InputHost) removeFromCaches(pathCache *inPathCache) { - delete(h.pathCache, pathCache.destUUID) - delete(h.pathCacheByDestPath, pathCache.destinationPath) -} - -// unloadSpecificPath unloads the specific path cache -func (h *InputHost) unloadSpecificPath(pathCache *inPathCache) { h.pathMutex.Lock() - // unload only if we are not already in the process of unloading - if !pathCache.unloadInProgress { - h.unloadPathCache(pathCache) + if curr, ok := h.pathCache[pathCache.destUUID]; ok && curr == pathCache { + delete(h.pathCache, pathCache.destUUID) + delete(h.pathCacheByDestPath, pathCache.destinationPath) } h.pathMutex.Unlock() } @@ -475,9 +201,10 @@ func (h *InputHost) unloadAll() { h.pathMutex.Lock() for _, pathCache := range h.pathCache { pathCache.logger.Info("inputhost: closing streams on path") - if !pathCache.unloadInProgress { - h.unloadPathCache(pathCache) - } + pathCache.Lock() + pathCache.prepareForUnload() + pathCache.Unlock() + go pathCache.unload() } h.pathMutex.Unlock() } @@ -486,15 +213,13 @@ func (h *InputHost) unloadAll() { func (h *InputHost) updateExtTokenBucket(connLimit int32) { h.pathMutex.RLock() for _, inPath := range h.pathCache { - // TODO: pathMutex or extMutex which one is better here? - inPath.extMutex.RLock() + inPath.RLock() for _, extCache := range inPath.extentCache { extCache.connection.SetMsgsLimitPerSecond(connLimit) } - inPath.extMutex.RUnlock() + inPath.RUnlock() } h.pathMutex.RUnlock() - h.logger.Infof("The size of pathCache in extent is %v", len(h.pathCache)) h.logger.WithField("UpdateExtTokenBucket", connLimit). Info("update extent TB for new connLimit") } @@ -503,46 +228,13 @@ func (h *InputHost) updateExtTokenBucket(connLimit int32) { func (h *InputHost) updateConnTokenBucket(connLimit int32) { h.pathMutex.RLock() for _, inPath := range h.pathCache { + inPath.RLock() for _, conn := range inPath.connections { conn.SetMsgsLimitPerSecond(connLimit) } + inPath.RUnlock() } h.pathMutex.RUnlock() - h.logger.Infof("The size of pathCache in conn is %v", len(h.pathCache)) h.logger.WithField("UpdateConnTokenBucket", connLimit). 
Info("update connection TB for new connLimit") } - -// Report is used for reporting Destination specific load to controller -func (p *inPathCache) Report(reporter common.LoadReporter) { - - now := time.Now().UnixNano() - diffSecs := (now - p.lastDstLoadReportedTime) / int64(time.Second) - if diffSecs < 1 { - return - } - - numConnections := p.dstMetrics.Get(load.DstMetricNumOpenConns) - numExtents := p.dstMetrics.Get(load.DstMetricNumOpenExtents) - numMsgsInPerSec := p.dstMetrics.GetAndReset(load.DstMetricMsgsIn) / diffSecs - - metric := controller.DestinationMetrics{ - NumberOfConnections: common.Int64Ptr(numConnections), - NumberOfActiveExtents: common.Int64Ptr(numExtents), - IncomingMessagesCounter: common.Int64Ptr(numMsgsInPerSec), - } - - p.lastDstLoadReportedTime = now - reporter.ReportDestinationMetric(p.destUUID, metric) - // Also update the metrics reporter to make sure the connection gauge is updated - p.destM3Client.UpdateGauge(metrics.PubConnectionScope, metrics.InputhostDestPubConnection, numConnections) -} - -// updateLastDisconnectTime is used to update the last disconnect time for -// this path -func (p *inPathCache) updateLastDisconnectTime() { - p.extMutex.Lock() - defer p.extMutex.Unlock() - - p.lastDisconnectTime = time.Now() -} diff --git a/services/inputhost/pathCache.go b/services/inputhost/pathCache.go new file mode 100644 index 00000000..7ef56566 --- /dev/null +++ b/services/inputhost/pathCache.go @@ -0,0 +1,481 @@ +// Copyright (c) 2016 Uber Technologies, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+ +package inputhost + +import ( + "net" + "net/http" + "os" + "sync" + "time" + + "github.com/uber/cherami-thrift/.generated/go/admin" + "github.com/uber/cherami-thrift/.generated/go/cherami" + "github.com/uber/cherami-thrift/.generated/go/controller" + "github.com/uber/cherami-thrift/.generated/go/shared" + "github.com/uber/cherami-thrift/.generated/go/store" + "github.com/uber/cherami-server/common" + "github.com/uber/cherami-server/common/metrics" + "github.com/uber/cherami-server/services/inputhost/load" + serverStream "github.com/uber/cherami-server/stream" + + "github.com/uber-common/bark" + "golang.org/x/net/context" +) + +type ( + // inPathCache holds all the extents for this path + inPathCache struct { + sync.RWMutex + destinationPath string + destUUID string + destType shared.DestinationType + currID connectionID + state pathCacheState // state of this pathCache + loadReporterFactory common.LoadReporterDaemonFactory + loadReporter common.LoadReporterDaemon + reconfigureCh chan inReconfigInfo + putMsgCh chan *inPutMessage + connections map[connectionID]*pubConnection + extentCache map[extentUUID]*inExtentCache + inputHost *InputHost + closeCh chan struct{} // this is the channel which is used to unload path cache + notifyConnsCloseCh chan connectionID + notifyExtHostCloseCh chan string + notifyExtHostUnloadCh chan string + logger bark.Logger + lastDisconnectTime time.Time + // m3Client for mertics per host + m3Client metrics.Client + //destM3Client for metrics per destination path + destM3Client metrics.Client + + // destination level load metrics reported + // to the controller periodically. int32 + // should suffice for counts, because these + // metrics get zero'd out every few seconds + dstMetrics *load.DstMetrics + hostMetrics *load.HostMetrics + + // unix nanos when the last time + // dstMetrics were reported to the + // controller + lastDstLoadReportedTime int64 + + // connsWG is used to wait for all the connections (including ext) to go away before stopping the manage routine. + connsWG sync.WaitGroup + } + + pathCacheState int +) + +const ( + pathCacheActive pathCacheState = iota + pathCacheUnloading + pathCacheInactive +) + +const ( + // metaPollTimeout is the interval to poll metadata + metaPollTimeout = 1 * time.Minute + // unloadTicker is the interval to unload the pathCache + unloadTickerTimeout = 10 * time.Minute + // idleTimeout is the idle time after the last client got disconnected + idleTimeout = 15 * time.Minute +) + +// isActive is called with the pathCache lock held +// returns true if the pathCache is still active +func (pathCache *inPathCache) isActive() bool { + return pathCache.state == pathCacheActive +} + +func (pathCache *inPathCache) startEventLoop() { + go pathCache.eventLoop() +} + +// isActiveNoLock is called without the lock held +// checks and returns true if the pathCache is indeed active +// after acquiring the lock. 
+func (pathCache *inPathCache) isActiveNoLock() bool { + var active bool + pathCache.RLock() + active = pathCache.isActive() + pathCache.RUnlock() + + return active +} + +// updateLastDisconnectTime is used to update the last disconnect time for +// this path +func (pathCache *inPathCache) updateLastDisconnectTime() { + pathCache.Lock() + defer pathCache.Unlock() + pathCache.lastDisconnectTime = time.Now() +} + +func (pathCache *inPathCache) isIdleTimedOut() bool { + ans := false + if pathCache.isActive() && len(pathCache.connections) <= 0 && + time.Since(pathCache.lastDisconnectTime) > idleTimeout { + ans = true + } + return ans +} + +// must be called while holding the pathCache.Lock +func (pathCache *inPathCache) changeState(newState pathCacheState) { + pathCache.state = newState +} + +// eventLoop is the main worker loop for pathCache +func (pathCache *inPathCache) eventLoop() { + + h := pathCache.inputHost + + defer h.shutdownWG.Done() + + refreshTicker := time.NewTicker(metaPollTimeout) // start ticker to refresh metadata + defer refreshTicker.Stop() + + unloadTicker := time.NewTicker(unloadTickerTimeout) // start ticker to unload pathCache + defer unloadTicker.Stop() + + for { + select { + case conn := <-pathCache.notifyConnsCloseCh: + pathCache.pubConnectionClosed(conn) + case extUUID := <-pathCache.notifyExtHostCloseCh: + pathCache.extCacheClosed(extUUID) + case extUUID := <-pathCache.notifyExtHostUnloadCh: + pathCache.extCacheUnloaded(extUUID) + case reconfigInfo := <-pathCache.reconfigureCh: + // we need to reload the cache, if the notification type is either + // HOST or ALL + reconfigType := reconfigInfo.req.GetType() + pathCache.RLock() + extentCacheSize := len(pathCache.extentCache) + pathCache.RUnlock() + + pathCache.logger.WithFields(bark.Fields{ + common.TagReconfigureID: common.FmtReconfigureID(reconfigInfo.updateUUID), + common.TagReconfigureType: common.FmtReconfigureType(reconfigType), + common.TagExtentCacheSize: extentCacheSize, + }).Debugf("reconfiguring inputhost") + switch reconfigType { + case admin.NotificationType_CLIENT: + pathCache.reconfigureClients(reconfigInfo.updateUUID) + case admin.NotificationType_HOST: + h.updatePathCache(reconfigInfo.req, pathCache.destinationPath) + case admin.NotificationType_ALL: + h.updatePathCache(reconfigInfo.req, pathCache.destinationPath) + pathCache.reconfigureClients(reconfigInfo.updateUUID) + default: + pathCache.logger. + WithField(common.TagReconfigureID, common.FmtReconfigureID(reconfigInfo.updateUUID)). + WithField(common.TagReconfigureType, common.FmtReconfigureType(reconfigType)). + Error("Invalid reconfigure type") + } + pathCache.logger. + WithField(common.TagReconfigureID, common.FmtReconfigureID(reconfigInfo.updateUUID)). + WithField(common.TagReconfigureType, common.FmtReconfigureType(reconfigType)). + Debug("finished reconfiguration of inputhost") + case <-refreshTicker.C: + pathCache.logger.Debug("refreshing all extents") + h.getExtentsAndLoadPathCache(nil, "", pathCache.destUUID, shared.DestinationType_UNKNOWN) + case <-unloadTicker.C: + unload := false + pathCache.RLock() + unload = pathCache.isIdleTimedOut() + pathCache.RUnlock() + if unload { + pathCache.Lock() + if pathCache.isIdleTimedOut() { + pathCache.prepareForUnload() + go pathCache.unload() + } + pathCache.Unlock() + } + case <-pathCache.closeCh: + return + case <-h.shutdown: + return + } + } +} + +// unload unloads the path cache by shutting +// down all the connections / extents and +// go-routines. 
Must be called after calling +// pathCache.prepareForUnload(). No locks +// must be held. +func (pathCache *inPathCache) unload() { + // first remove the path from cache + pathCache.inputHost.removeFromCaches(pathCache) + // close all connections + pathCache.Lock() + if pathCache.state != pathCacheUnloading { + pathCache.Unlock() + return + } + + for _, conn := range pathCache.connections { + go conn.close() + } + for _, extCache := range pathCache.extentCache { + // call shutdown of the extCache to unload without the timeout + go extCache.connection.shutdown() + } + pathCache.loadReporter.Stop() + pathCache.state = pathCacheInactive + pathCache.Unlock() + + // wait for all the above to go away + pathCache.connsWG.Wait() + + // now close the closeChannel which will stop the manage routine + close(pathCache.closeCh) + // since we already stopped the load reporter above and + // we close the connections asynchronously, + // make sure the number of connections is explicitly marked as 0 + pathCache.destM3Client.UpdateGauge(metrics.PubConnectionScope, metrics.InputhostDestPubConnection, 0) + pathCache.logger.Info("pathCache successfully unloaded") +} + +// prepareForUnload prepares the pathCache for unload. +// This func must be called with the following lock held: +// pathCache.Lock() +// +// Sets the state to Unloading to prevent races from load +func (pathCache *inPathCache) prepareForUnload() { + if !pathCache.isActive() { + pathCache.logger.Info("pathCache is already not active") + return + } + pathCache.changeState(pathCacheUnloading) +} + +// patch cache is closed +func (pathCache *inPathCache) pubConnectionClosed(connID connectionID) { + pathCache.Lock() + if _, ok := pathCache.connections[connID]; ok { + pathCache.logger.WithField(`conn`, connID).Info(`updating path cache to remove the connection with ID`) + delete(pathCache.connections, connID) + } + if len(pathCache.connections) <= 0 { + pathCache.lastDisconnectTime = time.Now() + } + pathCache.Unlock() +} + +// extCacheClosed is the routine that cleans up +// the state associated with this extent on this +// pathCache after it is closed. +func (pathCache *inPathCache) extCacheClosed(extUUID string) { + + active := true + + pathCache.Lock() + if _, ok := pathCache.extentCache[extentUUID(extUUID)]; ok { + pathCache.hostMetrics.Decrement(load.HostMetricNumOpenExtents) + pathCache.dstMetrics.Decrement(load.DstMetricNumOpenExtents) + pathCache.logger. + WithField(common.TagExt, common.FmtExt(string(extUUID))). + Info("updating path cache to decrement the active extents, since extent is closed") + } + active = pathCache.isActive() + pathCache.Unlock() + + if !active { + return + } + + // initiate unloading of pathCache if all extents + // for this cache are gone + pathCache.Lock() + if pathCache.isActive() && pathCache.dstMetrics.Get(load.DstMetricNumOpenExtents) <= 0 { + pathCache.prepareForUnload() + go pathCache.unload() + } + pathCache.Unlock() +} + +// extCacheUnloaded is the routine that is called to completely +// remove the extent from this pathCache +func (pathCache *inPathCache) extCacheUnloaded(extUUID string) { + pathCache.Lock() + if _, ok := pathCache.extentCache[extentUUID(extUUID)]; ok { + pathCache.logger. + WithField(common.TagExt, common.FmtExt(string(extUUID))). 
+ Info("updating path cache to unload extent") + delete(pathCache.extentCache, extentUUID(extUUID)) + } + pathCache.Unlock() +} + +func (pathCache *inPathCache) reconfigureClients(updateUUID string) { + + var notified, dropped int + + pathCache.RLock() + for _, conn := range pathCache.connections { + select { + case conn.reconfigureClientCh <- updateUUID: + notified++ + default: + dropped++ + } + } + pathCache.RUnlock() + + pathCache.logger.WithFields(bark.Fields{ + common.TagUpdateUUID: updateUUID, + `notified`: notified, + `dropped`: dropped, + }).Info(`reconfigureClients: notified clients`) +} + +func (pathCache *inPathCache) checkAndLoadReplicaStreams(conn *extHost, extUUID extentUUID, replicas []string /*storehostPort*/) (err error) { + + h := pathCache.inputHost + + conn.lk.Lock() + defer conn.lk.Unlock() + var call serverStream.BStoreOpenAppendStreamOutCall + var cancel context.CancelFunc + var ok bool + for i := 0; i < len(replicas); i++ { + if _, ok = conn.streams[storeHostPort(replicas[i])]; !ok { + + cDestType, _ := common.CheramiDestinationType(conn.destType) + + req := &store.OpenAppendStreamRequest{ + DestinationUUID: common.StringPtr(string(conn.destUUID)), + DestinationType: cherami.DestinationTypePtr(cDestType), + ExtentUUID: common.StringPtr(string(extUUID)), + } + reqHeaders := common.GetOpenAppendStreamRequestHeaders(req) + + host, _, _ := net.SplitHostPort(replicas[i]) + port := os.Getenv("CHERAMI_STOREHOST_WS_PORT") + if len(port) == 0 { + port = "6191" + } else if port == "test" { + // XXX: this is a hack to get the wsPort specific to this hostport. + // this is needed specifically for benchmark tests and other tests which + // try to start multiple replicas on the same local machine. + // this is a temporary workaround until we have ringpop labels + // if we have the label feature we can set the websocket port corresponding + // to a replica as a metadata rather than the env variables + envVar := common.GetEnvVariableFromHostPort(replicas[i]) + port = os.Getenv(envVar) + } + + httpHeaders := http.Header{} + for k, v := range reqHeaders { + httpHeaders.Add(k, v) + } + + hostPort := net.JoinHostPort(host, port) + conn.logger.WithField(`replica`, hostPort).Info(`inputhost: Using websocket to connect to store replica`) + call, err = h.GetWSConnector().OpenAppendStream(hostPort, httpHeaders) + if err != nil { + conn.logger.WithFields(bark.Fields{`replicas[i]`: hostPort, common.TagErr: err}).Error(`inputhost: Websocket dial store replica: failed`) + return + } + cancel = nil + + repl := newReplicaConnection(call, cancel, + conn.logger. 
+ WithField(common.TagInReplicaHost, common.FmtInReplicaHost(replicas[i]))) + conn.setReplicaInfo(storeHostPort(replicas[i]), repl) + repl.open() + } + } + return +} + +func (pathCache *inPathCache) checkAndLoadExtent(destUUID string, extUUID extentUUID, replicas []string) (err error) { + pathCache.Lock() + defer pathCache.Unlock() + + if !pathCache.isActive() { + return errPathCacheUnloading + } + + if extCache, exists := pathCache.extentCache[extUUID]; !exists { + extCache = &inExtentCache{ + extUUID: extUUID, + connection: newExtConnection( + destUUID, + pathCache, + string(extUUID), + len(replicas), + pathCache.loadReporterFactory, + pathCache.logger.WithField(common.TagExt, common.FmtExt(string(extUUID))), + pathCache.inputHost.GetClientFactory(), + &pathCache.connsWG, + pathCache.inputHost.IsLimitsEnabled()), + } + + err = pathCache.checkAndLoadReplicaStreams(extCache.connection, extUUID, replicas) + if err != nil { + // error loading replica stream + extCache.connection.logger.Error("error loading replica streams for extent") + return err + } + + pathCache.extentCache[extUUID] = extCache + // all open connections should be closed before shutdown + pathCache.connsWG.Add(1) + extCache.connection.open() + + // make sure the number of loaded extents is incremented + pathCache.dstMetrics.Increment(load.DstMetricNumOpenExtents) + pathCache.hostMetrics.Increment(load.HostMetricNumOpenExtents) + } + return +} + +// Report is used for reporting Destination specific load to controller +func (pathCache *inPathCache) Report(reporter common.LoadReporter) { + + now := time.Now().UnixNano() + diffSecs := (now - pathCache.lastDstLoadReportedTime) / int64(time.Second) + if diffSecs < 1 { + return + } + + numConnections := pathCache.dstMetrics.Get(load.DstMetricNumOpenConns) + numExtents := pathCache.dstMetrics.Get(load.DstMetricNumOpenExtents) + numMsgsInPerSec := pathCache.dstMetrics.GetAndReset(load.DstMetricMsgsIn) / diffSecs + + metric := controller.DestinationMetrics{ + NumberOfConnections: common.Int64Ptr(numConnections), + NumberOfActiveExtents: common.Int64Ptr(numExtents), + IncomingMessagesCounter: common.Int64Ptr(numMsgsInPerSec), + } + + pathCache.lastDstLoadReportedTime = now + reporter.ReportDestinationMetric(pathCache.destUUID, metric) + // Also update the metrics reporter to make sure the connection gauge is updated + pathCache.destM3Client.UpdateGauge(metrics.PubConnectionScope, metrics.InputhostDestPubConnection, numConnections) +} diff --git a/services/inputhost/pubconnection.go b/services/inputhost/pubconnection.go index 4d981557..8bbec34b 100644 --- a/services/inputhost/pubconnection.go +++ b/services/inputhost/pubconnection.go @@ -28,10 +28,8 @@ import ( "github.com/uber-common/bark" "github.com/uber/cherami-thrift/.generated/go/cherami" - "github.com/uber/cherami-thrift/.generated/go/shared" "github.com/uber/cherami-server/common" "github.com/uber/cherami-server/common/metrics" - "github.com/uber/cherami-server/services/inputhost/load" serverStream "github.com/uber/cherami-server/stream" ) @@ -39,44 +37,6 @@ import ( const timeLatencyToLog = 70 * time.Second type ( - // inPathCache holds all the extents for this path - inPathCache struct { - destinationPath string - extMutex sync.RWMutex - destUUID string - destType shared.DestinationType - currID connectionID - unloadInProgress bool // flag to indicate if an unload is already in progress - loadReporterFactory common.LoadReporterDaemonFactory - loadReporter common.LoadReporterDaemon - reconfigureCh chan inReconfigInfo - 
putMsgCh chan *inPutMessage - notifyExtHostCloseCh chan string - notifyExtHostUnloadCh chan string - notifyCloseCh chan connectionID - connections map[connectionID]*pubConnection - extentCache map[extentUUID]*inExtentCache - closeCh chan struct{} // this is the channel which is used to unload path cache - logger bark.Logger - lastDisconnectTime time.Time - // m3Client for mertics per host - m3Client metrics.Client - //destM3Client for metrics per destination path - destM3Client metrics.Client - - // destination level load metrics reported - // to the controller periodically. int32 - // should suffice for counts, because these - // metrics get zero'd out every few seconds - dstMetrics *load.DstMetrics - hostMetrics *load.HostMetrics - - // unix nanos when the last time - // dstMetrics were reported to the - // controller - lastDstLoadReportedTime int64 - } - pubConnection struct { connID connectionID destinationPath string @@ -89,7 +49,7 @@ type ( replyCh chan response closeChannel chan struct{} // this is the channel which is used to actually close the stream waitWG sync.WaitGroup - notifyCloseCh chan connectionID // this channel is used to notify the path cache to remove us from its list + notifyCloseCh chan connectionID // this is used to notify the path cache to remove us from its list doneCh chan bool // this is used to unblock the OpenPublisherStream() recvMsgs int64 // total msgs received @@ -105,6 +65,7 @@ type ( closed bool limitsEnabled bool pathCache *inPathCache + pathWG *sync.WaitGroup } response struct { @@ -122,6 +83,8 @@ type ( putMsgAckCh chan *cherami.PutMessageAck putMsgRecvTime time.Time } + + pubConnectionClosedCb func(connectionID) ) // failTimeout is the timeout to wait for acks from the store when a @@ -141,28 +104,27 @@ const ( // perConnMsgsLimitPerSecond is the rate limit per connection const perConnMsgsLimitPerSecond = 10000 -func newPubConnection(destinationPath string, stream serverStream.BInOpenPublisherStreamInCall, msgCh chan *inPutMessage, - timeout time.Duration, notifyCloseCh chan connectionID, id connectionID, doneCh chan bool, - m3Client metrics.Client, logger bark.Logger, limitsEnabled bool, pathCache *inPathCache) *pubConnection { +func newPubConnection(destinationPath string, stream serverStream.BInOpenPublisherStreamInCall, pathCache *inPathCache, m3Client metrics.Client, limitsEnabled bool, timeout time.Duration, doneCh chan bool) *pubConnection { conn := &pubConnection{ - connID: id, + connID: pathCache.currID, destinationPath: destinationPath, - logger: logger.WithFields(bark.Fields{ - common.TagInPubConnID: common.FmtInPubConnID(int(id)), + logger: pathCache.logger.WithFields(bark.Fields{ + common.TagInPubConnID: common.FmtInPubConnID(int(pathCache.currID)), common.TagModule: `pubConn`, }), stream: stream, - putMsgCh: msgCh, + putMsgCh: pathCache.putMsgCh, cacheTimeout: timeout, //perConnTokenBucket: common.NewTokenBucket(perConnMsgsLimitPerSecond, common.NewRealTimeSource()), replyCh: make(chan response, defaultBufferSize), reconfigureClientCh: make(chan string, reconfigClientChSize), ackChannel: make(chan *cherami.PutMessageAck, defaultBufferSize), closeChannel: make(chan struct{}), - notifyCloseCh: notifyCloseCh, + notifyCloseCh: pathCache.notifyConnsCloseCh, doneCh: doneCh, limitsEnabled: limitsEnabled, pathCache: pathCache, + pathWG: &pathCache.connsWG, } conn.SetMsgsLimitPerSecond(common.HostPerConnMsgsLimitPerSecond) return conn @@ -174,6 +136,7 @@ func (conn *pubConnection) open() { if !conn.opened { conn.waitWG.Add(2) + 
conn.pathWG.Add(1) // this makes the manage routine in the pathCache is alive go conn.readRequestStream() go conn.writeAcksStream() @@ -184,25 +147,34 @@ func (conn *pubConnection) open() { } func (conn *pubConnection) close() { + conn.lk.Lock() - if !conn.closed { - close(conn.closeChannel) - conn.closed = true - conn.waitWG.Wait() - // we have successfully closed the connection - // make sure we update the ones who are waiting for us - select { - case conn.doneCh <- true: - default: - } + if conn.closed { + conn.lk.Unlock() + return + } - // now notify the pathCache to update its connections map - select { - case conn.notifyCloseCh <- conn.connID: - default: - } + close(conn.closeChannel) + conn.closed = true + conn.waitWG.Wait() + + // we have successfully closed the connection + // make sure we update the ones who are waiting for us + select { + case conn.doneCh <- true: + default: } + conn.lk.Unlock() + + // notify the patch cache to remove this conn + // from the cache. No need to hold the lock + // for this. + conn.notifyCloseCh <- conn.connID + + // set the wait group for the pathCache to be done + conn.pathWG.Done() + conn.logger.WithFields(bark.Fields{ `sentAcks`: conn.sentAcks, `sentNacks`: conn.sentNacks, diff --git a/services/outputhost/cgcache.go b/services/outputhost/cgcache.go index 43871ff6..b7144fc6 100644 --- a/services/outputhost/cgcache.go +++ b/services/outputhost/cgcache.go @@ -146,9 +146,6 @@ type ( // lastDisconnectTime is the time the last consumer got disconnected lastDisconnectTime time.Time - // useWebsocket is the flag of whether to use websocket to connect to store - useWebsocket bool - // sessionID is the 16 bit session identifier for this host sessionID uint16 @@ -232,7 +229,6 @@ func newConsumerGroupCache(destPath string, cgDesc shared.ConsumerGroupDescripti creditNotifyCh: make(chan int32, 50), creditRequestCh: make(chan string, 50), lastDisconnectTime: time.Now(), - useWebsocket: (h.GetUseWebsocket() > 0), sessionID: h.sessionID, ackIDGen: h.ackMgrIDGen, ackMgrLoadCh: h.ackMgrLoadCh, @@ -299,7 +295,6 @@ func (cgCache *consumerGroupCache) loadExtentCache(tClients common.ClientFactory creditNotifyCh: cgCache.creditNotifyCh, creditRequestCh: cgCache.creditRequestCh, initialCredits: defaultNumOutstandingMsgs, - useWebsocket: cgCache.useWebsocket, loadMetrics: load.NewExtentMetrics(), } diff --git a/services/outputhost/dynamicConfig.go b/services/outputhost/dynamicConfig.go index 754e1e9f..0ff09d46 100644 --- a/services/outputhost/dynamicConfig.go +++ b/services/outputhost/dynamicConfig.go @@ -20,20 +20,14 @@ package outputhost -import ( - dconfig "github.com/uber/cherami-server/common/dconfigclient" - log "github.com/Sirupsen/logrus" -) - -const ( - // UkeyUseWebsocket is the uconfig key for UseWebsocket - UkeyUseWebsocket = "outputhost.UseWebsocket" -) +import dconfig "github.com/uber/cherami-server/common/dconfigclient" func (h *OutputHost) registerInt() { // Add handler function for the dynamic config value handlerMap := make(map[string]dconfig.Handler) - handlerMap[UkeyUseWebsocket] = dconfig.GenerateIntHandler(UkeyUseWebsocket, h.SetUseWebsocket, h.GetUseWebsocket) + + // nothing to be dynamically configured right now + h.dClient.AddHandlers(handlerMap) // Add verify function for the dynamic config value verifierMap := make(map[string]dconfig.Verifier) @@ -42,15 +36,7 @@ func (h *OutputHost) registerInt() { // LoadUconfig load the dynamic config values for key func (h *OutputHost) LoadUconfig() { - // UseWebsocket - valueUcfg, ok := 
h.dClient.GetOrDefault(UkeyUseWebsocket, 0).(int) - if ok { - h.SetUseWebsocket(int32(valueUcfg)) - log.WithField(UkeyUseWebsocket, valueUcfg). - Info("Update the uconfig value") - } else { - log.Errorf("Cannot get %s from uconfig, Using right format", UkeyUseWebsocket) - } + // nothing to be loaded dynamically right now } // uconfigManage do the work for uconfig diff --git a/services/outputhost/extcache.go b/services/outputhost/extcache.go index 8ae3dc48..79d85d7e 100644 --- a/services/outputhost/extcache.go +++ b/services/outputhost/extcache.go @@ -124,9 +124,6 @@ type extentCache struct { // initialCredits is the credits that should be given to the stores at the start initialCredits int32 - // useWebsocket is the flag of whether to use websocket to connect to store - useWebsocket bool - // loadReporter to report metrics to controller loadReporter common.LoadReporterDaemon diff --git a/services/outputhost/outputhost.go b/services/outputhost/outputhost.go index 85ad83b4..5f17f4b2 100644 --- a/services/outputhost/outputhost.go +++ b/services/outputhost/outputhost.go @@ -41,6 +41,7 @@ import ( "github.com/uber/cherami-thrift/.generated/go/shared" "github.com/uber/cherami-server/common" dconfig "github.com/uber/cherami-server/common/dconfigclient" + mm "github.com/uber/cherami-server/common/metadata" "github.com/uber/cherami-server/common/metrics" "github.com/uber/cherami-server/services/outputhost/load" "github.com/uber/cherami-server/stream" @@ -52,9 +53,11 @@ const ( defaultPrefetchBufferSize = 10000 // XXX: find the optimal prefetch buffer size defaultUnloadChSize = 50 defaultAckMgrMapChSize = 500 - defaultAckMgrIDStartFrom = 0 // the default ack mgr id for this host to start from - defaultMaxConnLimitPerCg = 1000 // default max connections per CG per host - hostLoadReportingInterval = 2 * time.Second // interval at which output host load is reported to controller + defaultAckMgrIDStartFrom = 0 // the default ack mgr id for this host to start from + defaultMaxConnLimitPerCg = 1000 // default max connections per CG per host + hostLoadReportingInterval = 2 * time.Second // interval at which output host load is reported to controller + msgCacheWriteTimeout = 10 * time.Minute // timeout to write to message cache. 
Intentionally set to a large number + // because if we don't write to cache, client won't be able to ack the message ) var thisOutputHost *OutputHost @@ -77,7 +80,6 @@ type ( m3Client metrics.Client dClient dconfig.Client numConsConn int32 // number of active pubConnection - useWebsocket int32 // flag of whether to use websocket to connect to store, under uConfig control ackMgrMap map[uint32]*ackManager // map of all the ack managers on this output ackMgrMutex sync.RWMutex // mutex protecting the above map sessionID uint16 @@ -450,36 +452,44 @@ func (h *OutputHost) ReceiveMessageBatch(ctx thrift.Context, request *cherami.Re // load the CG and all the extents for this CG lclLg = lclLg.WithField(common.TagCnsm, cgDesc.GetConsumerGroupUUID()) cgCache, err := h.createAndLoadCGCache(ctx, *cgDesc, path, lclLg) + // putback the load ref which we got in the createAndLoadCGCache + defer atomic.AddInt32(&h.loadShutdownRef, -1) if err != nil { lclLg.WithField(common.TagErr, err).Error(`unable to load consumer group with error`) - // putback the load ref which we got in the createAndLoadCGCache - atomic.AddInt32(&h.loadShutdownRef, -1) h.m3Client.IncCounter(metrics.ReceiveMessageBatchOutputHostScope, metrics.OutputhostFailures) return nil, err } cgCache.updateLastDisconnectTime() res := cherami.NewReceiveMessageBatchResult_() + msgCacheWriteTicker := common.NewTimer(msgCacheWriteTimeout) + defer msgCacheWriteTicker.Stop() deliver := func(msg *cherami.ConsumerMessage, msgCacheCh chan cacheMsg) { // send msg back to caller res.Messages = append(res.Messages, msg) - // and send msg to cache waiting ack. (better handle timeout?) - t := time.NewTimer(timeoutTime.Sub(time.Now())) - defer t.Stop() + // long poll consumers don't have a connectionID and we don't have anything to throttle here // using -1 as a special connection ID here, // XXX: move this to a const + + // Note: we use a very large time out here. Because if we won't write the message to the cache, client + // won't be able to ack the message. This means client might already timeout while we're still waiting for + // writing to message cache but this is the best we can do. + msgCacheWriteTicker.Reset(msgCacheWriteTimeout) select { case msgCacheCh <- cacheMsg{msg: msg, connID: -1}: - break - case <-t.C: + case <-cgCache.closeChannel: + lclLg.WithField(common.TagAckID, common.FmtAckID(msg.GetAckId())). + Error("outputhost: Unable to write the message to the cache because cg cache closing") + case <-msgCacheWriteTicker.C: lclLg.WithField(common.TagAckID, common.FmtAckID(msg.GetAckId())). 
- Error("outputhost: Unable to write the message to the cache") + Error("outputhost: Unable to write the message to the cache because time out") + h.m3Client.IncCounter(metrics.ReceiveMessageBatchOutputHostScope, metrics.OutputhostReceiveMsgBatchWriteToMsgCacheTimeout) } } // just timeout for long pull if no message available - firstResultTimer := time.NewTimer(timeoutTime.Sub(time.Now())) + firstResultTimer := common.NewTimer(timeoutTime.Sub(time.Now())) defer firstResultTimer.Stop() select { case msg := <-cgCache.msgsRedeliveryCh: @@ -494,24 +504,27 @@ func (h *OutputHost) ReceiveMessageBatch(ctx thrift.Context, request *cherami.Re } // once there's any message, try get back to caller with as much as possbile messages before timeout - timeoutTime = timeoutTime.Add(-3 * time.Second) - moreResultTimer := time.NewTimer(timeoutTime.Sub(time.Now())) + moreResultTimer := common.NewTimer(timeoutTime.Sub(time.Now())) defer moreResultTimer.Stop() MORERESULTLOOP: - for remaining := count - 1; remaining > 0; remaining-- { + for remaining := count - 1; remaining > 0; { select { - case msg := <-cgCache.msgsRedeliveryCh: - deliver(msg, cgCache.msgCacheRedeliveredCh) - case msg := <-cgCache.msgsCh: - deliver(msg, cgCache.msgCacheCh) case <-moreResultTimer.C: break MORERESULTLOOP + default: + select { + case msg := <-cgCache.msgsRedeliveryCh: + deliver(msg, cgCache.msgCacheRedeliveredCh) + remaining-- + case msg := <-cgCache.msgsCh: + deliver(msg, cgCache.msgCacheCh) + remaining-- + default: + break + } } } - // putback the load ref which we got in the createAndLoadCGCache - atomic.AddInt32(&h.loadShutdownRef, -1) - // Emit M3 metrics for per host and per consumer group h.m3Client.AddCounter(metrics.ReceiveMessageBatchOutputHostScope, metrics.OutputhostMessageSent, int64(len(res.Messages))) @@ -595,16 +608,6 @@ func (h *OutputHost) ConsumerGroupsUpdated(ctx thrift.Context, request *admin.Co return err } -// SetUseWebsocket gets the flag of whether to use websocket to connect to store -func (h *OutputHost) SetUseWebsocket(useWebsocket int32) { - atomic.StoreInt32(&h.useWebsocket, useWebsocket) -} - -// GetUseWebsocket gets the flag of whether to use websocket to connect to store -func (h *OutputHost) GetUseWebsocket() int { - return 1 -} - // unloadAll tries to unload everything on this OutputHost func (h *OutputHost) unloadAll() { h.cgMutex.Lock() @@ -721,7 +724,6 @@ func NewOutputHost(serviceName string, sVice common.SCommon, metadataClient meta bs := OutputHost{ logger: (sVice.GetConfig().GetLogger()).WithFields(bark.Fields{common.TagOut: common.FmtOut(sVice.GetHostUUID()), common.TagDplName: common.FmtDplName(deploymentName)}), SCommon: sVice, - metaClient: metadataClient, frontendClient: frontendClient, cgCache: make(map[string]*consumerGroupCache), cacheTimeout: defaultIdleTimeout, @@ -741,6 +743,8 @@ func NewOutputHost(serviceName string, sVice common.SCommon, metadataClient meta bs.cacheTimeout = opts.CacheIdleTimeout } + bs.metaClient = mm.NewMetadataMetricsMgr(metadataClient, bs.m3Client, bs.logger) + // manage uconfig, regiester handerFunc and verifyFunc for uConfig values bs.dClient = sVice.GetDConfigClient() bs.dynamicConfigManage() diff --git a/services/outputhost/outputhost_test.go b/services/outputhost/outputhost_test.go index b6fa2f52..aee2eb19 100644 --- a/services/outputhost/outputhost_test.go +++ b/services/outputhost/outputhost_test.go @@ -541,6 +541,125 @@ func (s *OutputHostSuite) TestOutputHostReceiveMessageBatch() { outputHost.Shutdown() } +// 
TestOutputHostReceiveMessageBatch_NoMsg tests the no message available scenario +func (s *OutputHostSuite) TestOutputHostReceiveMessageBatch_NoMsg() { + var count int32 + count = 10 + + outputHost, _ := NewOutputHost("outputhost-test", s.mockService, s.mockMeta, nil, nil) + ctx, _ := utilGetThriftContext() + + destUUID := uuid.New() + destDesc := shared.NewDestinationDescription() + destDesc.Path = common.StringPtr("/foo/bar") + destDesc.DestinationUUID = common.StringPtr(destUUID) + destDesc.Status = common.InternalDestinationStatusPtr(shared.DestinationStatus_ENABLED) + s.mockMeta.On("ReadDestination", mock.Anything, mock.Anything).Return(destDesc, nil) + + cgDesc := shared.NewConsumerGroupDescription() + cgDesc.ConsumerGroupUUID = common.StringPtr(uuid.New()) + cgDesc.DestinationUUID = common.StringPtr(destUUID) + s.mockMeta.On("ReadConsumerGroup", mock.Anything, mock.Anything).Return(cgDesc, nil).Twice() + + cgExt := metadata.NewConsumerGroupExtent() + cgExt.ExtentUUID = common.StringPtr(uuid.New()) + cgExt.StoreUUIDs = []string{"mock"} + + cgRes := &metadata.ReadConsumerGroupExtentsResult_{} + cgRes.Extents = append(cgRes.Extents, cgExt) + s.mockMeta.On("ReadConsumerGroupExtents", mock.Anything, mock.Anything).Return(cgRes, nil).Once() + s.mockRead.On("Write", mock.Anything).Return(nil) + + // read will delay for 2 seconds + ticker := time.NewTicker(2 * time.Second) + defer ticker.Stop() + s.mockRead.On("Read").Return(nil, io.EOF).WaitUntil(ticker.C) + + receiveMessageRequest := &cherami.ReceiveMessageBatchRequest{ + DestinationPath: common.StringPtr("foo"), + ConsumerGroupName: common.StringPtr("testcons"), + MaxNumberOfMessages: common.Int32Ptr(count), + ReceiveTimeout: common.Int32Ptr(1), + } + + receivedMessages, err := outputHost.ReceiveMessageBatch(ctx, receiveMessageRequest) + s.Error(err) + assert.IsType(s.T(), &cherami.TimeoutError{}, err) + s.Nil(receivedMessages) + + outputHost.Shutdown() +} + +// TestOutputHostReceiveMessageBatch_NoMsg tests the some message available scenario +func (s *OutputHostSuite) TestOutputHostReceiveMessageBatch_SomeMsgAvailable() { + var count int32 + count = 10 + + outputHost, _ := NewOutputHost("outputhost-test", s.mockService, s.mockMeta, nil, nil) + ctx, _ := utilGetThriftContext() + + destUUID := uuid.New() + destDesc := shared.NewDestinationDescription() + destDesc.Path = common.StringPtr("/foo/bar") + destDesc.DestinationUUID = common.StringPtr(destUUID) + destDesc.Status = common.InternalDestinationStatusPtr(shared.DestinationStatus_ENABLED) + s.mockMeta.On("ReadDestination", mock.Anything, mock.Anything).Return(destDesc, nil) + + cgDesc := shared.NewConsumerGroupDescription() + cgDesc.ConsumerGroupUUID = common.StringPtr(uuid.New()) + cgDesc.DestinationUUID = common.StringPtr(destUUID) + s.mockMeta.On("ReadConsumerGroup", mock.Anything, mock.Anything).Return(cgDesc, nil).Twice() + + cgExt := metadata.NewConsumerGroupExtent() + cgExt.ExtentUUID = common.StringPtr(uuid.New()) + cgExt.StoreUUIDs = []string{"mock"} + + cgRes := &metadata.ReadConsumerGroupExtentsResult_{} + cgRes.Extents = append(cgRes.Extents, cgExt) + s.mockMeta.On("ReadConsumerGroupExtents", mock.Anything, mock.Anything).Return(cgRes, nil).Once() + s.mockRead.On("Write", mock.Anything).Return(nil) + + // setup the mock so that we can read 10 messages + for i := 0; i < int(count); i++ { + aMsg := store.NewAppendMessage() + aMsg.SequenceNumber = common.Int64Ptr(int64(i)) + pMsg := cherami.NewPutMessage() + pMsg.ID = common.StringPtr(strconv.Itoa(i)) + pMsg.Data = 
[]byte(fmt.Sprintf("hello-%d", i)) + + aMsg.Payload = pMsg + rMsg := store.NewReadMessage() + rMsg.Message = aMsg + + rmc := store.NewReadMessageContent() + rmc.Type = store.ReadMessageContentTypePtr(store.ReadMessageContentType_MESSAGE) + rmc.Message = rMsg + + s.mockRead.On("Read").Return(rmc, nil).Once() + } + + // next read will delay for 2 seconds + ticker := time.NewTicker(2 * time.Second) + defer ticker.Stop() + s.mockRead.On("Read").Return(nil, io.EOF).WaitUntil(ticker.C) + + receiveMessageRequest := &cherami.ReceiveMessageBatchRequest{ + DestinationPath: common.StringPtr("foo"), + ConsumerGroupName: common.StringPtr("testcons"), + MaxNumberOfMessages: common.Int32Ptr(count + 1), + ReceiveTimeout: common.Int32Ptr(1), + } + + receivedMessages, err := outputHost.ReceiveMessageBatch(ctx, receiveMessageRequest) + s.NoError(err) + s.Len(receivedMessages.GetMessages(), int(count)) + for i := 0; i < int(count); i++ { + s.Equal(strconv.Itoa(i), receivedMessages.GetMessages()[i].GetPayload().GetID()) + } + + outputHost.Shutdown() +} + func (s *OutputHostSuite) TestOutputCgUnload() { outputHost, _ := NewOutputHost("outputhost-test", s.mockService, s.mockMeta, nil, nil) httpRequest := utilGetHTTPRequestWithPath("foo") diff --git a/services/replicator/metadataReconciler.go b/services/replicator/metadataReconciler.go index e38d9f56..63258ae3 100644 --- a/services/replicator/metadataReconciler.go +++ b/services/replicator/metadataReconciler.go @@ -161,7 +161,10 @@ func (r *metadataReconciler) reconcileDest(localDests []*shared.DestinationDescr defer cancel() err := r.replicator.DeleteDestination(ctx, deleteRequest) if err != nil { - r.logger.WithField(common.TagDst, common.FmtDst(remoteDest.GetDestinationUUID())).Error(`Failed to delete destination in local zone for reconciliation`) + r.logger.WithFields(bark.Fields{ + common.TagErr: err, + common.TagDst: common.FmtDst(remoteDest.GetDestinationUUID()), + }).Error(`Failed to delete destination in local zone for reconciliation`) continue } } else { @@ -201,7 +204,10 @@ func (r *metadataReconciler) reconcileDest(localDests []*shared.DestinationDescr defer cancel() _, err := r.replicator.UpdateDestination(ctx, updateRequest) if err != nil { - r.logger.WithField(common.TagDst, common.FmtDst(remoteDest.GetDestinationUUID())).Error(`Failed to update destination in local zone for reconciliation`) + r.logger.WithFields(bark.Fields{ + common.TagErr: err, + common.TagDst: common.FmtDst(remoteDest.GetDestinationUUID()), + }).Error(`Failed to update destination in local zone for reconciliation`) continue } } @@ -234,7 +240,10 @@ func (r *metadataReconciler) reconcileDest(localDests []*shared.DestinationDescr defer cancel() _, err := r.replicator.CreateDestinationUUID(ctx, createRequest) if err != nil { - r.logger.WithField(common.TagDst, common.FmtDst(remoteDest.GetDestinationUUID())).Error(`Failed to create destination in local zone for reconciliation`) + r.logger.WithFields(bark.Fields{ + common.TagErr: err, + common.TagDst: common.FmtDst(remoteDest.GetDestinationUUID()), + }).Error(`Failed to create destination in local zone for reconciliation`) continue } } @@ -254,7 +263,7 @@ func (r *metadataReconciler) getAllMultiZoneDestInLocalZone() ([]*shared.Destina for { res, err := r.mClient.ListDestinationsByUUID(nil, listReq) if err != nil { - r.logger.Error(`Metadata call ListDestinationsByUUID failed`) + r.logger.WithField(common.TagErr, err).Error(`Metadata call ListDestinationsByUUID failed`) return nil, err } @@ -274,7 +283,10 @@ func (r 
*metadataReconciler) getAllMultiZoneDestInAuthoritativeZone() ([]*shared authoritativeZone := r.replicator.getAuthoritativeZone() remoteReplicator, err := r.replicator.clientFactory.GetReplicatorClient(authoritativeZone) if err != nil { - r.logger.WithField(common.TagZoneName, common.FmtZoneName(authoritativeZone)).Error(`Failed to get remote replicator client`) + r.logger.WithFields(bark.Fields{ + common.TagErr: err, + common.TagZoneName: common.FmtZoneName(authoritativeZone), + }).Error(`Failed to get remote replicator client`) return nil, err } @@ -291,7 +303,7 @@ func (r *metadataReconciler) getAllMultiZoneDestInAuthoritativeZone() ([]*shared defer cancel() res, err := remoteReplicator.ListDestinationsByUUID(ctx, listReq) if err != nil { - r.logger.Error(`Remote replicator call ListDestinationsByUUID failed`) + r.logger.WithField(common.TagErr, err).Error(`Remote replicator call ListDestinationsByUUID failed`) return nil, err } @@ -342,7 +354,10 @@ func (r *metadataReconciler) getAllDestExtentInRemoteZone(zone string, destUUID var err error remoteReplicator, err := r.replicator.clientFactory.GetReplicatorClient(zone) if err != nil { - r.logger.WithField(common.TagZoneName, common.FmtZoneName(zone)).Error(`Failed to get remote replicator client`) + r.logger.WithFields(bark.Fields{ + common.TagErr: err, + common.TagZoneName: common.FmtZoneName(zone), + }).Error(`Failed to get remote replicator client`) return nil, err } @@ -358,7 +373,7 @@ func (r *metadataReconciler) getAllDestExtentInRemoteZone(zone string, destUUID defer cancel() res, err := remoteReplicator.ListExtentsStats(ctx, listReq) if err != nil { - r.logger.Error(`Remote replicator call ListExtentsStats failed`) + r.logger.WithField(common.TagErr, err).Error(`Remote replicator call ListExtentsStats failed`) return nil, err } @@ -386,7 +401,7 @@ func (r *metadataReconciler) getAllDestExtentInCurrentZone(destUUID string) (map for { res, err := r.mClient.ListExtentsStats(nil, listReq) if err != nil { - r.logger.Error(`Metadata call ListExtentsStats failed`) + r.logger.WithField(common.TagErr, err).Error(`Metadata call ListExtentsStats failed`) return nil, err } @@ -428,6 +443,7 @@ func (r *metadataReconciler) reconcileDestExtent(destUUID string, localExtents m _, err := r.replicator.CreateExtent(ctx, createRequest) if err != nil { r.logger.WithFields(bark.Fields{ + common.TagErr: err, common.TagDst: common.FmtDst(destUUID), common.TagExt: common.FmtExt(remoteExtentUUID), common.TagZoneName: common.FmtZoneName(remoteZone), @@ -451,6 +467,7 @@ func (r *metadataReconciler) reconcileDestExtent(destUUID string, localExtents m _, err := r.mClient.UpdateExtentStats(nil, updateRequest) if err != nil { r.logger.WithFields(bark.Fields{ + common.TagErr: err, common.TagDst: common.FmtDst(destUUID), common.TagExt: common.FmtExt(remoteExtentUUID), }).Error(`Failed to update extent status to sealed`) diff --git a/services/replicator/replicator.go b/services/replicator/replicator.go index 1b7f7022..797030e8 100644 --- a/services/replicator/replicator.go +++ b/services/replicator/replicator.go @@ -40,6 +40,7 @@ import ( "github.com/uber/cherami-server/common" "github.com/uber/cherami-server/common/configure" dconfig "github.com/uber/cherami-server/common/dconfigclient" + mm "github.com/uber/cherami-server/common/metadata" "github.com/uber/cherami-server/common/metrics" storeStream "github.com/uber/cherami-server/stream" ) @@ -121,11 +122,12 @@ func NewReplicator(serviceName string, sVice common.SCommon, metadataClient meta 
defaultAuthoritativeZone: config.GetReplicatorConfig().GetDefaultAuthoritativeZone(), tenancy: tenancy, clientFactory: clientFactory, - metaClient: metadataClient, remoteReplicatorConn: make(map[string]*outConnection), storehostConn: make(map[string]*outConnection), } + r.metaClient = mm.NewMetadataMetricsMgr(metadataClient, r.m3Client, r.logger) + r.uconfigClient = sVice.GetDConfigClient() r.dynamicConfigManage() @@ -206,6 +208,7 @@ func (r *Replicator) OpenReplicationReadStreamHandler(w http.ResponseWriter, req outStream, err := r.createStoreHostReadStream(destUUID, extUUID, request) if err != nil { r.logger.WithFields(bark.Fields{ + common.TagErr: err, common.TagExt: common.FmtExt(*request.OpenReadStreamRequest.ExtentUUID), common.TagDst: common.FmtDst(*request.OpenReadStreamRequest.DestinationUUID), }).Error("Can't open store host read stream") @@ -257,6 +260,7 @@ func (r *Replicator) OpenReplicationRemoteReadStreamHandler(w http.ResponseWrite outStream, err := r.createRemoteReplicationReadStream(extUUID, destUUID, request) if err != nil { r.logger.WithFields(bark.Fields{ + common.TagErr: err, common.TagExt: common.FmtExt(*request.OpenReadStreamRequest.ExtentUUID), common.TagDst: common.FmtDst(*request.OpenReadStreamRequest.DestinationUUID), }).Error("Can't open remote replication read stream") @@ -282,8 +286,8 @@ func (r *Replicator) CreateDestinationUUID(ctx thrift.Context, createRequest *sh destDesc, err := r.metaClient.CreateDestinationUUID(ctx, createRequest) if err != nil { r.logger.WithFields(bark.Fields{ - common.TagDst: common.FmtDst(destDesc.GetDestinationUUID()), - common.TagDstPth: common.FmtDstPth(destDesc.GetPath()), + common.TagDst: common.FmtDst(createRequest.GetDestinationUUID()), + common.TagDstPth: common.FmtDstPth(createRequest.GetRequest().GetPath()), common.TagErr: err, }).Error(`Error creating destination`) r.m3Client.IncCounter(metrics.ReplicatorCreateDestUUIDScope, metrics.ReplicatorFailures) diff --git a/services/retentionmgr/metadataDep.go b/services/retentionmgr/metadataDep.go index 3178800b..6edadb91 100644 --- a/services/retentionmgr/metadataDep.go +++ b/services/retentionmgr/metadataDep.go @@ -27,57 +27,77 @@ import ( "github.com/uber/cherami-thrift/.generated/go/shared" "github.com/uber/cherami-thrift/.generated/go/store" "github.com/uber/cherami-server/common" - "github.com/uber/cherami-server/common/metrics" "github.com/uber-common/bark" + "github.com/uber/tchannel-go/thrift" ) const defaultPageSize = 1000 type metadataDepImpl struct { - metadata common.MetadataMgr + metadata metadata.TChanMetadataService logger bark.Logger } -func newMetadataDep(metadata metadata.TChanMetadataService, m3Client metrics.Client, logger bark.Logger) *metadataDepImpl { +func newMetadataDep(metadata metadata.TChanMetadataService, log bark.Logger) *metadataDepImpl { return &metadataDepImpl{ - metadata: common.NewMetadataMgr(metadata, m3Client, logger), - logger: logger, + metadata: metadata, + logger: log, } } // -- the following are various helper routines, that talk to metadata, storehosts, etc -- // func (t *metadataDepImpl) GetDestinations() (destinations []*destinationInfo) { + req := shared.NewListDestinationsByUUIDRequest() + req.Limit = common.Int64Ptr(defaultPageSize) + + ctx, cancel := thrift.NewContext(5 * time.Second) + defer cancel() + log := t.logger - log.Debug("GetDestinations: calling ListDestinations") + i := 0 + for { - list, err := t.metadata.ListDestinations() - if err != nil { - log.WithField(common.TagErr, err).Error(`GetDestinations: ListDestinations 
failed`) - return - } + log.Debug("GetDestinations: ListDestinationsByUUID on metadata") - for _, destDesc := range list { + resp, err := t.metadata.ListDestinationsByUUID(ctx, req) + if err != nil { + log.WithField(common.TagErr, err).Error(`GetDestinations: ListDestinationsByUUID failed`) + return + } - dest := &destinationInfo{ - id: destinationID(destDesc.GetDestinationUUID()), - status: destDesc.GetStatus(), - softRetention: destDesc.GetConsumedMessagesRetention(), - hardRetention: destDesc.GetUnconsumedMessagesRetention(), - // type: mDestDesc.GetType(), - path: destDesc.GetPath(), + for _, destDesc := range resp.GetDestinations() { + + dest := &destinationInfo{ + id: destinationID(destDesc.GetDestinationUUID()), + status: destDesc.GetStatus(), + softRetention: destDesc.GetConsumedMessagesRetention(), + hardRetention: destDesc.GetUnconsumedMessagesRetention(), + // type: mDestDesc.GetType(), + path: destDesc.GetPath(), + isMultiZone: destDesc.GetIsMultiZone(), + } + + destinations = append(destinations, dest) + i++ + + log.WithFields(bark.Fields{ + common.TagDst: dest.id, + `status`: dest.status, + `hardRetention`: dest.hardRetention, + `softRetention`: dest.softRetention, + }).Debug("GetDestinations: ListDestinationsByUUID output") } - destinations = append(destinations, dest) + if len(resp.GetNextPageToken()) == 0 { + break + } + + req.PageToken = resp.GetNextPageToken() - log.WithFields(bark.Fields{ - common.TagDst: dest.id, - `status`: dest.status, - `hardRetention`: dest.hardRetention, - `softRetention`: dest.softRetention, - }).Debug("GetDestinations: ListDestinations output") + log.Debug("GetDestinations: fetching next page of ListDestinationsByUUID") } log.WithField(`numDestinations`, len(destinations)).Debug("GetDestinations done") @@ -86,43 +106,62 @@ func (t *metadataDepImpl) GetDestinations() (destinations []*destinationInfo) { func (t *metadataDepImpl) GetExtents(destID destinationID) (extents []*extentInfo) { + req := shared.NewListExtentsStatsRequest() + req.DestinationUUID = common.StringPtr(string(destID)) + req.Limit = common.Int64Ptr(defaultPageSize) + + ctx, cancel := thrift.NewContext(5 * time.Second) + defer cancel() + log := t.logger.WithField(common.TagDst, string(destID)) extents = make([]*extentInfo, 0, 8) + i := 0 - log.Debug("GetExtents: calling ListExtentsByDstIDStatus") + for { + log.Debug("GetExtents: ListExtentStats on metadata") - list, err := t.metadata.ListExtentsByDstIDStatus(string(destID), nil) - if err != nil { - log.WithField(common.TagErr, err).Error(`GetExtents: ListExtentsByDstIDStatus failed`) - return - } + resp, err := t.metadata.ListExtentsStats(ctx, req) + if err != nil { + log.WithField(common.TagErr, err).Error(`GetExtents: ListExtentsStats failed`) + return + } - for _, extStats := range list { + for _, extStats := range resp.GetExtentStatsList() { - extent := extStats.GetExtent() - storeUUIDs := extent.GetStoreUUIDs() + extent := extStats.GetExtent() + storeUUIDs := extent.GetStoreUUIDs() - extInfo := &extentInfo{ - id: extentID(extent.GetExtentUUID()), - status: extStats.GetStatus(), - statusUpdatedTime: time.Unix(0, extStats.GetStatusUpdatedTimeMillis()*int64(time.Millisecond)), - storehosts: make([]storehostID, 0, len(storeUUIDs)), - singleCGVisibility: consumerGroupID(extStats.GetConsumerGroupVisibility()), - } + extInfo := &extentInfo{ + id: extentID(extent.GetExtentUUID()), + status: extStats.GetStatus(), + statusUpdatedTime: time.Unix(0, extStats.GetStatusUpdatedTimeMillis()*int64(time.Millisecond)), + storehosts: 
make([]storehostID, 0, len(storeUUIDs)), + singleCGVisibility: consumerGroupID(extStats.GetConsumerGroupVisibility()), + originZone: extStats.GetExtent().GetOriginZone(), + } + + for j := range storeUUIDs { + extInfo.storehosts = append(extInfo.storehosts, storehostID(storeUUIDs[j])) + } - for j := range storeUUIDs { - extInfo.storehosts = append(extInfo.storehosts, storehostID(storeUUIDs[j])) + extents = append(extents, extInfo) + i++ + + log.WithFields(bark.Fields{ + common.TagExt: string(extInfo.id), + `status`: extInfo.status, + `replicas`: extInfo.storehosts, + }).Debug(`GetExtents: ListExtentStats output`) } - extents = append(extents, extInfo) + if len(resp.GetNextPageToken()) == 0 { + break + } - log.WithFields(bark.Fields{ - common.TagExt: string(extInfo.id), - `status`: extInfo.status, - `replicas`: extInfo.storehosts, - }).Debug(`GetExtents: ListExtentsByDstIDStatus output`) + req.PageToken = resp.GetNextPageToken() + log.Debug("GetExtents: fetching next page of ListExtentStats") } log.WithField(`numExtents`, len(extents)).Debug("GetExtents done") @@ -131,6 +170,13 @@ func (t *metadataDepImpl) GetExtents(destID destinationID) (extents []*extentInf func (t *metadataDepImpl) GetExtentInfo(destID destinationID, extID extentID) (extInfo *extentInfo, err error) { + req := metadata.NewReadExtentStatsRequest() + req.DestinationUUID = common.StringPtr(string(destID)) + req.ExtentUUID = common.StringPtr(string(extID)) + + ctx, cancel := thrift.NewContext(5 * time.Second) + defer cancel() + log := t.logger.WithFields(bark.Fields{ common.TagDst: string(destID), common.TagExt: string(extID), @@ -138,12 +184,14 @@ func (t *metadataDepImpl) GetExtentInfo(destID destinationID, extID extentID) (e log.Debug("GetExtentInfo: ReadExtentStats on metadata") - extStats, err := t.metadata.ReadExtentStats(string(destID), string(destID)) + resp, err := t.metadata.ReadExtentStats(ctx, req) if err != nil { log.WithField(common.TagErr, err).Error("GetExtentInfo: ReadExtentStats failed") return } + extStats := resp.GetExtentStats() + extInfo = &extentInfo{ id: extID, status: extStats.GetStatus(), @@ -166,30 +214,48 @@ func (t *metadataDepImpl) GetExtentInfo(destID destinationID, extID extentID) (e func (t *metadataDepImpl) GetConsumerGroups(destID destinationID) (consumerGroups []*consumerGroupInfo) { + req := metadata.NewListConsumerGroupRequest() + req.DestinationUUID = common.StringPtr(string(destID)) + req.Limit = common.Int64Ptr(defaultPageSize) + + ctx, cancel := thrift.NewContext(5 * time.Second) + defer cancel() + log := t.logger.WithField(common.TagDst, string(destID)) - log.Debug("GetConsumerGroups: ListConsumerGroupsByDstID on metadata") + for { + log.Debug("GetConsumerGroups: ListConsumerGroups on metadata") - list, err := t.metadata.ListConsumerGroupsByDstID(string(destID)) - if err != nil { - log.WithField(common.TagErr, err).Error("GetConsumerGroups: ListConsumerGroupsByDstID failed") - return - } + res, err := t.metadata.ListConsumerGroups(ctx, req) + if err != nil { + log.WithField(common.TagErr, err).Error("GetConsumerGroups: ListConsumerGroups failed") + break + } + + for _, cgDesc := range res.GetConsumerGroups() { + // assert(destId == cgDesc.GetDestinationUUID()) // + + cg := &consumerGroupInfo{ + id: consumerGroupID(cgDesc.GetConsumerGroupUUID()), + status: cgDesc.GetStatus(), + } - for _, cgDesc := range list { - // assert(destId == cgDesc.GetDestinationUUID()) // + consumerGroups = append(consumerGroups, cg) + + log.WithFields(bark.Fields{ + 
common.TagCnsm: string(cg.id), + `status`: cg.status, + }).Debug(`GetConsumerGroups: ListConsumerGroups output`) + } - cg := &consumerGroupInfo{ - id: consumerGroupID(cgDesc.GetConsumerGroupUUID()), - status: cgDesc.GetStatus(), + if len(res.GetNextPageToken()) == 0 { + break } - consumerGroups = append(consumerGroups, cg) + req.PageToken = res.GetNextPageToken() - log.WithFields(bark.Fields{ - common.TagCnsm: string(cg.id), - `status`: cg.status, - }).Debug(`GetConsumerGroups: ListConsumerGroupsByDstID output`) + log.Debug("GetConsumerGroups: fetching next page of ListConsumerGroups") } log.WithField(`numConsumerGroups`, len(consumerGroups)).Debug("GetConsumerGroups done") @@ -198,6 +264,14 @@ func (t *metadataDepImpl) GetConsumerGroups(destID destinationID) (consumerGroup func (t *metadataDepImpl) DeleteExtent(destID destinationID, extID extentID) (err error) { + req := metadata.NewUpdateExtentStatsRequest() + req.DestinationUUID = common.StringPtr(string(destID)) + req.ExtentUUID = common.StringPtr(string(extID)) + req.Status = shared.ExtentStatusPtr(shared.ExtentStatus_DELETED) + + ctx, cancel := thrift.NewContext(2 * time.Second) + defer cancel() + log := t.logger.WithFields(bark.Fields{ common.TagDst: string(destID), common.TagExt: string(extID), @@ -205,18 +279,26 @@ func (t *metadataDepImpl) DeleteExtent(destID destinationID, extID extentID) (er log.Debug("DeleteExtent: UpdateExtentStats on metadata") - err = t.metadata.UpdateExtentStatus(string(destID), string(destID), shared.ExtentStatus_DELETED) + resp, err := t.metadata.UpdateExtentStats(ctx, req) if err != nil { log.WithField(common.TagErr, err).Error(`DeleteExtent: UpdateExtentStats failed`) return } - log.Debug(`DeleteExtent done`) + log.WithField(`resp.status`, resp.GetExtentStats().GetStatus()).Debug(`DeleteExtent done`) return } func (t *metadataDepImpl) MarkExtentConsumed(destID destinationID, extID extentID) (err error) { + req := metadata.NewUpdateExtentStatsRequest() + req.DestinationUUID = common.StringPtr(string(destID)) + req.ExtentUUID = common.StringPtr(string(extID)) + req.Status = shared.ExtentStatusPtr(shared.ExtentStatus_CONSUMED) + + ctx, cancel := thrift.NewContext(2 * time.Second) + defer cancel() + log := t.logger.WithFields(bark.Fields{ common.TagDst: string(destID), common.TagExt: string(extID), @@ -224,17 +306,25 @@ func (t *metadataDepImpl) MarkExtentConsumed(destID destinationID, extID extentI log.Debug("MarkExtentConsumed: UpdateExtentStats on metadata") - err = t.metadata.UpdateExtentStatus(string(destID), string(destID), shared.ExtentStatus_CONSUMED) + resp, err := t.metadata.UpdateExtentStats(ctx, req) if err != nil { log.WithField(common.TagErr, err).Error(`MarkExtentConsumed: UpdateExtentStats failed`) return } - log.Debug(`MarkExtentConsumed done`) + log.WithField(`resp.status`, resp.GetExtentStats().GetStatus()).Debug(`MarkExtentConsumed done`) return } -func (t *metadataDepImpl) DeleteConsumerGroupExtent(cgID consumerGroupID, extID extentID) (err error) { +func (t *metadataDepImpl) DeleteConsumerGroupExtent(cgID consumerGroupID, extID extentID) error { + + req := metadata.NewUpdateConsumerGroupExtentStatusRequest() + req.ExtentUUID = common.StringPtr(string(extID)) + req.ConsumerGroupUUID = common.StringPtr(string(cgID)) + req.Status = common.MetadataConsumerGroupExtentStatusPtr(metadata.ConsumerGroupExtentStatus_DELETED) + + ctx, cancel := thrift.NewContext(2 * time.Second) + defer cancel() log := t.logger.WithFields(bark.Fields{ common.TagCnsmID: string(cgID), @@ -243,7 +333,7 @@ func (t 
*metadataDepImpl) DeleteConsumerGroupExtent(cgID consumerGroupID, extID log.Debug("DeleteConsumerGroupExtent: UpdateConsumerGroupExtentStatus on metadata") - err = t.metadata.UpdateConsumerGroupExtentStatus(string(cgID), string(extID), metadata.ConsumerGroupExtentStatus_DELETED) + err := t.metadata.UpdateConsumerGroupExtentStatus(ctx, req) if err != nil { log.WithField(common.TagErr, err).Error("DeleteConsumerGroupExtent: UpdateConsumerGroupExtentStatus failed") @@ -254,16 +344,22 @@ func (t *metadataDepImpl) DeleteConsumerGroupExtent(cgID consumerGroupID, extID return nil } -func (t *metadataDepImpl) DeleteDestination(destID destinationID) (err error) { +func (t *metadataDepImpl) DeleteDestination(destID destinationID) error { + + req := metadata.NewDeleteDestinationUUIDRequest() + req.UUID = common.StringPtr(string(destID)) + + ctx, cancel := thrift.NewContext(2 * time.Second) + defer cancel() log := t.logger.WithField(common.TagDst, string(destID)) - log.Debug("DeleteDestination: DeleteDestination on metadata") + log.Debug("DeleteDestination: DeleteDestinationUUID on metadata") - err = t.metadata.DeleteDestination(string(destID)) + err := t.metadata.DeleteDestinationUUID(ctx, req) if err != nil { - log.WithField(common.TagErr, err).Error("DeleteDestination: DeleteDestination failed") + log.WithField(common.TagErr, err).Error("DeleteDestination: DeleteDestinationUUID failed") return err } @@ -273,6 +369,14 @@ func (t *metadataDepImpl) DeleteDestination(destID destinationID) (err error) { func (t *metadataDepImpl) GetAckLevel(destID destinationID, extID extentID, cgID consumerGroupID) (ackLevel int64, err error) { + req := metadata.NewReadConsumerGroupExtentRequest() + req.DestinationUUID = common.StringPtr(string(destID)) + req.ExtentUUID = common.StringPtr(string(extID)) + req.ConsumerGroupUUID = common.StringPtr(string(cgID)) + + ctx, cancel := thrift.NewContext(2 * time.Second) + defer cancel() + log := t.logger.WithFields(bark.Fields{ common.TagDst: string(destID), common.TagExt: string(extID), @@ -281,20 +385,20 @@ func (t *metadataDepImpl) GetAckLevel(destID destinationID, extID extentID, cgID log.Debug("GetAckLevel: ReadConsumerGroupExtent on metadata") - resp, err := t.metadata.ReadConsumerGroupExtent(string(destID), string(destID), string(destID)) + resp, err := t.metadata.ReadConsumerGroupExtent(ctx, req) if err != nil { log.WithField(common.TagErr, err).Error("GetAckLevel: ReadConsumerGroupExtent failed") return store.ADDR_BEGIN, err } - // assert(resp.GetExtentUUID() == extID) - // assert(resp.GetConsumerGroupUUID() == cgID) + // assert(resp.GetExtent().GetExtentUUID() == extID) + // assert(resp.GetExtent().GetConsumerGroupUUID() == cgID) // check if the consumer-group has read to "sealed" point - if resp.GetStatus() != metadata.ConsumerGroupExtentStatus_OPEN { + if resp.GetExtent().GetStatus() != metadata.ConsumerGroupExtentStatus_OPEN { ackLevel = store.ADDR_SEAL } else { - ackLevel = resp.GetAckLevelOffset() + ackLevel = resp.GetExtent().GetAckLevelOffset() // assert(ackLevel != cherami.ADDR_END } diff --git a/services/retentionmgr/retention.go b/services/retentionmgr/retention.go index 9808dcbd..2d3011fe 100644 --- a/services/retentionmgr/retention.go +++ b/services/retentionmgr/retention.go @@ -30,6 +30,7 @@ import ( "github.com/uber/cherami-thrift/.generated/go/shared" "github.com/uber/cherami-thrift/.generated/go/store" "github.com/uber/cherami-server/common" + metadataMetrics "github.com/uber/cherami-server/common/metadata" 
"github.com/uber/cherami-server/common/metrics" "github.com/uber-common/bark" ) @@ -51,6 +52,7 @@ type ( SingleCGVisibleExtentGracePeriod time.Duration ExtentDeleteDeferPeriod time.Duration NumWorkers int + LocalZone string } // RetentionManager context @@ -113,6 +115,7 @@ type ( softRetention int32 // in seconds hardRetention int32 // in seconds path string + isMultiZone bool } extentInfo struct { @@ -121,6 +124,7 @@ type ( statusUpdatedTime time.Time storehosts []storehostID singleCGVisibility consumerGroupID + originZone string // destID destinationID // dest *destinationInfo } @@ -156,12 +160,13 @@ func New(opts *Options, metadata metadata.TChanMetadataService, clientFactory co } logger = logger.WithField(common.TagModule, `retMgr`) + metadata = metadataMetrics.NewMetadataMetricsMgr(metadata, m3Client, logger) return &RetentionManager{ Options: opts, logger: logger, m3Client: m3Client, - metadata: newMetadataDep(metadata, m3Client, logger), + metadata: newMetadataDep(metadata, logger), storehost: newStorehostDep(clientFactory, logger), lastDLQRetentionRun: time.Now().AddDate(0, 0, -1), } @@ -596,6 +601,14 @@ func (t *RetentionManager) computeRetention(job *retentionJob, log bark.Logger) } } + // If this is a multi_zone destination and local extent, disable soft retention + // The reason is if soft retention is very short, we may delete messages before remote zone has a chance to replicate the messages + // Long term solution should create a fake consumer for the remote zone + if dest.isMultiZone && !common.IsRemoteZoneExtent(ext.originZone, t.Options.LocalZone) { + log.Info(`overridden: soft retention overridden for multi_zone extent`) + softRetentionAddr = int64(store.ADDR_BEGIN) + } + log.WithFields(bark.Fields{ `softRetentionAddr`: softRetentionAddr, `softRetentionAddr_time`: time.Unix(0, softRetentionAddr), @@ -662,7 +675,8 @@ func (t *RetentionManager) computeRetention(job *retentionJob, log bark.Logger) }).Debug("computed minAckAddr") // -- step 5: compute retention address -- // - // retentionAddr = max( hardRetentionAddr, min( softRetentionAddr, minAckAddr ) ) // + + //** retentionAddr = max( hardRetentionAddr, min( softRetentionAddr, minAckAddr ) ) **// if softRetentionAddr == store.ADDR_SEAL || (minAckAddr != store.ADDR_SEAL && minAckAddr < softRetentionAddr) { softRetentionAddr = minAckAddr @@ -682,16 +696,30 @@ func (t *RetentionManager) computeRetention(job *retentionJob, log bark.Logger) // -- step 6: check to see if the extent status can be updated to 'consumed' -- // - // move the extent to 'consumed' because all of the following are true: - // 1. the extent was sealed - // 2. the extent as fully consumed by all of the consumer groups - // 3. a period of 'soft retention period' has passed (in other words, a consumer - // that is consume along the soft retention time has "consumed" the extent, too) + // move the extent to 'consumed' if either: + // A. all of the following are true: + // 1. the extent was sealed + // 2. the extent as fully consumed by all of the consumer groups + // 3. a period of 'soft retention period' has passed (in other words, + // a consumer that is consuming along the soft retention time has + // "consumed" the extent) + // B. 
or, the hard-retention has reached the end of the sealed extent, + // in which case we will force the extent to be "consumed" + // NB: retentionAddr == ADDR_BEGIN indicates there was an error, so we no-op if job.retentionAddr != store.ADDR_BEGIN && - ext.status == shared.ExtentStatus_SEALED && - minAckAddr == store.ADDR_SEAL && softRetentionConsumed { + ((ext.status == shared.ExtentStatus_SEALED && + minAckAddr == store.ADDR_SEAL && + softRetentionConsumed) || + hardRetentionConsumed) { + + log.WithFields(bark.Fields{ + `retentionAddr`: job.retentionAddr, + `extent-status`: ext.status, + `minAckAddr`: minAckAddr, + `softRetentionConsumed`: softRetentionConsumed, + `hardRetentionConsumed`: hardRetentionConsumed, + }).Info("computeRetention: marking extent consumed") - log.Info("computeRetention: marking extent consumed") e := t.metadata.MarkExtentConsumed(dest.id, ext.id) if e != nil { diff --git a/services/retentionmgr/retention_test.go b/services/retentionmgr/retention_test.go index d9bc9162..d2124f02 100644 --- a/services/retentionmgr/retention_test.go +++ b/services/retentionmgr/retention_test.go @@ -76,6 +76,8 @@ func (s *RetentionMgrSuite) TestRetentionManager() { // DEST2,EXT1: minAckAddr < hardRetentionAddr -> retentionAddr = hardRetentionAddr // DEST2,EXT2: softRetentionAddr < minAckAddr -> retentionAddr = softRetentionAddr // DEST2,EXT3: hardRetentionAddr < minAckAddr < softRetentionAddr -> retentionAddr = minAckAddr + // DEST2,EXT31: hardRetentionAddr < minAckAddr < softRetentionAddr but is multi_zone(extent in source zone) -> retentionAddr = hardRetentionAddr + // DEST2,EXT32: hardRetentionAddr < minAckAddr < softRetentionAddr but is multi_zone(extent in remote zone) -> retentionAddr = minAckAddr // DEST2,EXT4: minAckAddr < softRetentionAddr; softRetentionAddr == seal -> retentionAddr = minAckAddr // DEST2,EXT5: minAckAddr == seal; softRetentionAddr == seal -> retentionAddr = seal (and delete) // DEST2,EXT6: minAckAddr == seal; softRetentionAddr != seal -> retentionAddr = softRetentionAddr @@ -96,7 +98,7 @@ func (s *RetentionMgrSuite) TestRetentionManager() { addrSoft, addrHard := tSoft, tHard addrPreSoft, addrPostSoft := addrSoft-2*seconds, addrSoft+2*seconds addrPreHard, addrPostHard := addrHard-2*seconds, addrHard+2*seconds - addrSeal := int64(store.ADDR_SEAL) + addrBegin, addrSeal := int64(store.ADDR_BEGIN), int64(store.ADDR_SEAL) // fmt.Printf("tNow=%v tSoft=%v tHard=%v\n", tNow, tSoft, tHard) // fmt.Printf("addrPreSoft=%v addrePostSoft=%v\n", addrPreSoft, addrPostSoft) @@ -104,45 +106,50 @@ func (s *RetentionMgrSuite) TestRetentionManager() { // fmt.Printf("addrSeal=%v", addrSeal) destinations := []*destinationInfo{ - {id: "DEST2", status: shared.DestinationStatus_ENABLED, softRetention: softRetSecs, hardRetention: hardRetSecs}, + {id: "DEST2", status: shared.DestinationStatus_ENABLED, softRetention: softRetSecs, hardRetention: hardRetSecs, isMultiZone: true}, } extStoreMap := map[extentID][]storehostID{ - "EXT1": {"STOR1", "STOR2", "STOR3"}, - "EXT2": {"STOR4", "STOR5", "STOR6"}, - "EXT3": {"STOR1", "STOR3", "STOR5"}, - "EXT4": {"STOR2", "STOR4", "STOR6"}, - "EXT5": {"STOR2", "STOR3", "STOR4"}, - "EXT6": {"STOR3", "STOR5", "STOR4"}, - "EXT7": {"STOR7", "STOR6", "STOR5"}, - "EXT8": {"STOR3", "STOR4", "STOR6"}, - "EXT9": {"STOR1", "STOR2", "STOR5"}, - "EXTA": {"STOR2", "STOR3", "STOR4"}, - "EXTB": {"STOR2", "STOR3", "STOR4"}, - "EXTC": {"STOR2", "STOR4", "STOR6"}, - "EXTD": {"STOR2", "STOR4", "STOR6"}, - "EXTE": {"STOR3", "STOR5", "STOR4"}, - "EXTm": {"STOR3"}, // Single CG 
Visible - "EXTn": {"STOR7"}, // Single CG Visible + "EXT1": {"STOR1", "STOR2", "STOR3"}, + "EXT2": {"STOR4", "STOR5", "STOR6"}, + "EXT3": {"STOR1", "STOR3", "STOR5"}, + "EXT31": {"STOR1", "STOR3", "STOR5"}, + "EXT32": {"STOR1", "STOR3", "STOR5"}, + "EXT4": {"STOR2", "STOR4", "STOR6"}, + "EXT5": {"STOR2", "STOR3", "STOR4"}, + "EXT6": {"STOR3", "STOR5", "STOR4"}, + "EXT61": {"STOR3", "STOR5", "STOR4"}, + "EXT7": {"STOR7", "STOR6", "STOR5"}, + "EXT8": {"STOR3", "STOR4", "STOR6"}, + "EXT9": {"STOR1", "STOR2", "STOR5"}, + "EXTA": {"STOR2", "STOR3", "STOR4"}, + "EXTB": {"STOR2", "STOR3", "STOR4"}, + "EXTC": {"STOR2", "STOR4", "STOR6"}, + "EXTD1": {"STOR2", "STOR4", "STOR6"}, + "EXTm": {"STOR3"}, // Single CG Visible + "EXTn": {"STOR7"}, // Single CG Visible } extStatusMap := map[extentID]shared.ExtentStatus{ - "EXT1": shared.ExtentStatus_OPEN, - "EXT2": shared.ExtentStatus_OPEN, - "EXT3": shared.ExtentStatus_OPEN, - "EXT4": shared.ExtentStatus_OPEN, - "EXT5": shared.ExtentStatus_SEALED, - "EXT6": shared.ExtentStatus_CONSUMED, - "EXT7": shared.ExtentStatus_DELETED, - "EXT8": shared.ExtentStatus_OPEN, - "EXT9": shared.ExtentStatus_SEALED, - "EXTA": shared.ExtentStatus_SEALED, - "EXTB": shared.ExtentStatus_SEALED, - "EXTC": shared.ExtentStatus_OPEN, - "EXTD": shared.ExtentStatus_SEALED, - "EXTE": shared.ExtentStatus_CONSUMED, - "EXTm": shared.ExtentStatus_SEALED, // Merged DLQ extents should always be sealed - "EXTn": shared.ExtentStatus_SEALED, // + "EXT1": shared.ExtentStatus_OPEN, + "EXT2": shared.ExtentStatus_OPEN, + "EXT3": shared.ExtentStatus_OPEN, + "EXT31": shared.ExtentStatus_OPEN, + "EXT32": shared.ExtentStatus_OPEN, + "EXT4": shared.ExtentStatus_OPEN, + "EXT5": shared.ExtentStatus_SEALED, + "EXT6": shared.ExtentStatus_CONSUMED, + "EXT61": shared.ExtentStatus_CONSUMED, + "EXT7": shared.ExtentStatus_DELETED, + "EXT8": shared.ExtentStatus_OPEN, + "EXT9": shared.ExtentStatus_SEALED, + "EXTA": shared.ExtentStatus_SEALED, + "EXTB": shared.ExtentStatus_SEALED, + "EXTC": shared.ExtentStatus_OPEN, + "EXTD": shared.ExtentStatus_SEALED, + "EXTD1": shared.ExtentStatus_SEALED, + "EXTm": shared.ExtentStatus_SEALED, // Merged DLQ extents should always be sealed + "EXTn": shared.ExtentStatus_SEALED, // } extSingleCGVisibilityMap := map[extentID]consumerGroupID{ @@ -156,9 +163,9 @@ func (s *RetentionMgrSuite) TestRetentionManager() { statusUpdatedTime := time.Unix(0, tNow).Add(-55 * time.Minute) - // 'EXTE' is like EXT6, except the statusUpdatedTime is beyond the ExtentDeleteDeferPeriod, + // 'EXT61' is like EXT6, except the statusUpdatedTime is beyond the ExtentDeleteDeferPeriod, // causing it to be deleted. 
- if ext == "EXTE" { + if ext == "EXT61" { statusUpdatedTime = time.Unix(0, tNow).Add(-2 * time.Hour) } @@ -171,26 +178,32 @@ func (s *RetentionMgrSuite) TestRetentionManager() { }) } - consumerGroups := []*consumerGroupInfo{ - {id: "CG1", status: shared.ConsumerGroupStatus_ENABLED}, - {id: "CG2", status: shared.ConsumerGroupStatus_DELETED}, - {id: "CG3", status: shared.ConsumerGroupStatus_DISABLED}, - {id: "CGm", status: shared.ConsumerGroupStatus_ENABLED}, // Single CG Visible consumer group - } - s.metadata.On("GetDestinations").Return(destinations).Once() s.metadata.On("GetExtents", destinationID("DEST2")).Return(extents).Once() for ext, storehosts := range extStoreMap { + originZone := `zone2` + if ext == "EXT31" { + originZone = `` + } + extInfo := &extentInfo{ id: extentID(ext), status: extStatusMap[ext], storehosts: storehosts, singleCGVisibility: extSingleCGVisibilityMap[ext], + originZone: originZone, } s.metadata.On("GetExtentInfo", destinationID("DEST2"), extentID(ext)).Return(extInfo, nil).Once() } + consumerGroups := []*consumerGroupInfo{ + {id: "CG1", status: shared.ConsumerGroupStatus_ENABLED}, + {id: "CG2", status: shared.ConsumerGroupStatus_DELETED}, + {id: "CG3", status: shared.ConsumerGroupStatus_DISABLED}, + {id: "CGm", status: shared.ConsumerGroupStatus_ENABLED}, // Single CG Visible consumer group + } + s.metadata.On("GetConsumerGroups", destinationID("DEST2")).Return(consumerGroups) type gaftRet struct { @@ -200,59 +213,70 @@ func (s *RetentionMgrSuite) TestRetentionManager() { // hard retention addr gaftHard := map[extentID]map[storehostID]gaftRet{ - "EXT1": {"STOR1": {addrHard, false}, "STOR2": {addrHard - 5, false}, "STOR3": {addrHard - 10, false}}, - "EXT2": {"STOR4": {addrHard - 10, false}, "STOR5": {addrHard - 8, false}, "STOR6": {addrHard, false}}, - "EXT3": {"STOR1": {addrHard - 30, false}, "STOR3": {addrHard, false}, "STOR5": {addrHard - 7, false}}, - "EXT4": {"STOR2": {addrHard, false}, "STOR4": {addrHard - 1, false}, "STOR6": {addrHard, false}}, - "EXT5": {"STOR2": {addrHard - 10, false}, "STOR3": {addrHard, false}, "STOR4": {addrHard - 20, false}}, + "EXT1": {"STOR1": {addrHard, false}, "STOR2": {addrHard - 5, false}, "STOR3": {addrHard - 10, false}}, + "EXT2": {"STOR4": {addrHard - 10, false}, "STOR5": {addrHard - 8, false}, "STOR6": {addrHard, false}}, + "EXT3": {"STOR1": {addrHard - 30, false}, "STOR3": {addrHard, false}, "STOR5": {addrHard - 7, false}}, + "EXT31": {"STOR1": {addrHard - 30, false}, "STOR3": {addrHard, false}, "STOR5": {addrHard - 7, false}}, + "EXT32": {"STOR1": {addrHard - 30, false}, "STOR3": {addrHard, false}, "STOR5": {addrHard - 7, false}}, + "EXT4": {"STOR2": {addrHard, false}, "STOR4": {addrHard - 1, false}, "STOR6": {addrHard, false}}, + "EXT5": {"STOR2": {addrHard - 10, false}, "STOR3": {addrHard, false}, "STOR4": {addrHard - 20, false}}, // "EXT6": {}, // should not get called + // "EXT61": {}, // should not get called // "EXT7": {}, // should not get called - "EXT8": {"STOR3": {addrHard, false}, "STOR4": {addrHard - 5, false}, "STOR6": {addrHard - 20, false}}, - "EXT9": {"STOR1": {addrHard, false}, "STOR2": {addrHard - 5, false}, "STOR5": {addrHard - 20, false}}, - "EXTA": {"STOR2": {addrHard - 10, false}, "STOR3": {addrHard, false}, "STOR4": {addrHard - 20, false}}, - "EXTB": {"STOR2": {addrHard - 10, false}, "STOR3": {addrHard, false}, "STOR4": {addrHard - 20, false}}, - "EXTC": {"STOR2": {addrHard, false}, "STOR4": {addrHard, false}, "STOR6": {addrHard, false}}, - "EXTD": {"STOR2": {addrHard, false}, "STOR4": {addrHard, 
false}, "STOR6": {addrHard, false}}, - "EXTm": {"STOR3": {addrHard - 100, true}}, - "EXTn": {"STOR7": {addrHard - 100, true}}, + "EXT8": {"STOR3": {addrHard, false}, "STOR4": {addrHard - 5, false}, "STOR6": {addrHard - 20, false}}, + "EXT9": {"STOR1": {addrHard, false}, "STOR2": {addrHard - 5, false}, "STOR5": {addrHard - 20, false}}, + "EXTA": {"STOR2": {addrHard - 10, false}, "STOR3": {addrHard, false}, "STOR4": {addrHard - 20, false}}, + "EXTB": {"STOR2": {addrHard - 10, false}, "STOR3": {addrHard, false}, "STOR4": {addrHard - 20, false}}, + "EXTC": {"STOR2": {addrHard, false}, "STOR4": {addrHard, false}, "STOR6": {addrHard, false}}, + "EXTD": {"STOR2": {addrHard, false}, "STOR4": {addrHard, false}, "STOR6": {addrHard, false}}, + "EXTD1": {"STOR2": {addrHard, true}, "STOR4": {addrHard, false}, "STOR6": {addrHard, false}}, + "EXTm": {"STOR3": {addrHard - 100, true}}, + "EXTn": {"STOR7": {addrHard - 100, true}}, } // soft retention addr gaftSoft := map[extentID]map[storehostID]gaftRet{ - "EXT1": {"STOR1": {addrSoft - 3, false}, "STOR2": {addrSoft - 1, false}, "STOR3": {addrSoft, false}}, - "EXT2": {"STOR4": {addrSoft, false}, "STOR5": {addrSoft - 5, false}, "STOR6": {addrSoft - 42, false}}, - "EXT3": {"STOR1": {addrSoft - 9, false}, "STOR3": {addrSoft, false}, "STOR5": {addrSoft - 12, false}}, - "EXT4": {"STOR2": {addrSoft, false}, "STOR4": {addrSoft + 51, true}, "STOR6": {addrSoft + 50, false}}, - "EXT5": {"STOR2": {addrSoft + 10, false}, "STOR3": {addrSoft - 100, false}, "STOR4": {addrSoft + 11, true}}, + "EXT1": {"STOR1": {addrSoft - 3, false}, "STOR2": {addrSoft - 1, false}, "STOR3": {addrSoft, false}}, + "EXT2": {"STOR4": {addrSoft, false}, "STOR5": {addrSoft - 5, false}, "STOR6": {addrSoft - 42, false}}, + "EXT3": {"STOR1": {addrSoft - 9, false}, "STOR3": {addrSoft, false}, "STOR5": {addrSoft - 12, false}}, + "EXT31": {"STOR1": {addrSoft - 9, false}, "STOR3": {addrSoft, false}, "STOR5": {addrSoft - 12, false}}, + "EXT32": {"STOR1": {addrSoft - 9, false}, "STOR3": {addrSoft, false}, "STOR5": {addrSoft - 12, false}}, + "EXT4": {"STOR2": {addrSoft, false}, "STOR4": {addrSoft + 51, true}, "STOR6": {addrSoft + 50, false}}, + "EXT5": {"STOR2": {addrSoft + 10, false}, "STOR3": {addrSoft - 100, false}, "STOR4": {addrSoft + 11, true}}, // "EXT6": {}, // should not get called + // "EXT61": {}, // should not get called // "EXT7": {}, // should not get called - "EXT8": {"STOR3": {addrSoft + 51, true}, "STOR4": {addrSoft + 50, false}, "STOR6": {addrSoft - 20, false}}, - "EXT9": {"STOR1": {addrSoft + 51, true}, "STOR2": {addrSoft + 50, false}, "STOR5": {addrSoft - 20, false}}, - "EXTA": {"STOR2": {addrSoft + 10, false}, "STOR3": {addrSoft - 100, false}, "STOR4": {addrSoft + 11, true}}, - "EXTB": {"STOR2": {addrSoft + 10, false}, "STOR3": {addrSoft - 100, false}, "STOR4": {addrSoft + 11, true}}, - "EXTC": {"STOR2": {addrSoft, true}, "STOR4": {addrSoft, true}, "STOR6": {addrSoft, true}}, - "EXTD": {"STOR2": {addrSoft, true}, "STOR4": {addrSoft, true}, "STOR6": {addrSoft, true}}, - "EXTm": {"STOR3": {addrSoft, true}}, - "EXTn": {"STOR7": {addrSoft, true}}, + "EXT8": {"STOR3": {addrSoft + 51, true}, "STOR4": {addrSoft + 50, false}, "STOR6": {addrSoft - 20, false}}, + "EXT9": {"STOR1": {addrSoft + 51, true}, "STOR2": {addrSoft + 50, false}, "STOR5": {addrSoft - 20, false}}, + "EXTA": {"STOR2": {addrSoft + 10, false}, "STOR3": {addrSoft - 100, false}, "STOR4": {addrSoft + 11, true}}, + "EXTB": {"STOR2": {addrSoft + 10, false}, "STOR3": {addrSoft - 100, false}, "STOR4": {addrSoft + 11, true}}, + "EXTC": 
{"STOR2": {addrSoft, true}, "STOR4": {addrSoft, true}, "STOR6": {addrSoft, true}}, + "EXTD": {"STOR2": {addrSoft, true}, "STOR4": {addrSoft, true}, "STOR6": {addrSoft, true}}, + "EXTD1": {"STOR2": {addrBegin, false}, "STOR4": {addrBegin, false}, "STOR6": {addrBegin, false}}, + "EXTm": {"STOR3": {addrSoft, true}}, + "EXTn": {"STOR7": {addrSoft, true}}, } // get ack level gal := map[extentID]map[consumerGroupID]int64{ - "EXT1": {"CGm": addrSeal, "CG1": addrPreHard, "CG2": addrPostHard, "CG3": addrSoft}, - "EXT2": {"CGm": addrSeal, "CG1": addrPostSoft - 10, "CG2": addrSoft + 100, "CG3": addrPostSoft - 20}, - "EXT3": {"CGm": addrSeal, "CG1": addrPreSoft, "CG2": addrPreSoft - 20, "CG3": addrPreSoft + 50}, - "EXT4": {"CGm": addrSeal, "CG1": addrPreSoft, "CG2": addrSoft, "CG3": addrPostSoft}, - "EXT5": {"CGm": addrSeal, "CG1": addrSeal, "CG2": addrSeal, "CG3": addrSeal}, + "EXT1": {"CGm": addrSeal, "CG1": addrPreHard, "CG2": addrPostHard, "CG3": addrSoft}, + "EXT2": {"CGm": addrSeal, "CG1": addrPostSoft - 10, "CG2": addrSoft + 100, "CG3": addrPostSoft - 20}, + "EXT3": {"CGm": addrSeal, "CG1": addrPreSoft, "CG2": addrPreSoft - 20, "CG3": addrPreSoft + 50}, + "EXT31": {"CGm": addrSeal, "CG1": addrPreSoft, "CG2": addrPreSoft - 20, "CG3": addrPreSoft + 50}, + "EXT32": {"CGm": addrSeal, "CG1": addrPreSoft, "CG2": addrPreSoft - 20, "CG3": addrPreSoft + 50}, + "EXT4": {"CGm": addrSeal, "CG1": addrPreSoft, "CG2": addrSoft, "CG3": addrPostSoft}, + "EXT5": {"CGm": addrSeal, "CG1": addrSeal, "CG2": addrSeal, "CG3": addrSeal}, // "EXT6": {}, // should not get called // "EXT7": {}, // should not get called - "EXT8": {"CGm": addrSeal, "CG1": addrSeal, "CG2": addrSeal, "CG3": addrSeal}, - "EXT9": {"CGm": addrSeal, "CG1": addrSeal, "CG2": addrSeal, "CG3": addrSeal}, - "EXTA": {"CGm": addrSeal, "CG1": addrSeal, "CG2": addrSeal, "CG3": addrSeal}, - "EXTB": {"CGm": addrSeal, "CG1": addrSeal, "CG2": addrSeal, "CG3": addrSeal}, - "EXTC": {"CGm": addrSeal, "CG1": addrSeal, "CG2": addrSeal, "CG3": addrSeal}, - "EXTD": {"CGm": addrSeal, "CG1": addrSeal, "CG2": addrSeal, "CG3": addrSeal}, - "EXTm": {"CGm": addrPostSoft}, - "EXTn": {"CGm": addrPreSoft}, + "EXT8": {"CGm": addrSeal, "CG1": addrSeal, "CG2": addrSeal, "CG3": addrSeal}, + "EXT9": {"CGm": addrSeal, "CG1": addrSeal, "CG2": addrSeal, "CG3": addrSeal}, + "EXTA": {"CGm": addrSeal, "CG1": addrSeal, "CG2": addrSeal, "CG3": addrSeal}, + "EXTB": {"CGm": addrSeal, "CG1": addrSeal, "CG2": addrSeal, "CG3": addrSeal}, + "EXTC": {"CGm": addrSeal, "CG1": addrSeal, "CG2": addrSeal, "CG3": addrSeal}, + "EXTD": {"CGm": addrSeal, "CG1": addrSeal, "CG2": addrSeal, "CG3": addrSeal}, + "EXTD1": {"CGm": addrBegin, "CG1": addrBegin, "CG2": addrBegin, "CG3": addrBegin}, + "EXTm": {"CGm": addrPostSoft}, + "EXTn": {"CGm": addrPreSoft}, } for ext, retMap := range gaftHard { @@ -278,25 +302,31 @@ func (s *RetentionMgrSuite) TestRetentionManager() { s.metadata.On("MarkExtentConsumed", destinationID("DEST2"), extentID("EXTA")).Return(nil).Once() s.metadata.On("MarkExtentConsumed", destinationID("DEST2"), extentID("EXTB")).Return(nil).Once() s.metadata.On("MarkExtentConsumed", destinationID("DEST2"), extentID("EXTD")).Return(nil).Once() + s.metadata.On("MarkExtentConsumed", destinationID("DEST2"), extentID("EXTD1")).Return(nil).Once() + s.metadata.On("MarkExtentConsumed", destinationID("DEST2"), extentID("EXTm")).Return(nil).Once() + s.metadata.On("MarkExtentConsumed", destinationID("DEST2"), extentID("EXTn")).Return(nil).Once() // expected retention addresses purgeAddrMap := 
map[extentID]int64{ - "EXT1": addrHard, - "EXT2": addrSoft, - "EXT3": addrPreSoft, - "EXT4": addrPreSoft, - "EXT5": addrSoft + 11, + "EXT1": addrHard, + "EXT2": addrSoft, + "EXT3": addrPreSoft, + "EXT31": addrHard, + "EXT32": addrPreSoft, + "EXT4": addrPreSoft, + "EXT5": addrSoft + 11, // "EXT6": addrSeal, + "EXT61": addrSeal, // "EXT7": , - "EXT8": addrSoft + 51, - "EXT9": addrSoft + 51, - "EXTA": addrSoft + 11, - "EXTB": addrSoft + 11, - "EXTC": addrSoft, - "EXTD": addrSoft, - "EXTE": addrSeal, - "EXTm": addrSoft, - "EXTn": addrPreSoft, + "EXT8": addrSoft + 51, + "EXT9": addrSoft + 51, + "EXTA": addrSoft + 11, + "EXTB": addrSoft + 11, + "EXTC": addrSoft, + "EXTD": addrSoft, + "EXTD1": addrHard, + "EXTm": addrSoft, + "EXTn": addrPreSoft, } for ext, addr := range purgeAddrMap { @@ -307,21 +337,21 @@ func (s *RetentionMgrSuite) TestRetentionManager() { for _, cg := range consumerGroups { s.metadata.On("DeleteConsumerGroupExtent", consumerGroupID(cg.id), extentID("EXT5")).Return(nil).Once() + s.metadata.On("DeleteConsumerGroupExtent", consumerGroupID(cg.id), extentID("EXT61")).Return(nil).Once() s.metadata.On("DeleteConsumerGroupExtent", consumerGroupID(cg.id), extentID("EXT9")).Return(shared.NewEntityNotExistsError()).Once() s.metadata.On("DeleteConsumerGroupExtent", consumerGroupID(cg.id), extentID("EXTA")).Return(shared.NewEntityNotExistsError()).Once() s.metadata.On("DeleteConsumerGroupExtent", consumerGroupID(cg.id), extentID("EXTB")).Return(shared.NewBadRequestError()).Once() s.metadata.On("DeleteConsumerGroupExtent", consumerGroupID(cg.id), extentID("EXTD")).Return(nil).Once() - s.metadata.On("DeleteConsumerGroupExtent", consumerGroupID(cg.id), extentID("EXTE")).Return(nil).Once() } // // DeleteConsumerGroupExtent on EXTA got an EntityNotExistsError, but it should still be deleted; while EXTB shouldn't // s.metadata.On("DeleteExtent", destinationID("DEST2"), extentID("EXTA")).Return(nil).Once() - s.metadata.On("DeleteExtent", destinationID("DEST2"), extentID("EXTE")).Return(nil).Once() + s.metadata.On("DeleteExtent", destinationID("DEST2"), extentID("EXT61")).Return(nil).Once() var retMgr *RetentionManager - opts := &Options{NumWorkers: 3, RetentionInterval: 5 * time.Second, ExtentDeleteDeferPeriod: time.Hour} + opts := &Options{NumWorkers: 3, RetentionInterval: 5 * time.Second, ExtentDeleteDeferPeriod: 1 * time.Hour, LocalZone: `zone1`} metricsClient := metrics.NewClient(common.NewMetricReporterWithHostname(configure.NewCommonServiceConfig()), metrics.Controller) retMgr = tNew(opts, s.metadata, s.storehost, metricsClient, common.GetDefaultLogger()) @@ -397,7 +427,7 @@ func (s *RetentionMgrSuite) TestRetentionManagerOnDeletedDestinations() { var retMgr *RetentionManager - opts := &Options{NumWorkers: 3, RetentionInterval: 5 * time.Second} + opts := &Options{NumWorkers: 3, RetentionInterval: 5 * time.Second, LocalZone: `zone1`} metricsClient := metrics.NewClient(common.NewMetricReporterWithHostname(configure.NewCommonServiceConfig()), metrics.Controller) retMgr = tNew(opts, s.metadata, s.storehost, metricsClient, common.GetDefaultLogger()) diff --git a/services/storehost/replicationJobRunner.go b/services/storehost/replicationJobRunner.go index 72e05782..117fa9b8 100644 --- a/services/storehost/replicationJobRunner.go +++ b/services/storehost/replicationJobRunner.go @@ -53,7 +53,8 @@ type ( mClient metadata.TChanMetadataService logger bark.Logger - closeChannel chan struct{} + closeChannel chan struct{} + rpmBootstrapped chan struct{} ticker *time.Ticker running int64 @@ -63,6 +64,9 @@ 
type ( const ( // runInterval determines how often the runner will run runInterval = time.Duration(10 * time.Minute) + + // timeout to wait for rpm bootstrap + rpmBootstrapTimeout = time.Duration(2 * time.Minute) ) // NewReplicationJobRunner returns an instance of ReplicationJobRunner @@ -82,12 +86,25 @@ func (runner *replicationJobRunner) Start() { runner.currentZone, _ = common.GetLocalClusterInfo(strings.ToLower(runner.storeHost.SCommon.GetConfig().GetDeploymentName())) runner.closeChannel = make(chan struct{}) + + // replication job needs rpm to be bootstrapped first (in order to resolve other store host or replicator) + select { + case <-runner.closeChannel: + runner.logger.Error("ReplicationJobRunner: runner stopped before rpm is bootstrapped") + return + case <-runner.storeHost.SCommon.GetRingpopMonitor().GetBootstrappedChannel(): + case <-time.After(rpmBootstrapTimeout): + // rpm still not bootstrapped after time out. Start the jobs anyway (won't hurt) + runner.logger.Warn("ReplicationJobRunner: rpm not bootstrapped after timeout") + } + go runner.run() go runner.houseKeep() } func (runner *replicationJobRunner) Stop() { close(runner.closeChannel) + close(runner.rpmBootstrapped) runner.logger.Info("ReplicationJobRunner: stopped") } @@ -143,9 +160,13 @@ func (runner *replicationJobRunner) run() { }).Error(`No store Ids for extent from metadata`) continue } - sort.Strings(storeIds) - primaryStore := storeIds[0] + // If the primary store field doesn't exist, the first store will be treated as primary store + primaryStore := extentStats.GetExtent().GetRemoteExtentPrimaryStore() + if len(primaryStore) == 0 { + sort.Strings(storeIds) + primaryStore = storeIds[0] + } if primaryStore == runner.storeID { primaryExtents++ diff --git a/services/storehost/storehost.go b/services/storehost/storehost.go index f9d40555..997fe3d1 100644 --- a/services/storehost/storehost.go +++ b/services/storehost/storehost.go @@ -31,17 +31,18 @@ import ( "time" ccommon "github.com/uber/cherami-client-go/common" - "github.com/uber/cherami-thrift/.generated/go/cherami" - "github.com/uber/cherami-thrift/.generated/go/metadata" - "github.com/uber/cherami-thrift/.generated/go/store" "github.com/uber/cherami-server/common" + mm "github.com/uber/cherami-server/common/metadata" "github.com/uber/cherami-server/common/metrics" "github.com/uber/cherami-server/storage" + "github.com/uber/cherami-thrift/.generated/go/cherami" + "github.com/uber/cherami-thrift/.generated/go/metadata" + "github.com/uber/cherami-thrift/.generated/go/store" // "code.uber.internal/odp/cherami/storage/rockstor" - "github.com/uber/cherami-thrift/.generated/go/controller" "github.com/uber/cherami-server/services/storehost/load" "github.com/uber/cherami-server/storage/manyrocks" storeStream "github.com/uber/cherami-server/stream" + "github.com/uber/cherami-thrift/.generated/go/controller" "github.com/pborman/uuid" "github.com/uber-common/bark" @@ -233,13 +234,19 @@ func NewStoreHost(serviceName string, sCommon common.SCommon, mClient metadata.T t := &StoreHost{ SCommon: sCommon, opts: opts, - mClient: mClient, hostMetrics: load.NewHostMetrics(), shutdownC: make(chan struct{}), disableWriteC: make(chan struct{}), m3Client: metrics.NewClient(sCommon.GetMetricsReporter(), metrics.Storage), } + t.logger = common.GetDefaultLogger().WithFields(bark.Fields{ + common.TagStor: common.FmtStor(sCommon.GetHostUUID()), + common.TagDplName: common.FmtDplName(sCommon.GetConfig().GetDeploymentName()), + }) + + t.mClient = mm.NewMetadataMetricsMgr(mClient, 
t.m3Client, t.logger) + return t, []thrift.TChanServer{store.NewTChanBStoreServer(t)} } @@ -250,12 +257,7 @@ func (t *StoreHost) Start(thriftService []thrift.TChanServer) { hostID := t.GetHostUUID() - // Get the deployment name for logger field - deploymentName := t.SCommon.GetConfig().GetDeploymentName() - - t.logger = common.GetDefaultLogger().WithFields(bark.Fields{common.TagStor: common.FmtFrnt(hostID), common.TagDplName: common.FmtDplName(deploymentName)}) - - t.hostIDHeartbeater = common.NewHostIDHeartbeater(t.mClient, t.GetHostUUID(), t.GetHostPort(), t.GetHostName(), t.logger) + t.hostIDHeartbeater = common.NewHostIDHeartbeater(t.mClient, hostID, t.GetHostPort(), t.GetHostName(), t.logger) t.hostIDHeartbeater.Start() // setup the store manager @@ -291,7 +293,7 @@ func (t *StoreHost) Start(thriftService []thrift.TChanServer) { t.replMgr = NewReplicationManager(t.xMgr, t.m3Client, t.mClient, t.logger, hostID, t.GetWSConnector()) t.replicationJobRunner = NewReplicationJobRunner(t.mClient, t, t.logger) - t.replicationJobRunner.Start() + go t.replicationJobRunner.Start() loadReporterDaemonfactory := t.GetLoadReporterDaemonFactory() t.xMgr.loadReporterFactory = loadReporterDaemonfactory diff --git a/test/integration/base.go b/test/integration/base.go index 80e2908d..ac8decfb 100644 --- a/test/integration/base.go +++ b/test/integration/base.go @@ -173,7 +173,7 @@ func (tb *testBase) setupSuiteImpl(t *testing.T) { err = metadata.CreateKeyspace(tb.mClient.GetSession(), tb.keyspace, 1, true) tb.NoError(err) - err = metadata.LoadSchema("/usr/local/bin/cqlsh", "../../clients/metadata/schema/metadata_test.cql", tb.keyspace) + err = metadata.LoadSchema("/usr/local/bin/cqlsh", "../../clients/metadata/schema/metadata.cql", tb.keyspace) tb.NoError(err) // Adjust the controller and storehost scan intervals diff --git a/tools/admin/lib.go b/tools/admin/lib.go index 6078ab78..f4278700 100644 --- a/tools/admin/lib.go +++ b/tools/admin/lib.go @@ -606,7 +606,7 @@ type destAllJSONOutputFields struct { TotalExts int `json:"total_ext"` OpenExts int `json:"open"` SealedExts int `json:"sealed"` - ConsumedExts int `json:"consumerd"` + ConsumedExts int `json:"consumed"` DeletedExts int `json:"Deleted"` ConsumedMessagesRetention int32 `json:"consumed_messages_retention"` UnconsumedMessagesRetention int32 `json:"unconsumed_messages_retention"` diff --git a/tools/common/lib.go b/tools/common/lib.go index 3f9b0b40..58acd911 100644 --- a/tools/common/lib.go +++ b/tools/common/lib.go @@ -102,10 +102,14 @@ func ExitIfError(err error) { func newGlobalOptionsFromCLIContext(c *cli.Context) *GlobalOptions { host, port, err := common.SplitHostPort(c.GlobalString("hostport")) ExitIfError(err) + environment := c.GlobalString("env") + if strings.HasPrefix(environment, `prod`) { + environment = `` + } return &GlobalOptions{ hyperbahn: c.GlobalBool("hyperbahn"), hyperbahnBootstrapFile: c.GlobalString("hyperbahn_bootstrap_file"), - env: c.GlobalString("env"), + env: environment, frontendHost: host, frontendPort: port, timeoutSecs: c.GlobalInt("timeout"), @@ -697,7 +701,7 @@ type destJSONOutputFields struct { TotalExts int `json:"total_ext"` OpenExts int `json:"open"` SealedExts int `json:"sealed"` - ConsumedExts int `json:"consumerd"` + ConsumedExts int `json:"consumed"` DeletedExts int `json:"Deleted"` ConsumedMessagesRetention int32 `json:"consumed_messages_retention"` UnconsumedMessagesRetention int32 `json:"unconsumed_messages_retention"`