Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/periodically scan disk #1861

Merged
merged 27 commits into from
Mar 22, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
c2baf2f
add disk_use config options
parkerduckworth Mar 16, 2022
f86f618
implement disk use scan
parkerduckworth Mar 16, 2022
c1a6697
Merge branch 'gh-1839-read-only-shards' into feature/periodically-scan-disk
parkerduckworth Mar 16, 2022
cfc7ad1
Merge branch 'gh-1839-read-only-shards' into feature/periodically-scan-disk
parkerduckworth Mar 16, 2022
0f438a0
Merge branch 'gh-1839-read-only-shards' into feature/periodically-scan-disk
parkerduckworth Mar 17, 2022
4a6a9fc
add back Shard.initStatus after merge
parkerduckworth Mar 17, 2022
47f5375
Merge branch 'master' into feature/periodically-scan-disk
parkerduckworth Mar 17, 2022
966dd79
go mod tidy
parkerduckworth Mar 17, 2022
804fbef
Merge branch 'master' into feature/periodically-scan-disk
parkerduckworth Mar 17, 2022
c0a411a
add back build tags that travisbot removed
parkerduckworth Mar 17, 2022
469ef68
remove TODOs
parkerduckworth Mar 17, 2022
f49acd2
make disk scan goroutine cancel-able
parkerduckworth Mar 17, 2022
52f7384
only scan disk use if user provides config to do so
parkerduckworth Mar 17, 2022
52ff788
emit disk use warnings with backoff intervals
parkerduckworth Mar 21, 2022
fe08383
resolve merge conflicts
parkerduckworth Mar 21, 2022
b157219
linter
parkerduckworth Mar 21, 2022
13fc6d8
set default disk use vals, streamline warn logs
parkerduckworth Mar 21, 2022
da0fd10
add disk use config to test repo
parkerduckworth Mar 21, 2022
257b132
increase log verbosity to debug
parkerduckworth Mar 21, 2022
077c5f5
increase log verbosity to debug
parkerduckworth Mar 21, 2022
b33717a
increase log verbosity to debug
parkerduckworth Mar 21, 2022
0589be1
increase log verbosity to debug
parkerduckworth Mar 21, 2022
fb3a6e8
increase log verbosity to debug
parkerduckworth Mar 21, 2022
30578ba
add default disk scan config to tests
parkerduckworth Mar 21, 2022
90e5ccf
change log level from fatal to error
parkerduckworth Mar 22, 2022
d9168a3
remove debug logs
parkerduckworth Mar 22, 2022
9af573a
[skip ci] remove go test verbose flag
parkerduckworth Mar 22, 2022
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
8 changes: 5 additions & 3 deletions adapters/handlers/rest/configure_api.go
Original file line number Diff line number Diff line change
Expand Up @@ -133,9 +133,11 @@ func configureAPI(api *operations.WeaviateAPI) http.Handler {
// TODO: configure http transport for efficient intra-cluster comm
remoteIndexClient := clients.NewRemoteIndex(clusterHttpClient)
repo := db.New(appState.Logger, db.Config{
RootPath: appState.ServerConfig.Config.Persistence.DataPath,
QueryLimit: appState.ServerConfig.Config.QueryDefaults.Limit,
QueryMaximumResults: appState.ServerConfig.Config.QueryMaximumResults,
RootPath: appState.ServerConfig.Config.Persistence.DataPath,
QueryLimit: appState.ServerConfig.Config.QueryDefaults.Limit,
QueryMaximumResults: appState.ServerConfig.Config.QueryMaximumResults,
DiskUseWarningPercentage: appState.ServerConfig.Config.DiskUse.WarningPercentage,
DiskUseReadOnlyPercentage: appState.ServerConfig.Config.DiskUse.ReadOnlyPercentage,
}, remoteIndexClient, appState.Cluster) // TODO client
vectorMigrator = db.NewMigrator(repo, appState.Logger)
vectorRepo = repo
Expand Down
15 changes: 13 additions & 2 deletions adapters/repos/db/aggregations_integration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ import (
"github.com/semi-technologies/weaviate/entities/filters"
"github.com/semi-technologies/weaviate/entities/models"
"github.com/semi-technologies/weaviate/entities/schema"
"github.com/semi-technologies/weaviate/usecases/config"
"github.com/sirupsen/logrus"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
Expand All @@ -45,7 +46,12 @@ func Test_Aggregations(t *testing.T) {
shardState := singleShardState()
logger := logrus.New()
schemaGetter := &fakeSchemaGetter{shardState: shardState}
repo := New(logger, Config{RootPath: dirName, QueryMaximumResults: 10000}, &fakeRemoteClient{},
repo := New(logger, Config{
RootPath: dirName,
QueryMaximumResults: 10000,
DiskUseWarningPercentage: config.DefaultDiskUseWarningPercentage,
DiskUseReadOnlyPercentage: config.DefaultDiskUseReadonlyPercentage,
}, &fakeRemoteClient{},
&fakeNodeResolver{})
repo.SetSchemaGetter(schemaGetter)
err := repo.WaitForStartup(testCtx())
Expand Down Expand Up @@ -77,7 +83,12 @@ func Test_Aggregations_MultiShard(t *testing.T) {
shardState := fixedMultiShardState()
logger := logrus.New()
schemaGetter := &fakeSchemaGetter{shardState: shardState}
repo := New(logger, Config{RootPath: dirName, QueryMaximumResults: 10000}, &fakeRemoteClient{},
repo := New(logger, Config{
RootPath: dirName,
QueryMaximumResults: 10000,
DiskUseWarningPercentage: config.DefaultDiskUseWarningPercentage,
DiskUseReadOnlyPercentage: config.DefaultDiskUseReadonlyPercentage,
}, &fakeRemoteClient{},
&fakeNodeResolver{})
repo.SetSchemaGetter(schemaGetter)
err := repo.WaitForStartup(testCtx())
Expand Down
8 changes: 7 additions & 1 deletion adapters/repos/db/batch_integration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ import (
"github.com/semi-technologies/weaviate/entities/models"
"github.com/semi-technologies/weaviate/entities/schema"
"github.com/semi-technologies/weaviate/entities/search"
"github.com/semi-technologies/weaviate/usecases/config"
"github.com/semi-technologies/weaviate/usecases/objects"
"github.com/semi-technologies/weaviate/usecases/traverser"
"github.com/sirupsen/logrus"
Expand All @@ -49,7 +50,12 @@ func TestBatchPutObjects(t *testing.T) {

logger := logrus.New()
schemaGetter := &fakeSchemaGetter{shardState: singleShardState()}
repo := New(logger, Config{RootPath: dirName, QueryMaximumResults: 10000}, &fakeRemoteClient{},
repo := New(logger, Config{
RootPath: dirName,
QueryMaximumResults: 10000,
DiskUseWarningPercentage: config.DefaultDiskUseWarningPercentage,
DiskUseReadOnlyPercentage: config.DefaultDiskUseReadonlyPercentage,
}, &fakeRemoteClient{},
&fakeNodeResolver{})
repo.SetSchemaGetter(schemaGetter)
err := repo.WaitForStartup(testCtx())
Expand Down
8 changes: 7 additions & 1 deletion adapters/repos/db/batch_reference_integration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ import (
"github.com/semi-technologies/weaviate/entities/models"
"github.com/semi-technologies/weaviate/entities/schema"
"github.com/semi-technologies/weaviate/entities/schema/crossref"
"github.com/semi-technologies/weaviate/usecases/config"
"github.com/semi-technologies/weaviate/usecases/objects"
"github.com/semi-technologies/weaviate/usecases/traverser"
"github.com/sirupsen/logrus"
Expand All @@ -47,7 +48,12 @@ func Test_AddingReferencesInBatches(t *testing.T) {

logger := logrus.New()
schemaGetter := &fakeSchemaGetter{shardState: singleShardState()}
repo := New(logger, Config{RootPath: dirName, QueryMaximumResults: 10000}, &fakeRemoteClient{},
repo := New(logger, Config{
RootPath: dirName,
QueryMaximumResults: 10000,
DiskUseWarningPercentage: config.DefaultDiskUseWarningPercentage,
DiskUseReadOnlyPercentage: config.DefaultDiskUseReadonlyPercentage,
}, &fakeRemoteClient{},
&fakeNodeResolver{})
repo.SetSchemaGetter(schemaGetter)
err := repo.WaitForStartup(testCtx())
Expand Down
8 changes: 7 additions & 1 deletion adapters/repos/db/classification_integration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ import (
"github.com/semi-technologies/weaviate/entities/schema"
"github.com/semi-technologies/weaviate/entities/search"
"github.com/semi-technologies/weaviate/usecases/classification"
"github.com/semi-technologies/weaviate/usecases/config"
"github.com/sirupsen/logrus"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
Expand All @@ -45,7 +46,12 @@ func TestClassifications(t *testing.T) {

logger := logrus.New()
schemaGetter := &fakeSchemaGetter{shardState: singleShardState()}
repo := New(logger, Config{RootPath: dirName, QueryMaximumResults: 10000}, &fakeRemoteClient{},
repo := New(logger, Config{
RootPath: dirName,
QueryMaximumResults: 10000,
DiskUseWarningPercentage: config.DefaultDiskUseWarningPercentage,
DiskUseReadOnlyPercentage: config.DefaultDiskUseReadonlyPercentage,
}, &fakeRemoteClient{},
&fakeNodeResolver{})
repo.SetSchemaGetter(schemaGetter)
err := repo.WaitForStartup(testCtx())
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ import (
"github.com/semi-technologies/weaviate/entities/schema"
"github.com/semi-technologies/weaviate/entities/schema/crossref"
"github.com/semi-technologies/weaviate/entities/search"
"github.com/semi-technologies/weaviate/usecases/config"
"github.com/semi-technologies/weaviate/usecases/objects"
"github.com/semi-technologies/weaviate/usecases/sharding"
"github.com/semi-technologies/weaviate/usecases/traverser"
Expand Down Expand Up @@ -505,7 +506,12 @@ func (n *node) init(numberOfNodes int, dirName string, shardStateRaw []byte,
}

client := clients.NewRemoteIndex(&http.Client{})
n.repo = db.New(logger, db.Config{RootPath: localDir, QueryMaximumResults: 10000}, client, nodeResolver)
n.repo = db.New(logger, db.Config{
RootPath: localDir,
QueryMaximumResults: 10000,
DiskUseWarningPercentage: config.DefaultDiskUseWarningPercentage,
DiskUseReadOnlyPercentage: config.DefaultDiskUseReadonlyPercentage,
}, client, nodeResolver)
n.schemaGetter = &fakeSchemaGetter{
shardState: shardState,
schema: schema.Schema{Objects: &models.Schema{}},
Expand Down
8 changes: 7 additions & 1 deletion adapters/repos/db/crud_deletion_integration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import (
"github.com/semi-technologies/weaviate/entities/filters"
"github.com/semi-technologies/weaviate/entities/models"
libschema "github.com/semi-technologies/weaviate/entities/schema"
"github.com/semi-technologies/weaviate/usecases/config"
"github.com/semi-technologies/weaviate/usecases/traverser"
"github.com/sirupsen/logrus"
"github.com/stretchr/testify/assert"
Expand All @@ -43,7 +44,12 @@ func TestDeleteJourney(t *testing.T) {

logger := logrus.New()
schemaGetter := &fakeSchemaGetter{shardState: singleShardState()}
repo := New(logger, Config{RootPath: dirName, QueryMaximumResults: 10000}, &fakeRemoteClient{},
repo := New(logger, Config{
RootPath: dirName,
QueryMaximumResults: 10000,
DiskUseWarningPercentage: config.DefaultDiskUseWarningPercentage,
DiskUseReadOnlyPercentage: config.DefaultDiskUseReadonlyPercentage,
}, &fakeRemoteClient{},
&fakeNodeResolver{})
repo.SetSchemaGetter(schemaGetter)
err := repo.WaitForStartup(testCtx())
Expand Down
15 changes: 13 additions & 2 deletions adapters/repos/db/crud_integration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ import (
"github.com/semi-technologies/weaviate/entities/schema"
"github.com/semi-technologies/weaviate/entities/schema/crossref"
"github.com/semi-technologies/weaviate/entities/search"
"github.com/semi-technologies/weaviate/usecases/config"
"github.com/semi-technologies/weaviate/usecases/objects"
"github.com/semi-technologies/weaviate/usecases/traverser"
"github.com/sirupsen/logrus/hooks/test"
Expand Down Expand Up @@ -84,7 +85,12 @@ func TestCRUD(t *testing.T) {
},
}
schemaGetter := &fakeSchemaGetter{shardState: singleShardState()}
repo := New(logger, Config{RootPath: dirName, QueryMaximumResults: 10000}, &fakeRemoteClient{},
repo := New(logger, Config{
RootPath: dirName,
QueryMaximumResults: 10000,
DiskUseWarningPercentage: config.DefaultDiskUseWarningPercentage,
DiskUseReadOnlyPercentage: config.DefaultDiskUseReadonlyPercentage,
}, &fakeRemoteClient{},
&fakeNodeResolver{})
repo.SetSchemaGetter(schemaGetter)
err := repo.WaitForStartup(testCtx())
Expand Down Expand Up @@ -903,7 +909,12 @@ func Test_ImportWithoutVector_UpdateWithVectorLater(t *testing.T) {
}()

schemaGetter := &fakeSchemaGetter{shardState: singleShardState()}
repo := New(logger, Config{RootPath: dirName, QueryMaximumResults: 10000}, &fakeRemoteClient{},
repo := New(logger, Config{
RootPath: dirName,
QueryMaximumResults: 10000,
DiskUseWarningPercentage: config.DefaultDiskUseWarningPercentage,
DiskUseReadOnlyPercentage: config.DefaultDiskUseReadonlyPercentage,
}, &fakeRemoteClient{},
&fakeNodeResolver{})
repo.SetSchemaGetter(schemaGetter)
err := repo.WaitForStartup(testCtx())
Expand Down
8 changes: 7 additions & 1 deletion adapters/repos/db/crud_noindex_property_integration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ import (
"github.com/semi-technologies/weaviate/entities/models"
"github.com/semi-technologies/weaviate/entities/schema"
"github.com/semi-technologies/weaviate/entities/search"
"github.com/semi-technologies/weaviate/usecases/config"
"github.com/semi-technologies/weaviate/usecases/traverser"
"github.com/sirupsen/logrus/hooks/test"
"github.com/stretchr/testify/assert"
Expand Down Expand Up @@ -59,7 +60,12 @@ func TestCRUD_NoIndexProp(t *testing.T) {
}},
}
schemaGetter := &fakeSchemaGetter{shardState: singleShardState()}
repo := New(logger, Config{RootPath: dirName, QueryMaximumResults: 10000}, &fakeRemoteClient{},
repo := New(logger, Config{
RootPath: dirName,
QueryMaximumResults: 10000,
DiskUseWarningPercentage: config.DefaultDiskUseWarningPercentage,
DiskUseReadOnlyPercentage: config.DefaultDiskUseReadonlyPercentage,
}, &fakeRemoteClient{},
&fakeNodeResolver{})
repo.SetSchemaGetter(schemaGetter)
err := repo.WaitForStartup(testCtx())
Expand Down
13 changes: 11 additions & 2 deletions adapters/repos/db/crud_references_integration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ import (
"github.com/semi-technologies/weaviate/entities/models"
"github.com/semi-technologies/weaviate/entities/schema"
"github.com/semi-technologies/weaviate/entities/search"
"github.com/semi-technologies/weaviate/usecases/config"
"github.com/sirupsen/logrus"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
Expand Down Expand Up @@ -122,7 +123,11 @@ func TestNestedReferences(t *testing.T) {
}
logger := logrus.New()
schemaGetter := &fakeSchemaGetter{shardState: singleShardState()}
repo := New(logger, Config{RootPath: dirName}, &fakeRemoteClient{},
repo := New(logger, Config{
RootPath: dirName,
DiskUseWarningPercentage: config.DefaultDiskUseWarningPercentage,
DiskUseReadOnlyPercentage: config.DefaultDiskUseReadonlyPercentage,
}, &fakeRemoteClient{},
&fakeNodeResolver{})
repo.SetSchemaGetter(schemaGetter)
err := repo.WaitForStartup(testCtx())
Expand Down Expand Up @@ -460,7 +465,11 @@ func Test_AddingReferenceOneByOne(t *testing.T) {
}
logger := logrus.New()
schemaGetter := &fakeSchemaGetter{shardState: singleShardState()}
repo := New(logger, Config{RootPath: dirName}, &fakeRemoteClient{},
repo := New(logger, Config{
RootPath: dirName,
DiskUseWarningPercentage: config.DefaultDiskUseWarningPercentage,
DiskUseReadOnlyPercentage: config.DefaultDiskUseReadonlyPercentage,
}, &fakeRemoteClient{},
&fakeNodeResolver{})
repo.SetSchemaGetter(schemaGetter)
err := repo.WaitForStartup(testCtx())
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import (
"github.com/semi-technologies/weaviate/entities/additional"
"github.com/semi-technologies/weaviate/entities/models"
"github.com/semi-technologies/weaviate/entities/search"
"github.com/semi-technologies/weaviate/usecases/config"
"github.com/sirupsen/logrus"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
Expand All @@ -42,7 +43,11 @@ func TestMultipleCrossRefTypes(t *testing.T) {

logger := logrus.New()
schemaGetter := &fakeSchemaGetter{shardState: singleShardState()}
repo := New(logger, Config{RootPath: dirName}, &fakeRemoteClient{},
repo := New(logger, Config{
RootPath: dirName,
DiskUseWarningPercentage: config.DefaultDiskUseWarningPercentage,
DiskUseReadOnlyPercentage: config.DefaultDiskUseReadonlyPercentage,
}, &fakeRemoteClient{},
&fakeNodeResolver{})
repo.SetSchemaGetter(schemaGetter)
err := repo.WaitForStartup(testCtx())
Expand Down
8 changes: 7 additions & 1 deletion adapters/repos/db/crud_update_integration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ import (
"github.com/semi-technologies/weaviate/entities/schema"
libschema "github.com/semi-technologies/weaviate/entities/schema"
"github.com/semi-technologies/weaviate/entities/search"
"github.com/semi-technologies/weaviate/usecases/config"
"github.com/semi-technologies/weaviate/usecases/traverser"
"github.com/sirupsen/logrus"
"github.com/stretchr/testify/assert"
Expand All @@ -52,7 +53,12 @@ func TestUpdateJourney(t *testing.T) {

logger := logrus.New()
schemaGetter := &fakeSchemaGetter{shardState: singleShardState()}
repo := New(logger, Config{RootPath: dirName, QueryMaximumResults: 10000}, &fakeRemoteClient{},
repo := New(logger, Config{
RootPath: dirName,
QueryMaximumResults: 10000,
DiskUseWarningPercentage: config.DefaultDiskUseWarningPercentage,
DiskUseReadOnlyPercentage: config.DefaultDiskUseReadonlyPercentage,
}, &fakeRemoteClient{},
&fakeNodeResolver{})
repo.SetSchemaGetter(schemaGetter)
err := repo.WaitForStartup(testCtx())
Expand Down
15 changes: 13 additions & 2 deletions adapters/repos/db/delete_filter_integration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ import (
"github.com/semi-technologies/weaviate/entities/models"
"github.com/semi-technologies/weaviate/entities/schema"
"github.com/semi-technologies/weaviate/entities/search"
"github.com/semi-technologies/weaviate/usecases/config"
"github.com/semi-technologies/weaviate/usecases/traverser"
"github.com/sirupsen/logrus/hooks/test"
"github.com/stretchr/testify/assert"
Expand Down Expand Up @@ -64,7 +65,12 @@ func Test_FilterSearchesOnDeletedDocIDsWithLimits(t *testing.T) {
}},
}
schemaGetter := &fakeSchemaGetter{shardState: singleShardState()}
repo := New(logger, Config{RootPath: dirName, QueryMaximumResults: 10000}, &fakeRemoteClient{},
repo := New(logger, Config{
RootPath: dirName,
QueryMaximumResults: 10000,
DiskUseWarningPercentage: config.DefaultDiskUseWarningPercentage,
DiskUseReadOnlyPercentage: config.DefaultDiskUseReadonlyPercentage,
}, &fakeRemoteClient{},
&fakeNodeResolver{})
repo.SetSchemaGetter(schemaGetter)
err := repo.WaitForStartup(testCtx())
Expand Down Expand Up @@ -178,7 +184,12 @@ func TestLimitOneAfterDeletion(t *testing.T) {
}},
}
schemaGetter := &fakeSchemaGetter{shardState: singleShardState()}
repo := New(logger, Config{RootPath: dirName, QueryMaximumResults: 10000}, &fakeRemoteClient{},
repo := New(logger, Config{
RootPath: dirName,
QueryMaximumResults: 10000,
DiskUseWarningPercentage: config.DefaultDiskUseWarningPercentage,
DiskUseReadOnlyPercentage: config.DefaultDiskUseReadonlyPercentage,
}, &fakeRemoteClient{},
&fakeNodeResolver{})
repo.SetSchemaGetter(schemaGetter)
err := repo.WaitForStartup(testCtx())
Expand Down