Skip to content

Commit

Permalink
fix tablegc flaky test (#10230)
Browse files Browse the repository at this point in the history
* fix tablegc flaky test

Signed-off-by: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com>

* Pseudo GC state: TableDroppedGCState

Signed-off-by: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com>

* allow next transition and any transition that follows; validate rowcount where table exists, validate drop table

Signed-off-by: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com>

* perform validation

Signed-off-by: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com>

* simplify

Signed-off-by: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com>

* remove redundant check

Signed-off-by: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com>

* more debug info

Signed-off-by: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com>

* reduce intervals

Signed-off-by: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com>

* do not waste time waiting

Signed-off-by: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com>

* parameterize all time values

Signed-off-by: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com>

* give the table more chance to get purged

Signed-off-by: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com>

* flakiness: remove --heartbeat_on_demand_duration

Signed-off-by: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com>
  • Loading branch information
shlomi-noach committed May 9, 2022
1 parent 87f986e commit 4c80ca9
Show file tree
Hide file tree
Showing 2 changed files with 106 additions and 107 deletions.
211 changes: 104 additions & 107 deletions go/test/endtoend/tabletmanager/tablegc/tablegc_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ limitations under the License.
package tablegc

import (
"context"
"flag"
"os"
"testing"
Expand Down Expand Up @@ -67,6 +68,11 @@ var (
}
}
}`

tableTransitionExpiration = 10 * time.Second
gcCheckInterval = 2 * time.Second
gcPurgeCheckInterval = 2 * time.Second
waitForTransitionTimeout = 30 * time.Second
)

func TestMain(m *testing.M) {
Expand All @@ -90,8 +96,8 @@ func TestMain(m *testing.M) {
"--enable_replication_reporter",
"--heartbeat_enable",
"--heartbeat_interval", "250ms",
"--gc_check_interval", "5s",
"--gc_purge_check_interval", "5s",
"--gc_check_interval", gcCheckInterval.String(),
"--gc_purge_check_interval", gcPurgeCheckInterval.String(),
"--table_gc_lifecycle", "hold,purge,evac,drop",
}
// We do not need semiSync for this test case.
Expand Down Expand Up @@ -160,12 +166,73 @@ func tableExists(tableExpr string) (exists bool, tableName string, err error) {
return true, row.AsString("Name", ""), nil
}

// tableExists sees that a given table exists in MySQL
func dropTable(tableName string) (err error) {
func validateTableDoesNotExist(t *testing.T, tableExpr string) {
ctx, cancel := context.WithTimeout(context.Background(), waitForTransitionTimeout)
defer cancel()

ticker := time.NewTicker(time.Second)
var foundTableName string
var exists bool
var err error
for {
select {
case <-ticker.C:
exists, foundTableName, err = tableExists(tableExpr)
require.NoError(t, err)
if !exists {
return
}
case <-ctx.Done():
assert.NoError(t, ctx.Err(), "validateTableDoesNotExist timed out, table %v still exists (%v)", tableExpr, foundTableName)
return
}
}
}

func validateAnyState(t *testing.T, expectNumRows int64, states ...schema.TableGCState) {
for _, state := range states {
expectTableToExist := true
searchExpr := ""
switch state {
case schema.HoldTableGCState:
searchExpr = `\_vt\_HOLD\_%`
case schema.PurgeTableGCState:
searchExpr = `\_vt\_PURGE\_%`
case schema.EvacTableGCState:
searchExpr = `\_vt\_EVAC\_%`
case schema.DropTableGCState:
searchExpr = `\_vt\_DROP\_%`
case schema.TableDroppedGCState:
searchExpr = `\_vt\_%`
expectTableToExist = false
default:
t.Log("Unknown state")
t.Fail()
}
exists, tableName, err := tableExists(searchExpr)
require.NoError(t, err)

if exists {
if expectNumRows >= 0 {
checkTableRows(t, tableName, expectNumRows)
}
// Now that the table is validated, we can drop it
dropTable(t, tableName)
}
if exists == expectTableToExist {
// condition met
return
}
}
assert.Fail(t, "could not match any of the states: %v", states)
}

// dropTable drops a table
func dropTable(t *testing.T, tableName string) {
query := `drop table if exists %a`
parsed := sqlparser.BuildParsedQuery(query, tableName)
_, err = primaryTablet.VttabletProcess.QueryTablet(parsed.Query, keyspaceName, true)
return err
_, err := primaryTablet.VttabletProcess.QueryTablet(parsed.Query, keyspaceName, true)
require.NoError(t, err)
}

func TestPopulateTable(t *testing.T) {
Expand All @@ -175,33 +242,25 @@ func TestPopulateTable(t *testing.T) {
assert.NoError(t, err)
assert.True(t, exists)
}
{
exists, _, err := tableExists("no_such_table")
assert.NoError(t, err)
assert.False(t, exists)
}
validateTableDoesNotExist(t, "no_such_table")
}

func TestHold(t *testing.T) {
populateTable(t)
query, tableName, err := schema.GenerateRenameStatement("t1", schema.HoldTableGCState, time.Now().UTC().Add(10*time.Second))
query, tableName, err := schema.GenerateRenameStatement("t1", schema.HoldTableGCState, time.Now().UTC().Add(tableTransitionExpiration))
assert.NoError(t, err)

_, err = primaryTablet.VttabletProcess.QueryTablet(query, keyspaceName, true)
assert.NoError(t, err)

{
exists, _, err := tableExists("t1")
assert.NoError(t, err)
assert.False(t, exists)
}
validateTableDoesNotExist(t, "t1")
{
exists, _, err := tableExists(tableName)
assert.NoError(t, err)
assert.True(t, exists)
}

time.Sleep(5 * time.Second)
time.Sleep(tableTransitionExpiration / 2)
{
// Table was created with +10s timestamp, so it should still exist
exists, _, err := tableExists(tableName)
Expand All @@ -211,43 +270,30 @@ func TestHold(t *testing.T) {
checkTableRows(t, tableName, 1024)
}

time.Sleep(10 * time.Second)
time.Sleep(tableTransitionExpiration)
{
// We're now both beyond table's timestamp as well as a tableGC interval
exists, _, err := tableExists(tableName)
assert.NoError(t, err)
assert.False(t, exists)
}
{
// Table should be renamed as _vt_PURGE_...
exists, purgeTableName, err := tableExists(`\_vt\_PURGE\_%`)
assert.NoError(t, err)
assert.True(t, exists)
err = dropTable(purgeTableName)
assert.NoError(t, err)
validateTableDoesNotExist(t, tableName)
}
validateAnyState(t, -1, schema.PurgeTableGCState, schema.EvacTableGCState, schema.DropTableGCState, schema.TableDroppedGCState)
}

func TestEvac(t *testing.T) {
populateTable(t)
query, tableName, err := schema.GenerateRenameStatement("t1", schema.EvacTableGCState, time.Now().UTC().Add(10*time.Second))
query, tableName, err := schema.GenerateRenameStatement("t1", schema.EvacTableGCState, time.Now().UTC().Add(tableTransitionExpiration))
assert.NoError(t, err)

_, err = primaryTablet.VttabletProcess.QueryTablet(query, keyspaceName, true)
assert.NoError(t, err)

{
exists, _, err := tableExists("t1")
assert.NoError(t, err)
assert.False(t, exists)
}
validateTableDoesNotExist(t, "t1")
{
exists, _, err := tableExists(tableName)
assert.NoError(t, err)
assert.True(t, exists)
}

time.Sleep(5 * time.Second)
time.Sleep(tableTransitionExpiration / 2)
{
// Table was created with +10s timestamp, so it should still exist
exists, _, err := tableExists(tableName)
Expand All @@ -257,48 +303,32 @@ func TestEvac(t *testing.T) {
checkTableRows(t, tableName, 1024)
}

time.Sleep(10 * time.Second)
{
// We're now both beyond table's timestamp as well as a tableGC interval
exists, _, err := tableExists(tableName)
assert.NoError(t, err)
assert.False(t, exists)
}
time.Sleep(5 * time.Second)
{
// Table should be renamed as _vt_DROP_... and then dropped!
exists, _, err := tableExists(`\_vt\_DROP\_%`)
assert.NoError(t, err)
assert.False(t, exists)
}
time.Sleep(tableTransitionExpiration)
// We're now both beyond table's timestamp as well as a tableGC interval
validateTableDoesNotExist(t, tableName)
// Table should be renamed as _vt_DROP_... and then dropped!
validateAnyState(t, 0, schema.DropTableGCState, schema.TableDroppedGCState)
}

func TestDrop(t *testing.T) {
populateTable(t)
query, tableName, err := schema.GenerateRenameStatement("t1", schema.DropTableGCState, time.Now().UTC().Add(10*time.Second))
query, tableName, err := schema.GenerateRenameStatement("t1", schema.DropTableGCState, time.Now().UTC().Add(tableTransitionExpiration))
assert.NoError(t, err)

_, err = primaryTablet.VttabletProcess.QueryTablet(query, keyspaceName, true)
assert.NoError(t, err)

{
exists, _, err := tableExists("t1")
assert.NoError(t, err)
assert.False(t, exists)
}
validateTableDoesNotExist(t, "t1")

time.Sleep(20 * time.Second) // 10s for timestamp to pass, then 10s for checkTables and drop of table
{
// We're now both beyond table's timestamp as well as a tableGC interval
exists, _, err := tableExists(tableName)
assert.NoError(t, err)
assert.False(t, exists)
}
time.Sleep(tableTransitionExpiration)
time.Sleep(2 * gcCheckInterval)
// We're now both beyond table's timestamp as well as a tableGC interval
validateTableDoesNotExist(t, tableName)
}

func TestPurge(t *testing.T) {
populateTable(t)
query, tableName, err := schema.GenerateRenameStatement("t1", schema.PurgeTableGCState, time.Now().UTC().Add(10*time.Second))
query, tableName, err := schema.GenerateRenameStatement("t1", schema.PurgeTableGCState, time.Now().UTC().Add(tableTransitionExpiration))
require.NoError(t, err)

_, err = primaryTablet.VttabletProcess.QueryTablet(query, keyspaceName, true)
Expand All @@ -315,7 +345,7 @@ func TestPurge(t *testing.T) {
require.True(t, exists)
}

time.Sleep(5 * time.Second)
time.Sleep(tableTransitionExpiration / 2)
{
// Table was created with +10s timestamp, so it should still exist
exists, _, err := tableExists(tableName)
Expand All @@ -325,27 +355,14 @@ func TestPurge(t *testing.T) {
checkTableRows(t, tableName, 1024)
}

time.Sleep(15 * time.Second) // purgeReentranceInterval
{
// We're now both beyond table's timestamp as well as a tableGC interval
exists, _, err := tableExists(tableName)
require.NoError(t, err)
require.False(t, exists)
}
{
// Table should be renamed as _vt_EVAC_...
exists, evacTableName, err := tableExists(`\_vt\_EVAC\_%`)
require.NoError(t, err)
require.True(t, exists)
checkTableRows(t, evacTableName, 0)
err = dropTable(evacTableName)
require.NoError(t, err)
}
time.Sleep(5 * gcPurgeCheckInterval) // wwait for table to be purged
time.Sleep(2 * gcCheckInterval) // wait for GC state transition
validateAnyState(t, 0, schema.EvacTableGCState, schema.DropTableGCState, schema.TableDroppedGCState)
}

func TestPurgeView(t *testing.T) {
populateTable(t)
query, tableName, err := schema.GenerateRenameStatement("v1", schema.PurgeTableGCState, time.Now().UTC().Add(10*time.Second))
query, tableName, err := schema.GenerateRenameStatement("v1", schema.PurgeTableGCState, time.Now().UTC().Add(tableTransitionExpiration))
require.NoError(t, err)

_, err = primaryTablet.VttabletProcess.QueryTablet(query, keyspaceName, true)
Expand All @@ -368,7 +385,7 @@ func TestPurgeView(t *testing.T) {
require.True(t, exists)
}

time.Sleep(5 * time.Second)
time.Sleep(tableTransitionExpiration / 2)
{
// View was created with +10s timestamp, so it should still exist
exists, _, err := tableExists(tableName)
Expand All @@ -379,7 +396,8 @@ func TestPurgeView(t *testing.T) {
checkTableRows(t, tableName, 1024)
}

time.Sleep(15 * time.Second) // purgeReentranceInterval
time.Sleep(2 * gcPurgeCheckInterval) // wwait for table to be purged
time.Sleep(2 * gcCheckInterval) // wait for GC state transition
{
// We're now both beyond view's timestamp as well as a tableGC interval
exists, _, err := tableExists(tableName)
Expand All @@ -392,26 +410,5 @@ func TestPurgeView(t *testing.T) {
require.NoError(t, err)
require.True(t, exists)
}
{
// View should be renamed as _vt_EVAC_ or _vt_DROP: views only spend a fraction of a second in "EVAC"
// because evacuation is irrelevant to views. They are immediately renamed to DROP.
// Because there might be a race condition, we allow both cases
evacTableExists, evacTableName, err := tableExists(`\_vt\_EVAC\_%`)
require.NoError(t, err)

dropTableExists, dropTableName, err := tableExists(`\_vt\_DROP\_%`)
require.NoError(t, err)

require.True(t, evacTableExists || dropTableExists)
switch {
case evacTableExists:
checkTableRows(t, evacTableName, 1024) // the renamed view still points to t1's data
err = dropTable(evacTableName)
require.NoError(t, err)
case dropTableExists:
checkTableRows(t, dropTableName, 1024) // the renamed view still points to t1's data
err = dropTable(dropTableName)
require.NoError(t, err)
}
}
validateAnyState(t, 1024, schema.EvacTableGCState, schema.DropTableGCState, schema.TableDroppedGCState)
}
2 changes: 2 additions & 0 deletions go/vt/schema/tablegc.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@ const (
EvacTableGCState TableGCState = "EVAC"
// DropTableGCState is the state where the table is to be dropped. Probably ASAP
DropTableGCState TableGCState = "DROP"
// TableDroppedGCState is a pseudo state; a hint that the table does not exists anymore
TableDroppedGCState TableGCState = ""
)

var (
Expand Down

0 comments on commit 4c80ca9

Please sign in to comment.