Skip to content

Commit

Permalink
[#16441] YSQL: Retry CREATE DATABASE when oid collision happens
Browse files Browse the repository at this point in the history
Summary:
OID collision is one general issue for YSQL.
In vanilla PG, OIDs are assigned by a cluster-wide counter.
However, for YSQL, we allocate OIDs in a bit weird way. We allocate OIDs on a per-database level and share the allocated OIDs on tserver for all databases.
In this case, OID collision happens due to the same range of OIDs allocated to and shared by different tservers.

This diff resolves the oid collision issue for YSQL CREATE DATABASE by retrying CREATE DATABASE if oid collision happens.
A more general fix for the OID collision issue will be completed in a future diff.
Jira: DB-5849

Test Plan:
./yb_build.sh --cxx-test pg_libpq-test --gtest_filter PgLibPqTest.RetryCreateDatabasePgOidCollisionFromTservers
Jenkins: urgent

Reviewers: tverona, myang, zdrudi

Reviewed By: myang, zdrudi

Subscribers: ybase, yql, bogdan

Differential Revision: https://phorge.dev.yugabyte.com/D27004
  • Loading branch information
yifanguan committed Jul 20, 2023
1 parent 231659a commit 1c781b7
Show file tree
Hide file tree
Showing 10 changed files with 148 additions and 33 deletions.
3 changes: 2 additions & 1 deletion src/postgres/src/backend/bootstrap/bootstrap.c
Original file line number Diff line number Diff line change
Expand Up @@ -526,7 +526,8 @@ BootstrapModeMain(void)
"template1",
InvalidOid,
FirstBootstrapObjectId,
false /* colocated */);
false /* colocated */,
NULL /* retry_on_oid_collision */);
}

/*
Expand Down
27 changes: 21 additions & 6 deletions src/postgres/src/backend/commands/dbcommands.c
Original file line number Diff line number Diff line change
Expand Up @@ -612,10 +612,28 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt)
*/
pg_database_rel = heap_open(DatabaseRelationId, RowExclusiveLock);

do
/*
* In vanilla PG, OIDs are assigned by a cluster-wide counter.
* For YSQL, we allocate OIDs on a per-database level and share the
* per-database OID range on tserver for all databases. OID collision
* happens due to the same range of OIDs allocated to different tservers.
* OID collision can happen for CREATE DATABASE. If it happens, we want to
* keep retrying CREATE DATABASE using the next available OID.
* This is needed for xcluster.
*/
bool retry_on_oid_collision = false;
do
{
dboid = GetNewOid(pg_database_rel);
} while (check_db_file_conflict(dboid));
do
{
dboid = GetNewOid(pg_database_rel);
} while (check_db_file_conflict(dboid));

retry_on_oid_collision = false;
if (IsYugaByteEnabled())
YBCCreateDatabase(dboid, dbname, src_dboid, InvalidOid, dbcolocated,
&retry_on_oid_collision);
} while (retry_on_oid_collision);

/*
* Insert a new tuple into pg_database. This establishes our ownership of
Expand Down Expand Up @@ -643,9 +661,6 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt)
new_record[Anum_pg_database_datminmxid - 1] = TransactionIdGetDatum(src_minmxid);
new_record[Anum_pg_database_dattablespace - 1] = ObjectIdGetDatum(dst_deftablespace);

if (IsYugaByteEnabled())
YBCCreateDatabase(dboid, dbname, src_dboid, InvalidOid, dbcolocated);

/*
* We deliberately set datacl to default (NULL), rather than copying it
* from the template database. Copying it would be a bad idea when the
Expand Down
22 changes: 20 additions & 2 deletions src/postgres/src/backend/commands/ybccmds.c
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,8 @@ ColumnSortingOptions(SortByDir dir, SortByNulls nulls, bool* is_desc, bool* is_n
/* Database Functions. */

void
YBCCreateDatabase(Oid dboid, const char *dbname, Oid src_dboid, Oid next_oid, bool colocated)
YBCCreateDatabase(Oid dboid, const char *dbname, Oid src_dboid, Oid next_oid, bool colocated,
bool *retry_on_oid_collision)
{
if (YBIsDBCatalogVersionMode())
{
Expand All @@ -120,7 +121,24 @@ YBCCreateDatabase(Oid dboid, const char *dbname, Oid src_dboid, Oid next_oid, bo
next_oid,
colocated,
&handle));
HandleYBStatus(YBCPgExecCreateDatabase(handle));

YBCStatus createdb_status = YBCPgExecCreateDatabase(handle);
/* If OID collision happends for CREATE DATABASE, then we need to retry CREATE DATABASE. */
if (retry_on_oid_collision)
{
*retry_on_oid_collision = createdb_status &&
YBCStatusPgsqlError(createdb_status) == ERRCODE_DUPLICATE_DATABASE &&
*YBCGetGFlags()->ysql_enable_create_database_oid_collision_retry;

if (*retry_on_oid_collision)
{
YBCFreeStatus(createdb_status);
return;
}
}

HandleYBStatus(createdb_status);

if (YBIsDBCatalogVersionMode())
YbCreateMasterDBCatalogVersionTableEntry(dboid);
}
Expand Down
3 changes: 2 additions & 1 deletion src/postgres/src/include/commands/ybccmds.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,8 @@
/* Database Functions -------------------------------------------------------------------------- */

extern void YBCCreateDatabase(
Oid dboid, const char *dbname, Oid src_dboid, Oid next_oid, bool colocated);
Oid dboid, const char *dbname, Oid src_dboid, Oid next_oid, bool colocated,
bool *retry_on_oid_collision);

extern void YBCDropDatabase(Oid dboid, const char *dbname);

Expand Down
31 changes: 24 additions & 7 deletions src/yb/master/catalog_manager.cc
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@
#include "yb/common/common_util.h"
#include "yb/common/constants.h"
#include "yb/common/key_encoder.h"
#include "yb/common/pgsql_error.h"
#include "yb/common/pg_catversions.h"
#include "yb/common/ql_type.h"
#include "yb/common/ql_type_util.h"
Expand Down Expand Up @@ -209,6 +210,7 @@
#include "yb/util/trace.h"
#include "yb/util/tsan_util.h"
#include "yb/util/uuid.h"
#include "yb/util/yb_pg_errcodes.h"

#include "yb/yql/pgwrapper/pg_wrapper.h"
#include "yb/yql/redis/redisserver/redis_constants.h"
Expand Down Expand Up @@ -8438,14 +8440,29 @@ Status CatalogManager::CreateNamespace(const CreateNamespaceRequestPB* req,

// Validate the user request.

// Verify that the namespace does not already exist.
ns = FindPtrOrNull(namespace_ids_map_, req->namespace_id()); // Same ID.
if (ns == nullptr) {
// Use the by name namespace map to enforce global uniqueness for both YCQL keyspaces and YSQL
// databases. Although postgres metadata normally enforces db name uniqueness, it fails in
// case of concurrent CREATE DATABASE requests in different sessions.
ns = FindPtrOrNull(namespace_names_mapper_[db_type], req->name());
// Verify that the namespace with same id does not already exist.
ns = FindPtrOrNull(namespace_ids_map_, req->namespace_id());
if (ns != nullptr) {
// If PG OID collision happens. Use the PG error code: YB_PG_DUPLICATE_DATABASE to signal PG
// backend to retry CREATE DATABASE using the next available OID.
// Otherwise, don't set customized error code in the return status.
// This is the default behavior of STATUS().
resp->set_id(ns->id());
auto pg_createdb_oid_collision_errcode = PgsqlError(YBPgErrorCode::YB_PG_DUPLICATE_DATABASE);
return_status = STATUS(AlreadyPresent,
Format("Keyspace with id '$0' already exists", req->namespace_id()),
Slice(),
db_type == YQL_DATABASE_PGSQL
? &pg_createdb_oid_collision_errcode : nullptr);
LOG(WARNING) << "Found keyspace: " << ns->id() << ". Failed creating keyspace with error: "
<< return_status.ToString() << " Request:\n" << req->DebugString();
return SetupError(resp->mutable_error(), MasterErrorPB::NAMESPACE_ALREADY_PRESENT,
return_status);
}
// Use the by name namespace map to enforce global uniqueness for both YCQL keyspaces and YSQL
// databases. Although postgres metadata normally enforces db name uniqueness, it fails in
// case of concurrent CREATE DATABASE requests in different sessions.
ns = FindPtrOrNull(namespace_names_mapper_[db_type], req->name());
if (ns != nullptr) {
resp->set_id(ns->id());
return_status = STATUS_SUBSTITUTE(AlreadyPresent, "Keyspace '$0' already exists",
Expand Down
5 changes: 5 additions & 0 deletions src/yb/yql/pggate/pggate_flags.cc
Original file line number Diff line number Diff line change
Expand Up @@ -146,3 +146,8 @@ DEFINE_UNKNOWN_int32(ysql_num_databases_reserved_in_db_catalog_version_mode, 10,
"fail the create database statement.");
TAG_FLAG(ysql_num_databases_reserved_in_db_catalog_version_mode, advanced);
TAG_FLAG(ysql_num_databases_reserved_in_db_catalog_version_mode, hidden);

DEFINE_NON_RUNTIME_bool(ysql_enable_create_database_oid_collision_retry, true,
"Whether to retry YSQL CREATE DATABASE statement "
"if oid collision happens.");
TAG_FLAG(ysql_enable_create_database_oid_collision_retry, advanced);
1 change: 1 addition & 0 deletions src/yb/yql/pggate/pggate_flags.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,3 +47,4 @@ DECLARE_bool(ysql_disable_portal_run_context);
DECLARE_bool(TEST_yb_lwlock_crash_after_acquire_pg_stat_statements_reset);
DECLARE_bool(TEST_yb_test_fail_matview_refresh_after_creation);
DECLARE_bool(ysql_enable_read_request_caching);
DECLARE_bool(ysql_enable_create_database_oid_collision_retry);
1 change: 1 addition & 0 deletions src/yb/yql/pggate/ybc_pg_typedefs.h
Original file line number Diff line number Diff line change
Expand Up @@ -387,6 +387,7 @@ typedef struct PgGFlagsAccessor {
const bool* ysql_enable_profile;
const bool* ysql_disable_global_impact_ddl_statements;
const bool* ysql_minimal_catalog_caches_preload;
const bool* ysql_enable_create_database_oid_collision_retry;
} YBCPgGFlagsAccessor;

typedef struct YbTablePropertiesData {
Expand Down
2 changes: 2 additions & 0 deletions src/yb/yql/pggate/ybc_pggate.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1429,6 +1429,8 @@ const YBCPgGFlagsAccessor* YBCGetGFlags() {
.ysql_disable_global_impact_ddl_statements =
&FLAGS_ysql_disable_global_impact_ddl_statements,
.ysql_minimal_catalog_caches_preload = &FLAGS_ysql_minimal_catalog_caches_preload,
.ysql_enable_create_database_oid_collision_retry =
&FLAGS_ysql_enable_create_database_oid_collision_retry
};
return &accessor;
}
Expand Down
86 changes: 70 additions & 16 deletions src/yb/yql/pgwrapper/pg_libpq-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,8 @@ class PgLibPqTest : public LibPqTestBase {

Status TestDuplicateCreateTableRequest(PGConn conn);

void KillPostmasterProcessOnTservers();

private:
Result<PGConn> RestartTSAndConnectToPostgres(int ts_idx, const std::string& db_name);
};
Expand Down Expand Up @@ -3027,6 +3029,25 @@ class PgLibPqTestEnumType: public PgLibPqTest {
}
};

void PgLibPqTest::KillPostmasterProcessOnTservers() {
for (size_t i = 0; i < cluster_->num_tablet_servers(); ++i) {
ExternalTabletServer* ts = cluster_->tablet_server(i);
const string pg_pid_file = JoinPathSegments(ts->GetRootDir(), "pg_data",
"postmaster.pid");

LOG(INFO) << "pg_pid_file: " << pg_pid_file;
ASSERT_TRUE(Env::Default()->FileExists(pg_pid_file));
std::ifstream pg_pid_in;
pg_pid_in.open(pg_pid_file, std::ios_base::in);
ASSERT_FALSE(pg_pid_in.eof());
pid_t pg_pid = 0;
pg_pid_in >> pg_pid;
ASSERT_GT(pg_pid, 0);
LOG(INFO) << "Killing PostgresSQL process: " << pg_pid;
ASSERT_EQ(kill(pg_pid, SIGKILL), 0);
}
}

// Make sure that enum type backfill works.
TEST_F_EX(PgLibPqTest,
EnumType,
Expand Down Expand Up @@ -3074,22 +3095,7 @@ TEST_F_EX(PgLibPqTest,
// A new PostgreSQL process will be respawned by the tablet server and
// inherit the new --TEST_do_not_add_enum_sort_order flag from the tablet
// server.
for (size_t i = 0; i < cluster_->num_tablet_servers(); ++i) {
ExternalTabletServer* ts = cluster_->tablet_server(i);
const string pg_pid_file = JoinPathSegments(ts->GetRootDir(), "pg_data",
"postmaster.pid");

LOG(INFO) << "pg_pid_file: " << pg_pid_file;
ASSERT_TRUE(Env::Default()->FileExists(pg_pid_file));
std::ifstream pg_pid_in;
pg_pid_in.open(pg_pid_file, std::ios_base::in);
ASSERT_FALSE(pg_pid_in.eof());
pid_t pg_pid = 0;
pg_pid_in >> pg_pid;
ASSERT_GT(pg_pid, 0);
LOG(INFO) << "Killing PostgresSQL process: " << pg_pid;
ASSERT_EQ(kill(pg_pid, SIGKILL), 0);
}
KillPostmasterProcessOnTservers();

// Reconnect to the database after the new PostgreSQL starts.
conn = std::make_unique<PGConn>(ASSERT_RESULT(ConnectToDB(kDatabaseName)));
Expand Down Expand Up @@ -3650,5 +3656,53 @@ TEST_F_EX(
ASSERT_NOK(status_future.get());
}

// This test verifies retry solves CREATE DATABASE OID collision issue.
// Using our current YSQL OID allocation method, we can hit the following OID collision for
// PG CREATE DATABASE.
// Connections used in the example:
// Connection 1 on tserver 0 (Conn1) && Connection 2 on tserver 1 (Conn2)
// Example:
// Conn1: CREATE DATABASE db1; -- db1 OID: 16384; OID range: [16384, 16640) on tserver 0
// -- db2 OID: 16385; db2 is used to trigger the same range of OID allocation on tserver 1
// Conn1: CREATE DATABASE db2;
// Conn1: DROP DATABASE db1;
// Conn2: \c db2
// Conn2: CREATE DATABASE db3; -- db3 OID: 16384; OID range: [16384, 16640) on tserver 1
// ERROR: Keyspace 'db3' already exists
TEST_F(PgLibPqTest, YB_DISABLE_TEST_IN_TSAN(RetryCreateDatabasePgOidCollisionFromTservers)) {
const string db1 = "db1";
const string db2 = "db2";
const string db3 = "db3";

// Tserver 0
auto conn1 = ASSERT_RESULT(Connect());
ASSERT_OK(conn1.ExecuteFormat("CREATE DATABASE $0", db1));
ASSERT_OK(conn1.ExecuteFormat("CREATE DATABASE $0", db2));
ASSERT_OK(conn1.ExecuteFormat("DROP DATABASE $0", db1));

// Tserver 1
LOG(INFO) << "Make a new connection to a different node at index 1";
pg_ts = cluster_->tablet_server(1);
auto conn2 = ASSERT_RESULT(ConnectToDB(db2));
ASSERT_OK(conn2.ExecuteFormat("CREATE DATABASE $0", db3));

// Verify internally retry CREATE DATABASE works.
int db3_oid = ASSERT_RESULT(conn2.FetchValue<int32_t>(
Format("SELECT oid FROM pg_database WHERE datname = \'$0\'", db3)));
ASSERT_EQ(db3_oid, 16386);

// Verify the keyspace already exists issue still exists if we disable retry.
// Connection to db3 on Tserver 2 uses 16384 as the next available oid.
ASSERT_OK(cluster_->SetFlagOnTServers("ysql_enable_create_database_oid_collision_retry",
"false"));
KillPostmasterProcessOnTservers();
pg_ts = cluster_->tablet_server(2);
auto conn3 = ASSERT_RESULT(ConnectToDB(db3));
Status s = conn3.ExecuteFormat("CREATE DATABASE createdb_oid_collision");
ASSERT_NOK(s);
ASSERT_TRUE(s.message().ToBuffer().find("Keyspace with id") != std::string::npos);
ASSERT_TRUE(s.message().ToBuffer().find("already exists") != std::string::npos);
}

} // namespace pgwrapper
} // namespace yb

0 comments on commit 1c781b7

Please sign in to comment.