Skip to content

Commit f664037

Browse files
committed
Handle DROP DATABASE getting interrupted
Until now, when DROP DATABASE got interrupted in the wrong moment, the removal of the pg_database row would also roll back, even though some irreversible steps have already been taken. E.g. DropDatabaseBuffers() might have thrown out dirty buffers, or files could have been unlinked. But we continued to allow connections to such a corrupted database. To fix this, mark databases invalid with an in-place update, just before starting to perform irreversible steps. As we can't add a new column in the back branches, we use pg_database.datconnlimit = -2 for this purpose. An invalid database cannot be connected to anymore, but can still be dropped. Unfortunately we can't easily add output to psql's \l to indicate that some database is invalid, it doesn't fit in any of the existing columns. Add tests verifying that a interrupted DROP DATABASE is handled correctly in the backend and in various tools. Reported-by: Evgeny Morozov <postgresql3@realityexists.net> Author: Andres Freund <andres@anarazel.de> Reviewed-by: Daniel Gustafsson <daniel@yesql.se> Reviewed-by: Thomas Munro <thomas.munro@gmail.com> Discussion: https://postgr.es/m/20230509004637.cgvmfwrbht7xm7p6@awork3.anarazel.de Discussion: https://postgr.es/m/20230314174521.74jl6ffqsee5mtug@awork3.anarazel.de Backpatch: 11-, bug present in all supported versions
1 parent 82e97b8 commit f664037

File tree

19 files changed

+444
-28
lines changed

19 files changed

+444
-28
lines changed

doc/src/sgml/catalogs.sgml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3003,7 +3003,8 @@ SCRAM-SHA-256$<replaceable>&lt;iteration count&gt;</replaceable>:<replaceable>&l
30033003
</para>
30043004
<para>
30053005
Sets maximum number of concurrent connections that can be made
3006-
to this database. -1 means no limit.
3006+
to this database. -1 means no limit, -2 indicates the database is
3007+
invalid.
30073008
</para></entry>
30083009
</row>
30093010

src/backend/commands/dbcommands.c

Lines changed: 90 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -715,7 +715,7 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt)
715715
int encoding = -1;
716716
bool dbistemplate = false;
717717
bool dballowconnections = true;
718-
int dbconnlimit = -1;
718+
int dbconnlimit = DATCONNLIMIT_UNLIMITED;
719719
char *dbcollversion = NULL;
720720
int notherbackends;
721721
int npreparedxacts;
@@ -914,7 +914,7 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt)
914914
if (dconnlimit && dconnlimit->arg)
915915
{
916916
dbconnlimit = defGetInt32(dconnlimit);
917-
if (dbconnlimit < -1)
917+
if (dbconnlimit < DATCONNLIMIT_UNLIMITED)
918918
ereport(ERROR,
919919
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
920920
errmsg("invalid connection limit: %d", dbconnlimit)));
@@ -965,6 +965,16 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt)
965965
errmsg("template database \"%s\" does not exist",
966966
dbtemplate)));
967967

968+
/*
969+
* If the source database was in the process of being dropped, we can't
970+
* use it as a template.
971+
*/
972+
if (database_is_invalid_oid(src_dboid))
973+
ereport(ERROR,
974+
errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
975+
errmsg("cannot use invalid database \"%s\" as template", dbtemplate),
976+
errhint("Use DROP DATABASE to drop invalid databases."));
977+
968978
/*
969979
* Permission check: to copy a DB that's not marked datistemplate, you
970980
* must be superuser or the owner thereof.
@@ -1513,6 +1523,7 @@ dropdb(const char *dbname, bool missing_ok, bool force)
15131523
bool db_istemplate;
15141524
Relation pgdbrel;
15151525
HeapTuple tup;
1526+
Form_pg_database datform;
15161527
int notherbackends;
15171528
int npreparedxacts;
15181529
int nslots,
@@ -1628,17 +1639,6 @@ dropdb(const char *dbname, bool missing_ok, bool force)
16281639
dbname),
16291640
errdetail_busy_db(notherbackends, npreparedxacts)));
16301641

1631-
/*
1632-
* Remove the database's tuple from pg_database.
1633-
*/
1634-
tup = SearchSysCache1(DATABASEOID, ObjectIdGetDatum(db_id));
1635-
if (!HeapTupleIsValid(tup))
1636-
elog(ERROR, "cache lookup failed for database %u", db_id);
1637-
1638-
CatalogTupleDelete(pgdbrel, &tup->t_self);
1639-
1640-
ReleaseSysCache(tup);
1641-
16421642
/*
16431643
* Delete any comments or security labels associated with the database.
16441644
*/
@@ -1655,6 +1655,37 @@ dropdb(const char *dbname, bool missing_ok, bool force)
16551655
*/
16561656
dropDatabaseDependencies(db_id);
16571657

1658+
/*
1659+
* Tell the cumulative stats system to forget it immediately, too.
1660+
*/
1661+
pgstat_drop_database(db_id);
1662+
1663+
tup = SearchSysCacheCopy1(DATABASEOID, ObjectIdGetDatum(db_id));
1664+
if (!HeapTupleIsValid(tup))
1665+
elog(ERROR, "cache lookup failed for database %u", db_id);
1666+
datform = (Form_pg_database) GETSTRUCT(tup);
1667+
1668+
/*
1669+
* Except for the deletion of the catalog row, subsequent actions are not
1670+
* transactional (consider DropDatabaseBuffers() discarding modified
1671+
* buffers). But we might crash or get interrupted below. To prevent
1672+
* accesses to a database with invalid contents, mark the database as
1673+
* invalid using an in-place update.
1674+
*
1675+
* We need to flush the WAL before continuing, to guarantee the
1676+
* modification is durable before performing irreversible filesystem
1677+
* operations.
1678+
*/
1679+
datform->datconnlimit = DATCONNLIMIT_INVALID_DB;
1680+
heap_inplace_update(pgdbrel, tup);
1681+
XLogFlush(XactLastRecEnd);
1682+
1683+
/*
1684+
* Also delete the tuple - transactionally. If this transaction commits,
1685+
* the row will be gone, but if we fail, dropdb() can be invoked again.
1686+
*/
1687+
CatalogTupleDelete(pgdbrel, &tup->t_self);
1688+
16581689
/*
16591690
* Drop db-specific replication slots.
16601691
*/
@@ -1667,11 +1698,6 @@ dropdb(const char *dbname, bool missing_ok, bool force)
16671698
*/
16681699
DropDatabaseBuffers(db_id);
16691700

1670-
/*
1671-
* Tell the cumulative stats system to forget it immediately, too.
1672-
*/
1673-
pgstat_drop_database(db_id);
1674-
16751701
/*
16761702
* Tell checkpointer to forget any pending fsync and unlink requests for
16771703
* files in the database; else the fsyncs will fail at next checkpoint, or
@@ -2188,7 +2214,7 @@ AlterDatabase(ParseState *pstate, AlterDatabaseStmt *stmt, bool isTopLevel)
21882214
ListCell *option;
21892215
bool dbistemplate = false;
21902216
bool dballowconnections = true;
2191-
int dbconnlimit = -1;
2217+
int dbconnlimit = DATCONNLIMIT_UNLIMITED;
21922218
DefElem *distemplate = NULL;
21932219
DefElem *dallowconnections = NULL;
21942220
DefElem *dconnlimit = NULL;
@@ -2259,7 +2285,7 @@ AlterDatabase(ParseState *pstate, AlterDatabaseStmt *stmt, bool isTopLevel)
22592285
if (dconnlimit && dconnlimit->arg)
22602286
{
22612287
dbconnlimit = defGetInt32(dconnlimit);
2262-
if (dbconnlimit < -1)
2288+
if (dbconnlimit < DATCONNLIMIT_UNLIMITED)
22632289
ereport(ERROR,
22642290
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
22652291
errmsg("invalid connection limit: %d", dbconnlimit)));
@@ -2286,6 +2312,14 @@ AlterDatabase(ParseState *pstate, AlterDatabaseStmt *stmt, bool isTopLevel)
22862312
datform = (Form_pg_database) GETSTRUCT(tuple);
22872313
dboid = datform->oid;
22882314

2315+
if (database_is_invalid_form(datform))
2316+
{
2317+
ereport(FATAL,
2318+
errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
2319+
errmsg("cannot alter invalid database \"%s\"", stmt->dbname),
2320+
errhint("Use DROP DATABASE to drop invalid databases."));
2321+
}
2322+
22892323
if (!pg_database_ownercheck(dboid, GetUserId()))
22902324
aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_DATABASE,
22912325
stmt->dbname);
@@ -3007,6 +3041,42 @@ get_database_name(Oid dbid)
30073041
return result;
30083042
}
30093043

3044+
3045+
/*
3046+
* While dropping a database the pg_database row is marked invalid, but the
3047+
* catalog contents still exist. Connections to such a database are not
3048+
* allowed.
3049+
*/
3050+
bool
3051+
database_is_invalid_form(Form_pg_database datform)
3052+
{
3053+
return datform->datconnlimit == DATCONNLIMIT_INVALID_DB;
3054+
}
3055+
3056+
3057+
/*
3058+
* Convenience wrapper around database_is_invalid_form()
3059+
*/
3060+
bool
3061+
database_is_invalid_oid(Oid dboid)
3062+
{
3063+
HeapTuple dbtup;
3064+
Form_pg_database dbform;
3065+
bool invalid;
3066+
3067+
dbtup = SearchSysCache1(DATABASEOID, ObjectIdGetDatum(dboid));
3068+
if (!HeapTupleIsValid(dbtup))
3069+
elog(ERROR, "cache lookup failed for database %u", dboid);
3070+
dbform = (Form_pg_database) GETSTRUCT(dbtup);
3071+
3072+
invalid = database_is_invalid_form(dbform);
3073+
3074+
ReleaseSysCache(dbtup);
3075+
3076+
return invalid;
3077+
}
3078+
3079+
30103080
/*
30113081
* recovery_create_dbdir()
30123082
*

src/backend/commands/vacuum.c

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1749,6 +1749,20 @@ vac_truncate_clog(TransactionId frozenXID,
17491749
Assert(TransactionIdIsNormal(datfrozenxid));
17501750
Assert(MultiXactIdIsValid(datminmxid));
17511751

1752+
/*
1753+
* If database is in the process of getting dropped, or has been
1754+
* interrupted while doing so, no connections to it are possible
1755+
* anymore. Therefore we don't need to take it into account here.
1756+
* Which is good, because it can't be processed by autovacuum either.
1757+
*/
1758+
if (database_is_invalid_form((Form_pg_database) dbform))
1759+
{
1760+
elog(DEBUG2,
1761+
"skipping invalid database \"%s\" while computing relfrozenxid",
1762+
NameStr(dbform->datname));
1763+
continue;
1764+
}
1765+
17521766
/*
17531767
* If things are working properly, no database should have a
17541768
* datfrozenxid or datminmxid that is "in the future". However, such

src/backend/postmaster/autovacuum.c

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1915,6 +1915,18 @@ get_database_list(void)
19151915
avw_dbase *avdb;
19161916
MemoryContext oldcxt;
19171917

1918+
/*
1919+
* If database has partially been dropped, we can't, nor need to,
1920+
* vacuum it.
1921+
*/
1922+
if (database_is_invalid_form(pgdatabase))
1923+
{
1924+
elog(DEBUG2,
1925+
"autovacuum: skipping invalid database \"%s\"",
1926+
NameStr(pgdatabase->datname));
1927+
continue;
1928+
}
1929+
19181930
/*
19191931
* Allocate our results in the caller's context, not the
19201932
* transaction's. We do this inside the loop, and restore the original

src/backend/utils/init/postinit.c

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1042,6 +1042,7 @@ InitPostgres(const char *in_dbname, Oid dboid,
10421042
if (!bootstrap)
10431043
{
10441044
HeapTuple tuple;
1045+
Form_pg_database datform;
10451046

10461047
tuple = GetDatabaseTuple(dbname);
10471048
if (!HeapTupleIsValid(tuple) ||
@@ -1051,6 +1052,15 @@ InitPostgres(const char *in_dbname, Oid dboid,
10511052
(errcode(ERRCODE_UNDEFINED_DATABASE),
10521053
errmsg("database \"%s\" does not exist", dbname),
10531054
errdetail("It seems to have just been dropped or renamed.")));
1055+
1056+
datform = (Form_pg_database) GETSTRUCT(tuple);
1057+
if (database_is_invalid_form(datform))
1058+
{
1059+
ereport(FATAL,
1060+
errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1061+
errmsg("cannot connect to invalid database \"%s\"", dbname),
1062+
errhint("Use DROP DATABASE to drop invalid databases."));
1063+
}
10541064
}
10551065

10561066
/*

src/bin/pg_amcheck/pg_amcheck.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1590,7 +1590,7 @@ compile_database_list(PGconn *conn, SimplePtrList *databases,
15901590
"FROM pg_catalog.pg_database d "
15911591
"LEFT OUTER JOIN exclude_raw e "
15921592
"ON d.datname ~ e.rgx "
1593-
"\nWHERE d.datallowconn "
1593+
"\nWHERE d.datallowconn AND datconnlimit != -2 "
15941594
"AND e.pattern_id IS NULL"
15951595
"),"
15961596

src/bin/pg_amcheck/t/002_nonesuch.pl

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -291,6 +291,40 @@
291291
'many unmatched patterns and one matched pattern under --no-strict-names'
292292
);
293293

294+
295+
#########################################
296+
# Test that an invalid / partially dropped database won't be targeted
297+
298+
$node->safe_psql(
299+
'postgres', q(
300+
CREATE DATABASE regression_invalid;
301+
UPDATE pg_database SET datconnlimit = -2 WHERE datname = 'regression_invalid';
302+
));
303+
304+
$node->command_checks_all(
305+
[
306+
'pg_amcheck', '-d', 'regression_invalid'
307+
],
308+
1,
309+
[qr/^$/],
310+
[
311+
qr/pg_amcheck: error: no connectable databases to check matching "regression_invalid"/,
312+
],
313+
'checking handling of invalid database');
314+
315+
$node->command_checks_all(
316+
[
317+
'pg_amcheck', '-d', 'postgres',
318+
'-t', 'regression_invalid.public.foo',
319+
],
320+
1,
321+
[qr/^$/],
322+
[
323+
qr/pg_amcheck: error: no connectable databases to check matching "regression_invalid.public.foo"/,
324+
],
325+
'checking handling of object in invalid database');
326+
327+
294328
#########################################
295329
# Test checking otherwise existent objects but in databases where they do not exist
296330

src/bin/pg_dump/pg_dumpall.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1178,7 +1178,7 @@ dropDBs(PGconn *conn)
11781178
res = executeQuery(conn,
11791179
"SELECT datname "
11801180
"FROM pg_database d "
1181-
"WHERE datallowconn "
1181+
"WHERE datallowconn AND datconnlimit != -2 "
11821182
"ORDER BY datname");
11831183

11841184
if (PQntuples(res) > 0)
@@ -1321,7 +1321,7 @@ dumpDatabases(PGconn *conn)
13211321
res = executeQuery(conn,
13221322
"SELECT datname "
13231323
"FROM pg_database d "
1324-
"WHERE datallowconn "
1324+
"WHERE datallowconn AND datconnlimit != -2 "
13251325
"ORDER BY (datname <> 'template1'), datname");
13261326

13271327
if (PQntuples(res) > 0)

src/bin/pg_dump/t/002_pg_dump.pl

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1575,6 +1575,17 @@
15751575
},
15761576
},
15771577

1578+
'CREATE DATABASE regression_invalid...' => {
1579+
create_order => 1,
1580+
create_sql => q(
1581+
CREATE DATABASE regression_invalid;
1582+
UPDATE pg_database SET datconnlimit = -2 WHERE datname = 'regression_invalid'),
1583+
regexp => qr/^CREATE DATABASE regression_invalid/m,
1584+
not_like => {
1585+
pg_dumpall_dbprivs => 1,
1586+
},
1587+
},
1588+
15781589
'CREATE ACCESS METHOD gist2' => {
15791590
create_order => 52,
15801591
create_sql =>
@@ -4028,6 +4039,14 @@
40284039
qr/pg_dump: error: connection to server .* failed: FATAL: database "qqq" does not exist/,
40294040
'connecting to a non-existent database');
40304041
4042+
#########################################
4043+
# Test connecting to an invalid database
4044+
4045+
$node->command_fails_like(
4046+
[ 'pg_dump', '-d', 'regression_invalid' ],
4047+
qr/pg_dump: error: connection to server .* failed: FATAL: cannot connect to invalid database "regression_invalid"/,
4048+
'connecting to an invalid database');
4049+
40314050
#########################################
40324051
# Test connecting with an unprivileged user
40334052

0 commit comments

Comments
 (0)