Skip to content

Commit 595db87

Browse files
nathan-bossart and hari90
authored and committed
pg_dump: Reduce memory usage of dumps with statistics.
yb conflict resolutions: - src/bin/pg_dump/pg_backup_archiver.c >@@ -3849,7 +3890,7 @@ _printTocEntry(ArchiveHandle *AH, TocEntry *te, const char *pfx) >- * string if any, but we have three special cases: >+ * string if any, but we have four special cases: <@@ -3698,12 +3739,50 @@ _printTocEntry(ArchiveHandle *AH, TocEntry *te, const char *pfx) >@@ -3862,6 +3903,11 @@ _printTocEntry(ArchiveHandle *AH, TocEntry *te, const char *pfx) >+ * 4. Entries with a defnDumper need to call it to generate the <+ * Entries with a defnDumper need to call it to generate the Cause: Missing a45c78e: pg_dump changes to better handle large objects Resolution: This improvement does not exist in pg15 so the comment is not relevant. Right now, pg_dump stores all generated commands for statistics in memory. These commands can be quite large and therefore can significantly increase pg_dump's memory footprint. To fix, wait until we are about to write out the commands before generating them, and be sure to free the commands after writing. This is implemented via a new defnDumper callback that works much like the dataDumper one but is specifically designed for TOC entries. Custom dumps that include data might write the TOC twice (to update data offset information), which would ordinarily cause pg_dump to run the attribute statistics queries twice. However, as a hack, we save the length of the written-out entry in the first pass and skip over it in the second. While there is no known technical issue with executing the queries multiple times and rewriting the results, it's expensive and feels risky, so let's avoid it. As an exception, we _do_ execute the queries twice for the tar format. This format does a second pass through the TOC to generate the restore.sql file. pg_restore doesn't use this file, so even if the second round of queries returns different results than the first, it won't corrupt the output; the archive and restore.sql file will just have different content. 
A follow-up commit will teach pg_dump to gather attribute statistics in batches, which our testing indicates more than makes up for the added expense of running the queries twice. Author: Corey Huinker <corey.huinker@gmail.com> Co-authored-by: Nathan Bossart <nathandbossart@gmail.com> Reviewed-by: Jeff Davis <pgsql@j-davis.com> Discussion: https://postgr.es/m/CADkLM%3Dc%2Br05srPy9w%2B-%2BnbmLEo15dKXYQ03Q_xyK%2BriJerigLQ%40mail.gmail.com (cherry picked from commit 7d5c83b)
1 parent 57f8178 commit 595db87

File tree

4 files changed

+119
-15
lines changed

4 files changed

+119
-15
lines changed

src/bin/pg_dump/pg_backup.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -277,6 +277,7 @@ typedef int DumpId;
277277
* Function pointer prototypes for assorted callback methods.
278278
*/
279279

280+
typedef char *(*DefnDumperPtr) (Archive *AH, const void *userArg);
280281
typedef int (*DataDumperPtr) (Archive *AH, const void *userArg);
281282

282283
typedef void (*SetupWorkerPtrType) (Archive *AH);

src/bin/pg_dump/pg_backup_archiver.c

Lines changed: 80 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1174,6 +1174,9 @@ ArchiveEntry(Archive *AHX, CatalogId catalogId, DumpId dumpId,
11741174
newToc->dataDumperArg = opts->dumpArg;
11751175
newToc->hadDumper = opts->dumpFn ? true : false;
11761176

1177+
newToc->defnDumper = opts->defnFn;
1178+
newToc->defnDumperArg = opts->defnArg;
1179+
11771180
newToc->formatData = NULL;
11781181
newToc->dataLength = 0;
11791182

@@ -2540,7 +2543,45 @@ WriteToc(ArchiveHandle *AH)
25402543
WriteStr(AH, te->tag);
25412544
WriteStr(AH, te->desc);
25422545
WriteInt(AH, te->section);
2543-
WriteStr(AH, te->defn);
2546+
2547+
if (te->defnLen)
2548+
{
2549+
/*
2550+
* defnLen should only be set for custom format's second call to
2551+
* WriteToc(), which rewrites the TOC in place to update data
2552+
* offsets. Instead of calling the defnDumper a second time
2553+
* (which could involve re-executing queries), just skip writing
2554+
* the entry. While regenerating the definition should
2555+
* theoretically produce the same result as before, it's expensive
2556+
* and feels risky.
2557+
*
2558+
* The custom format only calls WriteToc() a second time if
2559+
* fseeko() is usable (see _CloseArchive() in pg_backup_custom.c),
2560+
* so we can safely use it without checking. For other formats,
2561+
* we fail because one of our assumptions must no longer hold
2562+
* true.
2563+
*
2564+
* XXX This is a layering violation, but the alternative is an
2565+
* awkward and complicated callback infrastructure for this
2566+
* special case. This might be worth revisiting in the future.
2567+
*/
2568+
if (AH->format != archCustom)
2569+
pg_fatal("unexpected TOC entry in WriteToc(): %d %s %s",
2570+
te->dumpId, te->desc, te->tag);
2571+
2572+
if (fseeko(AH->FH, te->defnLen, SEEK_CUR) != 0)
2573+
pg_fatal("error during file seek: %m");
2574+
}
2575+
else if (te->defnDumper)
2576+
{
2577+
char *defn = te->defnDumper((Archive *) AH, te->defnDumperArg);
2578+
2579+
te->defnLen = WriteStr(AH, defn);
2580+
pg_free(defn);
2581+
}
2582+
else
2583+
WriteStr(AH, te->defn);
2584+
25442585
WriteStr(AH, te->dropStmt);
25452586
WriteStr(AH, te->copyStmt);
25462587
WriteStr(AH, te->namespace);
@@ -3697,12 +3738,50 @@ _printTocEntry(ArchiveHandle *AH, TocEntry *te, const char *pfx)
36973738
* versions put into CREATE SCHEMA. Don't mutate the variant for schema
36983739
* "public" that is a comment. We have to do this when --no-owner mode is
36993740
* selected. This is ugly, but I see no other good way ...
3741+
*
3742+
* Entries with a defnDumper need to call it to generate the
3743+
* definition. This is primarily intended to provide a way to save memory
3744+
* for objects that would otherwise need a lot of it (e.g., statistics
3745+
* data).
37003746
*/
37013747
if (ropt->noOwner &&
37023748
strcmp(te->desc, "SCHEMA") == 0 && strncmp(te->defn, "--", 2) != 0)
37033749
{
37043750
ahprintf(AH, "CREATE SCHEMA %s;\n\n\n", fmtId(te->tag));
37053751
}
3752+
else if (te->defnLen && AH->format != archTar)
3753+
{
3754+
/*
3755+
* If defnLen is set, the defnDumper has already been called for this
3756+
* TOC entry. We don't normally expect a defnDumper to be called for
3757+
* a TOC entry a second time in _printTocEntry(), but there's an
3758+
* exception. The tar format first calls WriteToc(), which scans the
3759+
* entire TOC, and then it later calls RestoreArchive() to generate
3760+
* restore.sql, which scans the TOC again. There doesn't appear to be
3761+
* a good way to prevent a second defnDumper call in this case without
3762+
* storing the definition in memory, which defeats the purpose. This
3763+
* second defnDumper invocation should generate the same output as the
3764+
* first, but even if it doesn't, the worst-case scenario is that
3765+
* restore.sql might have different statistics data than the archive.
3766+
*
3767+
* In all other cases, encountering a TOC entry a second time in
3768+
* _printTocEntry() is unexpected, so we fail because one of our
3769+
* assumptions must no longer hold true.
3770+
*
3771+
* XXX This is a layering violation, but the alternative is an awkward
3772+
* and complicated callback infrastructure for this special case. This
3773+
* might be worth revisiting in the future.
3774+
*/
3775+
pg_fatal("unexpected TOC entry in _printTocEntry(): %d %s %s",
3776+
te->dumpId, te->desc, te->tag);
3777+
}
3778+
else if (te->defnDumper)
3779+
{
3780+
char *defn = te->defnDumper((Archive *) AH, te->defnDumperArg);
3781+
3782+
te->defnLen = ahprintf(AH, "%s\n\n", defn);
3783+
pg_free(defn);
3784+
}
37063785
else
37073786
{
37083787
if (te->defn && strlen(te->defn) > 0)

src/bin/pg_dump/pg_backup_archiver.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -388,6 +388,10 @@ struct _tocEntry
388388
const void *dataDumperArg; /* Arg for above routine */
389389
void *formatData; /* TOC Entry data specific to file format */
390390

391+
DefnDumperPtr defnDumper; /* routine to dump definition statement */
392+
const void *defnDumperArg; /* arg for above routine */
393+
size_t defnLen; /* length of dumped definition */
394+
391395
/* working state while dumping/restoring */
392396
pgoff_t dataLength; /* item's data size; 0 if none or unknown */
393397
int reqs; /* do we need schema and/or data of object
@@ -426,6 +430,8 @@ typedef struct _archiveOpts
426430
int nDeps;
427431
DataDumperPtr dumpFn;
428432
const void *dumpArg;
433+
DefnDumperPtr defnFn;
434+
const void *defnArg;
429435
} ArchiveOpts;
430436
#define ARCHIVE_OPTS(...) &(ArchiveOpts){__VA_ARGS__}
431437
/* Called to add a TOC entry */

src/bin/pg_dump/pg_dump.c

Lines changed: 32 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -9979,17 +9979,21 @@ appendNamedArgument(PQExpBuffer out, Archive *fout, const char *argname,
99799979
}
99809980

99819981
/*
9982-
* dumpRelationStats --
9982+
* dumpRelationStats_dumper --
99839983
*
9984-
* Dump command to import stats into the relation on the new database.
9984+
* Generate command to import stats into the relation on the new database.
9985+
* This routine is called by the Archiver when it wants the statistics to be
9986+
* dumped.
99859987
*/
9986-
static void
9987-
dumpRelationStats(Archive *fout, const RelStatsInfo *rsinfo)
9988+
static char *
9989+
dumpRelationStats_dumper(Archive *fout, const void *userArg)
99889990
{
9991+
const RelStatsInfo *rsinfo = (RelStatsInfo *) userArg;
99899992
const DumpableObject *dobj = &rsinfo->dobj;
99909993
PGresult *res;
99919994
PQExpBuffer query;
9992-
PQExpBuffer out;
9995+
PQExpBufferData out_data;
9996+
PQExpBuffer out = &out_data;
99939997
int i_attname;
99949998
int i_inherited;
99959999
int i_null_frac;
@@ -10006,10 +10010,6 @@ dumpRelationStats(Archive *fout, const RelStatsInfo *rsinfo)
1000610010
int i_range_empty_frac;
1000710011
int i_range_bounds_histogram;
1000810012

10009-
/* nothing to do if we are not dumping statistics */
10010-
if (!fout->dopt->dumpStatistics)
10011-
return;
10012-
1001310013
query = createPQExpBuffer();
1001410014
if (!fout->is_prepared[PREPQUERY_GETATTRIBUTESTATS])
1001510015
{
@@ -10045,7 +10045,7 @@ dumpRelationStats(Archive *fout, const RelStatsInfo *rsinfo)
1004510045
resetPQExpBuffer(query);
1004610046
}
1004710047

10048-
out = createPQExpBuffer();
10048+
initPQExpBuffer(out);
1004910049

1005010050
/* restore relation stats */
1005110051
appendPQExpBufferStr(out, "SELECT * FROM pg_catalog.pg_restore_relation_stats(\n");
@@ -10183,17 +10183,35 @@ dumpRelationStats(Archive *fout, const RelStatsInfo *rsinfo)
1018310183

1018410184
PQclear(res);
1018510185

10186+
destroyPQExpBuffer(query);
10187+
return out->data;
10188+
}
10189+
10190+
/*
10191+
* dumpRelationStats --
10192+
*
10193+
* Make an ArchiveEntry for the relation statistics. The Archiver will take
10194+
* care of gathering the statistics and generating the restore commands when
10195+
* they are needed.
10196+
*/
10197+
static void
10198+
dumpRelationStats(Archive *fout, const RelStatsInfo *rsinfo)
10199+
{
10200+
const DumpableObject *dobj = &rsinfo->dobj;
10201+
10202+
/* nothing to do if we are not dumping statistics */
10203+
if (!fout->dopt->dumpStatistics)
10204+
return;
10205+
1018610206
ArchiveEntry(fout, nilCatalogId, createDumpId(),
1018710207
ARCHIVE_OPTS(.tag = dobj->name,
1018810208
.namespace = dobj->namespace->dobj.name,
1018910209
.description = "STATISTICS DATA",
1019010210
.section = rsinfo->section,
10191-
.createStmt = out->data,
10211+
.defnFn = dumpRelationStats_dumper,
10212+
.defnArg = rsinfo,
1019210213
.deps = dobj->dependencies,
1019310214
.nDeps = dobj->nDeps));
10194-
10195-
destroyPQExpBuffer(out);
10196-
destroyPQExpBuffer(query);
1019710215
}
1019810216

1019910217
/*

0 commit comments

Comments
 (0)