Skip to content

Commit 7bb3deb

Browse files
nathan-bossart authored and hari90 committed
pg_dump: Retrieve attribute statistics in batches.
yb conflict resolutions: - src/bin/pg_dump/pg_dump.c >@@ -10642,16 +10746,16 @@ dumpRelationStats_dumper(Archive *fout, const void *userArg) <@@ -10062,15 +10166,16 @@ dumpRelationStats_dumper(Archive *fout, const void *userArg) >+ /* Fetch the next batch of attribute statistics if needed. */ >+ if (rownum >= PQntuples(res)) >+ { >+ PQclear(res); >+ res = fetchAttributeStats(fout); >+ rownum = 0; >+ } <+ /* Fetch the next batch of attribute statistics if needed. */ <+ if (rownum >= PQntuples(res)) <+ { <+ PQclear(res); <+ res = fetchAttributeStats(fout); <+ rownum = 0; <+ } Cause: Missing 4694aed: `relallfrozen` column was added to the query. Resolution: Simple merge of the code. This shows up as a conflict since the previous lines were different. Currently, pg_dump gathers attribute statistics with a query per relation, which can cause pg_dump to take significantly longer, especially when there are many relations. This commit addresses this by teaching pg_dump to gather attribute statistics for 64 relations at a time. Some simple tests showed this was the optimal batch size, but performance may vary depending on the workload. Our lookahead code determines the next batch of relations by searching the TOC sequentially for relevant entries. This approach assumes that we will dump all such entries in TOC order, which unfortunately isn't true for dump formats that use RestoreArchive(). RestoreArchive() does multiple passes through the TOC and selectively dumps certain groups of entries each time. This is particularly problematic for index stats and a subset of matview stats; both are in SECTION_POST_DATA, but matview stats that depend on matview data are dumped in RESTORE_PASS_POST_ACL, while all other stats are dumped in RESTORE_PASS_MAIN. To handle this, this commit moves all statistics data entries in SECTION_POST_DATA to RESTORE_PASS_POST_ACL, which ensures that we always dump them in TOC order. 
A convenient side effect of this change is that we can revert a decent chunk of commit a0a4601, but that is left for a follow-up commit. Author: Corey Huinker <corey.huinker@gmail.com> Co-authored-by: Nathan Bossart <nathandbossart@gmail.com> Reviewed-by: Jeff Davis <pgsql@j-davis.com> Discussion: https://postgr.es/m/CADkLM%3Dc%2Br05srPy9w%2B-%2BnbmLEo15dKXYQ03Q_xyK%2BriJerigLQ%40mail.gmail.com (cherry picked from commit 9c02e3a)
1 parent 595db87 commit 7bb3deb

File tree

3 files changed

+142
-38
lines changed

3 files changed

+142
-38
lines changed

src/bin/pg_dump/pg_backup.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -277,7 +277,10 @@ typedef int DumpId;
277277
* Function pointer prototypes for assorted callback methods.
278278
*/
279279

280-
typedef char *(*DefnDumperPtr) (Archive *AH, const void *userArg);
280+
/* forward declaration to avoid including pg_backup_archiver.h here */
281+
typedef struct _tocEntry TocEntry;
282+
283+
typedef char *(*DefnDumperPtr) (Archive *AH, const void *userArg, const TocEntry *te);
281284
typedef int (*DataDumperPtr) (Archive *AH, const void *userArg);
282285

283286
typedef void (*SetupWorkerPtrType) (Archive *AH);

src/bin/pg_dump/pg_backup_archiver.c

Lines changed: 11 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -2574,7 +2574,7 @@ WriteToc(ArchiveHandle *AH)
25742574
}
25752575
else if (te->defnDumper)
25762576
{
2577-
char *defn = te->defnDumper((Archive *) AH, te->defnDumperArg);
2577+
char *defn = te->defnDumper((Archive *) AH, te->defnDumperArg, te);
25782578

25792579
te->defnLen = WriteStr(AH, defn);
25802580
pg_free(defn);
@@ -3195,23 +3195,16 @@ _tocEntryRestorePass(ArchiveHandle *AH, TocEntry *te)
31953195

31963196
/*
31973197
* If statistics data is dependent on materialized view data, it must be
3198-
* deferred to RESTORE_PASS_POST_ACL.
3198+
* deferred to RESTORE_PASS_POST_ACL. Those entries are already marked as
3199+
* SECTION_POST_DATA, and some other stats entries (e.g., index stats)
3200+
* will also be marked as SECTION_POST_DATA. Additionally, our lookahead
3201+
* code in fetchAttributeStats() assumes that we dump all statistics data
3202+
* entries in TOC order. To ensure this assumption holds, we move all
3203+
* statistics data entries in SECTION_POST_DATA to RESTORE_PASS_POST_ACL.
31993204
*/
3200-
if (strcmp(te->desc, "STATISTICS DATA") == 0)
3201-
{
3202-
for (int i = 0; i < te->nDeps; i++)
3203-
{
3204-
DumpId depid = te->dependencies[i];
3205-
3206-
if (depid <= AH->maxDumpId && AH->tocsByDumpId[depid] != NULL)
3207-
{
3208-
TocEntry *otherte = AH->tocsByDumpId[depid];
3209-
3210-
if (strcmp(otherte->desc, "MATERIALIZED VIEW DATA") == 0)
3211-
return RESTORE_PASS_POST_ACL;
3212-
}
3213-
}
3214-
}
3205+
if (strcmp(te->desc, "STATISTICS DATA") == 0 &&
3206+
te->section == SECTION_POST_DATA)
3207+
return RESTORE_PASS_POST_ACL;
32153208

32163209
/* All else can be handled in the main pass. */
32173210
return RESTORE_PASS_MAIN;
@@ -3777,7 +3770,7 @@ _printTocEntry(ArchiveHandle *AH, TocEntry *te, const char *pfx)
37773770
}
37783771
else if (te->defnDumper)
37793772
{
3780-
char *defn = te->defnDumper((Archive *) AH, te->defnDumperArg);
3773+
char *defn = te->defnDumper((Archive *) AH, te->defnDumperArg, te);
37813774

37823775
te->defnLen = ahprintf(AH, "%s\n\n", defn);
37833776
pg_free(defn);

src/bin/pg_dump/pg_dump.c

Lines changed: 127 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,9 @@ static int ncomments = 0;
146146
static SecLabelItem *seclabels = NULL;
147147
static int nseclabels = 0;
148148

149+
/* Maximum number of relations to fetch in a fetchAttributeStats() call. */
150+
#define MAX_ATTR_STATS_RELS 64
151+
149152
/*
150153
* The default number of rows per INSERT when
151154
* --inserts is specified without --rows-per-insert
@@ -9978,6 +9981,77 @@ appendNamedArgument(PQExpBuffer out, Archive *fout, const char *argname,
99789981
appendPQExpBuffer(out, "::%s", argtype);
99799982
}
99809983

9984+
/*
9985+
* fetchAttributeStats --
9986+
*
9987+
* Fetch next batch of attribute statistics for dumpRelationStats_dumper().
9988+
*/
9989+
static PGresult *
9990+
fetchAttributeStats(Archive *fout)
9991+
{
9992+
ArchiveHandle *AH = (ArchiveHandle *) fout;
9993+
PQExpBuffer nspnames = createPQExpBuffer();
9994+
PQExpBuffer relnames = createPQExpBuffer();
9995+
int count = 0;
9996+
PGresult *res = NULL;
9997+
static TocEntry *te;
9998+
static bool restarted;
9999+
10000+
/* If we're just starting, set our TOC pointer. */
10001+
if (!te)
10002+
te = AH->toc->next;
10003+
10004+
/*
10005+
* We can't easily avoid a second TOC scan for the tar format because it
10006+
* writes restore.sql separately, which means we must execute the queries
10007+
* twice. This feels risky, but there is no known reason it should
10008+
* generate different output than the first pass. Even if it does, the
10009+
* worst-case scenario is that restore.sql might have different statistics
10010+
* data than the archive.
10011+
*/
10012+
if (!restarted && te == AH->toc && AH->format == archTar)
10013+
{
10014+
te = AH->toc->next;
10015+
restarted = true;
10016+
}
10017+
10018+
/*
10019+
* Scan the TOC for the next set of relevant stats entries. We assume
10020+
* that statistics are dumped in the order they are listed in the TOC.
10021+
* This is perhaps not the sturdiest assumption, so we verify it matches
10022+
* reality in dumpRelationStats_dumper().
10023+
*/
10024+
for (; te != AH->toc && count < MAX_ATTR_STATS_RELS; te = te->next)
10025+
{
10026+
if ((te->reqs & REQ_STATS) != 0 &&
10027+
strcmp(te->desc, "STATISTICS DATA") == 0)
10028+
{
10029+
appendPQExpBuffer(nspnames, "%s%s", count ? "," : "",
10030+
fmtId(te->namespace));
10031+
appendPQExpBuffer(relnames, "%s%s", count ? "," : "",
10032+
fmtId(te->tag));
10033+
count++;
10034+
}
10035+
}
10036+
10037+
/* Execute the query for the next batch of relations. */
10038+
if (count > 0)
10039+
{
10040+
PQExpBuffer query = createPQExpBuffer();
10041+
10042+
appendPQExpBuffer(query, "EXECUTE getAttributeStats("
10043+
"'{%s}'::pg_catalog.name[],"
10044+
"'{%s}'::pg_catalog.name[])",
10045+
nspnames->data, relnames->data);
10046+
res = ExecuteSqlQuery(fout, query->data, PGRES_TUPLES_OK);
10047+
destroyPQExpBuffer(query);
10048+
}
10049+
10050+
destroyPQExpBuffer(nspnames);
10051+
destroyPQExpBuffer(relnames);
10052+
return res;
10053+
}
10054+
998110055
/*
998210056
* dumpRelationStats_dumper --
998310057
*
@@ -9986,14 +10060,16 @@ appendNamedArgument(PQExpBuffer out, Archive *fout, const char *argname,
998610060
* dumped.
998710061
*/
998810062
static char *
9989-
dumpRelationStats_dumper(Archive *fout, const void *userArg)
10063+
dumpRelationStats_dumper(Archive *fout, const void *userArg, const TocEntry *te)
999010064
{
999110065
const RelStatsInfo *rsinfo = (RelStatsInfo *) userArg;
9992-
const DumpableObject *dobj = &rsinfo->dobj;
9993-
PGresult *res;
10066+
static PGresult *res;
10067+
static int rownum;
999410068
PQExpBuffer query;
999510069
PQExpBufferData out_data;
999610070
PQExpBuffer out = &out_data;
10071+
int i_schemaname;
10072+
int i_tablename;
999710073
int i_attname;
999810074
int i_inherited;
999910075
int i_null_frac;
@@ -10009,13 +10085,31 @@ dumpRelationStats_dumper(Archive *fout, const void *userArg)
1000910085
int i_range_length_histogram;
1001010086
int i_range_empty_frac;
1001110087
int i_range_bounds_histogram;
10088+
static TocEntry *expected_te;
10089+
10090+
/*
10091+
* fetchAttributeStats() assumes that the statistics are dumped in the
10092+
* order they are listed in the TOC. We verify that here for safety.
10093+
*/
10094+
if (!expected_te)
10095+
expected_te = ((ArchiveHandle *) fout)->toc;
10096+
10097+
expected_te = expected_te->next;
10098+
while ((expected_te->reqs & REQ_STATS) == 0 ||
10099+
strcmp(expected_te->desc, "STATISTICS DATA") != 0)
10100+
expected_te = expected_te->next;
10101+
10102+
if (te != expected_te)
10103+
pg_fatal("stats dumped out of order (current: %d %s %s) (expected: %d %s %s)",
10104+
te->dumpId, te->desc, te->tag,
10105+
expected_te->dumpId, expected_te->desc, expected_te->tag);
1001210106

1001310107
query = createPQExpBuffer();
1001410108
if (!fout->is_prepared[PREPQUERY_GETATTRIBUTESTATS])
1001510109
{
1001610110
appendPQExpBufferStr(query,
10017-
"PREPARE getAttributeStats(pg_catalog.name, pg_catalog.name) AS\n"
10018-
"SELECT s.attname, s.inherited, "
10111+
"PREPARE getAttributeStats(pg_catalog.name[], pg_catalog.name[]) AS\n"
10112+
"SELECT s.schemaname, s.tablename, s.attname, s.inherited, "
1001910113
"s.null_frac, s.avg_width, s.n_distinct, "
1002010114
"s.most_common_vals, s.most_common_freqs, "
1002110115
"s.histogram_bounds, s.correlation, "
@@ -10033,11 +10127,21 @@ dumpRelationStats_dumper(Archive *fout, const void *userArg)
1003310127
"NULL AS range_empty_frac,"
1003410128
"NULL AS range_bounds_histogram ");
1003510129

10130+
/*
10131+
* The results must be in the order of the relations supplied in the
10132+
* parameters to ensure we remain in sync as we walk through the TOC.
10133+
* The redundant filter clause on s.tablename = ANY(...) seems
10134+
* sufficient to convince the planner to use
10135+
* pg_class_relname_nsp_index, which avoids a full scan of pg_stats.
10136+
* This may not work for all versions.
10137+
*/
1003610138
appendPQExpBufferStr(query,
1003710139
"FROM pg_catalog.pg_stats s "
10038-
"WHERE s.schemaname = $1 "
10039-
"AND s.tablename = $2 "
10040-
"ORDER BY s.attname, s.inherited");
10140+
"JOIN unnest($1, $2) WITH ORDINALITY AS u (schemaname, tablename, ord) "
10141+
"ON s.schemaname = u.schemaname "
10142+
"AND s.tablename = u.tablename "
10143+
"WHERE s.tablename = ANY($2) "
10144+
"ORDER BY u.ord, s.attname, s.inherited");
1004110145

1004210146
ExecuteSqlStatement(fout, query->data);
1004310147

@@ -10062,15 +10166,16 @@ dumpRelationStats_dumper(Archive *fout, const void *userArg)
1006210166
appendPQExpBuffer(out, "\t'relallvisible', '%d'::integer\n);\n",
1006310167
rsinfo->relallvisible);
1006410168

10065-
/* fetch attribute stats */
10066-
appendPQExpBufferStr(query, "EXECUTE getAttributeStats(");
10067-
appendStringLiteralAH(query, dobj->namespace->dobj.name, fout);
10068-
appendPQExpBufferStr(query, ", ");
10069-
appendStringLiteralAH(query, dobj->name, fout);
10070-
appendPQExpBufferStr(query, ");");
10071-
10072-
res = ExecuteSqlQuery(fout, query->data, PGRES_TUPLES_OK);
10169+
/* Fetch the next batch of attribute statistics if needed. */
10170+
if (rownum >= PQntuples(res))
10171+
{
10172+
PQclear(res);
10173+
res = fetchAttributeStats(fout);
10174+
rownum = 0;
10175+
}
1007310176

10177+
i_schemaname = PQfnumber(res, "schemaname");
10178+
i_tablename = PQfnumber(res, "tablename");
1007410179
i_attname = PQfnumber(res, "attname");
1007510180
i_inherited = PQfnumber(res, "inherited");
1007610181
i_null_frac = PQfnumber(res, "null_frac");
@@ -10088,10 +10193,15 @@ dumpRelationStats_dumper(Archive *fout, const void *userArg)
1008810193
i_range_bounds_histogram = PQfnumber(res, "range_bounds_histogram");
1008910194

1009010195
/* restore attribute stats */
10091-
for (int rownum = 0; rownum < PQntuples(res); rownum++)
10196+
for (; rownum < PQntuples(res); rownum++)
1009210197
{
1009310198
const char *attname;
1009410199

10200+
/* Stop if the next stat row in our cache isn't for this relation. */
10201+
if (strcmp(te->tag, PQgetvalue(res, rownum, i_tablename)) != 0 ||
10202+
strcmp(te->namespace, PQgetvalue(res, rownum, i_schemaname)) != 0)
10203+
break;
10204+
1009510205
appendPQExpBufferStr(out, "SELECT * FROM pg_catalog.pg_restore_attribute_stats(\n");
1009610206
appendPQExpBuffer(out, "\t'version', '%u'::integer,\n",
1009710207
fout->remoteVersion);
@@ -10181,8 +10291,6 @@ dumpRelationStats_dumper(Archive *fout, const void *userArg)
1018110291
appendPQExpBufferStr(out, "\n);\n");
1018210292
}
1018310293

10184-
PQclear(res);
10185-
1018610294
destroyPQExpBuffer(query);
1018710295
return out->data;
1018810296
}

0 commit comments

Comments
 (0)