Skip to content

Commit

Permalink
Propagate statistics with optional name
Browse files Browse the repository at this point in the history
  • Loading branch information
naisila committed Aug 17, 2023
1 parent 0a35bb2 commit 64c7c90
Show file tree
Hide file tree
Showing 3 changed files with 254 additions and 0 deletions.
145 changes: 145 additions & 0 deletions src/backend/distributed/commands/statistics.c
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
#include "catalog/pg_namespace.h"
#include "catalog/pg_statistic_ext.h"
#include "catalog/pg_type.h"
#include "commands/defrem.h"
#include "distributed/commands/utility_hook.h"
#include "distributed/commands.h"
#include "distributed/deparse_shard_query.h"
Expand All @@ -42,6 +43,7 @@
#include "utils/fmgroids.h"
#include "utils/fmgrprotos.h"
#include "utils/lsyscache.h"
#include "utils/regproc.h"
#include "utils/relcache.h"
#include "utils/ruleutils.h"
#include "utils/syscache.h"
Expand All @@ -56,6 +58,11 @@ static char * GenerateAlterIndexColumnSetStatsCommand(char *indexNameWithSchema,
static Oid GetRelIdByStatsOid(Oid statsOid);
static char * CreateAlterCommandIfOwnerNotDefault(Oid statsOid);
static char * CreateAlterCommandIfTargetNotDefault(Oid statsOid);
#if PG_VERSION_NUM >= PG_VERSION_16
static char * ChooseExtendedStatisticName(const char *name1, const char *name2,
const char *label, Oid namespaceid);
static char * ChooseExtendedStatisticNameAddition(List *exprs);
#endif

/*
* PreprocessCreateStatisticsStmt is called during the planning phase for
Expand All @@ -77,6 +84,22 @@ PreprocessCreateStatisticsStmt(Node *node, const char *queryString,

EnsureCoordinator();

#if PG_VERSION_NUM >= PG_VERSION_16
if (!(stmt->defnames))
{
Relation rel = relation_openrv((RangeVar *) relation, ShareUpdateExclusiveLock);

Oid namespaceId = RelationGetNamespace(rel);
char *namestr = ChooseExtendedStatisticName(RelationGetRelationName(rel),
ChooseExtendedStatisticNameAddition(
stmt->exprs),
"stat",
namespaceId);
stmt->defnames = stringToQualifiedNameList(namestr, NULL);
relation_close(rel, ShareUpdateExclusiveLock);
}
#endif

QualifyTreeNode((Node *) stmt);

Oid statsOid = get_statistics_object_oid(stmt->defnames, true);
Expand Down Expand Up @@ -780,3 +803,125 @@ CreateAlterCommandIfTargetNotDefault(Oid statsOid)

return DeparseAlterStatisticsStmt((Node *) alterStatsStmt);
}


#if PG_VERSION_NUM >= PG_VERSION_16

/*
* ChooseExtendedStatisticName - copied from PG source
*
* Select a nonconflicting name for a new statistics object.
*
* name1, name2, and label are used the same way as for makeObjectName(),
* except that the label can't be NULL; digits will be appended to the label
* if needed to create a name that is unique within the specified namespace.
*
* Returns a palloc'd string.
*
* Note: it is theoretically possible to get a collision anyway, if someone
* else chooses the same name concurrently. This is fairly unlikely to be
* a problem in practice, especially if one is holding a share update
* exclusive lock on the relation identified by name1. However, if choosing
* multiple names within a single command, you'd better create the new object
* and do CommandCounterIncrement before choosing the next one!
*/
static char *
ChooseExtendedStatisticName(const char *name1, const char *name2,
const char *label, Oid namespaceid)
{
int pass = 0;
char *stxname = NULL;
char modlabel[NAMEDATALEN];

/* try the unmodified label first */
strlcpy(modlabel, label, sizeof(modlabel));

for (;;)
{
Oid existingstats;

stxname = makeObjectName(name1, name2, modlabel);

existingstats = GetSysCacheOid2(STATEXTNAMENSP, Anum_pg_statistic_ext_oid,
PointerGetDatum(stxname),
ObjectIdGetDatum(namespaceid));
if (!OidIsValid(existingstats))
{
break;
}

/* found a conflict, so try a new name component */
pfree(stxname);
snprintf(modlabel, sizeof(modlabel), "%s%d", label, ++pass);
}

return stxname;
}


/*
* ChooseExtendedStatisticNameAddition - copied from PG source
*
* Generate "name2" for a new statistics object given the list of column
* names for it. This will be passed to ChooseExtendedStatisticName along
* with the parent table name and a suitable label.
*
* We know that less than NAMEDATALEN characters will actually be used,
* so we can truncate the result once we've generated that many.
*
* XXX see also ChooseForeignKeyConstraintNameAddition and
* ChooseIndexNameAddition.
*/
static char *
ChooseExtendedStatisticNameAddition(List *exprs)
{
char buf[NAMEDATALEN * 2];
int buflen = 0;
ListCell *lc;

buf[0] = '\0';
foreach(lc, exprs)
{
StatsElem *selem = (StatsElem *) lfirst(lc);
const char *name;

/* It should be one of these, but just skip if it happens not to be */
if (!IsA(selem, StatsElem))
{
continue;

Check warning on line 891 in src/backend/distributed/commands/statistics.c

View check run for this annotation

Codecov / codecov/patch

src/backend/distributed/commands/statistics.c#L891

Added line #L891 was not covered by tests
}

name = selem->name;

if (buflen > 0)
{
buf[buflen++] = '_'; /* insert _ between names */
}

/*
* We use fixed 'expr' for expressions, which have empty column names.
* For indexes this is handled in ChooseIndexColumnNames, but we have
* no such function for stats and it does not seem worth adding. If a
* better name is needed, the user can specify it explicitly.
*/
if (!name)
{
name = "expr";

Check warning on line 909 in src/backend/distributed/commands/statistics.c

View check run for this annotation

Codecov / codecov/patch

src/backend/distributed/commands/statistics.c#L909

Added line #L909 was not covered by tests
}

/*
* At this point we have buflen <= NAMEDATALEN. name should be less
* than NAMEDATALEN already, but use strlcpy for paranoia.
*/
strlcpy(buf + buflen, name, NAMEDATALEN);
buflen += strlen(buf + buflen);
if (buflen >= NAMEDATALEN)
{
break;
}
}
return pstrdup(buf);
}


#endif
63 changes: 63 additions & 0 deletions src/test/regress/expected/pg16.out
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,69 @@ SET citus.log_remote_commands TO OFF;
-- only verifying it works and not printing log
-- remote commands because it can be flaky
VACUUM (ONLY_DATABASE_STATS);
-- New feature supported on Citus - test statistics without a name
-- Relevant PG commit:
-- https://github.com/postgres/postgres/commit/624aa2a13bd02dd584bb0995c883b5b93b2152df
CREATE TABLE test_stats (
a int,
b int
);
SELECT create_distributed_table('test_stats', 'a');
create_distributed_table
---------------------------------------------------------------------

(1 row)

CREATE STATISTICS (dependencies) ON a, b FROM test_stats;
CREATE STATISTICS (ndistinct, dependencies) on a, b from test_stats;
CREATE STATISTICS (ndistinct, dependencies, mcv) on a, b from test_stats;
-- test for distributing an already existing statistics
CREATE TABLE test_stats2 (
a int,
b int
);
CREATE STATISTICS test_stats_a_b_stat (dependencies) ON a, b FROM test_stats2;
ERROR: statistics object "test_stats_a_b_stat" already exists
SELECT create_distributed_table('test_stats2', 'a');
create_distributed_table
---------------------------------------------------------------------

(1 row)

-- test when stats is on a different schema
CREATE SCHEMA sc1;
CREATE TABLE tbl (a int, b int);
SELECT create_distributed_table ('tbl', 'a');
create_distributed_table
---------------------------------------------------------------------

(1 row)

SET search_path TO sc1;
CREATE STATISTICS ON a, b FROM pg16.tbl;
\c - - - :worker_1_port
-- check all statistics have been correctly created at shards
SELECT stxnamespace, stxname
FROM pg_statistic_ext
WHERE stxnamespace IN (
SELECT oid
FROM pg_namespace
WHERE nspname IN ('pg16', 'sc1')
) ORDER BY stxname ASC;
stxnamespace | stxname
---------------------------------------------------------------------
19232 | tbl_a_b_stat
19232 | tbl_a_b_stat_950003
19204 | test_stats_a_b_stat
19204 | test_stats_a_b_stat1
19204 | test_stats_a_b_stat1_950001
19204 | test_stats_a_b_stat2
19204 | test_stats_a_b_stat2_950001
19204 | test_stats_a_b_stat_950001
(8 rows)

\c - - - :master_port
SET search_path TO pg16;
\set VERBOSITY terse
SET client_min_messages TO ERROR;
DROP SCHEMA pg16 CASCADE;
46 changes: 46 additions & 0 deletions src/test/regress/sql/pg16.sql
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,52 @@ SET citus.log_remote_commands TO OFF;
-- remote commands because it can be flaky
VACUUM (ONLY_DATABASE_STATS);

-- New feature supported on Citus - test statistics without a name
-- Relevant PG commit:
-- https://github.com/postgres/postgres/commit/624aa2a13bd02dd584bb0995c883b5b93b2152df

CREATE TABLE test_stats (
a int,
b int
);

SELECT create_distributed_table('test_stats', 'a');

CREATE STATISTICS (dependencies) ON a, b FROM test_stats;
CREATE STATISTICS (ndistinct, dependencies) on a, b from test_stats;
CREATE STATISTICS (ndistinct, dependencies, mcv) on a, b from test_stats;

-- test for distributing an already existing statistics
CREATE TABLE test_stats2 (
a int,
b int
);

CREATE STATISTICS test_stats_a_b_stat (dependencies) ON a, b FROM test_stats2;

SELECT create_distributed_table('test_stats2', 'a');

-- test when stats is on a different schema
CREATE SCHEMA sc1;
CREATE TABLE tbl (a int, b int);
SELECT create_distributed_table ('tbl', 'a');

SET search_path TO sc1;
CREATE STATISTICS ON a, b FROM pg16.tbl;

\c - - - :worker_1_port
-- check all statistics have been correctly created at shards
SELECT stxnamespace, stxname
FROM pg_statistic_ext
WHERE stxnamespace IN (
SELECT oid
FROM pg_namespace
WHERE nspname IN ('pg16', 'sc1')
) ORDER BY stxname ASC;

\c - - - :master_port
SET search_path TO pg16;

\set VERBOSITY terse
SET client_min_messages TO ERROR;
DROP SCHEMA pg16 CASCADE;

0 comments on commit 64c7c90

Please sign in to comment.