-
Notifications
You must be signed in to change notification settings - Fork 666
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Improve the performance of the CitusHasBeenLoaded function for a database that does not run CREATE EXTENSION citus but loads citus.so. #7123
Changes from all commits
9aac2ca
0f3de4f
6d7ced3
30d5357
883f9fc
6c229c1
bd290dd
2bdb0d7
2d805e8
a87a3fb
c3aa546
2027e6b
0f991eb
b260b1e
db7c846
bd778d3
54194b8
710faf3
1e79689
05be333
ef95881
7eaffb6
f94ee91
12c828c
6e2120e
4db8c56
f43d530
d3c3fb8
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -133,14 +133,27 @@ typedef struct ShardIdCacheEntry | |
int shardIndex; | ||
} ShardIdCacheEntry; | ||
|
||
/* | ||
* ExtensionCreatedState is used to track if citus extension has been created | ||
* using CREATE EXTENSION command. | ||
* UNKNOWN : MetadataCache is invalid. State is UNKNOWN. | ||
* CREATED : Citus is created. | ||
* NOTCREATED : Citus is not created. | ||
*/ | ||
typedef enum ExtensionCreatedState | ||
{ | ||
UNKNOWN = 0, | ||
CREATED = 1, | ||
NOTCREATED = 2, | ||
} ExtensionCreatedState; | ||
|
||
/* | ||
* State which should be cleared upon DROP EXTENSION. When the configuration | ||
* changes, e.g. because extension is dropped, these summarily get set to 0. | ||
*/ | ||
typedef struct MetadataCacheData | ||
{ | ||
bool extensionLoaded; | ||
ExtensionCreatedState extensionCreatedState; | ||
Oid distShardRelationId; | ||
Oid distPlacementRelationId; | ||
Oid distBackgroundJobRelationId; | ||
|
@@ -288,7 +301,6 @@ static void CreateDistTableCache(void); | |
static void CreateShardIdCache(void); | ||
static void CreateDistObjectCache(void); | ||
static void InvalidateForeignRelationGraphCacheCallback(Datum argument, Oid relationId); | ||
static void InvalidateDistRelationCacheCallback(Datum argument, Oid relationId); | ||
static void InvalidateNodeRelationCacheCallback(Datum argument, Oid relationId); | ||
static void InvalidateLocalGroupIdRelationCacheCallback(Datum argument, Oid relationId); | ||
static void InvalidateConnParamsCacheCallback(Datum argument, Oid relationId); | ||
|
@@ -2187,16 +2199,30 @@ HasOverlappingShardInterval(ShardInterval **shardIntervalArray, | |
bool | ||
CitusHasBeenLoaded(void) | ||
{ | ||
if (!MetadataCache.extensionLoaded || creating_extension) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We should probably call There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I have reorganized this part of the code such that when This change is to make the following scenario work:
When This might be because we invalidate the cache during the preprocessing phase of the citus utility hook when
did not eliminate the issue. I opted for not changing the cache during This change should not have any impact on performance since
used to run previously every time even when the cache is valid. |
||
/* | ||
* We do not use Citus hooks during CREATE/ALTER EXTENSION citus | ||
* since the objects used by the C code might not be there yet. | ||
*/ | ||
if (creating_extension) | ||
{ | ||
/* | ||
* Refresh if we have not determined whether the extension has been | ||
* loaded yet, or in case of ALTER EXTENSION since we want to treat | ||
* Citus as "not loaded" during ALTER EXTENSION citus. | ||
*/ | ||
bool extensionLoaded = CitusHasBeenLoadedInternal(); | ||
Oid citusExtensionOid = get_extension_oid("citus", true); | ||
|
||
if (CurrentExtensionObject == citusExtensionOid) | ||
{ | ||
return false; | ||
} | ||
} | ||
|
||
if (extensionLoaded && !MetadataCache.extensionLoaded) | ||
/* | ||
* If extensionCreatedState is UNKNOWN, query pg_extension for Citus | ||
* and cache the result. Otherwise return the value extensionCreatedState | ||
* indicates. | ||
*/ | ||
if (MetadataCache.extensionCreatedState == UNKNOWN) | ||
{ | ||
bool extensionCreated = CitusHasBeenLoadedInternal(); | ||
|
||
if (extensionCreated) | ||
{ | ||
/* | ||
* Loaded Citus for the first time in this session, or first time after | ||
|
@@ -2208,31 +2234,22 @@ CitusHasBeenLoaded(void) | |
*/ | ||
StartupCitusBackend(); | ||
|
||
/* | ||
* InvalidateDistRelationCacheCallback resets state such as extensionLoaded | ||
* when it notices changes to pg_dist_partition (which usually indicate | ||
* `DROP EXTENSION citus;` has been run) | ||
* | ||
* Ensure InvalidateDistRelationCacheCallback will notice those changes | ||
* by caching pg_dist_partition's oid. | ||
* | ||
* We skip these checks during upgrade since pg_dist_partition is not | ||
* present during early stages of upgrade operation. | ||
*/ | ||
DistPartitionRelationId(); | ||
|
||
/* | ||
* This needs to be initialized so we can receive foreign relation graph | ||
* invalidation messages in InvalidateForeignRelationGraphCacheCallback(). | ||
* See the comments of InvalidateForeignKeyGraph for more context. | ||
*/ | ||
DistColocationRelationId(); | ||
} | ||
|
||
MetadataCache.extensionLoaded = extensionLoaded; | ||
MetadataCache.extensionCreatedState = CREATED; | ||
} | ||
else | ||
{ | ||
MetadataCache.extensionCreatedState = NOTCREATED; | ||
} | ||
} | ||
|
||
return MetadataCache.extensionLoaded; | ||
return (MetadataCache.extensionCreatedState == CREATED) ? true : false; | ||
} | ||
|
||
|
||
|
@@ -2257,15 +2274,6 @@ CitusHasBeenLoadedInternal(void) | |
return false; | ||
} | ||
|
||
if (creating_extension && CurrentExtensionObject == citusExtensionOid) | ||
{ | ||
/* | ||
* We do not use Citus hooks during CREATE/ALTER EXTENSION citus | ||
* since the objects used by the C code might be not be there yet. | ||
*/ | ||
return false; | ||
} | ||
|
||
/* citus extension exists and has been created */ | ||
return true; | ||
} | ||
|
@@ -4201,10 +4209,6 @@ InitializeDistCache(void) | |
CreateShardIdCache(); | ||
|
||
InitializeDistObjectCache(); | ||
|
||
/* Watch for invalidation events. */ | ||
CacheRegisterRelcacheCallback(InvalidateDistRelationCacheCallback, | ||
(Datum) 0); | ||
} | ||
|
||
|
||
|
@@ -4754,20 +4758,26 @@ InvalidateForeignKeyGraph(void) | |
* InvalidateDistRelationCacheCallback flushes cache entries when a relation | ||
* is updated (or flushes the entire cache). | ||
*/ | ||
static void | ||
void | ||
InvalidateDistRelationCacheCallback(Datum argument, Oid relationId) | ||
{ | ||
/* invalidate either entire cache or a specific entry */ | ||
if (relationId == InvalidOid) | ||
{ | ||
InvalidateDistTableCache(); | ||
InvalidateDistObjectCache(); | ||
InvalidateMetadataSystemCache(); | ||
} | ||
else | ||
{ | ||
void *hashKey = (void *) &relationId; | ||
bool foundInCache = false; | ||
|
||
if (DistTableCacheHash == NULL) | ||
{ | ||
return; | ||
} | ||
|
||
CitusTableCacheEntrySlot *cacheSlot = | ||
hash_search(DistTableCacheHash, hashKey, HASH_FIND, &foundInCache); | ||
if (foundInCache) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think we can now remove the InvalidateMetadataSystemCache call below here. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Removing InvalidateMetadataSystemCache call here causes There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It turns out that
command causes the relcache invalidation of pg_dist* tables which are under pg_catalog. If we do not invalidate MetadataCache, it will end up with stale oids causing the subsequent commands to fail. For instance, the following command will run a SELECT command that invokes citus hooks and will cause failed cached lookups of pg_dist* tables. So we need to invalidate the cache when one of the pg_dist* tables is invalidated. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I have restored the previous code so as not to regress check-vanilla. Note that, even if I restore the original code, there are likely two issues with the original code
|
||
|
@@ -4776,21 +4786,19 @@ InvalidateDistRelationCacheCallback(Datum argument, Oid relationId) | |
} | ||
|
||
/* | ||
* If pg_dist_partition is being invalidated drop all state | ||
* This happens pretty rarely, but most importantly happens during | ||
* DROP EXTENSION citus; This isn't the only time when this happens | ||
* though, it can happen for multiple other reasons, such as an | ||
* autovacuum running ANALYZE on pg_dist_partition. Such an ANALYZE | ||
* wouldn't really need a full Metadata cache invalidation, but we | ||
* don't know how to differentiate between DROP EXTENSION and ANALYZE. | ||
* So for now we simply drop it in both cases and take the slight | ||
* temporary performance hit. | ||
* if pg_dist_partition relcache is invalidated for some reason, | ||
* invalidate the MetadataCache. It is likely an overkill to invalidate | ||
* the entire cache here. But until a better fix, we keep it this way | ||
* for postgres regression tests that include | ||
* REINDEX SCHEMA CONCURRENTLY pg_catalog | ||
* command. | ||
*/ | ||
if (relationId == MetadataCache.distPartitionRelationId) | ||
{ | ||
InvalidateMetadataSystemCache(); | ||
} | ||
|
||
|
||
if (relationId == MetadataCache.distObjectRelationId) | ||
{ | ||
InvalidateDistObjectCache(); | ||
|
@@ -4830,6 +4838,11 @@ InvalidateDistTableCache(void) | |
CitusTableCacheEntrySlot *cacheSlot = NULL; | ||
HASH_SEQ_STATUS status; | ||
|
||
if (DistTableCacheHash == NULL) | ||
{ | ||
return; | ||
} | ||
|
||
hash_seq_init(&status, DistTableCacheHash); | ||
|
||
while ((cacheSlot = (CitusTableCacheEntrySlot *) hash_seq_search(&status)) != NULL) | ||
|
@@ -4848,6 +4861,11 @@ InvalidateDistObjectCache(void) | |
DistObjectCacheEntry *cacheEntry = NULL; | ||
HASH_SEQ_STATUS status; | ||
|
||
if (DistObjectCacheHash == NULL) | ||
{ | ||
return; | ||
} | ||
|
||
hash_seq_init(&status, DistObjectCacheHash); | ||
|
||
while ((cacheEntry = (DistObjectCacheEntry *) hash_seq_search(&status)) != NULL) | ||
|
@@ -4930,8 +4948,8 @@ CreateDistObjectCache(void) | |
|
||
|
||
/* | ||
* InvalidateMetadataSystemCache resets all the cached OIDs and the extensionLoaded flag, | ||
* and invalidates the worker node, ConnParams, and local group ID caches. | ||
* InvalidateMetadataSystemCache resets all the cached OIDs and the extensionCreatedState | ||
* flag and invalidates the worker node, ConnParams, and local group ID caches. | ||
*/ | ||
void | ||
InvalidateMetadataSystemCache(void) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -109,6 +109,8 @@ | |
#include "tcop/tcopprot.h" | ||
#include "utils/guc.h" | ||
#include "utils/guc_tables.h" | ||
#include "utils/inval.h" | ||
#include "utils/lsyscache.h" | ||
#include "utils/syscache.h" | ||
#include "utils/varlena.h" | ||
|
||
|
@@ -554,6 +556,9 @@ _PG_init(void) | |
"ColumnarSupportsIndexAM", | ||
true, &handle); | ||
|
||
CacheRegisterRelcacheCallback(InvalidateDistRelationCacheCallback, | ||
(Datum) 0); | ||
|
||
Comment on lines
+559
to
+561
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why was this call moved here instead of staying in InitializeDistCache? |
||
INIT_COLUMNAR_SYMBOL(CompressionTypeStr_type, CompressionTypeStr); | ||
INIT_COLUMNAR_SYMBOL(IsColumnarTableAmTable_type, IsColumnarTableAmTable); | ||
INIT_COLUMNAR_SYMBOL(ReadColumnarOptions_type, ReadColumnarOptions); | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
import psycopg | ||
import pytest | ||
|
||
|
||
def test_create_drop_citus(coord): | ||
with coord.cur() as cur1: | ||
with coord.cur() as cur2: | ||
# Conn1 drops the extension | ||
# and Conn2 cannot use it. | ||
cur1.execute("DROP EXTENSION citus") | ||
|
||
with pytest.raises(psycopg.errors.UndefinedFunction): | ||
# Conn1 dropped the extension. citus_version udf | ||
# cannot be found. psycopg.errors.UndefinedFunction | ||
# is expected here. | ||
cur2.execute("SELECT citus_version();") | ||
|
||
# Conn2 creates the extension, | ||
# Conn1 is able to use it immediately. | ||
cur2.execute("CREATE EXTENSION citus") | ||
cur1.execute("SELECT citus_version();") | ||
cur1.execute("DROP EXTENSION citus;") | ||
|
||
with coord.cur() as cur1: | ||
with coord.cur() as cur2: | ||
# A connection is able to create and use the extension | ||
# within a transaction block. | ||
cur1.execute("BEGIN;") | ||
cur1.execute("CREATE TABLE t1(id int);") | ||
cur1.execute("CREATE EXTENSION citus;") | ||
cur1.execute("SELECT create_reference_table('t1')") | ||
cur1.execute("ABORT;") | ||
|
||
# Conn1 aborted so Conn2 is able to create and | ||
# use the extension within a transaction block. | ||
cur2.execute("BEGIN;") | ||
cur2.execute("CREATE TABLE t1(id int);") | ||
cur2.execute("CREATE EXTENSION citus;") | ||
cur2.execute("SELECT create_reference_table('t1')") | ||
cur2.execute("COMMIT;") | ||
|
||
# Conn2 committed so Conn1 is able to use the | ||
# extension immediately. | ||
cur1.execute("SELECT citus_version();") |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why remove this? The original comment makes it sound like it is important.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Oh, hmm I guess this is related to moving InitializeDistCache to _PG_init. Because we register the callback there, we don't do that here anymore.