Optimize recompression for non-segmentby chunks #7632

Draft: wants to merge 1 commit into base: main
13 changes: 0 additions & 13 deletions tsl/src/compression/api.c
@@ -1066,22 +1066,9 @@ get_compressed_chunk_index_for_recompression(Chunk *uncompressed_chunk)

CompressionSettings *settings = ts_compression_settings_get(compressed_chunk->table_id);

// For chunks with no segmentby, we don't want to do segmentwise recompression as it is less
// performant than a full recompression. This is temporary; once we optimize recompression
// code for chunks with no segments we should remove this check.
int num_segmentby = ts_array_length(settings->fd.segmentby);

if (num_segmentby == 0)
{
table_close(compressed_chunk_rel, NoLock);
table_close(uncompressed_chunk_rel, NoLock);
return InvalidOid;
}

CatalogIndexState indstate = CatalogOpenIndexes(compressed_chunk_rel);
Oid index_oid = get_compressed_chunk_index(indstate, settings);
CatalogCloseIndexes(indstate);

table_close(compressed_chunk_rel, NoLock);
table_close(uncompressed_chunk_rel, NoLock);

4 changes: 4 additions & 0 deletions tsl/src/compression/recompress.c
@@ -168,6 +168,10 @@ recompress_chunk_segmentwise_impl(Chunk *uncompressed_chunk)

CompressedSegmentInfo *current_segment = palloc0(sizeof(CompressedSegmentInfo) * n_keys);

// For chunks with no segmentby settings, we can still do segmentwise recompression
// The entire chunk is treated as a single segment
elog(ts_guc_debug_compression_path_info ? INFO : DEBUG1, "using non-segmentby index for recompression");
Contributor comment: This will log that a non-segmentby index is being used every time, but that is not always true. You should log the name of the index actually being used instead (that way it is easy to check which index was chosen).
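A minimal sketch of what the reviewer suggests, assuming the OID of the chosen compressed-chunk index is available in a local variable (called index_oid here purely for illustration); get_rel_name() is PostgreSQL's catalog helper for resolving a relation name from its OID:

/*
 * Hypothetical replacement for the log line above: report the index that was
 * actually chosen for recompression instead of a fixed message.
 * index_oid is an assumed local variable holding the chosen index's OID.
 */
elog(ts_guc_debug_compression_path_info ? INFO : DEBUG1,
     "using index \"%s\" for recompression",
     get_rel_name(index_oid));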


for (int n = 0; n < n_keys; n++)
{
const char *attname;
10 changes: 5 additions & 5 deletions tsl/test/expected/compression.out
@@ -2794,12 +2794,12 @@ COPY compressed_table (time,a,b,c) FROM stdin;
SELECT compress_chunk(i, if_not_compressed => true) FROM show_chunks('compressed_table') i;
compress_chunk
-------------------------------------------
_timescaledb_internal._hyper_49_108_chunk
_timescaledb_internal._hyper_49_107_chunk
(1 row)

\set ON_ERROR_STOP 0
COPY compressed_table (time,a,b,c) FROM stdin;
ERROR: duplicate key value violates unique constraint "_hyper_49_108_chunk_compressed_table_index"
ERROR: duplicate key value violates unique constraint "_hyper_49_107_chunk_compressed_table_index"
\set ON_ERROR_STOP 1
COPY compressed_table (time,a,b,c) FROM stdin;
SELECT * FROM compressed_table;
@@ -2813,7 +2813,7 @@ SELECT * FROM compressed_table;
SELECT compress_chunk(i, if_not_compressed => true) FROM show_chunks('compressed_table') i;
compress_chunk
-------------------------------------------
_timescaledb_internal._hyper_49_108_chunk
_timescaledb_internal._hyper_49_107_chunk
(1 row)

-- Check DML decompression limit
@@ -2837,15 +2837,15 @@ NOTICE: default order by for hypertable "hyper_84" is set to ""time" DESC"
SELECT compress_chunk(ch) FROM show_chunks('hyper_84') ch;
compress_chunk
-------------------------------------------
_timescaledb_internal._hyper_51_110_chunk
_timescaledb_internal._hyper_51_109_chunk
(1 row)

-- indexscan for decompression: UPDATE
UPDATE hyper_84 SET temp = 100 where device = 1;
SELECT compress_chunk(ch) FROM show_chunks('hyper_84') ch;
compress_chunk
-------------------------------------------
_timescaledb_internal._hyper_51_110_chunk
_timescaledb_internal._hyper_51_109_chunk
(1 row)

-- indexscan for decompression: DELETE
44 changes: 22 additions & 22 deletions tsl/test/expected/compression_ddl.out
@@ -2212,15 +2212,15 @@ EXPLAIN (COSTS OFF) SELECT * FROM space_part ORDER BY time;
Custom Scan (ChunkAppend) on space_part
Order: space_part."time"
-> Custom Scan (DecompressChunk) on _hyper_35_133_chunk
-> Index Scan Backward using compress_hyper_36_139_chunk__ts_meta_min_1__ts_meta_max_1_idx on compress_hyper_36_139_chunk
-> Index Scan Backward using compress_hyper_36_135_chunk__ts_meta_min_1__ts_meta_max_1_idx on compress_hyper_36_135_chunk
-> Custom Scan (DecompressChunk) on _hyper_35_134_chunk
-> Index Scan Backward using compress_hyper_36_136_chunk__ts_meta_min_1__ts_meta_max_1_idx on compress_hyper_36_136_chunk
-> Merge Append
Sort Key: _hyper_35_137_chunk."time"
-> Custom Scan (DecompressChunk) on _hyper_35_137_chunk
-> Index Scan Backward using compress_hyper_36_140_chunk__ts_meta_min_1__ts_meta_max_1_idx on compress_hyper_36_140_chunk
-> Index Scan Backward using compress_hyper_36_139_chunk__ts_meta_min_1__ts_meta_max_1_idx on compress_hyper_36_139_chunk
-> Custom Scan (DecompressChunk) on _hyper_35_138_chunk
-> Index Scan Backward using compress_hyper_36_141_chunk__ts_meta_min_1__ts_meta_max_1_idx on compress_hyper_36_141_chunk
-> Index Scan Backward using compress_hyper_36_140_chunk__ts_meta_min_1__ts_meta_max_1_idx on compress_hyper_36_140_chunk
(12 rows)

-- make second one of them partial
@@ -2233,15 +2233,15 @@ EXPLAIN (COSTS OFF) SELECT * FROM space_part ORDER BY time;
Custom Scan (ChunkAppend) on space_part
Order: space_part."time"
-> Custom Scan (DecompressChunk) on _hyper_35_133_chunk
-> Index Scan Backward using compress_hyper_36_139_chunk__ts_meta_min_1__ts_meta_max_1_idx on compress_hyper_36_139_chunk
-> Index Scan Backward using compress_hyper_36_135_chunk__ts_meta_min_1__ts_meta_max_1_idx on compress_hyper_36_135_chunk
-> Custom Scan (DecompressChunk) on _hyper_35_134_chunk
-> Index Scan Backward using compress_hyper_36_136_chunk__ts_meta_min_1__ts_meta_max_1_idx on compress_hyper_36_136_chunk
-> Merge Append
Sort Key: _hyper_35_137_chunk."time"
-> Custom Scan (DecompressChunk) on _hyper_35_137_chunk
-> Index Scan Backward using compress_hyper_36_140_chunk__ts_meta_min_1__ts_meta_max_1_idx on compress_hyper_36_140_chunk
-> Index Scan Backward using compress_hyper_36_139_chunk__ts_meta_min_1__ts_meta_max_1_idx on compress_hyper_36_139_chunk
-> Custom Scan (DecompressChunk) on _hyper_35_138_chunk
-> Index Scan Backward using compress_hyper_36_141_chunk__ts_meta_min_1__ts_meta_max_1_idx on compress_hyper_36_141_chunk
-> Index Scan Backward using compress_hyper_36_140_chunk__ts_meta_min_1__ts_meta_max_1_idx on compress_hyper_36_140_chunk
-> Sort
Sort Key: _hyper_35_138_chunk."time"
-> Seq Scan on _hyper_35_138_chunk
@@ -2256,18 +2256,18 @@ EXPLAIN (COSTS OFF) SELECT * FROM space_part ORDER BY time;
Custom Scan (ChunkAppend) on space_part
Order: space_part."time"
-> Custom Scan (DecompressChunk) on _hyper_35_133_chunk
-> Index Scan Backward using compress_hyper_36_139_chunk__ts_meta_min_1__ts_meta_max_1_idx on compress_hyper_36_139_chunk
-> Index Scan Backward using compress_hyper_36_135_chunk__ts_meta_min_1__ts_meta_max_1_idx on compress_hyper_36_135_chunk
-> Custom Scan (DecompressChunk) on _hyper_35_134_chunk
-> Index Scan Backward using compress_hyper_36_136_chunk__ts_meta_min_1__ts_meta_max_1_idx on compress_hyper_36_136_chunk
-> Merge Append
Sort Key: _hyper_35_137_chunk."time"
-> Custom Scan (DecompressChunk) on _hyper_35_137_chunk
-> Index Scan Backward using compress_hyper_36_140_chunk__ts_meta_min_1__ts_meta_max_1_idx on compress_hyper_36_140_chunk
-> Index Scan Backward using compress_hyper_36_139_chunk__ts_meta_min_1__ts_meta_max_1_idx on compress_hyper_36_139_chunk
-> Sort
Sort Key: _hyper_35_137_chunk."time"
-> Seq Scan on _hyper_35_137_chunk
-> Custom Scan (DecompressChunk) on _hyper_35_138_chunk
-> Index Scan Backward using compress_hyper_36_141_chunk__ts_meta_min_1__ts_meta_max_1_idx on compress_hyper_36_141_chunk
-> Index Scan Backward using compress_hyper_36_140_chunk__ts_meta_min_1__ts_meta_max_1_idx on compress_hyper_36_140_chunk
-> Sort
Sort Key: _hyper_35_138_chunk."time"
-> Seq Scan on _hyper_35_138_chunk
@@ -2300,14 +2300,14 @@ values ('meter1', 1, 2.3, '2022-01-01'::timestamptz, '2022-01-01'::timestamptz),
select compress_chunk(show_chunks('mytab'));
compress_chunk
-------------------------------------------
_timescaledb_internal._hyper_37_142_chunk
_timescaledb_internal._hyper_37_141_chunk
(1 row)

REINDEX TABLE mytab; -- should update index
select decompress_chunk(show_chunks('mytab'));
decompress_chunk
-------------------------------------------
_timescaledb_internal._hyper_37_142_chunk
_timescaledb_internal._hyper_37_141_chunk
(1 row)

\set EXPLAIN 'EXPLAIN (costs off,timing off,summary off)'
@@ -2318,7 +2318,7 @@ set enable_indexscan = on;
:EXPLAIN_ANALYZE select * from mytab where lower(col1::text) = 'meter1';
QUERY PLAN
--------------------------------------------------------------------------------------------------
Index Scan using _hyper_37_142_chunk_myidx_unique on _hyper_37_142_chunk (actual rows=3 loops=1)
Index Scan using _hyper_37_141_chunk_myidx_unique on _hyper_37_141_chunk (actual rows=3 loops=1)
Index Cond: (lower((col1)::text) = 'meter1'::text)
(2 rows)

@@ -2336,19 +2336,19 @@ WHERE (value > 2.4 AND value < 3);
select compress_chunk(show_chunks('mytab'));
compress_chunk
-------------------------------------------
_timescaledb_internal._hyper_37_142_chunk
_timescaledb_internal._hyper_37_141_chunk
(1 row)

select decompress_chunk(show_chunks('mytab'));
decompress_chunk
-------------------------------------------
_timescaledb_internal._hyper_37_142_chunk
_timescaledb_internal._hyper_37_141_chunk
(1 row)

:EXPLAIN_ANALYZE SELECT * FROM mytab WHERE value BETWEEN 2.4 AND 2.8;
QUERY PLAN
---------------------------------------------------------------------------------------
Seq Scan on _hyper_37_142_chunk (actual rows=1 loops=1)
Seq Scan on _hyper_37_141_chunk (actual rows=1 loops=1)
Filter: ((value >= '2.4'::double precision) AND (value <= '2.8'::double precision))
Rows Removed by Filter: 2
(3 rows)
@@ -2399,28 +2399,28 @@ NOTICE: default order by for hypertable "hyper_unique_deferred" is set to ""tim
select compress_chunk(show_chunks('hyper_unique_deferred')); -- also worked fine before 2.11.0
compress_chunk
-------------------------------------------
_timescaledb_internal._hyper_40_146_chunk
_timescaledb_internal._hyper_40_145_chunk
(1 row)

select decompress_chunk(show_chunks('hyper_unique_deferred'));
decompress_chunk
-------------------------------------------
_timescaledb_internal._hyper_40_146_chunk
_timescaledb_internal._hyper_40_145_chunk
(1 row)

\set ON_ERROR_STOP 0
begin; insert INTO hyper_unique_deferred values (1257987700000000000, 'dev1', 1); abort;
ERROR: new row for relation "_hyper_40_146_chunk" violates check constraint "hyper_unique_deferred_sensor_1_check"
ERROR: new row for relation "_hyper_40_145_chunk" violates check constraint "hyper_unique_deferred_sensor_1_check"
\set ON_ERROR_STOP 1
select compress_chunk(show_chunks('hyper_unique_deferred'));
compress_chunk
-------------------------------------------
_timescaledb_internal._hyper_40_146_chunk
_timescaledb_internal._hyper_40_145_chunk
(1 row)

\set ON_ERROR_STOP 0
begin; insert INTO hyper_unique_deferred values (1257987700000000000, 'dev1', 1); abort;
ERROR: duplicate key value violates unique constraint "146_2_hyper_unique_deferred_time_key"
ERROR: duplicate key value violates unique constraint "145_2_hyper_unique_deferred_time_key"
\set ON_ERROR_STOP 1
-- tests chunks being compressed using different segmentby settings
-- github issue #7102
@@ -2450,7 +2450,7 @@ FROM timescaledb_information.chunks
WHERE hypertable_name = 'compression_drop' AND NOT is_compressed;
CHUNK_NAME
-------------------------------------------
_timescaledb_internal._hyper_42_151_chunk
_timescaledb_internal._hyper_42_150_chunk
(1 row)

-- try dropping column v0, should fail
@@ -2480,7 +2480,7 @@ ALTER TABLE test2 SET (
);
\set ON_ERROR_STOP 0
INSERT INTO test2(ts,b,t) VALUES ('2024-11-18 18:04:51',99,'magic');
ERROR: null value in column "i" of relation "_hyper_44_180_chunk" violates not-null constraint
ERROR: null value in column "i" of relation "_hyper_44_179_chunk" violates not-null constraint
\set ON_ERROR_STOP 1
ALTER TABLE test2 ALTER COLUMN i DROP NOT NULL;
INSERT INTO test2(ts,b,t) VALUES ('2024-11-18 18:04:51',99,'magic');
20 changes: 10 additions & 10 deletions tsl/test/expected/compression_insert.out
@@ -880,9 +880,9 @@ SELECT compress_chunk(format('%I.%I',chunk_schema,chunk_name), true) FROM timesc
Custom Scan (ChunkAppend) on test_ordering
Order: test_ordering."time"
-> Custom Scan (DecompressChunk) on _hyper_13_20_chunk
-> Index Scan Backward using compress_hyper_14_23_chunk__ts_meta_min_1__ts_meta_max_1_idx on compress_hyper_14_23_chunk
-> Index Scan Backward using compress_hyper_14_21_chunk__ts_meta_min_1__ts_meta_max_1_idx on compress_hyper_14_21_chunk
-> Custom Scan (DecompressChunk) on _hyper_13_22_chunk
-> Index Scan Backward using compress_hyper_14_24_chunk__ts_meta_min_1__ts_meta_max_1_idx on compress_hyper_14_24_chunk
-> Index Scan Backward using compress_hyper_14_23_chunk__ts_meta_min_1__ts_meta_max_1_idx on compress_hyper_14_23_chunk
(6 rows)

SET timescaledb.enable_decompression_sorted_merge = 1;
@@ -920,15 +920,15 @@ NOTICE: default order by for hypertable "conditions" is set to "timec DESC"
SELECT compress_chunk(ch) FROM show_chunks('conditions') ch;
compress_chunk
------------------------------------------
_timescaledb_internal._hyper_15_25_chunk
_timescaledb_internal._hyper_15_24_chunk
(1 row)

SELECT chunk_name, range_start, range_end, is_compressed
FROM timescaledb_information.chunks
WHERE hypertable_name = 'conditions';
chunk_name | range_start | range_end | is_compressed
--------------------+------------------------------+------------------------------+---------------
_hyper_15_25_chunk | Wed Dec 30 16:00:00 2009 PST | Wed Jan 06 16:00:00 2010 PST | t
_hyper_15_24_chunk | Wed Dec 30 16:00:00 2009 PST | Wed Jan 06 16:00:00 2010 PST | t
(1 row)

--now insert into compressed chunk
@@ -1081,11 +1081,11 @@ SET timescaledb.max_tuples_decompressed_per_dml_transaction = 1;
\set ON_ERROR_STOP 0
-- Inserting in the same period should decompress tuples
INSERT INTO test_limit SELECT t, 2 FROM generate_series(1,6000,1000) t;
ERROR: duplicate key value violates unique constraint "_hyper_24_54_chunk_timestamp_id_idx"
ERROR: duplicate key value violates unique constraint "_hyper_24_53_chunk_timestamp_id_idx"
-- Setting to 0 should remove the limit.
SET timescaledb.max_tuples_decompressed_per_dml_transaction = 0;
INSERT INTO test_limit SELECT t, 2 FROM generate_series(1,6000,1000) t;
ERROR: duplicate key value violates unique constraint "_hyper_24_54_chunk_timestamp_id_idx"
ERROR: duplicate key value violates unique constraint "_hyper_24_53_chunk_timestamp_id_idx"
\set ON_ERROR_STOP 1
DROP TABLE test_limit;
RESET timescaledb.max_tuples_decompressed_per_dml_transaction;
@@ -1109,13 +1109,13 @@ SELECT count(compress_chunk(c)) FROM show_chunks('multi_unique') c;
\set ON_ERROR_STOP 0
-- all INSERTS should fail with constraint violation
BEGIN; INSERT INTO multi_unique VALUES('2024-01-01', 0, 0, 1.0); ROLLBACK;
ERROR: duplicate key value violates unique constraint "76_1_multi_unique_time_u1_key"
ERROR: duplicate key value violates unique constraint "75_1_multi_unique_time_u1_key"
DETAIL: Key ("time", u1)=(Mon Jan 01 00:00:00 2024 PST, 0) already exists.
BEGIN; INSERT INTO multi_unique VALUES('2024-01-01', 0, 1, 1.0); ROLLBACK;
ERROR: duplicate key value violates unique constraint "76_1_multi_unique_time_u1_key"
ERROR: duplicate key value violates unique constraint "75_1_multi_unique_time_u1_key"
DETAIL: Key ("time", u1)=(Mon Jan 01 00:00:00 2024 PST, 0) already exists.
BEGIN; INSERT INTO multi_unique VALUES('2024-01-01', 1, 0, 1.0); ROLLBACK;
ERROR: duplicate key value violates unique constraint "76_2_multi_unique_time_u2_key"
ERROR: duplicate key value violates unique constraint "75_2_multi_unique_time_u2_key"
DETAIL: Key ("time", u2)=(Mon Jan 01 00:00:00 2024 PST, 0) already exists.
\set ON_ERROR_STOP 1
DROP TABLE multi_unique;
@@ -1139,7 +1139,7 @@ SELECT count(compress_chunk(c)) FROM show_chunks('unique_null') c;
\set ON_ERROR_STOP 0
-- all INSERTS should fail with constraint violation
BEGIN; INSERT INTO unique_null VALUES('2024-01-01', 0, 0, 1.0); ROLLBACK;
ERROR: duplicate key value violates unique constraint "78_3_unique_null_time_u1_u2_key"
ERROR: duplicate key value violates unique constraint "77_3_unique_null_time_u1_u2_key"
\set ON_ERROR_STOP 1
-- neither of these should need to decompress
:ANALYZE INSERT INTO unique_null VALUES('2024-01-01', NULL, 1, 1.0);