-
Notifications
You must be signed in to change notification settings - Fork 71
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: temporary support for sparse aggregate. (#489)
* Implement aggregate functions for sparse vector. Signed-off-by: my-vegetable-has-exploded <[email protected]> * fix null. Signed-off-by: my-vegetable-has-exploded <[email protected]> * tests: add more e2e test. Signed-off-by: my-vegetable-has-exploded <[email protected]> --------- Signed-off-by: my-vegetable-has-exploded <[email protected]>
- Loading branch information
1 parent
8f99933
commit 13e4245
Showing
4 changed files
with
256 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,165 @@ | ||
statement ok | ||
SET search_path TO pg_temp, vectors; | ||
|
||
statement ok | ||
CREATE TABLE t (id bigserial, val svector); | ||
|
||
statement ok | ||
INSERT INTO t (val) | ||
VALUES ('[1,2,3]'), ('[4,5,6]'); | ||
|
||
query I | ||
SELECT vector_dims(val) FROM t; | ||
---- | ||
3 | ||
3 | ||
|
||
query R | ||
SELECT round(vector_norm(val)::numeric, 5) FROM t; | ||
---- | ||
3.74166 | ||
8.77496 | ||
|
||
query ? | ||
SELECT avg(val) FROM t; | ||
---- | ||
[2.5, 3.5, 4.5] | ||
|
||
query ? | ||
SELECT sum(val) FROM t; | ||
---- | ||
[5, 7, 9] | ||
|
||
statement ok | ||
CREATE TABLE test_vectors (id serial, data vector(1000)); | ||
|
||
statement ok | ||
INSERT INTO test_vectors (data) | ||
SELECT | ||
ARRAY_AGG(CASE WHEN random() < 0.95 THEN 0 ELSE (random() * 99 + 1)::real END)::real[]::vector AS v | ||
FROM generate_series(1, 1000 * 5000) i | ||
GROUP BY i % 5000; | ||
|
||
query ? | ||
SELECT count(*) FROM test_vectors; | ||
---- | ||
5000 | ||
|
||
query R | ||
SELECT vector_norm('[3,4]'::svector); | ||
---- | ||
5 | ||
|
||
query I | ||
SELECT vector_dims(v) FROM unnest(ARRAY['[1,2]'::svector, '[3]']) v; | ||
---- | ||
2 | ||
1 | ||
|
||
query ? | ||
SELECT avg(v) FROM unnest(ARRAY['[1,2,3]'::svector, '[3,5,7]']) v; | ||
---- | ||
[2, 3.5, 5] | ||
|
||
query ? | ||
SELECT avg(v) FROM unnest(ARRAY['[1,2,3]'::svector, '[-1,2,-3]']) v; | ||
---- | ||
[0, 2, 0] | ||
|
||
query ? | ||
SELECT avg(v) FROM unnest(ARRAY['[1,2,3]'::svector, '[3,5,7]', NULL]) v; | ||
---- | ||
[2, 3.5, 5] | ||
|
||
query ? | ||
SELECT avg(v) FROM unnest(ARRAY['[1,2,3]'::svector,NULL]) v; | ||
---- | ||
[1, 2, 3] | ||
|
||
query ? | ||
SELECT avg(v) FROM unnest(ARRAY[]::svector[]) v; | ||
---- | ||
NULL | ||
|
||
query ? | ||
SELECT avg(v) FROM unnest(ARRAY[NULL]::svector[]) v; | ||
---- | ||
NULL | ||
|
||
query ? | ||
SELECT avg(v) FROM unnest(ARRAY['[3e38]'::svector, '[3e38]']) v; | ||
---- | ||
[inf] | ||
|
||
statement error differs in dimensions | ||
SELECT avg(v) FROM unnest(ARRAY['[1,2]'::svector, '[3]']) v; | ||
|
||
query ? | ||
SELECT avg(v) FROM unnest(ARRAY[to_svector(5, '{0,1}', '{2,3}'), to_svector(5, '{0,2}', '{1,3}'), to_svector(5, '{3,4}', '{3,3}')]) v; | ||
---- | ||
[1, 1, 1, 1, 1] | ||
|
||
query ? | ||
SELECT avg(v) FROM unnest(ARRAY[to_svector(32, '{0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}', '{1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}'), to_svector(32, '{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16}', '{1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}'), to_svector(32, '{2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17}', '{1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}') ]) v; | ||
---- | ||
[0.33333334, 0.6666667, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0.6666667, 0.33333334, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] | ||
|
||
# test avg(svector) get the same result as avg(vector) | ||
query ? | ||
SELECT avg(data) = avg(data::svector)::vector FROM test_vectors; | ||
---- | ||
t | ||
|
||
query ? | ||
SELECT sum(v) FROM unnest(ARRAY['[1,2,3]'::svector, '[3,5,7]']) v; | ||
---- | ||
[4, 7, 10] | ||
|
||
# test zero element | ||
query ? | ||
SELECT sum(v) FROM unnest(ARRAY['[1,2,3]'::svector, '[-1,2,-3]']) v; | ||
---- | ||
[0, 4, 0] | ||
|
||
query ? | ||
SELECT sum(v) FROM unnest(ARRAY['[1,2,3]'::svector, '[3,5,7]', NULL]) v; | ||
---- | ||
[4, 7, 10] | ||
|
||
query ? | ||
SELECT sum(v) FROM unnest(ARRAY[]::svector[]) v; | ||
---- | ||
NULL | ||
|
||
query ? | ||
SELECT sum(v) FROM unnest(ARRAY[NULL]::svector[]) v; | ||
---- | ||
NULL | ||
|
||
statement error differs in dimensions | ||
SELECT sum(v) FROM unnest(ARRAY['[1,2]'::svector, '[3]']) v; | ||
|
||
# should this return an error ? | ||
query ? | ||
SELECT sum(v) FROM unnest(ARRAY['[3e38]'::svector, '[3e38]']) v; | ||
---- | ||
[inf] | ||
|
||
query ? | ||
SELECT sum(v) FROM unnest(ARRAY[to_svector(5, '{0,1}', '{1,2}'), to_svector(5, '{0,2}', '{1,2}'), to_svector(5, '{3,4}', '{3,3}')]) v; | ||
---- | ||
[2, 2, 2, 3, 3] | ||
|
||
query ? | ||
SELECT sum(v) FROM unnest(ARRAY[to_svector(32, '{0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}', '{1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}'), to_svector(32, '{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16}', '{1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}'), to_svector(32, '{2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17}', '{1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}') ]) v; | ||
---- | ||
[1, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] | ||
|
||
# test sum(svector) get the same result as sum(vector) | ||
query ? | ||
SELECT sum(data) = sum(data::svector)::vector FROM test_vectors; | ||
---- | ||
t | ||
|
||
statement ok | ||
DROP TABLE t, test_vectors; |