diff --git a/CHANGELOG.md b/CHANGELOG.md index 6d6a2eb79..2cf5ea0dc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -30,6 +30,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Add WithFieldName implementation to KNNQueryBuilder (#2398)[https://github.com/opensearch-project/k-NN/pull/2398] - Make the build work for M series MacOS without manual code changes and local JAVA_HOME config (#2397)[https://github.com/opensearch-project/k-NN/pull/2397] - Remove DocsWithFieldSet reference from NativeEngineFieldVectorsWriter (#2408)[https://github.com/opensearch-project/k-NN/pull/2408] +- Remove skip building graph check for quantization use case (#2430)[https://github.com/opensearch-project/k-NN/2430] ### Bug Fixes * Fixing the bug when a segment has no vector field present for disk based vector search (#2282)[https://github.com/opensearch-project/k-NN/pull/2282] * Fixing the bug where search fails with "fields" parameter for an index with a knn_vector field (#2314)[https://github.com/opensearch-project/k-NN/pull/2314] diff --git a/src/main/java/org/opensearch/knn/index/codec/KNN990Codec/NativeEngines990KnnVectorsWriter.java b/src/main/java/org/opensearch/knn/index/codec/KNN990Codec/NativeEngines990KnnVectorsWriter.java index 3966a2c95..fb93bfc07 100644 --- a/src/main/java/org/opensearch/knn/index/codec/KNN990Codec/NativeEngines990KnnVectorsWriter.java +++ b/src/main/java/org/opensearch/knn/index/codec/KNN990Codec/NativeEngines990KnnVectorsWriter.java @@ -104,9 +104,8 @@ public void flush(int maxDoc, final Sorter.DocMap sortMap) throws IOException { field.getVectors() ); final QuantizationState quantizationState = train(field.getFieldInfo(), knnVectorValuesSupplier, totalLiveDocs); - // Check only after quantization state writer finish writing its state, since it is required - // even if there are no graph files in segment, which will be later used by exact search - if (shouldSkipBuildingVectorDataStructure(totalLiveDocs)) { + // should skip graph building only for non quantization use case and if threshold is met + if (quantizationState == null && shouldSkipBuildingVectorDataStructure(totalLiveDocs)) { log.info( "Skip building vector data structure for field: {}, as liveDoc: {} is less than the threshold {} during flush", fieldInfo.name, @@ -144,9 +143,8 @@ public void mergeOneField(final FieldInfo fieldInfo, final MergeState mergeState } final QuantizationState quantizationState = train(fieldInfo, knnVectorValuesSupplier, totalLiveDocs); - // Check only after quantization state writer finish writing its state, since it is required - // even if there are no graph files in segment, which will be later used by exact search - if (shouldSkipBuildingVectorDataStructure(totalLiveDocs)) { + // should skip graph building only for non quantization use case and if threshold is met + if (quantizationState == null && shouldSkipBuildingVectorDataStructure(totalLiveDocs)) { log.info( "Skip building vector data structure for field: {}, as liveDoc: {} is less than the threshold {} during merge", fieldInfo.name, diff --git a/src/test/java/org/opensearch/knn/index/codec/KNN990Codec/NativeEngines990KnnVectorsWriterFlushTests.java b/src/test/java/org/opensearch/knn/index/codec/KNN990Codec/NativeEngines990KnnVectorsWriterFlushTests.java index 6685e2b22..f87ed6bcf 100644 --- a/src/test/java/org/opensearch/knn/index/codec/KNN990Codec/NativeEngines990KnnVectorsWriterFlushTests.java +++ b/src/test/java/org/opensearch/knn/index/codec/KNN990Codec/NativeEngines990KnnVectorsWriterFlushTests.java @@ -630,8 +630,7 @@ public void testFlush_whenThresholdIsEqualToFixedValue_thenRelevantNativeIndexWr } } - public void testFlush_whenQuantizationIsProvided_whenBuildGraphDatStructureThresholdIsNotMet_thenSkipBuildingGraph() - throws IOException { + public void testFlush_whenQuantizationIsProvided_whenBuildGraphDatStructureThresholdIsNotMet_thenStillBuildGraph() throws IOException { // Given List> expectedVectorValues = new ArrayList<>(); final Map sizeMap = new HashMap<>(); @@ -714,7 +713,6 @@ public void testFlush_whenQuantizationIsProvided_whenBuildGraphDatStructureThres } else { assertEquals(0, knn990QuantWriterMockedConstruction.constructed().size()); } - verifyNoInteractions(nativeIndexWriter); IntStream.range(0, vectorsPerField.size()).forEach(i -> { try { if (vectorsPerField.get(i).isEmpty()) { @@ -729,12 +727,12 @@ public void testFlush_whenQuantizationIsProvided_whenBuildGraphDatStructureThres final Long expectedTimesGetVectorValuesIsCalled = vectorsPerField.stream().filter(Predicate.not(Map::isEmpty)).count(); knnVectorValuesFactoryMockedStatic.verify( () -> KNNVectorValuesFactory.getVectorValues(any(VectorDataType.class), any(DocsWithFieldSet.class), any()), - times(Math.toIntExact(expectedTimesGetVectorValuesIsCalled)) + times(Math.toIntExact(expectedTimesGetVectorValuesIsCalled) * 2) ); } } - public void testFlush_whenQuantizationIsProvided_whenBuildGraphDatStructureThresholdIsNegative_thenSkipBuildingGraph() + public void testFlush_whenQuantizationIsProvided_whenBuildGraphDatStructureThresholdIsNegative_thenStillBuildGraph() throws IOException { // Given List> expectedVectorValues = new ArrayList<>(); @@ -817,7 +815,6 @@ public void testFlush_whenQuantizationIsProvided_whenBuildGraphDatStructureThres } else { assertEquals(0, knn990QuantWriterMockedConstruction.constructed().size()); } - verifyNoInteractions(nativeIndexWriter); IntStream.range(0, vectorsPerField.size()).forEach(i -> { try { if (vectorsPerField.get(i).isEmpty()) { @@ -832,7 +829,7 @@ public void testFlush_whenQuantizationIsProvided_whenBuildGraphDatStructureThres final Long expectedTimesGetVectorValuesIsCalled = vectorsPerField.stream().filter(Predicate.not(Map::isEmpty)).count(); knnVectorValuesFactoryMockedStatic.verify( () -> KNNVectorValuesFactory.getVectorValues(any(VectorDataType.class), any(DocsWithFieldSet.class), any()), - times(Math.toIntExact(expectedTimesGetVectorValuesIsCalled)) + times(Math.toIntExact(expectedTimesGetVectorValuesIsCalled) * 2) ); } }