From bb677d626c19117f63e92e5cc4a56ddc366bc4b4 Mon Sep 17 00:00:00 2001 From: Dooyong Kim Date: Mon, 21 Oct 2024 10:56:18 -0700 Subject: [PATCH] Reflecting feedbacks (moving logics + new branding) during code review. Signed-off-by: Dooyong Kim --- CHANGELOG.md | 2 +- .../KNN80Codec/KNN80DocValuesProducer.java | 58 ++++++++++--------- .../NativeEngines990KnnVectorsReader.java | 46 +++++++-------- .../knn/index/codec/util/KNNCodecUtil.java | 2 +- .../KNN80DocValuesProducerTests.java | 10 ++-- 5 files changed, 59 insertions(+), 59 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b00812eb2..b52dae858 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,7 +7,6 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), ## [Unreleased 3.0](https://github.com/opensearch-project/k-NN/compare/2.x...HEAD) ### Features ### Enhancements -* Remove FSDirectory dependency and deprecated FileWatcher [#2182](https://github.com/opensearch-project/k-NN/pull/2182) ### Bug Fixes ### Infrastructure * Removed JDK 11 and 17 version from CI runs [#1921](https://github.com/opensearch-project/k-NN/pull/1921) @@ -26,6 +25,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), * KNNIterators should support with and without filters [#2155](https://github.com/opensearch-project/k-NN/pull/2155) * Adding Support to Enable/Disble Share level Rescoring and Update Oversampling Factor[#2172](https://github.com/opensearch-project/k-NN/pull/2172) * Add support to build vector data structures greedily and perform exact search when there are no engine files [#1942](https://github.com/opensearch-project/k-NN/issues/1942) +* Remove FSDirectory dependency from native engine constructing side and deprecated FileWatcher [#2182](https://github.com/opensearch-project/k-NN/pull/2182) ### Bug Fixes * Add DocValuesProducers for releasing memory when close index [#1946](https://github.com/opensearch-project/k-NN/pull/1946) * KNN80DocValues should only be considered for BinaryDocValues fields [#2147](https://github.com/opensearch-project/k-NN/pull/2147) diff --git a/src/main/java/org/opensearch/knn/index/codec/KNN80Codec/KNN80DocValuesProducer.java b/src/main/java/org/opensearch/knn/index/codec/KNN80Codec/KNN80DocValuesProducer.java index 83f22959a..23c9f3105 100644 --- a/src/main/java/org/opensearch/knn/index/codec/KNN80Codec/KNN80DocValuesProducer.java +++ b/src/main/java/org/opensearch/knn/index/codec/KNN80Codec/KNN80DocValuesProducer.java @@ -16,7 +16,9 @@ import org.apache.lucene.index.BinaryDocValues; import org.apache.lucene.index.DocValuesType; import org.apache.lucene.index.FieldInfo; + import java.io.IOException; + import org.apache.lucene.index.NumericDocValues; import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.index.SortedDocValues; @@ -27,39 +29,16 @@ import org.opensearch.knn.index.memory.NativeMemoryCacheManager; import java.util.ArrayList; -import java.util.HashMap; import java.util.List; -import java.util.Map; - -import static org.opensearch.knn.index.mapper.KNNVectorFieldMapper.KNN_FIELD; @Log4j2 public class KNN80DocValuesProducer extends DocValuesProducer { private final DocValuesProducer delegate; - private final Map fieldNameToVectorFileName = new HashMap<>(); + private List cacheKeys; public KNN80DocValuesProducer(DocValuesProducer delegate, SegmentReadState state) { this.delegate = delegate; - - for (FieldInfo field : state.fieldInfos) { - if (!field.attributes().containsKey(KNN_FIELD)) { - continue; - } - // Only segments that contains BinaryDocValues and doesn't have vector values should be considered. - // By default, we don't create BinaryDocValues for knn field anymore. However, users can set doc_values = true - // to create binary doc values explicitly like any other field. Hence, we only want to include fields - // where approximate search is possible only by BinaryDocValues. - if (field.getDocValuesType() != DocValuesType.BINARY || field.hasVectorValues()) { - continue; - } - - final String vectorIndexFileName = KNNCodecUtil.getEngineFileFromFieldInfo(field, state.segmentInfo); - if (vectorIndexFileName == null) { - continue; - } - final String cacheKey = NativeMemoryCacheKeyHelper.constructCacheKey(vectorIndexFileName, state.segmentInfo); - fieldNameToVectorFileName.putIfAbsent(field.getName(), cacheKey); - } + this.cacheKeys = getVectorCacheKeysFromSegmentReaderState(state); } @Override @@ -95,11 +74,34 @@ public void checkIntegrity() throws IOException { @Override public void close() throws IOException { final NativeMemoryCacheManager nativeMemoryCacheManager = NativeMemoryCacheManager.getInstance(); - fieldNameToVectorFileName.values().forEach(nativeMemoryCacheManager::invalidate); + cacheKeys.forEach(nativeMemoryCacheManager::invalidate); delegate.close(); } - public final List getOpenedIndexPath() { - return new ArrayList<>(fieldNameToVectorFileName.values()); + public final List getCacheKeys() { + return new ArrayList<>(cacheKeys); + } + + private static List getVectorCacheKeysFromSegmentReaderState(SegmentReadState segmentReadState) { + final List cacheKeys = new ArrayList<>(); + + for (FieldInfo field : segmentReadState.fieldInfos) { + // Only segments that contains BinaryDocValues and doesn't have vector values should be considered. + // By default, we don't create BinaryDocValues for knn field anymore. However, users can set doc_values = true + // to create binary doc values explicitly like any other field. Hence, we only want to include fields + // where approximate search is possible only by BinaryDocValues. + if (field.getDocValuesType() != DocValuesType.BINARY || field.hasVectorValues()) { + continue; + } + + final String vectorIndexFileName = KNNCodecUtil.getNativeEngineFileFromFieldInfo(field, segmentReadState.segmentInfo); + if (vectorIndexFileName == null) { + continue; + } + final String cacheKey = NativeMemoryCacheKeyHelper.constructCacheKey(vectorIndexFileName, segmentReadState.segmentInfo); + cacheKeys.add(cacheKey); + } + + return cacheKeys; } } diff --git a/src/main/java/org/opensearch/knn/index/codec/KNN990Codec/NativeEngines990KnnVectorsReader.java b/src/main/java/org/opensearch/knn/index/codec/KNN990Codec/NativeEngines990KnnVectorsReader.java index 30b5809d1..efabc3a70 100644 --- a/src/main/java/org/opensearch/knn/index/codec/KNN990Codec/NativeEngines990KnnVectorsReader.java +++ b/src/main/java/org/opensearch/knn/index/codec/KNN990Codec/NativeEngines990KnnVectorsReader.java @@ -33,11 +33,11 @@ import org.opensearch.knn.quantization.models.quantizationState.QuantizationStateReadConfig; import java.io.IOException; +import java.util.ArrayList; import java.util.HashMap; +import java.util.List; import java.util.Map; -import static org.opensearch.knn.index.mapper.KNNVectorFieldMapper.KNN_FIELD; - /** * Vectors reader class for reading the flat vectors for native engines. The class provides methods for iterating * over the vectors and retrieving their values. @@ -45,32 +45,15 @@ public class NativeEngines990KnnVectorsReader extends KnnVectorsReader { private final FlatVectorsReader flatVectorsReader; - private final SegmentReadState segmentReadState; private Map quantizationStateCacheKeyPerField; - private final Map fieldNameToVectorFileName = new HashMap<>(); + private SegmentReadState segmentReadState; + private final List cacheKeys; public NativeEngines990KnnVectorsReader(final SegmentReadState state, final FlatVectorsReader flatVectorsReader) { - this.segmentReadState = state; this.flatVectorsReader = flatVectorsReader; + this.segmentReadState = state; + this.cacheKeys = getVectorCacheKeysFromSegmentReaderState(state); loadCacheKeyMap(); - fillIndexToVectorFileName(); - } - - private void fillIndexToVectorFileName() { - for (FieldInfo field : segmentReadState.fieldInfos) { - if (!field.attributes().containsKey(KNN_FIELD)) { - continue; - } - if (!field.hasVectorValues()) { - continue; - } - final String vectorIndexFileName = KNNCodecUtil.getEngineFileFromFieldInfo(field, segmentReadState.segmentInfo); - if (vectorIndexFileName == null) { - continue; - } - final String cacheKey = NativeMemoryCacheKeyHelper.constructCacheKey(vectorIndexFileName, segmentReadState.segmentInfo); - fieldNameToVectorFileName.putIfAbsent(field.getName(), cacheKey); - } } /** @@ -202,7 +185,7 @@ public void search(String field, byte[] target, KnnCollector knnCollector, Bits public void close() throws IOException { // Clean up allocated vector indices resources from cache. final NativeMemoryCacheManager nativeMemoryCacheManager = NativeMemoryCacheManager.getInstance(); - fieldNameToVectorFileName.values().forEach(nativeMemoryCacheManager::invalidate); + cacheKeys.forEach(nativeMemoryCacheManager::invalidate); // Close a reader. IOUtils.close(flatVectorsReader); @@ -231,4 +214,19 @@ private void loadCacheKeyMap() { quantizationStateCacheKeyPerField.put(fieldInfo.getName(), cacheKey); } } + + private static List getVectorCacheKeysFromSegmentReaderState(SegmentReadState segmentReadState) { + final List cacheKeys = new ArrayList<>(); + + for (FieldInfo field : segmentReadState.fieldInfos) { + final String vectorIndexFileName = KNNCodecUtil.getNativeEngineFileFromFieldInfo(field, segmentReadState.segmentInfo); + if (vectorIndexFileName == null) { + continue; + } + final String cacheKey = NativeMemoryCacheKeyHelper.constructCacheKey(vectorIndexFileName, segmentReadState.segmentInfo); + cacheKeys.add(cacheKey); + } + + return cacheKeys; + } } diff --git a/src/main/java/org/opensearch/knn/index/codec/util/KNNCodecUtil.java b/src/main/java/org/opensearch/knn/index/codec/util/KNNCodecUtil.java index 33452b276..3ccfc3c2b 100644 --- a/src/main/java/org/opensearch/knn/index/codec/util/KNNCodecUtil.java +++ b/src/main/java/org/opensearch/knn/index/codec/util/KNNCodecUtil.java @@ -98,7 +98,7 @@ public static List getEngineFiles(String extension, String fieldName, Se * @param segmentInfo : Segment where we are collecting an engine file list. * @return : Found vector engine names, if not found, returns null. */ - public static String getEngineFileFromFieldInfo(FieldInfo field, SegmentInfo segmentInfo) { + public static String getNativeEngineFileFromFieldInfo(FieldInfo field, SegmentInfo segmentInfo) { if (!field.attributes().containsKey(KNN_FIELD)) { return null; } diff --git a/src/test/java/org/opensearch/knn/index/codec/KNN80Codec/KNN80DocValuesProducerTests.java b/src/test/java/org/opensearch/knn/index/codec/KNN80Codec/KNN80DocValuesProducerTests.java index ccceee62b..6b18c51c3 100644 --- a/src/test/java/org/opensearch/knn/index/codec/KNN80Codec/KNN80DocValuesProducerTests.java +++ b/src/test/java/org/opensearch/knn/index/codec/KNN80Codec/KNN80DocValuesProducerTests.java @@ -121,11 +121,11 @@ public void testProduceKNNBinaryField_fromCodec_nmslibCurrent() throws IOExcepti assertTrue(docValuesFormat instanceof KNN80DocValuesFormat); DocValuesProducer producer = docValuesFormat.fieldsProducer(state); assertTrue(producer instanceof KNN80DocValuesProducer); - int pathSize = ((KNN80DocValuesProducer) producer).getOpenedIndexPath().size(); - assertEquals(pathSize, 1); + int cacheKeySize = ((KNN80DocValuesProducer) producer).getCacheKeys().size(); + assertEquals(cacheKeySize, 1); - String path = ((KNN80DocValuesProducer) producer).getOpenedIndexPath().get(0); - assertTrue(path.contains(segmentFiles.get(0))); + String cacheKey = ((KNN80DocValuesProducer) producer).getCacheKeys().get(0); + assertTrue(cacheKey.contains(segmentFiles.get(0))); } public void testProduceKNNBinaryField_whenFieldHasNonBinaryDocValues_thenSkipThoseField() throws IOException { @@ -178,7 +178,7 @@ public void testProduceKNNBinaryField_whenFieldHasNonBinaryDocValues_thenSkipTho assertTrue(docValuesFormat instanceof KNN80DocValuesFormat); DocValuesProducer producer = docValuesFormat.fieldsProducer(state); assertTrue(producer instanceof KNN80DocValuesProducer); - assertEquals(0, ((KNN80DocValuesProducer) producer).getOpenedIndexPath().size()); + assertEquals(0, ((KNN80DocValuesProducer) producer).getCacheKeys().size()); } }