From 6c4fe69d160ff858d6b8bf6f4aba0696f4cad5ec Mon Sep 17 00:00:00 2001 From: Dooyong Kim Date: Mon, 21 Oct 2024 10:56:18 -0700 Subject: [PATCH] Reflecting feedbacks (moving logics + new branding) during code review. Signed-off-by: Dooyong Kim --- CHANGELOG.md | 2 +- .../KNN80Codec/KNN80DocValuesProducer.java | 38 ++++--------------- .../NativeEngines990KnnVectorsReader.java | 29 +++----------- .../knn/index/codec/util/KNNCodecUtil.java | 28 +++++++++++++- .../KNN80DocValuesProducerTests.java | 10 ++--- 5 files changed, 46 insertions(+), 61 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b00812eb24..b52dae8582 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,7 +7,6 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), ## [Unreleased 3.0](https://github.com/opensearch-project/k-NN/compare/2.x...HEAD) ### Features ### Enhancements -* Remove FSDirectory dependency and deprecated FileWatcher [#2182](https://github.com/opensearch-project/k-NN/pull/2182) ### Bug Fixes ### Infrastructure * Removed JDK 11 and 17 version from CI runs [#1921](https://github.com/opensearch-project/k-NN/pull/1921) @@ -26,6 +25,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), * KNNIterators should support with and without filters [#2155](https://github.com/opensearch-project/k-NN/pull/2155) * Adding Support to Enable/Disble Share level Rescoring and Update Oversampling Factor[#2172](https://github.com/opensearch-project/k-NN/pull/2172) * Add support to build vector data structures greedily and perform exact search when there are no engine files [#1942](https://github.com/opensearch-project/k-NN/issues/1942) +* Remove FSDirectory dependency from native engine constructing side and deprecated FileWatcher [#2182](https://github.com/opensearch-project/k-NN/pull/2182) ### Bug Fixes * Add DocValuesProducers for releasing memory when close index [#1946](https://github.com/opensearch-project/k-NN/pull/1946) * KNN80DocValues should only be considered for BinaryDocValues fields [#2147](https://github.com/opensearch-project/k-NN/pull/2147) diff --git a/src/main/java/org/opensearch/knn/index/codec/KNN80Codec/KNN80DocValuesProducer.java b/src/main/java/org/opensearch/knn/index/codec/KNN80Codec/KNN80DocValuesProducer.java index 83f22959ad..1989639e4d 100644 --- a/src/main/java/org/opensearch/knn/index/codec/KNN80Codec/KNN80DocValuesProducer.java +++ b/src/main/java/org/opensearch/knn/index/codec/KNN80Codec/KNN80DocValuesProducer.java @@ -14,52 +14,28 @@ import lombok.extern.log4j.Log4j2; import org.apache.lucene.codecs.DocValuesProducer; import org.apache.lucene.index.BinaryDocValues; -import org.apache.lucene.index.DocValuesType; import org.apache.lucene.index.FieldInfo; + import java.io.IOException; + import org.apache.lucene.index.NumericDocValues; import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.index.SortedDocValues; import org.apache.lucene.index.SortedNumericDocValues; import org.apache.lucene.index.SortedSetDocValues; import org.opensearch.knn.index.codec.util.KNNCodecUtil; -import org.opensearch.knn.index.codec.util.NativeMemoryCacheKeyHelper; import org.opensearch.knn.index.memory.NativeMemoryCacheManager; -import java.util.ArrayList; -import java.util.HashMap; import java.util.List; -import java.util.Map; - -import static org.opensearch.knn.index.mapper.KNNVectorFieldMapper.KNN_FIELD; @Log4j2 public class KNN80DocValuesProducer extends DocValuesProducer { private final DocValuesProducer delegate; - private final Map fieldNameToVectorFileName = new HashMap<>(); + private SegmentReadState segmentReadState; public KNN80DocValuesProducer(DocValuesProducer delegate, SegmentReadState state) { this.delegate = delegate; - - for (FieldInfo field : state.fieldInfos) { - if (!field.attributes().containsKey(KNN_FIELD)) { - continue; - } - // Only segments that contains BinaryDocValues and doesn't have vector values should be considered. - // By default, we don't create BinaryDocValues for knn field anymore. However, users can set doc_values = true - // to create binary doc values explicitly like any other field. Hence, we only want to include fields - // where approximate search is possible only by BinaryDocValues. - if (field.getDocValuesType() != DocValuesType.BINARY || field.hasVectorValues()) { - continue; - } - - final String vectorIndexFileName = KNNCodecUtil.getEngineFileFromFieldInfo(field, state.segmentInfo); - if (vectorIndexFileName == null) { - continue; - } - final String cacheKey = NativeMemoryCacheKeyHelper.constructCacheKey(vectorIndexFileName, state.segmentInfo); - fieldNameToVectorFileName.putIfAbsent(field.getName(), cacheKey); - } + this.segmentReadState = state; } @Override @@ -95,11 +71,11 @@ public void checkIntegrity() throws IOException { @Override public void close() throws IOException { final NativeMemoryCacheManager nativeMemoryCacheManager = NativeMemoryCacheManager.getInstance(); - fieldNameToVectorFileName.values().forEach(nativeMemoryCacheManager::invalidate); + getCacheKeys().forEach(nativeMemoryCacheManager::invalidate); delegate.close(); } - public final List getOpenedIndexPath() { - return new ArrayList<>(fieldNameToVectorFileName.values()); + public final List getCacheKeys() { + return KNNCodecUtil.getVectorCacheKeysFromSegmentReaderState(segmentReadState); } } diff --git a/src/main/java/org/opensearch/knn/index/codec/KNN990Codec/NativeEngines990KnnVectorsReader.java b/src/main/java/org/opensearch/knn/index/codec/KNN990Codec/NativeEngines990KnnVectorsReader.java index 30b5809d12..c328addb24 100644 --- a/src/main/java/org/opensearch/knn/index/codec/KNN990Codec/NativeEngines990KnnVectorsReader.java +++ b/src/main/java/org/opensearch/knn/index/codec/KNN990Codec/NativeEngines990KnnVectorsReader.java @@ -25,7 +25,6 @@ import org.apache.lucene.util.IOUtils; import org.opensearch.common.UUIDs; import org.opensearch.knn.index.codec.util.KNNCodecUtil; -import org.opensearch.knn.index.codec.util.NativeMemoryCacheKeyHelper; import org.opensearch.knn.index.memory.NativeMemoryCacheManager; import org.opensearch.knn.index.quantizationservice.QuantizationService; import org.opensearch.knn.quantization.models.quantizationState.QuantizationState; @@ -34,10 +33,9 @@ import java.io.IOException; import java.util.HashMap; +import java.util.List; import java.util.Map; -import static org.opensearch.knn.index.mapper.KNNVectorFieldMapper.KNN_FIELD; - /** * Vectors reader class for reading the flat vectors for native engines. The class provides methods for iterating * over the vectors and retrieving their values. @@ -47,30 +45,11 @@ public class NativeEngines990KnnVectorsReader extends KnnVectorsReader { private final FlatVectorsReader flatVectorsReader; private final SegmentReadState segmentReadState; private Map quantizationStateCacheKeyPerField; - private final Map fieldNameToVectorFileName = new HashMap<>(); public NativeEngines990KnnVectorsReader(final SegmentReadState state, final FlatVectorsReader flatVectorsReader) { this.segmentReadState = state; this.flatVectorsReader = flatVectorsReader; loadCacheKeyMap(); - fillIndexToVectorFileName(); - } - - private void fillIndexToVectorFileName() { - for (FieldInfo field : segmentReadState.fieldInfos) { - if (!field.attributes().containsKey(KNN_FIELD)) { - continue; - } - if (!field.hasVectorValues()) { - continue; - } - final String vectorIndexFileName = KNNCodecUtil.getEngineFileFromFieldInfo(field, segmentReadState.segmentInfo); - if (vectorIndexFileName == null) { - continue; - } - final String cacheKey = NativeMemoryCacheKeyHelper.constructCacheKey(vectorIndexFileName, segmentReadState.segmentInfo); - fieldNameToVectorFileName.putIfAbsent(field.getName(), cacheKey); - } } /** @@ -202,7 +181,7 @@ public void search(String field, byte[] target, KnnCollector knnCollector, Bits public void close() throws IOException { // Clean up allocated vector indices resources from cache. final NativeMemoryCacheManager nativeMemoryCacheManager = NativeMemoryCacheManager.getInstance(); - fieldNameToVectorFileName.values().forEach(nativeMemoryCacheManager::invalidate); + getCacheKeys().forEach(nativeMemoryCacheManager::invalidate); // Close a reader. IOUtils.close(flatVectorsReader); @@ -231,4 +210,8 @@ private void loadCacheKeyMap() { quantizationStateCacheKeyPerField.put(fieldInfo.getName(), cacheKey); } } + + public final List getCacheKeys() { + return KNNCodecUtil.getVectorCacheKeysFromSegmentReaderState(segmentReadState); + } } diff --git a/src/main/java/org/opensearch/knn/index/codec/util/KNNCodecUtil.java b/src/main/java/org/opensearch/knn/index/codec/util/KNNCodecUtil.java index 33452b2766..946853789b 100644 --- a/src/main/java/org/opensearch/knn/index/codec/util/KNNCodecUtil.java +++ b/src/main/java/org/opensearch/knn/index/codec/util/KNNCodecUtil.java @@ -7,14 +7,17 @@ import lombok.NonNull; import org.apache.lucene.index.BinaryDocValues; +import org.apache.lucene.index.DocValuesType; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.SegmentInfo; +import org.apache.lucene.index.SegmentReadState; import org.opensearch.knn.common.FieldInfoExtractor; import org.opensearch.knn.common.KNNConstants; import org.opensearch.knn.index.VectorDataType; import org.opensearch.knn.index.codec.KNN80Codec.KNN80BinaryDocValues; import org.opensearch.knn.index.engine.KNNEngine; +import java.util.ArrayList; import java.util.Comparator; import java.util.List; import java.util.stream.Collectors; @@ -98,7 +101,7 @@ public static List getEngineFiles(String extension, String fieldName, Se * @param segmentInfo : Segment where we are collecting an engine file list. * @return : Found vector engine names, if not found, returns null. */ - public static String getEngineFileFromFieldInfo(FieldInfo field, SegmentInfo segmentInfo) { + public static String getNativeEngineFileFromFieldInfo(FieldInfo field, SegmentInfo segmentInfo) { if (!field.attributes().containsKey(KNN_FIELD)) { return null; } @@ -116,6 +119,29 @@ public static String getEngineFileFromFieldInfo(FieldInfo field, SegmentInfo seg } } + public static List getVectorCacheKeysFromSegmentReaderState(SegmentReadState segmentReadState) { + final List cacheKeys = new ArrayList<>(); + + for (FieldInfo field : segmentReadState.fieldInfos) { + // Only segments that contains BinaryDocValues and doesn't have vector values should be considered. + // By default, we don't create BinaryDocValues for knn field anymore. However, users can set doc_values = true + // to create binary doc values explicitly like any other field. Hence, we only want to include fields + // where approximate search is possible only by BinaryDocValues. + if (field.getDocValuesType() != DocValuesType.BINARY || field.hasVectorValues()) { + continue; + } + + final String vectorIndexFileName = KNNCodecUtil.getNativeEngineFileFromFieldInfo(field, segmentReadState.segmentInfo); + if (vectorIndexFileName == null) { + continue; + } + final String cacheKey = NativeMemoryCacheKeyHelper.constructCacheKey(vectorIndexFileName, segmentReadState.segmentInfo); + cacheKeys.add(cacheKey); + } + + return cacheKeys; + } + /** * Get KNNEngine From FieldInfo * diff --git a/src/test/java/org/opensearch/knn/index/codec/KNN80Codec/KNN80DocValuesProducerTests.java b/src/test/java/org/opensearch/knn/index/codec/KNN80Codec/KNN80DocValuesProducerTests.java index ccceee62b2..6b18c51c30 100644 --- a/src/test/java/org/opensearch/knn/index/codec/KNN80Codec/KNN80DocValuesProducerTests.java +++ b/src/test/java/org/opensearch/knn/index/codec/KNN80Codec/KNN80DocValuesProducerTests.java @@ -121,11 +121,11 @@ public void testProduceKNNBinaryField_fromCodec_nmslibCurrent() throws IOExcepti assertTrue(docValuesFormat instanceof KNN80DocValuesFormat); DocValuesProducer producer = docValuesFormat.fieldsProducer(state); assertTrue(producer instanceof KNN80DocValuesProducer); - int pathSize = ((KNN80DocValuesProducer) producer).getOpenedIndexPath().size(); - assertEquals(pathSize, 1); + int cacheKeySize = ((KNN80DocValuesProducer) producer).getCacheKeys().size(); + assertEquals(cacheKeySize, 1); - String path = ((KNN80DocValuesProducer) producer).getOpenedIndexPath().get(0); - assertTrue(path.contains(segmentFiles.get(0))); + String cacheKey = ((KNN80DocValuesProducer) producer).getCacheKeys().get(0); + assertTrue(cacheKey.contains(segmentFiles.get(0))); } public void testProduceKNNBinaryField_whenFieldHasNonBinaryDocValues_thenSkipThoseField() throws IOException { @@ -178,7 +178,7 @@ public void testProduceKNNBinaryField_whenFieldHasNonBinaryDocValues_thenSkipTho assertTrue(docValuesFormat instanceof KNN80DocValuesFormat); DocValuesProducer producer = docValuesFormat.fieldsProducer(state); assertTrue(producer instanceof KNN80DocValuesProducer); - assertEquals(0, ((KNN80DocValuesProducer) producer).getOpenedIndexPath().size()); + assertEquals(0, ((KNN80DocValuesProducer) producer).getCacheKeys().size()); } }