Skip to content

Commit

Permalink
Reflecting feedbacks (moving logics + new branding) during code review.
Browse files Browse the repository at this point in the history
Signed-off-by: Dooyong Kim <[email protected]>
  • Loading branch information
Dooyong Kim committed Oct 21, 2024
1 parent 88509d4 commit bb677d6
Show file tree
Hide file tree
Showing 5 changed files with 59 additions and 59 deletions.
2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
## [Unreleased 3.0](https://github.com/opensearch-project/k-NN/compare/2.x...HEAD)
### Features
### Enhancements
* Remove FSDirectory dependency and deprecated FileWatcher [#2182](https://github.com/opensearch-project/k-NN/pull/2182)
### Bug Fixes
### Infrastructure
* Removed JDK 11 and 17 version from CI runs [#1921](https://github.com/opensearch-project/k-NN/pull/1921)
Expand All @@ -26,6 +25,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
* KNNIterators should support with and without filters [#2155](https://github.com/opensearch-project/k-NN/pull/2155)
* Adding Support to Enable/Disble Share level Rescoring and Update Oversampling Factor[#2172](https://github.com/opensearch-project/k-NN/pull/2172)
* Add support to build vector data structures greedily and perform exact search when there are no engine files [#1942](https://github.com/opensearch-project/k-NN/issues/1942)
* Remove FSDirectory dependency from native engine constructing side and deprecated FileWatcher [#2182](https://github.com/opensearch-project/k-NN/pull/2182)
### Bug Fixes
* Add DocValuesProducers for releasing memory when close index [#1946](https://github.com/opensearch-project/k-NN/pull/1946)
* KNN80DocValues should only be considered for BinaryDocValues fields [#2147](https://github.com/opensearch-project/k-NN/pull/2147)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,9 @@
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo;

import java.io.IOException;

import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SortedDocValues;
Expand All @@ -27,39 +29,16 @@
import org.opensearch.knn.index.memory.NativeMemoryCacheManager;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import static org.opensearch.knn.index.mapper.KNNVectorFieldMapper.KNN_FIELD;

@Log4j2
public class KNN80DocValuesProducer extends DocValuesProducer {
private final DocValuesProducer delegate;
private final Map<String, String> fieldNameToVectorFileName = new HashMap<>();
private List<String> cacheKeys;

public KNN80DocValuesProducer(DocValuesProducer delegate, SegmentReadState state) {
this.delegate = delegate;

for (FieldInfo field : state.fieldInfos) {
if (!field.attributes().containsKey(KNN_FIELD)) {
continue;
}
// Only segments that contains BinaryDocValues and doesn't have vector values should be considered.
// By default, we don't create BinaryDocValues for knn field anymore. However, users can set doc_values = true
// to create binary doc values explicitly like any other field. Hence, we only want to include fields
// where approximate search is possible only by BinaryDocValues.
if (field.getDocValuesType() != DocValuesType.BINARY || field.hasVectorValues()) {
continue;
}

final String vectorIndexFileName = KNNCodecUtil.getEngineFileFromFieldInfo(field, state.segmentInfo);
if (vectorIndexFileName == null) {
continue;
}
final String cacheKey = NativeMemoryCacheKeyHelper.constructCacheKey(vectorIndexFileName, state.segmentInfo);
fieldNameToVectorFileName.putIfAbsent(field.getName(), cacheKey);
}
this.cacheKeys = getVectorCacheKeysFromSegmentReaderState(state);
}

@Override
Expand Down Expand Up @@ -95,11 +74,34 @@ public void checkIntegrity() throws IOException {
@Override
public void close() throws IOException {
final NativeMemoryCacheManager nativeMemoryCacheManager = NativeMemoryCacheManager.getInstance();
fieldNameToVectorFileName.values().forEach(nativeMemoryCacheManager::invalidate);
cacheKeys.forEach(nativeMemoryCacheManager::invalidate);
delegate.close();
}

public final List<String> getOpenedIndexPath() {
return new ArrayList<>(fieldNameToVectorFileName.values());
public final List<String> getCacheKeys() {
return new ArrayList<>(cacheKeys);
}

private static List<String> getVectorCacheKeysFromSegmentReaderState(SegmentReadState segmentReadState) {
final List<String> cacheKeys = new ArrayList<>();

for (FieldInfo field : segmentReadState.fieldInfos) {
// Only segments that contains BinaryDocValues and doesn't have vector values should be considered.
// By default, we don't create BinaryDocValues for knn field anymore. However, users can set doc_values = true
// to create binary doc values explicitly like any other field. Hence, we only want to include fields
// where approximate search is possible only by BinaryDocValues.
if (field.getDocValuesType() != DocValuesType.BINARY || field.hasVectorValues()) {
continue;
}

final String vectorIndexFileName = KNNCodecUtil.getNativeEngineFileFromFieldInfo(field, segmentReadState.segmentInfo);
if (vectorIndexFileName == null) {
continue;
}
final String cacheKey = NativeMemoryCacheKeyHelper.constructCacheKey(vectorIndexFileName, segmentReadState.segmentInfo);
cacheKeys.add(cacheKey);
}

return cacheKeys;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -33,44 +33,27 @@
import org.opensearch.knn.quantization.models.quantizationState.QuantizationStateReadConfig;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import static org.opensearch.knn.index.mapper.KNNVectorFieldMapper.KNN_FIELD;

/**
* Vectors reader class for reading the flat vectors for native engines. The class provides methods for iterating
* over the vectors and retrieving their values.
*/
public class NativeEngines990KnnVectorsReader extends KnnVectorsReader {

private final FlatVectorsReader flatVectorsReader;
private final SegmentReadState segmentReadState;
private Map<String, String> quantizationStateCacheKeyPerField;
private final Map<String, String> fieldNameToVectorFileName = new HashMap<>();
private SegmentReadState segmentReadState;
private final List<String> cacheKeys;

public NativeEngines990KnnVectorsReader(final SegmentReadState state, final FlatVectorsReader flatVectorsReader) {
this.segmentReadState = state;
this.flatVectorsReader = flatVectorsReader;
this.segmentReadState = state;
this.cacheKeys = getVectorCacheKeysFromSegmentReaderState(state);
loadCacheKeyMap();
fillIndexToVectorFileName();
}

private void fillIndexToVectorFileName() {
for (FieldInfo field : segmentReadState.fieldInfos) {
if (!field.attributes().containsKey(KNN_FIELD)) {
continue;
}
if (!field.hasVectorValues()) {
continue;
}
final String vectorIndexFileName = KNNCodecUtil.getEngineFileFromFieldInfo(field, segmentReadState.segmentInfo);
if (vectorIndexFileName == null) {
continue;
}
final String cacheKey = NativeMemoryCacheKeyHelper.constructCacheKey(vectorIndexFileName, segmentReadState.segmentInfo);
fieldNameToVectorFileName.putIfAbsent(field.getName(), cacheKey);
}
}

/**
Expand Down Expand Up @@ -202,7 +185,7 @@ public void search(String field, byte[] target, KnnCollector knnCollector, Bits
public void close() throws IOException {
// Clean up allocated vector indices resources from cache.
final NativeMemoryCacheManager nativeMemoryCacheManager = NativeMemoryCacheManager.getInstance();
fieldNameToVectorFileName.values().forEach(nativeMemoryCacheManager::invalidate);
cacheKeys.forEach(nativeMemoryCacheManager::invalidate);

// Close a reader.
IOUtils.close(flatVectorsReader);
Expand Down Expand Up @@ -231,4 +214,19 @@ private void loadCacheKeyMap() {
quantizationStateCacheKeyPerField.put(fieldInfo.getName(), cacheKey);
}
}

private static List<String> getVectorCacheKeysFromSegmentReaderState(SegmentReadState segmentReadState) {
final List<String> cacheKeys = new ArrayList<>();

for (FieldInfo field : segmentReadState.fieldInfos) {
final String vectorIndexFileName = KNNCodecUtil.getNativeEngineFileFromFieldInfo(field, segmentReadState.segmentInfo);
if (vectorIndexFileName == null) {
continue;
}
final String cacheKey = NativeMemoryCacheKeyHelper.constructCacheKey(vectorIndexFileName, segmentReadState.segmentInfo);
cacheKeys.add(cacheKey);
}

return cacheKeys;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ public static List<String> getEngineFiles(String extension, String fieldName, Se
* @param segmentInfo : Segment where we are collecting an engine file list.
* @return : Found vector engine names, if not found, returns null.
*/
public static String getEngineFileFromFieldInfo(FieldInfo field, SegmentInfo segmentInfo) {
public static String getNativeEngineFileFromFieldInfo(FieldInfo field, SegmentInfo segmentInfo) {
if (!field.attributes().containsKey(KNN_FIELD)) {
return null;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -121,11 +121,11 @@ public void testProduceKNNBinaryField_fromCodec_nmslibCurrent() throws IOExcepti
assertTrue(docValuesFormat instanceof KNN80DocValuesFormat);
DocValuesProducer producer = docValuesFormat.fieldsProducer(state);
assertTrue(producer instanceof KNN80DocValuesProducer);
int pathSize = ((KNN80DocValuesProducer) producer).getOpenedIndexPath().size();
assertEquals(pathSize, 1);
int cacheKeySize = ((KNN80DocValuesProducer) producer).getCacheKeys().size();
assertEquals(cacheKeySize, 1);

String path = ((KNN80DocValuesProducer) producer).getOpenedIndexPath().get(0);
assertTrue(path.contains(segmentFiles.get(0)));
String cacheKey = ((KNN80DocValuesProducer) producer).getCacheKeys().get(0);
assertTrue(cacheKey.contains(segmentFiles.get(0)));
}

public void testProduceKNNBinaryField_whenFieldHasNonBinaryDocValues_thenSkipThoseField() throws IOException {
Expand Down Expand Up @@ -178,7 +178,7 @@ public void testProduceKNNBinaryField_whenFieldHasNonBinaryDocValues_thenSkipTho
assertTrue(docValuesFormat instanceof KNN80DocValuesFormat);
DocValuesProducer producer = docValuesFormat.fieldsProducer(state);
assertTrue(producer instanceof KNN80DocValuesProducer);
assertEquals(0, ((KNN80DocValuesProducer) producer).getOpenedIndexPath().size());
assertEquals(0, ((KNN80DocValuesProducer) producer).getCacheKeys().size());
}

}

0 comments on commit bb677d6

Please sign in to comment.