Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP do not merge #13577

Closed
wants to merge 5 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -463,6 +463,7 @@ public void seek(int level, int targetOrd) throws IOException {
// unsafe; no bounds checking
dataIn.seek(graphLevelNodeOffsets.get(targetIndex + graphLevelNodeIndexOffsets[level]));
arcCount = dataIn.readVInt();
assert arcCount <= currentNeighborsBuffer.length : "too many neighbors: " + arcCount;
if (arcCount > 0) {
currentNeighborsBuffer[0] = dataIn.readVInt();
for (int i = 1; i < arcCount; i++) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -612,7 +612,7 @@ public T copyValue(T vectorValue) {
throw new UnsupportedOperationException();
}

OnHeapHnswGraph getGraph() {
OnHeapHnswGraph getGraph() throws IOException {
assert flatFieldVectorsWriter.isFinished();
if (node > 0) {
return hnswGraphBuilder.getCompletedGraph();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,5 +48,5 @@ public interface HnswBuilder {
* components, re-ordering node ids for better delta compression) may be triggered, so callers
* should expect this call to take some time.
*/
OnHeapHnswGraph getCompletedGraph();
OnHeapHnswGraph getCompletedGraph() throws IOException;
}
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@ public OnHeapHnswGraph build(int maxOrd) throws IOException {
});
}
taskExecutor.invokeAll(futures);
finish();
frozen = true;
return workers[0].getCompletedGraph();
}
Expand All @@ -106,11 +107,19 @@ public void setInfoStream(InfoStream infoStream) {
}

@Override
public OnHeapHnswGraph getCompletedGraph() {
frozen = true;
public OnHeapHnswGraph getCompletedGraph() throws IOException {
  // Already frozen: the finishing step has run, so the graph can be handed out directly.
  if (frozen) {
    return getGraph();
  }
  // build() normally finishes and freezes the graph; this path covers callers that ask for
  // the completed graph without having gone through it.
  finish();
  frozen = true;
  return getGraph();
}

// Runs the post-build finishing step by delegating to the first worker's finish().
// NOTE(review): build() calls this and then returns workers[0].getCompletedGraph(); if that
// worker runs its own finish() when it is not yet frozen, the finishing work may execute
// twice for the same graph. Confirm against the worker implementation.
private void finish() throws IOException {
workers[0].finish();
}

@Override
public OnHeapHnswGraph getGraph() {
return workers[0].getGraph();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@
import static java.lang.Math.log;

import java.io.IOException;
import java.util.Comparator;
import java.util.List;
import java.util.Locale;
import java.util.Objects;
import java.util.SplittableRandom;
Expand All @@ -28,6 +30,7 @@
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.InfoStream;
import org.apache.lucene.util.hnsw.HnswUtil.Component;

/**
* Builder for HNSW graph. See {@link HnswGraph} for a gloss on the algorithm and the meaning of the
Expand Down Expand Up @@ -134,7 +137,7 @@ protected HnswGraphBuilder(
HnswGraphSearcher graphSearcher)
throws IOException {
if (M <= 0) {
throw new IllegalArgumentException("maxConn must be positive");
throw new IllegalArgumentException("M (max connections) must be positive");
}
if (beamWidth <= 0) {
throw new IllegalArgumentException("beamWidth must be positive");
Expand Down Expand Up @@ -169,8 +172,11 @@ public void setInfoStream(InfoStream infoStream) {
}

@Override
public OnHeapHnswGraph getCompletedGraph() {
frozen = true;
public OnHeapHnswGraph getCompletedGraph() throws IOException {
  // Once frozen, the finishing step has already been applied; just return the graph.
  if (frozen) {
    return getGraph();
  }
  // First request for the completed graph: run the finishing step, then freeze.
  finish();
  frozen = true;
  return getGraph();
}

Expand Down Expand Up @@ -399,6 +405,79 @@ private static int getRandomGraphLevel(double ml, SplittableRandom random) {
return ((int) (-log(randDouble) * ml));
}

/**
 * Finishing step invoked by {@code getCompletedGraph()} before the builder is marked frozen.
 * At present it only attempts to connect disconnected components of the graph.
 *
 * @throws IOException if connecting the components fails with an I/O error
 */
void finish() throws IOException {
connectComponents();
}

/**
 * Attempts to connect the disconnected components on every level of the graph. When the info
 * stream is enabled for {@code HNSW_COMPONENT}, each level that could not be fully connected
 * is reported, followed by the total elapsed time in milliseconds.
 *
 * @throws IOException if connecting the components of a level fails with an I/O error
 */
private void connectComponents() throws IOException {
  final long startNanos = System.nanoTime();
  int level = 0;
  while (level < hnsw.numLevels()) {
    boolean connected = connectComponents(level);
    // only consult the info stream when there is a failure to report
    if (!connected && infoStream.isEnabled(HNSW_COMPONENT)) {
      infoStream.message(HNSW_COMPONENT, "connectComponents failed on level " + level);
    }
    level++;
  }
  if (infoStream.isEnabled(HNSW_COMPONENT)) {
    long elapsedMs = (System.nanoTime() - startNanos) / 1_000_000;
    infoStream.message(HNSW_COMPONENT, "connectComponents " + elapsedMs + " ms");
  }
}

/**
 * Attempts to connect every disconnected component on {@code level} to the largest component
 * of that level. For each smaller component, the graph is searched on {@code level}, starting
 * from the largest component's start node, for candidates near the smaller component's start
 * node; the first candidate that has room for another neighbor is linked bidirectionally.
 *
 * <p>The search and the linking both operate on {@code level}, so that the links which are
 * added actually join the components that were computed for that level.
 *
 * @param level the graph level whose components should be connected
 * @return {@code true} if there was at most one component or every smaller component was
 *     linked to the largest one; {@code false} if at least one component could not be linked
 * @throws IOException if scoring or searching the graph fails with an I/O error
 */
private boolean connectComponents(int level) throws IOException {
  List<Component> components = HnswUtil.components(hnsw, level, true);
  if (components.size() <= 1) {
    // nothing to connect
    return true;
  }
  // connect other components to the largest one; the list is non-empty here, so max() is
  // always present
  Component c0 = components.stream().max(Comparator.comparingInt(Component::size)).get();
  // try even harder to find connections by using twice the beam width
  GraphBuilderKnnCollector beam = new GraphBuilderKnnCollector(beamCandidates.k * 2);
  int[] eps = new int[1];
  boolean result = true;
  for (Component c : components) {
    if (c == c0) {
      continue;
    }
    beam.clear();
    eps[0] = c0.start();
    RandomVectorScorer scorer = scorerSupplier.scorer(c.start());
    // find the closest node in the largest component to the lowest-numbered node in this
    // component, searching on the level whose components are being connected
    graphSearcher.searchLevel(beam, scorer, level, eps, hnsw, null);
    boolean linked = false;
    while (beam.size() > 0) {
      float score = beam.minimumScore();
      int c0node = beam.popNode();
      if (c0node == c.start()) {
        // never link a node to itself
        continue;
      }
      // link the nodes, best effort only as they may be full
      if (tryLink(level, c0node, c.start(), score)) {
        linked = true;
        break;
      }
    }
    if (!linked) {
      result = false;
      // TODO: maybe try some more aggressive measures to ensure linkage
      // eg we can try pruning to make room, but if we do that we don't want to
      // create more islands. Or we can consider making the NeighborArray bigger, maybe.
    }
  }
  return result;
}

/**
 * Tries to link two nodes bidirectionally on the given level.
 *
 * @param level the graph level on which the neighbor lists are updated
 * @param n0 the first node to link
 * @param n1 the second node to link
 * @param score the similarity score recorded for the new link in both directions
 * @return {@code true} if both links were added; {@code false} (and no link added either way)
 *     if either node already has the max number of connections
 */
private boolean tryLink(int level, int n0, int n1, float score) {
  NeighborArray nbr0 = hnsw.getNeighbors(level, n0);
  NeighborArray nbr1 = hnsw.getNeighbors(level, n1);
  // must subtract 1 here since the nodes array is one larger than the configured
  // max neighbors (M / 2M).
  if (nbr0.size() >= nbr0.nodes().length - 1 || nbr1.size() >= nbr1.nodes().length - 1) {
    return false;
  }
  nbr0.addOutOfOrder(n1, score);
  nbr1.addOutOfOrder(n0, score);
  return true;
}

/**
* A restricted, specialized knnCollector that can be used when building a graph.
*
Expand Down
Loading
Loading