Skip to content

Commit

Permalink
Speed up writeGroupVInts (#13203)
Browse files Browse the repository at this point in the history
  • Loading branch information
easyice committed Mar 26, 2024
1 parent 668303c commit e177b28
Show file tree
Hide file tree
Showing 3 changed files with 49 additions and 28 deletions.
3 changes: 3 additions & 0 deletions lucene/CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -53,10 +53,13 @@ Optimizations

* GITHUB#13121: Speedup multi-segment HNSW graph search for diversifying child kNN queries. Builds on GITHUB#12962.
(Ben Trent)

* GITHUB#13184: Make the HitQueue size more appropriate for KNN exact search (Pan Guixin)

* GITHUB#13199: Speed up dynamic pruning by breaking point estimation when the threshold gets exceeded. (Guo Feng)

* GITHUB#13203: Speed up writeGroupVInts (Zhang Chao)

Bug Fixes
---------------------

Expand Down
33 changes: 5 additions & 28 deletions lucene/core/src/java/org/apache/lucene/store/DataOutput.java
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
import java.util.Set;
import org.apache.lucene.util.BitUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.GroupVIntUtil;

/**
* Abstract base class for performing write operations of Lucene's low-level data types.
Expand All @@ -30,7 +30,7 @@
* internal state like file position).
*/
public abstract class DataOutput {
private final BytesRefBuilder groupVIntBytes = new BytesRefBuilder();
private byte[] groupVIntBytes;

/**
* Writes a single byte.
Expand Down Expand Up @@ -335,32 +335,9 @@ public void writeSetOfStrings(Set<String> set) throws IOException {
* @lucene.experimental
*/
public void writeGroupVInts(long[] values, int limit) throws IOException {
int off = 0;

// encode each group
while ((limit - off) >= 4) {
byte flag = 0;
groupVIntBytes.setLength(1);
flag |= (encodeGroupValue(Math.toIntExact(values[off++])) - 1) << 6;
flag |= (encodeGroupValue(Math.toIntExact(values[off++])) - 1) << 4;
flag |= (encodeGroupValue(Math.toIntExact(values[off++])) - 1) << 2;
flag |= (encodeGroupValue(Math.toIntExact(values[off++])) - 1);
groupVIntBytes.setByteAt(0, flag);
writeBytes(groupVIntBytes.bytes(), groupVIntBytes.length());
}

// tail vints
for (; off < limit; off++) {
writeVInt(Math.toIntExact(values[off]));
if (groupVIntBytes == null) {
groupVIntBytes = new byte[GroupVIntUtil.MAX_LENGTH_PER_GROUP];
}
}

private int encodeGroupValue(int v) {
int lastOff = groupVIntBytes.length();
do {
groupVIntBytes.append((byte) (v & 0xFF));
v >>>= 8;
} while (v != 0);
return groupVIntBytes.length() - lastOff;
GroupVIntUtil.writeGroupVInts(this, groupVIntBytes, values, limit);
}
}
41 changes: 41 additions & 0 deletions lucene/core/src/java/org/apache/lucene/util/GroupVIntUtil.java
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

import java.io.IOException;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.DataOutput;

/**
* This class contains utility methods and constants for group varint
Expand Down Expand Up @@ -111,4 +112,44 @@ public static int readGroupVInt(
pos += 1 + n4Minus1;
return (int) (pos - posStart);
}

/**
 * Returns the number of bytes, between 1 and {@link Integer#BYTES}, needed to hold the
 * significant bits of {@code v}.
 */
private static int numBytes(int v) {
  // Force the lowest bit on so that zero still reports one byte rather than none.
  final int leadingZeroBits = Integer.numberOfLeadingZeros(v | 1);
  final int leadingZeroBytes = leadingZeroBits / Byte.SIZE;
  return Integer.BYTES - leadingZeroBytes;
}

/**
 * Writes the first {@code limit} entries of {@code values} to {@code out} using group-varint
 * encoding.
 *
 * <p>Values are consumed four at a time. Every complete group is emitted as a single flag byte
 * followed by the bytes of the four values; the flag stores "byte length minus one" of each value
 * in two bits, with the first value in the highest bits. Any remaining values (fewer than four)
 * are written individually with {@link DataOutput#writeVInt(int)}.
 *
 * @param out the output that receives the encoded bytes
 * @param scratch reusable buffer in which each group is assembled; at most {@link
 *     #MAX_LENGTH_PER_GROUP} bytes of it are used
 * @param values the values to encode; each one must fit in an {@code int}
 * @param limit the number of leading entries of {@code values} to write
 * @throws ArithmeticException if a value does not fit in an {@code int}
 * @throws IOException if writing to {@code out} fails
 */
public static void writeGroupVInts(DataOutput out, byte[] scratch, long[] values, int limit)
    throws IOException {
  int next = 0;

  // full groups of four values
  for (; limit - next >= 4; next += 4) {
    final int v1 = Math.toIntExact(values[next]);
    final int v2 = Math.toIntExact(values[next + 1]);
    final int v3 = Math.toIntExact(values[next + 2]);
    final int v4 = Math.toIntExact(values[next + 3]);

    final int len1 = numBytes(v1);
    final int len2 = numBytes(v2);
    final int len3 = numBytes(v3);
    final int len4 = numBytes(v4);

    scratch[0] = (byte) (((len1 - 1) << 6) | ((len2 - 1) << 4) | ((len3 - 1) << 2) | (len4 - 1));

    // Each store is given the whole int, but the position only advances by the value's
    // significant byte count, so the following store begins right after those bytes.
    int end = 1;
    BitUtil.VH_LE_INT.set(scratch, end, v1);
    end += len1;
    BitUtil.VH_LE_INT.set(scratch, end, v2);
    end += len2;
    BitUtil.VH_LE_INT.set(scratch, end, v3);
    end += len3;
    BitUtil.VH_LE_INT.set(scratch, end, v4);
    end += len4;

    out.writeBytes(scratch, end);
  }

  // leftover values that do not fill a group are written as plain vints
  while (next < limit) {
    out.writeVInt(Math.toIntExact(values[next]));
    next++;
  }
}
}

0 comments on commit e177b28

Please sign in to comment.