Skip to content

Commit

Permalink
Avoid interface calls in hot loop
Browse files Browse the repository at this point in the history
Before, every `charAt` call was dispatched through the `CharSequence` interface and would emit the following (on Android):
```
    0x00002104    adrp x17, #+0x1000 (addr 0x3000)
    0x00002108    ldr w17, [x17, #20]
    0x0000210c    ldr x0, [x0, #128]
    0x00002110    ldr x0, [x0, #328]
    0x00002114    ldr lr, [x0, #24]
    0x00002118    blr lr <-- Call into String.charAt(int)
```
Now that the parameters are typed as `String`, it emits the inlined implementation of `charAt` (the branch handles possibly compressed strings):
```
    0x000020b4    ldur w16, [x4, #-8]
    0x000020b8    tbnz w16, #0, #+0xc (addr 0x20c4)
    0x000020bc    ldrb w4, [x4, x0]
    0x000020c0    b #+0x8 (addr 0x20c8)
    0x000020c4    ldrh w4, [x4, x0, lsl #1]
```

PiperOrigin-RevId: 591147406
  • Loading branch information
protobuf-github-bot authored and copybara-github committed Dec 15, 2023
1 parent 220415d commit b10d3f9
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 21 deletions.
40 changes: 20 additions & 20 deletions java/core/src/main/java/com/google/protobuf/Utf8.java
Original file line number Diff line number Diff line change
Expand Up @@ -214,24 +214,24 @@ static class UnpairedSurrogateException extends IllegalArgumentException {
* @throws IllegalArgumentException if the input contains ill-formed UTF-16 (unpaired
* surrogates)
*/
static int encodedLength(CharSequence sequence) {
static int encodedLength(String string) {
// Warning to maintainers: this implementation is highly optimized.
int utf16Length = sequence.length();
int utf16Length = string.length();
int utf8Length = utf16Length;
int i = 0;

// This loop optimizes for pure ASCII.
while (i < utf16Length && sequence.charAt(i) < 0x80) {
while (i < utf16Length && string.charAt(i) < 0x80) {
i++;
}

// This loop optimizes for chars less than 0x800.
for (; i < utf16Length; i++) {
char c = sequence.charAt(i);
char c = string.charAt(i);
if (c < 0x800) {
utf8Length += ((0x7f - c) >>> 31); // branch free!
} else {
utf8Length += encodedLengthGeneral(sequence, i);
utf8Length += encodedLengthGeneral(string, i);
break;
}
}
Expand All @@ -244,19 +244,19 @@ static int encodedLength(CharSequence sequence) {
return utf8Length;
}

private static int encodedLengthGeneral(CharSequence sequence, int start) {
int utf16Length = sequence.length();
private static int encodedLengthGeneral(String string, int start) {
int utf16Length = string.length();
int utf8Length = 0;
for (int i = start; i < utf16Length; i++) {
char c = sequence.charAt(i);
char c = string.charAt(i);
if (c < 0x800) {
utf8Length += (0x7f - c) >>> 31; // branch free!
} else {
utf8Length += 2;
// jdk7+: if (Character.isSurrogate(c)) {
if (Character.MIN_SURROGATE <= c && c <= Character.MAX_SURROGATE) {
// Check that we have a well-formed surrogate pair.
int cp = Character.codePointAt(sequence, i);
int cp = Character.codePointAt(string, i);
if (cp < MIN_SUPPLEMENTARY_CODE_POINT) {
throw new UnpairedSurrogateException(i, utf16Length);
}
Expand All @@ -267,7 +267,7 @@ private static int encodedLengthGeneral(CharSequence sequence, int start) {
return utf8Length;
}

static int encode(CharSequence in, byte[] out, int offset, int length) {
static int encode(String in, byte[] out, int offset, int length) {
return processor.encodeUtf8(in, out, offset, length);
}
// End Guava UTF-8 methods.
Expand Down Expand Up @@ -326,9 +326,9 @@ static String decodeUtf8(byte[] bytes, int index, int size)
*
* @param in the source string to be encoded
* @param out the target buffer to receive the encoded string.
* @see Utf8#encode(CharSequence, byte[], int, int)
* @see Utf8#encode(String, byte[], int, int)
*/
static void encodeUtf8(CharSequence in, ByteBuffer out) {
static void encodeUtf8(String in, ByteBuffer out) {
processor.encodeUtf8(in, out);
}

Expand Down Expand Up @@ -724,7 +724,7 @@ final String decodeUtf8Default(ByteBuffer buffer, int index, int size)
* {@code bytes.length - offset}
* @return the new offset, equivalent to {@code offset + Utf8.encodedLength(in)}
*/
abstract int encodeUtf8(CharSequence in, byte[] out, int offset, int length);
abstract int encodeUtf8(String in, byte[] out, int offset, int length);

/**
* Encodes an input character sequence ({@code in}) to UTF-8 in the target buffer ({@code out}).
Expand All @@ -743,7 +743,7 @@ final String decodeUtf8Default(ByteBuffer buffer, int index, int size)
* @throws ArrayIndexOutOfBoundsException if {@code in} encoded in UTF-8 is longer than {@code
* out.remaining()}
*/
final void encodeUtf8(CharSequence in, ByteBuffer out) {
final void encodeUtf8(String in, ByteBuffer out) {
if (out.hasArray()) {
final int offset = out.arrayOffset();
int endIndex = Utf8.encode(in, out.array(), offset + out.position(), out.remaining());
Expand All @@ -756,13 +756,13 @@ final void encodeUtf8(CharSequence in, ByteBuffer out) {
}

/** Encodes the input character sequence to a direct {@link ByteBuffer} instance. */
abstract void encodeUtf8Direct(CharSequence in, ByteBuffer out);
abstract void encodeUtf8Direct(String in, ByteBuffer out);

/**
* Encodes the input character sequence to a {@link ByteBuffer} instance using the {@link
* ByteBuffer} API, rather than potentially faster approaches.
*/
final void encodeUtf8Default(CharSequence in, ByteBuffer out) {
final void encodeUtf8Default(String in, ByteBuffer out) {
final int inLength = in.length();
int outIx = out.position();
int inIx = 0;
Expand Down Expand Up @@ -1013,7 +1013,7 @@ String decodeUtf8Direct(ByteBuffer buffer, int index, int size)
}

@Override
int encodeUtf8(CharSequence in, byte[] out, int offset, int length) {
int encodeUtf8(String in, byte[] out, int offset, int length) {
int utf16Length = in.length();
int j = offset;
int i = 0;
Expand Down Expand Up @@ -1065,7 +1065,7 @@ int encodeUtf8(CharSequence in, byte[] out, int offset, int length) {
}

@Override
void encodeUtf8Direct(CharSequence in, ByteBuffer out) {
void encodeUtf8Direct(String in, ByteBuffer out) {
// For safe processing, we have to use the ByteBuffer API.
encodeUtf8Default(in, out);
}
Expand Down Expand Up @@ -1442,7 +1442,7 @@ String decodeUtf8Direct(ByteBuffer buffer, int index, int size)
}

@Override
int encodeUtf8(final CharSequence in, final byte[] out, final int offset, final int length) {
int encodeUtf8(final String in, final byte[] out, final int offset, final int length) {
long outIx = offset;
final long outLimit = outIx + length;
final int inLimit = in.length();
Expand Down Expand Up @@ -1503,7 +1503,7 @@ int encodeUtf8(final CharSequence in, final byte[] out, final int offset, final
}

@Override
void encodeUtf8Direct(CharSequence in, ByteBuffer out) {
void encodeUtf8Direct(String in, ByteBuffer out) {
final long address = addressOffset(out);
long outIx = address + out.position();
final long outLimit = address + out.limit();
Expand Down
2 changes: 1 addition & 1 deletion java/core/src/test/java/com/google/protobuf/Utf8Test.java
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,7 @@ private void assertEncoding_insufficientSpace(String message) {

private static byte[] encodeToByteArray(String message, int length, Utf8.Processor processor) {
byte[] output = new byte[length];
processor.encodeUtf8(message, output, 0, output.length);
int unused = processor.encodeUtf8(message, output, 0, output.length);
return output;
}

Expand Down

0 comments on commit b10d3f9

Please sign in to comment.