From 005ee8069dd69169e5c1842f0849d90373910b18 Mon Sep 17 00:00:00 2001 From: belugabehr <12578579+belugabehr@users.noreply.github.com> Date: Tue, 24 Sep 2024 16:14:30 -0400 Subject: [PATCH] AVRO-4045: Use JDK compare for byte arrays (#3126) --- .../org/apache/avro/generic/GenericData.java | 2 +- .../java/org/apache/avro/io/BinaryData.java | 28 ++++++++----------- .../org/apache/avro/reflect/ReflectData.java | 3 +- .../apache/avro/specific/SpecificFixed.java | 3 +- .../main/java/org/apache/avro/util/Utf8.java | 3 +- 5 files changed, 15 insertions(+), 24 deletions(-) diff --git a/lang/java/avro/src/main/java/org/apache/avro/generic/GenericData.java b/lang/java/avro/src/main/java/org/apache/avro/generic/GenericData.java index 6db0a40eee6..362ebdc9cfc 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/generic/GenericData.java +++ b/lang/java/avro/src/main/java/org/apache/avro/generic/GenericData.java @@ -545,7 +545,7 @@ public String toString() { @Override public int compareTo(Fixed that) { - return BinaryData.compareBytes(this.bytes, 0, this.bytes.length, that.bytes, 0, that.bytes.length); + return Arrays.compare(this.bytes, 0, this.bytes.length, that.bytes, 0, that.bytes.length); } } diff --git a/lang/java/avro/src/main/java/org/apache/avro/io/BinaryData.java b/lang/java/avro/src/main/java/org/apache/avro/io/BinaryData.java index f925bcd9698..e6fc7086eac 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/io/BinaryData.java +++ b/lang/java/avro/src/main/java/org/apache/avro/io/BinaryData.java @@ -18,6 +18,7 @@ package org.apache.avro.io; import java.io.IOException; +import java.util.Arrays; import org.apache.avro.Schema; import org.apache.avro.Schema.Field; @@ -155,7 +156,8 @@ private static int compare(Decoders d, Schema schema) throws IOException { } case FIXED: { int size = schema.getFixedSize(); - int c = compareBytes(d.d1.getBuf(), d.d1.getPos(), size, d.d2.getBuf(), d.d2.getPos(), size); + int c = Arrays.compare(d.d1.getBuf(), d.d1.getPos(), d.d1.getPos() + size, d.d2.getBuf(), d.d2.getPos(), + d.d2.getPos() + size); d.d1.skipFixed(size); d.d2.skipFixed(size); return c; @@ -164,7 +166,8 @@ private static int compare(Decoders d, Schema schema) throws IOException { case BYTES: { int l1 = d1.readInt(); int l2 = d2.readInt(); - int c = compareBytes(d.d1.getBuf(), d.d1.getPos(), l1, d.d2.getBuf(), d.d2.getPos(), l2); + int c = Arrays.compare(d.d1.getBuf(), d.d1.getPos(), d.d1.getPos() + l1, d.d2.getBuf(), d.d2.getPos(), + d.d2.getPos() + l2); d.d1.skipFixed(l1); d.d2.skipFixed(l2); return c; @@ -181,16 +184,7 @@ private static int compare(Decoders d, Schema schema) throws IOException { * return a positive value, if less than return a negative value. */ public static int compareBytes(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) { - int end1 = s1 + l1; - int end2 = s2 + l2; - for (int i = s1, j = s2; i < end1 && j < end2; i++, j++) { - int a = (b1[i] & 0xff); - int b = (b2[j] & 0xff); - if (a != b) { - return a - b; - } - } - return l1 - l2; + return Arrays.compare(b1, s1, s1 + l1, b2, s2, s2 + l2); } private static class HashData { @@ -298,7 +292,7 @@ public static int skipLong(final byte[] bytes, int start) { /** * Encode a boolean to the byte array at the given position. Will throw * IndexOutOfBounds if the position is not valid. - * + * * @return The number of bytes written to the buffer, 1. */ public static int encodeBoolean(boolean b, byte[] buf, int pos) { @@ -310,7 +304,7 @@ public static int encodeBoolean(boolean b, byte[] buf, int pos) { * Encode an integer to the byte array at the given position. Will throw * IndexOutOfBounds if it overflows. Users should ensure that there are at least * 5 bytes left in the buffer before calling this method. - * + * * @return The number of bytes written to the buffer, between 1 and 5. */ public static int encodeInt(int n, byte[] buf, int pos) { @@ -341,7 +335,7 @@ public static int encodeInt(int n, byte[] buf, int pos) { * Encode a long to the byte array at the given position. Will throw * IndexOutOfBounds if it overflows. Users should ensure that there are at least * 10 bytes left in the buffer before calling this method. - * + * * @return The number of bytes written to the buffer, between 1 and 10. */ public static int encodeLong(long n, byte[] buf, int pos) { @@ -392,7 +386,7 @@ public static int encodeLong(long n, byte[] buf, int pos) { * Encode a float to the byte array at the given position. Will throw * IndexOutOfBounds if it overflows. Users should ensure that there are at least * 4 bytes left in the buffer before calling this method. - * + * * @return Returns the number of bytes written to the buffer, 4. */ public static int encodeFloat(float f, byte[] buf, int pos) { @@ -408,7 +402,7 @@ public static int encodeFloat(float f, byte[] buf, int pos) { * Encode a double to the byte array at the given position. Will throw * IndexOutOfBounds if it overflows. Users should ensure that there are at least * 8 bytes left in the buffer before calling this method. - * + * * @return Returns the number of bytes written to the buffer, 8. */ public static int encodeDouble(double d, byte[] buf, int pos) { diff --git a/lang/java/avro/src/main/java/org/apache/avro/reflect/ReflectData.java b/lang/java/avro/src/main/java/org/apache/avro/reflect/ReflectData.java index 95fc9598c90..ab702a60e87 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/reflect/ReflectData.java +++ b/lang/java/avro/src/main/java/org/apache/avro/reflect/ReflectData.java @@ -30,7 +30,6 @@ import org.apache.avro.generic.GenericData; import org.apache.avro.generic.GenericFixed; import org.apache.avro.generic.IndexedRecord; -import org.apache.avro.io.BinaryData; import org.apache.avro.io.DatumReader; import org.apache.avro.io.DatumWriter; import org.apache.avro.specific.FixedSize; @@ -986,7 +985,7 @@ protected int compare(Object o1, Object o2, Schema s, boolean equals) { break; byte[] b1 = (byte[]) o1; byte[] b2 = (byte[]) o2; - return BinaryData.compareBytes(b1, 0, b1.length, b2, 0, b2.length); + return Arrays.compare(b1, 0, b1.length, b2, 0, b2.length); } return super.compare(o1, o2, s, equals); } diff --git a/lang/java/avro/src/main/java/org/apache/avro/specific/SpecificFixed.java b/lang/java/avro/src/main/java/org/apache/avro/specific/SpecificFixed.java index 9984acb5689..98a553eecbf 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/specific/SpecificFixed.java +++ b/lang/java/avro/src/main/java/org/apache/avro/specific/SpecificFixed.java @@ -24,7 +24,6 @@ import java.util.Arrays; import org.apache.avro.Schema; import org.apache.avro.generic.GenericFixed; -import org.apache.avro.io.BinaryData; /** Base class for generated fixed-sized data classes. */ public abstract class SpecificFixed implements GenericFixed, Comparable, Externalizable { @@ -70,7 +69,7 @@ public String toString() { @Override public int compareTo(SpecificFixed that) { - return BinaryData.compareBytes(this.bytes, 0, this.bytes.length, that.bytes, 0, that.bytes.length); + return Arrays.compare(this.bytes, 0, this.bytes.length, that.bytes, 0, that.bytes.length); } @Override diff --git a/lang/java/avro/src/main/java/org/apache/avro/util/Utf8.java b/lang/java/avro/src/main/java/org/apache/avro/util/Utf8.java index 6de9ebb22c5..b38d237f212 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/util/Utf8.java +++ b/lang/java/avro/src/main/java/org/apache/avro/util/Utf8.java @@ -25,7 +25,6 @@ import java.util.Arrays; import org.apache.avro.SystemLimitException; -import org.apache.avro.io.BinaryData; /** * A Utf8 string. Unlike {@link String}, instances are mutable. This is more @@ -185,7 +184,7 @@ public int hashCode() { @Override public int compareTo(Utf8 that) { - return BinaryData.compareBytes(this.bytes, 0, this.length, that.bytes, 0, that.length); + return Arrays.compare(this.bytes, 0, this.length, that.bytes, 0, that.length); } // CharSequence implementation