Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

New APIs: ByteString.toIndex() and ByteString.toFraction() #729

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
92 changes: 92 additions & 0 deletions okio/src/commonMain/kotlin/okio/ByteString.kt
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,98 @@ internal constructor(data: ByteArray) : Comparable<ByteString> {

override fun compareTo(other: ByteString): Int

/**
* Projects this value to the range `[0..size)` using linear interpolation. This is equivalent to
* a sorted partitioning of all possible byte strings across [size] equally-sized buckets and
* returning the index of the bucket that this byte string fits in.
*
* For example, the byte string `8000` is the median of all 2-element byte strings, and calling
* `toIndex(100)` on it returns 50. Some other examples:
*
* | Byte String (hex) | `toIndex(100)` | `toIndex(256)` | `toIndex(Int.MAX_VALUE)` |
* | :----------------- | -------------: | -------------: | -----------------------: |
* | (empty) | 0 | 0 | 0 |
* | 00 | 0 | 0 | 0 |
* | 0000 | 0 | 0 | 0 |
* | 000000 | 0 | 0 | 0 |
* | 0000000001 | 0 | 0 | 0 |
* | 00000001 | 0 | 0 | 0 |
* | 00000002 | 0 | 0 | 0 |
* | 00000003 | 0 | 0 | 1 |
* | 01 | 0 | 1 | 8388607 |
* | 02 | 0 | 2 | 16777215 |
* | 03 | 1 | 3 | 25165823 |
* | 80 | 50 | 128 | 1073741823 |
* | 8000 | 50 | 128 | 1073741823 |
* | 80000000 | 50 | 128 | 1073741823 |
* | 81 | 50 | 129 | 1082130431 |
* | 81ffffff | 50 | 129 | 1090519038 |
* | 82 | 50 | 130 | 1090519039 |
* | 83 | 51 | 131 | 1098907647 |
* | ff | 99 | 255 | 2139095039 |
* | ffff | 99 | 255 | 2147450879 |
* | ffffffff | 99 | 255 | 2147483646 |
* | ffffffffffff | 99 | 255 | 2147483646 |
*
* This interprets the bytes in this byte string as **unsigned**. This behavior is consistent with
* [compareTo]. The returned value is also consistent with [compareTo] though the dynamic range
* is compressed. For two byte strings `a` and `b`, if `a < b`, then
* `a.toIndex(n) <= b.toIndex(n)` for all sizes `n`.
*
* This examines at most the first 4 bytes of this byte string. Data beyond the first 4 bytes is
* not used to compute the result. The result is always rounded down.
*
* @param size a positive integer.
* @return a value that is greater than or equal to `0` and less than [size].
* @throws IllegalArgumentException if [size] is not positive.
*/
fun toIndex(size: Int): Int

/**
* Projects this value to the range `[0.0..1.0)` using linear interpolation. This is equivalent to
* sorting all possible byte strings and returning the fraction that precede this byte string.
*
* For example, the byte string `8000` is the median of all 2-element byte strings, and calling
* `toFraction()` on it returns 0.5. Some other examples:
*
* | Byte String (hex) | `toFraction()` |
* | :----------------- | :----------------- |
* | (empty) | 0.0 |
* | 00 | 0.0 |
* | 0000 | 0.0 |
* | 000000 | 0.0 |
* | 00000000000001 | 0.0 |
* | 00000000000007 | 0.0 |
* | 00000000000008 | 0.0000000000000001 |
* | 0000000001 | 0.0000000000009094 |
* | 00000001 | 0.0000000002328306 |
* | 01 | 0.00390625 |
* | 02 | 0.0078125 |
* | 03 | 0.01171875 |
* | 80 | 0.5 |
* | 8000 | 0.5 |
* | 80000000000000 | 0.5 |
* | 81 | 0.50390625 |
* | 81ffffff | 0.5078124997671694 |
* | 82 | 0.5078125 |
* | 83 | 0.51171875 |
* | ff | 0.99609375 |
* | ffff | 0.9999847412109375 |
* | ffffffff | 0.9999999997671694 |
* | ffffffffffff | 0.9999999999999964 |
* | ffffffffffffff | 0.9999999999999999 |
*
* This interprets the bytes in this byte string as **unsigned**. This behavior is consistent with
* [compareTo]. The returned value is also consistent with [compareTo] though the dynamic range
* is compressed. For two byte strings `a` and `b`, if `a < b`, then
* `a.toFraction() <= b.toFraction()`.
*
* This examines at most the first 7 bytes of this byte string. Data beyond the first 7 bytes is
* not used to compute the result.
*
* When 7 or more bytes are present, only the 5 most-significant bits of the 7th byte are used:
* the 3 least-significant bits are dropped so that the 56-bit prefix fits in the 53 bits of
* precision of a [Double]. This is why `00000000000007` maps to `0.0` in the table above, and it
* guarantees that the result never rounds up to `1.0`.
*
* @return a value that is greater than or equal to `0.0` and less than `1.0`.
*/
fun toFraction(): Double
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Great comments!


/**
* Returns a human-readable string that describes the contents of this byte string. Typically this
* is a string like `[text=Hello]` or `[hex=0000ffff]`.
Expand Down
32 changes: 32 additions & 0 deletions okio/src/commonMain/kotlin/okio/internal/ByteString.kt
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ import okio.isIsoControl
import okio.processUtf8CodePoints
import okio.shr
import okio.toUtf8String
import kotlin.math.min

// TODO Kotlin's expect classes can't have default implementations, so platform implementations
// have to call these functions. Remove all this nonsense when expect classes allow actual code.
Expand Down Expand Up @@ -248,6 +249,37 @@ internal inline fun ByteString.commonCompareTo(other: ByteString): Int {
return if (sizeA < sizeB) -1 else 1
}

/**
 * Shared implementation of [ByteString.toIndex].
 *
 * Reads up to the first 4 bytes of this byte string as an unsigned, big-endian numerator over a
 * denominator of `256^n` (where `n` is the number of bytes read), then scales that fraction by
 * [size] and rounds down.
 *
 * @param size the number of buckets; must be positive.
 * @return an index that is at least `0` and less than [size].
 * @throws IllegalArgumentException if [size] is not positive.
 */
@Suppress("NOTHING_TO_INLINE")
internal inline fun ByteString.commonToIndex(size: Int): Int {
  require(size > 0) { "size <= 0: $size" }

  // The `size` parameter shadows ByteString.size, so the byte count needs an explicit `this`.
  val byteCount = min(4, this.size)

  var numerator = 0L
  for (i in 0 until byteCount) {
    // Mask with 0xff so each byte is treated as unsigned.
    numerator = (numerator shl 8) + (get(i) and 0xff)
  }
  val denominator = 1L shl (8 * byteCount)

  // Long math cannot overflow here: size < 2^31 and numerator < 2^32, so the product is < 2^63.
  return (size * numerator / denominator).toInt()
}

/**
 * Shared implementation of [ByteString.toFraction].
 *
 * Reads up to the first 7 bytes of this byte string as an unsigned, big-endian integer and divides
 * it by the number of distinct values that many bytes can hold.
 *
 * @return a value that is at least `0.0` and less than `1.0`.
 */
@Suppress("NOTHING_TO_INLINE")
internal inline fun ByteString.commonToFraction(): Double {
  val byteCount = min(7, size)

  var prefix = 0L
  for (index in 0 until byteCount) {
    // Mask with 0xff so each byte is treated as unsigned.
    prefix = (prefix shl 8) + (get(index) and 0xff)
  }

  // A full 7-byte prefix carries 56 bits but a Double only offers 53 bits of precision. Drop the
  // 3 lowest bits from both sides of the division; otherwise a byte string like "ffffffffffffff"
  // could round up to 1.0.
  val discardedBits = if (byteCount == 7) 3 else 0
  val scaledPrefix = prefix shr discardedBits
  val scale = 1L shl (8 * byteCount - discardedBits)

  return scaledPrefix.toDouble() / scale
}

/** Returns a new [ByteString] that wraps a copy of [data] rather than the caller's array. */
@Suppress("NOTHING_TO_INLINE")
internal inline fun commonOf(data: ByteArray): ByteString {
  val copy = data.copyOf()
  return ByteString(copy)
}

Expand Down
68 changes: 68 additions & 0 deletions okio/src/commonTest/kotlin/okio/ByteStringTest.kt
Original file line number Diff line number Diff line change
Expand Up @@ -457,4 +457,72 @@ abstract class AbstractByteStringTest internal constructor(
sortedByteStrings.sort()
assertEquals(originalByteStrings, sortedByteStrings)
}

/** Checks bucket indices for a single bucket, 100 buckets and the largest possible size. */
@Test fun toIndex() {
// With a single bucket every byte string, whatever its content, lands in bucket 0.
assertEquals(0, factory.decodeHex("").toIndex(1))
assertEquals(0, factory.decodeHex("00").toIndex(1))
assertEquals(0, factory.decodeHex("ff").toIndex(1))
assertEquals(0, factory.decodeHex("ffffffff").toIndex(1))
assertEquals(0, factory.decodeHex("ffffffffffff").toIndex(1))

// 100 buckets. Results are rounded down, so even all-ones inputs stay below 100.
assertEquals(0, factory.decodeHex("").toIndex(100))
assertEquals(0, factory.decodeHex("00").toIndex(100))
assertEquals(10, factory.decodeHex("1a").toIndex(100))
assertEquals(25, factory.decodeHex("40").toIndex(100))
assertEquals(50, factory.decodeHex("80").toIndex(100))
assertEquals(75, factory.decodeHex("c0").toIndex(100))
assertEquals(99, factory.decodeHex("ff").toIndex(100))
assertEquals(99, factory.decodeHex("ffff").toIndex(100))
assertEquals(99, factory.decodeHex("ffffff").toIndex(100))
assertEquals(99, factory.decodeHex("ffffffff").toIndex(100))

// Int.MAX_VALUE buckets. Each extra ff byte moves the result closer to, but never reaches,
// Int.MAX_VALUE (0x7fffffff).
assertEquals(0, factory.decodeHex("").toIndex(Int.MAX_VALUE))
assertEquals(0x7f7fffff, factory.decodeHex("ff").toIndex(Int.MAX_VALUE))
assertEquals(0x7fff7fff, factory.decodeHex("ffff").toIndex(Int.MAX_VALUE))
assertEquals(0x7fffff7f, factory.decodeHex("ffffff").toIndex(Int.MAX_VALUE))
assertEquals(0x7ffffffe, factory.decodeHex("ffffffff").toIndex(Int.MAX_VALUE))
}

/**
* Our math is incorrect for values that round differently depending on data beyond the first 4
* bytes. For example, "aaaaaaaaab".toIndex(3) is 1, but if we did arbitrary-precision math the
* result would be 2.
*/
@Test fun toIndexHonorsFirstFourBytesOnly() {
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

of everything, this part hurts me the most

assertEquals(2, factory.decodeHex("aaaaab").toIndex(3))
assertEquals(2, factory.decodeHex("aaaaaaab").toIndex(3))

// Note: wrong due to truncation!
assertEquals(1, factory.decodeHex("aaaaaaaaab").toIndex(3))
assertEquals(1, factory.decodeHex("aaaaaaaaaaab").toIndex(3))
assertEquals(1, factory.decodeHex("aaaaaaaaaaaaab").toIndex(3))
}

/**
 * Checks fractions for empty, all-zero, mid-range and all-ones byte strings, including inputs
 * longer than the 7 bytes that contribute to the result.
 */
@Test fun toFraction() {
  assertEquals(0.0, factory.decodeHex("").toFraction())
  assertEquals(0.0, factory.decodeHex("00").toFraction())
  // Trailing zero bytes do not change the fraction.
  assertEquals(0.0, factory.decodeHex("0000").toFraction())
  assertEquals(0.1015625, factory.decodeHex("1a").toFraction())
  assertEquals(0.25, factory.decodeHex("40").toFraction())
  assertEquals(0.5, factory.decodeHex("80").toFraction())
  assertEquals(0.75, factory.decodeHex("c0").toFraction())
  assertEquals(0.7929493631236255, factory.decodeHex("cafebabe").toFraction())
  assertEquals(0.99609375, factory.decodeHex("ff").toFraction())
  assertEquals(0.9999847412109375, factory.decodeHex("ffff").toFraction())
  assertEquals(0.9999999403953552, factory.decodeHex("ffffff").toFraction())
  assertEquals(0.9999999997671694, factory.decodeHex("ffffffff").toFraction())
  assertEquals(0.9999999999999964, factory.decodeHex("ffffffffffff").toFraction())
  assertEquals(0.9999999999999999, factory.decodeHex("ffffffffffffff").toFraction())
  // The 8th byte is ignored, so the result matches the 7-byte case.
  assertEquals(0.9999999999999999, factory.decodeHex("ffffffffffffffff").toFraction())
}

/**
 * Only the 5 most-significant bits of the 7th byte are used; its 3 least-significant bits are
 * discarded. That leaves 53 bits in total, matching the precision of IEEE 754 doubles.
 */
@Test fun toFractionLast5BitsOf7thByte() {
// 0x07 only sets the 3 discarded bits, so it is indistinguishable from 0x00.
assertEquals(0.0000000000000000, factory.decodeHex("00000000000007").toFraction())
// 0x08 and 0x0f differ only in the discarded bits, so they produce the same fraction.
assertEquals(1.1102230246251565E-16, factory.decodeHex("00000000000008").toFraction())
assertEquals(1.1102230246251565E-16, factory.decodeHex("0000000000000f").toFraction())
assertEquals(2.220446049250313E-16, factory.decodeHex("00000000000010").toFraction())
assertEquals(0.9999999999999998, factory.decodeHex("fffffffffffff0").toFraction())
assertEquals(0.9999999999999999, factory.decodeHex("fffffffffffff8").toFraction())
}
}
6 changes: 6 additions & 0 deletions okio/src/jsMain/kotlin/okio/ByteString.kt
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@ import okio.internal.commonToAsciiLowercase
import okio.internal.commonToAsciiUppercase
import okio.internal.commonToByteArray
import okio.internal.commonToByteString
import okio.internal.commonToFraction
import okio.internal.commonToIndex
import okio.internal.commonToString
import okio.internal.commonUtf8
import okio.internal.commonWrite
Expand Down Expand Up @@ -119,6 +121,10 @@ internal actual constructor(

actual override fun compareTo(other: ByteString) = commonCompareTo(other)

actual fun toIndex(size: Int): Int = commonToIndex(size)

actual fun toFraction(): Double = commonToFraction()

/**
* Returns a human-readable string that describes the contents of this byte string. Typically this
* is a string like `[text=Hello]` or `[hex=0000ffff]`.
Expand Down
6 changes: 6 additions & 0 deletions okio/src/jvmMain/kotlin/okio/ByteString.kt
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@ import okio.internal.commonToAsciiLowercase
import okio.internal.commonToAsciiUppercase
import okio.internal.commonToByteArray
import okio.internal.commonToByteString
import okio.internal.commonToFraction
import okio.internal.commonToIndex
import okio.internal.commonToString
import okio.internal.commonUtf8
import okio.internal.commonWrite
Expand Down Expand Up @@ -181,6 +183,10 @@ internal actual constructor(

actual override fun compareTo(other: ByteString) = commonCompareTo(other)

actual fun toIndex(size: Int): Int = commonToIndex(size)

actual fun toFraction(): Double = commonToFraction()

actual override fun toString() = commonToString()

@Throws(IOException::class)
Expand Down
6 changes: 6 additions & 0 deletions okio/src/nativeMain/kotlin/okio/ByteString.kt
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@ import okio.internal.commonToAsciiLowercase
import okio.internal.commonToAsciiUppercase
import okio.internal.commonToByteArray
import okio.internal.commonToByteString
import okio.internal.commonToFraction
import okio.internal.commonToIndex
import okio.internal.commonToString
import okio.internal.commonUtf8
import okio.internal.commonWrite
Expand Down Expand Up @@ -125,6 +127,10 @@ internal actual constructor(

actual override fun compareTo(other: ByteString) = commonCompareTo(other)

actual fun toIndex(size: Int): Int = commonToIndex(size)

actual fun toFraction(): Double = commonToFraction()

/**
* Returns a human-readable string that describes the contents of this byte string. Typically this
* is a string like `[text=Hello]` or `[hex=0000ffff]`.
Expand Down