Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimize RLP serialization #765

Open
wants to merge 36 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
4e8e433
add a profiling test
chirag-parmar Nov 21, 2024
59a5f66
basic rlp profiler
chirag-parmar Nov 23, 2024
419f98f
force expansion of memory by reducing allocation size
chirag-parmar Nov 23, 2024
7981555
optimize using two pass
chirag-parmar Nov 23, 2024
b9636aa
fix two pass
chirag-parmar Nov 24, 2024
058dcca
add a chunked buffer implementation
chirag-parmar Dec 1, 2024
d22d9d5
add a chunked buffer implementation
chirag-parmar Dec 1, 2024
51a5bb9
multiple writers
chirag-parmar Dec 5, 2024
028fdb1
add api tests
chirag-parmar Dec 6, 2024
27e3d94
fixes
chirag-parmar Dec 8, 2024
98be45e
fix double stack update problem
chirag-parmar Dec 10, 2024
1b3f494
using a length tracker
chirag-parmar Dec 11, 2024
0bbbab5
remove code redundancy
chirag-parmar Dec 11, 2024
625f377
collect length in a different manner
chirag-parmar Dec 11, 2024
3645271
add all tests
chirag-parmar Dec 11, 2024
35c4438
revert changes
chirag-parmar Dec 11, 2024
9a51c27
fix
chirag-parmar Dec 11, 2024
6cdd48e
hash writer
chirag-parmar Dec 12, 2024
2fd4ba6
fix
chirag-parmar Dec 12, 2024
618ebab
remove unwanted files
chirag-parmar Dec 12, 2024
e476cfc
conditional length writer execution
chirag-parmar Dec 12, 2024
e6dabc8
perf improvements - remove adds
chirag-parmar Dec 14, 2024
2e5f6ca
remove delete operations
chirag-parmar Dec 14, 2024
220cf2e
do not use delete
chirag-parmar Dec 15, 2024
0667c92
lint code
chirag-parmar Dec 15, 2024
8af1539
fix build warnings
chirag-parmar Dec 15, 2024
e59befb
new benchmark contradicting results
chirag-parmar Dec 19, 2024
79e2c9b
use a static buffer for big endian
chirag-parmar Dec 19, 2024
b10b1e1
make lengths a tuple
chirag-parmar Dec 19, 2024
c3716ca
compile time evaluation of types
chirag-parmar Dec 26, 2024
7c37809
static and dynamic pending lists
chirag-parmar Jan 6, 2025
88570b2
return hash32
chirag-parmar Jan 6, 2025
599f89e
add block header hashing to profiler
chirag-parmar Jan 7, 2025
4fc0632
add a more reliable profiler
chirag-parmar Jan 15, 2025
90f68a2
add/update copyright
chirag-parmar Jan 16, 2025
32af6b4
took TOO long for TWO lines of code
chirag-parmar Jan 16, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -17,4 +17,7 @@ build/
*.exe
*.dll
*.generated.nim
nimble.paths
nimble.paths

#OS specific files
**/.DS_Store
1 change: 0 additions & 1 deletion eth.nimble
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@ let cfg =
" -d:chronicles_log_level=TRACE" &
" --threads:on -d:release"


proc build(args, path, outdir: string) =
  # Runs the compiler command line assembled from `nimc`, `lang`, `cfg` and
  # `flags` (defined outside this excerpt) plus the caller-supplied `args`
  # on `path`, directing the output to `build/<outdir>`.
  exec nimc & " " & lang & " " & cfg & " " & flags & " " & args &
    " --outdir:build/" & outdir & " " & path
Expand Down
6 changes: 3 additions & 3 deletions eth/common/eth_types_rlp.nim
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2022-2024 Status Research & Development GmbH
# Copyright (c) 2022-2025 Status Research & Development GmbH
# Licensed and distributed under either of
# * MIT license (license terms in the root directory or at https://opensource.org/licenses/MIT).
# * Apache v2 license (license terms in the root directory or at https://www.apache.org/licenses/LICENSE-2.0).
Expand Down Expand Up @@ -29,10 +29,10 @@ proc read*(rlp: var Rlp, T: type BlockHashOrNumber): T =
BlockHashOrNumber(isHash: false, number: rlp.read(BlockNumber))

proc rlpHash*[T](v: T): Hash32 =
  ## Returns the hash of the RLP encoding of `v`.
  ##
  ## The hash is obtained through `rlp.encodeHash` instead of hashing the
  ## output of `rlp.encode`; the superseded `keccak256(rlp.encode(v))`
  ## expression is dropped so the body holds a single result expression.
  # NOTE(review): `rlp.encodeHash` is defined outside this excerpt -
  # presumably it feeds the encoding straight into the hasher; confirm.
  Hash32(rlp.encodeHash(v))

proc rlpHash*(tx: PooledTransaction): Hash32 =
  ## Returns the hash of the RLP encoding of the inner transaction `tx.tx`
  ## (the pooled wrapper itself is not part of the hashed data).
  ##
  ## The superseded `keccak256(rlp.encode(tx.tx))` expression is dropped so
  ## the body holds a single result expression.
  Hash32(rlp.encodeHash(tx.tx))

func blockHash*(h: Header): Hash32 {.inline.} =
  ## Hash of a block header, computed as `rlpHash` of the header.
  h.rlpHash()
113 changes: 113 additions & 0 deletions eth/rlp/default_writer.nim
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
# eth
# Copyright (c) 2019-2025 Status Research & Development GmbH
# Licensed and distributed under either of
# * MIT license (license terms in the root directory or at https://opensource.org/licenses/MIT).
# * Apache v2 license (license terms in the root directory or at https://www.apache.org/licenses/LICENSE-2.0).
# at your option. This file may not be copied, modified, or distributed except according to those terms.

import
std/options,
pkg/results,
stew/[arraybuf, assign2, shims/macros],
./priv/defs,
utils

type
  # RLP writer that accumulates the encoded bytes in memory.
  RlpDefaultWriter* = object
    # Stack of lists that are still open. For each list: how many items are
    # still expected and the offset in `output` where its payload begins.
    pendingLists: seq[tuple[remainingItems, startPos: int]]
    # Bytes encoded so far.
    output: seq[byte]

func writeCount(writer: var RlpDefaultWriter, count: int, baseMarker: byte) =
  ## Appends the length header for a payload of `count` bytes/items.
  ##
  ## Counts below `THRESHOLD_LIST_LEN` are folded into a single byte
  ## (`baseMarker + count`). Larger counts are written as a marker byte that
  ## carries the width of the length, followed by the length in big-endian.
  if count >= THRESHOLD_LIST_LEN:
    let prefixWidth = uint64(count).bytesNeeded

    writer.output.add(
      baseMarker + (THRESHOLD_LIST_LEN - 1) + byte(prefixWidth))

    # Grow the buffer first, then fill the new tail with the length bytes.
    let lastIdx = writer.output.len + prefixWidth - 1
    writer.output.setLen(lastIdx + 1)
    writer.output.writeBigEndian(uint64(count), lastIdx, prefixWidth)
  else:
    writer.output.add(baseMarker + byte(count))

proc maybeClosePendingLists(self: var RlpDefaultWriter) =
  ## Accounts for one item just written to the innermost open list.
  ##
  ## When that item was the last one expected, the list is closed: its
  ## payload is shifted right and the list header is written in front of it.
  ## A closed list counts as one item of its parent, so the loop continues
  ## outwards until it reaches a list that still expects items.
  while self.pendingLists.len > 0:
    let lastListIdx = self.pendingLists.high
    doAssert self.pendingLists[lastListIdx].remainingItems > 0

    dec self.pendingLists[lastListIdx].remainingItems
    if self.pendingLists[lastListIdx].remainingItems != 0:
      # The innermost list still expects more items. Nothing to do.
      return

    # The list opened in `startList` has just received its last item.
    let payloadStart = self.pendingLists[lastListIdx].startPos
    self.pendingLists.setLen lastListIdx

    let
      payloadLen = self.output.len - payloadStart
      isShort = payloadLen < int(THRESHOLD_LIST_LEN)
      headerLen =
        if isShort: 1
        else: int(uint64(payloadLen).bytesNeeded) + 1

    # Make room for the header by moving the payload towards the end.
    self.output.setLen(self.output.len + headerLen)
    moveMem(addr self.output[payloadStart + headerLen],
            unsafeAddr self.output[payloadStart],
            payloadLen)

    # Fill in the header at the position the payload used to start at.
    if isShort:
      self.output[payloadStart] = LIST_START_MARKER + byte(payloadLen)
    else:
      let lenBytes = headerLen - 1
      self.output[payloadStart] = LEN_PREFIXED_LIST_MARKER + byte(lenBytes)
      self.output.writeBigEndian(uint64(payloadLen),
                                 payloadStart + lenBytes, lenBytes)

func writeInt*(writer: var RlpDefaultWriter, i: SomeUnsignedInt) =
  ## Appends the RLP encoding of the unsigned integer `i`.
  ##
  ## Zero is written as `BLOB_START_MARKER`, values below
  ## `BLOB_START_MARKER` as a single byte, and anything larger as a length
  ## header followed by the big-endian bytes of the value.
  if i >= typeof(i)(BLOB_START_MARKER):
    let payloadLen = i.bytesNeeded
    writer.writeCount(payloadLen, BLOB_START_MARKER)

    let lastIdx = writer.output.len + payloadLen - 1
    writer.output.setLen(lastIdx + 1)
    writer.output.writeBigEndian(i, lastIdx, payloadLen)
  elif i == typeof(i)(0):
    writer.output.add BLOB_START_MARKER
  else:
    writer.output.add byte(i)

  writer.maybeClosePendingLists()

func appendRawBytes*(self: var RlpDefaultWriter, bytes: openArray[byte]) =
  ## Copies `bytes` verbatim to the end of the output (no header is added)
  ## and counts them as one item of the innermost open list.
  let start = self.output.len
  self.output.setLen(start + bytes.len)
  assign(self.output.toOpenArray(start, self.output.high), bytes)
  self.maybeClosePendingLists()

proc writeBlob*(self: var RlpDefaultWriter, bytes: openArray[byte]) =
  ## Appends `bytes` as an RLP byte string.
  ##
  ## A single byte below `BLOB_START_MARKER` is written as-is; every other
  ## input gets a length header followed by the raw bytes.
  let isSingleLowByte =
    bytes.len == 1 and byte(bytes[0]) < BLOB_START_MARKER

  if not isSingleLowByte:
    self.writeCount(bytes.len, BLOB_START_MARKER)
    # `appendRawBytes` also does the pending-list bookkeeping.
    self.appendRawBytes(bytes)
    return

  self.output.add byte(bytes[0])
  self.maybeClosePendingLists()

proc startList*(self: var RlpDefaultWriter, listSize: int) =
  ## Opens a list that will receive `listSize` items.
  ##
  ## A non-empty list is only recorded here; its header is written once the
  ## last item has arrived. An empty list is complete immediately, so its
  ## header is emitted right away and it counts as one item of its parent.
  if listSize != 0:
    self.pendingLists.add((listSize, self.output.len))
    return

  self.writeCount(0, LIST_START_MARKER)
  self.maybeClosePendingLists()

# Yields the encoded output of the writer.
# Asserts that no list is still open, i.e. every list started with
# `startList` has received all of its items.
template finish*(self: RlpDefaultWriter): seq[byte] =
  doAssert self.pendingLists.len == 0,
    "Insufficient number of elements written to a started list"
  self.output

func clear*(w: var RlpDefaultWriter) =
  ## Prepares the writer for reuse by discarding any open lists and all
  ## bytes written so far. Both sequences are truncated with `setLen(0)`.
  w.pendingLists.setLen(0)
  w.output.setLen(0)
95 changes: 95 additions & 0 deletions eth/rlp/hash_writer.nim
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
# eth
# Copyright (c) 2019-2025 Status Research & Development GmbH
# Licensed and distributed under either of
# * MIT license (license terms in the root directory or at https://opensource.org/licenses/MIT).
# * Apache v2 license (license terms in the root directory or at https://www.apache.org/licenses/LICENSE-2.0).
# at your option. This file may not be copied, modified, or distributed except according to those terms.

import
std/options,
pkg/results,
nimcrypto/keccak,
stew/[arraybuf, shims/macros],
./priv/defs,
utils,
../common/hashes,
length_writer

type
  # RLP writer that feeds the encoded bytes into a keccak256 context
  # instead of storing them.
  RlpHashWriter* = object
    # Hash context that receives every encoded byte.
    keccak: keccak.keccak256
    # Per-list payload length and header length, indexed in the order the
    # lists are started. Taken over from a length tracker by
    # `initHashWriter`.
    lengths*: seq[tuple[listLen, prefixLen: int]]
    # Index into `lengths` of the next non-empty list to be started.
    listCount: int
    # Scratch buffer used to serialise integers in big-endian form.
    bigEndianBuf: array[8, byte]

# Feeds a single byte into the hash context.
template update(writer: var RlpHashWriter, data: byte) =
  writer.keccak.update([data])

# Feeds a run of bytes into the hash context.
template update(writer: var RlpHashWriter, data: openArray[byte]) =
  writer.keccak.update(data)

# Serialises `i` in big-endian form into the first `length` bytes of the
# writer's scratch buffer and feeds exactly those bytes into the hash.
# `length` must not exceed the 8-byte size of `bigEndianBuf`.
template updateBigEndian(writer: var RlpHashWriter, i: SomeUnsignedInt,
                         length: int) =
  writer.bigEndianBuf.writeBigEndian(i, length - 1, length)
  writer.update(writer.bigEndianBuf.toOpenArray(0, length - 1))

func writeCount(writer: var RlpHashWriter, count: int, baseMarker: byte) =
  ## Hashes the length header for a payload of `count` bytes/items.
  ##
  ## Counts below `THRESHOLD_LIST_LEN` are folded into a single byte
  ## (`baseMarker + count`). Larger counts are hashed as a marker byte that
  ## carries the width of the length, followed by the length in big-endian.
  if count >= THRESHOLD_LIST_LEN:
    let prefixWidth = uint64(count).bytesNeeded
    writer.update(baseMarker + (THRESHOLD_LIST_LEN - 1) + byte(prefixWidth))
    writer.updateBigEndian(uint64(count), prefixWidth)
  else:
    writer.update(baseMarker + byte(count))

func writeInt*(writer: var RlpHashWriter, i: SomeUnsignedInt) =
  ## Hashes the RLP encoding of the unsigned integer `i`.
  ##
  ## Zero is hashed as `BLOB_START_MARKER`, values below
  ## `BLOB_START_MARKER` as a single byte, and anything larger as a length
  ## header followed by the big-endian bytes of the value.
  if i >= typeof(i)(BLOB_START_MARKER):
    let payloadLen = i.bytesNeeded
    writer.writeCount(payloadLen, BLOB_START_MARKER)
    writer.updateBigEndian(uint64(i), payloadLen)
  elif i == typeof(i)(0):
    writer.update BLOB_START_MARKER
  else:
    writer.update byte(i)

# Feeds `bytes` into the hash verbatim, without adding any RLP header.
template appendRawBytes*(self: var RlpHashWriter, bytes: openArray[byte]) =
  self.update(bytes)

proc writeBlob*(self: var RlpHashWriter, bytes: openArray[byte]) =
  ## Hashes `bytes` encoded as an RLP byte string.
  ##
  ## A single byte below `BLOB_START_MARKER` is hashed as-is; every other
  ## input gets a length header followed by the raw bytes.
  if bytes.len != 1 or byte(bytes[0]) >= BLOB_START_MARKER:
    self.writeCount(bytes.len, BLOB_START_MARKER)
    self.appendRawBytes(bytes)
  else:
    self.update byte(bytes[0])

proc startList*(self: var RlpHashWriter, listSize: int) =
  ## Hashes the header of a list that will receive `listSize` items.
  ##
  ## The header must be hashed before the items, so the payload length of a
  ## non-empty list is read from the `lengths` table at the position of the
  ## next list to be started.
  if listSize == 0:
    self.writeCount(0, LIST_START_MARKER)
    return

  let (listLen, prefixLen) = self.lengths[self.listCount]
  inc self.listCount

  if listLen >= THRESHOLD_LIST_LEN:
    # `prefixLen` counts the marker byte plus the length bytes.
    let listLenBytes = prefixLen - 1
    self.update(LEN_PREFIXED_LIST_MARKER + byte(listLenBytes))
    self.updateBigEndian(uint64(listLen), listLenBytes)
  else:
    self.update(LIST_START_MARKER + byte(listLen))

func initHashWriter*(tracker: var RlpLengthTracker): RlpHashWriter =
  ## Creates a hash writer that takes over the list lengths collected by
  ## `tracker`. The table is moved, not copied, so `tracker.lengths` must
  ## not be relied upon afterwards.
  RlpHashWriter(lengths: move(tracker.lengths))

# Empties the `lengths` table, finalises the keccak context and yields the
# digest converted to `Hash32`.
template finish*(self: var RlpHashWriter): Hash32 =
  self.lengths.setLen(0)
  self.keccak.finish.to(Hash32)

func clear*(w: var RlpHashWriter) =
  ## Prepares the writer for reuse.
  ##
  ## Besides emptying `lengths`, the list cursor and the hash context are
  ## reset: `startList` indexes `lengths` with `listCount`, and the keccak
  ## context still holds the state of the previous run, so leaving either
  ## untouched would corrupt the next hash. A fresh `lengths` table has to
  ## be supplied before lists are written again.
  w.lengths.setLen(0)
  w.listCount = 0
  w.keccak.init()

110 changes: 110 additions & 0 deletions eth/rlp/length_writer.nim
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
# eth
# Copyright (c) 2019-2025 Status Research & Development GmbH
# Licensed and distributed under either of
# * MIT license (license terms in the root directory or at https://opensource.org/licenses/MIT).
# * Apache v2 license (license terms in the root directory or at https://www.apache.org/licenses/LICENSE-2.0).
# at your option. This file may not be copied, modified, or distributed except according to those terms.

import
std/options,
pkg/results,
stew/[arraybuf, shims/macros],
./priv/defs,
utils

type
  # Bookkeeping for one open list: `idx` is its slot in `lengths`,
  # `remainingItems` the number of items still expected and `startLen` the
  # value of `totalLength` when the list was opened.
  PendingListItem = tuple[idx, remainingItems, startLen: int]

  # Length tracker whose stack of open lists is a fixed-size array of `N`
  # entries.
  StaticRlpLengthTracker*[N: static int] = object
    pendingLists: array[N, PendingListItem]
    # Payload length and header length of every closed list, indexed in the
    # order the lists were started.
    lengths*: seq[tuple[listLen, prefixLen: int]]
    # Number of currently open lists (height of the `pendingLists` stack).
    listTop: int
    # Number of non-empty lists started so far.
    listCount: int
    # Encoded size, in bytes, of everything written so far.
    totalLength*: int

  # Length tracker whose stack of open lists is a growable `seq`.
  # The remaining fields have the same meaning as in the static variant.
  DynamicRlpLengthTracker* = object
    pendingLists: seq[PendingListItem]
    lengths*: seq[tuple[listLen, prefixLen: int]]
    listTop: int
    listCount: int
    totalLength*: int

  # Type class accepted by the tracker procs: either variant.
  RlpLengthTracker* = StaticRlpLengthTracker | DynamicRlpLengthTracker

# Initial size of the `lengths` table and the step by which it is grown.
const LIST_LENGTH = 50

proc maybeClosePendingLists(self: var RlpLengthTracker) =
  ## Accounts for one item just added to the innermost open list.
  ##
  ## When that item was the last one expected, the list is closed: its
  ## payload and header lengths are recorded in `lengths` and the header
  ## size is added to `totalLength`. A closed list counts as one item of
  ## its parent, so the loop continues outwards until it reaches a list
  ## that still expects items.
  while self.listTop > 0:
    let top = self.listTop - 1

    dec self.pendingLists[top].remainingItems
    if self.pendingLists[top].remainingItems != 0:
      return

    let
      listIdx = self.pendingLists[top].idx
      listLen = self.totalLength - self.pendingLists[top].startLen
      prefixLen =
        if listLen < int(THRESHOLD_LIST_LEN): 1
        else: int(uint64(listLen).bytesNeeded) + 1

    # Remember both lengths for the writer that runs after the tracker.
    self.lengths[listIdx] = (listLen, prefixLen)

    # Pop the list off the stack.
    self.listTop = top
    when self is DynamicRlpLengthTracker:
      self.pendingLists.setLen(top)

    self.totalLength += prefixLen

func appendRawBytes*(self: var RlpLengthTracker, bytes: openArray[byte]) =
  ## Accounts for `bytes.len` bytes written without any header and counts
  ## them as one item of the innermost open list.
  inc self.totalLength, bytes.len
  self.maybeClosePendingLists()

proc startList*(self: var RlpLengthTracker, listSize: int) =
  ## Opens a list that will receive `listSize` items.
  ##
  ## An empty list takes one byte and is complete immediately, so it
  ## counts as one item of its parent. A non-empty list is pushed on the
  ## stack of open lists and gets the next free slot in `lengths`.
  if listSize == 0:
    self.totalLength += 1
    self.maybeClosePendingLists()
  else:
    # open a list
    when self is DynamicRlpLengthTracker:
      self.pendingLists.setLen(self.listTop + 1)
    self.pendingLists[self.listTop] =
      (self.listCount, listSize, self.totalLength)
    self.listTop += 1
    self.listCount += 1

    # Keep a free slot in `lengths` for the next list. Comparing with `>=`
    # and sizing from `listCount` also covers a table shorter than the
    # number of started lists (tracker not initialised, or emptied by
    # `clear`); when the table was pre-sized the result is the same as
    # growing by `LIST_LENGTH` on equality.
    if self.listCount >= self.lengths.len:
      self.lengths.setLen(self.listCount + LIST_LENGTH)

func lengthCount(count: int): int {.inline.} =
  ## Size in bytes of the length header for a payload of `count` bytes:
  ## one byte below `THRESHOLD_LIST_LEN`, otherwise one marker byte plus
  ## the bytes needed to store `count`.
  if count < THRESHOLD_LIST_LEN: 1
  else: uint64(count).bytesNeeded + 1

func writeBlob*(self: var RlpLengthTracker, data: openArray[byte]) =
  ## Accounts for `data` encoded as an RLP byte string: one byte for a
  ## single byte below `BLOB_START_MARKER`, otherwise header plus payload.
  let encodedLen =
    if data.len == 1 and byte(data[0]) < BLOB_START_MARKER: 1
    else: lengthCount(data.len) + data.len

  self.totalLength += encodedLen
  self.maybeClosePendingLists()

func writeInt*(self: var RlpLengthTracker, i: SomeUnsignedInt) =
  ## Accounts for the RLP encoding of the unsigned integer `i`: one byte
  ## for values below `BLOB_START_MARKER`, otherwise header plus the bytes
  ## needed to store the value.
  let encodedLen =
    if i < typeof(i)(BLOB_START_MARKER):
      1
    else:
      let payloadLen = i.bytesNeeded
      lengthCount(payloadLen) + payloadLen

  self.totalLength += encodedLen
  self.maybeClosePendingLists()

func initLengthTracker*(self: var RlpLengthTracker) =
  ## Allocates the tracker's tables.
  ##
  ## `lengths` is created with `LIST_LENGTH` ready-made slots so that
  ## entries can be stored by index instead of being appended. For the
  ## dynamic tracker the stack of open lists is given an initial capacity
  ## of five entries.
  when self is DynamicRlpLengthTracker:
    self.pendingLists = newSeqOfCap[PendingListItem](5)
  self.lengths = newSeq[tuple[listLen, prefixLen: int]](LIST_LENGTH)

# Yields the total encoded length accumulated so far.
template finish*(self: RlpLengthTracker): int =
  self.totalLength

func clear*(w: var RlpLengthTracker) =
  ## Prepares the tracker for reuse.
  ##
  ## Besides emptying the tables, the stack height, the list counter and
  ## the accumulated length are reset; otherwise the next run would start
  ## from the counters of the previous one. `lengths` is left empty.
  w.lengths.setLen(0)
  when w is DynamicRlpLengthTracker:
    w.pendingLists.setLen(0)
  w.listTop = 0
  w.listCount = 0
  w.totalLength = 0
Loading
Loading