Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve perf for dynamic index / fieldname (Restore the generated function perf) #35

Merged
merged 7 commits into from
Jan 24, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "Blobs"
uuid = "163b9779-6631-5f90-a265-3de947924de8"
authors = []
version = "1.1.0"
version = "1.1.1"

[deps]
MacroTools = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09"
Expand Down
55 changes: 44 additions & 11 deletions src/blob.jl
Original file line number Diff line number Diff line change
Expand Up @@ -118,27 +118,60 @@ Base.@assume_effects :foldable function _recursive_sum_field_sizes(::Type{T}, ::
end

# Recursion scales better than splatting for large numbers of fields.
Base.@assume_effects :foldable @inline function blob_offset(::Type{T}, i::Int) where {T}
_recursive_sum_field_sizes(T, Val(i - 1))
# Offset of the `i`-th field of `T`, as used to locate that field inside a blob of `T`.
#
# Two strategies, chosen by struct size:
#   * up to 32 fields: index into the tuple produced by `blob_offsets(T)`;
#   * more than 32 fields: beyond this size the tuple construction in `blob_offsets(T)`
#     refuses to const-fold when `i` is only known at runtime, which would mean building
#     the tuple at runtime with many allocations. For those structs a single dynamic
#     dispatch on `Val(i - 1)` has friendlier performance.
@inline function blob_offset(::Type{T}, i::Int) where {T}
    fieldcount(T) > 32 && return _recursive_sum_field_sizes(T, Val(i - 1))
    return blob_offsets(T)[i]
end

# Tuple holding the offset of every field of `T`, in field order: the first field sits at
# offset 0 and each later field at the previous offset plus the previous field's
# `self_size`. Declared `:foldable` so the call is eligible for compile-time evaluation.
Base.@assume_effects :foldable function blob_offsets(::Type{T}) where {T}
    offsets = _recursive_field_offsets(T)
    return offsets
end

# Entry point of the recursion: start from the full field count of `T`.
function _recursive_field_offsets(::Type{T}) where {T}
    return _recursive_field_offsets(T, Val(fieldcount(T)))
end
# Base cases: no fields at all, and a single field (which always starts at offset 0).
_recursive_field_offsets(::Type, ::Val{0}) = ()
_recursive_field_offsets(::Type, ::Val{1}) = (0,)
# Recursive case: extend the offsets of the first `i - 1` fields with the offset of field
# `i`, which begins where field `i - 1` ends.
function _recursive_field_offsets(::Type{T}, ::Val{i}) where {T,i}
    preceding = _recursive_field_offsets(T, Val(i - 1))
    next_offset = last(preceding) + self_size(fieldtype(T, i - 1))
    return (preceding..., next_offset)
end


# Manually write a compile-time loop in the type domain, to enforce constant-folding the
# fieldindexes even for large structs (with e.g. 100 fields). This might make compiling a
# touch slower, but it allows this to work even for large structs, like the manually-written
# `@generated` functions did before.
@inline function fieldidx(::Type{T}, ::Val{field}) where {T,field}
return _fieldidx_lookup(T, Val(field), Val(fieldcount(T)))
# Build the fieldname => fieldindex lookup table for `T` as a NamedTuple, for example
# `(re = 1, im = 2)` for `Complex{Float64}`. Declared `:foldable` so the table is eligible
# for compile-time evaluation; callers can then index it with a runtime `Symbol`.
Base.@assume_effects :foldable function fieldindexes(::Type{T}) where {T}
    return _recursive_fieldindexes(T, Val(fieldcount(T)))
end

# Base case: an empty NamedTuple (not an empty Tuple), so the table is a NamedTuple for
# every `T`, including structs without fields, and `haskey(table, ::Symbol)` always
# applies to it.
_recursive_fieldindexes(::Type{T}, ::Val{0}) where {T} = NamedTuple()

# Recursive case: take the table for the first `i - 1` fields and append the entry
# `fieldname(T, i) => i`.
function _recursive_fieldindexes(::Type{T}, ::Val{i}) where {T,i}
    next = _recursive_fieldindexes(T, Val(i - 1))
    names = (fieldnames(typeof(next))..., fieldname(T, i))
    # Splatting a NamedTuple yields its values, i.e. the indexes collected so far.
    return NamedTuple{names}((next..., i))
end
# Linear search for `field` among the first `i` fields of `T`, written as recursion over
# `Val(i)`: check field `i`, otherwise continue with `Val(i - 1)`.
# Reaching `Val{0}` means no field matched, which raises an error.
function _fieldidx_lookup(::Type{T}, ::Val{field}, ::Val{0}) where {T,field}
    return error("$T has no field $field")
end
function _fieldidx_lookup(::Type{T}, ::Val{field}, ::Val{i}) where {T,i,field}
    if fieldname(T, i) === field
        return i
    end
    return _fieldidx_lookup(T, Val(field), Val(i - 1))
end

# NOTE: An important optimization here is that the static operations that can be performed
# only on the type do not depend on the possibly runtime value `field`. We precompute the
# fieldname => fieldidx lookup table at compile time (as a NamedTuple), then use it at
# runtime. If the field is a known compiler constant (as in the `x.y` case), all the better.
# Field access on a blob: return a `Blob{FT}` for field `field` of `T`, where `FT` is the
# declared type of that field and the new blob is `blob` advanced by the field's offset.
# Raises an error through `_throw_missing_field_error` when `T` has no field `field`.
@inline function Base.getindex(blob::Blob{T}, field::Symbol) where {T}
    # The lookup table depends only on `T`, never on the (possibly runtime) `field`.
    # Do not lift `field` into `Val(field)` here: for a runtime symbol that forces a
    # dynamic dispatch on every access.
    fieldidx_lookup = fieldindexes(T)
    haskey(fieldidx_lookup, field) || _throw_missing_field_error(T, field)
    i = fieldidx_lookup[field]
    FT = fieldtype(T, i)
    return Blob{FT}(blob + blob_offset(T, i))
end
# Raise the "no such field" error for type `T` and field name `field`. `@noinline` keeps
# the message interpolation and the `error` call in their own function body instead of
# being inlined into callers.
@noinline function _throw_missing_field_error(T, field)
    return error("$T has no field $field")
end

@noinline function _throw_getindex_boundserror(blob::Blob, i::Int)
throw(BoundsError(blob, i))
Expand Down
1 change: 0 additions & 1 deletion test/compat-tests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,6 @@ bbv = Blobs.malloc_and_init(BlobBitVector, 3)
pbv = @v bbv
pbv[2] = true
@test pbv[2] == true
@test pv[2] == Foo(2, 2.2)
pbv[1] = false
pbv[3] = false
# tests iteration
Expand Down
Loading