# Final state of `preduce` from a two-commit patch series against `src/value.jl`
# (a051fc8 "implement a preliminary version of pairwise reduce", followed by
# 7edb31f "use a stacky version of parallel reduce").

"""
    preduce(op, xs; init = zero(eltype(xs)))

An alternative of `Base.reduce` which does a "pairwise" reduction in the shape
of a binary merge tree, like in mergesort. In general this is a little more
complex, but if the reduced value "grows" with more elements added (such as
when adding a lot of [`LinearValue`](@ref)s together), this is able to prevent
a complexity explosion by postponing "large" reducing operations as much as
possible.

In the specific case with adding lots of [`LinearValue`](@ref)s and
[`QuadraticValue`](@ref)s together, this effectively squashes the reduction
complexity from something around `O(n^2)` to `O(n)` (with a little larger
constant factor).

# Arguments
- `op`: binary operation. It is expected to be associative; it does not need
  to be commutative, because operands are always combined in the order in
  which they appear in `xs`.
- `xs`: any iterable. Only iteration is required (no `length`, no indexing).

# Keywords
- `init`: value returned for empty `xs`, and otherwise combined once, as the
  left operand, with the reduction of all elements. Partial results are stored
  in a `Vector{typeof(init)}`, so the elements and the results of `op` must be
  convertible to `typeof(init)`. The default `zero(eltype(xs))` requires `zero`
  to be defined for the element type; pass `init` explicitly otherwise.

# Returns
`init` if `xs` is empty, otherwise `op(init, r)` where `r` is the pairwise
reduction of the elements of `xs` in their original order.
"""
function preduce(op, xs; init = zero(eltype(xs)))
    # This works by simulating integer increment and carry to organize the
    # reductions in a (mildly begin-biased) tree. `used` stores the bits of the
    # integer, `vals` the partial results associated with the set bits. Slot
    # `k` always covers elements that come *before* the ones covered by any
    # lower slot. The stack grows on demand, so `length(xs)` is never needed.
    vals = typeof(init)[]
    used = Bool[]

    for x in xs
        carry = x
        idx = 1
        while idx <= length(used) && used[idx]
            # The stored partial result is older than the carry, thus it has to
            # stay on the left-hand side to preserve the order of elements.
            carry = op(vals[idx], carry)
            used[idx] = false
            idx += 1
        end
        if idx > length(used)
            # the carry overflowed the current stack, add one more bit
            push!(vals, carry)
            push!(used, true)
        else
            # hit a zero, no more carrying
            vals[idx] = carry
            used[idx] = true
        end
    end

    # Collect all used bits. Start from the lowest one (smallest, newest
    # partial result) and prepend the older ones, so that the small values get
    # merged first and the order of elements is kept.
    lowest = findfirst(used)
    lowest === nothing && return init # nothing was reduced, `xs` was empty

    acc = vals[lowest]
    for idx in (lowest + 1):lastindex(vals)
        if used[idx]
            acc = op(vals[idx], acc)
        end
    end
    return op(init, acc)
end