diff --git a/immut/array/README.md b/immut/array/README.md index 5382925d0..9a59ecb38 100644 --- a/immut/array/README.md +++ b/immut/array/README.md @@ -35,6 +35,17 @@ println(arr1) // of([1, 2, 3, 4, 5]) println(arr2) // of([1, 2, 10, 4, 5, 6]) ``` +## Concatenation + +You can use `concat()` to concatenate two arrays. + +```moonbit +let arr1 = @immut/array.of([1, 2, 3]) +let arr2 = @immut/array.of([4, 5, 6]) +let arr3 = arr1.concat(arr2) +println(arr3) // of([1, 2, 3, 4, 5, 6]) +``` + ## Query You can use `op_get()` to get the value at the index, or `length()` to get the length of the array, or `is_empty()` to check whether the array is empty. @@ -57,4 +68,9 @@ println(arr.each(fn(v) { println("element \{v}") })) println(arr.eachi(fn(i, v) { println("index: \{i}, element: \{v}") })) ``` +## TODO +- [ ] Add `split` and other operations that can be derived from `split` and `concat` like `insert` and `delete`. +- [ ] Add an algorithm description in README, since this algorithm does not use the invariant in the ICFP paper. Instead, it uses the "search step invariant" in Hypirion's thesis. +- [ ] Add a benchmark to compare the performance with the previous version. +- [ ] Optimizations such as tail. diff --git a/immut/array/array.mbt b/immut/array/array.mbt index 37eee8e86..68a25b227 100644 --- a/immut/array/array.mbt +++ b/immut/array/array.mbt @@ -12,6 +12,10 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+//----------------------------------------------------------------------------- +// Constructors (construct T or convert other types to T) +//----------------------------------------------------------------------------- + ///| /// Return a new empty array pub fn new[A]() -> T[A] { @@ -19,34 +23,59 @@ pub fn new[A]() -> T[A] { } ///| -pub impl[A : Show] Show for T[A] with output(self, logger) { - logger.write_iter(self.iter(), prefix="@immut/array.of([", suffix="])") +/// Create a persistent array with a given length and value. +pub fn make[A](len : Int, value : A) -> T[A] { + new_by_leaves(len, fn(_s, l) { FixedArray::make(l, value) }) } ///| -pub fn is_empty[A](self : T[A]) -> Bool { - self.size == 0 +/// Create a persistent array with a given length and a function to generate values. +pub fn makei[A](len : Int, f : (Int) -> A) -> T[A] { + new_by_leaves(len, fn(s, l) { FixedArray::makei(l, fn(i) { f(s + i) }) }) } ///| -pub fn to_array[A](self : T[A]) -> Array[A] { - let arr = [] - self.each(fn(v) { arr.push(v) }) - arr +/// Convert a FixedArray to an @immut/array. +pub fn of[A](arr : FixedArray[A]) -> T[A] { + makei(arr.length(), fn(i) { arr[i] }) } ///| -pub fn iter[A](self : T[A]) -> Iter[A] { - Iter::new(fn(yield_) { - let arr = self.to_array() - for i = 0; i < self.size; i = i + 1 { - if yield_(arr[i]) == IterEnd { - break IterEnd - } - } else { - IterContinue +/// Physically copy the array. +/// Since it is an immutable data structure, +/// it is rarely the case that you would need this function. 
+/// +/// @alert deprecated "We don't copy immutable array" +/// @coverage.skip +pub fn copy[A](self : T[A]) -> T[A] { + fn copy(t : Tree[A]) -> Tree[A] { + match t { + Leaf(l) => Leaf(l.copy()) + Empty => Empty + Node(node, sizes) => + Node( + FixedArray::makei(node.length(), fn(i) { copy(node[i]) }), + match sizes { + Some(sizes) => Some(FixedArray::copy(sizes)) + None => None + }, + ) } - }) + } + + { tree: copy(self.tree), size: self.size, shift: self.shift } +} + +///| +/// Create a persistent array from an array. +/// +/// # Example +/// ``` +/// let v = @array.of([1, 2, 3]) +/// assert_eq!(v, @array.from_array([1, 2, 3])) +/// ``` +pub fn from_array[A](arr : Array[A]) -> T[A] { + makei(arr.length(), fn(i) { arr[i] }) } ///| @@ -54,25 +83,35 @@ pub fn from_iter[A](iter : Iter[A]) -> T[A] { iter.fold(init=new(), fn(arr, e) { arr.push(e) }) } +//----------------------------------------------------------------------------- +// Convertor (convert T to other types) +//----------------------------------------------------------------------------- + ///| -pub fn length[A](self : T[A]) -> Int { - self.size +pub fn to_array[A](self : T[A]) -> Array[A] { + let arr = [] + self.each(fn(v) { arr.push(v) }) + arr } +//----------------------------------------------------------------------------- +// Properties +//----------------------------------------------------------------------------- + ///| -pub fn copy[A](self : T[A]) -> T[A] { - fn copy(t : Tree[A]) -> Tree[A] { - match t { - Leaf(l) => Leaf(l.copy()) - Empty => Empty - Node(node) => - Node(FixedArray::makei(node.length(), fn(i) { copy(node[i]) })) - } - } +pub fn is_empty[A](self : T[A]) -> Bool { + self.size == 0 +} - { tree: copy(self.tree), size: self.size, shift: self.shift } +///| +pub fn length[A](self : T[A]) -> Int { + self.size } +//----------------------------------------------------------------------------- +// Lookup +//----------------------------------------------------------------------------- + ///| 
/// Get a value at the given index. /// @@ -91,6 +130,10 @@ pub fn op_get[A](self : T[A], index : Int) -> A { } } +//----------------------------------------------------------------------------- +// Modifier +//----------------------------------------------------------------------------- + ///| /// Set a value at the given index (immutable). /// @@ -100,19 +143,8 @@ pub fn op_get[A](self : T[A], index : Int) -> A { /// assert_eq!(v.set(1, 10), @array.of([1, 10, 3, 4, 5])) /// ``` pub fn set[A](self : T[A], index : Int, value : A) -> T[A] { - fn set(i : Int, e, s, t : Tree[A]) -> Tree[A] { - match t { - Leaf(l) => Leaf(immutable_set(l, i & bitmask, e)) - Node(node) => { - let idx = shr_as_uint(i, s) & bitmask - Node(immutable_set(node, idx, set(i, e, s - num_bits, node[idx]))) - } - Empty => abort("Index out of bounds") - } - } - { - tree: set(index, value, self.shift, self.tree), + tree: self.tree.set(index, self.shift, value), size: self.size, shift: self.shift, } @@ -127,31 +159,52 @@ pub fn set[A](self : T[A], index : Int, value : A) -> T[A] { /// assert_eq!(v.push(4), @array.of([1, 2, 3, 4])) /// ``` pub fn push[A](self : T[A], value : A) -> T[A] { - if self.size == (branching_factor << self.shift) { - { - tree: Node([self.tree, new_branch([value], self.shift)]), - size: self.size + 1, - shift: self.shift + num_bits, - } - } else { - { - tree: self.tree.add(self.size, self.shift, value), - size: self.size + 1, - shift: self.shift, - } + let (tree, shift) = self.tree.push_end(self.shift, value) + { tree, size: self.size + 1, shift } +} + +///| +/// Given two trees, concatenate them into a new tree. +pub fn concat[A](self : T[A], other : T[A]) -> T[A] { + if self.is_empty() { + return other } + if other.is_empty() { + return self + } + let (tree, shift) = Tree::concat( + self.tree, + self.shift, + other.tree, + other.shift, + true, + ) + { tree, size: self.size + other.size, shift } } ///| -/// Create a persistent array from an array. 
-/// -/// # Example -/// ``` -/// let v = @array.of([1, 2, 3]) -/// assert_eq!(v, @array.from_array([1, 2, 3])) -/// ``` -pub fn from_array[A](arr : Array[A]) -> T[A] { - makei(arr.length(), fn(i) { arr[i] }) +/// Concat two arrays. +pub fn op_add[A](self : T[A], other : T[A]) -> T[A] { + self.concat(other) +} + +//----------------------------------------------------------------------------- +// Iterators +//----------------------------------------------------------------------------- + +///| +/// Return an iterator over the array. +pub fn iter[A](self : T[A]) -> Iter[A] { + Iter::new(fn(yield_) { + let arr = self.to_array() // TODO: it first converts to an array, which is not efficient + for i = 0; i < self.size; i = i + 1 { + if yield_(arr[i]) == IterEnd { + break IterEnd + } + } else { + IterContinue + } + }) } ///| @@ -165,15 +218,7 @@ pub fn from_array[A](arr : Array[A]) -> T[A] { /// assert_eq!(arr, [1, 2, 3, 4, 5]) /// ``` pub fn each[A](self : T[A], f : (A) -> Unit) -> Unit { - fn go(t : Tree[A]) -> Unit { - match t { - Empty => () - Leaf(l) => l.each(f) - Node(n) => n.each(fn(t) { go(t) }) - } - } - - go(self.tree) + self.tree.each(f) } ///| @@ -187,30 +232,7 @@ pub fn each[A](self : T[A], f : (A) -> Unit) -> Unit { /// assert_eq!(arr, [0, 2, 6, 12, 20]) /// ``` pub fn eachi[A](self : T[A], f : (Int, A) -> Unit) -> Unit { - fn go(t : Tree[A], shift : Int, start : Int) -> Unit { - match t { - Empty => () - Leaf(l) => - for i = 0; i < l.length(); i = i + 1 { - f(start + i, l[i]) - } - Node(n) => { - let child_shift = shift - num_bits - let mut start = start - for i = 0; i < n.length(); i = i + 1 { - go(n[i], child_shift, start) - start += 1 << shift - } - } - } - } - - go(self.tree, self.shift, 0) -} - -///| -pub impl[A : Eq] Eq for T[A] with op_equal(self, other) { - self.size == other.size && self.tree == other.tree + self.tree.eachi(f, self.shift, 0) } ///| @@ -222,15 +244,7 @@ pub impl[A : Eq] Eq for T[A] with op_equal(self, other) { /// 
assert_eq!(v.fold(fn(a, b) { a + b }, init=0), 15) /// ``` pub fn fold[A, B](self : T[A], init~ : B, f : (B, A) -> B) -> B { - fn go(t : Tree[A], acc : B) -> B { - match t { - Empty => acc - Leaf(l) => l.fold(f, init=acc) - Node(n) => n.fold(fn(t, acc) { go(acc, t) }, init=acc) - } - } - - go(self.tree, init) + self.tree.fold(init, f) } ///| @@ -242,15 +256,7 @@ pub fn fold[A, B](self : T[A], init~ : B, f : (B, A) -> B) -> B { /// assert_eq!(v.rev_fold(fn(a, b) { a + b }, init=0), 15) /// ``` pub fn rev_fold[A, B](self : T[A], init~ : B, f : (B, A) -> B) -> B { - fn go(t : Tree[A], acc : B) -> B { - match t { - Empty => acc - Leaf(l) => l.rev_fold(f, init=acc) - Node(n) => n.rev_fold(fn(t, acc) { go(acc, t) }, init=acc) - } - } - - go(self.tree, init) + self.tree.rev_fold(init, f) } ///| @@ -290,17 +296,42 @@ pub fn fold_right[A](self : T[A], f : (A, A) -> A, init~ : A) -> A { /// assert_eq!(v.map(fn(e) { e * 2 }), @array.of([2, 4, 6, 8, 10])) /// ``` pub fn map[A, B](self : T[A], f : (A) -> B) -> T[B] { - fn go(t : Tree[A]) -> Tree[B] { - match t { - Empty => Empty - Leaf(l) => Leaf(l.map(f)) - Node(n) => Node(FixedArray::makei(n.length(), fn(i) { go(n[i]) })) - } + { tree: self.tree.map(f), size: self.size, shift: self.shift } +} + +//----------------------------------------------------------------------------- +// Common Traits Implementation +//----------------------------------------------------------------------------- + +///| +pub impl[X : @quickcheck.Arbitrary] @quickcheck.Arbitrary for T[X] with arbitrary( + size, + rs +) { + @quickcheck.Arbitrary::arbitrary(size, rs) |> from_array +} + +///| +pub impl[A : Hash] Hash for T[A] with hash_combine(self, hasher) { + for e in self { + hasher.combine(e) } +} - { tree: go(self.tree), size: self.size, shift: self.shift } +///| +pub impl[A : Eq] Eq for T[A] with op_equal(self, other) { + self.size == other.size && self.tree == other.tree } +///| +pub impl[A : Show] Show for T[A] with output(self, logger) { + 
logger.write_iter(self.iter(), prefix="@immut/array.of([", suffix="])") +} + +//----------------------------------------------------------------------------- +// For Internal Use +//----------------------------------------------------------------------------- + ///| fn new_by_leaves[A](len : Int, gen_leaf : (Int, Int) -> FixedArray[A]) -> T[A] { fn tree(cap, len, s) -> Tree[A] { @@ -319,7 +350,9 @@ fn new_by_leaves[A](len : Int, gen_leaf : (Int, Int) -> FixedArray[A]) -> T[A] { tree(cap / branching_factor, len, i) } - Node(FixedArray::makei(child_count, child)) + // Use None here because the implementation of `new_by_leaves` ensures that the tree is full + // and we can use radix indexing. + Node(FixedArray::makei(child_count, child), None) } } @@ -328,7 +361,7 @@ fn new_by_leaves[A](len : Int, gen_leaf : (Int, Int) -> FixedArray[A]) -> T[A] { } else { let (cap, shift) = loop len, branching_factor, 1 { len, m_pow, depth => - match len < m_pow { + match len <= m_pow { false => continue len, m_pow * branching_factor, depth + 1 true => (m_pow, (depth - 1) * num_bits) } @@ -337,88 +370,8 @@ fn new_by_leaves[A](len : Int, gen_leaf : (Int, Int) -> FixedArray[A]) -> T[A] { } } -test "new_by_leaves" { - let e : T[Int] = new_by_leaves(0, fn(_s, _l) { abort("never reach") }) - let v = new_by_leaves(5, fn(_s, l) { FixedArray::make(l, 1) }) - let v2 = new_by_leaves(33, fn(_s, l) { FixedArray::make(l, 10) }) - inspect!(e, content="@immut/array.of([])") - inspect!(v, content="@immut/array.of([1, 1, 1, 1, 1])") - inspect!( - v2, - content="@immut/array.of([10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10])", - ) - let v3 = new_by_leaves(32, fn(_s, l) { FixedArray::make(l, 10) }) - inspect!( - v3, - content="@immut/array.of([10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10])", - ) -} - -///| -/// Create a persistent array 
with a given length and value. -pub fn make[A](len : Int, value : A) -> T[A] { - new_by_leaves(len, fn(_s, l) { FixedArray::make(l, value) }) -} - -///| -/// Create a persistent array with a given length and a function to generate values. -pub fn makei[A](len : Int, f : (Int) -> A) -> T[A] { - new_by_leaves(len, fn(s, l) { FixedArray::makei(l, fn(i) { f(s + i) }) }) -} - ///| -pub fn of[A](arr : FixedArray[A]) -> T[A] { - makei(arr.length(), fn(i) { arr[i] }) -} - -///| -pub impl[X : @quickcheck.Arbitrary] @quickcheck.Arbitrary for T[X] with arbitrary( - size, - rs -) { - @quickcheck.Arbitrary::arbitrary(size, rs) |> from_array -} - -///| -pub impl[A : Hash] Hash for T[A] with hash_combine(self, hasher) { - for e in self { - hasher.combine(e) - } -} - -test "mix" { - let mut v = new() - inspect!(v.tree.is_empty_tree(), content="true") - for i = 0; i < 100; i = i + 1 { - v = v.push(i) - } - inspect!( - v, - content="@immut/array.of([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99])", - ) - let mut v2 = v.copy() - for i = 0; i < 100; i = i + 1 { - v2 = v2.set(i, i * 2) - } - let mut ct = 0 - v.each(fn(e) { ct = ct + e }) - inspect!(ct, content="4950") - v2.each(fn(e) { ct = ct + e }) - inspect!(ct, content="14850") - v2 = v2.map(fn(e) { e * 2 }) - let ct1 = fold(v2, fn(a, b) { a + b }, init=0) - let ct2 = rev_fold(v2, fn(a, b) { a + b }, init=0) - inspect!(ct1, content="19800") - inspect!(ct2, content="19800") - inspect!(v.tree.is_empty_tree(), content="false") - let large_const = branching_factor * branching_factor + 1 - let mut v = new() - for i = 0; i < large_const; i = i + 1 { - v = v.push(i) - } - let vec = [] - v.eachi(fn(i, 
_e) { vec.push(i) }) - for i = 0; i < large_const; i = i + 1 { - assert_eq!(vec[i], i) - } +/// Print the tree structure of the internal tree. For debug use. +fn to_debug_string[A : Show](self : T[A]) -> String { + self.tree.to_string() } diff --git a/immut/array/array.mbti b/immut/array/array.mbti index b92b5e539..098bbdf8a 100644 --- a/immut/array/array.mbti +++ b/immut/array/array.mbti @@ -18,7 +18,8 @@ fn of[A](FixedArray[A]) -> T[A] // Types and methods type T impl T { - copy[A](Self[A]) -> Self[A] + concat[A](Self[A], Self[A]) -> Self[A] + copy[A](Self[A]) -> Self[A] //deprecated each[A](Self[A], (A) -> Unit) -> Unit eachi[A](Self[A], (Int, A) -> Unit) -> Unit fold[A, B](Self[A], init~ : B, (B, A) -> B) -> B @@ -34,6 +35,7 @@ impl T { map[A, B](Self[A], (A) -> B) -> Self[B] new[A]() -> Self[A] //deprecated of[A](FixedArray[A]) -> Self[A] //deprecated + op_add[A](Self[A], Self[A]) -> Self[A] op_get[A](Self[A], Int) -> A push[A](Self[A], A) -> Self[A] rev_fold[A, B](Self[A], init~ : B, (B, A) -> B) -> B diff --git a/immut/array/array_mix_wbtest.mbt b/immut/array/array_mix_wbtest.mbt new file mode 100644 index 000000000..b45eb003a --- /dev/null +++ b/immut/array/array_mix_wbtest.mbt @@ -0,0 +1,42 @@ +// Copyright 2025 International Digital Economy Academy +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +/// Test names follow the format +/// `<random seed>-<number of operations>-<maximum level of the tree>`. +/// If the given random seed is shorter than 32 chars, it will be repeated +/// up to 32 chars. + +test "DEADBEEF-10-4" { + run_test!("DEADBEEF", 10, 4) +} + +test "LIVEBEEF-50-3" { + run_test!("LIVEBEEF", 50, 3) +} + +test "HAPPYDOG-200-2" { + run_test!("HAPPYDOG", 200, 2) +} + +test "HELLOWOLRD-4-1" { + run_test!("HELLOWOLRD", 4, 1) +} + +///| +fn run_test(seed : String, rep : Int, max_lvl : Int) -> Unit! { + let seed = repeat_up_to_32(seed) // repeat the seed up to 32 chars, as the naming note above describes + let rng = @random.new(seed~) + let rs = random_test_gen(rng, rep, max_lvl) // `rep` operations, tree level bounded by `max_lvl` + execute_array_test!(rs) +} diff --git a/immut/array/array_test.mbt b/immut/array/array_test.mbt index 77ede2c3b..c4d29cfa6 100644 --- a/immut/array/array_test.mbt +++ b/immut/array/array_test.mbt @@ -61,19 +61,6 @@ test "length" { inspect!(ve.length(), content="0") } -test "copy" { - let v = @array.of([1, 2, 3, 4, 5]) - let vc = v.copy() - inspect!(vc, content="@immut/array.of([1, 2, 3, 4, 5])") - inspect!(v == vc, content="true") - assert_false!(physical_equal(v, vc)) - let v = @array.new() - let vc : @array.T[Int] = v.copy() - inspect!(vc, content="@immut/array.of([])") - inspect!(v == vc, content="true") - assert_false!(physical_equal(v, vc)) -} - test "op_get" { let v = @array.of([1, 2, 3, 4, 5]) inspect!(v[0], content="1") @@ -222,3 +209,14 @@ test "hash" { inspect!(l1.hash() == l4.hash(), content="false") inspect!(l4.hash() == l4.hash(), content="true") } + +test "concat_empty" { + let a = @array.of([1, 2, 3]) + let b : @array.T[Int] = @array.new() + let c = a.concat(b) + inspect!(c, content="@immut/array.of([1, 2, 3])") + let a : @array.T[Int] = @array.new() + let b = @array.of([1, 2, 3]) + let c = a.concat(b) + inspect!(c, content="@immut/array.of([1, 2, 3])") +} diff --git a/immut/array/array_wbtest.mbt b/immut/array/array_wbtest.mbt new file mode 100644 index 000000000..d3560a3e8 --- /dev/null +++ b/immut/array/array_wbtest.mbt @@ 
-0,0 +1,145 @@ +// Copyright 2025 International Digital Economy Academy +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +test "new_by_leaves" { + let full : T[Int] = new_by_leaves(branching_factor_power(2), fn(_s, l) { + FixedArray::make(l, 1) + }) + let full_arr = FixedArray::make(branching_factor_power(2), 1) + check_fixedarray_eq!(full_arr, full) + let e : T[Int] = new_by_leaves(0, fn(_s, _l) { abort("never reach") }) + let v = new_by_leaves(5, fn(_s, l) { FixedArray::make(l, 1) }) + let v2 = new_by_leaves(33, fn(_s, l) { FixedArray::make(l, 10) }) + inspect!(e, content="@immut/array.of([])") + inspect!(v, content="@immut/array.of([1, 1, 1, 1, 1])") + inspect!( + v2, + content="@immut/array.of([10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10])", + ) + let v3 = new_by_leaves(32, fn(_s, l) { FixedArray::make(l, 10) }) + inspect!( + v3, + content="@immut/array.of([10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10])", + ) +} + +test "mix" { + let mut v = new() + inspect!(v.tree.is_empty_tree(), content="true") + for i = 0; i < 100; i = i + 1 { + v = v.push(i) + } + inspect!( + v, + content="@immut/array.of([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 
51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99])", + ) + let mut v2 = v + for i = 0; i < 100; i = i + 1 { + v2 = v2.set(i, i * 2) + } + let mut ct = 0 + v.each(fn(e) { ct = ct + e }) + inspect!(ct, content="4950") + v2.each(fn(e) { ct = ct + e }) + inspect!(ct, content="14850") + v2 = v2.map(fn(e) { e * 2 }) + let ct1 = v2.fold(fn(a, b) { a + b }, init=0) + let ct2 = v2.rev_fold(fn(a, b) { a + b }, init=0) + inspect!(ct1, content="19800") + inspect!(ct2, content="19800") + inspect!(v.tree.is_empty_tree(), content="false") + let large_const = branching_factor * branching_factor + 1 + let mut v = new() + for i = 0; i < large_const; i = i + 1 { + v = v.push(i) + } + let vec = [] + v.eachi(fn(i, _e) { vec.push(i) }) + for i = 0; i < large_const; i = i + 1 { + assert_eq!(vec[i], i) + } +} + +test "op_get" { + let bf = branching_factor_power(4) + let v_content = random_array(bf) + let v = from_iter(v_content.iter()) + inspect!(v.length(), content=bf.to_string()) + inspect!(v[0], content=v_content[0].to_string()) + inspect!(v[bf - 1], content=v_content[bf - 1].to_string()) + inspect!(v[bf / 2], content=v_content[bf / 2].to_string()) + let bf = branching_factor + let v_content = random_array(bf) + let v = from_iter(v_content.iter()) + inspect!(v.length(), content=bf.to_string()) + inspect!(v[0], content=v_content[0].to_string()) + inspect!(v[bf - 1], content=v_content[bf - 1].to_string()) + inspect!(v[bf / 2], content=v_content[bf / 2].to_string()) +} + +test "concat-two-full-tree" { + let bf = branching_factor_power(4) + execute_array_test!(gen_concat_seq_from_len_array([bf, bf])) +} + +test "concat-full-tree-and-a-leaf" { + let bf = branching_factor_power(4) + execute_array_test!(gen_concat_seq_from_len_array([bf, 1])) +} + +test "concat-a-leaf-and-full-tree" { + let bf = branching_factor_power(4) + 
execute_array_test!(gen_concat_seq_from_len_array([1, bf])) +} + +test "concat-two-leaf" { + execute_array_test!(gen_concat_seq_from_len_array([1, 1])) +} + +test "concat-multiple-full-tree" { + let bf = branching_factor_power(4) + execute_array_test!(gen_concat_seq(3, fn(_i) { bf })) +} + +test "concat-multiple-random-tree" { + let bf = branching_factor_power(2) + let rng = @random.new(seed=b"DEADBEEFDEADBEEFDEADBEEFDEADBEEF") + execute_array_test!(gen_concat_seq(2, fn(_i) { rng.int(limit=bf) })) +} + +///| +/// Generate a sequence of concatenation operations as a test case, +/// +/// Inputs: +/// - `n`: the number of operations to be generated +/// - `len_gen`: a function that generates the length of the array to be concatenated +fn gen_concat_seq(n : Int, len_gen : (Int) -> Int) -> Array[Op] { + let ret = [] + for i = 0; i < n; i = i + 1 { + ret.push(Op::Concat(random_array(len_gen(i)))) + } + ret +} + +///| +/// Generate a sequence of concatenation operations as a test case, +/// similar to `gen_concat_seq`, but this time, the length of the array +/// to be concatenated is given as an array. 
+fn gen_concat_seq_from_len_array(len : Array[Int]) -> Array[Op] { + let ret = [] + for i = 0; i < len.length(); i = i + 1 { + ret.push(Op::Concat(random_array(len[i]))) + } + ret +} diff --git a/immut/array/moon.pkg.json b/immut/array/moon.pkg.json index d8a1f022b..38a2553a6 100644 --- a/immut/array/moon.pkg.json +++ b/immut/array/moon.pkg.json @@ -7,8 +7,18 @@ "alias": "_core/array" } ], + "wbtest-import": [ + "moonbitlang/core/random", + "moonbitlang/core/bytes" + ], "targets": { - "panic_test.mbt": ["not", "native"], - "panic_wbtest.mbt": ["not", "native"] + "panic_test.mbt": [ + "not", + "native" + ], + "panic_wbtest.mbt": [ + "not", + "native" + ] } } diff --git a/immut/array/operation.mbt b/immut/array/operation.mbt deleted file mode 100644 index 8dfd8ced9..000000000 --- a/immut/array/operation.mbt +++ /dev/null @@ -1,34 +0,0 @@ -// Copyright 2025 International Digital Economy Academy -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -///| -fn immutable_set[T](arr : FixedArray[T], i : Int, v : T) -> FixedArray[T] { - let arr = arr.copy() - arr[i] = v - arr -} - -///| -fn immutable_push[T](arr : FixedArray[T], val : T) -> FixedArray[T] { - let len = arr.length() - let new_arr = FixedArray::make(len + 1, val) - arr.blit_to(new_arr, len~) - new_arr[len] = val - new_arr -} - -///| -fn shr_as_uint(x : Int, y : Int) -> Int { - (x.reinterpret_as_uint() >> y).reinterpret_as_int() -} diff --git a/immut/array/panic_wbtest.mbt b/immut/array/panic_wbtest.mbt index f9309a194..366968855 100644 --- a/immut/array/panic_wbtest.mbt +++ b/immut/array/panic_wbtest.mbt @@ -14,15 +14,15 @@ test "panic get_first on empty tree should panic" { let tree : Tree[Int] = Tree::Empty - get_first(tree) |> ignore + tree.get_first() |> ignore } test "panic get_last on empty tree should panic" { let tree : Tree[Int] = Tree::Empty - get_last(tree) |> ignore + tree.get_last() |> ignore } test "panic get on empty tree should panic" { let tree : Tree[Int] = Tree::Empty - get(tree, 0, 5) |> ignore + tree.get(0, 5) |> ignore } diff --git a/immut/array/tree.mbt b/immut/array/tree.mbt index 0ecbf3f87..e74b8da29 100644 --- a/immut/array/tree.mbt +++ b/immut/array/tree.mbt @@ -12,76 +12,732 @@ // See the License for the specific language governing permissions and // limitations under the License. +/// A tree data structure that backs the `immut/array`. + +//----------------------------------------------------------------------------- +// Hyperparameters +//----------------------------------------------------------------------------- + ///| +/// The controlling factor of the tree depth. +/// This is the only parameter you normally would adjust. let num_bits = 5 ///| +/// Invariant: `branching_factor` is a power of 2. let branching_factor : Int = 1 << num_bits ///| let bitmask : Int = branching_factor - 1 +///| +/// The threshold for switching to a linear search. 
+const LINEAR_THRESHOLD : Int = 4 + +///| +/// The $e_{max}$ parameter of the search step invariant. +const E_MAX : Int = 2 + +///| +/// $e_{max} / 2$. +let e_max_2 : Int = E_MAX / 2 + +//----------------------------------------------------------------------------- +// Constructors +//----------------------------------------------------------------------------- + ///| fn Tree::empty[T]() -> Tree[T] { Tree::Empty } ///| -fn get_first[T](self : Tree[T]) -> T { +/// Create a new tree with a single leaf. Note that the resulting tree is a left-skewed tree. +fn new_branch_left[T](leaf : FixedArray[T], shift : Int) -> Tree[T] { + match shift { + 0 => Leaf(leaf) + s => Node([new_branch_left(leaf, s - num_bits)], None) // size is None because we can use radix indexing + } +} + +//----------------------------------------------------------------------------- +// Properties +//----------------------------------------------------------------------------- + +///| +fn Tree::is_empty_tree[T](self : Tree[T]) -> Bool { + match self { + Tree::Empty => true + _ => false + } +} + +//----------------------------------------------------------------------------- +// Getters +//----------------------------------------------------------------------------- + +///| +/// Get the leftmost (first) element of a tree. Aborts on an empty tree. +fn Tree::get_first[T](self : Tree[T]) -> T { match self { Leaf(leaf) => leaf[0] - Node(node) => get_first(node[0]) + Node(node, _) => node[0].get_first() Empty => abort("Index out of bounds") } } ///| -fn get_last[T](self : Tree[T]) -> T { +/// Get the rightmost (last) element of a tree. Aborts on an empty tree. +fn Tree::get_last[T](self : Tree[T]) -> T { match self { Leaf(leaf) => leaf[leaf.length() - 1] - Node(node) => get_last(node[node.length() - 1]) + Node(node, _) => node[node.length() - 1].get_last() Empty => abort("Index out of bounds") } } ///| -fn get[T](self : Tree[T], index : Int, shift : Int) -> T { +/// Get the element at the given index. +/// +/// Precondition: +/// - `self` is of height `shift / num_bits`. 
+fn Tree::get[T](self : Tree[T], index : Int, shift : Int) -> T { + fn get_radix(node : Tree[T], shift : Int) -> T { + match node { + Leaf(leaf) => leaf[index & bitmask] + Node(node, None) => + get_radix(node[radix_indexing(index, shift)], shift - num_bits) + Node(_, Some(_)) => + abort("Unreachable: Node should not have sizes in get_radix") + Empty => abort("Index out of bounds") + } + } + match self { - Leaf(leaf) => leaf[index & bitmask] - Node(node) => - get(node[shr_as_uint(index, shift) & bitmask], index, shift - num_bits) + Leaf(leaf) => leaf[index] + Node(children, Some(sizes)) => { + let branch_index = get_branch_index(sizes, index) + let sub_index = if branch_index == 0 { + index + } else { + index - sizes[branch_index - 1] + } + children[branch_index].get(sub_index, shift - num_bits) + } + Node(_, None) => get_radix(self, shift) Empty => abort("Index out of bounds") } } +//----------------------------------------------------------------------------- +// Mutators +//----------------------------------------------------------------------------- + ///| -fn is_empty_tree[T](self : Tree[T]) -> Bool { +/// Set a value at the given index. +/// +/// Precondition: +/// - `self` is of height `shift / num_bits`. 
+fn Tree::set[T](self : Tree[T], index : Int, shift : Int, value : T) -> Tree[T] { + // TODO: optimize this as loop + fn set_radix(node : Tree[T], shift : Int) -> Tree[T] { + match node { + Leaf(leaf) => Leaf(immutable_set(leaf, index & bitmask, value)) + Node(node, None) => { + let sub_idx = radix_indexing(index, shift) // computed once; used both to read the child and to write it back + Node( + immutable_set( + node, + sub_idx, + set_radix(node[sub_idx], shift - num_bits), + ), + None, + ) + } + Node(_, Some(_)) => + abort("Unreachable: Node should not have sizes in set_radix") + Empty => abort("Index out of bounds") + } + } + match self { - Tree::Empty => true - _ => false + Leaf(leaf) => Leaf(immutable_set(leaf, index & bitmask, value)) + Node(children, Some(sizes)) => { + let branch_index = get_branch_index(sizes, index) + let sub_index = if branch_index == 0 { + index + } else { + index - sizes[branch_index - 1] // rebase past the preceding children (`sizes` is used as cumulative counts here) + } + Node( + immutable_set( + children, + branch_index, + children[branch_index].set(sub_index, shift - num_bits, value), + ), + Some(sizes), + ) + } + Node(_children, None) => set_radix(self, shift) + Empty => abort("Index out of bounds") } } ///| -fn new_branch[T](leaf : FixedArray[T], shift : Int) -> Tree[T] { - match shift { - 0 => Leaf(leaf) - s => Node([new_branch(leaf, s - num_bits)]) +/// Push a value to the end of the tree. +/// Returns the new tree together with its shift. +/// +/// Precondition: +/// - The height of `self` = `shift` / `num_bits` (the height starts from 0). +fn Tree::push_end[T](self : Tree[T], shift : Int, value : T) -> (Tree[T], Int) { + fn update_sizes_last(sizes : FixedArray[Int]?) -> FixedArray[Int]? { + match sizes { + Some(sizes) => { + let new_sizes = sizes.copy() + new_sizes[new_sizes.length() - 1] += 1 + Some(new_sizes) + } + None => None + } + } + + fn push_sizes_last(sizes : FixedArray[Int]?) -> FixedArray[Int]? 
{ + match sizes { + Some(sizes) => Some(immutable_push(sizes, 1 + sizes[sizes.length() - 1])) + None => None + } + } + + fn worker(node : Tree[T], shift : Int) -> Tree[T]? { + match node { + Leaf(leaf) => { + if shift != 0 { + abort( + "Unreachable: Leaf should not have a non-zero shift, which means we have not reached the bottom of the tree", + ) + } + if leaf.length() < branching_factor { + Some(Leaf(immutable_push(leaf, value))) + } else { + None + } + } + Node(nodes, sizes) => { + let len = nodes.length() + match worker(nodes[len - 1], shift - num_bits) { + // We have successfully pushed the value, now duplicate its ancestor nodes. + Some(new_node) => { + let new_nodes = nodes.copy() + new_nodes[len - 1] = new_node + let sizes = update_sizes_last(sizes) + Some(Node(new_nodes, sizes)) + } + // We need to create a new node to push the value. + None => + if len < branching_factor { + let sizes = push_sizes_last(sizes) + Some( + Node( + immutable_push( + nodes, + new_branch_left([value], shift - num_bits), + ), + sizes, + ), + ) + } else { + None + } + } + } + Empty => Some(Leaf([value])) + } + } + + match worker(self, shift) { + Some(new_tree) => (new_tree, shift) + None => { + let new_branch = new_branch_left([value], shift) + ( + match self { + Leaf(_leaf) => Node([self, new_branch], None) + Node(_nodes, Some(sizes)) => { + let len = sizes[sizes.length() - 1] + let sizes = FixedArray::from_array([len, 1 + len]) + Node([self, new_branch], Some(sizes)) + } + Node(_nodes, None) => Node([self, new_branch], None) + Empty => + abort( + "Unreachable: Empty tree should have fallen into the Some(new_tree) branch", + ) + }, + shift + num_bits, + ) + } } } +//----------------------------------------------------------------------------- +// Iteration +//----------------------------------------------------------------------------- + ///| -fn add[T](self : Tree[T], index : Int, shift : Int, value : T) -> Tree[T] { +/// For each element in the tree, apply the function `f`. 
+fn Tree::each[A](self : Tree[A], f : (A) -> Unit) -> Unit { match self { - Leaf(l) => Leaf(immutable_push(l, value)) - Node(n) => { - let idx = shr_as_uint(index, shift) & bitmask - if idx < n.length() { - Node(immutable_set(n, idx, n[idx].add(index, shift - num_bits, value))) + Empty => () + Leaf(l) => l.each(f) + Node(ns, _) => ns.each(fn(t) { t.each(f) }) + } +} + +///| +/// For each element in the tree, apply `f` to its index and value; `start` is the index of the first element of `self`. +fn Tree::eachi[A]( + self : Tree[A], + f : (Int, A) -> Unit, + shift : Int, + start : Int +) -> Unit { + match self { + Empty => () + Leaf(l) => + for i = 0; i < l.length(); i = i + 1 { + f(start + i, l[i]) + } + Node(ns, None) => { + let child_shift = shift - num_bits + let mut start = start + for i = 0; i < ns.length(); i = i + 1 { + ns[i].eachi(f, child_shift, start) + start += 1 << shift + } + } + Node(ns, Some(sizes)) => { + let child_shift = shift - num_bits + let mut child_start = start + for i = 0; i < ns.length(); i = i + 1 { + ns[i].eachi(f, child_shift, child_start) + child_start = start + sizes[i] // `sizes` is cumulative, so rebase from this node's own start + } + } + } +} + +///| +/// Fold the tree. +fn Tree::fold[A, B](self : Tree[A], acc : B, f : (B, A) -> B) -> B { + match self { + Empty => acc + Leaf(l) => l.fold(f, init=acc) + Node(n, _) => n.fold(fn(acc, t) { t.fold(acc, f) }, init=acc) + } +} + +///| +/// Fold the tree in reverse order. +fn Tree::rev_fold[A, B](self : Tree[A], acc : B, f : (B, A) -> B) -> B { + match self { + Empty => acc + Leaf(l) => l.rev_fold(f, init=acc) + Node(n, _) => n.rev_fold(fn(acc, t) { t.rev_fold(acc, f) }, init=acc) + } +} + +///| +/// Map the tree. 
+fn Tree::map[A, B](self : Tree[A], f : (A) -> B) -> Tree[B] { + match self { + Empty => Empty + Leaf(l) => Leaf(l.map(f)) + Node(n, szs) => + Node( + FixedArray::makei(n.length(), fn(i) { n[i].map(f) }), + copy_sizes(szs), + ) + } +} + +//----------------------------------------------------------------------------- +// Concatenation +//----------------------------------------------------------------------------- + +///| +/// Concatenate two trees. +/// External callers should pass `top = true`; the recursive calls below pass `false`. +/// +/// Preconditions: +/// - `left` and `right` are not `Empty`. +/// - `left` and `right` are of height `left_shift / num_bits` and `right_shift / num_bits`, respectively. +fn Tree::concat[A]( + left : Tree[A], + left_shift : Int, + right : Tree[A], + right_shift : Int, + top : Bool +) -> (Tree[A], Int) { + if left_shift > right_shift { // left is taller: descend along its rightmost child + let (c, c_shift) = Tree::concat( + left.right_child(), + left_shift - num_bits, + right, + right_shift, + false, + ) + guard c_shift == left_shift + return rebalance(left, c, Empty, left_shift, top) + } else if right_shift > left_shift { // right is taller: descend along its leftmost child + let (c, c_shift) = Tree::concat( + left, + left_shift, + right.left_child(), + right_shift - num_bits, + false, + ) + guard c_shift == right_shift + return rebalance(Empty, c, right, right_shift, top) + } else if left_shift == 0 { + // Handle Leaf case + let left_elems = left.leaf_elements() + let right_elems = right.leaf_elements() + let left_len = left_elems.length() + let right_len = right_elems.length() + let len = left_len + right_len + if top && len <= branching_factor { // at the top, two small leaves are fused into one leaf + return ( + Leaf( + FixedArray::makei(len, fn(i : Int) { + if i < left_len { + left_elems[i] + } else { + right_elems[i - left_len] + } + }), + ), + 0, + ) + } else { + return ( + Node( + FixedArray::from_array([left, right]), + Some(FixedArray::from_array([left_len, len])), + ), + num_bits, + ) + } + } else { + // Handle Node case + let (c, c_shift) = Tree::concat( + left.right_child(), + left_shift - num_bits, + 
right.left_child(), + right_shift - num_bits, + false, + ) + guard c_shift == left_shift + guard c_shift == right_shift + return rebalance(left, c, right, left_shift, top) + } +} + +///| +/// Given three trees of the same height (`shift` / `num_bits`; `left` or `right` may be `Empty`), rebalance them into at most two nodes. +/// `top` is `true` if the resulting node has no upper node. +/// Returns the new node and its shift. +fn rebalance[A]( + left : Tree[A], + center : Tree[A], + right : Tree[A], + shift : Int, + top : Bool +) -> (Tree[A], Int) { + // Suppose H = shift / num_bits + let t = tri_merge(left, center, right) // t is a list of trees of (H-1) height + let (nc, nc_len) = redis_plan(t) + let new_t = redis(t, nc, nc_len, shift - num_bits) // new_t is a list of trees of (H-1) height + guard new_t.length() == nc_len + if nc_len <= branching_factor { + // All nodes can be accommodated in a single node + let node = Node(new_t, compute_sizes(new_t, shift - num_bits)) // node of H height + if not(top) { + return (Node(FixedArray::from_array([node]), None), shift + num_bits) + // return (H+1) height node, add another layer to align with the case at the end of this function + } else { + return (node, shift) + // return H height node, no upper node so no need to add another layer on top of it + } + } else { + let new_child_1 = FixedArray::makei(branching_factor, fn(i) { new_t[i] }) + let new_child_2 = FixedArray::makei(new_t.length() - branching_factor, fn( + i + ) { + new_t[i + branching_factor] + }) + let new_node_1 = Node( + new_child_1, + compute_sizes(new_child_1, shift - num_bits), + ) // height H + let new_node_2 = Node( + new_child_2, + compute_sizes(new_child_2, shift - num_bits), + ) // height H + let new_children = FixedArray::from_array([new_node_1, new_node_2]) + return ( + Node(new_children, compute_sizes(new_children, shift)), + shift + num_bits, + ) // return (H+1) height node + } +} + +///| +/// Given three trees of the same height (if not `Empty`), merge their children into one array. 
+/// `left` and `right` might be `Node` or `Empty`. +/// `center` is always a `Node`. +/// The resulting array might be longer than `branching_factor`, +/// which will be handled by `rebalance` later. +/// +/// Preconditions: +/// - `left` and `right` are `Empty` or `Node`. +/// - `center` is `Node`. +/// +/// Postconditions: +/// - The resulting array holds the children of `left` except the last, all children of `center`, and the children of `right` except the first. +/// - The height of a `Tree` in the resulting array is one less than the height of the input `Tree`s. +fn tri_merge[A]( + left : Tree[A], + center : Tree[A], + right : Tree[A] +) -> FixedArray[Tree[A]] { + if left.is_leaf() || not(center.is_node()) || right.is_leaf() { + abort("Unreachable: input to merge is invalid") + } + fn get_children(self : Tree[A]) -> FixedArray[Tree[A]] { + match self { + Node(children, _) => children + Empty => [] + Leaf(_) => abort("Unreachable") + } + } + + let left_children = get_children(left) + let center_children = get_children(center) + let right_children = get_children(right) + let left_len = left_children.length() + let left_len = if left_len == 0 { 0 } else { left_len - 1 } // skip the last child of `left` + let center_len = center_children.length() + let right_len = right_children.length() + let right_len = if right_len == 0 { 0 } else { right_len - 1 } // skip the first child of `right` + FixedArray::makei(left_len + center_len + right_len, fn(i) { + if i < left_len { + left_children[i] + } else if i < left_len + center_len { + center_children[i - left_len] + } else if right_len > 0 { + right_children[1 + i - left_len - center_len] + } else { + abort("Unreachable") + } + }) +} + +///| +/// Create a redistribution plan: the child count of each node after redistribution, and how many entries of that array are in use. 
+fn redis_plan[A](t : FixedArray[Tree[A]]) -> (FixedArray[Int], Int) { + let node_counts = FixedArray::makei(t.length(), fn { i => t[i].local_size() }) + let total_nodes = node_counts.fold(init=0, fn { acc, x => acc + x }) + // smallest possible number of nodes: total_nodes / branching_factor, rounded up + let opt_len = (total_nodes + branching_factor - 1) / branching_factor + let mut new_len = t.length() + let mut i = 0 + while opt_len + e_max_2 < new_len { + // Skip over all nodes satisfying the invariant. + while node_counts[i] > branching_factor - e_max_2 { + i += 1 + } + + // Found short node, so redistribute over the next nodes + let mut remaining_nodes = node_counts[i] + while remaining_nodes > 0 { + let min_size = min(remaining_nodes + node_counts[i + 1], branching_factor) + node_counts[i] = min_size + remaining_nodes = remaining_nodes + node_counts[i + 1] - min_size + i += 1 + } + for j = i; j < new_len - 1; j = j + 1 { // slot i has been absorbed; shift the rest left by one + node_counts[j] = node_counts[j + 1] + } + new_len -= 1 + i -= 1 + } + return (node_counts, new_len) // only the first `new_len` entries of `node_counts` are meaningful +} + +///| +/// This function redistributes the nodes in `old_t` according to the plan in `node_counts`. +/// +/// Preconditions: +/// - forall i in 0..node_nums, old_t[i] != Empty. +/// - `old_t` contains a list of trees, each of (`shift` / `num_bits`) height. +/// - `node_counts` contains the number of children of each node in `new_t` (the redistributed version of `old_t`). +/// - `node_nums` is the number of leading entries of `node_counts` to use, i.e. the length of `new_t`. +/// +/// Postcondition: +/// - The resulting trees in `new_t` are of the same height as trees in `old_t`. 
+fn redis[A]( + old_t : FixedArray[Tree[A]], + node_counts : FixedArray[Int], + node_nums : Int, + shift : Int +) -> FixedArray[Tree[A]] { + let old_len = old_t.length() + let new_t = FixedArray::make(node_nums, Empty) + let mut old_offset = 0 + let mut j = 0 // index of the current tree in `old_t` + if shift == 0 { + // Handle Leaf case + + let mut old_leaf_elems = FixedArray::default() + let mut old_leaf_len = 0 + for i = 0; i < node_nums; i = i + 1 { + + // old_t[j] is the next to be redistributed + // old_offset is the index of the next node to be redistributed in old_t[j] + // old_offset == 0 means all nodes in old_t[j] are to be redistributed + // new_t[i] is the current node to be filled with redistributed nodes + old_leaf_elems = old_t[j].leaf_elements() + old_leaf_len = old_leaf_elems.length() + if old_offset == 0 && old_leaf_len == node_counts[i] { + // Perfect, we just point to the old leaf + new_t[i] = old_t[j] + j += 1 + } else { + let mut new_offset = 0 // the accumulated number of elements in the new leaf + let new_leaf_len = node_counts[i] + let new_leaf_elems = FixedArray::make(new_leaf_len, old_leaf_elems[0]) + while new_offset < new_leaf_len { + guard j < old_len // This shouldn't be triggered if the plan was correctly generated + old_leaf_elems = old_t[j].leaf_elements() + old_leaf_len = old_leaf_elems.length() + let remaining = min( + new_leaf_len - new_offset, + old_leaf_len - old_offset, + ) + FixedArray::unsafe_blit( + new_leaf_elems, new_offset, old_leaf_elems, old_offset, remaining, + ) + new_offset += remaining + old_offset += remaining + if old_offset == old_leaf_len { + j += 1 + old_offset = 0 + } + } + new_t[i] = Leaf(new_leaf_elems) + } + } + } else { + // Handle Node case, pretty much the same as the Leaf case + + let mut old_node_chldrn = FixedArray::default() + let mut old_node_len = 0 + for i = 0; i < node_nums; i = i + 1 { + old_node_chldrn = old_t[j].node_children() + old_node_len = old_node_chldrn.length() + if old_offset == 0 && 
old_node_len == node_counts[i] { + new_t[i] = old_t[j] + j += 1 } else { - Node(immutable_push(n, new_branch([value], shift - num_bits))) + let mut new_offset = 0 + let new_node_len = node_counts[i] + let new_node_chldrn = FixedArray::make(new_node_len, old_node_chldrn[0]) + while new_offset < new_node_len { + guard j < old_len + old_node_chldrn = old_t[j].node_children() + old_node_len = old_node_chldrn.length() + let remaining = min( + new_node_len - new_offset, + old_node_len - old_offset, + ) + FixedArray::unsafe_blit( + new_node_chldrn, new_offset, old_node_chldrn, old_offset, remaining, + ) + new_offset += remaining + old_offset += remaining + if old_offset == old_node_len { + j += 1 + old_offset = 0 + } + } + new_t[i] = Node( + new_node_chldrn, + compute_sizes(new_node_chldrn, shift - num_bits), + ) // each node in `new_t` is of height (`shift` / `num_bits`) } } - Empty => Leaf([value]) } + new_t +} + +///| +/// Given a list of trees as `children` with heights of (`shift` / `num_bits`), compute the cumulative sizes array of the subtrees; returns `None` when every child is completely full. +fn compute_sizes[A]( + children : FixedArray[Tree[A]], + shift : Int +) -> FixedArray[Int]? { + let len = children.length() + let sizes = FixedArray::make(len, 0) + let mut sum = 0 + let mut flag = true + let full_subtree_size = branching_factor << shift + for i = 0; i < len; i = i + 1 { + let sz = children[i].size(shift) + flag = flag && sz == full_subtree_size + sum += sz + sizes[i] = sum + } + if flag { + None // NOTE(review): assumes a full child never carries its own sizes table, since get_radix/set_radix abort on sized nodes; confirm + } else { + Some(sizes) + } +} + +//----------------------------------------------------------------------------- +// Common Trait Implementations +//----------------------------------------------------------------------------- + +///| +/// Print the tree structure. For debug use only. 
+/// @coverage.skip +impl[A : Show] Show for Tree[A] with output(self, logger : &Logger) { + fn indent_str(s : String, indent : Int) -> String { // prefix `s` with `indent` spaces + String::make(indent, ' ') + s + } + + fn rec(t : Tree[A], ident : Int) { + match t { + Empty => indent_str("Empty", ident) // NOTE(review): unlike the other arms, no trailing newline is appended here + Leaf(l) => { + let mut s = "Leaf(" + for i = 0; i < l.length(); i = i + 1 { + s += l[i].to_string() + if i != l.length() - 1 { + s += ", " + } + } + s += ")" + indent_str(s, ident) + "\n" + } + Node(children, _sizes) => { + let mut s = indent_str("Node(", ident) + "\n" + for i = 0; i < children.length(); i = i + 1 { + s += rec(children[i], ident + 2) + } + s + indent_str(")", ident) + "\n" + } + } + } + + logger.write_string(rec(self, 0)) } diff --git a/immut/array/tree_utils.mbt b/immut/array/tree_utils.mbt new file mode 100644 index 000000000..c648b2001 --- /dev/null +++ b/immut/array/tree_utils.mbt @@ -0,0 +1,103 @@ +// Copyright 2025 International Digital Economy Academy +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +/// Utility functions for working with trees. +/// Other utility functions are in `utils.mbt`. + +///| +/// Return `true` if the tree is a `Node`. 
+fn Tree::is_node[A](self : Tree[A]) -> Bool { + match self { + Node(_, _) => true + _ => false + } +} + +///| +fn Tree::is_leaf[A](self : Tree[A]) -> Bool { // `true` if the tree is a `Leaf` + match self { + Leaf(_) => true + _ => false + } +} + +///| +fn Tree::is_empty[A](self : Tree[A]) -> Bool { // `true` if the tree is `Empty` + match self { + Empty => true + _ => false + } +} + +///| +/// Get the rightmost child of a tree node. Abort if +/// it is not a `Node`. +fn Tree::right_child[A](self : Tree[A]) -> Tree[A] { + match self { + Node(children, _) => children[children.length() - 1] + Leaf(_) | Empty => abort("Should not get children on non-`Node`s") + } +} + +///| +/// Get the leftmost child of a tree node. Abort if +/// it is not a `Node`. +fn Tree::left_child[A](self : Tree[A]) -> Tree[A] { + match self { + Node(children, _) => children[0] + Leaf(_) | Empty => abort("Should not get children on non-`Node`s") + } +} + +///| +/// Get the leaf contents. Abort if it is not a `Leaf`. +fn Tree::leaf_elements[A](self : Tree[A]) -> FixedArray[A] { + match self { + Leaf(children) => children + _ => abort("Should not call `leaf_elements` on non-leaf nodes") + } +} + +///| +/// Get the children of a `Node`. Abort if it is not a `Node`. +fn Tree::node_children[A](self : Tree[A]) -> FixedArray[Tree[A]] { + match self { + Node(children, _) => children + _ => abort("Should not call `node_children` on non-`Node`s") + } +} + +///| +/// Get the physical size of the current node, not the total number of elements in the tree. +fn Tree::local_size[A](self : Tree[A]) -> Int { + match self { + Empty => 0 + Leaf(l) => l.length() + Node(children, _) => children.length() + } +} + +///| +/// Get the total number of elements in the tree; `shift` is the shift of `self`. 
+fn Tree::size[A](self : Tree[A], shift : Int) -> Int { + match self { + Empty => 0 + Leaf(l) => l.length() + Node(_, Some(sizes)) => sizes[sizes.length() - 1] // the last cumulative entry is the total + Node(children, None) => { + let len_1 = children.length() - 1 // every child before the last is counted as full + (len_1 << shift) + children[len_1].size(shift - num_bits) + } + } +} diff --git a/immut/array/types.mbt b/immut/array/types.mbt index f1c9231fe..5f869e94a 100644 --- a/immut/array/types.mbt +++ b/immut/array/types.mbt @@ -13,6 +13,10 @@ // limitations under the License. ///| +/// Invariants: +/// - `shift` = tree height * `num_bits`. When it is 0, we are at the leaf level. +/// - `size` = the number of elements in the tree. +/// - `shift` is not used when `tree` is `Empty`. struct T[A] { tree : Tree[A] size : Int @@ -20,8 +24,10 @@ struct T[A] { } ///| +/// Invariants: +/// - For `Node`, the sizes array is `None` when radix indexing applies (every child except possibly the last is full); otherwise it holds the cumulative sizes of the subtrees. priv enum Tree[A] { Empty - Node(FixedArray[Tree[A]]) + Node(FixedArray[Tree[A]], FixedArray[Int]?) // (Subtrees, cumulative sizes of subtrees) Leaf(FixedArray[A]) -} derive(Eq, Show) +} derive(Eq) diff --git a/immut/array/utils.mbt b/immut/array/utils.mbt new file mode 100644 index 000000000..4d79a26f7 --- /dev/null +++ b/immut/array/utils.mbt @@ -0,0 +1,84 @@ +// Copyright 2025 International Digital Economy Academy +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +/// Utils for `FixedArray`s in the immutable array implementation. 
+/// Typically utility functions that are not related to trees. + +///| +/// Return a copy of `arr` with index `i` set to `v`; the input is not modified. This operation is O(n). +fn immutable_set[T](arr : FixedArray[T], i : Int, v : T) -> FixedArray[T] { + let arr = arr.copy() + arr[i] = v + arr +} + +///| +/// Return a new array holding the elements of `arr` followed by `val`. This operation is O(n). +fn immutable_push[T](arr : FixedArray[T], val : T) -> FixedArray[T] { + let len = arr.length() + let new_arr = FixedArray::make(len + 1, val) + arr.blit_to(new_arr, len~) + new_arr[len] = val // this slot already holds `val` from the fill above + new_arr +} + +///| +/// x >> y as unsigned integers, then reinterpret as signed integers. +fn shr_as_uint(x : Int, y : Int) -> Int { + (x.reinterpret_as_uint() >> y).reinterpret_as_int() +} + +///| +/// Given an index and a shift, return the index of the branch that contains the given index. +fn radix_indexing(index : Int, shift : Int) -> Int { + shr_as_uint(index, shift) & bitmask +} + +///| +/// Get the position of the first entry of `sizes` that is greater than `index`, i.e. the branch containing `index` (requires `index` < the last entry of `sizes`). +/// For example, if the sizes are [0, 3, 6, 10] and the index is 5, the function should return 2. +fn get_branch_index(sizes : FixedArray[Int], index : Int) -> Int { + let mut lo = 0 + let mut hi = sizes.length() + while LINEAR_THRESHOLD < hi - lo { + let mid = (lo + hi) / 2 + if sizes[mid] <= index { + lo = mid + } else { + hi = mid + } + } + while sizes[lo] <= index { + lo += 1 + } + lo +} + +///| +/// Copy the sizes array (`None` stays `None`). +fn copy_sizes(sizes : FixedArray[Int]?) -> FixedArray[Int]? 
{ + match sizes { + Some(sizes) => Some(sizes.copy()) + None => None + } +} + +///| +fn min(a : Int, b : Int) -> Int { // the smaller of `a` and `b` + if a < b { + a + } else { + b + } +} diff --git a/immut/array/utils_wbtest.mbt b/immut/array/utils_wbtest.mbt new file mode 100644 index 000000000..30ba32b66 --- /dev/null +++ b/immut/array/utils_wbtest.mbt @@ -0,0 +1,164 @@ +// Copyright 2025 International Digital Economy Academy +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +///| +enum Op { + PushEnd(Int) // value to append + Concat(Array[Int]) // elements of the array to concatenate + Set(Int, Int) // (index, new value) +} + +///| +/// Generate a random test sequence for the array, +/// following the format described in the `execute_array_test` function. 
+/// +/// Inputs: +/// - `rng`: a random number generator +/// - `times`: the number of operations to be generated +/// - `max_lvl`: the maximum level of the tree +fn random_test_gen(rng : @random.Rand, times : Int, max_lvl : Int) -> Array[Op] { + // Hyperparameters + let op_count = 3 + let max_len = branching_factor_power(max_lvl) + let max_val = 2025 + + // Start constructing the array + let ret = [] + let mut cur_len = 0 + for i = 0; i < times; i = i + 1 { + let op = rng.int(limit=op_count) + match op { + 0 => { + // push_end + ret.push(Op::PushEnd(rng.int(limit=max_val))) + cur_len += 1 + } + 1 => { + // concat: draw every element separately so ordering bugs inside the run are visible + let len = rng.int(limit=max_len) + let a = Array::makei(len, fn(_i) { rng.int(limit=max_val) }) + cur_len += len + ret.push(Op::Concat(a)) + } + 2 => { + // set + if cur_len == 0 { + continue + } + let idx = rng.int(limit=cur_len) + let val = rng.int(limit=max_val) + ret.push(Op::Set(idx, val)) + } + _ => abort("Invalid op") + } + } + ret +} + +///| +/// This function runs a series of operations on an array and checks +/// if the result matches the expected array. +/// +/// Currently, the operations are: +/// 0. push_end +/// 1. concat +/// 2. set +/// +/// The `rs` array is a sequence of operations to be executed. +fn execute_array_test(rs : Array[Op]) -> Unit! { + let mut t = new() + let a : Array[Int] = [] + for op in rs { + match op { + PushEnd(v) => { + // push_end + a.push(v) + t = t.push(v) + check_array_eq!(a, t) + } + Concat(v) => { + // concat + let len = v.length() + for i = 0; i < len; i = i + 1 { + a.push(v[i]) + } + t = t.concat(from_iter(v.iter())) + check_array_eq!(a, t) + } + Set(idx, v) => { + // set + a[idx] = v + t = t.set(idx, v) + check_array_eq!(a, t) + } + } + } +} + +///| +/// Compute `branching_factor` raised to the power `a`. +fn branching_factor_power(a : Int) -> Int { + let mut ret = 1 + for i = 0; i < a; i = i + 1 { + ret *= branching_factor + } + ret +} + +///| +/// Repeat the given string up to 32 characters. 
+/// Used to generate a random seed for the test; `s` must be non-empty, and the `guard` below requires every `set_utf8_char` call to return 1. +fn repeat_up_to_32(s : String) -> Bytes { + let a_len = 32 + let s_len = s.length() + let a = FixedArray::make(a_len, b'0') + let mut j = 0 + for i = 0; i < a_len; i = i + 1 { + let l = a.set_utf8_char(i, s[j]) + guard l == 1 + j = (j + 1) % s_len + } + Bytes::from_fixedarray(a) +} + +///| +/// Use this function to check if the array and the @immut/array are equal. +/// If we `inspect` the array, it will raise an error if the arrays are too long. +/// I guess that's because it exceeds the heap limit of the VM. +fn check_array_eq(a : Array[Int], t : T[Int]) -> Unit! { + assert_eq!(a.length(), t.size) + let len = t.size + for i = 0; i < len; i = i + 1 { + assert_eq!(t[i], a[i]) + } +} + +///| +/// Use this function to check if the FixedArray and the @immut/array are equal. +/// If we `inspect` the array, it will raise an error if the arrays are too long. +/// I guess that's because it exceeds the heap limit of the VM. +fn check_fixedarray_eq(a : FixedArray[Int], t : T[Int]) -> Unit! { + assert_eq!(a.length(), t.size) + let len = t.size + for i = 0; i < len; i = i + 1 { + assert_eq!(t[i], a[i]) + } +} + +///| +/// Generate a pseudo-random array of length `n` from a fixed seed; element `i` is drawn with `limit=i`. +fn random_array(n : Int) -> Array[Int] { + let rng = @random.new(seed=b"DEADBEEFLIVEBEEFDEADBEEFDEADBEEF") + Array::makei(n, fn(i) { rng.int(limit=i) }) +}