Merge pull request #2 from guidorice/bug-fixes

Bug fixes, add more tests
guidorice · Dec 1, 2023 · 6efcfe5 · 6efcfe5
2 parents ec0af46 + c35a8e4
commit 6efcfe5
Show file tree

Hide file tree

Showing 5 changed files with 117 additions and 41 deletions.
diff --git a/mojo_impl/naive.mojo b/mojo_impl/naive.mojo
@@ -1,5 +1,5 @@
 import benchmark
-from math.limit import inf, neginf
+from math.limit import inf, neginf, max_finite, min_finite
 from random import rand
 from sys import argv
 from tensor import Tensor, TensorSpec
@@ -13,16 +13,28 @@ fn envelope[dtype: DType, dims: Int](tensor: Tensor[dtype]) -> SIMD[dtype, 2 * d
     @parameter
     constrained[dims > 0 and dims % 2 == 0, "power-of-two dims only"]()
 
-    let NegInf = neginf[dtype]()
-    let Inf = inf[dtype]()
     let num_features = tensor.shape()[1]
     var result = SIMD[dtype, 2 * dims]()
 
-    for d in range(dims):
-        result[d] = Inf
-
-    for d in range(dims, 2 * dims):
-        result[d] = NegInf
+    @parameter
+    if dtype.is_floating_point():
+        let min_start = inf[dtype]()
+        let max_start = neginf[dtype]()
+        @unroll
+        for d in range(dims):
+            result[d] = min_start
+        @unroll
+        for d in range(dims, 2 * dims):
+            result[d] = max_start
+    else:  # integral types
+        let min_start = max_finite[dtype]()
+        let max_start = min_finite[dtype]()
+        @unroll
+        for d in range(dims):
+            result[d] = min_start
+        @unroll
+        for d in range(dims, 2 * dims):
+            result[d] = max_start
 
     for y in range(dims):
         for x in range(num_features):

diff --git a/mojo_impl/optimized_a.mojo b/mojo_impl/optimized_a.mojo
@@ -1,6 +1,6 @@
 import benchmark
 from algorithm import vectorize
-from math.limit import inf, neginf
+from math.limit import inf, neginf, max_finite, min_finite
 from random import rand
 from sys import argv
 from sys.info import simdbitwidth
@@ -9,26 +9,36 @@ from utils.index import Index
 
 alias nelts = simdbitwidth()
 
-fn envelope[dtype: DType, dims: Int](tensor: Tensor[dtype]) -> SIMD[dtype, dims * 2]:
+fn envelope[dtype: DType, dims: Int](tensor: Tensor[dtype]) -> SIMD[dtype, 2 * dims]:
     """
     Calculate envelope: vectorized, unrolled, single-threaded.
     """
 
     @parameter
     constrained[dims > 0 and dims % 2 == 0, "power-of-two dims only"]()
 
-    let NegInf = neginf[dtype]()
-    let Inf = inf[dtype]()
     let num_features = tensor.shape()[1]
-    var result = SIMD[dtype, dims * 2]()
+    var result = SIMD[dtype, 2 * dims]()
 
-    @unroll
-    for d in range(dims):
-        result[d] = Inf
-
-    @unroll
-    for d in range(dims, 2 * dims):
-        result[d] = NegInf
+    @parameter
+    if dtype.is_floating_point():
+        let min_start = inf[dtype]()
+        let max_start = neginf[dtype]()
+        @unroll
+        for d in range(dims):
+            result[d] = min_start
+        @unroll
+        for d in range(dims, 2 * dims):
+            result[d] = max_start
+    else:  # integral types
+        let min_start = max_finite[dtype]()
+        let max_start = min_finite[dtype]()
+        @unroll
+        for d in range(dims):
+            result[d] = min_start
+        @unroll
+        for d in range(dims, 2 * dims):
+            result[d] = max_start
 
     @unroll
     for dim in range(dims):

diff --git a/mojo_impl/optimized_b.mojo b/mojo_impl/optimized_b.mojo
@@ -1,7 +1,7 @@
 import benchmark
 from algorithm import vectorize
 from algorithm.functional import parallelize
-from math.limit import inf, neginf
+from math.limit import inf, neginf, max_finite, min_finite
 from random import rand
 from sys import argv
 from sys.info import simdbitwidth
@@ -17,19 +17,28 @@ fn envelope[dtype: DType, dims: Int](tensor: Tensor[dtype]) -> SIMD[dtype, 2 * d
     @parameter
     constrained[dims > 0 and dims % 2 == 0, "power-of-two dims only"]()
 
-    let NegInf = neginf[dtype]()
-    let Inf = inf[dtype]()
     let num_features = tensor.shape()[1]
+    var result = Tensor[dtype](TensorSpec(dtype, 1, 2 * dims))
 
-    var result = SIMD[dtype, 2 * dims]()
-
-    @unroll
-    for d in range(dims):
-        result[d] = Inf
-
-    @unroll
-    for d in range(dims, 2 * dims):
-        result[d] = NegInf
+    @parameter
+    if dtype.is_floating_point():
+        let min_start = inf[dtype]()
+        let max_start = neginf[dtype]()
+        @unroll
+        for d in range(dims):
+            result[d] = min_start
+        @unroll
+        for d in range(dims, 2 * dims):
+            result[d] = max_start
+    else:  # integral types
+        let min_start = max_finite[dtype]()
+        let max_start = min_finite[dtype]()
+        @unroll
+        for d in range(dims):
+            result[d] = min_start
+        @unroll
+        for d in range(dims, 2 * dims):
+            result[d] = max_start
 
     @parameter
     fn min_max_task(dim: Int):
@@ -48,7 +57,7 @@ fn envelope[dtype: DType, dims: Int](tensor: Tensor[dtype]) -> SIMD[dtype, 2 * d
 
     parallelize[min_max_task](dims)
 
-    return result
+    return result.simd_load[2 * dims]()
 
 alias dtype = DType.float32
 alias dims = 2
@@ -80,3 +89,4 @@ fn main() raises:
     print("microsecs:", secs * 10 ** 6)
     print("ms:", ms)
     print("s:", secs)
+    print()
diff --git a/mojo_impl/tests/test_impls.mojo b/mojo_impl/tests/test_impls.mojo
@@ -8,17 +8,61 @@ from mojo_impl.naive import envelope as envelope_naive
 from mojo_impl.optimized_a import envelope as envelope_opt_a
 from mojo_impl.optimized_b import envelope as envelope_opt_b
 
-alias dtype = DType.float32
-alias dims = 2
-alias width = 1000
-
 
 fn main() raises:
-    test_mojo_impls()
+    test_mojo_impls_int16()
+    test_mojo_impls_float32()
+    test_mojo_impls_float64()
+
+
+fn test_mojo_impls_int16():
+    alias dtype = DType.int16
+    alias dims = 2
+    alias width = 1000
+
+    let test = MojoTest("mojo implementations are all consistent: " + dtype.__str__())
+
+    # create a tensor, filled with random values
+    let spec = TensorSpec(dtype, dims, width)
+    let tensor = rand[dtype](spec)
+
+    # check the 3 mojo implementations all return the same value
+    let result_naive = envelope_naive[dtype, dims](tensor)
+
+    let result_opt_a = envelope_opt_a[dtype, dims](tensor)
+    test.assert_true(result_naive == result_opt_a, "naive == envelope_opt_a")
+
+    let result_opt_b = envelope_opt_b[dtype, dims](tensor)
+    test.assert_true(result_naive == result_opt_b, "naive == envelope_opt_b")
+
+
+fn test_mojo_impls_float64():
+    alias dtype = DType.float64
+    alias dims = 4
+    alias width = 1000
+
+    let test = MojoTest("mojo implementations are all consistent: " + dtype.__str__())
+
+    # create a tensor, filled with random values
+    let spec = TensorSpec(dtype, dims, width)
+    let tensor = rand[dtype](spec)
+
+    # check the 3 mojo implementations all return the same value
+    let result_naive = envelope_naive[dtype, dims](tensor)
+
+    let result_opt_a = envelope_opt_a[dtype, dims](tensor)
+    test.assert_true(result_naive == result_opt_a, "naive == envelope_opt_a")
+
+    let result_opt_b = envelope_opt_b[dtype, dims](tensor)
+    test.assert_true(result_naive == result_opt_b, "naive == envelope_opt_b")
+
 
+fn test_mojo_impls_float32():
+    alias dtype = DType.float32
+    alias dims = 8
+    alias width = 1000
 
-fn test_mojo_impls():
-    let test = MojoTest("mojo implementations are all consistent")
+    let test = MojoTest("mojo implementations are all consistent: " + dtype.__str__())
 
     # create a tensor, filled with random values
     let spec = TensorSpec(dtype, dims, width)

diff --git a/py_impl/tests/test_impls.py b/py_impl/tests/test_impls.py
@@ -8,7 +8,7 @@ def test_python_impls():
     """
     Test the python implementations are consistent.
     """
-    multipoint_10_3 = np.array(np.random.rand(2, 10**3), dtype=np.float32)
+    multipoint_10_3 = np.array(np.random.rand(2, 10**3), np.float64)
 
     result_naive = envelope_naive(
         x_coords=list(multipoint_10_3[0]), y_coords=list(multipoint_10_3[1])