Add primes module

Prime numbers are an essential building block of many algorithms in diverse areas such as cryptography, digital communications and many others. This module adds the following functions: - primes: Procedure to generate lists of primes upto a certain value. - factor: Procedure which returns a Tensor containing the prime factors of the input. - isprime: Single element and element-wise procedures that return true when their inputs are prime numbers.
SciNim · Jun 16, 2024 · 469c878 · 469c878
1 parent 972d84a
commit 469c878
Show file tree

Hide file tree

Showing 2 changed files with 334 additions and 0 deletions.
diff --git a/scinim/primes.nim b/scinim/primes.nim
@@ -0,0 +1,201 @@
+## Module that implements several procedures related to prime numbers
+##
+## Prime numbers are an essential building block of many algorithms in diverse
+## areas such as cryptography, digital communications and many others.
+## This module adds a function to generate rank-1 tensors of primes upto a
+## certain value; as well as a function to calculate the prime factors of a
+## number.
+
+import arraymancer
+
+proc primes*[T: SomeInteger | SomeFloat](upto: T): Tensor[T] =
+  ## Generate a Tensor of prime numbers up to a certain value
+  ##
+  ## Return a Tensor of the prime numbers less than or equal to `upto`.
+  ## A prime number is one that has no factors other than 1 and itself.
+  ##
+  ## Input:
+  ##   - upto: Integer up to which primes will be generated
+  ##
+  ## Result:
+  ##   - Integer Tensor of prime values less than or equal to `upto`
+  ##
+  ## Note:
+  ##   - This function implements a "half" Sieve of Erathostenes algorithm
+  ##     which is a classical Sieve of Erathostenes in which only odd numbers
+  ##     are checked. Many examples of this algorithm can be found online.
+  ##     It also stops checking after sqrt(upto)
+  ##   - The memory required by this procedure is proportional to the input
+  ##     number.
+  when T is SomeFloat:
+    if upto != round(upto):
+      raise newException(ValueError,
+        "`upto` value (" & $upto & ") must be a whole number")
+
+  if upto < 11:
+    # Handle the primes below 11 to simplify the general code below
+    # (by removing the need to handle the few cases in which the index to
+    # `isprime`, calculated based on `factor` is negative)
+    # This is the minimum set of primes that we must handle, but we could
+    # extend this list to make the calculation faster for more of the
+    # smallest primes
+    let prime_candidates = [2.T, 3, 5, 7].toTensor()
+    return prime_candidates[prime_candidates <=. upto]
+
+  # General algorithm (valid for numbers higher than 10)
+  let prime_candidates = arange(3.T, T(upto + 1), 2.T)
+  var isprime = ones[bool]((upto.int - 1) div 2)
+  let max_possible_factor_idx = int(sqrt(upto.float)) div 2
+  for factor in prime_candidates[_ ..< max_possible_factor_idx]:
+    if isprime[(factor.int - 2) div 2]:
+      isprime[(factor.int * 3 - 2) div 2 .. _ | factor.int] = false
+
+  # Note that 2 is missing from the result, so it must be manually added to
+  # the front of the result tensor
+  return [2.T].toTensor().append(prime_candidates[isprime])
+
+# The maximum float64 that can be represented as an integer that is followed by a
+# another integer that is representable as a float64 as well
+const maximumConsecutiveFloat64Int = pow(2.0, 53) - 1.0
+
+proc factor*[T: SomeInteger | SomeFloat](n: T): Tensor[T] =
+  ## Return a Tensor containing the prime factors of the input
+  ##
+  ## Input:
+  ##   - n: A value that will be factorized.
+  ##        If its type is floating-point it must be a whole number. Otherwise
+  ##        a ValueError will be raised.
+  ## Result:
+  ##   - A sorted Tensor containing the prime factors of the input.
+  ##
+  ## Example:
+  ## ```nim
+  ## echo factor(60)
+  ## # Tensor[system.int] of shape "[4]" on backend "Cpu"
+  ## #     2     2     3     5
+  ## ```
+  if n < 0:
+    raise newException(ValueError,
+      "Negative values (" & $n & ") cannot be factorized")
+  when T is int64:
+    if n > T(maximumConsecutiveFloat64Int):
+      raise newException(ValueError,
+        "Value (" & $n & ") is too large to be factorized")
+  elif T is SomeFloat:
+    if floor(n) != n:
+      raise newException(ValueError,
+        "Non whole numbers (" & $n & ") cannot be factorized")
+
+  if n < 4:
+    return [n].toTensor
+
+  # The algorithm works by keeping track of the list of unique potential,
+  # candidate prime factors of the input, and iteratively adding those
+  # that are confirmed to be factors into a list of confirmed factors
+  # (which is stored in the `result` tensor variable).
+
+  # First we must initialize the `candidate_factor` Tensor
+  # The factors of the input can be found among the list of primes
+  # that are smaller than or equal to input. However we can significantly
+  # reduce the candidate list by taking into account the fact that only a
+  # single factor can be greater than the square root of the input.
+  # The algorithm is such that if that is the case we will add the input number
+  # at the very end of the loop below
+  var candidate_factors = primes(T(ceil(sqrt(float(n)))))
+
+  # This list of prime candidate_factors is refined by finding those of them
+  # that divide the input value (i.e. those whose `input mod prime` == 0).
+  # Those candiates that don't divide the input are known to not be valid
+  # factors and can be removed from the candidate_factors list. Those that do
+  # divide the input are confirmed as valid factors and as such are added to
+  # the result list. Then the input is divided by all of the remaining
+  # candidates (by dividing the input by the product of all the remaining
+  # candidates). The result is a number that is the product of all the factors
+  # that are still unknown (which must be among the remaining candidates!) and
+  # which we can call `unknown_factor_product`.
+  # Then we can simply repeat the same process over and over, replacing the
+  # original input with the remaining `unknown_factor_product` after each
+  # iteration, until the `unknown_factors_product` (which is reduced by each
+  # division at the end of each iteration) reaches 1. Alternatively, we might
+  # run out of candidates, which will only happen when there is only one factor
+  # left (which must be greater than the square root of the input) and is stored
+  # in the `unknown_factors_product`. In that case we add it to the confirmed
+  # factors (result) list and the process can stop.
+  var unknown_factor_product = n
+  while unknown_factor_product > 1:
+    # Find the primes that are divisors of the remaining unknown_factors_product
+    # Note that this tells us which of the remaining candidate_factors are
+    # factors of the input _at least once_ (but they could divide it more
+    # than once)
+    let is_factor = (unknown_factor_product mod candidate_factors) ==. 0
+    # Keep only the factors that divide the remainder and remove the rest
+    # from the list of candidates
+    candidate_factors = candidate_factors[is_factor]
+    # after this step, all the items incandidate_factors are _known_ to be
+    # factors of `unknown_factor_product` _at least once_!
+    if candidate_factors.len == 0:
+      # If there are no more prime candidates left, it means that the remainder
+      # is a prime (and that it must be greater than the sqrt of the input),
+      # and that we are done (after adding it to the result list)
+      result = result.append([unknown_factor_product].toTensor)
+      break
+    # If we didn't stop it means that there are still candidates which we
+    # _know_ are factors of the remainder, so we must add them to the result
+    result = result.append(candidate_factors)
+    # Now we can prepare for the next iteration by dividing the remainder,
+    # by the factors we just added to the result. This reminder is the product
+    # of the factors we don't know yet
+    unknown_factor_product = T(unknown_factor_product / product(candidate_factors))
+    # After this division the items in `candidate_factors` become candidates again
+    # and we can start a new iteration
+  result = sorted(result)
+
+proc isprimeImpl[T: SomeInteger | SomeFloat](n: T, candidate_factors: Tensor[int]): bool {.inline.} =
+  ## Actual implementation of the isprime check
+  # This function is optimized for speed in 2 ways:
+  # 1. By first rejecting all non-whole float numbers and then performing the
+  #    actual isprime check using integers (which is faster than using floats)
+  # 2. By receving a pre-calculated tensor of candidate_factors. This does not
+  #    speed up the check of a single value, but it does speed up the check of
+  #    a tensor of values. Note that because of #1 the candidate_factors must
+  #    be a tensor of ints (even if the number being checked is a float)
+  when T is SomeFloat:
+    if floor(n) != n:
+      return false
+    let n = int(n)
+  result = (n > 1) and all(n mod candidate_factors[candidate_factors <. n])
+
+proc isprime*[T: SomeInteger | SomeFloat](n: T): bool =
+  ## Check whether the input is a prime number
+  ##
+  ## Only positive values higher than 1 can be primes (i.e. we exclude 1 and -1
+  ## which are sometimes considered primes).
+  ##
+  ## Note that this function also works with floats, which are considered
+  ## non-prime when they are not whole numbers.
+  # Note that we do here some checks that are repeated later inside of
+  # `isprimeImpl`. This is done to avoid the unnecessary calculation of
+  # the `candidate_factors` tensor in those cases
+  if n <= 1:
+    return false
+  when T is int64:
+    if n > T(maximumConsecutiveFloat64Int):
+      raise newException(ValueError,
+        "Value (" & $n & ") is too large to be factorized")
+  elif T is SomeFloat:
+    if floor(n) != n:
+      return false
+  var candidate_factors = primes(int(ceil(sqrt(float(n)))))
+  isprimeImpl(n, candidate_factors)
+
+proc isprime*[T: SomeInteger | SomeFloat](t: Tensor[T]): Tensor[bool] =
+  ## Element-wise check if the input values are prime numbers
+  result = zeros[bool](t.len)
+  # Pre-calculate the list of primes that will be used for the element-wise
+  # isprime check and then call isprimeImpl on each element
+  # Note that the candidate_factors must be a tensor of ints (for performance
+  # reasons)
+  var candidate_factors = primes(int(ceil(sqrt(float(max(t.flatten()))))))
+  for idx, val in t.enumerate():
+    result[idx] = isprimeImpl(val, candidate_factors)
+  return result.reshape(t.shape)
diff --git a/tests/test_primes.nim b/tests/test_primes.nim
@@ -0,0 +1,133 @@
+import ../scinim/primes
+import arraymancer
+import std / [unittest, random]
+
+proc test_primes() =
+  ## Test the `primes` function
+  test "Prime number generation (integer values)":
+    check: primes(0).len == 0
+    check: primes(1).len == 0
+    check: primes(2) == [2].toTensor
+    check: primes(3) == [2, 3].toTensor
+    check: primes(4) == [2, 3].toTensor
+    check: primes(11) == [2, 3, 5, 7, 11].toTensor
+    check: primes(12) == [2, 3, 5, 7, 11].toTensor
+    check: primes(19) == [2, 3, 5, 7, 11, 13, 17, 19].toTensor
+    check: primes(20) == [2, 3, 5, 7, 11, 13, 17, 19].toTensor
+    check: primes(22) == [2, 3, 5, 7, 11, 13, 17, 19].toTensor
+    check: primes(100000).len == 9592
+    check: primes(100003).len == 9593
+    check: primes(100000)[^1].item == 99991
+
+  test "Prime number generation (floating-point values)":
+    check: primes(100000.0).len == 9592
+    check: primes(100000.0)[^1].item == 99991.0
+
+    # An exception must be raised if the `upto` value is not a whole number
+    try:
+      discard primes(100.5)
+      check: false
+    except ValueError:
+      # This is what should happen!
+      discard
+
+proc generate_random_factor_tensor[T](
+    max_value: T, max_factors: int, prime_list: Tensor[T]): Tensor[T] =
+  ## Generate a tensor of random prime factors taken from a tensor of primes
+  ## The tensor length will not exceed the `max_factors` and the product of
+  ## the factors will not exceed `max_value` either.
+  ## This is not just a random list of values taken from the `prime_list`
+  ## Instead we artificially introduce a random level of repetition of the
+  ## chosen factors to emulate the fact that many numbers have repeated
+  ## prime factors
+  let max_value = rand(4 .. 2 ^ 53)
+  let max_factors = rand(1 .. 20)
+  result = newTensor[int](max_factors)
+  var value = 1
+  var factor = prime_list[rand(prime_list.len - 1)]
+  for idx in 0 ..< max_factors:
+    # Randomly repeat the previous factor
+    # Higher number of repetitions are less likely
+    let repeat_factor = rand(5) < 1
+    if not repeat_factor:
+      factor = prime_list[rand(prime_list.len - 1)]
+    let new_value = factor * value
+    if new_value >= max_value:
+      break
+    result[idx] = factor
+    value = new_value
+  result = sorted(result)
+  result = result[result >. 0]
+
+proc test_factor() =
+  test "Prime factorization of integer values (factor)":
+    check: factor(60) == [2, 2, 3, 5].toTensor
+    check: factor(100001) == [11, 9091].toTensor
+
+    # Check that the product of the factorization of a few random values
+    # equals the original numbers
+    for n in 0 ..< 10:
+      let value = rand(10000)
+      check: value == product(factor(value))
+
+    # Repeat the previous test in a more sophisticated manner
+    # Instead of generating random values and checking that the
+    # product of their factorization is the same as the original values
+    # (which could work for many incorrect implementations of factor),
+    # generate a few random factor tensors, multiply them to get
+    # the number that has them as prime factors, factorize those numbers
+    # and check that their factorizations matches the original tensors
+    let prime_list = primes(100)
+    for n in 0 ..< 10:
+      let max_value = rand(4 .. 2 ^ 53)
+      let max_factors = rand(1 .. 20)
+      var factors = generate_random_factor_tensor(
+        max_value, max_factors, prime_list)
+      let value = product(factors)
+      check: factor(value) == factors
+
+  test "Prime factorization of floating-point values (factor)":
+    # Floating-point
+    check: factor(60.0) == [2.0, 2, 3, 5].toTensor
+    check: factor(100001.0) == [11.0, 9091].toTensor
+
+    # Check that the product of the factorization of a few random values
+    # equals the original numbers
+    # Note that here we do not also do the reverse check (as we do for ints)
+    # in order to make the test faster
+    for n in 0 ..< 10:
+      let value = floor(rand(10000.0))
+      check: value == product(factor(value))
+
+    # An exception must be raised if we try to factorize a non-whole number
+    try:
+      discard factor(100.5)
+      check: false
+    except ValueError:
+      # This is what should happen!
+      discard
+
+proc test_isprime() =
+  test "isprime":
+    check: isprime(7) == true
+    check: isprime(7.0) == true
+    check: isprime(7.5) == false
+    check: isprime(1) == false
+    check: isprime(0) == false
+    check: isprime(-1) == false
+    let t = [
+      [-1, 0, 2,  4],
+      [ 5, 6, 7, 11]
+    ].toTensor
+    let expected = [
+      [false, false, true, false],
+      [ true, false, true,  true]
+    ].toTensor
+    check: isprime(t) == expected
+    check: isprime(t.asType(float)) == expected
+
+# Run the tests
+suite "Primes":
+  test_primes()
+  test_factor()
+  test_isprime()