Merge pull request #348 from pq-code-package/cbmc_gen_matrix

CBMC: Add spec+proof for `gen_matrix_entry()`
pq-code-package · Nov 6, 2024 · b5a9034 · b5a9034
2 parents 7774b84 + 9271035
commit b5a9034
Show file tree

Hide file tree

Showing 11 changed files with 186 additions and 58 deletions.
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -242,7 +242,7 @@ jobs:
     if: github.repository_owner == 'pq-code-package' && !github.event.pull_request.head.repo.fork
     with:
       name: CBMC (MLKEM-512)
-      ec2_instance_type: c7g.4xlarge
+      ec2_instance_type: c7g.2xlarge
       ec2_ami: ubuntu-latest (custom AMI)
       ec2_ami_id: ami-08ddb0acd99dc3d33 # aarch64, ubuntu-latest, 64g
       compile_mode: native
@@ -264,7 +264,7 @@ jobs:
     if: github.repository_owner == 'pq-code-package' && !github.event.pull_request.head.repo.fork
     with:
       name: CBMC (MLKEM-768)
-      ec2_instance_type: c7g.4xlarge
+      ec2_instance_type: c7g.2xlarge
       ec2_ami: ubuntu-latest (custom AMI)
       ec2_ami_id: ami-08ddb0acd99dc3d33 # aarch64, ubuntu-latest, 64g
       compile_mode: native
@@ -286,7 +286,7 @@ jobs:
     if: github.repository_owner == 'pq-code-package' && !github.event.pull_request.head.repo.fork
     with:
       name: CBMC (MLKEM-1024)
-      ec2_instance_type: c7g.4xlarge
+      ec2_instance_type: c7g.2xlarge
       ec2_ami: ubuntu-latest (custom AMI)
       ec2_ami_id: ami-08ddb0acd99dc3d33 # aarch64, ubuntu-latest, 64g
       compile_mode: native

diff --git a/cbmc/proofs/gen_matrix_entry/Makefile b/cbmc/proofs/gen_matrix_entry/Makefile
@@ -0,0 +1,54 @@
+# SPDX-License-Identifier: Apache-2.0
+
+include ../Makefile_params.common
+
+HARNESS_ENTRY = harness
+HARNESS_FILE = gen_matrix_entry_harness
+
+# This should be a unique identifier for this proof, and will appear on the
+# Litani dashboard. It can be human-readable and contain spaces if you wish.
+PROOF_UID = gen_matrix_entry
+
+DEFINES +=
+INCLUDES +=
+
+REMOVE_FUNCTION_BODY +=
+UNWINDSET +=
+
+PROOF_SOURCES += $(PROOFDIR)/$(HARNESS_FILE).c
+PROJECT_SOURCES += $(SRCDIR)/mlkem/indcpa.c
+
+CHECK_FUNCTION_CONTRACTS=gen_matrix_entry
+USE_FUNCTION_CONTRACTS=$(FIPS202_NAMESPACE)shake128_absorb $(FIPS202_NAMESPACE)shake128_squeezeblocks $(MLKEM_NAMESPACE)rej_uniform
+APPLY_LOOP_CONTRACTS=on
+USE_DYNAMIC_FRAMES=1
+
+# Disable any setting of EXTERNAL_SAT_SOLVER, and choose SMT backend instead
+EXTERNAL_SAT_SOLVER=
+CBMCFLAGS=--bitwuzla
+
+FUNCTION_NAME = $(MLKEM_NAMESPACE)gen_matrix_entry
+
+# If this proof is found to consume huge amounts of RAM, you can set the
+# EXPENSIVE variable. With new enough versions of the proof tools, this will
+# restrict the number of EXPENSIVE CBMC jobs running at once. See the
+# documentation in Makefile.common under the "Job Pools" heading for details.
+# EXPENSIVE = true
+
+# This function is large enough to need...
+CBMC_OBJECT_BITS = 8
+
+# If you require access to a file-local ("static") function or object to conduct
+# your proof, set the following (and do not include the original source file
+# ("mlkem/poly.c") in PROJECT_SOURCES).
+# REWRITTEN_SOURCES = $(PROOFDIR)/<__SOURCE_FILE_BASENAME__>.i
+# include ../Makefile.common
+# $(PROOFDIR)/<__SOURCE_FILE_BASENAME__>.i_SOURCE = $(SRCDIR)/mlkem/poly.c
+# $(PROOFDIR)/<__SOURCE_FILE_BASENAME__>.i_FUNCTIONS = foo bar
+# $(PROOFDIR)/<__SOURCE_FILE_BASENAME__>.i_OBJECTS = baz
+# Care is required with variables on the left-hand side: REWRITTEN_SOURCES must
+# be set before including Makefile.common, but any use of variables on the
+# left-hand side requires those variables to be defined. Hence, _SOURCE,
+# _FUNCTIONS, _OBJECTS is set after including Makefile.common.
+
+include ../Makefile.common
diff --git a/cbmc/proofs/gen_matrix_entry/cbmc-proof.txt b/cbmc/proofs/gen_matrix_entry/cbmc-proof.txt
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: Apache-2.0
+
+# This file marks this directory as containing a CBMC proof.
diff --git a/cbmc/proofs/gen_matrix_entry/gen_matrix_entry_harness.c b/cbmc/proofs/gen_matrix_entry/gen_matrix_entry_harness.c
@@ -0,0 +1,32 @@
+// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+// SPDX-License-Identifier: MIT-0
+
+/*
+ * Insert copyright notice
+ */
+
+/**
+ * @file rej_uniform_harness.c
+ * @brief Implements the proof harness for rej_uniform function.
+ */
+
+/*
+ * Insert project header files that
+ *   - include the declaration of the function
+ *   - include the types needed to declare function arguments
+ */
+#include <stdint.h>
+#include "poly.h"
+
+// declare here since it's static in non-CBMC builds
+void gen_matrix_entry(poly *entry, uint8_t seed[MLKEM_SYMBYTES + 16]);
+
+/**
+ * @brief Starting point for formal analysis
+ *
+ */
+void harness(void) {
+  poly *out;
+  uint8_t *seed;
+  gen_matrix_entry(out, seed);
+}
diff --git a/cbmc/proofs/polyvec_tobytes/Makefile b/cbmc/proofs/polyvec_tobytes/Makefile
@@ -13,7 +13,7 @@ DEFINES +=
 INCLUDES +=
 
 REMOVE_FUNCTION_BODY +=
-UNWINDSET += $(MLKEM_NAMESPACE)polyvec_tobytes.0:4 # Largest value of MLKEM_K
+UNWINDSET +=
 
 PROOF_SOURCES += $(PROOFDIR)/$(HARNESS_FILE).c
 PROJECT_SOURCES += $(SRCDIR)/mlkem/polyvec.c
@@ -25,7 +25,7 @@ USE_DYNAMIC_FRAMES=1
 
 # Disable any setting of EXTERNAL_SAT_SOLVER, and choose SMT backend instead
 EXTERNAL_SAT_SOLVER=
-CBMCFLAGS=--bitwuzla
+CBMCFLAGS=--smt2
 
 FUNCTION_NAME = $(MLKEM_NAMESPACE)polyvec_tobytes
 

diff --git a/cbmc/proofs/rej_uniform/rej_uniform_harness.c b/cbmc/proofs/rej_uniform/rej_uniform_harness.c
@@ -23,9 +23,9 @@
  *
  */
 void harness(void) {
-  unsigned int outlen, inlen;
+  unsigned int target, offset, inlen;
   int16_t *r;
   uint8_t *buf;
 
-  rej_uniform(r, outlen, buf, inlen);
+  rej_uniform(r, target, offset, buf, inlen);
 }
diff --git a/fips202/fips202.h b/fips202/fips202.h
@@ -40,14 +40,23 @@ typedef struct {
  * with the same state.
  */
 #define shake128_absorb FIPS202_NAMESPACE(shake128_absorb)
-void shake128_absorb(shake128ctx *state, const uint8_t *input, size_t inlen);
+void shake128_absorb(shake128ctx *state, const uint8_t *input, size_t inlen)
+    // clang-format off
+REQUIRES(IS_FRESH(state, sizeof(shake128ctx)))
+REQUIRES(IS_FRESH(input, inlen)) ASSIGNS(OBJECT_WHOLE(state));
+// clang-format on
+
 /* Squeeze output out of the sponge.
  *
  * Supports being called multiple times
  */
 #define shake128_squeezeblocks FIPS202_NAMESPACE(shake128_squeezeblocks)
-void shake128_squeezeblocks(uint8_t *output, size_t nblocks,
-                            shake128ctx *state);
+void shake128_squeezeblocks(uint8_t *output, size_t nblocks, shake128ctx *state)
+    // clang-format off
+REQUIRES(IS_FRESH(state, sizeof(shake128ctx)))
+REQUIRES(IS_FRESH(output, nblocks *SHAKE128_RATE))
+ASSIGNS(OBJECT_WHOLE(output), OBJECT_WHOLE(state));
+// clang-format on
 
 /* Initialize incremental hashing API */
 #define shake256_inc_init FIPS202_NAMESPACE(shake256_inc_init)

diff --git a/mlkem/indcpa.c b/mlkem/indcpa.c
@@ -166,7 +166,7 @@ static void gen_matrix_entry_x4(poly *vec[4],
                            GEN_MATRIX_NBLOCKS, &statex);
   buflen = GEN_MATRIX_NBLOCKS * SHAKE128_RATE;
   for (unsigned int j = 0; j < KECCAK_WAY; j++) {
-    ctr[j] = rej_uniform(vec[j]->coeffs, MLKEM_N, bufx[j], buflen);
+    ctr[j] = rej_uniform(vec[j]->coeffs, MLKEM_N, 0, bufx[j], buflen);
   }
 
   // So long as not all matrix entries have been generated, squeeze
@@ -176,15 +176,21 @@ static void gen_matrix_entry_x4(poly *vec[4],
          ctr[3] < MLKEM_N) {
     shake128x4_squeezeblocks(bufx[0], bufx[1], bufx[2], bufx[3], 1, &statex);
     for (unsigned j = 0; j < KECCAK_WAY; j++) {
-      ctr[j] += rej_uniform(vec[j]->coeffs + ctr[j], MLKEM_N - ctr[j], bufx[j],
-                            buflen);
+      ctr[j] = rej_uniform(vec[j]->coeffs, MLKEM_N, ctr[j], bufx[j], buflen);
     }
   }
 }
 
 // Generate a single A matrix entry from a seed, using rejection
 // sampling on the output of a XOF.
-static void gen_matrix_entry(poly *entry, uint8_t seed[MLKEM_SYMBYTES + 16]) {
+STATIC_TESTABLE
+void gen_matrix_entry(poly *entry,
+                      uint8_t seed[MLKEM_SYMBYTES + 16])  // clang-format off
+  REQUIRES(IS_FRESH(entry, sizeof(poly)))
+  REQUIRES(IS_FRESH(seed, MLKEM_SYMBYTES + 16))
+  ASSIGNS(OBJECT_UPTO(entry, sizeof(poly)))
+  ENSURES(ARRAY_IN_BOUNDS(int, k, 0, MLKEM_N - 1, entry->coeffs, 0, (MLKEM_Q - 1)))
+{  // clang-format on
   shake128ctx state;
   uint8_t buf[GEN_MATRIX_NBLOCKS * SHAKE128_RATE];
   unsigned int ctr, buflen;
@@ -196,14 +202,19 @@ static void gen_matrix_entry(poly *entry, uint8_t seed[MLKEM_SYMBYTES + 16]) {
   // This should generate the matrix entry with high probability.
   shake128_squeezeblocks(buf, GEN_MATRIX_NBLOCKS, &state);
   buflen = GEN_MATRIX_NBLOCKS * SHAKE128_RATE;
-  ctr = rej_uniform(entry->coeffs, MLKEM_N, buf, buflen);
+  ctr = rej_uniform(entry->coeffs, MLKEM_N, 0, buf, buflen);
 
   // Squeeze + sampel one more block a time until we're done
   buflen = SHAKE128_RATE;
-  while (ctr < MLKEM_N) {
-    shake128_squeezeblocks(buf, 1, &state);
-    ctr += rej_uniform(entry->coeffs + ctr, MLKEM_N - ctr, buf, buflen);
-  }
+  while (ctr < MLKEM_N)  // clang-format off
+    ASSIGNS(ctr, state, OBJECT_UPTO(entry, sizeof(poly)), OBJECT_WHOLE(buf))
+    INVARIANT(0 <= ctr && ctr <= MLKEM_N)
+    INVARIANT(ctr > 0 ==> ARRAY_IN_BOUNDS(int, k, 0, ctr - 1, entry->coeffs,
+                                          0, (MLKEM_Q - 1)))  // clang-format on
+    {
+      shake128_squeezeblocks(buf, 1, &state);
+      ctr = rej_uniform(entry->coeffs, MLKEM_N, ctr, buf, SHAKE128_RATE);
+    }
 }
 
 /*************************************************

diff --git a/mlkem/polyvec.c b/mlkem/polyvec.c
@@ -140,13 +140,16 @@ void polyvec_decompress(polyvec *r,
 
 void polyvec_tobytes(uint8_t r[MLKEM_POLYVECBYTES], const polyvec *a) {
   unsigned int i;
-  for (i = 0; i < MLKEM_K; i++) {
-    poly_tobytes(r + i * MLKEM_POLYBYTES, &a->vec[i]);
-  }
+  for (i = 0; i < MLKEM_K; i++)  // clang-format off
+    ASSIGNS(i, OBJECT_WHOLE(r))
+    INVARIANT(i <= MLKEM_K)  // clang-format on
+    {
+      poly_tobytes(r + i * MLKEM_POLYBYTES, &a->vec[i]);
+    }
 }
 
 void polyvec_frombytes(polyvec *r, const uint8_t a[MLKEM_POLYVECBYTES]) {
-  unsigned int i;
+  int i;
   for (i = 0; i < MLKEM_K; i++) {
     poly_frombytes(&r->vec[i], a + i * MLKEM_POLYBYTES);
   }

diff --git a/mlkem/rej_uniform.c b/mlkem/rej_uniform.c
@@ -11,8 +11,12 @@
  *              uniform random integers mod q
  *
  * Arguments:   - int16_t *r:          pointer to output buffer
- *              - unsigned int len:    requested number of 16-bit integers
+ *              - unsigned int target: requested number of 16-bit integers
  *                                     (uniform mod q).
+ *                                     Must be <= 4096.
+ *              - unsigned int offset: number of 16-bit integers that have
+ *                                     already been sampled.
+ *                                     Must be <= target.
  *              - const uint8_t *buf:  pointer to input buffer
  *                                     (assumed to be uniform random bytes)
  *              - unsigned int buflen: length of input buffer in bytes
@@ -21,25 +25,26 @@
  *
  * Note: Strictly speaking, only a few values of buflen near UINT_MAX need
  * excluding. The limit of 4096 is somewhat arbitary but sufficient for all
- * uses of this function.
+ * uses of this function. Similarly, the actual limit for target is UINT_MAX/2.
  *
- * Returns the number of sampled 16-bit integers, at most len.
- * If it is strictly less than len, all of the input buffers is
- * guaranteed to have been consumed. If it is equal to len, no
- * information is provided on how many bytes of the input buffer
- * have been consumed.
+ * Returns the new offset of sampled 16-bit integers, at most target,
+ * and at least the initial offset.
+ * If the new offset is strictly less than len, all of the input buffers
+ * is guaranteed to have been consumed. If it is equal to len, no information
+ * is provided on how many bytes of the input buffer have been consumed.
  **************************************************/
-static unsigned int rej_uniform_scalar(int16_t *r, unsigned int len,
-                                       const uint8_t *buf,
+static unsigned int rej_uniform_scalar(int16_t *r, unsigned int target,
+                                       unsigned int offset, const uint8_t *buf,
                                        unsigned int buflen) {
   unsigned int ctr, pos;
   uint16_t val0, val1;
 
-  ctr = pos = 0;
+  ctr = offset;
+  pos = 0;
   // pos + 3 cannot overflow due to the assumption buflen <= 4096
-  while (ctr < len && pos + 3 <= buflen)  // clang-format off
+  while (ctr < target && pos + 3 <= buflen)  // clang-format off
     ASSIGNS(ctr, val0, val1, pos, OBJECT_WHOLE(r))
-    INVARIANT(ctr <= len && pos <= buflen)
+    INVARIANT(offset <= ctr && ctr <= target && pos <= buflen)
     INVARIANT(ctr > 0 ==> ARRAY_IN_BOUNDS(int, k, 0, ctr - 1, r, 0, (MLKEM_Q - 1)))  // clang-format on
     {
       val0 = ((buf[pos + 0] >> 0) | ((uint16_t)buf[pos + 1] << 8)) & 0xFFF;
@@ -49,29 +54,29 @@ static unsigned int rej_uniform_scalar(int16_t *r, unsigned int len,
       if (val0 < MLKEM_Q) {
         r[ctr++] = val0;
       }
-      if (ctr < len && val1 < MLKEM_Q) {
+      if (ctr < target && val1 < MLKEM_Q) {
         r[ctr++] = val1;
       }
     }
   return ctr;
 }
 
 #if !defined(MLKEM_USE_NATIVE_AARCH64)
-unsigned int rej_uniform(int16_t *r, unsigned int len, const uint8_t *buf,
-                         unsigned int buflen) {
-  return rej_uniform_scalar(r, len, buf, buflen);
+unsigned int rej_uniform(int16_t *r, unsigned int target, unsigned int offset,
+                         const uint8_t *buf, unsigned int buflen) {
+  return rej_uniform_scalar(r, target, offset, buf, buflen);
 }
 #else  /* MLKEM_USE_NATIVE_AARCH64 */
 
-unsigned int rej_uniform(int16_t *r, unsigned int len, const uint8_t *buf,
-                         unsigned int buflen) {
+unsigned int rej_uniform(int16_t *r, unsigned int target, unsigned int offset,
+                         const uint8_t *buf, unsigned int buflen) {
   int ret;
 
   // Sample from large buffer with full lane as much as possible.
-  ret = rej_uniform_native(r, len, buf, buflen);
+  ret = rej_uniform_native(r + offset, target - offset, buf, buflen);
   if (ret != -1)
-    return (unsigned)ret;
+    return offset + (unsigned)ret;
 
-  return rej_uniform_scalar(r, len, buf, buflen);
+  return rej_uniform_scalar(r, target, offset, buf, buflen);
 }
 #endif /* MLKEM_USE_NATIVE_AARCH64 */
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,3 @@
		# SPDX-License-Identifier: Apache-2.0

		# This file marks this directory as containing a CBMC proof.