Merge branch 'rems-project:master' into check-countermodel

rems-project · Feb 2, 2025 · bd4144c · bd4144c
2 parents 4661aa0 + d7d4e67
commit bd4144c
Show file tree

Hide file tree

Showing 9 changed files with 117 additions and 94 deletions.
diff --git a/backend/cn/TESTING.md b/backend/cn/TESTING.md
@@ -0,0 +1,45 @@
+# CN Testing
+
+CN has testing capabilities available via the `cn test` subcommand.
+
+## Overview
+
+Currently, CN supports only per-function tests, but additional types of testing may become available in the future.
+
+Running `cn test <filename.c>` generates C files with the testing infrastructure, the instrumented program under test, and a build script named `run_tests.sh`.
+This script compiles the C files and runs the tests.
+
+By default, running `cn test` will automatically run `run_tests.sh`, which produces a test executable `tests.out`.
+This can be disabled by using the `--no-run` flag.
+
+The output directory for these files can be set by using `--output-dir=<DIR>`.
+If the directory does not already exist, it is created.
+
+### Per-function tests
+
+When testing, there are currently two types of tests, constant tests and generator-based tests.
+For *each function with a body*, CN will create either a constant test or generator-based test.
+
+If a function takes no arguments, does not use accesses on global variables, and is correct, it should always return the same value and free any memory it allocates.
+In this case, a constant test is generated, which runs the function once and uses [Fulminate](FULMINATE_README.md) to check for post-condition violations.
+
+In all other cases, it creates generator-based tests, which are in the style of property-based testing.
+A "generator" is created, which randomly generates function arguments, values for globals accessed and heap states, all of which adhere to the given function's pre-condition.
+It calls the function with this input and uses [Fulminate](FULMINATE_README.md) similar to the constant tests.
+
+#### Understanding errors
+
+Currently, the tool does not print out counterexamples, but this is [planned](https://github.com/rems-project/cerberus/issues/841).
+Until then, `tests.out` can be run with the `--trap` flag in a debugger.
+Since seeds are printed each time the tool runs, `--seed <seed>` can be used to reproduce the test case.
+The debugger should automatically pause right before rerunning the failing test case.
+
+#### Writing custom tests
+
+There is currently no way to write custom property-based tests.
+However, once lemmas can be tested, a lemma describing the desired property could be written to test it.
+
+In terms of unit tests, one can simply define a function that performs the desired operations.
+This function will get detected by `cn test` and turned into a constant test.
+Any assertions that one would make about the result would have to be captured by the post-condition.
+In the future, existing infrastructure like `cn_assert` might be adapted for general use.
diff --git a/backend/cn/bin/main.ml b/backend/cn/bin/main.ml
@@ -467,7 +467,6 @@ let run_tests
   seed
   logging_level
   progress_level
-  interactive
   until_timeout
   exit_fast
   max_stack_depth
@@ -478,6 +477,7 @@ let run_tests
   sized_null
   coverage
   disable_passes
+  trap
   =
   (* flags *)
   Cerb_debug.debug_level := debug_level;
@@ -513,7 +513,6 @@ let run_tests
           seed;
           logging_level;
           progress_level;
-          interactive;
           until_timeout;
           exit_fast;
           max_stack_depth;
@@ -523,7 +522,8 @@ let run_tests
           allowed_size_split_backtracks;
           sized_null;
           coverage;
-          disable_passes
+          disable_passes;
+          trap
         }
       in
       TestGeneration.set_config config;
@@ -901,7 +901,7 @@ let verify_cmd =
 module Testing_flags = struct
   let output_test_dir =
     let doc = "Place generated tests in the provided directory" in
-    Arg.(required & opt (some string) None & info [ "output-dir" ] ~docv:"FILE" ~doc)
+    Arg.(value & opt string "." & info [ "output-dir" ] ~docv:"DIR" ~doc)
 
 
   let only =
@@ -1016,13 +1016,6 @@ module Testing_flags = struct
       & info [ "progress-level" ] ~doc)
 
 
-  let interactive =
-    let doc =
-      "Enable interactive features for testing, such as requesting more detailed logs"
-    in
-    Arg.(value & flag & info [ "interactive" ] ~doc)
-
-
   let until_timeout =
     let doc =
       "Keep rerunning tests until the given timeout (in seconds) has been reached"
@@ -1105,6 +1098,11 @@ module Testing_flags = struct
                 ]))
           []
       & info [ "disable" ] ~doc)
+
+
+  let trap =
+    let doc = "Raise SIGTRAP on test failure" in
+    Arg.(value & flag & info [ "trap" ] ~doc)
 end
 
 let testing_cmd =
@@ -1138,7 +1136,6 @@ let testing_cmd =
     $ Testing_flags.seed
     $ Testing_flags.logging_level
     $ Testing_flags.progress_level
-    $ Testing_flags.interactive
     $ Testing_flags.until_timeout
     $ Testing_flags.exit_fast
     $ Testing_flags.max_stack_depth
@@ -1149,6 +1146,7 @@ let testing_cmd =
     $ Testing_flags.sized_null
     $ Testing_flags.coverage
     $ Testing_flags.disable_passes
+    $ Testing_flags.trap
   in
   let doc =
     "Generates tests for all functions in [FILE] with CN specifications.\n\

diff --git a/backend/cn/lib/solver.ml b/backend/cn/lib/solver.ml
@@ -1325,17 +1325,22 @@ let model_state = ref No_model
 let model () = match !model_state with No_model -> assert false | Model mo -> mo
 
 (** Evaluate terms in the context of a model computed by the solver. *)
-let model_evaluator =
-  let model_evaluator_solver = ref None in
+let model_evaluator, reset_model_evaluator_state =
+  (* internal state for the model evaluator, reuses the solver across consecutive calls for efficiency *)
+  let model_evaluator_solver : Simple_smt.solver option ref = ref None in
   let currently_loaded_model = ref 0 in
-  let new_model_id =
-    let model_id = ref 0 in
-    fun () ->
-      (* Start with 1, as 0 is the id of the empty model *)
-      model_id := !model_id + 1;
-      !model_id
+  let model_id = ref 0 in
+  let new_model_id () =
+    (* Start with 1, as 0 is the id of the empty model *)
+    model_id := !model_id + 1;
+    !model_id
   in
-  fun solver mo ->
+  let reset_model_evaluator_state () =
+    currently_loaded_model := 0;
+    model_evaluator_solver := None;
+    model_id := 0
+  in
+  let model_evaluator solver mo =
     match SMT.to_list mo with
     | None -> failwith "model is an atom"
     | Some defs ->
@@ -1383,6 +1388,8 @@ let model_evaluator =
       in
       Hashtbl.add models_tbl model_id model_fn;
       model_id
+  in
+  (model_evaluator, reset_model_evaluator_state)
 
 
 (* ---------------------------------------------------------------------------*)

diff --git a/backend/cn/lib/solver.mli b/backend/cn/lib/solver.mli
@@ -35,6 +35,9 @@ val pop : solver -> int -> unit
     but may be unnecessary https://github.com/rems-project/cerberus/issues/752 *)
 val num_scopes : solver -> int
 
+(* Resets internal state for the model evaluator *)
+val reset_model_evaluator_state : unit -> unit
+
 (* Run the solver. Note that we pass the assumptions explicitly even though they are also
    available in the solver context, because CN is going some simplification on its own. *)
 val provable

diff --git a/backend/cn/lib/testGeneration/buildScript.ml b/backend/cn/lib/testGeneration/buildScript.ml
@@ -151,10 +151,6 @@ let run () =
           |> Option.map (fun level -> [ "--progress-level"; string_of_int level ])
           |> Option.to_list
           |> List.flatten)
-       @ (if Config.is_interactive () then
-            [ "--interactive" ]
-          else
-            [])
        @ (match Config.is_until_timeout () with
           | Some timeout -> [ "--until-timeout"; string_of_int timeout ]
           | None -> [])
@@ -187,7 +183,12 @@ let run () =
               string_of_int allowed_size_split_backtracks
             ])
           |> Option.to_list
-          |> List.flatten))
+          |> List.flatten)
+       @
+       if Config.is_trap () then
+         [ "--trap" ]
+       else
+         [])
   in
   string "# Run"
   ^^ hardline

diff --git a/backend/cn/lib/testGeneration/testGenConfig.ml b/backend/cn/lib/testGeneration/testGenConfig.ml
@@ -12,7 +12,6 @@ type t =
     seed : string option;
     logging_level : int option;
     progress_level : int option;
-    interactive : bool;
     until_timeout : int option;
     exit_fast : bool;
     max_stack_depth : int option;
@@ -22,7 +21,8 @@ type t =
     allowed_size_split_backtracks : int option;
     sized_null : bool;
     coverage : bool;
-    disable_passes : string list
+    disable_passes : string list;
+    trap : bool
   }
 
 let default =
@@ -37,7 +37,6 @@ let default =
     seed = None;
     logging_level = None;
     progress_level = None;
-    interactive = false;
     until_timeout = None;
     exit_fast = false;
     max_stack_depth = None;
@@ -47,7 +46,8 @@ let default =
     allowed_size_split_backtracks = None;
     sized_null = false;
     coverage = false;
-    disable_passes = []
+    disable_passes = [];
+    trap = false
   }
 
 
@@ -77,8 +77,6 @@ let has_logging_level () = !instance.logging_level
 
 let has_progress_level () = !instance.progress_level
 
-let is_interactive () = !instance.interactive
-
 let is_until_timeout () = !instance.until_timeout
 
 let is_exit_fast () = !instance.exit_fast
@@ -98,3 +96,5 @@ let is_sized_null () = !instance.sized_null
 let is_coverage () = !instance.coverage
 
 let has_pass s = not (List.mem String.equal s !instance.disable_passes)
+
+let is_trap () = !instance.trap
diff --git a/backend/cn/lib/testGeneration/testGenConfig.mli b/backend/cn/lib/testGeneration/testGenConfig.mli
@@ -12,7 +12,6 @@ type t =
     seed : string option;
     logging_level : int option;
     progress_level : int option;
-    interactive : bool;
     until_timeout : int option;
     exit_fast : bool;
     max_stack_depth : int option;
@@ -22,7 +21,8 @@ type t =
     allowed_size_split_backtracks : int option;
     sized_null : bool;
     coverage : bool;
-    disable_passes : string list
+    disable_passes : string list;
+    trap : bool
   }
 
 val default : t
@@ -51,8 +51,6 @@ val has_logging_level : unit -> int option
 
 val has_progress_level : unit -> int option
 
-val is_interactive : unit -> bool
-
 val is_until_timeout : unit -> int option
 
 val is_exit_fast : unit -> bool
@@ -72,3 +70,5 @@ val is_sized_null : unit -> bool
 val is_coverage : unit -> bool
 
 val has_pass : string -> bool
+
+val is_trap : unit -> bool
diff --git a/runtime/libcn/include/cn-testing/test.h b/runtime/libcn/include/cn-testing/test.h
@@ -12,12 +12,14 @@ enum cn_test_gen_progress {
     CN_TEST_GEN_PROGRESS_ALL = 2
 };
 
-typedef enum cn_test_result cn_test_case_fn(enum cn_test_gen_progress);
+typedef enum cn_test_result cn_test_case_fn(enum cn_test_gen_progress, int);
 
 void cn_register_test_case(const char* suite, const char* name, cn_test_case_fn* func);
 
 void print_test_info(const char* suite, const char* name, int tests, int discards);
 
+/** This function is called right before rerunning a failing test case. */
+void cn_trap(void);
 
 #define CN_UNIT_TEST_CASE_NAME(FuncName) cn_test_const_##FuncName
 
@@ -53,7 +55,10 @@ void print_test_info(const char* suite, const char* name, int tests, int discard
         longjmp(buf_##Name, mode);                                                      \
     }                                                                                   \
                                                                                         \
-    enum cn_test_result cn_test_gen_##Name (enum cn_test_gen_progress progress_level) { \
+    enum cn_test_result cn_test_gen_##Name (                                            \
+        enum cn_test_gen_progress progress_level,                                       \
+        int trap                                                                        \
+    ) {                                                                                 \
         cn_gen_rand_checkpoint checkpoint = cn_gen_rand_save();                         \
         int i = 0, d = 0;                                                               \
         set_cn_failure_cb(&cn_test_gen_##Name##_fail);                                  \
@@ -94,6 +99,9 @@ void print_test_info(const char* suite, const char* name, int tests, int discard
             }                                                                           \
             assume_##Name(__VA_ARGS__);                                                 \
             Init(res);                                                                  \
+            if (trap) {                                                                 \
+                cn_trap();                                                              \
+            }                                                                           \
             Name(__VA_ARGS__);                                                          \
             cn_gen_rand_replace(checkpoint);                                            \
         }                                                                               \