From 924ad43d95c4dbbf4f72b5e65a1294239fb78803 Mon Sep 17 00:00:00 2001 From: Alan Jowett Date: Mon, 14 Oct 2024 13:24:44 -0700 Subject: [PATCH 1/2] Implement ebpf_check_constraints_at_label Signed-off-by: Alan Jowett --- src/asm_parse.hpp | 14 ++++++++ src/asm_syntax.hpp | 23 ++++++++++++ src/config.cpp | 1 + src/config.hpp | 2 ++ src/crab_verifier.cpp | 68 +++++++++++++++++++++++++++++++++++ src/crab_verifier.hpp | 23 ++++++++++++ src/ebpf_yaml.cpp | 33 ++++++++++++++++- src/ebpf_yaml.hpp | 1 + test-data/add.yaml | 84 +++++++++++++++++++++++++++++++++++++++++++ 9 files changed, 248 insertions(+), 1 deletion(-) diff --git a/src/asm_parse.hpp b/src/asm_parse.hpp index 212aca97f..eacdb30ed 100644 --- a/src/asm_parse.hpp +++ b/src/asm_parse.hpp @@ -3,7 +3,21 @@ #pragma once #include +#include #include "asm_syntax.hpp" +#include "crab/interval.hpp" +#include "crab/linear_constraint.hpp" Instruction parse_instruction(const std::string& line, const std::map& label_name_to_label); + +/*** + * Parse a set of string form linear constraints into a vector of crab::linear_constraint_t + * + * @param[in] constraints: A set of string form linear constraints. + * @param[out] numeric_ranges: A vector of numeric ranges that are extracted from the constraints. + * + * @return A vector of crab::linear_constraint_t + */ +std::vector parse_linear_constraints(const std::set& constraints, + std::vector& numeric_ranges); diff --git a/src/asm_syntax.hpp b/src/asm_syntax.hpp index 9e7d13ab1..472edc989 100644 --- a/src/asm_syntax.hpp +++ b/src/asm_syntax.hpp @@ -23,6 +23,29 @@ struct label_t { explicit label_t(const int index, const int to = -1, std::string stack_frame_prefix = {}) noexcept : from(index), to(to), stack_frame_prefix(std::move(stack_frame_prefix)) {} + explicit label_t(std::string_view string_label) { + auto pos = string_label.find(STACK_FRAME_DELIMITER); + if (pos != std::string_view::npos) { + stack_frame_prefix = std::string(string_label.substr(0, pos)); + string_label = string_label.substr(pos + 1); + } + + pos = string_label.find(':'); + try { + if (pos != std::string_view::npos) { + from = std::stoi(std::string(string_label.substr(0, pos))); + to = std::stoi(std::string(string_label.substr(pos + 1))); + } else { + from = std::stoi(std::string(string_label)); + to = -1; + } + } catch (const std::invalid_argument& e) { + throw std::invalid_argument("Invalid label format: " + std::string(string_label)); + } catch (const std::out_of_range& e) { + throw std::out_of_range("Label value out of range: " + std::string(string_label)); + } + } + static label_t make_jump(const label_t& src_label, const label_t& target_label) { return label_t{src_label.from, target_label.from, target_label.stack_frame_prefix}; } diff --git a/src/config.cpp b/src/config.cpp index 6c40e86d2..ea5e05117 100644 --- a/src/config.cpp +++ b/src/config.cpp @@ -14,4 +14,5 @@ const ebpf_verifier_options_t ebpf_verifier_default_options = { .allow_division_by_zero = true, .setup_constraints = true, .big_endian = false, + .store_pre_invariants = false, }; diff --git a/src/config.hpp b/src/config.hpp index 26963b0a9..32f03eef3 100644 --- a/src/config.hpp +++ b/src/config.hpp @@ -21,6 +21,8 @@ struct ebpf_verifier_options_t { bool big_endian; bool dump_btf_types_json; + + bool store_pre_invariants; ///< Store pre-invariants for use in ebpf_check_constraints_at_label. }; struct ebpf_verifier_stats_t { diff --git a/src/crab_verifier.cpp b/src/crab_verifier.cpp index f1ceccd18..2f4625ec0 100644 --- a/src/crab_verifier.cpp +++ b/src/crab_verifier.cpp @@ -17,6 +17,7 @@ #include "crab_utils/lazy_allocator.hpp" #include "asm_syntax.hpp" +#include "asm_parse.hpp" #include "crab_verifier.hpp" #include "string_constraints.hpp" @@ -161,6 +162,8 @@ static checks_db get_analysis_report(std::ostream& s, cfg_t& cfg, const crab::in return db; } +static thread_local std::optional save_pre_invariants = std::nullopt; + static checks_db get_ebpf_report(std::ostream& s, cfg_t& cfg, program_info info, const ebpf_verifier_options_t* options, const std::optional& prog = std::nullopt) { global_program_info = std::move(info); @@ -172,6 +175,9 @@ static checks_db get_ebpf_report(std::ostream& s, cfg_t& cfg, program_info info, // Get dictionaries of pre-invariants and post-invariants for each basic block. ebpf_domain_t entry_dom = ebpf_domain_t::setup_entry(true); auto [pre_invariants, post_invariants] = run_forward_analyzer(cfg, std::move(entry_dom)); + if (thread_local_options.store_pre_invariants) { + save_pre_invariants = pre_invariants; + } return get_analysis_report(s, cfg, pre_invariants, post_invariants, prog); } catch (std::runtime_error& e) { // Convert verifier runtime_error exceptions to failure. @@ -221,6 +227,9 @@ std::tuple ebpf_analyze_program_for_test(std::ostream& o try { cfg_t cfg = prepare_cfg(prog, info, options.simplify, false); auto [pre_invariants, post_invariants] = run_forward_analyzer(cfg, std::move(entry_inv)); + if (thread_local_options.store_pre_invariants) { + save_pre_invariants = pre_invariants; + } const checks_db report = get_analysis_report(std::cerr, cfg, pre_invariants, post_invariants); print_report(os, report, prog, false); @@ -267,4 +276,63 @@ void ebpf_verifier_clear_thread_local_state() { global_program_info.clear(); crab::domains::clear_thread_local_state(); crab::domains::SplitDBM::clear_thread_local_state(); + save_pre_invariants = std::nullopt; +} + +bool ebpf_check_constraints_at_label(std::ostream& os, const std::string& label_string, + const std::set& constraints) try { + label_t label = label_t(label_string); + if (!save_pre_invariants.has_value()) { + os << "No pre-invariants available\n"; + return false; + } + if (save_pre_invariants.value().find(label) == save_pre_invariants.value().end()) { + os << "No pre-invariants available for label " << label << "\n"; + return false; + } + ebpf_domain_t from_inv(save_pre_invariants.value().at(label)); + auto concrete_domain = ebpf_domain_t::from_constraints(constraints, false); + + if (concrete_domain.is_bottom()) { + os << "The provided constraints are unsatisfiable (concrete domain is bottom)\n"; + os << "Concrete constraints are self-contradictory\n"; + os << concrete_domain << "\n"; + return false; + } + + if (from_inv.is_bottom()) { + os << "The abstract state is unreachable\n"; + os << from_inv << "\n"; + return false; + } + + if ((from_inv & concrete_domain).is_bottom()) { + os << "Concrete state does not match invariant\n"; + + // Print the concrete state + os << "--- Concrete state ---\n"; + os << concrete_domain << "\n"; + + os << "--- Abstract state ---\n"; + os << from_inv << "\n"; + + return false; + } + + return true; +} catch (std::exception& e) { + os << "Error occurred while checking constraints: " << e.what() << "\n"; + return false; +} + +std::set ebpf_get_invariants_at_label(const std::string& label) +{ + label_t l = label_t(label); + if (!save_pre_invariants.has_value()) { + return {}; + } + if (save_pre_invariants.value().find(l) == save_pre_invariants.value().end()) { + return {}; + } + return save_pre_invariants.value().at(l).to_set().value(); } diff --git a/src/crab_verifier.hpp b/src/crab_verifier.hpp index 14eeb89a7..bfb48a6b7 100644 --- a/src/crab_verifier.hpp +++ b/src/crab_verifier.hpp @@ -26,3 +26,26 @@ int create_map_crab(const EbpfMapType& map_type, uint32_t key_size, uint32_t val EbpfMapDescriptor* find_map_descriptor(int map_fd); void ebpf_verifier_clear_thread_local_state(); + +/** + * @brief Given a label and a set of concrete constraints, check if the concrete constraints match the abstract + * verifier constraints at the label. Requires the `store_pre_invariants` option to be set. + * + * Abstract constraints are computed by the verifier and stored if the `store_pre_invariants` option is set. + * + * @param[in,out] os Print output to this stream. + * @param[in] label The location in the CFG to check against. + * @param[in] constraints The concrete state to check. + * @return true If the state is valid. + * @return false If the state is invalid. + */ +bool ebpf_check_constraints_at_label(std::ostream& os, const std::string& label, + const std::set& constraints); +/** + * @brief Get the invariants at a given label. Requires the `store_pre_invariants` option to be set. + * + * @param[in] label + * @return The set of invariants at the given label. + * @throw std::out_of_range If the label is not found. + */ +std::set ebpf_get_invariants_at_label(const std::string& label); diff --git a/src/ebpf_yaml.cpp b/src/ebpf_yaml.cpp index 9b2e57120..1cedf1583 100644 --- a/src/ebpf_yaml.cpp +++ b/src/ebpf_yaml.cpp @@ -99,6 +99,7 @@ struct RawTestCase { vector>> raw_blocks; vector post; std::set messages; + std::map> invariants_to_check; }; static vector parse_block(const YAML::Node& block_node) { @@ -126,6 +127,19 @@ static std::set as_set_empty_default(const YAML::Node& optional_node) { return vector_to_set(optional_node.as>()); } +static std::map> parse_invariants_to_check(const YAML::Node& case_node) { + if (!case_node["invariants-to-check"].IsDefined()) { + return {}; + } + + std::map> res; + for (const auto& node : case_node["invariants-to-check"]) { + res.emplace(node.first.as(), vector_to_set(node.second.as>())); + } + + return res; +} + static RawTestCase parse_case(const YAML::Node& case_node) { return RawTestCase{ .test_case = case_node["test-case"].as(), @@ -134,6 +148,7 @@ static RawTestCase parse_case(const YAML::Node& case_node) { .raw_blocks = parse_code(case_node["code"]), .post = case_node["post"].as>(), .messages = as_set_empty_default(case_node["messages"]), + .invariants_to_check = parse_invariants_to_check(case_node), }; } @@ -205,7 +220,9 @@ static TestCase read_case(const RawTestCase& raw_case) { .assumed_pre_invariant = read_invariant(raw_case.pre), .instruction_seq = raw_cfg_to_instruction_seq(raw_case.raw_blocks), .expected_post_invariant = read_invariant(raw_case.post), - .expected_messages = raw_case.messages}; + .expected_messages = raw_case.messages, + .invariants_to_check = raw_case.invariants_to_check, + }; } static vector read_suite(const string& path) { @@ -246,6 +263,10 @@ std::optional run_yaml_test_case(TestCase test_case, bool debug) { test_case.options.simplify = false; } + if (!test_case.invariants_to_check.empty()) { + test_case.options.store_pre_invariants = true; + } + ebpf_context_descriptor_t context_descriptor{64, 0, 4, -1}; EbpfProgramType program_type = make_program_type(test_case.name, &context_descriptor); @@ -256,6 +277,16 @@ std::optional run_yaml_test_case(TestCase test_case, bool debug) { ss, test_case.instruction_seq, test_case.assumed_pre_invariant, info, test_case.options); std::set actual_messages = extract_messages(ss.str()); + for (auto& [label, expected_invariant] : test_case.invariants_to_check) { + if (!ebpf_check_constraints_at_label(ss, label, expected_invariant)) { + // If debug is enabled, print the output of ebpf_check_constraints_at_label. + if (debug) { + std::cout << ss.str(); + } + actual_messages.insert(label + ": Concrete invariants at do not match abstract invariants"); + } + } + if (actual_last_invariant == test_case.expected_post_invariant && actual_messages == test_case.expected_messages) { return {}; } diff --git a/src/ebpf_yaml.hpp b/src/ebpf_yaml.hpp index 89d7ab592..cd336f6d3 100644 --- a/src/ebpf_yaml.hpp +++ b/src/ebpf_yaml.hpp @@ -14,6 +14,7 @@ struct TestCase { InstructionSeq instruction_seq; string_invariant expected_post_invariant; std::set expected_messages; + std::map> invariants_to_check; ///< Map from label to expected invariants, used for checking invariants at specific labels. }; void foreach_suite(const std::string& path, const std::function& f); diff --git a/test-data/add.yaml b/test-data/add.yaml index 7d29c6011..8e190fb72 100644 --- a/test-data/add.yaml +++ b/test-data/add.yaml @@ -175,3 +175,87 @@ post: - r7.packet_offset-r7.uvalue<=-7 - r7.svalue-r7.packet_offset<=11 - r7.uvalue-r7.packet_offset<=11 + +--- +test-case: check concrete state + +pre: ["r0.type=number", "r0.svalue=0", "r0.uvalue=0", r1.type=number, r1.svalue=0, r1.uvalue=0, r2.type=number, r2.svalue=0, r2.uvalue=0] + +code: + : | + r1 = 1 + r2 = 2 + r1 += r2 + r0 = r1 + +invariants-to-check: + 1: + - r1.type=number + - r1.svalue=1 + - r1.uvalue=1 + 2: + - r2.type=number + - r2.svalue=2 + - r2.uvalue=2 + 3: + - r1.type=number + - r1.svalue=3 + - r1.uvalue=3 + -2: + - r0.type=number + - r0.svalue=3 + - r0.uvalue=3 + +post: + - r0.type=number + - r0.uvalue=3 + - r0.svalue=3 + - r1.type=number + - r1.svalue=3 + - r1.uvalue=3 + - r2.type=number + - r2.svalue=2 + - r2.uvalue=2 +--- +test-case: check concrete state - negative + +pre: ["r0.type=number", "r0.svalue=0", "r0.uvalue=0", r1.type=number, r1.svalue=0, r1.uvalue=0, r2.type=number, r2.svalue=0, r2.uvalue=0] + +code: + : | + r1 = 1 + r2 = 2 + r1 += r2 + r0 = r1 + +invariants-to-check: + 1: + - r1.type=number + - r1.svalue=1 + - r1.uvalue=1 + 2: + - r2.type=number + - r2.svalue=2 + - r2.uvalue=2 + 3: + - r1.type=number + - r1.svalue=2 + - r1.uvalue=2 + -2: + - r0.type=number + - r0.svalue=3 + - r0.uvalue=3 + +post: + - r0.type=number + - r0.uvalue=3 + - r0.svalue=3 + - r1.type=number + - r1.svalue=3 + - r1.uvalue=3 + - r2.type=number + - r2.svalue=2 + - r2.uvalue=2 + +messages: + - "3: Concrete invariants at do not match abstract invariants" From fc5d74a87ac0d9ef700423c1000c8c8769660556 Mon Sep 17 00:00:00 2001 From: Alan Jowett Date: Tue, 15 Oct 2024 22:09:40 -0700 Subject: [PATCH 2/2] PR feedback Signed-off-by: Alan Jowett --- src/config.cpp | 2 +- src/crab_verifier.cpp | 32 ++++++++++++++++++-------------- src/crab_verifier.hpp | 2 ++ src/ebpf_yaml.cpp | 6 ++++-- test-data/add.yaml | 18 +++++++++--------- 5 files changed, 34 insertions(+), 26 deletions(-) diff --git a/src/config.cpp b/src/config.cpp index ea5e05117..f1f73b0ef 100644 --- a/src/config.cpp +++ b/src/config.cpp @@ -14,5 +14,5 @@ const ebpf_verifier_options_t ebpf_verifier_default_options = { .allow_division_by_zero = true, .setup_constraints = true, .big_endian = false, - .store_pre_invariants = false, + .store_pre_invariants = false, // Enable this to permit usage of the ebpf_check_constraints_at_label and ebpf_get_invariants_at_label functions. }; diff --git a/src/crab_verifier.cpp b/src/crab_verifier.cpp index 2f4625ec0..9e49c0b2b 100644 --- a/src/crab_verifier.cpp +++ b/src/crab_verifier.cpp @@ -162,7 +162,13 @@ static checks_db get_analysis_report(std::ostream& s, cfg_t& cfg, const crab::in return db; } -static thread_local std::optional save_pre_invariants = std::nullopt; +static thread_local std::optional saved_pre_invariants = std::nullopt; + +static void save_invariants_if_needed(const crab::invariant_table_t& pre_invariants) { + if (thread_local_options.store_pre_invariants) { + saved_pre_invariants = pre_invariants; + } +} static checks_db get_ebpf_report(std::ostream& s, cfg_t& cfg, program_info info, const ebpf_verifier_options_t* options, const std::optional& prog = std::nullopt) { @@ -175,9 +181,7 @@ static checks_db get_ebpf_report(std::ostream& s, cfg_t& cfg, program_info info, // Get dictionaries of pre-invariants and post-invariants for each basic block. ebpf_domain_t entry_dom = ebpf_domain_t::setup_entry(true); auto [pre_invariants, post_invariants] = run_forward_analyzer(cfg, std::move(entry_dom)); - if (thread_local_options.store_pre_invariants) { - save_pre_invariants = pre_invariants; - } + save_invariants_if_needed(pre_invariants); return get_analysis_report(s, cfg, pre_invariants, post_invariants, prog); } catch (std::runtime_error& e) { // Convert verifier runtime_error exceptions to failure. @@ -227,9 +231,7 @@ std::tuple ebpf_analyze_program_for_test(std::ostream& o try { cfg_t cfg = prepare_cfg(prog, info, options.simplify, false); auto [pre_invariants, post_invariants] = run_forward_analyzer(cfg, std::move(entry_inv)); - if (thread_local_options.store_pre_invariants) { - save_pre_invariants = pre_invariants; - } + save_invariants_if_needed(pre_invariants); const checks_db report = get_analysis_report(std::cerr, cfg, pre_invariants, post_invariants); print_report(os, report, prog, false); @@ -276,21 +278,21 @@ void ebpf_verifier_clear_thread_local_state() { global_program_info.clear(); crab::domains::clear_thread_local_state(); crab::domains::SplitDBM::clear_thread_local_state(); - save_pre_invariants = std::nullopt; + saved_pre_invariants = std::nullopt; } bool ebpf_check_constraints_at_label(std::ostream& os, const std::string& label_string, const std::set& constraints) try { label_t label = label_t(label_string); - if (!save_pre_invariants.has_value()) { + if (!saved_pre_invariants.has_value()) { os << "No pre-invariants available\n"; return false; } - if (save_pre_invariants.value().find(label) == save_pre_invariants.value().end()) { + if (saved_pre_invariants.value().find(label) == saved_pre_invariants.value().end()) { os << "No pre-invariants available for label " << label << "\n"; return false; } - ebpf_domain_t from_inv(save_pre_invariants.value().at(label)); + ebpf_domain_t from_inv(saved_pre_invariants.value().at(label)); auto concrete_domain = ebpf_domain_t::from_constraints(constraints, false); if (concrete_domain.is_bottom()) { @@ -327,12 +329,14 @@ bool ebpf_check_constraints_at_label(std::ostream& os, const std::string& label_ std::set ebpf_get_invariants_at_label(const std::string& label) { + // If the label is malformed, throw an exception so the caller can handle it. label_t l = label_t(label); - if (!save_pre_invariants.has_value()) { + + if (!saved_pre_invariants.has_value()) { return {}; } - if (save_pre_invariants.value().find(l) == save_pre_invariants.value().end()) { + if (saved_pre_invariants.value().find(l) == saved_pre_invariants.value().end()) { return {}; } - return save_pre_invariants.value().at(l).to_set().value(); + return saved_pre_invariants.value().at(l).to_set().value(); } diff --git a/src/crab_verifier.hpp b/src/crab_verifier.hpp index bfb48a6b7..0e9bf109d 100644 --- a/src/crab_verifier.hpp +++ b/src/crab_verifier.hpp @@ -32,6 +32,8 @@ void ebpf_verifier_clear_thread_local_state(); * verifier constraints at the label. Requires the `store_pre_invariants` option to be set. * * Abstract constraints are computed by the verifier and stored if the `store_pre_invariants` option is set. + * These constraints represent the program state at a specific point in the control flow graph, + * as determined by the static analysis performed by the verifier. * * @param[in,out] os Print output to this stream. * @param[in] label The location in the CFG to check against. diff --git a/src/ebpf_yaml.cpp b/src/ebpf_yaml.cpp index 1cedf1583..4dbb3acd3 100644 --- a/src/ebpf_yaml.cpp +++ b/src/ebpf_yaml.cpp @@ -128,7 +128,7 @@ static std::set as_set_empty_default(const YAML::Node& optional_node) { } static std::map> parse_invariants_to_check(const YAML::Node& case_node) { - if (!case_node["invariants-to-check"].IsDefined()) { + if (!case_node["invariants-to-check"].IsDefined() || case_node["invariants-to-check"].IsNull()) { return {}; } @@ -278,12 +278,14 @@ std::optional run_yaml_test_case(TestCase test_case, bool debug) { std::set actual_messages = extract_messages(ss.str()); for (auto& [label, expected_invariant] : test_case.invariants_to_check) { + ss.str(""); + ss.clear(); if (!ebpf_check_constraints_at_label(ss, label, expected_invariant)) { // If debug is enabled, print the output of ebpf_check_constraints_at_label. if (debug) { std::cout << ss.str(); } - actual_messages.insert(label + ": Concrete invariants at do not match abstract invariants"); + actual_messages.insert(label + ": Concrete invariants do not match abstract invariants"); } } diff --git a/test-data/add.yaml b/test-data/add.yaml index 8e190fb72..790f271ef 100644 --- a/test-data/add.yaml +++ b/test-data/add.yaml @@ -189,19 +189,19 @@ code: r0 = r1 invariants-to-check: - 1: + 1: # After the first assignment, r1 should be 1. - r1.type=number - r1.svalue=1 - r1.uvalue=1 - 2: + 2: # After the second assignment, r2 should be 2. - r2.type=number - r2.svalue=2 - r2.uvalue=2 - 3: + 3: # After the addition, r1 should be 3. - r1.type=number - r1.svalue=3 - r1.uvalue=3 - -2: + -2: # After the last assignment, r0 should be 3. - r0.type=number - r0.svalue=3 - r0.uvalue=3 @@ -229,19 +229,19 @@ code: r0 = r1 invariants-to-check: - 1: + 1: # After the first assignment, r1 should be 1. - r1.type=number - r1.svalue=1 - r1.uvalue=1 - 2: + 2: # After the second assignment, r2 should be 2. - r2.type=number - r2.svalue=2 - r2.uvalue=2 - 3: + 3: # After the addition, r1 should be 3. Invalid invariant to test negative case. - r1.type=number - r1.svalue=2 - r1.uvalue=2 - -2: + -2: # After the last assignment, r0 should be 3. - r0.type=number - r0.svalue=3 - r0.uvalue=3 @@ -258,4 +258,4 @@ post: - r2.uvalue=2 messages: - - "3: Concrete invariants at do not match abstract invariants" + - "3: Concrete invariants do not match abstract invariants"