Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ffi: Move SchemaTreeNode into SchemaTree; Use an optional for the parent ID in the schema tree node to track nodes which have no parents (the root). #554

Merged
merged 11 commits into from
Oct 12, 2024
1 change: 0 additions & 1 deletion components/core/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -345,7 +345,6 @@ set(SOURCE_FILES_unitTest
src/clp/ffi/KeyValuePairLogEvent.hpp
src/clp/ffi/SchemaTree.cpp
src/clp/ffi/SchemaTree.hpp
src/clp/ffi/SchemaTreeNode.hpp
src/clp/ffi/search/CompositeWildcardToken.cpp
src/clp/ffi/search/CompositeWildcardToken.hpp
src/clp/ffi/search/ExactVariableToken.cpp
Expand Down
81 changes: 43 additions & 38 deletions components/core/src/clp/ffi/KeyValuePairLogEvent.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
#include "../ir/EncodedTextAst.hpp"
#include "../time_types.hpp"
#include "SchemaTree.hpp"
#include "SchemaTreeNode.hpp"
#include "Value.hpp"

using clp::ir::EightByteEncodedTextAst;
Expand Down Expand Up @@ -49,7 +48,7 @@ class JsonSerializationIterator {
public:
// Constructor
JsonSerializationIterator(
SchemaTreeNode const* schema_tree_node,
SchemaTree::Node const* schema_tree_node,
LinZhihao-723 marked this conversation as resolved.
Show resolved Hide resolved
vector<bool> const& schema_subtree_bitmap,
nlohmann::json::object_t* parent_json_obj,
JsonExceptionHandler json_exception_callback
Expand All @@ -76,7 +75,7 @@ class JsonSerializationIterator {
try {
// If the current node is the root, then replace the `parent` with this node's JSON
// object. Otherwise, add this node's JSON object as a child of the parent JSON object.
if (m_schema_tree_node->get_id() == SchemaTree::cRootId) {
if (m_schema_tree_node->is_root()) {
*m_parent_json_obj = std::move(m_json_obj);
} else {
m_parent_json_obj->emplace(
Expand All @@ -100,16 +99,16 @@ class JsonSerializationIterator {
* Gets the next child schema tree node and advances the iterator.
* @return The next child schema tree node.
*/
[[nodiscard]] auto get_next_child_schema_tree_node() -> SchemaTreeNode::id_t {
[[nodiscard]] auto get_next_child_schema_tree_node() -> SchemaTree::Node::id_t {
LinZhihao-723 marked this conversation as resolved.
Show resolved Hide resolved
return *(m_child_schema_tree_node_it++);
}

[[nodiscard]] auto get_json_obj() -> nlohmann::json::object_t& { return m_json_obj; }

private:
SchemaTreeNode const* m_schema_tree_node;
vector<SchemaTreeNode::id_t> m_child_schema_tree_nodes;
vector<SchemaTreeNode::id_t>::const_iterator m_child_schema_tree_node_it;
SchemaTree::Node const* m_schema_tree_node;
vector<SchemaTree::Node::id_t> m_child_schema_tree_nodes;
vector<SchemaTree::Node::id_t>::const_iterator m_child_schema_tree_node_it;
LinZhihao-723 marked this conversation as resolved.
Show resolved Hide resolved
nlohmann::json::object_t* m_parent_json_obj;
nlohmann::json::object_t m_json_obj;
JsonExceptionHandler m_json_exception_callback;
Expand All @@ -121,7 +120,7 @@ class JsonSerializationIterator {
* @return Whether the given schema tree node type matches the given value's type.
*/
[[nodiscard]] auto
node_type_matches_value_type(SchemaTreeNode::Type type, Value const& value) -> bool;
node_type_matches_value_type(SchemaTree::Node::Type type, Value const& value) -> bool;
LinZhihao-723 marked this conversation as resolved.
Show resolved Hide resolved

/**
* Validates whether the given node-ID value pairs are leaf nodes in the `SchemaTree` forming a
Expand Down Expand Up @@ -150,7 +149,7 @@ node_type_matches_value_type(SchemaTreeNode::Type type, Value const& value) -> b
*/
[[nodiscard]] auto is_leaf_node(
SchemaTree const& schema_tree,
SchemaTreeNode::id_t node_id,
SchemaTree::Node::id_t node_id,
KeyValuePairLogEvent::NodeIdValuePairs const& node_id_value_pairs
) -> bool;

Expand All @@ -176,7 +175,7 @@ node_type_matches_value_type(SchemaTreeNode::Type type, Value const& value) -> b
* @return Whether the insertion was successful.
*/
[[nodiscard]] auto insert_kv_pair_into_json_obj(
SchemaTreeNode const& node,
SchemaTree::Node const& node,
std::optional<Value> const& optional_val,
nlohmann::json::object_t& json_obj
) -> bool;
Expand All @@ -190,19 +189,19 @@ node_type_matches_value_type(SchemaTreeNode::Type type, Value const& value) -> b
*/
[[nodiscard]] auto decode_as_encoded_text_ast(Value const& val) -> std::optional<string>;

auto node_type_matches_value_type(SchemaTreeNode::Type type, Value const& value) -> bool {
auto node_type_matches_value_type(SchemaTree::Node::Type type, Value const& value) -> bool {
switch (type) {
case SchemaTreeNode::Type::Obj:
case SchemaTree::Node::Type::Obj:
return value.is_null();
case SchemaTreeNode::Type::Int:
case SchemaTree::Node::Type::Int:
return value.is<value_int_t>();
case SchemaTreeNode::Type::Float:
case SchemaTree::Node::Type::Float:
return value.is<value_float_t>();
case SchemaTreeNode::Type::Bool:
case SchemaTree::Node::Type::Bool:
return value.is<value_bool_t>();
case SchemaTreeNode::Type::UnstructuredArray:
case SchemaTree::Node::Type::UnstructuredArray:
return value.is<FourByteEncodedTextAst>() || value.is<EightByteEncodedTextAst>();
case SchemaTreeNode::Type::Str:
case SchemaTree::Node::Type::Str:
return value.is<string>() || value.is<FourByteEncodedTextAst>()
|| value.is<EightByteEncodedTextAst>();
default:
Expand All @@ -215,33 +214,35 @@ auto validate_node_id_value_pairs(
KeyValuePairLogEvent::NodeIdValuePairs const& node_id_value_pairs
) -> std::errc {
try {
std::unordered_map<SchemaTreeNode::id_t, std::unordered_set<std::string_view>>
std::unordered_map<SchemaTree::Node::id_t, std::unordered_set<std::string_view>>
parent_node_id_to_key_names;
for (auto const& [node_id, value] : node_id_value_pairs) {
if (SchemaTree::cRootId == node_id) {
auto const& node{schema_tree.get_node(node_id)};
if (node.is_root()) {
return std::errc::operation_not_permitted;
}

auto const& node{schema_tree.get_node(node_id)};
auto const node_type{node.get_type()};
if (false == value.has_value()) {
// Value is an empty object (`{}`, which is not the same as `null`)
if (SchemaTreeNode::Type::Obj != node_type) {
if (SchemaTree::Node::Type::Obj != node_type) {
return std::errc::protocol_error;
}
} else if (false == node_type_matches_value_type(node_type, value.value())) {
return std::errc::protocol_error;
}

if (SchemaTreeNode::Type::Obj == node_type
if (SchemaTree::Node::Type::Obj == node_type
&& false == is_leaf_node(schema_tree, node_id, node_id_value_pairs))
{
// The node's value is `null` or `{}` but its descendants appear in
// `node_id_value_pairs`.
return std::errc::operation_not_permitted;
}

auto const parent_node_id{node.get_parent_id()};
// We checked that the node isn't the root above, so we can query the underlying ID
// safely without a repeated check.
auto const parent_node_id{node.get_parent_id_unsafe()};
auto const key_name{node.get_key_name()};
if (parent_node_id_to_key_names.contains(parent_node_id)) {
auto const [it, new_key_inserted]{
Expand All @@ -263,10 +264,10 @@ auto validate_node_id_value_pairs(

auto is_leaf_node(
SchemaTree const& schema_tree,
SchemaTreeNode::id_t node_id,
SchemaTree::Node::id_t node_id,
KeyValuePairLogEvent::NodeIdValuePairs const& node_id_value_pairs
) -> bool {
vector<SchemaTreeNode::id_t> dfs_stack;
vector<SchemaTree::Node::id_t> dfs_stack;
dfs_stack.reserve(schema_tree.get_size());
dfs_stack.push_back(node_id);
while (false == dfs_stack.empty()) {
Expand Down Expand Up @@ -294,24 +295,28 @@ auto get_schema_subtree_bitmap(
schema_subtree_bitmap[node_id] = true;

// Iteratively mark the parents as true
auto parent_id{schema_tree.get_node(node_id).get_parent_id()};
auto optional_parent_id{schema_tree.get_node(node_id).get_parent_id()};
while (true) {
// Ideally, we'd use this if statement as the loop condition, but clang-tidy will
// complain about an unchecked `optional` access.
if (false == optional_parent_id.has_value()) {
// Reached the root
break;
}
auto const parent_id{optional_parent_id.value()};
if (schema_subtree_bitmap[parent_id]) {
// Parent already set by other child
break;
}
schema_subtree_bitmap[parent_id] = true;
if (SchemaTree::cRootId == parent_id) {
break;
}
parent_id = schema_tree.get_node(parent_id).get_parent_id();
optional_parent_id = schema_tree.get_node(parent_id).get_parent_id();
}
}
return schema_subtree_bitmap;
}

auto insert_kv_pair_into_json_obj(
SchemaTreeNode const& node,
SchemaTree::Node const& node,
std::optional<Value> const& optional_val,
nlohmann::json::object_t& json_obj
) -> bool {
Expand All @@ -325,16 +330,16 @@ auto insert_kv_pair_into_json_obj(
try {
auto const& val{optional_val.value()};
switch (type) {
case SchemaTreeNode::Type::Int:
case SchemaTree::Node::Type::Int:
json_obj.emplace(key_name, val.get_immutable_view<value_int_t>());
break;
case SchemaTreeNode::Type::Float:
case SchemaTree::Node::Type::Float:
json_obj.emplace(key_name, val.get_immutable_view<value_float_t>());
break;
case SchemaTreeNode::Type::Bool:
case SchemaTree::Node::Type::Bool:
json_obj.emplace(key_name, val.get_immutable_view<bool>());
break;
case SchemaTreeNode::Type::Str:
case SchemaTree::Node::Type::Str:
if (val.is<string>()) {
json_obj.emplace(key_name, string{val.get_immutable_view<string>()});
} else {
Expand All @@ -345,15 +350,15 @@ auto insert_kv_pair_into_json_obj(
json_obj.emplace(key_name, decoded_result.value());
}
break;
case SchemaTreeNode::Type::UnstructuredArray: {
case SchemaTree::Node::Type::UnstructuredArray: {
auto const decoded_result{decode_as_encoded_text_ast(val)};
if (false == decoded_result.has_value()) {
return false;
}
json_obj.emplace(key_name, nlohmann::json::parse(decoded_result.value()));
break;
}
case SchemaTreeNode::Type::Obj:
case SchemaTree::Node::Type::Obj:
json_obj.emplace(key_name, nullptr);
break;
default:
Expand Down Expand Up @@ -421,7 +426,7 @@ auto KeyValuePairLogEvent::serialize_to_json(
//
// On the way up, add the current node's `nlohmann::json::object_t` to the parent's
// `nlohmann::json::object_t`.
auto const& root_schema_tree_node{m_schema_tree->get_node(SchemaTree::cRootId)};
auto const& root_schema_tree_node{m_schema_tree->get_root()};
auto root_json_obj = nlohmann::json::object_t();

dfs_stack.emplace(
Expand Down
3 changes: 1 addition & 2 deletions components/core/src/clp/ffi/KeyValuePairLogEvent.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@

#include "../time_types.hpp"
#include "SchemaTree.hpp"
#include "SchemaTreeNode.hpp"
#include "Value.hpp"

namespace clp::ffi {
Expand All @@ -25,7 +24,7 @@ namespace clp::ffi {
class KeyValuePairLogEvent {
public:
// Types
using NodeIdValuePairs = std::unordered_map<SchemaTreeNode::id_t, std::optional<Value>>;
using NodeIdValuePairs = std::unordered_map<SchemaTree::Node::id_t, std::optional<Value>>;

// Factory functions
/**
Expand Down
28 changes: 12 additions & 16 deletions components/core/src/clp/ffi/SchemaTree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,9 @@
#include <string>

#include "../ErrorCode.hpp"
#include "SchemaTreeNode.hpp"

namespace clp::ffi {
auto SchemaTree::get_node(SchemaTreeNode::id_t id) const -> SchemaTreeNode const& {
auto SchemaTree::get_node(Node::id_t id) const -> Node const& {
if (m_tree_nodes.size() <= static_cast<size_t>(id)) {
throw OperationFailed(
ErrorCode_OutOfBounds,
Expand All @@ -20,13 +19,12 @@ auto SchemaTree::get_node(SchemaTreeNode::id_t id) const -> SchemaTreeNode const
return m_tree_nodes[id];
}

auto SchemaTree::try_get_node_id(NodeLocator const& locator
) const -> std::optional<SchemaTreeNode::id_t> {
auto SchemaTree::try_get_node_id(NodeLocator const& locator) const -> std::optional<Node::id_t> {
auto const parent_id{static_cast<size_t>(locator.get_parent_id())};
if (m_tree_nodes.size() <= parent_id) {
return false;
return std::nullopt;
}
std::optional<SchemaTreeNode::id_t> node_id;
std::optional<Node::id_t> node_id;
for (auto const child_id : m_tree_nodes[parent_id].get_children_ids()) {
auto const& node{m_tree_nodes[child_id]};
if (node.get_key_name() == locator.get_key_name() && node.get_type() == locator.get_type())
Expand All @@ -38,19 +36,14 @@ auto SchemaTree::try_get_node_id(NodeLocator const& locator
return node_id;
}

auto SchemaTree::insert_node(NodeLocator const& locator) -> SchemaTreeNode::id_t {
auto SchemaTree::insert_node(NodeLocator const& locator) -> Node::id_t {
if (try_get_node_id(locator).has_value()) {
throw OperationFailed(ErrorCode_Failure, __FILE__, __LINE__, "Node already exists.");
}
auto const node_id{static_cast<SchemaTreeNode::id_t>(m_tree_nodes.size())};
m_tree_nodes.emplace_back(
node_id,
locator.get_parent_id(),
locator.get_key_name(),
locator.get_type()
);
auto const node_id{static_cast<Node::id_t>(m_tree_nodes.size())};
m_tree_nodes.emplace_back(Node::create(node_id, locator));
auto& parent_node{m_tree_nodes[locator.get_parent_id()]};
if (SchemaTreeNode::Type::Obj != parent_node.get_type()) {
if (Node::Type::Obj != parent_node.get_type()) {
throw OperationFailed(
LinZhihao-723 marked this conversation as resolved.
Show resolved Hide resolved
ErrorCode_Failure,
__FILE__,
Comment on lines +44 to 49
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue

Potential null dereference when accessing parent node in insert_node

In the insert_node method, you access locator.get_parent_id() without checking if it has a value:

auto& parent_node{m_tree_nodes[locator.get_parent_id()]};

Since locator.get_parent_id() can now be std::nullopt for the root node, accessing m_tree_nodes[locator.get_parent_id()] without checking may lead to undefined behaviour.

Consider checking whether locator.get_parent_id() has a value before accessing m_tree_nodes[locator.get_parent_id().value()].

Apply this diff to fix the issue:

+ if (locator.get_parent_id().has_value()) {
    auto& parent_node{m_tree_nodes[locator.get_parent_id().value()]};
    if (Node::Type::Obj != parent_node.get_type()) {
        throw OperationFailed(
                ErrorCode_Failure,
                __FILE__,
                __LINE__,
                "Non-object nodes cannot have children."
        );
    }
    parent_node.append_new_child(node_id);
+ } else {
+     // Handle the case where the node is the root node
+ }

Expand All @@ -68,7 +61,10 @@ auto SchemaTree::revert() -> void {
}
while (m_tree_nodes.size() != m_snapshot_size) {
auto const& node{m_tree_nodes.back()};
m_tree_nodes[node.get_parent_id()].remove_last_appended_child();
auto const optional_parent_id{node.get_parent_id()};
if (optional_parent_id.has_value()) {
m_tree_nodes[optional_parent_id.value()].remove_last_appended_child();
}
Comment on lines +64 to +67
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion

Consistent handling of optional parent IDs

In the revert method, you correctly check if node.get_parent_id() has a value before accessing it:

auto const optional_parent_id{node.get_parent_id()};
if (optional_parent_id.has_value()) {
    m_tree_nodes[optional_parent_id.value()].remove_last_appended_child();
}

Consider applying the same pattern in other methods, such as insert_node and try_get_node_id, to ensure consistent and safe handling of optional parent IDs throughout the codebase.

m_tree_nodes.pop_back();
}
m_snapshot_size.reset();
Expand Down
Loading
Loading