Skip to content

Commit

Permalink
fix(userspace/libsinsp): revert to old concatenate_paths helper fun…
Browse files Browse the repository at this point in the history
…ction for perf reasons.

Also, added some small fixes and lots of new tests.

Signed-off-by: Federico Di Pierro <[email protected]>
  • Loading branch information
FedeDP authored and poiana committed Jan 26, 2024
1 parent 6d57bde commit ff39fd3
Show file tree
Hide file tree
Showing 3 changed files with 231 additions and 29 deletions.
69 changes: 61 additions & 8 deletions userspace/libsinsp/test/sinsp_utils.ut.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,12 @@ TEST(sinsp_utils_test, concatenate_paths)
// Some tests were motivated by this resource:
// https://pubs.opengroup.org/onlinepubs/000095399/basedefs/xbd_chap04.html#tag_04_11

// PLEASE NOTE:
// * current impl does not support unicode.
// * current impl does not sanitize path1
// * current impl expects path1 to end with '/'
// * current impl skips path1 altogether if path2 is absolute

std::string path1, path2, res;

res = sinsp_utils::concatenate_paths("", "");
Expand All @@ -32,12 +38,57 @@ TEST(sinsp_utils_test, concatenate_paths)
path1 = "";
path2 = "../";
res = sinsp_utils::concatenate_paths(path1, path2);
EXPECT_EQ("..", res);
EXPECT_EQ("", res);

path1 = "";
path2 = "..";
res = sinsp_utils::concatenate_paths(path1, path2);
EXPECT_EQ("", res);

path1 = "/";
path2 = "../";
res = sinsp_utils::concatenate_paths(path1, path2);
EXPECT_EQ("/", res);

path1 = "a";
path2 = "../";
res = sinsp_utils::concatenate_paths(path1, path2);
EXPECT_EQ("a..", res); // since the helper does not add any "/" between path1 and path2, we end up with this.

path1 = "a/";
path2 = "../";
res = sinsp_utils::concatenate_paths(path1, path2);
EXPECT_EQ("", res);

path1 = "";
path2 = "/foo";
res = sinsp_utils::concatenate_paths(path1, path2);
EXPECT_EQ("/foo", res);

path1 = "foo/";
path2 = "..//a";
res = sinsp_utils::concatenate_paths(path1, path2);
EXPECT_EQ("a", res); // path2 has been sanitized, plus we moved up a folder because of ".."

path1 = "/foo/";
path2 = "..//a";
res = sinsp_utils::concatenate_paths(path1, path2);
EXPECT_EQ("/a", res); // path2 has been sanitized, plus we moved up a folder because of ".."

path1 = "heolo";
path2 = "w////////////..//////.////////r.|";
res = sinsp_utils::concatenate_paths(path1, path2);
EXPECT_EQ("r.|", res); // since the helper does not add any "/" between path1 and path2, we end up with this.

path1 = "heolo";
path2 = "w/////////////..//"; // heolow/////////////..// > heolow/..// -> /
res = sinsp_utils::concatenate_paths(path1, path2);
EXPECT_EQ("", res); // since the helper does not add any "/" between path1 and path2, we end up with this, ie a folder up from "heolow/"

path1 = "";
path2 = "./";
res = sinsp_utils::concatenate_paths(path1, path2);
EXPECT_EQ(".", res);
EXPECT_EQ("", res);

path1 = "";
path2 = "dir/term";
Expand Down Expand Up @@ -92,27 +143,28 @@ TEST(sinsp_utils_test, concatenate_paths)
path1 = "./app";
path2 = "custom/term";
res = sinsp_utils::concatenate_paths(path1, path2);
EXPECT_EQ("app/custom/term", res);
EXPECT_EQ("./appcustom/term", res); // since path1 is not '/' terminated, we expect a string concat without further path fields

path1 = "/app";
path2 = "custom/term";
res = sinsp_utils::concatenate_paths(path1, path2);
EXPECT_EQ("/app/custom/term", res);
EXPECT_EQ("/appcustom/term", res); // since path1 is not '/' terminated, we expect a string concat without further path fields

path1 = "app";
path2 = "custom/term";
res = sinsp_utils::concatenate_paths(path1, path2);
EXPECT_EQ("app/custom/term", res);
EXPECT_EQ("appcustom/term", res); // since path1 is not '/' terminated, we expect a string concat without further path fields

path1 = "app//";
path1 = "app/";
path2 = "custom/term";
res = sinsp_utils::concatenate_paths(path1, path2);
EXPECT_EQ("app/custom/term", res);

// We don't support sanitizing path1
path1 = "app/////";
path2 = "custom////term";
res = sinsp_utils::concatenate_paths(path1, path2);
EXPECT_EQ("app/custom/term", res);
EXPECT_EQ("app/////custom/term", res);

path1 = "/";
path2 = "/app/custom/dir/././././../../term/";
Expand All @@ -124,6 +176,7 @@ TEST(sinsp_utils_test, concatenate_paths)
res = sinsp_utils::concatenate_paths(path1, path2);
EXPECT_EQ("/app", res);

/* No unicode support
path1 = "/root/";
path2 = "../😉";
res = sinsp_utils::concatenate_paths(path1, path2);
Expand All @@ -142,5 +195,5 @@ TEST(sinsp_utils_test, concatenate_paths)
path1 = "/root";
path2 = "c:/hello/world/";
res = sinsp_utils::concatenate_paths(path1, path2);
EXPECT_EQ("/root/c:/hello/world", res);
EXPECT_EQ("/root/c:/hello/world", res); */
}
188 changes: 168 additions & 20 deletions userspace/libsinsp/utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -613,36 +613,184 @@ std::filesystem::path workaround_win_root_name(std::filesystem::path p)
return std::filesystem::path("./" + p.string());
}

std::string sinsp_utils::concatenate_paths(std::string_view path1, std::string_view path2, size_t max_len)
//
// Helper function to move a directory up in a path string
//
static inline void rewind_to_parent_path(const char* targetbase, char** tc, const char** pc, uint32_t delta)
{
auto p1 = std::filesystem::path(path1, std::filesystem::path::format::generic_format);
auto p2 = std::filesystem::path(path2, std::filesystem::path::format::generic_format);
if(*tc <= targetbase + 1)
{
(*pc) += delta;
return;
}

#ifdef _WIN32
// This is an ugly workaround to make sure we will not try to interpret root names (e.g. "c:/", "//?/") on Windows
// since this function only deals with unix-like paths
p1 = workaround_win_root_name(p1);
p2 = workaround_win_root_name(p2);
#endif // _WIN32
(*tc)--;

while((*tc) >= targetbase + 1 && *((*tc) - 1) != '/')
{
(*tc)--;
}

// note: if p2 happens to be an absolute path, p1 / p2 == p2
auto path_concat = (p1 / p2).lexically_normal();
std::string result = path_concat.generic_string();
(*pc) += delta;
}

//
// If the path ends with a separator, remove it, as the OS does.
//
if (result.length() > 1 && result.back() == '/')
//
// Args:
// - target: the string where we are supposed to start copying
// - targetbase: the base of the path, i.e. the furthest we can go back when
// following parent directories
// - path: the path to copy
//
static inline void copy_and_sanitize_path(char* target, char* targetbase, const char *path, char separator)
{
char* tc = target;
const char* pc = path;
g_invalidchar ic;
const bool empty_base = target == targetbase;

while(true)
{
if(*pc == 0)
{
*tc = 0;

//
// If the path ends with a separator, remove it, as the OS does.
// Properly manage case where path is just "/".
//
if((tc > (targetbase + 1)) && (*(tc - 1) == separator))
{
*(tc - 1) = 0;
}

return;
}

if(ic(*pc))
{
//
// Invalid char, substitute with a '.'
//
*tc = '.';
tc++;
pc++;
}
else
{
//
// If path begins with '.' or '.' is the first char after a '/'
//
if(*pc == '.' && (tc == targetbase || *(tc - 1) == separator))
{
//
// '../', rewind to the previous separator
//
if(*(pc + 1) == '.' && *(pc + 2) == separator)
{
rewind_to_parent_path(targetbase, &tc, &pc, 3);
}
//
// '..', with no separator.
// This is valid if we are at the end of the string, and in that case we rewind.
//
else if(*(pc + 1) == '.' && *(pc + 2) == 0)
{
rewind_to_parent_path(targetbase, &tc, &pc, 2);
}
//
// './', just skip it
//
else if(*(pc + 1) == separator)
{
pc += 2;
}
//
// '.', with no separator.
// This is valid if we are at the end of the string, and in that case we rewind.
//
else if(*(pc + 1) == 0)
{
pc++;
}
//
// Otherwise, we leave the string intact.
//
else
{
*tc = *pc;
pc++;
tc++;
}
}
else if(*pc == separator)
{
//
// separator:
// * if the last char is already a separator, skip it
// * if we are back at targetbase but targetbase was not empty before, it means we
// fully rewinded back to targetbase and the string is now empty. Skip separator.
// Example: "/foo/../a" -> "/a" BUT "foo/../a" -> "a"
// -> Otherwise: "foo/../a" -> "/a"
//
if((tc > targetbase && *(tc - 1) == separator) || (tc == targetbase && !empty_base))
{
pc++;
}
else
{
*tc = *pc;
tc++;
pc++;
}
}
else
{
//
// Normal char, copy it
//
*tc = *pc;
tc++;
pc++;
}
}
}
}

/*
* Return false if path2 is an absolute path.
* path1 MUST be '/' terminated.
* path1 is not sanitized.
* If path2 is absolute, we only account for it.
*/
static inline bool concatenate_paths_(char* target, uint32_t targetlen, const char* path1, uint32_t len1,
const char* path2, uint32_t len2)
{
if(targetlen < (len1 + len2 + 1))
{
result.pop_back();
strlcpy(target, "/PATH_TOO_LONG", targetlen);
return false;
}

if (result.length() > max_len)
if(len2 != 0 && path2[0] != '/')
{
memcpy(target, path1, len1);
copy_and_sanitize_path(target + len1, target, path2, '/');
return true;
}
else
{
return "/PATH_TOO_LONG";
target[0] = 0;
copy_and_sanitize_path(target, target, path2, '/');
return false;
}
}

return result;
std::string sinsp_utils::concatenate_paths(std::string_view path1, std::string_view path2)
{
char fullpath[SCAP_MAX_PATH_SIZE];
concatenate_paths_(fullpath, SCAP_MAX_PATH_SIZE, path1.data(), (uint32_t)path1.length(), path2.data(),
path2.size());
return std::string(fullpath);
}


Expand Down
3 changes: 2 additions & 1 deletion userspace/libsinsp/utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -95,10 +95,11 @@ class sinsp_utils

//
// Concatenate posix-style path1 and path2 up to max_len in size, normalizing the result.
// path1 MUST be '/' terminated and is not sanitized.
// If path2 is absolute, the result will be equivalent to path2.
// If the result would be too long, the output will contain the string "/PATH_TOO_LONG" instead.
//
static std::string concatenate_paths(std::string_view path1, std::string_view path2, size_t max_len=SCAP_MAX_PATH_SIZE-1);
static std::string concatenate_paths(std::string_view path1, std::string_view path2);

//
// Determines if an IPv6 address is IPv4-mapped
Expand Down

0 comments on commit ff39fd3

Please sign in to comment.