From 27033f4fb580a4781b2b8b2ffce5771a3725d32c Mon Sep 17 00:00:00 2001 From: Kouame Behouba Manasse Date: Wed, 6 Mar 2024 07:46:13 -0500 Subject: [PATCH 1/2] feat(crit): add SearchPattern method on MemoryReader This commit adds a new method `SearchPattern` to `MemoryReader` to search for patterns inside the process memory pages. This method accepts regular expressions for flexible pattern matching, a context (number of bytes before and after the pattern match), and the size of the memory chunk to be read at a time. Signed-off-by: Kouame Behouba Manasse --- crit/mempages.go | 99 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 99 insertions(+) diff --git a/crit/mempages.go b/crit/mempages.go index c823edbaa..e2932eee1 100644 --- a/crit/mempages.go +++ b/crit/mempages.go @@ -4,8 +4,10 @@ import ( "bytes" "errors" "fmt" + "io" "os" "path/filepath" + "regexp" "github.com/checkpoint-restore/go-criu/v7/crit/images/mm" "github.com/checkpoint-restore/go-criu/v7/crit/images/pagemap" @@ -193,3 +195,100 @@ func (mr *MemoryReader) GetShmemSize() (int64, error) { return size, nil } + +// PatternMatch represents a match when searching for a pattern in memory. +type PatternMatch struct { + Vaddr uint64 + Length int + Context int + Match string +} + +// SearchPattern searches for a pattern in the process memory pages. 
+func (mr *MemoryReader) SearchPattern(pattern string, escapeRegExpCharacters bool, context, chunkSize int) ([]PatternMatch, error) { + if context < 0 { + return nil, errors.New("context size cannot be negative") + } + + // Set a default chunk size of 10MB to be read at a time + if chunkSize <= 0 { + chunkSize = 10 * 1024 * 1024 + } + + // Escape regular expression characters in the pattern + if escapeRegExpCharacters { + pattern = regexp.QuoteMeta(pattern) + } + + regexPattern, err := regexp.Compile(pattern) + if err != nil { + return nil, err + } + + var results []PatternMatch + + f, err := os.Open(filepath.Join(mr.checkpointDir, fmt.Sprintf("pages-%d.img", mr.pagesID))) + if err != nil { + return nil, err + } + defer f.Close() + + for _, entry := range mr.pagemapEntries { + startAddr := entry.GetVaddr() + endAddr := startAddr + uint64(entry.GetNrPages())*uint64(mr.pageSize) + + initialOffset := uint64(0) + for _, e := range mr.pagemapEntries { + if e == entry { + break + } + initialOffset += uint64(e.GetNrPages()) * uint64(mr.pageSize) + } + + for offset := uint64(0); offset < endAddr-startAddr; offset += uint64(chunkSize) { + readSize := chunkSize + if endAddr-startAddr-offset < uint64(chunkSize) { + readSize = int(endAddr - startAddr - offset) + } + + buff := make([]byte, readSize) + if _, err := f.ReadAt(buff, int64(initialOffset+offset)); err != nil { + if err == io.EOF { + break + } + return nil, err + } + + // Replace non-printable ASCII characters in the buffer with a question mark (0x3f) to prevent unexpected behavior + // during regex matching. Non-printable characters might cause incorrect interpretation or premature + // termination of strings, leading to inaccuracies in pattern matching. 
+ for i := range buff { + if buff[i] < 32 || buff[i] >= 127 { + buff[i] = 0x3F + } + } + + indexes := regexPattern.FindAllIndex(buff, -1) + for _, index := range indexes { + startContext := index[0] - context + if startContext < 0 { + startContext = 0 + } + + endContext := index[1] + context + if endContext > len(buff) { + endContext = len(buff) + } + + results = append(results, PatternMatch{ + Vaddr: startAddr + offset + uint64(index[0]), + Length: index[1] - index[0], + Context: context, + Match: string(buff[startContext:endContext]), + }) + } + } + } + + return results, nil +} From 20dfec875cfe58f40ddc3be083e4e2e713ed8149 Mon Sep 17 00:00:00 2001 From: Kouame Behouba Manasse Date: Wed, 6 Mar 2024 07:48:41 -0500 Subject: [PATCH 2/2] test(crit): add unit test for SearchPattern method of MemoryReader Signed-off-by: Kouame Behouba Manasse --- crit/mempages_test.go | 93 +++++++++++++++++++++++++++++++++++++++---- test/loop/loop.c | 7 ++++ 2 files changed, 92 insertions(+), 8 deletions(-) diff --git a/crit/mempages_test.go b/crit/mempages_test.go index 2c087b8a6..e4cbe1000 100644 --- a/crit/mempages_test.go +++ b/crit/mempages_test.go @@ -248,7 +248,7 @@ func TestGetPsArgsAndEnvVars(t *testing.T) { } } -func TestGetShmemSize(t *testing.T) { +func TestSearchPattern(t *testing.T) { pid, err := getTestImgPID() if err != nil { t.Fatal(err) @@ -259,15 +259,92 @@ func TestGetShmemSize(t *testing.T) { t.Fatal(err) } - size, err := mr.GetShmemSize() - if err != nil { - t.Fatal(err) + testCases := []struct { + name string + pattern string + context int + escapeRegExpCharacters bool + shouldMatch bool + expectedError error + }{ + { + name: "PATH environment variable", + pattern: "PATH=", + shouldMatch: true, + }, + { + name: "PATH environment variable regex", + pattern: `\bPATH=([^\s]+)\b`, + shouldMatch: true, + }, + { + name: "PATH environment variable regex with 10 bytes context", + pattern: `\bPATH=([^\s]+)\b`, + context: 10, + shouldMatch: true, + }, + { + name: "PATH 
environment variable regex with a negative context", + pattern: `\bPATH=([^\s]+)\b`, + context: -1, + expectedError: errors.New("context size cannot be negative"), + }, + { + name: "PATH environment variable regex with a large context", + pattern: `\bPATH=([^\s]+)\b`, + context: 100000, + shouldMatch: true, + }, + { + name: "Non-existent pattern", + pattern: "NON_EXISTENT_PATTERN", + }, + { + name: "PASSWORD environment variable value as regex", + pattern: "123 Hello.*?", + shouldMatch: true, + }, + { + name: "PASSWORD environment variable value with regex metacharacters to escape", + pattern: `123 Hello.*?[^]@WORLD(|x)`, + escapeRegExpCharacters: true, + shouldMatch: true, + }, } - // Verify that the shared memory size is as expected (0) - expectedSize := int64(0) - if size != expectedSize { - t.Fatalf("Expected shared memory size: %d, but got: %d", expectedSize, size) + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + matches, err := mr.SearchPattern(tc.pattern, tc.escapeRegExpCharacters, tc.context, 0) + if err != nil && tc.expectedError == nil { + t.Errorf("Unexpected error for pattern %s: %v", tc.pattern, err) + } else if err == nil && tc.expectedError != nil { + t.Errorf("Expected error for pattern %s: %v", tc.pattern, tc.expectedError) + } + + if tc.shouldMatch && len(matches) == 0 { + t.Errorf("Expected to find a match for pattern \"%s\"", tc.pattern) + } else if !tc.shouldMatch && len(matches) > 0 { + t.Errorf("Expected not to find any match for pattern \"%s\"", tc.pattern) + } + + for _, match := range matches { + content, err := mr.GetMemPages(match.Vaddr, match.Vaddr+uint64(match.Length)) + if err != nil { + t.Fatalf("Failed to get memory pages: %v", err) + } + + buff := content.Bytes() + for i := range buff { + if buff[i] < 32 || buff[i] >= 127 { + buff[i] = 0x3F + } + } + + if !strings.Contains(match.Match, content.String()) { + t.Errorf("Expected to find %s in matched pattern %s", content.String(), match.Match) + } + } + }) } } 
diff --git a/test/loop/loop.c b/test/loop/loop.c index 733caad4c..8409331fd 100644 --- a/test/loop/loop.c +++ b/test/loop/loop.c @@ -12,6 +12,13 @@ int main(void) int res = EXIT_FAILURE; int start_pipe[2]; + // Set a PASSWORD environment variable to test the search pattern + // within process memory pages using regex metacharacters. + if (setenv("PASSWORD", "123 Hello.*?[^]@WORLD(|x)", 1) != 0) { + perror("setenv"); + return 1; + } + if (pipe(start_pipe)) { perror("pipe failed!"); goto out;