Skip to content

Commit

Permalink
Merge pull request checkpoint-restore#163 from behouba/search-memory-…
Browse files Browse the repository at this point in the history
…pages

feat(crit): add SearchPattern method on MemoryReader
  • Loading branch information
rst0git authored Jul 28, 2024
2 parents 2dd9980 + 20dfec8 commit a9064d7
Show file tree
Hide file tree
Showing 3 changed files with 191 additions and 8 deletions.
99 changes: 99 additions & 0 deletions crit/mempages.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,10 @@ import (
"bytes"
"errors"
"fmt"
"io"
"os"
"path/filepath"
"regexp"

"github.com/checkpoint-restore/go-criu/v7/crit/images/mm"
"github.com/checkpoint-restore/go-criu/v7/crit/images/pagemap"
Expand Down Expand Up @@ -193,3 +195,100 @@ func (mr *MemoryReader) GetShmemSize() (int64, error) {

return size, nil
}

// PatternMatch represents a match when searching for a pattern in memory.
// Values of this type are produced by MemoryReader.SearchPattern, one per
// regular-expression match.
type PatternMatch struct {
// Vaddr is the address at which the matched bytes start: the pagemap
// entry's Vaddr plus the offset of the match inside that entry.
Vaddr uint64
// Length is the number of matched bytes (match end index minus match start
// index); surrounding context bytes are not counted.
Length int
// Context is the context size that was passed to SearchPattern, i.e. the
// maximum number of bytes included before and after the match in Match.
Context int
// Match holds the matched bytes together with up to Context bytes on each
// side, taken from the buffer after SearchPattern has replaced bytes
// outside the printable ASCII range with '?'.
Match string
}

// SearchPattern searches for a pattern in the process memory pages.
//
// The pages image (pages-<pagesID>.img inside the checkpoint directory) is
// scanned one pagemap entry at a time, in chunks of at most chunkSize bytes.
// Before matching, every byte outside the printable ASCII range (< 32 or
// >= 127) is replaced with '?', so the returned Match strings are always
// printable.
//
// Parameters:
//   - pattern: regular expression to look for (Go regexp syntax).
//   - escapeRegExpCharacters: when true, pattern is passed through
//     regexp.QuoteMeta first and is therefore matched literally.
//   - context: number of bytes to include before and after each match in
//     PatternMatch.Match; must not be negative.
//   - chunkSize: maximum number of bytes read at a time; values <= 0 select
//     the default of 10 MiB.
//
// It returns one PatternMatch per match, in pagemap order. An error is
// returned when context is negative, the pattern does not compile, or the
// pages image cannot be opened or read.
//
// Limitations that follow from the chunked scan:
//   - a match that straddles a chunk (or pagemap entry) boundary is not found;
//   - context is clipped at the boundaries of the chunk containing the match;
//   - because non-printable bytes become '?', a literal '?' in the pattern
//     also matches those bytes, and patterns that require control characters
//     (for example "\n" or "\t") can never match.
//
// NOTE(review): the file offset of an entry is taken to be the total size of
// all preceding pagemap entries, i.e. every entry is assumed to have its pages
// stored in this pages image in pagemap order — confirm for entries whose
// pages may live elsewhere.
func (mr *MemoryReader) SearchPattern(pattern string, escapeRegExpCharacters bool, context, chunkSize int) ([]PatternMatch, error) {
	if context < 0 {
		return nil, errors.New("context size cannot be negative")
	}

	// Set a default chunk size of 10MB to be read at a time.
	if chunkSize <= 0 {
		chunkSize = 10 * 1024 * 1024
	}

	// Escape regular expression characters in the pattern.
	if escapeRegExpCharacters {
		pattern = regexp.QuoteMeta(pattern)
	}

	regexPattern, err := regexp.Compile(pattern)
	if err != nil {
		return nil, err
	}

	f, err := os.Open(filepath.Join(mr.checkpointDir, fmt.Sprintf("pages-%d.img", mr.pagesID)))
	if err != nil {
		return nil, err
	}
	defer f.Close()

	var (
		results []PatternMatch
		// buff is reused across chunks; this is safe because the bytes kept
		// in a PatternMatch are copied by the string conversion below.
		buff []byte
		// entryFileOffset is the offset of the current pagemap entry inside
		// the pages image. It is accumulated while iterating instead of being
		// recomputed from the start of the slice for every entry.
		entryFileOffset uint64
	)

	for _, entry := range mr.pagemapEntries {
		startAddr := entry.GetVaddr()
		entrySize := uint64(entry.GetNrPages()) * uint64(mr.pageSize)

		for offset := uint64(0); offset < entrySize; offset += uint64(chunkSize) {
			readSize := chunkSize
			if remaining := entrySize - offset; remaining < uint64(chunkSize) {
				readSize = int(remaining)
			}

			if cap(buff) < readSize {
				buff = make([]byte, readSize)
			}
			buff = buff[:readSize]

			n, readErr := f.ReadAt(buff, int64(entryFileOffset+offset))
			if readErr != nil && !errors.Is(readErr, io.EOF) {
				return nil, readErr
			}
			// On io.EOF ReadAt may still have delivered n valid bytes; search
			// them rather than dropping the tail of a short pages image.
			data := buff[:n]

			// Replace bytes outside the printable ASCII range with a question
			// mark (0x3f) so that matching and the reported Match strings only
			// ever deal with printable characters.
			for i := range data {
				if data[i] < 32 || data[i] >= 127 {
					data[i] = 0x3F
				}
			}

			for _, index := range regexPattern.FindAllIndex(data, -1) {
				// Clamp the context window to the bytes available in this chunk.
				startContext := index[0] - context
				if startContext < 0 {
					startContext = 0
				}

				endContext := index[1] + context
				if endContext > len(data) {
					endContext = len(data)
				}

				results = append(results, PatternMatch{
					Vaddr:   startAddr + offset + uint64(index[0]),
					Length:  index[1] - index[0],
					Context: context,
					Match:   string(data[startContext:endContext]),
				})
			}

			if readErr != nil {
				// End of file reached: nothing more to read for this entry.
				break
			}
		}

		entryFileOffset += entrySize
	}

	return results, nil
}
93 changes: 85 additions & 8 deletions crit/mempages_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -248,7 +248,7 @@ func TestGetPsArgsAndEnvVars(t *testing.T) {
}
}

func TestGetShmemSize(t *testing.T) {
func TestSearchPattern(t *testing.T) {
pid, err := getTestImgPID()
if err != nil {
t.Fatal(err)
Expand All @@ -259,15 +259,92 @@ func TestGetShmemSize(t *testing.T) {
t.Fatal(err)
}

size, err := mr.GetShmemSize()
if err != nil {
t.Fatal(err)
testCases := []struct {
name string
pattern string
context int
escapeRegExpCharacters bool
shouldMatch bool
expectedError error
}{
{
name: "PATH environment variable",
pattern: "PATH=",
shouldMatch: true,
},
{
name: "PATH environment variable regex",
pattern: `\bPATH=([^\s]+)\b`,
shouldMatch: true,
},
{
name: "PATH environment variable regex with 10 bytes context",
pattern: `\bPATH=([^\s]+)\b`,
context: 10,
shouldMatch: true,
},
{
name: "PATH environment variable regex with a negative context",
pattern: `\bPATH=([^\s]+)\b`,
context: -1,
expectedError: errors.New("context size cannot be negative"),
},
{
name: "PATH environment variable regex with a large context",
pattern: `\bPATH=([^\s]+)\b`,
context: 100000,
shouldMatch: true,
},
{
name: "Non-existent pattern",
pattern: "NON_EXISTENT_PATTERN",
},
{
name: "PASSWORD environment variable value as regex",
pattern: "123 Hello.*?",
shouldMatch: true,
},
{
name: "PASSWORD environment variable value with regex metacharacters to escape",
pattern: `123 Hello.*?[^]@WORLD(|x)`,
escapeRegExpCharacters: true,
shouldMatch: true,
},
}

// Verify that the shared memory size is as expected (0)
expectedSize := int64(0)
if size != expectedSize {
t.Fatalf("Expected shared memory size: %d, but got: %d", expectedSize, size)
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
matches, err := mr.SearchPattern(tc.pattern, tc.escapeRegExpCharacters, tc.context, 0)
if err != nil && tc.expectedError == nil {
t.Errorf("Unexpected error for pattern %s: %v", tc.pattern, err)
} else if err == nil && tc.expectedError != nil {
t.Errorf("Expected error for pattern %s: %v", tc.pattern, tc.expectedError)
}

if tc.shouldMatch && len(matches) == 0 {
t.Errorf("Expected to find a match for pattern \"%s\"", tc.pattern)
} else if !tc.shouldMatch && len(matches) > 0 {
t.Errorf("Expected not to find any match for pattern \"%s\"", tc.pattern)
}

for _, match := range matches {
content, err := mr.GetMemPages(match.Vaddr, match.Vaddr+uint64(match.Length))
if err != nil {
t.Fatalf("Failed to get memory pages: %v", err)
}

buff := content.Bytes()
for i := range buff {
if buff[i] < 32 || buff[i] >= 127 {
buff[i] = 0x3F
}
}

if !strings.Contains(match.Match, content.String()) {
t.Errorf("Expected to find %s in matched pattern %s", content.String(), match.Match)
}
}
})
}
}

Expand Down
7 changes: 7 additions & 0 deletions test/loop/loop.c
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,13 @@ int main(void)
int res = EXIT_FAILURE;
int start_pipe[2];

// Set a PASSWORD environment variable to test the search pattern
// within process memory pages using regex metacharacters.
if (setenv("PASSWORD", "123 Hello.*?[^]@WORLD(|x)", 1) != 0) {
perror("setenv");
return 1;
}

if (pipe(start_pipe)) {
perror("pipe failed!");
goto out;
Expand Down

0 comments on commit a9064d7

Please sign in to comment.