Skip to content

Commit

Permalink
Use fstatat(2) for gufi_dir2index and gufi_dir2trace
Browse files Browse the repository at this point in the history
Pathname resolution has a significant overhead during
GUFI indexing. Using the _at variant of stat(2) when processing
files means less work is required to resolve the paths.

Profiling GUFI runs with flamegraphs suggests that this is an
effective change to reduce the overhead of pathname resolution.
I observed the time spent resolving names go from ~ 14% using
lstat(2) down to ~ 6% using fstatat(2), in one example run on an
NFS filesystem with approx. 450,000 files.
  • Loading branch information
bertschinger committed Sep 10, 2024
1 parent ccc35d6 commit ef3fe40
Show file tree
Hide file tree
Showing 4 changed files with 23 additions and 3 deletions.
1 change: 1 addition & 0 deletions include/bf.h
Original file line number Diff line number Diff line change
Expand Up @@ -301,6 +301,7 @@ struct work {

/* extra data used by entries that does not depend on data from other directories */
struct entry_data {
int parent_fd; /* holds an FD that can be used for fstatat(2), etc. */
char type;
char linkname[MAXPATH];
uint8_t lstat_called;
Expand Down
5 changes: 4 additions & 1 deletion src/gufi_dir2index.c
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ OF SUCH DAMAGE.

#include <errno.h>
#include <dirent.h>
#include <fcntl.h>
#include <inttypes.h>
#include <stdlib.h>
#include <string.h>
Expand Down Expand Up @@ -129,7 +130,9 @@ static int process_nondir(struct work *entry, struct entry_data *ed, void *args)
struct input *in = nda->in;

if (!ed->lstat_called) {
if (lstat(entry->name, &ed->statuso) != 0) {
char *basename = entry->name + entry->name_len - entry->basename_len;

if (fstatat(ed->parent_fd, basename, &ed->statuso, AT_SYMLINK_NOFOLLOW) != 0) {
return 1;
}
}
Expand Down
6 changes: 4 additions & 2 deletions src/gufi_dir2trace.c
Original file line number Diff line number Diff line change
Expand Up @@ -106,8 +106,10 @@ static int process_external(struct input *in, void *args,
static int process_nondir(struct work *entry, struct entry_data *ed, void *args) {
struct NondirArgs *nda = (struct NondirArgs *) args;
if (!ed->lstat_called) {
if (lstat(entry->name, &ed->statuso) != 0) {
return 0;
char *basename = entry->name + entry->name_len - entry->basename_len;

if (fstatat(ed->parent_fd, basename, &ed->statuso, AT_SYMLINK_NOFOLLOW) != 0) {
return 1;
}

if (ed->type == 'l') {
Expand Down
14 changes: 14 additions & 0 deletions src/utils.c
Original file line number Diff line number Diff line change
Expand Up @@ -412,6 +412,17 @@ int descend(QPTPool_t *ctx, const size_t id, void *args,
const size_t next_level = work->level + 1;
const size_t recursion_level = work->recursion_level + 1;

int d_fd = dirfd(dir);
if (d_fd < 0) {
/*
* We should never get here. glibc's dirfd(3) never returns errors, and
* Apple's libc only returns an error if the DIR * is invalid, which would
* indicate a bug in GUFI.
*/
fprintf(stderr, "BUG: dirfd(3) failed: errno = %d\n", errno);
return 1;
}

struct dirent *dir_child = NULL;
while ((dir_child = readdir(dir))) {
const size_t len = strlen(dir_child->d_name);
Expand All @@ -428,6 +439,7 @@ int descend(QPTPool_t *ctx, const size_t id, void *args,

struct entry_data child_ed;
memset(&child_ed, 0, sizeof(child_ed));
child_ed.parent_fd = -1;

/* get child path */
child.name_len = SNFORMAT_S(child.name, MAXPATH, 3,
Expand All @@ -439,6 +451,7 @@ int descend(QPTPool_t *ctx, const size_t id, void *args,
child.root_parent = work->root_parent;
child.pinode = inode;


switch (dir_child->d_type) {
case DT_DIR:
child_ed.statuso.st_mode = S_IFDIR;
Expand Down Expand Up @@ -520,6 +533,7 @@ int descend(QPTPool_t *ctx, const size_t id, void *args,
xattrs_get(child.name, &child_ed.xattrs);
}

child_ed.parent_fd = d_fd;
processnondir(&child, &child_ed, nondir_args);
ctrs.nondirs_processed++;

Expand Down

0 comments on commit ef3fe40

Please sign in to comment.