Skip to content

Commit

Permalink
Working fix
Browse files Browse the repository at this point in the history
  • Loading branch information
Tom-Newton committed Oct 4, 2024
1 parent 9994912 commit 17cf63f
Showing 1 changed file with 22 additions and 0 deletions.
22 changes: 22 additions & 0 deletions cpp/src/arrow/filesystem/azurefs.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1806,6 +1806,8 @@ class AzureFileSystem::Impl {
// BlobPrefixes. A BlobPrefix always ends with kDelimiter ("/"), so we can
// distinguish between a directory and a file by checking if we received a
// prefix or a blob.
// This strategy allows us to implement GetFileInfo with just 1 blob storage
// operation in almost every case.
if (!list_response.BlobPrefixes.empty()) {
// Ensure the returned BlobPrefixes[0] string doesn't contain more characters than
// the requested Prefix. For instance, if we request with Prefix="dir/abra" and
Expand All @@ -1821,12 +1823,32 @@ class AzureFileSystem::Impl {
}
if (!list_response.Blobs.empty()) {
const auto& blob = list_response.Blobs[0];
const auto next_character = blob.Name[options.Prefix.Value().length()];
if (blob.Name == location.path) {
info.set_type(FileType::File);
info.set_size(blob.BlobSize);
info.set_mtime(
std::chrono::system_clock::time_point{blob.Details.LastModified});
return info;
} else if (next_character < '/') {
// First list result did not indicate a directory and there is definitely no
// exactly matching blob. However, there may still be a directory that we
// initially missed because the first list result came before
// `options.Prefix + '/'` lexigraphically.
// For example the flat namespace storage account has the following blobs:
// - container/dir.txt
// - container/dir/file.txt
// GetFileInfo(container/dir) should return FileType::Directory but in this
// edge case `blob = "dir.txt"`, so without further checks we would incorrectly
// return FileType::NotFound.
// Therefore we make an extra list operation with the trailing slash to confirm
// whether the path is a directory.
options.Prefix = internal::EnsureTrailingSlash(location.path);
auto list_with_trailing_slash_response = container_client.ListBlobs(options);
if (!list_with_trailing_slash_response.Blobs.empty()) {
info.set_type(FileType::Directory);
return info;
}
}
}
info.set_type(FileType::NotFound);
Expand Down

0 comments on commit 17cf63f

Please sign in to comment.