Skip to content

Commit

Permalink
Extract video reader discovery logic
Browse files Browse the repository at this point in the history
Signed-off-by: Joaquin Anton Guirao <[email protected]>
  • Loading branch information
jantonguirao committed Mar 5, 2025
1 parent c6ec26e commit 79ef26d
Show file tree
Hide file tree
Showing 8 changed files with 404 additions and 219 deletions.
27 changes: 27 additions & 0 deletions dali/operators/video/frames_decoder_base.cc
Original file line number Diff line number Diff line change
Expand Up @@ -762,6 +762,33 @@ void FramesDecoderBase::SeekFrame(int frame_id) {
assert(next_frame_idx_ == frame_id);
}

/**
 * @brief Maps a presentation timestamp (pts) to a frame index using the frame index table.
 *
 * The index is scanned in order and the scan stops at the first entry whose pts is at or
 * after `timestamp`; this relies on the index entries being ordered by increasing pts
 * (NOTE(review): ordering is assumed here, confirm where the index is built).
 *
 * @param timestamp Timestamp to look up, in the same units as the pts values stored in the index
 * @param inclusive If false, returns the first frame whose pts is at or after `timestamp`.
 *                  If true, returns the last frame whose pts is at or before `timestamp`
 *                  (an exact match returns the matching frame itself).
 * @return A valid frame index. Requests outside the indexed range are clamped: a timestamp
 *         after the last frame yields the last frame, and an inclusive lookup of a timestamp
 *         before the first frame yields frame 0. An empty index yields 0.
 */
int FramesDecoderBase::GetFrameIdxByTimestamp(int64_t timestamp, bool inclusive) {
  // TODO(janton): Optimize for CFR videos (no need to iterate over the index)
  DALI_ENFORCE(HasIndex(), "No index available, cannot seek by timestamp");
  const int num_frames = static_cast<int>(index_.size());
  if (num_frames == 0) {
    return 0;
  }

  // First frame whose pts is at or after the requested timestamp. The scan starts at frame 0
  // so that a timestamp at or before the first frame resolves to frame 0.
  int frame_idx = num_frames;
  for (int i = 0; i < num_frames; i++) {
    if (index_[i].pts >= timestamp) {
      frame_idx = i;
      break;
    }
  }

  // The timestamp lies beyond the last frame: clamp to the last frame instead of
  // wrapping around to the beginning of the video.
  if (frame_idx == num_frames) {
    return num_frames - 1;
  }

  // In inclusive mode step back only when the found frame starts strictly after the
  // timestamp; an exact match already satisfies "at or before".
  if (inclusive && frame_idx > 0 && index_[frame_idx].pts > timestamp) {
    frame_idx--;
    assert(index_[frame_idx].pts <= timestamp);
  }
  return frame_idx;
}

/**
 * @brief Maps a time expressed in seconds to a frame index.
 *
 * The time is converted to the selected stream's time base units and then resolved with
 * GetFrameIdxByTimestamp.
 *
 * @param seconds Time in seconds
 * @param inclusive Forwarded unchanged to GetFrameIdxByTimestamp
 * @return Frame index as returned by GetFrameIdxByTimestamp
 */
int FramesDecoderBase::GetFrameIdxByTimeInSeconds(float seconds, bool inclusive) {
  DALI_ENFORCE(HasIndex(), "No index available, cannot seek by timestamp");
  // Convert seconds to PTS (presentation time stamp) units
  const auto timebase = ctx_->streams[stream_id_]->time_base;
  DALI_ENFORCE(timebase.num != 0, "Invalid stream time base, cannot seek by timestamp");
  // Do the arithmetic in double precision: a float product cannot represent individual ticks
  // once the tick count exceeds 2^24 (e.g. about three minutes at a 1/90000 time base).
  const double ticks = static_cast<double>(seconds) * timebase.den / timebase.num;
  // Round to the nearest tick so that the representation error of `seconds` (e.g. 0.7f is
  // slightly below 0.7) does not land the timestamp one tick short of the intended frame.
  const int64_t timestamp = static_cast<int64_t>(ticks < 0 ? ticks - 0.5 : ticks + 0.5);
  return GetFrameIdxByTimestamp(timestamp, inclusive);
}

bool FramesDecoderBase::ReadFlushFrame(uint8_t *data) {
bool copy_to_output = data != nullptr;
if (avcodec_receive_frame(codec_ctx_, frame_) < 0) {
Expand Down
16 changes: 16 additions & 0 deletions dali/operators/video/frames_decoder_base.h
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,22 @@ class DLL_PUBLIC FramesDecoderBase {
*/
virtual void SeekFrame(int frame_id);

/**
* @brief Returns the index of the frame that corresponds to the given timestamp
*
* The base implementation looks the timestamp up in the frame index and fails if no
* index is available.
*
* @param timestamp Presentation timestamp (pts) to look up, compared directly against the
*                  pts values stored in the frame index
* @param inclusive If true, the lookup prefers a frame whose pts is at or before `timestamp`;
*                  if false, a frame whose pts is at or after `timestamp`
* @return Index of the selected frame
*/
virtual int GetFrameIdxByTimestamp(int64_t timestamp, bool inclusive = false);

/**
* @brief Returns the index of the frame that corresponds to the given time in seconds
*
* The base implementation converts the time to the stream's time base units and delegates
* to GetFrameIdxByTimestamp; it fails if no index is available.
*
* @param seconds Time in seconds to look up
* @param inclusive If true, the lookup prefers a frame at or before the given time;
*                  if false, a frame at or after the given time
* @return Index of the selected frame
*/
virtual int GetFrameIdxByTimeInSeconds(float seconds, bool inclusive = false);

/**
* @brief Seeks to the first frame
*/
Expand Down
126 changes: 0 additions & 126 deletions dali/operators/video/legacy/reader/video_loader.cc
Original file line number Diff line number Diff line change
Expand Up @@ -76,132 +76,6 @@ auto codecpar(AVStream* stream) -> decltype(stream->codec) {
}
#endif

/**
 * @brief Appends every file found directly inside `path/curr_entry` to `file_info`.
 *
 * Each appended entry carries the full path of the file, the given label and a zero
 * start/end time. The directory is not traversed recursively.
 *
 * @param path Root directory
 * @param curr_entry Name of the sub-directory of `path` to list
 * @param label Label assigned to every file found in the sub-directory
 * @param file_info Output list the entries are appended to
 */
inline void assemble_video_list(const std::string& path, const std::string& curr_entry, int label,
                                std::vector<dali::file_meta> &file_info) {
  std::string curr_dir_path = path + "/" + curr_entry;
  DIR *dir = opendir(curr_dir_path.c_str());
  DALI_ENFORCE(dir != nullptr, "Directory " + curr_dir_path + " could not be opened");

  struct dirent *entry;

  while ((entry = readdir(dir))) {
    // "." and ".." are never video files. Without this check they would be recorded
    // whenever d_type is not available or the filesystem reports DT_UNKNOWN.
    if (strcmp(entry->d_name, ".") == 0 || strcmp(entry->d_name, "..") == 0) {
      continue;
    }
#ifdef _DIRENT_HAVE_D_TYPE
    /*
     * Regular files and symlinks supported. Entries for which the FS returns
     * DT_UNKNOWN are kept as well, since their type cannot be told from d_type.
     */
    if (entry->d_type != DT_REG && entry->d_type != DT_LNK &&
        entry->d_type != DT_UNKNOWN) {
      continue;
    }
#endif
    std::string full_path = curr_dir_path + "/" + std::string{entry->d_name};
    file_info.push_back(file_meta{full_path, label, 0, 0});
  }
  closedir(dir);
}

/**
 * @brief Builds the list of video files (with labels and optional start/end values).
 *
 * Exactly one source is used, chosen in this order:
 *  1. `file_root` (if not empty): every sub-directory of the root is a class; sub-directories
 *     are sorted by name and the position in that order is the label of the files inside.
 *  2. `file_list` (if not empty): a text file with one `path label [start [end]]` entry per
 *     line. Blank lines are skipped; entries whose start and end are equal are skipped with
 *     a warning.
 *  3. `filenames`: labels come from `labels` if `use_labels` is set and `labels` is not empty,
 *     from the position in `filenames` if `use_labels` is set and `labels` is empty, and are
 *     0 otherwise.
 *
 * @param file_root Root directory containing one sub-directory per label
 * @param filenames Explicit list of files
 * @param use_labels Whether labels should be attached to the entries built from `filenames`
 * @param labels Labels matching `filenames`; must not be shorter than `filenames` when used
 * @param file_list Path to the list file
 * @return The collected file entries
 */
std::vector<dali::file_meta> filesystem::get_file_label_pair(
    const std::string& file_root,
    const std::vector<std::string>& filenames,
    bool use_labels,
    const std::vector<int>& labels,
    const std::string& file_list) {
  std::vector<dali::file_meta> file_info;
  std::vector<std::string> entry_name_list;

  if (!file_root.empty()) {
    // open the root
    DIR *dir = opendir(file_root.c_str());

    DALI_ENFORCE(dir != nullptr,
                 "Directory " + file_root + " could not be opened.");

    struct dirent *entry;

    while ((entry = readdir(dir))) {
      // Skip the self/parent entries before touching the filesystem again.
      if (strcmp(entry->d_name, ".") == 0 || strcmp(entry->d_name, "..") == 0) continue;
      struct stat s;
      std::string entry_name(entry->d_name);
      std::string full_path = file_root + "/" + entry_name;
      int ret = stat(full_path.c_str(), &s);
      DALI_ENFORCE(ret == 0,
                   "Could not access " + full_path + " during directory traversal.");
      if (S_ISDIR(s.st_mode)) {
        entry_name_list.push_back(entry_name);
      }
    }
    closedir(dir);
    // sort directories to preserve class alphabetic order, as readdir could
    // return unordered dir list. Otherwise file reader for training and validation
    // could return directories with the same names in completely different order
    std::sort(entry_name_list.begin(), entry_name_list.end());
    for (unsigned dir_count = 0; dir_count < entry_name_list.size(); ++dir_count) {
      assemble_video_list(file_root, entry_name_list[dir_count], dir_count, file_info);
    }

    // sort file names as well
    std::sort(file_info.begin(), file_info.end());
  } else if (!file_list.empty()) {
    // load (path, label) pairs from list
    std::ifstream s(file_list);
    DALI_ENFORCE(s.is_open(), file_list + " could not be opened.");

    string line;
    string video_file;
    int label;
    float start_time;
    float end_time;
    int line_num = 0;
    while (std::getline(s, line)) {
      line_num++;
      video_file.clear();
      label = -1;
      start_time = end_time = 0;
      std::istringstream file_line(line);
      file_line >> video_file >> label;
      if (video_file.empty()) continue;
      // A failed integer extraction stores 0 (or a clamped value on overflow) in `label`,
      // which would let a malformed label pass the check below. Restore the sentinel so
      // that the entry is rejected.
      if (file_line.fail()) {
        label = -1;
      }
      DALI_ENFORCE(label >= 0, "Label value should be >= 0 in file_list at line number: "
                   + to_string(line_num) + ", filename: "+ video_file);
      if (file_line >> start_time) {
        if (file_line >> end_time) {
          if (start_time == end_time) {
            DALI_WARN("Start and end time/frame are the same, skipping the file, in file_list "
                      "at line number: " + to_string(line_num) + ", filename: "+ video_file);
            continue;
          }
        }
      }
      file_info.push_back(file_meta{video_file, label, start_time, end_time});
    }

    DALI_ENFORCE(s.eof(), "Wrong format of file_list.");
    s.close();
  } else {
    file_info.reserve(filenames.size());
    if (use_labels) {
      if (!labels.empty()) {
        // labels[i] is read for every file name, so a shorter label list would be
        // indexed out of bounds.
        DALI_ENFORCE(labels.size() >= filenames.size(),
                     "Fewer labels than filenames were provided.");
        for (size_t i = 0; i < filenames.size(); ++i) {
          file_info.push_back(file_meta{filenames[i], labels[i], 0, 0});
        }
      } else {
        for (size_t i = 0; i < filenames.size(); ++i) {
          file_info.push_back(file_meta{filenames[i], static_cast<int>(i), 0, 0});
        }
      }
    } else {
      for (size_t i = 0; i < filenames.size(); ++i) {
        file_info.push_back(file_meta{filenames[i], 0, 0, 0});
      }
    }
  }

  LOG_LINE << "read " << file_info.size() << " files from "
           << entry_name_list.size() << " directories\n";

  return file_info;
}

// Are these good numbers? Allow them to be set?
static constexpr auto frames_used_warning_ratio = 3.0f;
static constexpr auto frames_used_warning_minimum = 1000;
Expand Down
24 changes: 3 additions & 21 deletions dali/operators/video/legacy/reader/video_loader.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ extern "C" {
#include "dali/operators/reader/loader/loader.h"
#include "dali/operators/video/legacy/reader/nvdecoder/nvdecoder.h"
#include "dali/operators/video/legacy/reader/nvdecoder/sequencewrapper.h"
#include "dali/operators/video/video_reader_utils.h"
#include "dali/pipeline/util/worker_thread.h"

template<typename T>
Expand All @@ -55,24 +56,6 @@ auto codecpar(AVStream* stream) -> decltype(stream->codecpar);
auto codecpar(AVStream* stream) -> decltype(stream->codec);
#endif

/**
 * @brief Description of a single video file entry.
 */
struct file_meta {
  std::string video_file;  ///< Path of the video file
  int label;               ///< Label associated with the file
  float start_time;        ///< Start of the requested range (0 when not specified)
  float end_time;          ///< End of the requested range (0 when not specified)

  /**
   * @brief Orders entries by file path only; the other fields do not take part in the ordering.
   *
   * Const-qualified so that const entries can be compared as well.
   */
  bool operator< (const file_meta& right) const {
    return video_file < right.video_file;
  }
};

namespace filesystem {

/**
 * @brief Builds the list of video file entries from one of three sources.
 *
 * The root directory `path` is used if it is not empty, otherwise `file_list` if it is not
 * empty, otherwise `filenames`.
 *
 * @param path Root directory; each of its sub-directories provides the files of one label
 * @param filenames Explicit list of files, used when neither `path` nor `file_list` is given
 * @param use_labels Whether labels are attached to the entries built from `filenames`
 * @param labels Labels matching `filenames`
 * @param file_list Path to a text file listing one file entry per line
 * @return The collected file entries
 */
std::vector<dali::file_meta> get_file_label_pair(const std::string& path,
const std::vector<std::string>& filenames, bool use_labels,
const std::vector<int>& labels, const std::string& file_list);

} // namespace filesystem

struct VideoFileDesc {
FILE *file_stream = nullptr;
uint64_t file_position = 0;
Expand Down Expand Up @@ -181,8 +164,7 @@ class VideoLoader : public Loader<GPUBackend, SequenceWrapper, true> {
}

bool use_labels = spec.TryGetRepeatedArgument(labels_, "labels");
file_info_ = filesystem::get_file_label_pair(file_root_, filenames_, use_labels, labels_,
file_list_);
file_info_ = GetVideoFiles(file_root_, filenames_, use_labels, labels_, file_list_);
DALI_ENFORCE(!file_info_.empty(), "No files were read.");

auto ret = cuvidInitChecked();
Expand Down Expand Up @@ -383,7 +365,7 @@ class VideoLoader : public Loader<GPUBackend, SequenceWrapper, true> {
Index current_frame_idx_;

volatile bool stop_;
std::vector<file_meta> file_info_;
std::vector<VideoFileMeta> file_info_;
};

} // namespace dali
Expand Down
101 changes: 80 additions & 21 deletions dali/operators/video/reader/video_loader_decoder.h
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
Expand All @@ -22,7 +22,7 @@
#include "dali/operators/video/frames_decoder_base.h"
#include "dali/operators/video/frames_decoder_gpu.h"
#include "dali/operators/video/frames_decoder_cpu.h"

#include "dali/operators/video/video_reader_utils.h"
namespace dali {

struct VideoSampleDesc {
Expand Down Expand Up @@ -50,21 +50,49 @@ struct VideoSample : public VideoSampleDesc {
int64_t start_timestamp_ = -1;
};

/**
 * @brief How the start/end values of a file_list entry are interpreted.
 *
 * Selected with the `file_list_format` argument ("frame_index", "timestamp",
 * "timestamp_inclusive").
 */
enum class FileListFormat {
// Start/end values are used directly as frame indices.
kFrameIndex,
// Start/end values are times in seconds: the start is resolved with a non-inclusive
// lookup and the end with an inclusive lookup.
kTimestamp,
// Start/end values are times in seconds: the start is resolved with an inclusive
// lookup and the end with a non-inclusive lookup.
kTimestampInclusive
};

template <typename Backend, typename FramesDecoderImpl, typename Sample = VideoSample<Backend>>
class VideoLoaderDecoder : public Loader<Backend, Sample, true> {
public:
explicit inline VideoLoaderDecoder(const OpSpec &spec):
Loader<Backend, Sample, true>(spec),
filenames_(spec.GetRepeatedArgument<std::string>("filenames")),
sequence_len_(spec.GetArgument<int>("sequence_length")),
stride_(spec.GetArgument<int>("stride")),
step_(spec.GetArgument<int>("step")) {
explicit inline VideoLoaderDecoder(const OpSpec &spec)
: Loader<Backend, Sample, true>(spec),
file_root_(spec.GetArgument<std::string>("file_root")),
file_list_(spec.GetArgument<std::string>("file_list")),
filenames_(spec.GetRepeatedArgument<std::string>("filenames")),
sequence_len_(spec.GetArgument<int>("sequence_length")),
stride_(spec.GetArgument<int>("stride")),
step_(spec.GetArgument<int>("step")) {
if ((spec.HasArgument("file_list") || spec.HasArgument("file_root") || spec.HasArgument("filenames")) != 1) {
DALI_FAIL("Only one of the following arguments can be provided: ``file_list``, ``file_root``, ``filenames``");
}
has_labels_ = spec.TryGetRepeatedArgument(labels_, "labels");
DALI_ENFORCE(
!has_labels_ || labels_.size() == filenames_.size(),
make_string(
"Number of provided files and labels should match. Provided ",
filenames_.size(), " files and ", labels_.size(), " labels."));
if (has_labels_) {
DALI_ENFORCE(
labels_.size() == filenames_.size(),
make_string(
"Number of provided files and labels should match. Provided ",
filenames_.size(), " files and ", labels_.size(), " labels."));
}

video_files_info_ = GetVideoFiles(file_root_, filenames_, has_labels_, labels_, file_list_);
DALI_ENFORCE(!video_files_info_.empty(), "No files were read.");

if (!file_list_.empty()) {
auto file_list_format_str = spec.GetArgument<std::string>("file_list_format");
if (file_list_format_str == "frame_index") {
file_list_format_ = FileListFormat::kFrameIndex;
} else if (file_list_format_str == "timestamp") {
file_list_format_ = FileListFormat::kTimestamp;
} else if (file_list_format_str == "timestamp_inclusive") {
file_list_format_ = FileListFormat::kTimestampInclusive;
}
}

if (step_ <= 0) {
step_ = stride_ * sequence_len_;
}
Expand All @@ -89,20 +117,46 @@ class VideoLoaderDecoder : public Loader<Backend, Sample, true> {

void PrepareMetadataImpl() override {
std::unique_ptr<FramesDecoderImpl> decoder;
for (size_t i = 0; i < filenames_.size(); ++i) {
const auto &filename = filenames_[i];
int label = has_labels_ ? labels_[i] : -1;
decoder = std::make_unique<FramesDecoderImpl>(filename, true);
for (size_t i = 0; i < video_files_info_.size(); ++i) {
auto& entry = video_files_info_[i];
decoder = std::make_unique<FramesDecoderImpl>(entry.video_file, true);
if (!decoder->IsValid()) {
LOG_LINE << "Invalid video file: " << filename << std::endl;
LOG_LINE << "Invalid video file: " << entry.video_file << std::endl;
continue;
}
int64_t num_frames = decoder->NumFrames();
for (int start = 0; start + stride_ * sequence_len_ <= num_frames;
int start_frame = 0, end_frame = num_frames;
if (entry.start_time != 0.0f || entry.end_time != 0.0f) {
switch (file_list_format_) {
case FileListFormat::kFrameIndex:
start_frame = entry.start_time;
end_frame = entry.end_time;
break;
case FileListFormat::kTimestamp:
start_frame = decoder->GetFrameIdxByTimeInSeconds(entry.start_time, false);
end_frame = decoder->GetFrameIdxByTimeInSeconds(entry.end_time, true);
break;
case FileListFormat::kTimestampInclusive:
start_frame = decoder->GetFrameIdxByTimeInSeconds(entry.start_time, true);
end_frame = decoder->GetFrameIdxByTimeInSeconds(entry.end_time, false);
break;
default:
DALI_FAIL("Invalid file_list_format");
}
}

if (start_frame >= end_frame) {
DALI_WARN(make_string("Empty frame range [", start_frame, ", ", end_frame,
") for file ", entry.video_file, ". Skipping."));
continue;
}

for (int start = start_frame; start + stride_ * sequence_len_ <= end_frame;
start += step_) {
LOG_LINE << "Sample #" << samples_.size() << ": " << filename << " " << label << " "
LOG_LINE << "Sample #" << samples_.size() << ": " << entry.video_file << " " << entry.label << " "
<< start << ".." << start + stride_ * sequence_len_ << std::endl;
samples_.emplace_back(filename, label, start, start + stride_ * sequence_len_, stride_);
samples_.emplace_back(
entry.video_file, entry.label, start, start + stride_ * sequence_len_, stride_);
}
}

Expand Down Expand Up @@ -137,16 +191,21 @@ class VideoLoaderDecoder : public Loader<Backend, Sample, true> {
using Base::MoveToNextShard;
using Base::ShouldSkipImage;

std::string file_root_;
std::string file_list_;
std::vector<std::string> filenames_;
std::vector<int> labels_;
bool has_labels_ = false;

FileListFormat file_list_format_ = FileListFormat::kTimestamp;

Index current_index_ = 0;

int sequence_len_;
int stride_;
int step_;

std::vector<VideoFileMeta> video_files_info_;
std::vector<VideoSampleDesc> samples_;
CUDAStreamLease cuda_stream_;
};
Expand Down
Loading

0 comments on commit 79ef26d

Please sign in to comment.