Skip to content

Commit

Permalink
Fixes for #212 (#239)
Browse files (browse the repository at this point in the history)
- remove hardcoded pipeline length in PipelinedExecutor
- fix PyTorch iterator for multi-GPU
- adjust PyTorch example to use new nvJpeg API

Signed-off-by: Janusz Lisiecki <[email protected]>
  • Branch information
JanuszL committed Oct 31, 2018
1 parent 2c1c9c8 commit db82480
Show file tree
Hide file tree
Showing 5 changed files with 10 additions and 8 deletions.
6 changes: 3 additions & 3 deletions dali/benchmark/resnet50_bench.cc
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ BENCHMARK_DEFINE_F(RN50, C2Pipe)(benchmark::State& st) { // NOLINT
Pipeline pipe(
batch_size,
num_thread,
0, -1, pipelined, 2,
0, -1, pipelined, 3,
async);

TensorList<CPUBackend> data;
Expand Down Expand Up @@ -167,7 +167,7 @@ BENCHMARK_DEFINE_F(RN50, HybridPipe)(benchmark::State& st) { // NOLINT
Pipeline pipe(
batch_size,
num_thread,
0, -1, pipelined, 2,
0, -1, pipelined, 3,
async);

TensorList<CPUBackend> data;
Expand Down Expand Up @@ -299,7 +299,7 @@ BENCHMARK_DEFINE_F(RN50, nvJPEGPipe)(benchmark::State& st) { // NOLINT
Pipeline pipe(
batch_size,
num_thread,
0, -1, pipelined, 2,
0, -1, pipelined, 3,
async);

TensorList<CPUBackend> data;
Expand Down
1 change: 0 additions & 1 deletion dali/pipeline/executor/pipelined_executor.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,6 @@ class DLL_PUBLIC PipelinedExecutor : public Executor {
bool set_affinity = false, int max_num_stream = -1, int prefetch_queue_depth = 2) :
Executor(batch_size, num_thread, device_id, bytes_per_sample_hint,
set_affinity, max_num_stream, prefetch_queue_depth) {
Executor::queue_depth_ = 3;
}

DLL_PUBLIC virtual ~PipelinedExecutor() = default;
Expand Down
1 change: 1 addition & 0 deletions dali/python/nvidia/dali/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -313,6 +313,7 @@ def deserialize_and_build(self, serialized_pipeline):
self._num_threads,
self._device_id,
self._exec_pipelined,
self._prefetch_queue_depth,
self._exec_async,
self._bytes_per_sample,
self._set_affinity,
Expand Down
6 changes: 3 additions & 3 deletions dali/python/nvidia/dali/plugin/pytorch.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,10 +146,10 @@ def __next__(self):
feed_ndarray(d_arr, pyt_data[j])
for j, l_arr in enumerate(labels):
feed_ndarray(l_arr, pyt_labels[j])
for p in self._pipes:
p._release_outputs()
p._start_run()

for p in self._pipes:
p._release_outputs()
p._start_run()

copy_db_index = self._current_data_batch
# Change index for double buffering
Expand Down
4 changes: 3 additions & 1 deletion docs/examples/pytorch/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,9 @@ class HybridTrainPipe(Pipeline):
def __init__(self, batch_size, num_threads, device_id, data_dir, crop):
super(HybridTrainPipe, self).__init__(batch_size, num_threads, device_id, seed=12 + device_id)
self.input = ops.FileReader(file_root=data_dir, shard_id=args.local_rank, num_shards=args.world_size, random_shuffle=True)
self.decode = ops.nvJPEGDecoder(device="mixed", output_type=types.RGB)
# This padding sets the size of the internal nvJPEG buffers to be able to handle all images from full-sized ImageNet
# without additional reallocations
self.decode = ops.nvJPEGDecoder(device="mixed", output_type=types.RGB, device_memory_padding=211025920, host_memory_padding=140544512)
self.rrc = ops.RandomResizedCrop(device="gpu", size =(crop, crop))
self.cmnp = ops.CropMirrorNormalize(device="gpu",
output_dtype=types.FLOAT,
Expand Down

0 comments on commit db82480

Please sign in to comment.