From fba588352ad22d8e979a51b28cefee89df78344e Mon Sep 17 00:00:00 2001 From: Vincent Demoulin Date: Thu, 21 Nov 2024 18:41:33 +0100 Subject: [PATCH 1/4] Update model output processing --- src/aliceVision/segmentation/segmentation.cpp | 56 ++++++++++++------- src/aliceVision/segmentation/segmentation.hpp | 9 ++- 2 files changed, 45 insertions(+), 20 deletions(-) diff --git a/src/aliceVision/segmentation/segmentation.cpp b/src/aliceVision/segmentation/segmentation.cpp index aa7538a163..877be6ab75 100644 --- a/src/aliceVision/segmentation/segmentation.cpp +++ b/src/aliceVision/segmentation/segmentation.cpp @@ -244,7 +244,7 @@ bool Segmentation::mergeLabels(image::Image& labels, image::Image& labels, const std::vector& modelOutput) +bool Segmentation::labelsFromOutputTensor(image::Image& labels, Ort::Value& modelOutput) { for (int outputY = 0; outputY < _parameters.modelHeight; outputY++) { @@ -255,10 +255,8 @@ bool Segmentation::labelsFromModelOutput(image::Image& labels, cons for (int classe = 0; classe < _parameters.classes.size(); classe++) { - int classPos = classe * _parameters.modelWidth * _parameters.modelHeight; - int pos = classPos + outputY * _parameters.modelWidth + outputX; - - float val = modelOutput[pos]; + const std::vector coords = {0,classe,outputY,outputX}; + const float val = modelOutput.At(coords); if (val > maxVal) { maxVal = val; @@ -281,11 +279,6 @@ bool Segmentation::processTile(image::Image& labels, const image::I std::vector inputNames{"input"}; std::vector outputNames{"output"}; std::vector inputDimensions = {1, 3, _parameters.modelHeight, _parameters.modelWidth}; - std::vector outputDimensions = {1, static_cast(_parameters.classes.size()), _parameters.modelHeight, _parameters.modelWidth}; - - std::vector output(_parameters.classes.size() * _parameters.modelHeight * _parameters.modelWidth); - Ort::Value outputTensors = - Ort::Value::CreateTensor(memInfo, output.data(), output.size(), outputDimensions.data(), outputDimensions.size()); std::vector transformedInput; imageToPlanes(transformedInput, source); @@ -293,9 +286,11 @@ bool Segmentation::processTile(image::Image& labels, const image::I Ort::Value inputTensors = Ort::Value::CreateTensor(memInfo, transformedInput.data(), transformedInput.size(), inputDimensions.data(), inputDimensions.size()); + std::vector outTensor; + try { - _ortSession->Run(Ort::RunOptions{nullptr}, inputNames.data(), &inputTensors, 1, outputNames.data(), &outputTensors, 1); + outTensor = _ortSession->Run(Ort::RunOptions{nullptr}, inputNames.data(), &inputTensors, 1, outputNames.data(), 1); } catch (const Ort::Exception& exception) { @@ -303,7 +298,21 @@ bool Segmentation::processTile(image::Image& labels, const image::I return false; } - if (!labelsFromModelOutput(labels, output)) + std::vector output(_parameters.classes.size() * _parameters.modelHeight * _parameters.modelWidth); + int idx = 0; + for (int ch = 0; ch < _parameters.classes.size(); ch++) + { + for (int i = 0; i < _parameters.modelHeight; i++) + { + for (int j = 0; j < _parameters.modelWidth; j++) + { + const std::vector coords = {0, ch, i, j}; + output[idx++] = outTensor[0].At(coords); + } + } + } + + if (!labelsFromOutputTensor(labels, outTensor[0])) { return false; } @@ -321,10 +330,6 @@ bool Segmentation::processTileGPU(image::Image& labels, const image std::vector inputNames{"input"}; std::vector outputNames{"output"}; std::vector inputDimensions = {1, 3, _parameters.modelHeight, _parameters.modelWidth}; - std::vector outputDimensions = {1, static_cast(_parameters.classes.size()), _parameters.modelHeight, _parameters.modelWidth}; - - Ort::Value outputTensors = Ort::Value::CreateTensor( - mem_info_cuda, reinterpret_cast(_cudaOutput), _output.size(), outputDimensions.data(), outputDimensions.size()); std::vector transformedInput; imageToPlanes(transformedInput, source); @@ -334,9 +339,11 @@ bool Segmentation::processTileGPU(image::Image& labels, const image Ort::Value inputTensors = Ort::Value::CreateTensor( mem_info_cuda, reinterpret_cast(_cudaInput), transformedInput.size(), inputDimensions.data(), inputDimensions.size()); + std::vector outTensor; + try { - _ortSession->Run(Ort::RunOptions{nullptr}, inputNames.data(), &inputTensors, 1, outputNames.data(), &outputTensors, 1); + outTensor = _ortSession->Run(Ort::RunOptions{nullptr}, inputNames.data(), &inputTensors, 1, outputNames.data(), 1); } catch (const Ort::Exception& exception) { @@ -344,9 +351,20 @@ bool Segmentation::processTileGPU(image::Image& labels, const image return false; } - cudaMemcpy(_output.data(), _cudaOutput, sizeof(float) * _output.size(), cudaMemcpyDeviceToHost); + int idx = 0; + for (int ch = 0; ch < _parameters.classes.size(); ch++) + { + for (int i = 0; i < _parameters.modelHeight; i++) + { + for (int j = 0; j < _parameters.modelWidth; j++) + { + const std::vector coords = {0, ch, i, j}; + _output[idx++] = outTensor[0].At(coords); + } + } + } - if (!labelsFromModelOutput(labels, _output)) + if (!labelsFromOutputTensor(labels, outTensor[0])) { return false; } diff --git a/src/aliceVision/segmentation/segmentation.hpp b/src/aliceVision/segmentation/segmentation.hpp index bb90ae8fda..22f5894fe1 100644 --- a/src/aliceVision/segmentation/segmentation.hpp +++ b/src/aliceVision/segmentation/segmentation.hpp @@ -86,11 +86,18 @@ class Segmentation /** * Transform model output to a label image - * @param labels the output labels imaage + * @param labels the output labels image * @param modeloutput the model output vector */ bool labelsFromModelOutput(image::Image& labels, const std::vector& modelOutput); + /** + * Transform model output to a label image + * @param labels the output labels image + * @param modeloutput the model output tensor + */ + bool labelsFromOutputTensor(image::Image& labels, Ort::Value& modelOutput); + /** * Process effectively a buffer of the model input size * param labels the output labels From 9d35b0589aec573eef5763cda239b6340996d37d Mon Sep 17 00:00:00 2001 From: Vincent Demoulin Date: Thu, 21 Nov 2024 18:57:49 +0100 Subject: [PATCH 2/4] Remove cuda output allocator --- src/aliceVision/segmentation/segmentation.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/aliceVision/segmentation/segmentation.cpp b/src/aliceVision/segmentation/segmentation.cpp index 877be6ab75..e104532e09 100644 --- a/src/aliceVision/segmentation/segmentation.cpp +++ b/src/aliceVision/segmentation/segmentation.cpp @@ -72,7 +72,6 @@ bool Segmentation::initialize() _output.resize(_parameters.classes.size() * _parameters.modelHeight * _parameters.modelWidth); _cudaInput = cudaAllocator.Alloc(_output.size() * sizeof(float)); - _cudaOutput = cudaAllocator.Alloc(_output.size() * sizeof(float)); #endif } else @@ -94,7 +93,6 @@ bool Segmentation::terminate() Ort::MemoryInfo mem_info_cuda("Cuda", OrtAllocatorType::OrtArenaAllocator, 0, OrtMemType::OrtMemTypeDefault); Ort::Allocator cudaAllocator(*_ortSession, mem_info_cuda); cudaAllocator.Free(_cudaInput); - cudaAllocator.Free(_cudaOutput); #endif return true; From dd64da887d0d81845b7f0a59025e52c145a126ed Mon Sep 17 00:00:00 2001 From: Vincent Demoulin Date: Fri, 22 Nov 2024 09:55:52 +0100 Subject: [PATCH 3/4] bugfix in terminate function --- src/aliceVision/segmentation/segmentation.cpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/aliceVision/segmentation/segmentation.cpp b/src/aliceVision/segmentation/segmentation.cpp index e104532e09..6a00d286dd 100644 --- a/src/aliceVision/segmentation/segmentation.cpp +++ b/src/aliceVision/segmentation/segmentation.cpp @@ -89,11 +89,14 @@ bool Segmentation::initialize() bool Segmentation::terminate() { + if (_parameters.useGpu) + { #if ALICEVISION_IS_DEFINED(ALICEVISION_HAVE_ONNX_GPU) - Ort::MemoryInfo mem_info_cuda("Cuda", OrtAllocatorType::OrtArenaAllocator, 0, OrtMemType::OrtMemTypeDefault); - Ort::Allocator cudaAllocator(*_ortSession, mem_info_cuda); - cudaAllocator.Free(_cudaInput); + Ort::MemoryInfo mem_info_cuda("Cuda", OrtAllocatorType::OrtArenaAllocator, 0, OrtMemType::OrtMemTypeDefault); + Ort::Allocator cudaAllocator(*_ortSession, mem_info_cuda); + cudaAllocator.Free(_cudaInput); #endif + } return true; } From 86d5e055cfa4ea7dec55a278fac7ef32664bd345 Mon Sep 17 00:00:00 2001 From: Vincent Demoulin Date: Wed, 22 Jan 2025 17:55:17 +0100 Subject: [PATCH 4/4] Let onnxRuntime handles output memory --- src/aliceVision/segmentation/segmentation.cpp | 67 +++++-------------- src/aliceVision/segmentation/segmentation.hpp | 14 +--- 2 files changed, 16 insertions(+), 65 deletions(-) diff --git a/src/aliceVision/segmentation/segmentation.cpp b/src/aliceVision/segmentation/segmentation.cpp index 6a00d286dd..e31eb5e4f2 100644 --- a/src/aliceVision/segmentation/segmentation.cpp +++ b/src/aliceVision/segmentation/segmentation.cpp @@ -67,11 +67,7 @@ bool Segmentation::initialize() _ortSession = std::make_unique(*_ortEnvironment, _parameters.modelWeights.c_str(), ortSessionOptions); #endif - Ort::MemoryInfo memInfoCuda("Cuda", OrtAllocatorType::OrtArenaAllocator, 0, OrtMemType::OrtMemTypeDefault); - Ort::Allocator cudaAllocator(*_ortSession, memInfoCuda); - _output.resize(_parameters.classes.size() * _parameters.modelHeight * _parameters.modelWidth); - _cudaInput = cudaAllocator.Alloc(_output.size() * sizeof(float)); #endif } else @@ -87,20 +83,6 @@ bool Segmentation::initialize() return true; } -bool Segmentation::terminate() -{ - if (_parameters.useGpu) - { -#if ALICEVISION_IS_DEFINED(ALICEVISION_HAVE_ONNX_GPU) - Ort::MemoryInfo mem_info_cuda("Cuda", OrtAllocatorType::OrtArenaAllocator, 0, OrtMemType::OrtMemTypeDefault); - Ort::Allocator cudaAllocator(*_ortSession, mem_info_cuda); - cudaAllocator.Free(_cudaInput); -#endif - } - - return true; -} - bool Segmentation::processImage(image::Image& labels, const image::Image& source) { // Todo : handle orientation and small images smaller than model input @@ -299,25 +281,15 @@ bool Segmentation::processTile(image::Image& labels, const image::I return false; } - std::vector output(_parameters.classes.size() * _parameters.modelHeight * _parameters.modelWidth); - int idx = 0; - for (int ch = 0; ch < _parameters.classes.size(); ch++) - { - for (int i = 0; i < _parameters.modelHeight; i++) - { - for (int j = 0; j < _parameters.modelWidth; j++) - { - const std::vector coords = {0, ch, i, j}; - output[idx++] = outTensor[0].At(coords); - } - } - } - if (!labelsFromOutputTensor(labels, outTensor[0])) { return false; } + std::vector output(_parameters.classes.size() * _parameters.modelHeight * _parameters.modelWidth); + auto *outTData = outTensor.front().GetTensorMutableData(); + output.assign(outTData, outTData + _parameters.classes.size() * _parameters.modelHeight * _parameters.modelWidth); + return true; } @@ -325,8 +297,7 @@ bool Segmentation::processTileGPU(image::Image& labels, const image { ALICEVISION_LOG_TRACE("Process tile using gpu"); #if ALICEVISION_IS_DEFINED(ALICEVISION_HAVE_CUDA) - Ort::MemoryInfo mem_info_cuda("Cuda", OrtAllocatorType::OrtArenaAllocator, 0, OrtMemType::OrtMemTypeDefault); - Ort::Allocator cudaAllocator(*_ortSession, mem_info_cuda); + Ort::MemoryInfo memInfo = Ort::MemoryInfo::CreateCpu(OrtAllocatorType::OrtArenaAllocator, OrtMemType::OrtMemTypeDefault); std::vector inputNames{"input"}; std::vector outputNames{"output"}; @@ -335,16 +306,18 @@ bool Segmentation::processTileGPU(image::Image& labels, const image std::vector transformedInput; imageToPlanes(transformedInput, source); - cudaMemcpy(_cudaInput, transformedInput.data(), sizeof(float) * transformedInput.size(), cudaMemcpyHostToDevice); - - Ort::Value inputTensors = Ort::Value::CreateTensor( - mem_info_cuda, reinterpret_cast(_cudaInput), transformedInput.size(), inputDimensions.data(), inputDimensions.size()); + std::vector inputTensors; + inputTensors.emplace_back(Ort::Value::CreateTensor(memInfo, + transformedInput.data(), + transformedInput.size(), + inputDimensions.data(), + inputDimensions.size())); std::vector outTensor; try { - outTensor = _ortSession->Run(Ort::RunOptions{nullptr}, inputNames.data(), &inputTensors, 1, outputNames.data(), 1); + outTensor = _ortSession->Run(Ort::RunOptions{nullptr}, inputNames.data(), inputTensors.data(), 1, outputNames.data(), 1); } catch (const Ort::Exception& exception) { @@ -352,24 +325,14 @@ bool Segmentation::processTileGPU(image::Image& labels, const image return false; } - int idx = 0; - for (int ch = 0; ch < _parameters.classes.size(); ch++) - { - for (int i = 0; i < _parameters.modelHeight; i++) - { - for (int j = 0; j < _parameters.modelWidth; j++) - { - const std::vector coords = {0, ch, i, j}; - _output[idx++] = outTensor[0].At(coords); - } - } - } - if (!labelsFromOutputTensor(labels, outTensor[0])) { return false; } + auto *outTData = outTensor.front().GetTensorMutableData(); + _output.assign(outTData, outTData + _parameters.classes.size() * _parameters.modelHeight * _parameters.modelWidth); + #endif return true; diff --git a/src/aliceVision/segmentation/segmentation.hpp b/src/aliceVision/segmentation/segmentation.hpp index 22f5894fe1..9d1bdf8a2e 100644 --- a/src/aliceVision/segmentation/segmentation.hpp +++ b/src/aliceVision/segmentation/segmentation.hpp @@ -57,7 +57,7 @@ class Segmentation } } - virtual ~Segmentation() { terminate(); } + virtual ~Segmentation() {} /** * Process an input image to estimate segmentation @@ -72,11 +72,6 @@ class Segmentation */ bool initialize(); - /** - * Onnx destruction code - */ - bool terminate(); - /** * Assume the source image is the correct size * @param labels the output label image @@ -84,13 +79,6 @@ class Segmentation */ bool tiledProcess(image::Image& labels, const image::Image& source); - /** - * Transform model output to a label image - * @param labels the output labels image - * @param modeloutput the model output vector - */ - bool labelsFromModelOutput(image::Image& labels, const std::vector& modelOutput); - /** * Transform model output to a label image * @param labels the output labels image