From 82620ce3935f5800a0d222554c3971464485dc79 Mon Sep 17 00:00:00 2001 From: Deukhyun Cha Date: Mon, 18 Mar 2024 17:06:32 -0500 Subject: [PATCH] add an example for a usage of stream tags --- examples/cpp/19_stream_tags/.gitignore | 3 + examples/cpp/19_stream_tags/CMakeLists.txt | 4 + examples/cpp/19_stream_tags/Makefile | 27 +++++++ examples/cpp/19_stream_tags/README.md | 28 +++++++ examples/cpp/19_stream_tags/main.cpp | 92 ++++++++++++++++++++++ examples/cpp/19_stream_tags/powerOfPi2.okl | 10 +++ 6 files changed, 164 insertions(+) create mode 100755 examples/cpp/19_stream_tags/.gitignore create mode 100755 examples/cpp/19_stream_tags/CMakeLists.txt create mode 100755 examples/cpp/19_stream_tags/Makefile create mode 100755 examples/cpp/19_stream_tags/README.md create mode 100755 examples/cpp/19_stream_tags/main.cpp create mode 100755 examples/cpp/19_stream_tags/powerOfPi2.okl diff --git a/examples/cpp/19_stream_tags/.gitignore b/examples/cpp/19_stream_tags/.gitignore new file mode 100755 index 000000000..8d83e25dc --- /dev/null +++ b/examples/cpp/19_stream_tags/.gitignore @@ -0,0 +1,3 @@ +main +main.o +main_c \ No newline at end of file diff --git a/examples/cpp/19_stream_tags/CMakeLists.txt b/examples/cpp/19_stream_tags/CMakeLists.txt new file mode 100755 index 000000000..8b73428d4 --- /dev/null +++ b/examples/cpp/19_stream_tags/CMakeLists.txt @@ -0,0 +1,4 @@ +compile_cpp_example_with_modes(stream_tags main.cpp) + +add_custom_target(cpp_example_stream_tags_okl ALL COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/powerOfPi2.okl powerOfPi2.okl) +add_dependencies(examples_cpp_stream_tags cpp_example_stream_tags_okl) diff --git a/examples/cpp/19_stream_tags/Makefile b/examples/cpp/19_stream_tags/Makefile new file mode 100755 index 000000000..c43286030 --- /dev/null +++ b/examples/cpp/19_stream_tags/Makefile @@ -0,0 +1,27 @@ + +PROJ_DIR := $(dir $(abspath $(lastword $(MAKEFILE_LIST)))) + +ifndef OCCA_DIR + include $(PROJ_DIR)/../../../scripts/build/Makefile 
+else + include ${OCCA_DIR}/scripts/build/Makefile +endif + +#---[ COMPILATION ]------------------------------- +headers = $(wildcard $(incPath)/*.hpp) $(wildcard $(incPath)/*.tpp) +sources = $(wildcard $(srcPath)/*.cpp) + +objects = $(subst $(srcPath)/,$(objPath)/,$(sources:.cpp=.o)) + +executables: ${PROJ_DIR}/main + +${PROJ_DIR}/main: $(objects) $(headers) ${PROJ_DIR}/main.cpp + $(compiler) $(compilerFlags) -o ${PROJ_DIR}/main $(flags) $(objects) ${PROJ_DIR}/main.cpp $(paths) $(linkerFlags) + +$(objPath)/%.o:$(srcPath)/%.cpp $(wildcard $(subst $(srcPath)/,$(incPath)/,$(<:.cpp=.hpp))) $(wildcard $(subst $(srcPath)/,$(incPath)/,$(<:.cpp=.tpp))) + $(compiler) $(compilerFlags) -o $@ $(flags) -c $(paths) $< + +clean: + rm -f $(objPath)/*; + rm -f ${PROJ_DIR}/main; +#================================================= diff --git a/examples/cpp/19_stream_tags/README.md b/examples/cpp/19_stream_tags/README.md new file mode 100755 index 000000000..0ee04c0b7 --- /dev/null +++ b/examples/cpp/19_stream_tags/README.md @@ -0,0 +1,28 @@ +# Example: Stream Tags + +GPU devices introduce `streams`, which potentially allow parallel queueing of instructions. + +`Stream tags` are used to query and manage (synchronize) those streams. + +This example shows how to set up `occa::streamTag` to manage jobs in different streams. + +# Compiling the Example + +```bash +make +``` + +## Usage + +``` +> ./main --help + +Usage: ./main [OPTIONS] + +Example showing the use of multiple device streams + +Options: + -d, --device Device properties (default: "{mode: 'CUDA', device_id: 0}") + -h, --help Print usage + -v, --verbose Compile kernels in verbose mode +``` diff --git a/examples/cpp/19_stream_tags/main.cpp b/examples/cpp/19_stream_tags/main.cpp new file mode 100755 index 000000000..1c9f12385 --- /dev/null +++ b/examples/cpp/19_stream_tags/main.cpp @@ -0,0 +1,92 @@ +#include + +#include + +//---[ Internal Tools ]----------------- +// Note: These headers are not officially supported +// 
Please don't rely on it outside of the occa examples +#include +//====================================== + + +occa::json parseArgs(int argc, const char **argv); + +int main(int argc, const char **argv) { + occa::json args = parseArgs(argc, argv); + + occa::setDevice(occa::json::parse(args["options/device"])); + + int entries = 1<<20; + int block = 64; + int group = 1; + + float *a = new float[entries]; + for (int i = 0; i < entries; i++) + a[i] = 0.f; + + occa::memory o_a = occa::malloc(entries); + o_a.copyFrom(a); + + occa::json kernelProps({ + {"defines/block", block}, + {"defines/group", group}, + {"serial/include_std", true}, + }); + occa::kernel powerOfPi2 = occa::buildKernel("powerOfPi2.okl", + "powerOfPi2", + kernelProps); + occa::json streamProps({ + {"nonblocking", true}, + }); + occa::stream stream_a = occa::createStream(streamProps); + occa::stream stream_b = occa::createStream(streamProps); + + occa::setStream(stream_a); + powerOfPi2(o_a, entries); + occa::streamTag tag_a = occa::tagStream(); + + // set stream_b to wait for the job(s) to be finished in stream_a + occa::streamWait(stream_b, tag_a); + + occa::setStream(stream_b); + powerOfPi2(o_a, entries); + occa::streamTag tag_b = occa::tagStream(); + + // set the device to wait for stream_b to finish + occa::waitFor(tag_b); + + o_a.copyTo(a); + + const float tol = 1e-3; + for (auto i = 0; i < entries; i++) { + if (fabs(a[i] - 3.14159) > tol) { + std::cerr << "Invalid output value: " << a[i] << " in " << i << std::endl; + return -1; + } + } + return 0; +} + + +occa::json parseArgs(int argc, const char **argv) { + occa::cli::parser parser; + parser + .withDescription( + "Example showing the use of multiple device streams" + ) + .addOption( + occa::cli::option('d', "device", + "Device properties (default: \"{mode: 'CUDA', device_id: 0}\")") + .withArg() + .withDefaultValue("{mode: 'CUDA', device_id: 0}") + ) + .addOption( + occa::cli::option('v', "verbose", + "Compile kernels in verbose mode") + ); + + 
occa::json args = parser.parseArgs(argc, argv); + occa::settings()["kernel/verbose"] = args["options/verbose"]; + + return args; +} diff --git a/examples/cpp/19_stream_tags/powerOfPi2.okl b/examples/cpp/19_stream_tags/powerOfPi2.okl new file mode 100755 index 000000000..9d4946c8d --- /dev/null +++ b/examples/cpp/19_stream_tags/powerOfPi2.okl @@ -0,0 +1,10 @@ +@kernel void powerOfPi2(float* x, + int entries) { + for (int g = 0; g < group; g++; @outer) { + for (int i = 0; i < block; ++i; @inner) { + for (int j=i+g*block; j < entries; j+=block*group) { + x[j] = pow(3.14159,x[j]); + } + } + } +}