-
Notifications
You must be signed in to change notification settings - Fork 86
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
add an example for a usage of stream tags
- Loading branch information
1 parent
af9b4d0
commit 82620ce
Showing
6 changed files
with
164 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
main | ||
main.o | ||
main_c |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
# Register the stream_tags example built from main.cpp.
# NOTE(review): compile_cpp_example_with_modes is defined elsewhere in the project;
# presumably it creates the examples_cpp_stream_tags target referenced below - confirm.
compile_cpp_example_with_modes(stream_tags main.cpp) | ||
|
||
# Copy powerOfPi2.okl from this source directory to the relative path powerOfPi2.okl
# on every build (ALL), and make the example target depend on that copy step.
add_custom_target(cpp_example_stream_tags_okl ALL COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/powerOfPi2.okl powerOfPi2.okl) | ||
add_dependencies(examples_cpp_stream_tags cpp_example_stream_tags_okl) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
|
||
# Build script for this example: includes a shared build Makefile and links
# main.cpp (plus any objects built from $(srcPath)) into ${PROJ_DIR}/main.
# NOTE(review): compiler, compilerFlags, flags, paths, linkerFlags, incPath, srcPath
# and objPath are not defined in this file; presumably the included Makefile
# provides them - confirm.

# Directory containing this Makefile, derived from the last entry of MAKEFILE_LIST.
PROJ_DIR := $(dir $(abspath $(lastword $(MAKEFILE_LIST)))) | ||
|
||
# Use the build Makefile under OCCA_DIR when that variable is set; otherwise fall
# back to the one three directories above this example.
ifndef OCCA_DIR | ||
include $(PROJ_DIR)/../../../scripts/build/Makefile | ||
else | ||
include ${OCCA_DIR}/scripts/build/Makefile | ||
endif | ||
|
||
#---[ COMPILATION ]------------------------------- | ||
# Headers (*.hpp, *.tpp) and sources (*.cpp) discovered by wildcard.
headers = $(wildcard $(incPath)/*.hpp) $(wildcard $(incPath)/*.tpp) | ||
sources = $(wildcard $(srcPath)/*.cpp) | ||
|
||
# One object file under $(objPath) per source file under $(srcPath).
objects = $(subst $(srcPath)/,$(objPath)/,$(sources:.cpp=.o)) | ||
|
||
# Target that builds the example executable.
executables: ${PROJ_DIR}/main | ||
|
||
# Compile main.cpp together with the objects; rebuilt when any object, header or
# main.cpp changes.
${PROJ_DIR}/main: $(objects) $(headers) ${PROJ_DIR}/main.cpp | ||
$(compiler) $(compilerFlags) -o ${PROJ_DIR}/main $(flags) $(objects) ${PROJ_DIR}/main.cpp $(paths) $(linkerFlags) | ||
|
||
# Pattern rule: compile each source into its object file; the wildcard
# prerequisites reference a same-named .hpp/.tpp under $(incPath).
$(objPath)/%.o:$(srcPath)/%.cpp $(wildcard $(subst $(srcPath)/,$(incPath)/,$(<:.cpp=.hpp))) $(wildcard $(subst $(srcPath)/,$(incPath)/,$(<:.cpp=.tpp))) | ||
$(compiler) $(compilerFlags) -o $@ $(flags) -c $(paths) $< | ||
|
||
# Remove everything under $(objPath) and the example executable.
clean: | ||
rm -f $(objPath)/*; | ||
rm -f ${PROJ_DIR}/main; | ||
#================================================= |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
# Example: Stream Tags | ||
|
||
GPU devices introduce `streams`, which potentially allow parallel queueing of instructions | ||
|
||
`Stream tags` are used to query and manage (synchronize) those streams | ||
|
||
This example shows how to set up `occa::streamTag` to manage jobs in different streams | ||
|
||
# Compiling the Example | ||
|
||
```bash | ||
make | ||
``` | ||
|
||
## Usage | ||
|
||
``` | ||
> ./main --help | ||
Usage: ./main [OPTIONS] | ||
Example showing the use of multiple device streams | ||
Options: | ||
-d, --device Device properties (default: "{mode: 'CUDA', device_id: 0}") | ||
-h, --help Print usage | ||
-v, --verbose Compile kernels in verbose mode | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,92 @@ | ||
#include <cmath>
#include <iostream>
#include <vector>

#include <occa.hpp>

//---[ Internal Tools ]-----------------
// Note: These headers are not officially supported
// Please don't rely on it outside of the occa examples
#include <occa/internal/utils/cli.hpp>
//======================================
|
||
|
||
occa::json parseArgs(int argc, const char **argv); | ||
|
||
int main(int argc, const char **argv) { | ||
occa::json args = parseArgs(argc, argv); | ||
|
||
occa::setDevice(occa::json::parse(args["options/device"])); | ||
|
||
int entries = 1<<20; | ||
int block = 64; | ||
int group = 1; | ||
|
||
float *a = new float[entries]; | ||
for (int i = 0; i < entries; i++) | ||
a[i] = 0.f; | ||
|
||
occa::memory o_a = occa::malloc<float>(entries); | ||
o_a.copyFrom(a); | ||
|
||
occa::json kernelProps({ | ||
{"defines/block", block}, | ||
{"defines/group", group}, | ||
{"serial/include_std", true}, | ||
}); | ||
occa::kernel powerOfPi2 = occa::buildKernel("powerOfPi2.okl", | ||
"powerOfPi2", | ||
kernelProps); | ||
occa::json streamProps({ | ||
{"nonblocking", true}, | ||
}); | ||
occa::stream stream_a = occa::createStream(streamProps); | ||
occa::stream stream_b = occa::createStream(streamProps); | ||
|
||
occa::setStream(stream_a); | ||
powerOfPi2(o_a, entries); | ||
occa::streamTag tag_a = occa::tagStream(); | ||
|
||
// set stream_b to wait for the job(s) to be finished in stream_a | ||
occa::streamWait(stream_b, tag_a); | ||
|
||
occa::setStream(stream_b); | ||
powerOfPi2(o_a, entries); | ||
occa::streamTag tag_b = occa::tagStream(); | ||
|
||
// set the device to wait for stream_b to finish | ||
occa::waitFor(tag_b); | ||
|
||
o_a.copyTo(a); | ||
|
||
const float tol = 1e-3; | ||
for (auto i = 0; i < entries; i++) { | ||
if (fabs(a[i] - 3.14159) > tol) { | ||
std::cerr << "Invalid output value: " << a[i] << " in " << i << std::endl; | ||
return -1; | ||
} | ||
} | ||
return 0; | ||
} | ||
|
||
|
||
/**
 * Parses the example's command-line arguments.
 *
 * Registers the `-d/--device` option (takes an argument, with a default value)
 * and the `-v/--verbose` flag, then stores the parsed verbose value in the
 * "kernel/verbose" entry of the global occa settings.
 *
 * @param argc Number of command-line arguments.
 * @param argv Command-line arguments.
 * @return The parsed arguments as an occa::json object.
 */
occa::json parseArgs(int argc, const char **argv) {
  // Describe each option up front so the parser setup below reads linearly
  const occa::cli::option deviceOption =
    occa::cli::option('d', "device",
                      "Device properties (default: \"{mode: 'CUDA', device_id: 0}\")")
      .withArg()
      .withDefaultValue("{mode: 'CUDA', device_id: 0}");

  const occa::cli::option verboseOption =
    occa::cli::option('v', "verbose",
                      "Compile kernels in verbose mode");

  occa::cli::parser cliParser;
  cliParser.withDescription("Example showing the use of multiple device streams");
  cliParser.addOption(deviceOption);
  cliParser.addOption(verboseOption);

  occa::json parsed = cliParser.parseArgs(argc, argv);

  // Forward the verbose flag to kernel compilation
  occa::settings()["kernel/verbose"] = parsed["options/verbose"];

  return parsed;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
// Replaces every x[idx] (0 <= idx < entries) with 3.14159 raised to the power x[idx].
// `group` and `block` are not declared in this file; they bound the @outer and
// @inner loops and are expected to be supplied as defines when the kernel is built.
@kernel void powerOfPi2(float* x,
                        int entries) {
  for (int outerId = 0; outerId < group; outerId++; @outer) {
    for (int innerId = 0; innerId < block; ++innerId; @inner) {
      // Each (outerId, innerId) pair starts at its own offset and advances by
      // the total number of pairs, so every index is visited exactly once.
      const int stride = block * group;
      for (int idx = innerId + outerId * block; idx < entries; idx += stride) {
        x[idx] = pow(3.14159, x[idx]);
      }
    }
  }
}