-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
ca5079c
commit 645f00d
Showing
773 changed files
with
8,941,207 additions
and
1,430 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,9 @@ | ||
# Idea files | ||
# IDE files | ||
/**/.idea | ||
|
||
# Build files | ||
/**/cmake-build-debug | ||
/**/*build* | ||
|
||
# Python cache | ||
/**/__pycache__ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,6 @@ | ||
[submodule "deps/gtest"] | ||
path = deps/gtest | ||
url = https://github.com/google/googletest.git | ||
[submodule "deps/cub"] | ||
path = deps/cub | ||
url = https://github.com/NVIDIA/cub.git |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,15 +1,58 @@ | ||
cmake_minimum_required(VERSION 3.17 FATAL_ERROR) | ||
# CuBool library Cmake config file | ||
# Add this file as sub-directory to your project to use library functionality | ||
|
||
cmake_minimum_required(VERSION 3.15 FATAL_ERROR) | ||
project(spbla LANGUAGES CXX) | ||
|
||
option(SPBLA_WITH_CPU_BACKEND "Build library with cpu backend as fallback" ON) | ||
option(SPBLA_WITH_CUDA_BACKEND "Build library with cuda backend" OFF) | ||
option(SPBLA_WITH_OPENCL_BACKEND "Build library with opencl backend" OFF) | ||
# Exposed to the user build options | ||
option(SPBLA_WITH_CUDA "Build library with cuda backend (default)" ON) | ||
option(SPBLA_WITH_SEQUENTIAL "Build library with cpu sequential backend (fallback)" ON) | ||
option(SPBLA_BUILD_TESTS "Build project unit-tests with gtest" ON) | ||
option(SPBLA_COPY_TO_PY_PACKAGE "Copy compiled shared library into python package folder (for package use purposes)" ON) | ||
|
||
set(SPBLA_VERSION_MAJOR 1) | ||
set(SPBLA_VERSION_MINOR 0) | ||
set(SPBLA_VERSION_SUB 0) | ||
|
||
set(SPBLA_DEBUG OFF) | ||
set(SPBLA_RELEASE OFF) | ||
|
||
# Translate CMAKE_BUILD_TYPE into the SPBLA_RELEASE / SPBLA_DEBUG switches.
#
# The variable expansion is quoted on purpose: when no build type was given,
# an unquoted ${CMAKE_BUILD_TYPE} expands to nothing and if() is left without
# a left-hand operand, so the fallback branch below could never be reached.
# Any value that matches neither "Release" nor "Debug" (including an empty
# one) is treated as "not specified" and is overwritten with Release.
if ("${CMAKE_BUILD_TYPE}" MATCHES "Release")
    message(STATUS "Build cubool in release mode")
    set(SPBLA_RELEASE ON)
elseif ("${CMAKE_BUILD_TYPE}" MATCHES "Debug")
    message(STATUS "Build cubool in debug mode")
    set(SPBLA_DEBUG ON)
else()
    message(STATUS "Build cubool in release mode (default: was not specified)")
    set(SPBLA_RELEASE ON)
    set(CMAKE_BUILD_TYPE Release)
endif()
|
||
option(SPBLA_WITH_TESTS "Build library source code with provided unit tests" ON) | ||
# Configure cuda dependencies | ||
if (SPBLA_WITH_CUDA) | ||
message(STATUS "Add cub as cuda utility") | ||
set(CUB_ENABLE_HEADER_TESTING OFF CACHE BOOL "" FORCE) | ||
set(CUB_ENABLE_TESTING OFF CACHE BOOL "" FORCE) | ||
set(CUB_ENABLE_EXAMPLES OFF CACHE BOOL "" FORCE) | ||
add_subdirectory(deps/cub) | ||
add_library(cub INTERFACE IMPORTED) | ||
target_link_libraries(cub INTERFACE CUB::CUB) | ||
|
||
if (SPBLA_WITH_TESTS) | ||
message(STATUS "Add gtest dependency for library unit tests") | ||
message(STATUS "Add nsparse library as crs matrix multiplication backend") | ||
add_subdirectory(deps/nsparse) | ||
endif() | ||
|
||
if (SPBLA_BUILD_TESTS) | ||
message(STATUS "Add googletest as unit-testing library") | ||
add_subdirectory(deps/gtest) | ||
endif() | ||
|
||
# Actual cxx implementation | ||
add_subdirectory(spbla) | ||
|
||
# Copy scripts into binary directory | ||
file(COPY scripts DESTINATION ${CMAKE_BINARY_DIR}/) | ||
|
||
# Copy python related stuff | ||
file(COPY python DESTINATION ${CMAKE_BINARY_DIR}/) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,3 @@ | ||
# spbla | ||
Sparse Boolean Linear Algebra | ||
|
||
Sparse Boolean linear algebra for CPU/GPU computations. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
# Build description of the nsparse_um interface library.
cmake_minimum_required(VERSION 3.15)
project(nsparse_um LANGUAGES CXX CUDA)

# INTERFACE library: nothing is compiled here, the target only carries the
# include path, link dependency and compile options for its consumers.
add_library(nsparse_um INTERFACE)
target_include_directories(nsparse_um INTERFACE include/)

# The `cub` target is not created in this file; it has to exist in the
# enclosing build before this directory is added.
target_link_libraries(nsparse_um INTERFACE cub)

# The generator expression is quoted and its options are separated by `;` so
# that it reaches CMake as a single argument. Left unquoted, the embedded
# spaces split the expression into several arguments.
target_compile_options(nsparse_um INTERFACE
    "$<$<COMPILE_LANGUAGE:CUDA>:--expt-relaxed-constexpr;--expt-extended-lambda>")

# Optional tests, enabled by the including project through this variable.
if (SPBLA_BUILD_NSPARSE_TESTS)
    add_subdirectory(test)
endif()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
# NSPARSE-UNIFIED-MEMORY | ||
|
||
Sparse boolean matrix multiplication and addition implementation | ||
for matrices in compressed sparse row (CSR) format, which employs | ||
the CUDA unified memory allocator for allocating large GPU resources. | ||
|
||
Original source code is hosted [here](https://github.com/YaccConstructor/RedisGraph). | ||
For more info view branches `CFPQ-gpu` and `CFPQ-gpu-um`, where the source code | ||
is stored at path `deps/cfpq/algorithms/cuda/nsparse`. | ||
|
||
## Useful links | ||
|
||
- High-performance and Memory-saving | ||
Sparse General Matrix-Matrix Multiplication for NVIDIA Pascal GPU | ||
[paper](https://ieeexplore.ieee.org/document/8025284) | ||
- GPU Merge Path - A GPU Merging Algorithm | ||
[paper](https://www.researchgate.net/publication/254462662_GPU_merge_path_a_GPU_merging_algorithm) | ||
- Context-Free Path Querying with Single-Path | ||
Semantics by Matrix Multiplication | ||
[paper](https://www.researchgate.net/publication/342164347_Context-Free_Path_Querying_with_Single-Path_Semantics_by_Matrix_Multiplication) | ||
|
||
## Also | ||
|
||
The author of the original implementation is Artyom Khoroshev | ||
(Github: [profile](https://github.com/akhoroshev)). |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
#pragma once | ||
|
||
#include <cuda_runtime.h> | ||
#include <device_launch_parameters.h> | ||
|
||
namespace nsparse { | ||
|
||
/**
 * Kernel: stores `value` into `values` at the position of every edge entry
 * that also occurs in the skeleton.
 *
 * Block `blockIdx.x` works on one row. Its threads walk the edge entries of
 * that row with a step of `blockDim.x`; for each entry the skeleton entries
 * of the same row are scanned front to back for an equal column index. The
 * first match `j` gets `values[j] = value`. An edge entry without a match
 * trips the device-side `assert`.
 *
 * @param skeleton_col_idx column indices of the skeleton entries
 * @param skeleton_row_idx per-row begin offsets into `skeleton_col_idx`
 *                         (entries `row` and `row + 1` are read)
 * @param values           output array, indexed like `skeleton_col_idx`
 * @param edges_col_idx    column indices of the edge entries
 * @param edges_row_idx    per-row begin offsets into `edges_col_idx`
 * @param value            value written on every match
 *
 * NOTE(review): this header includes neither <thrust/device_ptr.h> nor
 * <cassert>; it appears to rely on the including file for both — confirm.
 */
template <typename value_type, typename index_type>
__global__ void add_values(thrust::device_ptr<const index_type> skeleton_col_idx,
                           thrust::device_ptr<const index_type> skeleton_row_idx,
                           thrust::device_ptr<value_type> values,
                           thrust::device_ptr<const index_type> edges_col_idx,
                           thrust::device_ptr<const index_type> edges_row_idx, value_type value) {
  const auto row = blockIdx.x;

  // Half-open ranges [first, last) of this row in both structures.
  const index_type skel_first = skeleton_row_idx[row];
  const index_type skel_last = skeleton_row_idx[row + 1];
  const index_type edge_first = edges_row_idx[row];
  const index_type edge_last = edges_row_idx[row + 1];

  for (auto e = edge_first + threadIdx.x; e < edge_last; e += blockDim.x) {
    const index_type wanted_column = edges_col_idx[e];

    bool was_found = false;
    for (index_type j = skel_first; j < skel_last; ++j) {
      const index_type candidate = skeleton_col_idx[j];
      if (candidate != wanted_column) {
        continue;
      }

      // Only the first matching skeleton entry is updated.
      values[j] = value;
      was_found = true;
      break;
    }
    assert(was_found);
  }
}
|
||
} // namespace nsparse |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
#pragma once | ||
|
||
#include <thrust/device_ptr.h> | ||
#include <thrust/device_vector.h> | ||
|
||
#include <nsparse/detail/util.h> | ||
|
||
#include <nsparse/detail/add_values.cuh> | ||
|
||
namespace nsparse { | ||
|
||
/**
 * Host-side launcher for the `add_values` kernel.
 *
 * Starts one block of 64 threads per row and passes the device pointers of
 * the given vectors straight through. When `rows` is not positive, no kernel
 * is launched. The launch is not followed by any synchronisation or error
 * check in this function.
 *
 * @param rows             number of blocks to launch (one per row)
 * @param skeleton_col_idx column indices of the skeleton entries
 * @param skeleton_row_idx row offsets of the skeleton
 * @param values           array the kernel writes `value` into
 * @param edges_col_idx    column indices of the edge entries
 * @param edges_row_idx    row offsets of the edges
 * @param value            value forwarded to the kernel
 */
template <typename value_type, typename index_type>
void add_values(index_type rows, const thrust::device_vector<index_type>& skeleton_col_idx,
                const thrust::device_vector<index_type>& skeleton_row_idx,
                thrust::device_vector<value_type>& values,
                const thrust::device_vector<index_type>& edges_col_idx,
                const thrust::device_vector<index_type>& edges_row_idx, value_type value) {
  // Skip the launch entirely when there are no rows to process.
  if (!(rows > 0)) {
    return;
  }

  constexpr int threads_per_block = 64;
  add_values<<<rows, threads_per_block>>>(skeleton_col_idx.data(), skeleton_row_idx.data(),
                                          values.data(), edges_col_idx.data(),
                                          edges_row_idx.data(), value);
}
|
||
} // namespace nsparse |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,67 @@ | ||
#pragma once | ||
|
||
#include <cuda_runtime.h> | ||
#include <device_launch_parameters.h> | ||
#include <cooperative_groups.h> | ||
|
||
namespace nsparse { | ||
|
||
/**
 * Compare-exchange step of the sorting network.
 *
 * Swaps `keyA` and `keyB` when `(keyA > keyB)` equals `dir`:
 *  - `dir == 1` swaps when keyA > keyB, leaving the pair ascending;
 *  - `dir == 0` swaps when keyA <= keyB, leaving the pair descending.
 */
template <typename T>
__device__ void Comparator(T& keyA, T& keyB, uint dir) {
  const bool must_swap = (keyA > keyB) == dir;
  if (!must_swap) {
    return;
  }

  T held = keyA;
  keyA = keyB;
  keyB = held;
}
|
||
/**
 * Bitonic sort of `array_size` keys in `s_key`, executed cooperatively by the
 * threads of `group`.
 *
 * Each thread handles the compare-exchange pairs `thread_rank()`,
 * `thread_rank() + size()`, ... up to `array_size / 2` pairs per pass, and
 * `group.sync()` separates consecutive passes. A final `group.sync()` is
 * issued before returning.
 *
 * @tparam array_size number of keys, fixed at compile time
 * @param group       thread group providing `sync()`, `thread_rank()`, `size()`
 * @param s_key       keys to sort in place
 * @param dir         direction passed to `Comparator` in the final merge
 *                    (1 leaves compared pairs ascending)
 *
 * NOTE(review): the index arithmetic masks with `stride - 1`, so `array_size`
 * is presumably expected to be a power of two — confirm with the callers.
 */
template <typename T, uint array_size, typename group_t>
__device__ void bitonic_sort_shared(group_t group, T* s_key, uint dir = 1) {
  constexpr uint pair_count = array_size / 2;

  // Build-up phase: merge runs of growing length, alternating the direction
  // between neighbouring runs.
  for (uint run = 2; run < array_size; run <<= 1) {
    const uint half_run = run / 2;
    for (uint stride = half_run; stride > 0; stride >>= 1) {
      group.sync();
      for (uint id = group.thread_rank(); id < pair_count; id += group.size()) {
        const uint run_dir = dir ^ ((id & half_run) != 0);
        const uint first = 2 * id - (id & (stride - 1));
        Comparator(s_key[first + 0], s_key[first + stride], run_dir);
      }
    }
  }

  // Final merge over the whole array, using the requested direction as is.
  for (uint stride = pair_count; stride > 0; stride >>= 1) {
    group.sync();
    for (uint id = group.thread_rank(); id < pair_count; id += group.size()) {
      const uint first = 2 * id - (id & (stride - 1));
      Comparator(s_key[first + 0], s_key[first + stride], dir);
    }
  }

  group.sync();
}
|
||
/**
 * Bitonic sort of `array_size` keys in `key`, executed by the threads of the
 * calling block.
 *
 * Each thread handles the compare-exchange pairs `threadIdx.x`,
 * `threadIdx.x + blockDim.x`, ... and `__syncthreads()` precedes every pass.
 *
 * @param key        keys to sort in place
 * @param array_size number of keys; note that it is declared with the key
 *                   type `T`, not with a separate size type
 * @param dir        direction passed to `Comparator` in the final merge
 *                   (1 leaves compared pairs ascending)
 *
 * No barrier follows the last pass (unlike `bitonic_sort_shared`), so a
 * caller that reads keys written by other threads has to synchronise first.
 *
 * NOTE(review): the index arithmetic masks with `stride - 1`, so `array_size`
 * is presumably expected to be a power of two — confirm with the callers.
 */
template <typename T>
__device__ void bitonicSortGlobal(T* key, T array_size, uint dir = 1) {
  const T pair_count = array_size / 2;

  // Build-up phase: merge runs of growing length, alternating the direction
  // between neighbouring runs.
  for (uint run = 2; run < array_size; run <<= 1) {
    const uint half_run = run / 2;
    for (uint stride = half_run; stride > 0; stride >>= 1) {
      __syncthreads();
      for (uint id = threadIdx.x; id < pair_count; id += blockDim.x) {
        const uint run_dir = dir ^ ((id & half_run) != 0);
        const uint first = 2 * id - (id & (stride - 1));
        Comparator(key[first + 0], key[first + stride], run_dir);
      }
    }
  }

  // Final merge over the whole array, using the requested direction as is.
  for (uint stride = pair_count; stride > 0; stride >>= 1) {
    __syncthreads();
    for (uint id = threadIdx.x; id < pair_count; id += blockDim.x) {
      const uint first = 2 * id - (id & (stride - 1));
      Comparator(key[first + 0], key[first + stride], dir);
    }
  }
}
|
||
} // namespace nsparse |
Oops, something went wrong.