[Project] Add existing legacy code
EgorOrachyov committed Apr 14, 2021
1 parent ca5079c commit 645f00d
Showing 773 changed files with 8,941,207 additions and 1,430 deletions.
6 changes: 5 additions & 1 deletion .gitignore
@@ -1,5 +1,9 @@
# Idea files
# IDE files
/**/.idea

# Build files
/**/cmake-build-debug
/**/*build*

# Python cache
/**/__pycache__
3 changes: 3 additions & 0 deletions .gitmodules
@@ -1,3 +1,6 @@
[submodule "deps/gtest"]
path = deps/gtest
url = https://github.com/google/googletest.git
[submodule "deps/cub"]
path = deps/cub
url = https://github.com/NVIDIA/cub.git
57 changes: 50 additions & 7 deletions CMakeLists.txt
@@ -1,15 +1,58 @@
cmake_minimum_required(VERSION 3.17 FATAL_ERROR)
# CuBool library Cmake config file
# Add this file as sub-directory to your project to use library functionality

cmake_minimum_required(VERSION 3.15 FATAL_ERROR)
project(spbla LANGUAGES CXX)

option(SPBLA_WITH_CPU_BACKEND "Build library with cpu backend as fallback" ON)
option(SPBLA_WITH_CUDA_BACKEND "Build library with cuda backend" OFF)
option(SPBLA_WITH_OPENCL_BACKEND "Build library with opencl backend" OFF)
# Exposed to the user build options
option(SPBLA_WITH_CUDA "Build library with cuda backend (default)" ON)
option(SPBLA_WITH_SEQUENTIAL "Build library with cpu sequential backend (fallback)" ON)
option(SPBLA_BUILD_TESTS "Build project unit-tests with gtest" ON)
option(SPBLA_COPY_TO_PY_PACKAGE "Copy compiled shared library into python package folder (for package use purposes)" ON)

set(SPBLA_VERSION_MAJOR 1)
set(SPBLA_VERSION_MINOR 0)
set(SPBLA_VERSION_SUB 0)

set(SPBLA_DEBUG OFF)
set(SPBLA_RELEASE OFF)

if (${CMAKE_BUILD_TYPE} MATCHES Release)
    message(STATUS "Build cubool in release mode")
    set(SPBLA_RELEASE ON)
elseif (${CMAKE_BUILD_TYPE} MATCHES Debug)
    message(STATUS "Build cubool in debug mode")
    set(SPBLA_DEBUG ON)
else()
    message(STATUS "Build cubool in release mode (default: was not specified)")
    set(SPBLA_RELEASE ON)
    set(CMAKE_BUILD_TYPE Release)
endif()

option(SPBLA_WITH_TESTS "Build library source code with provided unit tests" ON)
# Configure cuda dependencies
if (SPBLA_WITH_CUDA)
message(STATUS "Add cub as cuda utility")
set(CUB_ENABLE_HEADER_TESTING OFF CACHE BOOL "" FORCE)
set(CUB_ENABLE_TESTING OFF CACHE BOOL "" FORCE)
set(CUB_ENABLE_EXAMPLES OFF CACHE BOOL "" FORCE)
add_subdirectory(deps/cub)
add_library(cub INTERFACE IMPORTED)
target_link_libraries(cub INTERFACE CUB::CUB)

if (SPBLA_WITH_TESTS)
message(STATUS "Add gtest dependency for library unit tests")
message(STATUS "Add nsparse library as crs matrix multiplication backend")
add_subdirectory(deps/nsparse)
endif()

if (SPBLA_BUILD_TESTS)
message(STATUS "Add googletest as unit-testing library")
add_subdirectory(deps/gtest)
endif()

# Actual cxx implementation
add_subdirectory(spbla)

# Copy scripts into binary directory
file(COPY scripts DESTINATION ${CMAKE_BINARY_DIR}/)

# Copy python related stuff
file(COPY python DESTINATION ${CMAKE_BINARY_DIR}/)
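
As the header comment above suggests, a downstream project can pull the library in as a subdirectory. A minimal consumer sketch (illustrative only; it assumes the exported target is named `spbla`, which this commit does not state explicitly):

cmake_minimum_required(VERSION 3.15 FATAL_ERROR)
project(spbla_consumer LANGUAGES CXX)

# Pull the library sources in as a subdirectory (the path is hypothetical).
add_subdirectory(deps/spbla)

add_executable(app main.cpp)
# Assumption: the library defines a linkable target called `spbla`.
target_link_libraries(app PRIVATE spbla)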
3 changes: 2 additions & 1 deletion README.md
@@ -1,2 +1,3 @@
# spbla
Sparse Boolean Linear Algebra

Sparse Boolean linear algebra for CPU/GPU computations.
1 change: 1 addition & 0 deletions deps/cub
Submodule cub added at a8910a
11 changes: 11 additions & 0 deletions deps/nsparse/CMakeLists.txt
@@ -0,0 +1,11 @@
cmake_minimum_required(VERSION 3.15)
project(nsparse_um LANGUAGES CXX CUDA)

add_library(nsparse_um INTERFACE)
target_include_directories(nsparse_um INTERFACE include/)
target_link_libraries(nsparse_um INTERFACE cub)
target_compile_options(nsparse_um INTERFACE $<$<COMPILE_LANGUAGE:CUDA>: --expt-relaxed-constexpr --expt-extended-lambda>)

if (SPBLA_BUILD_NSPARSE_TESTS)
    add_subdirectory(test)
endif()
25 changes: 25 additions & 0 deletions deps/nsparse/README.md
@@ -0,0 +1,25 @@
# NSPARSE-UNIFIED-MEMORY

Sparse Boolean matrix multiplication and addition for matrices in
compressed sparse row (CSR) format, which uses the CUDA unified memory
allocator to manage large GPU allocations.

The original source code is hosted [here](https://github.com/YaccConstructor/RedisGraph).
For more details, see the branches `CFPQ-gpu` and `CFPQ-gpu-um`, where the source code
lives under `deps/cfpq/algorithms/cuda/nsparse`.
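
To make the storage scheme concrete, here is a rough sketch (not taken from the original sources) of a Boolean CSR matrix whose index arrays live in CUDA unified memory:

#include <cuda_runtime.h>

// Illustrative Boolean CSR container: no value array is needed, since the
// stored pattern itself encodes the "true" entries.
struct csr_bool_matrix {
  unsigned rows = 0;
  unsigned* row_ptr = nullptr;  // rows + 1 offsets into col_idx
  unsigned* col_idx = nullptr;  // one column index per stored entry
};

csr_bool_matrix make_csr(unsigned rows, unsigned nnz) {
  csr_bool_matrix m;
  m.rows = rows;
  // Unified memory is visible to both host and device and is paged on demand,
  // which is what allows large matrices to be allocated on the GPU.
  cudaMallocManaged(&m.row_ptr, (rows + 1) * sizeof(unsigned));
  cudaMallocManaged(&m.col_idx, nnz * sizeof(unsigned));
  return m;
}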

## Useful links

- High-performance and Memory-saving
Sparse General Matrix-Matrix Multiplication for NVIDIA Pascal GPU
[paper](https://ieeexplore.ieee.org/document/8025284)
- GPU Merge Path - A GPU Merging Algorithm
[paper](https://www.researchgate.net/publication/254462662_GPU_merge_path_a_GPU_merging_algorithm)
- Context-Free Path Querying with Single-Path
Semantics by Matrix Multiplication
[paper](https://www.researchgate.net/publication/342164347_Context-Free_Path_Querying_with_Single-Path_Semantics_by_Matrix_Multiplication)

## Also

The author of the original implementation is Artyom Khoroshev
(GitHub: [profile](https://github.com/akhoroshev)).
39 changes: 39 additions & 0 deletions deps/nsparse/include/nsparse/detail/add_values.cuh
@@ -0,0 +1,39 @@
#pragma once

#include <cuda_runtime.h>
#include <device_launch_parameters.h>

// Needed for thrust::device_ptr and assert used below.
#include <thrust/device_ptr.h>

#include <cassert>

namespace nsparse {

// Writes `value` into every slot of the skeleton CSR matrix whose (row, column)
// position also appears among the edges; one thread block processes one row.
template <typename value_type, typename index_type>
__global__ void add_values(thrust::device_ptr<const index_type> skeleton_col_idx,
                           thrust::device_ptr<const index_type> skeleton_row_idx,
                           thrust::device_ptr<value_type> values,
                           thrust::device_ptr<const index_type> edges_col_idx,
                           thrust::device_ptr<const index_type> edges_row_idx, value_type value) {
  auto rid = blockIdx.x;

  index_type skeleton_row_begin = skeleton_row_idx[rid];
  index_type skeleton_row_end = skeleton_row_idx[rid + 1];

  index_type edges_row_begin = edges_row_idx[rid];
  index_type edges_row_end = edges_row_idx[rid + 1];

  // Threads of the block walk the edges of this row in a strided fashion.
  for (auto i = edges_row_begin + threadIdx.x; i < edges_row_end; i += blockDim.x) {
    index_type edges_column = edges_col_idx[i];

    // Linear search for the matching column in the skeleton row.
    bool was_found = false;
    for (auto j = skeleton_row_begin; j < skeleton_row_end; j++) {
      index_type skeleton_column = skeleton_col_idx[j];

      if (skeleton_column == edges_column) {
        values[j] = value;
        was_found = true;
        break;
      }
    }
    // Every edge is expected to be present in the skeleton pattern.
    assert(was_found);
  }
}

} // namespace nsparse
23 changes: 23 additions & 0 deletions deps/nsparse/include/nsparse/detail/add_values.h
@@ -0,0 +1,23 @@
#pragma once

#include <thrust/device_ptr.h>
#include <thrust/device_vector.h>

#include <nsparse/detail/util.h>

#include <nsparse/detail/add_values.cuh>

namespace nsparse {

// Host-side wrapper: launches one 64-thread block per matrix row.
template <typename value_type, typename index_type>
void add_values(index_type rows, const thrust::device_vector<index_type>& skeleton_col_idx,
                const thrust::device_vector<index_type>& skeleton_row_idx,
                thrust::device_vector<value_type>& values,
                const thrust::device_vector<index_type>& edges_col_idx,
                const thrust::device_vector<index_type>& edges_row_idx, value_type value) {
  if (rows > 0)
    add_values<<<rows, 64>>>(skeleton_col_idx.data(), skeleton_row_idx.data(), values.data(),
                             edges_col_idx.data(), edges_row_idx.data(), value);
}

} // namespace nsparse
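
A minimal usage sketch of this wrapper (illustrative only, not part of the commit), with a tiny hand-built skeleton and edge structure:

#include <thrust/device_vector.h>
#include <vector>

#include <nsparse/detail/add_values.h>

int main() {
  // Skeleton CSR pattern: row 0 stores columns {0, 2}, row 1 stores column {1}.
  std::vector<unsigned> h_skel_rows = {0, 2, 3};
  std::vector<unsigned> h_skel_cols = {0, 2, 1};

  // Edges to stamp into the skeleton: (0, 2) and (1, 1).
  std::vector<unsigned> h_edge_rows = {0, 1, 2};
  std::vector<unsigned> h_edge_cols = {2, 1};

  thrust::device_vector<unsigned> skel_rows = h_skel_rows;
  thrust::device_vector<unsigned> skel_cols = h_skel_cols;
  thrust::device_vector<unsigned> edge_rows = h_edge_rows;
  thrust::device_vector<unsigned> edge_cols = h_edge_cols;
  thrust::device_vector<float> values(h_skel_cols.size(), 0.0f);

  // Writes 1.0f into values[1] (row 0, col 2) and values[2] (row 1, col 1).
  nsparse::add_values<float, unsigned>(2, skel_cols, skel_rows, values,
                                       edge_cols, edge_rows, 1.0f);
  cudaDeviceSynchronize();
  return 0;
}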
67 changes: 67 additions & 0 deletions deps/nsparse/include/nsparse/detail/bitonic.cuh
@@ -0,0 +1,67 @@
#pragma once

#include <cuda_runtime.h>
#include <device_launch_parameters.h>
#include <cooperative_groups.h>

namespace nsparse {

// Swaps keyA and keyB when they violate the requested order (dir = 1 means ascending).
template <typename T>
__device__ void Comparator(T& keyA, T& keyB, uint dir) {
  T t;

  if ((keyA > keyB) == dir) {
    t = keyA;
    keyA = keyB;
    keyB = t;
  }
}

// Bitonic sort of a shared-memory array of exactly array_size elements (array_size
// must be a power of two), executed cooperatively by the given thread group.
template <typename T, uint array_size, typename group_t>
__device__ void bitonic_sort_shared(group_t group, T* s_key, uint dir = 1) {
  // Build bitonic sequences of increasing size.
  for (uint size = 2; size < array_size; size <<= 1) {
    for (uint stride = size / 2; stride > 0; stride >>= 1) {
      group.sync();
      for (uint id = group.thread_rank(); id < array_size / 2; id += group.size()) {
        uint ddd = dir ^ ((id & (size / 2)) != 0);

        uint pos = 2 * id - (id & (stride - 1));
        Comparator(s_key[pos + 0], s_key[pos + stride], ddd);
      }
    }
  }

  // Final merge of the whole array in the requested direction.
  for (uint stride = array_size / 2; stride > 0; stride >>= 1) {
    group.sync();
    for (uint id = group.thread_rank(); id < array_size / 2; id += group.size()) {
      uint pos = 2 * id - (id & (stride - 1));
      Comparator(s_key[pos + 0], s_key[pos + stride], dir);
    }
  }
  group.sync();
}

// Same bitonic network over an array in global memory, executed by all threads of
// the current block; array_size must be a power of two.
template <typename T>
__device__ void bitonicSortGlobal(T* key, T array_size, uint dir = 1) {
  for (uint size = 2; size < array_size; size <<= 1) {
    for (uint stride = size / 2; stride > 0; stride >>= 1) {
      __syncthreads();
      for (uint id = threadIdx.x; id < array_size / 2; id += blockDim.x) {
        uint ddd = dir ^ ((id & (size / 2)) != 0);

        uint pos = 2 * id - (id & (stride - 1));
        Comparator(key[pos + 0], key[pos + stride], ddd);
      }
    }
  }

  for (uint stride = array_size / 2; stride > 0; stride >>= 1) {
    __syncthreads();
    for (uint id = threadIdx.x; id < array_size / 2; id += blockDim.x) {
      uint pos = 2 * id - (id & (stride - 1));
      Comparator(key[pos + 0], key[pos + stride], dir);
    }
  }
}

} // namespace nsparse
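
A hypothetical kernel (not part of this commit) showing how `bitonic_sort_shared` might be driven: each block sorts a fixed 64-element tile staged in shared memory, with short tiles expected to be padded by the caller.

#include <cooperative_groups.h>

#include <nsparse/detail/bitonic.cuh>

__global__ void sort_tiles(unsigned* keys) {
  __shared__ unsigned s_key[64];
  auto block = cooperative_groups::this_thread_block();

  // Stage this block's 64-key tile into shared memory.
  for (unsigned i = threadIdx.x; i < 64; i += blockDim.x)
    s_key[i] = keys[blockIdx.x * 64 + i];

  // Cooperative bitonic sort; the array size must be a power of two.
  nsparse::bitonic_sort_shared<unsigned, 64>(block, s_key);

  // Write the sorted tile back to global memory.
  for (unsigned i = threadIdx.x; i < 64; i += blockDim.x)
    keys[blockIdx.x * 64 + i] = s_key[i];
}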