diff --git a/dynet/CMakeLists.txt b/dynet/CMakeLists.txt index 5888c7332..28283050a 100644 --- a/dynet/CMakeLists.txt +++ b/dynet/CMakeLists.txt @@ -123,6 +123,46 @@ if(ENABLE_BOOST) list(APPEND dynet_library_HDRS mp.h) endif() +set(dynet_gpu_SRCS + cuda.cc + cudnn-ops.cu + gpu-ops.cu + gpu-nodes-activations.cu + gpu-nodes-affinetransform.cu + gpu-nodes-arith-const.cu + gpu-nodes-arith-cwise.cu + gpu-nodes-arith-scalar.cu + gpu-nodes-arith-sum.cu + gpu-nodes-arith-unary.cu + gpu-nodes-concat.cu + gpu-nodes-const.cu + gpu-nodes-contract.cu + gpu-nodes-conv2d.cu + gpu-nodes-conv.cu + gpu-nodes-dropout.cu + gpu-nodes-flow.cu + gpu-nodes-hinge.cu + gpu-nodes-linalg.cu + gpu-nodes-logsumexp.cu + gpu-nodes-losses.cu + gpu-nodes-matrixmultiply.cu + gpu-nodes-maxpooling2d.cu + gpu-nodes-minmax.cu + gpu-nodes-moments.cu + gpu-nodes-normalization.cu + gpu-nodes-norms.cu + gpu-nodes-pickneglogsoftmax.cu + gpu-nodes-random.cu + gpu-nodes-select.cu + gpu-nodes-similarities.cu + gpu-nodes-softmaxes.cu + gpu-nodes-trig.cu + gpu-param-nodes.cu + gpu-tensor.cu + gpu-training.cu + gpu-model.cu +) + file(GLOB TEST_SRCS RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} tests/*.cc) if (NOT MSVC) set(BUILD_SHARED_LIBS ON) @@ -175,10 +215,10 @@ if(WITH_CUDA_BACKEND) list(APPEND CUDA_NVCC_FLAGS_DEBUG "--compiler-options \"/MDd\"") list(APPEND CUDA_NVCC_FLAGS_RELEASE "--compiler-options \"/MD\"") SET(CUDA_PROPAGATE_HOST_FLAGS OFF) - cuda_add_library(gdynet ${dynet_library_SRCS} ${dynet_library_HDRS} cuda.cc cudnn-ops.cu gpu-ops.cu gpu-nodes.cu gpu-nodes-contract.cu gpu-nodes-conv.cu gpu-nodes-conv2d.cu gpu-nodes-maxpooling2d.cu gpu-param-nodes.cu gpu-tensor.cu gpu-training.cu gpu-model.cu gpu-nodes-pickneglogsoftmax.cu gpu-nodes-matrixmultiply.cu gpu-nodes-hinge.cu gpu-nodes-affinetransform.cu gpu-nodes-similarities.cu gpu-nodes-norms.cu gpu-nodes-unary-arith.cu) + cuda_add_library(gdynet ${dynet_library_SRCS} ${dynet_library_HDRS} ${dynet_gpu_SRCS}) else() SET(CUDA_PROPAGATE_HOST_FLAGS OFF) - 
cuda_add_library(gdynet ${dynet_library_SRCS} ${dynet_library_HDRS} cuda.cc cudnn-ops.cu gpu-ops.cu gpu-nodes.cu gpu-nodes-contract.cu gpu-nodes-conv.cu gpu-nodes-conv2d.cu gpu-nodes-maxpooling2d.cu gpu-param-nodes.cu gpu-tensor.cu gpu-training.cu gpu-model.cu gpu-nodes-pickneglogsoftmax.cu gpu-nodes-matrixmultiply.cu gpu-nodes-hinge.cu gpu-nodes-affinetransform.cu gpu-nodes-similarities.cu gpu-nodes-norms.cu gpu-nodes-unary-arith.cu OPTIONS --compiler-options "-fPIC") + cuda_add_library(gdynet ${dynet_library_SRCS} ${dynet_library_HDRS} ${dynet_gpu_SRCS} OPTIONS --compiler-options "-fPIC") endif() set_target_properties(gdynet PROPERTIES COMPILE_DEFINITIONS HAVE_CUDA) @@ -197,4 +237,3 @@ if(WITH_CUDA_BACKEND) endif(WITH_CUDA_BACKEND) # target_compile_features(dynet PRIVATE cxx_range_for) - diff --git a/dynet/gpu-nodes-activations.cu b/dynet/gpu-nodes-activations.cu new file mode 100644 index 000000000..b50d5a62c --- /dev/null +++ b/dynet/gpu-nodes-activations.cu @@ -0,0 +1,3 @@ +// This is a dummy file that contains the same content as nodes-activations.cc but compiled +// on CUDA +#include "nodes-activations.cc" diff --git a/dynet/gpu-nodes-arith-const.cu b/dynet/gpu-nodes-arith-const.cu new file mode 100644 index 000000000..2abc645f9 --- /dev/null +++ b/dynet/gpu-nodes-arith-const.cu @@ -0,0 +1,3 @@ +// This is a dummy file that contains the same content as nodes-arith-const.cc but compiled +// on CUDA +#include "nodes-arith-const.cc" diff --git a/dynet/gpu-nodes-arith-cwise.cu b/dynet/gpu-nodes-arith-cwise.cu new file mode 100644 index 000000000..93e62b7df --- /dev/null +++ b/dynet/gpu-nodes-arith-cwise.cu @@ -0,0 +1,3 @@ +// This is a dummy file that contains the same content as nodes-arith-cwise.cc but compiled +// on CUDA +#include "nodes-arith-cwise.cc" diff --git a/dynet/gpu-nodes-arith-scalar.cu b/dynet/gpu-nodes-arith-scalar.cu new file mode 100644 index 000000000..2e4ff0c0e --- /dev/null +++ b/dynet/gpu-nodes-arith-scalar.cu @@ -0,0 +1,3 @@ +// This is a dummy file that contains the same 
content as nodes-arith-scalar.cc but compiled +// on CUDA +#include "nodes-arith-scalar.cc" diff --git a/dynet/gpu-nodes-arith-sum.cu b/dynet/gpu-nodes-arith-sum.cu new file mode 100644 index 000000000..c80bdfe49 --- /dev/null +++ b/dynet/gpu-nodes-arith-sum.cu @@ -0,0 +1,3 @@ +// This is a dummy file that contains the same content as nodes-arith-sum.cc but compiled +// on CUDA +#include "nodes-arith-sum.cc" diff --git a/dynet/gpu-nodes-arith-unary.cu b/dynet/gpu-nodes-arith-unary.cu index 15198bef2..d5e6c6917 100644 --- a/dynet/gpu-nodes-arith-unary.cu +++ b/dynet/gpu-nodes-arith-unary.cu @@ -1,3 +1,3 @@ -// This is a dummy file that contains the same content as nodes-unary-arith.cc but compiled +// This is a dummy file that contains the same content as nodes-arith-unary.cc but compiled // on CUDA #include "nodes-arith-unary.cc" diff --git a/dynet/gpu-nodes-concat.cu b/dynet/gpu-nodes-concat.cu new file mode 100644 index 000000000..2fcfc98c4 --- /dev/null +++ b/dynet/gpu-nodes-concat.cu @@ -0,0 +1,3 @@ +// This is a dummy file that contains the same content as nodes-concat.cc but compiled +// on CUDA +#include "nodes-concat.cc" diff --git a/dynet/gpu-nodes-const.cu b/dynet/gpu-nodes-const.cu new file mode 100644 index 000000000..8a28ebe56 --- /dev/null +++ b/dynet/gpu-nodes-const.cu @@ -0,0 +1,3 @@ +// This is a dummy file that contains the same content as nodes-const.cc but compiled +// on CUDA +#include "nodes-const.cc" diff --git a/dynet/gpu-nodes-conv.cu b/dynet/gpu-nodes-conv.cu index 451f71b36..0bff1eca4 100644 --- a/dynet/gpu-nodes-conv.cu +++ b/dynet/gpu-nodes-conv.cu @@ -1,3 +1,3 @@ -// This is a dummy file that contains the same content as nodes-conv.cc but compiled +// This is a dummy file that contains the same content as nodes-conv.cc but compiled // on CUDA #include "nodes-conv.cc" diff --git a/dynet/gpu-nodes-conv2d.cu b/dynet/gpu-nodes-conv2d.cu index 347aaadcf..cc2f78e4f 100644 --- a/dynet/gpu-nodes-conv2d.cu +++ b/dynet/gpu-nodes-conv2d.cu @@ -1 +1,3 @@ +// This is a dummy file that contains the 
same content as nodes-conv2d.cc but compiled +// on CUDA #include "nodes-conv2d.cc" diff --git a/dynet/gpu-nodes-dropout.cu b/dynet/gpu-nodes-dropout.cu new file mode 100644 index 000000000..3911d2bc1 --- /dev/null +++ b/dynet/gpu-nodes-dropout.cu @@ -0,0 +1,3 @@ +// This is a dummy file that contains the same content as nodes-dropout.cc but compiled +// on CUDA +#include "nodes-dropout.cc" diff --git a/dynet/gpu-nodes-flow.cu b/dynet/gpu-nodes-flow.cu new file mode 100644 index 000000000..27cfed8c8 --- /dev/null +++ b/dynet/gpu-nodes-flow.cu @@ -0,0 +1,3 @@ +// This is a dummy file that contains the same content as nodes-flow.cc but compiled +// on CUDA +#include "nodes-flow.cc" diff --git a/dynet/gpu-nodes-linalg.cu b/dynet/gpu-nodes-linalg.cu new file mode 100644 index 000000000..cbebed454 --- /dev/null +++ b/dynet/gpu-nodes-linalg.cu @@ -0,0 +1,3 @@ +// This is a dummy file that contains the same content as nodes-linalg.cc but compiled +// on CUDA +#include "nodes-linalg.cc" diff --git a/dynet/gpu-nodes-logsumexp.cu b/dynet/gpu-nodes-logsumexp.cu new file mode 100644 index 000000000..f7abe4950 --- /dev/null +++ b/dynet/gpu-nodes-logsumexp.cu @@ -0,0 +1,3 @@ +// This is a dummy file that contains the same content as nodes-logsumexp.cc but compiled +// on CUDA +#include "nodes-logsumexp.cc" diff --git a/dynet/gpu-nodes-losses.cu b/dynet/gpu-nodes-losses.cu new file mode 100644 index 000000000..4bb8863fd --- /dev/null +++ b/dynet/gpu-nodes-losses.cu @@ -0,0 +1,3 @@ +// This is a dummy file that contains the same content as nodes-losses.cc but compiled +// on CUDA +#include "nodes-losses.cc" diff --git a/dynet/gpu-nodes-maxpooling2d.cu b/dynet/gpu-nodes-maxpooling2d.cu index ea93114c9..090ef624e 100644 --- a/dynet/gpu-nodes-maxpooling2d.cu +++ b/dynet/gpu-nodes-maxpooling2d.cu @@ -1 +1,3 @@ +// This is a dummy file that contains the same content as nodes-maxpooling2d.cc but compiled +// on CUDA #include "nodes-maxpooling2d.cc" diff --git a/dynet/gpu-nodes-minmax.cu b/dynet/gpu-nodes-minmax.cu new file mode 100644 index 
000000000..dcac97cc4 --- /dev/null +++ b/dynet/gpu-nodes-minmax.cu @@ -0,0 +1,3 @@ +// This is a dummy file that contains the same content as nodes-minmax.cc but compiled +// on CUDA +#include "nodes-minmax.cc" diff --git a/dynet/gpu-nodes-moments.cu b/dynet/gpu-nodes-moments.cu new file mode 100644 index 000000000..253a0860f --- /dev/null +++ b/dynet/gpu-nodes-moments.cu @@ -0,0 +1,3 @@ +// This is a dummy file that contains the same content as nodes-moments.cc but compiled +// on CUDA +#include "nodes-moments.cc" diff --git a/dynet/gpu-nodes-normalization.cu b/dynet/gpu-nodes-normalization.cu new file mode 100644 index 000000000..16d4a3048 --- /dev/null +++ b/dynet/gpu-nodes-normalization.cu @@ -0,0 +1,3 @@ +// This is a dummy file that contains the same content as nodes-normalization.cc but compiled +// on CUDA +#include "nodes-normalization.cc" diff --git a/dynet/gpu-nodes-norms.cu b/dynet/gpu-nodes-norms.cu index 4fa94dc81..470e1f97c 100644 --- a/dynet/gpu-nodes-norms.cu +++ b/dynet/gpu-nodes-norms.cu @@ -1,3 +1,3 @@ -// This is a dummy file that contains the same content as nodes-norms.cc but compiled +// This is a dummy file that contains the same content as nodes-norms.cc but compiled // on CUDA #include "nodes-norms.cc" diff --git a/dynet/gpu-nodes-random.cu b/dynet/gpu-nodes-random.cu new file mode 100644 index 000000000..7ef0d2564 --- /dev/null +++ b/dynet/gpu-nodes-random.cu @@ -0,0 +1,3 @@ +// This is a dummy file that contains the same content as nodes-random.cc but compiled +// on CUDA +#include "nodes-random.cc" diff --git a/dynet/gpu-nodes-select.cu b/dynet/gpu-nodes-select.cu new file mode 100644 index 000000000..25871a1bb --- /dev/null +++ b/dynet/gpu-nodes-select.cu @@ -0,0 +1,3 @@ +// This is a dummy file that contains the same content as nodes-select.cc but compiled +// on CUDA +#include "nodes-select.cc" diff --git a/dynet/gpu-nodes-similarities.cu b/dynet/gpu-nodes-similarities.cu index 068d0cec7..933edb421 100644 --- a/dynet/gpu-nodes-similarities.cu +++ b/dynet/gpu-nodes-similarities.cu 
@@ -1,3 +1,3 @@ -// This is a dummy file that contains the same content as nodes-similarities.cc but compiled +// This is a dummy file that contains the same content as nodes-similarities.cc but compiled // on CUDA #include "nodes-similarities.cc" diff --git a/dynet/gpu-nodes-softmaxes.cu b/dynet/gpu-nodes-softmaxes.cu new file mode 100644 index 000000000..43730a67b --- /dev/null +++ b/dynet/gpu-nodes-softmaxes.cu @@ -0,0 +1,3 @@ +// This is a dummy file that contains the same content as nodes-softmaxes.cc but compiled +// on CUDA +#include "nodes-softmaxes.cc" diff --git a/dynet/gpu-nodes-trig.cu b/dynet/gpu-nodes-trig.cu new file mode 100644 index 000000000..47449a016 --- /dev/null +++ b/dynet/gpu-nodes-trig.cu @@ -0,0 +1,3 @@ +// This is a dummy file that contains the same content as nodes-trig.cc but compiled +// on CUDA +#include "nodes-trig.cc" diff --git a/dynet/nodes-conv2d.cc b/dynet/nodes-conv2d.cc index 0a7cdedc9..b5f554152 100644 --- a/dynet/nodes-conv2d.cc +++ b/dynet/nodes-conv2d.cc @@ -16,6 +16,7 @@ #if HAVE_CUDA #include "dynet/cuda.h" #include "dynet/gpu-ops.h" +#include "dynet/cudnn-ops.h" #endif using namespace std; diff --git a/dynet/nodes-conv2d.h b/dynet/nodes-conv2d.h index 484c3222a..fc7140cf5 100644 --- a/dynet/nodes-conv2d.h +++ b/dynet/nodes-conv2d.h @@ -4,6 +4,10 @@ #include "dynet/dynet.h" #include "dynet/nodes-macros.h" +#if HAVE_CUDNN +#include "dynet/cudnn-ops.h" +#endif + namespace dynet { // conv2d diff --git a/dynet/nodes-losses.cc b/dynet/nodes-losses.cc index c67a3da2d..a0d498832 100644 --- a/dynet/nodes-losses.cc +++ b/dynet/nodes-losses.cc @@ -26,6 +26,8 @@ Dim PairwiseRankLoss::dim_forward(const vector& xs) const { return xs[0].bd >= xs[1].bd ? 
xs[0] : xs[1]; } +#endif + template void PairwiseRankLoss::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { fx.tvec().device(*dev.edevice) = xs[0]->tvec().binaryExpr(xs[1]->tvec(), FPairwiseRankLoss(margin)); @@ -46,8 +48,6 @@ void PairwiseRankLoss::backward_dev_impl(const MyDevice & dev, } DYNET_NODE_INST_DEV_IMPL(PairwiseRankLoss) -#endif - // ************* BinaryLogLoss ************* #ifndef __CUDACC__ diff --git a/dynet/nodes-maxpooling2d.h b/dynet/nodes-maxpooling2d.h index 20bfff67d..1172b14dc 100644 --- a/dynet/nodes-maxpooling2d.h +++ b/dynet/nodes-maxpooling2d.h @@ -4,6 +4,10 @@ #include "dynet/dynet.h" #include "dynet/nodes-macros.h" +#if HAVE_CUDNN +#include "dynet/cudnn-ops.h" +#endif + namespace dynet { // maxpooling2d