diff --git a/dynet/CMakeLists.txt b/dynet/CMakeLists.txt
index 5888c7332..28283050a 100644
--- a/dynet/CMakeLists.txt
+++ b/dynet/CMakeLists.txt
@@ -123,6 +123,46 @@ if(ENABLE_BOOST)
   list(APPEND dynet_library_HDRS mp.h)
 endif()
 
+set(dynet_gpu_SRCS  # Sources handed to cuda_add_library(gdynet ...) further down in this file.
+    cuda.cc
+    cudnn-ops.cu
+    gpu-ops.cu
+    gpu-nodes-activations.cu  # The gpu-nodes-<x>.cu wrappers in this change #include nodes-<x>.cc.
+    gpu-nodes-affinetransform.cu
+    gpu-nodes-arith-const.cu
+    gpu-nodes-arith-cwise.cu
+    gpu-nodes-arith-scalar.cu
+    gpu-nodes-arith-sum.cu
+    gpu-nodes-arith-unary.cu
+    gpu-nodes-concat.cu
+    gpu-nodes-const.cu
+    gpu-nodes-contract.cu
+    gpu-nodes-conv2d.cu
+    gpu-nodes-conv.cu
+    gpu-nodes-dropout.cu
+    gpu-nodes-flow.cu
+    gpu-nodes-hinge.cu
+    gpu-nodes-linalg.cu
+    gpu-nodes-logsumexp.cu
+    gpu-nodes-losses.cu
+    gpu-nodes-matrixmultiply.cu
+    gpu-nodes-maxpooling2d.cu
+    gpu-nodes-minmax.cu
+    gpu-nodes-moments.cu
+    gpu-nodes-normalization.cu
+    gpu-nodes-norms.cu
+    gpu-nodes-pickneglogsoftmax.cu
+    gpu-nodes-random.cu
+    gpu-nodes-select.cu
+    gpu-nodes-similarities.cu
+    gpu-nodes-softmaxes.cu
+    gpu-nodes-trig.cu
+    gpu-param-nodes.cu
+    gpu-tensor.cu
+    gpu-training.cu
+    gpu-model.cu
+)  # Keep in sync with the nodes-*.cc files: a new node source needs a matching wrapper listed here.
+
 file(GLOB TEST_SRCS RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} tests/*.cc)
 if (NOT MSVC)
   set(BUILD_SHARED_LIBS ON)
@@ -175,10 +215,10 @@ if(WITH_CUDA_BACKEND)
     list(APPEND CUDA_NVCC_FLAGS_DEBUG "--compiler-options \"/MDd\"")
     list(APPEND CUDA_NVCC_FLAGS_RELEASE "--compiler-options \"/MD\"")
     SET(CUDA_PROPAGATE_HOST_FLAGS OFF)
-    cuda_add_library(gdynet ${dynet_library_SRCS} ${dynet_library_HDRS} cuda.cc cudnn-ops.cu gpu-ops.cu gpu-nodes.cu gpu-nodes-contract.cu gpu-nodes-conv.cu gpu-nodes-conv2d.cu gpu-nodes-maxpooling2d.cu gpu-param-nodes.cu gpu-tensor.cu gpu-training.cu gpu-model.cu gpu-nodes-pickneglogsoftmax.cu gpu-nodes-matrixmultiply.cu gpu-nodes-hinge.cu gpu-nodes-affinetransform.cu gpu-nodes-similarities.cu gpu-nodes-norms.cu gpu-nodes-unary-arith.cu)
+    cuda_add_library(gdynet ${dynet_library_SRCS} ${dynet_library_HDRS} ${dynet_gpu_SRCS})
   else()
     SET(CUDA_PROPAGATE_HOST_FLAGS OFF)
-    cuda_add_library(gdynet ${dynet_library_SRCS} ${dynet_library_HDRS} cuda.cc cudnn-ops.cu gpu-ops.cu gpu-nodes.cu gpu-nodes-contract.cu gpu-nodes-conv.cu gpu-nodes-conv2d.cu gpu-nodes-maxpooling2d.cu gpu-param-nodes.cu gpu-tensor.cu gpu-training.cu gpu-model.cu gpu-nodes-pickneglogsoftmax.cu gpu-nodes-matrixmultiply.cu gpu-nodes-hinge.cu gpu-nodes-affinetransform.cu gpu-nodes-similarities.cu gpu-nodes-norms.cu gpu-nodes-unary-arith.cu OPTIONS --compiler-options "-fPIC")
+    cuda_add_library(gdynet ${dynet_library_SRCS} ${dynet_library_HDRS} ${dynet_gpu_SRCS} OPTIONS --compiler-options "-fPIC")
   endif()
   set_target_properties(gdynet PROPERTIES
                         COMPILE_DEFINITIONS HAVE_CUDA)
@@ -197,4 +237,3 @@ if(WITH_CUDA_BACKEND)
 endif(WITH_CUDA_BACKEND)
 
 # target_compile_features(dynet PRIVATE cxx_range_for)
-
diff --git a/dynet/gpu-nodes-activations.cu b/dynet/gpu-nodes-activations.cu
new file mode 100644
index 000000000..b50d5a62c
--- /dev/null
+++ b/dynet/gpu-nodes-activations.cu
@@ -0,0 +1,3 @@
+// This is a dummy file that contains the same content as nodes-activations.cc but compiled
+// on CUDA
+#include "nodes-activations.cc"
diff --git a/dynet/gpu-nodes-arith-const.cu b/dynet/gpu-nodes-arith-const.cu
new file mode 100644
index 000000000..2abc645f9
--- /dev/null
+++ b/dynet/gpu-nodes-arith-const.cu
@@ -0,0 +1,3 @@
+// This is a dummy file that contains the same content as nodes-arith-const.cc but compiled
+// on CUDA
+#include "nodes-arith-const.cc"
diff --git a/dynet/gpu-nodes-arith-cwise.cu b/dynet/gpu-nodes-arith-cwise.cu
new file mode 100644
index 000000000..93e62b7df
--- /dev/null
+++ b/dynet/gpu-nodes-arith-cwise.cu
@@ -0,0 +1,3 @@
+// This is a dummy file that contains the same content as nodes-arith-cwise.cc but compiled
+// on CUDA
+#include "nodes-arith-cwise.cc"
diff --git a/dynet/gpu-nodes-arith-scalar.cu b/dynet/gpu-nodes-arith-scalar.cu
new file mode 100644
index 000000000..2e4ff0c0e
--- /dev/null
+++ b/dynet/gpu-nodes-arith-scalar.cu
@@ -0,0 +1,3 @@
+// This is a dummy file that contains the same content as nodes-arith-scalar.cc but compiled
+// on CUDA
+#include "nodes-arith-scalar.cc"
diff --git a/dynet/gpu-nodes-arith-sum.cu b/dynet/gpu-nodes-arith-sum.cu
new file mode 100644
index 000000000..c80bdfe49
--- /dev/null
+++ b/dynet/gpu-nodes-arith-sum.cu
@@ -0,0 +1,3 @@
+// This is a dummy file that contains the same content as nodes-arith-sum.cc but compiled
+// on CUDA
+#include "nodes-arith-sum.cc"
diff --git a/dynet/gpu-nodes-arith-unary.cu b/dynet/gpu-nodes-arith-unary.cu
index 15198bef2..d5e6c6917 100644
--- a/dynet/gpu-nodes-arith-unary.cu
+++ b/dynet/gpu-nodes-arith-unary.cu
@@ -1,3 +1,3 @@
-// This is a dummy file that contains the same content as nodes-unary-arith.cc but compiled
+// This is a dummy file that contains the same content as nodes-arith-unary.cc but compiled
 // on CUDA
 #include "nodes-arith-unary.cc"
diff --git a/dynet/gpu-nodes-concat.cu b/dynet/gpu-nodes-concat.cu
new file mode 100644
index 000000000..2fcfc98c4
--- /dev/null
+++ b/dynet/gpu-nodes-concat.cu
@@ -0,0 +1,3 @@
+// This is a dummy file that contains the same content as nodes-concat.cc but compiled
+// on CUDA
+#include "nodes-concat.cc"
diff --git a/dynet/gpu-nodes-const.cu b/dynet/gpu-nodes-const.cu
new file mode 100644
index 000000000..8a28ebe56
--- /dev/null
+++ b/dynet/gpu-nodes-const.cu
@@ -0,0 +1,3 @@
+// This is a dummy file that contains the same content as nodes-const.cc but compiled
+// on CUDA
+#include "nodes-const.cc"
diff --git a/dynet/gpu-nodes-conv.cu b/dynet/gpu-nodes-conv.cu
index 451f71b36..0bff1eca4 100644
--- a/dynet/gpu-nodes-conv.cu
+++ b/dynet/gpu-nodes-conv.cu
@@ -1,3 +1,3 @@
-// This is a dummy file that contains the same content as nodes-conv.cc but compiled
+// This is a dummy file that contains the same content as nodes-conv.cc but compiled
 // on CUDA
 #include "nodes-conv.cc"
diff --git a/dynet/gpu-nodes-conv2d.cu b/dynet/gpu-nodes-conv2d.cu
index 347aaadcf..cc2f78e4f 100644
--- a/dynet/gpu-nodes-conv2d.cu
+++ b/dynet/gpu-nodes-conv2d.cu
@@ -1 +1,3 @@
+// This is a dummy file that contains the same content as nodes-conv2d.cc but compiled
+// on CUDA
 #include "nodes-conv2d.cc"
diff --git a/dynet/gpu-nodes-dropout.cu b/dynet/gpu-nodes-dropout.cu
new file mode 100644
index 000000000..3911d2bc1
--- /dev/null
+++ b/dynet/gpu-nodes-dropout.cu
@@ -0,0 +1,3 @@
+// This is a dummy file that contains the same content as nodes-dropout.cc but compiled
+// on CUDA
+#include "nodes-dropout.cc"
diff --git a/dynet/gpu-nodes-flow.cu b/dynet/gpu-nodes-flow.cu
new file mode 100644
index 000000000..27cfed8c8
--- /dev/null
+++ b/dynet/gpu-nodes-flow.cu
@@ -0,0 +1,3 @@
+// This is a dummy file that contains the same content as nodes-flow.cc but compiled
+// on CUDA
+#include "nodes-flow.cc"
diff --git a/dynet/gpu-nodes-linalg.cu b/dynet/gpu-nodes-linalg.cu
new file mode 100644
index 000000000..cbebed454
--- /dev/null
+++ b/dynet/gpu-nodes-linalg.cu
@@ -0,0 +1,3 @@
+// This is a dummy file that contains the same content as nodes-linalg.cc but compiled
+// on CUDA
+#include "nodes-linalg.cc"
diff --git a/dynet/gpu-nodes-logsumexp.cu b/dynet/gpu-nodes-logsumexp.cu
new file mode 100644
index 000000000..f7abe4950
--- /dev/null
+++ b/dynet/gpu-nodes-logsumexp.cu
@@ -0,0 +1,3 @@
+// This is a dummy file that contains the same content as nodes-logsumexp.cc but compiled
+// on CUDA
+#include "nodes-logsumexp.cc"
diff --git a/dynet/gpu-nodes-losses.cu b/dynet/gpu-nodes-losses.cu
new file mode 100644
index 000000000..4bb8863fd
--- /dev/null
+++ b/dynet/gpu-nodes-losses.cu
@@ -0,0 +1,3 @@
+// This is a dummy file that contains the same content as nodes-losses.cc but compiled
+// on CUDA
+#include "nodes-losses.cc"
diff --git a/dynet/gpu-nodes-maxpooling2d.cu b/dynet/gpu-nodes-maxpooling2d.cu
index ea93114c9..090ef624e 100644
--- a/dynet/gpu-nodes-maxpooling2d.cu
+++ b/dynet/gpu-nodes-maxpooling2d.cu
@@ -1 +1,3 @@
+// This is a dummy file that contains the same content as nodes-maxpooling2d.cc but compiled
+// on CUDA
 #include "nodes-maxpooling2d.cc"
diff --git a/dynet/gpu-nodes-minmax.cu b/dynet/gpu-nodes-minmax.cu
new file mode 100644
index 000000000..dcac97cc4
--- /dev/null
+++ b/dynet/gpu-nodes-minmax.cu
@@ -0,0 +1,3 @@
+// This is a dummy file that contains the same content as nodes-minmax.cc but compiled
+// on CUDA
+#include "nodes-minmax.cc"
diff --git a/dynet/gpu-nodes-moments.cu b/dynet/gpu-nodes-moments.cu
new file mode 100644
index 000000000..253a0860f
--- /dev/null
+++ b/dynet/gpu-nodes-moments.cu
@@ -0,0 +1,3 @@
+// This is a dummy file that contains the same content as nodes-moments.cc but compiled
+// on CUDA
+#include "nodes-moments.cc"
diff --git a/dynet/gpu-nodes-normalization.cu b/dynet/gpu-nodes-normalization.cu
new file mode 100644
index 000000000..16d4a3048
--- /dev/null
+++ b/dynet/gpu-nodes-normalization.cu
@@ -0,0 +1,3 @@
+// This is a dummy file that contains the same content as nodes-normalization.cc but compiled
+// on CUDA
+#include "nodes-normalization.cc"
diff --git a/dynet/gpu-nodes-norms.cu b/dynet/gpu-nodes-norms.cu
index 4fa94dc81..470e1f97c 100644
--- a/dynet/gpu-nodes-norms.cu
+++ b/dynet/gpu-nodes-norms.cu
@@ -1,3 +1,3 @@
-// This is a dummy file that contains the same content as nodes-norms.cc but compiled
+// This is a dummy file that contains the same content as nodes-norms.cc but compiled
 // on CUDA
 #include "nodes-norms.cc"
diff --git a/dynet/gpu-nodes-random.cu b/dynet/gpu-nodes-random.cu
new file mode 100644
index 000000000..7ef0d2564
--- /dev/null
+++ b/dynet/gpu-nodes-random.cu
@@ -0,0 +1,3 @@
+// This is a dummy file that contains the same content as nodes-random.cc but compiled
+// on CUDA
+#include "nodes-random.cc"
diff --git a/dynet/gpu-nodes-select.cu b/dynet/gpu-nodes-select.cu
new file mode 100644
index 000000000..25871a1bb
--- /dev/null
+++ b/dynet/gpu-nodes-select.cu
@@ -0,0 +1,3 @@
+// This is a dummy file that contains the same content as nodes-select.cc but compiled
+// on CUDA
+#include "nodes-select.cc"
diff --git a/dynet/gpu-nodes-similarities.cu b/dynet/gpu-nodes-similarities.cu
index 068d0cec7..933edb421 100644
--- a/dynet/gpu-nodes-similarities.cu
+++ b/dynet/gpu-nodes-similarities.cu
@@ -1,3 +1,3 @@
-// This is a dummy file that contains the same content as nodes-similarities.cc but compiled
+// This is a dummy file that contains the same content as nodes-similarities.cc but compiled
 // on CUDA
 #include "nodes-similarities.cc"
diff --git a/dynet/gpu-nodes-softmaxes.cu b/dynet/gpu-nodes-softmaxes.cu
new file mode 100644
index 000000000..43730a67b
--- /dev/null
+++ b/dynet/gpu-nodes-softmaxes.cu
@@ -0,0 +1,3 @@
+// This is a dummy file that contains the same content as nodes-softmaxes.cc but compiled
+// on CUDA
+#include "nodes-softmaxes.cc"
diff --git a/dynet/gpu-nodes-trig.cu b/dynet/gpu-nodes-trig.cu
new file mode 100644
index 000000000..47449a016
--- /dev/null
+++ b/dynet/gpu-nodes-trig.cu
@@ -0,0 +1,3 @@
+// This is a dummy file that contains the same content as nodes-trig.cc but compiled
+// on CUDA
+#include "nodes-trig.cc"
diff --git a/dynet/nodes-conv2d.cc b/dynet/nodes-conv2d.cc
index 0a7cdedc9..b5f554152 100644
--- a/dynet/nodes-conv2d.cc
+++ b/dynet/nodes-conv2d.cc
@@ -16,6 +16,7 @@
 #if HAVE_CUDA
 #include "dynet/cuda.h"
 #include "dynet/gpu-ops.h"
+#include "dynet/cudnn-ops.h"
 #endif
 
 using namespace std;
diff --git a/dynet/nodes-conv2d.h b/dynet/nodes-conv2d.h
index 484c3222a..fc7140cf5 100644
--- a/dynet/nodes-conv2d.h
+++ b/dynet/nodes-conv2d.h
@@ -4,6 +4,10 @@
 #include "dynet/dynet.h"
 #include "dynet/nodes-macros.h"
 
+#if HAVE_CUDNN
+#include "dynet/cudnn-ops.h"
+#endif
+
 namespace dynet {
 
 // conv2d 
diff --git a/dynet/nodes-losses.cc b/dynet/nodes-losses.cc
index c67a3da2d..a0d498832 100644
--- a/dynet/nodes-losses.cc
+++ b/dynet/nodes-losses.cc
@@ -26,6 +26,8 @@ Dim PairwiseRankLoss::dim_forward(const vector<Dim>& xs) const {
   return xs[0].bd >= xs[1].bd ? xs[0] : xs[1];
 }
 
+#endif
+
 template<class MyDevice>
 void PairwiseRankLoss::forward_dev_impl(const MyDevice & dev, const vector<const Tensor*>& xs, Tensor& fx) const {
   fx.tvec().device(*dev.edevice) = xs[0]->tvec().binaryExpr(xs[1]->tvec(), FPairwiseRankLoss(margin));
@@ -46,8 +48,6 @@ void PairwiseRankLoss::backward_dev_impl(const MyDevice & dev,
 }
 DYNET_NODE_INST_DEV_IMPL(PairwiseRankLoss)
 
-#endif
-
 // ************* BinaryLogLoss *************
 
 #ifndef __CUDACC__
diff --git a/dynet/nodes-maxpooling2d.h b/dynet/nodes-maxpooling2d.h
index 20bfff67d..1172b14dc 100644
--- a/dynet/nodes-maxpooling2d.h
+++ b/dynet/nodes-maxpooling2d.h
@@ -4,6 +4,10 @@
 #include "dynet/dynet.h"
 #include "dynet/nodes-macros.h"
 
+#if HAVE_CUDNN
+#include "dynet/cudnn-ops.h"
+#endif
+
 namespace dynet {
 
 // maxpooling2d