From 358c37bea2e39f0c53706f8d08fa19b83bcaace8 Mon Sep 17 00:00:00 2001 From: Graham Neubig Date: Mon, 10 Jul 2017 08:33:50 -0400 Subject: [PATCH 1/3] Finished separating nodes Former-commit-id: cd2a8c77af19d24f5493aca98e941406f98c8556 --- doc/source/code_style.rst | 24 +- dynet/CMakeLists.txt | 59 +- ...nary-arith.cu => gpu-nodes-arith-unary.cu} | 2 +- dynet/nodes-activations.cc | 181 ++ dynet/nodes-arith-const.cc | 111 + dynet/nodes-arith-cwise.cc | 188 ++ dynet/nodes-arith-scalar.cc | 173 ++ dynet/nodes-arith-sum.cc | 348 +++ ...es-unary-arith.cc => nodes-arith-unary.cc} | 33 + dynet/nodes-common.cc | 996 -------- dynet/nodes-concat.cc | 148 ++ dynet/nodes-const.cc | 42 + dynet/nodes-conv.cc | 313 +-- dynet/nodes-conv.h | 39 +- dynet/nodes-dropout.cc | 187 ++ dynet/nodes-flow.cc | 151 ++ dynet/nodes-linalg.cc | 224 ++ dynet/nodes-logsumexp.cc | 115 + dynet/nodes-losses.cc | 123 + dynet/nodes-minmax.cc | 252 ++ dynet/nodes-moments.cc | 440 ++++ dynet/nodes-normalization.cc | 54 + dynet/nodes-random.cc | 184 ++ dynet/nodes-select.cc | 333 +++ dynet/nodes-softmaxes.cc | 362 +++ dynet/nodes-trig.cc | 43 + dynet/nodes.cc | 2200 ----------------- dynet/nodes.h | 21 +- 28 files changed, 3876 insertions(+), 3470 deletions(-) rename dynet/{gpu-nodes-unary-arith.cu => gpu-nodes-arith-unary.cu} (76%) create mode 100644 dynet/nodes-activations.cc create mode 100644 dynet/nodes-arith-const.cc create mode 100644 dynet/nodes-arith-cwise.cc create mode 100644 dynet/nodes-arith-scalar.cc create mode 100644 dynet/nodes-arith-sum.cc rename dynet/{nodes-unary-arith.cc => nodes-arith-unary.cc} (87%) delete mode 100644 dynet/nodes-common.cc create mode 100644 dynet/nodes-concat.cc create mode 100644 dynet/nodes-const.cc create mode 100644 dynet/nodes-dropout.cc create mode 100644 dynet/nodes-flow.cc create mode 100644 dynet/nodes-linalg.cc create mode 100644 dynet/nodes-logsumexp.cc create mode 100644 dynet/nodes-losses.cc create mode 100644 dynet/nodes-minmax.cc create mode 100644 
dynet/nodes-moments.cc create mode 100644 dynet/nodes-normalization.cc create mode 100644 dynet/nodes-random.cc create mode 100644 dynet/nodes-select.cc create mode 100644 dynet/nodes-softmaxes.cc create mode 100644 dynet/nodes-trig.cc delete mode 100644 dynet/nodes.cc diff --git a/doc/source/code_style.rst b/doc/source/code_style.rst index b28ebbc8e..a65416258 100644 --- a/doc/source/code_style.rst +++ b/doc/source/code_style.rst @@ -8,7 +8,29 @@ Coding Tips One of the most common things that one will want to do to modify DyNet is to add a new operation to calculate a new function. You can find more information on how to do so at the end of the tutorial slides -`here `_. +`here `_ (note that some file +names are old). + +Taking a look at the existing operations in the ``nodes-XXX.h`` and ``nodes-XXX.cc`` files +will be the best guide in creating new operations. Here are some fine-grained tips for +those who want to dive into the process. + +1. ``fx`` is a pointer to the (preallocated) location for the result + of forward to be stored +2. ``fx`` is not initialized, so after calling forward ``fx`` must contain the correct answer +3. dEdxi MUST **ACCUMULATE** a result since multiple calls to forward may depend on + the same ``x_i``. Even, e.g., Identity must be implemented as ``dEdx1 += dEdf``. +4. scalar results of forward are placed in ``fx.v[0]`` +5. DyNet manages its own memory, not Eigen, and it is configured with the + EIGEN_NO_MALLOC option. If you get an error about Eigen attempting to allocate + memory, it is (probably) because of an implicit creation of a temporary variable. + If you really do need a temporary variable, its capacity must be requested by + Node::aux_storage_size + +And here are some notes on debugging problems with new operations + +1. fx is uninitialized when forward is called -- are you relying on it being 0? +2. dEdxi must accumulate (see point 3 above!) 
Coding Practices ---------------- diff --git a/dynet/CMakeLists.txt b/dynet/CMakeLists.txt index 160ad6aa0..5888c7332 100644 --- a/dynet/CMakeLists.txt +++ b/dynet/CMakeLists.txt @@ -3,11 +3,11 @@ set(dynet_library_SRCS aligned-mem-pool.cc cfsm-builder.cc - dynet.cc deep-lstm.cc devices.cc dict.cc dim.cc + dynet.cc exec.cc expr.cc fast-lstm.cc @@ -17,34 +17,51 @@ set(dynet_library_SRCS gru.cc hsm-builder.cc init.cc + io.cc lstm.cc mem.cc model.cc - nodes.cc - nodes-common.cc + nodes-activations.cc + nodes-affinetransform.cc + nodes-arith-const.cc + nodes-arith-cwise.cc + nodes-arith-scalar.cc + nodes-arith-sum.cc + nodes-arith-unary.cc + nodes-concat.cc + nodes-const.cc nodes-contract.cc nodes-conv.cc nodes-conv2d.cc + nodes-dropout.cc + nodes-flow.cc + nodes-hinge.cc + nodes-linalg.cc + nodes-logsumexp.cc + nodes-losses.cc + nodes-matrixmultiply.cc nodes-maxpooling2d.cc + nodes-minmax.cc + nodes-moments.cc + nodes-normalization.cc + nodes-norms.cc nodes-pickneglogsoftmax.cc - nodes-matrixmultiply.cc - nodes-hinge.cc - nodes-affinetransform.cc + nodes-random.cc + nodes-select.cc nodes-similarities.cc - nodes-norms.cc - nodes-unary-arith.cc + nodes-softmaxes.cc + nodes-trig.cc param-init.cc param-nodes.cc pretrain.cc - rnn.cc rnn-state-machine.cc + rnn.cc saxe-init.cc shadow-params.cc tensor.cc training.cc treelstm.cc weight-decay.cc - io.cc ) if(ENABLE_BOOST) list(APPEND dynet_library_SRCS mp.cc) @@ -53,14 +70,18 @@ endif() # Headers: set(dynet_library_HDRS aligned-mem-pool.h - cfsm-builder.h - cudnn-ops.h c2w.h - dynet.h + cfsm-builder.h + cuda-matrix-multiply.h cuda.h + cudnn-ops.h + deep-lstm.h devices.h dict.h dim.h + dynet-helper.h + dynet.h + except.h exec.h expr.h fast-lstm.h @@ -68,31 +89,35 @@ set(dynet_library_HDRS globals.h gpu-kernels.h gpu-ops.h + grad-check.h graph.h gru.h hsm-builder.h init.h + io.h lstm.h mem.h model.h - nodes.h nodes-contract.h nodes-conv.h + nodes-macros.h + nodes.h op-helper.h + param-init.h param-nodes.h + pretrain.h 
rnn-state-machine.h rnn.h saxe-init.h shadow-params.h + sig.h simd-functors.h + str-util.h tensor.h timing.h training.h treelstm.h - except.h - nodes-macros.h weight-decay.h - io.h ) if(ENABLE_BOOST) list(APPEND dynet_library_HDRS mp.h) diff --git a/dynet/gpu-nodes-unary-arith.cu b/dynet/gpu-nodes-arith-unary.cu similarity index 76% rename from dynet/gpu-nodes-unary-arith.cu rename to dynet/gpu-nodes-arith-unary.cu index b53030f48..15198bef2 100644 --- a/dynet/gpu-nodes-unary-arith.cu +++ b/dynet/gpu-nodes-arith-unary.cu @@ -1,3 +1,3 @@ // This is a dummy file that contains the same content as nodes-unary-arith.cc but compiled // on CUDA -#include "nodes-unary-arith.cc" +#include "nodes-arith-unary.cc" diff --git a/dynet/nodes-activations.cc b/dynet/nodes-activations.cc new file mode 100644 index 000000000..678626ba9 --- /dev/null +++ b/dynet/nodes-activations.cc @@ -0,0 +1,181 @@ +#include "dynet/nodes.h" + +#include "dynet/nodes-macros.h" +#include "dynet/functors.h" +#include "dynet/simd-functors.h" + +using namespace std; + +namespace dynet { + +// ************* Rectify ************* + +#ifndef __CUDACC__ + +string Rectify::as_string(const vector& arg_names) const { + ostringstream s; + s << "ReLU(" << arg_names[0] << ')'; + return s.str(); +} + +Dim Rectify::dim_forward(const vector& xs) const { + DYNET_ARG_CHECK(xs.size() == 1, "Failed input count check in Rectify"); + return xs[0]; +} + +#endif + +template +void Rectify::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + DYNET_ARG_CHECK(xs.size() == 1, "Failed dimension check in Rectify::forward"); + fx.tvec().device(*dev.edevice) = xs[0]->tvec().cwiseMax(0.f); +} + +template +void Rectify::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + dEdxi.tvec().device(*dev.edevice) += fx.tvec().binaryExpr(dEdf.tvec(), FRectifyBackward()); +} +DYNET_NODE_INST_DEV_IMPL(Rectify) + +// ************* 
LogisticSigmoid ************* + +#ifndef __CUDACC__ + +string LogisticSigmoid::as_string(const vector& arg_names) const { + ostringstream s; + s << "\\sigma(" << arg_names[0] << ')'; + return s.str(); +} + +Dim LogisticSigmoid::dim_forward(const vector& xs) const { + DYNET_ASSERT(xs.size() == 1, "Failed input count check in LogisticSigmoid") + return xs[0]; +} + +#endif + +template +void LogisticSigmoid::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + DYNET_ASSERT(xs.size() == 1, "Failed dimension check in LogisticSigmoid::forward"); + fx.tvec().device(*dev.edevice) = xs[0]->tvec().unaryExpr(scalar_logistic_sigmoid_op()); +} + +template +void LogisticSigmoid::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + dEdxi.tvec().device(*dev.edevice) += fx.tvec().binaryExpr(dEdf.tvec(), scalar_logistic_sigmoid_backward_op()); +} +DYNET_NODE_INST_DEV_IMPL(LogisticSigmoid) + +// ************* SoftSign ************* + +#ifndef __CUDACC__ + +string SoftSign::as_string(const vector& arg_names) const { + ostringstream s; + s << "softsign(" << arg_names[0] << ')'; + return s.str(); +} + +Dim SoftSign::dim_forward(const vector& xs) const { + DYNET_ARG_CHECK(xs.size() == 1, "Failed input count check in SoftSign"); + DYNET_ARG_CHECK(LooksLikeVector(xs[0]), "Bad input dimensions in SoftSign: " << xs); + return xs[0]; +} + +#endif + +template +void SoftSign::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + DYNET_ARG_CHECK(xs.size() == 1, "Failed dimension check in SoftSign::forward"); + fx.tvec().device(*dev.edevice) = xs[0]->tvec().unaryExpr(FSoftSign()); +} + +template +void SoftSign::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + dEdxi.tvec().device(*dev.edevice) += fx.tvec().binaryExpr(dEdf.tvec(), FSoftSignBackward()); +} 
+DYNET_NODE_INST_DEV_IMPL(SoftSign) + +// ************* Erf ************* + +#ifndef __CUDACC__ + +string Erf::as_string(const vector& arg_names) const { + ostringstream s; + s << "erf(" << arg_names[0] << ')'; + return s.str(); +} + +Dim Erf::dim_forward(const vector& xs) const { + DYNET_ARG_CHECK(xs.size() == 1, "Failed input count check in Erf") + return xs[0]; +} + +#endif + +template +void Erf::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + fx.tvec().device(*dev.edevice) = xs[0]->tvec().erf(); +} + +template +void Erf::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + dEdxi.tvec().device(*dev.edevice) += xs[0]->tvec().binaryExpr(dEdf.tvec(), scalar_erf_backward_op()); +} +DYNET_NODE_INST_DEV_IMPL(Erf) + +// ************* ExponentialLinearUnit ************* + +#ifndef __CUDACC__ + +string ExponentialLinearUnit::as_string(const vector& arg_names) const { + ostringstream s; + s << "ELU(" << arg_names[0] << ", lambda=" << lambda << ", alpha=" << alpha << ')'; + return s.str(); +} + +Dim ExponentialLinearUnit::dim_forward(const vector& xs) const { + DYNET_ARG_CHECK(xs.size() == 1, "Failed input count check in ExponentialLinearUnit"); + return xs[0]; +} + +#endif + +template +void ExponentialLinearUnit::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + DYNET_ARG_CHECK(xs.size() == 1, "Failed dimension check in ExponentialLinearUnit::forward"); + fx.tvec().device(*dev.edevice) = xs[0]->tvec().unaryExpr(FELUForward(alpha, lambda));; +} + +template +void ExponentialLinearUnit::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + dEdxi.tvec().device(*dev.edevice) += xs[0]->tvec().binaryExpr(dEdf.tvec(), FELUBackward(alpha, lambda)); +} +DYNET_NODE_INST_DEV_IMPL(ExponentialLinearUnit) + +} diff --git a/dynet/nodes-arith-const.cc 
b/dynet/nodes-arith-const.cc new file mode 100644 index 000000000..ce7818e71 --- /dev/null +++ b/dynet/nodes-arith-const.cc @@ -0,0 +1,111 @@ +#include "dynet/nodes.h" + +#include "dynet/nodes-macros.h" +#include "dynet/functors.h" +#include "dynet/simd-functors.h" + +using namespace std; + +namespace dynet { + +// ************* ConstantMinusX ************* + +#ifndef __CUDACC__ + +string ConstantMinusX::as_string(const vector& arg_names) const { + ostringstream s; + s << c << " - " << arg_names[0]; + return s.str(); +} + +Dim ConstantMinusX::dim_forward(const vector& xs) const { + DYNET_ARG_CHECK(xs.size() == 1, "Failed input count check in ConstantMinusX") + return xs[0]; +} + +#endif + +template +void ConstantMinusX::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + fx.tvec().device(*dev.edevice) = xs[0]->tvec().unaryExpr(const_minus_op(c)); +} + +template +void ConstantMinusX::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + dEdxi.tvec().device(*dev.edevice) -= dEdf.tvec(); +} +DYNET_NODE_INST_DEV_IMPL(ConstantMinusX) + +// ************* ConstantPlusX ************* + +#ifndef __CUDACC__ + +string ConstantPlusX::as_string(const vector& arg_names) const { + ostringstream s; + s << c << " + " << arg_names[0]; + return s.str(); +} + +Dim ConstantPlusX::dim_forward(const vector& xs) const { + DYNET_ARG_CHECK(xs.size() == 1, "Failed input count check in ConstantPlusX") + return xs[0]; +} + +#endif + +template +void ConstantPlusX::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + fx.tvec().device(*dev.edevice) = xs[0]->tvec().unaryExpr(const_add_op(c)); +} + +template +void ConstantPlusX::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + dEdxi.tvec().device(*dev.edevice) += dEdf.tvec(); +} 
+DYNET_NODE_INST_DEV_IMPL(ConstantPlusX) + +// ************* ConstScalarMultiply ************* + +#ifndef __CUDACC__ + +string ConstScalarMultiply::as_string(const vector& arg_names) const { + ostringstream s; + s << arg_names[0] << " * " << alpha; + return s.str(); +} + +Dim ConstScalarMultiply::dim_forward(const vector& xs) const { + DYNET_ARG_CHECK(xs.size() == 1, "ConstScalarMultiply expects one argument: " << xs); + return xs[0]; +} + +#endif + +template +void ConstScalarMultiply::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + fx.tvec().device(*dev.edevice) = xs[0]->tvec() * alpha; +} + +template +void ConstScalarMultiply::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + DYNET_ASSERT(i == 0, "Failed dimension check in ConstScalarMultiply"); + dEdxi.tvec().device(*dev.edevice) += dEdf.tvec() * alpha; +} +DYNET_NODE_INST_DEV_IMPL(ConstScalarMultiply) + +} diff --git a/dynet/nodes-arith-cwise.cc b/dynet/nodes-arith-cwise.cc new file mode 100644 index 000000000..26ac47b0e --- /dev/null +++ b/dynet/nodes-arith-cwise.cc @@ -0,0 +1,188 @@ +#include "dynet/nodes.h" + +#include "dynet/nodes-macros.h" + +using namespace std; + +namespace dynet { + +// ************* CwiseMultiply ************* + +#ifndef __CUDACC__ + +string CwiseMultiply::as_string(const vector& arg_names) const { + ostringstream s; + s << arg_names[0] << " \\cdot " << arg_names[1]; + return s.str(); +} + +Dim CwiseMultiply::dim_forward(const vector& xs) const { + DYNET_ARG_CHECK(xs.size() == 2, "Failed input count check in CwiseMultiply") + Dim d = xs[0].truncate(); + DYNET_ARG_CHECK(d.single_batch() == xs[1].truncate().single_batch(), + "Mismatched input dimensions in CwiseMultiply: " << xs); + d.bd = max(xs[1].bd, d.bd); + return d; +} + +int CwiseMultiply::autobatch_sig(const ComputationGraph & cg, SigMap &sm) const { + // TODO: This does not handle the case where dimensions 
differ + Sig s(nt::cmult); + return cg.nodes[args[0]]->dim == cg.nodes[args[1]]->dim ? sm.get_idx(s) : 0; +} + +std::vector CwiseMultiply::autobatch_concat(const ComputationGraph & cg) const { + return vector(2, 1); +} + +#endif + +template +void CwiseMultiply::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + DYNET_ASSERT(xs.size() == 2, "Failed dimension check in CwiseMultiply::forward (cmult)"); + if(xs[0]->d.bd == xs[1]->d.bd) { + fx.tvec().device(*dev.edevice) = xs[0]->tvec() * xs[1]->tvec(); + } else { + Eigen::array bcast; bcast[0] = 1; bcast[1] = fx.d.bd; + if(xs[0]->d.bd == 1) + fx.tbvec().device(*dev.edevice) = xs[0]->tbvec().broadcast(bcast) * xs[1]->tbvec(); + else + fx.tbvec().device(*dev.edevice) = xs[0]->tbvec() * xs[1]->tbvec().broadcast(bcast); + } +} + +template +void CwiseMultiply::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + DYNET_ASSERT(i < 2, "Failed dimension check in CwiseMultiply::backward (cmult)"); + if(xs[0]->d.bd == xs[1]->d.bd) { + dEdxi.tvec().device(*dev.edevice) += dEdf.tvec() * xs[1-i]->tvec(); + } else if(xs[1-i]->d.bd == 1) { + Eigen::array bcast; bcast[0] = 1; bcast[1] = fx.d.bd; + dEdxi.tbvec().device(*dev.edevice) += dEdf.tbvec() * xs[1-i]->tbvec().broadcast(bcast); + } else { + Eigen::array red_axis; red_axis[0] = 1; + dEdxi.tvec().device(*dev.edevice) += (dEdf.tbvec() * xs[1-i]->tbvec()).sum(red_axis); + } +} +DYNET_NODE_INST_DEV_IMPL(CwiseMultiply) + +// ************* CwiseQuotient ************* + +#ifndef __CUDACC__ + +string CwiseQuotient::as_string(const vector& arg_names) const { + ostringstream s; + s << arg_names[0] << " / " << arg_names[1]; + return s.str(); +} + +Dim CwiseQuotient::dim_forward(const vector& xs) const { + DYNET_ARG_CHECK(xs.size() == 2, "Failed input count check in CwiseQuotient") + Dim d = xs[0].truncate(); + DYNET_ARG_CHECK(d.single_batch() == 
xs[1].truncate().single_batch(), "Bad input dimensions in CwiseQuotient: " << xs); + d.bd = max(xs[1].bd, d.bd); + return d; +} + +#endif + +template +void CwiseQuotient::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + DYNET_ASSERT(xs.size() == 2, "Failed dimension check in CwiseQuotient::forward (cdiv)"); + if(xs[0]->d.bd == xs[1]->d.bd) { + fx.tvec().device(*dev.edevice) = xs[0]->tvec() / xs[1]->tvec(); + } else if(xs[0]->d.bd == 1) { + Eigen::array bcast; bcast[0] = 1; bcast[1] = fx.d.bd; + fx.tb<1>().device(*dev.edevice) = xs[0]->tb<1>().broadcast(bcast) / xs[1]->tb<1>(); + } else { + Eigen::array bcast; bcast[0] = 1; bcast[1] = fx.d.bd; + fx.tb<1>().device(*dev.edevice) = xs[0]->tb<1>() / xs[1]->tb<1>().broadcast(bcast); + } +} + +template +void CwiseQuotient::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + DYNET_ASSERT(i < 2, "Failed dimension check in CwiseQuotient::backward (cdiv)"); + if (i == 0) { + if(xs[0]->d.bd == xs[1]->d.bd) { + dEdxi.tvec().device(*dev.edevice) += dEdf.tvec() / xs[1]->tvec(); + } else if(xs[1]->d.bd == 1) { + Eigen::array bcast; bcast[0] = 1; bcast[1] = fx.d.bd; + dEdxi.tb<1>().device(*dev.edevice) += dEdf.tb<1>() / xs[1]->tb<1>().broadcast(bcast); + } else { + Eigen::array red_axis; red_axis[0] = 1; + dEdxi.t<1>().device(*dev.edevice) += (dEdf.tb<1>() / xs[1]->tb<1>()).sum(red_axis); + } + } else { // i = 1 + if(xs[0]->d.bd == xs[1]->d.bd) { + dEdxi.tvec().device(*dev.edevice) -= dEdf.tvec() / xs[1]->tvec().square() * xs[0]->tvec(); + } else if(xs[1]->d.bd == 1) { + Eigen::array bcast; bcast[0] = 1; bcast[1] = fx.d.bd; + Eigen::array red_axis; red_axis[0] = 1; + dEdxi.t<1>().device(*dev.edevice) -= (dEdf.tb<1>() / xs[1]->tb<1>().square().broadcast(bcast) * xs[0]->tb<1>()).sum(red_axis); + } else { + Eigen::array bcast; bcast[0] = 1; bcast[1] = fx.d.bd; + dEdxi.tb<1>().device(*dev.edevice) -= dEdf.tb<1>() / 
xs[1]->tb<1>().square() * xs[0]->tb<1>().broadcast(bcast); + } + } +} +DYNET_NODE_INST_DEV_IMPL(CwiseQuotient) + +// ************* Pow ************* + +#ifndef __CUDACC__ + +string Pow::as_string(const vector& arg_names) const { + ostringstream s; + s << arg_names[0] << " ** " << arg_names[1]; + return s.str(); +} + +Dim Pow::dim_forward(const vector& xs) const { + DYNET_ARG_CHECK(xs.size() == 2, "Failed input count check in Pow") + Dim d = xs[0].truncate(); + DYNET_ARG_CHECK(xs[1].truncate().single_batch().size() == 1, "Bad input dimensions in Pow: " << xs); + return d; +} + +#endif + +template +void Pow::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + DYNET_ARG_CHECK(xs.size() == 2, "Failed dimension check in Pow::forward"); + fx.tvec().device(*dev.edevice) = xs[0]->tvec().pow(as_scalar(*xs[1])); +} + +template +void Pow::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + DYNET_ARG_CHECK(xs.size() == 2, "Failed dimension check in Pow::backward"); + real x2 = as_scalar(*xs[1]); + if (i == 0) { + dEdxi.tvec().device(*dev.edevice) += xs[0]->tvec().pow(x2 - 1) * dEdf.tvec() * x2; + } else { +#if defined(__CUDACC__) && defined(EIGEN_NO_MALLOC) + DYNET_RUNTIME_ERR("CUDA memory allocation in Pow"); +#endif + // y = a^x + // dy/dx = a^x * log(a) + dEdxi.t<0>().device(*dev.edevice) += (fx.tvec() * xs[0]->tvec().log() * dEdf.tvec()).sum(); + } +} +DYNET_NODE_INST_DEV_IMPL(Pow) + +} diff --git a/dynet/nodes-arith-scalar.cc b/dynet/nodes-arith-scalar.cc new file mode 100644 index 000000000..baca3fe0a --- /dev/null +++ b/dynet/nodes-arith-scalar.cc @@ -0,0 +1,173 @@ +#include "dynet/nodes.h" + +#include "dynet/nodes-macros.h" + +using namespace std; + +namespace dynet { + +// ************* ScalarAdd ************* + +#ifndef __CUDACC__ + +string ScalarAdd::as_string(const vector& arg_names) const { + ostringstream s; + s << arg_names[0] << " + " << 
arg_names[1]; + return s.str(); +} + +Dim ScalarAdd::dim_forward(const vector& xs) const { + DYNET_ARG_CHECK(xs.size() == 2, "Failed input count check in ScalarAdd") + Dim d = xs[0].truncate(); + DYNET_ARG_CHECK(xs[1].batch_size() == 1, + "Mismatched input dimensions in ScalarAdd: " << xs); + d.bd = max(xs[1].bd, d.bd); + return d; +} + +#endif + +template +void ScalarAdd::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + DYNET_ASSERT(xs.size() == 2, "Failed dimension check in ScalarAdd::forward (+)"); + Eigen::array bcast_0 = {1, (int) (fx.d.bd == xs[0]->d.bd ? 1 : fx.d.bd)}; + Eigen::array bcast_1 = {(int) fx.d.batch_size(), (int) (fx.d.bd == xs[1]->d.bd ? 1 : fx.d.bd)}; + fx.tbvec().device(*dev.edevice) = xs[0]->tbvec().broadcast(bcast_0) + xs[1]->tbvec().broadcast(bcast_1); +} + +template +void ScalarAdd::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + DYNET_ASSERT(i < 2, "Failed dimension check in ScalarAdd::backward (+)"); + Eigen::array red_axis_0 = {0}, red_axis_1 = {1}; + Eigen::array red_axes_01 = {0, 1}; + if (i == 0) { + if (xs[0]->d.bd == 1) + dEdxi.tvec().device(*dev.edevice) += dEdf.tbvec().sum(red_axis_1); + else + dEdxi.tbvec().device(*dev.edevice) += dEdf.tbvec(); + } else { + if (xs[1]->d.bd == 1) + dEdxi.t<0>().device(*dev.edevice) += dEdf.tbvec().sum(red_axes_01); + else + dEdxi.tb<0>().device(*dev.edevice) += dEdf.tbvec().sum(red_axis_0); + } +} +DYNET_NODE_INST_DEV_IMPL(ScalarAdd) + +// ************* ScalarMultiply ************* + +#ifndef __CUDACC__ + +string ScalarMultiply::as_string(const vector& arg_names) const { + ostringstream s; + s << arg_names[0] << " \\cdot " << arg_names[1]; + return s.str(); +} + +Dim ScalarMultiply::dim_forward(const vector& xs) const { + DYNET_ARG_CHECK(xs.size() == 2, "Failed input count check in ScalarMultiply") + Dim d = xs[1]; + DYNET_ARG_CHECK(xs[0].batch_size() == 1, + "Mismatched 
input dimensions in ScalarMultiply: " << xs); + d.bd = max(xs[0].bd, d.bd); + return d; +} + +#endif + +template +void ScalarMultiply::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + DYNET_ASSERT(xs.size() == 2, "Failed dimension check in ScalarMultiply::forward (cmult)"); + + Eigen::array bcast_0 = {(int) fx.d.batch_size(), (int) (fx.d.bd == xs[0]->d.bd ? 1 : fx.d.bd)}; + Eigen::array bcast_1 = {1, (int) (fx.d.bd == xs[1]->d.bd ? 1 : fx.d.bd)}; + fx.tbvec().device(*dev.edevice) = xs[0]->tbvec().broadcast(bcast_0) * xs[1]->tbvec().broadcast(bcast_1); +} + +template +void ScalarMultiply::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + DYNET_ASSERT(i < 2, "Failed dimension check in ScalarMultiply::backward (cmult)"); + Eigen::array bcast_0 = {(int) fx.d.batch_size(), (int)( fx.d.bd == xs[0]->d.bd ? 1 : fx.d.bd)}; + Eigen::array bcast_1 = {1, (int)(fx.d.bd == xs[1]->d.bd ? 
1 : fx.d.bd)}; + Eigen::array red_axis_0 = {0}, red_axis_1 = {1}; + Eigen::array red_axes_01 = {0, 1}; + if (i == 0) { + if (xs[0]->d.bd == 1) + dEdxi.t<0>().device(*dev.edevice) += (dEdf.tbvec() * xs[1]->tbvec().broadcast(bcast_1)).sum(red_axes_01); + else + dEdxi.tb<0>().device(*dev.edevice) += (dEdf.tbvec() * xs[1]->tbvec().broadcast(bcast_1)).sum(red_axis_0); + } else { + if (xs[1]->d.bd == 1) + dEdxi.tvec().device(*dev.edevice) += (dEdf.tbvec() * xs[0]->tbvec().broadcast(bcast_0)).sum(red_axis_1); + else + dEdxi.tbvec().device(*dev.edevice) += dEdf.tbvec() * xs[0]->tbvec().broadcast(bcast_0); + } +} +DYNET_NODE_INST_DEV_IMPL(ScalarMultiply) + +// ************* ScalarQuotient ************* + +#ifndef __CUDACC__ + +string ScalarQuotient::as_string(const vector& arg_names) const { + ostringstream s; + s << arg_names[0] << " / " << arg_names[1]; + return s.str(); +} + +Dim ScalarQuotient::dim_forward(const vector& xs) const { + DYNET_ARG_CHECK(xs.size() == 2, "Failed input count check in ScalarQuotient") + Dim d = xs[0].truncate(); + DYNET_ARG_CHECK(xs[1].batch_size() == 1, + "Mismatched input dimensions in ScalarQuotient: " << xs); + d.bd = max(xs[1].bd, d.bd); + return d; +} + +#endif + +template +void ScalarQuotient::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + DYNET_ASSERT(xs.size() == 2, "Failed dimension check in ScalarQuotient::forward (cdiv)"); + Eigen::array bcast_0 = {1, (int) (fx.d.bd == xs[0]->d.bd ? 1 : fx.d.bd)}; + Eigen::array bcast_1 = {(int) fx.d.batch_size(), (int) (fx.d.bd == xs[1]->d.bd ? 
1 : fx.d.bd)}; + fx.tbvec().device(*dev.edevice) = xs[0]->tbvec().broadcast(bcast_0) / xs[1]->tbvec().broadcast(bcast_1); +} + +template +void ScalarQuotient::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + DYNET_ASSERT(i < 2, "Failed dimension check in ScalarQuotient::backward (cdiv)"); + Eigen::array bcast = {(int)fx.d.batch_size(), (int)(fx.d.bd == xs[1]->d.bd ? 1 : fx.d.bd)}; + Eigen::array bcast2 = {1, (int)(fx.d.bd == xs[0]->d.bd ? 1 : fx.d.bd)}; + Eigen::array red_axis_0 = {0}, red_axis_1 = {1}; + Eigen::array red_axes_01 = {0, 1}; + if (i == 0) { + if (xs[0]->d.bd == 1) + dEdxi.tvec().device(*dev.edevice) += (dEdf.tbvec() / xs[1]->tbvec().broadcast(bcast)).sum(red_axis_1); + else + dEdxi.tbvec().device(*dev.edevice) += dEdf.tbvec() / xs[1]->tbvec().broadcast(bcast); + } else { + if (xs[1]->d.bd == 1) + dEdxi.t<0>().device(*dev.edevice) += - (dEdf.tbvec() * xs[0]->tbvec().broadcast(bcast2)).sum(red_axes_01) / xs[1]->t<0>().square(); + else + dEdxi.tb<0>().device(*dev.edevice) += - (dEdf.tbvec() * xs[0]->tbvec().broadcast(bcast2)).sum(red_axis_0) / xs[1]->tb<0>().square(); + } +} +DYNET_NODE_INST_DEV_IMPL(ScalarQuotient) + +} diff --git a/dynet/nodes-arith-sum.cc b/dynet/nodes-arith-sum.cc new file mode 100644 index 000000000..d3b96f24f --- /dev/null +++ b/dynet/nodes-arith-sum.cc @@ -0,0 +1,348 @@ +#include "dynet/nodes.h" + +#include "dynet/nodes-macros.h" + +using namespace std; + +namespace dynet { + +// ************* Sum ************* + +#ifndef __CUDACC__ + +string Sum::as_string(const vector& arg_names) const { + ostringstream s; + s << arg_names[0]; + for (unsigned i = 1; i < arg_names.size(); ++i) + s << " + " << arg_names[i]; + return s.str(); +} + +Dim Sum::dim_forward(const vector& xs) const { + Dim d = xs[0].truncate(); + unsigned int batch = d.bd; + for (unsigned i = 1; i < xs.size(); ++i) { + DYNET_ARG_CHECK(d.single_batch() == 
xs[i].truncate().single_batch(), + "Mismatched input dimensions in Sum: " << xs); + batch = max(xs[i].bd, batch); + } + d = xs[0]; d.bd = batch; + return d; +} + +int Sum::autobatch_sig(const ComputationGraph &cg, SigMap &sm) const { + Sig s(nt::sum); + s.add_node(args.size()); + // Two cases: + // If unbatched, it's just an elementwise addition + // TODO: This will be more efficient if we identify arguments that are used + // multiple times (e.g. bias vectors) + if(dim.bd == 1) { + s.add_int(-2); + // Otherwise, make sure the dimensions match and that batched nodes don't intersect + } else { + s.add_dim(dim); + for(auto ai : args) { + s.add_int(cg.nodes[ai]->dim.bd == 1 ? ai : -1); + } + } + return sm.get_idx(s); +} + +std::vector Sum::autobatch_concat(const ComputationGraph & cg) const { + vector ret(args.size(), 1); + // If batched, true if multiple batched input as well + if(dim.bd != 1) + for(size_t i = 0; i < args.size(); ++i) + ret[i] = cg.nodes[args[i]]->dim.bd == 1 ? 0 : 1; + return ret; +} + +#endif + +template +void Sum::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + const unsigned num_args = xs.size(); + if (num_args == 1) + fx.tvec().device(*dev.edevice) = xs[0]->tvec(); + else if (num_args == 2 && xs[0]->d.bd == xs[1]->d.bd) + fx.tvec().device(*dev.edevice) = xs[0]->tvec() + xs[1]->tvec(); + else if (num_args == 3 && xs[0]->d.bd == xs[1]->d.bd && xs[1]->d.bd == xs[2]->d.bd) + fx.tvec().device(*dev.edevice) = xs[0]->tvec() + xs[1]->tvec() + xs[2]->tvec(); + else if (num_args == 4 && xs[0]->d.bd == xs[1]->d.bd && xs[1]->d.bd == xs[2]->d.bd && xs[2]->d.bd == xs[3]->d.bd) + fx.tvec().device(*dev.edevice) = xs[0]->tvec() + xs[1]->tvec() + xs[2]->tvec() + xs[3]->tvec(); + else { + bool allSameBatchSize = std::all_of(xs.begin(), xs.end(), [&](const Tensor* x) { return x->d.bd == xs[0]->d.bd;}); + if (allSameBatchSize) { + // Since they are all the same batch size, we can easily unroll the addition (results in lower GPU latency 
by merging multiple adds together in one CUDA call): + DYNET_ASSERT(num_args > 4, "Bad loop unrolling in Sum::forward"); // If it was <=4, we would have handled it in the special cases above + fx.tvec().device(*dev.edevice) = xs[0]->tvec() + xs[1]->tvec() + xs[2]->tvec() + xs[3]->tvec(); + + const unsigned remainder = (num_args - 4 ) % 4; + switch (remainder) { + case 0: break; + case 1: fx.tvec().device(*dev.edevice) += xs[4]->tvec(); break; + case 2: fx.tvec().device(*dev.edevice) += xs[4]->tvec() + xs[5]->tvec(); break; + case 3: fx.tvec().device(*dev.edevice) += xs[4]->tvec() + xs[5]->tvec() + xs[6]->tvec(); break; + } + for (unsigned i = 4 + remainder; i < num_args; i += 4) + fx.tvec().device(*dev.edevice) += xs[i]->tvec() + xs[i + 1]->tvec() + xs[i + 2]->tvec() + xs[i + 3]->tvec(); + } + else { + // Not all the same batch size, so need to broadcast in the cases where they differ + TensorTools::zero(fx); +#ifdef __CUDACC__ + Eigen::array bcast({ 1, (int)fx.d.bd }); +#endif + for (unsigned i = 0; i < num_args; ++i) { + if (xs[i]->d.bd == fx.d.bd) { + fx.tvec().device(*dev.edevice) += xs[i]->tvec(); + } + else { +#ifdef __CUDACC__ + fx.tbvec().device(*dev.edevice) += xs[i]->tbvec().broadcast(bcast); +#else + for (unsigned b = 0; b < fx.d.bd; ++b) + fx.tbvec().chip<1>(b).device(*dev.edevice) += xs[i]->tvec(); +#endif + } + } + } + } +} + +template +void Sum::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + if(dEdxi.d.bd == fx.d.bd) { + dEdxi.tvec().device(*dev.edevice) += dEdf.tvec(); + } else { + Eigen::array red_axis = {1}; + dEdxi.tvec().device(*dev.edevice) += dEdf.tbvec().sum(red_axis); + } +} +DYNET_NODE_INST_DEV_IMPL(Sum) + +// ************* SumElements ************* + +#ifndef __CUDACC__ + +string SumElements::as_string(const vector& arg_names) const { + ostringstream s; + s << "sum_elems( " << arg_names[0] << " )"; + return s.str(); +} + +Dim 
SumElements::dim_forward(const vector& xs) const { + DYNET_ARG_CHECK(xs.size() == 1, "Failed input count check in SumElements") + return Dim({1}, xs[0].bd); +} + +#endif + +template +void SumElements::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + DYNET_ARG_CHECK(xs.size() == 1, "Failed dimension check in SumElements::forward"); + Eigen::array red_axis; red_axis[0] = 0; + fx.tb<0>().device(*dev.edevice) = xs[0]->tbvec().sum(red_axis); +} + +template +void SumElements::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + DYNET_ARG_CHECK(i == 0, "Failed dimension check in SumElements::backward"); + Eigen::array bcast = {(int)xs[0]->d.batch_size(), 1}; + dEdxi.tbvec().device(*dev.edevice) += dEdf.tbvec().broadcast(bcast); +} +DYNET_NODE_INST_DEV_IMPL(SumElements) + +// ************* SumDimension ************* + +#ifndef __CUDACC__ + +string SumDimension::as_string(const vector& arg_names) const { + ostringstream s; + s << "sum_dim(matrix=" << arg_names[0] << ',' << dimension << ')'; + return s.str(); +} + +Dim SumDimension::dim_forward(const vector& xs) const { + DYNET_ASSERT(xs.size() == 1, "Failed input count check in SumDimension"); + Dim ret(xs[0]); + ret.delete_dim(dimension); + return ret; +} + +#endif + +template +void SumDimension::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + DYNET_ASSERT(xs.size() == 1, "Failed input count check in SumDimension"); + Eigen::array reduction_axis = {(int)dimension}; + fx.t<1>().device(*dev.edevice) = xs[0]->t<2>().sum(reduction_axis); +} + +template +void SumDimension::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + // TODO: limit to 3-dimensional tensor is arbitrary + Eigen::array bcast = {1,1,1,1}; bcast[dimension] = dEdxi.d[dimension]; + Eigen::array morph =
{(int)dEdxi.d[0],(int)dEdxi.d[1],(int)dEdxi.d[2],(int)dEdxi.d.bd}; morph[dimension] = 1; + dEdxi.tb<3>().device(*dev.edevice) += dEdf.tb<3>().reshape(morph).broadcast(bcast); +} +DYNET_NODE_INST_DEV_IMPL(SumDimension) + +// ************* SumBatches ************* + +#ifndef __CUDACC__ + +string SumBatches::as_string(const vector& arg_names) const { + ostringstream s; + s << "sum_batches( " << arg_names[0] << " )"; + return s.str(); +} + +Dim SumBatches::dim_forward(const vector& xs) const { + DYNET_ARG_CHECK(xs.size() == 1, "Failed input count check in SumBatches") + return xs[0].single_batch(); +} + +#endif + +template +void SumBatches::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + DYNET_ARG_CHECK(xs.size() == 1, "Failed dimension check in SumBatches::forward"); + unsigned num_args = xs[0]->d.bd; +#ifdef __CUDACC__ + Eigen::array red_axis; red_axis[0] = 2; + fx.t<2>().device(*dev.edevice) = xs[0]->tb<2>().sum(red_axis); +#else + // TODO: Is this CPU version really good? Overhead can probably be reduced. 
+ auto res = *fx; + const unsigned remainder = num_args % 4; + switch (remainder) { + case 0: res.setZero(); break; + case 1: res = xs[0]->batch_matrix(0); break; + case 2: res = xs[0]->batch_matrix(0) + xs[0]->batch_matrix(1); break; + case 3: res = xs[0]->batch_matrix(0) + xs[0]->batch_matrix(1) + xs[0]->batch_matrix(2); break; + } + for (unsigned i = remainder; i < num_args; i += 4) + res += xs[0]->batch_matrix(i) + xs[0]->batch_matrix(i+1) + xs[0]->batch_matrix(i+2) + xs[0]->batch_matrix(i+3); +#endif +} + +template +void SumBatches::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + DYNET_ARG_CHECK(i == 0, "Failed dimension check in SumBatches::backward"); +#ifdef __CUDACC__ + Eigen::array bcast({1, 1, (int)dEdxi.d.bd}); // dEdf holds a single batch element (fx.d.bd == 1); replicate it over every batch element of dEdxi + dEdxi.tb<2>().device(*dev.edevice) += dEdf.tb<2>().broadcast(bcast); +#else + for (unsigned b = 0; b < dEdxi.d.bd; ++b) + dEdxi.batch_matrix(b) += *dEdf; +#endif +} +DYNET_NODE_INST_DEV_IMPL(SumBatches) + +// ************* AddVectorToAllColumns ************* + +#ifndef __CUDACC__ + +string AddVectorToAllColumns::as_string(const vector& arg_names) const { + ostringstream os; + os << "colwise_add(" << arg_names[0] << ", " << arg_names[1] << ')'; + return os.str(); +} + +Dim AddVectorToAllColumns::dim_forward(const vector& xs) const { + DYNET_ARG_CHECK(xs.size() == 2 && + xs[0].rows() == xs[1].rows() && + xs[0].ndims() == 2 && + (xs[1].ndims() == 1 || (xs[1].ndims() == 2 && xs[1].cols() == 1)), + "Bad input dimensions in AddVectorToAllColumns: " << xs); + return Dim({xs[0][0], xs[0][1]}, max(xs[0].bd,xs[1].bd)); +} + +#endif + +template +void AddVectorToAllColumns::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + // Broadcasting is slow on CPU, so split codepaths +#ifdef __CUDACC__ + if(xs[0]->d.bd >= xs[1]->d.bd) { + Eigen::array bcasts = {1, (int)xs[0]->d[1], (int)(xs[0]->d.bd/xs[1]->d.bd)}; + fx.tb<2>().device(*dev.edevice)
= xs[0]->tb<2>() + xs[1]->tb<2>().broadcast(bcasts); + } else { + DYNET_ASSERT(xs[0]->d.bd == 1, + "Bad dimensions in AddVectorToAllColumns::forward: " << xs[0]->d << ", " << xs[1]->d); + Eigen::array bcasts0 = {1, 1, (int)xs[1]->d.bd}; + Eigen::array bcasts1 = {1, (int)xs[0]->d[1], 1}; + fx.tb<2>().device(*dev.edevice) = xs[0]->tb<2>().broadcast(bcasts0) + xs[1]->tb<2>().broadcast(bcasts1); + } +#else + // First, copy the matrix into fx (repeating it per batch element if it is unbatched) + if(xs[0]->d.bd == fx.d.bd) + fx.tvec().device(*dev.edevice) = xs[0]->tvec(); + else + for(size_t b = 0; b < fx.d.bd; ++b) + fx.tbvec().chip<1>(b).device(*dev.edevice) = xs[0]->tvec(); + // Second, add the vector to every column + if(xs[1]->d.bd == fx.d.bd) { + for(size_t i = 0; i < xs[0]->d[1]; ++i) + fx.tb<2>().chip<1>(i).device(*dev.edevice) += xs[1]->tb<1>(); + } else { + for(size_t b = 0; b < fx.d.bd; ++b) + for(size_t i = 0; i < fx.d[1]; ++i) + fx.tb<2>().chip<2>(b).chip<1>(i).device(*dev.edevice) += xs[1]->t<1>(); + } +#endif +} + +template +void AddVectorToAllColumns::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + DYNET_ASSERT(i < 2, "Failed dimension check in AddVectorToAllColumns::backward"); + // TODO: profile on CPU and see whether the chip version is better + if (i == 0) { // x + if(dEdf.d.bd == dEdxi.d.bd) { + dEdxi.tvec().device(*dev.edevice) += dEdf.tvec(); + } else { + Eigen::array red_axis = {2}; + dEdxi.t<2>().device(*dev.edevice) += dEdf.tb<2>().sum(red_axis); + } + } else { // bias + if(dEdf.d.bd == dEdxi.d.bd) { + Eigen::array red_axis = {1}; + dEdxi.tb<1>().device(*dev.edevice) += dEdf.tb<2>().sum(red_axis); + } else { + DYNET_ASSERT(dEdxi.d.bd == 1, + "Bad dimensions in AddVectorToAllColumns::backward: " << xs[0]->d << ", " << xs[1]->d); + Eigen::array red_axis = {1,2}; + dEdxi.t<1>().device(*dev.edevice) += dEdf.tb<2>().sum(red_axis); + } + } +} +DYNET_NODE_INST_DEV_IMPL(AddVectorToAllColumns) + +} diff --git
a/dynet/nodes-unary-arith.cc b/dynet/nodes-arith-unary.cc similarity index 87% rename from dynet/nodes-unary-arith.cc rename to dynet/nodes-arith-unary.cc index 4bf6a41f6..4d779279e 100644 --- a/dynet/nodes-unary-arith.cc +++ b/dynet/nodes-arith-unary.cc @@ -240,4 +240,37 @@ void Abs::backward_dev_impl(const MyDevice & dev, } DYNET_NODE_INST_DEV_IMPL(Abs) +// ************* LogGamma ************* + +#ifndef __CUDACC__ + +string LogGamma::as_string(const vector& arg_names) const { + ostringstream os; + os << "lgamma(" << arg_names[0] << ')'; + return os.str(); +} + +Dim LogGamma::dim_forward(const vector& xs) const { + DYNET_ARG_CHECK(xs.size() == 1, "Failed input count check in LogGamma") + return xs[0]; +} + +#endif + +template +void LogGamma::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + fx.tvec().device(*dev.edevice) = xs[0]->tvec().lgamma(); +} + +template +void LogGamma::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + dEdxi.tvec().device(*dev.edevice) += xs[0]->tvec().digamma() * dEdf.tvec(); +} +DYNET_NODE_INST_DEV_IMPL(LogGamma) + } diff --git a/dynet/nodes-common.cc b/dynet/nodes-common.cc deleted file mode 100644 index 9a6ab7eb4..000000000 --- a/dynet/nodes-common.cc +++ /dev/null @@ -1,996 +0,0 @@ -#include "dynet/nodes.h" - -#include -#include -#include - -#include "dynet/nodes-macros.h" -#include "dynet/globals.h" - -using namespace std; - -namespace dynet { - -string AddVectorToAllColumns::as_string(const vector& arg_names) const { - ostringstream os; - os << "colwise_add(" << arg_names[0] << ", " << arg_names[1] << ')'; - return os.str(); -} - -Dim AddVectorToAllColumns::dim_forward(const vector& xs) const { - DYNET_ARG_CHECK(xs.size() == 2 && - xs[0].rows() == xs[1].rows() && - xs[0].ndims() == 2 && - (xs[1].ndims() == 1 || (xs[1].ndims() == 2 && xs[1].cols() == 1)), - "Bad input dimensions in AddVectorToAllColumns: " << xs); 
- return Dim({xs[0][0], xs[0][1]}, max(xs[0].bd,xs[1].bd)); -} - -string SparsemaxLoss::as_string(const vector& arg_names) const { - ostringstream s; - s << "sparsemax(" << arg_names[0] << ", q)"; - return s.str(); -} - -Dim SparsemaxLoss::dim_forward(const vector& xs) const { - DYNET_ARG_CHECK(xs.size() == 1 && LooksLikeVector(xs[0]), "Bad input dimensions in SparsemaxLoss: " << xs); - return Dim({1}); -} - -string Sparsemax::as_string(const vector& arg_names) const { - ostringstream s; - s << "sparsemax(" << arg_names[0] << ")"; - return s.str(); -} - -Dim Sparsemax::dim_forward(const vector& xs) const { - DYNET_ARG_CHECK(xs.size() == 1 && LooksLikeVector(xs[0]), "Bad input dimensions in Sparsemax: " << xs); - return xs[0]; -} - -string MatrixInverse::as_string(const vector& arg_names) const { - ostringstream s; - s << "inverse(" << arg_names[0] << ")"; - return s.str(); -} - -Dim MatrixInverse::dim_forward(const vector& xs) const { - return xs[0]; -} - -string LogDet::as_string(const vector& arg_names) const { - ostringstream s; - s << "logdet(" << arg_names[0] << ")"; - return s.str(); -} - -Dim LogDet::dim_forward(const vector& xs) const { - DYNET_ARG_CHECK(xs[0].ndims() <= 2 && (xs[0].rows() == xs[0].cols()), "Bad arguments in LogDet: " << xs); - return Dim({1}); -} - -string SelectRows::as_string(const vector& arg_names) const { - ostringstream s; - s << "select_rows(" << arg_names[0] << ", {rsize=" << prows->size() << "})"; - return s.str(); -} - -Dim SelectRows::dim_forward(const vector& xs) const { - DYNET_ARG_CHECK(xs.size() == 1, "Bad arguments in SelectRows: " << xs); - unsigned nrows = prows->size(); - Dim ret(xs[0]); - ret.d[0] = nrows; - return ret; -} - -string SelectCols::as_string(const vector& arg_names) const { - ostringstream s; - s << "select_cols(" << arg_names[0] << ", {csize=" << pcols->size() << "})"; - return s.str(); -} - -Dim SelectCols::dim_forward(const vector& xs) const { - DYNET_ARG_CHECK(xs.size() == 1 && xs[0].ndims() == 2, "Bad 
arguments in SelectCols: " << xs); - unsigned ncols = pcols->size(); - return Dim({xs[0].rows(), ncols}); -} - -string Min::as_string(const vector& arg_names) const { - ostringstream s; - s << "min{" << arg_names[0] << ", " << arg_names[1] << "}"; - return s.str(); -} - -Dim Min::dim_forward(const vector& xs) const { - DYNET_ARG_CHECK(xs.size() == 2 && xs[0] == xs[1], "Bad arguments in Min: " << xs); - return xs[0].bd >= xs[1].bd ? xs[0] : xs[1]; -} - -string Max::as_string(const vector& arg_names) const { - ostringstream s; - s << "max{" << arg_names[0] << ", " << arg_names[1] << "}"; - return s.str(); -} - -Dim Max::dim_forward(const vector& xs) const { - DYNET_ARG_CHECK(xs.size() == 2 && xs[0] == xs[1], "Bad arguments in Max: " << xs); - return xs[0].bd >= xs[1].bd ? xs[0] : xs[1]; -} - -string TraceOfProduct::as_string(const vector& arg_names) const { - ostringstream s; - s << "Tr(" << arg_names[0] << " * " << arg_names[1] << "^T)"; - return s.str(); -} - -Dim TraceOfProduct::dim_forward(const vector& xs) const { - DYNET_ARG_CHECK(xs.size() == 2 && xs[0] == xs[1], "Bad arguments in TraceOfProduct: " << xs); - return Dim({1}, max(xs[0].bd, xs[1].bd)); -} - -string ConstScalarMultiply::as_string(const vector& arg_names) const { - ostringstream s; - s << arg_names[0] << " * " << alpha; - return s.str(); -} - -Dim ConstScalarMultiply::dim_forward(const vector& xs) const { - DYNET_ARG_CHECK(xs.size() == 1, "ConstScalarMultiply expects one argument: " << xs); - return xs[0]; -} - -string Transpose::as_string(const vector& arg_names) const { - ostringstream s; - s << "transpose("<< arg_names[0] << ", "; - for(size_t i = 0; i < dims.size(); ++i) - s << (i == 0?'{':',') << dims[i]; - s << "})"; - return s.str(); -} - -Dim Transpose::dim_forward(const vector& xs) const { - DYNET_ARG_CHECK(xs.size() == 1, "Bad arguments to Transpose: " << xs); - DYNET_ARG_CHECK(xs[0].nd == dims.size() || xs[0].num_nonone_dims() == 1, "Dimensions passed to transpose (" << dims.size() << ") 
must be equal to dimensions in input tensor (" << xs[0].nd << ')'); - Dim ret(xs[0]); - ret.nd = dims.size(); - for(size_t i = 0; i < dims.size(); ++i) - ret.d[i] = xs[0][dims[i]]; - return ret; -} - -string Reshape::as_string(const vector& arg_names) const { - ostringstream s; - s << "reshape(" << arg_names[0] << " --> " << to << ')'; - return s.str(); -} - -Dim Reshape::dim_forward(const vector& xs) const { - DYNET_ARG_CHECK(xs.size() == 1, "Failed input count check in Reshape") - if(to.size() == xs[0].size()) { - return to; - } else { - DYNET_ARG_CHECK(to.batch_elems() == 1 && to.batch_size() == xs[0].batch_size(), - "Bad arguments to Reshape: " << to << ", " << xs[0]); - Dim ret(to); - ret.bd = xs[0].batch_elems(); - return ret; - } -} - -string KMHNGram::as_string(const vector& arg_names) const { - ostringstream s; - s << "kmh-ngram(" << arg_names[0] << ')'; - return s.str(); -} - -Dim KMHNGram::dim_forward(const vector& xs) const { - DYNET_ARG_CHECK(xs[0].ndims() == 2, "Bad input dimensions in KMHNGram: " << xs); - const unsigned new_cols = xs[0].cols() - n + 1; - DYNET_ARG_CHECK(new_cols >= 1, "Bad input dimensions in KMHNGram: " << xs); - return Dim({xs[0][0], new_cols}); -} - -string GaussianNoise::as_string(const vector& arg_names) const { - ostringstream s; - s << arg_names[0] << " + N(0," << stddev << ')'; - return s.str(); -} - -Dim GaussianNoise::dim_forward(const vector& xs) const { - DYNET_ARG_CHECK(xs.size() == 1, "Failed input count check in GaussianNoise") - return xs[0]; -} - -string Dropout::as_string(const vector& arg_names) const { - ostringstream s; - s << "dropout(" << arg_names[0] << ",p=" << p << ')'; - return s.str(); -} - -Dim Dropout::dim_forward(const vector& xs) const { - DYNET_ARG_CHECK(xs.size() == 1, "Failed input count check in Dropout") - return xs[0]; -} - -string DropoutBatch::as_string(const vector& arg_names) const { - ostringstream s; - s << "dropout_batch(" << arg_names[0] << ",p=" << p << ')'; - return s.str(); -} - -Dim 
DropoutBatch::dim_forward(const vector& xs) const { - DYNET_ARG_CHECK(xs.size() == 1, "Failed input count check in DropoutBatch") - return xs[0]; -} - -string DropoutDim::as_string(const vector& arg_names) const { - ostringstream s; - s << "dropout_dim(" << arg_names[0] << ",p=" << p << ')'; - return s.str(); -} - -Dim DropoutDim::dim_forward(const vector& xs) const { - DYNET_ARG_CHECK(xs.size() == 1, "Failed input count check in DropoutDim") - DYNET_ARG_CHECK(xs[0].nd < 4, "DropoutDim only supports tensor up to order 3 + batch dimension, got tensor of order"< dimension, "In DropoutDim : tried to drop along dimension "<& arg_names) const { - ostringstream s; - s << "block_dropout(" << arg_names[0] << ",dropout_probability=" << dropout_probability << ')'; - return s.str(); -} - -Dim BlockDropout::dim_forward(const vector& xs) const { - DYNET_ARG_CHECK(xs.size() == 1, "Failed input count check in BlockDropout") - return xs[0]; -} - -string ConstantPlusX::as_string(const vector& arg_names) const { - ostringstream s; - s << c << " + " << arg_names[0]; - return s.str(); -} - -Dim ConstantPlusX::dim_forward(const vector& xs) const { - DYNET_ARG_CHECK(xs.size() == 1, "Failed input count check in ConstantPlusX") - return xs[0]; -} - -string ConstantMinusX::as_string(const vector& arg_names) const { - ostringstream s; - s << c << " - " << arg_names[0]; - return s.str(); -} - -Dim ConstantMinusX::dim_forward(const vector& xs) const { - DYNET_ARG_CHECK(xs.size() == 1, "Failed input count check in ConstantMinusX") - return xs[0]; -} - -string LogSumExp::as_string(const vector& arg_names) const { - ostringstream s; - s << "log(exp " << arg_names[0]; - for (unsigned i = 1; i < arg_names.size(); ++i) - s << " + exp " << arg_names[i]; - s << ")"; - return s.str(); -} - -Dim LogSumExp::dim_forward(const vector& xs) const { - Dim d = xs[0].truncate(); - for (unsigned i = 1; i < xs.size(); ++i) { - DYNET_ARG_CHECK(d.single_batch() == xs[i].truncate().single_batch(), - "Mismatched 
input dimensions in LogSumExp: " << xs); - d.bd = max(xs[i].bd, d.bd); - } - return d; -} -string Sum::as_string(const vector& arg_names) const { - ostringstream s; - s << arg_names[0]; - for (unsigned i = 1; i < arg_names.size(); ++i) - s << " + " << arg_names[i]; - return s.str(); -} - -int Sum::autobatch_sig(const ComputationGraph &cg, SigMap &sm) const { - Sig s(nt::sum); - s.add_node(args.size()); - // Two cases: - // If unbatched, it's just an elementwise addition - // TODO: This will be more efficient if we identify arguments that are used - // multiple times (e.g. bias vectors) - if(dim.bd == 1) { - s.add_int(-2); - // Otherwise, make sure the dimensions match and that batched nodes don't intersect - } else { - s.add_dim(dim); - for(auto ai : args) { - s.add_int(cg.nodes[ai]->dim.bd == 1 ? ai : -1); - } - } - return sm.get_idx(s); -} - -std::vector Sum::autobatch_concat(const ComputationGraph & cg) const { - vector ret(args.size(), 1); - // If batched, true if multiple batched input as well - if(dim.bd != 1) - for(size_t i = 0; i < args.size(); ++i) - ret[i] = cg.nodes[args[i]]->dim.bd == 1 ? 
0 : 1; - return ret; -} - - -Dim Sum::dim_forward(const vector& xs) const { - Dim d = xs[0].truncate(); - unsigned int batch = d.bd; - for (unsigned i = 1; i < xs.size(); ++i) { - DYNET_ARG_CHECK(d.single_batch() == xs[i].truncate().single_batch(), - "Mismatched input dimensions in Sum: " << xs); - batch = max(xs[i].bd, batch); - } - d = xs[0]; d.bd = batch; - return d; -} - -string SumElements::as_string(const vector& arg_names) const { - ostringstream s; - s << "sum_elems( " << arg_names[0] << " )"; - return s.str(); -} - -Dim SumElements::dim_forward(const vector& xs) const { - DYNET_ARG_CHECK(xs.size() == 1, "Failed input count check in SumElements") - return Dim({1}, xs[0].bd); -} - -string SumBatches::as_string(const vector& arg_names) const { - ostringstream s; - s << "sum_batches( " << arg_names[0] << " )"; - return s.str(); -} - -Dim SumBatches::dim_forward(const vector& xs) const { - DYNET_ARG_CHECK(xs.size() == 1, "Failed input count check in SumBatches") - return xs[0].single_batch(); -} - -string MomentElements::as_string(const vector& arg_names) const { - ostringstream s; - s << "moment_elems( expression=" << arg_names[0] << ", order=" << order << " )"; - return s.str(); -} - -Dim MomentElements::dim_forward(const vector& xs) const { - DYNET_ARG_CHECK(xs.size() == 1, "Failed input count check in MomentElements") - DYNET_ARG_CHECK(order>= 1, "Order of moment should be >=1 in MomentElements (recieved "<& arg_names) const { - ostringstream s; - s << "moment_batches( expression=" << arg_names[0] << ", order=" << order << " )"; - return s.str(); -} - -Dim MomentBatches::dim_forward(const vector& xs) const { - DYNET_ARG_CHECK(xs.size() == 1, "Failed input count check in MomentBatches") - DYNET_ARG_CHECK(order>= 1, "Order of moment should be >=1 in MomentBatches (recieved "<& arg_names) const { - ostringstream s; - s << "std_elems( expression=" << arg_names[0] << " )"; - return s.str(); -} - -Dim StdElements::dim_forward(const vector& xs) const { - 
DYNET_ARG_CHECK(xs.size() == 1, "Failed input count check in StdElements") - return Dim({1}, xs[0].bd); -} - -string StdBatches::as_string(const vector& arg_names) const { - ostringstream s; - s << "std_batches( expression=" << arg_names[0] << " )"; - return s.str(); -} - -Dim StdBatches::dim_forward(const vector& xs) const { - DYNET_ARG_CHECK(xs.size() == 1, "Failed input count check in StdBatches") - - return xs[0].single_batch(); -} - -string StdDimension::as_string(const vector& arg_names) const { - ostringstream s; - s << "moment_dim(expression=" << arg_names[0] << ',' << dimension <<'}'; - return s.str(); -} - -Dim StdDimension::dim_forward(const vector& xs) const { - DYNET_ASSERT(xs.size() == 1, "Failed input count check in StdDimension"); - DYNET_ARG_CHECK(xs[0].nd <= 3, "StdDimension implemented up to tensors of order 3 (with minibatch) for now") - DYNET_ARG_CHECK(dimension < xs[0].nd, "dimension " << dimension << " is out of bounds of tensor of order " << xs[0].nd << " in StdDimension" ) - Dim ret(xs[0]); - ret.delete_dim(dimension); - return ret; -} - -string MomentDimension::as_string(const vector& arg_names) const { - ostringstream s; - s << "moment_dim(expression=" << arg_names[0] << ',' << dimension << ", order="<& xs) const { - DYNET_ASSERT(xs.size() == 1, "Failed input count check in MomentDimension"); - DYNET_ARG_CHECK(xs[0].nd <= 3, "MomentDimension implemented up to tensors of order 3 (with minibatch) for now") - DYNET_ARG_CHECK(dimension < xs[0].nd, "dimension " << dimension << " is out of bounds of tensor of order " << xs[0].nd << " in MomentDimension" ) - DYNET_ARG_CHECK(order>= 1, "Order of moment should be >=1 in MomentDimension (recieved "<& arg_names) const { - ostringstream s; - s << "average(" << arg_names[0]; - for (unsigned i = 1; i < arg_names.size(); ++i) - s << ", " << arg_names[i]; - s << ")"; - return s.str(); -} - -Dim Average::dim_forward(const vector& xs) const { - Dim d(xs[0]); - for (unsigned i = 1; i < xs.size(); ++i) { - 
DYNET_ARG_CHECK(xs[0].single_batch() == xs[i].single_batch(), - "Mismatched input dimensions in Average: " << xs); - d.bd = max(xs[i].bd, d.bd); - } - return d; -} - -string Erf::as_string(const vector& arg_names) const { - ostringstream s; - s << "erf(" << arg_names[0] << ')'; - return s.str(); -} - -Dim Erf::dim_forward(const vector& xs) const { - DYNET_ARG_CHECK(xs.size() == 1, "Failed input count check in Erf") - return xs[0]; -} - -string Tanh::as_string(const vector& arg_names) const { - ostringstream s; - s << "tanh(" << arg_names[0] << ')'; - return s.str(); -} - -Dim Tanh::dim_forward(const vector& xs) const { - DYNET_ARG_CHECK(xs.size() == 1, "Failed input count check in Tanh") - return xs[0]; -} - -string LogGamma::as_string(const vector& arg_names) const { - ostringstream os; - os << "lgamma(" << arg_names[0] << ')'; - return os.str(); -} - -Dim LogGamma::dim_forward(const vector& xs) const { - DYNET_ARG_CHECK(xs.size() == 1, "Failed input count check in LogGamma") - return xs[0]; -} - -string Concatenate::as_string(const vector& arg_names) const { - ostringstream os; - os << "concat({" << arg_names[0]; - for (unsigned i = 1; i < arg_names.size(); ++i) { - os << ',' << arg_names[i]; - } - os << "}, " << dimension << ')'; - return os.str(); -} - -Dim Concatenate::dim_forward(const vector& xs) const { - unsigned new_rows = 0; - Dim dr = xs[0]; - for (auto c : xs) { - if(dr.nd < c.nd) dr.resize(c.nd); - if(c.nd < dr.nd) c.resize(dr.nd); - new_rows += c[dimension]; - dr.set(dimension, c[dimension]); - DYNET_ARG_CHECK(dr.single_batch() == c.single_batch(), - "Bad input dimensions in Concatenate: " << xs); - dr.bd = max(dr.bd, c.bd); - } - dr.nd = max(xs[0].nd, dimension+1); - dr.set(dimension, new_rows); - return dr; -} - -int Concatenate::autobatch_sig(const ComputationGraph &cg, SigMap &sm) const { - Sig s(nt::concat); - for (auto arg:args) s.add_dim(cg.nodes[arg]->dim); - return sm.get_idx(s); -} - - -string ConcatenateToBatch::as_string(const vector& 
arg_names) const { - ostringstream os; - os << "concat_batch_elems(" << arg_names[0]; - for (unsigned i = 1; i < arg_names.size(); ++i) { - os << ',' << arg_names[i]; - } - os << ')'; - return os.str(); -} - -Dim ConcatenateToBatch::dim_forward(const vector& xs) const { - DYNET_ASSERT(xs.size() > 0, "Failed input count check in ConcatenateToBatch") - Dim d(xs[0]); - for (unsigned i = 1; i < xs.size(); ++i) { - DYNET_ARG_CHECK(xs[0].single_batch() == xs[i].single_batch(), - "Mismatched input dimensions in ConcatenateToBatch: " << xs); - d.bd += xs[i].bd; - } - return d; -} - -string PairwiseRankLoss::as_string(const vector& arg_names) const { - ostringstream os; - os << "max(0, " << margin << " - " << arg_names[0] << " + " << arg_names[1] << ')'; - return os.str(); -} - -Dim PairwiseRankLoss::dim_forward(const vector& xs) const { - DYNET_ARG_CHECK(xs.size() == 2 && - xs[0] == xs[1] && - xs[0].rows() == 1 && - (xs[0].ndims() == 1 || xs[0].ndims() == 2), - "Bad input dimensions in PairwiseRankLoss: " << xs); - return xs[0].bd >= xs[1].bd ? 
xs[0] : xs[1]; -} - -string Identity::as_string(const vector& arg_names) const { - return arg_names[0]; -} - -Dim Identity::dim_forward(const vector& xs) const { - DYNET_ARG_CHECK(xs.size() == 1, "Failed input count check in Identity") - return xs[0]; -} - -string NoBackprop::as_string(const vector& arg_names) const { - ostringstream s; - s << "nobackprop(" << arg_names[0] << ')'; - return s.str(); -} - -Dim NoBackprop::dim_forward(const vector& xs) const { - DYNET_ARG_CHECK(xs.size() == 1, "Failed input count check in NoBackprop") - return xs[0]; -} - -string FlipGradient::as_string(const vector& arg_names) const { - ostringstream s; - s << "flip_gradient(" << arg_names[0] << ')'; - return s.str(); -} - -Dim FlipGradient::dim_forward(const vector& xs) const { - DYNET_ARG_CHECK(xs.size() == 1, "Failed input count check in FlipGradient"); - return xs[0]; -} - -string Softmax::as_string(const vector& arg_names) const { - ostringstream s; - s << "softmax(" << arg_names[0] << ')'; - return s.str(); -} - -Dim Softmax::dim_forward(const vector& xs) const { - DYNET_ARG_CHECK(xs.size() == 1, "Failed input count check in Softmax"); - DYNET_ARG_CHECK(xs[0].nd <= 2, "Bad input dimensions in Softmax, must be 2 or fewer: " << xs); - return xs[0]; -} - -int Softmax::autobatch_sig(const ComputationGraph & cg, SigMap &sm) const { - Sig s(nt::softmax); - s.add_dim(dim); - return sm.get_idx(s); -} -std::vector Softmax::autobatch_concat(const ComputationGraph & cg) const { - return vector(1, 1); -} - -string SoftSign::as_string(const vector& arg_names) const { - ostringstream s; - s << "softsign(" << arg_names[0] << ')'; - return s.str(); -} - -Dim SoftSign::dim_forward(const vector& xs) const { - DYNET_ARG_CHECK(xs.size() == 1, "Failed input count check in SoftSign"); - DYNET_ARG_CHECK(LooksLikeVector(xs[0]), "Bad input dimensions in SoftSign: " << xs); - return xs[0]; -} - -string LogSoftmax::as_string(const vector& arg_names) const { - ostringstream s; - s << "log_softmax(" << 
arg_names[0] << ')'; - return s.str(); -} - -Dim LogSoftmax::dim_forward(const vector& xs) const { - DYNET_ARG_CHECK(xs.size() == 1, "Failed input count check in LogSoftmax") - DYNET_ARG_CHECK(xs[0].nd <= 2, "Bad input dimensions in LogSoftmax, must be 2 or fewer: " << xs); - return xs[0]; -} - -string RestrictedLogSoftmax::as_string(const vector& arg_names) const { - ostringstream s; - s << "r_log_softmax(" << arg_names[0] << ')'; - return s.str(); -} - -Dim RestrictedLogSoftmax::dim_forward(const vector& xs) const { - DYNET_ARG_CHECK(xs.size() == 1, "Failed input count check in RestrictedLogSoftmax") - DYNET_ARG_CHECK(LooksLikeVector(xs[0]), "Bad input dimensions in RestrictedLogSoftmax: " << xs); - return xs[0]; -} - -string PickElement::as_string(const vector& arg_names) const { - ostringstream s; - s << "pick(" << arg_names[0] << ','; - if(pval) { - s << *pval; - } else { - DYNET_ASSERT(pvals, "Have neither index nor index vector in PickElement"); - s << '['; - if(pvals->size()) { - s << (*pvals)[0]; - for(size_t i = 1; i < pvals->size(); ++i) - s << ',' << (*pvals)[i]; - } - s << "]"; - } - s << ", " << dimension << ")"; - return s.str(); -} - -Dim PickElement::dim_forward(const vector& xs) const { - DYNET_ARG_CHECK(xs.size() == 1, "Failed input count check in PickElement"); - DYNET_ARG_CHECK(dimension < xs[0].nd, - "Tried to PickElement on dimension " << dimension << " bigger than input " << xs[0]); - DYNET_ARG_CHECK(xs[0].nd < 4, - "PickElement not currently supported for tensors of 4 or more dimensions."); - - Dim ret(xs[0]); - if (pvals){ - DYNET_ARG_CHECK(xs[0].bd == 1 || xs[0].bd == pvals->size(), - "Number of elements in the passed-in index vector (" << pvals->size() << ")" - " did not match number of elements in mini-batch elements in expression (of dimension " << xs[0].bd << ") in PickElement"); - ret.bd = pvals->size(); - } - - ret.delete_dim(dimension); - return ret; -} - -// x_1 is a vector -// y = (x_1)[start:end] -string 
PickRange::as_string(const vector& arg_names) const { - ostringstream s; - s << "slice(" << arg_names[0] << ',' << start << ':' << end << ", dim=" << dim << ')'; - return s.str(); -} - -Dim PickRange::dim_forward(const vector& xs) const { - DYNET_ARG_CHECK(xs.size() == 1, "Failed input count check in PickRange"); - DYNET_ARG_CHECK(dim < xs[0].nd && start < end && xs[0][dim] >= end, - "Bad input dimensions or range in PickRange: " << xs << " range(" << start << ", " << end << ") with dim=" << dim); - Dim ret = xs[0]; ret.d[dim] = end-start; - return ret; -} - -int PickRange::autobatch_sig(const ComputationGraph & cg, SigMap &sm) const { - Sig s(nt::pickrange); - const Dim &in_dim = cg.nodes[args[0]]->dim; - s.add_dim(in_dim); - s.add_node(start); - s.add_node(end); - return sm.get_idx(s); -} - -string PickBatchElements::as_string(const vector& arg_names) const { - ostringstream s; - s << "pick_batch_elems(" << arg_names[0] << ','; - if (pval) { - s << *pval; - } else { - DYNET_ASSERT(pvals, "Have neither index nor index vector in PickBatchElements"); - s << '['; - if (pvals->size()) { - s << (*pvals)[0]; - for (size_t i = 1; i < pvals->size(); ++i) - s << ',' << (*pvals)[i]; - } - s << "]"; - } - s << ")"; - return s.str(); -} - -Dim PickBatchElements::dim_forward(const vector& xs) const { - DYNET_ARG_CHECK(xs.size() == 1, "Failed input count check in PickBatchElements") - DYNET_ARG_CHECK(xs[0].nd < 4, "PickElement not currently supported for tensors of 4 or more dimensions."); - Dim ret(xs[0]); - if (pval) { - // set batch size to one. 
- ret.bd = 1; - } else { - DYNET_ASSERT(pvals, "Have neither index nor index vector in PickBatchElements"); - ret.bd = pvals->size(); - } - return ret; -} - -string CwiseMultiply::as_string(const vector& arg_names) const { - ostringstream s; - s << arg_names[0] << " \\cdot " << arg_names[1]; - return s.str(); -} - -Dim CwiseMultiply::dim_forward(const vector& xs) const { - DYNET_ARG_CHECK(xs.size() == 2, "Failed input count check in CwiseMultiply") - Dim d = xs[0].truncate(); - DYNET_ARG_CHECK(d.single_batch() == xs[1].truncate().single_batch(), - "Mismatched input dimensions in CwiseMultiply: " << xs); - d.bd = max(xs[1].bd, d.bd); - return d; -} - -int CwiseMultiply::autobatch_sig(const ComputationGraph & cg, SigMap &sm) const { - // TODO: This does not handle the case where dimensions differ - Sig s(nt::cmult); - return cg.nodes[args[0]]->dim == cg.nodes[args[1]]->dim ? sm.get_idx(s) : 0; -} - -std::vector CwiseMultiply::autobatch_concat(const ComputationGraph & cg) const { - return vector(2, 1); -} - -string ScalarAdd::as_string(const vector& arg_names) const { - ostringstream s; - s << arg_names[0] << " + " << arg_names[1]; - return s.str(); -} - -Dim ScalarAdd::dim_forward(const vector& xs) const { - DYNET_ARG_CHECK(xs.size() == 2, "Failed input count check in ScalarAdd") - Dim d = xs[0].truncate(); - DYNET_ARG_CHECK(xs[1].batch_size() == 1, - "Mismatched input dimensions in ScalarAdd: " << xs); - d.bd = max(xs[1].bd, d.bd); - return d; -} - -string ScalarMultiply::as_string(const vector& arg_names) const { - ostringstream s; - s << arg_names[0] << " \\cdot " << arg_names[1]; - return s.str(); -} - -Dim ScalarMultiply::dim_forward(const vector& xs) const { - DYNET_ARG_CHECK(xs.size() == 2, "Failed input count check in ScalarMultiply") - Dim d = xs[1]; - DYNET_ARG_CHECK(xs[0].batch_size() == 1, - "Mismatched input dimensions in ScalarMultiply: " << xs); - d.bd = max(xs[0].bd, d.bd); - return d; -} - -string ScalarQuotient::as_string(const vector& arg_names) 
const { - ostringstream s; - s << arg_names[0] << " / " << arg_names[1]; - return s.str(); -} - -Dim ScalarQuotient::dim_forward(const vector& xs) const { - DYNET_ARG_CHECK(xs.size() == 2, "Failed input count check in ScalarQuotient") - Dim d = xs[0].truncate(); - DYNET_ARG_CHECK(xs[1].batch_size() == 1, - "Mismatched input dimensions in ScalarQuotient: " << xs); - d.bd = max(xs[1].bd, d.bd); - return d; -} - - -string Pow::as_string(const vector& arg_names) const { - ostringstream s; - s << arg_names[0] << " ** " << arg_names[1]; - return s.str(); -} - -Dim Pow::dim_forward(const vector& xs) const { - DYNET_ARG_CHECK(xs.size() == 2, "Failed input count check in Pow") - Dim d = xs[0].truncate(); - DYNET_ARG_CHECK(xs[1].truncate().single_batch().size() == 1, "Bad input dimensions in Pow: " << xs); - return d; -} - -string CwiseQuotient::as_string(const vector& arg_names) const { - ostringstream s; - s << arg_names[0] << " / " << arg_names[1]; - return s.str(); -} - -Dim CwiseQuotient::dim_forward(const vector& xs) const { - DYNET_ARG_CHECK(xs.size() == 2, "Failed input count check in CwiseQuotient") - Dim d = xs[0].truncate(); - DYNET_ARG_CHECK(d.single_batch() == xs[1].truncate().single_batch(), "Bad input dimensions in CwiseQuotient: " << xs); - d.bd = max(xs[1].bd, d.bd); - return d; -} - -string Rectify::as_string(const vector& arg_names) const { - ostringstream s; - s << "ReLU(" << arg_names[0] << ')'; - return s.str(); -} - -Dim Rectify::dim_forward(const vector& xs) const { - DYNET_ARG_CHECK(xs.size() == 1, "Failed input count check in Rectify"); - return xs[0]; -} - -string ExponentialLinearUnit::as_string(const vector& arg_names) const { - ostringstream s; - s << "ELU(" << arg_names[0] << ", lambda=" << lambda << ", alpha=" << alpha << ')'; - return s.str(); -} - -Dim ExponentialLinearUnit::dim_forward(const vector& xs) const { - DYNET_ARG_CHECK(xs.size() == 1, "Failed input count check in ExponentialLinearUnit"); - return xs[0]; -} - -string 
PoissonRegressionLoss::as_string(const vector& arg_names) const { - ostringstream s; - s << "-log Poisson(" << pty << "; lambda=\\exp" << arg_names[0] << ')'; - return s.str(); -} - -Dim PoissonRegressionLoss::dim_forward(const vector& xs) const { - DYNET_ARG_CHECK(xs.size() == 1 && xs[0].size() == 1, "Bad input dimensions in PoissonRegressionLoss: " << xs); - return xs[0]; -} - -string LogisticSigmoid::as_string(const vector& arg_names) const { - ostringstream s; - s << "\\sigma(" << arg_names[0] << ')'; - return s.str(); -} - -Dim LogisticSigmoid::dim_forward(const vector& xs) const { - DYNET_ASSERT(xs.size() == 1, "Failed input count check in LogisticSigmoid") - return xs[0]; -} - -string BinaryLogLoss::as_string(const vector& arg_names) const { - ostringstream os; - os << "binary_log_loss(" << arg_names[0] << ", " << arg_names[1] << ')'; - return os.str(); -} - -Dim BinaryLogLoss::dim_forward(const vector& xs) const { - DYNET_ARG_CHECK(xs.size() == 2, "Failed input count check in BinaryLogLoss") - DYNET_ARG_CHECK(xs[0].rows() == 2 || xs[0].ndims() == 1, "Bad input dimensions in BinaryLogLoss: " << xs); - DYNET_ARG_CHECK(xs[1].rows() == 2 || xs[1].ndims() == 1, "Bad input dimensions in BinaryLogLoss: " << xs); - return Dim({1}, max(xs[0].bd, xs[1].bd)); -} - -string Zeroes::as_string(const vector& arg_names) const { - ostringstream s; - s << "zeroes(" << dim << ')'; - return s.str(); -} - -Dim Zeroes::dim_forward(const vector& xs) const { - return dim; -} - -string RandomNormal::as_string(const vector& arg_names) const { - ostringstream s; - s << "random_normal(" << dim << ')'; - return s.str(); -} - -Dim RandomNormal::dim_forward(const vector& xs) const { - return dim; -} - -string RandomBernoulli::as_string(const vector& arg_names) const { - ostringstream s; - s << "random_bernoulli(" << dim << ", " << p << ')'; - return s.str(); -} - -Dim RandomBernoulli::dim_forward(const vector& xs) const { - return dim; -} - -string RandomUniform::as_string(const vector& 
arg_names) const { - ostringstream s; - s << "random_uniform(" << dim << ", " << left << ", " << right << ')'; - return s.str(); -} - -Dim RandomUniform::dim_forward(const vector& xs) const { - return dim; -} - -string RandomGumbel::as_string(const vector& arg_names) const { - ostringstream s; - s << "random_gumbel(" << dim << ", " << mu << ", " << beta << ')'; - return s.str(); -} - -Dim RandomGumbel::dim_forward(const vector& xs) const { - return dim; -} - -string MaxDimension::as_string(const vector& arg_names) const { - ostringstream s; - s << "max_dim(" << arg_names[0] << ", reduced_dim=" << reduced_dim << ')'; - return s.str(); -} - -Dim MaxDimension::dim_forward(const vector& xs) const { - DYNET_ARG_CHECK(xs.size() == 1, "Failed input count check in MaxDimension"); - DYNET_ARG_CHECK(reduced_dim < xs[0].nd, - "Tried to MaxDimension on dimension " << reduced_dim << " bigger than input " << xs[0]); - DYNET_ARG_CHECK(xs[0].nd < 4, - "MaxDimension not currently supported for tensors of 4 or more dimensions."); - Dim ret(xs[0]); - ret.delete_dim(reduced_dim); - return ret; -} - -string MinDimension::as_string(const vector& arg_names) const { - ostringstream s; - s << "min_dim(" << arg_names[0] << ", reduced_dim=" << reduced_dim << ')'; - return s.str(); -} - -Dim MinDimension::dim_forward(const vector& xs) const { - DYNET_ARG_CHECK(xs.size() == 1, "Failed input count check in MinDimension"); - DYNET_ARG_CHECK(reduced_dim < xs[0].nd, - "Tried to MinDimension on dimension " << reduced_dim << " bigger than input " << xs[0]); - DYNET_ARG_CHECK(xs[0].nd < 4, - "MinDimension not currently supported for tensors of 4 or more dimensions."); - Dim ret(xs[0]); - ret.delete_dim(reduced_dim); - return ret; -} - -string WeightNormalization::as_string(const vector& arg_names) const { - ostringstream s; - s << "weight_norm(" << arg_names[0] << ", " << arg_names[1] << ')'; - return s.str(); -} - -Dim WeightNormalization::dim_forward(const vector& xs) const { - 
DYNET_ARG_CHECK(xs.size() == 2, "Failed input count check in WeightNormalization"); - DYNET_ARG_CHECK(1 == xs[1].size()," Size of gain parameter in WeightNormalization should be 1, received " << xs[1].size()); - return xs[0]; -} - -} // namespace dynet diff --git a/dynet/nodes-concat.cc b/dynet/nodes-concat.cc new file mode 100644 index 000000000..f601ef67d --- /dev/null +++ b/dynet/nodes-concat.cc @@ -0,0 +1,148 @@ +#include "dynet/nodes.h" + +#include "dynet/nodes-macros.h" +#include "dynet/functors.h" + +using namespace std; + +namespace dynet { + +// ************* Concatenate ************* + +#ifndef __CUDACC__ + +string Concatenate::as_string(const vector& arg_names) const { + ostringstream os; + os << "concat({" << arg_names[0]; + for (unsigned i = 1; i < arg_names.size(); ++i) { + os << ',' << arg_names[i]; + } + os << "}, " << dimension << ')'; + return os.str(); +} + +Dim Concatenate::dim_forward(const vector& xs) const { + unsigned new_rows = 0; + Dim dr = xs[0]; + for (auto c : xs) { + if(dr.nd < c.nd) dr.resize(c.nd); + if(c.nd < dr.nd) c.resize(dr.nd); + new_rows += c[dimension]; + dr.set(dimension, c[dimension]); + DYNET_ARG_CHECK(dr.single_batch() == c.single_batch(), + "Bad input dimensions in Concatenate: " << xs); + dr.bd = max(dr.bd, c.bd); + } + dr.nd = max(xs[0].nd, dimension+1); + dr.set(dimension, new_rows); + return dr; +} + +int Concatenate::autobatch_sig(const ComputationGraph &cg, SigMap &sm) const { + Sig s(nt::concat); + for (auto arg:args) s.add_dim(cg.nodes[arg]->dim); + return sm.get_idx(s); +} + +#endif + +template +void Concatenate::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + unsigned curr_row = 0; + src_indices.resize(xs.size()); + Eigen::DSizes indices(0,0,0,0,0); + Eigen::DSizes sizes(fx.d[0], fx.d[1], fx.d[2], fx.d[3],static_cast(fx.d.bd)); + for (unsigned i = 0; i < xs.size(); ++i) { + indices[dimension] = src_indices[i] = curr_row; + const unsigned row_size = xs[i]->d[dimension]; + 
sizes[dimension] = row_size; + if(fx.d.bd == xs[i]->d.bd) { + fx.tb<4>().slice(indices, sizes).device(*dev.edevice) = xs[i]->tb<4>(); + } else { + Eigen::array bcast; bcast[0] = bcast[1] = bcast[2] = bcast[3] = 1; bcast[4] = fx.d.bd; + fx.tb<4>().slice(indices, sizes).device(*dev.edevice) = xs[i]->tb<4>().broadcast(bcast); + } + curr_row += row_size; + } +} + +template +void Concatenate::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + DYNET_ASSERT(i < src_indices.size(), "Failed boundary check in Concatenate::backward: " << i << " >= " << src_indices.size()); + Eigen::DSizes indices(0,0,0,0,0); indices[dimension] = src_indices[i]; + Eigen::DSizes sizes(static_cast(dEdxi.d[0]), + static_cast(dEdxi.d[1]), + static_cast(dEdxi.d[2]), + static_cast(dEdxi.d[3]), + static_cast(fx.d.bd)); + if(dEdxi.d.bd == dEdf.d.bd) { + dEdxi.tb<4>().device(*dev.edevice) += dEdf.tb<4>().slice(indices, sizes); + } else { + Eigen::array red_axis; red_axis[0] = 4; + dEdxi.t<4>().device(*dev.edevice) += dEdf.tb<4>().slice(indices, sizes).sum(red_axis); + } +} +DYNET_NODE_INST_DEV_IMPL(Concatenate) + +// ************* ConcatenateToBatch ************* + +#ifndef __CUDACC__ + +string ConcatenateToBatch::as_string(const vector& arg_names) const { + ostringstream os; + os << "concat_batch_elems(" << arg_names[0]; + for (unsigned i = 1; i < arg_names.size(); ++i) { + os << ',' << arg_names[i]; + } + os << ')'; + return os.str(); +} + +Dim ConcatenateToBatch::dim_forward(const vector& xs) const { + DYNET_ASSERT(xs.size() > 0, "Failed input count check in ConcatenateToBatch") + Dim d(xs[0]); + for (unsigned i = 1; i < xs.size(); ++i) { + DYNET_ARG_CHECK(xs[0].single_batch() == xs[i].single_batch(), + "Mismatched input dimensions in ConcatenateToBatch: " << xs); + d.bd += xs[i].bd; + } + return d; +} + +#endif + +template +void ConcatenateToBatch::forward_dev_impl(const MyDevice & dev, const vector& xs, 
Tensor& fx) const { + unsigned curr_e = 0; + src_element_indices.resize(xs.size()); + Eigen::DSizes indices(0,0); + Eigen::DSizes sizes(static_cast(fx.d.batch_size()), 0); + for (unsigned i = 0; i < xs.size(); ++i) { + indices[1] = src_element_indices[i] = curr_e; + sizes[1] = xs[i]->d.bd; + fx.tbvec().slice(indices, sizes).device(*dev.edevice) = xs[i]->tbvec(); + curr_e += xs[i]->d.bd; + } + +} + +template +void ConcatenateToBatch::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + DYNET_ASSERT(i < src_element_indices.size(), "Failed boundary check in ConcatenateToBatch::backward: " << i << " >= " << src_element_indices.size()); + Eigen::DSizes indices(0, static_cast(src_element_indices[i])); + Eigen::DSizes sizes(static_cast(fx.d.batch_size()), static_cast(xs[i]->d.bd)); + dEdxi.tbvec().device(*dev.edevice) += dEdf.tbvec().slice(indices, sizes); +} +DYNET_NODE_INST_DEV_IMPL(ConcatenateToBatch) + +} diff --git a/dynet/nodes-const.cc b/dynet/nodes-const.cc new file mode 100644 index 000000000..3f99a84d4 --- /dev/null +++ b/dynet/nodes-const.cc @@ -0,0 +1,42 @@ +#include "dynet/nodes.h" + +#include "dynet/nodes-macros.h" + +using namespace std; + +namespace dynet { + +// ************* Zeroes ************* + +#ifndef __CUDACC__ + +string Zeroes::as_string(const vector& arg_names) const { + ostringstream s; + s << "zeroes(" << dim << ')'; + return s.str(); +} + +Dim Zeroes::dim_forward(const vector& xs) const { + return dim; +} + +#endif + +template +void Zeroes::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + DYNET_ASSERT(xs.size() == 0, "Failed dimension check in Zeroes::forward"); + TensorTools::zero(fx); +} + +template +void Zeroes::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + DYNET_RUNTIME_ERR("Called backward() on an arity 0 node"); +} 
+DYNET_NODE_INST_DEV_IMPL(Zeroes) + +} diff --git a/dynet/nodes-conv.cc b/dynet/nodes-conv.cc index 8bec337ee..d8c56ee1b 100644 --- a/dynet/nodes-conv.cc +++ b/dynet/nodes-conv.cc @@ -20,77 +20,9 @@ using namespace std; namespace dynet { -#ifndef __CUDACC__ - -string AverageColumns::as_string(const vector& arg_names) const { - ostringstream s; - s << "average_cols(matrix=" << arg_names[0] << ')'; - return s.str(); -} - -Dim AverageColumns::dim_forward(const vector& xs) const { - DYNET_ASSERT(xs.size() == 1 || xs.size() == 2, "Failed input count check in AverageColumns"); - int bd = (xs.size() == 1 ? xs[0].bd : max(xs[0].bd, xs[1].bd)); - return Dim({xs[0].rows()}, bd); -} - -string FoldRows::as_string(const vector& arg_names) const { - ostringstream os; - os << "fold_rows(" << arg_names[0] << ", nrows=" << nrows << ')'; - return os.str(); -} +// ************* Filter1DNarrow ************* -Dim FoldRows::dim_forward(const vector& xs) const { - unsigned orows = xs[0].rows() / nrows; - if ((orows * nrows != xs[0].rows()) || xs.size() != 1 || xs[0].ndims() > 2) { - ostringstream s; s << "Bad input dimensions in FoldRows: " << xs; - throw std::invalid_argument(s.str()); - } - return Dim({orows, xs[0].cols()}); -} - -/* Deprecated -string Conv1DNarrow::as_string(const vector& arg_names) const { - ostringstream os; - os << "conv1d_narrow(" << arg_names[0] << ", f=" << arg_names[1] << ')'; - return os.str(); -} - -Dim Conv1DNarrow::dim_forward(const vector& xs) const { - if (xs.size() != 2) { - ostringstream s; s << "Conv1DNarrow requires two inputs: " << xs; - throw std::invalid_argument(s.str()); - } - int ocols = xs[0].cols() - xs[1].cols() + 1; - if (xs[0].ndims() != 2 || xs[1].ndims() != 2 || - xs[0].rows() != xs[1].rows() || - ocols < 1) { - ostringstream s; s << "Bad input dimensions in Conv1DNarrow: " << xs; - throw std::invalid_argument(s.str()); - } - return Dim({xs[0].rows(), (unsigned)ocols}); -} - -string Conv1DWide::as_string(const vector& arg_names) const { - 
ostringstream os; - os << "conv1d_wide(" << arg_names[0] << ", f=" << arg_names[1] << ')'; - return os.str(); -} - -Dim Conv1DWide::dim_forward(const vector& xs) const { - if (xs.size() != 2) { - ostringstream s; s << "Conv1DWide requires two inputs: " << xs; - throw std::invalid_argument(s.str()); - } - unsigned ocols = xs[0].cols() + xs[1].cols() - 1; - if (xs[0].ndims() != 2 || xs[1].ndims() != 2 || - xs[0].rows() != xs[1].rows()) { - ostringstream s; s << "Bad input dimensions in Conv1DWide: " << xs; - throw std::invalid_argument(s.str()); - } - return Dim({xs[0].rows(), ocols}); -} -*/ +#ifndef __CUDACC__ string Filter1DNarrow::as_string(const vector& arg_names) const { ostringstream os; @@ -114,143 +46,7 @@ Dim Filter1DNarrow::dim_forward(const vector& xs) const { return Dim({fids, (unsigned)ocols}); } -string KMaxPooling::as_string(const vector& arg_names) const { - ostringstream os; - os << "kmaxpool(" << arg_names[0] << ", k=" << k << ", d=" << pooled_dim << ')'; - return os.str(); -} - -Dim KMaxPooling::dim_forward(const vector& xs) const { - DYNET_ARG_CHECK(pooled_dim < xs[0].nd, - "Tried to MaxDimension on dimension " << pooled_dim << " bigger than input " << xs[0]); - DYNET_ARG_CHECK(xs[0].nd < 4, - "MaxDimension not currently supported for tensors of 4 or more dimensions."); - DYNET_ARG_CHECK(k >= 1, "Bad bad k in KMaxPooling: " << k); - DYNET_ARG_CHECK(k <= xs[0][pooled_dim], - "Bad k in KMaxPooling: k = " << k << " bigger than the size of pooled dimension " - << pooled_dim << " with size = " << xs[0][pooled_dim]); - Dim ret(xs[0]); - ret.set(pooled_dim, k); - return ret; -} - -size_t KMaxPooling::aux_storage_size() const { - // map of where the entries in f(x) go to entries in x - return sizeof(Eigen::DenseIndex) * dim.size(); -} - -string SumDimension::as_string(const vector& arg_names) const { - ostringstream s; - s << "sum_dim(matrix=" << arg_names[0] << ',' << dimension << '}'; - return s.str(); -} - -Dim SumDimension::dim_forward(const vector& 
xs) const { - DYNET_ASSERT(xs.size() == 1, "Failed input count check in SumDimension"); - Dim ret(xs[0]); - ret.delete_dim(dimension); - return ret; -} -#endif - -template -void AverageColumns::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { - DYNET_ASSERT(xs.size() == 1, "Failed input count check in AverageColumns"); - unsigned cols = xs[0]->d.cols(); -#ifdef __CUDACC__ - // The reduction used on CPU is better, but not implemented in GPU - fx.t<1>().device(*dev.edevice) = xs[0]->t<2>().chip<1>(0); - for(unsigned i = 1; i < cols; ++i) - fx.t<1>().device(*dev.edevice) += xs[0]->t<2>().chip<1>(i); - fx.t<1>().device(*dev.edevice) = fx.t<1>() / (float)cols; -#else - const Eigen::array reduction_axis = {1}; - fx.t<1>().device(*dev.edevice) = xs[0]->t<2>().sum(reduction_axis) / (float)cols; #endif -} - -template -void AverageColumns::backward_dev_impl(const MyDevice & dev, - const vector& xs, - const Tensor& fx, - const Tensor& dEdf, - unsigned i, - Tensor& dEdxi) const { - const Eigen::array broadcasts = {1, xs[0]->d[1]}; - dEdxi.t<2>().device(*dev.edevice) += (dEdf.t<2>() / (float)xs[0]->d[1]).broadcast(broadcasts); -} -DYNET_NODE_INST_DEV_IMPL(AverageColumns) - -/* Deprecated -template -void Conv1DNarrow::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { - const unsigned ycols = dim.cols(); - const unsigned fcols = xs[1]->d.cols(); - for (unsigned j = 0; j < ycols; ++j) { - fx.t<2>().chip<1>(j).device(*dev.edevice) = xs[0]->t<2>().chip<1>(j) * xs[1]->t<2>().chip<1>(0); - for (unsigned k = 1; k < fcols; ++k) - fx.t<2>().chip<1>(j).device(*dev.edevice) += xs[0]->t<2>().chip<1>(j+k) * xs[1]->t<2>().chip<1>(k); - } - // TODO: This following version without chip is better, but for some reason dimensions don't match. 
- // Eigen::array dims; dims[0] = 1; - // fx.t<2>().device(*dev.edevice) = xs[0]->t<2>().convolve(xs[1]->t<2>(), dims); -} - -template -void Conv1DNarrow::backward_dev_impl(const MyDevice & dev, - const vector& xs, - const Tensor& fx, - const Tensor& dEdf, - unsigned i, - Tensor& dEdxi) const { - DYNET_ASSERT(i < 2, "Failed input count check in Conv1DNarrow"); - const unsigned ycols = dim.cols(); - const unsigned fcols = xs[1]->d.cols(); - // TODO: Can this be done with a kernel and without using chip? - if (i == 0) { // derivative wrt input x - for (unsigned j = 0; j < ycols; ++j) - for (unsigned k = 0; k < fcols; ++k) - dEdxi.t<2>().chip<1>(j+k).device(*dev.edevice) += xs[1]->t<2>().chip<1>(k) * dEdf.t<2>().chip<1>(j); - } else { // derivative wrt filter f - for (unsigned j = 0; j < ycols; ++j) - for (unsigned k = 0; k < fcols; ++k) - dEdxi.t<2>().chip<1>(k).device(*dev.edevice) += xs[0]->t<2>().chip<1>(j+k) * dEdf.t<2>().chip<1>(j); - } -} -DYNET_NODE_INST_DEV_IMPL(Conv1DNarrow) - -template -void Conv1DWide::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { - TensorTools::zero(fx); - const unsigned xcols = xs[0]->d.cols(); - const unsigned fcols = xs[1]->d.cols(); - for (unsigned j = 0; j < xcols; ++j) - for (unsigned k = 0; k < fcols; ++k) - fx.t<2>().chip<1>(j+k).device(*dev.edevice) += xs[1]->t<2>().chip<1>(k) * xs[0]->t<2>().chip<1>(j); -} - - -template -void Conv1DWide::backward_dev_impl(const MyDevice & dev, - const vector& xs, - const Tensor& fx, - const Tensor& dEdf, - unsigned i, - Tensor& dEdxi) const { - const unsigned xcols = xs[0]->d.cols(); - const unsigned fcols = xs[1]->d.cols(); - if (i == 0) { // derivative wrt input x - for (unsigned j = 0; j < xcols; ++j) - for (unsigned k = 0; k < fcols; ++k) - dEdxi.t<2>().chip<1>(j).device(*dev.edevice) += xs[1]->t<2>().chip<1>(k) * dEdf.t<2>().chip<1>(j + k); - } else { // derivative wrt filter f - for (unsigned j = 0; j < xcols; ++j) - for (unsigned k = 0; k < fcols; ++k) - 
dEdxi.t<2>().chip<1>(k).device(*dev.edevice) += xs[0]->t<2>().chip<1>(j) * dEdf.t<2>().chip<1>(j + k); - } -} -DYNET_NODE_INST_DEV_IMPL(Conv1DWide) -*/ template void Filter1DNarrow::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { @@ -313,6 +109,26 @@ void Filter1DNarrow::backward_dev_impl(const MyDevice & dev, } DYNET_NODE_INST_DEV_IMPL(Filter1DNarrow) +// ************* FoldRows ************* + +#ifndef __CUDACC__ + +string FoldRows::as_string(const vector& arg_names) const { + ostringstream os; + os << "fold_rows(" << arg_names[0] << ", nrows=" << nrows << ')'; + return os.str(); +} + +Dim FoldRows::dim_forward(const vector& xs) const { + unsigned orows = xs[0].rows() / nrows; + if ((orows * nrows != xs[0].rows()) || xs.size() != 1 || xs[0].ndims() > 2) { + ostringstream s; s << "Bad input dimensions in FoldRows: " << xs; + throw std::invalid_argument(s.str()); + } + return Dim({orows, xs[0].cols()}); +} + +#endif template void FoldRows::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { @@ -343,6 +159,37 @@ void FoldRows::backward_dev_impl(const MyDevice & dev, } DYNET_NODE_INST_DEV_IMPL(FoldRows) +// ************* KMaxPooling ************* + +#ifndef __CUDACC__ + +string KMaxPooling::as_string(const vector& arg_names) const { + ostringstream os; + os << "kmaxpool(" << arg_names[0] << ", k=" << k << ", d=" << pooled_dim << ')'; + return os.str(); +} + +Dim KMaxPooling::dim_forward(const vector& xs) const { + DYNET_ARG_CHECK(pooled_dim < xs[0].nd, + "Tried to MaxDimension on dimension " << pooled_dim << " bigger than input " << xs[0]); + DYNET_ARG_CHECK(xs[0].nd < 4, + "MaxDimension not currently supported for tensors of 4 or more dimensions."); + DYNET_ARG_CHECK(k >= 1, "Bad bad k in KMaxPooling: " << k); + DYNET_ARG_CHECK(k <= xs[0][pooled_dim], + "Bad k in KMaxPooling: k = " << k << " bigger than the size of pooled dimension " + << pooled_dim << " with size = " << xs[0][pooled_dim]); + Dim ret(xs[0]); + 
ret.set(pooled_dim, k); + return ret; +} + +size_t KMaxPooling::aux_storage_size() const { + // map of where the entries in f(x) go to entries in x + return sizeof(Eigen::DenseIndex) * dim.size(); +} + +#endif + template void KMaxPooling::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { #ifdef __CUDACC__ @@ -429,25 +276,59 @@ void KMaxPooling::backward_dev_impl(const MyDevice & dev, } DYNET_NODE_INST_DEV_IMPL(KMaxPooling) +// ************* KMHNgram ************* + +#ifndef __CUDACC__ + +string KMHNGram::as_string(const vector& arg_names) const { + ostringstream s; + s << "kmh-ngram(" << arg_names[0] << ')'; + return s.str(); +} + +Dim KMHNGram::dim_forward(const vector& xs) const { + DYNET_ARG_CHECK(xs[0].ndims() == 2, "Bad input dimensions in KMHNGram: " << xs); + const unsigned new_cols = xs[0].cols() - n + 1; + DYNET_ARG_CHECK(new_cols >= 1, "Bad input dimensions in KMHNGram: " << xs); + return Dim({xs[0][0], new_cols}); +} + +#endif + template -void SumDimension::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { - DYNET_ASSERT(xs.size() == 1, "Failed input count check in SumDimension"); - Eigen::array reduction_axis = {(int)dimension}; - fx.t<1>().device(*dev.edevice) = xs[0]->t<2>().sum(reduction_axis); +void KMHNGram::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { +#ifdef __CUDACC__ + DYNET_RUNTIME_ERR("KMHNGram not implemented for CUDA"); +#else + auto x = **xs[0]; + const int new_cols = x.cols() - n + 1; + DYNET_ASSERT(new_cols > 0, "Failed dimension check in KMHNGram"); + auto res = *fx; + res.setZero(); + for (int j = 0; j < new_cols; ++j) { + auto c_j = res.col(j); + for (unsigned k = 0; k < n; ++k) + c_j += x.col(j + k); + } +#endif } template -void SumDimension::backward_dev_impl(const MyDevice & dev, +void KMHNGram::backward_dev_impl(const MyDevice & dev, const vector& xs, const Tensor& fx, const Tensor& dEdf, unsigned i, Tensor& dEdxi) const { - // TODO: limit to 
3-dimensional tensor is arbitrary - Eigen::array bcast = {1,1,1,1}; bcast[dimension] = dEdxi.d[dimension]; - Eigen::array morph = {(int)dEdxi.d[0],(int)dEdxi.d[1],(int)dEdxi.d[2],(int)dEdxi.d.bd}; morph[dimension] = 1; - dEdxi.tb<3>().device(*dev.edevice) += dEdf.tb<3>().reshape(morph).broadcast(bcast); +#ifdef __CUDACC__ + DYNET_RUNTIME_ERR("KMHNGram not implemented for CUDA"); +#else + const int c = dEdf.d.cols(); + for (int j = 0; j < c; ++j) + for (unsigned k = 0; k < n; ++k) + (*dEdxi).col(j+k) += (*dEdf).col(j); +#endif } -DYNET_NODE_INST_DEV_IMPL(SumDimension) +DYNET_NODE_INST_DEV_IMPL(KMHNGram) } // namespace dynet diff --git a/dynet/nodes-conv.h b/dynet/nodes-conv.h index 9465b16be..a4fd2ca02 100644 --- a/dynet/nodes-conv.h +++ b/dynet/nodes-conv.h @@ -11,31 +11,6 @@ namespace dynet { -// with a single argument x \in R^{n x m} -// y_i = \sum_j x_i,j / m -struct AverageColumns : public Node { - template explicit AverageColumns(const T& a) : Node(a) {} - DYNET_NODE_DEFINE_DEV_IMPL() -}; - -/* Deprecated -// y = x_1 *conv x_2 -// x_1 \in R^{d x s} (input) -// x_2 \in R^{d x m} (filter) -struct Conv1DNarrow : public Node { - explicit Conv1DNarrow(const std::initializer_list& a) : Node(a) {} - DYNET_NODE_DEFINE_DEV_IMPL() -}; - -// y = x_1 *conv x_2 -// x_1 \in R^{d x s} (input) -// x_2 \in R^{d x m} (filter) -struct Conv1DWide : public Node { - explicit Conv1DWide(const std::initializer_list& a) : Node(a) {} - DYNET_NODE_DEFINE_DEV_IMPL() -}; -*/ - // y = x_1 *filter x_2 // x_1 \in R^{d x s} (input) // x_2 \in R^{d x m} (filter) @@ -64,13 +39,6 @@ struct KMaxPooling : public Node { unsigned second_dim; }; -// sum along a single dimension -struct SumDimension : public Node { - template explicit SumDimension(const T& a, unsigned d) : Node(a), dimension(d) {} - DYNET_NODE_DEFINE_DEV_IMPL() - unsigned dimension; -}; - // conv2d // y = x_1 *conv2d x_2 // x_1 \in R^{H x W x Ci x N} (input) @@ -119,6 +87,13 @@ struct MaxPooling2D: public Node { #endif }; +// y_i = 
\sum_{j=1}^n x_1:{i-1+j} +struct KMHNGram : public Node { + explicit KMHNGram(const std::initializer_list& a, unsigned n) : Node(a), n(n) {} + DYNET_NODE_DEFINE_DEV_IMPL() + unsigned n; // width, n=2 for Karl's paper +}; + } // namespace dynet diff --git a/dynet/nodes-dropout.cc b/dynet/nodes-dropout.cc new file mode 100644 index 000000000..7addca034 --- /dev/null +++ b/dynet/nodes-dropout.cc @@ -0,0 +1,187 @@ +#include "dynet/nodes.h" + +#include "dynet/nodes-macros.h" + +using namespace std; + +namespace dynet { + +// ************* Dropout ************* + +#ifndef __CUDACC__ + +string Dropout::as_string(const vector& arg_names) const { + ostringstream s; + s << "dropout(" << arg_names[0] << ",p=" << p << ')'; + return s.str(); +} + +Dim Dropout::dim_forward(const vector& xs) const { + DYNET_ARG_CHECK(xs.size() == 1, "Failed input count check in Dropout") + return xs[0]; +} + +size_t Dropout::aux_storage_size() const { + return dim.size() * sizeof(float); +} + +#endif + +template +void Dropout::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + Tensor m(dim, (float*)aux_mem, fx.device, DeviceMempool::FXS); + TensorTools::randomize_bernoulli(m, (1.f-p), 1.f / (1.f-p)); + fx.tvec().device(*dev.edevice) = xs[0]->tvec() * m.tvec(); +} + +template +void Dropout::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + Tensor m(dim, (float*)aux_mem, fx.device, DeviceMempool::FXS); + dEdxi.tvec().device(*dev.edevice) += dEdf.tvec() * m.tvec(); +} +DYNET_NODE_INST_DEV_IMPL(Dropout) + +// ************* DropoutDim ************* + +#ifndef __CUDACC__ + +string DropoutDim::as_string(const vector& arg_names) const { + ostringstream s; + s << "dropout_dim(" << arg_names[0] << ",p=" << p << ')'; + return s.str(); +} + +Dim DropoutDim::dim_forward(const vector& xs) const { + DYNET_ARG_CHECK(xs.size() == 1, "Failed input count check in DropoutDim") + 
DYNET_ARG_CHECK(xs[0].nd < 4, "DropoutDim only supports tensor up to order 3 + batch dimension, got tensor of order"< dimension, "In DropoutDim : tried to drop along dimension "< +void DropoutDim::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + Dim mask_dim(dim); + mask_dim.d[dimension]=1; + Tensor m(mask_dim, (float*)aux_mem, fx.device, DeviceMempool::FXS); + TensorTools::randomize_bernoulli(m, (1.f-p), 1.f / (1.f-p)); + Eigen::array bcast = {1, 1, 1, 1}; bcast[dimension] = xs[0]->d[dimension]; + fx.tb<3>().device(*dev.edevice) = xs[0]->tb<3>() * m.tb<3>().broadcast(bcast); +} + +template +void DropoutDim::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + Dim mask_dim(dim); + mask_dim.d[dimension]=1; + Tensor m(mask_dim, (float*)aux_mem, fx.device, DeviceMempool::FXS); + Eigen::array bcast = {1, 1, 1, 1}; bcast[dimension] = dEdf.d[dimension]; + dEdxi.tb<3>().device(*dev.edevice) += dEdf.tb<3>() * m.tb<3>().broadcast(bcast); +} +DYNET_NODE_INST_DEV_IMPL(DropoutDim) + +// ************* DropoutBatch ************* + +#ifndef __CUDACC__ + +string DropoutBatch::as_string(const vector& arg_names) const { + ostringstream s; + s << "dropout_batch(" << arg_names[0] << ",p=" << p << ')'; + return s.str(); +} + +Dim DropoutBatch::dim_forward(const vector& xs) const { + DYNET_ARG_CHECK(xs.size() == 1, "Failed input count check in DropoutBatch") + return xs[0]; +} + +size_t DropoutBatch::aux_storage_size() const { + return dim.batch_elems() * sizeof(float); +} + +#endif + +template +void DropoutBatch::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + Dim mask_dim({1},xs[0]->d.batch_elems()); + Tensor m(mask_dim, (float*)aux_mem, fx.device, DeviceMempool::FXS); + TensorTools::randomize_bernoulli(m, (1.f-p), 1.f / (1.f-p)); + Eigen::array bcast = {xs[0]->d.batch_size(), 1}; + fx.tbvec().device(*dev.edevice) = xs[0]->tbvec() * 
m.tbvec().broadcast(bcast); +} + +template +void DropoutBatch::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + Dim mask_dim({1},xs[0]->d.batch_elems()); + Tensor m(mask_dim, (float*)aux_mem, fx.device, DeviceMempool::FXS); + Eigen::array bcast = {xs[0]->d.batch_size(), 1}; + dEdxi.tbvec().device(*dev.edevice) += dEdf.tbvec() * m.tbvec().broadcast(bcast); +} +DYNET_NODE_INST_DEV_IMPL(DropoutBatch) + +// ************* BlockDropout ************* + +#ifndef __CUDACC__ + +string BlockDropout::as_string(const vector& arg_names) const { + ostringstream s; + s << "block_dropout(" << arg_names[0] << ",dropout_probability=" << dropout_probability << ')'; + return s.str(); +} + +Dim BlockDropout::dim_forward(const vector& xs) const { + DYNET_ARG_CHECK(xs.size() == 1, "Failed input count check in BlockDropout") + return xs[0]; +} + +size_t BlockDropout::aux_storage_size() const { + // we just need to remember whether this entire block is turned on (1.0) or off (0.0) + return 1 * sizeof(float); +} + +#endif + +template +void BlockDropout::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + bernoulli_distribution distribution(1.0 - dropout_probability); + float block_multiplier = distribution(*rndeng)? 1.0 : 0.0; + block_multiplier = + dropout_probability == 1.0? 
0.0 : block_multiplier / (1.0 - dropout_probability); + if (dropout_probability > 1.0 || dropout_probability < 0.0) + DYNET_INVALID_ARG("Dropout probability must be in the range [0, 1]"); + *(static_cast(aux_mem)) = block_multiplier; + fx.tvec().device(*dev.edevice) = xs[0]->tvec() * block_multiplier; +} + +template +void BlockDropout::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + float block_multiplier = *(static_cast(aux_mem)); + dEdxi.tvec().device(*dev.edevice) += dEdf.tvec() * block_multiplier; +} +DYNET_NODE_INST_DEV_IMPL(BlockDropout) + +} diff --git a/dynet/nodes-flow.cc b/dynet/nodes-flow.cc new file mode 100644 index 000000000..95d0b10ae --- /dev/null +++ b/dynet/nodes-flow.cc @@ -0,0 +1,151 @@ +#include "dynet/nodes.h" + +#include "dynet/nodes-macros.h" + +using namespace std; + +namespace dynet { + +// ************* Reshape ************* + +#ifndef __CUDACC__ + +string Reshape::as_string(const vector& arg_names) const { + ostringstream s; + s << "reshape(" << arg_names[0] << " --> " << to << ')'; + return s.str(); +} + +Dim Reshape::dim_forward(const vector& xs) const { + DYNET_ARG_CHECK(xs.size() == 1, "Failed input count check in Reshape") + if(to.size() == xs[0].size()) { + return to; + } else { + DYNET_ARG_CHECK(to.batch_elems() == 1 && to.batch_size() == xs[0].batch_size(), + "Bad arguments to Reshape: " << to << ", " << xs[0]); + Dim ret(to); + ret.bd = xs[0].batch_elems(); + return ret; + } +} + +#endif + +template +void Reshape::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + // just point to the input memory and change dimensions + // dimensions are handled by forward_dim + fx.tvec().device(*dev.edevice) = xs[0]->tvec(); +} + +template +void Reshape::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + const Tensor 
reshaped(dEdxi.d, dEdf.v, dEdxi.device, dEdf.mem_pool); + dEdxi.tvec().device(*dev.edevice) += reshaped.tvec(); +} +DYNET_NODE_INST_DEV_IMPL(Reshape) + +// ************* Identity ************* + +#ifndef __CUDACC__ + +string Identity::as_string(const vector& arg_names) const { + return arg_names[0]; +} + +Dim Identity::dim_forward(const vector& xs) const { + DYNET_ARG_CHECK(xs.size() == 1, "Failed input count check in Identity") + return xs[0]; +} + +#endif + +template +void Identity::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + fx.tvec().device(*dev.edevice) = xs[0]->tvec(); +} + +template +void Identity::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + dEdxi.tvec().device(*dev.edevice) += dEdf.tvec(); +} +DYNET_NODE_INST_DEV_IMPL(Identity) + +// ************* NoBackprop ************* + +#ifndef __CUDACC__ + +string NoBackprop::as_string(const vector& arg_names) const { + ostringstream s; + s << "nobackprop(" << arg_names[0] << ')'; + return s.str(); +} + +Dim NoBackprop::dim_forward(const vector& xs) const { + DYNET_ARG_CHECK(xs.size() == 1, "Failed input count check in NoBackprop") + return xs[0]; +} + +#endif + +template +void NoBackprop::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + fx.tvec().device(*dev.edevice) = xs[0]->tvec(); +} + +template +void NoBackprop::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + // no op +} +DYNET_NODE_INST_DEV_IMPL(NoBackprop) + +// ************* FlipGradient ************* + +#ifndef __CUDACC__ + +string FlipGradient::as_string(const vector& arg_names) const { + ostringstream s; + s << "flip_gradient(" << arg_names[0] << ')'; + return s.str(); +} + +Dim FlipGradient::dim_forward(const vector& xs) const { + DYNET_ARG_CHECK(xs.size() == 1, "Failed input count check in 
FlipGradient"); + return xs[0]; +} + +#endif + +template +void FlipGradient::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + fx.tvec().device(*dev.edevice) = xs[0]->tvec(); +} + +template +void FlipGradient::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + // takes negative on backprop + dEdxi.tvec().device(*dev.edevice) -= dEdf.tvec(); +} +DYNET_NODE_INST_DEV_IMPL(FlipGradient) + +} diff --git a/dynet/nodes-linalg.cc b/dynet/nodes-linalg.cc new file mode 100644 index 000000000..a1c17eaec --- /dev/null +++ b/dynet/nodes-linalg.cc @@ -0,0 +1,224 @@ +#include "dynet/nodes.h" + +#include "dynet/nodes-macros.h" + +using namespace std; + +namespace dynet { + +// ************* Transpose ************* + +#ifndef __CUDACC__ + +string Transpose::as_string(const vector& arg_names) const { + ostringstream s; + s << "transpose("<< arg_names[0] << ", "; + for(size_t i = 0; i < dims.size(); ++i) + s << (i == 0?'{':',') << dims[i]; + s << "})"; + return s.str(); +} + +Dim Transpose::dim_forward(const vector& xs) const { + DYNET_ARG_CHECK(xs.size() == 1, "Bad arguments to Transpose: " << xs); + DYNET_ARG_CHECK(xs[0].nd == dims.size() || xs[0].num_nonone_dims() == 1, "Dimensions passed to transpose (" << dims.size() << ") must be equal to dimensions in input tensor (" << xs[0].nd << ')'); + Dim ret(xs[0]); + ret.nd = dims.size(); + for(size_t i = 0; i < dims.size(); ++i) + ret.d[i] = xs[0][dims[i]]; + return ret; +} + +#endif + +template +void Transpose::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + if (dim.num_nonone_dims() <= 1) { + fx.tvec().device(*dev.edevice) = xs[0]->tvec(); + } else { + Eigen::array order; + for(size_t i = 0; i < 5; ++i) + order[i] = (i >= dims.size() ? 
i : dims[i]); + fx.tb<4>().device(*dev.edevice) = xs[0]->tb<4>().shuffle(order); + } +} + +template +void Transpose::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + Eigen::array order; + for(size_t i = 0; i < 5; ++i) + order[(i >= dims.size() ? i : dims[i])] = i; + dEdxi.tb<4>().device(*dev.edevice) += dEdf.tb<4>().shuffle(order); +} +DYNET_NODE_INST_DEV_IMPL(Transpose) + +// ************* MatrixInverse ************* + +#ifndef __CUDACC__ + +string MatrixInverse::as_string(const vector& arg_names) const { + ostringstream s; + s << "inverse(" << arg_names[0] << ")"; + return s.str(); +} + +Dim MatrixInverse::dim_forward(const vector& xs) const { + return xs[0]; +} + +#endif + +template +void MatrixInverse::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + DYNET_ASSERT(xs.size() == 1, "Failed dimension check in MatrixInverse::forward"); +#ifdef __CUDACC__ + DYNET_RUNTIME_ERR("MatrixInverse not yet implemented for CUDA"); +#else + auto x = **xs[0]; + auto y = *fx; + y = x.inverse(); +#endif + // TODO: Change into tensors after resolving test errors + // fx.t<2>().device(*dev.edevice) = xs[0]->t<2>().inverse(); +} + +template +void MatrixInverse::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + DYNET_ASSERT(xs.size() == 1, "Failed dimension check in MatrixInverse::backward"); +#ifdef __CUDACC__ + DYNET_RUNTIME_ERR("MatrixInverse not yet implemented for CUDA"); +#else + auto d = *dEdf; + auto y = *fx; + (*dEdxi) -= y * d * y; +#endif +} +DYNET_NODE_INST_DEV_IMPL(MatrixInverse) + +// ************* LogDet ************* + +#ifndef __CUDACC__ + +string LogDet::as_string(const vector& arg_names) const { + ostringstream s; + s << "logdet(" << arg_names[0] << ")"; + return s.str(); +} + +Dim LogDet::dim_forward(const vector& xs) const { + 
DYNET_ARG_CHECK(xs[0].ndims() <= 2 && (xs[0].rows() == xs[0].cols()), "Bad arguments in LogDet: " << xs); + return Dim({1}); +} + +// set use_cholesky if M is symmetric - it's faster and more stable +// for dep parsing it won't be +template +inline typename MatrixType::Scalar logdet(const MatrixType& M, bool use_cholesky = false) { + using namespace Eigen; + using std::log; + typedef typename MatrixType::Scalar Scalar; + Scalar ld = 0; + if (use_cholesky) { + LLT> chol(M); + auto& U = chol.matrixL(); + for (unsigned i = 0; i < M.rows(); ++i) + ld += log(U(i,i)); + ld *= 2; + } else { + PartialPivLU> lu(M); + auto& LU = lu.matrixLU(); + Scalar c = lu.permutationP().determinant(); // -1 or 1 + for (unsigned i = 0; i < LU.rows(); ++i) { + const auto& lii = LU(i,i); + if (lii < Scalar(0)) c *= -1; + ld += log(abs(lii)); + } + ld += log(c); + } + return ld; +} + +#endif + +template +void LogDet::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { +#ifdef __CUDACC__ + DYNET_RUNTIME_ERR("LogDet not implemented for CUDA"); +#else + fx.v[0] = logdet(**xs[0], false); +#endif +} + +template +void LogDet::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { +#ifdef __CUDACC__ + DYNET_RUNTIME_ERR("KMHNGram not implemented for CUDA"); +#else + auto trans = (**xs[0]).transpose(); + (*dEdxi) += (dEdf.v[0]) * trans.inverse(); +#endif +} +DYNET_NODE_INST_DEV_IMPL(LogDet) + +// ************* TraceOfProduct ************* + +#ifndef __CUDACC__ + +string TraceOfProduct::as_string(const vector& arg_names) const { + ostringstream s; + s << "Tr(" << arg_names[0] << " * " << arg_names[1] << "^T)"; + return s.str(); +} + +Dim TraceOfProduct::dim_forward(const vector& xs) const { + DYNET_ARG_CHECK(xs.size() == 2 && xs[0] == xs[1], "Bad arguments in TraceOfProduct: " << xs); + return Dim({1}, max(xs[0].bd, xs[1].bd)); +} + +#endif + +template +void 
TraceOfProduct::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { +#ifdef __CUDACC__ + DYNET_RUNTIME_ERR("TraceOfProduct not yet implemented for CUDA"); +#else + auto x1 = **xs[0]; + auto x2 = **xs[1]; + fx.v[0] = (x1 * x2.transpose()).trace(); +#endif +} + +template +void TraceOfProduct::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + DYNET_ARG_CHECK(i < 2, "Failed dimension check in TraceOfProduce::backward"); +#ifdef __CUDACC__ + DYNET_RUNTIME_ERR("TraceOfProduct not yet implemented for CUDA"); +#else + const float d = dEdf.v[0]; + auto xother = **xs[1 - i]; + *dEdxi += d * xother; +#endif +} +DYNET_NODE_INST_DEV_IMPL(TraceOfProduct) + +} diff --git a/dynet/nodes-logsumexp.cc b/dynet/nodes-logsumexp.cc new file mode 100644 index 000000000..71fb3e0b8 --- /dev/null +++ b/dynet/nodes-logsumexp.cc @@ -0,0 +1,115 @@ +#include "dynet/nodes.h" + +#include "dynet/nodes-macros.h" + +using namespace std; + +namespace dynet { + +// ************* LogSumExp ************* + +#define MAX_LOG_SUM_EXP 65536 + +#ifndef __CUDACC__ + +// template +// EIGEN_STRONG_INLINE real logsumexp(const T& x, const vector& denom) { +// real m = x(denom[0],0); +// for (auto i : denom) { +// real r = x(i,0); +// if (r > m) m = r; +// } +// real z = 0; +// for (auto i : denom) +// z += expf(x(i,0) - m); +// return m + logf(z); +// } + +string LogSumExp::as_string(const vector& arg_names) const { + ostringstream s; + s << "log(exp " << arg_names[0]; + for (unsigned i = 1; i < arg_names.size(); ++i) + s << " + exp " << arg_names[i]; + s << ")"; + return s.str(); +} + +Dim LogSumExp::dim_forward(const vector& xs) const { + Dim d = xs[0].truncate(); + for (unsigned i = 1; i < xs.size(); ++i) { + DYNET_ARG_CHECK(d.single_batch() == xs[i].truncate().single_batch(), + "Mismatched input dimensions in LogSumExp: " << xs); + d.bd = max(xs[i].bd, d.bd); + } + return d; +} + +// this i 
need to do something better, but this is a work-around +// if this is too small, just make it bigger +size_t LogSumExp::aux_storage_size() const { + return (MAX_LOG_SUM_EXP + 1) * sizeof(float); +} + +#endif + +template +void LogSumExp::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + if (xs.size() == 1) { + fx.tvec().device(*dev.edevice) = xs[0]->tvec(); + } else { + // TODO: Ideally we wouldn't need to allocate this memory permanently. + // We need a good method for allocating "scratch" memory that is only used temporarily. + Tensor ms(fx.d, static_cast(aux_mem), fx.device, DeviceMempool::FXS); + Eigen::array bcast = {1,fx.d.bd}; + // Calculate the max + if(ms.d.bd == xs[0]->d.bd) + ms.tvec().device(*dev.edevice) = xs[0]->tvec(); + else + ms.tbvec().device(*dev.edevice) = xs[0]->tbvec().broadcast(bcast); + for (size_t i = 1; i < xs.size(); ++i) { + if(ms.d.bd == xs[i]->d.bd) + ms.tvec().device(*dev.edevice) = ms.tvec().cwiseMax(xs[i]->tvec()); + else + ms.tbvec().device(*dev.edevice) = ms.tbvec().cwiseMax(xs[i]->tbvec().broadcast(bcast)); + } + // sumexp + if(ms.d.bd == xs[0]->d.bd) + fx.tvec().device(*dev.edevice) = (xs[0]->tvec() - ms.tvec()).exp(); + else + fx.tbvec().device(*dev.edevice) = (xs[0]->tbvec().broadcast(bcast) - ms.tbvec()).exp(); + for (size_t i = 1; i < xs.size(); ++i) { + if(ms.d.bd == xs[i]->d.bd) + fx.tvec().device(*dev.edevice) += (xs[i]->tvec() - ms.tvec()).exp(); + else + fx.tbvec().device(*dev.edevice) += (xs[i]->tbvec().broadcast(bcast) - ms.tbvec()).exp(); + } + // log and add max + fx.tvec().device(*dev.edevice) = fx.tvec().log() + ms.tvec(); + } +} + +template +void LogSumExp::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + if (xs.size() == 1) { + dEdxi.tvec().device(*dev.edevice) += dEdf.tvec(); + } else { + // df/dx_i = 1/{sum_j exp(x_j)} * exp(x_i)} + // = 1/{exp f(x)} * exp(x_i) + // = exp(x_i - f(x)) + 
if(fx.d.bd == xs[i]->d.bd) { + dEdxi.tvec().device(*dev.edevice) += (xs[i]->tvec() - fx.tvec()).exp() * dEdf.tvec(); + } else { + Eigen::array bcast = {1,fx.d.bd}; + Eigen::array red_axis = {1}; + dEdxi.tvec().device(*dev.edevice) += ((xs[i]->tbvec().broadcast(bcast) - fx.tbvec()).exp() * dEdf.tbvec()).sum(red_axis); + } + } +} +DYNET_NODE_INST_DEV_IMPL(LogSumExp) + +} diff --git a/dynet/nodes-losses.cc b/dynet/nodes-losses.cc new file mode 100644 index 000000000..a12db7bc8 --- /dev/null +++ b/dynet/nodes-losses.cc @@ -0,0 +1,123 @@ +#include "dynet/nodes.h" + +#include "dynet/nodes-macros.h" +#include "dynet/functors.h" + +using namespace std; + +namespace dynet { + +// ************* PairwiseRankLoss ************* + +#ifndef __CUDACC__ + +string PairwiseRankLoss::as_string(const vector& arg_names) const { + ostringstream os; + os << "max(0, " << margin << " - " << arg_names[0] << " + " << arg_names[1] << ')'; + return os.str(); +} + +Dim PairwiseRankLoss::dim_forward(const vector& xs) const { + DYNET_ARG_CHECK(xs.size() == 2 && + xs[0] == xs[1] && + xs[0].rows() == 1 && + (xs[0].ndims() == 1 || xs[0].ndims() == 2), + "Bad input dimensions in PairwiseRankLoss: " << xs); + return xs[0].bd >= xs[1].bd ? 
xs[0] : xs[1]; +} + +template +void PairwiseRankLoss::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + fx.tvec().device(*dev.edevice) = xs[0]->tvec().binaryExpr(xs[1]->tvec(), FPairwiseRankLoss(margin)); +} + +template +void PairwiseRankLoss::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + if (i == 0) { + dEdxi.tvec().device(*dev.edevice) -= fx.tvec().binaryExpr(dEdf.tvec(), FRectifyBackward()); + } else { + dEdxi.tvec().device(*dev.edevice) += fx.tvec().binaryExpr(dEdf.tvec(), FRectifyBackward()); + } +} +DYNET_NODE_INST_DEV_IMPL(PairwiseRankLoss) + +#endif + +// ************* BinaryLogLoss ************* + +#ifndef __CUDACC__ + +string BinaryLogLoss::as_string(const vector& arg_names) const { + ostringstream os; + os << "binary_log_loss(" << arg_names[0] << ", " << arg_names[1] << ')'; + return os.str(); +} + +Dim BinaryLogLoss::dim_forward(const vector& xs) const { + DYNET_ARG_CHECK(xs.size() == 2, "Failed input count check in BinaryLogLoss") + DYNET_ARG_CHECK(xs[0].rows() == 2 || xs[0].ndims() == 1, "Bad input dimensions in BinaryLogLoss: " << xs); + DYNET_ARG_CHECK(xs[1].rows() == 2 || xs[1].ndims() == 1, "Bad input dimensions in BinaryLogLoss: " << xs); + return Dim({1}, max(xs[0].bd, xs[1].bd)); +} + +#endif + +template +void BinaryLogLoss::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + fx.t<0>().device(*dev.edevice) = xs[0]->tvec().binaryExpr(xs[1]->tvec(), FBinaryLogLoss()).sum(); +} + +template +void BinaryLogLoss::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + dEdxi.tvec().device(*dev.edevice) += xs[i]->tvec().binaryExpr(xs[1-i]->tvec(), FBinaryLogLossBackward(as_scalar(dEdf))); +} +DYNET_NODE_INST_DEV_IMPL(BinaryLogLoss) + +// ************* PoissonRegressionLoss ************* + +#ifndef __CUDACC__ + 
+string PoissonRegressionLoss::as_string(const vector& arg_names) const { + ostringstream s; + s << "-log Poisson(" << pty << "; lambda=\\exp" << arg_names[0] << ')'; + return s.str(); +} + +Dim PoissonRegressionLoss::dim_forward(const vector& xs) const { + DYNET_ARG_CHECK(xs.size() == 1 && xs[0].size() == 1, "Bad input dimensions in PoissonRegressionLoss: " << xs); + return xs[0]; +} + +#endif + +template +void PoissonRegressionLoss::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + const real y = *pty; + const auto z = std::lgamma(y + 1); + // const auto x = as_scalar(*xs[0]); + fx.t<0>().device(*dev.edevice) = xs[0]->t<0>().exp() + z - xs[0]->t<0>() * y; +} + +template +void PoissonRegressionLoss::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + const real y = *pty; + dEdxi.t<0>().device(*dev.edevice) += xs[0]->t<0>().exp() - y; +} +DYNET_NODE_INST_DEV_IMPL(PoissonRegressionLoss) + +} diff --git a/dynet/nodes-minmax.cc b/dynet/nodes-minmax.cc new file mode 100644 index 000000000..6b5d45f87 --- /dev/null +++ b/dynet/nodes-minmax.cc @@ -0,0 +1,252 @@ +#include "dynet/nodes.h" + +#include "dynet/nodes-macros.h" +#include "dynet/functors.h" + +using namespace std; + +namespace dynet { + +// ************* Min ************* + +#ifndef __CUDACC__ + +string Min::as_string(const vector& arg_names) const { + ostringstream s; + s << "min{" << arg_names[0] << ", " << arg_names[1] << "}"; + return s.str(); +} + +Dim Min::dim_forward(const vector& xs) const { + DYNET_ARG_CHECK(xs.size() == 2 && xs[0] == xs[1], "Bad arguments in Min: " << xs); + return xs[0].bd >= xs[1].bd ? 
xs[0] : xs[1]; +} + +size_t Min::aux_storage_size() const { + return dim.size() * sizeof(float); +} + +#endif + +template +void Min::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + Tensor t(fx.d, static_cast(aux_mem), fx.device, DeviceMempool::FXS); + t.tvec().device(*dev.edevice) = (xs[0]->tvec() < xs[1]->tvec()).cast(); + fx.tvec().device(*dev.edevice) = xs[0]->tvec().cwiseMin(xs[1]->tvec()); +} + +template +void Min::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + DYNET_ASSERT(i < 2, "Failed dimension check in Min::backward"); + const Tensor t(dEdxi.d, static_cast(aux_mem), fx.device, DeviceMempool::FXS); + if (i == 0) { + dEdxi.tvec().device(*dev.edevice) += t.tvec() * dEdf.tvec(); + } else { + dEdxi.tvec().device(*dev.edevice) += t.tvec().binaryExpr(dEdf.tvec(), FMaxBackwardInv()); + } +} +DYNET_NODE_INST_DEV_IMPL(Min) + +// ************* Max ************* + +#ifndef __CUDACC__ + +string Max::as_string(const vector& arg_names) const { + ostringstream s; + s << "max{" << arg_names[0] << ", " << arg_names[1] << "}"; + return s.str(); +} + +Dim Max::dim_forward(const vector& xs) const { + DYNET_ARG_CHECK(xs.size() == 2 && xs[0] == xs[1], "Bad arguments in Max: " << xs); + return xs[0].bd >= xs[1].bd ? 
xs[0] : xs[1]; +} + +size_t Max::aux_storage_size() const { + return dim.size() * sizeof(float); +} + +#endif + +template +void Max::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + Tensor t(fx.d, static_cast(aux_mem), fx.device, DeviceMempool::FXS); + t.tvec().device(*dev.edevice) = (xs[0]->tvec() > xs[1]->tvec()).cast(); + fx.tvec().device(*dev.edevice) = xs[0]->tvec().cwiseMax(xs[1]->tvec()); +} + +template +void Max::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + DYNET_ASSERT(i < 2, "Failed dimension check in Max::backward"); + const Tensor t(dEdxi.d, static_cast(aux_mem), fx.device, DeviceMempool::FXS); + if (i == 0) { + dEdxi.tvec().device(*dev.edevice) += t.tvec() * dEdf.tvec(); + } else { + dEdxi.tvec().device(*dev.edevice) += t.tvec().binaryExpr(dEdf.tvec(), FMaxBackwardInv()); + } +} +DYNET_NODE_INST_DEV_IMPL(Max) + +// ************* MinDimension ************* + +#ifndef __CUDACC__ + +string MinDimension::as_string(const vector& arg_names) const { + ostringstream s; + s << "min_dim(" << arg_names[0] << ", reduced_dim=" << reduced_dim << ')'; + return s.str(); +} + +Dim MinDimension::dim_forward(const vector& xs) const { + DYNET_ARG_CHECK(xs.size() == 1, "Failed input count check in MinDimension"); + DYNET_ARG_CHECK(reduced_dim < xs[0].nd, + "Tried to MinDimension on dimension " << reduced_dim << " bigger than input " << xs[0]); + DYNET_ARG_CHECK(xs[0].nd < 4, + "MinDimension not currently supported for tensors of 4 or more dimensions."); + Dim ret(xs[0]); + ret.delete_dim(reduced_dim); + return ret; +} + +size_t MinDimension::aux_storage_size() const { + return sizeof(Eigen::DenseIndex) * dim.size(); +} + +#endif + +template +void MinDimension::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + Eigen::DenseIndex* minmap = static_cast(aux_mem); + const unsigned batch_size = dim.batch_elems(); + const 
unsigned first_dim_size = dim[0]; + const unsigned second_dim_size = dim[1]; + Eigen::TensorMap> locs(minmap, first_dim_size, second_dim_size, batch_size); + const Eigen::array reduction_axis = {reduced_dim}; + locs.device(*dev.edevice) = xs[0]->tb<3>().argmin(reduced_dim); + fx.tb<2>().device(*dev.edevice) = xs[0]->tb<3>().minimum(reduction_axis); +} + +template +void MinDimension::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + DYNET_ARG_CHECK(i == 0, "Failed dimension check in MinDimension::backward"); +#ifdef __CUDACC__ + vector indices(dim.size()); + Eigen::DenseIndex* minmap = &indices[0]; + CUDA_CHECK(cudaMemcpy((void*)minmap, aux_mem, sizeof(Eigen::DenseIndex) * dim.size(), cudaMemcpyDeviceToHost)); +#else + Eigen::DenseIndex* minmap = static_cast(aux_mem); +#endif + const unsigned batch_size = dim.batch_elems(); + const unsigned first_dim_size = dim[0]; + const unsigned second_dim_size = dim[1]; + Eigen::TensorMap> locs(minmap, first_dim_size, second_dim_size, batch_size); + for(unsigned b = 0; b < batch_size; ++b){ + for(unsigned j = 0; j < second_dim_size; ++j){ + for(unsigned i = 0; i < first_dim_size; ++i){ + if (reduced_dim > second_dim) + dEdxi.tb<3>().chip<3>(b).chip(locs(i, j, b), reduced_dim).chip(j, second_dim).chip(i, first_dim).device(*dev.edevice) + += dEdf.tb<2>().chip<2>(b).chip<1>(j).chip<0>(i); + else if (reduced_dim > first_dim) + dEdxi.tb<3>().chip<3>(b).chip(j, second_dim).chip(locs(i, j, b), reduced_dim).chip(i, first_dim).device(*dev.edevice) + += dEdf.tb<2>().chip<2>(b).chip<1>(j).chip<0>(i); + else + dEdxi.tb<3>().chip<3>(b).chip(j, second_dim).chip(i, first_dim).chip(locs(i, j, b), reduced_dim).device(*dev.edevice) + += dEdf.tb<2>().chip<2>(b).chip<1>(j).chip<0>(i); + } + } + } +} +DYNET_NODE_INST_DEV_IMPL(MinDimension) + +// ************* MaxDimension ************* + +#ifndef __CUDACC__ + +string MaxDimension::as_string(const vector& 
arg_names) const { + ostringstream s; + s << "max_dim(" << arg_names[0] << ", reduced_dim=" << reduced_dim << ')'; + return s.str(); +} + +Dim MaxDimension::dim_forward(const vector& xs) const { + DYNET_ARG_CHECK(xs.size() == 1, "Failed input count check in MaxDimension"); + DYNET_ARG_CHECK(reduced_dim < xs[0].nd, + "Tried to MaxDimension on dimension " << reduced_dim << " bigger than input " << xs[0]); + DYNET_ARG_CHECK(xs[0].nd < 4, + "MaxDimension not currently supported for tensors of 4 or more dimensions."); + Dim ret(xs[0]); + ret.delete_dim(reduced_dim); + return ret; +} + +size_t MaxDimension::aux_storage_size() const { + return sizeof(Eigen::DenseIndex) * dim.size(); +} + +#endif + +template +void MaxDimension::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + Eigen::DenseIndex* maxmap = static_cast(aux_mem); + const unsigned batch_size = dim.batch_elems(); + const unsigned first_dim_size = dim[0]; + const unsigned second_dim_size = dim[1]; + Eigen::TensorMap> locs(maxmap, first_dim_size, second_dim_size, batch_size); + const Eigen::array reduction_axis = {reduced_dim}; + locs.device(*dev.edevice) = xs[0]->tb<3>().argmax(reduced_dim); + fx.tb<2>().device(*dev.edevice) = xs[0]->tb<3>().maximum(reduction_axis); +} + +template +void MaxDimension::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + DYNET_ARG_CHECK(i == 0, "Failed dimension check in MaxDimension::backward"); +#ifdef __CUDACC__ + vector indices(dim.size()); + Eigen::DenseIndex* maxmap = &indices[0]; + CUDA_CHECK(cudaMemcpy((void*)maxmap, aux_mem, sizeof(Eigen::DenseIndex) * dim.size(), cudaMemcpyDeviceToHost)); +#else + Eigen::DenseIndex* maxmap = static_cast(aux_mem); +#endif + const unsigned batch_size = dim.batch_elems(); + const unsigned first_dim_size = dim[0]; + const unsigned second_dim_size = dim[1]; + Eigen::TensorMap> locs(maxmap, first_dim_size, second_dim_size, 
batch_size); + for(unsigned b = 0; b < batch_size; ++b){ + for(unsigned j = 0; j < second_dim_size; ++j){ + for(unsigned i = 0; i < first_dim_size; ++i){ + if (reduced_dim > second_dim) + dEdxi.tb<3>().chip<3>(b).chip(locs(i, j, b), reduced_dim).chip(j, second_dim).chip(i, first_dim).device(*dev.edevice) + += dEdf.tb<2>().chip<2>(b).chip<1>(j).chip<0>(i); + else if (reduced_dim > first_dim) + dEdxi.tb<3>().chip<3>(b).chip(j, second_dim).chip(locs(i, j, b), reduced_dim).chip(i, first_dim).device(*dev.edevice) + += dEdf.tb<2>().chip<2>(b).chip<1>(j).chip<0>(i); + else + dEdxi.tb<3>().chip<3>(b).chip(j, second_dim).chip(i, first_dim).chip(locs(i, j, b), reduced_dim).device(*dev.edevice) + += dEdf.tb<2>().chip<2>(b).chip<1>(j).chip<0>(i); + } + } + } +} +DYNET_NODE_INST_DEV_IMPL(MaxDimension) + +} diff --git a/dynet/nodes-moments.cc b/dynet/nodes-moments.cc new file mode 100644 index 000000000..b4d618165 --- /dev/null +++ b/dynet/nodes-moments.cc @@ -0,0 +1,440 @@ +#include "dynet/nodes.h" + +#include "dynet/nodes-macros.h" +#include "dynet/functors.h" + +using namespace std; + +namespace dynet { + +// ************* Average ************* + +#ifndef __CUDACC__ + +string Average::as_string(const vector& arg_names) const { + ostringstream s; + s << "average(" << arg_names[0]; + for (unsigned i = 1; i < arg_names.size(); ++i) + s << ", " << arg_names[i]; + s << ")"; + return s.str(); +} + +Dim Average::dim_forward(const vector& xs) const { + Dim d(xs[0]); + for (unsigned i = 1; i < xs.size(); ++i) { + DYNET_ARG_CHECK(xs[0].single_batch() == xs[i].single_batch(), + "Mismatched input dimensions in Average: " << xs); + d.bd = max(xs[i].bd, d.bd); + } + return d; +} + +#endif + +template +void Average::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + const unsigned num_args = xs.size(); + if (num_args == 1) { + fx.tvec().device(*dev.edevice) = xs[0]->tvec(); + return; + } + if (num_args == 2 && xs[0]->d.bd == xs[1]->d.bd) + 
fx.tvec().device(*dev.edevice) = xs[0]->tvec() + xs[1]->tvec(); + else if (num_args == 3 && xs[0]->d.bd == xs[1]->d.bd && xs[1]->d.bd == xs[2]->d.bd) + fx.tvec().device(*dev.edevice) = xs[0]->tvec() + xs[1]->tvec() + xs[2]->tvec(); + else if (num_args == 4 && xs[0]->d.bd == xs[1]->d.bd && xs[1]->d.bd == xs[2]->d.bd && xs[2]->d.bd == xs[3]->d.bd) + fx.tvec().device(*dev.edevice) = xs[0]->tvec() + xs[1]->tvec() + xs[2]->tvec() + xs[3]->tvec(); + else { + bool allSameBatchSize = std::all_of(xs.begin(), xs.end(), [&](const Tensor* x) { return x->d.bd == xs[0]->d.bd;}); + if (allSameBatchSize) { + // Since they are all the same batch size, we can easily unroll the addition (results in lower GPU latency by merging multiple adds together in one CUDA call): + DYNET_ASSERT(num_args > 4, "Bad loop unrolling in Average::forward"); // If it was <=4, we would have handled it in the special cases above + fx.tvec().device(*dev.edevice) = xs[0]->tvec() + xs[1]->tvec() + xs[2]->tvec() + xs[3]->tvec(); + + const unsigned remainder = (num_args - 4 ) % 4; + switch (remainder) { + case 0: break; + case 1: fx.tvec().device(*dev.edevice) += xs[4]->tvec(); break; + case 2: fx.tvec().device(*dev.edevice) += xs[4]->tvec() + xs[5]->tvec(); break; + case 3: fx.tvec().device(*dev.edevice) += xs[4]->tvec() + xs[5]->tvec() + xs[6]->tvec(); break; + } + for (unsigned i = 4 + remainder; i < num_args; i += 4) + fx.tvec().device(*dev.edevice) += xs[i]->tvec() + xs[i + 1]->tvec() + xs[i + 2]->tvec() + xs[i + 3]->tvec(); + } + else { + // Not all the same batch size, so need to broadcast in the cases where they differ + TensorTools::zero(fx); +#ifdef __CUDACC__ + Eigen::array bcast({ 1, (int)fx.d.bd }); +#endif + for (unsigned i = 0; i < num_args; ++i) { + if (xs[i]->d.bd == fx.d.bd) { + fx.tvec().device(*dev.edevice) += xs[i]->tvec(); + } + else { +#ifdef __CUDACC__ + fx.tbvec().device(*dev.edevice) += xs[i]->tbvec().broadcast(bcast); +#else + for (unsigned b = 0; b < fx.d.bd; ++b) + 
fx.tbvec().chip<1>(b).device(*dev.edevice) += xs[i]->tvec(); +#endif + } + } + } + } + fx.tvec().device(*dev.edevice) = fx.tvec() / (float)xs.size(); +} + +template +void Average::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + dEdxi.tvec().device(*dev.edevice) += (dEdf.tvec() / (float)xs.size()); +} +DYNET_NODE_INST_DEV_IMPL(Average) + +// ************* AverageColumns ************* + +#ifndef __CUDACC__ + +string AverageColumns::as_string(const vector& arg_names) const { + ostringstream s; + s << "average_cols(matrix=" << arg_names[0] << ')'; + return s.str(); +} + +Dim AverageColumns::dim_forward(const vector& xs) const { + DYNET_ASSERT(xs.size() == 1 || xs.size() == 2, "Failed input count check in AverageColumns"); + int bd = (xs.size() == 1 ? xs[0].bd : max(xs[0].bd, xs[1].bd)); + return Dim({xs[0].rows()}, bd); +} + +#endif + +template +void AverageColumns::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + DYNET_ASSERT(xs.size() == 1, "Failed input count check in AverageColumns"); + unsigned cols = xs[0]->d.cols(); +#ifdef __CUDACC__ + // The reduction used on CPU is better, but not implemented in GPU + fx.t<1>().device(*dev.edevice) = xs[0]->t<2>().chip<1>(0); + for(unsigned i = 1; i < cols; ++i) + fx.t<1>().device(*dev.edevice) += xs[0]->t<2>().chip<1>(i); + fx.t<1>().device(*dev.edevice) = fx.t<1>() / (float)cols; +#else + const Eigen::array reduction_axis = {1}; + fx.t<1>().device(*dev.edevice) = xs[0]->t<2>().sum(reduction_axis) / (float)cols; +#endif +} + +template +void AverageColumns::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + const Eigen::array broadcasts = {1, xs[0]->d[1]}; + dEdxi.t<2>().device(*dev.edevice) += (dEdf.t<2>() / (float)xs[0]->d[1]).broadcast(broadcasts); +} +DYNET_NODE_INST_DEV_IMPL(AverageColumns) + +// 
************* MomentElements ************* + +#ifndef __CUDACC__ + +string MomentElements::as_string(const vector& arg_names) const { + ostringstream s; + s << "moment_elems( expression=" << arg_names[0] << ", order=" << order << " )"; + return s.str(); +} + +Dim MomentElements::dim_forward(const vector& xs) const { + DYNET_ARG_CHECK(xs.size() == 1, "Failed input count check in MomentElements") + DYNET_ARG_CHECK(order>= 1, "Order of moment should be >=1 in MomentElements (recieved "< +void MomentElements::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + DYNET_ARG_CHECK(xs.size() == 1, "Failed dimension check in MomentElements::forward"); + Eigen::array red_axis; red_axis[0] = 0; + if(order == 1) + fx.tb<0>().device(*dev.edevice) = xs[0]->tbvec().sum(red_axis) / (float) xs[0]->d.batch_size(); + else if (order == 2) + fx.tb<0>().device(*dev.edevice) = xs[0]->tbvec().square().sum(red_axis) / (float) xs[0]->d.batch_size(); + else + fx.tb<0>().device(*dev.edevice) = xs[0]->tbvec().pow(order).sum(red_axis) / (float) xs[0]->d.batch_size(); +} + +template +void MomentElements::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + DYNET_ARG_CHECK(i == 0, "Failed dimension check in MomentElements::backward"); + Eigen::array bcast = {(int)xs[0]->d.batch_size(), 1}; + if (order == 1) + dEdxi.tbvec().device(*dev.edevice) += dEdf.tbvec().broadcast(bcast) / (float) xs[0]->d.batch_size(); + else if (order == 2) + dEdxi.tbvec().device(*dev.edevice) += (dEdf.tbvec().broadcast(bcast) * xs[0]->tbvec()) * ( 2.f / (float) xs[0]->d.batch_size()); + else if (order == 3) + dEdxi.tbvec().device(*dev.edevice) += (dEdf.tbvec().broadcast(bcast) * xs[0]->tbvec().square()) * ( 3.f / (float) xs[0]->d.batch_size()); + else + dEdxi.tbvec().device(*dev.edevice) += (dEdf.tbvec().broadcast(bcast) * xs[0]->tbvec().pow(order - 1)) * ( (float) order / (float) xs[0]->d.batch_size()); +} 
+DYNET_NODE_INST_DEV_IMPL(MomentElements) + +// ************* MomentDimension ************* + +#ifndef __CUDACC__ + +string MomentDimension::as_string(const vector& arg_names) const { + ostringstream s; + s << "moment_dim(expression=" << arg_names[0] << ',' << dimension << ", order="<& xs) const { + DYNET_ASSERT(xs.size() == 1, "Failed input count check in MomentDimension"); + DYNET_ARG_CHECK(xs[0].nd <= 3, "MomentDimension implemented up to tensors of order 3 (with minibatch) for now") + DYNET_ARG_CHECK(dimension < xs[0].nd, "dimension " << dimension << " is out of bounds of tensor of order " << xs[0].nd << " in MomentDimension" ) + DYNET_ARG_CHECK(order>= 1, "Order of moment should be >=1 in MomentDimension (recieved "< +void MomentDimension::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + DYNET_ASSERT(xs.size() == 1, "Failed input count check in MomentDimension"); + Eigen::array reduction_axis = {(int)dimension}; + float n = (float) xs[0]->d[dimension]; + if(order == 1) + fx.tb<2>().device(*dev.edevice) = xs[0]->tb<3>().sum(reduction_axis) / n; + else if (order == 2) + fx.tb<2>().device(*dev.edevice) = xs[0]->tb<3>().square().sum(reduction_axis) / n; + else + fx.tb<2>().device(*dev.edevice) = xs[0]->tb<3>().pow(order).sum(reduction_axis) / n; +} + +template +void MomentDimension::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + DYNET_ARG_CHECK(i == 0, "Failed dimension check in MomentDimension::backward"); + Eigen::array bcast = {1,1,1,1}; bcast[dimension] = xs[0]->d[dimension]; + Eigen::array morph = {(int)xs[0]->d[0],(int)xs[0]->d[1],(int)xs[0]->d[2],(int)xs[0]->d.bd}; morph[dimension] = 1; + float n = (float) xs[0]->d[dimension]; + if (order == 1) + dEdxi.tb<3>().device(*dev.edevice) += dEdf.tb<2>().reshape(morph).broadcast(bcast) / n; + else if (order == 2) + dEdxi.tb<3>().device(*dev.edevice) += 
(dEdf.tb<2>().reshape(morph).broadcast(bcast) * xs[0]->tb<3>()) * ( 2.f / n); + else if (order == 3) + dEdxi.tb<3>().device(*dev.edevice) += (dEdf.tb<2>().reshape(morph).broadcast(bcast) * xs[0]->tb<3>().square()) * ( 3.f / n); + else + dEdxi.tb<3>().device(*dev.edevice) += (dEdf.tb<2>().reshape(morph).broadcast(bcast) * xs[0]->tb<3>().pow(order - 1)) * ( (float) order / n); +} +DYNET_NODE_INST_DEV_IMPL(MomentDimension) + +// ************* MomentBatches ************* + +#ifndef __CUDACC__ + +string MomentBatches::as_string(const vector& arg_names) const { + ostringstream s; + s << "moment_batches( expression=" << arg_names[0] << ", order=" << order << " )"; + return s.str(); +} + +Dim MomentBatches::dim_forward(const vector& xs) const { + DYNET_ARG_CHECK(xs.size() == 1, "Failed input count check in MomentBatches") + DYNET_ARG_CHECK(order>= 1, "Order of moment should be >=1 in MomentBatches (recieved "< +void MomentBatches::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + DYNET_ARG_CHECK(xs.size() == 1, "Failed dimension check in MomentBatches::forward"); + Eigen::array red_axis; red_axis[0] = 1; + if(order == 1) + fx.tvec().device(*dev.edevice) = xs[0]->tbvec().sum(red_axis) / (float) xs[0]->d.bd; + else if (order == 2) + fx.tvec().device(*dev.edevice) = xs[0]->tbvec().square().sum(red_axis) / (float) xs[0]->d.bd; + else + fx.tvec().device(*dev.edevice) = xs[0]->tbvec().pow(order).sum(red_axis) / (float) xs[0]->d.bd; +} + +template +void MomentBatches::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + DYNET_ARG_CHECK(i == 0, "Failed dimension check in MomentBatches::backward"); + Eigen::array bcast = {1, (int)xs[0]->d.bd}; + if (order == 1) + dEdxi.tbvec().device(*dev.edevice) += dEdf.tbvec().broadcast(bcast) / (float) xs[0]->d.bd; + else if (order == 2) + dEdxi.tbvec().device(*dev.edevice) += (dEdf.tbvec().broadcast(bcast) * xs[0]->tbvec()) * 
( 2.f / (float) xs[0]->d.bd); + else if (order == 3) + dEdxi.tbvec().device(*dev.edevice) += (dEdf.tbvec().broadcast(bcast) * xs[0]->tbvec().square()) * ( 3.f / (float) xs[0]->d.bd); + else + dEdxi.tbvec().device(*dev.edevice) += (dEdf.tbvec().broadcast(bcast) * xs[0]->tbvec().pow(order - 1)) * ( (float) order / (float) xs[0]->d.bd); +} +DYNET_NODE_INST_DEV_IMPL(MomentBatches) + +// ************* StdElements ************* + +#ifndef __CUDACC__ + +string StdElements::as_string(const vector& arg_names) const { + ostringstream s; + s << "std_elems( expression=" << arg_names[0] << " )"; + return s.str(); +} + +Dim StdElements::dim_forward(const vector& xs) const { + DYNET_ARG_CHECK(xs.size() == 1, "Failed input count check in StdElements") + return Dim({1}, xs[0].bd); +} + +#endif + +template +void StdElements::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + DYNET_ASSERT(xs.size() == 1, "Failed dimension check in StdElements::forward"); + Eigen::array red_axis = {0}; + Eigen::array bcast = {xs[0]->d.batch_size(), 1}; + Eigen::array newaxis = {1, xs[0]->d.bd}; + float n = (float) xs[0]->d.batch_size(); + fx.tb<0>().device(*dev.edevice) = ((xs[0]->tbvec() - (xs[0]->tbvec().sum(red_axis).reshape(newaxis) / n).broadcast(bcast)).square().sum(red_axis) / n).sqrt(); +} + +template +void StdElements::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + DYNET_ASSERT(i < 1, "Failed dimension check in StdElements::backward"); + Eigen::array bcast = {xs[0]->d.batch_size(), 1}; + Eigen::array newaxis = {1, xs[0]->d.bd}; + Eigen::array red_axis = {0}; + float n = (float) xs[0]->d.batch_size(); + dEdxi.tbvec().device(*dev.edevice) += (2 / n) * (xs[0]->tbvec() - (xs[0]->tbvec().sum(red_axis).reshape(newaxis) / n).broadcast(bcast)) * (fx.tbvec().binaryExpr(dEdf.tbvec(), FSqrtBackward())).broadcast(bcast); + +} +DYNET_NODE_INST_DEV_IMPL(StdElements) + +// 
************* StdDimension ************* + +#ifndef __CUDACC__ + +string StdDimension::as_string(const vector& arg_names) const { + ostringstream s; + s << "moment_dim(expression=" << arg_names[0] << ',' << dimension <<'}'; + return s.str(); +} + +Dim StdDimension::dim_forward(const vector& xs) const { + DYNET_ASSERT(xs.size() == 1, "Failed input count check in StdDimension"); + DYNET_ARG_CHECK(xs[0].nd <= 3, "StdDimension implemented up to tensors of order 3 (with minibatch) for now") + DYNET_ARG_CHECK(dimension < xs[0].nd, "dimension " << dimension << " is out of bounds of tensor of order " << xs[0].nd << " in StdDimension" ) + Dim ret(xs[0]); + ret.delete_dim(dimension); + return ret; +} + +#endif + +template +void StdDimension::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + DYNET_ASSERT(xs.size() == 1, "Failed input count check in StdDimension"); + Eigen::array red_axis = {(int)dimension}; + Eigen::array morph = {(int)xs[0]->d[0],(int)xs[0]->d[1],(int)xs[0]->d[2],(int)xs[0]->d.bd}; morph[dimension] = 1; + Eigen::array bcast = {1,1,1,1}; bcast[dimension] = xs[0]->d[dimension]; + float n = (float) xs[0]->d[dimension]; + fx.tb<2>().device(*dev.edevice) = ((xs[0]->tb<3>() - (xs[0]->tb<3>().sum(red_axis).reshape(morph) / n).broadcast(bcast)).square().sum(red_axis) / n).sqrt(); +} + +template +void StdDimension::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + DYNET_ARG_CHECK(i == 0, "Failed dimension check in StdDimension::backward"); + Eigen::array red_axis = {(int)dimension}; + Eigen::array bcast = {1,1,1,1}; bcast[dimension] = xs[0]->d[dimension]; + Eigen::array morph = {(int)xs[0]->d[0],(int)xs[0]->d[1],(int)xs[0]->d[2],(int)xs[0]->d.bd}; morph[dimension] = 1; + float n = (float) xs[0]->d[dimension]; + dEdxi.tb<3>().device(*dev.edevice) += (2 / n) * (xs[0]->tb<3>() - (xs[0]->tb<3>().sum(red_axis).reshape(morph) / n).broadcast(bcast)) * 
(fx.tb<2>().binaryExpr(dEdf.tb<2>(), FSqrtBackward())).reshape(morph).broadcast(bcast); + +} +DYNET_NODE_INST_DEV_IMPL(StdDimension) + +// ************* StdBatches ************* + +#ifndef __CUDACC__ + +string StdBatches::as_string(const vector& arg_names) const { + ostringstream s; + s << "std_batches( expression=" << arg_names[0] << " )"; + return s.str(); +} + +Dim StdBatches::dim_forward(const vector& xs) const { + DYNET_ARG_CHECK(xs.size() == 1, "Failed input count check in StdBatches") + + return xs[0].single_batch(); +} + +#endif + +template +void StdBatches::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + DYNET_ASSERT(xs.size() == 1, "Failed dimension check in StdBatches::forward"); + Eigen::array red_axis = {1}; + Eigen::array newaxis = {xs[0]->d.batch_size(), 1}; + Eigen::array bcast = {1, xs[0]->d.bd}; + float n = (float)xs[0]->d.bd; + fx.t<1>().device(*dev.edevice) = ((xs[0]->tbvec() - (xs[0]->tbvec().sum(red_axis).reshape(newaxis) / n).broadcast(bcast)).square().sum(red_axis) / n).sqrt(); +} + +template +void StdBatches::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + DYNET_ASSERT(i < 1, "Failed dimension check in StdBatches::backward"); + Eigen::array red_axis = {1}; + Eigen::array bcast = {1, xs[0]->d.bd}; + Eigen::array newaxis = {xs[0]->d.batch_size(), 1}; + float n = (float)xs[0]->d.bd; + dEdxi.tbvec().device(*dev.edevice) += (2 / n) * (xs[0]->tbvec() - (xs[0]->tbvec().sum(red_axis).reshape(newaxis) / n).broadcast(bcast)) * (fx.tbvec().binaryExpr(dEdf.tbvec(), FSqrtBackward())).broadcast(bcast); + +} +DYNET_NODE_INST_DEV_IMPL(StdBatches) + +} diff --git a/dynet/nodes-normalization.cc b/dynet/nodes-normalization.cc new file mode 100644 index 000000000..d4faacb9d --- /dev/null +++ b/dynet/nodes-normalization.cc @@ -0,0 +1,54 @@ +#include "dynet/nodes.h" + +#include "dynet/nodes-macros.h" + +using namespace std; + +namespace 
dynet { + +// ************* WeightNormalization ************* + +#ifndef __CUDACC__ + +string WeightNormalization::as_string(const vector& arg_names) const { + ostringstream s; + s << "weight_norm(" << arg_names[0] << ", " << arg_names[1] << ')'; + return s.str(); +} + +Dim WeightNormalization::dim_forward(const vector& xs) const { + DYNET_ARG_CHECK(xs.size() == 2, "Failed input count check in WeightNormalization"); + DYNET_ARG_CHECK(1 == xs[1].size()," Size of gain parameter in WeightNormalization should be 1, received " << xs[1].size()); + return xs[0]; +} + +#endif + +template +void WeightNormalization::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + DYNET_ASSERT(xs.size() == 2, "Failed dimension check in WeightNormalization::forward"); + Eigen::array red_axis = {0}; + Eigen::array bcast = {xs[0]->d.size()}; + Eigen::array morph = {1}; + fx.tvec().device(*dev.edevice) = (xs[0]->tvec() / xs[0]->tvec().square().sum(red_axis).sqrt().reshape(morph).broadcast(bcast)) * as_scalar(*xs[1]); +} + +template +void WeightNormalization::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + Eigen::array red_axis = {0}; + Eigen::array bcast = {xs[0]->d.size()}; + Eigen::array morph = {1}; + if (i==0){ + dEdxi.tvec().device(*dev.edevice) += (dEdf.tvec() / xs[0]->tvec().square().sum(red_axis).sqrt().reshape(morph).broadcast(bcast)) * as_scalar(*xs[1]) - fx.tvec() * (((dEdf.tvec() * xs[0]->tvec()).sum(red_axis)) / xs[0]->tvec().square().sum(red_axis)).reshape(morph).broadcast(bcast); + }else{ + dEdxi.t<0>().device(*dev.edevice) += ((dEdf.tvec() * xs[0]->tvec()).sum(red_axis)) / xs[0]->tvec().square().sum(red_axis).sqrt(); + } +} +DYNET_NODE_INST_DEV_IMPL(WeightNormalization) + +} diff --git a/dynet/nodes-random.cc b/dynet/nodes-random.cc new file mode 100644 index 000000000..9e221112b --- /dev/null +++ b/dynet/nodes-random.cc @@ -0,0 +1,184 @@ +#include 
"dynet/nodes.h" + +#include "dynet/nodes-macros.h" + +using namespace std; + +namespace dynet { + +// ************* GaussianNoise ************* + +#ifndef __CUDACC__ + +string GaussianNoise::as_string(const vector& arg_names) const { + ostringstream s; + s << arg_names[0] << " + N(0," << stddev << ')'; + return s.str(); +} + +Dim GaussianNoise::dim_forward(const vector& xs) const { + DYNET_ARG_CHECK(xs.size() == 1, "Failed input count check in GaussianNoise") + return xs[0]; +} + +size_t GaussianNoise::aux_storage_size() const { + return dim.size() * sizeof(float); +} + +#endif + +template +void GaussianNoise::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + Tensor m(dim, (float*)aux_mem, fx.device, DeviceMempool::FXS); + TensorTools::randomize_normal(m, 0, stddev); + fx.tvec().device(*dev.edevice) = xs[0]->tvec() + m.tvec(); +} + +template +void GaussianNoise::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + dEdxi.tvec().device(*dev.edevice) += dEdf.tvec(); +} +DYNET_NODE_INST_DEV_IMPL(GaussianNoise) + +// ************* RandomNormal ************* + +#ifndef __CUDACC__ + +string RandomNormal::as_string(const vector& arg_names) const { + ostringstream s; + s << "random_normal(" << dim << ')'; + return s.str(); +} + +Dim RandomNormal::dim_forward(const vector& xs) const { + return dim; +} + +#endif + +template +void RandomNormal::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + DYNET_ASSERT(xs.size() == 0, "Failed dimension check in RandomNormal::forward"); + TensorTools::randomize_normal(fx); +} + +template +void RandomNormal::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + DYNET_RUNTIME_ERR("Called backward() on an arity 0 node"); +} +DYNET_NODE_INST_DEV_IMPL(RandomNormal) + +// ************* RandomBernoulli 
************* + +#ifndef __CUDACC__ + +string RandomBernoulli::as_string(const vector& arg_names) const { + ostringstream s; + s << "random_bernoulli(" << dim << ", " << p << ')'; + return s.str(); +} + +Dim RandomBernoulli::dim_forward(const vector& xs) const { + return dim; +} + +#endif + +template +void RandomBernoulli::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + DYNET_ASSERT(xs.size() == 0, "Failed dimension check in RandomBernoulli::forward"); + TensorTools::randomize_bernoulli(fx, p, scale); +} + +template +void RandomBernoulli::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + DYNET_RUNTIME_ERR("Called backward() on an arity 0 node"); +} +DYNET_NODE_INST_DEV_IMPL(RandomBernoulli) + +// ************* RandomUniform ************* + +#ifndef __CUDACC__ + +string RandomUniform::as_string(const vector& arg_names) const { + ostringstream s; + s << "random_uniform(" << dim << ", " << left << ", " << right << ')'; + return s.str(); +} + +Dim RandomUniform::dim_forward(const vector& xs) const { + return dim; +} + +#endif + +template +void RandomUniform::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + DYNET_ASSERT(xs.size() == 0, "Failed dimension check in RandomUniform::forward"); + TensorTools::randomize_uniform(fx, left, right); +} + +template +void RandomUniform::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + DYNET_RUNTIME_ERR("Called backward() on an arity 0 node"); +} +DYNET_NODE_INST_DEV_IMPL(RandomUniform) + +// ************* RandomGumbel ************* + +#ifndef __CUDACC__ + +string RandomGumbel::as_string(const vector& arg_names) const { + ostringstream s; + s << "random_gumbel(" << dim << ", " << mu << ", " << beta << ')'; + return s.str(); +} + +Dim RandomGumbel::dim_forward(const vector& xs) const { + 
return dim; +} + +#endif + +template +void RandomGumbel::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + DYNET_ASSERT(xs.size() == 0, "Failed dimension check in RandomGumbel::forward"); + DYNET_ARG_CHECK(mu == 0.0 && beta == 1.0, "RandomGumbel only supports Gumbel(0,1) at the moment (pull requests welcome)"); + TensorTools::randomize_uniform(fx, 0, 1); + float eps = 1e-20; + fx.tvec().device(*dev.edevice) = -(-fx.tvec().cwiseMax(eps).log()).cwiseMax(eps).log(); +} + +template +void RandomGumbel::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + DYNET_RUNTIME_ERR("Called backward() on an arity 0 node"); +} +DYNET_NODE_INST_DEV_IMPL(RandomGumbel) + + +} diff --git a/dynet/nodes-select.cc b/dynet/nodes-select.cc new file mode 100644 index 000000000..f8760b4c7 --- /dev/null +++ b/dynet/nodes-select.cc @@ -0,0 +1,333 @@ +#include "dynet/nodes.h" + +#include "dynet/nodes-macros.h" + +using namespace std; + +namespace dynet { + +// ************* SelectRows ************* + +#ifndef __CUDACC__ + +string SelectRows::as_string(const vector& arg_names) const { + ostringstream s; + s << "select_rows(" << arg_names[0] << ", {rsize=" << prows->size() << "})"; + return s.str(); +} + +Dim SelectRows::dim_forward(const vector& xs) const { + DYNET_ARG_CHECK(xs.size() == 1, "Bad arguments in SelectRows: " << xs); + unsigned nrows = prows->size(); + Dim ret(xs[0]); + ret.d[0] = nrows; + return ret; +} + +#endif + +template +void SelectRows::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + DYNET_ARG_CHECK(xs.size() == 1, "Failed dimension check in SelectRows::forward"); + auto& rm = *prows; + for (unsigned i = 0; i < rm.size(); ++i) { + DYNET_ARG_CHECK(rm[i] < xs[0]->d.rows(), + "Out-of-bounds index " << rm[i] << " in SelectRows over expression of dimensions " << xs[0]->d); + fx.t<4>().chip<0>(i).device(*dev.edevice) = 
xs[0]->t<4>().chip<0>(rm[i]); + } +} + +template +void SelectRows::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + DYNET_ARG_CHECK(xs.size() == 1, "Failed dimension check in SelectRows::backward"); + auto& rm = *prows; + for (unsigned i = 0; i < rm.size(); ++i) + dEdxi.t<4>().chip<0>(rm[i]).device(*dev.edevice) += dEdf.t<4>().chip<0>(i); +} +DYNET_NODE_INST_DEV_IMPL(SelectRows) + +// ************* SelectCols ************* + +#ifndef __CUDACC__ + +string SelectCols::as_string(const vector& arg_names) const { + ostringstream s; + s << "select_cols(" << arg_names[0] << ", {csize=" << pcols->size() << "})"; + return s.str(); +} + +Dim SelectCols::dim_forward(const vector& xs) const { + DYNET_ARG_CHECK(xs.size() == 1 && xs[0].ndims() == 2, "Bad arguments in SelectCols: " << xs); + unsigned ncols = pcols->size(); + return Dim({xs[0].rows(), ncols}); +} + +#endif + +template +void SelectCols::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + DYNET_ARG_CHECK(xs.size() == 1, "Failed dimension check in SelectCols::forward"); + auto& rm = *pcols; + for (unsigned i = 0; i < rm.size(); ++i) { + DYNET_ARG_CHECK(rm[i] < xs[0]->d.cols(), + "Out-of-bounds index " << rm[i] << " in SelectCols over expression of dimensions " << xs[0]->d); + fx.t<2>().chip<1>(i).device(*dev.edevice) = xs[0]->t<2>().chip<1>(rm[i]); + } +} + +template +void SelectCols::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + DYNET_ARG_CHECK(xs.size() == 1, "Failed dimension check in SelectCols::backward"); + auto& rm = *pcols; + for (unsigned i = 0; i < rm.size(); ++i) + dEdxi.t<2>().chip<1>(rm[i]).device(*dev.edevice) += dEdf.t<2>().chip<1>(i); +} +DYNET_NODE_INST_DEV_IMPL(SelectCols) + +// ************* PickElement ************* + +#ifndef __CUDACC__ + +string PickElement::as_string(const vector& 
arg_names) const { + ostringstream s; + s << "pick(" << arg_names[0] << ','; + if(pval) { + s << *pval; + } else { + DYNET_ASSERT(pvals, "Have neither index nor index vector in PickElement"); + s << '['; + if(pvals->size()) { + s << (*pvals)[0]; + for(size_t i = 1; i < pvals->size(); ++i) + s << ',' << (*pvals)[i]; + } + s << "]"; + } + s << ", " << dimension << ")"; + return s.str(); +} + +Dim PickElement::dim_forward(const vector& xs) const { + DYNET_ARG_CHECK(xs.size() == 1, "Failed input count check in PickElement"); + DYNET_ARG_CHECK(dimension < xs[0].nd, + "Tried to PickElement on dimension " << dimension << " bigger than input " << xs[0]); + DYNET_ARG_CHECK(xs[0].nd < 4, + "PickElement not currently supported for tensors of 4 or more dimensions."); + + Dim ret(xs[0]); + if (pvals){ + DYNET_ARG_CHECK(xs[0].bd == 1 || xs[0].bd == pvals->size(), + "Number of elements in the passed-in index vector (" << pvals->size() << ")" + " did not match number of elements in mini-batch elements in expression (of dimension " << xs[0].bd << ") in PickElement"); + ret.bd = pvals->size(); + } + + ret.delete_dim(dimension); + return ret; +} + +#endif + +// x_1 is a vector +// y = (x_1)_{*pval} +template +void PickElement::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + if(pval) { + DYNET_ARG_CHECK(*pval < xs[0]->d[dimension], + "PickElement::forward_impl requested element " << *pval << " from a dimension of length " << xs[0]->d[dimension]); + // TODO: This limit of up to 4 is somewhat arbitrary. We need to decide how to handle + // things with "maximum tensor size". 
+ fx.tb<3>().device(*dev.edevice) = xs[0]->tb<4>().chip(*pval, dimension); + } else { + DYNET_ASSERT(pvals != nullptr, "Neither single nor vector of elements available in PickElement::forward"); + DYNET_ARG_CHECK(pvals->size() == fx.d.batch_elems(), + "In PickElement::forward, number of elements in the passed-in index vector (" << pvals->size() << ")" + " did not match number of elements in mini-batch elements in expression (of dimension" << fx.d << ")"); + for(unsigned b = 0; b < pvals->size(); ++b) { + DYNET_ARG_CHECK((*pvals)[b] < xs[0]->d[dimension], + "PickElement::forward_impl requested element " << (*pvals)[b] << " from a dimension of length " << xs[0]->d[dimension]); + if(xs[0]->d.bd == 1){ + fx.tb<2>().chip<2>(b).device(*dev.edevice) = xs[0]->t<3>().chip((*pvals)[b], dimension); + }else{ + fx.tb<2>().chip<2>(b).device(*dev.edevice) = xs[0]->tb<3>().chip<3>(b).chip((*pvals)[b], dimension); + } + } + } +} + +// derivative is 0 in all dimensions except 1 for the selected element +template +void PickElement::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + DYNET_ARG_CHECK(i == 0, "Failed dimension check in PickElement::backward"); + if(pval) { + dEdxi.tb<3>().chip(*pval, dimension).device(*dev.edevice) += dEdf.tb<2>(); + } else { + DYNET_ASSERT(pvals, "Neither single nor vector of elements available in PickElement::forward"); + for(unsigned b = 0; b < pvals->size(); ++b){ + if(xs[0]->d.bd == 1){ + dEdxi.t<3>().chip((*pvals)[b], dimension).device(*dev.edevice) += dEdf.tb<2>().chip<2>(b); + }else{ + dEdxi.tb<3>().chip<3>(b).chip((*pvals)[b], dimension).device(*dev.edevice) += dEdf.tb<2>().chip<2>(b); + } + } + } +} +DYNET_NODE_INST_DEV_IMPL(PickElement) + +// ************* PickRange ************* + +#ifndef __CUDACC__ + +// x_1 is a vector +// y = (x_1)[start:end] +string PickRange::as_string(const vector& arg_names) const { + ostringstream s; + s << "slice(" << 
arg_names[0] << ',' << start << ':' << end << ", dim=" << dim << ')'; + return s.str(); +} + +Dim PickRange::dim_forward(const vector& xs) const { + DYNET_ARG_CHECK(xs.size() == 1, "Failed input count check in PickRange"); + DYNET_ARG_CHECK(dim < xs[0].nd && start < end && xs[0][dim] >= end, + "Bad input dimensions or range in PickRange: " << xs << " range(" << start << ", " << end << ") with dim=" << dim); + Dim ret = xs[0]; ret.d[dim] = end-start; + return ret; +} + +int PickRange::autobatch_sig(const ComputationGraph & cg, SigMap &sm) const { + Sig s(nt::pickrange); + const Dim &in_dim = cg.nodes[args[0]]->dim; + s.add_dim(in_dim); + s.add_node(start); + s.add_node(end); + return sm.get_idx(s); +} + +#endif + +// x_1 is a matrix +// y = (x_1)[start:end] +// slice of matrix from index start (inclusive) to index end (exclusive) +template +void PickRange::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + Eigen::DSizes indices(0,0,0,0,0); + indices[dim] = start; + Eigen::DSizes sizes(static_cast(fx.d[0]), + static_cast(fx.d[1]), + static_cast(fx.d[2]), + static_cast(fx.d[3]), + static_cast(fx.d.bd)); + sizes[dim] = end-start; + fx.tb<4>().device(*dev.edevice) = xs[0]->tb<4>().slice(indices, sizes); +} + +// derivative is 0 in all dimensions except the slice range +template +void PickRange::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + Eigen::DSizes indices(0,0,0,0,0); + indices[dim] = start; + Eigen::DSizes sizes(static_cast(fx.d[0]), + static_cast(fx.d[1]), + static_cast(fx.d[2]), + static_cast(fx.d[3]), + static_cast(fx.d.bd)); + sizes[dim] = end-start; + dEdxi.tb<4>().slice(indices, sizes).device(*dev.edevice) += dEdf.tb<4>(); +} +DYNET_NODE_INST_DEV_IMPL(PickRange) + +// ************* PickBatchElements ************* + +#ifndef __CUDACC__ + +string PickBatchElements::as_string(const vector& arg_names) const { + ostringstream s; + s << 
"pick_batch_elems(" << arg_names[0] << ','; + if (pval) { + s << *pval; + } else { + DYNET_ASSERT(pvals, "Have neither index nor index vector in PickBatchElements"); + s << '['; + if (pvals->size()) { + s << (*pvals)[0]; + for (size_t i = 1; i < pvals->size(); ++i) + s << ',' << (*pvals)[i]; + } + s << "]"; + } + s << ")"; + return s.str(); +} + +Dim PickBatchElements::dim_forward(const vector& xs) const { + DYNET_ARG_CHECK(xs.size() == 1, "Failed input count check in PickBatchElements") + DYNET_ARG_CHECK(xs[0].nd < 4, "PickElement not currently supported for tensors of 4 or more dimensions."); + Dim ret(xs[0]); + if (pval) { + // set batch size to one. + ret.bd = 1; + } else { + DYNET_ASSERT(pvals, "Have neither index nor index vector in PickBatchElements"); + ret.bd = pvals->size(); + } + return ret; +} + +#endif + +template +void PickBatchElements::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + if (pval) { + fx.tvec().device(*dev.edevice) = xs[0]->tbvec().chip<1>(*pval); + } else { + DYNET_ASSERT(pvals != nullptr, "Neither single nor vector of elements available in PickBatchElements::forward"); + DYNET_ARG_CHECK(pvals->size() == fx.d.batch_elems(), + "In PickBatchElements::forward, number of elements in the passed-in index vector (" << pvals->size() << ") " + "did not match number of elements in mini-batch elements in expression (of dimension" << fx.d << ")"); + for (unsigned b = 0; b < pvals->size(); ++b) { + DYNET_ARG_CHECK((*pvals)[b] < xs[0]->d.bd, + "PickBatchElements::forward_impl requested element " << (*pvals)[b] << " from a batch size of " << xs[0]->d.bd); + fx.tbvec().chip<1>(b).device(*dev.edevice) = xs[0]->tbvec().chip<1>((*pvals)[b]); + } + } +} + +template +void PickBatchElements::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + DYNET_ASSERT(i == 0, "Failed dimension check in PickBatchElements::backward"); + if (pval) { + 
dEdxi.tbvec().chip<1>(*pval).device(*dev.edevice) += dEdf.tvec(); + } else { + DYNET_ASSERT(pvals, "Neither single nor vector of elements available in PickBatchElements::backward"); + for (unsigned b = 0; b < pvals->size(); ++b) + dEdxi.tbvec().chip<1>((*pvals)[b]).device(*dev.edevice) += dEdf.tbvec().chip<1>(b); + } +} +DYNET_NODE_INST_DEV_IMPL(PickBatchElements) + +} diff --git a/dynet/nodes-softmaxes.cc b/dynet/nodes-softmaxes.cc new file mode 100644 index 000000000..e8b672757 --- /dev/null +++ b/dynet/nodes-softmaxes.cc @@ -0,0 +1,362 @@ +#include "dynet/nodes.h" + +#include "dynet/nodes-macros.h" +#include "dynet/functors.h" + +using namespace std; + +namespace dynet { + +// ************* Softmax ************* + +#ifndef __CUDACC__ + +string Softmax::as_string(const vector& arg_names) const { + ostringstream s; + s << "softmax(" << arg_names[0] << ')'; + return s.str(); +} + +Dim Softmax::dim_forward(const vector& xs) const { + DYNET_ARG_CHECK(xs.size() == 1, "Failed input count check in Softmax"); + DYNET_ARG_CHECK(xs[0].nd <= 2, "Bad input dimensions in Softmax, must be 2 or fewer: " << xs); + return xs[0]; +} + +int Softmax::autobatch_sig(const ComputationGraph & cg, SigMap &sm) const { + Sig s(nt::softmax); + s.add_dim(dim); + return sm.get_idx(s); +} + +std::vector Softmax::autobatch_concat(const ComputationGraph & cg) const { + return vector(1, 1); +} + +size_t Softmax::aux_storage_size() const { + return 2 * dim.size() / dim.rows() * sizeof(float); +} + +#endif + +template +void Softmax::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + DYNET_ARG_CHECK(xs.size() == 1, "Failed dimension check in Softmax::forward"); + Tensor z(Dim({xs[0]->d.cols()},fx.d.bd), (float*)aux_mem, fx.device, DeviceMempool::FXS); + Tensor m(Dim({xs[0]->d.cols()},fx.d.bd), (float*)aux_mem + z.d.size(), fx.device, DeviceMempool::FXS); + TensorTools::logsumexp_dev(dev, *xs[0], m, z); + // TODO? Is this broadcast efficient on CPU? 
+ Eigen::array bcasts = {(int)xs[0]->d.rows(), 1, 1}; + Eigen::array morph = {1, (int)z.d[0], (int)z.d.bd}; + fx.tb<2>().device(*dev.edevice) = (xs[0]->tb<2>() - z.tvec().reshape(morph).broadcast(bcasts)).exp(); +} + +template +void Softmax::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + Tensor z(Dim({fx.d.cols()},fx.d.bd), (float*)aux_mem, fx.device, DeviceMempool::FXS); + // TODO? Is this broadcast efficient on CPU? + Eigen::array red_axis = {0}; + z.tb<1>().device(*dev.edevice) = (fx.tb<2>() * dEdf.tb<2>()).sum(red_axis); + Eigen::array bcast = {(int)xs[0]->d.rows(), 1, 1}; + Eigen::array morph = {1, (int)z.d[0], (int)z.d.bd}; + dEdxi.tb<2>().device(*dev.edevice) += (dEdf.tb<2>() - z.tvec().reshape(morph).broadcast(bcast)) * fx.tb<2>(); +} +DYNET_NODE_INST_DEV_IMPL(Softmax) + +// ************* LogSoftmax ************* + +#ifndef __CUDACC__ + +string LogSoftmax::as_string(const vector& arg_names) const { + ostringstream s; + s << "log_softmax(" << arg_names[0] << ')'; + return s.str(); +} + +Dim LogSoftmax::dim_forward(const vector& xs) const { + DYNET_ARG_CHECK(xs.size() == 1, "Failed input count check in LogSoftmax") + DYNET_ARG_CHECK(xs[0].nd <= 2, "Bad input dimensions in LogSoftmax, must be 2 or fewer: " << xs); + return xs[0]; +} + +size_t LogSoftmax::aux_storage_size() const { + return 2 * dim.size() / dim.rows() * sizeof(float); +} + +#endif + +template +void LogSoftmax::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + DYNET_ASSERT(xs.size() == 1, "Failed dimension check in LogSoftmax::forward"); + Tensor z(Dim({xs[0]->d.cols()},fx.d.bd), (float*)aux_mem, fx.device, DeviceMempool::FXS); + Tensor m(Dim({xs[0]->d.cols()},fx.d.bd), (float*)aux_mem + z.d.size(), fx.device, DeviceMempool::FXS); + TensorTools::logsumexp_dev(dev, *xs[0], m, z); + if(fx.d.size() == fx.d.rows()) { +#ifdef __CUDACC__ + Eigen::array bcast; + bcast[0] = 
xs[0]->d[0]; + fx.t<1>().device(*dev.edevice) = xs[0]->t<1>() - z.t<1>().broadcast(bcast); +#else + fx.t<1>().device(*dev.edevice) = xs[0]->t<1>() - as_scalar(z); +#endif + } else { + // TODO? Is this broadcast efficient on CPU? + Eigen::array bcasts = {(int)xs[0]->d.rows(), 1, 1}; + Eigen::array morph = {1, (int)z.d[0], (int)z.d.bd}; + fx.tb<2>().device(*dev.edevice) = xs[0]->tb<2>() - z.tvec().reshape(morph).broadcast(bcasts); + } +} + +template +void LogSoftmax::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + Tensor z(Dim({xs[0]->d.cols()},fx.d.bd), (float*)aux_mem, fx.device, DeviceMempool::FXS); + // TODO? Is this broadcast efficient on CPU? + Eigen::array red_axis; red_axis[0] = 0; + z.tb<1>().device(*dev.edevice) = dEdf.tb<2>().sum(red_axis); + Eigen::array bcast = {(int)fx.d.rows(), 1, 1}; + Eigen::array morph = {1, (int)z.d[0], (int)z.d.bd}; + dEdxi.tb<2>().device(*dev.edevice) += fx.tb<2>().exp() * -z.tvec().reshape(morph).broadcast(bcast) + dEdf.tb<2>(); +} +DYNET_NODE_INST_DEV_IMPL(LogSoftmax) + +// ************* RestrictedLogSoftmax ************* + +#ifndef __CUDACC__ + +string RestrictedLogSoftmax::as_string(const vector& arg_names) const { + ostringstream s; + s << "r_log_softmax(" << arg_names[0] << ')'; + return s.str(); +} + +Dim RestrictedLogSoftmax::dim_forward(const vector& xs) const { + DYNET_ARG_CHECK(xs.size() == 1, "Failed input count check in RestrictedLogSoftmax") + DYNET_ARG_CHECK(LooksLikeVector(xs[0]), "Bad input dimensions in RestrictedLogSoftmax: " << xs); + return xs[0]; +} + +template +EIGEN_STRONG_INLINE real logsumexp(const T& x, const vector& denom) { + real m = x(denom[0],0); + for (auto i : denom) { + real r = x(i,0); + if (r > m) m = r; + } + real z = 0; + for (auto i : denom) + z += expf(x(i,0) - m); + return m + logf(z); +} + +#endif + +template +void RestrictedLogSoftmax::forward_dev_impl(const MyDevice & dev, const vector& xs, 
Tensor& fx) const { + DYNET_ASSERT(xs.size() == 1, "Failed dimension check in RestrictedLogSoftmax"); +#ifdef __CUDACC__ + DYNET_RUNTIME_ERR("RestrictedLogSoftmax not yet implemented for CUDA (contributions welcome!)"); +#else + // TODO create auxiliary mask with -infty's + // and do usual LogSoftmax stuff + if(denom.size() == 0) + DYNET_INVALID_ARG("Number of elements in denominator of RestrictedLogSoftmax::forward must be zero"); + auto x = **xs[0]; + if(denom.size() == 0) + DYNET_RUNTIME_ERR("RestrictedLogSoftmax currently only supports single column expressions (contributions expanding support to multiple columns welcome!)"); + const real logz = logsumexp(x, denom); + TensorTools::constant(fx, -numeric_limits::infinity()); + for (auto i : denom) + (*fx)(i,0) = x(i,0) - logz; + if (denom.size() == 1) (*fx)(denom.front(), 0) = 0; +#endif +} + +template +void RestrictedLogSoftmax::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + DYNET_ASSERT(i == 0, "Failed dimension check in RestrictedLogSoftmax"); +#ifdef __CUDACC__ + DYNET_RUNTIME_ERR("RestrictedLogSoftmax not yet implemented for CUDA (contributions welcome!)"); +#else + float z = 0; + for (auto ind : denom) + z += (*dEdf)(ind, 0); + for (auto ind : denom) + (*dEdxi)(ind, 0) += (*dEdf)(ind, 0) - expf((*fx)(ind, 0)) * z; +#endif +} +DYNET_NODE_INST_DEV_IMPL(RestrictedLogSoftmax) + +// ************* Sparsemax ************* + +#define MAX_SPARSEMAX_LOSS_ROWS 65536 + +#ifndef __CUDACC__ + +string Sparsemax::as_string(const vector& arg_names) const { + ostringstream s; + s << "sparsemax(" << arg_names[0] << ")"; + return s.str(); +} + +Dim Sparsemax::dim_forward(const vector& xs) const { + DYNET_ARG_CHECK(xs.size() == 1 && LooksLikeVector(xs[0]), "Bad input dimensions in Sparsemax: " << xs); + return xs[0]; +} + +size_t Sparsemax::aux_storage_size() const { + return (dim.size() + 1) * sizeof(float); +} + +#endif + +template 
+void Sparsemax::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + if (xs[0]->d.cols() == 1) { +#ifdef __CUDACC__ + DYNET_RUNTIME_ERR("Sparsemax not implemented for CUDA"); +#else + const unsigned rows = xs[0]->d.rows(); + float *zs = static_cast(aux_mem); + std::partial_sort_copy(xs[0]->v, xs[0]->v+rows, zs, zs + rows, std::greater()); + float sum = 0, maxsum = 0; + unsigned k = 0; + for (k = 0; k < rows; ++k) { + sum += zs[k]; + float t = 1 + (k + 1) * zs[k]; + if (t <= sum) break; + maxsum = sum; + } + float tau = (maxsum - 1) / k; + auto y = *fx; + fx.tvec() = (xs[0]->tvec() - tau).cwiseMax(0.f); + int c = 1; + int *cc = static_cast(aux_mem); + for (unsigned i = 0; i < rows; ++i) + if (y(i,0) > 0.f) cc[c++] = i; + cc[0] = c - 1; +#endif + } else { + DYNET_RUNTIME_ERR("Sparsemax not yet implemented for multiple columns"); + } +} + +template +void Sparsemax::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { +#ifdef __CUDACC__ + DYNET_RUNTIME_ERR("Sparsemax not implemented for CUDA"); +#else + const int ssize = static_cast(aux_mem)[0]; + int *support = static_cast(aux_mem) + 1; + float dhat = 0; + auto& d = *dEdf; + for (int i = 0; i < ssize; ++i) + dhat += d(support[i], 0); + dhat /= ssize; + for (int i = 0; i < ssize; ++i) + (*dEdxi)(support[i], 0) += d(support[i], 0) - dhat; +#endif +} +DYNET_NODE_INST_DEV_IMPL(Sparsemax) + +// ************* SparsemaxLoss ************* + +#ifndef __CUDACC__ + +string SparsemaxLoss::as_string(const vector& arg_names) const { + ostringstream s; + s << "sparsemax(" << arg_names[0] << ", q)"; + return s.str(); +} + +Dim SparsemaxLoss::dim_forward(const vector& xs) const { + DYNET_ARG_CHECK(xs.size() == 1 && LooksLikeVector(xs[0]), "Bad input dimensions in SparsemaxLoss: " << xs); + return Dim({1}); +} + +size_t SparsemaxLoss::aux_storage_size() const { + // first dim.size dimensions is the sparsemax + const 
unsigned rows = MAX_SPARSEMAX_LOSS_ROWS; // this should be xs[0]->d.rows() + return rows * sizeof(float); +} + +#endif + +template +void SparsemaxLoss::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + if (xs[0]->d.cols() == 1) { +#ifdef __CUDACC__ + DYNET_RUNTIME_ERR("SparsemaxLoss not implemented for CUDA"); +#else + const int rows = xs[0]->d.rows(); + if (rows > MAX_SPARSEMAX_LOSS_ROWS) + DYNET_RUNTIME_ERR("MAX_SPARSEMAX_LOSS_ROWS is not sufficient. Recompile with larger value."); + const unsigned qsupport_size = pq->size(); + const float qprop = 1.f / qsupport_size; + + float *zs = static_cast(aux_mem); + std::partial_sort_copy(xs[0]->v, xs[0]->v+rows, zs, zs + rows, std::greater()); + float sum = 0, maxsum = 0; + int k = 0; + for (k = 0; k < rows; ++k) { + sum += zs[k]; + float t = 1 + (k + 1) * zs[k]; + if (t <= sum) break; + maxsum = sum; + } + float tau = (maxsum - 1) / k; + Tensor tsm(xs[0]->d, (float*)aux_mem, xs[0]->device, DeviceMempool::FXS); + tsm.t<1>() = (xs[0]->t<1>() - tau).cwiseMax(0.f); + fx.t<0>() = ( (tsm.t<1>() != 0.f).cast() * (xs[0]->t<1>().square() - (tau * tau)) ).sum(); + fx.t<0>() = ( fx.t<0>() + qprop * qprop * qsupport_size ) / 2.f; + for (unsigned i = 0; i < qsupport_size; ++i) + fx.t<0>() = fx.t<0>() - xs[0]->t<1>().chip<0>((*pq)[i]) * qprop; + fx.t<0>() = fx.t<0>().cwiseMax(0.f); +#endif + } else { + DYNET_RUNTIME_ERR("SparsemaxLoss not yet implemented for multiple columns"); + } +} + +template +void SparsemaxLoss::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { +#ifdef __CUDACC__ + DYNET_RUNTIME_ERR("SparsemaxLoss not implemented for CUDA"); +#else + const float d = dEdf.v[0]; + float* psm = static_cast(aux_mem); + float dqprop = d / pq->size(); + Tensor tsm(xs[0]->d, psm, xs[0]->device, DeviceMempool::FXS); + auto sm = *tsm; // sparsemax(z) + *dEdxi += sm * d; + for (unsigned i = 0; i < pq->size(); ++i) + 
(*dEdxi)((*pq)[i], 0) -= dqprop; +#endif +} +DYNET_NODE_INST_DEV_IMPL(SparsemaxLoss) + +} diff --git a/dynet/nodes-trig.cc b/dynet/nodes-trig.cc new file mode 100644 index 000000000..c5965879b --- /dev/null +++ b/dynet/nodes-trig.cc @@ -0,0 +1,43 @@ +#include "dynet/nodes.h" + +#include "dynet/nodes-macros.h" +#include "dynet/simd-functors.h" + +using namespace std; + +namespace dynet { + +// ************* ************* + +#ifndef __CUDACC__ + +string Tanh::as_string(const vector& arg_names) const { + ostringstream s; + s << "tanh(" << arg_names[0] << ')'; + return s.str(); +} + +Dim Tanh::dim_forward(const vector& xs) const { + DYNET_ARG_CHECK(xs.size() == 1, "Failed input count check in Tanh") + return xs[0]; +} + +#endif + +template +void Tanh::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { + fx.tvec().device(*dev.edevice) = xs[0]->tvec().tanh(); +} + +template +void Tanh::backward_dev_impl(const MyDevice & dev, + const vector& xs, + const Tensor& fx, + const Tensor& dEdf, + unsigned i, + Tensor& dEdxi) const { + dEdxi.tvec().device(*dev.edevice) += fx.tvec().binaryExpr(dEdf.tvec(), scalar_tanh_backward_op()); +} +DYNET_NODE_INST_DEV_IMPL(Tanh) + +} diff --git a/dynet/nodes.cc b/dynet/nodes.cc deleted file mode 100644 index 4b4a06a6c..000000000 --- a/dynet/nodes.cc +++ /dev/null @@ -1,2200 +0,0 @@ -#include "dynet/nodes.h" - -#include -#include -#include - -#include "dynet/simd-functors.h" -#include "dynet/functors.h" -#include "dynet/nodes-macros.h" -#include "dynet/globals.h" - -#ifdef __CUDACC__ -#include "dynet/cuda.h" -#include "dynet/gpu-ops.h" -#endif - -using namespace std; - -inline string print_vec(const std::vector & vec) { - string sep = "["; - ostringstream oss; - for(auto f : vec) { - oss << sep << f; sep = ","; - } - oss << "]"; - return oss.str(); -} - -// notes on implementing differentiable components -// 1) fx can be understood as a pointer to the (preallocated) location for the result -// of forward to be stored 
-// 2) fx is not initialized, so after calling forward fx must point to the correct answer -// 3) fx can be repointed to an input, if forward(x) evaluates to x (e.g., in reshaping) -// 4) dEdxi MUST **ACCUMULATE** a result since multiple calls to forward may depend on -// the same x_i. Even, e.g., Identity must be implemented as -// dEdx1 += dEdf. THIS IS EXTREMELY IMPORTANT -// 5) scalars results of forward are placed in fx.v[0] -// 6) DYNET manages its own memory, not Eigen, and it is configured with the -// EIGEN_NO_MALLOC option. If you get an error about Eigen attempting to allocate -// memory, it is (probably) because of an implicit creation of a temporary variable. -// To tell Eigen this is not necessary, the noalias() method is available. If you really -// do need a temporary variable, its capacity must be requested by Node::aux_storage_size -// -// notes on debugging problems with differentiable components -// 1) fx is uninitialized when forward is called- are you relying on it being 0? -// 2) dEdxi must accummulate (see point 4 above!) 
-// - -namespace dynet { - -// ======= Shared definitions -#define MAX_LOG_SUM_EXP 65536 -#define MAX_SPARSEMAX_LOSS_ROWS 65536 - -// ======= Functions to be compiled on only CPU -#ifndef __CUDACC__ - -// set use_cholesky if M is symmetric - it's faster and more stable -// for dep paring it won't be -template -inline typename MatrixType::Scalar logdet(const MatrixType& M, bool use_cholesky = false) { - using namespace Eigen; - using std::log; - typedef typename MatrixType::Scalar Scalar; - Scalar ld = 0; - if (use_cholesky) { - LLT> chol(M); - auto& U = chol.matrixL(); - for (unsigned i = 0; i < M.rows(); ++i) - ld += log(U(i,i)); - ld *= 2; - } else { - PartialPivLU> lu(M); - auto& LU = lu.matrixLU(); - Scalar c = lu.permutationP().determinant(); // -1 or 1 - for (unsigned i = 0; i < LU.rows(); ++i) { - const auto& lii = LU(i,i); - if (lii < Scalar(0)) c *= -1; - ld += log(abs(lii)); - } - ld += log(c); - } - return ld; -} - -template -EIGEN_STRONG_INLINE real logsumexp(const T& x, const vector& denom) { - real m = x(denom[0],0); - for (auto i : denom) { - real r = x(i,0); - if (r > m) m = r; - } - real z = 0; - for (auto i : denom) - z += expf(x(i,0) - m); - return m + logf(z); -} - -// ===== Auxiliary functions - -size_t BlockDropout::aux_storage_size() const { - // we just need to remember whether this entire block is turned on (1.0) or off (0.0) - return 1 * sizeof(float); -} - -size_t Dropout::aux_storage_size() const { - return dim.size() * sizeof(float); -} - -size_t DropoutDim::aux_storage_size() const { - return (dim.size() / dim[dimension]) * sizeof(float); -} - -size_t DropoutBatch::aux_storage_size() const { - return dim.batch_elems() * sizeof(float); -} - -size_t GaussianNoise::aux_storage_size() const { - return dim.size() * sizeof(float); -} - -size_t LogSoftmax::aux_storage_size() const { - return 2 * dim.size() / dim.rows() * sizeof(float); -} - -// this i need to do something better, but this is a work-around -// if this is too small, just make 
it bigger -size_t LogSumExp::aux_storage_size() const { - return (MAX_LOG_SUM_EXP + 1) * sizeof(float); -} - -size_t Max::aux_storage_size() const { - return dim.size() * sizeof(float); -} - -size_t Min::aux_storage_size() const { - return dim.size() * sizeof(float); -} - -size_t Softmax::aux_storage_size() const { - return 2 * dim.size() / dim.rows() * sizeof(float); -} - -size_t Sparsemax::aux_storage_size() const { - return (dim.size() + 1) * sizeof(float); -} - -size_t SparsemaxLoss::aux_storage_size() const { - // first dim.size dimensions is the sparsemax - const unsigned rows = MAX_SPARSEMAX_LOSS_ROWS; // this should be xs[0]->d.rows() - return rows * sizeof(float); -} - -size_t MaxDimension::aux_storage_size() const { - return sizeof(Eigen::DenseIndex) * dim.size(); -} - -size_t MinDimension::aux_storage_size() const { - return sizeof(Eigen::DenseIndex) * dim.size(); -} - -#endif // Finish CPU only functions - -// ===== Functions to be compiled on both CPU and GPU - -template -void AddVectorToAllColumns::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { - // Broadcasting is slow on CPU, so split codepaths -#ifdef __CUDACC__ - if(xs[0]->d.bd >= xs[1]->d.bd) { - Eigen::array bcasts = {1, (int)xs[0]->d[1], (int)(xs[0]->d.bd/xs[1]->d.bd)}; - fx.tb<2>().device(*dev.edevice) = xs[0]->tb<2>() + xs[1]->tb<2>().broadcast(bcasts); - } else { - DYNET_ASSERT(xs[0]->d.bd == 1, - "Bad dimensions in AddVectorToAllColumns::forward: " << xs[0]->d << ", " << xs[1]->d); - Eigen::array bcasts0 = {1, 1, (int)xs[1]->d.bd}; - Eigen::array bcasts1 = {1, (int)xs[0]->d[1], 1}; - fx.tb<2>().device(*dev.edevice) = xs[0]->tb<2>().broadcast(bcasts0) + xs[1]->tb<2>().broadcast(bcasts1); - } -#else - // First, add the matrix - if(xs[0]->d.bd == fx.d.bd) - fx.tvec().device(*dev.edevice) = xs[0]->tvec(); - else - for(size_t b = 0; b < fx.d.bd; ++b) - fx.tbvec().chip<1>(b).device(*dev.edevice) = xs[0]->tvec(); - // Second, add the columns - if(xs[1]->d.bd == 
fx.d.bd) { - for(size_t i = 0; i < xs[0]->d[1]; ++i) - fx.tb<2>().chip<1>(i).device(*dev.edevice) += xs[1]->tb<1>(); - } else { - for(size_t b = 0; b < fx.d.bd; ++b) - for(size_t i = 0; i < fx.d[1]; ++i) - fx.tb<2>().chip<2>(b).chip<1>(i).device(*dev.edevice) += xs[1]->t<1>(); - } -#endif -} - -template -void AddVectorToAllColumns::backward_dev_impl(const MyDevice & dev, - const vector& xs, - const Tensor& fx, - const Tensor& dEdf, - unsigned i, - Tensor& dEdxi) const { - DYNET_ASSERT(i < 2, "Failed dimension check in AddVetorToAllColumns::backward"); - // TODO: profile on CPU and see whether the chip version is better - if (i == 0) { // x - if(dEdf.d.bd == dEdxi.d.bd) { - dEdxi.tvec().device(*dev.edevice) += dEdf.tvec(); - } else { - Eigen::array red_axis = {2}; - dEdxi.t<2>().device(*dev.edevice) += dEdf.tb<2>().sum(red_axis); - } - } else { // bias - if(dEdf.d.bd == dEdxi.d.bd) { - Eigen::array red_axis = {1}; - dEdxi.tb<1>().device(*dev.edevice) += dEdf.tb<2>().sum(red_axis); - } else { - DYNET_ASSERT(dEdxi.d.bd == 1, - "Bad dimensions in AddVectorToAllColumns::backward: " << xs[0]->d << ", " << xs[1]->d); - Eigen::array red_axis = {1,2}; - dEdxi.t<1>().device(*dev.edevice) += dEdf.tb<2>().sum(red_axis); - } - } -} -DYNET_NODE_INST_DEV_IMPL(AddVectorToAllColumns) - -template -void Average::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { - const unsigned num_args = xs.size(); - if (num_args == 1) { - fx.tvec().device(*dev.edevice) = xs[0]->tvec(); - return; - } - if (num_args == 2 && xs[0]->d.bd == xs[1]->d.bd) - fx.tvec().device(*dev.edevice) = xs[0]->tvec() + xs[1]->tvec(); - else if (num_args == 3 && xs[0]->d.bd == xs[1]->d.bd && xs[1]->d.bd == xs[2]->d.bd) - fx.tvec().device(*dev.edevice) = xs[0]->tvec() + xs[1]->tvec() + xs[2]->tvec(); - else if (num_args == 4 && xs[0]->d.bd == xs[1]->d.bd && xs[1]->d.bd == xs[2]->d.bd && xs[2]->d.bd == xs[3]->d.bd) - fx.tvec().device(*dev.edevice) = xs[0]->tvec() + xs[1]->tvec() + xs[2]->tvec() 
+ xs[3]->tvec(); - else { - bool allSameBatchSize = std::all_of(xs.begin(), xs.end(), [&](const Tensor* x) { return x->d.bd == xs[0]->d.bd;}); - if (allSameBatchSize) { - // Since they are all the same batch size, we can easily unroll the addition (results in lower GPU latency by merging multiple adds together in one CUDA call): - DYNET_ASSERT(num_args > 4, "Bad loop unrolling in Sum::forward"); // If it was <=4, we would have handled it in the special cases above - fx.tvec().device(*dev.edevice) = xs[0]->tvec() + xs[1]->tvec() + xs[2]->tvec() + xs[3]->tvec(); - - const unsigned remainder = (num_args - 4 ) % 4; - switch (remainder) { - case 0: break; - case 1: fx.tvec().device(*dev.edevice) += xs[4]->tvec(); break; - case 2: fx.tvec().device(*dev.edevice) += xs[4]->tvec() + xs[5]->tvec(); break; - case 3: fx.tvec().device(*dev.edevice) += xs[4]->tvec() + xs[5]->tvec() + xs[6]->tvec(); break; - } - for (unsigned i = 4 + remainder; i < num_args; i += 4) - fx.tvec().device(*dev.edevice) += xs[i]->tvec() + xs[i + 1]->tvec() + xs[i + 2]->tvec() + xs[i + 3]->tvec(); - } - else { - // Not all the same batch size, so need to broadcast in the cases where they differ - TensorTools::zero(fx); -#ifdef __CUDACC__ - Eigen::array bcast({ 1, (int)fx.d.bd }); -#endif - for (unsigned i = 0; i < num_args; ++i) { - if (xs[i]->d.bd == fx.d.bd) { - fx.tvec().device(*dev.edevice) += xs[i]->tvec(); - } - else { -#ifdef __CUDACC__ - fx.tbvec().device(*dev.edevice) += xs[i]->tbvec().broadcast(bcast); -#else - for (unsigned b = 0; b < fx.d.bd; ++b) - fx.tbvec().chip<1>(b).device(*dev.edevice) += xs[i]->tvec(); -#endif - } - } - } - } - fx.tvec().device(*dev.edevice) = fx.tvec() / (float)xs.size(); -} - -template -void Average::backward_dev_impl(const MyDevice & dev, - const vector& xs, - const Tensor& fx, - const Tensor& dEdf, - unsigned i, - Tensor& dEdxi) const { - dEdxi.tvec().device(*dev.edevice) += (dEdf.tvec() / (float)xs.size()); -} -DYNET_NODE_INST_DEV_IMPL(Average) - -template -void 
Concatenate::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { - unsigned curr_row = 0; - src_indices.resize(xs.size()); - Eigen::DSizes indices(0,0,0,0,0); - Eigen::DSizes sizes(fx.d[0], fx.d[1], fx.d[2], fx.d[3],static_cast(fx.d.bd)); - for (unsigned i = 0; i < xs.size(); ++i) { - indices[dimension] = src_indices[i] = curr_row; - const unsigned row_size = xs[i]->d[dimension]; - sizes[dimension] = row_size; - if(fx.d.bd == xs[i]->d.bd) { - fx.tb<4>().slice(indices, sizes).device(*dev.edevice) = xs[i]->tb<4>(); - } else { - Eigen::array bcast; bcast[0] = bcast[1] = bcast[2] = bcast[3] = 1; bcast[4] = fx.d.bd; - fx.tb<4>().slice(indices, sizes).device(*dev.edevice) = xs[i]->tb<4>().broadcast(bcast); - } - curr_row += row_size; - } -} - -template -void Concatenate::backward_dev_impl(const MyDevice & dev, - const vector& xs, - const Tensor& fx, - const Tensor& dEdf, - unsigned i, - Tensor& dEdxi) const { - DYNET_ASSERT(i < src_indices.size(), "Failed boundary check in Concatenate::backward: " << i << " >= " << src_indices.size()); - Eigen::DSizes indices(0,0,0,0,0); indices[dimension] = src_indices[i]; - Eigen::DSizes sizes(static_cast(dEdxi.d[0]), - static_cast(dEdxi.d[1]), - static_cast(dEdxi.d[2]), - static_cast(dEdxi.d[3]), - static_cast(fx.d.bd)); - if(dEdxi.d.bd == dEdf.d.bd) { - dEdxi.tb<4>().device(*dev.edevice) += dEdf.tb<4>().slice(indices, sizes); - } else { - Eigen::array red_axis; red_axis[0] = 4; - dEdxi.t<4>().device(*dev.edevice) += dEdf.tb<4>().slice(indices, sizes).sum(red_axis); - } -} -DYNET_NODE_INST_DEV_IMPL(Concatenate) - -template -void ConcatenateToBatch::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { - unsigned curr_e = 0; - src_element_indices.resize(xs.size()); - Eigen::DSizes indices(0,0); - Eigen::DSizes sizes(static_cast(fx.d.batch_size()), 0); - for (unsigned i = 0; i < xs.size(); ++i) { - indices[1] = src_element_indices[i] = curr_e; - sizes[1] = xs[i]->d.bd; - 
fx.tbvec().slice(indices, sizes).device(*dev.edevice) = xs[i]->tbvec(); - curr_e += xs[i]->d.bd; - } - -} - -template -void ConcatenateToBatch::backward_dev_impl(const MyDevice & dev, - const vector& xs, - const Tensor& fx, - const Tensor& dEdf, - unsigned i, - Tensor& dEdxi) const { - DYNET_ASSERT(i < src_element_indices.size(), "Failed boundary check in ConcatenateToBatch::backward: " << i << " >= " << src_element_indices.size()); - Eigen::DSizes indices(0, static_cast(src_element_indices[i])); - Eigen::DSizes sizes(static_cast(fx.d.batch_size()), static_cast(xs[i]->d.bd)); - dEdxi.tbvec().device(*dev.edevice) += dEdf.tbvec().slice(indices, sizes); -} -DYNET_NODE_INST_DEV_IMPL(ConcatenateToBatch) - -template -void BinaryLogLoss::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { - fx.t<0>().device(*dev.edevice) = xs[0]->tvec().binaryExpr(xs[1]->tvec(), FBinaryLogLoss()).sum(); -} - -template -void BinaryLogLoss::backward_dev_impl(const MyDevice & dev, - const vector& xs, - const Tensor& fx, - const Tensor& dEdf, - unsigned i, - Tensor& dEdxi) const { - dEdxi.tvec().device(*dev.edevice) += xs[i]->tvec().binaryExpr(xs[1-i]->tvec(), FBinaryLogLossBackward(as_scalar(dEdf))); -} -DYNET_NODE_INST_DEV_IMPL(BinaryLogLoss) - -template -void BlockDropout::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { - bernoulli_distribution distribution(1.0 - dropout_probability); - float block_multiplier = distribution(*rndeng)? 1.0 : 0.0; - block_multiplier = - dropout_probability == 1.0? 
0.0 : block_multiplier / (1.0 - dropout_probability); - if (dropout_probability > 1.0 || dropout_probability < 0.0) - DYNET_INVALID_ARG("Dropout probability must be in the range [0, 1]"); - *(static_cast(aux_mem)) = block_multiplier; - fx.tvec().device(*dev.edevice) = xs[0]->tvec() * block_multiplier; -} - -template -void BlockDropout::backward_dev_impl(const MyDevice & dev, - const vector& xs, - const Tensor& fx, - const Tensor& dEdf, - unsigned i, - Tensor& dEdxi) const { - float block_multiplier = *(static_cast(aux_mem)); - dEdxi.tvec().device(*dev.edevice) += dEdf.tvec() * block_multiplier; -} -DYNET_NODE_INST_DEV_IMPL(BlockDropout) - -template -void ConstantMinusX::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { - fx.tvec().device(*dev.edevice) = xs[0]->tvec().unaryExpr(const_minus_op(c)); -} - -template -void ConstantMinusX::backward_dev_impl(const MyDevice & dev, - const vector& xs, - const Tensor& fx, - const Tensor& dEdf, - unsigned i, - Tensor& dEdxi) const { - dEdxi.tvec().device(*dev.edevice) -= dEdf.tvec(); -} -DYNET_NODE_INST_DEV_IMPL(ConstantMinusX) - -template -void ConstantPlusX::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { - fx.tvec().device(*dev.edevice) = xs[0]->tvec().unaryExpr(const_add_op(c)); -} - -template -void ConstantPlusX::backward_dev_impl(const MyDevice & dev, - const vector& xs, - const Tensor& fx, - const Tensor& dEdf, - unsigned i, - Tensor& dEdxi) const { - dEdxi.tvec().device(*dev.edevice) += dEdf.tvec(); -} -DYNET_NODE_INST_DEV_IMPL(ConstantPlusX) - -template -void ConstScalarMultiply::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { - fx.tvec().device(*dev.edevice) = xs[0]->tvec() * alpha; -} - -template -void ConstScalarMultiply::backward_dev_impl(const MyDevice & dev, - const vector& xs, - const Tensor& fx, - const Tensor& dEdf, - unsigned i, - Tensor& dEdxi) const { - DYNET_ASSERT(i == 0, "Failed dimension check in ConstScalarMultiply"); 
- dEdxi.tvec().device(*dev.edevice) += dEdf.tvec() * alpha; -} -DYNET_NODE_INST_DEV_IMPL(ConstScalarMultiply) - -template -void CwiseQuotient::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { - DYNET_ASSERT(xs.size() == 2, "Failed dimension check in CwiseQuotient::forward (cdiv)"); - if(xs[0]->d.bd == xs[1]->d.bd) { - fx.tvec().device(*dev.edevice) = xs[0]->tvec() / xs[1]->tvec(); - } else if(xs[0]->d.bd == 1) { - Eigen::array bcast; bcast[0] = 1; bcast[1] = fx.d.bd; - fx.tb<1>().device(*dev.edevice) = xs[0]->tb<1>().broadcast(bcast) / xs[1]->tb<1>(); - } else { - Eigen::array bcast; bcast[0] = 1; bcast[1] = fx.d.bd; - fx.tb<1>().device(*dev.edevice) = xs[0]->tb<1>() / xs[1]->tb<1>().broadcast(bcast); - } -} - -template -void CwiseQuotient::backward_dev_impl(const MyDevice & dev, - const vector& xs, - const Tensor& fx, - const Tensor& dEdf, - unsigned i, - Tensor& dEdxi) const { - DYNET_ASSERT(i < 2, "Failed dimension check in CwiseQuotient::backward (cdiv)"); - if (i == 0) { - if(xs[0]->d.bd == xs[1]->d.bd) { - dEdxi.tvec().device(*dev.edevice) += dEdf.tvec() / xs[1]->tvec(); - } else if(xs[1]->d.bd == 1) { - Eigen::array bcast; bcast[0] = 1; bcast[1] = fx.d.bd; - dEdxi.tb<1>().device(*dev.edevice) += dEdf.tb<1>() / xs[1]->tb<1>().broadcast(bcast); - } else { - Eigen::array red_axis; red_axis[0] = 1; - dEdxi.t<1>().device(*dev.edevice) += (dEdf.tb<1>() / xs[1]->tb<1>()).sum(red_axis); - } - } else { // i = 1 - if(xs[0]->d.bd == xs[1]->d.bd) { - dEdxi.tvec().device(*dev.edevice) -= dEdf.tvec() / xs[1]->tvec().square() * xs[0]->tvec(); - } else if(xs[1]->d.bd == 1) { - Eigen::array bcast; bcast[0] = 1; bcast[1] = fx.d.bd; - Eigen::array red_axis; red_axis[0] = 1; - dEdxi.t<1>().device(*dev.edevice) -= (dEdf.tb<1>() / xs[1]->tb<1>().square().broadcast(bcast) * xs[0]->tb<1>()).sum(red_axis); - } else { - Eigen::array bcast; bcast[0] = 1; bcast[1] = fx.d.bd; - dEdxi.tb<1>().device(*dev.edevice) -= dEdf.tb<1>() / xs[1]->tb<1>().square() * 
xs[0]->tb<1>().broadcast(bcast); - } - } -} -DYNET_NODE_INST_DEV_IMPL(CwiseQuotient) - -template -void CwiseMultiply::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { - DYNET_ASSERT(xs.size() == 2, "Failed dimension check in CwiseMultiply::forward (cmult)"); - if(xs[0]->d.bd == xs[1]->d.bd) { - fx.tvec().device(*dev.edevice) = xs[0]->tvec() * xs[1]->tvec(); - } else { - Eigen::array bcast; bcast[0] = 1; bcast[1] = fx.d.bd; - if(xs[0]->d.bd == 1) - fx.tbvec().device(*dev.edevice) = xs[0]->tbvec().broadcast(bcast) * xs[1]->tbvec(); - else - fx.tbvec().device(*dev.edevice) = xs[0]->tbvec() * xs[1]->tbvec().broadcast(bcast); - } -} - -template -void CwiseMultiply::backward_dev_impl(const MyDevice & dev, - const vector& xs, - const Tensor& fx, - const Tensor& dEdf, - unsigned i, - Tensor& dEdxi) const { - DYNET_ASSERT(i < 2, "Failed dimension check in CwiseMultiply::backward (cmult)"); - if(xs[0]->d.bd == xs[1]->d.bd) { - dEdxi.tvec().device(*dev.edevice) += dEdf.tvec() * xs[1-i]->tvec(); - } else if(xs[1-i]->d.bd == 1) { - Eigen::array bcast; bcast[0] = 1; bcast[1] = fx.d.bd; - dEdxi.tbvec().device(*dev.edevice) += dEdf.tbvec() * xs[1-i]->tbvec().broadcast(bcast); - } else { - Eigen::array red_axis; red_axis[0] = 1; - dEdxi.tvec().device(*dev.edevice) += (dEdf.tbvec() * xs[1-i]->tbvec()).sum(red_axis); - } -} -DYNET_NODE_INST_DEV_IMPL(CwiseMultiply) - -template -void ScalarAdd::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { - DYNET_ASSERT(xs.size() == 2, "Failed dimension check in ScalarAdd::forward (+)"); - Eigen::array bcast_0 = {1, (int) (fx.d.bd == xs[0]->d.bd ? 1 : fx.d.bd)}; - Eigen::array bcast_1 = {(int) fx.d.batch_size(), (int) (fx.d.bd == xs[1]->d.bd ? 
1 : fx.d.bd)}; - fx.tbvec().device(*dev.edevice) = xs[0]->tbvec().broadcast(bcast_0) + xs[1]->tbvec().broadcast(bcast_1); -} - -template -void ScalarAdd::backward_dev_impl(const MyDevice & dev, - const vector& xs, - const Tensor& fx, - const Tensor& dEdf, - unsigned i, - Tensor& dEdxi) const { - DYNET_ASSERT(i < 2, "Failed dimension check in ScalarAdd::backward (+)"); - Eigen::array red_axis_0 = {0}, red_axis_1 = {1}; - Eigen::array red_axes_01 = {0, 1}; - if (i == 0) { - if (xs[0]->d.bd == 1) - dEdxi.tvec().device(*dev.edevice) += dEdf.tbvec().sum(red_axis_1); - else - dEdxi.tbvec().device(*dev.edevice) += dEdf.tbvec(); - } else { - if (xs[1]->d.bd == 1) - dEdxi.t<0>().device(*dev.edevice) += dEdf.tbvec().sum(red_axes_01); - else - dEdxi.tb<0>().device(*dev.edevice) += dEdf.tbvec().sum(red_axis_0); - } -} -DYNET_NODE_INST_DEV_IMPL(ScalarAdd) - -template -void ScalarMultiply::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { - DYNET_ASSERT(xs.size() == 2, "Failed dimension check in ScalarMultiply::forward (cmult)"); - - Eigen::array bcast_0 = {(int) fx.d.batch_size(), (int) (fx.d.bd == xs[0]->d.bd ? 1 : fx.d.bd)}; - Eigen::array bcast_1 = {1, (int) (fx.d.bd == xs[1]->d.bd ? 1 : fx.d.bd)}; - fx.tbvec().device(*dev.edevice) = xs[0]->tbvec().broadcast(bcast_0) * xs[1]->tbvec().broadcast(bcast_1); -} - -template -void ScalarMultiply::backward_dev_impl(const MyDevice & dev, - const vector& xs, - const Tensor& fx, - const Tensor& dEdf, - unsigned i, - Tensor& dEdxi) const { - DYNET_ASSERT(i < 2, "Failed dimension check in ScalarMultiply::backward (cmult)"); - Eigen::array bcast_0 = {(int) fx.d.batch_size(), (int)( fx.d.bd == xs[0]->d.bd ? 1 : fx.d.bd)}; - Eigen::array bcast_1 = {1, (int)(fx.d.bd == xs[1]->d.bd ? 
1 : fx.d.bd)}; - Eigen::array red_axis_0 = {0}, red_axis_1 = {1}; - Eigen::array red_axes_01 = {0, 1}; - if (i == 0) { - if (xs[0]->d.bd == 1) - dEdxi.t<0>().device(*dev.edevice) += (dEdf.tbvec() * xs[1]->tbvec().broadcast(bcast_1)).sum(red_axes_01); - else - dEdxi.tb<0>().device(*dev.edevice) += (dEdf.tbvec() * xs[1]->tbvec().broadcast(bcast_1)).sum(red_axis_0); - } else { - if (xs[1]->d.bd == 1) - dEdxi.tvec().device(*dev.edevice) += (dEdf.tbvec() * xs[0]->tbvec().broadcast(bcast_0)).sum(red_axis_1); - else - dEdxi.tbvec().device(*dev.edevice) += dEdf.tbvec() * xs[0]->tbvec().broadcast(bcast_0); - } -} -DYNET_NODE_INST_DEV_IMPL(ScalarMultiply) - -template -void ScalarQuotient::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { - DYNET_ASSERT(xs.size() == 2, "Failed dimension check in ScalarQuotient::forward (cdiv)"); - Eigen::array bcast_0 = {1, (int) (fx.d.bd == xs[0]->d.bd ? 1 : fx.d.bd)}; - Eigen::array bcast_1 = {(int) fx.d.batch_size(), (int) (fx.d.bd == xs[1]->d.bd ? 1 : fx.d.bd)}; - fx.tbvec().device(*dev.edevice) = xs[0]->tbvec().broadcast(bcast_0) / xs[1]->tbvec().broadcast(bcast_1); -} - -template -void ScalarQuotient::backward_dev_impl(const MyDevice & dev, - const vector& xs, - const Tensor& fx, - const Tensor& dEdf, - unsigned i, - Tensor& dEdxi) const { - DYNET_ASSERT(i < 2, "Failed dimension check in ScalarQuotient::backward (cdiv)"); - Eigen::array bcast = {(int)fx.d.batch_size(), (int)(fx.d.bd == xs[1]->d.bd ? 1 : fx.d.bd)}; - Eigen::array bcast2 = {1, (int)(fx.d.bd == xs[0]->d.bd ? 
1 : fx.d.bd)}; - Eigen::array red_axis_0 = {0}, red_axis_1 = {1}; - Eigen::array red_axes_01 = {0, 1}; - if (i == 0) { - if (xs[0]->d.bd == 1) - dEdxi.tvec().device(*dev.edevice) += (dEdf.tbvec() / xs[1]->tbvec().broadcast(bcast)).sum(red_axis_1); - else - dEdxi.tbvec().device(*dev.edevice) += dEdf.tbvec() / xs[1]->tbvec().broadcast(bcast); - } else { - if (xs[1]->d.bd == 1) - dEdxi.t<0>().device(*dev.edevice) += - (dEdf.tbvec() * xs[0]->tbvec().broadcast(bcast2)).sum(red_axes_01) / xs[1]->t<0>().square(); - else - dEdxi.tb<0>().device(*dev.edevice) += - (dEdf.tbvec() * xs[0]->tbvec().broadcast(bcast2)).sum(red_axis_0) / xs[1]->tb<0>().square(); - } -} -DYNET_NODE_INST_DEV_IMPL(ScalarQuotient) - -template -void Dropout::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { - Tensor m(dim, (float*)aux_mem, fx.device, DeviceMempool::FXS); - TensorTools::randomize_bernoulli(m, (1.f-p), 1.f / (1.f-p)); - fx.tvec().device(*dev.edevice) = xs[0]->tvec() * m.tvec(); -} - -template -void Dropout::backward_dev_impl(const MyDevice & dev, - const vector& xs, - const Tensor& fx, - const Tensor& dEdf, - unsigned i, - Tensor& dEdxi) const { - Tensor m(dim, (float*)aux_mem, fx.device, DeviceMempool::FXS); - dEdxi.tvec().device(*dev.edevice) += dEdf.tvec() * m.tvec(); -} -DYNET_NODE_INST_DEV_IMPL(Dropout) - - -template -void DropoutBatch::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { - Dim mask_dim({1},xs[0]->d.batch_elems()); - Tensor m(mask_dim, (float*)aux_mem, fx.device, DeviceMempool::FXS); - TensorTools::randomize_bernoulli(m, (1.f-p), 1.f / (1.f-p)); - Eigen::array bcast = {xs[0]->d.batch_size(), 1}; - fx.tbvec().device(*dev.edevice) = xs[0]->tbvec() * m.tbvec().broadcast(bcast); -} - -template -void DropoutBatch::backward_dev_impl(const MyDevice & dev, - const vector& xs, - const Tensor& fx, - const Tensor& dEdf, - unsigned i, - Tensor& dEdxi) const { - Dim mask_dim({1},xs[0]->d.batch_elems()); - Tensor m(mask_dim, 
(float*)aux_mem, fx.device, DeviceMempool::FXS); - Eigen::array bcast = {xs[0]->d.batch_size(), 1}; - dEdxi.tbvec().device(*dev.edevice) += dEdf.tbvec() * m.tbvec().broadcast(bcast); -} -DYNET_NODE_INST_DEV_IMPL(DropoutBatch) - - -template -void DropoutDim::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { - Dim mask_dim(dim); - mask_dim.d[dimension]=1; - Tensor m(mask_dim, (float*)aux_mem, fx.device, DeviceMempool::FXS); - TensorTools::randomize_bernoulli(m, (1.f-p), 1.f / (1.f-p)); - Eigen::array bcast = {1, 1, 1, 1}; bcast[dimension] = xs[0]->d[dimension]; - fx.tb<3>().device(*dev.edevice) = xs[0]->tb<3>() * m.tb<3>().broadcast(bcast); -} - -template -void DropoutDim::backward_dev_impl(const MyDevice & dev, - const vector& xs, - const Tensor& fx, - const Tensor& dEdf, - unsigned i, - Tensor& dEdxi) const { - Dim mask_dim(dim); - mask_dim.d[dimension]=1; - Tensor m(mask_dim, (float*)aux_mem, fx.device, DeviceMempool::FXS); - Eigen::array bcast = {1, 1, 1, 1}; bcast[dimension] = dEdf.d[dimension]; - dEdxi.tb<3>().device(*dev.edevice) += dEdf.tb<3>() * m.tb<3>().broadcast(bcast); -} -DYNET_NODE_INST_DEV_IMPL(DropoutDim) - -template -void Erf::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { - fx.tvec().device(*dev.edevice) = xs[0]->tvec().erf(); -} - -template -void Erf::backward_dev_impl(const MyDevice & dev, - const vector& xs, - const Tensor& fx, - const Tensor& dEdf, - unsigned i, - Tensor& dEdxi) const { - dEdxi.tvec().device(*dev.edevice) += xs[0]->tvec().binaryExpr(dEdf.tvec(), scalar_erf_backward_op()); -} -DYNET_NODE_INST_DEV_IMPL(Erf) - -template -void GaussianNoise::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { - Tensor m(dim, (float*)aux_mem, fx.device, DeviceMempool::FXS); - TensorTools::randomize_normal(m, 0, stddev); - fx.tvec().device(*dev.edevice) = xs[0]->tvec() + m.tvec(); -} - -template -void GaussianNoise::backward_dev_impl(const MyDevice & dev, - const 
vector& xs, - const Tensor& fx, - const Tensor& dEdf, - unsigned i, - Tensor& dEdxi) const { - dEdxi.tvec().device(*dev.edevice) += dEdf.tvec(); -} -DYNET_NODE_INST_DEV_IMPL(GaussianNoise) - -template -void Identity::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { - fx.tvec().device(*dev.edevice) = xs[0]->tvec(); -} - -template -void Identity::backward_dev_impl(const MyDevice & dev, - const vector& xs, - const Tensor& fx, - const Tensor& dEdf, - unsigned i, - Tensor& dEdxi) const { - dEdxi.tvec().device(*dev.edevice) += dEdf.tvec(); -} -DYNET_NODE_INST_DEV_IMPL(Identity) - -template -void KMHNGram::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { -#ifdef __CUDACC__ - DYNET_RUNTIME_ERR("KMHNGram not implemented for CUDA"); -#else - auto x = **xs[0]; - const int new_cols = x.cols() - n + 1; - DYNET_ASSERT(new_cols > 0, "Failed dimension check in KMHNGram"); - auto res = *fx; - res.setZero(); - for (int j = 0; j < new_cols; ++j) { - auto c_j = res.col(j); - for (unsigned k = 0; k < n; ++k) - c_j += x.col(j + k); - } -#endif -} - -template -void KMHNGram::backward_dev_impl(const MyDevice & dev, - const vector& xs, - const Tensor& fx, - const Tensor& dEdf, - unsigned i, - Tensor& dEdxi) const { -#ifdef __CUDACC__ - DYNET_RUNTIME_ERR("KMHNGram not implemented for CUDA"); -#else - const int c = dEdf.d.cols(); - for (int j = 0; j < c; ++j) - for (unsigned k = 0; k < n; ++k) - (*dEdxi).col(j+k) += (*dEdf).col(j); -#endif -} -DYNET_NODE_INST_DEV_IMPL(KMHNGram) - -template -void LogDet::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { -#ifdef __CUDACC__ - DYNET_RUNTIME_ERR("LogDet not implemented for CUDA"); -#else - fx.v[0] = logdet(**xs[0], false); -#endif -} - -template -void LogDet::backward_dev_impl(const MyDevice & dev, - const vector& xs, - const Tensor& fx, - const Tensor& dEdf, - unsigned i, - Tensor& dEdxi) const { -#ifdef __CUDACC__ - DYNET_RUNTIME_ERR("KMHNGram not implemented for 
CUDA"); -#else - auto trans = (**xs[0]).transpose(); - (*dEdxi) += (dEdf.v[0]) * trans.inverse(); -#endif -} -DYNET_NODE_INST_DEV_IMPL(LogDet) - -template -void LogGamma::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { - fx.tvec().device(*dev.edevice) = xs[0]->tvec().lgamma(); -} - -template -void LogGamma::backward_dev_impl(const MyDevice & dev, - const vector& xs, - const Tensor& fx, - const Tensor& dEdf, - unsigned i, - Tensor& dEdxi) const { - dEdxi.tvec().device(*dev.edevice) += xs[0]->tvec().digamma() * dEdf.tvec(); -} -DYNET_NODE_INST_DEV_IMPL(LogGamma) - -template -void LogisticSigmoid::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { - DYNET_ASSERT(xs.size() == 1, "Failed dimension check in LogisticSigmoid::forward"); - fx.tvec().device(*dev.edevice) = xs[0]->tvec().unaryExpr(scalar_logistic_sigmoid_op()); -} - -template -void LogisticSigmoid::backward_dev_impl(const MyDevice & dev, - const vector& xs, - const Tensor& fx, - const Tensor& dEdf, - unsigned i, - Tensor& dEdxi) const { - dEdxi.tvec().device(*dev.edevice) += fx.tvec().binaryExpr(dEdf.tvec(), scalar_logistic_sigmoid_backward_op()); -} -DYNET_NODE_INST_DEV_IMPL(LogisticSigmoid) - -template -void LogSoftmax::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { - DYNET_ASSERT(xs.size() == 1, "Failed dimension check in LogSoftmax::forward"); - Tensor z(Dim({xs[0]->d.cols()},fx.d.bd), (float*)aux_mem, fx.device, DeviceMempool::FXS); - Tensor m(Dim({xs[0]->d.cols()},fx.d.bd), (float*)aux_mem + z.d.size(), fx.device, DeviceMempool::FXS); - TensorTools::logsumexp_dev(dev, *xs[0], m, z); - if(fx.d.size() == fx.d.rows()) { -#ifdef __CUDACC__ - Eigen::array bcast; - bcast[0] = xs[0]->d[0]; - fx.t<1>().device(*dev.edevice) = xs[0]->t<1>() - z.t<1>().broadcast(bcast); -#else - fx.t<1>().device(*dev.edevice) = xs[0]->t<1>() - as_scalar(z); -#endif - } else { - // TODO? Is this broadcast efficient on CPU? 
- Eigen::array bcasts = {(int)xs[0]->d.rows(), 1, 1}; - Eigen::array morph = {1, (int)z.d[0], (int)z.d.bd}; - fx.tb<2>().device(*dev.edevice) = xs[0]->tb<2>() - z.tvec().reshape(morph).broadcast(bcasts); - } -} - -template -void LogSoftmax::backward_dev_impl(const MyDevice & dev, - const vector& xs, - const Tensor& fx, - const Tensor& dEdf, - unsigned i, - Tensor& dEdxi) const { - Tensor z(Dim({xs[0]->d.cols()},fx.d.bd), (float*)aux_mem, fx.device, DeviceMempool::FXS); - // TODO? Is this broadcast efficient on CPU? - Eigen::array red_axis; red_axis[0] = 0; - z.tb<1>().device(*dev.edevice) = dEdf.tb<2>().sum(red_axis); - Eigen::array bcast = {(int)fx.d.rows(), 1, 1}; - Eigen::array morph = {1, (int)z.d[0], (int)z.d.bd}; - dEdxi.tb<2>().device(*dev.edevice) += fx.tb<2>().exp() * -z.tvec().reshape(morph).broadcast(bcast) + dEdf.tb<2>(); -} -DYNET_NODE_INST_DEV_IMPL(LogSoftmax) - -template -void LogSumExp::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { - if (xs.size() == 1) { - fx.tvec().device(*dev.edevice) = xs[0]->tvec(); - } else { - // TODO: Ideally we wouldn't need to allocate this memory permanently. - // We need a good method for allocating "scratch" memory that is only used temporarily. 
- Tensor ms(fx.d, static_cast(aux_mem), fx.device, DeviceMempool::FXS); - Eigen::array bcast = {1,fx.d.bd}; - // Calculate the max - if(ms.d.bd == xs[0]->d.bd) - ms.tvec().device(*dev.edevice) = xs[0]->tvec(); - else - ms.tbvec().device(*dev.edevice) = xs[0]->tbvec().broadcast(bcast); - for (size_t i = 1; i < xs.size(); ++i) { - if(ms.d.bd == xs[i]->d.bd) - ms.tvec().device(*dev.edevice) = ms.tvec().cwiseMax(xs[i]->tvec()); - else - ms.tbvec().device(*dev.edevice) = ms.tbvec().cwiseMax(xs[i]->tbvec().broadcast(bcast)); - } - // sumexp - if(ms.d.bd == xs[0]->d.bd) - fx.tvec().device(*dev.edevice) = (xs[0]->tvec() - ms.tvec()).exp(); - else - fx.tbvec().device(*dev.edevice) = (xs[0]->tbvec().broadcast(bcast) - ms.tbvec()).exp(); - for (size_t i = 1; i < xs.size(); ++i) { - if(ms.d.bd == xs[i]->d.bd) - fx.tvec().device(*dev.edevice) += (xs[i]->tvec() - ms.tvec()).exp(); - else - fx.tbvec().device(*dev.edevice) += (xs[i]->tbvec().broadcast(bcast) - ms.tbvec()).exp(); - } - // log and add max - fx.tvec().device(*dev.edevice) = fx.tvec().log() + ms.tvec(); - } -} - -template -void LogSumExp::backward_dev_impl(const MyDevice & dev, - const vector& xs, - const Tensor& fx, - const Tensor& dEdf, - unsigned i, - Tensor& dEdxi) const { - if (xs.size() == 1) { - dEdxi.tvec().device(*dev.edevice) += dEdf.tvec(); - } else { - // df/dx_i = 1/{sum_j exp(x_j)} * exp(x_i)} - // = 1/{exp f(x)} * exp(x_i) - // = exp(x_i - f(x)) - if(fx.d.bd == xs[i]->d.bd) { - dEdxi.tvec().device(*dev.edevice) += (xs[i]->tvec() - fx.tvec()).exp() * dEdf.tvec(); - } else { - Eigen::array bcast = {1,fx.d.bd}; - Eigen::array red_axis = {1}; - dEdxi.tvec().device(*dev.edevice) += ((xs[i]->tbvec().broadcast(bcast) - fx.tbvec()).exp() * dEdf.tbvec()).sum(red_axis); - } - } -} -DYNET_NODE_INST_DEV_IMPL(LogSumExp) - -template -void MatrixInverse::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { - DYNET_ASSERT(xs.size() == 1, "Failed dimension check in MatrixInverse::forward"); 
-#ifdef __CUDACC__ - DYNET_RUNTIME_ERR("MatrixInverse not yet implemented for CUDA"); -#else - auto x = **xs[0]; - auto y = *fx; - y = x.inverse(); -#endif - // TODO: Change into tensors after resolving test errors - // fx.t<2>().device(*dev.edevice) = xs[0]->t<2>().inverse(); -} - -template -void MatrixInverse::backward_dev_impl(const MyDevice & dev, - const vector& xs, - const Tensor& fx, - const Tensor& dEdf, - unsigned i, - Tensor& dEdxi) const { - DYNET_ASSERT(xs.size() == 1, "Failed dimension check in MatrixInverse::backward"); -#ifdef __CUDACC__ - DYNET_RUNTIME_ERR("MatrixInverse not yet implemented for CUDA"); -#else - auto d = *dEdf; - auto y = *fx; - (*dEdxi) -= y * d * y; -#endif -} -DYNET_NODE_INST_DEV_IMPL(MatrixInverse) - -template -void Max::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { - Tensor t(fx.d, static_cast(aux_mem), fx.device, DeviceMempool::FXS); - t.tvec().device(*dev.edevice) = (xs[0]->tvec() > xs[1]->tvec()).cast(); - fx.tvec().device(*dev.edevice) = xs[0]->tvec().cwiseMax(xs[1]->tvec()); -} - -template -void Max::backward_dev_impl(const MyDevice & dev, - const vector& xs, - const Tensor& fx, - const Tensor& dEdf, - unsigned i, - Tensor& dEdxi) const { - DYNET_ASSERT(i < 2, "Failed dimension check in Max::backward"); - const Tensor t(dEdxi.d, static_cast(aux_mem), fx.device, DeviceMempool::FXS); - if (i == 0) { - dEdxi.tvec().device(*dev.edevice) += t.tvec() * dEdf.tvec(); - } else { - dEdxi.tvec().device(*dev.edevice) += t.tvec().binaryExpr(dEdf.tvec(), FMaxBackwardInv()); - } -} -DYNET_NODE_INST_DEV_IMPL(Max) - -template -void NoBackprop::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { - fx.tvec().device(*dev.edevice) = xs[0]->tvec(); -} - -template -void NoBackprop::backward_dev_impl(const MyDevice & dev, - const vector& xs, - const Tensor& fx, - const Tensor& dEdf, - unsigned i, - Tensor& dEdxi) const { - // no op -} -DYNET_NODE_INST_DEV_IMPL(NoBackprop) - -template -void 
FlipGradient::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { - fx.tvec().device(*dev.edevice) = xs[0]->tvec(); -} - -template -void FlipGradient::backward_dev_impl(const MyDevice & dev, - const vector& xs, - const Tensor& fx, - const Tensor& dEdf, - unsigned i, - Tensor& dEdxi) const { - // takes negative on backprop - dEdxi.tvec().device(*dev.edevice) -= dEdf.tvec(); -} -DYNET_NODE_INST_DEV_IMPL(FlipGradient) - -template -void MaxPooling1D::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { - DYNET_RUNTIME_ERR("MaxPooling1D::forward_dev_impl not implemented yet"); -#if 0 - DYNET_ASSERT(xs.size() == 1, "Failed dimension check in MaxPooling1D::forward"); - const Tensor& x = *xs.front(); - const unsigned x_rows = x.rows(); - DYNET_ASSERT(x.cols() == 1, "Failed dimension check in MaxPooling1D::forward"); - const unsigned fx_rows = x_rows / width; - ind.resize(fx_rows); - Tensor fx = Zero(Dim(fx_rows, 1)); - for (unsigned i = 0; i < fx_rows; ++i) { - unsigned from = i * width; - unsigned to = from + width; - if (to > x_rows) to = x_rows; - real best = x(from, 0); - unsigned bestr = from; - for (unsigned r = from + 1; r < to; ++r) { - if (x(r, 0) > best) { - best = x(r,0); - bestr = r; - } - } - ind[i] = bestr; - fx(i, 0) = best; - } - return fx; -#endif -} - -template -void MaxPooling1D::backward_dev_impl(const MyDevice & dev, - const vector& xs, - const Tensor& fx, - const Tensor& dEdf, - unsigned i, - Tensor& dEdxi) const { - DYNET_RUNTIME_ERR("MaxPooling1D::backward_dev_impl not implemented yet"); -#if 0 - const Tensor& x = *xs.front(); - const unsigned x_rows = x.rows(); - Tensor dEdx = Zero(Dim(x_rows, 1)); - const unsigned fx_rows = x_rows / width; - DYNET_ASSERT(fx_rows == ind.size(), "Failed dimension check in MaxPooling1D::backward"); - DYNET_ASSERT(fx_rows == dEdf.rows(), "Failed dimension check in MaxPooling1D::backward"); - for (unsigned i = 0; i < fx_rows; ++i) - dEdx(ind[i], 0) = dEdf(i, 0); - return 
dEdx; -#endif -} -DYNET_NODE_INST_DEV_IMPL(MaxPooling1D) - -template -void Min::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { - Tensor t(fx.d, static_cast(aux_mem), fx.device, DeviceMempool::FXS); - t.tvec().device(*dev.edevice) = (xs[0]->tvec() < xs[1]->tvec()).cast(); - fx.tvec().device(*dev.edevice) = xs[0]->tvec().cwiseMin(xs[1]->tvec()); -} - -template -void Min::backward_dev_impl(const MyDevice & dev, - const vector& xs, - const Tensor& fx, - const Tensor& dEdf, - unsigned i, - Tensor& dEdxi) const { - DYNET_ASSERT(i < 2, "Failed dimension check in Min::backward"); - const Tensor t(dEdxi.d, static_cast(aux_mem), fx.device, DeviceMempool::FXS); - if (i == 0) { - dEdxi.tvec().device(*dev.edevice) += t.tvec() * dEdf.tvec(); - } else { - dEdxi.tvec().device(*dev.edevice) += t.tvec().binaryExpr(dEdf.tvec(), FMaxBackwardInv()); - } -} -DYNET_NODE_INST_DEV_IMPL(Min) - -template -void PairwiseRankLoss::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { - fx.tvec().device(*dev.edevice) = xs[0]->tvec().binaryExpr(xs[1]->tvec(), FPairwiseRankLoss(margin)); -} - -template -void PairwiseRankLoss::backward_dev_impl(const MyDevice & dev, - const vector& xs, - const Tensor& fx, - const Tensor& dEdf, - unsigned i, - Tensor& dEdxi) const { - if (i == 0) { - dEdxi.tvec().device(*dev.edevice) -= fx.tvec().binaryExpr(dEdf.tvec(), FRectifyBackward()); - } else { - dEdxi.tvec().device(*dev.edevice) += fx.tvec().binaryExpr(dEdf.tvec(), FRectifyBackward()); - } -} -DYNET_NODE_INST_DEV_IMPL(PairwiseRankLoss) - -// x_1 is a vector -// y = (x_1)_{*pval} -template -void PickElement::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { - if(pval) { - DYNET_ARG_CHECK(*pval < xs[0]->d[dimension], - "PickElement::forward_impl requested element " << *pval << " from a dimension of length " << xs[0]->d[dimension]); - // TODO: This limit of up to 4 is somewhat arbitrary. 
We need to decide how to handle - // things with "maximum tensor size". - fx.tb<3>().device(*dev.edevice) = xs[0]->tb<4>().chip(*pval, dimension); - } else { - DYNET_ASSERT(pvals != nullptr, "Neither single nor vector of elements available in PickElement::forward"); - DYNET_ARG_CHECK(pvals->size() == fx.d.batch_elems(), - "In PickElement::forward, number of elements in the passed-in index vector (" << pvals->size() << ")" - " did not match number of elements in mini-batch elements in expression (of dimension" << fx.d << ")"); - for(unsigned b = 0; b < pvals->size(); ++b) { - DYNET_ARG_CHECK((*pvals)[b] < xs[0]->d[dimension], - "PickElement::forward_impl requested element " << (*pvals)[b] << " from a dimension of length " << xs[0]->d[dimension]); - if(xs[0]->d.bd == 1){ - fx.tb<2>().chip<2>(b).device(*dev.edevice) = xs[0]->t<3>().chip((*pvals)[b], dimension); - }else{ - fx.tb<2>().chip<2>(b).device(*dev.edevice) = xs[0]->tb<3>().chip<3>(b).chip((*pvals)[b], dimension); - } - } - } -} - -// derivative is 0 in all dimensions except 1 for the selected element -template -void PickElement::backward_dev_impl(const MyDevice & dev, - const vector& xs, - const Tensor& fx, - const Tensor& dEdf, - unsigned i, - Tensor& dEdxi) const { - DYNET_ARG_CHECK(i == 0, "Failed dimension check in PickElement::backward"); - if(pval) { - dEdxi.tb<3>().chip(*pval, dimension).device(*dev.edevice) += dEdf.tb<2>(); - } else { - DYNET_ASSERT(pvals, "Neither single nor vector of elements available in PickElement::forward"); - for(unsigned b = 0; b < pvals->size(); ++b){ - if(xs[0]->d.bd == 1){ - dEdxi.t<3>().chip((*pvals)[b], dimension).device(*dev.edevice) += dEdf.tb<2>().chip<2>(b); - }else{ - dEdxi.tb<3>().chip<3>(b).chip((*pvals)[b], dimension).device(*dev.edevice) += dEdf.tb<2>().chip<2>(b); - } - } - } -} -DYNET_NODE_INST_DEV_IMPL(PickElement) - -// x_1 is a matrix -// y = (x_1)[start:end] -// slice of matrix from index start (inclusive) to index end (exclusive) -template -void 
PickRange::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { - Eigen::DSizes indices(0,0,0,0,0); - indices[dim] = start; - Eigen::DSizes sizes(static_cast(fx.d[0]), - static_cast(fx.d[1]), - static_cast(fx.d[2]), - static_cast(fx.d[3]), - static_cast(fx.d.bd)); - sizes[dim] = end-start; - fx.tb<4>().device(*dev.edevice) = xs[0]->tb<4>().slice(indices, sizes); -} - -// derivative is 0 in all dimensions except the slice range -template -void PickRange::backward_dev_impl(const MyDevice & dev, - const vector& xs, - const Tensor& fx, - const Tensor& dEdf, - unsigned i, - Tensor& dEdxi) const { - Eigen::DSizes indices(0,0,0,0,0); - indices[dim] = start; - Eigen::DSizes sizes(static_cast(fx.d[0]), - static_cast(fx.d[1]), - static_cast(fx.d[2]), - static_cast(fx.d[3]), - static_cast(fx.d.bd)); - sizes[dim] = end-start; - dEdxi.tb<4>().slice(indices, sizes).device(*dev.edevice) += dEdf.tb<4>(); -} -DYNET_NODE_INST_DEV_IMPL(PickRange) - -template -void PickBatchElements::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { - if (pval) { - fx.tvec().device(*dev.edevice) = xs[0]->tbvec().chip<1>(*pval); - } else { - DYNET_ASSERT(pvals != nullptr, "Neither single nor vector of elements available in PickBatchElements::forward"); - DYNET_ARG_CHECK(pvals->size() == fx.d.batch_elems(), - "In PickBatchElements::forward, number of elements in the passed-in index vector (" << pvals->size() << ") " - "did not match number of elements in mini-batch elements in expression (of dimension" << fx.d << ")"); - for (unsigned b = 0; b < pvals->size(); ++b) { - DYNET_ARG_CHECK((*pvals)[b] < xs[0]->d.bd, - "PickBatchElements::forward_impl requested element " << (*pvals)[b] << " from a batch size of " << xs[0]->d.bd); - fx.tbvec().chip<1>(b).device(*dev.edevice) = xs[0]->tbvec().chip<1>((*pvals)[b]); - } - } -} - -template -void PickBatchElements::backward_dev_impl(const MyDevice & dev, - const vector& xs, - const Tensor& fx, - const Tensor& dEdf, 
- unsigned i, - Tensor& dEdxi) const { - DYNET_ASSERT(i == 0, "Failed dimension check in PickBatchElements::backward"); - if (pval) { - dEdxi.tbvec().chip<1>(*pval).device(*dev.edevice) += dEdf.tvec(); - } else { - DYNET_ASSERT(pvals, "Neither single nor vector of elements available in PickBatchElements::backward"); - for (unsigned b = 0; b < pvals->size(); ++b) - dEdxi.tbvec().chip<1>((*pvals)[b]).device(*dev.edevice) += dEdf.tbvec().chip<1>(b); - } -} -DYNET_NODE_INST_DEV_IMPL(PickBatchElements) - -template -void PoissonRegressionLoss::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { - const real y = *pty; - const auto z = std::lgamma(y + 1); - // const auto x = as_scalar(*xs[0]); - fx.t<0>().device(*dev.edevice) = xs[0]->t<0>().exp() + z - xs[0]->t<0>() * y; -} - -template -void PoissonRegressionLoss::backward_dev_impl(const MyDevice & dev, - const vector& xs, - const Tensor& fx, - const Tensor& dEdf, - unsigned i, - Tensor& dEdxi) const { - const real y = *pty; - dEdxi.t<0>().device(*dev.edevice) += xs[0]->t<0>().exp() - y; -} -DYNET_NODE_INST_DEV_IMPL(PoissonRegressionLoss) - -template -void Pow::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { - DYNET_ARG_CHECK(xs.size() == 2, "Failed dimension check in Pow::forward"); - fx.tvec().device(*dev.edevice) = xs[0]->tvec().pow(as_scalar(*xs[1])); -} - -template -void Pow::backward_dev_impl(const MyDevice & dev, - const vector& xs, - const Tensor& fx, - const Tensor& dEdf, - unsigned i, - Tensor& dEdxi) const { - DYNET_ARG_CHECK(xs.size() == 2, "Failed dimension check in Pow::backward"); - real x2 = as_scalar(*xs[1]); - if (i == 0) { - dEdxi.tvec().device(*dev.edevice) += xs[0]->tvec().pow(x2 - 1) * dEdf.tvec() * x2; - } else { -#if defined(__CUDACC__) && defined(EIGEN_NO_MALLOC) - DYNET_RUNTIME_ERR("CUDA memory allocation in Pow"); -#endif - // y = a^x - // dy/dx = a^x * log(a) - dEdxi.t<0>().device(*dev.edevice) += (fx.tvec() * xs[0]->tvec().log() * 
dEdf.tvec()).sum(); - } -} -DYNET_NODE_INST_DEV_IMPL(Pow) - -template -void Rectify::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { - DYNET_ARG_CHECK(xs.size() == 1, "Failed dimension check in Rectify::forward"); - fx.tvec().device(*dev.edevice) = xs[0]->tvec().cwiseMax(0.f); -} - -template -void Rectify::backward_dev_impl(const MyDevice & dev, - const vector& xs, - const Tensor& fx, - const Tensor& dEdf, - unsigned i, - Tensor& dEdxi) const { - dEdxi.tvec().device(*dev.edevice) += fx.tvec().binaryExpr(dEdf.tvec(), FRectifyBackward()); -} -DYNET_NODE_INST_DEV_IMPL(Rectify) - -template -void ExponentialLinearUnit::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { - DYNET_ARG_CHECK(xs.size() == 1, "Failed dimension check in ExponentialLinearUnit::forward"); - fx.tvec().device(*dev.edevice) = xs[0]->tvec().unaryExpr(FELUForward(alpha, lambda));; -} - -template -void ExponentialLinearUnit::backward_dev_impl(const MyDevice & dev, - const vector& xs, - const Tensor& fx, - const Tensor& dEdf, - unsigned i, - Tensor& dEdxi) const { - dEdxi.tvec().device(*dev.edevice) += xs[0]->tvec().binaryExpr(dEdf.tvec(), FELUBackward(alpha, lambda)); -} -DYNET_NODE_INST_DEV_IMPL(ExponentialLinearUnit) - -template -void Reshape::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { - // just point to the input memory and change dimensions - // dimensions are handled by forward_dim - fx.tvec().device(*dev.edevice) = xs[0]->tvec(); -} - -template -void Reshape::backward_dev_impl(const MyDevice & dev, - const vector& xs, - const Tensor& fx, - const Tensor& dEdf, - unsigned i, - Tensor& dEdxi) const { - const Tensor reshaped(dEdxi.d, dEdf.v, dEdxi.device, dEdf.mem_pool); - dEdxi.tvec().device(*dev.edevice) += reshaped.tvec(); -} -DYNET_NODE_INST_DEV_IMPL(Reshape) - -template -void RestrictedLogSoftmax::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { - DYNET_ASSERT(xs.size() == 1, 
"Failed dimension check in RestrictedLogSoftmax"); -#ifdef __CUDACC__ - DYNET_RUNTIME_ERR("RestrictedLogSoftmax not yet implemented for CUDA (contributions welcome!)"); -#else - // TODO create auxiliary mask with -infty's - // and do usual LogSoftmax stuff - if(denom.size() == 0) - DYNET_INVALID_ARG("Number of elements in denominator of RestrictedLogSoftmax::forward must be zero"); - auto x = **xs[0]; - if(denom.size() == 0) - DYNET_RUNTIME_ERR("RestrictedLogSoftmax currently only supports single column expressions (contributions expanding support to multiple columns welcome!)"); - const real logz = logsumexp(x, denom); - TensorTools::constant(fx, -numeric_limits::infinity()); - for (auto i : denom) - (*fx)(i,0) = x(i,0) - logz; - if (denom.size() == 1) (*fx)(denom.front(), 0) = 0; -#endif -} - -template -void RestrictedLogSoftmax::backward_dev_impl(const MyDevice & dev, - const vector& xs, - const Tensor& fx, - const Tensor& dEdf, - unsigned i, - Tensor& dEdxi) const { - DYNET_ASSERT(i == 0, "Failed dimension check in RestrictedLogSoftmax"); -#ifdef __CUDACC__ - DYNET_RUNTIME_ERR("RestrictedLogSoftmax not yet implemented for CUDA (contributions welcome!)"); -#else - float z = 0; - for (auto ind : denom) - z += (*dEdf)(ind, 0); - for (auto ind : denom) - (*dEdxi)(ind, 0) += (*dEdf)(ind, 0) - expf((*fx)(ind, 0)) * z; -#endif -} -DYNET_NODE_INST_DEV_IMPL(RestrictedLogSoftmax) - -template -void SelectCols::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { - DYNET_ARG_CHECK(xs.size() == 1, "Failed dimension check in SelectCols::forward"); - auto& rm = *pcols; - for (unsigned i = 0; i < rm.size(); ++i) { - DYNET_ARG_CHECK(rm[i] < xs[0]->d.cols(), - "Out-of-bounds index " << rm[i] << " in SelectCols over expression of dimensions " << xs[0]->d); - fx.t<2>().chip<1>(i).device(*dev.edevice) = xs[0]->t<2>().chip<1>(rm[i]); - } -} - -template -void SelectCols::backward_dev_impl(const MyDevice & dev, - const vector& xs, - const Tensor& fx, - const 
Tensor& dEdf, - unsigned i, - Tensor& dEdxi) const { - DYNET_ARG_CHECK(xs.size() == 1, "Failed dimension check in SelectCols::backward"); - auto& rm = *pcols; - for (unsigned i = 0; i < rm.size(); ++i) - dEdxi.t<2>().chip<1>(rm[i]).device(*dev.edevice) += dEdf.t<2>().chip<1>(i); -} -DYNET_NODE_INST_DEV_IMPL(SelectCols) - -template -void SelectRows::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { - DYNET_ARG_CHECK(xs.size() == 1, "Failed dimension check in SelectRows::forward"); - auto& rm = *prows; - for (unsigned i = 0; i < rm.size(); ++i) { - DYNET_ARG_CHECK(rm[i] < xs[0]->d.rows(), - "Out-of-bounds index " << rm[i] << " in SelectRows over expression of dimensions " << xs[0]->d); - fx.t<4>().chip<0>(i).device(*dev.edevice) = xs[0]->t<4>().chip<0>(rm[i]); - } -} - -template -void SelectRows::backward_dev_impl(const MyDevice & dev, - const vector& xs, - const Tensor& fx, - const Tensor& dEdf, - unsigned i, - Tensor& dEdxi) const { - DYNET_ARG_CHECK(xs.size() == 1, "Failed dimension check in SelectRows::backward"); - auto& rm = *prows; - for (unsigned i = 0; i < rm.size(); ++i) - dEdxi.t<4>().chip<0>(rm[i]).device(*dev.edevice) += dEdf.t<4>().chip<0>(i); -} -DYNET_NODE_INST_DEV_IMPL(SelectRows) - -template -void Softmax::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { - DYNET_ARG_CHECK(xs.size() == 1, "Failed dimension check in Softmax::forward"); - Tensor z(Dim({xs[0]->d.cols()},fx.d.bd), (float*)aux_mem, fx.device, DeviceMempool::FXS); - Tensor m(Dim({xs[0]->d.cols()},fx.d.bd), (float*)aux_mem + z.d.size(), fx.device, DeviceMempool::FXS); - TensorTools::logsumexp_dev(dev, *xs[0], m, z); - // TODO? Is this broadcast efficient on CPU? 
- Eigen::array bcasts = {(int)xs[0]->d.rows(), 1, 1}; - Eigen::array morph = {1, (int)z.d[0], (int)z.d.bd}; - fx.tb<2>().device(*dev.edevice) = (xs[0]->tb<2>() - z.tvec().reshape(morph).broadcast(bcasts)).exp(); -} - -template -void Softmax::backward_dev_impl(const MyDevice & dev, - const vector& xs, - const Tensor& fx, - const Tensor& dEdf, - unsigned i, - Tensor& dEdxi) const { - Tensor z(Dim({fx.d.cols()},fx.d.bd), (float*)aux_mem, fx.device, DeviceMempool::FXS); - // TODO? Is this broadcast efficient on CPU? - Eigen::array red_axis = {0}; - z.tb<1>().device(*dev.edevice) = (fx.tb<2>() * dEdf.tb<2>()).sum(red_axis); - Eigen::array bcast = {(int)xs[0]->d.rows(), 1, 1}; - Eigen::array morph = {1, (int)z.d[0], (int)z.d.bd}; - dEdxi.tb<2>().device(*dev.edevice) += (dEdf.tb<2>() - z.tvec().reshape(morph).broadcast(bcast)) * fx.tb<2>(); -} -DYNET_NODE_INST_DEV_IMPL(Softmax) - -template -void SoftSign::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { - DYNET_ARG_CHECK(xs.size() == 1, "Failed dimension check in SoftSign::forward"); - fx.tvec().device(*dev.edevice) = xs[0]->tvec().unaryExpr(FSoftSign()); -} - -template -void SoftSign::backward_dev_impl(const MyDevice & dev, - const vector& xs, - const Tensor& fx, - const Tensor& dEdf, - unsigned i, - Tensor& dEdxi) const { - dEdxi.tvec().device(*dev.edevice) += fx.tvec().binaryExpr(dEdf.tvec(), FSoftSignBackward()); -} -DYNET_NODE_INST_DEV_IMPL(SoftSign) - -template -void Sparsemax::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { - if (xs[0]->d.cols() == 1) { -#ifdef __CUDACC__ - DYNET_RUNTIME_ERR("Sparsemax not implemented for CUDA"); -#else - const unsigned rows = xs[0]->d.rows(); - float *zs = static_cast(aux_mem); - std::partial_sort_copy(xs[0]->v, xs[0]->v+rows, zs, zs + rows, std::greater()); - float sum = 0, maxsum = 0; - unsigned k = 0; - for (k = 0; k < rows; ++k) { - sum += zs[k]; - float t = 1 + (k + 1) * zs[k]; - if (t <= sum) break; - maxsum = sum; - } 
- float tau = (maxsum - 1) / k; - auto y = *fx; - fx.tvec() = (xs[0]->tvec() - tau).cwiseMax(0.f); - int c = 1; - int *cc = static_cast(aux_mem); - for (unsigned i = 0; i < rows; ++i) - if (y(i,0) > 0.f) cc[c++] = i; - cc[0] = c - 1; -#endif - } else { - DYNET_RUNTIME_ERR("Sparsemax not yet implemented for multiple columns"); - } -} - -template -void Sparsemax::backward_dev_impl(const MyDevice & dev, - const vector& xs, - const Tensor& fx, - const Tensor& dEdf, - unsigned i, - Tensor& dEdxi) const { -#ifdef __CUDACC__ - DYNET_RUNTIME_ERR("Sparsemax not implemented for CUDA"); -#else - const int ssize = static_cast(aux_mem)[0]; - int *support = static_cast(aux_mem) + 1; - float dhat = 0; - auto& d = *dEdf; - for (int i = 0; i < ssize; ++i) - dhat += d(support[i], 0); - dhat /= ssize; - for (int i = 0; i < ssize; ++i) - (*dEdxi)(support[i], 0) += d(support[i], 0) - dhat; -#endif -} -DYNET_NODE_INST_DEV_IMPL(Sparsemax) - -template -void SparsemaxLoss::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { - if (xs[0]->d.cols() == 1) { -#ifdef __CUDACC__ - DYNET_RUNTIME_ERR("SparsemaxLoss not implemented for CUDA"); -#else - const int rows = xs[0]->d.rows(); - if (rows > MAX_SPARSEMAX_LOSS_ROWS) - DYNET_RUNTIME_ERR("MAX_SPARSEMAX_LOSS_ROWS is not sufficient. 
Recompile with larger value."); - const unsigned qsupport_size = pq->size(); - const float qprop = 1.f / qsupport_size; - - float *zs = static_cast(aux_mem); - std::partial_sort_copy(xs[0]->v, xs[0]->v+rows, zs, zs + rows, std::greater()); - float sum = 0, maxsum = 0; - int k = 0; - for (k = 0; k < rows; ++k) { - sum += zs[k]; - float t = 1 + (k + 1) * zs[k]; - if (t <= sum) break; - maxsum = sum; - } - float tau = (maxsum - 1) / k; - Tensor tsm(xs[0]->d, (float*)aux_mem, xs[0]->device, DeviceMempool::FXS); - tsm.t<1>() = (xs[0]->t<1>() - tau).cwiseMax(0.f); - fx.t<0>() = ( (tsm.t<1>() != 0.f).cast() * (xs[0]->t<1>().square() - (tau * tau)) ).sum(); - fx.t<0>() = ( fx.t<0>() + qprop * qprop * qsupport_size ) / 2.f; - for (unsigned i = 0; i < qsupport_size; ++i) - fx.t<0>() = fx.t<0>() - xs[0]->t<1>().chip<0>((*pq)[i]) * qprop; - fx.t<0>() = fx.t<0>().cwiseMax(0.f); -#endif - } else { - DYNET_RUNTIME_ERR("SparsemaxLoss not yet implemented for multiple columns"); - } -} - -template -void SparsemaxLoss::backward_dev_impl(const MyDevice & dev, - const vector& xs, - const Tensor& fx, - const Tensor& dEdf, - unsigned i, - Tensor& dEdxi) const { -#ifdef __CUDACC__ - DYNET_RUNTIME_ERR("SparsemaxLoss not implemented for CUDA"); -#else - const float d = dEdf.v[0]; - float* psm = static_cast(aux_mem); - float dqprop = d / pq->size(); - Tensor tsm(xs[0]->d, psm, xs[0]->device, DeviceMempool::FXS); - auto sm = *tsm; // sparsemax(z) - *dEdxi += sm * d; - for (unsigned i = 0; i < pq->size(); ++i) - (*dEdxi)((*pq)[i], 0) -= dqprop; -#endif -} -DYNET_NODE_INST_DEV_IMPL(SparsemaxLoss) - -template -void Sum::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { - const unsigned num_args = xs.size(); - if (num_args == 1) - fx.tvec().device(*dev.edevice) = xs[0]->tvec(); - else if (num_args == 2 && xs[0]->d.bd == xs[1]->d.bd) - fx.tvec().device(*dev.edevice) = xs[0]->tvec() + xs[1]->tvec(); - else if (num_args == 3 && xs[0]->d.bd == xs[1]->d.bd && xs[1]->d.bd == 
xs[2]->d.bd) - fx.tvec().device(*dev.edevice) = xs[0]->tvec() + xs[1]->tvec() + xs[2]->tvec(); - else if (num_args == 4 && xs[0]->d.bd == xs[1]->d.bd && xs[1]->d.bd == xs[2]->d.bd && xs[2]->d.bd == xs[3]->d.bd) - fx.tvec().device(*dev.edevice) = xs[0]->tvec() + xs[1]->tvec() + xs[2]->tvec() + xs[3]->tvec(); - else { - bool allSameBatchSize = std::all_of(xs.begin(), xs.end(), [&](const Tensor* x) { return x->d.bd == xs[0]->d.bd;}); - if (allSameBatchSize) { - // Since they are all the same batch size, we can easily unroll the addition (results in lower GPU latency by merging multiple adds together in one CUDA call): - DYNET_ASSERT(num_args > 4, "Bad loop unrolling in Sum::forward"); // If it was <=4, we would have handled it in the special cases above - fx.tvec().device(*dev.edevice) = xs[0]->tvec() + xs[1]->tvec() + xs[2]->tvec() + xs[3]->tvec(); - - const unsigned remainder = (num_args - 4 ) % 4; - switch (remainder) { - case 0: break; - case 1: fx.tvec().device(*dev.edevice) += xs[4]->tvec(); break; - case 2: fx.tvec().device(*dev.edevice) += xs[4]->tvec() + xs[5]->tvec(); break; - case 3: fx.tvec().device(*dev.edevice) += xs[4]->tvec() + xs[5]->tvec() + xs[6]->tvec(); break; - } - for (unsigned i = 4 + remainder; i < num_args; i += 4) - fx.tvec().device(*dev.edevice) += xs[i]->tvec() + xs[i + 1]->tvec() + xs[i + 2]->tvec() + xs[i + 3]->tvec(); - } - else { - // Not all the same batch size, so need to broadcast in the cases where they differ - TensorTools::zero(fx); -#ifdef __CUDACC__ - Eigen::array bcast({ 1, (int)fx.d.bd }); -#endif - for (unsigned i = 0; i < num_args; ++i) { - if (xs[i]->d.bd == fx.d.bd) { - fx.tvec().device(*dev.edevice) += xs[i]->tvec(); - } - else { -#ifdef __CUDACC__ - fx.tbvec().device(*dev.edevice) += xs[i]->tbvec().broadcast(bcast); -#else - for (unsigned b = 0; b < fx.d.bd; ++b) - fx.tbvec().chip<1>(b).device(*dev.edevice) += xs[i]->tvec(); -#endif - } - } - } - } -} - -template -void Sum::backward_dev_impl(const MyDevice & dev, - 
const vector& xs, - const Tensor& fx, - const Tensor& dEdf, - unsigned i, - Tensor& dEdxi) const { - if(dEdxi.d.bd == fx.d.bd) { - dEdxi.tvec().device(*dev.edevice) += dEdf.tvec(); - } else { - Eigen::array red_axis = {1}; - dEdxi.tvec().device(*dev.edevice) += dEdf.tbvec().sum(red_axis); - } -} -DYNET_NODE_INST_DEV_IMPL(Sum) - -template -void SumElements::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { - DYNET_ARG_CHECK(xs.size() == 1, "Failed dimension check in SumElements::forward"); - Eigen::array red_axis; red_axis[0] = 0; - fx.tb<0>().device(*dev.edevice) = xs[0]->tbvec().sum(red_axis); -} - -template -void SumElements::backward_dev_impl(const MyDevice & dev, - const vector& xs, - const Tensor& fx, - const Tensor& dEdf, - unsigned i, - Tensor& dEdxi) const { - DYNET_ARG_CHECK(i == 0, "Failed dimension check in SumElements::backward"); - Eigen::array bcast = {(int)xs[0]->d.batch_size(), 1}; - dEdxi.tbvec().device(*dev.edevice) += dEdf.tbvec().broadcast(bcast); -} -DYNET_NODE_INST_DEV_IMPL(SumElements) - -template -void MomentElements::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { - DYNET_ARG_CHECK(xs.size() == 1, "Failed dimension check in MomentElements::forward"); - Eigen::array red_axis; red_axis[0] = 0; - if(order == 1) - fx.tb<0>().device(*dev.edevice) = xs[0]->tbvec().sum(red_axis) / (float) xs[0]->d.batch_size(); - else if (order == 2) - fx.tb<0>().device(*dev.edevice) = xs[0]->tbvec().square().sum(red_axis) / (float) xs[0]->d.batch_size(); - else - fx.tb<0>().device(*dev.edevice) = xs[0]->tbvec().pow(order).sum(red_axis) / (float) xs[0]->d.batch_size(); -} - -template -void MomentElements::backward_dev_impl(const MyDevice & dev, - const vector& xs, - const Tensor& fx, - const Tensor& dEdf, - unsigned i, - Tensor& dEdxi) const { - DYNET_ARG_CHECK(i == 0, "Failed dimension check in MomentElements::backward"); - Eigen::array bcast = {(int)xs[0]->d.batch_size(), 1}; - if (order == 1) - 
dEdxi.tbvec().device(*dev.edevice) += dEdf.tbvec().broadcast(bcast) / (float) xs[0]->d.batch_size(); - else if (order == 2) - dEdxi.tbvec().device(*dev.edevice) += (dEdf.tbvec().broadcast(bcast) * xs[0]->tbvec()) * ( 2.f / (float) xs[0]->d.batch_size()); - else if (order == 3) - dEdxi.tbvec().device(*dev.edevice) += (dEdf.tbvec().broadcast(bcast) * xs[0]->tbvec().square()) * ( 3.f / (float) xs[0]->d.batch_size()); - else - dEdxi.tbvec().device(*dev.edevice) += (dEdf.tbvec().broadcast(bcast) * xs[0]->tbvec().pow(order - 1)) * ( (float) order / (float) xs[0]->d.batch_size()); -} -DYNET_NODE_INST_DEV_IMPL(MomentElements) - - -template -void StdElements::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { - DYNET_ASSERT(xs.size() == 1, "Failed dimension check in StdElements::forward"); - Eigen::array red_axis = {0}; - Eigen::array bcast = {xs[0]->d.batch_size(), 1}; - Eigen::array newaxis = {1, xs[0]->d.bd}; - float n = (float) xs[0]->d.batch_size(); - fx.tb<0>().device(*dev.edevice) = ((xs[0]->tbvec() - (xs[0]->tbvec().sum(red_axis).reshape(newaxis) / n).broadcast(bcast)).square().sum(red_axis) / n).sqrt(); -} - -template -void StdElements::backward_dev_impl(const MyDevice & dev, - const vector& xs, - const Tensor& fx, - const Tensor& dEdf, - unsigned i, - Tensor& dEdxi) const { - DYNET_ASSERT(i < 1, "Failed dimension check in StdElements::backward"); - Eigen::array bcast = {xs[0]->d.batch_size(), 1}; - Eigen::array newaxis = {1, xs[0]->d.bd}; - Eigen::array red_axis = {0}; - float n = (float) xs[0]->d.batch_size(); - dEdxi.tbvec().device(*dev.edevice) += (2 / n) * (xs[0]->tbvec() - (xs[0]->tbvec().sum(red_axis).reshape(newaxis) / n).broadcast(bcast)) * (fx.tbvec().binaryExpr(dEdf.tbvec(), FSqrtBackward())).broadcast(bcast); - -} -DYNET_NODE_INST_DEV_IMPL(StdElements) - -template -void MomentBatches::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { - DYNET_ARG_CHECK(xs.size() == 1, "Failed dimension check in 
MomentBatches::forward"); - Eigen::array red_axis; red_axis[0] = 1; - if(order == 1) - fx.tvec().device(*dev.edevice) = xs[0]->tbvec().sum(red_axis) / (float) xs[0]->d.bd; - else if (order == 2) - fx.tvec().device(*dev.edevice) = xs[0]->tbvec().square().sum(red_axis) / (float) xs[0]->d.bd; - else - fx.tvec().device(*dev.edevice) = xs[0]->tbvec().pow(order).sum(red_axis) / (float) xs[0]->d.bd; -} - -template -void MomentBatches::backward_dev_impl(const MyDevice & dev, - const vector& xs, - const Tensor& fx, - const Tensor& dEdf, - unsigned i, - Tensor& dEdxi) const { - DYNET_ARG_CHECK(i == 0, "Failed dimension check in MomentBatches::backward"); - Eigen::array bcast = {1, (int)xs[0]->d.bd}; - if (order == 1) - dEdxi.tbvec().device(*dev.edevice) += dEdf.tbvec().broadcast(bcast) / (float) xs[0]->d.bd; - else if (order == 2) - dEdxi.tbvec().device(*dev.edevice) += (dEdf.tbvec().broadcast(bcast) * xs[0]->tbvec()) * ( 2.f / (float) xs[0]->d.bd); - else if (order == 3) - dEdxi.tbvec().device(*dev.edevice) += (dEdf.tbvec().broadcast(bcast) * xs[0]->tbvec().square()) * ( 3.f / (float) xs[0]->d.bd); - else - dEdxi.tbvec().device(*dev.edevice) += (dEdf.tbvec().broadcast(bcast) * xs[0]->tbvec().pow(order - 1)) * ( (float) order / (float) xs[0]->d.bd); -} -DYNET_NODE_INST_DEV_IMPL(MomentBatches) - -template -void MomentDimension::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { - DYNET_ASSERT(xs.size() == 1, "Failed input count check in SumDimension"); - Eigen::array reduction_axis = {(int)dimension}; - float n = (float) xs[0]->d[dimension]; - if(order == 1) - fx.tb<2>().device(*dev.edevice) = xs[0]->tb<3>().sum(reduction_axis) / n; - else if (order == 2) - fx.tb<2>().device(*dev.edevice) = xs[0]->tb<3>().square().sum(reduction_axis) / n; - else - fx.tb<2>().device(*dev.edevice) = xs[0]->tb<3>().pow(order).sum(reduction_axis) / n; -} - -template -void MomentDimension::backward_dev_impl(const MyDevice & dev, - const vector& xs, - const Tensor& fx, - 
const Tensor& dEdf, - unsigned i, - Tensor& dEdxi) const { - DYNET_ARG_CHECK(i == 0, "Failed dimension check in MomentDimension::backward"); - Eigen::array bcast = {1,1,1,1}; bcast[dimension] = xs[0]->d[dimension]; - Eigen::array morph = {(int)xs[0]->d[0],(int)xs[0]->d[1],(int)xs[0]->d[2],(int)xs[0]->d.bd}; morph[dimension] = 1; - float n = (float) xs[0]->d[dimension]; - if (order == 1) - dEdxi.tb<3>().device(*dev.edevice) += dEdf.tb<2>().reshape(morph).broadcast(bcast) / n; - else if (order == 2) - dEdxi.tb<3>().device(*dev.edevice) += (dEdf.tb<2>().reshape(morph).broadcast(bcast) * xs[0]->tb<3>()) * ( 2.f / n); - else if (order == 3) - dEdxi.tb<3>().device(*dev.edevice) += (dEdf.tb<2>().reshape(morph).broadcast(bcast) * xs[0]->tb<3>().square()) * ( 3.f / n); - else - dEdxi.tb<3>().device(*dev.edevice) += (dEdf.tb<2>().reshape(morph).broadcast(bcast) * xs[0]->tb<3>().pow(order - 1)) * ( (float) order / n); -} -DYNET_NODE_INST_DEV_IMPL(MomentDimension) - -template -void StdDimension::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { - DYNET_ASSERT(xs.size() == 1, "Failed input count check in SumDimension"); - Eigen::array red_axis = {(int)dimension}; - Eigen::array morph = {(int)xs[0]->d[0],(int)xs[0]->d[1],(int)xs[0]->d[2],(int)xs[0]->d.bd}; morph[dimension] = 1; - Eigen::array bcast = {1,1,1,1}; bcast[dimension] = xs[0]->d[dimension]; - float n = (float) xs[0]->d[dimension]; - fx.tb<2>().device(*dev.edevice) = ((xs[0]->tb<3>() - (xs[0]->tb<3>().sum(red_axis).reshape(morph) / n).broadcast(bcast)).square().sum(red_axis) / n).sqrt(); -} - -template -void StdDimension::backward_dev_impl(const MyDevice & dev, - const vector& xs, - const Tensor& fx, - const Tensor& dEdf, - unsigned i, - Tensor& dEdxi) const { - DYNET_ARG_CHECK(i == 0, "Failed dimension check in StdDimension::backward"); - Eigen::array red_axis = {(int)dimension}; - Eigen::array bcast = {1,1,1,1}; bcast[dimension] = xs[0]->d[dimension]; - Eigen::array morph = 
{(int)xs[0]->d[0],(int)xs[0]->d[1],(int)xs[0]->d[2],(int)xs[0]->d.bd}; morph[dimension] = 1; - float n = (float) xs[0]->d[dimension]; - dEdxi.tb<3>().device(*dev.edevice) += (2 / n) * (xs[0]->tb<3>() - (xs[0]->tb<3>().sum(red_axis).reshape(morph) / n).broadcast(bcast)) * (fx.tb<2>().binaryExpr(dEdf.tb<2>(), FSqrtBackward())).reshape(morph).broadcast(bcast); - -} -DYNET_NODE_INST_DEV_IMPL(StdDimension) - - -template -void StdBatches::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { - DYNET_ASSERT(xs.size() == 1, "Failed dimension check in StdBatches::forward"); - Eigen::array red_axis = {1}; - Eigen::array newaxis = {xs[0]->d.batch_size(), 1}; - Eigen::array bcast = {1, xs[0]->d.bd}; - float n = (float)xs[0]->d.bd; - fx.t<1>().device(*dev.edevice) = ((xs[0]->tbvec() - (xs[0]->tbvec().sum(red_axis).reshape(newaxis) / n).broadcast(bcast)).square().sum(red_axis) / n).sqrt(); -} - -template -void StdBatches::backward_dev_impl(const MyDevice & dev, - const vector& xs, - const Tensor& fx, - const Tensor& dEdf, - unsigned i, - Tensor& dEdxi) const { - DYNET_ASSERT(i < 1, "Failed dimension check in StdBatches::backward"); - Eigen::array red_axis = {1}; - Eigen::array bcast = {1, xs[0]->d.bd}; - Eigen::array newaxis = {xs[0]->d.batch_size(), 1}; - float n = (float)xs[0]->d.bd; - dEdxi.tbvec().device(*dev.edevice) += (2 / n) * (xs[0]->tbvec() - (xs[0]->tbvec().sum(red_axis).reshape(newaxis) / n).broadcast(bcast)) * (fx.tbvec().binaryExpr(dEdf.tbvec(), FSqrtBackward())).broadcast(bcast); - -} -DYNET_NODE_INST_DEV_IMPL(StdBatches) - - -template -void SumBatches::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { - DYNET_ARG_CHECK(xs.size() == 1, "Failed dimension check in SumBatches::forward"); - unsigned num_args = xs[0]->d.bd; -#ifdef __CUDACC__ - Eigen::array red_axis; red_axis[0] = 2; - fx.t<2>().device(*dev.edevice) = xs[0]->tb<2>().sum(red_axis); -#else - // TODO: Is this CPU version really good? 
Overhead can probably be reduced. - auto res = *fx; - const unsigned remainder = num_args % 4; - switch (remainder) { - case 0: res.setZero(); break; - case 1: res = xs[0]->batch_matrix(0); break; - case 2: res = xs[0]->batch_matrix(0) + xs[0]->batch_matrix(1); break; - case 3: res = xs[0]->batch_matrix(0) + xs[0]->batch_matrix(1) + xs[0]->batch_matrix(2); break; - } - for (unsigned i = remainder; i < num_args; i += 4) - res += xs[0]->batch_matrix(i) + xs[0]->batch_matrix(i+1) + xs[0]->batch_matrix(i+2) + xs[0]->batch_matrix(i+3); -#endif -} - -template -void SumBatches::backward_dev_impl(const MyDevice & dev, - const vector& xs, - const Tensor& fx, - const Tensor& dEdf, - unsigned i, - Tensor& dEdxi) const { - DYNET_ARG_CHECK(i == 0, "Failed dimension check in SumBatches::backward"); -#ifdef __CUDACC__ - Eigen::array bcast({1, 1, (int)fx.d.bd}); - dEdxi.tb<2>().device(*dev.edevice) += dEdf.tb<2>().broadcast(bcast); -#else - for (unsigned i = 0; i < dEdxi.d.bd; ++i) - dEdxi.batch_matrix(i) += *dEdf; -#endif -} -DYNET_NODE_INST_DEV_IMPL(SumBatches) - -template -void TraceOfProduct::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { -#ifdef __CUDACC__ - DYNET_RUNTIME_ERR("TraceOfProduct not yet implemented for CUDA"); -#else - auto x1 = **xs[0]; - auto x2 = **xs[1]; - fx.v[0] = (x1 * x2.transpose()).trace(); -#endif -} - -template -void TraceOfProduct::backward_dev_impl(const MyDevice & dev, - const vector& xs, - const Tensor& fx, - const Tensor& dEdf, - unsigned i, - Tensor& dEdxi) const { - DYNET_ARG_CHECK(i < 2, "Failed dimension check in TraceOfProduce::backward"); -#ifdef __CUDACC__ - DYNET_RUNTIME_ERR("TraceOfProduct not yet implemented for CUDA"); -#else - const float d = dEdf.v[0]; - auto xother = **xs[1 - i]; - *dEdxi += d * xother; -#endif -} -DYNET_NODE_INST_DEV_IMPL(TraceOfProduct) - -template -void Tanh::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { - fx.tvec().device(*dev.edevice) = 
xs[0]->tvec().tanh(); -} - -template -void Tanh::backward_dev_impl(const MyDevice & dev, - const vector& xs, - const Tensor& fx, - const Tensor& dEdf, - unsigned i, - Tensor& dEdxi) const { - dEdxi.tvec().device(*dev.edevice) += fx.tvec().binaryExpr(dEdf.tvec(), scalar_tanh_backward_op()); -} -DYNET_NODE_INST_DEV_IMPL(Tanh) - -template -void Transpose::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { - if (dim.num_nonone_dims() <= 1) { - fx.tvec().device(*dev.edevice) = xs[0]->tvec(); - } else { - Eigen::array order; - for(size_t i = 0; i < 5; ++i) - order[i] = (i >= dims.size() ? i : dims[i]); - fx.tb<4>().device(*dev.edevice) = xs[0]->tb<4>().shuffle(order); - } -} - -template -void Transpose::backward_dev_impl(const MyDevice & dev, - const vector& xs, - const Tensor& fx, - const Tensor& dEdf, - unsigned i, - Tensor& dEdxi) const { - Eigen::array order; - for(size_t i = 0; i < 5; ++i) - order[(i >= dims.size() ? i : dims[i])] = i; - dEdxi.tb<4>().device(*dev.edevice) += dEdf.tb<4>().shuffle(order); -} -DYNET_NODE_INST_DEV_IMPL(Transpose) - -template -void Zeroes::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { - DYNET_ASSERT(xs.size() == 0, "Failed dimension check in Zeroes::forward"); - TensorTools::zero(fx); -} - -template -void Zeroes::backward_dev_impl(const MyDevice & dev, - const vector& xs, - const Tensor& fx, - const Tensor& dEdf, - unsigned i, - Tensor& dEdxi) const { - DYNET_RUNTIME_ERR("Called backward() on an arity 0 node"); -} -DYNET_NODE_INST_DEV_IMPL(Zeroes) - -template -void RandomNormal::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { - DYNET_ASSERT(xs.size() == 0, "Failed dimension check in RandomNormal::forward"); - TensorTools::randomize_normal(fx); -} - -template -void RandomNormal::backward_dev_impl(const MyDevice & dev, - const vector& xs, - const Tensor& fx, - const Tensor& dEdf, - unsigned i, - Tensor& dEdxi) const { - DYNET_RUNTIME_ERR("Called 
backward() on an arity 0 node"); -} -DYNET_NODE_INST_DEV_IMPL(RandomNormal) - -template -void RandomBernoulli::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { - DYNET_ASSERT(xs.size() == 0, "Failed dimension check in RandomBernoulli::forward"); - TensorTools::randomize_bernoulli(fx, p, scale); -} - -template -void RandomBernoulli::backward_dev_impl(const MyDevice & dev, - const vector& xs, - const Tensor& fx, - const Tensor& dEdf, - unsigned i, - Tensor& dEdxi) const { - DYNET_RUNTIME_ERR("Called backward() on an arity 0 node"); -} -DYNET_NODE_INST_DEV_IMPL(RandomBernoulli) - -template -void RandomUniform::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { - DYNET_ASSERT(xs.size() == 0, "Failed dimension check in RandomUniform::forward"); - TensorTools::randomize_uniform(fx, left, right); -} - -template -void RandomUniform::backward_dev_impl(const MyDevice & dev, - const vector& xs, - const Tensor& fx, - const Tensor& dEdf, - unsigned i, - Tensor& dEdxi) const { - DYNET_RUNTIME_ERR("Called backward() on an arity 0 node"); -} -DYNET_NODE_INST_DEV_IMPL(RandomUniform) - -template -void RandomGumbel::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { - DYNET_ASSERT(xs.size() == 0, "Failed dimension check in RandomGumbel::forward"); - DYNET_ARG_CHECK(mu == 0.0 && beta == 1.0, "RandomGumbel only supports Gumbel(0,1) at the moment (pull requests welcome)"); - TensorTools::randomize_uniform(fx, 0, 1); - float eps = 1e-20; - fx.tvec().device(*dev.edevice) = -(-fx.tvec().cwiseMax(eps).log()).cwiseMax(eps).log(); -} - -template -void RandomGumbel::backward_dev_impl(const MyDevice & dev, - const vector& xs, - const Tensor& fx, - const Tensor& dEdf, - unsigned i, - Tensor& dEdxi) const { - DYNET_RUNTIME_ERR("Called backward() on an arity 0 node"); -} -DYNET_NODE_INST_DEV_IMPL(RandomGumbel) - -template -void MaxDimension::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { - 
Eigen::DenseIndex* maxmap = static_cast(aux_mem); - const unsigned batch_size = dim.batch_elems(); - const unsigned first_dim_size = dim[0]; - const unsigned second_dim_size = dim[1]; - Eigen::TensorMap> locs(maxmap, first_dim_size, second_dim_size, batch_size); - const Eigen::array reduction_axis = {reduced_dim}; - locs.device(*dev.edevice) = xs[0]->tb<3>().argmax(reduced_dim); - fx.tb<2>().device(*dev.edevice) = xs[0]->tb<3>().maximum(reduction_axis); -} - -template -void MaxDimension::backward_dev_impl(const MyDevice & dev, - const vector& xs, - const Tensor& fx, - const Tensor& dEdf, - unsigned i, - Tensor& dEdxi) const { - DYNET_ARG_CHECK(i == 0, "Failed dimension check in MaxDimension::backward"); -#ifdef __CUDACC__ - vector indices(dim.size()); - Eigen::DenseIndex* maxmap = &indices[0]; - CUDA_CHECK(cudaMemcpy((void*)maxmap, aux_mem, sizeof(Eigen::DenseIndex) * dim.size(), cudaMemcpyDeviceToHost)); -#else - Eigen::DenseIndex* maxmap = static_cast(aux_mem); -#endif - const unsigned batch_size = dim.batch_elems(); - const unsigned first_dim_size = dim[0]; - const unsigned second_dim_size = dim[1]; - Eigen::TensorMap> locs(maxmap, first_dim_size, second_dim_size, batch_size); - for(unsigned b = 0; b < batch_size; ++b){ - for(unsigned j = 0; j < second_dim_size; ++j){ - for(unsigned i = 0; i < first_dim_size; ++i){ - if (reduced_dim > second_dim) - dEdxi.tb<3>().chip<3>(b).chip(locs(i, j, b), reduced_dim).chip(j, second_dim).chip(i, first_dim).device(*dev.edevice) - += dEdf.tb<2>().chip<2>(b).chip<1>(j).chip<0>(i); - else if (reduced_dim > first_dim) - dEdxi.tb<3>().chip<3>(b).chip(j, second_dim).chip(locs(i, j, b), reduced_dim).chip(i, first_dim).device(*dev.edevice) - += dEdf.tb<2>().chip<2>(b).chip<1>(j).chip<0>(i); - else - dEdxi.tb<3>().chip<3>(b).chip(j, second_dim).chip(i, first_dim).chip(locs(i, j, b), reduced_dim).device(*dev.edevice) - += dEdf.tb<2>().chip<2>(b).chip<1>(j).chip<0>(i); - } - } - } -} -DYNET_NODE_INST_DEV_IMPL(MaxDimension) - -template 
-void MinDimension::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { - Eigen::DenseIndex* minmap = static_cast(aux_mem); - const unsigned batch_size = dim.batch_elems(); - const unsigned first_dim_size = dim[0]; - const unsigned second_dim_size = dim[1]; - Eigen::TensorMap> locs(minmap, first_dim_size, second_dim_size, batch_size); - const Eigen::array reduction_axis = {reduced_dim}; - locs.device(*dev.edevice) = xs[0]->tb<3>().argmin(reduced_dim); - fx.tb<2>().device(*dev.edevice) = xs[0]->tb<3>().minimum(reduction_axis); -} - -template -void MinDimension::backward_dev_impl(const MyDevice & dev, - const vector& xs, - const Tensor& fx, - const Tensor& dEdf, - unsigned i, - Tensor& dEdxi) const { - DYNET_ARG_CHECK(i == 0, "Failed dimension check in MinDimension::backward"); -#ifdef __CUDACC__ - vector indices(dim.size()); - Eigen::DenseIndex* minmap = &indices[0]; - CUDA_CHECK(cudaMemcpy((void*)minmap, aux_mem, sizeof(Eigen::DenseIndex) * dim.size(), cudaMemcpyDeviceToHost)); -#else - Eigen::DenseIndex* minmap = static_cast(aux_mem); -#endif - const unsigned batch_size = dim.batch_elems(); - const unsigned first_dim_size = dim[0]; - const unsigned second_dim_size = dim[1]; - Eigen::TensorMap> locs(minmap, first_dim_size, second_dim_size, batch_size); - for(unsigned b = 0; b < batch_size; ++b){ - for(unsigned j = 0; j < second_dim_size; ++j){ - for(unsigned i = 0; i < first_dim_size; ++i){ - if (reduced_dim > second_dim) - dEdxi.tb<3>().chip<3>(b).chip(locs(i, j, b), reduced_dim).chip(j, second_dim).chip(i, first_dim).device(*dev.edevice) - += dEdf.tb<2>().chip<2>(b).chip<1>(j).chip<0>(i); - else if (reduced_dim > first_dim) - dEdxi.tb<3>().chip<3>(b).chip(j, second_dim).chip(locs(i, j, b), reduced_dim).chip(i, first_dim).device(*dev.edevice) - += dEdf.tb<2>().chip<2>(b).chip<1>(j).chip<0>(i); - else - dEdxi.tb<3>().chip<3>(b).chip(j, second_dim).chip(i, first_dim).chip(locs(i, j, b), reduced_dim).device(*dev.edevice) - += 
dEdf.tb<2>().chip<2>(b).chip<1>(j).chip<0>(i); - } - } - } -} -DYNET_NODE_INST_DEV_IMPL(MinDimension) - -template -void WeightNormalization::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { - DYNET_ASSERT(xs.size() == 2, "Failed dimension check in WeightNormalization::forward"); - Eigen::array red_axis = {0}; - Eigen::array bcast = {xs[0]->d.size()}; - Eigen::array morph = {1}; - fx.tvec().device(*dev.edevice) = (xs[0]->tvec() / xs[0]->tvec().square().sum(red_axis).sqrt().reshape(morph).broadcast(bcast)) * as_scalar(*xs[1]); -} - -template -void WeightNormalization::backward_dev_impl(const MyDevice & dev, - const vector& xs, - const Tensor& fx, - const Tensor& dEdf, - unsigned i, - Tensor& dEdxi) const { - Eigen::array red_axis = {0}; - Eigen::array bcast = {xs[0]->d.size()}; - Eigen::array morph = {1}; - if (i==0){ - dEdxi.tvec().device(*dev.edevice) += (dEdf.tvec() / xs[0]->tvec().square().sum(red_axis).sqrt().reshape(morph).broadcast(bcast)) * as_scalar(*xs[1]) - fx.tvec() * (((dEdf.tvec() * xs[0]->tvec()).sum(red_axis)) / xs[0]->tvec().square().sum(red_axis)).reshape(morph).broadcast(bcast); - }else{ - dEdxi.t<0>().device(*dev.edevice) += ((dEdf.tvec() * xs[0]->tvec()).sum(red_axis)) / xs[0]->tvec().square().sum(red_axis).sqrt(); - } -} -DYNET_NODE_INST_DEV_IMPL(WeightNormalization) - -} // namespace dynet diff --git a/dynet/nodes.h b/dynet/nodes.h index b54663a0d..ef421418b 100644 --- a/dynet/nodes.h +++ b/dynet/nodes.h @@ -17,6 +17,20 @@ struct AddVectorToAllColumns : public Node { DYNET_NODE_DEFINE_DEV_IMPL() }; +// with a single argument x \in R^{n x m} +// y_i = \sum_j x_i,j / m +struct AverageColumns : public Node { + template explicit AverageColumns(const T& a) : Node(a) {} + DYNET_NODE_DEFINE_DEV_IMPL() +}; + +// sum along a single dimension +struct SumDimension : public Node { + template explicit SumDimension(const T& a, unsigned d) : Node(a), dimension(d) {} + DYNET_NODE_DEFINE_DEV_IMPL() + unsigned dimension; +}; + // y = 
L_sparsemax(x_0; q) // where x_0 is a std::vector of "unnormalized" probabilities // q are the std::vector of labels @@ -128,13 +142,6 @@ struct Reshape : public Node { Dim to; }; -// y_i = \sum_{j=1}^n x_1:{i-1+j} -struct KMHNGram : public Node { - explicit KMHNGram(const std::initializer_list& a, unsigned n) : Node(a), n(n) {} - DYNET_NODE_DEFINE_DEV_IMPL() - unsigned n; // width, n=2 for Karl's paper -}; - // n_{i,j} ~ N(0,stddev) // y = x + n struct GaussianNoise : public Node { From c1d0a56804af9f60c72e5a91f19421cfa4de9c89 Mon Sep 17 00:00:00 2001 From: Graham Neubig Date: Mon, 10 Jul 2017 10:41:13 -0400 Subject: [PATCH 2/3] Separated header files Former-commit-id: c4da3b7712b218c62523ca0a7d6129c625501f5d --- dynet/dynet.cc | 1 - dynet/nodes-activations.cc | 2 +- dynet/nodes-activations.h | 57 ++ dynet/nodes-affinetransform.cc | 4 +- dynet/nodes-affinetransform.h | 28 + dynet/nodes-arith-const.cc | 2 +- dynet/nodes-arith-const.h | 41 ++ dynet/nodes-arith-cwise.cc | 2 +- dynet/nodes-arith-cwise.h | 34 ++ dynet/nodes-arith-scalar.cc | 2 +- dynet/nodes-arith-scalar.h | 32 ++ dynet/nodes-arith-sum.cc | 2 +- dynet/nodes-arith-sum.h | 57 ++ dynet/nodes-arith-unary.cc | 2 +- dynet/nodes-arith-unary.h | 83 +++ dynet/nodes-concat.cc | 2 +- dynet/nodes-concat.h | 39 ++ dynet/nodes-const.cc | 2 +- dynet/nodes-const.h | 18 + dynet/nodes-contract.cc | 63 ++- dynet/nodes-contract.h | 11 +- dynet/nodes-conv.h | 63 +-- dynet/nodes-conv2d.cc | 3 +- dynet/nodes-conv2d.h | 34 ++ dynet/nodes-dropout.cc | 2 +- dynet/nodes-dropout.h | 47 ++ dynet/nodes-flow.cc | 2 +- dynet/nodes-flow.h | 46 ++ dynet/nodes-hinge.cc | 4 +- dynet/nodes-hinge.h | 28 + dynet/nodes-linalg.cc | 2 +- dynet/nodes-linalg.h | 40 ++ dynet/nodes-logsumexp.cc | 2 +- dynet/nodes-logsumexp.h | 20 + dynet/nodes-losses.cc | 2 +- dynet/nodes-losses.h | 47 ++ dynet/nodes-matrixmultiply.cc | 4 +- dynet/nodes-matrixmultiply.h | 27 + dynet/nodes-maxpooling2d.cc | 3 +- dynet/nodes-maxpooling2d.h | 36 ++ 
dynet/nodes-minmax.cc | 2 +- dynet/nodes-minmax.h | 53 ++ dynet/nodes-moments.cc | 2 +- dynet/nodes-moments.h | 76 +++ dynet/nodes-normalization.cc | 2 +- dynet/nodes-normalization.h | 18 + dynet/nodes-norms.cc | 2 +- dynet/nodes-norms.h | 25 + dynet/nodes-pickneglogsoftmax.cc | 4 +- dynet/nodes-pickneglogsoftmax.h | 40 ++ dynet/nodes-random.cc | 2 +- dynet/nodes-random.h | 59 ++ dynet/nodes-select.cc | 2 +- dynet/nodes-select.h | 84 +++ dynet/nodes-similarities.cc | 2 +- dynet/nodes-similarities.h | 47 ++ dynet/nodes-softmaxes.cc | 2 +- dynet/nodes-softmaxes.h | 66 +++ dynet/nodes-trig.cc | 4 +- dynet/nodes-trig.h | 20 + dynet/nodes.h | 899 ++----------------------------- 61 files changed, 1316 insertions(+), 991 deletions(-) create mode 100644 dynet/nodes-activations.h create mode 100644 dynet/nodes-affinetransform.h create mode 100644 dynet/nodes-arith-const.h create mode 100644 dynet/nodes-arith-cwise.h create mode 100644 dynet/nodes-arith-scalar.h create mode 100644 dynet/nodes-arith-sum.h create mode 100644 dynet/nodes-arith-unary.h create mode 100644 dynet/nodes-concat.h create mode 100644 dynet/nodes-const.h create mode 100644 dynet/nodes-conv2d.h create mode 100644 dynet/nodes-dropout.h create mode 100644 dynet/nodes-flow.h create mode 100644 dynet/nodes-hinge.h create mode 100644 dynet/nodes-linalg.h create mode 100644 dynet/nodes-logsumexp.h create mode 100644 dynet/nodes-losses.h create mode 100644 dynet/nodes-matrixmultiply.h create mode 100644 dynet/nodes-maxpooling2d.h create mode 100644 dynet/nodes-minmax.h create mode 100644 dynet/nodes-moments.h create mode 100644 dynet/nodes-normalization.h create mode 100644 dynet/nodes-norms.h create mode 100644 dynet/nodes-pickneglogsoftmax.h create mode 100644 dynet/nodes-random.h create mode 100644 dynet/nodes-select.h create mode 100644 dynet/nodes-similarities.h create mode 100644 dynet/nodes-softmaxes.h create mode 100644 dynet/nodes-trig.h diff --git a/dynet/dynet.cc b/dynet/dynet.cc index 
eb01eb301..fc46cfbaa 100644 --- a/dynet/dynet.cc +++ b/dynet/dynet.cc @@ -1,7 +1,6 @@ #include "dynet/dynet.h" #include "dynet/exec.h" -#include "dynet/nodes.h" #include "dynet/param-nodes.h" #include "dynet/aligned-mem-pool.h" #include "dynet/dynet-helper.h" diff --git a/dynet/nodes-activations.cc b/dynet/nodes-activations.cc index 678626ba9..e1050230c 100644 --- a/dynet/nodes-activations.cc +++ b/dynet/nodes-activations.cc @@ -1,4 +1,4 @@ -#include "dynet/nodes.h" +#include "dynet/nodes-activations.h" #include "dynet/nodes-macros.h" #include "dynet/functors.h" diff --git a/dynet/nodes-activations.h b/dynet/nodes-activations.h new file mode 100644 index 000000000..a5552d035 --- /dev/null +++ b/dynet/nodes-activations.h @@ -0,0 +1,57 @@ +#ifndef DYNET_NODES_ACTIVATIONS_H_ +#define DYNET_NODES_ACTIVATIONS_H_ + +#include "dynet/dynet.h" +#include "dynet/nodes-macros.h" + +namespace dynet { + +// y = max(0,x) +struct Rectify : public Node { + explicit Rectify(const std::initializer_list& a) : Node(a) {} + virtual bool supports_multibatch() const override { return true; } + virtual int autobatch_sig(const ComputationGraph &cg, SigMap &sm) const override { Sig s(nt::rectify); return sm.get_idx(s); } + virtual std::vector autobatch_concat(const ComputationGraph & cg) const override { return std::vector(1, 1); } + DYNET_NODE_DEFINE_DEV_IMPL() +}; + +// y = \sigma(x_1) +struct LogisticSigmoid : public Node { + explicit LogisticSigmoid(const std::initializer_list& a) : Node(a) {} + virtual bool supports_multibatch() const override { return true; } + virtual int autobatch_sig(const ComputationGraph &cg, SigMap &sm) const override { Sig s(nt::logistic); return sm.get_idx(s); } + virtual std::vector autobatch_concat(const ComputationGraph & cg) const override { return std::vector(1, 1); } + DYNET_NODE_DEFINE_DEV_IMPL() +}; + +// y = x / (1 + |x|) +struct SoftSign : public Node { + explicit SoftSign(const std::initializer_list& a) : Node(a) {} + virtual bool 
supports_multibatch() const override { return true; } + virtual int autobatch_sig(const ComputationGraph &cg, SigMap &sm) const override { Sig s(nt::softsign); return sm.get_idx(s); } + virtual std::vector autobatch_concat(const ComputationGraph & cg) const override { return std::vector(1, 1); } + DYNET_NODE_DEFINE_DEV_IMPL() +}; + +// y = erf x_1 +struct Erf : public Node { + explicit Erf(const std::initializer_list& a) : Node(a) {} + virtual bool supports_multibatch() const override { return true; } + virtual int autobatch_sig(const ComputationGraph &cg, SigMap &sm) const override { Sig s(nt::erf); return sm.get_idx(s); } + virtual std::vector autobatch_concat(const ComputationGraph & cg) const override { return std::vector(1, 1); } + DYNET_NODE_DEFINE_DEV_IMPL() +}; + +// y = ELU(0,x) +struct ExponentialLinearUnit : public Node { + explicit ExponentialLinearUnit(const std::initializer_list& a, float lambda=1.f, float alpha=1.f) : Node(a), lambda(lambda), alpha(alpha) {} + virtual bool supports_multibatch() const override { return true; } + virtual int autobatch_sig(const ComputationGraph &cg, SigMap &sm) const override { Sig s(nt::rectify); return sm.get_idx(s); } + virtual std::vector autobatch_concat(const ComputationGraph & cg) const override { return std::vector(1, 1); } + DYNET_NODE_DEFINE_DEV_IMPL() + float lambda, alpha; +}; + +} // namespace dynet + +#endif diff --git a/dynet/nodes-affinetransform.cc b/dynet/nodes-affinetransform.cc index a417a0651..596761248 100644 --- a/dynet/nodes-affinetransform.cc +++ b/dynet/nodes-affinetransform.cc @@ -1,4 +1,4 @@ -#include "dynet/nodes.h" +#include "dynet/nodes-affinetransform.h" #include "dynet/nodes-macros.h" #include "dynet/cuda-matrix-multiply.h" @@ -7,6 +7,8 @@ using namespace std; namespace dynet { +// ************* AffineTransform ************* + #ifndef __CUDACC__ string AffineTransform::as_string(const vector& arg_names) const { diff --git a/dynet/nodes-affinetransform.h b/dynet/nodes-affinetransform.h 
new file mode 100644 index 000000000..4f67ebc05 --- /dev/null +++ b/dynet/nodes-affinetransform.h @@ -0,0 +1,28 @@ +#ifndef DYNET_NODES_AFFINETRANSFORM_H_ +#define DYNET_NODES_AFFINETRANSFORM_H_ + +#include "dynet/dynet.h" +#include "dynet/nodes-macros.h" + +namespace dynet { + +// y = x_1 \sum_{i=2, 4 ...} A_i * x_{i+1} +struct AffineTransform : public Node { + template explicit AffineTransform(const T& a) : Node(a) {} + virtual bool supports_multibatch() const override { return true; } + virtual int autobatch_sig(const ComputationGraph &cg, SigMap &sm) const override; + virtual std::vector autobatch_concat(const ComputationGraph & cg) const override; + virtual void autobatch_reshape(const ComputationGraph & cg, + const std::vector & batch_ids, + const std::vector & concat, + std::vector& xs, + Tensor& fx) const override { + autobatch_reshape_concatonly(cg, batch_ids, concat, xs, fx); + } + DYNET_NODE_DEFINE_DEV_IMPL() + mutable float* dEdf_mem; +}; + +} // namespace dynet + +#endif diff --git a/dynet/nodes-arith-const.cc b/dynet/nodes-arith-const.cc index ce7818e71..13b260301 100644 --- a/dynet/nodes-arith-const.cc +++ b/dynet/nodes-arith-const.cc @@ -1,4 +1,4 @@ -#include "dynet/nodes.h" +#include "dynet/nodes-arith-const.h" #include "dynet/nodes-macros.h" #include "dynet/functors.h" diff --git a/dynet/nodes-arith-const.h b/dynet/nodes-arith-const.h new file mode 100644 index 000000000..86c736206 --- /dev/null +++ b/dynet/nodes-arith-const.h @@ -0,0 +1,41 @@ +#ifndef DYNET_NODES_ARITH_CONST_H_ +#define DYNET_NODES_ARITH_CONST_H_ + +#include "dynet/dynet.h" +#include "dynet/nodes-macros.h" + +namespace dynet { + +// y = c + x_1 +// (c is a std::vector or matrix of the constant, usually 1, but can be configured) +struct ConstantPlusX : public Node { + explicit ConstantPlusX(const std::initializer_list& a, real o) : Node(a), c(o) {} + virtual bool supports_multibatch() const override { return true; } + virtual int autobatch_sig(const ComputationGraph &cg, SigMap 
&sm) const override { Sig s(nt::plus_const); s.add_float(c); return sm.get_idx(s); } + virtual std::vector autobatch_concat(const ComputationGraph & cg) const override { return std::vector(1, 1); } + DYNET_NODE_DEFINE_DEV_IMPL() + real c; +}; + +// y = c - x_1 +// (c is a std::vector or matrix of the constant, usually 1, but can be configured) +struct ConstantMinusX : public Node { + explicit ConstantMinusX(const std::initializer_list& a, real o) : Node(a), c(o) {} + virtual bool supports_multibatch() const override { return true; } + DYNET_NODE_DEFINE_DEV_IMPL() + real c; +}; + +// y = alpha * x_1 +struct ConstScalarMultiply : public Node { + explicit ConstScalarMultiply(const std::initializer_list& a, float alpha) : Node(a), alpha(alpha) {} + virtual bool supports_multibatch() const override { return true; } + virtual int autobatch_sig(const ComputationGraph &cg, SigMap &sm) const override { Sig s(nt::scalar_mult); s.add_float(alpha); return sm.get_idx(s); } + virtual std::vector autobatch_concat(const ComputationGraph & cg) const override { return std::vector(1, 1); } + DYNET_NODE_DEFINE_DEV_IMPL() + float alpha; +}; + +} // namespace dynet + +#endif diff --git a/dynet/nodes-arith-cwise.cc b/dynet/nodes-arith-cwise.cc index 26ac47b0e..28eccf3cf 100644 --- a/dynet/nodes-arith-cwise.cc +++ b/dynet/nodes-arith-cwise.cc @@ -1,4 +1,4 @@ -#include "dynet/nodes.h" +#include "dynet/nodes-arith-cwise.h" #include "dynet/nodes-macros.h" diff --git a/dynet/nodes-arith-cwise.h b/dynet/nodes-arith-cwise.h new file mode 100644 index 000000000..925332ee2 --- /dev/null +++ b/dynet/nodes-arith-cwise.h @@ -0,0 +1,34 @@ +#ifndef DYNET_NODES_ARITH_CWISE_H_ +#define DYNET_NODES_ARITH_CWISE_H_ + +#include "dynet/dynet.h" +#include "dynet/nodes-macros.h" + +namespace dynet { + +// y = x_1 \cdot x_2 (Hadamard product) +struct CwiseMultiply : public Node { + explicit CwiseMultiply(const std::initializer_list& a) : Node(a) {} + virtual bool supports_multibatch() const override { return 
true; } + virtual int autobatch_sig(const ComputationGraph &cg, SigMap &sm) const override; + virtual std::vector autobatch_concat(const ComputationGraph & cg) const override; + DYNET_NODE_DEFINE_DEV_IMPL() +}; + +// y = x_1 / x_2 (cwiseQuotient) +struct CwiseQuotient : public Node { + explicit CwiseQuotient(const std::initializer_list& a) : Node(a) {} + virtual bool supports_multibatch() const override { return true; } + DYNET_NODE_DEFINE_DEV_IMPL() +}; + +// y = pow(x_1, x_2) +// x_2 raise every element in x_1 to the power of scalar x_2 +struct Pow : public Node { + explicit Pow(const std::initializer_list& a) : Node(a) {} + DYNET_NODE_DEFINE_DEV_IMPL() +}; + +} // namespace dynet + +#endif diff --git a/dynet/nodes-arith-scalar.cc b/dynet/nodes-arith-scalar.cc index baca3fe0a..eafd19af5 100644 --- a/dynet/nodes-arith-scalar.cc +++ b/dynet/nodes-arith-scalar.cc @@ -1,4 +1,4 @@ -#include "dynet/nodes.h" +#include "dynet/nodes-arith-scalar.h" #include "dynet/nodes-macros.h" diff --git a/dynet/nodes-arith-scalar.h b/dynet/nodes-arith-scalar.h new file mode 100644 index 000000000..f3d536895 --- /dev/null +++ b/dynet/nodes-arith-scalar.h @@ -0,0 +1,32 @@ +#ifndef DYNET_NODES_ARITH_SCALAR_H_ +#define DYNET_NODES_ARITH_SCALAR_H_ + +#include "dynet/dynet.h" +#include "dynet/nodes-macros.h" + +namespace dynet { + +// y = x_1 + x_2 (Addition where x_2 is a scalar) +struct ScalarAdd : public Node { + explicit ScalarAdd(const std::initializer_list& a) : Node(a) {} + virtual bool supports_multibatch() const override { return true; } + DYNET_NODE_DEFINE_DEV_IMPL() +}; + +// y = x_1 \cdot x_2 (Hadamard product where x_1 is a scalar) +struct ScalarMultiply : public Node { + explicit ScalarMultiply(const std::initializer_list& a) : Node(a) {} + virtual bool supports_multibatch() const override { return true; } + DYNET_NODE_DEFINE_DEV_IMPL() +}; + +// y = x_1 / x_2 (Elementwise division where x_2 is a scalar) +struct ScalarQuotient : public Node { + explicit ScalarQuotient(const 
std::initializer_list& a) : Node(a) {} + virtual bool supports_multibatch() const override { return true; } + DYNET_NODE_DEFINE_DEV_IMPL() +}; + +} // namespace dynet + +#endif diff --git a/dynet/nodes-arith-sum.cc b/dynet/nodes-arith-sum.cc index d3b96f24f..f7622e99c 100644 --- a/dynet/nodes-arith-sum.cc +++ b/dynet/nodes-arith-sum.cc @@ -1,4 +1,4 @@ -#include "dynet/nodes.h" +#include "dynet/nodes-arith-sum.h" #include "dynet/nodes-macros.h" diff --git a/dynet/nodes-arith-sum.h b/dynet/nodes-arith-sum.h new file mode 100644 index 000000000..4a419f545 --- /dev/null +++ b/dynet/nodes-arith-sum.h @@ -0,0 +1,57 @@ +#ifndef DYNET_NODES_ARITH_SUM_H_ +#define DYNET_NODES_ARITH_SUM_H_ + +#include "dynet/dynet.h" +#include "dynet/nodes-macros.h" + +namespace dynet { + +// y = \sum_i x_i +struct Sum : public Node { + template explicit Sum(const T& a) : Node(a) {} + virtual int autobatch_sig(const ComputationGraph &cg, SigMap &sm) const override; + virtual std::vector autobatch_concat(const ComputationGraph & cg) const override; + virtual void autobatch_reshape(const ComputationGraph & cg, + const std::vector & batch_ids, + const std::vector & concat, + std::vector& xs, + Tensor& fx) const override { + if(dim.bd != 1) + autobatch_reshape_concatonly(cg, batch_ids, concat, xs, fx); + } + DYNET_NODE_DEFINE_DEV_IMPL() + virtual bool supports_multibatch() const override { return true; } +}; + +// y = \sum_i,j,... x[i,j,...] 
+struct SumElements : public Node { + template explicit SumElements(const T& a) : Node(a) {} + DYNET_NODE_DEFINE_DEV_IMPL() + virtual bool supports_multibatch() const override { return true; } +}; + +// sum along a single dimension +struct SumDimension : public Node { + template explicit SumDimension(const T& a, unsigned d) : Node(a), dimension(d) {} + DYNET_NODE_DEFINE_DEV_IMPL() + unsigned dimension; +}; + +// y = \sum_i x_i +struct SumBatches : public Node { + template explicit SumBatches(const T& a) : Node(a) {} + DYNET_NODE_DEFINE_DEV_IMPL() + virtual bool supports_multibatch() const override { return true; } +}; + +// M = x_0, v = x_1 +// y = M + v (broadcasting over columns) +struct AddVectorToAllColumns : public Node { + explicit AddVectorToAllColumns(const std::initializer_list& a) : Node(a) {} + virtual bool supports_multibatch() const override { return true; } + DYNET_NODE_DEFINE_DEV_IMPL() +}; + +} // namespace dynet + +#endif diff --git a/dynet/nodes-arith-unary.cc b/dynet/nodes-arith-unary.cc index 4d779279e..c712ee3c2 100644 --- a/dynet/nodes-arith-unary.cc +++ b/dynet/nodes-arith-unary.cc @@ -1,4 +1,4 @@ -#include "dynet/nodes.h" +#include "dynet/nodes-arith-unary.h" #include "dynet/nodes-macros.h" #include "dynet/functors.h" diff --git a/dynet/nodes-arith-unary.h b/dynet/nodes-arith-unary.h new file mode 100644 index 000000000..9200287e2 --- /dev/null +++ b/dynet/nodes-arith-unary.h @@ -0,0 +1,83 @@ +#ifndef DYNET_NODES_ARITH_UNARY_H_ +#define DYNET_NODES_ARITH_UNARY_H_ + +#include "dynet/dynet.h" +#include "dynet/nodes-macros.h" + +namespace dynet { + +// y = x_1 \odot x_1 +struct Square : public Node { + explicit Square(const std::initializer_list& a) : Node(a) {} + virtual bool supports_multibatch() const override { return true; } + virtual int autobatch_sig(const ComputationGraph &cg, SigMap &sm) const override { Sig s(nt::square); return sm.get_idx(s); } + virtual std::vector autobatch_concat(const ComputationGraph & cg) const override { 
return std::vector(1, 1); } + DYNET_NODE_DEFINE_DEV_IMPL() +}; + +// y = x_1 \odot x_1 \odot x_1 +struct Cube : public Node { + explicit Cube(const std::initializer_list& a) : Node(a) {} + virtual bool supports_multibatch() const override { return true; } + virtual int autobatch_sig(const ComputationGraph &cg, SigMap &sm) const override { Sig s(nt::cube); return sm.get_idx(s); } + virtual std::vector autobatch_concat(const ComputationGraph & cg) const override { return std::vector(1, 1); } + DYNET_NODE_DEFINE_DEV_IMPL() +}; + +// y = sqrt x_1 +struct Sqrt : public Node { + explicit Sqrt(const std::initializer_list& a) : Node(a) {} + virtual bool supports_multibatch() const override { return true; } + virtual int autobatch_sig(const ComputationGraph &cg, SigMap &sm) const override { Sig s(nt::sqrt); return sm.get_idx(s); } + virtual std::vector autobatch_concat(const ComputationGraph & cg) const override { return std::vector(1, 1); } + DYNET_NODE_DEFINE_DEV_IMPL() +}; + +// y = exp x_1 +struct Exp : public Node { + explicit Exp(const std::initializer_list& a) : Node(a) {} + virtual bool supports_multibatch() const override { return true; } + virtual int autobatch_sig(const ComputationGraph &cg, SigMap &sm) const override { Sig s(nt::exp); return sm.get_idx(s); } + virtual std::vector autobatch_concat(const ComputationGraph & cg) const override { return std::vector(1, 1); } + DYNET_NODE_DEFINE_DEV_IMPL() +}; + +// y = log x_1 (base e, i.e., natural log) +struct Log : public Node { + explicit Log(const std::initializer_list& a) : Node(a) {} + virtual bool supports_multibatch() const override { return true; } + virtual int autobatch_sig(const ComputationGraph &cg, SigMap &sm) const override { Sig s(nt::log); return sm.get_idx(s); } + virtual std::vector autobatch_concat(const ComputationGraph & cg) const override { return std::vector(1, 1); } + DYNET_NODE_DEFINE_DEV_IMPL() +}; + +// y = -x_1 +struct Negate : public Node { + explicit Negate(const std::initializer_list& 
a) : Node(a) {} + virtual bool supports_multibatch() const override { return true; } + virtual int autobatch_sig(const ComputationGraph &cg, SigMap &sm) const override { Sig s(nt::negate); return sm.get_idx(s); } + virtual std::vector autobatch_concat(const ComputationGraph & cg) const override { return std::vector(1, 1); } + DYNET_NODE_DEFINE_DEV_IMPL() +}; + +// y = abs x_1 +struct Abs : public Node { + explicit Abs(const std::initializer_list& a) : Node(a) {} + virtual bool supports_multibatch() const override { return true; } + virtual int autobatch_sig(const ComputationGraph &cg, SigMap &sm) const override { Sig s(nt::abs); return sm.get_idx(s); } + virtual std::vector autobatch_concat(const ComputationGraph & cg) const override { return std::vector(1, 1); } + DYNET_NODE_DEFINE_DEV_IMPL() +}; + +// y = lgamma x_1 +struct LogGamma : public Node { + explicit LogGamma(const std::initializer_list& a) : Node(a) {} + virtual bool supports_multibatch() const override { return true; } + virtual int autobatch_sig(const ComputationGraph &cg, SigMap &sm) const override { Sig s(nt::loggamma); return sm.get_idx(s); } + virtual std::vector autobatch_concat(const ComputationGraph & cg) const override { return std::vector(1, 1); } + DYNET_NODE_DEFINE_DEV_IMPL() +}; + +} // namespace dynet + +#endif diff --git a/dynet/nodes-concat.cc b/dynet/nodes-concat.cc index f601ef67d..fa96965b2 100644 --- a/dynet/nodes-concat.cc +++ b/dynet/nodes-concat.cc @@ -1,4 +1,4 @@ -#include "dynet/nodes.h" +#include "dynet/nodes-concat.h" #include "dynet/nodes-macros.h" #include "dynet/functors.h" diff --git a/dynet/nodes-concat.h b/dynet/nodes-concat.h new file mode 100644 index 000000000..fabc35b08 --- /dev/null +++ b/dynet/nodes-concat.h @@ -0,0 +1,39 @@ +#ifndef DYNET_NODES_CONCAT_H_ +#define DYNET_NODES_CONCAT_H_ + +#include "dynet/dynet.h" +#include "dynet/nodes-macros.h" + +namespace dynet { + +// concatenate along a particular dimension +struct Concatenate : public Node { + template 
explicit Concatenate(const T& a, unsigned d) : Node(a), dimension(d) {} + virtual bool supports_multibatch() const override { return true; } + virtual int autobatch_sig(const ComputationGraph &cg, SigMap &sm) const override; + virtual std::vector autobatch_concat(const ComputationGraph & cg) const override { return std::vector(args.size(), 1); } + virtual void autobatch_reshape(const ComputationGraph & cg, + const std::vector & batch_ids, + const std::vector & concat, + std::vector& xs, + Tensor& fx) const override { + autobatch_reshape_concatonly(cg, batch_ids, concat, xs, fx); + } + DYNET_NODE_DEFINE_DEV_IMPL() + // src_indices[i] says what row in fx the ith x vector was assigned to + // used to simplify backprop + mutable std::vector src_indices; + unsigned dimension; +}; + +// concatenate different batched expressions into one single batched tensor +struct ConcatenateToBatch : public Node { + template explicit ConcatenateToBatch(const T& a) : Node(a) {} + DYNET_NODE_DEFINE_DEV_IMPL() + virtual bool supports_multibatch() const override {return true;} + mutable std::vector src_element_indices; +}; + +} // namespace dynet + +#endif diff --git a/dynet/nodes-const.cc b/dynet/nodes-const.cc index 3f99a84d4..0f3ec4fc2 100644 --- a/dynet/nodes-const.cc +++ b/dynet/nodes-const.cc @@ -1,4 +1,4 @@ -#include "dynet/nodes.h" +#include "dynet/nodes-const.h" #include "dynet/nodes-macros.h" diff --git a/dynet/nodes-const.h b/dynet/nodes-const.h new file mode 100644 index 000000000..6f21183e9 --- /dev/null +++ b/dynet/nodes-const.h @@ -0,0 +1,18 @@ +#ifndef DYNET_NODES_CONST_H_ +#define DYNET_NODES_CONST_H_ + +#include "dynet/dynet.h" +#include "dynet/nodes-macros.h" + +namespace dynet { + +// represents a simple tensor of 0s with the given dimensions +struct Zeroes : public Node { + explicit Zeroes(const Dim& d) : dim(d) {} + DYNET_NODE_DEFINE_DEV_IMPL() + Dim dim; +}; + +} // namespace dynet + +#endif diff --git a/dynet/nodes-contract.cc b/dynet/nodes-contract.cc index 
99719ccfa..aad22fa87 100644 --- a/dynet/nodes-contract.cc +++ b/dynet/nodes-contract.cc @@ -5,14 +5,11 @@ #include #include "dynet/nodes-macros.h" -#include "dynet/nodes.h" // This file takes a long time to compile on GPU. Uncomment this line to skip it. #define DYNET_SKIP_CUDA_CONTRACTIONS - #if defined(__CUDACC__) && !defined(DYNET_SKIP_CUDA_CONTRACTIONS) -#include "dynet/nodes.cc" #include "dynet/cuda.h" #include "dynet/gpu-ops.h" #include "dynet/cuda-matrix-multiply.h" @@ -23,6 +20,8 @@ using namespace std; namespace dynet { +// ************* InnerProduct3D_1D ************* + #ifndef __CUDACC__ string InnerProduct3D_1D::as_string(const vector& arg_names) const { @@ -50,32 +49,6 @@ Dim InnerProduct3D_1D::dim_forward(const vector& xs) const { return d; } -string InnerProduct3D_1D_1D::as_string(const vector& arg_names) const { - ostringstream s; - s << "dotdot(" << arg_names[0] << "," << arg_names[1] << "," << arg_names[2] << ')'; - if (arg_names.size() == 4) s << " + " << arg_names[3]; - return s.str(); -} - -Dim InnerProduct3D_1D_1D::dim_forward(const vector& xs) const { - if (xs.size() != 3 && xs.size() != 4) - throw std::invalid_argument("Expected three or four arguments in InnerProduct3D_1D"); - if (xs[0].ndims() != 3 || - !LooksLikeVector(xs[1]) || - !LooksLikeVector(xs[2])) { - // TODO fix add check - ostringstream s; s << "Bad input dimensions in InnerProduct3D_1D_1D: " << xs; - throw std::invalid_argument(s.str()); - } - Dim d({xs[0].size(0)}, max(max(xs[0].bd, xs[1].bd), xs[2].bd)); - if (xs.size() == 4) d.bd = max(d.bd, xs[3].bd); - if (xs.size() == 4 && xs[3] != d) { - ostringstream s; s << "Bad input dimensions in InnerProduct3D_1D_1D: " << xs; - throw std::invalid_argument(s.str()); - } - return d; -} - #endif // Y_ij = A_ijk * B_k (+ C_ij) @@ -273,6 +246,38 @@ void InnerProduct3D_1D::backward_dev_impl(const MyDevice & dev, } DYNET_NODE_INST_DEV_IMPL(InnerProduct3D_1D) +// ************* InnerProduct3D_1D_1D ************* + +#ifndef __CUDACC__ + 
+string InnerProduct3D_1D_1D::as_string(const vector& arg_names) const { + ostringstream s; + s << "dotdot(" << arg_names[0] << "," << arg_names[1] << "," << arg_names[2] << ')'; + if (arg_names.size() == 4) s << " + " << arg_names[3]; + return s.str(); +} + +Dim InnerProduct3D_1D_1D::dim_forward(const vector& xs) const { // accepts (A, B, C) or (A, B, C, D); returns a vector Dim of size A.size(0) + if (xs.size() != 3 && xs.size() != 4) + throw std::invalid_argument("Expected three or four arguments in InnerProduct3D_1D_1D"); + if (xs[0].ndims() != 3 || + !LooksLikeVector(xs[1]) || + !LooksLikeVector(xs[2])) { + // TODO fix add check + ostringstream s; s << "Bad input dimensions in InnerProduct3D_1D_1D: " << xs; + throw std::invalid_argument(s.str()); + } + Dim d({xs[0].size(0)}, max(max(xs[0].bd, xs[1].bd), xs[2].bd)); + if (xs.size() == 4) d.bd = max(d.bd, xs[3].bd); + if (xs.size() == 4 && xs[3] != d) { + ostringstream s; s << "Bad input dimensions in InnerProduct3D_1D_1D: " << xs; + throw std::invalid_argument(s.str()); + } + return d; +} + +#endif + // Y_ij = A_ijk * B_k * C_j (+ D_i) template void InnerProduct3D_1D_1D::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { diff --git a/dynet/nodes-contract.h b/dynet/nodes-contract.h index ce56fc289..58e7637ad 100644 --- a/dynet/nodes-contract.h +++ b/dynet/nodes-contract.h @@ -2,20 +2,11 @@ #define DYNET_NODES_CONTRACT_H_ #include "dynet/dynet.h" -#include "dynet/devices.h" #include "dynet/nodes-macros.h" -// See nodes-macros.h for more details about DYNET_NODE_DEFINE_DEV_IMPL(). 
- namespace dynet { -// Forward: -// Y_ij = A_ijk * B_k + C_ij -// -// Backward: -// (dE/dA)_ijk = (dE/dY)_ij * L_k -// (dE/dB)_k = (dE/dY)_ij * A_ijk -// (dE/dC)_ij = (dE/dY)_ij +// Y_ij = A_ijk * B_k (+ C_ij) struct InnerProduct3D_1D : public Node { InnerProduct3D_1D(const std::initializer_list& a) : Node(a) {} virtual bool supports_multibatch() const override { return true; } diff --git a/dynet/nodes-conv.h b/dynet/nodes-conv.h index a4fd2ca02..b9bd6ff31 100644 --- a/dynet/nodes-conv.h +++ b/dynet/nodes-conv.h @@ -3,11 +3,6 @@ #include "dynet/dynet.h" #include "dynet/nodes-macros.h" -#include "dynet/op-helper.h" - -#if HAVE_CUDNN -#include "dynet/cudnn-ops.h" -#endif namespace dynet { @@ -39,54 +34,6 @@ struct KMaxPooling : public Node { unsigned second_dim; }; -// conv2d -// y = x_1 *conv2d x_2 -// x_1 \in R^{H x W x Ci x N} (input) -// x_2 \in R^{H x W x Ci x Co} (filter) -// stride[0] corresponds to H -// stride[1] corresponds to W -// is_valid: true for 'VALID' and false for 'SAME' -struct Conv2D: public Node { - explicit Conv2D(const std::initializer_list& a, const std::vector& s, - const bool padding_type = true) - : Node(a), stride(s), is_valid(padding_type) {} - virtual bool supports_multibatch() const override { return true; } - DYNET_NODE_DEFINE_DEV_IMPL() - size_t aux_storage_size() const override; - const std::vector stride; - const bool is_valid; - - private: -#if HAVE_CUDNN - mutable CudnnConvOp* cudnn_conv_op_ = NULL; -#endif -}; - -// maxpooling2d -// y = x_1 * maxpooling2d -// x_1 \in R^{H x W x Ci x N} (input) -// ksize[0] corresponds to H -// ksize[1] corresponds to W -// stride[0] corresponds to H -// stride[1] corresponds to W -// is_valid: true for 'VALID' and false for 'SAME' -struct MaxPooling2D: public Node { - explicit MaxPooling2D(const std::initializer_list& a, const std::vector& k, const std::vector& s, - const bool padding_type = true) - : Node(a), ksize(k), stride(s), is_valid(padding_type) {} - virtual bool supports_multibatch() const 
override { return true; } - DYNET_NODE_DEFINE_DEV_IMPL() - size_t aux_storage_size() const override; - const std::vector ksize; - const std::vector stride; - const bool is_valid; - - private: -#if HAVE_CUDNN - mutable CudnnMaxPooling2DOp* cudnn_maxpool_op_ = NULL; -#endif -}; - // y_i = \sum_{j=1}^n x_1:{i-1+j} struct KMHNGram : public Node { explicit KMHNGram(const std::initializer_list& a, unsigned n) : Node(a), n(n) {} @@ -94,6 +41,16 @@ struct KMHNGram : public Node { unsigned n; // width, n=2 for Karl's paper }; +// hyperparameter: width > 1 +// x_1 is a std::vector in R^n, which we write x +// y is a std::vector in R^{n / width} +// y_i = max_{x_{i * width - width + 1}, ..., x_{i * width}} +struct MaxPooling1D : public Node { + MaxPooling1D(const std::initializer_list& a, unsigned w) : Node(a), width(w) {} + DYNET_NODE_DEFINE_DEV_IMPL() + unsigned width; + mutable std::vector ind; +}; } // namespace dynet diff --git a/dynet/nodes-conv2d.cc b/dynet/nodes-conv2d.cc index 9918081ef..0a7cdedc9 100644 --- a/dynet/nodes-conv2d.cc +++ b/dynet/nodes-conv2d.cc @@ -1,4 +1,4 @@ -#include "dynet/nodes-conv.h" +#include "dynet/nodes-conv2d.h" #include #include @@ -9,6 +9,7 @@ #include "dynet/functors.h" #include "dynet/nodes-macros.h" +#include "dynet/op-helper.h" #include "third_party/eigen_spatial_convolutions.h" #include "third_party/eigen_backward_spatial_convolutions.h" diff --git a/dynet/nodes-conv2d.h b/dynet/nodes-conv2d.h new file mode 100644 index 000000000..484c3222a --- /dev/null +++ b/dynet/nodes-conv2d.h @@ -0,0 +1,34 @@ +#ifndef DYNET_NODES_CONV2D_H_ +#define DYNET_NODES_CONV2D_H_ + +#include "dynet/dynet.h" +#include "dynet/nodes-macros.h" + +namespace dynet { + +// conv2d +// y = x_1 *conv2d x_2 +// x_1 \in R^{H x W x Ci x N} (input) +// x_2 \in R^{H x W x Ci x Co} (filter) +// stride[0] corresponds to H +// stride[1] corresponds to W +// is_valid: true for 'VALID' and false for 'SAME' +struct Conv2D: public Node { + explicit Conv2D(const 
std::initializer_list& a, const std::vector& s, + const bool padding_type = true) + : Node(a), stride(s), is_valid(padding_type) {} + virtual bool supports_multibatch() const override { return true; } + DYNET_NODE_DEFINE_DEV_IMPL() + size_t aux_storage_size() const override; + const std::vector stride; + const bool is_valid; + + private: +#if HAVE_CUDNN + mutable CudnnConvOp* cudnn_conv_op_ = NULL; +#endif +}; + +} // namespace dynet + +#endif diff --git a/dynet/nodes-dropout.cc b/dynet/nodes-dropout.cc index 7addca034..1d052f18b 100644 --- a/dynet/nodes-dropout.cc +++ b/dynet/nodes-dropout.cc @@ -1,4 +1,4 @@ -#include "dynet/nodes.h" +#include "dynet/nodes-dropout.h" #include "dynet/nodes-macros.h" diff --git a/dynet/nodes-dropout.h b/dynet/nodes-dropout.h new file mode 100644 index 000000000..fc9913bd2 --- /dev/null +++ b/dynet/nodes-dropout.h @@ -0,0 +1,47 @@ +#ifndef DYNET_NODES_DROPOUT_H_ +#define DYNET_NODES_DROPOUT_H_ + +#include "dynet/dynet.h" +#include "dynet/nodes-macros.h" + +namespace dynet { + +// y = dropout(x,p) where p specifies the dropout probability +struct Dropout : public Node { + explicit Dropout(const std::initializer_list& a, real p) : Node(a), p(p) {} + DYNET_NODE_DEFINE_DEV_IMPL() + size_t aux_storage_size() const override; + virtual bool supports_multibatch() const override { return true; } + real p; +}; + +// y = dropout(x,p) where p specifies the dropout probability +struct DropoutDim : public Node { + explicit DropoutDim(const std::initializer_list& a, unsigned d,real p) : Node(a), dimension(d), p(p) {} + DYNET_NODE_DEFINE_DEV_IMPL() + size_t aux_storage_size() const override; + virtual bool supports_multibatch() const override { return true; } + unsigned dimension; + real p; +}; + +// y = dropout(x,p) where p specifies the dropout probability +struct DropoutBatch : public Node { + explicit DropoutBatch(const std::initializer_list& a, real p) : Node(a), p(p) {} + DYNET_NODE_DEFINE_DEV_IMPL() + size_t aux_storage_size() const override; 
+ virtual bool supports_multibatch() const override { return true; } + real p; +}; + +// y = block_dropout(x,p) where p specifies the probability for dropping-out the entire block +struct BlockDropout : public Node { + explicit BlockDropout(const std::initializer_list& a, real p) : Node(a), dropout_probability(p) {} + DYNET_NODE_DEFINE_DEV_IMPL() + size_t aux_storage_size() const override; + real dropout_probability; +}; + +} // namespace dynet + +#endif diff --git a/dynet/nodes-flow.cc b/dynet/nodes-flow.cc index 95d0b10ae..4ec5507fe 100644 --- a/dynet/nodes-flow.cc +++ b/dynet/nodes-flow.cc @@ -1,4 +1,4 @@ -#include "dynet/nodes.h" +#include "dynet/nodes-flow.h" #include "dynet/nodes-macros.h" diff --git a/dynet/nodes-flow.h b/dynet/nodes-flow.h new file mode 100644 index 000000000..189afba30 --- /dev/null +++ b/dynet/nodes-flow.h @@ -0,0 +1,46 @@ +#ifndef DYNET_NODES_FLOW_H_ +#define DYNET_NODES_FLOW_H_ + +#include "dynet/dynet.h" +#include "dynet/nodes-macros.h" + +namespace dynet { + +// y = reshape(x_1, --> to) +struct Reshape : public Node { + explicit Reshape(const std::initializer_list& a, const Dim& to) : Node(a), to(to) {} + DYNET_NODE_DEFINE_DEV_IMPL() + virtual bool supports_multibatch() const override { return true; } + Dim to; +}; + +// y = x_1 +struct Identity : public Node { + explicit Identity(const std::initializer_list& a) : Node(a) {} + virtual bool supports_multibatch() const override { return true; } + virtual int autobatch_sig(const ComputationGraph &cg, SigMap &sm) const override { Sig s(nt::identity); return sm.get_idx(s); } + virtual std::vector autobatch_concat(const ComputationGraph & cg) const override { return std::vector(1, 1); } + DYNET_NODE_DEFINE_DEV_IMPL() +}; + +// y = x_1, but dy/dx is set to 0 +struct NoBackprop : public Node { + explicit NoBackprop(const std::initializer_list& a) : Node(a) {} + virtual bool supports_multibatch() const override { return true; } + virtual int autobatch_sig(const ComputationGraph &cg, SigMap 
&sm) const override { Sig s(nt::nobackprop); return sm.get_idx(s); } + virtual std::vector autobatch_concat(const ComputationGraph & cg) const override { return std::vector(1, 1); } + DYNET_NODE_DEFINE_DEV_IMPL() +}; + +// y = x_1, dy/dx is set to negative. +struct FlipGradient : public Node { + explicit FlipGradient(const std::initializer_list& a) : Node(a) {} + virtual bool supports_multibatch() const override { return true; } + virtual int autobatch_sig(const ComputationGraph &cg, SigMap &sm) const override { Sig s(nt::flipgradient); return sm.get_idx(s); } + virtual std::vector autobatch_concat(const ComputationGraph & cg) const override { return std::vector(1, 1); } + DYNET_NODE_DEFINE_DEV_IMPL() +}; + +} // namespace dynet + +#endif diff --git a/dynet/nodes-hinge.cc b/dynet/nodes-hinge.cc index 50db9b3b3..f3af41cce 100644 --- a/dynet/nodes-hinge.cc +++ b/dynet/nodes-hinge.cc @@ -1,4 +1,4 @@ -#include "dynet/nodes.h" +#include "dynet/nodes-hinge.h" #include "dynet/nodes-macros.h" @@ -6,6 +6,8 @@ using namespace std; namespace dynet { +// ************* Hinge ************* + #ifndef __CUDACC__ string Hinge::as_string(const vector& arg_names) const { diff --git a/dynet/nodes-hinge.h b/dynet/nodes-hinge.h new file mode 100644 index 000000000..7f68fa886 --- /dev/null +++ b/dynet/nodes-hinge.h @@ -0,0 +1,28 @@ +#ifndef DYNET_NODES_HINGE_H_ +#define DYNET_NODES_HINGE_H_ + +#include "dynet/dynet.h" +#include "dynet/nodes-macros.h" + +namespace dynet { + +// Let x be a vector-valued input, x_i represents the score of the ith element, then +// y = \sum{i != element} max{0, margin - x_element + x_i} +struct Hinge : public Node { + explicit Hinge(const std::initializer_list& a, unsigned e, real m = 1.0) : Node(a), element(e), pelement(&element), elements(), pelements(), margin(m) {} + explicit Hinge(const std::initializer_list& a, const unsigned* pe, real m = 1.0) : Node(a), element(), pelement(pe), elements(), pelements(), margin(m) {} + explicit Hinge(const std::initializer_list& a, const std::vector& e, real m = 
1.0) : Node(a), element(), pelement(), elements(e), pelements(&elements), margin(m) {} + explicit Hinge(const std::initializer_list& a, const std::vector* pe, real m = 1.0) : Node(a), element(), pelement(), elements(), pelements(pe), margin(m) {} + virtual bool supports_multibatch() const override { return true; } + DYNET_NODE_DEFINE_DEV_IMPL() + size_t aux_storage_size() const override; + unsigned element; + const unsigned* pelement; + std::vector elements; + const std::vector* pelements; + real margin; +}; + +} // namespace dynet + +#endif diff --git a/dynet/nodes-linalg.cc b/dynet/nodes-linalg.cc index a1c17eaec..f9d3c772d 100644 --- a/dynet/nodes-linalg.cc +++ b/dynet/nodes-linalg.cc @@ -1,4 +1,4 @@ -#include "dynet/nodes.h" +#include "dynet/nodes-linalg.h" #include "dynet/nodes-macros.h" diff --git a/dynet/nodes-linalg.h b/dynet/nodes-linalg.h new file mode 100644 index 000000000..1529cb26e --- /dev/null +++ b/dynet/nodes-linalg.h @@ -0,0 +1,40 @@ +#ifndef DYNET_NODES_LINALG_H_ +#define DYNET_NODES_LINALG_H_ + +#include "dynet/dynet.h" +#include "dynet/nodes-macros.h" + +namespace dynet { + +// y = x_1^T +// NOTE: if you have a column or row std::vector as input, runtime is constant +// if you have a matrix as input, the runtime is O(mn) - try to avoid using this +struct Transpose : public Node { + explicit Transpose(const std::initializer_list& a, const std::vector & dims) : Node(a), dims(dims) {} + DYNET_NODE_DEFINE_DEV_IMPL() + virtual bool supports_multibatch() const override { return true; } + std::vector dims; +}; + +// y = inv(x) +// x = an invertible matrix +struct MatrixInverse : public Node { + explicit MatrixInverse(const std::initializer_list& a) : Node(a) {} + DYNET_NODE_DEFINE_DEV_IMPL() +}; + +// y = log det(x) +struct LogDet : public Node { + template explicit LogDet(const T& a) : Node(a) {} + DYNET_NODE_DEFINE_DEV_IMPL() +}; + +// y = Tr(x_1 * x_2^T) +struct TraceOfProduct : public Node { + explicit TraceOfProduct(const std::initializer_list& 
a) : Node(a) {} + DYNET_NODE_DEFINE_DEV_IMPL() +}; + +} // namespace dynet + +#endif diff --git a/dynet/nodes-logsumexp.cc b/dynet/nodes-logsumexp.cc index 71fb3e0b8..a21075242 100644 --- a/dynet/nodes-logsumexp.cc +++ b/dynet/nodes-logsumexp.cc @@ -1,4 +1,4 @@ -#include "dynet/nodes.h" +#include "dynet/nodes-logsumexp.h" #include "dynet/nodes-macros.h" diff --git a/dynet/nodes-logsumexp.h b/dynet/nodes-logsumexp.h new file mode 100644 index 000000000..39404a841 --- /dev/null +++ b/dynet/nodes-logsumexp.h @@ -0,0 +1,20 @@ +#ifndef DYNET_NODES_LOGSUMEXP_H_ +#define DYNET_NODES_LOGSUMEXP_H_ + +#include "dynet/dynet.h" +#include "dynet/nodes-macros.h" + +namespace dynet { + +// y = \log \sum_i \exp x_i +// done in log space carefully to avoid over/underflow issues +struct LogSumExp : public Node { + template explicit LogSumExp(const T& a) : Node(a) {} + DYNET_NODE_DEFINE_DEV_IMPL() + virtual bool supports_multibatch() const override { return true; } + size_t aux_storage_size() const override; +}; + +} // namespace dynet + +#endif diff --git a/dynet/nodes-losses.cc b/dynet/nodes-losses.cc index a12db7bc8..c67a3da2d 100644 --- a/dynet/nodes-losses.cc +++ b/dynet/nodes-losses.cc @@ -1,4 +1,4 @@ -#include "dynet/nodes.h" +#include "dynet/nodes-losses.h" #include "dynet/nodes-macros.h" #include "dynet/functors.h" diff --git a/dynet/nodes-losses.h b/dynet/nodes-losses.h new file mode 100644 index 000000000..8a5747b29 --- /dev/null +++ b/dynet/nodes-losses.h @@ -0,0 +1,47 @@ +#ifndef DYNET_NODES_LOSSES_H_ +#define DYNET_NODES_LOSSES_H_ + +#include "dynet/dynet.h" +#include "dynet/nodes-macros.h" + +namespace dynet { + +// x_1 is a scalar (or row std::vector) +// x_2 is a scalar (or row std::vector) +// y = max(0, margin - x_1 + x_2) +struct PairwiseRankLoss : public Node { + explicit PairwiseRankLoss(const std::initializer_list& a, real m = 1.0) : Node(a), margin(m) {} + virtual bool supports_multibatch() const override { return true; } + DYNET_NODE_DEFINE_DEV_IMPL() + real 
margin; +}; + +// you could do this with LogisticSigmoid, Softmax or a variety of other +// functions, but this is often useful. +// x_1 must be a vector with values between 0 and 1 +// target_y (ty) is an equivalently sized vector with values between 0 and 1 +// y = -(ty * log(x_1) + (1 - ty) * log(1 - x_1)) +struct BinaryLogLoss : public Node { + BinaryLogLoss(const std::initializer_list& a) : Node(a) {} + DYNET_NODE_DEFINE_DEV_IMPL() +}; + +// this is used to implement poisson regression +// x_1 = log predicted mean +// ty = true y (this is not a VariableIndex since it has to be a nonnegative integer and +// is therefore nondifferentiable. There are various continuous extensions +// using the incomplete gamma function that could be used, but meh) +// y = log Poisson(ty; \lambda = \exp x_1) +// = ty*x_1 - exp(x_1) - log(ty!) +struct PoissonRegressionLoss : public Node { + explicit PoissonRegressionLoss(const std::initializer_list& a, unsigned true_y) : Node(a), ty(true_y), pty(&ty) {} + explicit PoissonRegressionLoss(const std::initializer_list& a, const unsigned* ptrue_y) : Node(a), ty(), pty(ptrue_y) {} + DYNET_NODE_DEFINE_DEV_IMPL() + private: + unsigned ty; + const unsigned* pty; +}; + +} // namespace dynet + +#endif diff --git a/dynet/nodes-matrixmultiply.cc b/dynet/nodes-matrixmultiply.cc index 7c13dd753..b8bc5a5ac 100644 --- a/dynet/nodes-matrixmultiply.cc +++ b/dynet/nodes-matrixmultiply.cc @@ -1,4 +1,4 @@ -#include "dynet/nodes.h" +#include "dynet/nodes-matrixmultiply.h" #include "dynet/nodes-macros.h" #include "dynet/cuda-matrix-multiply.h" @@ -7,6 +7,8 @@ using namespace std; namespace dynet { +// ************* MatrixMultiply ************* + #ifndef __CUDACC__ string MatrixMultiply::as_string(const vector& arg_names) const { diff --git a/dynet/nodes-matrixmultiply.h b/dynet/nodes-matrixmultiply.h new file mode 100644 index 000000000..ba78957b4 --- /dev/null +++ b/dynet/nodes-matrixmultiply.h @@ -0,0 +1,27 @@ +#ifndef DYNET_NODES_MATRIXMULTIPLY_H_ +#define 
DYNET_NODES_MATRIXMULTIPLY_H_ + +#include "dynet/dynet.h" +#include "dynet/nodes-macros.h" + +namespace dynet { + +// y = x_1 * x_2 +struct MatrixMultiply : public Node { + explicit MatrixMultiply(const std::initializer_list& a) : Node(a) {} + virtual bool supports_multibatch() const override { return true; } + virtual int autobatch_sig(const ComputationGraph &cg, SigMap &sm) const override; + virtual std::vector autobatch_concat(const ComputationGraph & cg) const override; + virtual void autobatch_reshape(const ComputationGraph & cg, + const std::vector & batch_ids, + const std::vector & concat, + std::vector& xs, + Tensor& fx) const override { + autobatch_reshape_concatonly(cg, batch_ids, concat, xs, fx); + } + DYNET_NODE_DEFINE_DEV_IMPL() +}; + +} // namespace dynet + +#endif diff --git a/dynet/nodes-maxpooling2d.cc b/dynet/nodes-maxpooling2d.cc index b679af7f3..7249c9578 100644 --- a/dynet/nodes-maxpooling2d.cc +++ b/dynet/nodes-maxpooling2d.cc @@ -1,4 +1,4 @@ -#include "dynet/nodes-conv.h" +#include "dynet/nodes-maxpooling2d.h" #include #include @@ -8,6 +8,7 @@ #include "dynet/functors.h" #include "dynet/nodes-macros.h" +#include "dynet/op-helper.h" #include "third_party/eigen_pooling.h" #if HAVE_CUDA diff --git a/dynet/nodes-maxpooling2d.h b/dynet/nodes-maxpooling2d.h new file mode 100644 index 000000000..20bfff67d --- /dev/null +++ b/dynet/nodes-maxpooling2d.h @@ -0,0 +1,36 @@ +#ifndef DYNET_NODES_MAXPOOLING2D_H_ +#define DYNET_NODES_MAXPOOLING2D_H_ + +#include "dynet/dynet.h" +#include "dynet/nodes-macros.h" + +namespace dynet { + +// maxpooling2d +// y = x_1 * maxpooling2d +// x_1 \in R^{H x W x Ci x N} (input) +// ksize[0] corresponds to H +// ksize[1] corresponds to W +// stride[0] corresponds to H +// stride[1] corresponds to W +// is_valid: true for 'VALID' and false for 'SAME' +struct MaxPooling2D: public Node { + explicit MaxPooling2D(const std::initializer_list& a, const std::vector& k, const std::vector& s, + const bool padding_type = true) + : 
Node(a), ksize(k), stride(s), is_valid(padding_type) {} + virtual bool supports_multibatch() const override { return true; } + DYNET_NODE_DEFINE_DEV_IMPL() + size_t aux_storage_size() const override; + const std::vector ksize; + const std::vector stride; + const bool is_valid; + + private: +#if HAVE_CUDNN + mutable CudnnMaxPooling2DOp* cudnn_maxpool_op_ = NULL; +#endif +}; + +} // namespace dynet + +#endif diff --git a/dynet/nodes-minmax.cc b/dynet/nodes-minmax.cc index 6b5d45f87..cc0913eab 100644 --- a/dynet/nodes-minmax.cc +++ b/dynet/nodes-minmax.cc @@ -1,4 +1,4 @@ -#include "dynet/nodes.h" +#include "dynet/nodes-minmax.h" #include "dynet/nodes-macros.h" #include "dynet/functors.h" diff --git a/dynet/nodes-minmax.h b/dynet/nodes-minmax.h new file mode 100644 index 000000000..b1a500e55 --- /dev/null +++ b/dynet/nodes-minmax.h @@ -0,0 +1,53 @@ +#ifndef DYNET_NODES_MINMAX_H_ +#define DYNET_NODES_MINMAX_H_ + +#include "dynet/dynet.h" +#include "dynet/nodes-macros.h" + +namespace dynet { + +// y = min{x_1, x_2} +struct Min : public Node { + explicit Min(const std::initializer_list& a) : Node(a) {} + DYNET_NODE_DEFINE_DEV_IMPL() + virtual bool supports_multibatch() const override { return true; } + size_t aux_storage_size() const override; +}; + +// y = max{x_1, x_2} +struct Max : public Node { + template explicit Max(const T& a) : Node(a) {} + DYNET_NODE_DEFINE_DEV_IMPL() + virtual bool supports_multibatch() const override { return true; } + size_t aux_storage_size() const override; +}; + +struct MinDimension : public Node { + explicit MinDimension(const std::initializer_list& a, unsigned dimension = 0) : Node(a), reduced_dim(dimension) { + first_dim = reduced_dim == 0 ? 1 : 0; + second_dim = first_dim + 1 == reduced_dim ? 
first_dim + 2 : first_dim + 1; + } + DYNET_NODE_DEFINE_DEV_IMPL() + virtual bool supports_multibatch() const override { return true; } + size_t aux_storage_size() const override; + unsigned reduced_dim; + unsigned first_dim; + unsigned second_dim; +}; + +struct MaxDimension : public Node { + explicit MaxDimension(const std::initializer_list& a, unsigned dimension = 0) : Node(a), reduced_dim(dimension) { + first_dim = reduced_dim == 0 ? 1 : 0; + second_dim = first_dim + 1 == reduced_dim ? first_dim + 2 : first_dim + 1; + } + DYNET_NODE_DEFINE_DEV_IMPL() + virtual bool supports_multibatch() const override { return true; } + size_t aux_storage_size() const override; + unsigned reduced_dim; + unsigned first_dim; + unsigned second_dim; +}; + +} // namespace dynet + +#endif diff --git a/dynet/nodes-moments.cc b/dynet/nodes-moments.cc index b4d618165..99a8cf52e 100644 --- a/dynet/nodes-moments.cc +++ b/dynet/nodes-moments.cc @@ -1,4 +1,4 @@ -#include "dynet/nodes.h" +#include "dynet/nodes-moments.h" #include "dynet/nodes-macros.h" #include "dynet/functors.h" diff --git a/dynet/nodes-moments.h b/dynet/nodes-moments.h new file mode 100644 index 000000000..57f4df095 --- /dev/null +++ b/dynet/nodes-moments.h @@ -0,0 +1,76 @@ +#ifndef DYNET_NODES_MOMENTS_H_ +#define DYNET_NODES_MOMENTS_H_ + +#include "dynet/dynet.h" +#include "dynet/nodes-macros.h" + +namespace dynet { + +// y = ( \sum_i x_i ) / |x| +struct Average : public Node { + template explicit Average(const T& a) : Node(a) {} + DYNET_NODE_DEFINE_DEV_IMPL() + virtual bool supports_multibatch() const override { return true; } +}; + +// with a single argument x \in R^{n x m} +// y_i = \sum_j x_i,j / m +struct AverageColumns : public Node { + template explicit AverageColumns(const T& a) : Node(a) {} + DYNET_NODE_DEFINE_DEV_IMPL() +}; + +// y = \sum_i,j,... x[i,j,...] 
+struct MomentElements : public Node { + template explicit MomentElements(const T& a, unsigned o) : Node(a), order(o) {} + DYNET_NODE_DEFINE_DEV_IMPL() + virtual bool supports_multibatch() const override { return true; } +private: + unsigned order; +}; + +// y = \sum_i x_i +struct MomentBatches : public Node { + template explicit MomentBatches(const T& a, unsigned o) : Node(a), order(o) {} + DYNET_NODE_DEFINE_DEV_IMPL() + virtual bool supports_multibatch() const override { return true; } +private: + unsigned order; +}; + +//y = \sum_i x_i +struct MomentDimension : public Node { + template explicit MomentDimension(const T& a, unsigned d, unsigned o) : Node(a), dimension(d), order(o) {} + DYNET_NODE_DEFINE_DEV_IMPL() + virtual bool supports_multibatch() const override { return true; } +private: + unsigned dimension; + unsigned order; +}; + +// y = \sum_i,j,... x[i,j,...] +struct StdElements : public Node { + template explicit StdElements(const T& a) : Node(a) {} + DYNET_NODE_DEFINE_DEV_IMPL() + virtual bool supports_multibatch() const override { return true; } +}; + +// y = \sum_i x_i +struct StdBatches : public Node { + template explicit StdBatches(const T& a) : Node(a) {} + DYNET_NODE_DEFINE_DEV_IMPL() + virtual bool supports_multibatch() const override { return true; } +}; + +//y = \sum_i x_i +struct StdDimension : public Node { + template explicit StdDimension(const T& a, unsigned d) : Node(a), dimension(d) {} + DYNET_NODE_DEFINE_DEV_IMPL() + virtual bool supports_multibatch() const override { return true; } +private: + unsigned dimension; +}; + +} // namespace dynet + +#endif diff --git a/dynet/nodes-normalization.cc b/dynet/nodes-normalization.cc index d4faacb9d..c96f38e7d 100644 --- a/dynet/nodes-normalization.cc +++ b/dynet/nodes-normalization.cc @@ -1,4 +1,4 @@ -#include "dynet/nodes.h" +#include "dynet/nodes-normalization.h" #include "dynet/nodes-macros.h" diff --git a/dynet/nodes-normalization.h b/dynet/nodes-normalization.h new file mode 100644 index 
000000000..29e94e5c5 --- /dev/null +++ b/dynet/nodes-normalization.h @@ -0,0 +1,18 @@ +#ifndef DYNET_NODES_NORMALIZATION_H_ +#define DYNET_NODES_NORMALIZATION_H_ + +#include "dynet/dynet.h" +#include "dynet/nodes-macros.h" + +namespace dynet { + +// y = x_1 * x_2 +struct WeightNormalization : public Node { + explicit WeightNormalization(const std::initializer_list& a) : Node(a) {} + virtual bool supports_multibatch() const override { return false; } + DYNET_NODE_DEFINE_DEV_IMPL() +}; + +} // namespace dynet + +#endif diff --git a/dynet/nodes-norms.cc b/dynet/nodes-norms.cc index 65d18e7b6..29cf8b8bf 100644 --- a/dynet/nodes-norms.cc +++ b/dynet/nodes-norms.cc @@ -1,4 +1,4 @@ -#include "dynet/nodes.h" +#include "dynet/nodes-norms.h" #include "dynet/nodes-macros.h" #include "dynet/functors.h" diff --git a/dynet/nodes-norms.h b/dynet/nodes-norms.h new file mode 100644 index 000000000..f18ea33cc --- /dev/null +++ b/dynet/nodes-norms.h @@ -0,0 +1,25 @@ +#ifndef DYNET_NODES_NORMS_H_ +#define DYNET_NODES_NORMS_H_ + +#include "dynet/dynet.h" +#include "dynet/nodes-macros.h" + +namespace dynet { + +// y = || x_1 ||^2 +struct SquaredNorm : public Node { + explicit SquaredNorm(const std::initializer_list& a) : Node(a) {} + virtual bool supports_multibatch() const override { return true; } + DYNET_NODE_DEFINE_DEV_IMPL() +}; + +// y = || x_1 || +struct L2Norm : public Node { + explicit L2Norm(const std::initializer_list& a) : Node(a) {} + virtual bool supports_multibatch() const override { return true; } + DYNET_NODE_DEFINE_DEV_IMPL() +}; + +} // namespace dynet + +#endif diff --git a/dynet/nodes-pickneglogsoftmax.cc b/dynet/nodes-pickneglogsoftmax.cc index 99e7abda9..a543e178d 100644 --- a/dynet/nodes-pickneglogsoftmax.cc +++ b/dynet/nodes-pickneglogsoftmax.cc @@ -1,4 +1,4 @@ -#include "dynet/nodes.h" +#include "dynet/nodes-pickneglogsoftmax.h" #include "dynet/nodes-macros.h" @@ -11,6 +11,8 @@ using namespace std; namespace dynet { +// ************* PickNegLogSoftmax 
************* + #ifndef __CUDACC__ string PickNegLogSoftmax::as_string(const vector& arg_names) const { diff --git a/dynet/nodes-pickneglogsoftmax.h b/dynet/nodes-pickneglogsoftmax.h new file mode 100644 index 000000000..ae8007b52 --- /dev/null +++ b/dynet/nodes-pickneglogsoftmax.h @@ -0,0 +1,40 @@ +#ifndef DYNET_NODES_PICKNEGLOGSOFTMAX_H_ +#define DYNET_NODES_PICKNEGLOGSOFTMAX_H_ + +#include "dynet/dynet.h" +#include "dynet/nodes-macros.h" + +namespace dynet { + +// z = \sum_j \exp (x_i)_j +// y = \log z - (x_1)_element +struct PickNegLogSoftmax : public Node { + explicit PickNegLogSoftmax(const std::initializer_list& a, unsigned v) : Node(a), val(v), pval(&val), vals(), pvals() {} + // use this constructor if you want to perform mini-batching + explicit PickNegLogSoftmax(const std::initializer_list& a, const std::vector& v) : Node(a), val(), pval(), vals(v), pvals(&vals) {} + // use these constructors if you want to change the value after the graph is constructed + explicit PickNegLogSoftmax(const std::initializer_list& a, const unsigned* pv) : Node(a), val(), pval(pv), vals(), pvals() {} + explicit PickNegLogSoftmax(const std::initializer_list& a, const std::vector* pv) : Node(a), val(), pval(), vals(), pvals(pv) {} + DYNET_NODE_DEFINE_DEV_IMPL() + virtual bool supports_multibatch() const override { return true; } + size_t aux_storage_size() const override; + virtual int autobatch_sig(const ComputationGraph &cg, SigMap &sm) const override; + virtual std::vector autobatch_concat(const ComputationGraph & cg) const override; + virtual Node* autobatch_pseudo_node(const ComputationGraph & cg, + const std::vector & batch_ids) const override; + virtual void autobatch_reshape(const ComputationGraph & cg, + const std::vector & batch_ids, + const std::vector & concat, + std::vector& xs, + Tensor& fx) const override { + autobatch_reshape_concatonly(cg, batch_ids, concat, xs, fx); + } + unsigned val; + const unsigned* pval; + std::vector vals; + const std::vector* pvals; 
+}; + +} // namespace dynet + +#endif diff --git a/dynet/nodes-random.cc b/dynet/nodes-random.cc index 9e221112b..56f5b289d 100644 --- a/dynet/nodes-random.cc +++ b/dynet/nodes-random.cc @@ -1,4 +1,4 @@ -#include "dynet/nodes.h" +#include "dynet/nodes-random.h" #include "dynet/nodes-macros.h" diff --git a/dynet/nodes-random.h b/dynet/nodes-random.h new file mode 100644 index 000000000..7e9e43582 --- /dev/null +++ b/dynet/nodes-random.h @@ -0,0 +1,59 @@ +#ifndef DYNET_NODES_RANDOM_H_ +#define DYNET_NODES_RANDOM_H_ + +#include "dynet/dynet.h" +#include "dynet/nodes-macros.h" + +namespace dynet { + +// n_{i,j} ~ N(0,stddev) +// y = x + n +struct GaussianNoise : public Node { + explicit GaussianNoise(const std::initializer_list& a, real stddev) : Node(a), stddev(stddev) {} + DYNET_NODE_DEFINE_DEV_IMPL() + size_t aux_storage_size() const override; + virtual bool supports_multibatch() const override { return true; } + real stddev; +}; + +// draw random noise from Normal(0, 1) +struct RandomNormal : public Node { + explicit RandomNormal(const Dim& d) : dim(d) {} + DYNET_NODE_DEFINE_DEV_IMPL() + Dim dim; +}; + +// draw from Bernoulli(p) +struct RandomBernoulli : public Node { + explicit RandomBernoulli(const std::initializer_list& a, const Dim& d, real p, real scale = 1.0f) : dim(d), p(p), scale(scale) { + DYNET_ASSERT(a.size() == 0, "RandomBernoulli doesn't accept nodes as input"); + } + DYNET_NODE_DEFINE_DEV_IMPL() + Dim dim; + real p; + real scale; +}; + +// draw a random real from Uniform(left, right) +struct RandomUniform : public Node { + explicit RandomUniform(const std::initializer_list& a, const Dim& d, real left, real right) : dim(d), left(left), right(right) { + DYNET_ASSERT(a.size() == 0, "RandomUniform doesn't accept nodes as input"); + } + DYNET_NODE_DEFINE_DEV_IMPL() + Dim dim; + real left, right; +}; + +// draw a random real from Gumbel(mu, beta) +struct RandomGumbel : public Node { + explicit RandomGumbel(const std::initializer_list& a, const Dim& d, 
real mu, real beta) : dim(d), mu(mu), beta(beta) { + DYNET_ASSERT(a.size() == 0, "RandomGumbel doesn't accept nodes as input"); + } + DYNET_NODE_DEFINE_DEV_IMPL() + Dim dim; + real mu, beta; +}; + +} // namespace dynet + +#endif diff --git a/dynet/nodes-select.cc b/dynet/nodes-select.cc index f8760b4c7..359ac5532 100644 --- a/dynet/nodes-select.cc +++ b/dynet/nodes-select.cc @@ -1,4 +1,4 @@ -#include "dynet/nodes.h" +#include "dynet/nodes-select.h" #include "dynet/nodes-macros.h" diff --git a/dynet/nodes-select.h b/dynet/nodes-select.h new file mode 100644 index 000000000..927c97d7a --- /dev/null +++ b/dynet/nodes-select.h @@ -0,0 +1,84 @@ +#ifndef DYNET_NODES_SELECT_H_ +#define DYNET_NODES_SELECT_H_ + +#include "dynet/dynet.h" +#include "dynet/nodes-macros.h" + +namespace dynet { + +// y = select_rows(x, rows) +// x = a matrix +struct SelectRows : public Node { + explicit SelectRows(const std::initializer_list& a, const std::vector& r) : Node(a), rows(r), prows(&rows) {} + explicit SelectRows(const std::initializer_list& a, const std::vector* pr) : Node(a), prows(pr) {} + DYNET_NODE_DEFINE_DEV_IMPL() + std::vector rows; + const std::vector* prows; +}; + +// y = select_cols(x, cols) +// x = a matrix +struct SelectCols : public Node { + explicit SelectCols(const std::initializer_list& a, const std::vector& c) : Node(a), cols(c), pcols(&cols) {} + explicit SelectCols(const std::initializer_list& a, const std::vector* pc) : Node(a), pcols(pc) {} + DYNET_NODE_DEFINE_DEV_IMPL() + std::vector cols; + const std::vector* pcols; +}; + +// x_1 is a std::vector +// y = (x_1)_{*pval} +// this is used to implement cross-entropy training +struct PickElement : public Node { + explicit PickElement(const std::initializer_list& a, unsigned v, unsigned d = 0) : Node(a), val(v), pval(&val), vals(), pvals(), dimension(d) {} + // use this constructor if you want to perform mini-batching + explicit PickElement(const std::initializer_list& a, const std::vector& v, unsigned d = 0) : 
Node(a), val(), pval(), vals(v), pvals(&vals), dimension(d) {} + // use these constructors if you want to change the value after the graph is constructed + explicit PickElement(const std::initializer_list& a, const unsigned* pv, unsigned d = 0) : Node(a), val(), pval(pv), vals(), pvals(), dimension(d) {} + explicit PickElement(const std::initializer_list& a, const std::vector* pv, unsigned d = 0) : Node(a), val(), pval(), vals(), pvals(pv), dimension(d) {} + DYNET_NODE_DEFINE_DEV_IMPL() + virtual bool supports_multibatch() const override { return true; } + unsigned val; + const unsigned* pval; + std::vector vals; + const std::vector* pvals; + unsigned dimension; +}; + +// x_1 is a tensor +// y = x_1[start:end] along dimension d +// (start inclusive, end exclusive) +struct PickRange : public Node { + explicit PickRange(const std::initializer_list& a, unsigned s, unsigned e, unsigned d = 0) : Node(a), start(s), end(e), dim(d) {} + virtual int autobatch_sig(const ComputationGraph &cg, SigMap &sm) const override; + virtual std::vector autobatch_concat(const ComputationGraph & cg) const override { return std::vector(1, 1); } + virtual void autobatch_reshape(const ComputationGraph & cg, + const std::vector & batch_ids, + const std::vector & concat, + std::vector& xs, + Tensor& fx) const override { + autobatch_reshape_concatonly(cg, batch_ids, concat, xs, fx); + } + DYNET_NODE_DEFINE_DEV_IMPL() + virtual bool supports_multibatch() const override { return true; } + unsigned start, end, dim; +}; + +// x is a batched tensor +// y = (x)_{[*pval]} +struct PickBatchElements : public Node { + explicit PickBatchElements(const std::initializer_list& a, unsigned v) : Node(a), val(v), pval(&val), vals(), pvals() {} + explicit PickBatchElements(const std::initializer_list& a, const std::vector& v) : Node(a), val(), pval(), vals(v), pvals(&vals) {} + explicit PickBatchElements(const std::initializer_list& a, const unsigned* pv) : Node(a), val(), pval(pv), vals(), pvals() {} + explicit 
PickBatchElements(const std::initializer_list& a, const std::vector* pv) : Node(a), val(), pval(), vals(), pvals(pv) {} + DYNET_NODE_DEFINE_DEV_IMPL() + virtual bool supports_multibatch() const override { return true; } + unsigned val; + const unsigned* pval; + std::vector vals; + const std::vector* pvals; +}; + +} // namespace dynet + +#endif diff --git a/dynet/nodes-similarities.cc b/dynet/nodes-similarities.cc index 51bd8e367..275485da5 100644 --- a/dynet/nodes-similarities.cc +++ b/dynet/nodes-similarities.cc @@ -1,4 +1,4 @@ -#include "dynet/nodes.h" +#include "dynet/nodes-similarities.h" #include "dynet/nodes-macros.h" #include "dynet/functors.h" diff --git a/dynet/nodes-similarities.h b/dynet/nodes-similarities.h new file mode 100644 index 000000000..9ec500c73 --- /dev/null +++ b/dynet/nodes-similarities.h @@ -0,0 +1,47 @@ +#ifndef DYNET_NODES_SIMILARITIES_H_ +#define DYNET_NODES_SIMILARITIES_H_ + +#include "dynet/dynet.h" +#include "dynet/nodes-macros.h" + +namespace dynet { + +// y = x_1^T . 
x_2 +struct DotProduct : public Node { + explicit DotProduct(const std::initializer_list& a) : Node(a) {} + virtual bool supports_multibatch() const override { return true; } + DYNET_NODE_DEFINE_DEV_IMPL() +}; + +// y = || x_1 - x_2 ||_H(d) +struct HuberDistance : public Node { + explicit HuberDistance(const std::initializer_list& a, float d = 1.345f) : Node(a), d(d) {} + DYNET_NODE_DEFINE_DEV_IMPL() + float d; +}; + +// y = || x_1 - x_2 ||_1 +struct L1Distance : public Node { + explicit L1Distance(const std::initializer_list& a) : Node(a) {} + DYNET_NODE_DEFINE_DEV_IMPL() +}; + +// y = || x_1 - x_2 ||^2 +struct SquaredEuclideanDistance : public Node { + explicit SquaredEuclideanDistance(const std::initializer_list& a) : Node(a) {} + virtual bool supports_multibatch() const override { return true; } + virtual int autobatch_sig(const ComputationGraph &cg, SigMap &sm) const override; + virtual std::vector autobatch_concat(const ComputationGraph & cg) const override; + virtual void autobatch_reshape(const ComputationGraph & cg, + const std::vector & batch_ids, + const std::vector & concat, + std::vector& xs, + Tensor& fx) const override { + autobatch_reshape_concatonly(cg, batch_ids, concat, xs, fx); + } + DYNET_NODE_DEFINE_DEV_IMPL() +}; + +} // namespace dynet + +#endif diff --git a/dynet/nodes-softmaxes.cc b/dynet/nodes-softmaxes.cc index e8b672757..fb222c903 100644 --- a/dynet/nodes-softmaxes.cc +++ b/dynet/nodes-softmaxes.cc @@ -1,4 +1,4 @@ -#include "dynet/nodes.h" +#include "dynet/nodes-softmaxes.h" #include "dynet/nodes-macros.h" #include "dynet/functors.h" diff --git a/dynet/nodes-softmaxes.h b/dynet/nodes-softmaxes.h new file mode 100644 index 000000000..2775fe049 --- /dev/null +++ b/dynet/nodes-softmaxes.h @@ -0,0 +1,66 @@ +#ifndef DYNET_NODES_SOFTMAXES_H_ +#define DYNET_NODES_SOFTMAXES_H_ + +#include "dynet/dynet.h" +#include "dynet/nodes-macros.h" + +namespace dynet { + +// z = \sum_j \exp (x_i)_j +// y_i = \exp (x_1)_i / z +struct Softmax : public Node { + 
explicit Softmax(const std::initializer_list& a) : Node(a) {} + DYNET_NODE_DEFINE_DEV_IMPL() + size_t aux_storage_size() const override; + virtual bool supports_multibatch() const override { return true; } + virtual int autobatch_sig(const ComputationGraph &cg, SigMap &sm) const override; + virtual std::vector autobatch_concat(const ComputationGraph & cg) const override; + virtual void autobatch_reshape(const ComputationGraph & cg, + const std::vector & batch_ids, + const std::vector & concat, + std::vector& xs, + Tensor& fx) const override { + autobatch_reshape_concatonly(cg, batch_ids, concat, xs, fx); + } +}; + +// z = \sum_j \exp (x_i)_j +// y_i = (x_1)_i - \log z +struct LogSoftmax : public Node { + explicit LogSoftmax(const std::initializer_list& a) : Node(a) {} + DYNET_NODE_DEFINE_DEV_IMPL() + size_t aux_storage_size() const override; + virtual bool supports_multibatch() const override { return true; } +}; + +// z = \sum_{j \in denom} \exp (x_i)_j +// y_i = (x_1)_i - \log z +struct RestrictedLogSoftmax : public Node { + explicit RestrictedLogSoftmax(const std::initializer_list& a, const std::vector& d) : Node(a), denom(d) {} + DYNET_NODE_DEFINE_DEV_IMPL() + std::vector denom; +}; + +// y = sparsemax(x) +// y = arg min_y ||y - x||^2 +struct Sparsemax : public Node { + explicit Sparsemax(const std::initializer_list& a) : Node(a) {} + DYNET_NODE_DEFINE_DEV_IMPL() + size_t aux_storage_size() const override; +}; + +// y = L_sparsemax(x_0; q) +// where x_0 is a std::vector of "unnormalized" probabilities +// q are the std::vector of labels +struct SparsemaxLoss : public Node { + explicit SparsemaxLoss(const std::initializer_list& a, const std::vector& target) : Node(a), q(target), pq(&q) {} + explicit SparsemaxLoss(const std::initializer_list& a, const std::vector* ptarget) : Node(a), q(), pq(ptarget) {} + DYNET_NODE_DEFINE_DEV_IMPL() + size_t aux_storage_size() const override; + const std::vector q; + const std::vector* pq; +}; + +} // namespace dynet + +#endif 
diff --git a/dynet/nodes-trig.cc b/dynet/nodes-trig.cc index c5965879b..f9c75758f 100644 --- a/dynet/nodes-trig.cc +++ b/dynet/nodes-trig.cc @@ -1,4 +1,4 @@ -#include "dynet/nodes.h" +#include "dynet/nodes-trig.h" #include "dynet/nodes-macros.h" #include "dynet/simd-functors.h" @@ -7,7 +7,7 @@ using namespace std; namespace dynet { -// ************* ************* +// ************* Tanh ************* #ifndef __CUDACC__ diff --git a/dynet/nodes-trig.h b/dynet/nodes-trig.h new file mode 100644 index 000000000..8212661dd --- /dev/null +++ b/dynet/nodes-trig.h @@ -0,0 +1,20 @@ +#ifndef DYNET_NODES_TRIG_H_ +#define DYNET_NODES_TRIG_H_ + +#include "dynet/dynet.h" +#include "dynet/nodes-macros.h" + +namespace dynet { + +// y = tanh x_1 +struct Tanh : public Node { + explicit Tanh(const std::initializer_list& a) : Node(a) {} + virtual bool supports_multibatch() const override { return true; } + virtual int autobatch_sig(const ComputationGraph &cg, SigMap &sm) const override { Sig s(nt::tanh); return sm.get_idx(s); } + virtual std::vector autobatch_concat(const ComputationGraph & cg) const override { return std::vector(1, 1); } + DYNET_NODE_DEFINE_DEV_IMPL() +}; + +} // namespace dynet + +#endif diff --git a/dynet/nodes.h b/dynet/nodes.h index ef421418b..7d6a13211 100644 --- a/dynet/nodes.h +++ b/dynet/nodes.h @@ -1,870 +1,31 @@ -#ifndef DYNET_NODES_H_ -#define DYNET_NODES_H_ - -#include "dynet/dynet.h" -#include "dynet/devices.h" +#include "dynet/nodes-activations.h" +#include "dynet/nodes-affinetransform.h" +#include "dynet/nodes-arith-const.h" +#include "dynet/nodes-arith-cwise.h" +#include "dynet/nodes-arith-scalar.h" +#include "dynet/nodes-arith-sum.h" +#include "dynet/nodes-arith-unary.h" +#include "dynet/nodes-concat.h" +#include "dynet/nodes-const.h" +#include "dynet/nodes-contract.h" +#include "dynet/nodes-conv.h" +#include "dynet/nodes-conv2d.h" +#include "dynet/nodes-dropout.h" +#include "dynet/nodes-flow.h" +#include "dynet/nodes-hinge.h" +#include 
"dynet/nodes-linalg.h" +#include "dynet/nodes-logsumexp.h" +#include "dynet/nodes-losses.h" #include "dynet/nodes-macros.h" - -// See nodes-macros.h for more details about DYNET_NODE_DEFINE_DEV_IMPL(). - -namespace dynet { - -// M = x_0, v = x_1 -// y = M + v (broadcasting over columns) -struct AddVectorToAllColumns : public Node { - explicit AddVectorToAllColumns(const std::initializer_list& a) : Node(a) {} - virtual bool supports_multibatch() const override { return true; } - DYNET_NODE_DEFINE_DEV_IMPL() -}; - -// with a single argument x \in R^{n x m} -// y_i = \sum_j x_i,j / m -struct AverageColumns : public Node { - template explicit AverageColumns(const T& a) : Node(a) {} - DYNET_NODE_DEFINE_DEV_IMPL() -}; - -// sum along a single dimension -struct SumDimension : public Node { - template explicit SumDimension(const T& a, unsigned d) : Node(a), dimension(d) {} - DYNET_NODE_DEFINE_DEV_IMPL() - unsigned dimension; -}; - -// y = L_sparsemax(x_0; q) -// where x_0 is a std::vector of "unnormalized" probabilities -// q are the std::vector of labels -struct SparsemaxLoss : public Node { - explicit SparsemaxLoss(const std::initializer_list& a, const std::vector& target) : Node(a), q(target), pq(&q) {} - explicit SparsemaxLoss(const std::initializer_list& a, const std::vector* ptarget) : Node(a), q(), pq(ptarget) {} - DYNET_NODE_DEFINE_DEV_IMPL() - size_t aux_storage_size() const override; - const std::vector q; - const std::vector* pq; -}; - -// y = sparsemax(x) -// y = arg min_y ||y - x||^2 -struct Sparsemax : public Node { - explicit Sparsemax(const std::initializer_list& a) : Node(a) {} - DYNET_NODE_DEFINE_DEV_IMPL() - size_t aux_storage_size() const override; -}; - -// y = inv(x) -// x = an invertible matrix -struct MatrixInverse : public Node { - explicit MatrixInverse(const std::initializer_list& a) : Node(a) {} - DYNET_NODE_DEFINE_DEV_IMPL() -}; - -// y = select_rows(x, rows) -// x = a matrix -struct SelectRows : public Node { - explicit SelectRows(const 
std::initializer_list& a, const std::vector& r) : Node(a), rows(r), prows(&rows) {} - explicit SelectRows(const std::initializer_list& a, const std::vector* pr) : Node(a), prows(pr) {} - DYNET_NODE_DEFINE_DEV_IMPL() - std::vector rows; - const std::vector* prows; -}; - -// y = select_cols(x, cols) -// x = a matrix -struct SelectCols : public Node { - explicit SelectCols(const std::initializer_list& a, const std::vector& c) : Node(a), cols(c), pcols(&cols) {} - explicit SelectCols(const std::initializer_list& a, const std::vector* pc) : Node(a), pcols(pc) {} - DYNET_NODE_DEFINE_DEV_IMPL() - std::vector cols; - const std::vector* pcols; -}; - -// y = pow(x_1, x_2) -// x_2 raise every element in x_1 to the power of scalar x_2 -struct Pow : public Node { - explicit Pow(const std::initializer_list& a) : Node(a) {} - DYNET_NODE_DEFINE_DEV_IMPL() -}; - -// y = min{x_1, x_2} -struct Min : public Node { - explicit Min(const std::initializer_list& a) : Node(a) {} - DYNET_NODE_DEFINE_DEV_IMPL() - virtual bool supports_multibatch() const override { return true; } - size_t aux_storage_size() const override; -}; - -// y = max{x_1, x_2} -struct Max : public Node { - template explicit Max(const T& a) : Node(a) {} - DYNET_NODE_DEFINE_DEV_IMPL() - virtual bool supports_multibatch() const override { return true; } - size_t aux_storage_size() const override; -}; - -// y = Tr(x_1 * x_2^T) -struct TraceOfProduct : public Node { - explicit TraceOfProduct(const std::initializer_list& a) : Node(a) {} - DYNET_NODE_DEFINE_DEV_IMPL() -}; - -// y = alpha * x_1 -struct ConstScalarMultiply : public Node { - explicit ConstScalarMultiply(const std::initializer_list& a, float alpha) : Node(a), alpha(alpha) {} - virtual bool supports_multibatch() const override { return true; } - virtual int autobatch_sig(const ComputationGraph &cg, SigMap &sm) const override { Sig s(nt::scalar_mult); s.add_float(alpha); return sm.get_idx(s); } - virtual std::vector autobatch_concat(const ComputationGraph & cg) 
const override { return std::vector(1, 1); } - DYNET_NODE_DEFINE_DEV_IMPL() - float alpha; -}; - -// y = x_1^T . x_2 -struct DotProduct : public Node { - explicit DotProduct(const std::initializer_list& a) : Node(a) {} - virtual bool supports_multibatch() const override { return true; } - DYNET_NODE_DEFINE_DEV_IMPL() -}; - -// y = x_1^T -// NOTE: if you have a column or row std::vector as input, runtime is constant -// if you have a matrix as input, the runtime is O(mn) - try to avoid using this -struct Transpose : public Node { - explicit Transpose(const std::initializer_list& a, const std::vector & dims) : Node(a), dims(dims) {} - DYNET_NODE_DEFINE_DEV_IMPL() - virtual bool supports_multibatch() const override { return true; } - std::vector dims; -}; - -// y = reshape(x_1, --> to) -struct Reshape : public Node { - explicit Reshape(const std::initializer_list& a, const Dim& to) : Node(a), to(to) {} - DYNET_NODE_DEFINE_DEV_IMPL() - virtual bool supports_multibatch() const override { return true; } - Dim to; -}; - -// n_{i,j} ~ N(0,stddev) -// y = x + n -struct GaussianNoise : public Node { - explicit GaussianNoise(const std::initializer_list& a, real stddev) : Node(a), stddev(stddev) {} - DYNET_NODE_DEFINE_DEV_IMPL() - size_t aux_storage_size() const override; - virtual bool supports_multibatch() const override { return true; } - real stddev; -}; - -// y = dropout(x,p) where p specifies the dropout probability -struct Dropout : public Node { - explicit Dropout(const std::initializer_list& a, real p) : Node(a), p(p) {} - DYNET_NODE_DEFINE_DEV_IMPL() - size_t aux_storage_size() const override; - virtual bool supports_multibatch() const override { return true; } - real p; -}; - -// y = dropout(x,p) where p specifies the dropout probability -struct DropoutDim : public Node { - explicit DropoutDim(const std::initializer_list& a, unsigned d,real p) : Node(a), dimension(d), p(p) {} - DYNET_NODE_DEFINE_DEV_IMPL() - size_t aux_storage_size() const override; - virtual bool 
supports_multibatch() const override { return true; } - unsigned dimension; - real p; -}; - -// y = dropout(x,p) where p specifies the dropout probability -struct DropoutBatch : public Node { - explicit DropoutBatch(const std::initializer_list& a, real p) : Node(a), p(p) {} - DYNET_NODE_DEFINE_DEV_IMPL() - size_t aux_storage_size() const override; - virtual bool supports_multibatch() const override { return true; } - real p; -}; - -// y = block_dropout(x,p) where p specifies the probability for dropping-out the entire block -struct BlockDropout : public Node { - explicit BlockDropout(const std::initializer_list& a, real p) : Node(a), dropout_probability(p) {} - DYNET_NODE_DEFINE_DEV_IMPL() - size_t aux_storage_size() const override; - real dropout_probability; -}; - -// y = c + x_1 -// (c is a std::vector or matrix of the constant, usually 1, but can be configured) -struct ConstantPlusX : public Node { - explicit ConstantPlusX(const std::initializer_list& a, real o) : Node(a), c(o) {} - virtual bool supports_multibatch() const override { return true; } - virtual int autobatch_sig(const ComputationGraph &cg, SigMap &sm) const override { Sig s(nt::plus_const); s.add_float(c); return sm.get_idx(s); } - virtual std::vector autobatch_concat(const ComputationGraph & cg) const override { return std::vector(1, 1); } - DYNET_NODE_DEFINE_DEV_IMPL() - real c; -}; - -// y = c - x_1 -// (c is a std::vector or matrix of the constant, usually 1, but can be configured) -struct ConstantMinusX : public Node { - explicit ConstantMinusX(const std::initializer_list& a, real o) : Node(a), c(o) {} - virtual bool supports_multibatch() const override { return true; } - DYNET_NODE_DEFINE_DEV_IMPL() - real c; -}; - -// y = sqrt x_1 -struct Sqrt : public Node { - explicit Sqrt(const std::initializer_list& a) : Node(a) {} - virtual bool supports_multibatch() const override { return true; } - virtual int autobatch_sig(const ComputationGraph &cg, SigMap &sm) const override { Sig s(nt::sqrt); 
return sm.get_idx(s); } - virtual std::vector autobatch_concat(const ComputationGraph & cg) const override { return std::vector(1, 1); } - DYNET_NODE_DEFINE_DEV_IMPL() -}; - -// y = abs x_1 -struct Abs : public Node { - explicit Abs(const std::initializer_list& a) : Node(a) {} - virtual bool supports_multibatch() const override { return true; } - virtual int autobatch_sig(const ComputationGraph &cg, SigMap &sm) const override { Sig s(nt::abs); return sm.get_idx(s); } - virtual std::vector autobatch_concat(const ComputationGraph & cg) const override { return std::vector(1, 1); } - DYNET_NODE_DEFINE_DEV_IMPL() -}; - -// y = erf x_1 -struct Erf : public Node { - explicit Erf(const std::initializer_list& a) : Node(a) {} - virtual bool supports_multibatch() const override { return true; } - virtual int autobatch_sig(const ComputationGraph &cg, SigMap &sm) const override { Sig s(nt::erf); return sm.get_idx(s); } - virtual std::vector autobatch_concat(const ComputationGraph & cg) const override { return std::vector(1, 1); } - DYNET_NODE_DEFINE_DEV_IMPL() -}; - -// y = tanh x_1 -struct Tanh : public Node { - explicit Tanh(const std::initializer_list& a) : Node(a) {} - virtual bool supports_multibatch() const override { return true; } - virtual int autobatch_sig(const ComputationGraph &cg, SigMap &sm) const override { Sig s(nt::tanh); return sm.get_idx(s); } - virtual std::vector autobatch_concat(const ComputationGraph & cg) const override { return std::vector(1, 1); } - DYNET_NODE_DEFINE_DEV_IMPL() -}; - -// y = x_1 \odot x_1 -struct Square : public Node { - explicit Square(const std::initializer_list& a) : Node(a) {} - virtual bool supports_multibatch() const override { return true; } - virtual int autobatch_sig(const ComputationGraph &cg, SigMap &sm) const override { Sig s(nt::square); return sm.get_idx(s); } - virtual std::vector autobatch_concat(const ComputationGraph & cg) const override { return std::vector(1, 1); } - DYNET_NODE_DEFINE_DEV_IMPL() -}; - -// y = x_1 
\odot x_1 \odot x_1 -struct Cube : public Node { - explicit Cube(const std::initializer_list& a) : Node(a) {} - virtual bool supports_multibatch() const override { return true; } - virtual int autobatch_sig(const ComputationGraph &cg, SigMap &sm) const override { Sig s(nt::cube); return sm.get_idx(s); } - virtual std::vector autobatch_concat(const ComputationGraph & cg) const override { return std::vector(1, 1); } - DYNET_NODE_DEFINE_DEV_IMPL() -}; - -// y = exp x_1 -struct Exp : public Node { - explicit Exp(const std::initializer_list& a) : Node(a) {} - virtual bool supports_multibatch() const override { return true; } - virtual int autobatch_sig(const ComputationGraph &cg, SigMap &sm) const override { Sig s(nt::exp); return sm.get_idx(s); } - virtual std::vector autobatch_concat(const ComputationGraph & cg) const override { return std::vector(1, 1); } - DYNET_NODE_DEFINE_DEV_IMPL() -}; - -// y = lgamma x_1 -struct LogGamma : public Node { - explicit LogGamma(const std::initializer_list& a) : Node(a) {} - virtual bool supports_multibatch() const override { return true; } - virtual int autobatch_sig(const ComputationGraph &cg, SigMap &sm) const override { Sig s(nt::loggamma); return sm.get_idx(s); } - virtual std::vector autobatch_concat(const ComputationGraph & cg) const override { return std::vector(1, 1); } - DYNET_NODE_DEFINE_DEV_IMPL() -}; - -// y = log x_1 (base e, i.e., natural log) -struct Log : public Node { - explicit Log(const std::initializer_list& a) : Node(a) {} - virtual bool supports_multibatch() const override { return true; } - virtual int autobatch_sig(const ComputationGraph &cg, SigMap &sm) const override { Sig s(nt::log); return sm.get_idx(s); } - virtual std::vector autobatch_concat(const ComputationGraph & cg) const override { return std::vector(1, 1); } - DYNET_NODE_DEFINE_DEV_IMPL() -}; - -// concatenate rows -struct Concatenate : public Node { - template explicit Concatenate(const T& a, unsigned d) : Node(a), dimension(d) {} - virtual bool 
supports_multibatch() const override { return true; } - virtual int autobatch_sig(const ComputationGraph &cg, SigMap &sm) const override; - virtual std::vector autobatch_concat(const ComputationGraph & cg) const override { return std::vector(args.size(), 1); } - virtual void autobatch_reshape(const ComputationGraph & cg, - const std::vector & batch_ids, - const std::vector & concat, - std::vector& xs, - Tensor& fx) const override { - autobatch_reshape_concatonly(cg, batch_ids, concat, xs, fx); - } - DYNET_NODE_DEFINE_DEV_IMPL() - // src_row_indices[i] says what row in fx the ith x std::vector was assigned to - // used to simplify backprop - mutable std::vector src_indices; - unsigned dimension; -}; - -// concatenate different batched experssions into one single batched tensor -struct ConcatenateToBatch : public Node { - template explicit ConcatenateToBatch(const T& a) : Node(a) {} - DYNET_NODE_DEFINE_DEV_IMPL() - virtual bool supports_multibatch() const override {return true;} - mutable std::vector src_element_indices; -}; - -// x_1 is a scalar (or row std::vector) -// x_2 is a scalar (or row std::vector) -// y = max(0, margin - x_1 + x_2) -struct PairwiseRankLoss : public Node { - explicit PairwiseRankLoss(const std::initializer_list& a, real m = 1.0) : Node(a), margin(m) {} - virtual bool supports_multibatch() const override { return true; } - DYNET_NODE_DEFINE_DEV_IMPL() - real margin; -}; - -// Let x be a std::vector-valued input, x_i represents the score of the ith element, then -// y = \sum{i != element} max{0, margin - x_element + x_i} -struct Hinge : public Node { - explicit Hinge(const std::initializer_list& a, unsigned e, real m = 1.0) : Node(a), element(e), pelement(&element), margin(m) {} - explicit Hinge(const std::initializer_list& a, const unsigned* pe, real m = 1.0) : Node(a), element(), pelement(pe), margin(m) {} - explicit Hinge(const std::initializer_list& a, const std::vector& e, real m = 1.0) : Node(a), element(), pelement(), elements(e), 
pelements(&elements), margin(m) {} - explicit Hinge(const std::initializer_list& a, const std::vector* pe, real m = 1.0) : Node(a), element(), pelement(), elements(), pelements(pe), margin(m) {} - virtual bool supports_multibatch() const override { return true; } - DYNET_NODE_DEFINE_DEV_IMPL() - size_t aux_storage_size() const override; - unsigned element; - const unsigned* pelement; - std::vector elements; - const std::vector* pelements; - real margin; -}; - -// y = x_1, but dy/dx is set to 0 -struct NoBackprop : public Node { - explicit NoBackprop(const std::initializer_list& a) : Node(a) {} - virtual bool supports_multibatch() const override { return true; } - virtual int autobatch_sig(const ComputationGraph &cg, SigMap &sm) const override { Sig s(nt::nobackprop); return sm.get_idx(s); } - virtual std::vector autobatch_concat(const ComputationGraph & cg) const override { return std::vector(1, 1); } - DYNET_NODE_DEFINE_DEV_IMPL() -}; - -// y = x_1, dy/dx is set to negative. -struct FlipGradient : public Node { - explicit FlipGradient(const std::initializer_list& a) : Node(a) {} - virtual bool supports_multibatch() const override { return true; } - virtual int autobatch_sig(const ComputationGraph &cg, SigMap &sm) const override { Sig s(nt::flipgradient); return sm.get_idx(s); } - virtual std::vector autobatch_concat(const ComputationGraph & cg) const override { return std::vector(1, 1); } - DYNET_NODE_DEFINE_DEV_IMPL() -}; - -// y = x_1 -struct Identity : public Node { - explicit Identity(const std::initializer_list& a) : Node(a) {} - virtual bool supports_multibatch() const override { return true; } - virtual int autobatch_sig(const ComputationGraph &cg, SigMap &sm) const override { Sig s(nt::identity); return sm.get_idx(s); } - virtual std::vector autobatch_concat(const ComputationGraph & cg) const override { return std::vector(1, 1); } - DYNET_NODE_DEFINE_DEV_IMPL() -}; - -// hyperparameter: width > 1 -// x_1 is a std::vector in R^n, which we write x -// y is a 
std::vector in R^{n / width} -// y_i = max_{x_{i * width - width + 1}, ..., x_{i * width}} -struct MaxPooling1D : public Node { - MaxPooling1D(const std::initializer_list& a, unsigned w) : Node(a), width(w) {} - DYNET_NODE_DEFINE_DEV_IMPL() - unsigned width; - mutable std::vector ind; -}; - -// y = x_1 * x_2 -struct MatrixMultiply : public Node { - explicit MatrixMultiply(const std::initializer_list& a) : Node(a) {} - virtual bool supports_multibatch() const override { return true; } - virtual int autobatch_sig(const ComputationGraph &cg, SigMap &sm) const override; - virtual std::vector autobatch_concat(const ComputationGraph & cg) const override; - virtual void autobatch_reshape(const ComputationGraph & cg, - const std::vector & batch_ids, - const std::vector & concat, - std::vector& xs, - Tensor& fx) const override { - autobatch_reshape_concatonly(cg, batch_ids, concat, xs, fx); - } - DYNET_NODE_DEFINE_DEV_IMPL() -}; - -// y = x_1 \cdot x_2 (Hadamard product) -struct CwiseMultiply : public Node { - explicit CwiseMultiply(const std::initializer_list& a) : Node(a) {} - virtual bool supports_multibatch() const override { return true; } - virtual int autobatch_sig(const ComputationGraph &cg, SigMap &sm) const override; - virtual std::vector autobatch_concat(const ComputationGraph & cg) const override; - DYNET_NODE_DEFINE_DEV_IMPL() -}; - -// y = x_1 + x_2 (Addition where x_2 is a scalar) -struct ScalarAdd : public Node { - explicit ScalarAdd(const std::initializer_list& a) : Node(a) {} - virtual bool supports_multibatch() const override { return true; } - DYNET_NODE_DEFINE_DEV_IMPL() -}; - -// y = x_1 \cdot x_2 (Hadamard product where x_1 is a scalar) -struct ScalarMultiply : public Node { - explicit ScalarMultiply(const std::initializer_list& a) : Node(a) {} - virtual bool supports_multibatch() const override { return true; } - DYNET_NODE_DEFINE_DEV_IMPL() -}; - -// y = x_1 / x_2 (Elementwise division where x_2 is a scalar) -struct ScalarQuotient : public Node { - 
explicit ScalarQuotient(const std::initializer_list& a) : Node(a) {} - virtual bool supports_multibatch() const override { return true; } - DYNET_NODE_DEFINE_DEV_IMPL() -}; - -// y = x_1 / x_2 (cwiseQuotient) -struct CwiseQuotient : public Node { - explicit CwiseQuotient(const std::initializer_list& a) : Node(a) {} - virtual bool supports_multibatch() const override { return true; } - DYNET_NODE_DEFINE_DEV_IMPL() -}; - -// y = x_1 \sum_{i=2, 4 ...} A_i * x_{i+1} -struct AffineTransform : public Node { - template explicit AffineTransform(const T& a) : Node(a) {} - virtual bool supports_multibatch() const override { return true; } - virtual int autobatch_sig(const ComputationGraph &cg, SigMap &sm) const override; - virtual std::vector autobatch_concat(const ComputationGraph & cg) const override; - virtual void autobatch_reshape(const ComputationGraph & cg, - const std::vector & batch_ids, - const std::vector & concat, - std::vector& xs, - Tensor& fx) const override { - autobatch_reshape_concatonly(cg, batch_ids, concat, xs, fx); - } - DYNET_NODE_DEFINE_DEV_IMPL() - mutable float* dEdf_mem; -}; - -// y = -x_1 -struct Negate : public Node { - explicit Negate(const std::initializer_list& a) : Node(a) {} - virtual bool supports_multibatch() const override { return true; } - virtual int autobatch_sig(const ComputationGraph &cg, SigMap &sm) const override { Sig s(nt::negate); return sm.get_idx(s); } - virtual std::vector autobatch_concat(const ComputationGraph & cg) const override { return std::vector(1, 1); } - DYNET_NODE_DEFINE_DEV_IMPL() -}; - -// y = max(0,x) -struct Rectify : public Node { - explicit Rectify(const std::initializer_list& a) : Node(a) {} - virtual bool supports_multibatch() const override { return true; } - virtual int autobatch_sig(const ComputationGraph &cg, SigMap &sm) const override { Sig s(nt::rectify); return sm.get_idx(s); } - virtual std::vector autobatch_concat(const ComputationGraph & cg) const override { return std::vector(1, 1); } - 
DYNET_NODE_DEFINE_DEV_IMPL() -}; - -// y = ELU(0,x) -struct ExponentialLinearUnit : public Node { - explicit ExponentialLinearUnit(const std::initializer_list& a, float lambda=1.f, float alpha=1.f) : Node(a), lambda(lambda), alpha(alpha) {} - virtual bool supports_multibatch() const override { return true; } - virtual int autobatch_sig(const ComputationGraph &cg, SigMap &sm) const override { Sig s(nt::rectify); return sm.get_idx(s); } - virtual std::vector autobatch_concat(const ComputationGraph & cg) const override { return std::vector(1, 1); } - DYNET_NODE_DEFINE_DEV_IMPL() - float lambda, alpha; -}; - -// you could do this with LogisticSigmoid, Softmax or a variety of other -// functions, but this is often useful. -// x_1 must be a std::vector with values between 0 and 1 -// target_y is an equivalently sized std::vector w values between 0 and 1 -// y = ty * log(x_1) + (1 - ty) * log(x_1) -struct BinaryLogLoss : public Node { - BinaryLogLoss(const std::initializer_list& a) : Node(a) {} - DYNET_NODE_DEFINE_DEV_IMPL() -}; - -// y = \log \sum_i \exp x_i -// done in log space carefully to avoid over/underflow issues -struct LogSumExp : public Node { - template explicit LogSumExp(const T& a) : Node(a) {} - DYNET_NODE_DEFINE_DEV_IMPL() - virtual bool supports_multibatch() const override { return true; } - size_t aux_storage_size() const override; -}; - -struct LogDet : public Node { - template explicit LogDet(const T& a) : Node(a) {} - DYNET_NODE_DEFINE_DEV_IMPL() -}; - -// y = \sum_i x_i -struct Sum : public Node { - template explicit Sum(const T& a) : Node(a) {} - virtual int autobatch_sig(const ComputationGraph &cg, SigMap &sm) const override; - virtual std::vector autobatch_concat(const ComputationGraph & cg) const override; - virtual void autobatch_reshape(const ComputationGraph & cg, - const std::vector & batch_ids, - const std::vector & concat, - std::vector& xs, - Tensor& fx) const override { - if(dim.bd != 1) - autobatch_reshape_concatonly(cg, batch_ids, 
concat, xs, fx); - } - DYNET_NODE_DEFINE_DEV_IMPL() - virtual bool supports_multibatch() const override { return true; } -}; - -// y = \sum_i,j,... x[i,j,...] -struct SumElements : public Node { - template explicit SumElements(const T& a) : Node(a) {} - DYNET_NODE_DEFINE_DEV_IMPL() - virtual bool supports_multibatch() const override { return true; } -}; - -// y = \sum_i x_i -struct SumBatches : public Node { - template explicit SumBatches(const T& a) : Node(a) {} - DYNET_NODE_DEFINE_DEV_IMPL() - virtual bool supports_multibatch() const override { return true; } -}; - -// y = \sum_i,j,... x[i,j,...] -struct StdElements : public Node { - template explicit StdElements(const T& a) : Node(a) {} - DYNET_NODE_DEFINE_DEV_IMPL() - virtual bool supports_multibatch() const override { return true; } -}; - -// y = \sum_i x_i -struct StdBatches : public Node { - template explicit StdBatches(const T& a) : Node(a) {} - DYNET_NODE_DEFINE_DEV_IMPL() - virtual bool supports_multibatch() const override { return true; } -}; - -//y = \sum_i x_i -struct StdDimension : public Node { - template explicit StdDimension(const T& a, unsigned d) : Node(a), dimension(d) {} - DYNET_NODE_DEFINE_DEV_IMPL() - virtual bool supports_multibatch() const override { return true; } -private: - unsigned dimension; -}; - -// y = \sum_i,j,... x[i,j,...] 
-struct MomentElements : public Node { - template explicit MomentElements(const T& a, unsigned o) : Node(a), order(o) {} - DYNET_NODE_DEFINE_DEV_IMPL() - virtual bool supports_multibatch() const override { return true; } -private: - unsigned order; -}; - -// y = \sum_i x_i -struct MomentBatches : public Node { - template explicit MomentBatches(const T& a, unsigned o) : Node(a), order(o) {} - DYNET_NODE_DEFINE_DEV_IMPL() - virtual bool supports_multibatch() const override { return true; } -private: - unsigned order; -}; - -//y = \sum_i x_i -struct MomentDimension : public Node { - template explicit MomentDimension(const T& a, unsigned d, unsigned o) : Node(a), dimension(d), order(o) {} - DYNET_NODE_DEFINE_DEV_IMPL() - virtual bool supports_multibatch() const override { return true; } -private: - unsigned dimension; - unsigned order; -}; - -// y = ( \sum_i x_i ) / |x| -struct Average : public Node { - template explicit Average(const T& a) : Node(a) {} - DYNET_NODE_DEFINE_DEV_IMPL() - virtual bool supports_multibatch() const override { return true; } -}; - -// this is used to implement poisson regression -// x_1 = log predicted mean -// ty = true y (this is not a VariableIndex since it has to be a nonnegative integer and -// is therefore nondifferentiable. There are various continuous extensions -// using the incomplete gamma function that could be used, but meh) -// y = log Poisson(ty; \lambda = \exp x_1) -// = ty*x_1 - exp(x_1) - log(ty!) 
-struct PoissonRegressionLoss : public Node { - explicit PoissonRegressionLoss(const std::initializer_list& a, unsigned true_y) : Node(a), ty(true_y), pty(&ty) {} - explicit PoissonRegressionLoss(const std::initializer_list& a, const unsigned* ptrue_y) : Node(a), ty(), pty(ptrue_y) {} - DYNET_NODE_DEFINE_DEV_IMPL() - private: - unsigned ty; - const unsigned* pty; -}; - -// y = || x_1 ||^2 -struct SquaredNorm : public Node { - explicit SquaredNorm(const std::initializer_list& a) : Node(a) {} - virtual bool supports_multibatch() const override { return true; } - DYNET_NODE_DEFINE_DEV_IMPL() -}; - -// y = || x_1 || -struct L2Norm : public Node { - explicit L2Norm(const std::initializer_list& a) : Node(a) {} - virtual bool supports_multibatch() const override { return true; } - DYNET_NODE_DEFINE_DEV_IMPL() -}; - -// y = || x_1 - x_2 ||^2 -struct SquaredEuclideanDistance : public Node { - explicit SquaredEuclideanDistance(const std::initializer_list& a) : Node(a) {} - virtual bool supports_multibatch() const override { return true; } - virtual int autobatch_sig(const ComputationGraph &cg, SigMap &sm) const override; - virtual std::vector autobatch_concat(const ComputationGraph & cg) const override; - virtual void autobatch_reshape(const ComputationGraph & cg, - const std::vector & batch_ids, - const std::vector & concat, - std::vector& xs, - Tensor& fx) const override { - autobatch_reshape_concatonly(cg, batch_ids, concat, xs, fx); - } - DYNET_NODE_DEFINE_DEV_IMPL() -}; - -// y = || x_1 - x_2 ||_H(d) -struct HuberDistance : public Node { - explicit HuberDistance(const std::initializer_list& a, float d = 1.345f) : Node(a), d(d) {} - DYNET_NODE_DEFINE_DEV_IMPL() - float d; -}; - -// y = || x_1 - x_2 ||_1 -struct L1Distance : public Node { - explicit L1Distance(const std::initializer_list& a) : Node(a) {} - DYNET_NODE_DEFINE_DEV_IMPL() -}; - -// y = \sigma(x_1) -struct LogisticSigmoid : public Node { - explicit LogisticSigmoid(const std::initializer_list& a) : Node(a) {} - 
virtual bool supports_multibatch() const override { return true; } - virtual int autobatch_sig(const ComputationGraph &cg, SigMap &sm) const override { Sig s(nt::logistic); return sm.get_idx(s); } - virtual std::vector autobatch_concat(const ComputationGraph & cg) const override { return std::vector(1, 1); } - DYNET_NODE_DEFINE_DEV_IMPL() -}; - -// y = x / (1 + |x|) -struct SoftSign : public Node { - explicit SoftSign(const std::initializer_list& a) : Node(a) {} - virtual bool supports_multibatch() const override { return true; } - virtual int autobatch_sig(const ComputationGraph &cg, SigMap &sm) const override { Sig s(nt::softsign); return sm.get_idx(s); } - virtual std::vector autobatch_concat(const ComputationGraph & cg) const override { return std::vector(1, 1); } - DYNET_NODE_DEFINE_DEV_IMPL() -}; - -// z = \sum_j \exp (x_i)_j -// y_i = (x_1)_i / z -struct Softmax : public Node { - explicit Softmax(const std::initializer_list& a) : Node(a) {} - DYNET_NODE_DEFINE_DEV_IMPL() - size_t aux_storage_size() const override; - virtual bool supports_multibatch() const override { return true; } - virtual int autobatch_sig(const ComputationGraph &cg, SigMap &sm) const override; - virtual std::vector autobatch_concat(const ComputationGraph & cg) const override; - virtual void autobatch_reshape(const ComputationGraph & cg, - const std::vector & batch_ids, - const std::vector & concat, - std::vector& xs, - Tensor& fx) const override { - autobatch_reshape_concatonly(cg, batch_ids, concat, xs, fx); - } -}; - -// z = \sum_j \exp (x_i)_j -// y_i = (x_1)_i - \log z -struct LogSoftmax : public Node { - explicit LogSoftmax(const std::initializer_list& a) : Node(a) {} - DYNET_NODE_DEFINE_DEV_IMPL() - size_t aux_storage_size() const override; - virtual bool supports_multibatch() const override { return true; } -}; - -// z = \sum_j \exp (x_i)_j -// y = (x_1)_element - \log z -struct PickNegLogSoftmax : public Node { - explicit PickNegLogSoftmax(const std::initializer_list& a, unsigned 
v) : Node(a), val(v), pval(&val), vals(), pvals() {} - // use this constructor if you want to perform mini-batching - explicit PickNegLogSoftmax(const std::initializer_list& a, const std::vector& v) : Node(a), val(), pval(), vals(v), pvals(&vals) {} - // use these constructors if you want to change the value after the graph is constructed - explicit PickNegLogSoftmax(const std::initializer_list& a, const unsigned* pv) : Node(a), val(), pval(pv), vals(), pvals() {} - explicit PickNegLogSoftmax(const std::initializer_list& a, const std::vector* pv) : Node(a), val(), pval(), vals(), pvals(pv) {} - DYNET_NODE_DEFINE_DEV_IMPL() - virtual bool supports_multibatch() const override { return true; } - size_t aux_storage_size() const override; - virtual int autobatch_sig(const ComputationGraph &cg, SigMap &sm) const override; - virtual std::vector autobatch_concat(const ComputationGraph & cg) const override; - virtual Node* autobatch_pseudo_node(const ComputationGraph & cg, - const std::vector & batch_ids) const override; - virtual void autobatch_reshape(const ComputationGraph & cg, - const std::vector & batch_ids, - const std::vector & concat, - std::vector& xs, - Tensor& fx) const override { - autobatch_reshape_concatonly(cg, batch_ids, concat, xs, fx); - } - unsigned val; - const unsigned* pval; - std::vector vals; - const std::vector* pvals; -}; - -// z = \sum_{j \in denom} \exp (x_i)_j -// y_i = (x_1)_i - \log z -struct RestrictedLogSoftmax : public Node { - explicit RestrictedLogSoftmax(const std::initializer_list& a, const std::vector& d) : Node(a), denom(d) {} - DYNET_NODE_DEFINE_DEV_IMPL() - std::vector denom; -}; - -// x_1 is a std::vector -// y = (x_1)_{*pval} -// this is used to implement cross-entropy training -struct PickElement : public Node { - explicit PickElement(const std::initializer_list& a, unsigned v, unsigned d = 0) : Node(a), val(v), pval(&val), vals(), pvals(), dimension(d) {} - // use this constructor if you want to perform mini-batching - explicit 
PickElement(const std::initializer_list& a, const std::vector& v, unsigned d = 0) : Node(a), val(), pval(), vals(v), pvals(&vals), dimension(d) {} - // use these constructors if you want to change the value after the graph is constructed - explicit PickElement(const std::initializer_list& a, const unsigned* pv, unsigned d = 0) : Node(a), val(), pval(pv), vals(), pvals(), dimension(d) {} - explicit PickElement(const std::initializer_list& a, const std::vector* pv, unsigned d = 0) : Node(a), val(), pval(), vals(), pvals(pv), dimension(d) {} - DYNET_NODE_DEFINE_DEV_IMPL() - virtual bool supports_multibatch() const override { return true; } - unsigned val; - const unsigned* pval; - std::vector vals; - const std::vector* pvals; - unsigned dimension; -}; - -// x_1 is a tensor -// y = x_1[start:end] along dimension d -// (start inclusive, end exclusive) -struct PickRange : public Node { - explicit PickRange(const std::initializer_list& a, unsigned s, unsigned e, unsigned d = 0) : Node(a), start(s), end(e), dim(d) {} - virtual int autobatch_sig(const ComputationGraph &cg, SigMap &sm) const override; - virtual std::vector autobatch_concat(const ComputationGraph & cg) const override { return std::vector(1, 1); } - virtual void autobatch_reshape(const ComputationGraph & cg, - const std::vector & batch_ids, - const std::vector & concat, - std::vector& xs, - Tensor& fx) const override { - autobatch_reshape_concatonly(cg, batch_ids, concat, xs, fx); - } - DYNET_NODE_DEFINE_DEV_IMPL() - virtual bool supports_multibatch() const override { return true; } - unsigned start, end, dim; -}; - -// x is a batched tensor -// y = (x)_{[*pval]} -struct PickBatchElements : public Node { - explicit PickBatchElements(const std::initializer_list& a, unsigned v) : Node(a), val(v), pval(&val), vals(), pvals() {} - explicit PickBatchElements(const std::initializer_list& a, const std::vector& v) : Node(a), val(), pval(), vals(v), pvals(&vals) {} - explicit PickBatchElements(const 
std::initializer_list& a, const unsigned* pv) : Node(a), val(), pval(pv), vals(), pvals() {} - explicit PickBatchElements(const std::initializer_list& a, const std::vector* pv) : Node(a), val(), pval(), vals(), pvals(pv) {} - DYNET_NODE_DEFINE_DEV_IMPL() - virtual bool supports_multibatch() const override { return true; } - unsigned val; - const unsigned* pval; - std::vector vals; - const std::vector* pvals; -}; - -// represents a simple std::vector of 0s -struct Zeroes : public Node { - explicit Zeroes(const Dim& d) : dim(d) {} - DYNET_NODE_DEFINE_DEV_IMPL() - Dim dim; -}; - -// draw random noise from Normal(0, 1) -struct RandomNormal : public Node { - explicit RandomNormal(const Dim& d) : dim(d) {} - DYNET_NODE_DEFINE_DEV_IMPL() - Dim dim; -}; - -// draw from Bernoulli(p) -struct RandomBernoulli : public Node { - explicit RandomBernoulli(const std::initializer_list& a, const Dim& d, real p, real scale = 1.0f) : dim(d), p(p), scale(scale) { - DYNET_ASSERT(a.size() == 0, "RandomBernoulli doesn't accept nodes as input"); - } - DYNET_NODE_DEFINE_DEV_IMPL() - Dim dim; - real p; - real scale; -}; - -// draw a random real from Uniform(left, right) -struct RandomUniform : public Node { - explicit RandomUniform(const std::initializer_list& a, const Dim& d, real left, real right) : dim(d), left(left), right(right) { - DYNET_ASSERT(a.size() == 0, "RandomUniform doesn't accept nodes as input"); - } - DYNET_NODE_DEFINE_DEV_IMPL() - Dim dim; - real left, right; -}; - -// draw a random real from Uniform(left, right) -struct RandomGumbel : public Node { - explicit RandomGumbel(const std::initializer_list& a, const Dim& d, real mu, real beta) : dim(d), mu(mu), beta(beta) { - DYNET_ASSERT(a.size() == 0, "RandomGumbel doesn't accept nodes as input"); - } - DYNET_NODE_DEFINE_DEV_IMPL() - Dim dim; - real mu, beta; -}; - -struct MaxDimension : public Node { - explicit MaxDimension(const std::initializer_list& a, unsigned dimension = 0) : Node(a), reduced_dim(dimension) { - first_dim = 
reduced_dim == 0 ? 1 : 0; - second_dim = first_dim + 1 == reduced_dim ? first_dim + 2 : first_dim + 1; - } - DYNET_NODE_DEFINE_DEV_IMPL() - virtual bool supports_multibatch() const override { return true; } - size_t aux_storage_size() const override; - unsigned reduced_dim; - unsigned first_dim; - unsigned second_dim; -}; - -struct MinDimension : public Node { - explicit MinDimension(const std::initializer_list& a, unsigned dimension = 0) : Node(a), reduced_dim(dimension) { - first_dim = reduced_dim == 0 ? 1 : 0; - second_dim = first_dim + 1 == reduced_dim ? first_dim + 2 : first_dim + 1; - } - DYNET_NODE_DEFINE_DEV_IMPL() - virtual bool supports_multibatch() const override { return true; } - size_t aux_storage_size() const override; - unsigned reduced_dim; - unsigned first_dim; - unsigned second_dim; -}; - -// y = x_1 * x_2 -struct WeightNormalization : public Node { - explicit WeightNormalization(const std::initializer_list& a) : Node(a) {} - virtual bool supports_multibatch() const override { return false; } - DYNET_NODE_DEFINE_DEV_IMPL() -}; - -} // namespace dynet - -#endif +#include "dynet/nodes-matrixmultiply.h" +#include "dynet/nodes-maxpooling2d.h" +#include "dynet/nodes-minmax.h" +#include "dynet/nodes-moments.h" +#include "dynet/nodes-normalization.h" +#include "dynet/nodes-norms.h" +#include "dynet/nodes-pickneglogsoftmax.h" +#include "dynet/nodes-random.h" +#include "dynet/nodes-select.h" +#include "dynet/nodes-similarities.h" +#include "dynet/nodes-softmaxes.h" +#include "dynet/nodes-trig.h" From 786d3a26bb1477597230760c085be755b86ed8ac Mon Sep 17 00:00:00 2001 From: Graham Neubig Date: Mon, 10 Jul 2017 11:13:02 -0400 Subject: [PATCH 3/3] Fixed compile on GPU Former-commit-id: 3a93e2ba017f5eb6d3696aa9a0ec7f0aec783b9a --- dynet/CMakeLists.txt | 45 +++++++++++++++++++++++++++++--- dynet/gpu-nodes-activations.cu | 3 +++ dynet/gpu-nodes-arith-const.cu | 3 +++ dynet/gpu-nodes-arith-cwise.cu | 3 +++ dynet/gpu-nodes-arith-scalar.cu | 3 +++ 
dynet/gpu-nodes-arith-sum.cu | 3 +++ dynet/gpu-nodes-arith-unary.cu | 2 +- dynet/gpu-nodes-concat.cu | 3 +++ dynet/gpu-nodes-const.cu | 3 +++ dynet/gpu-nodes-conv.cu | 2 +- dynet/gpu-nodes-conv2d.cu | 2 ++ dynet/gpu-nodes-dropout.cu | 3 +++ dynet/gpu-nodes-flow.cu | 3 +++ dynet/gpu-nodes-linalg.cu | 3 +++ dynet/gpu-nodes-logsumexp.cu | 3 +++ dynet/gpu-nodes-losses.cu | 3 +++ dynet/gpu-nodes-maxpooling2d.cu | 2 ++ dynet/gpu-nodes-minmax.cu | 3 +++ dynet/gpu-nodes-moments.cu | 3 +++ dynet/gpu-nodes-normalization.cu | 3 +++ dynet/gpu-nodes-norms.cu | 2 +- dynet/gpu-nodes-random.cu | 3 +++ dynet/gpu-nodes-select.cu | 3 +++ dynet/gpu-nodes-similarities.cu | 2 +- dynet/gpu-nodes-softmaxes.cu | 3 +++ dynet/gpu-nodes-trig.cu | 3 +++ dynet/nodes-conv2d.cc | 1 + dynet/nodes-conv2d.h | 4 +++ dynet/nodes-losses.cc | 4 +-- dynet/nodes-maxpooling2d.h | 4 +++ 30 files changed, 118 insertions(+), 9 deletions(-) create mode 100644 dynet/gpu-nodes-activations.cu create mode 100644 dynet/gpu-nodes-arith-const.cu create mode 100644 dynet/gpu-nodes-arith-cwise.cu create mode 100644 dynet/gpu-nodes-arith-scalar.cu create mode 100644 dynet/gpu-nodes-arith-sum.cu create mode 100644 dynet/gpu-nodes-concat.cu create mode 100644 dynet/gpu-nodes-const.cu create mode 100644 dynet/gpu-nodes-dropout.cu create mode 100644 dynet/gpu-nodes-flow.cu create mode 100644 dynet/gpu-nodes-linalg.cu create mode 100644 dynet/gpu-nodes-logsumexp.cu create mode 100644 dynet/gpu-nodes-losses.cu create mode 100644 dynet/gpu-nodes-minmax.cu create mode 100644 dynet/gpu-nodes-moments.cu create mode 100644 dynet/gpu-nodes-normalization.cu create mode 100644 dynet/gpu-nodes-random.cu create mode 100644 dynet/gpu-nodes-select.cu create mode 100644 dynet/gpu-nodes-softmaxes.cu create mode 100644 dynet/gpu-nodes-trig.cu diff --git a/dynet/CMakeLists.txt b/dynet/CMakeLists.txt index 5888c7332..28283050a 100644 --- a/dynet/CMakeLists.txt +++ b/dynet/CMakeLists.txt @@ -123,6 +123,46 @@ if(ENABLE_BOOST) list(APPEND 
dynet_library_HDRS mp.h) endif() +set(dynet_gpu_SRCS + cuda.cc + cudnn-ops.cu + gpu-ops.cu + gpu-nodes-activations.cu + gpu-nodes-affinetransform.cu + gpu-nodes-arith-const.cu + gpu-nodes-arith-cwise.cu + gpu-nodes-arith-scalar.cu + gpu-nodes-arith-sum.cu + gpu-nodes-arith-unary.cu + gpu-nodes-concat.cu + gpu-nodes-const.cu + gpu-nodes-contract.cu + gpu-nodes-conv2d.cu + gpu-nodes-conv.cu + gpu-nodes-dropout.cu + gpu-nodes-flow.cu + gpu-nodes-hinge.cu + gpu-nodes-linalg.cu + gpu-nodes-logsumexp.cu + gpu-nodes-losses.cu + gpu-nodes-matrixmultiply.cu + gpu-nodes-maxpooling2d.cu + gpu-nodes-minmax.cu + gpu-nodes-moments.cu + gpu-nodes-normalization.cu + gpu-nodes-norms.cu + gpu-nodes-pickneglogsoftmax.cu + gpu-nodes-random.cu + gpu-nodes-select.cu + gpu-nodes-similarities.cu + gpu-nodes-softmaxes.cu + gpu-nodes-trig.cu + gpu-param-nodes.cu + gpu-tensor.cu + gpu-training.cu + gpu-model.cu +) + file(GLOB TEST_SRCS RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} tests/*.cc) if (NOT MSVC) set(BUILD_SHARED_LIBS ON) @@ -175,10 +215,10 @@ if(WITH_CUDA_BACKEND) list(APPEND CUDA_NVCC_FLAGS_DEBUG "--compiler-options \"/MDd\"") list(APPEND CUDA_NVCC_FLAGS_RELEASE "--compiler-options \"/MD\"") SET(CUDA_PROPAGATE_HOST_FLAGS OFF) - cuda_add_library(gdynet ${dynet_library_SRCS} ${dynet_library_HDRS} cuda.cc cudnn-ops.cu gpu-ops.cu gpu-nodes.cu gpu-nodes-contract.cu gpu-nodes-conv.cu gpu-nodes-conv2d.cu gpu-nodes-maxpooling2d.cu gpu-param-nodes.cu gpu-tensor.cu gpu-training.cu gpu-model.cu gpu-nodes-pickneglogsoftmax.cu gpu-nodes-matrixmultiply.cu gpu-nodes-hinge.cu gpu-nodes-affinetransform.cu gpu-nodes-similarities.cu gpu-nodes-norms.cu gpu-nodes-unary-arith.cu) + cuda_add_library(gdynet ${dynet_library_SRCS} ${dynet_library_HDRS} ${dynet_gpu_SRCS}) else() SET(CUDA_PROPAGATE_HOST_FLAGS OFF) - cuda_add_library(gdynet ${dynet_library_SRCS} ${dynet_library_HDRS} cuda.cc cudnn-ops.cu gpu-ops.cu gpu-nodes.cu gpu-nodes-contract.cu gpu-nodes-conv.cu gpu-nodes-conv2d.cu gpu-nodes-maxpooling2d.cu 
gpu-param-nodes.cu gpu-tensor.cu gpu-training.cu gpu-model.cu gpu-nodes-pickneglogsoftmax.cu gpu-nodes-matrixmultiply.cu gpu-nodes-hinge.cu gpu-nodes-affinetransform.cu gpu-nodes-similarities.cu gpu-nodes-norms.cu gpu-nodes-unary-arith.cu OPTIONS --compiler-options "-fPIC") + cuda_add_library(gdynet ${dynet_library_SRCS} ${dynet_library_HDRS} ${dynet_gpu_SRCS} OPTIONS --compiler-options "-fPIC") endif() set_target_properties(gdynet PROPERTIES COMPILE_DEFINITIONS HAVE_CUDA) @@ -197,4 +237,3 @@ if(WITH_CUDA_BACKEND) endif(WITH_CUDA_BACKEND) # target_compile_features(dynet PRIVATE cxx_range_for) - diff --git a/dynet/gpu-nodes-activations.cu b/dynet/gpu-nodes-activations.cu new file mode 100644 index 000000000..b50d5a62c --- /dev/null +++ b/dynet/gpu-nodes-activations.cu @@ -0,0 +1,3 @@ +// This is a dummy file that contains the same content as nodes.cc but compiled +// on CUDA +#include "nodes-activations.cc" diff --git a/dynet/gpu-nodes-arith-const.cu b/dynet/gpu-nodes-arith-const.cu new file mode 100644 index 000000000..2abc645f9 --- /dev/null +++ b/dynet/gpu-nodes-arith-const.cu @@ -0,0 +1,3 @@ +// This is a dummy file that contains the same content as nodes.cc but compiled +// on CUDA +#include "nodes-arith-const.cc" diff --git a/dynet/gpu-nodes-arith-cwise.cu b/dynet/gpu-nodes-arith-cwise.cu new file mode 100644 index 000000000..93e62b7df --- /dev/null +++ b/dynet/gpu-nodes-arith-cwise.cu @@ -0,0 +1,3 @@ +// This is a dummy file that contains the same content as nodes.cc but compiled +// on CUDA +#include "nodes-arith-cwise.cc" diff --git a/dynet/gpu-nodes-arith-scalar.cu b/dynet/gpu-nodes-arith-scalar.cu new file mode 100644 index 000000000..2e4ff0c0e --- /dev/null +++ b/dynet/gpu-nodes-arith-scalar.cu @@ -0,0 +1,3 @@ +// This is a dummy file that contains the same content as nodes.cc but compiled +// on CUDA +#include "nodes-arith-scalar.cc" diff --git a/dynet/gpu-nodes-arith-sum.cu b/dynet/gpu-nodes-arith-sum.cu new file mode 100644 index 000000000..c80bdfe49 
--- /dev/null +++ b/dynet/gpu-nodes-arith-sum.cu @@ -0,0 +1,3 @@ +// This is a dummy file that contains the same content as nodes.cc but compiled +// on CUDA +#include "nodes-arith-sum.cc" diff --git a/dynet/gpu-nodes-arith-unary.cu b/dynet/gpu-nodes-arith-unary.cu index 15198bef2..d5e6c6917 100644 --- a/dynet/gpu-nodes-arith-unary.cu +++ b/dynet/gpu-nodes-arith-unary.cu @@ -1,3 +1,3 @@ -// This is a dummy file that contains the same content as nodes-unary-arith.cc but compiled +// This is a dummy file that contains the same content as nodes.cc but compiled // on CUDA #include "nodes-arith-unary.cc" diff --git a/dynet/gpu-nodes-concat.cu b/dynet/gpu-nodes-concat.cu new file mode 100644 index 000000000..2fcfc98c4 --- /dev/null +++ b/dynet/gpu-nodes-concat.cu @@ -0,0 +1,3 @@ +// This is a dummy file that contains the same content as nodes.cc but compiled +// on CUDA +#include "nodes-concat.cc" diff --git a/dynet/gpu-nodes-const.cu b/dynet/gpu-nodes-const.cu new file mode 100644 index 000000000..8a28ebe56 --- /dev/null +++ b/dynet/gpu-nodes-const.cu @@ -0,0 +1,3 @@ +// This is a dummy file that contains the same content as nodes.cc but compiled +// on CUDA +#include "nodes-const.cc" diff --git a/dynet/gpu-nodes-conv.cu b/dynet/gpu-nodes-conv.cu index 451f71b36..0bff1eca4 100644 --- a/dynet/gpu-nodes-conv.cu +++ b/dynet/gpu-nodes-conv.cu @@ -1,3 +1,3 @@ -// This is a dummy file that contains the same content as nodes-conv.cc but compiled +// This is a dummy file that contains the same content as nodes.cc but compiled // on CUDA #include "nodes-conv.cc" diff --git a/dynet/gpu-nodes-conv2d.cu b/dynet/gpu-nodes-conv2d.cu index 347aaadcf..cc2f78e4f 100644 --- a/dynet/gpu-nodes-conv2d.cu +++ b/dynet/gpu-nodes-conv2d.cu @@ -1 +1,3 @@ +// This is a dummy file that contains the same content as nodes.cc but compiled +// on CUDA #include "nodes-conv2d.cc" diff --git a/dynet/gpu-nodes-dropout.cu b/dynet/gpu-nodes-dropout.cu new file mode 100644 index 000000000..3911d2bc1 --- 
/dev/null +++ b/dynet/gpu-nodes-dropout.cu @@ -0,0 +1,3 @@ +// This is a dummy file that contains the same content as nodes.cc but compiled +// on CUDA +#include "nodes-dropout.cc" diff --git a/dynet/gpu-nodes-flow.cu b/dynet/gpu-nodes-flow.cu new file mode 100644 index 000000000..27cfed8c8 --- /dev/null +++ b/dynet/gpu-nodes-flow.cu @@ -0,0 +1,3 @@ +// This is a dummy file that contains the same content as nodes.cc but compiled +// on CUDA +#include "nodes-flow.cc" diff --git a/dynet/gpu-nodes-linalg.cu b/dynet/gpu-nodes-linalg.cu new file mode 100644 index 000000000..cbebed454 --- /dev/null +++ b/dynet/gpu-nodes-linalg.cu @@ -0,0 +1,3 @@ +// This is a dummy file that contains the same content as nodes.cc but compiled +// on CUDA +#include "nodes-linalg.cc" diff --git a/dynet/gpu-nodes-logsumexp.cu b/dynet/gpu-nodes-logsumexp.cu new file mode 100644 index 000000000..f7abe4950 --- /dev/null +++ b/dynet/gpu-nodes-logsumexp.cu @@ -0,0 +1,3 @@ +// This is a dummy file that contains the same content as nodes.cc but compiled +// on CUDA +#include "nodes-logsumexp.cc" diff --git a/dynet/gpu-nodes-losses.cu b/dynet/gpu-nodes-losses.cu new file mode 100644 index 000000000..4bb8863fd --- /dev/null +++ b/dynet/gpu-nodes-losses.cu @@ -0,0 +1,3 @@ +// This is a dummy file that contains the same content as nodes.cc but compiled +// on CUDA +#include "nodes-losses.cc" diff --git a/dynet/gpu-nodes-maxpooling2d.cu b/dynet/gpu-nodes-maxpooling2d.cu index ea93114c9..090ef624e 100644 --- a/dynet/gpu-nodes-maxpooling2d.cu +++ b/dynet/gpu-nodes-maxpooling2d.cu @@ -1 +1,3 @@ +// This is a dummy file that contains the same content as nodes.cc but compiled +// on CUDA #include "nodes-maxpooling2d.cc" diff --git a/dynet/gpu-nodes-minmax.cu b/dynet/gpu-nodes-minmax.cu new file mode 100644 index 000000000..dcac97cc4 --- /dev/null +++ b/dynet/gpu-nodes-minmax.cu @@ -0,0 +1,3 @@ +// This is a dummy file that contains the same content as nodes.cc but compiled +// on CUDA +#include 
"nodes-minmax.cc" diff --git a/dynet/gpu-nodes-moments.cu b/dynet/gpu-nodes-moments.cu new file mode 100644 index 000000000..253a0860f --- /dev/null +++ b/dynet/gpu-nodes-moments.cu @@ -0,0 +1,3 @@ +// This is a dummy file that contains the same content as nodes.cc but compiled +// on CUDA +#include "nodes-moments.cc" diff --git a/dynet/gpu-nodes-normalization.cu b/dynet/gpu-nodes-normalization.cu new file mode 100644 index 000000000..16d4a3048 --- /dev/null +++ b/dynet/gpu-nodes-normalization.cu @@ -0,0 +1,3 @@ +// This is a dummy file that contains the same content as nodes.cc but compiled +// on CUDA +#include "nodes-normalization.cc" diff --git a/dynet/gpu-nodes-norms.cu b/dynet/gpu-nodes-norms.cu index 4fa94dc81..470e1f97c 100644 --- a/dynet/gpu-nodes-norms.cu +++ b/dynet/gpu-nodes-norms.cu @@ -1,3 +1,3 @@ -// This is a dummy file that contains the same content as nodes-norms.cc but compiled +// This is a dummy file that contains the same content as nodes.cc but compiled // on CUDA #include "nodes-norms.cc" diff --git a/dynet/gpu-nodes-random.cu b/dynet/gpu-nodes-random.cu new file mode 100644 index 000000000..7ef0d2564 --- /dev/null +++ b/dynet/gpu-nodes-random.cu @@ -0,0 +1,3 @@ +// This is a dummy file that contains the same content as nodes.cc but compiled +// on CUDA +#include "nodes-random.cc" diff --git a/dynet/gpu-nodes-select.cu b/dynet/gpu-nodes-select.cu new file mode 100644 index 000000000..25871a1bb --- /dev/null +++ b/dynet/gpu-nodes-select.cu @@ -0,0 +1,3 @@ +// This is a dummy file that contains the same content as nodes.cc but compiled +// on CUDA +#include "nodes-select.cc" diff --git a/dynet/gpu-nodes-similarities.cu b/dynet/gpu-nodes-similarities.cu index 068d0cec7..933edb421 100644 --- a/dynet/gpu-nodes-similarities.cu +++ b/dynet/gpu-nodes-similarities.cu @@ -1,3 +1,3 @@ -// This is a dummy file that contains the same content as nodes-similarities.cc but compiled +// This is a dummy file that contains the same content as nodes.cc but 
compiled // on CUDA #include "nodes-similarities.cc" diff --git a/dynet/gpu-nodes-softmaxes.cu b/dynet/gpu-nodes-softmaxes.cu new file mode 100644 index 000000000..43730a67b --- /dev/null +++ b/dynet/gpu-nodes-softmaxes.cu @@ -0,0 +1,3 @@ +// This is a dummy file that contains the same content as nodes.cc but compiled +// on CUDA +#include "nodes-softmaxes.cc" diff --git a/dynet/gpu-nodes-trig.cu b/dynet/gpu-nodes-trig.cu new file mode 100644 index 000000000..47449a016 --- /dev/null +++ b/dynet/gpu-nodes-trig.cu @@ -0,0 +1,3 @@ +// This is a dummy file that contains the same content as nodes.cc but compiled +// on CUDA +#include "nodes-trig.cc" diff --git a/dynet/nodes-conv2d.cc b/dynet/nodes-conv2d.cc index 0a7cdedc9..b5f554152 100644 --- a/dynet/nodes-conv2d.cc +++ b/dynet/nodes-conv2d.cc @@ -16,6 +16,7 @@ #if HAVE_CUDA #include "dynet/cuda.h" #include "dynet/gpu-ops.h" +#include "dynet/cudnn-ops.h" #endif using namespace std; diff --git a/dynet/nodes-conv2d.h b/dynet/nodes-conv2d.h index 484c3222a..fc7140cf5 100644 --- a/dynet/nodes-conv2d.h +++ b/dynet/nodes-conv2d.h @@ -4,6 +4,10 @@ #include "dynet/dynet.h" #include "dynet/nodes-macros.h" +#if HAVE_CUDNN +#include "dynet/cudnn-ops.h" +#endif + namespace dynet { // conv2d diff --git a/dynet/nodes-losses.cc b/dynet/nodes-losses.cc index c67a3da2d..a0d498832 100644 --- a/dynet/nodes-losses.cc +++ b/dynet/nodes-losses.cc @@ -26,6 +26,8 @@ Dim PairwiseRankLoss::dim_forward(const vector& xs) const { return xs[0].bd >= xs[1].bd ? 
xs[0] : xs[1]; } +#endif + template void PairwiseRankLoss::forward_dev_impl(const MyDevice & dev, const vector& xs, Tensor& fx) const { fx.tvec().device(*dev.edevice) = xs[0]->tvec().binaryExpr(xs[1]->tvec(), FPairwiseRankLoss(margin)); @@ -46,8 +48,6 @@ void PairwiseRankLoss::backward_dev_impl(const MyDevice & dev, } DYNET_NODE_INST_DEV_IMPL(PairwiseRankLoss) -#endif - // ************* BinaryLogLoss ************* #ifndef __CUDACC__ diff --git a/dynet/nodes-maxpooling2d.h b/dynet/nodes-maxpooling2d.h index 20bfff67d..1172b14dc 100644 --- a/dynet/nodes-maxpooling2d.h +++ b/dynet/nodes-maxpooling2d.h @@ -4,6 +4,10 @@ #include "dynet/dynet.h" #include "dynet/nodes-macros.h" +#if HAVE_CUDNN +#include "dynet/cudnn-ops.h" +#endif + namespace dynet { // maxpooling2d