diff --git a/sxt/curve_g1/operation/BUILD b/sxt/curve_g1/operation/BUILD
index a8803437c..65c8541d7 100644
--- a/sxt/curve_g1/operation/BUILD
+++ b/sxt/curve_g1/operation/BUILD
@@ -7,13 +7,8 @@ sxt_cc_component(
     name = "add",
     impl_deps = [
         ":cmov",
-        ":mul_by_3b",
         "//sxt/curve_g1/property:identity",
         "//sxt/curve_g1/type:element_affine",
-        "//sxt/curve_g1/type:element_p2",
-        "//sxt/field12/operation:add",
-        "//sxt/field12/operation:mul",
-        "//sxt/field12/operation:sub",
     ],
     is_cuda = True,
     test_deps = [
@@ -23,12 +18,15 @@ sxt_cc_component(
         "//sxt/curve_g1/constant:identity",
         "//sxt/curve_g1/property:curve",
         "//sxt/curve_g1/property:identity",
-        "//sxt/curve_g1/type:element_p2",
-        "//sxt/field12/operation:mul",
         "//sxt/field12/type:element",
     ],
     deps = [
+        ":mul_by_3b",
         "//sxt/base/macro:cuda_callable",
+        "//sxt/curve_g1/type:element_p2",
+        "//sxt/field12/operation:add",
+        "//sxt/field12/operation:mul",
+        "//sxt/field12/operation:sub",
     ],
 )
 
diff --git a/sxt/curve_g1/operation/add.cc b/sxt/curve_g1/operation/add.cc
index 19c406e0a..245157628 100644
--- a/sxt/curve_g1/operation/add.cc
+++ b/sxt/curve_g1/operation/add.cc
@@ -26,62 +26,10 @@
 #include "sxt/curve_g1/operation/add.h"
 
 #include "sxt/curve_g1/operation/cmov.h"
-#include "sxt/curve_g1/operation/mul_by_3b.h"
 #include "sxt/curve_g1/property/identity.h"
 #include "sxt/curve_g1/type/element_affine.h"
-#include "sxt/curve_g1/type/element_p2.h"
-#include "sxt/field12/operation/add.h"
-#include "sxt/field12/operation/mul.h"
-#include "sxt/field12/operation/sub.h"
 
 namespace sxt::cg1o {
-//--------------------------------------------------------------------------------------------------
-// add
-//--------------------------------------------------------------------------------------------------
-CUDA_CALLABLE
-void add(cg1t::element_p2& h, const cg1t::element_p2& p, const cg1t::element_p2& q) noexcept {
-  f12t::element t0, t1, t2, t3, t4;
-  f12t::element x3, y3, z3;
-
-  f12o::mul(t0, p.X, q.X);
-  f12o::mul(t1, p.Y, q.Y);
-  f12o::mul(t2, p.Z, q.Z);
-  f12o::add(t3, p.X, p.Y);
-  f12o::add(t4, q.X, q.Y);
-  f12o::mul(t3, t3, t4);
-  f12o::add(t4, t0, t1);
-  f12o::sub(t3, t3, t4);
-  f12o::add(t4, p.Y, p.Z);
-  f12o::add(x3, q.Y, q.Z);
-  f12o::mul(t4, t4, x3);
-  f12o::add(x3, t1, t2);
-  f12o::sub(t4, t4, x3);
-  f12o::add(x3, p.X, p.Z);
-  f12o::add(y3, q.X, q.Z);
-  f12o::mul(x3, x3, y3);
-  f12o::add(y3, t0, t2);
-  f12o::sub(y3, x3, y3);
-  f12o::add(x3, t0, t0);
-  f12o::add(t0, x3, t0);
-  mul_by_3b(t2, t2);
-  f12o::add(z3, t1, t2);
-  f12o::sub(t1, t1, t2);
-  mul_by_3b(y3, y3);
-  f12o::mul(x3, t4, y3);
-  f12o::mul(t2, t3, t1);
-  f12o::sub(x3, t2, x3);
-  f12o::mul(y3, y3, t0);
-  f12o::mul(t1, t1, z3);
-  f12o::add(y3, t1, y3);
-  f12o::mul(t0, t0, t3);
-  f12o::mul(z3, z3, t4);
-  f12o::add(z3, z3, t0);
-
-  h.X = x3;
-  h.Y = y3;
-  h.Z = z3;
-}
-
 //--------------------------------------------------------------------------------------------------
 // add
 //--------------------------------------------------------------------------------------------------
diff --git a/sxt/curve_g1/operation/add.h b/sxt/curve_g1/operation/add.h
index 5de9dad7f..cff80cce0 100644
--- a/sxt/curve_g1/operation/add.h
+++ b/sxt/curve_g1/operation/add.h
@@ -17,13 +17,62 @@
 #pragma once
 
 #include "sxt/base/macro/cuda_callable.h"
+#include "sxt/curve_g1/operation/mul_by_3b.h"
+#include "sxt/curve_g1/type/element_p2.h"
+#include "sxt/field12/operation/add.h"
+#include "sxt/field12/operation/mul.h"
+#include "sxt/field12/operation/sub.h"
 
 namespace sxt::cg1t {
 struct element_affine;
-struct element_p2;
 } // namespace sxt::cg1t
 
 namespace sxt::cg1o {
+//--------------------------------------------------------------------------------------------------
+// add_inplace
+//--------------------------------------------------------------------------------------------------
+/*
+ p = p + q, evaluating the complete addition formula (Algorithm 7 of
+ https://eprint.iacr.org/2015/1060.pdf) with p's own coordinates as scratch; q may alias p.
+ */
+CUDA_CALLABLE inline void add_inplace(cg1t::element_p2& p, const cg1t::element_p2& q) noexcept {
+  f12t::element t0, t1, t2, t3, t4;
+
+  f12o::mul(t0, p.X, q.X);
+  f12o::mul(t1, p.Y, q.Y);
+  f12o::mul(t2, p.Z, q.Z);
+  f12o::add(t3, p.X, p.Y);
+  f12o::add(t4, q.X, q.Y);
+  f12o::mul(t3, t3, t4);
+  f12o::add(t4, t0, t1);
+  f12o::sub(t3, t3, t4);
+  f12o::add(t4, p.Y, p.Z); // last read of the incoming p.Y
+  f12o::add(p.Y, q.Y, q.Z); // p.Y is scratch from here; q.Y is not read again
+  f12o::mul(t4, t4, p.Y);
+  f12o::add(p.Y, t1, t2);
+  f12o::sub(t4, t4, p.Y);
+  f12o::add(p.Y, q.X, q.Z); // read q.X before p.X is overwritten (q may be p)
+  f12o::add(p.X, p.X, p.Z); // last read of the incoming p.X
+  f12o::mul(p.X, p.X, p.Y);
+  f12o::add(p.Y, t0, t2);
+  f12o::sub(p.Y, p.X, p.Y);
+  f12o::add(p.X, t0, t0);
+  f12o::add(t0, p.X, t0);
+  mul_by_3b(t2, t2);
+  f12o::add(p.Z, t1, t2); // the incoming p.Z and q.Z are no longer needed
+  f12o::sub(t1, t1, t2);
+  mul_by_3b(p.Y, p.Y);
+  f12o::mul(p.X, t4, p.Y);
+  f12o::mul(t2, t3, t1);
+  f12o::sub(p.X, t2, p.X);
+  f12o::mul(p.Y, p.Y, t0);
+  f12o::mul(t1, t1, p.Z);
+  f12o::add(p.Y, t1, p.Y);
+  f12o::mul(t0, t0, t3);
+  f12o::mul(p.Z, p.Z, t4);
+  f12o::add(p.Z, p.Z, t0);
+}
+
 //--------------------------------------------------------------------------------------------------
 // add
 //--------------------------------------------------------------------------------------------------
@@ -31,7 +80,11 @@ namespace sxt::cg1o {
  Algorithm 7, https://eprint.iacr.org/2015/1060.pdf
  */
 CUDA_CALLABLE
-void add(cg1t::element_p2& h, const cg1t::element_p2& p, const cg1t::element_p2& q) noexcept;
+inline void add(cg1t::element_p2& h, const cg1t::element_p2& p,
+                const cg1t::element_p2& q) noexcept {
+  const cg1t::element_p2 rhs{q}; // snapshot q first: h may alias it and h = p would clobber it
+  add_inplace(h = p, rhs);
+}
 
 //--------------------------------------------------------------------------------------------------
 // add
diff --git a/sxt/curve_g1/operation/add.t.cc b/sxt/curve_g1/operation/add.t.cc
index 9a8dd3324..d147cefde 100644
--- a/sxt/curve_g1/operation/add.t.cc
+++ b/sxt/curve_g1/operation/add.t.cc
@@ -83,6 +83,15 @@ TEST_CASE("addition with projective elements") {
     REQUIRE(cg1p::is_on_curve(d));
     REQUIRE(c == d);
   }
+
+  SECTION("can be done inplace") {
+    // identity() + generator_p2_v must compare equal to generator_p2_v
+    cg1t::element_p2 acc{cg1t::element_p2::identity()};
+    const cg1t::element_p2 addend{cg1cn::generator_p2_v};
+
+    add_inplace(acc, addend);
+    REQUIRE(acc == cg1cn::generator_p2_v);
+  }
 }
 
 TEST_CASE("addition with mixed elements") {