From fef7a567c02b0c3c91795777b60c10b3492f0a47 Mon Sep 17 00:00:00 2001
From: Martun Karapetyan <martun.karapetyan@gmail.com>
Date: Wed, 4 Oct 2023 00:37:42 -0700
Subject: [PATCH] Remove x86-64 inline asm fast paths from modular arithmetic

---
 .../multiprecision/modular/asm_functions.hpp  | 217 ------------------
 .../modular/modular_adaptor.hpp               |  27 +--
 .../modular/modular_functions_fixed.hpp       |  88 +++----
 3 files changed, 35 insertions(+), 297 deletions(-)
 delete mode 100644 include/nil/crypto3/multiprecision/modular/asm_functions.hpp
diff --git a/include/nil/crypto3/multiprecision/modular/asm_functions.hpp b/include/nil/crypto3/multiprecision/modular/asm_functions.hpp
deleted file mode 100644
index d140ac9d..00000000
--- a/include/nil/crypto3/multiprecision/modular/asm_functions.hpp
+++ /dev/null
@@ -1,217 +0,0 @@
-//---------------------------------------------------------------------------//
-// Copyright (c) 2020 Mikhail Komarov <nemo@nil.foundation>
-// Copyright (c) 2021 Aleksei Moskvin <alalmoskvin@nil.foundation>
-//
-// Distributed under the Boost Software License, Version 1.0
-// See accompanying file LICENSE_1_0.txt or copy at
-// http://www.boost.org/LICENSE_1_0.txt
-//---------------------------------------------------------------------------//
-
-#ifndef BOOST_MULTIPRECISION_ASM_FUNCTIONS_HPP
-#define BOOST_MULTIPRECISION_ASM_FUNCTIONS_HPP
-
-#include <boost/predef.h>
-
-namespace nil {
-    namespace crypto3 {
-        namespace multiprecision {
-            namespace backends {
-#if BOOST_ARCH_X86_64
-                template<typename Limb1, typename Limb2>
-                void sub_asm(size_t n, Limb1 *x, const Limb2 *y) {
-                    __asm__ volatile(
-                        "movq    (%[y]), %%rax           \n\t"
-                        "subq    %%rax, (%[x])           \n\t"
-                        "pushf                           \n\t"
-                        "movq $1, %%rbx                  \n\t"
-                        // Loop for sub
-                        "4:                              \n\t"
-                        "popf                            \n\t"
-                        "movq (%[y], %%rbx, 8), %%rax    \n\t"
-                        "sbbq %%rax, (%[x], %%rbx, 8)    \n\t"
-                        "pushf                           \n\t"
-                        "inc %%rbx                       \n\t"
-                        "cmp %%rbx, %[limbs]             \n\t"
-                        "jne 4b                          \n\t"
-                        "popf                            \n\t"
-                        :
-                        : [limbs] "r"(n), [x] "r"(x), [y] "r"(y)
-                        : "cc", "memory", "%rax", "%rcx", "%rbx");
-                }
-
-                template<typename Limb1, typename Limb2, typename Limb3>
-                bool reduce_limb_asm(const size_t &n, Limb1 *res, const Limb2 *x, const Limb3 &inv) {
-                    bool carry = false;
-                    __asm__ volatile(
-                        // Else check result with mod
-                        "movq $0, %%r12                        \n\t"
-                    "0:                                        \n\t"
-                        "movq %%r12, %%r11                     \n\t"
-
-                        "movq   (%[res], %%r11, 8), %%rax      \n\t"
-                        "mulq   %[inv]                         \n\t"
-                        "movq   %%rax, %%r10                   \n\t"
-
-                        "movq   (%[x]), %%rax                  \n\t"
-                        "mulq   %%r10                          \n\t"
-                        "movq   %%rax, %%r8                    \n\t"
-                        "movq   %%rdx, %%r9                    \n\t"
-
-                        "mov $1, %%rbx                         \n\t"
-                    "1:                                        \n\t"
-                        "movq   (%[x], %%rbx, 8), %%rax        \n\t"
-                        "mulq   %%r10                          \n\t"
-                        "addq   %%r8, (%[res], %%r11, 8)       \n\t"
-                        "movq   $0, %%r8                       \n\t"
-                        "adcq   %%rax, %%r9                    \n\t"
-                        "adcq   %%rdx, %%r8                    \n\t"
-                        // swap tmp2, tmp1
-                        "movq %%r9, %%rax                      \n\t"
-                        "movq %%r8, %%r9                       \n\t"
-                        "movq %%rax, %%r8                      \n\t"
-                        // swap end
-                        "movq $1, %%rdx                        \n\t"
-                        "addq %%rdx, %%r11                     \n\t"
-                        "inc %%rbx                             \n\t"
-                        "cmp %%rbx, %[limbs]                   \n\t"
-                        "jne 1b                                \n\t"
-                        "mov  %%r11, %%rbx                     \n\t"
-                        "addq   %%r8, (%[res], %%rbx, 8)       \n\t"
-                        "adcq   %%r9, 8(%[res], %%rbx, 8)      \n\t"
-                        "movb $0, %[carry]                     \n\t"
-                        "jnc 2f                                \n\t"
-                        "adcq   $0, 16(%[res], %%rbx, 8)       \n\t"
-                        "movb $1, %[carry]                     \n\t"
-                    "2:                                        \n\t"
-                        "inc %%r12                             \n\t"
-                        "cmpq %[limbs], %%r12                  \n\t"
-                        "jne 0b                                \n\t"
-                        : [carry] "+r"(carry)
-                        : [limbs] "r"(n), [res] "r"(res), [x] "r"(x), [inv] "r"(inv)
-                        : "cc", "memory", "%rax", "%rbx", "%rdx", "%r8", "%r9", "%r10", "%r11", "%r12");
-                    // r8, r9 - tmp1, tmp2
-                    // r10 - k
-                    return carry;
-                }
-
-                template<typename Limb1, typename Limb2>
-                int cmp_asm(size_t n, const Limb1 *x, const Limb2 *y) {
-                    int result = 0;
-                    __asm__ volatile(
-                        // Else check result with mod
-                        "mov $0, %[res]                  \n\t"
-                        "movq %[limbs], %%rbx            \n\t"
-                    "1:                                  \n\t"
-                        "movq  -8(%[y], %%rbx, 8), %%rax \n\t"
-                        "cmpq  %%rax, -8(%[x], %%rbx, 8) \n\t"
-                        "jb  2f                          \n\t"
-                        "ja  3f                          \n\t"
-                        "dec %%rbx                       \n\t"
-                        "jnz 1b                          \n\t"
-                        "jmp 4f                          \n\t"
-                        // Start sub
-                    "2:                                  \n\t"
-                        "dec %[res]                      \n\t"
-                        "jmp 4f                          \n\t"
-                    "3:                                  \n\t"
-                        "inc %[res]                      \n\t"
-                    "4:                                  \n\t"
-                        : [res] "=&r"(result)
-                        : [limbs] "r"(n), [x] "r"(x), [y] "r"(y)
-                        : "cc", "memory", "%rax", "%rcx", "%rbx");
-                    return result;
-                }
-
-                template<typename Limb1, typename Limb2, typename Limb3>
-                void sub_mod_asm(size_t n, Limb1 *x, const Limb2 *y, const Limb3 *mod) {
-                    __asm__ volatile(
-                        "pushf                          \n\t"
-                        "movq    $0, %%rbx              \n\t"
-                        // Start circle sub from 0st limb
-                    "1:                                 \n\t"
-                        "popf                           \n\t"
-                        "movq (%[y], %%rbx, 8), %%rax   \n\t"
-                        "sbbq %%rax, (%[x], %%rbx, 8)   \n\t"
-                        "pushf                          \n\t"
-                        "inc %%rbx                      \n\t"
-                        "cmp %%rbx, %[limbs]            \n\t"
-                        "jne 1b                         \n\t"
-                        "popf                           \n\t"
-                        // If it's more than zero (no carry bit) just go to end
-                        "jnc 4f                         \n\t"
-                        // Else add mod to result
-                        "clc                            \n\t"
-                        "pushf                          \n\t"
-                        "movq    $0, %%rbx              \n\t"
-                    "2:                                 \n\t"
-                        "popf                           \n\t"
-                        "movq (%[mod], %%rbx, 8), %%rax \n\t"
-                        "adcq %%rax, (%[x], %%rbx, 8)   \n\t"
-                        "pushf                          \n\t"
-                        "inc %%rbx                      \n\t"
-                        "cmp %%rbx, %[limbs]            \n\t"
-                        "jne 2b                         \n\t"
-                        "popf                           \n\t"
-                    "4:                                 \n\t"
-                        :
-                        : [limbs] "r"(n), [x] "r"(x), [y] "r"(y), [mod] "r"(mod)
-                        : "cc", "memory", "%rax", "%rcx", "%rbx");
-                }
-
-                template<typename Limb1, typename Limb2, typename Limb3>
-                void add_mod_asm(size_t n, Limb1 *x, const Limb2 *y, const Limb3 *mod) {
-                    __asm__ volatile(
-                        "movq    (%[y]), %%rax              \n\t"
-                        "addq    %%rax, (%[x])              \n\t"
-                        "movq    $1, %%rbx                  \n\t"
-                        "pushf                              \n\t"
-                        // Start circle add from 1st limb
-                    "1:                                     \n\t"
-                        "popf                               \n\t"
-                        "movq    (%[y], %%rbx, 8), %%rax    \n\t"
-                        "adcq    %%rax, (%[x], %%rbx, 8)    \n\t"
-                        "pushf                              \n\t"
-                        "inc %%rbx                          \n\t"
-                        "cmp %%rbx, %[limbs]                \n\t"
-                        "jne 1b                             \n\t"
-                        "popf                               \n\t"
-                        // If was carry, we always need sub mod
-                        "jc 3f                              \n\t"
-
-                        // Else check result with mod
-                        "movq %[limbs], %%rbx               \n\t"
-                    "2:                                     \n\t"
-                        "movq    -8(%[mod], %%rbx, 8), %%rax  \n\t"
-                        "cmpq    %%rax, -8(%[x], %%rbx, 8)  \n\t"
-                        "jb  5f                             \n\t"
-                        "ja  3f                             \n\t"
-                        "dec %%rbx                          \n\t"
-                        "jnz 2b                              \n\t"
-                        // Start sub
-                    "3:                                     \n\t"
-                        "movq    (%[mod]), %%rax            \n\t"
-                        "subq    %%rax, (%[x])              \n\t"
-                        "pushf                              \n\t"
-                        "movq $1, %%rbx                     \n\t"
-                        // Loop for sub
-                    "4:                                     \n\t"
-                        "popf                               \n\t"
-                        "movq    (%[mod], %%rbx, 8), %%rax  \n\t"
-                        "sbbq    %%rax, (%[x], %%rbx, 8)    \n\t"
-                        "pushf                              \n\t"
-                        "inc %%rbx                          \n\t"
-                        "cmp %%rbx, %[limbs]                \n\t"
-                        "jne 4b                             \n\t"
-                        "popf                               \n\t"
-                    "5:                                     \n\t"
-                        :
-                        : [limbs] "r"(n), [x] "r"(x), [y] "r"(y), [mod] "r"(mod)
-                        : "cc", "memory", "%rax", "%rcx", "%rbx");
-                }
-#endif
-            }    // namespace backends
-        }        // namespace multiprecision
-    }            // namespace crypto3
-}    // namespace nil
-
-#endif    //_MULTIPRECISION_BARRETT_PARAMS_HPP
diff --git a/include/nil/crypto3/multiprecision/modular/modular_adaptor.hpp b/include/nil/crypto3/multiprecision/modular/modular_adaptor.hpp
index 3535ed3f..2eed84cb 100644
--- a/include/nil/crypto3/multiprecision/modular/modular_adaptor.hpp
+++ b/include/nil/crypto3/multiprecision/modular/modular_adaptor.hpp
@@ -28,6 +28,8 @@
 #include <cmath>
 #include <vector>
 
+#include <nil/crypto3/zk/snark/systems/plonk/placeholder/detail/placeholder_scoped_profiler.hpp>    // NOTE(review): no use of this profiler header is visible in the changed code — confirm it is needed
+
 namespace nil {
     namespace crypto3 {
         namespace multiprecision {
@@ -302,30 +304,15 @@ namespace nil {
                 constexpr void eval_subtract(
                     modular_adaptor<cpp_int_backend<MinBits, MaxBits, SignType, Checked>, StorageType> &result,
                     const modular_adaptor<cpp_int_backend<MinBits, MaxBits, SignType, Checked>, StorageType> &o) {
+
                     BOOST_ASSERT(result.mod_data().get_mod() == o.mod_data().get_mod());
                     using ui_type = typename std::tuple_element<
                         0, typename cpp_int_backend<MinBits, MaxBits, SignType, Checked>::unsigned_types>::type;
                     using default_ops::eval_lt;
-#ifndef BOOST_MP_NO_CONSTEXPR_DETECTION
-#if BOOST_ARCH_X86_64
-                    auto limbs_count = result.base_data().size();
-                    if (!BOOST_MP_IS_CONST_EVALUATED(result.base_data().limbs()) &&
-                        !is_trivial_cpp_int<cpp_int_backend<MinBits, MaxBits, SignType, Checked>>::value &&
-                        result.base_data().size() == o.base_data().size() &&
-                        result.base_data().size() == result.mod_data().get_mod().backend().size()) {
-
-                        sub_mod_asm(limbs_count, result.base_data().limbs(), o.base_data().limbs(),
-                                    result.mod_data().get_mod().backend().limbs());
-                        result.base_data().resize(limbs_count, limbs_count);
-                        result.base_data().normalize();
-                    } else
-#endif
-#endif
-                    {
-                        eval_subtract(result.base_data(), o.base_data());
-                        if (eval_lt(result.base_data(), ui_type(0u))) {
-                            eval_add(result.base_data(), result.mod_data().get_mod().backend());
-                        }
+
+                    eval_subtract(result.base_data(), o.base_data());
+                    if (eval_lt(result.base_data(), ui_type(0u))) {
+                        eval_add(result.base_data(), result.mod_data().get_mod().backend());
                     }
                 }
 
diff --git a/include/nil/crypto3/multiprecision/modular/modular_functions_fixed.hpp b/include/nil/crypto3/multiprecision/modular/modular_functions_fixed.hpp
index 0c9f319b..92123cce 100644
--- a/include/nil/crypto3/multiprecision/modular/modular_functions_fixed.hpp
+++ b/include/nil/crypto3/multiprecision/modular/modular_functions_fixed.hpp
@@ -12,9 +12,10 @@
 #define BOOST_MULTIPRECISION_MODULAR_FUNCTIONS_FIXED_PRECISION_HPP
 
 #include <nil/crypto3/multiprecision/detail/number_base.hpp>
-#include <nil/crypto3/multiprecision/modular/asm_functions.hpp>
 #include <nil/crypto3/multiprecision/modular/modular_policy_fixed.hpp>
 
+#include <nil/crypto3/zk/snark/systems/plonk/placeholder/detail/placeholder_scoped_profiler.hpp>    // NOTE(review): no use of this profiler header is visible in the changed code — confirm it is needed
+
 #include <boost/mpl/if.hpp>
 
 #include <type_traits>
@@ -394,6 +395,7 @@ namespace nil {
                                  /// result should fit in the output parameter
                                  max_precision<Backend1>::value >= max_precision<Backend>::value>::type>
                     constexpr void montgomery_reduce(Backend1 &result) const {
+
                         using default_ops::eval_add;
                         using default_ops::eval_bitwise_and;
                         using default_ops::eval_left_shift;
@@ -403,48 +405,24 @@ namespace nil {
 
                         Backend_doubled_padded_limbs accum(result);
                         Backend_doubled_padded_limbs prod;
-#ifndef BOOST_MP_NO_CONSTEXPR_DETECTION
-#if BOOST_ARCH_X86_64
-                        if (!BOOST_MP_IS_CONST_EVALUATED(result.limbs()) && result.size() == m_mod.backend().size()
-                            && !is_trivial_cpp_int<Backend1>::value && result.size() > 1) {
-                            bool carry =
-                                reduce_limb_asm(m_mod.backend().size(), accum.limbs(), m_mod.backend().limbs(),
-                                                static_cast<double_limb_type>(m_montgomery_p_dash));
-                            if (carry || cmp_asm(m_mod.backend().size(), accum.limbs() + m_mod.backend().size(),
-                                                 m_mod.backend().limbs()) >= 0) {
-                                sub_asm(m_mod.backend().size(), accum.limbs() + m_mod.backend().size(),
-                                        m_mod.backend().limbs());
-                            }
-                            // Now result in first m_mod.backend().size() limbs, so we can do
-                            // eval_bitwise_and(accum, m_modulus_mask);
-                            // or just copy n limbs to result
-                            for (size_t i = 0; i < m_mod.backend().size(); ++i) {
-                                result.limbs()[i] = accum.limbs()[i + m_mod.backend().size()];
-                            }
-                            result.resize(m_mod.backend().size(), m_mod.backend().size());
-                            result.normalize();
-                        } else
-#endif
-#endif
-                        {
-                            for (auto i = 0; i < m_mod.backend().size(); ++i) {
-                                eval_multiply(prod, m_mod.backend(),
-                                              static_cast<double_limb_type>(static_cast<internal_limb_type>(
-                                                  custom_get_limb_value<internal_limb_type>(accum, i) *
-                                                  /// to prevent overflow error in constexpr
-                                                  static_cast<double_limb_type>(m_montgomery_p_dash))));
-                                eval_left_shift(prod, i * limb_bits);
-                                eval_add(accum, prod);
-                            }
-                            custom_right_shift(accum, m_mod.backend().size() * limb_bits);
-                            if (!eval_lt(accum, m_mod.backend())) {
-                                eval_subtract(accum, m_mod.backend());
-                            }
-                            if (m_mod.backend().size() < accum.size()) {
-                                accum.resize(m_mod.backend().size(), m_mod.backend().size());
-                            }
-                            result = accum;
+
+                        for (auto i = 0; i < m_mod.backend().size(); ++i) {
+                            eval_multiply(prod, m_mod.backend(),
+                                          static_cast<double_limb_type>(static_cast<internal_limb_type>(
+                                              custom_get_limb_value<internal_limb_type>(accum, i) *
+                                              /// to prevent overflow error in constexpr
+                                              static_cast<double_limb_type>(m_montgomery_p_dash))));
+                            eval_left_shift(prod, i * limb_bits);
+                            eval_add(accum, prod);
+                        }
+                        custom_right_shift(accum, m_mod.backend().size() * limb_bits);
+                        if (!eval_lt(accum, m_mod.backend())) {
+                            eval_subtract(accum, m_mod.backend());
+                        }
+                        if (m_mod.backend().size() < accum.size()) {
+                            accum.resize(m_mod.backend().size(), m_mod.backend().size());
                         }
+                        result = accum;
                     }
 
                     template<typename Backend1, typename Backend2,
@@ -452,6 +430,7 @@ namespace nil {
                              typename = typename boost::enable_if_c<max_precision<Backend1>::value >=
                                                                     max_precision<Backend>::value>::type>
                     constexpr void regular_add(Backend1 &result, const Backend2 &y) const {
+
                         using default_ops::eval_add;
                         using default_ops::eval_lt;
                         using default_ops::eval_subtract;
@@ -459,25 +438,14 @@ namespace nil {
                         // TODO: maybe reduce input parameters
                         /// input parameters should be lesser than modulus
                         // BOOST_ASSERT(eval_lt(x, m_mod.backend()) && eval_lt(y, m_mod.backend()));
-#ifndef BOOST_MP_NO_CONSTEXPR_DETECTION
-#if BOOST_ARCH_X86_64
-                        if (!BOOST_MP_IS_CONST_EVALUATED(result.limbs()) && result.size() == y.size()
-                            && result.size() == m_mod.backend().size() && !is_trivial_cpp_int<Backend1>::value) {
-                            add_mod_asm(limbs_count, result.limbs(), y.limbs(), m_mod.backend().limbs());
-                            result.resize(limbs_count, limbs_count);
-                            result.normalize();
-                        } else
-#endif
-#endif
-                        {
-                            using T = typename policy_type::Backend_padded_limbs_u;
-                            T tmp(result), modulus(m_mod.backend());
-                            eval_add(tmp, y);
-                            if (!eval_lt(tmp, modulus)) {
-                                eval_subtract(tmp, modulus);
-                            }
-                            result = tmp;
+
+                        using T = typename policy_type::Backend_padded_limbs_u;
+                        T tmp(result), modulus(m_mod.backend());
+                        eval_add(tmp, y);
+                        if (!eval_lt(tmp, modulus)) {
+                            eval_subtract(tmp, modulus);
                         }
+                        result = tmp;
                     }
 
                     template<typename Backend1, typename Backend2,