From af3652bece86045ba2c3ef12c64420f4dae7396e Mon Sep 17 00:00:00 2001 From: "Thing-han, Lim" <15379156+potsrevennil@users.noreply.github.com> Date: Thu, 9 Jan 2025 17:16:54 +0800 Subject: [PATCH 1/6] nix: Package hol_light into a nix derivation Signed-off-by: Thing-han, Lim <15379156+potsrevennil@users.noreply.github.com> --- flake.nix | 5 +++- nix/hol_light/0005-Fix-hollight-path.patch | 33 ++++++++++++++++++++++ nix/hol_light/default.nix | 29 +++++++++++++++++++ 3 files changed, 66 insertions(+), 1 deletion(-) create mode 100644 nix/hol_light/0005-Fix-hollight-path.patch create mode 100644 nix/hol_light/default.nix diff --git a/flake.nix b/flake.nix index f8f101404..d1a801cbe 100644 --- a/flake.nix +++ b/flake.nix @@ -24,6 +24,7 @@ in { packages.cbmc = util.cbmc; + packages.hol_light = pkgs.callPackage ./nix/hol_light { }; devShells.default = util.wrapShell util.mkShell { packages = @@ -31,13 +32,15 @@ util.linters ++ builtins.attrValues { - inherit (config.packages) cbmc; + inherit (config.packages) cbmc hol_light; inherit (pkgs) direnv nix-direnv; }; }; + devShells.hol_light = util.wrapShell util.mkShell { packages = [ config.packages.hol_light ]; }; + devShells.ci = util.wrapShell util.mkShell { packages = util.core { cross = false; }; }; devShells.ci-cross = util.wrapShell util.mkShell { packages = util.core { }; }; devShells.ci-cbmc = util.wrapShell util.mkShell { packages = util.core { cross = false; } ++ [ config.packages.cbmc ]; }; diff --git a/nix/hol_light/0005-Fix-hollight-path.patch b/nix/hol_light/0005-Fix-hollight-path.patch new file mode 100644 index 000000000..ab9ab9d0c --- /dev/null +++ b/nix/hol_light/0005-Fix-hollight-path.patch @@ -0,0 +1,33 @@ +# SPDX-License-Identifier: Apache-2.0 +diff --git a/hol_4.14.sh b/hol_4.14.sh +index 0fa0f64..313133e 100755 +--- a/hol_4.14.sh ++++ b/hol_4.14.sh +@@ -5,7 +5,7 @@ export HOLLIGHT_DIR=__DIR__ + + if [ "$#" -eq 1 ]; then + if [ "$1" == "-pp" ]; then +- echo "camlp5r pa_lexer.cmo pa_extend.cmo q_MLast.cmo -I "__DIR__" pa_j.cmo" ++ echo "camlp5r pa_lexer.cmo pa_extend.cmo q_MLast.cmo -I ${HOLLIGHT_DIR} pa_j.cmo" + exit 0 + elif [ "$1" == "-dir" ]; then + echo "${HOLLIGHT_DIR}" +diff --git a/hol_4.sh b/hol_4.sh +index be55568..259ecae 100755 +--- a/hol_4.sh ++++ b/hol_4.sh +@@ -5,7 +5,7 @@ export HOLLIGHT_DIR=__DIR__ + + if [ "$#" -eq 1 ]; then + if [ "$1" == "-pp" ]; then +- echo "camlp5r pa_lexer.cmo pa_extend.cmo q_MLast.cmo -I "__DIR__" pa_j.cmo" ++ echo "camlp5r pa_lexer.cmo pa_extend.cmo q_MLast.cmo -I "${HOLLIGHT_DIR}" pa_j.cmo"} + exit 0 + elif [ "$1" == "-dir" ]; then + echo "${HOLLIGHT_DIR}" +@@ -27,4 +27,4 @@ if [ -d "${HOLLIGHT_DIR}/_opam" ]; then + eval $(opam env --switch "${HOLLIGHT_DIR}/" --set-switch) + fi + +-${LINE_EDITOR} ${HOLLIGHT_DIR}/ocaml-hol -I `camlp5 -where` camlp5o.cma -init ${HOLLIGHT_DIR}/hol.ml -safe-string -I ${HOLLIGHT_DIR} ++${LINE_EDITOR} ${HOLLIGHT_DIR}/ocaml-hol -I $(camlp5 -where) camlp5o.cma -init ${HOLLIGHT_DIR}/hol.ml -safe-string -I ${HOLLIGHT_DIR} diff --git a/nix/hol_light/default.nix b/nix/hol_light/default.nix new file mode 100644 index 000000000..eef41effb --- /dev/null +++ b/nix/hol_light/default.nix @@ -0,0 +1,29 @@ +# SPDX-License-Identifier: Apache-2.0 + +{ hol_light, fetchFromGitHub, writeText, ... }: +hol_light.overrideAttrs (old: { + setupHook = writeText "setup-hook.sh" '' + export HOLDIR="$1/lib/hol_light" + export HOLLIGHT_DIR="$1/lib/hol_light" + ''; + version = "unstable-2024-12-22"; + src = fetchFromGitHub { + owner = "jrh13"; + repo = "hol-light"; + rev = "28e4aed1019a56fab869f752695a67a4164dd2ee"; + hash = "sha256-Z14pED3oaz30Zp1Ue58KA5srlZc31WyDE8h9tJwCAcI="; + }; + patches = [ ./0005-Fix-hollight-path.patch ]; + propagatedBuildInputs = old.propagatedBuildInputs ++ old.nativeBuildInputs; + buildPhase = '' + HOLLIGHT_USE_MODULE=1 make hol.sh + patchShebangs hol.sh + HOLLIGHT_USE_MODULE=1 make + ''; + installPhase = '' + mkdir -p "$out/lib/hol_light" + cp -a . $out/lib/hol_light + sed "s^__DIR__^$out/lib/hol_light^g; s^__USE_MODULE__^1^g" hol_4.14.sh > hol.sh + mv hol.sh $out/lib/hol_light/ + ''; +}) From 2d795adc42ad6a491dbe378ff6907f2ff7a27a91 Mon Sep 17 00:00:00 2001 From: "Thing-han, Lim" <15379156+potsrevennil@users.noreply.github.com> Date: Tue, 14 Jan 2025 14:55:13 +0800 Subject: [PATCH 2/6] nix: Package s2n_bignum into nix derivation The scripts in s2n_bignum assume execution within the $ROOT// directory and rely on relative paths. To integrate these scripts with Nix, it is necessary to adjust the paths used for loading scripts to ensure compatibility. Additionally, the Makefile located under $ROOT/ has been copied to the mlkem-native directory and modified as needed to support this change. Signed-off-by: Thing-han, Lim <15379156+potsrevennil@users.noreply.github.com> --- flake.nix | 9 +- hol_light/.gitignore | 4 + hol_light/arm/Makefile | 525 ++++++++++++++++++++++ nix/s2n_bignum/0001-fix-script-path.patch | 72 +++ nix/s2n_bignum/default.nix | 21 + 5 files changed, 629 insertions(+), 2 deletions(-) create mode 100644 hol_light/.gitignore create mode 100644 hol_light/arm/Makefile create mode 100644 nix/s2n_bignum/0001-fix-script-path.patch create mode 100644 nix/s2n_bignum/default.nix diff --git a/flake.nix b/flake.nix index d1a801cbe..22c9ab73e 100644 --- a/flake.nix +++ b/flake.nix @@ -25,6 +25,7 @@ { packages.cbmc = util.cbmc; packages.hol_light = pkgs.callPackage ./nix/hol_light { }; + packages.s2n_bignum = pkgs.callPackage ./nix/s2n_bignum { }; devShells.default = util.wrapShell util.mkShell { packages = @@ -32,14 +33,18 @@ util.linters ++ builtins.attrValues { - inherit (config.packages) cbmc hol_light; + inherit (config.packages) cbmc hol_light s2n_bignum; inherit (pkgs) direnv nix-direnv; }; }; - devShells.hol_light = util.wrapShell util.mkShell { packages = [ config.packages.hol_light ]; }; + devShells.hol_light = util.wrapShell util.mkShell { + packages = builtins.attrValues { + inherit (config.packages) hol_light s2n_bignum; + }; + }; devShells.ci = util.wrapShell util.mkShell { packages = util.core { cross = false; }; }; devShells.ci-cross = util.wrapShell util.mkShell { packages = util.core { }; }; diff --git a/hol_light/.gitignore b/hol_light/.gitignore new file mode 100644 index 000000000..914f0012e --- /dev/null +++ b/hol_light/.gitignore @@ -0,0 +1,4 @@ +# SPDX-License-Identifier: Apache-2.0 +**/*.o +**/*.native +**/*.correct diff --git a/hol_light/arm/Makefile b/hol_light/arm/Makefile new file mode 100644 index 000000000..1f1e91713 --- /dev/null +++ b/hol_light/arm/Makefile @@ -0,0 +1,525 @@ +############################################################################# +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +# SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT-0 +############################################################################# + +OSTYPE_RESULT=$(shell uname -s) +ARCHTYPE_RESULT=$(shell uname -m) + +# Assembler directives that mark symbols as .hidden +# or .private_extern can be enabled by passing +# in the S2N_BN_HIDE_SYMBOLS parameter as: +# +# make S2N_BN_HIDE_SYMBOLS=1 +# + +SRC ?= $(S2N_BIGNUM_DIR) +SRC_ARM ?= $(SRC)/arm + +ifeq ($(S2N_BN_HIDE_SYMBOLS),1) +SYMBOL_HIDING=-DS2N_BN_HIDE_SYMBOLS=1 +else +SYMBOL_HIDING= +endif + + +# Add explicit language input parameter to cpp, otherwise the use of #n for +# numeric literals in ARM code is a problem when used inside #define macros +# since normally that means stringization. +# +# Some clang-based preprocessors seem to behave differently, and get confused +# by single-quote characters in comments, so we eliminate // comments first. + +ifeq ($(OSTYPE_RESULT),Darwin) +PREPROCESS=sed -e 's/\/\/.*//' | $(CC) -E -I$(SRC)/include $(SYMBOL_HIDING) -xassembler-with-cpp - +else +PREPROCESS=$(CC) -E -I$(SRC)/include $(SYMBOL_HIDING) -xassembler-with-cpp - +endif + +# Generally GNU-type assemblers are happy with multiple instructions on +# a line, but we split them up anyway just in case. + +SPLIT=tr ';' '\n' + +# If actually on an ARM8 machine, just use the assembler (as). Otherwise +# use a cross-assembling version so that the code can still be assembled +# and the proofs checked against the object files (though you won't be able +# to run code without additional emulation infrastructure). For the clang +# version on OS X we just add the "-arch arm64" option. For the Linux/gcc +# toolchain we assume the presence of the special cross-assembler. This +# can be installed via something like: +# +# sudo apt-get install binutils-aarch64-linux-gnu + +ifeq ($(ARCHTYPE_RESULT),aarch64) +ASSEMBLE=as +OBJDUMP=objdump -d +else +ifeq ($(ARCHTYPE_RESULT),arm64) +ASSEMBLE=as +OBJDUMP=objdump -d +else +ifeq ($(OSTYPE_RESULT),Darwin) +ASSEMBLE=as -arch arm64 +OBJDUMP=otool -tvV +else +ASSEMBLE=aarch64-linux-gnu-as +OBJDUMP=aarch64-linux-gnu-objdump -d +endif +endif +endif + +# List of object files for point operations and bignum operations + +POINT_OBJ = curve25519/curve25519_ladderstep.o \ + curve25519/curve25519_ladderstep_alt.o \ + curve25519/curve25519_pxscalarmul.o \ + curve25519/curve25519_pxscalarmul_alt.o \ + curve25519/curve25519_x25519.o \ + curve25519/curve25519_x25519_alt.o \ + curve25519/curve25519_x25519_byte.o \ + curve25519/curve25519_x25519_byte_alt.o \ + curve25519/curve25519_x25519base.o \ + curve25519/curve25519_x25519base_alt.o \ + curve25519/curve25519_x25519base_byte.o \ + curve25519/curve25519_x25519base_byte_alt.o \ + curve25519/edwards25519_decode.o \ + curve25519/edwards25519_decode_alt.o \ + curve25519/edwards25519_encode.o \ + curve25519/edwards25519_epadd.o \ + curve25519/edwards25519_epadd_alt.o \ + curve25519/edwards25519_epdouble.o \ + curve25519/edwards25519_epdouble_alt.o \ + curve25519/edwards25519_pdouble.o \ + curve25519/edwards25519_pdouble_alt.o \ + curve25519/edwards25519_pepadd.o \ + curve25519/edwards25519_pepadd_alt.o \ + curve25519/edwards25519_scalarmulbase.o \ + curve25519/edwards25519_scalarmulbase_alt.o \ + curve25519/edwards25519_scalarmuldouble.o \ + curve25519/edwards25519_scalarmuldouble_alt.o \ + p256/p256_montjadd.o \ + p256/p256_montjadd_alt.o \ + p256/p256_montjdouble.o \ + p256/p256_montjdouble_alt.o \ + p256/p256_montjmixadd.o \ + p256/p256_montjmixadd_alt.o \ + p256/p256_montjscalarmul.o \ + p256/p256_montjscalarmul_alt.o \ + p256/p256_scalarmul.o \ + p256/p256_scalarmul_alt.o \ + p256/p256_scalarmulbase.o \ + p256/p256_scalarmulbase_alt.o \ + p384/p384_montjadd.o \ + p384/p384_montjadd_alt.o \ + p384/p384_montjdouble.o \ + p384/p384_montjdouble_alt.o \ + p384/p384_montjmixadd.o \ + p384/p384_montjmixadd_alt.o \ + p384/p384_montjscalarmul.o \ + p384/p384_montjscalarmul_alt.o \ + p521/p521_jadd.o \ + p521/p521_jadd_alt.o \ + p521/p521_jdouble.o \ + p521/p521_jdouble_alt.o \ + p521/p521_jmixadd.o \ + p521/p521_jmixadd_alt.o \ + p521/p521_jscalarmul.o \ + p521/p521_jscalarmul_alt.o \ + secp256k1/secp256k1_jadd.o \ + secp256k1/secp256k1_jadd_alt.o \ + secp256k1/secp256k1_jdouble.o \ + secp256k1/secp256k1_jdouble_alt.o \ + secp256k1/secp256k1_jmixadd.o \ + secp256k1/secp256k1_jmixadd_alt.o \ + sm2/sm2_montjadd.o \ + sm2/sm2_montjadd_alt.o \ + sm2/sm2_montjdouble.o \ + sm2/sm2_montjdouble_alt.o \ + sm2/sm2_montjmixadd.o \ + sm2/sm2_montjmixadd_alt.o \ + sm2/sm2_montjscalarmul.o \ + sm2/sm2_montjscalarmul_alt.o + +BIGNUM_OBJ = curve25519/bignum_add_p25519.o \ + curve25519/bignum_cmul_p25519.o \ + curve25519/bignum_double_p25519.o \ + curve25519/bignum_inv_p25519.o \ + curve25519/bignum_invsqrt_p25519.o \ + curve25519/bignum_invsqrt_p25519_alt.o \ + curve25519/bignum_madd_n25519.o \ + curve25519/bignum_madd_n25519_alt.o \ + curve25519/bignum_mod_m25519_4.o \ + curve25519/bignum_mod_n25519.o \ + curve25519/bignum_mod_n25519_4.o \ + curve25519/bignum_mod_p25519_4.o \ + curve25519/bignum_mul_p25519.o \ + curve25519/bignum_mul_p25519_alt.o \ + curve25519/bignum_neg_p25519.o \ + curve25519/bignum_optneg_p25519.o \ + curve25519/bignum_sqr_p25519.o \ + curve25519/bignum_sqr_p25519_alt.o \ + curve25519/bignum_sqrt_p25519.o \ + curve25519/bignum_sqrt_p25519_alt.o \ + curve25519/bignum_sub_p25519.o \ + fastmul/bignum_emontredc_8n.o \ + fastmul/bignum_emontredc_8n_neon.o \ + fastmul/bignum_emontredc_8n_cdiff.o \ + fastmul/bignum_kmul_16_32.o \ + fastmul/bignum_kmul_16_32_neon.o \ + fastmul/bignum_kmul_32_64.o \ + fastmul/bignum_kmul_32_64_neon.o \ + fastmul/bignum_ksqr_16_32.o \ + fastmul/bignum_ksqr_16_32_neon.o \ + fastmul/bignum_ksqr_32_64.o \ + fastmul/bignum_ksqr_32_64_neon.o \ + fastmul/bignum_mul_4_8.o \ + fastmul/bignum_mul_4_8_alt.o \ + fastmul/bignum_mul_6_12.o \ + fastmul/bignum_mul_6_12_alt.o \ + fastmul/bignum_mul_8_16.o \ + fastmul/bignum_mul_8_16_alt.o \ + fastmul/bignum_mul_8_16_neon.o \ + fastmul/bignum_sqr_4_8.o \ + fastmul/bignum_sqr_4_8_alt.o \ + fastmul/bignum_sqr_6_12.o \ + fastmul/bignum_sqr_6_12_alt.o \ + fastmul/bignum_sqr_8_16.o \ + fastmul/bignum_sqr_8_16_alt.o \ + fastmul/bignum_sqr_8_16_neon.o \ + generic/bignum_add.o \ + generic/bignum_amontifier.o \ + generic/bignum_amontmul.o \ + generic/bignum_amontredc.o \ + generic/bignum_amontsqr.o \ + generic/bignum_bitfield.o \ + generic/bignum_bitsize.o \ + generic/bignum_cdiv.o \ + generic/bignum_cdiv_exact.o \ + generic/bignum_cld.o \ + generic/bignum_clz.o \ + generic/bignum_cmadd.o \ + generic/bignum_cmnegadd.o \ + generic/bignum_cmod.o \ + generic/bignum_cmul.o \ + generic/bignum_coprime.o \ + generic/bignum_copy.o \ + generic/bignum_copy_row_from_table.o \ + generic/bignum_copy_row_from_table_8n_neon.o \ + generic/bignum_copy_row_from_table_16_neon.o \ + generic/bignum_copy_row_from_table_32_neon.o \ + generic/bignum_ctd.o \ + generic/bignum_ctz.o \ + generic/bignum_demont.o \ + generic/bignum_digit.o \ + generic/bignum_digitsize.o \ + generic/bignum_divmod10.o \ + generic/bignum_emontredc.o \ + generic/bignum_eq.o \ + generic/bignum_even.o \ + generic/bignum_ge.o \ + generic/bignum_gt.o \ + generic/bignum_iszero.o \ + generic/bignum_le.o \ + generic/bignum_lt.o \ + generic/bignum_madd.o \ + generic/bignum_modadd.o \ + generic/bignum_moddouble.o \ + generic/bignum_modexp.o \ + generic/bignum_modifier.o \ + generic/bignum_modinv.o \ + generic/bignum_modoptneg.o \ + generic/bignum_modsub.o \ + generic/bignum_montifier.o \ + generic/bignum_montmul.o \ + generic/bignum_montredc.o \ + generic/bignum_montsqr.o \ + generic/bignum_mul.o \ + generic/bignum_muladd10.o \ + generic/bignum_mux.o \ + generic/bignum_mux16.o \ + generic/bignum_negmodinv.o \ + generic/bignum_nonzero.o \ + generic/bignum_normalize.o \ + generic/bignum_odd.o \ + generic/bignum_of_word.o \ + generic/bignum_optadd.o \ + generic/bignum_optneg.o \ + generic/bignum_optsub.o \ + generic/bignum_optsubadd.o \ + generic/bignum_pow2.o \ + generic/bignum_shl_small.o \ + generic/bignum_shr_small.o \ + generic/bignum_sqr.o \ + generic/bignum_sub.o \ + generic/word_bytereverse.o \ + generic/word_clz.o \ + generic/word_ctz.o \ + generic/word_divstep59.o \ + generic/word_max.o \ + generic/word_min.o \ + generic/word_negmodinv.o \ + generic/word_popcount.o \ + generic/word_recip.o \ + p256/bignum_add_p256.o \ + p256/bignum_bigendian_4.o \ + p256/bignum_cmul_p256.o \ + p256/bignum_deamont_p256.o \ + p256/bignum_demont_p256.o \ + p256/bignum_double_p256.o \ + p256/bignum_half_p256.o \ + p256/bignum_inv_p256.o \ + p256/bignum_littleendian_4.o \ + p256/bignum_mod_n256.o \ + p256/bignum_mod_n256_4.o \ + p256/bignum_mod_p256.o \ + p256/bignum_mod_p256_4.o \ + p256/bignum_montinv_p256.o \ + p256/bignum_montmul_p256.o \ + p256/bignum_montmul_p256_alt.o \ + p256/bignum_montmul_p256_neon.o \ + p256/bignum_montsqr_p256.o \ + p256/bignum_montsqr_p256_alt.o \ + p256/bignum_montsqr_p256_neon.o \ + p256/bignum_mux_4.o \ + p256/bignum_neg_p256.o \ + p256/bignum_nonzero_4.o \ + p256/bignum_optneg_p256.o \ + p256/bignum_sub_p256.o \ + p256/bignum_tomont_p256.o \ + p256/bignum_triple_p256.o \ + p384/bignum_add_p384.o \ + p384/bignum_bigendian_6.o \ + p384/bignum_cmul_p384.o \ + p384/bignum_deamont_p384.o \ + p384/bignum_demont_p384.o \ + p384/bignum_double_p384.o \ + p384/bignum_half_p384.o \ + p384/bignum_inv_p384.o \ + p384/bignum_littleendian_6.o \ + p384/bignum_mod_n384.o \ + p384/bignum_mod_n384_6.o \ + p384/bignum_mod_p384.o \ + p384/bignum_mod_p384_6.o \ + p384/bignum_montinv_p384.o \ + p384/bignum_montmul_p384.o \ + p384/bignum_montmul_p384_alt.o \ + p384/bignum_montmul_p384_neon.o \ + p384/bignum_montsqr_p384.o \ + p384/bignum_montsqr_p384_alt.o \ + p384/bignum_montsqr_p384_neon.o \ + p384/bignum_mux_6.o \ + p384/bignum_neg_p384.o \ + p384/bignum_nonzero_6.o \ + p384/bignum_optneg_p384.o \ + p384/bignum_sub_p384.o \ + p384/bignum_tomont_p384.o \ + p384/bignum_triple_p384.o \ + p521/bignum_add_p521.o \ + p521/bignum_cmul_p521.o \ + p521/bignum_deamont_p521.o \ + p521/bignum_demont_p521.o \ + p521/bignum_double_p521.o \ + p521/bignum_fromlebytes_p521.o \ + p521/bignum_half_p521.o \ + p521/bignum_inv_p521.o \ + p521/bignum_mod_n521_9.o \ + p521/bignum_mod_p521_9.o \ + p521/bignum_montmul_p521.o \ + p521/bignum_montmul_p521_alt.o \ + p521/bignum_montmul_p521_neon.o \ + p521/bignum_montsqr_p521.o \ + p521/bignum_montsqr_p521_alt.o \ + p521/bignum_montsqr_p521_neon.o \ + p521/bignum_mul_p521.o \ + p521/bignum_mul_p521_alt.o \ + p521/bignum_mul_p521_neon.o \ + p521/bignum_neg_p521.o \ + p521/bignum_optneg_p521.o \ + p521/bignum_sqr_p521.o \ + p521/bignum_sqr_p521_alt.o \ + p521/bignum_sqr_p521_neon.o \ + p521/bignum_sub_p521.o \ + p521/bignum_tolebytes_p521.o \ + p521/bignum_tomont_p521.o \ + p521/bignum_triple_p521.o \ + secp256k1/bignum_add_p256k1.o \ + secp256k1/bignum_cmul_p256k1.o \ + secp256k1/bignum_deamont_p256k1.o \ + secp256k1/bignum_demont_p256k1.o \ + secp256k1/bignum_double_p256k1.o \ + secp256k1/bignum_half_p256k1.o \ + secp256k1/bignum_mod_n256k1_4.o \ + secp256k1/bignum_mod_p256k1_4.o \ + secp256k1/bignum_montmul_p256k1.o \ + secp256k1/bignum_montmul_p256k1_alt.o \ + secp256k1/bignum_montsqr_p256k1.o \ + secp256k1/bignum_montsqr_p256k1_alt.o \ + secp256k1/bignum_mul_p256k1.o \ + secp256k1/bignum_mul_p256k1_alt.o \ + secp256k1/bignum_neg_p256k1.o \ + secp256k1/bignum_optneg_p256k1.o \ + secp256k1/bignum_sqr_p256k1.o \ + secp256k1/bignum_sqr_p256k1_alt.o \ + secp256k1/bignum_sub_p256k1.o \ + secp256k1/bignum_tomont_p256k1.o \ + secp256k1/bignum_triple_p256k1.o \ + sm2/bignum_add_sm2.o \ + sm2/bignum_cmul_sm2.o \ + sm2/bignum_deamont_sm2.o \ + sm2/bignum_demont_sm2.o \ + sm2/bignum_double_sm2.o \ + sm2/bignum_half_sm2.o \ + sm2/bignum_inv_sm2.o \ + sm2/bignum_mod_nsm2.o \ + sm2/bignum_mod_nsm2_4.o \ + sm2/bignum_mod_sm2.o \ + sm2/bignum_mod_sm2_4.o \ + sm2/bignum_montinv_sm2.o \ + sm2/bignum_montmul_sm2.o \ + sm2/bignum_montmul_sm2_alt.o \ + sm2/bignum_montsqr_sm2.o \ + sm2/bignum_montsqr_sm2_alt.o \ + sm2/bignum_neg_sm2.o \ + sm2/bignum_optneg_sm2.o \ + sm2/bignum_sub_sm2.o \ + sm2/bignum_tomont_sm2.o \ + sm2/bignum_triple_sm2.o + +UNOPT_OBJ = p256/unopt/p256_montjadd.o \ + p256/unopt/p256_montjdouble.o \ + p384/unopt/p384_montjadd.o \ + p384/unopt/p384_montjdouble.o \ + fastmul/unopt/bignum_emontredc_8n_cdiff_base.o + +OBJ = $(POINT_OBJ) $(BIGNUM_OBJ) + +# Tutorial assembly files + +TUTORIAL_PROOFS = $(wildcard tutorial/*.ml) + +TUTORIAL_OBJ = $(TUTORIAL_PROOFS:.ml=.o) tutorial/rel_loop2.o tutorial/rel_simp2.o tutorial/rel_veceq2.o + +# According to +# https://developer.apple.com/documentation/xcode/writing-arm64-code-for-apple-platforms, +# x18 should not be used for Apple platforms. Check this using grep. + +$(OBJ): %.o : $(SRC_ARM)/%.S + $(Q)[ -d $(@D) ] || mkdir -p $(@D) + cat $< | $(PREPROCESS) | $(SPLIT) | grep -v -E '^\s+.quad\s+0x[0-9a-f]+$$' | $(ASSEMBLE) -o $@ - + $(OBJDUMP) $@ | ( ( ! grep --ignore-case -E 'w18|[^0]x18' ) || ( rm $@ ; exit 1 ) ) + cat $< | $(PREPROCESS) | $(SPLIT) | $(ASSEMBLE) -o $@ - + +libs2nbignum.a: $(OBJ) ; ar -rc libs2nbignum.a $(OBJ) + +clean:; rm -f libs2nbignum.a */*.o */*/*.o */*.correct */*.native + +# Proof-related parts +# +# The proof files are all independent, though each one loads the +# same common infrastructure "base.ml". So you can potentially +# run the proofs in parallel for more speed, e.g. +# +# nohup make -j 16 proofs & +# +# If you build hol-light yourself (see https://github.com/jrh13/hol-light) +# in your home directory, and do "make" inside the subdirectory hol-light, +# then the following HOLDIR setting should be right: + +HOLDIR?=$(HOME)/hol-light +HOLLIGHT:=$(HOLDIR)/hol.sh + +PROOF_BINS = $(OBJ:.o=.native) +PROOF_LOGS = $(OBJ:.o=.correct) +TUTORIAL_PROOF_BINS = $(TUTORIAL_PROOFS:.ml=.native) +TUTORIAL_PROOF_LOGS = $(TUTORIAL_PROOFS:.ml=.correct) + +# Build precompiled native binaries of HOL Light proofs + +proofs/simulator.native: $(SRC_ARM)/proofs/simulator.ml + $(Q)[ -d $(@D) ] || mkdir -p $(@D) + $(SRC)/tools/build-proof.sh $(SRC_ARM)/proofs/simulator.ml "$(HOLLIGHT)" "$@" + +.SECONDEXPANSION: +%.native: $(SRC_ARM)/proofs/$$(*F).ml %.o ; $(SRC)/tools/build-proof.sh $< "$(HOLLIGHT)" "$@" + +# Run them and print the standard output+error at *.correct + +%.correct: %.native ; $(SRC)/tools/run-proof.sh "$<" "$@" + +# Cases where a proof uses other proofs for lemmas and/or subroutines + +p256/bignum_montmul_p256_neon.native: p256/bignum_montmul_p256.native +p384/bignum_montmul_p384_neon.native: p384/bignum_montmul_p384.native +p521/bignum_montmul_p521_neon.native: p521/bignum_montmul_p521.native +p256/bignum_montsqr_p256_neon.native: p256/bignum_montsqr_p256.native +p384/bignum_montsqr_p384_neon.native: p384/bignum_montsqr_p384.native +p521/bignum_montsqr_p521_neon.native: p521/bignum_montsqr_p521.native +p521/bignum_mul_p521_neon.native: p521/bignum_mul_p521.native +p521/bignum_sqr_p521_neon.native: p521/bignum_sqr_p521.native +fastmul/bignum_mul_8_16_neon.native: fastmul/bignum_mul_8_16.native +fastmul/bignum_sqr_8_16_neon.native: fastmul/bignum_sqr_8_16.native +curve25519/curve25519_x25519.native: curve25519/bignum_inv_p25519.native +curve25519/curve25519_x25519_alt.native: curve25519/bignum_inv_p25519.native +curve25519/curve25519_x25519_byte.native: curve25519/bignum_inv_p25519.native +curve25519/curve25519_x25519_byte_alt.native: curve25519/bignum_inv_p25519.native +curve25519/curve25519_x25519base.native: curve25519/bignum_inv_p25519.native +curve25519/curve25519_x25519base_alt.native: curve25519/bignum_inv_p25519.native +curve25519/curve25519_x25519base_byte.native: curve25519/bignum_inv_p25519.native +curve25519/curve25519_x25519base_byte_alt.native: curve25519/bignum_inv_p25519.native +curve25519/edwards25519_scalarmulbase.native: curve25519/bignum_inv_p25519.native +curve25519/edwards25519_scalarmulbase_alt.native: curve25519/bignum_inv_p25519.native +curve25519/edwards25519_scalarmuldouble.native: curve25519/bignum_inv_p25519.native +curve25519/edwards25519_scalarmuldouble_alt.native: curve25519/bignum_inv_p25519.native +generic/bignum_modexp.native: generic/bignum_amontifier.native generic/bignum_amontmul.native generic/bignum_demont.native generic/bignum_mux.native +p256/p256_montjadd.native: p256/unopt/p256_montjadd.o p256/bignum_montsqr_p256_neon.native p256/bignum_montmul_p256_neon.native p256/bignum_sub_p256.native +p256/p256_montjdouble.native: p256/unopt/p256_montjdouble.o p256/bignum_montsqr_p256_neon.native p256/bignum_montmul_p256_neon.native p256/bignum_sub_p256.native p256/bignum_add_p256.native +p256/p256_montjscalarmul.native: p256/p256_montjadd.native p256/p256_montjdouble.native +p256/p256_montjscalarmul_alt.native: p256/p256_montjadd_alt.native p256/p256_montjdouble_alt.native +p256/p256_scalarmul.native: p256/bignum_demont_p256.native p256/bignum_inv_p256.native p256/bignum_tomont_p256.native p256/p256_montjadd.native p256/p256_montjdouble.native p256/p256_montjmixadd.native +p256/p256_scalarmul_alt.native: p256/bignum_demont_p256.native p256/bignum_inv_p256.native p256/p256_montjadd_alt.native p256/p256_montjdouble_alt.native p256/p256_montjmixadd_alt.native +p256/p256_scalarmulbase.native: p256/bignum_demont_p256.native p256/bignum_inv_p256.native p256/p256_montjmixadd.native +p256/p256_scalarmulbase_alt.native: p256/bignum_demont_p256.native p256/bignum_inv_p256.native p256/p256_montjmixadd_alt.native +p384/p384_montjadd.native: p384/unopt/p384_montjadd.o p384/bignum_montsqr_p384_neon.native p384/bignum_montmul_p384_neon.native p384/bignum_sub_p384.native +p384/p384_montjdouble.native: p384/unopt/p384_montjdouble.o p384/bignum_montsqr_p384_neon.native p384/bignum_montmul_p384_neon.native p384/bignum_sub_p384.native p384/bignum_add_p384.native +p384/p384_montjscalarmul.native: \ + p384/p384_montjadd.native p384/p384_montjdouble.native \ + p384/bignum_sub_p384.native p384/bignum_add_p384.native +p384/p384_montjscalarmul_alt.native: p384/p384_montjadd_alt.native p384/p384_montjdouble_alt.native +p521/p521_jadd.native: p521/bignum_mul_p521_neon.native p521/bignum_sqr_p521_neon.native +p521/p521_jdouble.native: p521/bignum_mul_p521_neon.native p521/bignum_sqr_p521_neon.native +p521/p521_jscalarmul.native: p521/bignum_mod_n521_9.native p521/p521_jadd.native p521/p521_jdouble.native +p521/p521_jscalarmul_alt.native: p521/bignum_mod_n521_9.native +sm2/sm2_montjscalarmul.native: sm2/sm2_montjadd.native sm2/sm2_montjdouble.native +sm2/sm2_montjscalarmul_alt.native: sm2/sm2_montjadd_alt.native sm2/sm2_montjdouble_alt.native + +# Tutorial + +.SECONDEXPANSION: +tutorial/%.native: $(SRC_ARM)/tutorial/%.ml tutorial/%.o + $(Q)[ -d $(@D) ] || mkdir -p $(@D) + $(SRC)/tools/build-proof.sh "$<" "$(HOLLIGHT)" "$@" + +# Additional dependencies on .o files +tutorial/rel_loop.native: tutorial/rel_loop2.o +tutorial/rel_simp.native: tutorial/rel_simp2.o +tutorial/rel_veceq.native: tutorial/rel_veceq2.o + + +unopt: $(UNOPT_OBJ) + +build_proofs: $(UNOPT_OBJ) $(PROOF_BINS); +build_tutorial: $(TUTORIAL_OBJ) $(TUTORIAL_PROOF_BINS); +run_proofs: build_proofs $(PROOF_LOGS); + +proofs: run_proofs ; $(SRC)/tools/count-proofs.sh . +tutorial: build_tutorial $(TUTORIAL_PROOF_LOGS); + +# Always run sematest regardless of dependency check +FORCE: ; +# Always use max. # of cores because in Makefile one cannot get the passed number of -j. +# A portable way of getting the number of max. cores: +# https://stackoverflow.com/a/23569003/1488216 +NUM_CORES_FOR_SEMATEST = $(shell getconf _NPROCESSORS_ONLN) +sematest: FORCE $(OBJ) $(SRC_ARM)/proofs/simulator_iclasses.ml $(SRC_ARM)/proofs/simulator.native + $(SRC)/tools/run-sematest.sh arm $(NUM_CORES_FOR_SEMATEST) diff --git a/nix/s2n_bignum/0001-fix-script-path.patch b/nix/s2n_bignum/0001-fix-script-path.patch new file mode 100644 index 000000000..9648df9c9 --- /dev/null +++ b/nix/s2n_bignum/0001-fix-script-path.patch @@ -0,0 +1,72 @@ +# SPDX-License-Identifier: Apache-2.0 +diff --git a/tools/build-proof.sh b/tools/build-proof.sh +index 39d72433..d10c3244 100755 +--- a/tools/build-proof.sh ++++ b/tools/build-proof.sh +@@ -1,6 +1,7 @@ + #!/bin/bash ++ROOT="$(realpath "$(dirname "$0")"/../)" + if [ "$#" -ne 3 ]; then +- echo "../tools/build-proof.sh <.ml file path> " ++ echo "${ROOT}/tools/build-proof.sh <.ml file path> " + echo "This script builds HOL Light proof using OCaml native compiler and puts the " + echo "output binary at ." + exit 1 +@@ -9,14 +10,11 @@ fi + # Return the exit code if any statement fails + set -e + +-s2n_bignum_arch=$(basename "$(pwd)") +- +-cd .. +- +-ml_path_noarch=$1 +-ml_path=${s2n_bignum_arch}/${ml_path_noarch} ++ml_path="$1" + hol_sh_cmd=$2 +-output_path=${s2n_bignum_arch}/$3 ++output_path=$3 ++output_dir=$(dirname "$output_path") ++[ -d "$output_dir" ] || mkdir -p "$output_dir" + + export HOLLIGHT_DIR="$(dirname ${hol_sh_cmd})" + if [ ! -f "${HOLLIGHT_DIR}/hol_lib.cmxa" ]; then +@@ -34,7 +32,7 @@ echo "Generating a template .ml that loads the file...: ${template_ml}" + echo "check_axioms ();;") >> ${template_ml} + + spec_found=0 +-for spec in $(./tools/collect-specs.sh ${s2n_bignum_arch} ${ml_path_noarch}) ; do ++for spec in $("${ROOT}"/tools/collect-specs.sh "$(dirname "${ml_path}")" "${ml_path}"); do + echo "Printf.printf \"val ${spec} : thm = %s\n\" (string_of_thm ${spec});;" + spec_found=1 + done >> ${template_ml} +@@ -51,7 +49,7 @@ fi + inlined_prefix="$(mktemp)" + inlined_ml="${inlined_prefix}.ml" + inlined_cmx="${inlined_prefix}.cmx" +-ocaml ${HOLLIGHT_DIR}/inline_load.ml ${template_ml} ${inlined_ml} ++(cd "$ROOT" && ocaml "${HOLLIGHT_DIR}"/inline_load.ml "${template_ml}" "${inlined_ml}") + + # Give a large stack size. + OCAMLRUNPARAM=l=2000000000 \ +diff --git a/tools/collect-specs.sh b/tools/collect-specs.sh +index a29aa6c8..784e0af3 100755 +--- a/tools/collect-specs.sh ++++ b/tools/collect-specs.sh +@@ -6,14 +6,12 @@ if [ "$#" -ne 1 ] && [ "$#" -ne 2 ]; then + exit 1 + fi + +-s2n_bignum_arch=$1 + if [ "$#" -eq 2 ]; then + filepat="$2" + else +- filepat="*.ml" ++ filepat="$1" + fi +-cd $s2n_bignum_arch > /dev/null + + # An env. var for sorting + export LC_ALL=C +-grep 'let [A-Z_0-9]*_SUBROUTINE_CORRECT' ${filepat} | cut -f2 -d' ' | sort ++grep -r 'let [A-Z_0-9]*_SUBROUTINE_CORRECT' ${filepat} | cut -f2 -d' ' | sort diff --git a/nix/s2n_bignum/default.nix b/nix/s2n_bignum/default.nix new file mode 100644 index 000000000..8f3965706 --- /dev/null +++ b/nix/s2n_bignum/default.nix @@ -0,0 +1,21 @@ +# SPDX-License-Identifier: Apache-2.0 +{ stdenv, fetchFromGitHub, writeText, ... }: +stdenv.mkDerivation rec { + pname = "s2n_bignum"; + version = "90cb5e35a823efee15cde72f0237af39a9bf7371"; + src = fetchFromGitHub { + owner = "jargh"; + repo = "s2n-bignum-dev"; + rev = "${version}"; + hash = "sha256-6uDvLG04h8IKYln612wG/aXPsCB9k8Zsh/cE2Y980tQ="; + }; + setupHook = writeText "setup-hook.sh" '' + export S2N_BIGNUM_DIR="$1" + ''; + patches = [ ./0001-fix-script-path.patch ]; + dontBuild = true; + installPhase = '' + mkdir -p $out + cp -a . $out/ + ''; +} From 876a48e01a5927722a3dedf32bfa39d36c32c004 Mon Sep 17 00:00:00 2001 From: "Thing-han, Lim" <15379156+potsrevennil@users.noreply.github.com> Date: Wed, 15 Jan 2025 12:42:29 +0800 Subject: [PATCH 3/6] remove tutorial targets for s2n-bignum Makefile Signed-off-by: Thing-han, Lim <15379156+potsrevennil@users.noreply.github.com> --- hol_light/arm/Makefile | 23 ----------------------- 1 file changed, 23 deletions(-) diff --git a/hol_light/arm/Makefile b/hol_light/arm/Makefile index 1f1e91713..c0b209704 100644 --- a/hol_light/arm/Makefile +++ b/hol_light/arm/Makefile @@ -394,12 +394,6 @@ UNOPT_OBJ = p256/unopt/p256_montjadd.o \ OBJ = $(POINT_OBJ) $(BIGNUM_OBJ) -# Tutorial assembly files - -TUTORIAL_PROOFS = $(wildcard tutorial/*.ml) - -TUTORIAL_OBJ = $(TUTORIAL_PROOFS:.ml=.o) tutorial/rel_loop2.o tutorial/rel_simp2.o tutorial/rel_veceq2.o - # According to # https://developer.apple.com/documentation/xcode/writing-arm64-code-for-apple-platforms, # x18 should not be used for Apple platforms. Check this using grep. @@ -431,8 +425,6 @@ HOLLIGHT:=$(HOLDIR)/hol.sh PROOF_BINS = $(OBJ:.o=.native) PROOF_LOGS = $(OBJ:.o=.correct) -TUTORIAL_PROOF_BINS = $(TUTORIAL_PROOFS:.ml=.native) -TUTORIAL_PROOF_LOGS = $(TUTORIAL_PROOFS:.ml=.correct) # Build precompiled native binaries of HOL Light proofs @@ -493,27 +485,12 @@ p521/p521_jscalarmul_alt.native: p521/bignum_mod_n521_9.native sm2/sm2_montjscalarmul.native: sm2/sm2_montjadd.native sm2/sm2_montjdouble.native sm2/sm2_montjscalarmul_alt.native: sm2/sm2_montjadd_alt.native sm2/sm2_montjdouble_alt.native -# Tutorial - -.SECONDEXPANSION: -tutorial/%.native: $(SRC_ARM)/tutorial/%.ml tutorial/%.o - $(Q)[ -d $(@D) ] || mkdir -p $(@D) - $(SRC)/tools/build-proof.sh "$<" "$(HOLLIGHT)" "$@" - -# Additional dependencies on .o files -tutorial/rel_loop.native: tutorial/rel_loop2.o -tutorial/rel_simp.native: tutorial/rel_simp2.o -tutorial/rel_veceq.native: tutorial/rel_veceq2.o - - unopt: $(UNOPT_OBJ) build_proofs: $(UNOPT_OBJ) $(PROOF_BINS); -build_tutorial: $(TUTORIAL_OBJ) $(TUTORIAL_PROOF_BINS); run_proofs: build_proofs $(PROOF_LOGS); proofs: run_proofs ; $(SRC)/tools/count-proofs.sh . -tutorial: build_tutorial $(TUTORIAL_PROOF_LOGS); # Always run sematest regardless of dependency check FORCE: ; From 24cb9629851232ed9211b378fc3ca5c6a5cb91fa Mon Sep 17 00:00:00 2001 From: "Thing-han, Lim" <15379156+potsrevennil@users.noreply.github.com> Date: Wed, 15 Jan 2025 12:47:14 +0800 Subject: [PATCH 4/6] move hol_light proof under the proofs folder Signed-off-by: Thing-han, Lim <15379156+potsrevennil@users.noreply.github.com> --- {hol_light => proofs/hol_light}/.gitignore | 0 {hol_light => proofs/hol_light}/arm/Makefile | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename {hol_light => proofs/hol_light}/.gitignore (100%) rename {hol_light => proofs/hol_light}/arm/Makefile (100%) diff --git a/hol_light/.gitignore b/proofs/hol_light/.gitignore similarity index 100% rename from hol_light/.gitignore rename to proofs/hol_light/.gitignore diff --git a/hol_light/arm/Makefile b/proofs/hol_light/arm/Makefile similarity index 100% rename from hol_light/arm/Makefile rename to proofs/hol_light/arm/Makefile From af94e2285053f79401350ab220208be3a1fcf002 Mon Sep 17 00:00:00 2001 From: "Thing-han, Lim" <15379156+potsrevennil@users.noreply.github.com> Date: Mon, 20 Jan 2025 16:45:42 +0800 Subject: [PATCH 5/6] CI: add job for simple hol_light check Signed-off-by: Thing-han, Lim <15379156+potsrevennil@users.noreply.github.com> --- .github/workflows/ci.yml | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 55c95d9fb..41d7fc174 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -371,6 +371,20 @@ jobs: nix-shell: ${{ matrix.compiler.shell }} cflags: "-std=c17" # The purpose of this job is to test non-default yet valid configurations + hol_light_proofs: + name: hol-light proofs + runs-on: pqcp-arm64 + if: github.repository_owner == 'pq-code-package' && !github.event.pull_request.head.repo.fork + needs: [quickcheck, quickcheck-windows, quickcheck-c90, quickcheck-lib, examples, lint, lint-markdown-link] + steps: + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + - uses: ./.github/actions/setup-shell + with: + gh_token: ${{ secrets.GITHUB_TOKEN }} + nix-shell: 'hol_light' + script: | + make -C proofs/hol_light/arm p256/bignum_montmul_p256.correct + config_variations: name: Non-standard configurations needs: [quickcheck, quickcheck-windows, quickcheck-c90, quickcheck-lib, examples, lint, lint-markdown-link] From a1b8105829e9163b8edd94b6cd976ea3e79abb78 Mon Sep 17 00:00:00 2001 From: Hanno Becker Date: Fri, 24 Jan 2025 06:09:18 +0000 Subject: [PATCH 6/6] HOL-Light: Run NTT and invNTT proof in CI Signed-off-by: Hanno Becker --- .github/workflows/ci.yml | 15 - .github/workflows/hol_light.yml | 37 ++ proofs/hol_light/arm/Makefile | 425 +------------- proofs/hol_light/arm/README.md | 39 ++ proofs/hol_light/arm/mlkem/mlkem_intt.S | 416 ++++++++++++++ proofs/hol_light/arm/mlkem/mlkem_ntt.S | 358 ++++++++++++ proofs/hol_light/arm/proofs/mlkem_intt.ml | 651 ++++++++++++++++++++++ proofs/hol_light/arm/proofs/mlkem_ntt.ml | 608 ++++++++++++++++++++ 8 files changed, 2137 insertions(+), 412 deletions(-) create mode 100644 .github/workflows/hol_light.yml create mode 100644 proofs/hol_light/arm/README.md create mode 100644 proofs/hol_light/arm/mlkem/mlkem_intt.S create mode 100644 proofs/hol_light/arm/mlkem/mlkem_ntt.S create mode 100644 proofs/hol_light/arm/proofs/mlkem_intt.ml create mode 100644 proofs/hol_light/arm/proofs/mlkem_ntt.ml diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 41d7fc174..e0660dd1e 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -370,21 +370,6 @@ jobs: acvp: false nix-shell: ${{ matrix.compiler.shell }} cflags: "-std=c17" - # The purpose of this job is to test non-default yet valid configurations - hol_light_proofs: - name: hol-light proofs - runs-on: pqcp-arm64 - if: github.repository_owner == 'pq-code-package' && !github.event.pull_request.head.repo.fork - needs: [quickcheck, quickcheck-windows, quickcheck-c90, quickcheck-lib, examples, lint, lint-markdown-link] - steps: - - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - - uses: ./.github/actions/setup-shell - with: - gh_token: ${{ secrets.GITHUB_TOKEN }} - nix-shell: 'hol_light' - script: | - make -C proofs/hol_light/arm p256/bignum_montmul_p256.correct - config_variations: name: Non-standard configurations needs: [quickcheck, quickcheck-windows, quickcheck-c90, quickcheck-lib, examples, lint, lint-markdown-link] diff --git a/.github/workflows/hol_light.yml b/.github/workflows/hol_light.yml new file mode 100644 index 000000000..d2565bfd5 --- /dev/null +++ b/.github/workflows/hol_light.yml @@ -0,0 +1,37 @@ +# SPDX-License-Identifier: Apache-2.0 + +name: HOL-Light +permissions: + contents: read +on: + push: + branches: ["main"] + # Only run upon changes to AArch64 NTT or invNTT + paths: + - 'proofs/hol_light/arm/**/*.S' + pull_request: + branches: ["main"] + paths: + # Only run upon changes to AArch64 NTT or invNTT + - 'proofs/hol_light/arm/**/*.S' + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + hol_light_proofs: + strategy: + matrix: + proof: [mlkem_ntt,mlkem_intt] + name: HOL Light proof for ${{ matrix.proof }}.S + runs-on: pqcp-arm64 + if: github.repository_owner == 'pq-code-package' && !github.event.pull_request.head.repo.fork + steps: + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + - uses: ./.github/actions/setup-shell + with: + gh_token: ${{ secrets.GITHUB_TOKEN }} + nix-shell: 'hol_light' + script: | + make -C proofs/hol_light/arm mlkem/${{ matrix.proof }}.correct diff --git a/proofs/hol_light/arm/Makefile b/proofs/hol_light/arm/Makefile index c0b209704..efc58b83f 100644 --- a/proofs/hol_light/arm/Makefile +++ b/proofs/hol_light/arm/Makefile @@ -3,25 +3,20 @@ # SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT-0 ############################################################################# -OSTYPE_RESULT=$(shell uname -s) -ARCHTYPE_RESULT=$(shell uname -m) - -# Assembler directives that mark symbols as .hidden -# or .private_extern can be enabled by passing -# in the S2N_BN_HIDE_SYMBOLS parameter as: # -# make S2N_BN_HIDE_SYMBOLS=1 +# This Makefile is derived from the Makefile arm/Makefile in s2n-bignum. +# - Remove all s2n-bignum proofs and tutorial, add mlkem-native proofs +# - Minor path modifications to support base theories from s2n-bignum +# to reside in a separate read-only directory # -SRC ?= $(S2N_BIGNUM_DIR) -SRC_ARM ?= $(SRC)/arm +.DEFAULT_GOAL := run_proofs -ifeq ($(S2N_BN_HIDE_SYMBOLS),1) -SYMBOL_HIDING=-DS2N_BN_HIDE_SYMBOLS=1 -else -SYMBOL_HIDING= -endif +OSTYPE_RESULT=$(shell uname -s) +ARCHTYPE_RESULT=$(shell uname -m) +SRC ?= $(S2N_BIGNUM_DIR) +SRC_ARM ?= $(SRC)/arm # Add explicit language input parameter to cpp, otherwise the use of #n for # numeric literals in ARM code is a problem when used inside #define macros @@ -69,344 +64,18 @@ endif endif endif -# List of object files for point operations and bignum operations - -POINT_OBJ = curve25519/curve25519_ladderstep.o \ - curve25519/curve25519_ladderstep_alt.o \ - curve25519/curve25519_pxscalarmul.o \ - curve25519/curve25519_pxscalarmul_alt.o \ - curve25519/curve25519_x25519.o \ - curve25519/curve25519_x25519_alt.o \ - curve25519/curve25519_x25519_byte.o \ - curve25519/curve25519_x25519_byte_alt.o \ - curve25519/curve25519_x25519base.o \ - curve25519/curve25519_x25519base_alt.o \ - curve25519/curve25519_x25519base_byte.o \ - curve25519/curve25519_x25519base_byte_alt.o \ - curve25519/edwards25519_decode.o \ - curve25519/edwards25519_decode_alt.o \ - curve25519/edwards25519_encode.o \ - curve25519/edwards25519_epadd.o \ - curve25519/edwards25519_epadd_alt.o \ - curve25519/edwards25519_epdouble.o \ - curve25519/edwards25519_epdouble_alt.o \ - curve25519/edwards25519_pdouble.o \ - curve25519/edwards25519_pdouble_alt.o \ - curve25519/edwards25519_pepadd.o \ - curve25519/edwards25519_pepadd_alt.o \ - curve25519/edwards25519_scalarmulbase.o \ - curve25519/edwards25519_scalarmulbase_alt.o \ - curve25519/edwards25519_scalarmuldouble.o \ - curve25519/edwards25519_scalarmuldouble_alt.o \ - p256/p256_montjadd.o \ - p256/p256_montjadd_alt.o \ - p256/p256_montjdouble.o \ - p256/p256_montjdouble_alt.o \ - p256/p256_montjmixadd.o \ - p256/p256_montjmixadd_alt.o \ - p256/p256_montjscalarmul.o \ - p256/p256_montjscalarmul_alt.o \ - p256/p256_scalarmul.o \ - p256/p256_scalarmul_alt.o \ - p256/p256_scalarmulbase.o \ - p256/p256_scalarmulbase_alt.o \ - p384/p384_montjadd.o \ - p384/p384_montjadd_alt.o \ - p384/p384_montjdouble.o \ - p384/p384_montjdouble_alt.o \ - p384/p384_montjmixadd.o \ - p384/p384_montjmixadd_alt.o \ - p384/p384_montjscalarmul.o \ - p384/p384_montjscalarmul_alt.o \ - p521/p521_jadd.o \ - p521/p521_jadd_alt.o \ - p521/p521_jdouble.o \ - p521/p521_jdouble_alt.o \ - p521/p521_jmixadd.o \ - p521/p521_jmixadd_alt.o \ - p521/p521_jscalarmul.o \ - p521/p521_jscalarmul_alt.o \ - secp256k1/secp256k1_jadd.o \ - secp256k1/secp256k1_jadd_alt.o \ - secp256k1/secp256k1_jdouble.o \ - secp256k1/secp256k1_jdouble_alt.o \ - secp256k1/secp256k1_jmixadd.o \ - secp256k1/secp256k1_jmixadd_alt.o \ - sm2/sm2_montjadd.o \ - sm2/sm2_montjadd_alt.o \ - sm2/sm2_montjdouble.o \ - sm2/sm2_montjdouble_alt.o \ - sm2/sm2_montjmixadd.o \ - sm2/sm2_montjmixadd_alt.o \ - sm2/sm2_montjscalarmul.o \ - sm2/sm2_montjscalarmul_alt.o - -BIGNUM_OBJ = curve25519/bignum_add_p25519.o \ - curve25519/bignum_cmul_p25519.o \ - curve25519/bignum_double_p25519.o \ - curve25519/bignum_inv_p25519.o \ - curve25519/bignum_invsqrt_p25519.o \ - curve25519/bignum_invsqrt_p25519_alt.o \ - curve25519/bignum_madd_n25519.o \ - curve25519/bignum_madd_n25519_alt.o \ - curve25519/bignum_mod_m25519_4.o \ - curve25519/bignum_mod_n25519.o \ - curve25519/bignum_mod_n25519_4.o \ - curve25519/bignum_mod_p25519_4.o \ - curve25519/bignum_mul_p25519.o \ - curve25519/bignum_mul_p25519_alt.o \ - curve25519/bignum_neg_p25519.o \ - curve25519/bignum_optneg_p25519.o \ - curve25519/bignum_sqr_p25519.o \ - curve25519/bignum_sqr_p25519_alt.o \ - curve25519/bignum_sqrt_p25519.o \ - curve25519/bignum_sqrt_p25519_alt.o \ - curve25519/bignum_sub_p25519.o \ - fastmul/bignum_emontredc_8n.o \ - fastmul/bignum_emontredc_8n_neon.o \ - fastmul/bignum_emontredc_8n_cdiff.o \ - fastmul/bignum_kmul_16_32.o \ - fastmul/bignum_kmul_16_32_neon.o \ - fastmul/bignum_kmul_32_64.o \ - fastmul/bignum_kmul_32_64_neon.o \ - fastmul/bignum_ksqr_16_32.o \ - fastmul/bignum_ksqr_16_32_neon.o \ - fastmul/bignum_ksqr_32_64.o \ - fastmul/bignum_ksqr_32_64_neon.o \ - fastmul/bignum_mul_4_8.o \ - fastmul/bignum_mul_4_8_alt.o \ - fastmul/bignum_mul_6_12.o \ - fastmul/bignum_mul_6_12_alt.o \ - fastmul/bignum_mul_8_16.o \ - fastmul/bignum_mul_8_16_alt.o \ - fastmul/bignum_mul_8_16_neon.o \ - fastmul/bignum_sqr_4_8.o \ - fastmul/bignum_sqr_4_8_alt.o \ - fastmul/bignum_sqr_6_12.o \ - fastmul/bignum_sqr_6_12_alt.o \ - fastmul/bignum_sqr_8_16.o \ - fastmul/bignum_sqr_8_16_alt.o \ - fastmul/bignum_sqr_8_16_neon.o \ - generic/bignum_add.o \ - generic/bignum_amontifier.o \ - generic/bignum_amontmul.o \ - generic/bignum_amontredc.o \ - generic/bignum_amontsqr.o \ - generic/bignum_bitfield.o \ - generic/bignum_bitsize.o \ - generic/bignum_cdiv.o \ - generic/bignum_cdiv_exact.o \ - generic/bignum_cld.o \ - generic/bignum_clz.o \ - generic/bignum_cmadd.o \ - generic/bignum_cmnegadd.o \ - generic/bignum_cmod.o \ - generic/bignum_cmul.o \ - generic/bignum_coprime.o \ - generic/bignum_copy.o \ - generic/bignum_copy_row_from_table.o \ - generic/bignum_copy_row_from_table_8n_neon.o \ - generic/bignum_copy_row_from_table_16_neon.o \ - generic/bignum_copy_row_from_table_32_neon.o \ - generic/bignum_ctd.o \ - generic/bignum_ctz.o \ - generic/bignum_demont.o \ - generic/bignum_digit.o \ - generic/bignum_digitsize.o \ - generic/bignum_divmod10.o \ - generic/bignum_emontredc.o \ - generic/bignum_eq.o \ - generic/bignum_even.o \ - generic/bignum_ge.o \ - generic/bignum_gt.o \ - generic/bignum_iszero.o \ - generic/bignum_le.o \ - generic/bignum_lt.o \ - generic/bignum_madd.o \ - generic/bignum_modadd.o \ - generic/bignum_moddouble.o \ - generic/bignum_modexp.o \ - generic/bignum_modifier.o \ - generic/bignum_modinv.o \ - generic/bignum_modoptneg.o \ - generic/bignum_modsub.o \ - generic/bignum_montifier.o \ - generic/bignum_montmul.o \ - generic/bignum_montredc.o \ - generic/bignum_montsqr.o \ - generic/bignum_mul.o \ - generic/bignum_muladd10.o \ - generic/bignum_mux.o \ - generic/bignum_mux16.o \ - generic/bignum_negmodinv.o \ - generic/bignum_nonzero.o \ - generic/bignum_normalize.o \ - generic/bignum_odd.o \ - generic/bignum_of_word.o \ - generic/bignum_optadd.o \ - generic/bignum_optneg.o \ - generic/bignum_optsub.o \ - generic/bignum_optsubadd.o \ - generic/bignum_pow2.o \ - generic/bignum_shl_small.o \ - generic/bignum_shr_small.o \ - generic/bignum_sqr.o \ - generic/bignum_sub.o \ - generic/word_bytereverse.o \ - generic/word_clz.o \ - generic/word_ctz.o \ - generic/word_divstep59.o \ - generic/word_max.o \ - generic/word_min.o \ - generic/word_negmodinv.o \ - generic/word_popcount.o \ - generic/word_recip.o \ - p256/bignum_add_p256.o \ - p256/bignum_bigendian_4.o \ - p256/bignum_cmul_p256.o \ - p256/bignum_deamont_p256.o \ - p256/bignum_demont_p256.o \ - p256/bignum_double_p256.o \ - p256/bignum_half_p256.o \ - p256/bignum_inv_p256.o \ - p256/bignum_littleendian_4.o \ - p256/bignum_mod_n256.o \ - p256/bignum_mod_n256_4.o \ - p256/bignum_mod_p256.o \ - p256/bignum_mod_p256_4.o \ - p256/bignum_montinv_p256.o \ - p256/bignum_montmul_p256.o \ - p256/bignum_montmul_p256_alt.o \ - p256/bignum_montmul_p256_neon.o \ - p256/bignum_montsqr_p256.o \ - p256/bignum_montsqr_p256_alt.o \ - p256/bignum_montsqr_p256_neon.o \ - p256/bignum_mux_4.o \ - p256/bignum_neg_p256.o \ - p256/bignum_nonzero_4.o \ - p256/bignum_optneg_p256.o \ - p256/bignum_sub_p256.o \ - p256/bignum_tomont_p256.o \ - p256/bignum_triple_p256.o \ - p384/bignum_add_p384.o \ - p384/bignum_bigendian_6.o \ - p384/bignum_cmul_p384.o \ - p384/bignum_deamont_p384.o \ - p384/bignum_demont_p384.o \ - p384/bignum_double_p384.o \ - p384/bignum_half_p384.o \ - p384/bignum_inv_p384.o \ - p384/bignum_littleendian_6.o \ - p384/bignum_mod_n384.o \ - p384/bignum_mod_n384_6.o \ - p384/bignum_mod_p384.o \ - p384/bignum_mod_p384_6.o \ - p384/bignum_montinv_p384.o \ - p384/bignum_montmul_p384.o \ - p384/bignum_montmul_p384_alt.o \ - p384/bignum_montmul_p384_neon.o \ - p384/bignum_montsqr_p384.o \ - p384/bignum_montsqr_p384_alt.o \ - p384/bignum_montsqr_p384_neon.o \ - p384/bignum_mux_6.o \ - p384/bignum_neg_p384.o \ - p384/bignum_nonzero_6.o \ - p384/bignum_optneg_p384.o \ - p384/bignum_sub_p384.o \ - p384/bignum_tomont_p384.o \ - p384/bignum_triple_p384.o \ - p521/bignum_add_p521.o \ - p521/bignum_cmul_p521.o \ - p521/bignum_deamont_p521.o \ - p521/bignum_demont_p521.o \ - p521/bignum_double_p521.o \ - p521/bignum_fromlebytes_p521.o \ - p521/bignum_half_p521.o \ - p521/bignum_inv_p521.o \ - p521/bignum_mod_n521_9.o \ - p521/bignum_mod_p521_9.o \ - p521/bignum_montmul_p521.o \ - p521/bignum_montmul_p521_alt.o \ - p521/bignum_montmul_p521_neon.o \ - p521/bignum_montsqr_p521.o \ - p521/bignum_montsqr_p521_alt.o \ - p521/bignum_montsqr_p521_neon.o \ - p521/bignum_mul_p521.o \ - p521/bignum_mul_p521_alt.o \ - p521/bignum_mul_p521_neon.o \ - p521/bignum_neg_p521.o \ - p521/bignum_optneg_p521.o \ - p521/bignum_sqr_p521.o \ - p521/bignum_sqr_p521_alt.o \ - p521/bignum_sqr_p521_neon.o \ - p521/bignum_sub_p521.o \ - p521/bignum_tolebytes_p521.o \ - p521/bignum_tomont_p521.o \ - p521/bignum_triple_p521.o \ - secp256k1/bignum_add_p256k1.o \ - secp256k1/bignum_cmul_p256k1.o \ - secp256k1/bignum_deamont_p256k1.o \ - secp256k1/bignum_demont_p256k1.o \ - secp256k1/bignum_double_p256k1.o \ - secp256k1/bignum_half_p256k1.o \ - secp256k1/bignum_mod_n256k1_4.o \ - secp256k1/bignum_mod_p256k1_4.o \ - secp256k1/bignum_montmul_p256k1.o \ - secp256k1/bignum_montmul_p256k1_alt.o \ - secp256k1/bignum_montsqr_p256k1.o \ - secp256k1/bignum_montsqr_p256k1_alt.o \ - secp256k1/bignum_mul_p256k1.o \ - secp256k1/bignum_mul_p256k1_alt.o \ - secp256k1/bignum_neg_p256k1.o \ - secp256k1/bignum_optneg_p256k1.o \ - secp256k1/bignum_sqr_p256k1.o \ - secp256k1/bignum_sqr_p256k1_alt.o \ - secp256k1/bignum_sub_p256k1.o \ - secp256k1/bignum_tomont_p256k1.o \ - secp256k1/bignum_triple_p256k1.o \ - sm2/bignum_add_sm2.o \ - sm2/bignum_cmul_sm2.o \ - sm2/bignum_deamont_sm2.o \ - sm2/bignum_demont_sm2.o \ - sm2/bignum_double_sm2.o \ - sm2/bignum_half_sm2.o \ - sm2/bignum_inv_sm2.o \ - sm2/bignum_mod_nsm2.o \ - sm2/bignum_mod_nsm2_4.o \ - sm2/bignum_mod_sm2.o \ - sm2/bignum_mod_sm2_4.o \ - sm2/bignum_montinv_sm2.o \ - sm2/bignum_montmul_sm2.o \ - sm2/bignum_montmul_sm2_alt.o \ - sm2/bignum_montsqr_sm2.o \ - sm2/bignum_montsqr_sm2_alt.o \ - sm2/bignum_neg_sm2.o \ - sm2/bignum_optneg_sm2.o \ - sm2/bignum_sub_sm2.o \ - sm2/bignum_tomont_sm2.o \ - sm2/bignum_triple_sm2.o - -UNOPT_OBJ = p256/unopt/p256_montjadd.o \ - p256/unopt/p256_montjdouble.o \ - p384/unopt/p384_montjadd.o \ - p384/unopt/p384_montjdouble.o \ - fastmul/unopt/bignum_emontredc_8n_cdiff_base.o - -OBJ = $(POINT_OBJ) $(BIGNUM_OBJ) +OBJ = mlkem/mlkem_ntt.o mlkem/mlkem_intt.o # According to # https://developer.apple.com/documentation/xcode/writing-arm64-code-for-apple-platforms, # x18 should not be used for Apple platforms. Check this using grep. - -$(OBJ): %.o : $(SRC_ARM)/%.S +$(OBJ): %.o : %.S $(Q)[ -d $(@D) ] || mkdir -p $(@D) cat $< | $(PREPROCESS) | $(SPLIT) | grep -v -E '^\s+.quad\s+0x[0-9a-f]+$$' | $(ASSEMBLE) -o $@ - $(OBJDUMP) $@ | ( ( ! grep --ignore-case -E 'w18|[^0]x18' ) || ( rm $@ ; exit 1 ) ) cat $< | $(PREPROCESS) | $(SPLIT) | $(ASSEMBLE) -o $@ - -libs2nbignum.a: $(OBJ) ; ar -rc libs2nbignum.a $(OBJ) - -clean:; rm -f libs2nbignum.a */*.o */*/*.o */*.correct */*.native +clean:; rm -f */*.o */*/*.o */*.correct */*.native # Proof-related parts # @@ -420,9 +89,11 @@ clean:; rm -f libs2nbignum.a */*.o */*/*.o */*.correct */*.native # in your home directory, and do "make" inside the subdirectory hol-light, # then the following HOLDIR setting should be right: -HOLDIR?=$(HOME)/hol-light +HOLDIR?=$(HOLLIGHTDIR) HOLLIGHT:=$(HOLDIR)/hol.sh +BASE?=$(shell dirname $(realpath $(firstword $(MAKEFILE_LIST)))) + PROOF_BINS = $(OBJ:.o=.native) PROOF_LOGS = $(OBJ:.o=.correct) @@ -433,65 +104,25 @@ proofs/simulator.native: $(SRC_ARM)/proofs/simulator.ml $(SRC)/tools/build-proof.sh $(SRC_ARM)/proofs/simulator.ml "$(HOLLIGHT)" "$@" .SECONDEXPANSION: -%.native: $(SRC_ARM)/proofs/$$(*F).ml %.o ; $(SRC)/tools/build-proof.sh $< "$(HOLLIGHT)" "$@" +%.native: proofs/$$(*F).ml %.o ; $(SRC)/tools/build-proof.sh $(BASE)/$< "$(HOLLIGHT)" "$@" # Run them and print the standard output+error at *.correct - -%.correct: %.native ; $(SRC)/tools/run-proof.sh "$<" "$@" - -# Cases where a proof uses other proofs for lemmas and/or subroutines - -p256/bignum_montmul_p256_neon.native: p256/bignum_montmul_p256.native -p384/bignum_montmul_p384_neon.native: p384/bignum_montmul_p384.native -p521/bignum_montmul_p521_neon.native: p521/bignum_montmul_p521.native -p256/bignum_montsqr_p256_neon.native: p256/bignum_montsqr_p256.native -p384/bignum_montsqr_p384_neon.native: p384/bignum_montsqr_p384.native -p521/bignum_montsqr_p521_neon.native: p521/bignum_montsqr_p521.native -p521/bignum_mul_p521_neon.native: p521/bignum_mul_p521.native -p521/bignum_sqr_p521_neon.native: p521/bignum_sqr_p521.native -fastmul/bignum_mul_8_16_neon.native: fastmul/bignum_mul_8_16.native -fastmul/bignum_sqr_8_16_neon.native: fastmul/bignum_sqr_8_16.native -curve25519/curve25519_x25519.native: curve25519/bignum_inv_p25519.native -curve25519/curve25519_x25519_alt.native: curve25519/bignum_inv_p25519.native -curve25519/curve25519_x25519_byte.native: curve25519/bignum_inv_p25519.native -curve25519/curve25519_x25519_byte_alt.native: curve25519/bignum_inv_p25519.native -curve25519/curve25519_x25519base.native: curve25519/bignum_inv_p25519.native -curve25519/curve25519_x25519base_alt.native: curve25519/bignum_inv_p25519.native -curve25519/curve25519_x25519base_byte.native: curve25519/bignum_inv_p25519.native -curve25519/curve25519_x25519base_byte_alt.native: curve25519/bignum_inv_p25519.native -curve25519/edwards25519_scalarmulbase.native: curve25519/bignum_inv_p25519.native -curve25519/edwards25519_scalarmulbase_alt.native: curve25519/bignum_inv_p25519.native -curve25519/edwards25519_scalarmuldouble.native: curve25519/bignum_inv_p25519.native -curve25519/edwards25519_scalarmuldouble_alt.native: curve25519/bignum_inv_p25519.native -generic/bignum_modexp.native: generic/bignum_amontifier.native generic/bignum_amontmul.native generic/bignum_demont.native generic/bignum_mux.native -p256/p256_montjadd.native: p256/unopt/p256_montjadd.o p256/bignum_montsqr_p256_neon.native p256/bignum_montmul_p256_neon.native p256/bignum_sub_p256.native -p256/p256_montjdouble.native: p256/unopt/p256_montjdouble.o p256/bignum_montsqr_p256_neon.native p256/bignum_montmul_p256_neon.native p256/bignum_sub_p256.native p256/bignum_add_p256.native -p256/p256_montjscalarmul.native: p256/p256_montjadd.native p256/p256_montjdouble.native -p256/p256_montjscalarmul_alt.native: p256/p256_montjadd_alt.native p256/p256_montjdouble_alt.native -p256/p256_scalarmul.native: p256/bignum_demont_p256.native p256/bignum_inv_p256.native p256/bignum_tomont_p256.native p256/p256_montjadd.native p256/p256_montjdouble.native p256/p256_montjmixadd.native -p256/p256_scalarmul_alt.native: p256/bignum_demont_p256.native p256/bignum_inv_p256.native p256/p256_montjadd_alt.native p256/p256_montjdouble_alt.native p256/p256_montjmixadd_alt.native -p256/p256_scalarmulbase.native: p256/bignum_demont_p256.native p256/bignum_inv_p256.native p256/p256_montjmixadd.native -p256/p256_scalarmulbase_alt.native: p256/bignum_demont_p256.native p256/bignum_inv_p256.native p256/p256_montjmixadd_alt.native -p384/p384_montjadd.native: p384/unopt/p384_montjadd.o p384/bignum_montsqr_p384_neon.native p384/bignum_montmul_p384_neon.native p384/bignum_sub_p384.native -p384/p384_montjdouble.native: p384/unopt/p384_montjdouble.o p384/bignum_montsqr_p384_neon.native p384/bignum_montmul_p384_neon.native p384/bignum_sub_p384.native p384/bignum_add_p384.native -p384/p384_montjscalarmul.native: \ - p384/p384_montjadd.native p384/p384_montjdouble.native \ - p384/bignum_sub_p384.native p384/bignum_add_p384.native -p384/p384_montjscalarmul_alt.native: p384/p384_montjadd_alt.native p384/p384_montjdouble_alt.native -p521/p521_jadd.native: p521/bignum_mul_p521_neon.native p521/bignum_sqr_p521_neon.native -p521/p521_jdouble.native: p521/bignum_mul_p521_neon.native p521/bignum_sqr_p521_neon.native -p521/p521_jscalarmul.native: p521/bignum_mod_n521_9.native p521/p521_jadd.native p521/p521_jdouble.native -p521/p521_jscalarmul_alt.native: p521/bignum_mod_n521_9.native -sm2/sm2_montjscalarmul.native: sm2/sm2_montjadd.native sm2/sm2_montjdouble.native -sm2/sm2_montjscalarmul_alt.native: sm2/sm2_montjadd_alt.native sm2/sm2_montjdouble_alt.native - -unopt: $(UNOPT_OBJ) - -build_proofs: $(UNOPT_OBJ) $(PROOF_BINS); +%.correct: %.native + $< 2>&1 | tee $@ + @if (grep -i "error:\|exception:" "$@" >/dev/null); then \ + echo "$< had errors!"; \ + exit 1; \ + else \ + echo "$< OK"; \ + fi + +build_proofs: $(PROOF_BINS); run_proofs: build_proofs $(PROOF_LOGS); proofs: run_proofs ; $(SRC)/tools/count-proofs.sh . +.PHONY: proofs build_proofs run_proofs sematest clean + # Always run sematest regardless of dependency check FORCE: ; # Always use max. # of cores because in Makefile one cannot get the passed number of -j. diff --git a/proofs/hol_light/arm/README.md b/proofs/hol_light/arm/README.md new file mode 100644 index 000000000..797988593 --- /dev/null +++ b/proofs/hol_light/arm/README.md @@ -0,0 +1,39 @@ +[//]: # (SPDX-License-Identifier: CC-BY-4.0) + +# HOL Light functional correctness proofs + +This directory contains functional correctness proofs for some optimized +ML-KEM AArch64 assembly routines. The proofs were developed by John Harrison +and are written in the [HOL Light](https://hol-light.github.io/) theorem +prover, utilizing the assembly verification infrastructure from [s2n-bignum](https://github.com/awslabs/s2n-bignum). + +Each function is proved in a separate `.ml` file in [proofs/](proofs). Each file +contains the byte code being verified, as well as the specification that is being +proved. Specifications are essentially Hoare triples, with the noteworthy difference +that the program is implicit as the content of memory at the PC; which is asserted to +be the code under verification as part of the precondition. + +## Running the proofs + +To reproduce the proofs, enter the nix shell via + +```bash +nix develop --experimental-features 'nix-command flakes' +``` + +from mlkem-native's base directory. Then + +```bash +make -C proofs/hol_light/arm +``` + +will build and run the proofs. Note that this make take hours even on powerful machines. + +## What is covered? + +At present, this directory contains functional correctness proofs for the following functions: + +- Optimized AArch64 forward NTT: [mlkem_ntt.S](mlkem/mlkem_ntt.S) +- Optimized AArch64 inverse NTT: [mlkem_intt.S](mlkem/mlkem_intt.S) + +Both functions are super-optimized using [SLOTHY](https://github.com/slothy-optimizer/slothy/). diff --git a/proofs/hol_light/arm/mlkem/mlkem_intt.S b/proofs/hol_light/arm/mlkem/mlkem_intt.S new file mode 100644 index 000000000..ee0322c5f --- /dev/null +++ b/proofs/hol_light/arm/mlkem/mlkem_intt.S @@ -0,0 +1,416 @@ +// Copyright (c) 2022 Arm Limited +// Copyright (c) 2022 Hanno Becker +// Copyright (c) 2023 Amin Abdulrahman, Matthias Kannwischer +// Copyright (c) 2024 The mlkem-native project authors +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: MIT + +// ---------------------------------------------------------------------------- +// Inverse number-theoretic transform from ML-KEM +// Input a[256], z_01234[80], z_56[384] (all signed 16-bit words); output a[256] (signed 16-bit words). +// +// The transform is in-place with input and output a[256], with the input in +// bitreversed order and the output mapped into the Montgomery domain via +// x |-> (2^16 * x) mod 3329. The two other parameters are expected to point to +// tables of constants whose definitions can be found in the mlkem-native +// repo (mlkem/native/aarch64/src/aarch64_zetas.c) or our "tests/test.c". +// +// extern void mlkem_intt(int16_t a[256],int16_t z_01234[80],int16_t z_56[384]); +// +// Standard ARM ABI: X0 = a, X1 = z_01234, X2 = z_56 +// ---------------------------------------------------------------------------- +#include "_internal_s2n_bignum.h" + + S2N_BN_SYM_VISIBILITY_DIRECTIVE(mlkem_intt) + S2N_BN_SYM_PRIVACY_DIRECTIVE(mlkem_intt) + .text + .balign 4 + +S2N_BN_SYMBOL(mlkem_intt): + +// This implementation is generated by SLOTHY, set up to optimize for +// the Neoverse N1 microarchitecture, starting from the clean version +// +// https://github.com/pq-code-package/mlkem-native/blob/main/mlkem/native/aarch64/intt_clean.S +// +// in the mlkem-native repository. + +PQCP_MLKEM_NATIVE_MLKEM512_intt_asm_opt: + sub sp, sp, #0x40 + stp d8, d9, [sp] + stp d10, d11, [sp, #16] + stp d12, d13, [sp, #32] + stp d14, d15, [sp, #48] + mov w5, #0xd01 + mov v7.h[0], w5 + mov w5, #0x4ebf + mov v7.h[1], w5 + mov w5, #0x200 + dup v29.8h, w5 + mov w5, #0x13b0 + dup v30.8h, w5 + mov x3, x0 + mov x4, #0x8 + +scale_start: + ldr q8, [x3] + ldr q9, [x3, #16] + ldr q10, [x3, #32] + ldr q11, [x3, #48] + sqrdmulh v27.8h, v8.8h, v30.8h + mul v8.8h, v8.8h, v29.8h + mls v8.8h, v27.8h, v7.h[0] + sqrdmulh v27.8h, v9.8h, v30.8h + mul v9.8h, v9.8h, v29.8h + mls v9.8h, v27.8h, v7.h[0] + sqrdmulh v27.8h, v10.8h, v30.8h + mul v10.8h, v10.8h, v29.8h + mls v10.8h, v27.8h, v7.h[0] + sqrdmulh v27.8h, v11.8h, v30.8h + mul v11.8h, v11.8h, v29.8h + mls v11.8h, v27.8h, v7.h[0] + str q8, [x3], #64 + stur q9, [x3, #-48] + stur q10, [x3, #-32] + stur q11, [x3, #-16] + subs x4, x4, #0x1 + cbnz x4, scale_start + mov x3, x0 + mov x4, #0x8 + ldr q26, [x3] + ldr q8, [x3, #16] + ldr q24, [x3, #32] + ldr q16, [x3, #48] + ldr q9, [x2], #96 + trn1 v0.4s, v24.4s, v16.4s + ldur q6, [x2, #-80] + ldur q3, [x2, #-64] + ldur q15, [x2, #-48] + ldur q4, [x2, #-32] + ldur q28, [x2, #-16] + sub x4, x4, #0x1 + +layer3456_start: + trn1 v12.4s, v26.4s, v8.4s + trn2 v26.4s, v26.4s, v8.4s + trn2 v8.4s, v24.4s, v16.4s + trn2 v11.2d, v12.2d, v0.2d + trn1 v12.2d, v12.2d, v0.2d + trn2 v16.2d, v26.2d, v8.2d + trn1 v26.2d, v26.2d, v8.2d + sub v8.8h, v11.8h, v16.8h + add v11.8h, v11.8h, v16.8h + sub v16.8h, v12.8h, v26.8h + add v12.8h, v12.8h, v26.8h + sqrdmulh v26.8h, v8.8h, v28.8h + sqrdmulh v15.8h, v16.8h, v15.8h + mul v16.8h, v16.8h, v3.8h + mul v8.8h, v8.8h, v4.8h + sub v0.8h, v12.8h, v11.8h + add v12.8h, v12.8h, v11.8h + mls v16.8h, v15.8h, v7.h[0] + mls v8.8h, v26.8h, v7.h[0] + sqrdmulh v26.8h, v0.8h, v6.8h + mul v11.8h, v0.8h, v9.8h + ldr q15, [x1], #16 + sub v0.8h, v16.8h, v8.8h + mls v11.8h, v26.8h, v7.h[0] + add v26.8h, v16.8h, v8.8h + sqrdmulh v8.8h, v0.8h, v6.8h + mul v16.8h, v0.8h, v9.8h + trn1 v0.4s, v12.4s, v26.4s + trn2 v12.4s, v12.4s, v26.4s + ldr q26, [x3, #64] + mls v16.8h, v8.8h, v7.h[0] + ldr q8, [x3, #80] + ldr q24, [x3, #96] + trn1 v9.4s, v11.4s, v16.4s + trn2 v11.4s, v11.4s, v16.4s + ldr q16, [x3, #112] + trn2 v6.2d, v0.2d, v9.2d + trn2 v3.2d, v12.2d, v11.2d + trn1 v0.2d, v0.2d, v9.2d + trn1 v12.2d, v12.2d, v11.2d + sub v11.8h, v6.8h, v3.8h + sub v9.8h, v0.8h, v12.8h + add v12.8h, v0.8h, v12.8h + sqrdmulh v0.8h, v11.8h, v15.h[5] + sqrdmulh v4.8h, v9.8h, v15.h[3] + mul v9.8h, v9.8h, v15.h[2] + mul v11.8h, v11.8h, v15.h[4] + add v6.8h, v6.8h, v3.8h + sqdmulh v3.8h, v12.8h, v7.h[1] + mls v9.8h, v4.8h, v7.h[0] + mls v11.8h, v0.8h, v7.h[0] + sqdmulh v0.8h, v6.8h, v7.h[1] + srshr v3.8h, v3.8h, #11 + sqdmulh v4.8h, v9.8h, v7.h[1] + sqdmulh v28.8h, v11.8h, v7.h[1] + mls v12.8h, v3.8h, v7.h[0] + srshr v0.8h, v0.8h, #11 + srshr v3.8h, v4.8h, #11 + srshr v4.8h, v28.8h, #11 + mls v6.8h, v0.8h, v7.h[0] + mls v9.8h, v3.8h, v7.h[0] + mls v11.8h, v4.8h, v7.h[0] + trn1 v0.4s, v24.4s, v16.4s + sub v3.8h, v12.8h, v6.8h + add v12.8h, v12.8h, v6.8h + sub v6.8h, v9.8h, v11.8h + sqrdmulh v4.8h, v3.8h, v15.h[1] + mul v3.8h, v3.8h, v15.h[0] + sqrdmulh v28.8h, v6.8h, v15.h[1] + mul v15.8h, v6.8h, v15.h[0] + add v11.8h, v9.8h, v11.8h + mls v3.8h, v4.8h, v7.h[0] + str q12, [x3], #64 + mls v15.8h, v28.8h, v7.h[0] + stur q11, [x3, #-48] + ldr q9, [x2], #96 + stur q3, [x3, #-32] + ldur q6, [x2, #-80] + stur q15, [x3, #-16] + ldur q3, [x2, #-64] + ldur q15, [x2, #-48] + ldur q4, [x2, #-32] + ldur q28, [x2, #-16] + sub x4, x4, #0x1 + cbnz x4, layer3456_start + trn1 v11.4s, v26.4s, v8.4s + trn2 v24.4s, v24.4s, v16.4s + trn2 v26.4s, v26.4s, v8.4s + trn1 v18.2d, v11.2d, v0.2d + trn2 v11.2d, v11.2d, v0.2d + trn2 v12.2d, v26.2d, v24.2d + trn1 v8.2d, v26.2d, v24.2d + sub v26.8h, v11.8h, v12.8h + sub v13.8h, v18.8h, v8.8h + add v24.8h, v18.8h, v8.8h + mul v16.8h, v26.8h, v4.8h + sqrdmulh v17.8h, v13.8h, v15.8h + mul v3.8h, v13.8h, v3.8h + sqrdmulh v26.8h, v26.8h, v28.8h + add v10.8h, v11.8h, v12.8h + mls v3.8h, v17.8h, v7.h[0] + mls v16.8h, v26.8h, v7.h[0] + sub v26.8h, v24.8h, v10.8h + ldr q4, [x1], #16 + sub v12.8h, v3.8h, v16.8h + sqrdmulh v15.8h, v26.8h, v6.8h + mul v11.8h, v26.8h, v9.8h + mul v8.8h, v12.8h, v9.8h + sqrdmulh v12.8h, v12.8h, v6.8h + add v0.8h, v24.8h, v10.8h + mls v11.8h, v15.8h, v7.h[0] + add v6.8h, v3.8h, v16.8h + mls v8.8h, v12.8h, v7.h[0] + trn2 v26.4s, v0.4s, v6.4s + trn2 v12.4s, v11.4s, v8.4s + trn1 v3.4s, v11.4s, v8.4s + trn1 v17.4s, v0.4s, v6.4s + trn1 v8.2d, v26.2d, v12.2d + trn2 v13.2d, v26.2d, v12.2d + trn1 v11.2d, v17.2d, v3.2d + trn2 v15.2d, v17.2d, v3.2d + sub v12.8h, v11.8h, v8.8h + add v16.8h, v15.8h, v13.8h + sub v26.8h, v15.8h, v13.8h + mul v0.8h, v12.8h, v4.h[2] + sqrdmulh v9.8h, v12.8h, v4.h[3] + mul v13.8h, v26.8h, v4.h[4] + sqrdmulh v26.8h, v26.8h, v4.h[5] + add v24.8h, v11.8h, v8.8h + mls v0.8h, v9.8h, v7.h[0] + sqdmulh v12.8h, v16.8h, v7.h[1] + mls v13.8h, v26.8h, v7.h[0] + sqdmulh v11.8h, v24.8h, v7.h[1] + sqdmulh v8.8h, v0.8h, v7.h[1] + srshr v12.8h, v12.8h, #11 + sqdmulh v26.8h, v13.8h, v7.h[1] + srshr v11.8h, v11.8h, #11 + mls v16.8h, v12.8h, v7.h[0] + srshr v8.8h, v8.8h, #11 + srshr v26.8h, v26.8h, #11 + mls v24.8h, v11.8h, v7.h[0] + mls v0.8h, v8.8h, v7.h[0] + mls v13.8h, v26.8h, v7.h[0] + sub v26.8h, v24.8h, v16.8h + add v15.8h, v24.8h, v16.8h + sub v12.8h, v0.8h, v13.8h + mul v11.8h, v26.8h, v4.h[0] + sqrdmulh v16.8h, v26.8h, v4.h[1] + mul v26.8h, v12.8h, v4.h[0] + sqrdmulh v8.8h, v12.8h, v4.h[1] + add v12.8h, v0.8h, v13.8h + mls v11.8h, v16.8h, v7.h[0] + str q15, [x3], #64 + mls v26.8h, v8.8h, v7.h[0] + stur q12, [x3, #-48] + stur q11, [x3, #-32] + stur q26, [x3, #-16] + mov x4, #0x4 + ldr q0, [x1], #32 + ldur q1, [x1, #-16] + ldr q24, [x0, #128] + ldr q16, [x0, #192] + ldr q9, [x0, #256] + ldr q6, [x0, #320] + ldr q3, [x0, #384] + ldr q4, [x0, #448] + add v28.8h, v9.8h, v6.8h + add v19.8h, v24.8h, v16.8h + add v13.8h, v3.8h, v4.8h + ldr q11, [x0] + add v23.8h, v28.8h, v13.8h + ldr q15, [x0, #64] + sub x4, x4, #0x1 + +layer012_start: + sub v12.8h, v11.8h, v15.8h + add v26.8h, v11.8h, v15.8h + sub v8.8h, v24.8h, v16.8h + sqrdmulh v11.8h, v12.8h, v0.h[7] + mul v12.8h, v12.8h, v0.h[6] + sub v16.8h, v26.8h, v19.8h + add v26.8h, v26.8h, v19.8h + sqrdmulh v15.8h, v8.8h, v1.h[1] + mul v8.8h, v8.8h, v1.h[0] + mls v12.8h, v11.8h, v7.h[0] + sub v11.8h, v9.8h, v6.8h + sqrdmulh v24.8h, v16.8h, v0.h[3] + mul v16.8h, v16.8h, v0.h[2] + sub v9.8h, v26.8h, v23.8h + add v26.8h, v26.8h, v23.8h + mls v8.8h, v15.8h, v7.h[0] + sqrdmulh v15.8h, v11.8h, v1.h[3] + mul v11.8h, v11.8h, v1.h[2] + sub v6.8h, v3.8h, v4.8h + sub v3.8h, v12.8h, v8.8h + add v12.8h, v12.8h, v8.8h + mls v11.8h, v15.8h, v7.h[0] + sqrdmulh v8.8h, v6.8h, v1.h[5] + mls v16.8h, v24.8h, v7.h[0] + mul v15.8h, v6.8h, v1.h[4] + sqrdmulh v24.8h, v3.8h, v0.h[3] + mul v6.8h, v3.8h, v0.h[2] + sqrdmulh v3.8h, v9.8h, v0.h[1] + mul v9.8h, v9.8h, v0.h[0] + str q26, [x0], #16 + mls v15.8h, v8.8h, v7.h[0] + mls v6.8h, v24.8h, v7.h[0] + sub v26.8h, v28.8h, v13.8h + mls v9.8h, v3.8h, v7.h[0] + sub v8.8h, v11.8h, v15.8h + sqrdmulh v24.8h, v26.8h, v0.h[5] + mul v26.8h, v26.8h, v0.h[4] + add v11.8h, v11.8h, v15.8h + sqrdmulh v15.8h, v8.8h, v0.h[5] + mul v8.8h, v8.8h, v0.h[4] + mls v26.8h, v24.8h, v7.h[0] + sub v24.8h, v12.8h, v11.8h + add v12.8h, v12.8h, v11.8h + mls v8.8h, v15.8h, v7.h[0] + sqrdmulh v11.8h, v24.8h, v0.h[1] + mul v15.8h, v24.8h, v0.h[0] + sub v24.8h, v16.8h, v26.8h + add v26.8h, v16.8h, v26.8h + sub v16.8h, v6.8h, v8.8h + mls v15.8h, v11.8h, v7.h[0] + sqrdmulh v11.8h, v24.8h, v0.h[1] + mul v24.8h, v24.8h, v0.h[0] + add v8.8h, v6.8h, v8.8h + sqrdmulh v6.8h, v16.8h, v0.h[1] + mul v16.8h, v16.8h, v0.h[0] + mls v24.8h, v11.8h, v7.h[0] + str q9, [x0, #240] + ldr q11, [x0] + mls v16.8h, v6.8h, v7.h[0] + str q15, [x0, #304] + ldr q15, [x0, #64] + str q24, [x0, #368] + ldr q24, [x0, #128] + str q16, [x0, #432] + ldr q16, [x0, #192] + str q12, [x0, #48] + ldr q9, [x0, #256] + ldr q6, [x0, #320] + ldr q3, [x0, #384] + ldr q4, [x0, #448] + str q26, [x0, #112] + add v28.8h, v9.8h, v6.8h + add v13.8h, v3.8h, v4.8h + str q8, [x0, #176] + add v19.8h, v24.8h, v16.8h + add v23.8h, v28.8h, v13.8h + sub x4, x4, #0x1 + cbnz x4, layer012_start + add v10.8h, v11.8h, v15.8h + sub v12.8h, v28.8h, v13.8h + sub v11.8h, v11.8h, v15.8h + sub v22.8h, v10.8h, v19.8h + mul v18.8h, v12.8h, v0.h[4] + sqrdmulh v26.8h, v12.8h, v0.h[5] + sqrdmulh v12.8h, v22.8h, v0.h[3] + mul v13.8h, v22.8h, v0.h[2] + sub v31.8h, v24.8h, v16.8h + sqrdmulh v22.8h, v11.8h, v0.h[7] + mls v18.8h, v26.8h, v7.h[0] + mls v13.8h, v12.8h, v7.h[0] + sqrdmulh v2.8h, v31.8h, v1.h[1] + mul v5.8h, v31.8h, v1.h[0] + mul v15.8h, v11.8h, v0.h[6] + sub v12.8h, v13.8h, v18.8h + sub v4.8h, v3.8h, v4.8h + mls v5.8h, v2.8h, v7.h[0] + sqrdmulh v26.8h, v12.8h, v0.h[1] + mul v12.8h, v12.8h, v0.h[0] + mls v15.8h, v22.8h, v7.h[0] + sqrdmulh v8.8h, v4.8h, v1.h[5] + mul v4.8h, v4.8h, v1.h[4] + mls v12.8h, v26.8h, v7.h[0] + sub v21.8h, v15.8h, v5.8h + sub v28.8h, v9.8h, v6.8h + mls v4.8h, v8.8h, v7.h[0] + mul v24.8h, v21.8h, v0.h[2] + sqrdmulh v8.8h, v21.8h, v0.h[3] + sqrdmulh v6.8h, v28.8h, v1.h[3] + add v19.8h, v10.8h, v19.8h + mul v28.8h, v28.8h, v1.h[2] + mls v24.8h, v8.8h, v7.h[0] + sub v11.8h, v19.8h, v23.8h + str q12, [x0, #384] + mls v28.8h, v6.8h, v7.h[0] + sqrdmulh v16.8h, v11.8h, v0.h[1] + mul v9.8h, v11.8h, v0.h[0] + add v6.8h, v15.8h, v5.8h + add v26.8h, v28.8h, v4.8h + sub v15.8h, v28.8h, v4.8h + mls v9.8h, v16.8h, v7.h[0] + add v3.8h, v6.8h, v26.8h + mul v8.8h, v15.8h, v0.h[4] + sqrdmulh v15.8h, v15.8h, v0.h[5] + str q9, [x0, #256] + sub v2.8h, v6.8h, v26.8h + str q3, [x0, #64] + mls v8.8h, v15.8h, v7.h[0] + sqrdmulh v15.8h, v2.8h, v0.h[1] + mul v11.8h, v2.8h, v0.h[0] + add v16.8h, v13.8h, v18.8h + sub v12.8h, v24.8h, v8.8h + add v8.8h, v24.8h, v8.8h + mls v11.8h, v15.8h, v7.h[0] + sqrdmulh v26.8h, v12.8h, v0.h[1] + mul v12.8h, v12.8h, v0.h[0] + str q8, [x0, #192] + add v15.8h, v19.8h, v23.8h + str q11, [x0, #320] + mls v12.8h, v26.8h, v7.h[0] + str q15, [x0], #16 + str q16, [x0, #112] + str q12, [x0, #432] + ldp d8, d9, [sp] + ldp d10, d11, [sp, #16] + ldp d12, d13, [sp, #32] + ldp d14, d15, [sp, #48] + add sp, sp, #0x40 + ret diff --git a/proofs/hol_light/arm/mlkem/mlkem_ntt.S b/proofs/hol_light/arm/mlkem/mlkem_ntt.S new file mode 100644 index 000000000..e377f81b4 --- /dev/null +++ b/proofs/hol_light/arm/mlkem/mlkem_ntt.S @@ -0,0 +1,358 @@ +// Copyright (c) 2022 Arm Limited +// Copyright (c) 2022 Hanno Becker +// Copyright (c) 2023 Amin Abdulrahman, Matthias Kannwischer +// Copyright (c) 2024 The mlkem-native project authors +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: MIT + +// ---------------------------------------------------------------------------- +// Forward number-theoretic transform from ML-KEM +// Input a[256], z_01234[80], z_56[384] (all signed 16-bit words); output a[256] (signed 16-bit words). +// +// The transform is in-place with input and output a[256], with the output +// in bitreversed order. The two other parameters are expected to point to +// tables of constants whose definitions can be found in the mlkem-native +// repo (mlkem/native/aarch64/src/aarch64_zetas.c) or our "tests/test.c". +// +// extern void mlkem_ntt(int16_t a[256],int16_t z_01234[80],int16_t z_56[384]); +// +// Standard ARM ABI: X0 = a, X1 = z_01234, X2 = z_56 +// ---------------------------------------------------------------------------- +#include "_internal_s2n_bignum.h" + + S2N_BN_SYM_VISIBILITY_DIRECTIVE(mlkem_ntt) + S2N_BN_SYM_PRIVACY_DIRECTIVE(mlkem_ntt) + .text + .balign 4 + +S2N_BN_SYMBOL(mlkem_ntt): + +// This implementation is generated by SLOTHY, set up to optimize for +// the Neoverse N1 microarchitecture, starting from the clean version +// +// https://github.com/pq-code-package/mlkem-native/blob/main/mlkem/native/aarch64/src/ntt_clean.S +// +// in the mlkem-native repository. + +PQCP_MLKEM_NATIVE_MLKEM512_ntt_asm_opt: + sub sp, sp, #0x40 + stp d8, d9, [sp] + stp d10, d11, [sp, #16] + stp d12, d13, [sp, #32] + stp d14, d15, [sp, #48] + mov w5, #0xd01 + mov v7.h[0], w5 + mov w5, #0x4ebf + mov v7.h[1], w5 + mov x3, x0 + mov x4, #0x4 + ldr q0, [x1], #32 + ldur q1, [x1, #-16] + ldr q21, [x0] + ldr q26, [x0, #64] + ldr q29, [x0, #128] + ldr q20, [x0, #192] + ldr q23, [x0, #256] + ldr q11, [x0, #448] + mul v2.8h, v23.8h, v0.h[0] + ldr q17, [x0, #320] + mul v15.8h, v11.8h, v0.h[0] + ldr q13, [x0, #384] + sub x4, x4, #0x1 + sqrdmulh v14.8h, v23.8h, v0.h[1] + sqrdmulh v23.8h, v17.8h, v0.h[1] + mul v17.8h, v17.8h, v0.h[0] + sqrdmulh v28.8h, v13.8h, v0.h[1] + mls v2.8h, v14.8h, v7.h[0] + mul v14.8h, v13.8h, v0.h[0] + mls v17.8h, v23.8h, v7.h[0] + sqrdmulh v23.8h, v11.8h, v0.h[1] + sub v11.8h, v21.8h, v2.8h + mls v14.8h, v28.8h, v7.h[0] + sub v28.8h, v26.8h, v17.8h + add v17.8h, v26.8h, v17.8h + add v2.8h, v21.8h, v2.8h + sub v13.8h, v29.8h, v14.8h + add v14.8h, v29.8h, v14.8h + mls v15.8h, v23.8h, v7.h[0] + sqrdmulh v23.8h, v13.8h, v0.h[5] + mul v13.8h, v13.8h, v0.h[4] + sqrdmulh v21.8h, v14.8h, v0.h[3] + sub v26.8h, v20.8h, v15.8h + add v15.8h, v20.8h, v15.8h + mls v13.8h, v23.8h, v7.h[0] + sqrdmulh v23.8h, v26.8h, v0.h[5] + mul v26.8h, v26.8h, v0.h[4] + mul v14.8h, v14.8h, v0.h[2] + sub v29.8h, v11.8h, v13.8h + add v11.8h, v11.8h, v13.8h + mls v26.8h, v23.8h, v7.h[0] + sqrdmulh v23.8h, v15.8h, v0.h[3] + mul v13.8h, v15.8h, v0.h[2] + mls v14.8h, v21.8h, v7.h[0] + sub v15.8h, v28.8h, v26.8h + add v28.8h, v28.8h, v26.8h + mls v13.8h, v23.8h, v7.h[0] + sub v23.8h, v2.8h, v14.8h + add v14.8h, v2.8h, v14.8h + sqrdmulh v2.8h, v28.8h, v1.h[3] + sub v21.8h, v17.8h, v13.8h + add v17.8h, v17.8h, v13.8h + mul v28.8h, v28.8h, v1.h[2] + sqrdmulh v13.8h, v21.8h, v1.h[1] + sqrdmulh v26.8h, v17.8h, v0.h[7] + mul v17.8h, v17.8h, v0.h[6] + mul v21.8h, v21.8h, v1.h[0] + mls v28.8h, v2.8h, v7.h[0] + sqrdmulh v2.8h, v15.8h, v1.h[5] + mls v17.8h, v26.8h, v7.h[0] + mls v21.8h, v13.8h, v7.h[0] + sub v13.8h, v11.8h, v28.8h + add v28.8h, v11.8h, v28.8h + sub v11.8h, v14.8h, v17.8h + mul v15.8h, v15.8h, v1.h[4] + add v14.8h, v14.8h, v17.8h + sub v17.8h, v23.8h, v21.8h + add v23.8h, v23.8h, v21.8h + mls v15.8h, v2.8h, v7.h[0] + str q14, [x0], #16 + ldr q21, [x0] + sub v14.8h, v29.8h, v15.8h + add v2.8h, v29.8h, v15.8h + str q11, [x0, #48] + ldr q26, [x0, #64] + str q23, [x0, #112] + ldr q29, [x0, #128] + str q17, [x0, #176] + ldr q20, [x0, #192] + str q28, [x0, #240] + ldr q23, [x0, #256] + str q13, [x0, #304] + ldr q17, [x0, #320] + str q2, [x0, #368] + mul v2.8h, v23.8h, v0.h[0] + str q14, [x0, #432] + ldr q11, [x0, #448] + ldr q13, [x0, #384] + mul v15.8h, v11.8h, v0.h[0] + sub x4, x4, #0x1 + cbnz x4, PQCP_MLKEM_NATIVE_MLKEM512_ntt_asm_opt+0x60 + sqrdmulh v27.8h, v11.8h, v0.h[1] + mul v8.8h, v13.8h, v0.h[0] + sqrdmulh v22.8h, v13.8h, v0.h[1] + mul v11.8h, v17.8h, v0.h[0] + mls v15.8h, v27.8h, v7.h[0] + sqrdmulh v28.8h, v17.8h, v0.h[1] + mls v8.8h, v22.8h, v7.h[0] + sqrdmulh v5.8h, v23.8h, v0.h[1] + add v16.8h, v20.8h, v15.8h + mls v11.8h, v28.8h, v7.h[0] + sub v6.8h, v29.8h, v8.8h + sqrdmulh v17.8h, v16.8h, v0.h[3] + mul v23.8h, v16.8h, v0.h[2] + mul v13.8h, v6.8h, v0.h[4] + sqrdmulh v28.8h, v6.8h, v0.h[5] + mls v2.8h, v5.8h, v7.h[0] + mls v23.8h, v17.8h, v7.h[0] + add v27.8h, v26.8h, v11.8h + mls v13.8h, v28.8h, v7.h[0] + sub v9.8h, v21.8h, v2.8h + add v18.8h, v29.8h, v8.8h + sub v14.8h, v27.8h, v23.8h + add v29.8h, v9.8h, v13.8h + sub v30.8h, v9.8h, v13.8h + mul v28.8h, v14.8h, v1.h[0] + sqrdmulh v9.8h, v18.8h, v0.h[3] + mul v22.8h, v18.8h, v0.h[2] + sqrdmulh v17.8h, v14.8h, v1.h[1] + sub v14.8h, v20.8h, v15.8h + add v24.8h, v21.8h, v2.8h + mls v22.8h, v9.8h, v7.h[0] + sqrdmulh v9.8h, v14.8h, v0.h[5] + mul v13.8h, v14.8h, v0.h[4] + mls v28.8h, v17.8h, v7.h[0] + sub v5.8h, v24.8h, v22.8h + sub v2.8h, v26.8h, v11.8h + mls v13.8h, v9.8h, v7.h[0] + sub v17.8h, v5.8h, v28.8h + add v14.8h, v5.8h, v28.8h + add v28.8h, v27.8h, v23.8h + str q17, [x0, #192] + add v17.8h, v2.8h, v13.8h + str q14, [x0, #128] + sub v13.8h, v2.8h, v13.8h + sqrdmulh v26.8h, v17.8h, v1.h[3] + mul v15.8h, v17.8h, v1.h[2] + add v5.8h, v24.8h, v22.8h + sqrdmulh v23.8h, v13.8h, v1.h[5] + mul v13.8h, v13.8h, v1.h[4] + mls v15.8h, v26.8h, v7.h[0] + sqrdmulh v14.8h, v28.8h, v0.h[7] + mul v17.8h, v28.8h, v0.h[6] + mls v13.8h, v23.8h, v7.h[0] + add v6.8h, v29.8h, v15.8h + sub v28.8h, v29.8h, v15.8h + mls v17.8h, v14.8h, v7.h[0] + str q6, [x0, #256] + add v14.8h, v30.8h, v13.8h + str q28, [x0, #320] + sub v23.8h, v30.8h, v13.8h + str q14, [x0, #384] + add v3.8h, v5.8h, v17.8h + str q23, [x0, #448] + sub v28.8h, v5.8h, v17.8h + str q3, [x0], #16 + str q28, [x0, #48] + mov x0, x3 + mov x4, #0x8 + ldr q2, [x1], #16 + ldr q14, [x0, #48] + ldr q1, [x0, #32] + mul v17.8h, v14.8h, v2.h[0] + sqrdmulh v14.8h, v14.8h, v2.h[1] + mul v8.8h, v1.8h, v2.h[0] + ldr q23, [x0, #16] + mls v17.8h, v14.8h, v7.h[0] + sqrdmulh v1.8h, v1.8h, v2.h[1] + ldr q30, [x2], #96 + sub v14.8h, v23.8h, v17.8h + add v10.8h, v23.8h, v17.8h + mls v8.8h, v1.8h, v7.h[0] + sqrdmulh v1.8h, v14.8h, v2.h[5] + mul v14.8h, v14.8h, v2.h[4] + ldr q27, [x0] + mul v23.8h, v10.8h, v2.h[2] + mls v14.8h, v1.8h, v7.h[0] + sub v1.8h, v27.8h, v8.8h + ldur q28, [x2, #-64] + add v12.8h, v1.8h, v14.8h + sqrdmulh v21.8h, v10.8h, v2.h[3] + sub v5.8h, v1.8h, v14.8h + ldur q13, [x2, #-16] + sub x4, x4, #0x1 + ldr q19, [x0, #112] + ldr q1, [x1], #16 + mls v23.8h, v21.8h, v7.h[0] + add v6.8h, v27.8h, v8.8h + mul v4.8h, v19.8h, v1.h[0] + sqrdmulh v19.8h, v19.8h, v1.h[1] + ldr q25, [x0, #80] + trn1 v11.4s, v12.4s, v5.4s + mls v4.8h, v19.8h, v7.h[0] + sub v0.8h, v6.8h, v23.8h + ldur q16, [x2, #-80] + sub v24.8h, v25.8h, v4.8h + add v26.8h, v6.8h, v23.8h + add v4.8h, v25.8h, v4.8h + sqrdmulh v23.8h, v24.8h, v1.h[5] + mul v20.8h, v24.8h, v1.h[4] + sqrdmulh v21.8h, v4.8h, v1.h[3] + trn1 v27.4s, v26.4s, v0.4s + trn2 v25.4s, v12.4s, v5.4s + mls v20.8h, v23.8h, v7.h[0] + mul v23.8h, v4.8h, v1.h[2] + ldr q31, [x0, #96] + trn2 v12.4s, v26.4s, v0.4s + trn2 v19.2d, v27.2d, v11.2d + mul v8.8h, v31.8h, v1.h[0] + sqrdmulh v1.8h, v31.8h, v1.h[1] + trn2 v10.2d, v12.2d, v25.2d + sqrdmulh v0.8h, v19.8h, v16.8h + sqrdmulh v18.8h, v10.8h, v16.8h + trn1 v16.2d, v27.2d, v11.2d + trn1 v2.2d, v12.2d, v25.2d + mul v12.8h, v10.8h, v30.8h + mul v10.8h, v19.8h, v30.8h + mls v8.8h, v1.8h, v7.h[0] + ldur q14, [x2, #-48] + mls v10.8h, v0.8h, v7.h[0] + mls v12.8h, v18.8h, v7.h[0] + ldr q27, [x0, #64] + add v9.8h, v16.8h, v10.8h + sub v16.8h, v16.8h, v10.8h + sub v25.8h, v2.8h, v12.8h + add v30.8h, v2.8h, v12.8h + sub v10.8h, v27.8h, v8.8h + sqrdmulh v22.8h, v25.8h, v13.8h + sqrdmulh v13.8h, v30.8h, v14.8h + ldur q14, [x2, #-32] + add v12.8h, v10.8h, v20.8h + mul v5.8h, v30.8h, v28.8h + mul v26.8h, v25.8h, v14.8h + ldr q30, [x2], #96 + mls v5.8h, v13.8h, v7.h[0] + mls v26.8h, v22.8h, v7.h[0] + ldur q28, [x2, #-64] + add v13.8h, v9.8h, v5.8h + sub v9.8h, v9.8h, v5.8h + sub v5.8h, v16.8h, v26.8h + add v25.8h, v16.8h, v26.8h + trn1 v15.4s, v13.4s, v9.4s + trn2 v3.4s, v13.4s, v9.4s + trn1 v13.4s, v25.4s, v5.4s + trn2 v31.4s, v25.4s, v5.4s + sub v5.8h, v10.8h, v20.8h + trn1 v2.2d, v15.2d, v13.2d + trn2 v9.2d, v15.2d, v13.2d + str q2, [x0], #64 + trn1 v29.2d, v3.2d, v31.2d + stur q9, [x0, #-32] + trn2 v9.2d, v3.2d, v31.2d + stur q29, [x0, #-48] + ldur q13, [x2, #-16] + stur q9, [x0, #-16] + sub x4, x4, #0x1 + cbnz x4, PQCP_MLKEM_NATIVE_MLKEM512_ntt_asm_opt+0x30c + mls v23.8h, v21.8h, v7.h[0] + add v14.8h, v27.8h, v8.8h + ldur q1, [x2, #-32] + add v17.8h, v14.8h, v23.8h + sub v23.8h, v14.8h, v23.8h + trn2 v11.4s, v12.4s, v5.4s + trn1 v27.4s, v12.4s, v5.4s + trn2 v2.4s, v17.4s, v23.4s + ldur q26, [x2, #-80] + trn2 v14.2d, v2.2d, v11.2d + trn1 v15.4s, v17.4s, v23.4s + mul v5.8h, v14.8h, v30.8h + sqrdmulh v23.8h, v14.8h, v26.8h + trn2 v17.2d, v15.2d, v27.2d + trn1 v14.2d, v2.2d, v11.2d + mul v21.8h, v17.8h, v30.8h + mls v5.8h, v23.8h, v7.h[0] + sqrdmulh v17.8h, v17.8h, v26.8h + ldur q2, [x2, #-48] + sub v23.8h, v14.8h, v5.8h + add v14.8h, v14.8h, v5.8h + mls v21.8h, v17.8h, v7.h[0] + mul v1.8h, v23.8h, v1.8h + sqrdmulh v17.8h, v23.8h, v13.8h + mul v23.8h, v14.8h, v28.8h + sqrdmulh v14.8h, v14.8h, v2.8h + trn1 v28.2d, v15.2d, v27.2d + mls v1.8h, v17.8h, v7.h[0] + sub v11.8h, v28.8h, v21.8h + mls v23.8h, v14.8h, v7.h[0] + add v17.8h, v28.8h, v21.8h + sub v14.8h, v11.8h, v1.8h + add v1.8h, v11.8h, v1.8h + sub v28.8h, v17.8h, v23.8h + add v2.8h, v17.8h, v23.8h + trn1 v23.4s, v1.4s, v14.4s + trn2 v14.4s, v1.4s, v14.4s + trn2 v17.4s, v2.4s, v28.4s + trn1 v28.4s, v2.4s, v28.4s + trn2 v1.2d, v17.2d, v14.2d + trn1 v14.2d, v17.2d, v14.2d + str q1, [x0, #48] + trn2 v1.2d, v28.2d, v23.2d + str q14, [x0, #16] + trn1 v14.2d, v28.2d, v23.2d + str q1, [x0, #32] + str q14, [x0], #64 + ldp d8, d9, [sp] + ldp d10, d11, [sp, #16] + ldp d12, d13, [sp, #32] + ldp d14, d15, [sp, #48] + add sp, sp, #0x40 + ret diff --git a/proofs/hol_light/arm/proofs/mlkem_intt.ml b/proofs/hol_light/arm/proofs/mlkem_intt.ml new file mode 100644 index 000000000..8b9cbc325 --- /dev/null +++ b/proofs/hol_light/arm/proofs/mlkem_intt.ml @@ -0,0 +1,651 @@ +(* + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT-0 + *) + +(* ========================================================================= *) +(* Inverse number theoretic transform. *) +(* ========================================================================= *) + +needs "arm/proofs/base.ml";; +needs "arm/proofs/utils/mlkem.ml";; + +(**** print_literal_from_elf "mlkem/mlkem_intt.o";; + ****) + +let mlkem_intt_mc = define_assert_from_elf + "mlkem_intt_mc" "mlkem/mlkem_intt.o" +[ + 0xd10103ff; (* arm_SUB SP SP (rvalue (word 64)) *) + 0x6d0027e8; (* arm_STP D8 D9 SP (Immediate_Offset (iword (&0))) *) + 0x6d012fea; (* arm_STP D10 D11 SP (Immediate_Offset (iword (&16))) *) + 0x6d0237ec; (* arm_STP D12 D13 SP (Immediate_Offset (iword (&32))) *) + 0x6d033fee; (* arm_STP D14 D15 SP (Immediate_Offset (iword (&48))) *) + 0x5281a025; (* arm_MOV W5 (rvalue (word 3329)) *) + 0x4e021ca7; (* arm_INS_GEN Q7 W5 0 16 *) + 0x5289d7e5; (* arm_MOV W5 (rvalue (word 20159)) *) + 0x4e061ca7; (* arm_INS_GEN Q7 W5 16 16 *) + 0x52804005; (* arm_MOV W5 (rvalue (word 512)) *) + 0x4e020cbd; (* arm_DUP_GEN Q29 X5 16 128 *) + 0x52827605; (* arm_MOV W5 (rvalue (word 5040)) *) + 0x4e020cbe; (* arm_DUP_GEN Q30 X5 16 128 *) + 0xaa0003e3; (* arm_MOV X3 X0 *) + 0xd2800104; (* arm_MOV X4 (rvalue (word 8)) *) + 0x3dc00068; (* arm_LDR Q8 X3 (Immediate_Offset (word 0)) *) + 0x3dc00469; (* arm_LDR Q9 X3 (Immediate_Offset (word 16)) *) + 0x3dc0086a; (* arm_LDR Q10 X3 (Immediate_Offset (word 32)) *) + 0x3dc00c6b; (* arm_LDR Q11 X3 (Immediate_Offset (word 48)) *) + 0x6e7eb51b; (* arm_SQRDMULH_VEC Q27 Q8 Q30 16 128 *) + 0x4e7d9d08; (* arm_MUL_VEC Q8 Q8 Q29 16 128 *) + 0x6f474368; (* arm_MLS_VEC Q8 Q27 (Q7 :> LANE_H 0) 16 128 *) + 0x6e7eb53b; (* arm_SQRDMULH_VEC Q27 Q9 Q30 16 128 *) + 0x4e7d9d29; (* arm_MUL_VEC Q9 Q9 Q29 16 128 *) + 0x6f474369; (* arm_MLS_VEC Q9 Q27 (Q7 :> LANE_H 0) 16 128 *) + 0x6e7eb55b; (* arm_SQRDMULH_VEC Q27 Q10 Q30 16 128 *) + 0x4e7d9d4a; (* arm_MUL_VEC Q10 Q10 Q29 16 128 *) + 0x6f47436a; (* arm_MLS_VEC Q10 Q27 (Q7 :> LANE_H 0) 16 128 *) + 0x6e7eb57b; (* arm_SQRDMULH_VEC Q27 Q11 Q30 16 128 *) + 0x4e7d9d6b; (* arm_MUL_VEC Q11 Q11 Q29 16 128 *) + 0x6f47436b; (* arm_MLS_VEC Q11 Q27 (Q7 :> LANE_H 0) 16 128 *) + 0x3c840468; (* arm_STR Q8 X3 (Postimmediate_Offset (word 64)) *) + 0x3c9d0069; (* arm_STR Q9 X3 (Immediate_Offset (word 18446744073709551568)) *) + 0x3c9e006a; (* arm_STR Q10 X3 (Immediate_Offset (word 18446744073709551584)) *) + 0x3c9f006b; (* arm_STR Q11 X3 (Immediate_Offset (word 18446744073709551600)) *) + 0xf1000484; (* arm_SUBS X4 X4 (rvalue (word 1)) *) + 0xb5fffd64; (* arm_CBNZ X4 (word 2097068) *) + 0xaa0003e3; (* arm_MOV X3 X0 *) + 0xd2800104; (* arm_MOV X4 (rvalue (word 8)) *) + 0x3dc0007a; (* arm_LDR Q26 X3 (Immediate_Offset (word 0)) *) + 0x3dc00468; (* arm_LDR Q8 X3 (Immediate_Offset (word 16)) *) + 0x3dc00878; (* arm_LDR Q24 X3 (Immediate_Offset (word 32)) *) + 0x3dc00c70; (* arm_LDR Q16 X3 (Immediate_Offset (word 48)) *) + 0x3cc60449; (* arm_LDR Q9 X2 (Postimmediate_Offset (word 96)) *) + 0x4e902b00; (* arm_TRN1 Q0 Q24 Q16 32 128 *) + 0x3cdb0046; (* arm_LDR Q6 X2 (Immediate_Offset (word 18446744073709551536)) *) + 0x3cdc0043; (* arm_LDR Q3 X2 (Immediate_Offset (word 18446744073709551552)) *) + 0x3cdd004f; (* arm_LDR Q15 X2 (Immediate_Offset (word 18446744073709551568)) *) + 0x3cde0044; (* arm_LDR Q4 X2 (Immediate_Offset (word 18446744073709551584)) *) + 0x3cdf005c; (* arm_LDR Q28 X2 (Immediate_Offset (word 18446744073709551600)) *) + 0xd1000484; (* arm_SUB X4 X4 (rvalue (word 1)) *) + 0x4e882b4c; (* arm_TRN1 Q12 Q26 Q8 32 128 *) + 0x4e886b5a; (* arm_TRN2 Q26 Q26 Q8 32 128 *) + 0x4e906b08; (* arm_TRN2 Q8 Q24 Q16 32 128 *) + 0x4ec0698b; (* arm_TRN2 Q11 Q12 Q0 64 128 *) + 0x4ec0298c; (* arm_TRN1 Q12 Q12 Q0 64 128 *) + 0x4ec86b50; (* arm_TRN2 Q16 Q26 Q8 64 128 *) + 0x4ec82b5a; (* arm_TRN1 Q26 Q26 Q8 64 128 *) + 0x6e708568; (* arm_SUB_VEC Q8 Q11 Q16 16 128 *) + 0x4e70856b; (* arm_ADD_VEC Q11 Q11 Q16 16 128 *) + 0x6e7a8590; (* arm_SUB_VEC Q16 Q12 Q26 16 128 *) + 0x4e7a858c; (* arm_ADD_VEC Q12 Q12 Q26 16 128 *) + 0x6e7cb51a; (* arm_SQRDMULH_VEC Q26 Q8 Q28 16 128 *) + 0x6e6fb60f; (* arm_SQRDMULH_VEC Q15 Q16 Q15 16 128 *) + 0x4e639e10; (* arm_MUL_VEC Q16 Q16 Q3 16 128 *) + 0x4e649d08; (* arm_MUL_VEC Q8 Q8 Q4 16 128 *) + 0x6e6b8580; (* arm_SUB_VEC Q0 Q12 Q11 16 128 *) + 0x4e6b858c; (* arm_ADD_VEC Q12 Q12 Q11 16 128 *) + 0x6f4741f0; (* arm_MLS_VEC Q16 Q15 (Q7 :> LANE_H 0) 16 128 *) + 0x6f474348; (* arm_MLS_VEC Q8 Q26 (Q7 :> LANE_H 0) 16 128 *) + 0x6e66b41a; (* arm_SQRDMULH_VEC Q26 Q0 Q6 16 128 *) + 0x4e699c0b; (* arm_MUL_VEC Q11 Q0 Q9 16 128 *) + 0x3cc1042f; (* arm_LDR Q15 X1 (Postimmediate_Offset (word 16)) *) + 0x6e688600; (* arm_SUB_VEC Q0 Q16 Q8 16 128 *) + 0x6f47434b; (* arm_MLS_VEC Q11 Q26 (Q7 :> LANE_H 0) 16 128 *) + 0x4e68861a; (* arm_ADD_VEC Q26 Q16 Q8 16 128 *) + 0x6e66b408; (* arm_SQRDMULH_VEC Q8 Q0 Q6 16 128 *) + 0x4e699c10; (* arm_MUL_VEC Q16 Q0 Q9 16 128 *) + 0x4e9a2980; (* arm_TRN1 Q0 Q12 Q26 32 128 *) + 0x4e9a698c; (* arm_TRN2 Q12 Q12 Q26 32 128 *) + 0x3dc0107a; (* arm_LDR Q26 X3 (Immediate_Offset (word 64)) *) + 0x6f474110; (* arm_MLS_VEC Q16 Q8 (Q7 :> LANE_H 0) 16 128 *) + 0x3dc01468; (* arm_LDR Q8 X3 (Immediate_Offset (word 80)) *) + 0x3dc01878; (* arm_LDR Q24 X3 (Immediate_Offset (word 96)) *) + 0x4e902969; (* arm_TRN1 Q9 Q11 Q16 32 128 *) + 0x4e90696b; (* arm_TRN2 Q11 Q11 Q16 32 128 *) + 0x3dc01c70; (* arm_LDR Q16 X3 (Immediate_Offset (word 112)) *) + 0x4ec96806; (* arm_TRN2 Q6 Q0 Q9 64 128 *) + 0x4ecb6983; (* arm_TRN2 Q3 Q12 Q11 64 128 *) + 0x4ec92800; (* arm_TRN1 Q0 Q0 Q9 64 128 *) + 0x4ecb298c; (* arm_TRN1 Q12 Q12 Q11 64 128 *) + 0x6e6384cb; (* arm_SUB_VEC Q11 Q6 Q3 16 128 *) + 0x6e6c8409; (* arm_SUB_VEC Q9 Q0 Q12 16 128 *) + 0x4e6c840c; (* arm_ADD_VEC Q12 Q0 Q12 16 128 *) + 0x4f5fd960; (* arm_SQRDMULH_VEC Q0 Q11 (Q15 :> LANE_H 5) 16 128 *) + 0x4f7fd124; (* arm_SQRDMULH_VEC Q4 Q9 (Q15 :> LANE_H 3) 16 128 *) + 0x4f6f8129; (* arm_MUL_VEC Q9 Q9 (Q15 :> LANE_H 2) 16 128 *) + 0x4f4f896b; (* arm_MUL_VEC Q11 Q11 (Q15 :> LANE_H 4) 16 128 *) + 0x4e6384c6; (* arm_ADD_VEC Q6 Q6 Q3 16 128 *) + 0x4f57c183; (* arm_SQDMULH_VEC Q3 Q12 (Q7 :> LANE_H 1) 16 128 *) + 0x6f474089; (* arm_MLS_VEC Q9 Q4 (Q7 :> LANE_H 0) 16 128 *) + 0x6f47400b; (* arm_MLS_VEC Q11 Q0 (Q7 :> LANE_H 0) 16 128 *) + 0x4f57c0c0; (* arm_SQDMULH_VEC Q0 Q6 (Q7 :> LANE_H 1) 16 128 *) + 0x4f152463; (* arm_SRSHR_VEC Q3 Q3 11 16 128 *) + 0x4f57c124; (* arm_SQDMULH_VEC Q4 Q9 (Q7 :> LANE_H 1) 16 128 *) + 0x4f57c17c; (* arm_SQDMULH_VEC Q28 Q11 (Q7 :> LANE_H 1) 16 128 *) + 0x6f47406c; (* arm_MLS_VEC Q12 Q3 (Q7 :> LANE_H 0) 16 128 *) + 0x4f152400; (* arm_SRSHR_VEC Q0 Q0 11 16 128 *) + 0x4f152483; (* arm_SRSHR_VEC Q3 Q4 11 16 128 *) + 0x4f152784; (* arm_SRSHR_VEC Q4 Q28 11 16 128 *) + 0x6f474006; (* arm_MLS_VEC Q6 Q0 (Q7 :> LANE_H 0) 16 128 *) + 0x6f474069; (* arm_MLS_VEC Q9 Q3 (Q7 :> LANE_H 0) 16 128 *) + 0x6f47408b; (* arm_MLS_VEC Q11 Q4 (Q7 :> LANE_H 0) 16 128 *) + 0x4e902b00; (* arm_TRN1 Q0 Q24 Q16 32 128 *) + 0x6e668583; (* arm_SUB_VEC Q3 Q12 Q6 16 128 *) + 0x4e66858c; (* arm_ADD_VEC Q12 Q12 Q6 16 128 *) + 0x6e6b8526; (* arm_SUB_VEC Q6 Q9 Q11 16 128 *) + 0x4f5fd064; (* arm_SQRDMULH_VEC Q4 Q3 (Q15 :> LANE_H 1) 16 128 *) + 0x4f4f8063; (* arm_MUL_VEC Q3 Q3 (Q15 :> LANE_H 0) 16 128 *) + 0x4f5fd0dc; (* arm_SQRDMULH_VEC Q28 Q6 (Q15 :> LANE_H 1) 16 128 *) + 0x4f4f80cf; (* arm_MUL_VEC Q15 Q6 (Q15 :> LANE_H 0) 16 128 *) + 0x4e6b852b; (* arm_ADD_VEC Q11 Q9 Q11 16 128 *) + 0x6f474083; (* arm_MLS_VEC Q3 Q4 (Q7 :> LANE_H 0) 16 128 *) + 0x3c84046c; (* arm_STR Q12 X3 (Postimmediate_Offset (word 64)) *) + 0x6f47438f; (* arm_MLS_VEC Q15 Q28 (Q7 :> LANE_H 0) 16 128 *) + 0x3c9d006b; (* arm_STR Q11 X3 (Immediate_Offset (word 18446744073709551568)) *) + 0x3cc60449; (* arm_LDR Q9 X2 (Postimmediate_Offset (word 96)) *) + 0x3c9e0063; (* arm_STR Q3 X3 (Immediate_Offset (word 18446744073709551584)) *) + 0x3cdb0046; (* arm_LDR Q6 X2 (Immediate_Offset (word 18446744073709551536)) *) + 0x3c9f006f; (* arm_STR Q15 X3 (Immediate_Offset (word 18446744073709551600)) *) + 0x3cdc0043; (* arm_LDR Q3 X2 (Immediate_Offset (word 18446744073709551552)) *) + 0x3cdd004f; (* arm_LDR Q15 X2 (Immediate_Offset (word 18446744073709551568)) *) + 0x3cde0044; (* arm_LDR Q4 X2 (Immediate_Offset (word 18446744073709551584)) *) + 0x3cdf005c; (* arm_LDR Q28 X2 (Immediate_Offset (word 18446744073709551600)) *) + 0xd1000484; (* arm_SUB X4 X4 (rvalue (word 1)) *) + 0xb5fff584; (* arm_CBNZ X4 (word 2096816) *) + 0x4e882b4b; (* arm_TRN1 Q11 Q26 Q8 32 128 *) + 0x4e906b18; (* arm_TRN2 Q24 Q24 Q16 32 128 *) + 0x4e886b5a; (* arm_TRN2 Q26 Q26 Q8 32 128 *) + 0x4ec02972; (* arm_TRN1 Q18 Q11 Q0 64 128 *) + 0x4ec0696b; (* arm_TRN2 Q11 Q11 Q0 64 128 *) + 0x4ed86b4c; (* arm_TRN2 Q12 Q26 Q24 64 128 *) + 0x4ed82b48; (* arm_TRN1 Q8 Q26 Q24 64 128 *) + 0x6e6c857a; (* arm_SUB_VEC Q26 Q11 Q12 16 128 *) + 0x6e68864d; (* arm_SUB_VEC Q13 Q18 Q8 16 128 *) + 0x4e688658; (* arm_ADD_VEC Q24 Q18 Q8 16 128 *) + 0x4e649f50; (* arm_MUL_VEC Q16 Q26 Q4 16 128 *) + 0x6e6fb5b1; (* arm_SQRDMULH_VEC Q17 Q13 Q15 16 128 *) + 0x4e639da3; (* arm_MUL_VEC Q3 Q13 Q3 16 128 *) + 0x6e7cb75a; (* arm_SQRDMULH_VEC Q26 Q26 Q28 16 128 *) + 0x4e6c856a; (* arm_ADD_VEC Q10 Q11 Q12 16 128 *) + 0x6f474223; (* arm_MLS_VEC Q3 Q17 (Q7 :> LANE_H 0) 16 128 *) + 0x6f474350; (* arm_MLS_VEC Q16 Q26 (Q7 :> LANE_H 0) 16 128 *) + 0x6e6a871a; (* arm_SUB_VEC Q26 Q24 Q10 16 128 *) + 0x3cc10424; (* arm_LDR Q4 X1 (Postimmediate_Offset (word 16)) *) + 0x6e70846c; (* arm_SUB_VEC Q12 Q3 Q16 16 128 *) + 0x6e66b74f; (* arm_SQRDMULH_VEC Q15 Q26 Q6 16 128 *) + 0x4e699f4b; (* arm_MUL_VEC Q11 Q26 Q9 16 128 *) + 0x4e699d88; (* arm_MUL_VEC Q8 Q12 Q9 16 128 *) + 0x6e66b58c; (* arm_SQRDMULH_VEC Q12 Q12 Q6 16 128 *) + 0x4e6a8700; (* arm_ADD_VEC Q0 Q24 Q10 16 128 *) + 0x6f4741eb; (* arm_MLS_VEC Q11 Q15 (Q7 :> LANE_H 0) 16 128 *) + 0x4e708466; (* arm_ADD_VEC Q6 Q3 Q16 16 128 *) + 0x6f474188; (* arm_MLS_VEC Q8 Q12 (Q7 :> LANE_H 0) 16 128 *) + 0x4e86681a; (* arm_TRN2 Q26 Q0 Q6 32 128 *) + 0x4e88696c; (* arm_TRN2 Q12 Q11 Q8 32 128 *) + 0x4e882963; (* arm_TRN1 Q3 Q11 Q8 32 128 *) + 0x4e862811; (* arm_TRN1 Q17 Q0 Q6 32 128 *) + 0x4ecc2b48; (* arm_TRN1 Q8 Q26 Q12 64 128 *) + 0x4ecc6b4d; (* arm_TRN2 Q13 Q26 Q12 64 128 *) + 0x4ec32a2b; (* arm_TRN1 Q11 Q17 Q3 64 128 *) + 0x4ec36a2f; (* arm_TRN2 Q15 Q17 Q3 64 128 *) + 0x6e68856c; (* arm_SUB_VEC Q12 Q11 Q8 16 128 *) + 0x4e6d85f0; (* arm_ADD_VEC Q16 Q15 Q13 16 128 *) + 0x6e6d85fa; (* arm_SUB_VEC Q26 Q15 Q13 16 128 *) + 0x4f648180; (* arm_MUL_VEC Q0 Q12 (Q4 :> LANE_H 2) 16 128 *) + 0x4f74d189; (* arm_SQRDMULH_VEC Q9 Q12 (Q4 :> LANE_H 3) 16 128 *) + 0x4f448b4d; (* arm_MUL_VEC Q13 Q26 (Q4 :> LANE_H 4) 16 128 *) + 0x4f54db5a; (* arm_SQRDMULH_VEC Q26 Q26 (Q4 :> LANE_H 5) 16 128 *) + 0x4e688578; (* arm_ADD_VEC Q24 Q11 Q8 16 128 *) + 0x6f474120; (* arm_MLS_VEC Q0 Q9 (Q7 :> LANE_H 0) 16 128 *) + 0x4f57c20c; (* arm_SQDMULH_VEC Q12 Q16 (Q7 :> LANE_H 1) 16 128 *) + 0x6f47434d; (* arm_MLS_VEC Q13 Q26 (Q7 :> LANE_H 0) 16 128 *) + 0x4f57c30b; (* arm_SQDMULH_VEC Q11 Q24 (Q7 :> LANE_H 1) 16 128 *) + 0x4f57c008; (* arm_SQDMULH_VEC Q8 Q0 (Q7 :> LANE_H 1) 16 128 *) + 0x4f15258c; (* arm_SRSHR_VEC Q12 Q12 11 16 128 *) + 0x4f57c1ba; (* arm_SQDMULH_VEC Q26 Q13 (Q7 :> LANE_H 1) 16 128 *) + 0x4f15256b; (* arm_SRSHR_VEC Q11 Q11 11 16 128 *) + 0x6f474190; (* arm_MLS_VEC Q16 Q12 (Q7 :> LANE_H 0) 16 128 *) + 0x4f152508; (* arm_SRSHR_VEC Q8 Q8 11 16 128 *) + 0x4f15275a; (* arm_SRSHR_VEC Q26 Q26 11 16 128 *) + 0x6f474178; (* arm_MLS_VEC Q24 Q11 (Q7 :> LANE_H 0) 16 128 *) + 0x6f474100; (* arm_MLS_VEC Q0 Q8 (Q7 :> LANE_H 0) 16 128 *) + 0x6f47434d; (* arm_MLS_VEC Q13 Q26 (Q7 :> LANE_H 0) 16 128 *) + 0x6e70871a; (* arm_SUB_VEC Q26 Q24 Q16 16 128 *) + 0x4e70870f; (* arm_ADD_VEC Q15 Q24 Q16 16 128 *) + 0x6e6d840c; (* arm_SUB_VEC Q12 Q0 Q13 16 128 *) + 0x4f44834b; (* arm_MUL_VEC Q11 Q26 (Q4 :> LANE_H 0) 16 128 *) + 0x4f54d350; (* arm_SQRDMULH_VEC Q16 Q26 (Q4 :> LANE_H 1) 16 128 *) + 0x4f44819a; (* arm_MUL_VEC Q26 Q12 (Q4 :> LANE_H 0) 16 128 *) + 0x4f54d188; (* arm_SQRDMULH_VEC Q8 Q12 (Q4 :> LANE_H 1) 16 128 *) + 0x4e6d840c; (* arm_ADD_VEC Q12 Q0 Q13 16 128 *) + 0x6f47420b; (* arm_MLS_VEC Q11 Q16 (Q7 :> LANE_H 0) 16 128 *) + 0x3c84046f; (* arm_STR Q15 X3 (Postimmediate_Offset (word 64)) *) + 0x6f47411a; (* arm_MLS_VEC Q26 Q8 (Q7 :> LANE_H 0) 16 128 *) + 0x3c9d006c; (* arm_STR Q12 X3 (Immediate_Offset (word 18446744073709551568)) *) + 0x3c9e006b; (* arm_STR Q11 X3 (Immediate_Offset (word 18446744073709551584)) *) + 0x3c9f007a; (* arm_STR Q26 X3 (Immediate_Offset (word 18446744073709551600)) *) + 0xd2800084; (* arm_MOV X4 (rvalue (word 4)) *) + 0x3cc20420; (* arm_LDR Q0 X1 (Postimmediate_Offset (word 32)) *) + 0x3cdf0021; (* arm_LDR Q1 X1 (Immediate_Offset (word 18446744073709551600)) *) + 0x3dc02018; (* arm_LDR Q24 X0 (Immediate_Offset (word 128)) *) + 0x3dc03010; (* arm_LDR Q16 X0 (Immediate_Offset (word 192)) *) + 0x3dc04009; (* arm_LDR Q9 X0 (Immediate_Offset (word 256)) *) + 0x3dc05006; (* arm_LDR Q6 X0 (Immediate_Offset (word 320)) *) + 0x3dc06003; (* arm_LDR Q3 X0 (Immediate_Offset (word 384)) *) + 0x3dc07004; (* arm_LDR Q4 X0 (Immediate_Offset (word 448)) *) + 0x4e66853c; (* arm_ADD_VEC Q28 Q9 Q6 16 128 *) + 0x4e708713; (* arm_ADD_VEC Q19 Q24 Q16 16 128 *) + 0x4e64846d; (* arm_ADD_VEC Q13 Q3 Q4 16 128 *) + 0x3dc0000b; (* arm_LDR Q11 X0 (Immediate_Offset (word 0)) *) + 0x4e6d8797; (* arm_ADD_VEC Q23 Q28 Q13 16 128 *) + 0x3dc0100f; (* arm_LDR Q15 X0 (Immediate_Offset (word 64)) *) + 0xd1000484; (* arm_SUB X4 X4 (rvalue (word 1)) *) + 0x6e6f856c; (* arm_SUB_VEC Q12 Q11 Q15 16 128 *) + 0x4e6f857a; (* arm_ADD_VEC Q26 Q11 Q15 16 128 *) + 0x6e708708; (* arm_SUB_VEC Q8 Q24 Q16 16 128 *) + 0x4f70d98b; (* arm_SQRDMULH_VEC Q11 Q12 (Q0 :> LANE_H 7) 16 128 *) + 0x4f60898c; (* arm_MUL_VEC Q12 Q12 (Q0 :> LANE_H 6) 16 128 *) + 0x6e738750; (* arm_SUB_VEC Q16 Q26 Q19 16 128 *) + 0x4e73875a; (* arm_ADD_VEC Q26 Q26 Q19 16 128 *) + 0x4f51d10f; (* arm_SQRDMULH_VEC Q15 Q8 (Q1 :> LANE_H 1) 16 128 *) + 0x4f418108; (* arm_MUL_VEC Q8 Q8 (Q1 :> LANE_H 0) 16 128 *) + 0x6f47416c; (* arm_MLS_VEC Q12 Q11 (Q7 :> LANE_H 0) 16 128 *) + 0x6e66852b; (* arm_SUB_VEC Q11 Q9 Q6 16 128 *) + 0x4f70d218; (* arm_SQRDMULH_VEC Q24 Q16 (Q0 :> LANE_H 3) 16 128 *) + 0x4f608210; (* arm_MUL_VEC Q16 Q16 (Q0 :> LANE_H 2) 16 128 *) + 0x6e778749; (* arm_SUB_VEC Q9 Q26 Q23 16 128 *) + 0x4e77875a; (* arm_ADD_VEC Q26 Q26 Q23 16 128 *) + 0x6f4741e8; (* arm_MLS_VEC Q8 Q15 (Q7 :> LANE_H 0) 16 128 *) + 0x4f71d16f; (* arm_SQRDMULH_VEC Q15 Q11 (Q1 :> LANE_H 3) 16 128 *) + 0x4f61816b; (* arm_MUL_VEC Q11 Q11 (Q1 :> LANE_H 2) 16 128 *) + 0x6e648466; (* arm_SUB_VEC Q6 Q3 Q4 16 128 *) + 0x6e688583; (* arm_SUB_VEC Q3 Q12 Q8 16 128 *) + 0x4e68858c; (* arm_ADD_VEC Q12 Q12 Q8 16 128 *) + 0x6f4741eb; (* arm_MLS_VEC Q11 Q15 (Q7 :> LANE_H 0) 16 128 *) + 0x4f51d8c8; (* arm_SQRDMULH_VEC Q8 Q6 (Q1 :> LANE_H 5) 16 128 *) + 0x6f474310; (* arm_MLS_VEC Q16 Q24 (Q7 :> LANE_H 0) 16 128 *) + 0x4f4188cf; (* arm_MUL_VEC Q15 Q6 (Q1 :> LANE_H 4) 16 128 *) + 0x4f70d078; (* arm_SQRDMULH_VEC Q24 Q3 (Q0 :> LANE_H 3) 16 128 *) + 0x4f608066; (* arm_MUL_VEC Q6 Q3 (Q0 :> LANE_H 2) 16 128 *) + 0x4f50d123; (* arm_SQRDMULH_VEC Q3 Q9 (Q0 :> LANE_H 1) 16 128 *) + 0x4f408129; (* arm_MUL_VEC Q9 Q9 (Q0 :> LANE_H 0) 16 128 *) + 0x3c81041a; (* arm_STR Q26 X0 (Postimmediate_Offset (word 16)) *) + 0x6f47410f; (* arm_MLS_VEC Q15 Q8 (Q7 :> LANE_H 0) 16 128 *) + 0x6f474306; (* arm_MLS_VEC Q6 Q24 (Q7 :> LANE_H 0) 16 128 *) + 0x6e6d879a; (* arm_SUB_VEC Q26 Q28 Q13 16 128 *) + 0x6f474069; (* arm_MLS_VEC Q9 Q3 (Q7 :> LANE_H 0) 16 128 *) + 0x6e6f8568; (* arm_SUB_VEC Q8 Q11 Q15 16 128 *) + 0x4f50db58; (* arm_SQRDMULH_VEC Q24 Q26 (Q0 :> LANE_H 5) 16 128 *) + 0x4f408b5a; (* arm_MUL_VEC Q26 Q26 (Q0 :> LANE_H 4) 16 128 *) + 0x4e6f856b; (* arm_ADD_VEC Q11 Q11 Q15 16 128 *) + 0x4f50d90f; (* arm_SQRDMULH_VEC Q15 Q8 (Q0 :> LANE_H 5) 16 128 *) + 0x4f408908; (* arm_MUL_VEC Q8 Q8 (Q0 :> LANE_H 4) 16 128 *) + 0x6f47431a; (* arm_MLS_VEC Q26 Q24 (Q7 :> LANE_H 0) 16 128 *) + 0x6e6b8598; (* arm_SUB_VEC Q24 Q12 Q11 16 128 *) + 0x4e6b858c; (* arm_ADD_VEC Q12 Q12 Q11 16 128 *) + 0x6f4741e8; (* arm_MLS_VEC Q8 Q15 (Q7 :> LANE_H 0) 16 128 *) + 0x4f50d30b; (* arm_SQRDMULH_VEC Q11 Q24 (Q0 :> LANE_H 1) 16 128 *) + 0x4f40830f; (* arm_MUL_VEC Q15 Q24 (Q0 :> LANE_H 0) 16 128 *) + 0x6e7a8618; (* arm_SUB_VEC Q24 Q16 Q26 16 128 *) + 0x4e7a861a; (* arm_ADD_VEC Q26 Q16 Q26 16 128 *) + 0x6e6884d0; (* arm_SUB_VEC Q16 Q6 Q8 16 128 *) + 0x6f47416f; (* arm_MLS_VEC Q15 Q11 (Q7 :> LANE_H 0) 16 128 *) + 0x4f50d30b; (* arm_SQRDMULH_VEC Q11 Q24 (Q0 :> LANE_H 1) 16 128 *) + 0x4f408318; (* arm_MUL_VEC Q24 Q24 (Q0 :> LANE_H 0) 16 128 *) + 0x4e6884c8; (* arm_ADD_VEC Q8 Q6 Q8 16 128 *) + 0x4f50d206; (* arm_SQRDMULH_VEC Q6 Q16 (Q0 :> LANE_H 1) 16 128 *) + 0x4f408210; (* arm_MUL_VEC Q16 Q16 (Q0 :> LANE_H 0) 16 128 *) + 0x6f474178; (* arm_MLS_VEC Q24 Q11 (Q7 :> LANE_H 0) 16 128 *) + 0x3d803c09; (* arm_STR Q9 X0 (Immediate_Offset (word 240)) *) + 0x3dc0000b; (* arm_LDR Q11 X0 (Immediate_Offset (word 0)) *) + 0x6f4740d0; (* arm_MLS_VEC Q16 Q6 (Q7 :> LANE_H 0) 16 128 *) + 0x3d804c0f; (* arm_STR Q15 X0 (Immediate_Offset (word 304)) *) + 0x3dc0100f; (* arm_LDR Q15 X0 (Immediate_Offset (word 64)) *) + 0x3d805c18; (* arm_STR Q24 X0 (Immediate_Offset (word 368)) *) + 0x3dc02018; (* arm_LDR Q24 X0 (Immediate_Offset (word 128)) *) + 0x3d806c10; (* arm_STR Q16 X0 (Immediate_Offset (word 432)) *) + 0x3dc03010; (* arm_LDR Q16 X0 (Immediate_Offset (word 192)) *) + 0x3d800c0c; (* arm_STR Q12 X0 (Immediate_Offset (word 48)) *) + 0x3dc04009; (* arm_LDR Q9 X0 (Immediate_Offset (word 256)) *) + 0x3dc05006; (* arm_LDR Q6 X0 (Immediate_Offset (word 320)) *) + 0x3dc06003; (* arm_LDR Q3 X0 (Immediate_Offset (word 384)) *) + 0x3dc07004; (* arm_LDR Q4 X0 (Immediate_Offset (word 448)) *) + 0x3d801c1a; (* arm_STR Q26 X0 (Immediate_Offset (word 112)) *) + 0x4e66853c; (* arm_ADD_VEC Q28 Q9 Q6 16 128 *) + 0x4e64846d; (* arm_ADD_VEC Q13 Q3 Q4 16 128 *) + 0x3d802c08; (* arm_STR Q8 X0 (Immediate_Offset (word 176)) *) + 0x4e708713; (* arm_ADD_VEC Q19 Q24 Q16 16 128 *) + 0x4e6d8797; (* arm_ADD_VEC Q23 Q28 Q13 16 128 *) + 0xd1000484; (* arm_SUB X4 X4 (rvalue (word 1)) *) + 0xb5fff664; (* arm_CBNZ X4 (word 2096844) *) + 0x4e6f856a; (* arm_ADD_VEC Q10 Q11 Q15 16 128 *) + 0x6e6d878c; (* arm_SUB_VEC Q12 Q28 Q13 16 128 *) + 0x6e6f856b; (* arm_SUB_VEC Q11 Q11 Q15 16 128 *) + 0x6e738556; (* arm_SUB_VEC Q22 Q10 Q19 16 128 *) + 0x4f408992; (* arm_MUL_VEC Q18 Q12 (Q0 :> LANE_H 4) 16 128 *) + 0x4f50d99a; (* arm_SQRDMULH_VEC Q26 Q12 (Q0 :> LANE_H 5) 16 128 *) + 0x4f70d2cc; (* arm_SQRDMULH_VEC Q12 Q22 (Q0 :> LANE_H 3) 16 128 *) + 0x4f6082cd; (* arm_MUL_VEC Q13 Q22 (Q0 :> LANE_H 2) 16 128 *) + 0x6e70871f; (* arm_SUB_VEC Q31 Q24 Q16 16 128 *) + 0x4f70d976; (* arm_SQRDMULH_VEC Q22 Q11 (Q0 :> LANE_H 7) 16 128 *) + 0x6f474352; (* arm_MLS_VEC Q18 Q26 (Q7 :> LANE_H 0) 16 128 *) + 0x6f47418d; (* arm_MLS_VEC Q13 Q12 (Q7 :> LANE_H 0) 16 128 *) + 0x4f51d3e2; (* arm_SQRDMULH_VEC Q2 Q31 (Q1 :> LANE_H 1) 16 128 *) + 0x4f4183e5; (* arm_MUL_VEC Q5 Q31 (Q1 :> LANE_H 0) 16 128 *) + 0x4f60896f; (* arm_MUL_VEC Q15 Q11 (Q0 :> LANE_H 6) 16 128 *) + 0x6e7285ac; (* arm_SUB_VEC Q12 Q13 Q18 16 128 *) + 0x6e648464; (* arm_SUB_VEC Q4 Q3 Q4 16 128 *) + 0x6f474045; (* arm_MLS_VEC Q5 Q2 (Q7 :> LANE_H 0) 16 128 *) + 0x4f50d19a; (* arm_SQRDMULH_VEC Q26 Q12 (Q0 :> LANE_H 1) 16 128 *) + 0x4f40818c; (* arm_MUL_VEC Q12 Q12 (Q0 :> LANE_H 0) 16 128 *) + 0x6f4742cf; (* arm_MLS_VEC Q15 Q22 (Q7 :> LANE_H 0) 16 128 *) + 0x4f51d888; (* arm_SQRDMULH_VEC Q8 Q4 (Q1 :> LANE_H 5) 16 128 *) + 0x4f418884; (* arm_MUL_VEC Q4 Q4 (Q1 :> LANE_H 4) 16 128 *) + 0x6f47434c; (* arm_MLS_VEC Q12 Q26 (Q7 :> LANE_H 0) 16 128 *) + 0x6e6585f5; (* arm_SUB_VEC Q21 Q15 Q5 16 128 *) + 0x6e66853c; (* arm_SUB_VEC Q28 Q9 Q6 16 128 *) + 0x6f474104; (* arm_MLS_VEC Q4 Q8 (Q7 :> LANE_H 0) 16 128 *) + 0x4f6082b8; (* arm_MUL_VEC Q24 Q21 (Q0 :> LANE_H 2) 16 128 *) + 0x4f70d2a8; (* arm_SQRDMULH_VEC Q8 Q21 (Q0 :> LANE_H 3) 16 128 *) + 0x4f71d386; (* arm_SQRDMULH_VEC Q6 Q28 (Q1 :> LANE_H 3) 16 128 *) + 0x4e738553; (* arm_ADD_VEC Q19 Q10 Q19 16 128 *) + 0x4f61839c; (* arm_MUL_VEC Q28 Q28 (Q1 :> LANE_H 2) 16 128 *) + 0x6f474118; (* arm_MLS_VEC Q24 Q8 (Q7 :> LANE_H 0) 16 128 *) + 0x6e77866b; (* arm_SUB_VEC Q11 Q19 Q23 16 128 *) + 0x3d80600c; (* arm_STR Q12 X0 (Immediate_Offset (word 384)) *) + 0x6f4740dc; (* arm_MLS_VEC Q28 Q6 (Q7 :> LANE_H 0) 16 128 *) + 0x4f50d170; (* arm_SQRDMULH_VEC Q16 Q11 (Q0 :> LANE_H 1) 16 128 *) + 0x4f408169; (* arm_MUL_VEC Q9 Q11 (Q0 :> LANE_H 0) 16 128 *) + 0x4e6585e6; (* arm_ADD_VEC Q6 Q15 Q5 16 128 *) + 0x4e64879a; (* arm_ADD_VEC Q26 Q28 Q4 16 128 *) + 0x6e64878f; (* arm_SUB_VEC Q15 Q28 Q4 16 128 *) + 0x6f474209; (* arm_MLS_VEC Q9 Q16 (Q7 :> LANE_H 0) 16 128 *) + 0x4e7a84c3; (* arm_ADD_VEC Q3 Q6 Q26 16 128 *) + 0x4f4089e8; (* arm_MUL_VEC Q8 Q15 (Q0 :> LANE_H 4) 16 128 *) + 0x4f50d9ef; (* arm_SQRDMULH_VEC Q15 Q15 (Q0 :> LANE_H 5) 16 128 *) + 0x3d804009; (* arm_STR Q9 X0 (Immediate_Offset (word 256)) *) + 0x6e7a84c2; (* arm_SUB_VEC Q2 Q6 Q26 16 128 *) + 0x3d801003; (* arm_STR Q3 X0 (Immediate_Offset (word 64)) *) + 0x6f4741e8; (* arm_MLS_VEC Q8 Q15 (Q7 :> LANE_H 0) 16 128 *) + 0x4f50d04f; (* arm_SQRDMULH_VEC Q15 Q2 (Q0 :> LANE_H 1) 16 128 *) + 0x4f40804b; (* arm_MUL_VEC Q11 Q2 (Q0 :> LANE_H 0) 16 128 *) + 0x4e7285b0; (* arm_ADD_VEC Q16 Q13 Q18 16 128 *) + 0x6e68870c; (* arm_SUB_VEC Q12 Q24 Q8 16 128 *) + 0x4e688708; (* arm_ADD_VEC Q8 Q24 Q8 16 128 *) + 0x6f4741eb; (* arm_MLS_VEC Q11 Q15 (Q7 :> LANE_H 0) 16 128 *) + 0x4f50d19a; (* arm_SQRDMULH_VEC Q26 Q12 (Q0 :> LANE_H 1) 16 128 *) + 0x4f40818c; (* arm_MUL_VEC Q12 Q12 (Q0 :> LANE_H 0) 16 128 *) + 0x3d803008; (* arm_STR Q8 X0 (Immediate_Offset (word 192)) *) + 0x4e77866f; (* arm_ADD_VEC Q15 Q19 Q23 16 128 *) + 0x3d80500b; (* arm_STR Q11 X0 (Immediate_Offset (word 320)) *) + 0x6f47434c; (* arm_MLS_VEC Q12 Q26 (Q7 :> LANE_H 0) 16 128 *) + 0x3c81040f; (* arm_STR Q15 X0 (Postimmediate_Offset (word 16)) *) + 0x3d801c10; (* arm_STR Q16 X0 (Immediate_Offset (word 112)) *) + 0x3d806c0c; (* arm_STR Q12 X0 (Immediate_Offset (word 432)) *) + 0x6d4027e8; (* arm_LDP D8 D9 SP (Immediate_Offset (iword (&0))) *) + 0x6d412fea; (* arm_LDP D10 D11 SP (Immediate_Offset (iword (&16))) *) + 0x6d4237ec; (* arm_LDP D12 D13 SP (Immediate_Offset (iword (&32))) *) + 0x6d433fee; (* arm_LDP D14 D15 SP (Immediate_Offset (iword (&48))) *) + 0x910103ff; (* arm_ADD SP SP (rvalue (word 64)) *) + 0xd65f03c0 (* arm_RET X30 *) +];; + +let MLKEM_INTT_EXEC = ARM_MK_EXEC_RULE mlkem_intt_mc;; + +(* ------------------------------------------------------------------------- *) +(* Data tables that are assumed in the precondition. *) +(* ------------------------------------------------------------------------- *) + +let intt_zetas_layer01234 = define + `intt_zetas_layer01234:int list = + [&1583; &15582; -- &821; -- &8081; &1355; &13338; &0; &0; -- &569; -- &5601; + &450; &4429; &936; &9213; &0; &0; &69; &679; &447; &4400; -- &535; + -- &5266; &0; &0; &543; &5345; &1235; &12156; -- &1426; -- &14036; &0; + &0; -- &797; -- &7845; -- &1333; -- &13121; &1089; &10719; &0; &0; + -- &193; -- &1900; -- &56; -- &551; &283; &2786; &0; &0; &1410; &13879; + -- &1476; -- &14529; -- &1339; -- &13180; &0; &0; -- &1062; -- &10453; + &882; &8682; -- &296; -- &2914; &0; &0; &1600; &15749; &40; &394; + &749; &7373; -- &848; -- &8347; &1432; &14095; -- &630; -- &6201; + &687; &6762; &0; &0]`;; + +let intt_zetas_layer56 = define + `intt_zetas_layer56:int list = + [-- &910; -- &910; -- &1227; -- &1227; &219; &219; &855; &855; -- &8957; + -- &8957; -- &12078; -- &12078; &2156; &2156; &8416; &8416; &1175; + &1175; &394; &394; -- &1029; -- &1029; -- &1212; -- &1212; &11566; + &11566; &3878; &3878; -- &10129; -- &10129; -- &11930; -- &11930; + -- &885; -- &885; &1219; &1219; &1455; &1455; &1607; &1607; -- &8711; + -- &8711; &11999; &11999; &14322; &14322; &15818; &15818; -- &648; + -- &648; -- &1481; -- &1481; &712; &712; &682; &682; -- &6378; -- &6378; + -- &14578; -- &14578; &7008; &7008; &6713; &6713; -- &886; -- &886; + &1179; &1179; -- &1026; -- &1026; -- &1092; -- &1092; -- &8721; + -- &8721; &11605; &11605; -- &10099; -- &10099; -- &10749; -- &10749; + &554; &554; -- &1143; -- &1143; -- &403; -- &403; &525; &525; &5453; + &5453; -- &11251; -- &11251; -- &3967; -- &3967; &5168; &5168; &927; + &927; -- &1534; -- &1534; &461; &461; -- &1438; -- &1438; &9125; + &9125; -- &15099; -- &15099; &4538; &4538; -- &14155; -- &14155; &735; + &735; -- &561; -- &561; -- &757; -- &757; -- &319; -- &319; &7235; + &7235; -- &5522; -- &5522; -- &7451; -- &7451; -- &3140; -- &3140; + &863; &863; &1230; &1230; &556; &556; -- &1063; -- &1063; &8495; + &8495; &12107; &12107; &5473; &5473; -- &10463; -- &10463; -- &452; + -- &452; -- &807; -- &807; -- &1435; -- &1435; &1010; &1010; -- &4449; + -- &4449; -- &7943; -- &7943; -- &14125; -- &14125; &9942; &9942; + -- &1645; -- &1645; &780; &780; &109; &109; &1031; &1031; -- &16192; + -- &16192; &7678; &7678; &1073; &1073; &10148; &10148; &1239; &1239; + -- &375; -- &375; &1292; &1292; -- &1584; -- &1584; &12196; &12196; + -- &3691; -- &3691; &12717; &12717; -- &15592; -- &15592; &1414; &1414; + -- &1320; -- &1320; -- &33; -- &33; &464; &464; &13918; &13918; + -- &12993; -- &12993; -- &325; -- &325; &4567; &4567; -- &641; -- &641; + &992; &992; &941; &941; &1021; &1021; -- &6309; -- &6309; &9764; + &9764; &9262; &9262; &10050; &10050; -- &268; -- &268; -- &733; + -- &733; &892; &892; -- &939; -- &939; -- &2638; -- &2638; -- &7215; + -- &7215; &8780; &8780; -- &9243; -- &9243; -- &632; -- &632; &816; &816; + &1352; &1352; -- &650; -- &650; -- &6221; -- &6221; &8032; &8032; + &13308; &13308; -- &6398; -- &6398; &642; &642; -- &952; -- &952; + &1540; &1540; -- &1651; -- &1651; &6319; &6319; -- &9371; -- &9371; + &15159; &15159; -- &16251; -- &16251; -- &1461; -- &1461; &1482; + &1482; &540; &540; &1626; &1626; -- &14381; -- &14381; &14588; &14588; + &5315; &5315; &16005; &16005; &1274; &1274; &1052; &1052; &1025; + &1025; -- &1197; -- &1197; &12540; &12540; &10355; &10355; &10089; + &10089; -- &11782; -- &11782; &279; &279; &1173; &1173; -- &233; + -- &233; &667; &667; &2746; &2746; &11546; &11546; -- &2293; -- &2293; + &6565; &6565; &314; &314; -- &756; -- &756; &48; &48; -- &1409; + -- &1409; &3091; &3091; -- &7441; -- &7441; &472; &472; -- &13869; + -- &13869; &1573; &1573; &76; &76; -- &331; -- &331; -- &289; -- &289; + &15483; &15483; &748; &748; -- &3258; -- &3258; -- &2845; -- &2845; + -- &1100; -- &1100; -- &723; -- &723; &680; &680; &568; &568; -- &10828; + -- &10828; -- &7117; -- &7117; &6693; &6693; &5591; &5591; &1041; + &1041; -- &1637; -- &1637; -- &583; -- &583; -- &17; -- &17; &10247; + &10247; -- &16113; -- &16113; -- &5739; -- &5739; -- &167; -- &167]`;; + +let intt_constants = define + `intt_constants z_01234 z_56 s <=> + (!i. i < 80 + ==> read(memory :> bytes16(word_add z_01234 (word(2 * i)))) s = + iword(EL i intt_zetas_layer01234)) /\ + (!i. i < 384 + ==> read(memory :> bytes16(word_add z_56 (word(2 * i)))) s = + iword(EL i intt_zetas_layer56))`;; + +(* ------------------------------------------------------------------------- *) +(* Some convenient proof tools. *) +(* ------------------------------------------------------------------------- *) + +let READ_MEMORY_MERGE_CONV = + let baseconv = + GEN_REWRITE_CONV I [READ_MEMORY_BYTESIZED_SPLIT] THENC + LAND_CONV(LAND_CONV(RAND_CONV(RAND_CONV + (TRY_CONV(GEN_REWRITE_CONV I [GSYM WORD_ADD_ASSOC] THENC + RAND_CONV WORD_ADD_CONV))))) in + let rec conv n tm = + if n = 0 then REFL tm else + (baseconv THENC BINOP_CONV (conv(n - 1))) tm in + conv;; + +let READ_MEMORY_SPLIT_CONV = + let baseconv = + GEN_REWRITE_CONV I [READ_MEMORY_BYTESIZED_UNSPLIT] THENC + BINOP_CONV(LAND_CONV(LAND_CONV(RAND_CONV(RAND_CONV + (TRY_CONV(GEN_REWRITE_CONV I [GSYM WORD_ADD_ASSOC] THENC + RAND_CONV WORD_ADD_CONV)))))) in + let rec conv n tm = + if n = 0 then REFL tm else + (baseconv THENC BINOP_CONV (conv(n - 1))) tm in + conv;; + +let SIMD_SIMPLIFY_CONV = + TOP_DEPTH_CONV + (REWR_CONV WORD_SUBWORD_AND ORELSEC WORD_SIMPLE_SUBWORD_CONV) THENC + DEPTH_CONV WORD_NUM_RED_CONV THENC + REWRITE_CONV[GSYM barred; GSYM barmul];; + +let SIMD_SIMPLIFY_TAC = + let simdable = can (term_match [] `read X (s:armstate):int128 = whatever`) in + TRY(FIRST_X_ASSUM + (ASSUME_TAC o + CONV_RULE(RAND_CONV SIMD_SIMPLIFY_CONV) o + check (simdable o concl)));; + +let MEMORY_128_FROM_16_TAC = + let a_tm = `a:int64` and n_tm = `n:num` and i64_ty = `:int64` + and pat = `read (memory :> bytes128(word_add a (word n))) s0` in + fun v n -> + let pat' = subst[mk_var(v,i64_ty),a_tm] pat in + let f i = + let itm = mk_small_numeral(16*i) in + READ_MEMORY_MERGE_CONV 3 (subst[itm,n_tm] pat') in + MP_TAC(end_itlist CONJ (map f (0--(n-1))));; + +(* ------------------------------------------------------------------------- *) +(* Correctness proof. *) +(* ------------------------------------------------------------------------- *) + +let MLKEM_INTT_CORRECT = prove + (`!a z_01234 z_56 x pc. + ALL (nonoverlapping (a,512)) + [(word pc,0x5d0); (z_01234,160); (z_56,768)] + ==> ensures arm + (\s. aligned_bytes_loaded s (word pc) mlkem_intt_mc /\ + read PC s = word (pc + 0x14) /\ + C_ARGUMENTS [a; z_01234; z_56] s /\ + intt_constants z_01234 z_56 s /\ + !i. i < 256 + ==> read(memory :> bytes16(word_add a (word(2 * i)))) s = + x i) + (\s. read PC s = word(pc + 0x5b8) /\ + !i. i < 256 + ==> let zi = + read(memory :> bytes16(word_add a (word(2 * i)))) s in + (ival zi == inverse_ntt (ival o x) i) (mod &3329) /\ + abs(ival zi) <= &26624) + (MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI ,, + MAYCHANGE [Q8; Q9; Q10; Q11; Q12; Q13; Q14; Q15] ,, + MAYCHANGE [memory :> bytes(a,512)])`, + MAP_EVERY X_GEN_TAC + [`a:int64`; `z_01234:int64`; `z_56:int64`; `x:num->int16`; `pc:num`] THEN + REWRITE_TAC[MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI; C_ARGUMENTS; + NONOVERLAPPING_CLAUSES; ALL] THEN + DISCH_THEN(REPEAT_TCL CONJUNCTS_THEN ASSUME_TAC) THEN + + (*** Manually expand the cases in the hypotheses ***) + + REWRITE_TAC[intt_constants] THEN + CONV_TAC(RATOR_CONV(LAND_CONV(ONCE_DEPTH_CONV + (EXPAND_CASES_CONV THENC ONCE_DEPTH_CONV NUM_MULT_CONV)))) THEN + REWRITE_TAC[intt_zetas_layer01234; intt_zetas_layer56] THEN + CONV_TAC(ONCE_DEPTH_CONV EL_CONV) THEN + CONV_TAC(ONCE_DEPTH_CONV WORD_IWORD_CONV) THEN REWRITE_TAC[WORD_ADD_0] THEN + ENSURES_INIT_TAC "s0" THEN + + (*** Manually restructure to match the 128-bit loads. It would be nicer + *** if the simulation machinery did this automatically. + ***) + + MEMORY_128_FROM_16_TAC "a" 32 THEN + MEMORY_128_FROM_16_TAC "z_01234" 10 THEN + MEMORY_128_FROM_16_TAC "z_56" 48 THEN + ASM_REWRITE_TAC[WORD_ADD_0] THEN CONV_TAC WORD_REDUCE_CONV THEN + DISCARD_MATCHING_ASSUMPTIONS [`read (memory :> bytes16 a) s = x`] THEN + REPEAT STRIP_TAC THEN + + (*** Ghost up initial contents of Q7 since it isn't fully initialized ***) + + ABBREV_TAC `v7_init:int128 = read Q7 s0` THEN + + (*** Simulate all the way to the end, in effect unrolling loops ***) + + MAP_EVERY (fun n -> ARM_STEPS_TAC MLKEM_INTT_EXEC [n] THEN SIMD_SIMPLIFY_TAC) + (1--1181) THEN + ENSURES_FINAL_STATE_TAC THEN ASM_REWRITE_TAC[] THEN + + (*** Reverse the restructuring by splitting the 128-bit words up ***) + + REPEAT(FIRST_X_ASSUM(STRIP_ASSUME_TAC o + CONV_RULE SIMD_SIMPLIFY_CONV o + CONV_RULE(READ_MEMORY_SPLIT_CONV 3) o + check (can (term_match [] `read qqq s:int128 = xxx`) o concl))) THEN + + (*** Turn the conclusion into an explicit conjunction and split it up ***) + + CONV_TAC(ONCE_DEPTH_CONV let_CONV) THEN REWRITE_TAC[INT_ABS_BOUNDS] THEN + GEN_REWRITE_TAC (BINDER_CONV o RAND_CONV) [GSYM I_THM] THEN + CONV_TAC(EXPAND_CASES_CONV THENC ONCE_DEPTH_CONV NUM_MULT_CONV) THEN + ASM_REWRITE_TAC[I_THM; WORD_ADD_0] THEN DISCARD_STATE_TAC "s1181" THEN + REPEAT(W(fun (asl,w) -> + if length(conjuncts w) > 3 then CONJ_TAC else NO_TAC)) THEN + + (*** Get congruences and bounds for the result digits and finish ***) + + (W(MP_TAC o CONGBOUND_RULE o rand o lhand o rator o lhand o snd) THEN + MATCH_MP_TAC MONO_AND THEN CONJ_TAC THENL + [MATCH_MP_TAC(REWRITE_RULE[IMP_CONJ_ALT] INT_CONG_TRANS) THEN + CONV_TAC(ONCE_DEPTH_CONV INVERSE_NTT_CONV) THEN + REWRITE_TAC[GSYM INT_REM_EQ; o_THM] THEN CONV_TAC INT_REM_DOWN_CONV THEN + REWRITE_TAC[INT_REM_EQ] THEN + REWRITE_TAC[REAL_INT_CONGRUENCE; INT_OF_NUM_EQ; ARITH_EQ] THEN + REWRITE_TAC[GSYM REAL_OF_INT_CLAUSES] THEN + CONV_TAC(RAND_CONV REAL_POLY_CONV) THEN REAL_INTEGER_TAC; + MATCH_MP_TAC(INT_ARITH + `l':int <= l /\ u <= u' + ==> l <= x /\ x <= u ==> l' <= x /\ x <= u'`) THEN + CONV_TAC INT_REDUCE_CONV]));; + +(*** Subroutine form, somewhat messy elaboration of the usual wrapper ***) + +let MLKEM_INTT_SUBROUTINE_CORRECT = prove + (`!a z_01234 z_56 x pc stackpointer returnaddress. + aligned 16 stackpointer /\ + ALLPAIRS nonoverlapping + [(a,512); (word_sub stackpointer (word 64),64)] + [(word pc,0x5d0); (z_01234,160); (z_56,768)] /\ + nonoverlapping (a,512) (word_sub stackpointer (word 64),64) + ==> ensures arm + (\s. aligned_bytes_loaded s (word pc) mlkem_intt_mc /\ + read PC s = word pc /\ + read SP s = stackpointer /\ + read X30 s = returnaddress /\ + C_ARGUMENTS [a; z_01234; z_56] s /\ + intt_constants z_01234 z_56 s /\ + !i. i < 256 + ==> read(memory :> bytes16(word_add a (word(2 * i)))) s = + x i) + (\s. read PC s = returnaddress /\ + !i. i < 256 + ==> let zi = + read(memory :> bytes16(word_add a (word(2 * i)))) s in + (ival zi == inverse_ntt (ival o x) i) (mod &3329) /\ + abs(ival zi) <= &26624) + (MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI ,, + MAYCHANGE [memory :> bytes(a,512); + memory :> bytes(word_sub stackpointer (word 64),64)])`, + let TWEAK_CONV = + REWRITE_CONV[intt_constants] THENC + ONCE_DEPTH_CONV let_CONV THENC + ONCE_DEPTH_CONV EXPAND_CASES_CONV THENC + ONCE_DEPTH_CONV NUM_MULT_CONV THENC + PURE_REWRITE_CONV [WORD_ADD_0] in + REWRITE_TAC[fst MLKEM_INTT_EXEC] THEN + CONV_TAC TWEAK_CONV THEN + ARM_ADD_RETURN_STACK_TAC ~pre_post_nsteps:(5,5) MLKEM_INTT_EXEC + (REWRITE_RULE[fst MLKEM_INTT_EXEC] (CONV_RULE TWEAK_CONV MLKEM_INTT_CORRECT)) + `[D8; D9; D10; D11; D12; D13; D14; D15]` 64);; diff --git a/proofs/hol_light/arm/proofs/mlkem_ntt.ml b/proofs/hol_light/arm/proofs/mlkem_ntt.ml new file mode 100644 index 000000000..7c0528046 --- /dev/null +++ b/proofs/hol_light/arm/proofs/mlkem_ntt.ml @@ -0,0 +1,608 @@ +(* + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT-0 + *) + +(* ========================================================================= *) +(* Forward number theoretic transform. *) +(* ========================================================================= *) + +needs "arm/proofs/base.ml";; +needs "arm/proofs/utils/mlkem.ml";; + +(**** print_literal_from_elf "mlkem/mlkem_ntt.o";; + ****) + +let mlkem_ntt_mc = define_assert_from_elf + "mlkem_ntt_mc" "mlkem/mlkem_ntt.o" +[ + 0xd10103ff; (* arm_SUB SP SP (rvalue (word 64)) *) + 0x6d0027e8; (* arm_STP D8 D9 SP (Immediate_Offset (iword (&0))) *) + 0x6d012fea; (* arm_STP D10 D11 SP (Immediate_Offset (iword (&16))) *) + 0x6d0237ec; (* arm_STP D12 D13 SP (Immediate_Offset (iword (&32))) *) + 0x6d033fee; (* arm_STP D14 D15 SP (Immediate_Offset (iword (&48))) *) + 0x5281a025; (* arm_MOV W5 (rvalue (word 3329)) *) + 0x4e021ca7; (* arm_INS_GEN Q7 W5 0 16 *) + 0x5289d7e5; (* arm_MOV W5 (rvalue (word 20159)) *) + 0x4e061ca7; (* arm_INS_GEN Q7 W5 16 16 *) + 0xaa0003e3; (* arm_MOV X3 X0 *) + 0xd2800084; (* arm_MOV X4 (rvalue (word 4)) *) + 0x3cc20420; (* arm_LDR Q0 X1 (Postimmediate_Offset (word 32)) *) + 0x3cdf0021; (* arm_LDR Q1 X1 (Immediate_Offset (word 18446744073709551600)) *) + 0x3dc00015; (* arm_LDR Q21 X0 (Immediate_Offset (word 0)) *) + 0x3dc0101a; (* arm_LDR Q26 X0 (Immediate_Offset (word 64)) *) + 0x3dc0201d; (* arm_LDR Q29 X0 (Immediate_Offset (word 128)) *) + 0x3dc03014; (* arm_LDR Q20 X0 (Immediate_Offset (word 192)) *) + 0x3dc04017; (* arm_LDR Q23 X0 (Immediate_Offset (word 256)) *) + 0x3dc0700b; (* arm_LDR Q11 X0 (Immediate_Offset (word 448)) *) + 0x4f4082e2; (* arm_MUL_VEC Q2 Q23 (Q0 :> LANE_H 0) 16 128 *) + 0x3dc05011; (* arm_LDR Q17 X0 (Immediate_Offset (word 320)) *) + 0x4f40816f; (* arm_MUL_VEC Q15 Q11 (Q0 :> LANE_H 0) 16 128 *) + 0x3dc0600d; (* arm_LDR Q13 X0 (Immediate_Offset (word 384)) *) + 0xd1000484; (* arm_SUB X4 X4 (rvalue (word 1)) *) + 0x4f50d2ee; (* arm_SQRDMULH_VEC Q14 Q23 (Q0 :> LANE_H 1) 16 128 *) + 0x4f50d237; (* arm_SQRDMULH_VEC Q23 Q17 (Q0 :> LANE_H 1) 16 128 *) + 0x4f408231; (* arm_MUL_VEC Q17 Q17 (Q0 :> LANE_H 0) 16 128 *) + 0x4f50d1bc; (* arm_SQRDMULH_VEC Q28 Q13 (Q0 :> LANE_H 1) 16 128 *) + 0x6f4741c2; (* arm_MLS_VEC Q2 Q14 (Q7 :> LANE_H 0) 16 128 *) + 0x4f4081ae; (* arm_MUL_VEC Q14 Q13 (Q0 :> LANE_H 0) 16 128 *) + 0x6f4742f1; (* arm_MLS_VEC Q17 Q23 (Q7 :> LANE_H 0) 16 128 *) + 0x4f50d177; (* arm_SQRDMULH_VEC Q23 Q11 (Q0 :> LANE_H 1) 16 128 *) + 0x6e6286ab; (* arm_SUB_VEC Q11 Q21 Q2 16 128 *) + 0x6f47438e; (* arm_MLS_VEC Q14 Q28 (Q7 :> LANE_H 0) 16 128 *) + 0x6e71875c; (* arm_SUB_VEC Q28 Q26 Q17 16 128 *) + 0x4e718751; (* arm_ADD_VEC Q17 Q26 Q17 16 128 *) + 0x4e6286a2; (* arm_ADD_VEC Q2 Q21 Q2 16 128 *) + 0x6e6e87ad; (* arm_SUB_VEC Q13 Q29 Q14 16 128 *) + 0x4e6e87ae; (* arm_ADD_VEC Q14 Q29 Q14 16 128 *) + 0x6f4742ef; (* arm_MLS_VEC Q15 Q23 (Q7 :> LANE_H 0) 16 128 *) + 0x4f50d9b7; (* arm_SQRDMULH_VEC Q23 Q13 (Q0 :> LANE_H 5) 16 128 *) + 0x4f4089ad; (* arm_MUL_VEC Q13 Q13 (Q0 :> LANE_H 4) 16 128 *) + 0x4f70d1d5; (* arm_SQRDMULH_VEC Q21 Q14 (Q0 :> LANE_H 3) 16 128 *) + 0x6e6f869a; (* arm_SUB_VEC Q26 Q20 Q15 16 128 *) + 0x4e6f868f; (* arm_ADD_VEC Q15 Q20 Q15 16 128 *) + 0x6f4742ed; (* arm_MLS_VEC Q13 Q23 (Q7 :> LANE_H 0) 16 128 *) + 0x4f50db57; (* arm_SQRDMULH_VEC Q23 Q26 (Q0 :> LANE_H 5) 16 128 *) + 0x4f408b5a; (* arm_MUL_VEC Q26 Q26 (Q0 :> LANE_H 4) 16 128 *) + 0x4f6081ce; (* arm_MUL_VEC Q14 Q14 (Q0 :> LANE_H 2) 16 128 *) + 0x6e6d857d; (* arm_SUB_VEC Q29 Q11 Q13 16 128 *) + 0x4e6d856b; (* arm_ADD_VEC Q11 Q11 Q13 16 128 *) + 0x6f4742fa; (* arm_MLS_VEC Q26 Q23 (Q7 :> LANE_H 0) 16 128 *) + 0x4f70d1f7; (* arm_SQRDMULH_VEC Q23 Q15 (Q0 :> LANE_H 3) 16 128 *) + 0x4f6081ed; (* arm_MUL_VEC Q13 Q15 (Q0 :> LANE_H 2) 16 128 *) + 0x6f4742ae; (* arm_MLS_VEC Q14 Q21 (Q7 :> LANE_H 0) 16 128 *) + 0x6e7a878f; (* arm_SUB_VEC Q15 Q28 Q26 16 128 *) + 0x4e7a879c; (* arm_ADD_VEC Q28 Q28 Q26 16 128 *) + 0x6f4742ed; (* arm_MLS_VEC Q13 Q23 (Q7 :> LANE_H 0) 16 128 *) + 0x6e6e8457; (* arm_SUB_VEC Q23 Q2 Q14 16 128 *) + 0x4e6e844e; (* arm_ADD_VEC Q14 Q2 Q14 16 128 *) + 0x4f71d382; (* arm_SQRDMULH_VEC Q2 Q28 (Q1 :> LANE_H 3) 16 128 *) + 0x6e6d8635; (* arm_SUB_VEC Q21 Q17 Q13 16 128 *) + 0x4e6d8631; (* arm_ADD_VEC Q17 Q17 Q13 16 128 *) + 0x4f61839c; (* arm_MUL_VEC Q28 Q28 (Q1 :> LANE_H 2) 16 128 *) + 0x4f51d2ad; (* arm_SQRDMULH_VEC Q13 Q21 (Q1 :> LANE_H 1) 16 128 *) + 0x4f70da3a; (* arm_SQRDMULH_VEC Q26 Q17 (Q0 :> LANE_H 7) 16 128 *) + 0x4f608a31; (* arm_MUL_VEC Q17 Q17 (Q0 :> LANE_H 6) 16 128 *) + 0x4f4182b5; (* arm_MUL_VEC Q21 Q21 (Q1 :> LANE_H 0) 16 128 *) + 0x6f47405c; (* arm_MLS_VEC Q28 Q2 (Q7 :> LANE_H 0) 16 128 *) + 0x4f51d9e2; (* arm_SQRDMULH_VEC Q2 Q15 (Q1 :> LANE_H 5) 16 128 *) + 0x6f474351; (* arm_MLS_VEC Q17 Q26 (Q7 :> LANE_H 0) 16 128 *) + 0x6f4741b5; (* arm_MLS_VEC Q21 Q13 (Q7 :> LANE_H 0) 16 128 *) + 0x6e7c856d; (* arm_SUB_VEC Q13 Q11 Q28 16 128 *) + 0x4e7c857c; (* arm_ADD_VEC Q28 Q11 Q28 16 128 *) + 0x6e7185cb; (* arm_SUB_VEC Q11 Q14 Q17 16 128 *) + 0x4f4189ef; (* arm_MUL_VEC Q15 Q15 (Q1 :> LANE_H 4) 16 128 *) + 0x4e7185ce; (* arm_ADD_VEC Q14 Q14 Q17 16 128 *) + 0x6e7586f1; (* arm_SUB_VEC Q17 Q23 Q21 16 128 *) + 0x4e7586f7; (* arm_ADD_VEC Q23 Q23 Q21 16 128 *) + 0x6f47404f; (* arm_MLS_VEC Q15 Q2 (Q7 :> LANE_H 0) 16 128 *) + 0x3c81040e; (* arm_STR Q14 X0 (Postimmediate_Offset (word 16)) *) + 0x3dc00015; (* arm_LDR Q21 X0 (Immediate_Offset (word 0)) *) + 0x6e6f87ae; (* arm_SUB_VEC Q14 Q29 Q15 16 128 *) + 0x4e6f87a2; (* arm_ADD_VEC Q2 Q29 Q15 16 128 *) + 0x3d800c0b; (* arm_STR Q11 X0 (Immediate_Offset (word 48)) *) + 0x3dc0101a; (* arm_LDR Q26 X0 (Immediate_Offset (word 64)) *) + 0x3d801c17; (* arm_STR Q23 X0 (Immediate_Offset (word 112)) *) + 0x3dc0201d; (* arm_LDR Q29 X0 (Immediate_Offset (word 128)) *) + 0x3d802c11; (* arm_STR Q17 X0 (Immediate_Offset (word 176)) *) + 0x3dc03014; (* arm_LDR Q20 X0 (Immediate_Offset (word 192)) *) + 0x3d803c1c; (* arm_STR Q28 X0 (Immediate_Offset (word 240)) *) + 0x3dc04017; (* arm_LDR Q23 X0 (Immediate_Offset (word 256)) *) + 0x3d804c0d; (* arm_STR Q13 X0 (Immediate_Offset (word 304)) *) + 0x3dc05011; (* arm_LDR Q17 X0 (Immediate_Offset (word 320)) *) + 0x3d805c02; (* arm_STR Q2 X0 (Immediate_Offset (word 368)) *) + 0x4f4082e2; (* arm_MUL_VEC Q2 Q23 (Q0 :> LANE_H 0) 16 128 *) + 0x3d806c0e; (* arm_STR Q14 X0 (Immediate_Offset (word 432)) *) + 0x3dc0700b; (* arm_LDR Q11 X0 (Immediate_Offset (word 448)) *) + 0x3dc0600d; (* arm_LDR Q13 X0 (Immediate_Offset (word 384)) *) + 0x4f40816f; (* arm_MUL_VEC Q15 Q11 (Q0 :> LANE_H 0) 16 128 *) + 0xd1000484; (* arm_SUB X4 X4 (rvalue (word 1)) *) + 0xb5fff664; (* arm_CBNZ X4 (word 2096844) *) + 0x4f50d17b; (* arm_SQRDMULH_VEC Q27 Q11 (Q0 :> LANE_H 1) 16 128 *) + 0x4f4081a8; (* arm_MUL_VEC Q8 Q13 (Q0 :> LANE_H 0) 16 128 *) + 0x4f50d1b6; (* arm_SQRDMULH_VEC Q22 Q13 (Q0 :> LANE_H 1) 16 128 *) + 0x4f40822b; (* arm_MUL_VEC Q11 Q17 (Q0 :> LANE_H 0) 16 128 *) + 0x6f47436f; (* arm_MLS_VEC Q15 Q27 (Q7 :> LANE_H 0) 16 128 *) + 0x4f50d23c; (* arm_SQRDMULH_VEC Q28 Q17 (Q0 :> LANE_H 1) 16 128 *) + 0x6f4742c8; (* arm_MLS_VEC Q8 Q22 (Q7 :> LANE_H 0) 16 128 *) + 0x4f50d2e5; (* arm_SQRDMULH_VEC Q5 Q23 (Q0 :> LANE_H 1) 16 128 *) + 0x4e6f8690; (* arm_ADD_VEC Q16 Q20 Q15 16 128 *) + 0x6f47438b; (* arm_MLS_VEC Q11 Q28 (Q7 :> LANE_H 0) 16 128 *) + 0x6e6887a6; (* arm_SUB_VEC Q6 Q29 Q8 16 128 *) + 0x4f70d211; (* arm_SQRDMULH_VEC Q17 Q16 (Q0 :> LANE_H 3) 16 128 *) + 0x4f608217; (* arm_MUL_VEC Q23 Q16 (Q0 :> LANE_H 2) 16 128 *) + 0x4f4088cd; (* arm_MUL_VEC Q13 Q6 (Q0 :> LANE_H 4) 16 128 *) + 0x4f50d8dc; (* arm_SQRDMULH_VEC Q28 Q6 (Q0 :> LANE_H 5) 16 128 *) + 0x6f4740a2; (* arm_MLS_VEC Q2 Q5 (Q7 :> LANE_H 0) 16 128 *) + 0x6f474237; (* arm_MLS_VEC Q23 Q17 (Q7 :> LANE_H 0) 16 128 *) + 0x4e6b875b; (* arm_ADD_VEC Q27 Q26 Q11 16 128 *) + 0x6f47438d; (* arm_MLS_VEC Q13 Q28 (Q7 :> LANE_H 0) 16 128 *) + 0x6e6286a9; (* arm_SUB_VEC Q9 Q21 Q2 16 128 *) + 0x4e6887b2; (* arm_ADD_VEC Q18 Q29 Q8 16 128 *) + 0x6e77876e; (* arm_SUB_VEC Q14 Q27 Q23 16 128 *) + 0x4e6d853d; (* arm_ADD_VEC Q29 Q9 Q13 16 128 *) + 0x6e6d853e; (* arm_SUB_VEC Q30 Q9 Q13 16 128 *) + 0x4f4181dc; (* arm_MUL_VEC Q28 Q14 (Q1 :> LANE_H 0) 16 128 *) + 0x4f70d249; (* arm_SQRDMULH_VEC Q9 Q18 (Q0 :> LANE_H 3) 16 128 *) + 0x4f608256; (* arm_MUL_VEC Q22 Q18 (Q0 :> LANE_H 2) 16 128 *) + 0x4f51d1d1; (* arm_SQRDMULH_VEC Q17 Q14 (Q1 :> LANE_H 1) 16 128 *) + 0x6e6f868e; (* arm_SUB_VEC Q14 Q20 Q15 16 128 *) + 0x4e6286b8; (* arm_ADD_VEC Q24 Q21 Q2 16 128 *) + 0x6f474136; (* arm_MLS_VEC Q22 Q9 (Q7 :> LANE_H 0) 16 128 *) + 0x4f50d9c9; (* arm_SQRDMULH_VEC Q9 Q14 (Q0 :> LANE_H 5) 16 128 *) + 0x4f4089cd; (* arm_MUL_VEC Q13 Q14 (Q0 :> LANE_H 4) 16 128 *) + 0x6f47423c; (* arm_MLS_VEC Q28 Q17 (Q7 :> LANE_H 0) 16 128 *) + 0x6e768705; (* arm_SUB_VEC Q5 Q24 Q22 16 128 *) + 0x6e6b8742; (* arm_SUB_VEC Q2 Q26 Q11 16 128 *) + 0x6f47412d; (* arm_MLS_VEC Q13 Q9 (Q7 :> LANE_H 0) 16 128 *) + 0x6e7c84b1; (* arm_SUB_VEC Q17 Q5 Q28 16 128 *) + 0x4e7c84ae; (* arm_ADD_VEC Q14 Q5 Q28 16 128 *) + 0x4e77877c; (* arm_ADD_VEC Q28 Q27 Q23 16 128 *) + 0x3d803011; (* arm_STR Q17 X0 (Immediate_Offset (word 192)) *) + 0x4e6d8451; (* arm_ADD_VEC Q17 Q2 Q13 16 128 *) + 0x3d80200e; (* arm_STR Q14 X0 (Immediate_Offset (word 128)) *) + 0x6e6d844d; (* arm_SUB_VEC Q13 Q2 Q13 16 128 *) + 0x4f71d23a; (* arm_SQRDMULH_VEC Q26 Q17 (Q1 :> LANE_H 3) 16 128 *) + 0x4f61822f; (* arm_MUL_VEC Q15 Q17 (Q1 :> LANE_H 2) 16 128 *) + 0x4e768705; (* arm_ADD_VEC Q5 Q24 Q22 16 128 *) + 0x4f51d9b7; (* arm_SQRDMULH_VEC Q23 Q13 (Q1 :> LANE_H 5) 16 128 *) + 0x4f4189ad; (* arm_MUL_VEC Q13 Q13 (Q1 :> LANE_H 4) 16 128 *) + 0x6f47434f; (* arm_MLS_VEC Q15 Q26 (Q7 :> LANE_H 0) 16 128 *) + 0x4f70db8e; (* arm_SQRDMULH_VEC Q14 Q28 (Q0 :> LANE_H 7) 16 128 *) + 0x4f608b91; (* arm_MUL_VEC Q17 Q28 (Q0 :> LANE_H 6) 16 128 *) + 0x6f4742ed; (* arm_MLS_VEC Q13 Q23 (Q7 :> LANE_H 0) 16 128 *) + 0x4e6f87a6; (* arm_ADD_VEC Q6 Q29 Q15 16 128 *) + 0x6e6f87bc; (* arm_SUB_VEC Q28 Q29 Q15 16 128 *) + 0x6f4741d1; (* arm_MLS_VEC Q17 Q14 (Q7 :> LANE_H 0) 16 128 *) + 0x3d804006; (* arm_STR Q6 X0 (Immediate_Offset (word 256)) *) + 0x4e6d87ce; (* arm_ADD_VEC Q14 Q30 Q13 16 128 *) + 0x3d80501c; (* arm_STR Q28 X0 (Immediate_Offset (word 320)) *) + 0x6e6d87d7; (* arm_SUB_VEC Q23 Q30 Q13 16 128 *) + 0x3d80600e; (* arm_STR Q14 X0 (Immediate_Offset (word 384)) *) + 0x4e7184a3; (* arm_ADD_VEC Q3 Q5 Q17 16 128 *) + 0x3d807017; (* arm_STR Q23 X0 (Immediate_Offset (word 448)) *) + 0x6e7184bc; (* arm_SUB_VEC Q28 Q5 Q17 16 128 *) + 0x3c810403; (* arm_STR Q3 X0 (Postimmediate_Offset (word 16)) *) + 0x3d800c1c; (* arm_STR Q28 X0 (Immediate_Offset (word 48)) *) + 0xaa0303e0; (* arm_MOV X0 X3 *) + 0xd2800104; (* arm_MOV X4 (rvalue (word 8)) *) + 0x3cc10422; (* arm_LDR Q2 X1 (Postimmediate_Offset (word 16)) *) + 0x3dc00c0e; (* arm_LDR Q14 X0 (Immediate_Offset (word 48)) *) + 0x3dc00801; (* arm_LDR Q1 X0 (Immediate_Offset (word 32)) *) + 0x4f4281d1; (* arm_MUL_VEC Q17 Q14 (Q2 :> LANE_H 0) 16 128 *) + 0x4f52d1ce; (* arm_SQRDMULH_VEC Q14 Q14 (Q2 :> LANE_H 1) 16 128 *) + 0x4f428028; (* arm_MUL_VEC Q8 Q1 (Q2 :> LANE_H 0) 16 128 *) + 0x3dc00417; (* arm_LDR Q23 X0 (Immediate_Offset (word 16)) *) + 0x6f4741d1; (* arm_MLS_VEC Q17 Q14 (Q7 :> LANE_H 0) 16 128 *) + 0x4f52d021; (* arm_SQRDMULH_VEC Q1 Q1 (Q2 :> LANE_H 1) 16 128 *) + 0x3cc6045e; (* arm_LDR Q30 X2 (Postimmediate_Offset (word 96)) *) + 0x6e7186ee; (* arm_SUB_VEC Q14 Q23 Q17 16 128 *) + 0x4e7186ea; (* arm_ADD_VEC Q10 Q23 Q17 16 128 *) + 0x6f474028; (* arm_MLS_VEC Q8 Q1 (Q7 :> LANE_H 0) 16 128 *) + 0x4f52d9c1; (* arm_SQRDMULH_VEC Q1 Q14 (Q2 :> LANE_H 5) 16 128 *) + 0x4f4289ce; (* arm_MUL_VEC Q14 Q14 (Q2 :> LANE_H 4) 16 128 *) + 0x3dc0001b; (* arm_LDR Q27 X0 (Immediate_Offset (word 0)) *) + 0x4f628157; (* arm_MUL_VEC Q23 Q10 (Q2 :> LANE_H 2) 16 128 *) + 0x6f47402e; (* arm_MLS_VEC Q14 Q1 (Q7 :> LANE_H 0) 16 128 *) + 0x6e688761; (* arm_SUB_VEC Q1 Q27 Q8 16 128 *) + 0x3cdc005c; (* arm_LDR Q28 X2 (Immediate_Offset (word 18446744073709551552)) *) + 0x4e6e842c; (* arm_ADD_VEC Q12 Q1 Q14 16 128 *) + 0x4f72d155; (* arm_SQRDMULH_VEC Q21 Q10 (Q2 :> LANE_H 3) 16 128 *) + 0x6e6e8425; (* arm_SUB_VEC Q5 Q1 Q14 16 128 *) + 0x3cdf004d; (* arm_LDR Q13 X2 (Immediate_Offset (word 18446744073709551600)) *) + 0xd1000484; (* arm_SUB X4 X4 (rvalue (word 1)) *) + 0x3dc01c13; (* arm_LDR Q19 X0 (Immediate_Offset (word 112)) *) + 0x3cc10421; (* arm_LDR Q1 X1 (Postimmediate_Offset (word 16)) *) + 0x6f4742b7; (* arm_MLS_VEC Q23 Q21 (Q7 :> LANE_H 0) 16 128 *) + 0x4e688766; (* arm_ADD_VEC Q6 Q27 Q8 16 128 *) + 0x4f418264; (* arm_MUL_VEC Q4 Q19 (Q1 :> LANE_H 0) 16 128 *) + 0x4f51d273; (* arm_SQRDMULH_VEC Q19 Q19 (Q1 :> LANE_H 1) 16 128 *) + 0x3dc01419; (* arm_LDR Q25 X0 (Immediate_Offset (word 80)) *) + 0x4e85298b; (* arm_TRN1 Q11 Q12 Q5 32 128 *) + 0x6f474264; (* arm_MLS_VEC Q4 Q19 (Q7 :> LANE_H 0) 16 128 *) + 0x6e7784c0; (* arm_SUB_VEC Q0 Q6 Q23 16 128 *) + 0x3cdb0050; (* arm_LDR Q16 X2 (Immediate_Offset (word 18446744073709551536)) *) + 0x6e648738; (* arm_SUB_VEC Q24 Q25 Q4 16 128 *) + 0x4e7784da; (* arm_ADD_VEC Q26 Q6 Q23 16 128 *) + 0x4e648724; (* arm_ADD_VEC Q4 Q25 Q4 16 128 *) + 0x4f51db17; (* arm_SQRDMULH_VEC Q23 Q24 (Q1 :> LANE_H 5) 16 128 *) + 0x4f418b14; (* arm_MUL_VEC Q20 Q24 (Q1 :> LANE_H 4) 16 128 *) + 0x4f71d095; (* arm_SQRDMULH_VEC Q21 Q4 (Q1 :> LANE_H 3) 16 128 *) + 0x4e802b5b; (* arm_TRN1 Q27 Q26 Q0 32 128 *) + 0x4e856999; (* arm_TRN2 Q25 Q12 Q5 32 128 *) + 0x6f4742f4; (* arm_MLS_VEC Q20 Q23 (Q7 :> LANE_H 0) 16 128 *) + 0x4f618097; (* arm_MUL_VEC Q23 Q4 (Q1 :> LANE_H 2) 16 128 *) + 0x3dc0181f; (* arm_LDR Q31 X0 (Immediate_Offset (word 96)) *) + 0x4e806b4c; (* arm_TRN2 Q12 Q26 Q0 32 128 *) + 0x4ecb6b73; (* arm_TRN2 Q19 Q27 Q11 64 128 *) + 0x4f4183e8; (* arm_MUL_VEC Q8 Q31 (Q1 :> LANE_H 0) 16 128 *) + 0x4f51d3e1; (* arm_SQRDMULH_VEC Q1 Q31 (Q1 :> LANE_H 1) 16 128 *) + 0x4ed9698a; (* arm_TRN2 Q10 Q12 Q25 64 128 *) + 0x6e70b660; (* arm_SQRDMULH_VEC Q0 Q19 Q16 16 128 *) + 0x6e70b552; (* arm_SQRDMULH_VEC Q18 Q10 Q16 16 128 *) + 0x4ecb2b70; (* arm_TRN1 Q16 Q27 Q11 64 128 *) + 0x4ed92982; (* arm_TRN1 Q2 Q12 Q25 64 128 *) + 0x4e7e9d4c; (* arm_MUL_VEC Q12 Q10 Q30 16 128 *) + 0x4e7e9e6a; (* arm_MUL_VEC Q10 Q19 Q30 16 128 *) + 0x6f474028; (* arm_MLS_VEC Q8 Q1 (Q7 :> LANE_H 0) 16 128 *) + 0x3cdd004e; (* arm_LDR Q14 X2 (Immediate_Offset (word 18446744073709551568)) *) + 0x6f47400a; (* arm_MLS_VEC Q10 Q0 (Q7 :> LANE_H 0) 16 128 *) + 0x6f47424c; (* arm_MLS_VEC Q12 Q18 (Q7 :> LANE_H 0) 16 128 *) + 0x3dc0101b; (* arm_LDR Q27 X0 (Immediate_Offset (word 64)) *) + 0x4e6a8609; (* arm_ADD_VEC Q9 Q16 Q10 16 128 *) + 0x6e6a8610; (* arm_SUB_VEC Q16 Q16 Q10 16 128 *) + 0x6e6c8459; (* arm_SUB_VEC Q25 Q2 Q12 16 128 *) + 0x4e6c845e; (* arm_ADD_VEC Q30 Q2 Q12 16 128 *) + 0x6e68876a; (* arm_SUB_VEC Q10 Q27 Q8 16 128 *) + 0x6e6db736; (* arm_SQRDMULH_VEC Q22 Q25 Q13 16 128 *) + 0x6e6eb7cd; (* arm_SQRDMULH_VEC Q13 Q30 Q14 16 128 *) + 0x3cde004e; (* arm_LDR Q14 X2 (Immediate_Offset (word 18446744073709551584)) *) + 0x4e74854c; (* arm_ADD_VEC Q12 Q10 Q20 16 128 *) + 0x4e7c9fc5; (* arm_MUL_VEC Q5 Q30 Q28 16 128 *) + 0x4e6e9f3a; (* arm_MUL_VEC Q26 Q25 Q14 16 128 *) + 0x3cc6045e; (* arm_LDR Q30 X2 (Postimmediate_Offset (word 96)) *) + 0x6f4741a5; (* arm_MLS_VEC Q5 Q13 (Q7 :> LANE_H 0) 16 128 *) + 0x6f4742da; (* arm_MLS_VEC Q26 Q22 (Q7 :> LANE_H 0) 16 128 *) + 0x3cdc005c; (* arm_LDR Q28 X2 (Immediate_Offset (word 18446744073709551552)) *) + 0x4e65852d; (* arm_ADD_VEC Q13 Q9 Q5 16 128 *) + 0x6e658529; (* arm_SUB_VEC Q9 Q9 Q5 16 128 *) + 0x6e7a8605; (* arm_SUB_VEC Q5 Q16 Q26 16 128 *) + 0x4e7a8619; (* arm_ADD_VEC Q25 Q16 Q26 16 128 *) + 0x4e8929af; (* arm_TRN1 Q15 Q13 Q9 32 128 *) + 0x4e8969a3; (* arm_TRN2 Q3 Q13 Q9 32 128 *) + 0x4e852b2d; (* arm_TRN1 Q13 Q25 Q5 32 128 *) + 0x4e856b3f; (* arm_TRN2 Q31 Q25 Q5 32 128 *) + 0x6e748545; (* arm_SUB_VEC Q5 Q10 Q20 16 128 *) + 0x4ecd29e2; (* arm_TRN1 Q2 Q15 Q13 64 128 *) + 0x4ecd69e9; (* arm_TRN2 Q9 Q15 Q13 64 128 *) + 0x3c840402; (* arm_STR Q2 X0 (Postimmediate_Offset (word 64)) *) + 0x4edf287d; (* arm_TRN1 Q29 Q3 Q31 64 128 *) + 0x3c9e0009; (* arm_STR Q9 X0 (Immediate_Offset (word 18446744073709551584)) *) + 0x4edf6869; (* arm_TRN2 Q9 Q3 Q31 64 128 *) + 0x3c9d001d; (* arm_STR Q29 X0 (Immediate_Offset (word 18446744073709551568)) *) + 0x3cdf004d; (* arm_LDR Q13 X2 (Immediate_Offset (word 18446744073709551600)) *) + 0x3c9f0009; (* arm_STR Q9 X0 (Immediate_Offset (word 18446744073709551600)) *) + 0xd1000484; (* arm_SUB X4 X4 (rvalue (word 1)) *) + 0xb5fff704; (* arm_CBNZ X4 (word 2096864) *) + 0x6f4742b7; (* arm_MLS_VEC Q23 Q21 (Q7 :> LANE_H 0) 16 128 *) + 0x4e68876e; (* arm_ADD_VEC Q14 Q27 Q8 16 128 *) + 0x3cde0041; (* arm_LDR Q1 X2 (Immediate_Offset (word 18446744073709551584)) *) + 0x4e7785d1; (* arm_ADD_VEC Q17 Q14 Q23 16 128 *) + 0x6e7785d7; (* arm_SUB_VEC Q23 Q14 Q23 16 128 *) + 0x4e85698b; (* arm_TRN2 Q11 Q12 Q5 32 128 *) + 0x4e85299b; (* arm_TRN1 Q27 Q12 Q5 32 128 *) + 0x4e976a22; (* arm_TRN2 Q2 Q17 Q23 32 128 *) + 0x3cdb005a; (* arm_LDR Q26 X2 (Immediate_Offset (word 18446744073709551536)) *) + 0x4ecb684e; (* arm_TRN2 Q14 Q2 Q11 64 128 *) + 0x4e972a2f; (* arm_TRN1 Q15 Q17 Q23 32 128 *) + 0x4e7e9dc5; (* arm_MUL_VEC Q5 Q14 Q30 16 128 *) + 0x6e7ab5d7; (* arm_SQRDMULH_VEC Q23 Q14 Q26 16 128 *) + 0x4edb69f1; (* arm_TRN2 Q17 Q15 Q27 64 128 *) + 0x4ecb284e; (* arm_TRN1 Q14 Q2 Q11 64 128 *) + 0x4e7e9e35; (* arm_MUL_VEC Q21 Q17 Q30 16 128 *) + 0x6f4742e5; (* arm_MLS_VEC Q5 Q23 (Q7 :> LANE_H 0) 16 128 *) + 0x6e7ab631; (* arm_SQRDMULH_VEC Q17 Q17 Q26 16 128 *) + 0x3cdd0042; (* arm_LDR Q2 X2 (Immediate_Offset (word 18446744073709551568)) *) + 0x6e6585d7; (* arm_SUB_VEC Q23 Q14 Q5 16 128 *) + 0x4e6585ce; (* arm_ADD_VEC Q14 Q14 Q5 16 128 *) + 0x6f474235; (* arm_MLS_VEC Q21 Q17 (Q7 :> LANE_H 0) 16 128 *) + 0x4e619ee1; (* arm_MUL_VEC Q1 Q23 Q1 16 128 *) + 0x6e6db6f1; (* arm_SQRDMULH_VEC Q17 Q23 Q13 16 128 *) + 0x4e7c9dd7; (* arm_MUL_VEC Q23 Q14 Q28 16 128 *) + 0x6e62b5ce; (* arm_SQRDMULH_VEC Q14 Q14 Q2 16 128 *) + 0x4edb29fc; (* arm_TRN1 Q28 Q15 Q27 64 128 *) + 0x6f474221; (* arm_MLS_VEC Q1 Q17 (Q7 :> LANE_H 0) 16 128 *) + 0x6e75878b; (* arm_SUB_VEC Q11 Q28 Q21 16 128 *) + 0x6f4741d7; (* arm_MLS_VEC Q23 Q14 (Q7 :> LANE_H 0) 16 128 *) + 0x4e758791; (* arm_ADD_VEC Q17 Q28 Q21 16 128 *) + 0x6e61856e; (* arm_SUB_VEC Q14 Q11 Q1 16 128 *) + 0x4e618561; (* arm_ADD_VEC Q1 Q11 Q1 16 128 *) + 0x6e77863c; (* arm_SUB_VEC Q28 Q17 Q23 16 128 *) + 0x4e778622; (* arm_ADD_VEC Q2 Q17 Q23 16 128 *) + 0x4e8e2837; (* arm_TRN1 Q23 Q1 Q14 32 128 *) + 0x4e8e682e; (* arm_TRN2 Q14 Q1 Q14 32 128 *) + 0x4e9c6851; (* arm_TRN2 Q17 Q2 Q28 32 128 *) + 0x4e9c285c; (* arm_TRN1 Q28 Q2 Q28 32 128 *) + 0x4ece6a21; (* arm_TRN2 Q1 Q17 Q14 64 128 *) + 0x4ece2a2e; (* arm_TRN1 Q14 Q17 Q14 64 128 *) + 0x3d800c01; (* arm_STR Q1 X0 (Immediate_Offset (word 48)) *) + 0x4ed76b81; (* arm_TRN2 Q1 Q28 Q23 64 128 *) + 0x3d80040e; (* arm_STR Q14 X0 (Immediate_Offset (word 16)) *) + 0x4ed72b8e; (* arm_TRN1 Q14 Q28 Q23 64 128 *) + 0x3d800801; (* arm_STR Q1 X0 (Immediate_Offset (word 32)) *) + 0x3c84040e; (* arm_STR Q14 X0 (Postimmediate_Offset (word 64)) *) + 0x6d4027e8; (* arm_LDP D8 D9 SP (Immediate_Offset (iword (&0))) *) + 0x6d412fea; (* arm_LDP D10 D11 SP (Immediate_Offset (iword (&16))) *) + 0x6d4237ec; (* arm_LDP D12 D13 SP (Immediate_Offset (iword (&32))) *) + 0x6d433fee; (* arm_LDP D14 D15 SP (Immediate_Offset (iword (&48))) *) + 0x910103ff; (* arm_ADD SP SP (rvalue (word 64)) *) + 0xd65f03c0 (* arm_RET X30 *) +];; + +let MLKEM_NTT_EXEC = ARM_MK_EXEC_RULE mlkem_ntt_mc;; + +(* ------------------------------------------------------------------------- *) +(* Data tables that are assumed in the precondition. *) +(* ------------------------------------------------------------------------- *) + +let ntt_zetas_layer01234 = define + `ntt_zetas_layer01234:int list = + [-- &1600; -- &15749; -- &749; -- &7373; -- &40; -- &394; -- &687; -- &6762; + &630; &6201; -- &1432; -- &14095; &848; &8347; &0; &0; &1062; &10453; &296; + &2914; -- &882; -- &8682; &0; &0; -- &1410; -- &13879; &1339; &13180; &1476; + &14529; &0; &0; &193; &1900; -- &283; -- &2786; &56; &551; &0; &0; &797; + &7845; -- &1089; -- &10719; &1333; &13121; &0; &0; -- &543; -- &5345; + &1426; &14036; -- &1235; -- &12156; &0; &0; -- &69; -- &679; &535; &5266; + -- &447; -- &4400; &0; &0; &569; &5601; -- &936; -- &9213; -- &450; + -- &4429; &0; &0; -- &1583; -- &15582; -- &1355; -- &13338; &821; + &8081; &0; &0]`;; + +let ntt_zetas_layer56 = define +`ntt_zetas_layer56:int list = + [&289; &289; &331; &331; -- &76; -- &76; -- &1573; -- &1573; &2845; + &2845; &3258; &3258; -- &748; -- &748; -- &15483; -- &15483; &17; &17; + &583; &583; &1637; &1637; -- &1041; -- &1041; &167; &167; &5739; + &5739; &16113; &16113; -- &10247; -- &10247; -- &568; -- &568; + -- &680; -- &680; &723; &723; &1100; &1100; -- &5591; -- &5591; -- &6693; + -- &6693; &7117; &7117; &10828; &10828; &1197; &1197; -- &1025; + -- &1025; -- &1052; -- &1052; -- &1274; -- &1274; &11782; &11782; + -- &10089; -- &10089; -- &10355; -- &10355; -- &12540; -- &12540; &1409; + &1409; -- &48; -- &48; &756; &756; -- &314; -- &314; &13869; &13869; + -- &472; -- &472; &7441; &7441; -- &3091; -- &3091; -- &667; -- &667; + &233; &233; -- &1173; -- &1173; -- &279; -- &279; -- &6565; -- &6565; + &2293; &2293; -- &11546; -- &11546; -- &2746; -- &2746; &650; &650; + -- &1352; -- &1352; -- &816; -- &816; &632; &632; &6398; &6398; + -- &13308; -- &13308; -- &8032; -- &8032; &6221; &6221; -- &1626; + -- &1626; -- &540; -- &540; -- &1482; -- &1482; &1461; &1461; -- &16005; + -- &16005; -- &5315; -- &5315; -- &14588; -- &14588; &14381; &14381; + &1651; &1651; -- &1540; -- &1540; &952; &952; -- &642; -- &642; + &16251; &16251; -- &15159; -- &15159; &9371; &9371; -- &6319; + -- &6319; -- &464; -- &464; &33; &33; &1320; &1320; -- &1414; -- &1414; + -- &4567; -- &4567; &325; &325; &12993; &12993; -- &13918; -- &13918; + &939; &939; -- &892; -- &892; &733; &733; &268; &268; &9243; &9243; + -- &8780; -- &8780; &7215; &7215; &2638; &2638; -- &1021; -- &1021; + -- &941; -- &941; -- &992; -- &992; &641; &641; -- &10050; -- &10050; + -- &9262; -- &9262; -- &9764; -- &9764; &6309; &6309; -- &1010; -- &1010; + &1435; &1435; &807; &807; &452; &452; -- &9942; -- &9942; &14125; + &14125; &7943; &7943; &4449; &4449; &1584; &1584; -- &1292; -- &1292; + &375; &375; -- &1239; -- &1239; &15592; &15592; -- &12717; -- &12717; + &3691; &3691; -- &12196; -- &12196; -- &1031; -- &1031; -- &109; + -- &109; -- &780; -- &780; &1645; &1645; -- &10148; -- &10148; -- &1073; + -- &1073; -- &7678; -- &7678; &16192; &16192; &1438; &1438; -- &461; + -- &461; &1534; &1534; -- &927; -- &927; &14155; &14155; -- &4538; + -- &4538; &15099; &15099; -- &9125; -- &9125; &1063; &1063; -- &556; + -- &556; -- &1230; -- &1230; -- &863; -- &863; &10463; &10463; -- &5473; + -- &5473; -- &12107; -- &12107; -- &8495; -- &8495; &319; &319; &757; + &757; &561; &561; -- &735; -- &735; &3140; &3140; &7451; &7451; &5522; + &5522; -- &7235; -- &7235; -- &682; -- &682; -- &712; -- &712; &1481; + &1481; &648; &648; -- &6713; -- &6713; -- &7008; -- &7008; &14578; + &14578; &6378; &6378; -- &525; -- &525; &403; &403; &1143; &1143; + -- &554; -- &554; -- &5168; -- &5168; &3967; &3967; &11251; &11251; + -- &5453; -- &5453; &1092; &1092; &1026; &1026; -- &1179; -- &1179; &886; + &886; &10749; &10749; &10099; &10099; -- &11605; -- &11605; &8721; + &8721; -- &855; -- &855; -- &219; -- &219; &1227; &1227; &910; &910; + -- &8416; -- &8416; -- &2156; -- &2156; &12078; &12078; &8957; &8957; + -- &1607; -- &1607; -- &1455; -- &1455; -- &1219; -- &1219; &885; + &885; -- &15818; -- &15818; -- &14322; -- &14322; -- &11999; + -- &11999; &8711; &8711; &1212; &1212; &1029; &1029; -- &394; -- &394; + -- &1175; -- &1175; &11930; &11930; &10129; &10129; -- &3878; -- &3878; + -- &11566; -- &11566]`;; + +let ntt_constants = define + `ntt_constants z_01234 z_56 s <=> + (!i. i < 80 + ==> read(memory :> bytes16(word_add z_01234 (word(2 * i)))) s = + iword(EL i ntt_zetas_layer01234)) /\ + (!i. i < 384 + ==> read(memory :> bytes16(word_add z_56 (word(2 * i)))) s = + iword(EL i ntt_zetas_layer56))`;; + +(* ------------------------------------------------------------------------- *) +(* Some convenient proof tools. *) +(* ------------------------------------------------------------------------- *) + +let READ_MEMORY_MERGE_CONV = + let baseconv = + GEN_REWRITE_CONV I [READ_MEMORY_BYTESIZED_SPLIT] THENC + LAND_CONV(LAND_CONV(RAND_CONV(RAND_CONV + (TRY_CONV(GEN_REWRITE_CONV I [GSYM WORD_ADD_ASSOC] THENC + RAND_CONV WORD_ADD_CONV))))) in + let rec conv n tm = + if n = 0 then REFL tm else + (baseconv THENC BINOP_CONV (conv(n - 1))) tm in + conv;; + +let READ_MEMORY_SPLIT_CONV = + let baseconv = + GEN_REWRITE_CONV I [READ_MEMORY_BYTESIZED_UNSPLIT] THENC + BINOP_CONV(LAND_CONV(LAND_CONV(RAND_CONV(RAND_CONV + (TRY_CONV(GEN_REWRITE_CONV I [GSYM WORD_ADD_ASSOC] THENC + RAND_CONV WORD_ADD_CONV)))))) in + let rec conv n tm = + if n = 0 then REFL tm else + (baseconv THENC BINOP_CONV (conv(n - 1))) tm in + conv;; + +let SIMD_SIMPLIFY_CONV = + TOP_DEPTH_CONV + (REWR_CONV WORD_SUBWORD_AND ORELSEC WORD_SIMPLE_SUBWORD_CONV) THENC + DEPTH_CONV WORD_NUM_RED_CONV THENC + REWRITE_CONV[GSYM barred; GSYM barmul];; + +let SIMD_SIMPLIFY_TAC = + let simdable = can (term_match [] `read X (s:armstate):int128 = whatever`) in + TRY(FIRST_X_ASSUM + (ASSUME_TAC o + CONV_RULE(RAND_CONV SIMD_SIMPLIFY_CONV) o + check (simdable o concl)));; + +let MEMORY_128_FROM_16_TAC = + let a_tm = `a:int64` and n_tm = `n:num` and i64_ty = `:int64` + and pat = `read (memory :> bytes128(word_add a (word n))) s0` in + fun v n -> + let pat' = subst[mk_var(v,i64_ty),a_tm] pat in + let f i = + let itm = mk_small_numeral(16*i) in + READ_MEMORY_MERGE_CONV 3 (subst[itm,n_tm] pat') in + MP_TAC(end_itlist CONJ (map f (0--(n-1))));; + +(* ------------------------------------------------------------------------- *) +(* Correctness proof. *) +(* ------------------------------------------------------------------------- *) + +let MLKEM_NTT_CORRECT = prove + (`!a z_01234 z_56 x pc. + ALL (nonoverlapping (a,512)) + [(word pc,0x504); (z_01234,160); (z_56,768)] + ==> ensures arm + (\s. aligned_bytes_loaded s (word pc) mlkem_ntt_mc /\ + read PC s = word (pc + 0x14) /\ + C_ARGUMENTS [a; z_01234; z_56] s /\ + ntt_constants z_01234 z_56 s /\ + !i. i < 256 + ==> read(memory :> bytes16(word_add a (word(2 * i)))) s = + x i) + (\s. read PC s = word(pc + 0x4ec) /\ + ((!i. i < 256 ==> abs(ival(x i)) <= &8191) + ==> !i. i < 256 + ==> let zi = + read(memory :> bytes16(word_add a (word(2 * i)))) s in + (ival zi == forward_ntt (ival o x) i) (mod &3329) /\ + abs(ival zi) <= &23594)) + (MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI ,, + MAYCHANGE [Q8; Q9; Q10; Q11; Q12; Q13; Q14; Q15] ,, + MAYCHANGE [memory :> bytes(a,512)])`, + MAP_EVERY X_GEN_TAC + [`a:int64`; `z_01234:int64`; `z_56:int64`; `x:num->int16`; `pc:num`] THEN + REWRITE_TAC[MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI; C_ARGUMENTS; + NONOVERLAPPING_CLAUSES; ALL] THEN + DISCH_THEN(REPEAT_TCL CONJUNCTS_THEN ASSUME_TAC) THEN + + (*** Manually expand the cases in the hypotheses ***) + + REWRITE_TAC[ntt_constants] THEN + CONV_TAC(RATOR_CONV(LAND_CONV(ONCE_DEPTH_CONV + (EXPAND_CASES_CONV THENC ONCE_DEPTH_CONV NUM_MULT_CONV)))) THEN + REWRITE_TAC[ntt_zetas_layer01234; ntt_zetas_layer56] THEN + CONV_TAC(ONCE_DEPTH_CONV EL_CONV) THEN + CONV_TAC(ONCE_DEPTH_CONV WORD_IWORD_CONV) THEN REWRITE_TAC[WORD_ADD_0] THEN + ENSURES_INIT_TAC "s0" THEN + + (*** Manually restructure to match the 128-bit loads. It would be nicer + *** if the simulation machinery did this automatically. + ***) + + MEMORY_128_FROM_16_TAC "a" 32 THEN + MEMORY_128_FROM_16_TAC "z_01234" 10 THEN + MEMORY_128_FROM_16_TAC "z_56" 48 THEN + ASM_REWRITE_TAC[WORD_ADD_0] THEN CONV_TAC WORD_REDUCE_CONV THEN + DISCARD_MATCHING_ASSUMPTIONS [`read (memory :> bytes16 a) s = x`] THEN + REPEAT STRIP_TAC THEN + + (*** Ghost up initial contents of Q7 since it isn't fully initialized ***) + + ABBREV_TAC `v7_init:int128 = read Q7 s0` THEN + + (*** Simulate all the way to the end, in effect unrolling loops ***) + + MAP_EVERY (fun n -> ARM_STEPS_TAC MLKEM_NTT_EXEC [n] THEN SIMD_SIMPLIFY_TAC) + (1--904) THEN + ENSURES_FINAL_STATE_TAC THEN ASM_REWRITE_TAC[] THEN + + (*** Reverse the restructuring by splitting the 128-bit words up ***) + + REPEAT(FIRST_X_ASSUM(STRIP_ASSUME_TAC o + CONV_RULE SIMD_SIMPLIFY_CONV o + CONV_RULE(READ_MEMORY_SPLIT_CONV 3) o + check (can (term_match [] `read qqq s:int128 = xxx`) o concl))) THEN + + (*** Turn the conclusion into an explicit conjunction and split it up ***) + + DISCH_TAC THEN + CONV_TAC(ONCE_DEPTH_CONV let_CONV) THEN REWRITE_TAC[INT_ABS_BOUNDS] THEN + GEN_REWRITE_TAC (BINDER_CONV o RAND_CONV) [GSYM I_THM] THEN + CONV_TAC(EXPAND_CASES_CONV THENC ONCE_DEPTH_CONV NUM_MULT_CONV) THEN + ASM_REWRITE_TAC[I_THM; WORD_ADD_0] THEN DISCARD_STATE_TAC "s904" THEN + REPEAT(W(fun (asl,w) -> + if length(conjuncts w) > 3 then CONJ_TAC else NO_TAC)) THEN + + (*** Get congruences and bounds for the result digits and finish ***) + + FIRST_X_ASSUM(MP_TAC o CONV_RULE EXPAND_CASES_CONV) THEN + POP_ASSUM_LIST(K ALL_TAC) THEN + DISCH_THEN(fun aboth -> + W(MP_TAC o GEN_CONGBOUND_RULE (CONJUNCTS aboth) o + rand o lhand o rator o lhand o snd)) THEN + (MATCH_MP_TAC MONO_AND THEN CONJ_TAC THENL + [MATCH_MP_TAC(REWRITE_RULE[IMP_CONJ_ALT] INT_CONG_TRANS) THEN + CONV_TAC(ONCE_DEPTH_CONV FORWARD_NTT_CONV) THEN + REWRITE_TAC[GSYM INT_REM_EQ; o_THM] THEN CONV_TAC INT_REM_DOWN_CONV THEN + REWRITE_TAC[INT_REM_EQ] THEN + REWRITE_TAC[REAL_INT_CONGRUENCE; INT_OF_NUM_EQ; ARITH_EQ] THEN + REWRITE_TAC[GSYM REAL_OF_INT_CLAUSES] THEN + CONV_TAC(RAND_CONV REAL_POLY_CONV) THEN REAL_INTEGER_TAC; + MATCH_MP_TAC(INT_ARITH + `l':int <= l /\ u <= u' + ==> l <= x /\ x <= u ==> l' <= x /\ x <= u'`) THEN + CONV_TAC INT_REDUCE_CONV]));; + +(*** Subroutine form, somewhat messy elaboration of the usual wrapper ***) + +let MLKEM_NTT_SUBROUTINE_CORRECT = prove + (`!a z_01234 z_56 x pc stackpointer returnaddress. + aligned 16 stackpointer /\ + ALLPAIRS nonoverlapping + [(a,512); (word_sub stackpointer (word 64),64)] + [(word pc,0x504); (z_01234,160); (z_56,768)] /\ + nonoverlapping (a,512) (word_sub stackpointer (word 64),64) + ==> ensures arm + (\s. aligned_bytes_loaded s (word pc) mlkem_ntt_mc /\ + read PC s = word pc /\ + read SP s = stackpointer /\ + read X30 s = returnaddress /\ + C_ARGUMENTS [a; z_01234; z_56] s /\ + ntt_constants z_01234 z_56 s /\ + !i. i < 256 + ==> read(memory :> bytes16(word_add a (word(2 * i)))) s = + x i) + (\s. read PC s = returnaddress /\ + ((!i. i < 256 ==> abs(ival(x i)) <= &8191) + ==> !i. i < 256 + ==> let zi = + read(memory :> bytes16(word_add a (word(2 * i)))) s in + (ival zi == forward_ntt (ival o x) i) (mod &3329) /\ + abs(ival zi) <= &23594)) + (MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI ,, + MAYCHANGE [memory :> bytes(a,512); + memory :> bytes(word_sub stackpointer (word 64),64)])`, + let TWEAK_CONV = + REWRITE_CONV[ntt_constants] THENC + ONCE_DEPTH_CONV let_CONV THENC + ONCE_DEPTH_CONV EXPAND_CASES_CONV THENC + ONCE_DEPTH_CONV NUM_MULT_CONV THENC + PURE_REWRITE_CONV [WORD_ADD_0] in + REWRITE_TAC[fst MLKEM_NTT_EXEC] THEN + CONV_TAC TWEAK_CONV THEN + ARM_ADD_RETURN_STACK_TAC ~pre_post_nsteps:(5,5) MLKEM_NTT_EXEC + (REWRITE_RULE[fst MLKEM_NTT_EXEC] (CONV_RULE TWEAK_CONV MLKEM_NTT_CORRECT)) + `[D8; D9; D10; D11; D12; D13; D14; D15]` 64);;