diff --git a/core/crypto/aead/low_level.odin b/core/crypto/aead/low_level.odin index a7ecef801d4..e90d07bc628 100644 --- a/core/crypto/aead/low_level.odin +++ b/core/crypto/aead/low_level.odin @@ -4,6 +4,7 @@ import "core:crypto/aegis" import "core:crypto/aes" import "core:crypto/chacha20" import "core:crypto/chacha20poly1305" +import "core:crypto/deoxysii" import "core:reflect" // Implementation is an AEAD implementation. Most callers will not need @@ -30,6 +31,7 @@ Algorithm :: enum { AEGIS_128L_256, // AEGIS-128L (256-bit tag) AEGIS_256, AEGIS_256_256, // AEGIS-256 (256-bit tag) + DEOXYS_II_256, } // ALGORITM_NAMES is the Algorithm to algorithm name string. @@ -44,6 +46,7 @@ ALGORITHM_NAMES := [Algorithm]string { .AEGIS_128L_256 = "AEGIS-128L-256", .AEGIS_256 = "AEGIS-256", .AEGIS_256_256 = "AEGIS-256-256", + .DEOXYS_II_256 = "Deoxys-II-256", } // TAG_SIZES is the Algorithm to tag size in bytes. @@ -58,6 +61,7 @@ TAG_SIZES := [Algorithm]int { .AEGIS_128L_256 = aegis.TAG_SIZE_256, .AEGIS_256 = aegis.TAG_SIZE_128, .AEGIS_256_256 = aegis.TAG_SIZE_256, + .DEOXYS_II_256 = deoxysii.TAG_SIZE, } // KEY_SIZES is the Algorithm to key size in bytes. @@ -72,6 +76,7 @@ KEY_SIZES := [Algorithm]int { .AEGIS_128L_256 = aegis.KEY_SIZE_128L, .AEGIS_256 = aegis.KEY_SIZE_256, .AEGIS_256_256 = aegis.KEY_SIZE_256, + .DEOXYS_II_256 = deoxysii.KEY_SIZE, } // IV_SIZES is the Algorithm to initialization vector size in bytes. @@ -88,6 +93,7 @@ IV_SIZES := [Algorithm]int { .AEGIS_128L_256 = aegis.IV_SIZE_128L, .AEGIS_256 = aegis.IV_SIZE_256, .AEGIS_256_256 = aegis.IV_SIZE_256, + .DEOXYS_II_256 = deoxysii.IV_SIZE, } // Context is a concrete instantiation of a specific AEAD algorithm. 
@@ -97,6 +103,7 @@ Context :: struct { aes.Context_GCM, chacha20poly1305.Context, aegis.Context, + deoxysii.Context, }, } @@ -112,6 +119,7 @@ _IMPL_IDS := [Algorithm]typeid { .AEGIS_128L_256 = typeid_of(aegis.Context), .AEGIS_256 = typeid_of(aegis.Context), .AEGIS_256_256 = typeid_of(aegis.Context), + .DEOXYS_II_256 = typeid_of(deoxysii.Context), } // init initializes a Context with a specific AEAD Algorithm. @@ -142,6 +150,9 @@ init :: proc(ctx: ^Context, algorithm: Algorithm, key: []byte, impl: Implementat case .AEGIS_128L, .AEGIS_128L_256, .AEGIS_256, .AEGIS_256_256: impl_ := impl != nil ? impl.(aes.Implementation) : aes.DEFAULT_IMPLEMENTATION aegis.init(&ctx._impl.(aegis.Context), key, impl_) + case .DEOXYS_II_256: + impl_ := impl != nil ? impl.(aes.Implementation) : aes.DEFAULT_IMPLEMENTATION + deoxysii.init(&ctx._impl.(deoxysii.Context), key, impl_) case .Invalid: panic("crypto/aead: uninitialized algorithm") case: @@ -167,6 +178,8 @@ seal_ctx :: proc(ctx: ^Context, dst, tag, iv, aad, plaintext: []byte) { chacha20poly1305.seal(&impl, dst, tag, iv, aad, plaintext) case aegis.Context: aegis.seal(&impl, dst, tag, iv, aad, plaintext) + case deoxysii.Context: + deoxysii.seal(&impl, dst, tag, iv, aad, plaintext) case: panic("crypto/aead: uninitialized algorithm") } @@ -191,6 +204,8 @@ open_ctx :: proc(ctx: ^Context, dst, iv, aad, ciphertext, tag: []byte) -> bool { return chacha20poly1305.open(&impl, dst, iv, aad, ciphertext, tag) case aegis.Context: return aegis.open(&impl, dst, iv, aad, ciphertext, tag) + case deoxysii.Context: + return deoxysii.open(&impl, dst, iv, aad, ciphertext, tag) case: panic("crypto/aead: uninitialized algorithm") } @@ -206,6 +221,8 @@ reset :: proc(ctx: ^Context) { chacha20poly1305.reset(&impl) case aegis.Context: aegis.reset(&impl) + case deoxysii.Context: + deoxysii.reset(&impl) case: // Calling reset repeatedly is fine. 
 }
diff --git a/core/crypto/deoxysii/deoxysii.odin b/core/crypto/deoxysii/deoxysii.odin
new file mode 100644
index 00000000000..e890264d767
--- /dev/null
+++ b/core/crypto/deoxysii/deoxysii.odin
@@ -0,0 +1,292 @@
+/*
+package deoxysii implements the Deoxys-II-256 Authenticated Encryption
+with Additional Data algorithm.
+
+- [[ https://sites.google.com/view/deoxyscipher ]]
+- [[ https://thomaspeyrin.github.io/web/assets/docs/papers/Jean-etal-JoC2021.pdf ]]
+*/
+package deoxysii
+
+import "base:intrinsics"
+import "core:bytes"
+import "core:crypto/aes"
+import "core:mem"
+import "core:simd"
+
+// KEY_SIZE is the Deoxys-II-256 key size in bytes.
+KEY_SIZE :: 32
+// IV_SIZE is the Deoxys-II-256 IV size in bytes.
+IV_SIZE :: 15 // 120-bits
+// TAG_SIZE is the Deoxys-II-256 tag size in bytes.
+TAG_SIZE :: 16
+
+@(private)
+PREFIX_AD_BLOCK :: 0b0010 // tweak prefix: full associated-data block
+@(private)
+PREFIX_AD_FINAL :: 0b0110 // tweak prefix: padded final associated-data block
+@(private)
+PREFIX_MSG_BLOCK :: 0b0000 // tweak prefix: full message block (authentication pass)
+@(private)
+PREFIX_MSG_FINAL :: 0b0100 // tweak prefix: padded final message block
+@(private)
+PREFIX_TAG :: 0b0001 // tweak prefix: final encryption that produces the tag
+@(private)
+PREFIX_SHIFT :: 4 // prefixes are shifted into the high nibble of tweak byte 0
+
+@(private)
+BC_ROUNDS :: 16 // Deoxys-BC-384 round count; a Context stores BC_ROUNDS+1 subkeys
+
+@(private = "file")
+_LFSR2_MASK :: simd.u8x16{ // keeps bit 0 of every byte (the bit lfsr2 computes)
+	0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+	0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+}
+@(private = "file")
+_LFSR3_MASK :: simd.u8x16{ // keeps bit 7 of every byte (the bit lfsr3 computes)
+	0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+	0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+}
+@(private = "file")
+_LFSR_SH1 :: _LFSR2_MASK // per-byte shift count of 1 (same value as _LFSR2_MASK)
+@(private = "file")
+_LFSR_SH5 :: simd.u8x16{ // per-byte shift count of 5
+	0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
+	0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
+}
+@(private = "file")
+_LFSR_SH7 :: simd.u8x16{ // per-byte shift count of 7
+	0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
+	0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
+}
+@(private = "file", rodata)
+_RCONS := []byte { // one round constant per subkey (BC_ROUNDS+1 entries), read by rcon
+	0x2f, 0x5e, 0xbc, 0x63, 0xc6, 0x97, 0x35, 0x6a,
+	0xd4, 0xb3, 0x7d, 0xfa, 0xef, 0xc5, 0x91, 0x39,
+	0x72,
+}
+
+// Context is a keyed Deoxys-II-256 instance.
+Context :: struct {
+	_subkeys: [BC_ROUNDS+1][16]byte, // key-dependent part of every round subtweakkey, filled by derive_ks
+	_impl: aes.Implementation, // backend chosen by init
+	_is_initialized: bool, // set by init, cleared by reset, asserted by seal/open
+}
+
+@(private)
+_validate_common_slice_sizes :: proc (ctx: ^Context, tag, iv, aad, text: []byte) { // panics on a wrong tag or IV length; ctx, aad and text are not inspected
+	if len(tag) != TAG_SIZE {
+		panic("crypto/deoxysii: invalid tag size")
+	}
+
+	if len(iv) != IV_SIZE {
+		panic("crypto/deoxysii: invalid IV size")
+	}
+
+	#assert(size_of(int) == 8 || size_of(int) <= 4) // compile-time check on the width of int; no runtime length check follows
+	// For the nonce-misuse resistant mode, the total size of the
+	// associated data and the total size of the message do not exceed
+	// `16 * 2^max_l * 2^max_m bytes`, thus 2^128 bytes for all variants
+	// of Deoxys-II. Moreover, the maximum number of messages that can
+	// be handled for a same key is 2^max_m, that is 2^64 for all variants
+	// of Deoxys.
+}
+
+// init initializes a Context with the provided key, which must be KEY_SIZE bytes long (panics otherwise).
+init :: proc(ctx: ^Context, key: []byte, impl := aes.DEFAULT_IMPLEMENTATION) {
+	if len(key) != KEY_SIZE {
+		panic("crypto/deoxysii: invalid key size")
+	}
+
+	ctx._impl = .Portable // impl is currently ignored: the selection logic below is disabled
+	// ctx._impl = impl
+	// if ctx._impl == .Hardware && !aes.is_hardware_accelerated() {
+	// 	ctx._impl = .Portable
+	// }
+
+	derive_ks(ctx, key)
+
+	ctx._is_initialized = true
+}
+
+// seal encrypts the plaintext and authenticates the aad and ciphertext,
+// with the provided Context and iv, stores the output in dst and tag.
+//
+// dst and plaintext MUST alias exactly or not at all.
+seal :: proc(ctx: ^Context, dst, tag, iv, aad, plaintext: []byte) {
+	assert(ctx._is_initialized)
+
+	_validate_common_slice_sizes(ctx, tag, iv, aad, plaintext)
+	if len(dst) != len(plaintext) {
+		panic("crypto/deoxysii: invalid destination ciphertext size")
+	}
+	if bytes.alias_inexactly(dst, plaintext) {
+		panic("crypto/deoxysii: dst and plaintext alias inexactly")
+	}
+
+	switch ctx._impl {
+	// No hardware backend exists yet: never return without writing dst and tag.
+	case .Hardware, .Portable:
+		e_ref(ctx, dst, tag, iv, aad, plaintext)
+	}
+}
+
+// open authenticates the aad and ciphertext, and decrypts the ciphertext,
+// with the provided Context, iv, and tag, and stores the output in dst,
+// returning true iff the authentication was successful. If authentication
+// fails, the destination buffer will be zeroed.
+//
+// dst and ciphertext MUST alias exactly or not at all.
+@(require_results)
+open :: proc(ctx: ^Context, dst, iv, aad, ciphertext, tag: []byte) -> bool {
+	assert(ctx._is_initialized)
+
+	_validate_common_slice_sizes(ctx, tag, iv, aad, ciphertext)
+	if len(dst) != len(ciphertext) {
+		panic("crypto/deoxysii: invalid destination plaintext size")
+	}
+	if bytes.alias_inexactly(dst, ciphertext) {
+		panic("crypto/deoxysii: dst and ciphertext alias inexactly")
+	}
+
+	ok: bool
+	switch ctx._impl {
+	// No hardware backend exists yet: both selections use the portable code.
+	case .Hardware, .Portable:
+		ok = d_ref(ctx, dst, iv, aad, ciphertext, tag)
+	}
+	if !ok {
+		mem.zero_explicit(raw_data(dst), len(ciphertext)) // never hand back unauthenticated plaintext
+	}
+
+	return ok
+}
+
+// reset sanitizes the Context by wiping every precomputed subkey. The
+// Context must be re-initialized to be used again.
+reset :: proc "contextless" (ctx: ^Context) {
+	mem.zero_explicit(&ctx._subkeys, size_of(ctx._subkeys)) // byte count; len() is only the number of subkeys
+	ctx._is_initialized = false
+}
+
+@(private = "file")
+derive_ks :: proc "contextless" (ctx: ^Context, key: []byte) {
+	// Derive the constant component of each subtweakkey.
+	//
+	// The key schedule is as thus:
+	//
+	//   STK_i = TK1_i ^ TK2_i ^ TK3_i ^ RC_i
+	//
+	//   TK1_i = h(TK1_(i-1))
+	//   TK2_i = h(LFSR2(TK2_(i-1)))
+	//   TK3_i = h(LFSR3(TK3_(i-1)))
+	//
+	// where:
+	//
+	//   KT = K || T
+	//   W3 = KT[:16]
+	//   W2 = KT[16:32]
+	//   W1 = KT[32:]
+	//
+	//   TK1_0 = W1
+	//   TK2_0 = W2
+	//   TK3_0 = W3
+	//
+	// As `K` is fixed per Context, the XORs of `TK3_0 .. TK3_n`,
+	// `TK2_0 .. TK2_n` and RC_i can be precomputed in advance like
+	// thus:
+	//
+	//   subkey_i = TK3_i ^ TK2_i ^ RC_i
+	//
+	// When it is time to actually call Deoxys-BC-384, it is then
+	// a simple matter of deriving each round subtweakkey via:
+	//
+	//   TK1_0 = T (Tweak)
+	//   STK_0 = subkey_0 ^ TK1_0
+	//   STK_i = subkey_i (precomputed) ^ H(TK1_(i-1))
+	//
+	// We opt to use SIMD here and for the subtweakkey derivation
+	// as `H()` is typically a single vector instruction.
+
+	tk2 := intrinsics.unaligned_load((^simd.u8x16)(raw_data(key[16:])))
+	tk3 := intrinsics.unaligned_load((^simd.u8x16)(raw_data(key)))
+
+	// subkey_0 does not apply LFSR2/3 or H.
+	intrinsics.unaligned_store(
+		(^simd.u8x16)(&ctx._subkeys[0]),
+		simd.bit_xor(
+			tk2,
+			simd.bit_xor(
+				tk3,
+				rcon(0),
+			),
+		),
+	)
+
+	// Precompute k_1 .. k_16.
+	for i in 1 ..< BC_ROUNDS+1 {
+		tk2 = h(lfsr2(tk2))
+		tk3 = h(lfsr3(tk3))
+		intrinsics.unaligned_store(
+			(^simd.u8x16)(&ctx._subkeys[i]),
+			simd.bit_xor(
+				tk2,
+				simd.bit_xor(
+					tk3,
+					rcon(i),
+				),
+			),
+		)
+	}
+}
+
+@(private = "file")
+lfsr2 :: #force_inline proc "contextless" (tk: simd.u8x16) -> simd.u8x16 {
+	// LFSR2 is an application of the following LFSR to each byte of input.
+	// (x7||x6||x5||x4||x3||x2||x1||x0) -> (x6||x5||x4||x3||x2||x1||x0||x7 ^ x5)
+	return simd.bit_or(
+		simd.shl(tk, _LFSR_SH1),
+		simd.bit_and(
+			simd.bit_xor(
+				simd.shr(tk, _LFSR_SH7), // x7
+				simd.shr(tk, _LFSR_SH5), // x5
+			),
+			_LFSR2_MASK,
+		),
+	)
+}
+
+@(private = "file")
+lfsr3 :: #force_inline proc "contextless" (tk: simd.u8x16) -> simd.u8x16 {
+	// LFSR3 is an application of the following LFSR to each byte of input.
+	// (x7||x6||x5||x4||x3||x2||x1||x0) -> (x0 ^ x6||x7||x6||x5||x4||x3||x2||x1)
+	return simd.bit_or(
+		simd.shr(tk, _LFSR_SH1),
+		simd.bit_and(
+			simd.bit_xor(
+				simd.shl(tk, _LFSR_SH7), // x0
+				simd.shl(tk, _LFSR_SH1), // x6
+			),
+			_LFSR3_MASK,
+		),
+	)
+}
+
+@(private)
+h :: #force_inline proc "contextless" (tk: simd.u8x16) -> simd.u8x16 { // the byte permutation written `h`/`H()` in derive_ks
+	return simd.swizzle(
+		tk,
+		0x01, 0x06, 0x0b, 0x0c, 0x05, 0x0a, 0x0f, 0x00,
+		0x09, 0x0e, 0x03, 0x04, 0x0d, 0x02, 0x07, 0x08,
+	)
+}
+
+@(private = "file")
+rcon :: #force_inline proc "contextless" (rd: int) -> simd.u8x16 #no_bounds_check { // unchecked index: rd must stay within _RCONS
+	rc := _RCONS[rd]
+	return simd.u8x16{
+		1, 2, 4, 8,
+		rc, rc, rc, rc,
+		0, 0, 0, 0,
+		0, 0, 0, 0,
+	}
+}
\ No newline at end of file
diff --git a/core/crypto/deoxysii/deoxysii_impl_ct64.odin b/core/crypto/deoxysii/deoxysii_impl_ct64.odin
new file mode 100644
index 00000000000..ab072b2d360
--- /dev/null
+++ b/core/crypto/deoxysii/deoxysii_impl_ct64.odin
@@ -0,0 +1,397 @@
+package deoxysii
+
+import "base:intrinsics"
+import "core:crypto"
+import aes "core:crypto/_aes/ct64"
+import "core:encoding/endian"
+import "core:mem"
+import "core:simd"
+
+// This uses the bitsliced 64-bit general purpose register SWAR AES
+// round function. The encryption pass skips orthogonalizing the
+// AES round function input as it is always going to be the leading 0
+// padded IV, and doing a 64-byte copy is faster.
+
+@(private = "file")
+TWEAK_SIZE :: 16 // bytes in one tweak
+@(private = "file")
+BLOCK_SIZE :: 16 // bytes in one cipher block
+
+@(private = "file")
+State_SW :: struct { // scratch state shared by the helpers during one e_ref/d_ref call
+	ctx: ^Context, // keyed instance that supplies the subkeys
+	q_stk, q_b: [8]u64, // bitsliced round subtweakkeys / bitsliced blocks, lane i held in words i and i+4
+}
+
+@(private = "file")
+auth_tweak :: #force_inline proc "contextless" ( // dst = (prefix << PREFIX_SHIFT) || 7 zero bytes || block_nr (64-bit big-endian)
+	dst: ^[TWEAK_SIZE]byte,
+	prefix: byte,
+	block_nr: int,
+) {
+	endian.unchecked_put_u64be(dst[8:], u64(block_nr))
+	endian.unchecked_put_u64le(dst[0:], u64(prefix) << PREFIX_SHIFT) // dst[0] = prefix << PREFIX_SHIFT
+}
+
+@(private = "file")
+enc_tweak :: #force_inline proc "contextless" ( // dst = tag with the top bit of byte 0 set and block_nr XORed into bytes 8..15
+	dst: ^[TWEAK_SIZE]byte,
+	tag: ^[TAG_SIZE]byte,
+	block_nr: int,
+) {
+	tmp: [8]byte
+	endian.unchecked_put_u64be(tmp[:], u64(block_nr))
+
+	copy(dst[:], tag[:])
+	dst[0] |= 0x80
+	for i in 0 ..< 8 {
+		dst[i+8] ~= tmp[i]
+	}
+}
+
+@(private = "file")
+enc_plaintext :: #force_inline proc "contextless" ( // dst = the block `0x00 || iv`, replicated into all four lanes and orthogonalized
+	dst: ^[8]u64,
+	iv: []byte,
+) {
+	tmp: [BLOCK_SIZE]byte = ---
+	tmp[0] = 0
+	copy(tmp[1:], iv[:])
+
+	q_0, q_1 := aes.load_interleaved(tmp[:])
+	for i in 0 ..< 4 {
+		dst[i], dst[i+4] = q_0, q_1
+	}
+	aes.orthogonalize(dst)
+}
+
+@(private = "file")
+bc_x4 :: proc "contextless" ( // encrypts the n (at most 4) blocks held in q_b under tweaks[:n]; writes n*BLOCK_SIZE bytes to dst
+	ctx: ^Context,
+	dst: []byte,
+	tweaks: ^[4][TWEAK_SIZE]byte,
+	q_stk: ^[8]u64,
+	q_b: ^[8]u64, // Orthogonalized
+	n: int,
+) {
+	tk1s: [4]simd.u8x16
+	for j in 0 ..< n {
+		tk1s[j] = intrinsics.unaligned_load((^simd.u8x16)(&tweaks[j]))
+	}
+
+	// Deoxys-BC-384
+	for i in 0 ..= BC_ROUNDS {
+		// Derive the round's subtweakkey
+		sk := intrinsics.unaligned_load((^simd.u8x16)(&ctx._subkeys[i]))
+		for j in 0 ..< n {
+			if i != 0 {
+				tk1s[j] = h(tk1s[j])
+			}
+			intrinsics.unaligned_store(
+				(^simd.u8x16)(raw_data(dst)), // the first 16 bytes of dst double as scratch space
+				simd.bit_xor(sk, tk1s[j]),
+			)
+			q_stk[j], q_stk[j+4] = aes.load_interleaved(dst[:])
+		}
+		aes.orthogonalize(q_stk)
+
+		if i != 0 {
+			aes.sub_bytes(q_b)
+			aes.shift_rows(q_b)
+			aes.mix_columns(q_b)
+		}
+		aes.add_round_key(q_b, q_stk[:])
+	}
+
+	aes.orthogonalize(q_b)
+	for i in 0 ..< n {
+		aes.store_interleaved(dst[i*BLOCK_SIZE:], q_b[i], q_b[i+4])
+	}
+}
+
+@(private = "file", require_results)
+bc_absorb :: proc "contextless" ( // XORs the encryption of every full block of src into dst[:16]; returns the next block number
+	st: ^State_SW,
+	dst: []byte,
+	src: []byte,
+	tweak_prefix: byte,
+	stk_block_nr: int,
+) -> int {
+	tweaks: [4][TWEAK_SIZE]byte = ---
+	tmp: [BLOCK_SIZE*4]byte = ---
+
+	src, stk_block_nr := src, stk_block_nr
+	dst_ := intrinsics.unaligned_load((^simd.u8x16)(raw_data(dst)))
+
+	nr_blocks := len(src) / BLOCK_SIZE
+	for nr_blocks > 0 {
+		// Derive the tweak(s), orthogonalize the plaintext
+		n := min(nr_blocks, 4)
+		for i in 0 ..< n {
+			auth_tweak(&tweaks[i], tweak_prefix, stk_block_nr + i)
+			st.q_b[i], st.q_b[i + 4] = aes.load_interleaved(src)
+			src = src[BLOCK_SIZE:]
+		}
+		aes.orthogonalize(&st.q_b)
+
+		// Deoxys-BC-384
+		bc_x4(st.ctx, tmp[:], &tweaks, &st.q_stk, &st.q_b, n)
+
+		// XOR in the existing Auth/tag
+		for i in 0 ..< n {
+			dst_ = simd.bit_xor(
+				dst_,
+				intrinsics.unaligned_load((^simd.u8x16)(raw_data(tmp[i*BLOCK_SIZE:]))),
+			)
+		}
+
+		stk_block_nr += n
+		nr_blocks -= n
+	}
+
+	intrinsics.unaligned_store((^simd.u8x16)(raw_data(dst)), dst_)
+
+	mem.zero_explicit(&tweaks, size_of(tweaks))
+	mem.zero_explicit(&tmp, size_of(tmp))
+
+	return stk_block_nr
+}
+
+@(private = "file")
+bc_final :: proc "contextless" ( // encrypts dst[:16] in place under the tweak `(PREFIX_TAG << PREFIX_SHIFT) || iv`
+	st: ^State_SW,
+	dst: []byte,
+	iv: []byte,
+) {
+	tweaks: [4][TWEAK_SIZE]byte = ---
+
+	tweaks[0][0] = PREFIX_TAG << PREFIX_SHIFT
+	copy(tweaks[0][1:], iv)
+
+	st.q_b[0], st.q_b[4] = aes.load_interleaved(dst)
+	aes.orthogonalize(&st.q_b)
+
+	bc_x4(st.ctx, dst, &tweaks, &st.q_stk, &st.q_b, 1)
+}
+
+@(private = "file", require_results)
+bc_encrypt :: proc "contextless" ( // dst = src ^ keystream for every full block of src; returns the next block number
+	st: ^State_SW,
+	dst: []byte,
+	src: []byte,
+	q_n: ^[8]u64, // Orthogonalized
+	tweak_tag: ^[TAG_SIZE]byte,
+	stk_block_nr: int,
+) -> int {
+	tweaks: [4][TWEAK_SIZE]byte = ---
+	tmp: [BLOCK_SIZE*4]byte = ---
+
+	dst, src, stk_block_nr := dst, src, stk_block_nr
+
+	nr_blocks := len(src) / BLOCK_SIZE
+	for nr_blocks > 0 {
+		// Derive the tweak(s)
+		n := min(nr_blocks, 4)
+		for i in 0 ..< n {
+			enc_tweak(&tweaks[i], tweak_tag, stk_block_nr + i)
+		}
+		st.q_b = q_n^ // The plaintext is always `0^8 || N`
+
+		// Deoxys-BC-384
+		bc_x4(st.ctx, tmp[:], &tweaks, &st.q_stk, &st.q_b, n)
+
+		// XOR the ciphertext
+		for i in 0 ..< n {
+			intrinsics.unaligned_store(
+				(^simd.u8x16)(raw_data(dst[i*BLOCK_SIZE:])),
+				simd.bit_xor(
+					intrinsics.unaligned_load((^simd.u8x16)(raw_data(src[i*BLOCK_SIZE:]))),
+					intrinsics.unaligned_load((^simd.u8x16)(raw_data(tmp[i*BLOCK_SIZE:]))),
+				),
+			)
+		}
+
+		dst, src = dst[n*BLOCK_SIZE:], src[n*BLOCK_SIZE:]
+		stk_block_nr += n
+		nr_blocks -= n
+	}
+
+	mem.zero_explicit(&tweaks, size_of(tweaks))
+	mem.zero_explicit(&tmp, size_of(tmp))
+
+	return stk_block_nr
+}
+
+@(private)
+e_ref :: proc "contextless" (ctx: ^Context, dst, tag, iv, aad, plaintext: []byte) #no_bounds_check { // portable seal: authenticate aad and plaintext, then encrypt into dst and write tag
+	auth: [TAG_SIZE]byte
+
+	st: State_SW = ---
+	st.ctx = ctx
+
+	// Algorithm 3
+	//
+	// Associated data
+	//   A_1 || ... || A_la || A_∗ <- A where each |A_i| = n and |A_∗| < n
+	//   Auth <- 0^n
+	//   for i = 0 to la − 1 do
+	//     Auth <- Auth ^ EK(0010 || i, A_i+1)
+	//   end
+	//   if A_∗ != nil then
+	//     Auth <- Auth ^ EK(0110 || la, pad10∗(A_∗))
+	//   end
+	aad := aad
+	n := bc_absorb(&st, auth[:], aad, PREFIX_AD_BLOCK, 0)
+	aad = aad[BLOCK_SIZE*n:]
+	if l := len(aad); l > 0 {
+		a_star: [BLOCK_SIZE]byte
+
+		copy(a_star[:], aad)
+		a_star[l] = 0x80
+
+		_ = bc_absorb(&st, auth[:], a_star[:], PREFIX_AD_FINAL, n)
+	}
+
+	// Message authentication and tag generation
+	//   M_1 || ... || M_l || M_∗ <- M where each |M_j| = n and |M_∗| < n
+	//   tag <- Auth
+	//   for j = 0 to l − 1 do
+	//     tag <- tag ^ EK(0000 || j, M_j+1)
+	//   end
+	//   if M_∗ != nil then
+	//     tag <- tag ^ EK(0100 || l, pad10∗(M_∗))
+	//   end
+	//   tag <- EK(0001 || 0^4 || N, tag)
+	m := plaintext
+	n = bc_absorb(&st, auth[:], m, PREFIX_MSG_BLOCK, 0)
+	m = m[n*BLOCK_SIZE:]
+	if l := len(m); l > 0 {
+		m_star: [BLOCK_SIZE]byte
+
+		copy(m_star[:], m)
+		m_star[l] = 0x80
+
+		_ = bc_absorb(&st, auth[:], m_star[:], PREFIX_MSG_FINAL, n)
+	}
+	bc_final(&st, auth[:], iv)
+
+	// Message encryption
+	//   for j = 0 to l − 1 do
+	//     C_j <- M_j ^ EK(1 || tag ^ j, 0^8 || N)
+	//   end
+	//   if M_∗ != nil then
+	//     C_∗ <- M_∗ ^ EK(1 || tag ^ l, 0^8 || N)
+	//   end
+	//
+	//   return (C_1 || ... || C_l || C_∗, tag)
+	q_iv: [8]u64 = ---
+	enc_plaintext(&q_iv, iv)
+
+	m = plaintext
+	n = bc_encrypt(&st, dst, m, &q_iv, &auth, 0)
+	m = m[n*BLOCK_SIZE:]
+	if l := len(m); l > 0 {
+		m_star: [BLOCK_SIZE]byte
+
+		copy(m_star[:], m)
+		_ = bc_encrypt(&st, m_star[:], m_star[:], &q_iv, &auth, n)
+
+		copy(dst[n*BLOCK_SIZE:], m_star[:])
+	}
+
+	copy(tag, auth[:])
+
+	mem.zero_explicit(&st.q_stk, size_of(st.q_stk))
+	mem.zero_explicit(&st.q_b, size_of(st.q_b))
+}
+
+@(private, require_results)
+d_ref :: proc "contextless" (ctx: ^Context, dst, iv, aad, ciphertext, tag: []byte) -> bool { // portable open: decrypt into dst, recompute the tag, compare in constant time
+	auth: [TAG_SIZE]byte
+
+	st: State_SW = ---
+	st.ctx = ctx
+
+	copy(auth[:], tag)
+
+	// Algorithm 4
+	//
+	// Message decryption
+	//   C_1 || ... || C_l || C_∗ <- C where each |C_j| = n and |C_∗| < n
+	//   for j = 0 to l − 1 do
+	//     M_j <- C_j ^ EK(1 || tag ^ j, 0^8 || N)
+	//   end
+	//   if C_∗ != nil then
+	//     M_∗ <- C_∗ ^ EK(1 || tag ^ l, 0^8 || N)
+	//   end
+	q_iv: [8]u64 = ---
+	enc_plaintext(&q_iv, iv)
+
+	m := ciphertext
+	n := bc_encrypt(&st, dst, m, &q_iv, &auth, 0)
+	m = m[n*BLOCK_SIZE:]
+	if l := len(m); l > 0 {
+		m_star: [BLOCK_SIZE]byte
+
+		copy(m_star[:], m)
+		_ = bc_encrypt(&st, m_star[:], m_star[:], &q_iv, &auth, n)
+
+		copy(dst[n*BLOCK_SIZE:], m_star[:])
+	}
+
+	// Associated data
+	//   A_1 || ... || A_la || A_∗ <- A where each |A_i| = n and |A_∗| < n
+	//   Auth <- 0
+	//   for i = 0 to la − 1 do
+	//     Auth <- Auth ^ EK(0010 || i, A_i+1)
+	//   end
+	//   if A_∗ != nil then
+	//     Auth <- Auth ^ EK(0110 || la, pad10∗(A_∗))
+	//   end
+	auth = 0
+	aad := aad
+	n = bc_absorb(&st, auth[:], aad, PREFIX_AD_BLOCK, 0)
+	aad = aad[BLOCK_SIZE*n:]
+	if l := len(aad); l > 0 {
+		a_star: [BLOCK_SIZE]byte
+
+		copy(a_star[:], aad)
+		a_star[l] = 0x80
+
+		_ = bc_absorb(&st, auth[:], a_star[:], PREFIX_AD_FINAL, n)
+	}
+
+	// Message authentication and tag generation
+	//   M_1 || ... || M_l || M_∗ <- M where each |M_j| = n and |M_∗| < n
+	//   tag0 <- Auth
+	//   for j = 0 to l − 1 do
+	//     tag0 <- tag0 ^ EK(0000 || j, M_j+1)
+	//   end
+	//   if M_∗ != nil then
+	//     tag0 <- tag0 ^ EK(0100 || l, pad10∗(M_∗))
+	//   end
+	//   tag0 <- EK(0001 || 0^4 || N, tag0)
+	m = dst[:len(ciphertext)]
+	n = bc_absorb(&st, auth[:], m, PREFIX_MSG_BLOCK, 0)
+	m = m[n*BLOCK_SIZE:]
+	if l := len(m); l > 0 {
+		m_star: [BLOCK_SIZE]byte
+
+		copy(m_star[:], m)
+		m_star[l] = 0x80
+
+		_ = bc_absorb(&st, auth[:], m_star[:], PREFIX_MSG_FINAL, n)
+	}
+	bc_final(&st, auth[:], iv)
+
+	// Tag verification
+	//   if tag0 = tag then return (M_1 || ... || M_l || M_∗)
+	//   else return false
+	ok := crypto.compare_constant_time(auth[:], tag) == 1
+
+	mem.zero_explicit(&auth, size_of(auth))
+	mem.zero_explicit(&st.q_stk, size_of(st.q_stk))
+	mem.zero_explicit(&st.q_b, size_of(st.q_b))
+
+	return ok
+}
diff --git a/examples/all/all_main.odin b/examples/all/all_main.odin
index c540dbb3198..32b6f89ee00 100644
--- a/examples/all/all_main.odin
+++ b/examples/all/all_main.odin
@@ -33,6 +33,7 @@ import blake2s "core:crypto/blake2s"
 import chacha20 "core:crypto/chacha20"
 import chacha20poly1305 "core:crypto/chacha20poly1305"
 import crypto_hash "core:crypto/hash"
+import deoxysii "core:crypto/deoxysii"
 import ed25519 "core:crypto/ed25519"
 import hkdf "core:crypto/hkdf"
 import hmac "core:crypto/hmac"
@@ -176,6 +177,7 @@ _ :: blake2b
 _ :: blake2s
 _ :: chacha20
 _ :: chacha20poly1305
+_ :: deoxysii
 _ :: ed25519
 _ :: hmac
 _ :: hkdf
diff --git a/tests/benchmark/crypto/benchmark_crypto.odin b/tests/benchmark/crypto/benchmark_crypto.odin
index dfa491917de..9e8824c0363 100644
--- a/tests/benchmark/crypto/benchmark_crypto.odin
+++ b/tests/benchmark/crypto/benchmark_crypto.odin
@@ -12,6 +12,7 @@ import "core:crypto/aegis"
 import "core:crypto/aes"
 import "core:crypto/chacha20"
 import "core:crypto/chacha20poly1305"
+import "core:crypto/deoxysii"
 import "core:crypto/ed25519"
 import "core:crypto/poly1305"
 import "core:crypto/x25519"
@@ -202,6 +203,43 @@ benchmark_crypto :: proc(t: ^testing.T) {
 		testing.expect(t, err == nil, name)
 		benchmark_print(&str, name, options)
 	}
+	{
+		name := "Deoxys-II-256 64 bytes"
+		options := &time.Benchmark_Options {
+			rounds = 1_000,
+			bytes = 64,
+			setup = _setup_sized_buf,
+			bench = _benchmark_deoxysii_256,
+			teardown = _teardown_sized_buf,
+		}
+
+		key := [deoxysii.KEY_SIZE]byte { // sized by the algorithm under test, not by an AEGIS constant
+			0xde, 0xad, 0xbe, 0xef, 0xde, 0xad, 0xbe, 0xef,
+			0xde, 0xad, 0xbe, 0xef, 0xde, 0xad, 0xbe, 0xef,
+			0xde, 0xad, 0xbe, 0xef, 0xde, 0xad, 0xbe, 0xef,
+			0xde, 0xad, 0xbe, 0xef, 0xde, 0xad, 0xbe, 0xef,
+		}
+		ctx: deoxysii.Context
+		
deoxysii.init(&ctx, key[:]) + + context.user_ptr = &ctx + + err := time.benchmark(options, context.allocator) + testing.expect(t, err == nil, name) + benchmark_print(&str, name, options) + + name = "Deoxys-II-256 1024 bytes" + options.bytes = 1024 + err = time.benchmark(options, context.allocator) + testing.expect(t, err == nil, name) + benchmark_print(&str, name, options) + + name = "Deoxys-II-256 65536 bytes" + options.bytes = 65536 + err = time.benchmark(options, context.allocator) + testing.expect(t, err == nil, name) + benchmark_print(&str, name, options) + } { iters :: 10000 @@ -481,6 +519,26 @@ _benchmark_aegis_256 :: proc( return nil } +_benchmark_deoxysii_256 :: proc( + options: ^time.Benchmark_Options, + allocator := context.allocator, +) -> ( + err: time.Benchmark_Error, +) { + buf := options.input + iv: [deoxysii.IV_SIZE]byte + tag: [deoxysii.TAG_SIZE]byte = --- + + ctx := (^deoxysii.Context)(context.user_ptr) + + for _ in 0 ..= options.rounds { + deoxysii.seal(ctx, buf, tag[:], iv[:], nil, buf) + } + options.count = options.rounds + options.processed = options.rounds * options.bytes + return nil +} + @(private) benchmark_print :: proc(str: ^strings.Builder, name: string, options: ^time.Benchmark_Options, loc := #caller_location) { fmt.sbprintfln(str, "[%v] %v rounds, %v bytes processed in %v ns\n\t\t%5.3f rounds/s, %5.3f MiB/s\n", diff --git a/tests/core/crypto/test_core_crypto_aead.odin b/tests/core/crypto/test_core_crypto_aead.odin index dfa251413f6..34b5d4b9dc2 100644 --- a/tests/core/crypto/test_core_crypto_aead.odin +++ b/tests/core/crypto/test_core_crypto_aead.odin @@ -23,6 +23,10 @@ test_aead :: proc(t: ^testing.T) { for impl in supported_aegis_impls() { append(&aegis_impls, impl) } + deoxysii_impls := make([dynamic]aead.Implementation, context.temp_allocator) + for impl in supported_deoxysii_impls() { + append(&deoxysii_impls, impl) + } impls := [aead.Algorithm][dynamic]aead.Implementation{ .Invalid = nil, .AES_GCM_128 = aes_impls, @@ -34,6 
+38,7 @@ test_aead :: proc(t: ^testing.T) { .AEGIS_128L_256 = aegis_impls, .AEGIS_256 = aegis_impls, .AEGIS_256_256 = aegis_impls, + .DEOXYS_II_256 = deoxysii_impls, } test_vectors := []struct{ @@ -418,6 +423,79 @@ test_aead :: proc(t: ^testing.T) { "57754a7d09963e7c787583a2e7b859bb24fa1e04d49fd550b2511a358e3bca252a9b1b8b30cc4a67", "a3aca270c006094d71c20e6910b5161c0826df233d08919a566ec2c05990f734", }, + // Deoxys-II-256 + { + .DEOXYS_II_256, + "101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f", + "202122232425262728292a2b2c2d2e", + "", + "", + "", + "2b97bd77712f0cde975309959dfe1d7c", + }, + { + .DEOXYS_II_256, + "101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f", + "202122232425262728292a2b2c2d2e", + "000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f", + "", + "", + "54708ae5565a71f147bdb94d7ba3aed7", + }, + { + .DEOXYS_II_256, + "101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f", + "202122232425262728292a2b2c2d2e", + "f495c9c03d29989695d98ff5d430650125805c1e0576d06f26cbda42b1f82238b8", + "", + "", + "3277689dc4208cc1ff59d15434a1baf1", + }, + { + .DEOXYS_II_256, + "101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f", + "202122232425262728292a2b2c2d2e", + "", + "000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f", + "9da20db1c2781f6669257d87e2a4d9be1970f7581bef2c995e1149331e5e8cc1", + "92ce3aec3a4b72ff9eab71c2a93492fa", + }, + { + .DEOXYS_II_256, + "101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f", + "202122232425262728292a2b2c2d2e", + "", + "15cd77732f9d0c4c6e581ef400876ad9188c5b8850ebd38224da95d7cdc99f7acc", + "e5ffd2abc5b459a73667756eda6443ede86c0883fc51dd75d22bb14992c684618c", + "5fa78d57308f19d0252072ee39df5ecc", + }, + { + .DEOXYS_II_256, + "101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f", + "202122232425262728292a2b2c2d2e", + "000102030405060708090a0b0c0d0e0f", + "000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f", + 
"109f8a168b36dfade02628a9e129d5257f03cc7912aefa79729b67b186a2b08f", + "6549f9bf10acba0a451dbb2484a60d90", + }, + { + .DEOXYS_II_256, + "101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f", + "202122232425262728292a2b2c2d2e", + "000102030405060708090a0b0c0d0e0f10", + "422857fb165af0a35c03199fb895604dca9cea6d788954962c419e0d5c225c0327", + "7d772203fa38be296d8d20d805163130c69aba8cb16ed845c2296c61a8f34b394e", + "0b3f10e3933c78190b24b33008bf80e9", + }, + { + .DEOXYS_II_256, + "101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f", + "202122232425262728292a2b2c2d2e", + "3290bb8441279dc6083a43e9048c3dc08966ab30d7a6b35759e7a13339f124918f3b5ab1affa65e6c0e3680eb33a6ec82424ab1ce5a40b8654e13d845c29b13896a1466a75fc875acba4527ded37ed00c600a357c9a6e586c74cf3d85cd3258c813218f319d12b82480e5124ff19ec00bda1fbb8bd25eeb3de9fcbf3296deba250caf7e9f4ef0be1918e24221dd0be888c59c166ad761d7b58462a1b1d44b04265b45827172c133dd5b6c870b9af7b21368d12a88f4efa1751047543d584382d9ec22e7550d50ecddba27d1f65453f1f3398de54ee8c1f4ac8e16f5523d89641e99a632380af0f0b1e6b0e192ec29bf1d8714978ff9fbfb93604142393e9a82c3aaebbbe15e3b4e5cfd18bdfe309315c9f9f830deebe2edcdc24f8eca90fda49f6646e789c5041fb5be933fa843278e95f3a54f8eb41f14777ea949d5ea442b01249e64816151a325769e264ed4acd5c3f21700ca755d5bc0c2c5f9453419510bc74f2d71621dcecb9efc9c24791b4bb560fb70a8231521d6560af89d8d50144d9c080863f043781153bcd59030e60bd17a6d7aa083211b67b581fa4f74cce4d030d1e8f9429fd725c110040d41eb6989ffb1595c72cbe3c9b78a8ab80d71a6a5283da77b89cae295bb13c14fbe466b617f4da8ad60b085e2ea153f6713ae0046aa31e0ba44e43ef36a111bf05c073a4e3624cd35f63a546f9142b35aa81b8826d", + 
"83dab23b1379e090755c99079cfe918cb737e989f2d720ccaff493a744927644fec3653211fa75306a83486e5c34ecfe63870c97251a73e4b9033ae374809711b211ed5d293a592e466a81170f1d85750b5ca025ccd4579947edbae9ec132bfb1a7233ad79fae30006a6699f143893861b975226ed9d3cfb8a240be232fbf4e83755d59d20bc2faa2ea5e5b0428427485cca5e76a89fe32bdd59ab4177ad7cb1899c101e3c4f7535129591390ebdf30140846078b13867bbb2efd6cf434afe356eb18d716b21fd664c26c908496534bf2cde6d6b897799016594fb6d9f830ae5f44ccec26d42ff0d1a21b80cdbe8c8c170a5f766fad884abcc781b5b8ebc0f559bfeaa4557b04d977d51411a7f47bf437d0280cf9f92bc4f9cd6226337a492320851955adae2cafea22a89c3132dd252e4728328eda05555dff3241404341b8aa502d45c456113af42a8e91a85e4b4e9555028982ec3d144722af0eb04a6d3b8127c3040629de53f5fd187048198e8f8e8cc857afcbae45c693fec12fc2149d5e7587d0121b1717d0147f6979f75e8f085293f705c3399a6cc8df7057bf481e6c374edf0a0af7479f858045357b7fe21021c3fabdaf012652bf2e5db257bd9490ce637a81477bd3f9814a2198fdb9afa9344321f2393798670e588c47a1924d592cda3eb5a96754dfd92d87ee1ffa9d4ee586c85d7518c5d2db57d0451c33de0", + 
"88294fcef65a1bdfd7baaa472816c64ef5bef2622b88c1ec5a739396157ef4935f3aa76449e391c32da28ee2857f399ac3dd95aed30cfb26cc0063cd4cd8f7431108176fbf370123856662b000a8348e5925fbb97c9ec0c737758330a7983f06b51590c1d2f5e5faaf0eb58e34e19e5fc85cec03d3926dd46a79ba7026e83dec24e07484c9103dd0cdb0edb505500caca5e1d5dbc71348cf00648821488ebaab7f9d84bbbf91b3c521dbef30110e7bd94f8dad5ab8e0cc5411ca9682d210d5d80c0c4bdbba8181789a4273d6deb80899fdcd976ca6f3a9770b54305f586a04256cfbeb4c11254e88559f294db3b9a94b80ab9f9a02cb4c0748de0af7818685521691dba5738be546dba13a56016fb8635af9dff50f25d1b17ad21707db2640a76a741e65e559b2afaaec0f37e18436bf02008f84dbd7b2698687a22376b65dc7524fca8a28709eee3f3caee3b28ed1173d1e08ee849e2ca63d2c90d555755c8fbafd5d2f4b37f06a1dbd6852ee2ffcfe79d510152e98fc4f3094f740a4aede9ee378b606d34576776bf5f1269f5385a84b3928433bfca177550ccfcd22cd0331bbc595e38c2758b2662476fa66354c4e84c7b360405aa3f5b2a48621bdca1a90c69b21789c91b5b8c568e3c741d99e22f6d7e26f2abed045f1d578b782ab4a5cf2af636d842b3012e180e4b045d8d15b057b69c92398a517053daf9be7c2935e", + "a616f0c218e18b526cf2a3f8c115e262", + }, } for v, _ in test_vectors { algo_name := aead.ALGORITHM_NAMES[v.algo] @@ -541,3 +619,13 @@ supported_aegis_impls :: proc() -> [dynamic]aes.Implementation { return impls } + +supported_deoxysii_impls :: proc() -> [dynamic]aes.Implementation { + impls := make([dynamic]aes.Implementation, 0, 2, context.temp_allocator) + append(&impls, aes.Implementation.Portable) + // if deoxysii.is_hardware_accelerated() { + // append(&impls, aes.Implementation.Hardware) + // } + + return impls +}