From 233dd2df98ad57fe32ff72a5e4a0f5e7e7309927 Mon Sep 17 00:00:00 2001 From: Vincent Hanquez Date: Wed, 15 Mar 2023 11:24:27 +0800 Subject: [PATCH] [ChaCha] speed improvement and spec conformance (#44) * [ChaCha] speed improvement and spec conformance * Improve ref speed by 10x removing the use of (fake) simd * Clearly separate the bernstein/original version from the IETF standard version in term of counters size and nonce handling * Separate XChaCha from the 'normal' ChaCha context * add the ability to seek in the chacha stream * fix example and documentation * [poly1305] make key size apparent at type level also switch initialization to be less mutable * [SALSA] improve performance and tweak APIs * performance bump by ~ 3x not using the fake simd instructions * separate salsa and xsalsa * add some docs * add MIGRATION GUIDE and CHANGELOG for this work --- CHANGELOG.md | 16 ++ MIGRATION_GUIDE.md | 37 ++++ src/chacha/reference.rs | 201 ++++++++------------ src/chacha/sse2.rs | 16 ++ src/chacha20.rs | 235 ++++++++++++++++++----- src/chacha20poly1305.rs | 32 ++-- src/cryptoutil.rs | 14 -- src/drg/chacha.rs | 2 +- src/poly1305.rs | 47 ++--- src/salsa20.rs | 405 +++++++++++++++++++++------------------- 10 files changed, 591 insertions(+), 414 deletions(-) create mode 100644 MIGRATION_GUIDE.md diff --git a/CHANGELOG.md b/CHANGELOG.md index 1ad79ab..611cfb4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,19 @@ +# unreleased + +* Improve performance of Salsa by 3x and Chacha by 10x +* Clearly distinguish at the type level various variants of chacha: + * Chacha as IETF (recommended) + * Chacha as original paper (64 bits counters) + * XChacha +* Distinguish at the type level Salsa and XSalsa + +Breaking Changes: + +* Chacha, Salsa and Poly1305 interface changes to expect fixed sized array instead of slice, for stronger type safety + and less runtime error. 
+* `Chacha::new_xchacha20::()` is now `XChacha::::init()` +* `Salsa::new_xsalsa20::()` is now `XSalsa::::init()` + # 0.4.4 * fix legacy blake2b and blake2s `output_bits` interface returning a value 8 times bigger. diff --git a/MIGRATION_GUIDE.md b/MIGRATION_GUIDE.md new file mode 100644 index 0000000..ef8aac1 --- /dev/null +++ b/MIGRATION_GUIDE.md @@ -0,0 +1,37 @@ +## Changing slice by array reference + +For slice to array reference changes of the form: + +``` +fn function(value: &[u8]) { ... } +``` + +to: + +``` +fn function(value: &[u8; 12]) { ... } +``` + +In the case of the caller using array of the right size already, +no changes need to be done. When the caller is using a subslice, +one can use the following construction, from: + +``` +fn caller() { + let slice = &[....]; + function(&slice[0..12]); +} +``` + +to: + +``` +use core::convert::TryFrom; // not-necessary in latest rust edition +fn caller() { + let slice = &[....]; + function(<&[u8; 12]>::try_from(&slice[0..12]).unwrap()); +} + +Note the .unwrap() is just one way to (not) handle the error, and the caller +should integrate the failing `try_from` case with the error handling +conventions of the caller code. diff --git a/src/chacha/reference.rs b/src/chacha/reference.rs index 5f2fc50..80db46f 100644 --- a/src/chacha/reference.rs +++ b/src/chacha/reference.rs @@ -1,54 +1,23 @@ use crate::cryptoutil::{read_u32_le, write_u32v_le}; -use crate::simd::u32x4; #[derive(Clone)] pub(crate) struct State { - a: u32x4, - b: u32x4, - c: u32x4, - d: u32x4, + state: [u32; 16], } -// b row <<< 8, c row <<< 16, d row <<< 24 -macro_rules! swizzle { - ($b: expr, $c: expr, $d: expr) => {{ - let u32x4(b10, b11, b12, b13) = $b; - $b = u32x4(b11, b12, b13, b10); - let u32x4(c10, c11, c12, c13) = $c; - $c = u32x4(c12, c13, c10, c11); - let u32x4(d10, d11, d12, d13) = $d; - $d = u32x4(d13, d10, d11, d12); - }}; +macro_rules! 
QR { + ($a:ident, $b:ident, $c:ident, $d:ident) => { + $a = $a.wrapping_add($b); + $d = ($d ^ $a).rotate_left(16); + $c = $c.wrapping_add($d); + $b = ($b ^ $c).rotate_left(12); + $a = $a.wrapping_add($b); + $d = ($d ^ $a).rotate_left(8); + $c = $c.wrapping_add($d); + $b = ($b ^ $c).rotate_left(7); + }; } -macro_rules! round { - ($state: expr) => {{ - $state.a = $state.a + $state.b; - rotate!($state.d, $state.a, S16); - $state.c = $state.c + $state.d; - rotate!($state.b, $state.c, S12); - $state.a = $state.a + $state.b; - rotate!($state.d, $state.a, S8); - $state.c = $state.c + $state.d; - rotate!($state.b, $state.c, S7); - }}; -} - -macro_rules! rotate { - ($a: expr, $b: expr, $c:expr) => {{ - let v = $a ^ $b; - let r = S32 - $c; - let right = v >> r; - $a = (v << $c) ^ right - }}; -} - -static S32: u32x4 = u32x4(32, 32, 32, 32); -static S16: u32x4 = u32x4(16, 16, 16, 16); -static S12: u32x4 = u32x4(12, 12, 12, 12); -static S8: u32x4 = u32x4(8, 8, 8, 8); -static S7: u32x4 = u32x4(7, 7, 7, 7); - impl State { // state initialization constant le-32bit array of b"expand 16-byte k" const CST16: [u32; 4] = [0x61707865, 0x3120646e, 0x79622d36, 0x6b206574]; @@ -63,110 +32,104 @@ impl State { /// Initialize the state with key and nonce pub(crate) fn init(key: &[u8], nonce: &[u8]) -> Self { - let (a, b, c) = match key.len() { - 16 => Self::init_key16(key), - 32 => Self::init_key32(key), + let mut state = [0u32; 16]; + match key.len() { + 16 => { + state[0] = Self::CST16[0]; + state[1] = Self::CST16[1]; + state[2] = Self::CST16[2]; + state[3] = Self::CST16[3]; + } + 32 => { + state[0] = Self::CST32[0]; + state[1] = Self::CST32[1]; + state[2] = Self::CST32[2]; + state[3] = Self::CST32[3]; + state[4] = read_u32_le(&key[0..4]); + state[5] = read_u32_le(&key[4..8]); + state[6] = read_u32_le(&key[8..12]); + state[7] = read_u32_le(&key[12..16]); + state[8] = read_u32_le(&key[16..20]); + state[9] = read_u32_le(&key[20..24]); + state[10] = read_u32_le(&key[24..28]); + state[11] = 
read_u32_le(&key[28..32]); + } _ => unreachable!(), }; - let d = Self::init_nonce(nonce); - Self { a, b, c, d } - } - - #[inline] - fn init_key16(key: &[u8]) -> (u32x4, u32x4, u32x4) { - let constant: &[u32; 4] = &Self::CST16; - let c = u32x4(constant[0], constant[1], constant[2], constant[3]); - let k1 = u32x4( - read_u32_le(&key[0..4]), - read_u32_le(&key[4..8]), - read_u32_le(&key[8..12]), - read_u32_le(&key[12..16]), - ); - (c, k1, k1) - } - - #[inline] - fn init_key32(key: &[u8]) -> (u32x4, u32x4, u32x4) { - let constant: &[u32; 4] = &Self::CST32; - let c = u32x4(constant[0], constant[1], constant[2], constant[3]); - let k1 = u32x4( - read_u32_le(&key[0..4]), - read_u32_le(&key[4..8]), - read_u32_le(&key[8..12]), - read_u32_le(&key[12..16]), - ); - let k2 = u32x4( - read_u32_le(&key[16..20]), - read_u32_le(&key[20..24]), - read_u32_le(&key[24..28]), - read_u32_le(&key[28..32]), - ); - (c, k1, k2) - } - - #[inline] - fn init_nonce(nonce: &[u8]) -> u32x4 { if nonce.len() == 16 { - u32x4( - read_u32_le(&nonce[0..4]), - read_u32_le(&nonce[4..8]), - read_u32_le(&nonce[8..12]), - read_u32_le(&nonce[12..16]), - ) + state[12] = read_u32_le(&nonce[0..4]); + state[13] = read_u32_le(&nonce[4..8]); + state[14] = read_u32_le(&nonce[8..12]); + state[15] = read_u32_le(&nonce[12..16]); } else if nonce.len() == 12 { - u32x4( - 0, - read_u32_le(&nonce[0..4]), - read_u32_le(&nonce[4..8]), - read_u32_le(&nonce[8..12]), - ) + // 12 is already set to 0 + state[13] = read_u32_le(&nonce[0..4]); + state[14] = read_u32_le(&nonce[4..8]); + state[15] = read_u32_le(&nonce[8..12]); } else { - u32x4(0, 0, read_u32_le(&nonce[0..4]), read_u32_le(&nonce[4..8])) + // 12 and 13 already set to 0 + state[14] = read_u32_le(&nonce[0..4]); + state[15] = read_u32_le(&nonce[4..8]); } + Self { state } } #[inline] pub(crate) fn rounds(&mut self) { + let [mut x0, mut x1, mut x2, mut x3, mut x4, mut x5, mut x6, mut x7, mut x8, mut x9, mut x10, mut x11, mut x12, mut x13, mut x14, mut x15] = + self.state; + 
for _ in 0..(ROUNDS / 2) { - round!(self); - swizzle!(self.b, self.c, self.d); - round!(self); - swizzle!(self.d, self.c, self.b); + QR!(x0, x4, x8, x12); + QR!(x1, x5, x9, x13); + QR!(x2, x6, x10, x14); + QR!(x3, x7, x11, x15); + + QR!(x0, x5, x10, x15); + QR!(x1, x6, x11, x12); + QR!(x2, x7, x8, x13); + QR!(x3, x4, x9, x14); } + + self.state = [ + x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15, + ]; + } + + #[inline] + pub(crate) fn set_counter(&mut self, counter: u32) { + self.state[12] = counter; } #[inline] pub(crate) fn increment(&mut self) { - self.d = self.d + u32x4(1, 0, 0, 0); + self.state[12] = self.state[12].wrapping_add(1); + } + + #[inline] + pub(crate) fn increment64(&mut self) { + self.state[12] = self.state[12].wrapping_add(1); + if self.state[12] == 0 { + self.state[13] = self.state[13].wrapping_add(1); + } } #[inline] /// Add back the initial state pub(crate) fn add_back(&mut self, initial: &Self) { - self.a = self.a + initial.a; - self.b = self.b + initial.b; - self.c = self.c + initial.c; - self.d = self.d + initial.d; + for i in 0..16 { + self.state[i] = self.state[i].wrapping_add(initial.state[i]); + } } #[inline] pub(crate) fn output_bytes(&self, output: &mut [u8]) { - let u32x4(a1, a2, a3, a4) = self.a; - let u32x4(b1, b2, b3, b4) = self.b; - let u32x4(c1, c2, c3, c4) = self.c; - let u32x4(d1, d2, d3, d4) = self.d; - write_u32v_le( - output, - &[ - a1, a2, a3, a4, b1, b2, b3, b4, c1, c2, c3, c4, d1, d2, d3, d4, - ], - ); + write_u32v_le(output, &self.state); } #[inline] pub(crate) fn output_ad_bytes(&self, output: &mut [u8; 32]) { - let u32x4(a1, a2, a3, a4) = self.a; - let u32x4(d1, d2, d3, d4) = self.d; - write_u32v_le(&mut output[..], &[a1, a2, a3, a4, d1, d2, d3, d4]); + write_u32v_le(&mut output[0..16], &self.state[0..4]); + write_u32v_le(&mut output[16..32], &self.state[12..16]); } } diff --git a/src/chacha/sse2.rs b/src/chacha/sse2.rs index 1b2e8ae..05faec1 100644 --- a/src/chacha/sse2.rs +++ 
b/src/chacha/sse2.rs @@ -146,8 +146,24 @@ impl State { } } + #[inline] + pub(crate) fn set_counter(&mut self, counter: u32) { + let mut align = Align128::zero(); + align.from_m128i(self.d); + align.0[0] = counter; + self.d = align.to_m128i(); + } + #[inline] pub(crate) fn increment(&mut self) { + let mut align = Align128::zero(); + align.from_m128i(self.d); + align.0[0] = align.0[0].wrapping_add(1); + self.d = align.to_m128i(); + } + + #[inline] + pub(crate) fn increment64(&mut self) { let mut align = Align128::zero(); align.from_m128i(self.d); let (a, overflowed) = align.0[0].overflowing_add(1); diff --git a/src/chacha20.rs b/src/chacha20.rs index f57311c..3d9c923 100644 --- a/src/chacha20.rs +++ b/src/chacha20.rs @@ -1,14 +1,26 @@ //! ChaCha20 Stream Cipher //! -//! Implementation of [ChaCha spec](https://cr.yp.to/chacha/chacha-20080128.pdf), +//! Implementation of [ChaCha spec](https://www.rfc-editor.org/info/rfc7539) //! which is a fast and lean stream cipher. //! -//! Along with the standard ChaCha20, there is support for the -//! XChaCha20 variant with extended nonce. +//! The maximum amount of data to be processed by a single instance of a ChaCha +//! Context, is 256Gb (due to the 32 bits counter). Note that this is not +//! enforced by the context, and using a context to process more than 256Gb of +//! data would be insecure. +//! +//! Along with the standard IETF ChaCha, there is support for the +//! original ChaCha (with 64 bits counter) and +//! XChaCha variant with extended 192 bits nonce and 64 bits counter. //! //! Note that with stream cipher, there's only one operation [`ChaCha20::process`] //! instead of the typical encrypt and decrypt. //! +//! # Variants +//! +//! Multiple variations of Chacha exists with subtle variations in what they do. +//! The original version of chacha supports 64 bits counter and 64 bits nonce, +//! but the RFC7539 version only supports 32 bits counter and 96 bits nonce. +//! //! # Examples //! //! 
Combine a simple input using a 128 bits key and 64 bit nonce: @@ -17,7 +29,7 @@ //! use cryptoxide::chacha20::ChaCha20; //! //! let key : [u8; 16] = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]; -//! let nonce : [u8; 8] = [1,2,3,4,5,6,7,8]; +//! let nonce : [u8; 12] = [0,1,2,3,4,5,6,7,8,9,10,11]; //! let input : &[u8; 12] = b"hello world!"; //! let mut out : [u8; 12] = [0u8; 12]; //! @@ -34,7 +46,13 @@ use core::cmp; use crate::chacha::ChaChaEngine as ChaChaState; use crate::cryptoutil::xor_keystream_mut; -/// ChaCha Context +/// ChaCha Context (IETF Variant - RFC7539) +/// +/// Note that the number of rounds is exposed here, and only the +/// value of 8, 12 and 20 are supported. any other values triggers +/// a runtime assertion. +/// +/// If you don't know what rounds values to use, use 20 or the `Chacha20` type directly. #[derive(Clone)] pub struct ChaCha { state: ChaChaState, @@ -44,20 +62,15 @@ pub struct ChaCha { pub type ChaCha20 = ChaCha<20>; -impl ChaCha<20> { - pub fn new_xchacha20(key: &[u8], nonce: &[u8]) -> Self { - Self::new_xchacha(key, nonce) - } -} - impl ChaCha { /// Create a new ChaCha20 context. 
/// /// * The key must be 16 or 32 bytes - /// * The nonce must be 8 or 12 bytes - pub fn new(key: &[u8], nonce: &[u8]) -> Self { + /// * The nonce must be 12 bytes + /// + /// For using 8 bytes (64bits) nonces, uses `ChaChaOriginal` + pub fn new(key: &[u8], nonce: &[u8; 12]) -> Self { assert!(key.len() == 16 || key.len() == 32); - assert!(nonce.len() == 8 || nonce.len() == 12); assert!(ROUNDS == 8 || ROUNDS == 12 || ROUNDS == 20); Self { @@ -67,46 +80,179 @@ impl ChaCha { } } + /// Seek the stream to a specific (64-bytes) block number + pub fn seek(&mut self, position: u32) { + self.state.set_counter(position); + self.offset = 64; + } + + // put the the next 64 keystream bytes into self.output + fn update(&mut self) { + let mut state = self.state.clone(); + state.rounds(); + state.add_back(&self.state); + + state.output_bytes(&mut self.output); + + self.state.increment(); + self.offset = 0; + } + + /// Process the input in place through the cipher xoring + /// + /// To get only the stream of this cipher, one can just pass the zero + /// buffer (X xor 0 = X) + pub fn process_mut(&mut self, data: &mut [u8]) { + let len = data.len(); + let mut i = 0; + while i < len { + // If there is no keystream available in the output buffer, + // generate the next block. + if self.offset == 64 { + self.update(); + } + + // Process the min(available keystream, remaining input length). + let count = cmp::min(64 - self.offset, len - i); + xor_keystream_mut(&mut data[i..i + count], &self.output[self.offset..]); + i += count; + self.offset += count; + } + } + + /// Process the input through the cipher, xoring the byte one-by-one + /// + /// the output need to be the same size as the input otherwise + /// this function will panic. 
+ pub fn process(&mut self, input: &[u8], output: &mut [u8]) { + assert_eq!( + input.len(), + output.len(), + "chacha::process need to have input and output of the same size" + ); + output.copy_from_slice(input); + self.process_mut(output); + } +} + +/// XChaCha Context +#[derive(Clone)] +pub struct XChaCha { + state: ChaChaState, + output: [u8; 64], + offset: usize, +} + +impl XChaCha { /// Create a new XChaCha20 context. /// /// Key must be 32 bytes and the nonce 24 bytes. - pub fn new_xchacha(key: &[u8], nonce: &[u8]) -> Self { - assert!(key.len() == 32); - assert!(nonce.len() == 24); + pub fn new(key: &[u8; 32], nonce: &[u8; 24]) -> Self { assert!(ROUNDS == 8 || ROUNDS == 12 || ROUNDS == 20); - // HChaCha20 produces a 256-bit output block starting from a 512 bit - // input block where (x0,x1,...,x15) where - // - // * (x0, x1, x2, x3) is the ChaCha20 constant. - // * (x4, x5, ... x11) is a 256 bit key. - // * (x12, x13, x14, x15) is a 128 bit nonce. - let mut xchacha = ChaCha { - state: ChaChaState::init(key, &nonce[0..16]), - output: [0u8; 64], - offset: 64, - }; - // Use HChaCha to derive the subkey, and initialize a ChaCha instance // with the subkey and the remaining 8 bytes of the nonce. + let mut hchacha = ChaChaState::::init(key, &nonce[0..16]); + hchacha.rounds(); let mut new_key = [0; 32]; - xchacha.hchacha(&mut new_key); - xchacha.state = ChaChaState::init(&new_key, &nonce[16..24]); + hchacha.output_ad_bytes(&mut new_key); + + let xchacha = XChaCha { + state: ChaChaState::init(&new_key, &nonce[16..24]), + output: [0u8; 64], + offset: 64, + }; xchacha } - fn hchacha(&mut self, out: &mut [u8; 32]) { - let mut state = self.state.clone(); + /// Seek the stream to a specific (64-bytes) block number + pub fn seek(&mut self, position: u32) { + self.state.set_counter(position); + self.offset = 64; + } - // Apply r/2 iterations of the same "double-round" function, - // obtaining (z0, z1, ... z15) = doubleround r/2 (x0, x1, ... x15). 
+ // put the the next 64 keystream bytes into self.output + fn update(&mut self) { + let mut state = self.state.clone(); state.rounds(); + state.add_back(&self.state); + + state.output_bytes(&mut self.output); + + self.state.increment(); + self.offset = 0; + } + + /// Process the input in place through the cipher xoring + /// + /// To get only the stream of this cipher, one can just pass the zero + /// buffer (X xor 0 = X) + pub fn process_mut(&mut self, data: &mut [u8]) { + let len = data.len(); + let mut i = 0; + while i < len { + // If there is no keystream available in the output buffer, + // generate the next block. + if self.offset == 64 { + self.update(); + } + + // Process the min(available keystream, remaining input length). + let count = cmp::min(64 - self.offset, len - i); + xor_keystream_mut(&mut data[i..i + count], &self.output[self.offset..]); + i += count; + self.offset += count; + } + } - // HChaCha20 then outputs the 256-bit block (z0, z1, z2, z3, z12, z13, - // z14, z15). These correspond to the constant and input positions in - // the ChaCha matrix. - state.output_ad_bytes(out) + /// Process the input through the cipher, xoring the byte one-by-one + /// + /// the output need to be the same size as the input otherwise + /// this function will panic. + pub fn process(&mut self, input: &[u8], output: &mut [u8]) { + assert_eq!( + input.len(), + output.len(), + "chacha::process need to have input and output of the same size" + ); + output.copy_from_slice(input); + self.process_mut(output); + } +} + +/// ChaCha Context (Original version - Bernstein & co) +/// +/// This variant has an 8 bytes nonce initializer, and an 8 bytes counter +/// +/// Note that the number of rounds is exposed here, and only the +/// value of 8, 12 and 20 are supported. any other values triggers +/// a runtime assertion. 
+/// +/// If you don't know what round values to use, use 20 +#[derive(Clone)] +pub struct ChaChaOriginal { + state: ChaChaState, + output: [u8; 64], + offset: usize, +} + +impl ChaChaOriginal { + /// Create a new ChaCha20 context. + /// + /// * The key must be 16 or 32 bytes + /// * The nonce must be 8 bytes + /// + /// For using 12 bytes (96 bits) nonces, uses the IETF variant `ChaCha` + pub fn new(key: &[u8], nonce: &[u8; 8]) -> Self { + assert!(key.len() == 16 || key.len() == 32); + assert!(ROUNDS == 8 || ROUNDS == 12 || ROUNDS == 20); + + Self { + state: ChaChaState::init(key, nonce), + output: [0u8; 64], + offset: 64, + } } // put the the next 64 keystream bytes into self.output @@ -117,7 +263,8 @@ impl ChaCha { state.output_bytes(&mut self.output); - self.state.increment(); + // this is the only real subtle difference with IETF Chacha (along with initialization difference) + self.state.increment64(); self.offset = 0; } @@ -164,6 +311,8 @@ mod test { use core::iter::repeat; use super::ChaCha20; + use super::ChaChaOriginal; + use super::XChaCha; #[test] fn test_chacha20_256_tls_vectors() { @@ -267,7 +416,7 @@ mod test { ]; for tv in test_vectors.iter() { - let mut c = ChaCha20::new(&tv.key, &tv.nonce); + let mut c = ChaChaOriginal::<20>::new(&tv.key, &tv.nonce); let input: Vec = repeat(0).take(tv.keystream.len()).collect(); let mut output: Vec = repeat(0).take(input.len()).collect(); c.process(&input[..], &mut output[..]); @@ -304,9 +453,9 @@ mod test { 0x0e, 0x21, 0x69, 0x1d, 0x7e, 0xce, 0xc9, 0x3b, 0x75, 0xe6, 0xe4, 0x18, 0x3a, ]; - let mut xchacha20 = ChaCha20::new_xchacha20(&key, &nonce); + let mut xchacha20 = XChaCha::<20>::new(&key, &nonce); xchacha20.process(&input, &mut stream); - assert!(stream[..] == result[..]); + assert_eq!(stream, result); } #[test] diff --git a/src/chacha20poly1305.rs b/src/chacha20poly1305.rs index 83ba03b..7f4adff 100644 --- a/src/chacha20poly1305.rs +++ b/src/chacha20poly1305.rs @@ -19,7 +19,7 @@ //! 
use cryptoxide::chacha20poly1305::ChaCha20Poly1305; //! //! let key : [u8; 16] = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]; -//! let nonce : [u8; 8] = [1,2,3,4,5,6,7,8]; +//! let nonce : [u8; 12] = [1,2,3,4,5,6,7,8,9,10,11,12]; //! let aad : [u8; 0] = []; //! let input : &[u8; 12] = b"hello world!"; //! let mut out : [u8; 12+16] = [0u8; 12+16]; @@ -39,7 +39,7 @@ //! use cryptoxide::chacha20poly1305::Context; //! //! let key : [u8; 16] = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]; -//! let nonce : [u8; 8] = [1,2,3,4,5,6,7,8]; +//! let nonce : [u8; 12] = [1,2,3,4,5,6,7,8,9,10,11,12]; //! let mut context = Context::<20>::new(&key, &nonce); //! //! // Add incrementally 2 slices of data @@ -72,6 +72,7 @@ use crate::constant_time::{Choice, CtEqual}; use crate::cryptoutil::write_u64_le; use crate::mac::Mac; use crate::poly1305::Poly1305; +use core::convert::TryFrom; /// Chacha20Poly1305 Incremental Context for Authenticated Data (AAD) /// @@ -83,7 +84,7 @@ use crate::poly1305::Poly1305; /// use cryptoxide::chacha20poly1305::Context; /// /// let key : [u8; 16] = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]; -/// let nonce : [u8; 8] = [1,2,3,4,5,6,7,8]; +/// let nonce : [u8; 12] = [1,2,3,4,5,6,7,8,9,10,11,12]; /// let mut context = Context::<20>::new(&key, &nonce); /// /// // Add incrementally 2 slices of data @@ -145,18 +146,17 @@ impl Context { /// use cryptoxide::chacha20poly1305::Context; /// /// let key : [u8; 16] = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]; - /// let nonce : [u8; 8] = [1,2,3,4,5,6,7,8]; + /// let nonce : [u8; 12] = [1,2,3,4,5,6,7,8,9,10,11,12]; /// let context = Context::<20>::new(&key, &nonce); /// ``` - pub fn new(key: &[u8], nonce: &[u8]) -> Self { + pub fn new(key: &[u8], nonce: &[u8; 12]) -> Self { assert!(key.len() == 16 || key.len() == 32); - assert!(nonce.len() == 8 || nonce.len() == 12); let mut cipher = ChaCha::new(key, nonce); let mut mac_key = [0u8; 64]; let zero_key = [0u8; 64]; cipher.process(&zero_key, &mut mac_key); - let mac = 
Poly1305::new(&mac_key[..32]); + let mac = Poly1305::new(<&[u8; 32]>::try_from(&mac_key[..32]).unwrap()); Context { cipher: cipher, mac: mac, @@ -287,9 +287,9 @@ impl ChaChaPoly1305 { /// Create a new ChaCha20Poly1305 /// /// * key needs to be 16 or 32 bytes - /// * nonce needs to be 8 or 12 bytes + /// * nonce needs to be 12 bytes /// - pub fn new(key: &[u8], nonce: &[u8], aad: &[u8]) -> Self { + pub fn new(key: &[u8], nonce: &[u8; 12], aad: &[u8]) -> Self { let mut context = Context::new(key, nonce); context.add_data(aad); ChaChaPoly1305 { @@ -311,7 +311,7 @@ impl ChaChaPoly1305 { /// use cryptoxide::chacha20poly1305::ChaCha20Poly1305; /// /// let key : [u8; 16] = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]; - /// let nonce : [u8; 8] = [1,2,3,4,5,6,7,8]; + /// let nonce : [u8; 12] = [1,2,3,4,5,6,7,8,9,10,11,12]; /// let aad : [u8; 0] = []; /// let input : &[u8; 12] = b"hello world!"; /// let mut out : [u8; 12+16] = [0u8; 12+16]; @@ -352,11 +352,9 @@ impl ChaChaPoly1305 { /// use cryptoxide::chacha20poly1305::ChaCha20Poly1305; /// /// let key : [u8; 16] = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]; - /// let nonce : [u8; 8] = [1,2,3,4,5,6,7,8]; + /// let nonce : [u8; 12] = [1,2,3,4,5,6,7,8,9,10,11,12]; /// let aad : [u8; 0] = []; - /// let ae_msg : [u8; 12+16] = [98, 155, 81, 205, 163, 244, 162, 254, 57, 96, 183, - /// 101, 167, 88, 238, 184, 17, 109, 89, 185, 72, 150, - /// 97, 95, 149, 82, 179, 220]; + /// let ae_msg : [u8; 12+16] = [108, 82, 26, 254, 225, 35, 236, 248, 197, 246, 224, 48, 26, 63, 45, 5, 196, 47, 207, 128, 34, 182, 149, 185, 193, 73, 147, 29]; /// let mut decrypt_msg : [u8; 12] = [0u8; 12]; /// /// // create a new cipher @@ -392,7 +390,7 @@ mod test { struct TestVector { key: [u8; 32], - nonce: &'static [u8], + nonce: [u8; 12], tag: [u8; 16], plain_text: &'static [u8], cipher_text: &'static [u8], @@ -429,7 +427,7 @@ mod test { 0x8d, 0x8e, 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, ], - nonce: &[ 
+ nonce: [ 0x07, 0x00, 0x00, 0x00, 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, ], plain_text: &[ @@ -469,7 +467,7 @@ mod test { 0xf6, 0xb5, 0xf0, 0x47, 0x39, 0x17, 0xc1, 0x40, 0x2b, 0x80, 0x09, 0x9d, 0xca, 0x5c, 0xbc, 0x20, 0x70, 0x75, 0xc0, ], - nonce: &[ + nonce: [ 0x00, 0x00, 0x00, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, ], tag: [ diff --git a/src/cryptoutil.rs b/src/cryptoutil.rs index 6cd5205..bfbb7d5 100644 --- a/src/cryptoutil.rs +++ b/src/cryptoutil.rs @@ -87,20 +87,6 @@ pub fn read_u32_be(input: &[u8]) -> u32 { } */ -/// XOR plaintext and keystream, storing the result in dst. -pub fn xor_keystream(dst: &mut [u8], plaintext: &[u8], keystream: &[u8]) { - assert!(dst.len() == plaintext.len()); - assert!(plaintext.len() <= keystream.len()); - - // Do one byte at a time, using unsafe to skip bounds checking. - let p = plaintext.as_ptr(); - let k = keystream.as_ptr(); - let d = dst.as_mut_ptr(); - for i in 0isize..plaintext.len() as isize { - unsafe { *d.offset(i) = *p.offset(i) ^ *k.offset(i) }; - } -} - /// XOR a keystream in a buffer pub fn xor_keystream_mut(buf: &mut [u8], keystream: &[u8]) { assert!(buf.len() <= keystream.len()); diff --git a/src/drg/chacha.rs b/src/drg/chacha.rs index 2463396..e0ab951 100644 --- a/src/drg/chacha.rs +++ b/src/drg/chacha.rs @@ -38,7 +38,7 @@ pub struct Drg(ChaCha); impl Drg { /// Create a new DRG using the seed pub fn new(seed: &[u8; 32]) -> Self { - Self(ChaCha::new(seed, &[0; 8])) + Self(ChaCha::new(seed, &[0; 12])) } /// Return the next N bytes of random data as a byte array diff --git a/src/poly1305.rs b/src/poly1305.rs index c3b6dd4..53999db 100644 --- a/src/poly1305.rs +++ b/src/poly1305.rs @@ -23,7 +23,7 @@ use crate::mac::{Mac, MacResult}; /// `Poly1305` Context /// /// Use the `Mac` traits for interaction -#[derive(Clone, Copy)] +#[derive(Clone)] pub struct Poly1305 { r: [u32; 5], h: [u32; 5], @@ -40,30 +40,31 @@ fn mul64(a: u32, b: u32) -> u64 { impl Poly1305 { /// Create a new `Poly1305` context 
using the key (32 bytes) - pub fn new(key: &[u8]) -> Self { - assert!(key.len() == 32); - let mut poly = Poly1305 { - r: [0u32; 5], + pub fn new(key: &[u8; 32]) -> Self { + // r &= 0xffffffc0ffffffc0ffffffc0fffffff + let r = [ + (read_u32_le(&key[0..4])) & 0x3ffffff, + (read_u32_le(&key[3..7]) >> 2) & 0x3ffff03, + (read_u32_le(&key[6..10]) >> 4) & 0x3ffc0ff, + (read_u32_le(&key[9..13]) >> 6) & 0x3f03fff, + (read_u32_le(&key[12..16]) >> 8) & 0x00fffff, + ]; + + let pad = [ + read_u32_le(&key[16..20]), + read_u32_le(&key[20..24]), + read_u32_le(&key[24..28]), + read_u32_le(&key[28..32]), + ]; + + Poly1305 { + r, h: [0u32; 5], - pad: [0u32; 4], + pad, leftover: 0, buffer: [0u8; 16], finalized: false, - }; - - // r &= 0xffffffc0ffffffc0ffffffc0fffffff - poly.r[0] = (read_u32_le(&key[0..4])) & 0x3ffffff; - poly.r[1] = (read_u32_le(&key[3..7]) >> 2) & 0x3ffff03; - poly.r[2] = (read_u32_le(&key[6..10]) >> 4) & 0x3ffc0ff; - poly.r[3] = (read_u32_le(&key[9..13]) >> 6) & 0x3f03fff; - poly.r[4] = (read_u32_le(&key[12..16]) >> 8) & 0x00fffff; - - poly.pad[0] = read_u32_le(&key[16..20]); - poly.pad[1] = read_u32_le(&key[20..24]); - poly.pad[2] = read_u32_le(&key[24..28]); - poly.pad[3] = read_u32_le(&key[28..32]); - - poly + } } #[rustfmt::skip] @@ -255,7 +256,7 @@ mod test { use crate::mac::Mac; use crate::poly1305::Poly1305; - fn poly1305(key: &[u8], msg: &[u8], mac: &mut [u8]) { + fn poly1305(key: &[u8; 32], msg: &[u8], mac: &mut [u8]) { let mut poly = Poly1305::new(key); poly.input(msg); poly.raw_result(mac); @@ -345,7 +346,7 @@ mod test { let key = [i as u8; 32]; let msg = [i as u8; 256]; let mut mac = [0u8; 16]; - poly1305(&key[..], &msg[0..i], &mut mac); + poly1305(&key, &msg[0..i], &mut mac); tpoly.input(&mac); } tpoly.raw_result(&mut mac); diff --git a/src/salsa20.rs b/src/salsa20.rs index 406875c..6afb96e 100644 --- a/src/salsa20.rs +++ b/src/salsa20.rs @@ -20,130 +20,34 @@ //! ``` //! 
-// Licensed under the Apache License, Version 2.0 or the MIT license -// , at your -// option. This file may not be copied, modified, or distributed -// except according to those terms. - -use crate::cryptoutil::{read_u32_le, write_u32_le, xor_keystream}; -use crate::simd::u32x4; +use crate::cryptoutil::{read_u32_le, write_u32_le, write_u32v_le, xor_keystream_mut}; use core::cmp; #[derive(Clone)] -struct SalsaState { - a: u32x4, - b: u32x4, - c: u32x4, - d: u32x4, -} - -#[derive(Clone)] -pub struct Salsa { - state: SalsaState, - output: [u8; 64], - offset: usize, -} - -pub type Salsa20 = Salsa<20>; - -const S7: u32x4 = u32x4(7, 7, 7, 7); -const S9: u32x4 = u32x4(9, 9, 9, 9); -const S13: u32x4 = u32x4(13, 13, 13, 13); -const S18: u32x4 = u32x4(18, 18, 18, 18); -const S32: u32x4 = u32x4(32, 32, 32, 32); - -macro_rules! prepare_rowround { - ($a: expr, $b: expr, $c: expr) => {{ - let u32x4(a10, a11, a12, a13) = $a; - $a = u32x4(a13, a10, a11, a12); - let u32x4(b10, b11, b12, b13) = $b; - $b = u32x4(b12, b13, b10, b11); - let u32x4(c10, c11, c12, c13) = $c; - $c = u32x4(c11, c12, c13, c10); - }}; -} - -macro_rules! prepare_columnround { - ($a: expr, $b: expr, $c: expr) => {{ - let u32x4(a13, a10, a11, a12) = $a; - $a = u32x4(a10, a11, a12, a13); - let u32x4(b12, b13, b10, b11) = $b; - $b = u32x4(b10, b11, b12, b13); - let u32x4(c11, c12, c13, c10) = $c; - $c = u32x4(c10, c11, c12, c13); - }}; -} - -macro_rules! 
add_rotate_xor { - ($dst: expr, $a: expr, $b: expr, $shift: expr) => {{ - let v = $a + $b; - let r = S32 - $shift; - let right = v >> r; - $dst = $dst ^ (v << $shift) ^ right - }}; +struct State { + state: [u32; 16], } -fn columnround(state: &mut SalsaState) { - add_rotate_xor!(state.a, state.d, state.c, S7); - add_rotate_xor!(state.b, state.a, state.d, S9); - add_rotate_xor!(state.c, state.b, state.a, S13); - add_rotate_xor!(state.d, state.c, state.b, S18); -} - -fn rowround(state: &mut SalsaState) { - add_rotate_xor!(state.c, state.d, state.a, S7); - add_rotate_xor!(state.b, state.c, state.d, S9); - add_rotate_xor!(state.a, state.c, state.b, S13); - add_rotate_xor!(state.d, state.a, state.b, S18); +macro_rules! QR { + ($a:ident, $b:ident, $c:ident, $d:ident) => { + $b ^= $a.wrapping_add($d).rotate_left(7); + $c ^= $b.wrapping_add($a).rotate_left(9); + $d ^= $c.wrapping_add($b).rotate_left(13); + $a ^= $d.wrapping_add($c).rotate_left(18); + }; } -impl Salsa { - pub fn new(key: &[u8], nonce: &[u8]) -> Salsa20 { - assert!(key.len() == 16 || key.len() == 32); - assert!(nonce.len() == 8); - Salsa { - state: Salsa::::expand(key, nonce), - output: [0; 64], - offset: 64, - } - } - - pub fn new_xsalsa20(key: &[u8], nonce: &[u8]) -> Salsa20 { - assert!(key.len() == 32); - assert!(nonce.len() == 24); - let mut xsalsa = Salsa { - state: Salsa::::expand(key, &nonce[0..16]), - output: [0; 64], - offset: 64, - }; - - let mut new_key = [0; 32]; - xsalsa.hsalsa_hash(&mut new_key); - xsalsa.state = Salsa20::expand(&new_key, &nonce[16..24]); - - xsalsa - } - - fn expand(key: &[u8], nonce: &[u8]) -> SalsaState { +impl State { + pub(crate) fn init(key: &[u8], nonce: &[u8]) -> Self { let constant = match key.len() { 16 => b"expand 16-byte k", 32 => b"expand 32-byte k", _ => unreachable!(), }; - // The state vectors are laid out to facilitate SIMD operation, - // instead of the natural matrix ordering. 
- // - // * Constant (x0, x5, x10, x15) - // * Key (x1, x2, x3, x4, x11, x12, x13, x14) - // * Input (x6, x7, x8, x9) - - // (x11, x12, x13, x14) let key_tail = if key.len() == 16 { key } else { &key[16..32] }; - // (x8, x9) let (x8, x9) = if nonce.len() == 16 { // HSalsa uses the full 16 byte nonce. (read_u32_le(&nonce[8..12]), read_u32_le(&nonce[12..16])) @@ -151,77 +55,137 @@ impl Salsa { (0, 0) }; - SalsaState { - a: u32x4( - read_u32_le(&key[12..16]), // x4 - x9, // x9 - read_u32_le(&key_tail[12..16]), // x14 - read_u32_le(&key[8..12]), // x3 - ), - b: u32x4( - x8, // x8 - read_u32_le(&key_tail[8..12]), // x13 - read_u32_le(&key[4..8]), // x2 - read_u32_le(&nonce[4..8]), // x7 - ), - c: u32x4( - read_u32_le(&key_tail[4..8]), // x12 - read_u32_le(&key[0..4]), // x1 - read_u32_le(&nonce[0..4]), // x6 - read_u32_le(&key_tail[0..4]), // x11 - ), - d: u32x4( - read_u32_le(&constant[0..4]), // x0 - read_u32_le(&constant[4..8]), // x5 - read_u32_le(&constant[8..12]), // x10 - read_u32_le(&constant[12..16]), // x15 - ), - } + let state = [ + read_u32_le(&constant[0..4]), + read_u32_le(&key[0..4]), + read_u32_le(&key[4..8]), + read_u32_le(&key[8..12]), + read_u32_le(&key[12..16]), + read_u32_le(&constant[4..8]), + read_u32_le(&nonce[0..4]), + read_u32_le(&nonce[4..8]), + x8, + x9, + read_u32_le(&constant[8..12]), + read_u32_le(&key_tail[0..4]), + read_u32_le(&key_tail[4..8]), + read_u32_le(&key_tail[8..12]), + read_u32_le(&key_tail[12..16]), + read_u32_le(&constant[12..16]), + ]; + Self { state } } - fn hash(&mut self) { - let mut state = self.state.clone(); - for _ in 0..10 { - columnround(&mut state); - prepare_rowround!(state.a, state.b, state.c); - rowround(&mut state); - prepare_columnround!(state.a, state.b, state.c); + #[inline] + pub(crate) fn rounds(&mut self) { + let [mut x0, mut x1, mut x2, mut x3, mut x4, mut x5, mut x6, mut x7, mut x8, mut x9, mut x10, mut x11, mut x12, mut x13, mut x14, mut x15] = + self.state; + + for _ in 0..(ROUNDS / 2) { + QR!(x0, x4, 
x8, x12); + QR!(x5, x9, x13, x1); + QR!(x10, x14, x2, x6); + QR!(x15, x3, x7, x11); + QR!(x0, x1, x2, x3); + QR!(x5, x6, x7, x4); + QR!(x10, x11, x8, x9); + QR!(x15, x12, x13, x14); } - let u32x4(x4, x9, x14, x3) = self.state.a + state.a; - let u32x4(x8, x13, x2, x7) = self.state.b + state.b; - let u32x4(x12, x1, x6, x11) = self.state.c + state.c; - let u32x4(x0, x5, x10, x15) = self.state.d + state.d; - let lens = [ + + self.state = [ x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15, ]; - for (i, lensi) in lens.iter().enumerate() { - write_u32_le(&mut self.output[i * 4..(i + 1) * 4], *lensi); + } + + #[inline] + /// Add back the initial state + pub(crate) fn add_back(&mut self, initial: &Self) { + for i in 0..16 { + self.state[i] = self.state[i].wrapping_add(initial.state[i]); } + } - self.state.b = self.state.b + u32x4(1, 0, 0, 0); - let u32x4(_, _, _, ctr_lo) = self.state.b; - if ctr_lo == 0 { - self.state.a = self.state.a + u32x4(0, 1, 0, 0); + #[inline] + pub(crate) fn increment(&mut self) { + self.state[8] = self.state[8].wrapping_add(1); + if self.state[8] == 0 { + self.state[9] = self.state[9].wrapping_add(1); } + } - self.offset = 0; + #[inline] + pub(crate) fn output_bytes(&self, output: &mut [u8]) { + write_u32v_le(output, &self.state); } - fn hsalsa_hash(&mut self, out: &mut [u8]) { - let mut state = self.state.clone(); - for _ in 0..10 { - columnround(&mut state); - prepare_rowround!(state.a, state.b, state.c); - rowround(&mut state); - prepare_columnround!(state.a, state.b, state.c); + #[inline] + pub(crate) fn output_ad_bytes(&self, output: &mut [u8; 32]) { + write_u32_le(&mut output[0..4], self.state[0]); + write_u32_le(&mut output[4..8], self.state[5]); + write_u32_le(&mut output[8..12], self.state[10]); + write_u32_le(&mut output[12..16], self.state[15]); + write_u32_le(&mut output[16..20], self.state[6]); + write_u32_le(&mut output[20..24], self.state[7]); + write_u32_le(&mut output[24..28], self.state[8]); + write_u32_le(&mut 
output[28..32], self.state[9]); + } +} + +#[derive(Clone)] +pub struct Salsa { + state: State, + output: [u8; 64], + offset: usize, +} + +pub type Salsa20 = Salsa<20>; + +impl Salsa { + /// Create a new ChaCha20 context. + /// + /// * The key must be 16 or 32 bytes + /// * The nonce must be 8 bytes + pub fn new(key: &[u8], nonce: &[u8; 8]) -> Self { + assert!(key.len() == 16 || key.len() == 32); + assert!(ROUNDS == 8 || ROUNDS == 12 || ROUNDS == 20); + + Salsa { + state: State::::init(key, nonce), + output: [0; 64], + offset: 64, } - let u32x4(_, x9, _, _) = state.a; - let u32x4(x8, _, _, x7) = state.b; - let u32x4(_, _, x6, _) = state.c; - let u32x4(x0, x5, x10, x15) = state.d; - let lens = [x0, x5, x10, x15, x6, x7, x8, x9]; - for i in 0..lens.len() { - write_u32_le(&mut out[i * 4..(i + 1) * 4], lens[i]); + } + + fn update(&mut self) { + let mut state = self.state.clone(); + state.rounds(); + state.add_back(&self.state); + + state.output_bytes(&mut self.output); + + self.state.increment(); + self.offset = 0; + } + + /// Process the input in place through the cipher xoring + /// + /// To get only the stream of this cipher, one can just pass the zero + /// buffer (X xor 0 = X) + pub fn process_mut(&mut self, data: &mut [u8]) { + let len = data.len(); + let mut i = 0; + while i < len { + // If there is no keystream available in the output buffer, + // generate the next block. + if self.offset == 64 { + self.update(); + } + + // Process the min(available keystream, remaining input length). + let count = cmp::min(64 - self.offset, len - i); + xor_keystream_mut(&mut data[i..i + count], &self.output[self.offset..]); + i += count; + self.offset += count; } } @@ -230,49 +194,96 @@ impl Salsa { /// the output need to be the same size as the input otherwise /// this function will panic. 
pub fn process(&mut self, input: &[u8], output: &mut [u8]) { - assert!(input.len() == output.len()); - let len = input.len(); + assert_eq!( + input.len(), + output.len(), + "chacha::process need to have input and output of the same size" + ); + output.copy_from_slice(input); + self.process_mut(output); + } +} + +#[derive(Clone)] +pub struct XSalsa { + state: State, + output: [u8; 64], + offset: usize, +} + +pub type XSalsa20 = XSalsa<20>; + +impl XSalsa { + /// Create a new XSalsa context. + /// + /// Key must be 32 bytes and the nonce 24 bytes. + pub fn new(key: &[u8; 32], nonce: &[u8; 24]) -> Self { + assert!(ROUNDS == 8 || ROUNDS == 12 || ROUNDS == 20); + + let mut hsalsa = State::::init(key, &nonce[0..16]); + hsalsa.rounds(); + let mut new_key = [0; 32]; + hsalsa.output_ad_bytes(&mut new_key); + + let xsalsa = Self { + state: State::init(&new_key, &nonce[16..24]), + output: [0u8; 64], + offset: 64, + }; + xsalsa + } + + fn update(&mut self) { + let mut state = self.state.clone(); + state.rounds(); + state.add_back(&self.state); + + state.output_bytes(&mut self.output); + + self.state.increment(); + self.offset = 0; + } + + /// Process the input in place through the cipher xoring + /// + /// To get only the stream of this cipher, one can just pass the zero + /// buffer (X xor 0 = X) + pub fn process_mut(&mut self, data: &mut [u8]) { + let len = data.len(); let mut i = 0; while i < len { // If there is no keystream available in the output buffer, // generate the next block. if self.offset == 64 { - self.hash(); + self.update(); } // Process the min(available keystream, remaining input length). 
let count = cmp::min(64 - self.offset, len - i); - xor_keystream( - &mut output[i..i + count], - &input[i..i + count], - &self.output[self.offset..], - ); + xor_keystream_mut(&mut data[i..i + count], &self.output[self.offset..]); i += count; self.offset += count; } } -} - -pub fn hsalsa(key: &[u8], nonce: &[u8], out: &mut [u8]) { - assert!(key.len() == 32); - assert!(nonce.len() == 16); - assert!(ROUNDS == 8 || ROUNDS == 12 || ROUNDS == 20); - - let mut h = Salsa20 { - state: Salsa20::expand(key, nonce), - output: [0; 64], - offset: 64, - }; - h.hsalsa_hash(out); -} -pub fn hsalsa20(key: &[u8], nonce: &[u8], out: &mut [u8]) { - hsalsa::<20>(key, nonce, out) + /// Process the input through the cipher, xoring the byte one-by-one + /// + /// the output need to be the same size as the input otherwise + /// this function will panic. + pub fn process(&mut self, input: &[u8], output: &mut [u8]) { + assert_eq!( + input.len(), + output.len(), + "chacha::process need to have input and output of the same size" + ); + output.copy_from_slice(input); + self.process_mut(output); + } } #[cfg(test)] mod test { - use super::Salsa20; + use super::{Salsa20, XSalsa20}; use crate::digest::Digest; use crate::sha2::Sha256; @@ -293,7 +304,7 @@ mod test { let mut salsa20 = Salsa20::new(&key, &nonce); salsa20.process(&input, &mut stream); - assert!(stream[..] == result[..]); + assert_eq!(stream, result); } #[test] @@ -315,7 +326,7 @@ mod test { let mut salsa20 = Salsa20::new(&key, &nonce); salsa20.process(&input, &mut stream); - assert!(stream[..] == result[..]); + assert_eq!(stream, result); } #[test] @@ -341,7 +352,7 @@ mod test { } let out_str = sh.result_str(); - assert!(&out_str[..] 
== output_str); + assert_eq!(out_str, output_str); } #[test] @@ -370,9 +381,9 @@ mod test { 0xf6, 0x04, 0x9d, 0x0a, 0x5c, 0x8a, 0x82, 0xf4, 0x29, 0x23, 0x1f, 0x00, 0x80, ]; - let mut xsalsa20 = Salsa20::new_xsalsa20(&key, &nonce); + let mut xsalsa20 = XSalsa20::new(&key, &nonce); xsalsa20.process(&input, &mut stream); - assert!(stream[..] == result[..]); + assert_eq!(stream, result); } }