diff --git a/accelerate.cabal b/accelerate.cabal index 1b6cab0bd..14bf6f810 100644 --- a/accelerate.cabal +++ b/accelerate.cabal @@ -137,6 +137,12 @@ extra-source-files: CHANGELOG.md cbits/*.c cbits/*.h + -- XKCP + cbits/xkcp/*.c + cbits/xkcp/*.h + cbits/xkcp/*.macros + cbits/xkcp/*.inc + -- TRACY -- These are referenced directly using the FFI cbits/tracy/*.h cbits/tracy/*.hpp @@ -153,8 +159,7 @@ extra-source-files: cbits/tracy/profiler/build/unix/Makefile cbits/tracy/profiler/build/unix/*.mk cbits/tracy/common/*.mk - -- The Makefiles fetch the source files from these Visual Studio project - -- files + -- The Makefiles fetch the source files from these Visual Studio project files cbits/tracy/capture/build/win32/capture.vcxproj cbits/tracy/capture/build/win32/capture.vcxproj.filters cbits/tracy/profiler/build/win32/Tracy.vcxproj @@ -332,7 +337,6 @@ library , base-orphans >= 0.3 , bytestring >= 0.10.2 , containers >= 0.3 - , cryptonite >= 0.21 , deepseq >= 1.3 , directory >= 1.0 , double-conversion >= 2.0 @@ -428,6 +432,7 @@ library -- Other Data.BitSet Data.Primitive.Vec + Crypto.Hash.XKCP other-modules: Data.Array.Accelerate.Analysis.Hash.TH diff --git a/cbits/xkcp/KeccakHash.c b/cbits/xkcp/KeccakHash.c new file mode 100644 index 000000000..c660f9407 --- /dev/null +++ b/cbits/xkcp/KeccakHash.c @@ -0,0 +1,81 @@ +/* +The eXtended Keccak Code Package (XKCP) +https://github.com/XKCP/XKCP + +Keccak, designed by Guido Bertoni, Joan Daemen, Michaël Peeters and Gilles Van Assche. + +Implementation by the designers, hereby denoted as "the implementer". + +For more information, feedback or questions, please refer to the Keccak Team website: +https://keccak.team/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. 
+http://creativecommons.org/publicdomain/zero/1.0/
+*/
+
+#include <string.h>
+#include "KeccakHash.h"
+
+/* Keccak_HashInitialize: reject a zero suffix, initialise the sponge with (rate, capacity), then record the output length and suffix. */
+
+HashReturn Keccak_HashInitialize(Keccak_HashInstance *instance, unsigned int rate, unsigned int capacity, unsigned int hashbitlen, unsigned char delimitedSuffix)
+{
+    HashReturn result;
+
+    if (delimitedSuffix == 0) /* a zero suffix byte is refused before the sponge is touched */
+        return KECCAK_FAIL;
+    result = (HashReturn)KeccakWidth1600_SpongeInitialize(&instance->sponge, rate, capacity);
+    if (result != KECCAK_SUCCESS)
+        return result;
+    instance->fixedOutputLength = hashbitlen; /* in bits; Keccak_HashFinal squeezes hashbitlen/8 bytes */
+    instance->delimitedSuffix = delimitedSuffix;
+    return KECCAK_SUCCESS;
+}
+
+/* Keccak_HashUpdate: absorb the whole bytes of data; fold a trailing partial byte into delimitedSuffix (absorbing one byte if the merge exceeds 8 bits). */
+
+HashReturn Keccak_HashUpdate(Keccak_HashInstance *instance, const BitSequence *data, BitLength databitlen)
+{
+    if ((databitlen % 8) == 0)
+        return (HashReturn)KeccakWidth1600_SpongeAbsorb(&instance->sponge, data, databitlen/8);
+    else {
+        HashReturn ret = (HashReturn)KeccakWidth1600_SpongeAbsorb(&instance->sponge, data, databitlen/8);
+        if (ret == KECCAK_SUCCESS) {
+            /* The last partial byte is assumed to be aligned on the least significant bits */
+            unsigned char lastByte = data[databitlen/8];
+            /* Concatenate the last few bits provided here with those of the suffix */
+            unsigned short delimitedLastBytes = (unsigned short)((unsigned short)(lastByte & ((1 << (databitlen % 8)) - 1)) | ((unsigned short)instance->delimitedSuffix << (databitlen % 8)));
+            if ((delimitedLastBytes & 0xFF00) == 0x0000) { /* merged bits still fit in one byte: keep them as the pending suffix */
+                instance->delimitedSuffix = delimitedLastBytes & 0xFF;
+            }
+            else { /* overflow: absorb the low byte now, keep the high byte as the new suffix */
+                unsigned char oneByte[1];
+                oneByte[0] = delimitedLastBytes & 0xFF;
+                ret = (HashReturn)KeccakWidth1600_SpongeAbsorb(&instance->sponge, oneByte, 1);
+                instance->delimitedSuffix = (delimitedLastBytes >> 8) & 0xFF;
+            }
+        }
+        return ret;
+    }
+}
+
+/* Keccak_HashFinal: absorb the pending delimitedSuffix, then squeeze fixedOutputLength/8 bytes into hashval (nothing is written when the length is 0). */
+
+HashReturn Keccak_HashFinal(Keccak_HashInstance *instance, BitSequence *hashval)
+{
+    HashReturn ret = (HashReturn)KeccakWidth1600_SpongeAbsorbLastFewBits(&instance->sponge, instance->delimitedSuffix);
+    if (ret == KECCAK_SUCCESS)
+        return (HashReturn)KeccakWidth1600_SpongeSqueeze(&instance->sponge, hashval, instance->fixedOutputLength/8);
+    else
+        return ret;
+}
+
+/* Keccak_HashSqueeze: squeeze databitlen/8 further bytes into data; returns KECCAK_FAIL unless databitlen is a multiple of 8. */
+
+HashReturn Keccak_HashSqueeze(Keccak_HashInstance *instance, BitSequence *data, BitLength databitlen)
+{
+    if ((databitlen % 8) != 0)
+        return KECCAK_FAIL;
+    return (HashReturn)KeccakWidth1600_SpongeSqueeze(&instance->sponge, data, databitlen/8);
+}
diff --git a/cbits/xkcp/KeccakHash.h b/cbits/xkcp/KeccakHash.h
new file mode 100644
index 000000000..e99d99dbc
--- /dev/null
+++ b/cbits/xkcp/KeccakHash.h
@@ -0,0 +1,125 @@
+/*
+The eXtended Keccak Code Package (XKCP)
+https://github.com/XKCP/XKCP
+
+Keccak, designed by Guido Bertoni, Joan Daemen, Michaël Peeters and Gilles Van Assche.
+
+Implementation by the designers, hereby denoted as "the implementer".
+
+For more information, feedback or questions, please refer to the Keccak Team website:
+https://keccak.team/
+
+To the extent possible under law, the implementer has waived all copyright
+and related or neighboring rights to the source code in this file.
+http://creativecommons.org/publicdomain/zero/1.0/
+*/
+
+#ifndef _KeccakHashInterface_h_
+#define _KeccakHashInterface_h_
+
+#include "config.h"
+#ifdef XKCP_has_KeccakP1600
+
+#include <stdint.h>
+#include <string.h>
+#include "KeccakSponge.h"
+
+#ifndef _Keccak_BitTypes_
+#define _Keccak_BitTypes_
+typedef uint8_t BitSequence;
+
+typedef size_t BitLength;
+#endif
+
+typedef enum { KECCAK_SUCCESS = 0, KECCAK_FAIL = 1, KECCAK_BAD_HASHLEN = 2 } HashReturn;
+
+typedef struct {
+    KeccakWidth1600_SpongeInstance sponge;
+    unsigned int fixedOutputLength; /* output length in bits as given to Keccak_HashInitialize(); 0 means squeeze explicitly */
+    unsigned char delimitedSuffix;  /* suffix bits still to be absorbed by Keccak_HashFinal() */
+} Keccak_HashInstance;
+
+/**
+ * Function to initialize the Keccak[r, c] sponge function instance used in sequential hashing mode.
+ * @param hashInstance Pointer to the hash instance to be initialized.
+ * @param rate The value of the rate r.
+ * @param capacity The value of the capacity c.
+ * @param hashbitlen The desired number of output bits,
+ * or 0 for an arbitrarily-long output.
+ * @param delimitedSuffix Bits that will be automatically appended to the end
+ * of the input message, as in domain separation.
+ * This is a byte containing from 0 to 7 bits
+ * formatted like the @a delimitedData parameter of
+ * the Keccak_SpongeAbsorbLastFewBits() function.
+ * @pre One must have r+c=1600 and the rate a multiple of 8 bits in this implementation.
+ * @return KECCAK_SUCCESS if successful, KECCAK_FAIL otherwise (including when @a delimitedSuffix is 0).
+ */
+HashReturn Keccak_HashInitialize(Keccak_HashInstance *hashInstance, unsigned int rate, unsigned int capacity, unsigned int hashbitlen, unsigned char delimitedSuffix);
+
+/** Macro to initialize a SHAKE128 instance as specified in the FIPS 202 standard.
+ */
+#define Keccak_HashInitialize_SHAKE128(hashInstance) Keccak_HashInitialize(hashInstance, 1344, 256, 0, 0x1F)
+
+/** Macro to initialize a SHAKE256 instance as specified in the FIPS 202 standard.
+ */
+#define Keccak_HashInitialize_SHAKE256(hashInstance) Keccak_HashInitialize(hashInstance, 1088, 512, 0, 0x1F)
+
+/** Macro to initialize a SHA3-224 instance as specified in the FIPS 202 standard.
+ */
+#define Keccak_HashInitialize_SHA3_224(hashInstance) Keccak_HashInitialize(hashInstance, 1152, 448, 224, 0x06)
+
+/** Macro to initialize a SHA3-256 instance as specified in the FIPS 202 standard.
+ */
+#define Keccak_HashInitialize_SHA3_256(hashInstance) Keccak_HashInitialize(hashInstance, 1088, 512, 256, 0x06)
+
+/** Macro to initialize a SHA3-384 instance as specified in the FIPS 202 standard.
+ */
+#define Keccak_HashInitialize_SHA3_384(hashInstance) Keccak_HashInitialize(hashInstance, 832, 768, 384, 0x06)
+
+/** Macro to initialize a SHA3-512 instance as specified in the FIPS 202 standard.
+ */
+#define Keccak_HashInitialize_SHA3_512(hashInstance) Keccak_HashInitialize(hashInstance, 576, 1024, 512, 0x06)
+
+/**
+ * Function to give input data to be absorbed.
+ * @param hashInstance Pointer to the hash instance initialized by Keccak_HashInitialize().
+ * @param data Pointer to the input data.
+ * When @a databitlen is not a multiple of 8, the last bits of data must be
+ * in the least significant bits of the last byte (little-endian convention).
+ * In this case, the (8 - @a databitlen mod 8) most significant bits
+ * of the last byte are ignored.
+ * @param databitlen The number of input bits provided in the input data.
+ * @pre In the previous call to Keccak_HashUpdate(), databitlen was a multiple of 8.
+ * @return KECCAK_SUCCESS if successful, KECCAK_FAIL otherwise.
+ */
+HashReturn Keccak_HashUpdate(Keccak_HashInstance *hashInstance, const BitSequence *data, BitLength databitlen);
+
+/**
+ * Function to call after all input blocks have been input and to get
+ * output bits if the length was specified when calling Keccak_HashInitialize().
+ * @param hashInstance Pointer to the hash instance initialized by Keccak_HashInitialize().
+ * If @a hashbitlen was not 0 in the call to Keccak_HashInitialize(), the number of
+ * output bits is equal to @a hashbitlen (only @a hashbitlen / 8 whole bytes are written).
+ * If @a hashbitlen was 0 in the call to Keccak_HashInitialize(), the output bits
+ * must be extracted using the Keccak_HashSqueeze() function.
+ * @param hashval Pointer to the buffer where to store the output data.
+ * @return KECCAK_SUCCESS if successful, KECCAK_FAIL otherwise.
+ */
+HashReturn Keccak_HashFinal(Keccak_HashInstance *hashInstance, BitSequence *hashval);
+
+ /**
+ * Function to squeeze output data.
+ * @param hashInstance Pointer to the hash instance initialized by Keccak_HashInitialize().
+ * @param data Pointer to the buffer where to store the output data.
+ * @param databitlen The number of output bits desired (must be a multiple of 8).
+ * @pre Keccak_HashFinal() must have been already called.
+ * @pre @a databitlen is a multiple of 8.
+ * @return KECCAK_SUCCESS if successful, KECCAK_FAIL otherwise.
+ */
+HashReturn Keccak_HashSqueeze(Keccak_HashInstance *hashInstance, BitSequence *data, BitLength databitlen);
+
+#else
+#error This requires an implementation of Keccak-p[1600]
+#endif
+
+#endif
diff --git a/cbits/xkcp/KeccakP-1600-64.macros b/cbits/xkcp/KeccakP-1600-64.macros
new file mode 100644
index 000000000..aabb307ba
--- /dev/null
+++ b/cbits/xkcp/KeccakP-1600-64.macros
@@ -0,0 +1,748 @@
+/*
+The eXtended Keccak Code Package (XKCP)
+https://github.com/XKCP/XKCP
+
+The Keccak-p permutations, designed by Guido Bertoni, Joan Daemen, Michaël Peeters and Gilles Van Assche.
+
+Implementation by Gilles Van Assche and Ronny Van Keer, hereby denoted as "the implementer".
+
+For more information, feedback or questions, please refer to the Keccak Team website:
+https://keccak.team/
+
+To the extent possible under law, the implementer has waived all copyright
+and related or neighboring rights to the source code in this file.
+http://creativecommons.org/publicdomain/zero/1.0/
+*/
+
+#define declareABCDE /* locals used by the round macros: A, B and E hold 25 lanes each, C and D hold 5 words each */ \
+    uint64_t Aba, Abe, Abi, Abo, Abu; \
+    uint64_t Aga, Age, Agi, Ago, Agu; \
+    uint64_t Aka, Ake, Aki, Ako, Aku; \
+    uint64_t Ama, Ame, Ami, Amo, Amu; \
+    uint64_t Asa, Ase, Asi, Aso, Asu; \
+    uint64_t Bba, Bbe, Bbi, Bbo, Bbu; \
+    uint64_t Bga, Bge, Bgi, Bgo, Bgu; \
+    uint64_t Bka, Bke, Bki, Bko, Bku; \
+    uint64_t Bma, Bme, Bmi, Bmo, Bmu; \
+    uint64_t Bsa, Bse, Bsi, Bso, Bsu; \
+    uint64_t Ca, Ce, Ci, Co, Cu; \
+    uint64_t Da, De, Di, Do, Du; \
+    uint64_t Eba, Ebe, Ebi, Ebo, Ebu; \
+    uint64_t Ega, Ege, Egi, Ego, Egu; \
+    uint64_t Eka, Eke, Eki, Eko, Eku; \
+    uint64_t Ema, Eme, Emi, Emo, Emu; \
+    uint64_t Esa, Ese, Esi, Eso, Esu; \
+
+#define prepareTheta /* C<v> = XOR of the five A lanes whose name ends in the vowel <v> */ \
+    Ca = Aba^Aga^Aka^Ama^Asa; \
+    Ce = Abe^Age^Ake^Ame^Ase; \
+    Ci = Abi^Agi^Aki^Ami^Asi; \
+    Co = Abo^Ago^Ako^Amo^Aso; \
+    Cu = Abu^Agu^Aku^Amu^Asu; \
+
+#ifdef UseBebigokimisa
+/* --- Code for round, with prepare-theta (lane complementing pattern 'bebigokimisa'): derives D from C, XORs D into lanes A, writes lanes E, XORs round constant i into E##ba and rebuilds Ca..Cu from E for the next round */
+/* --- 64-bit lanes mapped to 64-bit words */
+#define thetaRhoPiChiIotaPrepareTheta(i, A, E) \
+    Da = Cu^ROL64(Ce, 1); \
+    De = Ca^ROL64(Ci, 1); \
+    Di = Ce^ROL64(Co, 1); \
+    Do = Ci^ROL64(Cu, 1); \
+    Du = Co^ROL64(Ca, 1); \
+\
+    A##ba ^= Da; \
+    Bba = A##ba; \
+    A##ge ^= De; \
+    Bbe = ROL64(A##ge, 44); \
+    A##ki ^= Di; \
+    Bbi = ROL64(A##ki, 43); \
+    A##mo ^= Do; \
+    Bbo = ROL64(A##mo, 21); \
+    A##su ^= Du; \
+    Bbu = ROL64(A##su, 14); \
+    E##ba = Bba ^( Bbe | Bbi ); \
+    E##ba ^= KeccakF1600RoundConstants[i]; \
+    Ca = E##ba; \
+    E##be = Bbe ^((~Bbi)| Bbo ); \
+    Ce = E##be; \
+    E##bi = Bbi ^( Bbo & Bbu ); \
+    Ci = E##bi; \
+    E##bo = Bbo ^( Bbu | Bba ); \
+    Co = E##bo; \
+    E##bu = Bbu ^( Bba & Bbe ); \
+    Cu = E##bu; \
+\
+    A##bo ^= Do; \
+    Bga = ROL64(A##bo, 28); \
+    A##gu ^= Du; \
+    Bge = ROL64(A##gu, 20); \
+    A##ka ^= Da; \
+    Bgi = ROL64(A##ka, 3); \
+    A##me ^= De; \
+    Bgo = ROL64(A##me, 45); \
+    A##si ^= Di; \
+    Bgu = ROL64(A##si, 61); \
+    E##ga = Bga ^( Bge | Bgi ); \
+    Ca ^= E##ga; \
+    E##ge = Bge ^( Bgi & Bgo ); \
+    Ce ^= E##ge; \
+    E##gi = Bgi ^( Bgo |(~Bgu)); \
+    Ci ^= E##gi; \
+    E##go = Bgo ^( Bgu | Bga ); \
+    Co ^= E##go; \
+    E##gu = Bgu ^( Bga & Bge ); \
+    Cu ^= E##gu; \
+\
+    A##be ^= De; \
+    Bka = ROL64(A##be, 1); \
+    A##gi ^= Di; \
+    Bke = ROL64(A##gi, 6); \
+    A##ko ^= Do; \
+    Bki = ROL64(A##ko, 25); \
+    A##mu ^= Du; \
+    Bko = ROL64(A##mu, 8); \
+    A##sa ^= Da; \
+    Bku = ROL64(A##sa, 18); \
+    E##ka = Bka ^( Bke | Bki ); \
+    Ca ^= E##ka; \
+    E##ke = Bke ^( Bki & Bko ); \
+    Ce ^= E##ke; \
+    E##ki = Bki ^((~Bko)& Bku ); \
+    Ci ^= E##ki; \
+    E##ko = (~Bko)^( Bku | Bka ); \
+    Co ^= E##ko; \
+    E##ku = Bku ^( Bka & Bke ); \
+    Cu ^= E##ku; \
+\
+    A##bu ^= Du; \
+    Bma = ROL64(A##bu, 27); \
+    A##ga ^= Da; \
+    Bme = ROL64(A##ga, 36); \
+    A##ke ^= De; \
+    Bmi = ROL64(A##ke, 10); \
+    A##mi ^= Di; \
+    Bmo = ROL64(A##mi, 15); \
+    A##so ^= Do; \
+    Bmu = ROL64(A##so, 56); \
+    E##ma = Bma ^( Bme & Bmi ); \
+    Ca ^= E##ma; \
+    E##me = Bme ^( Bmi | Bmo ); \
+    Ce ^= E##me; \
+    E##mi = Bmi ^((~Bmo)| Bmu ); \
+    Ci ^= E##mi; \
+    E##mo = (~Bmo)^( Bmu & Bma ); \
+    Co ^= E##mo; \
+    E##mu = Bmu ^( Bma | Bme ); \
+    Cu ^= E##mu; \
+\
+    A##bi ^= Di; \
+    Bsa = ROL64(A##bi, 62); \
+    A##go ^= Do; \
+    Bse = ROL64(A##go, 55); \
+    A##ku ^= Du; \
+    Bsi = ROL64(A##ku, 39); \
+    A##ma ^= Da; \
+    Bso = ROL64(A##ma, 41); \
+    A##se ^= De; \
+    Bsu = ROL64(A##se, 2); \
+    E##sa = Bsa ^((~Bse)& Bsi ); \
+    Ca ^= E##sa; \
+    E##se = (~Bse)^( Bsi | Bso ); \
+    Ce ^= E##se; \
+    E##si = Bsi ^( Bso & Bsu ); \
+    Ci ^= E##si; \
+    E##so = Bso ^( Bsu | Bsa ); \
+    Co ^= E##so; \
+    E##su = Bsu ^( Bsa & Bse ); \
+    Cu ^= E##su; \
+\
+
+/* --- Code for round (lane complementing pattern 'bebigokimisa'): same A-to-E computation as the prepare-theta form, but Ca..Cu are left untouched */
+/* --- 64-bit lanes mapped to 64-bit words */
+#define thetaRhoPiChiIota(i, A, E) \
+    Da = Cu^ROL64(Ce, 1); \
+    De = Ca^ROL64(Ci, 1); \
+    Di = Ce^ROL64(Co, 1); \
+    Do = Ci^ROL64(Cu, 1); \
+    Du = Co^ROL64(Ca, 1); \
+\
+    A##ba ^= Da; \
+    Bba = A##ba; \
+    A##ge ^= De; \
+    Bbe = ROL64(A##ge, 44); \
+    A##ki ^= Di; \
+    Bbi = ROL64(A##ki, 43); \
+    A##mo ^= Do; \
+    Bbo = ROL64(A##mo, 21); \
+    A##su ^= Du; \
+    Bbu = ROL64(A##su, 14); \
+    E##ba = Bba ^( Bbe | Bbi ); \
+    E##ba ^= KeccakF1600RoundConstants[i]; \
+    E##be = Bbe ^((~Bbi)| Bbo ); \
+    E##bi = Bbi ^( Bbo & Bbu ); \
+    E##bo = Bbo ^( Bbu | Bba ); \
+    E##bu = Bbu ^( Bba & Bbe ); \
+\
+    A##bo ^= Do; \
+    Bga = ROL64(A##bo, 28); \
+    A##gu ^= Du; \
+    Bge = ROL64(A##gu, 20); \
+    A##ka ^= Da; \
+    Bgi = ROL64(A##ka, 3); \
+    A##me ^= De; \
+    Bgo = ROL64(A##me, 45); \
+    A##si ^= Di; \
+    Bgu = ROL64(A##si, 61); \
+    E##ga = Bga ^( Bge | Bgi ); \
+    E##ge = Bge ^( Bgi & Bgo ); \
+    E##gi = Bgi ^( Bgo |(~Bgu)); \
+    E##go = Bgo ^( Bgu | Bga ); \
+    E##gu = Bgu ^( Bga & Bge ); \
+\
+    A##be ^= De; \
+    Bka = ROL64(A##be, 1); \
+    A##gi ^= Di; \
+    Bke = ROL64(A##gi, 6); \
+    A##ko ^= Do; \
+    Bki = ROL64(A##ko, 25); \
+    A##mu ^= Du; \
+    Bko = ROL64(A##mu, 8); \
+    A##sa ^= Da; \
+    Bku = ROL64(A##sa, 18); \
+    E##ka = Bka ^( Bke | Bki ); \
+    E##ke = Bke ^( Bki & Bko ); \
+    E##ki = Bki ^((~Bko)& Bku ); \
+    E##ko = (~Bko)^( Bku | Bka ); \
+    E##ku = Bku ^( Bka & Bke ); \
+\
+    A##bu ^= Du; \
+    Bma = ROL64(A##bu, 27); \
+    A##ga ^= Da; \
+    Bme = ROL64(A##ga, 36); \
+    A##ke ^= De; \
+    Bmi = ROL64(A##ke, 10); \
+    A##mi ^= Di; \
+    Bmo = ROL64(A##mi, 15); \
+    A##so ^= Do; \
+    Bmu = ROL64(A##so, 56); \
+    E##ma = Bma ^( Bme & Bmi ); \
+    E##me = Bme ^( Bmi | Bmo ); \
+    E##mi = Bmi ^((~Bmo)| Bmu ); \
+    E##mo = (~Bmo)^( Bmu & Bma ); \
+    E##mu = Bmu ^( Bma | Bme ); \
+\
+    A##bi ^= Di; \
+    Bsa = ROL64(A##bi, 62); \
+    A##go ^= Do; \
+    Bse = ROL64(A##go, 55); \
+    A##ku ^= Du; \
+    Bsi = ROL64(A##ku, 39); \
+    A##ma ^= Da; \
+    Bso = ROL64(A##ma, 41); \
+    A##se ^= De; \
+    Bsu = ROL64(A##se, 2); \
+    E##sa = Bsa ^((~Bse)& Bsi ); \
+    E##se = (~Bse)^( Bsi | Bso ); \
+    E##si = Bsi ^( Bso & Bsu ); \
+    E##so = Bso ^( Bsu | Bsa ); \
+    E##su = Bsu ^( Bsa & Bse ); \
+\
+
+#else /* UseBebigokimisa not defined: every E lane is computed as B ^ ((~B') & B'') */
+/* --- Code for round, with prepare-theta: derives D from C, XORs D into lanes A, writes lanes E, XORs round constant i into E##ba and rebuilds Ca..Cu from E for the next round */
+/* --- 64-bit lanes mapped to 64-bit words */
+#define thetaRhoPiChiIotaPrepareTheta(i, A, E) \
+    Da = Cu^ROL64(Ce, 1); \
+    De = Ca^ROL64(Ci, 1); \
+    Di = Ce^ROL64(Co, 1); \
+    Do = Ci^ROL64(Cu, 1); \
+    Du = Co^ROL64(Ca, 1); \
+\
+    A##ba ^= Da; \
+    Bba = A##ba; \
+    A##ge ^= De; \
+    Bbe = ROL64(A##ge, 44); \
+    A##ki ^= Di; \
+    Bbi = ROL64(A##ki, 43); \
+    A##mo ^= Do; \
+    Bbo = ROL64(A##mo, 21); \
+    A##su ^= Du; \
+    Bbu = ROL64(A##su, 14); \
+    E##ba = Bba ^((~Bbe)& Bbi ); \
+    E##ba ^= KeccakF1600RoundConstants[i]; \
+    Ca = E##ba; \
+    E##be = Bbe ^((~Bbi)& Bbo ); \
+    Ce = E##be; \
+    E##bi = Bbi ^((~Bbo)& Bbu ); \
+    Ci = E##bi; \
+    E##bo = Bbo ^((~Bbu)& Bba ); \
+    Co = E##bo; \
+    E##bu = Bbu ^((~Bba)& Bbe ); \
+    Cu = E##bu; \
+\
+    A##bo ^= Do; \
+    Bga = ROL64(A##bo, 28); \
+    A##gu ^= Du; \
+    Bge = ROL64(A##gu, 20); \
+    A##ka ^= Da; \
+    Bgi = ROL64(A##ka, 3); \
+    A##me ^= De; \
+    Bgo = ROL64(A##me, 45); \
+    A##si ^= Di; \
+    Bgu = ROL64(A##si, 61); \
+    E##ga = Bga ^((~Bge)& Bgi ); \
+    Ca ^= E##ga; \
+    E##ge = Bge ^((~Bgi)& Bgo ); \
+    Ce ^= E##ge; \
+    E##gi = Bgi ^((~Bgo)& Bgu ); \
+    Ci ^= E##gi; \
+    E##go = Bgo ^((~Bgu)& Bga ); \
+    Co ^= E##go; \
+    E##gu = Bgu ^((~Bga)& Bge ); \
+    Cu ^= E##gu; \
+\
+    A##be ^= De; \
+    Bka = ROL64(A##be, 1); \
+    A##gi ^= Di; \
+    Bke = ROL64(A##gi, 6); \
+    A##ko ^= Do; \
+    Bki = ROL64(A##ko, 25); \
+    A##mu ^= Du; \
+    Bko = ROL64(A##mu, 8); \
+    A##sa ^= Da; \
+    Bku = ROL64(A##sa, 18); \
+    E##ka = Bka ^((~Bke)& Bki ); \
+    Ca ^= E##ka; \
+    E##ke = Bke ^((~Bki)& Bko ); \
+    Ce ^= E##ke; \
+    E##ki = Bki ^((~Bko)& Bku ); \
+    Ci ^= E##ki; \
+    E##ko = Bko ^((~Bku)& Bka ); \
+    Co ^= E##ko; \
+    E##ku = Bku ^((~Bka)& Bke ); \
+    Cu ^= E##ku; \
+\
+    A##bu ^= Du; \
+    Bma = ROL64(A##bu, 27); \
+    A##ga ^= Da; \
+    Bme = ROL64(A##ga, 36); \
+    A##ke ^= De; \
+    Bmi = ROL64(A##ke, 10); \
+    A##mi ^= Di; \
+    Bmo = ROL64(A##mi, 15); \
+    A##so ^= Do; \
+    Bmu = ROL64(A##so, 56); \
+    E##ma = Bma ^((~Bme)& Bmi ); \
+    Ca ^= E##ma; \
+    E##me = Bme ^((~Bmi)& Bmo ); \
+    Ce ^= E##me; \
+    E##mi = Bmi ^((~Bmo)& Bmu ); \
+    Ci ^= E##mi; \
+    E##mo = Bmo ^((~Bmu)& Bma ); \
+    Co ^= E##mo; \
+    E##mu = Bmu ^((~Bma)& Bme ); \
+    Cu ^= E##mu; \
+\
+    A##bi ^= Di; \
+    Bsa = ROL64(A##bi, 62); \
+    A##go ^= Do; \
+    Bse = ROL64(A##go, 55); \
+    A##ku ^= Du; \
+    Bsi = ROL64(A##ku, 39); \
+    A##ma ^= Da; \
+    Bso = ROL64(A##ma, 41); \
+    A##se ^= De; \
+    Bsu = ROL64(A##se, 2); \
+    E##sa = Bsa ^((~Bse)& Bsi ); \
+    Ca ^= E##sa; \
+    E##se = Bse ^((~Bsi)& Bso ); \
+    Ce ^= E##se; \
+    E##si = Bsi ^((~Bso)& Bsu ); \
+    Ci ^= E##si; \
+    E##so = Bso ^((~Bsu)& Bsa ); \
+    Co ^= E##so; \
+    E##su = Bsu ^((~Bsa)& Bse ); \
+    Cu ^= E##su; \
+\
+
+/* --- Code for round: same A-to-E computation as the prepare-theta form, but Ca..Cu are left untouched */
+/* --- 64-bit lanes mapped to 64-bit words */
+#define thetaRhoPiChiIota(i, A, E) \
+    Da = Cu^ROL64(Ce, 1); \
+    De = Ca^ROL64(Ci, 1); \
+    Di = Ce^ROL64(Co, 1); \
+    Do = Ci^ROL64(Cu, 1); \
+    Du = Co^ROL64(Ca, 1); \
+\
+    A##ba ^= Da; \
+    Bba = A##ba; \
+    A##ge ^= De; \
+    Bbe = ROL64(A##ge, 44); \
+    A##ki ^= Di; \
+    Bbi = ROL64(A##ki, 43); \
+    A##mo ^= Do; \
+    Bbo = ROL64(A##mo, 21); \
+    A##su ^= Du; \
+    Bbu = ROL64(A##su, 14); \
+    E##ba = Bba ^((~Bbe)& Bbi ); \
+    E##ba ^= KeccakF1600RoundConstants[i]; \
+    E##be = Bbe ^((~Bbi)& Bbo ); \
+    E##bi = Bbi ^((~Bbo)& Bbu ); \
+    E##bo = Bbo ^((~Bbu)& Bba ); \
+    E##bu = Bbu ^((~Bba)& Bbe ); \
+\
+    A##bo ^= Do; \
+    Bga = ROL64(A##bo, 28); \
+    A##gu ^= Du; \
+    Bge = ROL64(A##gu, 20); \
+    A##ka ^= Da; \
+    Bgi = ROL64(A##ka, 3); \
+    A##me ^= De; \
+    Bgo = ROL64(A##me, 45); \
+    A##si ^= Di; \
+    Bgu = ROL64(A##si, 61); \
+    E##ga = Bga ^((~Bge)& Bgi ); \
+    E##ge = Bge ^((~Bgi)& Bgo ); \
+    E##gi = Bgi ^((~Bgo)& Bgu ); \
+    E##go = Bgo ^((~Bgu)& Bga ); \
+    E##gu = Bgu ^((~Bga)& Bge ); \
+\
+    A##be ^= De; \
+    Bka = ROL64(A##be, 1); \
+    A##gi ^= Di; \
+    Bke = ROL64(A##gi, 6); \
+    A##ko ^= Do; \
+    Bki = ROL64(A##ko, 25); \
+    A##mu ^= Du; \
+    Bko = ROL64(A##mu, 8); \
+    A##sa ^= Da; \
+    Bku = ROL64(A##sa, 18); \
+    E##ka = Bka ^((~Bke)& Bki ); \
+    E##ke = Bke ^((~Bki)& Bko ); \
+    E##ki = Bki ^((~Bko)& Bku ); \
+    E##ko = Bko ^((~Bku)& Bka ); \
+    E##ku = Bku ^((~Bka)& Bke ); \
+\
+    A##bu ^= Du; \
+    Bma = ROL64(A##bu, 27); \
+    A##ga ^= Da; \
+    Bme = ROL64(A##ga, 36); \
+    A##ke ^= De; \
+    Bmi = ROL64(A##ke, 10); \
+    A##mi ^= Di; \
+    Bmo = ROL64(A##mi, 15); \
+    A##so ^= Do; \
+    Bmu = ROL64(A##so, 56); \
+    E##ma = Bma ^((~Bme)& Bmi ); \
+    E##me = Bme ^((~Bmi)& Bmo ); \
+    E##mi = Bmi ^((~Bmo)& Bmu ); \
+    E##mo = Bmo ^((~Bmu)& Bma ); \
+    E##mu = Bmu ^((~Bma)& Bme ); \
+\
+    A##bi ^= Di; \
+    Bsa = ROL64(A##bi, 62); \
+    A##go ^= Do; \
+    Bse = ROL64(A##go, 55); \
+    A##ku ^= Du; \
+    Bsi = ROL64(A##ku, 39); \
+    A##ma ^= Da; \
+    Bso = ROL64(A##ma, 41); \
+    A##se ^= De; \
+    Bsu = ROL64(A##se, 2); \
+    E##sa = Bsa ^((~Bse)& Bsi ); \
+    E##se = Bse ^((~Bsi)& Bso ); \
+    E##si = Bsi ^((~Bso)& Bsu ); \
+    E##so = Bso ^((~Bsu)& Bsa ); \
+    E##su = Bsu ^((~Bsa)& Bse ); \
+\
+
+#endif /* UseBebigokimisa */
+
+#define copyFromState(X, state) /* load the 25 words state[0..24] into lane variables X##ba .. X##su */ \
+    X##ba = state[ 0]; \
+    X##be = state[ 1]; \
+    X##bi = state[ 2]; \
+    X##bo = state[ 3]; \
+    X##bu = state[ 4]; \
+    X##ga = state[ 5]; \
+    X##ge = state[ 6]; \
+    X##gi = state[ 7]; \
+    X##go = state[ 8]; \
+    X##gu = state[ 9]; \
+    X##ka = state[10]; \
+    X##ke = state[11]; \
+    X##ki = state[12]; \
+    X##ko = state[13]; \
+    X##ku = state[14]; \
+    X##ma = state[15]; \
+    X##me = state[16]; \
+    X##mi = state[17]; \
+    X##mo = state[18]; \
+    X##mu = state[19]; \
+    X##sa = state[20]; \
+    X##se = state[21]; \
+    X##si = state[22]; \
+    X##so = state[23]; \
+    X##su = state[24]; \
+
+#define copyToState(state, X) /* store lane variables X##ba .. X##su back into state[0..24] */ \
+    state[ 0] = X##ba; \
+    state[ 1] = X##be; \
+    state[ 2] = X##bi; \
+    state[ 3] = X##bo; \
+    state[ 4] = X##bu; \
+    state[ 5] = X##ga; \
+    state[ 6] = X##ge; \
+    state[ 7] = X##gi; \
+    state[ 8] = X##go; \
+    state[ 9] = X##gu; \
+    state[10] = X##ka; \
+    state[11] = X##ke; \
+    state[12] = X##ki; \
+    state[13] = X##ko; \
+    state[14] = X##ku; \
+    state[15] = X##ma; \
+    state[16] = X##me; \
+    state[17] = X##mi; \
+    state[18] = X##mo; \
+    state[19] = X##mu; \
+    state[20] = X##sa; \
+    state[21] = X##se; \
+    state[22] = X##si; \
+    state[23] = X##so; \
+    state[24] = X##su; \
+
+#define copyStateVariables(X, Y) /* copy all 25 lane variables from prefix Y to prefix X */ \
+    X##ba = Y##ba; \
+    X##be = Y##be; \
+    X##bi = Y##bi; \
+    X##bo = Y##bo; \
+    X##bu = Y##bu; \
+    X##ga = Y##ga; \
+    X##ge = Y##ge; \
+    X##gi = Y##gi; \
+    X##go = Y##go; \
+    X##gu = Y##gu; \
+    X##ka = Y##ka; \
+    X##ke = Y##ke; \
+    X##ki = Y##ki; \
+    X##ko = Y##ko; \
+    X##ku = Y##ku; \
+    X##ma = Y##ma; \
+    X##me = Y##me; \
+    X##mi = Y##mi; \
+    X##mo = Y##mo; \
+    X##mu = Y##mu; \
+    X##sa = Y##sa; \
+    X##se = Y##se; \
+    X##si = Y##si; \
+    X##so = Y##so; \
+    X##su = Y##su; \
+
+#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN) /* HTOLE64 is the identity on little-endian hosts */
+#define HTOLE64(x) (x)
+#else /* otherwise reverse the eight bytes; NOTE(review): x is expanded unparenthesised, so pass only simple operands */
+#define HTOLE64(x) (\
+    ((x & 0xff00000000000000ull) >> 56) | \
+    ((x & 0x00ff000000000000ull) >> 40) | \
+    ((x & 0x0000ff0000000000ull) >> 24) | \
+    ((x & 0x000000ff00000000ull) >> 8) | \
+    ((x & 0x00000000ff000000ull) << 8) | \
+    ((x & 0x0000000000ff0000ull) << 24) | \
+    ((x & 0x000000000000ff00ull) << 40) | \
+    ((x & 0x00000000000000ffull) << 56))
+#endif
+
+#define addInput(X, input, laneCount) /* XOR the first laneCount words of input (at most 25) into lanes X; 21 has a dedicated branch. NOTE(review): expands to a bare if/else chain, so use it as a full statement */ \
+    if (laneCount == 21) { \
+        X##ba ^= HTOLE64(input[ 0]); \
+        X##be ^= HTOLE64(input[ 1]); \
+        X##bi ^= HTOLE64(input[ 2]); \
+        X##bo ^= HTOLE64(input[ 3]); \
+        X##bu ^= HTOLE64(input[ 4]); \
+        X##ga ^= HTOLE64(input[ 5]); \
+        X##ge ^= HTOLE64(input[ 6]); \
+        X##gi ^= HTOLE64(input[ 7]); \
+        X##go ^= HTOLE64(input[ 8]); \
+        X##gu ^= HTOLE64(input[ 9]); \
+        X##ka ^= HTOLE64(input[10]); \
+        X##ke ^= HTOLE64(input[11]); \
+        X##ki ^= HTOLE64(input[12]); \
+        X##ko ^= HTOLE64(input[13]); \
+        X##ku ^= HTOLE64(input[14]); \
+        X##ma ^= HTOLE64(input[15]); \
+        X##me ^= HTOLE64(input[16]); \
+        X##mi ^= HTOLE64(input[17]); \
+        X##mo ^= HTOLE64(input[18]); \
+        X##mu ^= HTOLE64(input[19]); \
+        X##sa ^= HTOLE64(input[20]); \
+    } \
+    else if (laneCount < 16) { \
+        if (laneCount < 8) { \
+            if (laneCount < 4) { \
+                if (laneCount < 2) { \
+                    if (laneCount < 1) { \
+                    } \
+                    else { \
+                        X##ba ^= HTOLE64(input[ 0]); \
+                    } \
+                } \
+                else { \
+                    X##ba ^= HTOLE64(input[ 0]); \
+                    X##be ^= HTOLE64(input[ 1]); \
+                    if (laneCount < 3) { \
+                    } \
+                    else { \
+                        X##bi ^= HTOLE64(input[ 2]); \
+                    } \
+                } \
+            } \
+            else { \
+                X##ba ^= HTOLE64(input[ 0]); \
+                X##be ^= HTOLE64(input[ 1]); \
+                X##bi ^= HTOLE64(input[ 2]); \
+                X##bo ^= HTOLE64(input[ 3]); \
+                if (laneCount < 6) { \
+                    if (laneCount < 5) { \
+                    } \
+                    else { \
+                        X##bu ^= HTOLE64(input[ 4]); \
+                    } \
+                } \
+                else { \
+                    X##bu ^= HTOLE64(input[ 4]); \
+                    X##ga ^= HTOLE64(input[ 5]); \
+                    if (laneCount < 7) { \
+                    } \
+                    else { \
+                        X##ge ^= HTOLE64(input[ 6]); \
+                    } \
+                } \
+            } \
+        } \
+        else { \
+            X##ba ^= HTOLE64(input[ 0]); \
+            X##be ^= HTOLE64(input[ 1]); \
+            X##bi ^= HTOLE64(input[ 2]); \
+            X##bo ^= HTOLE64(input[ 3]); \
+            X##bu ^= HTOLE64(input[ 4]); \
+            X##ga ^= HTOLE64(input[ 5]); \
+            X##ge ^= HTOLE64(input[ 6]); \
+            X##gi ^= HTOLE64(input[ 7]); \
+            if (laneCount < 12) { \
+                if (laneCount < 10) { \
+                    if (laneCount < 9) { \
+                    } \
+                    else { \
+                        X##go ^= HTOLE64(input[ 8]); \
+                    } \
+                } \
+                else { \
+                    X##go ^= HTOLE64(input[ 8]); \
+                    X##gu ^= HTOLE64(input[ 9]); \
+                    if (laneCount < 11) { \
+                    } \
+                    else { \
+                        X##ka ^= HTOLE64(input[10]); \
+                    } \
+                } \
+            } \
+            else { \
+                X##go ^= HTOLE64(input[ 8]); \
+                X##gu ^= HTOLE64(input[ 9]); \
+                X##ka ^= HTOLE64(input[10]); \
+                X##ke ^= HTOLE64(input[11]); \
+                if (laneCount < 14) { \
+                    if (laneCount < 13) { \
+                    } \
+                    else { \
+                        X##ki ^= HTOLE64(input[12]); \
+                    } \
+                } \
+                else { \
+                    X##ki ^= HTOLE64(input[12]); \
+                    X##ko ^= HTOLE64(input[13]); \
+                    if (laneCount < 15) { \
+                    } \
+                    else { \
+                        X##ku ^= HTOLE64(input[14]); \
+                    } \
+                } \
+            } \
+        } \
+    } \
+    else { \
+        X##ba ^= HTOLE64(input[ 0]); \
+        X##be ^= HTOLE64(input[ 1]); \
+        X##bi ^= HTOLE64(input[ 2]); \
+        X##bo ^= HTOLE64(input[ 3]); \
+        X##bu ^= HTOLE64(input[ 4]); \
+        X##ga ^= HTOLE64(input[ 5]); \
+        X##ge ^= HTOLE64(input[ 6]); \
+        X##gi ^= HTOLE64(input[ 7]); \
+        X##go ^= HTOLE64(input[ 8]); \
+        X##gu ^= HTOLE64(input[ 9]); \
+        X##ka ^= HTOLE64(input[10]); \
+        X##ke ^= HTOLE64(input[11]); \
+        X##ki ^= HTOLE64(input[12]); \
+        X##ko ^= HTOLE64(input[13]); \
+        X##ku ^= HTOLE64(input[14]); \
+        X##ma ^= HTOLE64(input[15]); \
+        if (laneCount < 24) { \
+            if (laneCount < 20) { \
+                if (laneCount < 18) { \
+                    if (laneCount < 17) { \
+                    } \
+                    else { \
+                        X##me ^= HTOLE64(input[16]); \
+                    } \
+                } \
+                else { \
+                    X##me ^= HTOLE64(input[16]); \
+                    X##mi ^= HTOLE64(input[17]); \
+                    if (laneCount < 19) { \
+                    } \
+                    else { \
+                        X##mo ^= HTOLE64(input[18]); \
+                    } \
+                } \
+            } \
+            else { \
+                X##me ^= HTOLE64(input[16]); \
+                X##mi ^= HTOLE64(input[17]); \
+                X##mo ^= HTOLE64(input[18]); \
+                X##mu ^= HTOLE64(input[19]); \
+                if (laneCount < 22) { \
+                    if (laneCount < 21) { \
+                    } \
+                    else { \
+                        X##sa ^= HTOLE64(input[20]); \
+                    } \
+                } \
+                else { \
+                    X##sa ^= HTOLE64(input[20]); \
+                    X##se ^= HTOLE64(input[21]); \
+                    if (laneCount < 23) { \
+                    } \
+                    else { \
+                        X##si ^= HTOLE64(input[22]); \
+                    } \
+                } \
+            } \
+        } \
+        else { \
+            X##me ^= HTOLE64(input[16]); \
+            X##mi ^= HTOLE64(input[17]); \
+            X##mo ^= HTOLE64(input[18]); \
+            X##mu ^= HTOLE64(input[19]); \
+            X##sa ^= HTOLE64(input[20]); \
+            X##se ^= HTOLE64(input[21]); \
+            X##si ^= HTOLE64(input[22]); \
+            X##so ^= HTOLE64(input[23]); \
+            if (laneCount < 25) { \
+            } \
+            else { \
+                X##su ^= HTOLE64(input[24]); \
+            } \
+        } \
+    }
diff --git a/cbits/xkcp/KeccakP-1600-SnP.h b/cbits/xkcp/KeccakP-1600-SnP.h
new file mode 100644
index 000000000..1f811b0bf
--- /dev/null
+++ b/cbits/xkcp/KeccakP-1600-SnP.h
@@ -0,0 +1,54 @@
+/*
+The eXtended Keccak Code Package (XKCP)
+https://github.com/XKCP/XKCP
+
+The Keccak-p permutations, designed by Guido Bertoni, Joan Daemen, Michaël Peeters and Gilles Van Assche.
+
+Implementation by Gilles Van Assche and Ronny Van Keer, hereby denoted as "the implementer".
+
+For more information, feedback or questions, please refer to the Keccak Team website:
+https://keccak.team/
+
+To the extent possible under law, the implementer has waived all copyright
+and related or neighboring rights to the source code in this file.
+http://creativecommons.org/publicdomain/zero/1.0/
+
+---
+
+Please refer to SnP-documentation.h for more details.
+*/
+
+#ifndef _KeccakP_1600_SnP_h_
+#define _KeccakP_1600_SnP_h_
+
+#include "brg_endian.h"
+#include "KeccakP-1600-opt64-config.h"
+
+#define KeccakP1600_implementation "generic 64-bit optimized implementation (" KeccakP1600_implementation_config ")"
+#define KeccakP1600_stateSizeInBytes 200 /* 25 lanes of 8 bytes */
+#define KeccakP1600_stateAlignment 8
+#define KeccakF1600_FastLoop_supported
+#define KeccakP1600_12rounds_FastLoop_supported
+
+#include <stddef.h> /* size_t, used by the FastLoop_Absorb prototypes below */
+
+#define KeccakP1600_StaticInitialize()
+void KeccakP1600_Initialize(void *state);
+#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN) /* on little-endian hosts AddByte is a direct byte XOR into the state */
+#define KeccakP1600_AddByte(state, byte, offset) \
+    ((unsigned char*)(state))[(offset)] ^= (byte)
+#else
+void KeccakP1600_AddByte(void *state, unsigned char data, unsigned int offset);
+#endif
+void KeccakP1600_AddBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length);
+void KeccakP1600_OverwriteBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length);
+void KeccakP1600_OverwriteWithZeroes(void *state, unsigned int byteCount);
+void KeccakP1600_Permute_Nrounds(void *state, unsigned int nrounds);
+void KeccakP1600_Permute_12rounds(void *state);
+void KeccakP1600_Permute_24rounds(void *state);
+void KeccakP1600_ExtractBytes(const void *state, unsigned char *data, unsigned int offset, unsigned int length);
+void KeccakP1600_ExtractAndAddBytes(const void *state, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length);
+size_t KeccakF1600_FastLoop_Absorb(void *state, unsigned int laneCount, const unsigned char *data, size_t dataByteLen);
+size_t KeccakP1600_12rounds_FastLoop_Absorb(void *state, unsigned int laneCount, const unsigned char *data, size_t dataByteLen);
+
+#endif
diff --git a/cbits/xkcp/KeccakP-1600-opt64-config.h b/cbits/xkcp/KeccakP-1600-opt64-config.h
new file mode 100644
index 000000000..085b6c958
--- /dev/null
+++ b/cbits/xkcp/KeccakP-1600-opt64-config.h
@@ -0,0 +1,6 @@
+/*
+This file defines some parameters of the 64-bit optimized implementation in this directory (KeccakP-1600-opt64.c).
+*/
+
+#define KeccakP1600_implementation_config "all rounds unrolled"
+#define KeccakP1600_fullUnrolling
diff --git a/cbits/xkcp/KeccakP-1600-opt64.c b/cbits/xkcp/KeccakP-1600-opt64.c
new file mode 100644
index 000000000..617069e66
--- /dev/null
+++ b/cbits/xkcp/KeccakP-1600-opt64.c
@@ -0,0 +1,565 @@
+/*
+The eXtended Keccak Code Package (XKCP)
+https://github.com/XKCP/XKCP
+
+The Keccak-p permutations, designed by Guido Bertoni, Joan Daemen, Michaël Peeters and Gilles Van Assche.
+
+Implementation by Gilles Van Assche and Ronny Van Keer, hereby denoted as "the implementer".
+
+For more information, feedback or questions, please refer to the Keccak Team website:
+https://keccak.team/
+
+To the extent possible under law, the implementer has waived all copyright
+and related or neighboring rights to the source code in this file.
+http://creativecommons.org/publicdomain/zero/1.0/
+
+---
+
+This file implements Keccak-p[1600] in a SnP-compatible way.
+Please refer to SnP-documentation.h for more details.
+
+This implementation comes with KeccakP-1600-SnP.h in the same folder.
+Please refer to LowLevel.build for the exact list of other files it must be combined with.
+*/ + +#include +#include +#include +#include "brg_endian.h" +#include "KeccakP-1600-opt64-config.h" + +#if defined(KeccakP1600_useLaneComplementing) +#define UseBebigokimisa +#endif + +#if defined(_MSC_VER) +#define ROL64(a, offset) _rotl64(a, offset) +#elif defined(KeccakP1600_useSHLD) + #define ROL64(x,N) ({ \ + register uint64_t __out; \ + register uint64_t __in = x; \ + __asm__ ("shld %2,%0,%0" : "=r"(__out) : "0"(__in), "i"(N)); \ + __out; \ + }) +#else +#define ROL64(a, offset) ((((uint64_t)a) << offset) ^ (((uint64_t)a) >> (64-offset))) +#endif + +#include "KeccakP-1600-64.macros" +#ifdef KeccakP1600_fullUnrolling +#define FullUnrolling +#else +#define Unrolling KeccakP1600_unrolling +#endif +#include "KeccakP-1600-unrolling.macros" +#include "SnP-Relaned.h" + +static const uint64_t KeccakF1600RoundConstants[24] = { + 0x0000000000000001ULL, + 0x0000000000008082ULL, + 0x800000000000808aULL, + 0x8000000080008000ULL, + 0x000000000000808bULL, + 0x0000000080000001ULL, + 0x8000000080008081ULL, + 0x8000000000008009ULL, + 0x000000000000008aULL, + 0x0000000000000088ULL, + 0x0000000080008009ULL, + 0x000000008000000aULL, + 0x000000008000808bULL, + 0x800000000000008bULL, + 0x8000000000008089ULL, + 0x8000000000008003ULL, + 0x8000000000008002ULL, + 0x8000000000000080ULL, + 0x000000000000800aULL, + 0x800000008000000aULL, + 0x8000000080008081ULL, + 0x8000000000008080ULL, + 0x0000000080000001ULL, + 0x8000000080008008ULL }; + +/* ---------------------------------------------------------------- */ + +void KeccakP1600_Initialize(void *state) +{ + memset(state, 0, 200); +#ifdef KeccakP1600_useLaneComplementing + ((uint64_t*)state)[ 1] = ~(uint64_t)0; + ((uint64_t*)state)[ 2] = ~(uint64_t)0; + ((uint64_t*)state)[ 8] = ~(uint64_t)0; + ((uint64_t*)state)[12] = ~(uint64_t)0; + ((uint64_t*)state)[17] = ~(uint64_t)0; + ((uint64_t*)state)[20] = ~(uint64_t)0; +#endif +} + +/* ---------------------------------------------------------------- */ + +void KeccakP1600_AddBytesInLane(void 
*state, unsigned int lanePosition, const unsigned char *data, unsigned int offset, unsigned int length) +{ +#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN) + uint64_t lane; + if (length == 0) + return; + if (length == 1) + lane = data[0]; + else { + lane = 0; + memcpy(&lane, data, length); + } + lane <<= offset*8; +#else + uint64_t lane = 0; + unsigned int i; + for(i=0; i>= offset*8; + for(i=0; i>= 8; + } +#endif +} + +/* ---------------------------------------------------------------- */ + +#if (PLATFORM_BYTE_ORDER != IS_LITTLE_ENDIAN) +static void fromWordToBytes(uint8_t *bytes, const uint64_t word) +{ + unsigned int i; + + for(i=0; i<(64/8); i++) + bytes[i] = (word >> (8*i)) & 0xFF; +} +#endif + +void KeccakP1600_ExtractLanes(const void *state, unsigned char *data, unsigned int laneCount) +{ +#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN) + memcpy(data, state, laneCount*8); +#else + unsigned int i; + + for(i=0; i 1) { + ((uint64_t*)data)[ 1] = ~((uint64_t*)data)[ 1]; + if (laneCount > 2) { + ((uint64_t*)data)[ 2] = ~((uint64_t*)data)[ 2]; + if (laneCount > 8) { + ((uint64_t*)data)[ 8] = ~((uint64_t*)data)[ 8]; + if (laneCount > 12) { + ((uint64_t*)data)[12] = ~((uint64_t*)data)[12]; + if (laneCount > 17) { + ((uint64_t*)data)[17] = ~((uint64_t*)data)[17]; + if (laneCount > 20) { + ((uint64_t*)data)[20] = ~((uint64_t*)data)[20]; + } + } + } + } + } + } +#endif +} + +/* ---------------------------------------------------------------- */ + +void KeccakP1600_ExtractBytes(const void *state, unsigned char *data, unsigned int offset, unsigned int length) +{ + SnP_ExtractBytes(state, data, offset, length, KeccakP1600_ExtractLanes, KeccakP1600_ExtractBytesInLane, 8); +} + +/* ---------------------------------------------------------------- */ + +void KeccakP1600_ExtractAndAddBytesInLane(const void *state, unsigned int lanePosition, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length) +{ + uint64_t lane = 
((uint64_t*)state)[lanePosition]; +#ifdef KeccakP1600_useLaneComplementing + if ((lanePosition == 1) || (lanePosition == 2) || (lanePosition == 8) || (lanePosition == 12) || (lanePosition == 17) || (lanePosition == 20)) + lane = ~lane; +#endif +#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN) + { + unsigned int i; + uint64_t lane1[1]; + lane1[0] = lane; + for(i=0; i>= offset*8; + for(i=0; i>= 8; + } +#endif +} + +/* ---------------------------------------------------------------- */ + +void KeccakP1600_ExtractAndAddLanes(const void *state, const unsigned char *input, unsigned char *output, unsigned int laneCount) +{ + unsigned int i; +#if (PLATFORM_BYTE_ORDER != IS_LITTLE_ENDIAN) + unsigned char temp[8]; + unsigned int j; +#endif + + for(i=0; i 1) { + ((uint64_t*)output)[ 1] = ~((uint64_t*)output)[ 1]; + if (laneCount > 2) { + ((uint64_t*)output)[ 2] = ~((uint64_t*)output)[ 2]; + if (laneCount > 8) { + ((uint64_t*)output)[ 8] = ~((uint64_t*)output)[ 8]; + if (laneCount > 12) { + ((uint64_t*)output)[12] = ~((uint64_t*)output)[12]; + if (laneCount > 17) { + ((uint64_t*)output)[17] = ~((uint64_t*)output)[17]; + if (laneCount > 20) { + ((uint64_t*)output)[20] = ~((uint64_t*)output)[20]; + } + } + } + } + } + } +#endif +} + +/* ---------------------------------------------------------------- */ + +void KeccakP1600_ExtractAndAddBytes(const void *state, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length) +{ + SnP_ExtractAndAddBytes(state, input, output, offset, length, KeccakP1600_ExtractAndAddLanes, KeccakP1600_ExtractAndAddBytesInLane, 8); +} + +/* ---------------------------------------------------------------- */ + +size_t KeccakF1600_FastLoop_Absorb(void *state, unsigned int laneCount, const unsigned char *data, size_t dataByteLen) +{ + size_t originalDataByteLen = dataByteLen; + declareABCDE + #ifndef KeccakP1600_fullUnrolling + unsigned int i; + #endif + uint64_t *stateAsLanes = (uint64_t*)state; + uint64_t *inDataAsLanes = 
(uint64_t*)data; + + copyFromState(A, stateAsLanes) + while(dataByteLen >= laneCount*8) { + addInput(A, inDataAsLanes, laneCount) + rounds24 + inDataAsLanes += laneCount; + dataByteLen -= laneCount*8; + } + copyToState(stateAsLanes, A) + return originalDataByteLen - dataByteLen; +} + +/* ---------------------------------------------------------------- */ + +size_t KeccakP1600_12rounds_FastLoop_Absorb(void *state, unsigned int laneCount, const unsigned char *data, size_t dataByteLen) +{ + size_t originalDataByteLen = dataByteLen; + declareABCDE + #ifndef KeccakP1600_fullUnrolling + unsigned int i; + #endif + uint64_t *stateAsLanes = (uint64_t*)state; + uint64_t *inDataAsLanes = (uint64_t*)data; + + copyFromState(A, stateAsLanes) + while(dataByteLen >= laneCount*8) { + addInput(A, inDataAsLanes, laneCount) + rounds12 + inDataAsLanes += laneCount; + dataByteLen -= laneCount*8; + } + copyToState(stateAsLanes, A) + return originalDataByteLen - dataByteLen; +} diff --git a/cbits/xkcp/KeccakP-1600-unrolling.macros b/cbits/xkcp/KeccakP-1600-unrolling.macros new file mode 100644 index 000000000..9f7200226 --- /dev/null +++ b/cbits/xkcp/KeccakP-1600-unrolling.macros @@ -0,0 +1,305 @@ +/* +The eXtended Keccak Code Package (XKCP) +https://github.com/XKCP/XKCP + +The Keccak-p permutations, designed by Guido Bertoni, Joan Daemen, Michaël Peeters and Gilles Van Assche. + +Implementation by Gilles Van Assche and Ronny Van Keer, hereby denoted as "the implementer". + +For more information, feedback or questions, please refer to the Keccak Team website: +https://keccak.team/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. 
+http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#if (defined(FullUnrolling)) +#define rounds24 \ + prepareTheta \ + thetaRhoPiChiIotaPrepareTheta( 0, A, E) \ + thetaRhoPiChiIotaPrepareTheta( 1, E, A) \ + thetaRhoPiChiIotaPrepareTheta( 2, A, E) \ + thetaRhoPiChiIotaPrepareTheta( 3, E, A) \ + thetaRhoPiChiIotaPrepareTheta( 4, A, E) \ + thetaRhoPiChiIotaPrepareTheta( 5, E, A) \ + thetaRhoPiChiIotaPrepareTheta( 6, A, E) \ + thetaRhoPiChiIotaPrepareTheta( 7, E, A) \ + thetaRhoPiChiIotaPrepareTheta( 8, A, E) \ + thetaRhoPiChiIotaPrepareTheta( 9, E, A) \ + thetaRhoPiChiIotaPrepareTheta(10, A, E) \ + thetaRhoPiChiIotaPrepareTheta(11, E, A) \ + thetaRhoPiChiIotaPrepareTheta(12, A, E) \ + thetaRhoPiChiIotaPrepareTheta(13, E, A) \ + thetaRhoPiChiIotaPrepareTheta(14, A, E) \ + thetaRhoPiChiIotaPrepareTheta(15, E, A) \ + thetaRhoPiChiIotaPrepareTheta(16, A, E) \ + thetaRhoPiChiIotaPrepareTheta(17, E, A) \ + thetaRhoPiChiIotaPrepareTheta(18, A, E) \ + thetaRhoPiChiIotaPrepareTheta(19, E, A) \ + thetaRhoPiChiIotaPrepareTheta(20, A, E) \ + thetaRhoPiChiIotaPrepareTheta(21, E, A) \ + thetaRhoPiChiIotaPrepareTheta(22, A, E) \ + thetaRhoPiChiIota(23, E, A) \ + +#define rounds12 \ + prepareTheta \ + thetaRhoPiChiIotaPrepareTheta(12, A, E) \ + thetaRhoPiChiIotaPrepareTheta(13, E, A) \ + thetaRhoPiChiIotaPrepareTheta(14, A, E) \ + thetaRhoPiChiIotaPrepareTheta(15, E, A) \ + thetaRhoPiChiIotaPrepareTheta(16, A, E) \ + thetaRhoPiChiIotaPrepareTheta(17, E, A) \ + thetaRhoPiChiIotaPrepareTheta(18, A, E) \ + thetaRhoPiChiIotaPrepareTheta(19, E, A) \ + thetaRhoPiChiIotaPrepareTheta(20, A, E) \ + thetaRhoPiChiIotaPrepareTheta(21, E, A) \ + thetaRhoPiChiIotaPrepareTheta(22, A, E) \ + thetaRhoPiChiIota(23, E, A) \ + +#define rounds6 \ + prepareTheta \ + thetaRhoPiChiIotaPrepareTheta(18, A, E) \ + thetaRhoPiChiIotaPrepareTheta(19, E, A) \ + thetaRhoPiChiIotaPrepareTheta(20, A, E) \ + thetaRhoPiChiIotaPrepareTheta(21, E, A) \ + thetaRhoPiChiIotaPrepareTheta(22, A, E) \ + 
thetaRhoPiChiIota(23, E, A) \ + +#define rounds4 \ + prepareTheta \ + thetaRhoPiChiIotaPrepareTheta(20, A, E) \ + thetaRhoPiChiIotaPrepareTheta(21, E, A) \ + thetaRhoPiChiIotaPrepareTheta(22, A, E) \ + thetaRhoPiChiIota(23, E, A) \ + +#elif (Unrolling == 12) +#define rounds24 \ + prepareTheta \ + for(i=0; i<24; i+=12) { \ + thetaRhoPiChiIotaPrepareTheta(i , A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+ 1, E, A) \ + thetaRhoPiChiIotaPrepareTheta(i+ 2, A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+ 3, E, A) \ + thetaRhoPiChiIotaPrepareTheta(i+ 4, A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+ 5, E, A) \ + thetaRhoPiChiIotaPrepareTheta(i+ 6, A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+ 7, E, A) \ + thetaRhoPiChiIotaPrepareTheta(i+ 8, A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+ 9, E, A) \ + thetaRhoPiChiIotaPrepareTheta(i+10, A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+11, E, A) \ + } \ + +#define rounds12 \ + prepareTheta \ + thetaRhoPiChiIotaPrepareTheta(12, A, E) \ + thetaRhoPiChiIotaPrepareTheta(13, E, A) \ + thetaRhoPiChiIotaPrepareTheta(14, A, E) \ + thetaRhoPiChiIotaPrepareTheta(15, E, A) \ + thetaRhoPiChiIotaPrepareTheta(16, A, E) \ + thetaRhoPiChiIotaPrepareTheta(17, E, A) \ + thetaRhoPiChiIotaPrepareTheta(18, A, E) \ + thetaRhoPiChiIotaPrepareTheta(19, E, A) \ + thetaRhoPiChiIotaPrepareTheta(20, A, E) \ + thetaRhoPiChiIotaPrepareTheta(21, E, A) \ + thetaRhoPiChiIotaPrepareTheta(22, A, E) \ + thetaRhoPiChiIota(23, E, A) \ + +#define rounds6 \ + prepareTheta \ + thetaRhoPiChiIotaPrepareTheta(18, A, E) \ + thetaRhoPiChiIotaPrepareTheta(19, E, A) \ + thetaRhoPiChiIotaPrepareTheta(20, A, E) \ + thetaRhoPiChiIotaPrepareTheta(21, E, A) \ + thetaRhoPiChiIotaPrepareTheta(22, A, E) \ + thetaRhoPiChiIota(23, E, A) \ + +#define rounds4 \ + prepareTheta \ + thetaRhoPiChiIotaPrepareTheta(20, A, E) \ + thetaRhoPiChiIotaPrepareTheta(21, E, A) \ + thetaRhoPiChiIotaPrepareTheta(22, A, E) \ + thetaRhoPiChiIota(23, E, A) \ + +#elif (Unrolling == 6) +#define rounds24 \ + prepareTheta \ + 
for(i=0; i<24; i+=6) { \ + thetaRhoPiChiIotaPrepareTheta(i , A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \ + thetaRhoPiChiIotaPrepareTheta(i+2, A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+3, E, A) \ + thetaRhoPiChiIotaPrepareTheta(i+4, A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+5, E, A) \ + } \ + +#define rounds12 \ + prepareTheta \ + for(i=12; i<24; i+=6) { \ + thetaRhoPiChiIotaPrepareTheta(i , A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \ + thetaRhoPiChiIotaPrepareTheta(i+2, A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+3, E, A) \ + thetaRhoPiChiIotaPrepareTheta(i+4, A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+5, E, A) \ + } \ + +#define rounds6 \ + prepareTheta \ + thetaRhoPiChiIotaPrepareTheta(18, A, E) \ + thetaRhoPiChiIotaPrepareTheta(19, E, A) \ + thetaRhoPiChiIotaPrepareTheta(20, A, E) \ + thetaRhoPiChiIotaPrepareTheta(21, E, A) \ + thetaRhoPiChiIotaPrepareTheta(22, A, E) \ + thetaRhoPiChiIota(23, E, A) \ + +#define rounds4 \ + prepareTheta \ + thetaRhoPiChiIotaPrepareTheta(20, A, E) \ + thetaRhoPiChiIotaPrepareTheta(21, E, A) \ + thetaRhoPiChiIotaPrepareTheta(22, A, E) \ + thetaRhoPiChiIota(23, E, A) \ + +#elif (Unrolling == 4) +#define rounds24 \ + prepareTheta \ + for(i=0; i<24; i+=4) { \ + thetaRhoPiChiIotaPrepareTheta(i , A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \ + thetaRhoPiChiIotaPrepareTheta(i+2, A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+3, E, A) \ + } \ + +#define rounds12 \ + prepareTheta \ + for(i=12; i<24; i+=4) { \ + thetaRhoPiChiIotaPrepareTheta(i , A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \ + thetaRhoPiChiIotaPrepareTheta(i+2, A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+3, E, A) \ + } \ + +#define rounds6 \ + prepareTheta \ + for(i=18; i<24; i+=2) { \ + thetaRhoPiChiIotaPrepareTheta(i , A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \ + } \ + +#define rounds4 \ + prepareTheta \ + thetaRhoPiChiIotaPrepareTheta(20, A, E) \ + thetaRhoPiChiIotaPrepareTheta(21, E, A) \ + thetaRhoPiChiIotaPrepareTheta(22, A, E) \ + 
thetaRhoPiChiIota(23, E, A) \ + +#elif (Unrolling == 3) +#define rounds24 \ + prepareTheta \ + for(i=0; i<24; i+=3) { \ + thetaRhoPiChiIotaPrepareTheta(i , A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \ + thetaRhoPiChiIotaPrepareTheta(i+2, A, E) \ + copyStateVariables(A, E) \ + } \ + +#define rounds12 \ + prepareTheta \ + for(i=12; i<24; i+=3) { \ + thetaRhoPiChiIotaPrepareTheta(i , A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \ + thetaRhoPiChiIotaPrepareTheta(i+2, A, E) \ + copyStateVariables(A, E) \ + } \ + +#define rounds6 \ + prepareTheta \ + for(i=18; i<24; i+=3) { \ + thetaRhoPiChiIotaPrepareTheta(i , A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \ + thetaRhoPiChiIotaPrepareTheta(i+2, A, E) \ + copyStateVariables(A, E) \ + } \ + +#define rounds4 \ + prepareTheta \ + for(i=20; i<24; i+=2) { \ + thetaRhoPiChiIotaPrepareTheta(i , A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \ + } \ + +#elif (Unrolling == 2) +#define rounds24 \ + prepareTheta \ + for(i=0; i<24; i+=2) { \ + thetaRhoPiChiIotaPrepareTheta(i , A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \ + } \ + +#define rounds12 \ + prepareTheta \ + for(i=12; i<24; i+=2) { \ + thetaRhoPiChiIotaPrepareTheta(i , A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \ + } \ + +#define rounds6 \ + prepareTheta \ + for(i=18; i<24; i+=2) { \ + thetaRhoPiChiIotaPrepareTheta(i , A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \ + } \ + +#define rounds4 \ + prepareTheta \ + for(i=20; i<24; i+=2) { \ + thetaRhoPiChiIotaPrepareTheta(i , A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \ + } \ + +#elif (Unrolling == 1) +#define rounds24 \ + prepareTheta \ + for(i=0; i<24; i++) { \ + thetaRhoPiChiIotaPrepareTheta(i , A, E) \ + copyStateVariables(A, E) \ + } \ + +#define rounds12 \ + prepareTheta \ + for(i=12; i<24; i++) { \ + thetaRhoPiChiIotaPrepareTheta(i , A, E) \ + copyStateVariables(A, E) \ + } \ + +#define rounds6 \ + prepareTheta \ + for(i=18; i<24; i++) { \ + 
thetaRhoPiChiIotaPrepareTheta(i , A, E) \ + copyStateVariables(A, E) \ + } \ + +#define rounds4 \ + prepareTheta \ + for(i=20; i<24; i++) { \ + thetaRhoPiChiIotaPrepareTheta(i , A, E) \ + copyStateVariables(A, E) \ + } \ + +#else +#error "Unrolling is not correctly specified!" +#endif + +#define roundsN(__nrounds) \ + prepareTheta \ + i = 24 - (__nrounds); \ + if ((i&1) != 0) { \ + thetaRhoPiChiIotaPrepareTheta(i, A, E) \ + copyStateVariables(A, E) \ + ++i; \ + } \ + for( /* empty */; i<24; i+=2) { \ + thetaRhoPiChiIotaPrepareTheta(i , A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \ + } diff --git a/cbits/xkcp/KeccakSponge.c b/cbits/xkcp/KeccakSponge.c new file mode 100644 index 000000000..350df772e --- /dev/null +++ b/cbits/xkcp/KeccakSponge.c @@ -0,0 +1,111 @@ +/* +The eXtended Keccak Code Package (XKCP) +https://github.com/XKCP/XKCP + +Keccak, designed by Guido Bertoni, Joan Daemen, Michaël Peeters and Gilles Van Assche. + +Implementation by the designers, hereby denoted as "the implementer". + +For more information, feedback or questions, please refer to the Keccak Team website: +https://keccak.team/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. 
+http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#include "KeccakSponge.h" + +#ifdef KeccakReference + #include "displayIntermediateValues.h" +#endif + +#ifdef XKCP_has_KeccakP200 + #include "KeccakP-200-SnP.h" + + #define prefix KeccakWidth200 + #define SnP KeccakP200 + #define SnP_width 200 + #define SnP_Permute KeccakP200_Permute_18rounds + #if defined(KeccakF200_FastLoop_supported) + #define SnP_FastLoop_Absorb KeccakF200_FastLoop_Absorb + #endif + #include "KeccakSponge.inc" + #undef prefix + #undef SnP + #undef SnP_width + #undef SnP_Permute + #undef SnP_FastLoop_Absorb +#endif + +#ifdef XKCP_has_KeccakP400 + #include "KeccakP-400-SnP.h" + + #define prefix KeccakWidth400 + #define SnP KeccakP400 + #define SnP_width 400 + #define SnP_Permute KeccakP400_Permute_20rounds + #if defined(KeccakF400_FastLoop_supported) + #define SnP_FastLoop_Absorb KeccakF400_FastLoop_Absorb + #endif + #include "KeccakSponge.inc" + #undef prefix + #undef SnP + #undef SnP_width + #undef SnP_Permute + #undef SnP_FastLoop_Absorb +#endif + +#ifdef XKCP_has_KeccakP800 + #include "KeccakP-800-SnP.h" + + #define prefix KeccakWidth800 + #define SnP KeccakP800 + #define SnP_width 800 + #define SnP_Permute KeccakP800_Permute_22rounds + #if defined(KeccakF800_FastLoop_supported) + #define SnP_FastLoop_Absorb KeccakF800_FastLoop_Absorb + #endif + #include "KeccakSponge.inc" + #undef prefix + #undef SnP + #undef SnP_width + #undef SnP_Permute + #undef SnP_FastLoop_Absorb +#endif + +#ifdef XKCP_has_KeccakP1600 + #include "KeccakP-1600-SnP.h" + + #define prefix KeccakWidth1600 + #define SnP KeccakP1600 + #define SnP_width 1600 + #define SnP_Permute KeccakP1600_Permute_24rounds + #if defined(KeccakF1600_FastLoop_supported) + #define SnP_FastLoop_Absorb KeccakF1600_FastLoop_Absorb + #endif + #include "KeccakSponge.inc" + #undef prefix + #undef SnP + #undef SnP_width + #undef SnP_Permute + #undef SnP_FastLoop_Absorb +#endif + +#ifdef XKCP_has_KeccakP1600 + #include "KeccakP-1600-SnP.h" + + 
#define prefix KeccakWidth1600_12rounds + #define SnP KeccakP1600 + #define SnP_width 1600 + #define SnP_Permute KeccakP1600_Permute_12rounds + #if defined(KeccakP1600_12rounds_FastLoop_supported) + #define SnP_FastLoop_Absorb KeccakP1600_12rounds_FastLoop_Absorb + #endif + #include "KeccakSponge.inc" + #undef prefix + #undef SnP + #undef SnP_width + #undef SnP_Permute + #undef SnP_FastLoop_Absorb +#endif diff --git a/cbits/xkcp/KeccakSponge.h b/cbits/xkcp/KeccakSponge.h new file mode 100644 index 000000000..31c2d05ca --- /dev/null +++ b/cbits/xkcp/KeccakSponge.h @@ -0,0 +1,76 @@ +/* +The eXtended Keccak Code Package (XKCP) +https://github.com/XKCP/XKCP + +Keccak, designed by Guido Bertoni, Joan Daemen, Michaël Peeters and Gilles Van Assche. + +Implementation by the designers, hereby denoted as "the implementer". + +For more information, feedback or questions, please refer to the Keccak Team website: +https://keccak.team/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. 
+http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#ifndef _KeccakSponge_h_ +#define _KeccakSponge_h_ + +/* For the documentation, please follow the link: */ +/* #include "KeccakSponge-documentation.h" */ + +#include +#include "align.h" +#include "config.h" + +#define XKCP_DeclareSpongeStructure(prefix, size, alignment) \ + ALIGN(alignment) typedef struct prefix##_SpongeInstanceStruct { \ + unsigned char state[size]; \ + unsigned int rate; \ + unsigned int byteIOIndex; \ + int squeezing; \ + } prefix##_SpongeInstance; + +#define XKCP_DeclareSpongeFunctions(prefix) \ + int prefix##_Sponge(unsigned int rate, unsigned int capacity, const unsigned char *input, size_t inputByteLen, unsigned char suffix, unsigned char *output, size_t outputByteLen); \ + int prefix##_SpongeInitialize(prefix##_SpongeInstance *spongeInstance, unsigned int rate, unsigned int capacity); \ + int prefix##_SpongeAbsorb(prefix##_SpongeInstance *spongeInstance, const unsigned char *data, size_t dataByteLen); \ + int prefix##_SpongeAbsorbLastFewBits(prefix##_SpongeInstance *spongeInstance, unsigned char delimitedData); \ + int prefix##_SpongeSqueeze(prefix##_SpongeInstance *spongeInstance, unsigned char *data, size_t dataByteLen); + +#ifdef XKCP_has_KeccakP200 + #include "KeccakP-200-SnP.h" + XKCP_DeclareSpongeStructure(KeccakWidth200, KeccakP200_stateSizeInBytes, KeccakP200_stateAlignment) + XKCP_DeclareSpongeFunctions(KeccakWidth200) + #define XKCP_has_Sponge_Keccak_width200 +#endif + +#ifdef XKCP_has_KeccakP400 + #include "KeccakP-400-SnP.h" + XKCP_DeclareSpongeStructure(KeccakWidth400, KeccakP400_stateSizeInBytes, KeccakP400_stateAlignment) + XKCP_DeclareSpongeFunctions(KeccakWidth400) + #define XKCP_has_Sponge_Keccak_width400 +#endif + +#ifdef XKCP_has_KeccakP800 + #include "KeccakP-800-SnP.h" + XKCP_DeclareSpongeStructure(KeccakWidth800, KeccakP800_stateSizeInBytes, KeccakP800_stateAlignment) + XKCP_DeclareSpongeFunctions(KeccakWidth800) + #define XKCP_has_Sponge_Keccak_width800 
+#endif + +#ifdef XKCP_has_KeccakP1600 + #include "KeccakP-1600-SnP.h" + XKCP_DeclareSpongeStructure(KeccakWidth1600, KeccakP1600_stateSizeInBytes, KeccakP1600_stateAlignment) + XKCP_DeclareSpongeFunctions(KeccakWidth1600) + #define XKCP_has_Sponge_Keccak_width1600 +#endif + +#ifdef XKCP_has_KeccakP1600 + #include "KeccakP-1600-SnP.h" + XKCP_DeclareSpongeStructure(KeccakWidth1600_12rounds, KeccakP1600_stateSizeInBytes, KeccakP1600_stateAlignment) + XKCP_DeclareSpongeFunctions(KeccakWidth1600_12rounds) +#endif + +#endif diff --git a/cbits/xkcp/KeccakSponge.inc b/cbits/xkcp/KeccakSponge.inc new file mode 100644 index 000000000..70080923e --- /dev/null +++ b/cbits/xkcp/KeccakSponge.inc @@ -0,0 +1,314 @@ +/* +The eXtended Keccak Code Package (XKCP) +https://github.com/XKCP/XKCP + +Keccak, designed by Guido Bertoni, Joan Daemen, Michaël Peeters and Gilles Van Assche. + +Implementation by the designers, hereby denoted as "the implementer". + +For more information, feedback or questions, please refer to the Keccak Team website: +https://keccak.team/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. 
+http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#define JOIN0(a, b) a ## b +#define JOIN(a, b) JOIN0(a, b) + +#define Sponge JOIN(prefix, _Sponge) +#define SpongeInstance JOIN(prefix, _SpongeInstance) +#define SpongeInitialize JOIN(prefix, _SpongeInitialize) +#define SpongeAbsorb JOIN(prefix, _SpongeAbsorb) +#define SpongeAbsorbLastFewBits JOIN(prefix, _SpongeAbsorbLastFewBits) +#define SpongeSqueeze JOIN(prefix, _SpongeSqueeze) + +#define SnP_stateSizeInBytes JOIN(SnP, _stateSizeInBytes) +#define SnP_stateAlignment JOIN(SnP, _stateAlignment) +#define SnP_StaticInitialize JOIN(SnP, _StaticInitialize) +#define SnP_Initialize JOIN(SnP, _Initialize) +#define SnP_AddByte JOIN(SnP, _AddByte) +#define SnP_AddBytes JOIN(SnP, _AddBytes) +#define SnP_ExtractBytes JOIN(SnP, _ExtractBytes) + +int Sponge(unsigned int rate, unsigned int capacity, const unsigned char *input, size_t inputByteLen, unsigned char suffix, unsigned char *output, size_t outputByteLen) +{ + ALIGN(SnP_stateAlignment) unsigned char state[SnP_stateSizeInBytes]; + unsigned int partialBlock; + const unsigned char *curInput = input; + unsigned char *curOutput = output; + unsigned int rateInBytes = rate/8; + + if (rate+capacity != SnP_width) + return 1; + if ((rate <= 0) || (rate > SnP_width) || ((rate % 8) != 0)) + return 1; + if (suffix == 0) + return 1; + + /* Initialize the state */ + SnP_StaticInitialize(); + SnP_Initialize(state); + + /* First, absorb whole blocks */ +#ifdef SnP_FastLoop_Absorb + if (((rateInBytes % (SnP_width/200)) == 0) && (inputByteLen >= rateInBytes)) { + /* fast lane: whole lane rate */ + size_t j; + j = SnP_FastLoop_Absorb(state, rateInBytes/(SnP_width/200), curInput, inputByteLen); + curInput += j; + inputByteLen -= j; + } +#endif + while(inputByteLen >= (size_t)rateInBytes) { + #ifdef KeccakReference + displayBytes(1, "Block to be absorbed", curInput, rateInBytes); + #endif + SnP_AddBytes(state, curInput, 0, rateInBytes); + SnP_Permute(state); + curInput += rateInBytes; 
+ inputByteLen -= rateInBytes; + } + + /* Then, absorb what remains */ + partialBlock = (unsigned int)inputByteLen; + #ifdef KeccakReference + displayBytes(1, "Block to be absorbed (part)", curInput, partialBlock); + #endif + SnP_AddBytes(state, curInput, 0, partialBlock); + + /* Finally, absorb the suffix */ + #ifdef KeccakReference + { + unsigned char delimitedData1[1]; + delimitedData1[0] = suffix; + displayBytes(1, "Block to be absorbed (last few bits + first bit of padding)", delimitedData1, 1); + } + #endif + /* Last few bits, whose delimiter coincides with first bit of padding */ + SnP_AddByte(state, suffix, partialBlock); + /* If the first bit of padding is at position rate-1, we need a whole new block for the second bit of padding */ + if ((suffix >= 0x80) && (partialBlock == (rateInBytes-1))) + SnP_Permute(state); + /* Second bit of padding */ + SnP_AddByte(state, 0x80, rateInBytes-1); + #ifdef KeccakReference + { + unsigned char block[SnP_width/8]; + memset(block, 0, SnP_width/8); + block[rateInBytes-1] = 0x80; + displayBytes(1, "Second bit of padding", block, rateInBytes); + } + #endif + SnP_Permute(state); + #ifdef KeccakReference + displayText(1, "--- Switching to squeezing phase ---"); + #endif + + /* First, output whole blocks */ + while(outputByteLen > (size_t)rateInBytes) { + SnP_ExtractBytes(state, curOutput, 0, rateInBytes); + SnP_Permute(state); + #ifdef KeccakReference + displayBytes(1, "Squeezed block", curOutput, rateInBytes); + #endif + curOutput += rateInBytes; + outputByteLen -= rateInBytes; + } + + /* Finally, output what remains */ + partialBlock = (unsigned int)outputByteLen; + SnP_ExtractBytes(state, curOutput, 0, partialBlock); + #ifdef KeccakReference + displayBytes(1, "Squeezed block (part)", curOutput, partialBlock); + #endif + + return 0; +} + +/* ---------------------------------------------------------------- */ +/* ---------------------------------------------------------------- */ +/* 
---------------------------------------------------------------- */ + +int SpongeInitialize(SpongeInstance *instance, unsigned int rate, unsigned int capacity) +{ + if (rate+capacity != SnP_width) + return 1; + if ((rate <= 0) || (rate > SnP_width) || ((rate % 8) != 0)) + return 1; + SnP_StaticInitialize(); + SnP_Initialize(instance->state); + instance->rate = rate; + instance->byteIOIndex = 0; + instance->squeezing = 0; + + return 0; +} + +/* ---------------------------------------------------------------- */ + +int SpongeAbsorb(SpongeInstance *instance, const unsigned char *data, size_t dataByteLen) +{ + size_t i, j; + unsigned int partialBlock; + const unsigned char *curData; + unsigned int rateInBytes = instance->rate/8; + + if (instance->squeezing) + return 1; /* Too late for additional input */ + + i = 0; + curData = data; + while(i < dataByteLen) { + if ((instance->byteIOIndex == 0) && (dataByteLen >= (i + rateInBytes))) { +#ifdef SnP_FastLoop_Absorb + /* processing full blocks first */ + if ((rateInBytes % (SnP_width/200)) == 0) { + /* fast lane: whole lane rate */ + j = SnP_FastLoop_Absorb(instance->state, rateInBytes/(SnP_width/200), curData, dataByteLen - i); + i += j; + curData += j; + } + else { +#endif + for(j=dataByteLen-i; j>=rateInBytes; j-=rateInBytes) { + #ifdef KeccakReference + displayBytes(1, "Block to be absorbed", curData, rateInBytes); + #endif + SnP_AddBytes(instance->state, curData, 0, rateInBytes); + SnP_Permute(instance->state); + curData+=rateInBytes; + } + i = dataByteLen - j; +#ifdef SnP_FastLoop_Absorb + } +#endif + } + else { + /* normal lane: using the message queue */ + partialBlock = (unsigned int)(dataByteLen - i); + if (partialBlock+instance->byteIOIndex > rateInBytes) + partialBlock = rateInBytes-instance->byteIOIndex; + #ifdef KeccakReference + displayBytes(1, "Block to be absorbed (part)", curData, partialBlock); + #endif + i += partialBlock; + + SnP_AddBytes(instance->state, curData, instance->byteIOIndex, partialBlock); + 
curData += partialBlock; + instance->byteIOIndex += partialBlock; + if (instance->byteIOIndex == rateInBytes) { + SnP_Permute(instance->state); + instance->byteIOIndex = 0; + } + } + } + return 0; +} + +/* ---------------------------------------------------------------- */ + +int SpongeAbsorbLastFewBits(SpongeInstance *instance, unsigned char delimitedData) +{ + unsigned int rateInBytes = instance->rate/8; + + if (delimitedData == 0) + return 1; + if (instance->squeezing) + return 1; /* Too late for additional input */ + + #ifdef KeccakReference + { + unsigned char delimitedData1[1]; + delimitedData1[0] = delimitedData; + displayBytes(1, "Block to be absorbed (last few bits + first bit of padding)", delimitedData1, 1); + } + #endif + /* Last few bits, whose delimiter coincides with first bit of padding */ + SnP_AddByte(instance->state, delimitedData, instance->byteIOIndex); + /* If the first bit of padding is at position rate-1, we need a whole new block for the second bit of padding */ + if ((delimitedData >= 0x80) && (instance->byteIOIndex == (rateInBytes-1))) + SnP_Permute(instance->state); + /* Second bit of padding */ + SnP_AddByte(instance->state, 0x80, rateInBytes-1); + #ifdef KeccakReference + { + unsigned char block[SnP_width/8]; + memset(block, 0, SnP_width/8); + block[rateInBytes-1] = 0x80; + displayBytes(1, "Second bit of padding", block, rateInBytes); + } + #endif + SnP_Permute(instance->state); + instance->byteIOIndex = 0; + instance->squeezing = 1; + #ifdef KeccakReference + displayText(1, "--- Switching to squeezing phase ---"); + #endif + return 0; +} + +/* ---------------------------------------------------------------- */ + +int SpongeSqueeze(SpongeInstance *instance, unsigned char *data, size_t dataByteLen) +{ + size_t i, j; + unsigned int partialBlock; + unsigned int rateInBytes = instance->rate/8; + unsigned char *curData; + + if (!instance->squeezing) + SpongeAbsorbLastFewBits(instance, 0x01); + + i = 0; + curData = data; + while(i < 
dataByteLen) { + if ((instance->byteIOIndex == rateInBytes) && (dataByteLen >= (i + rateInBytes))) { + for(j=dataByteLen-i; j>=rateInBytes; j-=rateInBytes) { + SnP_Permute(instance->state); + SnP_ExtractBytes(instance->state, curData, 0, rateInBytes); + #ifdef KeccakReference + displayBytes(1, "Squeezed block", curData, rateInBytes); + #endif + curData+=rateInBytes; + } + i = dataByteLen - j; + } + else { + /* normal lane: using the message queue */ + if (instance->byteIOIndex == rateInBytes) { + SnP_Permute(instance->state); + instance->byteIOIndex = 0; + } + partialBlock = (unsigned int)(dataByteLen - i); + if (partialBlock+instance->byteIOIndex > rateInBytes) + partialBlock = rateInBytes-instance->byteIOIndex; + i += partialBlock; + + SnP_ExtractBytes(instance->state, curData, instance->byteIOIndex, partialBlock); + #ifdef KeccakReference + displayBytes(1, "Squeezed block (part)", curData, partialBlock); + #endif + curData += partialBlock; + instance->byteIOIndex += partialBlock; + } + } + return 0; +} + +/* ---------------------------------------------------------------- */ + +#undef Sponge +#undef SpongeInstance +#undef SpongeInitialize +#undef SpongeAbsorb +#undef SpongeAbsorbLastFewBits +#undef SpongeSqueeze +#undef SnP_stateSizeInBytes +#undef SnP_stateAlignment +#undef SnP_StaticInitialize +#undef SnP_Initialize +#undef SnP_AddByte +#undef SnP_AddBytes +#undef SnP_ExtractBytes diff --git a/cbits/xkcp/README.md b/cbits/xkcp/README.md new file mode 100644 index 000000000..b80483776 --- /dev/null +++ b/cbits/xkcp/README.md @@ -0,0 +1,14 @@ +Keccak (SHA) implementation taken from the _eXtended Keccak Code Package_ + +[https://github.com/XKCP/XKCP](https://github.com/XKCP/XKCP/tree/5b81ec547d8c1453ff502e309379a99b1c43f338) + +This implementation extracted using: + +``` +make FIPS202-opt64.pack +``` + +Most of the source and header files in the XKCP are released to the **public domain** and associated to the 
+[CC0](http://creativecommons.org/publicdomain/zero/1.0/) deed, except for the file `brg_endian.h` which is copyrighted by Brian Gladman and comes with a BSD +3-clause license. + diff --git a/cbits/xkcp/SimpleFIPS202.c b/cbits/xkcp/SimpleFIPS202.c new file mode 100644 index 000000000..837afa977 --- /dev/null +++ b/cbits/xkcp/SimpleFIPS202.c @@ -0,0 +1,48 @@ +/* +The eXtended Keccak Code Package (XKCP) +https://github.com/XKCP/XKCP + +Keccak, designed by Guido Bertoni, Joan Daemen, Michaël Peeters and Gilles Van Assche. + +Implementation by Gilles Van Assche, hereby denoted as "the implementer". + +For more information, feedback or questions, please refer to the Keccak Team website: +https://keccak.team/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#include "KeccakSponge.h" +#include "SimpleFIPS202.h" + +int SHAKE128(unsigned char *output, size_t outputByteLen, const unsigned char *input, size_t inputByteLen) +{ + return KeccakWidth1600_Sponge(1344, 256, input, inputByteLen, 0x1F, output, outputByteLen); +} + +int SHAKE256(unsigned char *output, size_t outputByteLen, const unsigned char *input, size_t inputByteLen) +{ + return KeccakWidth1600_Sponge(1088, 512, input, inputByteLen, 0x1F, output, outputByteLen); +} + +int SHA3_224(unsigned char *output, const unsigned char *input, size_t inputByteLen) +{ + return KeccakWidth1600_Sponge(1152, 448, input, inputByteLen, 0x06, output, 224/8); +} + +int SHA3_256(unsigned char *output, const unsigned char *input, size_t inputByteLen) +{ + return KeccakWidth1600_Sponge(1088, 512, input, inputByteLen, 0x06, output, 256/8); +} + +int SHA3_384(unsigned char *output, const unsigned char *input, size_t inputByteLen) +{ + return KeccakWidth1600_Sponge( 832, 768, input, inputByteLen, 0x06, output, 384/8); +} + +int SHA3_512(unsigned char *output, const unsigned char *input, 
size_t inputByteLen) +{ + return KeccakWidth1600_Sponge(576, 1024, input, inputByteLen, 0x06, output, 512/8); +} diff --git a/cbits/xkcp/SimpleFIPS202.h b/cbits/xkcp/SimpleFIPS202.h new file mode 100644 index 000000000..ec4644030 --- /dev/null +++ b/cbits/xkcp/SimpleFIPS202.h @@ -0,0 +1,79 @@ +/* +The eXtended Keccak Code Package (XKCP) +https://github.com/XKCP/XKCP + +Keccak, designed by Guido Bertoni, Joan Daemen, Michaël Peeters and Gilles Van Assche. + +Implementation by Gilles Van Assche, hereby denoted as "the implementer". + +For more information, feedback or questions, please refer to the Keccak Team website: +https://keccak.team/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#ifndef _SimpleFIPS202_h_ +#define _SimpleFIPS202_h_ + +#include "config.h" +#ifdef XKCP_has_KeccakP1600 + +#include <string.h> /* provides size_t for the prototypes below */ + +/** Implementation of the SHAKE128 extendable output function (XOF) [FIPS 202]. + * @param output Pointer to the output buffer. + * @param outputByteLen The desired number of output bytes. + * @param input Pointer to the input message. + * @param inputByteLen The length of the input message in bytes. + * @return 0 if successful, 1 otherwise. + */ +int SHAKE128(unsigned char *output, size_t outputByteLen, const unsigned char *input, size_t inputByteLen); + +/** Implementation of the SHAKE256 extendable output function (XOF) [FIPS 202]. + * @param output Pointer to the output buffer. + * @param outputByteLen The desired number of output bytes. + * @param input Pointer to the input message. + * @param inputByteLen The length of the input message in bytes. + * @return 0 if successful, 1 otherwise. + */ +int SHAKE256(unsigned char *output, size_t outputByteLen, const unsigned char *input, size_t inputByteLen); + +/** Implementation of SHA3-224 [FIPS 202]. + * @param output Pointer to the output buffer (28 bytes).
+ * @param input Pointer to the input message. + * @param inputByteLen The length of the input message in bytes. + * @return 0 if successful, 1 otherwise. + */ +int SHA3_224(unsigned char *output, const unsigned char *input, size_t inputByteLen); + +/** Implementation of SHA3-256 [FIPS 202]. + * @param output Pointer to the output buffer (32 bytes). + * @param input Pointer to the input message. + * @param inputByteLen The length of the input message in bytes. + * @return 0 if successful, 1 otherwise. + */ +int SHA3_256(unsigned char *output, const unsigned char *input, size_t inputByteLen); + +/** Implementation of SHA3-384 [FIPS 202]. + * @param output Pointer to the output buffer (48 bytes). + * @param input Pointer to the input message. + * @param inputByteLen The length of the input message in bytes. + * @return 0 if successful, 1 otherwise. + */ +int SHA3_384(unsigned char *output, const unsigned char *input, size_t inputByteLen); + +/** Implementation of SHA3-512 [FIPS 202]. + * @param output Pointer to the output buffer (64 bytes). + * @param input Pointer to the input message. + * @param inputByteLen The length of the input message in bytes. + * @return 0 if successful, 1 otherwise. + */ +int SHA3_512(unsigned char *output, const unsigned char *input, size_t inputByteLen); + +#else +#error This requires an implementation of Keccak-p[1600] +#endif + +#endif diff --git a/cbits/xkcp/SnP-Relaned.h b/cbits/xkcp/SnP-Relaned.h new file mode 100644 index 000000000..631fb5ae2 --- /dev/null +++ b/cbits/xkcp/SnP-Relaned.h @@ -0,0 +1,141 @@ +/* +The eXtended Keccak Code Package (XKCP) +https://github.com/XKCP/XKCP + +Implementation by Gilles Van Assche and Ronny Van Keer, hereby denoted as "the implementer". + +For more information, feedback or questions, please refer to the Keccak Team website: +https://keccak.team/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. 
+http://creativecommons.org/publicdomain/zero/1.0/ + +--- + +This file contains macros that help implement a permutation in a SnP-compatible way. +It converts an implementation that implement state input/output functions +in a lane-oriented fashion (i.e., using SnP_AddLanes() and SnP_AddBytesInLane, +and similarly for Overwite, Extract and ExtractAndAdd) to the byte-oriented SnP. +Please refer to SnP-documentation.h for more details. +*/ + +#ifndef _SnP_Relaned_h_ +#define _SnP_Relaned_h_ + +#define SnP_AddBytes(state, data, offset, length, SnP_AddLanes, SnP_AddBytesInLane, SnP_laneLengthInBytes) \ + { \ + if ((offset) == 0) { \ + SnP_AddLanes(state, data, (length)/SnP_laneLengthInBytes); \ + SnP_AddBytesInLane(state, \ + (length)/SnP_laneLengthInBytes, \ + (data)+((length)/SnP_laneLengthInBytes)*SnP_laneLengthInBytes, \ + 0, \ + (length)%SnP_laneLengthInBytes); \ + } \ + else { \ + unsigned int _sizeLeft = (length); \ + unsigned int _lanePosition = (offset)/SnP_laneLengthInBytes; \ + unsigned int _offsetInLane = (offset)%SnP_laneLengthInBytes; \ + const unsigned char *_curData = (data); \ + while(_sizeLeft > 0) { \ + unsigned int _bytesInLane = SnP_laneLengthInBytes - _offsetInLane; \ + if (_bytesInLane > _sizeLeft) \ + _bytesInLane = _sizeLeft; \ + SnP_AddBytesInLane(state, _lanePosition, _curData, _offsetInLane, _bytesInLane); \ + _sizeLeft -= _bytesInLane; \ + _lanePosition++; \ + _offsetInLane = 0; \ + _curData += _bytesInLane; \ + } \ + } \ + } + +#define SnP_OverwriteBytes(state, data, offset, length, SnP_OverwriteLanes, SnP_OverwriteBytesInLane, SnP_laneLengthInBytes) \ + { \ + if ((offset) == 0) { \ + SnP_OverwriteLanes(state, data, (length)/SnP_laneLengthInBytes); \ + SnP_OverwriteBytesInLane(state, \ + (length)/SnP_laneLengthInBytes, \ + (data)+((length)/SnP_laneLengthInBytes)*SnP_laneLengthInBytes, \ + 0, \ + (length)%SnP_laneLengthInBytes); \ + } \ + else { \ + unsigned int _sizeLeft = (length); \ + unsigned int _lanePosition = 
(offset)/SnP_laneLengthInBytes; \ + unsigned int _offsetInLane = (offset)%SnP_laneLengthInBytes; \ + const unsigned char *_curData = (data); \ + while(_sizeLeft > 0) { \ + unsigned int _bytesInLane = SnP_laneLengthInBytes - _offsetInLane; \ + if (_bytesInLane > _sizeLeft) \ + _bytesInLane = _sizeLeft; \ + SnP_OverwriteBytesInLane(state, _lanePosition, _curData, _offsetInLane, _bytesInLane); \ + _sizeLeft -= _bytesInLane; \ + _lanePosition++; \ + _offsetInLane = 0; \ + _curData += _bytesInLane; \ + } \ + } \ + } + +#define SnP_ExtractBytes(state, data, offset, length, SnP_ExtractLanes, SnP_ExtractBytesInLane, SnP_laneLengthInBytes) \ + { \ + if ((offset) == 0) { \ + SnP_ExtractLanes(state, data, (length)/SnP_laneLengthInBytes); \ + SnP_ExtractBytesInLane(state, \ + (length)/SnP_laneLengthInBytes, \ + (data)+((length)/SnP_laneLengthInBytes)*SnP_laneLengthInBytes, \ + 0, \ + (length)%SnP_laneLengthInBytes); \ + } \ + else { \ + unsigned int _sizeLeft = (length); \ + unsigned int _lanePosition = (offset)/SnP_laneLengthInBytes; \ + unsigned int _offsetInLane = (offset)%SnP_laneLengthInBytes; \ + unsigned char *_curData = (data); \ + while(_sizeLeft > 0) { \ + unsigned int _bytesInLane = SnP_laneLengthInBytes - _offsetInLane; \ + if (_bytesInLane > _sizeLeft) \ + _bytesInLane = _sizeLeft; \ + SnP_ExtractBytesInLane(state, _lanePosition, _curData, _offsetInLane, _bytesInLane); \ + _sizeLeft -= _bytesInLane; \ + _lanePosition++; \ + _offsetInLane = 0; \ + _curData += _bytesInLane; \ + } \ + } \ + } + +#define SnP_ExtractAndAddBytes(state, input, output, offset, length, SnP_ExtractAndAddLanes, SnP_ExtractAndAddBytesInLane, SnP_laneLengthInBytes) \ + { \ + if ((offset) == 0) { \ + SnP_ExtractAndAddLanes(state, input, output, (length)/SnP_laneLengthInBytes); \ + SnP_ExtractAndAddBytesInLane(state, \ + (length)/SnP_laneLengthInBytes, \ + (input)+((length)/SnP_laneLengthInBytes)*SnP_laneLengthInBytes, \ + (output)+((length)/SnP_laneLengthInBytes)*SnP_laneLengthInBytes, \ + 0, \ 
+ (length)%SnP_laneLengthInBytes); \ + } \ + else { \ + unsigned int _sizeLeft = (length); \ + unsigned int _lanePosition = (offset)/SnP_laneLengthInBytes; \ + unsigned int _offsetInLane = (offset)%SnP_laneLengthInBytes; \ + const unsigned char *_curInput = (input); \ + unsigned char *_curOutput = (output); \ + while(_sizeLeft > 0) { \ + unsigned int _bytesInLane = SnP_laneLengthInBytes - _offsetInLane; \ + if (_bytesInLane > _sizeLeft) \ + _bytesInLane = _sizeLeft; \ + SnP_ExtractAndAddBytesInLane(state, _lanePosition, _curInput, _curOutput, _offsetInLane, _bytesInLane); \ + _sizeLeft -= _bytesInLane; \ + _lanePosition++; \ + _offsetInLane = 0; \ + _curInput += _bytesInLane; \ + _curOutput += _bytesInLane; \ + } \ + } \ + } + +#endif diff --git a/cbits/xkcp/align.h b/cbits/xkcp/align.h new file mode 100644 index 000000000..82ad2f905 --- /dev/null +++ b/cbits/xkcp/align.h @@ -0,0 +1,33 @@ +/* +The eXtended Keccak Code Package (XKCP) +https://github.com/XKCP/XKCP + +Implementation by Gilles Van Assche and Ronny Van Keer, hereby denoted as "the implementer". + +For more information, feedback or questions, please refer to the Keccak Team website: +https://keccak.team/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#ifndef _align_h_ +#define _align_h_ + +/* on Mac OS-X and possibly others, ALIGN(x) is defined in param.h, and -Werror chokes on the redef. 
*/ +#ifdef ALIGN +#undef ALIGN +#endif + +#if defined(__GNUC__) +#define ALIGN(x) __attribute__ ((aligned(x))) +#elif defined(_MSC_VER) +#define ALIGN(x) __declspec(align(x)) +#elif defined(__ARMCC_VERSION) +#define ALIGN(x) __align(x) +#else +#define ALIGN(x) +#endif + +#endif diff --git a/cbits/xkcp/brg_endian.h b/cbits/xkcp/brg_endian.h new file mode 100644 index 000000000..7c640b90e --- /dev/null +++ b/cbits/xkcp/brg_endian.h @@ -0,0 +1,143 @@ +/* + --------------------------------------------------------------------------- + Copyright (c) 1998-2008, Brian Gladman, Worcester, UK. All rights reserved. + + LICENSE TERMS + + The redistribution and use of this software (with or without changes) + is allowed without the payment of fees or royalties provided that: + + 1. source code distributions include the above copyright notice, this + list of conditions and the following disclaimer; + + 2. binary distributions include the above copyright notice, this list + of conditions and the following disclaimer in their documentation; + + 3. the name of the copyright holder is not used to endorse products + built using this software without specific written permission. + + DISCLAIMER + + This software is provided 'as is' with no explicit or implied warranties + in respect of its properties, including, but not limited to, correctness + and/or fitness for purpose. 
+ --------------------------------------------------------------------------- + Issue Date: 20/12/2007 + Changes for ARM 9/9/2010 +*/ + +#ifndef _BRG_ENDIAN_H +#define _BRG_ENDIAN_H + +#define IS_BIG_ENDIAN 4321 /* byte 0 is most significant (mc68k) */ +#define IS_LITTLE_ENDIAN 1234 /* byte 0 is least significant (i386) */ + +#if 0 +/* Include files where endian defines and byteswap functions may reside */ +#if defined( __sun ) +# include +#elif defined( __FreeBSD__ ) || defined( __OpenBSD__ ) || defined( __NetBSD__ ) +# include +#elif defined( BSD ) && ( BSD >= 199103 ) || defined( __APPLE__ ) || \ + defined( __CYGWIN32__ ) || defined( __DJGPP__ ) || defined( __osf__ ) +# include +#elif defined( __linux__ ) || defined( __GNUC__ ) || defined( __GNU_LIBRARY__ ) +# if !defined( __MINGW32__ ) && !defined( _AIX ) +# include +# if !defined( __BEOS__ ) +# include +# endif +# endif +#endif +#endif + +/* Now attempt to set the define for platform byte order using any */ +/* of the four forms SYMBOL, _SYMBOL, __SYMBOL & __SYMBOL__, which */ +/* seem to encompass most endian symbol definitions */ + +#if defined( BIG_ENDIAN ) && defined( LITTLE_ENDIAN ) +# if defined( BYTE_ORDER ) && BYTE_ORDER == BIG_ENDIAN +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +# elif defined( BYTE_ORDER ) && BYTE_ORDER == LITTLE_ENDIAN +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +# endif +#elif defined( BIG_ENDIAN ) +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +#elif defined( LITTLE_ENDIAN ) +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +#endif + +#if defined( _BIG_ENDIAN ) && defined( _LITTLE_ENDIAN ) +# if defined( _BYTE_ORDER ) && _BYTE_ORDER == _BIG_ENDIAN +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +# elif defined( _BYTE_ORDER ) && _BYTE_ORDER == _LITTLE_ENDIAN +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +# endif +#elif defined( _BIG_ENDIAN ) +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +#elif defined( _LITTLE_ENDIAN ) +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +#endif + +#if defined( 
__BIG_ENDIAN ) && defined( __LITTLE_ENDIAN ) +# if defined( __BYTE_ORDER ) && __BYTE_ORDER == __BIG_ENDIAN +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +# elif defined( __BYTE_ORDER ) && __BYTE_ORDER == __LITTLE_ENDIAN +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +# endif +#elif defined( __BIG_ENDIAN ) +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +#elif defined( __LITTLE_ENDIAN ) +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +#endif + +#if defined( __BIG_ENDIAN__ ) && defined( __LITTLE_ENDIAN__ ) +# if defined( __BYTE_ORDER__ ) && __BYTE_ORDER__ == __BIG_ENDIAN__ +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +# elif defined( __BYTE_ORDER__ ) && __BYTE_ORDER__ == __LITTLE_ENDIAN__ +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +# endif +#elif defined( __BIG_ENDIAN__ ) +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +#elif defined( __LITTLE_ENDIAN__ ) +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +#endif + +/* if the platform byte order could not be determined, then try to */ +/* set this define using common machine defines */ +#if !defined(PLATFORM_BYTE_ORDER) + +#if defined( __alpha__ ) || defined( __alpha ) || defined( i386 ) || \ + defined( __i386__ ) || defined( _M_I86 ) || defined( _M_IX86 ) || \ + defined( __OS2__ ) || defined( sun386 ) || defined( __TURBOC__ ) || \ + defined( vax ) || defined( vms ) || defined( VMS ) || \ + defined( __VMS ) || defined( _M_X64 ) +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN + +#elif defined( AMIGA ) || defined( applec ) || defined( __AS400__ ) || \ + defined( _CRAY ) || defined( __hppa ) || defined( __hp9000 ) || \ + defined( ibm370 ) || defined( mc68000 ) || defined( m68k ) || \ + defined( __MRC__ ) || defined( __MVS__ ) || defined( __MWERKS__ ) || \ + defined( sparc ) || defined( __sparc) || defined( SYMANTEC_C ) || \ + defined( __VOS__ ) || defined( __TIGCC__ ) || defined( __TANDEM ) || \ + defined( THINK_C ) || defined( __VMCMS__ ) || defined( _AIX ) || \ + defined( __s390__ ) || defined( __s390x__ ) || defined( __zarch__ ) 
+# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN + +#elif defined(__arm__) +# ifdef __BIG_ENDIAN +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +# else +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +# endif +#elif 1 /* **** EDIT HERE IF NECESSARY **** */ +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +#elif 0 /* **** EDIT HERE IF NECESSARY **** */ +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +#else +# error Please edit lines 132 or 134 in brg_endian.h to set the platform byte order +#endif + +#endif + +#endif diff --git a/cbits/xkcp/config.h b/cbits/xkcp/config.h new file mode 100644 index 000000000..97f857dd1 --- /dev/null +++ b/cbits/xkcp/config.h @@ -0,0 +1,5 @@ +/* File generated by ToTargetConfigFile.xsl */ + +#define XKCP_has_Sponge_Keccak +#define XKCP_has_FIPS202 +#define XKCP_has_KeccakP1600 diff --git a/cbits/xkcp/test.c b/cbits/xkcp/test.c new file mode 100644 index 000000000..ee22d4661 --- /dev/null +++ b/cbits/xkcp/test.c @@ -0,0 +1,10 @@ + +#include "KeccakHash.h" +#include + +int main() +{ + printf("sizeof(Keccak_HashInstance) = %ld\n", sizeof(Keccak_HashInstance)); + return 0; +} + diff --git a/src/Crypto/Hash/XKCP.hs b/src/Crypto/Hash/XKCP.hs new file mode 100644 index 000000000..07bfe99c5 --- /dev/null +++ b/src/Crypto/Hash/XKCP.hs @@ -0,0 +1,172 @@ +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE CPP #-} +{-# LANGUAGE ForeignFunctionInterface #-} +{-# LANGUAGE MagicHash #-} +{-# LANGUAGE TemplateHaskell #-} +{-# LANGUAGE UnboxedTuples #-} +{-# LANGUAGE UnliftedFFITypes #-} +{-# OPTIONS_GHC -fobject-code #-} +{-# OPTIONS_HADDOCK hide #-} +-- | +-- Module : Crypto.Hash.XKCP +-- Copyright : [2016..2022] The Accelerate Team +-- License : BSD3 +-- +-- Maintainer : Trevor L. 
McDonell +-- Stability : experimental +-- Portability : non-portable (GHC extensions) +-- + +module Crypto.Hash.XKCP ( + + SHA3_256, + hash, hashlazy, + +) where + +import Control.Monad +import Data.Word +import Foreign.C.Types +import Foreign.Ptr +import Language.Haskell.TH.Syntax +import Numeric +import System.IO.Unsafe + +import qualified Data.ByteString as S +import qualified Data.ByteString.Lazy as L +import qualified Data.ByteString.Lazy.Internal as L +import qualified Data.ByteString.Unsafe as B + +import GHC.Base +import GHC.Exts +import GHC.Word + + +-- | SHA3 (256 bits) cryptographic hash digest +-- +data SHA3_256 = SHA3_256 ByteArray# + +instance Show SHA3_256 where + show (SHA3_256 ba#) = + let go !i# = + case i# <# 32# of + 0# -> [] + _ -> let w = W8# (indexWord8Array# ba# i#) -- showHex does not zero-pad, so bytes below 0x10 need an explicit leading '0' + in (if w < 16 then showChar '0' else id) (showHex w (go (i# +# 1#))) -- always two hex digits per byte: fixed-width, unambiguous rendering + in go 0# + +instance Eq SHA3_256 where + SHA3_256 ba1# == SHA3_256 ba2# = + case reallyUnsafePtrEquality# (unsafeCoerce# ba1#) (unsafeCoerce# ba2#) of + 1# -> True + _ -> case (compareByteArrays# ba1# 0# ba2# 0# 32#) of + 0# -> True + _ -> False + +instance Ord SHA3_256 where + compare (SHA3_256 ba1#) (SHA3_256 ba2#) = + let go !i# = + case i# <# 32# of + 0# -> EQ + _ -> case W8# (indexWord8Array# ba1# i#) `compare` W8# (indexWord8Array# ba2# i#) of + EQ -> go (i# +# 1#) + r -> r + in + go 0# + + +-- | Hash a strict 'S.ByteString' into a digest +-- +hash :: S.ByteString -> SHA3_256 +hash bs = unsafePerformIO $! + B.unsafeUseAsCStringLen bs $ \(p, n) -> keccak_Hash_SHA3_256 (castPtr p) n + + +-- | Hash a lazy 'L.ByteString' into a digest +-- +hashlazy :: L.ByteString -> SHA3_256 +hashlazy lbs = unsafePerformIO $!
do + s <- keccak_HashInitialize_SHA3_256 + let go L.Empty = keccak_HashFinal_SHA3_256 s + go (L.Chunk c cs) = do + B.unsafeUseAsCStringLen c $ \(p, n) -> keccak_HashUpdate_SHA3_256 s (castPtr p) n + go cs + go lbs + + +-- Internals +-- ----------------------------------------------------------------------------- + +keccak_Hash_SHA3_256 :: Ptr Word8 -> Int -> IO SHA3_256 +keccak_Hash_SHA3_256 ptr len = + IO $ \s0 -> + case newByteArray# 32# s0 of { (# s1, mba# #) -> + case c_sha3_256 mba# ptr (fromIntegral len) of { IO c_sha3_256# -> + case c_sha3_256# s1 of { (# s2, _ #) -> + case unsafeFreezeByteArray# mba# s2 of { (# s3, hash_val# #) -> + (# s3, SHA3_256 hash_val# #) + }}}} + + +data Keccak_HashInstance = Keccak_HashInstance (MutableByteArray# RealWorld) + +-- See: KeccakHash.h for magic numbers +-- +keccak_HashInitialize_SHA3_256 :: IO Keccak_HashInstance +keccak_HashInitialize_SHA3_256 = + IO $ \s0 -> + case newByteArray# 224# s0 of { (# s1, hash_instance# #) -> + case c_keccak_hash_initialise hash_instance# 1088 512 256 0x06 of { IO c_keccak_hash_initialise# -> + case c_keccak_hash_initialise# s1 of { (# s2, _ #) -> + (# s2, Keccak_HashInstance hash_instance# #) + }}} + +keccak_HashUpdate_SHA3_256 :: Keccak_HashInstance -> Ptr Word8 -> Int -> IO () +keccak_HashUpdate_SHA3_256 (Keccak_HashInstance hash_instance#) ptr len = + void $ c_keccak_hash_update hash_instance# ptr (fromIntegral len * 8) + +keccak_HashFinal_SHA3_256 :: Keccak_HashInstance -> IO SHA3_256 +keccak_HashFinal_SHA3_256 (Keccak_HashInstance hash_instance#) = + IO $ \s0 -> + case newByteArray# 32# s0 of { (# s1, mba# #) -> + case c_keccak_hash_final hash_instance# mba# of { IO c_keccak_hash_final# -> + case c_keccak_hash_final# s1 of { (# s2, _ #) -> + case unsafeFreezeByteArray# mba# s2 of { (# s3, hash_val# #) -> + (# s3, SHA3_256 hash_val# #) + }}}} + +-- SEE: [HLS and GHC IDE] +-- +#ifndef __GHCIDE__ + +foreign import ccall unsafe "SHA3_256" c_sha3_256 :: MutableByteArray# RealWorld -> Ptr 
Word8 -> CSize -> IO CInt +foreign import ccall unsafe "Keccak_HashInitialize" c_keccak_hash_initialise :: MutableByteArray# RealWorld -> CUInt -> CUInt -> CUInt -> CUChar -> IO CInt +foreign import ccall unsafe "Keccak_HashUpdate" c_keccak_hash_update :: MutableByteArray# RealWorld -> Ptr Word8 -> CSize -> IO CInt +foreign import ccall unsafe "Keccak_HashFinal" c_keccak_hash_final :: MutableByteArray# RealWorld -> MutableByteArray# RealWorld -> IO CInt + +#else +-- Stubs used when __GHCIDE__ is defined; each signature must mirror the corresponding foreign import so call sites still typecheck +c_sha3_256 :: MutableByteArray# RealWorld -> Ptr Word8 -> CSize -> IO CInt +c_sha3_256 = undefined + +c_keccak_hash_initialise :: MutableByteArray# RealWorld -> CUInt -> CUInt -> CUInt -> CUChar -> IO CInt +c_keccak_hash_initialise = undefined + +c_keccak_hash_update :: MutableByteArray# RealWorld -> Ptr Word8 -> CSize -> IO CInt +c_keccak_hash_update = undefined + +c_keccak_hash_final :: MutableByteArray# RealWorld -> MutableByteArray# RealWorld -> IO CInt +c_keccak_hash_final = undefined + +#endif + + +-- SEE: [linking to .c files] +-- +runQ $ do + addForeignFilePath LangC "cbits/xkcp/KeccakHash.c" + addForeignFilePath LangC "cbits/xkcp/KeccakSponge.c" + addForeignFilePath LangC "cbits/xkcp/SimpleFIPS202.c" + addForeignFilePath LangC "cbits/xkcp/KeccakP-1600-opt64.c" + return [] + diff --git a/src/Data/Array/Accelerate/Analysis/Hash.hs b/src/Data/Array/Accelerate/Analysis/Hash.hs index 75625b9ec..7e0fe88c9 100644 --- a/src/Data/Array/Accelerate/Analysis/Hash.hs +++ b/src/Data/Array/Accelerate/Analysis/Hash.hs @@ -47,9 +47,8 @@ import Data.Array.Accelerate.Representation.Stencil import Data.Array.Accelerate.Representation.Type import Data.Array.Accelerate.Type import Data.Primitive.Vec +import Crypto.Hash.XKCP -import Crypto.Hash -import qualified Data.Hashable as Hashable import Data.ByteString.Builder import Data.ByteString.Builder.Extra import Data.ByteString.Short.Internal ( ShortByteString(..)
) @@ -57,12 +56,13 @@ import Data.Monoid import System.IO.Unsafe ( unsafePerformIO ) import System.Mem.StableName ( hashStableName, makeStableName ) import Prelude hiding ( exp ) +import qualified Data.Hashable as Hashable -- Hashing -- ------- -type Hash = Digest SHA3_256 +type Hash = SHA3_256 data HashOptions = HashOptions { perfect :: Bool