Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

8333867: SHA3 performance can be improved #1000

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions src/hotspot/share/opto/library_call.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7428,7 +7428,7 @@ bool LibraryCallKit::inline_digestBase_implCompress(vmIntrinsics::ID id) {
break;
case vmIntrinsics::_sha3_implCompress:
assert(UseSHA3Intrinsics, "need SHA3 instruction support");
state = get_state_from_digest_object(digestBase_obj, T_BYTE);
state = get_state_from_digest_object(digestBase_obj, T_LONG);
stubAddr = StubRoutines::sha3_implCompress();
stubName = "sha3_implCompress";
block_size = get_block_size_from_digest_object(digestBase_obj);
Expand Down Expand Up @@ -7528,7 +7528,7 @@ bool LibraryCallKit::inline_digestBase_implCompressMB(int predicate) {
klass_digestBase_name = "sun/security/provider/SHA3";
stub_name = "sha3_implCompressMB";
stub_addr = StubRoutines::sha3_implCompressMB();
elem_type = T_BYTE;
elem_type = T_LONG;
}
break;
default:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2003, 2021, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2003, 2024, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
Expand Down Expand Up @@ -61,7 +61,7 @@ abstract class DigestBase extends MessageDigestSpi implements Cloneable {
private final int digestLength;

// size of the input to the compression function in bytes
private final int blockSize;
protected final int blockSize;
// buffer to store partial blocks, blockSize bytes large
// Subclasses should not access this array directly except possibly in their
// implDigest() method. See MD5.java as an example.
Expand Down
105 changes: 47 additions & 58 deletions src/java.base/share/classes/sun/security/provider/SHA3.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2016, 2023, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2016, 2024, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
Expand All @@ -25,13 +25,14 @@

package sun.security.provider;

import java.lang.invoke.MethodHandles;
import java.lang.invoke.VarHandle;
import java.nio.ByteOrder;
import java.security.ProviderException;
import java.util.Arrays;
import java.util.Objects;

import jdk.internal.vm.annotation.IntrinsicCandidate;
import static sun.security.provider.ByteArrayAccess.b2lLittle;
import static sun.security.provider.ByteArrayAccess.l2bLittle;

/**
* This class implements the Secure Hash Algorithm SHA-3 developed by
Expand All @@ -48,7 +49,7 @@
abstract class SHA3 extends DigestBase {

private static final int WIDTH = 200; // in bytes, e.g. 1600 bits
private static final int DM = 5; // dimension of lanes
private static final int DM = 5; // dimension of state matrix

private static final int NR = 24; // number of rounds

Expand All @@ -65,8 +66,11 @@ abstract class SHA3 extends DigestBase {
};

private final byte suffix;
private byte[] state = new byte[WIDTH];
private long[] lanes = new long[DM*DM];
private long[] state = new long[DM*DM];

static final VarHandle asLittleEndian
= MethodHandles.byteArrayViewVarHandle(long[].class,
ByteOrder.LITTLE_ENDIAN).withInvokeExactBehavior();

/**
* Creates a new SHA-3 object.
Expand All @@ -91,40 +95,56 @@ void implCompress(byte[] b, int ofs) {

@IntrinsicCandidate
private void implCompress0(byte[] b, int ofs) {
for (int i = 0; i < buffer.length; i++) {
state[i] ^= b[ofs++];
}
keccak();
for (int i = 0; i < blockSize / 8; i++) {
state[i] ^= (long) asLittleEndian.get(b, ofs);
ofs += 8;
}

keccak();
}

/**
* Return the digest. Subclasses do not need to reset() themselves,
* DigestBase calls implReset() when necessary.
*/
void implDigest(byte[] out, int ofs) {
byte[] byteState = new byte[8];
int numOfPadding =
setPaddingBytes(suffix, buffer, (int)(bytesProcessed % buffer.length));
setPaddingBytes(suffix, buffer, (int)(bytesProcessed % blockSize));
if (numOfPadding < 1) {
throw new ProviderException("Incorrect pad size: " + numOfPadding);
}
implCompress(buffer, 0);
int availableBytes = buffer.length;
int availableBytes = blockSize; // i.e. buffer.length
int numBytes = engineGetDigestLength();
while (numBytes > availableBytes) {
System.arraycopy(state, 0, out, ofs, availableBytes);
for (int i = 0; i < availableBytes / 8 ; i++) {
asLittleEndian.set(out, ofs, state[i]);
ofs += 8;
}
numBytes -= availableBytes;
ofs += availableBytes;
keccak();
}
System.arraycopy(state, 0, out, ofs, numBytes);
int numLongs = (numBytes + 7) / 8;

for (int i = 0; i < numLongs - 1; i++) {
asLittleEndian.set(out, ofs, state[i]);
ofs += 8;
}
if (numBytes == numLongs * 8) {
asLittleEndian.set(out, ofs, state[numLongs - 1]);
} else {
asLittleEndian.set(byteState, 0, state[numLongs - 1]);
System.arraycopy(byteState, 0,
out, ofs, numBytes - (numLongs - 1) * 8);
}
}

/**
* Resets the internal state to start a new hash.
*/
void implReset() {
Arrays.fill(state, (byte)0);
Arrays.fill(lanes, 0L);
Arrays.fill(state, 0L);
}

/**
Expand All @@ -144,46 +164,19 @@ private static int setPaddingBytes(byte suffix, byte[] in, int len) {
return (in.length - len);
}

/**
* Utility function for transforming the specified byte array 's'
* into array of lanes 'm' as defined in section 3.1.2.
*/
private static void bytes2Lanes(byte[] s, long[] m) {
int sOfs = 0;
// Conversion traverses along x-axis before y-axis
for (int y = 0; y < DM; y++, sOfs += 40) {
b2lLittle(s, sOfs, m, DM*y, 40);
}
}

/**
* Utility function for transforming the specified array of
* lanes 'm' into a byte array 's' as defined in section 3.1.3.
*/
private static void lanes2Bytes(long[] m, byte[] s) {
int sOfs = 0;
// Conversion traverses along x-axis before y-axis
for (int y = 0; y < DM; y++, sOfs += 40) {
l2bLittle(m, DM*y, s, sOfs, 40);
}
}

/**
* The function Keccak as defined in section 5.2 with
* rate r = 1600 and capacity c.
*/
private void keccak() {
// convert the 200-byte state into 25 lanes
bytes2Lanes(state, lanes);

long a0, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12;
long a13, a14, a15, a16, a17, a18, a19, a20, a21, a22, a23, a24;
// move data into local variables
a0 = lanes[0]; a1 = lanes[1]; a2 = lanes[2]; a3 = lanes[3]; a4 = lanes[4];
a5 = lanes[5]; a6 = lanes[6]; a7 = lanes[7]; a8 = lanes[8]; a9 = lanes[9];
a10 = lanes[10]; a11 = lanes[11]; a12 = lanes[12]; a13 = lanes[13]; a14 = lanes[14];
a15 = lanes[15]; a16 = lanes[16]; a17 = lanes[17]; a18 = lanes[18]; a19 = lanes[19];
a20 = lanes[20]; a21 = lanes[21]; a22 = lanes[22]; a23 = lanes[23]; a24 = lanes[24];
a0 = state[0]; a1 = state[1]; a2 = state[2]; a3 = state[3]; a4 = state[4];
a5 = state[5]; a6 = state[6]; a7 = state[7]; a8 = state[8]; a9 = state[9];
a10 = state[10]; a11 = state[11]; a12 = state[12]; a13 = state[13]; a14 = state[14];
a15 = state[15]; a16 = state[16]; a17 = state[17]; a18 = state[18]; a19 = state[19];
a20 = state[20]; a21 = state[21]; a22 = state[22]; a23 = state[23]; a24 = state[24];

// process the lanes through step mappings
for (int ir = 0; ir < NR; ir++) {
Expand Down Expand Up @@ -287,20 +280,16 @@ private void keccak() {
a0 ^= RC_CONSTANTS[ir];
}

lanes[0] = a0; lanes[1] = a1; lanes[2] = a2; lanes[3] = a3; lanes[4] = a4;
lanes[5] = a5; lanes[6] = a6; lanes[7] = a7; lanes[8] = a8; lanes[9] = a9;
lanes[10] = a10; lanes[11] = a11; lanes[12] = a12; lanes[13] = a13; lanes[14] = a14;
lanes[15] = a15; lanes[16] = a16; lanes[17] = a17; lanes[18] = a18; lanes[19] = a19;
lanes[20] = a20; lanes[21] = a21; lanes[22] = a22; lanes[23] = a23; lanes[24] = a24;

// convert the resulting 25 lanes back into 200-byte state
lanes2Bytes(lanes, state);
state[0] = a0; state[1] = a1; state[2] = a2; state[3] = a3; state[4] = a4;
state[5] = a5; state[6] = a6; state[7] = a7; state[8] = a8; state[9] = a9;
state[10] = a10; state[11] = a11; state[12] = a12; state[13] = a13; state[14] = a14;
state[15] = a15; state[16] = a16; state[17] = a17; state[18] = a18; state[19] = a19;
state[20] = a20; state[21] = a21; state[22] = a22; state[23] = a23; state[24] = a24;
}

public Object clone() throws CloneNotSupportedException {
SHA3 copy = (SHA3) super.clone();
copy.state = copy.state.clone();
copy.lanes = new long[DM*DM];
return copy;
}

Expand Down