diff --git a/base64_arm64.go b/base64_arm64.go
index bd269d3..febf0e9 100644
--- a/base64_arm64.go
+++ b/base64_arm64.go
@@ -3,9 +3,35 @@
 
 package base64
 
+// dencodeStdLut1 is a 128-byte decode table for the standard alphabet, laid
+// out as two 64-byte halves. An entry of 255 marks a byte that has no 6-bit
+// value. Presumably this is the table passed as decodeAsm's lut argument.
+//
+// Entries 0..63 are indexed directly by the input byte: '+' -> 62, '/' -> 63,
+// '0'..'9' -> 52..61.
+//
+// Entries 64..127 are indexed by (input byte - 63): 'A'..'Z' -> 0..25 sit at
+// offsets 2..27 and 'a'..'z' -> 26..51 at offsets 34..59. Offset 0 holds 0; it
+// is the slot every byte <= 63 lands on after a saturating subtract of 63.
+var dencodeStdLut1 = [128]byte{
+	255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+	255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+	255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 62, 255, 255, 255, 63,
+	52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 255, 255, 255, 255, 255, 255,
+	0, 255, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,
+	14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 255, 255, 255, 255,
+	255, 255, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39,
+	40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 255, 255, 255, 255,
+}
+
 //go:noescape
 func encodeAsm(dst, src []byte, lut *[64]byte) int
 
+// decodeAsm is implemented in assembly in base64_arm64.s.
+//
+//go:noescape
+func decodeAsm(dst, src []byte, lut *[128]byte) int
+
 func encode(enc *Encoding, dst, src []byte) {
 	if len(src) >= 48 {
 		encoded := encodeAsm(dst, src, &enc.encode)
diff --git a/base64_arm64.s b/base64_arm64.s
index f6a4e7f..6000e57 100644
--- a/base64_arm64.s
+++ b/base64_arm64.s
@@ -53,3 +53,45 @@ loop:
 done:
 	MOVD R5, ret+56(FP)
 	RET
+
+//func decodeAsm(dst, src []byte, lut *[128]byte) int
+//
+// Walks src in 64-byte groups and returns the number of src bytes left over
+// (fewer than 64) once no full group remains.
+// NOTE(review): only the load and offset-subtract steps exist so far; the
+// table lookups and the stores to dst are still missing, so dst is untouched.
+TEXT ·decodeAsm(SB),NOSPLIT,$0
+	MOVD dst_base+0(FP), R0
+	MOVD src_base+24(FP), R1
+	MOVD src_len+32(FP), R2
+	MOVD lut+48(FP), R3
+
+	// V8-V11 = lut[0:64]; the post-increment advances R3 so that
+	// V12-V15 = lut[64:128].
+	VLD1.P 64(R3), [V8.B16, V9.B16, V10.B16, V11.B16]
+	VLD1 (R3), [V12.B16, V13.B16, V14.B16, V15.B16]
+	// V7 = 63 in every byte lane. The second table half is indexed by
+	// (byte - 63), e.g. 'A' (65) -> index 2, so the offset must be decimal 63.
+	MOVD $63, R4
+	VDUP R4, V7.B16
+
+loop:
+	// Stop once fewer than 64 input bytes remain.
+	CMP $64, R2
+	BLT done
+
+	// Load 64 input bytes de-interleaved into V0-V3 and advance R1 by 64.
+	VLD4.P 64(R1), [V0.B16, V1.B16, V2.B16, V3.B16]
+	// V16-V19 = V0-V3 minus 63 with unsigned saturation (bytes <= 63 become 0).
+	VUQSUB V7.B16, V0.B16, V16.B16
+	VUQSUB V7.B16, V1.B16, V17.B16
+	VUQSUB V7.B16, V2.B16, V18.B16
+	VUQSUB V7.B16, V3.B16, V19.B16
+
+	SUB $64, R2
+	B loop
+
+done:
+	// Return the count of unprocessed src bytes.
+	MOVD R2, ret+56(FP)
+	RET