diff --git a/README-CN.md b/README-CN.md
index 2939d0e..e8bd6c0 100644
--- a/README-CN.md
+++ b/README-CN.md
@@ -4,6 +4,7 @@
 使用SIMD指令加速的Base64实现。
 
 [![ci](https://github.com/emmansun/base64/actions/workflows/ci.yml/badge.svg)](https://github.com/emmansun/base64/actions/workflows/ci.yml)
+[![arm64-qemu](https://github.com/emmansun/base64/actions/workflows/ci_qemu.yml/badge.svg)](https://github.com/emmansun/base64/actions/workflows/ci_qemu.yml)
 ![GitHub go.mod Go version (branch)](https://img.shields.io/github/go-mod/go-version/emmansun/base64)
 
 ## 致谢
diff --git a/README.md b/README.md
index b4bf1e9..a7a47dc 100644
--- a/README.md
+++ b/README.md
@@ -4,6 +4,7 @@ English | [简体中文](README-CN.md)
 Base64 with SIMD acceleration
 
 [![ci](https://github.com/emmansun/base64/actions/workflows/ci.yml/badge.svg)](https://github.com/emmansun/base64/actions/workflows/ci.yml)
+[![arm64-qemu](https://github.com/emmansun/base64/actions/workflows/ci_qemu.yml/badge.svg)](https://github.com/emmansun/base64/actions/workflows/ci_qemu.yml)
 ![GitHub go.mod Go version (branch)](https://img.shields.io/github/go-mod/go-version/emmansun/base64)
 
 ## Acknowledgements
diff --git a/base64_arm64.go b/base64_arm64.go
new file mode 100644
index 0000000..cd9b8f3
--- /dev/null
+++ b/base64_arm64.go
@@ -0,0 +1,13 @@
+//go:build arm64 && !purego
+// +build arm64,!purego
+
+package base64
+
+// encodeAsm encodes src into dst with NEON, 48 input bytes (64 output bytes)
+// per iteration, mapping each 6-bit value through lut. Input left over after
+// the last full 48-byte block is not processed. dst's length is not checked
+// by the assembly, so it must have room for 64 bytes per full block. It
+// returns the number of bytes written to dst.
+//
+//go:noescape
+func encodeAsm(dst, src []byte, lut *[64]byte) int
diff --git a/base64_arm64.s b/base64_arm64.s
new file mode 100644
index 0000000..755c34a
--- /dev/null
+++ b/base64_arm64.s
@@ -0,0 +1,56 @@
+// Reference
+// https://github.com/aklomp/base64/blob/master/lib/arch/neon64/enc_loop.c
+//go:build arm64 && !purego
+// +build arm64,!purego
+
+#include "textflag.h"
+
+//func encodeAsm(dst, src []byte, lut *[64]byte) int
+TEXT ·encodeAsm(SB),NOSPLIT,$0
+	MOVD dst_base+0(FP), R0
+	MOVD src_base+24(FP), R1
+	MOVD src_len+32(FP), R2
+	MOVD lut+48(FP), R3
+
+	// Load the 64-byte lookup table into V8-V11 and broadcast the 6-bit mask 0x3F (held in R4) to every byte of V7.
+	VLD1 (R3), [V8.B16, V9.B16, V10.B16, V11.B16]
+	MOVB $0x3F, R4
+	VDUP R4, V7.B16
+	EOR R5, R5, R5 // R5 counts the bytes written to dst
+
+loop:
+	CMP $48, R2
+	BLT done
+
+	// Move the input bits to where they need to be in the outputs. Except
+	// for the first output, the high two bits are not cleared.
+	VLD3.P 48(R1), [V0.B16, V1.B16, V2.B16]
+	VUSHR $2, V0.B16, V3.B16
+	VUSHR $4, V1.B16, V4.B16
+	VUSHR $6, V2.B16, V5.B16
+	VSLI $4, V0.B16, V4.B16
+	VSLI $2, V1.B16, V5.B16
+
+	// Clear the high two bits in the second, third and fourth output.
+	VAND V7.B16, V4.B16, V4.B16
+	VAND V7.B16, V5.B16, V5.B16
+	VAND V7.B16, V2.B16, V6.B16
+
+	// The bits have now been shifted to the right locations;
+	// translate their values 0..63 to the Base64 alphabet.
+	// Use a 64-byte table lookup:
+	VTBL V3.B16, [V8.B16, V9.B16, V10.B16, V11.B16], V3.B16
+	VTBL V4.B16, [V8.B16, V9.B16, V10.B16, V11.B16], V4.B16
+	VTBL V5.B16, [V8.B16, V9.B16, V10.B16, V11.B16], V5.B16
+	VTBL V6.B16, [V8.B16, V9.B16, V10.B16, V11.B16], V6.B16
+
+	// Interleave and store output:
+	VST4.P [V3.B16, V4.B16, V5.B16, V6.B16], 64(R0)
+
+	SUB $48, R2
+	ADD $64, R5
+	B loop
+
+done:
+	MOVD R5, ret+56(FP)
+	RET
diff --git a/base64_arm64_test.go b/base64_arm64_test.go
new file mode 100644
index 0000000..795c54a
--- /dev/null
+++ b/base64_arm64_test.go
@@ -0,0 +1,31 @@
+//go:build arm64 && !purego
+// +build arm64,!purego
+
+package base64
+
+import (
+	"bytes"
+	"testing"
+)
+
+// TestStdEncodeSIMD checks encodeAsm against known encodings for inputs that
+// are exact multiples of the 48-byte block size (48 and 96 bytes).
+func TestStdEncodeSIMD(t *testing.T) {
+	pairs := []testpair{
+		{"abcdefghijklabcdefghijklabcdefghijklabcdefghijkl", "YWJjZGVmZ2hpamtsYWJjZGVmZ2hpamtsYWJjZGVmZ2hpamtsYWJjZGVmZ2hpamts"},
+		{"abcdefghijklabcdefghijklabcdefghijklabcdefghijklabcdefghijklabcdefghijklabcdefghijklabcdefghijkl", "YWJjZGVmZ2hpamtsYWJjZGVmZ2hpamtsYWJjZGVmZ2hpamtsYWJjZGVmZ2hpamtsYWJjZGVmZ2hpamtsYWJjZGVmZ2hpamtsYWJjZGVmZ2hpamtsYWJjZGVmZ2hpamts"},
+	}
+	for _, p := range pairs {
+		src := []byte(p.decoded)
+		expected := []byte(p.encoded)
+		dst := make([]byte, len(expected))
+
+		ret := encodeAsm(dst, src, &encodeStd)
+		if ret != len(expected) {
+			t.Fatalf("should return %v", len(expected))
+		}
+		if !bytes.Equal(dst, expected) {
+			t.Fatalf("got %v", string(dst))
+		}
+	}
+}