Skip to content

Commit

Permalink
s390x: decode #19
Browse files Browse the repository at this point in the history
  • Loading branch information
emmansun authored Oct 17, 2024
1 parent 11e4128 commit 2ef02a2
Show file tree
Hide file tree
Showing 6 changed files with 151 additions and 51 deletions.
2 changes: 1 addition & 1 deletion base64_asm.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD 3-Clause-style
// license that can be found in the LICENSE file.

//go:build (amd64 || ppc64 || ppc64le) && !purego
//go:build (amd64 || ppc64 || ppc64le || s390x) && !purego

package base64

Expand Down
2 changes: 1 addition & 1 deletion base64_asm_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD 3-Clause-style
// license that can be found in the LICENSE file.

//go:build (amd64 || ppc64 || ppc64le) && !purego
//go:build (amd64 || ppc64 || ppc64le || s390x) && !purego

package base64

Expand Down
2 changes: 1 addition & 1 deletion base64_generic.go
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
//go:build !(amd64 || arm64 || ppc64 || ppc64le) || purego
//go:build !(amd64 || arm64 || ppc64 || ppc64le || s390x) || purego

package base64

Expand Down
10 changes: 0 additions & 10 deletions base64_s390x.go

This file was deleted.

149 changes: 148 additions & 1 deletion base64_s390x.s
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,26 @@ DATA base64_const<>+0x50(SB)/8, $0x0100001001000010 // mullo const
DATA base64_const<>+0x58(SB)/8, $0x0100001001000010
GLOBL base64_const<>(SB), (NOPTR+RODATA), $96

// decode_const is the read-only constant pool for the base64 decoders.
// It is 144 bytes long and organised as nine 16-byte rows:
//   0x00  standard alphabet: validation lut indexed by high nibble
//   0x10  standard alphabet: validation lut indexed by low nibble
//   0x20  standard alphabet: per-byte offset ("roll") lut
//   0x30  reshuffle const0 (byte pairs 0x40, 0x01)
//   0x40  reshuffle const1 (halfword pairs 0x1000, 0x0001)
//   0x50  reshuffle byte-select mask (12 selected bytes, then padding)
//   0x60  URL alphabet: validation lut indexed by high nibble
//   0x70  URL alphabet: validation lut indexed by low nibble
//   0x80  URL alphabet: per-byte offset ("roll") lut
// Rows 0x00-0x5f are consecutive, so decodeStdAsm can fetch them into
// V0-V5 with a single VLM starting at offset 0.
DATA decode_const<>+0x00(SB)/8, $0x1010010204080408 // standard decode lut hi
DATA decode_const<>+0x08(SB)/8, $0x1010101010101010
DATA decode_const<>+0x10(SB)/8, $0x1511111111111111 // standard decode lut lo
DATA decode_const<>+0x18(SB)/8, $0x1111131A1B1B1B1A
DATA decode_const<>+0x20(SB)/8, $0x00101304BFBFB9B9 // standard decode lut roll
DATA decode_const<>+0x28(SB)/8, $0x0000000000000000
DATA decode_const<>+0x30(SB)/8, $0x4001400140014001 // reshuffle const0
DATA decode_const<>+0x38(SB)/8, $0x4001400140014001
DATA decode_const<>+0x40(SB)/8, $0x1000000110000001 // reshuffle const1
DATA decode_const<>+0x48(SB)/8, $0x1000000110000001
DATA decode_const<>+0x50(SB)/8, $0x010203050607090a // reshuffle mask
DATA decode_const<>+0x58(SB)/8, $0x0b0d0e0f00000000
DATA decode_const<>+0x60(SB)/8, $0x1010010204080428 // url decode lut hi
DATA decode_const<>+0x68(SB)/8, $0x1010101010101010
DATA decode_const<>+0x70(SB)/8, $0x1511111111111111 // url decode lut lo
DATA decode_const<>+0x78(SB)/8, $0x1111131B1B1A1B33
DATA decode_const<>+0x80(SB)/8, $0x00001104BFBFE0B9 // url decode lut roll
DATA decode_const<>+0x88(SB)/8, $0xB900000000000000
GLOBL decode_const<>(SB), (NOPTR+RODATA), $144

#define REV_BYTES V0
#define RESHUFFLE_MASK V1
#define MULHI_MASK V2
Expand Down Expand Up @@ -58,7 +78,6 @@ TEXT ·encodeAsm(SB),NOSPLIT,$0
VREPIB $0xff, NEG
VZERO ZERO


MOVD $0, R4
loop:
VL (R2), X0
Expand Down Expand Up @@ -99,6 +118,134 @@ done:
#undef RANGE0_END
#undef LUT
#undef ZERO
#undef NEG
#undef X0
#undef X1
#undef X2

// Vector register aliases used by the decode routines.
// V0-V5 are numbered in the same order as the first six rows of
// decode_const, which lets decodeStdAsm fill them with one VLM.
#define LUT_HI V0
#define LUT_LO V1
#define LUT_ROLL V2
#define RESHUFFLE_CONST0 V3
#define RESHUFFLE_CONST1 V4
#define RESHUFFLE_MASK V5
// Constants materialised in registers at function entry.
#define ZERO V6
#define NEG V7
#define NIBBLE_MASK V8
#define DECODE_CONST V9
// Scratch registers for the per-block computation.
#define X0 V10
#define X1 V11
#define X2 V12
#define X3 V13

// func decodeStdAsm(dst, src []byte) int
//
// decodeStdAsm decodes standard-alphabet base64 from src into dst, 16 input
// bytes (12 output bytes) per iteration. It stops at the first 16-byte block
// that fails validation, or once fewer than 24 source bytes remain, and
// returns the number of source bytes it did not consume (src_len minus 16
// for every block decoded).
//
// Each iteration stores a full 16-byte vector to dst although only the first
// 12 bytes are advanced over, so dst must have room for the 4-byte overrun.
// NOTE(review): the first block is processed before any length check and
// dst_len is never read; presumably the caller guarantees len(src) >= 24
// and a large enough dst - confirm.
TEXT ·decodeStdAsm(SB),NOSPLIT,$0
MOVD dst_base+0(FP), R1
MOVD src_base+24(FP), R2
MOVD src_len+32(FP), R3

// Load the six standard-alphabet rows (luts + reshuffle constants) into V0-V5.
MOVD $decode_const<>(SB), R4
VLM (R4), LUT_HI, RESHUFFLE_MASK
VREPIB $0x0f, NIBBLE_MASK
VREPIB $0x2f, DECODE_CONST // 0x2f is '/'
VZERO ZERO

loop:
VL (R2), X0
// Validate the input: look up a bit mask for each byte's high nibble and
// low nibble; the AND of the two masks must be zero in every lane.
VESRLF $4, X0, X1
VN X1, NIBBLE_MASK, X1 // high nibbles
VN X0, NIBBLE_MASK, X2 // low nibbles
VPERM LUT_HI, LUT_HI, X1, X3
VPERM LUT_LO, LUT_LO, X2, X2
VN X2, X3, X2
VCEQGS ZERO, X2, X2
BNE done // leave the loop unless the whole vector compared equal to zero

// Decode the input: bytes equal to '/' get 0xff (-1) added to their
// high-nibble index so they pick a different roll entry than the other
// characters sharing that nibble; the selected roll is then added to
// the input byte.
VCEQB DECODE_CONST, X0, X2
VAB X2, X1, X1
VPERM LUT_ROLL, LUT_ROLL, X1, X2
VAB X0, X2, X0

// Pack the decoded values: multiply even/odd bytes by (0x40, 0x01) and
// sum into halfwords, then multiply even/odd halfwords by
// (0x1000, 0x0001) and sum into words.
VMLEB RESHUFFLE_CONST0, X0, X1
VMLOB RESHUFFLE_CONST0, X0, X2
VAH X1, X2, X0
VMLEH RESHUFFLE_CONST1, X0, X1
VMLOH RESHUFFLE_CONST1, X0, X2
VAF X1, X2, X0

// Gather the 12 payload bytes to the front of the vector and store it.
VPERM X0, X0, RESHUFFLE_MASK, X0
VST X0, (R1)

// Advance: 16 source bytes consumed, 12 destination bytes produced.
LAY 16(R2), R2
LAY 12(R1), R1
SUB $16, R3
CMPBGE R3, $24, loop

done:
MOVD R3, ret+48(FP)
RET
// Rebind the table registers for decodeUrlAsm. The reshuffle constants now
// come first (V0-V2) and the lookup tables second (V3-V5), which is the order
// in which the rows at decode_const offsets 0x30-0x8f (reshuffle constants,
// then URL luts) are laid out.
#undef LUT_HI
#undef LUT_LO
#undef LUT_ROLL
#undef RESHUFFLE_CONST0
#undef RESHUFFLE_CONST1
#undef RESHUFFLE_MASK

#define RESHUFFLE_CONST0 V0
#define RESHUFFLE_CONST1 V1
#define RESHUFFLE_MASK V2
#define LUT_HI V3
#define LUT_LO V4
#define LUT_ROLL V5

// func decodeUrlAsm(dst, src []byte) int
//
// decodeUrlAsm decodes URL-alphabet base64 from src into dst, 16 input bytes
// (12 output bytes) per iteration. It stops at the first 16-byte block that
// fails validation, or once fewer than 24 source bytes remain, and returns
// the number of source bytes it did not consume (src_len minus 16 for every
// block decoded).
//
// Each iteration stores a full 16-byte vector to dst although only the first
// 12 bytes are advanced over, so dst must have room for the 4-byte overrun.
// NOTE(review): the first block is processed before any length check and
// dst_len is never read; presumably the caller guarantees len(src) >= 24
// and a large enough dst - confirm.
TEXT ·decodeUrlAsm(SB),NOSPLIT,$0
MOVD dst_base+0(FP), R1
MOVD src_base+24(FP), R2
MOVD src_len+32(FP), R3

// Load the reshuffle constants (0x30-0x5f) and the URL luts (0x60-0x8f)
// into V0-V5. Within this function RESHUFFLE_CONST0 is V0 and LUT_ROLL is
// V5, so the VLM range is ascending and starts at the reshuffle rows;
// loading from offset 0 would pick up the standard-alphabet tables instead.
MOVD $decode_const<>+0x30(SB), R4
VLM (R4), RESHUFFLE_CONST0, LUT_ROLL
VREPIB $0xff, NEG // not referenced by any instruction visible in this function
VREPIB $0x0f, NIBBLE_MASK
VREPIB $0x5e, DECODE_CONST // 0x5e is '^', the byte just below '_'
VZERO ZERO

loop:
VL (R2), X0
// Validate the input: look up a bit mask for each byte's high nibble and
// low nibble; the AND of the two masks must be zero in every lane.
VESRLF $4, X0, X1
VN X1, NIBBLE_MASK, X1 // high nibbles
VN X0, NIBBLE_MASK, X2 // low nibbles
VPERM LUT_HI, LUT_HI, X1, X3
VPERM LUT_LO, LUT_LO, X2, X2
VN X2, X3, X2
VCEQGS ZERO, X2, X2
BNE done // leave the loop unless the whole vector compared equal to zero

// Decode the input: adjust the high-nibble index by the compare mask,
// select a roll value with it, and add the roll to the input byte.
// NOTE(review): VCGTB is not defined anywhere in the visible part of this
// file; presumably it is a macro producing a per-byte "X0 > DECODE_CONST"
// mask, so that the subtraction bumps the index by one for '_' and
// 'a'-'z' - confirm the definition and its operand order.
VCGTB(X0, DECODE_CONST, X2)
VSB X2, X1, X1
VPERM LUT_ROLL, LUT_ROLL, X1, X2
VAB X0, X2, X0

// Pack the decoded values: multiply even/odd bytes by (0x40, 0x01) and
// sum into halfwords, then multiply even/odd halfwords by
// (0x1000, 0x0001) and sum into words.
VMLEB RESHUFFLE_CONST0, X0, X1
VMLOB RESHUFFLE_CONST0, X0, X2
VAH X1, X2, X0
VMLEH RESHUFFLE_CONST1, X0, X1
VMLOH RESHUFFLE_CONST1, X0, X2
VAF X1, X2, X0

// Gather the 12 payload bytes to the front of the vector and store it.
VPERM X0, X0, RESHUFFLE_MASK, X0
VST X0, (R1)

// Advance: 16 source bytes consumed, 12 destination bytes produced.
LAY 16(R2), R2
LAY 12(R1), R1
SUB $16, R3
CMPBGE R3, $24, loop

done:
MOVD R3, ret+48(FP)
RET
37 changes: 0 additions & 37 deletions base64_s390x_test.go

This file was deleted.

0 comments on commit 2ef02a2

Please sign in to comment.