Skip to content

Commit

Permalink
JIT compiler update
Browse files Browse the repository at this point in the history
  • Loading branch information
Zoltan Herczeg committed Dec 6, 2023
1 parent c3529d0 commit 4e8fdb3
Show file tree
Hide file tree
Showing 16 changed files with 1,293 additions and 455 deletions.
31 changes: 3 additions & 28 deletions src/pcre2_jit_simd_inc.h
Original file line number Diff line number Diff line change
Expand Up @@ -483,11 +483,7 @@ sljit_s32 cmp2a_ind = sljit_get_register_index(SLJIT_FLOAT_REGISTER, SLJIT_FR3);
sljit_s32 cmp1b_ind = sljit_get_register_index(SLJIT_FLOAT_REGISTER, SLJIT_FR4);
sljit_s32 cmp2b_ind = sljit_get_register_index(SLJIT_FLOAT_REGISTER, SLJIT_FR5);
sljit_s32 tmp1_ind = sljit_get_register_index(SLJIT_FLOAT_REGISTER, SLJIT_FR6);
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
sljit_s32 tmp2_ind = 0;
#else /* !SLJIT_CONFIG_X86_32 */
sljit_s32 tmp2_ind = 4;
#endif /* SLJIT_CONFIG_X86_32 */
sljit_s32 tmp2_ind = sljit_get_register_index(SLJIT_FLOAT_REGISTER, SLJIT_TMP_FR0);
struct sljit_label *start;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
struct sljit_label *restart;
Expand Down Expand Up @@ -660,19 +656,7 @@ for (i = 0; i < 4; i++)
fast_forward_char_pair_sse2_compare(compiler, compare1_type, reg_type, i, data1_ind, cmp1a_ind, cmp1b_ind, tmp1_ind);
}

/* PAND xmm1, xmm2/m128 */
if (reg_type == SLJIT_SIMD_REG_256)
{
instruction[0] = 0xc5;
instruction[1] = (sljit_u8)(0xfd ^ (data1_ind << 3));
}

/* instruction[0] = 0x66 / 0xc5; */
/* instruction[1] = 0x0f; */
instruction[2] = 0xdb;
instruction[3] = 0xc0 | (data1_ind << 3) | data2_ind;
sljit_emit_op_custom(compiler, instruction, 4);

sljit_emit_simd_op2(compiler, SLJIT_SIMD_OP2_AND | reg_type, SLJIT_FR0, SLJIT_FR0, SLJIT_FR1);
sljit_emit_simd_sign(compiler, SLJIT_SIMD_STORE | reg_type | SLJIT_SIMD_ELEM_8, SLJIT_FR0, TMP1, 0);

/* Ignore matches before the first STR_PTR. */
Expand Down Expand Up @@ -700,16 +684,7 @@ for (i = 0; i < 4; i++)
fast_forward_char_pair_sse2_compare(compiler, compare2_type, reg_type, i, data2_ind, cmp2a_ind, cmp2b_ind, tmp1_ind);
}

/* PAND xmm1, xmm2/m128 */
if (reg_type == SLJIT_SIMD_REG_256)
instruction[1] = (sljit_u8)(0xfd ^ (data1_ind << 3));

/* instruction[0] = 0x66 / 0xc5; */
/* instruction[1] = 0x0f; */
instruction[2] = 0xdb;
instruction[3] = 0xc0 | (data1_ind << 3) | data2_ind;
sljit_emit_op_custom(compiler, instruction, 4);

sljit_emit_simd_op2(compiler, SLJIT_SIMD_OP2_AND | reg_type, SLJIT_FR0, SLJIT_FR0, SLJIT_FR1);
sljit_emit_simd_sign(compiler, SLJIT_SIMD_STORE | reg_type | SLJIT_SIMD_ELEM_8, SLJIT_FR0, TMP1, 0);

CMPTO(SLJIT_ZERO, TMP1, 0, SLJIT_IMM, 0, start);
Expand Down
112 changes: 109 additions & 3 deletions src/sljit/sljitConfigInternal.h
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,8 @@ extern "C" {
SLJIT_NUMBER_OF_FLOAT_REGISTERS : number of available floating point registers
SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS : number of available floating point scratch registers
SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS : number of available floating point saved registers
SLJIT_NUMBER_OF_TEMPORARY_REGISTERS : number of available temporary registers
SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS : number of available temporary floating point registers
SLJIT_WORD_SHIFT : the shift required to apply when accessing a sljit_sw/sljit_uw array by index
SLJIT_F32_SHIFT : the shift required to apply when accessing
a single precision floating point array by index
Expand All @@ -81,8 +83,21 @@ extern "C" {
the scratch register index of ecx is stored in this variable
SLJIT_LOCALS_OFFSET : local space starting offset (SLJIT_SP + SLJIT_LOCALS_OFFSET)
SLJIT_RETURN_ADDRESS_OFFSET : a return instruction always adds this offset to the return address
SLJIT_CONV_MAX_FLOAT : result when a floating point value is converted to integer
and the floating point value is higher than the maximum integer value
(possible values: SLJIT_CONV_RESULT_MAX_INT or SLJIT_CONV_RESULT_MIN_INT)
SLJIT_CONV_MIN_FLOAT : result when a floating point value is converted to integer
and the floating point value is lower than the minimum integer value
(possible values: SLJIT_CONV_RESULT_MAX_INT or SLJIT_CONV_RESULT_MIN_INT)
SLJIT_CONV_NAN_FLOAT : result when a NaN floating point value is converted to integer
(possible values: SLJIT_CONV_RESULT_MAX_INT, SLJIT_CONV_RESULT_MIN_INT,
or SLJIT_CONV_RESULT_ZERO)
Other macros:
SLJIT_TMP_R0 .. R9 : accessing temporary registers
SLJIT_TMP_R(i) : accessing temporary registers
SLJIT_TMP_FR0 .. FR9 : accessing temporary floating point registers
SLJIT_TMP_FR(i) : accessing temporary floating point registers
SLJIT_FUNC : calling convention attribute for both calling JIT from C and C calling back from JIT
SLJIT_W(number) : defining 64 bit constants on 64 bit architectures (platform independent helper)
SLJIT_F64_SECOND(reg) : provides the register index of the second 32 bit part of a 64 bit
Expand Down Expand Up @@ -356,6 +371,38 @@ typedef double sljit_f64;
#define SLJIT_F32_SHIFT 2
#define SLJIT_F64_SHIFT 3

/* Possible results of a floating point to integer conversion when the
   input cannot be represented (out of the integer range, or NaN). */
#define SLJIT_CONV_RESULT_MAX_INT 0
#define SLJIT_CONV_RESULT_MIN_INT 1
#define SLJIT_CONV_RESULT_ZERO 2

/* Per-architecture conversion results:
     SLJIT_CONV_MAX_FLOAT : input is higher than the maximum integer value
     SLJIT_CONV_MIN_FLOAT : input is lower than the minimum integer value
     SLJIT_CONV_NAN_FLOAT : input is NaN
   NOTE(review): no branch for SLJIT_CONFIG_LOONGARCH is visible in this
   chain, so such a build would reach the #error below - confirm whether
   that is intended. */
#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)
#define SLJIT_CONV_MAX_FLOAT SLJIT_CONV_RESULT_MIN_INT
#define SLJIT_CONV_MIN_FLOAT SLJIT_CONV_RESULT_MIN_INT
#define SLJIT_CONV_NAN_FLOAT SLJIT_CONV_RESULT_MIN_INT
#elif (defined SLJIT_CONFIG_ARM && SLJIT_CONFIG_ARM)
#define SLJIT_CONV_MAX_FLOAT SLJIT_CONV_RESULT_MAX_INT
#define SLJIT_CONV_MIN_FLOAT SLJIT_CONV_RESULT_MIN_INT
#define SLJIT_CONV_NAN_FLOAT SLJIT_CONV_RESULT_ZERO
#elif (defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS)
#define SLJIT_CONV_MAX_FLOAT SLJIT_CONV_RESULT_MAX_INT
#define SLJIT_CONV_MIN_FLOAT SLJIT_CONV_RESULT_MAX_INT
#define SLJIT_CONV_NAN_FLOAT SLJIT_CONV_RESULT_MAX_INT
#elif (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC)
#define SLJIT_CONV_MAX_FLOAT SLJIT_CONV_RESULT_MAX_INT
#define SLJIT_CONV_MIN_FLOAT SLJIT_CONV_RESULT_MIN_INT
#define SLJIT_CONV_NAN_FLOAT SLJIT_CONV_RESULT_MIN_INT
#elif (defined SLJIT_CONFIG_RISCV && SLJIT_CONFIG_RISCV)
#define SLJIT_CONV_MAX_FLOAT SLJIT_CONV_RESULT_MAX_INT
#define SLJIT_CONV_MIN_FLOAT SLJIT_CONV_RESULT_MIN_INT
#define SLJIT_CONV_NAN_FLOAT SLJIT_CONV_RESULT_MAX_INT
#elif (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X)
#define SLJIT_CONV_MAX_FLOAT SLJIT_CONV_RESULT_MAX_INT
#define SLJIT_CONV_MIN_FLOAT SLJIT_CONV_RESULT_MIN_INT
#define SLJIT_CONV_NAN_FLOAT SLJIT_CONV_RESULT_MIN_INT
#else
/* No known conversion behaviour for this target: stop the build. */
#error "Result for float to integer conversion is not defined"
#endif

#ifndef SLJIT_W

/* Defining long constants. */
Expand Down Expand Up @@ -528,8 +575,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr);

#define SLJIT_NUMBER_OF_REGISTERS 12
#define SLJIT_NUMBER_OF_SAVED_REGISTERS 7
#define SLJIT_NUMBER_OF_TEMPORARY_REGISTERS 1
#define SLJIT_NUMBER_OF_FLOAT_REGISTERS 7
#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 0
#define SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS 1
#define SLJIT_LOCALS_OFFSET_BASE (8 * SSIZE_OF(sw))
#define SLJIT_PREF_SHIFT_REG SLJIT_R2
#define SLJIT_MASKED_SHIFT 1
Expand All @@ -538,7 +587,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr);
#elif (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)

#define SLJIT_NUMBER_OF_REGISTERS 13
#define SLJIT_NUMBER_OF_TEMPORARY_REGISTERS 2
#define SLJIT_NUMBER_OF_FLOAT_REGISTERS 15
#define SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS 1
#ifndef _WIN64
#define SLJIT_NUMBER_OF_SAVED_REGISTERS 6
#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 0
Expand All @@ -556,16 +607,20 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr);

#define SLJIT_NUMBER_OF_REGISTERS 12
#define SLJIT_NUMBER_OF_SAVED_REGISTERS 8
#define SLJIT_NUMBER_OF_TEMPORARY_REGISTERS 2
#define SLJIT_NUMBER_OF_FLOAT_REGISTERS 14
#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 8
#define SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS 2
#define SLJIT_LOCALS_OFFSET_BASE 0

#elif (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64)

#define SLJIT_NUMBER_OF_REGISTERS 26
#define SLJIT_NUMBER_OF_SAVED_REGISTERS 10
#define SLJIT_NUMBER_OF_TEMPORARY_REGISTERS 3
#define SLJIT_NUMBER_OF_FLOAT_REGISTERS 30
#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 8
#define SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS 2
#define SLJIT_LOCALS_OFFSET_BASE (2 * (sljit_s32)sizeof(sljit_sw))
#define SLJIT_MASKED_SHIFT 1
#define SLJIT_MASKED_SHIFT32 1
Expand All @@ -574,8 +629,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr);

#define SLJIT_NUMBER_OF_REGISTERS 23
#define SLJIT_NUMBER_OF_SAVED_REGISTERS 17
#define SLJIT_NUMBER_OF_TEMPORARY_REGISTERS 3
#define SLJIT_NUMBER_OF_FLOAT_REGISTERS 30
#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 18
#define SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS 2
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) || (defined _AIX)
#define SLJIT_LOCALS_OFFSET_BASE ((6 + 8) * (sljit_s32)sizeof(sljit_sw))
#elif (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
Expand All @@ -598,16 +655,20 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr);
#define SLJIT_NUMBER_OF_FLOAT_REGISTERS 29
#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 8
#endif
#define SLJIT_NUMBER_OF_TEMPORARY_REGISTERS 5
#define SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS 3
#define SLJIT_MASKED_SHIFT 1
#define SLJIT_MASKED_SHIFT32 1

#elif (defined SLJIT_CONFIG_RISCV && SLJIT_CONFIG_RISCV)

#define SLJIT_NUMBER_OF_REGISTERS 23
#define SLJIT_NUMBER_OF_SAVED_REGISTERS 12
#define SLJIT_LOCALS_OFFSET_BASE 0
#define SLJIT_NUMBER_OF_TEMPORARY_REGISTERS 5
#define SLJIT_NUMBER_OF_FLOAT_REGISTERS 30
#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 12
#define SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS 2
#define SLJIT_LOCALS_OFFSET_BASE 0
#define SLJIT_MASKED_SHIFT 1
#define SLJIT_MASKED_SHIFT32 1

Expand Down Expand Up @@ -636,18 +697,22 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr);

#define SLJIT_NUMBER_OF_REGISTERS 12
#define SLJIT_NUMBER_OF_SAVED_REGISTERS 8
#define SLJIT_NUMBER_OF_TEMPORARY_REGISTERS 3
#define SLJIT_NUMBER_OF_FLOAT_REGISTERS 15
#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 8
#define SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS 1
#define SLJIT_LOCALS_OFFSET_BASE SLJIT_S390X_DEFAULT_STACK_FRAME_SIZE
#define SLJIT_MASKED_SHIFT 1

#elif (defined SLJIT_CONFIG_LOONGARCH && SLJIT_CONFIG_LOONGARCH)

#define SLJIT_NUMBER_OF_REGISTERS 23
#define SLJIT_NUMBER_OF_SAVED_REGISTERS 10
#define SLJIT_LOCALS_OFFSET_BASE 0
#define SLJIT_NUMBER_OF_TEMPORARY_REGISTERS 5
#define SLJIT_NUMBER_OF_FLOAT_REGISTERS 30
#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 12
#define SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS 2
#define SLJIT_LOCALS_OFFSET_BASE 0
#define SLJIT_MASKED_SHIFT 1
#define SLJIT_MASKED_SHIFT32 1

Expand All @@ -656,8 +721,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr);
/* Just to have something. */
#define SLJIT_NUMBER_OF_REGISTERS 0
#define SLJIT_NUMBER_OF_SAVED_REGISTERS 0
#define SLJIT_NUMBER_OF_TEMPORARY_REGISTERS 0
#define SLJIT_NUMBER_OF_FLOAT_REGISTERS 0
#define SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS 0
#define SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS 0
#define SLJIT_LOCALS_OFFSET_BASE 0

#endif
Expand All @@ -670,6 +737,45 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr);
#define SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS \
(SLJIT_NUMBER_OF_FLOAT_REGISTERS - SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS)

/**********************************/
/* Temporary register management. */
/**********************************/

/* Index of the first temporary register and of the first temporary floating
   point register. Both are placed after the regular register indices.
   NOTE(review): the extra +2 / +1 presumably skips other reserved register
   indices - confirm against the register definitions. */
#define SLJIT_TMP_REGISTER_BASE (SLJIT_NUMBER_OF_REGISTERS + 2)
#define SLJIT_TMP_FREGISTER_BASE (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1)

/* WARNING: Accessing temporary registers is not recommended, because they
are also used by the JIT compiler for various computations. Using them
might have arbitrary side effects, including incorrect operations and
crashes, so use them at your own risk. The machine registers themselves
might have limitations, e.g. the r0 register on s390x / ppc cannot be used
as a base address for memory operations. */

/* Temporary registers. SLJIT_TMP_R(i) is the indexed form of
   SLJIT_TMP_R0 .. SLJIT_TMP_R9. The number of temporary registers
   available on a target is SLJIT_NUMBER_OF_TEMPORARY_REGISTERS. */
#define SLJIT_TMP_R0 (SLJIT_TMP_REGISTER_BASE + 0)
#define SLJIT_TMP_R1 (SLJIT_TMP_REGISTER_BASE + 1)
#define SLJIT_TMP_R2 (SLJIT_TMP_REGISTER_BASE + 2)
#define SLJIT_TMP_R3 (SLJIT_TMP_REGISTER_BASE + 3)
#define SLJIT_TMP_R4 (SLJIT_TMP_REGISTER_BASE + 4)
#define SLJIT_TMP_R5 (SLJIT_TMP_REGISTER_BASE + 5)
#define SLJIT_TMP_R6 (SLJIT_TMP_REGISTER_BASE + 6)
#define SLJIT_TMP_R7 (SLJIT_TMP_REGISTER_BASE + 7)
#define SLJIT_TMP_R8 (SLJIT_TMP_REGISTER_BASE + 8)
#define SLJIT_TMP_R9 (SLJIT_TMP_REGISTER_BASE + 9)
#define SLJIT_TMP_R(i) (SLJIT_TMP_REGISTER_BASE + (i))

/* Temporary floating point registers. SLJIT_TMP_FR(i) is the indexed form
   of SLJIT_TMP_FR0 .. SLJIT_TMP_FR9. The number of temporary floating point
   registers available on a target is
   SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS. */
#define SLJIT_TMP_FR0 (SLJIT_TMP_FREGISTER_BASE + 0)
#define SLJIT_TMP_FR1 (SLJIT_TMP_FREGISTER_BASE + 1)
#define SLJIT_TMP_FR2 (SLJIT_TMP_FREGISTER_BASE + 2)
#define SLJIT_TMP_FR3 (SLJIT_TMP_FREGISTER_BASE + 3)
#define SLJIT_TMP_FR4 (SLJIT_TMP_FREGISTER_BASE + 4)
#define SLJIT_TMP_FR5 (SLJIT_TMP_FREGISTER_BASE + 5)
#define SLJIT_TMP_FR6 (SLJIT_TMP_FREGISTER_BASE + 6)
#define SLJIT_TMP_FR7 (SLJIT_TMP_FREGISTER_BASE + 7)
#define SLJIT_TMP_FR8 (SLJIT_TMP_FREGISTER_BASE + 8)
#define SLJIT_TMP_FR9 (SLJIT_TMP_FREGISTER_BASE + 9)
#define SLJIT_TMP_FR(i) (SLJIT_TMP_FREGISTER_BASE + (i))

/********************************/
/* CPU status flags management. */
/********************************/
Expand All @@ -690,7 +796,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr);
#if (defined SLJIT_CONFIG_ARM_32 && SLJIT_CONFIG_ARM_32) \
|| (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
#define SLJIT_F64_SECOND(reg) \
((reg) + SLJIT_FS0)
((reg) + SLJIT_FS0 + SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS)
#else /* !SLJIT_CONFIG_ARM_32 && !SLJIT_CONFIG_MIPS_32 */
#define SLJIT_F64_SECOND(reg) \
(reg)
Expand Down
Loading

0 comments on commit 4e8fdb3

Please sign in to comment.