From 41e60ff6208db60766f96c8e4341ecfa82b5da09 Mon Sep 17 00:00:00 2001
From: Serhiy Storchaka
Date: Fri, 15 Nov 2024 22:50:46 +0200
Subject: [PATCH] Use a pool for SRE_REPEAT.

Replace add_repeat(), remove_repeat() and state_clean_repeat_data(),
which allocated and freed one SRE_REPEAT for every executed REPEAT
operator and tracked them in state->repeats, with a per-state pool:

* repeat_pool_malloc() takes a node from state->repeat_pool_unused, or
  calls PyMem_Malloc() when that list is empty, and links the node at
  the head of the doubly-linked state->repeat_pool_used list.
* repeat_pool_free() unlinks a node from the used list and pushes it
  onto the singly-linked unused list instead of freeing it.
* repeat_pool_clear() frees every node on both lists.  state_fini()
  calls it, so nodes still on the used list after SRE(match) terminated
  abruptly are released there.

state_reset() now only resets state->repeat.  SRE_OP_REPEAT initializes
count, pattern, prev and last_ptr itself, because repeat_pool_malloc()
returns the node without touching those fields.
---
Review note: the Lib/test/test_re.py hunk was dropped.  Its only change
was a leftover debugging print(n) before "return n".

 Modules/_sre/sre.c     | 118 ++++++++++++++++++++++++++---------------
 Modules/_sre/sre.h     |  11 ++--
 Modules/_sre/sre_lib.h |  10 +++-
 3 files changed, 89 insertions(+), 50 deletions(-)

diff --git a/Modules/_sre/sre.c b/Modules/_sre/sre.c
index d640f63874bc11..2cdff035e17b7a 100644
--- a/Modules/_sre/sre.c
+++ b/Modules/_sre/sre.c
@@ -267,46 +267,83 @@ data_stack_grow(SRE_STATE* state, Py_ssize_t size)
     return 0;
 }
 
-LOCAL(SRE_REPEAT *)
-add_repeat(SRE_STATE* state, const SRE_CODE* pattern)
-{
-    SRE_REPEAT *rep = (SRE_REPEAT*) PyMem_Malloc(sizeof(SRE_REPEAT));
-    if (!rep) {
-        return NULL;
-    }
-    rep->count = -1;
-    rep->last_ptr = NULL;
-    rep->prevlink = NULL;
-    if (state->repeats) {
-        rep->nextlink = state->repeats;
-        state->repeats->prevlink = rep;
+/* memory pool functions for SRE_REPEAT, this can avoid memory
+   leak when SRE(match) function terminates abruptly.
+   state->repeat_pool_used is a doubly-linked list, so that we
+   can remove a SRE_REPEAT node from it.
+   state->repeat_pool_unused is a singly-linked list, we put/get
+   node at the head. */
+static SRE_REPEAT *
+repeat_pool_malloc(SRE_STATE *state)
+{
+    SRE_REPEAT *repeat;
+
+    if (state->repeat_pool_unused) {
+        /* remove from unused pool (singly-linked list) */
+        repeat = state->repeat_pool_unused;
+        state->repeat_pool_unused = repeat->pool_next;
     }
     else {
-        rep->nextlink = NULL;
+        repeat = PyMem_Malloc(sizeof(SRE_REPEAT));
+        if (!repeat) {
+            return NULL;
+        }
+    }
+
+    /* add to used pool (doubly-linked list) */
+    SRE_REPEAT *temp = state->repeat_pool_used;
+    if (temp) {
+        temp->pool_prev = repeat;
     }
-    state->repeats = rep;
+    repeat->pool_prev = NULL;
+    repeat->pool_next = temp;
+    state->repeat_pool_used = repeat;
 
-    rep->pattern = pattern;
-    rep->prev = state->repeat;
-    state->repeat = rep;
-    return rep;
+    return repeat;
 }
 
-LOCAL(void)
-remove_repeat(SRE_STATE* state, SRE_REPEAT *rep)
+static void
+repeat_pool_free(SRE_STATE *state, SRE_REPEAT *repeat)
 {
-    state->repeat = rep->prev;
+    SRE_REPEAT *prev = repeat->pool_prev;
+    SRE_REPEAT *next = repeat->pool_next;
 
-    if (rep->nextlink) {
-        rep->nextlink->prevlink = rep->prevlink;
+    /* remove from used pool (doubly-linked list) */
+    if (prev) {
+        prev->pool_next = next;
     }
-    if (rep->prevlink) {
-        rep->prevlink->nextlink = rep->nextlink;
+    else {
+        state->repeat_pool_used = next;
     }
-    if (state->repeats == rep) {
-        state->repeats = rep->nextlink;
+    if (next) {
+        next->pool_prev = prev;
+    }
+
+    /* add to unused pool (singly-linked list) */
+    repeat->pool_next = state->repeat_pool_unused;
+    state->repeat_pool_unused = repeat;
+}
+
+static void
+repeat_pool_clear(SRE_STATE *state)
+{
+    /* clear used pool */
+    SRE_REPEAT *next = state->repeat_pool_used;
+    state->repeat_pool_used = NULL;
+    while (next) {
+        SRE_REPEAT *temp = next;
+        next = temp->pool_next;
+        PyMem_Free(temp);
+    }
+
+    /* clear unused pool */
+    next = state->repeat_pool_unused;
+    state->repeat_pool_unused = NULL;
+    while (next) {
+        SRE_REPEAT *temp = next;
+        next = temp->pool_next;
+        PyMem_Free(temp);
     }
-    PyMem_Free(rep);
 }
 
 /* generate 8-bit version */
@@ -439,18 +476,6 @@ _sre_unicode_tolower_impl(PyObject *module, int character)
     return sre_lower_unicode(character);
 }
 
-LOCAL(void)
-state_clean_repeat_data(SRE_STATE* state)
-{
-    SRE_REPEAT *rep = state->repeats;
-    state->repeats = NULL;
-    while (rep) {
-        SRE_REPEAT *prev = rep->prevlink;
-        PyMem_Free(rep);
-        rep = prev;
-    }
-}
-
 LOCAL(void)
 state_reset(SRE_STATE* state)
 {
@@ -460,7 +485,8 @@ state_reset(SRE_STATE* state)
     state->lastmark = -1;
     state->lastindex = -1;
 
-    state_clean_repeat_data(state);
+    state->repeat = NULL;
+
     data_stack_dealloc(state);
 }
 
@@ -555,6 +581,11 @@ state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
     state->must_advance = 0;
     state->debug = ((pattern->flags & SRE_FLAG_DEBUG) != 0);
 
+    /* SRE_REPEAT pool */
+    state->repeat = NULL;
+    state->repeat_pool_used = NULL;
+    state->repeat_pool_unused = NULL;
+
     state->beginning = ptr;
 
     state->start = (void*) ((char*) ptr + start * state->charsize);
@@ -587,11 +618,12 @@ state_fini(SRE_STATE* state, PatternObject *pattern)
     if (state->buffer.buf)
         PyBuffer_Release(&state->buffer);
     Py_XDECREF(state->string);
-    state_clean_repeat_data(state);
     data_stack_dealloc(state);
     /* See above PyMem_Free() for why we explicitly cast here. */
     PyMem_Free((void*) state->mark);
     state->mark = NULL;
+    /* SRE_REPEAT pool */
+    repeat_pool_clear(state);
 #ifdef Py_DEBUG
     if (pattern) {
         pattern->fail_after_count = -1;
diff --git a/Modules/_sre/sre.h b/Modules/_sre/sre.h
index 29085f4c029c89..42681c2addf3c2 100644
--- a/Modules/_sre/sre.h
+++ b/Modules/_sre/sre.h
@@ -73,9 +73,9 @@ typedef struct SRE_REPEAT_T {
     const SRE_CODE* pattern; /* points to REPEAT operator arguments */
     const void* last_ptr; /* helper to check for infinite loops */
     struct SRE_REPEAT_T *prev; /* points to previous repeat context */
-    /* links for double-linked list of all repeat contexts */
-    struct SRE_REPEAT_T *nextlink;
-    struct SRE_REPEAT_T *prevlink;
+    /* for SRE_REPEAT pool */
+    struct SRE_REPEAT_T *pool_prev;
+    struct SRE_REPEAT_T *pool_next;
 } SRE_REPEAT;
 
 typedef struct {
@@ -103,8 +103,9 @@ typedef struct {
     size_t data_stack_base;
     /* current repeat context */
     SRE_REPEAT *repeat;
-    /* linked list of all repeat contexts */
-    SRE_REPEAT *repeats;
+    /* SRE_REPEAT pool */
+    SRE_REPEAT *repeat_pool_used;
+    SRE_REPEAT *repeat_pool_unused;
     unsigned int sigcount;
 #ifdef Py_DEBUG
     int fail_after_count;
diff --git a/Modules/_sre/sre_lib.h b/Modules/_sre/sre_lib.h
index e471c231e55f21..dbec92dd7ee0a9 100644
--- a/Modules/_sre/sre_lib.h
+++ b/Modules/_sre/sre_lib.h
@@ -1136,14 +1136,20 @@ SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel)
                    pattern[1], pattern[2]));
 
             /* install new repeat context */
-            ctx->u.rep = add_repeat(state, pattern);
+            ctx->u.rep = repeat_pool_malloc(state);
             if (!ctx->u.rep) {
                 RETURN_ERROR(SRE_ERROR_MEMORY);
             }
+            ctx->u.rep->count = -1;
+            ctx->u.rep->pattern = pattern;
+            ctx->u.rep->prev = state->repeat;
+            ctx->u.rep->last_ptr = NULL;
+            state->repeat = ctx->u.rep;
 
             state->ptr = ptr;
             DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]);
-            remove_repeat(state, ctx->u.rep);
+            state->repeat = ctx->u.rep->prev;
+            repeat_pool_free(state, ctx->u.rep);
 
             if (ret) {
                 RETURN_ON_ERROR(ret);