Skip to content

Commit

Permalink
Use a pool for SRE_REPEAT.
Browse files Browse the repository at this point in the history
  • Loading branch information
serhiy-storchaka committed Nov 15, 2024
1 parent 541de35 commit 41e60ff
Show file tree
Hide file tree
Showing 4 changed files with 90 additions and 50 deletions.
1 change: 1 addition & 0 deletions Lib/test/test_re.py
Original file line number Diff line number Diff line change
Expand Up @@ -2689,6 +2689,7 @@ class Interrupt(Exception):
p._fail_after(n, Interrupt)
try:
p.match(string)
print(n)
return n
except Interrupt:
pass
Expand Down
118 changes: 75 additions & 43 deletions Modules/_sre/sre.c
Original file line number Diff line number Diff line change
Expand Up @@ -267,46 +267,83 @@ data_stack_grow(SRE_STATE* state, Py_ssize_t size)
return 0;
}

LOCAL(SRE_REPEAT *)
add_repeat(SRE_STATE* state, const SRE_CODE* pattern)
{
SRE_REPEAT *rep = (SRE_REPEAT*) PyMem_Malloc(sizeof(SRE_REPEAT));
if (!rep) {
return NULL;
}
rep->count = -1;
rep->last_ptr = NULL;
rep->prevlink = NULL;
if (state->repeats) {
rep->nextlink = state->repeats;
state->repeats->prevlink = rep;
/* Memory pool functions for SRE_REPEAT.  Pooling avoids memory
leaks when the SRE(match) function terminates abruptly.
state->repeat_pool_used is a doubly-linked list, so that we
can remove an SRE_REPEAT node from it.
state->repeat_pool_unused is a singly-linked list; we put/get
nodes at the head. */
static SRE_REPEAT *
repeat_pool_malloc(SRE_STATE *state)
{
SRE_REPEAT *repeat;

if (state->repeat_pool_unused) {
/* remove from unused pool (singly-linked list) */
repeat = state->repeat_pool_unused;
state->repeat_pool_unused = repeat->pool_next;
}
else {
rep->nextlink = NULL;
repeat = PyMem_Malloc(sizeof(SRE_REPEAT));
if (!repeat) {
return NULL;
}
}

/* add to used pool (doubly-linked list) */
SRE_REPEAT *temp = state->repeat_pool_used;
if (temp) {
temp->pool_prev = repeat;
}
state->repeats = rep;
repeat->pool_prev = NULL;
repeat->pool_next = temp;
state->repeat_pool_used = repeat;

rep->pattern = pattern;
rep->prev = state->repeat;
state->repeat = rep;
return rep;
return repeat;
}

LOCAL(void)
remove_repeat(SRE_STATE* state, SRE_REPEAT *rep)
static void
repeat_pool_free(SRE_STATE *state, SRE_REPEAT *repeat)
{
state->repeat = rep->prev;
SRE_REPEAT *prev = repeat->pool_prev;
SRE_REPEAT *next = repeat->pool_next;

if (rep->nextlink) {
rep->nextlink->prevlink = rep->prevlink;
/* remove from used pool (doubly-linked list) */
if (prev) {
prev->pool_next = next;
}
if (rep->prevlink) {
rep->prevlink->nextlink = rep->nextlink;
else {
state->repeat_pool_used = next;
}
if (state->repeats == rep) {
state->repeats = rep->nextlink;
if (next) {
next->pool_prev = prev;
}

/* add to unused pool (singly-linked list) */
repeat->pool_next = state->repeat_pool_unused;
state->repeat_pool_unused = repeat;
}

static void
repeat_pool_clear(SRE_STATE *state)
{
/* clear used pool */
SRE_REPEAT *next = state->repeat_pool_used;
state->repeat_pool_used = NULL;
while (next) {
SRE_REPEAT *temp = next;
next = temp->pool_next;
PyMem_Free(temp);
}

/* clear unused pool */
next = state->repeat_pool_unused;
state->repeat_pool_unused = NULL;
while (next) {
SRE_REPEAT *temp = next;
next = temp->pool_next;
PyMem_Free(temp);
}
PyMem_Free(rep);
}

/* generate 8-bit version */
Expand Down Expand Up @@ -439,18 +476,6 @@ _sre_unicode_tolower_impl(PyObject *module, int character)
return sre_lower_unicode(character);
}

/* Free every SRE_REPEAT context still linked on state->repeats.

   add_repeat() pushes each new context at the head of the list: the
   head's prevlink is always NULL and the older contexts are reached
   through nextlink.  The list must therefore be walked via nextlink;
   following prevlink from the head would free only the first node and
   leak all the others.

   state->repeats is detached (set to NULL) before any node is freed. */
LOCAL(void)
state_clean_repeat_data(SRE_STATE* state)
{
    SRE_REPEAT *rep = state->repeats;
    state->repeats = NULL;
    while (rep) {
        /* fetch the successor before the node is released */
        SRE_REPEAT *next = rep->nextlink;
        PyMem_Free(rep);
        rep = next;
    }
}

LOCAL(void)
state_reset(SRE_STATE* state)
{
Expand All @@ -460,7 +485,8 @@ state_reset(SRE_STATE* state)
state->lastmark = -1;
state->lastindex = -1;

state_clean_repeat_data(state);
state->repeat = NULL;

data_stack_dealloc(state);
}

Expand Down Expand Up @@ -555,6 +581,11 @@ state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string,
state->must_advance = 0;
state->debug = ((pattern->flags & SRE_FLAG_DEBUG) != 0);

/* SRE_REPEAT pool */
state->repeat = NULL;
state->repeat_pool_used = NULL;
state->repeat_pool_unused = NULL;

state->beginning = ptr;

state->start = (void*) ((char*) ptr + start * state->charsize);
Expand Down Expand Up @@ -587,11 +618,12 @@ state_fini(SRE_STATE* state, PatternObject *pattern)
if (state->buffer.buf)
PyBuffer_Release(&state->buffer);
Py_XDECREF(state->string);
state_clean_repeat_data(state);
data_stack_dealloc(state);
/* See above PyMem_Free() for why we explicitly cast here. */
PyMem_Free((void*) state->mark);
state->mark = NULL;
/* SRE_REPEAT pool */
repeat_pool_clear(state);
#ifdef Py_DEBUG
if (pattern) {
pattern->fail_after_count = -1;
Expand Down
11 changes: 6 additions & 5 deletions Modules/_sre/sre.h
Original file line number Diff line number Diff line change
Expand Up @@ -73,9 +73,9 @@ typedef struct SRE_REPEAT_T {
const SRE_CODE* pattern; /* points to REPEAT operator arguments */
const void* last_ptr; /* helper to check for infinite loops */
struct SRE_REPEAT_T *prev; /* points to previous repeat context */
/* links for double-linked list of all repeat contexts */
struct SRE_REPEAT_T *nextlink;
struct SRE_REPEAT_T *prevlink;
/* for SRE_REPEAT pool */
struct SRE_REPEAT_T *pool_prev;
struct SRE_REPEAT_T *pool_next;
} SRE_REPEAT;

typedef struct {
Expand Down Expand Up @@ -103,8 +103,9 @@ typedef struct {
size_t data_stack_base;
/* current repeat context */
SRE_REPEAT *repeat;
/* linked list of all repeat contexts */
SRE_REPEAT *repeats;
/* SRE_REPEAT pool */
SRE_REPEAT *repeat_pool_used;
SRE_REPEAT *repeat_pool_unused;
unsigned int sigcount;
#ifdef Py_DEBUG
int fail_after_count;
Expand Down
10 changes: 8 additions & 2 deletions Modules/_sre/sre_lib.h
Original file line number Diff line number Diff line change
Expand Up @@ -1136,14 +1136,20 @@ SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel)
pattern[1], pattern[2]));

/* install new repeat context */
ctx->u.rep = add_repeat(state, pattern);
ctx->u.rep = repeat_pool_malloc(state);
if (!ctx->u.rep) {
RETURN_ERROR(SRE_ERROR_MEMORY);
}
ctx->u.rep->count = -1;
ctx->u.rep->pattern = pattern;
ctx->u.rep->prev = state->repeat;
ctx->u.rep->last_ptr = NULL;
state->repeat = ctx->u.rep;

state->ptr = ptr;
DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]);
remove_repeat(state, ctx->u.rep);
state->repeat = ctx->u.rep->prev;
repeat_pool_free(state, ctx->u.rep);

if (ret) {
RETURN_ON_ERROR(ret);
Expand Down

0 comments on commit 41e60ff

Please sign in to comment.