From c35ac050b122c854fb55edbc0b0ffc9aa6c5f01c Mon Sep 17 00:00:00 2001 From: Isabell Huang Date: Thu, 11 Jul 2024 18:15:30 +0200 Subject: [PATCH 1/3] erts: Fix out-of-bounds read in print_atom_name --- erts/emulator/beam/erl_printf_term.c | 192 ++++++++++++--------------- 1 file changed, 88 insertions(+), 104 deletions(-) diff --git a/erts/emulator/beam/erl_printf_term.c b/erts/emulator/beam/erl_printf_term.c index fecbbc593b98..9b7eeac22a6f 100644 --- a/erts/emulator/beam/erl_printf_term.c +++ b/erts/emulator/beam/erl_printf_term.c @@ -184,26 +184,42 @@ static int is_printable_ascii(byte* bytep, Uint bytesize, Uint bitoffs) } /* - * Helper function for print_atom_name(). Not generally useful. + * Helper function for print_atom_name() that decodes Utf8. After decoding a + * valid character, the offset is updated to point to the next character. size + * is only used for debugging. */ -static ERTS_INLINE int latin1_char(int c1, int c2) +static ERTS_INLINE int utf8_decode(const byte *text, int *offset, int size) { - if ((c1 & 0x80) == 0) { - /* Plain old 7-bit ASCII. */ - return c1; - } else if ((c1 & 0xE0) == 0xC0) { - /* Unicode code points from 0x80 through 0x7FF. */ - ASSERT((c2 & 0xC0) == 0x80); - return (c1 & 0x1F) << 6 | (c2 & 0x3F); - } else if ((c1 & 0xC0) == 0x80) { - /* A continutation byte in a utf8 sequence. Pretend that it is - * a character that is allowed in an atom. */ - return 'a'; + int component = text[*offset]; + int codepoint = 0; + int length = 0; + + if ((component & 0x80) == 0) { + codepoint = component; + length = 1; + } else if ((component & 0xE0) == 0xC0) { + codepoint = component & 0x1F; + length = 2; + } else if ((component & 0xF0) == 0xE0) { + codepoint = component & 0x0F; + length = 3; } else { - /* The start of a utf8 sequence comprising three or four - * bytes. Always needs quoting. */ - return 0; + ASSERT((component & 0xF8) == 0xF0); + codepoint = component & 0x07; + length = 4; + } + + /* Assert that there are enough bytes for decoding */ + ASSERT(*offset + length <= size); + + for (int i = 1; i < length; i++) { + component = text[*offset + i]; + ASSERT((component & 0xC0) == 0x80); + codepoint = (codepoint << 6) | (component & 0x3F); } + + *offset += length; + return codepoint; } /* @@ -215,34 +231,38 @@ static ERTS_INLINE int latin1_char(int c1, int c2) */ static int print_atom_name(fmtfn_t fn, void* arg, Eterm atom, long *dcount) { - int n, i; - int res; + int length, index; + const Atom *entry; + int result; int need_quote; - int pos; - byte *s; - byte *cpos; - int c; - int lc; + int position; + const byte *s; + int codepoint; - res = 0; - i = atom_val(atom); + result = 0; + index = atom_val(atom); + entry = NULL; - if ((i < 0) || (i >= atom_table_size()) || (atom_tab(i) == NULL)) { - PRINT_STRING(res, fn, arg, "'); - return res; + if ((index > 0) || (index <= atom_table_size()) ) { + entry = atom_tab(index); } - s = atom_tab(i)->name; - n = atom_tab(i)->len; + if (entry == NULL) { + PRINT_STRING(result, fn, arg, "'); + return result; + } - *dcount -= atom_tab(i)->len; + s = entry->name; + length = entry->len; - if (n == 0) { + *dcount -= entry->len; + + if (length == 0) { /* The empty atom: '' */ - PRINT_STRING(res, fn, arg, "''"); - return res; + PRINT_STRING(result, fn, arg, "''"); + return result; } /* @@ -256,22 +276,12 @@ static int print_atom_name(fmtfn_t fn, void* arg, Eterm atom, long *dcount) * the Latin-1 code block or the character '_'. */ - need_quote = 0; - cpos = s; - pos = n - 1; - c = *cpos++; - lc = latin1_char(c, *cpos); - if (!IS_LOWER(lc)) - need_quote++; - else { - while (pos--) { - c = *cpos++; - lc = latin1_char(c, *cpos); - if (!IS_ALNUM(lc) && lc != '_') { - need_quote++; - break; - } - } + position = 0; + codepoint = utf8_decode(s, &position, length); + need_quote = !IS_LOWER(codepoint); + while (position < length && !need_quote) { + codepoint = utf8_decode(s, &position, length); + need_quote = !IS_ALNUM(codepoint) && codepoint != '_'; } /* @@ -281,62 +291,36 @@ static int print_atom_name(fmtfn_t fn, void* arg, Eterm atom, long *dcount) * be specially printed. Therefore, we must do a partial * decoding of the utf8 encoding. */ - cpos = s; - pos = n; + position = 0; if (need_quote) - PRINT_CHAR(res, fn, arg, '\''); - while(pos--) { - c = *cpos++; - switch(c) { - case '\'': PRINT_STRING(res, fn, arg, "\\'"); break; - case '\\': PRINT_STRING(res, fn, arg, "\\\\"); break; - case '\n': PRINT_STRING(res, fn, arg, "\\n"); break; - case '\f': PRINT_STRING(res, fn, arg, "\\f"); break; - case '\t': PRINT_STRING(res, fn, arg, "\\t"); break; - case '\r': PRINT_STRING(res, fn, arg, "\\r"); break; - case '\b': PRINT_STRING(res, fn, arg, "\\b"); break; - case '\v': PRINT_STRING(res, fn, arg, "\\v"); break; - default: - if (c < ' ') { - /* ASCII control character (0-31). */ - PRINT_CHAR(res, fn, arg, '\\'); - PRINT_UWORD(res, fn, arg, 'o', 1, 3, (ErlPfUWord) c); - } else if (c >= 0x80) { - /* A multi-byte utf8-encoded code point. Determine the - * length of the sequence. */ - int n; - if ((c & 0xE0) == 0xC0) { - n = 2; - } else if ((c & 0xF0) == 0xE0) { - n = 3; - } else { - ASSERT((c & 0xF8) == 0xF0); - n = 4; - } - ASSERT(pos - n + 1 >= 0); - - if (c == 0xC2 && *cpos < 0xA0) { - /* Extended ASCII control character (128-159). */ - ASSERT(pos > 0); - ASSERT(0x80 <= *cpos); - PRINT_CHAR(res, fn, arg, '\\'); - PRINT_UWORD(res, fn, arg, 'o', 1, 3, (ErlPfUWord) *cpos); - pos--, cpos++; - } else { - PRINT_BUF(res, fn, arg, cpos-1, n); - cpos += n - 1; - pos -= n - 1; - } + PRINT_CHAR(result, fn, arg, '\''); + + while(position < length) { + int cp_start = position; + codepoint = utf8_decode(s, &position, length); + switch(codepoint) { + case '\'': PRINT_STRING(result, fn, arg, "\\'"); break; + case '\\': PRINT_STRING(result, fn, arg, "\\\\"); break; + case '\n': PRINT_STRING(result, fn, arg, "\\n"); break; + case '\f': PRINT_STRING(result, fn, arg, "\\f"); break; + case '\t': PRINT_STRING(result, fn, arg, "\\t"); break; + case '\r': PRINT_STRING(result, fn, arg, "\\r"); break; + case '\b': PRINT_STRING(result, fn, arg, "\\b"); break; + case '\v': PRINT_STRING(result, fn, arg, "\\v"); break; + default: + if (codepoint < 32 || (codepoint >= 128 && codepoint <= 159)) { + /* ASCII control character (0-31) or extended ASCII control character (128-159)*/ + PRINT_CHAR(result, fn, arg, '\\'); + PRINT_UWORD(result, fn, arg, 'o', 1, 3, (ErlPfUWord) codepoint); } else { - /* Printable ASCII character. */ - PRINT_CHAR(res, fn, arg, (char) c); + PRINT_BUF(result, fn, arg, &s[cp_start], position - cp_start); } - break; - } + break; + } } if (need_quote) - PRINT_CHAR(res, fn, arg, '\''); - return res; + PRINT_CHAR(result, fn, arg, '\''); + return result; } #define PRT_BAR ((Eterm) 0) @@ -657,7 +641,7 @@ print_term(fmtfn_t fn, void* arg, Eterm obj, long *dcount) { Atom *ap = atom_tab(atom_val(fe->module)); PRINT_STRING(res, fn, arg, "#Fun<"); - PRINT_BUF(res, fn, arg, ap->name, ap->len); + PRINT_BUF(res, fn, arg, erts_atom_get_name(ap), ap->len); PRINT_CHAR(res, fn, arg, '.'); PRINT_SWORD(res, fn, arg, 'd', 0, 1, (ErlPfSWord) fe->old_index); From e927b0b01d8c1fc3aef7d03a16a5688c7868bac9 Mon Sep 17 00:00:00 2001 From: Isabell Huang Date: Thu, 11 Jul 2024 18:07:49 +0200 Subject: [PATCH 2/3] erts: Introduce global literals `erl_global_literals` has been redesigned to be more dynamic, allowing any global literal to be created, rather than a small set of literals. Lambdas for exports will now use this interface. The next commit will change atoms to also do that. --- erts/emulator/beam/beam_file.c | 8 +- erts/emulator/beam/dist.c | 8 +- erts/emulator/beam/emu/generators.tab | 2 +- erts/emulator/beam/erl_alloc_util.c | 4 +- erts/emulator/beam/erl_bif_info.c | 16 ++- erts/emulator/beam/erl_global_literals.c | 160 ++++++++++++++++++---- erts/emulator/beam/erl_global_literals.h | 43 ++++-- erts/emulator/beam/erl_init.c | 13 +- erts/emulator/beam/erl_lock_check.c | 2 + erts/emulator/beam/erl_process_dump.c | 10 +- erts/emulator/beam/erl_term.h | 6 +- erts/emulator/beam/export.c | 85 +----------- erts/emulator/beam/jit/arm/generators.tab | 2 +- erts/emulator/beam/jit/x86/generators.tab | 2 +- erts/emulator/sys/common/erl_mmap.c | 24 +++- erts/emulator/sys/common/erl_mmap.h | 6 +- 16 files changed, 224 insertions(+), 167 deletions(-) diff --git a/erts/emulator/beam/beam_file.c b/erts/emulator/beam/beam_file.c index 58e7e722fb2a..8c8398f3d368 100644 --- a/erts/emulator/beam/beam_file.c +++ b/erts/emulator/beam/beam_file.c @@ -522,7 +522,7 @@ static int parse_line_chunk(BeamFile *beam, IFF_Chunk *chunk) { Eterm name, suffix; Eterm *hp; - suffix = erts_get_global_literal(ERTS_LIT_ERL_FILE_SUFFIX); + suffix = ERTS_GLOBAL_LIT_ERL_FILE_SUFFIX; hp = name_heap; name = erts_atom_to_string(&hp, beam->module, suffix); @@ -1312,11 +1312,13 @@ int iff_read_chunk(IFF_File *iff, Uint id, IFF_Chunk *chunk) void beamfile_init(void) { Eterm suffix; Eterm *hp; + struct erl_off_heap_header **ohp; - hp = erts_alloc_global_literal(ERTS_LIT_ERL_FILE_SUFFIX, 8); + hp = erts_global_literal_allocate(8, &ohp); suffix = erts_bin_bytes_to_list(NIL, hp, (byte*)".erl", 4, 0); - erts_register_global_literal(ERTS_LIT_ERL_FILE_SUFFIX, suffix); + erts_global_literal_register(&suffix, hp, 8); + ERTS_GLOBAL_LIT_ERL_FILE_SUFFIX = suffix; } /* * * * * * * */ diff --git a/erts/emulator/beam/dist.c b/erts/emulator/beam/dist.c index 59281208a60e..cc0535de5db1 100644 --- a/erts/emulator/beam/dist.c +++ b/erts/emulator/beam/dist.c @@ -1146,6 +1146,7 @@ void init_dist(void) Eterm *hp_start, *hp, **hpp = NULL, tuple; Uint sz = 0, *szp = &sz; while (1) { + struct erl_off_heap_header **ohp; /* * Sync with dist_util.erl: * @@ -1162,10 +1163,11 @@ void init_dist(void) if (hpp) { ASSERT(is_value(tuple)); ASSERT(hp == hp_start + sz); - erts_register_global_literal(ERTS_LIT_DFLAGS_RECORD, tuple); + erts_global_literal_register(&tuple, hp, sz); + ERTS_GLOBAL_LIT_DFLAGS_RECORD = tuple; break; } - hp = hp_start = erts_alloc_global_literal(ERTS_LIT_DFLAGS_RECORD, sz); + hp = hp_start = erts_global_literal_allocate(sz, &ohp); hpp = &hp; szp = NULL; } @@ -5423,7 +5425,7 @@ BIF_RETTYPE erts_internal_get_dflags_0(BIF_ALIST_0) szp = NULL; } } - return erts_get_global_literal(ERTS_LIT_DFLAGS_RECORD); + return ERTS_GLOBAL_LIT_DFLAGS_RECORD; } BIF_RETTYPE erts_internal_get_creation_0(BIF_ALIST_0) diff --git a/erts/emulator/beam/emu/generators.tab b/erts/emulator/beam/emu/generators.tab index 1fad49e6a1b2..038a1f17a55c 100644 --- a/erts/emulator/beam/emu/generators.tab +++ b/erts/emulator/beam/emu/generators.tab @@ -563,7 +563,7 @@ gen.new_small_map_lit(Dst, Live, Size, Rest) { tmp = thp = erts_alloc(ERTS_ALC_T_LOADER_TMP, ((size == 0 ? 0 : 1) + size/2) * sizeof(*tmp)); if (size == 0) { - keys = erts_get_global_literal(ERTS_LIT_EMPTY_TUPLE); + keys = ERTS_GLOBAL_LIT_EMPTY_TUPLE; } else { keys = make_tuple(thp); *thp++ = make_arityval(size/2); diff --git a/erts/emulator/beam/erl_alloc_util.c b/erts/emulator/beam/erl_alloc_util.c index 9a67e06a5e29..6c7c7d50beb0 100644 --- a/erts/emulator/beam/erl_alloc_util.c +++ b/erts/emulator/beam/erl_alloc_util.c @@ -7744,7 +7744,7 @@ static int gather_ahist_append_result(hist_tree_t *node, void *arg, Sint reds) hp = erts_produce_heap(&state->msg_factory, heap_size, 0); if (state->hist_slot_count == 0) { - histogram_tuple = erts_get_global_literal(ERTS_LIT_EMPTY_TUPLE); + histogram_tuple = ERTS_GLOBAL_LIT_EMPTY_TUPLE; } else { hp[0] = make_arityval(state->hist_slot_count); @@ -8088,7 +8088,7 @@ static void gather_cinfo_append_result(gather_cinfo_t *state, } } if (state->hist_slot_count == 0) { - histogram_tuple = erts_get_global_literal(ERTS_LIT_EMPTY_TUPLE); + histogram_tuple = ERTS_GLOBAL_LIT_EMPTY_TUPLE; } else { hp[0] = make_arityval(state->hist_slot_count); for (ix = 0; ix < state->hist_slot_count; ix++) { diff --git a/erts/emulator/beam/erl_bif_info.c b/erts/emulator/beam/erl_bif_info.c index 6395bbefbdd2..c0536012f94e 100644 --- a/erts/emulator/beam/erl_bif_info.c +++ b/erts/emulator/beam/erl_bif_info.c @@ -2985,7 +2985,7 @@ BIF_RETTYPE system_info_1(BIF_ALIST_1) NIL)); } else if (BIF_ARG_1 == am_os_type) { - BIF_RET(erts_get_global_literal(ERTS_LIT_OS_TYPE)); + BIF_RET(ERTS_GLOBAL_LIT_OS_TYPE); } else if (BIF_ARG_1 == am_allocator) { BIF_RET(erts_allocator_options((void *) BIF_P)); @@ -3001,7 +3001,7 @@ BIF_RETTYPE system_info_1(BIF_ALIST_1) BIF_RET(erts_alloc_util_allocators((void *) BIF_P)); } else if (BIF_ARG_1 == am_os_version) { - BIF_RET(erts_get_global_literal(ERTS_LIT_OS_VERSION)); + BIF_RET(ERTS_GLOBAL_LIT_OS_VERSION); } else if (BIF_ARG_1 == am_version) { int n = sys_strlen(ERLANG_VERSION); @@ -6265,21 +6265,25 @@ static void os_info_init(void) char* buf = erts_alloc(ERTS_ALC_T_TMP, 1024); /* More than enough */ Eterm* hp; Eterm tuple; + struct erl_off_heap_header **ohp; os_flavor(buf, 1024); flav = erts_atom_put((byte *) buf, sys_strlen(buf), ERTS_ATOM_ENC_LATIN1, 1); erts_free(ERTS_ALC_T_TMP, (void *) buf); - hp = erts_alloc_global_literal(ERTS_LIT_OS_TYPE, 3); + + hp = erts_global_literal_allocate(3, &ohp); tuple = TUPLE2(hp, type, flav); - erts_register_global_literal(ERTS_LIT_OS_TYPE, tuple); + erts_global_literal_register(&tuple, hp, 3); + ERTS_GLOBAL_LIT_OS_TYPE = tuple; - hp = erts_alloc_global_literal(ERTS_LIT_OS_VERSION, 4); + hp = erts_global_literal_allocate(4, &ohp); os_version(&major, &minor, &build); tuple = TUPLE3(hp, make_small(major), make_small(minor), make_small(build)); - erts_register_global_literal(ERTS_LIT_OS_VERSION, tuple); + erts_global_literal_register(&tuple, hp, 4); + ERTS_GLOBAL_LIT_OS_VERSION = tuple; } void diff --git a/erts/emulator/beam/erl_global_literals.c b/erts/emulator/beam/erl_global_literals.c index a05c1031426b..581f4db3ef7f 100644 --- a/erts/emulator/beam/erl_global_literals.c +++ b/erts/emulator/beam/erl_global_literals.c @@ -1,7 +1,7 @@ /* * %CopyrightBegin% * - * Copyright Ericsson AB 2020-2021. All Rights Reserved. + * Copyright Ericsson AB 2020-2024. All Rights Reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -25,51 +25,153 @@ #include "sys.h" #include "global.h" #include "erl_global_literals.h" +#include "erl_mmap.h" -struct literal { - Eterm term; - ErtsLiteralArea* area; -}; -static struct literal literals[ERTS_NUM_GLOBAL_LITERALS]; +#define GLOBAL_LITERAL_INITIAL_SIZE (1<<16) +#define GLOBAL_LITERAL_EXPAND_SIZE 512 + /* * Global Constant Literals */ +Eterm ERTS_WRITE_UNLIKELY(ERTS_GLOBAL_LIT_OS_TYPE); +Eterm ERTS_WRITE_UNLIKELY(ERTS_GLOBAL_LIT_OS_VERSION); +Eterm ERTS_WRITE_UNLIKELY(ERTS_GLOBAL_LIT_DFLAGS_RECORD); +Eterm ERTS_WRITE_UNLIKELY(ERTS_GLOBAL_LIT_ERL_FILE_SUFFIX); Eterm ERTS_WRITE_UNLIKELY(ERTS_GLOBAL_LIT_EMPTY_TUPLE); +/* This lock is taken in the beginning of erts_global_literal_allocate, + * released at the end of erts_global_literal_register. It protects the + * allocated literal chunk, and the heap pointer from concurrent access until + * the literal tag is set. + */ +erts_mtx_t global_literal_lock; + +/* Bump allocator for global literal chunks, allocating them in + * reasonably large chunks to simplify crash dumping and avoid fragmenting the + * literal heap too much. + * + * This is protected by the global literal lock. */ +struct global_literal_chunk { + struct global_literal_chunk *next; + Eterm *hp; + + ErtsLiteralArea area; +} *global_literal_chunk = NULL; + + -Eterm* erts_alloc_global_literal(Uint index, Uint sz) +ErtsLiteralArea *erts_global_literal_iterate_area(ErtsLiteralArea *prev) { - ErtsLiteralArea* area; - Uint area_sz; - - ASSERT(index < ERTS_NUM_GLOBAL_LITERALS); - area_sz = sizeof(ErtsLiteralArea) + (sz-1)*sizeof(Eterm); - area = erts_alloc(ERTS_ALC_T_LITERAL, area_sz); - area->end = area->start + sz; - literals[index].area = area; - return area->start; + struct global_literal_chunk *next; + + ASSERT(ERTS_IS_CRASH_DUMPING); + + if (prev != NULL) { + struct global_literal_chunk *chunk = ErtsContainerStruct(prev, + struct global_literal_chunk, + area); + next = chunk->next; + + if (next == NULL) { + return NULL; + } + } else { + next = global_literal_chunk; + } + + next->area.end = next->hp; + return &next->area; } -void erts_register_global_literal(Uint index, Eterm term) +static void expand_shared_global_literal_area(Uint heap_size) { - Eterm* start; + const size_t size = sizeof(struct global_literal_chunk) + + (heap_size - 1) * sizeof(Eterm); + struct global_literal_chunk *chunk; + +#ifndef DEBUG + chunk = (struct global_literal_chunk *) erts_alloc(ERTS_ALC_T_LITERAL, size); +#else + /* erts_mem_guard requires the memory area to be page aligned. Overallocate + * and align the address to ensure that is the case. */ + UWord address; + address = (UWord) erts_alloc(ERTS_ALC_T_LITERAL, size + sys_page_size * 2); + address = (address + (sys_page_size - 1)) & ~(sys_page_size - 1); + chunk = (struct global_literal_chunk *) address; +#endif + + chunk->hp = &chunk->area.start[0]; + chunk->area.end = &chunk->hp[heap_size]; + chunk->area.off_heap = NULL; + chunk->next = global_literal_chunk; - ASSERT(index < ERTS_NUM_GLOBAL_LITERALS); - start = literals[index].area->start; - erts_set_literal_tag(&term, start, literals[index].area->end - start); - literals[index].term = term; + global_literal_chunk = chunk; } -Eterm erts_get_global_literal(Uint index) +Eterm *erts_global_literal_allocate(Uint heap_size, struct erl_off_heap_header ***ohp) { - ASSERT(index < ERTS_NUM_GLOBAL_LITERALS); - return literals[index].term; + Eterm *hp; + + erts_mtx_lock(&global_literal_lock); + + ASSERT((global_literal_chunk->hp <= global_literal_chunk->area.end && + global_literal_chunk->hp >= global_literal_chunk->area.start) ); + if (global_literal_chunk->area.end - global_literal_chunk->hp <= heap_size) { + expand_shared_global_literal_area(heap_size + GLOBAL_LITERAL_EXPAND_SIZE); + } + + *ohp = &global_literal_chunk->area.off_heap; + hp = global_literal_chunk->hp; + global_literal_chunk->hp += heap_size; + +#ifdef DEBUG + { + struct global_literal_chunk *chunk = global_literal_chunk; + erts_mem_guard(&chunk->area.start[0], + (chunk->area.end - &chunk->area.start[0]) * sizeof(Eterm), + 1, + 1); + } +#endif + + return hp; } -ErtsLiteralArea* erts_get_global_literal_area(Uint index) -{ - ASSERT(index < ERTS_NUM_GLOBAL_LITERALS); - return literals[index].area; +void erts_global_literal_register(Eterm *variable, Eterm *hp, Uint heap_size) { + erts_set_literal_tag(variable, hp, heap_size); + +#ifdef DEBUG + { + struct global_literal_chunk *chunk = global_literal_chunk; + erts_mem_guard(&chunk->area.start[0], + (chunk->area.end - &chunk->area.start[0]) * sizeof(Eterm), + 1, + 0); + } +#endif + + erts_mtx_unlock(&global_literal_lock); +} + +static void init_empty_tuple(void) { + struct erl_off_heap_header **ohp; + Eterm* hp = erts_global_literal_allocate(2, &ohp); + Eterm tuple; + hp[0] = make_arityval_zero(); + hp[1] = make_arityval_zero(); + tuple = make_tuple(hp); + erts_global_literal_register(&tuple, hp, 2); + ERTS_GLOBAL_LIT_EMPTY_TUPLE = tuple; } + +void +init_global_literals(void) +{ + erts_mtx_init(&global_literal_lock, "global_literals", NIL, + ERTS_LOCK_FLAGS_PROPERTY_STATIC | ERTS_LOCK_FLAGS_CATEGORY_GENERIC); + + expand_shared_global_literal_area(GLOBAL_LITERAL_INITIAL_SIZE); + init_empty_tuple(); +} \ No newline at end of file diff --git a/erts/emulator/beam/erl_global_literals.h b/erts/emulator/beam/erl_global_literals.h index 9d17c16e66dc..fbf80a612e0f 100644 --- a/erts/emulator/beam/erl_global_literals.h +++ b/erts/emulator/beam/erl_global_literals.h @@ -1,7 +1,7 @@ /* * %CopyrightBegin% * - * Copyright Ericsson AB 1996-2021. All Rights Reserved. + * Copyright Ericsson AB 1996-2024. All Rights Reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,22 +18,41 @@ * %CopyrightEnd% */ +/* Global literals are used to store Erlang terms that are never modified or + * deleted. They are commonly-used constants at compile or run-time. This is + * similar in spirit to persistent_term but for internal usage. + * + * Examples include lambdas associated with export entries, the bitstring + * representation of atoms, and certain constants. + */ + #ifndef __ERL_GLOBAL_LITERALS_H__ #define __ERL_GLOBAL_LITERALS_H__ -#define ERTS_LIT_OS_TYPE 0 -#define ERTS_LIT_OS_VERSION 1 -#define ERTS_LIT_DFLAGS_RECORD 2 -#define ERTS_LIT_EMPTY_TUPLE 3 -#define ERTS_LIT_ERL_FILE_SUFFIX 4 +extern Eterm ERTS_GLOBAL_LIT_OS_TYPE; +extern Eterm ERTS_GLOBAL_LIT_OS_VERSION; +extern Eterm ERTS_GLOBAL_LIT_DFLAGS_RECORD; +extern Eterm ERTS_GLOBAL_LIT_ERL_FILE_SUFFIX; +extern Eterm ERTS_GLOBAL_LIT_EMPTY_TUPLE; -#define ERTS_NUM_GLOBAL_LITERALS 5 +/* Initializes global literals. Note that the literals terms mentioned in the + * examples above may be created elsewhere, and are only kept here for clarity. + */ +void init_global_literals(void); -extern Eterm ERTS_GLOBAL_LIT_EMPTY_TUPLE; +/* Allocates space for global literals. Users must call erts_global_literal_register + * when done creating the literal. + */ +Eterm *erts_global_literal_allocate(Uint sz, struct erl_off_heap_header ***ohp); -Eterm* erts_alloc_global_literal(Uint index, Uint sz); -void erts_register_global_literal(Uint index, Eterm term); -Eterm erts_get_global_literal(Uint index); -ErtsLiteralArea* erts_get_global_literal_area(Uint index); +/* Registers the pointed-to term as a global literal. Must be called for terms + * allocated using erts_global_literal_allocate.*/ +void erts_global_literal_register(Eterm *variable, Eterm *hp, Uint heap_size); + +/* Iterates between global literal areas. Can only be used when crash dumping. + * Iteration is started by passing NULL, then successively calling this function + * until it returns NULL. + */ +ErtsLiteralArea *erts_global_literal_iterate_area(ErtsLiteralArea *prev); #endif diff --git a/erts/emulator/beam/erl_init.c b/erts/emulator/beam/erl_init.c index 26884fcfdaf0..c0c19060c860 100644 --- a/erts/emulator/beam/erl_init.c +++ b/erts/emulator/beam/erl_init.c @@ -239,17 +239,6 @@ void erl_error(const char *fmt, va_list args) static int early_init(int *argc, char **argv); -static void init_constant_literals(void) { - Eterm* hp = erts_alloc_global_literal(ERTS_LIT_EMPTY_TUPLE, 2); - Eterm tuple; - hp[0] = make_arityval_zero(); - hp[1] = make_arityval_zero(); - tuple = make_tuple(hp); - erts_register_global_literal(ERTS_LIT_EMPTY_TUPLE, tuple); - ERTS_GLOBAL_LIT_EMPTY_TUPLE = - erts_get_global_literal(ERTS_LIT_EMPTY_TUPLE); -} - static void erl_init(int ncpu, int proc_tab_sz, @@ -263,7 +252,7 @@ erl_init(int ncpu, int node_tab_delete_delay, ErtsDbSpinCount db_spin_count) { - init_constant_literals(); + init_global_literals(); erts_monitor_link_init(); erts_bif_unique_init(); erts_proc_sig_queue_init(); /* Must be after erts_bif_unique_init(); */ diff --git a/erts/emulator/beam/erl_lock_check.c b/erts/emulator/beam/erl_lock_check.c index 0123f28ce75e..637c1d719bba 100644 --- a/erts/emulator/beam/erl_lock_check.c +++ b/erts/emulator/beam/erl_lock_check.c @@ -171,6 +171,8 @@ static erts_lc_lock_order_t erts_lock_order[] = { {"fun_tab", NULL}, {"atom_tab", NULL}, LEVEL, + {"global_literals", NULL}, + LEVEL, {"alcu_allocator", "index"}, LEVEL, {"mseg", NULL}, diff --git a/erts/emulator/beam/erl_process_dump.c b/erts/emulator/beam/erl_process_dump.c index cf6d03454262..8648d72f9046 100644 --- a/erts/emulator/beam/erl_process_dump.c +++ b/erts/emulator/beam/erl_process_dump.c @@ -888,8 +888,9 @@ dump_literals(fmtfn_t to, void *to_arg) erts_print(to, to_arg, "=literals\n"); - for (i = 0; i < ERTS_NUM_GLOBAL_LITERALS; i++) { - ErtsLiteralArea* area = erts_get_global_literal_area(i); + for (ErtsLiteralArea *area = erts_global_literal_iterate_area(NULL); + area != NULL; + area = erts_global_literal_iterate_area(area)) { dump_module_literals(to, to_arg, area); } @@ -905,11 +906,6 @@ dump_literals(fmtfn_t to, void *to_arg) dump_module_literals(to, to_arg, erts_persistent_areas[idx]); } - for (ErtsLiteralArea *lambda_area = erts_get_next_lambda_lit_area(NULL); - lambda_area != NULL; - lambda_area = erts_get_next_lambda_lit_area(lambda_area)) { - dump_module_literals(to, to_arg, lambda_area); - } } static void diff --git a/erts/emulator/beam/erl_term.h b/erts/emulator/beam/erl_term.h index 1d9ef1607b76..521fe53695d7 100644 --- a/erts/emulator/beam/erl_term.h +++ b/erts/emulator/beam/erl_term.h @@ -317,8 +317,8 @@ _ET_DECLARE_CHECKED(Uint,header_arity,Eterm) /* Due to an optimization that assumes that the word after the arity word is allocated, one should generally not create tuples of arity - zero. One should instead use the literal that can be obtained by - calling erts_get_global_literal(ERTS_LIT_EMPTY_TUPLE). + zero. One should instead use the literal identified by + ERTS_GLOBAL_LIT_EMPTY_TUPLE. If one really wants to create a zero arityval one should use make_arityval_zero() or make_arityval_unchecked(sz) @@ -517,7 +517,7 @@ _ET_DECLARE_CHECKED(Eterm*,tuple_val,Eterm) Due to an optimization that assumes that the word after the arity word is allocated, one should generally not create tuples of arity zero on heaps. One should instead use the literal that can be - obtained by calling erts_get_global_literal(ERTS_LIT_EMPTY_TUPLE). + obtained by use the literal identified by ERTS_GLOBAL_LIT_EMPTY_TUPLE. */ #define TUPLE1(t,e1) \ ((t)[0] = make_arityval(1), \ diff --git a/erts/emulator/beam/export.c b/erts/emulator/beam/export.c index 8ad860f08a53..4e4d3affc108 100644 --- a/erts/emulator/beam/export.c +++ b/erts/emulator/beam/export.c @@ -28,19 +28,13 @@ #include "export.h" #include "hash.h" #include "jit/beam_asm.h" +#include "erl_global_literals.h" #define EXPORT_INITIAL_SIZE 4000 #define EXPORT_LIMIT (512*1024) #define EXPORT_HASH(m,f,a) ((atom_val(m) * atom_val(f)) ^ (a)) -#ifndef DEBUG -# define SHARED_LAMBDA_INITIAL_SIZE EXPORT_INITIAL_SIZE -# define SHARED_LAMBDA_EXPAND_SIZE 512 -#else -# define SHARED_LAMBDA_INITIAL_SIZE 256 -# define SHARED_LAMBDA_EXPAND_SIZE 16 -#endif #ifdef DEBUG # define IF_DEBUG(x) x @@ -57,17 +51,6 @@ static erts_atomic_t total_entries_bytes; */ erts_mtx_t export_staging_lock; -/* Bump allocator for globally shared external funs, allocating them in - * reasonably large chunks to simplify crash dumping and avoid fragmenting the - * literal heap too much. - * - * This is protected by the export staging lock. */ -struct lambda_chunk { - struct lambda_chunk *next; - Eterm *hp; - - ErtsLiteralArea area; -} *lambda_chunk = NULL; struct export_entry { @@ -129,70 +112,21 @@ export_cmp(struct export_entry* tmpl_e, struct export_entry* obj_e) tmpl->info.mfa.arity == obj->info.mfa.arity); } -ErtsLiteralArea *erts_get_next_lambda_lit_area(ErtsLiteralArea *prev) -{ - struct lambda_chunk *next; - - ASSERT(ERTS_IS_CRASH_DUMPING); - - if (prev != NULL) { - struct lambda_chunk *chunk = ErtsContainerStruct(prev, - struct lambda_chunk, - area); - next = chunk->next; - - if (next == NULL) { - return NULL; - } - } else { - next = lambda_chunk; - } - - next->area.end = next->hp; - return &next->area; -} - -static void expand_shared_lambda_area(Uint count) -{ - struct lambda_chunk *chunk; - Uint heap_size; - - ERTS_LC_ASSERT(erts_lc_mtx_is_locked(&export_staging_lock)); - - heap_size = count * ERL_FUN_SIZE; - chunk = erts_alloc(ERTS_ALC_T_LITERAL, - sizeof(struct lambda_chunk) + - (heap_size - 1) * sizeof(Eterm)); - chunk->hp = &chunk->area.start[0]; - chunk->area.end = &chunk->hp[heap_size]; - chunk->area.off_heap = NULL; - chunk->next = lambda_chunk; - - lambda_chunk = chunk; -} static void create_shared_lambda(Export *export) { ErlFunThing *lambda; - + struct erl_off_heap_header **ohp; ERTS_LC_ASSERT(erts_lc_mtx_is_locked(&export_staging_lock)); - - ASSERT((lambda_chunk->hp <= lambda_chunk->area.end && - lambda_chunk->hp >= lambda_chunk->area.start) && - ((lambda_chunk->area.end - lambda_chunk->hp) % ERL_FUN_SIZE) == 0); - if (lambda_chunk->hp == lambda_chunk->area.end) { - expand_shared_lambda_area(SHARED_LAMBDA_EXPAND_SIZE); - } - - lambda = (ErlFunThing*)lambda_chunk->hp; - lambda_chunk->hp += ERL_FUN_SIZE; + + lambda = (ErlFunThing*)erts_global_literal_allocate(ERL_FUN_SIZE, &ohp); lambda->thing_word = MAKE_FUN_HEADER(export->info.mfa.arity, 0, 1); lambda->entry.exp = export; export->lambda = make_fun(lambda); - erts_set_literal_tag(&export->lambda, (Eterm*)lambda, ERL_FUN_SIZE); + erts_global_literal_register(&export->lambda, (Eterm*)lambda, ERL_FUN_SIZE); } static struct export_entry* @@ -285,15 +219,6 @@ init_export_table(void) EXPORT_INITIAL_SIZE, EXPORT_LIMIT, f); } -#ifdef ERTS_ENABLE_LOCK_CHECK - export_staging_lock(); -#endif - - expand_shared_lambda_area(SHARED_LAMBDA_INITIAL_SIZE); - -#ifdef ERTS_ENABLE_LOCK_CHECK - export_staging_unlock(); -#endif } static struct export_entry* init_template(struct export_templ* templ, diff --git a/erts/emulator/beam/jit/arm/generators.tab b/erts/emulator/beam/jit/arm/generators.tab index 7a4aa84028a3..c020225cdf1a 100644 --- a/erts/emulator/beam/jit/arm/generators.tab +++ b/erts/emulator/beam/jit/arm/generators.tab @@ -262,7 +262,7 @@ gen.new_small_map_lit(Dst, Live, Size, Rest) { tmp = thp = erts_alloc(ERTS_ALC_T_LOADER_TMP, ((size == 0 ? 0 : 1) + size/2) * sizeof(*tmp)); if (size == 0) { - keys = erts_get_global_literal(ERTS_LIT_EMPTY_TUPLE); + keys = ERTS_GLOBAL_LIT_EMPTY_TUPLE; } else { keys = make_tuple(thp); *thp++ = make_arityval(size/2); diff --git a/erts/emulator/beam/jit/x86/generators.tab b/erts/emulator/beam/jit/x86/generators.tab index 84ac8da3fced..9695118e7955 100644 --- a/erts/emulator/beam/jit/x86/generators.tab +++ b/erts/emulator/beam/jit/x86/generators.tab @@ -332,7 +332,7 @@ gen.new_small_map_lit(Dst, Live, Size, Rest) { tmp = thp = erts_alloc(ERTS_ALC_T_LOADER_TMP, ((size == 0 ? 0 : 1) + size/2) * sizeof(*tmp)); if (size == 0) { - keys = erts_get_global_literal(ERTS_LIT_EMPTY_TUPLE); + keys = ERTS_GLOBAL_LIT_EMPTY_TUPLE; } else { keys = make_tuple(thp); *thp++ = make_arityval(size/2); diff --git a/erts/emulator/sys/common/erl_mmap.c b/erts/emulator/sys/common/erl_mmap.c index 4dfa1ba3c00d..35d9dbfde917 100644 --- a/erts/emulator/sys/common/erl_mmap.c +++ b/erts/emulator/sys/common/erl_mmap.c @@ -32,19 +32,35 @@ #include #endif -int erts_mem_guard(void *p, UWord size) { +int erts_mem_guard(void *p, UWord size, int readable, int writable) { + #if defined(WIN32) DWORD oldProtect; + DWORD newProtect = PAGE_NOACCESS; BOOL success; - + if (readable && writable) { + newProtect = PAGE_READWRITE; + } else if (readable) { + newProtect = PAGE_READONLY; + } else { + ERTS_INTERNAL_ERROR(!"mem_guard invalid page permissions"); + } success = VirtualProtect((LPVOID*)p, size, - PAGE_NOACCESS, + newProtect, &oldProtect); return success ? 0 : -1; #elif defined(HAVE_SYS_MMAN_H) - return mprotect(p, size, PROT_NONE); + int flags = 0; + + if (writable) { + flags |= PROT_WRITE; + } + if (readable) { + flags |= PROT_READ; + } + return mprotect(p, size, flags); #else errno = ENOTSUP; return -1; diff --git a/erts/emulator/sys/common/erl_mmap.h b/erts/emulator/sys/common/erl_mmap.h index 648cd6f0ce6f..1a0b3713f52a 100644 --- a/erts/emulator/sys/common/erl_mmap.h +++ b/erts/emulator/sys/common/erl_mmap.h @@ -182,10 +182,10 @@ void hard_dbg_remove_mseg(void* seg, UWord sz); #endif /* HAVE_ERTS_MMAP */ -/* Marks the given memory region as permanently inaccessible. - * +/* Changes the permissions of the given memory region. + * Assumes proper page alignment. * Returns 0 on success, and -1 on error. */ -int erts_mem_guard(void *p, UWord size); +int erts_mem_guard(void *p, UWord size, int readable, int writable); /* Marks the given memory region as unused without freeing it, letting the OS * reclaim its physical memory with the promise that we'll get it back (without From d1fca18d8c40d6851c53f90035cf3715e330e54e Mon Sep 17 00:00:00 2001 From: Isabell Huang Date: Thu, 11 Jul 2024 18:09:19 +0200 Subject: [PATCH 3/3] erts: Optimize atom_to_binary/1 When an atom is created, we now create a binary literal of it, so that atom_to_binary returns the pre-allocated binary literal instead of a newly converted one. --- erts/emulator/beam/atom.c | 132 +++++++++------------ erts/emulator/beam/atom.h | 11 +- erts/emulator/beam/bif.c | 4 +- erts/emulator/beam/dist.c | 2 +- erts/emulator/beam/erl_bif_ddll.c | 4 +- erts/emulator/beam/erl_bif_info.c | 2 +- erts/emulator/beam/erl_bif_re.c | 2 +- erts/emulator/beam/erl_db_util.c | 4 +- erts/emulator/beam/erl_nif.c | 10 +- erts/emulator/beam/erl_printf_term.c | 2 +- erts/emulator/beam/erl_process_dump.c | 2 +- erts/emulator/beam/erl_unicode.c | 58 +++++---- erts/emulator/beam/erl_utils.h | 6 +- erts/emulator/beam/external.c | 12 +- erts/emulator/beam/global.h | 2 +- erts/emulator/beam/jit/beam_jit_common.cpp | 2 +- erts/emulator/test/bif_SUITE.erl | 6 + erts/etc/unix/etp-commands.in | 33 +++++- 18 files changed, 155 insertions(+), 139 deletions(-) diff --git a/erts/emulator/beam/atom.c b/erts/emulator/beam/atom.c index 036816df2fae..390ca0443566 100644 --- a/erts/emulator/beam/atom.c +++ b/erts/emulator/beam/atom.c @@ -28,6 +28,7 @@ #include "global.h" #include "hash.h" #include "atom.h" +#include "erl_global_literals.h" #define ATOM_SIZE 3000 @@ -48,19 +49,6 @@ static erts_rwmtx_t atom_table_lock; static erts_atomic_t atom_put_ops; #endif -/* Functions for allocating space for the ext of atoms. We do not - * use malloc for each atom to prevent excessive memory fragmentation - */ - -typedef struct _atom_text { - struct _atom_text* next; - unsigned char text[ATOM_TEXT_SIZE]; -} AtomText; - -static AtomText* text_list; /* List of text buffers */ -static byte *atom_text_pos; -static byte *atom_text_end; -static Uint reserved_atom_space; /* Total amount of atom text space */ static Uint atom_space; /* Amount of atom text space used */ /* @@ -81,44 +69,8 @@ void atom_info(fmtfn_t to, void *to_arg) atom_read_unlock(); } -/* - * Allocate an atom text segment. - */ -static void -more_atom_space(void) -{ - AtomText* ptr; - - ptr = (AtomText*) erts_alloc(ERTS_ALC_T_ATOM_TXT, sizeof(AtomText)); - ptr->next = text_list; - text_list = ptr; - atom_text_pos = ptr->text; - atom_text_end = atom_text_pos + ATOM_TEXT_SIZE; - reserved_atom_space += sizeof(AtomText); - - VERBOSE(DEBUG_SYSTEM,("Allocated %d atom space\n",ATOM_TEXT_SIZE)); -} - -/* - * Allocate string space within an atom text segment. - */ - -static byte* -atom_text_alloc(int bytes) -{ - byte *res; - - ASSERT(bytes <= MAX_ATOM_SZ_LIMIT); - if (atom_text_pos + bytes >= atom_text_end) { - more_atom_space(); - } - res = atom_text_pos; - atom_text_pos += bytes; - atom_space += bytes; - return res; -} /* * Calculate atom hash value (using the hash algorithm @@ -128,7 +80,7 @@ atom_text_alloc(int bytes) static HashValue atom_hash(Atom* obj) { - byte* p = obj->name; + byte* p = obj->u.name; int len = obj->len; HashValue h = 0, g; byte v; @@ -150,12 +102,23 @@ atom_hash(Atom* obj) return h; } +const byte *erts_atom_get_name(const Atom *atom) +{ + byte *name; + Uint size; + Uint offset; + ERTS_GET_BITSTRING(atom->u.bin, name, offset, size); + ASSERT(offset == 0 && (size % 8) == 0); + (void) size; + (void) offset; + return name; +} static int atom_cmp(Atom* tmpl, Atom* obj) { if (tmpl->len == obj->len && - sys_memcmp(tmpl->name, obj->name, tmpl->len) == 0) + sys_memcmp(tmpl->u.name, erts_atom_get_name(obj), tmpl->len) == 0) return 0; return 1; } @@ -164,13 +127,39 @@ atom_cmp(Atom* tmpl, Atom* obj) static Atom* atom_alloc(Atom* tmpl) { - Atom* obj = (Atom*) erts_alloc(ERTS_ALC_T_ATOM, sizeof(Atom)); + Atom *obj = (Atom*) erts_alloc(ERTS_ALC_T_ATOM, sizeof(Atom)); + + { + Eterm *hp; + Uint heap_size = 0; + ErtsHeapFactory factory; + ErlOffHeap oh; + struct erl_off_heap_header **literal_ohp; + + if (tmpl->len <= ERL_ONHEAP_BINARY_LIMIT) { + heap_size = heap_bits_size(NBITS(tmpl->len)); + } else { + heap_size = ERL_REFC_BITS_SIZE; + } - obj->name = atom_text_alloc(tmpl->len); - sys_memcpy(obj->name, tmpl->name, tmpl->len); + hp = erts_global_literal_allocate(heap_size, &literal_ohp); + ERTS_INIT_OFF_HEAP(&oh); + oh.first = *literal_ohp; + + erts_factory_static_init(&factory, hp, heap_size, &oh); + *literal_ohp = oh.first; + obj->u.bin = erts_hfact_new_binary_from_data(&factory, + 0, + tmpl->len, + tmpl->u.name); + erts_global_literal_register(&obj->u.bin, hp, heap_size); + } + obj->len = tmpl->len; obj->latin1_chars = tmpl->latin1_chars; obj->slot.index = -1; + atom_space += tmpl->len; + /* * Precompute ordinal value of first 3 bytes + 7 bits. @@ -186,7 +175,7 @@ atom_alloc(Atom* tmpl) j = (tmpl->len < 4) ? tmpl->len : 4; for(i = 0; i < j; ++i) - c[i] = tmpl->name[i]; + c[i] = tmpl->u.name[i]; for(; i < 4; ++i) c[i] = '\0'; obj->ord0 = (c[0] << 23) + (c[1] << 15) + (c[2] << 7) + (c[3] >> 1); @@ -293,7 +282,7 @@ erts_atom_put_index(const byte *name, Sint len, ErtsAtomEncoding enc, int trunc) } a.len = tlen; - a.name = (byte *) text; + a.u.name = (byte *) text; atom_read_lock(); aix = index_get(&erts_atom_table, (void*) &a); atom_read_unlock(); @@ -333,7 +322,7 @@ erts_atom_put_index(const byte *name, Sint len, ErtsAtomEncoding enc, int trunc) a.len = tlen; a.latin1_chars = (Sint16) no_latin1_chars; - a.name = (byte *) text; + a.u.name = (byte *) text; atom_write_lock(); aix = index_put(&erts_atom_table, (void*) &a); atom_write_unlock(); @@ -400,7 +389,7 @@ erts_atom_get(const char *name, Uint len, Eterm* ap, ErtsAtomEncoding enc) latin1_to_utf8(utf8_copy, sizeof(utf8_copy), (const byte**)&name, &len); - a.name = (byte*)name; + a.u.name = (byte*)name; a.len = (Sint16)len; break; case ERTS_ATOM_ENC_7BIT_ASCII: @@ -415,7 +404,7 @@ erts_atom_get(const char *name, Uint len, Eterm* ap, ErtsAtomEncoding enc) } a.len = (Sint16)len; - a.name = (byte*)name; + a.u.name = (byte*)name; break; case ERTS_ATOM_ENC_UTF8: if (len > MAX_ATOM_SZ_LIMIT) { @@ -427,7 +416,7 @@ erts_atom_get(const char *name, Uint len, Eterm* ap, ErtsAtomEncoding enc) * name will fail. */ a.len = (Sint16)len; - a.name = (byte*)name; + a.u.name = (byte*)name; break; } @@ -446,7 +435,7 @@ erts_atom_get_text_space_sizes(Uint *reserved, Uint *used) if (lock) atom_read_lock(); if (reserved) - *reserved = reserved_atom_space; + *reserved = atom_space; if (used) *used = atom_space; if (lock) @@ -479,33 +468,28 @@ init_atom_table(void) f.meta_free = (HMFREE_FUN) erts_free; f.meta_print = (HMPRINT_FUN) erts_print; - atom_text_pos = NULL; - atom_text_end = NULL; - reserved_atom_space = 0; - atom_space = 0; - text_list = NULL; - erts_index_init(ERTS_ALC_T_ATOM_TABLE, &erts_atom_table, "atom_tab", ATOM_SIZE, erts_atom_table_size, f); - more_atom_space(); - /* Ordinary atoms */ + /* Ordinary atoms. a is a template for creating an entry in the atom table */ for (i = 0; erl_atom_names[i] != 0; i++) { int ix; a.len = sys_strlen(erl_atom_names[i]); a.latin1_chars = a.len; - a.name = (byte*)erl_atom_names[i]; + a.u.name = (byte*)erl_atom_names[i]; a.slot.index = i; + + #ifdef DEBUG /* Verify 7-bit ascii */ for (ix = 0; ix < a.len; ix++) { - ASSERT((a.name[ix] & 0x80) == 0); + ASSERT((a.u.name[ix] & 0x80) == 0); } #endif ix = index_put(&erts_atom_table, (void*) &a); - atom_text_pos -= a.len; - atom_space -= a.len; - atom_tab(ix)->name = (byte*)erl_atom_names[i]; + (void) ix; + /* Assert that the entry in the atom table is not a template */ + ASSERT(erts_atom_get_name(atom_tab(ix))); } } diff --git a/erts/emulator/beam/atom.h b/erts/emulator/beam/atom.h index 681bd4586f06..ff05bc35d2cf 100644 --- a/erts/emulator/beam/atom.h +++ b/erts/emulator/beam/atom.h @@ -50,7 +50,10 @@ typedef struct atom { Sint16 len; /* length of atom name (UTF-8 encoded) */ Sint16 latin1_chars; /* 0-255 if atom can be encoded in latin1; otherwise, -1 */ int ord0; /* ordinal value of first 3 bytes + 7 bits */ - byte* name; /* name of atom */ + union{ + byte* name; /* name of atom, used by templates */ + Eterm bin; /* name of atom, used when atom is in table*/ + } u; } Atom; extern IndexTable erts_atom_table; @@ -59,6 +62,8 @@ ERTS_GLB_INLINE Atom* atom_tab(Uint i); ERTS_GLB_INLINE int erts_is_atom_utf8_bytes(byte *text, size_t len, Eterm term); ERTS_GLB_INLINE int erts_is_atom_str(const char *str, Eterm term, int is_latin1); +const byte *erts_atom_get_name(const Atom *atom); + #if ERTS_GLB_INLINE_INCL_FUNC_DEF ERTS_GLB_INLINE Atom* atom_tab(Uint i) @@ -73,7 +78,7 @@ ERTS_GLB_INLINE int erts_is_atom_utf8_bytes(byte *text, size_t len, Eterm term) return 0; a = atom_tab(atom_val(term)); return (len == (size_t) a->len - && sys_memcmp((void *) a->name, (void *) text, len) == 0); + && sys_memcmp((void *) erts_atom_get_name(a), (void *) text, len) == 0); } ERTS_GLB_INLINE int erts_is_atom_str(const char *str, Eterm term, int is_latin1) @@ -87,7 +92,7 @@ ERTS_GLB_INLINE int erts_is_atom_str(const char *str, Eterm term, int is_latin1) return 0; a = atom_tab(atom_val(term)); len = a->len; - aname = a->name; + aname = erts_atom_get_name(a); if (is_latin1) { for (i = 0; i < len; s++) { if (aname[i] < 0x80) { diff --git a/erts/emulator/beam/bif.c b/erts/emulator/beam/bif.c index 9d6fd75807d3..d48292c2ae2c 100644 --- a/erts/emulator/beam/bif.c +++ b/erts/emulator/beam/bif.c @@ -2929,10 +2929,10 @@ BIF_RETTYPE atom_to_list_1(BIF_ALIST_1) BIF_RET(NIL); /* the empty atom */ ares = - erts_analyze_utf8(ap->name, ap->len, &err_pos, &num_chars, NULL); + erts_analyze_utf8(erts_atom_get_name(ap), ap->len, &err_pos, &num_chars, NULL); ASSERT(ares == ERTS_UTF8_OK); (void)ares; - res = erts_utf8_to_list(BIF_P, num_chars, ap->name, ap->len, ap->len, + res = erts_utf8_to_list(BIF_P, num_chars, erts_atom_get_name(ap), ap->len, ap->len, &num_built, &num_eaten, NIL); ASSERT(num_built == num_chars); ASSERT(num_eaten == ap->len); diff --git a/erts/emulator/beam/dist.c b/erts/emulator/beam/dist.c index cc0535de5db1..920fa81f70b7 100644 --- a/erts/emulator/beam/dist.c +++ b/erts/emulator/beam/dist.c @@ -838,7 +838,7 @@ int is_node_name_atom(Eterm a) return 0; i = atom_val(a); ASSERT((i > 0) && (i < atom_table_size()) && (atom_tab(i) != NULL)); - return is_node_name((char*)atom_tab(i)->name, atom_tab(i)->len); + return is_node_name((char*)erts_atom_get_name(atom_tab(i)), atom_tab(i)->len); } static void diff --git a/erts/emulator/beam/erl_bif_ddll.c b/erts/emulator/beam/erl_bif_ddll.c index 9279b5cce743..15f37e3fd5b7 100644 --- a/erts/emulator/beam/erl_bif_ddll.c +++ b/erts/emulator/beam/erl_bif_ddll.c @@ -1725,7 +1725,7 @@ static int errdesc_to_code(Eterm errdesc, int *code /* out */) for (i = 0; errcode_tab[i].atm != NULL; ++i) { int len = sys_strlen(errcode_tab[i].atm); if (len == ap->len && - !sys_strncmp(errcode_tab[i].atm,(char *) ap->name,len)) { + !sys_strncmp(errcode_tab[i].atm,(char *) erts_atom_get_name(ap),len)) { *code = errcode_tab[i].code; return 0; } @@ -1799,7 +1799,7 @@ static char *pick_list_or_atom(Eterm name_term) goto error; } name = erts_alloc(ERTS_ALC_T_DDLL_TMP_BUF, ap->len + 1); - sys_memcpy(name,ap->name,ap->len); + sys_memcpy(name,erts_atom_get_name(ap),ap->len); name[ap->len] = '\0'; } else { if (erts_iolist_size(name_term, &name_len)) { diff --git a/erts/emulator/beam/erl_bif_info.c b/erts/emulator/beam/erl_bif_info.c index c0536012f94e..08cd96ee652e 100644 --- a/erts/emulator/beam/erl_bif_info.c +++ b/erts/emulator/beam/erl_bif_info.c @@ -2703,7 +2703,7 @@ c_compiler_used(Eterm **hpp, Uint *szp) static int is_snif_term(Eterm module_atom) { int i; Atom *a = atom_tab(atom_val(module_atom)); - char *aname = (char *) a->name; + char *aname = (char *) erts_atom_get_name(a); /* if a->name has a '.' then the bif (snif) is bogus i.e a package */ for (i = 0; i < a->len; i++) { diff --git a/erts/emulator/beam/erl_bif_re.c b/erts/emulator/beam/erl_bif_re.c index 91554f65f7f9..38072f3fcd73 100644 --- a/erts/emulator/beam/erl_bif_re.c +++ b/erts/emulator/beam/erl_bif_re.c @@ -1024,7 +1024,7 @@ build_capture(Eterm capture_spec[CAPSPEC_SIZE], const pcre *code) } } ASSERT(tmpb != NULL); - sys_memcpy(tmpb,ap->name,ap->len); + sys_memcpy(tmpb,erts_atom_get_name(ap),ap->len); tmpb[ap->len] = '\0'; } else { ErlDrvSizeT slen; diff --git a/erts/emulator/beam/erl_db_util.c b/erts/emulator/beam/erl_db_util.c index b7666a0eb209..43526995b112 100644 --- a/erts/emulator/beam/erl_db_util.c +++ b/erts/emulator/beam/erl_db_util.c @@ -3851,13 +3851,13 @@ bool db_eq_comp(DbTableCommon* tb, Eterm a, DbTerm* b) int db_is_variable(Eterm obj) { - byte *b; + const byte *b; int n; int N; if (is_not_atom(obj)) return -1; - b = atom_tab(atom_val(obj))->name; + b = erts_atom_get_name(atom_tab(atom_val(obj))); if ((n = atom_tab(atom_val(obj))->len) < 2) return -1; if (*b++ != '$') diff --git a/erts/emulator/beam/erl_nif.c b/erts/emulator/beam/erl_nif.c index d9c61182a36e..2563d40ad563 100644 --- a/erts/emulator/beam/erl_nif.c +++ b/erts/emulator/beam/erl_nif.c @@ -1799,9 +1799,9 @@ int enif_get_atom(ErlNifEnv* env, Eterm atom, char* buf, unsigned len, return 0; } if (ap->latin1_chars == ap->len) { - sys_memcpy(buf, ap->name, ap->len); + sys_memcpy(buf, erts_atom_get_name(ap), ap->len); } else { - int dlen = erts_utf8_to_latin1((byte*)buf, ap->name, ap->len); + int dlen = erts_utf8_to_latin1((byte*)buf, erts_atom_get_name(ap), ap->len); ASSERT(dlen == ap->latin1_chars); (void)dlen; } buf[ap->latin1_chars] = '\0'; @@ -1810,7 +1810,7 @@ int enif_get_atom(ErlNifEnv* env, Eterm atom, char* buf, unsigned len, if (ap->len >= len) { return 0; } - sys_memcpy(buf, ap->name, ap->len); + sys_memcpy(buf, erts_atom_get_name(ap), ap->len); buf[ap->len] = '\0'; return ap->len + 1; } @@ -4480,8 +4480,8 @@ void erts_print_nif_taints(fmtfn_t to, void* to_arg) t = (struct tainted_module_t*) erts_atomic_read_nob(&first_taint); for ( ; t; t = t->next) { - const Atom* atom = atom_tab(atom_val(t->module_atom)); - erts_cbprintf(to,to_arg,"%s%.*s", delim, atom->len, atom->name); + Atom* atom = atom_tab(atom_val(t->module_atom)); + erts_cbprintf(to,to_arg,"%s%.*s", delim, atom->len, erts_atom_get_name(atom)); delim = ","; } erts_cbprintf(to,to_arg,"\n"); diff --git a/erts/emulator/beam/erl_printf_term.c b/erts/emulator/beam/erl_printf_term.c index 9b7eeac22a6f..95f94c9bea3a 100644 --- a/erts/emulator/beam/erl_printf_term.c +++ b/erts/emulator/beam/erl_printf_term.c @@ -254,7 +254,7 @@ static int print_atom_name(fmtfn_t fn, void* arg, Eterm atom, long *dcount) return result; } - s = entry->name; + s = erts_atom_get_name(entry); length = entry->len; *dcount -= entry->len; diff --git a/erts/emulator/beam/erl_process_dump.c b/erts/emulator/beam/erl_process_dump.c index 8648d72f9046..89e7865bc77e 100644 --- a/erts/emulator/beam/erl_process_dump.c +++ b/erts/emulator/beam/erl_process_dump.c @@ -323,7 +323,7 @@ dump_element(fmtfn_t to, void *to_arg, Eterm x) erts_print(to, to_arg, "H" PTR_FMT, boxed_val(x)); } else if (is_immed(x)) { if (is_atom(x)) { - unsigned char* s = atom_tab(atom_val(x))->name; + const byte* s = erts_atom_get_name(atom_tab(atom_val(x))); int len = atom_tab(atom_val(x))->len; int i; diff --git a/erts/emulator/beam/erl_unicode.c b/erts/emulator/beam/erl_unicode.c index 970ca89bd9c0..9e2924946d0a 100644 --- a/erts/emulator/beam/erl_unicode.c +++ b/erts/emulator/beam/erl_unicode.c @@ -1347,7 +1347,7 @@ static Eterm do_utf8_to_list(Process *p, Uint num, const byte *bytes, Uint sz, num_built, num_eaten, tail); } -Eterm erts_utf8_to_list(Process *p, Uint num, byte *bytes, Uint sz, Uint left, +Eterm erts_utf8_to_list(Process *p, Uint num, const byte *bytes, Uint sz, Uint left, Uint *num_built, Uint *num_eaten, Eterm tail) { return do_utf8_to_list(p, num, bytes, sz, left, num_built, num_eaten, tail); @@ -1366,7 +1366,7 @@ Uint erts_atom_to_string_length(Eterm atom) const byte* err_pos; Uint num_chars; int ares = - erts_analyze_utf8(ap->name, ap->len, &err_pos, &num_chars, NULL); + erts_analyze_utf8(erts_atom_get_name(ap), ap->len, &err_pos, &num_chars, NULL); ASSERT(ares == ERTS_UTF8_OK); (void)ares; return num_chars; @@ -1380,7 +1380,7 @@ Eterm erts_atom_to_string(Eterm **hpp, Eterm atom, Eterm tail) ASSERT(is_atom(atom)); ap = atom_tab(atom_val(atom)); if (ap->latin1_chars >= 0) - return buf_to_intlist(hpp, (char*)ap->name, ap->len, tail); + return buf_to_intlist(hpp, (char*)erts_atom_get_name(ap), ap->len, tail); else { Eterm res; const byte* err_pos; @@ -1389,10 +1389,10 @@ Eterm erts_atom_to_string(Eterm **hpp, Eterm atom, Eterm tail) Eterm *hp_start = *hpp; int ares = #endif - erts_analyze_utf8(ap->name, ap->len, &err_pos, &num_chars, NULL); + erts_analyze_utf8(erts_atom_get_name(ap), ap->len, &err_pos, &num_chars, NULL); ASSERT(ares == ERTS_UTF8_OK); - res = erts_make_list_from_utf8_buf(hpp, num_chars, ap->name, ap->len, + res = erts_make_list_from_utf8_buf(hpp, num_chars, erts_atom_get_name(ap), ap->len, &num_built, &num_eaten, tail); ASSERT(num_built == num_chars); @@ -1924,26 +1924,23 @@ BIF_RETTYPE atom_to_binary_2(BIF_ALIST_2) ap = atom_tab(atom_val(BIF_ARG_1)); if (BIF_ARG_2 == am_latin1) { - Eterm bin_term; - + Eterm bin_term; if (ap->latin1_chars < 0) { goto error; } if (ap->latin1_chars == ap->len) { - bin_term = erts_new_binary_from_data(BIF_P, ap->len, ap->name); + BIF_RET(ap->u.bin); } else { byte* bin_p; int dbg_sz; - bin_term = erts_new_binary(BIF_P, ap->latin1_chars, &bin_p); - dbg_sz = erts_utf8_to_latin1(bin_p, ap->name, ap->len); + dbg_sz = erts_utf8_to_latin1(bin_p, erts_atom_get_name(ap), ap->len); ASSERT(dbg_sz == ap->latin1_chars); (void)dbg_sz; + BIF_RET(bin_term); } - - BIF_RET(bin_term); } else if (BIF_ARG_2 == am_utf8 || BIF_ARG_2 == am_unicode) { - BIF_RET(erts_new_binary_from_data(BIF_P, ap->len, ap->name)); + BIF_RET(ap->u.bin); } else { error: BIF_ERROR(BIF_P, BADARG); @@ -2233,12 +2230,13 @@ Sint erts_native_filename_need(Eterm ioterm, int encoding) need = 2* ap->latin1_chars; } else { + const byte * name = erts_atom_get_name(ap); for (i = 0; i < ap->len; ) { - if (ap->name[i] < 0x80) { + if (name[i] < 0x80) { i++; - } else if (ap->name[i] < 0xE0) { + } else if (name[i] < 0xE0) { i += 2; - } else if (ap->name[i] < 0xF0) { + } else if (name[i] < 0xF0) { i += 3; } else { need = -1; @@ -2256,7 +2254,7 @@ Sint erts_native_filename_need(Eterm ioterm, int encoding) * the middle of filenames */ if (need > 0) { - byte *name = ap->name; + const byte *name = erts_atom_get_name(ap); int len = ap->len; for (i = 0; i < len; i++) { if (name[i] == 0) { @@ -2398,33 +2396,33 @@ void erts_native_filename_put(Eterm ioterm, int encoding, byte *p) switch (encoding) { case ERL_FILENAME_LATIN1: for (i = 0; i < ap->len; i++) { - if (ap->name[i] < 0x80) { - *p++ = ap->name[i]; + if (erts_atom_get_name(ap)[i] < 0x80) { + *p++ = erts_atom_get_name(ap)[i]; } else { - ASSERT(ap->name[i] < 0xC4); - *p++ = ((ap->name[i] & 3) << 6) | (ap->name[i+1] & 0x3F); + ASSERT(erts_atom_get_name(ap)[i] < 0xC4); + *p++ = ((erts_atom_get_name(ap)[i] & 3) << 6) | (erts_atom_get_name(ap)[i+1] & 0x3F); i++; } } break; case ERL_FILENAME_UTF8_MAC: case ERL_FILENAME_UTF8: - sys_memcpy(p, ap->name, ap->len); + sys_memcpy(p, erts_atom_get_name(ap), ap->len); break; case ERL_FILENAME_WIN_WCHAR: for (i = 0; i < ap->len; i++) { /* Little endian */ - if (ap->name[i] < 0x80) { - *p++ = ap->name[i]; + if (erts_atom_get_name(ap)[i] < 0x80) { + *p++ = erts_atom_get_name(ap)[i]; *p++ = 0; - } else if (ap->name[i] < 0xE0) { - *p++ = ((ap->name[i] & 3) << 6) | (ap->name[i+1] & 0x3F); - *p++ = ((ap->name[i] & 0x1C) >> 2); + } else if (erts_atom_get_name(ap)[i] < 0xE0) { + *p++ = ((erts_atom_get_name(ap)[i] & 3) << 6) | (erts_atom_get_name(ap)[i+1] & 0x3F); + *p++ = ((erts_atom_get_name(ap)[i] & 0x1C) >> 2); i++; } else { - ASSERT(ap->name[i] < 0xF0); - *p++ = ((ap->name[i+1] & 3) << 6) | (ap->name[i+2] & 0x3C); - *p++ = ((ap->name[i] & 0xF) << 4) | ((ap->name[i+1] & 0x3C) >> 2); + ASSERT(erts_atom_get_name(ap)[i] < 0xF0); + *p++ = ((erts_atom_get_name(ap)[i+1] & 3) << 6) | (erts_atom_get_name(ap)[i+2] & 0x3C); + *p++ = ((erts_atom_get_name(ap)[i] & 0xF) << 4) | ((erts_atom_get_name(ap)[i+1] & 0x3C) >> 2); i += 2; } } diff --git a/erts/emulator/beam/erl_utils.h b/erts/emulator/beam/erl_utils.h index e29555e0de47..d6ebc1c41dde 100644 --- a/erts/emulator/beam/erl_utils.h +++ b/erts/emulator/beam/erl_utils.h @@ -192,7 +192,7 @@ ERTS_GLB_INLINE int erts_cmp_atoms(Eterm a, Eterm b) { Atom *aa = atom_tab(atom_val(a)); Atom *bb = atom_tab(atom_val(b)); - byte *name_a, *name_b; + const byte *name_a, *name_b; int len_a, len_b, diff; diff = aa->ord0 - bb->ord0; @@ -201,8 +201,8 @@ ERTS_GLB_INLINE int erts_cmp_atoms(Eterm a, Eterm b) { return diff; } - name_a = &aa->name[3]; - name_b = &bb->name[3]; + name_a = &erts_atom_get_name(aa)[3]; + name_b = &erts_atom_get_name(bb)[3]; len_a = aa->len-3; len_b = bb->len-3; diff --git a/erts/emulator/beam/external.c b/erts/emulator/beam/external.c index cfa51387b81c..d815235eacb3 100644 --- a/erts/emulator/beam/external.c +++ b/erts/emulator/beam/external.c @@ -591,7 +591,7 @@ Sint erts_encode_ext_dist_header_finalize(ErtsDistOutputBuf* ob, a = atom_tab(atom_val(atom)); sz = a->len; ep -= sz; - sys_memcpy((void *) ep, (void *) a->name, sz); + sys_memcpy((void *) ep, (void *) erts_atom_get_name(a), sz); if (long_atoms) { ep -= 2; put_int16(sz, ep); @@ -2910,16 +2910,16 @@ enc_atom(ErtsAtomCacheMap *acmp, Eterm atom, byte *ep, Uint64 dflags) put_int8(len, ep); ep += 1; } - sys_memcpy((char *) ep, (char *) a->name, len); + sys_memcpy((char *) ep, (char *) erts_atom_get_name(a), len); } else { if (a->latin1_chars <= 255 && (dflags & DFLAG_SMALL_ATOM_TAGS)) { *ep++ = SMALL_ATOM_EXT; if (len == a->latin1_chars) { - sys_memcpy(ep+1, a->name, len); + sys_memcpy(ep+1, erts_atom_get_name(a), len); } else { - len = erts_utf8_to_latin1(ep+1, a->name, len); + len = erts_utf8_to_latin1(ep+1, erts_atom_get_name(a), len); ASSERT(len == a->latin1_chars); } put_int8(len, ep); @@ -2928,10 +2928,10 @@ enc_atom(ErtsAtomCacheMap *acmp, Eterm atom, byte *ep, Uint64 dflags) else { *ep++ = ATOM_EXT; if (len == a->latin1_chars) { - sys_memcpy(ep+2, a->name, len); + sys_memcpy(ep+2, erts_atom_get_name(a), len); } else { - len = erts_utf8_to_latin1(ep+2, a->name, len); + len = erts_utf8_to_latin1(ep+2, erts_atom_get_name(a), len); ASSERT(len == a->latin1_chars); } put_int16(len, ep); diff --git a/erts/emulator/beam/global.h b/erts/emulator/beam/global.h index 218aea136a57..dce7e2956c77 100644 --- a/erts/emulator/beam/global.h +++ b/erts/emulator/beam/global.h @@ -1491,7 +1491,7 @@ char *erts_convert_filename_to_wchar(const byte* bytes, Uint size, ErtsAlcType_t alloc_type, Sint* used, Uint extra_wchars); Eterm erts_convert_native_to_filename(Process *p, size_t size, byte *bytes); -Eterm erts_utf8_to_list(Process *p, Uint num, byte *bytes, Uint sz, Uint left, +Eterm erts_utf8_to_list(Process *p, Uint num, const byte *bytes, Uint sz, Uint left, Uint *num_built, Uint *num_eaten, Eterm tail); Eterm erts_make_list_from_utf8_buf(Eterm **hpp, Uint num, diff --git a/erts/emulator/beam/jit/beam_jit_common.cpp b/erts/emulator/beam/jit/beam_jit_common.cpp index aac423948518..fde678606ffa 100644 --- a/erts/emulator/beam/jit/beam_jit_common.cpp +++ b/erts/emulator/beam/jit/beam_jit_common.cpp @@ -39,7 +39,7 @@ extern "C" static std::string getAtom(Eterm atom) { Atom *ap = atom_tab(atom_val(atom)); - return std::string((char *)ap->name, ap->len); + return std::string((char *)erts_atom_get_name(ap), ap->len); } BeamAssemblerCommon::BeamAssemblerCommon(BaseAssembler &assembler_) diff --git a/erts/emulator/test/bif_SUITE.erl b/erts/emulator/test/bif_SUITE.erl index fd50e9e2bb53..196bb70cdce5 100644 --- a/erts/emulator/test/bif_SUITE.erl +++ b/erts/emulator/test/bif_SUITE.erl @@ -657,6 +657,12 @@ t_atom_to_binary(Config) when is_list(Config) -> <<>> = atom_to_binary('', unicode), <<127>> = atom_to_binary('\177', utf8), <<"abcdef">> = atom_to_binary(abcdef, utf8), + <<"qwerqwerqwerqwerqwerqwerqwerqwerqwerqwerqwerqwerqwerqwerqwerqwe">> = + atom_to_binary(qwerqwerqwerqwerqwerqwerqwerqwerqwerqwerqwerqwerqwerqwerqwerqwe, utf8), + <<"qwerqwerqwerqwerqwerqwerqwerqwerqwerqwerqwerqwerqwerqwerqwerqwer">> = + atom_to_binary(qwerqwerqwerqwerqwerqwerqwerqwerqwerqwerqwerqwerqwerqwerqwerqwer, utf8), + <<"qwerqwerqwerqwerqwerqwerqwerqwerqwerqwerqwerqwerqwerqwerqwerqwerq">> = + atom_to_binary(qwerqwerqwerqwerqwerqwerqwerqwerqwerqwerqwerqwerqwerqwerqwerqwerq, utf8), HalfLongBin = atom_to_binary(HalfLongAtom, utf8), HalfLongBin = atom_to_binary(HalfLongAtom), LongAtomBin = atom_to_binary(LongAtom, utf8), diff --git a/erts/etc/unix/etp-commands.in b/erts/etc/unix/etp-commands.in index 559b45262cd5..0b2e8e1e7a11 100644 --- a/erts/etc/unix/etp-commands.in +++ b/erts/etc/unix/etp-commands.in @@ -612,7 +612,8 @@ define etp-atom-1 else set $etp_atom_1_ap = (Atom*)erts_atom_table.seg_table[(Eterm)($arg0)>>16][((Eterm)($arg0)>>6)&0x3FF] set $etp_atom_1_i = ($etp_atom_1_ap)->len - set $etp_atom_1_p = ($etp_atom_1_ap)->name + etp-bitstring-data-1 ($etp_atom_1_ap)->u.bin + set $etp_atom_1_p = ($etp_bitstring_data) set $etp_atom_1_quote = 1 # Check if atom has to be quoted if ($etp_atom_1_i > 0) @@ -642,7 +643,7 @@ define etp-atom-1 printf "'" end set $etp_atom_1_i = ($etp_atom_1_ap)->len - set $etp_atom_1_p = ($etp_atom_1_ap)->name + set $etp_atom_1_p = ($etp_bitstring_data) while $etp_atom_1_i > 0 etp-char-1 (*$etp_atom_1_p) '\'' set $etp_atom_1_p++ @@ -654,6 +655,25 @@ define etp-atom-1 end end +define etp-bitstring-data-1 +# Args: Eterm bitstring +# +# Non-reentrant +# +# Unbox and retrieve the binary data pointer from any bitstring + set $etp_bitstring_unboxed = ((Eterm*)(($arg0) & etp_ptr_mask)) + set $etp_bitstring_subtag = ($etp_bitstring_unboxed[0] & etp_header_subtag_mask) + if ($etp_bitstring_subtag == etp_sub_bits_subtag) + set $etp_bitstring_ptr = (ErlSubBits *) $etp_bitstring_unboxed + set $etp_bitstring_size = ($etp_bitstring_ptr)->end - ($etp_bitstring_ptr)->start + set $etp_bitstring_data = (byte *)(($etp_bitstring_ptr)->base_flags & ~(UWord)3) + else + set $etp_bitstring_ptr = (ErlHeapBits *) $etp_bitstring_unboxed + set $etp_bitstring_size = ($etp_bitstring_ptr)->size + set $etp_bitstring_data = (byte *)&($etp_bitstring_ptr)->data + end +end + define etp-string-to-atom # Args: (char*) null-terminated @@ -691,8 +711,10 @@ define etp-string-to-atom # search hash bucket list while $etp_p set $etp_i = 0 + etp-bitstring-data-1 ($etp_p)->u.bin + set $etp_atom_1_p = ($etp_bitstring_data) while $etp_i < $etp_p->len && ($arg0)[$etp_i] - if $etp_p->name[$etp_i] != ($arg0)[$etp_i] + if $etp_atom_1_p[$etp_i] != ($arg0)[$etp_i] loop_break end set $etp_i++ @@ -2094,7 +2116,8 @@ define etp-term-dump-atom # Args: atom term set $etp_atom_1_ap = (Atom*)erts_atom_table.seg_table[(Eterm)($arg0)>>16][((Eterm)($arg0)>>6)&0x3FF] set $etp_atom_1_i = ($etp_atom_1_ap)->len - set $etp_atom_1_p = ($etp_atom_1_ap)->name + etp-bitstring-data-1 ($etp_atom_1_ap)->u.bin + set $etp_atom_1_p = ($etp_bitstring_data) set $etp_atom_1_quote = 1 set $etp_atom_indent = 13 @@ -2142,7 +2165,7 @@ define etp-term-dump-atom printf "'" end set $etp_atom_1_i = ($etp_atom_1_ap)->len - set $etp_atom_1_p = ($etp_atom_1_ap)->name + set $etp_atom_1_p = ($etp_bitstring_data) while $etp_atom_1_i > 0 etp-char-1 (*$etp_atom_1_p) '\'' set $etp_atom_1_p++