From 71cda5970d1cfe89bbb1d34a98c320755dd6f345 Mon Sep 17 00:00:00 2001 From: Isabell Huang Date: Thu, 11 Jul 2024 18:09:19 +0200 Subject: [PATCH] erts: Optimize atom_to_binary/1 When an atom is created, we now create a binary literal of it, so that atom_to_binary returns the pre-allocated binary literal instead of a newly converted one. --- erts/emulator/beam/atom.c | 141 ++++++++++----------- erts/emulator/beam/atom.h | 11 +- erts/emulator/beam/bif.c | 4 +- erts/emulator/beam/dist.c | 2 +- erts/emulator/beam/erl_bif_ddll.c | 4 +- erts/emulator/beam/erl_bif_info.c | 2 +- erts/emulator/beam/erl_bif_re.c | 2 +- erts/emulator/beam/erl_db_util.c | 4 +- erts/emulator/beam/erl_nif.c | 10 +- erts/emulator/beam/erl_printf_term.c | 2 +- erts/emulator/beam/erl_process_dump.c | 2 +- erts/emulator/beam/erl_unicode.c | 58 ++++----- erts/emulator/beam/erl_utils.h | 6 +- erts/emulator/beam/external.c | 12 +- erts/emulator/beam/global.h | 2 +- erts/emulator/beam/jit/beam_jit_common.cpp | 2 +- erts/emulator/test/bif_SUITE.erl | 6 + erts/etc/unix/etp-commands.in | 33 ++++- 18 files changed, 166 insertions(+), 137 deletions(-) diff --git a/erts/emulator/beam/atom.c b/erts/emulator/beam/atom.c index 036816df2fae..c4a37f9719a3 100644 --- a/erts/emulator/beam/atom.c +++ b/erts/emulator/beam/atom.c @@ -28,6 +28,7 @@ #include "global.h" #include "hash.h" #include "atom.h" +#include "erl_global_literals.h" #define ATOM_SIZE 3000 @@ -48,19 +49,19 @@ static erts_rwmtx_t atom_table_lock; static erts_atomic_t atom_put_ops; #endif -/* Functions for allocating space for the ext of atoms. We do not - * use malloc for each atom to prevent excessive memory fragmentation - */ +// /* Functions for allocating space for the ext of atoms. We do not +// * use malloc for each atom to prevent excessive memory fragmentation +// */ -typedef struct _atom_text { - struct _atom_text* next; - unsigned char text[ATOM_TEXT_SIZE]; -} AtomText; +// typedef struct _atom_text { +// struct _atom_text* next; +// unsigned char text[ATOM_TEXT_SIZE]; +// } AtomText; -static AtomText* text_list; /* List of text buffers */ -static byte *atom_text_pos; -static byte *atom_text_end; -static Uint reserved_atom_space; /* Total amount of atom text space */ +// static AtomText* text_list; /* List of text buffers */ +// static byte *atom_text_pos; +// static byte *atom_text_end; +// static Uint reserved_atom_space; /* Total amount of atom text space */ static Uint atom_space; /* Amount of atom text space used */ /* @@ -81,44 +82,8 @@ void atom_info(fmtfn_t to, void *to_arg) atom_read_unlock(); } -/* - * Allocate an atom text segment. - */ -static void -more_atom_space(void) -{ - AtomText* ptr; - - ptr = (AtomText*) erts_alloc(ERTS_ALC_T_ATOM_TXT, sizeof(AtomText)); - - ptr->next = text_list; - text_list = ptr; - atom_text_pos = ptr->text; - atom_text_end = atom_text_pos + ATOM_TEXT_SIZE; - reserved_atom_space += sizeof(AtomText); - VERBOSE(DEBUG_SYSTEM,("Allocated %d atom space\n",ATOM_TEXT_SIZE)); -} - -/* - * Allocate string space within an atom text segment. - */ - -static byte* -atom_text_alloc(int bytes) -{ - byte *res; - - ASSERT(bytes <= MAX_ATOM_SZ_LIMIT); - if (atom_text_pos + bytes >= atom_text_end) { - more_atom_space(); - } - res = atom_text_pos; - atom_text_pos += bytes; - atom_space += bytes; - return res; -} /* * Calculate atom hash value (using the hash algorithm @@ -128,7 +93,7 @@ atom_text_alloc(int bytes) static HashValue atom_hash(Atom* obj) { - byte* p = obj->name; + byte* p = obj->u.name; int len = obj->len; HashValue h = 0, g; byte v; @@ -150,12 +115,23 @@ atom_hash(Atom* obj) return h; } +const byte *erts_atom_get_name(const Atom *atom) +{ + byte *name; + Uint size; + Uint offset; + ERTS_GET_BITSTRING(atom->u.bin, name, offset, size); + ASSERT(offset == 0 && (size % 8) == 0); + (void) size; + (void) offset; + return name; +} static int atom_cmp(Atom* tmpl, Atom* obj) { if (tmpl->len == obj->len && - sys_memcmp(tmpl->name, obj->name, tmpl->len) == 0) + sys_memcmp(tmpl->u.name, erts_atom_get_name(obj), tmpl->len) == 0) return 0; return 1; } @@ -164,13 +140,39 @@ atom_cmp(Atom* tmpl, Atom* obj) static Atom* atom_alloc(Atom* tmpl) { - Atom* obj = (Atom*) erts_alloc(ERTS_ALC_T_ATOM, sizeof(Atom)); + Atom *obj = (Atom*) erts_alloc(ERTS_ALC_T_ATOM, sizeof(Atom)); - obj->name = atom_text_alloc(tmpl->len); - sys_memcpy(obj->name, tmpl->name, tmpl->len); + { + Eterm *hp; + Uint heap_size = 0; + ErtsHeapFactory factory; + ErlOffHeap oh; + struct erl_off_heap_header **literal_ohp; + + if (tmpl->len <= ERL_ONHEAP_BINARY_LIMIT) { + heap_size = heap_bits_size(NBITS(tmpl->len)); + } else { + heap_size = ERL_REFC_BITS_SIZE; + } + + hp = erts_global_literal_allocate(heap_size, &literal_ohp); + ERTS_INIT_OFF_HEAP(&oh); + oh.first = *literal_ohp; + + erts_factory_static_init(&factory, hp, heap_size, &oh); + *literal_ohp = oh.first; + obj->u.bin = erts_hfact_new_binary_from_data(&factory, + 0, + tmpl->len, + tmpl->u.name); + erts_global_literal_register(&obj->u.bin, hp, heap_size); + } + obj->len = tmpl->len; obj->latin1_chars = tmpl->latin1_chars; obj->slot.index = -1; + atom_space += tmpl->len; + /* * Precompute ordinal value of first 3 bytes + 7 bits. @@ -186,7 +188,7 @@ atom_alloc(Atom* tmpl) j = (tmpl->len < 4) ? tmpl->len : 4; for(i = 0; i < j; ++i) - c[i] = tmpl->name[i]; + c[i] = tmpl->u.name[i]; for(; i < 4; ++i) c[i] = '\0'; obj->ord0 = (c[0] << 23) + (c[1] << 15) + (c[2] << 7) + (c[3] >> 1); @@ -293,7 +295,7 @@ erts_atom_put_index(const byte *name, Sint len, ErtsAtomEncoding enc, int trunc) } a.len = tlen; - a.name = (byte *) text; + a.u.name = (byte *) text; atom_read_lock(); aix = index_get(&erts_atom_table, (void*) &a); atom_read_unlock(); @@ -333,7 +335,7 @@ erts_atom_put_index(const byte *name, Sint len, ErtsAtomEncoding enc, int trunc) a.len = tlen; a.latin1_chars = (Sint16) no_latin1_chars; - a.name = (byte *) text; + a.u.name = (byte *) text; atom_write_lock(); aix = index_put(&erts_atom_table, (void*) &a); atom_write_unlock(); @@ -400,7 +402,7 @@ erts_atom_get(const char *name, Uint len, Eterm* ap, ErtsAtomEncoding enc) latin1_to_utf8(utf8_copy, sizeof(utf8_copy), (const byte**)&name, &len); - a.name = (byte*)name; + a.u.name = (byte*)name; a.len = (Sint16)len; break; case ERTS_ATOM_ENC_7BIT_ASCII: @@ -415,7 +417,7 @@ erts_atom_get(const char *name, Uint len, Eterm* ap, ErtsAtomEncoding enc) } a.len = (Sint16)len; - a.name = (byte*)name; + a.u.name = (byte*)name; break; case ERTS_ATOM_ENC_UTF8: if (len > MAX_ATOM_SZ_LIMIT) { @@ -427,7 +429,7 @@ erts_atom_get(const char *name, Uint len, Eterm* ap, ErtsAtomEncoding enc) * name will fail. */ a.len = (Sint16)len; - a.name = (byte*)name; + a.u.name = (byte*)name; break; } @@ -446,7 +448,7 @@ erts_atom_get_text_space_sizes(Uint *reserved, Uint *used) if (lock) atom_read_lock(); if (reserved) - *reserved = reserved_atom_space; + *reserved = atom_space; if (used) *used = atom_space; if (lock) @@ -479,33 +481,28 @@ init_atom_table(void) f.meta_free = (HMFREE_FUN) erts_free; f.meta_print = (HMPRINT_FUN) erts_print; - atom_text_pos = NULL; - atom_text_end = NULL; - reserved_atom_space = 0; - atom_space = 0; - text_list = NULL; - erts_index_init(ERTS_ALC_T_ATOM_TABLE, &erts_atom_table, "atom_tab", ATOM_SIZE, erts_atom_table_size, f); - more_atom_space(); - /* Ordinary atoms */ + /* Ordinary atoms. a is a template for creating an entry in the atom table */ for (i = 0; erl_atom_names[i] != 0; i++) { int ix; a.len = sys_strlen(erl_atom_names[i]); a.latin1_chars = a.len; - a.name = (byte*)erl_atom_names[i]; + a.u.name = (byte*)erl_atom_names[i]; a.slot.index = i; + + #ifdef DEBUG /* Verify 7-bit ascii */ for (ix = 0; ix < a.len; ix++) { - ASSERT((a.name[ix] & 0x80) == 0); + ASSERT((a.u.name[ix] & 0x80) == 0); } #endif ix = index_put(&erts_atom_table, (void*) &a); - atom_text_pos -= a.len; - atom_space -= a.len; - atom_tab(ix)->name = (byte*)erl_atom_names[i]; + (void) ix; + /* Assert that the entry in the atom table is not a template */ + ASSERT(erts_atom_get_name(atom_tab(ix))); } } diff --git a/erts/emulator/beam/atom.h b/erts/emulator/beam/atom.h index 681bd4586f06..ff05bc35d2cf 100644 --- a/erts/emulator/beam/atom.h +++ b/erts/emulator/beam/atom.h @@ -50,7 +50,10 @@ typedef struct atom { Sint16 len; /* length of atom name (UTF-8 encoded) */ Sint16 latin1_chars; /* 0-255 if atom can be encoded in latin1; otherwise, -1 */ int ord0; /* ordinal value of first 3 bytes + 7 bits */ - byte* name; /* name of atom */ + union{ + byte* name; /* name of atom, used by templates */ + Eterm bin; /* name of atom, used when atom is in table*/ + } u; } Atom; extern IndexTable erts_atom_table; @@ -59,6 +62,8 @@ ERTS_GLB_INLINE Atom* atom_tab(Uint i); ERTS_GLB_INLINE int erts_is_atom_utf8_bytes(byte *text, size_t len, Eterm term); ERTS_GLB_INLINE int erts_is_atom_str(const char *str, Eterm term, int is_latin1); +const byte *erts_atom_get_name(const Atom *atom); + #if ERTS_GLB_INLINE_INCL_FUNC_DEF ERTS_GLB_INLINE Atom* atom_tab(Uint i) @@ -73,7 +78,7 @@ ERTS_GLB_INLINE int erts_is_atom_utf8_bytes(byte *text, size_t len, Eterm term) return 0; a = atom_tab(atom_val(term)); return (len == (size_t) a->len - && sys_memcmp((void *) a->name, (void *) text, len) == 0); + && sys_memcmp((void *) erts_atom_get_name(a), (void *) text, len) == 0); } ERTS_GLB_INLINE int erts_is_atom_str(const char *str, Eterm term, int is_latin1) @@ -87,7 +92,7 @@ ERTS_GLB_INLINE int erts_is_atom_str(const char *str, Eterm term, int is_latin1) return 0; a = atom_tab(atom_val(term)); len = a->len; - aname = a->name; + aname = erts_atom_get_name(a); if (is_latin1) { for (i = 0; i < len; s++) { if (aname[i] < 0x80) { diff --git a/erts/emulator/beam/bif.c b/erts/emulator/beam/bif.c index 9d6fd75807d3..d48292c2ae2c 100644 --- a/erts/emulator/beam/bif.c +++ b/erts/emulator/beam/bif.c @@ -2929,10 +2929,10 @@ BIF_RETTYPE atom_to_list_1(BIF_ALIST_1) BIF_RET(NIL); /* the empty atom */ ares = - erts_analyze_utf8(ap->name, ap->len, &err_pos, &num_chars, NULL); + erts_analyze_utf8(erts_atom_get_name(ap), ap->len, &err_pos, &num_chars, NULL); ASSERT(ares == ERTS_UTF8_OK); (void)ares; - res = erts_utf8_to_list(BIF_P, num_chars, ap->name, ap->len, ap->len, + res = erts_utf8_to_list(BIF_P, num_chars, erts_atom_get_name(ap), ap->len, ap->len, &num_built, &num_eaten, NIL); ASSERT(num_built == num_chars); ASSERT(num_eaten == ap->len); diff --git a/erts/emulator/beam/dist.c b/erts/emulator/beam/dist.c index cc0535de5db1..920fa81f70b7 100644 --- a/erts/emulator/beam/dist.c +++ b/erts/emulator/beam/dist.c @@ -838,7 +838,7 @@ int is_node_name_atom(Eterm a) return 0; i = atom_val(a); ASSERT((i > 0) && (i < atom_table_size()) && (atom_tab(i) != NULL)); - return is_node_name((char*)atom_tab(i)->name, atom_tab(i)->len); + return is_node_name((char*)erts_atom_get_name(atom_tab(i)), atom_tab(i)->len); } static void diff --git a/erts/emulator/beam/erl_bif_ddll.c b/erts/emulator/beam/erl_bif_ddll.c index 9279b5cce743..15f37e3fd5b7 100644 --- a/erts/emulator/beam/erl_bif_ddll.c +++ b/erts/emulator/beam/erl_bif_ddll.c @@ -1725,7 +1725,7 @@ static int errdesc_to_code(Eterm errdesc, int *code /* out */) for (i = 0; errcode_tab[i].atm != NULL; ++i) { int len = sys_strlen(errcode_tab[i].atm); if (len == ap->len && - !sys_strncmp(errcode_tab[i].atm,(char *) ap->name,len)) { + !sys_strncmp(errcode_tab[i].atm,(char *) erts_atom_get_name(ap),len)) { *code = errcode_tab[i].code; return 0; } @@ -1799,7 +1799,7 @@ static char *pick_list_or_atom(Eterm name_term) goto error; } name = erts_alloc(ERTS_ALC_T_DDLL_TMP_BUF, ap->len + 1); - sys_memcpy(name,ap->name,ap->len); + sys_memcpy(name,erts_atom_get_name(ap),ap->len); name[ap->len] = '\0'; } else { if (erts_iolist_size(name_term, &name_len)) { diff --git a/erts/emulator/beam/erl_bif_info.c b/erts/emulator/beam/erl_bif_info.c index c0536012f94e..08cd96ee652e 100644 --- a/erts/emulator/beam/erl_bif_info.c +++ b/erts/emulator/beam/erl_bif_info.c @@ -2703,7 +2703,7 @@ c_compiler_used(Eterm **hpp, Uint *szp) static int is_snif_term(Eterm module_atom) { int i; Atom *a = atom_tab(atom_val(module_atom)); - char *aname = (char *) a->name; + char *aname = (char *) erts_atom_get_name(a); /* if a->name has a '.' then the bif (snif) is bogus i.e a package */ for (i = 0; i < a->len; i++) { diff --git a/erts/emulator/beam/erl_bif_re.c b/erts/emulator/beam/erl_bif_re.c index 91554f65f7f9..38072f3fcd73 100644 --- a/erts/emulator/beam/erl_bif_re.c +++ b/erts/emulator/beam/erl_bif_re.c @@ -1024,7 +1024,7 @@ build_capture(Eterm capture_spec[CAPSPEC_SIZE], const pcre *code) } } ASSERT(tmpb != NULL); - sys_memcpy(tmpb,ap->name,ap->len); + sys_memcpy(tmpb,erts_atom_get_name(ap),ap->len); tmpb[ap->len] = '\0'; } else { ErlDrvSizeT slen; diff --git a/erts/emulator/beam/erl_db_util.c b/erts/emulator/beam/erl_db_util.c index b7666a0eb209..43526995b112 100644 --- a/erts/emulator/beam/erl_db_util.c +++ b/erts/emulator/beam/erl_db_util.c @@ -3851,13 +3851,13 @@ bool db_eq_comp(DbTableCommon* tb, Eterm a, DbTerm* b) int db_is_variable(Eterm obj) { - byte *b; + const byte *b; int n; int N; if (is_not_atom(obj)) return -1; - b = atom_tab(atom_val(obj))->name; + b = erts_atom_get_name(atom_tab(atom_val(obj))); if ((n = atom_tab(atom_val(obj))->len) < 2) return -1; if (*b++ != '$') diff --git a/erts/emulator/beam/erl_nif.c b/erts/emulator/beam/erl_nif.c index d9c61182a36e..2563d40ad563 100644 --- a/erts/emulator/beam/erl_nif.c +++ b/erts/emulator/beam/erl_nif.c @@ -1799,9 +1799,9 @@ int enif_get_atom(ErlNifEnv* env, Eterm atom, char* buf, unsigned len, return 0; } if (ap->latin1_chars == ap->len) { - sys_memcpy(buf, ap->name, ap->len); + sys_memcpy(buf, erts_atom_get_name(ap), ap->len); } else { - int dlen = erts_utf8_to_latin1((byte*)buf, ap->name, ap->len); + int dlen = erts_utf8_to_latin1((byte*)buf, erts_atom_get_name(ap), ap->len); ASSERT(dlen == ap->latin1_chars); (void)dlen; } buf[ap->latin1_chars] = '\0'; @@ -1810,7 +1810,7 @@ int enif_get_atom(ErlNifEnv* env, Eterm atom, char* buf, unsigned len, if (ap->len >= len) { return 0; } - sys_memcpy(buf, ap->name, ap->len); + sys_memcpy(buf, erts_atom_get_name(ap), ap->len); buf[ap->len] = '\0'; return ap->len + 1; } @@ -4480,8 +4480,8 @@ void erts_print_nif_taints(fmtfn_t to, void* to_arg) t = (struct tainted_module_t*) erts_atomic_read_nob(&first_taint); for ( ; t; t = t->next) { - const Atom* atom = atom_tab(atom_val(t->module_atom)); - erts_cbprintf(to,to_arg,"%s%.*s", delim, atom->len, atom->name); + Atom* atom = atom_tab(atom_val(t->module_atom)); + erts_cbprintf(to,to_arg,"%s%.*s", delim, atom->len, erts_atom_get_name(atom)); delim = ","; } erts_cbprintf(to,to_arg,"\n"); diff --git a/erts/emulator/beam/erl_printf_term.c b/erts/emulator/beam/erl_printf_term.c index a96357f6b0f7..761d40b775ad 100644 --- a/erts/emulator/beam/erl_printf_term.c +++ b/erts/emulator/beam/erl_printf_term.c @@ -254,7 +254,7 @@ static int print_atom_name(fmtfn_t fn, void* arg, Eterm atom, long *dcount) return res; } - s = entry->name; + s = erts_atom_get_name(entry); length = entry->len; *dcount -= entry->len; diff --git a/erts/emulator/beam/erl_process_dump.c b/erts/emulator/beam/erl_process_dump.c index 8648d72f9046..89e7865bc77e 100644 --- a/erts/emulator/beam/erl_process_dump.c +++ b/erts/emulator/beam/erl_process_dump.c @@ -323,7 +323,7 @@ dump_element(fmtfn_t to, void *to_arg, Eterm x) erts_print(to, to_arg, "H" PTR_FMT, boxed_val(x)); } else if (is_immed(x)) { if (is_atom(x)) { - unsigned char* s = atom_tab(atom_val(x))->name; + const byte* s = erts_atom_get_name(atom_tab(atom_val(x))); int len = atom_tab(atom_val(x))->len; int i; diff --git a/erts/emulator/beam/erl_unicode.c b/erts/emulator/beam/erl_unicode.c index 970ca89bd9c0..9e2924946d0a 100644 --- a/erts/emulator/beam/erl_unicode.c +++ b/erts/emulator/beam/erl_unicode.c @@ -1347,7 +1347,7 @@ static Eterm do_utf8_to_list(Process *p, Uint num, const byte *bytes, Uint sz, num_built, num_eaten, tail); } -Eterm erts_utf8_to_list(Process *p, Uint num, byte *bytes, Uint sz, Uint left, +Eterm erts_utf8_to_list(Process *p, Uint num, const byte *bytes, Uint sz, Uint left, Uint *num_built, Uint *num_eaten, Eterm tail) { return do_utf8_to_list(p, num, bytes, sz, left, num_built, num_eaten, tail); @@ -1366,7 +1366,7 @@ Uint erts_atom_to_string_length(Eterm atom) const byte* err_pos; Uint num_chars; int ares = - erts_analyze_utf8(ap->name, ap->len, &err_pos, &num_chars, NULL); + erts_analyze_utf8(erts_atom_get_name(ap), ap->len, &err_pos, &num_chars, NULL); ASSERT(ares == ERTS_UTF8_OK); (void)ares; return num_chars; @@ -1380,7 +1380,7 @@ Eterm erts_atom_to_string(Eterm **hpp, Eterm atom, Eterm tail) ASSERT(is_atom(atom)); ap = atom_tab(atom_val(atom)); if (ap->latin1_chars >= 0) - return buf_to_intlist(hpp, (char*)ap->name, ap->len, tail); + return buf_to_intlist(hpp, (char*)erts_atom_get_name(ap), ap->len, tail); else { Eterm res; const byte* err_pos; @@ -1389,10 +1389,10 @@ Eterm erts_atom_to_string(Eterm **hpp, Eterm atom, Eterm tail) Eterm *hp_start = *hpp; int ares = #endif - erts_analyze_utf8(ap->name, ap->len, &err_pos, &num_chars, NULL); + erts_analyze_utf8(erts_atom_get_name(ap), ap->len, &err_pos, &num_chars, NULL); ASSERT(ares == ERTS_UTF8_OK); - res = erts_make_list_from_utf8_buf(hpp, num_chars, ap->name, ap->len, + res = erts_make_list_from_utf8_buf(hpp, num_chars, erts_atom_get_name(ap), ap->len, &num_built, &num_eaten, tail); ASSERT(num_built == num_chars); @@ -1924,26 +1924,23 @@ BIF_RETTYPE atom_to_binary_2(BIF_ALIST_2) ap = atom_tab(atom_val(BIF_ARG_1)); if (BIF_ARG_2 == am_latin1) { - Eterm bin_term; - + Eterm bin_term; if (ap->latin1_chars < 0) { goto error; } if (ap->latin1_chars == ap->len) { - bin_term = erts_new_binary_from_data(BIF_P, ap->len, ap->name); + BIF_RET(ap->u.bin); } else { byte* bin_p; int dbg_sz; - bin_term = erts_new_binary(BIF_P, ap->latin1_chars, &bin_p); - dbg_sz = erts_utf8_to_latin1(bin_p, ap->name, ap->len); + dbg_sz = erts_utf8_to_latin1(bin_p, erts_atom_get_name(ap), ap->len); ASSERT(dbg_sz == ap->latin1_chars); (void)dbg_sz; + BIF_RET(bin_term); } - - BIF_RET(bin_term); } else if (BIF_ARG_2 == am_utf8 || BIF_ARG_2 == am_unicode) { - BIF_RET(erts_new_binary_from_data(BIF_P, ap->len, ap->name)); + BIF_RET(ap->u.bin); } else { error: BIF_ERROR(BIF_P, BADARG); @@ -2233,12 +2230,13 @@ Sint erts_native_filename_need(Eterm ioterm, int encoding) need = 2* ap->latin1_chars; } else { + const byte * name = erts_atom_get_name(ap); for (i = 0; i < ap->len; ) { - if (ap->name[i] < 0x80) { + if (name[i] < 0x80) { i++; - } else if (ap->name[i] < 0xE0) { + } else if (name[i] < 0xE0) { i += 2; - } else if (ap->name[i] < 0xF0) { + } else if (name[i] < 0xF0) { i += 3; } else { need = -1; @@ -2256,7 +2254,7 @@ Sint erts_native_filename_need(Eterm ioterm, int encoding) * the middle of filenames */ if (need > 0) { - byte *name = ap->name; + const byte *name = erts_atom_get_name(ap); int len = ap->len; for (i = 0; i < len; i++) { if (name[i] == 0) { @@ -2398,33 +2396,33 @@ void erts_native_filename_put(Eterm ioterm, int encoding, byte *p) switch (encoding) { case ERL_FILENAME_LATIN1: for (i = 0; i < ap->len; i++) { - if (ap->name[i] < 0x80) { - *p++ = ap->name[i]; + if (erts_atom_get_name(ap)[i] < 0x80) { + *p++ = erts_atom_get_name(ap)[i]; } else { - ASSERT(ap->name[i] < 0xC4); - *p++ = ((ap->name[i] & 3) << 6) | (ap->name[i+1] & 0x3F); + ASSERT(erts_atom_get_name(ap)[i] < 0xC4); + *p++ = ((erts_atom_get_name(ap)[i] & 3) << 6) | (erts_atom_get_name(ap)[i+1] & 0x3F); i++; } } break; case ERL_FILENAME_UTF8_MAC: case ERL_FILENAME_UTF8: - sys_memcpy(p, ap->name, ap->len); + sys_memcpy(p, erts_atom_get_name(ap), ap->len); break; case ERL_FILENAME_WIN_WCHAR: for (i = 0; i < ap->len; i++) { /* Little endian */ - if (ap->name[i] < 0x80) { - *p++ = ap->name[i]; + if (erts_atom_get_name(ap)[i] < 0x80) { + *p++ = erts_atom_get_name(ap)[i]; *p++ = 0; - } else if (ap->name[i] < 0xE0) { - *p++ = ((ap->name[i] & 3) << 6) | (ap->name[i+1] & 0x3F); - *p++ = ((ap->name[i] & 0x1C) >> 2); + } else if (erts_atom_get_name(ap)[i] < 0xE0) { + *p++ = ((erts_atom_get_name(ap)[i] & 3) << 6) | (erts_atom_get_name(ap)[i+1] & 0x3F); + *p++ = ((erts_atom_get_name(ap)[i] & 0x1C) >> 2); i++; } else { - ASSERT(ap->name[i] < 0xF0); - *p++ = ((ap->name[i+1] & 3) << 6) | (ap->name[i+2] & 0x3C); - *p++ = ((ap->name[i] & 0xF) << 4) | ((ap->name[i+1] & 0x3C) >> 2); + ASSERT(erts_atom_get_name(ap)[i] < 0xF0); + *p++ = ((erts_atom_get_name(ap)[i+1] & 3) << 6) | (erts_atom_get_name(ap)[i+2] & 0x3C); + *p++ = ((erts_atom_get_name(ap)[i] & 0xF) << 4) | ((erts_atom_get_name(ap)[i+1] & 0x3C) >> 2); i += 2; } } diff --git a/erts/emulator/beam/erl_utils.h b/erts/emulator/beam/erl_utils.h index e29555e0de47..d6ebc1c41dde 100644 --- a/erts/emulator/beam/erl_utils.h +++ b/erts/emulator/beam/erl_utils.h @@ -192,7 +192,7 @@ ERTS_GLB_INLINE int erts_cmp_atoms(Eterm a, Eterm b) { Atom *aa = atom_tab(atom_val(a)); Atom *bb = atom_tab(atom_val(b)); - byte *name_a, *name_b; + const byte *name_a, *name_b; int len_a, len_b, diff; diff = aa->ord0 - bb->ord0; @@ -201,8 +201,8 @@ ERTS_GLB_INLINE int erts_cmp_atoms(Eterm a, Eterm b) { return diff; } - name_a = &aa->name[3]; - name_b = &bb->name[3]; + name_a = &erts_atom_get_name(aa)[3]; + name_b = &erts_atom_get_name(bb)[3]; len_a = aa->len-3; len_b = bb->len-3; diff --git a/erts/emulator/beam/external.c b/erts/emulator/beam/external.c index cfa51387b81c..d815235eacb3 100644 --- a/erts/emulator/beam/external.c +++ b/erts/emulator/beam/external.c @@ -591,7 +591,7 @@ Sint erts_encode_ext_dist_header_finalize(ErtsDistOutputBuf* ob, a = atom_tab(atom_val(atom)); sz = a->len; ep -= sz; - sys_memcpy((void *) ep, (void *) a->name, sz); + sys_memcpy((void *) ep, (void *) erts_atom_get_name(a), sz); if (long_atoms) { ep -= 2; put_int16(sz, ep); @@ -2910,16 +2910,16 @@ enc_atom(ErtsAtomCacheMap *acmp, Eterm atom, byte *ep, Uint64 dflags) put_int8(len, ep); ep += 1; } - sys_memcpy((char *) ep, (char *) a->name, len); + sys_memcpy((char *) ep, (char *) erts_atom_get_name(a), len); } else { if (a->latin1_chars <= 255 && (dflags & DFLAG_SMALL_ATOM_TAGS)) { *ep++ = SMALL_ATOM_EXT; if (len == a->latin1_chars) { - sys_memcpy(ep+1, a->name, len); + sys_memcpy(ep+1, erts_atom_get_name(a), len); } else { - len = erts_utf8_to_latin1(ep+1, a->name, len); + len = erts_utf8_to_latin1(ep+1, erts_atom_get_name(a), len); ASSERT(len == a->latin1_chars); } put_int8(len, ep); @@ -2928,10 +2928,10 @@ enc_atom(ErtsAtomCacheMap *acmp, Eterm atom, byte *ep, Uint64 dflags) else { *ep++ = ATOM_EXT; if (len == a->latin1_chars) { - sys_memcpy(ep+2, a->name, len); + sys_memcpy(ep+2, erts_atom_get_name(a), len); } else { - len = erts_utf8_to_latin1(ep+2, a->name, len); + len = erts_utf8_to_latin1(ep+2, erts_atom_get_name(a), len); ASSERT(len == a->latin1_chars); } put_int16(len, ep); diff --git a/erts/emulator/beam/global.h b/erts/emulator/beam/global.h index 218aea136a57..dce7e2956c77 100644 --- a/erts/emulator/beam/global.h +++ b/erts/emulator/beam/global.h @@ -1491,7 +1491,7 @@ char *erts_convert_filename_to_wchar(const byte* bytes, Uint size, ErtsAlcType_t alloc_type, Sint* used, Uint extra_wchars); Eterm erts_convert_native_to_filename(Process *p, size_t size, byte *bytes); -Eterm erts_utf8_to_list(Process *p, Uint num, byte *bytes, Uint sz, Uint left, +Eterm erts_utf8_to_list(Process *p, Uint num, const byte *bytes, Uint sz, Uint left, Uint *num_built, Uint *num_eaten, Eterm tail); Eterm erts_make_list_from_utf8_buf(Eterm **hpp, Uint num, diff --git a/erts/emulator/beam/jit/beam_jit_common.cpp b/erts/emulator/beam/jit/beam_jit_common.cpp index aac423948518..fde678606ffa 100644 --- a/erts/emulator/beam/jit/beam_jit_common.cpp +++ b/erts/emulator/beam/jit/beam_jit_common.cpp @@ -39,7 +39,7 @@ extern "C" static std::string getAtom(Eterm atom) { Atom *ap = atom_tab(atom_val(atom)); - return std::string((char *)ap->name, ap->len); + return std::string((char *)erts_atom_get_name(ap), ap->len); } BeamAssemblerCommon::BeamAssemblerCommon(BaseAssembler &assembler_) diff --git a/erts/emulator/test/bif_SUITE.erl b/erts/emulator/test/bif_SUITE.erl index fd50e9e2bb53..196bb70cdce5 100644 --- a/erts/emulator/test/bif_SUITE.erl +++ b/erts/emulator/test/bif_SUITE.erl @@ -657,6 +657,12 @@ t_atom_to_binary(Config) when is_list(Config) -> <<>> = atom_to_binary('', unicode), <<127>> = atom_to_binary('\177', utf8), <<"abcdef">> = atom_to_binary(abcdef, utf8), + <<"qwerqwerqwerqwerqwerqwerqwerqwerqwerqwerqwerqwerqwerqwerqwerqwe">> = + atom_to_binary(qwerqwerqwerqwerqwerqwerqwerqwerqwerqwerqwerqwerqwerqwerqwerqwe, utf8), + <<"qwerqwerqwerqwerqwerqwerqwerqwerqwerqwerqwerqwerqwerqwerqwerqwer">> = + atom_to_binary(qwerqwerqwerqwerqwerqwerqwerqwerqwerqwerqwerqwerqwerqwerqwerqwer, utf8), + <<"qwerqwerqwerqwerqwerqwerqwerqwerqwerqwerqwerqwerqwerqwerqwerqwerq">> = + atom_to_binary(qwerqwerqwerqwerqwerqwerqwerqwerqwerqwerqwerqwerqwerqwerqwerqwerq, utf8), HalfLongBin = atom_to_binary(HalfLongAtom, utf8), HalfLongBin = atom_to_binary(HalfLongAtom), LongAtomBin = atom_to_binary(LongAtom, utf8), diff --git a/erts/etc/unix/etp-commands.in b/erts/etc/unix/etp-commands.in index 559b45262cd5..0b2e8e1e7a11 100644 --- a/erts/etc/unix/etp-commands.in +++ b/erts/etc/unix/etp-commands.in @@ -612,7 +612,8 @@ define etp-atom-1 else set $etp_atom_1_ap = (Atom*)erts_atom_table.seg_table[(Eterm)($arg0)>>16][((Eterm)($arg0)>>6)&0x3FF] set $etp_atom_1_i = ($etp_atom_1_ap)->len - set $etp_atom_1_p = ($etp_atom_1_ap)->name + etp-bitstring-data-1 ($etp_atom_1_ap)->u.bin + set $etp_atom_1_p = ($etp_bitstring_data) set $etp_atom_1_quote = 1 # Check if atom has to be quoted if ($etp_atom_1_i > 0) @@ -642,7 +643,7 @@ define etp-atom-1 printf "'" end set $etp_atom_1_i = ($etp_atom_1_ap)->len - set $etp_atom_1_p = ($etp_atom_1_ap)->name + set $etp_atom_1_p = ($etp_bitstring_data) while $etp_atom_1_i > 0 etp-char-1 (*$etp_atom_1_p) '\'' set $etp_atom_1_p++ @@ -654,6 +655,25 @@ define etp-atom-1 end end +define etp-bitstring-data-1 +# Args: Eterm bitstring +# +# Non-reentrant +# +# Unbox and retrieve the binary data pointer from any bitstring + set $etp_bitstring_unboxed = ((Eterm*)(($arg0) & etp_ptr_mask)) + set $etp_bitstring_subtag = ($etp_bitstring_unboxed[0] & etp_header_subtag_mask) + if ($etp_bitstring_subtag == etp_sub_bits_subtag) + set $etp_bitstring_ptr = (ErlSubBits *) $etp_bitstring_unboxed + set $etp_bitstring_size = ($etp_bitstring_ptr)->end - ($etp_bitstring_ptr)->start + set $etp_bitstring_data = (byte *)(($etp_bitstring_ptr)->base_flags & ~(UWord)3) + else + set $etp_bitstring_ptr = (ErlHeapBits *) $etp_bitstring_unboxed + set $etp_bitstring_size = ($etp_bitstring_ptr)->size + set $etp_bitstring_data = (byte *)&($etp_bitstring_ptr)->data + end +end + define etp-string-to-atom # Args: (char*) null-terminated @@ -691,8 +711,10 @@ define etp-string-to-atom # search hash bucket list while $etp_p set $etp_i = 0 + etp-bitstring-data-1 ($etp_p)->u.bin + set $etp_atom_1_p = ($etp_bitstring_data) while $etp_i < $etp_p->len && ($arg0)[$etp_i] - if $etp_p->name[$etp_i] != ($arg0)[$etp_i] + if $etp_atom_1_p[$etp_i] != ($arg0)[$etp_i] loop_break end set $etp_i++ @@ -2094,7 +2116,8 @@ define etp-term-dump-atom # Args: atom term set $etp_atom_1_ap = (Atom*)erts_atom_table.seg_table[(Eterm)($arg0)>>16][((Eterm)($arg0)>>6)&0x3FF] set $etp_atom_1_i = ($etp_atom_1_ap)->len - set $etp_atom_1_p = ($etp_atom_1_ap)->name + etp-bitstring-data-1 ($etp_atom_1_ap)->u.bin + set $etp_atom_1_p = ($etp_bitstring_data) set $etp_atom_1_quote = 1 set $etp_atom_indent = 13 @@ -2142,7 +2165,7 @@ define etp-term-dump-atom printf "'" end set $etp_atom_1_i = ($etp_atom_1_ap)->len - set $etp_atom_1_p = ($etp_atom_1_ap)->name + set $etp_atom_1_p = ($etp_bitstring_data) while $etp_atom_1_i > 0 etp-char-1 (*$etp_atom_1_p) '\'' set $etp_atom_1_p++