diff --git a/erts/emulator/beam/atom.c b/erts/emulator/beam/atom.c index 2b848ee27493..c4a37f9719a3 100644 --- a/erts/emulator/beam/atom.c +++ b/erts/emulator/beam/atom.c @@ -115,7 +115,7 @@ atom_hash(Atom* obj) return h; } -byte *erts_atom_get_name(Atom *atom) +const byte *erts_atom_get_name(const Atom *atom) { byte *name; Uint size; diff --git a/erts/emulator/beam/atom.h b/erts/emulator/beam/atom.h index 00b61a8b2a42..ff05bc35d2cf 100644 --- a/erts/emulator/beam/atom.h +++ b/erts/emulator/beam/atom.h @@ -62,7 +62,7 @@ ERTS_GLB_INLINE Atom* atom_tab(Uint i); ERTS_GLB_INLINE int erts_is_atom_utf8_bytes(byte *text, size_t len, Eterm term); ERTS_GLB_INLINE int erts_is_atom_str(const char *str, Eterm term, int is_latin1); -byte *erts_atom_get_name(Atom *atom); +const byte *erts_atom_get_name(const Atom *atom); #if ERTS_GLB_INLINE_INCL_FUNC_DEF ERTS_GLB_INLINE Atom* diff --git a/erts/emulator/beam/erl_db_util.c b/erts/emulator/beam/erl_db_util.c index bd821f1cafed..43526995b112 100644 --- a/erts/emulator/beam/erl_db_util.c +++ b/erts/emulator/beam/erl_db_util.c @@ -3851,7 +3851,7 @@ bool db_eq_comp(DbTableCommon* tb, Eterm a, DbTerm* b) int db_is_variable(Eterm obj) { - byte *b; + const byte *b; int n; int N; diff --git a/erts/emulator/beam/erl_printf_term.c b/erts/emulator/beam/erl_printf_term.c index e4c086089f36..761d40b775ad 100644 --- a/erts/emulator/beam/erl_printf_term.c +++ b/erts/emulator/beam/erl_printf_term.c @@ -188,36 +188,34 @@ static int is_printable_ascii(byte* bytep, Uint bytesize, Uint bitoffs) * valid character, the offset is updated to point to the next character. size * is only used for debugging. */ -static ERTS_INLINE int utf8_decode(const unsigned char *text, int *offset, int size) +static ERTS_INLINE int utf8_decode(const byte *text, int *offset, int size) { - int c1 = text[*offset]; + int component = text[*offset]; int codepoint = 0; int length = 0; - if ((c1 & 0x80) == 0) { - codepoint = c1; + if ((component & 0x80) == 0) { + codepoint = component; length = 1; - } else if ((c1 & 0xE0) == 0xC0) { - codepoint = c1 & 0x1F; + } else if ((component & 0xE0) == 0xC0) { + codepoint = component & 0x1F; length = 2; - } else if ((c1 & 0xF0) == 0xE0) { - codepoint = c1 & 0x0F; + } else if ((component & 0xF0) == 0xE0) { + codepoint = component & 0x0F; length = 3; - } else if ((c1 & 0xF8) == 0xF0) { - codepoint = c1 & 0x07; - length = 4; } else { - /* Invalid first byte */ - ASSERT(length != 0); - } + ASSERT((component & 0xF8) == 0xF0); + codepoint = component & 0x07; + length = 4; + } /* Assert that there are enough bytes for decoding */ ASSERT(*offset + length <= size); for (int i = 1; i < length; i++) { - c1 = text[*offset + i]; - ASSERT((c1 & 0xC0) == 0x80); - codepoint = (codepoint << 6) | (c1 & 0x3F); + component = text[*offset + i]; + ASSERT((component & 0xC0) == 0x80); + codepoint = (codepoint << 6) | (component & 0x3F); } *offset += length; @@ -233,31 +231,35 @@ static ERTS_INLINE int utf8_decode(const unsigned char *text, int *offset, int s */ static int print_atom_name(fmtfn_t fn, void* arg, Eterm atom, long *dcount) { - int n, i; + int length, i; + const Atom *entry; int res; int need_quote; int pos; - byte *s; - byte *cpos; - int c; - int lc; + const byte *s; + int codepoint; res = 0; i = atom_val(atom); + entry = NULL; - if ((i < 0) || (i >= atom_table_size()) || (atom_tab(i) == NULL)) { - PRINT_STRING(res, fn, arg, "'); - return res; + if ((i > 0) || (i <= atom_table_size()) ) { + entry = atom_tab(i); + } + + if (entry == NULL) { + PRINT_STRING(res, fn, arg, "'); + return res; } - s = erts_atom_get_name(atom_tab(i)); - n = atom_tab(i)->len; + s = erts_atom_get_name(entry); + length = entry->len; - *dcount -= atom_tab(i)->len; + *dcount -= entry->len; - if (n == 0) { + if (length == 0) { /* The empty atom: '' */ PRINT_STRING(res, fn, arg, "''"); return res; @@ -274,19 +276,12 @@ static int print_atom_name(fmtfn_t fn, void* arg, Eterm atom, long *dcount) * the Latin-1 code block or the character '_'. */ - need_quote = 0; pos = 0; - lc = utf8_decode(s, &pos, n); - if (!IS_LOWER(lc)) - need_quote++; - else { - while (pos < n) { - lc = utf8_decode(s, &pos, n); - if (!IS_ALNUM(lc) && lc != '_') { - need_quote++; - break; - } - } + codepoint = utf8_decode(s, &pos, length); + need_quote = !IS_LOWER(codepoint); + while (pos < length && !need_quote) { + codepoint = utf8_decode(s, &pos, length); + need_quote = !IS_ALNUM(codepoint) && codepoint != '_'; } /* @@ -296,58 +291,32 @@ static int print_atom_name(fmtfn_t fn, void* arg, Eterm atom, long *dcount) * be specially printed. Therefore, we must do a partial * decoding of the utf8 encoding. */ - cpos = s; - pos = n; + pos = 0; if (need_quote) PRINT_CHAR(res, fn, arg, '\''); - while(pos--) { - c = *cpos++; - switch(c) { - case '\'': PRINT_STRING(res, fn, arg, "\\'"); break; - case '\\': PRINT_STRING(res, fn, arg, "\\\\"); break; - case '\n': PRINT_STRING(res, fn, arg, "\\n"); break; - case '\f': PRINT_STRING(res, fn, arg, "\\f"); break; - case '\t': PRINT_STRING(res, fn, arg, "\\t"); break; - case '\r': PRINT_STRING(res, fn, arg, "\\r"); break; - case '\b': PRINT_STRING(res, fn, arg, "\\b"); break; - case '\v': PRINT_STRING(res, fn, arg, "\\v"); break; - default: - if (c < ' ') { - /* ASCII control character (0-31). */ - PRINT_CHAR(res, fn, arg, '\\'); - PRINT_UWORD(res, fn, arg, 'o', 1, 3, (ErlPfUWord) c); - } else if (c >= 0x80) { - /* A multi-byte utf8-encoded code point. Determine the - * length of the sequence. */ - int n; - if ((c & 0xE0) == 0xC0) { - n = 2; - } else if ((c & 0xF0) == 0xE0) { - n = 3; - } else { - ASSERT((c & 0xF8) == 0xF0); - n = 4; - } - ASSERT(pos - n + 1 >= 0); - - if (c == 0xC2 && *cpos < 0xA0) { - /* Extended ASCII control character (128-159). */ - ASSERT(pos > 0); - ASSERT(0x80 <= *cpos); - PRINT_CHAR(res, fn, arg, '\\'); - PRINT_UWORD(res, fn, arg, 'o', 1, 3, (ErlPfUWord) *cpos); - pos--, cpos++; - } else { - PRINT_BUF(res, fn, arg, cpos-1, n); - cpos += n - 1; - pos -= n - 1; - } + + while(pos < length) { + int cp_start = pos; + codepoint = utf8_decode(s, &pos, length); + switch(codepoint) { + case '\'': PRINT_STRING(res, fn, arg, "\\'"); break; + case '\\': PRINT_STRING(res, fn, arg, "\\\\"); break; + case '\n': PRINT_STRING(res, fn, arg, "\\n"); break; + case '\f': PRINT_STRING(res, fn, arg, "\\f"); break; + case '\t': PRINT_STRING(res, fn, arg, "\\t"); break; + case '\r': PRINT_STRING(res, fn, arg, "\\r"); break; + case '\b': PRINT_STRING(res, fn, arg, "\\b"); break; + case '\v': PRINT_STRING(res, fn, arg, "\\v"); break; + default: + if (codepoint < 32 || (codepoint >= 128 && codepoint <= 159)) { + /* ASCII control character (0-31) or extended ASCII control character (128-159)*/ + PRINT_CHAR(res, fn, arg, '\\'); + PRINT_UWORD(res, fn, arg, 'o', 1, 3, (ErlPfUWord) codepoint); } else { - /* Printable ASCII character. */ - PRINT_CHAR(res, fn, arg, (char) c); + PRINT_BUF(res, fn, arg, &s[cp_start], pos - cp_start); } - break; - } + break; + } } if (need_quote) PRINT_CHAR(res, fn, arg, '\''); diff --git a/erts/emulator/beam/erl_process_dump.c b/erts/emulator/beam/erl_process_dump.c index 7e9f079b379a..89e7865bc77e 100644 --- a/erts/emulator/beam/erl_process_dump.c +++ b/erts/emulator/beam/erl_process_dump.c @@ -323,7 +323,7 @@ dump_element(fmtfn_t to, void *to_arg, Eterm x) erts_print(to, to_arg, "H" PTR_FMT, boxed_val(x)); } else if (is_immed(x)) { if (is_atom(x)) { - unsigned char* s = erts_atom_get_name(atom_tab(atom_val(x))); + const byte* s = erts_atom_get_name(atom_tab(atom_val(x))); int len = atom_tab(atom_val(x))->len; int i; diff --git a/erts/emulator/beam/erl_unicode.c b/erts/emulator/beam/erl_unicode.c index 3ac3924e99e9..9e2924946d0a 100644 --- a/erts/emulator/beam/erl_unicode.c +++ b/erts/emulator/beam/erl_unicode.c @@ -1347,7 +1347,7 @@ static Eterm do_utf8_to_list(Process *p, Uint num, const byte *bytes, Uint sz, num_built, num_eaten, tail); } -Eterm erts_utf8_to_list(Process *p, Uint num, byte *bytes, Uint sz, Uint left, +Eterm erts_utf8_to_list(Process *p, Uint num, const byte *bytes, Uint sz, Uint left, Uint *num_built, Uint *num_eaten, Eterm tail) { return do_utf8_to_list(p, num, bytes, sz, left, num_built, num_eaten, tail); @@ -2230,12 +2230,13 @@ Sint erts_native_filename_need(Eterm ioterm, int encoding) need = 2* ap->latin1_chars; } else { + const byte * name = erts_atom_get_name(ap); for (i = 0; i < ap->len; ) { - if (erts_atom_get_name(ap)[i] < 0x80) { + if (name[i] < 0x80) { i++; - } else if (erts_atom_get_name(ap)[i] < 0xE0) { + } else if (name[i] < 0xE0) { i += 2; - } else if (erts_atom_get_name(ap)[i] < 0xF0) { + } else if (name[i] < 0xF0) { i += 3; } else { need = -1; @@ -2253,7 +2254,7 @@ Sint erts_native_filename_need(Eterm ioterm, int encoding) * the middle of filenames */ if (need > 0) { - byte *name = erts_atom_get_name(ap); + const byte *name = erts_atom_get_name(ap); int len = ap->len; for (i = 0; i < len; i++) { if (name[i] == 0) { diff --git a/erts/emulator/beam/erl_utils.h b/erts/emulator/beam/erl_utils.h index 2cbfdf9ba8d2..d6ebc1c41dde 100644 --- a/erts/emulator/beam/erl_utils.h +++ b/erts/emulator/beam/erl_utils.h @@ -192,7 +192,7 @@ ERTS_GLB_INLINE int erts_cmp_atoms(Eterm a, Eterm b) { Atom *aa = atom_tab(atom_val(a)); Atom *bb = atom_tab(atom_val(b)); - byte *name_a, *name_b; + const byte *name_a, *name_b; int len_a, len_b, diff; diff = aa->ord0 - bb->ord0; diff --git a/erts/emulator/beam/global.h b/erts/emulator/beam/global.h index 218aea136a57..dce7e2956c77 100644 --- a/erts/emulator/beam/global.h +++ b/erts/emulator/beam/global.h @@ -1491,7 +1491,7 @@ char *erts_convert_filename_to_wchar(const byte* bytes, Uint size, ErtsAlcType_t alloc_type, Sint* used, Uint extra_wchars); Eterm erts_convert_native_to_filename(Process *p, size_t size, byte *bytes); -Eterm erts_utf8_to_list(Process *p, Uint num, byte *bytes, Uint sz, Uint left, +Eterm erts_utf8_to_list(Process *p, Uint num, const byte *bytes, Uint sz, Uint left, Uint *num_built, Uint *num_eaten, Eterm tail); Eterm erts_make_list_from_utf8_buf(Eterm **hpp, Uint num,