From 56498cdc876ddeb0f41baec8b9596f92fe0334da Mon Sep 17 00:00:00 2001 From: Erik Schnetter Date: Wed, 1 Jan 2025 12:45:40 -0500 Subject: [PATCH 1/2] base/strings: Add documentation to unicode conversion table --- base/strings/unicode.jl | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/base/strings/unicode.jl b/base/strings/unicode.jl index fcb4a371e9898..cb50e87f69f88 100644 --- a/base/strings/unicode.jl +++ b/base/strings/unicode.jl @@ -174,14 +174,19 @@ function utf8proc_map(str::Union{String,SubString{String}}, options::Integer, ch return String(resize!(buffer, nbytes)) end -# from julia_charmap.h, used by julia_chartransform in the Unicode stdlib +# Array of {original codepoint, replacement codepoint} normalizations +# to perform on Julia identifiers, to canonicalize characters that +# are both easily confused and easily inputted by accident. +# +# Important: when this table is updated, also update the corresponding table +# in src/flisp/julia_charmap.h const _julia_charmap = Dict{UInt32,UInt32}( - 0x025B => 0x03B5, - 0x00B5 => 0x03BC, - 0x00B7 => 0x22C5, - 0x0387 => 0x22C5, - 0x2212 => 0x002D, - 0x210F => 0x0127, + 0x025B => 0x03B5, # latin small letter open e -> greek small letter epsilon + 0x00B5 => 0x03BC, # micro sign -> greek small letter mu + 0x00B7 => 0x22C5, # middot char -> dot operator (#25098) + 0x0387 => 0x22C5, # Greek interpunct -> dot operator (#25098) + 0x2212 => 0x002D, # minus -> hyphen-minus (#26193) + 0x210F => 0x0127, # hbar -> small letter h with stroke (#48870) ) utf8proc_map(s::AbstractString, flags::Integer, chartransform::F = identity) where F = utf8proc_map(String(s), flags, chartransform) From 7535b583f15b65c4acfcba675f7f8781e887c69f Mon Sep 17 00:00:00 2001 From: Erik Schnetter Date: Wed, 1 Jan 2025 13:53:50 -0500 Subject: [PATCH 2/2] Update base/strings/unicode.jl Co-authored-by: Neven Sajko <4944410+nsajko@users.noreply.github.com> --- base/strings/unicode.jl | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/base/strings/unicode.jl b/base/strings/unicode.jl index cb50e87f69f88..f2938ba6021f2 100644 --- a/base/strings/unicode.jl +++ b/base/strings/unicode.jl @@ -174,12 +174,14 @@ function utf8proc_map(str::Union{String,SubString{String}}, options::Integer, ch return String(resize!(buffer, nbytes)) end -# Array of {original codepoint, replacement codepoint} normalizations -# to perform on Julia identifiers, to canonicalize characters that -# are both easily confused and easily inputted by accident. -# -# Important: when this table is updated, also update the corresponding table -# in src/flisp/julia_charmap.h +""" +`Dict` of `original codepoint => replacement codepoint` normalizations +to perform on Julia identifiers, to canonicalize characters that +are both easily confused and easily inputted by accident. + +!!! warning + When this table is updated, also update the corresponding table in `src/flisp/julia_charmap.h`. +""" const _julia_charmap = Dict{UInt32,UInt32}( 0x025B => 0x03B5, # latin small letter open e -> greek small letter epsilon 0x00B5 => 0x03BC, # micro sign -> greek small letter mu