From 3aab8903097b85dcdbfd4e2638fa629882ed4136 Mon Sep 17 00:00:00 2001 From: Kodi Arfer Date: Mon, 5 Jun 2023 15:52:18 -0400 Subject: [PATCH 1/3] Don't mangle question marks specially --- hy/reader/mangling.py | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/hy/reader/mangling.py b/hy/reader/mangling.py index ce9f85822..e71e6d4cb 100644 --- a/hy/reader/mangling.py +++ b/hy/reader/mangling.py @@ -35,19 +35,15 @@ def mangle(s): if "." in s and s.strip("."): return ".".join(mangle(x) if x else "" for x in s.split(".")) - # Step 1: Remove and save leading underscores + # Remove and save leading underscores s2 = s.lstrip(normalizes_to_underscore) leading_underscores = "_" * (len(s) - len(s2)) s = s2 - # Step 2: Convert hyphens without introducing a new leading underscore + # Convert hyphens without introducing a new leading underscore s = s[0] + s[1:].replace("-", "_") if s else s - # Step 3: Convert trailing `?` to leading `is_` - if s.endswith("?"): - s = "is_" + s[:-1] - - # Step 4: Convert invalid characters or reserved words + # Convert invalid characters or reserved words if not (leading_underscores + s).isidentifier(): # Replace illegal characters with their Unicode character # names, or hexadecimal if they don't have one. @@ -126,8 +122,6 @@ def unmangle(s): ), s[len("hyx_") :], ) - if s.startswith("is_"): - s = s[len("is_") :] + "?" 
s = s.replace("_", "-") return prefix + s + suffix From 8539d956b29883966ae91f923e6e8e327b879e87 Mon Sep 17 00:00:00 2001 From: Kodi Arfer Date: Mon, 5 Jun 2023 15:53:23 -0400 Subject: [PATCH 2/3] Update the tests for the mangling change --- tests/native_tests/mangling.hy | 42 ++++++++++++++-------------------- 1 file changed, 17 insertions(+), 25 deletions(-) diff --git a/tests/native_tests/mangling.hy b/tests/native_tests/mangling.hy index 64c896841..5f361c8e7 100644 --- a/tests/native_tests/mangling.hy +++ b/tests/native_tests/mangling.hy @@ -28,8 +28,6 @@ (assert (= (hy.mangle "--__") "hyx_XhyphenHminusX___")) (assert (= (hy.mangle "__--") "__hyx_XhyphenHminusX_")) (assert (= (hy.mangle "__--__") "__hyx_XhyphenHminusX___")) - (assert (= (hy.mangle "--?") "hyx_is_XhyphenHminusX_")) - (assert (= (hy.mangle "__--?") "__hyx_is_XhyphenHminusX_")) ;; test unmangling choices (assert (= (hy.unmangle "hyx_XhyphenHminusX") "-")) @@ -49,12 +47,10 @@ (defn test-question-mark [] + ; Once treated specially, but no more. (setv foo? "nachos") (assert (= foo? "nachos")) - (assert (= is_foo "nachos")) - (setv ___ab_cd? "tacos") - (assert (= ___ab_cd? "tacos")) - (assert (= ___is_ab_cd "tacos"))) + (assert (= hyx_fooXquestion_markX "nachos"))) (defn test-py-forbidden-ascii [] @@ -71,10 +67,7 @@ (assert (= (+ hyx_XflowerXab hyx_Xblack_heart_suitX) "flowerlove")) (setv ⚘-⚘ "doubleflower") (assert (= ⚘-⚘ "doubleflower")) - (assert (= hyx_XflowerX_XflowerX "doubleflower")) - (setv ⚘? "mystery") - (assert (= ⚘? "mystery")) - (assert (= hyx_is_XflowerX "mystery"))) + (assert (= hyx_XflowerX_XflowerX "doubleflower"))) (defn test-higher-unicode [] @@ -156,34 +149,33 @@ (defn test-keyword-args [] - (defn f [a a-b foo? ☘] - [a a-b foo? ☘]) - (assert (= (f :foo? 
3 :☘ 4 :a 1 :a-b 2) [1 2 3 4])) - (assert (= (f :is_foo 3 :hyx_XshamrockX 4 :a 1 :a_b 2) [1 2 3 4])) + (defn f [a a-b ☘] + [a a-b ☘]) + (assert (= (f :☘ 3 :a 1 :a-b 2) [1 2 3])) + (assert (= (f :hyx_XshamrockX 3 :a 1 :a_b 2) [1 2 3])) (defn g [#** x] x) - (assert (= (g :foo? 3 :☘ 4 :a 1 :a-b 2) - {"a" 1 "a_b" 2 "is_foo" 3 "hyx_XshamrockX" 4})) - (assert (= (g :is_foo 3 :hyx_XshamrockX 4 :a 1 :a_b 2) - {"a" 1 "a_b" 2 "is_foo" 3 "hyx_XshamrockX" 4}))) + (assert (= (g :☘ 3 :a 1 :a-b 2) + {"a" 1 "a_b" 2 "hyx_XshamrockX" 3})) + (assert (= (g :hyx_XshamrockX 3 :a 1 :a_b 2) + {"a" 1 "a_b" 2 "hyx_XshamrockX" 3}))) (defn test-late-mangling [] ; Mangling should only happen during compilation. - (assert (!= 'foo? 'is_foo)) - (setv sym 'foo?) - (assert (= sym (hy.models.Symbol "foo?"))) - (assert (!= sym (hy.models.Symbol "is_foo"))) + (assert (!= 'foo-bar 'foo_bar)) + (setv sym 'foo-bar) + (assert (= sym (hy.models.Symbol "foo-bar"))) + (assert (!= sym (hy.models.Symbol "foo_bar"))) (setv out (hy.eval `(do (setv ~sym 10) - [foo? is_foo]))) + [foo-bar foo_bar]))) (assert (= out [10 10]))) (defn test-functions [] - (for [[a b] [["___ab-cd?" "___is_ab_cd"] - ["⚘-⚘" "hyx_XflowerX_XflowerX"]]] + (for [[a b] [["⚘-⚘" "hyx_XflowerX_XflowerX"]]] (assert (= (hy.mangle a) b)) (assert (= (hy.unmangle b) a)))) From 7aa4d467df4cd67986844eb5bda29686cd3829da Mon Sep 17 00:00:00 2001 From: Kodi Arfer Date: Mon, 5 Jun 2023 16:01:08 -0400 Subject: [PATCH 3/3] Update the docs and NEWS for the mangling change --- NEWS.rst | 2 ++ docs/syntax.rst | 19 ++++++------------- hy/pyops.hy | 2 +- hy/reader/mangling.py | 10 ++-------- 4 files changed, 11 insertions(+), 22 deletions(-) diff --git a/NEWS.rst b/NEWS.rst index 1136f216d..cbd903774 100644 --- a/NEWS.rst +++ b/NEWS.rst @@ -14,6 +14,8 @@ Breaking Changes Forms like `#*word` will attempt to dispatch a macro named `*word`; to unpack a symbol named `word`, write `#* word` (note the space). * Reader macro names are no longer mangled. 
+* Question marks (`?`) are no longer mangled specially, so `foo?` now + mangles to `hyx_fooXquestion_markX` instead of `is_foo`. * `hy2py`'s recursive mode now expects a module name as input, not any old directory. You must be in the parent directory of the module directory. diff --git a/docs/syntax.rst b/docs/syntax.rst index 6fd748221..5773cbb73 100644 --- a/docs/syntax.rst +++ b/docs/syntax.rst @@ -266,10 +266,6 @@ Python-legal names. The steps are as follows: underscore into the name. Thus ``--has-dashes?`` becomes ``-_has_dashes?`` at this step. -#. If the name ends with ASCII ``?``, remove it and prepend ``is_``. Thus, - ``tasty?`` becomes ``is_tasty`` and ``-_has_dashes?`` becomes - ``is_-_has_dashes``. - #. If the name still isn't Python-legal, make the following changes. A name could be Python-illegal because it contains a character that's never legal in a Python name or it contains a character that's illegal in that position. @@ -282,12 +278,11 @@ Python-legal names. The steps are as follows: code point in lowercase hexadecimal. Thus, ``green☘`` becomes ``hyx_greenXshamrockX`` and - ``is_-_has_dashes`` becomes ``hyx_is_XhyphenHminusX_has_dashes``. + ``-_has_dashes`` becomes ``hyx_XhyphenHminusX_has_dashes``. #. Take any leading underscores removed in the first step, transliterate them - to ASCII, and add them back to the mangled name. Thus, ``(hy.mangle - '_tasty?)`` is ``"_is_tasty"`` instead of ``"is__tasty"`` and ``(hy.mangle - '__-_has-dashes?)`` is ``"__hyx_is_XhyphenHminusX_has_dashes"``. + to ASCII, and add them back to the mangled name. Thus, ``__green☘`` becomes + ``__hyx_greenXshamrockX``. #. Finally, normalize any leftover non-ASCII characters. 
The result may still not be ASCII (e.g., ``α`` is already Python-legal and normalized, so it @@ -300,11 +295,9 @@ You can invoke the mangler yourself with the function :hy:func:`hy.mangle`, and Mangling isn't something you should have to think about often, but you may see mangled names in error messages, the output of ``hy2py``, etc. A catch to be aware of is that mangling, as well as the inverse "unmangling" operation -offered by :hy:func:`hy.unmangle`, isn't one-to-one. Two different symbols -can mangle to the same string and hence compile to the same Python variable. -The chief practical consequence of this is that (non-initial) ``-`` and ``_`` are -interchangeable under mangling, so you can't use e.g. ``foo-bar`` and -``foo_bar`` as separate variables. +offered by :hy:func:`hy.unmangle`, isn't one-to-one. Two different symbols, +like ``foo-bar`` and ``foo_bar``, can mangle to the same string and hence +compile to the same Python variable. .. _string-literals: diff --git a/hy/pyops.hy b/hy/pyops.hy index 34342d79c..95941d13a 100644 --- a/hy/pyops.hy +++ b/hy/pyops.hy @@ -6,7 +6,7 @@ to the names being the same: - ``==`` in Python is :hy:func:`= <hy.pyops.=>` in Hy. - ``~`` in Python is :hy:func:`bnot <hy.pyops.bnot>` in Hy. -- ``is not`` in Python is :hy:func:`is-not <hy.pyops.not?>` in Hy. +- ``is not`` in Python is :hy:func:`is-not <hy.pyops.is-not>` in Hy. - ``not in`` in Python is :hy:func:`not-in <hy.pyops.not-in>` in Hy. For Python's subscription expressions (like ``x[2]``), Hy has two named diff --git a/hy/reader/mangling.py b/hy/reader/mangling.py index e71e6d4cb..b12100dd0 100644 --- a/hy/reader/mangling.py +++ b/hy/reader/mangling.py @@ -11,7 +11,7 @@ def mangle(s): :hy:func:`hy.repr`) and convert it to a valid Python identifier according to :ref:`Hy's mangling rules `. :: - (hy.mangle 'foo-bar?) 
; => "is_foo_bar" + (hy.mangle 'foo-bar) ; => "foo_bar" (hy.mangle "🦑") ; => "hyx_squid" If the stringified argument is already both legal as a Python identifier @@ -26,7 +26,7 @@ def mangle(s): `, and ``hy.mangle`` will mangle the dot-delimited parts separately. :: - (hy.mangle "a.b?.c!.d") ; => "a.is_b.hyx_cXexclamation_markX.d" + (hy.mangle "a.c!.d") ; => "a.hyx_cXexclamation_markX.d" """ assert s @@ -84,15 +84,9 @@ def unmangle(s): => (hy.unmangle 'foo_bar) "foo-bar" - => (hy.unmangle 'is_foo_bar) - "foo-bar?" - => (hy.unmangle 'hyx_XasteriskX) "*" - => (hy.unmangle '_hyx_is_fooXsolidusXa) - "_foo/a?" - => (hy.unmangle 'hyx_XhyphenHminusX_XgreaterHthan_signX) "-->"