Skip to content

Commit

Permalink
Merge pull request hylang#2453 from Kodiologist/qmark-mangle
Browse files Browse the repository at this point in the history
Don't mangle question marks specially
  • Loading branch information
Kodiologist authored Jun 9, 2023
2 parents 43eb9ee + 7aa4d46 commit 6bb0ee9
Show file tree
Hide file tree
Showing 5 changed files with 31 additions and 56 deletions.
2 changes: 2 additions & 0 deletions NEWS.rst
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ Breaking Changes
Forms like `#*word` will attempt to dispatch a macro named `*word`;
to unpack a symbol named `word`, write `#* word` (note the space).
* Reader macro names are no longer mangled.
* Question marks (`?`) are no longer mangled specially, so `foo?` now
mangles to `hyx_fooXquestion_markX` instead of `is_foo`.
* `hy2py`'s recursive mode now expects a module name as input, not any
old directory. You must be in the parent directory of the module
directory.
Expand Down
19 changes: 6 additions & 13 deletions docs/syntax.rst
Original file line number Diff line number Diff line change
Expand Up @@ -266,10 +266,6 @@ Python-legal names. The steps are as follows:
underscore into the name. Thus ``--has-dashes?`` becomes ``-_has_dashes?``
at this step.

#. If the name ends with ASCII ``?``, remove it and prepend ``is_``. Thus,
``tasty?`` becomes ``is_tasty`` and ``-_has_dashes?`` becomes
``is_-_has_dashes``.

#. If the name still isn't Python-legal, make the following changes. A name
could be Python-illegal because it contains a character that's never legal
in a Python name or it contains a character that's illegal in that position.
Expand All @@ -282,12 +278,11 @@ Python-legal names. The steps are as follows:
code point in lowercase hexadecimal.

Thus, ``green☘`` becomes ``hyx_greenXshamrockX`` and
``is_-_has_dashes`` becomes ``hyx_is_XhyphenHminusX_has_dashes``.
``-_has_dashes`` becomes ``hyx_XhyphenHminusX_has_dashes``.

#. Take any leading underscores removed in the first step, transliterate them
to ASCII, and add them back to the mangled name. Thus, ``(hy.mangle
'_tasty?)`` is ``"_is_tasty"`` instead of ``"is__tasty"`` and ``(hy.mangle
'__-_has-dashes?)`` is ``"__hyx_is_XhyphenHminusX_has_dashes"``.
to ASCII, and add them back to the mangled name. Thus, ``__green☘`` becomes
``__hyx_greenXshamrockX``.

#. Finally, normalize any leftover non-ASCII characters. The result may still
not be ASCII (e.g., ``α`` is already Python-legal and normalized, so it
Expand All @@ -300,11 +295,9 @@ You can invoke the mangler yourself with the function :hy:func:`hy.mangle`, and
Mangling isn't something you should have to think about often, but you may see
mangled names in error messages, the output of ``hy2py``, etc. A catch to be
aware of is that mangling, as well as the inverse "unmangling" operation
offered by :hy:func:`hy.unmangle`, isn't one-to-one. Two different symbols
can mangle to the same string and hence compile to the same Python variable.
The chief practical consequence of this is that (non-initial) ``-`` and ``_`` are
interchangeable under mangling, so you can't use e.g. ``foo-bar`` and
``foo_bar`` as separate variables.
offered by :hy:func:`hy.unmangle`, isn't one-to-one. Two different symbols,
like ``foo-bar`` and ``foo_bar``, can mangle to the same string and hence
compile to the same Python variable.

.. _string-literals:

Expand Down
2 changes: 1 addition & 1 deletion hy/pyops.hy
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ to the names being the same:
- ``==`` in Python is :hy:func:`= <hy.pyops.=>` in Hy.
- ``~`` in Python is :hy:func:`bnot <hy.pyops.bnot>` in Hy.
- ``is not`` in Python is :hy:func:`is-not <hy.pyops.not?>` in Hy.
- ``is not`` in Python is :hy:func:`is-not <hy.pyops.is-not>` in Hy.
- ``not in`` in Python is :hy:func:`not-in <hy.pyops.not-in>` in Hy.
For Python's subscription expressions (like ``x[2]``), Hy has two named
Expand Down
22 changes: 5 additions & 17 deletions hy/reader/mangling.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ def mangle(s):
:hy:func:`hy.repr`) and convert it to a valid Python identifier according
to :ref:`Hy's mangling rules <mangling>`. ::
(hy.mangle 'foo-bar?) ; => "is_foo_bar"
(hy.mangle 'foo-bar) ; => "foo_bar"
(hy.mangle "🦑") ; => "hyx_squid"
If the stringified argument is already both legal as a Python identifier
Expand All @@ -26,7 +26,7 @@ def mangle(s):
<dotted-identifiers>`, and ``hy.mangle`` will mangle the dot-delimited
parts separately. ::
(hy.mangle "a.b?.c!.d") ; => "a.is_b.hyx_cXexclamation_markX.d"
(hy.mangle "a.c!.d") ; => "a.hyx_cXexclamation_markX.d"
"""

assert s
Expand All @@ -35,19 +35,15 @@ def mangle(s):
if "." in s and s.strip("."):
return ".".join(mangle(x) if x else "" for x in s.split("."))

# Step 1: Remove and save leading underscores
# Remove and save leading underscores
s2 = s.lstrip(normalizes_to_underscore)
leading_underscores = "_" * (len(s) - len(s2))
s = s2

# Step 2: Convert hyphens without introducing a new leading underscore
# Convert hyphens without introducing a new leading underscore
s = s[0] + s[1:].replace("-", "_") if s else s

# Step 3: Convert trailing `?` to leading `is_`
if s.endswith("?"):
s = "is_" + s[:-1]

# Step 4: Convert invalid characters or reserved words
# Convert invalid characters or reserved words
if not (leading_underscores + s).isidentifier():
# Replace illegal characters with their Unicode character
# names, or hexadecimal if they don't have one.
Expand Down Expand Up @@ -88,15 +84,9 @@ def unmangle(s):
=> (hy.unmangle 'foo_bar)
"foo-bar"
=> (hy.unmangle 'is_foo_bar)
"foo-bar?"
=> (hy.unmangle 'hyx_XasteriskX)
"*"
=> (hy.unmangle '_hyx_is_fooXsolidusXa)
"_foo/a?"
=> (hy.unmangle 'hyx_XhyphenHminusX_XgreaterHthan_signX)
"-->"
Expand Down Expand Up @@ -126,8 +116,6 @@ def unmangle(s):
),
s[len("hyx_") :],
)
if s.startswith("is_"):
s = s[len("is_") :] + "?"
s = s.replace("_", "-")

return prefix + s + suffix
42 changes: 17 additions & 25 deletions tests/native_tests/mangling.hy
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,6 @@
(assert (= (hy.mangle "--__") "hyx_XhyphenHminusX___"))
(assert (= (hy.mangle "__--") "__hyx_XhyphenHminusX_"))
(assert (= (hy.mangle "__--__") "__hyx_XhyphenHminusX___"))
(assert (= (hy.mangle "--?") "hyx_is_XhyphenHminusX_"))
(assert (= (hy.mangle "__--?") "__hyx_is_XhyphenHminusX_"))

;; test unmangling choices
(assert (= (hy.unmangle "hyx_XhyphenHminusX") "-"))
Expand All @@ -49,12 +47,10 @@


(defn test-question-mark []
; Once treated specially, but no more.
(setv foo? "nachos")
(assert (= foo? "nachos"))
(assert (= is_foo "nachos"))
(setv ___ab_cd? "tacos")
(assert (= ___ab_cd? "tacos"))
(assert (= ___is_ab_cd "tacos")))
(assert (= hyx_fooXquestion_markX "nachos")))


(defn test-py-forbidden-ascii []
Expand All @@ -71,10 +67,7 @@
(assert (= (+ hyx_XflowerXab hyx_Xblack_heart_suitX) "flowerlove"))
(setv ⚘-⚘ "doubleflower")
(assert (= ⚘-⚘ "doubleflower"))
(assert (= hyx_XflowerX_XflowerX "doubleflower"))
(setv ⚘? "mystery")
(assert (= ⚘? "mystery"))
(assert (= hyx_is_XflowerX "mystery")))
(assert (= hyx_XflowerX_XflowerX "doubleflower")))


(defn test-higher-unicode []
Expand Down Expand Up @@ -156,34 +149,33 @@

(defn test-keyword-args []

(defn f [a a-b foo? ☘]
[a a-b foo? ☘])
(assert (= (f :foo? 3 :☘ 4 :a 1 :a-b 2) [1 2 3 4]))
(assert (= (f :is_foo 3 :hyx_XshamrockX 4 :a 1 :a_b 2) [1 2 3 4]))
(defn f [a a-b ☘]
[a a-b ☘])
(assert (= (f :☘ 3 :a 1 :a-b 2) [1 2 3]))
(assert (= (f :hyx_XshamrockX 3 :a 1 :a_b 2) [1 2 3]))

(defn g [#** x]
x)
(assert (= (g :foo? 3 :☘ 4 :a 1 :a-b 2)
{"a" 1 "a_b" 2 "is_foo" 3 "hyx_XshamrockX" 4}))
(assert (= (g :is_foo 3 :hyx_XshamrockX 4 :a 1 :a_b 2)
{"a" 1 "a_b" 2 "is_foo" 3 "hyx_XshamrockX" 4})))
(assert (= (g :☘ 3 :a 1 :a-b 2)
{"a" 1 "a_b" 2 "hyx_XshamrockX" 3}))
(assert (= (g :hyx_XshamrockX 3 :a 1 :a_b 2)
{"a" 1 "a_b" 2 "hyx_XshamrockX" 3})))


(defn test-late-mangling []
; Mangling should only happen during compilation.
(assert (!= 'foo? 'is_foo))
(setv sym 'foo?)
(assert (= sym (hy.models.Symbol "foo?")))
(assert (!= sym (hy.models.Symbol "is_foo")))
(assert (!= 'foo-bar 'foo_bar))
(setv sym 'foo-bar)
(assert (= sym (hy.models.Symbol "foo-bar")))
(assert (!= sym (hy.models.Symbol "foo_bar")))
(setv out (hy.eval `(do
(setv ~sym 10)
[foo? is_foo])))
[foo-bar foo_bar])))
(assert (= out [10 10])))


(defn test-functions []
(for [[a b] [["___ab-cd?" "___is_ab_cd"]
["⚘-⚘" "hyx_XflowerX_XflowerX"]]]
(for [[a b] [["⚘-⚘" "hyx_XflowerX_XflowerX"]]]
(assert (= (hy.mangle a) b))
(assert (= (hy.unmangle b) a))))

Expand Down

0 comments on commit 6bb0ee9

Please sign in to comment.