Skip to content

Commit

Permalink
Strip RIGHT-TO-LEFT OVERRIDE and LEFT-TO-RIGHT OVERRIDE characters from AUTHORS.txt
Browse files Browse the repository at this point in the history

Since we don't know anything about the names, we assume anything in between
RIGHT-TO-LEFT OVERRIDE and LEFT-TO-RIGHT OVERRIDE (or the end of the name)
should be spelled backwards.

This resulted in a duplicate author name,
because the same name was written in a different Unicode normalization form.
So I also added a call to unicodedata.normalize.

Fixes #12467
  • Loading branch information
hroncok committed Feb 8, 2024
1 parent 51de88c commit 239c1d4
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 3 deletions.
5 changes: 2 additions & 3 deletions AUTHORS.txt
Original file line number Diff line number Diff line change
Expand Up @@ -445,7 +445,7 @@ Mark Williams
Markus Hametner
Martey Dodoo
Martin Fischer
Martin Häcker
Martin Häcker
Martin Pavlasek
Masaki
Masklinn
Expand Down Expand Up @@ -495,7 +495,7 @@ Miro Hrončok
Monica Baluna
montefra
Monty Taylor
Muha Ajjan
Muha Ajjan
Nadav Wexler
Nahuel Ambrosini
Nate Coraor
Expand Down Expand Up @@ -757,4 +757,3 @@ Zvezdan Petkovic
Łukasz Langa
Роман Донченко
Семён Марьясин
‮rekcäH nitraM‮
31 changes: 31 additions & 0 deletions tools/release/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import pathlib
import subprocess
import tempfile
import unicodedata
from typing import Iterator, List, Optional, Set

from nox.sessions import Session
Expand Down Expand Up @@ -45,6 +46,34 @@ def modified_files_in_git(*args: str) -> int:
).returncode


def strip_rtl_ltr_overrides(a: str) -> str:
    """Strip RIGHT-TO-LEFT OVERRIDE and LEFT-TO-RIGHT OVERRIDE characters
    from author names.

    Reorder the characters in between them to preserve the perception.
    See https://github.com/pypa/pip/issues/12467 for more info.

    Nothing is known about the names themselves, so every run of characters
    that starts at a RIGHT-TO-LEFT OVERRIDE and ends at the next
    LEFT-TO-RIGHT OVERRIDE (or at the end of the name) is assumed to be
    spelled backwards and is reversed. Text outside such runs keeps its
    order. Both override characters are removed from the result.

    :param a: the author name, possibly containing override characters.
    :return: the name with all override characters removed and every
        overridden run reversed.
    """
    rtl = "\N{RIGHT-TO-LEFT OVERRIDE}"
    ltr = "\N{LEFT-TO-RIGHT OVERRIDE}"

    # If there are no overrides to RIGHT-TO-LEFT,
    # only strip useless LEFT-TO-RIGHT overrides.
    # This returns the original for most of the authors.
    if rtl not in a:
        return a.replace(ltr, "")

    pieces: List[str] = []
    pos = 0
    while pos < len(a):
        start = a.find(rtl, pos)
        if start == -1:
            # No further RIGHT-TO-LEFT run: the tail is kept in order.
            pieces.append(a[pos:].replace(ltr, ""))
            break

        # Text before the override is left-to-right already; any
        # LEFT-TO-RIGHT OVERRIDE in it changes nothing and is dropped.
        pieces.append(a[pos:start].replace(ltr, ""))

        # The run ends at the first LEFT-TO-RIGHT OVERRIDE located *after*
        # the RIGHT-TO-LEFT OVERRIDE. Searching from the start of the string
        # would pick up an earlier, unrelated override and garble the name.
        end = a.find(ltr, start + 1)
        if end == -1:
            end = len(a)

        # Repeated RIGHT-TO-LEFT overrides inside the run are redundant, so
        # they are dropped before the run is reversed.
        pieces.append(a[start + 1 : end].replace(rtl, "")[::-1])
        pos = end + 1

    # By construction no piece contains either override character.
    return "".join(pieces)


def get_author_list() -> List[str]:
"""Get the list of authors from Git commits."""
# subprocess because session.run doesn't give us stdout
Expand All @@ -60,6 +89,8 @@ def get_author_list() -> List[str]:
seen_authors: Set[str] = set()
for author in result.stdout.splitlines():
author = author.strip()
author = strip_rtl_ltr_overrides(author)
author = unicodedata.normalize("NFC", author)
if author.lower() not in seen_authors:
seen_authors.add(author.lower())
authors.append(author)
Expand Down

0 comments on commit 239c1d4

Please sign in to comment.