From 637bf1a012c2ddbee5d3ecc0dc6b87cb4bedbab5 Mon Sep 17 00:00:00 2001 From: Nikolaus Waxweiler Date: Wed, 7 Dec 2022 15:34:40 +0000 Subject: [PATCH 1/5] Disable warning B905 until we require Python >= 3.10 --- tox.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tox.ini b/tox.ini index 79f3af6ba..42ec64535 100644 --- a/tox.ini +++ b/tox.ini @@ -48,7 +48,7 @@ commands = [flake8] select = C, E, F, W, B, B9 -ignore = E203, E266, E501, W503 +ignore = E203, E266, E501, W503, B905 max-line-length = 88 exclude = .git, __pycache__, build, dist, .eggs, .tox, venv, venv*, .venv, .venv* From 779a9081c2847ccaba9b22dbc9c5c5bbdd112fbd Mon Sep 17 00:00:00 2001 From: Nikolaus Waxweiler Date: Wed, 7 Dec 2022 15:05:51 +0000 Subject: [PATCH 2/5] Bucketize Hiragana and Katakana for kern writing purposes --- Lib/ufo2ft/constants.py | 2 + .../featureWriters/baseFeatureWriter.py | 5 +- .../featureWriters/kernFeatureWriter.py | 9 +- Lib/ufo2ft/util.py | 19 ++++ .../featureWriters/kernFeatureWriter_test.py | 89 +++++++++++++++++-- 5 files changed, 114 insertions(+), 10 deletions(-) diff --git a/Lib/ufo2ft/constants.py b/Lib/ufo2ft/constants.py index 0c8327fd4..9e1fe1b16 100644 --- a/Lib/ufo2ft/constants.py +++ b/Lib/ufo2ft/constants.py @@ -40,6 +40,8 @@ COMMON_SCRIPT = "Zyyy" +HIRAGANA_KATAKANA_SCRIPTS = {"Hira", "Kana"} + INDIC_SCRIPTS = [ "Beng", # Bengali "Cham", # Cham diff --git a/Lib/ufo2ft/featureWriters/baseFeatureWriter.py b/Lib/ufo2ft/featureWriters/baseFeatureWriter.py index 8e83d0a71..2173a09f7 100644 --- a/Lib/ufo2ft/featureWriters/baseFeatureWriter.py +++ b/Lib/ufo2ft/featureWriters/baseFeatureWriter.py @@ -2,11 +2,10 @@ from collections import OrderedDict, namedtuple from types import SimpleNamespace -from fontTools import unicodedata - from ufo2ft.constants import OPENTYPE_CATEGORIES_KEY from ufo2ft.errors import InvalidFeaturesData from ufo2ft.featureWriters import ast +from ufo2ft.util import bucketizedScriptExtensions INSERT_FEATURE_MARKER = r"\s*# Automatic Code.*" @@ -414,7 +413,7 @@ def guessFontScripts(self): if glyph.name not in glyphSet or glyph.unicodes is None: continue for codepoint in glyph.unicodes: - scripts = unicodedata.script_extension(chr(codepoint)) + scripts = bucketizedScriptExtensions(codepoint) if len(scripts) == 1: single_scripts.update(scripts) diff --git a/Lib/ufo2ft/featureWriters/kernFeatureWriter.py b/Lib/ufo2ft/featureWriters/kernFeatureWriter.py index bc71ee422..df1cead27 100644 --- a/Lib/ufo2ft/featureWriters/kernFeatureWriter.py +++ b/Lib/ufo2ft/featureWriters/kernFeatureWriter.py @@ -11,7 +11,12 @@ from ufo2ft.constants import COMMON_SCRIPT, INDIC_SCRIPTS, USE_SCRIPTS from ufo2ft.featureWriters import BaseFeatureWriter, ast -from ufo2ft.util import DFLT_SCRIPTS, classifyGlyphs, quantize +from ufo2ft.util import ( + DFLT_SCRIPTS, + bucketizedScriptExtensions, + classifyGlyphs, + quantize, +) LOGGER = logging.getLogger(__name__) @@ -357,7 +362,7 @@ def knownScriptsPerCodepoint(self, uv: int) -> set[str]: # anyway. return {COMMON_SCRIPT} else: - script_extension = unicodedata.script_extension(chr(uv)) + script_extension = bucketizedScriptExtensions(uv) return script_extension & (self.context.knownScripts | DFLT_SCRIPTS) def _makeKerningLookups(self): diff --git a/Lib/ufo2ft/util.py b/Lib/ufo2ft/util.py index 19a661645..ff1d4969a 100644 --- a/Lib/ufo2ft/util.py +++ b/Lib/ufo2ft/util.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import importlib import logging import re @@ -13,6 +15,8 @@ from fontTools.pens.reverseContourPen import ReverseContourPen from fontTools.pens.transformPen import TransformPen +from ufo2ft.constants import HIRAGANA_KATAKANA_SCRIPTS + logger = logging.getLogger(__name__) @@ -595,3 +599,18 @@ def getMaxComponentDepth(glyph, glyphSet, maxComponentDepth=0): maxComponentDepth = max(maxComponentDepth, componentDepth) return maxComponentDepth + + +def bucketizedScriptExtensions(codepoint: int) -> set[str]: + """Returns the Unicode script extensions for a codepoint, combining some + scripts into the same bucket. + + This allows lookups to contain more than one script. The most prominent case + is being able to kern Hiragana and Katakana against each other, Unicode + defines "Hrkt" as an alias for both scripts. + """ + scripts = unicodedata.script_extension(chr(codepoint)) + if HIRAGANA_KATAKANA_SCRIPTS & scripts: + scripts -= HIRAGANA_KATAKANA_SCRIPTS + scripts.add("Hrkt") # Hrkt is an alias for Hira and Kata. + return scripts diff --git a/tests/featureWriters/kernFeatureWriter_test.py b/tests/featureWriters/kernFeatureWriter_test.py index b40b65763..3e3606afb 100644 --- a/tests/featureWriters/kernFeatureWriter_test.py +++ b/tests/featureWriters/kernFeatureWriter_test.py @@ -4,10 +4,11 @@ import pytest from fontTools import unicodedata +from ufo2ft.constants import HIRAGANA_KATAKANA_SCRIPTS from ufo2ft.errors import InvalidFeaturesData from ufo2ft.featureCompiler import parseLayoutFeatures from ufo2ft.featureWriters import KernFeatureWriter, ast -from ufo2ft.util import DFLT_SCRIPTS +from ufo2ft.util import DFLT_SCRIPTS, bucketizedScriptExtensions from . import FeatureWriterTest @@ -1651,13 +1652,29 @@ def test_kern_mixed_bidis(caplog, FontClass): assert " with ambiguous direction" in caplog.text +def bucketizedScript(codepoint: int) -> str: + """Returns the Unicode script for a codepoint, combining some + scripts into the same bucket. + + This allows lookups to contain more than one script. The most prominent case + is being able to kern Hiragana and Katakana against each other, Unicode + defines "Hrkt" as an alias for both scripts. + + Note: Keep in sync with bucketizedScriptExtensions! + """ + script = unicodedata.script(chr(codepoint)) + if script in HIRAGANA_KATAKANA_SCRIPTS: + return "Hrkt" + return script + + def test_kern_zyyy_zinh(FontClass): """Test that a sampling of glyphs with a common or inherited script, but a disjoint set of explicit script extensions end up in the correct lookups.""" glyphs = {} for i in range(0, 0x110000, 0x10): - script = unicodedata.script(chr(i)) - script_extension = unicodedata.script_extension(chr(i)) + script = bucketizedScript(i) + script_extension = bucketizedScriptExtensions(i) if script not in script_extension: assert script in DFLT_SCRIPTS name = f"uni{i:04X}" @@ -1713,6 +1730,14 @@ def test_kern_zyyy_zinh(FontClass): pos uniA700 uniA700 27; } kern_Hani; + lookup kern_Hrkt { + lookupflag IgnoreMarks; + pos uni3010 uni3010 8; + pos uni3030 uni3030 9; + pos uni30A0 uni30A0 10; + pos uniFF70 uniFF70 29; + } kern_Hrkt; + lookup kern_Default { lookupflag IgnoreMarks; pos uni0640 uni0640 0; @@ -1724,8 +1749,6 @@ def test_kern_zyyy_zinh(FontClass): pos uni10130 uni10130 33; pos uni102E0 uni102E0 34; pos uni102F0 uni102F0 35; - pos uni30A0 uni30A0 10; - pos uniFF70 uniFF70 29; } kern_Default; feature kern { @@ -1742,6 +1765,11 @@ def test_kern_zyyy_zinh(FontClass): language dflt; lookup kern_Default; lookup kern_Hani; + + script kana; + language dflt; + lookup kern_Default; + lookup kern_Hrkt; } kern; feature dist { @@ -1764,6 +1792,57 @@ def test_kern_zyyy_zinh(FontClass): ) +def test_kern_hira_kana_hrkt(FontClass): + """Test that Hiragana and Katakana lands in the same lookup and can be + kerned against each other and common glyphs are kerned just once.""" + glyphs = {"a-hira": 0x3042, "a-kana": 0x30A2, "period": ord(".")} + kerning = { + ("a-hira", "a-hira"): 1, + ("a-hira", "a-kana"): 2, + ("a-kana", "a-hira"): 3, + ("a-kana", "a-kana"): 4, + ("period", "period"): 5, + ("a-hira", "period"): 6, + ("period", "a-hira"): 7, + ("a-kana", "period"): 8, + ("period", "a-kana"): 9, + } + ufo = makeUFO(FontClass, glyphs, None, kerning) + newFeatures = KernFeatureWriterTest.writeFeatures(ufo) + + assert dedent(str(newFeatures)) == dedent( + """\ + lookup kern_Hrkt { + lookupflag IgnoreMarks; + pos a-hira a-hira 1; + pos a-hira a-kana 2; + pos a-hira period 6; + pos a-kana a-hira 3; + pos a-kana a-kana 4; + pos a-kana period 8; + pos period a-hira 7; + pos period a-kana 9; + } kern_Hrkt; + + lookup kern_Default { + lookupflag IgnoreMarks; + pos period period 5; + } kern_Default; + + feature kern { + script DFLT; + language dflt; + lookup kern_Default; + + script kana; + language dflt; + lookup kern_Default; + lookup kern_Hrkt; + } kern; + """ + ) + + if __name__ == "__main__": import sys From 01605e86d1b40e48d3c29f9731a436e721a2d65b Mon Sep 17 00:00:00 2001 From: Nikolaus Waxweiler Date: Wed, 7 Dec 2022 17:46:07 +0000 Subject: [PATCH 3/5] Add note about handling Hira and Kana as Hrkt --- Lib/ufo2ft/featureWriters/kernFeatureWriter.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Lib/ufo2ft/featureWriters/kernFeatureWriter.py b/Lib/ufo2ft/featureWriters/kernFeatureWriter.py index df1cead27..d889d628f 100644 --- a/Lib/ufo2ft/featureWriters/kernFeatureWriter.py +++ b/Lib/ufo2ft/featureWriters/kernFeatureWriter.py @@ -135,6 +135,9 @@ class KernFeatureWriter(BaseFeatureWriter): pairs that would mix RTL and LTR glyphs, which will not occur in applications. Unicode BiDi classes L, AN and EN are considered L, R and AL are considered R. + * Note: the glyph script determination has the quirk of declaring "Hira" and + "Kana" scripts as "Hrkt" so that they are considered one script and can be + kerned against each other. * Get the kerning groups from the UFO and filter out glyphs not in the glyphset and empty groups. Remember which group a glyph is a member of, for kern1 and kern2, so we can later reconstruct per-script groups. From e6a60002248828b7554f92c6cbbe0659517715ee Mon Sep 17 00:00:00 2001 From: Nikolaus Waxweiler Date: Thu, 8 Dec 2022 11:52:59 +0000 Subject: [PATCH 4/5] Rename function and use everywhere in ufo2ft --- Lib/ufo2ft/constants.py | 2 +- .../featureWriters/baseFeatureWriter.py | 4 ++-- .../featureWriters/kernFeatureWriter.py | 9 ++------ .../featureWriters/markFeatureWriter.py | 10 ++++++--- Lib/ufo2ft/util.py | 22 +++++++++---------- .../featureWriters/kernFeatureWriter_test.py | 16 ++++++-------- 6 files changed, 30 insertions(+), 33 deletions(-) diff --git a/Lib/ufo2ft/constants.py b/Lib/ufo2ft/constants.py index 9e1fe1b16..e314de4a6 100644 --- a/Lib/ufo2ft/constants.py +++ b/Lib/ufo2ft/constants.py @@ -40,7 +40,7 @@ COMMON_SCRIPT = "Zyyy" -HIRAGANA_KATAKANA_SCRIPTS = {"Hira", "Kana"} +UNICODE_SCRIPT_ALIASES = {"Hira": "Hrkt", "Kana": "Hrkt"} INDIC_SCRIPTS = [ "Beng", # Bengali diff --git a/Lib/ufo2ft/featureWriters/baseFeatureWriter.py b/Lib/ufo2ft/featureWriters/baseFeatureWriter.py index 2173a09f7..6d66dc2fc 100644 --- a/Lib/ufo2ft/featureWriters/baseFeatureWriter.py +++ b/Lib/ufo2ft/featureWriters/baseFeatureWriter.py @@ -5,7 +5,7 @@ from ufo2ft.constants import OPENTYPE_CATEGORIES_KEY from ufo2ft.errors import InvalidFeaturesData from ufo2ft.featureWriters import ast -from ufo2ft.util import bucketizedScriptExtensions +from ufo2ft.util import unicodeScriptExtensions INSERT_FEATURE_MARKER = r"\s*# Automatic Code.*" @@ -413,7 +413,7 @@ def guessFontScripts(self): if glyph.name not in glyphSet or glyph.unicodes is None: continue for codepoint in glyph.unicodes: - scripts = bucketizedScriptExtensions(codepoint) + scripts = unicodeScriptExtensions(codepoint) if len(scripts) == 1: single_scripts.update(scripts) diff --git a/Lib/ufo2ft/featureWriters/kernFeatureWriter.py b/Lib/ufo2ft/featureWriters/kernFeatureWriter.py index d889d628f..ad98e5ca5 100644 --- a/Lib/ufo2ft/featureWriters/kernFeatureWriter.py +++ b/Lib/ufo2ft/featureWriters/kernFeatureWriter.py @@ -11,12 +11,7 @@ from ufo2ft.constants import COMMON_SCRIPT, INDIC_SCRIPTS, USE_SCRIPTS from ufo2ft.featureWriters import BaseFeatureWriter, ast -from ufo2ft.util import ( - DFLT_SCRIPTS, - bucketizedScriptExtensions, - classifyGlyphs, - quantize, -) +from ufo2ft.util import DFLT_SCRIPTS, classifyGlyphs, quantize, unicodeScriptExtensions LOGGER = logging.getLogger(__name__) @@ -365,7 +360,7 @@ def knownScriptsPerCodepoint(self, uv: int) -> set[str]: # anyway. return {COMMON_SCRIPT} else: - script_extension = bucketizedScriptExtensions(uv) + script_extension = unicodeScriptExtensions(uv) return script_extension & (self.context.knownScripts | DFLT_SCRIPTS) def _makeKerningLookups(self): diff --git a/Lib/ufo2ft/featureWriters/markFeatureWriter.py b/Lib/ufo2ft/featureWriters/markFeatureWriter.py index a7042d14a..a0a53dec9 100644 --- a/Lib/ufo2ft/featureWriters/markFeatureWriter.py +++ b/Lib/ufo2ft/featureWriters/markFeatureWriter.py @@ -4,11 +4,15 @@ from functools import partial from fontTools.misc.fixedTools import otRound -from fontTools.unicodedata import script_extension from ufo2ft.constants import INDIC_SCRIPTS, USE_SCRIPTS from ufo2ft.featureWriters import BaseFeatureWriter, ast -from ufo2ft.util import classifyGlyphs, quantize, unicodeInScripts +from ufo2ft.util import ( + classifyGlyphs, + quantize, + unicodeInScripts, + unicodeScriptExtensions, +) class AbstractMarkPos: @@ -867,7 +871,7 @@ def _getAbvmGlyphs(self): unicodeIsAbvm = partial(unicodeInScripts, scripts=scriptsUsingAbvm) def unicodeIsNotAbvm(uv): - return bool(script_extension(chr(uv)) - self.scriptsUsingAbvm) + return bool(unicodeScriptExtensions(uv) - self.scriptsUsingAbvm) if any(unicodeIsAbvm(uv) for uv in cmap): # If there are any characters from Indic/USE/Khmer scripts in diff --git a/Lib/ufo2ft/util.py b/Lib/ufo2ft/util.py index ff1d4969a..da60efc15 100644 --- a/Lib/ufo2ft/util.py +++ b/Lib/ufo2ft/util.py @@ -5,7 +5,7 @@ import re from copy import deepcopy from inspect import currentframe, getfullargspec -from typing import Set +from typing import Mapping, Set from fontTools import subset, ttLib, unicodedata from fontTools.designspaceLib import DesignSpaceDocument @@ -15,7 +15,7 @@ from fontTools.pens.reverseContourPen import ReverseContourPen from fontTools.pens.transformPen import TransformPen -from ufo2ft.constants import HIRAGANA_KATAKANA_SCRIPTS +from ufo2ft.constants import UNICODE_SCRIPT_ALIASES logger = logging.getLogger(__name__) @@ -325,7 +325,7 @@ def unicodeInScripts(uv, scripts): False if it does not intersect. Return None for 'Common' script ('Zyyy'). """ - sx = unicodedata.script_extension(chr(uv)) + sx = unicodeScriptExtensions(uv) if "Zyyy" in sx: return None return not sx.isdisjoint(scripts) @@ -601,16 +601,16 @@ def getMaxComponentDepth(glyph, glyphSet, maxComponentDepth=0): return maxComponentDepth -def bucketizedScriptExtensions(codepoint: int) -> set[str]: - """Returns the Unicode script extensions for a codepoint, combining some - scripts into the same bucket. +def unicodeScriptExtensions( + codepoint: int, aliases: Mapping[str, str] | None = None +) -> set[str]: + """Returns the Unicode script extensions for a codepoint, optionally + aliasing some scripts. This allows lookups to contain more than one script. The most prominent case is being able to kern Hiragana and Katakana against each other, Unicode defines "Hrkt" as an alias for both scripts. """ - scripts = unicodedata.script_extension(chr(codepoint)) - if HIRAGANA_KATAKANA_SCRIPTS & scripts: - scripts -= HIRAGANA_KATAKANA_SCRIPTS - scripts.add("Hrkt") # Hrkt is an alias for Hira and Kata. - return scripts + if aliases is None: + aliases = UNICODE_SCRIPT_ALIASES + return {aliases.get(s, s) for s in unicodedata.script_extension(chr(codepoint))} diff --git a/tests/featureWriters/kernFeatureWriter_test.py b/tests/featureWriters/kernFeatureWriter_test.py index 3e3606afb..5ba49fefa 100644 --- a/tests/featureWriters/kernFeatureWriter_test.py +++ b/tests/featureWriters/kernFeatureWriter_test.py @@ -4,11 +4,11 @@ import pytest from fontTools import unicodedata -from ufo2ft.constants import HIRAGANA_KATAKANA_SCRIPTS +from ufo2ft.constants import UNICODE_SCRIPT_ALIASES from ufo2ft.errors import InvalidFeaturesData from ufo2ft.featureCompiler import parseLayoutFeatures from ufo2ft.featureWriters import KernFeatureWriter, ast -from ufo2ft.util import DFLT_SCRIPTS, bucketizedScriptExtensions +from ufo2ft.util import DFLT_SCRIPTS, unicodeScriptExtensions from . import FeatureWriterTest @@ -1652,7 +1652,7 @@ def test_kern_mixed_bidis(caplog, FontClass): assert " with ambiguous direction" in caplog.text -def bucketizedScript(codepoint: int) -> str: +def unicodeScript(codepoint: int) -> str: """Returns the Unicode script for a codepoint, combining some scripts into the same bucket. @@ -1660,12 +1660,10 @@ def bucketizedScript(codepoint: int) -> str: is being able to kern Hiragana and Katakana against each other, Unicode defines "Hrkt" as an alias for both scripts. - Note: Keep in sync with bucketizedScriptExtensions! + Note: Keep in sync with unicodeScriptExtensions! """ script = unicodedata.script(chr(codepoint)) - if script in HIRAGANA_KATAKANA_SCRIPTS: - return "Hrkt" - return script + return UNICODE_SCRIPT_ALIASES.get(script, script) def test_kern_zyyy_zinh(FontClass): @@ -1673,8 +1671,8 @@ def test_kern_zyyy_zinh(FontClass): disjoint set of explicit script extensions end up in the correct lookups.""" glyphs = {} for i in range(0, 0x110000, 0x10): - script = bucketizedScript(i) - script_extension = bucketizedScriptExtensions(i) + script = unicodeScript(i) + script_extension = unicodeScriptExtensions(i) if script not in script_extension: assert script in DFLT_SCRIPTS name = f"uni{i:04X}" From 656a9baf5890fff067dba04e7b81d4814f26b948 Mon Sep 17 00:00:00 2001 From: Nikolaus Waxweiler Date: Thu, 8 Dec 2022 14:26:42 +0000 Subject: [PATCH 5/5] Use MappingProxyType for UNICODE_SCRIPT_ALIASES --- Lib/ufo2ft/constants.py | 4 +++- Lib/ufo2ft/util.py | 4 +--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Lib/ufo2ft/constants.py b/Lib/ufo2ft/constants.py index e314de4a6..6f2673949 100644 --- a/Lib/ufo2ft/constants.py +++ b/Lib/ufo2ft/constants.py @@ -1,3 +1,5 @@ +from types import MappingProxyType + SPARSE_TTF_MASTER_TABLES = frozenset( ["glyf", "head", "hmtx", "loca", "maxp", "post", "vmtx"] ) @@ -40,7 +42,7 @@ COMMON_SCRIPT = "Zyyy" -UNICODE_SCRIPT_ALIASES = {"Hira": "Hrkt", "Kana": "Hrkt"} +UNICODE_SCRIPT_ALIASES = MappingProxyType({"Hira": "Hrkt", "Kana": "Hrkt"}) INDIC_SCRIPTS = [ "Beng", # Bengali diff --git a/Lib/ufo2ft/util.py b/Lib/ufo2ft/util.py index da60efc15..d5f6b4c6c 100644 --- a/Lib/ufo2ft/util.py +++ b/Lib/ufo2ft/util.py @@ -602,7 +602,7 @@ def getMaxComponentDepth(glyph, glyphSet, maxComponentDepth=0): def unicodeScriptExtensions( - codepoint: int, aliases: Mapping[str, str] | None = None + codepoint: int, aliases: Mapping[str, str] = UNICODE_SCRIPT_ALIASES ) -> set[str]: """Returns the Unicode script extensions for a codepoint, optionally aliasing some scripts. @@ -611,6 +611,4 @@ def unicodeScriptExtensions( is being able to kern Hiragana and Katakana against each other, Unicode defines "Hrkt" as an alias for both scripts. """ - if aliases is None: - aliases = UNICODE_SCRIPT_ALIASES return {aliases.get(s, s) for s in unicodedata.script_extension(chr(codepoint))}