diff --git a/documentation/c08-language.sil b/documentation/c08-language.sil index bf42557d0..0b40c6b3b 100644 --- a/documentation/c08-language.sil +++ b/documentation/c08-language.sil @@ -93,6 +93,18 @@ The command \autodoc:command{\nohyphenation{…}} is provided as a shortcut for The hyphenator uses the same algorithm as TeX and can use TeX hyphenation pattern files if they are converted to Lua format. To implement hyphenation for a new language, first check to see if TeX hyphenation dictionaries are available; if not, work through the resources at \url{http://tug.org/docs/liang/}. +\em{Note on Unicode soft hyphens} — By default, soft hyphens (U+00AD) are interpreted as discretionary breaks, allowing line-breaking at that point (using the current font’s hyphen character). + +However, issues may arise when soft hyphens are used in ligatures, causing breaks between constituent characters and disrupting the ligature’s integrity. +Rather than relying on soft hyphens, for instances requiring hyphenation in unknown words, consider adding an exception to the hyphenation rules instead, with \autodoc:command{\hyphenator:add-exceptions{}} (where the text passed to the command is a lowercase representation of the word, with dashes where hyphenation is allowed). + +Moreover, typists sometimes manually insert soft hyphens to rectify line-breaking issues in other typesetting systems. +In SILE, leveraging language-specific hyphenation rules tends to be more reliable. +Setting \autodoc:setting{typesetter.softHyphen} to \code{false} ignores soft hyphens entirely in the text, alleviating potential issues arising from their manual insertion. + +Soft hyphens can be inadvertently inserted by text editors or software, remaining invisible in the source text and causing unexpected output. +Setting \autodoc:setting{typesetter.softHyphenWarning} to \code{true} triggers warnings upon encountering soft hyphens, aiding users in identifying and rectifying such instances, regardless of the value of \autodoc:setting{typesetter.softHyphen}. 
+ \section{Localization} A small handful of strings may be programmatically added to documents depending on language, context, and options. diff --git a/tests/feat-unicode-softhyphen.expected b/tests/feat-unicode-softhyphen.expected new file mode 100644 index 000000000..46c759d59 --- /dev/null +++ b/tests/feat-unicode-softhyphen.expected @@ -0,0 +1,76 @@ +Set paper size 297.6377985 419.5275636 +Begin page +Mx 14.8819 +My 28.5447 +Set font Gentium Plus;10;400;;normal;;;LTR +T 47 82 85 72 80 w=26.4014 (Lorem) +Mx 44.6950 +T 76 83 w=8.0078 (ip) +Mx 52.7028 +T 86 88 80 w=17.1924 (sum) +Mx 69.8952 +T 71 82 w=10.2295 (do) +Mx 80.1247 +T 79 82 85 w=11.6992 (lor) +Mx 91.8239 +T 86 76 87 w=10.0146 (sit) +Mx 101.8385 +T 68 80 72 87 w=20.6836 (amet) +Mx 122.5221 +T 17 w=2.2900 (.) +Mx 128.2239 +T 47 82 85 72 80 w=26.4014 (Lorem) +Mx 158.0370 +T 76 83 w=8.0078 (ip) +Mx 166.0448 +T 86 88 80 w=17.1924 (sum) +Mx 183.2372 +T 71 82 w=10.2295 (do) +Mx 193.4667 +T 79 82 85 w=11.6992 (lor) +Mx 205.1659 +T 86 76 87 w=10.0146 (sit) +Mx 215.1805 +T 68 80 72 87 w=20.6836 (amet) +Mx 235.8641 +T 17 w=2.2900 (.) +Mx 241.5659 +T 47 82 85 72 80 w=26.4014 (Lorem) +Mx 271.3790 +T 76 83 w=8.0078 (ip) +Mx 279.3868 +T 16 w=3.3691 (-) +Mx 14.8819 +My 40.5447 +T 86 88 80 w=17.1924 (sum) +Mx 32.0743 +T 71 82 w=10.2295 (do) +Mx 42.3038 +T 79 82 85 w=11.6992 (lor) +Mx 54.0030 +T 86 76 87 w=10.0146 (sit) +Mx 64.0176 +T 68 80 72 87 w=20.6836 (amet) +Mx 84.7012 +T 17 w=2.2900 (.) +Mx 88.8998 +T 47 82 85 72 80 w=26.4014 (Lorem) +Mx 117.2097 +T 76 83 86 88 80 71 82 79 82 85 86 76 87 68 80 72 87 w=77.8271 (ipsumdolorsitamet) +Mx 195.0368 +T 17 w=2.2900 (.) +Mx 199.2354 +T 47 82 85 72 80 w=26.4014 (Lorem) +Mx 227.5453 +T 76 83 86 88 80 71 82 79 82 85 86 76 87 68 80 72 87 w=77.8271 (ipsumdolorsitamet) +Mx 305.3724 +T 17 w=2.2900 (.) 
+Mx 14.8819 +My 52.5447 +T 47 82 85 72 80 w=26.4014 (Lorem) +Mx 43.9433 +T 76 83 86 88 80 71 82 79 82 85 86 76 87 68 80 72 87 w=77.8271 (ipsumdolorsitamet) +Mx 121.7705 +T 17 w=2.2900 (.) +End page +Finish diff --git a/tests/feat-unicode-softhyphen.sil b/tests/feat-unicode-softhyphen.sil new file mode 100644 index 000000000..1a34c76f6 --- /dev/null +++ b/tests/feat-unicode-softhyphen.sil @@ -0,0 +1,16 @@ +\begin[papersize=a6]{document} +\nofolios +\neverindent +% Language without hyphenation patterns +% (so we are sure that the hyphenation comes from the soft hyphens) +\set[parameter=document.language, value=und] +% The text has soft hyphens U+00AD inside words. +Lorem ip­sum­do­lor­sit­amet. +Lorem ip­sum­do­lor­sit­amet. +Lorem ip­sum­do­lor­sit­amet.% Should be hyphenated here as "ip-sum" + +\set[parameter=typesetter.softHyphen, value=false] +Lorem ip­sum­do­lor­sit­amet. +Lorem ip­sum­do­lor­sit­amet. +Lorem ip­sum­do­lor­sit­amet. +\end{document} diff --git a/typesetters/base.lua b/typesetters/base.lua index 7b44b96cf..6ab5293e7 100644 --- a/typesetters/base.lua +++ b/typesetters/base.lua @@ -132,6 +132,19 @@ function typesetter.declareSettings(_) help = "Width to break lines at" }) + SILE.settings:declare({ + parameter = "typesetter.softHyphen", + type = "boolean", + default = true, + help = "When true, soft hyphens are rendered as discretionary breaks, otherwise they are ignored" + }) + + SILE.settings:declare({ + parameter = "typesetter.softHyphenWarning", + type = "boolean", + default = false, + help = "When true, a warning is issued when a soft hyphen is encountered" + }) end function typesetter:initState () @@ -276,7 +289,29 @@ function typesetter:typeset (text) if token.separator then self:endline() else - self:setpar(token.string) + if SILE.settings:get("typesetter.softHyphen") then + local warnedshy = false + for token2 in SU.gtoke(token.string, luautf8.char(0x00AD)) do + if token2.separator then -- soft hyphen support + local discretionary = 
SILE.nodefactory.discretionary({}) + local hbox = SILE.typesetter:makeHbox({ SILE.settings:get("font.hyphenchar") }) + discretionary.prebreak = { hbox } + table.insert(SILE.typesetter.state.nodes, discretionary) + if not warnedshy and SILE.settings:get("typesetter.softHyphenWarning") then + SU.warn("Soft hyphen encountered and replaced with discretionary") + end + warnedshy = true + else + self:setpar(token2.string) + end + end + else + if SILE.settings:get("typesetter.softHyphenWarning") and luautf8.match(token.string, luautf8.char(0x00AD)) then + SU.warn("Soft hyphen encountered and ignored") + end + text = luautf8.gsub(token.string, luautf8.char(0x00AD), "") + self:setpar(text) + end end end SILE.traceStack:pop(pId)