Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use softhyphen for hyphenation points on LuaTeX #105

Merged
merged 11 commits into from
Jul 27, 2024
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,12 @@ All notable changes to the `tagpdf` package since the

The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
this project uses date-based 'snapshot' version identifiers.

## [Unreleased]

### Added
- key activate/softhyphen and code to use soft hyphens for hyphenation
if supported by the font.

## [2024-06-20]
Version 0.99c
Expand Down
83 changes: 83 additions & 0 deletions tagpdf-backend.dtx
Original file line number Diff line number Diff line change
Expand Up @@ -252,6 +252,12 @@ local iwfontattributeid = luatexbase.new_attribute ("g_@@_interwordfont_attr")
local tagunmarkedbool= token.create("g_@@_tagunmarked_bool")
local truebool = token.create("c_true_bool")
% \end{macrocode}
% with this token we can query the state of the softhyphen boolean
% and so detect if hyphens should be marked with ActualText
% \begin{macrocode}
local softhyphenbool = token.create("g_@@_softhyphen_bool")
% \end{macrocode}

% Now a number of local versions from global tables.
% Not all is perhaps needed, most node variants were copied from lua-debug.
% \begin{macrocode}
Expand Down Expand Up @@ -286,6 +292,9 @@ local KERN = node.id("kern")
local PENALTY = node.id("penalty")
local LOCAL_PAR = node.id("local_par")
local MATH = node.id("math")

-- Numeric subtype values of disc nodes. The softhyphen pre-shaping callback
-- compares the subtype reported by node.traverse_id against these two values.
local explicit_disc = 1
local regular_disc = 3
% \end{macrocode}
% Now we setup the main table structure. ltx is used by other latex code too!
% \begin{macrocode}
Expand Down Expand Up @@ -1267,6 +1276,80 @@ function ltx.@@.func.output_parenttree (abspage)
end
% \end{macrocode}
% \end{macro}
%
% \begin{macro}
% {
% process_softhyphen_pre
% process_softhyphen_post
% }
% First some local definitions. Since these are only needed locally everything gets wrapped into a block.
% \begin{macrocode}
do
-- Node properties table; the callbacks below keep a per-node marker in it.
local properties = node.get_properties_table()
-- Property key under which a node is flagged as a hyphenation hyphen.
local is_soft_hyphen_prop = 'tagpdf.rewrite-softhyphen.is_soft_hyphen'
-- U+002D HYPHEN-MINUS: the character that is looked for.
local hyphen_char = 0x2D
-- U+00AD SOFT HYPHEN: the character that is written in its place.
local soft_hyphen_char = 0xAD
% \end{macrocode}
%
% A lookup table to test if the font supports the soft hyphen glyph.
% \begin{macrocode}
-- Lazily filled lookup table indexed by font id. An entry is true when the
-- font's format is 'opentype' or 'truetype' and its characters table has an
-- entry for soft_hyphen_char, and false otherwise.
--
-- The __index handler only runs for font ids that have no entry yet. It
-- stores its answer (including false) in the table, so every later lookup of
-- the same id is a plain table read.
--
-- NOTE(review): identifiers is defined outside this block; presumably it maps
-- font ids to the loaded font tables -- confirm against its definition.
local softhyphen_fonts = setmetatable({}, {__index = function(t, fid)
  -- Use the id that was actually requested (the handler's second argument).
  local fdir = identifiers[fid]
  local format = fdir and fdir.format
  local result = (format == 'opentype' or format == 'truetype')
  local characters = fdir and fdir.characters
  -- (nil ~= nil) is false, so a missing characters table yields false too.
  result = result and (characters and characters[soft_hyphen_char]) ~= nil
  t[fid] = result
  return result
end})
% \end{macrocode}
%
% A pre shaping callback to mark hyphens as being hyphenation hyphens.
% This runs before shaping to avoid affecting hyphens moved into
% discretionaries during shaping.
% \begin{macrocode}
-- Pre-shaping pass. When the softhyphen boolean is set, every node yielded by
-- node.traverse_char for the pre list of an explicit or regular discretionary
-- gets the is_soft_hyphen_prop flag in its properties entry (the entry is
-- created if the node has none yet). The node list itself is not changed.
-- Always returns true.
local function process_softhyphen_pre(head, _context, _dir)
  if softhyphenbool.mode == truebool.mode then
    for disc_node, subtype in node.traverse_id(DISC, head) do
      local is_candidate = subtype == explicit_disc or subtype == regular_disc
      if is_candidate then
        for glyph in node.traverse_char(disc_node.pre) do
          local entry = properties[glyph]
          if not entry then
            entry = {}
            properties[glyph] = entry
          end
          entry[is_soft_hyphen_prop] = true
        end
      end
    end
  end
  return true
end

% \end{macrocode}
%
% Finally do the actual replacement after shaping. No checking for double processing here
% since the operation is idempotent.
% \begin{macrocode}
-- Post-shaping pass. When the softhyphen boolean is set, walk the pre list of
-- every discretionary and turn a glyph into soft_hyphen_char if all of the
-- following hold: its font is accepted by softhyphen_fonts, its character is
-- hyphen_char, and the pre-shaping pass flagged it with is_soft_hyphen_prop.
-- Always returns true.
local function process_softhyphen_post(head, _context, _dir)
  if softhyphenbool.mode == truebool.mode then
    for disc_node in node.traverse_id(DISC, head) do
      for glyph, code, font_id in node.traverse_glyph(disc_node.pre) do
        local entry = properties[glyph]
        -- The font check stays first so the font cache is consulted exactly
        -- as often as before.
        if softhyphen_fonts[font_id] and code == hyphen_char then
          if entry and entry[is_soft_hyphen_prop] then
            glyph.char = soft_hyphen_char
            -- NOTE(review): presumably drops shaper data that belonged to the
            -- replaced character -- confirm.
            entry.glyph_info = nil
          end
        end
      end
    end
  end
  return true
end

-- Register both passes; they share one description string.
local softhyphen_callback_label = 'tagpdf.rewrite-softhyphen'
luatexbase.add_to_callback('pre_shaping_filter', process_softhyphen_pre, softhyphen_callback_label)
luatexbase.add_to_callback('post_shaping_filter', process_softhyphen_post, softhyphen_callback_label)
end
% \end{macrocode}
% \end{macro}
%
% \begin{macrocode}
%</lua>
% \end{macrocode}
Expand Down
20 changes: 20 additions & 0 deletions tagpdf.dtx
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,12 @@
% marked up as artifact. The initial value is true.
% \end{function}
%
% \begin{function}{activate/softhyphen (setup-key)}
% This key activates automatic handling of hyphens inserted
% by hyphenation. It is only used in luamode and replaces hyphens
% by U+00AD if the font supports this.
% \end{function}
%
% \begin{function}{page/tabsorder (setup-key), tabsorder (deprecated)}
% This sets the tabsorder on a page. The values are |row|, |column|, |structure| (default)
% or |none|. Currently this is set more or less globally. Finer control can be
Expand Down Expand Up @@ -354,6 +360,13 @@
% \end{macrocode}
% \end{variable}
%
% \begin{variable}{\g_@@_softhyphen_bool}
% This boolean controls if the code should try to automatically
% handle hyphens from hyphenation. It is currently only used in luamode.
% \begin{macrocode}
\bool_new:N \g_@@_softhyphen_bool
% \end{macrocode}
% \end{variable}
% \section{Variants of l3 commands}
% \begin{macrocode}
\prg_generate_conditional_variant:Nnn \pdf_object_if_exist:n {e}{T,F,TF}
Expand Down Expand Up @@ -648,6 +661,13 @@
tagunmarked .bool_gset:N = \g_@@_tagunmarked_bool,
% \end{macrocode}
% \end{macro}
% \begin{macro}{activate/softhyphen (setup-key)}
% This key activates (in luamode) the handling of soft hyphens.
% \begin{macrocode}
activate/softhyphen .bool_gset:N = \g_@@_softhyphen_bool,
activate/softhyphen .initial:n = false,
% \end{macrocode}
% \end{macro}
% \begin{macro}{page/tabsorder (setup-key),tabsorder (deprecated)}
% This sets the tabsorder on a page. The values are |row|, |column|, |structure| (default)
% or |none|. Currently this is set more or less globally. Finer control can be
Expand Down