-
Notifications
You must be signed in to change notification settings - Fork 9
/
Copy pathtagpdf-backend.dtx
1400 lines (1346 loc) · 47.9 KB
/
tagpdf-backend.dtx
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
% \iffalse meta-comment
%
%% File: tagpdf-backend.dtx
%
% Copyright (C) 2019-2025 Ulrike Fischer
%
% It may be distributed and/or modified under the conditions of the
% LaTeX Project Public License (LPPL), either version 1.3c of this
% license or (at your option) any later version. The latest version
% of this license is in the file
%
% https://www.latex-project.org/lppl.txt
%
% This file is part of the "tagpdf bundle" (The Work in LPPL)
% and all files in that bundle must be distributed together.
%
% -----------------------------------------------------------------------
%
% The development version of the bundle can be found at
%
% https://github.com/latex3/tagpdf
%
% for those people who are interested.
%
%<*driver>
\DocumentMetadata{}
\documentclass{l3doc}
\usepackage{array,booktabs,caption}
\hypersetup{pdfauthor=Ulrike Fischer,
pdftitle=tagpdf-mc module (tagpdf)}
\begin{document}
\DocInput{\jobname.dtx}
\end{document}
%</driver>
% \fi
% \title{^^A
% The \pkg{tagpdf-luatex.def} \\ Driver for luatex ^^A
% \\ Part of the tagpdf package
% }
%
% \author{^^A
% Ulrike Fischer\thanks
% {^^A
% E-mail:
% \href{mailto:[email protected]}
% {[email protected]}^^A
% }^^A
% }
%
% \date{Version 0.99l, released 2025-01-12}
% \maketitle
% \begin{implementation}
% \begin{macrocode}
%<@@=tag>
%<*luatex>
\ProvidesExplFile {tagpdf-luatex.def} {2025-01-12} {0.99l}
{tagpdf~driver~for~luatex}
% \end{macrocode}
% \section{Loading the lua}
% The space code requires that the fall back font has been loaded and initialized,
% so we force that first. But perhaps this could be done in the kernel.
%
% \begin{macrocode}
% Select the TU/lmr/m/n/10pt font inside a group so that it is loaded before
% the Lua module (which looks up the font "TU/lmr/m/n/10") is required.
{
\fontencoding{TU}\fontfamily{lmr}\fontseries{m}\fontshape{n}\fontsize{10pt}{10pt}\selectfont
}
\lua_now:e { tagpdf=require('tagpdf.lua') }
% \end{macrocode}
%
% The following defines wrappers around prop and seq commands to store the
% data also in lua tables.
% I probably want also lua tables
% I put them in the ltx.@@.tables namespaces
% The tables will be named like the variables but without backslash
% To access such a table with a dynamical name create a string and then use
% ltx.@@.tables[string]
% Old code, I'm not quite sure if this was a good idea. Now I have a mix of tables in
% ltx.@@.tables and ltx.@@.mc/struct. And a lot of it is probably not needed.
% TODO: this should be cleaned up, but at least roles are currently using
% the table!
%
% \begin{macro}
% {
% \@@_prop_new:N,
% \@@_seq_new:N,
% \@@_prop_gput:Nnn,
% \@@_seq_gput_right:Nn,
% \@@_seq_gput_left:Nn,
% \@@_seq_item:cn,
% \@@_prop_item:cn,
% \@@_seq_show:N,
% \@@_prop_show:N
% }
% \begin{macrocode}
% Create the prop #1 and an empty Lua table in ltx.@@.tables whose key is
% the name of #1 without the backslash (\cs_to_str:N).
\cs_set_protected:Npn \@@_prop_new:N #1
{
\prop_new:N #1
\lua_now:e { ltx.@@.tables.\cs_to_str:N#1 = {} }
}
% Same as \@@_prop_new:N, but the prop is created with \prop_new_linked:N.
\cs_set_protected:Npn \@@_prop_new_linked:N #1
{
\prop_new_linked:N #1
\lua_now:e { ltx.@@.tables.\cs_to_str:N#1 = {} }
}
% Create the seq #1 and an empty Lua table of the same name in ltx.@@.tables.
\cs_set_protected:Npn \@@_seq_new:N #1
{
\seq_new:N #1
\lua_now:e { ltx.@@.tables.\cs_to_str:N#1 = {} }
}
% Store key #2 with value #3 globally in prop #1 and mirror the pair as
% strings in the Lua table of the same name. The value is passed through
% \lua_escape:n; the key is inserted as is between double quotes.
% NOTE(review): presumably keys never contain quotes or backslashes - confirm.
\cs_set_protected:Npn \@@_prop_gput:Nnn #1 #2 #3
{
\prop_gput:Nnn #1 { #2 } { #3 }
\lua_now:e { ltx.@@.tables.\cs_to_str:N#1 ["#2"] = "\lua_escape:n{#3}" }
}
% Append #2 globally to seq #1 and append it as a string to the Lua array of
% the same name.
% NOTE(review): the item is not passed through \lua_escape:n here - confirm
% that items never contain characters that are special in a Lua string.
\cs_set_protected:Npn \@@_seq_gput_right:Nn #1 #2
{
\seq_gput_right:Nn #1 { #2 }
\lua_now:e { table.insert(ltx.@@.tables.\cs_to_str:N#1, "#2") }
}
% \end{macrocode}
%
% this inserts on the right of the lua table, but as the lua table is not used for kids
% this is ignored for now.
% \begin{macrocode}
% Prepend #2 globally to seq #1. The Lua mirror uses table.insert without a
% position argument, so the item is appended at the END of the Lua array and
% the Lua order differs from the seq order (known limitation).
\cs_set_protected:Npn \@@_seq_gput_left:Nn #1 #2
{
\seq_gput_left:Nn #1 { #2 }
\lua_now:e { table.insert(ltx.@@.tables.\cs_to_str:N#1, "#2") }
}
%Hm not quite sure about the naming
% Print entry #2 of the Lua table ltx.@@.tables.#1 back to TeX. #1 is the
% table name as text (no backslash); #2 is inserted unquoted, i.e. it is
% used as a Lua expression (typically a numeric index).
% Defined with \cs_set:Npn, i.e. not protected.
\cs_set:Npn \@@_seq_item:cn #1 #2
{
\lua_now:e { tex.print(ltx.@@.tables.#1[#2]) }
}
% Same for a prop-like table: #2 is quoted and used as a string key.
\cs_set:Npn \@@_prop_item:cn #1 #2
{
\lua_now:e { tex.print(ltx.@@.tables.#1["#2"]) }
}
%for debugging commands that show both the seq/prop and the lua tables
% Show seq #1 with \seq_show:N, then log a heading and the content of the
% Lua array of the same name (both Lua calls use log level 1).
\cs_set_protected:Npn \@@_seq_show:N #1
{
\seq_show:N #1
\lua_now:e { ltx.@@.trace.log ("lua~sequence~array~\cs_to_str:N#1",1) }
\lua_now:e { ltx.@@.trace.show_seq (ltx.@@.tables.\cs_to_str:N#1) }
}
% Show prop #1 with \prop_show:N, then log a heading and the content of the
% Lua table of the same name.
\cs_set_protected:Npn \@@_prop_show:N #1
{
\prop_show:N #1
\lua_now:e {ltx.@@.trace.log ("lua~property~table~\cs_to_str:N#1",1) }
\lua_now:e {ltx.@@.trace.show_prop (ltx.@@.tables.\cs_to_str:N#1) }
}
% \end{macrocode}
% \end{macro}
%
% \begin{macrocode}
%</luatex>
% \end{macrocode}
% The module declaration
% \begin{macrocode}
%<*lua>
-- tagpdf.lua
-- Ulrike Fischer
-- Module metadata; passed to luatexbase.provides_module below.
-- The TAGVERSION/TAGDATE markers are presumably used by release tooling to
-- update these two lines - keep them on the same lines as the values.
local ProvidesLuaModule = {
name = "tagpdf",
version = "0.99l", --TAGVERSION
date = "2025-01-12", --TAGDATE
description = "tagpdf lua code",
license = "The LATEX Project Public License 1.3c"
}
-- Register the module only when luatexbase offers provides_module.
if luatexbase and luatexbase.provides_module then
luatexbase.provides_module (ProvidesLuaModule)
end
--[[
The code has quite probably a number of problems
- more variables should be local instead of global
- the naming is not always consistent due to the development of the code
- the traversing of the shipout box must be tested with more complicated setups
- it should probably handle more node types
-
--]]
% \end{macrocode}
% Some comments about the lua structure.
% \begin{macrocode}
--[[
the main table is named ltx.@@. It contains the functions and also the data
collected during the compilation.
ltx.@@.mc will contain mc connected data.
ltx.@@.struct will contain structure related data.
ltx.@@.page will contain page data
ltx.@@.tables contains also data from mc and struct (from older code). This needs cleaning up.
There are certainly duplicates, but I don't dare to remove them yet ...
ltx.@@.func will contain (public) functions.
ltx.@@.trace will contain tracing/logging functions.
local functions start with __
functions meant for users will be in ltx.tag
functions
ltx.@@.func.get_num_from (tag): takes a tag (string) and returns the id number
ltx.@@.func.output_num_from (tag): takes a tag (string) and prints (to tex) the id number
ltx.@@.func.get_tag_from (num): takes a num and returns the tag
ltx.@@.func.output_tag_from (num): takes a num and prints (to tex) the tag
ltx.@@.func.store_mc_data (num,key,data): stores key=data in ltx.@@.mc[num]
ltx.@@.func.store_mc_label (label,num): stores label=num in ltx.@@.mc.labels
ltx.@@.func.store_mc_kid (mcnum,kid,page): stores the mc-kids of mcnum on page page
ltx.@@.func.store_mc_in_page(mcnum,mcpagecnt,page): stores in the page table the number of mcnum on this page
ltx.@@.func.store_struct_mcabs (structnum,mcnum): stores relations structnum<->mcnum (abs)
ltx.@@.func.mc_insert_kids (mcnum): inserts the /K entries for mcnum by wandering through the [kids] table
ltx.@@.func.mark_page_elements(box,mcpagecnt,mccntprev,mcopen,name,mctypeprev) : the main function
ltx.@@.func.mark_shipout (): a wrapper around the core function which inserts the last EMC
ltx.@@.func.fill_parent_tree_line (page): outputs the entries of the parenttree for this page
ltx.@@.func.output_parenttree(): outputs the content of the parenttree
ltx.@@.func.pdf_object_ref(name,index): outputs the object reference for the object name
ltx.@@.func.markspaceon(), ltx.@@.func.markspaceoff(): (de)activates the marking of positions for space chars
ltx.@@.trace.show_mc_data (num,loglevel): shows ltx.@@.mc[num] if the current log level is >= loglevel
ltx.@@.trace.show_all_mc_data (min,max,loglevel): shows the data of the mc's from min to max if the current log level is >= loglevel
ltx.@@.trace.show_seq: shows a sequence (array)
ltx.@@.trace.show_struct_data (num): shows data of structure num
ltx.@@.trace.show_prop: shows a prop
ltx.@@.trace.log
ltx.@@.trace.showspaces : boolean
ltx.tag.get_struct_num: returns the current structure number
ltx.tag.get_struct_counter: returns the current value of the structure counter
ltx.tag.get_struct_num_next: returns the next structure number (counter + 1)
--]]
% \end{macrocode}
% This sets up the main attribute registers.
% The mc_type attribute stores the type (P, Span etc) encoded as a num,
% The mc_cnt attribute stores the absolute number and allows so to see
% if a node belongs to the same mc-chunk.
%
% The interwordspace attr is set by the function |@@_mark_spaces|, and marks
% the place where spaces should be inserted.
% The interwordfont attr is set by the function |@@_mark_spaces| too and
% stores the font, so that we can decide which font
% to use for the real space char.
% The interwordspaceOff attr allows to locally suppress the insertion of
% real space chars, e.g. when they are inserted by other means (e.g. with |\char|).
% \begin{macrocode}
-- Attribute registers used by the tagging and space code; each variable
-- holds the value returned by luatexbase.new_attribute for the given name.
-- NOTE(review): the third name spells the prefix "__tag" literally while the
-- others use the docstrip placeholder; presumably both give the same prefix
-- after extraction (the file sets the guard @@=tag) - confirm.
local mctypeattributeid = luatexbase.new_attribute ("g_@@_mc_type_attr")
local mccntattributeid = luatexbase.new_attribute ("g_@@_mc_cnt_attr")
local iwspaceOffattributeid = luatexbase.new_attribute ("g__tag_interwordspaceOff_attr")
local iwspaceattributeid = luatexbase.new_attribute ("g_@@_interwordspace_attr")
local iwfontattributeid = luatexbase.new_attribute ("g_@@_interwordfont_attr")
% \end{macrocode}
% with this token we can query the state of the boolean
% and so detect if unmarked nodes should be marked as artifacts
% \begin{macrocode}
local tagunmarkedbool= token.create("g_@@_tagunmarked_bool")
local truebool = token.create("c_true_bool")
% \end{macrocode}
% with this token we can query the state of the softhyphen boolean
% and so detect if hyphens from hyphenation should be replaced by soft-hyphens.
% \begin{macrocode}
local softhyphenbool = token.create("g_@@_softhyphen_bool")
% \end{macrocode}
% Now a number of local versions from global tables.
% Not all is perhaps needed, most node variants were copied from lua-debug.
% \begin{macrocode}
local catlatex = luatexbase.registernumber("catcodetable@latex")
local tableinsert = table.insert
local nodeid = node.id
local nodecopy = node.copy
local nodegetattribute = node.get_attribute
local nodesetattribute = node.set_attribute
local nodehasattribute = node.has_attribute
local nodenew = node.new
local nodetail = node.tail
local nodeslide = node.slide
local noderemove = node.remove
local nodetraverseid = node.traverse_id
local nodetraverse = node.traverse
local nodeinsertafter = node.insert_after
local nodeinsertbefore = node.insert_before
local pdfpageref = pdf.pageref
local fonthashes = fonts.hashes
local identifiers = fonthashes.identifiers
local fontid = font.id
local HLIST = node.id("hlist")
local VLIST = node.id("vlist")
local RULE = node.id("rule")
local DISC = node.id("disc")
local GLUE = node.id("glue")
local GLYPH = node.id("glyph")
local KERN = node.id("kern")
local PENALTY = node.id("penalty")
local LOCAL_PAR = node.id("local_par")
local MATH = node.id("math")
local explicit_disc = 1
local regular_disc = 3
% \end{macrocode}
% Now we setup the main table structure. ltx is used by other latex code too!
% \begin{macrocode}
ltx = ltx or { }
ltx.tag = ltx.tag or { } -- user commands
ltx.@@ = ltx.@@ or { }
ltx.@@.mc = ltx.@@.mc or { } -- mc data
ltx.@@.struct = ltx.@@.struct or { } -- struct data
ltx.@@.tables = ltx.@@.tables or { } -- tables created with new prop and new seq.
-- wasn't a so great idea ...
-- g_@@_role_tags_seq used by tag<-> is in this tables!
-- used for pure lua tables too now!
ltx.@@.page = ltx.@@.page or { } -- page data, currently only i->{0->mcnum,1->mcnum,...}
ltx.@@.trace = ltx.@@.trace or { } -- show commands
ltx.@@.func = ltx.@@.func or { } -- functions
ltx.@@.conf = ltx.@@.conf or { } -- configuration variables
% \end{macrocode}
%
% \section{User commands to access data}
% Code like the one in luamml will have to access the current state in some places.
% \begin{macro}{ltx.tag.get_struct_num,ltx.tag.get_struct_counter,ltx.tag.get_struct_num_next}
% \begin{macrocode}
-- Returns the content of the TeX macro g__tag_struct_stack_current_tl
-- (the structure number on top of the structure stack).
local function @@_get_struct_num ()
  return (token.get_macro("g__tag_struct_stack_current_tl"))
end

-- Returns the current value of the TeX counter c@g_@@_struct_abs_int.
local function @@_get_struct_counter ()
  return (tex.getcount("c@g_@@_struct_abs_int"))
end

-- Returns the value of the TeX counter c@g_@@_struct_abs_int plus one.
local function @@_get_struct_num_next ()
  local counter = tex.getcount("c@g_@@_struct_abs_int")
  return counter + 1
end

-- export the accessors for users
ltx.tag.get_struct_num = @@_get_struct_num
ltx.tag.get_struct_counter = @@_get_struct_counter
ltx.tag.get_struct_num_next = @@_get_struct_num_next
% \end{macrocode}
% \end{macro}
% \section{Logging functions}
%
% \begin{macro}{@@_log,ltx.@@.trace.log}
% This rather simple log function takes as argument a
% message (string) and a number and
% will output the message to the log/terminal if the current loglevel
% is greater than or equal to num.
% \begin{macrocode}
-- Writes "tagpdf: <message>" on a new line of the log/terminal.
-- message:  string to write
-- loglevel: number, 3 when omitted; nothing is written when it is larger
--           than the TeX integer l_@@_loglevel_int
local function @@_log (message,loglevel)
  local wanted = loglevel or 3
  if wanted > tex.count["l_@@_loglevel_int"] then
    return
  end
  texio.write_nl("tagpdf: ".. message)
end
ltx.@@.trace.log = @@_log
% \end{macrocode}
% \end{macro}
% \begin{macro}{ltx.@@.trace.show_seq}
% This shows the content of a seq as stored in the tables table.
% It is used by the |\@@_seq_show:N| function. It is not used
% in user commands, only for debugging, and so requires log level >0.
% \begin{macrocode}
-- Logs (at log level 1) every entry of the array `seq` as "[index] => value".
-- If `seq` is not a table a "not found" message is logged instead.
function ltx.@@.trace.show_seq (seq)
  if type(seq) ~= "table" then
    @@_log ("sequence " .. tostring(seq) .. " not found",1)
    return
  end
  for index,item in ipairs(seq) do
    @@_log ("[" .. index .. "] => " .. tostring(item),1)
  end
end
% \end{macrocode}
% \end{macro}
% \begin{macro}{ @@_pairs_prop,ltx.@@.trace.show_prop}
% This shows the content of a prop as stored in the tables table.
% It is used by the |\@@_prop_show:N| function.
% \begin{macrocode}
-- Returns an iterator over `prop` that yields key,value pairs with the keys
-- in the order produced by table.sort.
local function @@_pairs_prop (prop)
  -- collect and sort the keys first
  local keys = {}
  for key in pairs(prop) do
    keys[#keys + 1] = key
  end
  table.sort(keys)
  local position = 0
  return function ()
    position = position + 1
    local key = keys[position]
    if key ~= nil then
      return key, prop[key]
    end
    return nil
  end
end
-- Logs (at log level 1) every entry of the table `prop` as "[key] => value",
-- sorted by key. If `prop` is not a table a message is logged instead.
function ltx.@@.trace.show_prop (prop)
  if type(prop) ~= "table" then
    @@_log ("prop " .. tostring(prop) .. " not found or not a table",1)
    return
  end
  for key,value in @@_pairs_prop (prop) do
    @@_log ("[" .. key .. "] => " .. tostring(value),1)
  end
end
% \end{macrocode}
% \end{macro}
% \begin{macro}{ltx.@@.trace.show_mc_data}
% This shows some data for a mc given by |num|.
% If something is shown depends on the log level.
% The function is used by the following function and then in
% |\ShowTagging|
% \begin{macrocode}
-- Logs the data stored for the mc-chunk `num` in ltx.@@.mc[num]: first all
-- key/value pairs, then (if present) the number of kids and each kid with
-- its page. All messages use `loglevel`.
function ltx.@@.trace.show_mc_data (num,loglevel)
  local mcdata = ltx.@@ and ltx.@@.mc and ltx.@@.mc[num]
  if not mcdata then
    @@_log ("mc"..num.." not found",loglevel)
    return
  end
  for key,value in pairs(mcdata) do
    @@_log ("mc"..num..": "..tostring(key).."=>"..tostring(value),loglevel)
  end
  local kids = mcdata["kids"]
  if kids then
    @@_log ("mc" .. num .. " has " .. #kids .. " kids",loglevel)
    for index,entry in ipairs(kids) do
      @@_log ("mc ".. num .. " kid "..index.." =>" .. entry.kid.." on page " ..entry.page,loglevel)
    end
  end
end
% \end{macrocode}
% \end{macro}
% \begin{macro}{ltx.@@.trace.show_all_mc_data}
% This shows data for the mc's between |min| and |max| (numbers).
% It is used by the |\ShowTagging| function.
% \begin{macrocode}
-- Calls ltx.@@.trace.show_mc_data for every mc number from `min` to `max`
-- (inclusive) and finishes with an empty line in the log.
function ltx.@@.trace.show_all_mc_data (min,max,loglevel)
  local show_one = ltx.@@.trace.show_mc_data
  for mcnum = min, max do
    show_one (mcnum,loglevel)
  end
  texio.write_nl("")
end
% \end{macrocode}
% \end{macro}
% \begin{macro}
% {
% ltx.@@.trace.show_struct_data
% }
% This function shows some struct data.
% Unused but kept for debugging.
% \begin{macrocode}
-- Logs (at log level 1) the array part of ltx.@@.struct[num], or a
-- "not found" message if there is no such entry.
function ltx.@@.trace.show_struct_data (num)
  local structdata = ltx.@@ and ltx.@@.struct and ltx.@@.struct[num]
  if not structdata then
    @@_log ("struct "..num.." not found ",1)
    return
  end
  -- NOTE(review): ipairs only visits the keys 1..n; if the struct data is
  -- stored under string keys (as the mc data is), nothing is shown - confirm.
  for index,value in ipairs(structdata) do
    @@_log ("struct "..num..": "..tostring(index).."=>"..tostring(value),1)
  end
end
% \end{macrocode}
% \end{macro}
%
% \section{Helper functions}
% \subsection{Retrieve data functions}
% \begin{macro}{@@_get_mc_cnt_type_tag}
% This takes a node as argument and returns the mc-cnt, the mc-type
% and the tag (calculated from the mc-type).
% \begin{macrocode}
-- Returns three values for the node `n`: the value of the mc-cnt attribute,
-- the value of the mc-type attribute (each -1 if unset) and the tag looked
-- up from the mc-type.
local function @@_get_mc_cnt_type_tag (n)
  local mctype = nodegetattribute(n,mctypeattributeid) or -1
  local mccnt = nodegetattribute(n,mccntattributeid) or -1
  return mccnt, mctype, (ltx.@@.func.get_tag_from(mctype))
end
% \end{macrocode}
% \end{macro}
%
% \begin{macro}{@@_get_mathsubtype}
% This function allows to detect if we are at the begin or the end of math.
% It takes as argument a mathnode.
% \begin{macrocode}
-- Returns "beginmath" if the subtype of `mathnode` is 0, otherwise "endmath".
-- The result is returned directly; the previous version stored it in an
-- undeclared (global) variable `subtype`.
local function @@_get_mathsubtype (mathnode)
  if mathnode.subtype == 0 then
    return "beginmath"
  end
  return "endmath"
end
% \end{macrocode}
% \end{macro}
%
% \begin{variable}{ltx.@@.tables.role_tag_attribute,ltx.@@.tables.role_attribute_tag}
% The first is a table with key a tag and value a number (the attribute)
% The second is an array with the attribute value as key.
% \begin{macrocode}
ltx.@@.tables.role_tag_attribute = {}
ltx.@@.tables.role_attribute_tag = {}
% \end{macrocode}
% \end{variable}
% \begin{macro}{ltx.@@.func.alloctag}
% \begin{macrocode}
-- Registers `tag` if it is not yet known: the tag is appended to the array
-- role_attribute_tag and its index is stored in role_tag_attribute[tag].
local function @@_alloctag (tag)
  local num_of_tag = ltx.@@.tables.role_tag_attribute
  if num_of_tag[tag] then
    return
  end
  local tag_of_num = ltx.@@.tables.role_attribute_tag
  tag_of_num[#tag_of_num + 1] = tag
  num_of_tag[tag] = #tag_of_num
  @@_log ("Add "..tag.." "..num_of_tag[tag],3)
end
ltx.@@.func.alloctag = @@_alloctag
% \end{macrocode}
% \end{macro}
% \begin{macro}
% {
% @@_get_num_from,
% ltx.@@.func.get_num_from,
% ltx.@@.func.output_num_from
% }
% These functions take as argument a string |tag|, and return the number
% under which it is recorded (and so the attribute value).
% The first function outputs the number for lua, while the |output| function
% outputs to tex.
% \begin{macrocode}
-- Returns the number recorded for `tag` in role_tag_attribute, or -1 if the
-- tag is unknown. (The previous version stored the result in an undeclared
-- global variable `a`.)
local @@_get_num_from =
  function (tag)
    return ltx.@@.tables.role_tag_attribute[tag] or -1
  end
ltx.@@.func.get_num_from = @@_get_num_from

-- Prints the number recorded for `tag` to TeX (with the LaTeX catcode table)
-- and logs a message if the tag is unknown.
function ltx.@@.func.output_num_from (tag)
  local num = @@_get_num_from (tag)
  tex.sprint(catlatex,num)
  if num == -1 then
    @@_log ("Unknown tag "..tag.." used")
  end
end
% \end{macrocode}
% \end{macro}
%
% \begin{macro}
% {
% @@_get_tag_from ,
% ltx.@@.func.get_tag_from,
% ltx.@@.func.output_tag_from
% }
% These functions are the opposites to the previous function:
% they take as argument a number (the attribute value) and return the string |tag|.
% The first function outputs the string for lua, while the |output| function
% outputs to tex.
% \begin{macrocode}
-- Returns the tag recorded under the number `num` in role_attribute_tag, or
-- the string "UNKNOWN" if there is none. (The previous version stored the
-- result in an undeclared global variable `a`.)
local @@_get_tag_from =
  function (num)
    return ltx.@@.tables.role_attribute_tag[num] or "UNKNOWN"
  end
ltx.@@.func.get_tag_from = @@_get_tag_from

-- Prints the tag recorded under `num` to TeX (with the LaTeX catcode table).
function ltx.@@.func.output_tag_from (num)
  tex.sprint(catlatex,@@_get_tag_from (num))
end
% \end{macrocode}
% \end{macro}
% \begin{macro}
% {
% ltx.@@.func.store_mc_data
% }
% This function stores |key|=|data| for mc-chunk |num|. It is used in the
% tagpdf-mc code, to store for example the tag string, and the raw options.
% \begin{macrocode}
-- Stores data under key in ltx.@@.mc[num]; the subtable is created on
-- first use. The stored triple is logged at log level 3.
function ltx.@@.func.store_mc_data (num,key,data)
  local mcdata = ltx.@@.mc[num]
  if not mcdata then
    mcdata = { }
    ltx.@@.mc[num] = mcdata
  end
  mcdata[key] = data
  @@_log ("INFO TEX-STORE-MC-DATA: "..num.." => "..tostring(key).." => "..tostring(data),3)
end
% \end{macrocode}
% \end{macro}
%
% \begin{macro}
% {
% ltx.@@.func.store_mc_label
% }
% This function stores the |label|=|num| relationship in the |labels| subtable.
% TODO: this is probably unused and can go.
% \begin{macrocode}
-- Records num under label in the subtable ltx.@@.mc.labels, creating the
-- subtable on first use.
function ltx.@@.func.store_mc_label (label,num)
  local labels = ltx.@@.mc["labels"]
  if not labels then
    labels = { }
    ltx.@@.mc["labels"] = labels
  end
  labels[label] = num
end
% \end{macrocode}
% \end{macro}
% \begin{macro}
% {
% ltx.@@.func.store_mc_kid
% }
% This function is used in the traversing code. It stores
% a sub-chunk of a mc |mcnum| into the |kids| table.
% \begin{macrocode}
-- Appends the record {kid=kid,page=page} to the "kids" array of the mc-chunk
-- mcnum, creating the array on first use.
-- Expects that ltx.@@.mc[mcnum] already exists (it is indexed unguarded).
function ltx.@@.func.store_mc_kid (mcnum,kid,page)
  ltx.@@.trace.log("INFO TAG-STORE-MC-KID: "..mcnum.." => " .. kid.." on page " .. page,3)
  local mcdata = ltx.@@.mc[mcnum]
  local kids = mcdata["kids"]
  if not kids then
    kids = { }
    mcdata["kids"] = kids
  end
  kids[#kids + 1] = {kid=kid,page=page}
end
% \end{macrocode}
% \end{macro}
% \begin{macro}
% {
% ltx.@@.func.mc_num_of_kids
% }
% This function returns the number of kids a mc |mcnum| has. We need to account for
% the case that a mc can have no kids.
% \begin{macrocode}
-- Returns the number of entries in the "kids" array of the mc-chunk mcnum;
-- 0 if the chunk or its kids array does not exist. The result is also
-- logged at log level 4.
function ltx.@@.func.mc_num_of_kids (mcnum)
  local mcdata = ltx.@@.mc[mcnum]
  local kids = mcdata and mcdata["kids"]
  local num = 0
  if kids then
    num = #kids
  end
  ltx.@@.trace.log ("INFO MC-KID-NUMBERS: " .. mcnum .. "has " .. num .. "KIDS",4)
  return num
end
% \end{macrocode}
% \end{macro}
% \subsection{Functions to insert the pdf literals}
% \begin{macro}{@@_backend_create_emc_node,@@_insert_emc_node}
% This insert the emc node. We support also dvips and dvipdfmx backend
% \begin{macrocode}
-- Creates the whatsit node that ends a marked-content sequence (EMC).
-- The implementation is chosen once, when the file is loaded, from
-- tex.outputmode and the macro c_sys_backend_str.
local @@_backend_create_emc_node
if tex.outputmode ~= 0 then
  -- pdf mode: a pdf_literal with mode 1
  @@_backend_create_emc_node = function ()
    local literal = nodenew("whatsit","pdf_literal")
    literal.data = "EMC"
    literal.mode=1
    return literal
  end
elseif token.get_macro("c_sys_backend_str") == "dvipdfmx" then
  -- dvi mode with dvipdfmx: a special
  @@_backend_create_emc_node = function ()
    local special = nodenew("whatsit","special")
    special.data = "pdf:code EMC"
    return special
  end
else
  -- dvi mode, assume a dvips variant: a pdfmark special
  @@_backend_create_emc_node = function ()
    local special = nodenew("whatsit","special")
    special.data = "ps:SDict begin mark /EMC pdfmark end"
    return special
  end
end

-- Inserts a new EMC node before `current` in the list starting at `head`
-- and returns the (possibly new) head.
local function @@_insert_emc_node (head,current)
  local emcnode = @@_backend_create_emc_node()
  local newhead = node.insert_before(head,current,emcnode)
  return newhead
end
% \end{macrocode}
% \end{macro}
% \begin{macro}{@@_backend_create_bmc_node,@@_insert_bmc_node}
% This inserts a simple bmc node
% \begin{macrocode}
-- Creates the whatsit node that begins a marked-content sequence with the
-- tag `tag` and no property dictionary (BMC). The implementation is chosen
-- once, when the file is loaded, from tex.outputmode and c_sys_backend_str.
local @@_backend_create_bmc_node
if tex.outputmode ~= 0 then
  -- pdf mode: a pdf_literal with mode 1
  @@_backend_create_bmc_node = function (tag)
    local literal = nodenew("whatsit","pdf_literal")
    literal.data = "/"..tag.." BMC"
    literal.mode=1
    return literal
  end
elseif token.get_macro("c_sys_backend_str") == "dvipdfmx" then
  -- dvi mode with dvipdfmx: a special
  @@_backend_create_bmc_node = function (tag)
    local special = nodenew("whatsit","special")
    special.data = "pdf:code /"..tag.." BMC"
    return special
  end
else
  -- dvi mode, assume a dvips variant: a pdfmark special
  @@_backend_create_bmc_node = function (tag)
    local special = nodenew("whatsit","special")
    special.data = "ps:SDict begin mark/"..tag.." /BMC pdfmark end"
    return special
  end
end

-- Inserts a new BMC node for `tag` before `current` in the list starting at
-- `head` and returns the (possibly new) head.
local function @@_insert_bmc_node (head,current,tag)
  local bmcnode = @@_backend_create_bmc_node (tag)
  local newhead = node.insert_before(head,current,bmcnode)
  return newhead
end
% \end{macrocode}
% \end{macro}
% \begin{macro}{@@_backend_create_bdc_node,@@_insert_bdc_node}
% This inserts a bdc node with a fixed dict.
% TODO: check if this is still used, now that we create properties.
% \begin{macrocode}
-- Creates the whatsit node that begins a marked-content sequence with the
-- tag `tag` and the inline property dictionary `dict` (BDC). `dict` is the
-- dictionary content without the surrounding << >>. The implementation is
-- chosen once, when the file is loaded, from tex.outputmode and
-- c_sys_backend_str.
local @@_backend_create_bdc_node
if tex.outputmode == 0 then
  if token.get_macro("c_sys_backend_str") == "dvipdfmx" then
    function @@_backend_create_bdc_node (tag,dict)
      local bdcnode = nodenew("whatsit","special")
      bdcnode.data = "pdf:code /"..tag.."<<"..dict..">> BDC"
      return bdcnode
    end
  else -- assume a dvips variant
    function @@_backend_create_bdc_node (tag,dict)
      local bdcnode = nodenew("whatsit","special")
      bdcnode.data = "ps:SDict begin mark/"..tag.."<<"..dict..">> /BDC pdfmark end"
      return bdcnode
    end
  end
else -- pdf mode
  function @@_backend_create_bdc_node (tag,dict)
    local bdcnode = nodenew("whatsit","pdf_literal")
    bdcnode.data = "/"..tag.."<<"..dict..">> BDC"
    bdcnode.mode=1
    return bdcnode
  end
end

-- Inserts a new BDC node for `tag`/`dict` before `current` in the list
-- starting at `head` and returns the (possibly new) head.
local function @@_insert_bdc_node (head,current,tag,dict)
  -- keep the node in a local; it was an accidental global before
  local bdcnode = @@_backend_create_bdc_node (tag,dict)
  head = node.insert_before(head,current,bdcnode)
  return head
end
% \end{macrocode}
% \end{macro}
% \begin{macro}{@@_pdf_object_ref}
% This allows to reference a pdf object reserved with the l3pdf command by name.
% The return value is |n 0 R|, if the object doesn't exist, n is 0.
% \begin{macrocode}
-- Returns the reference "n 0 R" for the pdf object registered under
-- `name`/`index`.
-- If ltx.pdf.object_id is available it provides the object number;
-- otherwise the number is read from the mode of the token
-- c__pdf_object_<name>/<index>_int.
local function @@_pdf_object_ref (name,index)
  -- guard the lookup: ltx.pdf itself may be missing, and indexing it
  -- unguarded would raise an error before the fallback could be used
  local object_id = ltx.pdf and ltx.pdf.object_id
  if object_id then
    return object_id (name,index) ..' 0 R'
  end
  local tokenname = 'c__pdf_object_'..name..'/'..index..'_int'
  return token.create(tokenname).mode ..' 0 R'
end
ltx.@@.func.pdf_object_ref = @@_pdf_object_ref
% \end{macrocode}
% \end{macro}
%
% \section{Function for the real space chars}
% \begin{macro}{@@_show_spacemark}
% A debugging function, it is used to
% inserts red color markers in the places where space chars can go, it can have
% side effects so not always reliable, but ok.
% \begin{macrocode}
-- Debugging aid: inserts after `current` a pdf literal that draws a thin
-- vertical line from y=-3 to y=height.
-- head:    head of the node list
-- current: node after which the mark is inserted
-- color:   string with three colour components, default "1 0 0"
-- height:  upper end of the line, default 10
-- Returns the head of the list. In dvi mode nothing is inserted and `head`
-- is returned unchanged (previously nil was returned in that case).
local function @@_show_spacemark (head,current,color,height)
  if tex.outputmode == 0 then
    -- ignore dvi mode for now
    return head
  end
  local markcolor = color or "1 0 0"
  local markheight = height or 10
  local pdfstring = node.new("whatsit","pdf_literal")
  -- pass the colour as an argument, so that it is never interpreted as
  -- part of the format string
  pdfstring.data =
    string.format("q %s RG %s rg 0.4 w 0 %g m 0 %g l S Q",
                  markcolor,markcolor,-3,markheight)
  head = node.insert_after(head,current,pdfstring)
  return head
end
% \end{macrocode}
% \end{macro}
% \begin{macro}{@@_fakespace,ltx.@@.func.fakespace}
% This is used to define a lua version of |\pdffakespace|
% \begin{macrocode}
-- Sets the interwordspace attribute to 1 and the interwordfont attribute to
-- the current font.
local function @@_fakespace()
  local currentfont = font.current()
  tex.setattribute(iwspaceattributeid,1)
  tex.setattribute(iwfontattributeid,currentfont)
end
ltx.@@.func.fakespace = @@_fakespace
% \end{macrocode}
% \end{macro}
% \begin{macro}{@@_mark_spaces}
% a function to mark up places where real space chars should be inserted.
% It only sets attributes, these are then used in a later traversing
% which inserts the actual spaces.
% When space handling is activated this function is inserted in some callbacks.
% \begin{macrocode}
--[[ a function to mark up places where real space chars should be inserted
     it only sets an attribute.
--]]
-- Font of the most recently seen glyph, used for glue that follows a
-- penalty. It is a file-local declared BEFORE the function, initialised
-- with the fall back font, so the function no longer creates and reads an
-- undeclared global that is nil until the first glyph has been seen.
local mark_spaces_currfontid = fontid("TU/lmr/m/n/10")

-- Walks through the list starting at `head` and marks every glue of
-- positive width that is adjacent to a glyph (directly, or after a kern or
-- a penalty of subtype 0) by setting the interwordspace attribute to 1 and
-- the interwordfont attribute to a font id. Nothing is marked between a math
-- node of subtype 0 and the next math node of another subtype.
-- Returns `head`.
local function @@_mark_spaces (head)
  local inside_math = false
  for n in nodetraverse(head) do
    local id = n.id
    if id == GLYPH then
      local glyph = n
      mark_spaces_currfontid = glyph.font
      if glyph.next and (glyph.next.id == GLUE)
         and not inside_math and (glyph.next.width >0)
      then
        nodesetattribute(glyph.next,iwspaceattributeid,1)
        nodesetattribute(glyph.next,iwfontattributeid,glyph.font)
        -- for debugging
        if ltx.@@.trace.showspaces then
          @@_show_spacemark (head,glyph)
        end
      elseif glyph.next and (glyph.next.id==KERN) and not inside_math then
        -- glyph, kern, glue: mark the glue behind the kern
        local kern = glyph.next
        if kern.next and (kern.next.id== GLUE) and (kern.next.width >0)
        then
          nodesetattribute(kern.next,iwspaceattributeid,1)
          nodesetattribute(kern.next,iwfontattributeid,glyph.font)
        end
      end
      -- look also back
      if glyph.prev and (glyph.prev.id == GLUE)
         and not inside_math
         and (glyph.prev.width >0)
         and not nodehasattribute(glyph.prev,iwspaceattributeid)
      then
        nodesetattribute(glyph.prev,iwspaceattributeid,1)
        nodesetattribute(glyph.prev,iwfontattributeid,glyph.font)
        -- for debugging
        if ltx.@@.trace.showspaces then
          @@_show_spacemark (head,glyph)
        end
      end
    elseif id == PENALTY then
      local penalty = n
      -- ltx.@@.trace.log ("PENALTY ".. n.subtype.."VALUE"..n.penalty,3)
      if penalty.next and (penalty.next.id == GLUE)
         and not inside_math and (penalty.next.width >0) and n.subtype==0
      then
        nodesetattribute(penalty.next,iwspaceattributeid,1)
        -- changed 2024-01-18, issue #72
        nodesetattribute(penalty.next,iwfontattributeid,mark_spaces_currfontid)
        -- for debugging
        if ltx.@@.trace.showspaces then
          @@_show_spacemark (head,penalty)
        end
      end
    elseif id == MATH then
      inside_math = (n.subtype == 0)
    end
  end
  return head
end
% \end{macrocode}
% \end{macro}
% \begin{macro}
% {
% @@_activate_mark_space,
% ltx.@@.func.markspaceon,
% @@_deactivate_mark_space,
% ltx.@@.func.markspaceoff
% }
% These functions add/remove the function which marks the spaces to the callbacks
% |pre_linebreak_filter| and |hpack_filter|
% \begin{macrocode}
-- Adds @@_mark_spaces under the name "markspaces" to the callbacks
-- pre_linebreak_filter and hpack_filter, unless it is already registered in
-- pre_linebreak_filter.
local function @@_activate_mark_space ()
  if luatexbase.in_callback ("pre_linebreak_filter","markspaces") then
    return
  end
  luatexbase.add_to_callback("pre_linebreak_filter",@@_mark_spaces,"markspaces")
  luatexbase.add_to_callback("hpack_filter",@@_mark_spaces,"markspaces")
end
ltx.@@.func.markspaceon=@@_activate_mark_space

-- Removes "markspaces" from both callbacks if it is registered in
-- pre_linebreak_filter.
local function @@_deactivate_mark_space ()
  if not luatexbase.in_callback ("pre_linebreak_filter","markspaces") then
    return
  end
  luatexbase.remove_from_callback("pre_linebreak_filter","markspaces")
  luatexbase.remove_from_callback("hpack_filter","markspaces")
end
ltx.@@.func.markspaceoff=@@_deactivate_mark_space
% \end{macrocode}
% \end{macro}
% We need a few local variables to set up a default space char.
% \begin{macrocode}
-- Template glyph node for the real space char: char 32 in the font
-- "TU/lmr/m/n/10". It is copied whenever a space is inserted.
local default_space_char = nodenew(GLYPH)
local default_fontid = fontid("TU/lmr/m/n/10")
-- NOTE(review): a local is only visible to code below its declaration, so
-- functions defined above this line that use the name default_currfontid
-- do not refer to this variable.
local default_currfontid = fontid("TU/lmr/m/n/10")
default_space_char.char = 32
default_space_char.font = default_fontid
% \end{macrocode}
% And a function to check as best as possible if a font has a space:
% \begin{macrocode}
-- Checks as well as possible whether the font with the id `fontid` has a
-- space char. Returns true if luaotfload finds a slot for the glyph name
-- "space", or if the font data has a character 32 whose unicode field is 32;
-- false otherwise.
local function @@_font_has_space (fontid)
  -- local instead of the former accidental global `t`
  local fontdata = fonts.hashes.identifiers[fontid]
  if luaotfload.aux.slot_of_name(fontid,"space") then
    return true
  end
  -- the font data can be missing for an id that was never defined
  local characters = fontdata and fontdata.characters
  local space = characters and characters[32]
  if space and space["unicode"]==32 then
    return true
  end
  return false
end
% \end{macrocode}
% \begin{macro}
% {
% @@_space_chars_shipout,
% ltx.@@.func.space_chars_shipout,
% }
% This is the main function to insert real space chars. It inserts a
% glyph before every glue which has been marked previously. The attributes
% are copied from the glue, so if the tagging is done later,
% it will be tagged like it.
% \begin{macrocode}
-- Walks through the list of `box` (recursing into hlists and vlists) and
-- inserts a space glyph before every glue whose interwordspace attribute is
-- 1, unless the node carries the interwordspaceOff attribute. The glue is
-- shortened by the width of the inserted glyph and the glyph gets the
-- attribute list of the glue. The glyph uses the font stored in the
-- interwordfont attribute if that font has a space, otherwise the font of
-- the default space char.
local function @@_space_chars_shipout (box)
  local head = box.head
  if not head then
    return
  end
  for n in node.traverse(head) do
    local spaceattr = -1
    if not nodehasattribute(n,iwspaceOffattributeid) then
      spaceattr = nodegetattribute(n,iwspaceattributeid) or -1
    end
    local id = n.id
    if id == HLIST or id == VLIST then
      -- enter the nested list
      @@_space_chars_shipout (n)
    elseif id == GLUE and spaceattr==1 then
      if ltx.@@.trace.showspaces then
        @@_show_spacemark (head,n,"0 1 0")
      end
      local space_char = node.copy(default_space_char)
      local curfont = nodegetattribute(n,iwfontattributeid)
      ltx.@@.trace.log ("INFO SPACE-FUNCTION-FONT: ".. tostring(curfont),3)
      if curfont and @@_font_has_space (curfont) then
        space_char.font=curfont
      end
      local space
      head, space = node.insert_before(head, n, space_char)
      n.width = n.width - space.width
      space.attr = n.attr
    end
  end
  box.head = head
end

-- Public wrapper around the local function.
function ltx.@@.func.space_chars_shipout (box)
  @@_space_chars_shipout (box)
end
% \end{macrocode}
% \end{macro}
%
% \section{Function for the tagging}
% \begin{macro}
% {
% ltx.@@.func.mc_insert_kids
% }
% This is the main function to insert the
% K entry into a StructElem object. It is used in tagpdf-mc-luacode module.
% The |single| attribute allows to handle the case that a single
% mc on the tex side can have more than one kid after the processing here,
% and so we get the correct array/non array setup.
% \begin{macrocode}
function ltx.@@.func.mc_insert_kids (mcnum,single)
if ltx.@@.mc[mcnum] then
ltx.@@.trace.log("INFO TEX-MC-INSERT-KID-TEST: " .. mcnum,4)
if ltx.@@.mc[mcnum]["kids"] then
if #ltx.@@.mc[mcnum]["kids"] > 1 and single==1 then
tex.sprint("[")
end
for i,kidstable in ipairs( ltx.@@.mc[mcnum]["kids"] ) do
local kidnum = kidstable["kid"]
local kidpage = kidstable["page"]
local kidpageobjnum = pdfpageref(kidpage)
ltx.@@.trace.log("INFO TEX-MC-INSERT-KID: " .. mcnum ..