-
Notifications
You must be signed in to change notification settings - Fork 2
/
tdomselect
executable file
·166 lines (142 loc) · 3.7 KB
/
tdomselect
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
#!/usr/bin/tclsh
# -*- Tcl -*-
package require tdom
package require cmdline
# Ex. To list up reused passwords:
# tdomselect tmp/keepass.xml //entry list password username url title|sort|uniq
# tdomselect foo.docx
# tdomselect foo.docx //w:t
# tdomselect foo.docx //w:t toXPath
# tdomselect foo.docx //w:t 'n {list [$n toXPath] [$n asText]}'
proc usage {{rc 1}} {
set prog [file tail $::argv0]
puts stderr "Usage: $prog -m asText xmlfile xpath ?sub-xpath...?"
exit $rc
}
proc open_chan {fn args} {
set fh [open $fn]
if {[llength $args]} {
fconfigure $fh {*}$args
}
set fh
}
proc read_enc {fn args} {
set fh [open_chan $fn {*}$args]
set data [read $fh]
close $fh
set data
}
#========================================
catch {
package require Tclx
signal trap SIGPIPE exit
}
set opts [cmdline::getoptions argv {
{encoding.arg utf-8 "input file encoding"}
{debug no "debug"}
{parser.arg {} "parser options"}
}]
proc tdom_open {fn opts} {
set ext [string tolower [file extension $fn]]
set opener tdom_open$ext
if {[llength [info commands $opener]]} {
$opener $fn $opts
} else {
list -channel [open_chan $fn -encoding [dict get $opts encoding]]
}
}
proc tdom_open.html {fn opts} {
set data [read_enc $fn -encoding [dict get $opts encoding]]
list $data
}
proc tdom_open.docx {fn opts} {
list -channel [open_chan [list | unzip -qc $fn word/document.xml] \
-encoding utf-8]
}
proc tdom_open.odt {fn opts} {
list -channel [open_chan [list | unzip -qc $fn content.xml] \
-encoding utf-8]
}
proc tdom_file_apply {opts fn method nodeVar cmdBody {emitCmd puts} args} {
upvar 1 $nodeVar node
set fd_opts [tdom_open $fn $opts]
dom parse {*}$fd_opts doc
close [if {[lindex $fd_opts 0] eq "-channel"} {
lindex $fd_opts 1
} else {
lindex $fd_opts 0
}]
$doc documentElement root
set rc [catch {$root {*}$method} found]
if {$rc} {
puts stderr "dom selection error for '$method'\n $found"
exit 1
}
foreach node $found {
set res [{*}$cmdBody $node]
if {$emitCmd ne ""} {
{*}$emitCmd $res {*}$args
}
}
set found
}
proc tdom_map.list {tags node} {
set ls {}
foreach c $tags {
if {![regexp {^\./} $c]} {
set c "./$c"
}
foreach sub [$node selectNodes $c] {
lappend ls [regsub {\s+} [$sub asText] { }]
}
}
join $ls \t
}
# returns [list $method $nodeVar $cmdBody $emitCmd ...]
proc tdom_auto_command argList {
if {[llength $argList] == 1} {
lassign $argList xpath
list [list selectNodes $xpath] node \
[list apply [list node {$node asText}]]
} else {
set rest [lassign $argList xpath method]
if {[llength $method] == 1} {
lassign $method method
set nodeVar node
} else {
lassign $method nodeVar method
}
if {[llength [info commands [set cmdName tdom_map.$method]]]} {
list [list selectNodes $xpath] $nodeVar [list $cmdName $rest]
} elseif {[regexp {\s} $method]} {
list [list selectNodes $xpath] $nodeVar \
[list apply [list $nodeVar $method]]
} else {
list [list selectNodes $xpath] node \
[list apply [list node "\$node $method"]]
}
}
}
proc dict-default {dict key {default ""}} {
if {[dict exists $dict $key]} {
dict get $dict $key
} else {
set default
}
}
if {![llength $argv]} {
usage
}
set rest [lassign $argv fn]
if {![llength $rest]} {
tdom_file_apply $opts $fn childNodes node [list apply {node {
$node toXPath
}}] puts
} else {
set cmd [tdom_auto_command $rest]
if {[dict get $opts debug]} {
puts stderr cmd=$cmd
}
set found [tdom_file_apply $opts $fn {*}$cmd]
exit [expr {[llength $found] == 0}]
}