-
Notifications
You must be signed in to change notification settings - Fork 38
/
util.c
237 lines (211 loc) · 7.5 KB
/
util.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
#include "util.h"

#include <stdlib.h>
#include <string.h>
/* Set to true once registerEXSLT() has performed its one-time registration. */
static bool parsley_exslt_registered = false;
/* Size, in bytes, of the stack buffer printbuf_file_read() passes to fgets(). */
#define BUF 128
/*
 * Obtain a stream for `name`, treating the name "-" as a standard stream.
 *
 * When `name` is "-", stdout is returned if `mode` is exactly "w" and stdin
 * for any other mode. Every other name is handed to fopen() with `mode`.
 * If no stream is obtained, a diagnostic is written to stderr and the
 * process exits with status 1, so the function never returns NULL.
 */
FILE* parsley_fopen(char* name, char* mode) {
  const int use_std_stream = (strcmp("-", name) == 0);
  FILE *stream;

  if (use_std_stream) {
    stream = (strcmp("w", mode) == 0) ? stdout : stdin;
  } else {
    stream = fopen(name, mode);
  }

  if (!stream) {
    fprintf(stderr, "Cannot open file %s, error %d, %s\n", name, errno, strerror(errno));
    exit(1);
  }
  return stream;
}
/* Current I/O mode bitmask; read by parsley_io_get_mode(), written by parsley_io_set_mode(). */
static int parsley_io_mode = 0;
/* Heap-allocated "User-Agent: ..." header line passed to xmlNanoHTTPMethod(), or NULL when unset. */
static char *parsley_user_agent_header = NULL;
/* Report the current value of the file-level I/O mode bitmask. */
int
parsley_io_get_mode() {
  const int current_mode = parsley_io_mode;
  return current_mode;
}
/* Compiled span-wrap stylesheet; built on the first call to parsley_apply_span_wrap() and reused afterwards. */
static xsltStylesheetPtr span_wrap_sheet = NULL;

/*
 * Apply the span-wrap stylesheet to `doc` and return the resulting document.
 *
 * The stylesheet copies the input unchanged except that every text node which
 * is not whitespace-only and has at least one element sibling is replaced by
 * an <sg_wrap> element holding that text.
 *
 * Returns the document produced by xsltApplyStylesheetUser() (which may be
 * NULL), or NULL if the built-in stylesheet could not be compiled.
 * `doc` itself is not freed here.
 */
xmlDocPtr
parsley_apply_span_wrap(xmlDocPtr doc) {
  if(span_wrap_sheet == NULL) {
    const char * sheet = "<xsl:stylesheet version=\"1.0\" xmlns:xsl=\"http://www.w3.org/1999/XSL/Transform\" xmlns:sg=\"http://selectorgadget.com/\"> \
<xsl:template match=\"text()[(following-sibling::* or preceding-sibling::*) and normalize-space(.) != '']\"> \
<sg_wrap><xsl:value-of select=\".\" /></sg_wrap> \
</xsl:template> \
<xsl:template match=\"@*|node()\"> \
<xsl:copy> \
<xsl:apply-templates select=\"@*|node()\"/> \
</xsl:copy> \
</xsl:template> \
</xsl:stylesheet>";
    xmlParserCtxtPtr parser = xmlNewParserCtxt();
    xmlDocPtr xml = xmlCtxtReadMemory(parser, sheet, strlen(sheet), NULL, NULL, 0);
    /* The parser context is only needed for this single parse; release it
     * here (as new_stylesheet_skeleton() does) instead of leaking it. */
    xmlFreeParserCtxt(parser);

    span_wrap_sheet = xsltParseStylesheetDoc(xml);
    if(span_wrap_sheet == NULL) {
      /* Compilation failed, so no stylesheet took ownership of the document. */
      xmlFreeDoc(xml);
      return NULL;
    }
  }

  xsltTransformContextPtr ctxt = xsltNewTransformContext(span_wrap_sheet, doc);
  /* NOTE(review): this installs `ctxt` as the context of libxml2's global
   * generic error handler, and `ctxt` is freed below — confirm that
   * parsleyXsltError() is not invoked with this context afterwards. */
  xmlSetGenericErrorFunc(ctxt, parsleyXsltError);
  xmlDocPtr out = xsltApplyStylesheetUser(span_wrap_sheet, doc, NULL, NULL, NULL, ctxt);
  xsltFreeTransformContext(ctxt);
  return out;
}
/*
 * Replace the stored User-Agent header line.
 *
 * Any previously stored header is freed. When `agent` is NULL the header is
 * simply cleared; otherwise a new heap string "User-Agent: <agent>\n" is
 * built with asprintf(). If asprintf() fails, the header is left cleared:
 * the pointer it was given is indeterminate after a failure and must not be
 * kept around to be freed or sent later.
 */
void
_parsley_set_user_agent(char * agent) {
  free(parsley_user_agent_header);  /* free(NULL) is a no-op */
  parsley_user_agent_header = NULL;

  if(agent == NULL) {
    return;
  }

  if(asprintf(&parsley_user_agent_header, "User-Agent: %s\n", agent) < 0) {
    parsley_user_agent_header = NULL;
  }
}
/*
 * "Open" input callback for HTTP resources: issues the request through
 * xmlNanoHTTPMethod(), passing the stored User-Agent header line (possibly
 * NULL) as the extra headers argument, and returns whatever handle it yields.
 */
static void *
xmlUserAgentIOHTTPOpen(const char * file_name) {
  void *http_handle = (void *)(xmlNanoHTTPMethod(file_name, NULL, NULL, NULL, parsley_user_agent_header, 0));
  return http_handle;
}
/*
 * Switch the I/O mode and rebuild libxml2's input callback table to match.
 *
 * Does nothing when `mode` equals the current mode. Otherwise the new mode is
 * stored, all input callbacks are removed, and handlers are re-registered:
 * file (and gzip, when HAVE_ZLIB_H is defined) handlers if the
 * PARSLEY_OPTIONS_ALLOW_LOCAL bit is set, then HTTP and FTP handlers (when
 * libxml2 was built with them) if the PARSLEY_OPTIONS_ALLOW_NET bit is set.
 * HTTP opens go through xmlUserAgentIOHTTPOpen().
 */
void
parsley_io_set_mode(int mode) {
  if(parsley_io_mode == mode) {
    return;
  }

  parsley_io_mode = mode;
  xmlCleanupInputCallbacks();

  if((mode & PARSLEY_OPTIONS_ALLOW_LOCAL) != 0) {
    xmlRegisterInputCallbacks(xmlFileMatch, xmlFileOpen, xmlFileRead, xmlFileClose);
#ifdef HAVE_ZLIB_H
    xmlRegisterInputCallbacks(xmlGzfileMatch, xmlGzfileOpen, xmlGzfileRead, xmlGzfileClose);
#endif /* HAVE_ZLIB_H */
  }

  if((mode & PARSLEY_OPTIONS_ALLOW_NET) != 0) {
#ifdef LIBXML_HTTP_ENABLED
    xmlRegisterInputCallbacks(xmlIOHTTPMatch, xmlUserAgentIOHTTPOpen, xmlIOHTTPRead, xmlIOHTTPClose);
#endif /* LIBXML_HTTP_ENABLED */
#ifdef LIBXML_FTP_ENABLED
    xmlRegisterInputCallbacks(xmlIOFTPMatch, xmlIOFTPOpen, xmlIOFTPRead, xmlIOFTPClose);
#endif /* LIBXML_FTP_ENABLED */
  }
}
/*
 * Append the remaining contents of stream `f` to `buf`.
 *
 * The stream is consumed with fgets() in pieces of at most BUF - 1
 * characters; each piece is appended via sprintbuf() with a "%s" format.
 * Reading stops when fgets() returns NULL.
 */
void
printbuf_file_read(FILE *f, struct printbuf *buf) {
  char piece[BUF];

  for(;;) {
    if(fgets(piece, BUF, f) == NULL) {
      break;
    }
    sprintbuf(buf, "%s", piece);
  }
}
/*
 * Run the extension registration calls exactly once per process.
 * The first call runs them all and sets a file-level flag; later calls
 * return immediately.
 */
void registerEXSLT() {
  if(parsley_exslt_registered) {
    return;
  }

  exsltRegisterAll();
  parsley_register_all();
  init_xpath_alias();
  exslt_org_regular_expressions_init();

  parsley_exslt_registered = true;
}
/*
 * Extract the flag suffix of a key.
 *
 * The suffix is everything after the last alphanumeric character or ')'
 * in `key`. Each '?' in the suffix sets PARSLEY_OPTIONAL and each '!' sets
 * PARSLEY_BANG; other suffix characters are ignored.
 *
 * When the key contains no alphanumeric character or ')', the first
 * character serves as the anchor, so it is never itself read as a flag.
 * This matches the previous behavior.
 *
 * `key` must be a NUL-terminated string; it is not modified.
 * Returns the OR of the flags found, or 0 if there are none.
 */
int parsley_key_flags(char* key) {
  const char *anchor = key;

  for(const char *p = key; *p != '\0'; p++) {
    /* <ctype.h> functions need a value representable as unsigned char;
     * a plain (possibly negative) char is undefined behavior. */
    if(isalnum((unsigned char) *p) || *p == ')') {
      anchor = p;
    }
  }

  int flags = 0;
  if(*anchor == '\0') {
    return flags;  /* empty key: there is nothing after the anchor */
  }

  for(const char *p = anchor + 1; *p != '\0'; p++) {
    switch(*p) {
      case '?':
        flags |= PARSLEY_OPTIONAL;
        break;
      case '!':
        flags |= PARSLEY_BANG;
        break;
      default:
        break;
    }
  }
  return flags;
}
/*
 * Return a newly allocated copy of the tag portion of `key`.
 *
 * The tag is the first character of the key (kept as-is, without
 * validation, matching the previous behavior — NOTE(review): confirm this is
 * intended) followed by the longest run of characters that are alphanumeric,
 * '_' or '-'. An empty key yields an empty string.
 *
 * `key` must be a NUL-terminated string; it is not modified.
 * Returns a malloc()ed string the caller must free(), or NULL if the
 * allocation fails.
 */
char* parsley_key_tag(char* key) {
  size_t len = 0;

  if(key[0] != '\0') {
    len = 1;
    /* Cast for <ctype.h>: a negative plain char would be undefined behavior. */
    while(key[len] != '\0' &&
          (isalnum((unsigned char) key[len]) || key[len] == '_' || key[len] == '-')) {
      len++;
    }
  }

  char *tag = malloc(len + 1);
  if(tag == NULL) {
    return NULL;
  }
  memcpy(tag, key, len);
  tag[len] = '\0';
  return tag;
}
/*
 * Parse the parenthesized filter expression embedded in `key`, if any.
 *
 * The expression starts after the first '(' found at index 1 or later (the
 * first character of the key is never treated as the opening parenthesis,
 * matching the previous behavior) and ends before the last ')' that follows
 * it. If no ')' follows the '(', the expression runs to the end of the key,
 * instead of writing through an uninitialized pointer as the old code did.
 *
 * `key` must be a NUL-terminated string; it is not modified.
 * Returns the result of myparse() on the expression, or NULL when the key
 * has no '(', the expression is empty, or memory cannot be allocated.
 */
pxpathPtr parsley_key_filter(char* key) {
  if(key[0] == '\0') {
    return NULL;
  }

  const char *open = strchr(key + 1, '(');
  if(open == NULL) {
    return NULL;  /* no expression; nothing was allocated, so nothing leaks */
  }

  const char *body = open + 1;             /* clip "(" */
  const char *close = strrchr(body, ')');  /* clip ")" when present */
  size_t len = (close != NULL) ? (size_t) (close - body) : strlen(body);
  if(len == 0) {
    return NULL;
  }

  /* myparse() receives a private, NUL-terminated copy of just the expression. */
  char *expr = malloc(len + 1);
  if(expr == NULL) {
    return NULL;
  }
  memcpy(expr, body, len);
  expr[len] = '\0';

  pxpathPtr out = myparse(expr);
  free(expr);
  return out;
}
/*
 * Return the last direct child of `node` whose type is XML_ELEMENT_NODE,
 * or NULL when `node` has no element children. `node` must not be NULL.
 */
static xmlNodePtr
_xmlLastElementChild(xmlNodePtr node) {
  xmlNodePtr last_elem = NULL;

  for(xmlNodePtr cur = node->children; cur != NULL; cur = cur->next) {
    if(cur->type == XML_ELEMENT_NODE) {
      last_elem = cur;
    }
  }
  return last_elem;
}
xmlNodePtr new_stylesheet_skeleton(char *incl) {
struct printbuf *buf = printbuf_new();
sprintbuf(buf, "%s", "<xsl:stylesheet version=\"1.0\" xmlns:xsl=\"http://www.w3.org/1999/XSL/Transform\"");
sprintbuf(buf, "%s", " xmlns:lib=\"http://parselets.com/stdlib\"");
sprintbuf(buf, "%s", " xmlns:parsley=\"http://parselets.com/json\"");
sprintbuf(buf, "%s", " xmlns:str=\"http://exslt.org/strings\"");
sprintbuf(buf, "%s", " xmlns:set=\"http://exslt.org/sets\"");
sprintbuf(buf, "%s", " xmlns:math=\"http://exslt.org/math\"");
sprintbuf(buf, "%s", " xmlns:func=\"http://exslt.org/functions\"");
sprintbuf(buf, "%s", " xmlns:user=\"http://parselets.com/usre\"");
sprintbuf(buf, "%s", " xmlns:dyn=\"http://exslt.org/dynamic\"");
sprintbuf(buf, "%s", " xmlns:date=\"http://exslt.org/dates-and-times\"");
sprintbuf(buf, "%s", " xmlns:exsl=\"http://exslt.org/common\"");
sprintbuf(buf, "%s", " xmlns:saxon=\"http://icl.com/saxon\"");
sprintbuf(buf, "%s", " xmlns:regexp=\"http://exslt.org/regular-expressions\"");
sprintbuf(buf, "%s", " xmlns:regex=\"http://exslt.org/regular-expressions\"");
sprintbuf(buf, "%s", " extension-element-prefixes=\"lib str math set func dyn exsl saxon user date regexp regex\"");
sprintbuf(buf, "%s", ">\n");
sprintbuf(buf, "%s", "<xsl:variable name=\"nbsp\"> </xsl:variable>\n");
sprintbuf(buf, "%s", "<xsl:output method=\"xml\" indent=\"yes\"/>\n");
sprintbuf(buf, "%s", "<xsl:strip-space elements=\"*\"/>\n");
sprintbuf(buf, "%s", "<func:function name=\"lib:nl\"><xsl:param name=\"in\" select=\".\"/>");
sprintbuf(buf, "%s", "<xsl:variable name=\"out\"><xsl:apply-templates mode=\"innertext\" select=\"exsl:node-set($in)\"/></xsl:variable>");
sprintbuf(buf, "%s", "<func:result select=\"$out\" /></func:function>");
sprintbuf(buf, "%s", "<xsl:template match=\"text()\" mode=\"innertext\"><xsl:value-of select=\".\" /></xsl:template>");
sprintbuf(buf, "%s", "<xsl:template match=\"script|style\" mode=\"innertext\"/>");
sprintbuf(buf, "%s", "<xsl:template match=\"br|address|blockquote|center|dir|div|form|h1|h2|h3|h4|h5|h6|hr|menu|noframes|noscript|p|pre|li|td|th|p\" mode=\"innertext\"><xsl:apply-templates mode=\"innertext\" /><xsl:text>\n</xsl:text></xsl:template>");
sprintbuf(buf, "%s\n", incl);
sprintbuf(buf, "%s\n", "<xsl:template match=\"/\">\n");
sprintbuf(buf, "%s\n", "<parsley:root />\n");
sprintbuf(buf, "%s\n", "</xsl:template>\n");
sprintbuf(buf, "%s\n", "</xsl:stylesheet>\n");
xmlParserCtxtPtr ctxt = xmlNewParserCtxt();
xmlDocPtr doc = xmlCtxtReadMemory(ctxt, buf->buf, buf->size, "http://parselets.com/compiled", NULL, 3);
xmlFreeParserCtxt(ctxt);
printbuf_free(buf);
xmlNodePtr node = xmlDocGetRootElement(doc);
while(_xmlLastElementChild(node) != NULL) {
node = _xmlLastElementChild(node);
}
return node;
}