NetSurf
libdom.c
Go to the documentation of this file.
1/*
2 * Copyright 2012 Vincent Sanders <vince@netsurf-browser.org>
3 *
4 * This file is part of NetSurf, http://www.netsurf-browser.org/
5 *
6 * NetSurf is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; version 2 of the License.
9 *
10 * NetSurf is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 */
18
19/** \file
20 * libdom utilities (implementation).
21 */
22
23#include <assert.h>
24#include <string.h>
25#include <dom/dom.h>
26
27#include "utils/config.h"
28#include "utils/log.h"
29#include "utils/libdom.h"
30
31
32/* exported interface documented in libdom.h */
33dom_node *libdom_find_first_element(dom_node *parent, lwc_string *element_name)
34{
35 dom_node *element;
36 dom_exception exc;
37 dom_string *node_name = NULL;
38 dom_node_type node_type;
39 dom_node *next_node;
40
41 exc = dom_node_get_first_child(parent, &element);
42 if ((exc != DOM_NO_ERR) || (element == NULL)) {
43 return NULL;
44 }
45
46 /* find first node thats a element */
47 do {
48 exc = dom_node_get_node_type(element, &node_type);
49
50 if ((exc == DOM_NO_ERR) && (node_type == DOM_ELEMENT_NODE)) {
51 exc = dom_node_get_node_name(element, &node_name);
52 if ((exc == DOM_NO_ERR) && (node_name != NULL)) {
53 if (dom_string_caseless_lwc_isequal(node_name,
54 element_name)) {
55 dom_string_unref(node_name);
56 break;
57 }
58 dom_string_unref(node_name);
59 }
60 }
61
62 exc = dom_node_get_next_sibling(element, &next_node);
63 dom_node_unref(element);
64 if (exc == DOM_NO_ERR) {
65 element = next_node;
66 } else {
67 element = NULL;
68 }
69 } while (element != NULL);
70
71 return element;
72}
73
74/* exported interface documented in libdom.h */
75/* TODO: return appropriate errors */
77 libdom_iterate_cb cb, void *ctx)
78{
79 dom_nodelist *children;
80 uint32_t index, num_children;
81 dom_exception error;
82
83 error = dom_node_get_child_nodes(parent, &children);
84 if (error != DOM_NO_ERR || children == NULL)
85 return NSERROR_NOMEM;
86
87 error = dom_nodelist_get_length(children, &num_children);
88 if (error != DOM_NO_ERR) {
89 dom_nodelist_unref(children);
90 return NSERROR_NOMEM;
91 }
92
93 for (index = 0; index < num_children; index++) {
94 dom_node *child;
95 dom_node_type type;
96
97 error = dom_nodelist_item(children, index, &child);
98 if (error != DOM_NO_ERR) {
99 dom_nodelist_unref(children);
100 return NSERROR_NOMEM;
101 }
102
103 error = dom_node_get_node_type(child, &type);
104 if (error == DOM_NO_ERR && type == DOM_ELEMENT_NODE) {
105 nserror err = cb(child, ctx);
106 if (err != NSERROR_OK) {
107 dom_node_unref(child);
108 dom_nodelist_unref(children);
109 return err;
110 }
111 }
112
113 dom_node_unref(child);
114 }
115
116 dom_nodelist_unref(children);
117
118 return NSERROR_OK;
119}
120
121/* exported interface documented in libdom.h */
123{
124 switch (error) {
125
126 /* HUBBUB_REPROCESS is not handled here because it can
127 * never occur outside the hubbub treebuilder
128 */
129
130 case DOM_HUBBUB_OK:
131 /* parsed ok */
132 return NSERROR_OK;
133
134 case (DOM_HUBBUB_HUBBUB_ERR | HUBBUB_PAUSED):
135 /* hubbub input paused */
136 return NSERROR_OK;
137
138 case DOM_HUBBUB_NOMEM:
139 /* out of memory error from DOM */
140 return NSERROR_NOMEM;
141
142 case DOM_HUBBUB_BADPARM:
143 /* Bad parameter passed to creation */
145
146 case DOM_HUBBUB_DOM:
147 /* DOM call returned error */
148 return NSERROR_DOM;
149
150 case (DOM_HUBBUB_HUBBUB_ERR | HUBBUB_ENCODINGCHANGE):
151 /* encoding changed */
153
154 case (DOM_HUBBUB_HUBBUB_ERR | HUBBUB_NOMEM):
155 /* out of memory error from parser */
156 return NSERROR_NOMEM;
157
158 case (DOM_HUBBUB_HUBBUB_ERR | HUBBUB_BADPARM):
160
161 case (DOM_HUBBUB_HUBBUB_ERR | HUBBUB_INVALID):
162 return NSERROR_INVALID;
163
164 case (DOM_HUBBUB_HUBBUB_ERR | HUBBUB_FILENOTFOUND):
165 return NSERROR_NOT_FOUND;
166
167 case (DOM_HUBBUB_HUBBUB_ERR | HUBBUB_NEEDDATA):
168 return NSERROR_NEED_DATA;
169
170 case (DOM_HUBBUB_HUBBUB_ERR | HUBBUB_BADENCODING):
172
173 case (DOM_HUBBUB_HUBBUB_ERR | HUBBUB_UNKNOWN):
174 /* currently only generated by the libdom hubbub binding */
175 return NSERROR_DOM;
176 default:
177 /* unknown error */
178 /** @todo better error handling and reporting */
179 return NSERROR_UNKNOWN;
180 }
181 return NSERROR_UNKNOWN;
182}
183
184
/**
 * Parser message callback that discards every message it is given.
 *
 * \param severity  message severity (unused)
 * \param ctx       callback context (unused)
 * \param msg       printf-style format string (unused)
 */
static void ignore_dom_msg(uint32_t severity, void *ctx, const char *msg, ...)
{
	/* intentionally does nothing; the casts only mark the arguments unused */
	(void)severity;
	(void)ctx;
	(void)msg;
}
188
189
190
191/**
192 * Dump attribute/value for an element node
193 *
194 * \param node The element node to dump attribute details for
195 * \param f file handle to dump to.
196 * \param attribute The attribute to dump
197 * \return true on success, or false on error
198 */
199static bool dump_dom_element_attribute(dom_node *node, FILE *f, const char *attribute)
200{
201 dom_exception exc;
202 dom_string *attr = NULL;
203 dom_string *attr_value = NULL;
204 dom_node_type type;
205 const char *string;
206 size_t length;
207
208 /* Should only have element nodes here */
209 exc = dom_node_get_node_type(node, &type);
210 if (exc != DOM_NO_ERR) {
211 fprintf(f, " Exception raised for node_get_node_type\n");
212 return false;
213 }
214 assert(type == DOM_ELEMENT_NODE);
215
216 /* Create a dom_string containing required attribute name. */
217 exc = dom_string_create_interned((uint8_t *)attribute,
218 strlen(attribute), &attr);
219 if (exc != DOM_NO_ERR) {
220 fprintf(f, " Exception raised for dom_string_create\n");
221 return false;
222 }
223
224 /* Get class attribute's value */
225 exc = dom_element_get_attribute(node, attr, &attr_value);
226 if (exc != DOM_NO_ERR) {
227 fprintf(f, " Exception raised for element_get_attribute\n");
228 dom_string_unref(attr);
229 return false;
230 } else if (attr_value == NULL) {
231 /* Element lacks required attribute */
232 dom_string_unref(attr);
233 return true;
234 }
235
236 /* Finished with the attr dom_string */
237 dom_string_unref(attr);
238
239 /* Get attribute value's string data */
240 string = dom_string_data(attr_value);
241 length = dom_string_byte_length(attr_value);
242
243 /* Print attribute info */
244 fprintf(f, " %s=\"%.*s\"", attribute, (int)length, string);
245
246 /* Finished with the attr_value dom_string */
247 dom_string_unref(attr_value);
248
249 return true;
250}
251
252
253/**
254 * Print a line in a DOM structure dump for an element
255 *
256 * \param node The node to dump
257 * \param f file handle to dump to.
258 * \param depth The node's depth
259 * \return true on success, or false on error
260 */
261static bool dump_dom_element(dom_node *node, FILE *f, int depth)
262{
263 dom_exception exc;
264 dom_string *node_name = NULL;
265 dom_node_type type;
266 int i;
267 const char *string;
268 size_t length;
269
270 /* Only interested in element nodes */
271 exc = dom_node_get_node_type(node, &type);
272 if (exc != DOM_NO_ERR) {
273 fprintf(f, "Exception raised for node_get_node_type\n");
274 return false;
275 } else if (type != DOM_ELEMENT_NODE) {
276 /* Nothing to print */
277 return true;
278 }
279
280 /* Get element name */
281 exc = dom_node_get_node_name(node, &node_name);
282 if (exc != DOM_NO_ERR) {
283 fprintf(f, "Exception raised for get_node_name\n");
284 return false;
285 } else if (node_name == NULL) {
286 fprintf(f, "Broken: root_name == NULL\n");
287 return false;
288 }
289
290 /* Print ASCII tree structure for current node */
291 if (depth > 0) {
292 for (i = 0; i < depth; i++) {
293 fprintf(f, "| ");
294 }
295 fprintf(f, "+-");
296 }
297
298 /* Get string data and print element name */
299 string = dom_string_data(node_name);
300 length = dom_string_byte_length(node_name);
301 fprintf(f, "[%.*s]", (int)length, string);
302
303 if (length == 5 && strncmp(string, "title", 5) == 0) {
304 /* Title tag, gather the title */
305 dom_string *str;
306 exc = dom_node_get_text_content(node, &str);
307 if (exc == DOM_NO_ERR && str != NULL) {
308 fprintf(f, " $%.*s$", (int)dom_string_byte_length(str),
309 dom_string_data(str));
310 dom_string_unref(str);
311 }
312 }
313
314 /* Finished with the node_name dom_string */
315 dom_string_unref(node_name);
316
317 /* Print the element's id & class, if it has them */
318 if (dump_dom_element_attribute(node, f, "id") == false ||
319 dump_dom_element_attribute(node, f, "class") == false) {
320 /* Error occured */
321 fprintf(f, "\n");
322 return false;
323 }
324
325 fprintf(f, "\n");
326 return true;
327}
328
329
330/* exported interface documented in libdom.h */
331nserror libdom_dump_structure(dom_node *node, FILE *f, int depth)
332{
333 dom_exception exc;
334 dom_node *child;
335 nserror ret;
336 dom_node *next_child;
337
338 /* Print this node's entry */
339 if (dump_dom_element(node, f, depth) == false) {
340 /* There was an error; return */
341 return NSERROR_DOM;
342 }
343
344 /* Get the node's first child */
345 exc = dom_node_get_first_child(node, &child);
346 if (exc != DOM_NO_ERR) {
347 fprintf(f, "Exception raised for node_get_first_child\n");
348 return NSERROR_DOM;
349 } else if (child != NULL) {
350 /* node has children; decend to children's depth */
351 depth++;
352
353 /* Loop though all node's children */
354 do {
355 /* Visit node's descendents */
356 ret = libdom_dump_structure(child, f, depth);
357 if (ret !=NSERROR_OK) {
358 /* There was an error; return */
359 dom_node_unref(child);
360 return NSERROR_DOM;
361 }
362
363 /* Go to next sibling */
364 exc = dom_node_get_next_sibling(child, &next_child);
365 if (exc != DOM_NO_ERR) {
366 fprintf(f, "Exception raised for node_get_next_sibling\n");
367 dom_node_unref(child);
368 return NSERROR_DOM;
369 }
370
371 dom_node_unref(child);
372 child = next_child;
373 } while (child != NULL); /* No more children */
374 }
375
376 return NSERROR_OK;
377}
378
379
380/* exported interface documented in libdom.h */
381nserror libdom_parse_file(const char *filename, const char *encoding, dom_document **doc)
382{
383 dom_hubbub_parser_params parse_params;
384 dom_hubbub_error error;
385 dom_hubbub_parser *parser;
386 dom_document *document;
387 FILE *fp = NULL;
388#define BUF_SIZE 512
389 uint8_t buf[BUF_SIZE];
390
391 fp = fopen(filename, "r");
392 if (fp == NULL) {
393 return NSERROR_NOT_FOUND;
394 }
395
396 parse_params.enc = encoding;
397 parse_params.fix_enc = false;
398 parse_params.enable_script = false;
399 parse_params.msg = ignore_dom_msg;
400 parse_params.script = NULL;
401 parse_params.ctx = NULL;
402 parse_params.daf = NULL;
403
404 error = dom_hubbub_parser_create(&parse_params, &parser, &document);
405 if (error != DOM_HUBBUB_OK) {
406 fclose(fp);
407 return libdom_hubbub_error_to_nserror(error);
408 }
409
410 while (feof(fp) == 0) {
411 size_t read = fread(buf, sizeof(buf[0]), BUF_SIZE, fp);
412
413 error = dom_hubbub_parser_parse_chunk(parser, buf, read);
414 if (error != DOM_HUBBUB_OK) {
415 dom_node_unref(document);
416 dom_hubbub_parser_destroy(parser);
417 fclose(fp);
418 return NSERROR_DOM;
419 }
420 }
421
422 error = dom_hubbub_parser_completed(parser);
423 if (error != DOM_HUBBUB_OK) {
424 dom_node_unref(document);
425 dom_hubbub_parser_destroy(parser);
426 fclose(fp);
427 return libdom_hubbub_error_to_nserror(error);
428 }
429
430 dom_hubbub_parser_destroy(parser);
431 fclose(fp);
432
433 *doc = document;
434 return NSERROR_OK;
435}
static dom_node * next_node(dom_node *n, html_content *content, bool convert_children)
Find the next node in the DOM tree, completing element construction where appropriate.
wimp_w parent
Definition: dialog.c:88
nserror
Enumeration of error codes.
Definition: errors.h:29
@ NSERROR_NOT_FOUND
Requested item not found.
Definition: errors.h:34
@ NSERROR_BAD_ENCODING
The character set is unknown.
Definition: errors.h:45
@ NSERROR_DOM
DOM call returned error.
Definition: errors.h:52
@ NSERROR_BAD_PARAMETER
Bad Parameter.
Definition: errors.h:48
@ NSERROR_NEED_DATA
More data needed.
Definition: errors.h:46
@ NSERROR_UNKNOWN
Unknown error - DO NOT USE.
Definition: errors.h:31
@ NSERROR_ENCODING_CHANGE
The character encoding changed.
Definition: errors.h:47
@ NSERROR_INVALID
Invalid data.
Definition: errors.h:49
@ NSERROR_NOMEM
Memory exhaustion.
Definition: errors.h:32
@ NSERROR_OK
No error.
Definition: errors.h:30
const char * type
Definition: filetype.cpp:44
nserror libdom_iterate_child_elements(dom_node *parent, libdom_iterate_cb cb, void *ctx)
Definition: libdom.c:76
dom_node * libdom_find_first_element(dom_node *parent, lwc_string *element_name)
Search children of a node for first named element.
Definition: libdom.c:33
nserror libdom_parse_file(const char *filename, const char *encoding, dom_document **doc)
Definition: libdom.c:381
#define BUF_SIZE
static void ignore_dom_msg(uint32_t severity, void *ctx, const char *msg,...)
Definition: libdom.c:185
nserror libdom_hubbub_error_to_nserror(dom_hubbub_error error)
Convert libdom hubbub binding errors to nserrors.
Definition: libdom.c:122
nserror libdom_dump_structure(dom_node *node, FILE *f, int depth)
Walk through a DOM (sub)tree, in depth first order, printing DOM structure.
Definition: libdom.c:331
static bool dump_dom_element_attribute(dom_node *node, FILE *f, const char *attribute)
Dump attribute/value for an element node.
Definition: libdom.c:199
static bool dump_dom_element(dom_node *node, FILE *f, int depth)
Print a line in a DOM structure dump for an element.
Definition: libdom.c:261
libdom utilities (implementation).
nserror(* libdom_iterate_cb)(dom_node *node, void *ctx)
Definition: libdom.h:44
static css_error node_name(void *pw, void *node, css_qname *qname)
Callback to retrieve a node's name.
Definition: select.c:373
Interface to utility string handling.