| File: | utils/libdom.c |
| Warning: | line 411, column 17 File position of the stream might be 'indeterminate' after a failed operation. Can cause undefined behavior |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
| 1 | /* | |||
| 2 | * Copyright 2012 Vincent Sanders <vince@netsurf-browser.org> | |||
| 3 | * | |||
| 4 | * This file is part of NetSurf, http://www.netsurf-browser.org/ | |||
| 5 | * | |||
| 6 | * NetSurf is free software; you can redistribute it and/or modify | |||
| 7 | * it under the terms of the GNU General Public License as published by | |||
| 8 | * the Free Software Foundation; version 2 of the License. | |||
| 9 | * | |||
| 10 | * NetSurf is distributed in the hope that it will be useful, | |||
| 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
| 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |||
| 13 | * GNU General Public License for more details. | |||
| 14 | * | |||
| 15 | * You should have received a copy of the GNU General Public License | |||
| 16 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | |||
| 17 | */ | |||
| 18 | ||||
| 19 | /** \file | |||
| 20 | * libdom utilities (implementation). | |||
| 21 | */ | |||
| 22 | ||||
| 23 | #include <assert.h> | |||
| 24 | #include <string.h> | |||
| 25 | #include <dom/dom.h> | |||
| 26 | ||||
| 27 | #include "utils/config.h" | |||
| 28 | #include "utils/log.h" | |||
| 29 | #include "utils/libdom.h" | |||
| 30 | ||||
| 31 | ||||
| 32 | /* exported interface documented in libdom.h */ | |||
| 33 | dom_node *libdom_find_first_element(dom_node *parent, lwc_string *element_name) | |||
| 34 | { | |||
| 35 | dom_node *element; | |||
| 36 | dom_exception exc; | |||
| 37 | dom_string *node_name = NULL((void*)0); | |||
| 38 | dom_node_type node_type; | |||
| 39 | dom_node *next_node; | |||
| 40 | ||||
| 41 | exc = dom_node_get_first_child(parent, &element)dom_node_get_first_child( (dom_node *) (parent), (dom_node ** ) (&element)); | |||
| 42 | if ((exc != DOM_NO_ERR) || (element == NULL((void*)0))) { | |||
| 43 | return NULL((void*)0); | |||
| 44 | } | |||
| 45 | ||||
| 46 | /* find first node thats a element */ | |||
| 47 | do { | |||
| 48 | exc = dom_node_get_node_type(element, &node_type)dom_node_get_node_type( (dom_node *) (element), (dom_node_type *) (&node_type)); | |||
| 49 | ||||
| 50 | if ((exc == DOM_NO_ERR) && (node_type == DOM_ELEMENT_NODE)) { | |||
| 51 | exc = dom_node_get_node_name(element, &node_name)dom_node_get_node_name((dom_node *) (element), (&node_name )); | |||
| 52 | if ((exc == DOM_NO_ERR) && (node_name != NULL((void*)0))) { | |||
| 53 | if (dom_string_caseless_lwc_isequal(node_name, | |||
| 54 | element_name)) { | |||
| 55 | dom_string_unref(node_name); | |||
| 56 | break; | |||
| 57 | } | |||
| 58 | dom_string_unref(node_name); | |||
| 59 | } | |||
| 60 | } | |||
| 61 | ||||
| 62 | exc = dom_node_get_next_sibling(element, &next_node)dom_node_get_next_sibling( (dom_node *) (element), (dom_node * *) (&next_node)); | |||
| 63 | dom_node_unref(element)dom_node_unref((dom_node *) (element)); | |||
| 64 | if (exc == DOM_NO_ERR) { | |||
| 65 | element = next_node; | |||
| 66 | } else { | |||
| 67 | element = NULL((void*)0); | |||
| 68 | } | |||
| 69 | } while (element != NULL((void*)0)); | |||
| 70 | ||||
| 71 | return element; | |||
| 72 | } | |||
| 73 | ||||
| 74 | /* exported interface documented in libdom.h */ | |||
| 75 | /* TODO: return appropriate errors */ | |||
| 76 | nserror libdom_iterate_child_elements(dom_node *parent, | |||
| 77 | libdom_iterate_cb cb, void *ctx) | |||
| 78 | { | |||
| 79 | dom_nodelist *children; | |||
| 80 | uint32_t index, num_children; | |||
| 81 | dom_exception error; | |||
| 82 | ||||
| 83 | error = dom_node_get_child_nodes(parent, &children)dom_node_get_child_nodes( (dom_node *) (parent), (struct dom_nodelist **) (&children)); | |||
| 84 | if (error != DOM_NO_ERR || children == NULL((void*)0)) | |||
| 85 | return NSERROR_NOMEM; | |||
| 86 | ||||
| 87 | error = dom_nodelist_get_length(children, &num_children); | |||
| 88 | if (error != DOM_NO_ERR) { | |||
| 89 | dom_nodelist_unref(children); | |||
| 90 | return NSERROR_NOMEM; | |||
| 91 | } | |||
| 92 | ||||
| 93 | for (index = 0; index < num_children; index++) { | |||
| 94 | dom_node *child; | |||
| 95 | dom_node_type type; | |||
| 96 | ||||
| 97 | error = dom_nodelist_item(children, index, &child)_dom_nodelist_item((dom_nodelist *) (children), (uint32_t) (index ), (dom_node **) (&child)); | |||
| 98 | if (error != DOM_NO_ERR) { | |||
| 99 | dom_nodelist_unref(children); | |||
| 100 | return NSERROR_NOMEM; | |||
| 101 | } | |||
| 102 | ||||
| 103 | error = dom_node_get_node_type(child, &type)dom_node_get_node_type( (dom_node *) (child), (dom_node_type * ) (&type)); | |||
| 104 | if (error == DOM_NO_ERR && type == DOM_ELEMENT_NODE) { | |||
| 105 | nserror err = cb(child, ctx); | |||
| 106 | if (err != NSERROR_OK) { | |||
| 107 | dom_node_unref(child)dom_node_unref((dom_node *) (child)); | |||
| 108 | dom_nodelist_unref(children); | |||
| 109 | return err; | |||
| 110 | } | |||
| 111 | } | |||
| 112 | ||||
| 113 | dom_node_unref(child)dom_node_unref((dom_node *) (child)); | |||
| 114 | } | |||
| 115 | ||||
| 116 | dom_nodelist_unref(children); | |||
| 117 | ||||
| 118 | return NSERROR_OK; | |||
| 119 | } | |||
| 120 | ||||
| 121 | /* exported interface documented in libdom.h */ | |||
| 122 | nserror libdom_hubbub_error_to_nserror(dom_hubbub_error error) | |||
| 123 | { | |||
| 124 | switch (error) { | |||
| 125 | ||||
| 126 | /* HUBBUB_REPROCESS is not handled here because it can | |||
| 127 | * never occur outside the hubbub treebuilder | |||
| 128 | */ | |||
| 129 | ||||
| 130 | case DOM_HUBBUB_OK: | |||
| 131 | /* parsed ok */ | |||
| 132 | return NSERROR_OK; | |||
| 133 | ||||
| 134 | case (DOM_HUBBUB_HUBBUB_ERR | HUBBUB_PAUSED): | |||
| 135 | /* hubbub input paused */ | |||
| 136 | return NSERROR_OK; | |||
| 137 | ||||
| 138 | case DOM_HUBBUB_NOMEM: | |||
| 139 | /* out of memory error from DOM */ | |||
| 140 | return NSERROR_NOMEM; | |||
| 141 | ||||
| 142 | case DOM_HUBBUB_BADPARM: | |||
| 143 | /* Bad parameter passed to creation */ | |||
| 144 | return NSERROR_BAD_PARAMETER; | |||
| 145 | ||||
| 146 | case DOM_HUBBUB_DOM: | |||
| 147 | /* DOM call returned error */ | |||
| 148 | return NSERROR_DOM; | |||
| 149 | ||||
| 150 | case (DOM_HUBBUB_HUBBUB_ERR | HUBBUB_ENCODINGCHANGE): | |||
| 151 | /* encoding changed */ | |||
| 152 | return NSERROR_ENCODING_CHANGE; | |||
| 153 | ||||
| 154 | case (DOM_HUBBUB_HUBBUB_ERR | HUBBUB_NOMEM): | |||
| 155 | /* out of memory error from parser */ | |||
| 156 | return NSERROR_NOMEM; | |||
| 157 | ||||
| 158 | case (DOM_HUBBUB_HUBBUB_ERR | HUBBUB_BADPARM): | |||
| 159 | return NSERROR_BAD_PARAMETER; | |||
| 160 | ||||
| 161 | case (DOM_HUBBUB_HUBBUB_ERR | HUBBUB_INVALID): | |||
| 162 | return NSERROR_INVALID; | |||
| 163 | ||||
| 164 | case (DOM_HUBBUB_HUBBUB_ERR | HUBBUB_FILENOTFOUND): | |||
| 165 | return NSERROR_NOT_FOUND; | |||
| 166 | ||||
| 167 | case (DOM_HUBBUB_HUBBUB_ERR | HUBBUB_NEEDDATA): | |||
| 168 | return NSERROR_NEED_DATA; | |||
| 169 | ||||
| 170 | case (DOM_HUBBUB_HUBBUB_ERR | HUBBUB_BADENCODING): | |||
| 171 | return NSERROR_BAD_ENCODING; | |||
| 172 | ||||
| 173 | case (DOM_HUBBUB_HUBBUB_ERR | HUBBUB_UNKNOWN): | |||
| 174 | /* currently only generated by the libdom hubbub binding */ | |||
| 175 | return NSERROR_DOM; | |||
| 176 | default: | |||
| 177 | /* unknown error */ | |||
| 178 | /** @todo better error handling and reporting */ | |||
| 179 | return NSERROR_UNKNOWN; | |||
| 180 | } | |||
| 181 | return NSERROR_UNKNOWN; | |||
| 182 | } | |||
| 183 | ||||
| 184 | ||||
/**
 * Message callback that discards everything it is given.
 *
 * Installed as the parser's msg handler by libdom_parse_file() so that
 * parser diagnostics are dropped.
 *
 * \param severity  Ignored.
 * \param ctx       Ignored.
 * \param msg       Ignored, along with any variadic arguments.
 */
static void ignore_dom_msg(uint32_t severity, void *ctx, const char *msg, ...)
{
	/* Intentionally empty: mark the parameters as deliberately unused */
	(void)severity;
	(void)ctx;
	(void)msg;
}
| 188 | ||||
| 189 | ||||
| 190 | ||||
| 191 | /** | |||
| 192 | * Dump attribute/value for an element node | |||
| 193 | * | |||
| 194 | * \param node The element node to dump attribute details for | |||
| 195 | * \param f file handle to dump to. | |||
| 196 | * \param attribute The attribute to dump | |||
| 197 | * \return true on success, or false on error | |||
| 198 | */ | |||
| 199 | static bool_Bool dump_dom_element_attribute(dom_node *node, FILE *f, const char *attribute) | |||
| 200 | { | |||
| 201 | dom_exception exc; | |||
| 202 | dom_string *attr = NULL((void*)0); | |||
| 203 | dom_string *attr_value = NULL((void*)0); | |||
| 204 | dom_node_type type; | |||
| 205 | const char *string; | |||
| 206 | size_t length; | |||
| 207 | ||||
| 208 | /* Should only have element nodes here */ | |||
| 209 | exc = dom_node_get_node_type(node, &type)dom_node_get_node_type( (dom_node *) (node), (dom_node_type * ) (&type)); | |||
| 210 | if (exc != DOM_NO_ERR) { | |||
| 211 | fprintf(f, " Exception raised for node_get_node_type\n"); | |||
| 212 | return false0; | |||
| 213 | } | |||
| 214 | assert(type == DOM_ELEMENT_NODE)((type == DOM_ELEMENT_NODE) ? (void) (0) : __assert_fail ("type == DOM_ELEMENT_NODE" , "utils/libdom.c", 214, __extension__ __PRETTY_FUNCTION__)); | |||
| 215 | ||||
| 216 | /* Create a dom_string containing required attribute name. */ | |||
| 217 | exc = dom_string_create_interned((uint8_t *)attribute, | |||
| 218 | strlen(attribute), &attr); | |||
| 219 | if (exc != DOM_NO_ERR) { | |||
| 220 | fprintf(f, " Exception raised for dom_string_create\n"); | |||
| 221 | return false0; | |||
| 222 | } | |||
| 223 | ||||
| 224 | /* Get class attribute's value */ | |||
| 225 | exc = dom_element_get_attribute(node, attr, &attr_value)dom_element_get_attribute( (dom_element *) (node), (attr), (& attr_value)); | |||
| 226 | if (exc != DOM_NO_ERR) { | |||
| 227 | fprintf(f, " Exception raised for element_get_attribute\n"); | |||
| 228 | dom_string_unref(attr); | |||
| 229 | return false0; | |||
| 230 | } else if (attr_value == NULL((void*)0)) { | |||
| 231 | /* Element lacks required attribute */ | |||
| 232 | dom_string_unref(attr); | |||
| 233 | return true1; | |||
| 234 | } | |||
| 235 | ||||
| 236 | /* Finished with the attr dom_string */ | |||
| 237 | dom_string_unref(attr); | |||
| 238 | ||||
| 239 | /* Get attribute value's string data */ | |||
| 240 | string = dom_string_data(attr_value); | |||
| 241 | length = dom_string_byte_length(attr_value); | |||
| 242 | ||||
| 243 | /* Print attribute info */ | |||
| 244 | fprintf(f, " %s=\"%.*s\"", attribute, (int)length, string); | |||
| 245 | ||||
| 246 | /* Finished with the attr_value dom_string */ | |||
| 247 | dom_string_unref(attr_value); | |||
| 248 | ||||
| 249 | return true1; | |||
| 250 | } | |||
| 251 | ||||
| 252 | ||||
| 253 | /** | |||
| 254 | * Print a line in a DOM structure dump for an element | |||
| 255 | * | |||
| 256 | * \param node The node to dump | |||
| 257 | * \param f file handle to dump to. | |||
| 258 | * \param depth The node's depth | |||
| 259 | * \return true on success, or false on error | |||
| 260 | */ | |||
| 261 | static bool_Bool dump_dom_element(dom_node *node, FILE *f, int depth) | |||
| 262 | { | |||
| 263 | dom_exception exc; | |||
| 264 | dom_string *node_name = NULL((void*)0); | |||
| 265 | dom_node_type type; | |||
| 266 | int i; | |||
| 267 | const char *string; | |||
| 268 | size_t length; | |||
| 269 | ||||
| 270 | /* Only interested in element nodes */ | |||
| 271 | exc = dom_node_get_node_type(node, &type)dom_node_get_node_type( (dom_node *) (node), (dom_node_type * ) (&type)); | |||
| 272 | if (exc != DOM_NO_ERR) { | |||
| 273 | fprintf(f, "Exception raised for node_get_node_type\n"); | |||
| 274 | return false0; | |||
| 275 | } else if (type != DOM_ELEMENT_NODE) { | |||
| 276 | /* Nothing to print */ | |||
| 277 | return true1; | |||
| 278 | } | |||
| 279 | ||||
| 280 | /* Get element name */ | |||
| 281 | exc = dom_node_get_node_name(node, &node_name)dom_node_get_node_name((dom_node *) (node), (&node_name)); | |||
| 282 | if (exc != DOM_NO_ERR) { | |||
| 283 | fprintf(f, "Exception raised for get_node_name\n"); | |||
| 284 | return false0; | |||
| 285 | } else if (node_name == NULL((void*)0)) { | |||
| 286 | fprintf(f, "Broken: root_name == NULL\n"); | |||
| 287 | return false0; | |||
| 288 | } | |||
| 289 | ||||
| 290 | /* Print ASCII tree structure for current node */ | |||
| 291 | if (depth > 0) { | |||
| 292 | for (i = 0; i < depth; i++) { | |||
| 293 | fprintf(f, "| "); | |||
| 294 | } | |||
| 295 | fprintf(f, "+-"); | |||
| 296 | } | |||
| 297 | ||||
| 298 | /* Get string data and print element name */ | |||
| 299 | string = dom_string_data(node_name); | |||
| 300 | length = dom_string_byte_length(node_name); | |||
| 301 | fprintf(f, "[%.*s]", (int)length, string); | |||
| 302 | ||||
| 303 | if (length == 5 && strncmp(string, "title", 5) == 0) { | |||
| 304 | /* Title tag, gather the title */ | |||
| 305 | dom_string *str; | |||
| 306 | exc = dom_node_get_text_content(node, &str)dom_node_get_text_content( (dom_node *) (node), (&str)); | |||
| 307 | if (exc == DOM_NO_ERR && str != NULL((void*)0)) { | |||
| 308 | fprintf(f, " $%.*s$", (int)dom_string_byte_length(str), | |||
| 309 | dom_string_data(str)); | |||
| 310 | dom_string_unref(str); | |||
| 311 | } | |||
| 312 | } | |||
| 313 | ||||
| 314 | /* Finished with the node_name dom_string */ | |||
| 315 | dom_string_unref(node_name); | |||
| 316 | ||||
| 317 | /* Print the element's id & class, if it has them */ | |||
| 318 | if (dump_dom_element_attribute(node, f, "id") == false0 || | |||
| 319 | dump_dom_element_attribute(node, f, "class") == false0) { | |||
| 320 | /* Error occured */ | |||
| 321 | fprintf(f, "\n"); | |||
| 322 | return false0; | |||
| 323 | } | |||
| 324 | ||||
| 325 | fprintf(f, "\n"); | |||
| 326 | return true1; | |||
| 327 | } | |||
| 328 | ||||
| 329 | ||||
| 330 | /* exported interface documented in libdom.h */ | |||
| 331 | nserror libdom_dump_structure(dom_node *node, FILE *f, int depth) | |||
| 332 | { | |||
| 333 | dom_exception exc; | |||
| 334 | dom_node *child; | |||
| 335 | nserror ret; | |||
| 336 | dom_node *next_child; | |||
| 337 | ||||
| 338 | /* Print this node's entry */ | |||
| 339 | if (dump_dom_element(node, f, depth) == false0) { | |||
| 340 | /* There was an error; return */ | |||
| 341 | return NSERROR_DOM; | |||
| 342 | } | |||
| 343 | ||||
| 344 | /* Get the node's first child */ | |||
| 345 | exc = dom_node_get_first_child(node, &child)dom_node_get_first_child( (dom_node *) (node), (dom_node **) ( &child)); | |||
| 346 | if (exc != DOM_NO_ERR) { | |||
| 347 | fprintf(f, "Exception raised for node_get_first_child\n"); | |||
| 348 | return NSERROR_DOM; | |||
| 349 | } else if (child != NULL((void*)0)) { | |||
| 350 | /* node has children; decend to children's depth */ | |||
| 351 | depth++; | |||
| 352 | ||||
| 353 | /* Loop though all node's children */ | |||
| 354 | do { | |||
| 355 | /* Visit node's descendents */ | |||
| 356 | ret = libdom_dump_structure(child, f, depth); | |||
| 357 | if (ret !=NSERROR_OK) { | |||
| 358 | /* There was an error; return */ | |||
| 359 | dom_node_unref(child)dom_node_unref((dom_node *) (child)); | |||
| 360 | return NSERROR_DOM; | |||
| 361 | } | |||
| 362 | ||||
| 363 | /* Go to next sibling */ | |||
| 364 | exc = dom_node_get_next_sibling(child, &next_child)dom_node_get_next_sibling( (dom_node *) (child), (dom_node ** ) (&next_child)); | |||
| 365 | if (exc != DOM_NO_ERR) { | |||
| 366 | fprintf(f, "Exception raised for node_get_next_sibling\n"); | |||
| 367 | dom_node_unref(child)dom_node_unref((dom_node *) (child)); | |||
| 368 | return NSERROR_DOM; | |||
| 369 | } | |||
| 370 | ||||
| 371 | dom_node_unref(child)dom_node_unref((dom_node *) (child)); | |||
| 372 | child = next_child; | |||
| 373 | } while (child != NULL((void*)0)); /* No more children */ | |||
| 374 | } | |||
| 375 | ||||
| 376 | return NSERROR_OK; | |||
| 377 | } | |||
| 378 | ||||
| 379 | ||||
| 380 | /* exported interface documented in libdom.h */ | |||
| 381 | nserror libdom_parse_file(const char *filename, const char *encoding, dom_document **doc) | |||
| ||||
| 382 | { | |||
| 383 | dom_hubbub_parser_params parse_params; | |||
| 384 | dom_hubbub_error error; | |||
| 385 | dom_hubbub_parser *parser; | |||
| 386 | dom_document *document; | |||
| 387 | FILE *fp = NULL((void*)0); | |||
| 388 | #define BUF_SIZE512 512 | |||
| 389 | uint8_t buf[BUF_SIZE512]; | |||
| 390 | ||||
| 391 | fp = fopen(filename, "r"); | |||
| 392 | if (fp
| |||
| 393 | return NSERROR_NOT_FOUND; | |||
| 394 | } | |||
| 395 | ||||
| 396 | parse_params.enc = encoding; | |||
| 397 | parse_params.fix_enc = false0; | |||
| 398 | parse_params.enable_script = false0; | |||
| 399 | parse_params.msg = ignore_dom_msg; | |||
| 400 | parse_params.script = NULL((void*)0); | |||
| 401 | parse_params.ctx = NULL((void*)0); | |||
| 402 | parse_params.daf = NULL((void*)0); | |||
| 403 | ||||
| 404 | error = dom_hubbub_parser_create(&parse_params, &parser, &document); | |||
| 405 | if (error != DOM_HUBBUB_OK) { | |||
| 406 | fclose(fp); | |||
| 407 | return libdom_hubbub_error_to_nserror(error); | |||
| 408 | } | |||
| 409 | ||||
| 410 | while (feof(fp) == 0) { | |||
| 411 | size_t read = fread(buf, sizeof(buf[0]), BUF_SIZE512, fp); | |||
| ||||
| 412 | ||||
| 413 | error = dom_hubbub_parser_parse_chunk(parser, buf, read); | |||
| 414 | if (error != DOM_HUBBUB_OK) { | |||
| 415 | dom_node_unref(document)dom_node_unref((dom_node *) (document)); | |||
| 416 | dom_hubbub_parser_destroy(parser); | |||
| 417 | fclose(fp); | |||
| 418 | return NSERROR_DOM; | |||
| 419 | } | |||
| 420 | } | |||
| 421 | ||||
| 422 | error = dom_hubbub_parser_completed(parser); | |||
| 423 | if (error != DOM_HUBBUB_OK) { | |||
| 424 | dom_node_unref(document)dom_node_unref((dom_node *) (document)); | |||
| 425 | dom_hubbub_parser_destroy(parser); | |||
| 426 | fclose(fp); | |||
| 427 | return libdom_hubbub_error_to_nserror(error); | |||
| 428 | } | |||
| 429 | ||||
| 430 | dom_hubbub_parser_destroy(parser); | |||
| 431 | fclose(fp); | |||
| 432 | ||||
| 433 | *doc = document; | |||
| 434 | return NSERROR_OK; | |||
| 435 | } |