| File: | desktop/save_text.c |
| Warning: | line 306, column 31 Use of memory allocated with size zero |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
| 1 | /* | ||||
| 2 | * Copyright 2004 John M Bell <jmb202@ecs.soton.ac.uk> | ||||
| 3 | * Copyright 2008 Michael Drake <tlsa@netsurf-browser.org> | ||||
| 4 | * | ||||
| 5 | * This file is part of NetSurf, http://www.netsurf-browser.org/ | ||||
| 6 | * | ||||
| 7 | * NetSurf is free software; you can redistribute it and/or modify | ||||
| 8 | * it under the terms of the GNU General Public License as published by | ||||
| 9 | * the Free Software Foundation; version 2 of the License. | ||||
| 10 | * | ||||
| 11 | * NetSurf is distributed in the hope that it will be useful, | ||||
| 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
| 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||||
| 14 | * GNU General Public License for more details. | ||||
| 15 | * | ||||
| 16 | * You should have received a copy of the GNU General Public License | ||||
| 17 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||||
| 18 | */ | ||||
| 19 | |||||
| 20 | /** \file | ||||
| 21 | * Text export of HTML (implementation). | ||||
| 22 | */ | ||||
| 23 | |||||
| 24 | #include <assert.h> | ||||
| 25 | #include <stdbool.h> | ||||
| 26 | #include <string.h> | ||||
| 27 | |||||
| 28 | #include <dom/dom.h> | ||||
| 29 | |||||
| 30 | #include "utils/config.h" | ||||
| 31 | #include "utils/log.h" | ||||
| 32 | #include "utils/utf8.h" | ||||
| 33 | #include "utils/utils.h" | ||||
| 34 | #include "netsurf/content.h" | ||||
| 35 | #include "html/box.h" | ||||
| 36 | #include "html/html_save.h" | ||||
| 37 | |||||
| 38 | #include "netsurf/utf8.h" | ||||
| 39 | #include "desktop/gui_internal.h" | ||||
| 40 | #include "desktop/save_text.h" | ||||
| 41 | |||||
/* Forward declarations of the file-local helpers defined later in this file. */
| 42 | static void extract_text(struct box *box, bool_Bool *first, | ||||
| 43 | save_text_whitespace *before, struct save_text_state *save); | ||||
| 44 | static bool_Bool save_text_add_to_buffer(const char *text, size_t length, | ||||
| 45 | struct box *box, const char *whitespace_text, | ||||
| 46 | size_t whitespace_length, struct save_text_state *save); | ||||
| 47 | |||||
| 48 | |||||
| 49 | /** | ||||
| 50 | * Extract the text from an HTML content and save it as a text file. Text is | ||||
| 51 | * converted to the local encoding. | ||||
| 52 | * | ||||
| 53 | * \param c An HTML content. | ||||
| 54 | * \param path Path to save text file too. | ||||
| 55 | */ | ||||
| 56 | |||||
| 57 | void save_as_text(struct hlcache_handle *c, char *path) | ||||
| |||||
| 58 | { | ||||
| 59 | FILE *out; | ||||
| 60 | struct save_text_state save = { NULL((void*)0), 0, 0 }; | ||||
| 61 | save_text_whitespace before = WHITESPACE_NONE; | ||||
| 62 | bool_Bool first = true1; | ||||
| 63 | nserror ret; | ||||
| 64 | char *result; | ||||
| 65 | |||||
| 66 | if (!c || content_get_type(c) != CONTENT_HTML) { | ||||
| 67 | return; | ||||
| 68 | } | ||||
| 69 | |||||
| 70 | extract_text(html_get_box_tree(c), &first, &before, &save); | ||||
| 71 | if (!save.block) | ||||
| 72 | return; | ||||
| 73 | |||||
| 74 | ret = guit->utf8->utf8_to_local(save.block, save.length, &result); | ||||
| 75 | free(save.block); | ||||
| 76 | |||||
| 77 | if (ret != NSERROR_OK) { | ||||
| 78 | NSLOG(netsurf, INFO,do { if (NSLOG_LEVEL_INFO >= NSLOG_LEVEL_VERBOSE) { static nslog_entry_context_t _nslog_ctx = { &__nslog_category_netsurf , NSLOG_LEVEL_INFO, "desktop/save_text.c", sizeof("desktop/save_text.c" ) - 1, __PRETTY_FUNCTION__, sizeof(__PRETTY_FUNCTION__) - 1, 79 , }; nslog__log(&_nslog_ctx, "failed to convert to local encoding, return %d" , ret); } } while(0) | ||||
| 79 | "failed to convert to local encoding, return %d", ret)do { if (NSLOG_LEVEL_INFO >= NSLOG_LEVEL_VERBOSE) { static nslog_entry_context_t _nslog_ctx = { &__nslog_category_netsurf , NSLOG_LEVEL_INFO, "desktop/save_text.c", sizeof("desktop/save_text.c" ) - 1, __PRETTY_FUNCTION__, sizeof(__PRETTY_FUNCTION__) - 1, 79 , }; nslog__log(&_nslog_ctx, "failed to convert to local encoding, return %d" , ret); } } while(0); | ||||
| 80 | return; | ||||
| 81 | } | ||||
| 82 | |||||
| 83 | out = fopen(path, "w"); | ||||
| 84 | if (out) { | ||||
| 85 | int res = fputs(result, out); | ||||
| 86 | |||||
| 87 | if (res < 0) { | ||||
| 88 | NSLOG(netsurf, INFO, "Warning: write failed")do { if (NSLOG_LEVEL_INFO >= NSLOG_LEVEL_VERBOSE) { static nslog_entry_context_t _nslog_ctx = { &__nslog_category_netsurf , NSLOG_LEVEL_INFO, "desktop/save_text.c", sizeof("desktop/save_text.c" ) - 1, __PRETTY_FUNCTION__, sizeof(__PRETTY_FUNCTION__) - 1, 88 , }; nslog__log(&_nslog_ctx, "Warning: write failed"); } } while(0); | ||||
| 89 | } | ||||
| 90 | |||||
| 91 | res = fputs("\n", out); | ||||
| 92 | if (res < 0) { | ||||
| 93 | NSLOG(netsurf, INFO,do { if (NSLOG_LEVEL_INFO >= NSLOG_LEVEL_VERBOSE) { static nslog_entry_context_t _nslog_ctx = { &__nslog_category_netsurf , NSLOG_LEVEL_INFO, "desktop/save_text.c", sizeof("desktop/save_text.c" ) - 1, __PRETTY_FUNCTION__, sizeof(__PRETTY_FUNCTION__) - 1, 94 , }; nslog__log(&_nslog_ctx, "Warning: failed writing trailing newline" ); } } while(0) | ||||
| 94 | "Warning: failed writing trailing newline")do { if (NSLOG_LEVEL_INFO >= NSLOG_LEVEL_VERBOSE) { static nslog_entry_context_t _nslog_ctx = { &__nslog_category_netsurf , NSLOG_LEVEL_INFO, "desktop/save_text.c", sizeof("desktop/save_text.c" ) - 1, __PRETTY_FUNCTION__, sizeof(__PRETTY_FUNCTION__) - 1, 94 , }; nslog__log(&_nslog_ctx, "Warning: failed writing trailing newline" ); } } while(0); | ||||
| 95 | } | ||||
| 96 | |||||
| 97 | fclose(out); | ||||
| 98 | } | ||||
| 99 | |||||
| 100 | free(result); | ||||
| 101 | } | ||||
| 102 | |||||
| 103 | |||||
| 104 | /** | ||||
| 105 | * Decide what whitespace to place before the next bit of content-related text | ||||
| 106 | * that is saved. Any existing whitespace is overridden if the whitespace for | ||||
| 107 | * this box is more "significant". | ||||
| 108 | * | ||||
| 109 | * \param box Pointer to box. | ||||
| 110 | * \param first Whether this is before the first bit of content-related | ||||
| 111 | * text to be saved. | ||||
| 112 | * \param before Type of whitespace currently intended to be placed | ||||
| 113 | * before the next bit of content-related text to be saved. | ||||
| 114 | * Updated if this box is worthy of more significant | ||||
| 115 | * whitespace. | ||||
| 116 | * \param whitespace_text Whitespace to place before next bit of | ||||
| 117 | * content-related text to be saved. | ||||
| 118 | * Updated if this box is worthy of more significant | ||||
| 119 | * whitespace. | ||||
| 120 | * \param whitespace_length Length of whitespace_text. | ||||
| 121 | * Updated if this box is worthy of more significant | ||||
| 122 | * whitespace. | ||||
| 123 | */ | ||||
| 124 | |||||
| 125 | void save_text_solve_whitespace(struct box *box, bool_Bool *first, | ||||
| 126 | save_text_whitespace *before, const char **whitespace_text, | ||||
| 127 | size_t *whitespace_length) | ||||
| 128 | { | ||||
| 129 | /* work out what whitespace should be placed before the next bit of | ||||
| 130 | * text */ | ||||
| 131 | if (*before < WHITESPACE_TWO_NEW_LINES && | ||||
| 132 | /* significant box type */ | ||||
| 133 | (box->type == BOX_BLOCK || | ||||
| 134 | box->type == BOX_TABLE || | ||||
| 135 | box->type == BOX_FLOAT_LEFT || | ||||
| 136 | box->type == BOX_FLOAT_RIGHT) && | ||||
| 137 | /* and not a list element */ | ||||
| 138 | !box->list_marker && | ||||
| 139 | /* and not a marker... */ | ||||
| 140 | (!(box->parent && box->parent->list_marker == box) || | ||||
| 141 | /* ...unless marker follows WHITESPACE_TAB */ | ||||
| 142 | ((box->parent && box->parent->list_marker == box) && | ||||
| 143 | *before == WHITESPACE_TAB))) { | ||||
| 144 | *before = WHITESPACE_TWO_NEW_LINES; | ||||
| 145 | } else if (*before <= WHITESPACE_ONE_NEW_LINE && | ||||
| 146 | (box->type == BOX_TABLE_ROW || | ||||
| 147 | box->type == BOX_BR || | ||||
| 148 | (box->type != BOX_INLINE && | ||||
| 149 | (box->parent && box->parent->list_marker == box)) || | ||||
| 150 | (box->parent && box->parent->style && | ||||
| 151 | (css_computed_white_space(box->parent->style) == | ||||
| 152 | CSS_WHITE_SPACE_PRE || | ||||
| 153 | css_computed_white_space(box->parent->style) == | ||||
| 154 | CSS_WHITE_SPACE_PRE_WRAP) && | ||||
| 155 | box->type == BOX_INLINE_CONTAINER))) { | ||||
| 156 | if (*before == WHITESPACE_ONE_NEW_LINE) | ||||
| 157 | *before = WHITESPACE_TWO_NEW_LINES; | ||||
| 158 | else | ||||
| 159 | *before = WHITESPACE_ONE_NEW_LINE; | ||||
| 160 | } | ||||
| 161 | else if (*before < WHITESPACE_TAB && | ||||
| 162 | (box->type == BOX_TABLE_CELL || | ||||
| 163 | box->list_marker)) { | ||||
| 164 | *before = WHITESPACE_TAB; | ||||
| 165 | } | ||||
| 166 | |||||
| 167 | if (*first) { | ||||
| 168 | /* before the first bit of text to be saved; there is | ||||
| 169 | * no preceding whitespace */ | ||||
| 170 | *whitespace_text = ""; | ||||
| 171 | *whitespace_length = 0; | ||||
| 172 | } else { | ||||
| 173 | /* set the whitespace that has been decided on */ | ||||
| 174 | switch (*before) { | ||||
| 175 | case WHITESPACE_TWO_NEW_LINES: | ||||
| 176 | *whitespace_text = "\n\n"; | ||||
| 177 | *whitespace_length = 2; | ||||
| 178 | break; | ||||
| 179 | case WHITESPACE_ONE_NEW_LINE: | ||||
| 180 | *whitespace_text = "\n"; | ||||
| 181 | *whitespace_length = 1; | ||||
| 182 | break; | ||||
| 183 | case WHITESPACE_TAB: | ||||
| 184 | *whitespace_text = "\t"; | ||||
| 185 | *whitespace_length = 1; | ||||
| 186 | break; | ||||
| 187 | case WHITESPACE_NONE: | ||||
| 188 | *whitespace_text = ""; | ||||
| 189 | *whitespace_length = 0; | ||||
| 190 | break; | ||||
| 191 | default: | ||||
| 192 | *whitespace_text = ""; | ||||
| 193 | *whitespace_length = 0; | ||||
| 194 | break; | ||||
| 195 | } | ||||
| 196 | } | ||||
| 197 | } | ||||
| 198 | |||||
| 199 | |||||
| 200 | /** | ||||
| 201 | * Traverse through the box tree and add all text to a save buffer. | ||||
| | * | ||||
| | * A box's list marker, if any, is visited before the box itself; the box's | ||||
| | * own text is then considered, followed by each of its children in order. | ||||
| | * | ||||
| | * NOTE(review): the export condition in the body appears truncated in this | ||||
| | * listing (comparison operators and the opening brace are missing) -- confirm | ||||
| | * against the original desktop/save_text.c before relying on it. | ||||
| 202 | * | ||||
| 203 | * \param box Pointer to box. | ||||
| 204 | * \param first Whether this is before the first bit of content-related | ||||
| 205 | * text to be saved. | ||||
| 206 | * \param before Type of whitespace currently intended to be placed | ||||
| 207 | * before the next bit of content-related text to be saved. | ||||
| 208 | * Updated if this box is worthy of more significant | ||||
| 209 | * whitespace. | ||||
| 210 | * \param save our save_text_state workspace pointer | ||||
| 211 | * | ||||
| | * Returns nothing; the result of save_text_add_to_buffer() is not checked. | ||||
| 212 | */ | ||||
| 213 | |||||
| 214 | void extract_text(struct box *box, bool_Bool *first, save_text_whitespace *before, | ||||
| 215 | struct save_text_state *save) | ||||
| 216 | { | ||||
| 217 | struct box *child; | ||||
| 218 | const char *whitespace_text = ""; | ||||
| 219 | size_t whitespace_length = 0; | ||||
| 220 | |||||
| 221 | assert(box)((box) ? (void) (0) : __assert_fail ("box", "desktop/save_text.c" , 221, __extension__ __PRETTY_FUNCTION__)); | ||||
| 222 | |||||
| 223 | /* If box has a list marker */ | ||||
| 224 | if (box->list_marker) { | ||||
| 225 | /* do the marker box before continuing with the rest of the | ||||
| 226 | * list element */ | ||||
| 227 | extract_text(box->list_marker, first, before, save); | ||||
| 228 | } | ||||
| 229 | |||||
| 230 | /* read before calling the handler in case it modifies the tree */ | ||||
| 231 | child = box->children; | ||||
| 232 | |||||
| 233 | save_text_solve_whitespace(box, first, before, &whitespace_text, | ||||
| 234 | &whitespace_length); | ||||
| 235 | |||||
| 236 | if (box->type
| ||||
| 237 | box->type
| ||||
| 238 | box->length > 0 && box->text
| ||||
| 239 | /* Box meets criteria for export; add text to buffer. Afterwards the next text is no longer the first and has no whitespace pending. */ | ||||
| 240 | save_text_add_to_buffer(box->text, box->length, box, | ||||
| 241 | whitespace_text, whitespace_length, save); | ||||
| 242 | *first = false0; | ||||
| 243 | *before = WHITESPACE_NONE; | ||||
| 244 | } | ||||
| 245 | |||||
| 246 | /* Work through the children of this box, extracting any text */ | ||||
| 247 | while (child) { | ||||
| 248 | extract_text(child, first, before, save); | ||||
| 249 | child = child->next; | ||||
| 250 | } | ||||
| 251 | |||||
| 252 | return; | ||||
| 253 | } | ||||
| 254 | |||||
| 255 | |||||
| 256 | /** | ||||
| 257 | * Add text to save text buffer. Any preceding whitespace or following space is | ||||
| 258 | * also added to the buffer. | ||||
| 259 | * | ||||
| 260 | * \param text Pointer to text being added. | ||||
| 261 | * \param length Length of text to be appended (bytes). | ||||
| 262 | * \param box Pointer to text box. | ||||
| 263 | * \param whitespace_text Whitespace to place before text for formatting | ||||
| 264 | * may be NULL. | ||||
| 265 | * \param whitespace_length Length of whitespace_text. | ||||
| 266 | * \param save Our save_text_state workspace pointer. | ||||
| 267 | * \return true iff the file writing succeeded and traversal should continue. | ||||
| 268 | */ | ||||
| 269 | |||||
| 270 | bool_Bool save_text_add_to_buffer(const char *text, size_t length, struct box *box, | ||||
| 271 | const char *whitespace_text, size_t whitespace_length, | ||||
| 272 | struct save_text_state *save) | ||||
| 273 | { | ||||
| 274 | size_t new_length; | ||||
| 275 | int space = 0; | ||||
| 276 | |||||
| 277 | assert(save)((save) ? (void) (0) : __assert_fail ("save", "desktop/save_text.c" , 277, __extension__ __PRETTY_FUNCTION__)); | ||||
| 278 | |||||
| 279 | if (box->space > 0) | ||||
| 280 | space = 1; | ||||
| 281 | |||||
| 282 | if (whitespace_text
| ||||
| 283 | length += whitespace_length; | ||||
| 284 | |||||
| 285 | new_length = save->length + whitespace_length + length + space; | ||||
| 286 | if (new_length
| ||||
| 287 | size_t new_alloc = save->alloc + (save->alloc / 4); | ||||
| 288 | char *new_block; | ||||
| 289 | |||||
| 290 | if (new_alloc < new_length) new_alloc = new_length; | ||||
| 291 | |||||
| 292 | new_block = realloc(save->block, new_alloc); | ||||
| 293 | if (!new_block) return false0; | ||||
| 294 | |||||
| 295 | save->block = new_block; | ||||
| 296 | save->alloc = new_alloc; | ||||
| 297 | } | ||||
| 298 | if (whitespace_text
| ||||
| 299 | memcpy(save->block + save->length, whitespace_text, | ||||
| 300 | whitespace_length); | ||||
| 301 | } | ||||
| 302 | memcpy(save->block + save->length + whitespace_length, text, length); | ||||
| 303 | save->length += length; | ||||
| 304 | |||||
| 305 | if (space
| ||||
| 306 | save->block[save->length++] = ' '; | ||||
| |||||
| 307 | |||||
| 308 | return true1; | ||||
| 309 | } |