File: | desktop/save_text.c |
Warning: | line 306, column 31 Use of memory allocated with size zero |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | /* | ||||
2 | * Copyright 2004 John M Bell <jmb202@ecs.soton.ac.uk> | ||||
3 | * Copyright 2008 Michael Drake <tlsa@netsurf-browser.org> | ||||
4 | * | ||||
5 | * This file is part of NetSurf, http://www.netsurf-browser.org/ | ||||
6 | * | ||||
7 | * NetSurf is free software; you can redistribute it and/or modify | ||||
8 | * it under the terms of the GNU General Public License as published by | ||||
9 | * the Free Software Foundation; version 2 of the License. | ||||
10 | * | ||||
11 | * NetSurf is distributed in the hope that it will be useful, | ||||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||||
14 | * GNU General Public License for more details. | ||||
15 | * | ||||
16 | * You should have received a copy of the GNU General Public License | ||||
17 | * along with this program. If not, see <http://www.gnu.org/licenses/>. | ||||
18 | */ | ||||
19 | |||||
20 | /** \file | ||||
21 | * Text export of HTML (implementation). | ||||
22 | */ | ||||
23 | |||||
24 | #include <assert.h> | ||||
25 | #include <stdbool.h> | ||||
26 | #include <string.h> | ||||
27 | |||||
28 | #include <dom/dom.h> | ||||
29 | |||||
30 | #include "utils/config.h" | ||||
31 | #include "utils/log.h" | ||||
32 | #include "utils/utf8.h" | ||||
33 | #include "utils/utils.h" | ||||
34 | #include "netsurf/content.h" | ||||
35 | #include "html/box.h" | ||||
36 | #include "html/html_save.h" | ||||
37 | |||||
38 | #include "netsurf/utf8.h" | ||||
39 | #include "desktop/gui_internal.h" | ||||
40 | #include "desktop/save_text.h" | ||||
41 | |||||
42 | static void extract_text(struct box *box, bool_Bool *first, | ||||
43 | save_text_whitespace *before, struct save_text_state *save); | ||||
44 | static bool_Bool save_text_add_to_buffer(const char *text, size_t length, | ||||
45 | struct box *box, const char *whitespace_text, | ||||
46 | size_t whitespace_length, struct save_text_state *save); | ||||
47 | |||||
48 | |||||
49 | /** | ||||
50 | * Extract the text from an HTML content and save it as a text file. Text is | ||||
51 | * converted to the local encoding. | ||||
52 | * | ||||
53 | * \param c An HTML content. | ||||
54 | * \param path Path to save text file too. | ||||
55 | */ | ||||
56 | |||||
57 | void save_as_text(struct hlcache_handle *c, char *path) | ||||
58 | { | ||||
59 | FILE *out; | ||||
60 | struct save_text_state save = { NULL((void*)0), 0, 0 }; | ||||
61 | save_text_whitespace before = WHITESPACE_NONE; | ||||
62 | bool_Bool first = true1; | ||||
63 | nserror ret; | ||||
64 | char *result; | ||||
65 | |||||
66 | if (!c || content_get_type(c) != CONTENT_HTML) { | ||||
| |||||
67 | return; | ||||
68 | } | ||||
69 | |||||
70 | extract_text(html_get_box_tree(c), &first, &before, &save); | ||||
71 | if (!save.block) | ||||
72 | return; | ||||
73 | |||||
74 | ret = guit->utf8->utf8_to_local(save.block, save.length, &result); | ||||
75 | free(save.block); | ||||
76 | |||||
77 | if (ret != NSERROR_OK) { | ||||
78 | NSLOG(netsurf, INFO,do { if (NSLOG_LEVEL_INFO >= NSLOG_LEVEL_VERBOSE) { static nslog_entry_context_t _nslog_ctx = { &__nslog_category_netsurf , NSLOG_LEVEL_INFO, "desktop/save_text.c", sizeof("desktop/save_text.c" ) - 1, __PRETTY_FUNCTION__, sizeof(__PRETTY_FUNCTION__) - 1, 79 , }; nslog__log(&_nslog_ctx, "failed to convert to local encoding, return %d" , ret); } } while(0) | ||||
79 | "failed to convert to local encoding, return %d", ret)do { if (NSLOG_LEVEL_INFO >= NSLOG_LEVEL_VERBOSE) { static nslog_entry_context_t _nslog_ctx = { &__nslog_category_netsurf , NSLOG_LEVEL_INFO, "desktop/save_text.c", sizeof("desktop/save_text.c" ) - 1, __PRETTY_FUNCTION__, sizeof(__PRETTY_FUNCTION__) - 1, 79 , }; nslog__log(&_nslog_ctx, "failed to convert to local encoding, return %d" , ret); } } while(0); | ||||
80 | return; | ||||
81 | } | ||||
82 | |||||
83 | out = fopen(path, "w"); | ||||
84 | if (out) { | ||||
85 | int res = fputs(result, out); | ||||
86 | |||||
87 | if (res < 0) { | ||||
88 | NSLOG(netsurf, INFO, "Warning: write failed")do { if (NSLOG_LEVEL_INFO >= NSLOG_LEVEL_VERBOSE) { static nslog_entry_context_t _nslog_ctx = { &__nslog_category_netsurf , NSLOG_LEVEL_INFO, "desktop/save_text.c", sizeof("desktop/save_text.c" ) - 1, __PRETTY_FUNCTION__, sizeof(__PRETTY_FUNCTION__) - 1, 88 , }; nslog__log(&_nslog_ctx, "Warning: write failed"); } } while(0); | ||||
89 | } | ||||
90 | |||||
91 | res = fputs("\n", out); | ||||
92 | if (res < 0) { | ||||
93 | NSLOG(netsurf, INFO,do { if (NSLOG_LEVEL_INFO >= NSLOG_LEVEL_VERBOSE) { static nslog_entry_context_t _nslog_ctx = { &__nslog_category_netsurf , NSLOG_LEVEL_INFO, "desktop/save_text.c", sizeof("desktop/save_text.c" ) - 1, __PRETTY_FUNCTION__, sizeof(__PRETTY_FUNCTION__) - 1, 94 , }; nslog__log(&_nslog_ctx, "Warning: failed writing trailing newline" ); } } while(0) | ||||
94 | "Warning: failed writing trailing newline")do { if (NSLOG_LEVEL_INFO >= NSLOG_LEVEL_VERBOSE) { static nslog_entry_context_t _nslog_ctx = { &__nslog_category_netsurf , NSLOG_LEVEL_INFO, "desktop/save_text.c", sizeof("desktop/save_text.c" ) - 1, __PRETTY_FUNCTION__, sizeof(__PRETTY_FUNCTION__) - 1, 94 , }; nslog__log(&_nslog_ctx, "Warning: failed writing trailing newline" ); } } while(0); | ||||
95 | } | ||||
96 | |||||
97 | fclose(out); | ||||
98 | } | ||||
99 | |||||
100 | free(result); | ||||
101 | } | ||||
102 | |||||
103 | |||||
104 | /** | ||||
105 | * Decide what whitespace to place before the next bit of content-related text | ||||
106 | * that is saved. Any existing whitespace is overridden if the whitespace for | ||||
107 | * this box is more "significant". | ||||
108 | * | ||||
109 | * \param box Pointer to box. | ||||
110 | * \param first Whether this is before the first bit of content-related | ||||
111 | * text to be saved. | ||||
112 | * \param before Type of whitespace currently intended to be placed | ||||
113 | * before the next bit of content-related text to be saved. | ||||
114 | * Updated if this box is worthy of more significant | ||||
115 | * whitespace. | ||||
116 | * \param whitespace_text Whitespace to place before next bit of | ||||
117 | * content-related text to be saved. | ||||
118 | * Updated if this box is worthy of more significant | ||||
119 | * whitespace. | ||||
120 | * \param whitespace_length Length of whitespace_text. | ||||
121 | * Updated if this box is worthy of more significant | ||||
122 | * whitespace. | ||||
123 | */ | ||||
124 | |||||
125 | void save_text_solve_whitespace(struct box *box, bool_Bool *first, | ||||
126 | save_text_whitespace *before, const char **whitespace_text, | ||||
127 | size_t *whitespace_length) | ||||
128 | { | ||||
129 | /* work out what whitespace should be placed before the next bit of | ||||
130 | * text */ | ||||
131 | if (*before < WHITESPACE_TWO_NEW_LINES && | ||||
132 | /* significant box type */ | ||||
133 | (box->type == BOX_BLOCK || | ||||
134 | box->type == BOX_TABLE || | ||||
135 | box->type == BOX_FLOAT_LEFT || | ||||
136 | box->type == BOX_FLOAT_RIGHT) && | ||||
137 | /* and not a list element */ | ||||
138 | !box->list_marker && | ||||
139 | /* and not a marker... */ | ||||
140 | (!(box->parent && box->parent->list_marker == box) || | ||||
141 | /* ...unless marker follows WHITESPACE_TAB */ | ||||
142 | ((box->parent && box->parent->list_marker == box) && | ||||
143 | *before == WHITESPACE_TAB))) { | ||||
144 | *before = WHITESPACE_TWO_NEW_LINES; | ||||
145 | } else if (*before <= WHITESPACE_ONE_NEW_LINE && | ||||
146 | (box->type == BOX_TABLE_ROW || | ||||
147 | box->type == BOX_BR || | ||||
148 | (box->type != BOX_INLINE && | ||||
149 | (box->parent && box->parent->list_marker == box)) || | ||||
150 | (box->parent && box->parent->style && | ||||
151 | (css_computed_white_space(box->parent->style) == | ||||
152 | CSS_WHITE_SPACE_PRE || | ||||
153 | css_computed_white_space(box->parent->style) == | ||||
154 | CSS_WHITE_SPACE_PRE_WRAP) && | ||||
155 | box->type == BOX_INLINE_CONTAINER))) { | ||||
156 | if (*before == WHITESPACE_ONE_NEW_LINE) | ||||
157 | *before = WHITESPACE_TWO_NEW_LINES; | ||||
158 | else | ||||
159 | *before = WHITESPACE_ONE_NEW_LINE; | ||||
160 | } | ||||
161 | else if (*before < WHITESPACE_TAB && | ||||
162 | (box->type == BOX_TABLE_CELL || | ||||
163 | box->list_marker)) { | ||||
164 | *before = WHITESPACE_TAB; | ||||
165 | } | ||||
166 | |||||
167 | if (*first) { | ||||
168 | /* before the first bit of text to be saved; there is | ||||
169 | * no preceding whitespace */ | ||||
170 | *whitespace_text = ""; | ||||
171 | *whitespace_length = 0; | ||||
172 | } else { | ||||
173 | /* set the whitespace that has been decided on */ | ||||
174 | switch (*before) { | ||||
175 | case WHITESPACE_TWO_NEW_LINES: | ||||
176 | *whitespace_text = "\n\n"; | ||||
177 | *whitespace_length = 2; | ||||
178 | break; | ||||
179 | case WHITESPACE_ONE_NEW_LINE: | ||||
180 | *whitespace_text = "\n"; | ||||
181 | *whitespace_length = 1; | ||||
182 | break; | ||||
183 | case WHITESPACE_TAB: | ||||
184 | *whitespace_text = "\t"; | ||||
185 | *whitespace_length = 1; | ||||
186 | break; | ||||
187 | case WHITESPACE_NONE: | ||||
188 | *whitespace_text = ""; | ||||
189 | *whitespace_length = 0; | ||||
190 | break; | ||||
191 | default: | ||||
192 | *whitespace_text = ""; | ||||
193 | *whitespace_length = 0; | ||||
194 | break; | ||||
195 | } | ||||
196 | } | ||||
197 | } | ||||
198 | |||||
199 | |||||
200 | /** | ||||
201 | * Traverse though the box tree and add all text to a save buffer. | ||||
202 | * | ||||
203 | * \param box Pointer to box. | ||||
204 | * \param first Whether this is before the first bit of content-related | ||||
205 | * text to be saved. | ||||
206 | * \param before Type of whitespace currently intended to be placed | ||||
207 | * before the next bit of content-related text to be saved. | ||||
208 | * Updated if this box is worthy of more significant | ||||
209 | * whitespace. | ||||
210 | * \param save our save_text_state workspace pointer | ||||
211 | * \return true iff the file writing succeeded and traversal should continue. | ||||
212 | */ | ||||
213 | |||||
214 | void extract_text(struct box *box, bool_Bool *first, save_text_whitespace *before, | ||||
215 | struct save_text_state *save) | ||||
216 | { | ||||
217 | struct box *child; | ||||
218 | const char *whitespace_text = ""; | ||||
219 | size_t whitespace_length = 0; | ||||
220 | |||||
221 | assert(box)((box) ? (void) (0) : __assert_fail ("box", "desktop/save_text.c" , 221, __extension__ __PRETTY_FUNCTION__)); | ||||
222 | |||||
223 | /* If box has a list marker */ | ||||
224 | if (box->list_marker) { | ||||
225 | /* do the marker box before continuing with the rest of the | ||||
226 | * list element */ | ||||
227 | extract_text(box->list_marker, first, before, save); | ||||
228 | } | ||||
229 | |||||
230 | /* read before calling the handler in case it modifies the tree */ | ||||
231 | child = box->children; | ||||
232 | |||||
233 | save_text_solve_whitespace(box, first, before, &whitespace_text, | ||||
234 | &whitespace_length); | ||||
235 | |||||
236 | if (box->type
| ||||
237 | box->type
| ||||
238 | box->length > 0 && box->text
| ||||
239 | /* Box meets criteria for export; add text to buffer */ | ||||
240 | save_text_add_to_buffer(box->text, box->length, box, | ||||
241 | whitespace_text, whitespace_length, save); | ||||
242 | *first = false0; | ||||
243 | *before = WHITESPACE_NONE; | ||||
244 | } | ||||
245 | |||||
246 | /* Work though the children of this box, extracting any text */ | ||||
247 | while (child) { | ||||
248 | extract_text(child, first, before, save); | ||||
249 | child = child->next; | ||||
250 | } | ||||
251 | |||||
252 | return; | ||||
253 | } | ||||
254 | |||||
255 | |||||
256 | /** | ||||
257 | * Add text to save text buffer. Any preceding whitespace or following space is | ||||
258 | * also added to the buffer. | ||||
259 | * | ||||
260 | * \param text Pointer to text being added. | ||||
261 | * \param length Length of text to be appended (bytes). | ||||
262 | * \param box Pointer to text box. | ||||
263 | * \param whitespace_text Whitespace to place before text for formatting | ||||
264 | * may be NULL. | ||||
265 | * \param whitespace_length Length of whitespace_text. | ||||
266 | * \param save Our save_text_state workspace pointer. | ||||
267 | * \return true iff the file writing succeeded and traversal should continue. | ||||
268 | */ | ||||
269 | |||||
270 | bool_Bool save_text_add_to_buffer(const char *text, size_t length, struct box *box, | ||||
271 | const char *whitespace_text, size_t whitespace_length, | ||||
272 | struct save_text_state *save) | ||||
273 | { | ||||
274 | size_t new_length; | ||||
275 | int space = 0; | ||||
276 | |||||
277 | assert(save)((save) ? (void) (0) : __assert_fail ("save", "desktop/save_text.c" , 277, __extension__ __PRETTY_FUNCTION__)); | ||||
278 | |||||
279 | if (box->space > 0) | ||||
280 | space = 1; | ||||
281 | |||||
282 | if (whitespace_text
| ||||
283 | length += whitespace_length; | ||||
284 | |||||
285 | new_length = save->length + whitespace_length + length + space; | ||||
286 | if (new_length
| ||||
287 | size_t new_alloc = save->alloc + (save->alloc / 4); | ||||
288 | char *new_block; | ||||
289 | |||||
290 | if (new_alloc < new_length) new_alloc = new_length; | ||||
291 | |||||
292 | new_block = realloc(save->block, new_alloc); | ||||
293 | if (!new_block) return false0; | ||||
294 | |||||
295 | save->block = new_block; | ||||
296 | save->alloc = new_alloc; | ||||
297 | } | ||||
298 | if (whitespace_text
| ||||
299 | memcpy(save->block + save->length, whitespace_text, | ||||
300 | whitespace_length); | ||||
301 | } | ||||
302 | memcpy(save->block + save->length + whitespace_length, text, length); | ||||
303 | save->length += length; | ||||
304 | |||||
305 | if (space
| ||||
306 | save->block[save->length++] = ' '; | ||||
| |||||
307 | |||||
308 | return true1; | ||||
309 | } |