Bug Summary

File: desktop/save_text.c
Warning: line 306, column 31
Use of memory allocated with size zero

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name save_text.c -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model pic -pic-level 2 -pic-is-pie -mframe-pointer=none -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -fcoverage-compilation-dir=/var/lib/jenkins/workspace/scan-build-netsurf -resource-dir /usr/lib/llvm-14/lib/clang/14.0.6 -I . 
-I include -I build/Linux-gtk2 -I frontends -I content/handlers -D WITH_JPEG -U WITH_PDF_EXPORT -D LIBICONV_PLUG -I /var/lib/jenkins/artifacts-x86_64-linux-gnu/include -I /var/lib/jenkins/artifacts-x86_64-linux-gnu/include -I /var/lib/jenkins/artifacts-x86_64-linux-gnu/include -I /usr/include/x86_64-linux-gnu -D WITH_CURL -D WITH_OPENSSL -I /var/lib/jenkins/artifacts-x86_64-linux-gnu/include -D UTF8PROC_EXPORTS -D WITH_UTF8PROC -D WITH_WEBP -I /usr/include/libpng16 -D WITH_PNG -I /var/lib/jenkins/artifacts-x86_64-linux-gnu/include/ -D WITH_BMP -I /var/lib/jenkins/artifacts-x86_64-linux-gnu/include -D WITH_GIF -I /var/lib/jenkins/artifacts-x86_64-linux-gnu/include -D WITH_NS_SVG -I /var/lib/jenkins/artifacts-x86_64-linux-gnu/include -D WITH_NSSPRITE -I /var/lib/jenkins/artifacts-x86_64-linux-gnu/include -D WITH_NSPSL -I /var/lib/jenkins/artifacts-x86_64-linux-gnu/include -D WITH_NSLOG -D NETSURF_UA_FORMAT_STRING="Mozilla/5.0 (%s) NetSurf/%d.%d" -D NETSURF_HOMEPAGE="about:welcome" -D NETSURF_LOG_LEVEL=VERBOSE -D NETSURF_BUILTIN_LOG_FILTER="(level:WARNING || cat:jserrors)" -D NETSURF_BUILTIN_VERBOSE_FILTER="(level:VERBOSE || cat:jserrors)" -D STMTEXPR=1 -I /usr/include/librsvg-2.0 -I /usr/include/glib-2.0 -I /usr/lib/x86_64-linux-gnu/glib-2.0/include -I /usr/include/libmount -I /usr/include/blkid -I /usr/include/gdk-pixbuf-2.0 -I /usr/include/libpng16 -I /usr/include/x86_64-linux-gnu -I /usr/include/cairo -I /usr/include/pixman-1 -I /usr/include/freetype2 -D WITH_RSVG -I /usr/include/gtk-2.0 -I /usr/lib/x86_64-linux-gnu/gtk-2.0/include -I /usr/include/pango-1.0 -I /usr/include/glib-2.0 -I /usr/lib/x86_64-linux-gnu/glib-2.0/include -I /usr/include/harfbuzz -I /usr/include/freetype2 -I /usr/include/libpng16 -I /usr/include/libmount -I /usr/include/blkid -I /usr/include/fribidi -I /usr/include/cairo -I /usr/include/pixman-1 -I /usr/include/gdk-pixbuf-2.0 -I /usr/include/x86_64-linux-gnu -I /usr/include/atk-1.0 -I /usr/include/glib-2.0 -I 
/usr/lib/x86_64-linux-gnu/glib-2.0/include -I /usr/include/glib-2.0 -I /usr/lib/x86_64-linux-gnu/glib-2.0/include -D gtk -D nsgtk -D G_DISABLE_SINGLE_INCLUDES -D G_DISABLE_DEPRECATED -D GTK_DISABLE_SINGLE_INCLUDES -D GTK_MULTIHEAD_SAFE -D PANGO_DISABLE_DEPRECATED -D GTK_DISABLE_DEPRECATED -D _XOPEN_SOURCE=700 -D _POSIX_C_SOURCE=200809L -D _BSD_SOURCE -D _DEFAULT_SOURCE -D _NETBSD_SOURCE -D GTK_RESPATH="/var/lib/jenkins/artifacts-x86_64-linux-gnu/share/netsurf/:./frontends/gtk/res/" -D WITH_GRESOURCE -D DUK_OPT_HAVE_CUSTOM_H -internal-isystem /usr/lib/llvm-14/lib/clang/14.0.6/include -internal-isystem /usr/local/include -internal-isystem /usr/bin/../lib/gcc/x86_64-linux-gnu/12/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wwrite-strings -Wno-unused-parameter -Wno-unused-but-set-variable -std=c99 -fconst-strings -fdebug-compilation-dir=/var/lib/jenkins/workspace/scan-build-netsurf -ferror-limit 19 -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -analyzer-display-progress -analyzer-output=html -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /var/lib/jenkins/workspace/scan-build-netsurf/clangScanBuildReports/2024-08-10-154106-1150071-1 -x c desktop/save_text.c
1/*
2 * Copyright 2004 John M Bell <jmb202@ecs.soton.ac.uk>
3 * Copyright 2008 Michael Drake <tlsa@netsurf-browser.org>
4 *
5 * This file is part of NetSurf, http://www.netsurf-browser.org/
6 *
7 * NetSurf is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; version 2 of the License.
10 *
11 * NetSurf is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program. If not, see <http://www.gnu.org/licenses/>.
18 */
19
20/** \file
21 * Text export of HTML (implementation).
22 */
23
24#include <assert.h>
25#include <stdbool.h>
26#include <string.h>
27
28#include <dom/dom.h>
29
30#include "utils/config.h"
31#include "utils/log.h"
32#include "utils/utf8.h"
33#include "utils/utils.h"
34#include "netsurf/content.h"
35#include "html/box.h"
36#include "html/html_save.h"
37
38#include "netsurf/utf8.h"
39#include "desktop/gui_internal.h"
40#include "desktop/save_text.h"
41
42static void extract_text(struct box *box, bool_Bool *first,
43 save_text_whitespace *before, struct save_text_state *save);
44static bool_Bool save_text_add_to_buffer(const char *text, size_t length,
45 struct box *box, const char *whitespace_text,
46 size_t whitespace_length, struct save_text_state *save);
47
48
49/**
50 * Extract the text from an HTML content and save it as a text file. Text is
51 * converted to the local encoding.
52 *
53 * \param c An HTML content.
54 * \param path Path to save text file to.
55 */
56
57void save_as_text(struct hlcache_handle *c, char *path)
58{
59 FILE *out;
60 struct save_text_state save = { NULL((void*)0), 0, 0 };
61 save_text_whitespace before = WHITESPACE_NONE;
62 bool_Bool first = true1;
63 nserror ret;
64 char *result;
65
66 if (!c || content_get_type(c) != CONTENT_HTML) {
1
Assuming 'c' is non-null
2
Assuming the condition is false
3
Taking false branch
67 return;
68 }
69
70 extract_text(html_get_box_tree(c), &first, &before, &save);
4
Calling 'extract_text'
71 if (!save.block)
72 return;
73
74 ret = guit->utf8->utf8_to_local(save.block, save.length, &result);
75 free(save.block);
76
77 if (ret != NSERROR_OK) {
78 NSLOG(netsurf, INFO,do { if (NSLOG_LEVEL_INFO >= NSLOG_LEVEL_VERBOSE) { static
nslog_entry_context_t _nslog_ctx = { &__nslog_category_netsurf
, NSLOG_LEVEL_INFO, "desktop/save_text.c", sizeof("desktop/save_text.c"
) - 1, __PRETTY_FUNCTION__, sizeof(__PRETTY_FUNCTION__) - 1, 79
, }; nslog__log(&_nslog_ctx, "failed to convert to local encoding, return %d"
, ret); } } while(0)
79 "failed to convert to local encoding, return %d", ret)do { if (NSLOG_LEVEL_INFO >= NSLOG_LEVEL_VERBOSE) { static
nslog_entry_context_t _nslog_ctx = { &__nslog_category_netsurf
, NSLOG_LEVEL_INFO, "desktop/save_text.c", sizeof("desktop/save_text.c"
) - 1, __PRETTY_FUNCTION__, sizeof(__PRETTY_FUNCTION__) - 1, 79
, }; nslog__log(&_nslog_ctx, "failed to convert to local encoding, return %d"
, ret); } } while(0)
;
80 return;
81 }
82
83 out = fopen(path, "w");
84 if (out) {
85 int res = fputs(result, out);
86
87 if (res < 0) {
88 NSLOG(netsurf, INFO, "Warning: write failed")do { if (NSLOG_LEVEL_INFO >= NSLOG_LEVEL_VERBOSE) { static
nslog_entry_context_t _nslog_ctx = { &__nslog_category_netsurf
, NSLOG_LEVEL_INFO, "desktop/save_text.c", sizeof("desktop/save_text.c"
) - 1, __PRETTY_FUNCTION__, sizeof(__PRETTY_FUNCTION__) - 1, 88
, }; nslog__log(&_nslog_ctx, "Warning: write failed"); } }
while(0)
;
89 }
90
91 res = fputs("\n", out);
92 if (res < 0) {
93 NSLOG(netsurf, INFO,do { if (NSLOG_LEVEL_INFO >= NSLOG_LEVEL_VERBOSE) { static
nslog_entry_context_t _nslog_ctx = { &__nslog_category_netsurf
, NSLOG_LEVEL_INFO, "desktop/save_text.c", sizeof("desktop/save_text.c"
) - 1, __PRETTY_FUNCTION__, sizeof(__PRETTY_FUNCTION__) - 1, 94
, }; nslog__log(&_nslog_ctx, "Warning: failed writing trailing newline"
); } } while(0)
94 "Warning: failed writing trailing newline")do { if (NSLOG_LEVEL_INFO >= NSLOG_LEVEL_VERBOSE) { static
nslog_entry_context_t _nslog_ctx = { &__nslog_category_netsurf
, NSLOG_LEVEL_INFO, "desktop/save_text.c", sizeof("desktop/save_text.c"
) - 1, __PRETTY_FUNCTION__, sizeof(__PRETTY_FUNCTION__) - 1, 94
, }; nslog__log(&_nslog_ctx, "Warning: failed writing trailing newline"
); } } while(0)
;
95 }
96
97 fclose(out);
98 }
99
100 free(result);
101}
102
103
104/**
105 * Decide what whitespace to place before the next bit of content-related text
106 * that is saved. Any existing whitespace is overridden if the whitespace for
107 * this box is more "significant".
108 *
109 * \param box Pointer to box.
110 * \param first Whether this is before the first bit of content-related
111 * text to be saved.
112 * \param before Type of whitespace currently intended to be placed
113 * before the next bit of content-related text to be saved.
114 * Updated if this box is worthy of more significant
115 * whitespace.
116 * \param whitespace_text Whitespace to place before next bit of
117 * content-related text to be saved.
118 * Updated if this box is worthy of more significant
119 * whitespace.
120 * \param whitespace_length Length of whitespace_text.
121 * Updated if this box is worthy of more significant
122 * whitespace.
123 */
124
125void save_text_solve_whitespace(struct box *box, bool_Bool *first,
126 save_text_whitespace *before, const char **whitespace_text,
127 size_t *whitespace_length)
128{
129 /* work out what whitespace should be placed before the next bit of
130 * text */
131 if (*before < WHITESPACE_TWO_NEW_LINES &&
132 /* significant box type */
133 (box->type == BOX_BLOCK ||
134 box->type == BOX_TABLE ||
135 box->type == BOX_FLOAT_LEFT ||
136 box->type == BOX_FLOAT_RIGHT) &&
137 /* and not a list element */
138 !box->list_marker &&
139 /* and not a marker... */
140 (!(box->parent && box->parent->list_marker == box) ||
141 /* ...unless marker follows WHITESPACE_TAB */
142 ((box->parent && box->parent->list_marker == box) &&
143 *before == WHITESPACE_TAB))) {
144 *before = WHITESPACE_TWO_NEW_LINES;
145 } else if (*before <= WHITESPACE_ONE_NEW_LINE &&
146 (box->type == BOX_TABLE_ROW ||
147 box->type == BOX_BR ||
148 (box->type != BOX_INLINE &&
149 (box->parent && box->parent->list_marker == box)) ||
150 (box->parent && box->parent->style &&
151 (css_computed_white_space(box->parent->style) ==
152 CSS_WHITE_SPACE_PRE ||
153 css_computed_white_space(box->parent->style) ==
154 CSS_WHITE_SPACE_PRE_WRAP) &&
155 box->type == BOX_INLINE_CONTAINER))) {
156 if (*before == WHITESPACE_ONE_NEW_LINE)
157 *before = WHITESPACE_TWO_NEW_LINES;
158 else
159 *before = WHITESPACE_ONE_NEW_LINE;
160 }
161 else if (*before < WHITESPACE_TAB &&
162 (box->type == BOX_TABLE_CELL ||
163 box->list_marker)) {
164 *before = WHITESPACE_TAB;
165 }
166
167 if (*first) {
168 /* before the first bit of text to be saved; there is
169 * no preceding whitespace */
170 *whitespace_text = "";
171 *whitespace_length = 0;
172 } else {
173 /* set the whitespace that has been decided on */
174 switch (*before) {
175 case WHITESPACE_TWO_NEW_LINES:
176 *whitespace_text = "\n\n";
177 *whitespace_length = 2;
178 break;
179 case WHITESPACE_ONE_NEW_LINE:
180 *whitespace_text = "\n";
181 *whitespace_length = 1;
182 break;
183 case WHITESPACE_TAB:
184 *whitespace_text = "\t";
185 *whitespace_length = 1;
186 break;
187 case WHITESPACE_NONE:
188 *whitespace_text = "";
189 *whitespace_length = 0;
190 break;
191 default:
192 *whitespace_text = "";
193 *whitespace_length = 0;
194 break;
195 }
196 }
197}
198
199
200/**
201 * Traverse through the box tree and add all text to a save buffer.
202 *
203 * \param box Pointer to box.
204 * \param first Whether this is before the first bit of content-related
205 * text to be saved.
206 * \param before Type of whitespace currently intended to be placed
207 * before the next bit of content-related text to be saved.
208 * Updated if this box is worthy of more significant
209 * whitespace.
210 * \param save our save_text_state workspace pointer
211 * \note Returns nothing: the result of save_text_add_to_buffer() is not checked, so a failed append does not stop the traversal.
212 */
213
214void extract_text(struct box *box, bool_Bool *first, save_text_whitespace *before,
215 struct save_text_state *save)
216{
217 struct box *child;
218 const char *whitespace_text = "";
219 size_t whitespace_length = 0;
220
221 assert(box)((box) ? (void) (0) : __assert_fail ("box", "desktop/save_text.c"
, 221, __extension__ __PRETTY_FUNCTION__))
;
5
Assuming 'box' is non-null
6
'?' condition is true
222
223 /* If box has a list marker */
224 if (box->list_marker) {
7
Assuming field 'list_marker' is null
8
Taking false branch
225 /* do the marker box before continuing with the rest of the
226 * list element */
227 extract_text(box->list_marker, first, before, save);
228 }
229
230 /* read before calling the handler in case it modifies the tree */
231 child = box->children;
232
233 save_text_solve_whitespace(box, first, before, &whitespace_text,
234 &whitespace_length);
235
236 if (box->type
8.1
Field 'type' is not equal to BOX_BR
!= BOX_BR && !((box->type
8.2
Field 'type' is not equal to BOX_FLOAT_LEFT
== BOX_FLOAT_LEFT ||
11
Taking true branch
237 box->type
8.3
Field 'type' is equal to BOX_FLOAT_RIGHT
== BOX_FLOAT_RIGHT) && !box->text) &&
9
Assuming field 'text' is non-null
238 box->length > 0 && box->text
10.1
Field 'text' is non-null
) {
10
Assuming field 'length' is > 0
239 /* Box meets criteria for export; add text to buffer */
240 save_text_add_to_buffer(box->text, box->length, box,
12
Calling 'save_text_add_to_buffer'
241 whitespace_text, whitespace_length, save);
242 *first = false0;
243 *before = WHITESPACE_NONE;
244 }
245
246 /* Work through the children of this box, extracting any text */
247 while (child) {
248 extract_text(child, first, before, save);
249 child = child->next;
250 }
251
252 return;
253}
254
255
256/**
257 * Add text to save text buffer. Any preceding whitespace or following space is
258 * also added to the buffer.
259 *
260 * \param text Pointer to text being added.
261 * \param length Length of text to be appended (bytes).
262 * \param box Pointer to text box.
263 * \param whitespace_text Whitespace to place before text for formatting
264 * may be NULL.
265 * \param whitespace_length Length of whitespace_text.
266 * \param save Our save_text_state workspace pointer.
267 * \return true if the text was appended to the buffer, false if growing the buffer failed.
268 */
269
270bool_Bool save_text_add_to_buffer(const char *text, size_t length, struct box *box,
271 const char *whitespace_text, size_t whitespace_length,
272 struct save_text_state *save)
273{
274 size_t new_length;
275 int space = 0;
276
277 assert(save)((save) ? (void) (0) : __assert_fail ("save", "desktop/save_text.c"
, 277, __extension__ __PRETTY_FUNCTION__))
;
13
'?' condition is true
278
279 if (box->space > 0)
14
Assuming field 'space' is > 0
15
Taking true branch
280 space = 1;
281
282 if (whitespace_text
15.1
'whitespace_text' is non-null
)
16
Taking true branch
283 length += whitespace_length;
284
285 new_length = save->length + whitespace_length + length + space;
286 if (new_length
16.1
'new_length' is >= field 'alloc'
>= save->alloc) {
17
Taking true branch
287 size_t new_alloc = save->alloc + (save->alloc / 4);
288 char *new_block;
289
290 if (new_alloc < new_length) new_alloc = new_length;
18
Assuming 'new_alloc' is >= 'new_length'
19
Taking false branch
291
292 new_block = realloc(save->block, new_alloc);
293 if (!new_block) return false0;
20
Assuming 'new_block' is non-null
21
Taking false branch
294
295 save->block = new_block;
296 save->alloc = new_alloc;
297 }
298 if (whitespace_text
21.1
'whitespace_text' is non-null
) {
22
Taking true branch
299 memcpy(save->block + save->length, whitespace_text,
300 whitespace_length);
301 }
302 memcpy(save->block + save->length + whitespace_length, text, length);
303 save->length += length;
304
305 if (space
22.1
'space' is equal to 1
== 1)
23
Taking true branch
306 save->block[save->length++] = ' ';
24
Use of memory allocated with size zero
307
308 return true1;
309}