NetSurf
textselection.c
Go to the documentation of this file.
/*
 * Copyright 2020 Vincent Sanders <vince@netsurf-browser.org>
 *
 * This file is part of NetSurf, http://www.netsurf-browser.org/
 *
 * NetSurf is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; version 2 of the License.
 *
 * NetSurf is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

/**
 * \file
 * Implementation of text selection for HTML content.
 */
23
#include <assert.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdlib.h>

#include "utils/errors.h"
#include "utils/utils.h"
#include "netsurf/types.h"
#include "netsurf/plot_style.h"
#include "desktop/selection.h"
#include "desktop/save_text.h"

#include "html/private.h"
#include "html/box.h"
#include "html/box_inspect.h"
#include "html/font.h"
#include "html/textselection.h"
38
/**
 * Number of bytes a box's trailing space contributes to the textual
 * representation: 1 when the box's \c space member is non-zero, else 0.
 *
 * The argument is parenthesised so that any pointer expression
 * (e.g. `p + 1`) may be passed, and it is evaluated exactly once.
 *
 * \param b  pointer to an object with a \c space member
 */
#define SPACE_LEN(b) (((b)->space == 0) ? 0 : 1)
40
41struct rdw_info {
42 bool inited;
43 struct rect r;
44};
45
46
47/**
48 * Tests whether a text box lies partially within the given range of
49 * byte offsets, returning the start and end indexes of the bytes
50 * that are enclosed.
51 *
52 * \param box box to be tested
53 * \param start_idx byte offset of start of range
54 * \param end_idx byte offset of end of range
55 * \param start_offset receives the start offset of the selected part
56 * \param end_offset receives the end offset of the selected part
57 * \return true iff the range encloses at least part of the box
58 */
59static bool
61 unsigned start_idx,
62 unsigned end_idx,
63 unsigned *start_offset,
64 unsigned *end_offset)
65{
66 size_t box_length = box->length + SPACE_LEN(box);
67
68 if (box_length > 0) {
69 if ((box->byte_offset >= start_idx) &&
70 (box->byte_offset + box_length <= end_idx)) {
71
72 /* fully enclosed */
73 *start_offset = 0;
74 *end_offset = box_length;
75 return true;
76 } else if ((box->byte_offset + box_length > start_idx) &&
77 (box->byte_offset < end_idx)) {
78 /* partly enclosed */
79 int offset = 0;
80 int len;
81
82 if (box->byte_offset < start_idx) {
83 offset = start_idx - box->byte_offset;
84 }
85
86 len = box_length - offset;
87
88 if (box->byte_offset + box_length > end_idx) {
89 len = end_idx - (box->byte_offset + offset);
90 }
91
92 *start_offset = offset;
93 *end_offset = offset + len;
94
95 return true;
96 }
97 }
98 return false;
99}
100
101
102/**
103 * Traverse the given box subtree adding the boxes inside the
104 * selection to the coordinate range.
105 *
106 * \param box box subtree
107 * \param start_idx start of range within textual representation (bytes)
108 * \param end_idx end of range
109 * \param rdwi redraw range to fill in
110 * \param do_marker whether deal enter any marker box
111 * \return NSERROR_OK on success else error code
112 */
113static nserror
115 unsigned start_idx,
116 unsigned end_idx,
117 struct rdw_info *rdwi,
118 bool do_marker)
119{
120 struct box *child;
121 nserror res;
122
123 assert(box);
124
125 /* If selection starts inside marker */
126 if (box->parent &&
127 box->parent->list_marker == box &&
128 !do_marker) {
129 /* set box to main list element */
130 box = box->parent;
131 }
132
133 /* If box has a list marker */
134 if (box->list_marker) {
135 /* do the marker box before continuing with the rest of the
136 * list element */
138 start_idx,
139 end_idx,
140 rdwi,
141 true);
142 if (res != NSERROR_OK) {
143 return res;
144 }
145 }
146
147 /* we can prune this subtree, it's after the selection */
148 if (box->byte_offset >= end_idx) {
149 return NSERROR_OK;
150 }
151
152 /* read before calling the handler in case it modifies the tree */
153 child = box->children;
154
155 if ((box->type != BOX_BR) &&
156 !((box->type == BOX_FLOAT_LEFT ||
157 box->type == BOX_FLOAT_RIGHT) &&
158 !box->text)) {
159 unsigned start_off;
160 unsigned end_off;
161
162 if (selected_part(box, start_idx, end_idx, &start_off, &end_off)) {
163 int width, height;
164 int x, y;
165
166 /**
167 * \todo it should be possible to reduce the redrawn
168 * area using the offsets
169 */
170 box_coords(box, &x, &y);
171
174
175 if ((box->type == BOX_TEXT) &&
176 (box->space != 0)) {
177 width += box->space;
178 }
179
180 if (rdwi->inited) {
181 if (x < rdwi->r.x0) {
182 rdwi->r.x0 = x;
183 }
184 if (y < rdwi->r.y0) {
185 rdwi->r.y0 = y;
186 }
187 if (x + width > rdwi->r.x1) {
188 rdwi->r.x1 = x + width;
189 }
190 if (y + height > rdwi->r.y1) {
191 rdwi->r.y1 = y + height;
192 }
193 } else {
194 rdwi->inited = true;
195 rdwi->r.x0 = x;
196 rdwi->r.y0 = y;
197 rdwi->r.x1 = x + width;
198 rdwi->r.y1 = y + height;
199 }
200 }
201 }
202
203 /* find the first child that could lie partially within the selection;
204 * this is important at the top-levels of the tree for pruning subtrees
205 * that lie entirely before the selection */
206
207 if (child) {
208 struct box *next = child->next;
209
210 while (next && next->byte_offset < start_idx) {
211 child = next;
212 next = child->next;
213 }
214
215 while (child) {
216 /* read before calling the handler in case it modifies
217 * the tree */
218 struct box *next = child->next;
219
220 res = coords_from_range(child,
221 start_idx,
222 end_idx,
223 rdwi,
224 false);
225 if (res != NSERROR_OK) {
226 return res;
227 }
228
229 child = next;
230 }
231 }
232
233 return NSERROR_OK;
234}
235
236
237/**
238 * Append the contents of a box to a selection along with style information
239 *
240 * \param text pointer to text being added, or NULL for newline
241 * \param length length of text to be appended (bytes)
242 * \param box pointer to text box, or NULL if from textplain
243 * \param unit_len_ctx Length conversion context
244 * \param handle selection string to append to
245 * \param whitespace_text whitespace to place before text for formatting
246 * may be NULL
247 * \param whitespace_length length of whitespace_text
248 * \return NSERROR_OK iff successful and traversal should continue else error code
249 */
250static nserror
252 size_t length,
253 struct box *box,
254 const css_unit_ctx *unit_len_ctx,
255 struct selection_string *handle,
256 const char *whitespace_text,
257 size_t whitespace_length)
258{
259 bool add_space = false;
261 plot_font_style_t *pstyle = NULL;
262
263 /* add any whitespace which precedes the text from this box */
264 if (whitespace_text != NULL &&
265 whitespace_length > 0) {
266 if (!selection_string_append(whitespace_text,
267 whitespace_length,
268 false,
269 pstyle,
270 handle)) {
271 return NSERROR_NOMEM;
272 }
273 }
274
275 if (box != NULL) {
276 /* HTML */
277 add_space = (box->space != 0);
278
279 if (box->style != NULL) {
280 /* Override default font style */
281 font_plot_style_from_css(unit_len_ctx, box->style, &style);
282 pstyle = &style;
283 } else {
284 /* If there's no style, there must be no text */
285 assert(box->text == NULL);
286 }
287 }
288
289 /* add the text from this box */
290 if (!selection_string_append(text, length, add_space, pstyle, handle)) {
291 return NSERROR_NOMEM;
292 }
293
294 return NSERROR_OK;
295}
296
297
298/**
299 * Traverse the given box subtree, calling selection copy for all
300 * boxes that lie (partially) within the given range
301 *
302 * \param box box subtree
303 * \param unit_len_ctx Length conversion context.
304 * \param start_idx start of range within textual representation (bytes)
305 * \param end_idx end of range
306 * \param handler handler function to call
307 * \param handle handle to pass
308 * \param before type of whitespace to place before next encountered text
309 * \param first whether this is the first box with text
310 * \param do_marker whether deal enter any marker box
311 * \return NSERROR_OK on sucess else error code
312 */
313static nserror
315 const css_unit_ctx *unit_len_ctx,
316 unsigned start_idx,
317 unsigned end_idx,
318 struct selection_string *selstr,
319 save_text_whitespace *before,
320 bool *first,
321 bool do_marker)
322{
323 nserror res;
324 struct box *child;
325 const char *whitespace_text = "";
326 size_t whitespace_length = 0;
327
328 assert(box);
329
330 /* If selection starts inside marker */
331 if (box->parent &&
332 box->parent->list_marker == box &&
333 !do_marker) {
334 /* set box to main list element */
335 box = box->parent;
336 }
337
338 /* If box has a list marker */
339 if (box->list_marker) {
340 /* do the marker box before continuing with the rest of the
341 * list element */
343 unit_len_ctx,
344 start_idx,
345 end_idx,
346 selstr,
347 before,
348 first,
349 true);
350 if (res != NSERROR_OK) {
351 return res;
352 }
353 }
354
355 /* we can prune this subtree, it's after the selection */
356 if (box->byte_offset >= end_idx) {
357 return NSERROR_OK;
358 }
359
360 /* read before calling the handler in case it modifies the tree */
361 child = box->children;
362
363 /* If nicely formatted output of the selected text is required, work
364 * out what whitespace should be placed before the next bit of text */
365 if (before) {
367 first,
368 before,
369 &whitespace_text,
370 &whitespace_length);
371 } else {
372 whitespace_text = NULL;
373 }
374
375 if ((box->type != BOX_BR) &&
376 !((box->type == BOX_FLOAT_LEFT ||
377 box->type == BOX_FLOAT_RIGHT) &&
378 !box->text)) {
379 unsigned start_off;
380 unsigned end_off;
381
382 if (selected_part(box, start_idx, end_idx, &start_off, &end_off)) {
383 res = selection_copy_box(box->text + start_off,
384 min(box->length, end_off) - start_off,
385 box,
386 unit_len_ctx,
387 selstr,
388 whitespace_text,
389 whitespace_length);
390 if (res != NSERROR_OK) {
391 return res;
392 }
393 if (before) {
394 *first = false;
395 *before = WHITESPACE_NONE;
396 }
397 }
398 }
399
400 /* find the first child that could lie partially within the selection;
401 * this is important at the top-levels of the tree for pruning subtrees
402 * that lie entirely before the selection */
403
404 if (child) {
405 struct box *next = child->next;
406
407 while (next && next->byte_offset < start_idx) {
408 child = next;
409 next = child->next;
410 }
411
412 while (child) {
413 /* read before calling the handler in case it modifies
414 * the tree */
415 struct box *next = child->next;
416
417 res = selection_copy(child,
418 unit_len_ctx,
419 start_idx,
420 end_idx,
421 selstr,
422 before,
423 first,
424 false);
425 if (res != NSERROR_OK) {
426 return res;
427 }
428
429 child = next;
430 }
431 }
432
433 return NSERROR_OK;
434}
435
436
437/**
438 * Label each text box in the given box subtree with its position
439 * in a textual representation of the content.
440 *
441 * \param box The box at root of subtree
442 * \param idx current position within textual representation
443 * \return updated position
444 */
445static unsigned selection_label_subtree(struct box *box, unsigned idx)
446{
447 struct box *child;
448
449 assert(box != NULL);
450
451 child = box->children;
452
453 box->byte_offset = idx;
454
455 if (box->text) {
456 idx += box->length + SPACE_LEN(box);
457 }
458
459 while (child) {
460 if (child->list_marker) {
461 idx = selection_label_subtree(child->list_marker, idx);
462 }
463
464 idx = selection_label_subtree(child, idx);
465 child = child->next;
466 }
467
468 return idx;
469}
470
471
472/* exported interface documented in html/textselection.h */
475 unsigned start_idx,
476 unsigned end_idx)
477{
478 nserror res;
479 html_content *html = (html_content *)c;
480 struct rdw_info rdw;
481
482 if (html->layout == NULL) {
483 return NSERROR_INVALID;
484 }
485
486 rdw.inited = false;
487
488 res = coords_from_range(html->layout, start_idx, end_idx, &rdw, false);
489 if (res != NSERROR_OK) {
490 return res;
491 }
492
493 if (rdw.inited) {
495 rdw.r.x0,
496 rdw.r.y0,
497 rdw.r.x1 - rdw.r.x0,
498 rdw.r.y1 - rdw.r.y0);
499 }
500
501 return NSERROR_OK;
502}
503
504
505/* exported interface documented in html/textselection.h */
508 unsigned start_idx,
509 unsigned end_idx,
510 struct selection_string *selstr)
511{
512 html_content *html = (html_content *)c;
514 bool first = true;
515
516 if (html->layout == NULL) {
517 return NSERROR_INVALID;
518 }
519
520 return selection_copy(html->layout,
521 &html->unit_len_ctx,
522 start_idx,
523 end_idx,
524 selstr,
525 &before,
526 &first,
527 false);
528}
529
530
531/* exported interface documented in html/textselection.h */
533html_textselection_get_end(struct content *c, unsigned *end_idx)
534{
535 html_content *html = (html_content *)c;
536 unsigned root_idx;
537
538 if (html->layout == NULL) {
539 return NSERROR_INVALID;
540 }
541
542 root_idx = 0;
543
544 *end_idx = selection_label_subtree(html->layout, root_idx);
545
546 return NSERROR_OK;
547}
Box interface.
@ BOX_FLOAT_LEFT
Definition: box.h:63
@ BOX_FLOAT_RIGHT
Definition: box.h:64
@ BOX_TEXT
Definition: box.h:67
@ BOX_BR
Definition: box.h:66
@ TOP
Definition: box.h:98
@ BOTTOM
Definition: box.h:98
@ LEFT
Definition: box.h:98
@ RIGHT
Definition: box.h:98
void box_coords(struct box *box, int *x, int *y)
Find the absolute coordinates of a box.
Definition: box_inspect.c:549
HTML Box tree inspection interface.
void font_plot_style_from_css(const css_unit_ctx *unit_len_ctx, const css_computed_style *css, plot_font_style_t *fstyle)
Populate a font style using data from a computed CSS style.
Definition: font.c:135
Internal font handling interfaces.
static nserror coords_from_range(struct box *box, unsigned start_idx, unsigned end_idx, struct rdw_info *rdwi, bool do_marker)
Traverse the given box subtree adding the boxes inside the selection to the coordinate range.
static unsigned selection_label_subtree(struct box *box, unsigned idx)
Label each text box in the given box subtree with its position in a textual representation of the con...
#define SPACE_LEN(b)
Definition: textselection.c:39
static bool selected_part(struct box *box, unsigned start_idx, unsigned end_idx, unsigned *start_offset, unsigned *end_offset)
Tests whether a text box lies partially within the given range of byte offsets, returning the start a...
Definition: textselection.c:60
nserror html_textselection_get_end(struct content *c, unsigned *end_idx)
get maximum index of text section.
nserror html_textselection_copy(struct content *c, unsigned start_idx, unsigned end_idx, struct selection_string *selstr)
static nserror selection_copy_box(const char *text, size_t length, struct box *box, const css_unit_ctx *unit_len_ctx, struct selection_string *handle, const char *whitespace_text, size_t whitespace_length)
Append the contents of a box to a selection along with style information.
nserror html_textselection_redraw(struct content *c, unsigned start_idx, unsigned end_idx)
static nserror selection_copy(struct box *box, const css_unit_ctx *unit_len_ctx, unsigned start_idx, unsigned end_idx, struct selection_string *selstr, save_text_whitespace *before, bool *first, bool do_marker)
Traverse the given box subtree, calling selection copy for all boxes that lie (partially) within the ...
HTML text selection handling.
void content__request_redraw(struct content *c, int x, int y, int width, int height)
Request a redraw of an area of a content.
Definition: content.c:459
bool selection_string_append(const char *text, size_t length, bool space, plot_font_style_t *style, struct selection_string *sel_string)
Append text to selection string.
Definition: selection.c:191
Text selection within browser windows (interface).
Error codes.
nserror
Enumeration of error codes.
Definition: errors.h:29
@ NSERROR_INVALID
Invalid data.
Definition: errors.h:49
@ NSERROR_NOMEM
Memory exhaustion.
Definition: errors.h:32
@ NSERROR_OK
No error.
Definition: errors.h:30
plotter style interfaces, generic styles and style colour helpers.
Private data for text/html content.
int width
Definition: gui.c:160
int height
Definition: gui.c:161
void save_text_solve_whitespace(struct box *box, bool *first, save_text_whitespace *before, const char **whitespace_text, size_t *whitespace_length)
Decide what whitespace to place before the next bit of content-related text that is saved.
Definition: save_text.c:125
Text export of HTML (interface).
save_text_whitespace
Definition: save_text.h:37
@ WHITESPACE_NONE
Definition: save_text.h:38
Node in box tree.
Definition: box.h:177
int width
Width of content box (excluding padding etc.).
Definition: box.h:289
struct box * parent
Parent box, or NULL.
Definition: box.h:236
size_t byte_offset
Byte offset within a textual representation of this content.
Definition: box.h:370
struct box * children
First child box, or NULL.
Definition: box.h:226
int height
Height of content box (excluding padding etc.).
Definition: box.h:293
struct box * list_marker
List marker box if this is a list-item, or NULL.
Definition: box.h:417
struct box * next
Next sibling box, or NULL.
Definition: box.h:216
box_type type
Type of box.
Definition: box.h:181
css_computed_style * style
Style for this box.
Definition: box.h:205
size_t length
Length of text.
Definition: box.h:360
char * text
Text, or NULL if none.
Definition: box.h:355
int padding[4]
Padding: TOP, RIGHT, BOTTOM, LEFT.
Definition: box.h:322
int x
Coordinate of left padding edge relative to parent box, or relative to ancestor that contains this bo...
Definition: box.h:280
int space
Width of space after current text (depends on font and size).
Definition: box.h:365
int y
Coordinate of top padding edge, relative as for x.
Definition: box.h:284
Content which corresponds to a single URL.
Data specific to CONTENT_HTML.
Definition: private.h:93
css_unit_ctx unit_len_ctx
CSS length conversion context for document.
Definition: private.h:163
struct box * layout
Box tree, or NULL.
Definition: private.h:140
Font style for plotting.
Definition: plot_style.h:111
bool inited
Definition: textselection.c:42
struct rect r
Definition: textselection.c:43
Rectangle coordinates.
Definition: types.h:40
int x0
Definition: types.h:41
int y0
Top left.
Definition: types.h:41
int x1
Definition: types.h:42
int y1
Bottom right.
Definition: types.h:42
NetSurf types.
Interface to a number of general purpose functionality.
#define min(x, y)
Definition: utils.h:46
static nserror text(const struct redraw_context *ctx, const struct plot_font_style *fstyle, int x, int y, const char *text, size_t length)
Text plotting.
Definition: plot.c:978