NetSurf
textsearch.c
Go to the documentation of this file.
1/*
2 * Copyright 2004 John M Bell <jmb202@ecs.soton.ac.uk>
3 * Copyright 2020 Vincent Sanders <vince@netsurf-browser.org>
4 *
5 * This file is part of NetSurf, http://www.netsurf-browser.org/
6 *
7 * NetSurf is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; version 2 of the License.
10 *
11 * NetSurf is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program. If not, see <http://www.gnu.org/licenses/>.
18 */
19
20/**
21 * \file
22 * Free text search
23 */
24
25#include <stdbool.h>
26#include <stddef.h>
27#include <stdlib.h>
28#include <string.h>
29
30#include "utils/errors.h"
31#include "utils/utils.h"
32#include "utils/ascii.h"
33#include "netsurf/types.h"
34#include "desktop/selection.h"
35
36#include "content/content.h"
38#include "content/hlcache.h"
39#include "content/textsearch.h"
40
41/**
42 * search match
 *
 * One entry in the doubly linked list of matches kept by a search
 * context. The list head held by the context is also a list_entry.
43 */
44struct list_entry {
45 /**
46 * previous match
 *
 * NOTE(review): the member declaration this comment documents is
 * not present in this listing; code elsewhere in the file accesses
 * it as ->prev.
47 */
49
50 /**
51 * next match
 *
 * NOTE(review): the member declaration this comment documents is
 * not present in this listing; code elsewhere in the file accesses
 * it as ->next.
52 */
54
55 /**
56 * start position of match
 *
 * Passed unchanged to the content's textsearch_bounds handler and
 * to selection_set_position-style highlighting.
57 */
58 unsigned start_idx;
59
60 /**
61 * end of match
62 */
63 unsigned end_idx;
64
65 /**
66 * content opaque start pointer
 *
 * Never dereferenced in this file; only stored and handed back to
 * the content's textsearch_bounds handler.
67 */
68 struct box *start_box;
69
70 /**
71 * content opaque end pointer
 *
 * Never dereferenced in this file; only stored and handed back to
 * the content's textsearch_bounds handler.
72 */
73 struct box *end_box;
74
75 /**
76 * content specific selection object
 *
 * NULL until the match is highlighted; created with
 * selection_create() when the match is shown.
77 */
78 struct selection *sel;
79};
80
81/**
82 * The context for a free text search
 *
 * NOTE(review): the line opening this structure definition is not
 * present in this listing. Code in this file also reads and writes
 * members named prev_case_sens and newsearch, whose declarations are
 * likewise not visible here (they would sit just before the closing
 * brace).
83 */
85
86 /**
87 * content search was performed upon
88 */
89 struct content *c;
90
91 /**
92 * opaque pointer passed to constructor.
 *
 * Copied into the ctx field of every textsearch message that is
 * broadcast for this context.
93 */
94 void *gui_p;
95
96 /**
97 * List of matches
 *
 * NOTE(review): the member declaration this comment documents is
 * not present in this listing; code elsewhere in the file accesses
 * it as ->found and treats it as a list head whose ->next is the
 * first match and whose ->prev is the last match.
98 */
100
101 /**
102 * current selected match
 *
 * NULL when there is no current match.
103 */
104 struct list_entry *current; /* first for select all */
105
106 /**
107 * query string search results are for
 *
 * Heap allocated copy owned by the context; may be NULL.
108 */
109 char *string;
112};
113
114
115/**
116 * broadcast textsearch message
 *
 * Fills in the textsearch member of a content_msg_data union with the
 * message type, the context's gui_p pointer, the state flag and the
 * string.
 *
 * \param textsearch The search context the message relates to.
 * \param type The textsearch message type.
 * \param state State value carried by the message.
 * \param string String carried by the message, may be NULL.
 *
 * NOTE(review): the line carrying the function name and first parameter,
 * and the statement that actually sends msg_data, are not present in
 * this listing; presumably the latter is a content_broadcast() call on
 * textsearch->c - confirm against the real source.
117 */
118static inline void
120 int type,
121 bool state,
122 const char *string)
123{
124 union content_msg_data msg_data;
125 msg_data.textsearch.type = type;
126 msg_data.textsearch.ctx = textsearch->gui_p;
127 msg_data.textsearch.state = state;
128 msg_data.textsearch.string = string;
130}
131
132
133/**
134 * Release the memory used by the list of matches,
135 * deleting selection objects too
 *
 * Only the entries after the list head are freed; the head itself
 * (textsearch->found) stays allocated with both of its links reset
 * to NULL. textsearch->current is not modified here.
 *
 * NOTE(review): the function header line is not present in this
 * listing; the body operates on a context pointer named textsearch.
136 */
138{
139 struct list_entry *cur;
140 struct list_entry *nxt;
141
142 cur = textsearch->found->next;
143
144 /*
145 * empty the list before clearing and deleting the selections
146 * because the clearing may update the toolkit immediately,
147 * causing nested accesses to the list
148 */
149
150 textsearch->found->prev = NULL;
151 textsearch->found->next = NULL;
152
153 for (; cur; cur = nxt) {
 /* fetch the successor first: cur is freed below */
154 nxt = cur->next;
155 if (cur->sel) {
 /*
 * NOTE(review): the statement releasing cur->sel is not
 * present in this listing; presumably selection_destroy().
 */
157 }
158 free(cur);
159 }
160}
161
162
163/**
164 * Specifies whether all matches or just the current match should
165 * be highlighted in the search text.
 *
 * \param all true to highlight every match, false to highlight only
 * the context's current match.
 * \param context The search context whose match list is walked.
166 */
167static void search_show_all(bool all, struct textsearch_context *context)
168{
169 struct list_entry *a;
170
171 for (a = context->found->next; a; a = a->next) {
172 bool add = true;
173 if (!all && a != context->current) {
 /* not the current match and only one is wanted: drop highlight */
174 add = false;
175 if (a->sel) {
 /*
 * NOTE(review): the statement releasing a->sel before it
 * is cleared is not present in this listing.
 */
177 a->sel = NULL;
178 }
179 }
180
 /* create a selection only for matches that do not have one yet */
181 if (add && !a->sel) {
182
183 a->sel = selection_create(context->c);
184 if (a->sel != NULL) {
 /*
 * NOTE(review): the call lines that consume the two
 * arguments below are not present in this listing.
 */
187 a->start_idx,
188 a->end_idx);
189 }
190 }
191 }
192}
193
194
195/**
196 * Search for a string in a content.
197 *
 * Either starts a fresh search (when the context is flagged as a new
 * search or the case sensitivity changed) or moves the current match
 * one step forwards or backwards. Afterwards the match, back and
 * forward states are broadcast and, if there is a current match, a
 * scroll request for its bounds is broadcast.
 *
198 * \param context The search context.
199 * \param string the string to search for
200 * \param string_len length of search string
201 * \param flags flags to control the search.
 * \return NSERROR_OK when the context has no content or there is no
 * current match, otherwise the result of the content's find or
 * bounds handler.
 *
 * NOTE(review): the line carrying the function name and first parameter
 * is not present in this listing.
202 */
203static nserror
205 const char *string,
206 int string_len,
207 search_flags_t flags)
208{
209 struct rect bounds;
210 union content_msg_data msg_data;
211 bool case_sensitive, forwards, showall;
212 nserror res = NSERROR_OK;
213
214 case_sensitive = ((flags & SEARCH_FLAG_CASE_SENSITIVE) != 0) ?
215 true : false;
216 forwards = ((flags & SEARCH_FLAG_FORWARDS) != 0) ? true : false;
217 showall = ((flags & SEARCH_FLAG_SHOWALL) != 0) ? true : false;
218
219 if (context->c == NULL) {
220 return res;
221 }
222
223 /* check if we need to start a new search or continue an old one */
224 if ((context->newsearch) ||
225 (context->prev_case_sens != case_sensitive)) {
226
227 if (context->string != NULL) {
228 free(context->string);
229 }
230
 /* current would dangle once the matches are freed, so reset it first */
231 context->current = NULL;
232 free_matches(context);
233
 /*
 * keep a private NUL terminated copy of the query; if the
 * allocation fails the search still proceeds with
 * context->string left NULL
 */
234 context->string = malloc(string_len + 1);
235 if (context->string != NULL) {
236 memcpy(context->string, string, string_len);
237 context->string[string_len] = '\0';
238 }
239
240 /* indicate find operation starting */
241 textsearch_broadcast(context, CONTENT_TEXTSEARCH_FIND, true, NULL);
242
243
244 /* call content find handler */
245 res = context->c->handler->textsearch_find(context->c,
246 context,
247 string,
248 string_len,
249 case_sensitive);
250
251 /* indicate find operation finished */
252 textsearch_broadcast(context, CONTENT_TEXTSEARCH_FIND, false, NULL);
253
254 if (res != NSERROR_OK) {
 /*
 * discard any partial results; newsearch is left set so
 * the next call starts over
 */
255 free_matches(context);
256 return res;
257 }
258
259 context->prev_case_sens = case_sensitive;
260
261 /* new search, beginning at the top of the page */
262 context->current = context->found->next;
263 context->newsearch = false;
264
265 } else if (context->current != NULL) {
266 /* continued search in the direction specified */
 /* at either end of the list the current match stays where it is */
267 if (forwards) {
268 if (context->current->next)
269 context->current = context->current->next;
270 } else {
271 if (context->current->prev)
272 context->current = context->current->prev;
273 }
274 }
275
276 /* update match state */
 /*
 * NOTE(review): in this and the two broadcasts below the line
 * carrying the message type argument is not present in this listing.
 */
277 textsearch_broadcast(context,
279 (context->current != NULL),
280 NULL);
281
282 search_show_all(showall, context);
283
284 /* update back state */
285 textsearch_broadcast(context,
287 ((context->current != NULL) &&
288 (context->current->prev != NULL)),
289 NULL);
290
291 /* update forward state */
292 textsearch_broadcast(context,
294 ((context->current != NULL) &&
295 (context->current->next != NULL)),
296 NULL);
297
298
299 if (context->current == NULL) {
300 /* no current match */
301 return res;
302 }
303
304 /* call content match bounds handler */
305 res = context->c->handler->textsearch_bounds(context->c,
306 context->current->start_idx,
307 context->current->end_idx,
308 context->current->start_box,
309 context->current->end_box,
310 &bounds);
311 if (res == NSERROR_OK) {
 /* ask users of the content to scroll the match into view */
312 msg_data.scroll.area = true;
313 msg_data.scroll.x0 = bounds.x0;
314 msg_data.scroll.y0 = bounds.y0;
315 msg_data.scroll.x1 = bounds.x1;
316 msg_data.scroll.y1 = bounds.y1;
317 content_broadcast(context->c, CONTENT_MSG_SCROLL, &msg_data);
318 }
319
320 return res;
321}
322
323
324/**
325 * Begins/continues the search process
326 *
327 * \note that this may be called many times for a single search.
328 *
329 * \param textsearch The search context in use.
330 * \param flags The flags forward/back etc
331 * \param string The string to match
 * \return NSERROR_OK when the string holds only wildcard characters
 * (or is empty), otherwise the result of search_text().
 *
 * NOTE(review): several lines of this function are not present in this
 * listing: the line(s) with the function name and leading parameters,
 * and the opening lines of each multi-line call below (only their
 * trailing arguments survive).
332 */
333static nserror
336 const char *string)
337{
338 int string_len;
339 int i = 0;
340 nserror res = NSERROR_OK;
341
342 assert(textsearch != NULL);
343
344 /* broadcast recent query string */
347 false,
348 string);
349
 /* find the first character that is not a '#' or '*' wildcard */
350 string_len = strlen(string);
351 for (i = 0; i < string_len; i++) {
352 if (string[i] != '#' && string[i] != '*')
353 break;
354 }
355
356 if (i < string_len) {
 /* the query contains at least one literal character: search */
357 res = search_text(textsearch, string, string_len, flags);
358 } else {
 /* empty or wildcard-only query: no search is run */
359 union content_msg_data msg_data;
360
 /*
 * NOTE(review): a statement that belongs here is not present
 * in this listing.
 */
362
363 /* update match state */
366 true,
367 NULL);
368
369 /* update back state */
372 false,
373 NULL);
374
375 /* update forward state */
378 false,
379 NULL);
380
381 /* clear scroll */
382 msg_data.scroll.area = false;
383 msg_data.scroll.x0 = 0;
384 msg_data.scroll.y0 = 0;
387 &msg_data);
388 }
389
390 return res;
391}
392
393
394/**
395 * Terminate a search.
396 *
 * Frees the query string stored on the content and detaches the
 * content's search context pointer.
 *
397 * \param c content to clear
 * \return NSERROR_OK always.
 *
 * NOTE(review): the function header line is not present in this
 * listing.
398 */
400{
401 free(c->textsearch.string);
402 c->textsearch.string = NULL;
403
404 if (c->textsearch.context != NULL) {
 /*
 * NOTE(review): the statement that disposes of the context before
 * the pointer is cleared is not present in this listing;
 * presumably content_textsearch_destroy().
 */
406 c->textsearch.context = NULL;
407 }
408 return NSERROR_OK;
409}
410
411
412/**
413 * create a search_context
414 *
 * Allocates the context together with an empty list head for the
 * matches and marks the context as a new search.
 *
415 * \param c The content the search_context is connected to
416 * \param gui_data A context pointer passed to the provider routines.
417 * \param textsearch_out A pointer to receive the new text search context
418 * \return NSERROR_OK on success and \a textsearch_out updated else error code
 * (NSERROR_NOMEM when either allocation fails).
 *
 * NOTE(review): the line carrying the function name and first parameter,
 * the declaration of the local named type, and the statement executed
 * when the content lacks search handlers are not present in this
 * listing.
419 */
420static nserror
422 void *gui_data,
423 struct textsearch_context **textsearch_out)
424{
425 struct textsearch_context *context;
426 struct list_entry *search_head;
428
429 if ((c->handler->textsearch_find == NULL) ||
430 (c->handler->textsearch_bounds == NULL)) {
431 /*
432 * content has no free text find handler so searching
433 * is unsupported.
434 */
436 }
437
 /* NOTE(review): type is assigned here but not read in the visible code */
438 type = c->handler->type();
439
440 context = malloc(sizeof(struct textsearch_context));
441 if (context == NULL) {
442 return NSERROR_NOMEM;
443 }
444
 /* the list head is a dummy entry; real matches hang off its links */
445 search_head = malloc(sizeof(struct list_entry));
446 if (search_head == NULL) {
447 free(context);
448 return NSERROR_NOMEM;
449 }
450
451 search_head->start_idx = 0;
452 search_head->end_idx = 0;
453 search_head->start_box = NULL;
454 search_head->end_box = NULL;
455 search_head->sel = NULL;
456 search_head->prev = NULL;
457 search_head->next = NULL;
458
459 context->found = search_head;
460 context->current = NULL;
461 context->string = NULL;
462 context->prev_case_sens = false;
463 context->newsearch = true;
464 context->c = c;
465 context->gui_p = gui_data;
466
467 *textsearch_out = context;
468
469 return NSERROR_OK;
470}
471
472
/* exported interface, documented in content/textsearch.h */
/*
 * Find the first place in a text where a wildcard pattern matches.
 *
 * In the pattern '*' matches any run of characters (including none) and
 * '#' matches exactly one character; every other character matches
 * itself, compared through ascii_to_upper() when case_sens is false.
 * The pattern is treated as if it were preceded by a '*', so the match
 * may start anywhere in the text.
 *
 * \param string    text to search, need not be NUL terminated
 * \param s_len     number of bytes of \a string to consider
 * \param pattern   pattern to match, need not be NUL terminated
 * \param p_len     number of bytes of \a pattern to consider
 * \param case_sens true for an exact comparison, false to ignore ASCII case
 * \param m_len     updated with the length of the match (at least 1);
 *                  only written when a match is found
 * \return pointer into \a string at the start of the match, or NULL if
 *         the pattern does not match
 */
const char *
content_textsearch_find_pattern(const char *string,
				int s_len,
				const char *pattern,
				int p_len,
				bool case_sens,
				unsigned int *m_len)
{
	/*
	 * Backtracking stack. Each entry records where to resume when a
	 * '*' has to swallow one more character. The pattern position is
	 * kept as an index so that the virtual leading '*' can be
	 * represented as -1 without forming a pointer in front of the
	 * pattern buffer.
	 */
	struct { const char *ss, *s; int p; bool first; } context[16];
	const char *es = string + s_len;
	int p = -1; /* a virtual '*' before the pattern */
	const char *ss = string;
	const char *s = string;
	bool first = true;
	int top = 0;

	while (p < p_len) {
		bool matches;

		if (p < 0 || pattern[p] == '*') {
			char ch;

			/* skip any further asterisks; one is the same as many
			 */
			do {
				p++;
			} while (p < p_len && pattern[p] == '*');

			/* if we're at the end of the pattern, yes, it matches
			 */
			if (p >= p_len) {
				break;
			}

			/* anything matches a # so continue matching from
			   here, and stack a context that will try to match
			   the wildcard against the next character */

			ch = pattern[p];
			if (ch != '#') {
				/* scan forwards until we find a match for
				   this char */
				if (!case_sens) {
					ch = ascii_to_upper(ch);
				}
				while (s < es) {
					if (case_sens) {
						if (*s == ch) {
							break;
						}
					} else if (ascii_to_upper(*s) == ch) {
						break;
					}
					s++;
				}
			}

			if (s < es) {
				/* remember where we are in case the match
				   fails; we may then resume. When the stack
				   is full the context is simply not saved. */
				if (top < (int)NOF_ELEMENTS(context)) {
					context[top].ss = ss;
					context[top].s = s + 1;
					/* index of last asterisk */
					context[top].p = p - 1;
					context[top].first = first;
					top++;
				}

				if (first) {
					/* remember first non-'*' char */
					ss = s;
					first = false;
				}

				matches = true;
			} else {
				matches = false;
			}

		} else if (s < es) {
			char ch = pattern[p];

			if (ch == '#') {
				matches = true;
			} else if (case_sens) {
				matches = (*s == ch);
			} else {
				matches = (ascii_to_upper(*s) ==
					   ascii_to_upper(ch));
			}

			if (matches && first) {
				ss = s; /* remember first non-'*' char */
				first = false;
			}
		} else {
			matches = false;
		}

		if (matches) {
			p++;
			s++;
		} else {
			/* doesn't match,
			 * resume with stacked context if we have one */
			if (--top < 0) {
				return NULL; /* no match, give up */
			}

			ss = context[top].ss;
			s = context[top].s;
			p = context[top].p;
			first = context[top].first;
		}
	}

	/* end of pattern reached; never report a zero length match */
	*m_len = max(s - ss, 1);
	return ss;
}
582
583
584/* exported interface, documented in content/textsearch.h */
/*
 * Appends a new match to the end of the context's match list.
 *
 * Returns NSERROR_NOMEM if the entry cannot be allocated, NSERROR_OK
 * otherwise.
 *
 * NOTE(review): the lines carrying the return type, function name and
 * first parameter are not present in this listing; the body uses a
 * search context pointer named context.
 */
587 unsigned start_idx,
588 unsigned end_idx,
589 struct box *start_box,
590 struct box *end_box)
591{
592 struct list_entry *entry;
593
594 /* found string in box => add to list */
595 entry = calloc(1, sizeof(*entry));
596 if (entry == NULL) {
597 return NSERROR_NOMEM;
598 }
599
600 entry->start_idx = start_idx;
601 entry->end_idx = end_idx;
602 entry->start_box = start_box;
603 entry->end_box = end_box;
604 entry->sel = NULL;
605
 /* the head's prev link tracks the current tail (NULL while empty) */
606 entry->next = NULL;
607 entry->prev = context->found->prev;
608
609 if (context->found->prev == NULL) {
 /* first match: hang it directly off the head */
610 context->found->next = entry;
611 } else {
612 context->found->prev->next = entry;
613 }
614
 /* the new entry becomes the tail */
615 context->found->prev = entry;
616
617 return NSERROR_OK;
618}
619
620
621/* exported interface, documented in content/textsearch.h */
/*
 * Walks the match list and reports whether any match that currently has
 * a selection object satisfies the per-selection test for the given
 * offset range; returns true on the first such match, false if none.
 *
 * NOTE(review): the line carrying the function name and first parameter,
 * and the opening line of the call inside the condition, are not present
 * in this listing; start_idx and end_idx are forwarded to that call.
 */
622bool
624 unsigned start_offset,
625 unsigned end_offset,
626 unsigned *start_idx,
627 unsigned *end_idx)
628{
629 struct list_entry *cur;
630
631 for (cur = textsearch->found->next; cur != NULL; cur = cur->next) {
 /* matches without a selection object are not highlighted */
632 if (cur->sel &&
634 start_offset,
635 end_offset,
636 start_idx,
637 end_idx)) {
638 return true;
639 }
640 }
641
642 return false;
643}
644
645
646/* exported interface, documented in content/textsearch.h */
648{
649 assert(textsearch != NULL);
650
651 if (textsearch->string != NULL) {
652 /* broadcast recent query string */
653 textsearch_broadcast(textsearch,
654 CONTENT_TEXTSEARCH_RECENT,
655 false,
656 textsearch->string);
657
658 free(textsearch->string);
659 }
660
661 /* update back state */
662 textsearch_broadcast(textsearch,
663 CONTENT_TEXTSEARCH_BACK,
664 true,
665 NULL);
666
667 /* update forward state */
668 textsearch_broadcast(textsearch,
669 CONTENT_TEXTSEARCH_FORWARD,
670 true,
671 NULL);
672
673 free_matches(textsearch);
674 free(textsearch);
675
676 return NSERROR_OK;
677}
678
679
680/* exported interface, documented in content/content.h */
/*
 * Dispatches a search request on the content behind a cache handle:
 *  - same string as the stored query and a context exists: continue it
 *  - a different non-NULL string: store a copy, build a fresh context
 *    and run the first step
 *  - NULL string: clear the stored query
 *
 * Returns NSERROR_NOMEM if the query string cannot be duplicated, the
 * error from creating the context if that fails, NSERROR_OK otherwise.
 *
 * NOTE(review): the lines carrying the return type, function name and
 * first parameter are not present in this listing; the body uses a
 * handle named h. Three further statements are missing, as marked below.
 *
 * NOTE(review): the value returned by content_textsearch_step() is
 * discarded in both places it is called, so a failing step is reported
 * to the caller as NSERROR_OK.
 */
683 void *context,
684 search_flags_t flags,
685 const char *string)
686{
687 struct content *c = hlcache_handle_get_content(h);
688 nserror res;
689
690 assert(c != NULL);
691
692 if (string != NULL &&
693 c->textsearch.string != NULL &&
694 c->textsearch.context != NULL &&
695 strcmp(string, c->textsearch.string) == 0) {
696 /* Continue prev. search */
697 content_textsearch_step(c->textsearch.context, flags, string);
698
699 } else if (string != NULL) {
700 /* New search */
701 free(c->textsearch.string);
702 c->textsearch.string = strdup(string);
703 if (c->textsearch.string == NULL) {
704 return NSERROR_NOMEM;
705 }
706
707 if (c->textsearch.context != NULL) {
 /*
 * NOTE(review): the statement that disposes of the old
 * context is not present in this listing.
 */
709 c->textsearch.context = NULL;
710 }
711
 /*
 * NOTE(review): the opening line of the call that assigns res
 * is not present in this listing; only its trailing arguments
 * follow.
 */
713 context,
714 &c->textsearch.context);
715 if (res != NSERROR_OK) {
716 return res;
717 }
718
719 content_textsearch_step(c->textsearch.context, flags, string);
720
721 } else {
722 /* Clear search */
 /*
 * NOTE(review): a statement that belongs here is not present
 * in this listing.
 */
724
725 free(c->textsearch.string);
726 c->textsearch.string = NULL;
727 }
728
729 return NSERROR_OK;
730}
731
732
733/* exported interface, documented in content/content.h */
735{
736 struct content *c = hlcache_handle_get_content(h);
737 assert(c != 0);
738
739 return(content_textsearch__clear(c));
740}
Helpers for ASCII string handling.
static char ascii_to_upper(char c)
Convert a lower case character to upper case.
Definition: ascii.h:226
Content handling interface.
void content_broadcast(struct content *c, content_msg msg, const union content_msg_data *data)
Send a message to all users.
Definition: content.c:752
Protected interface to Content handling.
content_type
The type of a content.
Definition: content_type.h:53
@ CONTENT_MSG_TEXTSEARCH
A free text search action has occurred.
Definition: content_type.h:179
@ CONTENT_MSG_SCROLL
Request to scroll content.
Definition: content_type.h:152
search_flags_t
Definition: search.h:29
@ SEARCH_FLAG_CASE_SENSITIVE
Definition: search.h:31
@ SEARCH_FLAG_SHOWALL
Definition: search.h:34
@ SEARCH_FLAG_FORWARDS
Definition: search.h:32
bool selection_highlighted(const struct selection *s, unsigned start, unsigned end, unsigned *start_idx, unsigned *end_idx)
Tests whether a text range lies partially within the selection, if there is a selection defined,...
Definition: selection.c:562
struct selection * selection_create(struct content *c)
Creates a new selection object associated with a browser window.
Definition: selection.c:253
void selection_init(struct selection *s)
Initialise the selection object to use the given box subtree as its root, ie.
Definition: selection.c:302
void selection_set_position(struct selection *s, unsigned start, unsigned end)
Set the position of the current selection, updating the screen.
Definition: selection.c:553
void selection_destroy(struct selection *s)
Destroys a selection object, clearing it if necessary.
Definition: selection.c:269
Text selection within browser windows (interface).
Error codes.
nserror
Enumeration of error codes.
Definition: errors.h:29
@ NSERROR_NOT_IMPLEMENTED
Functionality is not implemented.
Definition: errors.h:61
@ NSERROR_NOMEM
Memory exhaustion.
Definition: errors.h:32
@ NSERROR_OK
No error.
Definition: errors.h:30
const char * type
Definition: filetype.cpp:44
#define NOF_ELEMENTS(array)
Definition: search.c:67
struct content * hlcache_handle_get_content(const hlcache_handle *handle)
Retrieve a content object from a cache handle.
Definition: hlcache.c:776
High-level resource cache interface.
Interface to utility string handling.
Node in box tree.
Definition: box.h:177
nserror(* textsearch_find)(struct content *c, struct textsearch_context *context, const char *pattern, int p_len, bool case_sens)
content specific free text search find
nserror(* textsearch_bounds)(struct content *c, unsigned start_idx, unsigned end_idx, struct box *start_ptr, struct box *end_ptr, struct rect *bounds_out)
get bounds of free text search match
content_type(* type)(void)
Content which corresponds to a single URL.
const struct content_handler * handler
Handler for content.
struct content::@117 textsearch
Free text search state.
struct textsearch_context * context
High-level cache handle.
Definition: hlcache.c:66
search match
Definition: textsearch.c:44
struct list_entry * next
next match
Definition: textsearch.c:53
struct selection * sel
content specific selection object
Definition: textsearch.c:78
unsigned start_idx
start position of match
Definition: textsearch.c:58
struct box * end_box
content opaque end pointer
Definition: textsearch.c:73
struct list_entry * prev
previous match
Definition: textsearch.c:48
unsigned end_idx
end of match
Definition: textsearch.c:63
struct box * start_box
content opaque start pointer
Definition: textsearch.c:68
Rectangle coordinates.
Definition: types.h:40
int x0
Definition: types.h:41
int y0
Top left.
Definition: types.h:41
int x1
Definition: types.h:42
int y1
Bottom right.
Definition: types.h:42
The context for a free text search.
Definition: textsearch.c:84
struct content * c
content search was performed upon
Definition: textsearch.c:89
void * gui_p
opaque pointer passed to constructor.
Definition: textsearch.c:94
struct list_entry * current
current selected match
Definition: textsearch.c:104
struct list_entry * found
List of matches.
Definition: textsearch.c:99
char * string
query string search results are for
Definition: textsearch.c:109
static void textsearch_broadcast(struct textsearch_context *textsearch, int type, bool state, const char *string)
broadcast textsearch message
Definition: textsearch.c:119
static void free_matches(struct textsearch_context *textsearch)
Release the memory used by the list of matches, deleting selection objects too.
Definition: textsearch.c:137
static void search_show_all(bool all, struct textsearch_context *context)
Specifies whether all matches or just the current match should be highlighted in the search text.
Definition: textsearch.c:167
static nserror search_text(struct textsearch_context *context, const char *string, int string_len, search_flags_t flags)
Search for a string in a content.
Definition: textsearch.c:204
nserror content_textsearch_clear(struct hlcache_handle *h)
Clear a search.
Definition: textsearch.c:734
nserror content_textsearch(struct hlcache_handle *h, void *context, search_flags_t flags, const char *string)
Free text search a content.
Definition: textsearch.c:682
static nserror content_textsearch_step(struct textsearch_context *textsearch, search_flags_t flags, const char *string)
Begins/continues the search process.
Definition: textsearch.c:334
nserror content_textsearch_destroy(struct textsearch_context *textsearch)
Ends the search process, invalidating all state freeing the list of found boxes.
Definition: textsearch.c:647
static nserror content_textsearch__clear(struct content *c)
Terminate a search.
Definition: textsearch.c:399
const char * content_textsearch_find_pattern(const char *string, int s_len, const char *pattern, int p_len, bool case_sens, unsigned int *m_len)
Find the first occurrence of 'match' in 'string' and return its index.
Definition: textsearch.c:475
bool content_textsearch_ishighlighted(struct textsearch_context *textsearch, unsigned start_offset, unsigned end_offset, unsigned *start_idx, unsigned *end_idx)
Determines whether any portion of the given text box should be selected because it matches the curren...
Definition: textsearch.c:623
static nserror content_textsearch_create(struct content *c, void *gui_data, struct textsearch_context **textsearch_out)
create a search_context
Definition: textsearch.c:421
nserror content_textsearch_add_match(struct textsearch_context *context, unsigned start_idx, unsigned end_idx, struct box *start_box, struct box *end_box)
Add a new entry to the list of matches.
Definition: textsearch.c:586
Interface to HTML searching.
NetSurf types.
Extra data for some content_msg messages.
Definition: content.h:60
struct content_msg_data::@103 scroll
CONTENT_MSG_SCROLL - Part of content to scroll to show.
browser_window_console_flags flags
The flags of the logging.
Definition: content.h:72
@ CONTENT_TEXTSEARCH_FIND
Free text search find operation has started or finished.
Definition: content.h:256
struct content_msg_data::@111 textsearch
CONTENT_MSG_TEXTSEARCH - Free text search action.
@ CONTENT_TEXTSEARCH_MATCH
Free text search match state has changed.
Definition: content.h:260
@ CONTENT_TEXTSEARCH_BACK
Free text search back available state changed.
Definition: content.h:264
@ CONTENT_TEXTSEARCH_FORWARD
Free text search forward available state changed.
Definition: content.h:268
bool state
state for operation
Definition: content.h:281
const char * string
search string
Definition: content.h:285
enum content_msg_data::@104::@112 type
void * ctx
context passed to browser_window_search()
Definition: content.h:277
@ CONTENT_TEXTSEARCH_RECENT
add a search query string to the recent searches
Definition: content.h:272
Interface to a number of general purpose functionality.
#define max(x, y)
Definition: utils.h:50