Bug Summary

File:content/urldb.c
Warning:line 481, column 7
Access to field 'children' results in a dereference of a null pointer (loaded from variable 'p')

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name urldb.c -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model pic -pic-level 2 -pic-is-pie -mframe-pointer=none -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -fcoverage-compilation-dir=/var/lib/jenkins/workspace/scan-build-netsurf -resource-dir /usr/lib/llvm-14/lib/clang/14.0.6 -I . 
-I include -I build/Linux-monkey -I frontends -I content/handlers -D WITH_JPEG -U WITH_PDF_EXPORT -D LIBICONV_PLUG -I /var/lib/jenkins/artifacts-x86_64-linux-gnu/include -I /var/lib/jenkins/artifacts-x86_64-linux-gnu/include -I /var/lib/jenkins/artifacts-x86_64-linux-gnu/include -I /usr/include/x86_64-linux-gnu -D WITH_CURL -D WITH_OPENSSL -I /var/lib/jenkins/artifacts-x86_64-linux-gnu/include -D UTF8PROC_EXPORTS -D WITH_UTF8PROC -D WITH_WEBP -I /usr/include/libpng16 -D WITH_PNG -I /var/lib/jenkins/artifacts-x86_64-linux-gnu/include/ -D WITH_BMP -I /var/lib/jenkins/artifacts-x86_64-linux-gnu/include -D WITH_GIF -I /var/lib/jenkins/artifacts-x86_64-linux-gnu/include -D WITH_NSPSL -I /var/lib/jenkins/artifacts-x86_64-linux-gnu/include -D WITH_NSLOG -D NETSURF_UA_FORMAT_STRING="Mozilla/5.0 (%s) NetSurf/%d.%d" -D NETSURF_HOMEPAGE="about:welcome" -D NETSURF_LOG_LEVEL=VERBOSE -D NETSURF_BUILTIN_LOG_FILTER="(level:WARNING || cat:jserrors)" -D NETSURF_BUILTIN_VERBOSE_FILTER="(level:VERBOSE || cat:jserrors)" -D STMTEXPR=1 -D monkey -D nsmonkey -D MONKEY_RESPATH="/var/lib/jenkins/artifacts-x86_64-linux-gnu/share/netsurf/" -D _POSIX_C_SOURCE=200809L -D _XOPEN_SOURCE=700 -D _BSD_SOURCE -D _DEFAULT_SOURCE -D _NETBSD_SOURCE -D DUK_OPT_HAVE_CUSTOM_H -internal-isystem /usr/lib/llvm-14/lib/clang/14.0.6/include -internal-isystem /usr/local/include -internal-isystem /usr/bin/../lib/gcc/x86_64-linux-gnu/12/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wwrite-strings -Wno-unused-parameter -Wno-unused-but-set-variable -std=c99 -fconst-strings -fdebug-compilation-dir=/var/lib/jenkins/workspace/scan-build-netsurf -ferror-limit 19 -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -analyzer-display-progress -analyzer-output=html -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /var/lib/jenkins/workspace/scan-build-netsurf/clangScanBuildReports/2024-05-08-185630-652128-1 
-x c content/urldb.c
1/*
2 * Copyright 2006 John M Bell <jmb202@ecs.soton.ac.uk>
3 * Copyright 2009 John Tytgat <joty@netsurf-browser.org>
4 *
5 * This file is part of NetSurf, http://www.netsurf-browser.org/
6 *
7 * NetSurf is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; version 2 of the License.
10 *
11 * NetSurf is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program. If not, see <http://www.gnu.org/licenses/>.
18 */
19
20/**
21 * \file
22 * Unified URL information database implementation
23 *
24 * URLs are stored in a tree-based structure as follows:
25 *
26 * The host component is extracted from each URL and, if a FQDN, split on
27 * every '.'. The tree is constructed by inserting each FQDN segment in
28 * reverse order. Duplicate nodes are merged.
29 *
30 * If the host part of an URL is an IP address, then this is added to the
31 * tree verbatim (as if it were a TLD).
32 *
33 * This provides something looking like:
34 *
35 * root (a sentinel)
36 * |
37 * -------------------------------------------------
38 * | | | | | | |
39 * com edu gov 127.0.0.1 net org uk TLDs
40 * | | | | | |
41 * google ... ... ... ... co 2LDs
42 * | |
43 * www bbc Hosts/Subdomains
44 * |
45 * www ...
46 *
47 * Each of the nodes in this tree is a struct host_part. This stores the
48 * FQDN segment (or IP address) with which the node is concerned. Each node
49 * may contain further information about paths on a host (struct path_data)
50 * or SSL certificate processing on a host-wide basis
51 * (host_part::permit_invalid_certs).
52 *
53 * Path data is concerned with storing various metadata about the path in
54 * question. This includes global history data, HTTP authentication details
55 * and any associated HTTP cookies. This is stored as a tree of path segments
56 * hanging off the relevant host_part node.
57 *
58 * Therefore, to find the last visited time of the URL
59 * http://www.example.com/path/to/resource.html, the FQDN tree would be
60 * traversed in the order root -> "com" -> "example" -> "www". The "www"
61 * node would have attached to it a tree of struct path_data:
62 *
63 * (sentinel)
64 * |
65 * path
66 * |
67 * to
68 * |
69 * resource.html
70 *
71 * This represents the absolute path "/path/to/resource.html". The leaf node
72 * "resource.html" contains the last visited time of the resource.
73 *
74 * The mechanism described above is, however, not particularly conducive to
75 * fast searching of the database for a given URL (or URLs beginning with a
76 * given prefix). Therefore, an ancillary data structure is used to enable
77 * fast searching. This structure simply reflects the contents of the
78 * database, with entries being added/removed at the same time as for the
79 * core database. In order to ensure that degenerate cases are kept to a
80 * minimum, we use an AAtree. This is an approximation of a Red-Black tree
81 * with similar performance characteristics, but with a significantly
82 * simpler implementation. Entries in this tree comprise pointers to the
83 * leaf nodes of the host tree described above.
84 *
85 * REALLY IMPORTANT NOTE: urldb expects all URLs to be normalised. Use of
86 * non-normalised URLs with urldb will result in undefined behaviour and
87 * potential crashes.
88 */
89
90#include <assert.h>
91#include <stdbool.h>
92#include <stdio.h>
93#include <stdlib.h>
94#include <string.h>
95#include <strings.h>
96#include <time.h>
97#ifdef WITH_NSPSL1
98#include <nspsl.h>
99#endif
100
101#include "utils/inet.h"
102#include "utils/nsoption.h"
103#include "utils/log.h"
104#include "utils/corestrings.h"
105#include "utils/url.h"
106#include "utils/utils.h"
107#include "utils/bloom.h"
108#include "utils/time.h"
109#include "utils/nsurl.h"
110#include "utils/ascii.h"
111#include "utils/http.h"
112#include "netsurf/bitmap.h"
113#include "desktop/cookie_manager.h"
114
115#include "content/content.h"
116#include "content/urldb.h"
117
118/**
119 * cookie entry.
120 *
121 * \warning This *must* be kept in sync with the public interface in
122 * netsurf/cookie_db.h
123 */
124struct cookie_internal_data {
125 struct cookie_internal_data *prev; /**< Previous in list */
126 struct cookie_internal_data *next; /**< Next in list */
127
128 char *name; /**< Cookie name */
129 char *value; /**< Cookie value */
130 bool_Bool value_was_quoted; /**< Value was quoted in Set-Cookie: */
131 char *comment; /**< Cookie comment */
132 bool_Bool domain_from_set; /**< Domain came from Set-Cookie: header */
133 char *domain; /**< Domain */
134 bool_Bool path_from_set; /**< Path came from Set-Cookie: header */
135 char *path; /**< Path */
136 time_t expires; /**< Expiry timestamp, or -1 for session */
137 time_t last_used; /**< Last used time */
138 bool_Bool secure; /**< Only send for HTTPS requests */
139 bool_Bool http_only; /**< Only expose to HTTP(S) requests */
140 enum cookie_version version; /**< Specification compliance */
141 bool_Bool no_destroy; /**< Never destroy this cookie,
142 * unless it's expired */
143
144};
145
146
147/**
148 * A protection space
149 *
150 * This is defined as a tuple canonical_root_url and realm. This
151 * structure lives as linked list element in a leaf host_part struct
152 * so we need additional scheme and port to have a canonical_root_url.
153 */
154struct prot_space_data {
155 /**
156 * URL scheme of canonical hostname of this protection space.
157 */
158 lwc_string *scheme;
159 /**
160 * Port number of canonical hostname of this protection
161 * space. When 0, it means the default port for given scheme,
162 * i.e. 80 (http), 443 (https).
163 */
164 unsigned int port;
165 /** Protection realm */
166 char *realm;
167
168 /**
169 * Authentication details for this protection space in form
170 * username:password
171 */
172 char *auth;
173 /** Next sibling */
174 struct prot_space_data *next;
175};
176
177
178/**
179 * meta data about a url
180 *
181 * \warning must be kept in sync with url_data structure in netsurf/url_db.h
182 */
183struct url_internal_data {
184 char *title; /**< Resource title */
185 unsigned int visits; /**< Visit count */
186 time_t last_visit; /**< Last visit time */
187 content_type type; /**< Type of resource */
188};
189
190
191/**
192 * data entry for url
193 */
194struct path_data {
195 nsurl *url; /**< Full URL */
196 lwc_string *scheme; /**< URL scheme for data */
197 unsigned int port; /**< Port number for data. When 0, it means
198 * the default port for given scheme, i.e.
199 * 80 (http), 443 (https). */
200 char *segment; /**< Path segment for this node */
201 unsigned int frag_cnt; /**< Number of entries in path_data::fragment */
202 char **fragment; /**< Array of fragments */
203 bool_Bool persistent; /**< This entry should persist */
204
205 struct url_internal_data urld; /**< URL data for resource */
206
207 /**
208 * Protection space to which this resource belongs too. Can be
209 * NULL when it does not belong to a protection space or when
210 * it is not known. No ownership (is with struct host_part::prot_space).
211 */
212 const struct prot_space_data *prot_space;
213 /** Cookies associated with resource */
214 struct cookie_internal_data *cookies;
215 /** Last cookie in list */
216 struct cookie_internal_data *cookies_end;
217
218 struct path_data *next; /**< Next sibling */
219 struct path_data *prev; /**< Previous sibling */
220 struct path_data *parent; /**< Parent path segment */
221 struct path_data *children; /**< Child path segments */
222 struct path_data *last; /**< Last child */
223};
224
/** HSTS (HTTP Strict Transport Security) state held for a host. */
struct hsts_data {
	time_t expires;			/**< Time at which the entry expires */
	bool include_sub_domains;	/**< Whether subdomains are included */
};
229
230struct host_part {
231 /**
232 * Known paths on this host. This _must_ be first so that
233 * struct host_part *h = (struct host_part *)mypath; works
234 */
235 struct path_data paths;
236 /**
237 * Allow access to SSL protected resources on this host
238 * without verifying certificate authenticity
239 */
240 bool_Bool permit_invalid_certs;
241 /* HSTS data */
242 struct hsts_data hsts;
243
244 /**
245 * Part of host string
246 */
247 char *part;
248
249 /**
250 * Linked list of all known proctection spaces known for this
251 * host and all its schems and ports.
252 */
253 struct prot_space_data *prot_space;
254
255 struct host_part *next; /**< Next sibling */
256 struct host_part *prev; /**< Previous sibling */
257 struct host_part *parent; /**< Parent host part */
258 struct host_part *children; /**< Child host parts */
259};
260
261
/**
 * Node of a search index tree.
 *
 * Member order matters: the file initialises nodes positionally.
 */
struct search_node {
	const struct host_part *data;	/**< Host tree entry referenced */

	unsigned int level;		/**< Level of this node */

	struct search_node *left;	/**< Left subtree */
	struct search_node *right;	/**< Right subtree */
};
273
274/** Root database handle */
275static struct host_part db_root;
276
277/** Search trees - one per letter + 1 for IPs + 1 for Everything Else */
278#define NUM_SEARCH_TREES28 28
279#define ST_IP0 0
280#define ST_EE1 1
281#define ST_DN2 2
282static struct search_node empty = { 0, 0, &empty, &empty };
283static struct search_node *search_trees[NUM_SEARCH_TREES28] = {
284 &empty, &empty, &empty, &empty, &empty, &empty, &empty, &empty,
285 &empty, &empty, &empty, &empty, &empty, &empty, &empty, &empty,
286 &empty, &empty, &empty, &empty, &empty, &empty, &empty, &empty,
287 &empty, &empty, &empty, &empty
288};
289
/** Minimum cookie database file version */
#define MIN_COOKIE_FILE_VERSION 100
/** Current cookie database file version */
#define COOKIE_FILE_VERSION 102
/** Version of the cookie file that was loaded */
static int loaded_cookie_file_version;

/** Minimum URL database file version */
#define MIN_URL_FILE_VERSION 106
/** Current URL database file version */
#define URL_FILE_VERSION 107
301
/**
 * Filter for URL presence in the database.
 *
 * Bloom filter used for short-circuiting the false case of "is this
 * URL in the database?". BLOOM_SIZE controls how large the filter is
 * in bytes. Primitive experimentation shows that for a filter of X
 * bytes filled with X items, searching for X items not in the filter
 * has a 5% false-positive rate. It is set to 32kB, which should be
 * enough for all but the largest databases, while not being
 * shockingly wasteful on memory.
 */
static struct bloom_filter *url_bloom;

/** Size of the URL filter, in bytes */
#define BLOOM_SIZE (1024 * 32)
318
319
320/**
321 * write a time_t to a file portably
322 *
323 * \param fp File to write to
324 * \param val the unix time value to output
325 * \return NSERROR_OK on success
326 */
327static nserror urldb_write_timet(FILE *fp, time_t val)
328{
329 int use;
330 char op[32];
331
332 use = nsc_sntimet(op, 32, &val);
333 if (use == 0) {
334 fprintf(fp, "%i\n", (int)val);
335 } else {
336 fprintf(fp, "%.*s\n", use, op);
337 }
338 return NSERROR_OK;
339}
340
341/**
342 * Write paths associated with a host
343 *
344 * \param parent Root of (sub)tree to write
345 * \param host Current host name
346 * \param fp File to write to
347 * \param path Current path string
348 * \param path_alloc Allocated size of path
349 * \param path_used Used size of path
350 * \param expiry Expiry time of URLs
351 */
352static void
353urldb_write_paths(const struct path_data *parent,
354 const char *host,
355 FILE *fp,
356 char **path,
357 int *path_alloc,
358 int *path_used,
359 time_t expiry)
360{
361 const struct path_data *p = parent;
362 int i;
363
364 do {
365 int seglen = p->segment != NULL((void*)0) ? strlen(p->segment) : 0;
366 int len = *path_used + seglen + 1;
367
368 if (*path_alloc < len) {
369 char *temp;
370 temp = realloc(*path,
371 (len > 64) ? len : *path_alloc + 64);
372 if (!temp) {
373 return;
374 }
375 *path = temp;
376 *path_alloc = (len > 64) ? len : *path_alloc + 64;
377 }
378
379 if (p->segment != NULL((void*)0)) {
380 memcpy(*path + *path_used - 1, p->segment, seglen);
381 }
382
383 if (p->children != NULL((void*)0)) {
384 (*path)[*path_used + seglen - 1] = '/';
385 (*path)[*path_used + seglen] = '\0';
386 } else {
387 (*path)[*path_used + seglen - 1] = '\0';
388 len -= 1;
389 }
390
391 *path_used = len;
392
393 if (p->children != NULL((void*)0)) {
394 /* Drill down into children */
395 p = p->children;
396 } else {
397 /* leaf node */
398 if (p->persistent ||
399 ((p->urld.last_visit > expiry) &&
400 (p->urld.visits > 0))) {
401 fprintf(fp, "%s\n", lwc_string_data(p->scheme)({((p->scheme != ((void*)0)) ? (void) (0) : __assert_fail (
"p->scheme != NULL", "content/urldb.c", 401, __extension__
__PRETTY_FUNCTION__)); (const char *)((p->scheme)+1);})
);
402
403 if (p->port) {
404 fprintf(fp,"%d\n", p->port);
405 } else {
406 fprintf(fp, "\n");
407 }
408
409 fprintf(fp, "%s\n", *path);
410
411 /** \todo handle fragments? */
412
413 /* number of visits */
414 fprintf(fp, "%i\n", p->urld.visits);
415
416 /* time entry was last used */
417 urldb_write_timet(fp, p->urld.last_visit);
418
419 /* entry type */
420 fprintf(fp, "%i\n", (int)p->urld.type);
421
422 fprintf(fp, "\n");
423
424 if (p->urld.title) {
425 uint8_t *s = (uint8_t *) p->urld.title;
426
427 for (i = 0; s[i] != '\0'; i++)
428 if (s[i] < 32)
429 s[i] = ' ';
430 for (--i; ((i > 0) && (s[i] == ' '));
431 i--)
432 s[i] = '\0';
433 fprintf(fp, "%s\n", p->urld.title);
434 } else {
435 fprintf(fp, "\n");
436 }
437 }
438
439 /* Now, find next node to process. */
440 while (p != parent) {
441 int seglen = p->segment != NULL((void*)0)
442 ? strlen(p->segment) : 0;
443
444 /* Remove our segment from the path */
445 *path_used -= seglen;
446 (*path)[*path_used - 1] = '\0';
447
448 if (p->next != NULL((void*)0)) {
449 /* Have a sibling, process that */
450 p = p->next;
451 break;
452 }
453
454 /* Going up, so remove '/' */
455 *path_used -= 1;
456 (*path)[*path_used - 1] = '\0';
457
458 /* Ascend tree */
459 p = p->parent;
460 }
461 }
462 } while (p != parent);
463}
464
465
466/**
467 * Count number of URLs associated with a host
468 *
469 * \param root Root of path data tree
470 * \param expiry Expiry time for URLs
471 * \param count Pointer to count
472 */
473static void
474urldb_count_urls(const struct path_data *root,
475 time_t expiry,
476 unsigned int *count)
477{
478 const struct path_data *p = root;
30
'p' initialized to a null pointer value
479
480 do {
481 if (p->children != NULL((void*)0)) {
31
Access to field 'children' results in a dereference of a null pointer (loaded from variable 'p')
482 /* Drill down into children */
483 p = p->children;
484 } else {
485 /* No more children, increment count if required */
486 if (p->persistent ||
487 ((p->urld.last_visit > expiry) &&
488 (p->urld.visits > 0))) {
489 (*count)++;
490 }
491
492 /* Now, find next node to process. */
493 while (p != root) {
494 if (p->next != NULL((void*)0)) {
495 /* Have a sibling, process that */
496 p = p->next;
497 break;
498 }
499
500 /* Ascend tree */
501 p = p->parent;
502 }
503 }
504 } while (p != root);
505}
506
507
508/**
509 * Save a search (sub)tree
510 *
511 * \param parent root node of search tree to save.
512 * \param fp File to write to
513 */
514static void urldb_save_search_tree(struct search_node *parent, FILE *fp)
515{
516 char host[256];
517 const struct host_part *h;
518 unsigned int path_count = 0;
519 char *path, *p, *end;
520 int path_alloc = 64, path_used = 1;
521 time_t expiry, hsts_expiry = 0;
522 int hsts_include_subdomains = 0;
523
524 expiry = time(NULL((void*)0)) - ((60 * 60 * 24) * nsoption_int(expire_url)(nsoptions[NSOPTION_expire_url].value.i));
525
526 if (parent == &empty)
7
Assuming the condition is false
8
Taking false branch
10
Assuming the condition is false
11
Taking false branch
13
Assuming the condition is false
14
Taking false branch
16
Assuming the condition is false
17
Taking false branch
527 return;
528
529 urldb_save_search_tree(parent->left, fp);
9
Calling 'urldb_save_search_tree'
12
Calling 'urldb_save_search_tree'
15
Calling 'urldb_save_search_tree'
24
Returning from 'urldb_save_search_tree'
530
531 path = malloc(path_alloc);
532 if (!path)
18
Assuming 'path' is non-null
19
Taking false branch
25
Assuming 'path' is non-null
26
Taking false branch
533 return;
534
535 path[0] = '\0';
536
537 for (h = parent->data, p = host, end = host + sizeof host;
538 h && h != &db_root && p < end; h = h->parent) {
20
Assuming 'h' is null
27
Assuming 'h' is null
539 int written = snprintf(p, end - p, "%s%s", h->part,
540 (h->parent && h->parent->parent) ? "." : "");
541 if (written < 0) {
542 free(path);
543 return;
544 }
545 p += written;
546 }
547
548 h = parent->data;
549 if (h
20.1
'h' is null
27.1
'h' is null
&& h->hsts.expires > expiry) {
550 hsts_expiry = h->hsts.expires;
551 hsts_include_subdomains = h->hsts.include_sub_domains;
552 }
553
554 urldb_count_urls(&parent->data->paths, expiry, &path_count);
28
Passing null pointer value via 1st parameter 'root'
29
Calling 'urldb_count_urls'
555
556 if (path_count > 0) {
21
Assuming 'path_count' is <= 0
22
Taking false branch
557 fprintf(fp, "%s %i ", host, hsts_include_subdomains);
558 urldb_write_timet(fp, hsts_expiry);
559 fprintf(fp, "%i\n", path_count);
560
561 urldb_write_paths(&parent->data->paths, host, fp,
562 &path, &path_alloc, &path_used, expiry);
563 } else if (hsts_expiry
22.1
'hsts_expiry' is 0
) {
23
Taking false branch
564 fprintf(fp, "%s %i ", host, hsts_include_subdomains);
565 urldb_write_timet(fp, hsts_expiry);
566 fprintf(fp, "0\n");
567 }
568
569 free(path);
570
571 urldb_save_search_tree(parent->right, fp);
572}
573
574
575/**
576 * Path data iterator (internal)
577 *
578 * \param parent Root of subtree to iterate over
579 * \param url_callback Callback function
580 * \param cookie_callback Callback function
581 * \return true to continue, false otherwise
582 */
583static bool_Bool
584urldb_iterate_entries_path(const struct path_data *parent,
585 bool_Bool (*url_callback)(nsurl *url, const struct url_data *data),
586 bool_Bool (*cookie_callback)(const struct cookie_data *data))
587{
588 const struct path_data *p = parent;
589 const struct cookie_data *c;
590
591 do {
592 if (p->children != NULL((void*)0)) {
593 /* Drill down into children */
594 p = p->children;
595 } else {
596 /* All leaf nodes in the path tree should have an URL or
597 * cookies attached to them. If this is not the case, it
598 * indicates that there's a bug in the file loader/URL
599 * insertion code. Therefore, assert this here. */
600 assert(url_callback || cookie_callback)((url_callback || cookie_callback) ? (void) (0) : __assert_fail
("url_callback || cookie_callback", "content/urldb.c", 600, __extension__
__PRETTY_FUNCTION__))
;
601
602 /** \todo handle fragments? */
603 if (url_callback) {
604 const struct url_internal_data *u = &p->urld;
605
606 assert(p->url)((p->url) ? (void) (0) : __assert_fail ("p->url", "content/urldb.c"
, 606, __extension__ __PRETTY_FUNCTION__))
;
607
608 if (!url_callback(p->url,
609 (const struct url_data *) u))
610 return false0;
611 } else {
612 c = (const struct cookie_data *)p->cookies;
613 for (; c != NULL((void*)0); c = c->next) {
614 if (!cookie_callback(c))
615 return false0;
616 }
617 }
618
619 /* Now, find next node to process. */
620 while (p != parent) {
621 if (p->next != NULL((void*)0)) {
622 /* Have a sibling, process that */
623 p = p->next;
624 break;
625 }
626
627 /* Ascend tree */
628 p = p->parent;
629 }
630 }
631 } while (p != parent);
632
633 return true1;
634}
635
636
637/**
638 * Check whether a host string is an IP address.
639 *
640 * This call detects IPv4 addresses (all of dotted-quad or subsets,
641 * decimal or hexadecimal notations) and IPv6 addresses (including
642 * those containing embedded IPv4 addresses.)
643 *
644 * \param host a hostname terminated by '\0'
645 * \return true if the hostname is an IP address, false otherwise
646 */
647static bool_Bool urldb__host_is_ip_address(const char *host)
648{
649 struct in_addr ipv4;
650 size_t host_len = strlen(host);
651 const char *sane_host;
652 const char *slash;
653#ifndef NO_IPV6
654 struct in6_addr ipv6;
655 char ipv6_addr[64];
656 unsigned int ipv6_addr_len;
657#endif
658 /**
659 * @todo FIXME Some parts of urldb.c make confusions between hosts
660 * and "prefixes", we can sometimes be erroneously passed more than
661 * just a host. Sometimes we may be passed trailing slashes, or even
662 * whole path segments. A specific criminal in this class is
663 * urldb_iterate_partial, which takes a prefix to search for, but
664 * passes that prefix to functions that expect only hosts.
665 *
666 * For the time being, we will accept such calls; we check if there
667 * is a / in the host parameter, and if there is, we take a copy and
668 * replace the / with a \0. This is not a permanent solution; we
669 * should search through NetSurf and find all the callers that are
670 * in error and fix them. When doing this task, it might be wise
671 * to replace the hideousness below with code that doesn't have to do
672 * this, and add assert(strchr(host, '/') == NULL); somewhere.
673 * -- rjek - 2010-11-04
674 */
675
676 slash = strchr(host, '/');
677 if (slash == NULL((void*)0)) {
678 sane_host = host;
679 } else {
680 char *c = strdup(host);
681 c[slash - host] = '\0';
682 sane_host = c;
683 host_len = slash - host;
684 NSLOG(netsurf, INFO, "WARNING: called with non-host '%s'",do { if (NSLOG_LEVEL_INFO >= NSLOG_LEVEL_VERBOSE) { static
nslog_entry_context_t _nslog_ctx = { &__nslog_category_netsurf
, NSLOG_LEVEL_INFO, "content/urldb.c", sizeof("content/urldb.c"
) - 1, __PRETTY_FUNCTION__, sizeof(__PRETTY_FUNCTION__) - 1, 685
, }; nslog__log(&_nslog_ctx, "WARNING: called with non-host '%s'"
, host); } } while(0)
685 host)do { if (NSLOG_LEVEL_INFO >= NSLOG_LEVEL_VERBOSE) { static
nslog_entry_context_t _nslog_ctx = { &__nslog_category_netsurf
, NSLOG_LEVEL_INFO, "content/urldb.c", sizeof("content/urldb.c"
) - 1, __PRETTY_FUNCTION__, sizeof(__PRETTY_FUNCTION__) - 1, 685
, }; nslog__log(&_nslog_ctx, "WARNING: called with non-host '%s'"
, host); } } while(0)
;
686 }
687
688 if (strspn(sane_host, "0123456789abcdefABCDEF[].:") < host_len)
689 goto out_false;
690
691 if (inet_aton(sane_host, &ipv4) != 0) {
692 /* This can only be a sane IPv4 address if it contains 3 dots.
693 * Helpfully, inet_aton is happy to treat "a", "a.b", "a.b.c",
694 * and "a.b.c.d" as valid IPv4 address strings where we only
695 * support the full, dotted-quad, form.
696 */
697 int num_dots = 0;
698 size_t index;
699
700 for (index = 0; index < host_len; index++) {
701 if (sane_host[index] == '.')
702 num_dots++;
703 }
704
705 if (num_dots == 3)
706 goto out_true;
707 else
708 goto out_false;
709 }
710
711#ifndef NO_IPV6
712 if ((host_len < 6) ||
713 (sane_host[0] != '[') ||
714 (sane_host[host_len - 1] != ']')) {
715 goto out_false;
716 }
717
718 ipv6_addr_len = host_len - 2;
719 if (ipv6_addr_len >= sizeof(ipv6_addr)) {
720 ipv6_addr_len = sizeof(ipv6_addr) - 1;
721 }
722 strncpy(ipv6_addr, sane_host + 1, ipv6_addr_len);
723 ipv6_addr[ipv6_addr_len] = '\0';
724
725 if (inet_pton(AF_INET610, ipv6_addr, &ipv6) == 1)
726 goto out_true;
727#endif
728
729out_false:
730 if (slash != NULL((void*)0)) free((void *)sane_host);
731 return false0;
732
733out_true:
734 if (slash != NULL((void*)0)) free((void *)sane_host);
735 return true1;
736}
737
738
739/**
740 * Compare host_part with prefix
741 *
742 * \param a host part
743 * \param b prefix
744 * \return 0 if match, non-zero, otherwise
745 */
746static int urldb_search_match_prefix(const struct host_part *a, const char *b)
747{
748 const char *end, *dot;
749 int plen, ret;
750
751 assert(a && a != &db_root && b)((a && a != &db_root && b) ? (void) (0) :
__assert_fail ("a && a != &db_root && b"
, "content/urldb.c", 751, __extension__ __PRETTY_FUNCTION__))
;
752
753 if (urldb__host_is_ip_address(b)) {
754 /* IP address */
755 return strncasecmp(a->part, b, strlen(b));
756 }
757
758 end = b + strlen(b) + 1;
759
760 while (b < end && a && a != &db_root) {
761 dot = strchr(b, '.');
762 if (!dot) {
763 /* last segment */
764 dot = end - 1;
765 }
766
767 /* Compare strings (length limited) */
768 if ((ret = strncasecmp(a->part, b, dot - b)) != 0)
769 /* didn't match => return difference */
770 return ret;
771
772 /* The strings matched */
773 if (dot < end - 1) {
774 /* Consider segment lengths only in the case
775 * where the prefix contains segments */
776 plen = strlen(a->part);
777 if (plen > dot - b) {
778 /* len(a) > len(b) */
779 return 1;
780 } else if (plen < dot - b) {
781 /* len(a) < len(b) */
782 return -1;
783 }
784 }
785
786 b = dot + 1;
787 a = a->parent;
788 }
789
790 /* If we get here then either:
791 * a) The path lengths differ
792 * or b) The hosts are identical
793 */
794 if (a && a != &db_root && b >= end) {
795 /* len(a) > len(b) => prefix matches */
796 return 0;
797 } else if ((!a || a == &db_root) && b < end) {
798 /* len(a) < len(b) => prefix does not match */
799 return -1;
800 }
801
802 /* Identical */
803 return 0;
804}
805
806
807/**
808 * Partial host iterator (internal)
809 *
810 * \param root Root of (sub)tree to traverse
811 * \param prefix Prefix to match
812 * \param callback Callback function
813 * \return true to continue, false otherwise
814 */
815static bool_Bool
816urldb_iterate_partial_host(struct search_node *root,
817 const char *prefix,
818 bool_Bool (*callback)(nsurl *url, const struct url_data *data))
819{
820 int c;
821
822 assert(root && prefix && callback)((root && prefix && callback) ? (void) (0) : __assert_fail
("root && prefix && callback", "content/urldb.c"
, 822, __extension__ __PRETTY_FUNCTION__))
;
823
824 if (root == &empty)
825 return true1;
826
827 c = urldb_search_match_prefix(root->data, prefix);
828
829 if (c > 0) {
830 /* No match => look in left subtree */
831 return urldb_iterate_partial_host(root->left,
832 prefix,
833 callback);
834 } else if (c < 0) {
835 /* No match => look in right subtree */
836 return urldb_iterate_partial_host(root->right,
837 prefix,
838 callback);
839 } else {
840 /* Match => iterate over l/r subtrees & process this node */
841 if (!urldb_iterate_partial_host(root->left,
842 prefix,
843 callback)) {
844 return false0;
845 }
846
847 if (root->data->paths.children) {
848 /* and extract all paths attached to this host */
849 if (!urldb_iterate_entries_path(&root->data->paths,
850 callback,
851 NULL((void*)0))) {
852 return false0;
853 }
854 }
855
856 if (!urldb_iterate_partial_host(root->right,
857 prefix,
858 callback)) {
859 return false0;
860 }
861 }
862
863 return true1;
864}
865
866
867/**
868 * Partial path iterator (internal)
869 *
870 * Given: http://www.example.org/a/b/c/d//e
871 * and assuming a path tree:
872 * ^
873 * / \
874 * a1 b1
875 * / \
876 * a2 b2
877 * /|\
878 * a b c
879 * 3 3 |
880 * d
881 * |
882 * e
883 * / \
884 * f g
885 *
886 * Prefix will be: p will be:
887 *
888 * a/b/c/d//e a1
889 * b/c/d//e a2
890 * b/c/d//e b3
891 * c/d//e a3
892 * c/d//e b3
893 * c/d//e c
894 * d//e d
895 * /e e (skip /)
896 * e e
897 *
898 * I.E. perform a breadth-first search of the tree.
899 *
900 * \param parent Root of (sub)tree to traverse
901 * \param prefix Prefix to match
902 * \param callback Callback function
903 * \return true to continue, false otherwise
904 */
905static bool_Bool
906urldb_iterate_partial_path(const struct path_data *parent,
907 const char *prefix,
908 bool_Bool (*callback)(nsurl *url, const struct url_data *data))
909{
910 const struct path_data *p = parent->children;
911 const char *slash, *end = prefix + strlen(prefix);
912
913 do {
914 slash = strchr(prefix, '/');
915 if (!slash) {
916 slash = end;
917 }
918
919 if (slash == prefix && *prefix == '/') {
920 /* Ignore "//" */
921 prefix++;
922 continue;
923 }
924
925 if (strncasecmp(p->segment, prefix, slash - prefix) == 0) {
926 /* prefix matches so far */
927 if (slash == end) {
928 /* we've run out of prefix, so all
929 * paths below this one match */
930 if (!urldb_iterate_entries_path(p,
931 callback,
932 NULL((void*)0))) {
933 return false0;
934 }
935
936 /* Progress to next sibling */
937 p = p->next;
938 } else {
939 /* Skip over this segment */
940 prefix = slash + 1;
941
942 p = p->children;
943 }
944 } else {
945 /* Doesn't match this segment, try next sibling */
946 p = p->next;
947 }
948 } while (p != NULL((void*)0));
949
950 return true1;
951}
952
953
954/**
955 * Host data iterator (internal)
956 *
957 * \param parent Root of subtree to iterate over
958 * \param url_callback Callback function
959 * \param cookie_callback Callback function
960 * \return true to continue, false otherwise
961 */
962static bool_Bool
963urldb_iterate_entries_host(struct search_node *parent,
964 bool_Bool (*url_callback)(nsurl *url, const struct url_data *data),
965 bool_Bool (*cookie_callback)(const struct cookie_data *data))
966{
967 if (parent == &empty) {
968 return true1;
969 }
970
971 if (!urldb_iterate_entries_host(parent->left,
972 url_callback,
973 cookie_callback)) {
974 return false0;
975 }
976
977 if ((parent->data->paths.children) ||
978 ((cookie_callback) &&
979 (parent->data->paths.cookies))) {
980 /* We have paths (or domain cookies), so iterate them */
981 if (!urldb_iterate_entries_path(&parent->data->paths,
982 url_callback,
983 cookie_callback)) {
984 return false0;
985 }
986 }
987
988 if (!urldb_iterate_entries_host(parent->right,
989 url_callback,
990 cookie_callback)) {
991 return false0;
992 }
993
994 return true1;
995}
996
997
998/**
999 * Add a host node to the tree
1000 *
1001 * \param part Host segment to add (or whole IP address) (copied)
1002 * \param parent Parent node to add to
1003 * \return Pointer to added node, or NULL on memory exhaustion
1004 */
1005static struct host_part *
1006urldb_add_host_node(const char *part, struct host_part *parent)
1007{
1008 struct host_part *d;
1009
1010 assert(part && parent)((part && parent) ? (void) (0) : __assert_fail ("part && parent"
, "content/urldb.c", 1010, __extension__ __PRETTY_FUNCTION__)
)
;
1011
1012 d = calloc(1, sizeof(struct host_part));
1013 if (!d) {
1014 return NULL((void*)0);
1015 }
1016
1017 d->part = strdup(part);
1018 if (!d->part) {
1019 free(d);
1020 return NULL((void*)0);
1021 }
1022
1023 d->next = parent->children;
1024 if (parent->children) {
1025 parent->children->prev = d;
1026 }
1027 d->parent = parent;
1028 parent->children = d;
1029
1030 return d;
1031}
1032
1033
/**
 * Fragment comparator callback for qsort
 *
 * Each argument points at an array element, i.e. at a C string pointer;
 * the strings are compared without regard to case.
 *
 * \param a  first value
 * \param b  second value
 * \return 0 for equal else positive or negative value on comparison
 */
static int urldb_add_path_fragment_cmp(const void *a, const void *b)
{
	const char *const *lhs = a;
	const char *const *rhs = b;

	return strcasecmp(*lhs, *rhs);
}
1045
1046
1047/**
1048 * Add a fragment to a path segment
1049 *
1050 * \param segment Path segment to add to
1051 * \param fragment Fragment to add (copied), or NULL
1052 * \return segment or NULL on memory exhaustion
1053 */
1054static struct path_data *
1055urldb_add_path_fragment(struct path_data *segment, lwc_string *fragment)
1056{
1057 char **temp;
1058
1059 assert(segment)((segment) ? (void) (0) : __assert_fail ("segment", "content/urldb.c"
, 1059, __extension__ __PRETTY_FUNCTION__))
;
1060
1061 /* If no fragment, this function is a NOP
1062 * This may seem strange, but it makes the rest
1063 * of the code cleaner */
1064 if (!fragment)
1065 return segment;
1066
1067 temp = realloc(segment->fragment,
1068 (segment->frag_cnt + 1) * sizeof(char *));
1069 if (!temp)
1070 return NULL((void*)0);
1071
1072 segment->fragment = temp;
1073 segment->fragment[segment->frag_cnt] =
1074 strdup(lwc_string_data(fragment)({((fragment != ((void*)0)) ? (void) (0) : __assert_fail ("fragment != NULL"
, "content/urldb.c", 1074, __extension__ __PRETTY_FUNCTION__)
); (const char *)((fragment)+1);})
);
1075 if (!segment->fragment[segment->frag_cnt]) {
1076 /* Don't free temp - it's now our buffer */
1077 return NULL((void*)0);
1078 }
1079
1080 segment->frag_cnt++;
1081
1082 /* We want fragments in alphabetical order, so sort them
1083 * It may prove better to insert in alphabetical order instead */
1084 qsort(segment->fragment,
1085 segment->frag_cnt,
1086 sizeof (char *),
1087 urldb_add_path_fragment_cmp);
1088
1089 return segment;
1090}
1091
1092
1093/**
1094 * Add a path node to the tree
1095 *
1096 * \param scheme URL scheme associated with path (copied)
1097 * \param port Port number on host associated with path
1098 * \param segment Path segment to add (copied)
1099 * \param fragment URL fragment (copied), or NULL
1100 * \param parent Parent node to add to
1101 * \return Pointer to added node, or NULL on memory exhaustion
1102 */
1103static struct path_data *
1104urldb_add_path_node(lwc_string *scheme,
1105 unsigned int port,
1106 const char *segment,
1107 lwc_string *fragment,
1108 struct path_data *parent)
1109{
1110 struct path_data *d, *e;
1111
1112 assert(scheme && segment && parent)((scheme && segment && parent) ? (void) (0) :
__assert_fail ("scheme && segment && parent"
, "content/urldb.c", 1112, __extension__ __PRETTY_FUNCTION__)
)
;
1113
1114 d = calloc(1, sizeof(struct path_data));
1115 if (!d)
1116 return NULL((void*)0);
1117
1118 d->scheme = lwc_string_ref(scheme)({lwc_string *__lwc_s = (scheme); ((__lwc_s != ((void*)0)) ? (
void) (0) : __assert_fail ("__lwc_s != NULL", "content/urldb.c"
, 1118, __extension__ __PRETTY_FUNCTION__)); __lwc_s->refcnt
++; __lwc_s;})
;
1119
1120 d->port = port;
1121
1122 d->segment = strdup(segment);
1123 if (!d->segment) {
1124 lwc_string_unref(d->scheme){ lwc_string *__lwc_s = (d->scheme); ((__lwc_s != ((void*)
0)) ? (void) (0) : __assert_fail ("__lwc_s != NULL", "content/urldb.c"
, 1124, __extension__ __PRETTY_FUNCTION__)); __lwc_s->refcnt
--; if ((__lwc_s->refcnt == 0) || ((__lwc_s->refcnt == 1
) && (__lwc_s->insensitive == __lwc_s))) lwc_string_destroy
(__lwc_s); }
;
1125 free(d);
1126 return NULL((void*)0);
1127 }
1128
1129 if (fragment) {
1130 if (!urldb_add_path_fragment(d, fragment)) {
1131 free(d->segment);
1132 lwc_string_unref(d->scheme){ lwc_string *__lwc_s = (d->scheme); ((__lwc_s != ((void*)
0)) ? (void) (0) : __assert_fail ("__lwc_s != NULL", "content/urldb.c"
, 1132, __extension__ __PRETTY_FUNCTION__)); __lwc_s->refcnt
--; if ((__lwc_s->refcnt == 0) || ((__lwc_s->refcnt == 1
) && (__lwc_s->insensitive == __lwc_s))) lwc_string_destroy
(__lwc_s); }
;
1133 free(d);
1134 return NULL((void*)0);
1135 }
1136 }
1137
1138 for (e = parent->children; e; e = e->next) {
1139 if (strcmp(e->segment, d->segment) > 0)
1140 break;
1141 }
1142
1143 if (e) {
1144 d->prev = e->prev;
1145 d->next = e;
1146 if (e->prev)
1147 e->prev->next = d;
1148 else
1149 parent->children = d;
1150 e->prev = d;
1151 } else if (!parent->children) {
1152 d->prev = d->next = NULL((void*)0);
1153 parent->children = parent->last = d;
1154 } else {
1155 d->next = NULL((void*)0);
1156 d->prev = parent->last;
1157 parent->last->next = d;
1158 parent->last = d;
1159 }
1160 d->parent = parent;
1161
1162 return d;
1163}
1164
1165
1166/**
1167 * Get the search tree for a particular host
1168 *
1169 * \param host the host to lookup
1170 * \return the corresponding search tree
1171 */
1172static struct search_node **urldb_get_search_tree_direct(const char *host)
1173{
1174 assert(host)((host) ? (void) (0) : __assert_fail ("host", "content/urldb.c"
, 1174, __extension__ __PRETTY_FUNCTION__))
;
1175
1176 if (urldb__host_is_ip_address(host)) {
1177 return &search_trees[ST_IP0];
1178 } else if (ascii_is_alpha(*host)) {
1179 return &search_trees[ST_DN2 + ascii_to_lower(*host) - 'a'];
1180 }
1181 return &search_trees[ST_EE1];
1182}
1183
1184
/**
 * Get the search tree for a particular host
 *
 * \param host  the host to lookup
 * \return the corresponding search tree
 */
static struct search_node *urldb_get_search_tree(const char *host)
{
	struct search_node **slot = urldb_get_search_tree_direct(host);

	return *slot;
}
1195
1196
1197/**
1198 * Compare host part with a string
1199 *
1200 * \param a host part
1201 * \param b string to compare
1202 * \return 0 if match, non-zero, otherwise
1203 */
1204static int urldb_search_match_string(const struct host_part *a, const char *b)
1205{
1206 const char *end, *dot;
1207 int plen, ret;
1208
1209 assert(a && a != &db_root && b)((a && a != &db_root && b) ? (void) (0) :
__assert_fail ("a && a != &db_root && b"
, "content/urldb.c", 1209, __extension__ __PRETTY_FUNCTION__)
)
;
1210
1211 if (urldb__host_is_ip_address(b)) {
1212 /* IP address */
1213 return strcasecmp(a->part, b);
1214 }
1215
1216 end = b + strlen(b) + 1;
1217
1218 while (b < end && a && a != &db_root) {
1219 dot = strchr(b, '.');
1220 if (!dot) {
1221 /* last segment */
1222 dot = end - 1;
1223 }
1224
1225 /* Compare strings (length limited) */
1226 if ((ret = strncasecmp(a->part, b, dot - b)) != 0)
1227 /* didn't match => return difference */
1228 return ret;
1229
1230 /* The strings matched, now check that the lengths do, too */
1231 plen = strlen(a->part);
1232
1233 if (plen > dot - b) {
1234 /* len(a) > len(b) */
1235 return 1;
1236 } else if (plen < dot - b) {
1237 /* len(a) < len(b) */
1238 return -1;
1239 }
1240
1241 b = dot + 1;
1242 a = a->parent;
1243 }
1244
1245 /* If we get here then either:
1246 * a) The path lengths differ
1247 * or b) The hosts are identical
1248 */
1249 if (a && a != &db_root && b >= end) {
1250 /* len(a) > len(b) */
1251 return 1;
1252 } else if ((!a || a == &db_root) && b < end) {
1253 /* len(a) < len(b) */
1254 return -1;
1255 }
1256
1257 /* Identical */
1258 return 0;
1259}
1260
1261
1262/**
1263 * Find a node in a search tree
1264 *
1265 * \param root Tree to look in
1266 * \param host Host to find
1267 * \return Pointer to host tree node, or NULL if not found
1268 */
1269static const struct host_part *
1270urldb_search_find(struct search_node *root, const char *host)
1271{
1272 int c;
1273
1274 assert(root && host)((root && host) ? (void) (0) : __assert_fail ("root && host"
, "content/urldb.c", 1274, __extension__ __PRETTY_FUNCTION__)
)
;
1275
1276 if (root == &empty) {
1277 return NULL((void*)0);
1278 }
1279
1280 c = urldb_search_match_string(root->data, host);
1281
1282 if (c > 0) {
1283 return urldb_search_find(root->left, host);
1284 } else if (c < 0) {
1285 return urldb_search_find(root->right, host);
1286 }
1287
1288 return root->data;
1289}
1290
1291
1292/**
1293 * Match a path string
1294 *
1295 * \param parent Path (sub)tree to look in
1296 * \param path The path to search for
1297 * \param scheme The URL scheme associated with the path
1298 * \param port The port associated with the path
1299 * \return Pointer to path data or NULL if not found.
1300 */
1301static struct path_data *
1302urldb_match_path(const struct path_data *parent,
1303 const char *path,
1304 lwc_string *scheme,
1305 unsigned short port)
1306{
1307 const struct path_data *p;
1308 const char *slash;
1309 bool_Bool match;
1310
1311 assert(parent != NULL)((parent != ((void*)0)) ? (void) (0) : __assert_fail ("parent != NULL"
, "content/urldb.c", 1311, __extension__ __PRETTY_FUNCTION__)
)
;
1312 assert(parent->segment == NULL)((parent->segment == ((void*)0)) ? (void) (0) : __assert_fail
("parent->segment == NULL", "content/urldb.c", 1312, __extension__
__PRETTY_FUNCTION__))
;
1313
1314 if (path[0] != '/') {
1315 NSLOG(netsurf, INFO, "path is %s", path)do { if (NSLOG_LEVEL_INFO >= NSLOG_LEVEL_VERBOSE) { static
nslog_entry_context_t _nslog_ctx = { &__nslog_category_netsurf
, NSLOG_LEVEL_INFO, "content/urldb.c", sizeof("content/urldb.c"
) - 1, __PRETTY_FUNCTION__, sizeof(__PRETTY_FUNCTION__) - 1, 1315
, }; nslog__log(&_nslog_ctx, "path is %s", path); } } while
(0)
;
1316 }
1317
1318 assert(path[0] == '/')((path[0] == '/') ? (void) (0) : __assert_fail ("path[0] == '/'"
, "content/urldb.c", 1318, __extension__ __PRETTY_FUNCTION__)
)
;
1319
1320 /* Start with children, as parent has no segment */
1321 p = parent->children;
1322
1323 while (p != NULL((void*)0)) {
1324 slash = strchr(path + 1, '/');
1325 if (!slash) {
1326 slash = path + strlen(path);
1327 }
1328
1329 if (strncmp(p->segment, path + 1, slash - path - 1) == 0 &&
1330 lwc_string_isequal(p->scheme, scheme, &match)((*(&match) = ((p->scheme) == (scheme))), lwc_error_ok
)
== lwc_error_ok &&
1331 match == true1 &&
1332 p->port == port) {
1333 if (*slash == '\0') {
1334 /* Complete match */
1335 return (struct path_data *) p;
1336 }
1337
1338 /* Match so far, go down tree */
1339 p = p->children;
1340
1341 path = slash;
1342 } else {
1343 /* No match, try next sibling */
1344 p = p->next;
1345 }
1346 }
1347
1348 return NULL((void*)0);
1349}
1350
1351
1352/**
1353 * Find an URL in the database
1354 *
1355 * \param url Absolute URL to find
1356 * \return Pointer to path data, or NULL if not found
1357 */
1358static struct path_data *urldb_find_url(nsurl *url)
1359{
1360 const struct host_part *h;
1361 struct path_data *p;
1362 struct search_node *tree;
1363 char *plq;
1364 const char *host_str;
1365 lwc_string *scheme, *host, *port;
1366 size_t len = 0;
1367 unsigned int port_int;
1368 bool_Bool match;
1369
1370 assert(url)((url) ? (void) (0) : __assert_fail ("url", "content/urldb.c"
, 1370, __extension__ __PRETTY_FUNCTION__))
;
1371
1372 if (url_bloom != NULL((void*)0)) {
1373 if (bloom_search_hash(url_bloom, nsurl_hash(url)) == false0) {
1374 return NULL((void*)0);
1375 }
1376 }
1377
1378 scheme = nsurl_get_component(url, NSURL_SCHEME);
1379 if (scheme == NULL((void*)0))
1380 return NULL((void*)0);
1381
1382 if (lwc_string_isequal(scheme, corestring_lwc_mailto, &match)((*(&match) = ((scheme) == (corestring_lwc_mailto))), lwc_error_ok
)
==
1383 lwc_error_ok && match == true1) {
1384 lwc_string_unref(scheme){ lwc_string *__lwc_s = (scheme); ((__lwc_s != ((void*)0)) ? (
void) (0) : __assert_fail ("__lwc_s != NULL", "content/urldb.c"
, 1384, __extension__ __PRETTY_FUNCTION__)); __lwc_s->refcnt
--; if ((__lwc_s->refcnt == 0) || ((__lwc_s->refcnt == 1
) && (__lwc_s->insensitive == __lwc_s))) lwc_string_destroy
(__lwc_s); }
;
1385 return NULL((void*)0);
1386 }
1387
1388 host = nsurl_get_component(url, NSURL_HOST);
1389 if (host != NULL((void*)0)) {
1390 host_str = lwc_string_data(host)({((host != ((void*)0)) ? (void) (0) : __assert_fail ("host != NULL"
, "content/urldb.c", 1390, __extension__ __PRETTY_FUNCTION__)
); (const char *)((host)+1);})
;
1391 lwc_string_unref(host){ lwc_string *__lwc_s = (host); ((__lwc_s != ((void*)0)) ? (void
) (0) : __assert_fail ("__lwc_s != NULL", "content/urldb.c", 1391
, __extension__ __PRETTY_FUNCTION__)); __lwc_s->refcnt--; if
((__lwc_s->refcnt == 0) || ((__lwc_s->refcnt == 1) &&
(__lwc_s->insensitive == __lwc_s))) lwc_string_destroy(__lwc_s
); }
;
1392
1393 } else if (lwc_string_isequal(scheme, corestring_lwc_file, &match)((*(&match) = ((scheme) == (corestring_lwc_file))), lwc_error_ok
)
==
1394 lwc_error_ok && match == true1) {
1395 host_str = "localhost";
1396
1397 } else {
1398 lwc_string_unref(scheme){ lwc_string *__lwc_s = (scheme); ((__lwc_s != ((void*)0)) ? (
void) (0) : __assert_fail ("__lwc_s != NULL", "content/urldb.c"
, 1398, __extension__ __PRETTY_FUNCTION__)); __lwc_s->refcnt
--; if ((__lwc_s->refcnt == 0) || ((__lwc_s->refcnt == 1
) && (__lwc_s->insensitive == __lwc_s))) lwc_string_destroy
(__lwc_s); }
;
1399 return NULL((void*)0);
1400 }
1401
1402 tree = urldb_get_search_tree(host_str);
1403 h = urldb_search_find(tree, host_str);
1404 if (!h) {
1405 lwc_string_unref(scheme){ lwc_string *__lwc_s = (scheme); ((__lwc_s != ((void*)0)) ? (
void) (0) : __assert_fail ("__lwc_s != NULL", "content/urldb.c"
, 1405, __extension__ __PRETTY_FUNCTION__)); __lwc_s->refcnt
--; if ((__lwc_s->refcnt == 0) || ((__lwc_s->refcnt == 1
) && (__lwc_s->insensitive == __lwc_s))) lwc_string_destroy
(__lwc_s); }
;
1406 return NULL((void*)0);
1407 }
1408
1409 /* generate plq (path, leaf, query) */
1410 if (nsurl_get(url, NSURL_PATH | NSURL_QUERY, &plq, &len) != NSERROR_OK) {
1411 lwc_string_unref(scheme){ lwc_string *__lwc_s = (scheme); ((__lwc_s != ((void*)0)) ? (
void) (0) : __assert_fail ("__lwc_s != NULL", "content/urldb.c"
, 1411, __extension__ __PRETTY_FUNCTION__)); __lwc_s->refcnt
--; if ((__lwc_s->refcnt == 0) || ((__lwc_s->refcnt == 1
) && (__lwc_s->insensitive == __lwc_s))) lwc_string_destroy
(__lwc_s); }
;
1412 return NULL((void*)0);
1413 }
1414
1415 /* Get port */
1416 port = nsurl_get_component(url, NSURL_PORT);
1417 if (port != NULL((void*)0)) {
1418 port_int = atoi(lwc_string_data(port)({((port != ((void*)0)) ? (void) (0) : __assert_fail ("port != NULL"
, "content/urldb.c", 1418, __extension__ __PRETTY_FUNCTION__)
); (const char *)((port)+1);})
);
1419 lwc_string_unref(port){ lwc_string *__lwc_s = (port); ((__lwc_s != ((void*)0)) ? (void
) (0) : __assert_fail ("__lwc_s != NULL", "content/urldb.c", 1419
, __extension__ __PRETTY_FUNCTION__)); __lwc_s->refcnt--; if
((__lwc_s->refcnt == 0) || ((__lwc_s->refcnt == 1) &&
(__lwc_s->insensitive == __lwc_s))) lwc_string_destroy(__lwc_s
); }
;
1420 } else {
1421 port_int = 0;
1422 }
1423
1424 p = urldb_match_path(&h->paths, plq, scheme, port_int);
1425
1426 free(plq);
1427 lwc_string_unref(scheme){ lwc_string *__lwc_s = (scheme); ((__lwc_s != ((void*)0)) ? (
void) (0) : __assert_fail ("__lwc_s != NULL", "content/urldb.c"
, 1427, __extension__ __PRETTY_FUNCTION__)); __lwc_s->refcnt
--; if ((__lwc_s->refcnt == 0) || ((__lwc_s->refcnt == 1
) && (__lwc_s->insensitive == __lwc_s))) lwc_string_destroy
(__lwc_s); }
;
1428
1429 return p;
1430}
1431
1432
1433/**
1434 * Dump URL database paths to stderr
1435 *
1436 * \param parent Parent node of tree to dump
1437 */
1438static void urldb_dump_paths(struct path_data *parent)
1439{
1440 const struct path_data *p = parent;
1441 unsigned int i;
1442
1443 do {
1444 if (p->segment != NULL((void*)0)) {
1445 NSLOG(netsurf, INFO, "\t%s : %u",do { if (NSLOG_LEVEL_INFO >= NSLOG_LEVEL_VERBOSE) { static
nslog_entry_context_t _nslog_ctx = { &__nslog_category_netsurf
, NSLOG_LEVEL_INFO, "content/urldb.c", sizeof("content/urldb.c"
) - 1, __PRETTY_FUNCTION__, sizeof(__PRETTY_FUNCTION__) - 1, 1446
, }; nslog__log(&_nslog_ctx, "\t%s : %u", ({((p->scheme
!= ((void*)0)) ? (void) (0) : __assert_fail ("p->scheme != NULL"
, "content/urldb.c", 1446, __extension__ __PRETTY_FUNCTION__)
); (const char *)((p->scheme)+1);}), p->port); } } while
(0)
1446 lwc_string_data(p->scheme), p->port)do { if (NSLOG_LEVEL_INFO >= NSLOG_LEVEL_VERBOSE) { static
nslog_entry_context_t _nslog_ctx = { &__nslog_category_netsurf
, NSLOG_LEVEL_INFO, "content/urldb.c", sizeof("content/urldb.c"
) - 1, __PRETTY_FUNCTION__, sizeof(__PRETTY_FUNCTION__) - 1, 1446
, }; nslog__log(&_nslog_ctx, "\t%s : %u", ({((p->scheme
!= ((void*)0)) ? (void) (0) : __assert_fail ("p->scheme != NULL"
, "content/urldb.c", 1446, __extension__ __PRETTY_FUNCTION__)
); (const char *)((p->scheme)+1);}), p->port); } } while
(0)
;
1447
1448 NSLOG(netsurf, INFO, "\t\t'%s'", p->segment)do { if (NSLOG_LEVEL_INFO >= NSLOG_LEVEL_VERBOSE) { static
nslog_entry_context_t _nslog_ctx = { &__nslog_category_netsurf
, NSLOG_LEVEL_INFO, "content/urldb.c", sizeof("content/urldb.c"
) - 1, __PRETTY_FUNCTION__, sizeof(__PRETTY_FUNCTION__) - 1, 1448
, }; nslog__log(&_nslog_ctx, "\t\t'%s'", p->segment); }
} while(0)
;
1449
1450 for (i = 0; i != p->frag_cnt; i++) {
1451 NSLOG(netsurf, INFO, "\t\t\t#%s",do { if (NSLOG_LEVEL_INFO >= NSLOG_LEVEL_VERBOSE) { static
nslog_entry_context_t _nslog_ctx = { &__nslog_category_netsurf
, NSLOG_LEVEL_INFO, "content/urldb.c", sizeof("content/urldb.c"
) - 1, __PRETTY_FUNCTION__, sizeof(__PRETTY_FUNCTION__) - 1, 1452
, }; nslog__log(&_nslog_ctx, "\t\t\t#%s", p->fragment[
i]); } } while(0)
1452 p->fragment[i])do { if (NSLOG_LEVEL_INFO >= NSLOG_LEVEL_VERBOSE) { static
nslog_entry_context_t _nslog_ctx = { &__nslog_category_netsurf
, NSLOG_LEVEL_INFO, "content/urldb.c", sizeof("content/urldb.c"
) - 1, __PRETTY_FUNCTION__, sizeof(__PRETTY_FUNCTION__) - 1, 1452
, }; nslog__log(&_nslog_ctx, "\t\t\t#%s", p->fragment[
i]); } } while(0)
;
1453 }
1454 }
1455
1456 if (p->children != NULL((void*)0)) {
1457 p = p->children;
1458 } else {
1459 while (p != parent) {
1460 if (p->next != NULL((void*)0)) {
1461 p = p->next;
1462 break;
1463 }
1464
1465 p = p->parent;
1466 }
1467 }
1468 } while (p != parent);
1469}
1470
1471
1472/**
1473 * Dump URL database hosts to stderr
1474 *
1475 * \param parent Parent node of tree to dump
1476 */
1477static void urldb_dump_hosts(struct host_part *parent)
1478{
1479 struct host_part *h;
1480
1481 if (parent->part) {
1482 NSLOG(netsurf, INFO, "%s", parent->part)do { if (NSLOG_LEVEL_INFO >= NSLOG_LEVEL_VERBOSE) { static
nslog_entry_context_t _nslog_ctx = { &__nslog_category_netsurf
, NSLOG_LEVEL_INFO, "content/urldb.c", sizeof("content/urldb.c"
) - 1, __PRETTY_FUNCTION__, sizeof(__PRETTY_FUNCTION__) - 1, 1482
, }; nslog__log(&_nslog_ctx, "%s", parent->part); } } while
(0)
;
1483
1484 NSLOG(netsurf, INFO, "\t%s invalid SSL certs",do { if (NSLOG_LEVEL_INFO >= NSLOG_LEVEL_VERBOSE) { static
nslog_entry_context_t _nslog_ctx = { &__nslog_category_netsurf
, NSLOG_LEVEL_INFO, "content/urldb.c", sizeof("content/urldb.c"
) - 1, __PRETTY_FUNCTION__, sizeof(__PRETTY_FUNCTION__) - 1, 1485
, }; nslog__log(&_nslog_ctx, "\t%s invalid SSL certs", parent
->permit_invalid_certs ? "Permits" : "Denies"); } } while(
0)
1485 parent->permit_invalid_certs ? "Permits" : "Denies")do { if (NSLOG_LEVEL_INFO >= NSLOG_LEVEL_VERBOSE) { static
nslog_entry_context_t _nslog_ctx = { &__nslog_category_netsurf
, NSLOG_LEVEL_INFO, "content/urldb.c", sizeof("content/urldb.c"
) - 1, __PRETTY_FUNCTION__, sizeof(__PRETTY_FUNCTION__) - 1, 1485
, }; nslog__log(&_nslog_ctx, "\t%s invalid SSL certs", parent
->permit_invalid_certs ? "Permits" : "Denies"); } } while(
0)
;
1486 }
1487
1488 /* Dump path data */
1489 urldb_dump_paths(&parent->paths);
1490
1491 /* and recurse */
1492 for (h = parent->children; h; h = h->next) {
1493 urldb_dump_hosts(h);
1494 }
1495}
1496
1497
1498/**
1499 * Dump search tree
1500 *
1501 * \param parent Parent node of tree to dump
1502 * \param depth Tree depth
1503 */
1504static void urldb_dump_search(struct search_node *parent, int depth)
1505{
1506 const struct host_part *h;
1507 int i; /* index into string */
1508 char s[1024];
1509 int r;
1510 int sl = sizeof(s) - 2;
1511
1512 if (parent == &empty)
1513 return;
1514
1515 urldb_dump_search(parent->left, depth + 1);
1516
1517 for (i = 0; i != depth; i++) {
1518 s[i] = ' ';
1519 }
1520
1521 for (h = parent->data; h; h = h->parent) {
1522 if (h->part) {
1523 r = snprintf(&s[i], sl - i, "%s", h->part);
1524 if (r < 0) {
1525 break;
1526 }
1527 if ((i + r) >= sl) {
1528 break;
1529 }
1530 i += r;
1531 }
1532
1533 if (h->parent && h->parent->parent) {
1534 s[i]='.';
1535 i++;
1536 }
1537 }
1538 s[i]= 0;
1539
1540 NSLOG(netsurf, INFO, "%s", s)do { if (NSLOG_LEVEL_INFO >= NSLOG_LEVEL_VERBOSE) { static
nslog_entry_context_t _nslog_ctx = { &__nslog_category_netsurf
, NSLOG_LEVEL_INFO, "content/urldb.c", sizeof("content/urldb.c"
) - 1, __PRETTY_FUNCTION__, sizeof(__PRETTY_FUNCTION__) - 1, 1540
, }; nslog__log(&_nslog_ctx, "%s", s); } } while(0)
;
1541
1542 urldb_dump_search(parent->right, depth + 1);
1543}
1544
1545
1546/**
1547 * Compare a pair of host parts
1548 *
1549 * \param a first host part
1550 * \param b second host part
1551 * \return 0 if match, non-zero, otherwise
1552 */
1553static int
1554urldb_search_match_host(const struct host_part *a, const struct host_part *b)
1555{
1556 int ret;
1557
1558 assert(a && b)((a && b) ? (void) (0) : __assert_fail ("a && b"
, "content/urldb.c", 1558, __extension__ __PRETTY_FUNCTION__)
)
;
1559
1560 /* traverse up tree to root, comparing parts as we go. */
1561 for (; a && a != &db_root && b && b != &db_root;
1562 a = a->parent, b = b->parent) {
1563 if ((ret = strcasecmp(a->part, b->part)) != 0) {
1564 /* They differ => return the difference here */
1565 return ret;
1566 }
1567 }
1568
1569 /* If we get here then either:
1570 * a) The path lengths differ
1571 * or b) The hosts are identical
1572 */
1573 if (a && a != &db_root && (!b || b == &db_root)) {
1574 /* len(a) > len(b) */
1575 return 1;
1576 } else if ((!a || a == &db_root) && b && b != &db_root) {
1577 /* len(a) < len(b) */
1578 return -1;
1579 }
1580
1581 /* identical */
1582 return 0;
1583}
1584
1585
1586/**
1587 * Rotate a subtree right
1588 *
1589 * \param root Root of subtree to rotate
1590 * \return new root of subtree
1591 */
1592static struct search_node *urldb_search_skew(struct search_node *root)
1593{
1594 assert(root)((root) ? (void) (0) : __assert_fail ("root", "content/urldb.c"
, 1594, __extension__ __PRETTY_FUNCTION__))
;
1595
1596 if (root->left->level == root->level) {
1597 struct search_node *temp;
1598
1599 temp = root->left;
1600 root->left = temp->right;
1601 temp->right = root;
1602 root = temp;
1603 }
1604
1605 return root;
1606}
1607
1608
1609/**
1610 * Rotate a node left, increasing the parent's level
1611 *
1612 * \param root Root of subtree to rotate
1613 * \return New root of subtree
1614 */
1615static struct search_node *urldb_search_split(struct search_node *root)
1616{
1617 assert(root)((root) ? (void) (0) : __assert_fail ("root", "content/urldb.c"
, 1617, __extension__ __PRETTY_FUNCTION__))
;
1618
1619 if (root->right->right->level == root->level) {
1620 struct search_node *temp;
1621
1622 temp = root->right;
1623 root->right = temp->left;
1624 temp->left = root;
1625 root = temp;
1626
1627 root->level++;
1628 }
1629
1630 return root;
1631}
1632
1633
1634/**
1635 * Insert node into search tree
1636 *
1637 * \param root Root of (sub)tree to insert into
1638 * \param n Node to insert
1639 * \return Pointer to updated root
1640 */
1641static struct search_node *
1642urldb_search_insert_internal(struct search_node *root, struct search_node *n)
1643{
1644 assert(root && n)((root && n) ? (void) (0) : __assert_fail ("root && n"
, "content/urldb.c", 1644, __extension__ __PRETTY_FUNCTION__)
)
;
1645
1646 if (root == &empty) {
1647 root = n;
1648 } else {
1649 int c = urldb_search_match_host(root->data, n->data);
1650
1651 if (c > 0) {
1652 root->left = urldb_search_insert_internal(
1653 root->left, n);
1654 } else if (c < 0) {
1655 root->right = urldb_search_insert_internal(
1656 root->right, n);
1657 } else {
1658 /* exact match */
1659 free(n);
1660 return root;
1661 }
1662
1663 root = urldb_search_skew(root);
1664 root = urldb_search_split(root);
1665 }
1666
1667 return root;
1668}
1669
1670
1671/**
1672 * Insert a node into the search tree
1673 *
1674 * \param root Root of tree to insert into
1675 * \param data User data to insert
1676 * \return Pointer to updated root, or NULL if failed
1677 */
1678static struct search_node *
1679urldb_search_insert(struct search_node *root, const struct host_part *data)
1680{
1681 struct search_node *n;
1682
1683 assert(root && data)((root && data) ? (void) (0) : __assert_fail ("root && data"
, "content/urldb.c", 1683, __extension__ __PRETTY_FUNCTION__)
)
;
1684
1685 n = malloc(sizeof(struct search_node));
1686 if (!n)
1687 return NULL((void*)0);
1688
1689 n->level = 1;
1690 n->data = data;
1691 n->left = n->right = &empty;
1692
1693 root = urldb_search_insert_internal(root, n);
1694
1695 return root;
1696}
1697
1698
1699/**
1700 * Parse a cookie avpair
1701 *
1702 * \param c Cookie struct to populate
1703 * \param n Name component
1704 * \param v Value component
1705 * \param was_quoted Whether \a v was quoted in the input
1706 * \return true on success, false on memory exhaustion
1707 */
1708static bool_Bool
1709urldb_parse_avpair(struct cookie_internal_data *c,
1710 char *n,
1711 char *v,
1712 bool_Bool was_quoted)
1713{
1714 int vlen;
1715
1716 assert(c && n && v)((c && n && v) ? (void) (0) : __assert_fail (
"c && n && v", "content/urldb.c", 1716, __extension__
__PRETTY_FUNCTION__))
;
1717
1718 /* Strip whitespace from start of name */
1719 for (; *n; n++) {
1720 if (*n != ' ' && *n != '\t')
1721 break;
1722 }
1723
1724 /* Strip whitespace from end of name */
1725 for (vlen = strlen(n); vlen; vlen--) {
1726 if (n[vlen] == ' ' || n[vlen] == '\t')
1727 n[vlen] = '\0';
1728 else
1729 break;
1730 }
1731
1732 /* Strip whitespace from start of value */
1733 for (; *v; v++) {
1734 if (*v != ' ' && *v != '\t')
1735 break;
1736 }
1737
1738 /* Strip whitespace from end of value */
1739 for (vlen = strlen(v); vlen; vlen--) {
1740 if (v[vlen] == ' ' || v[vlen] == '\t')
1741 v[vlen] = '\0';
1742 else
1743 break;
1744 }
1745
1746 if (!c->comment && strcasecmp(n, "Comment") == 0) {
1747 c->comment = strdup(v);
1748 if (!c->comment)
1749 return false0;
1750 } else if (!c->domain && strcasecmp(n, "Domain") == 0) {
1751 if (v[0] == '.') {
1752 /* Domain must start with a dot */
1753 c->domain_from_set = true1;
1754 c->domain = strdup(v);
1755 if (!c->domain)
1756 return false0;
1757 }
1758 } else if (strcasecmp(n, "Max-Age") == 0) {
1759 int temp = atoi(v);
1760 if (temp == 0)
1761 /* Special case - 0 means delete */
1762 c->expires = 0;
1763 else
1764 c->expires = time(NULL((void*)0)) + temp;
1765 } else if (!c->path && strcasecmp(n, "Path") == 0) {
1766 c->path_from_set = true1;
1767 c->path = strdup(v);
1768 if (!c->path)
1769 return false0;
1770 } else if (strcasecmp(n, "Version") == 0) {
1771 c->version = atoi(v);
1772 } else if (strcasecmp(n, "Expires") == 0) {
1773 char *datenoday;
1774 time_t expires;
1775 nserror res;
1776
1777 /* Strip dayname from date (these are hugely variable
1778 * and liable to break the parser. They also serve no
1779 * useful purpose) */
1780 for (datenoday = v;
1781 *datenoday && !ascii_is_digit(*datenoday);
1782 datenoday++) {
1783 /* do nothing */
1784 }
1785
1786 res = nsc_strntimet(datenoday, strlen(datenoday), &expires);
1787 if (res != NSERROR_OK) {
1788 /* assume we have an unrepresentable date =>
1789 * force it to the maximum possible value of a
1790 * 32bit time_t (this may break in 2038. We'll
1791 * deal with that once we come to it) */
1792 expires = (time_t)0x7fffffff;
1793 }
1794 c->expires = expires;
1795 } else if (strcasecmp(n, "Secure") == 0) {
1796 c->secure = true1;
1797 } else if (strcasecmp(n, "HttpOnly") == 0) {
1798 c->http_only = true1;
1799 } else if (!c->name) {
1800 c->name = strdup(n);
1801 c->value = strdup(v);
1802 c->value_was_quoted = was_quoted;
1803 if (!c->name || !c->value) {
1804 return false0;
1805 }
1806 }
1807
1808 return true1;
1809}
1810
1811
1812/**
1813 * Free a cookie
1814 *
1815 * \param c The cookie to free
1816 */
1817static void urldb_free_cookie(struct cookie_internal_data *c)
1818{
1819 assert(c)((c) ? (void) (0) : __assert_fail ("c", "content/urldb.c", 1819
, __extension__ __PRETTY_FUNCTION__))
;
1820
1821 free(c->comment);
1822 free(c->domain);
1823 free(c->path);
1824 free(c->name);
1825 free(c->value);
1826 free(c);
1827}
1828
1829
1830/**
1831 * Parse a cookie
1832 *
1833 * \param url URL being fetched
1834 * \param cookie Pointer to cookie string (updated on exit)
1835 * \return Pointer to cookie structure (on heap, caller frees) or NULL
1836 */
1837static struct cookie_internal_data *
1838urldb_parse_cookie(nsurl *url, const char **cookie)
1839{
1840 struct cookie_internal_data *c;
1841 const char *cur;
1842 char name[1024], value[4096];
1843 char *n = name, *v = value;
1844 bool_Bool in_value = false0;
1845 bool_Bool had_value_data = false0;
1846 bool_Bool value_verbatim = false0;
1847 bool_Bool quoted = false0;
1848 bool_Bool was_quoted = false0;
1849
1850 assert(url && cookie && *cookie)((url && cookie && *cookie) ? (void) (0) : __assert_fail
("url && cookie && *cookie", "content/urldb.c"
, 1850, __extension__ __PRETTY_FUNCTION__))
;
1851
1852 c = calloc(1, sizeof(struct cookie_internal_data));
1853 if (c == NULL((void*)0))
1854 return NULL((void*)0);
1855
1856 c->expires = -1;
1857
1858 name[0] = '\0';
1859 value[0] = '\0';
1860
1861 for (cur = *cookie; *cur; cur++) {
1862 if (*cur == '\r' && *(cur + 1) == '\n') {
1863 /* End of header */
1864 if (quoted) {
1865 /* Unmatched quote encountered */
1866
1867 /* Match Firefox 2.0.0.11 */
1868 value[0] = '\0';
1869
1870 }
1871
1872 break;
1873 } else if (*cur == '\r') {
1874 /* Spurious linefeed */
1875 continue;
1876 } else if (*cur == '\n') {
1877 /* Spurious newline */
1878 continue;
1879 }
1880
1881 if (in_value && !had_value_data) {
1882 if (*cur == ' ' || *cur == '\t') {
1883 /* Strip leading whitespace from value */
1884 continue;
1885 } else {
1886 had_value_data = true1;
1887
1888 /* Value is taken verbatim if first non-space
1889 * character is not a " */
1890 if (*cur != '"') {
1891 value_verbatim = true1;
1892 }
1893 }
1894 }
1895
1896 if (in_value && !value_verbatim && (*cur == '"')) {
1897 /* Only non-verbatim values may be quoted */
1898 if (cur == *cookie || *(cur - 1) != '\\') {
1899 /* Only unescaped quotes count */
1900 was_quoted = quoted;
1901 quoted = !quoted;
1902
1903 continue;
1904 }
1905 }
1906
1907 if (!quoted && !in_value && *cur == '=') {
1908 /* First equals => attr-value separator */
1909 in_value = true1;
1910 continue;
1911 }
1912
1913 if (!quoted && (was_quoted || *cur == ';')) {
1914 /* Semicolon or after quoted value
1915 * => end of current avpair */
1916
1917 /* NUL-terminate tokens */
1918 *n = '\0';
1919 *v = '\0';
1920
1921 if (!urldb_parse_avpair(c, name, value, was_quoted)) {
1922 /* Memory exhausted */
1923 urldb_free_cookie(c);
1924 return NULL((void*)0);
1925 }
1926
1927 /* And reset to start */
1928 n = name;
1929 v = value;
1930 in_value = false0;
1931 had_value_data = false0;
1932 value_verbatim = false0;
1933 was_quoted = false0;
1934
1935 /* Now, if the current input is anything other than a
1936 * semicolon, we must be sure to reprocess it */
1937 if (*cur != ';') {
1938 cur--;
1939 }
1940
1941 continue;
1942 }
1943
1944 /* And now handle commas. These are a pain as they may mean
1945 * any of the following:
1946 *
1947 * + End of cookie
1948 * + Day separator in Expires avpair
1949 * + (Invalid) comma in unquoted value
1950 *
1951 * Therefore, in order to handle all 3 cases (2 and 3 are
1952 * identical, the difference being that 2 is in the spec and
1953 * 3 isn't), we need to determine where the comma actually
1954 * lies. We use the following heuristic:
1955 *
1956 * Given a comma at the current input position, find the
1957 * immediately following semicolon (or end of input if none
1958 * found). Then, consider the input characters between
1959 * these two positions. If any of these characters is an
1960 * '=', we must assume that the comma signified the end of
1961 * the current cookie.
1962 *
1963 * This holds as the first avpair of any cookie must be
1964 * NAME=VALUE, so the '=' is guaranteed to appear in the
1965 * case where the comma marks the end of a cookie.
1966 *
1967 * This will fail, however, in the case where '=' appears in
1968 * the value of the current avpair after the comma or the
1969 * subsequent cookie does not start with NAME=VALUE. Neither
1970 * of these is particularly likely and if they do occur, the
1971 * website is more broken than we can be bothered to handle.
1972 */
1973 if (!quoted && *cur == ',') {
1974 /* Find semi-colon, if any */
1975 const char *p;
1976 const char *semi = strchr(cur + 1, ';');
1977 if (!semi)
1978 semi = cur + strlen(cur) - 2 /* CRLF */;
1979
1980 /* Look for equals sign between comma and semi */
1981 for (p = cur + 1; p < semi; p++)
1982 if (*p == '=')
1983 break;
1984
1985 if (p == semi) {
1986 /* none found => comma internal to value */
1987 /* do nothing */
1988 } else {
1989 /* found one => comma marks end of cookie */
1990 cur++;
1991 break;
1992 }
1993 }
1994
1995 /* Accumulate into buffers, always leaving space for a NUL */
1996 /** \todo is silently truncating overlong names/values wise? */
1997 if (!in_value) {
1998 if (n < name + (sizeof(name) - 1))
1999 *n++ = *cur;
2000 } else {
2001 if (v < value + (sizeof(value) - 1))
2002 *v++ = *cur;
2003 }
2004 }
2005
2006 /* Parse final avpair */
2007 *n = '\0';
2008 *v = '\0';
2009
2010 if (!urldb_parse_avpair(c, name, value, was_quoted)) {
2011 /* Memory exhausted */
2012 urldb_free_cookie(c);
2013 return NULL((void*)0);
2014 }
2015
2016 /* Now fix-up default values */
2017 if (c->domain == NULL((void*)0)) {
2018 lwc_string *host = nsurl_get_component(url, NSURL_HOST);
2019 if (host == NULL((void*)0)) {
2020 urldb_free_cookie(c);
2021 return NULL((void*)0);
2022 }
2023 c->domain = strdup(lwc_string_data(host)({((host != ((void*)0)) ? (void) (0) : __assert_fail ("host != NULL"
, "content/urldb.c", 2023, __extension__ __PRETTY_FUNCTION__)
); (const char *)((host)+1);})
);
2024 lwc_string_unref(host){ lwc_string *__lwc_s = (host); ((__lwc_s != ((void*)0)) ? (void
) (0) : __assert_fail ("__lwc_s != NULL", "content/urldb.c", 2024
, __extension__ __PRETTY_FUNCTION__)); __lwc_s->refcnt--; if
((__lwc_s->refcnt == 0) || ((__lwc_s->refcnt == 1) &&
(__lwc_s->insensitive == __lwc_s))) lwc_string_destroy(__lwc_s
); }
;
2025 }
2026
2027 if (c->path == NULL((void*)0)) {
2028 const char *path_data;
2029 char *path, *slash;
2030 lwc_string *path_lwc;
2031
2032 path_lwc = nsurl_get_component(url, NSURL_PATH);
2033 if (path_lwc == NULL((void*)0)) {
2034 urldb_free_cookie(c);
2035 return NULL((void*)0);
2036 }
2037 path_data = lwc_string_data(path_lwc)({((path_lwc != ((void*)0)) ? (void) (0) : __assert_fail ("path_lwc != NULL"
, "content/urldb.c", 2037, __extension__ __PRETTY_FUNCTION__)
); (const char *)((path_lwc)+1);})
;
2038
2039 /* Strip leafname and trailing slash (4.3.1) */
2040 slash = strrchr(path_data, '/');
2041 if (slash != NULL((void*)0)) {
2042 /* Special case: retain first slash in path */
2043 if (slash == path_data)
2044 slash++;
2045
2046 slash = strndup(path_data, slash - path_data);
2047 if (slash == NULL((void*)0)) {
2048 lwc_string_unref(path_lwc){ lwc_string *__lwc_s = (path_lwc); ((__lwc_s != ((void*)0)) ?
(void) (0) : __assert_fail ("__lwc_s != NULL", "content/urldb.c"
, 2048, __extension__ __PRETTY_FUNCTION__)); __lwc_s->refcnt
--; if ((__lwc_s->refcnt == 0) || ((__lwc_s->refcnt == 1
) && (__lwc_s->insensitive == __lwc_s))) lwc_string_destroy
(__lwc_s); }
;
2049 urldb_free_cookie(c);
2050 return NULL((void*)0);
2051 }
2052
2053 path = slash;
2054 lwc_string_unref(path_lwc){ lwc_string *__lwc_s = (path_lwc); ((__lwc_s != ((void*)0)) ?
(void) (0) : __assert_fail ("__lwc_s != NULL", "content/urldb.c"
, 2054, __extension__ __PRETTY_FUNCTION__)); __lwc_s->refcnt
--; if ((__lwc_s->refcnt == 0) || ((__lwc_s->refcnt == 1
) && (__lwc_s->insensitive == __lwc_s))) lwc_string_destroy
(__lwc_s); }
;
2055 } else {
2056 path = strdup(lwc_string_data(path_lwc)({((path_lwc != ((void*)0)) ? (void) (0) : __assert_fail ("path_lwc != NULL"
, "content/urldb.c", 2056, __extension__ __PRETTY_FUNCTION__)
); (const char *)((path_lwc)+1);})
);
2057 lwc_string_unref(path_lwc){ lwc_string *__lwc_s = (path_lwc); ((__lwc_s != ((void*)0)) ?
(void) (0) : __assert_fail ("__lwc_s != NULL", "content/urldb.c"
, 2057, __extension__ __PRETTY_FUNCTION__)); __lwc_s->refcnt
--; if ((__lwc_s->refcnt == 0) || ((__lwc_s->refcnt == 1
) && (__lwc_s->insensitive == __lwc_s))) lwc_string_destroy
(__lwc_s); }
;
2058 if (path == NULL((void*)0)) {
2059 urldb_free_cookie(c);
2060 return NULL((void*)0);
2061 }
2062 }
2063
2064 c->path = path;
2065 }
2066
2067 /* Write back current position */
2068 *cookie = cur;
2069
2070 return c;
2071}
2072
2073
2074/**
2075 * Add a path to the database, creating any intermediate entries
2076 *
2077 * \param scheme URL scheme associated with path
2078 * \param port Port number on host associated with path
2079 * \param host Host tree node to attach to
2080 * \param path_query Absolute path plus query to add (freed)
2081 * \param fragment URL fragment, or NULL
2082 * \param url URL (fragment ignored)
2083 * \return Pointer to leaf node, or NULL on memory exhaustion
2084 */
2085static struct path_data *
2086urldb_add_path(lwc_string *scheme,
2087 unsigned int port,
2088 const struct host_part *host,
2089 char *path_query,
2090 lwc_string *fragment,
2091 nsurl *url)
2092{
2093 struct path_data *d, *e;
2094 char *buf = path_query;
2095 char *segment, *slash;
2096 bool_Bool match;
2097
2098 assert(scheme && host && url)((scheme && host && url) ? (void) (0) : __assert_fail
("scheme && host && url", "content/urldb.c",
2098, __extension__ __PRETTY_FUNCTION__))
;
2099
2100 d = (struct path_data *) &host->paths;
2101
2102 /* skip leading '/' */
2103 segment = buf;
2104 if (*segment == '/')
2105 segment++;
2106
2107 /* Process path segments */
2108 do {
2109 slash = strchr(segment, '/');
2110 if (!slash) {
2111 /* last segment */
2112 /* look for existing entry */
2113 for (e = d->children; e; e = e->next)
2114 if (strcmp(segment, e->segment) == 0 &&
2115 lwc_string_isequal(scheme,((*(&match) = ((scheme) == (e->scheme))), lwc_error_ok
)
2116 e->scheme, &match)((*(&match) = ((scheme) == (e->scheme))), lwc_error_ok
)
==
2117 lwc_error_ok &&
2118 match == true1 &&
2119 e->port == port)
2120 break;
2121
2122 d = e ? urldb_add_path_fragment(e, fragment) :
2123 urldb_add_path_node(scheme, port,
2124 segment, fragment, d);
2125 break;
2126 }
2127
2128 *slash = '\0';
2129
2130 /* look for existing entry */
2131 for (e = d->children; e; e = e->next)
2132 if (strcmp(segment, e->segment) == 0 &&
2133 lwc_string_isequal(scheme, e->scheme,((*(&match) = ((scheme) == (e->scheme))), lwc_error_ok
)
2134 &match)((*(&match) = ((scheme) == (e->scheme))), lwc_error_ok
)
== lwc_error_ok &&
2135 match == true1 &&
2136 e->port == port)
2137 break;
2138
2139 d = e ? e : urldb_add_path_node(scheme, port, segment, NULL((void*)0), d);
2140 if (!d)
2141 break;
2142
2143 segment = slash + 1;
2144 } while (1);
2145
2146 free(path_query);
2147
2148 if (d && !d->url) {
2149 /* Insert defragmented URL */
2150 if (nsurl_defragment(url, &d->url) != NSERROR_OK)
2151 return NULL((void*)0);
2152 }
2153
2154 return d;
2155}
2156
2157
2158/**
2159 * Add a host to the database, creating any intermediate entries
2160 *
2161 * \param host Hostname to add
2162 * \return Pointer to leaf node, or NULL on memory exhaustion
2163 */
2164static struct host_part *urldb_add_host(const char *host)
2165{
2166 struct host_part *d = (struct host_part *) &db_root, *e;
2167 struct search_node *s;
2168 char buf[256]; /* 256 bytes is sufficient - domain names are
2169 * limited to 255 chars. */
2170 char *part;
2171
2172 assert(host)((host) ? (void) (0) : __assert_fail ("host", "content/urldb.c"
, 2172, __extension__ __PRETTY_FUNCTION__))
;
2173
2174 if (urldb__host_is_ip_address(host)) {
2175 /* Host is an IP, so simply add as TLD */
2176
2177 /* Check for existing entry */
2178 for (e = d->children; e; e = e->next)
2179 if (strcasecmp(host, e->part) == 0)
2180 /* found => return it */
2181 return e;
2182
2183 d = urldb_add_host_node(host, d);
2184
2185 s = urldb_search_insert(search_trees[ST_IP0], d);
2186 if (!s) {
2187 /* failed */
2188 d = NULL((void*)0);
2189 } else {
2190 search_trees[ST_IP0] = s;
2191 }
2192
2193 return d;
2194 }
2195
2196 /* Copy host string, so we can corrupt it */
2197 strncpy(buf, host, sizeof buf);
2198 buf[sizeof buf - 1] = '\0';
2199
2200 /* Process FQDN segments backwards */
2201 do {
2202 part = strrchr(buf, '.');
2203 if (!part) {
2204 /* last segment */
2205 /* Check for existing entry */
2206 for (e = d->children; e; e = e->next)
2207 if (strcasecmp(buf, e->part) == 0)
2208 break;
2209
2210 if (e) {
2211 d = e;
2212 } else {
2213 d = urldb_add_host_node(buf, d);
2214 }
2215
2216 /* And insert into search tree */
2217 if (d) {
2218 struct search_node **r;
2219
2220 r = urldb_get_search_tree_direct(buf);
2221 s = urldb_search_insert(*r, d);
2222 if (!s) {
2223 /* failed */
2224 d = NULL((void*)0);
2225 } else {
2226 *r = s;
2227 }
2228 }
2229 break;
2230 }
2231
2232 /* Check for existing entry */
2233 for (e = d->children; e; e = e->next)
2234 if (strcasecmp(part + 1, e->part) == 0)
2235 break;
2236
2237 d = e ? e : urldb_add_host_node(part + 1, d);
2238 if (!d)
2239 break;
2240
2241 *part = '\0';
2242 } while (1);
2243
2244 return d;
2245}
2246
2247
2248/**
2249 * Insert a cookie into the database
2250 *
2251 * \param c The cookie to insert
2252 * \param scheme URL scheme associated with cookie path
2253 * \param url URL (sans fragment) associated with cookie
2254 * \return true on success, false on memory exhaustion (c will be freed)
2255 */
2256static bool_Bool
2257urldb_insert_cookie(struct cookie_internal_data *c,
2258 lwc_string *scheme,
2259 nsurl *url)
2260{
2261 struct cookie_internal_data *d;
2262 const struct host_part *h;
2263 struct path_data *p;
2264 time_t now = time(NULL((void*)0));
2265
2266 assert(c)((c) ? (void) (0) : __assert_fail ("c", "content/urldb.c", 2266
, __extension__ __PRETTY_FUNCTION__))
;
2267
2268 if (c->domain[0] == '.') {
2269 h = urldb_search_find(
2270 urldb_get_search_tree(&(c->domain[1])),
2271 c->domain + 1);
2272 if (!h) {
2273 h = urldb_add_host(c->domain + 1);
2274 if (!h) {
2275 urldb_free_cookie(c);
2276 return false0;
2277 }
2278 }
2279
2280 p = (struct path_data *) &h->paths;
2281 } else {
2282 /* Need to have a URL and scheme, if it's not a domain cookie */
2283 assert(url != NULL)((url != ((void*)0)) ? (void) (0) : __assert_fail ("url != NULL"
, "content/urldb.c", 2283, __extension__ __PRETTY_FUNCTION__)
)
;
2284 assert(scheme != NULL)((scheme != ((void*)0)) ? (void) (0) : __assert_fail ("scheme != NULL"
, "content/urldb.c", 2284, __extension__ __PRETTY_FUNCTION__)
)
;
2285
2286 h = urldb_search_find(
2287 urldb_get_search_tree(c->domain),
2288 c->domain);
2289
2290 if (!h) {
2291 h = urldb_add_host(c->domain);
2292 if (!h) {
2293 urldb_free_cookie(c);
2294 return false0;
2295 }
2296 }
2297
2298 /* find path */
2299 p = urldb_add_path(scheme, 0, h,
2300 strdup(c->path), NULL((void*)0), url);
2301 if (!p) {
2302 urldb_free_cookie(c);
2303 return false0;
2304 }
2305 }
2306
2307 /* add cookie */
2308 for (d = p->cookies; d; d = d->next) {
2309 if (!strcmp(d->domain, c->domain) &&
2310 !strcmp(d->path, c->path) &&
2311 !strcmp(d->name, c->name))
2312 break;
2313 }
2314
2315 if (d) {
2316 if (c->expires != -1 && c->expires < now) {
2317 /* remove cookie */
2318 if (d->next)
2319 d->next->prev = d->prev;
2320 else
2321 p->cookies_end = d->prev;
2322 if (d->prev)
2323 d->prev->next = d->next;
2324 else
2325 p->cookies = d->next;
2326
2327 cookie_manager_remove((struct cookie_data *)d);
2328
2329 urldb_free_cookie(d);
2330 urldb_free_cookie(c);
2331 } else {
2332 /* replace d with c */
2333 c->prev = d->prev;
2334 c->next = d->next;
2335 if (c->next)
2336 c->next->prev = c;
2337 else
2338 p->cookies_end = c;
2339 if (c->prev)
2340 c->prev->next = c;
2341 else
2342 p->cookies = c;
2343
2344 cookie_manager_remove((struct cookie_data *)d);
2345 urldb_free_cookie(d);
2346
2347 cookie_manager_add((struct cookie_data *)c);
2348 }
2349 } else {
2350 c->prev = p->cookies_end;
2351 c->next = NULL((void*)0);
2352 if (p->cookies_end)
2353 p->cookies_end->next = c;
2354 else
2355 p->cookies = c;
2356 p->cookies_end = c;
2357
2358 cookie_manager_add((struct cookie_data *)c);
2359 }
2360
2361 return true1;
2362}
2363
2364
2365/**
2366 * Concatenate a cookie into the provided buffer
2367 *
2368 * \param c Cookie to concatenate
2369 * \param version The version of the cookie string to output
2370 * \param used Pointer to amount of buffer used (updated)
2371 * \param alloc Pointer to allocated size of buffer (updated)
2372 * \param buf Pointer to Pointer to buffer (updated)
2373 * \return true on success, false on memory exhaustion
2374 */
2375static bool_Bool
2376urldb_concat_cookie(struct cookie_internal_data *c,
2377 int version,
2378 int *used,
2379 int *alloc,
2380 char **buf)
2381{
2382 /* Combined (A)BNF for the Cookie: request header:
2383 *
2384 * CHAR = <any US-ASCII character (octets 0 - 127)>
2385 * CTL = <any US-ASCII control character
2386 * (octets 0 - 31) and DEL (127)>
2387 * CR = <US-ASCII CR, carriage return (13)>
2388 * LF = <US-ASCII LF, linefeed (10)>
2389 * SP = <US-ASCII SP, space (32)>
2390 * HT = <US-ASCII HT, horizontal-tab (9)>
2391 * <"> = <US-ASCII double-quote mark (34)>
2392 *
2393 * CRLF = CR LF
2394 *
2395 * LWS = [CRLF] 1*( SP | HT )
2396 *
2397 * TEXT = <any OCTET except CTLs,
2398 * but including LWS>
2399 *
2400 * token = 1*<any CHAR except CTLs or separators>
2401 * separators = "(" | ")" | "<" | ">" | "@"
2402 * | "," | ";" | ":" | "\" | <">
2403 * | "/" | "[" | "]" | "?" | "="
2404 * | "{" | "}" | SP | HT
2405 *
2406 * quoted-string = ( <"> *(qdtext | quoted-pair ) <"> )
2407 * qdtext = <any TEXT except <">>
2408 * quoted-pair = "\" CHAR
2409 *
2410 * attr = token
2411 * value = word
2412 * word = token | quoted-string
2413 *
2414 * cookie = "Cookie:" cookie-version
2415 * 1*((";" | ",") cookie-value)
2416 * cookie-value = NAME "=" VALUE [";" path] [";" domain]
2417 * cookie-version = "$Version" "=" value
2418 * NAME = attr
2419 * VALUE = value
2420 * path = "$Path" "=" value
2421 * domain = "$Domain" "=" value
2422 *
2423 * A note on quoted-string handling:
2424 * The cookie data stored in the db is verbatim (i.e. sans enclosing
2425 * <">, if any, and with all quoted-pairs intact) thus all that we
2426 * need to do here is ensure that value strings which were quoted
2427 * in Set-Cookie or which include any of the separators are quoted
2428 * before use.
2429 *
2430 * A note on cookie-value separation:
2431 * We use semicolons for all separators, including between
2432 * cookie-values. This simplifies things and is backwards compatible.
2433 */
2434 const char * const separators = "()<>@,;:\\\"/[]?={} \t";
2435
2436 int max_len;
2437
2438 assert(c && used && alloc && buf && *buf)((c && used && alloc && buf &&
*buf) ? (void) (0) : __assert_fail ("c && used && alloc && buf && *buf"
, "content/urldb.c", 2438, __extension__ __PRETTY_FUNCTION__)
)
;
2439
2440 /* "; " cookie-value
2441 * We allow for the possibility that values are quoted
2442 */
2443 max_len = 2 + strlen(c->name) + 1 + strlen(c->value) + 2 +
2444 (c->path_from_set ?
2445 8 + strlen(c->path) + 2 : 0) +
2446 (c->domain_from_set ?
2447 10 + strlen(c->domain) + 2 : 0);
2448
2449 if (*used + max_len >= *alloc) {
2450 char *temp = realloc(*buf, *alloc + 4096);
2451 if (!temp) {
2452 return false0;
2453 }
2454 *buf = temp;
2455 *alloc += 4096;
2456 }
2457
2458 if (version == COOKIE_NETSCAPE) {
2459 /* Original Netscape cookie */
2460 sprintf(*buf + *used - 1, "; %s=", c->name);
2461 *used += 2 + strlen(c->name) + 1;
2462
2463 /* The Netscape spec doesn't mention quoting of cookie values.
2464 * RFC 2109 $10.1.3 indicates that values must not be quoted.
2465 *
2466 * However, other browsers preserve quoting, so we should, too
2467 */
2468 if (c->value_was_quoted) {
2469 sprintf(*buf + *used - 1, "\"%s\"", c->value);
2470 *used += 1 + strlen(c->value) + 1;
2471 } else {
2472 /** \todo should we %XX-encode [;HT,SP] ? */
2473 /** \todo Should we strip escaping backslashes? */
2474 sprintf(*buf + *used - 1, "%s", c->value);
2475 *used += strlen(c->value);
2476 }
2477
2478 /* We don't send path/domain information -- that's what the
2479 * Netscape spec suggests we should do, anyway. */
2480 } else {
2481 /* RFC2109 or RFC2965 cookie */
2482 sprintf(*buf + *used - 1, "; %s=", c->name);
2483 *used += 2 + strlen(c->name) + 1;
2484
2485 /* Value needs quoting if it contains any separator or if
2486 * it needs preserving from the Set-Cookie header */
2487 if (c->value_was_quoted ||
2488 strpbrk(c->value, separators) != NULL((void*)0)) {
2489 sprintf(*buf + *used - 1, "\"%s\"", c->value);
2490 *used += 1 + strlen(c->value) + 1;
2491 } else {
2492 sprintf(*buf + *used - 1, "%s", c->value);
2493 *used += strlen(c->value);
2494 }
2495
2496 if (c->path_from_set) {
2497 /* Path, quoted if necessary */
2498 sprintf(*buf + *used - 1, "; $Path=");
2499 *used += 8;
2500
2501 if (strpbrk(c->path, separators) != NULL((void*)0)) {
2502 sprintf(*buf + *used - 1, "\"%s\"", c->path);
2503 *used += 1 + strlen(c->path) + 1;
2504 } else {
2505 sprintf(*buf + *used - 1, "%s", c->path);
2506 *used += strlen(c->path);
2507 }
2508 }
2509
2510 if (c->domain_from_set) {
2511 /* Domain, quoted if necessary */
2512 sprintf(*buf + *used - 1, "; $Domain=");
2513 *used += 10;
2514
2515 if (strpbrk(c->domain, separators) != NULL((void*)0)) {
2516 sprintf(*buf + *used - 1, "\"%s\"", c->domain);
2517 *used += 1 + strlen(c->domain) + 1;
2518 } else {
2519 sprintf(*buf + *used - 1, "%s", c->domain);
2520 *used += strlen(c->domain);
2521 }
2522 }
2523 }
2524
2525 return true1;
2526}
2527
2528
2529/**
2530 * deletes paths from a cookie.
2531 *
2532 * \param domain the cookie domain
2533 * \param path the cookie path
2534 * \param name The cookie name
2535 * \param parent The url data of the cookie
2536 */
2537static void
2538urldb_delete_cookie_paths(const char *domain,
2539 const char *path,
2540 const char *name,
2541 struct path_data *parent)
2542{
2543 struct cookie_internal_data *c;
2544 struct path_data *p = parent;
2545
2546 assert(parent)((parent) ? (void) (0) : __assert_fail ("parent", "content/urldb.c"
, 2546, __extension__ __PRETTY_FUNCTION__))
;
2547
2548 do {
2549 for (c = p->cookies; c; c = c->next) {
2550 if (strcmp(c->domain, domain) == 0 &&
2551 strcmp(c->path, path) == 0 &&
2552 strcmp(c->name, name) == 0) {
2553 if (c->prev) {
2554 c->prev->next = c->next;
2555 } else {
2556 p->cookies = c->next;
2557 }
2558
2559 if (c->next) {
2560 c->next->prev = c->prev;
2561 } else {
2562 p->cookies_end = c->prev;
2563 }
2564
2565 urldb_free_cookie(c);
2566
2567 return;
2568 }
2569 }
2570
2571 if (p->children) {
2572 p = p->children;
2573 } else {
2574 while (p != parent) {
2575 if (p->next != NULL((void*)0)) {
2576 p = p->next;
2577 break;
2578 }
2579
2580 p = p->parent;
2581 }
2582 }
2583 } while (p != parent);
2584}
2585
2586
2587/**
2588 * Deletes cookie hosts and their assoicated paths
2589 *
2590 * \param domain the cookie domain
2591 * \param path the cookie path
2592 * \param name The cookie name
2593 * \param parent The url data of the cookie
2594 */
2595static void
2596urldb_delete_cookie_hosts(const char *domain,
2597 const char *path,
2598 const char *name,
2599 struct host_part *parent)
2600{
2601 struct host_part *h;
2602 assert(parent)((parent) ? (void) (0) : __assert_fail ("parent", "content/urldb.c"
, 2602, __extension__ __PRETTY_FUNCTION__))
;
2603
2604 urldb_delete_cookie_paths(domain, path, name, &parent->paths);
2605
2606 for (h = parent->children; h; h = h->next) {
2607 urldb_delete_cookie_hosts(domain, path, name, h);
2608 }
2609}
2610
2611
2612/**
2613 * Save a path subtree's cookies
2614 *
2615 * \param fp File pointer to write to
2616 * \param parent Parent path
2617 */
2618static void urldb_save_cookie_paths(FILE *fp, struct path_data *parent)
2619{
2620 struct path_data *p = parent;
2621 time_t now = time(NULL((void*)0));
2622
2623 assert(fp && parent)((fp && parent) ? (void) (0) : __assert_fail ("fp && parent"
, "content/urldb.c", 2623, __extension__ __PRETTY_FUNCTION__)
)
;
2624
2625 do {
2626 if (p->cookies != NULL((void*)0)) {
2627 struct cookie_internal_data *c;
2628
2629 for (c = p->cookies; c != NULL((void*)0); c = c->next) {
2630 if (c->expires == -1 || c->expires < now) {
2631 /* Skip expired & session cookies */
2632 continue;
2633 }
2634
2635 fprintf(fp,
2636 "%d\t%s\t%d\t%s\t%d\t%d\t%d\t%d\t%d\t%d\t"
2637 "%s\t%s\t%d\t%s\t%s\t%s\n",
2638 c->version, c->domain,
2639 c->domain_from_set, c->path,
2640 c->path_from_set, c->secure,
2641 c->http_only,
2642 (int)c->expires, (int)c->last_used,
2643 c->no_destroy, c->name, c->value,
2644 c->value_was_quoted,
2645 p->scheme ? lwc_string_data(p->scheme)({((p->scheme != ((void*)0)) ? (void) (0) : __assert_fail (
"p->scheme != NULL", "content/urldb.c", 2645, __extension__
__PRETTY_FUNCTION__)); (const char *)((p->scheme)+1);})
:
2646 "unused",
2647 p->url ? nsurl_access(p->url) :
2648 "unused",
2649 c->comment ? c->comment : "");
2650 }
2651 }
2652
2653 if (p->children != NULL((void*)0)) {
2654 p = p->children;
2655 } else {
2656 while (p != parent) {
2657 if (p->next != NULL((void*)0)) {
2658 p = p->next;
2659 break;
2660 }
2661
2662 p = p->parent;
2663 }
2664 }
2665 } while (p != parent);
2666}
2667
2668
2669/**
2670 * Save a host subtree's cookies
2671 *
2672 * \param fp File pointer to write to
2673 * \param parent Parent host
2674 */
2675static void urldb_save_cookie_hosts(FILE *fp, struct host_part *parent)
2676{
2677 struct host_part *h;
2678 assert(fp && parent)((fp && parent) ? (void) (0) : __assert_fail ("fp && parent"
, "content/urldb.c", 2678, __extension__ __PRETTY_FUNCTION__)
)
;
2679
2680 urldb_save_cookie_paths(fp, &parent->paths);
2681
2682 for (h = parent->children; h; h = h->next)
2683 urldb_save_cookie_hosts(fp, h);
2684}
2685
2686
2687/**
2688 * Destroy a cookie node
2689 *
2690 * \param c Cookie to destroy
2691 */
2692static void urldb_destroy_cookie(struct cookie_internal_data *c)
2693{
2694 free(c->name);
2695 free(c->value);
2696 free(c->comment);
2697 free(c->domain);
2698 free(c->path);
2699
2700 free(c);
2701}
2702
2703
2704/**
2705 * Destroy the contents of a path node
2706 *
2707 * \param node Node to destroy contents of (does not destroy node)
2708 */
2709static void urldb_destroy_path_node_content(struct path_data *node)
2710{
2711 struct cookie_internal_data *a, *b;
2712 unsigned int i;
2713
2714 if (node->url != NULL((void*)0)) {
2715 nsurl_unref(node->url);
2716 }
2717
2718 if (node->scheme != NULL((void*)0)) {
2719 lwc_string_unref(node->scheme){ lwc_string *__lwc_s = (node->scheme); ((__lwc_s != ((void
*)0)) ? (void) (0) : __assert_fail ("__lwc_s != NULL", "content/urldb.c"
, 2719, __extension__ __PRETTY_FUNCTION__)); __lwc_s->refcnt
--; if ((__lwc_s->refcnt == 0) || ((__lwc_s->refcnt == 1
) && (__lwc_s->insensitive == __lwc_s))) lwc_string_destroy
(__lwc_s); }
;
2720 }
2721
2722 free(node->segment);
2723 for (i = 0; i < node->frag_cnt; i++)
2724 free(node->fragment[i]);
2725 free(node->fragment);
2726
2727 free(node->urld.title);
2728
2729 for (a = node->cookies; a; a = b) {
2730 b = a->next;
2731 urldb_destroy_cookie(a);
2732 }
2733}
2734
2735
2736/**
2737 * Destroy protection space data
2738 *
2739 * \param space Protection space to destroy
2740 */
2741static void urldb_destroy_prot_space(struct prot_space_data *space)
2742{
2743 lwc_string_unref(space->scheme){ lwc_string *__lwc_s = (space->scheme); ((__lwc_s != ((void
*)0)) ? (void) (0) : __assert_fail ("__lwc_s != NULL", "content/urldb.c"
, 2743, __extension__ __PRETTY_FUNCTION__)); __lwc_s->refcnt
--; if ((__lwc_s->refcnt == 0) || ((__lwc_s->refcnt == 1
) && (__lwc_s->insensitive == __lwc_s))) lwc_string_destroy
(__lwc_s); }
;
2744 free(space->realm);
2745 free(space->auth);
2746
2747 free(space);
2748}
2749
2750
2751/**
2752 * Destroy a path tree
2753 *
2754 * \param root Root node of tree to destroy
2755 */
2756static void urldb_destroy_path_tree(struct path_data *root)
2757{
2758 struct path_data *p = root;
2759
2760 do {
2761 if (p->children != NULL((void*)0)) {
2762 p = p->children;
2763 } else {
2764 struct path_data *q = p;
2765
2766 while (p != root) {
2767 if (p->next != NULL((void*)0)) {
2768 p = p->next;
2769 break;
2770 }
2771
2772 p = p->parent;
2773
2774 urldb_destroy_path_node_content(q);
2775 free(q);
2776
2777 q = p;
2778 }
2779
2780 urldb_destroy_path_node_content(q);
2781 free(q);
2782 }
2783 } while (p != root);
2784}
2785
2786
2787/**
2788 * Destroy a host tree
2789 *
2790 * \param root Root node of tree to destroy
2791 */
2792static void urldb_destroy_host_tree(struct host_part *root)
2793{
2794 struct host_part *a, *b;
2795 struct path_data *p, *q;
2796 struct prot_space_data *s, *t;
2797
2798 /* Destroy children */
2799 for (a = root->children; a; a = b) {
2800 b = a->next;
2801 urldb_destroy_host_tree(a);
2802 }
2803
2804 /* Now clean up paths */
2805 for (p = root->paths.children; p; p = q) {
2806 q = p->next;
2807 urldb_destroy_path_tree(p);
2808 }
2809
2810 /* Root path */
2811 urldb_destroy_path_node_content(&root->paths);
2812
2813 /* Proctection space data */
2814 for (s = root->prot_space; s; s = t) {
2815 t = s->next;
2816 urldb_destroy_prot_space(s);
2817 }
2818
2819 /* And ourselves */
2820 free(root->part);
2821 free(root);
2822}
2823
2824
2825/**
2826 * Destroy a search tree
2827 *
2828 * \param root Root node of tree to destroy
2829 */
2830static void urldb_destroy_search_tree(struct search_node *root)
2831{
2832 /* Destroy children */
2833 if (root->left != &empty)
2834 urldb_destroy_search_tree(root->left);
2835 if (root->right != &empty)
2836 urldb_destroy_search_tree(root->right);
2837
2838 /* And destroy ourselves */
2839 free(root);
2840}
2841
2842
2843/*************** External interface ***************/
2844
2845
2846/* exported interface documented in content/urldb.h */
2847void urldb_destroy(void)
2848{
2849 struct host_part *a, *b;
2850 int i;
2851
2852 /* Clean up search trees */
2853 for (i = 0; i < NUM_SEARCH_TREES28; i++) {
2854 if (search_trees[i] != &empty) {
2855 urldb_destroy_search_tree(search_trees[i]);
2856 search_trees[i] = &empty;
2857 }
2858 }
2859
2860 /* And database */
2861 for (a = db_root.children; a; a = b) {
2862 b = a->next;
2863 urldb_destroy_host_tree(a);
2864 }
2865 memset(&db_root, 0, sizeof(db_root));
2866
2867 /* And the bloom filter */
2868 if (url_bloom != NULL((void*)0)) {
2869 bloom_destroy(url_bloom);
2870 url_bloom = NULL((void*)0);
2871 }
2872}
2873
2874
2875/* exported interface documented in netsurf/url_db.h */
2876nserror urldb_load(const char *filename)
2877{
2878#define MAXIMUM_URL_LENGTH 4096
2879 char s[MAXIMUM_URL_LENGTH];
2880 char host[256];
2881 struct host_part *h;
2882 int urls;
2883 int i;
2884 int version;
2885 int length;
2886 FILE *fp;
2887
2888 assert(filename)((filename) ? (void) (0) : __assert_fail ("filename", "content/urldb.c"
, 2888, __extension__ __PRETTY_FUNCTION__))
;
2889
2890 NSLOG(netsurf, INFO, "Loading URL file %s", filename)do { if (NSLOG_LEVEL_INFO >= NSLOG_LEVEL_VERBOSE) { static
nslog_entry_context_t _nslog_ctx = { &__nslog_category_netsurf
, NSLOG_LEVEL_INFO, "content/urldb.c", sizeof("content/urldb.c"
) - 1, __PRETTY_FUNCTION__, sizeof(__PRETTY_FUNCTION__) - 1, 2890
, }; nslog__log(&_nslog_ctx, "Loading URL file %s", filename
); } } while(0)
;
2891
2892 if (url_bloom == NULL((void*)0))
2893 url_bloom = bloom_create(BLOOM_SIZE(1024 * 32));
2894
2895 fp = fopen(filename, "r");
2896 if (!fp) {
2897 NSLOG(netsurf, INFO, "Failed to open file '%s' for reading",do { if (NSLOG_LEVEL_INFO >= NSLOG_LEVEL_VERBOSE) { static
nslog_entry_context_t _nslog_ctx = { &__nslog_category_netsurf
, NSLOG_LEVEL_INFO, "content/urldb.c", sizeof("content/urldb.c"
) - 1, __PRETTY_FUNCTION__, sizeof(__PRETTY_FUNCTION__) - 1, 2898
, }; nslog__log(&_nslog_ctx, "Failed to open file '%s' for reading"
, filename); } } while(0)
2898 filename)do { if (NSLOG_LEVEL_INFO >= NSLOG_LEVEL_VERBOSE) { static
nslog_entry_context_t _nslog_ctx = { &__nslog_category_netsurf
, NSLOG_LEVEL_INFO, "content/urldb.c", sizeof("content/urldb.c"
) - 1, __PRETTY_FUNCTION__, sizeof(__PRETTY_FUNCTION__) - 1, 2898
, }; nslog__log(&_nslog_ctx, "Failed to open file '%s' for reading"
, filename); } } while(0)
;
2899 return NSERROR_NOT_FOUND;
2900 }
2901
2902 if (!fgets(s, MAXIMUM_URL_LENGTH, fp)) {
2903 fclose(fp);
2904 return NSERROR_NEED_DATA;
2905 }
2906
2907 version = atoi(s);
2908 if (version < MIN_URL_FILE_VERSION106) {
2909 NSLOG(netsurf, INFO, "Unsupported URL file version.")do { if (NSLOG_LEVEL_INFO >= NSLOG_LEVEL_VERBOSE) { static
nslog_entry_context_t _nslog_ctx = { &__nslog_category_netsurf
, NSLOG_LEVEL_INFO, "content/urldb.c", sizeof("content/urldb.c"
) - 1, __PRETTY_FUNCTION__, sizeof(__PRETTY_FUNCTION__) - 1, 2909
, }; nslog__log(&_nslog_ctx, "Unsupported URL file version."
); } } while(0)
;
2910 fclose(fp);
2911 return NSERROR_INVALID;
2912 }
2913 if (version > URL_FILE_VERSION107) {
2914 NSLOG(netsurf, INFO, "Unknown URL file version.")do { if (NSLOG_LEVEL_INFO >= NSLOG_LEVEL_VERBOSE) { static
nslog_entry_context_t _nslog_ctx = { &__nslog_category_netsurf
, NSLOG_LEVEL_INFO, "content/urldb.c", sizeof("content/urldb.c"
) - 1, __PRETTY_FUNCTION__, sizeof(__PRETTY_FUNCTION__) - 1, 2914
, }; nslog__log(&_nslog_ctx, "Unknown URL file version.")
; } } while(0)
;
2915 fclose(fp);
2916 return NSERROR_INVALID;
2917 }
2918
2919 while (fgets(host, sizeof host, fp)) {
2920 time_t hsts_expiry = 0;
2921 int hsts_include_sub_domains = 0;
2922
2923 /* get the hostname */
2924 length = strlen(host) - 1;
2925 host[length] = '\0';
2926
2927 /* skip data that has ended up with a host of '' */
2928 if (length == 0) {
2929 if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
2930 break;
2931 urls = atoi(s);
2932 /* Eight fields/url */
2933 for (i = 0; i < (8 * urls); i++) {
2934 if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
2935 break;
2936 }
2937 continue;
2938 }
2939
2940 if (version >= 107) {
2941 char *p = host;
2942 while (*p && *p != ' ') p++;
2943 while (*p && *p == ' ') { *p = '\0'; p++; }
2944 hsts_include_sub_domains = (*p == '1');
2945 while (*p && *p != ' ') p++;
2946 while (*p && *p == ' ') p++;
2947 nsc_snptimet(p, strlen(p), &hsts_expiry);
2948 }
2949
2950 h = urldb_add_host(host);
2951 if (!h) {
2952 NSLOG(netsurf, INFO, "Failed adding host: '%s'", host)do { if (NSLOG_LEVEL_INFO >= NSLOG_LEVEL_VERBOSE) { static
nslog_entry_context_t _nslog_ctx = { &__nslog_category_netsurf
, NSLOG_LEVEL_INFO, "content/urldb.c", sizeof("content/urldb.c"
) - 1, __PRETTY_FUNCTION__, sizeof(__PRETTY_FUNCTION__) - 1, 2952
, }; nslog__log(&_nslog_ctx, "Failed adding host: '%s'", host
); } } while(0)
;
2953 fclose(fp);
2954 return NSERROR_NOMEM;
2955 }
2956 h->hsts.expires = hsts_expiry;
2957 h->hsts.include_sub_domains = hsts_include_sub_domains;
2958
2959 /* read number of URLs */
2960 if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
2961 break;
2962 urls = atoi(s);
2963
2964 /* no URLs => try next host */
2965 if (urls == 0) {
2966 NSLOG(netsurf, INFO, "No URLs for '%s'", host)do { if (NSLOG_LEVEL_INFO >= NSLOG_LEVEL_VERBOSE) { static
nslog_entry_context_t _nslog_ctx = { &__nslog_category_netsurf
, NSLOG_LEVEL_INFO, "content/urldb.c", sizeof("content/urldb.c"
) - 1, __PRETTY_FUNCTION__, sizeof(__PRETTY_FUNCTION__) - 1, 2966
, }; nslog__log(&_nslog_ctx, "No URLs for '%s'", host); }
} while(0)
;
2967 continue;
2968 }
2969
2970 /* load the non-corrupt data */
2971 for (i = 0; i < urls; i++) {
2972 struct path_data *p = NULL((void*)0);
2973 char scheme[64], ports[10];
2974 char url[64 + 3 + 256 + 6 + 4096 + 1 + 1];
2975 unsigned int port;
2976 bool_Bool is_file = false0;
2977 nsurl *nsurl;
2978 lwc_string *scheme_lwc, *fragment_lwc;
2979 char *path_query;
2980 size_t len;
2981
2982 if (!fgets(scheme, sizeof scheme, fp))
2983 break;
2984 length = strlen(scheme) - 1;
2985 scheme[length] = '\0';
2986
2987 if (!fgets(ports, sizeof ports, fp))
2988 break;
2989 length = strlen(ports) - 1;
2990 ports[length] = '\0';
2991 port = atoi(ports);
2992
2993 if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
2994 break;
2995 length = strlen(s) - 1;
2996 s[length] = '\0';
2997
2998 if (!strcasecmp(host, "localhost") &&
2999 !strcasecmp(scheme, "file"))
3000 is_file = true1;
3001
3002 snprintf(url, sizeof url, "%s://%s%s%s%s",
3003 scheme,
3004 /* file URLs have no host */
3005 (is_file ? "" : host),
3006 (port ? ":" : ""),
3007 (port ? ports : ""),
3008 s);
3009
3010 /* TODO: store URLs in pre-parsed state, and make
3011 * a nsurl_load to generate the nsurl more
3012 * swiftly.
3013 * Need a nsurl_save too.
3014 */
3015 if (nsurl_create(url, &nsurl) != NSERROR_OK) {
3016 NSLOG(netsurf, INFO, "Failed inserting '%s'",do { if (NSLOG_LEVEL_INFO >= NSLOG_LEVEL_VERBOSE) { static
nslog_entry_context_t _nslog_ctx = { &__nslog_category_netsurf
, NSLOG_LEVEL_INFO, "content/urldb.c", sizeof("content/urldb.c"
) - 1, __PRETTY_FUNCTION__, sizeof(__PRETTY_FUNCTION__) - 1, 3017
, }; nslog__log(&_nslog_ctx, "Failed inserting '%s'", url
); } } while(0)
3017 url)do { if (NSLOG_LEVEL_INFO >= NSLOG_LEVEL_VERBOSE) { static
nslog_entry_context_t _nslog_ctx = { &__nslog_category_netsurf
, NSLOG_LEVEL_INFO, "content/urldb.c", sizeof("content/urldb.c"
) - 1, __PRETTY_FUNCTION__, sizeof(__PRETTY_FUNCTION__) - 1, 3017
, }; nslog__log(&_nslog_ctx, "Failed inserting '%s'", url
); } } while(0)
;
3018 fclose(fp);
3019 return NSERROR_NOMEM;
3020 }
3021
3022 if (url_bloom != NULL((void*)0)) {
3023 uint32_t hash = nsurl_hash(nsurl);
3024 bloom_insert_hash(url_bloom, hash);
3025 }
3026
3027 /* Copy and merge path/query strings */
3028 if (nsurl_get(nsurl, NSURL_PATH | NSURL_QUERY,
3029 &path_query, &len) != NSERROR_OK) {
3030 NSLOG(netsurf, INFO, "Failed inserting '%s'",do { if (NSLOG_LEVEL_INFO >= NSLOG_LEVEL_VERBOSE) { static
nslog_entry_context_t _nslog_ctx = { &__nslog_category_netsurf
, NSLOG_LEVEL_INFO, "content/urldb.c", sizeof("content/urldb.c"
) - 1, __PRETTY_FUNCTION__, sizeof(__PRETTY_FUNCTION__) - 1, 3031
, }; nslog__log(&_nslog_ctx, "Failed inserting '%s'", url
); } } while(0)
3031 url)do { if (NSLOG_LEVEL_INFO >= NSLOG_LEVEL_VERBOSE) { static
nslog_entry_context_t _nslog_ctx = { &__nslog_category_netsurf
, NSLOG_LEVEL_INFO, "content/urldb.c", sizeof("content/urldb.c"
) - 1, __PRETTY_FUNCTION__, sizeof(__PRETTY_FUNCTION__) - 1, 3031
, }; nslog__log(&_nslog_ctx, "Failed inserting '%s'", url
); } } while(0)
;
3032 fclose(fp);
3033 return NSERROR_NOMEM;
3034 }
3035
3036 scheme_lwc = nsurl_get_component(nsurl, NSURL_SCHEME);
3037 fragment_lwc = nsurl_get_component(nsurl,
3038 NSURL_FRAGMENT);
3039 p = urldb_add_path(scheme_lwc, port, h, path_query,
3040 fragment_lwc, nsurl);
3041 if (!p) {
3042 NSLOG(netsurf, INFO, "Failed inserting '%s'",do { if (NSLOG_LEVEL_INFO >= NSLOG_LEVEL_VERBOSE) { static
nslog_entry_context_t _nslog_ctx = { &__nslog_category_netsurf
, NSLOG_LEVEL_INFO, "content/urldb.c", sizeof("content/urldb.c"
) - 1, __PRETTY_FUNCTION__, sizeof(__PRETTY_FUNCTION__) - 1, 3043
, }; nslog__log(&_nslog_ctx, "Failed inserting '%s'", url
); } } while(0)
3043 url)do { if (NSLOG_LEVEL_INFO >= NSLOG_LEVEL_VERBOSE) { static
nslog_entry_context_t _nslog_ctx = { &__nslog_category_netsurf
, NSLOG_LEVEL_INFO, "content/urldb.c", sizeof("content/urldb.c"
) - 1, __PRETTY_FUNCTION__, sizeof(__PRETTY_FUNCTION__) - 1, 3043
, }; nslog__log(&_nslog_ctx, "Failed inserting '%s'", url
); } } while(0)
;
3044 fclose(fp);
3045 return NSERROR_NOMEM;
3046 }
3047 nsurl_unref(nsurl);
3048 lwc_string_unref(scheme_lwc){ lwc_string *__lwc_s = (scheme_lwc); ((__lwc_s != ((void*)0)
) ? (void) (0) : __assert_fail ("__lwc_s != NULL", "content/urldb.c"
, 3048, __extension__ __PRETTY_FUNCTION__)); __lwc_s->refcnt
--; if ((__lwc_s->refcnt == 0) || ((__lwc_s->refcnt == 1
) && (__lwc_s->insensitive == __lwc_s))) lwc_string_destroy
(__lwc_s); }
;
3049 if (fragment_lwc != NULL((void*)0))
3050 lwc_string_unref(fragment_lwc){ lwc_string *__lwc_s = (fragment_lwc); ((__lwc_s != ((void*)
0)) ? (void) (0) : __assert_fail ("__lwc_s != NULL", "content/urldb.c"
, 3050, __extension__ __PRETTY_FUNCTION__)); __lwc_s->refcnt
--; if ((__lwc_s->refcnt == 0) || ((__lwc_s->refcnt == 1
) && (__lwc_s->insensitive == __lwc_s))) lwc_string_destroy
(__lwc_s); }
;
3051
3052 if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
3053 break;
3054 if (p)
3055 p->urld.visits = (unsigned int)atoi(s);
3056
3057 /* entry last use time */
3058 if (!fgets(s, MAXIMUM_URL_LENGTH, fp)) {
3059 break;
3060 }
3061 if (p) {
3062 nsc_snptimet(s, strlen(s) - 1, &p->urld.last_visit);
3063 }
3064
3065 if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
3066 break;
3067 if (p)
3068 p->urld.type = (content_type)atoi(s);
3069
3070 if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
3071 break;
3072
3073
3074 if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
3075 break;
3076 length = strlen(s) - 1;
3077 if (p && length > 0) {
3078 s[length] = '\0';
3079 p->urld.title = malloc(length + 1);
3080 if (p->urld.title)
3081 memcpy(p->urld.title, s, length + 1);
3082 }
3083 }
3084 }
3085
3086 fclose(fp);
3087 NSLOG(netsurf, INFO, "Successfully loaded URL file")do { if (NSLOG_LEVEL_INFO >= NSLOG_LEVEL_VERBOSE) { static
nslog_entry_context_t _nslog_ctx = { &__nslog_category_netsurf
, NSLOG_LEVEL_INFO, "content/urldb.c", sizeof("content/urldb.c"
) - 1, __PRETTY_FUNCTION__, sizeof(__PRETTY_FUNCTION__) - 1, 3087
, }; nslog__log(&_nslog_ctx, "Successfully loaded URL file"
); } } while(0)
;
3088#undef MAXIMUM_URL_LENGTH
3089
3090 return NSERROR_OK;
3091}
3092
3093/* exported interface documented in netsurf/url_db.h */
3094nserror urldb_save(const char *filename)
3095{
3096 FILE *fp;
3097 int i;
3098
3099 assert(filename)((filename) ? (void) (0) : __assert_fail ("filename", "content/urldb.c"
, 3099, __extension__ __PRETTY_FUNCTION__))
;
1
Assuming 'filename' is non-null
2
'?' condition is true
3100
3101 fp = fopen(filename, "w");
3102 if (!fp) {
3
Assuming 'fp' is non-null
4
Taking false branch
3103 NSLOG(netsurf, INFO, "Failed to open file '%s' for writing",do { if (NSLOG_LEVEL_INFO >= NSLOG_LEVEL_VERBOSE) { static
nslog_entry_context_t _nslog_ctx = { &__nslog_category_netsurf
, NSLOG_LEVEL_INFO, "content/urldb.c", sizeof("content/urldb.c"
) - 1, __PRETTY_FUNCTION__, sizeof(__PRETTY_FUNCTION__) - 1, 3104
, }; nslog__log(&_nslog_ctx, "Failed to open file '%s' for writing"
, filename); } } while(0)
3104 filename)do { if (NSLOG_LEVEL_INFO >= NSLOG_LEVEL_VERBOSE) { static
nslog_entry_context_t _nslog_ctx = { &__nslog_category_netsurf
, NSLOG_LEVEL_INFO, "content/urldb.c", sizeof("content/urldb.c"
) - 1, __PRETTY_FUNCTION__, sizeof(__PRETTY_FUNCTION__) - 1, 3104
, }; nslog__log(&_nslog_ctx, "Failed to open file '%s' for writing"
, filename); } } while(0)
;
3105 return NSERROR_SAVE_FAILED;
3106 }
3107
3108 /* file format version number */
3109 fprintf(fp, "%d\n", URL_FILE_VERSION107);
3110
3111 for (i = 0; i != NUM_SEARCH_TREES28; i++) {
5
Loop condition is true. Entering loop body
3112 urldb_save_search_tree(search_trees[i], fp);
6
Calling 'urldb_save_search_tree'
3113 }
3114
3115 fclose(fp);
3116
3117 return NSERROR_OK;
3118}
3119
3120
3121/* exported interface documented in content/urldb.h */
3122nserror urldb_set_url_persistence(nsurl *url, bool_Bool persist)
3123{
3124 struct path_data *p;
3125
3126 assert(url)((url) ? (void) (0) : __assert_fail ("url", "content/urldb.c"
, 3126, __extension__ __PRETTY_FUNCTION__))
;
3127
3128 p = urldb_find_url(url);
3129 if (!p) {
3130 return NSERROR_NOT_FOUND;
3131 }
3132
3133 p->persistent = persist;
3134
3135 return NSERROR_OK;
3136}
3137
3138
3139/* exported interface documented in content/urldb.h */
3140bool_Bool urldb_add_url(nsurl *url)
3141{
3142 struct host_part *h;
3143 struct path_data *p;
3144 lwc_string *scheme;
3145 lwc_string *port;
3146 lwc_string *host;
3147 lwc_string *fragment;
3148 const char *host_str;
3149 char *path_query = NULL((void*)0);
3150 size_t len;
3151 bool_Bool match;
3152 unsigned int port_int;
3153
3154 assert(url)((url) ? (void) (0) : __assert_fail ("url", "content/urldb.c"
, 3154, __extension__ __PRETTY_FUNCTION__))
;
3155
3156 if (url_bloom == NULL((void*)0))
3157 url_bloom = bloom_create(BLOOM_SIZE(1024 * 32));
3158
3159 if (url_bloom != NULL((void*)0)) {
3160 uint32_t hash = nsurl_hash(url);
3161 bloom_insert_hash(url_bloom, hash);
3162 }
3163
3164 /* Copy and merge path/query strings */
3165 if (nsurl_get(url, NSURL_PATH | NSURL_QUERY, &path_query, &len) !=
3166 NSERROR_OK) {
3167 return false0;
3168 }
3169 assert(path_query != NULL)((path_query != ((void*)0)) ? (void) (0) : __assert_fail ("path_query != NULL"
, "content/urldb.c", 3169, __extension__ __PRETTY_FUNCTION__)
)
;
3170
3171 scheme = nsurl_get_component(url, NSURL_SCHEME);
3172 if (scheme == NULL((void*)0)) {
3173 free(path_query);
3174 return false0;
3175 }
3176
3177 host = nsurl_get_component(url, NSURL_HOST);
3178 if (host != NULL((void*)0)) {
3179 host_str = lwc_string_data(host)({((host != ((void*)0)) ? (void) (0) : __assert_fail ("host != NULL"
, "content/urldb.c", 3179, __extension__ __PRETTY_FUNCTION__)
); (const char *)((host)+1);})
;
3180 lwc_string_unref(host){ lwc_string *__lwc_s = (host); ((__lwc_s != ((void*)0)) ? (void
) (0) : __assert_fail ("__lwc_s != NULL", "content/urldb.c", 3180
, __extension__ __PRETTY_FUNCTION__)); __lwc_s->refcnt--; if
((__lwc_s->refcnt == 0) || ((__lwc_s->refcnt == 1) &&
(__lwc_s->insensitive == __lwc_s))) lwc_string_destroy(__lwc_s
); }
;
3181
3182 } else if (lwc_string_isequal(scheme, corestring_lwc_file, &match)((*(&match) = ((scheme) == (corestring_lwc_file))), lwc_error_ok
)
==
3183 lwc_error_ok && match == true1) {
3184 host_str = "localhost";
3185
3186 } else {
3187 lwc_string_unref(scheme){ lwc_string *__lwc_s = (scheme); ((__lwc_s != ((void*)0)) ? (
void) (0) : __assert_fail ("__lwc_s != NULL", "content/urldb.c"
, 3187, __extension__ __PRETTY_FUNCTION__)); __lwc_s->refcnt
--; if ((__lwc_s->refcnt == 0) || ((__lwc_s->refcnt == 1
) && (__lwc_s->insensitive == __lwc_s))) lwc_string_destroy
(__lwc_s); }
;
3188 free(path_query);
3189 return false0;
3190 }
3191
3192 fragment = nsurl_get_component(url, NSURL_FRAGMENT);
3193
3194 port = nsurl_get_component(url, NSURL_PORT);
3195 if (port != NULL((void*)0)) {
3196 port_int = atoi(lwc_string_data(port)({((port != ((void*)0)) ? (void) (0) : __assert_fail ("port != NULL"
, "content/urldb.c", 3196, __extension__ __PRETTY_FUNCTION__)
); (const char *)((port)+1);})
);
3197 lwc_string_unref(port){ lwc_string *__lwc_s = (port); ((__lwc_s != ((void*)0)) ? (void
) (0) : __assert_fail ("__lwc_s != NULL", "content/urldb.c", 3197
, __extension__ __PRETTY_FUNCTION__)); __lwc_s->refcnt--; if
((__lwc_s->refcnt == 0) || ((__lwc_s->refcnt == 1) &&
(__lwc_s->insensitive == __lwc_s))) lwc_string_destroy(__lwc_s
); }
;
3198 } else {
3199 port_int = 0;
3200 }
3201
3202 /* Get host entry */
3203 h = urldb_add_host(host_str);
3204
3205 /* Get path entry */
3206 if (h != NULL((void*)0)) {
3207 p = urldb_add_path(scheme,
3208 port_int,
3209 h,
3210 path_query,
3211 fragment,
3212 url);
3213 } else {
3214 p = NULL((void*)0);
3215 }
3216
3217 lwc_string_unref(scheme){ lwc_string *__lwc_s = (scheme); ((__lwc_s != ((void*)0)) ? (
void) (0) : __assert_fail ("__lwc_s != NULL", "content/urldb.c"
, 3217, __extension__ __PRETTY_FUNCTION__)); __lwc_s->refcnt
--; if ((__lwc_s->refcnt == 0) || ((__lwc_s->refcnt == 1
) && (__lwc_s->insensitive == __lwc_s))) lwc_string_destroy
(__lwc_s); }
;
3218 if (fragment != NULL((void*)0))
3219 lwc_string_unref(fragment){ lwc_string *__lwc_s = (fragment); ((__lwc_s != ((void*)0)) ?
(void) (0) : __assert_fail ("__lwc_s != NULL", "content/urldb.c"
, 3219, __extension__ __PRETTY_FUNCTION__)); __lwc_s->refcnt
--; if ((__lwc_s->refcnt == 0) || ((__lwc_s->refcnt == 1
) && (__lwc_s->insensitive == __lwc_s))) lwc_string_destroy
(__lwc_s); }
;
3220
3221 return (p != NULL((void*)0));
3222}
3223
3224
3225/* exported interface documented in content/urldb.h */
3226nserror urldb_set_url_title(nsurl *url, const char *title)
3227{
3228 struct path_data *p;
3229 char *temp;
3230
3231 assert(url)((url) ? (void) (0) : __assert_fail ("url", "content/urldb.c"
, 3231, __extension__ __PRETTY_FUNCTION__))
;
3232
3233 p = urldb_find_url(url);
3234 if (p == NULL((void*)0)) {
3235 return NSERROR_NOT_FOUND;
3236 }
3237
3238 /* copy the parameter if necessary */
3239 if (title != NULL((void*)0)) {
3240 temp = strdup(title);
3241 if (temp == NULL((void*)0)) {
3242 return NSERROR_NOMEM;
3243 }
3244 } else {
3245 temp = NULL((void*)0);
3246 }
3247
3248 free(p->urld.title);
3249 p->urld.title = temp;
3250
3251 return NSERROR_OK;
3252}
3253
3254
3255/* exported interface documented in content/urldb.h */
3256nserror urldb_set_url_content_type(nsurl *url, content_type type)
3257{
3258 struct path_data *p;
3259
3260 assert(url)((url) ? (void) (0) : __assert_fail ("url", "content/urldb.c"
, 3260, __extension__ __PRETTY_FUNCTION__))
;
3261
3262 p = urldb_find_url(url);
3263 if (!p) {
3264 return NSERROR_NOT_FOUND;
3265 }
3266
3267 p->urld.type = type;
3268
3269 return NSERROR_OK;
3270}
3271
3272
3273/* exported interface documented in content/urldb.h */
3274nserror urldb_update_url_visit_data(nsurl *url)
3275{
3276 struct path_data *p;
3277
3278 assert(url)((url) ? (void) (0) : __assert_fail ("url", "content/urldb.c"
, 3278, __extension__ __PRETTY_FUNCTION__))
;
3279
3280 p = urldb_find_url(url);
3281 if (!p) {
3282 return NSERROR_NOT_FOUND;
3283 }
3284
3285 p->urld.last_visit = time(NULL((void*)0));
3286 p->urld.visits++;
3287
3288 return NSERROR_OK;
3289}
3290
3291
3292/* exported interface documented in content/urldb.h */
3293void urldb_reset_url_visit_data(nsurl *url)
3294{
3295 struct path_data *p;
3296
3297 assert(url)((url) ? (void) (0) : __assert_fail ("url", "content/urldb.c"
, 3297, __extension__ __PRETTY_FUNCTION__))
;
3298
3299 p = urldb_find_url(url);
3300 if (!p)
3301 return;
3302
3303 p->urld.last_visit = (time_t)0;
3304 p->urld.visits = 0;
3305}
3306
3307
3308/* exported interface documented in netsurf/url_db.h */
3309const struct url_data *urldb_get_url_data(nsurl *url)
3310{
3311 struct path_data *p;
3312 struct url_internal_data *u;
3313
3314 assert(url)((url) ? (void) (0) : __assert_fail ("url", "content/urldb.c"
, 3314, __extension__ __PRETTY_FUNCTION__))
;
3315
3316 p = urldb_find_url(url);
3317 if (!p)
3318 return NULL((void*)0);
3319
3320 u = &p->urld;
3321
3322 return (const struct url_data *) u;
3323}
3324
3325
3326/* exported interface documented in content/urldb.h */
3327nsurl *urldb_get_url(nsurl *url)
3328{
3329 struct path_data *p;
3330
3331 assert(url)((url) ? (void) (0) : __assert_fail ("url", "content/urldb.c"
, 3331, __extension__ __PRETTY_FUNCTION__))
;
3332
3333 p = urldb_find_url(url);
3334 if (!p)
3335 return NULL((void*)0);
3336
3337 return p->url;
3338}
3339
3340
3341/* exported interface documented in netsurf/url_db.h */
3342void urldb_set_auth_details(nsurl *url, const char *realm, const char *auth)
3343{
3344 struct path_data *p, *pi;
3345 struct host_part *h;
3346 struct prot_space_data *space, *space_alloc;
3347 char *realm_alloc, *auth_alloc;
3348 bool_Bool match;
3349
3350 assert(url && realm && auth)((url && realm && auth) ? (void) (0) : __assert_fail
("url && realm && auth", "content/urldb.c", 3350
, __extension__ __PRETTY_FUNCTION__))
;
3351
3352 /* add url, in case it's missing */
3353 urldb_add_url(url);
3354
3355 p = urldb_find_url(url);
3356
3357 if (!p)
3358 return;
3359
3360 /* Search for host_part */
3361 for (pi = p; pi->parent != NULL((void*)0); pi = pi->parent)
3362 ;
3363 h = (struct host_part *)pi;
3364
3365 /* Search if given URL belongs to a protection space we already know of. */
3366 for (space = h->prot_space; space; space = space->next) {
3367 if (!strcmp(space->realm, realm) &&
3368 lwc_string_isequal(space->scheme, p->scheme,((*(&match) = ((space->scheme) == (p->scheme))), lwc_error_ok
)
3369 &match)((*(&match) = ((space->scheme) == (p->scheme))), lwc_error_ok
)
== lwc_error_ok &&
3370 match == true1 &&
3371 space->port == p->port)
3372 break;
3373 }
3374
3375 if (space != NULL((void*)0)) {
3376 /* Overrule existing auth. */
3377 free(space->auth);
3378 space->auth = strdup(auth);
3379 } else {
3380 /* Create a new protection space. */
3381 space = space_alloc = malloc(sizeof(struct prot_space_data));
3382 realm_alloc = strdup(realm);
3383 auth_alloc = strdup(auth);
3384
3385 if (!space_alloc || !realm_alloc || !auth_alloc) {
3386 free(space_alloc);
3387 free(realm_alloc);
3388 free(auth_alloc);
3389 return;
3390 }
3391
3392 space->scheme = lwc_string_ref(p->scheme)({lwc_string *__lwc_s = (p->scheme); ((__lwc_s != ((void*)
0)) ? (void) (0) : __assert_fail ("__lwc_s != NULL", "content/urldb.c"
, 3392, __extension__ __PRETTY_FUNCTION__)); __lwc_s->refcnt
++; __lwc_s;})
;
3393 space->port = p->port;
3394 space->realm = realm_alloc;
3395 space->auth = auth_alloc;
3396 space->next = h->prot_space;
3397 h->prot_space = space;
3398 }
3399
3400 p->prot_space = space;
3401}
3402
3403
3404/* exported interface documented in netsurf/url_db.h */
3405const char *urldb_get_auth_details(nsurl *url, const char *realm)
3406{
3407 struct path_data *p, *p_cur, *p_top;
3408
3409 assert(url)((url) ? (void) (0) : __assert_fail ("url", "content/urldb.c"
, 3409, __extension__ __PRETTY_FUNCTION__))
;
3410
3411 /* add to the db, so our lookup will work */
3412 urldb_add_url(url);
3413
3414 p = urldb_find_url(url);
3415 if (!p)
3416 return NULL((void*)0);
3417
3418 /* Check for any auth details attached to the path_data node or any of
3419 * its parents.
3420 */
3421 for (p_cur = p; p_cur != NULL((void*)0); p_top = p_cur, p_cur = p_cur->parent) {
3422 if (p_cur->prot_space) {
3423 return p_cur->prot_space->auth;
3424 }
3425 }
3426
3427 /* Only when we have a realm (and canonical root of given URL), we can
3428 * uniquely locate the protection space.
3429 */
3430 if (realm != NULL((void*)0)) {
3431 const struct host_part *h = (const struct host_part *)p_top;
3432 const struct prot_space_data *space;
3433 bool_Bool match;
3434
3435 /* Search for a possible matching protection space. */
3436 for (space = h->prot_space; space != NULL((void*)0);
3437 space = space->next) {
3438 if (!strcmp(space->realm, realm) &&
3439 lwc_string_isequal(space->scheme,((*(&match) = ((space->scheme) == (p->scheme))), lwc_error_ok
)
3440 p->scheme, &match)((*(&match) = ((space->scheme) == (p->scheme))), lwc_error_ok
)
==
3441 lwc_error_ok &&
3442 match == true1 &&
3443 space->port == p->port) {
3444 p->prot_space = space;
3445 return p->prot_space->auth;
3446 }
3447 }
3448 }
3449
3450 return NULL((void*)0);
3451}
3452
3453
3454/* exported interface documented in netsurf/url_db.h */
3455void urldb_set_cert_permissions(nsurl *url, bool_Bool permit)
3456{
3457 struct path_data *p;
3458 struct host_part *h;
3459
3460 assert(url)((url) ? (void) (0) : __assert_fail ("url", "content/urldb.c"
, 3460, __extension__ __PRETTY_FUNCTION__))
;
3461
3462 /* add url, in case it's missing */
3463 urldb_add_url(url);
3464
3465 p = urldb_find_url(url);
3466 if (!p)
3467 return;
3468
3469 for (; p && p->parent; p = p->parent)
3470 /* do nothing */;
3471 assert(p)((p) ? (void) (0) : __assert_fail ("p", "content/urldb.c", 3471
, __extension__ __PRETTY_FUNCTION__))
;
3472
3473 h = (struct host_part *)p;
3474
3475 h->permit_invalid_certs = permit;
3476}
3477
3478
3479/* exported interface documented in content/urldb.h */
3480bool_Bool urldb_get_cert_permissions(nsurl *url)
3481{
3482 struct path_data *p;
3483 const struct host_part *h;
3484
3485 assert(url)((url) ? (void) (0) : __assert_fail ("url", "content/urldb.c"
, 3485, __extension__ __PRETTY_FUNCTION__))
;
3486
3487 p = urldb_find_url(url);
3488 if (!p)
3489 return false0;
3490
3491 for (; p && p->parent; p = p->parent)
3492 /* do nothing */;
3493 assert(p)((p) ? (void) (0) : __assert_fail ("p", "content/urldb.c", 3493
, __extension__ __PRETTY_FUNCTION__))
;
3494
3495 h = (const struct host_part *)p;
3496
3497 return h->permit_invalid_certs;
3498}
3499
3500
3501/* exported interface documented in content/urldb.h */
3502bool_Bool urldb_set_hsts_policy(struct nsurl *url, const char *header)
3503{
3504 struct path_data *p;
3505 struct host_part *h;
3506 lwc_string *host;
3507 time_t now = time(NULL((void*)0));
3508 http_strict_transport_security *sts;
3509 uint32_t max_age = 0;
3510 nserror error;
3511
3512 assert(url)((url) ? (void) (0) : __assert_fail ("url", "content/urldb.c"
, 3512, __extension__ __PRETTY_FUNCTION__))
;
3513
3514 host = nsurl_get_component(url, NSURL_HOST);
3515 if (host != NULL((void*)0)) {
3516 if (urldb__host_is_ip_address(lwc_string_data(host)({((host != ((void*)0)) ? (void) (0) : __assert_fail ("host != NULL"
, "content/urldb.c", 3516, __extension__ __PRETTY_FUNCTION__)
); (const char *)((host)+1);})
)) {
3517 /* Host is IP: ignore */
3518 lwc_string_unref(host){ lwc_string *__lwc_s = (host); ((__lwc_s != ((void*)0)) ? (void
) (0) : __assert_fail ("__lwc_s != NULL", "content/urldb.c", 3518
, __extension__ __PRETTY_FUNCTION__)); __lwc_s->refcnt--; if
((__lwc_s->refcnt == 0) || ((__lwc_s->refcnt == 1) &&
(__lwc_s->insensitive == __lwc_s))) lwc_string_destroy(__lwc_s
); }
;
3519 return true1;
3520 } else if (lwc_string_length(host)({((host != ((void*)0)) ? (void) (0) : __assert_fail ("host != NULL"
, "content/urldb.c", 3520, __extension__ __PRETTY_FUNCTION__)
); (host)->len;})
== 0) {
3521 /* Host is blank: ignore */
3522 lwc_string_unref(host){ lwc_string *__lwc_s = (host); ((__lwc_s != ((void*)0)) ? (void
) (0) : __assert_fail ("__lwc_s != NULL", "content/urldb.c", 3522
, __extension__ __PRETTY_FUNCTION__)); __lwc_s->refcnt--; if
((__lwc_s->refcnt == 0) || ((__lwc_s->refcnt == 1) &&
(__lwc_s->insensitive == __lwc_s))) lwc_string_destroy(__lwc_s
); }
;
3523 return true1;
3524 }
3525
3526 lwc_string_unref(host){ lwc_string *__lwc_s = (host); ((__lwc_s != ((void*)0)) ? (void
) (0) : __assert_fail ("__lwc_s != NULL", "content/urldb.c", 3526
, __extension__ __PRETTY_FUNCTION__)); __lwc_s->refcnt--; if
((__lwc_s->refcnt == 0) || ((__lwc_s->refcnt == 1) &&
(__lwc_s->insensitive == __lwc_s))) lwc_string_destroy(__lwc_s
); }
;
3527 } else {
3528 /* No host part: ignore */
3529 return true1;
3530 }
3531
3532 /* add url, in case it's missing */
3533 urldb_add_url(url);
3534
3535 p = urldb_find_url(url);
3536 if (!p)
3537 return false0;
3538
3539 for (; p && p->parent; p = p->parent)
3540 /* do nothing */;
3541 assert(p)((p) ? (void) (0) : __assert_fail ("p", "content/urldb.c", 3541
, __extension__ __PRETTY_FUNCTION__))
;
3542
3543 h = (struct host_part *)p;
3544 if (h->permit_invalid_certs) {
3545 /* Transport is tainted: ignore */
3546 return true1;
3547 }
3548
3549 error = http_parse_strict_transport_security(header, &sts);
3550 if (error != NSERROR_OK) {
3551 /* Parse failed: ignore */
3552 return true1;
3553 }
3554
3555 h->hsts.include_sub_domains =
3556 http_strict_transport_security_include_subdomains(sts);
3557
3558 max_age = http_strict_transport_security_max_age(sts);
3559 if (max_age == 0) {
3560 h->hsts.expires = 0;
3561 h->hsts.include_sub_domains = false0;
3562 } else if ((time_t) (now + max_age) > h->hsts.expires) {
3563 h->hsts.expires = now + max_age;
3564 }
3565
3566 http_strict_transport_security_destroy(sts);
3567
3568 return true1;
3569}
3570
3571
3572/* exported interface documented in content/urldb.h */
3573bool_Bool urldb_get_hsts_enabled(struct nsurl *url)
3574{
3575 struct path_data *p;
3576 const struct host_part *h;
3577 lwc_string *host;
3578 time_t now = time(NULL((void*)0));
3579
3580 assert(url)((url) ? (void) (0) : __assert_fail ("url", "content/urldb.c"
, 3580, __extension__ __PRETTY_FUNCTION__))
;
3581
3582 host = nsurl_get_component(url, NSURL_HOST);
3583 if (host != NULL((void*)0)) {
3584 if (urldb__host_is_ip_address(lwc_string_data(host)({((host != ((void*)0)) ? (void) (0) : __assert_fail ("host != NULL"
, "content/urldb.c", 3584, __extension__ __PRETTY_FUNCTION__)
); (const char *)((host)+1);})
)) {
3585 /* Host is IP: not enabled */
3586 lwc_string_unref(host){ lwc_string *__lwc_s = (host); ((__lwc_s != ((void*)0)) ? (void
) (0) : __assert_fail ("__lwc_s != NULL", "content/urldb.c", 3586
, __extension__ __PRETTY_FUNCTION__)); __lwc_s->refcnt--; if
((__lwc_s->refcnt == 0) || ((__lwc_s->refcnt == 1) &&
(__lwc_s->insensitive == __lwc_s))) lwc_string_destroy(__lwc_s
); }
;
3587 return false0;
3588 } else if (lwc_string_length(host)({((host != ((void*)0)) ? (void) (0) : __assert_fail ("host != NULL"
, "content/urldb.c", 3588, __extension__ __PRETTY_FUNCTION__)
); (host)->len;})
== 0) {
3589 /* Host is blank: not enabled */
3590 lwc_string_unref(host){ lwc_string *__lwc_s = (host); ((__lwc_s != ((void*)0)) ? (void
) (0) : __assert_fail ("__lwc_s != NULL", "content/urldb.c", 3590
, __extension__ __PRETTY_FUNCTION__)); __lwc_s->refcnt--; if
((__lwc_s->refcnt == 0) || ((__lwc_s->refcnt == 1) &&
(__lwc_s->insensitive == __lwc_s))) lwc_string_destroy(__lwc_s
); }
;
3591 return false0;
3592 }
3593
3594 lwc_string_unref(host){ lwc_string *__lwc_s = (host); ((__lwc_s != ((void*)0)) ? (void
) (0) : __assert_fail ("__lwc_s != NULL", "content/urldb.c", 3594
, __extension__ __PRETTY_FUNCTION__)); __lwc_s->refcnt--; if
((__lwc_s->refcnt == 0) || ((__lwc_s->refcnt == 1) &&
(__lwc_s->insensitive == __lwc_s))) lwc_string_destroy(__lwc_s
); }
;
3595 } else {
3596 /* No host part: not enabled */
3597 return false0;
3598 }
3599
3600 /* The URL must exist in the db in order to find HSTS policy, since
3601 * we search up the tree from the URL node, and policy from further
3602 * up may also apply. */
3603 urldb_add_url(url);
3604
3605 p = urldb_find_url(url);
3606 if (!p)
3607 return false0;
3608
3609 for (; p && p->parent; p = p->parent)
3610 /* do nothing */;
3611 assert(p)((p) ? (void) (0) : __assert_fail ("p", "content/urldb.c", 3611
, __extension__ __PRETTY_FUNCTION__))
;
3612
3613 h = (const struct host_part *)p;
3614
3615 /* Consult record for this host */
3616 if (h->hsts.expires > now) {
3617 /* Not expired */
3618 return true1;
3619 }
3620
3621 /* Consult parent domains */
3622 for (h = h->parent; h && h != &db_root; h = h->parent) {
3623 if (h->hsts.expires > now && h->hsts.include_sub_domains) {
3624 /* Not expired and subdomains included */
3625 return true1;
3626 }
3627 }
3628
3629 return false0;
3630}
3631
3632
3633/* exported interface documented in netsurf/url_db.h */
3634void
3635urldb_iterate_partial(const char *prefix,
3636 bool_Bool (*callback)(nsurl *url, const struct url_data *data))
3637{
3638 char host[256];
3639 char buf[260]; /* max domain + "www." */
3640 const char *slash, *scheme_sep;
3641 struct search_node *tree;
3642 const struct host_part *h;
3643
3644 assert(prefix && callback)((prefix && callback) ? (void) (0) : __assert_fail ("prefix && callback"
, "content/urldb.c", 3644, __extension__ __PRETTY_FUNCTION__)
)
;
3645
3646 /* strip scheme */
3647 scheme_sep = strstr(prefix, "://");
3648 if (scheme_sep)
3649 prefix = scheme_sep + 3;
3650
3651 slash = strchr(prefix, '/');
3652 tree = urldb_get_search_tree(prefix);
3653
3654 if (slash) {
3655 /* if there's a slash in the input, then we can
3656 * assume that we're looking for a path */
3657 snprintf(host, sizeof host, "%.*s",
3658 (int) (slash - prefix), prefix);
3659
3660 h = urldb_search_find(tree, host);
3661 if (!h) {
3662 int len = slash - prefix;
3663
3664 if (len <= 3 || strncasecmp(host, "www.", 4) != 0) {
3665 snprintf(buf, sizeof buf, "www.%s", host);
3666 h = urldb_search_find(
3667 search_trees[ST_DN2 + 'w' - 'a'],
3668 buf);
3669 if (!h)
3670 return;
3671 } else
3672 return;
3673 }
3674
3675 if (h->paths.children) {
3676 /* Have paths, iterate them */
3677 urldb_iterate_partial_path(&h->paths, slash + 1,
3678 callback);
3679 }
3680
3681 } else {
3682 int len = strlen(prefix);
3683
3684 /* looking for hosts */
3685 if (!urldb_iterate_partial_host(tree, prefix, callback))
3686 return;
3687
3688 if (len <= 3 || strncasecmp(prefix, "www.", 4) != 0) {
3689 /* now look for www.prefix */
3690 snprintf(buf, sizeof buf, "www.%s", prefix);
3691 if(!urldb_iterate_partial_host(
3692 search_trees[ST_DN2 + 'w' - 'a'],
3693 buf, callback))
3694 return;
3695 }
3696 }
3697}
3698
3699
3700/* exported interface documented in netsurf/url_db.h */
3701void
3702urldb_iterate_entries(bool_Bool (*callback)(nsurl *url, const struct url_data *data))
3703{
3704 int i;
3705
3706 assert(callback)((callback) ? (void) (0) : __assert_fail ("callback", "content/urldb.c"
, 3706, __extension__ __PRETTY_FUNCTION__))
;
3707
3708 for (i = 0; i < NUM_SEARCH_TREES28; i++) {
3709 if (!urldb_iterate_entries_host(search_trees[i],
3710 callback,
3711 NULL((void*)0))) {
3712 break;
3713 }
3714 }
3715}
3716
3717
3718/* exported interface documented in content/urldb.h */
3719void urldb_iterate_cookies(bool_Bool (*callback)(const struct cookie_data *data))
3720{
3721 int i;
3722
3723 assert(callback)((callback) ? (void) (0) : __assert_fail ("callback", "content/urldb.c"
, 3723, __extension__ __PRETTY_FUNCTION__))
;
3724
3725 for (i = 0; i < NUM_SEARCH_TREES28; i++) {
3726 if (!urldb_iterate_entries_host(search_trees[i],
3727 NULL((void*)0), callback))
3728 break;
3729 }
3730}
3731
3732
3733/* exported interface documented in content/urldb.h */
3734bool_Bool urldb_set_cookie(const char *header, nsurl *url, nsurl *referer)
3735{
3736 const char *cur = header, *end;
3737 lwc_string *path, *host, *scheme;
3738 nsurl *urlt;
3739 bool_Bool match;
3740
3741 assert(url && header)((url && header) ? (void) (0) : __assert_fail ("url && header"
, "content/urldb.c", 3741, __extension__ __PRETTY_FUNCTION__)
)
;
3742
3743 /* Get defragmented URL, as 'urlt' */
3744 if (nsurl_defragment(url, &urlt) != NSERROR_OK)
3745 return NULL((void*)0);
3746
3747 scheme = nsurl_get_component(url, NSURL_SCHEME);
3748 if (scheme == NULL((void*)0)) {
3749 nsurl_unref(urlt);
3750 return false0;
3751 }
3752
3753 path = nsurl_get_component(url, NSURL_PATH);
3754 if (path == NULL((void*)0)) {
3755 lwc_string_unref(scheme){ lwc_string *__lwc_s = (scheme); ((__lwc_s != ((void*)0)) ? (
void) (0) : __assert_fail ("__lwc_s != NULL", "content/urldb.c"
, 3755, __extension__ __PRETTY_FUNCTION__)); __lwc_s->refcnt
--; if ((__lwc_s->refcnt == 0) || ((__lwc_s->refcnt == 1
) && (__lwc_s->insensitive == __lwc_s))) lwc_string_destroy
(__lwc_s); }
;
3756 nsurl_unref(urlt);
3757 return false0;
3758 }
3759
3760 host = nsurl_get_component(url, NSURL_HOST);
3761 if (host == NULL((void*)0)) {
3762 lwc_string_unref(path){ lwc_string *__lwc_s = (path); ((__lwc_s != ((void*)0)) ? (void
) (0) : __assert_fail ("__lwc_s != NULL", "content/urldb.c", 3762
, __extension__ __PRETTY_FUNCTION__)); __lwc_s->refcnt--; if
((__lwc_s->refcnt == 0) || ((__lwc_s->refcnt == 1) &&
(__lwc_s->insensitive == __lwc_s))) lwc_string_destroy(__lwc_s
); }
;
3763 lwc_string_unref(scheme){ lwc_string *__lwc_s = (scheme); ((__lwc_s != ((void*)0)) ? (
void) (0) : __assert_fail ("__lwc_s != NULL", "content/urldb.c"
, 3763, __extension__ __PRETTY_FUNCTION__)); __lwc_s->refcnt
--; if ((__lwc_s->refcnt == 0) || ((__lwc_s->refcnt == 1
) && (__lwc_s->insensitive == __lwc_s))) lwc_string_destroy
(__lwc_s); }
;
3764 nsurl_unref(urlt);
3765 return false0;
3766 }
3767
3768 if (referer) {
3769 lwc_string *rhost;
3770
3771 /* Ensure that url's host name domain matches
3772 * referer's (4.3.5) */
3773 rhost = nsurl_get_component(referer, NSURL_HOST);
3774 if (rhost == NULL((void*)0)) {
3775 goto error;
3776 }
3777
3778 /* Domain match host names */
3779 if (lwc_string_isequal(host, rhost, &match)((*(&match) = ((host) == (rhost))), lwc_error_ok) == lwc_error_ok &&
3780 match == false0) {
3781 const char *hptr;
3782 const char *rptr;
3783 const char *dot;
3784 const char *host_data = lwc_string_data(host)({((host != ((void*)0)) ? (void) (0) : __assert_fail ("host != NULL"
, "content/urldb.c", 3784, __extension__ __PRETTY_FUNCTION__)
); (const char *)((host)+1);})
;
3785 const char *rhost_data = lwc_string_data(rhost)({((rhost != ((void*)0)) ? (void) (0) : __assert_fail ("rhost != NULL"
, "content/urldb.c", 3785, __extension__ __PRETTY_FUNCTION__)
); (const char *)((rhost)+1);})
;
3786
3787 /* Ensure neither host nor rhost are IP addresses */
3788 if (urldb__host_is_ip_address(host_data) ||
3789 urldb__host_is_ip_address(rhost_data)) {
3790 /* IP address, so no partial match */
3791 lwc_string_unref(rhost){ lwc_string *__lwc_s = (rhost); ((__lwc_s != ((void*)0)) ? (
void) (0) : __assert_fail ("__lwc_s != NULL", "content/urldb.c"
, 3791, __extension__ __PRETTY_FUNCTION__)); __lwc_s->refcnt
--; if ((__lwc_s->refcnt == 0) || ((__lwc_s->refcnt == 1
) && (__lwc_s->insensitive == __lwc_s))) lwc_string_destroy
(__lwc_s); }
;
3792 goto error;
3793 }
3794
3795 /* Not exact match, so try the following:
3796 *
3797 * 1) Find the longest common suffix of host and rhost
3798 * (may be all of host/rhost)
3799 * 2) Discard characters from the start of the suffix
3800 * until the suffix starts with a dot
3801 * (prevents foobar.com matching bar.com)
3802 * 3) Ensure the suffix is non-empty and contains
3803 * embedded dots (to avoid permitting .com as a
3804 * suffix)
3805 *
3806 * Note that the above in no way resembles the
3807 * domain matching algorithm found in RFC2109.
3808 * It does, however, model the real world rather
3809 * more accurately.
3810 */
3811
3812 /** \todo In future, we should consult a TLD service
3813 * instead of just looking for embedded dots.
3814 */
3815
3816 hptr = host_data + lwc_string_length(host)({((host != ((void*)0)) ? (void) (0) : __assert_fail ("host != NULL"
, "content/urldb.c", 3816, __extension__ __PRETTY_FUNCTION__)
); (host)->len;})
- 1;
3817 rptr = rhost_data + lwc_string_length(rhost)({((rhost != ((void*)0)) ? (void) (0) : __assert_fail ("rhost != NULL"
, "content/urldb.c", 3817, __extension__ __PRETTY_FUNCTION__)
); (rhost)->len;})
- 1;
3818
3819 /* 1 */
3820 while (hptr >= host_data && rptr >= rhost_data) {
3821 if (*hptr != *rptr)
3822 break;
3823 hptr--;
3824 rptr--;
3825 }
3826 /* Ensure we end up pointing at the start of the
3827 * common suffix. The above loop will exit pointing
3828 * to the byte before the start of the suffix. */
3829 hptr++;
3830
3831 /* 2 */
3832 while (*hptr != '\0' && *hptr != '.')
3833 hptr++;
3834
3835 /* 3 */
3836 if (*hptr == '\0' ||
3837 (dot = strchr(hptr + 1, '.')) == NULL((void*)0) ||
3838 *(dot + 1) == '\0') {
3839 lwc_string_unref(rhost){ lwc_string *__lwc_s = (rhost); ((__lwc_s != ((void*)0)) ? (
void) (0) : __assert_fail ("__lwc_s != NULL", "content/urldb.c"
, 3839, __extension__ __PRETTY_FUNCTION__)); __lwc_s->refcnt
--; if ((__lwc_s->refcnt == 0) || ((__lwc_s->refcnt == 1
) && (__lwc_s->insensitive == __lwc_s))) lwc_string_destroy
(__lwc_s); }
;
3840 goto error;
3841 }
3842 }
3843
3844 lwc_string_unref(rhost){ lwc_string *__lwc_s = (rhost); ((__lwc_s != ((void*)0)) ? (
void) (0) : __assert_fail ("__lwc_s != NULL", "content/urldb.c"
, 3844, __extension__ __PRETTY_FUNCTION__)); __lwc_s->refcnt
--; if ((__lwc_s->refcnt == 0) || ((__lwc_s->refcnt == 1
) && (__lwc_s->insensitive == __lwc_s))) lwc_string_destroy
(__lwc_s); }
;
3845 }
3846
3847 end = cur + strlen(cur) - 2 /* Trailing CRLF */;
3848
3849 do {
3850 struct cookie_internal_data *c;
3851 char *dot;
3852 size_t len;
3853#ifdef WITH_NSPSL1
3854 const char *suffix;
3855#endif
3856
3857 c = urldb_parse_cookie(url, &cur);
3858 if (!c) {
3859 /* failed => stop parsing */
3860 goto error;
3861 }
3862
3863 /* validate cookie */
3864
3865 /* 4.2.2:i Cookie must have NAME and VALUE */
3866 if (!c->name || !c->value) {
3867 urldb_free_cookie(c);
3868 goto error;
3869 }
3870
3871 /* 4.3.2:i Cookie path must be a prefix of URL path */
3872 len = strlen(c->path);
3873 if (len > lwc_string_length(path)({((path != ((void*)0)) ? (void) (0) : __assert_fail ("path != NULL"
, "content/urldb.c", 3873, __extension__ __PRETTY_FUNCTION__)
); (path)->len;})
||
3874 strncmp(c->path, lwc_string_data(path)({((path != ((void*)0)) ? (void) (0) : __assert_fail ("path != NULL"
, "content/urldb.c", 3874, __extension__ __PRETTY_FUNCTION__)
); (const char *)((path)+1);})
,
3875 len) != 0) {
3876 urldb_free_cookie(c);
3877 goto error;
3878 }
3879
3880#ifdef WITH_NSPSL1
3881 /* check domain is not a public suffix */
3882 dot = c->domain;
3883 if (*dot == '.') {
3884 dot++;
3885 }
3886 suffix = nspsl_getpublicsuffix(dot);
3887 if (suffix == NULL((void*)0)) {
3888 NSLOG(netsurf, INFO,do { if (NSLOG_LEVEL_INFO >= NSLOG_LEVEL_VERBOSE) { static
nslog_entry_context_t _nslog_ctx = { &__nslog_category_netsurf
, NSLOG_LEVEL_INFO, "content/urldb.c", sizeof("content/urldb.c"
) - 1, __PRETTY_FUNCTION__, sizeof(__PRETTY_FUNCTION__) - 1, 3889
, }; nslog__log(&_nslog_ctx, "domain %s was a public suffix domain"
, dot); } } while(0)
3889 "domain %s was a public suffix domain", dot)do { if (NSLOG_LEVEL_INFO >= NSLOG_LEVEL_VERBOSE) { static
nslog_entry_context_t _nslog_ctx = { &__nslog_category_netsurf
, NSLOG_LEVEL_INFO, "content/urldb.c", sizeof("content/urldb.c"
) - 1, __PRETTY_FUNCTION__, sizeof(__PRETTY_FUNCTION__) - 1, 3889
, }; nslog__log(&_nslog_ctx, "domain %s was a public suffix domain"
, dot); } } while(0)
;
3890 urldb_free_cookie(c);
3891 goto error;
3892 }
3893#else
3894 /* 4.3.2:ii Cookie domain must contain embedded dots */
3895 dot = strchr(c->domain + 1, '.');
3896 if (!dot || *(dot + 1) == '\0') {
3897 /* no embedded dots */
3898 urldb_free_cookie(c);
3899 goto error;
3900 }
3901#endif
3902
3903 /* Domain match fetch host with cookie domain */
3904 if (strcasecmp(lwc_string_data(host)({((host != ((void*)0)) ? (void) (0) : __assert_fail ("host != NULL"
, "content/urldb.c", 3904, __extension__ __PRETTY_FUNCTION__)
); (const char *)((host)+1);})
, c->domain) != 0) {
3905 int hlen, dlen;
3906 char *domain = c->domain;
3907
3908 /* c->domain must be a domain cookie here because:
3909 * c->domain is either:
3910 * + specified in the header as a domain cookie
3911 * (non-domain cookies in the header are ignored
3912 * by urldb_parse_cookie / urldb_parse_avpair)
3913 * + defaulted to the URL's host part
3914 * (by urldb_parse_cookie if no valid domain was
3915 * specified in the header)
3916 *
3917 * The latter will pass the strcasecmp above, which
3918 * leaves the former (i.e. a domain cookie)
3919 */
3920 assert(c->domain[0] == '.')((c->domain[0] == '.') ? (void) (0) : __assert_fail ("c->domain[0] == '.'"
, "content/urldb.c", 3920, __extension__ __PRETTY_FUNCTION__)
)
;
3921
3922 /* 4.3.2:iii */
3923 if (urldb__host_is_ip_address(lwc_string_data(host)({((host != ((void*)0)) ? (void) (0) : __assert_fail ("host != NULL"
, "content/urldb.c", 3923, __extension__ __PRETTY_FUNCTION__)
); (const char *)((host)+1);})
)) {
3924 /* IP address, so no partial match */
3925 urldb_free_cookie(c);
3926 goto error;
3927 }
3928
3929 hlen = lwc_string_length(host)({((host != ((void*)0)) ? (void) (0) : __assert_fail ("host != NULL"
, "content/urldb.c", 3929, __extension__ __PRETTY_FUNCTION__)
); (host)->len;})
;
3930 dlen = strlen(c->domain);
3931
3932 if (hlen <= dlen && hlen != dlen - 1) {
3933 /* Partial match not possible */
3934 urldb_free_cookie(c);
3935 goto error;
3936 }
3937
3938 if (hlen == dlen - 1) {
3939 /* Relax matching to allow
3940 * host a.com to match .a.com */
3941 domain++;
3942 dlen--;
3943 }
3944
3945 if (strcasecmp(lwc_string_data(host)({((host != ((void*)0)) ? (void) (0) : __assert_fail ("host != NULL"
, "content/urldb.c", 3945, __extension__ __PRETTY_FUNCTION__)
); (const char *)((host)+1);})
+ (hlen - dlen),
3946 domain)) {
3947 urldb_free_cookie(c);
3948 goto error;
3949 }
3950
3951 /* 4.3.2:iv Ensure H contains no dots
3952 *
3953 * If you believe the spec, H should contain no
3954 * dots in _any_ cookie. Unfortunately, however,
3955 * reality differs in that many sites send domain
3956 * cookies of the form .foo.com from hosts such
3957 * as bar.bat.foo.com and then expect domain
3958 * matching to work. Thus we have to do what they
3959 * expect, regardless of any potential security
3960 * implications.
3961 *
3962 * This is what code conforming to the spec would
3963 * look like:
3964 *
3965 * for (int i = 0; i < (hlen - dlen); i++) {
3966 * if (host[i] == '.') {
3967 * urldb_free_cookie(c);
3968 * goto error;
3969 * }
3970 * }
3971 */
3972 }
3973
3974 /* Now insert into database */
3975 if (!urldb_insert_cookie(c, scheme, urlt))
3976 goto error;
3977 } while (cur < end);
3978
3979 lwc_string_unref(host){ lwc_string *__lwc_s = (host); ((__lwc_s != ((void*)0)) ? (void
) (0) : __assert_fail ("__lwc_s != NULL", "content/urldb.c", 3979
, __extension__ __PRETTY_FUNCTION__)); __lwc_s->refcnt--; if
((__lwc_s->refcnt == 0) || ((__lwc_s->refcnt == 1) &&
(__lwc_s->insensitive == __lwc_s))) lwc_string_destroy(__lwc_s
); }
;
3980 lwc_string_unref(path){ lwc_string *__lwc_s = (path); ((__lwc_s != ((void*)0)) ? (void
) (0) : __assert_fail ("__lwc_s != NULL", "content/urldb.c", 3980
, __extension__ __PRETTY_FUNCTION__)); __lwc_s->refcnt--; if
((__lwc_s->refcnt == 0) || ((__lwc_s->refcnt == 1) &&
(__lwc_s->insensitive == __lwc_s))) lwc_string_destroy(__lwc_s
); }
;
3981 lwc_string_unref(scheme){ lwc_string *__lwc_s = (scheme); ((__lwc_s != ((void*)0)) ? (
void) (0) : __assert_fail ("__lwc_s != NULL", "content/urldb.c"
, 3981, __extension__ __PRETTY_FUNCTION__)); __lwc_s->refcnt
--; if ((__lwc_s->refcnt == 0) || ((__lwc_s->refcnt == 1
) && (__lwc_s->insensitive == __lwc_s))) lwc_string_destroy
(__lwc_s); }
;
3982 nsurl_unref(urlt);
3983
3984 return true1;
3985
3986error:
3987 lwc_string_unref(host){ lwc_string *__lwc_s = (host); ((__lwc_s != ((void*)0)) ? (void
) (0) : __assert_fail ("__lwc_s != NULL", "content/urldb.c", 3987
, __extension__ __PRETTY_FUNCTION__)); __lwc_s->refcnt--; if
((__lwc_s->refcnt == 0) || ((__lwc_s->refcnt == 1) &&
(__lwc_s->insensitive == __lwc_s))) lwc_string_destroy(__lwc_s
); }
;
3988 lwc_string_unref(path){ lwc_string *__lwc_s = (path); ((__lwc_s != ((void*)0)) ? (void
) (0) : __assert_fail ("__lwc_s != NULL", "content/urldb.c", 3988
, __extension__ __PRETTY_FUNCTION__)); __lwc_s->refcnt--; if
((__lwc_s->refcnt == 0) || ((__lwc_s->refcnt == 1) &&
(__lwc_s->insensitive == __lwc_s))) lwc_string_destroy(__lwc_s
); }
;
3989 lwc_string_unref(scheme){ lwc_string *__lwc_s = (scheme); ((__lwc_s != ((void*)0)) ? (
void) (0) : __assert_fail ("__lwc_s != NULL", "content/urldb.c"
, 3989, __extension__ __PRETTY_FUNCTION__)); __lwc_s->refcnt
--; if ((__lwc_s->refcnt == 0) || ((__lwc_s->refcnt == 1
) && (__lwc_s->insensitive == __lwc_s))) lwc_string_destroy
(__lwc_s); }
;
3990 nsurl_unref(urlt);
3991
3992 return false0;
3993}
3994
3995
3996/* exported interface documented in content/urldb.h */
3997char *urldb_get_cookie(nsurl *url, bool_Bool include_http_only)
3998{
3999 const struct path_data *p, *q;
4000 const struct host_part *h;
4001 lwc_string *path_lwc;
4002 struct cookie_internal_data *c;
4003 int count = 0, version = COOKIE_RFC2965;
4004 struct cookie_internal_data **matched_cookies;
4005 int matched_cookies_size = 20;
4006 int ret_alloc = 4096, ret_used = 1;
4007 const char *path;
4008 char *ret;
4009 lwc_string *scheme;
4010 time_t now;
4011 int i;
4012 bool_Bool match;
4013
4014 assert(url != NULL)((url != ((void*)0)) ? (void) (0) : __assert_fail ("url != NULL"
, "content/urldb.c", 4014, __extension__ __PRETTY_FUNCTION__)
)
;
4015
4016 /* The URL must exist in the db in order to find relevant cookies, since
4017 * we search up the tree from the URL node, and cookies from further
4018 * up also apply. */
4019 urldb_add_url(url);
4020
4021 p = urldb_find_url(url);
4022 if (!p)
4023 return NULL((void*)0);
4024
4025 scheme = p->scheme;
4026
4027 matched_cookies = malloc(matched_cookies_size *
4028 sizeof(struct cookie_internal_data *));
4029 if (!matched_cookies)
4030 return NULL((void*)0);
4031
4032#define GROW_MATCHED_COOKIES \
4033 do { \
4034 if (count == matched_cookies_size) { \
4035 struct cookie_internal_data **temp; \
4036 temp = realloc(matched_cookies, \
4037 (matched_cookies_size + 20) * \
4038 sizeof(struct cookie_internal_data *)); \
4039 \
4040 if (temp == NULL((void*)0)) { \
4041 free(ret); \
4042 free(matched_cookies); \
4043 return NULL((void*)0); \
4044 } \
4045 \
4046 matched_cookies = temp; \
4047 matched_cookies_size += 20; \
4048 } \
4049 } while(0)
4050
4051 ret = malloc(ret_alloc);
4052 if (!ret) {
4053 free(matched_cookies);
4054 return NULL((void*)0);
4055 }
4056
4057 ret[0] = '\0';
4058
4059 path_lwc = nsurl_get_component(url, NSURL_PATH);
4060 if (path_lwc == NULL((void*)0)) {
4061 free(ret);
4062 free(matched_cookies);
4063 return NULL((void*)0);
4064 }
4065 path = lwc_string_data(path_lwc)({((path_lwc != ((void*)0)) ? (void) (0) : __assert_fail ("path_lwc != NULL"
, "content/urldb.c", 4065, __extension__ __PRETTY_FUNCTION__)
); (const char *)((path_lwc)+1);})
;
4066 lwc_string_unref(path_lwc){ lwc_string *__lwc_s = (path_lwc); ((__lwc_s != ((void*)0)) ?
(void) (0) : __assert_fail ("__lwc_s != NULL", "content/urldb.c"
, 4066, __extension__ __PRETTY_FUNCTION__)); __lwc_s->refcnt
--; if ((__lwc_s->refcnt == 0) || ((__lwc_s->refcnt == 1
) && (__lwc_s->insensitive == __lwc_s))) lwc_string_destroy
(__lwc_s); }
;
4067
4068 now = time(NULL((void*)0));
4069
4070 if (*(p->segment) != '\0') {
4071 /* Match exact path, unless directory, when prefix matching
4072 * will handle this case for us. */
4073 for (q = p->parent->children; q; q = q->next) {
4074 if (strcmp(q->segment, p->segment))
4075 continue;
4076
4077 /* Consider all cookies associated with
4078 * this exact path */
4079 for (c = q->cookies; c; c = c->next) {
4080 if (c->expires != -1 && c->expires < now)
4081 /* cookie has expired => ignore */
4082 continue;
4083
4084 if (c->secure && lwc_string_isequal(((*(&match) = ((q->scheme) == (corestring_lwc_https)))
, lwc_error_ok)
4085 q->scheme,((*(&match) = ((q->scheme) == (corestring_lwc_https)))
, lwc_error_ok)
4086 corestring_lwc_https,((*(&match) = ((q->scheme) == (corestring_lwc_https)))
, lwc_error_ok)
4087 &match)((*(&match) = ((q->scheme) == (corestring_lwc_https)))
, lwc_error_ok)
&&
4088 match == false0)
4089 /* secure cookie for insecure host.
4090 * ignore */
4091 continue;
4092
4093 if (c->http_only && !include_http_only)
4094 /* Ignore HttpOnly */
4095 continue;
4096
4097 matched_cookies[count++] = c;
4098
4099 GROW_MATCHED_COOKIES;
4100
4101 if (c->version < (unsigned int)version)
4102 version = c->version;
4103
4104 c->last_used = now;
4105
4106 cookie_manager_add((struct cookie_data *)c);
4107 }
4108 }
4109 }
4110
4111 /* Now consider cookies whose paths prefix-match ours */
4112 for (p = p->parent; p; p = p->parent) {
4113 /* Find directory's path entry(ies) */
4114 /* There are potentially multiple due to differing schemes */
4115 for (q = p->children; q; q = q->next) {
4116 if (*(q->segment) != '\0')
4117 continue;
4118
4119 for (c = q->cookies; c; c = c->next) {
4120 if (c->expires != -1 && c->expires < now)
4121 /* cookie has expired => ignore */
4122 continue;
4123
4124 if (c->secure && lwc_string_isequal(((*(&match) = ((q->scheme) == (corestring_lwc_https)))
, lwc_error_ok)
4125 q->scheme,((*(&match) = ((q->scheme) == (corestring_lwc_https)))
, lwc_error_ok)
4126 corestring_lwc_https,((*(&match) = ((q->scheme) == (corestring_lwc_https)))
, lwc_error_ok)
4127 &match)((*(&match) = ((q->scheme) == (corestring_lwc_https)))
, lwc_error_ok)
&&
4128 match == false0)
4129 /* Secure cookie for insecure server
4130 * => ignore */
4131 continue;
4132
4133 matched_cookies[count++] = c;
4134
4135 GROW_MATCHED_COOKIES;
4136
4137 if (c->version < (unsigned int) version)
4138 version = c->version;
4139
4140 c->last_used = now;
4141
4142 cookie_manager_add((struct cookie_data *)c);
4143 }
4144 }
4145
4146 if (!p->parent) {
4147 /* No parent, so bail here. This can't go in
4148 * the loop exit condition as we also want to
4149 * process the top-level node.
4150 *
4151 * If p->parent is NULL then p->cookies are
4152 * the domain cookies and thus we don't even
4153 * try matching against them.
4154 */
4155 break;
4156 }
4157
4158 /* Consider p itself - may be the result of Path=/foo */
4159 for (c = p->cookies; c; c = c->next) {
4160 if (c->expires != -1 && c->expires < now)
4161 /* cookie has expired => ignore */
4162 continue;
4163
4164 /* Ensure cookie path is a prefix of the resource */
4165 if (strncmp(c->path, path, strlen(c->path)) != 0)
4166 /* paths don't match => ignore */
4167 continue;
4168
4169 if (c->secure && lwc_string_isequal(p->scheme,((*(&match) = ((p->scheme) == (corestring_lwc_https)))
, lwc_error_ok)
4170 corestring_lwc_https,((*(&match) = ((p->scheme) == (corestring_lwc_https)))
, lwc_error_ok)
4171 &match)((*(&match) = ((p->scheme) == (corestring_lwc_https)))
, lwc_error_ok)
&&
4172 match == false0)
4173 /* Secure cookie for insecure server
4174 * => ignore */
4175 continue;
4176
4177 matched_cookies[count++] = c;
4178
4179 GROW_MATCHED_COOKIES;
4180
4181 if (c->version < (unsigned int) version)
4182 version = c->version;
4183
4184 c->last_used = now;
4185
4186 cookie_manager_add((struct cookie_data *)c);
4187 }
4188
4189 }
4190
4191 /* Finally consider domain cookies for hosts which domain match ours */
4192 for (h = (const struct host_part *)p; h && h != &db_root;
4193 h = h->parent) {
4194 for (c = h->paths.cookies; c; c = c->next) {
4195 if (c->expires != -1 && c->expires < now)
4196 /* cookie has expired => ignore */
4197 continue;
4198
4199 /* Ensure cookie path is a prefix of the resource */
4200 if (strncmp(c->path, path, strlen(c->path)) != 0)
4201 /* paths don't match => ignore */
4202 continue;
4203
4204 if (c->secure && lwc_string_isequal(scheme,((*(&match) = ((scheme) == (corestring_lwc_https))), lwc_error_ok
)
4205 corestring_lwc_https,((*(&match) = ((scheme) == (corestring_lwc_https))), lwc_error_ok
)
4206 &match)((*(&match) = ((scheme) == (corestring_lwc_https))), lwc_error_ok
)
&&
4207 match == false0)
4208 /* secure cookie for insecure host. ignore */
4209 continue;
4210
4211 matched_cookies[count++] = c;
4212
4213 GROW_MATCHED_COOKIES;
4214
4215 if (c->version < (unsigned int)version)
4216 version = c->version;
4217
4218 c->last_used = now;
4219
4220 cookie_manager_add((struct cookie_data *)c);
4221 }
4222 }
4223
4224 if (count == 0) {
4225 /* No cookies found */
4226 free(ret);
4227 free(matched_cookies);
4228 return NULL((void*)0);
4229 }
4230
4231 /* and build output string */
4232 if (version > COOKIE_NETSCAPE) {
4233 sprintf(ret, "$Version=%d", version);
4234 ret_used = strlen(ret) + 1;
4235 }
4236
4237 for (i = 0; i < count; i++) {
4238 if (!urldb_concat_cookie(matched_cookies[i], version,
4239 &ret_used, &ret_alloc, &ret)) {
4240 free(ret);
4241 free(matched_cookies);
4242 return NULL((void*)0);
4243 }
4244 }
4245
4246 if (version == COOKIE_NETSCAPE) {
4247 /* Old-style cookies => no version & skip "; " */
4248 memmove(ret, ret + 2, ret_used - 2);
4249 ret_used -= 2;
4250 }
4251
4252 /* Now, shrink the output buffer to the required size */
4253 {
4254 char *temp = realloc(ret, ret_used);
4255 if (!temp) {
4256 free(ret);
4257 free(matched_cookies);
4258 return NULL((void*)0);
4259 }
4260
4261 ret = temp;
4262 }
4263
4264 free(matched_cookies);
4265
4266 return ret;
4267
4268#undef GROW_MATCHED_COOKIES
4269}
4270
4271
4272/* exported interface documented in content/urldb.h */
4273void urldb_delete_cookie(const char *domain, const char *path,
4274 const char *name)
4275{
4276 urldb_delete_cookie_hosts(domain, path, name, &db_root);
4277}
4278
4279
4280/* exported interface documented in content/urldb.h */
4281void urldb_load_cookies(const char *filename)
4282{
4283 FILE *fp;
4284 char s[16*1024];
4285
4286 assert(filename)((filename) ? (void) (0) : __assert_fail ("filename", "content/urldb.c"
, 4286, __extension__ __PRETTY_FUNCTION__))
;
4287
4288 fp = fopen(filename, "r");
4289 if (!fp)
4290 return;
4291
4292#define FIND_T { \
4293 for (; *p && *p != '\t'; p++) \
4294 ; /* do nothing */ \
4295 if (p >= end) { \
4296 NSLOG(netsurf, INFO, "Overran input")do { if (NSLOG_LEVEL_INFO >= NSLOG_LEVEL_VERBOSE) { static
nslog_entry_context_t _nslog_ctx = { &__nslog_category_netsurf
, NSLOG_LEVEL_INFO, "content/urldb.c", sizeof("content/urldb.c"
) - 1, __PRETTY_FUNCTION__, sizeof(__PRETTY_FUNCTION__) - 1, 4296
, }; nslog__log(&_nslog_ctx, "Overran input"); } } while(
0)
; \
4297 continue; \
4298 } \
4299 *p++ = '\0'; \
4300 }
4301
4302#define SKIP_T { \
4303 for (; *p && *p == '\t'; p++) \
4304 ; /* do nothing */ \
4305 if (p >= end) { \
4306 NSLOG(netsurf, INFO, "Overran input")do { if (NSLOG_LEVEL_INFO >= NSLOG_LEVEL_VERBOSE) { static
nslog_entry_context_t _nslog_ctx = { &__nslog_category_netsurf
, NSLOG_LEVEL_INFO, "content/urldb.c", sizeof("content/urldb.c"
) - 1, __PRETTY_FUNCTION__, sizeof(__PRETTY_FUNCTION__) - 1, 4306
, }; nslog__log(&_nslog_ctx, "Overran input"); } } while(
0)
; \
4307 continue; \
4308 } \
4309 }
4310
4311 while (fgets(s, sizeof s, fp)) {
4312 char *p = s, *end = 0,
4313 *domain, *path, *name, *value, *scheme, *url,
4314 *comment;
4315 int version, domain_specified, path_specified,
4316 secure, http_only, no_destroy, value_quoted;
4317 time_t expires, last_used;
4318 struct cookie_internal_data *c;
4319
4320 if(s[0] == 0 || s[0] == '#')
4321 /* Skip blank lines or comments */
4322 continue;
4323
4324 s[strlen(s) - 1] = '\0'; /* lose terminating newline */
4325 end = s + strlen(s);
4326
4327 /* Look for file version first
4328 * (all input is ignored until this is read)
4329 */
4330 if (strncasecmp(s, "Version:", 8) == 0) {
4331 FIND_T; SKIP_T; loaded_cookie_file_version = atoi(p);
4332
4333 if (loaded_cookie_file_version <
4334 MIN_COOKIE_FILE_VERSION100) {
4335 NSLOG(netsurf, INFO,do { if (NSLOG_LEVEL_INFO >= NSLOG_LEVEL_VERBOSE) { static
nslog_entry_context_t _nslog_ctx = { &__nslog_category_netsurf
, NSLOG_LEVEL_INFO, "content/urldb.c", sizeof("content/urldb.c"
) - 1, __PRETTY_FUNCTION__, sizeof(__PRETTY_FUNCTION__) - 1, 4336
, }; nslog__log(&_nslog_ctx, "Unsupported Cookie file version"
); } } while(0)
4336 "Unsupported Cookie file version")do { if (NSLOG_LEVEL_INFO >= NSLOG_LEVEL_VERBOSE) { static
nslog_entry_context_t _nslog_ctx = { &__nslog_category_netsurf
, NSLOG_LEVEL_INFO, "content/urldb.c", sizeof("content/urldb.c"
) - 1, __PRETTY_FUNCTION__, sizeof(__PRETTY_FUNCTION__) - 1, 4336
, }; nslog__log(&_nslog_ctx, "Unsupported Cookie file version"
); } } while(0)
;
4337 break;
4338 }
4339
4340 continue;
4341 } else if (loaded_cookie_file_version == 0) {
4342 /* Haven't yet seen version; skip this input */
4343 continue;
4344 }
4345
4346 /* One cookie/line */
4347
4348 /* Parse input */
4349 FIND_T; version = atoi(s);
4350 SKIP_T; domain = p; FIND_T;
4351 SKIP_T; domain_specified = atoi(p); FIND_T;
4352 SKIP_T; path = p; FIND_T;
4353 SKIP_T; path_specified = atoi(p); FIND_T;
4354 SKIP_T; secure = atoi(p); FIND_T;
4355 if (loaded_cookie_file_version > 101) {
4356 /* Introduced in version 1.02 */
4357 SKIP_T; http_only = atoi(p); FIND_T;
4358 } else {
4359 http_only = 0;
4360 }
4361 SKIP_T; expires = (time_t)atoi(p); FIND_T;
4362 SKIP_T; last_used = (time_t)atoi(p); FIND_T;
4363 SKIP_T; no_destroy = atoi(p); FIND_T;
4364 SKIP_T; name = p; FIND_T;
4365 SKIP_T; value = p; FIND_T;
4366 if (loaded_cookie_file_version > 100) {
4367 /* Introduced in version 1.01 */
4368 SKIP_T; value_quoted = atoi(p); FIND_T;
4369 } else {
4370 value_quoted = 0;
4371 }
4372 SKIP_T; scheme = p; FIND_T;
4373 SKIP_T; url = p; FIND_T;
4374
4375 /* Comment may have no content, so don't
4376 * use macros as they'll break */
4377 for (; *p && *p == '\t'; p++)
4378 ; /* do nothing */
4379 comment = p;
4380
4381 assert(p <= end)((p <= end) ? (void) (0) : __assert_fail ("p <= end", "content/urldb.c"
, 4381, __extension__ __PRETTY_FUNCTION__))
;
4382
4383 /* Now create cookie */
4384 c = malloc(sizeof(struct cookie_internal_data));
4385 if (!c)
4386 break;
4387
4388 c->name = strdup(name);
4389 c->value = strdup(value);
4390 c->value_was_quoted = value_quoted;
4391 c->comment = strdup(comment);
4392 c->domain_from_set = domain_specified;
4393 c->domain = strdup(domain);
4394 c->path_from_set = path_specified;
4395 c->path = strdup(path);
4396 c->expires = expires;
4397 c->last_used = last_used;
4398 c->secure = secure;
4399 c->http_only = http_only;
4400 c->version = version;
4401 c->no_destroy = no_destroy;
4402
4403 if (!(c->name && c->value && c->comment &&
4404 c->domain && c->path)) {
4405 urldb_free_cookie(c);
4406 break;
4407 }
4408
4409 if (c->domain[0] != '.') {
4410 lwc_string *scheme_lwc = NULL((void*)0);
4411 nsurl *url_nsurl = NULL((void*)0);
4412
4413 assert(scheme[0] != 'u')((scheme[0] != 'u') ? (void) (0) : __assert_fail ("scheme[0] != 'u'"
, "content/urldb.c", 4413, __extension__ __PRETTY_FUNCTION__)
)
;
4414
4415 if (nsurl_create(url, &url_nsurl) != NSERROR_OK) {
4416 urldb_free_cookie(c);
4417 break;
4418 }
4419 scheme_lwc = nsurl_get_component(url_nsurl,
4420 NSURL_SCHEME);
4421
4422 /* And insert it into database */
4423 if (!urldb_insert_cookie(c, scheme_lwc, url_nsurl)) {
4424 /* Cookie freed for us */
4425 nsurl_unref(url_nsurl);
4426 lwc_string_unref(scheme_lwc){ lwc_string *__lwc_s = (scheme_lwc); ((__lwc_s != ((void*)0)
) ? (void) (0) : __assert_fail ("__lwc_s != NULL", "content/urldb.c"
, 4426, __extension__ __PRETTY_FUNCTION__)); __lwc_s->refcnt
--; if ((__lwc_s->refcnt == 0) || ((__lwc_s->refcnt == 1
) && (__lwc_s->insensitive == __lwc_s))) lwc_string_destroy
(__lwc_s); }
;
4427 break;
4428 }
4429 nsurl_unref(url_nsurl);
4430 lwc_string_unref(scheme_lwc){ lwc_string *__lwc_s = (scheme_lwc); ((__lwc_s != ((void*)0)
) ? (void) (0) : __assert_fail ("__lwc_s != NULL", "content/urldb.c"
, 4430, __extension__ __PRETTY_FUNCTION__)); __lwc_s->refcnt
--; if ((__lwc_s->refcnt == 0) || ((__lwc_s->refcnt == 1
) && (__lwc_s->insensitive == __lwc_s))) lwc_string_destroy
(__lwc_s); }
;
4431
4432 } else {
4433 if (!urldb_insert_cookie(c, NULL((void*)0), NULL((void*)0))) {
4434 /* Cookie freed for us */
4435 break;
4436 }
4437 }
4438 }
4439
4440#undef SKIP_T
4441#undef FIND_T
4442
4443 fclose(fp);
4444}
4445
4446
4447/* exported interface documented in content/urldb.h */
4448void urldb_save_cookies(const char *filename)
4449{
4450 FILE *fp;
4451 int cookie_file_version = max(loaded_cookie_file_version,(((loaded_cookie_file_version)>(102))?(loaded_cookie_file_version
):(102))
4452 COOKIE_FILE_VERSION)(((loaded_cookie_file_version)>(102))?(loaded_cookie_file_version
):(102))
;
4453
4454 assert(filename)((filename) ? (void) (0) : __assert_fail ("filename", "content/urldb.c"
, 4454, __extension__ __PRETTY_FUNCTION__))
;
4455
4456 fp = fopen(filename, "w");
4457 if (!fp)
4458 return;
4459
4460 fprintf(fp, "# NetSurf cookies file.\n"
4461 "#\n"
4462 "# Lines starting with a '#' are comments, "
4463 "blank lines are ignored.\n"
4464 "#\n"
4465 "# All lines prior to \"Version:\t%d\" are discarded.\n"
4466 "#\n"
4467 "# Version\tDomain\tDomain from Set-Cookie\tPath\t"
4468 "Path from Set-Cookie\tSecure\tHTTP-Only\tExpires\tLast used\t"
4469 "No destroy\tName\tValue\tValue was quoted\tScheme\t"
4470 "URL\tComment\n",
4471 cookie_file_version);
4472 fprintf(fp, "Version:\t%d\n", cookie_file_version);
4473
4474 urldb_save_cookie_hosts(fp, &db_root);
4475
4476 fclose(fp);
4477}
4478
4479
4480/* exported interface documented in netsurf/url_db.h */
4481void urldb_dump(void)
4482{
4483 int i;
4484
4485 urldb_dump_hosts(&db_root);
4486
4487 for (i = 0; i != NUM_SEARCH_TREES28; i++) {
4488 urldb_dump_search(search_trees[i], 0);
4489 }
4490}
4491
4492
4493
4494