Bug Summary

File:content/urldb.c
Warning:line 2917, column 9
Read function called when stream is in EOF state. Function has no effect

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name urldb.c -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model pic -pic-level 2 -pic-is-pie -mframe-pointer=none -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -fdebug-compilation-dir=/var/lib/jenkins/workspace/scan-build-netsurf -fcoverage-compilation-dir=/var/lib/jenkins/workspace/scan-build-netsurf -resource-dir /usr/lib/llvm-19/lib/clang/19 -isystem /usr/include/mit-krb5 -I . 
-I include -I build/Linux-gtk2 -I frontends -I content/handlers -D WITH_JPEG -U WITH_PDF_EXPORT -D LIBICONV_PLUG -I /var/lib/jenkins/artifacts-x86_64-linux-gnu/include -I /var/lib/jenkins/artifacts-x86_64-linux-gnu/include -I /var/lib/jenkins/artifacts-x86_64-linux-gnu/include -I /usr/include/x86_64-linux-gnu -I /usr/include/p11-kit-1 -D WITH_CURL -D WITH_OPENSSL -I /var/lib/jenkins/artifacts-x86_64-linux-gnu/include -D UTF8PROC_EXPORTS -D WITH_UTF8PROC -I /usr/include/webp -D WITH_WEBP -I /usr/include/libpng16 -D WITH_PNG -I /var/lib/jenkins/artifacts-x86_64-linux-gnu/include/ -D WITH_BMP -I /var/lib/jenkins/artifacts-x86_64-linux-gnu/include -D WITH_GIF -I /var/lib/jenkins/artifacts-x86_64-linux-gnu/include -D WITH_NS_SVG -I /var/lib/jenkins/artifacts-x86_64-linux-gnu/include -D WITH_NSSPRITE -I /var/lib/jenkins/artifacts-x86_64-linux-gnu/include -D WITH_NSPSL -I /var/lib/jenkins/artifacts-x86_64-linux-gnu/include -D WITH_NSLOG -D NETSURF_UA_FORMAT_STRING="Mozilla/5.0 (%s) NetSurf/%d.%d" -D NETSURF_HOMEPAGE="about:welcome" -D NETSURF_LOG_LEVEL=VERBOSE -D NETSURF_BUILTIN_LOG_FILTER="(level:WARNING || cat:jserrors)" -D NETSURF_BUILTIN_VERBOSE_FILTER="(level:VERBOSE || cat:jserrors)" -D STMTEXPR=1 -I /usr/include/librsvg-2.0 -I /usr/include/glib-2.0 -I /usr/lib/x86_64-linux-gnu/glib-2.0/include -I /usr/include/sysprof-6 -I /usr/include/libmount -I /usr/include/blkid -I /usr/include/gdk-pixbuf-2.0 -I /usr/include/libpng16 -I /usr/include/x86_64-linux-gnu -I /usr/include/webp -I /usr/include/cairo -I /usr/include/freetype2 -I /usr/include/pixman-1 -I /usr/include/harfbuzz -I /usr/include/libxml2 -I /usr/include/pango-1.0 -I /usr/include/fribidi -D WITH_RSVG -I /usr/include/gtk-2.0 -I /usr/lib/x86_64-linux-gnu/gtk-2.0/include -I /usr/include/pango-1.0 -I /usr/include/glib-2.0 -I /usr/lib/x86_64-linux-gnu/glib-2.0/include -I /usr/include/sysprof-6 -I /usr/include/harfbuzz -I /usr/include/freetype2 -I /usr/include/libpng16 -I /usr/include/libmount -I /usr/include/blkid 
-I /usr/include/fribidi -I /usr/include/cairo -I /usr/include/pixman-1 -I /usr/include/gdk-pixbuf-2.0 -I /usr/include/x86_64-linux-gnu -I /usr/include/webp -I /usr/include/atk-1.0 -I /usr/include/glib-2.0 -I /usr/lib/x86_64-linux-gnu/glib-2.0/include -I /usr/include/sysprof-6 -I /usr/include/glib-2.0 -I /usr/lib/x86_64-linux-gnu/glib-2.0/include -I /usr/include/sysprof-6 -D gtk -D nsgtk -D G_DISABLE_SINGLE_INCLUDES -D G_DISABLE_DEPRECATED -D GTK_DISABLE_SINGLE_INCLUDES -D GTK_MULTIHEAD_SAFE -D PANGO_DISABLE_DEPRECATED -D GTK_DISABLE_DEPRECATED -D _XOPEN_SOURCE=700 -D _POSIX_C_SOURCE=200809L -D _BSD_SOURCE -D _DEFAULT_SOURCE -D _NETBSD_SOURCE -D GTK_RESPATH="/var/lib/jenkins/artifacts-x86_64-linux-gnu/share/netsurf/:./frontends/gtk/res/" -D WITH_GRESOURCE -D DUK_OPT_HAVE_CUSTOM_H -internal-isystem /usr/lib/llvm-19/lib/clang/19/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/14/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wwrite-strings -Wno-unused-parameter -Wno-unused-but-set-variable -std=c99 -fconst-strings -ferror-limit 19 -fgnuc-version=4.2.1 -fskip-odr-check-in-gmf -vectorize-loops -vectorize-slp -analyzer-display-progress -analyzer-output=html -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /var/lib/jenkins/workspace/scan-build-netsurf/clangScanBuildReports/2025-11-30-114731-3042003-1 -x c content/urldb.c
1/*
2 * Copyright 2006 John M Bell <jmb202@ecs.soton.ac.uk>
3 * Copyright 2009 John Tytgat <joty@netsurf-browser.org>
4 *
5 * This file is part of NetSurf, http://www.netsurf-browser.org/
6 *
7 * NetSurf is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; version 2 of the License.
10 *
11 * NetSurf is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program. If not, see <http://www.gnu.org/licenses/>.
18 */
19
20/**
21 * \file
22 * Unified URL information database implementation
23 *
24 * URLs are stored in a tree-based structure as follows:
25 *
26 * The host component is extracted from each URL and, if a FQDN, split on
27 * every '.'. The tree is constructed by inserting each FQDN segment in
28 * reverse order. Duplicate nodes are merged.
29 *
30 * If the host part of an URL is an IP address, then this is added to the
31 * tree verbatim (as if it were a TLD).
32 *
33 * This provides something looking like:
34 *
35 * root (a sentinel)
36 * |
37 * -------------------------------------------------
38 * | | | | | | |
39 * com edu gov 127.0.0.1 net org uk TLDs
40 * | | | | | |
41 * google ... ... ... ... co 2LDs
42 * | |
43 * www bbc Hosts/Subdomains
44 * |
45 * www ...
46 *
47 * Each of the nodes in this tree is a struct host_part. This stores the
48 * FQDN segment (or IP address) with which the node is concerned. Each node
49 * may contain further information about paths on a host (struct path_data)
50 * or SSL certificate processing on a host-wide basis
51 * (host_part::permit_invalid_certs).
52 *
53 * Path data is concerned with storing various metadata about the path in
54 * question. This includes global history data, HTTP authentication details
55 * and any associated HTTP cookies. This is stored as a tree of path segments
56 * hanging off the relevant host_part node.
57 *
58 * Therefore, to find the last visited time of the URL
59 * http://www.example.com/path/to/resource.html, the FQDN tree would be
60 * traversed in the order root -> "com" -> "example" -> "www". The "www"
61 * node would have attached to it a tree of struct path_data:
62 *
63 * (sentinel)
64 * |
65 * path
66 * |
67 * to
68 * |
69 * resource.html
70 *
71 * This represents the absolute path "/path/to/resource.html". The leaf node
72 * "resource.html" contains the last visited time of the resource.
73 *
74 * The mechanism described above is, however, not particularly conducive to
75 * fast searching of the database for a given URL (or URLs beginning with a
76 * given prefix). Therefore, an ancillary data structure is used to enable
77 * fast searching. This structure simply reflects the contents of the
78 * database, with entries being added/removed at the same time as for the
79 * core database. In order to ensure that degenerate cases are kept to a
80 * minimum, we use an AAtree. This is an approximation of a Red-Black tree
81 * with similar performance characteristics, but with a significantly
82 * simpler implementation. Entries in this tree comprise pointers to the
83 * leaf nodes of the host tree described above.
84 *
85 * REALLY IMPORTANT NOTE: urldb expects all URLs to be normalised. Use of
86 * non-normalised URLs with urldb will result in undefined behaviour and
87 * potential crashes.
88 */
89
90#include <assert.h>
91#include <stdbool.h>
92#include <stdio.h>
93#include <stdlib.h>
94#include <string.h>
95#include <strings.h>
96#include <time.h>
97#ifdef WITH_NSPSL1
98#include <nspsl.h>
99#endif
100
101#include "utils/inet.h"
102#include "utils/nsoption.h"
103#include "utils/log.h"
104#include "utils/corestrings.h"
105#include "utils/url.h"
106#include "utils/utils.h"
107#include "utils/bloom.h"
108#include "utils/time.h"
109#include "utils/nsurl.h"
110#include "utils/ascii.h"
111#include "utils/http.h"
112#include "netsurf/bitmap.h"
113#include "desktop/cookie_manager.h"
114
115#include "content/content.h"
116#include "content/urldb.h"
117
118/**
119 * Cookie entry.
120 *
 * Entries are chained into a doubly linked list through prev/next; struct
 * path_data holds the list head and tail in its cookies and cookies_end
 * members.
 *
 * urldb_iterate_entries_path() casts a pointer to this structure to
 * const struct cookie_data * before handing it to a callback, which is why
 * the layout has to match the public structure.
 *
121 * \warning This *must* be kept in sync with the public interface in
122 * netsurf/cookie_db.h
123 */
124struct cookie_internal_data {
125 struct cookie_internal_data *prev; /**< Previous in list */
126 struct cookie_internal_data *next; /**< Next in list */
127
128 char *name; /**< Cookie name */
129 char *value; /**< Cookie value */
130 bool_Bool value_was_quoted; /**< Value was quoted in Set-Cookie: */
131 char *comment; /**< Cookie comment */
132 bool_Bool domain_from_set; /**< Domain came from Set-Cookie: header */
133 char *domain; /**< Domain */
134 bool_Bool path_from_set; /**< Path came from Set-Cookie: header */
135 char *path; /**< Path */
136 time_t expires; /**< Expiry timestamp, or -1 for session */
137 time_t last_used; /**< Last used time */
138 bool_Bool secure; /**< Only send for HTTPS requests */
139 bool_Bool http_only; /**< Only expose to HTTP(S) requests */
140 enum cookie_version version; /**< Specification compliance */
141 bool_Bool no_destroy; /**< Never destroy this cookie,
142 * unless it's expired */
143
144};
145
146
147/**
148 * A protection space
149 *
150 * This is defined as a tuple canonical_root_url and realm. This
151 * structure lives as a linked list element in a leaf host_part struct
152 * so we need additional scheme and port to have a canonical_root_url.
153 */
154struct prot_space_data {
155 /**
156 * URL scheme of canonical hostname of this protection space.
157 */
158 lwc_string *scheme;
159 /**
160 * Port number of canonical hostname of this protection
161 * space. When 0, it means the default port for given scheme,
162 * i.e. 80 (http), 443 (https).
163 */
164 unsigned int port;
165 /** Protection realm */
166 char *realm;
167
168 /**
169 * Authentication details for this protection space in form
170 * username:password
171 */
172 char *auth;
173 /** Next protection space in the list headed by host_part::prot_space */
174 struct prot_space_data *next;
175};
176
177
178/**
179 * Metadata about a URL.
180 *
 * urldb_iterate_entries_path() casts a pointer to this structure to
 * const struct url_data * before passing it to the URL callback, so the
 * two layouts have to agree.
 *
181 * \warning must be kept in sync with url_data structure in netsurf/url_db.h
182 */
183struct url_internal_data {
184 char *title; /**< Resource title */
185 unsigned int visits; /**< Visit count */
186 time_t last_visit; /**< Last visit time */
187 content_type type; /**< Type of resource */
188};
189
190
191/**
192 * Data entry for a URL.
 *
 * One node of the tree of path segments that hangs off a host_part.
 * Siblings are linked through next/prev, the hierarchy through
 * parent/children/last.
193 */
194struct path_data {
195 nsurl *url; /**< Full URL */
196 lwc_string *scheme; /**< URL scheme for data */
197 unsigned int port; /**< Port number for data. When 0, it means
198 * the default port for given scheme, i.e.
199 * 80 (http), 443 (https). */
200 char *segment; /**< Path segment for this node */
201 unsigned int frag_cnt; /**< Number of entries in path_data::fragment */
202 char **fragment; /**< Array of fragments */
203 bool_Bool persistent; /**< This entry should persist */
204
205 struct url_internal_data urld; /**< URL data for resource */
206
207 /**
208 * Protection space to which this resource belongs. Can be
209 * NULL when it does not belong to a protection space or when
210 * it is not known. No ownership (is with struct host_part::prot_space).
211 */
212 const struct prot_space_data *prot_space;
213 /** Cookies associated with resource */
214 struct cookie_internal_data *cookies;
215 /** Last cookie in list */
216 struct cookie_internal_data *cookies_end;
217
218 struct path_data *next; /**< Next sibling */
219 struct path_data *prev; /**< Previous sibling */
220 struct path_data *parent; /**< Parent path segment */
221 struct path_data *children; /**< Child path segments */
222 struct path_data *last; /**< Last child */
223};
224
/**
 * HSTS data held per host (see host_part::hsts).
 *
 * urldb_save_search_tree() only writes these values out while expires is
 * later than the URL expiry cut-off it computes.
 */
225struct hsts_data {
226 time_t expires; /**< Expiry time */
227 bool_Bool include_sub_domains; /**< Whether to include subdomains */
228};
229
/**
 * One node of the host tree: a single FQDN segment, or a whole IP address.
 *
 * Each node carries the path tree for its host, host-wide certificate and
 * HSTS settings, and the list of protection spaces known for the host.
 */
230struct host_part {
231 /**
232 * Known paths on this host. This _must_ be first so that
233 * struct host_part *h = (struct host_part *)mypath; works
234 */
235 struct path_data paths;
236 /**
237 * Allow access to SSL protected resources on this host
238 * without verifying certificate authenticity
239 */
240 bool_Bool permit_invalid_certs;
241 /* HSTS data */
242 struct hsts_data hsts;
243
244 /**
245 * Part of host string
246 */
247 char *part;
248
249 /**
250 * Linked list of all protection spaces known for this
251 * host and all its schemes and ports.
252 */
253 struct prot_space_data *prot_space;
254
255 struct host_part *next; /**< Next sibling */
256 struct host_part *prev; /**< Previous sibling */
257 struct host_part *parent; /**< Parent host part */
258 struct host_part *children; /**< Child host parts */
259};
260
261
262/**
263 * Search index node.
 *
 * Subtrees are terminated by the file-scope sentinel node `empty` rather
 * than by NULL; the traversal functions in this file compare against
 * &empty to detect the end of a branch.
264 */
265struct search_node {
266 const struct host_part *data; /**< Host tree entry */
267
268 unsigned int level; /**< Node level (presumably the AA-tree level; confirm against the insert/balance code) */
269
270 struct search_node *left; /**< Left subtree */
271 struct search_node *right; /**< Right subtree */
272};
273
274/** Root database handle */
275static struct host_part db_root;
276
277/** Search trees - one per letter + 1 for IPs + 1 for Everything Else */
278#define NUM_SEARCH_TREES28 28
/* NOTE(review): the ST_* values look like indices into search_trees (IP
 * addresses, Everything Else, first domain-name tree) -- confirm at their
 * point of use, which lies outside this section. */
279#define ST_IP0 0
280#define ST_EE1 1
281#define ST_DN2 2
/* Sentinel leaf: both of its child links point back at itself, and every
 * search tree starts out as just this node. */
282static struct search_node empty = { 0, 0, &empty, &empty };
283static struct search_node *search_trees[NUM_SEARCH_TREES28] = {
284 &empty, &empty, &empty, &empty, &empty, &empty, &empty, &empty,
285 &empty, &empty, &empty, &empty, &empty, &empty, &empty, &empty,
286 &empty, &empty, &empty, &empty, &empty, &empty, &empty, &empty,
287 &empty, &empty, &empty, &empty
288};
289
290/** Minimum cookie database file version */
291#define MIN_COOKIE_FILE_VERSION100 100
292/** Current cookie database file version */
293#define COOKIE_FILE_VERSION102 102
294/** loaded cookie file version */
295static int loaded_cookie_file_version;
296
297/** Minimum URL database file version */
298#define MIN_URL_FILE_VERSION106 106
299/** Current URL database file version */
300#define URL_FILE_VERSION107 107
301
302/**
303 * filter for url presence in database
304 *
305 * Bloom filter used for short-circuiting the false case of "is this
306 * URL in the database?". BLOOM_SIZE controls how large the filter is
307 * in bytes. Primitive experimentation shows that for a filter of X
308 * bytes filled with X items, searching for X items not in the filter
309 * has a 5% false-positive rate. We set it to 32kB, which should be
310 * enough for all but the largest databases, while not being
311 * shockingly wasteful on memory.
312 */
313static struct bloom_filter *url_bloom;
314/**
315 * Size of url filter
316 */
317#define BLOOM_SIZE(1024 * 32) (1024 * 32)
318
319
320/**
321 * write a time_t to a file portably
322 *
323 * \param fp File to write to
324 * \param val the unix time value to output
325 * \return NSERROR_OK on success
326 */
327static nserror urldb_write_timet(FILE *fp, time_t val)
328{
329 int use;
330 char op[32];
331
332 use = nsc_sntimet(op, 32, &val);
333 if (use == 0) {
334 fprintf(fp, "%i\n", (int)val);
335 } else {
336 fprintf(fp, "%.*s\n", use, op);
337 }
338 return NSERROR_OK;
339}
340
341/**
342 * Write paths associated with a host
343 *
 * Walks the path tree below \a parent depth first without recursion,
 * rebuilding the textual path in the caller's buffer as it descends and
 * ascends, and writes one record for every leaf that is persistent or
 * that has been visited since \a expiry.
 *
 * \a path_used counts the terminating NUL as well: the caller in this
 * file starts with an empty string and a used size of 1.
 *
 * If growing the path buffer fails the function returns at once, leaving
 * whatever has already been written to \a fp in place.
 *
 * Titles are cleaned up in place before being written (control characters
 * become spaces, trailing spaces are cut), so the stored title changes.
 *
344 * \param parent Root of (sub)tree to write
345 * \param host Current host name (not referenced in the function body)
346 * \param fp File to write to
347 * \param path Current path string
348 * \param path_alloc Allocated size of path
349 * \param path_used Used size of path
350 * \param expiry Expiry time of URLs
351 */
352static void
353urldb_write_paths(const struct path_data *parent,
354 const char *host,
355 FILE *fp,
356 char **path,
357 int *path_alloc,
358 int *path_used,
359 time_t expiry)
360{
361 const struct path_data *p = parent;
362 int i;
363
364 do {
365 int seglen = p->segment != NULL((void*)0) ? strlen(p->segment) : 0;
 /* Size needed for the current path, this segment and a '/' */
366 int len = *path_used + seglen + 1;
367
368 if (*path_alloc < len) {
369 char *temp;
 /* Grow to exactly len when that exceeds 64 bytes,
 * otherwise by another 64 bytes */
370 temp = realloc(*path,
371 (len > 64) ? len : *path_alloc + 64);
372 if (!temp) {
373 return;
374 }
375 *path = temp;
376 *path_alloc = (len > 64) ? len : *path_alloc + 64;
377 }
378
 /* Append the segment over the current terminating NUL */
379 if (p->segment != NULL((void*)0)) {
380 memcpy(*path + *path_used - 1, p->segment, seglen);
381 }
382
 /* Interior nodes get a trailing '/', leaves are just terminated */
383 if (p->children != NULL((void*)0)) {
384 (*path)[*path_used + seglen - 1] = '/';
385 (*path)[*path_used + seglen] = '\0';
386 } else {
387 (*path)[*path_used + seglen - 1] = '\0';
388 len -= 1;
389 }
390
391 *path_used = len;
392
393 if (p->children != NULL((void*)0)) {
394 /* Drill down into children */
395 p = p->children;
396 } else {
397 /* leaf node */
398 if (p->persistent ||
399 ((p->urld.last_visit > expiry) &&
400 (p->urld.visits > 0))) {
 /* Record layout, one item per line: scheme, port (blank
 * line when 0), path, visit count, last visit time, type,
 * a blank line, title (blank line when there is none) */
401 fprintf(fp, "%s\n", lwc_string_data(p->scheme)({((p->scheme != ((void*)0)) ? (void) (0) : __assert_fail (
"p->scheme != NULL", "content/urldb.c", 401, __extension__
__PRETTY_FUNCTION__)); (const char *)((p->scheme)+1);})
);
402
403 if (p->port) {
404 fprintf(fp,"%d\n", p->port);
405 } else {
406 fprintf(fp, "\n");
407 }
408
409 fprintf(fp, "%s\n", *path);
410
411 /** \todo handle fragments? */
412
413 /* number of visits */
414 fprintf(fp, "%i\n", p->urld.visits);
415
416 /* time entry was last used */
417 urldb_write_timet(fp, p->urld.last_visit);
418
419 /* entry type */
420 fprintf(fp, "%i\n", (int)p->urld.type);
421
422 fprintf(fp, "\n");
423
424 if (p->urld.title) {
425 uint8_t *s = (uint8_t *) p->urld.title;
426
 /* Replace control characters so the title stays on
 * one line of the file */
427 for (i = 0; s[i] != '\0'; i++)
428 if (s[i] < 32)
429 s[i] = ' ';
 /* Cut trailing spaces; index 0 is never cut */
430 for (--i; ((i > 0) && (s[i] == ' '));
431 i--)
432 s[i] = '\0';
433 fprintf(fp, "%s\n", p->urld.title);
434 } else {
435 fprintf(fp, "\n");
436 }
437 }
438
439 /* Now, find next node to process. */
440 while (p != parent) {
441 int seglen = p->segment != NULL((void*)0)
442 ? strlen(p->segment) : 0;
443
444 /* Remove our segment from the path */
445 *path_used -= seglen;
446 (*path)[*path_used - 1] = '\0';
447
448 if (p->next != NULL((void*)0)) {
449 /* Have a sibling, process that */
450 p = p->next;
451 break;
452 }
453
454 /* Going up, so remove '/' */
455 *path_used -= 1;
456 (*path)[*path_used - 1] = '\0';
457
458 /* Ascend tree */
459 p = p->parent;
460 }
461 }
462 } while (p != parent);
463}
464
465
466/**
467 * Count number of URLs associated with a host
468 *
 * Walks the path tree depth first without recursion and counts the leaves
 * that are persistent or that have at least one visit later than
 * \a expiry. This is the same test urldb_write_paths() applies before
 * writing a record.
 *
 * The total is added to \a *count; the function never resets it.
 *
469 * \param root Root of path data tree
470 * \param expiry Expiry time for URLs
471 * \param count Pointer to count
472 */
473static void
474urldb_count_urls(const struct path_data *root,
475 time_t expiry,
476 unsigned int *count)
477{
478 const struct path_data *p = root;
479
480 do {
481 if (p->children != NULL((void*)0)) {
482 /* Drill down into children */
483 p = p->children;
484 } else {
485 /* No more children, increment count if required */
486 if (p->persistent ||
487 ((p->urld.last_visit > expiry) &&
488 (p->urld.visits > 0))) {
489 (*count)++;
490 }
491
492 /* Now, find next node to process. */
493 while (p != root) {
494 if (p->next != NULL((void*)0)) {
495 /* Have a sibling, process that */
496 p = p->next;
497 break;
498 }
499
500 /* Ascend tree */
501 p = p->parent;
502 }
503 }
504 } while (p != root);
505}
506
507
508/**
509 * Save a search (sub)tree
510 *
 * Recursive in-order traversal: the left subtree, then this node, then
 * the right subtree. For each node the host name is rebuilt by walking
 * from the node's host_part up through its parents, and a host line is
 * written when the host has URLs worth saving or unexpired HSTS data.
 *
 * Host line layout: host name, HSTS include-subdomains flag and HSTS
 * expiry time on one line, followed by the number of URLs on the next.
 *
 * Allocation or formatting failures end the function early without any
 * error being reported; in that case the right subtree is not visited.
 *
511 * \param parent root node of search tree to save.
512 * \param fp File to write to
513 */
514static void urldb_save_search_tree(struct search_node *parent, FILE *fp)
515{
516 char host[256];
517 const struct host_part *h;
518 unsigned int path_count = 0;
519 char *path, *p, *end;
 /* path_used starts at 1 because it counts the terminating NUL */
520 int path_alloc = 64, path_used = 1;
521 time_t expiry, hsts_expiry = 0;
522 int hsts_include_subdomains = 0;
523
 /* Cut-off: now minus the expire_url option, taken in days */
524 expiry = time(NULL((void*)0)) - ((60 * 60 * 24) * nsoption_int(expire_url)(nsoptions[NSOPTION_expire_url].value.i));
525
526 if (parent == &empty)
527 return;
528
529 urldb_save_search_tree(parent->left, fp);
530
531 path = malloc(path_alloc);
532 if (!path)
533 return;
534
535 path[0] = '\0';
536
 /* Concatenate the host parts from this node upwards. A '.' follows
 * each part whose parent itself has a parent. The loop stops once
 * the buffer is full, which leaves a truncated host name. */
537 for (h = parent->data, p = host, end = host + sizeof host;
538 h && h != &db_root && p < end; h = h->parent) {
539 int written = snprintf(p, end - p, "%s%s", h->part,
540 (h->parent && h->parent->parent) ? "." : "");
541 if (written < 0) {
542 free(path);
543 return;
544 }
545 p += written;
546 }
547
 /* HSTS data is only saved while it outlives the URL cut-off */
548 h = parent->data;
549 if (h && h->hsts.expires > expiry) {
550 hsts_expiry = h->hsts.expires;
551 hsts_include_subdomains = h->hsts.include_sub_domains;
552 }
553
554 urldb_count_urls(&parent->data->paths, expiry, &path_count);
555
556 if (path_count > 0) {
557 fprintf(fp, "%s %i ", host, hsts_include_subdomains);
558 urldb_write_timet(fp, hsts_expiry);
559 fprintf(fp, "%i\n", path_count);
560
561 urldb_write_paths(&parent->data->paths, host, fp,
562 &path, &path_alloc, &path_used, expiry);
563 } else if (hsts_expiry) {
 /* No URLs to save, but the HSTS data still needs a host line */
564 fprintf(fp, "%s %i ", host, hsts_include_subdomains);
565 urldb_write_timet(fp, hsts_expiry);
566 fprintf(fp, "0\n");
567 }
568
569 free(path);
570
571 urldb_save_search_tree(parent->right, fp);
572}
573
574
575/**
576 * Path data iterator (internal)
577 *
 * Walks the path tree below \a parent depth first without recursion and
 * invokes a callback for every leaf. When \a url_callback is supplied it
 * is the one that gets called, and \a cookie_callback is not used;
 * otherwise \a cookie_callback is called once per cookie attached to the
 * leaf.
 *
 * Iteration stops as soon as a callback returns false.
 *
578 * \param parent Root of subtree to iterate over
579 * \param url_callback Callback function
580 * \param cookie_callback Callback function
581 * \return true to continue, false otherwise
582 */
583static bool_Bool
584urldb_iterate_entries_path(const struct path_data *parent,
585 bool_Bool (*url_callback)(nsurl *url, const struct url_data *data),
586 bool_Bool (*cookie_callback)(const struct cookie_data *data))
587{
588 const struct path_data *p = parent;
589 const struct cookie_data *c;
590
591 do {
592 if (p->children != NULL((void*)0)) {
593 /* Drill down into children */
594 p = p->children;
595 } else {
596 /* All leaf nodes in the path tree should have an URL or
597 * cookies attached to them. If this is not the case, it
598 * indicates that there's a bug in the file loader/URL
599 * insertion code. Therefore, assert this here. */
 /* Note that the assertion itself only checks that at least
 * one callback was supplied. */
600 assert(url_callback || cookie_callback)((url_callback || cookie_callback) ? (void) (0) : __assert_fail
("url_callback || cookie_callback", "content/urldb.c", 600, __extension__
__PRETTY_FUNCTION__))
;
601
602 /** \todo handle fragments? */
603 if (url_callback) {
604 const struct url_internal_data *u = &p->urld;
605
606 assert(p->url)((p->url) ? (void) (0) : __assert_fail ("p->url", "content/urldb.c"
, 606, __extension__ __PRETTY_FUNCTION__))
;
607
 /* The internal structure is handed out through the
 * public url_data type */
608 if (!url_callback(p->url,
609 (const struct url_data *) u))
610 return false0;
611 } else {
612 c = (const struct cookie_data *)p->cookies;
613 for (; c != NULL((void*)0); c = c->next) {
614 if (!cookie_callback(c))
615 return false0;
616 }
617 }
618
619 /* Now, find next node to process. */
620 while (p != parent) {
621 if (p->next != NULL((void*)0)) {
622 /* Have a sibling, process that */
623 p = p->next;
624 break;
625 }
626
627 /* Ascend tree */
628 p = p->parent;
629 }
630 }
631 } while (p != parent);
632
633 return true1;
634}
635
636
637/**
638 * Check whether a host string is an IP address.
639 *
640 * This call detects IPv4 addresses (all of dotted-quad or subsets,
641 * decimal or hexadecimal notations) and IPv6 addresses (including
642 * those containing embedded IPv4 addresses.)
643 *
644 * \param host a hostname terminated by '\0'
645 * \return true if the hostname is an IP address, false otherwise
646 */
647static bool_Bool urldb__host_is_ip_address(const char *host)
648{
649 struct in_addr ipv4;
650 size_t host_len = strlen(host);
651 const char *sane_host;
652 const char *slash;
653#ifndef NO_IPV6
654 struct in6_addr ipv6;
655 char ipv6_addr[64];
656 unsigned int ipv6_addr_len;
657#endif
658 /**
659 * @todo FIXME Some parts of urldb.c make confusions between hosts
660 * and "prefixes", we can sometimes be erroneously passed more than
661 * just a host. Sometimes we may be passed trailing slashes, or even
662 * whole path segments. A specific criminal in this class is
663 * urldb_iterate_partial, which takes a prefix to search for, but
664 * passes that prefix to functions that expect only hosts.
665 *
666 * For the time being, we will accept such calls; we check if there
667 * is a / in the host parameter, and if there is, we take a copy and
668 * replace the / with a \0. This is not a permanent solution; we
669 * should search through NetSurf and find all the callers that are
670 * in error and fix them. When doing this task, it might be wise
671 * to replace the hideousness below with code that doesn't have to do
672 * this, and add assert(strchr(host, '/') == NULL); somewhere.
673 * -- rjek - 2010-11-04
674 */
675
676 slash = strchr(host, '/');
677 if (slash == NULL((void*)0)) {
678 sane_host = host;
679 } else {
680 char *c = strdup(host);
681 c[slash - host] = '\0';
682 sane_host = c;
683 host_len = slash - host;
684 NSLOG(netsurf, INFO, "WARNING: called with non-host '%s'",do { if (NSLOG_LEVEL_INFO >= NSLOG_LEVEL_VERBOSE) { static
nslog_entry_context_t _nslog_ctx = { &__nslog_category_netsurf
, NSLOG_LEVEL_INFO, "content/urldb.c", sizeof("content/urldb.c"
) - 1, __PRETTY_FUNCTION__, sizeof(__PRETTY_FUNCTION__) - 1, 685
, }; nslog__log(&_nslog_ctx, "WARNING: called with non-host '%s'"
, host); } } while(0)
685 host)do { if (NSLOG_LEVEL_INFO >= NSLOG_LEVEL_VERBOSE) { static
nslog_entry_context_t _nslog_ctx = { &__nslog_category_netsurf
, NSLOG_LEVEL_INFO, "content/urldb.c", sizeof("content/urldb.c"
) - 1, __PRETTY_FUNCTION__, sizeof(__PRETTY_FUNCTION__) - 1, 685
, }; nslog__log(&_nslog_ctx, "WARNING: called with non-host '%s'"
, host); } } while(0)
;
686 }
687
688 if (strspn(sane_host, "0123456789abcdefABCDEF[].:") < host_len)
689 goto out_false;
690
691 if (inet_aton(sane_host, &ipv4) != 0) {
692 /* This can only be a sane IPv4 address if it contains 3 dots.
693 * Helpfully, inet_aton is happy to treat "a", "a.b", "a.b.c",
694 * and "a.b.c.d" as valid IPv4 address strings where we only
695 * support the full, dotted-quad, form.
696 */
697 int num_dots = 0;
698 size_t index;
699
700 for (index = 0; index < host_len; index++) {
701 if (sane_host[index] == '.')
702 num_dots++;
703 }
704
705 if (num_dots == 3)
706 goto out_true;
707 else
708 goto out_false;
709 }
710
711#ifndef NO_IPV6
712 if ((host_len < 6) ||
713 (sane_host[0] != '[') ||
714 (sane_host[host_len - 1] != ']')) {
715 goto out_false;
716 }
717
718 ipv6_addr_len = host_len - 2;
719 if (ipv6_addr_len >= sizeof(ipv6_addr)) {
720 ipv6_addr_len = sizeof(ipv6_addr) - 1;
721 }
722 strncpy(ipv6_addr, sane_host + 1, ipv6_addr_len);
723 ipv6_addr[ipv6_addr_len] = '\0';
724
725 if (inet_pton(AF_INET610, ipv6_addr, &ipv6) == 1)
726 goto out_true;
727#endif
728
729out_false:
730 if (slash != NULL((void*)0)) free((void *)sane_host);
731 return false0;
732
733out_true:
734 if (slash != NULL((void*)0)) free((void *)sane_host);
735 return true1;
736}
737
738
739/**
740 * Compare host_part with prefix
741 *
 * For an IP address prefix, the node's part is compared against the
 * whole prefix, ignoring case and limited to the prefix length.
 *
 * Otherwise the prefix is consumed one '.'-separated segment at a time
 * while the host tree is climbed from \a a towards the root, each prefix
 * segment being compared with the part of the current node.
 *
742 * \param a host part
743 * \param b prefix
744 * \return 0 if match, non-zero, otherwise
745 */
746static int urldb_search_match_prefix(const struct host_part *a, const char *b)
747{
748 const char *end, *dot;
749 int plen, ret;
750
751 assert(a && a != &db_root && b)((a && a != &db_root && b) ? (void) (0) :
__assert_fail ("a && a != &db_root && b"
, "content/urldb.c", 751, __extension__ __PRETTY_FUNCTION__))
;
752
753 if (urldb__host_is_ip_address(b)) {
754 /* IP address */
755 return strncasecmp(a->part, b, strlen(b));
756 }
757
 /* end points one past the terminating NUL of the prefix, so that
 * b == end means every segment has been consumed */
758 end = b + strlen(b) + 1;
759
760 while (b < end && a && a != &db_root) {
761 dot = strchr(b, '.');
762 if (!dot) {
763 /* last segment */
764 dot = end - 1;
765 }
766
767 /* Compare strings (length limited) */
768 if ((ret = strncasecmp(a->part, b, dot - b)) != 0)
769 /* didn't match => return difference */
770 return ret;
771
772 /* The strings matched */
773 if (dot < end - 1) {
774 /* Consider segment lengths only in the case
775 * where the prefix contains segments */
776 plen = strlen(a->part);
777 if (plen > dot - b) {
778 /* len(a) > len(b) */
779 return 1;
780 } else if (plen < dot - b) {
781 /* len(a) < len(b) */
782 return -1;
783 }
784 }
785
 /* Move on to the next prefix segment and the parent node */
786 b = dot + 1;
787 a = a->parent;
788 }
789
790 /* If we get here then either:
791 * a) The path lengths differ
792 * or b) The hosts are identical
793 */
794 if (a && a != &db_root && b >= end) {
795 /* len(a) > len(b) => prefix matches */
796 return 0;
797 } else if ((!a || a == &db_root) && b < end) {
798 /* len(a) < len(b) => prefix does not match */
799 return -1;
800 }
801
802 /* Identical */
803 return 0;
804}
805
806
807/**
808 * Partial host iterator (internal)
809 *
 * Recursively searches a search tree for hosts matching \a prefix. When a
 * node does not match, only the subtree selected by the sign of the
 * comparison is searched. When it matches, both subtrees are searched and
 * every URL stored under the node's host is passed to \a callback.
 *
 * All three arguments are asserted to be non-NULL.
 *
810 * \param root Root of (sub)tree to traverse
811 * \param prefix Prefix to match
812 * \param callback Callback function
813 * \return true to continue, false otherwise
814 */
815static bool_Bool
816urldb_iterate_partial_host(struct search_node *root,
817 const char *prefix,
818 bool_Bool (*callback)(nsurl *url, const struct url_data *data))
819{
820 int c;
821
822 assert(root && prefix && callback)((root && prefix && callback) ? (void) (0) : __assert_fail
("root && prefix && callback", "content/urldb.c"
, 822, __extension__ __PRETTY_FUNCTION__))
;
823
 /* The sentinel marks the end of a branch */
824 if (root == &empty)
825 return true1;
826
827 c = urldb_search_match_prefix(root->data, prefix);
828
829 if (c > 0) {
830 /* No match => look in left subtree */
831 return urldb_iterate_partial_host(root->left,
832 prefix,
833 callback);
834 } else if (c < 0) {
835 /* No match => look in right subtree */
836 return urldb_iterate_partial_host(root->right,
837 prefix,
838 callback);
839 } else {
840 /* Match => iterate over l/r subtrees & process this node */
841 if (!urldb_iterate_partial_host(root->left,
842 prefix,
843 callback)) {
844 return false0;
845 }
846
847 if (root->data->paths.children) {
848 /* and extract all paths attached to this host */
849 if (!urldb_iterate_entries_path(&root->data->paths,
850 callback,
851 NULL((void*)0))) {
852 return false0;
853 }
854 }
855
856 if (!urldb_iterate_partial_host(root->right,
857 prefix,
858 callback)) {
859 return false0;
860 }
861 }
862
863 return true1;
864}
865
866
867/**
868 * Partial path iterator (internal)
869 *
870 * Given: http://www.example.org/a/b/c/d//e
871 * and assuming a path tree:
872 * ^
873 * / \
874 * a1 b1
875 * / \
876 * a2 b2
877 * /|\
878 * a b c
879 * 3 3 |
880 * d
881 * |
882 * e
883 * / \
884 * f g
885 *
886 * Prefix will be: p will be:
887 *
888 * a/b/c/d//e a1
889 * b/c/d//e a2
890 * b/c/d//e b3
891 * c/d//e a3
892 * c/d//e b3
893 * c/d//e c
894 * d//e d
895 * /e e (skip /)
896 * e e
897 *
898 * I.E. perform a breadth-first search of the tree.
899 *
 * Segments are compared ignoring case, limited to the length of the
 * current prefix segment. Once the prefix is exhausted, everything below
 * each matching sibling is passed to \a callback.
 *
 * NOTE(review): the first child of \a parent is dereferenced without a
 * NULL check, so callers presumably guarantee that \a parent has
 * children -- confirm.
 *
900 * \param parent Root of (sub)tree to traverse
901 * \param prefix Prefix to match
902 * \param callback Callback function
903 * \return true to continue, false otherwise
904 */
905static bool_Bool
906urldb_iterate_partial_path(const struct path_data *parent,
907 const char *prefix,
908 bool_Bool (*callback)(nsurl *url, const struct url_data *data))
909{
910 const struct path_data *p = parent->children;
911 const char *slash, *end = prefix + strlen(prefix);
912
913 do {
 /* Find the end of the current prefix segment */
914 slash = strchr(prefix, '/');
915 if (!slash) {
916 slash = end;
917 }
918
919 if (slash == prefix && *prefix == '/') {
920 /* Ignore "//" */
921 prefix++;
 /* continue re-tests the loop condition with p unchanged */
922 continue;
923 }
924
925 if (strncasecmp(p->segment, prefix, slash - prefix) == 0) {
926 /* prefix matches so far */
927 if (slash == end) {
928 /* we've run out of prefix, so all
929 * paths below this one match */
930 if (!urldb_iterate_entries_path(p,
931 callback,
932 NULL((void*)0))) {
933 return false0;
934 }
935
936 /* Progress to next sibling */
937 p = p->next;
938 } else {
939 /* Skip over this segment */
940 prefix = slash + 1;
941
942 p = p->children;
943 }
944 } else {
945 /* Doesn't match this segment, try next sibling */
946 p = p->next;
947 }
948 } while (p != NULL((void*)0));
949
950 return true1;
951}
952
953
954/**
955 * Host data iterator (internal)
956 *
957 * \param parent Root of subtree to iterate over
958 * \param url_callback Callback function
959 * \param cookie_callback Callback function
960 * \return true to continue, false otherwise
961 */
962static bool_Bool
963urldb_iterate_entries_host(struct search_node *parent,
964 bool_Bool (*url_callback)(nsurl *url, const struct url_data *data),
965 bool_Bool (*cookie_callback)(const struct cookie_data *data))
966{
967 if (parent == &empty) {
968 return true1;
969 }
970
971 if (!urldb_iterate_entries_host(parent->left,
972 url_callback,
973 cookie_callback)) {
974 return false0;
975 }
976
977 if ((parent->data->paths.children) ||
978 ((cookie_callback) &&
979 (parent->data->paths.cookies))) {
980 /* We have paths (or domain cookies), so iterate them */
981 if (!urldb_iterate_entries_path(&parent->data->paths,
982 url_callback,
983 cookie_callback)) {
984 return false0;
985 }
986 }
987
988 if (!urldb_iterate_entries_host(parent->right,
989 url_callback,
990 cookie_callback)) {
991 return false0;
992 }
993
994 return true1;
995}
996
997
998/**
999 * Add a host node to the tree
1000 *
1001 * \param part Host segment to add (or whole IP address) (copied)
1002 * \param parent Parent node to add to
1003 * \return Pointer to added node, or NULL on memory exhaustion
1004 */
1005static struct host_part *
1006urldb_add_host_node(const char *part, struct host_part *parent)
1007{
1008 struct host_part *d;
1009
1010 assert(part && parent)((part && parent) ? (void) (0) : __assert_fail ("part && parent"
, "content/urldb.c", 1010, __extension__ __PRETTY_FUNCTION__)
)
;
1011
1012 d = calloc(1, sizeof(struct host_part));
1013 if (!d) {
1014 return NULL((void*)0);
1015 }
1016
1017 d->part = strdup(part);
1018 if (!d->part) {
1019 free(d);
1020 return NULL((void*)0);
1021 }
1022
1023 d->next = parent->children;
1024 if (parent->children) {
1025 parent->children->prev = d;
1026 }
1027 d->parent = parent;
1028 parent->children = d;
1029
1030 return d;
1031}
1032
1033
/**
 * Fragment comparator callback for qsort
 *
 * Each argument points at an element of an array of C strings; the
 * strings are compared ignoring case.
 *
 * \param a  first value
 * \param b  second value
 * \return 0 for equal else positive or negative value on comparison
 */
static int urldb_add_path_fragment_cmp(const void *a, const void *b)
{
	const char *lhs = *((const char **) a);
	const char *rhs = *((const char **) b);

	return strcasecmp(lhs, rhs);
}
1045
1046
1047/**
1048 * Add a fragment to a path segment
1049 *
1050 * \param segment Path segment to add to
1051 * \param fragment Fragment to add (copied), or NULL
1052 * \return segment or NULL on memory exhaustion
1053 */
1054static struct path_data *
1055urldb_add_path_fragment(struct path_data *segment, lwc_string *fragment)
1056{
1057 char **temp;
1058
1059 assert(segment)((segment) ? (void) (0) : __assert_fail ("segment", "content/urldb.c"
, 1059, __extension__ __PRETTY_FUNCTION__))
;
1060
1061 /* If no fragment, this function is a NOP
1062 * This may seem strange, but it makes the rest
1063 * of the code cleaner */
1064 if (!fragment)
1065 return segment;
1066
1067 temp = realloc(segment->fragment,
1068 (segment->frag_cnt + 1) * sizeof(char *));
1069 if (!temp)
1070 return NULL((void*)0);
1071
1072 segment->fragment = temp;
1073 segment->fragment[segment->frag_cnt] =
1074 strdup(lwc_string_data(fragment)({((fragment != ((void*)0)) ? (void) (0) : __assert_fail ("fragment != NULL"
, "content/urldb.c", 1074, __extension__ __PRETTY_FUNCTION__)
); (const char *)((fragment)+1);})
);
1075 if (!segment->fragment[segment->frag_cnt]) {
1076 /* Don't free temp - it's now our buffer */
1077 return NULL((void*)0);
1078 }
1079
1080 segment->frag_cnt++;
1081
1082 /* We want fragments in alphabetical order, so sort them
1083 * It may prove better to insert in alphabetical order instead */
1084 qsort(segment->fragment,
1085 segment->frag_cnt,
1086 sizeof (char *),
1087 urldb_add_path_fragment_cmp);
1088
1089 return segment;
1090}
1091
1092
1093/**
1094 * Add a path node to the tree
1095 *
1096 * \param scheme URL scheme associated with path (copied)
1097 * \param port Port number on host associated with path
1098 * \param segment Path segment to add (copied)
1099 * \param fragment URL fragment (copied), or NULL
1100 * \param parent Parent node to add to
1101 * \return Pointer to added node, or NULL on memory exhaustion
1102 */
1103static struct path_data *
1104urldb_add_path_node(lwc_string *scheme,
1105 unsigned int port,
1106 const char *segment,
1107 lwc_string *fragment,
1108 struct path_data *parent)
1109{
1110 struct path_data *d, *e;
1111
1112 assert(scheme && segment && parent)((scheme && segment && parent) ? (void) (0) :
__assert_fail ("scheme && segment && parent"
, "content/urldb.c", 1112, __extension__ __PRETTY_FUNCTION__)
)
;
1113
1114 d = calloc(1, sizeof(struct path_data));
1115 if (!d)
1116 return NULL((void*)0);
1117
1118 d->scheme = lwc_string_ref(scheme)({lwc_string *__lwc_s = (scheme); ((__lwc_s != ((void*)0)) ? (
void) (0) : __assert_fail ("__lwc_s != NULL", "content/urldb.c"
, 1118, __extension__ __PRETTY_FUNCTION__)); __lwc_s->refcnt
++; __lwc_s;})
;
1119
1120 d->port = port;
1121
1122 d->segment = strdup(segment);
1123 if (!d->segment) {
1124 lwc_string_unref(d->scheme){ lwc_string *__lwc_s = (d->scheme); if (__lwc_s != ((void
*)0)) { __lwc_s->refcnt--; if ((__lwc_s->refcnt == 0) ||
((__lwc_s->refcnt == 1) && (__lwc_s->insensitive
== __lwc_s))) lwc_string_destroy(__lwc_s); } }
;
1125 free(d);
1126 return NULL((void*)0);
1127 }
1128
1129 if (fragment) {
1130 if (!urldb_add_path_fragment(d, fragment)) {
1131 free(d->segment);
1132 lwc_string_unref(d->scheme){ lwc_string *__lwc_s = (d->scheme); if (__lwc_s != ((void
*)0)) { __lwc_s->refcnt--; if ((__lwc_s->refcnt == 0) ||
((__lwc_s->refcnt == 1) && (__lwc_s->insensitive
== __lwc_s))) lwc_string_destroy(__lwc_s); } }
;
1133 free(d);
1134 return NULL((void*)0);
1135 }
1136 }
1137
1138 for (e = parent->children; e; e = e->next) {
1139 if (strcmp(e->segment, d->segment) > 0)
1140 break;
1141 }
1142
1143 if (e) {
1144 d->prev = e->prev;
1145 d->next = e;
1146 if (e->prev)
1147 e->prev->next = d;
1148 else
1149 parent->children = d;
1150 e->prev = d;
1151 } else if (!parent->children) {
1152 d->prev = d->next = NULL((void*)0);
1153 parent->children = parent->last = d;
1154 } else {
1155 d->next = NULL((void*)0);
1156 d->prev = parent->last;
1157 parent->last->next = d;
1158 parent->last = d;
1159 }
1160 d->parent = parent;
1161
1162 return d;
1163}
1164
1165
1166/**
1167 * Get the search tree for a particular host
1168 *
1169 * \param host the host to lookup
1170 * \return the corresponding search tree
1171 */
1172static struct search_node **urldb_get_search_tree_direct(const char *host)
1173{
1174 assert(host)((host) ? (void) (0) : __assert_fail ("host", "content/urldb.c"
, 1174, __extension__ __PRETTY_FUNCTION__))
;
1175
1176 if (urldb__host_is_ip_address(host)) {
1177 return &search_trees[ST_IP0];
1178 } else if (ascii_is_alpha(*host)) {
1179 return &search_trees[ST_DN2 + ascii_to_lower(*host) - 'a'];
1180 }
1181 return &search_trees[ST_EE1];
1182}
1183
1184
/**
 * Get the search tree for a particular host
 *
 * Convenience wrapper returning the tree root itself rather than the
 * slot that holds it.
 *
 * \param host  the host to lookup
 * \return the corresponding search tree
 */
static struct search_node *urldb_get_search_tree(const char *host)
{
	struct search_node **slot = urldb_get_search_tree_direct(host);

	return *slot;
}
1195
1196
1197/**
1198 * Compare host part with a string
1199 *
1200 * \param a host part
1201 * \param b string to compare
1202 * \return 0 if match, non-zero, otherwise
1203 */
1204static int urldb_search_match_string(const struct host_part *a, const char *b)
1205{
1206 const char *end, *dot;
1207 int plen, ret;
1208
1209 assert(a && a != &db_root && b)((a && a != &db_root && b) ? (void) (0) :
__assert_fail ("a && a != &db_root && b"
, "content/urldb.c", 1209, __extension__ __PRETTY_FUNCTION__)
)
;
1210
1211 if (urldb__host_is_ip_address(b)) {
1212 /* IP address */
1213 return strcasecmp(a->part, b);
1214 }
1215
1216 end = b + strlen(b) + 1;
1217
1218 while (b < end && a && a != &db_root) {
1219 dot = strchr(b, '.');
1220 if (!dot) {
1221 /* last segment */
1222 dot = end - 1;
1223 }
1224
1225 /* Compare strings (length limited) */
1226 if ((ret = strncasecmp(a->part, b, dot - b)) != 0)
1227 /* didn't match => return difference */
1228 return ret;
1229
1230 /* The strings matched, now check that the lengths do, too */
1231 plen = strlen(a->part);
1232
1233 if (plen > dot - b) {
1234 /* len(a) > len(b) */
1235 return 1;
1236 } else if (plen < dot - b) {
1237 /* len(a) < len(b) */
1238 return -1;
1239 }
1240
1241 b = dot + 1;
1242 a = a->parent;
1243 }
1244
1245 /* If we get here then either:
1246 * a) The path lengths differ
1247 * or b) The hosts are identical
1248 */
1249 if (a && a != &db_root && b >= end) {
1250 /* len(a) > len(b) */
1251 return 1;
1252 } else if ((!a || a == &db_root) && b < end) {
1253 /* len(a) < len(b) */
1254 return -1;
1255 }
1256
1257 /* Identical */
1258 return 0;
1259}
1260
1261
1262/**
1263 * Find a node in a search tree
1264 *
1265 * \param root Tree to look in
1266 * \param host Host to find
1267 * \return Pointer to host tree node, or NULL if not found
1268 */
1269static const struct host_part *
1270urldb_search_find(struct search_node *root, const char *host)
1271{
1272 int c;
1273
1274 assert(root && host)((root && host) ? (void) (0) : __assert_fail ("root && host"
, "content/urldb.c", 1274, __extension__ __PRETTY_FUNCTION__)
)
;
1275
1276 if (root == &empty) {
1277 return NULL((void*)0);
1278 }
1279
1280 c = urldb_search_match_string(root->data, host);
1281
1282 if (c > 0) {
1283 return urldb_search_find(root->left, host);
1284 } else if (c < 0) {
1285 return urldb_search_find(root->right, host);
1286 }
1287
1288 return root->data;
1289}
1290
1291
1292/**
1293 * Match a path string
1294 *
1295 * \param parent Path (sub)tree to look in
1296 * \param path The path to search for
1297 * \param scheme The URL scheme associated with the path
1298 * \param port The port associated with the path
1299 * \return Pointer to path data or NULL if not found.
1300 */
1301static struct path_data *
1302urldb_match_path(const struct path_data *parent,
1303 const char *path,
1304 lwc_string *scheme,
1305 unsigned short port)
1306{
1307 const struct path_data *p;
1308 const char *slash;
1309 bool_Bool match;
1310
1311 assert(parent != NULL)((parent != ((void*)0)) ? (void) (0) : __assert_fail ("parent != NULL"
, "content/urldb.c", 1311, __extension__ __PRETTY_FUNCTION__)
)
;
1312 assert(parent->segment == NULL)((parent->segment == ((void*)0)) ? (void) (0) : __assert_fail
("parent->segment == NULL", "content/urldb.c", 1312, __extension__
__PRETTY_FUNCTION__))
;
1313
1314 if (path[0] != '/') {
1315 NSLOG(netsurf, INFO, "path is %s", path)do { if (NSLOG_LEVEL_INFO >= NSLOG_LEVEL_VERBOSE) { static
nslog_entry_context_t _nslog_ctx = { &__nslog_category_netsurf
, NSLOG_LEVEL_INFO, "content/urldb.c", sizeof("content/urldb.c"
) - 1, __PRETTY_FUNCTION__, sizeof(__PRETTY_FUNCTION__) - 1, 1315
, }; nslog__log(&_nslog_ctx, "path is %s", path); } } while
(0)
;
1316 }
1317
1318 assert(path[0] == '/')((path[0] == '/') ? (void) (0) : __assert_fail ("path[0] == '/'"
, "content/urldb.c", 1318, __extension__ __PRETTY_FUNCTION__)
)
;
1319
1320 /* Start with children, as parent has no segment */
1321 p = parent->children;
1322
1323 while (p != NULL((void*)0)) {
1324 slash = strchr(path + 1, '/');
1325 if (!slash) {
1326 slash = path + strlen(path);
1327 }
1328
1329 if (strncmp(p->segment, path + 1, slash - path - 1) == 0 &&
1330 lwc_string_isequal(p->scheme, scheme, &match)((*(&match) = ((p->scheme) == (scheme))), lwc_error_ok
)
== lwc_error_ok &&
1331 match == true1 &&
1332 p->port == port) {
1333 if (*slash == '\0') {
1334 /* Complete match */
1335 return (struct path_data *) p;
1336 }
1337
1338 /* Match so far, go down tree */
1339 p = p->children;
1340
1341 path = slash;
1342 } else {
1343 /* No match, try next sibling */
1344 p = p->next;
1345 }
1346 }
1347
1348 return NULL((void*)0);
1349}
1350
1351
1352/**
1353 * Find an URL in the database
1354 *
1355 * \param url Absolute URL to find
1356 * \return Pointer to path data, or NULL if not found
1357 */
1358static struct path_data *urldb_find_url(nsurl *url)
1359{
1360 const struct host_part *h;
1361 struct path_data *p;
1362 struct search_node *tree;
1363 char *plq;
1364 const char *host_str;
1365 lwc_string *scheme, *host, *port;
1366 size_t len = 0;
1367 unsigned int port_int;
1368 bool_Bool match;
1369
1370 assert(url)((url) ? (void) (0) : __assert_fail ("url", "content/urldb.c"
, 1370, __extension__ __PRETTY_FUNCTION__))
;
1371
1372 if (url_bloom != NULL((void*)0)) {
1373 if (bloom_search_hash(url_bloom, nsurl_hash(url)) == false0) {
1374 return NULL((void*)0);
1375 }
1376 }
1377
1378 scheme = nsurl_get_component(url, NSURL_SCHEME);
1379 if (scheme == NULL((void*)0))
1380 return NULL((void*)0);
1381
1382 if (lwc_string_isequal(scheme, corestring_lwc_mailto, &match)((*(&match) = ((scheme) == (corestring_lwc_mailto))), lwc_error_ok
)
==
1383 lwc_error_ok && match == true1) {
1384 lwc_string_unref(scheme){ lwc_string *__lwc_s = (scheme); if (__lwc_s != ((void*)0)) {
__lwc_s->refcnt--; if ((__lwc_s->refcnt == 0) || ((__lwc_s
->refcnt == 1) && (__lwc_s->insensitive == __lwc_s
))) lwc_string_destroy(__lwc_s); } }
;
1385 return NULL((void*)0);
1386 }
1387
1388 host = nsurl_get_component(url, NSURL_HOST);
1389 if (host != NULL((void*)0)) {
1390 host_str = lwc_string_data(host)({((host != ((void*)0)) ? (void) (0) : __assert_fail ("host != NULL"
, "content/urldb.c", 1390, __extension__ __PRETTY_FUNCTION__)
); (const char *)((host)+1);})
;
1391 lwc_string_unref(host){ lwc_string *__lwc_s = (host); if (__lwc_s != ((void*)0)) { __lwc_s
->refcnt--; if ((__lwc_s->refcnt == 0) || ((__lwc_s->
refcnt == 1) && (__lwc_s->insensitive == __lwc_s))
) lwc_string_destroy(__lwc_s); } }
;
1392
1393 } else if (lwc_string_isequal(scheme, corestring_lwc_file, &match)((*(&match) = ((scheme) == (corestring_lwc_file))), lwc_error_ok
)
==
1394 lwc_error_ok && match == true1) {
1395 host_str = "localhost";
1396
1397 } else {
1398 lwc_string_unref(scheme){ lwc_string *__lwc_s = (scheme); if (__lwc_s != ((void*)0)) {
__lwc_s->refcnt--; if ((__lwc_s->refcnt == 0) || ((__lwc_s
->refcnt == 1) && (__lwc_s->insensitive == __lwc_s
))) lwc_string_destroy(__lwc_s); } }
;
1399 return NULL((void*)0);
1400 }
1401
1402 tree = urldb_get_search_tree(host_str);
1403 h = urldb_search_find(tree, host_str);
1404 if (!h) {
1405 lwc_string_unref(scheme){ lwc_string *__lwc_s = (scheme); if (__lwc_s != ((void*)0)) {
__lwc_s->refcnt--; if ((__lwc_s->refcnt == 0) || ((__lwc_s
->refcnt == 1) && (__lwc_s->insensitive == __lwc_s
))) lwc_string_destroy(__lwc_s); } }
;
1406 return NULL((void*)0);
1407 }
1408
1409 /* generate plq (path, leaf, query) */
1410 if (nsurl_get(url, NSURL_PATH | NSURL_QUERY, &plq, &len) != NSERROR_OK) {
1411 lwc_string_unref(scheme){ lwc_string *__lwc_s = (scheme); if (__lwc_s != ((void*)0)) {
__lwc_s->refcnt--; if ((__lwc_s->refcnt == 0) || ((__lwc_s
->refcnt == 1) && (__lwc_s->insensitive == __lwc_s
))) lwc_string_destroy(__lwc_s); } }
;
1412 return NULL((void*)0);
1413 }
1414
1415 /* Get port */
1416 port = nsurl_get_component(url, NSURL_PORT);
1417 if (port != NULL((void*)0)) {
1418 port_int = atoi(lwc_string_data(port)({((port != ((void*)0)) ? (void) (0) : __assert_fail ("port != NULL"
, "content/urldb.c", 1418, __extension__ __PRETTY_FUNCTION__)
); (const char *)((port)+1);})
);
1419 lwc_string_unref(port){ lwc_string *__lwc_s = (port); if (__lwc_s != ((void*)0)) { __lwc_s
->refcnt--; if ((__lwc_s->refcnt == 0) || ((__lwc_s->
refcnt == 1) && (__lwc_s->insensitive == __lwc_s))
) lwc_string_destroy(__lwc_s); } }
;
1420 } else {
1421 port_int = 0;
1422 }
1423
1424 p = urldb_match_path(&h->paths, plq, scheme, port_int);
1425
1426 free(plq);
1427 lwc_string_unref(scheme){ lwc_string *__lwc_s = (scheme); if (__lwc_s != ((void*)0)) {
__lwc_s->refcnt--; if ((__lwc_s->refcnt == 0) || ((__lwc_s
->refcnt == 1) && (__lwc_s->insensitive == __lwc_s
))) lwc_string_destroy(__lwc_s); } }
;
1428
1429 return p;
1430}
1431
1432
1433/**
1434 * Dump URL database paths to stderr
1435 *
1436 * \param parent Parent node of tree to dump
1437 */
1438static void urldb_dump_paths(struct path_data *parent)
1439{
1440 const struct path_data *p = parent;
1441 unsigned int i;
1442
1443 do {
1444 if (p->segment != NULL((void*)0)) {
1445 NSLOG(netsurf, INFO, "\t%s : %u",do { if (NSLOG_LEVEL_INFO >= NSLOG_LEVEL_VERBOSE) { static
nslog_entry_context_t _nslog_ctx = { &__nslog_category_netsurf
, NSLOG_LEVEL_INFO, "content/urldb.c", sizeof("content/urldb.c"
) - 1, __PRETTY_FUNCTION__, sizeof(__PRETTY_FUNCTION__) - 1, 1446
, }; nslog__log(&_nslog_ctx, "\t%s : %u", ({((p->scheme
!= ((void*)0)) ? (void) (0) : __assert_fail ("p->scheme != NULL"
, "content/urldb.c", 1446, __extension__ __PRETTY_FUNCTION__)
); (const char *)((p->scheme)+1);}), p->port); } } while
(0)
1446 lwc_string_data(p->scheme), p->port)do { if (NSLOG_LEVEL_INFO >= NSLOG_LEVEL_VERBOSE) { static
nslog_entry_context_t _nslog_ctx = { &__nslog_category_netsurf
, NSLOG_LEVEL_INFO, "content/urldb.c", sizeof("content/urldb.c"
) - 1, __PRETTY_FUNCTION__, sizeof(__PRETTY_FUNCTION__) - 1, 1446
, }; nslog__log(&_nslog_ctx, "\t%s : %u", ({((p->scheme
!= ((void*)0)) ? (void) (0) : __assert_fail ("p->scheme != NULL"
, "content/urldb.c", 1446, __extension__ __PRETTY_FUNCTION__)
); (const char *)((p->scheme)+1);}), p->port); } } while
(0)
;
1447
1448 NSLOG(netsurf, INFO, "\t\t'%s'", p->segment)do { if (NSLOG_LEVEL_INFO >= NSLOG_LEVEL_VERBOSE) { static
nslog_entry_context_t _nslog_ctx = { &__nslog_category_netsurf
, NSLOG_LEVEL_INFO, "content/urldb.c", sizeof("content/urldb.c"
) - 1, __PRETTY_FUNCTION__, sizeof(__PRETTY_FUNCTION__) - 1, 1448
, }; nslog__log(&_nslog_ctx, "\t\t'%s'", p->segment); }
} while(0)
;
1449
1450 for (i = 0; i != p->frag_cnt; i++) {
1451 NSLOG(netsurf, INFO, "\t\t\t#%s",do { if (NSLOG_LEVEL_INFO >= NSLOG_LEVEL_VERBOSE) { static
nslog_entry_context_t _nslog_ctx = { &__nslog_category_netsurf
, NSLOG_LEVEL_INFO, "content/urldb.c", sizeof("content/urldb.c"
) - 1, __PRETTY_FUNCTION__, sizeof(__PRETTY_FUNCTION__) - 1, 1452
, }; nslog__log(&_nslog_ctx, "\t\t\t#%s", p->fragment[
i]); } } while(0)
1452 p->fragment[i])do { if (NSLOG_LEVEL_INFO >= NSLOG_LEVEL_VERBOSE) { static
nslog_entry_context_t _nslog_ctx = { &__nslog_category_netsurf
, NSLOG_LEVEL_INFO, "content/urldb.c", sizeof("content/urldb.c"
) - 1, __PRETTY_FUNCTION__, sizeof(__PRETTY_FUNCTION__) - 1, 1452
, }; nslog__log(&_nslog_ctx, "\t\t\t#%s", p->fragment[
i]); } } while(0)
;
1453 }
1454 }
1455
1456 if (p->children != NULL((void*)0)) {
1457 p = p->children;
1458 } else {
1459 while (p != parent) {
1460 if (p->next != NULL((void*)0)) {
1461 p = p->next;
1462 break;
1463 }
1464
1465 p = p->parent;
1466 }
1467 }
1468 } while (p != parent);
1469}
1470
1471
1472/**
1473 * Dump URL database hosts to stderr
1474 *
1475 * \param parent Parent node of tree to dump
1476 */
1477static void urldb_dump_hosts(struct host_part *parent)
1478{
1479 struct host_part *h;
1480
1481 if (parent->part) {
1482 NSLOG(netsurf, INFO, "%s", parent->part)do { if (NSLOG_LEVEL_INFO >= NSLOG_LEVEL_VERBOSE) { static
nslog_entry_context_t _nslog_ctx = { &__nslog_category_netsurf
, NSLOG_LEVEL_INFO, "content/urldb.c", sizeof("content/urldb.c"
) - 1, __PRETTY_FUNCTION__, sizeof(__PRETTY_FUNCTION__) - 1, 1482
, }; nslog__log(&_nslog_ctx, "%s", parent->part); } } while
(0)
;
1483
1484 NSLOG(netsurf, INFO, "\t%s invalid SSL certs",do { if (NSLOG_LEVEL_INFO >= NSLOG_LEVEL_VERBOSE) { static
nslog_entry_context_t _nslog_ctx = { &__nslog_category_netsurf
, NSLOG_LEVEL_INFO, "content/urldb.c", sizeof("content/urldb.c"
) - 1, __PRETTY_FUNCTION__, sizeof(__PRETTY_FUNCTION__) - 1, 1485
, }; nslog__log(&_nslog_ctx, "\t%s invalid SSL certs", parent
->permit_invalid_certs ? "Permits" : "Denies"); } } while(
0)
1485 parent->permit_invalid_certs ? "Permits" : "Denies")do { if (NSLOG_LEVEL_INFO >= NSLOG_LEVEL_VERBOSE) { static
nslog_entry_context_t _nslog_ctx = { &__nslog_category_netsurf
, NSLOG_LEVEL_INFO, "content/urldb.c", sizeof("content/urldb.c"
) - 1, __PRETTY_FUNCTION__, sizeof(__PRETTY_FUNCTION__) - 1, 1485
, }; nslog__log(&_nslog_ctx, "\t%s invalid SSL certs", parent
->permit_invalid_certs ? "Permits" : "Denies"); } } while(
0)
;
1486 }
1487
1488 /* Dump path data */
1489 urldb_dump_paths(&parent->paths);
1490
1491 /* and recurse */
1492 for (h = parent->children; h; h = h->next) {
1493 urldb_dump_hosts(h);
1494 }
1495}
1496
1497
1498/**
1499 * Dump search tree
1500 *
1501 * \param parent Parent node of tree to dump
1502 * \param depth Tree depth
1503 */
1504static void urldb_dump_search(struct search_node *parent, int depth)
1505{
1506 const struct host_part *h;
1507 int i; /* index into string */
1508 char s[1024];
1509 int r;
1510 int sl = sizeof(s) - 2;
1511
1512 if (parent == &empty)
1513 return;
1514
1515 urldb_dump_search(parent->left, depth + 1);
1516
1517 for (i = 0; i != depth; i++) {
1518 s[i] = ' ';
1519 }
1520
1521 for (h = parent->data; h; h = h->parent) {
1522 if (h->part) {
1523 r = snprintf(&s[i], sl - i, "%s", h->part);
1524 if (r < 0) {
1525 break;
1526 }
1527 if ((i + r) >= sl) {
1528 break;
1529 }
1530 i += r;
1531 }
1532
1533 if (h->parent && h->parent->parent) {
1534 s[i]='.';
1535 i++;
1536 }
1537 }
1538 s[i]= 0;
1539
1540 NSLOG(netsurf, INFO, "%s", s)do { if (NSLOG_LEVEL_INFO >= NSLOG_LEVEL_VERBOSE) { static
nslog_entry_context_t _nslog_ctx = { &__nslog_category_netsurf
, NSLOG_LEVEL_INFO, "content/urldb.c", sizeof("content/urldb.c"
) - 1, __PRETTY_FUNCTION__, sizeof(__PRETTY_FUNCTION__) - 1, 1540
, }; nslog__log(&_nslog_ctx, "%s", s); } } while(0)
;
1541
1542 urldb_dump_search(parent->right, depth + 1);
1543}
1544
1545
1546/**
1547 * Compare a pair of host parts
1548 *
1549 * \param a first host part
1550 * \param b second host part
1551 * \return 0 if match, non-zero, otherwise
1552 */
1553static int
1554urldb_search_match_host(const struct host_part *a, const struct host_part *b)
1555{
1556 int ret;
1557
1558 assert(a && b)((a && b) ? (void) (0) : __assert_fail ("a && b"
, "content/urldb.c", 1558, __extension__ __PRETTY_FUNCTION__)
)
;
1559
1560 /* traverse up tree to root, comparing parts as we go. */
1561 for (; a && a != &db_root && b && b != &db_root;
1562 a = a->parent, b = b->parent) {
1563 if ((ret = strcasecmp(a->part, b->part)) != 0) {
1564 /* They differ => return the difference here */
1565 return ret;
1566 }
1567 }
1568
1569 /* If we get here then either:
1570 * a) The path lengths differ
1571 * or b) The hosts are identical
1572 */
1573 if (a && a != &db_root && (!b || b == &db_root)) {
1574 /* len(a) > len(b) */
1575 return 1;
1576 } else if ((!a || a == &db_root) && b && b != &db_root) {
1577 /* len(a) < len(b) */
1578 return -1;
1579 }
1580
1581 /* identical */
1582 return 0;
1583}
1584
1585
1586/**
1587 * Rotate a subtree right
1588 *
1589 * \param root Root of subtree to rotate
1590 * \return new root of subtree
1591 */
1592static struct search_node *urldb_search_skew(struct search_node *root)
1593{
1594 assert(root)((root) ? (void) (0) : __assert_fail ("root", "content/urldb.c"
, 1594, __extension__ __PRETTY_FUNCTION__))
;
1595
1596 if (root->left->level == root->level) {
1597 struct search_node *temp;
1598
1599 temp = root->left;
1600 root->left = temp->right;
1601 temp->right = root;
1602 root = temp;
1603 }
1604
1605 return root;
1606}
1607
1608
1609/**
1610 * Rotate a node left, increasing the parent's level
1611 *
1612 * \param root Root of subtree to rotate
1613 * \return New root of subtree
1614 */
1615static struct search_node *urldb_search_split(struct search_node *root)
1616{
1617 assert(root)((root) ? (void) (0) : __assert_fail ("root", "content/urldb.c"
, 1617, __extension__ __PRETTY_FUNCTION__))
;
1618
1619 if (root->right->right->level == root->level) {
1620 struct search_node *temp;
1621
1622 temp = root->right;
1623 root->right = temp->left;
1624 temp->left = root;
1625 root = temp;
1626
1627 root->level++;
1628 }
1629
1630 return root;
1631}
1632
1633
1634/**
1635 * Insert node into search tree
1636 *
1637 * \param root Root of (sub)tree to insert into
1638 * \param n Node to insert
1639 * \return Pointer to updated root
1640 */
1641static struct search_node *
1642urldb_search_insert_internal(struct search_node *root, struct search_node *n)
1643{
1644 assert(root && n)((root && n) ? (void) (0) : __assert_fail ("root && n"
, "content/urldb.c", 1644, __extension__ __PRETTY_FUNCTION__)
)
;
1645
1646 if (root == &empty) {
1647 root = n;
1648 } else {
1649 int c = urldb_search_match_host(root->data, n->data);
1650
1651 if (c > 0) {
1652 root->left = urldb_search_insert_internal(
1653 root->left, n);
1654 } else if (c < 0) {
1655 root->right = urldb_search_insert_internal(
1656 root->right, n);
1657 } else {
1658 /* exact match */
1659 free(n);
1660 return root;
1661 }
1662
1663 root = urldb_search_skew(root);
1664 root = urldb_search_split(root);
1665 }
1666
1667 return root;
1668}
1669
1670
1671/**
1672 * Insert a node into the search tree
1673 *
1674 * \param root Root of tree to insert into
1675 * \param data User data to insert
1676 * \return Pointer to updated root, or NULL if failed
1677 */
1678static struct search_node *
1679urldb_search_insert(struct search_node *root, const struct host_part *data)
1680{
1681 struct search_node *n;
1682
1683 assert(root && data)((root && data) ? (void) (0) : __assert_fail ("root && data"
, "content/urldb.c", 1683, __extension__ __PRETTY_FUNCTION__)
)
;
1684
1685 n = malloc(sizeof(struct search_node));
1686 if (!n)
1687 return NULL((void*)0);
1688
1689 n->level = 1;
1690 n->data = data;
1691 n->left = n->right = &empty;
1692
1693 root = urldb_search_insert_internal(root, n);
1694
1695 return root;
1696}
1697
1698
1699/**
1700 * Parse a cookie avpair
1701 *
1702 * \param c Cookie struct to populate
1703 * \param n Name component
1704 * \param v Value component
1705 * \param was_quoted Whether \a v was quoted in the input
1706 * \return true on success, false on memory exhaustion
1707 */
1708static bool_Bool
1709urldb_parse_avpair(struct cookie_internal_data *c,
1710 char *n,
1711 char *v,
1712 bool_Bool was_quoted)
1713{
1714 int vlen;
1715
1716 assert(c && n && v)((c && n && v) ? (void) (0) : __assert_fail (
"c && n && v", "content/urldb.c", 1716, __extension__
__PRETTY_FUNCTION__))
;
1717
1718 /* Strip whitespace from start of name */
1719 for (; *n; n++) {
1720 if (*n != ' ' && *n != '\t')
1721 break;
1722 }
1723
1724 /* Strip whitespace from end of name */
1725 for (vlen = strlen(n); vlen; vlen--) {
1726 if (n[vlen] == ' ' || n[vlen] == '\t')
1727 n[vlen] = '\0';
1728 else
1729 break;
1730 }
1731
1732 /* Strip whitespace from start of value */
1733 for (; *v; v++) {
1734 if (*v != ' ' && *v != '\t')
1735 break;
1736 }
1737
1738 /* Strip whitespace from end of value */
1739 for (vlen = strlen(v); vlen; vlen--) {
1740 if (v[vlen] == ' ' || v[vlen] == '\t')
1741 v[vlen] = '\0';
1742 else
1743 break;
1744 }
1745
1746 if (!c->comment && strcasecmp(n, "Comment") == 0) {
1747 c->comment = strdup(v);
1748 if (!c->comment)
1749 return false0;
1750 } else if (!c->domain && strcasecmp(n, "Domain") == 0) {
1751 if (v[0] == '.') {
1752 /* Domain must start with a dot */
1753 c->domain_from_set = true1;
1754 c->domain = strdup(v);
1755 if (!c->domain)
1756 return false0;
1757 }
1758 } else if (strcasecmp(n, "Max-Age") == 0) {
1759 int temp = atoi(v);
1760 if (temp == 0)
1761 /* Special case - 0 means delete */
1762 c->expires = 0;
1763 else
1764 c->expires = time(NULL((void*)0)) + temp;
1765 } else if (!c->path && strcasecmp(n, "Path") == 0) {
1766 c->path_from_set = true1;
1767 c->path = strdup(v);
1768 if (!c->path)
1769 return false0;
1770 } else if (strcasecmp(n, "Version") == 0) {
1771 c->version = atoi(v);
1772 } else if (strcasecmp(n, "Expires") == 0) {
1773 char *datenoday;
1774 time_t expires;
1775 nserror res;
1776
1777 /* Strip dayname from date (these are hugely variable
1778 * and liable to break the parser. They also serve no
1779 * useful purpose) */
1780 for (datenoday = v;
1781 *datenoday && !ascii_is_digit(*datenoday);
1782 datenoday++) {
1783 /* do nothing */
1784 }
1785
1786 res = nsc_strntimet(datenoday, strlen(datenoday), &expires);
1787 if (res != NSERROR_OK) {
1788 /* assume we have an unrepresentable date =>
1789 * force it to the maximum possible value of a
1790 * 32bit time_t (this may break in 2038. We'll
1791 * deal with that once we come to it) */
1792 expires = (time_t)0x7fffffff;
1793 }
1794 c->expires = expires;
1795 } else if (strcasecmp(n, "Secure") == 0) {
1796 c->secure = true1;
1797 } else if (strcasecmp(n, "HttpOnly") == 0) {
1798 c->http_only = true1;
1799 } else if (!c->name) {
1800 c->name = strdup(n);
1801 c->value = strdup(v);
1802 c->value_was_quoted = was_quoted;
1803 if (!c->name || !c->value) {
1804 return false0;
1805 }
1806 }
1807
1808 return true1;
1809}
1810
1811
1812/**
1813 * Free a cookie
1814 *
1815 * \param c The cookie to free
1816 */
1817static void urldb_free_cookie(struct cookie_internal_data *c)
1818{
1819 assert(c)((c) ? (void) (0) : __assert_fail ("c", "content/urldb.c", 1819
, __extension__ __PRETTY_FUNCTION__))
;
1820
1821 free(c->comment);
1822 free(c->domain);
1823 free(c->path);
1824 free(c->name);
1825 free(c->value);
1826 free(c);
1827}
1828
1829
1830/**
1831 * Parse a cookie
1832 *
1833 * \param url URL being fetched
1834 * \param cookie Pointer to cookie string (updated on exit)
1835 * \return Pointer to cookie structure (on heap, caller frees) or NULL
1836 */
1837static struct cookie_internal_data *
1838urldb_parse_cookie(nsurl *url, const char **cookie)
1839{
1840 struct cookie_internal_data *c;
1841 const char *cur;
1842 char name[1024], value[4096];
1843 char *n = name, *v = value;
1844 bool_Bool in_value = false0;
1845 bool_Bool had_value_data = false0;
1846 bool_Bool value_verbatim = false0;
1847 bool_Bool quoted = false0;
1848 bool_Bool was_quoted = false0;
1849
1850 assert(url && cookie && *cookie)((url && cookie && *cookie) ? (void) (0) : __assert_fail
("url && cookie && *cookie", "content/urldb.c"
, 1850, __extension__ __PRETTY_FUNCTION__))
;
1851
1852 c = calloc(1, sizeof(struct cookie_internal_data));
1853 if (c == NULL((void*)0))
1854 return NULL((void*)0);
1855
1856 c->expires = -1;
1857
1858 name[0] = '\0';
1859 value[0] = '\0';
1860
1861 for (cur = *cookie; *cur; cur++) {
1862 if (*cur == '\r' && *(cur + 1) == '\n') {
1863 /* End of header */
1864 if (quoted) {
1865 /* Unmatched quote encountered */
1866
1867 /* Match Firefox 2.0.0.11 */
1868 value[0] = '\0';
1869
1870 }
1871
1872 break;
1873 } else if (*cur == '\r') {
1874 /* Spurious linefeed */
1875 continue;
1876 } else if (*cur == '\n') {
1877 /* Spurious newline */
1878 continue;
1879 }
1880
1881 if (in_value && !had_value_data) {
1882 if (*cur == ' ' || *cur == '\t') {
1883 /* Strip leading whitespace from value */
1884 continue;
1885 } else {
1886 had_value_data = true1;
1887
1888 /* Value is taken verbatim if first non-space
1889 * character is not a " */
1890 if (*cur != '"') {
1891 value_verbatim = true1;
1892 }
1893 }
1894 }
1895
1896 if (in_value && !value_verbatim && (*cur == '"')) {
1897 /* Only non-verbatim values may be quoted */
1898 if (cur == *cookie || *(cur - 1) != '\\') {
1899 /* Only unescaped quotes count */
1900 was_quoted = quoted;
1901 quoted = !quoted;
1902
1903 continue;
1904 }
1905 }
1906
1907 if (!quoted && !in_value && *cur == '=') {
1908 /* First equals => attr-value separator */
1909 in_value = true1;
1910 continue;
1911 }
1912
1913 if (!quoted && (was_quoted || *cur == ';')) {
1914 /* Semicolon or after quoted value
1915 * => end of current avpair */
1916
1917 /* NUL-terminate tokens */
1918 *n = '\0';
1919 *v = '\0';
1920
1921 if (!urldb_parse_avpair(c, name, value, was_quoted)) {
1922 /* Memory exhausted */
1923 urldb_free_cookie(c);
1924 return NULL((void*)0);
1925 }
1926
1927 /* And reset to start */
1928 n = name;
1929 v = value;
1930 in_value = false0;
1931 had_value_data = false0;
1932 value_verbatim = false0;
1933 was_quoted = false0;
1934
1935 /* Now, if the current input is anything other than a
1936 * semicolon, we must be sure to reprocess it */
1937 if (*cur != ';') {
1938 cur--;
1939 }
1940
1941 continue;
1942 }
1943
1944 /* And now handle commas. These are a pain as they may mean
1945 * any of the following:
1946 *
1947 * + End of cookie
1948 * + Day separator in Expires avpair
1949 * + (Invalid) comma in unquoted value
1950 *
1951 * Therefore, in order to handle all 3 cases (2 and 3 are
1952 * identical, the difference being that 2 is in the spec and
1953 * 3 isn't), we need to determine where the comma actually
1954 * lies. We use the following heuristic:
1955 *
1956 * Given a comma at the current input position, find the
1957 * immediately following semicolon (or end of input if none
1958 * found). Then, consider the input characters between
1959 * these two positions. If any of these characters is an
1960 * '=', we must assume that the comma signified the end of
1961 * the current cookie.
1962 *
1963 * This holds as the first avpair of any cookie must be
1964 * NAME=VALUE, so the '=' is guaranteed to appear in the
1965 * case where the comma marks the end of a cookie.
1966 *
1967 * This will fail, however, in the case where '=' appears in
1968 * the value of the current avpair after the comma or the
1969 * subsequent cookie does not start with NAME=VALUE. Neither
1970 * of these is particularly likely and if they do occur, the
1971 * website is more broken than we can be bothered to handle.
1972 */
1973 if (!quoted && *cur == ',') {
1974 /* Find semi-colon, if any */
1975 const char *p;
1976 const char *semi = strchr(cur + 1, ';');
1977 if (!semi)
1978 semi = cur + strlen(cur) - 2 /* CRLF */;
1979
1980 /* Look for equals sign between comma and semi */
1981 for (p = cur + 1; p < semi; p++)
1982 if (*p == '=')
1983 break;
1984
1985 if (p == semi) {
1986 /* none found => comma internal to value */
1987 /* do nothing */
1988 } else {
1989 /* found one => comma marks end of cookie */
1990 cur++;
1991 break;
1992 }
1993 }
1994
1995 /* Accumulate into buffers, always leaving space for a NUL */
1996 /** \todo is silently truncating overlong names/values wise? */
1997 if (!in_value) {
1998 if (n < name + (sizeof(name) - 1))
1999 *n++ = *cur;
2000 } else {
2001 if (v < value + (sizeof(value) - 1))
2002 *v++ = *cur;
2003 }
2004 }
2005
2006 /* Parse final avpair */
2007 *n = '\0';
2008 *v = '\0';
2009
2010 if (!urldb_parse_avpair(c, name, value, was_quoted)) {
2011 /* Memory exhausted */
2012 urldb_free_cookie(c);
2013 return NULL((void*)0);
2014 }
2015
2016 /* Now fix-up default values */
2017 if (c->domain == NULL((void*)0)) {
2018 lwc_string *host = nsurl_get_component(url, NSURL_HOST);
2019 if (host == NULL((void*)0)) {
2020 urldb_free_cookie(c);
2021 return NULL((void*)0);
2022 }
2023 c->domain = strdup(lwc_string_data(host)({((host != ((void*)0)) ? (void) (0) : __assert_fail ("host != NULL"
, "content/urldb.c", 2023, __extension__ __PRETTY_FUNCTION__)
); (const char *)((host)+1);})
);
2024 lwc_string_unref(host){ lwc_string *__lwc_s = (host); if (__lwc_s != ((void*)0)) { __lwc_s
->refcnt--; if ((__lwc_s->refcnt == 0) || ((__lwc_s->
refcnt == 1) && (__lwc_s->insensitive == __lwc_s))
) lwc_string_destroy(__lwc_s); } }
;
2025 }
2026
2027 if (c->path == NULL((void*)0)) {
2028 const char *path_data;
2029 char *path, *slash;
2030 lwc_string *path_lwc;
2031
2032 path_lwc = nsurl_get_component(url, NSURL_PATH);
2033 if (path_lwc == NULL((void*)0)) {
2034 urldb_free_cookie(c);
2035 return NULL((void*)0);
2036 }
2037 path_data = lwc_string_data(path_lwc)({((path_lwc != ((void*)0)) ? (void) (0) : __assert_fail ("path_lwc != NULL"
, "content/urldb.c", 2037, __extension__ __PRETTY_FUNCTION__)
); (const char *)((path_lwc)+1);})
;
2038
2039 /* Strip leafname and trailing slash (4.3.1) */
2040 slash = strrchr(path_data, '/');
2041 if (slash != NULL((void*)0)) {
2042 /* Special case: retain first slash in path */
2043 if (slash == path_data)
2044 slash++;
2045
2046 slash = strndup(path_data, slash - path_data);
2047 if (slash == NULL((void*)0)) {
2048 lwc_string_unref(path_lwc){ lwc_string *__lwc_s = (path_lwc); if (__lwc_s != ((void*)0)
) { __lwc_s->refcnt--; if ((__lwc_s->refcnt == 0) || ((
__lwc_s->refcnt == 1) && (__lwc_s->insensitive ==
__lwc_s))) lwc_string_destroy(__lwc_s); } }
;
2049 urldb_free_cookie(c);
2050 return NULL((void*)0);
2051 }
2052
2053 path = slash;
2054 lwc_string_unref(path_lwc){ lwc_string *__lwc_s = (path_lwc); if (__lwc_s != ((void*)0)
) { __lwc_s->refcnt--; if ((__lwc_s->refcnt == 0) || ((
__lwc_s->refcnt == 1) && (__lwc_s->insensitive ==
__lwc_s))) lwc_string_destroy(__lwc_s); } }
;
2055 } else {
2056 path = strdup(lwc_string_data(path_lwc)({((path_lwc != ((void*)0)) ? (void) (0) : __assert_fail ("path_lwc != NULL"
, "content/urldb.c", 2056, __extension__ __PRETTY_FUNCTION__)
); (const char *)((path_lwc)+1);})
);
2057 lwc_string_unref(path_lwc){ lwc_string *__lwc_s = (path_lwc); if (__lwc_s != ((void*)0)
) { __lwc_s->refcnt--; if ((__lwc_s->refcnt == 0) || ((
__lwc_s->refcnt == 1) && (__lwc_s->insensitive ==
__lwc_s))) lwc_string_destroy(__lwc_s); } }
;
2058 if (path == NULL((void*)0)) {
2059 urldb_free_cookie(c);
2060 return NULL((void*)0);
2061 }
2062 }
2063
2064 c->path = path;
2065 }
2066
2067 /* Write back current position */
2068 *cookie = cur;
2069
2070 return c;
2071}
2072
2073
2074/**
2075 * Add a path to the database, creating any intermediate entries
2076 *
2077 * \param scheme URL scheme associated with path
2078 * \param port Port number on host associated with path
2079 * \param host Host tree node to attach to
2080 * \param path_query Absolute path plus query to add (freed)
2081 * \param fragment URL fragment, or NULL
2082 * \param url URL (fragment ignored)
2083 * \return Pointer to leaf node, or NULL on memory exhaustion
2084 */
2085static struct path_data *
2086urldb_add_path(lwc_string *scheme,
2087 unsigned int port,
2088 const struct host_part *host,
2089 char *path_query,
2090 lwc_string *fragment,
2091 nsurl *url)
2092{
2093 struct path_data *d, *e;
2094 char *buf = path_query;
2095 char *segment, *slash;
2096 bool_Bool match;
2097
2098 assert(scheme && host && url)((scheme && host && url) ? (void) (0) : __assert_fail
("scheme && host && url", "content/urldb.c",
2098, __extension__ __PRETTY_FUNCTION__))
;
2099
2100 d = (struct path_data *) &host->paths;
2101
2102 /* skip leading '/' */
2103 segment = buf;
2104 if (*segment == '/')
2105 segment++;
2106
2107 /* Process path segments */
2108 do {
2109 slash = strchr(segment, '/');
2110 if (!slash) {
2111 /* last segment */
2112 /* look for existing entry */
2113 for (e = d->children; e; e = e->next)
2114 if (strcmp(segment, e->segment) == 0 &&
2115 lwc_string_isequal(scheme,((*(&match) = ((scheme) == (e->scheme))), lwc_error_ok
)
2116 e->scheme, &match)((*(&match) = ((scheme) == (e->scheme))), lwc_error_ok
)
==
2117 lwc_error_ok &&
2118 match == true1 &&
2119 e->port == port)
2120 break;
2121
2122 d = e ? urldb_add_path_fragment(e, fragment) :
2123 urldb_add_path_node(scheme, port,
2124 segment, fragment, d);
2125 break;
2126 }
2127
2128 *slash = '\0';
2129
2130 /* look for existing entry */
2131 for (e = d->children; e; e = e->next)
2132 if (strcmp(segment, e->segment) == 0 &&
2133 lwc_string_isequal(scheme, e->scheme,((*(&match) = ((scheme) == (e->scheme))), lwc_error_ok
)
2134 &match)((*(&match) = ((scheme) == (e->scheme))), lwc_error_ok
)
== lwc_error_ok &&
2135 match == true1 &&
2136 e->port == port)
2137 break;
2138
2139 d = e ? e : urldb_add_path_node(scheme, port, segment, NULL((void*)0), d);
2140 if (!d)
2141 break;
2142
2143 segment = slash + 1;
2144 } while (1);
2145
2146 free(path_query);
2147
2148 if (d && !d->url) {
2149 /* Insert defragmented URL */
2150 if (nsurl_defragment(url, &d->url) != NSERROR_OK)
2151 return NULL((void*)0);
2152 }
2153
2154 return d;
2155}
2156
2157
2158/**
2159 * Add a host to the database, creating any intermediate entries
2160 *
2161 * \param host Hostname to add
2162 * \return Pointer to leaf node, or NULL on memory exhaustion
2163 */
2164static struct host_part *urldb_add_host(const char *host)
2165{
2166 struct host_part *d = (struct host_part *) &db_root, *e;
2167 struct search_node *s;
2168 char buf[256]; /* 256 bytes is sufficient - domain names are
2169 * limited to 255 chars. */
2170 char *part;
2171
2172 assert(host)((host) ? (void) (0) : __assert_fail ("host", "content/urldb.c"
, 2172, __extension__ __PRETTY_FUNCTION__))
;
2173
2174 if (urldb__host_is_ip_address(host)) {
2175 /* Host is an IP, so simply add as TLD */
2176
2177 /* Check for existing entry */
2178 for (e = d->children; e; e = e->next)
2179 if (strcasecmp(host, e->part) == 0)
2180 /* found => return it */
2181 return e;
2182
2183 d = urldb_add_host_node(host, d);
2184
2185 s = urldb_search_insert(search_trees[ST_IP0], d);
2186 if (!s) {
2187 /* failed */
2188 d = NULL((void*)0);
2189 } else {
2190 search_trees[ST_IP0] = s;
2191 }
2192
2193 return d;
2194 }
2195
2196 /* Copy host string, so we can corrupt it */
2197 strncpy(buf, host, sizeof buf);
2198 buf[sizeof buf - 1] = '\0';
2199
2200 /* Process FQDN segments backwards */
2201 do {
2202 part = strrchr(buf, '.');
2203 if (!part) {
2204 /* last segment */
2205 /* Check for existing entry */
2206 for (e = d->children; e; e = e->next)
2207 if (strcasecmp(buf, e->part) == 0)
2208 break;
2209
2210 if (e) {
2211 d = e;
2212 } else {
2213 d = urldb_add_host_node(buf, d);
2214 }
2215
2216 /* And insert into search tree */
2217 if (d) {
2218 struct search_node **r;
2219
2220 r = urldb_get_search_tree_direct(buf);
2221 s = urldb_search_insert(*r, d);
2222 if (!s) {
2223 /* failed */
2224 d = NULL((void*)0);
2225 } else {
2226 *r = s;
2227 }
2228 }
2229 break;
2230 }
2231
2232 /* Check for existing entry */
2233 for (e = d->children; e; e = e->next)
2234 if (strcasecmp(part + 1, e->part) == 0)
2235 break;
2236
2237 d = e ? e : urldb_add_host_node(part + 1, d);
2238 if (!d)
2239 break;
2240
2241 *part = '\0';
2242 } while (1);
2243
2244 return d;
2245}
2246
2247
2248/**
2249 * Insert a cookie into the database
2250 *
2251 * \param c The cookie to insert
2252 * \param scheme URL scheme associated with cookie path
2253 * \param url URL (sans fragment) associated with cookie
2254 * \return true on success, false on memory exhaustion (c will be freed)
2255 */
2256static bool_Bool
2257urldb_insert_cookie(struct cookie_internal_data *c,
2258 lwc_string *scheme,
2259 nsurl *url)
2260{
2261 struct cookie_internal_data *d;
2262 const struct host_part *h;
2263 struct path_data *p;
2264 time_t now = time(NULL((void*)0));
2265
2266 assert(c)((c) ? (void) (0) : __assert_fail ("c", "content/urldb.c", 2266
, __extension__ __PRETTY_FUNCTION__))
;
2267
2268 if (c->domain[0] == '.') {
2269 h = urldb_search_find(
2270 urldb_get_search_tree(&(c->domain[1])),
2271 c->domain + 1);
2272 if (!h) {
2273 h = urldb_add_host(c->domain + 1);
2274 if (!h) {
2275 urldb_free_cookie(c);
2276 return false0;
2277 }
2278 }
2279
2280 p = (struct path_data *) &h->paths;
2281 } else {
2282 /* Need to have a URL and scheme, if it's not a domain cookie */
2283 assert(url != NULL)((url != ((void*)0)) ? (void) (0) : __assert_fail ("url != NULL"
, "content/urldb.c", 2283, __extension__ __PRETTY_FUNCTION__)
)
;
2284 assert(scheme != NULL)((scheme != ((void*)0)) ? (void) (0) : __assert_fail ("scheme != NULL"
, "content/urldb.c", 2284, __extension__ __PRETTY_FUNCTION__)
)
;
2285
2286 h = urldb_search_find(
2287 urldb_get_search_tree(c->domain),
2288 c->domain);
2289
2290 if (!h) {
2291 h = urldb_add_host(c->domain);
2292 if (!h) {
2293 urldb_free_cookie(c);
2294 return false0;
2295 }
2296 }
2297
2298 /* find path */
2299 p = urldb_add_path(scheme, 0, h,
2300 strdup(c->path), NULL((void*)0), url);
2301 if (!p) {
2302 urldb_free_cookie(c);
2303 return false0;
2304 }
2305 }
2306
2307 /* add cookie */
2308 for (d = p->cookies; d; d = d->next) {
2309 if (!strcmp(d->domain, c->domain) &&
2310 !strcmp(d->path, c->path) &&
2311 !strcmp(d->name, c->name))
2312 break;
2313 }
2314
2315 if (d) {
2316 if (c->expires != -1 && c->expires < now) {
2317 /* remove cookie */
2318 if (d->next)
2319 d->next->prev = d->prev;
2320 else
2321 p->cookies_end = d->prev;
2322 if (d->prev)
2323 d->prev->next = d->next;
2324 else
2325 p->cookies = d->next;
2326
2327 cookie_manager_remove((struct cookie_data *)d);
2328
2329 urldb_free_cookie(d);
2330 urldb_free_cookie(c);
2331 } else {
2332 /* replace d with c */
2333 c->prev = d->prev;
2334 c->next = d->next;
2335 if (c->next)
2336 c->next->prev = c;
2337 else
2338 p->cookies_end = c;
2339 if (c->prev)
2340 c->prev->next = c;
2341 else
2342 p->cookies = c;
2343
2344 cookie_manager_remove((struct cookie_data *)d);
2345 urldb_free_cookie(d);
2346
2347 cookie_manager_add((struct cookie_data *)c);
2348 }
2349 } else {
2350 c->prev = p->cookies_end;
2351 c->next = NULL((void*)0);
2352 if (p->cookies_end)
2353 p->cookies_end->next = c;
2354 else
2355 p->cookies = c;
2356 p->cookies_end = c;
2357
2358 cookie_manager_add((struct cookie_data *)c);
2359 }
2360
2361 return true1;
2362}
2363
2364
2365/**
2366 * Concatenate a cookie into the provided buffer
2367 *
2368 * \param c Cookie to concatenate
2369 * \param version The version of the cookie string to output
2370 * \param used Pointer to amount of buffer used (updated)
2371 * \param alloc Pointer to allocated size of buffer (updated)
2372 * \param buf Pointer to Pointer to buffer (updated)
2373 * \return true on success, false on memory exhaustion
2374 */
2375static bool_Bool
2376urldb_concat_cookie(struct cookie_internal_data *c,
2377 int version,
2378 int *used,
2379 int *alloc,
2380 char **buf)
2381{
2382 /* Combined (A)BNF for the Cookie: request header:
2383 *
2384 * CHAR = <any US-ASCII character (octets 0 - 127)>
2385 * CTL = <any US-ASCII control character
2386 * (octets 0 - 31) and DEL (127)>
2387 * CR = <US-ASCII CR, carriage return (13)>
2388 * LF = <US-ASCII LF, linefeed (10)>
2389 * SP = <US-ASCII SP, space (32)>
2390 * HT = <US-ASCII HT, horizontal-tab (9)>
2391 * <"> = <US-ASCII double-quote mark (34)>
2392 *
2393 * CRLF = CR LF
2394 *
2395 * LWS = [CRLF] 1*( SP | HT )
2396 *
2397 * TEXT = <any OCTET except CTLs,
2398 * but including LWS>
2399 *
2400 * token = 1*<any CHAR except CTLs or separators>
2401 * separators = "(" | ")" | "<" | ">" | "@"
2402 * | "," | ";" | ":" | "\" | <">
2403 * | "/" | "[" | "]" | "?" | "="
2404 * | "{" | "}" | SP | HT
2405 *
2406 * quoted-string = ( <"> *(qdtext | quoted-pair ) <"> )
2407 * qdtext = <any TEXT except <">>
2408 * quoted-pair = "\" CHAR
2409 *
2410 * attr = token
2411 * value = word
2412 * word = token | quoted-string
2413 *
2414 * cookie = "Cookie:" cookie-version
2415 * 1*((";" | ",") cookie-value)
2416 * cookie-value = NAME "=" VALUE [";" path] [";" domain]
2417 * cookie-version = "$Version" "=" value
2418 * NAME = attr
2419 * VALUE = value
2420 * path = "$Path" "=" value
2421 * domain = "$Domain" "=" value
2422 *
2423 * A note on quoted-string handling:
2424 * The cookie data stored in the db is verbatim (i.e. sans enclosing
2425 * <">, if any, and with all quoted-pairs intact) thus all that we
2426 * need to do here is ensure that value strings which were quoted
2427 * in Set-Cookie or which include any of the separators are quoted
2428 * before use.
2429 *
2430 * A note on cookie-value separation:
2431 * We use semicolons for all separators, including between
2432 * cookie-values. This simplifies things and is backwards compatible.
2433 */
2434 const char * const separators = "()<>@,;:\\\"/[]?={} \t";
2435
2436 int max_len;
2437
2438 assert(c && used && alloc && buf && *buf)((c && used && alloc && buf &&
*buf) ? (void) (0) : __assert_fail ("c && used && alloc && buf && *buf"
, "content/urldb.c", 2438, __extension__ __PRETTY_FUNCTION__)
)
;
2439
2440 /* "; " cookie-value
2441 * We allow for the possibility that values are quoted
2442 */
2443 max_len = 2 + strlen(c->name) + 1 + strlen(c->value) + 2 +
2444 (c->path_from_set ?
2445 8 + strlen(c->path) + 2 : 0) +
2446 (c->domain_from_set ?
2447 10 + strlen(c->domain) + 2 : 0);
2448
2449 if (*used + max_len >= *alloc) {
2450 char *temp = realloc(*buf, *alloc + 4096);
2451 if (!temp) {
2452 return false0;
2453 }
2454 *buf = temp;
2455 *alloc += 4096;
2456 }
2457
2458 if (version == COOKIE_NETSCAPE) {
2459 /* Original Netscape cookie */
2460 sprintf(*buf + *used - 1, "; %s=", c->name);
2461 *used += 2 + strlen(c->name) + 1;
2462
2463 /* The Netscape spec doesn't mention quoting of cookie values.
2464 * RFC 2109 $10.1.3 indicates that values must not be quoted.
2465 *
2466 * However, other browsers preserve quoting, so we should, too
2467 */
2468 if (c->value_was_quoted) {
2469 sprintf(*buf + *used - 1, "\"%s\"", c->value);
2470 *used += 1 + strlen(c->value) + 1;
2471 } else {
2472 /** \todo should we %XX-encode [;HT,SP] ? */
2473 /** \todo Should we strip escaping backslashes? */
2474 sprintf(*buf + *used - 1, "%s", c->value);
2475 *used += strlen(c->value);
2476 }
2477
2478 /* We don't send path/domain information -- that's what the
2479 * Netscape spec suggests we should do, anyway. */
2480 } else {
2481 /* RFC2109 or RFC2965 cookie */
2482 sprintf(*buf + *used - 1, "; %s=", c->name);
2483 *used += 2 + strlen(c->name) + 1;
2484
2485 /* Value needs quoting if it contains any separator or if
2486 * it needs preserving from the Set-Cookie header */
2487 if (c->value_was_quoted ||
2488 strpbrk(c->value, separators) != NULL((void*)0)) {
2489 sprintf(*buf + *used - 1, "\"%s\"", c->value);
2490 *used += 1 + strlen(c->value) + 1;
2491 } else {
2492 sprintf(*buf + *used - 1, "%s", c->value);
2493 *used += strlen(c->value);
2494 }
2495
2496 if (c->path_from_set) {
2497 /* Path, quoted if necessary */
2498 sprintf(*buf + *used - 1, "; $Path=");
2499 *used += 8;
2500
2501 if (strpbrk(c->path, separators) != NULL((void*)0)) {
2502 sprintf(*buf + *used - 1, "\"%s\"", c->path);
2503 *used += 1 + strlen(c->path) + 1;
2504 } else {
2505 sprintf(*buf + *used - 1, "%s", c->path);
2506 *used += strlen(c->path);
2507 }
2508 }
2509
2510 if (c->domain_from_set) {
2511 /* Domain, quoted if necessary */
2512 sprintf(*buf + *used - 1, "; $Domain=");
2513 *used += 10;
2514
2515 if (strpbrk(c->domain, separators) != NULL((void*)0)) {
2516 sprintf(*buf + *used - 1, "\"%s\"", c->domain);
2517 *used += 1 + strlen(c->domain) + 1;
2518 } else {
2519 sprintf(*buf + *used - 1, "%s", c->domain);
2520 *used += strlen(c->domain);
2521 }
2522 }
2523 }
2524
2525 return true1;
2526}
2527
2528
2529/**
2530 * deletes paths from a cookie.
2531 *
2532 * \param domain the cookie domain
2533 * \param path the cookie path
2534 * \param name The cookie name
2535 * \param parent The url data of the cookie
2536 */
2537static void
2538urldb_delete_cookie_paths(const char *domain,
2539 const char *path,
2540 const char *name,
2541 struct path_data *parent)
2542{
2543 struct cookie_internal_data *c;
2544 struct path_data *p = parent;
2545
2546 assert(parent)((parent) ? (void) (0) : __assert_fail ("parent", "content/urldb.c"
, 2546, __extension__ __PRETTY_FUNCTION__))
;
2547
2548 do {
2549 for (c = p->cookies; c; c = c->next) {
2550 if (strcmp(c->domain, domain) == 0 &&
2551 strcmp(c->path, path) == 0 &&
2552 strcmp(c->name, name) == 0) {
2553 if (c->prev) {
2554 c->prev->next = c->next;
2555 } else {
2556 p->cookies = c->next;
2557 }
2558
2559 if (c->next) {
2560 c->next->prev = c->prev;
2561 } else {
2562 p->cookies_end = c->prev;
2563 }
2564
2565 urldb_free_cookie(c);
2566
2567 return;
2568 }
2569 }
2570
2571 if (p->children) {
2572 p = p->children;
2573 } else {
2574 while (p != parent) {
2575 if (p->next != NULL((void*)0)) {
2576 p = p->next;
2577 break;
2578 }
2579
2580 p = p->parent;
2581 }
2582 }
2583 } while (p != parent);
2584}
2585
2586
2587/**
2588 * Deletes cookie hosts and their assoicated paths
2589 *
2590 * \param domain the cookie domain
2591 * \param path the cookie path
2592 * \param name The cookie name
2593 * \param parent The url data of the cookie
2594 */
2595static void
2596urldb_delete_cookie_hosts(const char *domain,
2597 const char *path,
2598 const char *name,
2599 struct host_part *parent)
2600{
2601 struct host_part *h;
2602 assert(parent)((parent) ? (void) (0) : __assert_fail ("parent", "content/urldb.c"
, 2602, __extension__ __PRETTY_FUNCTION__))
;
2603
2604 urldb_delete_cookie_paths(domain, path, name, &parent->paths);
2605
2606 for (h = parent->children; h; h = h->next) {
2607 urldb_delete_cookie_hosts(domain, path, name, h);
2608 }
2609}
2610
2611
2612/**
2613 * Save a path subtree's cookies
2614 *
2615 * \param fp File pointer to write to
2616 * \param parent Parent path
2617 */
2618static void urldb_save_cookie_paths(FILE *fp, struct path_data *parent)
2619{
2620 struct path_data *p = parent;
2621 time_t now = time(NULL((void*)0));
2622
2623 assert(fp && parent)((fp && parent) ? (void) (0) : __assert_fail ("fp && parent"
, "content/urldb.c", 2623, __extension__ __PRETTY_FUNCTION__)
)
;
2624
2625 do {
2626 if (p->cookies != NULL((void*)0)) {
2627 struct cookie_internal_data *c;
2628
2629 for (c = p->cookies; c != NULL((void*)0); c = c->next) {
2630 if (c->expires == -1 || c->expires < now) {
2631 /* Skip expired & session cookies */
2632 continue;
2633 }
2634
2635 fprintf(fp,
2636 "%d\t%s\t%d\t%s\t%d\t%d\t%d\t%d\t%d\t%d\t"
2637 "%s\t%s\t%d\t%s\t%s\t%s\n",
2638 c->version, c->domain,
2639 c->domain_from_set, c->path,
2640 c->path_from_set, c->secure,
2641 c->http_only,
2642 (int)c->expires, (int)c->last_used,
2643 c->no_destroy, c->name, c->value,
2644 c->value_was_quoted,
2645 p->scheme ? lwc_string_data(p->scheme)({((p->scheme != ((void*)0)) ? (void) (0) : __assert_fail (
"p->scheme != NULL", "content/urldb.c", 2645, __extension__
__PRETTY_FUNCTION__)); (const char *)((p->scheme)+1);})
:
2646 "unused",
2647 p->url ? nsurl_access(p->url) :
2648 "unused",
2649 c->comment ? c->comment : "");
2650 }
2651 }
2652
2653 if (p->children != NULL((void*)0)) {
2654 p = p->children;
2655 } else {
2656 while (p != parent) {
2657 if (p->next != NULL((void*)0)) {
2658 p = p->next;
2659 break;
2660 }
2661
2662 p = p->parent;
2663 }
2664 }
2665 } while (p != parent);
2666}
2667
2668
2669/**
2670 * Save a host subtree's cookies
2671 *
2672 * \param fp File pointer to write to
2673 * \param parent Parent host
2674 */
2675static void urldb_save_cookie_hosts(FILE *fp, struct host_part *parent)
2676{
2677 struct host_part *h;
2678 assert(fp && parent)((fp && parent) ? (void) (0) : __assert_fail ("fp && parent"
, "content/urldb.c", 2678, __extension__ __PRETTY_FUNCTION__)
)
;
2679
2680 urldb_save_cookie_paths(fp, &parent->paths);
2681
2682 for (h = parent->children; h; h = h->next)
2683 urldb_save_cookie_hosts(fp, h);
2684}
2685
2686
2687/**
2688 * Destroy a cookie node
2689 *
2690 * \param c Cookie to destroy
2691 */
2692static void urldb_destroy_cookie(struct cookie_internal_data *c)
2693{
2694 free(c->name);
2695 free(c->value);
2696 free(c->comment);
2697 free(c->domain);
2698 free(c->path);
2699
2700 free(c);
2701}
2702
2703
2704/**
2705 * Destroy the contents of a path node
2706 *
2707 * \param node Node to destroy contents of (does not destroy node)
2708 */
2709static void urldb_destroy_path_node_content(struct path_data *node)
2710{
2711 struct cookie_internal_data *a, *b;
2712 unsigned int i;
2713
2714 if (node->url != NULL((void*)0)) {
2715 nsurl_unref(node->url);
2716 }
2717
2718 lwc_string_unref(node->scheme){ lwc_string *__lwc_s = (node->scheme); if (__lwc_s != ((void
*)0)) { __lwc_s->refcnt--; if ((__lwc_s->refcnt == 0) ||
((__lwc_s->refcnt == 1) && (__lwc_s->insensitive
== __lwc_s))) lwc_string_destroy(__lwc_s); } }
;
2719
2720 free(node->segment);
2721 for (i = 0; i < node->frag_cnt; i++)
2722 free(node->fragment[i]);
2723 free(node->fragment);
2724
2725 free(node->urld.title);
2726
2727 for (a = node->cookies; a; a = b) {
2728 b = a->next;
2729 urldb_destroy_cookie(a);
2730 }
2731}
2732
2733
2734/**
2735 * Destroy protection space data
2736 *
2737 * \param space Protection space to destroy
2738 */
2739static void urldb_destroy_prot_space(struct prot_space_data *space)
2740{
2741 lwc_string_unref(space->scheme){ lwc_string *__lwc_s = (space->scheme); if (__lwc_s != ((
void*)0)) { __lwc_s->refcnt--; if ((__lwc_s->refcnt == 0
) || ((__lwc_s->refcnt == 1) && (__lwc_s->insensitive
== __lwc_s))) lwc_string_destroy(__lwc_s); } }
;
2742 free(space->realm);
2743 free(space->auth);
2744
2745 free(space);
2746}
2747
2748
2749/**
2750 * Destroy a path tree
2751 *
2752 * \param root Root node of tree to destroy
2753 */
2754static void urldb_destroy_path_tree(struct path_data *root)
2755{
2756 struct path_data *p = root;
2757
2758 do {
2759 if (p->children != NULL((void*)0)) {
2760 p = p->children;
2761 } else {
2762 struct path_data *q = p;
2763
2764 while (p != root) {
2765 if (p->next != NULL((void*)0)) {
2766 p = p->next;
2767 break;
2768 }
2769
2770 p = p->parent;
2771
2772 urldb_destroy_path_node_content(q);
2773 free(q);
2774
2775 q = p;
2776 }
2777
2778 urldb_destroy_path_node_content(q);
2779 free(q);
2780 }
2781 } while (p != root);
2782}
2783
2784
2785/**
2786 * Destroy a host tree
2787 *
2788 * \param root Root node of tree to destroy
2789 */
2790static void urldb_destroy_host_tree(struct host_part *root)
2791{
2792 struct host_part *a, *b;
2793 struct path_data *p, *q;
2794 struct prot_space_data *s, *t;
2795
2796 /* Destroy children */
2797 for (a = root->children; a; a = b) {
2798 b = a->next;
2799 urldb_destroy_host_tree(a);
2800 }
2801
2802 /* Now clean up paths */
2803 for (p = root->paths.children; p; p = q) {
2804 q = p->next;
2805 urldb_destroy_path_tree(p);
2806 }
2807
2808 /* Root path */
2809 urldb_destroy_path_node_content(&root->paths);
2810
2811 /* Proctection space data */
2812 for (s = root->prot_space; s; s = t) {
2813 t = s->next;
2814 urldb_destroy_prot_space(s);
2815 }
2816
2817 /* And ourselves */
2818 free(root->part);
2819 free(root);
2820}
2821
2822
2823/**
2824 * Destroy a search tree
2825 *
2826 * \param root Root node of tree to destroy
2827 */
2828static void urldb_destroy_search_tree(struct search_node *root)
2829{
2830 /* Destroy children */
2831 if (root->left != &empty)
2832 urldb_destroy_search_tree(root->left);
2833 if (root->right != &empty)
2834 urldb_destroy_search_tree(root->right);
2835
2836 /* And destroy ourselves */
2837 free(root);
2838}
2839
2840
2841/*************** External interface ***************/
2842
2843
2844/* exported interface documented in content/urldb.h */
2845void urldb_destroy(void)
2846{
2847 struct host_part *a, *b;
2848 int i;
2849
2850 /* Clean up search trees */
2851 for (i = 0; i < NUM_SEARCH_TREES28; i++) {
2852 if (search_trees[i] != &empty) {
2853 urldb_destroy_search_tree(search_trees[i]);
2854 search_trees[i] = &empty;
2855 }
2856 }
2857
2858 /* And database */
2859 for (a = db_root.children; a; a = b) {
2860 b = a->next;
2861 urldb_destroy_host_tree(a);
2862 }
2863 memset(&db_root, 0, sizeof(db_root));
2864
2865 /* And the bloom filter */
2866 if (url_bloom != NULL((void*)0)) {
2867 bloom_destroy(url_bloom);
2868 url_bloom = NULL((void*)0);
2869 }
2870}
2871
2872
2873/* exported interface documented in netsurf/url_db.h */
2874nserror urldb_load(const char *filename)
1
[debug] analyzing from urldb_load
2875{
2876#define MAXIMUM_URL_LENGTH 4096
2877 char s[MAXIMUM_URL_LENGTH];
2878 char host[256];
2879 struct host_part *h;
2880 int urls;
2881 int i;
2882 int version;
2883 int length;
2884 FILE *fp;
2885
2886 assert(filename)((filename) ? (void) (0) : __assert_fail ("filename", "content/urldb.c"
, 2886, __extension__ __PRETTY_FUNCTION__))
;
2
Assuming 'filename' is non-null
3
'?' condition is true
2887
2888 NSLOG(netsurf, INFO, "Loading URL file %s", filename)do { if (NSLOG_LEVEL_INFO >= NSLOG_LEVEL_VERBOSE) { static
nslog_entry_context_t _nslog_ctx = { &__nslog_category_netsurf
, NSLOG_LEVEL_INFO, "content/urldb.c", sizeof("content/urldb.c"
) - 1, __PRETTY_FUNCTION__, sizeof(__PRETTY_FUNCTION__) - 1, 2888
, }; nslog__log(&_nslog_ctx, "Loading URL file %s", filename
); } } while(0)
;
4
Taking true branch
5
Loop condition is false. Exiting loop
2889
2890 if (url_bloom == NULL((void*)0))
6
Assuming 'url_bloom' is not equal to NULL
7
Taking false branch
2891 url_bloom = bloom_create(BLOOM_SIZE(1024 * 32));
2892
2893 fp = fopen(filename, "r");
2894 if (!fp
7.1
'fp' is non-null
) {
8
Taking false branch
2895 NSLOG(netsurf, INFO, "Failed to open file '%s' for reading",do { if (NSLOG_LEVEL_INFO >= NSLOG_LEVEL_VERBOSE) { static
nslog_entry_context_t _nslog_ctx = { &__nslog_category_netsurf
, NSLOG_LEVEL_INFO, "content/urldb.c", sizeof("content/urldb.c"
) - 1, __PRETTY_FUNCTION__, sizeof(__PRETTY_FUNCTION__) - 1, 2896
, }; nslog__log(&_nslog_ctx, "Failed to open file '%s' for reading"
, filename); } } while(0)
2896 filename)do { if (NSLOG_LEVEL_INFO >= NSLOG_LEVEL_VERBOSE) { static
nslog_entry_context_t _nslog_ctx = { &__nslog_category_netsurf
, NSLOG_LEVEL_INFO, "content/urldb.c", sizeof("content/urldb.c"
) - 1, __PRETTY_FUNCTION__, sizeof(__PRETTY_FUNCTION__) - 1, 2896
, }; nslog__log(&_nslog_ctx, "Failed to open file '%s' for reading"
, filename); } } while(0)
;
2897 return NSERROR_NOT_FOUND;
2898 }
2899
2900 if (!fgets(s, MAXIMUM_URL_LENGTH, fp)) {
9
Taking false branch
2901 fclose(fp);
2902 return NSERROR_NEED_DATA;
2903 }
2904
2905 version = atoi(s);
2906 if (version < MIN_URL_FILE_VERSION106) {
10
Assuming 'version' is >= MIN_URL_FILE_VERSION
11
Taking false branch
2907 NSLOG(netsurf, INFO, "Unsupported URL file version.")do { if (NSLOG_LEVEL_INFO >= NSLOG_LEVEL_VERBOSE) { static
nslog_entry_context_t _nslog_ctx = { &__nslog_category_netsurf
, NSLOG_LEVEL_INFO, "content/urldb.c", sizeof("content/urldb.c"
) - 1, __PRETTY_FUNCTION__, sizeof(__PRETTY_FUNCTION__) - 1, 2907
, }; nslog__log(&_nslog_ctx, "Unsupported URL file version."
); } } while(0)
;
2908 fclose(fp);
2909 return NSERROR_INVALID;
2910 }
2911 if (version > URL_FILE_VERSION107) {
12
Assuming 'version' is <= URL_FILE_VERSION
13
Taking false branch
2912 NSLOG(netsurf, INFO, "Unknown URL file version.")do { if (NSLOG_LEVEL_INFO >= NSLOG_LEVEL_VERBOSE) { static
nslog_entry_context_t _nslog_ctx = { &__nslog_category_netsurf
, NSLOG_LEVEL_INFO, "content/urldb.c", sizeof("content/urldb.c"
) - 1, __PRETTY_FUNCTION__, sizeof(__PRETTY_FUNCTION__) - 1, 2912
, }; nslog__log(&_nslog_ctx, "Unknown URL file version.")
; } } while(0)
;
2913 fclose(fp);
2914 return NSERROR_INVALID;
2915 }
2916
2917 while (fgets(host, sizeof host, fp)) {
14
Loop condition is true. Entering loop body
23
Execution continues on line 2917
24
Read function called when stream is in EOF state. Function has no effect
2918 time_t hsts_expiry = 0;
2919 int hsts_include_sub_domains = 0;
2920
2921 /* get the hostname */
2922 length = strlen(host) - 1;
2923 host[length] = '\0';
2924
2925 /* skip data that has ended up with a host of '' */
2926 if (length == 0) {
15
Assuming 'length' is equal to 0
16
Taking true branch
2927 if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
17
Taking false branch
2928 break;
2929 urls = atoi(s);
2930 /* Eight fields/url */
2931 for (i = 0; i < (8 * urls); i++) {
18
Assuming the condition is true
19
Loop condition is true. Entering loop body
2932 if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
20
Assuming stream reaches end-of-file here
21
Taking true branch
2933 break;
22
Execution continues on line 2935
2934 }
2935 continue;
2936 }
2937
2938 if (version >= 107) {
2939 char *p = host;
2940 while (*p && *p != ' ') p++;
2941 while (*p && *p == ' ') { *p = '\0'; p++; }
2942 hsts_include_sub_domains = (*p == '1');
2943 while (*p && *p != ' ') p++;
2944 while (*p && *p == ' ') p++;
2945 nsc_snptimet(p, strlen(p), &hsts_expiry);
2946 }
2947
2948 h = urldb_add_host(host);
2949 if (!h) {
2950 NSLOG(netsurf, INFO, "Failed adding host: '%s'", host)do { if (NSLOG_LEVEL_INFO >= NSLOG_LEVEL_VERBOSE) { static
nslog_entry_context_t _nslog_ctx = { &__nslog_category_netsurf
, NSLOG_LEVEL_INFO, "content/urldb.c", sizeof("content/urldb.c"
) - 1, __PRETTY_FUNCTION__, sizeof(__PRETTY_FUNCTION__) - 1, 2950
, }; nslog__log(&_nslog_ctx, "Failed adding host: '%s'", host
); } } while(0)
;
2951 fclose(fp);
2952 return NSERROR_NOMEM;
2953 }
2954 h->hsts.expires = hsts_expiry;
2955 h->hsts.include_sub_domains = hsts_include_sub_domains;
2956
2957 /* read number of URLs */
2958 if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
2959 break;
2960 urls = atoi(s);
2961
2962 /* no URLs => try next host */
2963 if (urls == 0) {
2964 NSLOG(netsurf, INFO, "No URLs for '%s'", host)do { if (NSLOG_LEVEL_INFO >= NSLOG_LEVEL_VERBOSE) { static
nslog_entry_context_t _nslog_ctx = { &__nslog_category_netsurf
, NSLOG_LEVEL_INFO, "content/urldb.c", sizeof("content/urldb.c"
) - 1, __PRETTY_FUNCTION__, sizeof(__PRETTY_FUNCTION__) - 1, 2964
, }; nslog__log(&_nslog_ctx, "No URLs for '%s'", host); }
} while(0)
;
2965 continue;
2966 }
2967
2968 /* load the non-corrupt data */
2969 for (i = 0; i < urls; i++) {
2970 struct path_data *p = NULL((void*)0);
2971 char scheme[64], ports[10];
2972 char url[64 + 3 + 256 + 6 + 4096 + 1 + 1];
2973 unsigned int port;
2974 bool_Bool is_file = false0;
2975 nsurl *nsurl;
2976 lwc_string *scheme_lwc, *fragment_lwc;
2977 char *path_query;
2978 size_t len;
2979
2980 if (!fgets(scheme, sizeof scheme, fp))
2981 break;
2982 length = strlen(scheme) - 1;
2983 scheme[length] = '\0';
2984
2985 if (!fgets(ports, sizeof ports, fp))
2986 break;
2987 length = strlen(ports) - 1;
2988 ports[length] = '\0';
2989 port = atoi(ports);
2990
2991 if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
2992 break;
2993 length = strlen(s) - 1;
2994 s[length] = '\0';
2995
2996 if (!strcasecmp(host, "localhost") &&
2997 !strcasecmp(scheme, "file"))
2998 is_file = true1;
2999
3000 snprintf(url, sizeof url, "%s://%s%s%s%s",
3001 scheme,
3002 /* file URLs have no host */
3003 (is_file ? "" : host),
3004 (port ? ":" : ""),
3005 (port ? ports : ""),
3006 s);
3007
3008 /* TODO: store URLs in pre-parsed state, and make
3009 * a nsurl_load to generate the nsurl more
3010 * swiftly.
3011 * Need a nsurl_save too.
3012 */
3013 if (nsurl_create(url, &nsurl) != NSERROR_OK) {
3014 NSLOG(netsurf, INFO, "Failed inserting '%s'",do { if (NSLOG_LEVEL_INFO >= NSLOG_LEVEL_VERBOSE) { static
nslog_entry_context_t _nslog_ctx = { &__nslog_category_netsurf
, NSLOG_LEVEL_INFO, "content/urldb.c", sizeof("content/urldb.c"
) - 1, __PRETTY_FUNCTION__, sizeof(__PRETTY_FUNCTION__) - 1, 3015
, }; nslog__log(&_nslog_ctx, "Failed inserting '%s'", url
); } } while(0)
3015 url)do { if (NSLOG_LEVEL_INFO >= NSLOG_LEVEL_VERBOSE) { static
nslog_entry_context_t _nslog_ctx = { &__nslog_category_netsurf
, NSLOG_LEVEL_INFO, "content/urldb.c", sizeof("content/urldb.c"
) - 1, __PRETTY_FUNCTION__, sizeof(__PRETTY_FUNCTION__) - 1, 3015
, }; nslog__log(&_nslog_ctx, "Failed inserting '%s'", url
); } } while(0)
;
3016 fclose(fp);
3017 return NSERROR_NOMEM;
3018 }
3019
3020 if (url_bloom != NULL((void*)0)) {
3021 uint32_t hash = nsurl_hash(nsurl);
3022 bloom_insert_hash(url_bloom, hash);
3023 }
3024
3025 /* Copy and merge path/query strings */
3026 if (nsurl_get(nsurl, NSURL_PATH | NSURL_QUERY,
3027 &path_query, &len) != NSERROR_OK) {
3028 NSLOG(netsurf, INFO, "Failed inserting '%s'",do { if (NSLOG_LEVEL_INFO >= NSLOG_LEVEL_VERBOSE) { static
nslog_entry_context_t _nslog_ctx = { &__nslog_category_netsurf
, NSLOG_LEVEL_INFO, "content/urldb.c", sizeof("content/urldb.c"
) - 1, __PRETTY_FUNCTION__, sizeof(__PRETTY_FUNCTION__) - 1, 3029
, }; nslog__log(&_nslog_ctx, "Failed inserting '%s'", url
); } } while(0)
3029 url)do { if (NSLOG_LEVEL_INFO >= NSLOG_LEVEL_VERBOSE) { static
nslog_entry_context_t _nslog_ctx = { &__nslog_category_netsurf
, NSLOG_LEVEL_INFO, "content/urldb.c", sizeof("content/urldb.c"
) - 1, __PRETTY_FUNCTION__, sizeof(__PRETTY_FUNCTION__) - 1, 3029
, }; nslog__log(&_nslog_ctx, "Failed inserting '%s'", url
); } } while(0)
;
3030 fclose(fp);
3031 return NSERROR_NOMEM;
3032 }
3033
3034 scheme_lwc = nsurl_get_component(nsurl, NSURL_SCHEME);
3035 fragment_lwc = nsurl_get_component(nsurl,
3036 NSURL_FRAGMENT);
3037 p = urldb_add_path(scheme_lwc, port, h, path_query,
3038 fragment_lwc, nsurl);
3039 if (!p) {
3040 NSLOG(netsurf, INFO, "Failed inserting '%s'",do { if (NSLOG_LEVEL_INFO >= NSLOG_LEVEL_VERBOSE) { static
nslog_entry_context_t _nslog_ctx = { &__nslog_category_netsurf
, NSLOG_LEVEL_INFO, "content/urldb.c", sizeof("content/urldb.c"
) - 1, __PRETTY_FUNCTION__, sizeof(__PRETTY_FUNCTION__) - 1, 3041
, }; nslog__log(&_nslog_ctx, "Failed inserting '%s'", url
); } } while(0)
3041 url)do { if (NSLOG_LEVEL_INFO >= NSLOG_LEVEL_VERBOSE) { static
nslog_entry_context_t _nslog_ctx = { &__nslog_category_netsurf
, NSLOG_LEVEL_INFO, "content/urldb.c", sizeof("content/urldb.c"
) - 1, __PRETTY_FUNCTION__, sizeof(__PRETTY_FUNCTION__) - 1, 3041
, }; nslog__log(&_nslog_ctx, "Failed inserting '%s'", url
); } } while(0)
;
3042 fclose(fp);
3043 return NSERROR_NOMEM;
3044 }
3045 nsurl_unref(nsurl);
3046 lwc_string_unref(scheme_lwc){ lwc_string *__lwc_s = (scheme_lwc); if (__lwc_s != ((void*)
0)) { __lwc_s->refcnt--; if ((__lwc_s->refcnt == 0) || (
(__lwc_s->refcnt == 1) && (__lwc_s->insensitive
== __lwc_s))) lwc_string_destroy(__lwc_s); } }
;
3047 lwc_string_unref(fragment_lwc){ lwc_string *__lwc_s = (fragment_lwc); if (__lwc_s != ((void
*)0)) { __lwc_s->refcnt--; if ((__lwc_s->refcnt == 0) ||
((__lwc_s->refcnt == 1) && (__lwc_s->insensitive
== __lwc_s))) lwc_string_destroy(__lwc_s); } }
;
3048
3049 if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
3050 break;
3051 if (p)
3052 p->urld.visits = (unsigned int)atoi(s);
3053
3054 /* entry last use time */
3055 if (!fgets(s, MAXIMUM_URL_LENGTH, fp)) {
3056 break;
3057 }
3058 if (p) {
3059 nsc_snptimet(s, strlen(s) - 1, &p->urld.last_visit);
3060 }
3061
3062 if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
3063 break;
3064 if (p)
3065 p->urld.type = (content_type)atoi(s);
3066
3067 if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
3068 break;
3069
3070
3071 if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
3072 break;
3073 length = strlen(s) - 1;
3074 if (p && length > 0) {
3075 s[length] = '\0';
3076 p->urld.title = malloc(length + 1);
3077 if (p->urld.title)
3078 memcpy(p->urld.title, s, length + 1);
3079 }
3080 }
3081 }
3082
3083 fclose(fp);
3084 NSLOG(netsurf, INFO, "Successfully loaded URL file")do { if (NSLOG_LEVEL_INFO >= NSLOG_LEVEL_VERBOSE) { static
nslog_entry_context_t _nslog_ctx = { &__nslog_category_netsurf
, NSLOG_LEVEL_INFO, "content/urldb.c", sizeof("content/urldb.c"
) - 1, __PRETTY_FUNCTION__, sizeof(__PRETTY_FUNCTION__) - 1, 3084
, }; nslog__log(&_nslog_ctx, "Successfully loaded URL file"
); } } while(0)
;
3085#undef MAXIMUM_URL_LENGTH
3086
3087 return NSERROR_OK;
3088}
3089
3090/* exported interface documented in netsurf/url_db.h */
3091nserror urldb_save(const char *filename)
3092{
3093 FILE *fp;
3094 int i;
3095
3096 assert(filename)((filename) ? (void) (0) : __assert_fail ("filename", "content/urldb.c"
, 3096, __extension__ __PRETTY_FUNCTION__))
;
3097
3098 fp = fopen(filename, "w");
3099 if (!fp) {
3100 NSLOG(netsurf, INFO, "Failed to open file '%s' for writing",do { if (NSLOG_LEVEL_INFO >= NSLOG_LEVEL_VERBOSE) { static
nslog_entry_context_t _nslog_ctx = { &__nslog_category_netsurf
, NSLOG_LEVEL_INFO, "content/urldb.c", sizeof("content/urldb.c"
) - 1, __PRETTY_FUNCTION__, sizeof(__PRETTY_FUNCTION__) - 1, 3101
, }; nslog__log(&_nslog_ctx, "Failed to open file '%s' for writing"
, filename); } } while(0)
3101 filename)do { if (NSLOG_LEVEL_INFO >= NSLOG_LEVEL_VERBOSE) { static
nslog_entry_context_t _nslog_ctx = { &__nslog_category_netsurf
, NSLOG_LEVEL_INFO, "content/urldb.c", sizeof("content/urldb.c"
) - 1, __PRETTY_FUNCTION__, sizeof(__PRETTY_FUNCTION__) - 1, 3101
, }; nslog__log(&_nslog_ctx, "Failed to open file '%s' for writing"
, filename); } } while(0)
;
3102 return NSERROR_SAVE_FAILED;
3103 }
3104
3105 /* file format version number */
3106 fprintf(fp, "%d\n", URL_FILE_VERSION107);
3107
3108 for (i = 0; i != NUM_SEARCH_TREES28; i++) {
3109 urldb_save_search_tree(search_trees[i], fp);
3110 }
3111
3112 fclose(fp);
3113
3114 return NSERROR_OK;
3115}
3116
3117
3118/* exported interface documented in content/urldb.h */
3119nserror urldb_set_url_persistence(nsurl *url, bool_Bool persist)
3120{
3121 struct path_data *p;
3122
3123 assert(url)((url) ? (void) (0) : __assert_fail ("url", "content/urldb.c"
, 3123, __extension__ __PRETTY_FUNCTION__))
;
3124
3125 p = urldb_find_url(url);
3126 if (!p) {
3127 return NSERROR_NOT_FOUND;
3128 }
3129
3130 p->persistent = persist;
3131
3132 return NSERROR_OK;
3133}
3134
3135
3136/* exported interface documented in content/urldb.h */
3137bool_Bool urldb_add_url(nsurl *url)
3138{
3139 struct host_part *h;
3140 struct path_data *p;
3141 lwc_string *scheme;
3142 lwc_string *port;
3143 lwc_string *host;
3144 lwc_string *fragment;
3145 const char *host_str;
3146 char *path_query = NULL((void*)0);
3147 size_t len;
3148 bool_Bool match;
3149 unsigned int port_int;
3150
3151 assert(url)((url) ? (void) (0) : __assert_fail ("url", "content/urldb.c"
, 3151, __extension__ __PRETTY_FUNCTION__))
;
3152
3153 if (url_bloom == NULL((void*)0))
3154 url_bloom = bloom_create(BLOOM_SIZE(1024 * 32));
3155
3156 if (url_bloom != NULL((void*)0)) {
3157 uint32_t hash = nsurl_hash(url);
3158 bloom_insert_hash(url_bloom, hash);
3159 }
3160
3161 /* Copy and merge path/query strings */
3162 if (nsurl_get(url, NSURL_PATH | NSURL_QUERY, &path_query, &len) !=
3163 NSERROR_OK) {
3164 return false0;
3165 }
3166 assert(path_query != NULL)((path_query != ((void*)0)) ? (void) (0) : __assert_fail ("path_query != NULL"
, "content/urldb.c", 3166, __extension__ __PRETTY_FUNCTION__)
)
;
3167
3168 scheme = nsurl_get_component(url, NSURL_SCHEME);
3169 if (scheme == NULL((void*)0)) {
3170 free(path_query);
3171 return false0;
3172 }
3173
3174 host = nsurl_get_component(url, NSURL_HOST);
3175 if (host != NULL((void*)0)) {
3176 host_str = lwc_string_data(host)({((host != ((void*)0)) ? (void) (0) : __assert_fail ("host != NULL"
, "content/urldb.c", 3176, __extension__ __PRETTY_FUNCTION__)
); (const char *)((host)+1);})
;
3177 lwc_string_unref(host){ lwc_string *__lwc_s = (host); if (__lwc_s != ((void*)0)) { __lwc_s
->refcnt--; if ((__lwc_s->refcnt == 0) || ((__lwc_s->
refcnt == 1) && (__lwc_s->insensitive == __lwc_s))
) lwc_string_destroy(__lwc_s); } }
;
3178
3179 } else if (lwc_string_isequal(scheme, corestring_lwc_file, &match)((*(&match) = ((scheme) == (corestring_lwc_file))), lwc_error_ok
)
==
3180 lwc_error_ok && match == true1) {
3181 host_str = "localhost";
3182
3183 } else {
3184 lwc_string_unref(scheme){ lwc_string *__lwc_s = (scheme); if (__lwc_s != ((void*)0)) {
__lwc_s->refcnt--; if ((__lwc_s->refcnt == 0) || ((__lwc_s
->refcnt == 1) && (__lwc_s->insensitive == __lwc_s
))) lwc_string_destroy(__lwc_s); } }
;
3185 free(path_query);
3186 return false0;
3187 }
3188
3189 fragment = nsurl_get_component(url, NSURL_FRAGMENT);
3190
3191 port = nsurl_get_component(url, NSURL_PORT);
3192 if (port != NULL((void*)0)) {
3193 port_int = atoi(lwc_string_data(port)({((port != ((void*)0)) ? (void) (0) : __assert_fail ("port != NULL"
, "content/urldb.c", 3193, __extension__ __PRETTY_FUNCTION__)
); (const char *)((port)+1);})
);
3194 lwc_string_unref(port){ lwc_string *__lwc_s = (port); if (__lwc_s != ((void*)0)) { __lwc_s
->refcnt--; if ((__lwc_s->refcnt == 0) || ((__lwc_s->
refcnt == 1) && (__lwc_s->insensitive == __lwc_s))
) lwc_string_destroy(__lwc_s); } }
;
3195 } else {
3196 port_int = 0;
3197 }
3198
3199 /* Get host entry */
3200 h = urldb_add_host(host_str);
3201
3202 /* Get path entry */
3203 if (h != NULL((void*)0)) {
3204 p = urldb_add_path(scheme,
3205 port_int,
3206 h,
3207 path_query,
3208 fragment,
3209 url);
3210 } else {
3211 p = NULL((void*)0);
3212 }
3213
3214 lwc_string_unref(scheme){ lwc_string *__lwc_s = (scheme); if (__lwc_s != ((void*)0)) {
__lwc_s->refcnt--; if ((__lwc_s->refcnt == 0) || ((__lwc_s
->refcnt == 1) && (__lwc_s->insensitive == __lwc_s
))) lwc_string_destroy(__lwc_s); } }
;
3215 lwc_string_unref(fragment){ lwc_string *__lwc_s = (fragment); if (__lwc_s != ((void*)0)
) { __lwc_s->refcnt--; if ((__lwc_s->refcnt == 0) || ((
__lwc_s->refcnt == 1) && (__lwc_s->insensitive ==
__lwc_s))) lwc_string_destroy(__lwc_s); } }
;
3216
3217 return (p != NULL((void*)0));
3218}
3219
3220
3221/* exported interface documented in content/urldb.h */
3222nserror urldb_set_url_title(nsurl *url, const char *title)
3223{
3224 struct path_data *p;
3225 char *temp;
3226
3227 assert(url)((url) ? (void) (0) : __assert_fail ("url", "content/urldb.c"
, 3227, __extension__ __PRETTY_FUNCTION__))
;
3228
3229 p = urldb_find_url(url);
3230 if (p == NULL((void*)0)) {
3231 return NSERROR_NOT_FOUND;
3232 }
3233
3234 /* copy the parameter if necessary */
3235 if (title != NULL((void*)0)) {
3236 temp = strdup(title);
3237 if (temp == NULL((void*)0)) {
3238 return NSERROR_NOMEM;
3239 }
3240 } else {
3241 temp = NULL((void*)0);
3242 }
3243
3244 free(p->urld.title);
3245 p->urld.title = temp;
3246
3247 return NSERROR_OK;
3248}
3249
3250
3251/* exported interface documented in content/urldb.h */
3252nserror urldb_set_url_content_type(nsurl *url, content_type type)
3253{
3254 struct path_data *p;
3255
3256 assert(url)((url) ? (void) (0) : __assert_fail ("url", "content/urldb.c"
, 3256, __extension__ __PRETTY_FUNCTION__))
;
3257
3258 p = urldb_find_url(url);
3259 if (!p) {
3260 return NSERROR_NOT_FOUND;
3261 }
3262
3263 p->urld.type = type;
3264
3265 return NSERROR_OK;
3266}
3267
3268
3269/* exported interface documented in content/urldb.h */
3270nserror urldb_update_url_visit_data(nsurl *url)
3271{
3272 struct path_data *p;
3273
3274 assert(url)((url) ? (void) (0) : __assert_fail ("url", "content/urldb.c"
, 3274, __extension__ __PRETTY_FUNCTION__))
;
3275
3276 p = urldb_find_url(url);
3277 if (!p) {
3278 return NSERROR_NOT_FOUND;
3279 }
3280
3281 p->urld.last_visit = time(NULL((void*)0));
3282 p->urld.visits++;
3283
3284 return NSERROR_OK;
3285}
3286
3287
3288/* exported interface documented in content/urldb.h */
3289void urldb_reset_url_visit_data(nsurl *url)
3290{
3291 struct path_data *p;
3292
3293 assert(url)((url) ? (void) (0) : __assert_fail ("url", "content/urldb.c"
, 3293, __extension__ __PRETTY_FUNCTION__))
;
3294
3295 p = urldb_find_url(url);
3296 if (!p)
3297 return;
3298
3299 p->urld.last_visit = (time_t)0;
3300 p->urld.visits = 0;
3301}
3302
3303
3304/* exported interface documented in netsurf/url_db.h */
3305const struct url_data *urldb_get_url_data(nsurl *url)
3306{
3307 struct path_data *p;
3308 struct url_internal_data *u;
3309
3310 assert(url)((url) ? (void) (0) : __assert_fail ("url", "content/urldb.c"
, 3310, __extension__ __PRETTY_FUNCTION__))
;
3311
3312 p = urldb_find_url(url);
3313 if (!p)
3314 return NULL((void*)0);
3315
3316 u = &p->urld;
3317
3318 return (const struct url_data *) u;
3319}
3320
3321
3322/* exported interface documented in content/urldb.h */
3323nsurl *urldb_get_url(nsurl *url)
3324{
3325 struct path_data *p;
3326
3327 assert(url)((url) ? (void) (0) : __assert_fail ("url", "content/urldb.c"
, 3327, __extension__ __PRETTY_FUNCTION__))
;
3328
3329 p = urldb_find_url(url);
3330 if (!p)
3331 return NULL((void*)0);
3332
3333 return p->url;
3334}
3335
3336
3337/* exported interface documented in netsurf/url_db.h */
3338void urldb_set_auth_details(nsurl *url, const char *realm, const char *auth)
3339{
3340 struct path_data *p, *pi;
3341 struct host_part *h;
3342 struct prot_space_data *space, *space_alloc;
3343 char *realm_alloc, *auth_alloc;
3344 bool_Bool match;
3345
3346 assert(url && realm && auth)((url && realm && auth) ? (void) (0) : __assert_fail
("url && realm && auth", "content/urldb.c", 3346
, __extension__ __PRETTY_FUNCTION__))
;
3347
3348 /* add url, in case it's missing */
3349 urldb_add_url(url);
3350
3351 p = urldb_find_url(url);
3352
3353 if (!p)
3354 return;
3355
3356 /* Search for host_part */
3357 for (pi = p; pi->parent != NULL((void*)0); pi = pi->parent)
3358 ;
3359 h = (struct host_part *)pi;
3360
3361 /* Search if given URL belongs to a protection space we already know of. */
3362 for (space = h->prot_space; space; space = space->next) {
3363 if (!strcmp(space->realm, realm) &&
3364 lwc_string_isequal(space->scheme, p->scheme,((*(&match) = ((space->scheme) == (p->scheme))), lwc_error_ok
)
3365 &match)((*(&match) = ((space->scheme) == (p->scheme))), lwc_error_ok
)
== lwc_error_ok &&
3366 match == true1 &&
3367 space->port == p->port)
3368 break;
3369 }
3370
3371 if (space != NULL((void*)0)) {
3372 /* Overrule existing auth. */
3373 free(space->auth);
3374 space->auth = strdup(auth);
3375 } else {
3376 /* Create a new protection space. */
3377 space = space_alloc = malloc(sizeof(struct prot_space_data));
3378 realm_alloc = strdup(realm);
3379 auth_alloc = strdup(auth);
3380
3381 if (!space_alloc || !realm_alloc || !auth_alloc) {
3382 free(space_alloc);
3383 free(realm_alloc);
3384 free(auth_alloc);
3385 return;
3386 }
3387
3388 space->scheme = lwc_string_ref(p->scheme)({lwc_string *__lwc_s = (p->scheme); ((__lwc_s != ((void*)
0)) ? (void) (0) : __assert_fail ("__lwc_s != NULL", "content/urldb.c"
, 3388, __extension__ __PRETTY_FUNCTION__)); __lwc_s->refcnt
++; __lwc_s;})
;
3389 space->port = p->port;
3390 space->realm = realm_alloc;
3391 space->auth = auth_alloc;
3392 space->next = h->prot_space;
3393 h->prot_space = space;
3394 }
3395
3396 p->prot_space = space;
3397}
3398
3399
3400/* exported interface documented in netsurf/url_db.h */
3401const char *urldb_get_auth_details(nsurl *url, const char *realm)
3402{
3403 struct path_data *p, *p_cur, *p_top;
3404
3405 assert(url)((url) ? (void) (0) : __assert_fail ("url", "content/urldb.c"
, 3405, __extension__ __PRETTY_FUNCTION__))
;
3406
3407 /* add to the db, so our lookup will work */
3408 urldb_add_url(url);
3409
3410 p = urldb_find_url(url);
3411 if (!p)
3412 return NULL((void*)0);
3413
3414 /* Check for any auth details attached to the path_data node or any of
3415 * its parents.
3416 */
3417 for (p_cur = p; p_cur != NULL((void*)0); p_top = p_cur, p_cur = p_cur->parent) {
3418 if (p_cur->prot_space) {
3419 return p_cur->prot_space->auth;
3420 }
3421 }
3422
3423 /* Only when we have a realm (and canonical root of given URL), we can
3424 * uniquely locate the protection space.
3425 */
3426 if (realm != NULL((void*)0)) {
3427 const struct host_part *h = (const struct host_part *)p_top;
3428 const struct prot_space_data *space;
3429 bool_Bool match;
3430
3431 /* Search for a possible matching protection space. */
3432 for (space = h->prot_space; space != NULL((void*)0);
3433 space = space->next) {
3434 if (!strcmp(space->realm, realm) &&
3435 lwc_string_isequal(space->scheme,((*(&match) = ((space->scheme) == (p->scheme))), lwc_error_ok
)
3436 p->scheme, &match)((*(&match) = ((space->scheme) == (p->scheme))), lwc_error_ok
)
==
3437 lwc_error_ok &&
3438 match == true1 &&
3439 space->port == p->port) {
3440 p->prot_space = space;
3441 return p->prot_space->auth;
3442 }
3443 }
3444 }
3445
3446 return NULL((void*)0);
3447}
3448
3449
3450/* exported interface documented in netsurf/url_db.h */
3451void urldb_set_cert_permissions(nsurl *url, bool_Bool permit)
3452{
3453 struct path_data *p;
3454 struct host_part *h;
3455
3456 assert(url)((url) ? (void) (0) : __assert_fail ("url", "content/urldb.c"
, 3456, __extension__ __PRETTY_FUNCTION__))
;
3457
3458 /* add url, in case it's missing */
3459 urldb_add_url(url);
3460
3461 p = urldb_find_url(url);
3462 if (!p)
3463 return;
3464
3465 for (; p && p->parent; p = p->parent)
3466 /* do nothing */;
3467 assert(p)((p) ? (void) (0) : __assert_fail ("p", "content/urldb.c", 3467
, __extension__ __PRETTY_FUNCTION__))
;
3468
3469 h = (struct host_part *)p;
3470
3471 h->permit_invalid_certs = permit;
3472}
3473
3474
3475/* exported interface documented in content/urldb.h */
3476bool_Bool urldb_get_cert_permissions(nsurl *url)
3477{
3478 struct path_data *p;
3479 const struct host_part *h;
3480
3481 assert(url)((url) ? (void) (0) : __assert_fail ("url", "content/urldb.c"
, 3481, __extension__ __PRETTY_FUNCTION__))
;
3482
3483 p = urldb_find_url(url);
3484 if (!p)
3485 return false0;
3486
3487 for (; p && p->parent; p = p->parent)
3488 /* do nothing */;
3489 assert(p)((p) ? (void) (0) : __assert_fail ("p", "content/urldb.c", 3489
, __extension__ __PRETTY_FUNCTION__))
;
3490
3491 h = (const struct host_part *)p;
3492
3493 return h->permit_invalid_certs;
3494}
3495
3496
3497/* exported interface documented in content/urldb.h */
3498bool_Bool urldb_set_hsts_policy(struct nsurl *url, const char *header)
3499{
3500 struct path_data *p;
3501 struct host_part *h;
3502 lwc_string *host;
3503 time_t now = time(NULL((void*)0));
3504 http_strict_transport_security *sts;
3505 uint32_t max_age = 0;
3506 nserror error;
3507
3508 assert(url)((url) ? (void) (0) : __assert_fail ("url", "content/urldb.c"
, 3508, __extension__ __PRETTY_FUNCTION__))
;
3509
3510 host = nsurl_get_component(url, NSURL_HOST);
3511 if (host != NULL((void*)0)) {
3512 if (urldb__host_is_ip_address(lwc_string_data(host)({((host != ((void*)0)) ? (void) (0) : __assert_fail ("host != NULL"
, "content/urldb.c", 3512, __extension__ __PRETTY_FUNCTION__)
); (const char *)((host)+1);})
)) {
3513 /* Host is IP: ignore */
3514 lwc_string_unref(host){ lwc_string *__lwc_s = (host); if (__lwc_s != ((void*)0)) { __lwc_s
->refcnt--; if ((__lwc_s->refcnt == 0) || ((__lwc_s->
refcnt == 1) && (__lwc_s->insensitive == __lwc_s))
) lwc_string_destroy(__lwc_s); } }
;
3515 return true1;
3516 } else if (lwc_string_length(host)({((host != ((void*)0)) ? (void) (0) : __assert_fail ("host != NULL"
, "content/urldb.c", 3516, __extension__ __PRETTY_FUNCTION__)
); (host)->len;})
== 0) {
3517 /* Host is blank: ignore */
3518 lwc_string_unref(host){ lwc_string *__lwc_s = (host); if (__lwc_s != ((void*)0)) { __lwc_s
->refcnt--; if ((__lwc_s->refcnt == 0) || ((__lwc_s->
refcnt == 1) && (__lwc_s->insensitive == __lwc_s))
) lwc_string_destroy(__lwc_s); } }
;
3519 return true1;
3520 }
3521
3522 lwc_string_unref(host){ lwc_string *__lwc_s = (host); if (__lwc_s != ((void*)0)) { __lwc_s
->refcnt--; if ((__lwc_s->refcnt == 0) || ((__lwc_s->
refcnt == 1) && (__lwc_s->insensitive == __lwc_s))
) lwc_string_destroy(__lwc_s); } }
;
3523 } else {
3524 /* No host part: ignore */
3525 return true1;
3526 }
3527
3528 /* add url, in case it's missing */
3529 urldb_add_url(url);
3530
3531 p = urldb_find_url(url);
3532 if (!p)
3533 return false0;
3534
3535 for (; p && p->parent; p = p->parent)
3536 /* do nothing */;
3537 assert(p)((p) ? (void) (0) : __assert_fail ("p", "content/urldb.c", 3537
, __extension__ __PRETTY_FUNCTION__))
;
3538
3539 h = (struct host_part *)p;
3540 if (h->permit_invalid_certs) {
3541 /* Transport is tainted: ignore */
3542 return true1;
3543 }
3544
3545 error = http_parse_strict_transport_security(header, &sts);
3546 if (error != NSERROR_OK) {
3547 /* Parse failed: ignore */
3548 return true1;
3549 }
3550
3551 h->hsts.include_sub_domains =
3552 http_strict_transport_security_include_subdomains(sts);
3553
3554 max_age = http_strict_transport_security_max_age(sts);
3555 if (max_age == 0) {
3556 h->hsts.expires = 0;
3557 h->hsts.include_sub_domains = false0;
3558 } else if ((time_t) (now + max_age) > h->hsts.expires) {
3559 h->hsts.expires = now + max_age;
3560 }
3561
3562 http_strict_transport_security_destroy(sts);
3563
3564 return true1;
3565}
3566
3567
3568/* exported interface documented in content/urldb.h */
3569bool_Bool urldb_get_hsts_enabled(struct nsurl *url)
3570{
3571 struct path_data *p;
3572 const struct host_part *h;
3573 lwc_string *host;
3574 time_t now = time(NULL((void*)0));
3575
3576 assert(url)((url) ? (void) (0) : __assert_fail ("url", "content/urldb.c"
, 3576, __extension__ __PRETTY_FUNCTION__))
;
3577
3578 host = nsurl_get_component(url, NSURL_HOST);
3579 if (host != NULL((void*)0)) {
3580 if (urldb__host_is_ip_address(lwc_string_data(host)({((host != ((void*)0)) ? (void) (0) : __assert_fail ("host != NULL"
, "content/urldb.c", 3580, __extension__ __PRETTY_FUNCTION__)
); (const char *)((host)+1);})
)) {
3581 /* Host is IP: not enabled */
3582 lwc_string_unref(host){ lwc_string *__lwc_s = (host); if (__lwc_s != ((void*)0)) { __lwc_s
->refcnt--; if ((__lwc_s->refcnt == 0) || ((__lwc_s->
refcnt == 1) && (__lwc_s->insensitive == __lwc_s))
) lwc_string_destroy(__lwc_s); } }
;
3583 return false0;
3584 } else if (lwc_string_length(host)({((host != ((void*)0)) ? (void) (0) : __assert_fail ("host != NULL"
, "content/urldb.c", 3584, __extension__ __PRETTY_FUNCTION__)
); (host)->len;})
== 0) {
3585 /* Host is blank: not enabled */
3586 lwc_string_unref(host){ lwc_string *__lwc_s = (host); if (__lwc_s != ((void*)0)) { __lwc_s
->refcnt--; if ((__lwc_s->refcnt == 0) || ((__lwc_s->
refcnt == 1) && (__lwc_s->insensitive == __lwc_s))
) lwc_string_destroy(__lwc_s); } }
;
3587 return false0;
3588 }
3589
3590 lwc_string_unref(host){ lwc_string *__lwc_s = (host); if (__lwc_s != ((void*)0)) { __lwc_s
->refcnt--; if ((__lwc_s->refcnt == 0) || ((__lwc_s->
refcnt == 1) && (__lwc_s->insensitive == __lwc_s))
) lwc_string_destroy(__lwc_s); } }
;
3591 } else {
3592 /* No host part: not enabled */
3593 return false0;
3594 }
3595
3596 /* The URL must exist in the db in order to find HSTS policy, since
3597 * we search up the tree from the URL node, and policy from further
3598 * up may also apply. */
3599 urldb_add_url(url);
3600
3601 p = urldb_find_url(url);
3602 if (!p)
3603 return false0;
3604
3605 for (; p && p->parent; p = p->parent)
3606 /* do nothing */;
3607 assert(p)((p) ? (void) (0) : __assert_fail ("p", "content/urldb.c", 3607
, __extension__ __PRETTY_FUNCTION__))
;
3608
3609 h = (const struct host_part *)p;
3610
3611 /* Consult record for this host */
3612 if (h->hsts.expires > now) {
3613 /* Not expired */
3614 return true1;
3615 }
3616
3617 /* Consult parent domains */
3618 for (h = h->parent; h && h != &db_root; h = h->parent) {
3619 if (h->hsts.expires > now && h->hsts.include_sub_domains) {
3620 /* Not expired and subdomains included */
3621 return true1;
3622 }
3623 }
3624
3625 return false0;
3626}
3627
3628
3629/* exported interface documented in netsurf/url_db.h */
3630void
3631urldb_iterate_partial(const char *prefix,
3632 bool_Bool (*callback)(nsurl *url, const struct url_data *data))
3633{
3634 char host[256];
3635 char buf[260]; /* max domain + "www." */
3636 const char *slash, *scheme_sep;
3637 struct search_node *tree;
3638 const struct host_part *h;
3639
3640 assert(prefix && callback)((prefix && callback) ? (void) (0) : __assert_fail ("prefix && callback"
, "content/urldb.c", 3640, __extension__ __PRETTY_FUNCTION__)
)
;
3641
3642 /* strip scheme */
3643 scheme_sep = strstr(prefix, "://");
3644 if (scheme_sep)
3645 prefix = scheme_sep + 3;
3646
3647 slash = strchr(prefix, '/');
3648 tree = urldb_get_search_tree(prefix);
3649
3650 if (slash) {
3651 /* if there's a slash in the input, then we can
3652 * assume that we're looking for a path */
3653 snprintf(host, sizeof host, "%.*s",
3654 (int) (slash - prefix), prefix);
3655
3656 h = urldb_search_find(tree, host);
3657 if (!h) {
3658 int len = slash - prefix;
3659
3660 if (len <= 3 || strncasecmp(host, "www.", 4) != 0) {
3661 snprintf(buf, sizeof buf, "www.%s", host);
3662 h = urldb_search_find(
3663 search_trees[ST_DN2 + 'w' - 'a'],
3664 buf);
3665 if (!h)
3666 return;
3667 } else
3668 return;
3669 }
3670
3671 if (h->paths.children) {
3672 /* Have paths, iterate them */
3673 urldb_iterate_partial_path(&h->paths, slash + 1,
3674 callback);
3675 }
3676
3677 } else {
3678 int len = strlen(prefix);
3679
3680 /* looking for hosts */
3681 if (!urldb_iterate_partial_host(tree, prefix, callback))
3682 return;
3683
3684 if (len <= 3 || strncasecmp(prefix, "www.", 4) != 0) {
3685 /* now look for www.prefix */
3686 snprintf(buf, sizeof buf, "www.%s", prefix);
3687 if(!urldb_iterate_partial_host(
3688 search_trees[ST_DN2 + 'w' - 'a'],
3689 buf, callback))
3690 return;
3691 }
3692 }
3693}
3694
3695
3696/* exported interface documented in netsurf/url_db.h */
3697void
3698urldb_iterate_entries(bool_Bool (*callback)(nsurl *url, const struct url_data *data))
3699{
3700 int i;
3701
3702 assert(callback)((callback) ? (void) (0) : __assert_fail ("callback", "content/urldb.c"
, 3702, __extension__ __PRETTY_FUNCTION__))
;
3703
3704 for (i = 0; i < NUM_SEARCH_TREES28; i++) {
3705 if (!urldb_iterate_entries_host(search_trees[i],
3706 callback,
3707 NULL((void*)0))) {
3708 break;
3709 }
3710 }
3711}
3712
3713
3714/* exported interface documented in content/urldb.h */
3715void urldb_iterate_cookies(bool_Bool (*callback)(const struct cookie_data *data))
3716{
3717 int i;
3718
3719 assert(callback)((callback) ? (void) (0) : __assert_fail ("callback", "content/urldb.c"
, 3719, __extension__ __PRETTY_FUNCTION__))
;
3720
3721 for (i = 0; i < NUM_SEARCH_TREES28; i++) {
3722 if (!urldb_iterate_entries_host(search_trees[i],
3723 NULL((void*)0), callback))
3724 break;
3725 }
3726}
3727
3728
3729/* exported interface documented in content/urldb.h */
3730bool_Bool urldb_set_cookie(const char *header, nsurl *url, nsurl *referer)
3731{
3732 const char *cur = header, *end;
3733 lwc_string *path, *host, *scheme;
3734 nsurl *urlt;
3735 bool_Bool match;
3736
3737 assert(url && header)((url && header) ? (void) (0) : __assert_fail ("url && header"
, "content/urldb.c", 3737, __extension__ __PRETTY_FUNCTION__)
)
;
3738
3739 /* Get defragmented URL, as 'urlt' */
3740 if (nsurl_defragment(url, &urlt) != NSERROR_OK)
3741 return NULL((void*)0);
3742
3743 scheme = nsurl_get_component(url, NSURL_SCHEME);
3744 if (scheme == NULL((void*)0)) {
3745 nsurl_unref(urlt);
3746 return false0;
3747 }
3748
3749 path = nsurl_get_component(url, NSURL_PATH);
3750 if (path == NULL((void*)0)) {
3751 lwc_string_unref(scheme){ lwc_string *__lwc_s = (scheme); if (__lwc_s != ((void*)0)) {
__lwc_s->refcnt--; if ((__lwc_s->refcnt == 0) || ((__lwc_s
->refcnt == 1) && (__lwc_s->insensitive == __lwc_s
))) lwc_string_destroy(__lwc_s); } }
;
3752 nsurl_unref(urlt);
3753 return false0;
3754 }
3755
3756 host = nsurl_get_component(url, NSURL_HOST);
3757 if (host == NULL((void*)0)) {
3758 lwc_string_unref(path){ lwc_string *__lwc_s = (path); if (__lwc_s != ((void*)0)) { __lwc_s
->refcnt--; if ((__lwc_s->refcnt == 0) || ((__lwc_s->
refcnt == 1) && (__lwc_s->insensitive == __lwc_s))
) lwc_string_destroy(__lwc_s); } }
;
3759 lwc_string_unref(scheme){ lwc_string *__lwc_s = (scheme); if (__lwc_s != ((void*)0)) {
__lwc_s->refcnt--; if ((__lwc_s->refcnt == 0) || ((__lwc_s
->refcnt == 1) && (__lwc_s->insensitive == __lwc_s
))) lwc_string_destroy(__lwc_s); } }
;
3760 nsurl_unref(urlt);
3761 return false0;
3762 }
3763
3764 if (referer) {
3765 lwc_string *rhost;
3766
3767 /* Ensure that url's host name domain matches
3768 * referer's (4.3.5) */
3769 rhost = nsurl_get_component(referer, NSURL_HOST);
3770 if (rhost == NULL((void*)0)) {
3771 goto error;
3772 }
3773
3774 /* Domain match host names */
3775 if (lwc_string_isequal(host, rhost, &match)((*(&match) = ((host) == (rhost))), lwc_error_ok) == lwc_error_ok &&
3776 match == false0) {
3777 const char *hptr;
3778 const char *rptr;
3779 const char *dot;
3780 const char *host_data = lwc_string_data(host)({((host != ((void*)0)) ? (void) (0) : __assert_fail ("host != NULL"
, "content/urldb.c", 3780, __extension__ __PRETTY_FUNCTION__)
); (const char *)((host)+1);})
;
3781 const char *rhost_data = lwc_string_data(rhost)({((rhost != ((void*)0)) ? (void) (0) : __assert_fail ("rhost != NULL"
, "content/urldb.c", 3781, __extension__ __PRETTY_FUNCTION__)
); (const char *)((rhost)+1);})
;
3782
3783 /* Ensure neither host nor rhost are IP addresses */
3784 if (urldb__host_is_ip_address(host_data) ||
3785 urldb__host_is_ip_address(rhost_data)) {
3786 /* IP address, so no partial match */
3787 lwc_string_unref(rhost){ lwc_string *__lwc_s = (rhost); if (__lwc_s != ((void*)0)) {
__lwc_s->refcnt--; if ((__lwc_s->refcnt == 0) || ((__lwc_s
->refcnt == 1) && (__lwc_s->insensitive == __lwc_s
))) lwc_string_destroy(__lwc_s); } }
;
3788 goto error;
3789 }
3790
3791 /* Not exact match, so try the following:
3792 *
3793 * 1) Find the longest common suffix of host and rhost
3794 * (may be all of host/rhost)
3795 * 2) Discard characters from the start of the suffix
3796 * until the suffix starts with a dot
3797 * (prevents foobar.com matching bar.com)
3798 * 3) Ensure the suffix is non-empty and contains
3799 * embedded dots (to avoid permitting .com as a
3800 * suffix)
3801 *
3802 * Note that the above in no way resembles the
3803 * domain matching algorithm found in RFC2109.
3804 * It does, however, model the real world rather
3805 * more accurately.
3806 */
3807
3808 /** \todo In future, we should consult a TLD service
3809 * instead of just looking for embedded dots.
3810 */
3811
3812 hptr = host_data + lwc_string_length(host)({((host != ((void*)0)) ? (void) (0) : __assert_fail ("host != NULL"
, "content/urldb.c", 3812, __extension__ __PRETTY_FUNCTION__)
); (host)->len;})
- 1;
3813 rptr = rhost_data + lwc_string_length(rhost)({((rhost != ((void*)0)) ? (void) (0) : __assert_fail ("rhost != NULL"
, "content/urldb.c", 3813, __extension__ __PRETTY_FUNCTION__)
); (rhost)->len;})
- 1;
3814
3815 /* 1 */
3816 while (hptr >= host_data && rptr >= rhost_data) {
3817 if (*hptr != *rptr)
3818 break;
3819 hptr--;
3820 rptr--;
3821 }
3822 /* Ensure we end up pointing at the start of the
3823 * common suffix. The above loop will exit pointing
3824 * to the byte before the start of the suffix. */
3825 hptr++;
3826
3827 /* 2 */
3828 while (*hptr != '\0' && *hptr != '.')
3829 hptr++;
3830
3831 /* 3 */
3832 if (*hptr == '\0' ||
3833 (dot = strchr(hptr + 1, '.')) == NULL((void*)0) ||
3834 *(dot + 1) == '\0') {
3835 lwc_string_unref(rhost){ lwc_string *__lwc_s = (rhost); if (__lwc_s != ((void*)0)) {
__lwc_s->refcnt--; if ((__lwc_s->refcnt == 0) || ((__lwc_s
->refcnt == 1) && (__lwc_s->insensitive == __lwc_s
))) lwc_string_destroy(__lwc_s); } }
;
3836 goto error;
3837 }
3838 }
3839
3840 lwc_string_unref(rhost){ lwc_string *__lwc_s = (rhost); if (__lwc_s != ((void*)0)) {
__lwc_s->refcnt--; if ((__lwc_s->refcnt == 0) || ((__lwc_s
->refcnt == 1) && (__lwc_s->insensitive == __lwc_s
))) lwc_string_destroy(__lwc_s); } }
;
3841 }
3842
3843 end = cur + strlen(cur) - 2 /* Trailing CRLF */;
3844
3845 do {
3846 struct cookie_internal_data *c;
3847 char *dot;
3848 size_t len;
3849#ifdef WITH_NSPSL1
3850 const char *suffix;
3851#endif
3852
3853 c = urldb_parse_cookie(url, &cur);
3854 if (!c) {
3855 /* failed => stop parsing */
3856 goto error;
3857 }
3858
3859 /* validate cookie */
3860
3861 /* 4.2.2:i Cookie must have NAME and VALUE */
3862 if (!c->name || !c->value) {
3863 urldb_free_cookie(c);
3864 goto error;
3865 }
3866
3867 /* 4.3.2:i Cookie path must be a prefix of URL path */
3868 len = strlen(c->path);
3869 if (len > lwc_string_length(path)({((path != ((void*)0)) ? (void) (0) : __assert_fail ("path != NULL"
, "content/urldb.c", 3869, __extension__ __PRETTY_FUNCTION__)
); (path)->len;})
||
3870 strncmp(c->path, lwc_string_data(path)({((path != ((void*)0)) ? (void) (0) : __assert_fail ("path != NULL"
, "content/urldb.c", 3870, __extension__ __PRETTY_FUNCTION__)
); (const char *)((path)+1);})
,
3871 len) != 0) {
3872 urldb_free_cookie(c);
3873 goto error;
3874 }
3875
3876#ifdef WITH_NSPSL1
3877 /* check domain is not a public suffix */
3878 dot = c->domain;
3879 if (*dot == '.') {
3880 dot++;
3881 }
3882 suffix = nspsl_getpublicsuffix(dot);
3883 if (suffix == NULL((void*)0)) {
3884 NSLOG(netsurf, INFO,do { if (NSLOG_LEVEL_INFO >= NSLOG_LEVEL_VERBOSE) { static
nslog_entry_context_t _nslog_ctx = { &__nslog_category_netsurf
, NSLOG_LEVEL_INFO, "content/urldb.c", sizeof("content/urldb.c"
) - 1, __PRETTY_FUNCTION__, sizeof(__PRETTY_FUNCTION__) - 1, 3885
, }; nslog__log(&_nslog_ctx, "domain %s was a public suffix domain"
, dot); } } while(0)
3885 "domain %s was a public suffix domain", dot)do { if (NSLOG_LEVEL_INFO >= NSLOG_LEVEL_VERBOSE) { static
nslog_entry_context_t _nslog_ctx = { &__nslog_category_netsurf
, NSLOG_LEVEL_INFO, "content/urldb.c", sizeof("content/urldb.c"
) - 1, __PRETTY_FUNCTION__, sizeof(__PRETTY_FUNCTION__) - 1, 3885
, }; nslog__log(&_nslog_ctx, "domain %s was a public suffix domain"
, dot); } } while(0)
;
3886 urldb_free_cookie(c);
3887 goto error;
3888 }
3889#else
3890 /* 4.3.2:ii Cookie domain must contain embedded dots */
3891 dot = strchr(c->domain + 1, '.');
3892 if (!dot || *(dot + 1) == '\0') {
3893 /* no embedded dots */
3894 urldb_free_cookie(c);
3895 goto error;
3896 }
3897#endif
3898
3899 /* Domain match fetch host with cookie domain */
3900 if (strcasecmp(lwc_string_data(host)({((host != ((void*)0)) ? (void) (0) : __assert_fail ("host != NULL"
, "content/urldb.c", 3900, __extension__ __PRETTY_FUNCTION__)
); (const char *)((host)+1);})
, c->domain) != 0) {
3901 int hlen, dlen;
3902 char *domain = c->domain;
3903
3904 /* c->domain must be a domain cookie here because:
3905 * c->domain is either:
3906 * + specified in the header as a domain cookie
3907 * (non-domain cookies in the header are ignored
3908 * by urldb_parse_cookie / urldb_parse_avpair)
3909 * + defaulted to the URL's host part
3910 * (by urldb_parse_cookie if no valid domain was
3911 * specified in the header)
3912 *
3913 * The latter will pass the strcasecmp above, which
3914 * leaves the former (i.e. a domain cookie)
3915 */
3916 assert(c->domain[0] == '.')((c->domain[0] == '.') ? (void) (0) : __assert_fail ("c->domain[0] == '.'"
, "content/urldb.c", 3916, __extension__ __PRETTY_FUNCTION__)
)
;
3917
3918 /* 4.3.2:iii */
3919 if (urldb__host_is_ip_address(lwc_string_data(host)({((host != ((void*)0)) ? (void) (0) : __assert_fail ("host != NULL"
, "content/urldb.c", 3919, __extension__ __PRETTY_FUNCTION__)
); (const char *)((host)+1);})
)) {
3920 /* IP address, so no partial match */
3921 urldb_free_cookie(c);
3922 goto error;
3923 }
3924
3925 hlen = lwc_string_length(host)({((host != ((void*)0)) ? (void) (0) : __assert_fail ("host != NULL"
, "content/urldb.c", 3925, __extension__ __PRETTY_FUNCTION__)
); (host)->len;})
;
3926 dlen = strlen(c->domain);
3927
3928 if (hlen <= dlen && hlen != dlen - 1) {
3929 /* Partial match not possible */
3930 urldb_free_cookie(c);
3931 goto error;
3932 }
3933
3934 if (hlen == dlen - 1) {
3935 /* Relax matching to allow
3936 * host a.com to match .a.com */
3937 domain++;
3938 dlen--;
3939 }
3940
3941 if (strcasecmp(lwc_string_data(host)({((host != ((void*)0)) ? (void) (0) : __assert_fail ("host != NULL"
, "content/urldb.c", 3941, __extension__ __PRETTY_FUNCTION__)
); (const char *)((host)+1);})
+ (hlen - dlen),
3942 domain)) {
3943 urldb_free_cookie(c);
3944 goto error;
3945 }
3946
3947 /* 4.3.2:iv Ensure H contains no dots
3948 *
3949 * If you believe the spec, H should contain no
3950 * dots in _any_ cookie. Unfortunately, however,
3951 * reality differs in that many sites send domain
3952 * cookies of the form .foo.com from hosts such
3953 * as bar.bat.foo.com and then expect domain
3954 * matching to work. Thus we have to do what they
3955 * expect, regardless of any potential security
3956 * implications.
3957 *
3958 * This is what code conforming to the spec would
3959 * look like:
3960 *
3961 * for (int i = 0; i < (hlen - dlen); i++) {
3962 * if (host[i] == '.') {
3963 * urldb_free_cookie(c);
3964 * goto error;
3965 * }
3966 * }
3967 */
3968 }
3969
3970 /* Now insert into database */
3971 if (!urldb_insert_cookie(c, scheme, urlt))
3972 goto error;
3973 } while (cur < end);
3974
3975 lwc_string_unref(host){ lwc_string *__lwc_s = (host); if (__lwc_s != ((void*)0)) { __lwc_s
->refcnt--; if ((__lwc_s->refcnt == 0) || ((__lwc_s->
refcnt == 1) && (__lwc_s->insensitive == __lwc_s))
) lwc_string_destroy(__lwc_s); } }
;
3976 lwc_string_unref(path){ lwc_string *__lwc_s = (path); if (__lwc_s != ((void*)0)) { __lwc_s
->refcnt--; if ((__lwc_s->refcnt == 0) || ((__lwc_s->
refcnt == 1) && (__lwc_s->insensitive == __lwc_s))
) lwc_string_destroy(__lwc_s); } }
;
3977 lwc_string_unref(scheme){ lwc_string *__lwc_s = (scheme); if (__lwc_s != ((void*)0)) {
__lwc_s->refcnt--; if ((__lwc_s->refcnt == 0) || ((__lwc_s
->refcnt == 1) && (__lwc_s->insensitive == __lwc_s
))) lwc_string_destroy(__lwc_s); } }
;
3978 nsurl_unref(urlt);
3979
3980 return true1;
3981
3982error:
3983 lwc_string_unref(host){ lwc_string *__lwc_s = (host); if (__lwc_s != ((void*)0)) { __lwc_s
->refcnt--; if ((__lwc_s->refcnt == 0) || ((__lwc_s->
refcnt == 1) && (__lwc_s->insensitive == __lwc_s))
) lwc_string_destroy(__lwc_s); } }
;
3984 lwc_string_unref(path){ lwc_string *__lwc_s = (path); if (__lwc_s != ((void*)0)) { __lwc_s
->refcnt--; if ((__lwc_s->refcnt == 0) || ((__lwc_s->
refcnt == 1) && (__lwc_s->insensitive == __lwc_s))
) lwc_string_destroy(__lwc_s); } }
;
3985 lwc_string_unref(scheme){ lwc_string *__lwc_s = (scheme); if (__lwc_s != ((void*)0)) {
__lwc_s->refcnt--; if ((__lwc_s->refcnt == 0) || ((__lwc_s
->refcnt == 1) && (__lwc_s->insensitive == __lwc_s
))) lwc_string_destroy(__lwc_s); } }
;
3986 nsurl_unref(urlt);
3987
3988 return false0;
3989}
3990
3991
3992/* exported interface documented in content/urldb.h */
3993char *urldb_get_cookie(nsurl *url, bool_Bool include_http_only)
3994{
3995 const struct path_data *p, *q;
3996 const struct host_part *h;
3997 lwc_string *path_lwc;
3998 struct cookie_internal_data *c;
3999 int count = 0, version = COOKIE_RFC2965;
4000 struct cookie_internal_data **matched_cookies;
4001 int matched_cookies_size = 20;
4002 int ret_alloc = 4096, ret_used = 1;
4003 const char *path;
4004 char *ret;
4005 lwc_string *scheme;
4006 time_t now;
4007 int i;
4008 bool_Bool match;
4009
4010 assert(url != NULL)((url != ((void*)0)) ? (void) (0) : __assert_fail ("url != NULL"
, "content/urldb.c", 4010, __extension__ __PRETTY_FUNCTION__)
)
;
4011
4012 /* The URL must exist in the db in order to find relevant cookies, since
4013 * we search up the tree from the URL node, and cookies from further
4014 * up also apply. */
4015 urldb_add_url(url);
4016
4017 p = urldb_find_url(url);
4018 if (!p)
4019 return NULL((void*)0);
4020
4021 scheme = p->scheme;
4022
4023 matched_cookies = malloc(matched_cookies_size *
4024 sizeof(struct cookie_internal_data *));
4025 if (!matched_cookies)
4026 return NULL((void*)0);
4027
4028#define GROW_MATCHED_COOKIES \
4029 do { \
4030 if (count == matched_cookies_size) { \
4031 struct cookie_internal_data **temp; \
4032 temp = realloc(matched_cookies, \
4033 (matched_cookies_size + 20) * \
4034 sizeof(struct cookie_internal_data *)); \
4035 \
4036 if (temp == NULL((void*)0)) { \
4037 free(ret); \
4038 free(matched_cookies); \
4039 return NULL((void*)0); \
4040 } \
4041 \
4042 matched_cookies = temp; \
4043 matched_cookies_size += 20; \
4044 } \
4045 } while(0)
4046
4047 ret = malloc(ret_alloc);
4048 if (!ret) {
4049 free(matched_cookies);
4050 return NULL((void*)0);
4051 }
4052
4053 ret[0] = '\0';
4054
4055 path_lwc = nsurl_get_component(url, NSURL_PATH);
4056 if (path_lwc == NULL((void*)0)) {
4057 free(ret);
4058 free(matched_cookies);
4059 return NULL((void*)0);
4060 }
4061 path = lwc_string_data(path_lwc)({((path_lwc != ((void*)0)) ? (void) (0) : __assert_fail ("path_lwc != NULL"
, "content/urldb.c", 4061, __extension__ __PRETTY_FUNCTION__)
); (const char *)((path_lwc)+1);})
;
4062 lwc_string_unref(path_lwc){ lwc_string *__lwc_s = (path_lwc); if (__lwc_s != ((void*)0)
) { __lwc_s->refcnt--; if ((__lwc_s->refcnt == 0) || ((
__lwc_s->refcnt == 1) && (__lwc_s->insensitive ==
__lwc_s))) lwc_string_destroy(__lwc_s); } }
;
4063
4064 now = time(NULL((void*)0));
4065
4066 if (*(p->segment) != '\0') {
4067 /* Match exact path, unless directory, when prefix matching
4068 * will handle this case for us. */
4069 for (q = p->parent->children; q; q = q->next) {
4070 if (strcmp(q->segment, p->segment))
4071 continue;
4072
4073 /* Consider all cookies associated with
4074 * this exact path */
4075 for (c = q->cookies; c; c = c->next) {
4076 if (c->expires != -1 && c->expires < now)
4077 /* cookie has expired => ignore */
4078 continue;
4079
4080 if (c->secure && lwc_string_isequal(((*(&match) = ((q->scheme) == (corestring_lwc_https)))
, lwc_error_ok)
4081 q->scheme,((*(&match) = ((q->scheme) == (corestring_lwc_https)))
, lwc_error_ok)
4082 corestring_lwc_https,((*(&match) = ((q->scheme) == (corestring_lwc_https)))
, lwc_error_ok)
4083 &match)((*(&match) = ((q->scheme) == (corestring_lwc_https)))
, lwc_error_ok)
&&
4084 match == false0)
4085 /* secure cookie for insecure host.
4086 * ignore */
4087 continue;
4088
4089 if (c->http_only && !include_http_only)
4090 /* Ignore HttpOnly */
4091 continue;
4092
4093 matched_cookies[count++] = c;
4094
4095 GROW_MATCHED_COOKIES;
4096
4097 if (c->version < (unsigned int)version)
4098 version = c->version;
4099
4100 c->last_used = now;
4101
4102 cookie_manager_add((struct cookie_data *)c);
4103 }
4104 }
4105 }
4106
4107 /* Now consider cookies whose paths prefix-match ours */
4108 for (p = p->parent; p; p = p->parent) {
4109 /* Find directory's path entry(ies) */
4110 /* There are potentially multiple due to differing schemes */
4111 for (q = p->children; q; q = q->next) {
4112 if (*(q->segment) != '\0')
4113 continue;
4114
4115 for (c = q->cookies; c; c = c->next) {
4116 if (c->expires != -1 && c->expires < now)
4117 /* cookie has expired => ignore */
4118 continue;
4119
4120 if (c->secure && lwc_string_isequal(((*(&match) = ((q->scheme) == (corestring_lwc_https)))
, lwc_error_ok)
4121 q->scheme,((*(&match) = ((q->scheme) == (corestring_lwc_https)))
, lwc_error_ok)
4122 corestring_lwc_https,((*(&match) = ((q->scheme) == (corestring_lwc_https)))
, lwc_error_ok)
4123 &match)((*(&match) = ((q->scheme) == (corestring_lwc_https)))
, lwc_error_ok)
&&
4124 match == false0)
4125 /* Secure cookie for insecure server
4126 * => ignore */
4127 continue;
4128
4129 matched_cookies[count++] = c;
4130
4131 GROW_MATCHED_COOKIES;
4132
4133 if (c->version < (unsigned int) version)
4134 version = c->version;
4135
4136 c->last_used = now;
4137
4138 cookie_manager_add((struct cookie_data *)c);
4139 }
4140 }
4141
4142 if (!p->parent) {
4143 /* No parent, so bail here. This can't go in
4144 * the loop exit condition as we also want to
4145 * process the top-level node.
4146 *
4147 * If p->parent is NULL then p->cookies are
4148 * the domain cookies and thus we don't even
4149 * try matching against them.
4150 */
4151 break;
4152 }
4153
4154 /* Consider p itself - may be the result of Path=/foo */
4155 for (c = p->cookies; c; c = c->next) {
4156 if (c->expires != -1 && c->expires < now)
4157 /* cookie has expired => ignore */
4158 continue;
4159
4160 /* Ensure cookie path is a prefix of the resource */
4161 if (strncmp(c->path, path, strlen(c->path)) != 0)
4162 /* paths don't match => ignore */
4163 continue;
4164
4165 if (c->secure && lwc_string_isequal(p->scheme,((*(&match) = ((p->scheme) == (corestring_lwc_https)))
, lwc_error_ok)
4166 corestring_lwc_https,((*(&match) = ((p->scheme) == (corestring_lwc_https)))
, lwc_error_ok)
4167 &match)((*(&match) = ((p->scheme) == (corestring_lwc_https)))
, lwc_error_ok)
&&
4168 match == false0)
4169 /* Secure cookie for insecure server
4170 * => ignore */
4171 continue;
4172
4173 matched_cookies[count++] = c;
4174
4175 GROW_MATCHED_COOKIES;
4176
4177 if (c->version < (unsigned int) version)
4178 version = c->version;
4179
4180 c->last_used = now;
4181
4182 cookie_manager_add((struct cookie_data *)c);
4183 }
4184
4185 }
4186
4187 /* Finally consider domain cookies for hosts which domain match ours */
4188 for (h = (const struct host_part *)p; h && h != &db_root;
4189 h = h->parent) {
4190 for (c = h->paths.cookies; c; c = c->next) {
4191 if (c->expires != -1 && c->expires < now)
4192 /* cookie has expired => ignore */
4193 continue;
4194
4195 /* Ensure cookie path is a prefix of the resource */
4196 if (strncmp(c->path, path, strlen(c->path)) != 0)
4197 /* paths don't match => ignore */
4198 continue;
4199
4200 if (c->secure && lwc_string_isequal(scheme,((*(&match) = ((scheme) == (corestring_lwc_https))), lwc_error_ok
)
4201 corestring_lwc_https,((*(&match) = ((scheme) == (corestring_lwc_https))), lwc_error_ok
)
4202 &match)((*(&match) = ((scheme) == (corestring_lwc_https))), lwc_error_ok
)
&&
4203 match == false0)
4204 /* secure cookie for insecure host. ignore */
4205 continue;
4206
4207 matched_cookies[count++] = c;
4208
4209 GROW_MATCHED_COOKIES;
4210
4211 if (c->version < (unsigned int)version)
4212 version = c->version;
4213
4214 c->last_used = now;
4215
4216 cookie_manager_add((struct cookie_data *)c);
4217 }
4218 }
4219
4220 if (count == 0) {
4221 /* No cookies found */
4222 free(ret);
4223 free(matched_cookies);
4224 return NULL((void*)0);
4225 }
4226
4227 /* and build output string */
4228 if (version > COOKIE_NETSCAPE) {
4229 sprintf(ret, "$Version=%d", version);
4230 ret_used = strlen(ret) + 1;
4231 }
4232
4233 for (i = 0; i < count; i++) {
4234 if (!urldb_concat_cookie(matched_cookies[i], version,
4235 &ret_used, &ret_alloc, &ret)) {
4236 free(ret);
4237 free(matched_cookies);
4238 return NULL((void*)0);
4239 }
4240 }
4241
4242 if (version == COOKIE_NETSCAPE) {
4243 /* Old-style cookies => no version & skip "; " */
4244 memmove(ret, ret + 2, ret_used - 2);
4245 ret_used -= 2;
4246 }
4247
4248 /* Now, shrink the output buffer to the required size */
4249 {
4250 char *temp = realloc(ret, ret_used);
4251 if (!temp) {
4252 free(ret);
4253 free(matched_cookies);
4254 return NULL((void*)0);
4255 }
4256
4257 ret = temp;
4258 }
4259
4260 free(matched_cookies);
4261
4262 return ret;
4263
4264#undef GROW_MATCHED_COOKIES
4265}
4266
4267
4268/* exported interface documented in content/urldb.h */
4269void urldb_delete_cookie(const char *domain, const char *path,
4270 const char *name)
4271{
4272 urldb_delete_cookie_hosts(domain, path, name, &db_root);
4273}
4274
4275
4276/* exported interface documented in content/urldb.h */
4277void urldb_load_cookies(const char *filename)
4278{
4279 FILE *fp;
4280 char s[16*1024];
4281
4282 assert(filename)((filename) ? (void) (0) : __assert_fail ("filename", "content/urldb.c"
, 4282, __extension__ __PRETTY_FUNCTION__))
;
4283
4284 fp = fopen(filename, "r");
4285 if (!fp)
4286 return;
4287
4288#define FIND_T { \
4289 for (; *p && *p != '\t'; p++) \
4290 ; /* do nothing */ \
4291 if (p >= end) { \
4292 NSLOG(netsurf, INFO, "Overran input")do { if (NSLOG_LEVEL_INFO >= NSLOG_LEVEL_VERBOSE) { static
nslog_entry_context_t _nslog_ctx = { &__nslog_category_netsurf
, NSLOG_LEVEL_INFO, "content/urldb.c", sizeof("content/urldb.c"
) - 1, __PRETTY_FUNCTION__, sizeof(__PRETTY_FUNCTION__) - 1, 4292
, }; nslog__log(&_nslog_ctx, "Overran input"); } } while(
0)
; \
4293 continue; \
4294 } \
4295 *p++ = '\0'; \
4296 }
4297
4298#define SKIP_T { \
4299 for (; *p && *p == '\t'; p++) \
4300 ; /* do nothing */ \
4301 if (p >= end) { \
4302 NSLOG(netsurf, INFO, "Overran input")do { if (NSLOG_LEVEL_INFO >= NSLOG_LEVEL_VERBOSE) { static
nslog_entry_context_t _nslog_ctx = { &__nslog_category_netsurf
, NSLOG_LEVEL_INFO, "content/urldb.c", sizeof("content/urldb.c"
) - 1, __PRETTY_FUNCTION__, sizeof(__PRETTY_FUNCTION__) - 1, 4302
, }; nslog__log(&_nslog_ctx, "Overran input"); } } while(
0)
; \
4303 continue; \
4304 } \
4305 }
4306
4307 while (fgets(s, sizeof s, fp)) {
4308 char *p = s, *end = 0,
4309 *domain, *path, *name, *value, *scheme, *url,
4310 *comment;
4311 int version, domain_specified, path_specified,
4312 secure, http_only, no_destroy, value_quoted;
4313 time_t expires, last_used;
4314 struct cookie_internal_data *c;
4315
4316 if(s[0] == 0 || s[0] == '#')
4317 /* Skip blank lines or comments */
4318 continue;
4319
4320 s[strlen(s) - 1] = '\0'; /* lose terminating newline */
4321 end = s + strlen(s);
4322
4323 /* Look for file version first
4324 * (all input is ignored until this is read)
4325 */
4326 if (strncasecmp(s, "Version:", 8) == 0) {
4327 FIND_T; SKIP_T; loaded_cookie_file_version = atoi(p);
4328
4329 if (loaded_cookie_file_version <
4330 MIN_COOKIE_FILE_VERSION100) {
4331 NSLOG(netsurf, INFO,do { if (NSLOG_LEVEL_INFO >= NSLOG_LEVEL_VERBOSE) { static
nslog_entry_context_t _nslog_ctx = { &__nslog_category_netsurf
, NSLOG_LEVEL_INFO, "content/urldb.c", sizeof("content/urldb.c"
) - 1, __PRETTY_FUNCTION__, sizeof(__PRETTY_FUNCTION__) - 1, 4332
, }; nslog__log(&_nslog_ctx, "Unsupported Cookie file version"
); } } while(0)
4332 "Unsupported Cookie file version")do { if (NSLOG_LEVEL_INFO >= NSLOG_LEVEL_VERBOSE) { static
nslog_entry_context_t _nslog_ctx = { &__nslog_category_netsurf
, NSLOG_LEVEL_INFO, "content/urldb.c", sizeof("content/urldb.c"
) - 1, __PRETTY_FUNCTION__, sizeof(__PRETTY_FUNCTION__) - 1, 4332
, }; nslog__log(&_nslog_ctx, "Unsupported Cookie file version"
); } } while(0)
;
4333 break;
4334 }
4335
4336 continue;
4337 } else if (loaded_cookie_file_version == 0) {
4338 /* Haven't yet seen version; skip this input */
4339 continue;
4340 }
4341
4342 /* One cookie/line */
4343
4344 /* Parse input */
4345 FIND_T; version = atoi(s);
4346 SKIP_T; domain = p; FIND_T;
4347 SKIP_T; domain_specified = atoi(p); FIND_T;
4348 SKIP_T; path = p; FIND_T;
4349 SKIP_T; path_specified = atoi(p); FIND_T;
4350 SKIP_T; secure = atoi(p); FIND_T;
4351 if (loaded_cookie_file_version > 101) {
4352 /* Introduced in version 1.02 */
4353 SKIP_T; http_only = atoi(p); FIND_T;
4354 } else {
4355 http_only = 0;
4356 }
4357 SKIP_T; expires = (time_t)atoi(p); FIND_T;
4358 SKIP_T; last_used = (time_t)atoi(p); FIND_T;
4359 SKIP_T; no_destroy = atoi(p); FIND_T;
4360 SKIP_T; name = p; FIND_T;
4361 SKIP_T; value = p; FIND_T;
4362 if (loaded_cookie_file_version > 100) {
4363 /* Introduced in version 1.01 */
4364 SKIP_T; value_quoted = atoi(p); FIND_T;
4365 } else {
4366 value_quoted = 0;
4367 }
4368 SKIP_T; scheme = p; FIND_T;
4369 SKIP_T; url = p; FIND_T;
4370
4371 /* Comment may have no content, so don't
4372 * use macros as they'll break */
4373 for (; *p && *p == '\t'; p++)
4374 ; /* do nothing */
4375 comment = p;
4376
4377 assert(p <= end)((p <= end) ? (void) (0) : __assert_fail ("p <= end", "content/urldb.c"
, 4377, __extension__ __PRETTY_FUNCTION__))
;
4378
4379 /* Now create cookie */
4380 c = malloc(sizeof(struct cookie_internal_data));
4381 if (!c)
4382 break;
4383
4384 c->name = strdup(name);
4385 c->value = strdup(value);
4386 c->value_was_quoted = value_quoted;
4387 c->comment = strdup(comment);
4388 c->domain_from_set = domain_specified;
4389 c->domain = strdup(domain);
4390 c->path_from_set = path_specified;
4391 c->path = strdup(path);
4392 c->expires = expires;
4393 c->last_used = last_used;
4394 c->secure = secure;
4395 c->http_only = http_only;
4396 c->version = version;
4397 c->no_destroy = no_destroy;
4398
4399 if (!(c->name && c->value && c->comment &&
4400 c->domain && c->path)) {
4401 urldb_free_cookie(c);
4402 break;
4403 }
4404
4405 if (c->domain[0] != '.') {
4406 lwc_string *scheme_lwc = NULL((void*)0);
4407 nsurl *url_nsurl = NULL((void*)0);
4408
4409 assert(scheme[0] != 'u')((scheme[0] != 'u') ? (void) (0) : __assert_fail ("scheme[0] != 'u'"
, "content/urldb.c", 4409, __extension__ __PRETTY_FUNCTION__)
)
;
4410
4411 if (nsurl_create(url, &url_nsurl) != NSERROR_OK) {
4412 urldb_free_cookie(c);
4413 break;
4414 }
4415 scheme_lwc = nsurl_get_component(url_nsurl,
4416 NSURL_SCHEME);
4417
4418 /* And insert it into database */
4419 if (!urldb_insert_cookie(c, scheme_lwc, url_nsurl)) {
4420 /* Cookie freed for us */
4421 nsurl_unref(url_nsurl);
4422 lwc_string_unref(scheme_lwc){ lwc_string *__lwc_s = (scheme_lwc); if (__lwc_s != ((void*)
0)) { __lwc_s->refcnt--; if ((__lwc_s->refcnt == 0) || (
(__lwc_s->refcnt == 1) && (__lwc_s->insensitive
== __lwc_s))) lwc_string_destroy(__lwc_s); } }
;
4423 break;
4424 }
4425 nsurl_unref(url_nsurl);
4426 lwc_string_unref(scheme_lwc){ lwc_string *__lwc_s = (scheme_lwc); if (__lwc_s != ((void*)
0)) { __lwc_s->refcnt--; if ((__lwc_s->refcnt == 0) || (
(__lwc_s->refcnt == 1) && (__lwc_s->insensitive
== __lwc_s))) lwc_string_destroy(__lwc_s); } }
;
4427
4428 } else {
4429 if (!urldb_insert_cookie(c, NULL((void*)0), NULL((void*)0))) {
4430 /* Cookie freed for us */
4431 break;
4432 }
4433 }
4434 }
4435
4436#undef SKIP_T
4437#undef FIND_T
4438
4439 fclose(fp);
4440}
4441
4442
4443/* exported interface documented in content/urldb.h */
4444void urldb_save_cookies(const char *filename)
4445{
4446 FILE *fp;
4447 int cookie_file_version = max(loaded_cookie_file_version,(((loaded_cookie_file_version)>(102))?(loaded_cookie_file_version
):(102))
4448 COOKIE_FILE_VERSION)(((loaded_cookie_file_version)>(102))?(loaded_cookie_file_version
):(102))
;
4449
4450 assert(filename)((filename) ? (void) (0) : __assert_fail ("filename", "content/urldb.c"
, 4450, __extension__ __PRETTY_FUNCTION__))
;
4451
4452 fp = fopen(filename, "w");
4453 if (!fp)
4454 return;
4455
4456 fprintf(fp, "# NetSurf cookies file.\n"
4457 "#\n"
4458 "# Lines starting with a '#' are comments, "
4459 "blank lines are ignored.\n"
4460 "#\n"
4461 "# All lines prior to \"Version:\t%d\" are discarded.\n"
4462 "#\n"
4463 "# Version\tDomain\tDomain from Set-Cookie\tPath\t"
4464 "Path from Set-Cookie\tSecure\tHTTP-Only\tExpires\tLast used\t"
4465 "No destroy\tName\tValue\tValue was quoted\tScheme\t"
4466 "URL\tComment\n",
4467 cookie_file_version);
4468 fprintf(fp, "Version:\t%d\n", cookie_file_version);
4469
4470 urldb_save_cookie_hosts(fp, &db_root);
4471
4472 fclose(fp);
4473}
4474
4475
4476/* exported interface documented in netsurf/url_db.h */
4477void urldb_dump(void)
4478{
4479 int i;
4480
4481 urldb_dump_hosts(&db_root);
4482
4483 for (i = 0; i != NUM_SEARCH_TREES28; i++) {
4484 urldb_dump_search(search_trees[i], 0);
4485 }
4486}
4487
4488
4489
4490