docs-netsurf/doxygen/urldb_8c_source.html

/*

 * Copyright 2006 John M Bell <jmb202@ecs.soton.ac.uk>

 * Copyright 2009 John Tytgat <joty@netsurf-browser.org>

 *

 * This file is part of NetSurf, http://www.netsurf-browser.org/

 *

 * NetSurf is free software; you can redistribute it and/or modify

 * it under the terms of the GNU General Public License as published by

 * the Free Software Foundation; version 2 of the License.

 *

 * NetSurf is distributed in the hope that it will be useful,

 * but WITHOUT ANY WARRANTY; without even the implied warranty of

 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the

 * GNU General Public License for more details.

 *

 * You should have received a copy of the GNU General Public License

 * along with this program.  If not, see <http://www.gnu.org/licenses/>.

 */


/**

 * \file

 * Unified URL information database implementation

 *

 * URLs are stored in a tree-based structure as follows:

 *

 * The host component is extracted from each URL and, if a FQDN, split on

 * every '.'. The tree is constructed by inserting each FQDN segment in

 * reverse order. Duplicate nodes are merged.

 *

 * If the host part of an URL is an IP address, then this is added to the

 * tree verbatim (as if it were a TLD).

 *

 * This provides something looking like:

 *

 *                            root (a sentinel)

 *                              |

 *      -------------------------------------------------

 *      |       |       |       |       |       |       |

 *     com     edu     gov  127.0.0.1  net     org     uk       TLDs

 *      |       |       |               |       |       |

 *    google   ...     ...             ...     ...     co       2LDs

 *      |                                               |

 *     www                                             bbc  Hosts/Subdomains

 *                                                      |

 *                                                     www      ...

 *

 * Each of the nodes in this tree is a struct host_part. This stores the

 * FQDN segment (or IP address) with which the node is concerned. Each node

 * may contain further information about paths on a host (struct path_data)

 * or SSL certificate processing on a host-wide basis

 * (host_part::permit_invalid_certs).

 *

 * Path data is concerned with storing various metadata about the path in

 * question. This includes global history data, HTTP authentication details

 * and any associated HTTP cookies. This is stored as a tree of path segments

 * hanging off the relevant host_part node.

 *

 * Therefore, to find the last visited time of the URL

 * http://www.example.com/path/to/resource.html, the FQDN tree would be

 * traversed in the order root -> "com" -> "example" -> "www". The "www"

 * node would have attached to it a tree of struct path_data:

 *

 *                          (sentinel)

 *                              |

 *                             path

 *                              |

 *                             to

 *                              |

 *                         resource.html

 *

 * This represents the absolute path "/path/to/resource.html". The leaf node

 * "resource.html" contains the last visited time of the resource.

 *

 * The mechanism described above is, however, not particularly conducive to

 * fast searching of the database for a given URL (or URLs beginning with a

 * given prefix). Therefore, an ancillary data structure is used to enable

 * fast searching. This structure simply reflects the contents of the

 * database, with entries being added/removed at the same time as for the

 * core database. In order to ensure that degenerate cases are kept to a

 * minimum, we use an AAtree. This is an approximation of a Red-Black tree

 * with similar performance characteristics, but with a significantly

 * simpler implementation. Entries in this tree comprise pointers to the

 * leaf nodes of the host tree described above.

 *

 * REALLY IMPORTANT NOTE: urldb expects all URLs to be normalised. Use of

 * non-normalised URLs with urldb will result in undefined behaviour and

 * potential crashes.

 */


#include <assert.h>

#include <stdbool.h>

#include <stdio.h>

#include <stdlib.h>

#include <string.h>

#include <strings.h>

#include <time.h>

#ifdef WITH_NSPSL

#include <nspsl.h>

#endif


#include "utils/inet.h"

#include "utils/nsoption.h"

#include "utils/log.h"

#include "utils/corestrings.h"

#include "utils/url.h"

#include "utils/utils.h"

#include "utils/bloom.h"

#include "utils/time.h"

#include "utils/nsurl.h"

#include "utils/ascii.h"

#include "utils/http.h"

#include "netsurf/bitmap.h"

#include "desktop/cookie_manager.h"


#include "content/content.h"

#include "content/urldb.h"


/**
 * Internal representation of a single cookie.
 *
 * Cookies form a doubly-linked list (prev/next) whose head and tail are
 * held in path_data::cookies and path_data::cookies_end.
 *
 * urldb_iterate_entries_path() hands these entries to cookie callbacks by
 * casting the pointer to const struct cookie_data *, so the member order
 * and types here are part of that contract.
 *
 * \warning This *must* be kept in sync with the public interface in
 *   netsurf/cookie_db.h
 */
struct cookie_internal_data {
        struct cookie_internal_data *prev;      /**< Previous in list */
        struct cookie_internal_data *next;      /**< Next in list */

        char *name;             /**< Cookie name */
        char *value;            /**< Cookie value */
        bool value_was_quoted;  /**< Value was quoted in Set-Cookie: */
        char *comment;          /**< Cookie comment */
        bool domain_from_set;   /**< Domain came from Set-Cookie: header */
        char *domain;           /**< Domain */
        bool path_from_set;     /**< Path came from Set-Cookie: header */
        char *path;             /**< Path */
        time_t expires;         /**< Expiry timestamp, or -1 for session */
        time_t last_used;       /**< Last used time */
        bool secure;            /**< Only send for HTTPS requests */
        bool http_only;         /**< Only expose to HTTP(S) requests */
        enum cookie_version version;    /**< Specification compliance */
        bool no_destroy;        /**< Never destroy this cookie,
                                 * unless it's expired */

};


/**
 * A protection space
 *
 * A protection space is defined as the tuple (canonical_root_url, realm).
 * This structure lives as a linked list element in a leaf host_part
 * struct, which only supplies the host name, so the scheme and port are
 * stored here as well to complete the canonical_root_url.
 */
struct prot_space_data {
        /**
         * URL scheme of canonical hostname of this protection space.
         */
        lwc_string *scheme;
        /**
         * Port number of canonical hostname of this protection
         * space. When 0, it means the default port for given scheme,
         * i.e. 80 (http), 443 (https).
         */
        unsigned int port;
        /** Protection realm */
        char *realm;

        /**
         * Authentication details for this protection space in form
         * username:password
         */
        char *auth;
        /** Next sibling (next protection space in the host's list) */
        struct prot_space_data *next;
};


/**
 * Metadata about a URL
 *
 * urldb_iterate_entries_path() passes a pointer to this structure to URL
 * callbacks after casting it to const struct url_data *, so the member
 * order and types here are part of that contract.
 *
 * \warning must be kept in sync with url_data structure in netsurf/url_db.h
 */
struct url_internal_data {
        char *title;            /**< Resource title */
        unsigned int visits;    /**< Visit count */
        time_t last_visit;      /**< Last visit time */
        content_type type;      /**< Type of resource */
};


/**
 * Data entry for a URL: one node in the per-host tree of path segments.
 *
 * Nodes are linked to their siblings (next/prev), their parent and their
 * children (children/last), which is what the iterative tree walks in this
 * file rely on.
 */
struct path_data {
        nsurl *url;             /**< Full URL */
        lwc_string *scheme;     /**< URL scheme for data */
        unsigned int port;      /**< Port number for data. When 0, it means
                                 * the default port for given scheme, i.e.
                                 * 80 (http), 443 (https). */
        char *segment;          /**< Path segment for this node */
        unsigned int frag_cnt;  /**< Number of entries in path_data::fragment */
        char **fragment;        /**< Array of fragments */
        bool persistent;        /**< This entry should persist */

        struct url_internal_data urld;  /**< URL data for resource */

        /**
         * Protection space to which this resource belongs. Can be
         * NULL when it does not belong to a protection space or when
         * it is not known. Not owned by this node (ownership is with
         * struct host_part::prot_space).
         */
        const struct prot_space_data *prot_space;
        /** Cookies associated with resource (head of list) */
        struct cookie_internal_data *cookies;
        /** Last cookie in list */
        struct cookie_internal_data *cookies_end;

        struct path_data *next; /**< Next sibling */
        struct path_data *prev; /**< Previous sibling */
        struct path_data *parent; /**< Parent path segment */
        struct path_data *children; /**< Child path segments */
        struct path_data *last; /**< Last child */
};


/**
 * HSTS state recorded for a host (see host_part::hsts)
 */
struct hsts_data {
        time_t expires; /**< Expiry time */
        bool include_sub_domains; /**< Whether to include subdomains */
};


/**
 * One node of the host tree: a single FQDN segment (or a whole IP address)
 * together with the path tree and host-wide settings attached to it.
 */
struct host_part {
        /**
         * Known paths on this host. This _must_ be first so that
         * struct host_part *h = (struct host_part *)mypath; works
         */
        struct path_data paths;
        /**
         * Allow access to SSL protected resources on this host
         * without verifying certificate authenticity
         */
        bool permit_invalid_certs;
        /* HSTS data */
        struct hsts_data hsts;

        /**
         * Part of host string
         */
        char *part;

        /**
         * Linked list of all protection spaces known for this
         * host and all its schemes and ports.
         */
        struct prot_space_data *prot_space;

        struct host_part *next; /**< Next sibling */
        struct host_part *prev; /**< Previous sibling */
        struct host_part *parent; /**< Parent host part */
        struct host_part *children; /**< Child host parts */
};


/**
 * Search index node
 *
 * Node of the search trees (search_trees[]). Each node refers to an entry
 * in the host tree rather than owning any host data itself.
 */
struct search_node {
        const struct host_part *data;   /**< Host tree entry */

        unsigned int level;             /**< Node level */

        struct search_node *left;       /**< Left subtree */
        struct search_node *right;      /**< Right subtree */
};


/** Root database handle (the sentinel root of the host tree) */
static struct host_part db_root;

/**
 * Search trees - one per letter + 1 for IPs + 1 for Everything Else
 * (26 + 1 + 1 = 28 trees in total).
 */
#define NUM_SEARCH_TREES 28
/** Index of the search tree for IP addresses */
#define ST_IP 0
/** Index of the search tree for "Everything Else" */
#define ST_EE 1
/** NOTE(review): presumably the index of the first per-letter tree - confirm */
#define ST_DN 2
/**
 * Sentinel search node: both of its child links point back at itself.
 * An empty (sub)tree is represented by a pointer to this node, which is
 * what tree walkers compare against to stop descending.
 */
static struct search_node empty = { 0, 0, &empty, &empty };
/** Roots of the search trees; every tree starts out empty */
static struct search_node *search_trees[NUM_SEARCH_TREES] = {
        &empty, &empty, &empty, &empty, &empty, &empty, &empty, &empty,
        &empty, &empty, &empty, &empty, &empty, &empty, &empty, &empty,
        &empty, &empty, &empty, &empty, &empty, &empty, &empty, &empty,
        &empty, &empty, &empty, &empty
};

/** Minimum cookie database file version */
#define MIN_COOKIE_FILE_VERSION 100
/** Current cookie database file version */
#define COOKIE_FILE_VERSION 102
/** loaded cookie file version */
static int loaded_cookie_file_version;

/** Minimum URL database file version */
#define MIN_URL_FILE_VERSION 106
/** Current URL database file version */
#define URL_FILE_VERSION 107

/**
 * filter for url presence in database
 *
 * Bloom filter used for short-circuiting the false case of "is this
 * URL in the database?".  BLOOM_SIZE controls how large the filter is
 * in bytes.  Primitive experimentation shows that for a filter of X
 * bytes filled with X items, searching for X items not in the filter
 * has a 5% false-positive rate.  We set it to 32kB, which should be
 * enough for all but the largest databases, while not being
 * shockingly wasteful on memory.
 */
static struct bloom_filter *url_bloom;
/**
 * Size of url filter, in bytes (32kB)
 */
#define BLOOM_SIZE (1024 * 32)


/**
 * Emit a time_t value, followed by a newline, to a file in a portable way
 *
 * The value is formatted with nsc_sntimet(); should that produce no
 * output, the value is written as a plain int instead.
 *
 * \param fp File to write to
 * \param val the unix time value to output
 * \return NSERROR_OK on success
 */
static nserror urldb_write_timet(FILE *fp, time_t val)
{
        char buf[32];
        int len = nsc_sntimet(buf, sizeof buf, &val);

        if (len != 0) {
                /* formatter produced len characters */
                fprintf(fp, "%.*s\n", len, buf);
        } else {
                /* nothing was formatted; fall back to an int rendering */
                fprintf(fp, "%i\n", (int)val);
        }

        return NSERROR_OK;
}


/**
 * Write the record for a single leaf path node
 *
 * The record is a sequence of lines: scheme, port (blank when the port is
 * 0, i.e. the scheme default), path, visit count, last visit time, entry
 * type, one empty line and finally the title (blank when there is none).
 *
 * Control characters in the title are replaced by spaces and trailing
 * spaces are stripped; this modifies the stored title in place.
 *
 * \param fp File to write to
 * \param p Leaf path node to write
 * \param path Path string for the node
 */
static void
urldb_write_path_entry(FILE *fp, const struct path_data *p, const char *path)
{
        fprintf(fp, "%s\n", lwc_string_data(p->scheme));

        /* port is unsigned, so it must be formatted with %u */
        if (p->port) {
                fprintf(fp, "%u\n", p->port);
        } else {
                fprintf(fp, "\n");
        }

        fprintf(fp, "%s\n", path);

        /** \todo handle fragments? */

        /* number of visits (unsigned, hence %u) */
        fprintf(fp, "%u\n", p->urld.visits);

        /* time entry was last used */
        urldb_write_timet(fp, p->urld.last_visit);

        /* entry type */
        fprintf(fp, "%i\n", (int)p->urld.type);

        /* An empty line is always emitted here.
         * NOTE(review): presumably a placeholder for a field the loader
         * still expects - confirm against the loader. */
        fprintf(fp, "\n");

        if (p->urld.title) {
                uint8_t *s = (uint8_t *) p->urld.title;
                int i;

                /* Control characters would corrupt the line-based format */
                for (i = 0; s[i] != '\0'; i++) {
                        if (s[i] < 32) {
                                s[i] = ' ';
                        }
                }

                /* Strip trailing spaces (the first character is kept) */
                for (--i; ((i > 0) && (s[i] == ' ')); i--) {
                        s[i] = '\0';
                }

                fprintf(fp, "%s\n", p->urld.title);
        } else {
                fprintf(fp, "\n");
        }
}

/**
 * Write paths associated with a host
 *
 * Walks the path tree below \a parent iteratively, building the path
 * string of each node in \a *path as it descends and trimming it again as
 * it ascends. A record is written for every leaf that is persistent, or
 * that has been visited at least once and more recently than \a expiry.
 *
 * If the path buffer cannot be grown, the walk stops silently and any
 * remaining entries are not written.
 *
 * \param parent Root of (sub)tree to write
 * \param host Current host name
 * \param fp File to write to
 * \param path Current path string (buffer may be reallocated)
 * \param path_alloc Allocated size of path
 * \param path_used Used size of path, including the terminating NUL
 * \param expiry Expiry time of URLs
 */
static void
urldb_write_paths(const struct path_data *parent,
                  const char *host,
                  FILE *fp,
                  char **path,
                  int *path_alloc,
                  int *path_used,
                  time_t expiry)
{
        const struct path_data *p = parent;

        do {
                int seglen = p->segment != NULL ? strlen(p->segment) : 0;
                int len = *path_used + seglen + 1;

                if (*path_alloc < len) {
                        /* Grow to exactly len, or by 64 bytes for short
                         * paths */
                        int new_alloc = (len > 64) ? len : *path_alloc + 64;
                        char *temp = realloc(*path, new_alloc);
                        if (!temp) {
                                return;
                        }
                        *path = temp;
                        *path_alloc = new_alloc;
                }

                /* Append this segment over the current terminator */
                if (p->segment != NULL) {
                        memcpy(*path + *path_used - 1, p->segment, seglen);
                }

                if (p->children != NULL) {
                        /* Interior node: segment is followed by '/' */
                        (*path)[*path_used + seglen - 1] = '/';
                        (*path)[*path_used + seglen] = '\0';
                } else {
                        /* Leaf: no separator, so one byte less is used */
                        (*path)[*path_used + seglen - 1] = '\0';
                        len -= 1;
                }

                *path_used = len;

                if (p->children != NULL) {
                        /* Drill down into children */
                        p = p->children;
                } else {
                        /* leaf node */
                        if (p->persistent ||
                            ((p->urld.last_visit > expiry) &&
                             (p->urld.visits > 0))) {
                                urldb_write_path_entry(fp, p, *path);
                        }

                        /* Now, find next node to process. */
                        while (p != parent) {
                                int remove = p->segment != NULL
                                        ? strlen(p->segment) : 0;

                                /* Remove our segment from the path */
                                *path_used -= remove;
                                (*path)[*path_used - 1] = '\0';

                                if (p->next != NULL) {
                                        /* Have a sibling, process that */
                                        p = p->next;
                                        break;
                                }

                                /* Going up, so remove '/' */
                                *path_used -= 1;
                                (*path)[*path_used - 1] = '\0';

                                /* Ascend tree */
                                p = p->parent;
                        }
                }
        } while (p != parent);
}


/**
 * Count the URLs below a path tree root that are worth saving
 *
 * A leaf is counted when it is marked persistent, or when it has been
 * visited at least once and its last visit is later than \a expiry.
 * The result is added to \a *count (which is not reset here).
 *
 * \param root Root of path data tree
 * \param expiry Expiry time for URLs
 * \param count Pointer to count
 */
static void
urldb_count_urls(const struct path_data *root,
                 time_t expiry,
                 unsigned int *count)
{
        const struct path_data *node = root;

        do {
                bool wanted;

                if (node->children != NULL) {
                        /* Interior node: keep descending */
                        node = node->children;
                        continue;
                }

                /* Leaf node: decide whether it counts */
                wanted = node->persistent ||
                         ((node->urld.visits > 0) &&
                          (node->urld.last_visit > expiry));
                if (wanted) {
                        *count += 1;
                }

                /* Climb until a node with an unvisited sibling is found,
                 * or we are back at the root */
                while (node != root && node->next == NULL) {
                        node = node->parent;
                }

                if (node != root) {
                        node = node->next;
                }
        } while (node != root);
}


/**
 * Save a search (sub)tree
 *
 * Performs an in-order walk of the search tree. For each node the full
 * host name is rebuilt by following host_part::parent links, and a host
 * header line "<host> <hsts include subdomains> <hsts expiry>" followed by
 * the URL count is written, then the host's URL records. Hosts with no
 * URLs to save and no live HSTS data are skipped.
 *
 * Host names that do not fit the internal 256 byte buffer are written
 * truncated.
 *
 * \param parent root node of search tree to save.
 * \param fp File to write to
 */
static void urldb_save_search_tree(struct search_node *parent, FILE *fp)
{
        char host[256];
        const struct host_part *h;
        unsigned int path_count = 0;
        char *path, *p, *end;
        int path_alloc = 64, path_used = 1;
        time_t expiry, hsts_expiry = 0;
        int hsts_include_subdomains = 0;

        if (parent == &empty)
                return;

        expiry = time(NULL) - ((60 * 60 * 24) * nsoption_int(expire_url));

        urldb_save_search_tree(parent->left, fp);

        path = malloc(path_alloc);
        if (!path)
                return;

        path[0] = '\0';

        /* Make sure host is a valid string even if no part is appended */
        host[0] = '\0';

        for (h = parent->data, p = host, end = host + sizeof host;
             h && h != &db_root && p < end; h = h->parent) {
                int written = snprintf(p, end - p, "%s%s", h->part,
                                       (h->parent && h->parent->parent) ? "." : "");
                if (written < 0) {
                        free(path);
                        return;
                }

                if (written >= end - p) {
                        /* Output was truncated (snprintf returns the
                         * length it wanted to write). Stop here rather
                         * than advancing p beyond the buffer. */
                        break;
                }

                p += written;
        }

        h = parent->data;
        if (h && h->hsts.expires > expiry) {
                hsts_expiry = h->hsts.expires;
                hsts_include_subdomains = h->hsts.include_sub_domains;
        }

        urldb_count_urls(&parent->data->paths, expiry, &path_count);

        if (path_count > 0 || hsts_expiry) {
                /* Host header: name, HSTS details, number of URLs */
                fprintf(fp, "%s %i ", host, hsts_include_subdomains);
                urldb_write_timet(fp, hsts_expiry);
                fprintf(fp, "%u\n", path_count);

                if (path_count > 0) {
                        urldb_write_paths(&parent->data->paths, host, fp,
                                          &path, &path_alloc, &path_used,
                                          expiry);
                }
        }

        free(path);

        urldb_save_search_tree(parent->right, fp);
}


/**
 * Path data iterator (internal)
 *
 * Visits every leaf of the path tree rooted at \a parent. When
 * \a url_callback is given it is invoked with the leaf's URL and URL data;
 * otherwise \a cookie_callback is invoked once per cookie attached to the
 * leaf. Iteration stops as soon as a callback returns false.
 *
 * \param parent Root of subtree to iterate over
 * \param url_callback Callback function
 * \param cookie_callback Callback function
 * \return true to continue, false otherwise
 */
static bool
urldb_iterate_entries_path(const struct path_data *parent,
                bool (*url_callback)(nsurl *url, const struct url_data *data),
                bool (*cookie_callback)(const struct cookie_data *data))
{
        const struct path_data *node = parent;

        do {
                if (node->children != NULL) {
                        /* Interior node: keep descending */
                        node = node->children;
                        continue;
                }

                /* Leaf node. Every leaf is expected to carry a URL or
                 * cookies; anything else points at a bug in the file
                 * loader/URL insertion code. At least one callback must
                 * have been supplied to consume them. */
                assert(url_callback || cookie_callback);

                /** \todo handle fragments? */
                if (url_callback) {
                        assert(node->url);

                        if (!url_callback(node->url,
                                        (const struct url_data *) &node->urld))
                                return false;
                } else {
                        const struct cookie_data *cookie =
                                (const struct cookie_data *) node->cookies;

                        while (cookie != NULL) {
                                if (!cookie_callback(cookie))
                                        return false;
                                cookie = cookie->next;
                        }
                }

                /* Climb until a node with an unvisited sibling is found,
                 * or we are back at the starting node */
                while (node != parent && node->next == NULL) {
                        node = node->parent;
                }

                if (node != parent) {
                        node = node->next;
                }
        } while (node != parent);

        return true;
}


/**
 * Check whether a host string is an IP address.
 *
 * This call detects IPv4 addresses (all of dotted-quad or subsets,
 * decimal or hexadecimal notations) and IPv6 addresses (including
 * those containing embedded IPv4 addresses.)
 *
 * Only the full dotted-quad form is reported as an IPv4 address, and
 * IPv6 addresses must be enclosed in square brackets.
 *
 * If \a host contains a '/', only the text before it is examined; this
 * needs a temporary copy, and if that copy cannot be allocated the host
 * is reported as not being an IP address.
 *
 * \param host a hostname terminated by '\0'
 * \return true if the hostname is an IP address, false otherwise
 */
static bool urldb__host_is_ip_address(const char *host)
{
        struct in_addr ipv4;
        size_t host_len = strlen(host);
        const char *sane_host = host;
        char *host_copy = NULL; /* owned copy, only set when host has a '/' */
        const char *slash;
        bool is_ip = false;
#ifndef NO_IPV6
        struct in6_addr ipv6;
        char ipv6_addr[64];
        unsigned int ipv6_addr_len;
#endif
        /**
         * @todo FIXME Some parts of urldb.c make confusions between hosts
         * and "prefixes", we can sometimes be erroneously passed more than
         * just a host.  Sometimes we may be passed trailing slashes, or even
         * whole path segments.  A specific criminal in this class is
         * urldb_iterate_partial, which takes a prefix to search for, but
         * passes that prefix to functions that expect only hosts.
         *
         * For the time being, we will accept such calls; we check if there
         * is a / in the host parameter, and if there is, we take a copy and
         * replace the / with a \0.  This is not a permanent solution; we
         * should search through NetSurf and find all the callers that are
         * in error and fix them.  When doing this task, it might be wise
         * to replace the hideousness below with code that doesn't have to do
         * this, and add assert(strchr(host, '/') == NULL); somewhere.
         * -- rjek - 2010-11-04
         */

        slash = strchr(host, '/');
        if (slash != NULL) {
                host_copy = strdup(host);
                if (host_copy == NULL) {
                        /* Out of memory: the host cannot be isolated, so
                         * it cannot be classified as an IP address */
                        return false;
                }
                host_copy[slash - host] = '\0';
                sane_host = host_copy;
                host_len = slash - host;
                NSLOG(netsurf, INFO, "WARNING: called with non-host '%s'",
                      host);
        }

        /* Cheap rejection: any character outside this set rules out both
         * IPv4 and bracketed IPv6 literals */
        if (strspn(sane_host, "0123456789abcdefABCDEF[].:") < host_len)
                goto out;

        if (inet_aton(sane_host, &ipv4) != 0) {
                /* This can only be a sane IPv4 address if it contains 3 dots.
                 * Helpfully, inet_aton is happy to treat "a", "a.b", "a.b.c",
                 * and "a.b.c.d" as valid IPv4 address strings where we only
                 * support the full, dotted-quad, form.
                 */
                int num_dots = 0;
                size_t index;

                for (index = 0; index < host_len; index++) {
                        if (sane_host[index] == '.')
                                num_dots++;
                }

                is_ip = (num_dots == 3);
                goto out;
        }

#ifndef NO_IPV6
        /* IPv6 literals must be at least "[x::x]"-sized and bracketed */
        if ((host_len < 6) ||
            (sane_host[0] != '[') ||
            (sane_host[host_len - 1] != ']')) {
                goto out;
        }

        /* Copy the text between the brackets, clamped to the buffer */
        ipv6_addr_len = host_len - 2;
        if (ipv6_addr_len >= sizeof(ipv6_addr)) {
                ipv6_addr_len = sizeof(ipv6_addr) - 1;
        }
        memcpy(ipv6_addr, sane_host + 1, ipv6_addr_len);
        ipv6_addr[ipv6_addr_len] = '\0';

        if (inet_pton(AF_INET6, ipv6_addr, &ipv6) == 1)
                is_ip = true;
#endif

out:
        /* free(NULL) is a no-op, so no guard is needed */
        free(host_copy);
        return is_ip;
}


/**
 * Compare host_part with prefix
 *
 * The prefix is consumed one '.'-separated segment at a time while the
 * host tree is climbed from \a a towards the root via host_part::parent.
 * IP address prefixes are instead compared against the node's part in one
 * length-limited comparison.
 *
 * \param a host part
 * \param b prefix
 * \return 0 if match, non-zero, otherwise
 */
static int urldb_search_match_prefix(const struct host_part *a, const char *b)
{
        const char *limit, *sep;
        int part_len, diff;

        assert(a && a != &db_root && b);

        if (urldb__host_is_ip_address(b)) {
                /* IP address */
                return strncasecmp(a->part, b, strlen(b));
        }

        /* One past the prefix's terminating NUL */
        limit = b + strlen(b) + 1;

        for (; b < limit && a && a != &db_root;
             b = sep + 1, a = a->parent) {
                sep = strchr(b, '.');
                if (sep == NULL) {
                        /* last segment: ends at the terminator */
                        sep = limit - 1;
                }

                /* Length-limited comparison of this segment */
                diff = strncasecmp(a->part, b, sep - b);
                if (diff != 0) {
                        /* mismatch => report the difference */
                        return diff;
                }

                if (sep < limit - 1) {
                        /* Segment lengths only matter when the prefix has
                         * further segments after this one */
                        part_len = strlen(a->part);
                        if (part_len > sep - b) {
                                /* host part longer than prefix segment */
                                return 1;
                        }
                        if (part_len < sep - b) {
                                /* host part shorter than prefix segment */
                                return -1;
                        }
                }
        }

        /* Host parts ran out while prefix segments remain => no match */
        if ((!a || a == &db_root) && b < limit) {
                return -1;
        }

        /* Either the prefix ran out first (it matches) or both ended
         * together (identical) */
        return 0;
}


/**
 * Partial host iterator (internal)
 *
 * Recursively searches a search tree for hosts matching \a prefix and
 * invokes \a callback for every URL stored under each matching host.
 *
 * \param root Root of (sub)tree to traverse
 * \param prefix Prefix to match
 * \param callback Callback function
 * \return true to continue, false otherwise
 */
static bool
urldb_iterate_partial_host(struct search_node *root,
                const char *prefix,
                bool (*callback)(nsurl *url, const struct url_data *data))
{
        int order;

        assert(root && prefix && callback);

        /* Reached the sentinel: nothing to do in this subtree */
        if (root == &empty)
                return true;

        order = urldb_search_match_prefix(root->data, prefix);

        if (order > 0) {
                /* No match => only the left subtree can hold matches */
                return urldb_iterate_partial_host(root->left,
                                                  prefix,
                                                  callback);
        }

        if (order < 0) {
                /* No match => only the right subtree can hold matches */
                return urldb_iterate_partial_host(root->right,
                                                  prefix,
                                                  callback);
        }

        /* Match => left subtree, then this node, then right subtree */
        if (!urldb_iterate_partial_host(root->left, prefix, callback))
                return false;

        /* Report all paths attached to this host, if it has any */
        if (root->data->paths.children &&
            !urldb_iterate_entries_path(&root->data->paths, callback, NULL))
                return false;

        return urldb_iterate_partial_host(root->right, prefix, callback);
}


/**
 * Partial path iterator (internal)
 *
 * Given: http://www.example.org/a/b/c/d//e
 * and assuming a path tree:
 *     ^
 *    / \
 *   a1 b1
 *  / \
 * a2 b2
 *    /|\
 *   a b c
 *   3 3 |
 *       d
 *       |
 *       e
 *      / \
 *      f g
 *
 * Prefix will be:      p will be:
 *
 * a/b/c/d//e           a1
 *   b/c/d//e           a2
 *   b/c/d//e           b2
 *     c/d//e           a3
 *     c/d//e           b3
 *     c/d//e           c
 *       d//e           d
 *         /e           e               (skip /)
 *          e           e
 *
 * I.e. at each level the siblings are scanned until one matches the
 * leading segment of the prefix, and the search then descends into that
 * node's children with the remainder of the prefix. Once the prefix is
 * exhausted, every entry below each matching sibling is reported.
 *
 * Segments are compared case-insensitively and only over the length of
 * the prefix segment. A parent without children yields no callbacks.
 *
 * \param parent Root of (sub)tree to traverse
 * \param prefix Prefix to match
 * \param callback Callback function
 * \return true to continue, false otherwise
 */
static bool
urldb_iterate_partial_path(const struct path_data *parent,
                const char *prefix,
                bool (*callback)(nsurl *url, const struct url_data *data))
{
        const struct path_data *p = parent->children;
        const char *slash, *end = prefix + strlen(prefix);

        /* Test p before its first use so that a parent with no children
         * is handled gracefully */
        while (p != NULL) {
                slash = strchr(prefix, '/');
                if (!slash) {
                        slash = end;
                }

                if (slash == prefix && *prefix == '/') {
                        /* Ignore "//" */
                        prefix++;
                        continue;
                }

                if (strncasecmp(p->segment, prefix, slash - prefix) == 0) {
                        /* prefix matches so far */
                        if (slash == end) {
                                /* we've run out of prefix, so all
                                 * paths below this one match */
                                if (!urldb_iterate_entries_path(p,
                                                                callback,
                                                                NULL)) {
                                        return false;
                                }

                                /* Progress to next sibling */
                                p = p->next;
                        } else {
                                /* Skip over this segment */
                                prefix = slash + 1;

                                p = p->children;
                        }
                } else {
                        /* Doesn't match this segment, try next sibling */
                        p = p->next;
                }
        }

        return true;
}


/**
 * Host data iterator (internal)
 *
 * Walks the search tree in order (left subtree, this node, right subtree).
 * A node's path tree is handed to urldb_iterate_entries_path() when the
 * host has child paths, or when a cookie callback was supplied and the
 * host's root path entry carries cookies.
 *
 * \param parent Root of subtree to iterate over
 * \param url_callback Callback function
 * \param cookie_callback Callback function
 * \return true to continue, false otherwise
 */
static bool
urldb_iterate_entries_host(struct search_node *parent,
                bool (*url_callback)(nsurl *url, const struct url_data *data),
                bool (*cookie_callback)(const struct cookie_data *data))
{
        bool visit_paths;

        /* The sentinel marks the bottom of the tree */
        if (parent == &empty)
                return true;

        /* Everything ordered before this host */
        if (urldb_iterate_entries_host(parent->left,
                                       url_callback,
                                       cookie_callback) == false)
                return false;

        /* This host: only worth visiting if it has paths, or domain
         * cookies that somebody is interested in */
        visit_paths = parent->data->paths.children ||
                      (cookie_callback && parent->data->paths.cookies);

        if (visit_paths &&
            urldb_iterate_entries_path(&parent->data->paths,
                                       url_callback,
                                       cookie_callback) == false)
                return false;

        /* Everything ordered after this host */
        return urldb_iterate_entries_host(parent->right,
                                          url_callback,
                                          cookie_callback);
}


/**
 * Add a host node to the tree
 *
 * The new node is zero-initialised, receives its own copy of \a part and
 * is pushed onto the front of the parent's list of children.
 *
 * \param part Host segment to add (or whole IP address) (copied)
 * \param parent Parent node to add to
 * \return Pointer to added node, or NULL on memory exhaustion
 */
static struct host_part *
urldb_add_host_node(const char *part, struct host_part *parent)
{
        struct host_part *node;
        struct host_part *old_head;

        assert(part && parent);

        node = calloc(1, sizeof(*node));
        if (node == NULL)
                return NULL;

        node->part = strdup(part);
        if (node->part == NULL) {
                free(node);
                return NULL;
        }

        /* Link in as the new head of the parent's child list */
        old_head = parent->children;
        if (old_head != NULL)
                old_head->prev = node;

        node->next = old_head;
        node->parent = parent;
        parent->children = node;

        return node;
}


/**
 * Fragment comparator callback for qsort
 *
 * Each argument points at an array element, i.e. at a pointer to a
 * NUL-terminated fragment string. The strings are compared ignoring case.
 *
 * \param a pointer to first fragment string pointer
 * \param b pointer to second fragment string pointer
 * \return 0 for equal else positive or negative value on comparison
 */
static int urldb_add_path_fragment_cmp(const void *a, const void *b)
{
        /* Keep the const qualification of the array elements intact
         * instead of casting it away */
        const char *const *lhs = a;
        const char *const *rhs = b;

        return strcasecmp(*lhs, *rhs);
}


/**
 * Add a fragment to a path segment
 *
 * The segment's fragment array is grown by one slot, a copy of the
 * fragment text is stored in it and the array is re-sorted
 * (case-insensitively). If copying the text fails, the enlarged array is
 * kept by the segment but the fragment count is left unchanged.
 *
 * \param segment Path segment to add to
 * \param fragment Fragment to add (copied), or NULL
 * \return segment or NULL on memory exhaustion
 */
static struct path_data *
urldb_add_path_fragment(struct path_data *segment, lwc_string *fragment)
{
        char **grown;
        char *copy;

        assert(segment);

        /* Nothing to add: treating this as success keeps callers simple */
        if (fragment == NULL)
                return segment;

        /* Make room for one more entry */
        grown = realloc(segment->fragment,
                        (segment->frag_cnt + 1) * sizeof(*grown));
        if (grown == NULL)
                return NULL;

        /* The segment owns the resized array from here on, whatever
         * happens next */
        segment->fragment = grown;

        copy = strdup(lwc_string_data(fragment));
        segment->fragment[segment->frag_cnt] = copy;
        if (copy == NULL)
                return NULL;

        segment->frag_cnt++;

        /* Fragments are kept in alphabetical order; re-sorting after each
         * append is the simple way of achieving that */
        qsort(segment->fragment,
              segment->frag_cnt,
              sizeof (char *),
              urldb_add_path_fragment_cmp);

        return segment;
}


/**
 * Add a path node to the tree
 *
 * A new node is allocated, populated and linked into the parent's list of
 * children, which is kept in ascending strcmp() order of segment: the node
 * is inserted in front of the first existing child whose segment compares
 * greater, or appended if there is none.
 *
 * \param scheme URL scheme associated with path (copied)
 * \param port Port number on host associated with path
 * \param segment Path segment to add (copied)
 * \param fragment URL fragment (copied), or NULL
 * \param parent Parent node to add to
 * \return Pointer to added node, or NULL on memory exhaustion
 */
static struct path_data *
urldb_add_path_node(lwc_string *scheme,
                    unsigned int port,
                    const char *segment,
                    lwc_string *fragment,
                    struct path_data *parent)
{
        struct path_data *d, *e;

        assert(scheme && segment && parent);

        d = calloc(1, sizeof(struct path_data));
        if (!d)
                return NULL;

        d->scheme = lwc_string_ref(scheme);

        d->port = port;

        d->segment = strdup(segment);
        if (!d->segment) {
                lwc_string_unref(d->scheme);
                free(d);
                return NULL;
        }

        if (fragment) {
                if (!urldb_add_path_fragment(d, fragment)) {
                        /* urldb_add_path_fragment() may have installed a
                         * fragment array on the node before failing to
                         * copy the fragment text, so release that too
                         * (free(NULL) is harmless if it did not) */
                        free(d->fragment);
                        free(d->segment);
                        lwc_string_unref(d->scheme);
                        free(d);
                        return NULL;
                }
        }

        /* Find the first sibling that sorts after the new segment */
        for (e = parent->children; e; e = e->next) {
                if (strcmp(e->segment, d->segment) > 0)
                        break;
        }

        if (e) {
                /* Insert in front of e */
                d->prev = e->prev;
                d->next = e;
                if (e->prev)
                        e->prev->next = d;
                else
                        parent->children = d;
                e->prev = d;
        } else if (!parent->children) {
                /* First child of this parent */
                d->prev = d->next = NULL;
                parent->children = parent->last = d;
        } else {
                /* Sorts after every existing child: append */
                d->next = NULL;
                d->prev = parent->last;
                parent->last->next = d;
                parent->last = d;
        }
        d->parent = parent;

        return d;
}


/**
 * Get the location of the search tree root for a particular host
 *
 * IP addresses share the ST_IP tree, hosts beginning with an ASCII letter
 * use the tree for that letter (case-insensitively, starting at ST_DN)
 * and everything else goes in the ST_EE tree.
 *
 * \param host the host to lookup
 * \return pointer to the slot holding the corresponding search tree root
 */
static struct search_node **urldb_get_search_tree_direct(const char *host)
{
        assert(host);

        if (urldb__host_is_ip_address(host))
                return &search_trees[ST_IP];

        if (!ascii_is_alpha(*host))
                return &search_trees[ST_EE];

        return &search_trees[ST_DN + ascii_to_lower(*host) - 'a'];
}


/**
 * Get the search tree for a particular host
 *
 * Convenience wrapper around urldb_get_search_tree_direct() that yields
 * the current root itself rather than the slot it is stored in.
 *
 * \param host the host to lookup
 * \return the corresponding search tree
 */
static struct search_node *urldb_get_search_tree(const char *host)
{
        struct search_node **slot = urldb_get_search_tree_direct(host);

        return *slot;
}


/**
 * Compare host part with a string
 *
 * If \a b is an IP address it is compared, ignoring case, with the part
 * held by \a a alone. Otherwise the dot-separated labels of \a b are
 * compared in turn (left to right, ignoring case) with the part of \a a,
 * then that of its parent, and so on up towards the database root.
 *
 * \param a host part
 * \param b string to compare
 * \return 0 if they match, otherwise a negative or positive value
 *         giving the ordering of \a a relative to \a b
 */
static int urldb_search_match_string(const struct host_part *a, const char *b)
{
        const char *end;
        const char *dot;
        int part_len;
        int diff;
        bool parts_left;

        assert(a && a != &db_root && b);

        /* IP addresses are stored verbatim in a single node */
        if (urldb__host_is_ip_address(b))
                return strcasecmp(a->part, b);

        /* One past the terminating NUL, so that b == end means that the
         * final label has been consumed */
        end = b + strlen(b) + 1;

        while (b < end && a && a != &db_root) {
                dot = strchr(b, '.');
                if (dot == NULL) {
                        /* final label: it stops at the NUL */
                        dot = end - 1;
                }

                /* Compare only as many characters as this label has */
                diff = strncasecmp(a->part, b, dot - b);
                if (diff != 0)
                        return diff;

                /* Equal so far; the part may still be longer or shorter
                 * than the label */
                part_len = strlen(a->part);
                if (part_len != dot - b)
                        return (part_len > dot - b) ? 1 : -1;

                /* Step to the next label and the next part up the tree */
                b = dot + 1;
                a = a->parent;
        }

        /* One side, or both, ran out. Whichever has something left over
         * is the longer name. */
        parts_left = (a != NULL && a != &db_root);

        if (parts_left && b >= end)
                return 1;

        if (!parts_left && b < end)
                return -1;

        /* Identical */
        return 0;
}


/**
 * Find a node in a search tree
 *
 * Descends from \a root, going left when the node's host orders after
 * \a host and right when it orders before it, until either a match or the
 * sentinel is reached.
 *
 * \param root Tree to look in
 * \param host Host to find
 * \return Pointer to host tree node, or NULL if not found
 */
static const struct host_part *
urldb_search_find(struct search_node *root, const char *host)
{
        struct search_node *node = root;

        for (;;) {
                int c;

                assert(node && host);

                if (node == &empty) {
                        /* fell off the bottom of the tree */
                        return NULL;
                }

                c = urldb_search_match_string(node->data, host);
                if (c == 0) {
                        return node->data;
                }

                node = (c > 0) ? node->left : node->right;
        }
}


/**
 * Match a path string
 *
 * The path is consumed one '/'-separated segment at a time. At each level
 * the siblings are searched for a node whose segment is exactly the
 * wanted segment (case-sensitively) and whose scheme and port also match;
 * the search then continues amongst that node's children.
 *
 * \param parent Path (sub)tree to look in; must be a root entry, i.e.
 *               have no segment of its own
 * \param path The path to search for; must begin with '/'
 * \param scheme The URL scheme associated with the path
 * \param port The port associated with the path
 * \return Pointer to path data or NULL if not found.
 */
static struct path_data *
urldb_match_path(const struct path_data *parent,
                 const char *path,
                 lwc_string *scheme,
                 unsigned short port)
{
        const struct path_data *p;
        const char *slash;
        size_t seglen;
        bool match;

        assert(parent != NULL);
        assert(parent->segment == NULL);

        if (path[0] != '/') {
                NSLOG(netsurf, INFO, "path is %s", path);
        }

        assert(path[0] == '/');

        /* Start with children, as parent has no segment */
        p = parent->children;

        while (p != NULL) {
                slash = strchr(path + 1, '/');
                if (!slash) {
                        slash = path + strlen(path);
                }

                /* Length of the wanted segment (may be zero) */
                seglen = slash - (path + 1);

                /* strncmp() on its own only shows that the wanted segment
                 * is a prefix of p->segment, so also insist that
                 * p->segment ends there. Otherwise "/ab" would match a
                 * stored "abc" and "/" would match any first child.
                 * The index is in bounds: the wanted segment contains no
                 * NUL, so strncmp() can only succeed if p->segment has at
                 * least seglen characters. */
                if (strncmp(p->segment, path + 1, seglen) == 0 &&
                    p->segment[seglen] == '\0' &&
                    lwc_string_isequal(p->scheme, scheme, &match) == lwc_error_ok &&
                    match == true &&
                    p->port == port) {
                        if (*slash == '\0') {
                                /* Complete match */
                                return (struct path_data *) p;
                        }

                        /* Match so far, go down tree */
                        p = p->children;

                        path = slash;
                } else {
                        /* No match, try next sibling */
                        p = p->next;
                }
        }

        return NULL;
}


/**
 * Find an URL in the database
 *
 * If a bloom filter is present it is consulted first and a miss ends the
 * search immediately. mailto: URLs are never looked up. URLs without a
 * host are only looked up for the file: scheme, in which case the host
 * "localhost" is used. A missing port is looked up as port 0.
 *
 * \param url Absolute URL to find
 * \return Pointer to path data, or NULL if not found
 */
static struct path_data *urldb_find_url(nsurl *url)
{
        const struct host_part *h;
        struct path_data *p = NULL;
        struct search_node *tree;
        char *plq;
        const char *host_str;
        lwc_string *scheme, *host, *port;
        size_t len = 0;
        unsigned int port_int;
        bool match;

        assert(url);

        if (url_bloom != NULL) {
                if (bloom_search_hash(url_bloom, nsurl_hash(url)) == false) {
                        return NULL;
                }
        }

        scheme = nsurl_get_component(url, NSURL_SCHEME);
        if (scheme == NULL)
                return NULL;

        if (lwc_string_isequal(scheme, corestring_lwc_mailto, &match) ==
            lwc_error_ok && match == true) {
                lwc_string_unref(scheme);
                return NULL;
        }

        host = nsurl_get_component(url, NSURL_HOST);
        if (host != NULL) {
                /* host_str points into the host string's own storage, so
                 * our reference to host is held until the lookups below
                 * have finished with it */
                host_str = lwc_string_data(host);

        } else if (lwc_string_isequal(scheme, corestring_lwc_file, &match) ==
                   lwc_error_ok && match == true) {
                host_str = "localhost";

        } else {
                lwc_string_unref(scheme);
                return NULL;
        }

        tree = urldb_get_search_tree(host_str);
        h = urldb_search_find(tree, host_str);
        if (!h) {
                goto out;
        }

        /* generate plq (path, leaf, query) */
        if (nsurl_get(url, NSURL_PATH | NSURL_QUERY, &plq, &len) != NSERROR_OK) {
                goto out;
        }

        /* Get port */
        port = nsurl_get_component(url, NSURL_PORT);
        if (port != NULL) {
                port_int = atoi(lwc_string_data(port));
                lwc_string_unref(port);
        } else {
                port_int = 0;
        }

        p = urldb_match_path(&h->paths, plq, scheme, port_int);

        free(plq);

out:
        /* Single exit for everything that holds scheme and (maybe) host */
        if (host != NULL) {
                lwc_string_unref(host);
        }
        lwc_string_unref(scheme);

        return p;
}


/**
 * Dump URL database paths to the log
 *
 * Visits every node of the subtree without recursion: a node's children
 * are visited before its next sibling, and the parent links are used to
 * climb back up once a branch is exhausted. Nodes without a segment are
 * not logged themselves.
 *
 * \param parent Parent node of tree to dump
 */
static void urldb_dump_paths(struct path_data *parent)
{
        const struct path_data *node = parent;
        unsigned int frag;

        do {
                if (node->segment != NULL) {
                        NSLOG(netsurf, INFO, "\t%s : %u",
                              lwc_string_data(node->scheme), node->port);

                        NSLOG(netsurf, INFO, "\t\t'%s'", node->segment);

                        for (frag = 0; frag != node->frag_cnt; frag++) {
                                NSLOG(netsurf, INFO, "\t\t\t#%s",
                                      node->fragment[frag]);
                        }
                }

                /* Descend whenever possible */
                if (node->children != NULL) {
                        node = node->children;
                        continue;
                }

                /* Leaf: climb until a node with an unvisited sibling is
                 * found, or we are back where we started */
                while (node != parent && node->next == NULL) {
                        node = node->parent;
                }

                if (node != parent) {
                        node = node->next;
                }
        } while (node != parent);
}


/**
 * Dump URL database hosts to the log
 *
 * Logs this host part (if it has a name) together with its invalid SSL
 * certificate policy, then its paths, then recurses into each child host.
 *
 * \param parent Parent node of tree to dump
 */
static void urldb_dump_hosts(struct host_part *parent)
{
        struct host_part *child;

        if (parent->part != NULL) {
                const char *policy =
                        parent->permit_invalid_certs ? "Permits" : "Denies";

                NSLOG(netsurf, INFO, "%s", parent->part);

                NSLOG(netsurf, INFO, "\t%s invalid SSL certs", policy);
        }

        /* Paths belonging to this host */
        urldb_dump_paths(&parent->paths);

        /* Then each child host in turn */
        child = parent->children;
        while (child != NULL) {
                urldb_dump_hosts(child);
                child = child->next;
        }
}


/**
 * Dump search tree
 *
 * Logs the tree in order, one host per line. Each line is indented by one
 * space per level of tree depth and shows the host's parts from the node
 * up towards the root, separated by dots. Output that does not fit the
 * line buffer is truncated.
 *
 * \param parent Parent node of tree to dump
 * \param depth Tree depth
 */
static void urldb_dump_search(struct search_node *parent, int depth)
{
        const struct host_part *h;
        int i; /* index into string */
        char s[1024];
        int r;
        int sl = sizeof(s) - 2;
        int indent;

        if (parent == &empty)
                return;

        urldb_dump_search(parent->left, depth + 1);

        /* Never indent beyond the buffer, whatever depth we were given */
        indent = depth;
        if (indent < 0) {
                indent = 0;
        }
        if (indent > sl) {
                indent = sl;
        }

        for (i = 0; i != indent; i++) {
                s[i] = ' ';
        }

        for (h = parent->data; h; h = h->parent) {
                if (h->part) {
                        if (i >= sl) {
                                /* no room left for any of this part */
                                break;
                        }
                        r = snprintf(&s[i], sl - i, "%s", h->part);
                        if (r < 0) {
                                break;
                        }
                        if ((i + r) >= sl) {
                                /* truncated: drop the partial part */
                                break;
                        }
                        i += r;
                }

                /* Separator, unless the next part up is the last one
                 * before the root (and only while there is room) */
                if (h->parent && h->parent->parent && i < sl) {
                        s[i]='.';
                        i++;
                }
        }
        s[i]= 0;

        NSLOG(netsurf, INFO, "%s", s);

        urldb_dump_search(parent->right, depth + 1);
}


/**
 * Compare a pair of host parts
 *
 * Both hosts are walked upwards in step, comparing their parts ignoring
 * case, until a difference is found or either walk reaches the database
 * root. If no part differs, the host with parts left over orders after
 * the other.
 *
 * \param a first host part
 * \param b second host part
 * \return 0 if they match, otherwise a negative or positive value
 *         giving the ordering of \a a relative to \a b
 */
static int
urldb_search_match_host(const struct host_part *a, const struct host_part *b)
{
        bool a_done;
        bool b_done;
        int diff;

        assert(a && b);

        /* Climb both chains together for as long as both have parts */
        while (a && a != &db_root && b && b != &db_root) {
                diff = strcasecmp(a->part, b->part);
                if (diff != 0) {
                        /* First differing part decides the ordering */
                        return diff;
                }

                a = a->parent;
                b = b->parent;
        }

        /* No difference found, so compare by number of parts */
        a_done = (a == NULL || a == &db_root);
        b_done = (b == NULL || b == &db_root);

        if (!a_done && b_done) {
                /* a has more parts than b */
                return 1;
        }

        if (a_done && !b_done) {
                /* a has fewer parts than b */
                return -1;
        }

        /* identical */
        return 0;
}


/**
 * Rotate a subtree right
 *
 * The rotation only happens when the left child is on the same level as
 * the root; otherwise the subtree is returned untouched.
 *
 * \param root Root of subtree to rotate
 * \return new root of subtree
 */
static struct search_node *urldb_search_skew(struct search_node *root)
{
        struct search_node *left;

        assert(root);

        left = root->left;
        if (left->level != root->level) {
                /* nothing to fix */
                return root;
        }

        /* The left child becomes the root, with the old root as its
         * right child */
        root->left = left->right;
        left->right = root;

        return left;
}


/**
 * Rotate a node left, increasing the parent's level
 *
 * The rotation only happens when the right child's right child is on the
 * same level as the root; otherwise the subtree is returned untouched.
 *
 * \param root Root of subtree to rotate
 * \return New root of subtree
 */
static struct search_node *urldb_search_split(struct search_node *root)
{
        struct search_node *right;

        assert(root);

        right = root->right;
        if (right->right->level != root->level) {
                /* nothing to fix */
                return root;
        }

        /* The right child becomes the root, one level up, with the old
         * root as its left child */
        root->right = right->left;
        right->left = root;
        right->level++;

        return right;
}


/**
 * Insert node into search tree
 *
 * The node is placed by comparing hosts with urldb_search_match_host().
 * If an equal host is already present, \a n is freed and the tree is left
 * as it was. Otherwise each subtree on the way back up is rebalanced with
 * a skew followed by a split.
 *
 * \param root Root of (sub)tree to insert into
 * \param n Node to insert
 * \return Pointer to updated root
 */
static struct search_node *
urldb_search_insert_internal(struct search_node *root, struct search_node *n)
{
        int c;

        assert(root && n);

        /* Reached the bottom: the new node takes this place */
        if (root == &empty)
                return n;

        c = urldb_search_match_host(root->data, n->data);
        if (c == 0) {
                /* exact match: the new node is not needed */
                free(n);
                return root;
        }

        if (c > 0) {
                root->left = urldb_search_insert_internal(root->left, n);
        } else {
                root->right = urldb_search_insert_internal(root->right, n);
        }

        /* Rebalance this subtree */
        return urldb_search_split(urldb_search_skew(root));
}


/**
 * Insert a node into the search tree
 *
 * Allocates a level 1 leaf node referring to \a data and hands it to
 * urldb_search_insert_internal() for placement.
 *
 * \param root Root of tree to insert into
 * \param data User data to insert
 * \return Pointer to updated root, or NULL if failed
 */
static struct search_node *
urldb_search_insert(struct search_node *root, const struct host_part *data)
{
        struct search_node *leaf;

        assert(root && data);

        leaf = malloc(sizeof(*leaf));
        if (leaf == NULL)
                return NULL;

        /* A fresh leaf: both children are the sentinel */
        leaf->left = &empty;
        leaf->right = &empty;
        leaf->data = data;
        leaf->level = 1;

        return urldb_search_insert_internal(root, leaf);
}


/**
 * Parse a cookie avpair
 *
 * Leading and trailing spaces and tabs are removed from both the name and
 * the value (the buffers are modified in place). The name is then matched
 * case-insensitively against the known attributes Comment, Domain,
 * Max-Age, Path, Version, Expires, Secure and HttpOnly. Any other name is
 * taken to be the cookie's own NAME=VALUE pair, provided the cookie has
 * no name yet; otherwise the pair is ignored.
 *
 * Comment, Domain and Path are only taken from their first occurrence.
 * A Domain value that does not begin with '.' is ignored.
 *
 * \param c Cookie struct to populate
 * \param n Name component
 * \param v Value component
 * \param was_quoted Whether \a v was quoted in the input
 * \return true on success, false on memory exhaustion
 */
static bool
urldb_parse_avpair(struct cookie_internal_data *c,
                   char *n,
                   char *v,
                   bool was_quoted)
{
        size_t len;

        assert(c && n && v);

        /* Strip whitespace from start of name */
        for (; *n; n++) {
                if (*n != ' ' && *n != '\t')
                        break;
        }

        /* Strip whitespace from end of name.
         * The last character lives at index len - 1; index len is the
         * terminating NUL. */
        for (len = strlen(n); len > 0; len--) {
                if (n[len - 1] == ' ' || n[len - 1] == '\t')
                        n[len - 1] = '\0';
                else
                        break;
        }

        /* Strip whitespace from start of value */
        for (; *v; v++) {
                if (*v != ' ' && *v != '\t')
                        break;
        }

        /* Strip whitespace from end of value */
        for (len = strlen(v); len > 0; len--) {
                if (v[len - 1] == ' ' || v[len - 1] == '\t')
                        v[len - 1] = '\0';
                else
                        break;
        }

        if (!c->comment && strcasecmp(n, "Comment") == 0) {
                c->comment = strdup(v);
                if (!c->comment)
                        return false;
        } else if (!c->domain && strcasecmp(n, "Domain") == 0) {
                if (v[0] == '.') {
                        /* Domain must start with a dot */
                        c->domain_from_set = true;
                        c->domain = strdup(v);
                        if (!c->domain)
                                return false;
                }
        } else if (strcasecmp(n, "Max-Age") == 0) {
                int temp = atoi(v);
                if (temp == 0)
                        /* Special case - 0 means delete */
                        c->expires = 0;
                else
                        c->expires = time(NULL) + temp;
        } else if (!c->path && strcasecmp(n, "Path") == 0) {
                c->path_from_set = true;
                c->path = strdup(v);
                if (!c->path)
                        return false;
        } else if (strcasecmp(n, "Version") == 0) {
                c->version = atoi(v);
        } else if (strcasecmp(n, "Expires") == 0) {
                char *datenoday;
                time_t expires;
                nserror res;

                /* Strip dayname from date (these are hugely variable
                 * and liable to break the parser.  They also serve no
                 * useful purpose) */
                for (datenoday = v;
                     *datenoday && !ascii_is_digit(*datenoday);
                     datenoday++) {
                        /* do nothing */
                }

                res = nsc_strntimet(datenoday, strlen(datenoday), &expires);
                if (res != NSERROR_OK) {
                        /* assume we have an unrepresentable date =>
                         * force it to the maximum possible value of a
                         * 32bit time_t (this may break in 2038. We'll
                         * deal with that once we come to it) */
                        expires = (time_t)0x7fffffff;
                }
                c->expires = expires;
        } else if (strcasecmp(n, "Secure") == 0) {
                c->secure = true;
        } else if (strcasecmp(n, "HttpOnly") == 0) {
                c->http_only = true;
        } else if (!c->name) {
                /* Not a known attribute: this is the cookie itself */
                c->name = strdup(n);
                c->value = strdup(v);
                c->value_was_quoted = was_quoted;
                if (!c->name || !c->value) {
                        /* whichever copy succeeded stays attached to
                         * the cookie struct */
                        return false;
                }
        }

        return true;
}


/**
 * Free a cookie
 *
 * Releases the comment, domain, path, name and value strings held by the
 * cookie and then the cookie structure itself.
 *
 * \param c The cookie to free
 */
static void urldb_free_cookie(struct cookie_internal_data *c)
{
        size_t i;

        assert(c);

        {
                /* Every heap string the cookie holds; entries that were
                 * never set are NULL, which free() accepts */
                void *owned[] = {
                        c->comment,
                        c->domain,
                        c->path,
                        c->name,
                        c->value
                };

                for (i = 0; i < sizeof(owned) / sizeof(owned[0]); i++) {
                        free(owned[i]);
                }
        }

        free(c);
}


/**

 * Parse a cookie

 *

 * \param url URL being fetched

 * \param cookie Pointer to cookie string (updated on exit)

 * \return Pointer to cookie structure (on heap, caller frees) or NULL

 */

static struct cookie_internal_data *

urldb_parse_cookie(nsurl *url, const char **cookie)

{

        struct cookie_internal_data *c;

        const char *cur;

        char name[1024], value[4096];

        char *n = name, *v = value;

        bool in_value = false;

        bool had_value_data = false;

        bool value_verbatim = false;

        bool quoted = false;

        bool was_quoted = false;


        assert(url && cookie && *cookie);


        c = calloc(1, sizeof(struct cookie_internal_data));

        if (c == NULL)

                return NULL;


        c->expires = -1;


        name[0] = '\0';

        value[0] = '\0';


        for (cur = *cookie; *cur; cur++) {

                if (*cur == '\r' && *(cur + 1) == '\n') {

                        /* End of header */

                        if (quoted) {

                                /* Unmatched quote encountered */


                                /* Match Firefox 2.0.0.11 */

                                value[0] = '\0';


                        }


                        break;

                } else if (*cur == '\r') {

                        /* Spurious linefeed */

                        continue;

                } else if (*cur == '\n') {

                        /* Spurious newline */

                        continue;

                }


                if (in_value && !had_value_data) {

                        if (*cur == ' ' || *cur == '\t') {

                                /* Strip leading whitespace from value */

                                continue;

                        } else {

                                had_value_data = true;


                                /* Value is taken verbatim if first non-space

                                 * character is not a " */

                                if (*cur != '"') {

                                        value_verbatim = true;

                                }

                        }

                }


                if (in_value && !value_verbatim && (*cur == '"')) {

                        /* Only non-verbatim values may be quoted */

                        if (cur == *cookie || *(cur - 1) != '\\') {

                                /* Only unescaped quotes count */

                                was_quoted = quoted;

                                quoted = !quoted;


                                continue;

                        }

                }


                if (!quoted && !in_value && *cur == '=') {

                        /* First equals => attr-value separator */

                        in_value = true;

                        continue;

                }


                if (!quoted && (was_quoted || *cur == ';')) {

                        /* Semicolon or after quoted value

                         * => end of current avpair */


                        /* NUL-terminate tokens */

                        *n = '\0';

                        *v = '\0';


                        if (!urldb_parse_avpair(c, name, value, was_quoted)) {

                                /* Memory exhausted */

                                urldb_free_cookie(c);

                                return NULL;

                        }


                        /* And reset to start */

                        n = name;

                        v = value;

                        in_value = false;

                        had_value_data = false;

                        value_verbatim = false;

                        was_quoted = false;


                        /* Now, if the current input is anything other than a

                         * semicolon, we must be sure to reprocess it */

                        if (*cur != ';') {

                                cur--;

                        }


                        continue;

                }


                /* And now handle commas. These are a pain as they may mean

                 * any of the following:

                 *

                 * + End of cookie

                 * + Day separator in Expires avpair

                 * + (Invalid) comma in unquoted value

                 *

                 * Therefore, in order to handle all 3 cases (2 and 3 are

                 * identical, the difference being that 2 is in the spec and

                 * 3 isn't), we need to determine where the comma actually

                 * lies. We use the following heuristic:

                 *

                 *   Given a comma at the current input position, find the

                 *   immediately following semicolon (or end of input if none

                 *   found). Then, consider the input characters between

                 *   these two positions. If any of these characters is an

                 *   '=', we must assume that the comma signified the end of

                 *   the current cookie.

                 *

                 * This holds as the first avpair of any cookie must be

                 * NAME=VALUE, so the '=' is guaranteed to appear in the

                 * case where the comma marks the end of a cookie.

                 *

                 * This will fail, however, in the case where '=' appears in

                 * the value of the current avpair after the comma or the

                 * subsequent cookie does not start with NAME=VALUE. Neither

                 * of these is particularly likely and if they do occur, the

                 * website is more broken than we can be bothered to handle.

                 */

                if (!quoted && *cur == ',') {

                        /* Find semi-colon, if any */

                        const char *p;

                        const char *semi = strchr(cur + 1, ';');

                        if (!semi)

                                semi = cur + strlen(cur) - 2 /* CRLF */;


                        /* Look for equals sign between comma and semi */

                        for (p = cur + 1; p < semi; p++)

                                if (*p == '=')

                                        break;


                        if (p == semi) {

                                /* none found => comma internal to value */

                                /* do nothing */

                        } else {

                                /* found one => comma marks end of cookie */

                                cur++;

                                break;

                        }

                }


                /* Accumulate into buffers, always leaving space for a NUL */

                /** \todo is silently truncating overlong names/values wise? */

                if (!in_value) {

                        if (n < name + (sizeof(name) - 1))

                                *n++ = *cur;

                } else {

                        if (v < value + (sizeof(value) - 1))

                                *v++ = *cur;

                }

        }


        /* Parse final avpair */

        *n = '\0';

        *v = '\0';


        if (!urldb_parse_avpair(c, name, value, was_quoted)) {

                /* Memory exhausted */

                urldb_free_cookie(c);

                return NULL;

        }


        /* Now fix-up default values */

        if (c->domain == NULL) {

                lwc_string *host = nsurl_get_component(url, NSURL_HOST);

                if (host == NULL) {

                        urldb_free_cookie(c);

                        return NULL;

                }

                c->domain = strdup(lwc_string_data(host));

                lwc_string_unref(host);

        }


        if (c->path == NULL) {

                const char *path_data;

                char *path, *slash;

                lwc_string *path_lwc;


                path_lwc = nsurl_get_component(url, NSURL_PATH);

                if (path_lwc == NULL) {

                        urldb_free_cookie(c);

                        return NULL;

                }

                path_data = lwc_string_data(path_lwc);


                /* Strip leafname and trailing slash (4.3.1) */

                slash = strrchr(path_data, '/');

                if (slash != NULL) {

                        /* Special case: retain first slash in path */

                        if (slash == path_data)

                                slash++;


                        slash = strndup(path_data, slash - path_data);

                        if (slash == NULL) {

                                lwc_string_unref(path_lwc);

                                urldb_free_cookie(c);

                                return NULL;

                        }


                        path = slash;

                        lwc_string_unref(path_lwc);

                } else {

                        path = strdup(lwc_string_data(path_lwc));

                        lwc_string_unref(path_lwc);

                        if (path == NULL) {

                                urldb_free_cookie(c);

                                return NULL;

                        }

                }


                c->path = path;

        }


        /* Write back current position */

        *cookie = cur;


        return c;

}


/**
 * Add a path to the database, creating any intermediate entries
 *
 * The path is split in place on '/'. At each level the children of the
 * node reached so far are searched for an entry whose segment text, scheme
 * and port all match; if none exists a new node is created.
 *
 * \param scheme URL scheme associated with path
 * \param port Port number on host associated with path
 * \param host Host tree node to attach to
 * \param path_query Absolute path plus query to add (freed). A NULL
 *                   pointer is treated as memory exhaustion.
 * \param fragment URL fragment, or NULL
 * \param url URL (fragment ignored)
 * \return Pointer to leaf node, or NULL on memory exhaustion
 */
static struct path_data *
urldb_add_path(lwc_string *scheme,
               unsigned int port,
               const struct host_part *host,
               char *path_query,
               lwc_string *fragment,
               nsurl *url)
{
        struct path_data *d, *e;
        char *segment, *slash;
        bool match;

        assert(scheme && host && url);

        /* path_query is owned by this function and is typically a fresh
         * allocation made by the caller; if that allocation failed there
         * is nothing to parse (and nothing to free). */
        if (path_query == NULL) {
                return NULL;
        }

        d = (struct path_data *) &host->paths;

        /* skip leading '/' */
        segment = path_query;
        if (*segment == '/') {
                segment++;
        }

        /* Process path segments */
        do {
                slash = strchr(segment, '/');
                if (!slash) {
                        /* last segment */
                        /* look for existing entry */
                        for (e = d->children; e; e = e->next) {
                                if (strcmp(segment, e->segment) == 0 &&
                                    lwc_string_isequal(scheme,
                                                       e->scheme, &match) ==
                                    lwc_error_ok &&
                                    match == true &&
                                    e->port == port) {
                                        break;
                                }
                        }

                        /* Existing leaf: just record the fragment.
                         * Otherwise create the leaf under d. */
                        d = e ? urldb_add_path_fragment(e, fragment) :
                                urldb_add_path_node(scheme, port,
                                                    segment, fragment, d);
                        break;
                }

                /* Terminate the current segment in place */
                *slash = '\0';

                /* look for existing entry */
                for (e = d->children; e; e = e->next) {
                        if (strcmp(segment, e->segment) == 0 &&
                            lwc_string_isequal(scheme, e->scheme,
                                               &match) == lwc_error_ok &&
                            match == true &&
                            e->port == port) {
                                break;
                        }
                }

                /* Intermediate nodes carry no fragment */
                d = e ? e : urldb_add_path_node(scheme, port, segment, NULL, d);
                if (!d) {
                        break;
                }

                segment = slash + 1;
        } while (1);

        free(path_query);

        if (d && !d->url) {
                /* Insert defragmented URL */
                if (nsurl_defragment(url, &d->url) != NSERROR_OK) {
                        return NULL;
                }
        }

        return d;
}


/**
 * Add a host to the database, creating any intermediate entries
 *
 * IP addresses are added verbatim as a child of the database root and
 * indexed in the ST_IP search tree. Any other host is copied into a local
 * buffer (hosts longer than 255 bytes are truncated by that copy), split on
 * '.' and inserted label by label starting from the rightmost label.
 *
 * \param host Hostname to add
 * \return Pointer to leaf node, or NULL on memory exhaustion
 */
static struct host_part *urldb_add_host(const char *host)
{
        struct host_part *d = (struct host_part *) &db_root, *e;
        struct search_node *s;
        char buf[256]; /* 256 bytes is sufficient - domain names are
                        * limited to 255 chars. */
        char *part;

        assert(host);

        if (urldb__host_is_ip_address(host)) {
                /* Host is an IP, so simply add as TLD */

                /* Check for existing entry */
                for (e = d->children; e; e = e->next) {
                        if (strcasecmp(host, e->part) == 0) {
                                /* found => return it */
                                return e;
                        }
                }

                d = urldb_add_host_node(host, d);
                if (d == NULL) {
                        /* Node creation failed: there is nothing to index,
                         * so do not hand NULL to the search tree (the FQDN
                         * path below applies the same guard). */
                        return NULL;
                }

                s = urldb_search_insert(search_trees[ST_IP], d);
                if (!s) {
                        /* failed */
                        return NULL;
                }

                search_trees[ST_IP] = s;

                return d;
        }

        /* Copy host string, so we can corrupt it */
        strncpy(buf, host, sizeof buf);
        buf[sizeof buf - 1] = '\0';

        /* Process FQDN segments backwards */
        do {
                part = strrchr(buf, '.');
                if (!part) {
                        /* last segment */
                        /* Check for existing entry */
                        for (e = d->children; e; e = e->next) {
                                if (strcasecmp(buf, e->part) == 0) {
                                        break;
                                }
                        }

                        if (e) {
                                d = e;
                        } else {
                                d = urldb_add_host_node(buf, d);
                        }

                        /* And insert into search tree */
                        if (d) {
                                struct search_node **r;

                                r = urldb_get_search_tree_direct(buf);
                                s = urldb_search_insert(*r, d);
                                if (!s) {
                                        /* failed */
                                        d = NULL;
                                } else {
                                        *r = s;
                                }
                        }
                        break;
                }

                /* Check for existing entry */
                for (e = d->children; e; e = e->next) {
                        if (strcasecmp(part + 1, e->part) == 0) {
                                break;
                        }
                }

                d = e ? e : urldb_add_host_node(part + 1, d);
                if (!d) {
                        break;
                }

                /* Chop the label just handled off the end of the buffer */
                *part = '\0';
        } while (1);

        return d;
}


/**
 * Insert a cookie into the database
 *
 * Domain cookies (domain starting with '.') are attached to the root path
 * node of the matching host; all other cookies are attached to the path
 * node for the cookie's path, which is created if necessary.
 *
 * If a cookie with the same domain, path and name is already stored at
 * that node it is either removed (when \a c has an expiry time in the
 * past; \a c is then freed as well) or replaced by \a c.
 *
 * \param c The cookie to insert
 * \param scheme URL scheme associated with cookie path
 * \param url URL (sans fragment) associated with cookie
 * \return true on success, false on memory exhaustion (c will be freed)
 */
static bool
urldb_insert_cookie(struct cookie_internal_data *c,
                    lwc_string *scheme,
                    nsurl *url)
{
        struct cookie_internal_data *d;
        const struct host_part *h;
        struct path_data *p;
        time_t now = time(NULL);

        assert(c);

        if (c->domain[0] == '.') {
                /* Domain cookie: host lookup ignores the leading dot */
                h = urldb_search_find(
                        urldb_get_search_tree(&(c->domain[1])),
                        c->domain + 1);
                if (!h) {
                        h = urldb_add_host(c->domain + 1);
                        if (!h) {
                                urldb_free_cookie(c);
                                return false;
                        }
                }

                p = (struct path_data *) &h->paths;
        } else {
                char *path_copy;

                /* Need to have a URL and scheme, if it's not a domain cookie */
                assert(url != NULL);
                assert(scheme != NULL);

                h = urldb_search_find(
                        urldb_get_search_tree(c->domain),
                        c->domain);

                if (!h) {
                        h = urldb_add_host(c->domain);
                        if (!h) {
                                urldb_free_cookie(c);
                                return false;
                        }
                }

                /* urldb_add_path() consumes (and modifies) the string it
                 * is given, so hand it a private copy. Check the copy
                 * here rather than passing a possible NULL onwards. */
                path_copy = strdup(c->path);
                if (path_copy == NULL) {
                        urldb_free_cookie(c);
                        return false;
                }

                /* find path */
                p = urldb_add_path(scheme, 0, h, path_copy, NULL, url);
                if (!p) {
                        urldb_free_cookie(c);
                        return false;
                }
        }

        /* add cookie */
        for (d = p->cookies; d; d = d->next) {
                if (!strcmp(d->domain, c->domain) &&
                    !strcmp(d->path, c->path) &&
                    !strcmp(d->name, c->name)) {
                        break;
                }
        }

        if (d) {
                if (c->expires != -1 && c->expires < now) {
                        /* remove cookie */
                        if (d->next) {
                                d->next->prev = d->prev;
                        } else {
                                p->cookies_end = d->prev;
                        }
                        if (d->prev) {
                                d->prev->next = d->next;
                        } else {
                                p->cookies = d->next;
                        }

                        cookie_manager_remove((struct cookie_data *)d);

                        urldb_free_cookie(d);
                        urldb_free_cookie(c);
                } else {
                        /* replace d with c */
                        c->prev = d->prev;
                        c->next = d->next;
                        if (c->next) {
                                c->next->prev = c;
                        } else {
                                p->cookies_end = c;
                        }
                        if (c->prev) {
                                c->prev->next = c;
                        } else {
                                p->cookies = c;
                        }

                        cookie_manager_remove((struct cookie_data *)d);
                        urldb_free_cookie(d);

                        cookie_manager_add((struct cookie_data *)c);
                }
        } else {
                /* No existing cookie: append to the end of the list */
                c->prev = p->cookies_end;
                c->next = NULL;
                if (p->cookies_end) {
                        p->cookies_end->next = c;
                } else {
                        p->cookies = c;
                }
                p->cookies_end = c;

                cookie_manager_add((struct cookie_data *)c);
        }

        return true;
}


/**
 * Concatenate a cookie into the provided buffer
 *
 * Output is written starting at offset *used - 1 of the buffer, i.e. over
 * the byte preceding the current end (presumably the existing NUL
 * terminator -- confirm against the caller). The buffer is enlarged with
 * realloc() when the worst-case length of this cookie would not fit.
 *
 * \param c Cookie to concatenate
 * \param version The version of the cookie string to output
 * \param used Pointer to amount of buffer used (updated)
 * \param alloc Pointer to allocated size of buffer (updated)
 * \param buf Pointer to Pointer to buffer (updated)
 * \return true on success, false on memory exhaustion
 */
static bool
urldb_concat_cookie(struct cookie_internal_data *c,
                    int version,
                    int *used,
                    int *alloc,
                    char **buf)
{
        /* Combined (A)BNF for the Cookie: request header:
         *
         * CHAR           = <any US-ASCII character (octets 0 - 127)>
         * CTL            = <any US-ASCII control character
         *                  (octets 0 - 31) and DEL (127)>
         * CR             = <US-ASCII CR, carriage return (13)>
         * LF             = <US-ASCII LF, linefeed (10)>
         * SP             = <US-ASCII SP, space (32)>
         * HT             = <US-ASCII HT, horizontal-tab (9)>
         * <">            = <US-ASCII double-quote mark (34)>
         *
         * CRLF           = CR LF
         *
         * LWS            = [CRLF] 1*( SP | HT )
         *
         * TEXT           = <any OCTET except CTLs,
         *                  but including LWS>
         *
         * token          = 1*<any CHAR except CTLs or separators>
         * separators     = "(" | ")" | "<" | ">" | "@"
         *                | "," | ";" | ":" | "\" | <">
         *                | "/" | "[" | "]" | "?" | "="
         *                | "{" | "}" | SP | HT
         *
         * quoted-string  = ( <"> *(qdtext | quoted-pair ) <"> )
         * qdtext         = <any TEXT except <">>
         * quoted-pair    = "\" CHAR
         *
         * attr            =       token
         * value           =       word
         * word            =       token | quoted-string
         *
         * cookie          =       "Cookie:" cookie-version
         *                         1*((";" | ",") cookie-value)
         * cookie-value    =       NAME "=" VALUE [";" path] [";" domain]
         * cookie-version  =       "$Version" "=" value
         * NAME            =       attr
         * VALUE           =       value
         * path            =       "$Path" "=" value
         * domain          =       "$Domain" "=" value
         *
         * A note on quoted-string handling:
         *   The cookie data stored in the db is verbatim (i.e. sans enclosing
         *   <">, if any, and with all quoted-pairs intact) thus all that we
         *   need to do here is ensure that value strings which were quoted
         *   in Set-Cookie or which include any of the separators are quoted
         *   before use.
         *
         * A note on cookie-value separation:
         *   We use semicolons for all separators, including between
         *   cookie-values. This simplifies things and is backwards compatible.
         */
        const char * const separators = "()<>@,;:\\\"/[]?={} \t";

        int max_len;

        assert(c && used && alloc && buf && *buf);

        /* "; " cookie-value
         * We allow for the possibility that values are quoted
         */
        max_len = 2 + strlen(c->name) + 1 + strlen(c->value) + 2 +
                (c->path_from_set ?
                 8 + strlen(c->path) + 2 : 0) +
                (c->domain_from_set ?
                 10 + strlen(c->domain) + 2 : 0);

        if (*used + max_len >= *alloc) {
                /* Grow in 4096 byte steps until this cookie is certain to
                 * fit. A single step is not sufficient when the name and
                 * value together exceed 4096 bytes, and the sprintf()
                 * calls below do no bounds checking of their own. */
                int new_alloc = *alloc;
                char *temp;

                do {
                        new_alloc += 4096;
                } while (*used + max_len >= new_alloc);

                temp = realloc(*buf, new_alloc);
                if (!temp) {
                        /* *buf is still valid and unchanged */
                        return false;
                }
                *buf = temp;
                *alloc = new_alloc;
        }

        if (version == COOKIE_NETSCAPE) {
                /* Original Netscape cookie */
                sprintf(*buf + *used - 1, "; %s=", c->name);
                *used += 2 + strlen(c->name) + 1;

                /* The Netscape spec doesn't mention quoting of cookie values.
                 * RFC 2109 $10.1.3 indicates that values must not be quoted.
                 *
                 * However, other browsers preserve quoting, so we should, too
                 */
                if (c->value_was_quoted) {
                        sprintf(*buf + *used - 1, "\"%s\"", c->value);
                        *used += 1 + strlen(c->value) + 1;
                } else {
                        /** \todo should we %XX-encode [;HT,SP] ? */
                        /** \todo Should we strip escaping backslashes? */
                        sprintf(*buf + *used - 1, "%s", c->value);
                        *used += strlen(c->value);
                }

                /* We don't send path/domain information -- that's what the
                 * Netscape spec suggests we should do, anyway. */
        } else {
                /* RFC2109 or RFC2965 cookie */
                sprintf(*buf + *used - 1, "; %s=", c->name);
                *used += 2 + strlen(c->name) + 1;

                /* Value needs quoting if it contains any separator or if
                 * it needs preserving from the Set-Cookie header */
                if (c->value_was_quoted ||
                    strpbrk(c->value, separators) != NULL) {
                        sprintf(*buf + *used - 1, "\"%s\"", c->value);
                        *used += 1 + strlen(c->value) + 1;
                } else {
                        sprintf(*buf + *used - 1, "%s", c->value);
                        *used += strlen(c->value);
                }

                if (c->path_from_set) {
                        /* Path, quoted if necessary */
                        sprintf(*buf + *used - 1, "; $Path=");
                        *used += 8;

                        if (strpbrk(c->path, separators) != NULL) {
                                sprintf(*buf + *used - 1, "\"%s\"", c->path);
                                *used += 1 + strlen(c->path) + 1;
                        } else {
                                sprintf(*buf + *used - 1, "%s", c->path);
                                *used += strlen(c->path);
                        }
                }

                if (c->domain_from_set) {
                        /* Domain, quoted if necessary */
                        sprintf(*buf + *used - 1, "; $Domain=");
                        *used += 10;

                        if (strpbrk(c->domain, separators) != NULL) {
                                sprintf(*buf + *used - 1, "\"%s\"", c->domain);
                                *used += 1 + strlen(c->domain) + 1;
                        } else {
                                sprintf(*buf + *used - 1, "%s", c->domain);
                                *used += strlen(c->domain);
                        }
                }
        }

        return true;
}


/**
 * Delete a cookie from a path subtree.
 *
 * The subtree rooted at \a parent is searched node by node; the first
 * cookie whose domain, path and name all compare equal is unlinked from
 * its node's cookie list and freed, after which the search stops.
 *
 * \param domain the cookie domain
 * \param path the cookie path
 * \param name The cookie name
 * \param parent Root of the path subtree to search
 */
static void
urldb_delete_cookie_paths(const char *domain,
                          const char *path,
                          const char *name,
                          struct path_data *parent)
{
        struct path_data *node = parent;

        assert(parent);

        for (;;) {
                struct cookie_internal_data *ck = node->cookies;

                while (ck != NULL) {
                        if (strcmp(ck->domain, domain) != 0 ||
                            strcmp(ck->path, path) != 0 ||
                            strcmp(ck->name, name) != 0) {
                                /* not the one we want */
                                ck = ck->next;
                                continue;
                        }

                        /* Detach from the predecessor (or list head) */
                        if (ck->prev != NULL) {
                                ck->prev->next = ck->next;
                        } else {
                                node->cookies = ck->next;
                        }

                        /* Detach from the successor (or list tail) */
                        if (ck->next != NULL) {
                                ck->next->prev = ck->prev;
                        } else {
                                node->cookies_end = ck->prev;
                        }

                        urldb_free_cookie(ck);
                        return;
                }

                /* Advance: first child if any, otherwise the nearest
                 * following sibling of this node or of an ancestor */
                if (node->children != NULL) {
                        node = node->children;
                } else {
                        while (node != parent) {
                                if (node->next != NULL) {
                                        node = node->next;
                                        break;
                                }

                                node = node->parent;
                        }
                }

                /* Back at the starting node: whole subtree visited */
                if (node == parent) {
                        return;
                }
        }
}


/**

 * Deletes cookie hosts and their assoicated paths

 *

 * \param domain the cookie domain

 * \param path the cookie path

 * \param name The cookie name

 * \param parent The url data of the cookie

 */

static void

urldb_delete_cookie_hosts(const char *domain,

                          const char *path,

                          const char *name,

                          struct host_part *parent)

{

        struct host_part *h;

        assert(parent);


        urldb_delete_cookie_paths(domain, path, name, &parent->paths);


        for (h = parent->children; h; h = h->next) {

                urldb_delete_cookie_hosts(domain, path, name, h);

        }

}


/**
 * Save a path subtree's cookies
 *
 * Every node of the subtree rooted at \a parent is visited. Each cookie
 * that is neither a session cookie (expires == -1) nor already expired is
 * written to \a fp as one line of tab separated fields.
 *
 * \param fp File pointer to write to
 * \param parent Parent path
 */
static void urldb_save_cookie_paths(FILE *fp, struct path_data *parent)
{
        struct path_data *node = parent;
        time_t now = time(NULL);

        assert(fp && parent);

        for (;;) {
                struct cookie_internal_data *ck;

                for (ck = node->cookies; ck != NULL; ck = ck->next) {
                        const char *scheme_text;
                        const char *url_text;
                        const char *comment_text;

                        /* Skip expired & session cookies */
                        if (ck->expires == -1 || ck->expires < now) {
                                continue;
                        }

                        /* Placeholders for data this node does not have */
                        if (node->scheme) {
                                scheme_text = lwc_string_data(node->scheme);
                        } else {
                                scheme_text = "unused";
                        }

                        if (node->url) {
                                url_text = nsurl_access(node->url);
                        } else {
                                url_text = "unused";
                        }

                        if (ck->comment) {
                                comment_text = ck->comment;
                        } else {
                                comment_text = "";
                        }

                        fprintf(fp,
                                "%d\t%s\t%d\t%s\t%d\t%d\t%d\t%d\t%d\t%d\t"
                                "%s\t%s\t%d\t%s\t%s\t%s\n",
                                ck->version, ck->domain,
                                ck->domain_from_set, ck->path,
                                ck->path_from_set, ck->secure,
                                ck->http_only,
                                (int)ck->expires, (int)ck->last_used,
                                ck->no_destroy, ck->name, ck->value,
                                ck->value_was_quoted,
                                scheme_text,
                                url_text,
                                comment_text);
                }

                /* Advance: first child if any, otherwise the nearest
                 * following sibling of this node or of an ancestor */
                if (node->children != NULL) {
                        node = node->children;
                } else {
                        while (node != parent) {
                                if (node->next != NULL) {
                                        node = node->next;
                                        break;
                                }

                                node = node->parent;
                        }
                }

                /* Back at the starting node: whole subtree visited */
                if (node == parent) {
                        break;
                }
        }
}


/**
 * Save a host subtree's cookies
 *
 * Writes the cookies held in the path tree of \a parent, then recurses
 * into each child host in list order.
 *
 * \param fp File pointer to write to
 * \param parent Parent host
 */
static void urldb_save_cookie_hosts(FILE *fp, struct host_part *parent)
{
        struct host_part *child;

        assert(fp && parent);

        /* Cookies attached to this host's own paths */
        urldb_save_cookie_paths(fp, &parent->paths);

        /* Then those of every host beneath it */
        child = parent->children;
        while (child != NULL) {
                urldb_save_cookie_hosts(fp, child);
                child = child->next;
        }
}


/**
 * Destroy a cookie node
 *
 * Frees the strings held by the cookie and then the cookie structure
 * itself. The cookie is not unlinked from any list here.
 *
 * \param c Cookie to destroy
 */
static void urldb_destroy_cookie(struct cookie_internal_data *c)
{
        /* Where the cookie applies */
        free(c->path);
        free(c->domain);

        /* What the cookie contains */
        free(c->comment);
        free(c->value);
        free(c->name);

        /* Finally the structure itself */
        free(c);
}


/**
 * Destroy the contents of a path node
 *
 * Drops the node's URL and scheme references (when present) and frees
 * its segment, fragment list, title and every cookie attached to it.
 *
 * \param node Node to destroy contents of (does not destroy node)
 */
static void urldb_destroy_path_node_content(struct path_data *node)
{
        struct cookie_internal_data *ck;
        unsigned int idx = 0;

        /* Reference counted members */
        if (node->url != NULL) {
                nsurl_unref(node->url);
        }

        if (node->scheme != NULL) {
                lwc_string_unref(node->scheme);
        }

        free(node->segment);

        /* Each fragment string, then the array holding them */
        while (idx < node->frag_cnt) {
                free(node->fragment[idx]);
                idx++;
        }
        free(node->fragment);

        free(node->urld.title);

        /* Cookie list: fetch the successor before releasing each entry */
        ck = node->cookies;
        while (ck != NULL) {
                struct cookie_internal_data *following = ck->next;

                urldb_destroy_cookie(ck);
                ck = following;
        }
}


/**
 * Destroy protection space data
 *
 * Releases the realm and auth strings, drops the scheme reference and
 * frees the structure. The entry is not unlinked from any list here.
 *
 * \param space Protection space to destroy
 */
static void urldb_destroy_prot_space(struct prot_space_data *space)
{
        /* Owned strings */
        free(space->auth);
        free(space->realm);

        /* Shared, reference counted scheme */
        lwc_string_unref(space->scheme);

        /* And the container */
        free(space);
}


/**

 * Destroy a path tree

 *

 * Walks the subtree below \a root iteratively (no recursion), following

 * the children, next and parent links. Every node has its contents

 * released with urldb_destroy_path_node_content() and is then freed;

 * children are freed before their parent and \a root itself is freed last.

 * Siblings of \a root are never visited.

 *

 * \param root Root node of tree to destroy (freed by this call)

 */

static void urldb_destroy_path_tree(struct path_data *root)

{

        struct path_data *p = root;


        do {

                if (p->children != NULL) {

                        /* Descend first: children go before their parent */

                        p = p->children;

                } else {

                        /* q trails p: it is the node to free once p has

                         * moved off it */

                        struct path_data *q = p;


                        while (p != root) {

                                if (p->next != NULL) {

                                        /* Step to the sibling; q (the node

                                         * just left) is freed after the

                                         * loop */

                                        p = p->next;

                                        break;

                                }


                                /* No sibling: climb before freeing, as

                                 * q's parent link is read here */

                                p = p->parent;


                                urldb_destroy_path_node_content(q);

                                free(q);


                                q = p;

                        }


                        /* Frees the node left behind by the break above,

                         * or root itself if the climb reached it */

                        urldb_destroy_path_node_content(q);

                        free(q);

                }

        /* p equals root here only once root has been freed above; the

         * pointers are compared, not dereferenced */

        } while (p != root);

}


/**
 * Destroy a host tree
 *
 * Recursively destroys every child host, then this host's path trees,
 * the contents of its embedded root path node, its protection space
 * list, and finally the host node itself.
 *
 * \param root Root node of tree to destroy
 */
static void urldb_destroy_host_tree(struct host_part *root)
{
        struct host_part *child;
        struct path_data *branch;
        struct prot_space_data *space;

        /* Destroy children */
        child = root->children;
        while (child != NULL) {
                struct host_part *sibling = child->next;

                urldb_destroy_host_tree(child);
                child = sibling;
        }

        /* Now clean up paths */
        branch = root->paths.children;
        while (branch != NULL) {
                struct path_data *following = branch->next;

                urldb_destroy_path_tree(branch);
                branch = following;
        }

        /* Root path: embedded in the host, so only its contents go */
        urldb_destroy_path_node_content(&root->paths);

        /* Protection space data */
        space = root->prot_space;
        while (space != NULL) {
                struct prot_space_data *upcoming = space->next;

                urldb_destroy_prot_space(space);
                space = upcoming;
        }

        /* And ourselves */
        free(root->part);
        free(root);
}


/**
 * Destroy a search tree
 *
 * Recursively frees the left subtree, then the right subtree, then the
 * node itself. A link pointing at the shared `empty` node is not
 * followed, so that node is never freed.
 *
 * \param root Root node of tree to destroy
 */
static void urldb_destroy_search_tree(struct search_node *root)
{
        struct search_node *lhs = root->left;
        struct search_node *rhs = root->right;

        /* Children first */
        if (lhs != &empty) {
                urldb_destroy_search_tree(lhs);
        }

        if (rhs != &empty) {
                urldb_destroy_search_tree(rhs);
        }

        /* Then this node */
        free(root);
}


/*************** External interface ***************/


/* exported interface documented in content/urldb.h */
void urldb_destroy(void)
{
        struct host_part *top;
        int tree;

        /* Search trees: free each populated tree and reset its slot to
         * the empty node */
        for (tree = 0; tree < NUM_SEARCH_TREES; tree++) {
                if (search_trees[tree] == &empty) {
                        continue;
                }

                urldb_destroy_search_tree(search_trees[tree]);
                search_trees[tree] = &empty;
        }

        /* Host database: destroy every top level host, then clear the
         * root so it no longer refers to freed nodes */
        top = db_root.children;
        while (top != NULL) {
                struct host_part *following = top->next;

                urldb_destroy_host_tree(top);
                top = following;
        }
        memset(&db_root, 0, sizeof(db_root));

        /* Bloom filter, if one was created */
        if (url_bloom != NULL) {
                bloom_destroy(url_bloom);
                url_bloom = NULL;
        }
}


/**
 * Remove the line terminator that fgets() leaves at the end of a line.
 *
 * Unlike overwriting the last byte unconditionally, this is safe for an
 * empty string and leaves the text intact when the line was not newline
 * terminated (final line of a file, or a line longer than the buffer).
 *
 * \param line NUL-terminated buffer to trim in place
 * \return length of the trimmed string
 */
static size_t urldb__chomp(char *line)
{
        size_t len = strcspn(line, "\n");

        line[len] = '\0';

        return len;
}


/* exported interface documented in netsurf/url_db.h */
/*
 * Layout consumed by the reader below, one field per line:
 *   - file format version
 *   - then, per host: the host line (from version 107 onwards followed on
 *     the same line by space separated include-sub-domains flag and HSTS
 *     expiry), the number of URLs, and eight lines per URL: scheme, port,
 *     path, visit count, last visit time, content type, one line that is
 *     read and discarded, and the title.
 *
 * Returns NSERROR_NOT_FOUND if the file cannot be opened, NSERROR_NEED_DATA
 * if it is empty, NSERROR_INVALID for an unsupported version, NSERROR_NOMEM
 * if a host or URL cannot be inserted, and NSERROR_OK otherwise. A file
 * that ends part way through a record is not treated as an error.
 */
nserror urldb_load(const char *filename)
{
#define MAXIMUM_URL_LENGTH 4096
        char s[MAXIMUM_URL_LENGTH];
        char host[256];
        struct host_part *h;
        int urls;
        int i;
        int version;
        size_t length;
        FILE *fp;

        assert(filename);

        NSLOG(netsurf, INFO, "Loading URL file %s", filename);

        if (url_bloom == NULL)
                url_bloom = bloom_create(BLOOM_SIZE);

        fp = fopen(filename, "r");
        if (!fp) {
                NSLOG(netsurf, INFO, "Failed to open file '%s' for reading",
                      filename);
                return NSERROR_NOT_FOUND;
        }

        if (!fgets(s, MAXIMUM_URL_LENGTH, fp)) {
                fclose(fp);
                return NSERROR_NEED_DATA;
        }

        version = atoi(s);
        if (version < MIN_URL_FILE_VERSION) {
                NSLOG(netsurf, INFO, "Unsupported URL file version.");
                fclose(fp);
                return NSERROR_INVALID;
        }
        if (version > URL_FILE_VERSION) {
                NSLOG(netsurf, INFO, "Unknown URL file version.");
                fclose(fp);
                return NSERROR_INVALID;
        }

        while (fgets(host, sizeof host, fp)) {
                time_t hsts_expiry = 0;
                int hsts_include_sub_domains = 0;

                /* get the hostname */
                length = urldb__chomp(host);

                /* skip data that has ended up with a host of '' */
                if (length == 0) {
                        if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
                                break;
                        urls = atoi(s);
                        /* Eight fields/url */
                        for (i = 0; i < (8 * urls); i++) {
                                if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
                                        break;
                        }
                        continue;
                }

                if (version >= 107) {
                        char *cursor = host;

                        /* Terminate the host name at the first space and
                         * step over the separator(s).
                         */
                        while (*cursor && *cursor != ' ')
                                cursor++;
                        while (*cursor && *cursor == ' ') {
                                *cursor = '\0';
                                cursor++;
                        }
                        hsts_include_sub_domains = (*cursor == '1');

                        /* Step over the flag and the following separator(s)
                         * to reach the expiry time.
                         */
                        while (*cursor && *cursor != ' ')
                                cursor++;
                        while (*cursor && *cursor == ' ')
                                cursor++;
                        nsc_snptimet(cursor, strlen(cursor), &hsts_expiry);
                }

                h = urldb_add_host(host);
                if (!h) {
                        NSLOG(netsurf, INFO, "Failed adding host: '%s'", host);
                        fclose(fp);
                        return NSERROR_NOMEM;
                }
                h->hsts.expires = hsts_expiry;
                h->hsts.include_sub_domains = hsts_include_sub_domains;

                /* read number of URLs */
                if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
                        break;
                urls = atoi(s);

                /* no URLs => try next host */
                if (urls == 0) {
                        NSLOG(netsurf, INFO, "No URLs for '%s'", host);
                        continue;
                }

                /* load the non-corrupt data */
                for (i = 0; i < urls; i++) {
                        struct path_data *p = NULL;
                        char scheme[64], ports[10];
                        char url[64 + 3 + 256 + 6 + 4096 + 1 + 1];
                        unsigned int port;
                        bool is_file = false;
                        nsurl *entry_url;
                        lwc_string *scheme_lwc, *fragment_lwc;
                        char *path_query;
                        size_t len;

                        if (!fgets(scheme, sizeof scheme, fp))
                                break;
                        urldb__chomp(scheme);

                        if (!fgets(ports, sizeof ports, fp))
                                break;
                        urldb__chomp(ports);
                        port = atoi(ports);

                        if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
                                break;
                        urldb__chomp(s);

                        if (!strcasecmp(host, "localhost") &&
                            !strcasecmp(scheme, "file"))
                                is_file = true;

                        snprintf(url, sizeof url, "%s://%s%s%s%s",
                                 scheme,
                                 /* file URLs have no host */
                                 (is_file ? "" : host),
                                 (port ? ":" : ""),
                                 (port ? ports : ""),
                                 s);

                        /* TODO: store URLs in pre-parsed state, and make
                         *       a nsurl_load to generate the nsurl more
                         *       swiftly.
                         *       Need a nsurl_save too.
                         */
                        if (nsurl_create(url, &entry_url) != NSERROR_OK) {
                                NSLOG(netsurf, INFO, "Failed inserting '%s'",
                                      url);
                                fclose(fp);
                                return NSERROR_NOMEM;
                        }

                        if (url_bloom != NULL) {
                                uint32_t hash = nsurl_hash(entry_url);
                                bloom_insert_hash(url_bloom, hash);
                        }

                        /* Copy and merge path/query strings */
                        if (nsurl_get(entry_url, NSURL_PATH | NSURL_QUERY,
                                      &path_query, &len) != NSERROR_OK) {
                                NSLOG(netsurf, INFO, "Failed inserting '%s'",
                                      url);
                                /* do not leak the URL created above */
                                nsurl_unref(entry_url);
                                fclose(fp);
                                return NSERROR_NOMEM;
                        }

                        scheme_lwc = nsurl_get_component(entry_url,
                                                         NSURL_SCHEME);
                        fragment_lwc = nsurl_get_component(entry_url,
                                                           NSURL_FRAGMENT);
                        /* NOTE(review): path_query is not freed here on
                         * either outcome; urldb_add_path presumably takes
                         * ownership of it -- confirm.
                         */
                        p = urldb_add_path(scheme_lwc, port, h, path_query,
                                           fragment_lwc, entry_url);

                        /* Our references are finished with whether or not
                         * the insertion worked, so drop them before the
                         * result is examined.
                         */
                        nsurl_unref(entry_url);
                        lwc_string_unref(scheme_lwc);
                        if (fragment_lwc != NULL) {
                                lwc_string_unref(fragment_lwc);
                        }

                        if (!p) {
                                NSLOG(netsurf, INFO, "Failed inserting '%s'",
                                      url);
                                fclose(fp);
                                return NSERROR_NOMEM;
                        }

                        /* visit count */
                        if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
                                break;
                        p->urld.visits = (unsigned int)atoi(s);

                        /* entry last use time */
                        if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
                                break;
                        length = urldb__chomp(s);
                        nsc_snptimet(s, length, &p->urld.last_visit);

                        /* content type */
                        if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
                                break;
                        p->urld.type = (content_type)atoi(s);

                        /* this line is read and its content discarded */
                        if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
                                break;

                        /* title */
                        if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
                                break;
                        length = urldb__chomp(s);
                        if (length > 0) {
                                char *title = malloc(length + 1);

                                if (title != NULL) {
                                        memcpy(title, s, length + 1);
                                        /* release any title already held
                                         * by an existing entry
                                         */
                                        free(p->urld.title);
                                        p->urld.title = title;
                                }
                        }
                }
        }

        fclose(fp);
        NSLOG(netsurf, INFO, "Successfully loaded URL file");
#undef MAXIMUM_URL_LENGTH

        return NSERROR_OK;
}


/* exported interface documented in netsurf/url_db.h */
/*
 * Writes the file format version followed by the content of every search
 * tree. Returns NSERROR_SAVE_FAILED if the file cannot be opened or if any
 * write, or the final flush on close, fails; NSERROR_OK otherwise.
 */
nserror urldb_save(const char *filename)
{
        FILE *fp;
        int i;
        bool failed;

        assert(filename);

        fp = fopen(filename, "w");
        if (!fp) {
                NSLOG(netsurf, INFO, "Failed to open file '%s' for writing",
                      filename);
                return NSERROR_SAVE_FAILED;
        }

        /* file format version number */
        fprintf(fp, "%d\n", URL_FILE_VERSION);

        for (i = 0; i != NUM_SEARCH_TREES; i++) {
                urldb_save_search_tree(search_trees[i], fp);
        }

        /* The stream error indicator is sticky, so one check here covers
         * every write above. Buffered data is only flushed by fclose, so
         * its result has to be checked as well.
         */
        failed = (ferror(fp) != 0);
        if (fclose(fp) != 0) {
                failed = true;
        }

        if (failed) {
                NSLOG(netsurf, INFO, "Failed writing URL file '%s'",
                      filename);
                return NSERROR_SAVE_FAILED;
        }

        return NSERROR_OK;
}


/* exported interface documented in content/urldb.h */
nserror urldb_set_url_persistence(nsurl *url, bool persist)
{
        struct path_data *entry;

        assert(url);

        /* Only an entry already in the database can be flagged */
        entry = urldb_find_url(url);
        if (entry != NULL) {
                entry->persistent = persist;
                return NSERROR_OK;
        }

        return NSERROR_NOT_FOUND;
}


/* exported interface documented in content/urldb.h */
/*
 * Records the URL's hash in the bloom filter (creating the filter on first
 * use), then inserts the host and path into the database. A URL with no
 * host is accepted only for the file scheme, where it is stored under
 * "localhost". Returns true if a path entry was obtained, false otherwise.
 */
bool urldb_add_url(nsurl *url)
{
        struct host_part *h;
        struct path_data *p;
        lwc_string *scheme;
        lwc_string *port;
        lwc_string *host;
        lwc_string *fragment;
        const char *host_str;
        char *path_query = NULL;
        size_t len;
        bool match;
        unsigned int port_int;

        assert(url);

        if (url_bloom == NULL)
                url_bloom = bloom_create(BLOOM_SIZE);

        if (url_bloom != NULL) {
                uint32_t hash = nsurl_hash(url);
                bloom_insert_hash(url_bloom, hash);
        }

        /* Copy and merge path/query strings */
        if (nsurl_get(url, NSURL_PATH | NSURL_QUERY, &path_query, &len) !=
            NSERROR_OK) {
                return false;
        }
        assert(path_query != NULL);

        scheme = nsurl_get_component(url, NSURL_SCHEME);
        if (scheme == NULL) {
                free(path_query);
                return false;
        }

        host = nsurl_get_component(url, NSURL_HOST);
        if (host != NULL) {
                /* Keep our reference until host_str has been consumed */
                host_str = lwc_string_data(host);

        } else if (lwc_string_isequal(scheme, corestring_lwc_file, &match) ==
                   lwc_error_ok && match == true) {
                host_str = "localhost";

        } else {
                lwc_string_unref(scheme);
                free(path_query);
                return false;
        }

        fragment = nsurl_get_component(url, NSURL_FRAGMENT);

        port = nsurl_get_component(url, NSURL_PORT);
        if (port != NULL) {
                port_int = atoi(lwc_string_data(port));
                lwc_string_unref(port);
        } else {
                port_int = 0;
        }

        /* Get host entry */
        h = urldb_add_host(host_str);

        /* host_str is not read past this point */
        if (host != NULL) {
                lwc_string_unref(host);
        }

        /* Get path entry */
        if (h != NULL) {
                /* NOTE(review): path_query is not freed after this call;
                 * urldb_add_path presumably takes ownership -- confirm.
                 */
                p = urldb_add_path(scheme,
                                   port_int,
                                   h,
                                   path_query,
                                   fragment,
                                   url);
        } else {
                /* Nothing consumed path_query, so release it here */
                free(path_query);
                p = NULL;
        }

        lwc_string_unref(scheme);
        if (fragment != NULL) {
                lwc_string_unref(fragment);
        }

        return (p != NULL);
}


/* exported interface documented in content/urldb.h */
nserror urldb_set_url_title(nsurl *url, const char *title)
{
        struct path_data *entry;
        char *copy = NULL;

        assert(url);

        entry = urldb_find_url(url);
        if (entry == NULL) {
                return NSERROR_NOT_FOUND;
        }

        /* Duplicate a supplied title before touching the entry, so an
         * allocation failure leaves the stored title as it was. A NULL
         * title clears the stored one.
         */
        if (title != NULL) {
                copy = strdup(title);
                if (copy == NULL) {
                        return NSERROR_NOMEM;
                }
        }

        free(entry->urld.title);
        entry->urld.title = copy;

        return NSERROR_OK;
}


/* exported interface documented in content/urldb.h */
nserror urldb_set_url_content_type(nsurl *url, content_type type)
{
        struct path_data *entry;

        assert(url);

        /* The URL must already be present; it is not added here */
        entry = urldb_find_url(url);
        if (entry != NULL) {
                entry->urld.type = type;
                return NSERROR_OK;
        }

        return NSERROR_NOT_FOUND;
}


/* exported interface documented in content/urldb.h */
nserror urldb_update_url_visit_data(nsurl *url)
{
        struct path_data *entry;

        assert(url);

        entry = urldb_find_url(url);
        if (entry == NULL) {
                return NSERROR_NOT_FOUND;
        }

        /* One more visit, stamped with the current time */
        entry->urld.visits++;
        entry->urld.last_visit = time(NULL);

        return NSERROR_OK;
}


/* exported interface documented in content/urldb.h */
void urldb_reset_url_visit_data(nsurl *url)
{
        struct path_data *entry;

        assert(url);

        /* Unknown URLs are silently ignored */
        entry = urldb_find_url(url);
        if (entry != NULL) {
                entry->urld.visits = 0;
                entry->urld.last_visit = (time_t)0;
        }
}


/* exported interface documented in netsurf/url_db.h */
const struct url_data *urldb_get_url_data(nsurl *url)
{
        struct path_data *entry;

        assert(url);

        entry = urldb_find_url(url);
        if (entry == NULL) {
                return NULL;
        }

        /* The entry's internal record is handed out through the public,
         * read-only url_data type.
         */
        return (const struct url_data *)&entry->urld;
}


/* exported interface documented in content/urldb.h */
nsurl *urldb_get_url(nsurl *url)
{
        struct path_data *entry;

        assert(url);

        /* Hand back the URL object stored on the entry; no reference is
         * taken here on the caller's behalf.
         */
        entry = urldb_find_url(url);

        return (entry != NULL) ? entry->url : NULL;
}


/* exported interface documented in netsurf/url_db.h */
/*
 * Adds the URL to the database if needed, then attaches the credentials
 * to the host's protection space matching (realm, scheme, port), creating
 * that space if it does not exist yet. On allocation failure nothing is
 * changed: in particular, previously stored credentials are kept.
 */
void urldb_set_auth_details(nsurl *url, const char *realm, const char *auth)
{
        struct path_data *p, *pi;
        struct host_part *h;
        struct prot_space_data *space;
        char *realm_alloc, *auth_alloc;
        bool match;

        assert(url && realm && auth);

        /* add url, in case it's missing */
        urldb_add_url(url);

        p = urldb_find_url(url);

        if (!p)
                return;

        /* Search for host_part */
        for (pi = p; pi->parent != NULL; pi = pi->parent)
                ;
        /* NOTE(review): treats the topmost path node as its owning
         * host_part; relies on the paths member being at the start of
         * struct host_part -- confirm.
         */
        h = (struct host_part *)pi;

        /* Search if given URL belongs to a protection space we already know of. */
        for (space = h->prot_space; space; space = space->next) {
                if (!strcmp(space->realm, realm) &&
                    lwc_string_isequal(space->scheme, p->scheme,
                                       &match) == lwc_error_ok &&
                    match == true &&
                    space->port == p->port)
                        break;
        }

        /* Copy the credentials before modifying anything, so a failed
         * allocation cannot discard what is already stored.
         */
        auth_alloc = strdup(auth);
        if (auth_alloc == NULL)
                return;

        if (space != NULL) {
                /* Overrule existing auth. */
                free(space->auth);
                space->auth = auth_alloc;
        } else {
                /* Create a new protection space. */
                space = malloc(sizeof(struct prot_space_data));
                realm_alloc = strdup(realm);

                if (!space || !realm_alloc) {
                        free(space);
                        free(realm_alloc);
                        free(auth_alloc);
                        return;
                }

                space->scheme = lwc_string_ref(p->scheme);
                space->port = p->port;
                space->realm = realm_alloc;
                space->auth = auth_alloc;
                /* push onto the front of the host's list */
                space->next = h->prot_space;
                h->prot_space = space;
        }

        p->prot_space = space;
}


/* exported interface documented in netsurf/url_db.h */
const char *urldb_get_auth_details(nsurl *url, const char *realm)
{
        struct path_data *p, *node, *topmost;
        const struct host_part *h;
        const struct prot_space_data *space;
        bool match;

        assert(url);

        /* add to the db, so our lookup will work */
        urldb_add_url(url);

        p = urldb_find_url(url);
        if (p == NULL) {
                return NULL;
        }

        /* Walk from the URL's node towards the top of its path tree; the
         * first node carrying a protection space supplies the answer.
         * The last node visited is remembered for the realm lookup below.
         */
        topmost = p;
        node = p;
        while (node != NULL) {
                if (node->prot_space) {
                        return node->prot_space->auth;
                }
                topmost = node;
                node = node->parent;
        }

        /* Without a realm the protection space cannot be identified */
        if (realm == NULL) {
                return NULL;
        }

        /* NOTE(review): treats the topmost path node as its owning
         * host_part; relies on the paths member being at the start of
         * struct host_part -- confirm.
         */
        h = (const struct host_part *)topmost;

        /* Look for a space on this host with the same realm, scheme and
         * port; cache it on the URL's node when found.
         */
        for (space = h->prot_space; space != NULL; space = space->next) {
                if (strcmp(space->realm, realm) != 0) {
                        continue;
                }
                if (lwc_string_isequal(space->scheme, p->scheme, &match) !=
                    lwc_error_ok || match != true) {
                        continue;
                }
                if (space->port != p->port) {
                        continue;
                }

                p->prot_space = space;
                return p->prot_space->auth;
        }

        return NULL;
}


/* exported interface documented in netsurf/url_db.h */
void urldb_set_cert_permissions(nsurl *url, bool permit)
{
        struct path_data *node;
        struct host_part *h;

        assert(url);

        /* add url, in case it's missing */
        urldb_add_url(url);

        node = urldb_find_url(url);
        if (node == NULL) {
                return;
        }

        /* Climb to the top of the path tree */
        while (node != NULL && node->parent != NULL) {
                node = node->parent;
        }
        assert(node);

        /* NOTE(review): treats the topmost path node as its owning
         * host_part; relies on the paths member being at the start of
         * struct host_part -- confirm.
         */
        h = (struct host_part *)node;

        /* The permission is recorded per host, not per URL */
        h->permit_invalid_certs = permit;
}


/* exported interface documented in content/urldb.h */
bool urldb_get_cert_permissions(nsurl *url)
{
        struct path_data *node;
        const struct host_part *h;

        assert(url);

        /* Unlike the setter, an unknown URL is not added: it simply has
         * no permission.
         */
        node = urldb_find_url(url);
        if (node == NULL) {
                return false;
        }

        /* Climb to the top of the path tree */
        while (node != NULL && node->parent != NULL) {
                node = node->parent;
        }
        assert(node);

        /* NOTE(review): treats the topmost path node as its owning
         * host_part; relies on the paths member being at the start of
         * struct host_part -- confirm.
         */
        h = (const struct host_part *)node;

        return h->permit_invalid_certs;
}


/* exported interface documented in content/urldb.h */
bool urldb_set_hsts_policy(struct nsurl *url, const char *header)
{
        struct path_data *node;
        struct host_part *h;
        lwc_string *host;
        time_t now = time(NULL);
        http_strict_transport_security *sts;
        uint32_t max_age = 0;
        nserror error;
        bool unusable_host;

        assert(url);

        host = nsurl_get_component(url, NSURL_HOST);
        if (host == NULL) {
                /* No host part: ignore */
                return true;
        }

        /* Host is IP or blank: ignore */
        unusable_host = urldb__host_is_ip_address(lwc_string_data(host)) ||
                        lwc_string_length(host) == 0;
        lwc_string_unref(host);
        if (unusable_host) {
                return true;
        }

        /* add url, in case it's missing */
        urldb_add_url(url);

        node = urldb_find_url(url);
        if (node == NULL) {
                return false;
        }

        /* Climb to the top of the path tree */
        while (node != NULL && node->parent != NULL) {
                node = node->parent;
        }
        assert(node);

        /* NOTE(review): treats the topmost path node as its owning
         * host_part; relies on the paths member being at the start of
         * struct host_part -- confirm.
         */
        h = (struct host_part *)node;
        if (h->permit_invalid_certs) {
                /* Transport is tainted: ignore */
                return true;
        }

        error = http_parse_strict_transport_security(header, &sts);
        if (error != NSERROR_OK) {
                /* Parse failed: ignore */
                return true;
        }

        h->hsts.include_sub_domains =
                http_strict_transport_security_include_subdomains(sts);

        max_age = http_strict_transport_security_max_age(sts);
        if (max_age != 0) {
                /* An existing expiry is only ever pushed later */
                if ((time_t) (now + max_age) > h->hsts.expires) {
                        h->hsts.expires = now + max_age;
                }
        } else {
                /* A zero max-age withdraws the policy */
                h->hsts.expires = 0;
                h->hsts.include_sub_domains = false;
        }

        http_strict_transport_security_destroy(sts);

        return true;
}


/* exported interface documented in content/urldb.h */
bool urldb_get_hsts_enabled(struct nsurl *url)
{
        struct path_data *node;
        const struct host_part *h;
        lwc_string *host;
        time_t now = time(NULL);
        bool unusable_host;

        assert(url);

        host = nsurl_get_component(url, NSURL_HOST);
        if (host == NULL) {
                /* No host part: not enabled */
                return false;
        }

        /* Host is IP or blank: not enabled */
        unusable_host = urldb__host_is_ip_address(lwc_string_data(host)) ||
                        lwc_string_length(host) == 0;
        lwc_string_unref(host);
        if (unusable_host) {
                return false;
        }

        /* The lookup starts from the URL's own node and works upwards, so
         * the URL has to be present in the database first; policy held
         * further up the tree may apply to it.
         */
        urldb_add_url(url);

        node = urldb_find_url(url);
        if (node == NULL) {
                return false;
        }

        /* Climb to the top of the path tree */
        while (node != NULL && node->parent != NULL) {
                node = node->parent;
        }
        assert(node);

        /* NOTE(review): treats the topmost path node as its owning
         * host_part; relies on the paths member being at the start of
         * struct host_part -- confirm.
         */
        h = (const struct host_part *)node;

        /* An unexpired record on the host itself is sufficient */
        if (h->hsts.expires > now) {
                return true;
        }

        /* Otherwise an ancestor domain counts only if its record is
         * unexpired and extends to subdomains. The database root is not
         * consulted.
         */
        h = h->parent;
        while (h != NULL && h != &db_root) {
                if (h->hsts.expires > now && h->hsts.include_sub_domains) {
                        return true;
                }
                h = h->parent;
        }

        return false;
}


/* exported interface documented in netsurf/url_db.h */
void
urldb_iterate_partial(const char *prefix,
                      bool (*callback)(nsurl *url, const struct url_data *data))
{
        char host[256];
        char buf[260]; /* max domain + "www." */
        const char *slash, *scheme_sep;
        struct search_node *tree;
        const struct host_part *h;
        int len;

        assert(prefix && callback);

        /* Anything up to and including "://" is not part of the match */
        scheme_sep = strstr(prefix, "://");
        if (scheme_sep) {
                prefix = scheme_sep + 3;
        }

        slash = strchr(prefix, '/');
        tree = urldb_get_search_tree(prefix);

        if (slash == NULL) {
                /* No slash: the prefix is matched against host names */
                len = strlen(prefix);

                if (!urldb_iterate_partial_host(tree, prefix, callback)) {
                        return;
                }

                /* Unless the prefix already begins "www.", repeat the
                 * search with that prepended.
                 */
                if (len <= 3 || strncasecmp(prefix, "www.", 4) != 0) {
                        snprintf(buf, sizeof buf, "www.%s", prefix);
                        urldb_iterate_partial_host(
                                search_trees[ST_DN + 'w' - 'a'],
                                buf, callback);
                }
                return;
        }

        /* A slash means a path is wanted: the text before it must name a
         * host exactly.
         */
        len = (int) (slash - prefix);
        snprintf(host, sizeof host, "%.*s", len, prefix);

        h = urldb_search_find(tree, host);
        if (h == NULL) {
                /* Give up if the name already begins "www."; otherwise
                 * retry with that prepended.
                 */
                if (len > 3 && strncasecmp(host, "www.", 4) == 0) {
                        return;
                }

                snprintf(buf, sizeof buf, "www.%s", host);
                h = urldb_search_find(search_trees[ST_DN + 'w' - 'a'], buf);
                if (h == NULL) {
                        return;
                }
        }

        /* Have paths, iterate them */
        if (h->paths.children) {
                urldb_iterate_partial_path(&h->paths, slash + 1, callback);
        }
}


/* exported interface documented in netsurf/url_db.h */
void
urldb_iterate_entries(bool (*callback)(nsurl *url, const struct url_data *data))
{
        int tree = 0;

        assert(callback);

        /* Visit each search tree in turn with the URL callback only,
         * stopping as soon as one tree's iteration reports false.
         */
        while (tree < NUM_SEARCH_TREES &&
               urldb_iterate_entries_host(search_trees[tree],
                                          callback,
                                          NULL)) {
                tree++;
        }
}


/* exported interface documented in content/urldb.h */
void urldb_iterate_cookies(bool (*callback)(const struct cookie_data *data))
{
        int tree = 0;

        assert(callback);

        /* Visit each search tree in turn with the cookie callback only,
         * stopping as soon as one tree's iteration reports false.
         */
        while (tree < NUM_SEARCH_TREES &&
               urldb_iterate_entries_host(search_trees[tree],
                                          NULL, callback)) {
                tree++;
        }
}


/* exported interface documented in content/urldb.h */

bool urldb_set_cookie(const char *header, nsurl *url, nsurl *referer)

{

        const char *cur = header, *end;

        lwc_string *path, *host, *scheme;

        nsurl *urlt;

        bool match;


        assert(url && header);


        /* Get defragmented URL, as 'urlt' */

        if (nsurl_defragment(url, &urlt) != NSERROR_OK)

                return NULL;


        scheme = nsurl_get_component(url, NSURL_SCHEME);

        if (scheme == NULL) {

                nsurl_unref(urlt);

                return false;

        }


        path = nsurl_get_component(url, NSURL_PATH);

        if (path == NULL) {

                lwc_string_unref(scheme);

                nsurl_unref(urlt);

                return false;

        }


        host = nsurl_get_component(url, NSURL_HOST);

        if (host == NULL) {

                lwc_string_unref(path);

                lwc_string_unref(scheme);

                nsurl_unref(urlt);

                return false;

        }


        if (referer) {

                lwc_string *rhost;


                /* Ensure that url's host name domain matches

                 * referer's (4.3.5) */

                rhost = nsurl_get_component(referer, NSURL_HOST);

                if (rhost == NULL) {

                        goto error;

                }


                /* Domain match host names */

                if (lwc_string_isequal(host, rhost, &match) == lwc_error_ok &&

                    match == false) {

                        const char *hptr;

                        const char *rptr;

                        const char *dot;

                        const char *host_data = lwc_string_data(host);

                        const char *rhost_data = lwc_string_data(rhost);


                        /* Ensure neither host nor rhost are IP addresses */

                        if (urldb__host_is_ip_address(host_data) ||

                            urldb__host_is_ip_address(rhost_data)) {

                                /* IP address, so no partial match */

                                lwc_string_unref(rhost);

                                goto error;

                        }


                        /* Not exact match, so try the following:

                         *

                         * 1) Find the longest common suffix of host and rhost

                         *    (may be all of host/rhost)

                         * 2) Discard characters from the start of the suffix

                         *    until the suffix starts with a dot

                         *    (prevents foobar.com matching bar.com)

                         * 3) Ensure the suffix is non-empty and contains

                         *    embedded dots (to avoid permitting .com as a

                         *    suffix)

                         *

                         * Note that the above in no way resembles the

                         * domain matching algorithm found in RFC2109.

                         * It does, however, model the real world rather

                         * more accurately.

                         */


                        /** \todo In future, we should consult a TLD service

                         * instead of just looking for embedded dots.

                         */


                        hptr = host_data + lwc_string_length(host) - 1;

                        rptr = rhost_data + lwc_string_length(rhost) - 1;


                        /* 1 */

                        while (hptr >= host_data && rptr >= rhost_data) {

                                if (*hptr != *rptr)

                                        break;

                                hptr--;

                                rptr--;

                        }

                        /* Ensure we end up pointing at the start of the

                         * common suffix. The above loop will exit pointing

                         * to the byte before the start of the suffix. */

                        hptr++;


                        /* 2 */

                        while (*hptr != '\0' && *hptr != '.')

                                hptr++;


                        /* 3 */

                        if (*hptr == '\0' ||

                            (dot = strchr(hptr + 1, '.')) == NULL ||

                            *(dot + 1) == '\0') {

                                lwc_string_unref(rhost);

                                goto error;

                        }

                }


                lwc_string_unref(rhost);

        }


        end = cur + strlen(cur) - 2 /* Trailing CRLF */;


        do {

                struct cookie_internal_data *c;

                char *dot;

                size_t len;

#ifdef WITH_NSPSL

                const char *suffix;

#endif


                c = urldb_parse_cookie(url, &cur);

                if (!c) {

                        /* failed => stop parsing */

                        goto error;

                }


                /* validate cookie */


                /* 4.2.2:i Cookie must have NAME and VALUE */

                if (!c->name || !c->value) {

                        urldb_free_cookie(c);

                        goto error;

                }


                /* 4.3.2:i Cookie path must be a prefix of URL path */

                len = strlen(c->path);

                if (len > lwc_string_length(path) ||

                    strncmp(c->path, lwc_string_data(path),

                            len) != 0) {

                        urldb_free_cookie(c);

                        goto error;

                }


#ifdef WITH_NSPSL

                /* check domain is not a public suffix */

                dot = c->domain;

                if (*dot == '.') {

                        dot++;

                }

                suffix = nspsl_getpublicsuffix(dot);

                if (suffix == NULL) {

                        NSLOG(netsurf, INFO,

                              "domain %s was a public suffix domain", dot);

                        urldb_free_cookie(c);

                        goto error;

                }

#else

                /* 4.3.2:ii Cookie domain must contain embedded dots */

                dot = strchr(c->domain + 1, '.');

                if (!dot || *(dot + 1) == '\0') {

                        /* no embedded dots */

                        urldb_free_cookie(c);

                        goto error;

                }

#endif


                /* Domain match fetch host with cookie domain */

                if (strcasecmp(lwc_string_data(host), c->domain) != 0) {

                        int hlen, dlen;

                        char *domain = c->domain;


                        /* c->domain must be a domain cookie here because:

                         * c->domain is either:

                         *   + specified in the header as a domain cookie

                         *     (non-domain cookies in the header are ignored

                         *      by urldb_parse_cookie / urldb_parse_avpair)

                         *   + defaulted to the URL's host part

                         *     (by urldb_parse_cookie if no valid domain was

                         *      specified in the header)

                         *

                         * The latter will pass the strcasecmp above, which

                         * leaves the former (i.e. a domain cookie)

                         */

                        assert(c->domain[0] == '.');


                        /* 4.3.2:iii */

                        if (urldb__host_is_ip_address(lwc_string_data(host))) {

                                /* IP address, so no partial match */

                                urldb_free_cookie(c);

                                goto error;

                        }


                        hlen = lwc_string_length(host);

                        dlen = strlen(c->domain);


                        if (hlen <= dlen && hlen != dlen - 1) {

                                /* Partial match not possible */

                                urldb_free_cookie(c);

                                goto error;

                        }


                        if (hlen == dlen - 1) {

                                /* Relax matching to allow

                                 * host a.com to match .a.com */

                                domain++;

                                dlen--;

                        }


                        if (strcasecmp(lwc_string_data(host) + (hlen - dlen),

                                       domain)) {

                                urldb_free_cookie(c);

                                goto error;

                        }


                        /* 4.3.2:iv Ensure H contains no dots

                         *

                         * If you believe the spec, H should contain no

                         * dots in _any_ cookie. Unfortunately, however,

                         * reality differs in that many sites send domain

                         * cookies of the form .foo.com from hosts such

                         * as bar.bat.foo.com and then expect domain

                         * matching to work. Thus we have to do what they

                         * expect, regardless of any potential security

                         * implications.

                         *

                         * This is what code conforming to the spec would

                         * look like:

                         *

                         * for (int i = 0; i < (hlen - dlen); i++) {

                         *      if (host[i] == '.') {

                         *              urldb_free_cookie(c);

                         *              goto error;

                         *      }

                         * }

                         */

                }


                /* Now insert into database */

                if (!urldb_insert_cookie(c, scheme, urlt))

                        goto error;

        } while (cur < end);


        lwc_string_unref(host);

        lwc_string_unref(path);

        lwc_string_unref(scheme);

        nsurl_unref(urlt);


        return true;


error:

        lwc_string_unref(host);

        lwc_string_unref(path);

        lwc_string_unref(scheme);

        nsurl_unref(urlt);


        return false;

}


/* exported interface documented in content/urldb.h */

char *urldb_get_cookie(nsurl *url, bool include_http_only)

{

        const struct path_data *p, *q;

        const struct host_part *h;

        lwc_string *path_lwc;

        struct cookie_internal_data *c;

        int count = 0, version = COOKIE_RFC2965;

        struct cookie_internal_data **matched_cookies;

        int matched_cookies_size = 20;

        int ret_alloc = 4096, ret_used = 1;

        const char *path;

        char *ret;

        lwc_string *scheme;

        time_t now;

        int i;

        bool match;


        assert(url != NULL);


        /* The URL must exist in the db in order to find relevant cookies, since

         * we search up the tree from the URL node, and cookies from further

         * up also apply. */

        urldb_add_url(url);


        p = urldb_find_url(url);

        if (!p)

                return NULL;


        scheme = p->scheme;


        matched_cookies = malloc(matched_cookies_size *

                                 sizeof(struct cookie_internal_data *));

        if (!matched_cookies)

                return NULL;


#define GROW_MATCHED_COOKIES                                            \

        do {                                                            \

                if (count == matched_cookies_size) {                    \

                        struct cookie_internal_data **temp;             \

                        temp = realloc(matched_cookies,                 \

                                       (matched_cookies_size + 20) *    \

                                       sizeof(struct cookie_internal_data *)); \

                                                                        \

                        if (temp == NULL) {                             \

                                free(ret);                              \

                                free(matched_cookies);                  \

                                return NULL;                            \

                        }                                               \

                                                                        \

                        matched_cookies = temp;                         \

                        matched_cookies_size += 20;                     \

                }                                                       \

        } while(0)


        ret = malloc(ret_alloc);

        if (!ret) {

                free(matched_cookies);

                return NULL;

        }


        ret[0] = '\0';


        path_lwc = nsurl_get_component(url, NSURL_PATH);

        if (path_lwc == NULL) {

                free(ret);

                free(matched_cookies);

                return NULL;

        }

        path = lwc_string_data(path_lwc);

        lwc_string_unref(path_lwc);


        now = time(NULL);


        if (*(p->segment) != '\0') {

                /* Match exact path, unless directory, when prefix matching

                 * will handle this case for us. */

                for (q = p->parent->children; q; q = q->next) {

                        if (strcmp(q->segment, p->segment))

                                continue;


                        /* Consider all cookies associated with

                         * this exact path */

                        for (c = q->cookies; c; c = c->next) {

                                if (c->expires != -1 && c->expires < now)

                                        /* cookie has expired => ignore */

                                        continue;


                                if (c->secure && lwc_string_isequal(

                                            q->scheme,

                                            corestring_lwc_https,

                                            &match) &&

                                    match == false)

                                        /* secure cookie for insecure host.

                                         * ignore */

                                        continue;


                                if (c->http_only && !include_http_only)

                                        /* Ignore HttpOnly */

                                        continue;


                                matched_cookies[count++] = c;


                                GROW_MATCHED_COOKIES;


                                if (c->version < (unsigned int)version)

                                        version = c->version;


                                c->last_used = now;


                                cookie_manager_add((struct cookie_data *)c);

                        }

                }

        }


        /* Now consider cookies whose paths prefix-match ours */

        for (p = p->parent; p; p = p->parent) {

                /* Find directory's path entry(ies) */

                /* There are potentially multiple due to differing schemes */

                for (q = p->children; q; q = q->next) {

                        if (*(q->segment) != '\0')

                                continue;


                        for (c = q->cookies; c; c = c->next) {

                                if (c->expires != -1 && c->expires < now)

                                        /* cookie has expired => ignore */

                                        continue;


                                if (c->secure && lwc_string_isequal(

                                            q->scheme,

                                            corestring_lwc_https,

                                            &match) &&

                                    match == false)

                                        /* Secure cookie for insecure server

                                         * => ignore */

                                        continue;


                                matched_cookies[count++] = c;


                                GROW_MATCHED_COOKIES;


                                if (c->version < (unsigned int) version)

                                        version = c->version;


                                c->last_used = now;


                                cookie_manager_add((struct cookie_data *)c);

                        }

                }


                if (!p->parent) {

                        /* No parent, so bail here. This can't go in

                         * the loop exit condition as we also want to

                         * process the top-level node.

                         *

                         * If p->parent is NULL then p->cookies are

                         * the domain cookies and thus we don't even

                         * try matching against them.

                         */

                        break;

                }


                /* Consider p itself - may be the result of Path=/foo */

                for (c = p->cookies; c; c = c->next) {

                        if (c->expires != -1 && c->expires < now)

                                /* cookie has expired => ignore */

                                continue;


                        /* Ensure cookie path is a prefix of the resource */

                        if (strncmp(c->path, path, strlen(c->path)) != 0)

                                /* paths don't match => ignore */

                                continue;


                        if (c->secure && lwc_string_isequal(p->scheme,

                                                            corestring_lwc_https,

                                                            &match) &&

                            match == false)

                                /* Secure cookie for insecure server

                                 * => ignore */

                                continue;


                        matched_cookies[count++] = c;


                        GROW_MATCHED_COOKIES;


                        if (c->version < (unsigned int) version)

                                version = c->version;


                        c->last_used = now;


                        cookie_manager_add((struct cookie_data *)c);

                }


        }


        /* Finally consider domain cookies for hosts which domain match ours */

        for (h = (const struct host_part *)p; h && h != &db_root;

             h = h->parent) {

                for (c = h->paths.cookies; c; c = c->next) {

                        if (c->expires != -1 && c->expires < now)

                                /* cookie has expired => ignore */

                                continue;


                        /* Ensure cookie path is a prefix of the resource */

                        if (strncmp(c->path, path, strlen(c->path)) != 0)

                                /* paths don't match => ignore */

                                continue;


                        if (c->secure && lwc_string_isequal(scheme,

                                                            corestring_lwc_https,

                                                            &match) &&

                            match == false)

                                /* secure cookie for insecure host. ignore */

                                continue;


                        matched_cookies[count++] = c;


                        GROW_MATCHED_COOKIES;


                        if (c->version < (unsigned int)version)

                                version = c->version;


                        c->last_used = now;


                        cookie_manager_add((struct cookie_data *)c);

                }

        }


        if (count == 0) {

                /* No cookies found */

                free(ret);

                free(matched_cookies);

                return NULL;

        }


        /* and build output string */

        if (version > COOKIE_NETSCAPE) {

                sprintf(ret, "$Version=%d", version);

                ret_used = strlen(ret) + 1;

        }


        for (i = 0; i < count; i++) {

                if (!urldb_concat_cookie(matched_cookies[i], version,

                                         &ret_used, &ret_alloc, &ret)) {

                        free(ret);

                        free(matched_cookies);

                        return NULL;

                }

        }


        if (version == COOKIE_NETSCAPE) {

                /* Old-style cookies => no version & skip "; " */

                memmove(ret, ret + 2, ret_used - 2);

                ret_used -= 2;

        }


        /* Now, shrink the output buffer to the required size */

        {

                char *temp = realloc(ret, ret_used);

                if (!temp) {

                        free(ret);

                        free(matched_cookies);

                        return NULL;

                }


                ret = temp;

        }


        free(matched_cookies);


        return ret;


#undef GROW_MATCHED_COOKIES

}


/* exported interface documented in content/urldb.h */
/*
 * Thin entry point: hands the domain/path/name triple to
 * urldb_delete_cookie_hosts(), starting the search at the root of the
 * host tree (db_root).
 */
void urldb_delete_cookie(const char *domain,
                         const char *path,
                         const char *name)
{
        struct host_part *const tree_root = &db_root;

        urldb_delete_cookie_hosts(domain, path, name, tree_root);
}


/* exported interface documented in content/urldb.h */

void urldb_load_cookies(const char *filename)

{

        FILE *fp;

        char s[16*1024];


        assert(filename);


        fp = fopen(filename, "r");

        if (!fp)

                return;


#define FIND_T {                                \

                for (; *p && *p != '\t'; p++)   \

                        ; /* do nothing */      \

                if (p >= end) {                 \

                        NSLOG(netsurf, INFO, "Overran input");  \

                        continue;               \

                }                               \

                *p++ = '\0';                    \

        }


#define SKIP_T {                                \

                for (; *p && *p == '\t'; p++)   \

                        ; /* do nothing */      \

                if (p >= end) {                 \

                        NSLOG(netsurf, INFO, "Overran input");  \

                        continue;               \

                }                               \

        }


        while (fgets(s, sizeof s, fp)) {

                char *p = s, *end = 0,

                        *domain, *path, *name, *value, *scheme, *url,

                        *comment;

                int version, domain_specified, path_specified,

                        secure, http_only, no_destroy, value_quoted;

                time_t expires, last_used;

                struct cookie_internal_data *c;


                if(s[0] == 0 || s[0] == '#')

                        /* Skip blank lines or comments */

                        continue;


                s[strlen(s) - 1] = '\0'; /* lose terminating newline */

                end = s + strlen(s);


                /* Look for file version first

                 * (all input is ignored until this is read)

                 */

                if (strncasecmp(s, "Version:", 8) == 0) {

                        FIND_T; SKIP_T; loaded_cookie_file_version = atoi(p);


                        if (loaded_cookie_file_version <

                            MIN_COOKIE_FILE_VERSION) {

                                NSLOG(netsurf, INFO,

                                      "Unsupported Cookie file version");

                                break;

                        }


                        continue;

                } else if (loaded_cookie_file_version == 0) {

                        /* Haven't yet seen version; skip this input */

                        continue;

                }


                /* One cookie/line */


                /* Parse input */

                FIND_T; version = atoi(s);

                SKIP_T; domain = p; FIND_T;

                SKIP_T; domain_specified = atoi(p); FIND_T;

                SKIP_T; path = p; FIND_T;

                SKIP_T; path_specified = atoi(p); FIND_T;

                SKIP_T; secure = atoi(p); FIND_T;

                if (loaded_cookie_file_version > 101) {

                        /* Introduced in version 1.02 */

                        SKIP_T; http_only = atoi(p); FIND_T;

                } else {

                        http_only = 0;

                }

                SKIP_T; expires = (time_t)atoi(p); FIND_T;

                SKIP_T; last_used = (time_t)atoi(p); FIND_T;

                SKIP_T; no_destroy = atoi(p); FIND_T;

                SKIP_T; name = p; FIND_T;

                SKIP_T; value = p; FIND_T;

                if (loaded_cookie_file_version > 100) {

                        /* Introduced in version 1.01 */

                        SKIP_T; value_quoted = atoi(p); FIND_T;

                } else {

                        value_quoted = 0;

                }

                SKIP_T; scheme = p; FIND_T;

                SKIP_T; url = p; FIND_T;


                /* Comment may have no content, so don't

                 * use macros as they'll break */

                for (; *p && *p == '\t'; p++)

                        ; /* do nothing */

                comment = p;


                assert(p <= end);


                /* Now create cookie */

                c = malloc(sizeof(struct cookie_internal_data));

                if (!c)

                        break;


                c->name = strdup(name);

                c->value = strdup(value);

                c->value_was_quoted = value_quoted;

                c->comment = strdup(comment);

                c->domain_from_set = domain_specified;

                c->domain = strdup(domain);

                c->path_from_set = path_specified;

                c->path = strdup(path);

                c->expires = expires;

                c->last_used = last_used;

                c->secure = secure;

                c->http_only = http_only;

                c->version = version;

                c->no_destroy = no_destroy;


                if (!(c->name && c->value && c->comment &&

                      c->domain && c->path)) {

                        urldb_free_cookie(c);

                        break;

                }


                if (c->domain[0] != '.') {

                        lwc_string *scheme_lwc = NULL;

                        nsurl *url_nsurl = NULL;


                        assert(scheme[0] != 'u');


                        if (nsurl_create(url, &url_nsurl) != NSERROR_OK) {

                                urldb_free_cookie(c);

                                break;

                        }

                        scheme_lwc = nsurl_get_component(url_nsurl,

                                                         NSURL_SCHEME);


                        /* And insert it into database */

                        if (!urldb_insert_cookie(c, scheme_lwc, url_nsurl)) {

                                /* Cookie freed for us */

                                nsurl_unref(url_nsurl);

                                lwc_string_unref(scheme_lwc);

                                break;

                        }

                        nsurl_unref(url_nsurl);

                        lwc_string_unref(scheme_lwc);


                } else {

                        if (!urldb_insert_cookie(c, NULL, NULL)) {

                                /* Cookie freed for us */

                                break;

                        }

                }

        }


#undef SKIP_T

#undef FIND_T


        fclose(fp);

}


/* exported interface documented in content/urldb.h */
/*
 * Writes the cookie file: a commented preamble describing the format, the
 * "Version:" line, then the cookies themselves, which
 * urldb_save_cookie_hosts() emits starting from db_root. The version
 * written is the larger of the version last loaded and
 * COOKIE_FILE_VERSION. Nothing is written if the file cannot be opened.
 */
void urldb_save_cookies(const char *filename)
{
        int cookie_file_version;
        FILE *out;

        assert(filename);

        out = fopen(filename, "w");
        if (out == NULL) {
                return;
        }

        cookie_file_version = max(loaded_cookie_file_version,
                                  COOKIE_FILE_VERSION);

        /* Preamble, part one: fixed text */
        fputs("# NetSurf cookies file.\n"
              "#\n"
              "# Lines starting with a '#' are comments, "
              "blank lines are ignored.\n"
              "#\n",
              out);

        /* Preamble, part two: the only line that embeds the version */
        fprintf(out,
                "# All lines prior to \"Version:\t%d\" are discarded.\n",
                cookie_file_version);

        /* Preamble, part three: column headings */
        fputs("#\n"
              "# Version\tDomain\tDomain from Set-Cookie\tPath\t"
              "Path from Set-Cookie\tSecure\tHTTP-Only\tExpires\tLast used\t"
              "No destroy\tName\tValue\tValue was quoted\tScheme\t"
              "URL\tComment\n",
              out);

        fprintf(out, "Version:\t%d\n", cookie_file_version);

        urldb_save_cookie_hosts(out, &db_root);

        fclose(out);
}


/* exported interface documented in netsurf/url_db.h */
/*
 * Dumps the host tree rooted at db_root, then each of the
 * NUM_SEARCH_TREES search trees in index order, each starting at depth 0.
 */
void urldb_dump(void)
{
        int tree = 0;

        urldb_dump_hosts(&db_root);

        while (tree != NUM_SEARCH_TREES) {
                urldb_dump_search(search_trees[tree], 0);
                tree++;
        }
}


ascii.h
Helpers for ASCII string handling.

ascii_to_lower
static char ascii_to_lower(char c)
Convert an upper case character to lower case.
Definition: ascii.h:212

ascii_is_alpha
static bool ascii_is_alpha(char c)
Test whether a character is alphabetical (upper or lower case).
Definition: ascii.h:75

ascii_is_digit
static bool ascii_is_digit(char c)
Test whether a character is a decimal digit.
Definition: ascii.h:86

version
static char version[32]
Definition: about.c:45

bloom_create
struct bloom_filter * bloom_create(size_t size)
Create a new bloom filter.
Definition: bloom.c:59

bloom_destroy
void bloom_destroy(struct bloom_filter *b)
Destroy a previously-created bloom filter.
Definition: bloom.c:71

bloom_search_hash
bool bloom_search_hash(struct bloom_filter *b, uint32_t hash)
Search the filter for the given hash value, assuming it was added by bloom_insert_hash().
Definition: bloom.c:98

bloom_insert_hash
void bloom_insert_hash(struct bloom_filter *b, uint32_t hash)
Insert a given hash value into the filter, should you already have one to hand.
Definition: bloom.c:82

bloom.h
Trivial bloom filter.

count
static uint32_t count(const http_directive *list, lwc_string *key)
Definition: cache-control.c:157

strndup
char * strndup(const char *s, size_t n)
Duplicate up to n characters of a string.
Definition: utils.c:332

content.h
Content handling interface.

content_type
content_type
The type of a content.
Definition: content_type.h:53

cookie_version
cookie_version
Version of cookie.
Definition: cookie_db.h:38

COOKIE_RFC2965
@ COOKIE_RFC2965
Definition: cookie_db.h:41

COOKIE_NETSCAPE
@ COOKIE_NETSCAPE
Definition: cookie_db.h:39

cookie_manager_add
bool cookie_manager_add(const struct cookie_data *data)
Add/update a cookie to the viewer.
Definition: cookie_manager.c:467

cookie_manager_remove
void cookie_manager_remove(const struct cookie_data *data)
Remove a cookie from viewer.
Definition: cookie_manager.c:511

cookie_manager.h
Cookie Manager (interface).

corestrings.h
Useful interned string pointers (interface).

parent
wimp_w parent
Definition: dialog.c:88

nserror
nserror
Enumeration of error codes.
Definition: errors.h:29

NSERROR_SAVE_FAILED
@ NSERROR_SAVE_FAILED
Failed to save data.
Definition: errors.h:36

NSERROR_NOT_FOUND
@ NSERROR_NOT_FOUND
Requested item not found.
Definition: errors.h:34

NSERROR_NEED_DATA
@ NSERROR_NEED_DATA
More data needed.
Definition: errors.h:46

NSERROR_INVALID
@ NSERROR_INVALID
Invalid data.
Definition: errors.h:49

NSERROR_NOMEM
@ NSERROR_NOMEM
Memory exhaustion.
Definition: errors.h:32

NSERROR_OK
@ NSERROR_OK
No error.
Definition: errors.h:30

root
static struct directory * root
Definition: filename.c:55

type
const char * type
Definition: filetype.cpp:44

http.h
HTTP header parsing functions.

bitmap.h
Generic bitmap handling interface.

inet.h
internet structures and defines

inet_aton
int inet_aton(const char *cp, struct in_addr *inp)
Definition: utils.c:489

inet_pton
int inet_pton(int af, const char *src, void *dst)
Definition: utils.c:512

log.h

NSLOG
#define NSLOG(catname, level, logmsg, args...)
Definition: log.h:116

nsurl.h
NetSurf URL handling (interface).

nsurl_create
nserror nsurl_create(const char *const url_s, nsurl **url)
Create a NetSurf URL object from a URL string.

nsurl_defragment
nserror nsurl_defragment(const nsurl *url, nsurl **no_frag)
Create a NetSurf URL object without a fragment from a NetSurf URL.

nsurl_unref
void nsurl_unref(nsurl *url)
Drop a reference to a NetSurf URL object.

nsurl_hash
uint32_t nsurl_hash(const nsurl *url)
Get a URL's hash value.

nsurl_get
nserror nsurl_get(const nsurl *url, nsurl_component parts, char **url_s, size_t *url_l)
Get URL (section) as a string, from a NetSurf URL object.

nsurl_access
const char * nsurl_access(const nsurl *url)
Access a NetSurf URL object as a string.

nsurl_get_component
lwc_string * nsurl_get_component(const nsurl *url, nsurl_component part)
Get part of a URL as a lwc_string, from a NetSurf URL object.

NSURL_FRAGMENT
@ NSURL_FRAGMENT
Definition: nsurl.h:56

NSURL_SCHEME
@ NSURL_SCHEME
Definition: nsurl.h:45

NSURL_PATH
@ NSURL_PATH
Definition: nsurl.h:52

NSURL_HOST
@ NSURL_HOST
Definition: nsurl.h:49

NSURL_PORT
@ NSURL_PORT
Definition: nsurl.h:50

NSURL_QUERY
@ NSURL_QUERY
Definition: nsurl.h:53

nsurl
struct nsurl nsurl
NetSurf URL object.
Definition: nsurl.h:31

http_parse_strict_transport_security
nserror http_parse_strict_transport_security(const char *header_value, http_strict_transport_security **result)
Parse an HTTP Strict-Transport-Security header value.
Definition: strict-transport-security.c:233

http_strict_transport_security_destroy
void http_strict_transport_security_destroy(http_strict_transport_security *victim)
Destroy a strict transport security object.
Definition: strict-transport-security.c:322

http_strict_transport_security_include_subdomains
bool http_strict_transport_security_include_subdomains(http_strict_transport_security *sts)
Get the value of a strict transport security's includeSubDomains flag.
Definition: strict-transport-security.c:336

http_strict_transport_security_max_age
uint32_t http_strict_transport_security_max_age(http_strict_transport_security *sts)
Get the value of a strict transport security's max-age.
Definition: strict-transport-security.c:329

string.h
Interface to utility string handling.

bloom_filter
Definition: bloom.c:53

cookie_data
Definition: cookie_db.h:44

cookie_data::next
const struct cookie_data * next
Next in list.
Definition: cookie_db.h:46

cookie_internal_data
cookie entry.
Definition: urldb.c:124

cookie_internal_data::expires
time_t expires
Expiry timestamp, or -1 for session.
Definition: urldb.c:136

cookie_internal_data::prev
struct cookie_internal_data * prev
Previous in list.
Definition: urldb.c:125

cookie_internal_data::version
enum cookie_version version
Specification compliance.
Definition: urldb.c:140

cookie_internal_data::path
char * path
Path.
Definition: urldb.c:135

cookie_internal_data::last_used
time_t last_used
Last used time.
Definition: urldb.c:137

cookie_internal_data::next
struct cookie_internal_data * next
Next in list.
Definition: urldb.c:126

cookie_internal_data::domain
char * domain
Domain.
Definition: urldb.c:133

cookie_internal_data::http_only
bool http_only
Only expose to HTTP(S) requests.
Definition: urldb.c:139

cookie_internal_data::path_from_set
bool path_from_set
Path came from Set-Cookie: header.
Definition: urldb.c:134

cookie_internal_data::comment
char * comment
Cookie comment.
Definition: urldb.c:131

cookie_internal_data::domain_from_set
bool domain_from_set
Domain came from Set-Cookie: header.
Definition: urldb.c:132

cookie_internal_data::no_destroy
bool no_destroy
Never destroy this cookie, unless it's expired.
Definition: urldb.c:141

cookie_internal_data::value_was_quoted
bool value_was_quoted
Value was quoted in Set-Cookie:
Definition: urldb.c:130

cookie_internal_data::name
char * name
Cookie name.
Definition: urldb.c:128

cookie_internal_data::secure
bool secure
Only send for HTTPS requests.
Definition: urldb.c:138

cookie_internal_data::value
char * value
Cookie value.
Definition: urldb.c:129

host_part
Definition: urldb.c:230

host_part::parent
struct host_part * parent
Parent host part.
Definition: urldb.c:257

host_part::prev
struct host_part * prev
Previous sibling.
Definition: urldb.c:256

host_part::children
struct host_part * children
Child host parts.
Definition: urldb.c:258

host_part::part
char * part
Part of host string.
Definition: urldb.c:247

host_part::paths
struct path_data paths
Known paths on this host.
Definition: urldb.c:235

host_part::next
struct host_part * next
Next sibling.
Definition: urldb.c:255

host_part::permit_invalid_certs
bool permit_invalid_certs
Allow access to SSL protected resources on this host without verifying certificate authenticity.
Definition: urldb.c:240

host_part::hsts
struct hsts_data hsts
Definition: urldb.c:242

host_part::prot_space
struct prot_space_data * prot_space
Linked list of all protection spaces known for this host and all its schemes and ports.
Definition: urldb.c:253

hsts_data
Definition: urldb.c:225

hsts_data::expires
time_t expires
Expiry time.
Definition: urldb.c:226

hsts_data::include_sub_domains
bool include_sub_domains
Whether to include subdomains.
Definition: urldb.c:227

http_strict_transport_security
Representation of a Strict-Transport-Security.
Definition: strict-transport-security.c:31

path_data
data entry for url
Definition: urldb.c:194

path_data::persistent
bool persistent
This entry should persist.
Definition: urldb.c:203

path_data::urld
struct url_internal_data urld
URL data for resource.
Definition: urldb.c:205

path_data::segment
char * segment
Path segment for this node.
Definition: urldb.c:200

path_data::frag_cnt
unsigned int frag_cnt
Number of entries in path_data::fragment.
Definition: urldb.c:201

path_data::last
struct path_data * last
Last child.
Definition: urldb.c:222

path_data::fragment
char ** fragment
Array of fragments.
Definition: urldb.c:202

path_data::prot_space
const struct prot_space_data * prot_space
Protection space to which this resource belongs.
Definition: urldb.c:212

path_data::next
struct path_data * next
Next sibling.
Definition: urldb.c:218

path_data::cookies
struct cookie_internal_data * cookies
Cookies associated with resource.
Definition: urldb.c:214

path_data::cookies_end
struct cookie_internal_data * cookies_end
Last cookie in list.
Definition: urldb.c:216

path_data::url
nsurl * url
Full URL.
Definition: urldb.c:195

path_data::children
struct path_data * children
Child path segments.
Definition: urldb.c:221

path_data::prev
struct path_data * prev
Previous sibling.
Definition: urldb.c:219

path_data::scheme
lwc_string * scheme
URL scheme for data.
Definition: urldb.c:196

path_data::port
unsigned int port
Port number for data.
Definition: urldb.c:197

path_data::parent
struct path_data * parent
Parent path segment.
Definition: urldb.c:220

prot_space_data
A protection space.
Definition: urldb.c:154

prot_space_data::next
struct prot_space_data * next
Next sibling.
Definition: urldb.c:174

prot_space_data::auth
char * auth
Authentication details for this protection space in form username:password.
Definition: urldb.c:172

prot_space_data::realm
char * realm
Protection realm.
Definition: urldb.c:166

prot_space_data::port
unsigned int port
Port number of canonical hostname of this protection space.
Definition: urldb.c:164

prot_space_data::scheme
lwc_string * scheme
URL scheme of canonical hostname of this protection space.
Definition: urldb.c:158

search_node
search index node
Definition: urldb.c:265

search_node::level
unsigned int level
Node level.
Definition: urldb.c:268

search_node::left
struct search_node * left
Left subtree.
Definition: urldb.c:270

search_node::right
struct search_node * right
Right subtree.
Definition: urldb.c:271

search_node::data
const struct host_part * data
Host tree entry.
Definition: urldb.c:266

url_data
Definition: url_db.h:36

url_internal_data
meta data about a url
Definition: urldb.c:183

url_internal_data::type
content_type type
Type of resource.
Definition: urldb.c:187

url_internal_data::title
char * title
Resource title.
Definition: urldb.c:184

url_internal_data::last_visit
time_t last_visit
Last visit time.
Definition: urldb.c:186

url_internal_data::visits
unsigned int visits
Visit count.
Definition: urldb.c:185

nsc_sntimet
int nsc_sntimet(char *str, size_t size, time_t *timep)
Write the time in seconds since epoch to a buffer.
Definition: time.c:126

nsc_strntimet
nserror nsc_strntimet(const char *str, size_t size, time_t *timep)
Converts a date string to a number of seconds since epoch.
Definition: time.c:980

nsc_snptimet
nserror nsc_snptimet(const char *str, size_t size, time_t *timep)
Parse time in seconds since epoch.
Definition: time.c:147

time.h
Interface to time operations.

url.h
Interface to URL parsing and joining operations.

urldb_set_cert_permissions
void urldb_set_cert_permissions(nsurl *url, bool permit)
Set certificate verification permissions.
Definition: urldb.c:3455

MAXIMUM_URL_LENGTH
#define MAXIMUM_URL_LENGTH

urldb_destroy_path_tree
static void urldb_destroy_path_tree(struct path_data *root)
Destroy a path tree.
Definition: urldb.c:2756

urldb_destroy
void urldb_destroy(void)
Destroy urldb.
Definition: urldb.c:2847

urldb_search_skew
static struct search_node * urldb_search_skew(struct search_node *root)
Rotate a subtree right.
Definition: urldb.c:1592

urldb_add_host_node
static struct host_part * urldb_add_host_node(const char *part, struct host_part *parent)
Add a host node to the tree.
Definition: urldb.c:1006

urldb_get_search_tree_direct
static struct search_node ** urldb_get_search_tree_direct(const char *host)
Get the search tree for a particular host.
Definition: urldb.c:1172

urldb_add_path_node
static struct path_data * urldb_add_path_node(lwc_string *scheme, unsigned int port, const char *segment, lwc_string *fragment, struct path_data *parent)
Add a path node to the tree.
Definition: urldb.c:1104

GROW_MATCHED_COOKIES
#define GROW_MATCHED_COOKIES

urldb_save_cookie_hosts
static void urldb_save_cookie_hosts(FILE *fp, struct host_part *parent)
Save a host subtree's cookies.
Definition: urldb.c:2675

empty
static struct search_node empty
Definition: urldb.c:282

urldb_search_insert_internal
static struct search_node * urldb_search_insert_internal(struct search_node *root, struct search_node *n)
Insert node into search tree.
Definition: urldb.c:1642

SKIP_T
#define SKIP_T

urldb_get_search_tree
static struct search_node * urldb_get_search_tree(const char *host)
Get the search tree for a particular host.
Definition: urldb.c:1191

urldb_destroy_search_tree
static void urldb_destroy_search_tree(struct search_node *root)
Destroy a search tree.
Definition: urldb.c:2830

NUM_SEARCH_TREES
#define NUM_SEARCH_TREES
Search trees - one per letter + 1 for IPs + 1 for Everything Else.
Definition: urldb.c:278

urldb_free_cookie
static void urldb_free_cookie(struct cookie_internal_data *c)
Free a cookie.
Definition: urldb.c:1817

ST_IP
#define ST_IP
Definition: urldb.c:279

urldb_set_url_persistence
nserror urldb_set_url_persistence(nsurl *url, bool persist)
Set the cross-session persistence of the entry for an URL.
Definition: urldb.c:3122

urldb_insert_cookie
static bool urldb_insert_cookie(struct cookie_internal_data *c, lwc_string *scheme, nsurl *url)
Insert a cookie into the database.
Definition: urldb.c:2257

urldb_write_paths
static void urldb_write_paths(const struct path_data *parent, const char *host, FILE *fp, char **path, int *path_alloc, int *path_used, time_t expiry)
Write paths associated with a host.
Definition: urldb.c:353

urldb_set_cookie
bool urldb_set_cookie(const char *header, nsurl *url, nsurl *referer)
Parse Set-Cookie header and insert cookie(s) into database.
Definition: urldb.c:3734

urldb_iterate_entries_host
static bool urldb_iterate_entries_host(struct search_node *parent, bool(*url_callback)(nsurl *url, const struct url_data *data), bool(*cookie_callback)(const struct cookie_data *data))
Host data iterator (internal)
Definition: urldb.c:963

urldb_search_match_prefix
static int urldb_search_match_prefix(const struct host_part *a, const char *b)
Compare host_part with prefix.
Definition: urldb.c:746

urldb_write_timet
static nserror urldb_write_timet(FILE *fp, time_t val)
Write a time_t to a file portably.
Definition: urldb.c:327

urldb_iterate_cookies
void urldb_iterate_cookies(bool(*callback)(const struct cookie_data *data))
Definition: urldb.c:3719

urldb_get_hsts_enabled
bool urldb_get_hsts_enabled(struct nsurl *url)
Determine if HSTS policy is enabled for an URL.
Definition: urldb.c:3573

URL_FILE_VERSION
#define URL_FILE_VERSION
Current URL database file version.
Definition: urldb.c:300

urldb_save_cookies
void urldb_save_cookies(const char *filename)
Save persistent cookies to file.
Definition: urldb.c:4448

urldb_get_url
nsurl * urldb_get_url(nsurl *url)
Extract an URL from the db.
Definition: urldb.c:3327

urldb_add_path_fragment
static struct path_data * urldb_add_path_fragment(struct path_data *segment, lwc_string *fragment)
Add a fragment to a path segment.
Definition: urldb.c:1055

urldb_set_url_title
nserror urldb_set_url_title(nsurl *url, const char *title)
Set an URL's title string, replacing any existing one.
Definition: urldb.c:3226

urldb_save
nserror urldb_save(const char *filename)
Export the current database to file.
Definition: urldb.c:3094

urldb_add_url
bool urldb_add_url(nsurl *url)
Insert an URL into the database.
Definition: urldb.c:3140

urldb_search_split
static struct search_node * urldb_search_split(struct search_node *root)
Rotate a node left, increasing the parent's level.
Definition: urldb.c:1615

urldb_dump_search
static void urldb_dump_search(struct search_node *parent, int depth)
Dump search tree.
Definition: urldb.c:1504

urldb_dump
void urldb_dump(void)
Dump URL database to stderr.
Definition: urldb.c:4481

urldb_iterate_partial_path
static bool urldb_iterate_partial_path(const struct path_data *parent, const char *prefix, bool(*callback)(nsurl *url, const struct url_data *data))
Partial path iterator (internal)
Definition: urldb.c:906

ST_DN
#define ST_DN
Definition: urldb.c:281

urldb_delete_cookie_paths
static void urldb_delete_cookie_paths(const char *domain, const char *path, const char *name, struct path_data *parent)
Deletes paths from a cookie.
Definition: urldb.c:2538

MIN_COOKIE_FILE_VERSION
#define MIN_COOKIE_FILE_VERSION
Minimum cookie database file version.
Definition: urldb.c:291

urldb_set_auth_details
void urldb_set_auth_details(nsurl *url, const char *realm, const char *auth)
Set authentication data for an URL.
Definition: urldb.c:3342

BLOOM_SIZE
#define BLOOM_SIZE
Size of url filter.
Definition: urldb.c:317

urldb_get_cert_permissions
bool urldb_get_cert_permissions(nsurl *url)
Retrieve certificate verification permissions from database.
Definition: urldb.c:3480

loaded_cookie_file_version
static int loaded_cookie_file_version
loaded cookie file version
Definition: urldb.c:295

urldb_set_hsts_policy
bool urldb_set_hsts_policy(struct nsurl *url, const char *header)
Set HSTS policy for an URL.
Definition: urldb.c:3502

urldb_iterate_entries_path
static bool urldb_iterate_entries_path(const struct path_data *parent, bool(*url_callback)(nsurl *url, const struct url_data *data), bool(*cookie_callback)(const struct cookie_data *data))
Path data iterator (internal)
Definition: urldb.c:584

urldb_dump_paths
static void urldb_dump_paths(struct path_data *parent)
Dump URL database paths to stderr.
Definition: urldb.c:1438

urldb_delete_cookie_hosts
static void urldb_delete_cookie_hosts(const char *domain, const char *path, const char *name, struct host_part *parent)
Deletes cookie hosts and their associated paths.
Definition: urldb.c:2596

urldb_load
nserror urldb_load(const char *filename)
Import an URL database from file, replacing any existing database.
Definition: urldb.c:2876

urldb_add_path
static struct path_data * urldb_add_path(lwc_string *scheme, unsigned int port, const struct host_part *host, char *path_query, lwc_string *fragment, nsurl *url)
Add a path to the database, creating any intermediate entries.
Definition: urldb.c:2086

urldb_iterate_entries
void urldb_iterate_entries(bool(*callback)(nsurl *url, const struct url_data *data))
Definition: urldb.c:3702

db_root
static struct host_part db_root
Root database handle.
Definition: urldb.c:275

urldb_find_url
static struct path_data * urldb_find_url(nsurl *url)
Find an URL in the database.
Definition: urldb.c:1358

urldb_add_host
static struct host_part * urldb_add_host(const char *host)
Add a host to the database, creating any intermediate entries.
Definition: urldb.c:2164

urldb_get_url_data
const struct url_data * urldb_get_url_data(nsurl *url)
Find data for an URL.
Definition: urldb.c:3309

urldb_destroy_prot_space
static void urldb_destroy_prot_space(struct prot_space_data *space)
Destroy protection space data.
Definition: urldb.c:2741

urldb_get_auth_details
const char * urldb_get_auth_details(nsurl *url, const char *realm)
Look up authentication details in database.
Definition: urldb.c:3405

urldb_destroy_path_node_content
static void urldb_destroy_path_node_content(struct path_data *node)
Destroy the contents of a path node.
Definition: urldb.c:2709

urldb_save_search_tree
static void urldb_save_search_tree(struct search_node *parent, FILE *fp)
Save a search (sub)tree.
Definition: urldb.c:514

urldb_search_find
static const struct host_part * urldb_search_find(struct search_node *root, const char *host)
Find a node in a search tree.
Definition: urldb.c:1270

search_trees
static struct search_node * search_trees[NUM_SEARCH_TREES]
Definition: urldb.c:283

urldb_update_url_visit_data
nserror urldb_update_url_visit_data(nsurl *url)
Update an URL's visit data.
Definition: urldb.c:3274

urldb_destroy_host_tree
static void urldb_destroy_host_tree(struct host_part *root)
Destroy a host tree.
Definition: urldb.c:2792

url_bloom
static struct bloom_filter * url_bloom
filter for url presence in database
Definition: urldb.c:313

urldb_add_path_fragment_cmp
static int urldb_add_path_fragment_cmp(const void *a, const void *b)
Fragment comparator callback for qsort.
Definition: urldb.c:1041

urldb_reset_url_visit_data
void urldb_reset_url_visit_data(nsurl *url)
Reset an URL's visit statistics.
Definition: urldb.c:3293

urldb_search_insert
static struct search_node * urldb_search_insert(struct search_node *root, const struct host_part *data)
Insert a node into the search tree.
Definition: urldb.c:1679

FIND_T
#define FIND_T

MIN_URL_FILE_VERSION
#define MIN_URL_FILE_VERSION
Minimum URL database file version.
Definition: urldb.c:298

urldb_concat_cookie
static bool urldb_concat_cookie(struct cookie_internal_data *c, int version, int *used, int *alloc, char **buf)
Concatenate a cookie into the provided buffer.
Definition: urldb.c:2376

urldb_search_match_host
static int urldb_search_match_host(const struct host_part *a, const struct host_part *b)
Compare a pair of host parts.
Definition: urldb.c:1554

urldb_set_url_content_type
nserror urldb_set_url_content_type(nsurl *url, content_type type)
Set an URL's content type.
Definition: urldb.c:3256

urldb_parse_cookie
static struct cookie_internal_data * urldb_parse_cookie(nsurl *url, const char **cookie)
Parse a cookie.
Definition: urldb.c:1838

urldb_delete_cookie
void urldb_delete_cookie(const char *domain, const char *path, const char *name)
Delete a cookie.
Definition: urldb.c:4273

urldb_dump_hosts
static void urldb_dump_hosts(struct host_part *parent)
Dump URL database hosts to stderr.
Definition: urldb.c:1477

urldb_destroy_cookie
static void urldb_destroy_cookie(struct cookie_internal_data *c)
Destroy a cookie node.
Definition: urldb.c:2692

urldb_count_urls
static void urldb_count_urls(const struct path_data *root, time_t expiry, unsigned int *count)
Count number of URLs associated with a host.
Definition: urldb.c:474

urldb_get_cookie
char * urldb_get_cookie(nsurl *url, bool include_http_only)
Retrieve cookies for an URL.
Definition: urldb.c:3997

urldb_parse_avpair
static bool urldb_parse_avpair(struct cookie_internal_data *c, char *n, char *v, bool was_quoted)
Parse a cookie avpair.
Definition: urldb.c:1709

urldb_match_path
static struct path_data * urldb_match_path(const struct path_data *parent, const char *path, lwc_string *scheme, unsigned short port)
Match a path string.
Definition: urldb.c:1302

urldb_save_cookie_paths
static void urldb_save_cookie_paths(FILE *fp, struct path_data *parent)
Save a path subtree's cookies.
Definition: urldb.c:2618

ST_EE
#define ST_EE
Definition: urldb.c:280

urldb_iterate_partial
void urldb_iterate_partial(const char *prefix, bool(*callback)(nsurl *url, const struct url_data *data))
Definition: urldb.c:3635

urldb_load_cookies
void urldb_load_cookies(const char *filename)
Load a cookie file into the database.
Definition: urldb.c:4281

urldb_iterate_partial_host
static bool urldb_iterate_partial_host(struct search_node *root, const char *prefix, bool(*callback)(nsurl *url, const struct url_data *data))
Partial host iterator (internal)
Definition: urldb.c:816

COOKIE_FILE_VERSION
#define COOKIE_FILE_VERSION
Current cookie database file version.
Definition: urldb.c:293

urldb__host_is_ip_address
static bool urldb__host_is_ip_address(const char *host)
Check whether a host string is an IP address.
Definition: urldb.c:647

urldb_search_match_string
static int urldb_search_match_string(const struct host_part *a, const char *b)
Compare host part with a string.
Definition: urldb.c:1204

urldb.h
Unified URL information database internal interface.

nsoption.h
Option reading and saving interface.

nsoption_int
#define nsoption_int(OPTION)
Get the value of an integer option.
Definition: nsoption.h:313

utils.h
Interface to a number of general purpose functionality.

max
#define max(x, y)
Definition: utils.h:50

path
static nserror path(const struct redraw_context *ctx, const plot_style_t *pstyle, const float *p, unsigned int n, const float transform[6])
Plots a path.
Definition: plot.c:821