NetSurf
Data Structures | Macros | Typedefs | Functions | Variables
fetch.c File Reference

Implementation of fetching of data from a URL. More...

#include <stdlib.h>
#include <assert.h>
#include <errno.h>
#include <stdbool.h>
#include <string.h>
#include <strings.h>
#include <time.h>
#include <libwapcaplet/libwapcaplet.h>
#include "utils/config.h"
#include "utils/corestrings.h"
#include "utils/nsoption.h"
#include "utils/log.h"
#include "utils/messages.h"
#include "utils/nsurl.h"
#include "utils/ring.h"
#include "netsurf/misc.h"
#include "desktop/gui_internal.h"
#include "content/fetch.h"
#include "content/fetchers.h"
#include "content/fetchers/resource.h"
#include "content/fetchers/about/about.h"
#include "content/fetchers/curl.h"
#include "content/fetchers/data.h"
#include "content/fetchers/file/file.h"
#include "javascript/fetcher.h"
#include "content/urldb.h"
Include dependency graph for fetch.c:

Go to the source code of this file.

Data Structures

struct  scheme_fetcher_s
 Information about a fetcher for a given scheme. More...
 
struct  fetch
 Information for a single fetch. More...
 

Macros

#define MAX_FETCHERS   10
 The maximum number of fetchers that can be added. More...
 
#define SCHEDULE_TIME   10
 The time in ms between polling the fetchers. More...
 
#define FDSET_TIMEOUT   1000
 The fdset timeout in ms. More...
 

Typedefs

typedef struct scheme_fetcher_s scheme_fetcher
 Information about a fetcher for a given scheme. More...
 

Functions

static void fetch_ref_fetcher (int fetcherd)
 
static void fetch_unref_fetcher (int fetcherd)
 
static int get_fetcher_for_scheme (lwc_string *scheme)
 Find a suitable fetcher for a scheme. More...
 
static bool fetch_dispatch_job (struct fetch *fetch)
 Dispatch a single job. More...
 
static bool fetch_choose_and_dispatch (void)
 Choose and dispatch a single job. More...
 
static void dump_rings (void)
 
static bool fetch_dispatch_jobs (void)
 Dispatch as many jobs as we have room to dispatch. More...
 
static void fetcher_poll (void *unused)
 
nserror fetcher_init (void)
 Initialise all registered fetchers. More...
 
void fetcher_quit (void)
 Clean up for quit. More...
 
nserror fetcher_add (lwc_string *scheme, const struct fetcher_operation_table *ops)
 Register a fetcher for a scheme. More...
 
nserror fetch_fdset (fd_set *read_fd_set, fd_set *write_fd_set, fd_set *except_fd_set, int *maxfd_out)
 Get the set of file descriptors the fetchers are currently using. More...
 
nserror fetch_start (nsurl *url, nsurl *referer, fetch_callback callback, void *p, bool only_2xx, const char *post_urlenc, const struct fetch_multipart_data *post_multipart, bool verifiable, bool downgrade_tls, const char *headers[], struct fetch **fetch_out)
 Start fetching data for the given URL. More...
 
void fetch_abort (struct fetch *f)
 Abort a fetch. More...
 
void fetch_free (struct fetch *f)
 Free a fetch structure and associated resources. More...
 
bool fetch_can_fetch (const nsurl *url)
 Check if a URL's scheme can be fetched. More...
 
void fetch_change_callback (struct fetch *fetch, fetch_callback callback, void *p)
 Change the callback function for a fetch. More...
 
long fetch_http_code (struct fetch *fetch)
 Get the HTTP response code. More...
 
struct fetch_multipart_data * fetch_multipart_data_clone (const struct fetch_multipart_data *list)
 Clone a linked list of fetch_multipart_data. More...
 
const char * fetch_multipart_data_find (const struct fetch_multipart_data *list, const char *name)
 Find an entry in a fetch_multipart_data. More...
 
void fetch_multipart_data_destroy (struct fetch_multipart_data *list)
 Free a linked list of fetch_multipart_data. More...
 
nserror fetch_multipart_data_new_kv (struct fetch_multipart_data **list, const char *name, const char *value)
 Create an entry for a fetch_multipart_data. More...
 
void fetch_send_callback (const fetch_msg *msg, struct fetch *fetch)
 send message to fetch More...
 
void fetch_remove_from_queues (struct fetch *fetch)
 remove a queued fetch More...
 
void fetch_set_http_code (struct fetch *fetch, long http_code)
 set the http code of a fetch More...
 
void fetch_set_cookie (struct fetch *fetch, const char *data)
 set cookie data on a fetch More...
 

Variables

static scheme_fetcher fetchers [MAX_FETCHERS]
 
static struct fetch * fetch_ring = NULL
 Ring of active fetches. More...
 
static struct fetch * queue_ring = NULL
 Ring of queued fetches. More...
 

Detailed Description

Implementation of fetching of data from a URL.

The implementation is the fetch factory and the generic operations around the fetcher specific methods.

Active fetches are held in the circular linked list fetch_ring. There may be at most nsoption max_fetchers_per_host active requests per Host: header. There may be at most nsoption max_fetchers active requests overall. Inactive fetches are stored in the queue_ring waiting for use.

Definition in file fetch.c.

Macro Definition Documentation

◆ FDSET_TIMEOUT

#define FDSET_TIMEOUT   1000

The fdset timeout in ms.

Definition at line 74 of file fetch.c.

◆ MAX_FETCHERS

#define MAX_FETCHERS   10

The maximum number of fetchers that can be added.

Definition at line 64 of file fetch.c.

◆ SCHEDULE_TIME

#define SCHEDULE_TIME   10

The time in ms between polling the fetchers.

Todo:
The schedule timeout should be profiled to see if there is a better value or even if it needs to be dynamic.

Definition at line 71 of file fetch.c.

Typedef Documentation

◆ scheme_fetcher

Information about a fetcher for a given scheme.

Function Documentation

◆ dump_rings()

static void dump_rings ( void  )
static

Definition at line 203 of file fetch.c.

References fetch_ring, NSLOG, nsurl_access(), queue_ring, fetch::r_next, and fetch::url.

Referenced by fetch_dispatch_jobs().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ fetch_abort()

void fetch_abort ( struct fetch * f)

Abort a fetch.

Definition at line 537 of file fetch.c.

References fetcher_operation_table::abort, FETCH__INTERNAL_ABORTED, fetch::fetcher_handle, fetch::fetcherd, fetchers, fetch::last_msg, NSLOG, nsurl_access(), scheme_fetcher_s::ops, and fetch::url.

Referenced by llcache_fetch_auth(), llcache_fetch_callback(), llcache_fetch_notmodified(), llcache_fetch_redirect(), llcache_handle_abort(), and llcache_object_destroy().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ fetch_can_fetch()

bool fetch_can_fetch ( const nsurl * url)

Check if a URL's scheme can be fetched.

Parameters
url: URL to check
Returns
true if the scheme is supported

Definition at line 586 of file fetch.c.

References fetcher_operation_table::acceptable, fetch::fetcherd, fetchers, get_fetcher_for_scheme(), nsurl_get_component(), NSURL_SCHEME, scheme_fetcher_s::ops, and fetch::url.

Referenced by llcache_fetch_redirect(), llcache_handle_retrieve(), ro_uri_message_received(), and ro_url_message_received().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ fetch_change_callback()

void fetch_change_callback ( struct fetch * fetch,
fetch_callback  callback,
void *  p 
)

Change the callback function for a fetch.

Definition at line 602 of file fetch.c.

References fetch::callback, and fetch::p.

◆ fetch_choose_and_dispatch()

static bool fetch_choose_and_dispatch ( void  )
static

Choose and dispatch a single job.

Return false if we failed to dispatch anything.

We don't check the overall dispatch size here because we're not called unless there is room in the fetch queue for us.

Definition at line 173 of file fetch.c.

References fetch_dispatch_job(), fetch_ring, fetch::host, nsoption_int, queue_ring, fetch::r_next, and RING_COUNTBYLWCHOST.

Referenced by fetch_dispatch_jobs().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ fetch_dispatch_job()

static bool fetch_dispatch_job ( struct fetch * fetch)
static

Dispatch a single job.

Definition at line 148 of file fetch.c.

References fetch::fetch_is_active, fetch_ring, fetch::fetcher_handle, fetch::fetcherd, fetchers, NSLOG, nsurl_access(), scheme_fetcher_s::ops, queue_ring, RING_INSERT, RING_REMOVE, fetcher_operation_table::start, and fetch::url.

Referenced by fetch_choose_and_dispatch().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ fetch_dispatch_jobs()

static bool fetch_dispatch_jobs ( void  )
static

Dispatch as many jobs as we have room to dispatch.

Returns
true if there are active fetchers that require polling else false.

Definition at line 231 of file fetch.c.

References dump_rings(), fetch_choose_and_dispatch(), fetch_ring, NSLOG, nsoption_int, queue_ring, and RING_GETSIZE.

Referenced by fetch_fdset(), fetch_start(), and fetcher_poll().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ fetch_fdset()

nserror fetch_fdset ( fd_set *  read_fd_set,
fd_set *  write_fd_set,
fd_set *  except_fd_set,
int *  maxfd 
)

Get the set of file descriptors the fetchers are currently using.

This obtains the file descriptors the fetch system is using to obtain data. It will cause the fetchers to make progress, if possible, potentially completing fetches before requiring activity on file descriptors.

If a set of descriptors is returned (maxfd is not -1), the caller is expected to wait on them (with select etc.) and continue to obtain the fdset with this call. This will switch the fetchers from polled mode to waiting for network activity, which is much more efficient.

Note
If the caller does not subsequently obtain the fdset again, the fetchers will fall back to the less efficient polled operation. The fallback to polled operation will only occur after a timeout, which introduces additional delay.
Parameters
[out] read_fd_set: The fd set for read.
[out] write_fd_set: The fd set for write.
[out] except_fd_set: The fd set for exceptions.
[out] maxfd: The highest fd number in the set or -1 if no fd available.
Returns
NSERROR_OK on success or appropriate error code.
Note
Adjusting the schedule time is only done for curl currently. This is because it is assumed to be the only fetcher that can possibly have an fd to select on. All the other fetchers continue to need polling frequently.

Definition at line 385 of file fetch.c.

References fetcher_operation_table::fdset, FDSET_TIMEOUT, fetch_dispatch_jobs(), fetcher_poll(), fetch::fetcherd, fetchers, guit, MAX_FETCHERS, netsurf_table::misc, NSERROR_OK, NSLOG, scheme_fetcher_s::ops, fetcher_operation_table::poll, and gui_misc_table::schedule.

Referenced by ami_get_msg(), monkey_run(), nsbeos_gui_poll(), and nsgtk_main().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ fetch_free()

void fetch_free ( struct fetch * f)

Free a fetch structure and associated resources.

Definition at line 548 of file fetch.c.

References fetch_msg::data, fetch_msg::error, FETCH_ERROR, FETCH_MIN_FINISHED_MSG, fetch_send_callback(), fetch_unref_fetcher(), fetch::fetcher_handle, fetch::fetcherd, fetchers, fetcher_operation_table::free, fetch::host, fetch::last_msg, NSLOG, nsurl_access(), nsurl_unref(), scheme_fetcher_s::ops, fetch::referer, fetch_msg::type, and fetch::url.

Referenced by fetch_curl_abort(), fetch_curl_done(), fetch_data_poll(), fetch_file_poll(), fetch_javascript_poll(), fetch_resource_poll(), fetch_rsrc_poll(), and html_css_fetcher_poll().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ fetch_http_code()

long fetch_http_code ( struct fetch * fetch)

Get the HTTP response code.

Definition at line 612 of file fetch.c.

References fetch::http_code.

Referenced by llcache_fetch_process_data(), and llcache_fetch_redirect().

Here is the caller graph for this function:

◆ fetch_multipart_data_clone()

struct fetch_multipart_data * fetch_multipart_data_clone ( const struct fetch_multipart_data * list)

Clone a linked list of fetch_multipart_data.

Parameters
list: List to clone
Returns
Pointer to head of cloned list, or NULL on failure

Definition at line 620 of file fetch.c.

References fetch_multipart_data_destroy(), fetch_multipart_data::file, fetch_multipart_data::name, fetch_multipart_data::next, fetch_multipart_data::rawfile, result, and fetch_multipart_data::value.

Referenced by browser_window_navigate(), fetch_curl_alloc_postdata(), and llcache_post_data_clone().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ fetch_multipart_data_destroy()

void fetch_multipart_data_destroy ( struct fetch_multipart_data * list)

Free a linked list of fetch_multipart_data.

Parameters
list: Pointer to head of list to free

Definition at line 701 of file fetch.c.

References fetch_multipart_data::file, fetch_multipart_data::name, fetch_multipart_data::next, NSLOG, fetch_multipart_data::rawfile, and fetch_multipart_data::value.

Referenced by browser_window__free_fetch_parameters(), browser_window__reload_current_parameters(), fetch_curl_free_postdata(), fetch_multipart_data_clone(), form_dom_to_data(), form_submit(), and llcache_object_destroy().

Here is the caller graph for this function:

◆ fetch_multipart_data_find()

const char * fetch_multipart_data_find ( const struct fetch_multipart_data * list,
const char *  name 
)

Find an entry in a fetch_multipart_data.

Parameters
list: Pointer to the multipart list
name: The name to look for in the list
Returns
The value found, or NULL if not present

Definition at line 686 of file fetch.c.

References fetch_multipart_data::name, fetch_multipart_data::next, and fetch_multipart_data::value.

Referenced by navigate_internal_query_auth(), navigate_internal_query_fetcherror(), navigate_internal_query_ssl(), and navigate_internal_query_timeout().

Here is the caller graph for this function:

◆ fetch_multipart_data_new_kv()

nserror fetch_multipart_data_new_kv ( struct fetch_multipart_data **  list,
const char *  name,
const char *  value 
)

Create an entry for a fetch_multipart_data.

If an entry exists of the same name, it will NOT be overwritten

Parameters
list: Pointer to the pointer to the current multipart list
name: The name of the entry to create
value: The value of the entry to create
Returns
The result of the attempt

Definition at line 721 of file fetch.c.

References fetch_multipart_data::name, fetch_multipart_data::next, NSERROR_NOMEM, NSERROR_OK, and fetch_multipart_data::value.

Referenced by browser_window__handle_bad_certs(), browser_window__handle_fetcherror(), browser_window__handle_login(), and browser_window__handle_timeout().

Here is the caller graph for this function:

◆ fetch_ref_fetcher()

static void fetch_ref_fetcher ( int  fetcherd)
inline static

Definition at line 112 of file fetch.c.

References fetch::fetcherd, fetchers, and scheme_fetcher_s::refcount.

Referenced by fetch_start(), and fetcher_add().

Here is the caller graph for this function:

◆ fetch_remove_from_queues()

void fetch_remove_from_queues ( struct fetch * fetch)

remove a queued fetch

Definition at line 767 of file fetch.c.

References fetch::fetch_is_active, fetch_ring, fetch::fetcher_handle, NSLOG, queue_ring, RING_GETSIZE, and RING_REMOVE.

Referenced by fetch_curl_abort(), fetch_curl_stop(), fetch_data_poll(), fetch_file_poll(), fetch_javascript_poll(), fetch_resource_poll(), fetch_rsrc_poll(), and html_css_fetcher_poll().

Here is the caller graph for this function:

◆ fetch_send_callback()

void fetch_send_callback ( const fetch_msg msg,
struct fetch * fetch 
)

send message to fetch

Definition at line 757 of file fetch.c.

References fetch::callback, fetch::last_msg, fetch::p, and fetch_msg::type.

Referenced by fetch_curl_data(), fetch_curl_done(), fetch_curl_header(), fetch_curl_process_headers(), fetch_curl_progress(), fetch_curl_report_certs_upstream(), fetch_data_send_callback(), fetch_file_send_callback(), fetch_free(), fetch_javascript_send_callback(), fetch_resource_send_callback(), fetch_rsrc_send_callback(), and html_css_fetcher_send_callback().

Here is the caller graph for this function:

◆ fetch_set_cookie()

void fetch_set_cookie ( struct fetch * fetch,
const char *  data 
)

set cookie data on a fetch

Todo:
Long-term, this needs to be replaced with a comparison against the origin fetch URI. In the case where a nested object requests a fetch, the origin URI is the nested object's parent URI, whereas the referer for the fetch will be the nested object's URI.

Definition at line 803 of file fetch.c.

References fetch::referer, fetch::url, urldb_set_cookie(), and fetch::verifiable.

Referenced by fetch_curl_header().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ fetch_set_http_code()

void fetch_set_http_code ( struct fetch * fetch,
long  http_code 
)

set the http code of a fetch

Definition at line 794 of file fetch.c.

References fetch::http_code, and NSLOG.

Referenced by fetch_curl_data(), fetch_curl_process_headers(), fetch_data_poll(), fetch_file_process_dir(), fetch_file_process_error(), fetch_file_process_plain(), fetch_javascript_handler(), fetch_resource_data_handler(), fetch_resource_notfound_handler(), fetch_resource_redirect_handler(), fetch_rsrc_poll(), and html_css_fetcher_poll().

Here is the caller graph for this function:

◆ fetch_start()

nserror fetch_start ( nsurl * url,
nsurl * referer,
fetch_callback  callback,
void *  p,
bool  only_2xx,
const char *  post_urlenc,
const struct fetch_multipart_data * post_multipart,
bool  verifiable,
bool  downgrade_tls,
const char *  headers[],
struct fetch **  fetch_out 
)

Start fetching data for the given URL.

The function returns immediately. The fetch may be queued for later processing.

A pointer to an opaque struct fetch is returned, which can be passed to fetch_abort() to abort the fetch at any time. Returns NULL if memory is exhausted (or some other fatal error occurred).

The caller must supply a callback function which is called when anything interesting happens. The callback function is first called with msg FETCH_HEADER, with the header in data, then one or more times with FETCH_DATA with some data for the url, and finally with FETCH_FINISHED. Alternatively, FETCH_ERROR indicates an error occurred: data contains an error message. FETCH_REDIRECT may replace the FETCH_HEADER, FETCH_DATA, FETCH_FINISHED sequence if the server sends a replacement URL.

Parameters
url: URL to fetch
referer
callback
p
only_2xx
post_urlenc
post_multipart
verifiable
downgrade_tls
headers
fetch_out: pointer to receive the new fetch object.
Returns
NSERROR_OK and fetch_out updated else appropriate error code
Todo:
The fetchers setup should return nserror and that be passed back rather than assuming a bad url

Definition at line 449 of file fetch.c.

References fetch::callback, fetch(), fetch_dispatch_jobs(), fetch_ref_fetcher(), fetch::fetcher_handle, fetcher_poll(), fetch::fetcherd, fetchers, get_fetcher_for_scheme(), guit, fetch::host, netsurf_table::misc, NSERROR_BAD_URL, NSERROR_NO_FETCH_HANDLER, NSERROR_NOMEM, NSERROR_OK, NSLOG, nsurl_access(), nsurl_get_component(), NSURL_HOST, nsurl_ref(), NSURL_SCHEME, nsurl_unref(), scheme_fetcher_s::ops, fetch::p, queue_ring, fetch::referer, RING_INSERT, gui_misc_table::schedule, fetcher_operation_table::setup, fetch::url, and fetch::verifiable.

Referenced by llcache_object_refetch().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ fetch_unref_fetcher()

static void fetch_unref_fetcher ( int  fetcherd)
inline static

Definition at line 117 of file fetch.c.

References fetch::fetcherd, fetchers, fetcher_operation_table::finalise, scheme_fetcher_s::ops, and scheme_fetcher_s::refcount.

Referenced by fetch_free(), and fetcher_quit().

Here is the caller graph for this function:

◆ fetcher_add()

nserror fetcher_add ( lwc_string *  scheme,
const struct fetcher_operation_table * ops 
)

Register a fetcher for a scheme.

Parameters
scheme: The scheme the fetcher is for (caller relinquishes ownership)
ops: The operations for the fetcher.
Returns
NSERROR_OK or appropriate error code.

Definition at line 357 of file fetch.c.

References fetch_ref_fetcher(), fetch::fetcherd, fetchers, fetcher_operation_table::initialise, MAX_FETCHERS, NSERROR_INIT_FAILED, NSERROR_OK, scheme_fetcher_s::ops, and scheme_fetcher_s::scheme.

Referenced by fetch_curl_register(), fetch_data_register(), fetch_file_register(), fetch_javascript_register(), fetch_resource_register(), fetch_rsrc_register(), and html_css_fetcher_register().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ fetcher_init()

nserror fetcher_init ( void  )

Initialise all registered fetchers.

Returns
NSERROR_OK or error code

Definition at line 285 of file fetch.c.

References fetch_curl_register(), fetch_data_register(), fetch_file_register(), fetch_javascript_register(), fetch_resource_register(), and NSERROR_OK.

Referenced by netsurf_init().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ fetcher_poll()

static void fetcher_poll ( void *  unused)
static

Definition at line 262 of file fetch.c.

References fetch_dispatch_jobs(), fetcher_poll(), fetch::fetcherd, fetchers, guit, MAX_FETCHERS, netsurf_table::misc, NSLOG, scheme_fetcher_s::ops, fetcher_operation_table::poll, gui_misc_table::schedule, and SCHEDULE_TIME.

Referenced by fetch_fdset(), fetch_start(), and fetcher_poll().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ fetcher_quit()

void fetcher_quit ( void  )

Clean up for quit.

Must be called before exiting.

Definition at line 322 of file fetch.c.

References fetch_unref_fetcher(), fetch::fetcherd, fetchers, MAX_FETCHERS, NSLOG, and scheme_fetcher_s::refcount.

Referenced by netsurf_exit().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ get_fetcher_for_scheme()

static int get_fetcher_for_scheme ( lwc_string *  scheme)
static

Find a suitable fetcher for a scheme.

Definition at line 129 of file fetch.c.

References fetch::fetcherd, fetchers, and MAX_FETCHERS.

Referenced by fetch_can_fetch(), and fetch_start().

Here is the caller graph for this function:

Variable Documentation

◆ fetch_ring

struct fetch* fetch_ring = NULL
static

Ring of active fetches.

Definition at line 105 of file fetch.c.

Referenced by dump_rings(), fetch_choose_and_dispatch(), fetch_dispatch_job(), fetch_dispatch_jobs(), and fetch_remove_from_queues().

◆ fetchers

scheme_fetcher fetchers[MAX_FETCHERS]
static

◆ queue_ring

struct fetch* queue_ring = NULL
static