95 while (list != NULL) {
108 entry = malloc(
sizeof (*entry));
161 const char *leafname,
164 lwc_string *mime_type)
175 fp = fopen(fname,
"wb");
178 NSLOG(netsurf, INFO,
"fopen(): %s", strerror(errno));
182 fwrite(data,
sizeof(*data), data_len, fp);
209 strbuf = calloc(1, stringlen + 1);
210 if (strbuf == NULL) {
213 memcpy(strbuf,
string, stringlen);
215 matches =
regexec(preg, strbuf, nmatch, pmatch, eflags);
235 const uint8_t *source,
241 unsigned long offset = 0;
242 unsigned int imports = 0;
248 (
SLEN(
"@import") < size) && (offset <= (size -
SLEN(
"@import")));
250 if (source[offset] ==
'@' &&
261 rewritten = malloc(size + imports * 20);
262 if (rewritten == NULL)
267 while (offset < size) {
268 const uint8_t *import_url = NULL;
269 char *import_url_copy;
270 int import_url_len = 0;
276 (
const char *)source + offset,
284 if (match[2].rm_so != -1) {
285 import_url = source + offset + match[2].
rm_so;
286 import_url_len = match[2].
rm_eo - match[2].
rm_so;
287 }
else if (match[4].rm_so != -1) {
288 import_url = source + offset + match[4].
rm_so;
289 import_url_len = match[4].
rm_eo - match[4].
rm_so;
290 }
else if (match[6].rm_so != -1) {
291 import_url = source + offset + match[6].
rm_so;
292 import_url_len = match[6].
rm_eo - match[6].
rm_so;
293 }
else if (match[8].rm_so != -1) {
294 import_url = source + offset + match[8].
rm_so;
295 import_url_len = match[8].
rm_eo - match[8].
rm_so;
296 }
else if (match[10].rm_so != -1) {
297 import_url = source + offset + match[10].
rm_so;
298 import_url_len = match[10].
rm_eo - match[10].
rm_so;
300 assert(import_url != NULL);
302 import_url_copy =
strndup((
const char *)import_url,
304 if (import_url_copy == NULL) {
310 free(import_url_copy);
317 memcpy(rewritten + *osize, source + offset, match[0].rm_so);
318 *osize += match[0].
rm_so;
326 snprintf(buf,
sizeof buf,
"@import '%p'",
328 memcpy(rewritten + *osize, buf, strlen(buf));
329 *osize += strlen(buf);
332 memcpy(rewritten + *osize,
333 source + offset + match[0].rm_so,
334 match[0].rm_eo - match[0].rm_so);
340 memcpy(rewritten + *osize,
341 source + offset + match[0].rm_so,
342 match[0].rm_eo - match[0].rm_so);
346 assert(0 < match[0].rm_eo);
347 offset += match[0].
rm_eo;
352 memcpy(rewritten + *osize, source + offset, size - offset);
353 *osize += size - offset;
362 const uint8_t *css_data;
367 uint32_t import_count;
397 if (source == NULL) {
407 snprintf(filename,
sizeof filename,
"%p", css);
410 source, source_len,
type);
412 lwc_string_unref(
type);
421 uint32_t import_count)
426 for (i = 0; i < import_count; i++) {
428 if (imports[i].
c != NULL) {
443 if (sheet->
sheet == NULL) {
455 unsigned int i,
count;
473 const uint8_t *obj_data;
484 if (obj_data == NULL) {
502 snprintf(filename,
sizeof filename,
"%p", obj);
511 lwc_string_unref(
type);
526 for (;
object != NULL;
object =
object->next) {
527 if ((object->
content != NULL) &&
528 (object->
box != NULL)) {
541 bool (*callback)(dom_node *node,
548 node = dom_node_ref(
root);
550 while (node != NULL) {
551 dom_node *
next = NULL;
554 exc = dom_node_get_first_child(node, &
next);
555 if (exc != DOM_NO_ERR) {
556 dom_node_unref(node);
561 dom_node_unref(node);
564 exc = dom_node_get_next_sibling(node, &
next);
565 if (exc != DOM_NO_ERR) {
566 dom_node_unref(node);
574 dom_node_unref(node);
577 while (node != NULL) {
578 exc = dom_node_get_next_sibling(node,
580 if (exc != DOM_NO_ERR) {
581 dom_node_unref(node);
587 dom_node_unref(
next);
591 exc = dom_node_get_parent_node(node,
593 if (exc != DOM_NO_ERR) {
594 dom_node_unref(node);
603 dom_node_unref(node);
610 exc = dom_node_get_next_sibling(node, &
next);
611 if (exc != DOM_NO_ERR) {
612 dom_node_unref(node);
619 dom_node_unref(node);
624 assert(node != NULL);
636 const char *value,
size_t value_len)
663 fprintf(ctx->
fp,
"\"%s\"", escaped);
668 error =
utf8_to_html(value,
"UTF-8", value_len, &escaped);
672 fprintf(ctx->
fp,
"\"%s\"", escaped);
681 const char *value,
size_t value_len)
686 ret =
utf8_to_html(value,
"UTF-8", value_len, &escaped);
690 fprintf(ctx->
fp,
"\"%s\"", escaped);
698 dom_string *
node_name, dom_string *attr_name,
699 dom_string *attr_value)
701 const char *node_data = dom_string_data(
node_name);
702 size_t node_len = dom_string_byte_length(
node_name);
703 const char *name_data = dom_string_data(attr_name);
704 size_t name_len = dom_string_byte_length(attr_name);
705 const char *value_data = dom_string_data(attr_value);
706 size_t value_len = dom_string_byte_length(attr_value);
719 if (name_len ==
SLEN(
"data") &&
720 strncasecmp(name_data,
"data", name_len) == 0) {
721 if (node_len ==
SLEN(
"object") &&
722 strncasecmp(node_data,
723 "object", node_len) == 0) {
725 value_data, value_len);
728 value_data, value_len);
732 else if (name_len ==
SLEN(
"href") &&
733 strncasecmp(name_data,
"href", name_len) == 0) {
734 if ((node_len ==
SLEN(
"a") &&
735 strncasecmp(node_data,
"a", node_len) == 0) ||
736 (node_len ==
SLEN(
"area") &&
737 strncasecmp(node_data,
"area",
739 (node_len ==
SLEN(
"link") &&
740 strncasecmp(node_data,
"link",
743 value_data, value_len);
746 value_data, value_len);
750 else if (name_len ==
SLEN(
"src") &&
751 strncasecmp(name_data,
"src", name_len) == 0) {
752 if ((node_len ==
SLEN(
"frame") &&
753 strncasecmp(node_data,
"frame",
755 (node_len ==
SLEN(
"iframe") &&
756 strncasecmp(node_data,
"iframe",
758 (node_len ==
SLEN(
"input") &&
759 strncasecmp(node_data,
"input",
761 (node_len ==
SLEN(
"img") &&
762 strncasecmp(node_data,
"img",
764 (node_len ==
SLEN(
"script") &&
765 strncasecmp(node_data,
"script",
768 value_data, value_len);
771 value_data, value_len);
775 else if (name_len ==
SLEN(
"background") &&
776 strncasecmp(name_data,
"background", name_len) == 0) {
778 value_data, value_len);
781 value_data, value_len);
791 const char *name_data;
796 error = dom_attr_get_name(attr, &name);
797 if (error != DOM_NO_ERR)
803 error = dom_attr_get_value(attr, &value);
804 if (error != DOM_NO_ERR) {
805 dom_string_unref(name);
809 name_data = dom_string_data(name);
810 name_len = dom_string_byte_length(name);
813 fwrite(name_data,
sizeof(*name_data), name_len, ctx->
fp);
818 name, value) ==
false) {
819 dom_string_unref(value);
820 dom_string_unref(name);
823 dom_string_unref(value);
826 dom_string_unref(name);
834 dom_namednodemap *attrs)
839 error = dom_namednodemap_get_length(attrs, &length);
840 if (error != DOM_NO_ERR)
843 for (i = 0; i < length; i++) {
846 error = dom_namednodemap_item(attrs, i, (
void *) &attr);
847 if (error != DOM_NO_ERR)
854 dom_node_unref(attr);
858 dom_node_unref(attr);
870 dom_namednodemap *attrs;
871 const char *name_data;
878 error = dom_node_get_node_name(node, &name);
879 if (error != DOM_NO_ERR)
885 name_data = dom_string_data(name);
886 name_len = dom_string_byte_length(name);
888 if ((name_len ==
SLEN(
"base")) &&
889 (strncasecmp(name_data,
"base", name_len) == 0)) {
892 }
else if ((name_len ==
SLEN(
"meta")) &&
893 (strncasecmp(name_data,
"meta", name_len) == 0)) {
900 error = dom_element_get_attribute(node,
901 corestring_dom_http_equiv,
903 if (error != DOM_NO_ERR) {
904 dom_string_unref(name);
909 if (dom_string_length(value) ==
910 SLEN(
"Content-Type") &&
911 strncasecmp(dom_string_data(value),
913 SLEN(
"Content-Type")) == 0)
916 dom_string_unref(value);
920 error = dom_element_has_attribute(node,
921 corestring_dom_charset, &yes);
922 if (error != DOM_NO_ERR) {
923 dom_string_unref(name);
932 ((name_len ==
SLEN(
"link") &&
933 strncasecmp(name_data,
"link", name_len) == 0))) {
938 if (process ==
false) {
939 dom_string_unref(name);
947 fwrite(name_data,
sizeof(*name_data), name_len, ctx->
fp);
950 error = dom_node_get_attributes(node, &attrs);
951 if (error != DOM_NO_ERR) {
952 dom_string_unref(name);
957 dom_namednodemap_unref(attrs);
958 dom_string_unref(name);
962 dom_namednodemap_unref(attrs);
969 strncasecmp(name_data,
"style", name_len) == 0) {
972 error = dom_node_get_text_content(node, &
content);
973 if (error != DOM_NO_ERR) {
974 dom_string_unref(name);
985 (
const uint8_t *)dom_string_data(
content),
986 dom_string_byte_length(
content),
989 if (rewritten == NULL) {
991 dom_string_unref(name);
997 fwrite(rewritten,
sizeof(*rewritten), len, ctx->
fp);
1004 strncasecmp(name_data,
"head", name_len) == 0) {
1006 fputs(
"<META http-equiv=\"Content-Type\" "
1007 "content=\"text/html; charset=utf-8\">",
1011 dom_string_unref(name);
1023 dom_exception error;
1026 error = dom_node_get_node_type(node, &
type);
1027 if (error != DOM_NO_ERR)
1030 if (
type == DOM_ELEMENT_NODE) {
1032 }
else if (
type == DOM_TEXT_NODE ||
type == DOM_COMMENT_NODE) {
1039 const char *text_data;
1042 error = dom_characterdata_get_data(node, &
text);
1043 if (error != DOM_NO_ERR) {
1047 if (
type == DOM_COMMENT_NODE)
1048 fwrite(
"<!--", 1,
sizeof(
"<!--") - 1, ctx->
fp);
1053 text_data = dom_string_data(
text);
1054 text_len = dom_string_byte_length(
text);
1057 text_len, &escaped);
1061 fwrite(escaped,
sizeof(*escaped),
1062 strlen(escaped), ctx->
fp);
1066 dom_string_unref(
text);
1069 if (
type == DOM_COMMENT_NODE) {
1070 fwrite(
"-->", 1,
sizeof(
"-->") - 1, ctx->
fp);
1074 }
else if (
type == DOM_DOCUMENT_TYPE_NODE) {
1076 const char *name_data;
1082 error = dom_document_type_get_name(node, &name);
1083 if (error != DOM_NO_ERR)
1089 name_data = dom_string_data(name);
1090 name_len = dom_string_byte_length(name);
1092 fputs(
"<!DOCTYPE ", ctx->
fp);
1093 fwrite(name_data,
sizeof(*name_data), name_len, ctx->
fp);
1095 dom_string_unref(name);
1097 error = dom_document_type_get_public_id(node, &name);
1098 if (error != DOM_NO_ERR)
1102 name_data = dom_string_data(name);
1103 name_len = dom_string_byte_length(name);
1106 fprintf(ctx->
fp,
" PUBLIC \"%.*s\"",
1107 (
int) name_len, name_data);
1109 dom_string_unref(name);
1112 error = dom_document_type_get_system_id(node, &name);
1113 if (error != DOM_NO_ERR)
1117 name_data = dom_string_data(name);
1118 name_len = dom_string_byte_length(name);
1121 fprintf(ctx->
fp,
" \"%.*s\"",
1122 (
int) name_len, name_data);
1124 dom_string_unref(name);
1127 fputc(
'>', ctx->
fp);
1128 }
else if (
type == DOM_DOCUMENT_NODE) {
1131 NSLOG(netsurf, INFO,
"Unhandled node type: %d",
type);
1146 lwc_string *mime_type;
1150 snprintf(filename,
sizeof filename,
"index");
1152 snprintf(filename,
sizeof filename,
"%p", c);
1160 fp = fopen(fname,
"wb");
1163 NSLOG(netsurf, INFO,
"fopen(): %s", strerror(errno));
1184 if (mime_type != NULL) {
1189 lwc_string_unref(mime_type);
1249 fp = fopen(fname,
"w");
1252 NSLOG(netsurf, INFO,
"fopen(): %s", strerror(errno));
1256 for (entry = ctx->
list; entry != NULL; entry = entry->
next) {
1257 fprintf(fp,
"%p %s\n",
1276 r =
regcomp(preg, regex, cflags);
1279 regerror(r, preg, errbuf,
sizeof errbuf);
1280 NSLOG(netsurf, INFO,
"Failed to compile regexp '%s': %s\n",
1298 "\"(([^\"]|[\\]\")*)\""
1303 "url\\([ \t\r\n\f]*"
1305 "\"(([^\"]|[\\]\")*)\""
1308 "url\\([ \t\r\n\f]*"
1313 "url\\([ \t\r\n\f]*"
Helpers for ASCII string handling.
static char ascii_to_lower(char c)
Convert an upper case character to lower case.
static uint32_t count(const http_directive *list, lwc_string *key)
char * strndup(const char *s, size_t n)
Duplicate up to n characters of a string.
@ CONTENT_NONE
no type for content
@ CONTENT_HTML
content is HTML
Useful interned string pointers (interface).
struct nscss_import * nscss_get_imports(hlcache_handle *h, uint32_t *n)
Retrieve imported stylesheets.
nserror
Enumeration of error codes.
@ NSERROR_SAVE_FAILED
Failed to save data.
@ NSERROR_INIT_FAILED
Initialisation failed.
@ NSERROR_INVALID
Invalid data.
@ NSERROR_NOMEM
Memory exhaustion.
static struct directory * root
Interface to core interface table.
struct content * hlcache_handle_get_content(const hlcache_handle *handle)
Retrieve a content object from a cache handle.
High-level resource cache interface.
struct html_stylesheet * html_get_stylesheets(hlcache_handle *h, unsigned int *n)
Retrieve stylesheets used by HTML document.
nsurl * html_get_base_url(hlcache_handle *h)
Retrieve an HTML content's base URL.
dom_document * html_get_document(hlcache_handle *h)
Retrieve HTML document tree.
Interface to text/html content handler.
struct content_html_object * html_get_objects(struct hlcache_handle *h, unsigned int *n)
Retrieve objects used by HTML document.
Interface to HTML content handler to save documents.
Public content interface.
struct nsurl * hlcache_handle_get_url(const struct hlcache_handle *handle)
Retrieve the URL associated with a high level cache handle.
const uint8_t * content_get_source_data(struct hlcache_handle *h, size_t *size)
Retrieve source of content.
lwc_string * content_get_mime_type(struct hlcache_handle *h)
Retrieve mime-type of content.
content_type content_get_type(struct hlcache_handle *h)
Retrieve computed type of content.
Interface to platform-specific miscellaneous browser operation table.
#define NSLOG(catname, level, logmsg, args...)
Localised message support (interface).
NetSurf URL handling (interface).
bool nsurl_compare(const nsurl *url1, const nsurl *url2, nsurl_component parts)
Compare two URLs.
void nsurl_unref(nsurl *url)
Drop a reference to a NetSurf URL object.
const char * nsurl_access(const nsurl *url)
Access a NetSurf URL object as a string.
size_t nsurl_length(const nsurl *url)
Find the length of a NetSurf URL object's URL, as returned by nsurl_access.
nserror nsurl_join(const nsurl *base, const char *rel, nsurl **joined)
Join a base url to a relative link part, creating a new NetSurf URL object.
struct nsurl nsurl
NetSurf URL object.
size_t regerror(int errorcode, const regex_t *restrict preg, char *restrict errbuf, size_t errbuf_size)
int regexec(const regex_t *restrict preg, const char *restrict string, size_t nmatch, regmatch_t pmatch[restrict], int eflags)
void regfree(regex_t *preg)
int regcomp(regex_t *restrict preg, const char *restrict regex, int cflags)
static void save_complete_ctx_finalise(save_complete_ctx *ctx)
static bool save_complete_ctx_has_content(save_complete_ctx *ctx, struct hlcache_handle *content)
void save_complete_init(void)
Initialise save complete module.
static nserror save_complete_save_html_object(save_complete_ctx *ctx, hlcache_handle *obj)
nserror save_complete(hlcache_handle *c, const char *path, save_complete_set_type_cb set_type)
Save an HTML page with all dependencies.
static bool save_complete_handle_element(save_complete_ctx *ctx, dom_node *node, save_complete_event_type event_type)
static int snregexec(regex_t *preg, const char *string, size_t stringlen, size_t nmatch, regmatch_t pmatch[], int eflags)
Perform a POSIX regexec on a string without a null terminator.
static nserror save_complete_save_buffer(save_complete_ctx *ctx, const char *leafname, const uint8_t *data, size_t data_len, lwc_string *mime_type)
static nserror save_complete_save_html_document(save_complete_ctx *ctx, hlcache_handle *c, bool index)
static bool save_complete_node_handler(dom_node *node, save_complete_event_type event_type, void *ctxin)
static bool save_complete_handle_attr_value(save_complete_ctx *ctx, dom_string *node_name, dom_string *attr_name, dom_string *attr_value)
static nserror save_complete_ctx_add_content(save_complete_ctx *ctx, struct hlcache_handle *content)
struct save_complete_entry save_complete_entry
An entry in save_complete_list.
static nserror save_complete_save_html_stylesheets(save_complete_ctx *ctx, hlcache_handle *c)
static bool save_complete_rewrite_url_value(save_complete_ctx *ctx, const char *value, size_t value_len)
static bool save_complete_handle_attrs(save_complete_ctx *ctx, dom_string *node_name, dom_namednodemap *attrs)
static nserror regcomp_wrapper(regex_t *preg, const char *regex, int cflags)
Compile a regular expression, handling errors.
static regex_t save_complete_import_re
static nserror save_complete_save_html_stylesheet(save_complete_ctx *ctx, struct html_stylesheet *sheet)
static bool save_complete_libdom_treewalk(dom_node *root, bool(*callback)(dom_node *node, save_complete_event_type event_type, void *ctx), void *ctx)
static bool save_complete_write_value(save_complete_ctx *ctx, const char *value, size_t value_len)
static nserror save_complete_save_html_objects(save_complete_ctx *ctx, hlcache_handle *c)
static bool save_complete_handle_attr(save_complete_ctx *ctx, dom_string *node_name, dom_attr *attr)
struct save_complete_ctx save_complete_ctx
static nserror save_complete_save_imported_sheets(save_complete_ctx *ctx, struct nscss_import *imports, uint32_t import_count)
static nserror save_complete_save_stylesheet(save_complete_ctx *ctx, hlcache_handle *css)
static uint8_t * save_complete_rewrite_stylesheet_urls(save_complete_ctx *ctx, const uint8_t *source, size_t size, const nsurl *base, size_t *osize)
Rewrite stylesheet @import rules for save complete.
static nserror save_complete_save_html(save_complete_ctx *ctx, struct hlcache_handle *c, bool index)
Save an HTML page with all dependencies, recursing through imported pages.
nserror save_complete_finalise(void)
Finalise save complete module.
static struct hlcache_handle * save_complete_ctx_find_content(save_complete_ctx *ctx, const nsurl *url)
Find the handle to the content for a URL.
static nserror save_complete_inventory(save_complete_ctx *ctx)
Create the inventory file listing original URLs.
static void save_complete_ctx_initialise(save_complete_ctx *ctx, const char *path, save_complete_set_type_cb set_type)
Save HTML document with dependencies (interface).
void(* save_complete_set_type_cb)(const char *path, lwc_string *mime_type)
Callback to set type of a file.
static css_error node_name(void *pw, void *node, css_qname *qname)
Callback to retrieve a node's name.
Interface to utility string handling.
An object (img, object, etc.).
struct content_html_object * next
Next in chain.
struct box * box
Node in box tree containing it.
struct hlcache_handle * content
Content, or 0.
Content which corresponds to a single URL.
hlcache_entry * next
Next sibling.
struct content * content
Pointer to associated content.
hlcache_entry * entry
Pointer to cache entry.
Container for stylesheets used by an HTML document.
struct hlcache_handle * sheet
Imported stylesheet record.
struct hlcache_handle * c
Content containing sheet.
save_complete_entry * list
enum save_complete_ctx::@76 iter_state
save_complete_set_type_cb set_type
An entry in save_complete_list.
struct hlcache_handle * content
struct save_complete_entry * next
Next entry in list.
nserror netsurf_mkpath(char **str, size_t *size, size_t nelm,...)
Generate a path from one or more component elements.
Default operations table for files.
nserror utf8_to_html(const char *string, const char *encname, size_t len, char **result_out)
Convert a UTF-8 encoded string into a string of the given encoding, applying HTML escape sequences where necessary.
UTF-8 manipulation functions (interface).
Interface to a number of general purpose functionality.
#define SLEN(x)
Calculate length of constant C string.
static nserror path(const struct redraw_context *ctx, const plot_style_t *pstyle, const float *p, unsigned int n, const float transform[6])
Plots a path.
static nserror text(const struct redraw_context *ctx, const struct plot_font_style *fstyle, int x, int y, const char *text, size_t length)
Text plotting.