NetSurf
|
Text export of HTML (implementation). More...
#include <assert.h>
#include <stdbool.h>
#include <string.h>
#include <dom/dom.h>
#include "utils/config.h"
#include "utils/log.h"
#include "utils/utf8.h"
#include "utils/utils.h"
#include "netsurf/content.h"
#include "html/box.h"
#include "html/html_save.h"
#include "netsurf/utf8.h"
#include "desktop/gui_internal.h"
#include "desktop/save_text.h"
Go to the source code of this file.
Functions | |
static void | extract_text (struct box *box, bool *first, save_text_whitespace *before, struct save_text_state *save) |
Traverse through the box tree and add all text to a save buffer. More... | |
static bool | save_text_add_to_buffer (const char *text, size_t length, struct box *box, const char *whitespace_text, size_t whitespace_length, struct save_text_state *save) |
Add text to save text buffer. More... | |
void | save_as_text (struct hlcache_handle *c, char *path) |
Extract the text from an HTML content and save it as a text file. More... | |
void | save_text_solve_whitespace (struct box *box, bool *first, save_text_whitespace *before, const char **whitespace_text, size_t *whitespace_length) |
Decide what whitespace to place before the next bit of content-related text that is saved. More... | |
Text export of HTML (implementation).
Definition in file save_text.c.
|
static |
Traverse through the box tree and add all text to a save buffer.
box | Pointer to box. |
first | Whether this is before the first bit of content-related text to be saved. |
before | Type of whitespace currently intended to be placed before the next bit of content-related text to be saved. Updated if this box is worthy of more significant whitespace. |
save | Our save_text_state workspace pointer. |
Definition at line 214 of file save_text.c.
References BOX_BR, BOX_FLOAT_LEFT, BOX_FLOAT_RIGHT, box::children, extract_text(), box::length, box::list_marker, box::next, save_text_add_to_buffer(), save_text_solve_whitespace(), box::text, box::type, and WHITESPACE_NONE.
Referenced by extract_text(), and save_as_text().
void save_as_text | ( | struct hlcache_handle * | c, |
char * | path | ||
) |
Extract the text from an HTML content and save it as a text file.
Text is converted to the local encoding.
c | An HTML content. |
path | Path to save text file to. |
Definition at line 57 of file save_text.c.
References save_text_state::block, content_get_type(), CONTENT_HTML, extract_text(), guit, html_get_box_tree(), save_text_state::length, NSERROR_OK, NSLOG, path(), result, netsurf_table::utf8, gui_utf8_table::utf8_to_local, and WHITESPACE_NONE.
Referenced by ami_file_save(), plaintext_button_clicked_cb(), and ro_gui_save_content().
|
static |
Add text to save text buffer.
Any preceding whitespace or following space is also added to the buffer.
text | Pointer to text being added. |
length | Length of text to be appended (bytes). |
box | Pointer to text box. |
whitespace_text | Whitespace to place before text for formatting; may be NULL. |
whitespace_length | Length of whitespace_text. |
save | Our save_text_state workspace pointer. |
Definition at line 270 of file save_text.c.
References save_text_state::alloc, save_text_state::block, box::length, save_text_state::length, box::space, and text().
Referenced by extract_text().
void save_text_solve_whitespace | ( | struct box * | box, |
bool * | first, | ||
save_text_whitespace * | before, | ||
const char ** | whitespace_text, | ||
size_t * | whitespace_length | ||
) |
Decide what whitespace to place before the next bit of content-related text that is saved.
Any existing whitespace is overridden if the whitespace for this box is more "significant".
box | Pointer to box. |
first | Whether this is before the first bit of content-related text to be saved. |
before | Type of whitespace currently intended to be placed before the next bit of content-related text to be saved. Updated if this box is worthy of more significant whitespace. |
whitespace_text | Whitespace to place before next bit of content-related text to be saved. Updated if this box is worthy of more significant whitespace. |
whitespace_length | Length of whitespace_text. Updated if this box is worthy of more significant whitespace. |
Definition at line 125 of file save_text.c.
References BOX_BLOCK, BOX_BR, BOX_FLOAT_LEFT, BOX_FLOAT_RIGHT, BOX_INLINE, BOX_INLINE_CONTAINER, BOX_TABLE, BOX_TABLE_CELL, BOX_TABLE_ROW, box::list_marker, box::parent, box::style, box::type, WHITESPACE_NONE, WHITESPACE_ONE_NEW_LINE, WHITESPACE_TAB, and WHITESPACE_TWO_NEW_LINES.
Referenced by extract_text(), and selection_copy().