Hubbub $Id$
Classes | Functions
parser.c File Reference
#include <assert.h>
#include <string.h>
#include <parserutils/charset/mibenum.h>
#include <parserutils/input/inputstream.h>
#include <hubbub/parser.h>
#include "charset/detect.h"
#include "tokeniser/tokeniser.h"
#include "treebuilder/treebuilder.h"
#include "utils/parserutilserror.h"

Classes

struct  hubbub_parser
 Hubbub parser object. More...
 

Functions

hubbub_error hubbub_parser_create (const char *enc, bool fix_enc, hubbub_parser **parser)
 Create a hubbub parser. More...
 
hubbub_error hubbub_parser_destroy (hubbub_parser *parser)
 Destroy a hubbub parser. More...
 
hubbub_error hubbub_parser_setopt (hubbub_parser *parser, hubbub_parser_opttype type, hubbub_parser_optparams *params)
 Configure a hubbub parser. More...
 
hubbub_error hubbub_parser_insert_chunk (hubbub_parser *parser, const uint8_t *data, size_t len)
 Insert a chunk of data into a hubbub parser input stream. More...
 
hubbub_error hubbub_parser_parse_chunk (hubbub_parser *parser, const uint8_t *data, size_t len)
 Pass a chunk of data to a hubbub parser for parsing. More...
 
hubbub_error hubbub_parser_completed (hubbub_parser *parser)
 Inform the parser that the last chunk of data has been parsed. More...
 
const char * hubbub_parser_read_charset (hubbub_parser *parser, hubbub_charset_source *source)
 Read the document charset. More...
 

Function Documentation

◆ hubbub_parser_completed()

hubbub_error hubbub_parser_completed ( hubbub_parser *  parser )

Inform the parser that the last chunk of data has been parsed.

Parameters
parser: Parser to inform
Returns
HUBBUB_OK on success, appropriate error otherwise

◆ hubbub_parser_create()

hubbub_error hubbub_parser_create ( const char *  enc,
bool  fix_enc,
hubbub_parser **  parser 
)

Create a hubbub parser.

Parameters
enc: Source document encoding, or NULL to autodetect
fix_enc: Permit fixing up of encoding if it's frequently misused
parser: Pointer to location to receive parser instance
Returns
HUBBUB_OK on success, HUBBUB_BADPARM on bad parameters, HUBBUB_NOMEM on memory exhaustion, HUBBUB_BADENCODING if enc is unsupported

◆ hubbub_parser_destroy()

hubbub_error hubbub_parser_destroy ( hubbub_parser *  parser )

Destroy a hubbub parser.

Parameters
parser: Parser instance to destroy
Returns
HUBBUB_OK on success, appropriate error otherwise

◆ hubbub_parser_insert_chunk()

hubbub_error hubbub_parser_insert_chunk ( hubbub_parser *  parser,
const uint8_t *  data,
size_t  len 
)

Insert a chunk of data into a hubbub parser input stream.

Inserts the given data into the input stream ready for parsing but does not cause any additional processing of the input. This is useful to allow hubbub callbacks to add computed data to the input.

Parameters
parser: Parser instance to use
data: Data to parse (encoded in UTF-8)
len: Length, in bytes, of data
Returns
HUBBUB_OK on success, appropriate error otherwise

◆ hubbub_parser_parse_chunk()

hubbub_error hubbub_parser_parse_chunk ( hubbub_parser *  parser,
const uint8_t *  data,
size_t  len 
)

Pass a chunk of data to a hubbub parser for parsing.

Parameters
parser: Parser instance to use
data: Data to parse (encoded in the input charset)
len: Length, in bytes, of data
Returns
HUBBUB_OK on success, appropriate error otherwise

◆ hubbub_parser_read_charset()

const char * hubbub_parser_read_charset ( hubbub_parser *  parser,
hubbub_charset_source *  source 
)

Read the document charset.

Parameters
parser: Parser instance to query
source: Pointer to location to receive charset source
Returns
Pointer to charset name (constant; do not free), or NULL if unknown

◆ hubbub_parser_setopt()

hubbub_error hubbub_parser_setopt ( hubbub_parser *  parser,
hubbub_parser_opttype  type,
hubbub_parser_optparams *  params 
)
)

Configure a hubbub parser.

Parameters
parser: Parser instance to configure
type: Option to set
params: Option-specific parameters
Returns
HUBBUB_OK on success, appropriate error otherwise