29#include <parserutils/charset/utf8.h>
45 parserutils_error perror;
47 perror = parserutils_charset_utf8_to_ucs4((
const uint8_t *) s_in, l,
49 if (perror != PARSERUTILS_OK)
58 uint8_t *in = (uint8_t *) s;
60 parserutils_error perror;
62 perror = parserutils_charset_utf8_from_ucs4(c, &in, &len);
63 if (perror != PARSERUTILS_OK) {
83 parserutils_error perror;
85 perror = parserutils_charset_utf8_length((
const uint8_t *) s, l, &len);
86 if (perror != PARSERUTILS_OK)
97 while (len < l && c-- > 0)
107 parserutils_error perror;
109 perror = parserutils_charset_utf8_char_byte_length((
const uint8_t *) s,
111 assert(perror == PARSERUTILS_OK);
120 parserutils_error perror;
122 perror = parserutils_charset_utf8_prev((
const uint8_t *) s, o, &prev);
123 assert(perror == PARSERUTILS_OK);
132 parserutils_error perror;
134 perror = parserutils_charset_utf8_next((
const uint8_t *) s, l, o,
136 assert(perror == PARSERUTILS_OK);
166 if (strncasecmp(
last_cd.from, enc_from,
sizeof(
last_cd.from)) == 0 &&
174 cd = iconv_open(enc_to, enc_from);
175 if (
cd == (iconv_t) -1) {
176 if (errno == EINVAL) {
227 size_t *result_len_out)
230 char *temp, *out, *in, *
result;
234 assert(
string &&
from &&
to && result_out);
238 slen = strlen(
string);
247 if ((slen == 0) || (strcasecmp(
from,
to) == 0)) {
248 *result_out =
strndup(
string, slen);
249 if (*result_out == NULL) {
252 if (result_len_out != NULL) {
253 *result_len_out = slen;
270 result_len = slen * 4 + 4;
272 temp = out = malloc(result_len);
278 if (iconv(
cd, (
void *) &in, &slen, &out, &result_len) == (
size_t)-1) {
292 result_len = out - temp;
295 result = realloc(temp, result_len + 4);
303 memset(
result + result_len, 0, 4);
306 if (result_len_out != NULL) {
307 *result_len_out = result_len;
315 size_t len,
char **
result)
322 size_t len,
char **
result,
size_t *result_len)
339 char *pescape, escape[11];
342 ret = iconv(
cd, (
void *) &chunk, &inlen, (
void *) out, outlen);
343 if (ret != (
size_t) -1)
350 esclen = snprintf(escape,
sizeof(escape),
"&#x%06"PRIx32
";", ucs4);
352 ret = iconv(
cd, (
void *) &pescape, &esclen,
353 (
void *) out, outlen);
354 if (ret == (
size_t) -1)
369utf8_to_html(
const char *
string,
const char *encname,
size_t len,
char **result_out)
373 char *out, *origout, *
result;
374 size_t off, prev_off, inlen, outlen, origoutlen, esclen;
376 char *pescape, escape[11];
380 len = strlen(
string);
391 origoutlen = outlen = len * 10 * 4 + 4;
392 origout = out = malloc(outlen);
403 if (
string[off] ==
'&' ||
string[off] ==
'<' ||
404 string[off] ==
'>') {
405 if (off - prev_off > 0) {
407 in =
string + prev_off;
408 inlen = off - prev_off;
420 esclen = snprintf(escape,
sizeof(escape),
421 "&#x%06x;",
string[off]);
432 prev_off = off =
utf8_next(
string, len, off);
439 if (prev_off < len) {
440 in =
string + prev_off;
441 inlen = len - prev_off;
456 result = realloc(origout, origoutlen - outlen);
476 "failed to convert to local encoding, return %d", ret);
480 out = fopen(
path,
"w");
482 int res = fputs(conv, out);
484 NSLOG(netsurf, INFO,
"Warning: writing data failed");
487 res = fputs(
"\n", out);
char * strndup(const char *s, size_t n)
Duplicate up to n characters of a string.
nserror
Enumeration of error codes.
@ NSERROR_BAD_ENCODING
The character set is unknown.
@ NSERROR_NOMEM
Memory exhaustion.
struct netsurf_table * guit
The global interface table.
Interface to core interface table.
Interface to platform-specific utf8 operations.
Netsurf additional integer type formatting macros.
#define NSLOG(catname, level, logmsg, args...)
Interface to utility string handling.
nserror(* utf8_to_local)(const char *string, size_t len, char **result)
Convert a UTF-8 encoded string into the system local encoding.
struct gui_utf8_table * utf8
UTF8 table.
static nserror utf8_convert(const char *string, size_t slen, const char *from, const char *to, char **result_out, size_t *result_len_out)
Convert a string from one encoding to another.
size_t utf8_prev(const char *s, size_t o)
Find previous legal UTF-8 char in string.
uint32_t utf8_to_ucs4(const char *s_in, size_t l)
Convert a UTF-8 multibyte sequence into a single UCS4 character.
nserror utf8_finalise(void)
Finalise the UTF-8 library.
size_t utf8_from_ucs4(uint32_t c, char *s)
Convert a single UCS4 character into a UTF-8 multibyte sequence.
size_t utf8_next(const char *s, size_t l, size_t o)
Find next legal UTF-8 char in string.
bool utf8_save_text(const char *utf8_text, const char *path)
Save the given utf8 text to a file, converting to local encoding.
iconv_t cd
Iconv conversion descriptor.
size_t utf8_bounded_length(const char *s, size_t l)
Calculate the length (in characters) of a bounded UTF-8 string.
nserror utf8_from_enc(const char *string, const char *encname, size_t len, char **result, size_t *result_len)
Convert a string in the named encoding into a UTF-8 string.
size_t utf8_length(const char *s)
Calculate the length (in characters) of a NUL-terminated UTF-8 string.
static struct @151 last_cd
nserror utf8_to_html(const char *string, const char *encname, size_t len, char **result_out)
Convert a UTF-8 encoded string into a string of the given encoding, applying HTML escape sequences where necessary.
static void utf8_clear_cd_cache(void)
static nserror get_cached_cd(const char *enc_from, const char *enc_to, iconv_t *cd_out)
Obtain a cached conversion descriptor.
nserror utf8_to_enc(const char *string, const char *encname, size_t len, char **result)
Convert a UTF8 string into the named encoding.
char from[32]
Encoding name to convert from.
size_t utf8_char_byte_length(const char *s)
Calculate the length (in bytes) of a UTF-8 character.
char to[32]
Encoding name to convert to.
size_t utf8_bounded_byte_length(const char *s, size_t l, size_t c)
Calculate the length (in bytes) of a bounded UTF-8 string.
static nserror utf8_convert_html_chunk(iconv_t cd, const char *chunk, size_t inlen, char **out, size_t *outlen)
Convert a chunk of HTML data.
UTF-8 manipulation functions (interface).
static nserror path(const struct redraw_context *ctx, const plot_style_t *pstyle, const float *p, unsigned int n, const float transform[6])
Plots a path.