58 NSLOG(netsurf, INFO,
"Bad input");
63 NSLOG(netsurf, INFO,
"Output too big");
68 NSLOG(netsurf, INFO,
"Overflow");
97 size_t output_length = 60;
107 &output_length, punycode + 4));
112 output_length +=
SLEN(
"xn--");
113 punycode[output_length] =
'\0';
115 *ace_label = strdup(punycode);
116 *out_len = output_length;
136 int32_t **ucs4_label,
141 size_t output_length = ace_len;
144 assert((ace_label[0] ==
'x') && (ace_label[1] ==
'n') &&
145 (ace_label[2] ==
'-') && (ace_label[3] ==
'-'));
147 ucs4 = malloc(output_length * 4);
153 ace_label + 4, &output_length, (
punycode_uint *)ucs4, NULL));
159 ucs4[output_length] =
'\0';
162 *ucs4_len = output_length;
170#include <libutf8proc/utf8proc.h>
172int32_t idna_contexto[] = {
174 0x00b7, 0x0375, 0x05f3, 0x05f4, 0x30fb, 0x0660, 0x0661,
175 0x0662, 0x0663, 0x0664, 0x0665, 0x0666, 0x0667, 0x0668,
176 0x0669, 0x06f0, 0x06f1, 0x06f2, 0x06f3, 0x06f4, 0x06f5,
177 0x06f6, 0x06f7, 0x06f8, 0x06f9, 0
192 if ((cp >= t->
start) && (cp <= t->end)) {
214 if ((cp >= t->
start) && (cp <= t->end)) {
230static bool idna__contexto_rule(int32_t cp)
233 for (t = idna_contexto; *t != 0; t++) {
252static bool idna__contextj_rule(int32_t *label,
int index,
size_t len)
254 const utf8proc_property_t *unicode_props;
263 if (label[index] == 0x200c) {
267 unicode_props = utf8proc_get_property(label[index - 1]);
273 for (i = 0; i < (index - 1); i++) {
274 joining_type = idna__jt_property(label[i]);
283 if (match ==
false) {
291 for (i = (index + 1); i < (int)len; i++) {
292 joining_type = idna__jt_property(label[i]);
302 }
else if (label[index] == 0x200d) {
306 unicode_props = utf8proc_get_property(label[index - 1]);
332 int32_t **ucs4_label,
338 nfc_label = malloc(len * 4);
339 if (nfc_label == NULL) {
343 nfc_size = utf8proc_decompose((
const uint8_t *)utf8_label, len,
344 nfc_label, len * 4, UTF8PROC_STABLE | UTF8PROC_COMPOSE);
349 nfc_size = utf8proc_normalize_utf32(nfc_label, nfc_size,
350 UTF8PROC_STABLE | UTF8PROC_COMPOSE);
355 *ucs4_label = nfc_label;
356 *ucs4_len = nfc_size;
380 ssize_t nfc_size = ucs4_len;
382 nfc_label = malloc(1 + ucs4_len * 4);
383 if (nfc_label == NULL) {
386 memcpy(nfc_label, ucs4_label, ucs4_len * 4);
388 nfc_size = utf8proc_reencode(nfc_label, ucs4_len,
389 UTF8PROC_STABLE | UTF8PROC_COMPOSE);
394 *utf8_label = (
char *)nfc_label;
395 *utf8_len = nfc_size;
410 const utf8proc_property_t *unicode_props;
420 if ((len >= 4) && (label[2] == 0x002d) && (label[3] == 0x002d)) {
422 "Check failed: characters 2 and 3 are '--'");
427 unicode_props = utf8proc_get_property(label[0]);
429 if ((unicode_props->category == UTF8PROC_CATEGORY_MN) ||
430 (unicode_props->category == UTF8PROC_CATEGORY_MC) ||
431 (unicode_props->category == UTF8PROC_CATEGORY_ME)) {
433 "Check failed: character 0 is a combining mark");
437 for (i = 0; i < len; i++) {
438 idna_prop = idna__cp_property(label[i]);
443 "Check failed: character %"PRIsizet" (%"PRIx32
") is DISALLOWED",
451 if (idna__contextj_rule(label, i, len) ==
false) {
453 "Check failed: character %"PRIsizet" (%"PRIx32
") does not conform to CONTEXTJ rule",
463 if (idna__contexto_rule(label[i]) ==
false) {
465 "Check failed: character %"PRIsizet" (%"PRIx32
") has no CONTEXTO rule defined",
475 "Check failed: character %"PRIsizet" (%"PRIx32
") is UNASSIGNED",
501 size_t u_ucs4_len, ace_len;
510 ucs4_len = utf8proc_normalize_utf32(ucs4, u_ucs4_len,
511 UTF8PROC_STABLE | UTF8PROC_COMPOSE);
526 if ((len == ace_len) && (strncmp(label, ace, len) == 0)) {
531 NSLOG(netsurf, INFO,
"Re-encoded ACE label %s does not match input",
556 int32_t **ucs4_label,
622 const char *p = host;
625 while (length < max_length) {
626 if ((*p ==
'.') || (*p ==
':') || (*p ==
'\0')) {
646 const char *p = label;
650 if ((p[0] ==
'-') || (p[len - 1] ==
'-'))
654 for (i = 0; i < len; p++) {
656 if (*p ==
'-')
continue;
657 if ((*p >=
'0') && (*p <=
'9'))
continue;
658 if ((*p >=
'a') && (*p <=
'z'))
continue;
659 if ((*p >=
'A') && (*p <=
'Z'))
continue;
683 if ((label[0] ==
'x') && (label[1] ==
'n') &&
684 (label[2] ==
'-') && (label[3] ==
'-')) {
695#define NO_ACTION (void)0
697#define FQDN_APPEND(s, len, action) \
699 if ((FQDN_MAX - fqdn_len) <= len) { \
702 return NSERROR_BAD_URL; \
704 memcpy(fqdn_p, s, len); \
713idna_encode(
const char *host,
size_t len,
char **ace_host,
size_t *ace_len)
717 size_t label_len, output_len, ucs4_len, fqdn_len = 0;
719 char *output, *fqdn_p = fqdn;
722 if (label_len == 0) {
726 while (label_len != 0) {
732 &ucs4_host, &ucs4_len);
745 &output, &output_len);
756 "Cannot verify ACE label %s", host);
765 if ((*host ==
'\0') || (*host ==
':')) {
769 len = len - label_len - 1;
776 *ace_host = strdup(fqdn);
777 *ace_len = fqdn_len - 1;
785idna_decode(
const char *ace_host,
size_t ace_len,
char **host,
size_t *host_len)
789 size_t label_len, output_len, ucs4_len, fqdn_len = 0;
791 char *output, *fqdn_p = fqdn;
794 if (label_len == 0) {
798 while (label_len != 0) {
804 &ucs4_host, &ucs4_len);
811 &output, &output_len);
824 ace_host += label_len;
825 if ((*ace_host ==
'\0') || (*ace_host ==
':')) {
829 ace_len = ace_len - label_len - 1;
836 *host = strdup(fqdn);
837 *host_len = fqdn_len - 1;
nserror
Enumeration of error codes.
@ NSERROR_NOSPACE
Insufficient space.
@ NSERROR_BAD_ENCODING
The character set is unknown.
@ NSERROR_BAD_URL
Bad URL.
@ NSERROR_NOT_IMPLEMENTED
Functionality is not implemented.
@ NSERROR_BAD_SIZE
Bad size.
@ NSERROR_NOMEM
Memory exhaustion.
nserror idna_encode(const char *host, size_t len, char **ace_host, size_t *ace_len)
Convert a hostname to an ACE version suitable for DNS lookup.
static nserror idna__utf8_to_ucs4(const char *utf8_label, size_t len, int32_t **ucs4_label, size_t *ucs4_len)
Convert a UTF-8 string to UCS-4.
#define FQDN_APPEND(s, len, action)
nserror idna_decode(const char *ace_host, size_t ace_len, char **host, size_t *host_len)
Convert a hostname from ACE to UTF-8 suitable for display.
static bool idna__is_ace(const char *label, size_t len)
Check if a host label appears to be ACE.
static size_t idna__host_label_length(const char *host, size_t max_length)
Find the length of a host label.
static nserror idna__ucs4_to_ace(int32_t *ucs4_label, size_t len, char **ace_label, size_t *out_len)
Convert a host label in UCS-4 to an ACE version.
static bool idna__verify(const char *label, size_t len)
Verify an ACE label is valid.
static bool idna__is_valid(int32_t *label, size_t len)
Check if a host label is valid for IDNA2008.
static nserror idna__ace_to_ucs4(const char *ace_label, size_t ace_len, int32_t **ucs4_label, size_t *ucs4_len)
Convert a host label in ACE format to UCS-4.
static nserror punycode_status_to_nserror(enum punycode_status status)
Convert punycode status into nserror.
static bool idna__is_ldh(const char *label, size_t len)
Check if a host label is LDH.
static nserror idna__ucs4_to_utf8(const int32_t *ucs4_label, size_t ucs4_len, char **utf8_label, size_t *utf8_len)
Convert a UCS-4 string to UTF-8.
Interface to international domain name handling.
#define IDNA_UNICODE_CCC_VIRAMA
Unicode canonical combining class for virama.
idna_table idna_joiningtype[]
idna_table idna_derived[]
NetSurf additional integer type formatting macros.
#define PRIsizet
C99 standard printf formatting for the size_t type.
#define NSLOG(catname, level, logmsg, args...)
enum punycode_status punycode_decode(size_t input_length, const char input[], size_t *output_length, punycode_uint output[], unsigned char case_flags[])
enum punycode_status punycode_encode(size_t input_length_orig, const punycode_uint input[], const unsigned char case_flags[], size_t *output_length, char output[])
unsigned long punycode_uint
Interface to utility string handling.
UTF-8 manipulation functions (interface).
Interface to a range of general-purpose functionality.
#define SLEN(x)
Calculate length of constant C string.