NetSurf
punycode.h
Go to the documentation of this file.
1/*
2punycode-sample.c 2.0.0 (2004-Mar-21-Sun)
3http://www.nicemice.net/idn/
4Adam M. Costello
5http://www.nicemice.net/amc/
6
7This is ANSI C code (C89) implementing Punycode 1.0.x.
8
9This single file contains three sections (an interface, an
10implementation, and a wrapper for testing) that would normally belong
11in three separate files (punycode.h, punycode.c, punycode-test.c), but
12here they are bundled into one file (punycode-sample.c) for convenient
13testing. Anyone wishing to reuse this code will probably want to split
14it apart.
15
16*/
17
18/************************************************************/
19/* Public interface (would normally go in its own .h file): */
20
21#include <limits.h>
22#include <stddef.h>
23
26 punycode_bad_input = 1, /* Input is invalid. */
27 punycode_big_output = 2, /* Output would exceed the space provided. */
28 punycode_overflow = 3 /* Wider integers needed to process input. */
29};
30
31/* punycode_uint needs to be unsigned and needs to be */
32/* at least 26 bits wide. The particular type can be */
33/* specified by defining PUNYCODE_UINT, otherwise a */
34/* suitable type will be chosen automatically. */
35
36#ifdef PUNYCODE_UINT /* a caller-supplied type takes precedence */
37 typedef PUNYCODE_UINT punycode_uint;
38#elif UINT_MAX >= (1 << 26) - 1 /* unsigned int can hold every 26-bit value */
39 typedef unsigned int punycode_uint;
40#else /* unsigned int is too narrow; fall back to unsigned long */
41 typedef unsigned long punycode_uint;
42#endif
43
45 size_t, /* input_length */
46 const punycode_uint [], /* input */
47 const unsigned char [], /* case_flags */
48 size_t *, /* output_length */
49 char [] /* output */
50);
51
52/*
53 punycode_encode() converts a sequence of code points (presumed to be
54 Unicode code points) to Punycode.
55
56 Input arguments (to be supplied by the caller):
57
58 input_length
59 The number of code points in the input array and the number
60 of flags in the case_flags array.
61
62 input
63 An array of code points. They are presumed to be Unicode
64 code points, but that is not strictly necessary. The
65 array contains code points, not code units. UTF-16 uses
66 code units D800 through DFFF to refer to code points
67 10000..10FFFF. The code points D800..DFFF do not occur in
68 any valid Unicode string. The code points that can occur in
69 Unicode strings (0..D7FF and E000..10FFFF) are also called
70 Unicode scalar values.
71
72 case_flags
73 A null pointer or an array of boolean values parallel to
74 the input array. Nonzero (true, flagged) suggests that the
75 corresponding Unicode character be forced to uppercase after
76 being decoded (if possible), and zero (false, unflagged)
77 suggests that it be forced to lowercase (if possible).
78 ASCII code points (0..7F) are encoded literally, except that
79 ASCII letters are forced to uppercase or lowercase according
80 to the corresponding case flags. If case_flags is a null
81 pointer then ASCII letters are left as they are, and other
82 code points are treated as unflagged.
83
84 Output arguments (to be filled in by the function):
85
86 output
87 An array of ASCII code points. It is *not* null-terminated;
88 it will contain zeros if and only if the input contains
89 zeros. (Of course the caller can leave room for a
90 terminator and add one if needed.)
91
92 Input/output arguments (to be supplied by the caller and overwritten
93 by the function):
94
95 output_length
96 The caller passes in the maximum number of ASCII code points
97 that it can receive. On successful return it will contain
98 the number of ASCII code points actually output.
99
100 Return value:
101
102 Can be any of the punycode_status values defined above except
103 punycode_bad_input. If not punycode_success, then output_length
104 and output might contain garbage.
105*/
106
108 size_t, /* input_length */
109 const char [], /* input */
110 size_t *, /* output_length */
111 punycode_uint [], /* output */
112 unsigned char [] /* case_flags */
113);
114
115/*
116 punycode_decode() converts Punycode to a sequence of code points
117 (presumed to be Unicode code points).
118
119 Input arguments (to be supplied by the caller):
120
121 input_length
122 The number of ASCII code points in the input array.
123
124 input
125 An array of ASCII code points (0..7F).
126
127 Output arguments (to be filled in by the function):
128
129 output
130 An array of code points like the input argument of
131 punycode_encode() (see above).
132
133 case_flags
134 A null pointer (if the flags are not needed by the caller)
135 or an array of boolean values parallel to the output array.
136 Nonzero (true, flagged) suggests that the corresponding
137 Unicode character be forced to uppercase by the caller (if
138 possible), and zero (false, unflagged) suggests that it
139 be forced to lowercase (if possible). ASCII code points
140 (0..7F) are output already in the proper case, but their
141 flags will be set appropriately so that applying the flags
142 would be harmless.
143
144 Input/output arguments (to be supplied by the caller and overwritten
145 by the function):
146
147 output_length
148 The caller passes in the maximum number of code points
149 that it can receive into the output array (which is also
150 the maximum number of flags that it can receive into the
151 case_flags array, if case_flags is not a null pointer). On
152 successful return it will contain the number of code points
153 actually output (which is also the number of flags actually
154 output, if case_flags is not a null pointer). The decoder
155 will never need to output more code points than the number
156 of ASCII code points in the input, because of the way the
157 encoding is defined. The number of code points output
158 cannot exceed the maximum possible value of a punycode_uint,
159 even if the supplied output_length is greater than that.
160
161 Return value:
162
163 Can be any of the punycode_status values defined above. If not
164 punycode_success, then output_length, output, and case_flags
165 might contain garbage.
166*/
167
punycode_status
Definition: punycode.h:24
@ punycode_bad_input
Definition: punycode.h:26
@ punycode_success
Definition: punycode.h:25
@ punycode_overflow
Definition: punycode.h:28
@ punycode_big_output
Definition: punycode.h:27
unsigned long punycode_uint
Definition: punycode.h:41
enum punycode_status punycode_decode(size_t, const char[], size_t *, punycode_uint[], unsigned char[])
Definition: punycode.c:194
enum punycode_status punycode_encode(size_t, const punycode_uint[], const unsigned char[], size_t *, char[])
Definition: punycode.c:95