NetSurf
url.c
Go to the documentation of this file.
1/*
2 * Copyright 2006 Richard Wilson <info@tinct.net>
3 * Copyright 2005 James Bursa <bursa@users.sourceforge.net>
4 * Copyright 2005 John M Bell <jmb202@ecs.soton.ac.uk>
5 *
6 * This file is part of NetSurf, http://www.netsurf-browser.org/
7 *
8 * NetSurf is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; version 2 of the License.
11 *
12 * NetSurf is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program. If not, see <http://www.gnu.org/licenses/>.
19 */
20
21/**
22 * \file
23 * \brief Implementation of URI percent escaping.
24 *
25 * Percent encoding of URIs is subject to RFC3986; however, this file does
26 * not implement URI behaviour, purely the percent encoding, so only the
27 * unreserved set is left unencoded and arbitrary binary data may be
28 * unescaped.
29 *
30 * \note Earlier RFCs (2396, 1738 and 1630) list the tilde ~ character
31 * as special, so its handling is ambiguous
32 */
33
34#include <ctype.h>
35#include <assert.h>
36#include <string.h>
37#include <stdlib.h>
38#include <stdbool.h>
39
40#include "utils/ascii.h"
41#include "utils/config.h"
42#include "utils/log.h"
43#include "utils/url.h"
44
45
/**
 * Map a single hexadecimal digit character onto its numeric value.
 *
 * The caller must pass a valid hex digit; for any other character the
 * result is meaningless.
 *
 * \param[in] c  hex digit character ('0'-'9', 'A'-'F' or 'a'-'f')
 * \return numeric value of the digit
 */
static inline char xdigit_to_hex(char c)
{
	/* Amount to subtract from c; lower case is the fallback case. */
	int bias = 'a' - 10;

	if (c >= '0' && c <= '9') {
		bias = '0';
	} else if (c >= 'A' && c <= 'F') {
		bias = 'A' - 10;
	}

	return c - bias;
}
64
65
66/* exported interface documented in utils/url.h */
67nserror url_unescape(const char *str, size_t length,
68 size_t *length_out, char **result_out)
69{
70 const char *str_end;
71 size_t new_len;
72 char *res_pos;
73 char *result;
74
75 if ((str == NULL) || (result_out == NULL)) {
77 }
78
79 if (length == 0) {
80 length = strlen(str);
81 }
82
83 result = malloc(length + 1);
84 if (result == NULL) {
85 return NSERROR_NOMEM;
86 }
87
88 res_pos = result;
89 str_end = str + length;
90 if (length >= 3) {
91 str_end -= 2;
92 while (str < str_end) {
93 char c = *str;
94 char c1 = *(str + 1);
95 char c2 = *(str + 2);
96
97 if (c == '%' && ascii_is_hex(c1) && ascii_is_hex(c2)) {
98 c = xdigit_to_hex(c1) << 4 | xdigit_to_hex(c2);
99 str += 2;
100 }
101 *res_pos++ = c;
102 str++;
103 }
104 str_end += 2;
105 }
106
107 while (str < str_end) {
108 *res_pos++ = *str++;
109 }
110
111 *res_pos = '\0';
112 new_len = res_pos - result;
113
114 if (new_len != length) {
115 /* Shrink wrap the allocation around the string */
116 char *tmp = realloc(result, new_len + 1);
117 if (tmp != NULL) {
118 result = tmp;
119 }
120 }
121
122 if (length_out != NULL) {
123 *length_out = new_len;
124 }
125 *result_out = result;
126 return NSERROR_OK;
127}
128
129
130/* exported interface documented in utils/url.h */
131nserror url_escape(const char *unescaped, bool sptoplus,
132 const char *escexceptions, char **result)
133{
134 size_t len, new_len;
135 char *escaped, *pos;
136 const char *c;
137
138 if (unescaped == NULL || result == NULL) {
140 }
141
142 len = strlen(unescaped);
143
144 escaped = malloc(len * 3 + 1);
145 if (escaped == NULL) {
146 return NSERROR_NOMEM;
147 }
148 pos = escaped;
149
150 for (c = unescaped; *c != '\0'; c++) {
151 /* Check if we should escape this byte.
152 * '~' is unreserved and should not be percent encoded, if
153 * you believe the spec; however, leaving it unescaped
154 * breaks a bunch of websites, so we escape it anyway. */
155 if (!isascii(*c) ||
156 (strchr(":/?#[]@" /* gen-delims */
157 "!$&'()*+,;=" /* sub-delims */
158 "<>%\"{}|\\^`~" /* others */, *c) &&
159 (!escexceptions ||
160 !strchr(escexceptions, *c))) ||
161 *c <= 0x20 || *c == 0x7f) {
162 if (*c == 0x20 && sptoplus) {
163 *pos++ = '+';
164 } else {
165 *pos++ = '%';
166 *pos++ = "0123456789ABCDEF"[(*c >> 4) & 0xf];
167 *pos++ = "0123456789ABCDEF"[*c & 0xf];
168 }
169 } else {
170 /* unreserved characters: [a-zA-Z0-9-._] */
171 *pos++ = *c;
172 }
173 }
174 *pos = '\0';
175 new_len = pos - escaped;
176
177 if (new_len != len) {
178 /* Shrink wrap the allocation around the escaped string */
179 char *tmp = realloc(escaped, new_len + 1);
180 if (tmp != NULL) {
181 escaped = tmp;
182 }
183 }
184
185 *result = escaped;
186 return NSERROR_OK;
187}
STATIC char result[100]
Definition: arexx.c:77
Helpers for ASCII string handling.
static bool ascii_is_hex(char c)
Test whether a character is hexadecimal (upper or lower case).
Definition: ascii.h:163
nserror
Enumeration of error codes.
Definition: errors.h:29
@ NSERROR_BAD_PARAMETER
Bad Parameter.
Definition: errors.h:48
@ NSERROR_NOMEM
Memory exhaustion.
Definition: errors.h:32
@ NSERROR_OK
No error.
Definition: errors.h:30
Interface to utility string handling.
nserror url_escape(const char *unescaped, bool sptoplus, const char *escexceptions, char **result)
Escape a string suitable for inclusion in an URL.
Definition: url.c:131
nserror url_unescape(const char *str, size_t length, size_t *length_out, char **result_out)
Convert an escaped string to plain.
Definition: url.c:67
static char xdigit_to_hex(char c)
Convert a hex digit to a hex value.
Definition: url.c:54
Interface to URL parsing and joining operations.