45 const uint8_t *end = data + len;
48 const uint8_t c = *data;
51 if (c <= 0x1f && c != 0x1b && c !=
'\r' && c !=
'\f' &&
52 c !=
'\n' && c !=
'\t')
62 lwc_string **effective_type)
94 box_size = (data[0] << 24) | (data[1] << 16) | (data[2] << 8) | data[3];
97 if (len < box_size || box_size % 4 != 0)
101 if (data[4] !=
'f' || data[5] !=
't' ||
102 data[6] !=
'y' || data[7] !=
'p')
106 if (data[8] ==
'm' && data[9] ==
'p' && data[10] ==
'4') {
107 *effective_type = lwc_string_ref(corestring_lwc_video_mp4);
112 for (i = 16; i <= box_size - 4; i += 4) {
113 if (data[i] ==
'm' &&
116 *effective_type = lwc_string_ref(corestring_lwc_video_mp4);
125 lwc_string **effective_type)
127#define SIG(t, s, x) { (const uint8_t *) s, SLEN(s), x, t }
128 static const struct map_s ws_exact_match_types[] = {
129 SIG(&corestring_lwc_text_xml,
"<?xml",
false),
130 { NULL, 0,
false, NULL }
133 static const struct map_s ws_inexact_match_types[] = {
134 SIG(&corestring_lwc_text_html,
"<!DOCTYPE HTML",
false),
135 SIG(&corestring_lwc_text_html,
"<HTML",
false),
136 SIG(&corestring_lwc_text_html,
"<HEAD",
false),
137 SIG(&corestring_lwc_text_html,
"<SCRIPT",
false),
138 SIG(&corestring_lwc_text_html,
"<IFRAME",
false),
139 SIG(&corestring_lwc_text_html,
"<H1",
false),
140 SIG(&corestring_lwc_text_html,
"<DIV",
false),
141 SIG(&corestring_lwc_text_html,
"<FONT",
false),
142 SIG(&corestring_lwc_text_html,
"<TABLE",
false),
143 SIG(&corestring_lwc_text_html,
"<A",
false),
144 SIG(&corestring_lwc_text_html,
"<STYLE",
false),
145 SIG(&corestring_lwc_text_html,
"<TITLE",
false),
146 SIG(&corestring_lwc_text_html,
"<B",
false),
147 SIG(&corestring_lwc_text_html,
"<BODY",
false),
148 SIG(&corestring_lwc_text_html,
"<BR",
false),
149 SIG(&corestring_lwc_text_html,
"<P",
false),
150 SIG(&corestring_lwc_text_html,
"<!--",
false),
151 { NULL, 0,
false, NULL }
154 const uint8_t *end = data +
len;
155 const struct map_s *it;
158 while (data != end) {
159 const uint8_t c = *data;
161 if (c !=
'\t' && c !=
'\n' && c !=
'\f' &&
162 c !=
'\r' && c !=
' ')
173 for (it = ws_exact_match_types; it->
sig != NULL; it++) {
174 if (it->
len <=
len && memcmp(data, it->
sig, it->
len) == 0) {
175 *effective_type = lwc_string_ref(*it->
type);
180 for (it = ws_inexact_match_types; it->
sig != NULL; it++) {
182 if (len < it->
len + 1)
185 if (strncasecmp((
const char *) data,
186 (
const char *) it->
sig, it->
len) == 0 &&
187 (data[it->
len] ==
' ' ||
188 data[it->
len] ==
'>')) {
189 *effective_type = lwc_string_ref(*it->
type);
198 lwc_string **effective_type)
200#define SIG(t, s, x) { (const uint8_t *) s, SLEN(s), x, t }
201 static const struct map_s bom_match_types[] = {
202 SIG(&corestring_lwc_text_plain,
"\xfe\xff",
false),
203 SIG(&corestring_lwc_text_plain,
"\xff\xfe",
false),
204 SIG(&corestring_lwc_text_plain,
"\xef\xbb\xbf",
false),
205 { NULL, 0,
false, NULL }
208 const struct map_s *it;
210 for (it = bom_match_types; it->
sig != NULL; it++) {
211 if (it->
len <=
len && memcmp(data, it->
sig, it->
len) == 0) {
212 *effective_type = lwc_string_ref(*it->
type);
221 lwc_string **effective_type)
223#define SIG(t, s, x) { (const uint8_t *) s, SLEN(s), x, t }
224 static const struct map_s riff_match_types[] = {
225 SIG(&corestring_lwc_image_webp,
"WEBPVP",
true),
226 SIG(&corestring_lwc_audio_wave,
"WAVE",
true),
227 { NULL, 0,
false, NULL }
230 const struct map_s *it;
232 for (it = riff_match_types; it->
sig != NULL; it++) {
234 memcmp(data,
"RIFF",
SLEN(
"RIFF")) == 0 &&
235 memcmp(data +
SLEN(
"RIFF????"),
237 *effective_type = lwc_string_ref(*it->
type);
246 bool allow_unsafe, lwc_string **effective_type)
248#define SIG(t, s, x) { (const uint8_t *) s, SLEN(s), x, t }
249 static const struct map_s exact_match_types[] = {
250 SIG(&corestring_lwc_image_gif,
"GIF87a",
true),
251 SIG(&corestring_lwc_image_gif,
"GIF89a",
true),
252 SIG(&corestring_lwc_image_png,
"\x89PNG\r\n\x1a\n",
true),
253 SIG(&corestring_lwc_image_jpeg,
"\xff\xd8\xff",
true),
254 SIG(&corestring_lwc_image_bmp,
"BM",
true),
255 SIG(&corestring_lwc_image_vnd_microsoft_icon,
"\x00\x00\x01\x00",
true),
256 SIG(&corestring_lwc_application_ogg,
"OggS\x00",
true),
257 SIG(&corestring_lwc_video_webm,
"\x1a\x45\xdf\xa3",
true),
258 SIG(&corestring_lwc_application_x_rar_compressed,
"Rar \x1a\x07\x00",
true),
259 SIG(&corestring_lwc_application_zip,
"PK\x03\x04",
true),
260 SIG(&corestring_lwc_application_x_gzip,
"\x1f\x8b\x08",
true),
261 SIG(&corestring_lwc_application_postscript,
"%!PS-Adobe-",
true),
262 SIG(&corestring_lwc_application_pdf,
"%PDF-",
false),
263 SIG(&corestring_lwc_image_jxl,
"\xFF\x0A",
true),
265 (
const uint8_t *)
"\x00\x00\x00\x0CJXL \x0D\x0A\x87\x0A",
268 &corestring_lwc_image_jxl
270 { NULL, 0,
false, NULL }
273 const struct map_s *it;
275 for (it = exact_match_types; it->
sig != NULL; it++) {
276 if (it->
len <=
len && memcmp(data, it->
sig, it->
len) == 0 &&
277 (allow_unsafe || it->
safe)) {
278 *effective_type = lwc_string_ref(*it->
type);
287 bool allow_unsafe, lwc_string **effective_type)
297 if (allow_unsafe ==
false)
315 lwc_string **effective_type)
329 *effective_type = lwc_string_ref(corestring_lwc_text_plain);
333 *effective_type = lwc_string_ref(corestring_lwc_application_octet_stream);
339 size_t len, lwc_string **effective_type)
347 if (
len >= 3 && ((data[0] == 0xfe && data[1] == 0xff) ||
348 (data[0] == 0xff && data[1] == 0xfe) ||
349 (data[0] == 0xef && data[1] == 0xbb &&
352 *effective_type = lwc_string_ref(corestring_lwc_text_plain);
358 *effective_type = lwc_string_ref(corestring_lwc_text_plain);
366 *effective_type = lwc_string_ref(corestring_lwc_application_octet_stream);
372 const uint8_t *data,
size_t len, lwc_string **effective_type)
374#define SIG(t, s) { (const uint8_t *) s, SLEN(s), t }
375 static const struct it_s {
380 SIG(&corestring_lwc_image_gif,
"GIF87a"),
381 SIG(&corestring_lwc_image_gif,
"GIF89a"),
382 SIG(&corestring_lwc_image_png,
"\x89PNG\r\n\x1a\n"),
383 SIG(&corestring_lwc_image_jpeg,
"\xff\xd8\xff"),
384 SIG(&corestring_lwc_image_bmp,
"BM"),
385 SIG(&corestring_lwc_image_vnd_microsoft_icon,
"\x00\x00\x01\x00"),
386 SIG(&corestring_lwc_image_jxl,
"\xFF\x0A"),
388 (
const uint8_t *)
"\x00\x00\x00\x0CJXL \x0D\x0A\x87\x0A",
390 &corestring_lwc_image_jxl
396 const struct it_s *it;
399 lwc_string_unref(official_type);
404 if (it->len <= len && memcmp(data, it->sig, it->len) == 0) {
405 lwc_string_unref(official_type);
406 *effective_type = lwc_string_ref(*it->type);
412 if (
SLEN(
"RIFF????WEBPVP") <= len &&
413 memcmp(data,
"RIFF",
SLEN(
"RIFF")) == 0 &&
414 memcmp(data +
SLEN(
"RIFF????"),
415 "WEBPVP",
SLEN(
"WEBPVP")) == 0 ) {
416 lwc_string_unref(official_type);
417 *effective_type = lwc_string_ref(corestring_lwc_image_webp);
421 *effective_type = official_type;
427 size_t len, lwc_string **effective_type)
429#define RDF_NS "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
430#define RSS_NS "http://purl.org/rss/1.0"
442 } state = BEFORE_BOM;
444 bool rdf =
false, rss =
false;
450 end = data +
min(len, 512);
453 const uint8_t c = *data;
455#define MATCH(s) SLEN(s) <= (size_t) (end - data) && \
456 memcmp(data, s, SLEN(s)) == 0
460 if (3 <= end - data && c == 0xef && data[1] == 0xbb &&
465 state = BEFORE_MARKUP;
468 if (c ==
'\t' || c ==
'\n' || c ==
'\r' || c ==
' ')
473 state = MARKUP_START;
479 state = COMMENT_OR_DOCTYPE;
481 }
else if (c ==
'?') {
489 case COMMENT_OR_DOCTYPE:
490 if (2 <= end - data && c ==
'-' && data[1] ==
'-') {
499 if (3 <= end - data && c ==
'-' && data[1] ==
'-' &&
501 state = BEFORE_MARKUP;
508 state = BEFORE_MARKUP;
512 if (2 <= end - data && c ==
'?' && data[1] ==
'>') {
513 state = BEFORE_MARKUP;
521 lwc_string_ref(corestring_lwc_application_rss_xml);
523 }
else if (
MATCH(
"feed")) {
525 lwc_string_ref(corestring_lwc_application_atom_xml);
527 }
else if (
MATCH(
"rdf:RDF")) {
529 data +=
SLEN(
"rdf:RDF");
544 *effective_type = lwc_string_ref(corestring_lwc_application_rss_xml);
553 *effective_type = lwc_string_ref(corestring_lwc_text_html);
568 lwc_string **effective_type)
570#define S(s) { s, SLEN(s) }
571 static const struct tt_s {
576 S(
"text/plain; charset=ISO-8859-1"),
577 S(
"text/plain; charset=iso-8859-1"),
578 S(
"text/plain; charset=UTF-8"),
583 size_t content_type_header_len;
585 const struct tt_s *tt;
589 if (content_type_header == NULL) {
590 if (sniff_allowed ==
false) {
600 if (sniff_allowed ==
false)
607 if (sniff_allowed ==
false) {
608 *effective_type = lwc_string_ref(ct->
media_type);
614 lwc_string *official_type;
616 if (lwc_string_caseless_isequal(ct->
media_type,
617 corestring_lwc_image_svg,
618 &match) == lwc_error_ok && match) {
619 *effective_type = lwc_string_ref(corestring_lwc_image_svg);
624 official_type = lwc_string_ref(ct->
media_type);
627 data, len, effective_type);
630 content_type_header_len = strlen(content_type_header);
633 for (tt = text_types; tt->data != NULL; tt++) {
634 if (tt->len == content_type_header_len &&
637 content_type_header_len) == 0) {
645 if ((lwc_string_caseless_isequal(ct->
media_type,
646 corestring_lwc_unknown_unknown,
647 &match) == lwc_error_ok && match) ||
649 corestring_lwc_application_unknown,
650 &match) == lwc_error_ok && match) ||
653 &match) == lwc_error_ok && match)) {
663 "+xml",
SLEN(
"+xml")) == 0) {
665 *effective_type = lwc_string_ref(ct->
media_type);
671 if ((lwc_string_caseless_isequal(ct->
media_type,
672 corestring_lwc_text_xml,
673 &match) == lwc_error_ok && match) ||
675 corestring_lwc_application_xml,
676 &match) == lwc_error_ok && match)) {
678 *effective_type = lwc_string_ref(ct->
media_type);
686 lwc_string *official_type = lwc_string_ref(ct->
media_type);
689 data, len, effective_type);
693 if ((lwc_string_caseless_isequal(ct->
media_type,
694 corestring_lwc_text_html,
695 &match) == lwc_error_ok && match)) {
702 *effective_type = lwc_string_ref(ct->
media_type);
static const content_type image_types
nserror http_parse_content_type(const char *header_value, http_content_type **result)
Parse an HTTP Content-Type header value.
void http_content_type_destroy(http_content_type *victim)
Destroy a content type object.
content_type content_factory_type_from_mime_type(lwc_string *mime_type)
Compute the generic content type for a MIME type.
@ CONTENT_IMAGE
All images.
Useful interned string pointers (interface).
nserror
Enumeration of error codes.
@ NSERROR_NOT_FOUND
Requested item not found.
@ NSERROR_NEED_DATA
More data needed.
HTTP header parsing functions.
static nserror mimesniff__match_unknown_exact(const uint8_t *data, size_t len, bool allow_unsafe, lwc_string **effective_type)
static nserror mimesniff__match_mp4(const uint8_t *data, size_t len, lwc_string **effective_type)
static nserror mimesniff__compute_text_or_binary(const uint8_t *data, size_t len, lwc_string **effective_type)
nserror mimesniff_compute_effective_type(const char *content_type_header, const uint8_t *data, size_t len, bool sniff_allowed, bool image_only, lwc_string **effective_type)
Compute the effective MIME type for an object.
static nserror mimesniff__match_unknown(const uint8_t *data, size_t len, bool allow_unsafe, lwc_string **effective_type)
static nserror mimesniff__match_unknown_bom(const uint8_t *data, size_t len, lwc_string **effective_type)
static bool mimesniff__has_binary_octets(const uint8_t *data, size_t len)
static nserror mimesniff__compute_unknown(const uint8_t *data, size_t len, lwc_string **effective_type)
static nserror mimesniff__match_unknown_ws(const uint8_t *data, size_t len, lwc_string **effective_type)
static nserror mimesniff__match_unknown_riff(const uint8_t *data, size_t len, lwc_string **effective_type)
static nserror mimesniff__compute_image(lwc_string *official_type, const uint8_t *data, size_t len, lwc_string **effective_type)
static nserror mimesniff__compute_feed_or_html(const uint8_t *data, size_t len, lwc_string **effective_type)
MIME type sniffer interface.
Interface to utility string handling.
Interface to a number of general-purpose utility functions.
#define SLEN(x)
Calculate length of constant C string.