mirror of
https://github.com/netsurf-browser/netsurf
synced 2024-11-21 22:11:22 +03:00
make mimesniffing use core strings
This commit is contained in:
parent
6fb654d356
commit
93e2b4855b
@ -16,8 +16,9 @@
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* MIME type sniffer (implementation)
|
||||
/**
|
||||
* \file
|
||||
* MIME type sniffer implementation
|
||||
*
|
||||
* Spec version: 2011-11-27
|
||||
*/
|
||||
@ -25,11 +26,13 @@
|
||||
#include <string.h>
|
||||
#include <strings.h>
|
||||
|
||||
#include "utils/http.h"
|
||||
#include "utils/utils.h"
|
||||
#include "utils/corestrings.h"
|
||||
|
||||
#include "content/content_factory.h"
|
||||
#include "content/llcache.h"
|
||||
#include "content/mimesniff.h"
|
||||
#include "utils/http.h"
|
||||
#include "utils/utils.h"
|
||||
|
||||
struct map_s {
|
||||
const uint8_t *sig;
|
||||
@ -38,103 +41,6 @@ struct map_s {
|
||||
lwc_string **type;
|
||||
};
|
||||
|
||||
static lwc_string *unknown_unknown;
|
||||
static lwc_string *application_unknown;
|
||||
static lwc_string *any;
|
||||
static lwc_string *text_xml;
|
||||
static lwc_string *application_xml;
|
||||
static lwc_string *text_html;
|
||||
static lwc_string *text_plain;
|
||||
static lwc_string *application_octet_stream;
|
||||
static lwc_string *image_gif;
|
||||
static lwc_string *image_png;
|
||||
static lwc_string *image_jpeg;
|
||||
static lwc_string *image_bmp;
|
||||
static lwc_string *image_vnd_microsoft_icon;
|
||||
static lwc_string *image_webp;
|
||||
static lwc_string *application_rss_xml;
|
||||
static lwc_string *application_atom_xml;
|
||||
static lwc_string *audio_wave;
|
||||
static lwc_string *application_ogg;
|
||||
static lwc_string *video_webm;
|
||||
static lwc_string *application_x_rar_compressed;
|
||||
static lwc_string *application_zip;
|
||||
static lwc_string *application_x_gzip;
|
||||
static lwc_string *application_postscript;
|
||||
static lwc_string *application_pdf;
|
||||
static lwc_string *video_mp4;
|
||||
static lwc_string *image_svg;
|
||||
|
||||
nserror mimesniff_init(void)
|
||||
{
|
||||
lwc_error lerror;
|
||||
|
||||
#define SINIT(v, s) \
|
||||
lerror = lwc_intern_string(s, SLEN(s), &v); \
|
||||
if (lerror != lwc_error_ok) \
|
||||
return NSERROR_NOMEM
|
||||
|
||||
SINIT(unknown_unknown, "unknown/unknown");
|
||||
SINIT(application_unknown, "application/unknown");
|
||||
SINIT(any, "*/*");
|
||||
SINIT(text_xml, "text/xml");
|
||||
SINIT(application_xml, "application/xml");
|
||||
SINIT(text_html, "text/html");
|
||||
SINIT(text_plain, "text/plain");
|
||||
SINIT(application_octet_stream, "application/octet-stream");
|
||||
SINIT(image_gif, "image/gif");
|
||||
SINIT(image_png, "image/png");
|
||||
SINIT(image_jpeg, "image/jpeg");
|
||||
SINIT(image_bmp, "image/bmp");
|
||||
SINIT(image_vnd_microsoft_icon, "image/vnd.microsoft.icon");
|
||||
SINIT(image_webp, "image/webp");
|
||||
SINIT(application_rss_xml, "application/rss+xml");
|
||||
SINIT(application_atom_xml, "application/atom+xml");
|
||||
SINIT(audio_wave, "audio/wave");
|
||||
SINIT(application_ogg, "application/ogg");
|
||||
SINIT(video_webm, "video/webm");
|
||||
SINIT(application_x_rar_compressed, "application/x-rar-compressed");
|
||||
SINIT(application_zip, "application/zip");
|
||||
SINIT(application_x_gzip, "application/x-gzip");
|
||||
SINIT(application_postscript, "application/postscript");
|
||||
SINIT(application_pdf, "application/pdf");
|
||||
SINIT(video_mp4, "video/mp4");
|
||||
SINIT(image_svg, "image/svg+xml");
|
||||
#undef SINIT
|
||||
|
||||
return NSERROR_OK;
|
||||
}
|
||||
|
||||
void mimesniff_fini(void)
|
||||
{
|
||||
lwc_string_unref(image_svg);
|
||||
lwc_string_unref(video_mp4);
|
||||
lwc_string_unref(application_pdf);
|
||||
lwc_string_unref(application_postscript);
|
||||
lwc_string_unref(application_x_gzip);
|
||||
lwc_string_unref(application_zip);
|
||||
lwc_string_unref(application_x_rar_compressed);
|
||||
lwc_string_unref(video_webm);
|
||||
lwc_string_unref(application_ogg);
|
||||
lwc_string_unref(audio_wave);
|
||||
lwc_string_unref(application_atom_xml);
|
||||
lwc_string_unref(application_rss_xml);
|
||||
lwc_string_unref(image_webp);
|
||||
lwc_string_unref(image_vnd_microsoft_icon);
|
||||
lwc_string_unref(image_bmp);
|
||||
lwc_string_unref(image_jpeg);
|
||||
lwc_string_unref(image_png);
|
||||
lwc_string_unref(image_gif);
|
||||
lwc_string_unref(application_octet_stream);
|
||||
lwc_string_unref(text_plain);
|
||||
lwc_string_unref(text_html);
|
||||
lwc_string_unref(application_xml);
|
||||
lwc_string_unref(text_xml);
|
||||
lwc_string_unref(any);
|
||||
lwc_string_unref(application_unknown);
|
||||
lwc_string_unref(unknown_unknown);
|
||||
}
|
||||
|
||||
static bool mimesniff__has_binary_octets(const uint8_t *data, size_t len)
|
||||
{
|
||||
const uint8_t *end = data + len;
|
||||
@ -143,7 +49,7 @@ static bool mimesniff__has_binary_octets(const uint8_t *data, size_t len)
|
||||
const uint8_t c = *data;
|
||||
|
||||
/* Binary iff in C0 and not ESC, CR, FF, LF, HT */
|
||||
if (c <= 0x1f && c != 0x1b && c != '\r' && c != '\f' &&
|
||||
if (c <= 0x1f && c != 0x1b && c != '\r' && c != '\f' &&
|
||||
c != '\n' && c != '\t')
|
||||
break;
|
||||
|
||||
@ -168,19 +74,19 @@ static nserror mimesniff__match_mp4(const uint8_t *data, size_t len,
|
||||
* uint32_t compatible_brands[];
|
||||
* }
|
||||
*
|
||||
* Note 1: A size of 0 implies that the length of the box is designated
|
||||
* by the remaining input data (and thus may only occur in the last
|
||||
* box in the input). We'll reject this below, as it's pointless
|
||||
* Note 1: A size of 0 implies that the length of the box is designated
|
||||
* by the remaining input data (and thus may only occur in the last
|
||||
* box in the input). We'll reject this below, as it's pointless
|
||||
* sniffing input that contains no boxes other than 'ftyp'.
|
||||
*
|
||||
* Note 2: A size of 1 implies an additional uint64_t field after
|
||||
* the type which contains the extended box size. We'll reject this,
|
||||
* too, as it implies a minimum of (2^32 - 24) / 4 compatible brands,
|
||||
* Note 2: A size of 1 implies an additional uint64_t field after
|
||||
* the type which contains the extended box size. We'll reject this,
|
||||
* too, as it implies a minimum of (2^32 - 24) / 4 compatible brands,
|
||||
* which is decidedly unlikely.
|
||||
*/
|
||||
|
||||
/* 12 reflects the minimum number of octets needed to sniff useful
|
||||
* information out of an 'ftyp' box (i.e. the size, type,
|
||||
/* 12 reflects the minimum number of octets needed to sniff useful
|
||||
* information out of an 'ftyp' box (i.e. the size, type,
|
||||
* and major_brand words). */
|
||||
if (len < 12)
|
||||
return NSERROR_NOT_FOUND;
|
||||
@ -193,22 +99,22 @@ static nserror mimesniff__match_mp4(const uint8_t *data, size_t len,
|
||||
return NSERROR_NOT_FOUND;
|
||||
|
||||
/* Ensure this is an 'ftyp' box */
|
||||
if (data[4] != 'f' || data[5] != 't' ||
|
||||
if (data[4] != 'f' || data[5] != 't' ||
|
||||
data[6] != 'y' || data[7] != 'p')
|
||||
return NSERROR_NOT_FOUND;
|
||||
|
||||
/* Check if major brand begins with 'mp4' */
|
||||
if (data[8] == 'm' && data[9] == 'p' && data[10] == '4') {
|
||||
*effective_type = lwc_string_ref(video_mp4);
|
||||
*effective_type = lwc_string_ref(corestring_lwc_video_mp4);
|
||||
return NSERROR_OK;
|
||||
}
|
||||
|
||||
/* Search each compatible brand in the box for "mp4" */
|
||||
for (i = 16; i <= box_size - 4; i += 4) {
|
||||
if (data[i] == 'm' &&
|
||||
data[i+1] == 'p' &&
|
||||
if (data[i] == 'm' &&
|
||||
data[i+1] == 'p' &&
|
||||
data[i+2] == '4') {
|
||||
*effective_type = lwc_string_ref(video_mp4);
|
||||
*effective_type = lwc_string_ref(corestring_lwc_video_mp4);
|
||||
return NSERROR_OK;
|
||||
}
|
||||
}
|
||||
@ -221,28 +127,28 @@ static nserror mimesniff__match_unknown_ws(const uint8_t *data, size_t len,
|
||||
{
|
||||
#define SIG(t, s, x) { (const uint8_t *) s, SLEN(s), x, t }
|
||||
static const struct map_s ws_exact_match_types[] = {
|
||||
SIG(&text_xml, "<?xml", false),
|
||||
SIG(&corestring_lwc_text_xml, "<?xml", false),
|
||||
{ NULL, 0, false, NULL }
|
||||
};
|
||||
|
||||
static const struct map_s ws_inexact_match_types[] = {
|
||||
SIG(&text_html, "<!DOCTYPE HTML", false),
|
||||
SIG(&text_html, "<HTML", false),
|
||||
SIG(&text_html, "<HEAD", false),
|
||||
SIG(&text_html, "<SCRIPT", false),
|
||||
SIG(&text_html, "<IFRAME", false),
|
||||
SIG(&text_html, "<H1", false),
|
||||
SIG(&text_html, "<DIV", false),
|
||||
SIG(&text_html, "<FONT", false),
|
||||
SIG(&text_html, "<TABLE", false),
|
||||
SIG(&text_html, "<A", false),
|
||||
SIG(&text_html, "<STYLE", false),
|
||||
SIG(&text_html, "<TITLE", false),
|
||||
SIG(&text_html, "<B", false),
|
||||
SIG(&text_html, "<BODY", false),
|
||||
SIG(&text_html, "<BR", false),
|
||||
SIG(&text_html, "<P", false),
|
||||
SIG(&text_html, "<!--", false),
|
||||
SIG(&corestring_lwc_text_html, "<!DOCTYPE HTML", false),
|
||||
SIG(&corestring_lwc_text_html, "<HTML", false),
|
||||
SIG(&corestring_lwc_text_html, "<HEAD", false),
|
||||
SIG(&corestring_lwc_text_html, "<SCRIPT", false),
|
||||
SIG(&corestring_lwc_text_html, "<IFRAME", false),
|
||||
SIG(&corestring_lwc_text_html, "<H1", false),
|
||||
SIG(&corestring_lwc_text_html, "<DIV", false),
|
||||
SIG(&corestring_lwc_text_html, "<FONT", false),
|
||||
SIG(&corestring_lwc_text_html, "<TABLE", false),
|
||||
SIG(&corestring_lwc_text_html, "<A", false),
|
||||
SIG(&corestring_lwc_text_html, "<STYLE", false),
|
||||
SIG(&corestring_lwc_text_html, "<TITLE", false),
|
||||
SIG(&corestring_lwc_text_html, "<B", false),
|
||||
SIG(&corestring_lwc_text_html, "<BODY", false),
|
||||
SIG(&corestring_lwc_text_html, "<BR", false),
|
||||
SIG(&corestring_lwc_text_html, "<P", false),
|
||||
SIG(&corestring_lwc_text_html, "<!--", false),
|
||||
{ NULL, 0, false, NULL }
|
||||
};
|
||||
#undef SIG
|
||||
@ -253,7 +159,7 @@ static nserror mimesniff__match_unknown_ws(const uint8_t *data, size_t len,
|
||||
while (data != end) {
|
||||
const uint8_t c = *data;
|
||||
|
||||
if (c != '\t' && c != '\n' && c != '\f' &&
|
||||
if (c != '\t' && c != '\n' && c != '\f' &&
|
||||
c != '\r' && c != ' ')
|
||||
break;
|
||||
|
||||
@ -277,9 +183,9 @@ static nserror mimesniff__match_unknown_ws(const uint8_t *data, size_t len,
|
||||
if (len < it->len + 1)
|
||||
continue;
|
||||
|
||||
if (strncasecmp((const char *) data,
|
||||
(const char *) it->sig, it->len) == 0 &&
|
||||
(data[it->len] == ' ' ||
|
||||
if (strncasecmp((const char *) data,
|
||||
(const char *) it->sig, it->len) == 0 &&
|
||||
(data[it->len] == ' ' ||
|
||||
data[it->len] == '>')) {
|
||||
*effective_type = lwc_string_ref(*it->type);
|
||||
return NSERROR_OK;
|
||||
@ -294,9 +200,9 @@ static nserror mimesniff__match_unknown_bom(const uint8_t *data, size_t len,
|
||||
{
|
||||
#define SIG(t, s, x) { (const uint8_t *) s, SLEN(s), x, t }
|
||||
static const struct map_s bom_match_types[] = {
|
||||
SIG(&text_plain, "\xfe\xff", false),
|
||||
SIG(&text_plain, "\xff\xfe", false),
|
||||
SIG(&text_plain, "\xef\xbb\xbf", false),
|
||||
SIG(&corestring_lwc_text_plain, "\xfe\xff", false),
|
||||
SIG(&corestring_lwc_text_plain, "\xff\xfe", false),
|
||||
SIG(&corestring_lwc_text_plain, "\xef\xbb\xbf", false),
|
||||
{ NULL, 0, false, NULL }
|
||||
};
|
||||
#undef SIG
|
||||
@ -317,17 +223,17 @@ static nserror mimesniff__match_unknown_riff(const uint8_t *data, size_t len,
|
||||
{
|
||||
#define SIG(t, s, x) { (const uint8_t *) s, SLEN(s), x, t }
|
||||
static const struct map_s riff_match_types[] = {
|
||||
SIG(&image_webp, "WEBPVP", true),
|
||||
SIG(&audio_wave, "WAVE", true),
|
||||
SIG(&corestring_lwc_image_webp, "WEBPVP", true),
|
||||
SIG(&corestring_lwc_audio_wave, "WAVE", true),
|
||||
{ NULL, 0, false, NULL }
|
||||
};
|
||||
#undef SIG
|
||||
const struct map_s *it;
|
||||
|
||||
for (it = riff_match_types; it->sig != NULL; it++) {
|
||||
if (it->len + SLEN("RIFF????") <= len &&
|
||||
if (it->len + SLEN("RIFF????") <= len &&
|
||||
memcmp(data, "RIFF", SLEN("RIFF")) == 0 &&
|
||||
memcmp(data + SLEN("RIFF????"),
|
||||
memcmp(data + SLEN("RIFF????"),
|
||||
it->sig, it->len) == 0) {
|
||||
*effective_type = lwc_string_ref(*it->type);
|
||||
return NSERROR_OK;
|
||||
@ -342,19 +248,19 @@ static nserror mimesniff__match_unknown_exact(const uint8_t *data, size_t len,
|
||||
{
|
||||
#define SIG(t, s, x) { (const uint8_t *) s, SLEN(s), x, t }
|
||||
static const struct map_s exact_match_types[] = {
|
||||
SIG(&image_gif, "GIF87a", true),
|
||||
SIG(&image_gif, "GIF89a", true),
|
||||
SIG(&image_png, "\x89PNG\r\n\x1a\n", true),
|
||||
SIG(&image_jpeg, "\xff\xd8\xff", true),
|
||||
SIG(&image_bmp, "BM", true),
|
||||
SIG(&image_vnd_microsoft_icon, "\x00\x00\x01\x00", true),
|
||||
SIG(&application_ogg, "OggS\x00", true),
|
||||
SIG(&video_webm, "\x1a\x45\xdf\xa3", true),
|
||||
SIG(&application_x_rar_compressed, "Rar \x1a\x07\x00", true),
|
||||
SIG(&application_zip, "PK\x03\x04", true),
|
||||
SIG(&application_x_gzip, "\x1f\x8b\x08", true),
|
||||
SIG(&application_postscript, "%!PS-Adobe-", true),
|
||||
SIG(&application_pdf, "%PDF-", false),
|
||||
SIG(&corestring_lwc_image_gif, "GIF87a", true),
|
||||
SIG(&corestring_lwc_image_gif, "GIF89a", true),
|
||||
SIG(&corestring_lwc_image_png, "\x89PNG\r\n\x1a\n", true),
|
||||
SIG(&corestring_lwc_image_jpeg, "\xff\xd8\xff", true),
|
||||
SIG(&corestring_lwc_image_bmp, "BM", true),
|
||||
SIG(&corestring_lwc_image_vnd_microsoft_icon, "\x00\x00\x01\x00", true),
|
||||
SIG(&corestring_lwc_application_ogg, "OggS\x00", true),
|
||||
SIG(&corestring_lwc_video_webm, "\x1a\x45\xdf\xa3", true),
|
||||
SIG(&corestring_lwc_application_x_rar_compressed, "Rar \x1a\x07\x00", true),
|
||||
SIG(&corestring_lwc_application_zip, "PK\x03\x04", true),
|
||||
SIG(&corestring_lwc_application_x_gzip, "\x1f\x8b\x08", true),
|
||||
SIG(&corestring_lwc_application_postscript, "%!PS-Adobe-",true),
|
||||
SIG(&corestring_lwc_application_pdf, "%PDF-", false),
|
||||
{ NULL, 0, false, NULL }
|
||||
};
|
||||
#undef SIG
|
||||
@ -374,11 +280,11 @@ static nserror mimesniff__match_unknown_exact(const uint8_t *data, size_t len,
|
||||
static nserror mimesniff__match_unknown(const uint8_t *data, size_t len,
|
||||
bool allow_unsafe, lwc_string **effective_type)
|
||||
{
|
||||
if (mimesniff__match_unknown_exact(data, len, allow_unsafe,
|
||||
if (mimesniff__match_unknown_exact(data, len, allow_unsafe,
|
||||
effective_type) == NSERROR_OK)
|
||||
return NSERROR_OK;
|
||||
|
||||
if (mimesniff__match_unknown_riff(data, len,
|
||||
if (mimesniff__match_unknown_riff(data, len,
|
||||
effective_type) == NSERROR_OK)
|
||||
return NSERROR_OK;
|
||||
|
||||
@ -407,49 +313,51 @@ static nserror mimesniff__compute_unknown(const uint8_t *data, size_t len,
|
||||
|
||||
len = min(len, 512);
|
||||
|
||||
if (mimesniff__match_unknown(data, len, true,
|
||||
effective_type) == NSERROR_OK)
|
||||
return NSERROR_OK;
|
||||
|
||||
if (mimesniff__has_binary_octets(data, len) == false) {
|
||||
/* No binary octets => text/plain */
|
||||
*effective_type = lwc_string_ref(text_plain);
|
||||
if (mimesniff__match_unknown(data, len, true,
|
||||
effective_type) == NSERROR_OK) {
|
||||
return NSERROR_OK;
|
||||
}
|
||||
|
||||
*effective_type = lwc_string_ref(application_octet_stream);
|
||||
if (mimesniff__has_binary_octets(data, len) == false) {
|
||||
/* No binary octets => text/plain */
|
||||
*effective_type = lwc_string_ref(corestring_lwc_text_plain);
|
||||
return NSERROR_OK;
|
||||
}
|
||||
|
||||
*effective_type = lwc_string_ref(corestring_lwc_application_octet_stream);
|
||||
|
||||
return NSERROR_OK;
|
||||
}
|
||||
|
||||
static nserror mimesniff__compute_text_or_binary(const uint8_t *data,
|
||||
static nserror mimesniff__compute_text_or_binary(const uint8_t *data,
|
||||
size_t len, lwc_string **effective_type)
|
||||
{
|
||||
if (data == NULL)
|
||||
if (data == NULL) {
|
||||
return NSERROR_NEED_DATA;
|
||||
}
|
||||
|
||||
len = min(len, 512);
|
||||
|
||||
if (len >= 3 && ((data[0] == 0xfe && data[1] == 0xff) ||
|
||||
(data[0] == 0xff && data[1] == 0xfe) ||
|
||||
(data[0] == 0xef && data[1] == 0xbb &&
|
||||
(data[0] == 0xef && data[1] == 0xbb &&
|
||||
data[2] == 0xbf))) {
|
||||
/* Found a BOM => text/plain */
|
||||
*effective_type = lwc_string_ref(text_plain);
|
||||
*effective_type = lwc_string_ref(corestring_lwc_text_plain);
|
||||
return NSERROR_OK;
|
||||
}
|
||||
|
||||
if (mimesniff__has_binary_octets(data, len) == false) {
|
||||
/* No binary octets => text/plain */
|
||||
*effective_type = lwc_string_ref(text_plain);
|
||||
*effective_type = lwc_string_ref(corestring_lwc_text_plain);
|
||||
return NSERROR_OK;
|
||||
}
|
||||
|
||||
if (mimesniff__match_unknown(data, len, false,
|
||||
if (mimesniff__match_unknown(data, len, false,
|
||||
effective_type) == NSERROR_OK)
|
||||
return NSERROR_OK;
|
||||
|
||||
*effective_type = lwc_string_ref(application_octet_stream);
|
||||
*effective_type = lwc_string_ref(corestring_lwc_application_octet_stream);
|
||||
|
||||
return NSERROR_OK;
|
||||
}
|
||||
@ -463,12 +371,12 @@ static nserror mimesniff__compute_image(lwc_string *official_type,
|
||||
size_t len;
|
||||
lwc_string **type;
|
||||
} image_types[] = {
|
||||
SIG(&image_gif, "GIF87a"),
|
||||
SIG(&image_gif, "GIF89a"),
|
||||
SIG(&image_png, "\x89PNG\r\n\x1a\n"),
|
||||
SIG(&image_jpeg, "\xff\xd8\xff"),
|
||||
SIG(&image_bmp, "BM"),
|
||||
SIG(&image_vnd_microsoft_icon, "\x00\x00\x01\x00"),
|
||||
SIG(&corestring_lwc_image_gif, "GIF87a"),
|
||||
SIG(&corestring_lwc_image_gif, "GIF89a"),
|
||||
SIG(&corestring_lwc_image_png, "\x89PNG\r\n\x1a\n"),
|
||||
SIG(&corestring_lwc_image_jpeg, "\xff\xd8\xff"),
|
||||
SIG(&corestring_lwc_image_bmp, "BM"),
|
||||
SIG(&corestring_lwc_image_vnd_microsoft_icon, "\x00\x00\x01\x00"),
|
||||
{ NULL, 0, NULL }
|
||||
};
|
||||
#undef SIG
|
||||
@ -489,12 +397,12 @@ static nserror mimesniff__compute_image(lwc_string *official_type,
|
||||
}
|
||||
|
||||
/* WebP has a signature that doesn't fit into the above table */
|
||||
if (SLEN("RIFF????WEBPVP") <= len &&
|
||||
memcmp(data, "RIFF", SLEN("RIFF")) == 0 &&
|
||||
memcmp(data + SLEN("RIFF????"),
|
||||
if (SLEN("RIFF????WEBPVP") <= len &&
|
||||
memcmp(data, "RIFF", SLEN("RIFF")) == 0 &&
|
||||
memcmp(data + SLEN("RIFF????"),
|
||||
"WEBPVP", SLEN("WEBPVP")) == 0 ) {
|
||||
lwc_string_unref(official_type);
|
||||
*effective_type = lwc_string_ref(image_webp);
|
||||
*effective_type = lwc_string_ref(corestring_lwc_image_webp);
|
||||
return NSERROR_OK;
|
||||
}
|
||||
|
||||
@ -537,7 +445,7 @@ static nserror mimesniff__compute_feed_or_html(const uint8_t *data,
|
||||
|
||||
switch (state) {
|
||||
case BEFORE_BOM:
|
||||
if (3 <= end - data && c == 0xef && data[1] == 0xbb &&
|
||||
if (3 <= end - data && c == 0xef && data[1] == 0xbb &&
|
||||
data[2] == 0xbf) {
|
||||
data += 3;
|
||||
}
|
||||
@ -597,12 +505,12 @@ static nserror mimesniff__compute_feed_or_html(const uint8_t *data,
|
||||
break;
|
||||
case IN_TAG:
|
||||
if (MATCH("rss")) {
|
||||
*effective_type =
|
||||
lwc_string_ref(application_rss_xml);
|
||||
*effective_type =
|
||||
lwc_string_ref(corestring_lwc_application_rss_xml);
|
||||
return NSERROR_OK;
|
||||
} else if (MATCH("feed")) {
|
||||
*effective_type =
|
||||
lwc_string_ref(application_atom_xml);
|
||||
*effective_type =
|
||||
lwc_string_ref(corestring_lwc_application_atom_xml);
|
||||
return NSERROR_OK;
|
||||
} else if (MATCH("rdf:RDF")) {
|
||||
state = IN_RDF;
|
||||
@ -621,8 +529,7 @@ static nserror mimesniff__compute_feed_or_html(const uint8_t *data,
|
||||
data++;
|
||||
|
||||
if (rdf && rss) {
|
||||
*effective_type =
|
||||
lwc_string_ref(application_rss_xml);
|
||||
*effective_type = lwc_string_ref(corestring_lwc_application_rss_xml);
|
||||
return NSERROR_OK;
|
||||
}
|
||||
|
||||
@ -631,7 +538,7 @@ static nserror mimesniff__compute_feed_or_html(const uint8_t *data,
|
||||
#undef MATCH
|
||||
}
|
||||
|
||||
*effective_type = lwc_string_ref(text_html);
|
||||
*effective_type = lwc_string_ref(corestring_lwc_text_html);
|
||||
|
||||
return NSERROR_OK;
|
||||
|
||||
@ -664,7 +571,7 @@ nserror mimesniff_compute_effective_type(llcache_handle *handle,
|
||||
bool match;
|
||||
nserror error;
|
||||
|
||||
content_type_header =
|
||||
content_type_header =
|
||||
llcache_handle_get_header(handle, "Content-Type");
|
||||
if (content_type_header == NULL) {
|
||||
if (sniff_allowed == false)
|
||||
@ -692,9 +599,10 @@ nserror mimesniff_compute_effective_type(llcache_handle *handle,
|
||||
if (image_only) {
|
||||
lwc_string *official_type;
|
||||
|
||||
if (lwc_string_caseless_isequal(ct->media_type, image_svg,
|
||||
&match) == lwc_error_ok && match) {
|
||||
*effective_type = lwc_string_ref(image_svg);
|
||||
if (lwc_string_caseless_isequal(ct->media_type,
|
||||
corestring_lwc_image_svg,
|
||||
&match) == lwc_error_ok && match) {
|
||||
*effective_type = lwc_string_ref(corestring_lwc_image_svg);
|
||||
http_content_type_destroy(ct);
|
||||
return NSERROR_OK;
|
||||
}
|
||||
@ -710,8 +618,9 @@ nserror mimesniff_compute_effective_type(llcache_handle *handle,
|
||||
/* Look for text types */
|
||||
for (tt = text_types; tt->data != NULL; tt++) {
|
||||
if (tt->len == content_type_header_len &&
|
||||
memcmp(tt->data, content_type_header,
|
||||
content_type_header_len) == 0) {
|
||||
memcmp(tt->data,
|
||||
content_type_header,
|
||||
content_type_header_len) == 0) {
|
||||
http_content_type_destroy(ct);
|
||||
return mimesniff__compute_text_or_binary(data, len,
|
||||
effective_type);
|
||||
@ -719,22 +628,24 @@ nserror mimesniff_compute_effective_type(llcache_handle *handle,
|
||||
}
|
||||
|
||||
/* unknown/unknown, application/unknown, * / * */
|
||||
if ((lwc_string_caseless_isequal(ct->media_type, unknown_unknown,
|
||||
&match) == lwc_error_ok && match) ||
|
||||
(lwc_string_caseless_isequal(ct->media_type,
|
||||
application_unknown, &match) == lwc_error_ok &&
|
||||
match) ||
|
||||
(lwc_string_caseless_isequal(ct->media_type, any,
|
||||
&match) == lwc_error_ok && match)) {
|
||||
if ((lwc_string_caseless_isequal(ct->media_type,
|
||||
corestring_lwc_unknown_unknown,
|
||||
&match) == lwc_error_ok && match) ||
|
||||
(lwc_string_caseless_isequal(ct->media_type,
|
||||
corestring_lwc_application_unknown,
|
||||
&match) == lwc_error_ok && match) ||
|
||||
(lwc_string_caseless_isequal(ct->media_type,
|
||||
corestring_lwc_any,
|
||||
&match) == lwc_error_ok && match)) {
|
||||
http_content_type_destroy(ct);
|
||||
return mimesniff__compute_unknown(data, len, effective_type);
|
||||
}
|
||||
|
||||
/* +xml */
|
||||
if (lwc_string_length(ct->media_type) > SLEN("+xml") &&
|
||||
strncasecmp(lwc_string_data(ct->media_type) +
|
||||
lwc_string_length(ct->media_type) -
|
||||
SLEN("+xml"),
|
||||
strncasecmp(lwc_string_data(ct->media_type) +
|
||||
lwc_string_length(ct->media_type) -
|
||||
SLEN("+xml"),
|
||||
"+xml", SLEN("+xml")) == 0) {
|
||||
/* Use official type */
|
||||
*effective_type = lwc_string_ref(ct->media_type);
|
||||
@ -743,19 +654,20 @@ nserror mimesniff_compute_effective_type(llcache_handle *handle,
|
||||
}
|
||||
|
||||
/* text/xml, application/xml */
|
||||
if ((lwc_string_caseless_isequal(ct->media_type, text_xml,
|
||||
&match) == lwc_error_ok && match) ||
|
||||
(lwc_string_caseless_isequal(ct->media_type,
|
||||
application_xml, &match) == lwc_error_ok &&
|
||||
match)) {
|
||||
if ((lwc_string_caseless_isequal(ct->media_type,
|
||||
corestring_lwc_text_xml,
|
||||
&match) == lwc_error_ok && match) ||
|
||||
(lwc_string_caseless_isequal(ct->media_type,
|
||||
corestring_lwc_application_xml,
|
||||
&match) == lwc_error_ok && match)) {
|
||||
/* Use official type */
|
||||
*effective_type = lwc_string_ref(ct->media_type);
|
||||
http_content_type_destroy(ct);
|
||||
return NSERROR_OK;
|
||||
}
|
||||
|
||||
|
||||
/* Image types */
|
||||
if (content_factory_type_from_mime_type(ct->media_type) ==
|
||||
if (content_factory_type_from_mime_type(ct->media_type) ==
|
||||
CONTENT_IMAGE) {
|
||||
lwc_string *official_type = lwc_string_ref(ct->media_type);
|
||||
http_content_type_destroy(ct);
|
||||
@ -764,11 +676,12 @@ nserror mimesniff_compute_effective_type(llcache_handle *handle,
|
||||
}
|
||||
|
||||
/* text/html */
|
||||
if ((lwc_string_caseless_isequal(ct->media_type, text_html,
|
||||
&match) == lwc_error_ok && match)) {
|
||||
if ((lwc_string_caseless_isequal(ct->media_type,
|
||||
corestring_lwc_text_html,
|
||||
&match) == lwc_error_ok && match)) {
|
||||
http_content_type_destroy(ct);
|
||||
return mimesniff__compute_feed_or_html(data, len,
|
||||
effective_type);
|
||||
effective_type);
|
||||
}
|
||||
|
||||
/* Use official type */
|
||||
@ -778,4 +691,3 @@ nserror mimesniff_compute_effective_type(llcache_handle *handle,
|
||||
|
||||
return NSERROR_OK;
|
||||
}
|
||||
|
||||
|
@ -16,8 +16,9 @@
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* MIME type sniffer (interface)
|
||||
/**
|
||||
* \file
|
||||
* MIME type sniffer interface
|
||||
*/
|
||||
|
||||
#ifndef NETSURF_CONTENT_MIMESNIFF_H_
|
||||
@ -31,8 +32,10 @@
|
||||
struct llcache_handle;
|
||||
|
||||
/**
|
||||
* Compute the effective MIME type for an object using the sniffing
|
||||
* algorithm described in http://mimesniff.spec.whatwg.org/
|
||||
* Compute the effective MIME type for an object
|
||||
*
|
||||
* The implementation uses the sniffing algorithm described in
|
||||
* http://mimesniff.spec.whatwg.org/
|
||||
*
|
||||
* \param handle Source data handle to sniff
|
||||
* \param data First data chunk, or NULL
|
||||
@ -49,7 +52,4 @@ nserror mimesniff_compute_effective_type(struct llcache_handle *handle,
|
||||
const uint8_t *data, size_t len, bool sniff_allowed,
|
||||
bool image_only, lwc_string **effective_type);
|
||||
|
||||
nserror mimesniff_init(void);
|
||||
void mimesniff_fini(void);
|
||||
|
||||
#endif
|
||||
|
@ -212,10 +212,6 @@ nserror netsurf_init(const char *store_path)
|
||||
if (ret != NSERROR_OK)
|
||||
return ret;
|
||||
|
||||
ret = mimesniff_init();
|
||||
if (ret != NSERROR_OK)
|
||||
return ret;
|
||||
|
||||
setlocale(LC_ALL, "");
|
||||
|
||||
/* initialise the fetchers */
|
||||
@ -262,8 +258,6 @@ void netsurf_exit(void)
|
||||
LOG("Closing fetches");
|
||||
fetcher_quit();
|
||||
|
||||
mimesniff_fini();
|
||||
|
||||
/* dump any remaining cache entries */
|
||||
image_cache_fini();
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user