kolibrios/programs/network/netsurf/libparserutils/include/parserutils/input/inputstream.h

/*
 * This file is part of LibParserUtils.
 * Licensed under the MIT License,
 *                http://www.opensource.org/licenses/mit-license.php
 * Copyright 2007 John-Mark Bell <jmb@netsurf-browser.org>
 */

#ifndef parserutils_input_inputstream_h_
#define parserutils_input_inputstream_h_

#ifdef __cplusplus
extern "C"
{
#endif

#include <stdbool.h>
#ifndef NDEBUG
#include <stdio.h>
#endif
#include <stdlib.h>
#include <inttypes.h>

#include <parserutils/errors.h>
#include <parserutils/functypes.h>
#include <parserutils/types.h>
#include <parserutils/charset/utf8.h>
#include <parserutils/utils/buffer.h>

/**
 * Type of charset detection function
 *
 * A function of this type is supplied to parserutils_inputstream_create()
 * as its csdetect argument.
 *
 * \param data     Pointer to the bytes to inspect
 * \param len      Length of data (presumably in bytes -- confirm against
 *                 the implementation)
 * \param mibenum  Pointer to a 16-bit charset identifier (presumably a MIB
 *                 enum; whether it is read, written, or both is not
 *                 visible from this header)
 * \param source   Pointer to a 32-bit charset source indicator (its values
 *                 are defined by the client; not visible from this header)
 * \return A parserutils_error code
 */
typedef parserutils_error (*parserutils_charset_detect_func)(
		const uint8_t *data, size_t len, 
		uint16_t *mibenum, uint32_t *source);

/**
 * Input stream object
 *
 * The inline accessors in this header (parserutils_inputstream_peek and
 * parserutils_inputstream_advance) read and update these fields directly.
 */
typedef struct parserutils_inputstream 
{
	parserutils_buffer *utf8;	/**< Buffer containing UTF-8 data */

	uint32_t cursor;		/**< Byte offset of current position
					 *   within the utf8 buffer */

	bool had_eof;			/**< Whether EOF has been reached */
} parserutils_inputstream;

/**
 * Create an input stream
 *
 * Declaration only; the implementation is not part of this header.
 *
 * \param enc       Charset name for the stream's data (NOTE(review): whether
 *                  NULL is accepted is not visible here -- confirm)
 * \param encsrc    Source of the charset information (client-defined value)
 * \param csdetect  Charset detection function (NOTE(review): whether NULL
 *                  is accepted is not visible here -- confirm)
 * \param alloc     Memory (de)allocation function
 * \param pw        Client-private pointer (presumably handed to alloc --
 *                  confirm against the implementation)
 * \param stream    Pointer to location to receive the created stream
 * \return A parserutils_error code
 */
parserutils_error parserutils_inputstream_create(const char *enc,
		uint32_t encsrc, parserutils_charset_detect_func csdetect,
		parserutils_alloc alloc, void *pw, 
		parserutils_inputstream **stream);
/**
 * Destroy an input stream
 *
 * Declaration only; the implementation is not part of this header.
 *
 * \param stream  The stream to destroy
 * \return A parserutils_error code
 */
parserutils_error parserutils_inputstream_destroy(
		parserutils_inputstream *stream);

/**
 * Append data to an input stream
 *
 * Declaration only; the implementation is not part of this header.
 *
 * \param stream  The stream to append to
 * \param data    Pointer to the data to append
 * \param len     Length of data (presumably in bytes -- confirm)
 * \return A parserutils_error code
 */
parserutils_error parserutils_inputstream_append(
		parserutils_inputstream *stream,
		const uint8_t *data, size_t len);
/**
 * Insert data into stream at current location
 *
 * Declaration only; the implementation is not part of this header.
 *
 * \param stream  The stream to insert into
 * \param data    Pointer to the data to insert
 * \param len     Length of data (presumably in bytes -- confirm)
 * \return A parserutils_error code
 */
parserutils_error parserutils_inputstream_insert(
		parserutils_inputstream *stream,
		const uint8_t *data, size_t len);

/**
 * Slow form of parserutils_inputstream_peek.
 *
 * The inline parserutils_inputstream_peek() forwards its own arguments to
 * this function, unchanged, whenever its fast path cannot produce a result:
 * either the requested position is exactly at the end of the buffered UTF-8
 * data, or the character found there is reported as needing more data.
 *
 * \param stream  Stream to look in
 * \param offset  Byte offset of start of character, relative to the cursor
 * \param ptr     Pointer to location to receive pointer to character data
 * \param length  Pointer to location to receive character length (in bytes)
 * \return A parserutils_error code
 */
parserutils_error parserutils_inputstream_peek_slow(
		parserutils_inputstream *stream, 
		size_t offset, const uint8_t **ptr, size_t *length);

/**
 * Look at the character in the stream that starts at 
 * offset bytes from the cursor
 *
 * This inline function is the fast path. If the requested position lies
 * inside the already-buffered UTF-8 data, the character is returned straight
 * from that buffer: a single byte for ASCII, otherwise the length reported by
 * parserutils_charset_utf8_char_byte_length(). If the position is exactly at
 * the end of the buffered data, or the character there is reported as
 * needing more data, the request is handed to
 * parserutils_inputstream_peek_slow().
 *
 * \param stream  Stream to look in
 * \param offset  Byte offset of start of character
 * \param ptr     Pointer to location to receive pointer to character data
 * \param length  Pointer to location to receive character length (in bytes)
 * \return PARSERUTILS_OK on success, 
 *                    _NEEDDATA on reaching the end of available input,
 *                    _EOF on reaching the end of all input,
 *                    _BADENCODING if the input cannot be decoded,
 *                    _NOMEM on memory exhaustion,
 *                    _BADPARM if bad parameters are passed.
 *
 * A position beyond the end of the buffered data is treated as a caller
 * bug: the process is terminated with abort().
 *
 * Once the character pointed to by the result of this call has been advanced
 * past (i.e. parserutils_inputstream_advance has caused the stream cursor to 
 * pass over the character), then no guarantee is made as to the validity of 
 * the data pointed to. Thus, any attempt to dereference the pointer after 
 * advancing past the data it points to is a bug.
 */
static inline parserutils_error parserutils_inputstream_peek(
		parserutils_inputstream *stream, size_t offset, 
		const uint8_t **ptr, size_t *length)
{
	parserutils_error error = PARSERUTILS_OK;
	const parserutils_buffer *utf8;
	const uint8_t *utf8_data;
	size_t len, off, utf8_len;

	if (stream == NULL || ptr == NULL || length == NULL)
		return PARSERUTILS_BADPARM;

#ifndef NDEBUG
#ifdef VERBOSE_INPUTSTREAM
	/* cursor is a uint32_t, which need not be unsigned int on every
	 * target, so it is printed with PRIu32 rather than %u. */
	fprintf(stdout, "Peek: len: %zu cur: %" PRIu32 " off: %zu\n",
			stream->utf8->length, stream->cursor, offset);
#endif
#ifdef RANDOMISE_INPUTSTREAM
	parserutils_buffer_randomise(stream->utf8);
#endif
#endif

	utf8 = stream->utf8;
	utf8_data = utf8->data;
	utf8_len = utf8->length;
	off = stream->cursor + offset;

#define IS_ASCII(x) (((x) & 0x80) == 0)

	if (off < utf8_len) {
		if (IS_ASCII(utf8_data[off])) {
			/* Early exit for ASCII case */
			(*length) = 1;
			(*ptr) = (utf8_data + off);
			return PARSERUTILS_OK;
		} else {
			error = parserutils_charset_utf8_char_byte_length(
				utf8_data + off, &len);

			if (error == PARSERUTILS_OK) {
				(*length) = len;
				(*ptr) = (utf8_data + off);
				return PARSERUTILS_OK;
			} else if (error != PARSERUTILS_NEEDDATA) {
				/* Hard failure: report it to the caller */
				return error;
			}
			/* _NEEDDATA falls through to the slow path below */
		}
	}

#undef IS_ASCII

	/* Only two states may reach the slow path: the position is exactly
	 * at the end of the buffered data, or the fast path asked for more
	 * data. Anything else means the position is past the end. */
	if (off != utf8_len && error != PARSERUTILS_NEEDDATA)
		abort();

	return parserutils_inputstream_peek_slow(stream, offset, ptr, length);
}

/**
 * Advance the stream's current position
 *
 * \param stream  The stream whose position to advance (a NULL stream is
 *                ignored)
 * \param bytes   The number of bytes to advance
 *
 * Asking to advance by more bytes than lie between the cursor and the end
 * of the buffered UTF-8 data is treated as a caller bug: the process is
 * terminated with abort(). If the cursor is already at the end of the
 * buffered data, the call does nothing.
 */
static inline void parserutils_inputstream_advance(
		parserutils_inputstream *stream, size_t bytes)
{
	if (stream == NULL)
		return;

#if !defined(NDEBUG) && defined(VERBOSE_INPUTSTREAM)
	/* cursor is a uint32_t, which need not be unsigned int on every
	 * target, so it is printed with PRIu32 rather than %u. */
	fprintf(stdout, "Advance: len: %zu cur: %" PRIu32 " bytes: %zu\n",
			stream->utf8->length, stream->cursor, bytes);
#endif

	/* Refuse to move the cursor past the end of the buffered data */
	if (bytes > stream->utf8->length - stream->cursor)
		abort();

	/* Already at the end: nothing to advance over */
	if (stream->cursor == stream->utf8->length)
		return;

	stream->cursor += bytes;
}

/**
 * Read the document charset
 *
 * Declaration only; the implementation is not part of this header.
 *
 * \param stream  The stream to query
 * \param source  Pointer to location to receive the charset source
 *                (presumably an output parameter -- confirm against the
 *                implementation)
 * \return Charset name as a C string (NOTE(review): ownership and lifetime
 *         of the returned string are not visible here -- confirm)
 */
const char *parserutils_inputstream_read_charset(
		parserutils_inputstream *stream, uint32_t *source);
/**
 * Change the document charset
 *
 * Declaration only; the implementation is not part of this header.
 *
 * \param stream  The stream whose charset to change
 * \param enc     Name of the new charset
 * \param source  Source of the charset information (client-defined value)
 * \return A parserutils_error code
 */
parserutils_error parserutils_inputstream_change_charset(
		parserutils_inputstream *stream, 
		const char *enc, uint32_t source);

#ifdef __cplusplus
}
#endif

#endif
Netsurf initial port (still needs native ui and cURL) git-svn-id: svn://kolibrios.org@3584 a494cfbc-eb01-0410-851d-a64ba20cac60 2013-06-01 21:14:14 +04:00			`/*`
			`* This file is part of LibParserUtils.`
			`* Licensed under the MIT License,`
			`* http://www.opensource.org/licenses/mit-license.php`
			`* Copyright 2007 John-Mark Bell <jmb@netsurf-browser.org>`
			`*/`

			`#ifndef parserutils_input_inputstream_h_`
			`#define parserutils_input_inputstream_h_`

			`#ifdef __cplusplus`
			`extern "C"`
			`{`
			`#endif`

			`#include <stdbool.h>`
			`#ifndef NDEBUG`
			`#include <stdio.h>`
			`#endif`
			`#include <stdlib.h>`
			`#include <inttypes.h>`

			`#include <parserutils/errors.h>`
			`#include <parserutils/functypes.h>`
			`#include <parserutils/types.h>`
			`#include <parserutils/charset/utf8.h>`
			`#include <parserutils/utils/buffer.h>`

			`/**`
			`* Type of charset detection function`
			`*/`
			`typedef parserutils_error (*parserutils_charset_detect_func)(`
			`const uint8_t *data, size_t len,`
			`uint16_t mibenum, uint32_t source);`

			`/**`
			`* Input stream object`
			`*/`
			`typedef struct parserutils_inputstream`
			`{`
			`parserutils_buffer utf8; /< Buffer containing UTF-8 data /`

			`uint32_t cursor; /*< Byte offset of current position /`

			`bool had_eof; /*< Whether EOF has been reached /`
			`} parserutils_inputstream;`

			`/* Create an input stream */`
			`parserutils_error parserutils_inputstream_create(const char *enc,`
			`uint32_t encsrc, parserutils_charset_detect_func csdetect,`
			`parserutils_alloc alloc, void *pw,`
			`parserutils_inputstream **stream);`
			`/* Destroy an input stream */`
			`parserutils_error parserutils_inputstream_destroy(`
			`parserutils_inputstream *stream);`

			`/* Append data to an input stream */`
			`parserutils_error parserutils_inputstream_append(`
			`parserutils_inputstream *stream,`
			`const uint8_t *data, size_t len);`
			`/* Insert data into stream at current location */`
			`parserutils_error parserutils_inputstream_insert(`
			`parserutils_inputstream *stream,`
			`const uint8_t *data, size_t len);`

			`/* Slow form of css_inputstream_peek. */`
			`parserutils_error parserutils_inputstream_peek_slow(`
			`parserutils_inputstream *stream,`
			`size_t offset, const uint8_t *ptr, size_t length);`

			`/**`
			`* Look at the character in the stream that starts at`
			`* offset bytes from the cursor`
			`*`
			`* \param stream Stream to look in`
			`* \param offset Byte offset of start of character`
			`* \param ptr Pointer to location to receive pointer to character data`
			`* \param length Pointer to location to receive character length (in bytes)`
			`* \return PARSERUTILS_OK on success,`
			`* _NEEDDATA on reaching the end of available input,`
			`* _EOF on reaching the end of all input,`
			`* _BADENCODING if the input cannot be decoded,`
			`* _NOMEM on memory exhaustion,`
			`* _BADPARM if bad parameters are passed.`
			`*`
			`* Once the character pointed to by the result of this call has been advanced`
			`* past (i.e. parserutils_inputstream_advance has caused the stream cursor to`
			`* pass over the character), then no guarantee is made as to the validity of`
			`* the data pointed to. Thus, any attempt to dereference the pointer after`
			`* advancing past the data it points to is a bug.`
			`*/`
			`static inline parserutils_error parserutils_inputstream_peek(`
			`parserutils_inputstream *stream, size_t offset,`
			`const uint8_t *ptr, size_t length)`
			`{`
			`parserutils_error error = PARSERUTILS_OK;`
			`const parserutils_buffer *utf8;`
			`const uint8_t *utf8_data;`
			`size_t len, off, utf8_len;`

			`if (stream == NULL \|\| ptr == NULL \|\| length == NULL)`
			`return PARSERUTILS_BADPARM;`

			`#ifndef NDEBUG`
			`#ifdef VERBOSE_INPUTSTREAM`
			`fprintf(stdout, "Peek: len: %zu cur: %u off: %zu\n",`
			`stream->utf8->length, stream->cursor, offset);`
			`#endif`
			`#ifdef RANDOMISE_INPUTSTREAM`
			`parserutils_buffer_randomise(stream->utf8);`
			`#endif`
			`#endif`

			`utf8 = stream->utf8;`
			`utf8_data = utf8->data;`
			`utf8_len = utf8->length;`
			`off = stream->cursor + offset;`

			`#define IS_ASCII(x) (((x) & 0x80) == 0)`

			`if (off < utf8_len) {`
			`if (IS_ASCII(utf8_data[off])) {`
			`/* Early exit for ASCII case */`
			`(*length) = 1;`
			`(*ptr) = (utf8_data + off);`
			`return PARSERUTILS_OK;`
			`} else {`
			`error = parserutils_charset_utf8_char_byte_length(`
			`utf8_data + off, &len);`

			`if (error == PARSERUTILS_OK) {`
			`(*length) = len;`
			`(*ptr) = (utf8_data + off);`
			`return PARSERUTILS_OK;`
			`} else if (error != PARSERUTILS_NEEDDATA) {`
			`return error;`
			`}`
			`}`
			`}`

			`#undef IS_ASCII`

			`if (off != utf8_len && error != PARSERUTILS_NEEDDATA)`
			`abort();`

			`return parserutils_inputstream_peek_slow(stream, offset, ptr, length);`
			`}`

			`/**`
			`* Advance the stream's current position`
			`*`
			`* \param stream The stream whose position to advance`
			`* \param bytes The number of bytes to advance`
			`*/`
			`static inline void parserutils_inputstream_advance(`
			`parserutils_inputstream *stream, size_t bytes)`
			`{`
			`if (stream == NULL)`
			`return;`

			`#if !defined(NDEBUG) && defined(VERBOSE_INPUTSTREAM)`
			`fprintf(stdout, "Advance: len: %zu cur: %u bytes: %zu\n",`
			`stream->utf8->length, stream->cursor, bytes);`
			`#endif`

			`if (bytes > stream->utf8->length - stream->cursor)`
			`abort();`

			`if (stream->cursor == stream->utf8->length)`
			`return;`

			`stream->cursor += bytes;`
			`}`

			`/* Read the document charset */`
			`const char *parserutils_inputstream_read_charset(`
			`parserutils_inputstream stream, uint32_t source);`
			`/* Change the document charset */`
			`parserutils_error parserutils_inputstream_change_charset(`
			`parserutils_inputstream *stream,`
			`const char *enc, uint32_t source);`

			`#ifdef __cplusplus`
			`}`
			`#endif`

			`#endif`