mirror of
https://github.com/lexborisov/Modest
synced 2024-11-25 07:09:35 +03:00
465 lines
14 KiB
C
Executable File
465 lines
14 KiB
C
Executable File
/*
|
|
Copyright (C) 2016 Alexander Borisov
|
|
|
|
This library is free software; you can redistribute it and/or
|
|
modify it under the terms of the GNU Lesser General Public
|
|
License as published by the Free Software Foundation; either
|
|
version 2.1 of the License, or (at your option) any later version.
|
|
|
|
This library is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
Lesser General Public License for more details.
|
|
|
|
You should have received a copy of the GNU Lesser General Public
|
|
License along with this library; if not, write to the Free Software
|
|
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|
|
|
Author: lex.borisov@gmail.com (Alexander Borisov)
|
|
Author: https://github.com/EmielBruijntjes (Emiel Bruijntjes)
|
|
*/
|
|
|
|
#include <setjmp.h>
|
|
#include "myhtml/serialization.h"
|
|
|
|
/**
|
|
* Forward declaration of all the functions that are used inside this module
|
|
*/
|
|
static void myhtml_serialization_append(const char* str, size_t size, mycore_callback_serialize_f callback, void *ptr);
|
|
static void myhtml_serialization_append_attr(const char* str, size_t length, mycore_callback_serialize_f callback, void *ptr);
|
|
static void myhtml_serialization_attributes(myhtml_tree_t* tree, myhtml_tree_attr_t* attr, mycore_callback_serialize_f callback, void *ptr);
|
|
static void myhtml_serialization_node_append_text_node(myhtml_tree_node_t* node, mycore_callback_serialize_f callback, void *ptr);
|
|
static void myhtml_serialization_node_append_close(myhtml_tree_node_t* node, mycore_callback_serialize_f callback, void *ptr);
|
|
|
|
/**
|
|
* See the function myhtml_serialization_tree_buffer
|
|
*/
|
|
bool myhtml_serialization(myhtml_tree_node_t* scope_node, mycore_string_raw_t* str)
|
|
{
|
|
return myhtml_serialization_tree_buffer(scope_node, str);
|
|
}
|
|
|
|
/**
|
|
* See the function myhtml_serialization_node_buffer
|
|
*/
|
|
bool myhtml_serialization_node(myhtml_tree_node_t* node, mycore_string_raw_t* str)
|
|
{
|
|
return myhtml_serialization_node_buffer(node, str);
|
|
}
|
|
|
|
/**
|
|
* The serialize function for an entire tree
|
|
* @param tree the tree to be serialized
|
|
* @param scope_node the scope_node
|
|
* @param callback function that will be called for all strings that have to be printed
|
|
* @param ptr user-supplied pointer
|
|
* @return bool
|
|
*/
|
|
bool myhtml_serialization_tree_callback(myhtml_tree_node_t* scope_node, mycore_callback_serialize_f callback, void *ptr)
|
|
{
|
|
myhtml_tree_node_t* node = scope_node;
|
|
|
|
while(node) {
|
|
if(!myhtml_serialization_node_callback(node, callback, ptr)) return false;
|
|
|
|
if(node->child)
|
|
node = node->child;
|
|
else {
|
|
while(node != scope_node && node->next == NULL) {
|
|
myhtml_serialization_node_append_close(node, callback, ptr);
|
|
node = node->parent;
|
|
}
|
|
|
|
if(node == scope_node) {
|
|
if(node != node->tree->document) myhtml_serialization_node_append_close(node, callback, ptr);
|
|
break;
|
|
}
|
|
|
|
myhtml_serialization_node_append_close(node, callback, ptr);
|
|
node = node->next;
|
|
}
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
/**
|
|
* The serialize function for a single node
|
|
* @param tree the tree to be serialized
|
|
* @param node the node that is going to be serialized
|
|
* @param callback function that will be called for all strings that have to be printed
|
|
* @param ptr user-supplied pointer
|
|
* @return bool
|
|
*/
|
|
bool myhtml_serialization_node_callback(myhtml_tree_node_t* node, mycore_callback_serialize_f callback, void *ptr)
|
|
{
|
|
switch (node->tag_id) {
|
|
case MyHTML_TAG__TEXT: {
|
|
myhtml_serialization_node_append_text_node(node, callback, ptr);
|
|
break;
|
|
}
|
|
case MyHTML_TAG__COMMENT: {
|
|
callback("<!--", 4, ptr);
|
|
if(node->token && node->token->str.data) callback(node->token->str.data, node->token->str.length, ptr);
|
|
callback("-->", 3, ptr);
|
|
break;
|
|
}
|
|
case MyHTML_TAG__DOCTYPE: {
|
|
callback("<!DOCTYPE", 9, ptr);
|
|
|
|
if(node->token) {
|
|
myhtml_tree_attr_t* attr = node->token->attr_first;
|
|
|
|
if(attr->key.data && attr->key.length) {
|
|
callback(" ", 1, ptr);
|
|
callback(attr->key.data, attr->key.length, ptr);
|
|
}
|
|
}
|
|
callback(">", 1, ptr);
|
|
break;
|
|
}
|
|
default: {
|
|
size_t length;
|
|
const char *tag = myhtml_tag_name_by_id(node->tree, node->tag_id, &length);
|
|
|
|
callback("<", 1, ptr);
|
|
callback(tag, length, ptr);
|
|
if(node->token) myhtml_serialization_attributes(node->tree, node->token->attr_first, callback, ptr);
|
|
callback(">", 1, ptr);
|
|
break;
|
|
}
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
/**
|
|
* Internal function to process attributes
|
|
* @param tree
|
|
* @param attr
|
|
* @param callback
|
|
* @param ptr
|
|
*/
|
|
void myhtml_serialization_attributes(myhtml_tree_t* tree, myhtml_tree_attr_t* attr, mycore_callback_serialize_f callback, void* ptr)
|
|
{
|
|
while(attr) {
|
|
callback(" ", 1, ptr);
|
|
|
|
switch (attr->ns) {
|
|
case MyHTML_NAMESPACE_XML:
|
|
callback("xml:", 4, ptr);
|
|
break;
|
|
case MyHTML_NAMESPACE_XMLNS: {
|
|
/*
|
|
If the attribute is in the XMLNS namespace and the attribute's local name is not xmlns
|
|
The attribute's serialized name is the string "xmlns:" followed by the attribute's local name.
|
|
*/
|
|
if(attr->key.data && attr->key.length == 5 && mycore_strcmp(attr->key.data, "xmlns")) {
|
|
callback("xmlns:", 6, ptr);
|
|
}
|
|
|
|
break;
|
|
}
|
|
case MyHTML_NAMESPACE_XLINK: {
|
|
callback("xlink:", 6, ptr);
|
|
|
|
break;
|
|
}
|
|
default:
|
|
break;
|
|
}
|
|
|
|
|
|
size_t length;
|
|
const char *data = myhtml_attribute_key(attr, &length);
|
|
if(data) callback(data, length, ptr);
|
|
callback("=\"", 2, ptr);
|
|
|
|
data = myhtml_attribute_value(attr, &length);
|
|
if(data) myhtml_serialization_append_attr(data, length, callback, ptr);
|
|
callback("\"", 1, ptr);
|
|
attr = attr->next;
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Internal function to process a close tag
|
|
* @param tree
|
|
* @param node
|
|
* @param callback
|
|
* @param ptr
|
|
*/
|
|
void myhtml_serialization_node_append_close(myhtml_tree_node_t* node, mycore_callback_serialize_f callback, void* ptr)
|
|
{
|
|
if(node->tag_id != MyHTML_TAG__TEXT &&
|
|
node->tag_id != MyHTML_TAG__COMMENT &&
|
|
node->tag_id != MyHTML_TAG__DOCTYPE)
|
|
{
|
|
size_t length;
|
|
const char *tag = myhtml_tag_name_by_id(node->tree, node->tag_id, &length);
|
|
|
|
callback("</", 2, ptr);
|
|
callback(tag, length, ptr);
|
|
callback(">", 1, ptr);
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Internal function to process a text node
|
|
* @param tree
|
|
* @param node
|
|
* @param callback
|
|
* @param ptr
|
|
*/
|
|
void myhtml_serialization_node_append_text_node(myhtml_tree_node_t* node, mycore_callback_serialize_f callback, void* ptr)
|
|
{
|
|
if(node->token == NULL || node->token->str.data == NULL) return;
|
|
|
|
if(node->parent == NULL) {
|
|
myhtml_serialization_append(node->token->str.data, node->token->str.length, callback, ptr);
|
|
return;
|
|
}
|
|
|
|
switch (node->parent->tag_id) {
|
|
case MyHTML_TAG_STYLE:
|
|
case MyHTML_TAG_SCRIPT:
|
|
case MyHTML_TAG_XMP:
|
|
case MyHTML_TAG_IFRAME:
|
|
case MyHTML_TAG_NOEMBED:
|
|
case MyHTML_TAG_NOFRAMES:
|
|
case MyHTML_TAG_PLAINTEXT:
|
|
callback(node->token->str.data, node->token->str.length, ptr);
|
|
break;
|
|
default:
|
|
myhtml_serialization_append(node->token->str.data, node->token->str.length, callback, ptr);
|
|
break;
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Internal method to process a buffer that could contain to-be-quoted output
|
|
* @param data
|
|
* @param size
|
|
* @param callback
|
|
* @param ptr
|
|
*/
|
|
void myhtml_serialization_append(const char *data, size_t size, mycore_callback_serialize_f callback, void* ptr)
|
|
{
|
|
// number of chars not yet displayed
|
|
size_t notwritten = 0;
|
|
|
|
// iterate over the buffer
|
|
for (size_t i = 0; i < size; ++i) {
|
|
switch ((unsigned char)data[i]) {
|
|
case '&':
|
|
if (notwritten) callback(data + i - notwritten, notwritten, ptr);
|
|
callback("&", 5, ptr);
|
|
notwritten = 0;
|
|
break;
|
|
case '<':
|
|
if (notwritten) callback(data + i - notwritten, notwritten, ptr);
|
|
callback("<", 4, ptr);
|
|
notwritten = 0;
|
|
break;
|
|
case '>':
|
|
if (notwritten) callback(data + i - notwritten, notwritten, ptr);
|
|
callback(">", 4, ptr);
|
|
notwritten = 0;
|
|
break;
|
|
case 0xA0:
|
|
if(i > 0 && (unsigned char)(data[(i - 1)]) == 0xC2) {
|
|
if (notwritten) callback(data + i - notwritten, (notwritten - 1), ptr);
|
|
callback(" ", 6, ptr);
|
|
notwritten = 0;
|
|
}
|
|
else {
|
|
++notwritten;
|
|
}
|
|
|
|
break;
|
|
default:
|
|
++notwritten;
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (notwritten) callback(data + size - notwritten, notwritten, ptr);
|
|
}
|
|
|
|
/**
|
|
* Internal method to process an attribute
|
|
* @param data
|
|
* @param size
|
|
* @param callback
|
|
* @param ptr
|
|
*/
|
|
void myhtml_serialization_append_attr(const char* data, size_t size, mycore_callback_serialize_f callback, void* ptr)
|
|
{
|
|
// number of chars not yet displayed
|
|
size_t notwritten = 0;
|
|
|
|
// iterate over the buffer
|
|
for (size_t i = 0; i < size; ++i) {
|
|
switch ((unsigned char)data[i]) {
|
|
case '&':
|
|
if (notwritten) callback(data + i - notwritten, notwritten, ptr);
|
|
callback("&", 5, ptr);
|
|
notwritten = 0;
|
|
break;
|
|
case '"':
|
|
if (notwritten) callback(data + i - notwritten, notwritten, ptr);
|
|
callback(""", 6, ptr);
|
|
notwritten = 0;
|
|
break;
|
|
case 0xA0:
|
|
if(i > 0 && (unsigned char)(data[(i - 1)]) == 0xC2) {
|
|
if (notwritten) callback(data + i - notwritten, (notwritten - 1), ptr);
|
|
callback(" ", 6, ptr);
|
|
notwritten = 0;
|
|
}
|
|
else {
|
|
++notwritten;
|
|
}
|
|
|
|
break;
|
|
default:
|
|
++notwritten;
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (notwritten) callback(data + size - notwritten, notwritten, ptr);
|
|
}
|
|
|
|
/**
|
|
* Because we want to leap out of the algorithm if we're halfway through
|
|
* serializing, we use a longjmp() call to jump back to the public APP
|
|
* @var jmp_buf
|
|
*/
|
|
static jmp_buf leap;
|
|
|
|
/**
|
|
* Reallocate the buffer
|
|
* @param str the buffer to reallocate
|
|
* @param size new size
|
|
*/
|
|
void myhtml_serialization_reallocate(mycore_string_raw_t *str, size_t size)
|
|
{
|
|
// construct a buffer
|
|
char *data = (char*)mycore_realloc(str->data, size * sizeof(char));
|
|
|
|
// was it ok?
|
|
if (data == NULL) {
|
|
|
|
// allocation failed, reset the string object
|
|
mycore_free(str->data);
|
|
memset(str, 0, sizeof(mycore_string_raw_t));
|
|
|
|
// leap back to the source of the serialization algorithm
|
|
longjmp(leap, 1);
|
|
}
|
|
else {
|
|
|
|
// reallocation succeeded
|
|
str->data = data;
|
|
str->size = size;
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Implementation of the mycore_callback_serialize_f function for internal
|
|
* use that concatenats everything to a string
|
|
* @param data
|
|
* @param size
|
|
*/
|
|
void myhtml_serialization_concatenate(const char* data, size_t length, void *ptr)
|
|
{
|
|
// get the string back
|
|
mycore_string_raw_t* str = (mycore_string_raw_t *)ptr;
|
|
|
|
// do we still have enough size in the output buffer?
|
|
if ((length + str->length) >= str->size) myhtml_serialization_reallocate(str, length + str->length + 4096);
|
|
|
|
// copy data
|
|
strncpy(&str->data[ str->length ], data, length);
|
|
|
|
// update counters
|
|
str->length += length;
|
|
str->data[ str->length ] = '\0';
|
|
}
|
|
|
|
/**
|
|
* Serialize tree to an output string
|
|
* @param tree
|
|
* @param scope_node
|
|
* @param str
|
|
* @return bool
|
|
*/
|
|
bool myhtml_serialization_tree_buffer(myhtml_tree_node_t* scope_node, mycore_string_raw_t* str) {
|
|
|
|
// we need an output variable
|
|
if(str == NULL) return false;
|
|
|
|
// allocate space that is most likely enough for the output
|
|
if(str->data == NULL) {
|
|
str->size = 4098 * 5;
|
|
str->length = 0;
|
|
str->data = (char*)mycore_malloc(str->size * sizeof(char));
|
|
|
|
if(str->data == NULL) {
|
|
str->size = 0;
|
|
return false;
|
|
}
|
|
}
|
|
|
|
// if allocation halfway the algorithm fails, we want to leap back
|
|
if (setjmp(leap) == 0)
|
|
{
|
|
// serialize the entire tree
|
|
return myhtml_serialization_tree_callback(scope_node, myhtml_serialization_concatenate, str);
|
|
}
|
|
else
|
|
{
|
|
// the serialization algorithm failed because of a memory-allocation failure
|
|
return false;
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Serialize node to an output string
|
|
* @param tree
|
|
* @param node
|
|
* @param str
|
|
* @return bool
|
|
*/
|
|
bool myhtml_serialization_node_buffer(myhtml_tree_node_t* node, mycore_string_raw_t* str) {
|
|
|
|
// we need an output variable
|
|
if(str == NULL) return false;
|
|
|
|
// allocate space that is most likely enough for the output
|
|
if(str->data == NULL) {
|
|
str->size = 2048;
|
|
str->length = 0;
|
|
str->data = (char*)mycore_malloc(str->size * sizeof(char));
|
|
|
|
if(str->data == NULL) {
|
|
str->size = 0;
|
|
return false;
|
|
}
|
|
}
|
|
|
|
// if allocation halfway the algorithm fails, we want to leap back
|
|
if (setjmp(leap) == 0)
|
|
{
|
|
// pass on
|
|
return myhtml_serialization_node_callback(node, myhtml_serialization_concatenate, str);
|
|
}
|
|
else
|
|
{
|
|
// the serialization algorithm failed because of a memory-allocation failure
|
|
return false;
|
|
}
|
|
}
|
|
|