Sync with current myhtml: 8dfd72f69f

This commit is contained in:
lexborisov 2016-09-16 23:48:41 +04:00
parent 40cf5dc937
commit 581c8587ca
10 changed files with 740 additions and 2 deletions

View File

@ -532,6 +532,13 @@ struct myhtml_string {
}
typedef myhtml_string_t;
/**
 * @struct myhtml_string_raw_t
 *
 * Plain byte buffer described by a pointer, its allocated size and the
 * number of bytes currently in use.
 */
struct myhtml_string_raw {
/* buffer holding the bytes; the serializer allocates it when it is NULL */
char* data;
/* number of bytes allocated for data */
size_t size;
/* number of bytes of data currently in use */
size_t length;
}
typedef myhtml_string_raw_t;
/**
* @struct myhtml_collection_t
*/
@ -2362,6 +2369,48 @@ myhtml_string_data_realloc(mchar_async_t *mchar, size_t node_id,
void
myhtml_string_data_free(mchar_async_t *mchar, size_t node_id, char *data);
/***********************************************************************************
*
* MyHTML_STRING_RAW
*
* All work with myhtml_string_raw_t object occurs through
* myhtml_malloc (standard malloc), myhtml_realloc (standard realloc),
* myhtml_free (standard free).
*
* You are free to change them on your own without fear that something will break.
* You can call free for str_raw.data, or set str_raw.length = 0
*
***********************************************************************************/
/**
* Clean myhtml_string_raw_t object. In reality, data length set to 0
*
* @param[in] myhtml_string_raw_t*
*/
void
myhtml_string_raw_clean(myhtml_string_raw_t* str_raw);
/**
* Full clean myhtml_string_raw_t object.
* Equivalently: memset(str_raw, 0, sizeof(myhtml_string_raw_t))
*
* @param[in] myhtml_string_raw_t*
*/
void
myhtml_string_raw_clean_all(myhtml_string_raw_t* str_raw);
/**
* Free resources for myhtml_string_raw_t object
*
* @param[in] myhtml_string_raw_t*
* @param[in] call free function for current object or not
*
* @return NULL if destroy_obj set true, otherwise a current myhtml_string_raw_t object
*/
myhtml_string_raw_t*
myhtml_string_raw_destroy(myhtml_string_raw_t* str_raw, bool destroy_obj);
/***********************************************************************************
*
* MyHTML_INCOMING
@ -2638,6 +2687,36 @@ myhtml_strcasecmp(const char* str1, const char* str2);
size_t
myhtml_strncasecmp(const char* str1, const char* str2, size_t size);
/***********************************************************************************
*
* MyHTML_SERIALIZATION
*
***********************************************************************************/
/**
* Tree fragment serialization
*
* @param[in] myhtml_tree_t*
* @param[in] scope node, myhtml_tree_node_t*
* @param[in] myhtml_string_raw_t* (data will be allocated if str_raw.data == NULL)
*
* @return true if successful, otherwise false
*/
bool
myhtml_serialization(myhtml_tree_t* tree, myhtml_tree_node_t* scope_node, myhtml_string_raw_t* str);
/**
* Only one tree node serialization
*
* @param[in] myhtml_tree_t*
* @param[in] myhtml_tree_node_t*
* @param[in] myhtml_string_raw_t* (data will be allocated if str_raw.data == NULL)
*
* @return true if successful, otherwise false
*/
bool
myhtml_serialization_node(myhtml_tree_t* tree, myhtml_tree_node_t* node, myhtml_string_raw_t* str);
/***********************************************************************************
*
* MyHTML_VERSION

View File

@ -164,6 +164,7 @@ typedef myhtml_encoding_t;
typedef struct myhtml_data_process_entry myhtml_data_process_entry_t;
// strings
typedef struct myhtml_string_raw myhtml_string_raw_t;
typedef struct myhtml_string myhtml_string_t;
// thread

View File

@ -79,6 +79,12 @@ struct myhtml_string {
size_t node_idx;
};
/*
 * Plain byte buffer described by a pointer, its allocated size and the
 * number of bytes currently in use.
 */
struct myhtml_string_raw {
/* buffer holding the bytes; the serializer allocates it when it is NULL */
char* data;
/* number of bytes allocated for data */
size_t size;
/* number of bytes of data currently in use */
size_t length;
};
typedef size_t myhtml_string_index_t;
char * myhtml_string_init(mchar_async_t *mchar, size_t node_idx, myhtml_string_t* str, size_t size);
@ -88,6 +94,10 @@ void myhtml_string_clean(myhtml_string_t* str);
void myhtml_string_clean_all(myhtml_string_t* str);
myhtml_string_t * myhtml_string_destroy(myhtml_string_t* str, bool destroy_obj);
void myhtml_string_raw_clean(myhtml_string_raw_t* str_raw);
void myhtml_string_raw_clean_all(myhtml_string_raw_t* str_raw);
myhtml_string_raw_t * myhtml_string_raw_destroy(myhtml_string_raw_t* str_raw, bool destroy_obj);
/* basic api */
char * myhtml_string_data_alloc(mchar_async_t *mchar, size_t node_id, size_t size);
char * myhtml_string_data_realloc(mchar_async_t *mchar, size_t node_id, char *data, size_t len_to_copy, size_t size);

View File

@ -0,0 +1,53 @@
/*
Copyright (C) 2016 Alexander Borisov
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Author: lex.borisov@gmail.com (Alexander Borisov)
*/
#ifndef MyHTML_SERIALIZATION_H
#define MyHTML_SERIALIZATION_H
#pragma once
#include <myhtml/myosi.h>
#include <myhtml/mystring.h>
#include <myhtml/tree.h>
/*
 * Make sure the in-scope `str` (a myhtml_string_raw_t*) satisfies
 * str->size > str->length + len, growing str->data with myhtml_realloc
 * (requested amount plus 4096 spare bytes) when it does not.
 *
 * On allocation failure the macro executes `return false` in the calling
 * function; str->data and str->size are left untouched in that case.
 *
 * `len` is parenthesized so that low-precedence argument expressions
 * (e.g. a ternary) are added to str->length as a whole. It may be evaluated
 * twice, so it must not have side effects.
 *
 * The expansion is deliberately a bare `if` block rather than
 * do { } while(0): existing call sites invoke it without a trailing
 * semicolon.
 */
#define myhtml_serialization_realloc_if_need(len)                             \
    if(((len) + str->length) >= str->size) {                                  \
        size_t size = ((len) + str->length) + 4096;                           \
        char *data = (char*)myhtml_realloc(str->data, size * sizeof(char));   \
                                                                              \
        if(data) {                                                            \
            str->data = data;                                                 \
            str->size = size;                                                 \
        }                                                                     \
        else                                                                  \
            return false;                                                     \
    }
#ifdef __cplusplus
extern "C" {
#endif
bool myhtml_serialization(myhtml_tree_t* tree, myhtml_tree_node_t* scope_node, myhtml_string_raw_t* str);
bool myhtml_serialization_node(myhtml_tree_t* tree, myhtml_tree_node_t* node, myhtml_string_raw_t* str);
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif /* MyHTML_SERIALIZATION_H */

View File

@ -532,6 +532,13 @@ struct myhtml_string {
}
typedef myhtml_string_t;
/**
 * @struct myhtml_string_raw_t
 *
 * Plain byte buffer described by a pointer, its allocated size and the
 * number of bytes currently in use.
 */
struct myhtml_string_raw {
/* buffer holding the bytes; the serializer allocates it when it is NULL */
char* data;
/* number of bytes allocated for data */
size_t size;
/* number of bytes of data currently in use */
size_t length;
}
typedef myhtml_string_raw_t;
/**
* @struct myhtml_collection_t
*/
@ -2362,6 +2369,48 @@ myhtml_string_data_realloc(mchar_async_t *mchar, size_t node_id,
void
myhtml_string_data_free(mchar_async_t *mchar, size_t node_id, char *data);
/***********************************************************************************
*
* MyHTML_STRING_RAW
*
* All work with myhtml_string_raw_t object occurs through
* myhtml_malloc (standard malloc), myhtml_realloc (standard realloc),
* myhtml_free (standard free).
*
* You are free to change them on your own without fear that something will break.
* You can call free for str_raw.data, or set str_raw.length = 0
*
***********************************************************************************/
/**
* Clean myhtml_string_raw_t object. In reality, data length set to 0
*
* @param[in] myhtml_string_raw_t*
*/
void
myhtml_string_raw_clean(myhtml_string_raw_t* str_raw);
/**
* Full clean myhtml_string_raw_t object.
* Equivalently: memset(str_raw, 0, sizeof(myhtml_string_raw_t))
*
* @param[in] myhtml_string_raw_t*
*/
void
myhtml_string_raw_clean_all(myhtml_string_raw_t* str_raw);
/**
* Free resources for myhtml_string_raw_t object
*
* @param[in] myhtml_string_raw_t*
* @param[in] call free function for current object or not
*
* @return NULL if destroy_obj set true, otherwise a current myhtml_string_raw_t object
*/
myhtml_string_raw_t*
myhtml_string_raw_destroy(myhtml_string_raw_t* str_raw, bool destroy_obj);
/***********************************************************************************
*
* MyHTML_INCOMING
@ -2638,6 +2687,36 @@ myhtml_strcasecmp(const char* str1, const char* str2);
size_t
myhtml_strncasecmp(const char* str1, const char* str2, size_t size);
/***********************************************************************************
*
* MyHTML_SERIALIZATION
*
***********************************************************************************/
/**
* Tree fragment serialization
*
* @param[in] myhtml_tree_t*
* @param[in] scope node, myhtml_tree_node_t*
* @param[in] myhtml_string_raw_t* (data will be allocated if str_raw.data == NULL)
*
* @return true if successful, otherwise false
*/
bool
myhtml_serialization(myhtml_tree_t* tree, myhtml_tree_node_t* scope_node, myhtml_string_raw_t* str);
/**
* Only one tree node serialization
*
* @param[in] myhtml_tree_t*
* @param[in] myhtml_tree_node_t*
* @param[in] myhtml_string_raw_t* (data will be allocated if str_raw.data == NULL)
*
* @return true if successful, otherwise false
*/
bool
myhtml_serialization_node(myhtml_tree_t* tree, myhtml_tree_node_t* node, myhtml_string_raw_t* str);
/***********************************************************************************
*
* MyHTML_VERSION

View File

@ -164,6 +164,7 @@ typedef myhtml_encoding_t;
typedef struct myhtml_data_process_entry myhtml_data_process_entry_t;
// strings
typedef struct myhtml_string_raw myhtml_string_raw_t;
typedef struct myhtml_string myhtml_string_t;
// thread

View File

@ -55,10 +55,40 @@ myhtml_string_t * myhtml_string_destroy(myhtml_string_t* str, bool destroy_obj)
if(str->data && str->mchar && str->node_idx)
mchar_async_free(str->mchar, str->node_idx, str->data);
if(destroy_obj && str)
if(destroy_obj && str) {
myhtml_free(str);
return NULL;
}
return NULL;
return str;
}
/*
 * Logically empty a raw string: only the used length is reset to zero.
 * The buffer pointer and its allocated size are left as they are, so the
 * existing allocation can be reused.
 */
void myhtml_string_raw_clean(myhtml_string_raw_t* str_raw)
{
    str_raw->length = 0;
}
/*
 * Zero every byte of the raw string object itself (pointer, size, length).
 * The buffer that data pointed to is NOT freed here.
 */
void myhtml_string_raw_clean_all(myhtml_string_raw_t* str_raw)
{
    memset(str_raw, 0, sizeof(*str_raw));
}
/*
 * Release the buffer owned by a raw string and, optionally, the object itself.
 *
 * str_raw      object to release; NULL is accepted and yields NULL
 * destroy_obj  when true the object is freed with myhtml_free as well
 *
 * Returns NULL when the object was freed (or was NULL to begin with),
 * otherwise str_raw with its data pointer reset to NULL. size and length
 * are not modified.
 */
myhtml_string_raw_t * myhtml_string_raw_destroy(myhtml_string_raw_t* str_raw, bool destroy_obj)
{
    if(str_raw != NULL) {
        if(str_raw->data != NULL) {
            myhtml_free(str_raw->data);
            str_raw->data = NULL;
        }

        if(!destroy_obj)
            return str_raw;

        myhtml_free(str_raw);
    }

    return NULL;
}
char * myhtml_string_realloc(myhtml_string_t *str, size_t new_size)

View File

@ -79,6 +79,12 @@ struct myhtml_string {
size_t node_idx;
};
/*
 * Plain byte buffer described by a pointer, its allocated size and the
 * number of bytes currently in use.
 */
struct myhtml_string_raw {
/* buffer holding the bytes; the serializer allocates it when it is NULL */
char* data;
/* number of bytes allocated for data */
size_t size;
/* number of bytes of data currently in use */
size_t length;
};
typedef size_t myhtml_string_index_t;
char * myhtml_string_init(mchar_async_t *mchar, size_t node_idx, myhtml_string_t* str, size_t size);
@ -88,6 +94,10 @@ void myhtml_string_clean(myhtml_string_t* str);
void myhtml_string_clean_all(myhtml_string_t* str);
myhtml_string_t * myhtml_string_destroy(myhtml_string_t* str, bool destroy_obj);
void myhtml_string_raw_clean(myhtml_string_raw_t* str_raw);
void myhtml_string_raw_clean_all(myhtml_string_raw_t* str_raw);
myhtml_string_raw_t * myhtml_string_raw_destroy(myhtml_string_raw_t* str_raw, bool destroy_obj);
/* basic api */
char * myhtml_string_data_alloc(mchar_async_t *mchar, size_t node_id, size_t size);
char * myhtml_string_data_realloc(mchar_async_t *mchar, size_t node_id, char *data, size_t len_to_copy, size_t size);

View File

@ -0,0 +1,422 @@
/*
Copyright (C) 2016 Alexander Borisov
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Author: lex.borisov@gmail.com (Alexander Borisov)
*/
#include "myhtml/serialization.h"
bool myhtml_serialization_append(myhtml_string_raw_t* str, const char* src_data, size_t length);
bool myhtml_serialization_append_attr(myhtml_string_raw_t* str, const char* src_data, size_t length);
bool myhtml_serialization_append_raw(myhtml_string_raw_t* str, const char* src_data, size_t length);
bool myhtml_serialization_attributes(myhtml_tree_t* tree, myhtml_tree_attr_t* attr, myhtml_string_raw_t* str);
bool myhtml_serialization_node_append_text_node(myhtml_tree_t* tree, myhtml_tree_node_t* node, myhtml_string_raw_t* str);
bool myhtml_serialization_node_append_close(myhtml_tree_t* tree, myhtml_tree_node_t* node, myhtml_string_raw_t* str);
/*
 * Serialize the subtree rooted at scope_node into str.
 *
 * tree        tree the nodes belong to
 * scope_node  root of the fragment; when it is tree->document the
 *             document's children are serialized and no tag is written
 *             for the document node itself
 * str         output buffer; if str->data is NULL a buffer is allocated
 *
 * The tree is walked iteratively in document order: every node is opened
 * with myhtml_serialization_node and closed with
 * myhtml_serialization_node_append_close once its subtree is finished.
 *
 * Returns true on success. On any failure after the buffer exists,
 * str->data is freed and the whole str object is zeroed before false is
 * returned, including when the buffer was supplied by the caller.
 */
bool myhtml_serialization(myhtml_tree_t* tree, myhtml_tree_node_t* scope_node, myhtml_string_raw_t* str)
{
    if(str == NULL)
        return false;

    if(str->data == NULL) {
        str->size   = 4098 * 5;
        str->length = 0;
        str->data   = (char*)myhtml_malloc(str->size * sizeof(char));

        if(str->data == NULL) {
            str->size = 0;
            return false;
        }
    }

    myhtml_tree_node_t* node = scope_node;

    if(node == tree->document) {
        if(tree->document == NULL)
            goto failed;

        node = tree->document->child;
    }

    while(node) {
        if(myhtml_serialization_node(tree, node, str) == false)
            goto failed;

        if(node->child) {
            node = node->child;
            continue;
        }

        /* Leaf reached: close every finished node on the way back up. */
        while(node != scope_node && node->next == NULL) {
            /* A failed append here was previously ignored. */
            if(myhtml_serialization_node_append_close(tree, node, str) == false)
                goto failed;

            node = node->parent;
        }

        if(node == scope_node) {
            /* The document node itself has no closing tag. */
            if(node != tree->document) {
                if(myhtml_serialization_node_append_close(tree, node, str) == false)
                    goto failed;
            }

            break;
        }

        if(myhtml_serialization_node_append_close(tree, node, str) == false)
            goto failed;

        node = node->next;
    }

    return true;

failed:
    myhtml_free(str->data);
    memset(str, 0, sizeof(myhtml_string_raw_t));

    return false;
}
/*
 * Serialize a single node (without its children and without a closing tag)
 * and append the result to str.
 *
 * tree  tree the node belongs to (used to resolve the tag name)
 * node  node to serialize
 * str   output buffer; if str->data is NULL a 2048-byte buffer is allocated
 *
 * Output by node kind:
 *   text     -> the text, escaped or raw depending on the parent element
 *   comment  -> "<!--" data "-->"
 *   doctype  -> "<!DOCTYPE" [" " name] ">"
 *   other    -> "<" tag-name attributes ">"
 *
 * Returns true on success, false if str is NULL or an append fails.
 * Unlike myhtml_serialization, the buffer is not released on failure.
 */
bool myhtml_serialization_node(myhtml_tree_t* tree, myhtml_tree_node_t* node, myhtml_string_raw_t* str)
{
    if(str == NULL)
        return false;

    if(str->data == NULL) {
        str->size   = 2048;
        str->length = 0;
        str->data   = (char*)myhtml_malloc(str->size * sizeof(char));

        if(str->data == NULL) {
            str->size = 0;
            return false;
        }
    }

    switch (node->tag_id) {
        case MyHTML_TAG__TEXT: {
            if(myhtml_serialization_node_append_text_node(tree, node, str) == false)
                return false;

            break;
        }
        case MyHTML_TAG__COMMENT: {
            if(myhtml_serialization_append_raw(str, "<!--", 4) == false)
                return false;

            if(node->token && node->token->str.data) {
                if(myhtml_serialization_append_raw(str, node->token->str.data, node->token->str.length) == false)
                    return false;
            }

            if(myhtml_serialization_append_raw(str, "-->", 3) == false)
                return false;

            break;
        }
        case MyHTML_TAG__DOCTYPE: {
            if(myhtml_serialization_append_raw(str, "<!DOCTYPE", 9) == false)
                return false;

            if(node->token) {
                /* The key of the first attribute is written as the doctype name. */
                myhtml_tree_attr_t* attr = node->token->attr_first;

                /* attr_first may be NULL (the attribute walker treats it as
                   nullable), so it must be checked before dereferencing. */
                if(attr && attr->key.data && attr->key.length) {
                    if(myhtml_serialization_append_raw(str, " ", 1) == false)
                        return false;

                    if(myhtml_serialization_append_raw(str, attr->key.data, attr->key.length) == false)
                        return false;
                }
            }

            if(myhtml_serialization_append_raw(str, ">", 1) == false)
                return false;

            break;
        }
        default: {
            size_t length;
            const char *tag = myhtml_tag_name_by_id(tree, node->tag_id, &length);

            if(myhtml_serialization_append_raw(str, "<", 1) == false)
                return false;

            if(myhtml_serialization_append_raw(str, tag, length) == false)
                return false;

            if(node->token) {
                if(myhtml_serialization_attributes(tree, node->token->attr_first, str) == false)
                    return false;
            }

            if(myhtml_serialization_append_raw(str, ">", 1) == false)
                return false;

            break;
        }
    }

    return true;
}
/*
 * Append every attribute of the list starting at attr to str, each in the
 * form: space, optional namespace prefix, key, ="escaped value".
 *
 * tree  unused here; kept for a uniform helper signature
 * attr  first attribute of the list (may be NULL: nothing is written)
 * str   output buffer
 *
 * Namespace prefixes: "xml:" for the XML namespace, "xlink:" for XLink,
 * and "xmlns:" for the XMLNS namespace unless the local name is exactly
 * "xmlns".
 *
 * Returns true on success, false as soon as an append fails.
 */
bool myhtml_serialization_attributes(myhtml_tree_t* tree, myhtml_tree_attr_t* attr, myhtml_string_raw_t* str)
{
    while(attr) {
        if(myhtml_serialization_append_raw(str, " ", 1) == false)
            return false;

        switch (attr->ns) {
            case MyHTML_NAMESPACE_XML: {
                if(myhtml_serialization_append_raw(str, "xml:", 4) == false)
                    return false;

                break;
            }
            case MyHTML_NAMESPACE_XMLNS: {
                /*
                    If the attribute is in the XMLNS namespace and the attribute's local name is not xmlns
                    The attribute's serialized name is the string "xmlns:" followed by the attribute's local name.

                    A name whose length is not 5 can never be "xmlns", so it
                    must get the prefix too; the previous check demanded
                    length == 5 and skipped the prefix for all other names.
                    NOTE(review): assumes myhtml_strcmp returns non-zero when
                    the strings differ - confirm against its definition.
                 */
                if(attr->key.data &&
                   (attr->key.length != 5 || myhtml_strcmp(attr->key.data, "xmlns")))
                {
                    if(myhtml_serialization_append_raw(str, "xmlns:", 6) == false)
                        return false;
                }

                break;
            }
            case MyHTML_NAMESPACE_XLINK: {
                if(myhtml_serialization_append_raw(str, "xlink:", 6) == false)
                    return false;

                break;
            }
            default:
                break;
        }

        size_t length;
        const char *data = myhtml_attribute_key(attr, &length);

        if(data) {
            if(myhtml_serialization_append_raw(str, data, length) == false)
                return false;
        }

        if(myhtml_serialization_append_raw(str, "=\"", 2) == false)
            return false;

        data = myhtml_attribute_value(attr, &length);

        if(data) {
            /* Values are escaped for use inside double quotes. */
            if(myhtml_serialization_append_attr(str, data, length) == false)
                return false;
        }

        if(myhtml_serialization_append_raw(str, "\"", 1) == false)
            return false;

        attr = attr->next;
    }

    return true;
}
/*
 * Append the closing tag "</name>" of node to str.
 *
 * Text, comment and doctype nodes have no closing tag: for those nothing
 * is written and true is returned.
 *
 * Returns false as soon as one of the appends fails, true otherwise.
 */
bool myhtml_serialization_node_append_close(myhtml_tree_t* tree, myhtml_tree_node_t* node, myhtml_string_raw_t* str)
{
    switch (node->tag_id) {
        case MyHTML_TAG__TEXT:
        case MyHTML_TAG__COMMENT:
        case MyHTML_TAG__DOCTYPE:
            return true;

        default:
            break;
    }

    size_t name_length;
    const char *name = myhtml_tag_name_by_id(tree, node->tag_id, &name_length);

    /* && short-circuits, so the first failing append stops the sequence. */
    return myhtml_serialization_append_raw(str, "</", 2)
        && myhtml_serialization_append_raw(str, name, name_length)
        && myhtml_serialization_append_raw(str, ">", 1);
}
/*
 * Append the character data of a text node to str.
 *
 * Text whose parent is one of style, script, xmp, iframe, noembed,
 * noframes or plaintext is copied verbatim; all other text (including text
 * without a parent) goes through myhtml_serialization_append, which escapes
 * markup-significant characters.
 *
 * tree  unused here; kept for a uniform helper signature
 * node  text node to serialize
 * str   output buffer
 *
 * Returns true on success (also when the node carries no data), false if
 * an append fails.
 */
bool myhtml_serialization_node_append_text_node(myhtml_tree_t* tree, myhtml_tree_node_t* node, myhtml_string_raw_t* str)
{
    /* Nothing to write is not an error. The original returned the `str`
       pointer here, which only converted to true by accident. */
    if(node->token == NULL || node->token->str.data == NULL)
        return true;

    if(node->parent == NULL)
        return myhtml_serialization_append(str, node->token->str.data, node->token->str.length);

    switch (node->parent->tag_id) {
        case MyHTML_TAG_STYLE:
        case MyHTML_TAG_SCRIPT:
        case MyHTML_TAG_XMP:
        case MyHTML_TAG_IFRAME:
        case MyHTML_TAG_NOEMBED:
        case MyHTML_TAG_NOFRAMES:
        case MyHTML_TAG_PLAINTEXT:
            /* Raw-text containers: content must not be escaped. */
            if(myhtml_serialization_append_raw(str, node->token->str.data, node->token->str.length) == false)
                return false;

            break;

        default:
            if(myhtml_serialization_append(str, node->token->str.data, node->token->str.length) == false)
                return false;

            break;
    }

    return true;
}
/*
 * Append exactly `length` bytes of src_data to str without any escaping
 * and keep the buffer NUL-terminated.
 *
 * str       output buffer (grown on demand)
 * src_data  bytes to copy; only read when length > 0
 * length    number of bytes to copy
 *
 * Returns true on success, false if the buffer could not be grown.
 */
bool myhtml_serialization_append_raw(myhtml_string_raw_t* str, const char* src_data, size_t length)
{
    /* +1 keeps room for the terminating NUL written below. */
    myhtml_serialization_realloc_if_need((length + 1))

    /* memcpy copies the counted bytes as they are; strncpy would stop at an
       embedded NUL and zero-fill the remainder. The length guard avoids
       handing a possibly-NULL source to memcpy for an empty append. */
    if(length) {
        memcpy(&str->data[ str->length ], src_data, length);
        str->length += length;
    }

    str->data[ str->length ] = '\0';

    return true;
}
/*
 * Append text content to str, escaping it for use as element text:
 *   '&'                       -> "&amp;"
 *   '<'                       -> "&lt;"
 *   '>'                       -> "&gt;"
 *   0xC2 0xA0 (UTF-8 U+00A0)  -> "&nbsp;"
 * Every other byte is copied unchanged. The buffer is kept NUL-terminated.
 *
 * str       output buffer (grown on demand)
 * src_data  bytes to append
 * length    number of bytes in src_data
 *
 * Returns true on success, false if the buffer could not be grown.
 */
bool myhtml_serialization_append(myhtml_string_raw_t* str, const char* src_data, size_t length)
{
    /* Reserve the unescaped size plus the terminator up front; escape
       sequences grow the buffer further as they are met. */
    myhtml_serialization_realloc_if_need((length + 1))

    for(size_t i = 0; i < length; i++) {
        const char *entity = NULL;
        size_t entity_length = 0;

        switch ((unsigned char)src_data[i]) {
            case '&':
                entity = "&amp;";
                entity_length = 5;
                break;

            case '<':
                entity = "&lt;";
                entity_length = 4;
                break;

            case '>':
                entity = "&gt;";
                entity_length = 4;
                break;

            case 0xC2:
                /* Only the pair 0xC2 0xA0 is a no-break space. Any other
                   0xC2 lead byte is copied as an ordinary byte so the
                   character it starts stays intact (it used to be dropped),
                   and the byte after it is examined on the next pass. */
                if((i + 1) < length && (unsigned char)src_data[i + 1] == 0xA0) {
                    entity = "&nbsp;";
                    entity_length = 6;
                    i++;
                }
                break;

            default:
                break;
        }

        if(entity) {
            myhtml_serialization_realloc_if_need((entity_length + 1))
            memcpy(&str->data[str->length], entity, entity_length);
            str->length += entity_length;
        }
        else {
            /* room for this byte and the terminator */
            myhtml_serialization_realloc_if_need(2)
            str->data[str->length] = src_data[i];
            str->length++;
        }
    }

    str->data[ str->length ] = '\0';

    return true;
}
/*
 * Append an attribute value to str, escaping it for use inside double
 * quotes:
 *   '&'                       -> "&amp;"
 *   '"'                       -> "&quot;"
 *   0xC2 0xA0 (UTF-8 U+00A0)  -> "&nbsp;"
 * Every other byte is copied unchanged. The buffer is kept NUL-terminated.
 *
 * str       output buffer (grown on demand)
 * src_data  bytes to append
 * length    number of bytes in src_data
 *
 * Returns true on success, false if the buffer could not be grown.
 */
bool myhtml_serialization_append_attr(myhtml_string_raw_t* str, const char* src_data, size_t length)
{
    /* Reserve the unescaped size plus the terminator up front; escape
       sequences grow the buffer further as they are met. */
    myhtml_serialization_realloc_if_need((length + 1))

    for(size_t i = 0; i < length; i++) {
        const char *entity = NULL;
        size_t entity_length = 0;

        switch ((unsigned char)src_data[i]) {
            case '&':
                entity = "&amp;";
                entity_length = 5;
                break;

            case '"':
                entity = "&quot;";
                entity_length = 6;
                break;

            case 0xC2:
                /* Only the pair 0xC2 0xA0 is a no-break space. Any other
                   0xC2 lead byte is copied as an ordinary byte so the
                   character it starts stays intact (it used to be dropped),
                   and the byte after it is examined on the next pass. */
                if((i + 1) < length && (unsigned char)src_data[i + 1] == 0xA0) {
                    entity = "&nbsp;";
                    entity_length = 6;
                    i++;
                }
                break;

            default:
                break;
        }

        if(entity) {
            myhtml_serialization_realloc_if_need((entity_length + 1))
            memcpy(&str->data[str->length], entity, entity_length);
            str->length += entity_length;
        }
        else {
            /* room for this byte and the terminator */
            myhtml_serialization_realloc_if_need(2)
            str->data[str->length] = src_data[i];
            str->length++;
        }
    }

    str->data[ str->length ] = '\0';

    return true;
}

View File

@ -0,0 +1,53 @@
/*
Copyright (C) 2016 Alexander Borisov
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Author: lex.borisov@gmail.com (Alexander Borisov)
*/
#ifndef MyHTML_SERIALIZATION_H
#define MyHTML_SERIALIZATION_H
#pragma once
#include "myhtml/myosi.h"
#include "myhtml/mystring.h"
#include "myhtml/tree.h"
/*
 * Make sure the in-scope `str` (a myhtml_string_raw_t*) satisfies
 * str->size > str->length + len, growing str->data with myhtml_realloc
 * (requested amount plus 4096 spare bytes) when it does not.
 *
 * On allocation failure the macro executes `return false` in the calling
 * function; str->data and str->size are left untouched in that case.
 *
 * `len` is parenthesized so that low-precedence argument expressions
 * (e.g. a ternary) are added to str->length as a whole. It may be evaluated
 * twice, so it must not have side effects.
 *
 * The expansion is deliberately a bare `if` block rather than
 * do { } while(0): existing call sites invoke it without a trailing
 * semicolon.
 */
#define myhtml_serialization_realloc_if_need(len)                             \
    if(((len) + str->length) >= str->size) {                                  \
        size_t size = ((len) + str->length) + 4096;                           \
        char *data = (char*)myhtml_realloc(str->data, size * sizeof(char));   \
                                                                              \
        if(data) {                                                            \
            str->data = data;                                                 \
            str->size = size;                                                 \
        }                                                                     \
        else                                                                  \
            return false;                                                     \
    }
#ifdef __cplusplus
extern "C" {
#endif
bool myhtml_serialization(myhtml_tree_t* tree, myhtml_tree_node_t* scope_node, myhtml_string_raw_t* str);
bool myhtml_serialization_node(myhtml_tree_t* tree, myhtml_tree_node_t* node, myhtml_string_raw_t* str);
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif /* MyHTML_SERIALIZATION_H */