Merge pull request #33 from swizard0/master

typo fix: "pasition" -> "position"
This commit is contained in:
lex 2017-08-27 20:26:52 +03:00 committed by GitHub
commit ec59dbf322
7 changed files with 489 additions and 496 deletions

View File

@ -1,20 +1,20 @@
/*
Copyright (C) 2015-2016 Alexander Borisov
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Author: lex.borisov@gmail.com (Alexander Borisov)
*/
@ -37,30 +37,30 @@ struct res_html load_html_file(const char* filename)
fprintf(stderr, "Can't open html file: %s\n", filename);
exit(EXIT_FAILURE);
}
if(fseek(fh, 0L, SEEK_END) != 0) {
fprintf(stderr, "Can't set position (fseek) in file: %s\n", filename);
exit(EXIT_FAILURE);
}
long size = ftell(fh);
if(fseek(fh, 0L, SEEK_SET) != 0) {
fprintf(stderr, "Can't set position (fseek) in file: %s\n", filename);
exit(EXIT_FAILURE);
}
if(size <= 0) {
fprintf(stderr, "Can't get file size or file is empty: %s\n", filename);
exit(EXIT_FAILURE);
}
char *html = (char*)malloc(size + 1);
if(html == NULL) {
fprintf(stderr, "Can't allocate mem for html file: %s\n", filename);
exit(EXIT_FAILURE);
}
size_t nread = fread(html, 1, size, fh);
if (nread != size) {
fprintf(stderr, "could not read %ld bytes (" MyCORE_FMT_Z " bytes done)\n", size, nread);
@ -68,7 +68,7 @@ struct res_html load_html_file(const char* filename)
}
fclose(fh);
struct res_html res = {html, (size_t)size};
return res;
}
@ -77,10 +77,10 @@ void colorize_print(mycore_incoming_buffer_t *inc_buf, size_t begin, size_t leng
{
if(length) {
inc_buf = mycore_incoming_buffer_find_by_position(inc_buf, begin);
size_t between_begin = (begin - mycore_incoming_buffer_offset(inc_buf));
const char* between_data = mycore_incoming_buffer_data(inc_buf);
printf("%s%.*s\e[0m", color, (int)length, &between_data[between_begin]);
}
}
@ -91,16 +91,16 @@ size_t colorize_print_attributes(myhtml_tree_t* tree, myhtml_tree_attr_t* attr,
{
myhtml_position_t key_pos = myhtml_attribute_key_raw_position(attr);
myhtml_position_t value_pos = myhtml_attribute_value_raw_position(attr);
if(key_pos.length)
{
/* print <div[ ]key=value> */
if(last_pos < key_pos.begin)
colorize_print(inc_buf, last_pos, (key_pos.begin - last_pos), "\e[31m");
/* print <div [key]=value> */
colorize_print(inc_buf, key_pos.begin, key_pos.length, "\e[33m");
/* get/check max position */
if((key_pos.begin + key_pos.length) > last_pos)
last_pos = key_pos.begin + key_pos.length;
@ -110,7 +110,7 @@ size_t colorize_print_attributes(myhtml_tree_t* tree, myhtml_tree_attr_t* attr,
if(value_pos.length && last_pos < value_pos.begin)
colorize_print(inc_buf, last_pos, (value_pos.begin - last_pos), "\e[31m");
}
if(value_pos.length)
{
/* print <div key[=]value> */
@ -118,37 +118,37 @@ size_t colorize_print_attributes(myhtml_tree_t* tree, myhtml_tree_attr_t* attr,
size_t between_begin = key_pos.begin + key_pos.length;
colorize_print(inc_buf, between_begin, (value_pos.begin - between_begin), "\e[31m");
}
/* print <div key=[value]> */
colorize_print(inc_buf, value_pos.begin, value_pos.length, "\e[34m");
/* get/check max position */
if(value_pos.begin + value_pos.length > last_pos)
last_pos = value_pos.begin + value_pos.length;
}
attr = myhtml_attribute_next(attr);
}
return last_pos;
}
void * colorize_callback_before_token_done(myhtml_tree_t* tree, myhtml_token_node_t* token, void* ctx)
{
mycore_incoming_buffer_t *inc_buf = myhtml_tree_incoming_buffer_first(tree);
myhtml_position_t token_pos = myhtml_token_node_raw_pasition(token);
myhtml_position_t token_element_pos = myhtml_token_node_element_pasition(token);
myhtml_position_t token_pos = myhtml_token_node_raw_position(token);
myhtml_position_t token_element_pos = myhtml_token_node_element_position(token);
size_t last_pos = token_pos.begin + token_pos.length;
switch (myhtml_token_node_tag_id(token)) {
case MyHTML_TAG__DOCTYPE: {
/* print [<!DOCTYPE] */
colorize_print(inc_buf, token_element_pos.begin, (token_pos.begin - token_element_pos.begin), "\e[37m");
colorize_print(inc_buf, token_pos.begin, token_pos.length, "\e[37m");
/* print [>] */
colorize_print(inc_buf, last_pos, ((token_element_pos.begin + token_element_pos.length) - last_pos), "\e[37m");
break;
@ -160,9 +160,9 @@ void * colorize_callback_before_token_done(myhtml_tree_t* tree, myhtml_token_nod
case MyHTML_TAG__COMMENT: {
/* print [<!--] */
colorize_print(inc_buf, token_element_pos.begin, (token_pos.begin - token_element_pos.begin), "\e[32m");
colorize_print(inc_buf, token_pos.begin, token_pos.length, "\e[32m");
/* print [-->] */
colorize_print(inc_buf, last_pos, ((token_element_pos.begin + token_element_pos.length) - last_pos), "\e[32m");
break;
@ -170,20 +170,20 @@ void * colorize_callback_before_token_done(myhtml_tree_t* tree, myhtml_token_nod
default: {
/* print [<]div> */
colorize_print(inc_buf, token_element_pos.begin, (token_pos.begin - token_element_pos.begin), "\e[31m");
/* print <[div]> */
colorize_print(inc_buf, token_pos.begin, token_pos.length, "\e[31m");
if(myhtml_token_node_attribute_first(token))
last_pos = colorize_print_attributes(tree, myhtml_token_node_attribute_first(token), inc_buf, last_pos);
/* print <div[>] */
colorize_print(inc_buf, last_pos, ((token_element_pos.begin + token_element_pos.length) - last_pos), "\e[31m");
break;
}
}
return ctx;
}
@ -198,30 +198,28 @@ int main(int argc, const char * argv[])
printf("Bad ARGV!\nUse: tokenizer_colorize_high_level <path_to_html_file>\n");
exit(EXIT_FAILURE);
}
struct res_html res = load_html_file(path);
// basic init
myhtml_t* myhtml = myhtml_create();
myhtml_init(myhtml, MyHTML_OPTIONS_DEFAULT, 1, 0);
// init tree
myhtml_tree_t* tree = myhtml_tree_create();
myhtml_tree_init(tree, myhtml);
myhtml_callback_before_token_done_set(tree, colorize_callback_before_token_done, NULL);
// parse html
myhtml_parse(tree, MyENCODING_UTF_8, res.html, res.size);
printf("\n");
myhtml_tree_destroy(tree);
myhtml_destroy(myhtml);
free(res.html);
return 0;
}

View File

@ -1,20 +1,20 @@
/*
Copyright (C) 2015-2017 Alexander Borisov
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Author: lex.borisov@gmail.com (Alexander Borisov)
*/
@ -328,7 +328,7 @@ enum myhtml_tags {
// base
/*
Very important!!!
for myhtml 0..00ffff; MyHTML_STATUS_OK == 0x000000
for mycss and modules 010000..01ffff; MyCSS_STATUS_OK == 0x000000
for modest 020000..02ffff; MODEST_STATUS_OK == 0x000000
@ -380,7 +380,7 @@ enum myhtml_namespace {
MyHTML_NAMESPACE_XLINK = 0x04,
MyHTML_NAMESPACE_XML = 0x05,
MyHTML_NAMESPACE_XMLNS = 0x06,
/* MyHTML_NAMESPACE_ANY == MyHTML_NAMESPACE_LAST_ENTRY */
MyHTML_NAMESPACE_ANY = 0x07,
MyHTML_NAMESPACE_LAST_ENTRY = 0x07
@ -1366,7 +1366,7 @@ myhtml_node_string(myhtml_tree_node_t *node);
* @return myhtml_position_t
*/
myhtml_position_t
myhtml_node_raw_pasition(myhtml_tree_node_t *node);
myhtml_node_raw_position(myhtml_tree_node_t *node);
/**
* Get element position for Tree Node in Incoming Buffer
@ -1378,7 +1378,7 @@ myhtml_node_raw_pasition(myhtml_tree_node_t *node);
* @return myhtml_position_t
*/
myhtml_position_t
myhtml_node_element_pasition(myhtml_tree_node_t *node);
myhtml_node_element_position(myhtml_tree_node_t *node);
/**
* Get data value from tree node
@ -1619,7 +1619,7 @@ myhtml_token_node_tag_id(myhtml_token_node_t *token_node);
* @return myhtml_position_t
*/
myhtml_position_t
myhtml_token_node_raw_pasition(myhtml_token_node_t *token_node);
myhtml_token_node_raw_position(myhtml_token_node_t *token_node);
/**
* Get element position for Token Node in Incoming Buffer
@ -1631,7 +1631,7 @@ myhtml_token_node_raw_pasition(myhtml_token_node_t *token_node);
* @return myhtml_position_t
*/
myhtml_position_t
myhtml_token_node_element_pasition(myhtml_token_node_t *token_node);
myhtml_token_node_element_position(myhtml_token_node_t *token_node);
/**
* Get first attribute of a token node

View File

@ -1,20 +1,20 @@
/*
Copyright (C) 2015-2017 Alexander Borisov
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Author: lex.borisov@gmail.com (Alexander Borisov)
*/
@ -69,10 +69,10 @@ struct myhtml {
mythread_t* thread_batch;
mythread_t* thread_list[3];
size_t thread_total;
myhtml_tokenizer_state_f* parse_state_func;
myhtml_insertion_f* insertion_func;
enum myhtml_options opt;
myhtml_tree_node_t *marker;
};
@ -177,8 +177,8 @@ myhtml_tree_attr_t * myhtml_node_attribute_first(myhtml_tree_node_t *node);
myhtml_tree_attr_t * myhtml_node_attribute_last(myhtml_tree_node_t *node);
const char * myhtml_node_text(myhtml_tree_node_t *node, size_t *length);
mycore_string_t * myhtml_node_string(myhtml_tree_node_t *node);
myhtml_position_t myhtml_node_raw_pasition(myhtml_tree_node_t *node);
myhtml_position_t myhtml_node_element_pasition(myhtml_tree_node_t *node);
myhtml_position_t myhtml_node_raw_position(myhtml_tree_node_t *node);
myhtml_position_t myhtml_node_element_position(myhtml_tree_node_t *node);
void myhtml_node_set_data(myhtml_tree_node_t *node, void* data);
void * myhtml_node_get_data(myhtml_tree_node_t *node);

View File

@ -1,20 +1,20 @@
/*
Copyright (C) 2015-2017 Alexander Borisov
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Author: lex.borisov@gmail.com (Alexander Borisov)
*/
@ -42,7 +42,7 @@ extern "C" {
/*
 * One attribute-name rewrite rule: an attribute whose key equals `from`
 * gets its key replaced by `to`.
 * NOTE(review): presumably the element type of the SVG replacement table
 * walked by myhtml_token_adjust_svg_attributes() — confirm against the
 * typedef of myhtml_token_replacement_entry_t.
 */
struct myhtml_token_replacement_entry {
/* key to look for, and its length in bytes */
char* from;
size_t from_size;
/* replacement key, and its length in bytes */
char* to;
size_t to_size;
};
@ -50,55 +50,55 @@ struct myhtml_token_replacement_entry {
/*
 * Attribute-name rewrite rule that also carries a namespace.
 * myhtml_token_adjust_foreign_attributes() looks an attribute up by
 * `from`/`from_size`, replaces its key with `to`/`to_size` and assigns `ns`
 * to the attribute.
 */
struct myhtml_token_namespace_replacement {
/* key to look for, and its length in bytes */
char* from;
size_t from_size;
/* replacement key, and its length in bytes */
char* to;
size_t to_size;
/* namespace stored into the matched attribute */
enum myhtml_namespace ns;
};
/*
 * A single attribute attached to a token node.
 * Attributes of one node form a doubly linked list reachable through the
 * node's attr_first / attr_last pointers.
 */
struct myhtml_token_attr {
/* list links; next is NULL on the last entry, prev is NULL on the first */
myhtml_token_attr_t* next;
myhtml_token_attr_t* prev;
/* attribute name and attribute value */
mycore_string_t key;
mycore_string_t value;
/* NOTE(review): presumably begin/length of the key and of the value in the
 * incoming (raw) buffer — confirm against the tokenizer that fills them */
size_t raw_key_begin;
size_t raw_key_length;
size_t raw_value_begin;
size_t raw_value_length;
/* namespace; myhtml_token_attr_clean() resets it to MyHTML_NAMESPACE_HTML */
enum myhtml_namespace ns;
};
/*
 * One token produced while parsing.
 */
struct myhtml_token_node {
/* tag identifier, returned by myhtml_token_node_tag_id() */
myhtml_tag_id_t tag_id;
/* token text; myhtml_token_node_text() exposes str.data / str.length */
mycore_string_t str;
/* begin/length pair returned by myhtml_token_node_raw_position() */
size_t raw_begin;
size_t raw_length;
/* begin/length pair returned by myhtml_token_node_element_position() */
size_t element_begin;
size_t element_length;
/* head and tail of this token's attribute list (NULL when there are none) */
myhtml_token_attr_t* attr_first;
myhtml_token_attr_t* attr_last;
/* token type flags; myhtml_token_node_clean() sets
 * MyHTML_TOKEN_TYPE_OPEN|MyHTML_TOKEN_TYPE_WHITESPACE.
 * NOTE(review): volatile presumably because another thread updates it
 * (see myhtml_token_node_wait_for_done) — confirm */
volatile enum myhtml_token_type type;
};
/*
 * Allocator state for token nodes and token attributes of one tree.
 */
struct myhtml_token {
/* tree passed to myhtml_token_create(); stored as a reference */
myhtml_tree_t* tree; // ref
/* object pools: nodes_obj hands out myhtml_token_node_t objects,
 * attr_obj hands out myhtml_token_attr_t objects */
mcobject_async_t* nodes_obj; // myhtml_token_node_t
mcobject_async_t* attr_obj; // myhtml_token_attr_t
// default pool node ids, obtained from mcobject_async_node_add() in myhtml_token_create()
size_t mcasync_token_id;
size_t mcasync_attr_id;
/* NOTE(review): purpose not visible in this excerpt — confirm */
bool is_new_tmp;
};
@ -108,8 +108,8 @@ void myhtml_token_clean_all(myhtml_token_t* token);
myhtml_token_t * myhtml_token_destroy(myhtml_token_t* token);
myhtml_tag_id_t myhtml_token_node_tag_id(myhtml_token_node_t *token_node);
myhtml_position_t myhtml_token_node_raw_pasition(myhtml_token_node_t *token_node);
myhtml_position_t myhtml_token_node_element_pasition(myhtml_token_node_t *token_node);
myhtml_position_t myhtml_token_node_raw_position(myhtml_token_node_t *token_node);
myhtml_position_t myhtml_token_node_element_position(myhtml_token_node_t *token_node);
myhtml_tree_attr_t * myhtml_token_node_attribute_first(myhtml_token_node_t *token_node);
myhtml_tree_attr_t * myhtml_token_node_attribute_last(myhtml_token_node_t *token_node);

View File

@ -1,20 +1,20 @@
/*
Copyright (C) 2015-2017 Alexander Borisov
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Author: lex.borisov@gmail.com (Alexander Borisov)
*/
@ -328,7 +328,7 @@ enum myhtml_tags {
// base
/*
Very important!!!
for myhtml 0..00ffff; MyHTML_STATUS_OK == 0x000000
for mycss and modules 010000..01ffff; MyCSS_STATUS_OK == 0x000000
for modest 020000..02ffff; MODEST_STATUS_OK == 0x000000
@ -380,7 +380,7 @@ enum myhtml_namespace {
MyHTML_NAMESPACE_XLINK = 0x04,
MyHTML_NAMESPACE_XML = 0x05,
MyHTML_NAMESPACE_XMLNS = 0x06,
/* MyHTML_NAMESPACE_ANY == MyHTML_NAMESPACE_LAST_ENTRY */
MyHTML_NAMESPACE_ANY = 0x07,
MyHTML_NAMESPACE_LAST_ENTRY = 0x07
@ -1366,7 +1366,7 @@ myhtml_node_string(myhtml_tree_node_t *node);
* @return myhtml_position_t
*/
myhtml_position_t
myhtml_node_raw_pasition(myhtml_tree_node_t *node);
myhtml_node_raw_position(myhtml_tree_node_t *node);
/**
* Get element position for Tree Node in Incoming Buffer
@ -1378,7 +1378,7 @@ myhtml_node_raw_pasition(myhtml_tree_node_t *node);
* @return myhtml_position_t
*/
myhtml_position_t
myhtml_node_element_pasition(myhtml_tree_node_t *node);
myhtml_node_element_position(myhtml_tree_node_t *node);
/**
* Get data value from tree node
@ -1619,7 +1619,7 @@ myhtml_token_node_tag_id(myhtml_token_node_t *token_node);
* @return myhtml_position_t
*/
myhtml_position_t
myhtml_token_node_raw_pasition(myhtml_token_node_t *token_node);
myhtml_token_node_raw_position(myhtml_token_node_t *token_node);
/**
* Get element position for Token Node in Incoming Buffer
@ -1631,7 +1631,7 @@ myhtml_token_node_raw_pasition(myhtml_token_node_t *token_node);
* @return myhtml_position_t
*/
myhtml_position_t
myhtml_token_node_element_pasition(myhtml_token_node_t *token_node);
myhtml_token_node_element_position(myhtml_token_node_t *token_node);
/**
* Get first attribute of a token node

File diff suppressed because it is too large Load Diff

View File

@ -1,20 +1,20 @@
/*
Copyright (C) 2015-2017 Alexander Borisov
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Author: lex.borisov@gmail.com (Alexander Borisov)
*/
@ -101,36 +101,36 @@ myhtml_token_t * myhtml_token_create(myhtml_tree_t* tree, size_t size)
{
if(size == 0)
size = 4096;
myhtml_token_t* token = (myhtml_token_t*)mycore_malloc(sizeof(myhtml_token_t));
if(token == NULL)
return NULL;
token->nodes_obj = mcobject_async_create();
if(token->nodes_obj == NULL) {
mycore_free(token);
return NULL;
}
token->attr_obj = mcobject_async_create();
if(token->attr_obj == NULL) {
mycore_free(token->nodes_obj);
mycore_free(token);
return NULL;
}
mcobject_async_init(token->nodes_obj, 128, size, sizeof(myhtml_token_node_t));
mcobject_async_init(token->attr_obj, 128, size, sizeof(myhtml_token_attr_t));
token->mcasync_token_id = mcobject_async_node_add(token->nodes_obj, NULL);
token->mcasync_attr_id = mcobject_async_node_add(token->attr_obj, NULL);
token->tree = tree;
return token;
}
@ -150,15 +150,15 @@ myhtml_token_t * myhtml_token_destroy(myhtml_token_t* token)
{
if(token == NULL)
return NULL;
if(token->nodes_obj)
token->nodes_obj = mcobject_async_destroy(token->nodes_obj, 1);
if(token->attr_obj)
token->attr_obj = mcobject_async_destroy(token->attr_obj, 1);
mycore_free(token);
return NULL;
}
@ -167,7 +167,7 @@ myhtml_token_node_t * myhtml_token_node_create(myhtml_token_t* token, size_t asy
myhtml_token_node_t *token_node = (myhtml_token_node_t*)mcobject_async_malloc(token->nodes_obj, async_node_id, NULL);
if(token_node == NULL)
return NULL;
myhtml_token_node_clean(token_node);
return token_node;
}
@ -176,7 +176,7 @@ void myhtml_token_node_clean(myhtml_token_node_t* node)
{
memset(node, 0, sizeof(myhtml_token_node_t));
node->type = MyHTML_TOKEN_TYPE_OPEN|MyHTML_TOKEN_TYPE_WHITESPACE;
mycore_string_clean_all(&node->str);
}
@ -185,7 +185,7 @@ myhtml_token_attr_t * myhtml_token_attr_create(myhtml_token_t* token, size_t asy
myhtml_token_attr_t *attr_node = mcobject_async_malloc(token->attr_obj, async_node_id, NULL);
if(attr_node == NULL)
return NULL;
myhtml_token_attr_clean(attr_node);
return attr_node;
}
@ -194,7 +194,7 @@ void myhtml_token_attr_clean(myhtml_token_attr_t* attr)
{
memset(attr, 0, sizeof(myhtml_token_attr_t));
attr->ns = MyHTML_NAMESPACE_HTML;
mycore_string_clean_all(&attr->key);
mycore_string_clean_all(&attr->value);
}
@ -204,19 +204,19 @@ myhtml_tag_id_t myhtml_token_node_tag_id(myhtml_token_node_t *token_node)
return token_node->tag_id;
}
myhtml_position_t myhtml_token_node_raw_pasition(myhtml_token_node_t *token_node)
/*
 * Return the raw position recorded on a token node.
 *
 * @param token_node  token to query; may be NULL
 * @return {raw_begin, raw_length} of the token, or {0, 0} when token_node is NULL
 */
myhtml_position_t myhtml_token_node_raw_position(myhtml_token_node_t *token_node)
{
    myhtml_position_t pos = {0, 0};

    if(token_node == NULL)
        return pos;

    pos = (myhtml_position_t){token_node->raw_begin, token_node->raw_length};
    return pos;
}
myhtml_position_t myhtml_token_node_element_pasition(myhtml_token_node_t *token_node)
/*
 * Return the element position recorded on a token node.
 *
 * @param token_node  token to query; may be NULL
 * @return {element_begin, element_length} of the token, or {0, 0} when token_node is NULL
 */
myhtml_position_t myhtml_token_node_element_position(myhtml_token_node_t *token_node)
{
    myhtml_position_t pos = {0, 0};

    if(token_node == NULL)
        return pos;

    pos = (myhtml_position_t){token_node->element_begin, token_node->element_length};
    return pos;
}
@ -234,7 +234,7 @@ const char * myhtml_token_node_text(myhtml_token_node_t *token_node, size_t *len
{
if(length)
*length = token_node->str.length;
return token_node->str.data;
}
@ -269,13 +269,13 @@ myhtml_token_node_t * myhtml_token_node_clone(myhtml_token_t* token, myhtml_toke
{
if(node == NULL)
return NULL;
myhtml_tree_t* tree = token->tree;
myhtml_token_node_t* new_node = myhtml_token_node_create(token, token_thread_idx);
if(new_node == NULL)
return NULL;
new_node->tag_id = node->tag_id;
new_node->type = node->type;
new_node->attr_first = NULL;
@ -284,17 +284,17 @@ myhtml_token_node_t * myhtml_token_node_clone(myhtml_token_t* token, myhtml_toke
new_node->raw_length = node->raw_length;
new_node->element_begin = node->element_begin;
new_node->element_length = node->element_length;
mycore_string_init(tree->mchar, tree->mchar_node_id, &new_node->str, node->str.size);
myhtml_token_node_attr_copy(token, node, new_node, attr_thread_idx);
return new_node;
}
/*
 * (Re)initialise dest->str from the tree's character pool with room for
 * text_len + 2 bytes, then append text to it.
 *
 * @param token     token context; its tree supplies the character pool
 * @param dest      token node whose string is initialised and filled
 * @param text      bytes to append
 * @param text_len  number of bytes in text
 */
void myhtml_token_node_text_append(myhtml_token_t* token, myhtml_token_node_t* dest, const char* text, size_t text_len)
{
    myhtml_tree_t* owner = token->tree;

    mycore_string_init(owner->mchar, owner->mchar_node_id, &dest->str, (text_len + 2));
    mycore_string_append(&dest->str, text, text_len);
}
@ -305,36 +305,36 @@ myhtml_token_attr_t * myhtml_token_node_attr_append(myhtml_token_t* token, myhtm
{
myhtml_token_attr_t* new_attr = mcobject_async_malloc(token->attr_obj, thread_idx, NULL);
new_attr->next = 0;
if(key_len) {
mycore_string_init(token->tree->mchar, token->tree->mchar_node_id, &new_attr->key, (key_len + 1));
mycore_string_append_lowercase(&new_attr->key, key, key_len);
}
else
mycore_string_clean_all(&new_attr->key);
if(value_len) {
mycore_string_init(token->tree->mchar, token->tree->mchar_node_id, &new_attr->value, (value_len + 1));
mycore_string_append(&new_attr->value, value, value_len);
}
else
mycore_string_clean_all(&new_attr->value);
if(dest->attr_first == NULL) {
new_attr->prev = 0;
dest->attr_first = new_attr;
dest->attr_last = new_attr;
}
else {
dest->attr_last->next = new_attr;
new_attr->prev = dest->attr_last;
dest->attr_last = new_attr;
}
new_attr->ns = MyHTML_NAMESPACE_HTML;
return new_attr;
}
@ -345,10 +345,10 @@ myhtml_token_attr_t * myhtml_token_node_attr_append_with_convert_encoding(myhtml
{
myhtml_token_attr_t* new_attr = mcobject_async_malloc(token->attr_obj, thread_idx, NULL);
new_attr->next = 0;
if(key_len) {
mycore_string_init(token->tree->mchar, token->tree->mchar_node_id, &new_attr->key, (key_len + 1));
if(encoding == MyENCODING_UTF_8)
mycore_string_append_lowercase(&new_attr->key, key, key_len);
else
@ -356,10 +356,10 @@ myhtml_token_attr_t * myhtml_token_node_attr_append_with_convert_encoding(myhtml
}
else
mycore_string_clean_all(&new_attr->key);
if(value_len) {
mycore_string_init(token->tree->mchar, token->tree->mchar_node_id, &new_attr->value, (value_len + 1));
if(encoding == MyENCODING_UTF_8)
mycore_string_append(&new_attr->value, value, value_len);
else
@ -367,35 +367,35 @@ myhtml_token_attr_t * myhtml_token_node_attr_append_with_convert_encoding(myhtml
}
else
mycore_string_clean_all(&new_attr->value);
if(dest->attr_first == NULL) {
new_attr->prev = 0;
dest->attr_first = new_attr;
dest->attr_last = new_attr;
}
else {
dest->attr_last->next = new_attr;
new_attr->prev = dest->attr_last;
dest->attr_last = new_attr;
}
new_attr->ns = MyHTML_NAMESPACE_HTML;
return new_attr;
}
/*
 * Copy every attribute of target that has a non-empty key and is not yet
 * present (by key) on dest.
 *
 * @param token       token context used for allocating the copies
 * @param target      node whose attributes are read
 * @param dest        node that receives the missing attributes
 * @param thread_idx  pool node id forwarded to myhtml_token_attr_copy()
 */
void myhtml_token_node_attr_copy_with_check(myhtml_token_t* token, myhtml_token_node_t* target, myhtml_token_node_t* dest, size_t thread_idx)
{
    for(myhtml_token_attr_t* cur = target->attr_first; cur != NULL; cur = cur->next)
    {
        /* attributes without a key are never copied */
        if(cur->key.length == 0)
            continue;

        /* dest already owns an attribute with this key */
        if(myhtml_token_attr_by_name(dest, cur->key.data, cur->key.length) != NULL)
            continue;

        myhtml_token_attr_copy(token, cur, dest, thread_idx);
    }
}
@ -403,7 +403,7 @@ void myhtml_token_node_attr_copy_with_check(myhtml_token_t* token, myhtml_token_
void myhtml_token_node_attr_copy(myhtml_token_t* token, myhtml_token_node_t* target, myhtml_token_node_t* dest, size_t thread_idx)
{
myhtml_token_attr_t* attr = target->attr_first;
while (attr)
{
myhtml_token_attr_copy(token, attr, dest, thread_idx);
@ -415,36 +415,36 @@ bool myhtml_token_attr_copy(myhtml_token_t* token, myhtml_token_attr_t* attr, my
{
myhtml_token_attr_t* new_attr = mcobject_async_malloc(token->attr_obj, thread_idx, NULL);
new_attr->next = 0;
if(attr->key.length) {
mycore_string_init(token->tree->mchar, token->tree->mchar_node_id, &new_attr->key, (attr->key.length + 1));
mycore_string_append_lowercase(&new_attr->key, attr->key.data, attr->key.length);
}
else
mycore_string_clean_all(&new_attr->key);
if(attr->value.length) {
mycore_string_init(token->tree->mchar, token->tree->mchar_node_id, &new_attr->value, (attr->value.length + 1));
mycore_string_append(&new_attr->value, attr->value.data, attr->value.length);
}
else
mycore_string_clean_all(&new_attr->value);
if(dest->attr_first == NULL) {
new_attr->prev = 0;
dest->attr_first = new_attr;
dest->attr_last = new_attr;
}
else {
dest->attr_last->next = new_attr;
new_attr->prev = dest->attr_last;
dest->attr_last = new_attr;
}
new_attr->ns = attr->ns;
return true;
}
@ -452,7 +452,7 @@ myhtml_token_attr_t * myhtml_token_attr_match(myhtml_token_t* token, myhtml_toke
const char* key, size_t key_size, const char* value, size_t value_size)
{
myhtml_token_attr_t* attr = target->attr_first;
while (attr)
{
if(attr->key.length == key_size && attr->value.length == value_size)
@ -464,10 +464,10 @@ myhtml_token_attr_t * myhtml_token_attr_match(myhtml_token_t* token, myhtml_toke
return NULL;
}
}
attr = attr->next;
}
return NULL;
}
@ -475,7 +475,7 @@ myhtml_token_attr_t * myhtml_token_attr_match_case(myhtml_token_t* token, myhtml
const char* key, size_t key_size, const char* value, size_t value_size)
{
myhtml_token_attr_t* attr = target->attr_first;
while (attr)
{
if(attr->key.length == key_size && attr->value.length == value_size)
@ -487,17 +487,17 @@ myhtml_token_attr_t * myhtml_token_attr_match_case(myhtml_token_t* token, myhtml
return NULL;
}
}
attr = attr->next;
}
return NULL;
}
void myhtml_token_adjust_mathml_attributes(myhtml_token_node_t* target)
{
myhtml_token_attr_t* attr = myhtml_token_attr_by_name(target, "definitionurl", 13);
if(attr) {
memcpy(attr->key.data, "definitionURL", 13);
}
@ -512,10 +512,10 @@ void _myhtml_token_create_copy_srt(myhtml_token_t* token, const char* from, size
void myhtml_token_strict_doctype_by_token(myhtml_token_t* token, myhtml_token_node_t* target, myhtml_tree_doctype_t* return_doctype)
{
myhtml_token_attr_t* attr = target->attr_first;
if(attr && attr->key.length) {
_myhtml_token_create_copy_srt(token, attr->key.data, attr->key.length, &return_doctype->attr_name);
if(mycore_strcmp("html", return_doctype->attr_name))
return_doctype->is_html = false;
else
@ -523,41 +523,41 @@ void myhtml_token_strict_doctype_by_token(myhtml_token_t* token, myhtml_token_no
}
else {
return_doctype->is_html = false;
_myhtml_token_create_copy_srt(token, "\0", 1, &return_doctype->attr_name);
if(return_doctype->attr_public)
mycore_free(return_doctype->attr_public);
return_doctype->attr_public = NULL;
if(return_doctype->attr_system)
mycore_free(return_doctype->attr_system);
return_doctype->attr_system = NULL;
return;
}
attr = attr->next;
if(attr && attr->value.length)
{
if(mycore_strcasecmp(attr->value.data, "PUBLIC") == 0)
{
// try see public
attr = attr->next;
if(attr && attr->value.length) {
_myhtml_token_create_copy_srt(token, attr->value.data, attr->value.length, &return_doctype->attr_public);
// try see system
attr = attr->next;
if(attr && attr->value.length)
_myhtml_token_create_copy_srt(token, attr->value.data, attr->value.length, &return_doctype->attr_system);
else {
if(return_doctype->attr_system)
mycore_free(return_doctype->attr_system);
_myhtml_token_create_copy_srt(token, "\0", 1, &return_doctype->attr_system);
}
}
@ -565,7 +565,7 @@ void myhtml_token_strict_doctype_by_token(myhtml_token_t* token, myhtml_token_no
if(return_doctype->attr_public)
mycore_free(return_doctype->attr_public);
return_doctype->attr_public = NULL;
if(return_doctype->attr_system)
mycore_free(return_doctype->attr_system);
return_doctype->attr_system = NULL;
@ -574,7 +574,7 @@ void myhtml_token_strict_doctype_by_token(myhtml_token_t* token, myhtml_token_no
else if(mycore_strncasecmp(attr->value.data, "SYSTEM", attr->value.length) == 0)
{
attr = attr->next;
if(attr && attr->value.length) {
_myhtml_token_create_copy_srt(token, "\0", 1, &return_doctype->attr_public);
_myhtml_token_create_copy_srt(token, attr->value.data, attr->value.length, &return_doctype->attr_system);
@ -583,7 +583,7 @@ void myhtml_token_strict_doctype_by_token(myhtml_token_t* token, myhtml_token_no
if(return_doctype->attr_public)
mycore_free(return_doctype->attr_public);
return_doctype->attr_public = NULL;
if(return_doctype->attr_system)
mycore_free(return_doctype->attr_system);
return_doctype->attr_system = NULL;
@ -593,7 +593,7 @@ void myhtml_token_strict_doctype_by_token(myhtml_token_t* token, myhtml_token_no
if(return_doctype->attr_public)
mycore_free(return_doctype->attr_public);
return_doctype->attr_public = NULL;
if(return_doctype->attr_system)
mycore_free(return_doctype->attr_system);
return_doctype->attr_system = NULL;
@ -617,7 +617,7 @@ bool myhtml_token_doctype_check_xhtml_1_0(myhtml_tree_doctype_t* return_doctype)
{
if(return_doctype->attr_system == NULL)
return true;
return mycore_strcmp(return_doctype->attr_public, "-//W3C//DTD XHTML 1.0 Strict//EN") &&
mycore_strcmp(return_doctype->attr_system, "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd");
}
@ -626,7 +626,7 @@ bool myhtml_token_doctype_check_xhtml_1_1(myhtml_tree_doctype_t* return_doctype)
{
if(return_doctype->attr_system == NULL)
return true;
return mycore_strcmp(return_doctype->attr_public, "-//W3C//DTD XHTML 1.1//EN") &&
mycore_strcmp(return_doctype->attr_system, "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd");
}
@ -635,19 +635,19 @@ bool myhtml_token_release_and_check_doctype_attributes(myhtml_token_t* token, my
{
if(return_doctype == NULL)
return false;
myhtml_token_strict_doctype_by_token(token, target, return_doctype);
if(return_doctype->attr_name == NULL)
return false;
if((return_doctype->is_html ||
return_doctype->attr_public ||
(return_doctype->attr_system && mycore_strcmp(return_doctype->attr_system, "about:legacy-compat"))))
{
if(return_doctype->attr_public == NULL)
return false;
if(return_doctype->is_html &&
myhtml_token_doctype_check_html_4_0(return_doctype) &&
myhtml_token_doctype_check_html_4_01(return_doctype) &&
@ -657,19 +657,19 @@ bool myhtml_token_release_and_check_doctype_attributes(myhtml_token_t* token, my
return false;
}
}
return true;
}
void myhtml_token_adjust_svg_attributes(myhtml_token_node_t* target)
{
size_t count = sizeof(myhtml_token_attr_svg_replacement) / sizeof(myhtml_token_replacement_entry_t);
for (size_t i = 0; i < count; i++)
{
myhtml_token_attr_t* attr = myhtml_token_attr_by_name(target, myhtml_token_attr_svg_replacement[i].from,
myhtml_token_attr_svg_replacement[i].from_size);
if(attr) {
mycore_string_clean(&attr->key);
mycore_string_append(&attr->key, myhtml_token_attr_svg_replacement[i].to,
@ -681,17 +681,17 @@ void myhtml_token_adjust_svg_attributes(myhtml_token_node_t* target)
void myhtml_token_adjust_foreign_attributes(myhtml_token_node_t* target)
{
size_t count = sizeof(myhtml_token_attr_namespace_replacement) / sizeof(myhtml_token_namespace_replacement_t);
for (size_t i = 0; i < count; i++)
{
myhtml_token_attr_t* attr = myhtml_token_attr_by_name(target, myhtml_token_attr_namespace_replacement[i].from,
myhtml_token_attr_namespace_replacement[i].from_size);
if(attr) {
mycore_string_clean(&attr->key);
mycore_string_append(&attr->key, myhtml_token_attr_namespace_replacement[i].to,
myhtml_token_attr_namespace_replacement[i].to_size);
attr->ns = myhtml_token_attr_namespace_replacement[i].ns;
}
}
@ -701,10 +701,10 @@ bool myhtml_token_attr_compare(myhtml_token_node_t* target, myhtml_token_node_t*
{
if(target == NULL || dest == NULL)
return false;
myhtml_token_attr_t* target_attr = target->attr_first;
myhtml_token_attr_t* dest_attr = dest->attr_first;
while (target_attr && dest_attr)
{
if(target_attr->key.length == dest_attr->key.length &&
@ -712,37 +712,37 @@ bool myhtml_token_attr_compare(myhtml_token_node_t* target, myhtml_token_node_t*
{
if(mycore_strcmp(target_attr->key.data, dest_attr->key.data) != 0)
break;
if(mycore_strcasecmp(target_attr->value.data, dest_attr->value.data) != 0)
break;
}
else
break;
target_attr = target_attr->next;
dest_attr = dest_attr->next;
}
if(target_attr == NULL && dest_attr == NULL)
return true;
return false;
}
/*
 * Find the first attribute of node whose key has length name_length and
 * compares equal to name via mycore_strcmp().
 *
 * @param node         token node whose attribute list is searched
 * @param name         key to look for
 * @param name_length  length of name in bytes
 * @return the matching attribute, or NULL when none matches
 */
myhtml_token_attr_t * myhtml_token_attr_by_name(myhtml_token_node_t* node, const char* name, size_t name_length)
{
    for(myhtml_token_attr_t* cur = node->attr_first; cur != NULL; cur = cur->next)
    {
        /* cheap length test first, string comparison only on equal lengths */
        if(cur->key.length == name_length && mycore_strcmp(cur->key.data, name) == 0)
            return cur;
    }

    return NULL;
}
@ -751,24 +751,24 @@ void myhtml_token_delete(myhtml_token_t* token, myhtml_token_node_t* node)
if(node->str.data && node->str.mchar) {
mchar_async_free(node->str.mchar, node->str.node_idx, node->str.data);
}
mcobject_async_free(token->nodes_obj, node);
}
/*
 * Release the key and value string buffers of every attribute of node.
 * The attribute objects themselves and the list links are left untouched.
 *
 * @param token  token context (not used by the body)
 * @param node   token node whose attribute strings are released
 */
void myhtml_token_attr_delete_all(myhtml_token_t* token, myhtml_token_node_t* node)
{
    for(myhtml_token_attr_t* cur = node->attr_first; cur != NULL; cur = cur->next)
    {
        mycore_string_t* key = &cur->key;
        mycore_string_t* value = &cur->value;

        /* only strings that own data and know their pool can be released */
        if(key->data && key->mchar)
            mchar_async_free(key->mchar, key->node_idx, key->data);

        if(value->data && value->mchar)
            mchar_async_free(value->mchar, value->node_idx, value->data);
    }
}
@ -783,18 +783,18 @@ myhtml_token_attr_t * myhtml_token_attr_remove(myhtml_token_node_t* node, myhtml
else {
node->attr_first = attr->next;
}
if(attr->next) {
attr->next->prev = attr->prev;
}
else {
node->attr_last = attr->prev;
}
attr->next = NULL;
attr->prev = NULL;
}
return attr;
}
@ -807,13 +807,13 @@ myhtml_token_node_t * myhtml_token_merged_two_token_string(myhtml_tree_t* tree,
{
myhtml_token_node_wait_for_done(tree->token, token_to);
myhtml_token_node_wait_for_done(tree->token, token_from);
mycore_string_t *string1 = &token_to->str;
mycore_string_t *string2 = &token_from->str;
token_to->raw_begin = 0;
token_to->raw_length = 0;
if(token_to->str.node_idx == tree->mchar_node_id)
{
if(cp_reverse) {
@ -822,7 +822,7 @@ myhtml_token_node_t * myhtml_token_merged_two_token_string(myhtml_tree_t* tree,
else {
mycore_string_copy(string1, string2);
}
return token_to;
}
if(token_from->str.node_idx == tree->mchar_node_id)
@ -833,13 +833,13 @@ myhtml_token_node_t * myhtml_token_merged_two_token_string(myhtml_tree_t* tree,
else {
mycore_string_copy(string1, string2);
}
return token_from;
}
else {
mycore_string_t string_base;
mycore_string_init(tree->mchar, tree->mchar_node_id, &string_base, (string1->length + string2->length + 2));
if(cp_reverse) {
mycore_string_copy(&string_base, string2);
mycore_string_copy(&string_base, string1);
@ -848,22 +848,21 @@ myhtml_token_node_t * myhtml_token_merged_two_token_string(myhtml_tree_t* tree,
mycore_string_copy(&string_base, string1);
mycore_string_copy(&string_base, string2);
}
token_to->str = string_base;
}
return token_to;
}
/*
 * Rebuild node->str through
 * mycore_string_append_with_replacement_null_characters() into a freshly
 * initialised string, then store that string on the node.
 *
 * @param tree  tree that owns the token context and the character pool
 * @param node  token node whose string is rebuilt
 */
void myhtml_token_set_replacement_character_for_null_token(myhtml_tree_t* tree, myhtml_token_node_t* node)
{
    /* the token must be finished before its string is read */
    myhtml_token_node_wait_for_done(tree->token, node);

    mycore_string_t replaced;
    size_t source_length = node->str.length;

    mycore_string_init(tree->mchar, tree->mchar_node_id, &replaced, (source_length + 2));
    mycore_string_append_with_replacement_null_characters(&replaced, node->str.data, source_length);

    node->str = replaced;
}