diff --git a/examples/myhtml/tokenizer_colorize_high_level.c b/examples/myhtml/tokenizer_colorize_high_level.c
index 05ee28f..aa3424a 100644
--- a/examples/myhtml/tokenizer_colorize_high_level.c
+++ b/examples/myhtml/tokenizer_colorize_high_level.c
@@ -1,20 +1,20 @@
/*
Copyright (C) 2015-2016 Alexander Borisov
-
+
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
-
+
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
-
+
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-
+
Author: lex.borisov@gmail.com (Alexander Borisov)
*/
@@ -37,30 +37,30 @@ struct res_html load_html_file(const char* filename)
fprintf(stderr, "Can't open html file: %s\n", filename);
exit(EXIT_FAILURE);
}
-
+
if(fseek(fh, 0L, SEEK_END) != 0) {
fprintf(stderr, "Can't set position (fseek) in file: %s\n", filename);
exit(EXIT_FAILURE);
}
-
+
long size = ftell(fh);
-
+
if(fseek(fh, 0L, SEEK_SET) != 0) {
fprintf(stderr, "Can't set position (fseek) in file: %s\n", filename);
exit(EXIT_FAILURE);
}
-
+
if(size <= 0) {
fprintf(stderr, "Can't get file size or file is empty: %s\n", filename);
exit(EXIT_FAILURE);
}
-
+
char *html = (char*)malloc(size + 1);
if(html == NULL) {
fprintf(stderr, "Can't allocate mem for html file: %s\n", filename);
exit(EXIT_FAILURE);
}
-
+
size_t nread = fread(html, 1, size, fh);
if (nread != size) {
fprintf(stderr, "could not read %ld bytes (" MyCORE_FMT_Z " bytes done)\n", size, nread);
@@ -68,7 +68,7 @@ struct res_html load_html_file(const char* filename)
}
fclose(fh);
-
+
struct res_html res = {html, (size_t)size};
return res;
}
@@ -77,10 +77,10 @@ void colorize_print(mycore_incoming_buffer_t *inc_buf, size_t begin, size_t leng
{
if(length) {
inc_buf = mycore_incoming_buffer_find_by_position(inc_buf, begin);
-
+
size_t between_begin = (begin - mycore_incoming_buffer_offset(inc_buf));
const char* between_data = mycore_incoming_buffer_data(inc_buf);
-
+
printf("%s%.*s\e[0m", color, (int)length, &between_data[between_begin]);
}
}
@@ -91,16 +91,16 @@ size_t colorize_print_attributes(myhtml_tree_t* tree, myhtml_tree_attr_t* attr,
{
myhtml_position_t key_pos = myhtml_attribute_key_raw_position(attr);
myhtml_position_t value_pos = myhtml_attribute_value_raw_position(attr);
-
+
if(key_pos.length)
{
/* print <div[ ]key=value> */
if(last_pos < key_pos.begin)
colorize_print(inc_buf, last_pos, (key_pos.begin - last_pos), "\e[31m");
-
+
/* print <div [key]=value> */
colorize_print(inc_buf, key_pos.begin, key_pos.length, "\e[33m");
-
+
/* get/check max position */
if((key_pos.begin + key_pos.length) > last_pos)
last_pos = key_pos.begin + key_pos.length;
@@ -110,7 +110,7 @@ size_t colorize_print_attributes(myhtml_tree_t* tree, myhtml_tree_attr_t* attr,
if(value_pos.length && last_pos < value_pos.begin)
colorize_print(inc_buf, last_pos, (value_pos.begin - last_pos), "\e[31m");
}
-
+
if(value_pos.length)
{
/* print <div key[=]value> */
@@ -118,37 +118,37 @@ size_t colorize_print_attributes(myhtml_tree_t* tree, myhtml_tree_attr_t* attr,
size_t between_begin = key_pos.begin + key_pos.length;
colorize_print(inc_buf, between_begin, (value_pos.begin - between_begin), "\e[31m");
}
-
+
/* print <div key=[value]> */
colorize_print(inc_buf, value_pos.begin, value_pos.length, "\e[34m");
-
+
/* get/check max position */
if(value_pos.begin + value_pos.length > last_pos)
last_pos = value_pos.begin + value_pos.length;
}
-
+
attr = myhtml_attribute_next(attr);
}
-
+
return last_pos;
}
void * colorize_callback_before_token_done(myhtml_tree_t* tree, myhtml_token_node_t* token, void* ctx)
{
mycore_incoming_buffer_t *inc_buf = myhtml_tree_incoming_buffer_first(tree);
-
- myhtml_position_t token_pos = myhtml_token_node_raw_pasition(token);
- myhtml_position_t token_element_pos = myhtml_token_node_element_pasition(token);
-
+
+ myhtml_position_t token_pos = myhtml_token_node_raw_position(token);
+ myhtml_position_t token_element_pos = myhtml_token_node_element_position(token);
+
size_t last_pos = token_pos.begin + token_pos.length;
-
+
switch (myhtml_token_node_tag_id(token)) {
case MyHTML_TAG__DOCTYPE: {
/* print <!DOCTYPE html[>] */
colorize_print(inc_buf, last_pos, ((token_element_pos.begin + token_element_pos.length) - last_pos), "\e[37m");
break;
@@ -160,9 +160,9 @@ void * colorize_callback_before_token_done(myhtml_tree_t* tree, myhtml_token_nod
case MyHTML_TAG__COMMENT: {
/* print <!--comment[-->] */
colorize_print(inc_buf, last_pos, ((token_element_pos.begin + token_element_pos.length) - last_pos), "\e[32m");
break;
@@ -170,20 +170,20 @@ void * colorize_callback_before_token_done(myhtml_tree_t* tree, myhtml_token_nod
default: {
/* print [<]div> */
colorize_print(inc_buf, token_element_pos.begin, (token_pos.begin - token_element_pos.begin), "\e[31m");
-
+
/* print <[div]> */
colorize_print(inc_buf, token_pos.begin, token_pos.length, "\e[31m");
-
+
if(myhtml_token_node_attribute_first(token))
last_pos = colorize_print_attributes(tree, myhtml_token_node_attribute_first(token), inc_buf, last_pos);
-
+
/* print <div[>] */
colorize_print(inc_buf, last_pos, ((token_element_pos.begin + token_element_pos.length) - last_pos), "\e[31m");
-
+
break;
}
}
-
+
return ctx;
}
@@ -198,30 +198,28 @@ int main(int argc, const char * argv[])
printf("Bad ARGV!\nUse: tokenizer_colorize_high_level <path_to_html_file>\n");
exit(EXIT_FAILURE);
}
-
+
struct res_html res = load_html_file(path);
-
+
// basic init
myhtml_t* myhtml = myhtml_create();
myhtml_init(myhtml, MyHTML_OPTIONS_DEFAULT, 1, 0);
-
+
// init tree
myhtml_tree_t* tree = myhtml_tree_create();
myhtml_tree_init(tree, myhtml);
-
+
myhtml_callback_before_token_done_set(tree, colorize_callback_before_token_done, NULL);
-
+
// parse html
myhtml_parse(tree, MyENCODING_UTF_8, res.html, res.size);
-
+
printf("\n");
-
+
myhtml_tree_destroy(tree);
myhtml_destroy(myhtml);
-
+
free(res.html);
-
+
return 0;
}
-
-
diff --git a/include/myhtml/api.h b/include/myhtml/api.h
index 55fb672..03659e0 100755
--- a/include/myhtml/api.h
+++ b/include/myhtml/api.h
@@ -1,20 +1,20 @@
/*
Copyright (C) 2015-2017 Alexander Borisov
-
+
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
-
+
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
-
+
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-
+
Author: lex.borisov@gmail.com (Alexander Borisov)
*/
@@ -328,7 +328,7 @@ enum myhtml_tags {
// base
/*
Very important!!!
-
+
for myhtml 0..00ffff; MyHTML_STATUS_OK == 0x000000
for mycss and modules 010000..01ffff; MyCSS_STATUS_OK == 0x000000
for modest 020000..02ffff; MODEST_STATUS_OK == 0x000000
@@ -380,7 +380,7 @@ enum myhtml_namespace {
MyHTML_NAMESPACE_XLINK = 0x04,
MyHTML_NAMESPACE_XML = 0x05,
MyHTML_NAMESPACE_XMLNS = 0x06,
-
+
/* MyHTML_NAMESPACE_ANY == MyHTML_NAMESPACE_LAST_ENTRY */
MyHTML_NAMESPACE_ANY = 0x07,
MyHTML_NAMESPACE_LAST_ENTRY = 0x07
@@ -1366,7 +1366,7 @@ myhtml_node_string(myhtml_tree_node_t *node);
* @return myhtml_tree_node_t
*/
myhtml_position_t
-myhtml_node_raw_pasition(myhtml_tree_node_t *node);
+myhtml_node_raw_position(myhtml_tree_node_t *node);
/**
* Get element position for Tree Node in Incoming Buffer
@@ -1378,7 +1378,7 @@ myhtml_node_raw_pasition(myhtml_tree_node_t *node);
* @return myhtml_tree_node_t
*/
myhtml_position_t
-myhtml_node_element_pasition(myhtml_tree_node_t *node);
+myhtml_node_element_position(myhtml_tree_node_t *node);
/**
* Get data value from tree node
@@ -1619,7 +1619,7 @@ myhtml_token_node_tag_id(myhtml_token_node_t *token_node);
* @return myhtml_position_t
*/
myhtml_position_t
-myhtml_token_node_raw_pasition(myhtml_token_node_t *token_node);
+myhtml_token_node_raw_position(myhtml_token_node_t *token_node);
/**
* Get element position for Token Node in Incoming Buffer
@@ -1631,7 +1631,7 @@ myhtml_token_node_raw_pasition(myhtml_token_node_t *token_node);
* @return myhtml_position_t
*/
myhtml_position_t
-myhtml_token_node_element_pasition(myhtml_token_node_t *token_node);
+myhtml_token_node_element_position(myhtml_token_node_t *token_node);
/**
* Get first attribute of a token node
diff --git a/include/myhtml/myhtml.h b/include/myhtml/myhtml.h
index d111add..56213b7 100644
--- a/include/myhtml/myhtml.h
+++ b/include/myhtml/myhtml.h
@@ -1,20 +1,20 @@
/*
Copyright (C) 2015-2017 Alexander Borisov
-
+
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
-
+
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
-
+
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-
+
Author: lex.borisov@gmail.com (Alexander Borisov)
*/
@@ -69,10 +69,10 @@ struct myhtml {
mythread_t* thread_batch;
mythread_t* thread_list[3];
size_t thread_total;
-
+
myhtml_tokenizer_state_f* parse_state_func;
myhtml_insertion_f* insertion_func;
-
+
enum myhtml_options opt;
myhtml_tree_node_t *marker;
};
@@ -177,8 +177,8 @@ myhtml_tree_attr_t * myhtml_node_attribute_first(myhtml_tree_node_t *node);
myhtml_tree_attr_t * myhtml_node_attribute_last(myhtml_tree_node_t *node);
const char * myhtml_node_text(myhtml_tree_node_t *node, size_t *length);
mycore_string_t * myhtml_node_string(myhtml_tree_node_t *node);
-myhtml_position_t myhtml_node_raw_pasition(myhtml_tree_node_t *node);
-myhtml_position_t myhtml_node_element_pasition(myhtml_tree_node_t *node);
+myhtml_position_t myhtml_node_raw_position(myhtml_tree_node_t *node);
+myhtml_position_t myhtml_node_element_position(myhtml_tree_node_t *node);
void myhtml_node_set_data(myhtml_tree_node_t *node, void* data);
void * myhtml_node_get_data(myhtml_tree_node_t *node);
diff --git a/include/myhtml/token.h b/include/myhtml/token.h
index bf40736..eee981e 100644
--- a/include/myhtml/token.h
+++ b/include/myhtml/token.h
@@ -1,20 +1,20 @@
/*
Copyright (C) 2015-2017 Alexander Borisov
-
+
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
-
+
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
-
+
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-
+
Author: lex.borisov@gmail.com (Alexander Borisov)
*/
@@ -42,7 +42,7 @@ extern "C" {
struct myhtml_token_replacement_entry {
char* from;
size_t from_size;
-
+
char* to;
size_t to_size;
};
@@ -50,55 +50,55 @@ struct myhtml_token_replacement_entry {
struct myhtml_token_namespace_replacement {
char* from;
size_t from_size;
-
+
char* to;
size_t to_size;
-
+
enum myhtml_namespace ns;
};
struct myhtml_token_attr {
myhtml_token_attr_t* next;
myhtml_token_attr_t* prev;
-
+
mycore_string_t key;
mycore_string_t value;
-
+
size_t raw_key_begin;
size_t raw_key_length;
size_t raw_value_begin;
size_t raw_value_length;
-
+
enum myhtml_namespace ns;
};
struct myhtml_token_node {
myhtml_tag_id_t tag_id;
-
+
mycore_string_t str;
-
+
size_t raw_begin;
size_t raw_length;
-
+
size_t element_begin;
size_t element_length;
-
+
myhtml_token_attr_t* attr_first;
myhtml_token_attr_t* attr_last;
-
+
volatile enum myhtml_token_type type;
};
struct myhtml_token {
myhtml_tree_t* tree; // ref
-
+
mcobject_async_t* nodes_obj; // myhtml_token_node_t
mcobject_async_t* attr_obj; // myhtml_token_attr_t
-
+
// def thread node id
size_t mcasync_token_id;
size_t mcasync_attr_id;
-
+
bool is_new_tmp;
};
@@ -108,8 +108,8 @@ void myhtml_token_clean_all(myhtml_token_t* token);
myhtml_token_t * myhtml_token_destroy(myhtml_token_t* token);
myhtml_tag_id_t myhtml_token_node_tag_id(myhtml_token_node_t *token_node);
-myhtml_position_t myhtml_token_node_raw_pasition(myhtml_token_node_t *token_node);
-myhtml_position_t myhtml_token_node_element_pasition(myhtml_token_node_t *token_node);
+myhtml_position_t myhtml_token_node_raw_position(myhtml_token_node_t *token_node);
+myhtml_position_t myhtml_token_node_element_position(myhtml_token_node_t *token_node);
myhtml_tree_attr_t * myhtml_token_node_attribute_first(myhtml_token_node_t *token_node);
myhtml_tree_attr_t * myhtml_token_node_attribute_last(myhtml_token_node_t *token_node);
diff --git a/source/myhtml/api.h b/source/myhtml/api.h
index 55fb672..03659e0 100755
--- a/source/myhtml/api.h
+++ b/source/myhtml/api.h
@@ -1,20 +1,20 @@
/*
Copyright (C) 2015-2017 Alexander Borisov
-
+
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
-
+
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
-
+
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-
+
Author: lex.borisov@gmail.com (Alexander Borisov)
*/
@@ -328,7 +328,7 @@ enum myhtml_tags {
// base
/*
Very important!!!
-
+
for myhtml 0..00ffff; MyHTML_STATUS_OK == 0x000000
for mycss and modules 010000..01ffff; MyCSS_STATUS_OK == 0x000000
for modest 020000..02ffff; MODEST_STATUS_OK == 0x000000
@@ -380,7 +380,7 @@ enum myhtml_namespace {
MyHTML_NAMESPACE_XLINK = 0x04,
MyHTML_NAMESPACE_XML = 0x05,
MyHTML_NAMESPACE_XMLNS = 0x06,
-
+
/* MyHTML_NAMESPACE_ANY == MyHTML_NAMESPACE_LAST_ENTRY */
MyHTML_NAMESPACE_ANY = 0x07,
MyHTML_NAMESPACE_LAST_ENTRY = 0x07
@@ -1366,7 +1366,7 @@ myhtml_node_string(myhtml_tree_node_t *node);
* @return myhtml_tree_node_t
*/
myhtml_position_t
-myhtml_node_raw_pasition(myhtml_tree_node_t *node);
+myhtml_node_raw_position(myhtml_tree_node_t *node);
/**
* Get element position for Tree Node in Incoming Buffer
@@ -1378,7 +1378,7 @@ myhtml_node_raw_pasition(myhtml_tree_node_t *node);
* @return myhtml_tree_node_t
*/
myhtml_position_t
-myhtml_node_element_pasition(myhtml_tree_node_t *node);
+myhtml_node_element_position(myhtml_tree_node_t *node);
/**
* Get data value from tree node
@@ -1619,7 +1619,7 @@ myhtml_token_node_tag_id(myhtml_token_node_t *token_node);
* @return myhtml_position_t
*/
myhtml_position_t
-myhtml_token_node_raw_pasition(myhtml_token_node_t *token_node);
+myhtml_token_node_raw_position(myhtml_token_node_t *token_node);
/**
* Get element position for Token Node in Incoming Buffer
@@ -1631,7 +1631,7 @@ myhtml_token_node_raw_pasition(myhtml_token_node_t *token_node);
* @return myhtml_position_t
*/
myhtml_position_t
-myhtml_token_node_element_pasition(myhtml_token_node_t *token_node);
+myhtml_token_node_element_position(myhtml_token_node_t *token_node);
/**
* Get first attribute of a token node
diff --git a/source/myhtml/myhtml.c b/source/myhtml/myhtml.c
index 479f33e..b76f129 100644
--- a/source/myhtml/myhtml.c
+++ b/source/myhtml/myhtml.c
@@ -1,20 +1,20 @@
/*
Copyright (C) 2015-2017 Alexander Borisov
-
+
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
-
+
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
-
+
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-
+
Author: lex.borisov@gmail.com (Alexander Borisov)
*/
@@ -23,7 +23,7 @@
void myhtml_init_marker(myhtml_t* myhtml)
{
myhtml->marker = (myhtml_tree_node_t*)mycore_malloc(sizeof(myhtml_tree_node_t));
-
+
if(myhtml->marker)
myhtml_tree_node_clean(myhtml->marker);
}
@@ -39,20 +39,20 @@ mystatus_t myhtml_stream_create(myhtml_t* myhtml, mystatus_t* status, size_t cou
{
if(count == 0) {
myhtml->thread_stream = NULL;
-
+
*status = MyHTML_STATUS_OK;
return *status;
}
-
+
myhtml->thread_stream = mythread_create();
if(myhtml->thread_stream == NULL)
*status = MyCORE_STATUS_THREAD_ERROR_MEMORY_ALLOCATION;
-
+
*status = mythread_init(myhtml->thread_stream, MyTHREAD_TYPE_STREAM, count, id_increase);
-
+
if(*status)
myhtml->thread_stream = mythread_destroy(myhtml->thread_stream, NULL, NULL, true);
-
+
return *status;
}
@@ -60,40 +60,40 @@ mystatus_t myhtml_batch_create(myhtml_t* myhtml, mystatus_t* status, size_t coun
{
if(count == 0) {
myhtml->thread_batch = NULL;
-
+
*status = MyHTML_STATUS_OK;
return *status;
}
-
+
myhtml->thread_batch = mythread_create();
if(myhtml->thread_stream == NULL) {
myhtml->thread_stream = mythread_destroy(myhtml->thread_stream, NULL, NULL, true);
*status = MyCORE_STATUS_THREAD_ERROR_MEMORY_ALLOCATION;
}
-
+
*status = mythread_init(myhtml->thread_batch, MyTHREAD_TYPE_BATCH, count, id_increase);
-
+
if(*status)
myhtml->thread_batch = mythread_destroy(myhtml->thread_batch , NULL, NULL, true);
-
+
return *status;
}
mystatus_t myhtml_create_stream_and_batch(myhtml_t* myhtml, size_t stream_count, size_t batch_count)
{
mystatus_t status;
-
+
/* stream */
if(myhtml_stream_create(myhtml, &status, stream_count, 0)) {
return status;
}
-
+
/* batch */
if(myhtml_batch_create(myhtml, &status, batch_count, stream_count)) {
myhtml->thread_stream = mythread_destroy(myhtml->thread_stream, NULL, NULL, true);
return status;
}
-
+
return status;
}
#endif /* if undef MyCORE_BUILD_WITHOUT_THREADS */
@@ -106,80 +106,80 @@ myhtml_t * myhtml_create(void)
mystatus_t myhtml_init(myhtml_t* myhtml, enum myhtml_options opt, size_t thread_count, size_t queue_size)
{
mystatus_t status;
-
+
myhtml->opt = opt;
myhtml_init_marker(myhtml);
-
+
status = myhtml_tokenizer_state_init(myhtml);
if(status)
return status;
-
+
status = myhtml_rules_init(myhtml);
#ifdef MyCORE_BUILD_WITHOUT_THREADS
-
+
myhtml->thread_stream = NULL;
myhtml->thread_batch = NULL;
myhtml->thread_total = 0;
-
+
#else /* if undef MyCORE_BUILD_WITHOUT_THREADS */
if(status)
return status;
-
+
switch (opt) {
case MyHTML_OPTIONS_PARSE_MODE_SINGLE:
if((status = myhtml_create_stream_and_batch(myhtml, 0, 0)))
return status;
-
+
break;
-
+
case MyHTML_OPTIONS_PARSE_MODE_ALL_IN_ONE:
if((status = myhtml_create_stream_and_batch(myhtml, 1, 0)))
return status;
-
+
myhtml->thread_stream->context = mythread_queue_list_create(&status);
status = myhread_entry_create(myhtml->thread_stream, mythread_function_queue_stream, myhtml_parser_worker_stream, MyTHREAD_OPT_STOP);
-
+
break;
-
+
default:
// default MyHTML_OPTIONS_PARSE_MODE_SEPARATELY
if(thread_count < 2)
thread_count = 2;
-
+
if((status = myhtml_create_stream_and_batch(myhtml, 1, (thread_count - 1))))
return status;
-
+
myhtml->thread_stream->context = mythread_queue_list_create(&status);
myhtml->thread_batch->context = myhtml->thread_stream->context;
-
+
status = myhread_entry_create(myhtml->thread_stream, mythread_function_queue_stream, myhtml_parser_stream, MyTHREAD_OPT_STOP);
if(status)
return status;
-
+
for(size_t i = 0; i < myhtml->thread_batch->entries_size; i++) {
status = myhread_entry_create(myhtml->thread_batch, mythread_function_queue_batch, myhtml_parser_worker, MyTHREAD_OPT_STOP);
-
+
if(status)
return status;
}
-
+
break;
}
-
+
myhtml->thread_total = thread_count;
-
+
myhtml->thread_list[0] = myhtml->thread_stream;
myhtml->thread_list[1] = myhtml->thread_batch;
myhtml->thread_list[2] = NULL;
-
+
#endif /* if undef MyCORE_BUILD_WITHOUT_THREADS */
-
+
if(status)
return status;
-
+
myhtml_clean(myhtml);
-
+
return status;
}
@@ -192,33 +192,33 @@ myhtml_t* myhtml_destroy(myhtml_t* myhtml)
{
if(myhtml == NULL)
return NULL;
-
+
myhtml_destroy_marker(myhtml);
-
+
#ifndef MyCORE_BUILD_WITHOUT_THREADS
if(myhtml->thread_stream) {
mythread_queue_list_t* queue_list = myhtml->thread_stream->context;
if(queue_list)
mythread_queue_list_wait_for_done(myhtml->thread_stream, queue_list);
-
+
myhtml->thread_stream = mythread_destroy(myhtml->thread_stream, mythread_callback_quit, NULL, true);
-
+
if(myhtml->thread_batch)
myhtml->thread_batch = mythread_destroy(myhtml->thread_batch, mythread_callback_quit, NULL, true);
-
+
if(queue_list)
mythread_queue_list_destroy(queue_list);
}
#endif /* if undef MyCORE_BUILD_WITHOUT_THREADS */
-
+
myhtml_tokenizer_state_destroy(myhtml);
-
+
if(myhtml->insertion_func)
mycore_free(myhtml->insertion_func);
-
+
mycore_free(myhtml);
-
+
return NULL;
}
@@ -227,13 +227,13 @@ mystatus_t myhtml_parse(myhtml_tree_t* tree, myencoding_t encoding, const char*
if(tree->flags & MyHTML_TREE_FLAGS_PARSE_END) {
myhtml_tree_clean(tree);
}
-
+
myhtml_encoding_set(tree, encoding);
mystatus_t status = myhtml_tokenizer_begin(tree, html, html_size);
-
+
if(status)
return status;
-
+
return myhtml_tokenizer_end(tree);
}
@@ -242,22 +242,22 @@ mystatus_t myhtml_parse_fragment(myhtml_tree_t* tree, myencoding_t encoding, con
if(tree->flags & MyHTML_TREE_FLAGS_PARSE_END) {
myhtml_tree_clean(tree);
}
-
+
if(tag_id == 0)
tag_id = MyHTML_TAG_DIV;
-
+
if(ns == 0)
ns = MyHTML_NAMESPACE_HTML;
-
+
if(myhtml_tokenizer_fragment_init(tree, tag_id, ns) == NULL)
return MyHTML_STATUS_TOKENIZER_ERROR_FRAGMENT_INIT;
-
+
myhtml_encoding_set(tree, encoding);
mystatus_t status = myhtml_tokenizer_begin(tree, html, html_size);
-
+
if(status)
return status;
-
+
return myhtml_tokenizer_end(tree);
}
@@ -266,16 +266,16 @@ mystatus_t myhtml_parse_single(myhtml_tree_t* tree, myencoding_t encoding, const
if(tree->flags & MyHTML_TREE_FLAGS_PARSE_END) {
myhtml_tree_clean(tree);
}
-
+
tree->flags |= MyHTML_TREE_FLAGS_SINGLE_MODE;
-
+
myhtml_encoding_set(tree, encoding);
-
+
mystatus_t status = myhtml_tokenizer_begin(tree, html, html_size);
-
+
if(status)
return status;
-
+
return myhtml_tokenizer_end(tree);
}
@@ -284,25 +284,25 @@ mystatus_t myhtml_parse_fragment_single(myhtml_tree_t* tree, myencoding_t encodi
if(tree->flags & MyHTML_TREE_FLAGS_PARSE_END) {
myhtml_tree_clean(tree);
}
-
+
if(tag_id == 0)
tag_id = MyHTML_TAG_DIV;
-
+
if(ns == 0)
ns = MyHTML_NAMESPACE_HTML;
-
+
tree->flags |= MyHTML_TREE_FLAGS_SINGLE_MODE;
-
+
if(myhtml_tokenizer_fragment_init(tree, tag_id, ns) == NULL)
return MyHTML_STATUS_TOKENIZER_ERROR_FRAGMENT_INIT;
-
+
myhtml_encoding_set(tree, encoding);
-
+
mystatus_t status = myhtml_tokenizer_begin(tree, html, html_size);
-
+
if(status)
return status;
-
+
return myhtml_tokenizer_end(tree);
}
@@ -311,7 +311,7 @@ mystatus_t myhtml_parse_chunk(myhtml_tree_t* tree, const char* html, size_t html
if(tree->flags & MyHTML_TREE_FLAGS_PARSE_END) {
myhtml_tree_clean(tree);
}
-
+
return myhtml_tokenizer_chunk(tree, html, html_size);
}
@@ -320,16 +320,16 @@ mystatus_t myhtml_parse_chunk_fragment(myhtml_tree_t* tree, const char* html, si
if(tree->flags & MyHTML_TREE_FLAGS_PARSE_END) {
myhtml_tree_clean(tree);
}
-
+
if(tag_id == 0)
tag_id = MyHTML_TAG_DIV;
-
+
if(ns == 0)
ns = MyHTML_NAMESPACE_HTML;
-
+
if(myhtml_tokenizer_fragment_init(tree, tag_id, ns) == NULL)
return MyHTML_STATUS_TOKENIZER_ERROR_FRAGMENT_INIT;
-
+
return myhtml_tokenizer_chunk(tree, html, html_size);
}
@@ -337,7 +337,7 @@ mystatus_t myhtml_parse_chunk_single(myhtml_tree_t* tree, const char* html, size
{
if((tree->flags & MyHTML_TREE_FLAGS_SINGLE_MODE) == 0)
tree->flags |= MyHTML_TREE_FLAGS_SINGLE_MODE;
-
+
return myhtml_parse_chunk(tree, html, html_size);
}
@@ -345,7 +345,7 @@ mystatus_t myhtml_parse_chunk_fragment_single(myhtml_tree_t* tree, const char* h
{
if((tree->flags & MyHTML_TREE_FLAGS_SINGLE_MODE) == 0)
tree->flags |= MyHTML_TREE_FLAGS_SINGLE_MODE;
-
+
return myhtml_parse_chunk_fragment(tree, html, html_size, tag_id, ns);
}
@@ -358,7 +358,7 @@ void myhtml_encoding_set(myhtml_tree_t* tree, myencoding_t encoding)
{
if(encoding >= MyENCODING_LAST_ENTRY)
return;
-
+
tree->encoding_usereq = encoding;
tree->encoding = encoding;
}
@@ -378,22 +378,22 @@ mystatus_t myhtml_get_nodes_by_tag_id_in_scope_find_recursion(myhtml_tree_node_t
if(node->tag_id == tag_id) {
collection->list[ collection->length ] = node;
collection->length++;
-
+
if(collection->length >= collection->size)
{
mystatus_t mystatus = myhtml_collection_check_size(collection, 1024, 0);
-
+
if(mystatus != MyHTML_STATUS_OK)
return mystatus;
}
}
-
+
if(node->child)
myhtml_get_nodes_by_tag_id_in_scope_find_recursion(node->child, collection, tag_id);
-
+
node = node->next;
}
-
+
return MyHTML_STATUS_OK;
}
@@ -401,28 +401,28 @@ myhtml_collection_t * myhtml_get_nodes_by_tag_id_in_scope(myhtml_tree_t* tree, m
{
if(node == NULL)
return NULL;
-
+
mystatus_t mystatus = MyHTML_STATUS_OK;
-
+
if(collection == NULL) {
collection = myhtml_collection_create(1024, &mystatus);
}
-
+
if(mystatus) {
if(status)
*status = mystatus;
-
+
return collection;
}
-
+
if(node->child)
mystatus = myhtml_get_nodes_by_tag_id_in_scope_find_recursion(node->child, collection, tag_id);
-
+
collection->list[collection->length] = NULL;
-
+
if(status)
*status = mystatus;
-
+
return collection;
}
@@ -436,13 +436,13 @@ myhtml_collection_t * myhtml_get_nodes_by_tag_id(myhtml_tree_t* tree, myhtml_col
{
if(collection == NULL) {
collection = myhtml_collection_create(1024, NULL);
-
+
if(collection == NULL)
return NULL;
}
-
+
myhtml_tree_node_t *node = tree->node_html;
-
+
while(node)
{
if(node->tag_id == tag_id)
@@ -454,41 +454,41 @@ myhtml_collection_t * myhtml_get_nodes_by_tag_id(myhtml_tree_t* tree, myhtml_col
else {
if(status)
*status = MyHTML_STATUS_ERROR_MEMORY_ALLOCATION;
-
+
return collection;
}
}
-
+
if(node->child)
node = node->child;
else {
while(node != tree->node_html && node->next == NULL)
node = node->parent;
-
+
if(node == tree->node_html)
break;
-
+
node = node->next;
}
}
-
+
if(myhtml_collection_check_size(collection, 1, 1024) == MyHTML_STATUS_OK) {
collection->list[ collection->length ] = NULL;
}
else if(status) {
*status = MyHTML_STATUS_ERROR_MEMORY_ALLOCATION;
}
-
+
return collection;
}
myhtml_collection_t * myhtml_get_nodes_by_name(myhtml_tree_t* tree, myhtml_collection_t *collection, const char* html, size_t length, mystatus_t *status)
{
const myhtml_tag_context_t *tag_ctx = myhtml_tag_get_by_name(tree->tags, html, length);
-
+
if(tag_ctx == NULL)
return NULL;
-
+
return myhtml_get_nodes_by_tag_id(tree, collection, tag_ctx->id, status);
}
@@ -506,7 +506,7 @@ myhtml_tree_node_t * myhtml_node_first(myhtml_tree_t* tree)
// document -> html
return tree->document->child;
}
-
+
return NULL;
}
@@ -538,10 +538,10 @@ myhtml_tree_node_t * myhtml_node_last_child(myhtml_tree_node_t *node)
myhtml_tree_node_t * myhtml_node_create(myhtml_tree_t* tree, myhtml_tag_id_t tag_id, enum myhtml_namespace ns)
{
myhtml_tree_node_t *node = myhtml_tree_node_create(tree);
-
+
node->tag_id = tag_id;
node->ns = ns;
-
+
return node;
}
@@ -569,9 +569,9 @@ myhtml_tree_node_t * myhtml_node_insert_before(myhtml_tree_node_t *target, myhtm
{
if(target == NULL || node == NULL)
return NULL;
-
+
myhtml_tree_node_insert_before(target, node);
-
+
return node;
}
@@ -579,9 +579,9 @@ myhtml_tree_node_t * myhtml_node_insert_after(myhtml_tree_node_t *target, myhtml
{
if(target == NULL || node == NULL)
return NULL;
-
+
myhtml_tree_node_insert_after(target, node);
-
+
return node;
}
@@ -589,9 +589,9 @@ myhtml_tree_node_t * myhtml_node_append_child(myhtml_tree_node_t *target, myhtml
{
if(target == NULL || node == NULL)
return NULL;
-
+
myhtml_tree_node_add_child(target, node);
-
+
return node;
}
@@ -599,15 +599,15 @@ myhtml_tree_node_t * myhtml_node_insert_to_appropriate_place(myhtml_tree_node_t
{
if(target == NULL || node == NULL)
return NULL;
-
+
enum myhtml_tree_insertion_mode mode;
-
+
target->tree->foster_parenting = true;
target = myhtml_tree_appropriate_place_inserting_in_tree(target, &mode);
target->tree->foster_parenting = false;
-
+
myhtml_tree_node_insert_by_mode(target, node, mode);
-
+
return node;
}
@@ -615,19 +615,19 @@ mycore_string_t * myhtml_node_text_set(myhtml_tree_node_t *node, const char* tex
{
if(node == NULL)
return NULL;
-
+
if(encoding >= MyENCODING_LAST_ENTRY)
return NULL;
-
+
myhtml_tree_t* tree = node->tree;
-
+
if(node->token == NULL) {
node->token = myhtml_token_node_create(tree->token, tree->mcasync_rules_token_id);
-
+
if(node->token == NULL)
return NULL;
}
-
+
if(node->token->str.data == NULL) {
mycore_string_init(tree->mchar, tree->mchar_node_id, &node->token->str, (length + 2));
}
@@ -639,17 +639,17 @@ mycore_string_t * myhtml_node_text_set(myhtml_tree_node_t *node, const char* tex
else
node->token->str.length = 0;
}
-
+
if(encoding != MyENCODING_UTF_8) {
myencoding_string_append(&node->token->str, text, length, encoding);
}
else {
mycore_string_append(&node->token->str, text, length);
}
-
+
node->token->raw_begin = 0;
node->token->raw_length = 0;
-
+
return &node->token->str;
}
@@ -657,19 +657,19 @@ mycore_string_t * myhtml_node_text_set_with_charef(myhtml_tree_node_t *node, con
{
if(node == NULL)
return NULL;
-
+
if(encoding >= MyENCODING_LAST_ENTRY)
return NULL;
-
+
myhtml_tree_t* tree = node->tree;
-
+
if(node->token == NULL) {
node->token = myhtml_token_node_create(tree->token, tree->mcasync_rules_token_id);
-
+
if(node->token == NULL)
return NULL;
}
-
+
if(node->token->str.data == NULL) {
mycore_string_init(tree->mchar, tree->mchar_node_id, &node->token->str, (length + 2));
}
@@ -681,19 +681,19 @@ mycore_string_t * myhtml_node_text_set_with_charef(myhtml_tree_node_t *node, con
else
node->token->str.length = 0;
}
-
+
myhtml_data_process_entry_t proc_entry;
myhtml_data_process_entry_clean(&proc_entry);
-
+
proc_entry.encoding = encoding;
myencoding_result_clean(&proc_entry.res);
-
+
myhtml_data_process(&proc_entry, &node->token->str, text, length);
myhtml_data_process_end(&proc_entry, &node->token->str);
-
+
node->token->raw_begin = 0;
node->token->raw_length = 0;
-
+
return &node->token->str;
}
@@ -721,18 +721,18 @@ const char * myhtml_tag_name_by_id(myhtml_tree_t* tree, myhtml_tag_id_t tag_id,
{
if(length)
*length = 0;
-
+
if(tree == NULL || tree->tags == NULL)
return NULL;
-
+
const myhtml_tag_context_t *tag_ctx = myhtml_tag_get_by_id(tree->tags, tag_id);
-
+
if(tag_ctx == NULL)
return NULL;
-
+
if(length)
*length = tag_ctx->name_length;
-
+
return tag_ctx->name;
}
@@ -740,12 +740,12 @@ myhtml_tag_id_t myhtml_tag_id_by_name(myhtml_tree_t* tree, const char *tag_name,
{
if(tree == NULL || tree->tags == NULL)
return MyHTML_TAG__UNDEF;
-
+
const myhtml_tag_context_t *ctx = myhtml_tag_get_by_name(tree->tags, tag_name, length);
-
+
if(ctx == NULL)
return MyHTML_TAG__UNDEF;
-
+
return ctx->id;
}
@@ -753,7 +753,7 @@ bool myhtml_node_is_close_self(myhtml_tree_node_t *node)
{
if(node->token)
return (node->token->type & MyHTML_TOKEN_TYPE_CLOSE_SELF);
-
+
return false;
}
@@ -761,7 +761,7 @@ myhtml_tree_attr_t * myhtml_node_attribute_first(myhtml_tree_node_t *node)
{
if(node->token)
return node->token->attr_first;
-
+
return NULL;
}
@@ -769,7 +769,7 @@ myhtml_tree_attr_t * myhtml_node_attribute_last(myhtml_tree_node_t *node)
{
if(node->token)
return node->token->attr_last;
-
+
return NULL;
}
@@ -779,13 +779,13 @@ const char * myhtml_node_text(myhtml_tree_node_t *node, size_t *length)
{
if(length)
*length = node->token->str.length;
-
+
return node->token->str.data;
}
-
+
if(length)
*length = 0;
-
+
return NULL;
}
@@ -793,23 +793,23 @@ mycore_string_t * myhtml_node_string(myhtml_tree_node_t *node)
{
if(node && node->token)
return &node->token->str;
-
+
return NULL;
}
-myhtml_position_t myhtml_node_raw_pasition(myhtml_tree_node_t *node)
+myhtml_position_t myhtml_node_raw_position(myhtml_tree_node_t *node)
{
if(node && node->token)
return (myhtml_position_t){node->token->raw_begin, node->token->raw_length};
-
+
return (myhtml_position_t){0, 0};
}
-myhtml_position_t myhtml_node_element_pasition(myhtml_tree_node_t *node)
+myhtml_position_t myhtml_node_element_position(myhtml_tree_node_t *node)
{
if(node && node->token)
return (myhtml_position_t){node->token->element_begin, node->token->element_length};
-
+
return (myhtml_position_t){0, 0};
}
@@ -834,36 +834,36 @@ mystatus_t myhtml_get_nodes_by_attribute_key_recursion(myhtml_tree_node_t* node,
{
if(node->token && node->token->attr_first) {
myhtml_tree_attr_t* attr = node->token->attr_first;
-
+
while(attr) {
mycore_string_t* str_key = &attr->key;
-
+
if(str_key->length == key_len && mycore_strncasecmp(str_key->data, key, key_len) == 0) {
collection->list[ collection->length ] = node;
-
+
collection->length++;
if(collection->length >= collection->size) {
mystatus_t status = myhtml_collection_check_size(collection, 1024, 0);
-
+
if(status)
return status;
}
}
-
+
attr = attr->next;
}
}
-
+
if(node->child) {
mystatus_t status = myhtml_get_nodes_by_attribute_key_recursion(node->child, collection, key, key_len);
-
+
if(status)
return status;
}
-
+
node = node->next;
}
-
+
return MyHTML_STATUS_OK;
}
@@ -871,19 +871,19 @@ myhtml_collection_t * myhtml_get_nodes_by_attribute_key(myhtml_tree_t *tree, myh
{
if(collection == NULL) {
collection = myhtml_collection_create(1024, status);
-
+
if((status && *status) || collection == NULL)
return NULL;
}
-
+
if(scope_node == NULL)
scope_node = tree->node_html;
-
+
mystatus_t rec_status = myhtml_get_nodes_by_attribute_key_recursion(scope_node, collection, key, key_len);
-
+
if(rec_status && status)
*status = rec_status;
-
+
return collection;
}
@@ -897,14 +897,14 @@ bool myhtml_get_nodes_by_attribute_value_recursion_whitespace_separated(mycore_s
{
if(str->length < value_len)
return false;
-
+
const char *data = str->data;
-
+
if(mycore_strncmp(data, value, value_len) == 0) {
if((str->length > value_len && mycore_utils_whithspace(data[value_len], ==, ||)) || str->length == value_len)
return true;
}
-
+
for(size_t i = 1; (str->length - i) >= value_len; i++)
{
if(mycore_utils_whithspace(data[(i - 1)], ==, ||)) {
@@ -914,7 +914,7 @@ bool myhtml_get_nodes_by_attribute_value_recursion_whitespace_separated(mycore_s
}
}
}
-
+
return false;
}
@@ -922,7 +922,7 @@ bool myhtml_get_nodes_by_attribute_value_recursion_begin(mycore_string_t* str, c
{
if(str->length < value_len)
return false;
-
+
return mycore_strncmp(str->data, value, value_len) == 0;
}
@@ -930,7 +930,7 @@ bool myhtml_get_nodes_by_attribute_value_recursion_end(mycore_string_t* str, con
{
if(str->length < value_len)
return false;
-
+
return mycore_strncmp(&str->data[ (str->length - (str->length - value_len)) ], value, value_len) == 0;
}
@@ -938,23 +938,23 @@ bool myhtml_get_nodes_by_attribute_value_recursion_contain(mycore_string_t* str,
{
if(str->length < value_len)
return false;
-
+
const char *data = str->data;
-
+
for(size_t i = 0; (str->length - i) >= value_len; i++)
{
if(mycore_strncmp(&data[i], value, value_len) == 0) {
return true;
}
}
-
+
return false;
}
bool myhtml_get_nodes_by_attribute_value_recursion_hyphen_separated(mycore_string_t* str, const char* value, size_t value_len)
{
const char *data = str->data;
-
+
if(str->length < value_len)
return false;
else if(str->length == value_len && mycore_strncmp(data, value, value_len) == 0) {
@@ -963,7 +963,7 @@ bool myhtml_get_nodes_by_attribute_value_recursion_hyphen_separated(mycore_strin
else if(mycore_strncmp(data, value, value_len) == 0 && data[value_len] == '-') {
return true;
}
-
+
return false;
}
@@ -977,14 +977,14 @@ bool myhtml_get_nodes_by_attribute_value_recursion_whitespace_separated_i(mycore
{
if(str->length < value_len)
return false;
-
+
const char *data = str->data;
-
+
if(mycore_strncasecmp(data, value, value_len) == 0) {
if((str->length > value_len && mycore_utils_whithspace(data[value_len], ==, ||)) || str->length == value_len)
return true;
}
-
+
for(size_t i = 1; (str->length - i) >= value_len; i++)
{
if(mycore_utils_whithspace(data[(i - 1)], ==, ||)) {
@@ -994,7 +994,7 @@ bool myhtml_get_nodes_by_attribute_value_recursion_whitespace_separated_i(mycore
}
}
}
-
+
return false;
}
@@ -1002,7 +1002,7 @@ bool myhtml_get_nodes_by_attribute_value_recursion_begin_i(mycore_string_t* str,
{
if(str->length < value_len)
return false;
-
+
return mycore_strncasecmp(str->data, value, value_len) == 0;
}
@@ -1010,7 +1010,7 @@ bool myhtml_get_nodes_by_attribute_value_recursion_end_i(mycore_string_t* str, c
{
if(str->length < value_len)
return false;
-
+
return mycore_strncasecmp(&str->data[ (str->length - (str->length - value_len)) ], value, value_len) == 0;
}
@@ -1018,23 +1018,23 @@ bool myhtml_get_nodes_by_attribute_value_recursion_contain_i(mycore_string_t* st
{
if(str->length < value_len)
return false;
-
+
const char *data = str->data;
-
+
for(size_t i = 0; (str->length - i) >= value_len; i++)
{
if(mycore_strncasecmp(&data[i], value, value_len) == 0) {
return true;
}
}
-
+
return false;
}
bool myhtml_get_nodes_by_attribute_value_recursion_hyphen_separated_i(mycore_string_t* str, const char* value, size_t value_len)
{
const char *data = str->data;
-
+
if(str->length < value_len)
return false;
else if(str->length == value_len && mycore_strncasecmp(data, value, value_len) == 0) {
@@ -1043,7 +1043,7 @@ bool myhtml_get_nodes_by_attribute_value_recursion_hyphen_separated_i(mycore_str
else if(mycore_strncasecmp(data, value, value_len) == 0 && data[value_len] == '-') {
return true;
}
-
+
return false;
}
@@ -1056,36 +1056,36 @@ mystatus_t myhtml_get_nodes_by_attribute_value_recursion(myhtml_tree_node_t* nod
{
if(node->token && node->token->attr_first) {
myhtml_tree_attr_t* attr = node->token->attr_first;
-
+
while(attr) {
mycore_string_t* str = &attr->value;
-
+
if(func_eq(str, value, value_len)) {
collection->list[ collection->length ] = node;
-
+
collection->length++;
if(collection->length >= collection->size) {
mystatus_t status = myhtml_collection_check_size(collection, 1024, 0);
-
+
if(status)
return status;
}
}
-
+
attr = attr->next;
}
}
-
+
if(node->child) {
mystatus_t status = myhtml_get_nodes_by_attribute_value_recursion(node->child, collection, func_eq, value, value_len);
-
+
if(status)
return status;
}
-
+
node = node->next;
}
-
+
return MyHTML_STATUS_OK;
}
@@ -1098,41 +1098,41 @@ mystatus_t myhtml_get_nodes_by_attribute_value_recursion_by_key(myhtml_tree_node
{
if(node->token && node->token->attr_first) {
myhtml_tree_attr_t* attr = node->token->attr_first;
-
+
while(attr) {
mycore_string_t* str_key = &attr->key;
mycore_string_t* str = &attr->value;
-
+
if(str_key->length == key_len && mycore_strncasecmp(str_key->data, key, key_len) == 0)
{
if(func_eq(str, value, value_len)) {
collection->list[ collection->length ] = node;
-
+
collection->length++;
if(collection->length >= collection->size) {
mystatus_t status = myhtml_collection_check_size(collection, 1024, 0);
-
+
if(status)
return status;
}
}
}
-
+
attr = attr->next;
}
}
-
+
if(node->child) {
mystatus_t status = myhtml_get_nodes_by_attribute_value_recursion_by_key(node->child, collection, func_eq,
key, key_len, value, value_len);
-
+
if(status)
return status;
}
-
+
node = node->next;
}
-
+
return MyHTML_STATUS_OK;
}
@@ -1144,24 +1144,24 @@ myhtml_collection_t * _myhtml_get_nodes_by_attribute_value(myhtml_tree_t *tree,
{
if(collection == NULL) {
collection = myhtml_collection_create(1024, status);
-
+
if((status && *status) || collection == NULL)
return NULL;
}
-
+
if(node == NULL)
node = tree->node_html;
-
+
mystatus_t rec_status;
-
+
if(key && key_len)
rec_status = myhtml_get_nodes_by_attribute_value_recursion_by_key(node, collection, func_eq, key, key_len, value, value_len);
else
rec_status = myhtml_get_nodes_by_attribute_value_recursion(node, collection, func_eq, value, value_len);
-
+
if(rec_status && status)
*status = rec_status;
-
+
return collection;
}
@@ -1176,7 +1176,7 @@ myhtml_collection_t * myhtml_get_nodes_by_attribute_value(myhtml_tree_t *tree, m
myhtml_get_nodes_by_attribute_value_recursion_eq_i,
key, key_len, value, value_len, status);
}
-
+
return _myhtml_get_nodes_by_attribute_value(tree, collection, node,
myhtml_get_nodes_by_attribute_value_recursion_eq,
key, key_len, value, value_len, status);
@@ -1193,7 +1193,7 @@ myhtml_collection_t * myhtml_get_nodes_by_attribute_value_whitespace_separated(m
myhtml_get_nodes_by_attribute_value_recursion_whitespace_separated_i,
key, key_len, value, value_len, status);
}
-
+
return _myhtml_get_nodes_by_attribute_value(tree, collection, node,
myhtml_get_nodes_by_attribute_value_recursion_whitespace_separated,
key, key_len, value, value_len, status);
@@ -1210,7 +1210,7 @@ myhtml_collection_t * myhtml_get_nodes_by_attribute_value_begin(myhtml_tree_t *t
myhtml_get_nodes_by_attribute_value_recursion_begin_i,
key, key_len, value, value_len, status);
}
-
+
return _myhtml_get_nodes_by_attribute_value(tree, collection, node,
myhtml_get_nodes_by_attribute_value_recursion_begin,
key, key_len, value, value_len, status);
@@ -1227,7 +1227,7 @@ myhtml_collection_t * myhtml_get_nodes_by_attribute_value_end(myhtml_tree_t *tre
myhtml_get_nodes_by_attribute_value_recursion_end_i,
key, key_len, value, value_len, status);
}
-
+
return _myhtml_get_nodes_by_attribute_value(tree, collection, node,
myhtml_get_nodes_by_attribute_value_recursion_end,
key, key_len, value, value_len, status);
@@ -1244,7 +1244,7 @@ myhtml_collection_t * myhtml_get_nodes_by_attribute_value_contain(myhtml_tree_t
myhtml_get_nodes_by_attribute_value_recursion_contain_i,
key, key_len, value, value_len, status);
}
-
+
return _myhtml_get_nodes_by_attribute_value(tree, collection, node,
myhtml_get_nodes_by_attribute_value_recursion_contain,
key, key_len, value, value_len, status);
@@ -1261,7 +1261,7 @@ myhtml_collection_t * myhtml_get_nodes_by_attribute_value_hyphen_separated(myhtm
myhtml_get_nodes_by_attribute_value_recursion_hyphen_separated_i,
key, key_len, value, value_len, status);
}
-
+
return _myhtml_get_nodes_by_attribute_value(tree, collection, node,
myhtml_get_nodes_by_attribute_value_recursion_hyphen_separated,
key, key_len, value, value_len, status);
@@ -1296,13 +1296,13 @@ const char * myhtml_attribute_key(myhtml_tree_attr_t *attr, size_t *length)
{
if(length)
*length = attr->key.length;
-
+
return attr->key.data;
}
-
+
if(length)
*length = 0;
-
+
return NULL;
}
@@ -1312,13 +1312,13 @@ const char * myhtml_attribute_value(myhtml_tree_attr_t *attr, size_t *length)
{
if(length)
*length = attr->value.length;
-
+
return attr->value.data;
}
-
+
if(length)
*length = 0;
-
+
return NULL;
}
@@ -1326,7 +1326,7 @@ mycore_string_t * myhtml_attribute_key_string(myhtml_tree_attr_t* attr)
{
if(attr)
return &attr->key;
-
+
return NULL;
}
@@ -1334,7 +1334,7 @@ mycore_string_t * myhtml_attribute_value_string(myhtml_tree_attr_t* attr)
{
if(attr)
return &attr->value;
-
+
return NULL;
}
@@ -1342,7 +1342,7 @@ myhtml_tree_attr_t * myhtml_attribute_by_key(myhtml_tree_node_t *node, const cha
{
if(node == NULL || node->token == NULL)
return NULL;
-
+
return myhtml_token_attr_by_name(node->token, key, key_len);
}
@@ -1350,16 +1350,16 @@ myhtml_tree_attr_t * myhtml_attribute_add(myhtml_tree_node_t *node, const char *
{
if(node == NULL)
return NULL;
-
+
myhtml_tree_t *tree = node->tree;
-
+
if(node->token == NULL) {
node->token = myhtml_token_node_create(tree->token, tree->mcasync_rules_token_id);
-
+
if(node->token == NULL)
return NULL;
}
-
+
return myhtml_token_node_attr_append_with_convert_encoding(tree->token, node->token, key, key_len,
value, value_len, tree->mcasync_rules_token_id, encoding);
}
@@ -1368,7 +1368,7 @@ myhtml_tree_attr_t * myhtml_attribute_remove(myhtml_tree_node_t *node, myhtml_tr
{
if(node == NULL || node->token == NULL)
return NULL;
-
+
return myhtml_token_attr_remove(node->token, attr);
}
@@ -1376,7 +1376,7 @@ myhtml_tree_attr_t * myhtml_attribute_remove_by_key(myhtml_tree_node_t *node, co
{
if(node == NULL || node->token == NULL)
return NULL;
-
+
return myhtml_token_attr_remove_by_name(node->token, key, key_len);
}
@@ -1384,7 +1384,7 @@ void myhtml_attribute_delete(myhtml_tree_t *tree, myhtml_tree_node_t *node, myht
{
if(node == NULL || node->token == NULL)
return;
-
+
myhtml_token_attr_remove(node->token, attr);
myhtml_attribute_free(tree, attr);
}
@@ -1395,7 +1395,7 @@ void myhtml_attribute_free(myhtml_tree_t *tree, myhtml_tree_attr_t *attr)
mchar_async_free(attr->key.mchar, attr->key.node_idx, attr->key.data);
if(attr->value.data)
mchar_async_free(attr->value.mchar, attr->value.node_idx, attr->value.data);
-
+
mcobject_async_free(tree->token->attr_obj, attr);
}
@@ -1403,7 +1403,7 @@ myhtml_position_t myhtml_attribute_key_raw_position(myhtml_tree_attr_t *attr)
{
if(attr)
return (myhtml_position_t){attr->raw_key_begin, attr->raw_key_length};
-
+
return (myhtml_position_t){0, 0};
}
@@ -1411,7 +1411,7 @@ myhtml_position_t myhtml_attribute_value_raw_position(myhtml_tree_attr_t *attr)
{
if(attr)
return (myhtml_position_t){attr->raw_value_begin, attr->raw_value_length};
-
+
return (myhtml_position_t){0, 0};
}
@@ -1421,30 +1421,30 @@ myhtml_position_t myhtml_attribute_value_raw_position(myhtml_tree_attr_t *attr)
myhtml_collection_t * myhtml_collection_create(size_t size, mystatus_t *status)
{
myhtml_collection_t *collection = (myhtml_collection_t*)mycore_malloc(sizeof(myhtml_collection_t));
-
+
if(collection == NULL) {
if(status)
*status = MyHTML_STATUS_ERROR_MEMORY_ALLOCATION;
-
+
return NULL;
}
-
+
collection->size = size;
collection->length = 0;
collection->list = (myhtml_tree_node_t **)mycore_malloc(sizeof(myhtml_tree_node_t*) * size);
-
+
if(collection->list == NULL) {
mycore_free(collection);
-
+
if(status)
*status = MyHTML_STATUS_ERROR_MEMORY_ALLOCATION;
-
+
return NULL;
}
-
+
if(status)
*status = MyHTML_STATUS_OK;
-
+
return collection;
}
@@ -1454,7 +1454,7 @@ mystatus_t myhtml_collection_check_size(myhtml_collection_t *collection, size_t
{
size_t tmp_size = collection->length + need + upto_length + 1;
myhtml_tree_node_t **tmp = (myhtml_tree_node_t **)mycore_realloc(collection->list, sizeof(myhtml_tree_node_t*) * tmp_size);
-
+
if(tmp) {
collection->size = tmp_size;
collection->list = tmp;
@@ -1462,7 +1462,7 @@ mystatus_t myhtml_collection_check_size(myhtml_collection_t *collection, size_t
else
return MyHTML_STATUS_ERROR_MEMORY_ALLOCATION;
}
-
+
return MyHTML_STATUS_OK;
}
@@ -1476,12 +1476,12 @@ myhtml_collection_t * myhtml_collection_destroy(myhtml_collection_t *collection)
{
if(collection == NULL)
return NULL;
-
+
if(collection->list)
mycore_free(collection->list);
-
+
mycore_free(collection);
-
+
return NULL;
}
@@ -1491,84 +1491,84 @@ mystatus_t myhtml_queue_add(myhtml_tree_t *tree, size_t begin, myhtml_token_node
// TODO: need refactoring this code
// too many conditions
mythread_queue_node_t *qnode = tree->current_qnode;
-
+
if(tree->parse_flags & MyHTML_TREE_PARSE_FLAGS_SKIP_WHITESPACE_TOKEN) {
if(token && token->tag_id == MyHTML_TAG__TEXT && token->type & MyHTML_TOKEN_TYPE_WHITESPACE)
{
myhtml_token_node_clean(token);
token->raw_begin = token->element_begin = (tree->global_offset + begin);
-
+
return MyHTML_STATUS_OK;
}
}
-
+
#ifndef MyCORE_BUILD_WITHOUT_THREADS
-
+
if(tree->flags & MyHTML_TREE_FLAGS_SINGLE_MODE) {
if(qnode && token) {
qnode->args = token;
-
+
myhtml_parser_worker(0, qnode);
myhtml_parser_stream(0, qnode);
}
-
+
tree->current_qnode = mythread_queue_node_malloc_limit(tree->myhtml->thread_stream, tree->queue, 4, NULL);
}
else {
if(qnode)
qnode->args = token;
-
+
tree->current_qnode = mythread_queue_node_malloc_round(tree->myhtml->thread_stream, tree->queue_entry, NULL);
}
-
+
#else
-
+
if(qnode && token) {
qnode->args = token;
-
+
myhtml_parser_worker(0, qnode);
myhtml_parser_stream(0, qnode);
}
-
+
tree->current_qnode = mythread_queue_node_malloc_limit(tree->myhtml->thread_stream, tree->queue, 4, NULL);
-
+
#endif /* MyCORE_BUILD_WITHOUT_THREADS */
-
+
if(tree->current_qnode == NULL)
return MyHTML_STATUS_ERROR_MEMORY_ALLOCATION;
-
+
tree->current_qnode->context = tree;
tree->current_qnode->prev = qnode;
-
+
if(qnode && token)
myhtml_tokenizer_calc_current_namespace(tree, token);
-
+
tree->current_token_node = myhtml_token_node_create(tree->token, tree->token->mcasync_token_id);
if(tree->current_token_node == NULL)
return MyHTML_STATUS_ERROR_MEMORY_ALLOCATION;
-
+
tree->current_token_node->raw_begin = tree->current_token_node->element_begin = (tree->global_offset + begin);
-
+
return MyHTML_STATUS_OK;
}
bool myhtml_utils_strcmp(const char* ab, const char* to_lowercase, size_t size)
{
size_t i = 0;
-
+
for(;;) {
if(i == size)
return true;
-
+
if((const unsigned char)(to_lowercase[i] > 0x40 && to_lowercase[i] < 0x5b ?
(to_lowercase[i]|0x60) : to_lowercase[i]) != (const unsigned char)ab[i])
{
return false;
}
-
+
i++;
}
-
+
return false;
}
@@ -1576,7 +1576,7 @@ bool myhtml_is_html_node(myhtml_tree_node_t *node, myhtml_tag_id_t tag_id)
{
if(node == NULL)
return false;
-
+
return node->tag_id == tag_id && node->ns == MyHTML_NAMESPACE_HTML;
}
@@ -1585,7 +1585,3 @@ myhtml_version_t myhtml_version(void)
{
return (myhtml_version_t){MyHTML_VERSION_MAJOR, MyHTML_VERSION_MINOR, MyHTML_VERSION_PATCH};
}
-
-
-
-
diff --git a/source/myhtml/token.c b/source/myhtml/token.c
index 0828dfc..bc28dac 100644
--- a/source/myhtml/token.c
+++ b/source/myhtml/token.c
@@ -1,20 +1,20 @@
/*
Copyright (C) 2015-2017 Alexander Borisov
-
+
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
-
+
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
-
+
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-
+
Author: lex.borisov@gmail.com (Alexander Borisov)
*/
@@ -101,36 +101,36 @@ myhtml_token_t * myhtml_token_create(myhtml_tree_t* tree, size_t size)
{
if(size == 0)
size = 4096;
-
+
myhtml_token_t* token = (myhtml_token_t*)mycore_malloc(sizeof(myhtml_token_t));
-
+
if(token == NULL)
return NULL;
-
+
token->nodes_obj = mcobject_async_create();
-
+
if(token->nodes_obj == NULL) {
mycore_free(token);
return NULL;
}
-
+
token->attr_obj = mcobject_async_create();
-
+
if(token->attr_obj == NULL) {
mycore_free(token->nodes_obj);
mycore_free(token);
-
+
return NULL;
}
-
+
mcobject_async_init(token->nodes_obj, 128, size, sizeof(myhtml_token_node_t));
mcobject_async_init(token->attr_obj, 128, size, sizeof(myhtml_token_attr_t));
-
+
token->mcasync_token_id = mcobject_async_node_add(token->nodes_obj, NULL);
token->mcasync_attr_id = mcobject_async_node_add(token->attr_obj, NULL);
-
+
token->tree = tree;
-
+
return token;
}
@@ -150,15 +150,15 @@ myhtml_token_t * myhtml_token_destroy(myhtml_token_t* token)
{
if(token == NULL)
return NULL;
-
+
if(token->nodes_obj)
token->nodes_obj = mcobject_async_destroy(token->nodes_obj, 1);
-
+
if(token->attr_obj)
token->attr_obj = mcobject_async_destroy(token->attr_obj, 1);
-
+
mycore_free(token);
-
+
return NULL;
}
@@ -167,7 +167,7 @@ myhtml_token_node_t * myhtml_token_node_create(myhtml_token_t* token, size_t asy
myhtml_token_node_t *token_node = (myhtml_token_node_t*)mcobject_async_malloc(token->nodes_obj, async_node_id, NULL);
if(token_node == NULL)
return NULL;
-
+
myhtml_token_node_clean(token_node);
return token_node;
}
@@ -176,7 +176,7 @@ void myhtml_token_node_clean(myhtml_token_node_t* node)
{
memset(node, 0, sizeof(myhtml_token_node_t));
node->type = MyHTML_TOKEN_TYPE_OPEN|MyHTML_TOKEN_TYPE_WHITESPACE;
-
+
mycore_string_clean_all(&node->str);
}
@@ -185,7 +185,7 @@ myhtml_token_attr_t * myhtml_token_attr_create(myhtml_token_t* token, size_t asy
myhtml_token_attr_t *attr_node = mcobject_async_malloc(token->attr_obj, async_node_id, NULL);
if(attr_node == NULL)
return NULL;
-
+
myhtml_token_attr_clean(attr_node);
return attr_node;
}
@@ -194,7 +194,7 @@ void myhtml_token_attr_clean(myhtml_token_attr_t* attr)
{
memset(attr, 0, sizeof(myhtml_token_attr_t));
attr->ns = MyHTML_NAMESPACE_HTML;
-
+
mycore_string_clean_all(&attr->key);
mycore_string_clean_all(&attr->value);
}
@@ -204,19 +204,19 @@ myhtml_tag_id_t myhtml_token_node_tag_id(myhtml_token_node_t *token_node)
return token_node->tag_id;
}
-myhtml_position_t myhtml_token_node_raw_pasition(myhtml_token_node_t *token_node)
+myhtml_position_t myhtml_token_node_raw_position(myhtml_token_node_t *token_node)
{
if(token_node)
return (myhtml_position_t){token_node->raw_begin, token_node->raw_length};
-
+
return (myhtml_position_t){0, 0};
}
-myhtml_position_t myhtml_token_node_element_pasition(myhtml_token_node_t *token_node)
+myhtml_position_t myhtml_token_node_element_position(myhtml_token_node_t *token_node)
{
if(token_node)
return (myhtml_position_t){token_node->element_begin, token_node->element_length};
-
+
return (myhtml_position_t){0, 0};
}
@@ -234,7 +234,7 @@ const char * myhtml_token_node_text(myhtml_token_node_t *token_node, size_t *len
{
if(length)
*length = token_node->str.length;
-
+
return token_node->str.data;
}
@@ -269,13 +269,13 @@ myhtml_token_node_t * myhtml_token_node_clone(myhtml_token_t* token, myhtml_toke
{
if(node == NULL)
return NULL;
-
+
myhtml_tree_t* tree = token->tree;
myhtml_token_node_t* new_node = myhtml_token_node_create(token, token_thread_idx);
-
+
if(new_node == NULL)
return NULL;
-
+
new_node->tag_id = node->tag_id;
new_node->type = node->type;
new_node->attr_first = NULL;
@@ -284,17 +284,17 @@ myhtml_token_node_t * myhtml_token_node_clone(myhtml_token_t* token, myhtml_toke
new_node->raw_length = node->raw_length;
new_node->element_begin = node->element_begin;
new_node->element_length = node->element_length;
-
+
mycore_string_init(tree->mchar, tree->mchar_node_id, &new_node->str, node->str.size);
myhtml_token_node_attr_copy(token, node, new_node, attr_thread_idx);
-
+
return new_node;
}
void myhtml_token_node_text_append(myhtml_token_t* token, myhtml_token_node_t* dest, const char* text, size_t text_len)
{
mycore_string_init(token->tree->mchar, token->tree->mchar_node_id, &dest->str, (text_len + 2));
-
+
mycore_string_t* string = &dest->str;
mycore_string_append(string, text, text_len);
}
@@ -305,36 +305,36 @@ myhtml_token_attr_t * myhtml_token_node_attr_append(myhtml_token_t* token, myhtm
{
myhtml_token_attr_t* new_attr = mcobject_async_malloc(token->attr_obj, thread_idx, NULL);
new_attr->next = 0;
-
+
if(key_len) {
mycore_string_init(token->tree->mchar, token->tree->mchar_node_id, &new_attr->key, (key_len + 1));
mycore_string_append_lowercase(&new_attr->key, key, key_len);
}
else
mycore_string_clean_all(&new_attr->key);
-
+
if(value_len) {
mycore_string_init(token->tree->mchar, token->tree->mchar_node_id, &new_attr->value, (value_len + 1));
mycore_string_append(&new_attr->value, value, value_len);
}
else
mycore_string_clean_all(&new_attr->value);
-
+
if(dest->attr_first == NULL) {
new_attr->prev = 0;
-
+
dest->attr_first = new_attr;
dest->attr_last = new_attr;
}
else {
dest->attr_last->next = new_attr;
new_attr->prev = dest->attr_last;
-
+
dest->attr_last = new_attr;
}
-
+
new_attr->ns = MyHTML_NAMESPACE_HTML;
-
+
return new_attr;
}
@@ -345,10 +345,10 @@ myhtml_token_attr_t * myhtml_token_node_attr_append_with_convert_encoding(myhtml
{
myhtml_token_attr_t* new_attr = mcobject_async_malloc(token->attr_obj, thread_idx, NULL);
new_attr->next = 0;
-
+
if(key_len) {
mycore_string_init(token->tree->mchar, token->tree->mchar_node_id, &new_attr->key, (key_len + 1));
-
+
if(encoding == MyENCODING_UTF_8)
mycore_string_append_lowercase(&new_attr->key, key, key_len);
else
@@ -356,10 +356,10 @@ myhtml_token_attr_t * myhtml_token_node_attr_append_with_convert_encoding(myhtml
}
else
mycore_string_clean_all(&new_attr->key);
-
+
if(value_len) {
mycore_string_init(token->tree->mchar, token->tree->mchar_node_id, &new_attr->value, (value_len + 1));
-
+
if(encoding == MyENCODING_UTF_8)
mycore_string_append(&new_attr->value, value, value_len);
else
@@ -367,35 +367,35 @@ myhtml_token_attr_t * myhtml_token_node_attr_append_with_convert_encoding(myhtml
}
else
mycore_string_clean_all(&new_attr->value);
-
+
if(dest->attr_first == NULL) {
new_attr->prev = 0;
-
+
dest->attr_first = new_attr;
dest->attr_last = new_attr;
}
else {
dest->attr_last->next = new_attr;
new_attr->prev = dest->attr_last;
-
+
dest->attr_last = new_attr;
}
-
+
new_attr->ns = MyHTML_NAMESPACE_HTML;
-
+
return new_attr;
}
void myhtml_token_node_attr_copy_with_check(myhtml_token_t* token, myhtml_token_node_t* target, myhtml_token_node_t* dest, size_t thread_idx)
{
myhtml_token_attr_t* attr = target->attr_first;
-
+
while (attr)
{
if(attr->key.length && myhtml_token_attr_by_name(dest, attr->key.data, attr->key.length) == NULL) {
myhtml_token_attr_copy(token, attr, dest, thread_idx);
}
-
+
attr = attr->next;
}
}
@@ -403,7 +403,7 @@ void myhtml_token_node_attr_copy_with_check(myhtml_token_t* token, myhtml_token_
void myhtml_token_node_attr_copy(myhtml_token_t* token, myhtml_token_node_t* target, myhtml_token_node_t* dest, size_t thread_idx)
{
myhtml_token_attr_t* attr = target->attr_first;
-
+
while (attr)
{
myhtml_token_attr_copy(token, attr, dest, thread_idx);
@@ -415,36 +415,36 @@ bool myhtml_token_attr_copy(myhtml_token_t* token, myhtml_token_attr_t* attr, my
{
myhtml_token_attr_t* new_attr = mcobject_async_malloc(token->attr_obj, thread_idx, NULL);
new_attr->next = 0;
-
+
if(attr->key.length) {
mycore_string_init(token->tree->mchar, token->tree->mchar_node_id, &new_attr->key, (attr->key.length + 1));
mycore_string_append_lowercase(&new_attr->key, attr->key.data, attr->key.length);
}
else
mycore_string_clean_all(&new_attr->key);
-
+
if(attr->value.length) {
mycore_string_init(token->tree->mchar, token->tree->mchar_node_id, &new_attr->value, (attr->value.length + 1));
mycore_string_append(&new_attr->value, attr->value.data, attr->value.length);
}
else
mycore_string_clean_all(&new_attr->value);
-
+
if(dest->attr_first == NULL) {
new_attr->prev = 0;
-
+
dest->attr_first = new_attr;
dest->attr_last = new_attr;
}
else {
dest->attr_last->next = new_attr;
new_attr->prev = dest->attr_last;
-
+
dest->attr_last = new_attr;
}
-
+
new_attr->ns = attr->ns;
-
+
return true;
}
@@ -452,7 +452,7 @@ myhtml_token_attr_t * myhtml_token_attr_match(myhtml_token_t* token, myhtml_toke
const char* key, size_t key_size, const char* value, size_t value_size)
{
myhtml_token_attr_t* attr = target->attr_first;
-
+
while (attr)
{
if(attr->key.length == key_size && attr->value.length == value_size)
@@ -464,10 +464,10 @@ myhtml_token_attr_t * myhtml_token_attr_match(myhtml_token_t* token, myhtml_toke
return NULL;
}
}
-
+
attr = attr->next;
}
-
+
return NULL;
}
@@ -475,7 +475,7 @@ myhtml_token_attr_t * myhtml_token_attr_match_case(myhtml_token_t* token, myhtml
const char* key, size_t key_size, const char* value, size_t value_size)
{
myhtml_token_attr_t* attr = target->attr_first;
-
+
while (attr)
{
if(attr->key.length == key_size && attr->value.length == value_size)
@@ -487,17 +487,17 @@ myhtml_token_attr_t * myhtml_token_attr_match_case(myhtml_token_t* token, myhtml
return NULL;
}
}
-
+
attr = attr->next;
}
-
+
return NULL;
}
void myhtml_token_adjust_mathml_attributes(myhtml_token_node_t* target)
{
myhtml_token_attr_t* attr = myhtml_token_attr_by_name(target, "definitionurl", 13);
-
+
if(attr) {
memcpy(attr->key.data, "definitionURL", 13);
}
@@ -512,10 +512,10 @@ void _myhtml_token_create_copy_srt(myhtml_token_t* token, const char* from, size
void myhtml_token_strict_doctype_by_token(myhtml_token_t* token, myhtml_token_node_t* target, myhtml_tree_doctype_t* return_doctype)
{
myhtml_token_attr_t* attr = target->attr_first;
-
+
if(attr && attr->key.length) {
_myhtml_token_create_copy_srt(token, attr->key.data, attr->key.length, &return_doctype->attr_name);
-
+
if(mycore_strcmp("html", return_doctype->attr_name))
return_doctype->is_html = false;
else
@@ -523,41 +523,41 @@ void myhtml_token_strict_doctype_by_token(myhtml_token_t* token, myhtml_token_no
}
else {
return_doctype->is_html = false;
-
+
_myhtml_token_create_copy_srt(token, "\0", 1, &return_doctype->attr_name);
-
+
if(return_doctype->attr_public)
mycore_free(return_doctype->attr_public);
return_doctype->attr_public = NULL;
-
+
if(return_doctype->attr_system)
mycore_free(return_doctype->attr_system);
return_doctype->attr_system = NULL;
-
+
return;
}
-
+
attr = attr->next;
-
+
if(attr && attr->value.length)
{
if(mycore_strcasecmp(attr->value.data, "PUBLIC") == 0)
{
// try see public
attr = attr->next;
-
+
if(attr && attr->value.length) {
_myhtml_token_create_copy_srt(token, attr->value.data, attr->value.length, &return_doctype->attr_public);
-
+
// try see system
attr = attr->next;
-
+
if(attr && attr->value.length)
_myhtml_token_create_copy_srt(token, attr->value.data, attr->value.length, &return_doctype->attr_system);
else {
if(return_doctype->attr_system)
mycore_free(return_doctype->attr_system);
-
+
_myhtml_token_create_copy_srt(token, "\0", 1, &return_doctype->attr_system);
}
}
@@ -565,7 +565,7 @@ void myhtml_token_strict_doctype_by_token(myhtml_token_t* token, myhtml_token_no
if(return_doctype->attr_public)
mycore_free(return_doctype->attr_public);
return_doctype->attr_public = NULL;
-
+
if(return_doctype->attr_system)
mycore_free(return_doctype->attr_system);
return_doctype->attr_system = NULL;
@@ -574,7 +574,7 @@ void myhtml_token_strict_doctype_by_token(myhtml_token_t* token, myhtml_token_no
else if(mycore_strncasecmp(attr->value.data, "SYSTEM", attr->value.length) == 0)
{
attr = attr->next;
-
+
if(attr && attr->value.length) {
_myhtml_token_create_copy_srt(token, "\0", 1, &return_doctype->attr_public);
_myhtml_token_create_copy_srt(token, attr->value.data, attr->value.length, &return_doctype->attr_system);
@@ -583,7 +583,7 @@ void myhtml_token_strict_doctype_by_token(myhtml_token_t* token, myhtml_token_no
if(return_doctype->attr_public)
mycore_free(return_doctype->attr_public);
return_doctype->attr_public = NULL;
-
+
if(return_doctype->attr_system)
mycore_free(return_doctype->attr_system);
return_doctype->attr_system = NULL;
@@ -593,7 +593,7 @@ void myhtml_token_strict_doctype_by_token(myhtml_token_t* token, myhtml_token_no
if(return_doctype->attr_public)
mycore_free(return_doctype->attr_public);
return_doctype->attr_public = NULL;
-
+
if(return_doctype->attr_system)
mycore_free(return_doctype->attr_system);
return_doctype->attr_system = NULL;
@@ -617,7 +617,7 @@ bool myhtml_token_doctype_check_xhtml_1_0(myhtml_tree_doctype_t* return_doctype)
{
if(return_doctype->attr_system == NULL)
return true;
-
+
return mycore_strcmp(return_doctype->attr_public, "-//W3C//DTD XHTML 1.0 Strict//EN") &&
mycore_strcmp(return_doctype->attr_system, "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd");
}
@@ -626,7 +626,7 @@ bool myhtml_token_doctype_check_xhtml_1_1(myhtml_tree_doctype_t* return_doctype)
{
if(return_doctype->attr_system == NULL)
return true;
-
+
return mycore_strcmp(return_doctype->attr_public, "-//W3C//DTD XHTML 1.1//EN") &&
mycore_strcmp(return_doctype->attr_system, "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd");
}
@@ -635,19 +635,19 @@ bool myhtml_token_release_and_check_doctype_attributes(myhtml_token_t* token, my
{
if(return_doctype == NULL)
return false;
-
+
myhtml_token_strict_doctype_by_token(token, target, return_doctype);
-
+
if(return_doctype->attr_name == NULL)
return false;
-
+
if((return_doctype->is_html ||
return_doctype->attr_public ||
(return_doctype->attr_system && mycore_strcmp(return_doctype->attr_system, "about:legacy-compat"))))
{
if(return_doctype->attr_public == NULL)
return false;
-
+
if(return_doctype->is_html &&
myhtml_token_doctype_check_html_4_0(return_doctype) &&
myhtml_token_doctype_check_html_4_01(return_doctype) &&
@@ -657,19 +657,19 @@ bool myhtml_token_release_and_check_doctype_attributes(myhtml_token_t* token, my
return false;
}
}
-
+
return true;
}
void myhtml_token_adjust_svg_attributes(myhtml_token_node_t* target)
{
size_t count = sizeof(myhtml_token_attr_svg_replacement) / sizeof(myhtml_token_replacement_entry_t);
-
+
for (size_t i = 0; i < count; i++)
{
myhtml_token_attr_t* attr = myhtml_token_attr_by_name(target, myhtml_token_attr_svg_replacement[i].from,
myhtml_token_attr_svg_replacement[i].from_size);
-
+
if(attr) {
mycore_string_clean(&attr->key);
mycore_string_append(&attr->key, myhtml_token_attr_svg_replacement[i].to,
@@ -681,17 +681,17 @@ void myhtml_token_adjust_svg_attributes(myhtml_token_node_t* target)
void myhtml_token_adjust_foreign_attributes(myhtml_token_node_t* target)
{
size_t count = sizeof(myhtml_token_attr_namespace_replacement) / sizeof(myhtml_token_namespace_replacement_t);
-
+
for (size_t i = 0; i < count; i++)
{
myhtml_token_attr_t* attr = myhtml_token_attr_by_name(target, myhtml_token_attr_namespace_replacement[i].from,
myhtml_token_attr_namespace_replacement[i].from_size);
-
+
if(attr) {
mycore_string_clean(&attr->key);
mycore_string_append(&attr->key, myhtml_token_attr_namespace_replacement[i].to,
myhtml_token_attr_namespace_replacement[i].to_size);
-
+
attr->ns = myhtml_token_attr_namespace_replacement[i].ns;
}
}
@@ -701,10 +701,10 @@ bool myhtml_token_attr_compare(myhtml_token_node_t* target, myhtml_token_node_t*
{
if(target == NULL || dest == NULL)
return false;
-
+
myhtml_token_attr_t* target_attr = target->attr_first;
myhtml_token_attr_t* dest_attr = dest->attr_first;
-
+
while (target_attr && dest_attr)
{
if(target_attr->key.length == dest_attr->key.length &&
@@ -712,37 +712,37 @@ bool myhtml_token_attr_compare(myhtml_token_node_t* target, myhtml_token_node_t*
{
if(mycore_strcmp(target_attr->key.data, dest_attr->key.data) != 0)
break;
-
+
if(mycore_strcasecmp(target_attr->value.data, dest_attr->value.data) != 0)
break;
}
else
break;
-
+
target_attr = target_attr->next;
dest_attr = dest_attr->next;
}
-
+
if(target_attr == NULL && dest_attr == NULL)
return true;
-
+
return false;
}
myhtml_token_attr_t * myhtml_token_attr_by_name(myhtml_token_node_t* node, const char* name, size_t name_length)
{
myhtml_token_attr_t* attr = node->attr_first;
-
+
while (attr)
{
if(name_length == attr->key.length) {
if(mycore_strcmp(attr->key.data, name) == 0)
break;
}
-
+
attr = attr->next;
}
-
+
return attr;
}
@@ -751,24 +751,24 @@ void myhtml_token_delete(myhtml_token_t* token, myhtml_token_node_t* node)
if(node->str.data && node->str.mchar) {
mchar_async_free(node->str.mchar, node->str.node_idx, node->str.data);
}
-
+
mcobject_async_free(token->nodes_obj, node);
}
void myhtml_token_attr_delete_all(myhtml_token_t* token, myhtml_token_node_t* node)
{
myhtml_token_attr_t* attr = node->attr_first;
-
+
while (attr)
{
if(attr->key.data && attr->key.mchar) {
mchar_async_free(attr->key.mchar, attr->key.node_idx, attr->key.data);
}
-
+
if(attr->value.data && attr->value.mchar) {
mchar_async_free(attr->value.mchar, attr->value.node_idx, attr->value.data);
}
-
+
attr = attr->next;
}
}
@@ -783,18 +783,18 @@ myhtml_token_attr_t * myhtml_token_attr_remove(myhtml_token_node_t* node, myhtml
else {
node->attr_first = attr->next;
}
-
+
if(attr->next) {
attr->next->prev = attr->prev;
}
else {
node->attr_last = attr->prev;
}
-
+
attr->next = NULL;
attr->prev = NULL;
}
-
+
return attr;
}
@@ -807,13 +807,13 @@ myhtml_token_node_t * myhtml_token_merged_two_token_string(myhtml_tree_t* tree,
{
myhtml_token_node_wait_for_done(tree->token, token_to);
myhtml_token_node_wait_for_done(tree->token, token_from);
-
+
mycore_string_t *string1 = &token_to->str;
mycore_string_t *string2 = &token_from->str;
-
+
token_to->raw_begin = 0;
token_to->raw_length = 0;
-
+
if(token_to->str.node_idx == tree->mchar_node_id)
{
if(cp_reverse) {
@@ -822,7 +822,7 @@ myhtml_token_node_t * myhtml_token_merged_two_token_string(myhtml_tree_t* tree,
else {
mycore_string_copy(string1, string2);
}
-
+
return token_to;
}
if(token_from->str.node_idx == tree->mchar_node_id)
@@ -833,13 +833,13 @@ myhtml_token_node_t * myhtml_token_merged_two_token_string(myhtml_tree_t* tree,
else {
mycore_string_copy(string1, string2);
}
-
+
return token_from;
}
else {
mycore_string_t string_base;
mycore_string_init(tree->mchar, tree->mchar_node_id, &string_base, (string1->length + string2->length + 2));
-
+
if(cp_reverse) {
mycore_string_copy(&string_base, string2);
mycore_string_copy(&string_base, string1);
@@ -848,22 +848,21 @@ myhtml_token_node_t * myhtml_token_merged_two_token_string(myhtml_tree_t* tree,
mycore_string_copy(&string_base, string1);
mycore_string_copy(&string_base, string2);
}
-
+
token_to->str = string_base;
}
-
+
return token_to;
}
void myhtml_token_set_replacement_character_for_null_token(myhtml_tree_t* tree, myhtml_token_node_t* node)
{
myhtml_token_node_wait_for_done(tree->token, node);
-
+
mycore_string_t new_str;
mycore_string_init(tree->mchar, tree->mchar_node_id, &new_str, (node->str.length + 2));
-
+
mycore_string_append_with_replacement_null_characters(&new_str, node->str.data, node->str.length);
-
+
node->str = new_str;
}
-