diff --git a/examples/myhtml/tokenizer_colorize_high_level.c b/examples/myhtml/tokenizer_colorize_high_level.c index 05ee28f..aa3424a 100644 --- a/examples/myhtml/tokenizer_colorize_high_level.c +++ b/examples/myhtml/tokenizer_colorize_high_level.c @@ -1,20 +1,20 @@ /* Copyright (C) 2015-2016 Alexander Borisov - + This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 2.1 of the License, or (at your option) any later version. - + This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. - + You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - + Author: lex.borisov@gmail.com (Alexander Borisov) */ @@ -37,30 +37,30 @@ struct res_html load_html_file(const char* filename) fprintf(stderr, "Can't open html file: %s\n", filename); exit(EXIT_FAILURE); } - + if(fseek(fh, 0L, SEEK_END) != 0) { fprintf(stderr, "Can't set position (fseek) in file: %s\n", filename); exit(EXIT_FAILURE); } - + long size = ftell(fh); - + if(fseek(fh, 0L, SEEK_SET) != 0) { fprintf(stderr, "Can't set position (fseek) in file: %s\n", filename); exit(EXIT_FAILURE); } - + if(size <= 0) { fprintf(stderr, "Can't get file size or file is empty: %s\n", filename); exit(EXIT_FAILURE); } - + char *html = (char*)malloc(size + 1); if(html == NULL) { fprintf(stderr, "Can't allocate mem for html file: %s\n", filename); exit(EXIT_FAILURE); } - + size_t nread = fread(html, 1, size, fh); if (nread != size) { fprintf(stderr, "could not read %ld bytes (" MyCORE_FMT_Z " bytes done)\n", size, nread); @@ -68,7 +68,7 @@ struct res_html load_html_file(const char* filename) } fclose(fh); - + struct res_html res = {html, (size_t)size}; return res; } @@ -77,10 +77,10 @@ void colorize_print(mycore_incoming_buffer_t *inc_buf, size_t begin, size_t leng { if(length) { inc_buf = mycore_incoming_buffer_find_by_position(inc_buf, begin); - + size_t between_begin = (begin - mycore_incoming_buffer_offset(inc_buf)); const char* between_data = mycore_incoming_buffer_data(inc_buf); - + printf("%s%.*s\e[0m", color, (int)length, &between_data[between_begin]); } } @@ -91,16 +91,16 @@ size_t colorize_print_attributes(myhtml_tree_t* tree, myhtml_tree_attr_t* attr, { myhtml_position_t key_pos = myhtml_attribute_key_raw_position(attr); myhtml_position_t value_pos = myhtml_attribute_value_raw_position(attr); - + if(key_pos.length) { /* print */ if(last_pos < key_pos.begin) colorize_print(inc_buf, last_pos, (key_pos.begin - last_pos), "\e[31m"); - + /* print
*/ colorize_print(inc_buf, key_pos.begin, key_pos.length, "\e[33m"); - + /* get/check max position */ if((key_pos.begin + key_pos.length) > last_pos) last_pos = key_pos.begin + key_pos.length; @@ -110,7 +110,7 @@ size_t colorize_print_attributes(myhtml_tree_t* tree, myhtml_tree_attr_t* attr, if(value_pos.length && last_pos < value_pos.begin) colorize_print(inc_buf, last_pos, (value_pos.begin - last_pos), "\e[31m"); } - + if(value_pos.length) { /* print
*/ @@ -118,37 +118,37 @@ size_t colorize_print_attributes(myhtml_tree_t* tree, myhtml_tree_attr_t* attr, size_t between_begin = key_pos.begin + key_pos.length; colorize_print(inc_buf, between_begin, (value_pos.begin - between_begin), "\e[31m"); } - + /* print
*/ colorize_print(inc_buf, value_pos.begin, value_pos.length, "\e[34m"); - + /* get/check max position */ if(value_pos.begin + value_pos.length > last_pos) last_pos = value_pos.begin + value_pos.length; } - + attr = myhtml_attribute_next(attr); } - + return last_pos; } void * colorize_callback_before_token_done(myhtml_tree_t* tree, myhtml_token_node_t* token, void* ctx) { mycore_incoming_buffer_t *inc_buf = myhtml_tree_incoming_buffer_first(tree); - - myhtml_position_t token_pos = myhtml_token_node_raw_pasition(token); - myhtml_position_t token_element_pos = myhtml_token_node_element_pasition(token); - + + myhtml_position_t token_pos = myhtml_token_node_raw_position(token); + myhtml_position_t token_element_pos = myhtml_token_node_element_position(token); + size_t last_pos = token_pos.begin + token_pos.length; - + switch (myhtml_token_node_tag_id(token)) { case MyHTML_TAG__DOCTYPE: { /* print [] */ colorize_print(inc_buf, last_pos, ((token_element_pos.begin + token_element_pos.length) - last_pos), "\e[37m"); break; @@ -160,9 +160,9 @@ void * colorize_callback_before_token_done(myhtml_tree_t* tree, myhtml_token_nod case MyHTML_TAG__COMMENT: { /* print [] */ colorize_print(inc_buf, last_pos, ((token_element_pos.begin + token_element_pos.length) - last_pos), "\e[32m"); break; @@ -170,20 +170,20 @@ void * colorize_callback_before_token_done(myhtml_tree_t* tree, myhtml_token_nod default: { /* print [<]div> */ colorize_print(inc_buf, token_element_pos.begin, (token_pos.begin - token_element_pos.begin), "\e[31m"); - + /* print <[div]> */ colorize_print(inc_buf, token_pos.begin, token_pos.length, "\e[31m"); - + if(myhtml_token_node_attribute_first(token)) last_pos = colorize_print_attributes(tree, myhtml_token_node_attribute_first(token), inc_buf, last_pos); - + /* print ] */ colorize_print(inc_buf, last_pos, ((token_element_pos.begin + token_element_pos.length) - last_pos), "\e[31m"); - + break; } } - + return ctx; } @@ -198,30 +198,28 @@ int main(int argc, const char * argv[]) printf("Bad ARGV!\nUse: tokenizer_colorize_high_level \n"); exit(EXIT_FAILURE); } - + struct res_html res = load_html_file(path); - + // basic init myhtml_t* myhtml = myhtml_create(); myhtml_init(myhtml, MyHTML_OPTIONS_DEFAULT, 1, 0); - + // init tree myhtml_tree_t* tree = myhtml_tree_create(); myhtml_tree_init(tree, myhtml); - + myhtml_callback_before_token_done_set(tree, colorize_callback_before_token_done, NULL); - + // parse html myhtml_parse(tree, MyENCODING_UTF_8, res.html, res.size); - + printf("\n"); - + myhtml_tree_destroy(tree); myhtml_destroy(myhtml); - + free(res.html); - + return 0; } - - diff --git a/include/myhtml/api.h b/include/myhtml/api.h index 55fb672..03659e0 100755 --- a/include/myhtml/api.h +++ b/include/myhtml/api.h @@ -1,20 +1,20 @@ /* Copyright (C) 2015-2017 Alexander Borisov - + This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 2.1 of the License, or (at your option) any later version. - + This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. - + You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - + Author: lex.borisov@gmail.com (Alexander Borisov) */ @@ -328,7 +328,7 @@ enum myhtml_tags { // base /* Very important!!! - + for myhtml 0..00ffff; MyHTML_STATUS_OK == 0x000000 for mycss and modules 010000..01ffff; MyCSS_STATUS_OK == 0x000000 for modest 020000..02ffff; MODEST_STATUS_OK == 0x000000 @@ -380,7 +380,7 @@ enum myhtml_namespace { MyHTML_NAMESPACE_XLINK = 0x04, MyHTML_NAMESPACE_XML = 0x05, MyHTML_NAMESPACE_XMLNS = 0x06, - + /* MyHTML_NAMESPACE_ANY == MyHTML_NAMESPACE_LAST_ENTRY */ MyHTML_NAMESPACE_ANY = 0x07, MyHTML_NAMESPACE_LAST_ENTRY = 0x07 @@ -1366,7 +1366,7 @@ myhtml_node_string(myhtml_tree_node_t *node); * @return myhtml_tree_node_t */ myhtml_position_t -myhtml_node_raw_pasition(myhtml_tree_node_t *node); +myhtml_node_raw_position(myhtml_tree_node_t *node); /** * Get element position for Tree Node in Incoming Buffer @@ -1378,7 +1378,7 @@ myhtml_node_raw_pasition(myhtml_tree_node_t *node); * @return myhtml_tree_node_t */ myhtml_position_t -myhtml_node_element_pasition(myhtml_tree_node_t *node); +myhtml_node_element_position(myhtml_tree_node_t *node); /** * Get data value from tree node @@ -1619,7 +1619,7 @@ myhtml_token_node_tag_id(myhtml_token_node_t *token_node); * @return myhtml_position_t */ myhtml_position_t -myhtml_token_node_raw_pasition(myhtml_token_node_t *token_node); +myhtml_token_node_raw_position(myhtml_token_node_t *token_node); /** * Get element position for Token Node in Incoming Buffer @@ -1631,7 +1631,7 @@ myhtml_token_node_raw_pasition(myhtml_token_node_t *token_node); * @return myhtml_position_t */ myhtml_position_t -myhtml_token_node_element_pasition(myhtml_token_node_t *token_node); +myhtml_token_node_element_position(myhtml_token_node_t *token_node); /** * Get first attribute of a token node diff --git a/include/myhtml/myhtml.h b/include/myhtml/myhtml.h index d111add..56213b7 100644 --- a/include/myhtml/myhtml.h +++ b/include/myhtml/myhtml.h @@ -1,20 +1,20 @@ /* Copyright (C) 2015-2017 Alexander Borisov - + This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 2.1 of the License, or (at your option) any later version. - + This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. - + You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - + Author: lex.borisov@gmail.com (Alexander Borisov) */ @@ -69,10 +69,10 @@ struct myhtml { mythread_t* thread_batch; mythread_t* thread_list[3]; size_t thread_total; - + myhtml_tokenizer_state_f* parse_state_func; myhtml_insertion_f* insertion_func; - + enum myhtml_options opt; myhtml_tree_node_t *marker; }; @@ -177,8 +177,8 @@ myhtml_tree_attr_t * myhtml_node_attribute_first(myhtml_tree_node_t *node); myhtml_tree_attr_t * myhtml_node_attribute_last(myhtml_tree_node_t *node); const char * myhtml_node_text(myhtml_tree_node_t *node, size_t *length); mycore_string_t * myhtml_node_string(myhtml_tree_node_t *node); -myhtml_position_t myhtml_node_raw_pasition(myhtml_tree_node_t *node); -myhtml_position_t myhtml_node_element_pasition(myhtml_tree_node_t *node); +myhtml_position_t myhtml_node_raw_position(myhtml_tree_node_t *node); +myhtml_position_t myhtml_node_element_position(myhtml_tree_node_t *node); void myhtml_node_set_data(myhtml_tree_node_t *node, void* data); void * myhtml_node_get_data(myhtml_tree_node_t *node); diff --git a/include/myhtml/token.h b/include/myhtml/token.h index bf40736..eee981e 100644 --- a/include/myhtml/token.h +++ b/include/myhtml/token.h @@ -1,20 +1,20 @@ /* Copyright (C) 2015-2017 Alexander Borisov - + This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 2.1 of the License, or (at your option) any later version. - + This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. - + You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - + Author: lex.borisov@gmail.com (Alexander Borisov) */ @@ -42,7 +42,7 @@ extern "C" { struct myhtml_token_replacement_entry { char* from; size_t from_size; - + char* to; size_t to_size; }; @@ -50,55 +50,55 @@ struct myhtml_token_replacement_entry { struct myhtml_token_namespace_replacement { char* from; size_t from_size; - + char* to; size_t to_size; - + enum myhtml_namespace ns; }; struct myhtml_token_attr { myhtml_token_attr_t* next; myhtml_token_attr_t* prev; - + mycore_string_t key; mycore_string_t value; - + size_t raw_key_begin; size_t raw_key_length; size_t raw_value_begin; size_t raw_value_length; - + enum myhtml_namespace ns; }; struct myhtml_token_node { myhtml_tag_id_t tag_id; - + mycore_string_t str; - + size_t raw_begin; size_t raw_length; - + size_t element_begin; size_t element_length; - + myhtml_token_attr_t* attr_first; myhtml_token_attr_t* attr_last; - + volatile enum myhtml_token_type type; }; struct myhtml_token { myhtml_tree_t* tree; // ref - + mcobject_async_t* nodes_obj; // myhtml_token_node_t mcobject_async_t* attr_obj; // myhtml_token_attr_t - + // def thread node id size_t mcasync_token_id; size_t mcasync_attr_id; - + bool is_new_tmp; }; @@ -108,8 +108,8 @@ void myhtml_token_clean_all(myhtml_token_t* token); myhtml_token_t * myhtml_token_destroy(myhtml_token_t* token); myhtml_tag_id_t myhtml_token_node_tag_id(myhtml_token_node_t *token_node); -myhtml_position_t myhtml_token_node_raw_pasition(myhtml_token_node_t *token_node); -myhtml_position_t myhtml_token_node_element_pasition(myhtml_token_node_t *token_node); +myhtml_position_t myhtml_token_node_raw_position(myhtml_token_node_t *token_node); +myhtml_position_t myhtml_token_node_element_position(myhtml_token_node_t *token_node); myhtml_tree_attr_t * myhtml_token_node_attribute_first(myhtml_token_node_t *token_node); myhtml_tree_attr_t * myhtml_token_node_attribute_last(myhtml_token_node_t *token_node); diff --git a/source/myhtml/api.h b/source/myhtml/api.h index 55fb672..03659e0 100755 --- a/source/myhtml/api.h +++ b/source/myhtml/api.h @@ -1,20 +1,20 @@ /* Copyright (C) 2015-2017 Alexander Borisov - + This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 2.1 of the License, or (at your option) any later version. - + This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. - + You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - + Author: lex.borisov@gmail.com (Alexander Borisov) */ @@ -328,7 +328,7 @@ enum myhtml_tags { // base /* Very important!!! - + for myhtml 0..00ffff; MyHTML_STATUS_OK == 0x000000 for mycss and modules 010000..01ffff; MyCSS_STATUS_OK == 0x000000 for modest 020000..02ffff; MODEST_STATUS_OK == 0x000000 @@ -380,7 +380,7 @@ enum myhtml_namespace { MyHTML_NAMESPACE_XLINK = 0x04, MyHTML_NAMESPACE_XML = 0x05, MyHTML_NAMESPACE_XMLNS = 0x06, - + /* MyHTML_NAMESPACE_ANY == MyHTML_NAMESPACE_LAST_ENTRY */ MyHTML_NAMESPACE_ANY = 0x07, MyHTML_NAMESPACE_LAST_ENTRY = 0x07 @@ -1366,7 +1366,7 @@ myhtml_node_string(myhtml_tree_node_t *node); * @return myhtml_tree_node_t */ myhtml_position_t -myhtml_node_raw_pasition(myhtml_tree_node_t *node); +myhtml_node_raw_position(myhtml_tree_node_t *node); /** * Get element position for Tree Node in Incoming Buffer @@ -1378,7 +1378,7 @@ myhtml_node_raw_pasition(myhtml_tree_node_t *node); * @return myhtml_tree_node_t */ myhtml_position_t -myhtml_node_element_pasition(myhtml_tree_node_t *node); +myhtml_node_element_position(myhtml_tree_node_t *node); /** * Get data value from tree node @@ -1619,7 +1619,7 @@ myhtml_token_node_tag_id(myhtml_token_node_t *token_node); * @return myhtml_position_t */ myhtml_position_t -myhtml_token_node_raw_pasition(myhtml_token_node_t *token_node); +myhtml_token_node_raw_position(myhtml_token_node_t *token_node); /** * Get element position for Token Node in Incoming Buffer @@ -1631,7 +1631,7 @@ myhtml_token_node_raw_pasition(myhtml_token_node_t *token_node); * @return myhtml_position_t */ myhtml_position_t -myhtml_token_node_element_pasition(myhtml_token_node_t *token_node); +myhtml_token_node_element_position(myhtml_token_node_t *token_node); /** * Get first attribute of a token node diff --git a/source/myhtml/myhtml.c b/source/myhtml/myhtml.c index 479f33e..b76f129 100644 --- a/source/myhtml/myhtml.c +++ b/source/myhtml/myhtml.c @@ -1,20 +1,20 @@ /* Copyright (C) 2015-2017 Alexander Borisov - + This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 2.1 of the License, or (at your option) any later version. - + This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. - + You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - + Author: lex.borisov@gmail.com (Alexander Borisov) */ @@ -23,7 +23,7 @@ void myhtml_init_marker(myhtml_t* myhtml) { myhtml->marker = (myhtml_tree_node_t*)mycore_malloc(sizeof(myhtml_tree_node_t)); - + if(myhtml->marker) myhtml_tree_node_clean(myhtml->marker); } @@ -39,20 +39,20 @@ mystatus_t myhtml_stream_create(myhtml_t* myhtml, mystatus_t* status, size_t cou { if(count == 0) { myhtml->thread_stream = NULL; - + *status = MyHTML_STATUS_OK; return *status; } - + myhtml->thread_stream = mythread_create(); if(myhtml->thread_stream == NULL) *status = MyCORE_STATUS_THREAD_ERROR_MEMORY_ALLOCATION; - + *status = mythread_init(myhtml->thread_stream, MyTHREAD_TYPE_STREAM, count, id_increase); - + if(*status) myhtml->thread_stream = mythread_destroy(myhtml->thread_stream, NULL, NULL, true); - + return *status; } @@ -60,40 +60,40 @@ mystatus_t myhtml_batch_create(myhtml_t* myhtml, mystatus_t* status, size_t coun { if(count == 0) { myhtml->thread_batch = NULL; - + *status = MyHTML_STATUS_OK; return *status; } - + myhtml->thread_batch = mythread_create(); if(myhtml->thread_stream == NULL) { myhtml->thread_stream = mythread_destroy(myhtml->thread_stream, NULL, NULL, true); *status = MyCORE_STATUS_THREAD_ERROR_MEMORY_ALLOCATION; } - + *status = mythread_init(myhtml->thread_batch, MyTHREAD_TYPE_BATCH, count, id_increase); - + if(*status) myhtml->thread_batch = mythread_destroy(myhtml->thread_batch , NULL, NULL, true); - + return *status; } mystatus_t myhtml_create_stream_and_batch(myhtml_t* myhtml, size_t stream_count, size_t batch_count) { mystatus_t status; - + /* stream */ if(myhtml_stream_create(myhtml, &status, stream_count, 0)) { return status; } - + /* batch */ if(myhtml_batch_create(myhtml, &status, batch_count, stream_count)) { myhtml->thread_stream = mythread_destroy(myhtml->thread_stream, NULL, NULL, true); return status; } - + return status; } #endif /* if undef MyCORE_BUILD_WITHOUT_THREADS */ @@ -106,80 +106,80 @@ myhtml_t * myhtml_create(void) mystatus_t myhtml_init(myhtml_t* myhtml, enum myhtml_options opt, size_t thread_count, size_t queue_size) { mystatus_t status; - + myhtml->opt = opt; myhtml_init_marker(myhtml); - + status = myhtml_tokenizer_state_init(myhtml); if(status) return status; - + status = myhtml_rules_init(myhtml); #ifdef MyCORE_BUILD_WITHOUT_THREADS - + myhtml->thread_stream = NULL; myhtml->thread_batch = NULL; myhtml->thread_total = 0; - + #else /* if undef MyCORE_BUILD_WITHOUT_THREADS */ if(status) return status; - + switch (opt) { case MyHTML_OPTIONS_PARSE_MODE_SINGLE: if((status = myhtml_create_stream_and_batch(myhtml, 0, 0))) return status; - + break; - + case MyHTML_OPTIONS_PARSE_MODE_ALL_IN_ONE: if((status = myhtml_create_stream_and_batch(myhtml, 1, 0))) return status; - + myhtml->thread_stream->context = mythread_queue_list_create(&status); status = myhread_entry_create(myhtml->thread_stream, mythread_function_queue_stream, myhtml_parser_worker_stream, MyTHREAD_OPT_STOP); - + break; - + default: // default MyHTML_OPTIONS_PARSE_MODE_SEPARATELY if(thread_count < 2) thread_count = 2; - + if((status = myhtml_create_stream_and_batch(myhtml, 1, (thread_count - 1)))) return status; - + myhtml->thread_stream->context = mythread_queue_list_create(&status); myhtml->thread_batch->context = myhtml->thread_stream->context; - + status = myhread_entry_create(myhtml->thread_stream, mythread_function_queue_stream, myhtml_parser_stream, MyTHREAD_OPT_STOP); if(status) return status; - + for(size_t i = 0; i < myhtml->thread_batch->entries_size; i++) { status = myhread_entry_create(myhtml->thread_batch, mythread_function_queue_batch, myhtml_parser_worker, MyTHREAD_OPT_STOP); - + if(status) return status; } - + break; } - + myhtml->thread_total = thread_count; - + myhtml->thread_list[0] = myhtml->thread_stream; myhtml->thread_list[1] = myhtml->thread_batch; myhtml->thread_list[2] = NULL; - + #endif /* if undef MyCORE_BUILD_WITHOUT_THREADS */ - + if(status) return status; - + myhtml_clean(myhtml); - + return status; } @@ -192,33 +192,33 @@ myhtml_t* myhtml_destroy(myhtml_t* myhtml) { if(myhtml == NULL) return NULL; - + myhtml_destroy_marker(myhtml); - + #ifndef MyCORE_BUILD_WITHOUT_THREADS if(myhtml->thread_stream) { mythread_queue_list_t* queue_list = myhtml->thread_stream->context; if(queue_list) mythread_queue_list_wait_for_done(myhtml->thread_stream, queue_list); - + myhtml->thread_stream = mythread_destroy(myhtml->thread_stream, mythread_callback_quit, NULL, true); - + if(myhtml->thread_batch) myhtml->thread_batch = mythread_destroy(myhtml->thread_batch, mythread_callback_quit, NULL, true); - + if(queue_list) mythread_queue_list_destroy(queue_list); } #endif /* if undef MyCORE_BUILD_WITHOUT_THREADS */ - + myhtml_tokenizer_state_destroy(myhtml); - + if(myhtml->insertion_func) mycore_free(myhtml->insertion_func); - + mycore_free(myhtml); - + return NULL; } @@ -227,13 +227,13 @@ mystatus_t myhtml_parse(myhtml_tree_t* tree, myencoding_t encoding, const char* if(tree->flags & MyHTML_TREE_FLAGS_PARSE_END) { myhtml_tree_clean(tree); } - + myhtml_encoding_set(tree, encoding); mystatus_t status = myhtml_tokenizer_begin(tree, html, html_size); - + if(status) return status; - + return myhtml_tokenizer_end(tree); } @@ -242,22 +242,22 @@ mystatus_t myhtml_parse_fragment(myhtml_tree_t* tree, myencoding_t encoding, con if(tree->flags & MyHTML_TREE_FLAGS_PARSE_END) { myhtml_tree_clean(tree); } - + if(tag_id == 0) tag_id = MyHTML_TAG_DIV; - + if(ns == 0) ns = MyHTML_NAMESPACE_HTML; - + if(myhtml_tokenizer_fragment_init(tree, tag_id, ns) == NULL) return MyHTML_STATUS_TOKENIZER_ERROR_FRAGMENT_INIT; - + myhtml_encoding_set(tree, encoding); mystatus_t status = myhtml_tokenizer_begin(tree, html, html_size); - + if(status) return status; - + return myhtml_tokenizer_end(tree); } @@ -266,16 +266,16 @@ mystatus_t myhtml_parse_single(myhtml_tree_t* tree, myencoding_t encoding, const if(tree->flags & MyHTML_TREE_FLAGS_PARSE_END) { myhtml_tree_clean(tree); } - + tree->flags |= MyHTML_TREE_FLAGS_SINGLE_MODE; - + myhtml_encoding_set(tree, encoding); - + mystatus_t status = myhtml_tokenizer_begin(tree, html, html_size); - + if(status) return status; - + return myhtml_tokenizer_end(tree); } @@ -284,25 +284,25 @@ mystatus_t myhtml_parse_fragment_single(myhtml_tree_t* tree, myencoding_t encodi if(tree->flags & MyHTML_TREE_FLAGS_PARSE_END) { myhtml_tree_clean(tree); } - + if(tag_id == 0) tag_id = MyHTML_TAG_DIV; - + if(ns == 0) ns = MyHTML_NAMESPACE_HTML; - + tree->flags |= MyHTML_TREE_FLAGS_SINGLE_MODE; - + if(myhtml_tokenizer_fragment_init(tree, tag_id, ns) == NULL) return MyHTML_STATUS_TOKENIZER_ERROR_FRAGMENT_INIT; - + myhtml_encoding_set(tree, encoding); - + mystatus_t status = myhtml_tokenizer_begin(tree, html, html_size); - + if(status) return status; - + return myhtml_tokenizer_end(tree); } @@ -311,7 +311,7 @@ mystatus_t myhtml_parse_chunk(myhtml_tree_t* tree, const char* html, size_t html if(tree->flags & MyHTML_TREE_FLAGS_PARSE_END) { myhtml_tree_clean(tree); } - + return myhtml_tokenizer_chunk(tree, html, html_size); } @@ -320,16 +320,16 @@ mystatus_t myhtml_parse_chunk_fragment(myhtml_tree_t* tree, const char* html, si if(tree->flags & MyHTML_TREE_FLAGS_PARSE_END) { myhtml_tree_clean(tree); } - + if(tag_id == 0) tag_id = MyHTML_TAG_DIV; - + if(ns == 0) ns = MyHTML_NAMESPACE_HTML; - + if(myhtml_tokenizer_fragment_init(tree, tag_id, ns) == NULL) return MyHTML_STATUS_TOKENIZER_ERROR_FRAGMENT_INIT; - + return myhtml_tokenizer_chunk(tree, html, html_size); } @@ -337,7 +337,7 @@ mystatus_t myhtml_parse_chunk_single(myhtml_tree_t* tree, const char* html, size { if((tree->flags & MyHTML_TREE_FLAGS_SINGLE_MODE) == 0) tree->flags |= MyHTML_TREE_FLAGS_SINGLE_MODE; - + return myhtml_parse_chunk(tree, html, html_size); } @@ -345,7 +345,7 @@ mystatus_t myhtml_parse_chunk_fragment_single(myhtml_tree_t* tree, const char* h { if((tree->flags & MyHTML_TREE_FLAGS_SINGLE_MODE) == 0) tree->flags |= MyHTML_TREE_FLAGS_SINGLE_MODE; - + return myhtml_parse_chunk_fragment(tree, html, html_size, tag_id, ns); } @@ -358,7 +358,7 @@ void myhtml_encoding_set(myhtml_tree_t* tree, myencoding_t encoding) { if(encoding >= MyENCODING_LAST_ENTRY) return; - + tree->encoding_usereq = encoding; tree->encoding = encoding; } @@ -378,22 +378,22 @@ mystatus_t myhtml_get_nodes_by_tag_id_in_scope_find_recursion(myhtml_tree_node_t if(node->tag_id == tag_id) { collection->list[ collection->length ] = node; collection->length++; - + if(collection->length >= collection->size) { mystatus_t mystatus = myhtml_collection_check_size(collection, 1024, 0); - + if(mystatus != MyHTML_STATUS_OK) return mystatus; } } - + if(node->child) myhtml_get_nodes_by_tag_id_in_scope_find_recursion(node->child, collection, tag_id); - + node = node->next; } - + return MyHTML_STATUS_OK; } @@ -401,28 +401,28 @@ myhtml_collection_t * myhtml_get_nodes_by_tag_id_in_scope(myhtml_tree_t* tree, m { if(node == NULL) return NULL; - + mystatus_t mystatus = MyHTML_STATUS_OK; - + if(collection == NULL) { collection = myhtml_collection_create(1024, &mystatus); } - + if(mystatus) { if(status) *status = mystatus; - + return collection; } - + if(node->child) mystatus = myhtml_get_nodes_by_tag_id_in_scope_find_recursion(node->child, collection, tag_id); - + collection->list[collection->length] = NULL; - + if(status) *status = mystatus; - + return collection; } @@ -436,13 +436,13 @@ myhtml_collection_t * myhtml_get_nodes_by_tag_id(myhtml_tree_t* tree, myhtml_col { if(collection == NULL) { collection = myhtml_collection_create(1024, NULL); - + if(collection == NULL) return NULL; } - + myhtml_tree_node_t *node = tree->node_html; - + while(node) { if(node->tag_id == tag_id) @@ -454,41 +454,41 @@ myhtml_collection_t * myhtml_get_nodes_by_tag_id(myhtml_tree_t* tree, myhtml_col else { if(status) *status = MyHTML_STATUS_ERROR_MEMORY_ALLOCATION; - + return collection; } } - + if(node->child) node = node->child; else { while(node != tree->node_html && node->next == NULL) node = node->parent; - + if(node == tree->node_html) break; - + node = node->next; } } - + if(myhtml_collection_check_size(collection, 1, 1024) == MyHTML_STATUS_OK) { collection->list[ collection->length ] = NULL; } else if(status) { *status = MyHTML_STATUS_ERROR_MEMORY_ALLOCATION; } - + return collection; } myhtml_collection_t * myhtml_get_nodes_by_name(myhtml_tree_t* tree, myhtml_collection_t *collection, const char* html, size_t length, mystatus_t *status) { const myhtml_tag_context_t *tag_ctx = myhtml_tag_get_by_name(tree->tags, html, length); - + if(tag_ctx == NULL) return NULL; - + return myhtml_get_nodes_by_tag_id(tree, collection, tag_ctx->id, status); } @@ -506,7 +506,7 @@ myhtml_tree_node_t * myhtml_node_first(myhtml_tree_t* tree) // document -> html return tree->document->child; } - + return NULL; } @@ -538,10 +538,10 @@ myhtml_tree_node_t * myhtml_node_last_child(myhtml_tree_node_t *node) myhtml_tree_node_t * myhtml_node_create(myhtml_tree_t* tree, myhtml_tag_id_t tag_id, enum myhtml_namespace ns) { myhtml_tree_node_t *node = myhtml_tree_node_create(tree); - + node->tag_id = tag_id; node->ns = ns; - + return node; } @@ -569,9 +569,9 @@ myhtml_tree_node_t * myhtml_node_insert_before(myhtml_tree_node_t *target, myhtm { if(target == NULL || node == NULL) return NULL; - + myhtml_tree_node_insert_before(target, node); - + return node; } @@ -579,9 +579,9 @@ myhtml_tree_node_t * myhtml_node_insert_after(myhtml_tree_node_t *target, myhtml { if(target == NULL || node == NULL) return NULL; - + myhtml_tree_node_insert_after(target, node); - + return node; } @@ -589,9 +589,9 @@ myhtml_tree_node_t * myhtml_node_append_child(myhtml_tree_node_t *target, myhtml { if(target == NULL || node == NULL) return NULL; - + myhtml_tree_node_add_child(target, node); - + return node; } @@ -599,15 +599,15 @@ myhtml_tree_node_t * myhtml_node_insert_to_appropriate_place(myhtml_tree_node_t { if(target == NULL || node == NULL) return NULL; - + enum myhtml_tree_insertion_mode mode; - + target->tree->foster_parenting = true; target = myhtml_tree_appropriate_place_inserting_in_tree(target, &mode); target->tree->foster_parenting = false; - + myhtml_tree_node_insert_by_mode(target, node, mode); - + return node; } @@ -615,19 +615,19 @@ mycore_string_t * myhtml_node_text_set(myhtml_tree_node_t *node, const char* tex { if(node == NULL) return NULL; - + if(encoding >= MyENCODING_LAST_ENTRY) return NULL; - + myhtml_tree_t* tree = node->tree; - + if(node->token == NULL) { node->token = myhtml_token_node_create(tree->token, tree->mcasync_rules_token_id); - + if(node->token == NULL) return NULL; } - + if(node->token->str.data == NULL) { mycore_string_init(tree->mchar, tree->mchar_node_id, &node->token->str, (length + 2)); } @@ -639,17 +639,17 @@ mycore_string_t * myhtml_node_text_set(myhtml_tree_node_t *node, const char* tex else node->token->str.length = 0; } - + if(encoding != MyENCODING_UTF_8) { myencoding_string_append(&node->token->str, text, length, encoding); } else { mycore_string_append(&node->token->str, text, length); } - + node->token->raw_begin = 0; node->token->raw_length = 0; - + return &node->token->str; } @@ -657,19 +657,19 @@ mycore_string_t * myhtml_node_text_set_with_charef(myhtml_tree_node_t *node, con { if(node == NULL) return NULL; - + if(encoding >= MyENCODING_LAST_ENTRY) return NULL; - + myhtml_tree_t* tree = node->tree; - + if(node->token == NULL) { node->token = myhtml_token_node_create(tree->token, tree->mcasync_rules_token_id); - + if(node->token == NULL) return NULL; } - + if(node->token->str.data == NULL) { mycore_string_init(tree->mchar, tree->mchar_node_id, &node->token->str, (length + 2)); } @@ -681,19 +681,19 @@ mycore_string_t * myhtml_node_text_set_with_charef(myhtml_tree_node_t *node, con else node->token->str.length = 0; } - + myhtml_data_process_entry_t proc_entry; myhtml_data_process_entry_clean(&proc_entry); - + proc_entry.encoding = encoding; myencoding_result_clean(&proc_entry.res); - + myhtml_data_process(&proc_entry, &node->token->str, text, length); myhtml_data_process_end(&proc_entry, &node->token->str); - + node->token->raw_begin = 0; node->token->raw_length = 0; - + return &node->token->str; } @@ -721,18 +721,18 @@ const char * myhtml_tag_name_by_id(myhtml_tree_t* tree, myhtml_tag_id_t tag_id, { if(length) *length = 0; - + if(tree == NULL || tree->tags == NULL) return NULL; - + const myhtml_tag_context_t *tag_ctx = myhtml_tag_get_by_id(tree->tags, tag_id); - + if(tag_ctx == NULL) return NULL; - + if(length) *length = tag_ctx->name_length; - + return tag_ctx->name; } @@ -740,12 +740,12 @@ myhtml_tag_id_t myhtml_tag_id_by_name(myhtml_tree_t* tree, const char *tag_name, { if(tree == NULL || tree->tags == NULL) return MyHTML_TAG__UNDEF; - + const myhtml_tag_context_t *ctx = myhtml_tag_get_by_name(tree->tags, tag_name, length); - + if(ctx == NULL) return MyHTML_TAG__UNDEF; - + return ctx->id; } @@ -753,7 +753,7 @@ bool myhtml_node_is_close_self(myhtml_tree_node_t *node) { if(node->token) return (node->token->type & MyHTML_TOKEN_TYPE_CLOSE_SELF); - + return false; } @@ -761,7 +761,7 @@ myhtml_tree_attr_t * myhtml_node_attribute_first(myhtml_tree_node_t *node) { if(node->token) return node->token->attr_first; - + return NULL; } @@ -769,7 +769,7 @@ myhtml_tree_attr_t * myhtml_node_attribute_last(myhtml_tree_node_t *node) { if(node->token) return node->token->attr_last; - + return NULL; } @@ -779,13 +779,13 @@ const char * myhtml_node_text(myhtml_tree_node_t *node, size_t *length) { if(length) *length = node->token->str.length; - + return node->token->str.data; } - + if(length) *length = 0; - + return NULL; } @@ -793,23 +793,23 @@ mycore_string_t * myhtml_node_string(myhtml_tree_node_t *node) { if(node && node->token) return &node->token->str; - + return NULL; } -myhtml_position_t myhtml_node_raw_pasition(myhtml_tree_node_t *node) +myhtml_position_t myhtml_node_raw_position(myhtml_tree_node_t *node) { if(node && node->token) return (myhtml_position_t){node->token->raw_begin, node->token->raw_length}; - + return (myhtml_position_t){0, 0}; } -myhtml_position_t myhtml_node_element_pasition(myhtml_tree_node_t *node) +myhtml_position_t myhtml_node_element_position(myhtml_tree_node_t *node) { if(node && node->token) return (myhtml_position_t){node->token->element_begin, node->token->element_length}; - + return (myhtml_position_t){0, 0}; } @@ -834,36 +834,36 @@ mystatus_t myhtml_get_nodes_by_attribute_key_recursion(myhtml_tree_node_t* node, { if(node->token && node->token->attr_first) { myhtml_tree_attr_t* attr = node->token->attr_first; - + while(attr) { mycore_string_t* str_key = &attr->key; - + if(str_key->length == key_len && mycore_strncasecmp(str_key->data, key, key_len) == 0) { collection->list[ collection->length ] = node; - + collection->length++; if(collection->length >= collection->size) { mystatus_t status = myhtml_collection_check_size(collection, 1024, 0); - + if(status) return status; } } - + attr = attr->next; } } - + if(node->child) { mystatus_t status = myhtml_get_nodes_by_attribute_key_recursion(node->child, collection, key, key_len); - + if(status) return status; } - + node = node->next; } - + return MyHTML_STATUS_OK; } @@ -871,19 +871,19 @@ myhtml_collection_t * myhtml_get_nodes_by_attribute_key(myhtml_tree_t *tree, myh { if(collection == NULL) { collection = myhtml_collection_create(1024, status); - + if((status && *status) || collection == NULL) return NULL; } - + if(scope_node == NULL) scope_node = tree->node_html; - + mystatus_t rec_status = myhtml_get_nodes_by_attribute_key_recursion(scope_node, collection, key, key_len); - + if(rec_status && status) *status = rec_status; - + return collection; } @@ -897,14 +897,14 @@ bool myhtml_get_nodes_by_attribute_value_recursion_whitespace_separated(mycore_s { if(str->length < value_len) return false; - + const char *data = str->data; - + if(mycore_strncmp(data, value, value_len) == 0) { if((str->length > value_len && mycore_utils_whithspace(data[value_len], ==, ||)) || str->length == value_len) return true; } - + for(size_t i = 1; (str->length - i) >= value_len; i++) { if(mycore_utils_whithspace(data[(i - 1)], ==, ||)) { @@ -914,7 +914,7 @@ bool myhtml_get_nodes_by_attribute_value_recursion_whitespace_separated(mycore_s } } } - + return false; } @@ -922,7 +922,7 @@ bool myhtml_get_nodes_by_attribute_value_recursion_begin(mycore_string_t* str, c { if(str->length < value_len) return false; - + return mycore_strncmp(str->data, value, value_len) == 0; } @@ -930,7 +930,7 @@ bool myhtml_get_nodes_by_attribute_value_recursion_end(mycore_string_t* str, con { if(str->length < value_len) return false; - + return mycore_strncmp(&str->data[ (str->length - (str->length - value_len)) ], value, value_len) == 0; } @@ -938,23 +938,23 @@ bool myhtml_get_nodes_by_attribute_value_recursion_contain(mycore_string_t* str, { if(str->length < value_len) return false; - + const char *data = str->data; - + for(size_t i = 0; (str->length - i) >= value_len; i++) { if(mycore_strncmp(&data[i], value, value_len) == 0) { return true; } } - + return false; } bool myhtml_get_nodes_by_attribute_value_recursion_hyphen_separated(mycore_string_t* str, const char* value, size_t value_len) { const char *data = str->data; - + if(str->length < value_len) return false; else if(str->length == value_len && mycore_strncmp(data, value, value_len) == 0) { @@ -963,7 +963,7 @@ bool myhtml_get_nodes_by_attribute_value_recursion_hyphen_separated(mycore_strin else if(mycore_strncmp(data, value, value_len) == 0 && data[value_len] == '-') { return true; } - + return false; } @@ -977,14 +977,14 @@ bool myhtml_get_nodes_by_attribute_value_recursion_whitespace_separated_i(mycore { if(str->length < value_len) return false; - + const char *data = str->data; - + if(mycore_strncasecmp(data, value, value_len) == 0) { if((str->length > value_len && mycore_utils_whithspace(data[value_len], ==, ||)) || str->length == value_len) return true; } - + for(size_t i = 1; (str->length - i) >= value_len; i++) { if(mycore_utils_whithspace(data[(i - 1)], ==, ||)) { @@ -994,7 +994,7 @@ bool myhtml_get_nodes_by_attribute_value_recursion_whitespace_separated_i(mycore } } } - + return false; } @@ -1002,7 +1002,7 @@ bool myhtml_get_nodes_by_attribute_value_recursion_begin_i(mycore_string_t* str, { if(str->length < value_len) return false; - + return mycore_strncasecmp(str->data, value, value_len) == 0; } @@ -1010,7 +1010,7 @@ bool myhtml_get_nodes_by_attribute_value_recursion_end_i(mycore_string_t* str, c { if(str->length < value_len) return false; - + return mycore_strncasecmp(&str->data[ (str->length - (str->length - value_len)) ], value, value_len) == 0; } @@ -1018,23 +1018,23 @@ bool myhtml_get_nodes_by_attribute_value_recursion_contain_i(mycore_string_t* st { if(str->length < value_len) return false; - + const char *data = str->data; - + for(size_t i = 0; (str->length - i) >= value_len; i++) { if(mycore_strncasecmp(&data[i], value, value_len) == 0) { return true; } } - + return false; } bool myhtml_get_nodes_by_attribute_value_recursion_hyphen_separated_i(mycore_string_t* str, const char* value, size_t value_len) { const char *data = str->data; - + if(str->length < value_len) return false; else if(str->length == value_len && mycore_strncasecmp(data, value, value_len) == 0) { @@ -1043,7 +1043,7 @@ bool myhtml_get_nodes_by_attribute_value_recursion_hyphen_separated_i(mycore_str else if(mycore_strncasecmp(data, value, value_len) == 0 && data[value_len] == '-') { return true; } - + return false; } @@ -1056,36 +1056,36 @@ mystatus_t myhtml_get_nodes_by_attribute_value_recursion(myhtml_tree_node_t* nod { if(node->token && node->token->attr_first) { myhtml_tree_attr_t* attr = node->token->attr_first; - + while(attr) { mycore_string_t* str = &attr->value; - + if(func_eq(str, value, value_len)) { collection->list[ collection->length ] = node; - + collection->length++; if(collection->length >= collection->size) { mystatus_t status = myhtml_collection_check_size(collection, 1024, 0); - + if(status) return status; } } - + attr = attr->next; } } - + if(node->child) { mystatus_t status = myhtml_get_nodes_by_attribute_value_recursion(node->child, collection, func_eq, value, value_len); - + if(status) return status; } - + node = node->next; } - + return MyHTML_STATUS_OK; } @@ -1098,41 +1098,41 @@ mystatus_t myhtml_get_nodes_by_attribute_value_recursion_by_key(myhtml_tree_node { if(node->token && node->token->attr_first) { myhtml_tree_attr_t* attr = node->token->attr_first; - + while(attr) { mycore_string_t* str_key = &attr->key; mycore_string_t* str = &attr->value; - + if(str_key->length == key_len && mycore_strncasecmp(str_key->data, key, key_len) == 0) { if(func_eq(str, value, value_len)) { collection->list[ collection->length ] = node; - + collection->length++; if(collection->length >= collection->size) { mystatus_t status = myhtml_collection_check_size(collection, 1024, 0); - + if(status) return status; } } } - + attr = attr->next; } } - + if(node->child) { mystatus_t status = myhtml_get_nodes_by_attribute_value_recursion_by_key(node->child, collection, func_eq, key, key_len, value, value_len); - + if(status) return status; } - + node = node->next; } - + return MyHTML_STATUS_OK; } @@ -1144,24 +1144,24 @@ myhtml_collection_t * _myhtml_get_nodes_by_attribute_value(myhtml_tree_t *tree, { if(collection == NULL) { collection = myhtml_collection_create(1024, status); - + if((status && *status) || collection == NULL) return NULL; } - + if(node == NULL) node = tree->node_html; - + mystatus_t rec_status; - + if(key && key_len) rec_status = myhtml_get_nodes_by_attribute_value_recursion_by_key(node, collection, func_eq, key, key_len, value, value_len); else rec_status = myhtml_get_nodes_by_attribute_value_recursion(node, collection, func_eq, value, value_len); - + if(rec_status && status) *status = rec_status; - + return collection; } @@ -1176,7 +1176,7 @@ myhtml_collection_t * myhtml_get_nodes_by_attribute_value(myhtml_tree_t *tree, m myhtml_get_nodes_by_attribute_value_recursion_eq_i, key, key_len, value, value_len, status); } - + return _myhtml_get_nodes_by_attribute_value(tree, collection, node, myhtml_get_nodes_by_attribute_value_recursion_eq, key, key_len, value, value_len, status); @@ -1193,7 +1193,7 @@ myhtml_collection_t * myhtml_get_nodes_by_attribute_value_whitespace_separated(m myhtml_get_nodes_by_attribute_value_recursion_whitespace_separated_i, key, key_len, value, value_len, status); } - + return _myhtml_get_nodes_by_attribute_value(tree, collection, node, myhtml_get_nodes_by_attribute_value_recursion_whitespace_separated, key, key_len, value, value_len, status); @@ -1210,7 +1210,7 @@ myhtml_collection_t * myhtml_get_nodes_by_attribute_value_begin(myhtml_tree_t *t myhtml_get_nodes_by_attribute_value_recursion_begin_i, key, key_len, value, value_len, status); } - + return _myhtml_get_nodes_by_attribute_value(tree, collection, node, myhtml_get_nodes_by_attribute_value_recursion_begin, key, key_len, value, value_len, status); @@ -1227,7 +1227,7 @@ myhtml_collection_t * myhtml_get_nodes_by_attribute_value_end(myhtml_tree_t *tre myhtml_get_nodes_by_attribute_value_recursion_end_i, key, key_len, value, value_len, status); } - + return _myhtml_get_nodes_by_attribute_value(tree, collection, node, myhtml_get_nodes_by_attribute_value_recursion_end, key, key_len, value, value_len, status); @@ -1244,7 +1244,7 @@ myhtml_collection_t * myhtml_get_nodes_by_attribute_value_contain(myhtml_tree_t myhtml_get_nodes_by_attribute_value_recursion_contain_i, key, key_len, value, value_len, status); } - + return _myhtml_get_nodes_by_attribute_value(tree, collection, node, myhtml_get_nodes_by_attribute_value_recursion_contain, key, key_len, value, value_len, status); @@ -1261,7 +1261,7 @@ myhtml_collection_t * myhtml_get_nodes_by_attribute_value_hyphen_separated(myhtm myhtml_get_nodes_by_attribute_value_recursion_hyphen_separated_i, key, key_len, value, value_len, status); } - + return _myhtml_get_nodes_by_attribute_value(tree, collection, node, myhtml_get_nodes_by_attribute_value_recursion_hyphen_separated, key, key_len, value, value_len, status); @@ -1296,13 +1296,13 @@ const char * myhtml_attribute_key(myhtml_tree_attr_t *attr, size_t *length) { if(length) *length = attr->key.length; - + return attr->key.data; } - + if(length) *length = 0; - + return NULL; } @@ -1312,13 +1312,13 @@ const char * myhtml_attribute_value(myhtml_tree_attr_t *attr, size_t *length) { if(length) *length = attr->value.length; - + return attr->value.data; } - + if(length) *length = 0; - + return NULL; } @@ -1326,7 +1326,7 @@ mycore_string_t * myhtml_attribute_key_string(myhtml_tree_attr_t* attr) { if(attr) return &attr->key; - + return NULL; } @@ -1334,7 +1334,7 @@ mycore_string_t * myhtml_attribute_value_string(myhtml_tree_attr_t* attr) { if(attr) return &attr->value; - + return NULL; } @@ -1342,7 +1342,7 @@ myhtml_tree_attr_t * myhtml_attribute_by_key(myhtml_tree_node_t *node, const cha { if(node == NULL || node->token == NULL) return NULL; - + return myhtml_token_attr_by_name(node->token, key, key_len); } @@ -1350,16 +1350,16 @@ myhtml_tree_attr_t * myhtml_attribute_add(myhtml_tree_node_t *node, const char * { if(node == NULL) return NULL; - + myhtml_tree_t *tree = node->tree; - + if(node->token == NULL) { node->token = myhtml_token_node_create(tree->token, tree->mcasync_rules_token_id); - + if(node->token == NULL) return NULL; } - + return myhtml_token_node_attr_append_with_convert_encoding(tree->token, node->token, key, key_len, value, value_len, tree->mcasync_rules_token_id, encoding); } @@ -1368,7 +1368,7 @@ myhtml_tree_attr_t * myhtml_attribute_remove(myhtml_tree_node_t *node, myhtml_tr { if(node == NULL || node->token == NULL) return NULL; - + return myhtml_token_attr_remove(node->token, attr); } @@ -1376,7 +1376,7 @@ myhtml_tree_attr_t * myhtml_attribute_remove_by_key(myhtml_tree_node_t *node, co { if(node == NULL || node->token == NULL) return NULL; - + return myhtml_token_attr_remove_by_name(node->token, key, key_len); } @@ -1384,7 +1384,7 @@ void myhtml_attribute_delete(myhtml_tree_t *tree, myhtml_tree_node_t *node, myht { if(node == NULL || node->token == NULL) return; - + myhtml_token_attr_remove(node->token, attr); myhtml_attribute_free(tree, attr); } @@ -1395,7 +1395,7 @@ void myhtml_attribute_free(myhtml_tree_t *tree, myhtml_tree_attr_t *attr) mchar_async_free(attr->key.mchar, attr->key.node_idx, attr->key.data); if(attr->value.data) mchar_async_free(attr->value.mchar, attr->value.node_idx, attr->value.data); - + mcobject_async_free(tree->token->attr_obj, attr); } @@ -1403,7 +1403,7 @@ myhtml_position_t myhtml_attribute_key_raw_position(myhtml_tree_attr_t *attr) { if(attr) return (myhtml_position_t){attr->raw_key_begin, attr->raw_key_length}; - + return (myhtml_position_t){0, 0}; } @@ -1411,7 +1411,7 @@ myhtml_position_t myhtml_attribute_value_raw_position(myhtml_tree_attr_t *attr) { if(attr) return (myhtml_position_t){attr->raw_value_begin, attr->raw_value_length}; - + return (myhtml_position_t){0, 0}; } @@ -1421,30 +1421,30 @@ myhtml_position_t myhtml_attribute_value_raw_position(myhtml_tree_attr_t *attr) myhtml_collection_t * myhtml_collection_create(size_t size, mystatus_t *status) { myhtml_collection_t *collection = (myhtml_collection_t*)mycore_malloc(sizeof(myhtml_collection_t)); - + if(collection == NULL) { if(status) *status = MyHTML_STATUS_ERROR_MEMORY_ALLOCATION; - + return NULL; } - + collection->size = size; collection->length = 0; collection->list = (myhtml_tree_node_t **)mycore_malloc(sizeof(myhtml_tree_node_t*) * size); - + if(collection->list == NULL) { mycore_free(collection); - + if(status) *status = MyHTML_STATUS_ERROR_MEMORY_ALLOCATION; - + return NULL; } - + if(status) *status = MyHTML_STATUS_OK; - + return collection; } @@ -1454,7 +1454,7 @@ mystatus_t myhtml_collection_check_size(myhtml_collection_t *collection, size_t { size_t tmp_size = collection->length + need + upto_length + 1; myhtml_tree_node_t **tmp = (myhtml_tree_node_t **)mycore_realloc(collection->list, sizeof(myhtml_tree_node_t*) * tmp_size); - + if(tmp) { collection->size = tmp_size; collection->list = tmp; @@ -1462,7 +1462,7 @@ mystatus_t myhtml_collection_check_size(myhtml_collection_t *collection, size_t else return MyHTML_STATUS_ERROR_MEMORY_ALLOCATION; } - + return MyHTML_STATUS_OK; } @@ -1476,12 +1476,12 @@ myhtml_collection_t * myhtml_collection_destroy(myhtml_collection_t *collection) { if(collection == NULL) return NULL; - + if(collection->list) mycore_free(collection->list); - + mycore_free(collection); - + return NULL; } @@ -1491,84 +1491,84 @@ mystatus_t myhtml_queue_add(myhtml_tree_t *tree, size_t begin, myhtml_token_node // TODO: need refactoring this code // too many conditions mythread_queue_node_t *qnode = tree->current_qnode; - + if(tree->parse_flags & MyHTML_TREE_PARSE_FLAGS_SKIP_WHITESPACE_TOKEN) { if(token && token->tag_id == MyHTML_TAG__TEXT && token->type & MyHTML_TOKEN_TYPE_WHITESPACE) { myhtml_token_node_clean(token); token->raw_begin = token->element_begin = (tree->global_offset + begin); - + return MyHTML_STATUS_OK; } } - + #ifndef MyCORE_BUILD_WITHOUT_THREADS - + if(tree->flags & MyHTML_TREE_FLAGS_SINGLE_MODE) { if(qnode && token) { qnode->args = token; - + myhtml_parser_worker(0, qnode); myhtml_parser_stream(0, qnode); } - + tree->current_qnode = mythread_queue_node_malloc_limit(tree->myhtml->thread_stream, tree->queue, 4, NULL); } else { if(qnode) qnode->args = token; - + tree->current_qnode = mythread_queue_node_malloc_round(tree->myhtml->thread_stream, tree->queue_entry, NULL); } - + #else - + if(qnode && token) { qnode->args = token; - + myhtml_parser_worker(0, qnode); myhtml_parser_stream(0, qnode); } - + tree->current_qnode = mythread_queue_node_malloc_limit(tree->myhtml->thread_stream, tree->queue, 4, NULL); - + #endif /* MyCORE_BUILD_WITHOUT_THREADS */ - + if(tree->current_qnode == NULL) return MyHTML_STATUS_ERROR_MEMORY_ALLOCATION; - + tree->current_qnode->context = tree; tree->current_qnode->prev = qnode; - + if(qnode && token) myhtml_tokenizer_calc_current_namespace(tree, token); - + tree->current_token_node = myhtml_token_node_create(tree->token, tree->token->mcasync_token_id); if(tree->current_token_node == NULL) return MyHTML_STATUS_ERROR_MEMORY_ALLOCATION; - + tree->current_token_node->raw_begin = tree->current_token_node->element_begin = (tree->global_offset + begin); - + return MyHTML_STATUS_OK; } bool myhtml_utils_strcmp(const char* ab, const char* to_lowercase, size_t size) { size_t i = 0; - + for(;;) { if(i == size) return true; - + if((const unsigned char)(to_lowercase[i] > 0x40 && to_lowercase[i] < 0x5b ? (to_lowercase[i]|0x60) : to_lowercase[i]) != (const unsigned char)ab[i]) { return false; } - + i++; } - + return false; } @@ -1576,7 +1576,7 @@ bool myhtml_is_html_node(myhtml_tree_node_t *node, myhtml_tag_id_t tag_id) { if(node == NULL) return false; - + return node->tag_id == tag_id && node->ns == MyHTML_NAMESPACE_HTML; } @@ -1585,7 +1585,3 @@ myhtml_version_t myhtml_version(void) { return (myhtml_version_t){MyHTML_VERSION_MAJOR, MyHTML_VERSION_MINOR, MyHTML_VERSION_PATCH}; } - - - - diff --git a/source/myhtml/token.c b/source/myhtml/token.c index 0828dfc..bc28dac 100644 --- a/source/myhtml/token.c +++ b/source/myhtml/token.c @@ -1,20 +1,20 @@ /* Copyright (C) 2015-2017 Alexander Borisov - + This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 2.1 of the License, or (at your option) any later version. - + This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. - + You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - + Author: lex.borisov@gmail.com (Alexander Borisov) */ @@ -101,36 +101,36 @@ myhtml_token_t * myhtml_token_create(myhtml_tree_t* tree, size_t size) { if(size == 0) size = 4096; - + myhtml_token_t* token = (myhtml_token_t*)mycore_malloc(sizeof(myhtml_token_t)); - + if(token == NULL) return NULL; - + token->nodes_obj = mcobject_async_create(); - + if(token->nodes_obj == NULL) { mycore_free(token); return NULL; } - + token->attr_obj = mcobject_async_create(); - + if(token->attr_obj == NULL) { mycore_free(token->nodes_obj); mycore_free(token); - + return NULL; } - + mcobject_async_init(token->nodes_obj, 128, size, sizeof(myhtml_token_node_t)); mcobject_async_init(token->attr_obj, 128, size, sizeof(myhtml_token_attr_t)); - + token->mcasync_token_id = mcobject_async_node_add(token->nodes_obj, NULL); token->mcasync_attr_id = mcobject_async_node_add(token->attr_obj, NULL); - + token->tree = tree; - + return token; } @@ -150,15 +150,15 @@ myhtml_token_t * myhtml_token_destroy(myhtml_token_t* token) { if(token == NULL) return NULL; - + if(token->nodes_obj) token->nodes_obj = mcobject_async_destroy(token->nodes_obj, 1); - + if(token->attr_obj) token->attr_obj = mcobject_async_destroy(token->attr_obj, 1); - + mycore_free(token); - + return NULL; } @@ -167,7 +167,7 @@ myhtml_token_node_t * myhtml_token_node_create(myhtml_token_t* token, size_t asy myhtml_token_node_t *token_node = (myhtml_token_node_t*)mcobject_async_malloc(token->nodes_obj, async_node_id, NULL); if(token_node == NULL) return NULL; - + myhtml_token_node_clean(token_node); return token_node; } @@ -176,7 +176,7 @@ void myhtml_token_node_clean(myhtml_token_node_t* node) { memset(node, 0, sizeof(myhtml_token_node_t)); node->type = MyHTML_TOKEN_TYPE_OPEN|MyHTML_TOKEN_TYPE_WHITESPACE; - + mycore_string_clean_all(&node->str); } @@ -185,7 +185,7 @@ myhtml_token_attr_t * myhtml_token_attr_create(myhtml_token_t* token, size_t asy myhtml_token_attr_t *attr_node = mcobject_async_malloc(token->attr_obj, async_node_id, NULL); if(attr_node == NULL) return NULL; - + myhtml_token_attr_clean(attr_node); return attr_node; } @@ -194,7 +194,7 @@ void myhtml_token_attr_clean(myhtml_token_attr_t* attr) { memset(attr, 0, sizeof(myhtml_token_attr_t)); attr->ns = MyHTML_NAMESPACE_HTML; - + mycore_string_clean_all(&attr->key); mycore_string_clean_all(&attr->value); } @@ -204,19 +204,19 @@ myhtml_tag_id_t myhtml_token_node_tag_id(myhtml_token_node_t *token_node) return token_node->tag_id; } -myhtml_position_t myhtml_token_node_raw_pasition(myhtml_token_node_t *token_node) +myhtml_position_t myhtml_token_node_raw_position(myhtml_token_node_t *token_node) { if(token_node) return (myhtml_position_t){token_node->raw_begin, token_node->raw_length}; - + return (myhtml_position_t){0, 0}; } -myhtml_position_t myhtml_token_node_element_pasition(myhtml_token_node_t *token_node) +myhtml_position_t myhtml_token_node_element_position(myhtml_token_node_t *token_node) { if(token_node) return (myhtml_position_t){token_node->element_begin, token_node->element_length}; - + return (myhtml_position_t){0, 0}; } @@ -234,7 +234,7 @@ const char * myhtml_token_node_text(myhtml_token_node_t *token_node, size_t *len { if(length) *length = token_node->str.length; - + return token_node->str.data; } @@ -269,13 +269,13 @@ myhtml_token_node_t * myhtml_token_node_clone(myhtml_token_t* token, myhtml_toke { if(node == NULL) return NULL; - + myhtml_tree_t* tree = token->tree; myhtml_token_node_t* new_node = myhtml_token_node_create(token, token_thread_idx); - + if(new_node == NULL) return NULL; - + new_node->tag_id = node->tag_id; new_node->type = node->type; new_node->attr_first = NULL; @@ -284,17 +284,17 @@ myhtml_token_node_t * myhtml_token_node_clone(myhtml_token_t* token, myhtml_toke new_node->raw_length = node->raw_length; new_node->element_begin = node->element_begin; new_node->element_length = node->element_length; - + mycore_string_init(tree->mchar, tree->mchar_node_id, &new_node->str, node->str.size); myhtml_token_node_attr_copy(token, node, new_node, attr_thread_idx); - + return new_node; } void myhtml_token_node_text_append(myhtml_token_t* token, myhtml_token_node_t* dest, const char* text, size_t text_len) { mycore_string_init(token->tree->mchar, token->tree->mchar_node_id, &dest->str, (text_len + 2)); - + mycore_string_t* string = &dest->str; mycore_string_append(string, text, text_len); } @@ -305,36 +305,36 @@ myhtml_token_attr_t * myhtml_token_node_attr_append(myhtml_token_t* token, myhtm { myhtml_token_attr_t* new_attr = mcobject_async_malloc(token->attr_obj, thread_idx, NULL); new_attr->next = 0; - + if(key_len) { mycore_string_init(token->tree->mchar, token->tree->mchar_node_id, &new_attr->key, (key_len + 1)); mycore_string_append_lowercase(&new_attr->key, key, key_len); } else mycore_string_clean_all(&new_attr->key); - + if(value_len) { mycore_string_init(token->tree->mchar, token->tree->mchar_node_id, &new_attr->value, (value_len + 1)); mycore_string_append(&new_attr->value, value, value_len); } else mycore_string_clean_all(&new_attr->value); - + if(dest->attr_first == NULL) { new_attr->prev = 0; - + dest->attr_first = new_attr; dest->attr_last = new_attr; } else { dest->attr_last->next = new_attr; new_attr->prev = dest->attr_last; - + dest->attr_last = new_attr; } - + new_attr->ns = MyHTML_NAMESPACE_HTML; - + return new_attr; } @@ -345,10 +345,10 @@ myhtml_token_attr_t * myhtml_token_node_attr_append_with_convert_encoding(myhtml { myhtml_token_attr_t* new_attr = mcobject_async_malloc(token->attr_obj, thread_idx, NULL); new_attr->next = 0; - + if(key_len) { mycore_string_init(token->tree->mchar, token->tree->mchar_node_id, &new_attr->key, (key_len + 1)); - + if(encoding == MyENCODING_UTF_8) mycore_string_append_lowercase(&new_attr->key, key, key_len); else @@ -356,10 +356,10 @@ myhtml_token_attr_t * myhtml_token_node_attr_append_with_convert_encoding(myhtml } else mycore_string_clean_all(&new_attr->key); - + if(value_len) { mycore_string_init(token->tree->mchar, token->tree->mchar_node_id, &new_attr->value, (value_len + 1)); - + if(encoding == MyENCODING_UTF_8) mycore_string_append(&new_attr->value, value, value_len); else @@ -367,35 +367,35 @@ myhtml_token_attr_t * myhtml_token_node_attr_append_with_convert_encoding(myhtml } else mycore_string_clean_all(&new_attr->value); - + if(dest->attr_first == NULL) { new_attr->prev = 0; - + dest->attr_first = new_attr; dest->attr_last = new_attr; } else { dest->attr_last->next = new_attr; new_attr->prev = dest->attr_last; - + dest->attr_last = new_attr; } - + new_attr->ns = MyHTML_NAMESPACE_HTML; - + return new_attr; } void myhtml_token_node_attr_copy_with_check(myhtml_token_t* token, myhtml_token_node_t* target, myhtml_token_node_t* dest, size_t thread_idx) { myhtml_token_attr_t* attr = target->attr_first; - + while (attr) { if(attr->key.length && myhtml_token_attr_by_name(dest, attr->key.data, attr->key.length) == NULL) { myhtml_token_attr_copy(token, attr, dest, thread_idx); } - + attr = attr->next; } } @@ -403,7 +403,7 @@ void myhtml_token_node_attr_copy_with_check(myhtml_token_t* token, myhtml_token_ void myhtml_token_node_attr_copy(myhtml_token_t* token, myhtml_token_node_t* target, myhtml_token_node_t* dest, size_t thread_idx) { myhtml_token_attr_t* attr = target->attr_first; - + while (attr) { myhtml_token_attr_copy(token, attr, dest, thread_idx); @@ -415,36 +415,36 @@ bool myhtml_token_attr_copy(myhtml_token_t* token, myhtml_token_attr_t* attr, my { myhtml_token_attr_t* new_attr = mcobject_async_malloc(token->attr_obj, thread_idx, NULL); new_attr->next = 0; - + if(attr->key.length) { mycore_string_init(token->tree->mchar, token->tree->mchar_node_id, &new_attr->key, (attr->key.length + 1)); mycore_string_append_lowercase(&new_attr->key, attr->key.data, attr->key.length); } else mycore_string_clean_all(&new_attr->key); - + if(attr->value.length) { mycore_string_init(token->tree->mchar, token->tree->mchar_node_id, &new_attr->value, (attr->value.length + 1)); mycore_string_append(&new_attr->value, attr->value.data, attr->value.length); } else mycore_string_clean_all(&new_attr->value); - + if(dest->attr_first == NULL) { new_attr->prev = 0; - + dest->attr_first = new_attr; dest->attr_last = new_attr; } else { dest->attr_last->next = new_attr; new_attr->prev = dest->attr_last; - + dest->attr_last = new_attr; } - + new_attr->ns = attr->ns; - + return true; } @@ -452,7 +452,7 @@ myhtml_token_attr_t * myhtml_token_attr_match(myhtml_token_t* token, myhtml_toke const char* key, size_t key_size, const char* value, size_t value_size) { myhtml_token_attr_t* attr = target->attr_first; - + while (attr) { if(attr->key.length == key_size && attr->value.length == value_size) @@ -464,10 +464,10 @@ myhtml_token_attr_t * myhtml_token_attr_match(myhtml_token_t* token, myhtml_toke return NULL; } } - + attr = attr->next; } - + return NULL; } @@ -475,7 +475,7 @@ myhtml_token_attr_t * myhtml_token_attr_match_case(myhtml_token_t* token, myhtml const char* key, size_t key_size, const char* value, size_t value_size) { myhtml_token_attr_t* attr = target->attr_first; - + while (attr) { if(attr->key.length == key_size && attr->value.length == value_size) @@ -487,17 +487,17 @@ myhtml_token_attr_t * myhtml_token_attr_match_case(myhtml_token_t* token, myhtml return NULL; } } - + attr = attr->next; } - + return NULL; } void myhtml_token_adjust_mathml_attributes(myhtml_token_node_t* target) { myhtml_token_attr_t* attr = myhtml_token_attr_by_name(target, "definitionurl", 13); - + if(attr) { memcpy(attr->key.data, "definitionURL", 13); } @@ -512,10 +512,10 @@ void _myhtml_token_create_copy_srt(myhtml_token_t* token, const char* from, size void myhtml_token_strict_doctype_by_token(myhtml_token_t* token, myhtml_token_node_t* target, myhtml_tree_doctype_t* return_doctype) { myhtml_token_attr_t* attr = target->attr_first; - + if(attr && attr->key.length) { _myhtml_token_create_copy_srt(token, attr->key.data, attr->key.length, &return_doctype->attr_name); - + if(mycore_strcmp("html", return_doctype->attr_name)) return_doctype->is_html = false; else @@ -523,41 +523,41 @@ void myhtml_token_strict_doctype_by_token(myhtml_token_t* token, myhtml_token_no } else { return_doctype->is_html = false; - + _myhtml_token_create_copy_srt(token, "\0", 1, &return_doctype->attr_name); - + if(return_doctype->attr_public) mycore_free(return_doctype->attr_public); return_doctype->attr_public = NULL; - + if(return_doctype->attr_system) mycore_free(return_doctype->attr_system); return_doctype->attr_system = NULL; - + return; } - + attr = attr->next; - + if(attr && attr->value.length) { if(mycore_strcasecmp(attr->value.data, "PUBLIC") == 0) { // try see public attr = attr->next; - + if(attr && attr->value.length) { _myhtml_token_create_copy_srt(token, attr->value.data, attr->value.length, &return_doctype->attr_public); - + // try see system attr = attr->next; - + if(attr && attr->value.length) _myhtml_token_create_copy_srt(token, attr->value.data, attr->value.length, &return_doctype->attr_system); else { if(return_doctype->attr_system) mycore_free(return_doctype->attr_system); - + _myhtml_token_create_copy_srt(token, "\0", 1, &return_doctype->attr_system); } } @@ -565,7 +565,7 @@ void myhtml_token_strict_doctype_by_token(myhtml_token_t* token, myhtml_token_no if(return_doctype->attr_public) mycore_free(return_doctype->attr_public); return_doctype->attr_public = NULL; - + if(return_doctype->attr_system) mycore_free(return_doctype->attr_system); return_doctype->attr_system = NULL; @@ -574,7 +574,7 @@ void myhtml_token_strict_doctype_by_token(myhtml_token_t* token, myhtml_token_no else if(mycore_strncasecmp(attr->value.data, "SYSTEM", attr->value.length) == 0) { attr = attr->next; - + if(attr && attr->value.length) { _myhtml_token_create_copy_srt(token, "\0", 1, &return_doctype->attr_public); _myhtml_token_create_copy_srt(token, attr->value.data, attr->value.length, &return_doctype->attr_system); @@ -583,7 +583,7 @@ void myhtml_token_strict_doctype_by_token(myhtml_token_t* token, myhtml_token_no if(return_doctype->attr_public) mycore_free(return_doctype->attr_public); return_doctype->attr_public = NULL; - + if(return_doctype->attr_system) mycore_free(return_doctype->attr_system); return_doctype->attr_system = NULL; @@ -593,7 +593,7 @@ void myhtml_token_strict_doctype_by_token(myhtml_token_t* token, myhtml_token_no if(return_doctype->attr_public) mycore_free(return_doctype->attr_public); return_doctype->attr_public = NULL; - + if(return_doctype->attr_system) mycore_free(return_doctype->attr_system); return_doctype->attr_system = NULL; @@ -617,7 +617,7 @@ bool myhtml_token_doctype_check_xhtml_1_0(myhtml_tree_doctype_t* return_doctype) { if(return_doctype->attr_system == NULL) return true; - + return mycore_strcmp(return_doctype->attr_public, "-//W3C//DTD XHTML 1.0 Strict//EN") && mycore_strcmp(return_doctype->attr_system, "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"); } @@ -626,7 +626,7 @@ bool myhtml_token_doctype_check_xhtml_1_1(myhtml_tree_doctype_t* return_doctype) { if(return_doctype->attr_system == NULL) return true; - + return mycore_strcmp(return_doctype->attr_public, "-//W3C//DTD XHTML 1.1//EN") && mycore_strcmp(return_doctype->attr_system, "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd"); } @@ -635,19 +635,19 @@ bool myhtml_token_release_and_check_doctype_attributes(myhtml_token_t* token, my { if(return_doctype == NULL) return false; - + myhtml_token_strict_doctype_by_token(token, target, return_doctype); - + if(return_doctype->attr_name == NULL) return false; - + if((return_doctype->is_html || return_doctype->attr_public || (return_doctype->attr_system && mycore_strcmp(return_doctype->attr_system, "about:legacy-compat")))) { if(return_doctype->attr_public == NULL) return false; - + if(return_doctype->is_html && myhtml_token_doctype_check_html_4_0(return_doctype) && myhtml_token_doctype_check_html_4_01(return_doctype) && @@ -657,19 +657,19 @@ bool myhtml_token_release_and_check_doctype_attributes(myhtml_token_t* token, my return false; } } - + return true; } void myhtml_token_adjust_svg_attributes(myhtml_token_node_t* target) { size_t count = sizeof(myhtml_token_attr_svg_replacement) / sizeof(myhtml_token_replacement_entry_t); - + for (size_t i = 0; i < count; i++) { myhtml_token_attr_t* attr = myhtml_token_attr_by_name(target, myhtml_token_attr_svg_replacement[i].from, myhtml_token_attr_svg_replacement[i].from_size); - + if(attr) { mycore_string_clean(&attr->key); mycore_string_append(&attr->key, myhtml_token_attr_svg_replacement[i].to, @@ -681,17 +681,17 @@ void myhtml_token_adjust_svg_attributes(myhtml_token_node_t* target) void myhtml_token_adjust_foreign_attributes(myhtml_token_node_t* target) { size_t count = sizeof(myhtml_token_attr_namespace_replacement) / sizeof(myhtml_token_namespace_replacement_t); - + for (size_t i = 0; i < count; i++) { myhtml_token_attr_t* attr = myhtml_token_attr_by_name(target, myhtml_token_attr_namespace_replacement[i].from, myhtml_token_attr_namespace_replacement[i].from_size); - + if(attr) { mycore_string_clean(&attr->key); mycore_string_append(&attr->key, myhtml_token_attr_namespace_replacement[i].to, myhtml_token_attr_namespace_replacement[i].to_size); - + attr->ns = myhtml_token_attr_namespace_replacement[i].ns; } } @@ -701,10 +701,10 @@ bool myhtml_token_attr_compare(myhtml_token_node_t* target, myhtml_token_node_t* { if(target == NULL || dest == NULL) return false; - + myhtml_token_attr_t* target_attr = target->attr_first; myhtml_token_attr_t* dest_attr = dest->attr_first; - + while (target_attr && dest_attr) { if(target_attr->key.length == dest_attr->key.length && @@ -712,37 +712,37 @@ bool myhtml_token_attr_compare(myhtml_token_node_t* target, myhtml_token_node_t* { if(mycore_strcmp(target_attr->key.data, dest_attr->key.data) != 0) break; - + if(mycore_strcasecmp(target_attr->value.data, dest_attr->value.data) != 0) break; } else break; - + target_attr = target_attr->next; dest_attr = dest_attr->next; } - + if(target_attr == NULL && dest_attr == NULL) return true; - + return false; } myhtml_token_attr_t * myhtml_token_attr_by_name(myhtml_token_node_t* node, const char* name, size_t name_length) { myhtml_token_attr_t* attr = node->attr_first; - + while (attr) { if(name_length == attr->key.length) { if(mycore_strcmp(attr->key.data, name) == 0) break; } - + attr = attr->next; } - + return attr; } @@ -751,24 +751,24 @@ void myhtml_token_delete(myhtml_token_t* token, myhtml_token_node_t* node) if(node->str.data && node->str.mchar) { mchar_async_free(node->str.mchar, node->str.node_idx, node->str.data); } - + mcobject_async_free(token->nodes_obj, node); } void myhtml_token_attr_delete_all(myhtml_token_t* token, myhtml_token_node_t* node) { myhtml_token_attr_t* attr = node->attr_first; - + while (attr) { if(attr->key.data && attr->key.mchar) { mchar_async_free(attr->key.mchar, attr->key.node_idx, attr->key.data); } - + if(attr->value.data && attr->value.mchar) { mchar_async_free(attr->value.mchar, attr->value.node_idx, attr->value.data); } - + attr = attr->next; } } @@ -783,18 +783,18 @@ myhtml_token_attr_t * myhtml_token_attr_remove(myhtml_token_node_t* node, myhtml else { node->attr_first = attr->next; } - + if(attr->next) { attr->next->prev = attr->prev; } else { node->attr_last = attr->prev; } - + attr->next = NULL; attr->prev = NULL; } - + return attr; } @@ -807,13 +807,13 @@ myhtml_token_node_t * myhtml_token_merged_two_token_string(myhtml_tree_t* tree, { myhtml_token_node_wait_for_done(tree->token, token_to); myhtml_token_node_wait_for_done(tree->token, token_from); - + mycore_string_t *string1 = &token_to->str; mycore_string_t *string2 = &token_from->str; - + token_to->raw_begin = 0; token_to->raw_length = 0; - + if(token_to->str.node_idx == tree->mchar_node_id) { if(cp_reverse) { @@ -822,7 +822,7 @@ myhtml_token_node_t * myhtml_token_merged_two_token_string(myhtml_tree_t* tree, else { mycore_string_copy(string1, string2); } - + return token_to; } if(token_from->str.node_idx == tree->mchar_node_id) @@ -833,13 +833,13 @@ myhtml_token_node_t * myhtml_token_merged_two_token_string(myhtml_tree_t* tree, else { mycore_string_copy(string1, string2); } - + return token_from; } else { mycore_string_t string_base; mycore_string_init(tree->mchar, tree->mchar_node_id, &string_base, (string1->length + string2->length + 2)); - + if(cp_reverse) { mycore_string_copy(&string_base, string2); mycore_string_copy(&string_base, string1); @@ -848,22 +848,21 @@ myhtml_token_node_t * myhtml_token_merged_two_token_string(myhtml_tree_t* tree, mycore_string_copy(&string_base, string1); mycore_string_copy(&string_base, string2); } - + token_to->str = string_base; } - + return token_to; } void myhtml_token_set_replacement_character_for_null_token(myhtml_tree_t* tree, myhtml_token_node_t* node) { myhtml_token_node_wait_for_done(tree->token, node); - + mycore_string_t new_str; mycore_string_init(tree->mchar, tree->mchar_node_id, &new_str, (node->str.length + 2)); - + mycore_string_append_with_replacement_null_characters(&new_str, node->str.data, node->str.length); - + node->str = new_str; } -