From 68d0adbe4d79200f590017df179a714ad4e5d8b2 Mon Sep 17 00:00:00 2001 From: Azq2 Date: Sat, 12 Jan 2019 18:38:11 +0200 Subject: [PATCH] sync modest from myhtml --- include/myencoding/encoding.h | 4 ++-- include/myhtml/api.h | 6 ++--- include/myhtml/myosi.h | 2 +- source/myencoding/detect.c | 38 ++++++++++++++++---------------- source/myencoding/encoding.h | 4 ++-- source/myhtml/api.h | 6 ++--- source/myhtml/myhtml.c | 6 ++++- source/myhtml/myosi.h | 2 +- source/myhtml/serialization.c | 1 + source/myhtml/tokenizer.c | 4 +++- source/myhtml/tokenizer_script.c | 4 ++++ 11 files changed, 44 insertions(+), 33 deletions(-) diff --git a/include/myencoding/encoding.h b/include/myencoding/encoding.h index 24c6c70..7511a58 100644 --- a/include/myencoding/encoding.h +++ b/include/myencoding/encoding.h @@ -153,8 +153,8 @@ const char * myencoding_name_by_id(myencoding_t encoding, size_t *length); bool myencoding_extracting_character_encoding_from_charset(const char *data, size_t data_size, myencoding_t *encoding); myencoding_t myencoding_prescan_stream_to_determine_encoding(const char *data, size_t data_size); -bool myencoding_extracting_character_encoding_from_charset_with_found(const char *data, size_t data_size, myencoding_t *encoding, const char **found, size_t *found_lenght); -myencoding_t myencoding_prescan_stream_to_determine_encoding_with_found(const char *data, size_t data_size, const char **found, size_t *found_lenght); +bool myencoding_extracting_character_encoding_from_charset_with_found(const char *data, size_t data_size, myencoding_t *encoding, const char **found, size_t *found_length); +myencoding_t myencoding_prescan_stream_to_determine_encoding_with_found(const char *data, size_t data_size, const char **found, size_t *found_length); #ifdef __cplusplus } /* extern "C" */ diff --git a/include/myhtml/api.h b/include/myhtml/api.h index 0c9981b..5f46ad6 100755 --- a/include/myhtml/api.h +++ b/include/myhtml/api.h @@ -36,7 +36,7 @@ #define MyHTML_VERSION_MAJOR 4 #define MyHTML_VERSION_MINOR 0 -#define MyHTML_VERSION_PATCH 2 +#define MyHTML_VERSION_PATCH 5 #define MyHTML_VERSION_STRING MyCORE_STR(MyHTML_VERSION_MAJOR) MyCORE_STR(.) MyCORE_STR(MyHTML_VERSION_MINOR) MyCORE_STR(.) MyCORE_STR(MyHTML_VERSION_PATCH) @@ -1991,7 +1991,7 @@ myencoding_extracting_character_encoding_from_charset(const char *data, size_t d */ myencoding_t myencoding_prescan_stream_to_determine_encoding_with_found(const char *data, size_t data_size, - const char **found, size_t *found_lenght); + const char **found, size_t *found_length); /** * Extracting character encoding from string. Find "charset=" and see encoding. Return found raw data. @@ -2011,7 +2011,7 @@ myencoding_prescan_stream_to_determine_encoding_with_found(const char *data, siz bool myencoding_extracting_character_encoding_from_charset_with_found(const char *data, size_t data_size, myencoding_t *encoding, - const char **found, size_t *found_lenght); + const char **found, size_t *found_length); /*********************************************************************************** * diff --git a/include/myhtml/myosi.h b/include/myhtml/myosi.h index 41f0a34..9bdba3b 100644 --- a/include/myhtml/myosi.h +++ b/include/myhtml/myosi.h @@ -26,7 +26,7 @@ #define MyHTML_VERSION_MAJOR 4 #define MyHTML_VERSION_MINOR 0 -#define MyHTML_VERSION_PATCH 2 +#define MyHTML_VERSION_PATCH 5 #define MyHTML_VERSION_STRING MyCORE_STR(MyHTML_VERSION_MAJOR) MyCORE_STR(.) MyCORE_STR(MyHTML_VERSION_MINOR) MyCORE_STR(.) MyCORE_STR(MyHTML_VERSION_PATCH) diff --git a/source/myencoding/detect.c b/source/myencoding/detect.c index 3382eea..064e34d 100644 --- a/source/myencoding/detect.c +++ b/source/myencoding/detect.c @@ -411,14 +411,14 @@ bool myencoding_extracting_character_encoding_from_charset(const char *data, siz return myencoding_extracting_character_encoding_from_charset_with_found(data, data_size, encoding, NULL, NULL); } -bool myencoding_extracting_character_encoding_from_charset_with_found(const char *data, size_t data_size, myencoding_t *encoding, const char **found, size_t *found_lenght) +bool myencoding_extracting_character_encoding_from_charset_with_found(const char *data, size_t data_size, myencoding_t *encoding, const char **found, size_t *found_length) { *encoding = MyENCODING_NOT_DETERMINED; if(found) *found = NULL; - if(found_lenght) - *found_lenght = 0; + if(found_length) + *found_length = 0; /* 1 */ size_t length = 0; @@ -485,8 +485,8 @@ bool myencoding_extracting_character_encoding_from_charset_with_found(const char { if(found) *found = &data[begin]; - if(found_lenght) - *found_lenght = (length - begin); + if(found_length) + *found_length = (length - begin); return myencoding_by_name(&data[begin], (length - begin), encoding); } @@ -508,8 +508,8 @@ bool myencoding_extracting_character_encoding_from_charset_with_found(const char { if(found) *found = &data[begin]; - if(found_lenght) - *found_lenght = (length - begin); + if(found_length) + *found_length = (length - begin); return myencoding_by_name(&data[begin], (length - begin), encoding); } @@ -533,8 +533,8 @@ bool myencoding_extracting_character_encoding_from_charset_with_found(const char { if(found) *found = &data[begin]; - if(found_lenght) - *found_lenght = (length - begin); + if(found_length) + *found_length = (length - begin); return myencoding_by_name(&data[begin], (length - begin), encoding); } @@ -544,8 +544,8 @@ bool myencoding_extracting_character_encoding_from_charset_with_found(const char if(found) *found = &data[begin]; - if(found_lenght) - *found_lenght = (length - begin); + if(found_length) + *found_length = (length - begin); return myencoding_by_name(&data[begin], (length - begin), encoding); } @@ -748,7 +748,7 @@ size_t myencoding_prescan_stream_to_determine_encoding_get_attr(const unsigned c return myencoding_prescan_stream_to_determine_encoding_get_attr_value(udata, length, data_size, attr, it_last); } -bool myencoding_prescan_stream_to_determine_encoding_check_meta(const unsigned char *udata, size_t *length, size_t data_size, myencoding_t *encoding, const char **found, size_t *found_lenght) +bool myencoding_prescan_stream_to_determine_encoding_check_meta(const unsigned char *udata, size_t *length, size_t data_size, myencoding_t *encoding, const char **found, size_t *found_length) { myencoding_detect_attr_t attr; @@ -788,7 +788,7 @@ bool myencoding_prescan_stream_to_determine_encoding_check_meta(const unsigned c if((is_exists & 2) == 0) { is_exists |= 2; - if(myencoding_extracting_character_encoding_from_charset_with_found((const char*)(&udata[ attr.value_begin ]), attr.value_length, encoding, found, found_lenght)) { + if(myencoding_extracting_character_encoding_from_charset_with_found((const char*)(&udata[ attr.value_begin ]), attr.value_length, encoding, found, found_length)) { need_pragma = 2; } } @@ -801,8 +801,8 @@ bool myencoding_prescan_stream_to_determine_encoding_check_meta(const unsigned c if(found) *found = (const char*)(&udata[ attr.value_begin ]); - if(found_lenght) - *found_lenght = attr.value_length; + if(found_length) + *found_length = attr.value_length; myencoding_by_name((const char*)(&udata[ attr.value_begin ]), attr.value_length, encoding); need_pragma = 1; @@ -937,15 +937,15 @@ myencoding_t myencoding_prescan_stream_to_determine_encoding(const char *data, s return myencoding_prescan_stream_to_determine_encoding_with_found(data, data_size, NULL, NULL); } -myencoding_t myencoding_prescan_stream_to_determine_encoding_with_found(const char *data, size_t data_size, const char **found, size_t *found_lenght) +myencoding_t myencoding_prescan_stream_to_determine_encoding_with_found(const char *data, size_t data_size, const char **found, size_t *found_length) { const unsigned char* udata = (const unsigned char*)data; myencoding_t encoding = MyENCODING_NOT_DETERMINED; if(found) *found = NULL; - if(found_lenght) - *found_lenght = 0; + if(found_length) + *found_length = 0; size_t i = 0; while(i < data_size) { @@ -974,7 +974,7 @@ myencoding_t myencoding_prescan_stream_to_determine_encoding_with_found(const ch { i++; - if(myencoding_prescan_stream_to_determine_encoding_check_meta(udata, &i, data_size, &encoding, found, found_lenght)) + if(myencoding_prescan_stream_to_determine_encoding_check_meta(udata, &i, data_size, &encoding, found, found_length)) return encoding; } } diff --git a/source/myencoding/encoding.h b/source/myencoding/encoding.h index 6e2f38a..7cdd980 100644 --- a/source/myencoding/encoding.h +++ b/source/myencoding/encoding.h @@ -153,8 +153,8 @@ const char * myencoding_name_by_id(myencoding_t encoding, size_t *length); bool myencoding_extracting_character_encoding_from_charset(const char *data, size_t data_size, myencoding_t *encoding); myencoding_t myencoding_prescan_stream_to_determine_encoding(const char *data, size_t data_size); -bool myencoding_extracting_character_encoding_from_charset_with_found(const char *data, size_t data_size, myencoding_t *encoding, const char **found, size_t *found_lenght); -myencoding_t myencoding_prescan_stream_to_determine_encoding_with_found(const char *data, size_t data_size, const char **found, size_t *found_lenght); +bool myencoding_extracting_character_encoding_from_charset_with_found(const char *data, size_t data_size, myencoding_t *encoding, const char **found, size_t *found_length); +myencoding_t myencoding_prescan_stream_to_determine_encoding_with_found(const char *data, size_t data_size, const char **found, size_t *found_length); #ifdef __cplusplus } /* extern "C" */ diff --git a/source/myhtml/api.h b/source/myhtml/api.h index 0c9981b..5f46ad6 100755 --- a/source/myhtml/api.h +++ b/source/myhtml/api.h @@ -36,7 +36,7 @@ #define MyHTML_VERSION_MAJOR 4 #define MyHTML_VERSION_MINOR 0 -#define MyHTML_VERSION_PATCH 2 +#define MyHTML_VERSION_PATCH 5 #define MyHTML_VERSION_STRING MyCORE_STR(MyHTML_VERSION_MAJOR) MyCORE_STR(.) MyCORE_STR(MyHTML_VERSION_MINOR) MyCORE_STR(.) MyCORE_STR(MyHTML_VERSION_PATCH) @@ -1991,7 +1991,7 @@ myencoding_extracting_character_encoding_from_charset(const char *data, size_t d */ myencoding_t myencoding_prescan_stream_to_determine_encoding_with_found(const char *data, size_t data_size, - const char **found, size_t *found_lenght); + const char **found, size_t *found_length); /** * Extracting character encoding from string. Find "charset=" and see encoding. Return found raw data. @@ -2011,7 +2011,7 @@ myencoding_prescan_stream_to_determine_encoding_with_found(const char *data, siz bool myencoding_extracting_character_encoding_from_charset_with_found(const char *data, size_t data_size, myencoding_t *encoding, - const char **found, size_t *found_lenght); + const char **found, size_t *found_length); /*********************************************************************************** * diff --git a/source/myhtml/myhtml.c b/source/myhtml/myhtml.c index 217f356..81b377e 100644 --- a/source/myhtml/myhtml.c +++ b/source/myhtml/myhtml.c @@ -125,7 +125,11 @@ mystatus_t myhtml_init(myhtml_t* myhtml, enum myhtml_options opt, size_t thread_ #else /* if undef MyCORE_BUILD_WITHOUT_THREADS */ if(status) return status; - + + if(thread_count == 0) { + thread_count = 1; + } + switch (opt) { case MyHTML_OPTIONS_PARSE_MODE_SINGLE: if((status = myhtml_create_stream_and_batch(myhtml, 0, 0))) diff --git a/source/myhtml/myosi.h b/source/myhtml/myosi.h index 993a74c..c123b1b 100644 --- a/source/myhtml/myosi.h +++ b/source/myhtml/myosi.h @@ -26,7 +26,7 @@ #define MyHTML_VERSION_MAJOR 4 #define MyHTML_VERSION_MINOR 0 -#define MyHTML_VERSION_PATCH 2 +#define MyHTML_VERSION_PATCH 5 #define MyHTML_VERSION_STRING MyCORE_STR(MyHTML_VERSION_MAJOR) MyCORE_STR(.) MyCORE_STR(MyHTML_VERSION_MINOR) MyCORE_STR(.) MyCORE_STR(MyHTML_VERSION_PATCH) diff --git a/source/myhtml/serialization.c b/source/myhtml/serialization.c index c10a513..7c8476e 100755 --- a/source/myhtml/serialization.c +++ b/source/myhtml/serialization.c @@ -629,3 +629,4 @@ mystatus_t myhtml_serialization_node_buffer(myhtml_tree_node_t* node, mycore_str return myhtml_serialization_node_callback(node, myhtml_serialization_concatenate, str); } + diff --git a/source/myhtml/tokenizer.c b/source/myhtml/tokenizer.c index 1dca519..bc6de7f 100644 --- a/source/myhtml/tokenizer.c +++ b/source/myhtml/tokenizer.c @@ -77,7 +77,9 @@ mystatus_t myhtml_tokenizer_chunk_process(myhtml_tree_t* tree, const char* html, #endif if(tree->current_qnode == NULL) { - myhtml_tokenizer_set_first_settings(tree, html, html_length); + mystatus_t status = myhtml_tokenizer_set_first_settings(tree, html, html_length); + if(status) + return status; } size_t offset = 0; diff --git a/source/myhtml/tokenizer_script.c b/source/myhtml/tokenizer_script.c index dda927e..e60ddf2 100644 --- a/source/myhtml/tokenizer_script.c +++ b/source/myhtml/tokenizer_script.c @@ -594,3 +594,7 @@ size_t myhtml_tokenizer_state_script_data_double_escape_end(myhtml_tree_t* tree, return html_offset; } + + + +