sync modest from myhtml

This commit is contained in:
Azq2 2019-01-12 18:38:11 +02:00
parent d021b90751
commit 68d0adbe4d
11 changed files with 44 additions and 33 deletions

View File

@ -153,8 +153,8 @@ const char * myencoding_name_by_id(myencoding_t encoding, size_t *length);
bool myencoding_extracting_character_encoding_from_charset(const char *data, size_t data_size, myencoding_t *encoding); bool myencoding_extracting_character_encoding_from_charset(const char *data, size_t data_size, myencoding_t *encoding);
myencoding_t myencoding_prescan_stream_to_determine_encoding(const char *data, size_t data_size); myencoding_t myencoding_prescan_stream_to_determine_encoding(const char *data, size_t data_size);
bool myencoding_extracting_character_encoding_from_charset_with_found(const char *data, size_t data_size, myencoding_t *encoding, const char **found, size_t *found_lenght); bool myencoding_extracting_character_encoding_from_charset_with_found(const char *data, size_t data_size, myencoding_t *encoding, const char **found, size_t *found_length);
myencoding_t myencoding_prescan_stream_to_determine_encoding_with_found(const char *data, size_t data_size, const char **found, size_t *found_lenght); myencoding_t myencoding_prescan_stream_to_determine_encoding_with_found(const char *data, size_t data_size, const char **found, size_t *found_length);
#ifdef __cplusplus #ifdef __cplusplus
} /* extern "C" */ } /* extern "C" */

View File

@ -36,7 +36,7 @@
#define MyHTML_VERSION_MAJOR 4 #define MyHTML_VERSION_MAJOR 4
#define MyHTML_VERSION_MINOR 0 #define MyHTML_VERSION_MINOR 0
#define MyHTML_VERSION_PATCH 2 #define MyHTML_VERSION_PATCH 5
#define MyHTML_VERSION_STRING MyCORE_STR(MyHTML_VERSION_MAJOR) MyCORE_STR(.) MyCORE_STR(MyHTML_VERSION_MINOR) MyCORE_STR(.) MyCORE_STR(MyHTML_VERSION_PATCH) #define MyHTML_VERSION_STRING MyCORE_STR(MyHTML_VERSION_MAJOR) MyCORE_STR(.) MyCORE_STR(MyHTML_VERSION_MINOR) MyCORE_STR(.) MyCORE_STR(MyHTML_VERSION_PATCH)
@ -1991,7 +1991,7 @@ myencoding_extracting_character_encoding_from_charset(const char *data, size_t d
*/ */
myencoding_t myencoding_t
myencoding_prescan_stream_to_determine_encoding_with_found(const char *data, size_t data_size, myencoding_prescan_stream_to_determine_encoding_with_found(const char *data, size_t data_size,
const char **found, size_t *found_lenght); const char **found, size_t *found_length);
/** /**
* Extracting character encoding from string. Find "charset=" and see encoding. Return found raw data. * Extracting character encoding from string. Find "charset=" and see encoding. Return found raw data.
@ -2011,7 +2011,7 @@ myencoding_prescan_stream_to_determine_encoding_with_found(const char *data, siz
bool bool
myencoding_extracting_character_encoding_from_charset_with_found(const char *data, size_t data_size, myencoding_extracting_character_encoding_from_charset_with_found(const char *data, size_t data_size,
myencoding_t *encoding, myencoding_t *encoding,
const char **found, size_t *found_lenght); const char **found, size_t *found_length);
/*********************************************************************************** /***********************************************************************************
* *

View File

@ -26,7 +26,7 @@
#define MyHTML_VERSION_MAJOR 4 #define MyHTML_VERSION_MAJOR 4
#define MyHTML_VERSION_MINOR 0 #define MyHTML_VERSION_MINOR 0
#define MyHTML_VERSION_PATCH 2 #define MyHTML_VERSION_PATCH 5
#define MyHTML_VERSION_STRING MyCORE_STR(MyHTML_VERSION_MAJOR) MyCORE_STR(.) MyCORE_STR(MyHTML_VERSION_MINOR) MyCORE_STR(.) MyCORE_STR(MyHTML_VERSION_PATCH) #define MyHTML_VERSION_STRING MyCORE_STR(MyHTML_VERSION_MAJOR) MyCORE_STR(.) MyCORE_STR(MyHTML_VERSION_MINOR) MyCORE_STR(.) MyCORE_STR(MyHTML_VERSION_PATCH)

View File

@ -411,14 +411,14 @@ bool myencoding_extracting_character_encoding_from_charset(const char *data, siz
return myencoding_extracting_character_encoding_from_charset_with_found(data, data_size, encoding, NULL, NULL); return myencoding_extracting_character_encoding_from_charset_with_found(data, data_size, encoding, NULL, NULL);
} }
bool myencoding_extracting_character_encoding_from_charset_with_found(const char *data, size_t data_size, myencoding_t *encoding, const char **found, size_t *found_lenght) bool myencoding_extracting_character_encoding_from_charset_with_found(const char *data, size_t data_size, myencoding_t *encoding, const char **found, size_t *found_length)
{ {
*encoding = MyENCODING_NOT_DETERMINED; *encoding = MyENCODING_NOT_DETERMINED;
if(found) if(found)
*found = NULL; *found = NULL;
if(found_lenght) if(found_length)
*found_lenght = 0; *found_length = 0;
/* 1 */ /* 1 */
size_t length = 0; size_t length = 0;
@ -485,8 +485,8 @@ bool myencoding_extracting_character_encoding_from_charset_with_found(const char
{ {
if(found) if(found)
*found = &data[begin]; *found = &data[begin];
if(found_lenght) if(found_length)
*found_lenght = (length - begin); *found_length = (length - begin);
return myencoding_by_name(&data[begin], (length - begin), encoding); return myencoding_by_name(&data[begin], (length - begin), encoding);
} }
@ -508,8 +508,8 @@ bool myencoding_extracting_character_encoding_from_charset_with_found(const char
{ {
if(found) if(found)
*found = &data[begin]; *found = &data[begin];
if(found_lenght) if(found_length)
*found_lenght = (length - begin); *found_length = (length - begin);
return myencoding_by_name(&data[begin], (length - begin), encoding); return myencoding_by_name(&data[begin], (length - begin), encoding);
} }
@ -533,8 +533,8 @@ bool myencoding_extracting_character_encoding_from_charset_with_found(const char
{ {
if(found) if(found)
*found = &data[begin]; *found = &data[begin];
if(found_lenght) if(found_length)
*found_lenght = (length - begin); *found_length = (length - begin);
return myencoding_by_name(&data[begin], (length - begin), encoding); return myencoding_by_name(&data[begin], (length - begin), encoding);
} }
@ -544,8 +544,8 @@ bool myencoding_extracting_character_encoding_from_charset_with_found(const char
if(found) if(found)
*found = &data[begin]; *found = &data[begin];
if(found_lenght) if(found_length)
*found_lenght = (length - begin); *found_length = (length - begin);
return myencoding_by_name(&data[begin], (length - begin), encoding); return myencoding_by_name(&data[begin], (length - begin), encoding);
} }
@ -748,7 +748,7 @@ size_t myencoding_prescan_stream_to_determine_encoding_get_attr(const unsigned c
return myencoding_prescan_stream_to_determine_encoding_get_attr_value(udata, length, data_size, attr, it_last); return myencoding_prescan_stream_to_determine_encoding_get_attr_value(udata, length, data_size, attr, it_last);
} }
bool myencoding_prescan_stream_to_determine_encoding_check_meta(const unsigned char *udata, size_t *length, size_t data_size, myencoding_t *encoding, const char **found, size_t *found_lenght) bool myencoding_prescan_stream_to_determine_encoding_check_meta(const unsigned char *udata, size_t *length, size_t data_size, myencoding_t *encoding, const char **found, size_t *found_length)
{ {
myencoding_detect_attr_t attr; myencoding_detect_attr_t attr;
@ -788,7 +788,7 @@ bool myencoding_prescan_stream_to_determine_encoding_check_meta(const unsigned c
if((is_exists & 2) == 0) { if((is_exists & 2) == 0) {
is_exists |= 2; is_exists |= 2;
if(myencoding_extracting_character_encoding_from_charset_with_found((const char*)(&udata[ attr.value_begin ]), attr.value_length, encoding, found, found_lenght)) { if(myencoding_extracting_character_encoding_from_charset_with_found((const char*)(&udata[ attr.value_begin ]), attr.value_length, encoding, found, found_length)) {
need_pragma = 2; need_pragma = 2;
} }
} }
@ -801,8 +801,8 @@ bool myencoding_prescan_stream_to_determine_encoding_check_meta(const unsigned c
if(found) if(found)
*found = (const char*)(&udata[ attr.value_begin ]); *found = (const char*)(&udata[ attr.value_begin ]);
if(found_lenght) if(found_length)
*found_lenght = attr.value_length; *found_length = attr.value_length;
myencoding_by_name((const char*)(&udata[ attr.value_begin ]), attr.value_length, encoding); myencoding_by_name((const char*)(&udata[ attr.value_begin ]), attr.value_length, encoding);
need_pragma = 1; need_pragma = 1;
@ -937,15 +937,15 @@ myencoding_t myencoding_prescan_stream_to_determine_encoding(const char *data, s
return myencoding_prescan_stream_to_determine_encoding_with_found(data, data_size, NULL, NULL); return myencoding_prescan_stream_to_determine_encoding_with_found(data, data_size, NULL, NULL);
} }
myencoding_t myencoding_prescan_stream_to_determine_encoding_with_found(const char *data, size_t data_size, const char **found, size_t *found_lenght) myencoding_t myencoding_prescan_stream_to_determine_encoding_with_found(const char *data, size_t data_size, const char **found, size_t *found_length)
{ {
const unsigned char* udata = (const unsigned char*)data; const unsigned char* udata = (const unsigned char*)data;
myencoding_t encoding = MyENCODING_NOT_DETERMINED; myencoding_t encoding = MyENCODING_NOT_DETERMINED;
if(found) if(found)
*found = NULL; *found = NULL;
if(found_lenght) if(found_length)
*found_lenght = 0; *found_length = 0;
size_t i = 0; size_t i = 0;
while(i < data_size) { while(i < data_size) {
@ -974,7 +974,7 @@ myencoding_t myencoding_prescan_stream_to_determine_encoding_with_found(const ch
{ {
i++; i++;
if(myencoding_prescan_stream_to_determine_encoding_check_meta(udata, &i, data_size, &encoding, found, found_lenght)) if(myencoding_prescan_stream_to_determine_encoding_check_meta(udata, &i, data_size, &encoding, found, found_length))
return encoding; return encoding;
} }
} }

View File

@ -153,8 +153,8 @@ const char * myencoding_name_by_id(myencoding_t encoding, size_t *length);
bool myencoding_extracting_character_encoding_from_charset(const char *data, size_t data_size, myencoding_t *encoding); bool myencoding_extracting_character_encoding_from_charset(const char *data, size_t data_size, myencoding_t *encoding);
myencoding_t myencoding_prescan_stream_to_determine_encoding(const char *data, size_t data_size); myencoding_t myencoding_prescan_stream_to_determine_encoding(const char *data, size_t data_size);
bool myencoding_extracting_character_encoding_from_charset_with_found(const char *data, size_t data_size, myencoding_t *encoding, const char **found, size_t *found_lenght); bool myencoding_extracting_character_encoding_from_charset_with_found(const char *data, size_t data_size, myencoding_t *encoding, const char **found, size_t *found_length);
myencoding_t myencoding_prescan_stream_to_determine_encoding_with_found(const char *data, size_t data_size, const char **found, size_t *found_lenght); myencoding_t myencoding_prescan_stream_to_determine_encoding_with_found(const char *data, size_t data_size, const char **found, size_t *found_length);
#ifdef __cplusplus #ifdef __cplusplus
} /* extern "C" */ } /* extern "C" */

View File

@ -36,7 +36,7 @@
#define MyHTML_VERSION_MAJOR 4 #define MyHTML_VERSION_MAJOR 4
#define MyHTML_VERSION_MINOR 0 #define MyHTML_VERSION_MINOR 0
#define MyHTML_VERSION_PATCH 2 #define MyHTML_VERSION_PATCH 5
#define MyHTML_VERSION_STRING MyCORE_STR(MyHTML_VERSION_MAJOR) MyCORE_STR(.) MyCORE_STR(MyHTML_VERSION_MINOR) MyCORE_STR(.) MyCORE_STR(MyHTML_VERSION_PATCH) #define MyHTML_VERSION_STRING MyCORE_STR(MyHTML_VERSION_MAJOR) MyCORE_STR(.) MyCORE_STR(MyHTML_VERSION_MINOR) MyCORE_STR(.) MyCORE_STR(MyHTML_VERSION_PATCH)
@ -1991,7 +1991,7 @@ myencoding_extracting_character_encoding_from_charset(const char *data, size_t d
*/ */
myencoding_t myencoding_t
myencoding_prescan_stream_to_determine_encoding_with_found(const char *data, size_t data_size, myencoding_prescan_stream_to_determine_encoding_with_found(const char *data, size_t data_size,
const char **found, size_t *found_lenght); const char **found, size_t *found_length);
/** /**
* Extracting character encoding from string. Find "charset=" and see encoding. Return found raw data. * Extracting character encoding from string. Find "charset=" and see encoding. Return found raw data.
@ -2011,7 +2011,7 @@ myencoding_prescan_stream_to_determine_encoding_with_found(const char *data, siz
bool bool
myencoding_extracting_character_encoding_from_charset_with_found(const char *data, size_t data_size, myencoding_extracting_character_encoding_from_charset_with_found(const char *data, size_t data_size,
myencoding_t *encoding, myencoding_t *encoding,
const char **found, size_t *found_lenght); const char **found, size_t *found_length);
/*********************************************************************************** /***********************************************************************************
* *

View File

@ -125,7 +125,11 @@ mystatus_t myhtml_init(myhtml_t* myhtml, enum myhtml_options opt, size_t thread_
#else /* if undef MyCORE_BUILD_WITHOUT_THREADS */ #else /* if undef MyCORE_BUILD_WITHOUT_THREADS */
if(status) if(status)
return status; return status;
if(thread_count == 0) {
thread_count = 1;
}
switch (opt) { switch (opt) {
case MyHTML_OPTIONS_PARSE_MODE_SINGLE: case MyHTML_OPTIONS_PARSE_MODE_SINGLE:
if((status = myhtml_create_stream_and_batch(myhtml, 0, 0))) if((status = myhtml_create_stream_and_batch(myhtml, 0, 0)))

View File

@ -26,7 +26,7 @@
#define MyHTML_VERSION_MAJOR 4 #define MyHTML_VERSION_MAJOR 4
#define MyHTML_VERSION_MINOR 0 #define MyHTML_VERSION_MINOR 0
#define MyHTML_VERSION_PATCH 2 #define MyHTML_VERSION_PATCH 5
#define MyHTML_VERSION_STRING MyCORE_STR(MyHTML_VERSION_MAJOR) MyCORE_STR(.) MyCORE_STR(MyHTML_VERSION_MINOR) MyCORE_STR(.) MyCORE_STR(MyHTML_VERSION_PATCH) #define MyHTML_VERSION_STRING MyCORE_STR(MyHTML_VERSION_MAJOR) MyCORE_STR(.) MyCORE_STR(MyHTML_VERSION_MINOR) MyCORE_STR(.) MyCORE_STR(MyHTML_VERSION_PATCH)

View File

@ -629,3 +629,4 @@ mystatus_t myhtml_serialization_node_buffer(myhtml_tree_node_t* node, mycore_str
return myhtml_serialization_node_callback(node, myhtml_serialization_concatenate, str); return myhtml_serialization_node_callback(node, myhtml_serialization_concatenate, str);
} }

View File

@ -77,7 +77,9 @@ mystatus_t myhtml_tokenizer_chunk_process(myhtml_tree_t* tree, const char* html,
#endif #endif
if(tree->current_qnode == NULL) { if(tree->current_qnode == NULL) {
myhtml_tokenizer_set_first_settings(tree, html, html_length); mystatus_t status = myhtml_tokenizer_set_first_settings(tree, html, html_length);
if(status)
return status;
} }
size_t offset = 0; size_t offset = 0;

View File

@ -594,3 +594,7 @@ size_t myhtml_tokenizer_state_script_data_double_escape_end(myhtml_tree_t* tree,
return html_offset; return html_offset;
} }