sync modest from myhtml

This commit is contained in:
Azq2 2019-01-12 18:38:11 +02:00
parent d021b90751
commit 68d0adbe4d
11 changed files with 44 additions and 33 deletions

View File

@ -153,8 +153,8 @@ const char * myencoding_name_by_id(myencoding_t encoding, size_t *length);
bool myencoding_extracting_character_encoding_from_charset(const char *data, size_t data_size, myencoding_t *encoding);
myencoding_t myencoding_prescan_stream_to_determine_encoding(const char *data, size_t data_size);
bool myencoding_extracting_character_encoding_from_charset_with_found(const char *data, size_t data_size, myencoding_t *encoding, const char **found, size_t *found_lenght);
myencoding_t myencoding_prescan_stream_to_determine_encoding_with_found(const char *data, size_t data_size, const char **found, size_t *found_lenght);
bool myencoding_extracting_character_encoding_from_charset_with_found(const char *data, size_t data_size, myencoding_t *encoding, const char **found, size_t *found_length);
myencoding_t myencoding_prescan_stream_to_determine_encoding_with_found(const char *data, size_t data_size, const char **found, size_t *found_length);
#ifdef __cplusplus
} /* extern "C" */

View File

@ -36,7 +36,7 @@
#define MyHTML_VERSION_MAJOR 4
#define MyHTML_VERSION_MINOR 0
#define MyHTML_VERSION_PATCH 2
#define MyHTML_VERSION_PATCH 5
#define MyHTML_VERSION_STRING MyCORE_STR(MyHTML_VERSION_MAJOR) MyCORE_STR(.) MyCORE_STR(MyHTML_VERSION_MINOR) MyCORE_STR(.) MyCORE_STR(MyHTML_VERSION_PATCH)
@ -1991,7 +1991,7 @@ myencoding_extracting_character_encoding_from_charset(const char *data, size_t d
*/
myencoding_t
myencoding_prescan_stream_to_determine_encoding_with_found(const char *data, size_t data_size,
const char **found, size_t *found_lenght);
const char **found, size_t *found_length);
/**
* Extract the character encoding from a string: find "charset=" and read the encoding name that follows it. The raw encoding name found in the input is also returned via found and found_length.
@ -2011,7 +2011,7 @@ myencoding_prescan_stream_to_determine_encoding_with_found(const char *data, siz
bool
myencoding_extracting_character_encoding_from_charset_with_found(const char *data, size_t data_size,
myencoding_t *encoding,
const char **found, size_t *found_lenght);
const char **found, size_t *found_length);
/***********************************************************************************
*

View File

@ -26,7 +26,7 @@
#define MyHTML_VERSION_MAJOR 4
#define MyHTML_VERSION_MINOR 0
#define MyHTML_VERSION_PATCH 2
#define MyHTML_VERSION_PATCH 5
#define MyHTML_VERSION_STRING MyCORE_STR(MyHTML_VERSION_MAJOR) MyCORE_STR(.) MyCORE_STR(MyHTML_VERSION_MINOR) MyCORE_STR(.) MyCORE_STR(MyHTML_VERSION_PATCH)

View File

@ -411,14 +411,14 @@ bool myencoding_extracting_character_encoding_from_charset(const char *data, siz
return myencoding_extracting_character_encoding_from_charset_with_found(data, data_size, encoding, NULL, NULL);
}
bool myencoding_extracting_character_encoding_from_charset_with_found(const char *data, size_t data_size, myencoding_t *encoding, const char **found, size_t *found_lenght)
bool myencoding_extracting_character_encoding_from_charset_with_found(const char *data, size_t data_size, myencoding_t *encoding, const char **found, size_t *found_length)
{
*encoding = MyENCODING_NOT_DETERMINED;
if(found)
*found = NULL;
if(found_lenght)
*found_lenght = 0;
if(found_length)
*found_length = 0;
/* 1 */
size_t length = 0;
@ -485,8 +485,8 @@ bool myencoding_extracting_character_encoding_from_charset_with_found(const char
{
if(found)
*found = &data[begin];
if(found_lenght)
*found_lenght = (length - begin);
if(found_length)
*found_length = (length - begin);
return myencoding_by_name(&data[begin], (length - begin), encoding);
}
@ -508,8 +508,8 @@ bool myencoding_extracting_character_encoding_from_charset_with_found(const char
{
if(found)
*found = &data[begin];
if(found_lenght)
*found_lenght = (length - begin);
if(found_length)
*found_length = (length - begin);
return myencoding_by_name(&data[begin], (length - begin), encoding);
}
@ -533,8 +533,8 @@ bool myencoding_extracting_character_encoding_from_charset_with_found(const char
{
if(found)
*found = &data[begin];
if(found_lenght)
*found_lenght = (length - begin);
if(found_length)
*found_length = (length - begin);
return myencoding_by_name(&data[begin], (length - begin), encoding);
}
@ -544,8 +544,8 @@ bool myencoding_extracting_character_encoding_from_charset_with_found(const char
if(found)
*found = &data[begin];
if(found_lenght)
*found_lenght = (length - begin);
if(found_length)
*found_length = (length - begin);
return myencoding_by_name(&data[begin], (length - begin), encoding);
}
@ -748,7 +748,7 @@ size_t myencoding_prescan_stream_to_determine_encoding_get_attr(const unsigned c
return myencoding_prescan_stream_to_determine_encoding_get_attr_value(udata, length, data_size, attr, it_last);
}
bool myencoding_prescan_stream_to_determine_encoding_check_meta(const unsigned char *udata, size_t *length, size_t data_size, myencoding_t *encoding, const char **found, size_t *found_lenght)
bool myencoding_prescan_stream_to_determine_encoding_check_meta(const unsigned char *udata, size_t *length, size_t data_size, myencoding_t *encoding, const char **found, size_t *found_length)
{
myencoding_detect_attr_t attr;
@ -788,7 +788,7 @@ bool myencoding_prescan_stream_to_determine_encoding_check_meta(const unsigned c
if((is_exists & 2) == 0) {
is_exists |= 2;
if(myencoding_extracting_character_encoding_from_charset_with_found((const char*)(&udata[ attr.value_begin ]), attr.value_length, encoding, found, found_lenght)) {
if(myencoding_extracting_character_encoding_from_charset_with_found((const char*)(&udata[ attr.value_begin ]), attr.value_length, encoding, found, found_length)) {
need_pragma = 2;
}
}
@ -801,8 +801,8 @@ bool myencoding_prescan_stream_to_determine_encoding_check_meta(const unsigned c
if(found)
*found = (const char*)(&udata[ attr.value_begin ]);
if(found_lenght)
*found_lenght = attr.value_length;
if(found_length)
*found_length = attr.value_length;
myencoding_by_name((const char*)(&udata[ attr.value_begin ]), attr.value_length, encoding);
need_pragma = 1;
@ -937,15 +937,15 @@ myencoding_t myencoding_prescan_stream_to_determine_encoding(const char *data, s
return myencoding_prescan_stream_to_determine_encoding_with_found(data, data_size, NULL, NULL);
}
myencoding_t myencoding_prescan_stream_to_determine_encoding_with_found(const char *data, size_t data_size, const char **found, size_t *found_lenght)
myencoding_t myencoding_prescan_stream_to_determine_encoding_with_found(const char *data, size_t data_size, const char **found, size_t *found_length)
{
const unsigned char* udata = (const unsigned char*)data;
myencoding_t encoding = MyENCODING_NOT_DETERMINED;
if(found)
*found = NULL;
if(found_lenght)
*found_lenght = 0;
if(found_length)
*found_length = 0;
size_t i = 0;
while(i < data_size) {
@ -974,7 +974,7 @@ myencoding_t myencoding_prescan_stream_to_determine_encoding_with_found(const ch
{
i++;
if(myencoding_prescan_stream_to_determine_encoding_check_meta(udata, &i, data_size, &encoding, found, found_lenght))
if(myencoding_prescan_stream_to_determine_encoding_check_meta(udata, &i, data_size, &encoding, found, found_length))
return encoding;
}
}

View File

@ -153,8 +153,8 @@ const char * myencoding_name_by_id(myencoding_t encoding, size_t *length);
bool myencoding_extracting_character_encoding_from_charset(const char *data, size_t data_size, myencoding_t *encoding);
myencoding_t myencoding_prescan_stream_to_determine_encoding(const char *data, size_t data_size);
bool myencoding_extracting_character_encoding_from_charset_with_found(const char *data, size_t data_size, myencoding_t *encoding, const char **found, size_t *found_lenght);
myencoding_t myencoding_prescan_stream_to_determine_encoding_with_found(const char *data, size_t data_size, const char **found, size_t *found_lenght);
bool myencoding_extracting_character_encoding_from_charset_with_found(const char *data, size_t data_size, myencoding_t *encoding, const char **found, size_t *found_length);
myencoding_t myencoding_prescan_stream_to_determine_encoding_with_found(const char *data, size_t data_size, const char **found, size_t *found_length);
#ifdef __cplusplus
} /* extern "C" */

View File

@ -36,7 +36,7 @@
#define MyHTML_VERSION_MAJOR 4
#define MyHTML_VERSION_MINOR 0
#define MyHTML_VERSION_PATCH 2
#define MyHTML_VERSION_PATCH 5
#define MyHTML_VERSION_STRING MyCORE_STR(MyHTML_VERSION_MAJOR) MyCORE_STR(.) MyCORE_STR(MyHTML_VERSION_MINOR) MyCORE_STR(.) MyCORE_STR(MyHTML_VERSION_PATCH)
@ -1991,7 +1991,7 @@ myencoding_extracting_character_encoding_from_charset(const char *data, size_t d
*/
myencoding_t
myencoding_prescan_stream_to_determine_encoding_with_found(const char *data, size_t data_size,
const char **found, size_t *found_lenght);
const char **found, size_t *found_length);
/**
* Extract the character encoding from a string: find "charset=" and read the encoding name that follows it. The raw encoding name found in the input is also returned via found and found_length.
@ -2011,7 +2011,7 @@ myencoding_prescan_stream_to_determine_encoding_with_found(const char *data, siz
bool
myencoding_extracting_character_encoding_from_charset_with_found(const char *data, size_t data_size,
myencoding_t *encoding,
const char **found, size_t *found_lenght);
const char **found, size_t *found_length);
/***********************************************************************************
*

View File

@ -126,6 +126,10 @@ mystatus_t myhtml_init(myhtml_t* myhtml, enum myhtml_options opt, size_t thread_
if(status)
return status;
if(thread_count == 0) {
thread_count = 1;
}
switch (opt) {
case MyHTML_OPTIONS_PARSE_MODE_SINGLE:
if((status = myhtml_create_stream_and_batch(myhtml, 0, 0)))

View File

@ -26,7 +26,7 @@
#define MyHTML_VERSION_MAJOR 4
#define MyHTML_VERSION_MINOR 0
#define MyHTML_VERSION_PATCH 2
#define MyHTML_VERSION_PATCH 5
#define MyHTML_VERSION_STRING MyCORE_STR(MyHTML_VERSION_MAJOR) MyCORE_STR(.) MyCORE_STR(MyHTML_VERSION_MINOR) MyCORE_STR(.) MyCORE_STR(MyHTML_VERSION_PATCH)

View File

@ -629,3 +629,4 @@ mystatus_t myhtml_serialization_node_buffer(myhtml_tree_node_t* node, mycore_str
return myhtml_serialization_node_callback(node, myhtml_serialization_concatenate, str);
}

View File

@ -77,7 +77,9 @@ mystatus_t myhtml_tokenizer_chunk_process(myhtml_tree_t* tree, const char* html,
#endif
if(tree->current_qnode == NULL) {
myhtml_tokenizer_set_first_settings(tree, html, html_length);
mystatus_t status = myhtml_tokenizer_set_first_settings(tree, html, html_length);
if(status)
return status;
}
size_t offset = 0;

View File

@ -594,3 +594,7 @@ size_t myhtml_tokenizer_state_script_data_double_escape_end(myhtml_tree_t* tree,
return html_offset;
}