Change the rules for the skip-whitespace parse flag

MyHTML_TREE_PARSE_FLAGS_SKIP_WHITESPACE_TOKEN now skips whitespace tokens,
except for those in RCDATA, RAWTEXT, CDATA and PLAINTEXT content.
This commit is contained in:
lexborisov 2016-09-02 10:29:18 +04:00
parent 0ab92fce22
commit 702e2782f2
5 changed files with 52 additions and 13 deletions

View File

@ -370,7 +370,15 @@ enum myhtml_tags {
// base // base
/* /*
Very important!!! Very important!!!
see modest/myosi.h:modest_status_t
for myhtml 0..00ffff; MyHTML_STATUS_OK == 0x000000
for mycss and modules 010000..01ffff; MyCSS_STATUS_OK == 0x000000
for modest 020000..02ffff; MODEST_STATUS_OK == 0x000000
for myrender 030000..03ffff; MyRENDER_STATUS_OK == 0x000000
for mydom 040000..04ffff; MyDOM_STATUS_OK == 0x000000
for mynetwork 050000..05ffff; MyNETWORK_STATUS_OK == 0x000000
for myecma 060000..06ffff; MyECMA_STATUS_OK == 0x000000
not occupied 070000..
*/ */
enum myhtml_status { enum myhtml_status {
MyHTML_STATUS_OK = 0x0000, MyHTML_STATUS_OK = 0x0000,
@ -426,7 +434,8 @@ enum myhtml_status {
} }
typedef myhtml_status_t; typedef myhtml_status_t;
#define MYHTML_FAILED(_status_) ((_status_) != MyHTML_STATUS_OK) #define MYHTML_FAILED(_status_) ((_status_) != MyHTML_STATUS_OK) /* deprecated */
#define MyHTML_FAILED(_status_) ((_status_) != MyHTML_STATUS_OK)
/** /**
* @struct myhtml namespace * @struct myhtml namespace
@ -463,7 +472,7 @@ enum myhtml_tree_parse_flags {
MyHTML_TREE_PARSE_FLAGS_CLEAN = 0x000, MyHTML_TREE_PARSE_FLAGS_CLEAN = 0x000,
MyHTML_TREE_PARSE_FLAGS_WITHOUT_BUILD_TREE = 0x001, MyHTML_TREE_PARSE_FLAGS_WITHOUT_BUILD_TREE = 0x001,
MyHTML_TREE_PARSE_FLAGS_WITHOUT_PROCESS_TOKEN = 0x003, MyHTML_TREE_PARSE_FLAGS_WITHOUT_PROCESS_TOKEN = 0x003,
MyHTML_TREE_PARSE_FLAGS_SKIP_WHITESPACE_TOKEN = 0x004, MyHTML_TREE_PARSE_FLAGS_SKIP_WHITESPACE_TOKEN = 0x004, /* skip ws token, but not for RCDATA, RAWTEXT, CDATA and PLAINTEXT */
MyHTML_TREE_PARSE_FLAGS_WITHOUT_DOCTYPE_IN_TREE = 0x008, MyHTML_TREE_PARSE_FLAGS_WITHOUT_DOCTYPE_IN_TREE = 0x008,
} }
typedef myhtml_tree_parse_flags_t; typedef myhtml_tree_parse_flags_t;

View File

@ -102,6 +102,7 @@ extern "C" {
sizeof(strcn) * myhtml->sizen); \ sizeof(strcn) * myhtml->sizen); \
} }
#define MyHTML_FAILED(_status_) ((_status_) != MyHTML_STATUS_OK)
// encoding // encoding
// https://encoding.spec.whatwg.org/#the-encoding // https://encoding.spec.whatwg.org/#the-encoding
@ -203,7 +204,7 @@ enum myhtml_tree_parse_flags {
MyHTML_TREE_PARSE_FLAGS_CLEAN = 0x000, MyHTML_TREE_PARSE_FLAGS_CLEAN = 0x000,
MyHTML_TREE_PARSE_FLAGS_WITHOUT_BUILD_TREE = 0x001, MyHTML_TREE_PARSE_FLAGS_WITHOUT_BUILD_TREE = 0x001,
MyHTML_TREE_PARSE_FLAGS_WITHOUT_PROCESS_TOKEN = 0x003, MyHTML_TREE_PARSE_FLAGS_WITHOUT_PROCESS_TOKEN = 0x003,
MyHTML_TREE_PARSE_FLAGS_SKIP_WHITESPACE_TOKEN = 0x004, MyHTML_TREE_PARSE_FLAGS_SKIP_WHITESPACE_TOKEN = 0x004, /* skip ws token, but not for RCDATA, RAWTEXT, CDATA and PLAINTEXT */
MyHTML_TREE_PARSE_FLAGS_WITHOUT_DOCTYPE_IN_TREE = 0x008, MyHTML_TREE_PARSE_FLAGS_WITHOUT_DOCTYPE_IN_TREE = 0x008,
} }
typedef myhtml_tree_parse_flags_t; typedef myhtml_tree_parse_flags_t;
@ -375,7 +376,15 @@ enum myhtml_insertion_mode {
// base // base
/* /*
Very important!!! Very important!!!
see modest/myosi.h:modest_status_t
for myhtml 0..00ffff; MyHTML_STATUS_OK == 0x000000
for mycss and modules 010000..01ffff; MyCSS_STATUS_OK == 0x000000
for modest 020000..02ffff; MODEST_STATUS_OK == 0x000000
for myrender 030000..03ffff; MyRENDER_STATUS_OK == 0x000000
for mydom 040000..04ffff; MyDOM_STATUS_OK == 0x000000
for mynetwork 050000..05ffff; MyNETWORK_STATUS_OK == 0x000000
for myecma 060000..06ffff; MyECMA_STATUS_OK == 0x000000
not occupied 070000..
*/ */
enum myhtml_status { enum myhtml_status {
MyHTML_STATUS_OK = 0x0000, MyHTML_STATUS_OK = 0x0000,

View File

@ -370,7 +370,15 @@ enum myhtml_tags {
// base // base
/* /*
Very important!!! Very important!!!
see modest/myosi.h:modest_status_t
for myhtml 0..00ffff; MyHTML_STATUS_OK == 0x000000
for mycss and modules 010000..01ffff; MyCSS_STATUS_OK == 0x000000
for modest 020000..02ffff; MODEST_STATUS_OK == 0x000000
for myrender 030000..03ffff; MyRENDER_STATUS_OK == 0x000000
for mydom 040000..04ffff; MyDOM_STATUS_OK == 0x000000
for mynetwork 050000..05ffff; MyNETWORK_STATUS_OK == 0x000000
for myecma 060000..06ffff; MyECMA_STATUS_OK == 0x000000
not occupied 070000..
*/ */
enum myhtml_status { enum myhtml_status {
MyHTML_STATUS_OK = 0x0000, MyHTML_STATUS_OK = 0x0000,
@ -426,7 +434,8 @@ enum myhtml_status {
} }
typedef myhtml_status_t; typedef myhtml_status_t;
#define MYHTML_FAILED(_status_) ((_status_) != MyHTML_STATUS_OK) #define MYHTML_FAILED(_status_) ((_status_) != MyHTML_STATUS_OK) /* deprecated */
#define MyHTML_FAILED(_status_) ((_status_) != MyHTML_STATUS_OK)
/** /**
* @struct myhtml namespace * @struct myhtml namespace
@ -463,7 +472,7 @@ enum myhtml_tree_parse_flags {
MyHTML_TREE_PARSE_FLAGS_CLEAN = 0x000, MyHTML_TREE_PARSE_FLAGS_CLEAN = 0x000,
MyHTML_TREE_PARSE_FLAGS_WITHOUT_BUILD_TREE = 0x001, MyHTML_TREE_PARSE_FLAGS_WITHOUT_BUILD_TREE = 0x001,
MyHTML_TREE_PARSE_FLAGS_WITHOUT_PROCESS_TOKEN = 0x003, MyHTML_TREE_PARSE_FLAGS_WITHOUT_PROCESS_TOKEN = 0x003,
MyHTML_TREE_PARSE_FLAGS_SKIP_WHITESPACE_TOKEN = 0x004, MyHTML_TREE_PARSE_FLAGS_SKIP_WHITESPACE_TOKEN = 0x004, /* skip ws token, but not for RCDATA, RAWTEXT, CDATA and PLAINTEXT */
MyHTML_TREE_PARSE_FLAGS_WITHOUT_DOCTYPE_IN_TREE = 0x008, MyHTML_TREE_PARSE_FLAGS_WITHOUT_DOCTYPE_IN_TREE = 0x008,
} }
typedef myhtml_tree_parse_flags_t; typedef myhtml_tree_parse_flags_t;

View File

@ -102,6 +102,7 @@ extern "C" {
sizeof(strcn) * myhtml->sizen); \ sizeof(strcn) * myhtml->sizen); \
} }
#define MyHTML_FAILED(_status_) ((_status_) != MyHTML_STATUS_OK)
// encoding // encoding
// https://encoding.spec.whatwg.org/#the-encoding // https://encoding.spec.whatwg.org/#the-encoding
@ -203,7 +204,7 @@ enum myhtml_tree_parse_flags {
MyHTML_TREE_PARSE_FLAGS_CLEAN = 0x000, MyHTML_TREE_PARSE_FLAGS_CLEAN = 0x000,
MyHTML_TREE_PARSE_FLAGS_WITHOUT_BUILD_TREE = 0x001, MyHTML_TREE_PARSE_FLAGS_WITHOUT_BUILD_TREE = 0x001,
MyHTML_TREE_PARSE_FLAGS_WITHOUT_PROCESS_TOKEN = 0x003, MyHTML_TREE_PARSE_FLAGS_WITHOUT_PROCESS_TOKEN = 0x003,
MyHTML_TREE_PARSE_FLAGS_SKIP_WHITESPACE_TOKEN = 0x004, MyHTML_TREE_PARSE_FLAGS_SKIP_WHITESPACE_TOKEN = 0x004, /* skip ws token, but not for RCDATA, RAWTEXT, CDATA and PLAINTEXT */
MyHTML_TREE_PARSE_FLAGS_WITHOUT_DOCTYPE_IN_TREE = 0x008, MyHTML_TREE_PARSE_FLAGS_WITHOUT_DOCTYPE_IN_TREE = 0x008,
} }
typedef myhtml_tree_parse_flags_t; typedef myhtml_tree_parse_flags_t;
@ -375,7 +376,15 @@ enum myhtml_insertion_mode {
// base // base
/* /*
Very important!!! Very important!!!
see modest/myosi.h:modest_status_t
for myhtml 0..00ffff; MyHTML_STATUS_OK == 0x000000
for mycss and modules 010000..01ffff; MyCSS_STATUS_OK == 0x000000
for modest 020000..02ffff; MODEST_STATUS_OK == 0x000000
for myrender 030000..03ffff; MyRENDER_STATUS_OK == 0x000000
for mydom 040000..04ffff; MyDOM_STATUS_OK == 0x000000
for mynetwork 050000..05ffff; MyNETWORK_STATUS_OK == 0x000000
for myecma 060000..06ffff; MyECMA_STATUS_OK == 0x000000
not occupied 070000..
*/ */
enum myhtml_status { enum myhtml_status {
MyHTML_STATUS_OK = 0x0000, MyHTML_STATUS_OK = 0x0000,

View File

@ -438,7 +438,8 @@ bool _myhtml_tokenizer_state_andata_end_tag_name(myhtml_tree_t* tree, myhtml_tok
token_node->element_begin = tmp_begin; token_node->element_begin = tmp_begin;
token_node->element_length = token_node->raw_length; token_node->element_length = token_node->raw_length;
token_node->type |= type; token_node->type |= type;
token_node->tag_id = MyHTML_TAG__TEXT; token_node->type ^= (token_node->type & MyHTML_TOKEN_TYPE_WHITESPACE);
token_node->tag_id = MyHTML_TAG__TEXT;
myhtml_queue_add(tree, *html_offset, token_node); myhtml_queue_add(tree, *html_offset, token_node);
token_node = tree->current_token_node; token_node = tree->current_token_node;
@ -648,9 +649,10 @@ size_t myhtml_tokenizer_state_plaintext(myhtml_tree_t* tree, myhtml_token_node_t
if((token_node->type & MyHTML_TOKEN_TYPE_PLAINTEXT) == 0) if((token_node->type & MyHTML_TOKEN_TYPE_PLAINTEXT) == 0)
token_node->type |= MyHTML_TOKEN_TYPE_PLAINTEXT; token_node->type |= MyHTML_TOKEN_TYPE_PLAINTEXT;
token_node->raw_begin = (html_offset + tree->global_offset); token_node->type ^= (token_node->type & MyHTML_TOKEN_TYPE_WHITESPACE);
token_node->raw_begin = (html_offset + tree->global_offset);
token_node->raw_length = token_node->element_length = (html_size + tree->global_offset) - token_node->raw_begin; token_node->raw_length = token_node->element_length = (html_size + tree->global_offset) - token_node->raw_begin;
token_node->tag_id = MyHTML_TAG__TEXT; token_node->tag_id = MyHTML_TAG__TEXT;
myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_DATA; myhtml_tokenizer_state_set(tree) = MyHTML_TOKENIZER_STATE_DATA;
myhtml_queue_add(tree, html_size, token_node); myhtml_queue_add(tree, html_size, token_node);
@ -871,6 +873,7 @@ size_t myhtml_tokenizer_state_markup_declaration_open(myhtml_tree_t* tree, myhtm
token_node->raw_begin += 7; token_node->raw_begin += 7;
token_node->raw_length = 0; token_node->raw_length = 0;
token_node->tag_id = MyHTML_TAG__TEXT; token_node->tag_id = MyHTML_TAG__TEXT;
token_node->type ^= (token_node->type & MyHTML_TOKEN_TYPE_WHITESPACE);
return html_offset; return html_offset;
} }