Modest/source/myhtml/myhtml.h

232 lines
13 KiB
C
Raw Permalink Normal View History

2016-08-29 00:20:40 +03:00
/*
2017-03-14 23:44:48 +03:00
Copyright (C) 2015-2017 Alexander Borisov
2016-08-29 00:20:40 +03:00
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Author: lex.borisov@gmail.com (Alexander Borisov)
*/
#ifndef MyHTML_MYHTML_H
#define MyHTML_MYHTML_H
#pragma once
#ifdef __cplusplus
extern "C" {
#endif
#include "myhtml/myosi.h"
#include "mycore/utils/mctree.h"
#include "mycore/utils/mcobject_async.h"
2017-03-09 23:47:39 +03:00
#include "mycore/mythread.h"
#include "mycore/incoming.h"
#include "myencoding/encoding.h"
2016-08-29 00:20:40 +03:00
#include "myhtml/tree.h"
#include "myhtml/tag.h"
#include "myhtml/def.h"
#include "myhtml/parser.h"
#include "myhtml/tokenizer.h"
#include "myhtml/rules.h"
#include "myhtml/token.h"
#include "myhtml/charef.h"
#include "myhtml/callback.h"
/*
 * Shorthand for `tree->queue`. The expansion is purely textual, so a
 * variable named `tree` must be in scope wherever the macro is used.
 */
#define mh_queue_current() tree->queue
/*
 * Expands to `myhtml_tree_set(tree, state)`. `myhtml_tree_set` is not
 * defined in this header; it must be provided by another include.
 */
#define myhtml_tokenizer_state_set(tree) myhtml_tree_set(tree, state)
/*
 * Expands to member `attr` of `myhtml->queue->nodes[idx]`. `myhtml` is not a
 * macro parameter: it is taken from the scope where the macro is used.
 * NOTE(review): if `myhtml` is meant to point at `struct myhtml`, the struct
 * declared in this header has no `queue` member, so this macro looks stale;
 * confirm whether it is still used anywhere.
 */
#define mh_queue_get(idx, attr) myhtml->queue->nodes[idx].attr
/*
 * Builds a comparison of `onechar` against each of five characters:
 * space, tab, LF, FF and CR.
 *
 *   onechar - expression to test; it appears five times in the expansion,
 *             so it must not have side effects
 *   action  - comparison operator applied to every character (e.g. ==)
 *   logic   - logical operator joining the five comparisons (e.g. ||)
 *
 * Typical forms:
 *   myhtml_whithspace(c, ==, ||)   non-zero when c is one of the characters
 *   myhtml_whithspace(c, !=, &&)   non-zero when c is none of them
 *
 * Both the argument and the whole expansion are parenthesized so that the
 * result composes correctly with surrounding operators (&&, !, ?: ...).
 */
#define myhtml_whithspace(onechar, action, logic) \
    ((onechar) action ' '  logic \
     (onechar) action '\t' logic \
     (onechar) action '\n' logic \
     (onechar) action '\f' logic \
     (onechar) action '\r')
/*
 * Evaluates to non-zero when `onechar` is an ASCII letter (a-z or A-Z),
 * zero otherwise.
 *
 * The argument is parenthesized at every use so that any expression
 * (for example a conditional `x ? a : b`) can be passed safely. It still
 * appears up to four times in the expansion, so it must not have side
 * effects.
 */
#define myhtml_ascii_char_cmp(onechar) \
    (((onechar) >= 'a' && (onechar) <= 'z') || \
     ((onechar) >= 'A' && (onechar) <= 'Z'))
/*
 * Logical negation of myhtml_ascii_char_cmp: evaluates to non-zero when
 * `onechar` is NOT an ASCII letter (outside both a-z and A-Z).
 *
 * The argument is parenthesized at every use so that any expression can be
 * passed safely. It still appears up to four times in the expansion, so it
 * must not have side effects.
 */
#define myhtml_ascii_char_unless_cmp(onechar) \
    (((onechar) < 'a' || (onechar) > 'z') && \
     ((onechar) < 'A' || (onechar) > 'Z'))
struct myhtml {
2017-03-09 23:47:39 +03:00
mythread_t* thread_stream;
mythread_t* thread_batch;
mythread_t* thread_list[3];
size_t thread_total;
2016-08-29 00:20:40 +03:00
myhtml_tokenizer_state_f* parse_state_func;
myhtml_insertion_f* insertion_func;
enum myhtml_options opt;
myhtml_tree_node_t *marker;
};
/*
 * A list of tree-node pointers, as returned by the myhtml_get_nodes_by_*
 * functions declared in this header.
 */
struct myhtml_collection {
/* array of node pointers */
myhtml_tree_node_t **list;
/* presumably the allocated capacity of `list`, in entries -- TODO confirm */
size_t size;
/* presumably the number of entries currently stored -- TODO confirm */
size_t length;
};
/*
 * Library object life cycle.
 * NOTE(review): descriptions are inferred from names and signatures;
 * confirm the exact contract in the implementation.
 */
/* Returns a new myhtml_t object (presumably NULL on failure). */
myhtml_t * myhtml_create(void);
/*
 * Initializes `myhtml` with option flags `opt`, a worker thread count and a
 * queue size. Returns a mystatus_t status code.
 */
mystatus_t myhtml_init(myhtml_t* myhtml, enum myhtml_options opt, size_t thread_count, size_t queue_size);
2016-08-29 00:20:40 +03:00
/*
 * NOTE(review): descriptions are inferred from names and signatures;
 * confirm the exact contract in the implementation.
 */
/* Resets `myhtml` for reuse (by name; details not visible here). */
void myhtml_clean(myhtml_t* myhtml);
/* Releases `myhtml`; returns a myhtml_t* (presumably NULL on success). */
myhtml_t* myhtml_destroy(myhtml_t* myhtml);
/*
 * Parses the buffer `html` of `html_size` chars, interpreted in `encoding`,
 * into `tree`. Returns a mystatus_t status code.
 */
mystatus_t myhtml_parse(myhtml_tree_t* tree, myencoding_t encoding, const char* html, size_t html_size);
/*
 * Fragment variant of myhtml_parse: additionally takes a tag id and
 * namespace (presumably those of the fragment's context element).
 */
mystatus_t myhtml_parse_fragment(myhtml_tree_t* tree, myencoding_t encoding, const char* html, size_t html_size, myhtml_tag_id_t tag_id, enum myhtml_namespace ns);
2016-08-29 00:20:40 +03:00
/*
 * "_single" variants with the same parameters as myhtml_parse() and
 * myhtml_parse_fragment().
 * NOTE(review): presumably these run without worker threads -- confirm.
 */
mystatus_t myhtml_parse_single(myhtml_tree_t* tree, myencoding_t encoding, const char* html, size_t html_size);
mystatus_t myhtml_parse_fragment_single(myhtml_tree_t* tree, myencoding_t encoding, const char* html, size_t html_size, myhtml_tag_id_t tag_id, enum myhtml_namespace ns);
2016-08-29 00:20:40 +03:00
/*
 * Chunked parsing entry points. Unlike myhtml_parse(), these take no
 * encoding argument. Each returns a mystatus_t status code.
 * NOTE(review): presumably the input is fed piece by piece with repeated
 * calls and finished with myhtml_parse_chunk_end() -- confirm the required
 * call sequence in the implementation.
 */
mystatus_t myhtml_parse_chunk(myhtml_tree_t* tree, const char* html, size_t html_size);
mystatus_t myhtml_parse_chunk_fragment(myhtml_tree_t* tree, const char* html, size_t html_size, myhtml_tag_id_t tag_id, enum myhtml_namespace ns);
mystatus_t myhtml_parse_chunk_single(myhtml_tree_t* tree, const char* html, size_t html_size);
mystatus_t myhtml_parse_chunk_fragment_single(myhtml_tree_t* tree, const char* html, size_t html_size, myhtml_tag_id_t tag_id, enum myhtml_namespace ns);
mystatus_t myhtml_parse_chunk_end(myhtml_tree_t* tree);
2016-08-29 00:20:40 +03:00
// encoding: setter and getter for the myencoding_t value associated with a tree
void myhtml_encoding_set(myhtml_tree_t* tree, myencoding_t encoding);
myencoding_t myhtml_encoding_get(myhtml_tree_t* tree);
2016-08-29 00:20:40 +03:00
/*
 * Node searches. Each takes a tree, a collection and a `status`
 * out-pointer, and returns a myhtml_collection_t pointer.
 * NOTE(review): whether `collection` may be NULL (a new one is created),
 * whether `status` may be NULL, and who frees the result are not visible
 * in this header -- confirm in the implementation.
 */
/* Search by numeric tag id. */
myhtml_collection_t * myhtml_get_nodes_by_tag_id(myhtml_tree_t* tree, myhtml_collection_t *collection, myhtml_tag_id_t tag_id, mystatus_t *status);
/* Search by name; here `html`/`length` carry the name (presumably a tag name). */
myhtml_collection_t * myhtml_get_nodes_by_name(myhtml_tree_t* tree, myhtml_collection_t *collection, const char* html, size_t length, mystatus_t *status);
/* Search by attribute key `key`/`key_len`; `scope_node` presumably limits the search to a subtree. */
myhtml_collection_t * myhtml_get_nodes_by_attribute_key(myhtml_tree_t *tree, myhtml_collection_t* collection, myhtml_tree_node_t* scope_node, const char* key, size_t key_len, mystatus_t* status);
2016-08-29 00:20:40 +03:00
/*
 * Attribute-value search analogous to the CSS selector [some=value]
 * (or #id).
 *   key/key_len, value/value_len - attribute name and value to look for
 *   case_insensitive             - selects case-insensitive comparison
 *   status                       - out-pointer receiving a mystatus_t
 * NOTE(review): the role of `node` (presumably the search scope) and the
 * handling of a NULL `collection` are not visible here -- confirm.
 */
myhtml_collection_t * myhtml_get_nodes_by_attribute_value(myhtml_tree_t *tree, myhtml_collection_t* collection, myhtml_tree_node_t* node,
bool case_insensitive,
const char* key, size_t key_len,
const char* value, size_t value_len,
mystatus_t* status);
2016-08-29 00:20:40 +03:00
/*
 * Attribute-value search analogous to the CSS selector [some~=value]
 * (or .class): `value` is one item of a whitespace-separated list.
 * Parameters have the same shape as myhtml_get_nodes_by_attribute_value().
 * NOTE(review): the role of `node` (presumably the search scope) is not
 * visible here -- confirm.
 */
myhtml_collection_t * myhtml_get_nodes_by_attribute_value_whitespace_separated(myhtml_tree_t *tree, myhtml_collection_t* collection, myhtml_tree_node_t* node,
bool case_insensitive,
const char* key, size_t key_len,
const char* value, size_t value_len,
mystatus_t* status);
2016-08-29 00:20:40 +03:00
/*
 * Attribute-value search analogous to the CSS selector [some^=value]:
 * the attribute value begins with `value`.
 * Parameters have the same shape as myhtml_get_nodes_by_attribute_value().
 * NOTE(review): the role of `node` (presumably the search scope) is not
 * visible here -- confirm.
 */
myhtml_collection_t * myhtml_get_nodes_by_attribute_value_begin(myhtml_tree_t *tree, myhtml_collection_t* collection, myhtml_tree_node_t* node,
bool case_insensitive,
const char* key, size_t key_len,
const char* value, size_t value_len,
mystatus_t* status);
2016-08-29 00:20:40 +03:00
/*
 * Attribute-value search analogous to the CSS selector [some$=value]:
 * the attribute value ends with `value`.
 * Parameters have the same shape as myhtml_get_nodes_by_attribute_value().
 * NOTE(review): the role of `node` (presumably the search scope) is not
 * visible here -- confirm.
 */
myhtml_collection_t * myhtml_get_nodes_by_attribute_value_end(myhtml_tree_t *tree, myhtml_collection_t* collection, myhtml_tree_node_t* node,
bool case_insensitive,
const char* key, size_t key_len,
const char* value, size_t value_len,
mystatus_t* status);
2016-08-29 00:20:40 +03:00
/*
 * Attribute-value search analogous to the CSS selector [some*=value]:
 * the attribute value contains `value` as a substring.
 * Parameters have the same shape as myhtml_get_nodes_by_attribute_value().
 * NOTE(review): the role of `node` (presumably the search scope) is not
 * visible here -- confirm.
 */
myhtml_collection_t * myhtml_get_nodes_by_attribute_value_contain(myhtml_tree_t *tree, myhtml_collection_t* collection, myhtml_tree_node_t* node,
bool case_insensitive,
const char* key, size_t key_len,
const char* value, size_t value_len,
mystatus_t* status);
2016-08-29 00:20:40 +03:00
/*
 * Attribute-value search analogous to the CSS selector [some|=value]:
 * the attribute value is `value` or starts with `value` followed by '-'.
 * Parameters have the same shape as myhtml_get_nodes_by_attribute_value().
 * NOTE(review): the role of `node` (presumably the search scope) is not
 * visible here -- confirm.
 */
myhtml_collection_t * myhtml_get_nodes_by_attribute_value_hyphen_separated(myhtml_tree_t *tree, myhtml_collection_t* collection, myhtml_tree_node_t* node,
bool case_insensitive,
const char* key, size_t key_len,
const char* value, size_t value_len,
mystatus_t* status);
2016-08-29 00:20:40 +03:00
/*
 * "_in_scope" variants of the tag-id and name searches: they take an extra
 * `node` argument.
 * NOTE(review): presumably the search is limited to the subtree under
 * `node` -- confirm in the implementation.
 */
myhtml_collection_t * myhtml_get_nodes_by_tag_id_in_scope(myhtml_tree_t* tree, myhtml_collection_t *collection, myhtml_tree_node_t *node, myhtml_tag_id_t tag_id, mystatus_t *status);
myhtml_collection_t * myhtml_get_nodes_by_name_in_scope(myhtml_tree_t* tree, myhtml_collection_t *collection, myhtml_tree_node_t *node, const char* html, size_t length, mystatus_t *status);
2016-08-29 00:20:40 +03:00
/*
 * Tree navigation. myhtml_node_first() starts from a tree; the others step
 * from a node to its next/previous sibling, parent, first child or last
 * child (by name).
 * NOTE(review): presumably each returns NULL when no such node exists --
 * confirm.
 */
myhtml_tree_node_t * myhtml_node_first(myhtml_tree_t* tree);
myhtml_tree_node_t * myhtml_node_next(myhtml_tree_node_t *node);
myhtml_tree_node_t * myhtml_node_prev(myhtml_tree_node_t *node);
myhtml_tree_node_t * myhtml_node_parent(myhtml_tree_node_t *node);
myhtml_tree_node_t * myhtml_node_child(myhtml_tree_node_t *node);
myhtml_tree_node_t * myhtml_node_last_child(myhtml_tree_node_t *node);
2017-02-14 14:49:05 +03:00
/*
 * Node insertion. Each takes a `target` already in a tree and the `node`
 * to insert relative to it, and returns a node pointer.
 * NOTE(review): which node is returned, and what happens if `node` is
 * already attached elsewhere, are not visible here -- confirm.
 */
myhtml_tree_node_t * myhtml_node_insert_to_appropriate_place(myhtml_tree_node_t *target, myhtml_tree_node_t *node);
myhtml_tree_node_t * myhtml_node_append_child(myhtml_tree_node_t *target, myhtml_tree_node_t *node);
myhtml_tree_node_t * myhtml_node_insert_after(myhtml_tree_node_t *target, myhtml_tree_node_t *node);
myhtml_tree_node_t * myhtml_node_insert_before(myhtml_tree_node_t *target, myhtml_tree_node_t *node);
2016-08-29 00:20:40 +03:00
/* Creates a node with the given tag id and namespace for `tree`. */
myhtml_tree_node_t * myhtml_node_create(myhtml_tree_t* tree, myhtml_tag_id_t tag_id, enum myhtml_namespace ns);
/*
 * Clone `src` for use in `dest_tree`.
 * NOTE(review): presumably the plain variant copies only the node itself
 * and the "_deep" variant also copies its descendants -- confirm.
 */
myhtml_tree_node_t * myhtml_node_clone(myhtml_tree_t* dest_tree, myhtml_tree_node_t* src);
myhtml_tree_node_t * myhtml_node_clone_deep(myhtml_tree_t* dest_tree, myhtml_tree_node_t* src);
2017-02-14 14:49:05 +03:00
/*
 * Node removal and destruction.
 * NOTE(review): the distinction between these four (detach only vs.
 * release memory; single node vs. whole subtree) is inferred from the
 * names only -- confirm ownership rules in the implementation before use.
 */
/* Returns a node pointer (presumably the detached node). */
myhtml_tree_node_t * myhtml_node_remove(myhtml_tree_node_t *node);
void myhtml_node_delete(myhtml_tree_node_t *node);
void myhtml_node_delete_recursive(myhtml_tree_node_t *node);
void myhtml_node_free(myhtml_tree_node_t *node);
2016-08-29 00:20:40 +03:00
/* Per-node accessors: the node's token, namespace (get/set) and tag id. */
myhtml_token_node_t* myhtml_node_token(myhtml_tree_node_t *node);
myhtml_namespace_t myhtml_node_namespace(myhtml_tree_node_t *node);
void myhtml_node_namespace_set(myhtml_tree_node_t *node, myhtml_namespace_t ns);
myhtml_tag_id_t myhtml_node_tag_id(myhtml_tree_node_t *node);
/*
 * Tag id <-> tag name lookups for `tree`.
 * myhtml_tag_name_by_id writes through `length` (presumably the name's
 * length; whether NULL is accepted is not visible here -- confirm).
 */
const char * myhtml_tag_name_by_id(myhtml_tree_t* tree, myhtml_tag_id_t tag_id, size_t *length);
myhtml_tag_id_t myhtml_tag_id_by_name(myhtml_tree_t* tree, const char *tag_name, size_t length);
/* Predicate on a node (by name: whether the element was self-closed). */
bool myhtml_node_is_close_self(myhtml_tree_node_t *node);
2017-09-07 11:30:53 +03:00
/* Predicate on a node (by name: whether it is an HTML void element such as <br>). */
bool myhtml_node_is_void_element(myhtml_tree_node_t *node);
2016-08-29 00:20:40 +03:00
/* First and last attribute of a node; iterate with myhtml_attribute_next()/_prev(). */
myhtml_tree_attr_t * myhtml_node_attribute_first(myhtml_tree_node_t *node);
myhtml_tree_attr_t * myhtml_node_attribute_last(myhtml_tree_node_t *node);
/*
 * Text of a node, as a C string plus `length` out-parameter, or as the
 * underlying mycore_string_t.
 * NOTE(review): return value for nodes without text, and whether `length`
 * may be NULL, are not visible here -- confirm.
 */
const char * myhtml_node_text(myhtml_tree_node_t *node, size_t *length);
mycore_string_t * myhtml_node_string(myhtml_tree_node_t *node);
2017-08-31 18:53:50 +03:00
/*
 * Position information for a node, returned by value.
 * NOTE(review): the difference between "raw" and "element" position is not
 * visible in this header -- confirm.
 */
myhtml_position_t myhtml_node_raw_position(myhtml_tree_node_t *node);
myhtml_position_t myhtml_node_element_position(myhtml_tree_node_t *node);
/* Attach an opaque user pointer to a node, and read it back. */
void myhtml_node_set_data(myhtml_tree_node_t *node, void* data);
void * myhtml_node_get_data(myhtml_tree_node_t *node);
2016-08-29 00:20:40 +03:00
/* attributes */
/* Step to the next / previous attribute in a node's attribute list. */
myhtml_tree_attr_t * myhtml_attribute_next(myhtml_tree_attr_t *attr);
myhtml_tree_attr_t * myhtml_attribute_prev(myhtml_tree_attr_t *attr);
/* Namespace of an attribute (get/set). */
enum myhtml_namespace myhtml_attribute_namespace(myhtml_tree_attr_t *attr);
void myhtml_attribute_namespace_set(myhtml_tree_attr_t *attr, myhtml_namespace_t ns);
/*
 * Key and value of an attribute, as a C string plus `length`
 * out-parameter, or as the underlying mycore_string_t.
 * NOTE(review): NULL handling for `attr` and `length` is not visible
 * here -- confirm.
 */
const char * myhtml_attribute_key(myhtml_tree_attr_t *attr, size_t *length);
const char * myhtml_attribute_value(myhtml_tree_attr_t *attr, size_t *length);
mycore_string_t * myhtml_attribute_key_string(myhtml_tree_attr_t* attr);
mycore_string_t * myhtml_attribute_value_string(myhtml_tree_attr_t* attr);
2016-08-29 00:20:40 +03:00
/* Looks up an attribute of `node` by key (presumably NULL when absent -- confirm). */
myhtml_tree_attr_t * myhtml_attribute_by_key(myhtml_tree_node_t *node, const char *key, size_t key_len);
/*
 * Adds an attribute with the given key and value to `node`; `encoding`
 * describes the input strings. Returns the attribute pointer.
 * NOTE(review): behavior when the key already exists is not visible
 * here -- confirm.
 */
myhtml_tree_attr_t * myhtml_attribute_add(myhtml_tree_node_t *node, const char *key, size_t key_len, const char *value, size_t value_len, myencoding_t encoding);
2016-08-29 00:20:40 +03:00
/*
 * Attribute removal and destruction.
 * NOTE(review): by name, "remove" detaches an attribute from `node` and
 * returns it, "delete" detaches and releases it, and "free" only releases
 * it -- confirm ownership rules in the implementation.
 */
myhtml_tree_attr_t * myhtml_attribute_remove(myhtml_tree_node_t *node, myhtml_tree_attr_t *attr);
myhtml_tree_attr_t * myhtml_attribute_remove_by_key(myhtml_tree_node_t *node, const char *key, size_t key_len);
void myhtml_attribute_delete(myhtml_tree_t *tree, myhtml_tree_node_t *node, myhtml_tree_attr_t *attr);
void myhtml_attribute_free(myhtml_tree_t *tree, myhtml_tree_attr_t *attr);
/* Raw positions of an attribute's key and value, returned by value. */
myhtml_position_t myhtml_attribute_key_raw_position(myhtml_tree_attr_t *attr);
myhtml_position_t myhtml_attribute_value_raw_position(myhtml_tree_attr_t *attr);
/* collection */
/*
 * Creates a collection sized by `size` and reports a status through
 * `status` (whether `status` may be NULL is not visible here -- confirm).
 */
myhtml_collection_t * myhtml_collection_create(size_t size, mystatus_t *status);
2016-08-29 00:20:40 +03:00
/* Resets a collection for reuse (by name; details not visible here). */
void myhtml_collection_clean(myhtml_collection_t *collection);
/* Releases a collection; returns a collection pointer (presumably NULL). */
myhtml_collection_t * myhtml_collection_destroy(myhtml_collection_t *collection);
/*
 * Capacity check for a collection; returns a mystatus_t.
 * NOTE(review): presumably ensures room for `need` more entries, growing
 * by an extra `upto_length` when it has to reallocate -- confirm.
 */
mystatus_t myhtml_collection_check_size(myhtml_collection_t *collection, size_t need, size_t upto_length);
2016-08-29 00:20:40 +03:00
// strings
/*
 * Set the text of `node` from `text`/`length`, interpreted in `encoding`;
 * returns a mycore_string_t pointer.
 * NOTE(review): presumably the "_with_charef" variant additionally decodes
 * character references (e.g. &amp;) in the input -- confirm.
 */
mycore_string_t * myhtml_node_text_set(myhtml_tree_node_t *node, const char* text, size_t length, myencoding_t encoding);
mycore_string_t * myhtml_node_text_set_with_charef(myhtml_tree_node_t *node, const char* text, size_t length, myencoding_t encoding);
2016-08-29 00:20:40 +03:00
/*
 * Compares `size` chars of two strings and returns a bool.
 * NOTE(review): judging by the parameter name, `to_lowercase` is
 * lower-cased before comparison while `ab` is used as-is; the meaning of
 * the return value (true on match?) is not visible here -- confirm.
 */
bool myhtml_utils_strcmp(const char* ab, const char* to_lowercase, size_t size);
/* Predicate: by name, whether `node` is an HTML-namespace element with tag `tag_id`. */
bool myhtml_is_html_node(myhtml_tree_node_t *node, myhtml_tag_id_t tag_id);
// queue
/* Adds `token` to the queue of `tree`; the role of `begin` is not visible here. */
mystatus_t myhtml_queue_add(myhtml_tree_t *tree, size_t begin, myhtml_token_node_t* token);
2016-08-29 00:20:40 +03:00
/* version */
/* Returns the library version as a myhtml_version_t value. */
myhtml_version_t myhtml_version(void);
2016-08-29 00:20:40 +03:00
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif