netsurf/utils/libdom.c
2013-08-16 08:25:24 +01:00

384 lines
8.6 KiB
C

/*
* Copyright 2012 Vincent Sanders <vince@netsurf-browser.org>
*
* This file is part of NetSurf, http://www.netsurf-browser.org/
*
* NetSurf is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; version 2 of the License.
*
* NetSurf is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/** \file
* libdom utilities (implementation).
*/
#include <assert.h>
#include <dom/dom.h>
#include "utils/config.h"
#include "utils/log.h"
#include "utils/libdom.h"
/* exported interface documented in libdom.h */
bool libdom_treewalk(dom_node *root,
bool (*callback)(dom_node *node, dom_string *name, void *ctx),
void *ctx)
{
dom_node *node;
bool result = true;
node = dom_node_ref(root); /* tree root */
while (node != NULL) {
dom_node *next = NULL;
dom_node_type type;
dom_string *name;
dom_exception exc;
exc = dom_node_get_first_child(node, &next);
if (exc != DOM_NO_ERR) {
dom_node_unref(node);
break;
}
if (next != NULL) { /* 1. children */
dom_node_unref(node);
node = next;
} else {
exc = dom_node_get_next_sibling(node, &next);
if (exc != DOM_NO_ERR) {
dom_node_unref(node);
break;
}
if (next != NULL) { /* 2. siblings */
dom_node_unref(node);
node = next;
} else { /* 3. ancestor siblings */
while (node != NULL) {
exc = dom_node_get_next_sibling(node,
&next);
if (exc != DOM_NO_ERR) {
dom_node_unref(node);
node = NULL;
break;
}
if (next != NULL) {
dom_node_unref(next);
break;
}
exc = dom_node_get_parent_node(node,
&next);
if (exc != DOM_NO_ERR) {
dom_node_unref(node);
node = NULL;
break;
}
dom_node_unref(node);
node = next;
}
if (node == NULL)
break;
exc = dom_node_get_next_sibling(node, &next);
if (exc != DOM_NO_ERR) {
dom_node_unref(node);
break;
}
dom_node_unref(node);
node = next;
}
}
assert(node != NULL);
exc = dom_node_get_node_type(node, &type);
if ((exc != DOM_NO_ERR) || (type != DOM_ELEMENT_NODE))
continue;
exc = dom_node_get_node_name(node, &name);
if (exc != DOM_NO_ERR)
continue;
result = callback(node, name, ctx);
dom_string_unref(name);
if (result == false) {
break; /* callback caused early termination */
}
}
return result;
}
/* libdom_treewalk context for libdom_find_element */
struct find_element_ctx {
lwc_string *search;
dom_node *found;
};
/* libdom_treewalk callback for libdom_find_element */
static bool libdom_find_element_callback(dom_node *node, dom_string *name,
void *ctx)
{
struct find_element_ctx *data = ctx;
if (dom_string_caseless_lwc_isequal(name, data->search)) {
/* Found element */
data->found = node;
return false; /* Discontinue search */
}
return true; /* Continue search */
}
/* exported interface documented in libdom.h */
dom_node *libdom_find_element(dom_node *node, lwc_string *element_name)
{
struct find_element_ctx data;
assert(element_name != NULL);
if (node == NULL)
return NULL;
data.search = element_name;
data.found = NULL;
libdom_treewalk(node, libdom_find_element_callback, &data);
return data.found;
}
/* exported interface documented in libdom.h */
dom_node *libdom_find_first_element(dom_node *parent, lwc_string *element_name)
{
dom_node *element;
dom_exception exc;
dom_string *node_name = NULL;
dom_node_type node_type;
dom_node *next_node;
exc = dom_node_get_first_child(parent, &element);
if ((exc != DOM_NO_ERR) || (element == NULL)) {
return NULL;
}
/* find first node thats a element */
do {
exc = dom_node_get_node_type(element, &node_type);
if ((exc == DOM_NO_ERR) && (node_type == DOM_ELEMENT_NODE)) {
exc = dom_node_get_node_name(element, &node_name);
if ((exc == DOM_NO_ERR) && (node_name != NULL)) {
if (dom_string_caseless_lwc_isequal(node_name,
element_name)) {
dom_string_unref(node_name);
break;
}
dom_string_unref(node_name);
}
}
exc = dom_node_get_next_sibling(element, &next_node);
dom_node_unref(element);
if (exc == DOM_NO_ERR) {
element = next_node;
} else {
element = NULL;
}
} while (element != NULL);
return element;
}
/* exported interface documented in libdom.h */
/* TODO: return appropriate errors */
nserror libdom_iterate_child_elements(dom_node *parent,
libdom_iterate_cb cb, void *ctx)
{
dom_nodelist *children;
uint32_t index, num_children;
dom_exception error;
error = dom_node_get_child_nodes(parent, &children);
if (error != DOM_NO_ERR || children == NULL)
return NSERROR_NOMEM;
error = dom_nodelist_get_length(children, &num_children);
if (error != DOM_NO_ERR) {
dom_nodelist_unref(children);
return NSERROR_NOMEM;
}
for (index = 0; index < num_children; index++) {
dom_node *child;
dom_node_type type;
error = dom_nodelist_item(children, index, &child);
if (error != DOM_NO_ERR) {
dom_nodelist_unref(children);
return NSERROR_NOMEM;
}
error = dom_node_get_node_type(child, &type);
if (error == DOM_NO_ERR && type == DOM_ELEMENT_NODE) {
nserror err = cb(child, ctx);
if (err != NSERROR_OK) {
dom_node_unref(child);
dom_nodelist_unref(children);
return err;
}
}
dom_node_unref(child);
}
dom_nodelist_unref(children);
return NSERROR_OK;
}
/* exported interface documented in libdom.h */
nserror libdom_hubbub_error_to_nserror(dom_hubbub_error error)
{
switch (error) {
/* HUBBUB_REPROCESS is not handled here because it can
* never occur outside the hubbub treebuilder
*/
case DOM_HUBBUB_OK:
/* parsed ok */
return NSERROR_OK;
case (DOM_HUBBUB_HUBBUB_ERR | HUBBUB_PAUSED):
/* hubbub input paused */
return NSERROR_OK;
case DOM_HUBBUB_NOMEM:
/* out of memory error from DOM */
return NSERROR_NOMEM;
case DOM_HUBBUB_BADPARM:
/* Bad parameter passed to creation */
return NSERROR_BAD_PARAMETER;
case DOM_HUBBUB_DOM:
/* DOM call returned error */
return NSERROR_DOM;
case (DOM_HUBBUB_HUBBUB_ERR | HUBBUB_ENCODINGCHANGE):
/* encoding changed */
return NSERROR_ENCODING_CHANGE;
case (DOM_HUBBUB_HUBBUB_ERR | HUBBUB_NOMEM):
/* out of memory error from parser */
return NSERROR_NOMEM;
case (DOM_HUBBUB_HUBBUB_ERR | HUBBUB_BADPARM):
return NSERROR_BAD_PARAMETER;
case (DOM_HUBBUB_HUBBUB_ERR | HUBBUB_INVALID):
return NSERROR_INVALID;
case (DOM_HUBBUB_HUBBUB_ERR | HUBBUB_FILENOTFOUND):
return NSERROR_NOT_FOUND;
case (DOM_HUBBUB_HUBBUB_ERR | HUBBUB_NEEDDATA):
return NSERROR_NEED_DATA;
case (DOM_HUBBUB_HUBBUB_ERR | HUBBUB_BADENCODING):
return NSERROR_BAD_ENCODING;
case (DOM_HUBBUB_HUBBUB_ERR | HUBBUB_UNKNOWN):
/* currently only generated by the libdom hubbub binding */
return NSERROR_DOM;
default:
/* unknown error */
/** @todo better error handling and reporting */
return NSERROR_UNKNOWN;
}
return NSERROR_UNKNOWN;
}
static void ignore_dom_msg(uint32_t severity, void *ctx, const char *msg, ...)
{
}
/* exported interface documented in libdom.h */
nserror libdom_parse_file(const char *filename, const char *encoding, dom_document **doc)
{
dom_hubbub_parser_params parse_params;
dom_hubbub_error error;
dom_hubbub_parser *parser;
dom_document *document;
FILE *fp = NULL;
#define BUF_SIZE 512
uint8_t buf[BUF_SIZE];
fp = fopen(filename, "r");
if (fp == NULL) {
return NSERROR_NOT_FOUND;
}
parse_params.enc = encoding;
parse_params.fix_enc = false;
parse_params.enable_script = false;
parse_params.msg = ignore_dom_msg;
parse_params.script = NULL;
parse_params.ctx = NULL;
parse_params.daf = NULL;
error = dom_hubbub_parser_create(&parse_params, &parser, &document);
if (error != DOM_HUBBUB_OK) {
fclose(fp);
return libdom_hubbub_error_to_nserror(error);
}
while (feof(fp) == 0) {
size_t read = fread(buf, sizeof(buf[0]), BUF_SIZE, fp);
error = dom_hubbub_parser_parse_chunk(parser, buf, read);
if (error != DOM_HUBBUB_OK) {
dom_node_unref(document);
dom_hubbub_parser_destroy(parser);
fclose(fp);
return NSERROR_DOM;
}
}
error = dom_hubbub_parser_completed(parser);
if (error != DOM_HUBBUB_OK) {
dom_node_unref(document);
dom_hubbub_parser_destroy(parser);
fclose(fp);
return libdom_hubbub_error_to_nserror(error);
}
dom_hubbub_parser_destroy(parser);
fclose(fp);
*doc = document;
return NSERROR_OK;
}