559 lines
13 KiB
C
559 lines
13 KiB
C
/*
 * Copyright 2012 Vincent Sanders <vince@netsurf-browser.org>
 *
 * This file is part of NetSurf, http://www.netsurf-browser.org/
 *
 * NetSurf is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; version 2 of the License.
 *
 * NetSurf is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
|
|
|
|
/** \file
 * libdom utilities (implementation).
 */
|
|
|
|
#include <assert.h>
|
|
#include <string.h>
|
|
#include <dom/dom.h>
|
|
|
|
#include "utils/config.h"
|
|
#include "utils/log.h"
|
|
#include "utils/libdom.h"
|
|
|
|
/* exported interface documented in libdom.h */
|
|
bool libdom_treewalk(dom_node *root,
|
|
bool (*callback)(dom_node *node, dom_string *name, void *ctx),
|
|
void *ctx)
|
|
{
|
|
dom_node *node;
|
|
bool result = true;
|
|
|
|
node = dom_node_ref(root); /* tree root */
|
|
|
|
while (node != NULL) {
|
|
dom_node *next = NULL;
|
|
dom_node_type type;
|
|
dom_string *name;
|
|
dom_exception exc;
|
|
|
|
exc = dom_node_get_first_child(node, &next);
|
|
if (exc != DOM_NO_ERR) {
|
|
dom_node_unref(node);
|
|
break;
|
|
}
|
|
|
|
if (next != NULL) {
|
|
/* 1. Got children */
|
|
dom_node_unref(node);
|
|
node = next;
|
|
} else {
|
|
/* No children; siblings & ancestor's siblings */
|
|
while (node != NULL) {
|
|
exc = dom_node_get_next_sibling(node, &next);
|
|
if (exc != DOM_NO_ERR) {
|
|
dom_node_unref(node);
|
|
node = NULL;
|
|
break;
|
|
}
|
|
|
|
if (next != NULL) {
|
|
/* 2. Got sibling */
|
|
break;
|
|
}
|
|
|
|
exc = dom_node_get_parent_node(node, &next);
|
|
if (exc != DOM_NO_ERR) {
|
|
dom_node_unref(node);
|
|
node = NULL;
|
|
break;
|
|
}
|
|
|
|
/* 3. Try parent */
|
|
dom_node_unref(node);
|
|
node = next;
|
|
}
|
|
|
|
if (node == NULL)
|
|
break;
|
|
|
|
dom_node_unref(node);
|
|
node = next;
|
|
}
|
|
|
|
assert(node != NULL);
|
|
|
|
exc = dom_node_get_node_type(node, &type);
|
|
if ((exc != DOM_NO_ERR) || (type != DOM_ELEMENT_NODE))
|
|
continue;
|
|
|
|
exc = dom_node_get_node_name(node, &name);
|
|
if (exc != DOM_NO_ERR)
|
|
continue;
|
|
|
|
result = callback(node, name, ctx);
|
|
|
|
dom_string_unref(name);
|
|
|
|
if (result == false) {
|
|
break; /* callback caused early termination */
|
|
}
|
|
|
|
}
|
|
return result;
|
|
}
|
|
|
|
|
|
/* libdom_treewalk context for libdom_find_element */
|
|
struct find_element_ctx {
|
|
lwc_string *search;
|
|
dom_node *found;
|
|
};
|
|
|
|
/* libdom_treewalk callback for libdom_find_element */
|
|
static bool libdom_find_element_callback(dom_node *node, dom_string *name,
|
|
void *ctx)
|
|
{
|
|
struct find_element_ctx *data = ctx;
|
|
|
|
if (dom_string_caseless_lwc_isequal(name, data->search)) {
|
|
/* Found element */
|
|
data->found = node;
|
|
return false; /* Discontinue search */
|
|
}
|
|
|
|
return true; /* Continue search */
|
|
}
|
|
|
|
|
|
/* exported interface documented in libdom.h */
|
|
dom_node *libdom_find_element(dom_node *node, lwc_string *element_name)
|
|
{
|
|
struct find_element_ctx data;
|
|
|
|
assert(element_name != NULL);
|
|
|
|
if (node == NULL)
|
|
return NULL;
|
|
|
|
data.search = element_name;
|
|
data.found = NULL;
|
|
|
|
libdom_treewalk(node, libdom_find_element_callback, &data);
|
|
|
|
return data.found;
|
|
}
|
|
|
|
|
|
/* exported interface documented in libdom.h */
|
|
dom_node *libdom_find_first_element(dom_node *parent, lwc_string *element_name)
|
|
{
|
|
dom_node *element;
|
|
dom_exception exc;
|
|
dom_string *node_name = NULL;
|
|
dom_node_type node_type;
|
|
dom_node *next_node;
|
|
|
|
exc = dom_node_get_first_child(parent, &element);
|
|
if ((exc != DOM_NO_ERR) || (element == NULL)) {
|
|
return NULL;
|
|
}
|
|
|
|
/* find first node thats a element */
|
|
do {
|
|
exc = dom_node_get_node_type(element, &node_type);
|
|
|
|
if ((exc == DOM_NO_ERR) && (node_type == DOM_ELEMENT_NODE)) {
|
|
exc = dom_node_get_node_name(element, &node_name);
|
|
if ((exc == DOM_NO_ERR) && (node_name != NULL)) {
|
|
if (dom_string_caseless_lwc_isequal(node_name,
|
|
element_name)) {
|
|
dom_string_unref(node_name);
|
|
break;
|
|
}
|
|
dom_string_unref(node_name);
|
|
}
|
|
}
|
|
|
|
exc = dom_node_get_next_sibling(element, &next_node);
|
|
dom_node_unref(element);
|
|
if (exc == DOM_NO_ERR) {
|
|
element = next_node;
|
|
} else {
|
|
element = NULL;
|
|
}
|
|
} while (element != NULL);
|
|
|
|
return element;
|
|
}
|
|
|
|
/* exported interface documented in libdom.h */
|
|
/* TODO: return appropriate errors */
|
|
nserror libdom_iterate_child_elements(dom_node *parent,
|
|
libdom_iterate_cb cb, void *ctx)
|
|
{
|
|
dom_nodelist *children;
|
|
uint32_t index, num_children;
|
|
dom_exception error;
|
|
|
|
error = dom_node_get_child_nodes(parent, &children);
|
|
if (error != DOM_NO_ERR || children == NULL)
|
|
return NSERROR_NOMEM;
|
|
|
|
error = dom_nodelist_get_length(children, &num_children);
|
|
if (error != DOM_NO_ERR) {
|
|
dom_nodelist_unref(children);
|
|
return NSERROR_NOMEM;
|
|
}
|
|
|
|
for (index = 0; index < num_children; index++) {
|
|
dom_node *child;
|
|
dom_node_type type;
|
|
|
|
error = dom_nodelist_item(children, index, &child);
|
|
if (error != DOM_NO_ERR) {
|
|
dom_nodelist_unref(children);
|
|
return NSERROR_NOMEM;
|
|
}
|
|
|
|
error = dom_node_get_node_type(child, &type);
|
|
if (error == DOM_NO_ERR && type == DOM_ELEMENT_NODE) {
|
|
nserror err = cb(child, ctx);
|
|
if (err != NSERROR_OK) {
|
|
dom_node_unref(child);
|
|
dom_nodelist_unref(children);
|
|
return err;
|
|
}
|
|
}
|
|
|
|
dom_node_unref(child);
|
|
}
|
|
|
|
dom_nodelist_unref(children);
|
|
|
|
return NSERROR_OK;
|
|
}
|
|
|
|
/* exported interface documented in libdom.h */
|
|
nserror libdom_hubbub_error_to_nserror(dom_hubbub_error error)
|
|
{
|
|
switch (error) {
|
|
|
|
/* HUBBUB_REPROCESS is not handled here because it can
|
|
* never occur outside the hubbub treebuilder
|
|
*/
|
|
|
|
case DOM_HUBBUB_OK:
|
|
/* parsed ok */
|
|
return NSERROR_OK;
|
|
|
|
case (DOM_HUBBUB_HUBBUB_ERR | HUBBUB_PAUSED):
|
|
/* hubbub input paused */
|
|
return NSERROR_OK;
|
|
|
|
case DOM_HUBBUB_NOMEM:
|
|
/* out of memory error from DOM */
|
|
return NSERROR_NOMEM;
|
|
|
|
case DOM_HUBBUB_BADPARM:
|
|
/* Bad parameter passed to creation */
|
|
return NSERROR_BAD_PARAMETER;
|
|
|
|
case DOM_HUBBUB_DOM:
|
|
/* DOM call returned error */
|
|
return NSERROR_DOM;
|
|
|
|
case (DOM_HUBBUB_HUBBUB_ERR | HUBBUB_ENCODINGCHANGE):
|
|
/* encoding changed */
|
|
return NSERROR_ENCODING_CHANGE;
|
|
|
|
case (DOM_HUBBUB_HUBBUB_ERR | HUBBUB_NOMEM):
|
|
/* out of memory error from parser */
|
|
return NSERROR_NOMEM;
|
|
|
|
case (DOM_HUBBUB_HUBBUB_ERR | HUBBUB_BADPARM):
|
|
return NSERROR_BAD_PARAMETER;
|
|
|
|
case (DOM_HUBBUB_HUBBUB_ERR | HUBBUB_INVALID):
|
|
return NSERROR_INVALID;
|
|
|
|
case (DOM_HUBBUB_HUBBUB_ERR | HUBBUB_FILENOTFOUND):
|
|
return NSERROR_NOT_FOUND;
|
|
|
|
case (DOM_HUBBUB_HUBBUB_ERR | HUBBUB_NEEDDATA):
|
|
return NSERROR_NEED_DATA;
|
|
|
|
case (DOM_HUBBUB_HUBBUB_ERR | HUBBUB_BADENCODING):
|
|
return NSERROR_BAD_ENCODING;
|
|
|
|
case (DOM_HUBBUB_HUBBUB_ERR | HUBBUB_UNKNOWN):
|
|
/* currently only generated by the libdom hubbub binding */
|
|
return NSERROR_DOM;
|
|
default:
|
|
/* unknown error */
|
|
/** @todo better error handling and reporting */
|
|
return NSERROR_UNKNOWN;
|
|
}
|
|
return NSERROR_UNKNOWN;
|
|
}
|
|
|
|
|
|
/**
 * Message handler that discards everything it is given.
 *
 * \param severity  Ignored
 * \param ctx       Ignored
 * \param msg       Ignored, along with any variadic arguments
 */
static void ignore_dom_msg(uint32_t severity, void *ctx, const char *msg, ...)
{
	/* Intentionally empty; the casts only mark the parameters as unused */
	(void)severity;
	(void)ctx;
	(void)msg;
}
|
|
|
|
|
|
|
|
/**
|
|
* Dump attribute/value for an element node
|
|
*
|
|
* \param node The element node to dump attribute details for
|
|
* \param f file handle to dump to.
|
|
* \param attribute The attribute to dump
|
|
* \return true on success, or false on error
|
|
*/
|
|
static bool dump_dom_element_attribute(dom_node *node, FILE *f, const char *attribute)
|
|
{
|
|
dom_exception exc;
|
|
dom_string *attr = NULL;
|
|
dom_string *attr_value = NULL;
|
|
dom_node_type type;
|
|
const char *string;
|
|
size_t length;
|
|
|
|
/* Should only have element nodes here */
|
|
exc = dom_node_get_node_type(node, &type);
|
|
if (exc != DOM_NO_ERR) {
|
|
fprintf(f, " Exception raised for node_get_node_type\n");
|
|
return false;
|
|
}
|
|
assert(type == DOM_ELEMENT_NODE);
|
|
|
|
/* Create a dom_string containing required attribute name. */
|
|
exc = dom_string_create_interned((uint8_t *)attribute,
|
|
strlen(attribute), &attr);
|
|
if (exc != DOM_NO_ERR) {
|
|
fprintf(f, " Exception raised for dom_string_create\n");
|
|
return false;
|
|
}
|
|
|
|
/* Get class attribute's value */
|
|
exc = dom_element_get_attribute(node, attr, &attr_value);
|
|
if (exc != DOM_NO_ERR) {
|
|
fprintf(f, " Exception raised for element_get_attribute\n");
|
|
dom_string_unref(attr);
|
|
return false;
|
|
} else if (attr_value == NULL) {
|
|
/* Element lacks required attribute */
|
|
dom_string_unref(attr);
|
|
return true;
|
|
}
|
|
|
|
/* Finished with the attr dom_string */
|
|
dom_string_unref(attr);
|
|
|
|
/* Get attribute value's string data */
|
|
string = dom_string_data(attr_value);
|
|
length = dom_string_byte_length(attr_value);
|
|
|
|
/* Print attribute info */
|
|
fprintf(f, " %s=\"%.*s\"", attribute, (int)length, string);
|
|
|
|
/* Finished with the attr_value dom_string */
|
|
dom_string_unref(attr_value);
|
|
|
|
return true;
|
|
}
|
|
|
|
|
|
/**
|
|
* Print a line in a DOM structure dump for an element
|
|
*
|
|
* \param node The node to dump
|
|
* \param f file handle to dump to.
|
|
* \param depth The node's depth
|
|
* \return true on success, or false on error
|
|
*/
|
|
static bool dump_dom_element(dom_node *node, FILE *f, int depth)
|
|
{
|
|
dom_exception exc;
|
|
dom_string *node_name = NULL;
|
|
dom_node_type type;
|
|
int i;
|
|
const char *string;
|
|
size_t length;
|
|
|
|
/* Only interested in element nodes */
|
|
exc = dom_node_get_node_type(node, &type);
|
|
if (exc != DOM_NO_ERR) {
|
|
fprintf(f, "Exception raised for node_get_node_type\n");
|
|
return false;
|
|
} else if (type != DOM_ELEMENT_NODE) {
|
|
/* Nothing to print */
|
|
return true;
|
|
}
|
|
|
|
/* Get element name */
|
|
exc = dom_node_get_node_name(node, &node_name);
|
|
if (exc != DOM_NO_ERR) {
|
|
fprintf(f, "Exception raised for get_node_name\n");
|
|
return false;
|
|
} else if (node_name == NULL) {
|
|
fprintf(f, "Broken: root_name == NULL\n");
|
|
return false;
|
|
}
|
|
|
|
/* Print ASCII tree structure for current node */
|
|
if (depth > 0) {
|
|
for (i = 0; i < depth; i++) {
|
|
fprintf(f, "| ");
|
|
}
|
|
fprintf(f, "+-");
|
|
}
|
|
|
|
/* Get string data and print element name */
|
|
string = dom_string_data(node_name);
|
|
length = dom_string_byte_length(node_name);
|
|
fprintf(f, "[%.*s]", (int)length, string);
|
|
|
|
if (length == 5 && strncmp(string, "title", 5) == 0) {
|
|
/* Title tag, gather the title */
|
|
dom_string *str;
|
|
exc = dom_node_get_text_content(node, &str);
|
|
if (exc == DOM_NO_ERR && str != NULL) {
|
|
fprintf(f, " $%.*s$", (int)dom_string_byte_length(str),
|
|
dom_string_data(str));
|
|
dom_string_unref(str);
|
|
}
|
|
}
|
|
|
|
/* Finished with the node_name dom_string */
|
|
dom_string_unref(node_name);
|
|
|
|
/* Print the element's id & class, if it has them */
|
|
if (dump_dom_element_attribute(node, f, "id") == false ||
|
|
dump_dom_element_attribute(node, f, "class") == false) {
|
|
/* Error occured */
|
|
fprintf(f, "\n");
|
|
return false;
|
|
}
|
|
|
|
fprintf(f, "\n");
|
|
return true;
|
|
}
|
|
|
|
|
|
/* exported interface documented in libdom.h */
|
|
nserror libdom_dump_structure(dom_node *node, FILE *f, int depth)
|
|
{
|
|
dom_exception exc;
|
|
dom_node *child;
|
|
nserror ret;
|
|
dom_node *next_child;
|
|
|
|
/* Print this node's entry */
|
|
if (dump_dom_element(node, f, depth) == false) {
|
|
/* There was an error; return */
|
|
return NSERROR_DOM;
|
|
}
|
|
|
|
/* Get the node's first child */
|
|
exc = dom_node_get_first_child(node, &child);
|
|
if (exc != DOM_NO_ERR) {
|
|
fprintf(f, "Exception raised for node_get_first_child\n");
|
|
return NSERROR_DOM;
|
|
} else if (child != NULL) {
|
|
/* node has children; decend to children's depth */
|
|
depth++;
|
|
|
|
/* Loop though all node's children */
|
|
do {
|
|
/* Visit node's descendents */
|
|
ret = libdom_dump_structure(child, f, depth);
|
|
if (ret !=NSERROR_OK) {
|
|
/* There was an error; return */
|
|
dom_node_unref(child);
|
|
return NSERROR_DOM;
|
|
}
|
|
|
|
/* Go to next sibling */
|
|
exc = dom_node_get_next_sibling(child, &next_child);
|
|
if (exc != DOM_NO_ERR) {
|
|
fprintf(f, "Exception raised for node_get_next_sibling\n");
|
|
dom_node_unref(child);
|
|
return NSERROR_DOM;
|
|
}
|
|
|
|
dom_node_unref(child);
|
|
child = next_child;
|
|
} while (child != NULL); /* No more children */
|
|
}
|
|
|
|
return NSERROR_OK;
|
|
}
|
|
|
|
|
|
/* exported interface documented in libdom.h */
|
|
nserror libdom_parse_file(const char *filename, const char *encoding, dom_document **doc)
|
|
{
|
|
dom_hubbub_parser_params parse_params;
|
|
dom_hubbub_error error;
|
|
dom_hubbub_parser *parser;
|
|
dom_document *document;
|
|
FILE *fp = NULL;
|
|
#define BUF_SIZE 512
|
|
uint8_t buf[BUF_SIZE];
|
|
|
|
fp = fopen(filename, "r");
|
|
if (fp == NULL) {
|
|
return NSERROR_NOT_FOUND;
|
|
}
|
|
|
|
parse_params.enc = encoding;
|
|
parse_params.fix_enc = false;
|
|
parse_params.enable_script = false;
|
|
parse_params.msg = ignore_dom_msg;
|
|
parse_params.script = NULL;
|
|
parse_params.ctx = NULL;
|
|
parse_params.daf = NULL;
|
|
|
|
error = dom_hubbub_parser_create(&parse_params, &parser, &document);
|
|
if (error != DOM_HUBBUB_OK) {
|
|
fclose(fp);
|
|
return libdom_hubbub_error_to_nserror(error);
|
|
}
|
|
|
|
while (feof(fp) == 0) {
|
|
size_t read = fread(buf, sizeof(buf[0]), BUF_SIZE, fp);
|
|
|
|
error = dom_hubbub_parser_parse_chunk(parser, buf, read);
|
|
if (error != DOM_HUBBUB_OK) {
|
|
dom_node_unref(document);
|
|
dom_hubbub_parser_destroy(parser);
|
|
fclose(fp);
|
|
return NSERROR_DOM;
|
|
}
|
|
}
|
|
|
|
error = dom_hubbub_parser_completed(parser);
|
|
if (error != DOM_HUBBUB_OK) {
|
|
dom_node_unref(document);
|
|
dom_hubbub_parser_destroy(parser);
|
|
fclose(fp);
|
|
return libdom_hubbub_error_to_nserror(error);
|
|
}
|
|
|
|
dom_hubbub_parser_destroy(parser);
|
|
fclose(fp);
|
|
|
|
*doc = document;
|
|
return NSERROR_OK;
|
|
}
|