[project @ 2004-06-06 19:39:17 by bursa]

Add and improve error handling of save_complete. Save HTML as UTF-8 to prevent encoding issues.

svn path=/import/netsurf/; revision=931
This commit is contained in:
James Bursa 2004-06-06 19:39:17 +00:00
parent 9dde0f406f
commit 6879c02a33
4 changed files with 318 additions and 224 deletions

View File

@ -35,7 +35,7 @@ extern struct content *save_content;
typedef enum { LINK_ACORN, LINK_ANT, LINK_TEXT } link_format;
static void ro_gui_save_complete(struct content *c, char *path);
static bool ro_gui_save_complete(struct content *c, char *path);
static void ro_gui_save_object_native(struct content *c, char *path);
static bool ro_gui_save_link(struct content *c, link_format format, char *path);
@ -166,7 +166,8 @@ void ro_gui_save_datasave_ack(wimp_message *message)
break;
case GUI_SAVE_COMPLETE:
ro_gui_save_complete(c, path);
if (!ro_gui_save_complete(c, path))
return;
break;
case GUI_SAVE_DRAW:
@ -236,13 +237,17 @@ void ro_gui_save_datasave_ack(wimp_message *message)
/**
* Prepare an application directory and save_complete() to it.
*
* \param c content of type CONTENT_HTML to save
* \param path path to save as
* \return true on success, false on error and error reported
*/
#define WIDTH 64
#define HEIGHT 64
#define SPRITE_SIZE (16 + 44 + ((WIDTH / 2 + 3) & ~3) * HEIGHT / 2)
void ro_gui_save_complete(struct content *c, char *path)
bool ro_gui_save_complete(struct content *c, char *path)
{
char buf[256];
FILE *fp;
@ -258,7 +263,7 @@ void ro_gui_save_complete(struct content *c, char *path)
LOG(("xosfile_create_dir: 0x%x: %s",
error->errnum, error->errmess));
warn_user("SaveError", error->errmess);
return;
return false;
}
/* Save !Run file */
@ -267,7 +272,7 @@ void ro_gui_save_complete(struct content *c, char *path)
if (!fp) {
LOG(("fopen(): errno = %i", errno));
warn_user("SaveError", strerror(errno));
return;
return false;
}
fprintf(fp, "Filer_Run <Obey$Dir>.index\n");
fclose(fp);
@ -276,7 +281,7 @@ void ro_gui_save_complete(struct content *c, char *path)
LOG(("xosfile_set_type: 0x%x: %s",
error->errnum, error->errmess));
warn_user("SaveError", error->errmess);
return;
return false;
}
/* Create !Sprites */
@ -288,7 +293,7 @@ void ro_gui_save_complete(struct content *c, char *path)
area = thumbnail_initialise(34, 34, os_MODE8BPP90X90);
if (!area) {
warn_user("NoMemory", 0);
return;
return false;
}
sprite_header = (osspriteop_header *)(area + 1);
strncpy(sprite_header->name, appname + 1, 12);
@ -305,12 +310,14 @@ void ro_gui_save_complete(struct content *c, char *path)
LOG(("xosspriteop_save_sprite_file: 0x%x: %s",
error->errnum, error->errmess));
warn_user("SaveError", error->errmess);
return;
return false;
}
save_complete(c, path);
return save_complete(c, path);
}
void ro_gui_save_object_native(struct content *c, char *path)
{
os_error *error;

View File

@ -12,6 +12,7 @@
#include <assert.h>
#include <ctype.h>
#include <errno.h>
#include <string.h>
#include <sys/types.h>
#include <regex.h>
@ -33,24 +34,24 @@ regex_t save_complete_import_re;
/** An entry in save_complete_list. */
struct save_complete_entry {
char *url; /**< Fully qualified URL, as per url_join output */
int ptr; /**< Pointer to object's location in memory */
struct save_complete_entry *next; /**< Next entry in list */
struct content *content;
struct save_complete_entry *next; /**< Next entry in list */
};
/** List of urls seen and saved so far. */
static struct save_complete_entry *save_complete_list;
static struct save_complete_entry *save_complete_list = 0;
static void save_complete_html(struct content *c, const char *path,
bool index);
static void save_imported_sheets(struct content *c, const char *path);
static bool save_complete_html(struct content *c, const char *path,
bool index);
static bool save_imported_sheets(struct content *c, const char *path);
static char * rewrite_stylesheet_urls(const char *source, unsigned int size,
int *osize, const char *base);
static int rewrite_document_urls(xmlDoc *doc, const char *base);
static int rewrite_urls(xmlNode *n, const char *base);
static void rewrite_url(xmlNode *n, const char *attr, const char *base);
static void save_complete_add_url(const char *url, int id);
static int save_complete_find_url(const char *url);
static bool rewrite_document_urls(xmlDoc *doc, const char *base);
static bool rewrite_urls(xmlNode *n, const char *base);
static bool rewrite_url(xmlNode *n, const char *attr, const char *base);
static bool save_complete_list_add(struct content *content);
static struct content * save_complete_list_find(const char *url);
static bool save_complete_list_check(struct content *content);
/**
@ -58,64 +59,89 @@ static int save_complete_find_url(const char *url);
*
* \param c CONTENT_HTML to save
* \param path directory to save to (must exist)
* \return true on success, false on error and error reported
*/
void save_complete(struct content *c, const char *path)
bool save_complete(struct content *c, const char *path)
{
save_complete_list = 0;
bool result;
save_complete_html(c, path, true);
result = save_complete_html(c, path, true);
/* free save_complete_list */
/* free save_complete_list */
while (save_complete_list) {
struct save_complete_entry *next = save_complete_list->next;
free(save_complete_list->url);
free(save_complete_list);
save_complete_list = next;
}
return result;
}
/**
* Save an HTML page with all dependencies, recursing through imported pages.
*
* \param c CONTENT_HTML to save
* \param path directory to save to (must exist)
* \param c CONTENT_HTML to save
* \param path directory to save to (must exist)
* \param index true to save as "index"
* \return true on success, false on error and error reported
*/
void save_complete_html(struct content *c, const char *path, bool index)
bool save_complete_html(struct content *c, const char *path, bool index)
{
char spath[256];
unsigned int i;
htmlParserCtxtPtr toSave;
htmlParserCtxtPtr parser;
os_error *error;
if (c->type != CONTENT_HTML)
return;
return false;
/* save stylesheets, ignoring the base sheet */
for (i = 1; i != c->data.html.stylesheet_count; i++) {
if (save_complete_list_check(c))
return true;
/* save stylesheets, ignoring the base sheet */
for (i = 1; i != c->data.html.stylesheet_count; i++) {
struct content *css = c->data.html.stylesheet_content[i];
char *source;
int source_len;
if (!css)
continue;
if (!css)
continue;
if (save_complete_list_check(css))
continue;
save_complete_add_url(css->url, (int) css);
if (!save_complete_list_add(css)) {
warn_user("NoMemory", 0);
return false;
}
save_imported_sheets(css, path);
if (!save_imported_sheets(css, path))
return false;
if (i == 1) continue; /* don't save <style> elements */
if (i == 1)
continue; /* don't save <style> elements */
snprintf(spath, sizeof spath, "%s.%x", path,
(unsigned int) css);
source = rewrite_stylesheet_urls(css->source_data,
css->source_size, &source_len, css->url);
if (source) {
xosfile_save_stamped(spath, 0xf79, source,
source + source_len);
free(source);
if (!source) {
warn_user("NoMemory", 0);
return false;
}
}
error = xosfile_save_stamped(spath, 0xf79, source,
source + source_len);
free(source);
if (error) {
LOG(("xosfile_save_stamped: 0x%x: %s",
error->errnum, error->errmess));
warn_user("SaveError", error->errmess);
return false;
}
}
/* save objects */
for (i = 0; i != c->data.html.object_count; i++) {
@ -124,79 +150,134 @@ void save_complete_html(struct content *c, const char *path, bool index)
/* skip difficult content types */
if (!obj || obj->type >= CONTENT_OTHER || !obj->source_data)
continue;
if (save_complete_list_check(obj))
continue;
save_complete_add_url(obj->url, (int) obj);
if (!save_complete_list_add(obj)) {
warn_user("NoMemory", 0);
return false;
}
if (obj->type == CONTENT_HTML) {
save_complete_html(obj, path, false);
}
else {
snprintf(spath, sizeof spath, "%s.%x", path,
(unsigned int) obj);
xosfile_save_stamped(spath,
ro_content_filetype(obj),
obj->source_data,
obj->source_data + obj->source_size);
if (obj->type == CONTENT_HTML) {
if (!save_complete_html(obj, path, false))
return false;
continue;
}
snprintf(spath, sizeof spath, "%s.%x", path,
(unsigned int) obj);
error = xosfile_save_stamped(spath,
ro_content_filetype(obj),
obj->source_data,
obj->source_data + obj->source_size);
if (error) {
LOG(("xosfile_save_stamped: 0x%x: %s",
error->errnum, error->errmess));
warn_user("SaveError", error->errmess);
return false;
}
}
/* make a copy of the document tree */
toSave = htmlCreateMemoryParserCtxt(c->source_data, c->source_size);
htmlParseDocument(toSave);
parser = htmlCreateMemoryParserCtxt(c->source_data, c->source_size);
if (!parser) {
warn_user("NoMemory", 0);
return false;
}
/** \todo set parser charset */
htmlParseDocument(parser);
/* rewrite all urls we know about */
if (rewrite_document_urls(toSave->myDoc, c->data.html.base_url) == 0) {
xfree(spath);
xmlFreeDoc(toSave->myDoc);
htmlFreeParserCtxt(toSave);
return;
}
if (!rewrite_document_urls(parser->myDoc, c->data.html.base_url)) {
xmlFreeDoc(parser->myDoc);
htmlFreeParserCtxt(parser);
warn_user("NoMemory", 0);
return false;
}
/* save the html file out last of all */
if (index)
snprintf(spath, sizeof spath, "%s.index", path);
else
snprintf(spath, sizeof spath, "%s.%x", path, (unsigned int)c);
htmlSaveFile(spath, toSave->myDoc);
xosfile_set_type(spath, 0xfaf);
snprintf(spath, sizeof spath, "%s.index", path);
else
snprintf(spath, sizeof spath, "%s.%x", path, (unsigned int)c);
xmlFreeDoc(toSave->myDoc);
htmlFreeParserCtxt(toSave);
errno = 0;
if (htmlSaveFileFormat(spath, parser->myDoc, 0, 0) == -1) {
if (errno)
warn_user("SaveError", strerror(errno));
else
warn_user("SaveError", "htmlSaveFileFormat failed");
return false;
}
error = xosfile_set_type(spath, 0xfaf);
if (error) {
LOG(("xosfile_set_type: 0x%x: %s",
error->errnum, error->errmess));
warn_user("SaveError", error->errmess);
return false;
}
xmlFreeDoc(parser->myDoc);
htmlFreeParserCtxt(parser);
return true;
}
/**
* Save all imported stylesheets
* Save stylesheets imported by a CONTENT_CSS.
*
* \param c The content containing the stylesheet
* \param path Path to save to
* \param c a CONTENT_CSS
* \param path path to save to
* \return true on success, false on error and error reported
*/
void save_imported_sheets(struct content *c, const char *path)
bool save_imported_sheets(struct content *c, const char *path)
{
char spath[256];
unsigned int j;
char *source;
int source_len;
unsigned int j;
char *source;
int source_len;
os_error *error;
for (j = 0; j != c->data.css.import_count; j++) {
for (j = 0; j != c->data.css.import_count; j++) {
struct content *css = c->data.css.import_content[j];
if (!css)
continue;
if (!css)
continue;
if (save_complete_list_check(css))
continue;
save_complete_add_url(css->url, (int) css);
if (!save_complete_list_add(css)) {
warn_user("NoMemory", 0);
return false;
}
save_imported_sheets(css, path);
if (!save_imported_sheets(css, path))
return false;
snprintf(spath, sizeof spath, "%s.%x", path,
(unsigned int) css);
source = rewrite_stylesheet_urls(css->source_data,
css->source_size, &source_len, css->url);
if (source) {
xosfile_save_stamped(spath, 0xf79, source,
source + source_len);
free(source);
if (!source) {
warn_user("NoMemory", 0);
return false;
}
}
error = xosfile_save_stamped(spath, 0xf79, source,
source + source_len);
free(source);
if (error) {
LOG(("xosfile_save_stamped: 0x%x: %s",
error->errnum, error->errmess));
warn_user("SaveError", error->errmess);
return false;
}
}
return true;
}
@ -244,7 +325,7 @@ void save_complete_init(void)
* @param size size of source
* @param osize updated with the size of the result
* @param base url of stylesheet
* @return converted source, or 0 on error
* @return converted source, or 0 on out of memory
*/
char * rewrite_stylesheet_urls(const char *source, unsigned int size,
@ -256,7 +337,7 @@ char * rewrite_stylesheet_urls(const char *source, unsigned int size,
char buf[20];
unsigned int offset = 0;
int url_len = 0;
int id;
struct content *content;
int m;
unsigned int i;
unsigned int imports = 0;
@ -276,10 +357,8 @@ char * rewrite_stylesheet_urls(const char *source, unsigned int size,
}
res = malloc(size + imports * 20);
if (!res) {
warn_user("NoMemory", 0);
if (!res)
return 0;
}
*osize = 0;
while (offset < size) {
@ -317,26 +396,25 @@ char * rewrite_stylesheet_urls(const char *source, unsigned int size,
url2 = strndup(url, url_len);
if (!url2) {
warn_user("NoMemory", 0);
free(res);
return 0;
}
url = url_join(url2, base);
free(url2);
if (!url) {
warn_user("NoMemory", 0);
free(res);
return 0;
}
}
/* copy data before match */
memcpy(res + *osize, source + offset, match[0].rm_so);
*osize += match[0].rm_so;
id = save_complete_find_url(url);
if (id) {
content = save_complete_list_find(url);
if (content) {
/* replace import */
sprintf(buf, "@import '%x'", id);
snprintf(buf, sizeof buf, "@import '%x'",
(unsigned int) content);
memcpy(res + *osize, buf, strlen(buf));
*osize += strlen(buf);
} else {
@ -361,116 +439,110 @@ char * rewrite_stylesheet_urls(const char *source, unsigned int size,
/**
* Rewrite URLs in a HTML document to be relative
* Rewrite URLs in a HTML document to be relative.
*
* @param doc The root of the document tree
* @return 0 on error. >0 otherwise
* \param doc root of the document tree
* \param base base url of document
* \return true on success, false on out of memory
*/
int rewrite_document_urls(xmlDoc *doc, const char *base)
bool rewrite_document_urls(xmlDoc *doc, const char *base)
{
xmlNode *html;
xmlNode *node;
/* find the html element */
for (html = doc->children;
html!=0 && html->type != XML_ELEMENT_NODE;
html = html->next)
;
if (html == 0 || strcmp((const char*)html->name, "html") != 0) {
return 0;
}
for (node = doc->children; node; node = node->next)
if (node->type == XML_ELEMENT_NODE)
if (!rewrite_urls(node, base))
return false;
rewrite_urls(html, base);
return 1;
return true;
}
/**
* Traverse tree, rewriting URLs as we go.
*
* \param n xmlNode of type XML_ELEMENT_NODE to rewrite
* \param base base url of document
* @param n The root of the tree
* @return 0 on error. >0 otherwise
* \return true on success, false on out of memory
*
* URLs in the tree rooted at element n are rewritten.
*/
int rewrite_urls(xmlNode *n, const char *base)
bool rewrite_urls(xmlNode *n, const char *base)
{
xmlNode *this;
xmlNode *child;
/**
* We only need to consider the following cases:
*
* Attribute: Elements:
*
* 1) data <object>
* 2) href <a> <area> <link> <base>
* 3) src <script> <input> <frame> <iframe> <img>
* 4) n/a <style>
*/
if (n->type == XML_ELEMENT_NODE) {
/* 1 */
if (strcmp(n->name, "object") == 0) {
rewrite_url(n, "data", base);
}
/* 2 */
else if (strcmp(n->name, "a") == 0 ||
strcmp(n->name, "area") == 0 ||
strcmp(n->name, "link") == 0 ||
strcmp(n->name, "base") == 0) {
rewrite_url(n, "href", base);
}
/* 3 */
else if (strcmp(n->name, "frame") == 0 ||
strcmp(n->name, "iframe") == 0 ||
strcmp(n->name, "input") == 0 ||
strcmp(n->name, "img") == 0 ||
strcmp(n->name, "script") == 0) {
rewrite_url(n, "src", base);
}
/* 4 */
else if (strcmp(n->name, "style") == 0) {
unsigned int len;
xmlChar *content;
/**
* We only need to consider the following cases:
*
* Attribute: Elements:
*
* 1) data <object>
* 2) href <a> <area> <link> <base>
* 3) src <script> <input> <frame> <iframe> <img>
* 4) n/a <style>
*/
/* 1 */
if (strcmp(n->name, "object") == 0) {
if (!rewrite_url(n, "data", base))
return false;
}
/* 2 */
else if (strcmp(n->name, "a") == 0 ||
strcmp(n->name, "area") == 0 ||
strcmp(n->name, "link") == 0 ||
strcmp(n->name, "base") == 0) {
if (!rewrite_url(n, "href", base))
return false;
}
/* 3 */
else if (strcmp(n->name, "frame") == 0 ||
strcmp(n->name, "iframe") == 0 ||
strcmp(n->name, "input") == 0 ||
strcmp(n->name, "img") == 0 ||
strcmp(n->name, "script") == 0) {
if (!rewrite_url(n, "src", base))
return false;
}
/* 4 */
else if (strcmp(n->name, "style") == 0) {
unsigned int len;
xmlChar *content;
for (this = n->children; this != 0; this = this->next) {
/* Get current content */
content = xmlNodeGetContent(this);
if (!content) continue;
for (child = n->children; child != 0; child = child->next) {
/* Get current content */
content = xmlNodeGetContent(child);
if (!content)
/* unfortunately we don't know if this is due to
* memory exhaustion, or because there is no
* content for this node */
continue;
/* Rewrite @import rules */
char *rewritten = rewrite_stylesheet_urls(
content,
strlen((char*)content),
&len, base);
if (!rewritten) {
xmlFree(content);
continue;
}
/* Rewrite @import rules */
char *rewritten = rewrite_stylesheet_urls(
content,
strlen((char*)content),
&len, base);
xmlFree(content);
if (!rewritten)
return false;
/* set new content */
xmlNodeSetContentLen(this,
(const xmlChar*)rewritten,
len);
/* set new content */
xmlNodeSetContentLen(child,
(const xmlChar*)rewritten,
len);
}
/* free old content */
xmlFree(content);
}
return 1;
}
}
else {
return 0;
}
/* now recurse */
for (this = n->children; this != 0; this = this->next) {
rewrite_urls(this, base);
return true;
}
return 1;
/* now recurse */
for (child = n->children; child; child = child->next)
if (!rewrite_urls(child, base))
return false;
return true;
}
@ -480,80 +552,98 @@ int rewrite_urls(xmlNode *n, const char *base)
* \param n The node to modify
* \param attr The html attribute to modify
* \param base base url of document
* \return true on success, false on out of memory
*/
void rewrite_url(xmlNode *n,const char *attr, const char *base)
bool rewrite_url(xmlNode *n, const char *attr, const char *base)
{
char *url, *data;
char rel[256];
int id;
char *url, *data;
char rel[20];
struct content *content;
data = xmlGetProp(n, (const xmlChar*)attr);
if (!xmlHasProp(n, (const xmlChar *) attr))
return true;
if (!data) return;
data = xmlGetProp(n, (const xmlChar *) attr);
if (!data)
return false;
url = url_join(data, base);
if (!url) {
xmlFree(data);
return;
}
url = url_join(data, base);
xmlFree(data);
if (!url)
return false;
id = save_complete_find_url(url);
if (id) {
content = save_complete_list_find(url);
if (content) {
/* found a match */
snprintf(rel, sizeof rel, "%x", id);
xmlSetProp(n, (const xmlChar *) attr, (xmlChar *) rel);
free(url);
snprintf(rel, sizeof rel, "%x", (unsigned int) content);
if (!xmlSetProp(n, (const xmlChar *) attr, (xmlChar *) rel))
return false;
} else {
/* no match found */
xmlSetProp(n, (const xmlChar *) attr, (xmlChar *) url);
if (!xmlSetProp(n, (const xmlChar *) attr, (xmlChar *) url)) {
free(url);
return false;
}
free(url);
}
free(url);
xmlFree(data);
return true;
}
/**
* Add a url to the save_complete_list.
* Add a content to the save_complete_list.
*
* \param url url to add (copied)
* \param id id to use for url
* \param content content to add
* \return true on success, false on out of memory
*/
void save_complete_add_url(const char *url, int id)
bool save_complete_list_add(struct content *content)
{
struct save_complete_entry *entry;
entry = malloc(sizeof (*entry));
if (!entry)
return;
entry->url = strdup(url);
if (!url) {
free(entry);
return;
}
entry->ptr = id;
return false;
entry->content = content;
entry->next = save_complete_list;
save_complete_list = entry;
return true;
}
/**
* Look up a url in the save_complete_list.
*
* \param url url to find
* \param len length of url
* \return id to use for url, or 0 if not present
* \param url url to find
* \return content if found, 0 otherwise
*/
int save_complete_find_url(const char *url)
struct content * save_complete_list_find(const char *url)
{
struct save_complete_entry *entry;
for (entry = save_complete_list; entry; entry = entry->next)
if (strcmp(url, entry->url) == 0)
break;
if (entry)
return entry->ptr;
if (strcmp(url, entry->content->url) == 0)
return entry->content;
return 0;
}
/**
 * Test whether a content has already been recorded in save_complete_list.
 *
 * Entries are matched by comparing the stored content pointer with the
 * one supplied; the content's url is not consulted.
 *
 * \param  content  content pointer to search for
 * \return  true if an entry in save_complete_list refers to this content,
 *          false otherwise
 */
bool save_complete_list_check(struct content *content)
{
	struct save_complete_entry *cursor = save_complete_list;

	/* walk the list until a matching entry is found or the list ends */
	while (cursor && cursor->content != content)
		cursor = cursor->next;

	/* a non-null cursor means the walk stopped on a match */
	return cursor != 0;
}
#endif

View File

@ -15,6 +15,6 @@
struct content;
void save_complete_init(void);
void save_complete(struct content *c, const char *path);
bool save_complete(struct content *c, const char *path);
#endif

View File

@ -22,12 +22,9 @@
#include "oslib/wimpspriteop.h"
#include "netsurf/css/css.h"
#include "netsurf/utils/config.h"
#include "netsurf/desktop/save_text.h"
#include "netsurf/riscos/constdata.h"
#include "netsurf/riscos/gui.h"
#include "netsurf/riscos/options.h"
#include "netsurf/riscos/save_complete.h"
#include "netsurf/riscos/save_draw.h"
#include "netsurf/riscos/theme.h"
#include "netsurf/riscos/thumbnail.h"
#include "netsurf/riscos/wimp.h"