[project @ 2004-04-05 21:54:22 by jmb]

Implement outstanding functionality

svn path=/import/netsurf/; revision=722
This commit is contained in:
John Mark Bell 2004-04-05 21:54:22 +00:00
parent aed81aa917
commit f5a0093a88

View File

@ -29,11 +29,6 @@
#ifdef WITH_SAVE_COMPLETE #ifdef WITH_SAVE_COMPLETE
/** \todo URL rewriting for @import rules
* Objects used by embedded html pages
* GUI
*/
regex_t save_complete_import_re; regex_t save_complete_import_re;
/** An entry in save_complete_list. */ /** An entry in save_complete_list. */
@ -42,18 +37,17 @@ struct save_complete_entry {
int ptr; /**< Pointer to object's location in memory */ int ptr; /**< Pointer to object's location in memory */
struct save_complete_entry *next; /**< Next entry in list */ struct save_complete_entry *next; /**< Next entry in list */
}; };
/** List of urls seen and saved so far. */
static struct save_complete_entry *save_complete_list;
static void save_complete_internal(struct content *c, const char *path,
static void save_imported_sheets(struct content *c, const char *path); bool index);
static char * rewrite_stylesheet_urls(const char *source, unsigned int size, static void save_imported_sheets(struct save_complete_entry **save_complete_list, struct content *c, const char *path);
static char * rewrite_stylesheet_urls(struct save_complete_entry *save_complete_list, const char *source, unsigned int size,
int *osize, const char *base); int *osize, const char *base);
static int rewrite_document_urls(xmlDoc *doc, const char *base); static int rewrite_document_urls(struct save_complete_entry *save_complete_list, xmlDoc *doc, const char *base);
static int rewrite_urls(xmlNode *n, const char *base); static int rewrite_urls(struct save_complete_entry *save_complete_list, xmlNode *n, const char *base);
static void rewrite_url(xmlNode *n, const char *attr, const char *base); static void rewrite_url(struct save_complete_entry *save_complete_list, xmlNode *n, const char *attr, const char *base);
static void save_complete_add_url(const char *url, int id); static void save_complete_add_url(struct save_complete_entry **save_complete_list, const char *url, int id);
static int save_complete_find_url(const char *url); static int save_complete_find_url(struct save_complete_entry *save_complete_list, const char *url);
/** /**
@ -64,18 +58,34 @@ static int save_complete_find_url(const char *url);
*/ */
void save_complete(struct content *c, const char *path) void save_complete(struct content *c, const char *path)
{
save_complete_internal(c, path, true);
}
/**
* Save an HTML page with all dependencies, recursing through imported pages.
*
* \param save_complete_list head of list of URLs
* \param c CONTENT_HTML to save
* \param path directory to save to (must exist)
*/
void save_complete_internal(struct content *c, const char *path, bool index)
{ {
char spath[256]; char spath[256];
unsigned int i; unsigned int i;
htmlParserCtxtPtr toSave; htmlParserCtxtPtr toSave;
/** List of urls seen and saved so far. One per HTML document */
struct save_complete_entry *save_complete_list;
if (c->type != CONTENT_HTML) if (c->type != CONTENT_HTML)
return; return;
save_complete_list = 0; save_complete_list = 0;
/* save stylesheets, ignoring the base sheet and <style> elements */ /* save stylesheets, ignoring the base sheet */
for (i = 2; i != c->data.html.stylesheet_count; i++) { for (i = 1; i != c->data.html.stylesheet_count; i++) {
struct content *css = c->data.html.stylesheet_content[i]; struct content *css = c->data.html.stylesheet_content[i];
char *source; char *source;
int source_len; int source_len;
@ -83,13 +93,16 @@ void save_complete(struct content *c, const char *path)
if (!css) if (!css)
continue; continue;
save_complete_add_url(css->url, (int) css); save_complete_add_url(&save_complete_list, css->url, (int) css);
save_imported_sheets(css, path); save_imported_sheets(&save_complete_list, css, path);
if (i == 1) continue; /* don't save <style> elements */
snprintf(spath, sizeof spath, "%s.%x", path, snprintf(spath, sizeof spath, "%s.%x", path,
(unsigned int) css); (unsigned int) css);
source = rewrite_stylesheet_urls(css->source_data, source = rewrite_stylesheet_urls(save_complete_list,
css->source_data,
css->source_size, &source_len, css->url); css->source_size, &source_len, css->url);
if (source) { if (source) {
xosfile_save_stamped(spath, 0xf79, source, xosfile_save_stamped(spath, 0xf79, source,
@ -106,8 +119,12 @@ void save_complete(struct content *c, const char *path)
if (!obj || obj->type >= CONTENT_OTHER || !obj->source_data) if (!obj || obj->type >= CONTENT_OTHER || !obj->source_data)
continue; continue;
save_complete_add_url(obj->url, (int) obj); save_complete_add_url(&save_complete_list, obj->url, (int) obj);
if (obj->type == CONTENT_HTML) {
save_complete_internal(obj, path, false);
}
else {
snprintf(spath, sizeof spath, "%s.%x", path, snprintf(spath, sizeof spath, "%s.%x", path,
(unsigned int) obj); (unsigned int) obj);
xosfile_save_stamped(spath, xosfile_save_stamped(spath,
@ -115,13 +132,14 @@ void save_complete(struct content *c, const char *path)
obj->source_data, obj->source_data,
obj->source_data + obj->source_size); obj->source_data + obj->source_size);
} }
}
/* make a copy of the document tree */ /* make a copy of the document tree */
toSave = htmlCreateMemoryParserCtxt(c->source_data, c->source_size); toSave = htmlCreateMemoryParserCtxt(c->source_data, c->source_size);
htmlParseDocument(toSave); htmlParseDocument(toSave);
/* rewrite all urls we know about */ /* rewrite all urls we know about */
if (rewrite_document_urls(toSave->myDoc, c->data.html.base_url) == 0) { if (rewrite_document_urls(save_complete_list, toSave->myDoc, c->data.html.base_url) == 0) {
xfree(spath); xfree(spath);
xmlFreeDoc(toSave->myDoc); xmlFreeDoc(toSave->myDoc);
htmlFreeParserCtxt(toSave); htmlFreeParserCtxt(toSave);
@ -129,7 +147,10 @@ void save_complete(struct content *c, const char *path)
} }
/* save the html file out last of all */ /* save the html file out last of all */
if (index)
snprintf(spath, sizeof spath, "%s.index", path); snprintf(spath, sizeof spath, "%s.index", path);
else
snprintf(spath, sizeof spath, "%s.%x", path, (unsigned int)c);
htmlSaveFile(spath, toSave->myDoc); htmlSaveFile(spath, toSave->myDoc);
xosfile_set_type(spath, 0xfaf); xosfile_set_type(spath, 0xfaf);
@ -145,8 +166,15 @@ void save_complete(struct content *c, const char *path)
} }
} }
/**
void save_imported_sheets(struct content *c, const char *path) * Save all imported stylesheets
*
* \param save_complete_list head of list of URLs
* \param c The content containing the stylesheet
* \param path Path to save to
*/
void save_imported_sheets(struct save_complete_entry **save_complete_list,
struct content *c, const char *path)
{ {
char spath[256]; char spath[256];
unsigned int j; unsigned int j;
@ -159,13 +187,14 @@ void save_imported_sheets(struct content *c, const char *path)
if (!css) if (!css)
continue; continue;
save_complete_add_url(css->url, (int) css); save_complete_add_url(save_complete_list, css->url, (int) css);
save_imported_sheets(css, path); save_imported_sheets(save_complete_list, css, path);
snprintf(spath, sizeof spath, "%s.%x", path, snprintf(spath, sizeof spath, "%s.%x", path,
(unsigned int) css); (unsigned int) css);
source = rewrite_stylesheet_urls(css->source_data, source = rewrite_stylesheet_urls(*save_complete_list,
css->source_data,
css->source_size, &source_len, css->url); css->source_size, &source_len, css->url);
if (source) { if (source) {
xosfile_save_stamped(spath, 0xf79, source, xosfile_save_stamped(spath, 0xf79, source,
@ -216,15 +245,15 @@ void save_complete_init(void)
/** /**
* Rewrite stylesheet @import rules for save complete. * Rewrite stylesheet @import rules for save complete.
* *
* \param save_complete_list head of list of URLs
* @param source stylesheet source * @param source stylesheet source
* @param size size of source * @param size size of source
* @param osize updated with the size of the result * @param osize updated with the size of the result
* @param head pointer to the head of the list containing imported urls
* @param base url of stylesheet * @param base url of stylesheet
* @return converted source, or 0 on error * @return converted source, or 0 on error
*/ */
char * rewrite_stylesheet_urls(const char *source, unsigned int size, char * rewrite_stylesheet_urls(struct save_complete_entry *save_complete_list, const char *source, unsigned int size,
int *osize, const char *base) int *osize, const char *base)
{ {
char *res; char *res;
@ -310,7 +339,7 @@ char * rewrite_stylesheet_urls(const char *source, unsigned int size,
memcpy(res + *osize, source + offset, match[0].rm_so); memcpy(res + *osize, source + offset, match[0].rm_so);
*osize += match[0].rm_so; *osize += match[0].rm_so;
id = save_complete_find_url(url); id = save_complete_find_url(save_complete_list, url);
if (id) { if (id) {
/* replace import */ /* replace import */
sprintf(buf, "@import '%x'", id); sprintf(buf, "@import '%x'", id);
@ -340,12 +369,14 @@ char * rewrite_stylesheet_urls(const char *source, unsigned int size,
/** /**
* Rewrite URLs in a HTML document to be relative * Rewrite URLs in a HTML document to be relative
* *
* \param save_complete_list head of list of URLs
* @param doc The root of the document tree * @param doc The root of the document tree
* @return 0 on error. >0 otherwise * @return 0 on error. >0 otherwise
* \param base base url of document * \param base base url of document
*/ */
int rewrite_document_urls(xmlDoc *doc, const char *base) int rewrite_document_urls(struct save_complete_entry *save_complete_list,
xmlDoc *doc, const char *base)
{ {
xmlNode *html; xmlNode *html;
@ -358,7 +389,7 @@ int rewrite_document_urls(xmlDoc *doc, const char *base)
return 0; return 0;
} }
rewrite_urls(html, base); rewrite_urls(save_complete_list, html, base);
return 1; return 1;
} }
@ -367,12 +398,14 @@ int rewrite_document_urls(xmlDoc *doc, const char *base)
/** /**
* Traverse tree, rewriting URLs as we go. * Traverse tree, rewriting URLs as we go.
* *
* \param save_complete_list head of list of URLs
* \param base base url of document
* @param n The root of the tree * @param n The root of the tree
* @return 0 on error. >0 otherwise * @return 0 on error. >0 otherwise
* \param base base url of document
*/ */
int rewrite_urls(xmlNode *n, const char *base) int rewrite_urls(struct save_complete_entry *save_complete_list, xmlNode *n,
const char *base)
{ {
xmlNode *this; xmlNode *this;
@ -384,18 +417,19 @@ int rewrite_urls(xmlNode *n, const char *base)
* 1) data <object> * 1) data <object>
* 2) href <a> <area> <link> <base> * 2) href <a> <area> <link> <base>
* 3) src <script> <input> <frame> <iframe> <img> * 3) src <script> <input> <frame> <iframe> <img>
* 4) n/a <style>
*/ */
if (n->type == XML_ELEMENT_NODE) { if (n->type == XML_ELEMENT_NODE) {
/* 1 */ /* 1 */
if (strcmp(n->name, "object") == 0) { if (strcmp(n->name, "object") == 0) {
rewrite_url(n, "data", base); rewrite_url(save_complete_list, n, "data", base);
} }
/* 2 */ /* 2 */
else if (strcmp(n->name, "a") == 0 || else if (strcmp(n->name, "a") == 0 ||
strcmp(n->name, "area") == 0 || strcmp(n->name, "area") == 0 ||
strcmp(n->name, "link") == 0 || strcmp(n->name, "link") == 0 ||
strcmp(n->name, "base") == 0) { strcmp(n->name, "base") == 0) {
rewrite_url(n, "href", base); rewrite_url(save_complete_list, n, "href", base);
} }
/* 3 */ /* 3 */
else if (strcmp(n->name, "frame") == 0 || else if (strcmp(n->name, "frame") == 0 ||
@ -403,7 +437,29 @@ int rewrite_urls(xmlNode *n, const char *base)
strcmp(n->name, "input") == 0 || strcmp(n->name, "input") == 0 ||
strcmp(n->name, "img") == 0 || strcmp(n->name, "img") == 0 ||
strcmp(n->name, "script") == 0) { strcmp(n->name, "script") == 0) {
rewrite_url(n, "src", base); rewrite_url(save_complete_list, n, "src", base);
}
/* 4 */
else if (strcmp(n->name, "style") == 0) {
unsigned int len;
/* Get current content */
xmlChar *content = xmlNodeGetContent(n);
if (!content) return 0;
/* Rewrite @import rules */
char *rewritten = rewrite_stylesheet_urls(
save_complete_list,
content,
strlen((char*)content),
&len, base);
if (!rewritten) return 0;
/* set new content */
xmlNodeSetContentLen(n, (const xmlChar*)rewritten,
len);
/* free old content */
xmlFree(content);
return 1;
} }
} }
else { else {
@ -412,7 +468,7 @@ int rewrite_urls(xmlNode *n, const char *base)
/* now recurse */ /* now recurse */
for (this = n->children; this != 0; this = this->next) { for (this = n->children; this != 0; this = this->next) {
rewrite_urls(this, base); rewrite_urls(save_complete_list, this, base);
} }
return 1; return 1;
@ -422,12 +478,14 @@ int rewrite_urls(xmlNode *n, const char *base)
/** /**
* Rewrite an URL in a HTML document. * Rewrite an URL in a HTML document.
* *
* \param save_complete_list head of list of URLs
* \param n The node to modify * \param n The node to modify
* \param attr The html attribute to modify * \param attr The html attribute to modify
* \param base base url of document * \param base base url of document
*/ */
void rewrite_url(xmlNode *n, const char *attr, const char *base) void rewrite_url(struct save_complete_entry *save_complete_list, xmlNode *n,
const char *attr, const char *base)
{ {
char *url, *data; char *url, *data;
char rel[256]; char rel[256];
@ -443,7 +501,7 @@ void rewrite_url(xmlNode *n, const char *attr, const char *base)
return; return;
} }
id = save_complete_find_url(url); id = save_complete_find_url(save_complete_list, url);
if (id) { if (id) {
/* found a match */ /* found a match */
snprintf(rel, sizeof rel, "%x", id); snprintf(rel, sizeof rel, "%x", id);
@ -461,11 +519,13 @@ void rewrite_url(xmlNode *n, const char *attr, const char *base)
/** /**
* Add a url to the save_complete_list. * Add a url to the save_complete_list.
* *
* \param save_complete_list head of list of URLs
* \param url url to add (copied) * \param url url to add (copied)
* \param id id to use for url * \param id id to use for url
*/ */
void save_complete_add_url(const char *url, int id) void save_complete_add_url(struct save_complete_entry **save_complete_list,
const char *url, int id)
{ {
struct save_complete_entry *entry; struct save_complete_entry *entry;
entry = malloc(sizeof (*entry)); entry = malloc(sizeof (*entry));
@ -477,20 +537,22 @@ void save_complete_add_url(const char *url, int id)
return; return;
} }
entry->ptr = id; entry->ptr = id;
entry->next = save_complete_list; entry->next = *save_complete_list;
save_complete_list = entry; *save_complete_list = entry;
} }
/** /**
* Look up a url in the save_complete_list. * Look up a url in the save_complete_list.
* *
* \param save_complete_list head of list of URLs
* \param url url to find * \param url url to find
* \param len length of url * \param len length of url
* \return id to use for url, or 0 if not present * \return id to use for url, or 0 if not present
*/ */
int save_complete_find_url(const char *url) int save_complete_find_url(struct save_complete_entry *save_complete_list,
const char *url)
{ {
struct save_complete_entry *entry; struct save_complete_entry *entry;
for (entry = save_complete_list; entry; entry = entry->next) for (entry = save_complete_list; entry; entry = entry->next)