mirror of
https://github.com/netsurf-browser/netsurf
synced 2025-01-08 20:12:01 +03:00
[project @ 2004-04-05 21:54:22 by jmb]
Implement outstanding functionality svn path=/import/netsurf/; revision=722
This commit is contained in:
parent
aed81aa917
commit
f5a0093a88
@ -29,11 +29,6 @@
|
|||||||
|
|
||||||
#ifdef WITH_SAVE_COMPLETE
|
#ifdef WITH_SAVE_COMPLETE
|
||||||
|
|
||||||
/** \todo URL rewriting for @import rules
|
|
||||||
* Objects used by embedded html pages
|
|
||||||
* GUI
|
|
||||||
*/
|
|
||||||
|
|
||||||
regex_t save_complete_import_re;
|
regex_t save_complete_import_re;
|
||||||
|
|
||||||
/** An entry in save_complete_list. */
|
/** An entry in save_complete_list. */
|
||||||
@ -42,18 +37,17 @@ struct save_complete_entry {
|
|||||||
int ptr; /**< Pointer to object's location in memory */
|
int ptr; /**< Pointer to object's location in memory */
|
||||||
struct save_complete_entry *next; /**< Next entry in list */
|
struct save_complete_entry *next; /**< Next entry in list */
|
||||||
};
|
};
|
||||||
/** List of urls seen and saved so far. */
|
|
||||||
static struct save_complete_entry *save_complete_list;
|
|
||||||
|
|
||||||
|
static void save_complete_internal(struct content *c, const char *path,
|
||||||
static void save_imported_sheets(struct content *c, const char *path);
|
bool index);
|
||||||
static char * rewrite_stylesheet_urls(const char *source, unsigned int size,
|
static void save_imported_sheets(struct save_complete_entry **save_complete_list, struct content *c, const char *path);
|
||||||
|
static char * rewrite_stylesheet_urls(struct save_complete_entry *save_complete_list, const char *source, unsigned int size,
|
||||||
int *osize, const char *base);
|
int *osize, const char *base);
|
||||||
static int rewrite_document_urls(xmlDoc *doc, const char *base);
|
static int rewrite_document_urls(struct save_complete_entry *save_complete_list, xmlDoc *doc, const char *base);
|
||||||
static int rewrite_urls(xmlNode *n, const char *base);
|
static int rewrite_urls(struct save_complete_entry *save_complete_list, xmlNode *n, const char *base);
|
||||||
static void rewrite_url(xmlNode *n, const char *attr, const char *base);
|
static void rewrite_url(struct save_complete_entry *save_complete_list, xmlNode *n, const char *attr, const char *base);
|
||||||
static void save_complete_add_url(const char *url, int id);
|
static void save_complete_add_url(struct save_complete_entry **save_complete_list, const char *url, int id);
|
||||||
static int save_complete_find_url(const char *url);
|
static int save_complete_find_url(struct save_complete_entry *save_complete_list, const char *url);
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -64,18 +58,34 @@ static int save_complete_find_url(const char *url);
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
void save_complete(struct content *c, const char *path)
|
void save_complete(struct content *c, const char *path)
|
||||||
|
{
|
||||||
|
save_complete_internal(c, path, true);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Save an HTML page with all dependencies, recursing through imported pages.
|
||||||
|
*
|
||||||
|
* \param save_complete_list head of list of URLs
|
||||||
|
* \param c CONTENT_HTML to save
|
||||||
|
* \param path directory to save to (must exist)
|
||||||
|
*/
|
||||||
|
|
||||||
|
void save_complete_internal(struct content *c, const char *path, bool index)
|
||||||
{
|
{
|
||||||
char spath[256];
|
char spath[256];
|
||||||
unsigned int i;
|
unsigned int i;
|
||||||
htmlParserCtxtPtr toSave;
|
htmlParserCtxtPtr toSave;
|
||||||
|
/** List of urls seen and saved so far. One per HTML document */
|
||||||
|
struct save_complete_entry *save_complete_list;
|
||||||
|
|
||||||
|
|
||||||
if (c->type != CONTENT_HTML)
|
if (c->type != CONTENT_HTML)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
save_complete_list = 0;
|
save_complete_list = 0;
|
||||||
|
|
||||||
/* save stylesheets, ignoring the base sheet and <style> elements */
|
/* save stylesheets, ignoring the base sheet */
|
||||||
for (i = 2; i != c->data.html.stylesheet_count; i++) {
|
for (i = 1; i != c->data.html.stylesheet_count; i++) {
|
||||||
struct content *css = c->data.html.stylesheet_content[i];
|
struct content *css = c->data.html.stylesheet_content[i];
|
||||||
char *source;
|
char *source;
|
||||||
int source_len;
|
int source_len;
|
||||||
@ -83,13 +93,16 @@ void save_complete(struct content *c, const char *path)
|
|||||||
if (!css)
|
if (!css)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
save_complete_add_url(css->url, (int) css);
|
save_complete_add_url(&save_complete_list, css->url, (int) css);
|
||||||
|
|
||||||
save_imported_sheets(css, path);
|
save_imported_sheets(&save_complete_list, css, path);
|
||||||
|
|
||||||
|
if (i == 1) continue; /* don't save <style> elements */
|
||||||
|
|
||||||
snprintf(spath, sizeof spath, "%s.%x", path,
|
snprintf(spath, sizeof spath, "%s.%x", path,
|
||||||
(unsigned int) css);
|
(unsigned int) css);
|
||||||
source = rewrite_stylesheet_urls(css->source_data,
|
source = rewrite_stylesheet_urls(save_complete_list,
|
||||||
|
css->source_data,
|
||||||
css->source_size, &source_len, css->url);
|
css->source_size, &source_len, css->url);
|
||||||
if (source) {
|
if (source) {
|
||||||
xosfile_save_stamped(spath, 0xf79, source,
|
xosfile_save_stamped(spath, 0xf79, source,
|
||||||
@ -106,8 +119,12 @@ void save_complete(struct content *c, const char *path)
|
|||||||
if (!obj || obj->type >= CONTENT_OTHER || !obj->source_data)
|
if (!obj || obj->type >= CONTENT_OTHER || !obj->source_data)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
save_complete_add_url(obj->url, (int) obj);
|
save_complete_add_url(&save_complete_list, obj->url, (int) obj);
|
||||||
|
|
||||||
|
if (obj->type == CONTENT_HTML) {
|
||||||
|
save_complete_internal(obj, path, false);
|
||||||
|
}
|
||||||
|
else {
|
||||||
snprintf(spath, sizeof spath, "%s.%x", path,
|
snprintf(spath, sizeof spath, "%s.%x", path,
|
||||||
(unsigned int) obj);
|
(unsigned int) obj);
|
||||||
xosfile_save_stamped(spath,
|
xosfile_save_stamped(spath,
|
||||||
@ -115,13 +132,14 @@ void save_complete(struct content *c, const char *path)
|
|||||||
obj->source_data,
|
obj->source_data,
|
||||||
obj->source_data + obj->source_size);
|
obj->source_data + obj->source_size);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/* make a copy of the document tree */
|
/* make a copy of the document tree */
|
||||||
toSave = htmlCreateMemoryParserCtxt(c->source_data, c->source_size);
|
toSave = htmlCreateMemoryParserCtxt(c->source_data, c->source_size);
|
||||||
htmlParseDocument(toSave);
|
htmlParseDocument(toSave);
|
||||||
|
|
||||||
/* rewrite all urls we know about */
|
/* rewrite all urls we know about */
|
||||||
if (rewrite_document_urls(toSave->myDoc, c->data.html.base_url) == 0) {
|
if (rewrite_document_urls(save_complete_list, toSave->myDoc, c->data.html.base_url) == 0) {
|
||||||
xfree(spath);
|
xfree(spath);
|
||||||
xmlFreeDoc(toSave->myDoc);
|
xmlFreeDoc(toSave->myDoc);
|
||||||
htmlFreeParserCtxt(toSave);
|
htmlFreeParserCtxt(toSave);
|
||||||
@ -129,7 +147,10 @@ void save_complete(struct content *c, const char *path)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* save the html file out last of all */
|
/* save the html file out last of all */
|
||||||
|
if (index)
|
||||||
snprintf(spath, sizeof spath, "%s.index", path);
|
snprintf(spath, sizeof spath, "%s.index", path);
|
||||||
|
else
|
||||||
|
snprintf(spath, sizeof spath, "%s.%x", path, (unsigned int)c);
|
||||||
htmlSaveFile(spath, toSave->myDoc);
|
htmlSaveFile(spath, toSave->myDoc);
|
||||||
xosfile_set_type(spath, 0xfaf);
|
xosfile_set_type(spath, 0xfaf);
|
||||||
|
|
||||||
@ -145,8 +166,15 @@ void save_complete(struct content *c, const char *path)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
void save_imported_sheets(struct content *c, const char *path)
|
* Save all imported stylesheets
|
||||||
|
*
|
||||||
|
* \param save_complete_list head of list of URLs
|
||||||
|
* \param c The content containing the stylesheet
|
||||||
|
* \param path Path to save to
|
||||||
|
*/
|
||||||
|
void save_imported_sheets(struct save_complete_entry **save_complete_list,
|
||||||
|
struct content *c, const char *path)
|
||||||
{
|
{
|
||||||
char spath[256];
|
char spath[256];
|
||||||
unsigned int j;
|
unsigned int j;
|
||||||
@ -159,13 +187,14 @@ void save_imported_sheets(struct content *c, const char *path)
|
|||||||
if (!css)
|
if (!css)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
save_complete_add_url(css->url, (int) css);
|
save_complete_add_url(save_complete_list, css->url, (int) css);
|
||||||
|
|
||||||
save_imported_sheets(css, path);
|
save_imported_sheets(save_complete_list, css, path);
|
||||||
|
|
||||||
snprintf(spath, sizeof spath, "%s.%x", path,
|
snprintf(spath, sizeof spath, "%s.%x", path,
|
||||||
(unsigned int) css);
|
(unsigned int) css);
|
||||||
source = rewrite_stylesheet_urls(css->source_data,
|
source = rewrite_stylesheet_urls(*save_complete_list,
|
||||||
|
css->source_data,
|
||||||
css->source_size, &source_len, css->url);
|
css->source_size, &source_len, css->url);
|
||||||
if (source) {
|
if (source) {
|
||||||
xosfile_save_stamped(spath, 0xf79, source,
|
xosfile_save_stamped(spath, 0xf79, source,
|
||||||
@ -216,15 +245,15 @@ void save_complete_init(void)
|
|||||||
/**
|
/**
|
||||||
* Rewrite stylesheet @import rules for save complete.
|
* Rewrite stylesheet @import rules for save complete.
|
||||||
*
|
*
|
||||||
|
* \param save_complete_list head of list of URLs
|
||||||
* @param source stylesheet source
|
* @param source stylesheet source
|
||||||
* @param size size of source
|
* @param size size of source
|
||||||
* @param osize updated with the size of the result
|
* @param osize updated with the size of the result
|
||||||
* @param head pointer to the head of the list containing imported urls
|
|
||||||
* @param base url of stylesheet
|
* @param base url of stylesheet
|
||||||
* @return converted source, or 0 on error
|
* @return converted source, or 0 on error
|
||||||
*/
|
*/
|
||||||
|
|
||||||
char * rewrite_stylesheet_urls(const char *source, unsigned int size,
|
char * rewrite_stylesheet_urls(struct save_complete_entry *save_complete_list, const char *source, unsigned int size,
|
||||||
int *osize, const char *base)
|
int *osize, const char *base)
|
||||||
{
|
{
|
||||||
char *res;
|
char *res;
|
||||||
@ -310,7 +339,7 @@ char * rewrite_stylesheet_urls(const char *source, unsigned int size,
|
|||||||
memcpy(res + *osize, source + offset, match[0].rm_so);
|
memcpy(res + *osize, source + offset, match[0].rm_so);
|
||||||
*osize += match[0].rm_so;
|
*osize += match[0].rm_so;
|
||||||
|
|
||||||
id = save_complete_find_url(url);
|
id = save_complete_find_url(save_complete_list, url);
|
||||||
if (id) {
|
if (id) {
|
||||||
/* replace import */
|
/* replace import */
|
||||||
sprintf(buf, "@import '%x'", id);
|
sprintf(buf, "@import '%x'", id);
|
||||||
@ -340,12 +369,14 @@ char * rewrite_stylesheet_urls(const char *source, unsigned int size,
|
|||||||
/**
|
/**
|
||||||
* Rewrite URLs in a HTML document to be relative
|
* Rewrite URLs in a HTML document to be relative
|
||||||
*
|
*
|
||||||
|
* \param save_complete_list head of list of URLs
|
||||||
* @param doc The root of the document tree
|
* @param doc The root of the document tree
|
||||||
* @return 0 on error. >0 otherwise
|
* @return 0 on error. >0 otherwise
|
||||||
* \param base base url of document
|
* \param base base url of document
|
||||||
*/
|
*/
|
||||||
|
|
||||||
int rewrite_document_urls(xmlDoc *doc, const char *base)
|
int rewrite_document_urls(struct save_complete_entry *save_complete_list,
|
||||||
|
xmlDoc *doc, const char *base)
|
||||||
{
|
{
|
||||||
xmlNode *html;
|
xmlNode *html;
|
||||||
|
|
||||||
@ -358,7 +389,7 @@ int rewrite_document_urls(xmlDoc *doc, const char *base)
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
rewrite_urls(html, base);
|
rewrite_urls(save_complete_list, html, base);
|
||||||
|
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
@ -367,12 +398,14 @@ int rewrite_document_urls(xmlDoc *doc, const char *base)
|
|||||||
/**
|
/**
|
||||||
* Traverse tree, rewriting URLs as we go.
|
* Traverse tree, rewriting URLs as we go.
|
||||||
*
|
*
|
||||||
|
* \param save_complete_list head of list of URLs
|
||||||
|
* \param base base url of document
|
||||||
* @param n The root of the tree
|
* @param n The root of the tree
|
||||||
* @return 0 on error. >0 otherwise
|
* @return 0 on error. >0 otherwise
|
||||||
* \param base base url of document
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
int rewrite_urls(xmlNode *n, const char *base)
|
int rewrite_urls(struct save_complete_entry *save_complete_list, xmlNode *n,
|
||||||
|
const char *base)
|
||||||
{
|
{
|
||||||
xmlNode *this;
|
xmlNode *this;
|
||||||
|
|
||||||
@ -384,18 +417,19 @@ int rewrite_urls(xmlNode *n, const char *base)
|
|||||||
* 1) data <object>
|
* 1) data <object>
|
||||||
* 2) href <a> <area> <link> <base>
|
* 2) href <a> <area> <link> <base>
|
||||||
* 3) src <script> <input> <frame> <iframe> <img>
|
* 3) src <script> <input> <frame> <iframe> <img>
|
||||||
|
* 4) n/a <style>
|
||||||
*/
|
*/
|
||||||
if (n->type == XML_ELEMENT_NODE) {
|
if (n->type == XML_ELEMENT_NODE) {
|
||||||
/* 1 */
|
/* 1 */
|
||||||
if (strcmp(n->name, "object") == 0) {
|
if (strcmp(n->name, "object") == 0) {
|
||||||
rewrite_url(n, "data", base);
|
rewrite_url(save_complete_list, n, "data", base);
|
||||||
}
|
}
|
||||||
/* 2 */
|
/* 2 */
|
||||||
else if (strcmp(n->name, "a") == 0 ||
|
else if (strcmp(n->name, "a") == 0 ||
|
||||||
strcmp(n->name, "area") == 0 ||
|
strcmp(n->name, "area") == 0 ||
|
||||||
strcmp(n->name, "link") == 0 ||
|
strcmp(n->name, "link") == 0 ||
|
||||||
strcmp(n->name, "base") == 0) {
|
strcmp(n->name, "base") == 0) {
|
||||||
rewrite_url(n, "href", base);
|
rewrite_url(save_complete_list, n, "href", base);
|
||||||
}
|
}
|
||||||
/* 3 */
|
/* 3 */
|
||||||
else if (strcmp(n->name, "frame") == 0 ||
|
else if (strcmp(n->name, "frame") == 0 ||
|
||||||
@ -403,7 +437,29 @@ int rewrite_urls(xmlNode *n, const char *base)
|
|||||||
strcmp(n->name, "input") == 0 ||
|
strcmp(n->name, "input") == 0 ||
|
||||||
strcmp(n->name, "img") == 0 ||
|
strcmp(n->name, "img") == 0 ||
|
||||||
strcmp(n->name, "script") == 0) {
|
strcmp(n->name, "script") == 0) {
|
||||||
rewrite_url(n, "src", base);
|
rewrite_url(save_complete_list, n, "src", base);
|
||||||
|
}
|
||||||
|
/* 4 */
|
||||||
|
else if (strcmp(n->name, "style") == 0) {
|
||||||
|
unsigned int len;
|
||||||
|
/* Get current content */
|
||||||
|
xmlChar *content = xmlNodeGetContent(n);
|
||||||
|
if (!content) return 0;
|
||||||
|
|
||||||
|
/* Rewrite @import rules */
|
||||||
|
char *rewritten = rewrite_stylesheet_urls(
|
||||||
|
save_complete_list,
|
||||||
|
content,
|
||||||
|
strlen((char*)content),
|
||||||
|
&len, base);
|
||||||
|
if (!rewritten) return 0;
|
||||||
|
|
||||||
|
/* set new content */
|
||||||
|
xmlNodeSetContentLen(n, (const xmlChar*)rewritten,
|
||||||
|
len);
|
||||||
|
/* free old content */
|
||||||
|
xmlFree(content);
|
||||||
|
return 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
@ -412,7 +468,7 @@ int rewrite_urls(xmlNode *n, const char *base)
|
|||||||
|
|
||||||
/* now recurse */
|
/* now recurse */
|
||||||
for (this = n->children; this != 0; this = this->next) {
|
for (this = n->children; this != 0; this = this->next) {
|
||||||
rewrite_urls(this, base);
|
rewrite_urls(save_complete_list, this, base);
|
||||||
}
|
}
|
||||||
|
|
||||||
return 1;
|
return 1;
|
||||||
@ -422,12 +478,14 @@ int rewrite_urls(xmlNode *n, const char *base)
|
|||||||
/**
|
/**
|
||||||
* Rewrite an URL in a HTML document.
|
* Rewrite an URL in a HTML document.
|
||||||
*
|
*
|
||||||
|
* \param save_complete_list head of list of URLs
|
||||||
* \param n The node to modify
|
* \param n The node to modify
|
||||||
* \param attr The html attribute to modify
|
* \param attr The html attribute to modify
|
||||||
* \param base base url of document
|
* \param base base url of document
|
||||||
*/
|
*/
|
||||||
|
|
||||||
void rewrite_url(xmlNode *n, const char *attr, const char *base)
|
void rewrite_url(struct save_complete_entry *save_complete_list, xmlNode *n,
|
||||||
|
const char *attr, const char *base)
|
||||||
{
|
{
|
||||||
char *url, *data;
|
char *url, *data;
|
||||||
char rel[256];
|
char rel[256];
|
||||||
@ -443,7 +501,7 @@ void rewrite_url(xmlNode *n, const char *attr, const char *base)
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
id = save_complete_find_url(url);
|
id = save_complete_find_url(save_complete_list, url);
|
||||||
if (id) {
|
if (id) {
|
||||||
/* found a match */
|
/* found a match */
|
||||||
snprintf(rel, sizeof rel, "%x", id);
|
snprintf(rel, sizeof rel, "%x", id);
|
||||||
@ -461,11 +519,13 @@ void rewrite_url(xmlNode *n, const char *attr, const char *base)
|
|||||||
/**
|
/**
|
||||||
* Add a url to the save_complete_list.
|
* Add a url to the save_complete_list.
|
||||||
*
|
*
|
||||||
|
* \param save_complete_list head of list of URLs
|
||||||
* \param url url to add (copied)
|
* \param url url to add (copied)
|
||||||
* \param id id to use for url
|
* \param id id to use for url
|
||||||
*/
|
*/
|
||||||
|
|
||||||
void save_complete_add_url(const char *url, int id)
|
void save_complete_add_url(struct save_complete_entry **save_complete_list,
|
||||||
|
const char *url, int id)
|
||||||
{
|
{
|
||||||
struct save_complete_entry *entry;
|
struct save_complete_entry *entry;
|
||||||
entry = malloc(sizeof (*entry));
|
entry = malloc(sizeof (*entry));
|
||||||
@ -477,20 +537,22 @@ void save_complete_add_url(const char *url, int id)
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
entry->ptr = id;
|
entry->ptr = id;
|
||||||
entry->next = save_complete_list;
|
entry->next = *save_complete_list;
|
||||||
save_complete_list = entry;
|
*save_complete_list = entry;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Look up a url in the save_complete_list.
|
* Look up a url in the save_complete_list.
|
||||||
*
|
*
|
||||||
|
* \param save_complete_list head of list of URLs
|
||||||
* \param url url to find
|
* \param url url to find
|
||||||
* \param len length of url
|
* \param len length of url
|
||||||
* \return id to use for url, or 0 if not present
|
* \return id to use for url, or 0 if not present
|
||||||
*/
|
*/
|
||||||
|
|
||||||
int save_complete_find_url(const char *url)
|
int save_complete_find_url(struct save_complete_entry *save_complete_list,
|
||||||
|
const char *url)
|
||||||
{
|
{
|
||||||
struct save_complete_entry *entry;
|
struct save_complete_entry *entry;
|
||||||
for (entry = save_complete_list; entry; entry = entry->next)
|
for (entry = save_complete_list; entry; entry = entry->next)
|
||||||
|
Loading…
Reference in New Issue
Block a user