AmigaOS implementation of complete page save.

svn path=/trunk/netsurf/; revision=5653
This commit is contained in:
Chris Young 2008-11-08 23:08:55 +00:00
parent 66a759e2a1
commit 834eb20101
12 changed files with 931 additions and 29 deletions

View File

@ -229,6 +229,7 @@ CloseWindow:Close window
SaveAs:Save as
Source:Source...
TextNS:Text...
SaveCompNS:Complete...
PDF:PDF...
# Edit menu

View File

@ -229,6 +229,7 @@ CloseWindow:Close window
SaveAs:Save as
Source:Source...
TextNS:Text...
SaveCompNS:Complete...
PDF:PDF...
# Edit menu

View File

@ -229,6 +229,7 @@ CloseWindow:Close window
SaveAs:Save as
Source:Source...
TextNS:Text...
SaveCompNS:Complete...
PDF:PDF...
# Edit menu

View File

@ -230,6 +230,7 @@ CloseWindow:Chiudi finestra
SaveAs:Salva come
Source:Sorgente...
TextNS:Testo...
SaveCompNS:Complete...
PDF:PDF...
# Edit menu

View File

@ -229,6 +229,7 @@ CloseWindow:Close window
SaveAs:Save as
Source:Source...
TextNS:Text...
SaveCompNS:Complete...
PDF:PDF...
# Edit menu

View File

@ -90,7 +90,7 @@ S_DEBUG := $(addprefix debug/,$(S_DEBUG))
S_AMIGA := compat.c gui.c tree.c history.c hotlist.c schedule.c \
thumbnail.c misc.c bitmap.c font.c filetype.c utf8.c login.c \
plotters.c object.c menu.c save_pdf.c arexx.c version.c \
cookies.c context_menu.c clipboard.c
cookies.c context_menu.c clipboard.c save_complete.c fetch_file.c
S_AMIGA := $(addprefix amiga/,$(S_AMIGA))
# S_FRAMEBUFFER are sources purely for the framebuffer build

View File

@ -66,6 +66,7 @@
#include "amiga/cookies.h"
#include "amiga/clipboard.h"
#include <proto/keymap.h>
#include "amiga/save_complete.h"
#ifdef WITH_HUBBUB
#include <hubbub/hubbub.h>
@ -339,6 +340,7 @@ void gui_init(int argc, char** argv)
if(!hotlist) ami_hotlist_init(&hotlist);
ami_global_history_initialise();
ami_cookies_initialise();
save_complete_init();
strcpy(&throbberfile,option_theme);
AddPart(&throbberfile,"Theme",100);
@ -393,6 +395,7 @@ void gui_init2(int argc, char** argv)
long rarray[] = {0};
struct RDArgs *args;
STRPTR template = "URL/A";
STRPTR temp_homepage_url = NULL;
enum
{
@ -412,7 +415,7 @@ void gui_init2(int argc, char** argv)
{
if(rarray[A_URL])
{
option_homepage_url = (char *)strdup(rarray[A_URL]);
temp_homepage_url = (char *)strdup(rarray[A_URL]);
}
FreeArgs(args);
}
@ -451,7 +454,15 @@ void gui_init2(int argc, char** argv)
TAG_DONE);
}
bw = browser_window_create(option_homepage_url, 0, 0, true,false); // curbw = temp
if(temp_homepage_url)
{
bw = browser_window_create(temp_homepage_url, 0, 0, true,false);
free(temp_homepage_url);
}
else
{
bw = browser_window_create(option_homepage_url, 0, 0, true,false); // curbw = temp
}
}
void ami_handle_msg(void)
@ -1004,6 +1015,8 @@ void ami_switch_tab(struct gui_window_2 *gwin,bool redraw)
if(gwin->tabs == 0) return;
gui_window_get_scroll(gwin->bw->window,&gwin->bw->window->scrollx,&gwin->bw->window->scrolly);
GetAttr(CLICKTAB_CurrentNode,gwin->gadgets[GID_TABS],(ULONG *)&tabnode);
GetClickTabNodeAttrs(tabnode,
TNA_UserData,&gwin->bw,
@ -1014,6 +1027,8 @@ void ami_switch_tab(struct gui_window_2 *gwin,bool redraw)
if(redraw)
{
gui_window_set_scroll(gwin->bw->window,gwin->bw->window->scrollx,gwin->bw->window->scrolly);
browser_window_update(gwin->bw,false);
if(gwin->bw->current_content)
@ -1253,6 +1268,8 @@ struct gui_window *gui_create_browser_window(struct browser_window *bw,
if(option_new_tab_active) ami_switch_tab(gwin->shared,false);
ami_update_buttons(gwin->shared);
return gwin;
}

View File

@ -118,6 +118,8 @@ struct gui_window
int c_x;
int c_y;
int c_h;
int scrollx;
int scrolly;
};
void ami_get_msg(void);

View File

@ -36,6 +36,7 @@
#include "amiga/cookies.h"
#include <proto/exec.h>
#include "amiga/arexx.h"
#include "amiga/save_complete.h"
BOOL menualreadyinit;
@ -62,31 +63,32 @@ void ami_init_menulabs(void)
menulab[4] = ami_utf8_easy((char *)messages_get("SaveAs"));
menulab[5] = ami_utf8_easy((char *)messages_get("Source"));
menulab[6] = ami_utf8_easy((char *)messages_get("TextNS"));
menulab[7] = ami_utf8_easy((char *)messages_get("PDF"));
menulab[8] = NM_BARLABEL;
menulab[9] = ami_utf8_easy((char *)messages_get("CloseTab"));
menulab[10] = ami_utf8_easy((char *)messages_get("CloseWindow"));
menulab[11] = NM_BARLABEL;
menulab[12] = ami_utf8_easy((char *)messages_get("Quit"));
menulab[13] = ami_utf8_easy((char *)messages_get("Edit"));
menulab[14] = ami_utf8_easy((char *)messages_get("CopyNS"));
menulab[15] = ami_utf8_easy((char *)messages_get("Paste"));
menulab[16] = ami_utf8_easy((char *)messages_get("SelectAllNS"));
menulab[17] = ami_utf8_easy((char *)messages_get("ClearNS"));
menulab[18] = ami_utf8_easy((char *)messages_get("Browser"));
menulab[19] = ami_utf8_easy((char *)messages_get("HistGlobalNS"));
menulab[20] = ami_utf8_easy((char *)messages_get("ShowCookies"));
menulab[21] = ami_utf8_easy((char *)messages_get("Hotlist"));
menulab[22] = ami_utf8_easy((char *)messages_get("HotlistAdd"));
menulab[23] = ami_utf8_easy((char *)messages_get("HotlistShowNS"));
menulab[24] = NM_BARLABEL;
menulab[7] = ami_utf8_easy((char *)messages_get("SaveCompNS"));
menulab[8] = ami_utf8_easy((char *)messages_get("PDF"));
menulab[9] = NM_BARLABEL;
menulab[10] = ami_utf8_easy((char *)messages_get("CloseTab"));
menulab[11] = ami_utf8_easy((char *)messages_get("CloseWindow"));
menulab[12] = NM_BARLABEL;
menulab[13] = ami_utf8_easy((char *)messages_get("Quit"));
menulab[14] = ami_utf8_easy((char *)messages_get("Edit"));
menulab[15] = ami_utf8_easy((char *)messages_get("CopyNS"));
menulab[16] = ami_utf8_easy((char *)messages_get("Paste"));
menulab[17] = ami_utf8_easy((char *)messages_get("SelectAllNS"));
menulab[18] = ami_utf8_easy((char *)messages_get("ClearNS"));
menulab[19] = ami_utf8_easy((char *)messages_get("Browser"));
menulab[20] = ami_utf8_easy((char *)messages_get("HistGlobalNS"));
menulab[21] = ami_utf8_easy((char *)messages_get("ShowCookies"));
menulab[22] = ami_utf8_easy((char *)messages_get("Hotlist"));
menulab[23] = ami_utf8_easy((char *)messages_get("HotlistAdd"));
menulab[24] = ami_utf8_easy((char *)messages_get("HotlistShowNS"));
menulab[25] = NM_BARLABEL;
menulab[65] = ami_utf8_easy((char *)messages_get("Settings"));
menulab[66] = ami_utf8_easy((char *)messages_get("SnapshotWindow"));
menulab[67] = ami_utf8_easy((char *)messages_get("SettingsSave"));
menulab[68] = ami_utf8_easy((char *)messages_get("ARexx"));
menulab[69] = ami_utf8_easy((char *)messages_get("ARexxExecute"));
menulab[70] = NM_BARLABEL;
menulab[AMI_MENU_HOTLIST_MAX] = ami_utf8_easy((char *)messages_get("Settings"));
menulab[AMI_MENU_HOTLIST_MAX+1] = ami_utf8_easy((char *)messages_get("SnapshotWindow"));
menulab[AMI_MENU_HOTLIST_MAX+2] = ami_utf8_easy((char *)messages_get("SettingsSave"));
menulab[AMI_MENU_HOTLIST_MAX+3] = ami_utf8_easy((char *)messages_get("ARexx"));
menulab[AMI_MENU_HOTLIST_MAX+4] = ami_utf8_easy((char *)messages_get("ARexxExecute"));
menulab[AMI_MENU_HOTLIST_MAX+5] = NM_BARLABEL;
}
struct NewMenu *ami_create_menu(ULONG type)
@ -101,6 +103,7 @@ struct NewMenu *ami_create_menu(ULONG type)
{ NM_ITEM,0,0,0,0,0,}, // save
{ NM_SUB,0,"S",0,0,0,}, // save as source
{ NM_SUB,0,0,0,0,0,}, // save as text
{ NM_SUB,0,0,0,0,0,}, // save as complete
{ NM_SUB,0,0,0,0,0,}, // save as pdf
{ NM_ITEM,NM_BARLABEL,0,0,0,0,},
{ NM_ITEM,0,"K",0,0,0,}, // close tab
@ -356,6 +359,7 @@ void ami_menupick(ULONG code,struct gui_window_2 *gwin,struct MenuItem *item)
bool openwin=false;
bool opentab=true;
char *temp;
BPTR lock = 0;
if(option_force_tabs)
{
@ -425,6 +429,27 @@ void ami_menupick(ULONG code,struct gui_window_2 *gwin,struct MenuItem *item)
break;
case 2:
if(AslRequestTags(filereq,
ASLFR_TitleText,messages_get("NetSurf"),
ASLFR_Screen,scrn,
ASLFR_DoSaveMode,TRUE,
ASLFR_InitialFile,FilePart(gwin->bw->current_content->url),
TAG_DONE))
{
strlcpy(&fname,filereq->fr_Drawer,1024);
AddPart(fname,filereq->fr_File,1024);
ami_update_pointer(gwin->win,GUI_POINTER_WAIT);
if(lock = CreateDir(fname))
{
UnLock(lock);
save_complete(gwin->bw->current_content,fname);
SetComment(fname,gwin->bw->current_content->url);
}
ami_update_pointer(gwin->win,GUI_POINTER_DEFAULT);
}
break;
case 3:
#ifdef WITH_PDF_EXPORT
if(AslRequestTags(filereq,
ASLFR_TitleText,messages_get("NetSurf"),

View File

@ -28,10 +28,10 @@
/* Maximum number of menu items - first value is number of static items
* (i.e. everything not initially defined as NM_IGNORE) */
#define AMI_MENU_MAX 31 + AMI_HOTLIST_ITEMS
#define AMI_MENU_MAX 32 + AMI_HOTLIST_ITEMS
/* Where the hotlist entries start */
#define AMI_MENU_HOTLIST 25
#define AMI_MENU_HOTLIST 26
/* Where the hotlist entries end */
#define AMI_MENU_HOTLIST_MAX AMI_MENU_HOTLIST+AMI_HOTLIST_ITEMS

821
amiga/save_complete.c Executable file
View File

@ -0,0 +1,821 @@
/*
* Copyright 2004 John M Bell <jmb202@ecs.soton.ac.uk>
* Copyright 2004-2007 James Bursa <bursa@users.sourceforge.net>
* Copyright 2008 Chris Young <chris@unsatisfactorysoftware.co.uk>
*
* This file is part of NetSurf, http://www.netsurf-browser.org/
*
* NetSurf is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; version 2 of the License.
*
* NetSurf is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/** \file
* Save HTML document with dependencies (implementation).
*/
#include "utils/config.h"
//#define _GNU_SOURCE /* for strndup */
#include <assert.h>
#include <ctype.h>
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <regex.h>
#include <libxml/HTMLtree.h>
#include <libxml/parserInternals.h>
#include "utils/config.h"
#include "css/css.h"
#include "render/box.h"
#include "amiga/save_complete.h"
#include "utils/log.h"
#include "utils/url.h"
#include "utils/utils.h"
#include <proto/dos.h>
#include <proto/icon.h>
#include <workbench/icon.h>
/** Compiled regular expression matching a CSS \@import rule. It is compiled
 * by save_complete_init() and used by rewrite_stylesheet_urls(). */
regex_t save_complete_import_re;
/** An entry in save_complete_list. */
struct save_complete_entry {
struct content *content; /**< Content recorded by this entry */
struct save_complete_entry *next; /**< Next entry in list */
};
/** Head of the singly linked list of contents seen and saved so far.
 * Entries are looked up by URL (save_complete_list_find) or by pointer
 * (save_complete_list_check); save_complete() frees the list when done. */
static struct save_complete_entry *save_complete_list = 0;
static bool save_complete_html(struct content *c, const char *path,
bool index);
static bool save_imported_sheets(struct content *c, const char *path);
static char * rewrite_stylesheet_urls(const char *source, unsigned int size,
int *osize, const char *base);
static bool rewrite_document_urls(xmlDoc *doc, const char *base);
static bool rewrite_urls(xmlNode *n, const char *base);
static bool rewrite_url(xmlNode *n, const char *attr, const char *base);
static bool save_complete_list_add(struct content *content);
static struct content * save_complete_list_find(const char *url);
static bool save_complete_list_check(struct content *content);
/* static void save_complete_list_dump(void); */
static bool save_complete_inventory(const char *path);
/**
* Save an HTML page with all dependencies.
*
* \param c CONTENT_HTML to save
* \param path directory to save to (must exist)
* \return true on success, false on error and error reported
*/
bool save_complete(struct content *c, const char *path)
{
bool result;
result = save_complete_html(c, path, true);
if (result)
result = save_complete_inventory(path);
/* free save_complete_list */
while (save_complete_list) {
struct save_complete_entry *next = save_complete_list->next;
free(save_complete_list);
save_complete_list = next;
}
return result;
}
/**
 * Save an HTML page with all dependencies, recursing through imported pages.
 *
 * Stylesheets and objects are written first, each to a file named after the
 * hexadecimal address of its content structure. The page source is then
 * re-parsed, its URLs are rewritten to point at the saved files, and the
 * document is written out last.
 *
 * \param  c      CONTENT_HTML to save
 * \param  path   directory to save to (must exist)
 * \param  index  true to save as "index"
 * \return true on success, false on error and error reported
 */
bool save_complete_html(struct content *c, const char *path, bool index)
{
	char spath[256];
	unsigned int i;
	htmlParserCtxtPtr parser;
	BPTR fh = 0;

	if (c->type != CONTENT_HTML)
		return false;

	if (save_complete_list_check(c))
		return true;

	/* save stylesheets, ignoring the base and adblocking sheets */
	for (i = STYLESHEET_STYLE; i != c->data.html.stylesheet_count; i++) {
		struct content *css = c->data.html.stylesheet_content[i];
		char *source;
		int source_len;

		if (!css)
			continue;
		if (save_complete_list_check(css))
			continue;

		if (i != STYLESHEET_STYLE) {
			if (!save_complete_list_add(css)) {
				warn_user("NoMemory", 0);
				return false;
			}
		}

		if (!save_imported_sheets(css, path))
			return false;

		if (i == STYLESHEET_STYLE)
			continue; /* don't save <style> elements */

		snprintf(spath, sizeof spath, "%s/%x", path,
				(unsigned int) css);

		source = rewrite_stylesheet_urls(css->source_data,
				css->source_size, &source_len, css->url);
		if (!source) {
			warn_user("NoMemory", 0);
			return false;
		}

		/* NOTE: a file that cannot be created is silently skipped */
		if ((fh = FOpen(spath, MODE_NEWFILE, 0))) {
			FWrite(fh, source, 1, source_len);
			FClose(fh);
			/* tag the file with the stylesheet's own URL, in the
			 * same way objects are tagged below */
			SetComment(spath, css->url);
		}

		free(source);
	}

	/* save objects */
	for (i = 0; i != c->data.html.object_count; i++) {
		struct content *obj = c->data.html.object[i].content;

		/* skip difficult content types */
		if (!obj || obj->type >= CONTENT_OTHER || !obj->source_data)
			continue;
		if (save_complete_list_check(obj))
			continue;

		if (!save_complete_list_add(obj)) {
			warn_user("NoMemory", 0);
			return false;
		}

		if (obj->type == CONTENT_HTML) {
			if (!save_complete_html(obj, path, false))
				return false;
			continue;
		}

		snprintf(spath, sizeof spath, "%s/%x", path,
				(unsigned int) obj);

		/* NOTE: a file that cannot be created is silently skipped */
		if ((fh = FOpen(spath, MODE_NEWFILE, 0))) {
			FWrite(fh, obj->source_data, 1, obj->source_size);
			FClose(fh);
			SetComment(spath, obj->url);
		}
	}

	/* make a copy of the document tree */
	parser = htmlCreateMemoryParserCtxt(c->source_data, c->source_size);
	if (!parser) {
		warn_user("NoMemory", 0);
		return false;
	}

	/* set parser charset */
	if (c->data.html.encoding) {
		xmlCharEncodingHandler *enc_handler;

		enc_handler =
			xmlFindCharEncodingHandler(c->data.html.encoding);
		if (enc_handler) {
			xmlCtxtResetLastError(parser);
			if (xmlSwitchToEncoding(parser, enc_handler)) {
				xmlFreeDoc(parser->myDoc);
				htmlFreeParserCtxt(parser);
				warn_user("MiscError",
						"Encoding switch failed");
				return false;
			}
		}
	}

	htmlParseDocument(parser);

	/* rewrite all urls we know about */
	if (!rewrite_document_urls(parser->myDoc, c->data.html.base_url)) {
		xmlFreeDoc(parser->myDoc);
		htmlFreeParserCtxt(parser);
		warn_user("NoMemory", 0);
		return false;
	}

	/* save the html file out last of all */
	if (index) {
		struct DiskObject *dobj;

		snprintf(spath, sizeof spath, "%s/index", path);

		/* give the index file the default "html" project icon */
		dobj = GetIconTags(NULL, ICONGETA_GetDefaultName, "html",
				ICONGETA_GetDefaultType, WBPROJECT,
				TAG_DONE);
		if (dobj) {
			PutIconTags(spath, dobj,
					ICONPUTA_NotifyWorkbench, TRUE,
					TAG_DONE);
			/* the icon is ours to release once written */
			FreeDiskObject(dobj);
		}
	} else {
		snprintf(spath, sizeof spath, "%s/%x", path,
				(unsigned int) c);
	}

	errno = 0;
	if (htmlSaveFileFormat(spath, parser->myDoc, 0, 0) == -1) {
		if (errno)
			warn_user("SaveError", strerror(errno));
		else
			warn_user("SaveError", "htmlSaveFileFormat failed");

		/* do not leak the parsed tree on the failure path */
		xmlFreeDoc(parser->myDoc);
		htmlFreeParserCtxt(parser);
		return false;
	}

	xmlFreeDoc(parser->myDoc);
	htmlFreeParserCtxt(parser);

	return true;
}
/**
 * Save stylesheets imported by a CONTENT_CSS.
 *
 * Each imported sheet not yet in save_complete_list is added to it, its own
 * imports are saved recursively, and the sheet (with \@import rules
 * rewritten) is written to a file named after the hexadecimal address of
 * its content structure.
 *
 * \param  c     a CONTENT_CSS
 * \param  path  path to save to
 * \return true on success, false on error and error reported
 */
bool save_imported_sheets(struct content *c, const char *path)
{
	char spath[256];
	unsigned int j;
	char *source;
	int source_len;
	BPTR fh = 0;

	for (j = 0; j != c->data.css.import_count; j++) {
		struct content *css = c->data.css.import_content[j];

		if (!css)
			continue;
		if (save_complete_list_check(css))
			continue;

		if (!save_complete_list_add(css)) {
			warn_user("NoMemory", 0);
			return false;
		}

		if (!save_imported_sheets(css, path))
			return false;

		snprintf(spath, sizeof spath, "%s/%x", path,
				(unsigned int) css);

		source = rewrite_stylesheet_urls(css->source_data,
				css->source_size, &source_len, css->url);
		if (!source) {
			warn_user("NoMemory", 0);
			return false;
		}

		/* NOTE: a file that cannot be created is silently skipped */
		if ((fh = FOpen(spath, MODE_NEWFILE, 0))) {
			FWrite(fh, source, 1, source_len);
			FClose(fh);
			/* tag the file with the URL of the sheet it holds,
			 * not the URL of the sheet that imported it */
			SetComment(spath, css->url);
		}

		free(source);
	}

	return true;
}
/**
 * Initialise the save_complete module.
 *
 * Compiles the extended, case-insensitive regular expression that
 * recognises CSS \@import rules into save_complete_import_re.
 */
void save_complete_init(void)
{
	/* An @import rule - see CSS 2.1 G.1. The numbers in the comments
	 * are the capture groups opened on the following line. */
	static const char import_pattern[] =
		"@import"			/* IMPORT_SYM */
		"[ \t\r\n\f]*"			/* S* */
		/* 1 */
		"("				/* [ */
		/* 2 3 */
		"\"(([^\"]|[\\]\")*)\""		/* STRING (approximated) */
		"|"
		/* 4 5 */
		"'(([^']|[\\]')*)'"
		"|"				/* | */
		"url\\([ \t\r\n\f]*"		/* URI (approximated) */
		/* 6 7 */
		"\"(([^\"]|[\\]\")*)\""
		"[ \t\r\n\f]*\\)"
		"|"
		"url\\([ \t\r\n\f]*"
		/* 8 9 */
		"'(([^']|[\\]')*)'"
		"[ \t\r\n\f]*\\)"
		"|"
		"url\\([ \t\r\n\f]*"
		/* 10 */
		"([^) \t\r\n\f]*)"
		"[ \t\r\n\f]*\\)"
		")";				/* ] */

	regcomp_wrapper(&save_complete_import_re, import_pattern,
			REG_EXTENDED | REG_ICASE);
}
/**
 * Rewrite stylesheet \@import rules for save complete.
 *
 * Every \@import whose URL (joined with base) names a content in
 * save_complete_list is replaced by an import of that content's saved file
 * name; all other text is copied through unchanged.
 *
 * @param source  stylesheet source (need not be 0-terminated)
 * @param size    size of source
 * @param osize   updated with the size of the result
 * @param base    url of stylesheet
 * @return converted source (to be freed by the caller), or 0 on out of
 *         memory
 */
char * rewrite_stylesheet_urls(const char *source, unsigned int size,
		int *osize, const char *base)
{
	/* capture groups of save_complete_import_re that may hold the URL,
	 * in the order they are tried */
	static const int url_group[] = { 2, 4, 6, 8, 10 };
	char *text;
	char *res;
	char buf[20];
	unsigned int offset = 0;
	unsigned int imports = 0;
	unsigned int i;
	regmatch_t match[11];

	/* regexec() requires a 0-terminated string, which source is not
	 * guaranteed to be, so match against a terminated private copy */
	text = malloc(size + 1);
	if (!text)
		return 0;
	if (size)
		memcpy(text, source, size);
	text[size] = '\0';

	/* count number occurrences of @import to (over)estimate result size */
	for (i = 0; 7 < size && i != size - 7; i++) {
		if (text[i] == '@' &&
				tolower((unsigned char) text[i + 1]) == 'i' &&
				tolower((unsigned char) text[i + 2]) == 'm' &&
				tolower((unsigned char) text[i + 3]) == 'p' &&
				tolower((unsigned char) text[i + 4]) == 'o' &&
				tolower((unsigned char) text[i + 5]) == 'r' &&
				tolower((unsigned char) text[i + 6]) == 't')
			imports++;
	}

	/* + 1 so an empty stylesheet never requests malloc(0), whose NULL
	 * result would be mistaken for memory exhaustion */
	res = malloc(size + imports * 20 + 1);
	if (!res) {
		free(text);
		return 0;
	}
	*osize = 0;

	while (offset < size) {
		const char *url = 0;
		int url_len = 0;
		char *url2;
		char *joined = 0;
		struct content *content = 0;
		url_func_result result;

		if (regexec(&save_complete_import_re, text + offset,
				11, match, 0))
			break;

		/* locate whichever alternative captured the URL */
		for (i = 0; i != sizeof url_group / sizeof url_group[0]; i++) {
			const regmatch_t *g = &match[url_group[i]];

			if (g->rm_so != -1) {
				url = text + offset + g->rm_so;
				url_len = g->rm_eo - g->rm_so;
				break;
			}
		}
		assert(url);

		url2 = strndup(url, url_len);
		if (!url2)
			goto no_memory;

		result = url_join(url2, base, &joined);
		free(url2);
		if (result == URL_FUNC_NOMEM)
			goto no_memory;

		/* copy data before match */
		memcpy(res + *osize, text + offset, match[0].rm_so);
		*osize += match[0].rm_so;

		if (result == URL_FUNC_OK) {
			content = save_complete_list_find(joined);
			/* the joined URL was allocated for us by url_join */
			free(joined);
		}

		if (content) {
			/* replace import */
			snprintf(buf, sizeof buf, "@import '%x'",
					(unsigned int) content);
			memcpy(res + *osize, buf, strlen(buf));
			*osize += strlen(buf);
		} else {
			/* copy import */
			memcpy(res + *osize, text + offset + match[0].rm_so,
					match[0].rm_eo - match[0].rm_so);
			*osize += match[0].rm_eo - match[0].rm_so;
		}

		assert(0 < match[0].rm_eo);
		offset += match[0].rm_eo;
	}

	/* copy rest of source */
	if (offset < size) {
		memcpy(res + *osize, text + offset, size - offset);
		*osize += size - offset;
	}

	free(text);
	return res;

no_memory:
	free(text);
	free(res);
	return 0;
}
/**
 * Rewrite URLs in a HTML document to be relative.
 *
 * Calls rewrite_urls() on every element node directly below the document.
 *
 * \param  doc   root of the document tree
 * \param  base  base url of document
 * \return true on success, false on out of memory
 */
bool rewrite_document_urls(xmlDoc *doc, const char *base)
{
	xmlNode *node;
	xmlNode *next;

	for (node = doc->children; node; node = next) {
		/* fetch the sibling first: rewrite_urls() unlinks and frees
		 * a <base> element, after which node must not be touched */
		next = node->next;

		if (node->type == XML_ELEMENT_NODE)
			if (!rewrite_urls(node, base))
				return false;
	}

	return true;
}
/**
 * Traverse tree, rewriting URLs as we go.
 *
 * \param  n     xmlNode of type XML_ELEMENT_NODE to rewrite
 * \param  base  base url of document
 * \return true on success, false on out of memory
 *
 * URLs in the tree rooted at element n are rewritten. A <base> element is
 * removed from the tree and freed, so the caller must not use n afterwards
 * in that case.
 */
bool rewrite_urls(xmlNode *n, const char *base)
{
	xmlNode *child;
	const char *name;

	assert(n->type == XML_ELEMENT_NODE);

	name = (const char *) n->name;

	/**
	 * We only need to consider the following cases:
	 *
	 * Attribute:      Elements:
	 *
	 * 1)   data         <object>
	 * 2)   href         <a> <area> <link>
	 * 3)   src          <script> <input> <frame> <iframe> <img>
	 * 4)   n/a          <style>
	 * 5)   n/a          any <base> tag
	 * 6)   background   any (except those above)
	 */
	if (!name) {
		/* ignore */
	}
	/* 1 */
	else if (strcmp(name, "object") == 0) {
		if (!rewrite_url(n, "data", base))
			return false;
	}
	/* 2 */
	else if (strcmp(name, "a") == 0 ||
			strcmp(name, "area") == 0 ||
			strcmp(name, "link") == 0) {
		if (!rewrite_url(n, "href", base))
			return false;
	}
	/* 3 */
	else if (strcmp(name, "frame") == 0 ||
			strcmp(name, "iframe") == 0 ||
			strcmp(name, "input") == 0 ||
			strcmp(name, "img") == 0 ||
			strcmp(name, "script") == 0) {
		if (!rewrite_url(n, "src", base))
			return false;
	}
	/* 4 */
	else if (strcmp(name, "style") == 0) {
		for (child = n->children; child != 0; child = child->next) {
			xmlChar *content;
			char *rewritten;
			int len = 0;

			/* Get current content */
			content = xmlNodeGetContent(child);
			if (!content)
				/* unfortunately we don't know if this is
				 * due to memory exhaustion, or because
				 * there is no content for this node */
				continue;

			/* Rewrite @import rules */
			rewritten = rewrite_stylesheet_urls(
					(const char *) content,
					strlen((const char *) content),
					&len, base);
			xmlFree(content);
			if (!rewritten)
				return false;

			/* set new content */
			xmlNodeSetContentLen(child,
					(const xmlChar *) rewritten,
					len);

			/* libxml2 keeps its own copy of the text, so the
			 * rewritten buffer must be released here */
			free(rewritten);
		}

		return true;
	}
	/* 5 */
	else if (strcmp(name, "base") == 0) {
		/* simply remove any <base> tags from the document */
		xmlUnlinkNode(n);
		xmlFreeNode(n);
		/* base tags have no content, so there's no point recursing
		 * additionally, we've just destroyed this node, so trying
		 * to recurse would result in bad things happening */
		return true;
	}
	/* 6 */
	else {
		if (!rewrite_url(n, "background", base))
			return false;
	}

	/* now recurse */
	for (child = n->children; child;) {
		/* we must extract the next child now, as if the current
		 * child is a <base> element, it will be removed from the
		 * tree (see 5, above), thus preventing extraction of the
		 * next child */
		xmlNode *next = child->next;

		if (child->type == XML_ELEMENT_NODE) {
			if (!rewrite_urls(child, base))
				return false;
		}

		child = next;
	}

	return true;
}
/**
 * Rewrite one URL-valued attribute of an element.
 *
 * The attribute value is joined with base. If the resulting URL belongs to
 * a content in save_complete_list, the attribute is set to that content's
 * saved file name (its address in hexadecimal); otherwise it is set to the
 * joined, absolute URL. An absent attribute, or a URL that cannot be
 * joined, leaves the node untouched.
 *
 * \param  n     The node to modify
 * \param  attr  The html attribute to modify
 * \param  base  base url of document
 * \return true on success, false on out of memory
 */
bool rewrite_url(xmlNode *n, const char *attr, const char *base)
{
	const xmlChar *prop = (const xmlChar *) attr;
	const xmlChar *value;
	char *joined;
	char *current;
	char rel[20];
	struct content *hit;
	url_func_result status;
	bool ok;

	/* nothing to do when the attribute is absent */
	if (!xmlHasProp(n, prop))
		return true;

	current = (char *) xmlGetProp(n, prop);
	if (!current)
		return false;

	status = url_join(current, base, &joined);
	xmlFree(current);

	if (status == URL_FUNC_NOMEM)
		return false;
	if (status != URL_FUNC_OK)
		return true;

	hit = save_complete_list_find(joined);
	if (hit) {
		/* found a match: point at the saved copy */
		snprintf(rel, sizeof rel, "%x", (unsigned int) hit);
		value = (const xmlChar *) rel;
	} else {
		/* no match found: keep the absolute URL */
		value = (const xmlChar *) joined;
	}

	ok = xmlSetProp(n, prop, value) != 0;
	free(joined);

	return ok;
}
/**
 * Record a content in save_complete_list.
 *
 * \param  content  content to add
 * \return true on success, false on out of memory
 */
bool save_complete_list_add(struct content *content)
{
	struct save_complete_entry *node = malloc(sizeof *node);

	if (node == NULL)
		return false;

	/* push the new entry onto the head of the list */
	node->content = content;
	node->next = save_complete_list;
	save_complete_list = node;

	return true;
}
/**
 * Find the content in save_complete_list whose url equals the given one.
 *
 * \param  url  url to find
 * \return content if found, 0 otherwise
 */
struct content * save_complete_list_find(const char *url)
{
	struct save_complete_entry *cur = save_complete_list;

	while (cur) {
		if (!strcmp(url, cur->content->url))
			return cur->content;
		cur = cur->next;
	}

	return 0;
}
/**
 * Test whether a content has already been recorded in save_complete_list.
 *
 * \param  content  pointer to content
 * \return true if the content is in the save_complete_list
 */
bool save_complete_list_check(struct content *content)
{
	struct save_complete_entry *cur = save_complete_list;

	while (cur && cur->content != content)
		cur = cur->next;

	return cur != NULL;
}
#if 0
/**
 * Print each save_complete_list entry (content pointer and url) to stderr.
 */
void save_complete_list_dump(void)
{
	struct save_complete_entry *cur = save_complete_list;

	while (cur) {
		fprintf(stderr, "%p : %s\n", cur->content,
				cur->content->url);
		cur = cur->next;
	}
}
#endif
/**
 * Create the inventory file listing original URLs.
 *
 * Writes "<path>/Inventory" with one line per save_complete_list entry:
 * the saved file name (content address in hexadecimal) followed by the
 * content's url.
 *
 * \param  path  directory the page was saved to
 * \return true on success, false on error and error reported
 */
bool save_complete_inventory(const char *path)
{
	char spath[256];
	struct save_complete_entry *entry;
	FILE *fp;
	bool ok = true;
	int err = 0;

	snprintf(spath, sizeof spath, "%s/Inventory", path);

	fp = fopen(spath, "w");
	if (!fp) {
		/* capture errno before logging can overwrite it */
		err = errno;
		LOG(("fopen(): errno = %i", err));
		warn_user("SaveError", strerror(err));
		return false;
	}

	for (entry = save_complete_list; entry; entry = entry->next) {
		if (fprintf(fp, "%x %s\n",
				(unsigned int) entry->content,
				entry->content->url) < 0) {
			err = errno;
			ok = false;
			break;
		}
	}

	/* fclose() flushes buffered data, so it can fail too; always close,
	 * but keep the first error seen */
	if (fclose(fp) != 0 && ok) {
		err = errno;
		ok = false;
	}

	if (!ok) {
		LOG(("writing inventory: errno = %i", err));
		warn_user("SaveError", strerror(err));
		return false;
	}

	return true;
}

32
amiga/save_complete.h Executable file
View File

@ -0,0 +1,32 @@
/*
* Copyright 2004 John M Bell <jmb202@ecs.soton.ac.uk>
* Copyright 2008 Chris Young <chris@unsatisfactorysoftware.co.uk>
*
* This file is part of NetSurf, http://www.netsurf-browser.org/
*
* NetSurf is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; version 2 of the License.
*
* NetSurf is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/** \file
* Save HTML document with dependencies (interface).
*/
#ifndef AMIGA_SAVE_COMPLETE_H
#define AMIGA_SAVE_COMPLETE_H
#include "content/content.h"
/** Initialise the save_complete module. */
void save_complete_init(void);
/**
 * Save an HTML page with all dependencies.
 *
 * \param c     CONTENT_HTML to save
 * \param path  directory to save to (must exist)
 * \return true on success, false on error and error reported
 */
bool save_complete(struct content *c, const char *path);
#endif