diff --git a/desktop/save_text.c b/desktop/save_text.c
new file mode 100644
index 000000000..916567717
--- /dev/null
+++ b/desktop/save_text.c
@@ -0,0 +1,170 @@
+/*
+ * This file is part of NetSurf, http://netsurf.sourceforge.net/
+ * Licensed under the GNU General Public License,
+ *                http://www.opensource.org/licenses/gpl-license
+ * Copyright 2004 John M Bell
+ */
+
+/** \file
+ * Export of the text of an HTML page as a plain text file (implementation).
+ */
+
+#include <stdio.h>
+#include <string.h>
+
+#include "libxml/HTMLtree.h"
+
+#include "netsurf/utils/config.h"
+#include "netsurf/content/content.h"
+#include "netsurf/desktop/save_text.h"
+#include "netsurf/utils/log.h"
+#include "netsurf/utils/utils.h"
+
+#ifdef WITH_TEXT_EXPORT
+
+static void extract_text(xmlDoc *doc, FILE *out);
+static void extract_text_from_tree(xmlNode *n, FILE *out);
+static int newlines_after(const char *name);
+
+/**
+ * Write the text of an HTML content to a file.
+ *
+ * The content's source data is parsed again with libxml's HTML parser and
+ * the text found below the root html element is written to the file, with
+ * line breaks added after certain elements. Nothing is done unless the
+ * content is of type CONTENT_HTML. Failures are logged, not returned.
+ *
+ * \param  c     content to save
+ * \param  path  path of the file to create or truncate
+ */
+
+void save_as_text(struct content *c, char *path)
+{
+	htmlParserCtxtPtr ctxt;
+	FILE *out;
+
+	if (c == NULL || path == NULL || c->type != CONTENT_HTML)
+		return;
+
+	out = fopen(path, "w+");
+	if (out == NULL) {
+		LOG(("unable to open '%s' for writing", path));
+		return;
+	}
+
+	ctxt = htmlCreateMemoryParserCtxt(c->source_data, c->source_size);
+	if (ctxt == NULL) {
+		LOG(("unable to create HTML parser context"));
+	} else {
+		/* the parse result is not checked: whatever document was
+		 * built is used, and extract_text() accepts NULL */
+		htmlParseDocument(ctxt);
+		extract_text(ctxt->myDoc, out);
+
+		/* freeing the context does not free the document */
+		xmlFreeDoc(ctxt->myDoc);
+		htmlFreeParserCtxt(ctxt);
+	}
+
+	if (fclose(out) != 0)
+		LOG(("error closing '%s'", path));
+}
+
+
+/**
+ * Write the text of a parsed HTML document to a stream.
+ *
+ * \param  doc  parsed document, or NULL (nothing is written)
+ * \param  out  stream to write to
+ */
+
+static void extract_text(xmlDoc *doc, FILE *out)
+{
+	xmlNode *html;
+
+	if (doc == NULL)
+		return;
+
+	/* find the html element: the first element at the top level */
+	for (html = doc->children;
+			html != NULL && html->type != XML_ELEMENT_NODE;
+			html = html->next)
+		;
+	if (html == NULL || html->name == NULL ||
+			strcmp((const char *) html->name, "html") != 0)
+		return;
+
+	extract_text_from_tree(html, out);
+}
+
+
+/**
+ * Write the text below a node to a stream, recursively.
+ *
+ * Text nodes are passed through squash_tolat1() and written as they are
+ * met. The line breaks belonging to an element are written after all of
+ * its children. Nodes which are neither elements nor text are skipped.
+ *
+ * \param  n    node to process
+ * \param  out  stream to write to
+ */
+
+static void extract_text_from_tree(xmlNode *n, FILE *out)
+{
+	xmlNode *child;
+	char *text;
+	int i, need_nl = 0;
+
+	if (n->type == XML_TEXT_NODE) {
+		if (n->content != NULL) {
+			text = squash_tolat1(n->content);
+			if (text != NULL)
+				fputs(text, out);
+			xfree(text);
+		}
+		return;
+	}
+
+	if (n->type != XML_ELEMENT_NODE)
+		return;
+
+	/* n->name is an xmlChar string, hence the cast for strcmp() */
+	if (n->name != NULL)
+		need_nl = newlines_after((const char *) n->name);
+
+	for (child = n->children; child != NULL; child = child->next)
+		extract_text_from_tree(child, out);
+
+	for (i = 0; i != need_nl; i++)
+		fputc('\n', out);
+}
+
+
+/**
+ * Determine how many line breaks to write after an element's content.
+ *
+ * \param  name  element name
+ * \return  2 or 1 for the elements listed below, 0 for any other element
+ */
+
+static int newlines_after(const char *name)
+{
+	static const char * const two[] = {
+		"dl", "h1", "h2", "h3", "ol", "title", "ul"
+	};
+	static const char * const one[] = {
+		"applet", "br", "div", "dt", "h4", "h5", "h6", "li",
+		"object", "p", "tr"
+	};
+	size_t i;
+
+	for (i = 0; i != sizeof two / sizeof two[0]; i++)
+		if (strcmp(name, two[i]) == 0)
+			return 2;
+	for (i = 0; i != sizeof one / sizeof one[0]; i++)
+		if (strcmp(name, one[i]) == 0)
+			return 1;
+	return 0;
+}
+
+#endif
diff --git a/desktop/save_text.h b/desktop/save_text.h
new file mode 100644
index 000000000..94dcc5fb7
--- /dev/null
+++ b/desktop/save_text.h
@@ -0,0 +1,15 @@
+/*
+ * This file is part of NetSurf, http://netsurf.sourceforge.net/
+ * Licensed under the GNU General Public License,
+ *                http://www.opensource.org/licenses/gpl-license
+ * Copyright 2004 John M Bell
+ */
+
+#ifndef _NETSURF_DESKTOP_SAVE_TEXT_H_
+#define _NETSURF_DESKTOP_SAVE_TEXT_H_
+
+struct content;
+
+void save_as_text(struct content *c, char *path);
+
+#endif
diff --git a/riscos/save.c b/riscos/save.c
index c3773bc71..e7d896198 100644
--- a/riscos/save.c
+++ b/riscos/save.c
@@ -14,8 +14,8 @@
 #include "oslib/dragasprite.h"
 #include "oslib/wimp.h"
+#include "netsurf/desktop/save_text.h"
 #include "netsurf/riscos/gui.h"
 #include "netsurf/riscos/save_draw.h"
-#include "netsurf/riscos/save_text.h"
 #include "netsurf/utils/log.h"
 #include "netsurf/utils/messages.h"
 #include "netsurf/utils/utils.h"
diff --git a/riscos/window.c b/riscos/window.c
index 270fcd52b..dc0547ca6 100644
--- a/riscos/window.c
+++ b/riscos/window.c
@@ -19,12 +19,12 @@
 #include "oslib/wimpspriteop.h"
 #include "netsurf/css/css.h"
 #include "netsurf/utils/config.h"
+#include "netsurf/desktop/save_text.h"
 #include "netsurf/riscos/constdata.h"
 #include "netsurf/riscos/gui.h"
 #include "netsurf/riscos/options.h"
 #include "netsurf/riscos/save_complete.h"
 #include "netsurf/riscos/save_draw.h"
-#include "netsurf/riscos/save_text.h"
 #include "netsurf/riscos/theme.h"
 #include "netsurf/riscos/thumbnail.h"
 #include "netsurf/utils/log.h"