netsurf/render/html.h
John Mark Bell 78d194cb77 Rework handling of HTTP redirects -- we now count the number of redirects followed for a given item and abort if a fixed limit is reached. This fixes sites which have pages that redirect to themselves.
Redirect handling is now transparent to clients of fetchcache.

The new scheme works as follows:

1) Request content for URL (fetchcache())
2) Start fetch of content (fetchcache_go())
3) If no redirect, continue through LOADING, READY, DONE etc. states as before
   If redirect, receive NEWPTR for each redirect that occurs, then continue
   through LOADING, READY, DONE etc. states as before.

The upshot of this is that redirects result in extra contents being created. It also means that, until LOADING has been received, the content (and thus the URL being fetched) may change. Therefore, fetchcache clients should expect to have to deal with transient data prior to LOADING occurring.

As a necessary side-effect of this, the HTML object URLs and CSS @import URLs are no longer stored alongside the relevant contents. These URLs can be accessed by interrogating the url member of struct content anyway, so it was a rather redundant scheme before.

svn path=/trunk/netsurf/; revision=3787
2008-01-28 01:35:00 +00:00

211 lines
6.6 KiB
C

/*
* Copyright 2004 James Bursa <bursa@users.sourceforge.net>
*
* This file is part of NetSurf, http://www.netsurf-browser.org/
*
* NetSurf is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; version 2 of the License.
*
* NetSurf is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/** \file
* Content for text/html (interface).
*
* These functions should in general be called via the content interface.
*/
#ifndef _NETSURF_RENDER_HTML_H_
#define _NETSURF_RENDER_HTML_H_
#include <stdbool.h>
#include <libxml/HTMLparser.h>
#include "content/content_type.h"
#include "css/css.h"
struct box;
struct rect;
struct browser_window;
struct content;
struct form_successful_control;
struct imagemap;
struct object_params;
struct plotters;
/* Indices of the fixed entries in stylesheet_content (see struct
* content_html_data). Entries from STYLESHEET_START onwards are the
* document's own stylesheets. */
#define STYLESHEET_BASE 0 /* base style sheet */
#define STYLESHEET_ADBLOCK 1 /* adblocking stylesheet */
#define STYLESHEET_STYLE 2 /* <style> elements (not cached) */
#define STYLESHEET_START 3 /* start of document stylesheets */
/* Stylesheet URLs; declared here, defined elsewhere.
* NOTE(review): presumably the sources of the STYLESHEET_BASE and
* STYLESHEET_ADBLOCK entries respectively -- confirm at the definitions. */
extern char *default_stylesheet_url;
extern char *adblock_stylesheet_url;
/** A frame dimension: a numeric value paired with the unit it is given in. */
struct frame_dimension {
	/** Magnitude; its meaning depends on unit. */
	float value;
	/** Unit in which value is expressed. */
	enum {
		FRAME_DIMENSION_PIXELS = 0,	/**< e.g. '100', '200' */
		FRAME_DIMENSION_PERCENT = 1,	/**< e.g. '5%', '20%' */
		FRAME_DIMENSION_RELATIVE = 2	/**< e.g. '*', '2*' */
	} unit;
};
/** Scrolling characteristics of a frame or inline frame (see the scrolling
 * members of struct content_html_frames and struct content_html_iframe). */
typedef enum {
	SCROLLING_AUTO = 0,
	SCROLLING_YES = 1,
	SCROLLING_NO = 2
} frame_scrolling;
/** An object (<img>, <object>, etc.) in a CONTENT_HTML document.
*
* Entries of this type make up the object array of struct
* content_html_data. */
struct content_html_object {
struct content *content; /**< Content, or 0. */
struct box *box; /**< Node in box tree containing it. */
/** Pointer to array of permitted content_type, terminated by
* CONTENT_UNKNOWN, or 0 if any type is acceptable. */
const content_type *permitted_types;
bool background; /**< This object is a background image. */
};
/** Frame tree (<frameset>, <frame>).
*
* Each node refers to its child frames through children, an array of
* cols * rows entries. */
struct content_html_frames {
int cols; /**< number of columns in frameset */
int rows; /**< number of rows in frameset */
struct frame_dimension width; /**< frame width */
struct frame_dimension height; /**< frame height */
int margin_width; /**< frame margin width */
int margin_height; /**< frame margin height */
char *name; /**< frame name (for targeting) */
char *url; /**< frame url */
bool no_resize; /**< frame is not resizable */
frame_scrolling scrolling; /**< scrolling characteristics */
bool border; /**< frame has a border */
colour border_colour; /**< frame border colour */
struct content_html_frames *children; /**< [cols * rows] children */
};
/** Inline frame list (<iframe>).
*
* Entries are chained through the next member. */
struct content_html_iframe {
/** NOTE(review): presumably the box in the box tree for this iframe --
* confirm where the list is built. */
struct box *box;
int margin_width; /**< frame margin width */
int margin_height; /**< frame margin height */
char *name; /**< frame name (for targeting) */
char *url; /**< frame url */
frame_scrolling scrolling; /**< scrolling characteristics */
bool border; /**< frame has a border */
colour border_colour; /**< frame border colour */
struct content_html_iframe *next; /**< next entry in the list */
};
/** Data specific to CONTENT_HTML.
*
* Groups the parser state, the box tree, the stylesheets, the embedded
* objects, forms, imagemaps and frame information of one HTML document,
* plus the location of the document when it is itself an object within
* another page. */
struct content_html_data {
htmlParserCtxt *parser; /**< HTML parser context. */
/** HTML parser encoding handler. */
xmlCharEncodingHandler *encoding_handler;
char *encoding; /**< Encoding of source, 0 if unknown. */
enum { ENCODING_SOURCE_HEADER, ENCODING_SOURCE_DETECTED,
ENCODING_SOURCE_META } encoding_source;
/**< Source of encoding information. */
bool getenc; /**< Need to get the encoding from the document, as it
* wasn't specified in the Content-Type header. */
char *base_url; /**< Base URL (may be a copy of content->url). */
char *base_target; /**< Base target */
struct box *layout; /**< Box tree, or 0. */
colour background_colour; /**< Document background colour. */
/** Number of entries in stylesheet_content. */
unsigned int stylesheet_count;
/** Stylesheets. Each may be 0. The first entries are indexed by the
* STYLESHEET_* constants; document stylesheets begin at
* STYLESHEET_START. */
struct content **stylesheet_content;
struct css_style *style; /**< Base style. */
/** Working stylesheet. */
struct css_working_stylesheet *working_stylesheet;
/** Number of entries in object. */
unsigned int object_count;
/** Objects. Each may be 0. */
struct content_html_object *object;
/** Forms, in reverse order to document. */
struct form *forms;
/** Hash table of imagemaps. */
struct imagemap **imagemaps;
/** Browser window containing this document, or 0 if not open. */
struct browser_window *bw;
/** Frameset information (root of the frame tree). */
struct content_html_frames *frameset;
/** Inline frame information (head of the iframe list). */
struct content_html_iframe *iframe;
/** Content of type CONTENT_HTML containing this, or 0 if not an object
* within a page. */
struct content *page;
/** Index in page->data.html.object, or 0 if not an object. */
unsigned int index;
/** Box containing this, or 0 if not an object. */
struct box *box;
};
/** Render padding and margin box outlines in html_redraw(). */
extern bool html_redraw_debug;
/*
* Entry points for CONTENT_HTML contents. As noted in the file header,
* these should in general be called via the content interface. Only the
* declarations are visible in this header.
* NOTE(review): the bool-returning functions presumably report success as
* true and failure as false -- confirm against the definitions.
*/
/** Create the HTML-specific state of content c; params is an array of
* strings (NOTE(review): format not visible here -- confirm). */
bool html_create(struct content *c, const char *params[]);
/** Feed size bytes of source data to content c. */
bool html_process_data(struct content *c, char *data, unsigned int size);
/** Convert content c for the given width and height. */
bool html_convert(struct content *c, int width, int height);
/** Reformat content c to the given width and height. */
void html_reformat(struct content *c, int width, int height);
/** Destroy the HTML-specific state of content c. */
void html_destroy(struct content *c);
/** Fetch an object for content c from url. The box, permitted_types and
* background parameters have the same names and types as the members of
* struct content_html_object. */
bool html_fetch_object(struct content *c, char *url, struct box *box,
const content_type *permitted_types,
int available_width, int available_height,
bool background);
/** Replace object number i of content c with the content at url.
* NOTE(review): post_urlenc and post_multipart look like alternative forms
* of POST data -- confirm against the definition. */
bool html_replace_object(struct content *c, unsigned int i, char *url,
char *post_urlenc,
struct form_successful_control *post_multipart);
/** Stop activity for content c (NOTE(review): exact effect not visible
* here). */
void html_stop(struct content *c);
/** Open content c. The bw, page, index and box parameters have the same
* names and types as the members of struct content_html_data. */
void html_open(struct content *c, struct browser_window *bw,
struct content *page, unsigned int index, struct box *box,
struct object_params *params);
/** Close content c; counterpart of html_open(). */
void html_close(struct content *c);
/* in render/html_redraw.c */
/** Redraw content c.
*
* Takes a position (x, y), a size (width, height), a clip rectangle given
* as two corners (clip_x0, clip_y0) and (clip_x1, clip_y1), a scale factor
* and a background colour.
* NOTE(review): coordinate units and the meaning of the return value are
* not visible in this header -- confirm in render/html_redraw.c. */
bool html_redraw(struct content *c, int x, int y,
int width, int height,
int clip_x0, int clip_y0, int clip_x1, int clip_y1,
float scale, unsigned long background_colour);
/** Redraw a short text string, complete with highlighting
* (for selection/search) and ghost caret.
*
* The text is passed as utf8_text with length utf8_len and is drawn at
* (x, y) using style, restricted to clip.
* NOTE(review): the roles of offset, space, height and excluded, and the
* meaning of the return value, are not visible in this header -- confirm
* against the definition. */
bool text_redraw(const char *utf8_text, size_t utf8_len,
size_t offset, bool space,
struct css_style *style,
int x, int y,
struct rect *clip,
int height,
float scale, colour current_background_color,
bool excluded);
#endif