netsurf/render/textplain.c

719 lines
18 KiB
C

/*
* Copyright 2006 James Bursa <bursa@users.sourceforge.net>
* Copyright 2006 Adrian Lees <adrianl@users.sourceforge.net>
*
* This file is part of NetSurf, http://www.netsurf-browser.org/
*
* NetSurf is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; version 2 of the License.
*
* NetSurf is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/** \file
* Content for text/plain (implementation).
*/
#include <assert.h>
#include <errno.h>
#include <stddef.h>
#define LIBICONV_PLUG
#include <iconv.h>
#include "content/content.h"
#include "css/css.h"
#include "desktop/gui.h"
#include "desktop/plotters.h"
#include "desktop/selection.h"
#include "render/box.h"
#include "render/font.h"
#include "render/textplain.h"
#include "utils/log.h"
#include "utils/messages.h"
#include "utils/talloc.h"
#include "utils/utils.h"
#include "utils/utf8.h"
/* Size in bytes of the initial UTF-8 buffer, and of each later extension
 * made when iconv reports that the output buffer is full. */
#define CHUNK 20480
/* Gap left between the edge of the content and the text. It is subtracted
 * twice from the width when counting columns, added twice to the height,
 * and multiplied by the redraw scale when plotting. */
#define MARGIN 4
/* Tab stop interval in character columns. Columns are rounded up to the
 * next tab stop with a bit mask, hence the power-of-2 restriction. */
#define TAB_WIDTH 8 /* must be power of 2 currently */
/* Style used for all text/plain content; initialised in textplain_create()
 * from css_base_style with a monospace font family. */
static struct css_style textplain_style;
/* Width of one tab stop, in the units reported by nsfont_width().
 * Recomputed by textplain_reformat() from the measured character width. */
static int textplain_tab_width = 256; /* try for a sensible default */
/* Forward declaration: used by textplain_coords_from_range(). */
static int textplain_coord_from_offset(const char *text, size_t offset,
size_t length);
/**
 * Create a CONTENT_TEXTPLAIN.
 *
 * Initialises the shared monospace style, allocates the first CHUNK bytes
 * of the UTF-8 output buffer and opens an iconv descriptor converting from
 * the document encoding to UTF-8. The encoding is the value of the first
 * "charset" entry in params, or iso-8859-1 when there is none. If iconv
 * rejects the encoding with EINVAL, iso-8859-1 is tried instead.
 *
 * \param c       content structure being created
 * \param params  NULL-terminated array of name, value string pairs
 * \return true on success, false on failure (a CONTENT_MSG_ERROR message
 *         has been broadcast)
 */
bool textplain_create(struct content *c, const char *params[])
{
	const char *encoding = "iso-8859-1";
	union content_msg_data msg_data;
	iconv_t iconv_cd;
	char *utf8_data;
	unsigned int i;

	textplain_style = css_base_style;
	textplain_style.font_family = CSS_FONT_FAMILY_MONOSPACE;

	utf8_data = talloc_array(c, char, CHUNK);
	if (utf8_data == NULL)
		goto no_memory;

	/* look for a charset parameter; names sit at even indices */
	i = 0;
	while (params[i] != NULL) {
		if (strcasecmp(params[i], "charset") == 0) {
			encoding = talloc_strdup(c, params[i + 1]);
			if (encoding == NULL)
				goto no_memory;
			break;
		}
		i += 2;
	}

	iconv_cd = iconv_open("utf-8", encoding);
	if (iconv_cd == (iconv_t)(-1) && errno == EINVAL) {
		/* encoding not known to iconv: fall back to the default */
		LOG(("unsupported encoding \"%s\"", encoding));
		iconv_cd = iconv_open("utf-8", "iso-8859-1");
	}
	if (iconv_cd == (iconv_t)(-1)) {
		char buf[300];

		snprintf(buf, sizeof buf, "IconvFailed %s", strerror(errno));
		buf[sizeof buf - 1] = 0;
		msg_data.error = buf;
		content_broadcast(c, CONTENT_MSG_ERROR, msg_data);
		return false;
	}

	/* conversion state */
	c->data.textplain.iconv_cd = iconv_cd;
	c->data.textplain.encoding = encoding;
	c->data.textplain.converted = 0;

	/* UTF-8 output buffer */
	c->data.textplain.utf8_data = utf8_data;
	c->data.textplain.utf8_data_allocated = CHUNK;
	c->data.textplain.utf8_data_size = 0;

	/* nothing has been formatted yet */
	c->data.textplain.physical_line = 0;
	c->data.textplain.physical_line_count = 0;
	c->data.textplain.formatted_width = 0;

	return true;

no_memory:
	msg_data.error = messages_get("NoMemory");
	content_broadcast(c, CONTENT_MSG_ERROR, msg_data);
	return false;
}
/**
 * Process data for CONTENT_TEXTPLAIN.
 *
 * Converts the part of c->source_data that has not yet been converted to
 * UTF-8, appending the result to the UTF-8 buffer. The buffer is extended
 * by CHUNK bytes each time iconv reports that it is full (E2BIG).
 *
 * \param c     content of type CONTENT_TEXTPLAIN
 * \param data  not used; input is read from c->source_data
 * \param size  not used; input length is taken from c->source_size
 * \return true on success, including when iconv stops with EINVAL (the
 *         unconverted tail is retried on the next call); false if the
 *         conversion fails or memory runs out (a CONTENT_MSG_ERROR message
 *         has been broadcast)
 */
bool textplain_process_data(struct content *c, char *data, unsigned int size)
{
	iconv_t iconv_cd = c->data.textplain.iconv_cd;
	size_t count;
	int iconv_errno;
	union content_msg_data msg_data;

	(void) data;
	(void) size;

	do {
		char *inbuf = c->source_data + c->data.textplain.converted;
		size_t inbytesleft = c->source_size -
				c->data.textplain.converted;
		char *outbuf = c->data.textplain.utf8_data +
				c->data.textplain.utf8_data_size;
		size_t outbytesleft = c->data.textplain.utf8_data_allocated -
				c->data.textplain.utf8_data_size;

		count = iconv(iconv_cd, &inbuf, &inbytesleft,
				&outbuf, &outbytesleft);
		/* Take a copy of errno straight away: the allocation and
		 * gui_multitask() calls below may overwrite it before the
		 * later tests are reached. */
		iconv_errno = (count == (size_t)(-1)) ? errno : 0;

		c->data.textplain.converted = inbuf - c->source_data;
		c->data.textplain.utf8_data_size = c->data.textplain.
				utf8_data_allocated - outbytesleft;

		if (iconv_errno == E2BIG) {
			/* output buffer full: extend it and go round again */
			size_t allocated = CHUNK +
					c->data.textplain.utf8_data_allocated;
			char *utf8_data = talloc_realloc(c,
					c->data.textplain.utf8_data,
					char, allocated);
			if (!utf8_data)
				goto no_memory;
			c->data.textplain.utf8_data = utf8_data;
			c->data.textplain.utf8_data_allocated = allocated;
		} else if (count == (size_t)(-1) && iconv_errno != EINVAL) {
			/* any failure other than EINVAL is fatal */
			char buf[300];
			snprintf(buf, sizeof buf, "IconvFailed %s",
					strerror(iconv_errno));
			buf[sizeof buf - 1] = 0;
			msg_data.error = buf;
			content_broadcast(c, CONTENT_MSG_ERROR, msg_data);
			return false;
		}

		gui_multitask();
	} while (!(c->data.textplain.converted == c->source_size ||
			iconv_errno == EINVAL));

	return true;

no_memory:
	msg_data.error = messages_get("NoMemory");
	content_broadcast(c, CONTENT_MSG_ERROR, msg_data);
	return false;
}
/**
 * Convert a CONTENT_TEXTPLAIN for display.
 *
 * Closes the iconv descriptor and clears the stored copy (so that
 * textplain_destroy() will not close it a second time), lays the text out
 * for the given width and marks the content as done.
 *
 * \param c       content of type CONTENT_TEXTPLAIN
 * \param width   width to format to
 * \param height  passed through to textplain_reformat()
 * \return true always
 */
bool textplain_convert(struct content *c, int width, int height)
{
	iconv_t finished_cd = c->data.textplain.iconv_cd;

	c->data.textplain.iconv_cd = 0;
	iconv_close(finished_cd);

	textplain_reformat(c, width, height);

	c->status = CONTENT_STATUS_DONE;
	content_set_status(c, messages_get("Done"));

	return true;
}
/**
 * Reformat a CONTENT_TEXTPLAIN to a new width.
 *
 * Rebuilds the table of physical lines. A new physical line is started at
 * each line terminator (CR, LF, or a CR/LF or LF/CR pair) and whenever the
 * next character would reach the number of columns that fit in the width.
 * A wrapped line is broken after the last space seen on it, or after the
 * current character if it contains no space. Each table entry records the
 * byte offset and byte length of a line; one extra entry after the last
 * line holds the total data size.
 *
 * Columns are counted per byte of UTF-8 data, with tabs advancing to the
 * next multiple of TAB_WIDTH.
 * NOTE(review): multi-byte characters therefore count as several columns
 * and could be split across lines - confirm whether this matters.
 *
 * Nothing is changed if the font cannot be measured. On memory exhaustion
 * the line count is left at 0.
 *
 * \param c       content of type CONTENT_TEXTPLAIN
 * \param width   available width
 * \param height  not used
 */
void textplain_reformat(struct content *c, int width, int height)
{
	char *utf8_data = c->data.textplain.utf8_data;
	size_t utf8_data_size = c->data.textplain.utf8_data_size;
	unsigned long line_count = 0;
	struct textplain_line *line = c->data.textplain.physical_line;
	struct textplain_line *line1;
	size_t i, space, col;
	size_t columns;
	int character_width;
	size_t line_start;

	/* compute available columns (assuming monospaced font) - use 8
	 * characters for better accuracy */
	if (!nsfont_width(&textplain_style, "ABCDEFGH", 8, &character_width))
		return;
	/* a width of zero (or less) cannot be used as a divisor */
	if (character_width <= 0)
		return;
	columns = (width - MARGIN - MARGIN) * 8 / character_width;
	textplain_tab_width = (TAB_WIDTH * character_width) / 8;

	c->data.textplain.formatted_width = width;

	c->data.textplain.physical_line_count = 0;

	if (!line) {
		/* the 3 spare entries leave room for the terminating entry */
		c->data.textplain.physical_line = line =
			talloc_array(c, struct textplain_line, 1024 + 3);
		if (!line)
			goto no_memory;
	}

	line[line_count++].start = line_start = 0;
	space = 0;	/* 0 means no space seen on the current line */
	for (i = 0, col = 0; i != utf8_data_size; i++) {
		bool term = (utf8_data[i] == '\n' || utf8_data[i] == '\r');
		size_t next_col = col + 1;

		if (utf8_data[i] == '\t')
			next_col = (next_col + TAB_WIDTH - 1) & ~(TAB_WIDTH - 1);

		if (term || next_col >= columns) {
			/* grow the table in steps of 1024 entries */
			if (line_count % 1024 == 0) {
				line1 = talloc_realloc(c, line,
						struct textplain_line,
						line_count + 1024 + 3);
				if (!line1)
					goto no_memory;
				c->data.textplain.physical_line =
						line = line1;
			}
			if (term) {
				/* the terminator itself is not part of the line */
				line[line_count-1].length = i - line_start;

				/* skip second char of CR/LF or LF/CR pair */
				if (i + 1 < utf8_data_size &&
						utf8_data[i+1] != utf8_data[i] &&
						(utf8_data[i+1] == '\n' ||
						utf8_data[i+1] == '\r'))
					i++;
			} else {
				if (space) {
					/* break at last space in line */
					i = space;
				}
				/* The line ends with the byte at i (the space,
				 * or the current character when there is no
				 * space) because the next line starts at
				 * i + 1; excluding it would leave that byte
				 * on no line at all. */
				line[line_count-1].length = (i + 1) - line_start;
			}
			line[line_count++].start = line_start = i + 1;
			col = 0;
			space = 0;
		} else {
			/* carry the tab-expanded column forward so that text
			 * after a tab is counted at its displayed position */
			col = next_col;
			if (utf8_data[i] == ' ')
				space = i;
		}
	}
	line[line_count-1].length = i - line[line_count-1].start;
	/* terminating entry: start of the non-existent line after the last */
	line[line_count].start = utf8_data_size;

	c->data.textplain.physical_line_count = line_count;
	c->width = width;
	c->height = line_count *
			css_len2px(&textplain_style.font_size.value.length,
			&textplain_style) * 1.2 + MARGIN + MARGIN;

	return;

no_memory:
	LOG(("out of memory (line_count %lu)", line_count));
	return;
}
/**
 * Destroy a CONTENT_TEXTPLAIN and free all resources it owns.
 *
 * Closes the iconv descriptor if one is still recorded. Nothing else is
 * released here; the buffers were allocated with talloc using c as the
 * context.
 *
 * \param c  content of type CONTENT_TEXTPLAIN
 */
void textplain_destroy(struct content *c)
{
	iconv_t open_cd = c->data.textplain.iconv_cd;

	if (!open_cd)
		return;

	iconv_close(open_cd);
}
/**
* Draw a CONTENT_TEXTPLAIN using the current set of plotters (plot).
*
* \param c content of type CONTENT_TEXTPLAIN
* \param x coordinate for top-left of redraw
* \param y coordinate for top-left of redraw
* \param width available width
* \param height available height
* \param clip_x0 clip rectangle
* \param clip_y0 clip rectangle
* \param clip_x1 clip rectangle
* \param clip_y1 clip rectangle
* \param scale scale for redraw
* \param background_colour the background colour
* \return true if successful, false otherwise
*
* x, y, clip_[xy][01] are in target coordinates.
*/
bool textplain_redraw(struct content *c, int x, int y,
int width, int height,
int clip_x0, int clip_y0, int clip_x1, int clip_y1,
float scale, unsigned long background_colour)
{
struct browser_window *bw = current_redraw_browser;
char *utf8_data = c->data.textplain.utf8_data;
long lineno;
unsigned long line_count = c->data.textplain.physical_line_count;
float line_height = css_len2px(&textplain_style.font_size.value.length,
&textplain_style) * 1.2;
float scaled_line_height = line_height * scale;
long line0 = clip_y0 / scaled_line_height - 1;
long line1 = clip_y1 / scaled_line_height + 1;
struct textplain_line *line = c->data.textplain.physical_line;
colour hback_col;
struct rect clip;
size_t length;
clip.x0 = clip_x0;
clip.y0 = clip_y0;
clip.x1 = clip_x1;
clip.y1 = clip_y1;
if (line0 < 0)
line0 = 0;
if (line1 < 0)
line1 = 0;
if (line_count < (unsigned long) line0)
line0 = line_count;
if (line_count < (unsigned long) line1)
line1 = line_count;
if (line1 < line0)
line1 = line0;
if (!plot.clg(0xffffff))
return false;
if (!line)
return true;
/* choose a suitable background colour for any highlighted text */
if ((background_colour & 0x808080) == 0x808080)
hback_col = 0;
else
hback_col = 0xffffff;
x += MARGIN * scale;
y += MARGIN * scale;
for (lineno = line0; lineno != line1; lineno++) {
const char *text = utf8_data + line[lineno].start;
int tab_width = textplain_tab_width * scale;
size_t offset = 0;
int tx = x;
if (!tab_width) tab_width = 1;
length = line[lineno].length;
if (!length)
continue;
while (offset < length) {
size_t next_offset = offset;
int width;
int ntx;
while (next_offset < length && text[next_offset] != '\t')
next_offset = utf8_next(text, length, next_offset);
if (!text_redraw(text + offset, next_offset - offset,
line[lineno].start + offset, false,
&textplain_style,
tx, y + (lineno * scaled_line_height),
&clip, line_height, scale,
background_colour, false))
return false;
if (next_offset >= length)
break;
/* locate end of string and align to next tab position */
if (nsfont_width(&textplain_style, &text[offset],
next_offset - offset, &width))
tx += (int)(width * scale);
ntx = x + ((1 + (tx - x) / tab_width) * tab_width);
/* if the tab character lies within the selection, if any,
then we must draw it as a filled rectangle so that it's
consistent with background of the selected text */
if (bw) {
unsigned tab_ofst = line[lineno].start + next_offset;
struct selection *sel = bw->sel;
bool highlighted = false;
if (selection_defined(sel)) {
unsigned start_idx, end_idx;
if (selection_highlighted(sel,
tab_ofst, tab_ofst + 1,
&start_idx, &end_idx))
highlighted = true;
}
if (!highlighted && search_current_window == bw->window) {
unsigned start_idx, end_idx;
if (gui_search_term_highlighted(bw->window,
tab_ofst, tab_ofst + 1,
&start_idx, &end_idx))
highlighted = true;
}
if (highlighted) {
int sy = y + (lineno * scaled_line_height);
if (!plot.fill(tx, sy, ntx, sy + scaled_line_height,
hback_col))
return false;
}
}
offset = next_offset + 1;
tx = ntx;
}
}
return true;
}
/**
 * Return byte offset within UTF8 textplain content, given the co-ordinates
 * of a point within a textplain content.
 *
 * The y co-ordinate selects a physical line (clamped to the first or last
 * line). The line is then walked run by run, where runs are separated by
 * tabs, subtracting the width of each run and of each tab from x until the
 * point falls inside one of them.
 *
 * NOTE(review): the width of a tab is derived from the width of the run
 * that precedes it, not from the distance to the start of the line - this
 * looks inexact after the first tab on a line; confirm.
 *
 * \param c	content of type CONTENT_TEXTPLAIN
 * \param x	x ordinate of point
 * \param y	y ordinate of point
 * \param dir	direction of search if not within line (-1 = above-left,
 *		+1 = below-right); not referenced by this implementation
 * \return byte offset of character containing (or nearest to) point;
 *	   0 if there are no lines
 */
size_t textplain_offset_from_coords(struct content *c, int x, int y, int dir)
{
	float line_height = css_len2px(&textplain_style.font_size.value.length,
			&textplain_style) * 1.2;
	struct textplain_line *line;
	const char *text;
	size_t remaining;
	unsigned nlines;
	int idx = 0;

	assert(c->type == CONTENT_TEXTPLAIN);

	/* convert to a line index and to an x offset within the text */
	y = (int)((float)(y - MARGIN) / line_height);
	x -= MARGIN;

	nlines = c->data.textplain.physical_line_count;
	if (nlines == 0)
		return 0;

	if (y <= 0)
		y = 0;
	else if ((unsigned)y >= nlines)
		y = nlines - 1;

	line = c->data.textplain.physical_line + y;
	text = c->data.textplain.utf8_data + line->start;
	remaining = line->length;

	while (x > 0) {
		size_t run_len = 0;
		int run_width = INT_MAX;
		int tab_px;

		/* the run extends to the next tab or the end of the line */
		while (run_len < remaining && text[run_len] != '\t')
			run_len = utf8_next(text, remaining, run_len);

		/* a run that reaches the end of the line keeps the INT_MAX
		 * width, so the point is always taken to lie within it */
		if (run_len < remaining)
			nsfont_width(&textplain_style, text, run_len,
					&run_width);

		if (x <= run_width) {
			size_t char_offset;
			int pixel_offset;

			nsfont_position_in_string(&textplain_style,
					text, run_len, x,
					&char_offset, &pixel_offset);

			idx += char_offset;
			break;
		}

		/* step over the run */
		x -= run_width;
		remaining -= run_len;
		text += run_len;
		idx += run_len;

		/* check if it's within the tab */
		tab_px = textplain_tab_width -
				(run_width % textplain_tab_width);
		if (x <= tab_px)
			break;

		/* step over the tab */
		x -= tab_px;
		remaining--;
		text++;
		idx++;
	}

	return line->start + idx;
}
/**
 * Given a byte offset within the text, return the line number
 * of the line containing that offset (or -1 if offset invalid)
 *
 * The line table is scanned from the top for the first line that does not
 * start before the offset; if that line starts after the offset, the
 * offset belongs to the line before it. The scan may stop on the entry
 * one past the last line, which textplain_reformat() sets to the data size.
 *
 * \param c	content of type CONTENT_TEXTPLAIN
 * \param offset	byte offset within textual representation
 * \return line number, or -1 if offset invalid (larger than size)
 */
int textplain_find_line(struct content *c, unsigned offset)
{
	struct textplain_line *line = c->data.textplain.physical_line;
	int nlines = c->data.textplain.physical_line_count;
	int lineno;

	assert(c->type == CONTENT_TEXTPLAIN);

	if (offset > c->data.textplain.utf8_data_size)
		return -1;

	/* \todo - implement binary search here */
	for (lineno = 0; lineno < nlines; lineno++) {
		if (line[lineno].start >= offset)
			break;
	}

	/* overshot: the offset lies inside the previous line */
	if (line[lineno].start > offset)
		lineno--;

	return lineno;
}
/**
 * Convert a character offset within a line of text into the
 * horizontal co-ordinate, taking into account the font being
 * used and any tabs in the text
 *
 * The text before the offset is measured run by run, where runs are
 * separated by tabs; after each tab the co-ordinate is advanced to the
 * next multiple of the tab width. A run that cannot be measured
 * contributes no width.
 *
 * \param text	line of text
 * \param offset	char offset within text
 * \param length	line length
 * \return x ordinate
 */
int textplain_coord_from_offset(const char *text, size_t offset, size_t length)
{
	/* never divide by zero; textplain_redraw() applies the same guard */
	int tab_width = textplain_tab_width ? textplain_tab_width : 1;
	int x = 0;

	while (offset > 0) {
		size_t next_offset = 0;
		int tx = 0;

		/* the run ends at the next tab or at the requested offset */
		while (next_offset < offset && text[next_offset] != '\t')
			next_offset = utf8_next(text, length, next_offset);

		/* only trust the width if the measurement succeeded */
		if (nsfont_width(&textplain_style, text, next_offset, &tx))
			x += tx;

		if (next_offset >= offset)
			break;

		/* align to next tab boundary */
		next_offset++;
		x = (1 + (x / tab_width)) * tab_width;
		offset -= next_offset;
		text += next_offset;
		length -= next_offset;
	}

	return x;
}
/**
 * Given a range of byte offsets within a UTF8 textplain content,
 * return a box that fully encloses the text
 *
 * The top of the box is the top of the line containing start. In the
 * multi-line case the box spans the whole formatted width and extends to
 * the bottom of the line containing end; otherwise it spans just the text
 * between the two offsets on their common line.
 *
 * NOTE(review): the single-line case is only reached when lineno + 1 is
 * greater than the line count, which does not look like the intended test
 * for "start and end are on the same line". The single-line x ordinates
 * also omit MARGIN. Both should be confirmed before the condition is
 * changed.
 *
 * \param c	content of type CONTENT_TEXTPLAIN
 * \param start	byte offset of start of text range
 * \param end	byte offset of end
 * \param r	rectangle to be completed
 */
void textplain_coords_from_range(struct content *c, unsigned start, unsigned end,
		struct rect *r)
{
	float line_height = css_len2px(&textplain_style.font_size.value.length,
			&textplain_style) * 1.2;
	char *utf8_data = c->data.textplain.utf8_data;
	struct textplain_line *line;
	unsigned nlines;
	unsigned lineno;

	assert(c->type == CONTENT_TEXTPLAIN);
	assert(start <= end);
	assert(end <= c->data.textplain.utf8_data_size);

	line = c->data.textplain.physical_line;
	nlines = c->data.textplain.physical_line_count;

	/* find start */
	lineno = textplain_find_line(c, start);

	r->y0 = (int)(MARGIN + lineno * line_height);

	if (lineno + 1 > nlines && line[lineno + 1].start < end) {
		/* single line */
		const char *text = utf8_data + line[lineno].start;
		size_t line_length = line[lineno].length;

		r->x0 = textplain_coord_from_offset(text,
				start - line[lineno].start, line_length);

		r->x1 = textplain_coord_from_offset(text,
				end - line[lineno].start, line_length);
	} else {
		/* \todo - it may actually be more efficient just to run
			forwards most of the time */

		/* find end */
		lineno = textplain_find_line(c, end);

		r->x0 = 0;
		r->x1 = c->data.textplain.formatted_width;
	}

	r->y1 = (int)(MARGIN + (lineno + 1) * line_height);
}
/**
* Return a pointer to the requested line of text.
*
* \param c content of type CONTENT_TEXTPLAIN
* \param lineno line number
* \param poffset receives byte offset of line start within text
* \param plen receives length of returned line
* \return pointer to text, or NULL if invalid line number
*/
char *textplain_get_line(struct content *c, unsigned lineno,
size_t *poffset, size_t *plen)
{
struct textplain_line *line;
assert(c->type == CONTENT_TEXTPLAIN);
if (lineno >= c->data.textplain.physical_line_count)
return NULL;
line = &c->data.textplain.physical_line[lineno];
*poffset = line->start;
*plen = line->length;
return c->data.textplain.utf8_data + line->start;
}
/**
 * Return a pointer to the raw UTF-8 data, as opposed to the reformatted
 * text to fit the window width. Thus only hard newlines are preserved
 * in the saved/copied text of a selection.
 *
 * Both offsets are clamped to the size of the data, and an end offset that
 * precedes the start offset is treated as an empty range. *plen is not
 * written when NULL is returned.
 *
 * \param c	content of type CONTENT_TEXTPLAIN
 * \param start	starting byte offset within UTF-8 text
 * \param end	ending byte offset
 * \param plen	receives validated length
 * \return pointer to text, or NULL if no text
 */
char *textplain_get_raw_data(struct content *c, unsigned start, unsigned end,
		size_t *plen)
{
	size_t utf8_size = c->data.textplain.utf8_data_size;

	assert(c->type == CONTENT_TEXTPLAIN);

	/* any text at all? */
	if (!utf8_size)
		return NULL;

	/* clamp to valid offset range */
	if (start >= utf8_size)
		start = utf8_size;
	if (end >= utf8_size)
		end = utf8_size;

	/* an inverted range must not wrap round to a huge unsigned length */
	if (end < start)
		end = start;

	*plen = end - start;

	return c->data.textplain.utf8_data + start;
}