[project @ 2005-06-26 01:55:20 by jmb]

Move acceptable character set determination to form submission time, rather than at box tree creation time.
Use UTF-8 encoding, if specified, else use first specified encoding.
Improve use of utf8_to_enc - falling back to document encoding then 8859-1 where appropriate.

svn path=/import/netsurf/; revision=1765
This commit is contained in:
John Mark Bell 2005-06-26 01:55:20 +00:00
parent 26872ea54f
commit 3286c99dd5
3 changed files with 117 additions and 41 deletions

View File

@ -472,7 +472,6 @@ bool box_construct_element(xmlNode *n, struct content *content,
}
}
/* fetch any background image for this box */
if (style->background_image.type == CSS_BACKGROUND_IMAGE_URI) {
if (!html_fetch_object(content, style->background_image.uri,
@ -832,6 +831,7 @@ struct css_style * box_get_style(struct content *c,
value;
}
}
xmlFree(s);
}
}
@ -856,6 +856,7 @@ struct css_style * box_get_style(struct content *c,
CSS_UNIT_PX;
}
}
xmlFree(s);
}
if ((s = (char *) xmlGetProp(n,
(const xmlChar *) "vspace"))) {
@ -876,6 +877,7 @@ struct css_style * box_get_style(struct content *c,
CSS_UNIT_PX;
}
}
xmlFree(s);
}
}
@ -1663,6 +1665,7 @@ bool box_iframe(BOX_SPECIAL_PARAMS)
/* start fetch */
ok = html_fetch_object(content, url, box, 0,
content->available_width, 1000, false);
free(url);
return ok;
}
@ -1698,34 +1701,13 @@ bool box_form(BOX_SPECIAL_PARAMS)
}
/* acceptable encoding(s) for form data */
if ((charset = (char *) xmlGetProp(n, (const xmlChar *) "accept-charset"))) {
char *comma = strchr(charset, ',');
if (!comma)
/* only one => use it */
comma = strdup(charset);
else
/* multiple => use first */
comma = strndup(charset, comma - charset);
charset = (char *) xmlGetProp(n, (const xmlChar *) "accept-charset");
xmlFree(charset);
charset = comma;
}
else if (content->data.html.encoding)
/* none specified => try document encoding */
charset = strdup(content->data.html.encoding);
else
/* none specified and no document encoding => 8859-1 */
charset = strdup("ISO-8859-1");
if (!charset) {
xmlFree(action);
return false;
}
form = form_new(action, fmethod, charset);
form = form_new(action, fmethod, charset,
content->data.html.encoding);
if (!form) {
xmlFree(action);
free(charset);
xmlFree(charset);
return false;
}
form->prev = content->data.html.forms;

View File

@ -4,6 +4,7 @@
* http://www.opensource.org/licenses/gpl-license
* Copyright 2004 James Bursa <bursa@users.sourceforge.net>
* Copyright 2003 Phil Mellor <monkeyson@users.sourceforge.net>
* Copyright 2005 John M Bell <jmb202@ecs.soton.ac.uk>
*/
/** \file
@ -11,6 +12,7 @@
*/
#include <assert.h>
#include <ctype.h>
#include <stdbool.h>
#include <stdio.h>
#include <string.h>
@ -23,17 +25,20 @@
static char *form_textarea_value(struct form_control *textarea);
static char *form_acceptable_charset(struct form *form);
/**
* Create a struct form.
*
* \param action URL to submit form to, used directly (not copied)
* \param method method and enctype
* \param charset characterset of form (not copied)
* \param charset acceptable charactersets for form submission (not copied)
* \param doc_charset characterset of containing document (not copied)
* \return a new structure, or 0 on memory exhaustion
*/
struct form *form_new(char *action, form_method method, char *charset)
struct form *form_new(char *action, form_method method, char *charset,
char *doc_charset)
{
struct form *form;
@ -42,7 +47,8 @@ struct form *form_new(char *action, form_method method, char *charset)
return 0;
form->action = action;
form->method = method;
form->charset = charset;
form->accept_charsets = charset;
form->document_charset = doc_charset;
form->controls = 0;
form->last_control = 0;
form->prev = 0;
@ -83,6 +89,9 @@ struct form_control *form_new_control(form_control_type type)
/**
* Add a control to the list of controls in a form.
*
* \param form The form to add the control to
* \param control The control to add
*/
void form_add_control(struct form *form, struct form_control *control)
@ -485,6 +494,7 @@ char *form_url_encode(struct form *form,
{
char *name, *value, *n_temp, *v_temp;
char *s = malloc(1), *s2;
char *charset;
unsigned int len = 0, len1;
utf8_convert_ret err;
@ -492,23 +502,37 @@ char *form_url_encode(struct form *form,
return 0;
s[0] = 0;
charset = form_acceptable_charset(form);
if (!charset)
return 0;
for (; control; control = control->next) {
/** \todo fallback to document encoding or 8859-1 as
* last resort.
* What would also be an improvement would be to choose
* an encoding acceptable by the server which covers as much
* of the input values as possible. Additionally, we need to
* handle the case where none of the acceptable encodings
* cover all the textual input values.
*/
err = utf8_to_enc(control->name, form->charset, 0, &n_temp);
err = utf8_to_enc(control->name, charset, 0, &n_temp);
if (err == UTF8_CONVERT_BADENC) {
/* charset not understood, try document charset */
err = utf8_to_enc(control->name,
form->document_charset, 0, &n_temp);
if (err == UTF8_CONVERT_BADENC)
/* that also failed, use 8859-1 */
err = utf8_to_enc(control->name,
"ISO-8859-1", 0, &n_temp);
}
if (err != UTF8_CONVERT_OK) {
free(charset);
free(s);
return 0;
}
err = utf8_to_enc(control->value, form->charset, 0, &v_temp);
err = utf8_to_enc(control->value, charset, 0, &v_temp);
if (err == UTF8_CONVERT_BADENC) {
err = utf8_to_enc(control->value,
form->document_charset, 0, &v_temp);
if (err == UTF8_CONVERT_BADENC)
err = utf8_to_enc(control->value,
"ISO-8859-1", 0, &v_temp);
}
if (err != UTF8_CONVERT_OK) {
free(n_temp);
free(charset);
free(s);
return 0;
}
@ -521,6 +545,7 @@ char *form_url_encode(struct form *form,
curl_free(name);
free(v_temp);
free(n_temp);
free(charset);
free(s);
return 0;
}
@ -532,6 +557,9 @@ char *form_url_encode(struct form *form,
free(v_temp);
free(n_temp);
}
free(charset);
if (len)
s[len - 1] = 0;
return s;
@ -540,6 +568,8 @@ char *form_url_encode(struct form *form,
/**
* Free a linked list of form_successful_control.
*
* \param control Pointer to head of list to free
*/
void form_free_successful(struct form_successful_control *control)
@ -552,3 +582,65 @@ void form_free_successful(struct form_successful_control *control)
free(control);
}
}
/**
 * Find an acceptable character set encoding with which to submit the form
 *
 * UTF-8 is chosen if it appears anywhere in the form's accept-charset list
 * (compared case-insensitively); otherwise the first entry of the list is
 * used. If the form has no accept-charset list, or the list contains no
 * entries, the document charset is used, falling back to ISO-8859-1 when
 * no document charset is set either.
 *
 * \param form  The form
 * \return Pointer to charset name (on heap, caller should free) or NULL
 *         if form is NULL or memory is exhausted
 */
char *form_acceptable_charset(struct form *form)
{
	const char *start, *end;
	char *temp, *c;
	bool has_utf8;

	if (!form)
		return NULL;

	start = form->accept_charsets;
	if (start) {
		/* skip leading separators so that a list such as " utf-8"
		 * or ",iso-8859-1" does not yield an empty charset name */
		while (*start == ',' || isspace((unsigned char) *start))
			start++;
	}

	if (!start || !*start) {
		/* no (usable) accept-charset attribute for this form */
		if (form->document_charset)
			/* document charset present, so use it */
			return strdup(form->document_charset);

		/* no document charset, so default to 8859-1 */
		return strdup("ISO-8859-1");
	}

	/* make temporary upper case copy of the accept-charset list, so
	 * that the search for UTF-8 is case-insensitive */
	temp = strdup(start);
	if (!temp)
		return NULL;

	/* <ctype.h> functions take the character value (as an unsigned
	 * char), not a pointer to it */
	for (c = temp; *c; c++)
		*c = (char) toupper((unsigned char) *c);

	/* is UTF-8 specified? */
	has_utf8 = (strstr(temp, "UTF-8") != NULL);

	/* dispense with temporary copy */
	free(temp);

	if (has_utf8)
		return strdup("UTF-8");

	/* according to RFC2070, the accept-charset attribute of the
	 * form element contains a space and/or comma separated list */

	/* What would be an improvement would be to choose an encoding
	 * acceptable to the server which covers as much of the input
	 * values as possible. Additionally, we need to handle the case
	 * where none of the acceptable encodings cover all the textual
	 * input values.
	 * For now, we just extract the first element of the charset list
	 */
	end = start;
	while (*end && *end != ',' && !isspace((unsigned char) *end))
		end++;

	return strndup(start, (size_t) (end - start));
}

View File

@ -31,7 +31,8 @@ typedef enum {
struct form {
char *action; /**< URL to submit to. */
form_method method; /**< Method and enctype. */
char *charset; /**< Charset to submit form in */
char *accept_charsets; /**< Charset to submit form in */
char *document_charset; /**< Charset of document containing form */
struct form_control *controls; /**< Linked list of controls. */
struct form_control *last_control; /**< Last control in list. */
struct form *prev; /**< Previous form in doc. */
@ -102,7 +103,8 @@ struct form_successful_control {
struct form_successful_control *next; /**< Next in linked list. */
};
struct form *form_new(char *action, form_method method, char *charset);
struct form *form_new(char *action, form_method method, char *charset,
char *doc_charset);
struct form_control *form_new_control(form_control_type type);
void form_add_control(struct form *form, struct form_control *control);
void form_free_control(struct form_control *control);