2003-06-30 16:44:03 +04:00
|
|
|
/*
 * This file is part of NetSurf, http://netsurf.sourceforge.net/
 * Licensed under the GNU General Public License,
 * http://www.opensource.org/licenses/gpl-license
 * Copyright 2003 James Bursa <bursa@users.sourceforge.net>
 * Copyright 2003 Phil Mellor <monkeyson@users.sourceforge.net>
 * Copyright 2003 John M Bell <jmb202@ecs.soton.ac.uk>
 */
|
|
|
|
|
|
|
|
#include <ctype.h>
|
|
|
|
#include <stdlib.h>
|
|
|
|
#include <stdio.h>
|
|
|
|
#include <string.h>
|
2003-11-08 02:47:55 +03:00
|
|
|
#include <sys/stat.h>
|
2003-12-26 03:17:55 +03:00
|
|
|
#include <uri.h>
|
2003-12-27 23:15:23 +03:00
|
|
|
#include <sys/types.h>
|
|
|
|
#include <regex.h>
|
2003-12-28 05:35:46 +03:00
|
|
|
#include <time.h>
|
2003-04-12 01:06:51 +04:00
|
|
|
#include "libxml/encoding.h"
|
2003-04-06 01:38:06 +04:00
|
|
|
#include "libxml/uri.h"
|
2004-01-05 05:10:59 +03:00
|
|
|
#include "netsurf/utils/config.h"
|
2003-12-28 02:49:31 +03:00
|
|
|
#ifdef riscos
|
|
|
|
#include "netsurf/riscos/about.h"
|
|
|
|
#include "netsurf/riscos/constdata.h"
|
|
|
|
#endif
|
2003-04-06 01:38:06 +04:00
|
|
|
#include "netsurf/utils/log.h"
|
2003-12-19 03:59:36 +03:00
|
|
|
#include "netsurf/utils/messages.h"
|
2003-02-09 15:58:15 +03:00
|
|
|
#include "netsurf/utils/utils.h"
|
2002-04-22 13:24:35 +04:00
|
|
|
|
|
|
|
/**
 * Report a fatal error and terminate the program.
 *
 * The message is written to stderr with a "Fatal: " prefix and a trailing
 * newline, then the process exits with EXIT_FAILURE. This function does not
 * return.
 *
 * \param error  text describing the failure
 */

void die(const char * const error)
{
	fprintf(stderr, "Fatal: %s\n", error);

	exit(EXIT_FAILURE);
}
|
|
|
|
|
|
|
|
/**
 * Strip leading and trailing whitespace from a string, in place.
 *
 * Trailing characters classified as whitespace by isspace() are cut off by
 * writing a terminator into s. Leading spaces, tabs, carriage returns and
 * newlines are skipped by returning a pointer further into s.
 *
 * \param s  string to strip; its contents are modified
 * \return pointer into s just past any leading whitespace
 */

char * strip(char * const s)
{
	size_t i;

	/* isspace() is only defined for unsigned char values and EOF, so a
	 * plain (possibly negative) char must be converted before the call */
	for (i = strlen(s); i != 0 && isspace((unsigned char) s[i - 1]); i--)
		;
	s[i] = 0;

	return s + strspn(s, " \t\r\n");
}
|
|
|
|
|
|
|
|
/**
 * Test whether a string consists only of whitespace.
 *
 * \param str  string to examine
 * \return 1 if every character satisfies isspace() (this includes the
 *         empty string), 0 as soon as any other character is found
 */

int whitespace(const char * str)
{
	const unsigned char *p;

	/* walk to the terminator directly instead of calling strlen() on
	 * every iteration; unsigned char keeps isspace() well defined */
	for (p = (const unsigned char *) str; *p != 0; p++)
		if (!isspace(*p))
			return 0;

	return 1;
}
|
|
|
|
|
|
|
|
/**
 * Allocate zeroed memory, terminating the program on failure.
 *
 * \param n     number of elements
 * \param size  size of each element in bytes
 * \return pointer to the zeroed allocation; never 0
 */

void * xcalloc(const size_t n, const size_t size)
{
	void * p = calloc(n, size);

	/* calloc() may return a null pointer for a zero-byte request without
	 * being out of memory; fall back to a one-byte block so that an
	 * empty request is not reported as a fatal error */
	if (p == 0 && (n == 0 || size == 0))
		p = calloc(1, 1);

	if (p == 0) die("Out of memory in xcalloc()");

	return p;
}
|
|
|
|
|
|
|
|
/**
 * Resize an allocation, terminating the program on failure.
 *
 * \param p     existing allocation, or 0 to allocate a new block
 * \param size  requested size in bytes
 * \return pointer to the resized allocation; never 0
 */

void * xrealloc(void * p, const size_t size)
{
	/* realloc(p, 0) may release p and return a null pointer, which would
	 * be mistaken for an out-of-memory condition below; ask for at least
	 * one byte instead */
	p = realloc(p, size != 0 ? size : 1);

	if (p == 0) die("Out of memory in xrealloc()");

	return p;
}
|
|
|
|
|
2002-09-11 18:24:02 +04:00
|
|
|
/**
 * Release memory, complaining on stderr about null pointers.
 *
 * \param p  allocation to release; if it is 0 a diagnostic is printed and
 *           nothing is freed
 */

void xfree(void* p)
{
	if (p != 0) {
		free(p);
		return;
	}

	fprintf(stderr, "Attempt to free NULL pointer\n");
}
|
|
|
|
|
2002-04-22 13:24:35 +04:00
|
|
|
/**
 * Duplicate a string, terminating the program on allocation failure.
 *
 * \param s  string to copy
 * \return newly malloc()ed copy of s, including its terminator
 */

char * xstrdup(const char * const s)
{
	const size_t bytes = strlen(s) + 1;	/* characters plus terminator */
	char *copy = malloc(bytes);

	if (copy == 0) die("Out of memory in xstrdup()");

	memcpy(copy, s, bytes);
	return copy;
}
|
|
|
|
|
|
|
|
/**
 * Read an entire file into memory.
 *
 * The file is opened in binary mode and read in one go. Any failure to
 * open, seek or read is fatal and reported through die().
 *
 * \param path  name of the file to read
 * \return buffer obtained from xcalloc() holding the file contents followed
 *         by a zero byte; nothing in this function releases it
 */

char * load(const char * const path)
{
	FILE * fp = fopen(path, "rb");
	char * buf;
	long size;
	size_t got;

	if (fp == 0) die("Failed to open file");
	if (fseek(fp, 0, SEEK_END) != 0) die("fseek() failed");
	if ((size = ftell(fp)) == -1) die("ftell() failed");

	/* one extra zeroed byte keeps the result usable as a C string and
	 * avoids a zero-byte allocation for an empty file */
	buf = xcalloc((size_t) size + 1, 1);

	if (fseek(fp, 0, SEEK_SET) != 0) die("fseek() failed");
	got = fread(buf, 1, (size_t) size, fp);
	if (got < (size_t) size) die("fread() failed");

	/* the stream is no longer needed once the contents are in memory */
	fclose(fp);

	return buf;
}
|
2002-05-22 01:27:29 +04:00
|
|
|
|
|
|
|
/**
 * Copy a string, collapsing each run of whitespace into a single space.
 *
 * \param s  string to copy
 * \return newly malloc()ed string; allocation failure is fatal
 */

char * squash_whitespace(const char * s)
{
	/* the result is never longer than the input: every run of one or
	 * more whitespace characters yields exactly one space */
	char * c = malloc(strlen(s) + 1);
	size_t i = 0, j = 0;

	if (c == 0) die("Out of memory in squash_whitespace()");

	do {
		/* isspace() requires an unsigned char value; it is false for
		 * the terminator, so the inner loop stops at end of string */
		if (isspace((unsigned char) s[i])) {
			c[j++] = ' ';
			while (isspace((unsigned char) s[i]))
				i++;
		}
		/* copies the terminator too on the final pass */
		c[j++] = s[i++];
	} while (s[i - 1] != 0);

	return c;
}
|
|
|
|
|
2003-04-12 01:06:51 +04:00
|
|
|
/**
 * Convert a UTF-8 string to Latin-1, replacing control whitespace.
 *
 * Tab, line feed and carriage return become a space. Code points 0x20-0x7f
 * and 0xa0-0xff are stored as a single byte. Everything else is handed to
 * unicode_transliterate(). Bytes that xmlGetUTF8Char() rejects are skipped
 * and logged.
 *
 * \param s  UTF-8 input string
 * \return buffer obtained from xcalloc()/xrealloc() holding the terminated
 *         result
 */

char * tolat1(xmlChar * s)
{
	unsigned int length = strlen((char*) s);
	unsigned int space = length + 100;
	char *d = xcalloc(space, sizeof(char));
	char *d0 = d;
	/* keep a margin at the end of the buffer for one more write
	 * NOTE(review): assumes unicode_transliterate() appends fewer than
	 * 10 bytes per call - confirm against its implementation */
	char *end = d0 + space - 10;
	int u, chars;

	while (*s != 0) {
		chars = length;
		u = xmlGetUTF8Char((unsigned char *) s, &chars);
		if (chars <= 0) {
			/* invalid sequence: step over one byte and retry */
			s += 1;
			length -= 1;
			LOG(("UTF-8 error"));
			continue;
		}
		s += chars;
		length -= chars;

		if (u == 0x09 || u == 0x0a || u == 0x0d)
			*d++ = ' ';
		else if ((0x20 <= u && u <= 0x7f) || (0xa0 <= u && u <= 0xff))
			*d++ = u;
		else
			unicode_transliterate((unsigned int) u, &d);

		/* grow after every write, not only after transliteration, so
		 * the margin also holds for the plain copies above */
		if (end < d) {
			size_t used = d - d0;

			space += 100;
			d0 = xrealloc(d0, space);
			/* the block may have moved: re-derive the write
			 * cursor from the new base address */
			d = d0 + used;
			end = d0 + space - 10;
		}
	}
	*d = 0;

	return d0;
}
|
|
|
|
|
2003-10-08 01:34:39 +04:00
|
|
|
/**
 * Convert a UTF-8 string to Latin-1, keeping whitespace characters as is.
 *
 * Tab, line feed, carriage return and code points 0x20-0x7f and 0xa0-0xff
 * are stored as a single byte; anything else becomes '?'.
 *
 * \param s  UTF-8 input string
 * \return buffer obtained from xcalloc() holding the terminated result
 */

char * tolat1_pre(xmlChar * s)
{
	unsigned int length = strlen((char*) s);
	/* every pass of the loop consumes at least one input byte and writes
	 * exactly one output byte, so length + 1 bytes always suffice */
	char *d = xcalloc(length + 1, sizeof(char));
	char *d0 = d;
	int u, chars;

	while (*s != 0) {
		chars = length;
		u = xmlGetUTF8Char((unsigned char *) s, &chars);
		if (chars <= 0) {
			/* invalid UTF-8: without this the cursor would never
			 * advance and the loop would write past the buffer;
			 * skip one byte and mark it as unrepresentable */
			s += 1;
			length -= 1;
			*d++ = '?';
			continue;
		}
		s += chars;
		length -= chars;

		if (u == 0x09 || u == 0x0a || u == 0x0d ||
				(0x20 <= u && u <= 0x7f) ||
				(0xa0 <= u && u <= 0xff))
			*d = u;
		else
			*d = '?';
		d++;
	}
	*d = 0;

	return d0;
}
|
|
|
|
|
2003-04-12 01:06:51 +04:00
|
|
|
/**
 * Convert a UTF-8 string to Latin-1 and collapse its whitespace.
 *
 * Runs tolat1() followed by squash_whitespace() and frees the intermediate
 * string.
 *
 * \param s  UTF-8 input string
 * \return the string produced by squash_whitespace()
 */

char *squash_tolat1(xmlChar *s)
{
	/* TODO: optimize */
	char *latin1 = tolat1(s);
	char *result = squash_whitespace(latin1);

	free(latin1);

	return result;
}
|
|
|
|
|
2003-12-26 03:17:55 +03:00
|
|
|
|
|
|
|
/**
|
|
|
|
* Calculate a URL from a relative and base URL.
|
|
|
|
*
|
2003-12-28 02:49:31 +03:00
|
|
|
* base may be 0 for a new URL, in which case the URL is canonicalized and
|
2003-12-26 03:17:55 +03:00
|
|
|
* returned. Returns 0 in case of error.
|
|
|
|
*/
|
|
|
|
|
|
|
|
char *url_join(char *rel_url, char *base_url)
|
2003-04-06 01:38:06 +04:00
|
|
|
{
|
2003-12-26 03:17:55 +03:00
|
|
|
char *res;
|
|
|
|
uri_t *base = 0, *rel = 0, *abs;
|
|
|
|
|
|
|
|
LOG(("rel_url = %s, base_url = %s", rel_url, base_url));
|
|
|
|
|
2003-12-28 02:49:31 +03:00
|
|
|
#ifdef riscos
|
|
|
|
/* hacky, hacky, hacky...
|
|
|
|
* It is, however, best to do this here as it avoids
|
|
|
|
* duplicating code for clicking links and url bar handling.
|
|
|
|
* It simplifies the code it the other places too (they just
|
|
|
|
* call this as usual, then we handle it here).
|
|
|
|
*/
|
2004-01-05 05:10:59 +03:00
|
|
|
#ifdef WITH_ABOUT
|
2003-12-28 02:49:31 +03:00
|
|
|
if (strcasecmp(rel_url, "about:") == 0) {
|
|
|
|
about_create();
|
|
|
|
return xstrdup(ABOUT_URL);
|
|
|
|
}
|
2004-01-05 05:10:59 +03:00
|
|
|
#ifdef WITH_COOKIES
|
2003-12-28 02:49:31 +03:00
|
|
|
else if (strcasecmp(rel_url, "about:cookies") == 0) {
|
|
|
|
cookie_create();
|
|
|
|
return xstrdup(COOKIE_URL);
|
|
|
|
}
|
2004-01-05 05:10:59 +03:00
|
|
|
#endif
|
|
|
|
else
|
|
|
|
#endif
|
|
|
|
if (strcasecmp(rel_url, "help:") == 0) {
|
2003-12-28 02:49:31 +03:00
|
|
|
return xstrdup(HELP_URL);
|
|
|
|
}
|
|
|
|
else if (strcasecmp(rel_url, "home:") == 0) {
|
|
|
|
return xstrdup(HOME_URL);
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
2003-12-26 03:17:55 +03:00
|
|
|
if (!base_url) {
|
2003-12-27 03:37:10 +03:00
|
|
|
res = uri_cannonicalize_string(rel_url,
|
|
|
|
(int)(strlen(rel_url)),
|
2003-12-26 03:17:55 +03:00
|
|
|
URI_STRING_URI_STYLE);
|
|
|
|
LOG(("res = %s", res));
|
|
|
|
if (res)
|
|
|
|
return xstrdup(res);
|
|
|
|
return 0;
|
|
|
|
}
|
2003-10-25 23:20:13 +04:00
|
|
|
|
2003-12-27 03:37:10 +03:00
|
|
|
base = uri_alloc(base_url, (int)(strlen(base_url)));
|
|
|
|
rel = uri_alloc(rel_url, (int)(strlen(rel_url)));
|
2003-12-26 03:17:55 +03:00
|
|
|
if (!base || !rel)
|
|
|
|
goto fail;
|
|
|
|
if (!base->scheme)
|
|
|
|
goto fail;
|
2003-04-06 01:38:06 +04:00
|
|
|
|
2003-12-26 03:17:55 +03:00
|
|
|
abs = uri_abs_1(base, rel);
|
2003-12-27 03:37:10 +03:00
|
|
|
|
2003-12-26 03:17:55 +03:00
|
|
|
res = xstrdup(uri_uri(abs));
|
|
|
|
|
|
|
|
uri_free(base);
|
|
|
|
uri_free(rel);
|
2003-12-27 03:37:10 +03:00
|
|
|
|
2003-12-26 03:17:55 +03:00
|
|
|
LOG(("res = %s", res));
|
|
|
|
return res;
|
|
|
|
|
|
|
|
fail:
|
|
|
|
if (base)
|
|
|
|
uri_free(base);
|
|
|
|
if (rel)
|
|
|
|
uri_free(rel);
|
|
|
|
|
|
|
|
LOG(("error"));
|
2003-05-22 17:21:45 +04:00
|
|
|
|
2003-12-26 03:17:55 +03:00
|
|
|
return 0;
|
2003-04-06 01:38:06 +04:00
|
|
|
}
|
|
|
|
|
2003-10-25 23:20:13 +04:00
|
|
|
|
2003-12-26 19:21:46 +03:00
|
|
|
/**
|
|
|
|
* Extract the host name from a url.
|
|
|
|
*
|
|
|
|
* \param url an absolute URL
|
|
|
|
* \return a new string, or 0 in case of error
|
|
|
|
*/
|
2003-10-25 23:20:13 +04:00
|
|
|
|
2003-12-26 19:21:46 +03:00
|
|
|
char *get_host_from_url(char *url)
|
|
|
|
{
|
|
|
|
char *host = 0;
|
|
|
|
uri_t *uri;
|
2003-10-25 23:20:13 +04:00
|
|
|
|
2003-12-27 03:37:10 +03:00
|
|
|
uri = uri_alloc(url, (int)(strlen(url)));
|
2003-12-26 19:21:46 +03:00
|
|
|
if (!uri)
|
|
|
|
return 0;
|
|
|
|
if (uri->host)
|
|
|
|
host = xstrdup(uri->host);
|
|
|
|
uri_free(uri);
|
2003-10-25 23:20:13 +04:00
|
|
|
|
2003-12-26 19:21:46 +03:00
|
|
|
return host;
|
2003-10-25 23:20:13 +04:00
|
|
|
}
|
2003-11-08 02:47:55 +03:00
|
|
|
|
|
|
|
|
|
|
|
/**
 * Check if a directory exists.
 *
 * \param path  file system path to test
 * \return true if stat() succeeds on path and reports a directory,
 *         false otherwise
 */

bool is_dir(const char *path)
{
	struct stat info;

	if (stat(path, &info) != 0)
		return false;

	if (S_ISDIR(info.st_mode))
		return true;

	return false;
}
|
2003-12-27 23:15:23 +03:00
|
|
|
|
|
|
|
|
|
|
|
/**
 * Compile a regular expression, handling errors.
 *
 * Parameters as for regcomp(), see man regex. If compilation fails, the
 * offending expression is printed to stderr and the regerror() text is
 * passed to die().
 */

void regcomp_wrapper(regex_t *preg, const char *regex, int cflags)
{
	char message[200];
	int status = regcomp(preg, regex, cflags);

	if (status == 0)
		return;

	regerror(status, preg, message, sizeof message);
	fprintf(stderr, "Failed to compile regexp '%s'\n", regex);
	die(message);
}
|
|
|
|
|
2003-12-28 05:35:46 +03:00
|
|
|
/**
 * Remove expired cookies from the cookie jar.
 * libcurl /really/ should do this for us.
 * This gets called every time a window is closed or NetSurf is quit.
 *
 * The file named by the "cookiefile" message is read into memory, the file
 * named by the "cookiejar" message is removed and recreated with a fresh
 * header, and every cookie line whose expiry time (the fifth tab-separated
 * field) lies in the future is written back unchanged. Comment lines, blank
 * lines and lines with fewer than seven fields are dropped.
 *
 * NOTE(review): the input is opened via "cookiefile" but the output is
 * written via "cookiejar" - presumably both name the same file; confirm.
 */

#ifdef WITH_COOKIES
void clean_cookiejar(void) {

	FILE *fp;
	long len = -1;
	size_t got;
	char *cookies = 0, *pos, *next, *p, *field;
	long int expiry;
	time_t now;
	int tabs;

	fp = fopen(messages_get("cookiefile"), "r");
	if (!fp) {
		LOG(("Failed to open cookie jar"));
		return;
	}

	/* read file length */
	if (fseek(fp, 0, SEEK_END) == 0)
		len = ftell(fp);
	if (len < 0 || fseek(fp, 0, SEEK_SET) != 0) {
		LOG(("Failed to read cookie jar"));
		fclose(fp);
		return;
	}

	/* one extra byte so the contents can be scanned as a C string */
	cookies = xcalloc((size_t) len + 1, sizeof(char));
	got = fread(cookies, sizeof(char), (size_t) len, fp);
	fclose(fp);
	/* terminate at what was actually read, which may be less than len */
	cookies[got] = 0;

	if (remove(messages_get("cookiejar"))) {
		LOG(("Failed to remove old jar"));
		xfree(cookies);
		return;
	}

	fp = fopen(messages_get("cookiejar"), "w+");
	if (!fp) {
		xfree(cookies);
		LOG(("Failed to create new jar"));
		return;
	}
	/* write header */
	fputs("# Netscape HTTP Cookie File\n"
		"# http://www.netscape.com/newsref/std/cookie_spec.html\n"
		"# This file was generated by libcurl! Edit at your own risk.\n\n",
		fp);

	now = time(NULL);
	for (pos = cookies; *pos != 0; pos = next) {
		/* cut the current line out of the buffer */
		next = strchr(pos, '\n');
		if (next)
			*next++ = 0;
		else
			next = pos + strlen(pos);

		/* comments are not carried over; the header was rewritten */
		if (*pos == '#')
			continue;

		/* count the separators and remember where field five starts */
		tabs = 0;
		field = 0;
		for (p = pos; *p != 0; p++) {
			if (*p == '\t' && ++tabs == 4)
				field = p + 1;
		}
		if (tabs < 6 || !field || !isdigit((unsigned char) *field))
			continue;

		expiry = strtol(field, 0, 10);
		if (now < expiry) { /* cookie hasn't expired */
			/* the line is written back verbatim, so no field is
			 * ever copied into a fixed-size buffer */
			fprintf(fp, "%s\n", pos);
		}
	}
	fclose(fp);

	xfree(cookies);
}
#endif
|