mirror of
https://github.com/netsurf-browser/netsurf
synced 2024-11-24 15:29:45 +03:00
355799ce0b
svn path=/trunk/netsurf/; revision=9729
466 lines
12 KiB
C
466 lines
12 KiB
C
/*
|
|
* Copyright 2007 Rob Kendrick <rjek@netsurf-browser.org>
|
|
* Copyright 2004-2007 James Bursa <bursa@users.sourceforge.net>
|
|
* Copyright 2003 Phil Mellor <monkeyson@users.sourceforge.net>
|
|
* Copyright 2003 John M Bell <jmb202@ecs.soton.ac.uk>
|
|
* Copyright 2004 John Tytgat <joty@netsurf-browser.org>
|
|
*
|
|
* This file is part of NetSurf, http://www.netsurf-browser.org/
|
|
*
|
|
* NetSurf is free software; you can redistribute it and/or modify
|
|
* it under the terms of the GNU General Public License as published by
|
|
* the Free Software Foundation; version 2 of the License.
|
|
*
|
|
* NetSurf is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License
|
|
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
*/
|
|
|
|
#include <assert.h>
|
|
#include <ctype.h>
|
|
#include <stdlib.h>
|
|
#include <stdio.h>
|
|
#include <string.h>
|
|
#include <strings.h>
|
|
#include <sys/stat.h>
|
|
#include <sys/types.h>
|
|
#include <sys/time.h>
|
|
#include <regex.h>
|
|
#include <time.h>
|
|
#include "utils/config.h"
|
|
#define NDEBUG
|
|
#include "utils/log.h"
|
|
#undef NDEBUG
|
|
#include "utils/messages.h"
|
|
#include "utils/utf8.h"
|
|
#include "utils/utils.h"
|
|
|
|
|
|
char * strip(char * const s)
|
|
{
|
|
size_t i;
|
|
for (i = strlen(s);
|
|
i != 0 && (s[i - 1] == ' ' || s[i - 1] == '\n' ||
|
|
s[i - 1] == '\r' || s[i - 1] == '\t');
|
|
i--)
|
|
;
|
|
s[i] = 0;
|
|
return s + strspn(s, " \t\r\n");
|
|
}
|
|
|
|
int whitespace(const char * str)
|
|
{
|
|
unsigned int i;
|
|
for (i = 0; i < strlen(str); i++)
|
|
if (!isspace(str[i]))
|
|
return 0;
|
|
return 1;
|
|
}
|
|
|
|
/**
|
|
* returns a string without its underscores
|
|
* \param replacespace true to insert a space where there was an underscore
|
|
*/
|
|
|
|
char *remove_underscores(const char *s, bool replacespace)
|
|
{
|
|
size_t i, ii, len;
|
|
char *ret;
|
|
len = strlen(s);
|
|
ret = malloc(len + 1);
|
|
if (ret == NULL)
|
|
return NULL;
|
|
for (i = 0, ii = 0; i < len; i++) {
|
|
if (s[i] != '_')
|
|
ret[ii++] = s[i];
|
|
else if (replacespace)
|
|
ret[ii++] = ' ';
|
|
}
|
|
ret[ii] = '\0';
|
|
return ret;
|
|
}
|
|
|
|
/**
|
|
* Replace consecutive whitespace with a single space.
|
|
*
|
|
* \param s source string
|
|
* \return heap allocated result, or 0 on memory exhaustion
|
|
*/
|
|
|
|
char * squash_whitespace(const char *s)
|
|
{
|
|
char *c = malloc(strlen(s) + 1);
|
|
int i = 0, j = 0;
|
|
if (!c)
|
|
return 0;
|
|
do {
|
|
if (s[i] == ' ' || s[i] == '\n' || s[i] == '\r' ||
|
|
s[i] == '\t') {
|
|
c[j++] = ' ';
|
|
while (s[i] == ' ' || s[i] == '\n' || s[i] == '\r' ||
|
|
s[i] == '\t')
|
|
i++;
|
|
}
|
|
c[j++] = s[i++];
|
|
} while (s[i - 1] != 0);
|
|
return c;
|
|
}
|
|
|
|
|
|
/**
|
|
* Converts NUL terminated UTF-8 encoded string s containing zero or more
|
|
* spaces (char 32) or TABs (char 9) to non-breaking spaces
|
|
* (0xC2 + 0xA0 in UTF-8 encoding).
|
|
*
|
|
* Caller needs to free() result. Returns NULL in case of error. No
|
|
* checking is done on validness of the UTF-8 input string.
|
|
*/
|
|
char *cnv_space2nbsp(const char *s)
|
|
{
|
|
const char *srcP;
|
|
char *d, *d0;
|
|
unsigned int numNBS;
|
|
/* Convert space & TAB into non breaking space character (0xA0) */
|
|
for (numNBS = 0, srcP = (const char *)s; *srcP != '\0'; ++srcP)
|
|
if (*srcP == ' ' || *srcP == '\t')
|
|
++numNBS;
|
|
if ((d = (char *)malloc((srcP - s) + numNBS + 1)) == NULL)
|
|
return NULL;
|
|
for (d0 = d, srcP = (const char *)s; *srcP != '\0'; ++srcP) {
|
|
if (*srcP == ' ' || *srcP == '\t') {
|
|
*d0++ = 0xC2;
|
|
*d0++ = 0xA0;
|
|
} else
|
|
*d0++ = *srcP;
|
|
}
|
|
*d0 = '\0';
|
|
return d;
|
|
}
|
|
|
|
/**
|
|
* Check if a directory exists.
|
|
*/
|
|
|
|
bool is_dir(const char *path)
|
|
{
|
|
struct stat s;
|
|
|
|
if (stat(path, &s))
|
|
return false;
|
|
|
|
return S_ISDIR(s.st_mode) ? true : false;
|
|
}
|
|
|
|
|
|
/**
|
|
* Compile a regular expression, handling errors.
|
|
*
|
|
* Parameters as for regcomp(), see man regex.
|
|
*/
|
|
|
|
void regcomp_wrapper(regex_t *preg, const char *regex, int cflags)
|
|
{
|
|
int r;
|
|
r = regcomp(preg, regex, cflags);
|
|
if (r) {
|
|
char errbuf[200];
|
|
regerror(r, preg, errbuf, sizeof errbuf);
|
|
fprintf(stderr, "Failed to compile regexp '%s'\n", regex);
|
|
die(errbuf);
|
|
}
|
|
}
|
|
|
|
/** We can have a fairly good estimate of how long the buffer needs to
|
|
* be. The unsigned long can store a value representing a maximum size
|
|
* of around 4 GB. Therefore the greatest space required is to
|
|
* represent 1023MB. Currently that would be represented as "1023MB" so 12
|
|
* including a null terminator.
|
|
* Ideally we would be able to know this value for sure, in the mean
|
|
* time the following should suffice.
|
|
**/
|
|
|
|
#define BYTESIZE_BUFFER_SIZE 20
|
|
|
|
/**
|
|
* Does a simple conversion which assumes the user speaks English. The buffer
|
|
* returned is one of three static ones so may change each time this call is
|
|
* made. Don't store the buffer for later use. It's done this way for
|
|
* convenience and to fight possible memory leaks, it is not necessarily pretty.
|
|
**/
|
|
|
|
char *human_friendly_bytesize(unsigned long bsize) {
|
|
static char buffer1[BYTESIZE_BUFFER_SIZE];
|
|
static char buffer2[BYTESIZE_BUFFER_SIZE];
|
|
static char buffer3[BYTESIZE_BUFFER_SIZE];
|
|
static char *curbuffer = buffer3;
|
|
enum {bytes, kilobytes, megabytes, gigabytes} unit = bytes;
|
|
static char units[][7] = {"Bytes", "kBytes", "MBytes", "GBytes"};
|
|
|
|
float bytesize = (float)bsize;
|
|
|
|
if (curbuffer == buffer1)
|
|
curbuffer = buffer2;
|
|
else if (curbuffer == buffer2)
|
|
curbuffer = buffer3;
|
|
else
|
|
curbuffer = buffer1;
|
|
|
|
if (bytesize > 1024) {
|
|
bytesize /= 1024;
|
|
unit = kilobytes;
|
|
}
|
|
|
|
if (bytesize > 1024) {
|
|
bytesize /= 1024;
|
|
unit = megabytes;
|
|
}
|
|
|
|
if (bytesize > 1024) {
|
|
bytesize /= 1024;
|
|
unit = gigabytes;
|
|
}
|
|
|
|
sprintf(curbuffer, "%3.2f%s", bytesize, messages_get(units[unit]));
|
|
|
|
return curbuffer;
|
|
}
|
|
|
|
/**
|
|
* Create an RFC 1123 compliant date string from a Unix timestamp
|
|
*
|
|
* \param t The timestamp to consider
|
|
* \return Pointer to buffer containing string - invalidated by next call.
|
|
*/
|
|
const char *rfc1123_date(time_t t)
|
|
{
|
|
static char ret[30];
|
|
|
|
struct tm *tm = gmtime(&t);
|
|
const char *days[] = { "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat" },
|
|
*months[] = { "Jan", "Feb", "Mar", "Apr", "May", "Jun",
|
|
"Jul", "Aug", "Sep", "Oct", "Nov", "Dec" };
|
|
|
|
snprintf(ret, sizeof ret, "%s, %02d %s %d %02d:%02d:%02d GMT",
|
|
days[tm->tm_wday], tm->tm_mday, months[tm->tm_mon],
|
|
tm->tm_year + 1900, tm->tm_hour, tm->tm_min,
|
|
tm->tm_sec);
|
|
|
|
return ret;
|
|
}
|
|
|
|
/**
|
|
* Returns a number of centiseconds, that increases in real time, for the
|
|
* purposes of measuring how long something takes in wall-clock terms. It uses
|
|
* gettimeofday() for this. Should the call to gettimeofday() fail, it returns
|
|
* zero.
|
|
*
|
|
* \return number of centiseconds that increases monotonically
|
|
*/
|
|
unsigned int wallclock(void)
|
|
{
|
|
struct timeval tv;
|
|
|
|
if (gettimeofday(&tv, NULL) == -1)
|
|
return 0;
|
|
|
|
return ((tv.tv_sec * 100) + (tv.tv_usec / 10000));
|
|
}
|
|
|
|
#ifndef HAVE_STRCASESTR
|
|
|
|
/**
|
|
* Case insensitive strstr implementation
|
|
*
|
|
* \param haystack String to search in
|
|
* \param needle String to look for
|
|
* \return Pointer to start of found substring, or NULL if not found
|
|
*/
|
|
char *strcasestr(const char *haystack, const char *needle)
|
|
{
|
|
size_t needle_len = strlen(needle);
|
|
const char * last_start = haystack + (strlen(haystack) - needle_len);
|
|
|
|
while (haystack <= last_start) {
|
|
if (strncasecmp(haystack, needle, needle_len) == 0)
|
|
return (char *)haystack;
|
|
haystack++;
|
|
}
|
|
|
|
return NULL;
|
|
}
|
|
|
|
#endif
|
|
|
|
#ifndef HAVE_STRNDUP
|
|
|
|
/**
|
|
* Duplicate up to n characters of a string.
|
|
*/
|
|
|
|
char *strndup(const char *s, size_t n)
|
|
{
|
|
size_t len;
|
|
char *s2;
|
|
|
|
for (len = 0; len != n && s[len]; len++)
|
|
continue;
|
|
|
|
s2 = malloc(len + 1);
|
|
if (!s2)
|
|
return 0;
|
|
|
|
memcpy(s2, s, len);
|
|
s2[len] = 0;
|
|
return s2;
|
|
}
|
|
|
|
#endif
|
|
|
|
#ifndef HAVE_STRCHRNUL
|
|
|
|
/**
|
|
* Find the first occurrence of C in S or the final NUL byte.
|
|
*
|
|
* \note This implementation came from glibc 2.2.5
|
|
*/
|
|
char *strchrnul (const char *s, int c_in)
|
|
{
|
|
const unsigned char *char_ptr;
|
|
const unsigned long int *longword_ptr;
|
|
unsigned long int longword, magic_bits, charmask;
|
|
unsigned char c;
|
|
|
|
c = (unsigned char) c_in;
|
|
|
|
/* Handle the first few characters by reading one character at a time.
|
|
Do this until CHAR_PTR is aligned on a longword boundary. */
|
|
for (char_ptr = (const unsigned char *)s; ((unsigned long int) char_ptr
|
|
& (sizeof (longword) - 1)) != 0;
|
|
++char_ptr)
|
|
if (*char_ptr == c || *char_ptr == '\0')
|
|
return (void *) char_ptr;
|
|
|
|
/* All these elucidatory comments refer to 4-byte longwords,
|
|
but the theory applies equally well to 8-byte longwords. */
|
|
|
|
longword_ptr = (unsigned long int *) char_ptr;
|
|
|
|
/* Bits 31, 24, 16, and 8 of this number are zero. Call these bits
|
|
the "holes." Note that there is a hole just to the left of
|
|
each byte, with an extra at the end:
|
|
|
|
bits: 01111110 11111110 11111110 11111111
|
|
bytes: AAAAAAAA BBBBBBBB CCCCCCCC DDDDDDDD
|
|
|
|
The 1-bits make sure that carries propagate to the next 0-bit.
|
|
The 0-bits provide holes for carries to fall into. */
|
|
switch (sizeof (longword))
|
|
{
|
|
case 4: magic_bits = 0x7efefeffL; break;
|
|
case 8: magic_bits = ((0x7efefefeL << 16) << 16) | 0xfefefeffL; break;
|
|
default:
|
|
abort ();
|
|
}
|
|
|
|
/* Set up a longword, each of whose bytes is C. */
|
|
charmask = c | (c << 8);
|
|
charmask |= charmask << 16;
|
|
if (sizeof (longword) > 4)
|
|
/* Do the shift in two steps to avoid a warning if long has 32 bits. */
|
|
charmask |= (charmask << 16) << 16;
|
|
if (sizeof (longword) > 8)
|
|
abort ();
|
|
|
|
/* Instead of the traditional loop which tests each character,
|
|
we will test a longword at a time. The tricky part is testing
|
|
if *any of the four* bytes in the longword in question are zero. */
|
|
for (;;)
|
|
{
|
|
/* We tentatively exit the loop if adding MAGIC_BITS to
|
|
LONGWORD fails to change any of the hole bits of LONGWORD.
|
|
|
|
1) Is this safe? Will it catch all the zero bytes?
|
|
Suppose there is a byte with all zeros. Any carry bits
|
|
propagating from its left will fall into the hole at its
|
|
least significant bit and stop. Since there will be no
|
|
carry from its most significant bit, the LSB of the
|
|
byte to the left will be unchanged, and the zero will be
|
|
detected.
|
|
|
|
2) Is this worthwhile? Will it ignore everything except
|
|
zero bytes? Suppose every byte of LONGWORD has a bit set
|
|
somewhere. There will be a carry into bit 8. If bit 8
|
|
is set, this will carry into bit 16. If bit 8 is clear,
|
|
one of bits 9-15 must be set, so there will be a carry
|
|
into bit 16. Similarly, there will be a carry into bit
|
|
24. If one of bits 24-30 is set, there will be a carry
|
|
into bit 31, so all of the hole bits will be changed.
|
|
|
|
The one misfire occurs when bits 24-30 are clear and bit
|
|
31 is set; in this case, the hole at bit 31 is not
|
|
changed. If we had access to the processor carry flag,
|
|
we could close this loophole by putting the fourth hole
|
|
at bit 32!
|
|
|
|
So it ignores everything except 128's, when they're aligned
|
|
properly.
|
|
|
|
3) But wait! Aren't we looking for C as well as zero?
|
|
Good point. So what we do is XOR LONGWORD with a longword,
|
|
each of whose bytes is C. This turns each byte that is C
|
|
into a zero. */
|
|
|
|
longword = *longword_ptr++;
|
|
|
|
/* Add MAGIC_BITS to LONGWORD. */
|
|
if ((((longword + magic_bits)
|
|
|
|
/* Set those bits that were unchanged by the addition. */
|
|
^ ~longword)
|
|
|
|
/* Look at only the hole bits. If any of the hole bits
|
|
are unchanged, most likely one of the bytes was a
|
|
zero. */
|
|
& ~magic_bits) != 0 ||
|
|
|
|
/* That caught zeroes. Now test for C. */
|
|
((((longword ^ charmask) + magic_bits) ^ ~(longword ^ charmask))
|
|
& ~magic_bits) != 0)
|
|
{
|
|
/* Which of the bytes was C or zero?
|
|
If none of them were, it was a misfire; continue the search. */
|
|
|
|
const unsigned char *cp = (const unsigned char *) (longword_ptr - 1);
|
|
|
|
if (*cp == c || *cp == '\0')
|
|
return (char *) cp;
|
|
if (*++cp == c || *cp == '\0')
|
|
return (char *) cp;
|
|
if (*++cp == c || *cp == '\0')
|
|
return (char *) cp;
|
|
if (*++cp == c || *cp == '\0')
|
|
return (char *) cp;
|
|
if (sizeof (longword) > 4)
|
|
{
|
|
if (*++cp == c || *cp == '\0')
|
|
return (char *) cp;
|
|
if (*++cp == c || *cp == '\0')
|
|
return (char *) cp;
|
|
if (*++cp == c || *cp == '\0')
|
|
return (char *) cp;
|
|
if (*++cp == c || *cp == '\0')
|
|
return (char *) cp;
|
|
}
|
|
}
|
|
}
|
|
|
|
/* This should never happen. */
|
|
return NULL;
|
|
}
|
|
|
|
#endif
|