netsurf/utils/utils.c

238 lines
5.7 KiB
C
Raw Normal View History

/*
* This file is part of NetSurf, http://netsurf.sourceforge.net/
* Licensed under the GNU General Public License,
* http://www.opensource.org/licenses/gpl-license
* Copyright 2004 James Bursa <bursa@users.sourceforge.net>
* Copyright 2003 Phil Mellor <monkeyson@users.sourceforge.net>
* Copyright 2003 John M Bell <jmb202@ecs.soton.ac.uk>
* Copyright 2004 John Tytgat <John.Tytgat@aaug.net>
*/
#include <assert.h>
#include <ctype.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <regex.h>
#include <time.h>
#include "netsurf/utils/config.h"
#define NDEBUG
#include "netsurf/utils/log.h"
#include "netsurf/utils/messages.h"
#include "netsurf/utils/utf8.h"
#include "netsurf/utils/utils.h"
char * strip(char * const s)
{
size_t i;
for (i = strlen(s);
i != 0 && (s[i - 1] == ' ' || s[i - 1] == '\n' ||
s[i - 1] == '\r' || s[i - 1] == '\t');
i--)
;
s[i] = 0;
return s + strspn(s, " \t\r\n");
}
int whitespace(const char * str)
{
unsigned int i;
for (i = 0; i < strlen(str); i++)
if (!isspace(str[i]))
return 0;
return 1;
}
/**
* Replace consecutive whitespace with a single space.
*
* \param s source string
* \return heap allocated result, or 0 on memory exhaustion
*/
char * squash_whitespace(const char *s)
{
char *c = malloc(strlen(s) + 1);
int i = 0, j = 0;
if (!c)
return 0;
do {
if (s[i] == ' ' || s[i] == '\n' || s[i] == '\r' ||
s[i] == '\t') {
c[j++] = ' ';
while (s[i] == ' ' || s[i] == '\n' || s[i] == '\r' ||
s[i] == '\t')
i++;
}
c[j++] = s[i++];
} while (s[i - 1] != 0);
return c;
}
/**
* Converts NUL terminated UTF-8 encoded string s containing zero or more
* spaces (char 32) or TABs (char 9) to non-breaking spaces
* (0xC2 + 0xA0 in UTF-8 encoding).
*
* Caller needs to free() result. Returns NULL in case of error. No
* checking is done on validness of the UTF-8 input string.
*/
char *cnv_space2nbsp(const char *s)
{
const char *srcP;
char *d, *d0;
unsigned int numNBS;
/* Convert space & TAB into non breaking space character (0xA0) */
for (numNBS = 0, srcP = (const char *)s; *srcP != '\0'; ++srcP)
if (*srcP == ' ' || *srcP == '\t')
++numNBS;
if ((d = (char *)malloc((srcP - s) + numNBS + 1)) == NULL)
return NULL;
for (d0 = d, srcP = (const char *)s; *srcP != '\0'; ++srcP) {
if (*srcP == ' ' || *srcP == '\t') {
*d0++ = 0xC2;
*d0++ = 0xA0;
} else
*d0++ = *srcP;
}
*d0 = '\0';
return d;
}
/**
* Check if a directory exists.
*/
bool is_dir(const char *path)
{
struct stat s;
if (stat(path, &s))
return false;
return S_ISDIR(s.st_mode) ? true : false;
}
/**
* Compile a regular expression, handling errors.
*
* Parameters as for regcomp(), see man regex.
*/
void regcomp_wrapper(regex_t *preg, const char *regex, int cflags)
{
int r;
r = regcomp(preg, regex, cflags);
if (r) {
char errbuf[200];
regerror(r, preg, errbuf, sizeof errbuf);
fprintf(stderr, "Failed to compile regexp '%s'\n", regex);
die(errbuf);
}
}
/** We can have a fairly good estimate of how long the buffer needs to
* be. The unsigned long can store a value representing a maximum size
* of around 4 GB. Therefore the greatest space required is to
* represent 1023MB. Currently that would be represented as "1023MB" so 12
* including a null terminator.
* Ideally we would be able to know this value for sure, in the mean
* time the following should suffice.
**/
#define BYTESIZE_BUFFER_SIZE 20
/**
* Does a simple conversion which assumes the user speaks English. The buffer
* returned is one of three static ones so may change each time this call is
* made. Don't store the buffer for later use. It's done this way for
* convenience and to fight possible memory leaks, it is not necessarily pretty.
**/
char *human_friendly_bytesize(unsigned long bsize) {
static char buffer1[BYTESIZE_BUFFER_SIZE];
static char buffer2[BYTESIZE_BUFFER_SIZE];
static char buffer3[BYTESIZE_BUFFER_SIZE];
static char *curbuffer = buffer3;
float bytesize = (float)bsize;
if (curbuffer == buffer1)
curbuffer = buffer2;
else if (curbuffer == buffer2)
curbuffer = buffer3;
else
curbuffer = buffer1;
enum {bytes, kilobytes, megabytes, gigabytes} unit = bytes;
static char units[][7] = {"Bytes", "kBytes", "MBytes", "GBytes"};
if (bytesize > 1024) {
bytesize /= 1024;
unit = kilobytes;
}
if (bytesize > 1024) {
bytesize /= 1024;
unit = megabytes;
}
if (bytesize > 1024) {
bytesize /= 1024;
unit = gigabytes;
}
sprintf(curbuffer, "%3.2f%s", bytesize, messages_get(units[unit]));
return curbuffer;
}
/**
* Create an RFC 1123 compliant date string from a Unix timestamp
*
* \param t The timestamp to consider
* \return Pointer to buffer containing string - invalidated by next call.
*/
const char *rfc1123_date(time_t t)
{
static char ret[30];
struct tm *tm = gmtime(&t);
const char *days[] = { "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat" },
*months[] = { "Jan", "Feb", "Mar", "Apr", "May", "Jun",
"Jul", "Aug", "Sep", "Oct", "Nov", "Dec" };
snprintf(ret, sizeof ret, "%s, %02d %s %d %02d:%02d:%02d GMT",
days[tm->tm_wday], tm->tm_mday, months[tm->tm_mon],
tm->tm_year + 1900, tm->tm_hour, tm->tm_min,
tm->tm_sec);
return ret;
}
/**
* Case insensitive strstr implementation
*
* \param haystack String to search in
* \param needle String to look for
* \return Pointer to start of found substring, or NULL if not found
*/
char *strcasestr(const char *haystack, const char *needle)
{
size_t needle_len = strlen(needle);
const char * last_start = haystack + (strlen(haystack) - needle_len);
while (haystack <= last_start) {
if (strncasecmp(haystack, needle, needle_len) == 0)
return (char *)haystack;
haystack++;
}
return NULL;
}