Remove unused url_normalize() and don't include regex.h.

svn path=/trunk/netsurf/; revision=12971
This commit is contained in:
Michael Drake 2011-10-06 12:38:47 +00:00
parent 9493cec576
commit 3fde9589c1
2 changed files with 1 additions and 199 deletions

View File

@ -28,7 +28,6 @@
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>
#include <regex.h>
#include <unistd.h>
#include "curl/curl.h"
@ -168,208 +167,12 @@ out_true:
return true;
}
/**
 * Normalize a URL.
 *
 * \param  url     an absolute URL
 * \param  result  pointer to pointer to buffer to hold cleaned up url. Caller
 *                 gets ownership of pointer to buffer value. On failure the
 *                 pointer to buffer value will be NULL.
 * \return URL_FUNC_OK on success, URL_FUNC_NOMEM if the output buffer could
 *         not be allocated, URL_FUNC_FAILED if the text does not match the
 *         URL regex
 *
 * Leading and trailing whitespace is dropped and internal whitespace is
 * percent-encoded.  If there is no scheme, http:// is added.  The scheme and
 * host are lower-cased.  Default ports are removed (http only).  An empty
 * path is replaced with "/".  Characters are unescaped if safe.
 */
url_func_result url_normalize(const char *url, char **result)
{
	char c;
	int m;
	size_t i;
	size_t len;
	size_t bufsize;
	char *norm;
	bool http = false;
	regmatch_t match[10];

	*result = NULL;

	/* skip past any leading whitespace (likely if URL was copy-pasted).
	 * <ctype.h> functions need an unsigned char value, so cast: a plain
	 * (possibly signed) char >= 0x80 would otherwise be undefined. */
	while (isspace((unsigned char) *url))
		url++;

	/* allocate sufficiently large buffer for new URL */
	len = strlen(url);

	/* "+ 1" for the terminating NUL character; the rest leaves room for
	 * a prepended "http://" and an inserted "/" path. */
	bufsize = len + 1 + SLEN("http://") + SLEN("/");

	/* work out how much extra to leave for internal whitespace */
	for (i = 0; i < len; i++) {
		if (isspace((unsigned char) url[i]))
			bufsize += 2; /* ' ' -> '%20' */
	}

	if ((norm = malloc(bufsize)) == NULL) {
		LOG(("malloc failed"));
		return URL_FUNC_NOMEM;
	}

	*result = norm;
	strcpy(norm, url);

	/* truncate trailing whitespace (significant should be uriencoded).
	 * Testing len > 0 first avoids the size_t underflow of "len - 1"
	 * when the input is empty; norm[0] can never be whitespace here
	 * because leading whitespace was skipped above. */
	while (len > 0 && isspace((unsigned char) norm[len - 1])) {
		len--;
		norm[len] = '\0';
	}

	/* encode any remaining (internal) whitespace */
	for (i = 0; i < len; i++) {
		if (isspace((unsigned char) norm[i])) {
			/* unsigned so that ">> 4" yields the high nibble */
			unsigned char space = (unsigned char) norm[i];

			/* shift the tail (including NUL) up by two bytes */
			memmove(norm + i + 2, norm + i, 1 + len - i);
			len += 2;
			norm[  i] = '%';
			norm[++i] = digit2lowcase_hex(space >> 4);
			norm[++i] = digit2lowcase_hex(space & 0xf);
		}
	}

	/* finally verify that it's actually an URL we're working on
	 * (RFC regex too fussy to tolerate above WSP problems) */
	if (regexec(&url_re, norm, 10, match, 0)) {
		LOG(("url '%s' failed to match regex", url));
		free(norm);
		*result = NULL;
		return URL_FUNC_FAILED;
	}

	if (match[URL_RE_SCHEME].rm_so == -1) {
		/* scheme missing: add http:// and reparse */
		memmove(norm + SLEN("http://"), norm, len + 1);
		memcpy(norm, "http://", SLEN("http://")); /* do NOT copy NUL */
		len += SLEN("http://");

		if (regexec(&url_re, norm, 10, match, 0)) {
			LOG(("url '%s' failed to match regex", norm));
			free(norm);
			*result = NULL;
			return URL_FUNC_FAILED;
		}
	}

	/* see RFC 2616 section 3.2.3 */

	/* make scheme lower-case */
	if (match[URL_RE_SCHEME].rm_so != -1) {
		for (i = match[URL_RE_SCHEME].rm_so;
				(regoff_t) i != match[URL_RE_SCHEME].rm_eo; i++)
			norm[i] = tolower((unsigned char) norm[i]);

		/* remember whether the scheme is exactly "http" so the
		 * default port can be stripped below */
		if (match[URL_RE_SCHEME].rm_eo == 4
				&& norm[0] == 'h'
				&& norm[1] == 't'
				&& norm[2] == 't'
				&& norm[3] == 'p')
			http = true;
	}

	/* make empty path into "/" */
	if (match[URL_RE_PATH].rm_so != -1 &&
			match[URL_RE_PATH].rm_so == match[URL_RE_PATH].rm_eo) {
		memmove(norm + match[URL_RE_PATH].rm_so + 1,
				norm + match[URL_RE_PATH].rm_so,
				len - match[URL_RE_PATH].rm_so + 1);
		norm[match[URL_RE_PATH].rm_so] = '/';
		len++;
	}

	/* make host lower-case */
	if (match[URL_RE_AUTHORITY].rm_so != -1) {
		/* Find @ delimiting credentials from host, if any */
		for (i = match[URL_RE_AUTHORITY].rm_so;
				(regoff_t) i != match[URL_RE_AUTHORITY].rm_eo;
				i++) {
			if (norm[i] == '@') {
				i++;
				break;
			}
		}

		/* No credentials; transform entire host */
		if ((regoff_t) i == match[URL_RE_AUTHORITY].rm_eo)
			i = match[URL_RE_AUTHORITY].rm_so;

		for (; (regoff_t) i != match[URL_RE_AUTHORITY].rm_eo; i++) {
			if (norm[i] == ':' && (i + 3) < len) {
				if (http && norm[i + 1] == '8' &&
						norm[i + 2] == '0' &&
						(regoff_t) i + 3 ==
						match[URL_RE_AUTHORITY].rm_eo) {
					/* ":80" ends the authority of an http
					 * URL: drop the default port */
					memmove(norm + i,
						norm + i + 3,
						len -
						match[URL_RE_AUTHORITY].rm_eo);
					len -= 3;
					norm[len] = '\0';
				} else if ((regoff_t) i + 1 == match[4].rm_eo) {
					/* ':' is the last character of match
					 * group 4: drop the lone colon.
					 * NOTE(review): 4 is presumably
					 * URL_RE_AUTHORITY -- confirm. */
					memmove(norm + i,
						norm + i + 1,
						len -
						match[URL_RE_AUTHORITY].rm_eo);
					len--;
					norm[len] = '\0';
				}
				/* stop lower-casing at the first ':' */
				break;
			}
			norm[i] = tolower((unsigned char) norm[i]);
		}
	}

	/* unescape non-"reserved" escaped characters */
	for (i = 0; i + 2 < len; i++) {
		if (norm[i] != '%')
			continue;

		/* decode the two hex digits following '%' into m */
		c = tolower((unsigned char) norm[i + 1]);
		if ('0' <= c && c <= '9')
			m = 16 * (c - '0');
		else if ('a' <= c && c <= 'f')
			m = 16 * (c - 'a' + 10);
		else
			continue;

		c = tolower((unsigned char) norm[i + 2]);
		if ('0' <= c && c <= '9')
			m += c - '0';
		else if ('a' <= c && c <= 'f')
			m += c - 'a' + 10;
		else
			continue;

		/* leave controls, space, listed punctuation and anything
		 * >= 0x7f escaped; skip over the whole "%xx" sequence */
		if (m <= 0x20 || strchr(";/?:@&=+$," "<>#%\"{}|\\^[]`", m) ||
				m >= 0x7f) {
			i += 2;
			continue;
		}

		/* replace "%xx" by the literal character and close the gap
		 * (the move includes the terminating NUL) */
		norm[i] = m;
		memmove(norm + i + 1, norm + i + 3, len - i - 2);
		len -= 2;
	}

	/* norm and *result point to same memory, so just return ok */
	return URL_FUNC_OK;
}
/**
* Resolve a relative URL to absolute form.
*
* \param rel relative URL
* \param base base URL, must be absolute and cleaned as by url_normalize()
* \param base base URL, must be absolute and cleaned as by nsurl_create()
* \param result pointer to pointer to buffer to hold absolute url
* \return URL_FUNC_OK on success
*/

View File

@ -45,7 +45,6 @@ struct url_components {
void url_init(void);
bool url_host_is_ip_address(const char *host);
/* Normalize an absolute URL; on success *result receives a newly allocated
 * buffer that the caller owns, on failure *result is set to NULL. */
url_func_result url_normalize(const char *url, char **result);
/* Resolve the relative URL rel against the absolute URL base; the absolute
 * URL is returned through *result. */
url_func_result url_join(const char *rel, const char *base, char **result);
url_func_result url_host(const char *url, char **result);
url_func_result url_scheme(const char *url, char **result);