mirror of
https://github.com/netsurf-browser/netsurf
synced 2024-11-23 14:59:47 +03:00
Remove unused url_normalize() and don't include regex.h.
svn path=/trunk/netsurf/; revision=12971
This commit is contained in:
parent
9493cec576
commit
3fde9589c1
199
utils/url.c
199
utils/url.c
@ -28,7 +28,6 @@
|
||||
#include <stdbool.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <regex.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include "curl/curl.h"
|
||||
@ -168,208 +167,12 @@ out_true:
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Normalize a URL.
|
||||
*
|
||||
* \param url an absolute URL
|
||||
* \param result pointer to pointer to buffer to hold cleaned up url. Caller
|
||||
* gets ownership of pointer to buffer value. On failure the
|
||||
* pointer to buffer value will be NULL.
|
||||
* \return URL_FUNC_OK on success
|
||||
*
|
||||
* If there is no scheme, http:// is added. The scheme and host are
|
||||
* lower-cased. Default ports are removed (http only). An empty path is
|
||||
* replaced with "/". Characters are unescaped if safe.
|
||||
*/
|
||||
|
||||
url_func_result url_normalize(const char *url, char **result)
|
||||
{
|
||||
char c;
|
||||
int m;
|
||||
size_t i;
|
||||
size_t len;
|
||||
size_t bufsize;
|
||||
char* norm;
|
||||
bool http = false;
|
||||
regmatch_t match[10];
|
||||
|
||||
*result = NULL;
|
||||
|
||||
/* skip past any leading whitespace (likely if URL was copy-pasted) */
|
||||
while (isspace(*url))
|
||||
url++;
|
||||
|
||||
/* allocate sufficiently large buffer for new URL */
|
||||
len = strlen(url);
|
||||
/* "+ 1" for the terminating NUL character. */
|
||||
bufsize = len + 1 + SLEN("http://") + SLEN("/");
|
||||
/* work out how much extra to leave for internal whitespace */
|
||||
for(i = 0; i < len; i++) {
|
||||
if(isspace(url[i])) bufsize += 2; /* ' ' -> '%20' */
|
||||
}
|
||||
if ((norm = malloc(bufsize)) == NULL) {
|
||||
LOG(("malloc failed"));
|
||||
return URL_FUNC_NOMEM;
|
||||
}
|
||||
*result = norm;
|
||||
strcpy(norm, url);
|
||||
|
||||
/* truncate trailing whitespace (significant should be uriencoded) */
|
||||
for (i = len - 1; (i > 0) && isspace(norm[i]); i--) {
|
||||
norm[i] = '\0';
|
||||
len--;
|
||||
}
|
||||
|
||||
/* encode any remaining (internal) whitespace */
|
||||
for (i = 0; i < len; i++) {
|
||||
if(isspace(norm[i])) {
|
||||
char space = norm[i];
|
||||
memmove(norm + i + 2, norm + i, 1 + len - i);
|
||||
len += 2;
|
||||
norm[ i] = '%';
|
||||
norm[++i] = digit2lowcase_hex(space >> 4);
|
||||
norm[++i] = digit2lowcase_hex(space & 0xf);
|
||||
}
|
||||
}
|
||||
|
||||
/* finally verify that it's actually an URL we're working on
|
||||
* (RFC regex too fussy to tolerate above WSP problems) */
|
||||
if (regexec(&url_re, norm, 10, match, 0)) {
|
||||
LOG(("url '%s' failed to match regex", url));
|
||||
free(norm);
|
||||
*result = NULL;
|
||||
return URL_FUNC_FAILED;
|
||||
}
|
||||
|
||||
if (match[URL_RE_SCHEME].rm_so == -1) {
|
||||
/* scheme missing: add http:// and reparse */
|
||||
memmove(norm + SLEN("http://"), norm, len + 1);
|
||||
memcpy(norm, "http://", SLEN("http://")); /* do NOT copy NUL */
|
||||
len += SLEN("http://");
|
||||
if (regexec(&url_re, norm, 10, match, 0)) {
|
||||
LOG(("url '%s' failed to match regex", norm));
|
||||
free(norm);
|
||||
*result = NULL;
|
||||
return URL_FUNC_FAILED;
|
||||
}
|
||||
}
|
||||
|
||||
/*for (unsigned int i = 0; i != 10; i++) {
|
||||
if (match[i].rm_so == -1)
|
||||
continue;
|
||||
fprintf(stderr, "%i: '%.*s'\n", i,
|
||||
match[i].rm_eo - match[i].rm_so,
|
||||
res + match[i].rm_so);
|
||||
}*/
|
||||
|
||||
/* see RFC 2616 section 3.2.3 */
|
||||
/* make scheme lower-case */
|
||||
if (match[URL_RE_SCHEME].rm_so != -1) {
|
||||
for (i = match[URL_RE_SCHEME].rm_so;
|
||||
(regoff_t) i != match[URL_RE_SCHEME].rm_eo; i++)
|
||||
norm[i] = tolower(norm[i]);
|
||||
if (match[URL_RE_SCHEME].rm_eo == 4
|
||||
&& norm[0] == 'h'
|
||||
&& norm[1] == 't'
|
||||
&& norm[2] == 't'
|
||||
&& norm[3] == 'p')
|
||||
http = true;
|
||||
}
|
||||
|
||||
/* make empty path into "/" */
|
||||
if (match[URL_RE_PATH].rm_so != -1 &&
|
||||
match[URL_RE_PATH].rm_so == match[URL_RE_PATH].rm_eo) {
|
||||
memmove(norm + match[URL_RE_PATH].rm_so + 1,
|
||||
norm + match[URL_RE_PATH].rm_so,
|
||||
len - match[URL_RE_PATH].rm_so + 1);
|
||||
norm[match[URL_RE_PATH].rm_so] = '/';
|
||||
len++;
|
||||
}
|
||||
|
||||
/* make host lower-case */
|
||||
if (match[URL_RE_AUTHORITY].rm_so != -1) {
|
||||
/* Find @ delimiting credentials from host, if any */
|
||||
for (i = match[URL_RE_AUTHORITY].rm_so;
|
||||
(regoff_t) i != match[URL_RE_AUTHORITY].rm_eo;
|
||||
i++) {
|
||||
if (norm[i] == '@') {
|
||||
i++;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* No credentials; transform entire host */
|
||||
if ((regoff_t) i == match[URL_RE_AUTHORITY].rm_eo)
|
||||
i = match[URL_RE_AUTHORITY].rm_so;
|
||||
|
||||
for (; (regoff_t) i != match[URL_RE_AUTHORITY].rm_eo; i++) {
|
||||
if (norm[i] == ':' && (i + 3) < len) {
|
||||
if (http && norm[i + 1] == '8' &&
|
||||
norm[i + 2] == '0' &&
|
||||
(regoff_t) i + 3 ==
|
||||
match[URL_RE_AUTHORITY].rm_eo) {
|
||||
memmove(norm + i,
|
||||
norm + i + 3,
|
||||
len -
|
||||
match[URL_RE_AUTHORITY].
|
||||
rm_eo);
|
||||
len -= 3;
|
||||
norm[len] = '\0';
|
||||
} else if ((regoff_t) i + 1 == match[4].rm_eo) {
|
||||
memmove(norm + i,
|
||||
norm + i + 1,
|
||||
len -
|
||||
match[URL_RE_AUTHORITY].
|
||||
rm_eo);
|
||||
len--;
|
||||
norm[len] = '\0';
|
||||
}
|
||||
break;
|
||||
}
|
||||
norm[i] = tolower(norm[i]);
|
||||
}
|
||||
}
|
||||
|
||||
/* unescape non-"reserved" escaped characters */
|
||||
for (i = 0; i + 2 < len; i++) {
|
||||
if (norm[i] != '%')
|
||||
continue;
|
||||
c = tolower(norm[i + 1]);
|
||||
if ('0' <= c && c <= '9')
|
||||
m = 16 * (c - '0');
|
||||
else if ('a' <= c && c <= 'f')
|
||||
m = 16 * (c - 'a' + 10);
|
||||
else
|
||||
continue;
|
||||
c = tolower(norm[i + 2]);
|
||||
if ('0' <= c && c <= '9')
|
||||
m += c - '0';
|
||||
else if ('a' <= c && c <= 'f')
|
||||
m += c - 'a' + 10;
|
||||
else
|
||||
continue;
|
||||
|
||||
if (m <= 0x20 || strchr(";/?:@&=+$," "<>#%\"{}|\\^[]`", m) ||
|
||||
m >= 0x7f) {
|
||||
i += 2;
|
||||
continue;
|
||||
}
|
||||
|
||||
norm[i] = m;
|
||||
memmove(norm + i + 1, norm + i + 3, len - i - 2);
|
||||
len -= 2;
|
||||
}
|
||||
|
||||
/* norm and *result point to same memory, so just return ok */
|
||||
return URL_FUNC_OK;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Resolve a relative URL to absolute form.
|
||||
*
|
||||
* \param rel relative URL
|
||||
* \param base base URL, must be absolute and cleaned as by url_normalize()
|
||||
* \param base base URL, must be absolute and cleaned as by nsurl_create()
|
||||
* \param result pointer to pointer to buffer to hold absolute url
|
||||
* \return URL_FUNC_OK on success
|
||||
*/
|
||||
|
@ -45,7 +45,6 @@ struct url_components {
|
||||
|
||||
void url_init(void);
|
||||
bool url_host_is_ip_address(const char *host);
|
||||
url_func_result url_normalize(const char *url, char **result);
|
||||
url_func_result url_join(const char *rel, const char *base, char **result);
|
||||
url_func_result url_host(const char *url, char **result);
|
||||
url_func_result url_scheme(const char *url, char **result);
|
||||
|
Loading…
Reference in New Issue
Block a user