Support : Validation for Host Portion of URL

Partly Fixes #14377

Change-Id: Ia611d3653d2c16c6dcdc48ce57bd61bb6e6db366
Reviewed-on: https://review.haiku-os.org/476
Reviewed-by: Adrien Destugues <pulkomandy@pulkomandy.tk>
This commit is contained in:
Andrew Lindesay 2018-08-22 07:41:52 +02:00 committed by waddlesplash
parent df8b692ac0
commit 8f30879b98
4 changed files with 150 additions and 56 deletions

View File

@ -1,5 +1,5 @@
/*
* Copyright 2010-2016 Haiku Inc. All rights reserved.
* Copyright 2010-2018 Haiku Inc. All rights reserved.
* Distributed under the terms of the MIT License.
*/
#ifndef _B_URL_H_
@ -113,10 +113,18 @@ private:
static BString _DoUrlDecodeChunk(const BString& chunk,
bool strict);
bool _IsProtocolValid();
bool _IsHostValid() const;
bool _IsHostIPV6Valid(size_t offset,
int32 length) const;
bool _IsProtocolValid() const;
static bool _IsUnreserved(char c);
static bool _IsGenDelim(char c);
static bool _IsSubDelim(char c);
static bool _IsIPV6Char(char c);
static bool _IsUsernameChar(char c);
static bool _IsPasswordChar(char c);
static bool _IsHostChar(char c);
static bool _IsPortChar(char c);
BString _UrlMimeType() const;

View File

@ -1,5 +1,5 @@
/*
* Copyright 2010-2016 Haiku Inc. All rights reserved.
* Copyright 2010-2018 Haiku Inc. All rights reserved.
* Distributed under the terms of the MIT License.
*
* Authors:
@ -497,11 +497,19 @@ BUrl::IsValid() const
if (!fHasProtocol)
return false;
if (!_IsProtocolValid())
return false;
// it is possible that there can be an authority but no host.
// wierd://tea:tree@/x
if (HasHost() && !(fHost.IsEmpty() && HasAuthority()) && !_IsHostValid())
return false;
if (fProtocol == "http" || fProtocol == "https" || fProtocol == "ftp"
|| fProtocol == "ipp" || fProtocol == "afp" || fProtocol == "telnet"
|| fProtocol == "gopher" || fProtocol == "nntp" || fProtocol == "sftp"
|| fProtocol == "finger" || fProtocol == "pop" || fProtocol == "imap") {
return fHasHost && !fHost.IsEmpty();
return HasHost() && !fHost.IsEmpty();
}
if (fProtocol == "file")
@ -1124,40 +1132,6 @@ enum authority_parse_state {
AUTHORITY_COMPLETE
};
/*!	Tells whether \a c may appear in the user name part of an authority.
	Everything is accepted except the ':' that introduces the password and
	the '@' that terminates the user information.
*/
static bool
authority_is_username_char(char c)
{
	return c != ':' && c != '@';
}
/*!	Tells whether \a c may appear in the password part of an authority.
	Only the '@' that terminates the user information is rejected.
*/
static bool
authority_is_password_char(char c)
{
	return c != '@';
}
/*!	Tells whether \a c may appear between the square brackets of an IPv6
	host; that is a hexadecimal digit (either case) or the ':' separator.
*/
static bool
authority_is_ipv6_host_char(char c)
{
	if (c == ':')
		return true;

	if (c >= '0' && c <= '9')
		return true;

	const bool upperHex = c >= 'A' && c <= 'F';
	const bool lowerHex = c >= 'a' && c <= 'f';
	return upperHex || lowerHex;
}
/*!	Tells whether \a c may appear in the host part of an authority.
	Everything is accepted except the ':' that introduces the port and the
	'/' character.
*/
static bool
authority_is_host_char(char c)
{
	return c != ':' && c != '/';
}
/*!	Tells whether \a c may appear in the port part of an authority; only the
	decimal digits '0' to '9' are accepted.
*/
static bool
authority_is_port_char(char c)
{
	return !(c < '0' || c > '9');
}
void
BUrl::SetAuthority(const BString& authority)
{
@ -1185,8 +1159,7 @@ BUrl::SetAuthority(const BString& authority)
{
if (hasUsernamePassword) {
int32 end_username = char_offset_until_fn_false(
authority_c, length, offset,
authority_is_username_char);
authority_c, length, offset, _IsUsernameChar);
SetUserName(BString(&authority_c[offset],
end_username - offset));
@ -1204,8 +1177,7 @@ BUrl::SetAuthority(const BString& authority)
if (hasUsernamePassword && ':' == authority[offset]) {
offset++; // move past the delimiter
int32 end_password = char_offset_until_fn_false(
authority_c, length, offset,
authority_is_password_char);
authority_c, length, offset, _IsPasswordChar);
SetPassword(BString(&authority_c[offset],
end_password - offset));
@ -1232,8 +1204,7 @@ BUrl::SetAuthority(const BString& authority)
if (authority_c[offset] == '[') {
int32 end_ipv6_host = char_offset_until_fn_false(
authority_c, length, offset + 1,
authority_is_ipv6_host_char);
authority_c, length, offset + 1, _IsIPV6Char);
if (authority_c[end_ipv6_host] == ']') {
SetHost(BString(&authority_c[offset],
@ -1247,7 +1218,7 @@ BUrl::SetAuthority(const BString& authority)
if (AUTHORITY_HOST == state) {
int32 end_host = char_offset_until_fn_false(
authority_c, length, offset, authority_is_host_char);
authority_c, length, offset, _IsHostChar);
SetHost(BString(&authority_c[offset], end_host - offset));
state = AUTHORITY_PORT;
@ -1262,7 +1233,7 @@ BUrl::SetAuthority(const BString& authority)
if (authority_c[offset] == ':') {
offset++;
int32 end_port = char_offset_until_fn_false(
authority_c, length, offset, authority_is_port_char);
authority_c, length, offset, _IsPortChar);
SetPort(atoi(&authority_c[offset]));
offset = end_port;
}
@ -1345,7 +1316,50 @@ BUrl::_DoUrlDecodeChunk(const BString& chunk, bool strict)
/*!	Checks a sub-range of fHost for use as an IPv6 address.
	\param offset index into fHost of the first character to inspect.
	\param length number of characters to inspect from \a offset onwards.
	\return \c true when \a length is greater than zero and every inspected
		character satisfies _IsIPV6Char(); \c false otherwise.
	Only the character set is checked here; the layout of the address
	(number of groups, position of "::") is not examined.
*/
bool
BUrl::_IsProtocolValid()
BUrl::_IsHostIPV6Valid(size_t offset, int32 length) const
{
for (int32 i = 0; i < length; i++) {
char c = fHost[offset + i];
if (!_IsIPV6Char(c))
return false;
}
// an empty range (for example the host "[]") is not a valid address.
return length > 0;
}
bool
BUrl::_IsHostValid() const
{
if (fHost.StartsWith("[") && fHost.EndsWith("]"))
return _IsHostIPV6Valid(1, fHost.Length() - 2);
bool lastWasDot = false;
for (int32 i = 0; i < fHost.Length(); i++) {
char c = fHost[i];
if (c == '.') {
if (lastWasDot || i == 0)
return false;
lastWasDot = true;
} else {
lastWasDot = false;
}
if (!_IsHostChar(c) && c != '.') {
// the underscore is technically not allowed, but occurs sometimes
// in the wild.
return false;
}
}
return true;
}
bool
BUrl::_IsProtocolValid() const
{
for (int8 index = 0; index < fProtocol.Length(); index++) {
char c = fProtocol[index];
@ -1356,7 +1370,7 @@ BUrl::_IsProtocolValid()
return false;
}
return fProtocol.Length() > 0;
return !fProtocol.IsEmpty();
}
@ -1384,6 +1398,42 @@ BUrl::_IsSubDelim(char c)
}
/*!	Tells whether \a c may appear in the user name part of an authority.
	Everything is accepted except the ':' that introduces the password and
	the '@' that terminates the user information.
*/
bool
BUrl::_IsUsernameChar(char c)
{
	return c != ':' && c != '@';
}
/*!	Tells whether \a c may appear in the password part of an authority.
	Only the '@' that terminates the user information is rejected.
*/
bool
BUrl::_IsPasswordChar(char c)
{
	return c != '@';
}
bool
BUrl::_IsHostChar(char c)
{
return ((uint8) c) > 127 || isalnum(c) || c == '-' || c == '_' || c == '.'
|| c == '%';
}
/*!	Tells whether \a c may appear in the port part of an authority; only the
	decimal digits '0' to '9' are accepted.

	The range is compared directly rather than through isdigit(), because
	handing a plain \c char with the high bit set to a <ctype.h> function is
	undefined behaviour on platforms where \c char is signed.
*/
bool
BUrl::_IsPortChar(char c)
{
	return c >= '0' && c <= '9';
}
/*!	Tells whether \a c may appear between the square brackets of an IPv6
	host; that is a hexadecimal digit (either case) or the ':' separator.

	The character is converted to \c unsigned \c char before it reaches
	isxdigit(); passing a negative \c char value to a <ctype.h> function is
	undefined behaviour.
*/
bool
BUrl::_IsIPV6Char(char c)
{
	return c == ':' || isxdigit(static_cast<unsigned char>(c)) != 0;
}
BString
BUrl::_UrlMimeType() const
{

View File

@ -1,5 +1,5 @@
/*
* Copyright 2016, Andrew Lindesay, apl@lindesay.co.nz.
* Copyright 2016-2018, Andrew Lindesay, apl@lindesay.co.nz.
* Distributed under the terms of the MIT License.
*/
@ -34,7 +34,7 @@ NetworkUrlTest::tearDown()
}
// General Tests ---------------------------------------------------------------
// #pragma mark - General Tests
/*
This is the "happy days" tests that checks that a URL featuring all of the
@ -103,13 +103,13 @@ void NetworkUrlTest::TestHostWithFragment()
void NetworkUrlTest::TestIpv6HostPortPathAndRequest()
{
BUrl url("http://[123:123:0:123::123]:8080/some/path?key1=value1");
BUrl url("http://[123:a3:0:E3::123]:8080/some/path?key1=value1");
CPPUNIT_ASSERT(url.IsValid());
CPPUNIT_ASSERT(url.Protocol() == "http");
CPPUNIT_ASSERT(url.HasProtocol());
CPPUNIT_ASSERT(!url.HasUserName());
CPPUNIT_ASSERT(!url.HasPassword());
CPPUNIT_ASSERT(url.Host() == "[123:123:0:123::123]");
CPPUNIT_ASSERT(url.Host() == "[123:a3:0:E3::123]");
CPPUNIT_ASSERT(url.HasHost());
CPPUNIT_ASSERT(url.Port() == 8080);
CPPUNIT_ASSERT(url.HasPort());
@ -154,7 +154,7 @@ void NetworkUrlTest::TestDataUrl()
}
// Authority Tests (UserName, Password, Host, Port) ----------------------------
// #pragma mark - Authority Tests (UserName, Password, Host, Port)
void NetworkUrlTest::TestWithUserNameAndPasswordNoHostAndPort()
@ -299,7 +299,7 @@ void NetworkUrlTest::TestMailTo()
}
// Various Authority Checks ----------------------------------------------------
// #pragma mark - Various Authority Checks
void NetworkUrlTest::TestAuthorityNoUserName()
@ -353,7 +353,7 @@ void NetworkUrlTest::TestAuthorityBadPort()
}
// Invalid Forms ---------------------------------------------------------------
// #pragma mark - Invalid Forms
void NetworkUrlTest::TestWhitespaceBefore()
@ -391,7 +391,40 @@ void NetworkUrlTest::TestEmpty()
}
// Control ---------------------------------------------------------------------
// #pragma mark - Host validation
/*
	Checks BUrl::IsValid() against a series of host names. The first group
	of hosts is expected to be accepted and the second group is expected to
	be rejected.
*/
void NetworkUrlTest::TestBadHosts()
{
	// hosts that should be accepted

	CPPUNIT_ASSERT_MESSAGE("control check",
		BUrl("http://host.example.com").IsValid());
	CPPUNIT_ASSERT_MESSAGE("hyphen in middle",
		(BUrl("http://host.peppermint-tea.com").IsValid()));
	CPPUNIT_ASSERT_MESSAGE("underscore in middle",
		(BUrl("http://host.peppermint_tea.com").IsValid()));
	CPPUNIT_ASSERT_MESSAGE("dot at end",
		(BUrl("http://host.camomile.co.nz.").IsValid()));
	CPPUNIT_ASSERT_MESSAGE("simple host",
		(BUrl("http://tumeric").IsValid()));
	CPPUNIT_ASSERT_MESSAGE("idn domain encoded",
		(BUrl("http://xn--bcher-kva.tld").IsValid()));
	CPPUNIT_ASSERT_MESSAGE("idn domain unencoded",
		(BUrl("http://www.b\xc3\xbcch.at").IsValid()));

	// hosts that should be rejected

	CPPUNIT_ASSERT_MESSAGE("dot at start",
		!(BUrl("http://.host.example.com").IsValid()));
	CPPUNIT_ASSERT_MESSAGE("double dot in domain",
		!(BUrl("http://host.example..com").IsValid()));
	CPPUNIT_ASSERT_MESSAGE("unexpected characters",
		!(BUrl("http://<unexpected.characters>").IsValid()));
	CPPUNIT_ASSERT_MESSAGE("whitespace",
		!(BUrl("http://host.exa ple.com").IsValid()));
}
// #pragma mark - Control
/*static*/ void
@ -467,6 +500,8 @@ NetworkUrlTest::AddTests(BTestSuite& parent)
&NetworkUrlTest::TestFileUrl));
suite.addTest(new CppUnit::TestCaller<NetworkUrlTest>(
"NetworkUrlTest::TestValidFullUrl", &NetworkUrlTest::TestValidFullUrl));
suite.addTest(new CppUnit::TestCaller<NetworkUrlTest>(
"NetworkUrlTest::TestBadHosts", &NetworkUrlTest::TestBadHosts));
parent.addTest("NetworkUrlTest", &suite);
}

View File

@ -45,6 +45,7 @@ public:
void TestWhitespaceMiddle();
void TestHttpNoHost();
void TestEmpty();
void TestBadHosts();
static void AddTests(BTestSuite& suite);