BUrl: use a regex to parse URLs
* The RFC provides a regular expression for URI parsing, so just use it. * Allows parsing URIs with missing components (no scheme or authority). * This allows relative URLs to be parsed as expected. * Can also handle things such as data: or mailto: URIs. * Also more fixes to the handling of incomplete URIs; some flags weren't always set to the right values. This gets Windows Live Mail (or is it called Outlook?) working, with some other fixes on the WebKit side.
This commit is contained in:
parent
831819980e
commit
ced0e0be04
@ -16,6 +16,7 @@ public:
|
||||
BUrl(const char* url);
|
||||
BUrl(BMessage* archive);
|
||||
BUrl(const BUrl& other);
|
||||
BUrl(const BUrl& base, const BString& relative);
|
||||
BUrl();
|
||||
virtual ~BUrl();
|
||||
|
||||
@ -24,12 +25,12 @@ public:
|
||||
BUrl& SetProtocol(const BString& scheme);
|
||||
BUrl& SetUserName(const BString& user);
|
||||
BUrl& SetPassword(const BString& password);
|
||||
void SetAuthority(const BString& authority);
|
||||
BUrl& SetHost(const BString& host);
|
||||
BUrl& SetPort(int port);
|
||||
BUrl& SetPath(const BString& path);
|
||||
BUrl& SetRequest(const BString& request);
|
||||
BUrl& SetFragment(const BString& fragment);
|
||||
void Redirect(const BString& newLocation);
|
||||
|
||||
// URL fields access
|
||||
const BString& UrlString() const;
|
||||
@ -88,7 +89,6 @@ public:
|
||||
private:
|
||||
void _ResetFields();
|
||||
void _ExplodeUrlString(const BString& urlString);
|
||||
void _ExplodeAuthority();
|
||||
|
||||
static BString _DoUrlEncodeChunk(const BString& chunk,
|
||||
bool strict, bool directory = false);
|
||||
|
@ -301,7 +301,7 @@ BHttpRequest::_ProtocolLoop()
|
||||
if (fResult.StatusCode() == B_HTTP_STATUS_MOVED_PERMANENTLY) {
|
||||
BString locationUrl = fHeaders["Location"];
|
||||
|
||||
fUrl.Redirect(locationUrl);
|
||||
fUrl = BUrl(fUrl, locationUrl);
|
||||
|
||||
if (--maxRedirs > 0) {
|
||||
newRequest = true;
|
||||
|
@ -72,7 +72,7 @@ BUrl::BUrl(const BUrl& other)
|
||||
}
|
||||
|
||||
|
||||
BUrl::BUrl()
|
||||
// Constructs the URL obtained by resolving the reference "location" against
// "base", following the reference resolution algorithm of RFC 3986,
// section 5.2. Dot segments ("." and "..") are not removed yet, see the TODOs.
BUrl::BUrl(const BUrl& base, const BString& location)
	:
	fUrlString(),
	fProtocol(),
	fUser(),
	fPassword(),
	fHost(),
	fPort(0),
	fPath(),
	fRequest(),
	fHasAuthority(false)
{
	BUrl relative(location);

	if (relative.HasProtocol()) {
		// The reference is a complete URL: take everything from it.
		SetProtocol(relative.Protocol());
		SetAuthority(relative.Authority());
		SetPath(relative.Path()); // TODO _RemoveDotSegments()
		SetRequest(relative.Request());
	} else {
		if (relative.HasAuthority()) {
			// Network-path reference ("//host/path"): only the scheme is
			// inherited from the base.
			SetAuthority(relative.Authority());
			SetPath(relative.Path()); // TODO _RemoveDotSegments()
			SetRequest(relative.Request());
		} else {
			if (relative.Path().IsEmpty()) {
				// Same document: keep the base path, and the base query
				// unless the reference supplies its own.
				SetPath(base.Path());
				if (relative.HasRequest())
					SetRequest(relative.Request());
				else {
					// RFC 3986, section 5.2.2: T.query = Base.query. The
					// query has to come from "base"; this object's own
					// request is still empty at this point.
					SetRequest(base.Request());
				}
			} else {
				if (relative.Path()[0] == '/') {
					// Absolute path: replaces the base path entirely.
					SetPath(relative.Path());
				} else {
					BString path = base.Path();
					if (!base.Authority().IsEmpty() && path.IsEmpty()) {
						// RFC 3986, section 5.2.3: a base with an authority
						// and an empty path merges to "/" + reference, so
						// the path does not run into the host name.
						path = "/";
					} else {
						// Remove last part of path (the file, if any) so we
						// get the "current directory"
						path.Truncate(path.FindLast('/') + 1);
					}
					path += relative.Path();
					// TODO _RemoveDotSegments()
					SetPath(path);
				}
				SetRequest(relative.Request());
			}
			SetAuthority(base.Authority());
		}
		SetProtocol(base.Protocol());
	}

	// The fragment always comes from the reference, never from the base.
	SetFragment(relative.Fragment());
}
|
||||
|
||||
|
||||
// Default constructor: every string component starts out empty, the port is
// 0 and the URL is flagged as having no authority.
BUrl::BUrl()
	: fUrlString(), fProtocol(), fUser(), fPassword(), fHost(), fPort(0),
	fPath(), fRequest(), fHasAuthority(false)
{
}
|
||||
|
||||
@ -95,42 +150,6 @@ BUrl::~BUrl()
|
||||
// #pragma mark URL fields modifiers
|
||||
|
||||
|
||||
void
|
||||
BUrl::Redirect(const BString& newLocation)
|
||||
{
|
||||
BString oldUrl = UrlString();
|
||||
BUrl newUrl(newLocation);
|
||||
|
||||
if(newUrl.Protocol() != "")
|
||||
{
|
||||
*this = newUrl;
|
||||
} else {
|
||||
// the new location seems to be relative to ours.
|
||||
const BString& newPath = newUrl.Path();
|
||||
|
||||
if(newPath[0] == '/') {
|
||||
// new path is absolute, just adopt it
|
||||
SetPath(newPath);
|
||||
} else {
|
||||
// Path is relative, append it to current one
|
||||
// TODO resolve '..'
|
||||
BString path = Path();
|
||||
// Remove last part of path (the file, if any) so we get the
|
||||
// "current directory"
|
||||
path.Truncate(path.FindLast('/') + 1);
|
||||
path += newPath;
|
||||
SetPath(path);
|
||||
}
|
||||
|
||||
// Also copy request and fragment from the other URL
|
||||
if(newUrl.Request() != "")
|
||||
SetRequest(newUrl.Request());
|
||||
if(newUrl.Fragment() != "")
|
||||
SetRequest(newUrl.Fragment());
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
BUrl&
|
||||
BUrl::SetUrlString(const BString& url)
|
||||
{
|
||||
@ -143,7 +162,7 @@ BUrl&
|
||||
BUrl::SetProtocol(const BString& protocol)
{
	// Stores the scheme and returns *this so that setters can be chained.
	fProtocol = protocol;
	// An empty scheme means the URL has no protocol at all.
	fHasProtocol = !fProtocol.IsEmpty();
	// The assembled URL string no longer matches the fields.
	fUrlStringValid = false;
	return *this;
}
|
||||
@ -153,7 +172,7 @@ BUrl&
|
||||
BUrl::SetUserName(const BString& user)
|
||||
{
|
||||
fUser = user;
|
||||
fHasUserName = true;
|
||||
fHasUserName = !fUser.IsEmpty();
|
||||
fUrlStringValid = false;
|
||||
fAuthorityValid = false;
|
||||
fUserInfoValid = false;
|
||||
@ -165,7 +184,7 @@ BUrl&
|
||||
BUrl::SetPassword(const BString& password)
|
||||
{
|
||||
fPassword = password;
|
||||
fHasPassword = true;
|
||||
fHasPassword = !fPassword.IsEmpty();
|
||||
fUrlStringValid = false;
|
||||
fAuthorityValid = false;
|
||||
fUserInfoValid = false;
|
||||
@ -177,7 +196,7 @@ BUrl&
|
||||
BUrl::SetHost(const BString& host)
|
||||
{
|
||||
fHost = host;
|
||||
fHasHost = true;
|
||||
fHasHost = !fHost.IsEmpty();
|
||||
fUrlStringValid = false;
|
||||
fAuthorityValid = false;
|
||||
return *this;
|
||||
@ -188,7 +207,7 @@ BUrl&
|
||||
BUrl::SetPort(int port)
|
||||
{
|
||||
fPort = port;
|
||||
fHasPort = true;
|
||||
fHasPort = (port != 0);
|
||||
fUrlStringValid = false;
|
||||
fAuthorityValid = false;
|
||||
return *this;
|
||||
@ -199,7 +218,7 @@ BUrl&
|
||||
BUrl::SetPath(const BString& path)
{
	// Stores the path and returns *this so that setters can be chained.
	fPath = path;
	fHasPath = true; // RFC says an empty path is still a path
	// The assembled URL string no longer matches the fields.
	fUrlStringValid = false;
	return *this;
}
|
||||
@ -209,7 +228,7 @@ BUrl&
|
||||
BUrl::SetRequest(const BString& request)
{
	// Stores the request (query) part and returns *this so that setters can
	// be chained.
	fRequest = request;
	// An empty request means the URL has no request part.
	fHasRequest = !fRequest.IsEmpty();
	// The assembled URL string no longer matches the fields.
	fUrlStringValid = false;
	return *this;
}
|
||||
@ -219,7 +238,7 @@ BUrl&
|
||||
BUrl::SetFragment(const BString& fragment)
{
	// Stores the fragment and returns *this so that setters can be chained.
	fFragment = fragment;
	// An empty fragment means the URL has no fragment part.
	fHasFragment = !fFragment.IsEmpty();
	// The assembled URL string no longer matches the fields.
	fUrlStringValid = false;
	return *this;
}
|
||||
@ -637,7 +656,7 @@ BUrl::_ExplodeUrlString(const BString& url)
|
||||
// Authority (including user credentials, host, and port
|
||||
url.CopyInto(fAuthority, match.GroupStartOffsetAt(3),
|
||||
match.GroupEndOffsetAt(3) - match.GroupStartOffsetAt(3));
|
||||
_ExplodeAuthority();
|
||||
SetAuthority(fAuthority);
|
||||
|
||||
// Path
|
||||
url.CopyInto(fPath, match.GroupStartOffsetAt(4),
|
||||
@ -660,13 +679,18 @@ BUrl::_ExplodeUrlString(const BString& url)
|
||||
|
||||
|
||||
void
|
||||
BUrl::_ExplodeAuthority()
|
||||
BUrl::SetAuthority(const BString& authority)
|
||||
{
|
||||
fAuthority = authority;
|
||||
|
||||
fHasPort = false;
|
||||
fHasUserInfo = false;
|
||||
fHasHost = false;
|
||||
|
||||
if(fAuthority.IsEmpty())
|
||||
return;
|
||||
|
||||
fHasAuthority = true;
|
||||
|
||||
int32 userInfoEnd = fAuthority.FindFirst('@');
|
||||
|
||||
// URL contains userinfo field
|
||||
@ -677,18 +701,15 @@ BUrl::_ExplodeAuthority()
|
||||
int16 colonDelimiter = userInfo.FindFirst(':', 0);
|
||||
|
||||
if (colonDelimiter == 0) {
|
||||
fHasPassword = true;
|
||||
fPassword = userInfo;
|
||||
SetPassword(userInfo);
|
||||
} else if (colonDelimiter != -1) {
|
||||
fHasUserName = true;
|
||||
fHasPassword = true;
|
||||
|
||||
userInfo.CopyInto(fUser, 0, colonDelimiter);
|
||||
userInfo.CopyInto(fPassword, colonDelimiter + 1,
|
||||
userInfo.Length() - colonDelimiter);
|
||||
SetUserName(fUser);
|
||||
SetPassword(fPassword);
|
||||
} else {
|
||||
fHasUserName = true;
|
||||
fUser = userInfo;
|
||||
SetUserName(fUser);
|
||||
}
|
||||
|
||||
fHasUserInfo = true;
|
||||
@ -709,7 +730,7 @@ BUrl::_ExplodeAuthority()
|
||||
// defined, but in some weird cases, it's not.
|
||||
if (hostEnd != userInfoEnd) {
|
||||
fAuthority.CopyInto(fHost, userInfoEnd, hostEnd - userInfoEnd);
|
||||
fHasHost = true;
|
||||
SetHost(fHost);
|
||||
}
|
||||
|
||||
// Extract the port part
|
||||
@ -788,7 +809,7 @@ BUrl::_IsProtocolValid()
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
return fProtocol.Length() > 0;
|
||||
}
|
||||
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user