NetServices: Rework parsing of content-length and move to HttpParser

This fixes a bug in the HttpAuthTest, and in general, moves responsibility of determining
the shape and size of the content to the right place.

Still to do is to fix the case where there really is a variable length content. This will
now probably error out as a connection closed error.

Change-Id: I13c20028e834cc9af8c7bc1d7d7613bf60838e64
This commit is contained in:
Niels Sascha Reedijk 2022-08-08 07:58:45 +01:00
parent 9cb56a4881
commit 7b1d966cf2
7 changed files with 173 additions and 128 deletions

View File

@ -605,6 +605,19 @@ namespace Network {
*/
/*!
\fn size_t BHttpFields::CountFields(const std::string_view &name) const noexcept
\brief Count the number of fields that have this \a name.
\param name The name of the field you are looking for. Name matching will be done case
insensitively.
\return The number of fields with the \a name.
\since Haiku R1
*/
//! @}

View File

@ -115,6 +115,7 @@ public:
// Querying
ConstIterator FindField(const std::string_view& name) const noexcept;
size_t CountFields() const noexcept;
size_t CountFields(const std::string_view& name) const noexcept;
// Range-based iteration
ConstIterator begin() const noexcept;

View File

@ -461,6 +461,18 @@ BHttpFields::CountFields() const noexcept
}
size_t
BHttpFields::CountFields(const std::string_view& name) const noexcept
{
size_t count = 0;
for (auto it = fFields.cbegin(); it != fFields.cend(); it++) {
if ((*it).Name() == name)
count += 1;
}
return count;
}
BHttpFields::ConstIterator
BHttpFields::begin() const noexcept
{

View File

@ -15,6 +15,7 @@
#include <NetServicesDefs.h>
#include <ZlibCompressionAlgorithm.h>
using namespace std::literals;
using namespace BPrivate::Network;
@ -52,6 +53,7 @@ HttpParser::ParseStatus(HttpBuffer& buffer, BHttpStatus& status)
}
status.text = std::move(statusLine.value());
fStatus.code = status.code; // cache the status code
return true;
}
@ -62,6 +64,10 @@ HttpParser::ParseStatus(HttpBuffer& buffer, BHttpStatus& status)
The fields are parsed incrementally, meaning that even if the full header is not yet in the
\a buffer, it will still parse all complete fields and store them in the \a fields.
After all fields have been parsed, it will determine the properties of the response body.
This means it will determine whether there is any content compression, whether there is a
body, and if so, whether it has a fixed size or not.
\retval true All fields were successfully parsed
\retval false There is not enough data in the buffer to complete parsing of fields.
@ -78,13 +84,84 @@ HttpParser::ParseFields(HttpBuffer& buffer, BHttpFields& fields)
fieldLine = buffer.GetNextLine();
}
if (fieldLine && fieldLine.value().IsEmpty()){
// end of the header section of the message
return true;
} else {
if (!fieldLine || (fieldLine && !fieldLine.value().IsEmpty())){
// there is more to parse
return false;
}
// Determine the properties for the body
// RFC 7230 section 3.3.3 has a prioritized list of 7 rules around determining the body:
if (fBodyType == HttpBodyType::NoContent
|| fStatus.StatusCode() == BHttpStatusCode::NoContent
|| fStatus.StatusCode() == BHttpStatusCode::NotModified) {
// [1] In case of HEAD (set previously), status codes 1xx (TODO!), status code 204 or 304, no content
// [2] NOT SUPPORTED: when doing a CONNECT request, no content
fBodyType = HttpBodyType::NoContent;
} else if (auto header = fields.FindField("Transfer-Encoding"sv);
header != fields.end() && header->Value() == "chunked"sv) {
// [3] If there is a Transfer-Encoding heading set to 'chunked'
// TODO: support the more advanced rules in the RFC around the meaning of this field
fBodyType = HttpBodyType::Chunked;
} else if (fields.CountFields("Content-Length"sv) > 0) {
// [4] When there is no Transfer-Encoding, then look for Content-Length:
// - If there is more than one, the values must match
// - The value must be a valid number
// [5] If there is a valid value, then that is the expected size of the body
try {
auto contentLength = std::string();
for (const auto& field: fields) {
if (field.Name() == "Content-Length"sv) {
if (contentLength.size() == 0)
contentLength = field.Value();
else if (contentLength != field.Value()) {
throw BNetworkRequestError(__PRETTY_FUNCTION__,
BNetworkRequestError::ProtocolError,
"Multiple Content-Length fields with differing values");
}
}
}
auto bodyBytesTotal = std::stol(contentLength);
if (bodyBytesTotal == 0)
fBodyType = HttpBodyType::NoContent;
else {
fBodyType = HttpBodyType::FixedSize;
fBodyBytesTotal = bodyBytesTotal;
}
} catch (const std::logic_error& e) {
throw BNetworkRequestError(__PRETTY_FUNCTION__,
BNetworkRequestError::ProtocolError,
"Cannot parse Content-Length field value (logic_error)");
}
} else {
// [6] Applies to request messages only (this is a response)
// [7] If nothing else then the received message is all data until connection close
// (this is the default)
}
// Content-Encoding
auto header = fields.FindField("Content-Encoding"sv);
if (header != fields.end()
&& (header->Value() == "gzip" || header->Value() == "deflate"))
{
_SetGzipCompression();
}
return true;
}
/*!
	\brief Parse the body from the \a buffer and hand the data to \a writeToBody to save it.

	The work is dispatched on the body type of this parser: a body without content is
	skipped, a chunked body is handled by _ParseBodyChunked(), and every other body type is
	handled by _ParseBodyRaw().

	\param buffer The buffer that holds the received body data.
	\param writeToBody The function that is passed on to the helpers to store the body.

	\return 0 when the body has no content; otherwise the value returned by the helper that
		handled the body.
*/
size_t
HttpParser::ParseBody(HttpBuffer& buffer, HttpTransferFunction writeToBody)
{
	switch (fBodyType) {
		case HttpBodyType::NoContent:
			// nothing to read
			return 0;
		case HttpBodyType::Chunked:
			return _ParseBodyChunked(buffer, writeToBody);
		default:
			// all remaining body types are read as raw data
			return _ParseBodyRaw(buffer, writeToBody);
	}
}
@ -94,52 +171,21 @@ HttpParser::ParseFields(HttpBuffer& buffer, BHttpFields& fields)
\exception std::bad_alloc in case there is an error allocating memory.
*/
void
HttpParser::SetGzipCompression(bool compression)
HttpParser::_SetGzipCompression()
{
if (compression) {
fDecompressorStorage = std::make_unique<BMallocIO>();
fDecompressorStorage = std::make_unique<BMallocIO>();
BDataIO* stream = nullptr;
auto result = BZlibCompressionAlgorithm()
.CreateDecompressingOutputStream(fDecompressorStorage.get(), nullptr, stream);
if (result != B_OK) {
throw BNetworkRequestError(
"BZlibCompressionAlgorithm().CreateCompressingOutputStream",
BNetworkRequestError::SystemError, result);
}
BDataIO* stream = nullptr;
auto result = BZlibCompressionAlgorithm()
.CreateDecompressingOutputStream(fDecompressorStorage.get(), nullptr, stream);
fDecompressingStream = std::unique_ptr<BDataIO>(stream);
} else {
fDecompressingStream = nullptr;
fDecompressorStorage = nullptr;
if (result != B_OK) {
throw BNetworkRequestError(
"BZlibCompressionAlgorithm().CreateCompressingOutputStream",
BNetworkRequestError::SystemError, result);
}
}
/*!
\brief Set the content length of the body.
If a content length is set, the body will not be handled as a chunked transfer.
*/
void
HttpParser::SetContentLength(std::optional<off_t> contentLength) noexcept
{
fBodyBytesTotal = contentLength;
}
/*!
\brief Parse the body from the \a buffer and use \a writeToBody function to save.
*/
size_t
HttpParser::ParseBody(HttpBuffer& buffer, HttpTransferFunction writeToBody)
{
if (fBodyBytesTotal.has_value()) {
return _ParseBodyRaw(buffer, writeToBody);
} else {
return _ParseBodyChunked(buffer, writeToBody);
}
fDecompressingStream = std::unique_ptr<BDataIO>(stream);
}
@ -270,10 +316,14 @@ HttpParser::_ParseBodyChunked(HttpBuffer& buffer, HttpTransferFunction writeToBo
bool
HttpParser::Complete() const noexcept
{
if (_IsChunked())
if (fBodyType == HttpBodyType::Chunked)
return fBodyState == HttpBodyInputStreamState::Done;
return fBodyBytesTotal.value() == fTransferredBodySize;
else if (fBodyType == HttpBodyType::FixedSize)
return fBodyBytesTotal.value() == fTransferredBodySize;
else if (fBodyType == HttpBodyType::NoContent)
return true;
else
return false;
}
@ -333,13 +383,3 @@ HttpParser::_ReadChunk(HttpBuffer& buffer, HttpTransferFunction writeToBody, siz
}
return size;
}
/*!
	\brief Internal helper that tells whether the body is handled as a chunked transfer.

	The body is considered chunked exactly when no total body size is stored in
	fBodyBytesTotal.
*/
bool
HttpParser::_IsChunked() const noexcept
{
	return !fBodyBytesTotal.has_value();
}

View File

@ -32,32 +32,48 @@ enum class HttpBodyInputStreamState {
};
// Describes how HttpParser expects the body of the message to be delimited.
enum class HttpBodyType {
// No body is expected. Set through HttpParser::SetNoContent(), or by ParseFields() for the
// NoContent and NotModified status codes and for a Content-Length of 0.
NoContent,
// The body is sent as a chunked transfer (Transfer-Encoding field with value "chunked").
Chunked,
// The body has a known size, taken from a valid non-zero Content-Length field.
FixedSize,
// Initial value of the parser; kept when none of the other rules apply. Per the notes in
// ParseFields(), the body is then all data until the connection is closed.
VariableSize
};
class HttpParser {
public:
HttpParser() {};
// Explicitly mark request as having no content
void SetNoContent() { fBodyType = HttpBodyType::NoContent; };
// HTTP Header
bool ParseStatus(HttpBuffer& buffer, BHttpStatus& status);
bool ParseFields(HttpBuffer& buffer, BHttpFields& fields);
// HTTP Body
void SetGzipCompression(bool compression = true);
void SetContentLength(std::optional<off_t> contentLength) noexcept;
size_t ParseBody(HttpBuffer& buffer, HttpTransferFunction writeToBody);
void SetConnectionClosed();
// Details on the body status
bool HasContent() const noexcept { return fBodyType != HttpBodyType::NoContent; };
std::optional<off_t> BodyBytesTotal() const noexcept { return fBodyBytesTotal; };
off_t BodyBytesTransferred() const noexcept { return fTransferredBodySize; };
bool Complete() const noexcept;
private:
void _SetGzipCompression();
size_t _ParseBodyRaw(HttpBuffer& buffer, HttpTransferFunction writeToBody);
size_t _ParseBodyChunked(HttpBuffer& buffer, HttpTransferFunction writeToBody);
size_t _ReadChunk(HttpBuffer& buffer, HttpTransferFunction writeToBody,
size_t maxSize, bool flush);
bool _IsChunked() const noexcept;
private:
off_t fHeaderBytes = 0;
BHttpStatus fStatus;
// Body type
HttpBodyType fBodyType = HttpBodyType::VariableSize;
// Support for chunked transfers
HttpBodyInputStreamState fBodyState = HttpBodyInputStreamState::ChunkSize;

View File

@ -37,6 +37,7 @@
#include "HttpResultPrivate.h"
#include "NetServicesPrivate.h"
using namespace std::literals;
using namespace BPrivate::Network;
@ -122,11 +123,11 @@ private:
HttpParser fParser;
// Receive state
BHttpStatus fStatus;
BHttpFields fFields;
bool fNoContent = false;
// Redirection
std::optional<BHttpStatus> fRedirectStatus;
bool fMightRedirect = false;
int8 fRemainingRedirects;
// Connection counter
@ -692,6 +693,10 @@ BHttpSession::Request::Request(BHttpRequest&& request, std::unique_ptr<BDataIO>
// create shared data
fResult = std::make_shared<HttpResultPrivate>(identifier);
fResult->ownedBody = std::move(target);
// inform the parser when we do a HEAD request, so not to expect content
if (fRequest.Method() == BHttpMethod::Head)
fParser.SetNoContent();
}
@ -709,6 +714,10 @@ BHttpSession::Request::Request(Request& original, const BHttpSession::Redirect&
}
fRemainingRedirects = original.fRemainingRedirects--;
// inform the parser when we do a HEAD request, so not to expect content
if (fRequest.Method() == BHttpMethod::Head)
fParser.SetNoContent();
}
@ -871,13 +880,12 @@ BHttpSession::Request::ReceiveResult()
SendMessage(UrlEvent::ResponseStarted);
}
BHttpStatus status;
if (fParser.ParseStatus(fBuffer, status)) {
if (fParser.ParseStatus(fBuffer, fStatus)) {
// the status headers are now received, decide what to do next
// Determine if we can handle redirects; else notify of receiving status
if (fRemainingRedirects > 0) {
switch (status.StatusCode()) {
switch (fStatus.StatusCode()) {
case BHttpStatusCode::MovedPermanently:
case BHttpStatusCode::TemporaryRedirect:
case BHttpStatusCode::PermanentRedirect:
@ -891,34 +899,30 @@ BHttpSession::Request::ReceiveResult()
case BHttpStatusCode::SeeOther:
// These redirects redirect to GET, so we don't care if we can rewind the
// body; in this case redirect
fRedirectStatus = std::move(status);
fMightRedirect = true;
break;
default:
break;
}
}
// Register NoContent before moving the status to the result
if (status.StatusCode() == BHttpStatusCode::NoContent)
fNoContent = true;
if ((status.StatusClass() == BHttpStatusClass::ClientError
|| status.StatusClass() == BHttpStatusClass::ServerError)
if ((fStatus.StatusClass() == BHttpStatusClass::ClientError
|| fStatus.StatusClass() == BHttpStatusClass::ServerError)
&& fRequest.StopOnError())
{
fRequestStatus = ContentReceived;
fResult->SetStatus(std::move(status));
fResult->SetStatus(std::move(fStatus));
fResult->SetFields(BHttpFields());
fResult->SetBody();
return true;
}
if (!fRedirectStatus) {
if (!fMightRedirect) {
// we are not redirecting and there is no error, so inform listeners
SendMessage(UrlEvent::HttpStatus, [&status](BMessage& msg) {
msg.AddInt16(UrlEventData::HttpStatusCode, status.code);
SendMessage(UrlEvent::HttpStatus, [this](BMessage& msg) {
msg.AddInt16(UrlEventData::HttpStatusCode, fStatus.code);
});
fResult->SetStatus(std::move(status));
fResult->SetStatus(BHttpStatus{fStatus.code, std::move(fStatus.text)});
}
fRequestStatus = StatusReceived;
@ -938,9 +942,9 @@ BHttpSession::Request::ReceiveResult()
// The headers have been received, now set up the rest of the response handling
// Handle redirects
if (fRedirectStatus) {
if (fMightRedirect) {
auto redirectToGet = false;
switch (fRedirectStatus->StatusCode()) {
switch (fStatus.StatusCode()) {
case BHttpStatusCode::Found:
case BHttpStatusCode::SeeOther:
// 302 and 303 redirections convert all requests to GET request, except for HEAD
@ -950,7 +954,7 @@ BHttpSession::Request::ReceiveResult()
case BHttpStatusCode::TemporaryRedirect:
case BHttpStatusCode::PermanentRedirect:
{
std::cout << "ReceiveResult() [" << Id() << "] Handle redirect with status: " << fRedirectStatus->code << std::endl;
std::cout << "ReceiveResult() [" << Id() << "] Handle redirect with status: " << fStatus.code << std::endl;
auto locationField = fFields.FindField("Location");
if (locationField == fFields.end()) {
throw BNetworkRequestError(__PRETTY_FUNCTION__,
@ -976,63 +980,22 @@ BHttpSession::Request::ReceiveResult()
default:
// ignore other status codes and continue regular processing
SendMessage(UrlEvent::HttpStatus, [this](BMessage& msg) {
msg.AddInt16(UrlEventData::HttpStatusCode, fRedirectStatus->code);
msg.AddInt16(UrlEventData::HttpStatusCode, fStatus.code);
});
fResult->SetStatus(std::move(fRedirectStatus.value()));
fResult->SetStatus(BHttpStatus{fStatus.code, std::move(fStatus.text)});
break;
}
}
// TODO: Parse received cookies
// Handle Chunked Transfers
auto chunked = false;
auto header = fFields.FindField("Transfer-Encoding");
if (header != fFields.end() && header->Value() == "chunked") {
fParser.SetContentLength(std::nullopt);
chunked = true;
}
// Content-encoding
header = fFields.FindField("Content-Encoding");
if (header != fFields.end()
&& (header->Value() == "gzip" || header->Value() == "deflate"))
{
std::cout << "ReceiveResult() [" << Id() << "] Content-Encoding has compression: " << header->Value() << std::endl;
fParser.SetGzipCompression(true);
}
// Content-length
if (!chunked && !fNoContent && fRequest.Method() != BHttpMethod::Head) {
std::optional<off_t> bodyBytesTotal = std::nullopt;
header = fFields.FindField("Content-Length");
if (header != fFields.end()) {
try {
auto contentLength = std::string(header->Value());
bodyBytesTotal = std::stol(contentLength);
if (bodyBytesTotal.value() == 0)
fNoContent = true;
fParser.SetContentLength(bodyBytesTotal);
} catch (const std::logic_error& e) {
throw BNetworkRequestError(__PRETTY_FUNCTION__,
BNetworkRequestError::ProtocolError,
"Cannot parse Content-Length field value (logic_error)");
}
}
if (bodyBytesTotal == std::nullopt) {
throw BNetworkRequestError(__PRETTY_FUNCTION__,
BNetworkRequestError::ProtocolError, "Expected Content-Length field");
}
}
// Move headers to the result and inform listener
fResult->SetFields(std::move(fFields));
SendMessage(UrlEvent::HttpFields);
fRequestStatus = HeadersReceived;
if (fRequest.Method() == BHttpMethod::Head || fNoContent) {
// HEAD requests and requests with status 204 (No content) are finished
if (!fParser.HasContent()) {
// Any requests with no content are finished
std::cout << "ReceiveResult() [" << Id() << "] Request is completing without content" << std::endl;
fResult->SetBody();
SendMessage(UrlEvent::RequestCompleted, [](BMessage& msg) {

View File

@ -742,7 +742,7 @@ HttpIntegrationTest::PostTest()
CPPUNIT_ASSERT_EQUAL(kExpectedPostBody.Length(), result.Body().text.Length());
CPPUNIT_ASSERT(result.Body().text == kExpectedPostBody);
usleep(1000); // give some time to catch up on receiving all messages
usleep(2000); // give some time to catch up on receiving all messages
observer->Lock();
// Assert that the messages have the right contents.