aboutsummaryrefslogtreecommitdiff
path: root/web
diff options
context:
space:
mode:
author Karen Arutyunov <karen@codesynthesis.com> 2017-04-19 20:48:09 +0300
committer Karen Arutyunov <karen@codesynthesis.com> 2017-04-19 20:53:18 +0300
commit 6be5bc707876ece1cd09d7c304ba559512ef5257 (patch)
tree 2287f09e4cc21f3d4e07bc6faf62543921822b2c /web
parent 01adb23a543bc4c83ef9570117692261e88f61cd (diff)
Implement request body caching
Diffstat (limited to 'web')
-rw-r--r-- web/apache/request 32
-rw-r--r-- web/apache/request.cxx 319
-rw-r--r-- web/apache/request.ixx 46
-rw-r--r-- web/apache/stream 4
-rw-r--r-- web/module 19
5 files changed, 331 insertions, 89 deletions
diff --git a/web/apache/request b/web/apache/request
index 7f8cf8c..a35c5dc 100644
--- a/web/apache/request
+++ b/web/apache/request
@@ -50,17 +50,23 @@ namespace web
writing
};
+ // Extends istreambuf with read limit checking, caching, etc. (see the
+ // implementation for details).
+ //
+ class istreambuf_cache;
+
class request: public web::request,
public web::response,
public stream_state
{
friend class service;
- request (request_rec* rec) noexcept
- : rec_ (rec)
- {
- rec_->status = HTTP_OK;
- }
+ // Cannot be inline/defaulted because of the member of the
+ // unique_ptr<istreambuf_cache> type. Note that the istreambuf_cache type
+ // is incomplete here.
+ //
+ request (request_rec* rec) noexcept;
+ ~request ();
request_state
state () const noexcept {return state_;}
@@ -87,7 +93,7 @@ namespace web
// Get request body data stream.
//
virtual std::istream&
- content (bool buffer = false);
+ content (size_t limit = 0, size_t buffer = 0);
// Get request parameters.
//
@@ -128,7 +134,10 @@ namespace web
bool buffer = true);
private:
- // Get application/x-www-form-urlencoded form data.
+ // Get application/x-www-form-urlencoded form data. If request::content()
+ // has not been called yet (and so the limits are not specified), then set
+ // both of them to 64KB. Rewind the stream afterwards, so it's available to
+ // the application as well, unless no buffering was requested beforehand.
//
const std::string&
form_data ();
@@ -152,6 +161,12 @@ namespace web
virtual void
set_write_state () {state (request_state::writing);}
+ // Rewind the input stream (that must exist). Throw sequence_error if
+ // some unbuffered content has already been read.
+ //
+ void
+ rewind_istream ();
+
private:
request_rec* rec_;
request_state state_ = request_state::initial;
@@ -160,7 +175,8 @@ namespace web
std::unique_ptr<name_values> parameters_;
std::unique_ptr<name_values> cookies_;
std::unique_ptr<std::string> form_data_;
- std::unique_ptr<std::streambuf> in_buf_;
+
+ std::unique_ptr<istreambuf_cache> in_buf_;
std::unique_ptr<std::istream> in_;
std::unique_ptr<std::streambuf> out_buf_;
diff --git a/web/apache/request.cxx b/web/apache/request.cxx
index 4e9d1fa..f69fedc 100644
--- a/web/apache/request.cxx
+++ b/web/apache/request.cxx
@@ -10,9 +10,10 @@
#include <httpd.h> // request_rec, HTTP_*, OK
#include <http_protocol.h> // ap_*()
-#include <strings.h> // strcasecmp()
+#include <strings.h> // strcasecmp(), strncasecmp()
#include <ctime> // strftime(), time_t
+#include <vector>
#include <chrono>
#include <memory> // unique_ptr
#include <string>
@@ -20,11 +21,12 @@
#include <sstream>
#include <ostream>
#include <istream>
-#include <cstring> // str*(), size_t
+#include <cstring> // str*(), memcpy(), size_t
#include <utility> // move()
#include <stdexcept> // invalid_argument
#include <exception> // current_exception()
#include <streambuf>
+#include <algorithm> // min()
#include <butl/optional>
@@ -36,6 +38,211 @@ namespace web
{
namespace apache
{
+ // Extend the Apache stream buffer with read limit checking and optional
+ // caching of the content read. Replay the cached content after rewind().
+ //
+ class istreambuf_cache: public istreambuf
+ {
+ enum class mode
+ {
+ cache, // Read from Apache stream, save the read data into the cache.
+ replay, // Read from the cache.
+ proxy // Read from Apache stream (don't save into the cache).
+ };
+
+ public:
+ istreambuf_cache (size_t read_limit, size_t cache_limit,
+ request_rec* r,
+ stream_state& s,
+ size_t bufsize = 1024,
+ size_t putback = 1)
+ : istreambuf (r, s, bufsize, putback),
+ read_limit_ (read_limit), // Zero means no read limit.
+ cache_limit_ (cache_limit) // Zero means no caching.
+ {
+ }
+
+ void
+ rewind ()
+ {
+ // Fail if some of the content read has already bypassed the cache.
+ //
+ if (mode_ == mode::proxy)
+ throw sequence_error (
+ string ("web::apache::istreambuf_cache::rewind: ") +
+ (cache_limit_ > 0
+ ? "half-buffered"
+ : "unbuffered"));
+
+ mode_ = mode::replay; // Serve subsequent reads from the cache.
+ replay_pos_ = 0; // Start with the first cached chunk.
+ setg (nullptr, nullptr, nullptr); // Empty get area forces underflow().
+ }
+
+ void
+ limits (size_t read_limit, size_t cache_limit)
+ {
+ if (read_limit > 0) // Zero leaves the current read limit unchanged.
+ read_limit_ = read_limit;
+
+ if (cache_limit > 0) // Zero leaves the current cache limit unchanged.
+ {
+ // The cache limit cannot be increased if some of the content read has
+ // already bypassed the cache (proxy mode).
+ //
+ if (cache_limit > cache_limit_ && mode_ == mode::proxy)
+ throw sequence_error (
+ "web::apache::istreambuf_cache::limits: unbuffered");
+
+ cache_limit_ = cache_limit;
+ }
+ }
+
+ size_t read_limit () const noexcept {return read_limit_;}
+ size_t cache_limit () const noexcept {return cache_limit_;}
+
+ private:
+ virtual int_type
+ underflow ();
+
+ private:
+ // Limits (a zero read limit disables the check, see underflow()).
+ //
+ size_t read_limit_;
+ size_t cache_limit_;
+
+ // State
+ //
+ mode mode_ = mode::cache;
+ size_t read_bytes_ = 0; // Bytes read from the Apache stream so far.
+ bool eof_ = false; // End of Apache stream is reached.
+
+ // Cache (one chunk per cached read from the Apache stream).
+ //
+ struct chunk
+ {
+ vector<char> data; // Copy of the get area, putback area included.
+ size_t offset; // Position of the first unread byte in data.
+
+ chunk (vector<char>&& d, size_t o): data (move (d)), offset (o) {}
+
+ // Make the type move-constructible only, to avoid copying chunks on
+ // vector growth.
+ //
+ chunk (chunk&&) = default;
+ };
+
+ vector<chunk> cache_;
+ size_t cache_size_ = 0; // Cached bytes, not counting putback areas.
+ size_t replay_pos_ = 0; // Index of the next chunk to replay.
+ };
+
+ istreambuf_cache::int_type istreambuf_cache::
+ underflow ()
+ {
+ if (gptr () < egptr ()) // The get area is not exhausted yet.
+ return traits_type::to_int_type (*gptr ());
+
+ if (mode_ == mode::replay)
+ {
+ if (replay_pos_ < cache_.size ())
+ {
+ chunk& ch (cache_[replay_pos_++]);
+ char* p (ch.data.data ());
+ setg (p, p + ch.offset, p + ch.data.size ()); // Read from the chunk.
+ return traits_type::to_int_type (*gptr ());
+ }
+
+ // No more data to replay, so switch to the cache mode. That includes
+ // resetting eback, gptr and egptr, so they point into the istreambuf's
+ // internal buffer. The putback area should also be restored.
+ //
+ mode_ = mode::cache;
+
+ // Bail out if the end of stream is reached.
+ //
+ if (eof_)
+ return traits_type::eof ();
+
+ char* p (buf_.data () + putback_);
+ size_t pb (0);
+
+ // Restore the putback area if there is any cached data. Thanks to
+ // istreambuf, it's all in a single chunk.
+ //
+ if (!cache_.empty ())
+ {
+ chunk& ch (cache_.back ());
+ pb = min (putback_, ch.data.size ());
+ memcpy (p - pb, ch.data.data () + ch.data.size () - pb, pb);
+ }
+
+ setg (p - pb, p, p); // Empty get area preceded by the putback bytes.
+ }
+
+ // Delegate reading to the base class in the cache or proxy modes, but
+ // check for the read limit first.
+ //
+ if (read_limit_ && read_bytes_ >= read_limit_)
+ throw invalid_request (HTTP_REQUEST_ENTITY_TOO_LARGE,
+ "payload too large");
+
+ // Throws the sequence_error exception if some unbuffered content is
+ // already written.
+ //
+ int_type r (istreambuf::underflow ());
+
+ if (r == traits_type::eof ())
+ {
+ eof_ = true;
+ return r;
+ }
+
+ // Increment the read bytes counter.
+ //
+ size_t rb (egptr () - gptr ()); // Bytes made available by this read.
+ read_bytes_ += rb;
+
+ // In the cache mode save the read data if the cache limit is not
+ // reached, otherwise switch to the proxy mode.
+ //
+ if (mode_ == mode::cache)
+ {
+ // Not to complicate things we will copy the buffer into the cache
+ // together with the putback area, which is OK as it usually takes a
+ // small fraction of the buffer. For the same reason we will cache the
+ // whole data read even though we can exceed the limits by
+ // bufsize - putback - 1 bytes.
+ //
+ if (cache_size_ < cache_limit_)
+ {
+ chunk ch (vector<char> (eback (), egptr ()),
+ static_cast<size_t> (gptr () - eback ()));
+
+ cache_.emplace_back (move (ch));
+ cache_size_ += rb;
+ }
+ else
+ mode_ = mode::proxy; // This and subsequent reads bypass the cache.
+ }
+
+ return r;
+ }
+
+ // request
+ //
+ request::
+ request (request_rec* rec) noexcept
+ : rec_ (rec)
+ {
+ rec_->status = HTTP_OK; // Start out with the OK status.
+ }
+
+ request::
+ ~request () // Defined out of line, where istreambuf_cache is complete.
+ {
+ }
+
void request::
state (request_state s)
{
@@ -84,50 +291,57 @@ namespace web
void request::
rewind ()
{
- // @@ Request content buffering, and response cookies buffering are not
- // supported yet. When done will be possible to rewind in broader
- // range of cases.
+ // @@ Response cookies buffering is not supported yet. When done will be
+ // possible to rewind in broader range of cases.
//
+ if (state_ > request_state::reading)
+ throw sequence_error ("web::apache::request::rewind: unbuffered");
- if (state_ == request_state::initial ||
+ out_.reset ();
+ out_buf_.reset ();
- // Form data have been read. Lucky case, can rewind.
- //
- (state_ == request_state::reading &&
- dynamic_cast<stringbuf*> (in_buf_.get ()) != nullptr))
- {
- out_.reset ();
- out_buf_.reset ();
+ rec_->status = HTTP_OK;
- rec_->status = HTTP_OK;
+ ap_set_content_type (rec_, nullptr); // Unset the output content type.
- ap_set_content_type (rec_, nullptr);
+ if (in_ != nullptr)
+ rewind_istream ();
+ }
- if (in_)
- in_->seekg (0);
- }
- else
- throw sequence_error ("web::apache::request::rewind");
+ void request::
+ rewind_istream ()
+ {
+ assert (in_buf_ != nullptr && in_ != nullptr);
+
+ in_buf_->rewind (); // Throws if impossible to rewind.
+ in_->clear (); // Clears *bit flags (in particular eofbit).
}
istream& request::
- content (bool buffer)
+ content (size_t limit, size_t buffer)
{
- assert (!buffer); // Request content buffering is not implemented yet.
-
- if (!in_)
+ // Create the input stream/streambuf if not present, otherwise adjust the
+ // limits.
+ //
+ if (in_ == nullptr)
{
- unique_ptr<streambuf> in_buf (new istreambuf (rec_, *this));
+ unique_ptr<istreambuf_cache> in_buf (
+ new istreambuf_cache (limit, buffer, rec_, *this));
in_.reset (new istream (in_buf.get ()));
in_buf_ = move (in_buf);
in_->exceptions (istream::failbit | istream::badbit);
- // Save form data now otherwise will not be available to do later
- // when data already read from stream.
+ // Save form data now otherwise will not be available to do later when
+ // data is already read from stream.
//
form_data ();
}
+ else
+ {
+ assert (in_buf_ != nullptr);
+ in_buf_->limits (limit, buffer);
+ }
return *in_;
}
@@ -245,8 +459,8 @@ namespace web
if (!buffer)
// Request body will be discarded prior first byte of content is
- // written. Save form data now to make it available for furture
- // parameters () call.
+ // written. Save form data now to make it available for future
+ // parameters() call.
//
form_data ();
@@ -360,5 +574,52 @@ namespace web
n = e ? e + 1 : nullptr;
}
}
+
+ // Return the application/x-www-form-urlencoded form data of a POST request
+ // (an empty string for any other request). The data is read from the
+ // request content on the first call and is kept for the subsequent calls.
+ //
+ const string& request::
+ form_data ()
+ {
+ if (!form_data_)
+ {
+ form_data_.reset (new string ());
+
+ if (rec_->method_number == M_POST)
+ {
+ const char* ct (apr_table_get (rec_->headers_in, "Content-Type"));
+
+ // Compare only the first 33 characters (the media type itself), so
+ // that any trailing parameters are ignored.
+ //
+ if (ct != nullptr &&
+ strncasecmp ("application/x-www-form-urlencoded", ct, 33) == 0)
+ {
+ size_t limit (0);
+ bool rewind (true);
+
+ // Assign some reasonable (64K) input content read/cache limits if
+ // not done explicitly yet (with the request::content() call).
+ // Rewind afterwards unless the cache limit is set to zero.
+ //
+ if (in_buf_ == nullptr)
+ limit = 64 * 1024;
+ else
+ rewind = in_buf_->cache_limit () > 0;
+
+ istream& istr (content (limit, limit));
+
+ // Do not throw when eofbit is set (end of stream reached), and
+ // when failbit is set (getline() failed to extract any character).
+ //
+ istream::iostate e (istr.exceptions ()); // Save exception mask.
+ istr.exceptions (istream::badbit);
+ getline (istr, *form_data_);
+
+ // Note that exceptions() calls clear(rdstate()) and so throws
+ // straight away if the stream state contains a bit covered by the
+ // new mask (failbit is set if the content is empty). Thus, drop
+ // such bits before restoring the mask.
+ //
+ istr.clear (istr.rdstate () & ~e);
+ istr.exceptions (e); // Restore exception mask.
+
+ // Rewind the stream unless no buffering was requested beforehand.
+ //
+ if (rewind)
+ rewind_istream ();
+ }
+ }
+ }
+
+ return *form_data_;
+ }
}
}
diff --git a/web/apache/request.ixx b/web/apache/request.ixx
index 6dde6ce..4218756 100644
--- a/web/apache/request.ixx
+++ b/web/apache/request.ixx
@@ -2,14 +2,9 @@
// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd
// license : MIT; see accompanying LICENSE file
-#include <strings.h> // strncasecmp()
-
-#include <apr_tables.h> // apr_table_*
-
#include <http_protocol.h> // ap_*()
-#include <sstream>
-#include <utility> // move()
+#include <sstream> // stringbuf
namespace web
{
@@ -47,44 +42,5 @@ namespace web
? OK
: rec_->status;
}
-
- inline const std::string& request::
- form_data ()
- {
- if (!form_data_)
- {
- form_data_.reset (new std::string ());
-
- if (rec_->method_number == M_POST)
- {
- const char* ct (apr_table_get (rec_->headers_in, "Content-Type"));
-
- if (ct != nullptr &&
- strncasecmp ("application/x-www-form-urlencoded", ct, 33) == 0)
- {
- std::istream& istr (content ());
-
- // Do not throw when eofbit is set (end of stream reached), and
- // when failbit is set (getline() failed to extract any
- // character).
- //
- istr.exceptions (std::istream::badbit);
- std::getline (istr, *form_data_);
-
- // Make this data the content of the input stream, so it's
- // available for the application as well.
- //
- std::unique_ptr<std::streambuf> in_buf (
- new std::stringbuf (*form_data_));
-
- in_.reset (new std::istream (in_buf.get ()));
- in_buf_ = std::move (in_buf);
- in_->exceptions (std::istream::failbit | std::istream::badbit);
- }
- }
- }
-
- return *form_data_;
- }
}
}
diff --git a/web/apache/stream b/web/apache/stream
index 9230d1b..d4abb4e 100644
--- a/web/apache/stream
+++ b/web/apache/stream
@@ -113,7 +113,7 @@ namespace web
setg (p, p, p);
}
- private:
+ protected:
virtual int_type
underflow ()
{
@@ -138,7 +138,7 @@ namespace web
return traits_type::to_int_type (*gptr ());
}
- private:
+ protected:
size_t bufsize_;
size_t putback_;
std::vector<char> buf_;
diff --git a/web/module b/web/module
index b770d67..1e588a4 100644
--- a/web/module
+++ b/web/module
@@ -11,6 +11,7 @@
#include <iosfwd>
#include <chrono>
#include <cstdint> // uint16_t
+#include <cstddef> // size_t
#include <utility> // move()
#include <stdexcept> // runtime_error
@@ -111,13 +112,21 @@ namespace web
virtual const name_values&
cookies () = 0;
- // Get the stream to read the request content from. If the buffer argument
- // is false, then reading content after any unbuffered content has been
- // written or after a retry is undefined behavior. The implementation may
- // detect this and throw sequence_error but is not required to do so.
+ // Get the stream to read the request content from. If the limit argument
+ // is zero, then the content limit is left unchanged (unlimited initially).
+ // Otherwise the requested limit is set, and the invalid_request exception
+ // with the code 413 (payload too large) will be thrown when the specified
+ // limit is reached while reading from the stream. If the buffer argument
+ // is zero, then the buffer size is left unchanged (zero initially). If it
+ // is impossible to increase the buffer size (because, for example, some
+ // content is already read unbuffered), then the sequence_error is thrown.
+ //
+ // Note that unread input content is discarded when any unbuffered content
+ // is written, and any attempt to read it will result in the
+ // sequence_error exception being thrown.
//
virtual std::istream&
- content (bool buffer = false) = 0;
+ content (size_t limit = 0, size_t buffer = 0) = 0;
};
class response