aboutsummaryrefslogtreecommitdiff
path: root/web/apache/request.cxx
diff options
context:
space:
mode:
authorKaren Arutyunov <karen@codesynthesis.com>2017-04-19 20:48:09 +0300
committerKaren Arutyunov <karen@codesynthesis.com>2017-04-19 20:53:18 +0300
commit6be5bc707876ece1cd09d7c304ba559512ef5257 (patch)
tree2287f09e4cc21f3d4e07bc6faf62543921822b2c /web/apache/request.cxx
parent01adb23a543bc4c83ef9570117692261e88f61cd (diff)
Implement request body caching
Diffstat (limited to 'web/apache/request.cxx')
-rw-r--r--web/apache/request.cxx319
1 files changed, 290 insertions, 29 deletions
diff --git a/web/apache/request.cxx b/web/apache/request.cxx
index 4e9d1fa..f69fedc 100644
--- a/web/apache/request.cxx
+++ b/web/apache/request.cxx
@@ -10,9 +10,10 @@
#include <httpd.h> // request_rec, HTTP_*, OK
#include <http_protocol.h> // ap_*()
-#include <strings.h> // strcasecmp()
+#include <strings.h> // strcasecmp(), strncasecmp()
#include <ctime> // strftime(), time_t
+#include <vector>
#include <chrono>
#include <memory> // unique_ptr
#include <string>
@@ -20,11 +21,12 @@
#include <sstream>
#include <ostream>
#include <istream>
-#include <cstring> // str*(), size_t
+#include <cstring> // str*(), memcpy(), size_t
#include <utility> // move()
#include <stdexcept> // invalid_argument
#include <exception> // current_exception()
#include <streambuf>
+#include <algorithm> // min()
#include <butl/optional>
@@ -36,6 +38,211 @@ namespace web
{
namespace apache
{
+ // Extend the Apache stream with checking for the read limit and caching
+ // the content if requested. Replay the cached content after rewind.
+ //
+ class istreambuf_cache: public istreambuf
+ {
+ enum class mode
+ {
+ cache, // Read from Apache stream, save the read data into the cache.
+ replay, // Read from the cache.
+ proxy // Read from Apache stream (don't save into the cache).
+ };
+
+ public:
+ istreambuf_cache (size_t read_limit, size_t cache_limit,
+ request_rec* r,
+ stream_state& s,
+ size_t bufsize = 1024,
+ size_t putback = 1)
+ : istreambuf (r, s, bufsize, putback),
+ read_limit_ (read_limit),
+ cache_limit_ (cache_limit)
+ {
+ }
+
+ void
+ rewind ()
+ {
+ // Fail if some content is already missed in the cache.
+ //
+ if (mode_ == mode::proxy)
+ throw sequence_error (
+ string ("web::apache::istreambuf_cache::rewind: ") +
+ (cache_limit_ > 0
+ ? "half-buffered"
+ : "unbuffered"));
+
+ mode_ = mode::replay;
+ replay_pos_ = 0;
+ setg (nullptr, nullptr, nullptr);
+ }
+
+ void
+ limits (size_t read_limit, size_t cache_limit)
+ {
+ if (read_limit > 0)
+ read_limit_ = read_limit;
+
+ if (cache_limit > 0)
+ {
+ // We can not increase the cache limit if some content is already
+ // missed in the cache.
+ //
+ if (cache_limit > cache_limit_ && mode_ == mode::proxy)
+ throw sequence_error (
+ "web::apache::istreambuf_cache::limits: unbuffered");
+
+ cache_limit_ = cache_limit;
+ }
+ }
+
+ size_t read_limit () const noexcept {return read_limit_;}
+ size_t cache_limit () const noexcept {return cache_limit_;}
+
+ private:
+ virtual int_type
+ underflow ();
+
+ private:
+ // Limits
+ //
+ size_t read_limit_;
+ size_t cache_limit_;
+
+ // State
+ //
+ mode mode_ = mode::cache;
+ size_t read_bytes_ = 0;
+ bool eof_ = false; // End of Apache stream is reached.
+
+ // Cache
+ //
+ struct chunk
+ {
+ vector<char> data;
+ size_t offset;
+
+ chunk (vector<char>&& d, size_t o): data (move (d)), offset (o) {}
+
+ // Make the type move constructible-only to avoid copying of chunks on
+ // vector growth.
+ //
+ chunk (chunk&&) = default;
+ };
+
+ vector<chunk> cache_;
+ size_t cache_size_ = 0;
+ size_t replay_pos_ = 0;
+ };
+
+ istreambuf_cache::int_type istreambuf_cache::
+ underflow ()
+ {
+ if (gptr () < egptr ())
+ return traits_type::to_int_type (*gptr ());
+
+ if (mode_ == mode::replay)
+ {
+ if (replay_pos_ < cache_.size ())
+ {
+ chunk& ch (cache_[replay_pos_++]);
+ char* p (ch.data.data ());
+ setg (p, p + ch.offset, p + ch.data.size ());
+ return traits_type::to_int_type (*gptr ());
+ }
+
+ // No more data to replay, so switch to the cache mode. That includes
+ // resetting eback, gptr and egptr, so they point into the istreambuf's
+ // internal buffer. Putback area should also be restored.
+ //
+ mode_ = mode::cache;
+
+ // Bailout if the end of stream is reached.
+ //
+ if (eof_)
+ return traits_type::eof ();
+
+ char* p (buf_.data () + putback_);
+ size_t pb (0);
+
+ // Restore putback area if there is any cached data. Thanks to
+ // istreambuf, it's all in a single chunk.
+ //
+ if (!cache_.empty ())
+ {
+ chunk& ch (cache_.back ());
+ pb = min (putback_, ch.data.size ());
+ memcpy (p - pb, ch.data.data () + ch.data.size () - pb, pb);
+ }
+
+ setg (p - pb, p, p);
+ }
+
+ // Delegate reading to the base class in the cache or proxy modes, but
+ // check for the read limit first.
+ //
+ if (read_limit_ && read_bytes_ >= read_limit_)
+ throw invalid_request (HTTP_REQUEST_ENTITY_TOO_LARGE,
+ "payload too large");
+
+ // Throws the sequence_error exception if some unbuffered content is
+ // already written.
+ //
+ int_type r (istreambuf::underflow ());
+
+ if (r == traits_type::eof ())
+ {
+ eof_ = true;
+ return r;
+ }
+
+ // Increment the read bytes counter.
+ //
+ size_t rb (egptr () - gptr ());
+ read_bytes_ += rb;
+
+ // In the cache mode save the read data if the cache limit is not
+ // reached, otherwise switch to the proxy mode.
+ //
+ if (mode_ == mode::cache)
+ {
+ // Not to complicate things we will copy the buffer into the cache
+ // together with the putback area, which is OK as it usually takes a
+ // small fraction of the buffer. By the same reason we will cache the
+ // whole data read even though we can exceed the limits by
+ // bufsize - putback - 1 bytes.
+ //
+ if (cache_size_ < cache_limit_)
+ {
+ chunk ch (vector<char> (eback (), egptr ()),
+ static_cast<size_t> (gptr () - eback ()));
+
+ cache_.emplace_back (move (ch));
+ cache_size_ += rb;
+ }
+ else
+ mode_ = mode::proxy;
+ }
+
+ return r;
+ }
+
+ // request
+ //
+ request::
+ request (request_rec* rec) noexcept
+ : rec_ (rec)
+ {
+ rec_->status = HTTP_OK;
+ }
+
+ request::
+ ~request ()
+ {
+ }
+
void request::
state (request_state s)
{
@@ -84,50 +291,57 @@ namespace web
void request::
rewind ()
{
- // @@ Request content buffering, and response cookies buffering are not
- // supported yet. When done will be possible to rewind in broader
- // range of cases.
+ // @@ Response cookies buffering is not supported yet. When done will be
+ // possible to rewind in broader range of cases.
//
+ if (state_ > request_state::reading)
+ throw sequence_error ("web::apache::request::rewind: unbuffered");
- if (state_ == request_state::initial ||
+ out_.reset ();
+ out_buf_.reset ();
- // Form data have been read. Lucky case, can rewind.
- //
- (state_ == request_state::reading &&
- dynamic_cast<stringbuf*> (in_buf_.get ()) != nullptr))
- {
- out_.reset ();
- out_buf_.reset ();
+ rec_->status = HTTP_OK;
- rec_->status = HTTP_OK;
+ ap_set_content_type (rec_, nullptr); // Unset the output content type.
- ap_set_content_type (rec_, nullptr);
+ if (in_ != nullptr)
+ rewind_istream ();
+ }
- if (in_)
- in_->seekg (0);
- }
- else
- throw sequence_error ("web::apache::request::rewind");
+ void request::
+ rewind_istream ()
+ {
+ assert (in_buf_ != nullptr && in_ != nullptr);
+
+ in_buf_->rewind (); // Throws if impossible to rewind.
+ in_->clear (); // Clears *bit flags (in particular eofbit).
}
istream& request::
- content (bool buffer)
+ content (size_t limit, size_t buffer)
{
- assert (!buffer); // Request content buffering is not implemented yet.
-
- if (!in_)
+ // Create the input stream/streambuf if not present, otherwise adjust the
+ // limits.
+ //
+ if (in_ == nullptr)
{
- unique_ptr<streambuf> in_buf (new istreambuf (rec_, *this));
+ unique_ptr<istreambuf_cache> in_buf (
+ new istreambuf_cache (limit, buffer, rec_, *this));
in_.reset (new istream (in_buf.get ()));
in_buf_ = move (in_buf);
in_->exceptions (istream::failbit | istream::badbit);
- // Save form data now otherwise will not be available to do later
- // when data already read from stream.
+ // Save form data now otherwise will not be available to do later when
+ // data is already read from stream.
//
form_data ();
}
+ else
+ {
+ assert (in_buf_ != nullptr);
+ in_buf_->limits (limit, buffer);
+ }
return *in_;
}
@@ -245,8 +459,8 @@ namespace web
if (!buffer)
// Request body will be discarded prior first byte of content is
- // written. Save form data now to make it available for furture
- // parameters () call.
+ // written. Save form data now to make it available for future
+ // parameters() call.
//
form_data ();
@@ -360,5 +574,52 @@ namespace web
n = e ? e + 1 : nullptr;
}
}
+
+ const string& request::
+ form_data ()
+ {
+ if (!form_data_)
+ {
+ form_data_.reset (new string ());
+
+ if (rec_->method_number == M_POST)
+ {
+ const char* ct (apr_table_get (rec_->headers_in, "Content-Type"));
+
+ if (ct != nullptr &&
+ strncasecmp ("application/x-www-form-urlencoded", ct, 33) == 0)
+ {
+ size_t limit (0);
+ bool rewind (true);
+
+ // Assign some reasonable (64K) input content read/cache limits if
+ // not done explicitly yet (with the request::content() call).
+ // Rewind afterwards unless the cache limit is set to zero.
+ //
+ if (in_buf_ == nullptr)
+ limit = 64 * 1024;
+ else
+ rewind = in_buf_->cache_limit () > 0;
+
+ istream& istr (content (limit, limit));
+
+ // Do not throw when eofbit is set (end of stream reached), and
+ // when failbit is set (getline() failed to extract any character).
+ //
+ istream::iostate e (istr.exceptions ()); // Save exception mask.
+ istr.exceptions (istream::badbit);
+ getline (istr, *form_data_);
+ istr.exceptions (e); // Restore exception mask.
+
+ // Rewind the stream unless no buffering were requested beforehand.
+ //
+ if (rewind)
+ rewind_istream ();
+ }
+ }
+ }
+
+ return *form_data_;
+ }
}
}