1 files changed, 484 insertions, 108 deletions
diff --git a/web/apache/request.cxx b/web/apache/request.cxx
index 32a1737..a019183 100644
--- a/web/apache/request.cxx
+++ b/web/apache/request.cxx
@@ -4,13 +4,20 @@
 
 #include <web/apache/request.hxx>
 
-#include <apr_tables.h>  // apr_table_*, apr_array_header_t
+#include <apr.h>         // APR_SIZE_MAX
+#include <apr_errno.h>   // apr_status_t, APR_SUCCESS, APR_E*, apr_strerror()
+#include <apr_tables.h>  // apr_table_*, apr_table_*(), apr_array_header_t
 #include <apr_strings.h> // apr_pstrdup()
+#include <apr_buckets.h> // apr_bucket*, apr_bucket_*(), apr_brigade_*(),
+                         // APR_BRIGADE_*()
 
 #include <httpd.h>         // request_rec, HTTP_*, OK
 #include <http_protocol.h> // ap_*()
 
-#include <strings.h> // strcasecmp(), strncasecmp()
+#include <apreq2/apreq.h>        // APREQ_*
+#include <apreq2/apreq_util.h>   // apreq_brigade_copy()
+#include <apreq2/apreq_param.h>  // apreq_param_t, apreq_value_to_param()
+#include <apreq2/apreq_parser.h> // apreq_parser_t, apreq_parser_make()
 
 #include <ctime>     // strftime(), time_t
 #include <vector>
@@ -22,11 +29,13 @@
 #include <istream>
 #include <cstring>   // str*(), memcpy(), size_t
 #include <utility>   // move()
-#include <stdexcept> // invalid_argument
+#include <iterator>  // istreambuf_iterator
+#include <stdexcept> // invalid_argument, runtime_error
 #include <exception> // current_exception()
 #include <streambuf>
 #include <algorithm> // min()
 
+#include <libbutl/utility.mxx>   // casecmp()
 #include <libbutl/optional.mxx>
 #include <libbutl/timestamp.mxx>
 
@@ -39,6 +48,13 @@ namespace web
 {
   namespace apache
   {
+    [[noreturn]] static void
+    throw_internal_error (apr_status_t s, const string& what)
+    {
+      char buf[1024];
+      throw runtime_error (what + ": " + apr_strerror (s, buf, sizeof (buf)));
+    }
+
     // Extend the Apache stream with checking for the read limit and caching
     // the content if requested. Replay the cached content after rewind.
     //
@@ -160,7 +176,7 @@ namespace web
         //
         mode_ = mode::cache;
 
-        // Bailout if the end of stream is reached.
+        // Bail out if the end of stream is reached.
         //
         if (eof_)
           return traits_type::eof ();
@@ -230,6 +246,152 @@ namespace web
       return r;
     }
 
+    // Stream interface for reading from the Apache's bucket brigade. Put back
+    // is not supported.
+    //
+    // Note that reading from a brigade bucket modifies the brigade in the
+    // general case. For example, reading from a file bucket adds a new heap
+    // bucket before the file bucket on every read. Traversing/reading through
+    // such a bucket brigade effectively loads the whole file into the memory,
+    // so the subsequent brigade traversal results in iterating over the
+    // loaded heap buckets.
+    //
+    // To avoid such a behavior we will make a shallow copy of the original
+    // bucket brigade, initially and for each rewind. Then, instead of
+    // iterating, we will always read from the first bucket removing it after
+    // the use.
+    //
+    class istreambuf_buckets: public streambuf
+    {
+    public:
+      // The bucket brigade must exist during the object's lifetime.
+      //
+      explicit
+      istreambuf_buckets (const apr_bucket_brigade* bs)
+          : orig_buckets_ (bs),
+            buckets_ (apr_brigade_create (bs->p, bs->bucket_alloc))
+
+      {
+        if (buckets_ == nullptr)
+          throw_internal_error (APR_ENOMEM, "apr_brigade_create");
+
+        rewind (); // Copy the original buckets.
+      }
+
+      void
+      rewind ()
+      {
+        // Note that apreq_brigade_copy() appends buckets to the destination,
+        // so we clean it up first.
+        //
+        apr_status_t r (apr_brigade_cleanup (buckets_.get ()));
+        if (r != APR_SUCCESS)
+          throw_internal_error (r, "apr_brigade_cleanup");
+
+        r = apreq_brigade_copy (
+          buckets_.get (),
+          const_cast<apr_bucket_brigade*> (orig_buckets_));
+
+        if (r != APR_SUCCESS)
+          throw_internal_error (r, "apreq_brigade_copy");
+
+        setg (nullptr, nullptr, nullptr);
+      }
+
+    private:
+      virtual int_type
+      underflow ()
+      {
+        if (gptr () < egptr ())
+          return traits_type::to_int_type (*gptr ());
+
+        // If the get-pointer is not NULL then it points to the data referred
+        // by the first brigade bucket. As we will bail out or rewrite such a
+        // pointer now there is no need for the bucket either, so we can
+        // safely delete it.
+        //
+        if (gptr () != nullptr)
+        {
+          assert (!APR_BRIGADE_EMPTY (buckets_));
+
+          // Note that apr_bucket_delete() is a macro and the following
+          // call ends up badly (with SIGSEGV).
+          //
+          // apr_bucket_delete (APR_BRIGADE_FIRST (buckets_));
+          //
+          apr_bucket* b (APR_BRIGADE_FIRST (buckets_));
+          apr_bucket_delete (b);
+        }
+
+        if (APR_BRIGADE_EMPTY (buckets_))
+          return traits_type::eof ();
+
+        apr_size_t n;
+        const char* d;
+        apr_bucket* b (APR_BRIGADE_FIRST (buckets_));
+        apr_status_t r (apr_bucket_read (b, &d, &n, APR_BLOCK_READ));
+
+        if (r != APR_SUCCESS)
+          throw_internal_error (r, "apr_bucket_read");
+
+        char* p (const_cast<char*> (d));
+        setg (p, p, p + n);
+        return traits_type::to_int_type (*gptr ());
+      }
+
+    private:
+      const apr_bucket_brigade* orig_buckets_;
+
+      struct brigade_deleter
+      {
+        void operator() (apr_bucket_brigade* p) const
+        {
+          if (p != nullptr)
+          {
+            apr_status_t r (apr_brigade_destroy (p));
+
+            // Shouldn't fail unless something is severely damaged.
+            //
+            assert (r == APR_SUCCESS);
+          }
+        }
+      };
+
+      unique_ptr<apr_bucket_brigade, brigade_deleter> buckets_;
+    };
+
+    class istream_buckets_base
+    {
+    public:
+      explicit
+      istream_buckets_base (const apr_bucket_brigade* bs): buf_ (bs) {}
+
+    protected:
+      istreambuf_buckets buf_;
+    };
+
+    class istream_buckets: public istream_buckets_base, public istream
+    {
+    public:
+      explicit
+      istream_buckets (const apr_bucket_brigade* bs)
+          // Note that calling dtor for istream object before init() is called
+          // is undefined behavior. That's the reason for inventing the
+          // istream_buckets_base class.
+          //
+          : istream_buckets_base (bs), istream (&buf_)
+      {
+        exceptions (failbit | badbit);
+      }
+
+      void
+      rewind ()
+      {
+        buf_.rewind ();
+        clear ();       // Clears *bit flags (in particular eofbit).
+      }
+    };
+
     // request
     //
     request::
@@ -305,17 +467,27 @@ namespace web
 
       ap_set_content_type (rec_, nullptr); // Unset the output content type.
 
-      if (in_ != nullptr)
-        rewind_istream ();
-    }
+      // We don't need to rewind the input stream (which well may fail if
+      // unbuffered) if the form data is already read.
+      //
+      if (in_ != nullptr && form_data_ == nullptr)
+      {
+        assert (in_buf_ != nullptr);
 
-    void request::
-    rewind_istream ()
-    {
-      assert (in_buf_ != nullptr && in_ != nullptr);
+        in_buf_->rewind (); // Throws if impossible to rewind.
+        in_->clear ();      // Clears *bit flags (in particular eofbit).
+      }
 
-      in_buf_->rewind (); // Throws if impossible to rewind.
-      in_->clear ();      // Clears *bit flags (in particular eofbit).
+      // Rewind uploaded file streams.
+      //
+      if (uploads_ != nullptr)
+      {
+        for (const unique_ptr<istream_buckets>& is: *uploads_)
+        {
+          if (is != nullptr)
+            is->rewind ();
+        }
+      }
     }
 
     istream& request::
@@ -332,11 +504,6 @@ namespace web
         in_.reset (new istream (in_buf.get ()));
         in_buf_ = move (in_buf);
         in_->exceptions (istream::failbit | istream::badbit);
-
-        // Save form data now otherwise will not be available to do later when
-        // data is already read from stream.
-        //
-        form_data ();
       }
       else
       {
@@ -363,26 +530,309 @@ namespace web
     }
 
     const name_values& request::
-    parameters ()
+    parameters (size_t limit, bool url_only)
     {
-      if (parameters_ == nullptr)
+      if (parameters_ == nullptr || url_only < url_only_parameters_)
       {
-        parameters_.reset (new name_values ());
-
         try
         {
-          parse_parameters (rec_->args);
-          parse_parameters (form_data ().c_str ());
+          if (parameters_ == nullptr)
+          {
+            parameters_.reset (new name_values ());
+            parse_url_parameters (rec_->args);
+          }
+
+          if (!url_only && form_data (limit))
+          {
+            // After the form data is parsed we can clean it up for the
+            // application/x-www-form-urlencoded encoding but not for the
+            // multipart/form-data (see parse_multipart_parameters() for
+            // details).
+            //
+            if (form_multipart_)
+              parse_multipart_parameters (*form_data_);
+            else
+            {
+              // Make the character vector a NULL-terminated string.
+              //
+              form_data_->push_back ('\0');
+
+              parse_url_parameters (form_data_->data ());
+              *form_data_ = vector<char> (); // Reset the cache.
+            }
+          }
         }
-        catch (const invalid_argument& )
+        catch (const invalid_argument&)
         {
           throw invalid_request ();
         }
+
+        url_only_parameters_ = url_only;
       }
 
       return *parameters_;
     }
 
+    bool request::
+    form_data (size_t limit)
+    {
+      if (form_data_ == nullptr)
+      {
+        form_data_.reset (new vector<char> ());
+
+        // We will not consider POST body as a form data if the request is in
+        // the reading or later state.
+        //
+        if (rec_->method_number == M_POST && state_ < request_state::reading)
+        {
+          const char* ct (apr_table_get (rec_->headers_in, "Content-Type"));
+
+          if (ct != nullptr)
+          {
+            form_multipart_ = casecmp ("multipart/form-data", ct, 19) == 0;
+
+            if (form_multipart_ ||
+                casecmp ("application/x-www-form-urlencoded", ct, 33) == 0)
+              *form_data_ = vector<char> (
+                istreambuf_iterator<char> (content (limit)),
+                istreambuf_iterator<char> ());
+          }
+        }
+      }
+
+      return !form_data_->empty ();
+    }
+
+    void request::
+    parse_url_parameters (const char* args)
+    {
+      assert (parameters_ != nullptr);
+
+      for (auto n (args); n != nullptr; )
+      {
+        const char* v (strchr (n, '='));
+        const char* e (strchr (n, '&'));
+
+        if (e != nullptr && e < v)
+          v = nullptr;
+
+        string name (v != nullptr
+                     ? mime_url_decode (n, v) :
+                     (e
+                      ? mime_url_decode (n, e)
+                      : mime_url_decode (n, n + strlen (n))));
+
+        optional<string> value;
+
+        if (v++)
+          value = e
+            ? mime_url_decode (v, e)
+            : mime_url_decode (v, v + strlen (v));
+
+        if (!name.empty () || value)
+          parameters_->emplace_back (move (name), move (value));
+
+        n = e ? e + 1 : nullptr;
+      }
+    }
+
+    void request::
+    parse_multipart_parameters (const vector<char>& body)
+    {
+      assert (parameters_ != nullptr && uploads_ == nullptr);
+
+      auto throw_bad_request = [] (apr_status_t s,
+                                   status_code sc = HTTP_BAD_REQUEST)
+      {
+        char buf[1024];
+        throw invalid_request (sc, apr_strerror (s, buf, sizeof (buf)));
+      };
+
+      // Create the file upload stream list, filling it with NULLs for the
+      // parameters parsed from the URL query part.
+      //
+      uploads_.reset (
+        new vector<unique_ptr<istream_buckets>> (parameters_->size ()));
+
+      // All the required objects (parser, input/output buckets, etc.) will be
+      // allocated in the request memory pool and so will have the HTTP
+      // request duration lifetime.
+      //
+      apr_pool_t* pool (rec_->pool);
+
+      // Create the input bucket brigade containing a single bucket that
+      // references the form data.
+      //
+      apr_bucket_alloc_t* ba (apr_bucket_alloc_create (pool));
+      if (ba == nullptr)
+        throw_internal_error (APR_ENOMEM, "apr_bucket_alloc_create");
+
+      apr_bucket_brigade* bb (apr_brigade_create (pool, ba));
+      if (bb == nullptr)
+        throw_internal_error (APR_ENOMEM, "apr_brigade_create");
+
+      apr_bucket* b (
+        apr_bucket_immortal_create (body.data (), body.size (), ba));
+
+      if (b == nullptr)
+        throw_internal_error (APR_ENOMEM, "apr_bucket_immortal_create");
+
+      APR_BRIGADE_INSERT_TAIL (bb, b);
+
+      if ((b = apr_bucket_eos_create (ba)) == nullptr)
+        throw_internal_error (APR_ENOMEM, "apr_bucket_eos_create");
+
+      APR_BRIGADE_INSERT_TAIL (bb, b);
+
+      // Make sure that the parser will not swap the parsed data to disk
+      // passing the maximum possible value for the brigade limit. This way
+      // the resulting buckets will reference the form data directly, making
+      // no copies. This why we should not reset the form data cache after
+      // the parsing.
+      //
+      // Note that in future we may possibly setup the parser to read from the
+      // Apache internals directly and enable swapping the data to disk to
+      // minimize memory consumption.
+      //
+      apreq_parser_t* parser (
+        apreq_parser_make (pool,
+                           ba,
+                           apr_table_get (rec_->headers_in, "Content-Type"),
+                           apreq_parse_multipart,
+                           APR_SIZE_MAX /* brigade_limit */,
+                           nullptr /* temp_dir */,
+                           nullptr /* hook */,
+                           nullptr /* ctx */));
+
+      if (parser == nullptr)
+        throw_internal_error (APR_ENOMEM, "apreq_parser_make");
+
+      // Create the output table that will be filled with the parsed
+      // parameters.
+      //
+      apr_table_t* params (apr_table_make (pool, APREQ_DEFAULT_NELTS));
+      if (params == nullptr)
+        throw_internal_error (APR_ENOMEM, "apr_table_make");
+
+      // Parse the form data.
+      //
+      apr_status_t r (apreq_parser_run (parser, params, bb));
+      if (r != APR_SUCCESS)
+        throw_bad_request (r);
+
+      // Fill the parameter and file upload stream lists.
+      //
+      const apr_array_header_t* ps (apr_table_elts (params));
+      size_t n (ps->nelts);
+
+      for (auto p (reinterpret_cast<const apr_table_entry_t*> (ps->elts));
+           n--; ++p)
+      {
+        assert (p->key != nullptr && p->val != nullptr);
+
+        if (*p->key != '\0')
+        {
+          parameters_->emplace_back (p->key, optional<string> (p->val));
+
+          const apreq_param_t* ap (apreq_value_to_param (p->val));
+          assert (ap != nullptr); // Must always be resolvable.
+
+          uploads_->emplace_back (ap->upload != nullptr
+                                  ? new istream_buckets (ap->upload)
+                                  : nullptr);
+        }
+      }
+    }
+
+    request::uploads_type& request::
+    uploads () const
+    {
+      if (parameters_ == nullptr || url_only_parameters_)
+        sequence_error ("web::apache::request::uploads");
+
+      if (uploads_ == nullptr)
+        throw invalid_argument ("no uploads");
+
+      assert (uploads_->size () == parameters_->size ());
+      return *uploads_;
+    }
+
+    istream& request::
+    open_upload (size_t index)
+    {
+      uploads_type& us (uploads ());
+      size_t n (us.size ());
+
+      if (index >= n)
+        throw invalid_argument ("invalid index");
+
+      const unique_ptr<istream_buckets>& is (us[index]);
+
+      if (is == nullptr)
+        throw invalid_argument ("no upload");
+
+      return *is;
+    }
+
+    istream& request::
+    open_upload (const string& name)
+    {
+      uploads_type& us (uploads ());
+      size_t n (us.size ());
+
+      istream* r (nullptr);
+      for (size_t i (0); i < n; ++i)
+      {
+        if ((*parameters_)[i].name == name)
+        {
+          istream* is (us[i].get ());
+
+          if (is != nullptr)
+          {
+            if (r != nullptr)
+              throw invalid_argument ("multiple uploads for '" + name + "'");
+
+            r = is;
+          }
+        }
+      }
+
+      if (r == nullptr)
+        throw invalid_argument ("no upload");
+
+      return *r;
+    }
+
+    const name_values& request::
+    headers ()
+    {
+      if (headers_ == nullptr)
+      {
+        headers_.reset (new name_values ());
+
+        const apr_array_header_t* ha (apr_table_elts (rec_->headers_in));
+        size_t n (ha->nelts);
+
+        headers_->reserve (n + 1); // One for the custom :Client-IP header.
+
+        auto add = [this] (const char* n, const char* v)
+        {
+          assert (n != nullptr && v != nullptr);
+          headers_->emplace_back (n, optional<string> (v));
+        };
+
+        for (auto h (reinterpret_cast<const apr_table_entry_t*> (ha->elts));
+             n--; ++h)
+          add (h->key, h->val);
+
+        assert (rec_->connection != nullptr);
+
+        add (":Client-IP", rec_->connection->client_ip);
+      }
+
+      return *headers_;
+    }
+
     const name_values& request::
     cookies ()
     {
@@ -393,10 +843,12 @@ namespace web
         const apr_array_header_t* ha (apr_table_elts (rec_->headers_in));
         size_t n (ha->nelts);
 
-        for (auto h (reinterpret_cast<const apr_table_entry_t *> (ha->elts));
+        for (auto h (reinterpret_cast<const apr_table_entry_t*> (ha->elts));
              n--; ++h)
         {
-          if (strcasecmp (h->key, "Cookie") == 0)
+          assert (h->key != nullptr);
+
+          if (casecmp (h->key, "Cookie") == 0)
           {
             for (const char* n (h->val); n != nullptr; )
             {
@@ -447,8 +899,7 @@ namespace web
 
           // Same content type.
           //
-          strcasecmp (rec_->content_type ? rec_->content_type : "",
-                      type.c_str ()) == 0)
+          casecmp (type, rec_->content_type ? rec_->content_type : "") == 0)
       {
         // No change, return the existing stream.
         //
@@ -463,7 +914,10 @@ namespace web
         // written. Save form data now to make it available for future
         // parameters() call.
         //
-        form_data ();
+        // In the rare cases when the form data is expectedly bigger than 64K
+        // the client can always call parameters(limit) explicitly.
+        //
+        form_data (64 * 1024);
 
       unique_ptr<streambuf> out_buf (
         buffer
@@ -548,83 +1002,5 @@ namespace web
       state (request_state::headers);
       apr_table_add (rec_->err_headers_out, "Set-Cookie", s.c_str ());
     }
-
-    void request::
-    parse_parameters (const char* args)
-    {
-      for (auto n (args); n != nullptr; )
-      {
-        const char* v (strchr (n, '='));
-        const char* e (strchr (n, '&'));
-
-        if (e != nullptr && e < v)
-          v = nullptr;
-
-        string name (v != nullptr
-                     ? mime_url_decode (n, v) :
-                     (e
-                      ? mime_url_decode (n, e)
-                      : mime_url_decode (n, n + strlen (n))));
-
-        optional<string> value;
-
-        if (v++)
-          value = e
-            ? mime_url_decode (v, e)
-            : mime_url_decode (v, v + strlen (v));
-
-        if (!name.empty () || value)
-          parameters_->emplace_back (move (name), move (value));
-
-        n = e ? e + 1 : nullptr;
-      }
-    }
-
-    const string& request::
-    form_data ()
-    {
-      if (!form_data_)
-      {
-        form_data_.reset (new string ());
-
-        if (rec_->method_number == M_POST)
-        {
-          const char* ct (apr_table_get (rec_->headers_in, "Content-Type"));
-
-          if (ct != nullptr &&
-              strncasecmp ("application/x-www-form-urlencoded", ct, 33) == 0)
-          {
-            size_t limit  (0);
-            bool   rewind (true);
-
-            // Assign some reasonable (64K) input content read/cache limits if
-            // not done explicitly yet (with the request::content() call).
-            // Rewind afterwards unless the cache limit is set to zero.
-            //
-            if (in_buf_ == nullptr)
-              limit = 64 * 1024;
-            else
-              rewind = in_buf_->cache_limit () > 0;
-
-            istream& istr (content (limit, limit));
-
-            // Do not throw when eofbit is set (end of stream reached), and
-            // when failbit is set (getline() failed to extract any character).
-            //
-            istream::iostate e (istr.exceptions ()); // Save exception mask.
-            istr.exceptions (istream::badbit);
-            getline (istr, *form_data_);
-            istr.exceptions (e);                     // Restore exception mask.
-
-            // Rewind the stream unless no buffering were requested beforehand.
-            //
-            if (rewind)
-              rewind_istream ();
-          }
-        }
-      }
-
-      return *form_data_;
-    }
   }
 }