Add support for package submission

author: Karen Arutyunov <karen@codesynthesis.com> 2018-07-07 19:09:53 +0300
committer: Karen Arutyunov <karen@codesynthesis.com> 2018-07-10 22:03:18 +0300
commit: 21033565488f6c63b4c40962cccfdc8b6ca32b2a (patch)
tree: 44732ab7e1c7a7b25e64b82bf61d293f6cff2f86 /web
parent: 026377d0c145277b24b3af5fdcf707222e854bd3 (diff)
7 files changed, 666 insertions, 208 deletions
diff --git a/web/apache/request.cxx b/web/apache/request.cxx
index 32a1737..a019183 100644
--- a/web/apache/request.cxx
+++ b/web/apache/request.cxx
@@ -4,13 +4,20 @@
 
 #include <web/apache/request.hxx>
 
-#include <apr_tables.h>  // apr_table_*, apr_array_header_t
+#include <apr.h>         // APR_SIZE_MAX
+#include <apr_errno.h>   // apr_status_t, APR_SUCCESS, APR_E*, apr_strerror()
+#include <apr_tables.h>  // apr_table_*, apr_table_*(), apr_array_header_t
 #include <apr_strings.h> // apr_pstrdup()
+#include <apr_buckets.h> // apr_bucket*, apr_bucket_*(), apr_brigade_*(),
+                         // APR_BRIGADE_*()
 
 #include <httpd.h>         // request_rec, HTTP_*, OK
 #include <http_protocol.h> // ap_*()
 
-#include <strings.h> // strcasecmp(), strncasecmp()
+#include <apreq2/apreq.h>        // APREQ_*
+#include <apreq2/apreq_util.h>   // apreq_brigade_copy()
+#include <apreq2/apreq_param.h>  // apreq_param_t, apreq_value_to_param()
+#include <apreq2/apreq_parser.h> // apreq_parser_t, apreq_parser_make()
 
 #include <ctime>     // strftime(), time_t
 #include <vector>
@@ -22,11 +29,13 @@
 #include <istream>
 #include <cstring>   // str*(), memcpy(), size_t
 #include <utility>   // move()
-#include <stdexcept> // invalid_argument
+#include <iterator>  // istreambuf_iterator
+#include <stdexcept> // invalid_argument, runtime_error
 #include <exception> // current_exception()
 #include <streambuf>
 #include <algorithm> // min()
 
+#include <libbutl/utility.mxx>   // casecmp()
 #include <libbutl/optional.mxx>
 #include <libbutl/timestamp.mxx>
 
@@ -39,6 +48,13 @@ namespace web
 {
   namespace apache
   {
+    [[noreturn]] static void
+    throw_internal_error (apr_status_t s, const string& what)
+    {
+      char buf[1024];
+      throw runtime_error (what + ": " + apr_strerror (s, buf, sizeof (buf)));
+    }
+
     // Extend the Apache stream with checking for the read limit and caching
     // the content if requested. Replay the cached content after rewind.
     //
@@ -160,7 +176,7 @@ namespace web
         //
         mode_ = mode::cache;
 
-        // Bailout if the end of stream is reached.
+        // Bail out if the end of stream is reached.
         //
         if (eof_)
           return traits_type::eof ();
@@ -230,6 +246,152 @@ namespace web
       return r;
     }
 
+    // Stream interface for reading from the Apache's bucket brigade. Put back
+    // is not supported.
+    //
+    // Note that reading from a brigade bucket modifies the brigade in the
+    // general case. For example, reading from a file bucket adds a new heap
+    // bucket before the file bucket on every read. Traversing/reading through
+    // such a bucket brigade effectively loads the whole file into the memory,
+    // so the subsequent brigade traversal results in iterating over the
+    // loaded heap buckets.
+    //
+    // To avoid such a behavior we will make a shallow copy of the original
+    // bucket brigade, initially and for each rewind. Then, instead of
+    // iterating, we will always read from the first bucket removing it after
+    // the use.
+    //
+    class istreambuf_buckets: public streambuf
+    {
+    public:
+      // The bucket brigade must exist during the object's lifetime.
+      //
+      explicit
+      istreambuf_buckets (const apr_bucket_brigade* bs)
+          : orig_buckets_ (bs),
+            buckets_ (apr_brigade_create (bs->p, bs->bucket_alloc))
+      {
+        if (buckets_ == nullptr)
+          throw_internal_error (APR_ENOMEM, "apr_brigade_create");
+
+        rewind (); // Copy the original buckets.
+      }
+
+      void
+      rewind ()
+      {
+        // Note that apreq_brigade_copy() appends buckets to the destination,
+        // so we clean it up first.
+        //
+        apr_status_t r (apr_brigade_cleanup (buckets_.get ()));
+        if (r != APR_SUCCESS)
+          throw_internal_error (r, "apr_brigade_cleanup");
+
+        r = apreq_brigade_copy (
+          buckets_.get (),
+          const_cast<apr_bucket_brigade*> (orig_buckets_));
+
+        if (r != APR_SUCCESS)
+          throw_internal_error (r, "apreq_brigade_copy");
+
+        setg (nullptr, nullptr, nullptr);
+      }
+
+    private:
+      virtual int_type
+      underflow ()
+      {
+        if (gptr () < egptr ())
+          return traits_type::to_int_type (*gptr ());
+
+        // A non-NULL get-pointer refers to the consumed data of the first
+        // brigade bucket, so delete the bucket and reset the get area (or a
+        // repeated call at the end of stream would delete one more bucket).
+        //
+        // Note that apr_bucket_delete() is a macro and passing it
+        // APR_BRIGADE_FIRST (buckets_) directly ends up badly (with SIGSEGV).
+        //
+        if (gptr () != nullptr)
+        {
+          assert (!APR_BRIGADE_EMPTY (buckets_));
+
+          apr_bucket* b (APR_BRIGADE_FIRST (buckets_));
+          apr_bucket_delete (b);
+          setg (nullptr, nullptr, nullptr);
+        }
+
+        apr_size_t n (0);
+        const char* d (nullptr);
+
+        while (n == 0) // Skip the buckets that contain no data.
+        {
+          if (APR_BRIGADE_EMPTY (buckets_))
+            return traits_type::eof ();
+
+          apr_bucket* b (APR_BRIGADE_FIRST (buckets_));
+          apr_status_t r (apr_bucket_read (b, &d, &n, APR_BLOCK_READ));
+          if (r != APR_SUCCESS)
+            throw_internal_error (r, "apr_bucket_read");
+
+          if (n == 0)
+            apr_bucket_delete (b);
+        }
+
+        char* p (const_cast<char*> (d));
+        setg (p, p, p + n);
+        return traits_type::to_int_type (*gptr ());
+      }
+
+    private:
+      const apr_bucket_brigade* orig_buckets_;
+
+      struct brigade_deleter
+      {
+        void operator() (apr_bucket_brigade* p) const
+        {
+          // Is never called for NULL by unique_ptr. The destruction shouldn't
+          // fail unless something is severely damaged.
+          //
+          apr_status_t r (apr_brigade_destroy (p));
+          assert (r == APR_SUCCESS);
+        }
+      };
+
+      unique_ptr<apr_bucket_brigade, brigade_deleter> buckets_;
+    };
+
+    class istream_buckets_base
+    {
+    public:
+      explicit
+      istream_buckets_base (const apr_bucket_brigade* bs): buf_ (bs) {}
+
+    protected:
+      istreambuf_buckets buf_;
+    };
+
+    class istream_buckets: public istream_buckets_base, public istream
+    {
+    public:
+      explicit
+      istream_buckets (const apr_bucket_brigade* bs)
+          // Note that calling dtor for istream object before init() is called
+          // is undefined behavior. That's the reason for inventing the
+          // istream_buckets_base class.
+          //
+          : istream_buckets_base (bs), istream (&buf_)
+      {
+        exceptions (failbit | badbit);
+      }
+
+      void
+      rewind ()
+      {
+        buf_.rewind ();
+        clear ();       // Clears *bit flags (in particular eofbit).
+      }
+    };
+
     // request
     //
     request::
@@ -305,17 +467,27 @@ namespace web
 
       ap_set_content_type (rec_, nullptr); // Unset the output content type.
 
-      if (in_ != nullptr)
-        rewind_istream ();
-    }
+      // We don't need to rewind the input stream (which may well fail if
+      // unbuffered) if the form data is already read.
+      //
+      if (in_ != nullptr && form_data_ == nullptr)
+      {
+        assert (in_buf_ != nullptr);
 
-    void request::
-    rewind_istream ()
-    {
-      assert (in_buf_ != nullptr && in_ != nullptr);
+        in_buf_->rewind (); // Throws if impossible to rewind.
+        in_->clear ();      // Clears *bit flags (in particular eofbit).
+      }
 
-      in_buf_->rewind (); // Throws if impossible to rewind.
-      in_->clear ();      // Clears *bit flags (in particular eofbit).
+      // Rewind uploaded file streams.
+      //
+      if (uploads_ != nullptr)
+      {
+        for (const unique_ptr<istream_buckets>& is: *uploads_)
+        {
+          if (is != nullptr)
+            is->rewind ();
+        }
+      }
     }
 
     istream& request::
@@ -332,11 +504,6 @@ namespace web
         in_.reset (new istream (in_buf.get ()));
         in_buf_ = move (in_buf);
         in_->exceptions (istream::failbit | istream::badbit);
-
-        // Save form data now otherwise will not be available to do later when
-        // data is already read from stream.
-        //
-        form_data ();
       }
       else
       {
@@ -363,26 +530,309 @@ namespace web
     }
 
     const name_values& request::
-    parameters ()
+    parameters (size_t limit, bool url_only)
     {
-      if (parameters_ == nullptr)
+      if (parameters_ == nullptr || url_only < url_only_parameters_)
       {
-        parameters_.reset (new name_values ());
-
         try
         {
-          parse_parameters (rec_->args);
-          parse_parameters (form_data ().c_str ());
+          if (parameters_ == nullptr)
+          {
+            parameters_.reset (new name_values ());
+            parse_url_parameters (rec_->args);
+          }
+
+          if (!url_only && form_data (limit))
+          {
+            // After the form data is parsed we can clean it up for the
+            // application/x-www-form-urlencoded encoding but not for the
+            // multipart/form-data (see parse_multipart_parameters() for
+            // details).
+            //
+            if (form_multipart_)
+              parse_multipart_parameters (*form_data_);
+            else
+            {
+              // Make the character vector a NULL-terminated string.
+              //
+              form_data_->push_back ('\0');
+
+              parse_url_parameters (form_data_->data ());
+              *form_data_ = vector<char> (); // Reset the cache.
+            }
+          }
         }
-        catch (const invalid_argument& )
+        catch (const invalid_argument&)
         {
           throw invalid_request ();
         }
+
+        url_only_parameters_ = url_only;
       }
 
       return *parameters_;
     }
 
+    bool request::
+    form_data (size_t limit)
+    {
+      if (form_data_ == nullptr)
+      {
+        form_data_.reset (new vector<char> ());
+
+        // We will not consider POST body as a form data if the request is in
+        // the reading or later state.
+        //
+        if (rec_->method_number == M_POST && state_ < request_state::reading)
+        {
+          const char* ct (apr_table_get (rec_->headers_in, "Content-Type"));
+
+          if (ct != nullptr)
+          {
+            form_multipart_ = casecmp ("multipart/form-data", ct, 19) == 0;
+
+            if (form_multipart_ ||
+                casecmp ("application/x-www-form-urlencoded", ct, 33) == 0)
+              *form_data_ = vector<char> (
+                istreambuf_iterator<char> (content (limit)),
+                istreambuf_iterator<char> ());
+          }
+        }
+      }
+
+      return !form_data_->empty ();
+    }
+
+    void request::
+    parse_url_parameters (const char* args)
+    {
+      assert (parameters_ != nullptr);
+
+      for (auto n (args); n != nullptr; )
+      {
+        const char* v (strchr (n, '='));
+        const char* e (strchr (n, '&'));
+
+        if (e != nullptr && e < v)
+          v = nullptr;
+
+        string name (v != nullptr
+                     ? mime_url_decode (n, v) :
+                     (e
+                      ? mime_url_decode (n, e)
+                      : mime_url_decode (n, n + strlen (n))));
+
+        optional<string> value;
+
+        if (v++)
+          value = e
+            ? mime_url_decode (v, e)
+            : mime_url_decode (v, v + strlen (v));
+
+        if (!name.empty () || value)
+          parameters_->emplace_back (move (name), move (value));
+
+        n = e ? e + 1 : nullptr;
+      }
+    }
+
+    void request::
+    parse_multipart_parameters (const vector<char>& body)
+    {
+      assert (parameters_ != nullptr && uploads_ == nullptr);
+
+      auto throw_bad_request = [] (apr_status_t s,
+                                   status_code sc = HTTP_BAD_REQUEST)
+      {
+        char buf[1024];
+        throw invalid_request (sc, apr_strerror (s, buf, sizeof (buf)));
+      };
+
+      // Create the file upload stream list, filling it with NULLs for the
+      // parameters parsed from the URL query part.
+      //
+      uploads_.reset (
+        new vector<unique_ptr<istream_buckets>> (parameters_->size ()));
+
+      // All the required objects (parser, input/output buckets, etc.) will be
+      // allocated in the request memory pool and so will have the HTTP
+      // request duration lifetime.
+      //
+      apr_pool_t* pool (rec_->pool);
+
+      // Create the input bucket brigade containing a single bucket that
+      // references the form data.
+      //
+      apr_bucket_alloc_t* ba (apr_bucket_alloc_create (pool));
+      if (ba == nullptr)
+        throw_internal_error (APR_ENOMEM, "apr_bucket_alloc_create");
+
+      apr_bucket_brigade* bb (apr_brigade_create (pool, ba));
+      if (bb == nullptr)
+        throw_internal_error (APR_ENOMEM, "apr_brigade_create");
+
+      apr_bucket* b (
+        apr_bucket_immortal_create (body.data (), body.size (), ba));
+
+      if (b == nullptr)
+        throw_internal_error (APR_ENOMEM, "apr_bucket_immortal_create");
+
+      APR_BRIGADE_INSERT_TAIL (bb, b);
+
+      if ((b = apr_bucket_eos_create (ba)) == nullptr)
+        throw_internal_error (APR_ENOMEM, "apr_bucket_eos_create");
+
+      APR_BRIGADE_INSERT_TAIL (bb, b);
+
+      // Make sure that the parser will not swap the parsed data to disk
+      // passing the maximum possible value for the brigade limit. This way
+      // the resulting buckets will reference the form data directly, making
+      // no copies. This is why we should not reset the form data cache after
+      // the parsing.
+      //
+      // Note that in the future we may set up the parser to read from the
+      // Apache internals directly and enable swapping the data to disk to
+      // minimize memory consumption.
+      //
+      apreq_parser_t* parser (
+        apreq_parser_make (pool,
+                           ba,
+                           apr_table_get (rec_->headers_in, "Content-Type"),
+                           apreq_parse_multipart,
+                           APR_SIZE_MAX /* brigade_limit */,
+                           nullptr /* temp_dir */,
+                           nullptr /* hook */,
+                           nullptr /* ctx */));
+
+      if (parser == nullptr)
+        throw_internal_error (APR_ENOMEM, "apreq_parser_make");
+
+      // Create the output table that will be filled with the parsed
+      // parameters.
+      //
+      apr_table_t* params (apr_table_make (pool, APREQ_DEFAULT_NELTS));
+      if (params == nullptr)
+        throw_internal_error (APR_ENOMEM, "apr_table_make");
+
+      // Parse the form data.
+      //
+      apr_status_t r (apreq_parser_run (parser, params, bb));
+      if (r != APR_SUCCESS)
+        throw_bad_request (r);
+
+      // Fill the parameter and file upload stream lists.
+      //
+      const apr_array_header_t* ps (apr_table_elts (params));
+      size_t n (ps->nelts);
+
+      for (auto p (reinterpret_cast<const apr_table_entry_t*> (ps->elts));
+           n--; ++p)
+      {
+        assert (p->key != nullptr && p->val != nullptr);
+
+        if (*p->key != '\0')
+        {
+          parameters_->emplace_back (p->key, optional<string> (p->val));
+
+          const apreq_param_t* ap (apreq_value_to_param (p->val));
+          assert (ap != nullptr); // Must always be resolvable.
+
+          uploads_->emplace_back (ap->upload != nullptr
+                                  ? new istream_buckets (ap->upload)
+                                  : nullptr);
+        }
+      }
+    }
+
+    request::uploads_type& request::
+    uploads () const
+    {
+      if (parameters_ == nullptr || url_only_parameters_)
+        sequence_error ("web::apache::request::uploads");
+
+      if (uploads_ == nullptr)
+        throw invalid_argument ("no uploads");
+
+      assert (uploads_->size () == parameters_->size ());
+      return *uploads_;
+    }
+
+    istream& request::
+    open_upload (size_t index)
+    {
+      uploads_type& us (uploads ());
+      size_t n (us.size ());
+
+      if (index >= n)
+        throw invalid_argument ("invalid index");
+
+      const unique_ptr<istream_buckets>& is (us[index]);
+
+      if (is == nullptr)
+        throw invalid_argument ("no upload");
+
+      return *is;
+    }
+
+    istream& request::
+    open_upload (const string& name)
+    {
+      uploads_type& us (uploads ());
+      size_t n (us.size ());
+
+      istream* r (nullptr);
+      for (size_t i (0); i < n; ++i)
+      {
+        if ((*parameters_)[i].name == name)
+        {
+          istream* is (us[i].get ());
+
+          if (is != nullptr)
+          {
+            if (r != nullptr)
+              throw invalid_argument ("multiple uploads for '" + name + "'");
+
+            r = is;
+          }
+        }
+      }
+
+      if (r == nullptr)
+        throw invalid_argument ("no upload");
+
+      return *r;
+    }
+
+    const name_values& request::
+    headers ()
+    {
+      if (headers_ == nullptr)
+      {
+        headers_.reset (new name_values ());
+
+        const apr_array_header_t* ha (apr_table_elts (rec_->headers_in));
+        size_t n (ha->nelts);
+
+        headers_->reserve (n + 1); // One for the custom :Client-IP header.
+
+        auto add = [this] (const char* n, const char* v)
+        {
+          assert (n != nullptr && v != nullptr);
+          headers_->emplace_back (n, optional<string> (v));
+        };
+
+        for (auto h (reinterpret_cast<const apr_table_entry_t*> (ha->elts));
+             n--; ++h)
+          add (h->key, h->val);
+
+        assert (rec_->connection != nullptr);
+
+        add (":Client-IP", rec_->connection->client_ip);
+      }
+
+      return *headers_;
+    }
+
     const name_values& request::
     cookies ()
     {
@@ -393,10 +843,12 @@ namespace web
         const apr_array_header_t* ha (apr_table_elts (rec_->headers_in));
         size_t n (ha->nelts);
 
-        for (auto h (reinterpret_cast<const apr_table_entry_t *> (ha->elts));
+        for (auto h (reinterpret_cast<const apr_table_entry_t*> (ha->elts));
              n--; ++h)
         {
-          if (strcasecmp (h->key, "Cookie") == 0)
+          assert (h->key != nullptr);
+
+          if (casecmp (h->key, "Cookie") == 0)
           {
             for (const char* n (h->val); n != nullptr; )
             {
@@ -447,8 +899,7 @@ namespace web
 
           // Same content type.
           //
-          strcasecmp (rec_->content_type ? rec_->content_type : "",
-                      type.c_str ()) == 0)
+          casecmp (type, rec_->content_type ? rec_->content_type : "") == 0)
       {
         // No change, return the existing stream.
         //
@@ -463,7 +914,10 @@ namespace web
         // written. Save form data now to make it available for future
         // parameters() call.
         //
-        form_data ();
+        // In the rare cases when the form data is expected to exceed 64K the
+        // the client can always call parameters(limit) explicitly.
+        //
+        form_data (64 * 1024);
 
       unique_ptr<streambuf> out_buf (
         buffer
@@ -548,83 +1002,5 @@ namespace web
       state (request_state::headers);
       apr_table_add (rec_->err_headers_out, "Set-Cookie", s.c_str ());
     }
-
-    void request::
-    parse_parameters (const char* args)
-    {
-      for (auto n (args); n != nullptr; )
-      {
-        const char* v (strchr (n, '='));
-        const char* e (strchr (n, '&'));
-
-        if (e != nullptr && e < v)
-          v = nullptr;
-
-        string name (v != nullptr
-                     ? mime_url_decode (n, v) :
-                     (e
-                      ? mime_url_decode (n, e)
-                      : mime_url_decode (n, n + strlen (n))));
-
-        optional<string> value;
-
-        if (v++)
-          value = e
-            ? mime_url_decode (v, e)
-            : mime_url_decode (v, v + strlen (v));
-
-        if (!name.empty () || value)
-          parameters_->emplace_back (move (name), move (value));
-
-        n = e ? e + 1 : nullptr;
-      }
-    }
-
-    const string& request::
-    form_data ()
-    {
-      if (!form_data_)
-      {
-        form_data_.reset (new string ());
-
-        if (rec_->method_number == M_POST)
-        {
-          const char* ct (apr_table_get (rec_->headers_in, "Content-Type"));
-
-          if (ct != nullptr &&
-              strncasecmp ("application/x-www-form-urlencoded", ct, 33) == 0)
-          {
-            size_t limit  (0);
-            bool   rewind (true);
-
-            // Assign some reasonable (64K) input content read/cache limits if
-            // not done explicitly yet (with the request::content() call).
-            // Rewind afterwards unless the cache limit is set to zero.
-            //
-            if (in_buf_ == nullptr)
-              limit = 64 * 1024;
-            else
-              rewind = in_buf_->cache_limit () > 0;
-
-            istream& istr (content (limit, limit));
-
-            // Do not throw when eofbit is set (end of stream reached), and
-            // when failbit is set (getline() failed to extract any character).
-            //
-            istream::iostate e (istr.exceptions ()); // Save exception mask.
-            istr.exceptions (istream::badbit);
-            getline (istr, *form_data_);
-            istr.exceptions (e);                     // Restore exception mask.
-
-            // Rewind the stream unless no buffering were requested beforehand.
-            //
-            if (rewind)
-              rewind_istream ();
-          }
-        }
-      }
-
-      return *form_data_;
-    }
   }
 }
diff --git a/web/apache/request.hxx b/web/apache/request.hxx
index ba815dc..8c42f57 100644
--- a/web/apache/request.hxx
+++ b/web/apache/request.hxx
@@ -10,6 +10,7 @@
 #include <chrono>
 #include <memory>    // unique_ptr
 #include <string>
+#include <vector>
 #include <istream>
 #include <ostream>
 #include <streambuf>
@@ -55,6 +56,10 @@ namespace web
     //
     class istreambuf_cache;
 
+    // Stream type for reading from Apache's bucket brigades.
+    //
+    class istream_buckets;
+
     class request: public web::request,
                    public web::response,
                    public stream_state
@@ -93,12 +98,25 @@ namespace web
       // Get request body data stream.
       //
       virtual std::istream&
-      content (size_t limit = 0, size_t buffer = 0);
+      content (std::size_t limit = 0, std::size_t buffer = 0);
 
       // Get request parameters.
       //
       virtual const name_values&
-      parameters ();
+      parameters (std::size_t limit, bool url_only = false);
+
+      // Get upload stream.
+      //
+      virtual std::istream&
+      open_upload (std::size_t index);
+
+      virtual std::istream&
+      open_upload (const std::string& name);
+
+      // Get request headers.
+      //
+      virtual const name_values&
+      headers ();
 
       // Get request cookies.
       //
@@ -134,16 +152,35 @@ namespace web
               bool buffer = true);
 
     private:
-      // Get application/x-www-form-urlencoded form data. If request::content()
-      // was not called yet (and so limits are not specified) then set both of
-      // them to 64KB. Rewind the stream afterwards, so it's available for the
-      // application as well, unless no buffering were requested beforehand.
+      // On the first call cache the application/x-www-form-urlencoded or
+      // multipart/form-data form data for the subsequent parameters parsing
+      // and set the multipart flag accordingly. Don't cache if the request is
+      // in the reading or later state. Return true if the cache contains the
+      // form data.
+      //
+      // Note that the function doesn't change the content buffering (see
+      // content() function for details) nor rewind the content stream after
+      // reading.
+      //
+      bool
+      form_data (std::size_t limit);
+
+      // Used to also parse application/x-www-form-urlencoded POST body.
       //
-      const std::string&
-      form_data ();
+      void
+      parse_url_parameters (const char* args);
 
       void
-      parse_parameters (const char* args);
+      parse_multipart_parameters (const std::vector<char>& body);
+
+      // Return a list of the upload input streams. Throw sequence_error if
+      // the parameters() function was not called yet. Throw invalid_argument
+      // if the request doesn't contain multipart form data.
+      //
+      using uploads_type = std::vector<std::unique_ptr<istream_buckets>>;
+
+      uploads_type&
+      uploads () const;
 
       // Advance the request processing state. Noop if new state is equal to
       // the current one. Throw sequence_error if the new state is less then
@@ -161,20 +198,27 @@ namespace web
       virtual void
       set_write_state () {state (request_state::writing);}
 
-      // Rewind the input stream (that must exist). Throw sequence_error if
-      // some unbuffered content have already been read.
-      //
-      void
-      rewind_istream ();
-
     private:
       request_rec* rec_;
       request_state state_ = request_state::initial;
 
       path_type path_;
+
       std::unique_ptr<name_values> parameters_;
+      bool url_only_parameters_; // Meaningless if parameters_ is NULL;
+
+      // Uploaded file streams. If not NULL, is parallel to the parameters
+      // list.
+      //
+      std::unique_ptr<uploads_type> uploads_;
+
+      std::unique_ptr<name_values> headers_;
       std::unique_ptr<name_values> cookies_;
-      std::unique_ptr<std::string> form_data_;
+
+      // Form data cache. Is empty if the body doesn't contain the form data.
+      //
+      std::unique_ptr<std::vector<char>> form_data_;
+      bool form_multipart_; // Meaningless if form_data_ is NULL or empty;
 
       std::unique_ptr<istreambuf_cache> in_buf_;
       std::unique_ptr<std::istream> in_;
diff --git a/web/apache/service.cxx b/web/apache/service.cxx
index b72aa3f..bad98cc 100644
--- a/web/apache/service.cxx
+++ b/web/apache/service.cxx
@@ -69,8 +69,8 @@ namespace web
           };
       }
 
-      // Track if the module is allowed to handle a request in the specific
-      // configuration scope. The module exemplar will be created (and
+      // Track if the handler is allowed to handle a request in the specific
+      // configuration scope. The handler exemplar will be created (and
       // initialized) only for configuration contexts that have
       // 'SetHandler <mod_name>' in effect for the corresponding scope.
       //
diff --git a/web/apache/service.hxx b/web/apache/service.hxx
index fca0ea2..2fbcd0a 100644
--- a/web/apache/service.hxx
+++ b/web/apache/service.hxx
@@ -31,22 +31,22 @@ namespace web
     // configuration context to the request handler.
     //
     // This Apache service implementation first makes a copy of the provided
-    // (in the constructor below) module exemplar for each directory context.
+    // (in the constructor below) handler exemplar for each directory context.
     // It then initializes each of these "context exemplars" with the (merged)
     // set of configuration options. Finally, when handling a request, it
     // copies the corresponding "context exemplar" to create the "handling
     // instance". Note that the "context exemplars" are created as a copy of
     // the provided exemplar, which is never initialized. As a result, it is
-    // possible to detect if the module's copy constructor is used to create a
-    // "context exemplar" or a "handling instance".
+    // possible to detect if the handler's copy constructor is used to create
+    // a "context exemplar" or a "handling instance".
     //
     class service: ::module
     {
     public:
       // Note that the module exemplar is stored by-reference.
       //
-      template <typename M>
-      service (const std::string& name, M& exemplar)
+      template <typename H>
+      service (const std::string& name, H& exemplar)
           : ::module
             {
               STANDARD20_MODULE_STUFF,
@@ -55,7 +55,7 @@ namespace web
               nullptr,
               nullptr,
               nullptr,
-              &register_hooks<M>
+              &register_hooks<H>
 
 #ifdef AP_MODULE_HAS_FLAGS
               , AP_MODULE_FLAG_NONE
@@ -69,15 +69,15 @@ namespace web
         // Set configuration context management hooks.
         //
         // The overall process of building the configuration hierarchy for a
-        // module is as follows:
+        // handler is as follows:
         //
         // 1. Apache creates directory and server configuration contexts for
-        //    scopes containing module-defined directives by calling the
+        //    scopes containing handler-defined directives by calling the
         //    create_{server,dir}_context() callback functions. For directives
         //    at the server scope the special directory context is created as
         //    well.
         //
-        // 2. Apache calls parse_option() function for each module-defined
+        // 2. Apache calls parse_option() function for each handler-defined
         //    directive. The function parses the directives and places the
         //    resulting options into the corresponding configuration context.
         //    It also establishes the directory-server contexts relations.
@@ -89,7 +89,7 @@ namespace web
         // 4. Apache calls config_finalizer() which complements the directory
         //    contexts options with the ones from the enclosing servers.
         //
-        // 5. Apache calls worker_initializer() which creates module exemplar
+        // 5. Apache calls worker_initializer() which creates handler exemplar
         //    for each directory configuration context that have
         //    'SetHandler <mod_name>' directive in effect for it.
         //
@@ -100,14 +100,14 @@ namespace web
         //
         create_server_config = &create_server_context;
         create_dir_config = &create_dir_context;
-        merge_server_config = &merge_server_context<M>;
+        merge_server_config = &merge_server_context<H>;
 
-        // instance<M> () is invented to delegate processing from apache
+        // instance<H> () is invented to delegate processing from apache
         // request handler C function to the service non static member
         // function. This appoach resticts number of service objects per
-        // specific module implementation class with just one instance.
+        // specific handler implementation class with just one instance.
         //
-        service*& srv (instance<M> ());
+        service*& srv (instance<H> ());
         assert (srv == nullptr);
         srv = this;
       }
@@ -118,7 +118,7 @@ namespace web
       }
 
     private:
-      template <typename M>
+      template <typename H>
       static service*&
       instance () noexcept
       {
@@ -126,45 +126,45 @@ namespace web
         return instance;
       }
 
-      template <typename M>
+      template <typename H>
       static void
       register_hooks (apr_pool_t*) noexcept
       {
         // The config_finalizer() function is called at the end of Apache
         // server configuration parsing.
         //
-        ap_hook_post_config (&config_finalizer<M>, NULL, NULL, APR_HOOK_LAST);
+        ap_hook_post_config (&config_finalizer<H>, NULL, NULL, APR_HOOK_LAST);
 
         // The worker_initializer() function is called right after Apache
         // worker process is started. Called for every new process spawned.
         //
         ap_hook_child_init (
-          &worker_initializer<M>, NULL, NULL, APR_HOOK_LAST);
+          &worker_initializer<H>, NULL, NULL, APR_HOOK_LAST);
 
         // The request_handler () function is called for each client request.
         //
-        ap_hook_handler (&request_handler<M>, NULL, NULL, APR_HOOK_LAST);
+        ap_hook_handler (&request_handler<H>, NULL, NULL, APR_HOOK_LAST);
       }
 
-      template <typename M>
+      template <typename H>
       static int
       config_finalizer (apr_pool_t*, apr_pool_t*, apr_pool_t*, server_rec* s)
         noexcept
       {
-        instance<M> ()->finalize_config (s);
+        instance<H> ()->finalize_config (s);
         return OK;
       }
 
-      template <typename M>
+      template <typename H>
       static void
       worker_initializer (apr_pool_t*, server_rec* s) noexcept
       {
-        auto srv (instance<M> ());
+        auto srv (instance<H> ());
         log l (s, srv);
-        srv->template init_worker<M> (l);
+        srv->template init_worker<H> (l);
       }
 
-      template <typename M>
+      template <typename H>
       static int
       request_handler (request_rec* r) noexcept;
 
@@ -176,12 +176,12 @@ namespace web
       enum class request_handling
       {
         // Configuration scope has 'SetHandler <mod_name>' directive
-        // specified. The module is allowed to handle a request in the scope.
+        // specified. The handler is allowed to handle a request in the scope.
         //
         allowed,
 
         // Configuration scope has 'SetHandler <other_mod_name>|None'
-        // directive specified. The module is disallowed to handle a request
+        // directive specified. The handler is disallowed to handle a request
         // in the scope.
         //
         disallowed,
@@ -207,7 +207,7 @@ namespace web
       //
       // We will then use the pointers to these context objects as keys in
       // maps to (1) the corresponding application-level option lists during
-      // the configuration cycle and to (2) the corresponding module exemplar
+      // the configuration cycle and to (2) the corresponding handler exemplar
       // during the HTTP request handling phase. We will also use the same
       // type for both directory and server configuration contexts.
       //
@@ -267,12 +267,12 @@ namespace web
       static void*
       create_dir_context (apr_pool_t*, char* dir) noexcept;
 
-      template <typename M>
+      template <typename H>
       static void*
       merge_server_context (apr_pool_t*, void* enclosing, void* enclosed)
         noexcept
       {
-        instance<M> ()->complement (
+        instance<H> ()->complement (
           context_cast (enclosed), context_cast (enclosing));
 
         return enclosed;
@@ -298,17 +298,17 @@ namespace web
       void
       complement (context* enclosed, context* enclosing);
 
-      template <typename M>
+      template <typename H>
       void
       init_worker (log&);
 
-      template <typename M>
+      template <typename H>
       int
       handle (request&, const context*, log&) const;
 
     private:
       std::string name_;
-      module& exemplar_;
+      handler& exemplar_;
       option_descriptions option_descriptions_;
 
       // The context objects pointed to by the key can change during the
@@ -320,7 +320,7 @@ namespace web
       // The context objects pointed to by the key can not change during the
       // request handling phase.
       //
-      using exemplars = std::map<const context*, std::unique_ptr<module>>;
+      using exemplars = std::map<const context*, std::unique_ptr<handler>>;
       exemplars exemplars_;
 
       bool options_parsed_ = false;
diff --git a/web/apache/service.txx b/web/apache/service.txx
index 36c6826..6b1baad 100644
--- a/web/apache/service.txx
+++ b/web/apache/service.txx
@@ -15,20 +15,22 @@ namespace web
 {
   namespace apache
   {
-    template <typename M>
+    template <typename H>
     void service::
     init_worker (log& l)
     {
-      const std::string func_name (
+      using namespace std;
+
+      const string func_name (
         "web::apache::service<" + name_ + ">::init_worker");
 
       try
       {
-        const M* exemplar (dynamic_cast<const M*> (&exemplar_));
+        const H* exemplar (dynamic_cast<const H*> (&exemplar_));
         assert (exemplar != nullptr);
 
-        // For each directory configuration context, for which the module is
-        // allowed to handle a request, create the module exemplar as a deep
+        // For each directory configuration context, for which the handler is
+        // allowed to handle a request, create the handler exemplar as a deep
         // copy of the exemplar_ member, and initialize it with the
         // context-specific option list.
         //
@@ -42,7 +44,7 @@ namespace web
             auto r (
               exemplars_.emplace (
                 c,
-                std::unique_ptr<module> (new M (*exemplar))));
+                unique_ptr<handler> (new H (*exemplar))));
 
             r.first->second->init (o.second, l);
           }
@@ -52,7 +54,7 @@ namespace web
         //
         options_.clear ();
       }
-      catch (const std::exception& e)
+      catch (const exception& e)
       {
         l.write (nullptr, 0, func_name.c_str (), APLOG_EMERG, e.what ());
 
@@ -72,7 +74,7 @@ namespace web
         // create any new ones, it keeps trying to create the worker process
         // at one-second intervals.
         //
-        std::exit (APEXIT_CHILDSICK);
+        exit (APEXIT_CHILDSICK);
       }
       catch (...)
       {
@@ -84,15 +86,15 @@ namespace web
 
         // Terminate the worker apache process.
         //
-        std::exit (APEXIT_CHILDSICK);
+        exit (APEXIT_CHILDSICK);
       }
     }
 
-    template <typename M>
+    template <typename H>
     int service::
     request_handler (request_rec* r) noexcept
     {
-      auto srv (instance<M> ());
+      auto srv (instance<H> ());
       if (!r->handler || srv->name_ != r->handler) return DECLINED;
 
       assert (r->per_dir_config != nullptr);
@@ -106,14 +108,16 @@ namespace web
 
       request rq (r);
       log lg (r->server, srv);
-      return srv->template handle<M> (rq, cx, lg);
+      return srv->template handle<H> (rq, cx, lg);
     }
 
-    template <typename M>
+    template <typename H>
     int service::
     handle (request& rq, const context* cx, log& lg) const
     {
-      static const std::string func_name (
+      using namespace std;
+
+      static const string func_name (
         "web::apache::service<" + name_ + ">::handle");
 
       try
@@ -121,14 +125,14 @@ namespace web
         auto i (exemplars_.find (cx));
         assert (i != exemplars_.end ());
 
-        const M* e (dynamic_cast<const M*> (i->second.get ()));
+        const H* e (dynamic_cast<const H*> (i->second.get ()));
         assert (e != nullptr);
 
-        for (M m (*e);;)
+        for (H h (*e);;)
         {
           try
           {
-            if (static_cast<module&> (m).handle (rq, rq, lg))
+            if (static_cast<handler&> (h).handle (rq, rq, lg))
               return rq.flush ();
 
             if (rq.state () == request_state::initial)
@@ -138,7 +142,7 @@ namespace web
                       "handling declined being partially executed");
             break;
           }
-          catch (const module::retry&)
+          catch (const handler::retry&)
           {
             // Retry to handle the request.
             //
@@ -152,10 +156,10 @@ namespace web
         {
           try
           {
-            rq.content (e.status, e.type) << e.content;
+            rq.content (e.status, e.type) << e.content << endl;
             return rq.flush ();
           }
-          catch (const std::exception& e)
+          catch (const exception& e)
           {
             lg.write (nullptr, 0, func_name.c_str (), APLOG_ERR, e.what ());
           }
@@ -163,7 +167,7 @@ namespace web
 
         return e.status;
       }
-      catch (const std::exception& e)
+      catch (const exception& e)
       {
         lg.write (nullptr, 0, func_name.c_str (), APLOG_ERR, e.what ());
 
@@ -173,11 +177,11 @@ namespace web
           {
             rq.content (
               HTTP_INTERNAL_SERVER_ERROR, "text/plain;charset=utf-8")
-              << e;
+              << e << endl;
 
             return rq.flush ();
           }
-          catch (const std::exception& e)
+          catch (const exception& e)
           {
             lg.write (nullptr, 0, func_name.c_str (), APLOG_ERR, e.what ());
           }
@@ -193,11 +197,11 @@ namespace web
           {
             rq.content (
               HTTP_INTERNAL_SERVER_ERROR, "text/plain;charset=utf-8")
-              << "unknown error";
+              << "unknown error" << endl;
 
             return rq.flush ();
           }
-          catch (const std::exception& e)
+          catch (const exception& e)
           {
             lg.write (nullptr, 0, func_name.c_str (), APLOG_ERR, e.what ());
           }
diff --git a/web/buildfile b/web/buildfile
index 6eb5ee5..df5f812 100644
--- a/web/buildfile
+++ b/web/buildfile
@@ -8,6 +8,7 @@
 # While we don't need to link to APR, we need to find its header location.
 #
 import libs  = libapr1%lib{apr-1}
+import libs += libapreq2%lib{apreq2}
 import libs += libstudxml%lib{studxml}
 import libs += libbutl%lib{butl}
 
diff --git a/web/module.hxx b/web/module.hxx
index de534fb..dd98c29 100644
--- a/web/module.hxx
+++ b/web/module.hxx
@@ -62,7 +62,7 @@ namespace web
     sequence_error (std::string d): std::runtime_error (std::move (d)) {}
   };
 
-  // Map of module configuration option names to the boolean flag indicating
+  // Map of handler configuration option names to the boolean flag indicating
   // whether the value is expected for the option.
   //
   using option_descriptions = std::map<std::string, bool>;
@@ -90,9 +90,9 @@ namespace web
     virtual
     ~request () = default;
 
-    // Corresponds to abs_path portion of HTTP URL as described in
-    // "3.2.2 HTTP URL" of http://tools.ietf.org/html/rfc2616.
-    // Returns '/' if no abs_path is present in URL.
+    // Corresponds to abs_path portion of HTTP URL as described in "3.2.2 HTTP
+    // URL" of http://tools.ietf.org/html/rfc2616. Returns '/' if no abs_path
+    // is present in URL.
     //
     virtual const path_type&
     path () = 0;
@@ -102,10 +102,43 @@ namespace web
     //   in name_values.
     //@@ Maybe parameter_list() and parameter_map()?
     //
-    // Throw invalid_request if decoding of any name or value fails.
+    // Parse parameters from the URL query part and from the HTTP POST request
+    // body for the application/x-www-form-urlencoded or multipart/form-data
+    // content type. Optionally limit the amount of data read from the body
+    // (see the content() function for the semantics). Throw invalid_request
+    // if parameter decoding fails.
     //
     virtual const name_values&
-    parameters () = 0;
+    parameters (std::size_t limit, bool url_only = false) = 0;
+
+    // Open the input stream for the upload corresponding to the specified
+    // parameter index. Must be called after the parameters() function is
+    // called, throw sequence_error if that's not the case. Throw
+    // invalid_argument if the index doesn't have an upload (for example,
+    // because the parameter is not an <input type="file"/> form field).
+    //
+    // Note also that reopening the same upload (within the same retry)
+    // returns the same stream reference.
+    //
+    virtual std::istream&
+    open_upload (std::size_t index) = 0;
+
+    // As above but specify the parameter by name. Throw invalid_argument if
+    // there are multiple uploads for this parameter name.
+    //
+    virtual std::istream&
+    open_upload (const std::string& name) = 0;
+
+    // Request headers.
+    //
+    // The implementation may add custom pseudo-headers reflecting additional
+    // request options. Such headers should start with ':'. If possible, the
+    // implementation should add the following well-known pseudo-headers:
+    //
+    // :Client-IP - IP address of the connecting client.
+    //
+    virtual const name_values&
+    headers () = 0;
 
     // Throw invalid_request if cookies are malformed.
     //
@@ -126,7 +159,7 @@ namespace web
     // sequence_error exception being thrown.
     //
     virtual std::istream&
-    content (size_t limit = 0, size_t buffer = 0) = 0;
+    content (std::size_t limit, std::size_t buffer = 0) = 0;
   };
 
   class response
@@ -145,7 +178,7 @@ namespace web
     // and the status code is changed, then the old content is
     // discarded. If the content was not buffered and the status
     // is changed, then the sequence_error exception is thrown.
-    // If this exception leaves module::handle(), then the
+    // If this exception leaves handler::handle(), then the
     // implementation shall terminate the response in a suitable
     // but unspecified manner. In particular, there is no guarantee
     // that the user will be notified of an error or observe the
@@ -176,11 +209,11 @@ namespace web
             bool buffer = true) = 0;
   };
 
-  // A web server logging backend. The module can use it to log
+  // A web server logging backend. The handler can use it to log
   // diagnostics that is meant for the web server operator rather
   // than the user.
   //
-  // The module can cast this basic interface to the web server's
+  // The handler can cast this basic interface to the web server's
   // specific implementation that may provide a richer interface.
   //
   class log
@@ -193,39 +226,39 @@ namespace web
     write (const char* msg) = 0;
   };
 
-  // The web server creates a new module instance for each request
-  // by copy-initializing it with the module exemplar. This way we
-  // achieve two things: we can freely use module data members
+  // The web server creates a new handler instance for each request
+  // by copy-initializing it with the handler exemplar. This way we
+  // achieve two things: we can freely use handler data members
   // without worrying about multi-threading issues and we
   // automatically get started with the initial state for each
   // request. If you really need to share some rw-data between
-  // all the modules, use static data members with appropriate
+  // all the handlers, use static data members with appropriate
   // locking. See the <service> header in one of the web server
   // directories (e.g., apache/) if you need to see the code that
   // does this.
   //
-  class module
+  class handler
   {
   public:
     virtual
-    ~module () = default;
+    ~handler () = default;
 
-    // Description of configuration options supported by this module. Note:
+    // Description of configuration options supported by this handler. Note:
     // should be callable during static initialization.
     //
     virtual option_descriptions
     options () = 0;
 
-    // During startup the web server calls this function on the module
-    // exemplar to log the module version information. It is up to the web
-    // server whether to call this function once per module implementation
+    // During startup the web server calls this function on the handler
+    // exemplar to log the handler version information. It is up to the web
+    // server whether to call this function once per handler implementation
     // type. Therefore, it is expected that this function will log the same
-    // information for all the module exemplars.
+    // information for all the handler exemplars.
     //
     virtual void
     version (log&) = 0;
 
-    // During startup the web server calls this function on the module
+    // During startup the web server calls this function on the handler
     // exemplar passing a list of configuration options. The place these
     // configuration options come from is implementation-specific (normally
     // a configuration file). The web server guarantees that only options
@@ -242,7 +275,7 @@ namespace web
     // unspecified manner.
     //
     // Throw retry if need to retry handling the request. The retry will
-    // happen on the same instance of the module and the implementation is
+    // happen on the same instance of the handler and the implementation is
     // expected to "rewind" the request and response objects to their initial
     // state. This is only guaranteed to be possible if the relevant functions
     // in the request and response objects were called in buffered mode (the
author	Karen Arutyunov <karen@codesynthesis.com>	2018-07-07 19:09:53 +0300
committer	Karen Arutyunov <karen@codesynthesis.com>	2018-07-10 22:03:18 +0300
commit	21033565488f6c63b4c40962cccfdc8b6ca32b2a (patch)
tree	44732ab7e1c7a7b25e64b82bf61d293f6cff2f86 /web
parent	026377d0c145277b24b3af5fdcf707222e854bd3 (diff)