diff options
author | Karen Arutyunov <karen@codesynthesis.com> | 2020-03-18 22:17:49 +0300 |
---|---|---|
committer | Karen Arutyunov <karen@codesynthesis.com> | 2020-03-27 17:28:44 +0300 |
commit | 35359f038f571dc46de3d14af72a2bc911fb0a24 (patch) | |
tree | de3e89d678e78b9efc4d395274fd7ccc68f4a213 /web/server | |
parent | 8ad672cc7211952716ffe1fbf76c179b4f1149e3 (diff) |
Implement brep-monitor
Diffstat (limited to 'web/server')
-rw-r--r-- | web/server/apache/log.hxx | 80 | ||||
-rw-r--r-- | web/server/apache/request.cxx | 1005 | ||||
-rw-r--r-- | web/server/apache/request.hxx | 233 | ||||
-rw-r--r-- | web/server/apache/request.ixx | 45 | ||||
-rw-r--r-- | web/server/apache/service.cxx | 268 | ||||
-rw-r--r-- | web/server/apache/service.hxx | 333 | ||||
-rw-r--r-- | web/server/apache/service.txx | 213 | ||||
-rw-r--r-- | web/server/apache/stream.hxx | 148 | ||||
-rw-r--r-- | web/server/buildfile | 15 | ||||
-rw-r--r-- | web/server/mime-url-encoding.cxx | 66 | ||||
-rw-r--r-- | web/server/mime-url-encoding.hxx | 32 | ||||
-rw-r--r-- | web/server/module.hxx | 299 |
12 files changed, 2737 insertions, 0 deletions
diff --git a/web/server/apache/log.hxx b/web/server/apache/log.hxx new file mode 100644 index 0000000..f7738ef --- /dev/null +++ b/web/server/apache/log.hxx @@ -0,0 +1,80 @@ +// file : web/server/apache/log.hxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#ifndef WEB_SERVER_APACHE_LOG_HXX +#define WEB_SERVER_APACHE_LOG_HXX + +#include <httpd.h> // request_rec, server_rec +#include <http_log.h> +#include <http_config.h> // module + +#include <cstdint> // uint64_t +#include <algorithm> // min() + +#include <web/server/module.hxx> + +namespace web +{ + namespace apache + { + class log: public web::log + { + public: + + log (server_rec* s, const ::module* m) noexcept + : server_ (s), module_ (m) {} + + virtual void + write (const char* msg) {write (APLOG_ERR, msg);} + + // Apache-specific interface. + // + void + write (int level, const char* msg) const noexcept + { + write (nullptr, 0, nullptr, level, msg); + } + + void + write (const char* file, + std::uint64_t line, + const char* func, + int level, + const char* msg) const noexcept + { + if (file && *file == '\0') + file = nullptr; // Skip empty file/line placeholder from log line. + + level = std::min (level, APLOG_TRACE8); + + if (func) + ap_log_error (file, + line, + module_->module_index, + level, + 0, + server_, + "[%s]: %s", + func, + msg); + else + // Skip function name placeholder from log line. + // + ap_log_error (file, + line, + module_->module_index, + level, + 0, + server_, + ": %s", + msg); + } + + private: + server_rec* server_; + const ::module* module_; // Apache module. 
+ }; + } +} + +#endif // WEB_SERVER_APACHE_LOG_HXX diff --git a/web/server/apache/request.cxx b/web/server/apache/request.cxx new file mode 100644 index 0000000..a413081 --- /dev/null +++ b/web/server/apache/request.cxx @@ -0,0 +1,1005 @@ +// file : web/server/apache/request.cxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#include <web/server/apache/request.hxx> + +#include <apr.h> // APR_SIZE_MAX +#include <apr_errno.h> // apr_status_t, APR_SUCCESS, APR_E*, apr_strerror() +#include <apr_tables.h> // apr_table_*, apr_table_*(), apr_array_header_t +#include <apr_strings.h> // apr_pstrdup() +#include <apr_buckets.h> // apr_bucket*, apr_bucket_*(), apr_brigade_*(), + // APR_BRIGADE_*() + +#include <httpd.h> // request_rec, HTTP_*, OK +#include <http_protocol.h> // ap_*() + +#include <apreq2/apreq.h> // APREQ_* +#include <apreq2/apreq_util.h> // apreq_brigade_copy() +#include <apreq2/apreq_param.h> // apreq_param_t, apreq_value_to_param() +#include <apreq2/apreq_parser.h> // apreq_parser_t, apreq_parser_make() + +#include <ctime> // strftime(), time_t +#include <vector> +#include <chrono> +#include <memory> // unique_ptr +#include <string> +#include <cassert> +#include <ostream> +#include <istream> +#include <cstring> // str*(), memcpy(), size_t +#include <utility> // move() +#include <iterator> // istreambuf_iterator +#include <stdexcept> // invalid_argument, runtime_error +#include <exception> // current_exception() +#include <streambuf> +#include <algorithm> // min() + +#include <libbutl/utility.mxx> // icasecmp() +#include <libbutl/optional.mxx> +#include <libbutl/timestamp.mxx> + +#include <web/server/mime-url-encoding.hxx> + +using namespace std; +using namespace butl; + +namespace web +{ + namespace apache + { + [[noreturn]] static void + throw_internal_error (apr_status_t s, const string& what) + { + char buf[1024]; + throw runtime_error (what + ": " + apr_strerror (s, buf, sizeof (buf))); + } + + // Extend the Apache stream with checking 
for the read limit and caching + // the content if requested. Replay the cached content after rewind. + // + class istreambuf_cache: public istreambuf + { + enum class mode + { + cache, // Read from Apache stream, save the read data into the cache. + replay, // Read from the cache. + proxy // Read from Apache stream (don't save into the cache). + }; + + public: + istreambuf_cache (size_t read_limit, size_t cache_limit, + request_rec* r, + stream_state& s, + size_t bufsize = 1024, + size_t putback = 1) + : istreambuf (r, s, bufsize, putback), + read_limit_ (read_limit), + cache_limit_ (cache_limit) + { + } + + void + rewind () + { + // Fail if some content is already missed in the cache. + // + if (mode_ == mode::proxy) + throw sequence_error ( + string ("web::apache::istreambuf_cache::rewind: ") + + (cache_limit_ > 0 + ? "half-buffered" + : "unbuffered")); + + mode_ = mode::replay; + replay_pos_ = 0; + setg (nullptr, nullptr, nullptr); + } + + void + limits (size_t read_limit, size_t cache_limit) + { + if (read_limit > 0) + read_limit_ = read_limit; + + if (cache_limit > 0) + { + // We can not increase the cache limit if some content is already + // missed in the cache. + // + if (cache_limit > cache_limit_ && mode_ == mode::proxy) + throw sequence_error ( + "web::apache::istreambuf_cache::limits: unbuffered"); + + cache_limit_ = cache_limit; + } + } + + size_t read_limit () const noexcept {return read_limit_;} + size_t cache_limit () const noexcept {return cache_limit_;} + + private: + virtual int_type + underflow (); + + private: + // Limits + // + size_t read_limit_; + size_t cache_limit_; + + // State + // + mode mode_ = mode::cache; + size_t read_bytes_ = 0; + bool eof_ = false; // End of Apache stream is reached. + + // Cache + // + struct chunk + { + vector<char> data; + size_t offset; + + chunk (vector<char>&& d, size_t o): data (move (d)), offset (o) {} + + // Make the type move constructible-only to avoid copying of chunks on + // vector growth. 
+ // + chunk (chunk&&) = default; + }; + + vector<chunk> cache_; + size_t cache_size_ = 0; + size_t replay_pos_ = 0; + }; + + istreambuf_cache::int_type istreambuf_cache:: + underflow () + { + if (gptr () < egptr ()) + return traits_type::to_int_type (*gptr ()); + + if (mode_ == mode::replay) + { + if (replay_pos_ < cache_.size ()) + { + chunk& ch (cache_[replay_pos_++]); + char* p (ch.data.data ()); + setg (p, p + ch.offset, p + ch.data.size ()); + return traits_type::to_int_type (*gptr ()); + } + + // No more data to replay, so switch to the cache mode. That includes + // resetting eback, gptr and egptr, so they point into the istreambuf's + // internal buffer. Putback area should also be restored. + // + mode_ = mode::cache; + + // Bail out if the end of stream is reached. + // + if (eof_) + return traits_type::eof (); + + char* p (buf_.data () + putback_); + size_t pb (0); + + // Restore putback area if there is any cached data. Thanks to + // istreambuf, it's all in a single chunk. + // + if (!cache_.empty ()) + { + chunk& ch (cache_.back ()); + pb = min (putback_, ch.data.size ()); + memcpy (p - pb, ch.data.data () + ch.data.size () - pb, pb); + } + + setg (p - pb, p, p); + } + + // Delegate reading to the base class in the cache or proxy modes, but + // check for the read limit first. + // + if (read_limit_ && read_bytes_ >= read_limit_) + throw invalid_request (HTTP_REQUEST_ENTITY_TOO_LARGE, + "payload too large"); + + // Throws the sequence_error exception if some unbuffered content is + // already written. + // + int_type r (istreambuf::underflow ()); + + if (r == traits_type::eof ()) + { + eof_ = true; + return r; + } + + // Increment the read bytes counter. + // + size_t rb (egptr () - gptr ()); + read_bytes_ += rb; + + // In the cache mode save the read data if the cache limit is not + // reached, otherwise switch to the proxy mode. 
+ // + if (mode_ == mode::cache) + { + // Not to complicate things we will copy the buffer into the cache + // together with the putback area, which is OK as it usually takes a + // small fraction of the buffer. By the same reason we will cache the + // whole data read even though we can exceed the limits by + // bufsize - putback - 1 bytes. + // + if (cache_size_ < cache_limit_) + { + chunk ch (vector<char> (eback (), egptr ()), + static_cast<size_t> (gptr () - eback ())); + + cache_.emplace_back (move (ch)); + cache_size_ += rb; + } + else + mode_ = mode::proxy; + } + + return r; + } + + // Stream interface for reading from the Apache's bucket brigade. Put back + // is not supported. + // + // Note that reading from a brigade bucket modifies the brigade in the + // general case. For example, reading from a file bucket adds a new heap + // bucket before the file bucket on every read. Traversing/reading through + // such a bucket brigade effectively loads the whole file into the memory, + // so the subsequent brigade traversal results in iterating over the + // loaded heap buckets. + // + // To avoid such a behavior we will make a shallow copy of the original + // bucket brigade, initially and for each rewind. Then, instead of + // iterating, we will always read from the first bucket removing it after + // the use. + // + class istreambuf_buckets: public streambuf + { + public: + // The bucket brigade must exist during the object's lifetime. + // + explicit + istreambuf_buckets (const apr_bucket_brigade* bs) + : orig_buckets_ (bs), + buckets_ (apr_brigade_create (bs->p, bs->bucket_alloc)) + + { + if (buckets_ == nullptr) + throw_internal_error (APR_ENOMEM, "apr_brigade_create"); + + rewind (); // Copy the original buckets. + } + + void + rewind () + { + // Note that apreq_brigade_copy() appends buckets to the destination, + // so we clean it up first. 
+ // + apr_status_t r (apr_brigade_cleanup (buckets_.get ())); + if (r != APR_SUCCESS) + throw_internal_error (r, "apr_brigade_cleanup"); + + r = apreq_brigade_copy ( + buckets_.get (), + const_cast<apr_bucket_brigade*> (orig_buckets_)); + + if (r != APR_SUCCESS) + throw_internal_error (r, "apreq_brigade_copy"); + + setg (nullptr, nullptr, nullptr); + } + + private: + virtual int_type + underflow () + { + if (gptr () < egptr ()) + return traits_type::to_int_type (*gptr ()); + + // If the get-pointer is not NULL then it points to the data referred + // by the first brigade bucket. As we will bail out or rewrite such a + // pointer now there is no need for the bucket either, so we can + // safely delete it. + // + if (gptr () != nullptr) + { + assert (!APR_BRIGADE_EMPTY (buckets_)); + + // Note that apr_bucket_delete() is a macro and the following + // call ends up badly (with SIGSEGV). + // + // apr_bucket_delete (APR_BRIGADE_FIRST (buckets_)); + // + apr_bucket* b (APR_BRIGADE_FIRST (buckets_)); + apr_bucket_delete (b); + } + + if (APR_BRIGADE_EMPTY (buckets_)) + return traits_type::eof (); + + apr_size_t n; + const char* d; + apr_bucket* b (APR_BRIGADE_FIRST (buckets_)); + apr_status_t r (apr_bucket_read (b, &d, &n, APR_BLOCK_READ)); + + if (r != APR_SUCCESS) + throw_internal_error (r, "apr_bucket_read"); + + char* p (const_cast<char*> (d)); + setg (p, p, p + n); + return traits_type::to_int_type (*gptr ()); + } + + private: + const apr_bucket_brigade* orig_buckets_; + + struct brigade_deleter + { + void operator() (apr_bucket_brigade* p) const + { + if (p != nullptr) + { + apr_status_t r (apr_brigade_destroy (p)); + + // Shouldn't fail unless something is severely damaged. 
+ // + assert (r == APR_SUCCESS); + } + } + }; + + unique_ptr<apr_bucket_brigade, brigade_deleter> buckets_; + }; + + class istream_buckets_base + { + public: + explicit + istream_buckets_base (const apr_bucket_brigade* bs): buf_ (bs) {} + + protected: + istreambuf_buckets buf_; + }; + + class istream_buckets: public istream_buckets_base, public istream + { + public: + explicit + istream_buckets (const apr_bucket_brigade* bs) + // Note that calling dtor for istream object before init() is called + // is undefined behavior. That's the reason for inventing the + // istream_buckets_base class. + // + : istream_buckets_base (bs), istream (&buf_) + { + exceptions (failbit | badbit); + } + + void + rewind () + { + buf_.rewind (); + clear (); // Clears *bit flags (in particular eofbit). + } + }; + + // request + // + request:: + request (request_rec* rec) noexcept + : rec_ (rec) + { + rec_->status = HTTP_OK; + } + + request:: + ~request () + { + } + + void request:: + state (request_state s) + { + assert (s != request_state::initial); + + if (s == state_) + return; // Noop. + + if (s < state_) + { + // Can't "unwind" irrevocable interaction with Apache API. + // + static const char* names[] = { + "initial", "reading", "headers", "writing"}; + + string str ("web::apache::request::set_state: "); + str += names[static_cast<size_t> (state_)]; + str += " to "; + str += names[static_cast<size_t> (s)]; + + throw sequence_error (move (str)); + } + + if (s == request_state::reading) + { + // Prepare request content for reading. + // + int r (ap_setup_client_block (rec_, REQUEST_CHUNKED_DECHUNK)); + + if (r != OK) + throw invalid_request (r); + } + else if (s > request_state::reading && state_ <= request_state::reading) + { + // Read request content if any, discard whatever is received. 
+ // + int r (ap_discard_request_body (rec_)); + + if (r != OK) + throw invalid_request (r); + } + + state_ = s; + } + + void request:: + rewind () + { + // @@ Response cookies buffering is not supported yet. When done will be + // possible to rewind in broader range of cases. + // + if (state_ > request_state::reading) + throw sequence_error ("web::apache::request::rewind: unbuffered"); + + out_.reset (); + out_buf_.reset (); + + rec_->status = HTTP_OK; + + ap_set_content_type (rec_, nullptr); // Unset the output content type. + + // We don't need to rewind the input stream (which well may fail if + // unbuffered) if the form data is already read. + // + if (in_ != nullptr && form_data_ == nullptr) + { + assert (in_buf_ != nullptr); + + in_buf_->rewind (); // Throws if impossible to rewind. + in_->clear (); // Clears *bit flags (in particular eofbit). + } + + // Rewind uploaded file streams. + // + if (uploads_ != nullptr) + { + for (const unique_ptr<istream_buckets>& is: *uploads_) + { + if (is != nullptr) + is->rewind (); + } + } + } + + istream& request:: + content (size_t limit, size_t buffer) + { + // Create the input stream/streambuf if not present, otherwise adjust the + // limits. + // + if (in_ == nullptr) + { + unique_ptr<istreambuf_cache> in_buf ( + new istreambuf_cache (limit, buffer, rec_, *this)); + + in_.reset (new istream (in_buf.get ())); + in_buf_ = move (in_buf); + in_->exceptions (istream::failbit | istream::badbit); + } + else + { + assert (in_buf_ != nullptr); + in_buf_->limits (limit, buffer); + } + + return *in_; + } + + const path& request:: + path () + { + if (path_.empty ()) + { + path_ = path_type (rec_->uri); // Is already URL-decoded. + + // Module request handler can not be called if URI is empty. 
+ // + assert (!path_.empty ()); + } + + return path_; + } + + const name_values& request:: + parameters (size_t limit, bool url_only) + { + if (parameters_ == nullptr || url_only < url_only_parameters_) + { + try + { + if (parameters_ == nullptr) + { + parameters_.reset (new name_values ()); + parse_url_parameters (rec_->args); + } + + if (!url_only && form_data (limit)) + { + // After the form data is parsed we can clean it up for the + // application/x-www-form-urlencoded encoding but not for the + // multipart/form-data (see parse_multipart_parameters() for + // details). + // + if (form_multipart_) + parse_multipart_parameters (*form_data_); + else + { + // Make the character vector a NULL-terminated string. + // + form_data_->push_back ('\0'); + + parse_url_parameters (form_data_->data ()); + *form_data_ = vector<char> (); // Reset the cache. + } + } + } + catch (const invalid_argument&) + { + throw invalid_request (); + } + + url_only_parameters_ = url_only; + } + + return *parameters_; + } + + bool request:: + form_data (size_t limit) + { + if (form_data_ == nullptr) + { + form_data_.reset (new vector<char> ()); + + // We will not consider POST body as a form data if the request is in + // the reading or later state. 
+ // + if (rec_->method_number == M_POST && state_ < request_state::reading) + { + const char* ct (apr_table_get (rec_->headers_in, "Content-Type")); + + if (ct != nullptr) + { + form_multipart_ = icasecmp ("multipart/form-data", ct, 19) == 0; + + if (form_multipart_ || + icasecmp ("application/x-www-form-urlencoded", ct, 33) == 0) + *form_data_ = vector<char> ( + istreambuf_iterator<char> (content (limit)), + istreambuf_iterator<char> ()); + } + } + } + + return !form_data_->empty (); + } + + void request:: + parse_url_parameters (const char* args) + { + assert (parameters_ != nullptr); + + for (auto n (args); n != nullptr; ) + { + const char* v (strchr (n, '=')); + const char* e (strchr (n, '&')); + + if (e != nullptr && e < v) + v = nullptr; + + string name (v != nullptr + ? mime_url_decode (n, v) : + (e + ? mime_url_decode (n, e) + : mime_url_decode (n, n + strlen (n)))); + + optional<string> value; + + if (v++) + value = e + ? mime_url_decode (v, e) + : mime_url_decode (v, v + strlen (v)); + + if (!name.empty () || value) + parameters_->emplace_back (move (name), move (value)); + + n = e ? e + 1 : nullptr; + } + } + + void request:: + parse_multipart_parameters (const vector<char>& body) + { + assert (parameters_ != nullptr && uploads_ == nullptr); + + auto throw_bad_request = [] (apr_status_t s, + status_code sc = HTTP_BAD_REQUEST) + { + char buf[1024]; + throw invalid_request (sc, apr_strerror (s, buf, sizeof (buf))); + }; + + // Create the file upload stream list, filling it with NULLs for the + // parameters parsed from the URL query part. + // + uploads_.reset ( + new vector<unique_ptr<istream_buckets>> (parameters_->size ())); + + // All the required objects (parser, input/output buckets, etc.) will be + // allocated in the request memory pool and so will have the HTTP + // request duration lifetime. + // + apr_pool_t* pool (rec_->pool); + + // Create the input bucket brigade containing a single bucket that + // references the form data. 
+ // + apr_bucket_alloc_t* ba (apr_bucket_alloc_create (pool)); + if (ba == nullptr) + throw_internal_error (APR_ENOMEM, "apr_bucket_alloc_create"); + + apr_bucket_brigade* bb (apr_brigade_create (pool, ba)); + if (bb == nullptr) + throw_internal_error (APR_ENOMEM, "apr_brigade_create"); + + apr_bucket* b ( + apr_bucket_immortal_create (body.data (), body.size (), ba)); + + if (b == nullptr) + throw_internal_error (APR_ENOMEM, "apr_bucket_immortal_create"); + + APR_BRIGADE_INSERT_TAIL (bb, b); + + if ((b = apr_bucket_eos_create (ba)) == nullptr) + throw_internal_error (APR_ENOMEM, "apr_bucket_eos_create"); + + APR_BRIGADE_INSERT_TAIL (bb, b); + + // Make sure that the parser will not swap the parsed data to disk + // passing the maximum possible value for the brigade limit. This way + // the resulting buckets will reference the form data directly, making + // no copies. This is why we should not reset the form data cache after + // the parsing. + // + // Note that in future we may possibly setup the parser to read from the + // Apache internals directly and enable swapping the data to disk to + // minimize memory consumption. + // + apreq_parser_t* parser ( + apreq_parser_make (pool, + ba, + apr_table_get (rec_->headers_in, "Content-Type"), + apreq_parse_multipart, + APR_SIZE_MAX /* brigade_limit */, + nullptr /* temp_dir */, + nullptr /* hook */, + nullptr /* ctx */)); + + if (parser == nullptr) + throw_internal_error (APR_ENOMEM, "apreq_parser_make"); + + // Create the output table that will be filled with the parsed + // parameters. + // + apr_table_t* params (apr_table_make (pool, APREQ_DEFAULT_NELTS)); + if (params == nullptr) + throw_internal_error (APR_ENOMEM, "apr_table_make"); + + // Parse the form data. + // + apr_status_t r (apreq_parser_run (parser, params, bb)); + if (r != APR_SUCCESS) + throw_bad_request (r); + + // Fill the parameter and file upload stream lists. 
+ // + const apr_array_header_t* ps (apr_table_elts (params)); + size_t n (ps->nelts); + + for (auto p (reinterpret_cast<const apr_table_entry_t*> (ps->elts)); + n--; ++p) + { + assert (p->key != nullptr && p->val != nullptr); + + if (*p->key != '\0') + { + parameters_->emplace_back (p->key, optional<string> (p->val)); + + const apreq_param_t* ap (apreq_value_to_param (p->val)); + assert (ap != nullptr); // Must always be resolvable. + + uploads_->emplace_back (ap->upload != nullptr + ? new istream_buckets (ap->upload) + : nullptr); + } + } + } + + request::uploads_type& request:: + uploads () const + { + if (parameters_ == nullptr || url_only_parameters_) + throw sequence_error ("web::apache::request::uploads"); + + if (uploads_ == nullptr) + throw invalid_argument ("no uploads"); + + assert (uploads_->size () == parameters_->size ()); + return *uploads_; + } + + istream& request:: + open_upload (size_t index) + { + uploads_type& us (uploads ()); + size_t n (us.size ()); + + if (index >= n) + throw invalid_argument ("invalid index"); + + const unique_ptr<istream_buckets>& is (us[index]); + + if (is == nullptr) + throw invalid_argument ("no upload"); + + return *is; + } + + istream& request:: + open_upload (const string& name) + { + uploads_type& us (uploads ()); + size_t n (us.size ()); + + istream* r (nullptr); + for (size_t i (0); i < n; ++i) + { + if ((*parameters_)[i].name == name) + { + istream* is (us[i].get ()); + + if (is != nullptr) + { + if (r != nullptr) + throw invalid_argument ("multiple uploads for '" + name + "'"); + + r = is; + } + } + } + + if (r == nullptr) + throw invalid_argument ("no upload"); + + return *r; + } + + const name_values& request:: + headers () + { + if (headers_ == nullptr) + { + headers_.reset (new name_values ()); + + const apr_array_header_t* ha (apr_table_elts (rec_->headers_in)); + size_t n (ha->nelts); + + headers_->reserve (n + 1); // One for the custom :Client-IP header. 
+ + auto add = [this] (const char* n, const char* v) + { + assert (n != nullptr && v != nullptr); + headers_->emplace_back (n, optional<string> (v)); + }; + + for (auto h (reinterpret_cast<const apr_table_entry_t*> (ha->elts)); + n--; ++h) + add (h->key, h->val); + + assert (rec_->connection != nullptr); + + add (":Client-IP", rec_->connection->client_ip); + } + + return *headers_; + } + + const name_values& request:: + cookies () + { + if (cookies_ == nullptr) + { + cookies_.reset (new name_values ()); + + const apr_array_header_t* ha (apr_table_elts (rec_->headers_in)); + size_t n (ha->nelts); + + for (auto h (reinterpret_cast<const apr_table_entry_t*> (ha->elts)); + n--; ++h) + { + assert (h->key != nullptr); + + if (icasecmp (h->key, "Cookie") == 0) + { + for (const char* n (h->val); n != nullptr; ) + { + const char* v (strchr (n, '=')); + const char* e (strchr (n, ';')); + + if (e != nullptr && e < v) + v = nullptr; + + string name (v != nullptr + ? mime_url_decode (n, v, true) + : (e + ? mime_url_decode (n, e, true) + : mime_url_decode (n, n + strlen (n), true))); + + optional<string> value; + + if (v++) + value = e + ? mime_url_decode (v, e, true) + : mime_url_decode (v, v + strlen (v), true); + + if (!name.empty () || value) + cookies_->emplace_back (move (name), move (value)); + + n = e ? e + 1 : nullptr; + } + } + } + } + + return *cookies_; + } + + ostream& request:: + content (status_code status, const string& type, bool buffer) + { + if (out_ && + + // Same status code. + // + status == rec_->status && + + // Same buffering flag. + // + buffer == + (dynamic_cast<stringbuf*> (out_buf_.get ()) != nullptr) && + + // Same content type. + // + icasecmp (type, rec_->content_type ? rec_->content_type : "") == 0) + { + // No change, return the existing stream. 
+ // + return *out_; + } + + if (state_ >= request_state::writing) + throw sequence_error ("web::apache::request::content"); + + if (!buffer) + // Request body will be discarded prior first byte of content is + // written. Save form data now to make it available for future + // parameters() call. + // + // In the rare cases when the form data is expectedly bigger than 64K + // the client can always call parameters(limit) explicitly. + // + form_data (64 * 1024); + + unique_ptr<streambuf> out_buf ( + buffer + ? static_cast<streambuf*> (new stringbuf ()) + : static_cast<streambuf*> (new ostreambuf (rec_, *this))); + + out_.reset (new ostream (out_buf.get ())); + out_buf_ = move (out_buf); + out_->exceptions (ostream::eofbit | ostream::failbit | ostream::badbit); + + rec_->status = status; + + ap_set_content_type ( + rec_, + type.empty () ? nullptr : apr_pstrdup (rec_->pool, type.c_str ())); + + return *out_; + } + + void request:: + status (status_code status) + { + if (status != rec_->status) + { + // Setting status code in exception handler is a common usecase + // where no sense to throw but still need to signal apache a + // proper status code. + // + if (state_ >= request_state::writing && !current_exception ()) + throw sequence_error ("web::apache::request::status"); + + rec_->status = status; + out_.reset (); + out_buf_.reset (); + ap_set_content_type (rec_, nullptr); + } + } + + void request:: + cookie (const char* name, + const char* value, + const chrono::seconds* max_age, + const char* path, + const char* domain, + bool secure, + bool buffer) + { + assert (!buffer); // Cookie buffering is not implemented yet. + + string s (mime_url_encode (name)); + s += "="; + s += mime_url_encode (value); + + if (max_age) + { + timestamp tp (system_clock::now () + *max_age); + time_t t (system_clock::to_time_t (tp)); + + // Assume global locale is not changed and still "C". 
+ // + char b[100]; + strftime (b, sizeof (b), "%a, %d-%b-%Y %H:%M:%S GMT", gmtime (&t)); + s += "; Expires="; + s += b; + } + + if (path) + { + s += ";Path="; + s += path; + } + + if (domain) + { + s += ";Domain="; + s += domain; + } + + if (secure) + s += ";Secure"; + + state (request_state::headers); + apr_table_add (rec_->err_headers_out, "Set-Cookie", s.c_str ()); + } + } +} diff --git a/web/server/apache/request.hxx b/web/server/apache/request.hxx new file mode 100644 index 0000000..bc105ec --- /dev/null +++ b/web/server/apache/request.hxx @@ -0,0 +1,233 @@ +// file : web/server/apache/request.hxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#ifndef WEB_SERVER_APACHE_REQUEST_HXX +#define WEB_SERVER_APACHE_REQUEST_HXX + +#include <httpd.h> // request_rec, HTTP_*, OK, M_POST + +#include <chrono> +#include <memory> // unique_ptr +#include <string> +#include <vector> +#include <istream> +#include <ostream> +#include <streambuf> + +#include <web/server/module.hxx> +#include <web/server/apache/stream.hxx> + +namespace web +{ + namespace apache + { + // The state of the request processing, reflecting an interaction with + // Apache API (like reading/writing content function calls), with no + // buffering taken into account. Any state different from the initial + // suppose that some irrevocable interaction with Apache API have + // happened, so request processing should be either completed, or + // reported as failed. State values are ordered in a sense that the + // higher value reflects the more advanced stage of processing, so the + // request current state value may not decrease. + // + enum class request_state + { + // Denotes the initial stage of the request handling. At this stage + // the request line and headers are already parsed by Apache. + // + initial, + + // Reading the request content. + // + reading, + + // Adding the response headers (cookies in particular). + // + headers, + + // Writing the response content. 
+ // + writing + }; + + // Extends istreambuf with read limit checking, caching, etc. (see the + // implementation for details). + // + class istreambuf_cache; + + // Stream type for reading from Apache's bucket brigades. + // + class istream_buckets; + + class request: public web::request, + public web::response, + public stream_state + { + friend class service; + + // Can not be inline/default due to the member of + // unique_ptr<istreambuf_cache> type. Note that istreambuf_cache type is + // incomplete. + // + request (request_rec* rec) noexcept; + ~request (); + + request_state + state () const noexcept {return state_;} + + // Flush the buffered response content if present. The returned value + // should be passed to Apache API on request handler exit. + // + int + flush (); + + // Prepare for the request re-processing if possible (no unbuffered + // read/write operations have been done). Throw sequence_error + // otherwise. In particular, the preparation can include the response + // content buffer cleanup, the request content buffer rewind. + // + void + rewind (); + + // Get request path. + // + virtual const path_type& + path (); + + // Get request body data stream. + // + virtual std::istream& + content (std::size_t limit = 0, std::size_t buffer = 0); + + // Get request parameters. + // + virtual const name_values& + parameters (std::size_t limit, bool url_only = false); + + // Get upload stream. + // + virtual std::istream& + open_upload (std::size_t index); + + virtual std::istream& + open_upload (const std::string& name); + + // Get request headers. + // + virtual const name_values& + headers (); + + // Get request cookies. + // + virtual const name_values& + cookies (); + + // Get response status code. + // + status_code + status () const noexcept {return rec_->status;} + + // Set response status code. + // + virtual void + status (status_code status); + + // Set response status code, content type and get body stream. 
+ // + virtual std::ostream& + content (status_code status, + const std::string& type, + bool buffer = true); + + // Add response cookie. + // + virtual void + cookie (const char* name, + const char* value, + const std::chrono::seconds* max_age = nullptr, + const char* path = nullptr, + const char* domain = nullptr, + bool secure = false, + bool buffer = true); + + private: + // On the first call cache the application/x-www-form-urlencoded or + // multipart/form-data form data for the subsequent parameters parsing + // and set the multipart flag accordingly. Don't cache if the request is + // in the reading or later state. Return true if the cache contains the + // form data. + // + // Note that the function doesn't change the content buffering (see + // content() function for details) nor rewind the content stream after + // reading. + // + bool + form_data (std::size_t limit); + + // Used to also parse application/x-www-form-urlencoded POST body. + // + void + parse_url_parameters (const char* args); + + void + parse_multipart_parameters (const std::vector<char>& body); + + // Return a list of the upload input streams. Throw sequence_error if + // the parameters() function was not called yet. Throw invalid_argument + // if the request doesn't contain multipart form data. + // + using uploads_type = std::vector<std::unique_ptr<istream_buckets>>; + + uploads_type& + uploads () const; + + // Advance the request processing state. Noop if new state is equal to + // the current one. Throw sequence_error if the new state is less than + // the current one. Can throw invalid_request if HTTP request is + // malformed. + // + void + state (request_state); + + // stream_state members implementation. 
+ // + virtual void + set_read_state () {state (request_state::reading);} + + virtual void + set_write_state () {state (request_state::writing);} + + private: + request_rec* rec_; + request_state state_ = request_state::initial; + + path_type path_; + + std::unique_ptr<name_values> parameters_; + bool url_only_parameters_; // Meaningless if parameters_ is NULL; + + // Uploaded file streams. If not NULL, is parallel to the parameters + // list. + // + std::unique_ptr<uploads_type> uploads_; + + std::unique_ptr<name_values> headers_; + std::unique_ptr<name_values> cookies_; + + // Form data cache. Is empty if the body doesn't contain the form data. + // + std::unique_ptr<std::vector<char>> form_data_; + bool form_multipart_; // Meaningless if form_data_ is NULL or empty; + + std::unique_ptr<istreambuf_cache> in_buf_; + std::unique_ptr<std::istream> in_; + + std::unique_ptr<std::streambuf> out_buf_; + std::unique_ptr<std::ostream> out_; + }; + } +} + +#include <web/server/apache/request.ixx> + +#endif // WEB_SERVER_APACHE_REQUEST_HXX diff --git a/web/server/apache/request.ixx b/web/server/apache/request.ixx new file mode 100644 index 0000000..119fd2e --- /dev/null +++ b/web/server/apache/request.ixx @@ -0,0 +1,45 @@ +// file : web/server/apache/request.ixx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#include <http_protocol.h> // ap_*() + +#include <sstream> // stringbuf + +namespace web +{ + namespace apache + { + inline int request:: + flush () + { + if (std::stringbuf* b = dynamic_cast<std::stringbuf*> (out_buf_.get ())) + { + // Response content is buffered. + // + std::string s (b->str ()); + + if (!s.empty ()) + { + try + { + state (request_state::writing); + + if (ap_rwrite (s.c_str (), s.length (), rec_) < 0) + rec_->status = HTTP_REQUEST_TIME_OUT; + } + catch (const invalid_request& e) + { + rec_->status = e.status; + } + } + + out_.reset (); + out_buf_.reset (); + } + + return rec_->status == HTTP_OK || state_ >= request_state::writing + ? 
OK + : rec_->status; + } + } +} diff --git a/web/server/apache/service.cxx b/web/server/apache/service.cxx new file mode 100644 index 0000000..9fb23da --- /dev/null +++ b/web/server/apache/service.cxx @@ -0,0 +1,268 @@ +// file : web/server/apache/service.cxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#include <web/server/apache/service.hxx> + +#include <apr_pools.h> // apr_palloc() + +#include <httpd.h> // server_rec +#include <http_config.h> // command_rec, cmd_*, ap_get_module_config() + +#include <memory> // unique_ptr +#include <string> +#include <cassert> +#include <utility> // move() +#include <cstring> // strlen(), strcmp() +#include <exception> + +#include <libbutl/utility.mxx> // function_cast() +#include <libbutl/optional.mxx> + +#include <web/server/module.hxx> +#include <web/server/apache/log.hxx> + +using namespace std; +using namespace butl; + +namespace web +{ + namespace apache + { + void service:: + init_directives () + { + assert (cmds == nullptr); + + // Fill apache module directive definitions. Directives share common + // name space in apache configuration file, so to prevent name clash + // have to form directive name as a combination of module and option + // names: <module name>-<option name>. This why for option bar of module + // foo the corresponding directive will appear in apache configuration + // file as foo-bar. + // + const option_descriptions& od (exemplar_.options ()); + unique_ptr<command_rec[]> directives (new command_rec[od.size () + 2]); + command_rec* d (directives.get ()); + + for (const auto& o: od) + { + auto i ( + option_descriptions_.emplace (name_ + "-" + o.first, o.second)); + assert (i.second); + + *d++ = + { + i.first->first.c_str (), + function_cast<cmd_func> (parse_option), + this, + + // Allow directives in both server and directory configuration + // scopes. + // + RSRC_CONF | ACCESS_CONF, + + // Move away from TAKE1 to be able to handle empty string and + // no-value. 
+ // + RAW_ARGS, + + nullptr + }; + } + + // Track if the handler is allowed to handle a request in the specific + // configuration scope. The handler exemplar will be created (and + // initialized) only for configuration contexts that have + // 'SetHandler <mod_name>' in effect for the corresponding scope. + // + *d++ = + { + "SetHandler", + function_cast<cmd_func> (parse_option), + this, + RSRC_CONF | ACCESS_CONF, + RAW_ARGS, + nullptr + }; + + *d = {nullptr, nullptr, nullptr, 0, RAW_ARGS, nullptr}; + cmds = directives.release (); + } + + void* service:: + create_server_context (apr_pool_t* pool, server_rec*) noexcept + { + // Create the object using the configuration memory pool provided by the + // Apache API. The lifetime of the object is equal to the lifetime of + // the pool. + // + void* p (apr_palloc (pool, sizeof (context))); + assert (p != nullptr); + return new (p) context (); + } + + void* service:: + create_dir_context (apr_pool_t* pool, char* dir) noexcept + { + // Create the object using the configuration memory pool provided by the + // Apache API. The lifetime of the object is equal to the lifetime of + // the pool. + // + void* p (apr_palloc (pool, sizeof (context))); + assert (p != nullptr); + + // For the user-defined directory configuration context dir is the path + // of the corresponding directive. For the special server directory + // invented by Apache for server scope directives, dir is NULL. + // + return new (p) context (dir == nullptr); + } + + const char* service:: + parse_option (cmd_parms* parms, void* conf, const char* args) noexcept + { + service& srv (*reinterpret_cast<service*> (parms->cmd->cmd_data)); + + if (srv.options_parsed_) + // Apache have started the second pass of its messy initialization + // cycle (more details at http://wiki.apache.org/httpd/ModuleLife). + // This time we are parsing for real. Cleanup the existing config, and + // start building the new one. 
+ // + srv.clear_config (); + + // 'args' is an optionally double-quoted string. It uses double quotes + // to distinguish empty string from no-value case. + // + assert (args != nullptr); + + optional<string> value; + if (auto l = strlen (args)) + value = l >= 2 && args[0] == '"' && args[l - 1] == '"' + ? string (args + 1, l - 2) + : args; + + // Determine the directory and server configuration contexts for the + // option. + // + context* dir_context (context_cast (conf)); + assert (dir_context != nullptr); + + server_rec* server (parms->server); + assert (server != nullptr); + assert (server->module_config != nullptr); + + context* srv_context ( + context_cast (ap_get_module_config (server->module_config, &srv))); + + assert (srv_context != nullptr); + + // Associate the directory configuration context with the enclosing + // server configuration context. + // + context*& s (dir_context->server); + if (s == nullptr) + s = srv_context; + else + assert (s == srv_context); + + // If the option appears in the special directory configuration context, + // add it to the enclosing server context instead. This way it will be + // possible to complement all server-enclosed contexts (including this + // special one) with the server scope options. + // + context* c (dir_context->special ? srv_context : dir_context); + + if (dir_context->special) + // + // Make sure the special directory context is also in the option lists + // map. Later the context will be populated with an enclosing server + // context options. + // + srv.options_.emplace (dir_context, name_values ()); + + const char* name (parms->cmd->name); + if (strcmp (name, "SetHandler") == 0) + { + // Keep track of a request handling allowability. + // + srv.options_.emplace (c, name_values ()).first->first->handling = + value && *value == srv.name_ + ? 
request_handling::allowed + : request_handling::disallowed; + + return 0; + } + + return srv.add_option (c, name, move (value)); + } + + const char* service:: + add_option (context* ctx, const char* name, optional<string> value) + { + auto i (option_descriptions_.find (name)); + assert (i != option_descriptions_.end ()); + + // Check that option value presence is expected. + // + if (i->second != static_cast<bool> (value)) + return value ? "unexpected value" : "value expected"; + + options_[ctx].emplace_back (name + name_.length () + 1, move (value)); + return 0; + } + + void service:: + complement (context* enclosed, context* enclosing) + { + auto i (options_.find (enclosing)); + + // The enclosing context may have no options. It can be the context of a + // server that has no configuration directives in its immediate scope, + // but has ones in its enclosed scope (directory or virtual server). + // + if (i != options_.end ()) + { + const name_values& src (i->second); + name_values& dest (options_[enclosed]); + dest.insert (dest.begin (), src.begin (), src.end ()); + } + + if (enclosed->handling == request_handling::inherit) + enclosed->handling = enclosing->handling; + } + + void service:: + finalize_config (server_rec* s) + { + if (!version_logged_) + { + log l (s, this); + exemplar_.version (l); + version_logged_ = true; + } + + // Complement directory configuration contexts with options of the + // enclosing server configuration context. By this time virtual server + // contexts are already complemented with the main server configuration + // context options as a result of the merge_server_context() calls. + // + for (const auto& o: options_) + { + // Is a directory configuration context.
+ // + if (o.first->server != nullptr) + complement (o.first, o.first->server); + } + + options_parsed_ = true; + } + + void service:: + clear_config () + { + options_.clear (); + options_parsed_ = false; + } + } +} diff --git a/web/server/apache/service.hxx b/web/server/apache/service.hxx new file mode 100644 index 0000000..ad54d2c --- /dev/null +++ b/web/server/apache/service.hxx @@ -0,0 +1,333 @@ +// file : web/server/apache/service.hxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#ifndef WEB_SERVER_APACHE_SERVICE_HXX +#define WEB_SERVER_APACHE_SERVICE_HXX + +#include <apr_pools.h> // apr_pool_t +#include <apr_hooks.h> // APR_HOOK_* + +#include <httpd.h> // request_rec, server_rec, HTTP_*, DECLINED +#include <http_config.h> // module, cmd_parms, ap_hook_*() + +#include <map> +#include <memory> // unique_ptr +#include <string> +#include <cassert> + +#include <web/server/module.hxx> +#include <web/server/apache/log.hxx> +#include <web/server/apache/request.hxx> + +namespace web +{ + namespace apache + { + // Apache has 3 configuration scopes: main server, virtual server, and + // directory (location). It provides configuration scope-aware modules + // with the ability to build a hierarchy of configuration contexts. Later, + // when processing a request, Apache passes the appropriate directory + // configuration context to the request handler. + // + // This Apache service implementation first makes a copy of the provided + // (in the constructor below) handler exemplar for each directory context. + // It then initializes each of these "context exemplars" with the (merged) + // set of configuration options. Finally, when handling a request, it + // copies the corresponding "context exemplar" to create the "handling + // instance". Note that the "context exemplars" are created as a copy of + // the provided exemplar, which is never initialized. 
As a result, it is + // possible to detect if the handler's copy constructor is used to create + // a "context exemplar" or a "handling instance". + // + class service: ::module + { + public: + // Note that the module exemplar is stored by-reference. + // + template <typename H> + service (const std::string& name, H& exemplar) + : ::module + { + STANDARD20_MODULE_STUFF, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, + &register_hooks<H> + +#ifdef AP_MODULE_HAS_FLAGS + , AP_MODULE_FLAG_NONE +#endif + }, + name_ (name), + exemplar_ (exemplar) + { + init_directives (); + + // Set configuration context management hooks. + // + // The overall process of building the configuration hierarchy for a + // handler is as follows: + // + // 1. Apache creates directory and server configuration contexts for + // scopes containing handler-defined directives by calling the + // create_{server,dir}_context() callback functions. For directives + // at the server scope the special directory context is created as + // well. + // + // 2. Apache calls parse_option() function for each handler-defined + // directive. The function parses the directives and places the + // resulting options into the corresponding configuration context. + // It also establishes the directory-server contexts relations. + // + // 3. Apache calls merge_server_context() function for each virtual + // server. The function complements virtual server context options + // with the ones from the main server. + // + // 4. Apache calls config_finalizer() which complements the directory + // contexts options with the ones from the enclosing servers. + // + // 5. Apache calls worker_initializer() which creates handler exemplar + // for each directory configuration context that has + // 'SetHandler <mod_name>' directive in effect for it.
+ // + // References: + // http://www.apachetutor.org/dev/config + // http://httpd.apache.org/docs/2.4/developer/modguide.html + // http://wiki.apache.org/httpd/ModuleLife + // + create_server_config = &create_server_context; + create_dir_config = &create_dir_context; + merge_server_config = &merge_server_context<H>; + + // instance<H> () is invented to delegate processing from apache + // request handler C function to the service non static member + // function. This approach restricts the number of service objects per + // specific handler implementation class to just one instance. + // + service*& srv (instance<H> ()); + assert (srv == nullptr); + srv = this; + } + + ~service () + { + delete [] cmds; + } + + private: + template <typename H> + static service*& + instance () noexcept + { + static service* instance; + return instance; + } + + template <typename H> + static void + register_hooks (apr_pool_t*) noexcept + { + // The config_finalizer() function is called at the end of Apache + // server configuration parsing. + // + ap_hook_post_config (&config_finalizer<H>, NULL, NULL, APR_HOOK_LAST); + + // The worker_initializer() function is called right after Apache + // worker process is started. Called for every new process spawned. + // + ap_hook_child_init ( + &worker_initializer<H>, NULL, NULL, APR_HOOK_LAST); + + // The request_handler () function is called for each client request.
+ // + ap_hook_handler (&request_handler<H>, NULL, NULL, APR_HOOK_LAST); + } + + template <typename H> + static int + config_finalizer (apr_pool_t*, apr_pool_t*, apr_pool_t*, server_rec* s) + noexcept + { + instance<H> ()->finalize_config (s); + return OK; + } + + template <typename H> + static void + worker_initializer (apr_pool_t*, server_rec* s) noexcept + { + auto srv (instance<H> ()); + log l (s, srv); + srv->template init_worker<H> (l); + } + + template <typename H> + static int + request_handler (request_rec* r) noexcept; + + private: + + // Reflects the allowability of the request handling in the specific + // configuration scope. + // + enum class request_handling + { + // Configuration scope has 'SetHandler <mod_name>' directive + // specified. The handler is allowed to handle a request in the scope. + // + allowed, + + // Configuration scope has 'SetHandler <other_mod_name>|None' + // directive specified. The handler is disallowed to handle a request + // in the scope. + // + disallowed, + + // + // Note that if there are several SetHandler directives specified + // in the specific scope, then the latest one takes the precedence. + + // Configuration scope has no SetHandler directive specified. The + // request handling allowability is established by the enclosing + // scopes. + // + inherit + }; + + // Our representation of the Apache configuration context. + // + // The lifetime of this object is under the control of the Apache API, + // which treats it as a raw sequence of bytes. In order not to tinker + // with the C-style structures and APR memory pools, we will keep it a + // (C++11) POD type with just the members required to maintain the + // context hierarchy. + // + // We will then use the pointers to these context objects as keys in + // maps to (1) the corresponding application-level option lists during + // the configuration cycle and to (2) the corresponding handler exemplar + // during the HTTP request handling phase. 
We will also use the same + // type for both directory and server configuration contexts. + // + struct context + { + // Outer (server) configuration context for the directory + // configuration context, NULL otherwise. + // + context* server = nullptr; + + // If module directives appear directly in the server configuration + // scope, Apache creates a special directory context for them. This + // context appears at the same hierarchy level as the user-defined + // directory contexts of the same server scope. + // + bool special; + + // Request handling allowability for the corresponding configuration + // scope. + // + request_handling handling = request_handling::inherit; + + // Create the server configuration context. + // + context (): special (false) {} + + // Create the directory configuration context. Due to the Apache API + // implementation details it is not possible to detect the enclosing + // server configuration context at the time of directory context + // creation. As a result, the server member is set by the module's + // parse_option() function. + // + context (bool s): special (s) {} + + // Ensure the object is only destroyed by Apache. + // + ~context () = delete; + }; + + static context* + context_cast (void* config) noexcept + {return static_cast<context*> (config);} + + private: + void + init_directives (); + + // Create the server configuration context. Called by the Apache API + // whenever a new object of that type is required. + // + static void* + create_server_context (apr_pool_t*, server_rec*) noexcept; + + // Create the server directory configuration context. Called by the + // Apache API whenever a new object of that type is required. 
+ // + static void* + create_dir_context (apr_pool_t*, char* dir) noexcept; + + template <typename H> + static void* + merge_server_context (apr_pool_t*, void* enclosing, void* enclosed) + noexcept + { + instance<H> ()->complement ( + context_cast (enclosed), context_cast (enclosing)); + + return enclosed; + } + + static const char* + parse_option (cmd_parms* parms, void* conf, const char* args) noexcept; + + const char* + add_option (context*, const char* name, optional<std::string> value); + + void + finalize_config (server_rec*); + + void + clear_config (); + + // Complement the enclosed context with options of the enclosing one. + // If the 'handling' member of the enclosed context is set to + // request_handling::inherit value, assign it a value from the enclosing + // context. + // + void + complement (context* enclosed, context* enclosing); + + template <typename H> + void + init_worker (log&); + + template <typename H> + int + handle (request&, const context*, log&) const; + + private: + std::string name_; + handler& exemplar_; + option_descriptions option_descriptions_; + + // The context objects pointed to by the key can change during the + // configuration phase. + // + using options = std::map<context*, name_values>; + options options_; + + // The context objects pointed to by the key can not change during the + // request handling phase. 
+ // + using exemplars = std::map<const context*, std::unique_ptr<handler>>; + exemplars exemplars_; + + bool options_parsed_ = false; + bool version_logged_ = false; + }; + } +} + +#include <web/server/apache/service.txx> + +#endif // WEB_SERVER_APACHE_SERVICE_HXX diff --git a/web/server/apache/service.txx b/web/server/apache/service.txx new file mode 100644 index 0000000..1b16d0b --- /dev/null +++ b/web/server/apache/service.txx @@ -0,0 +1,213 @@ +// file : web/server/apache/service.txx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#include <httpd.h> // APEXIT_CHILDSICK +#include <http_log.h> // APLOG_* + +#include <cstdlib> // exit() +#include <utility> // move() +#include <exception> + +#include <libbutl/utility.mxx> // operator<<(ostream, exception) + +namespace web +{ + namespace apache + { + template <typename H> + void service:: + init_worker (log& l) + { + using namespace std; + + const string func_name ( + "web::apache::service<" + name_ + ">::init_worker"); + + try + { + const H* exemplar (dynamic_cast<const H*> (&exemplar_)); + assert (exemplar != nullptr); + + // For each directory configuration context, for which the handler is + // allowed to handle a request, create the handler exemplar as a deep + // copy of the exemplar_ member, and initialize it with the + // context-specific option list. + // + for (const auto& o: options_) + { + const context* c (o.first); + + if (c->server != nullptr && // Is a directory configuration context. + c->handling == request_handling::allowed) + { + auto r ( + exemplars_.emplace ( + c, + unique_ptr<handler> (new H (*exemplar)))); + + r.first->second->init (o.second, l); + } + } + + // Options are not needed anymore. Free up the space. + // + options_.clear (); + } + catch (const exception& e) + { + l.write (nullptr, 0, func_name.c_str (), APLOG_EMERG, e.what ()); + + // Terminate the worker apache process. 
APEXIT_CHILDSICK indicates to + // the root process that the worker have exited due to a resource + // shortage. In this case the root process limits the rate of forking + // until situation is resolved. + // + // If the root process fails to create any worker process on startup, + // the behaviour depends on the Multi-Processing Module enabled. For + // mpm_worker_module and mpm_event_module the root process terminates. + // For mpm_prefork_module it keeps trying to create the worker process + // at one-second intervals. + // + // If the root process loses all it's workers while running (for + // example due to the MaxRequestsPerChild directive), and fails to + // create any new ones, it keeps trying to create the worker process + // at one-second intervals. + // + exit (APEXIT_CHILDSICK); + } + catch (...) + { + l.write (nullptr, + 0, + func_name.c_str (), + APLOG_EMERG, + "unknown error"); + + // Terminate the worker apache process. + // + exit (APEXIT_CHILDSICK); + } + } + + template <typename H> + int service:: + request_handler (request_rec* r) noexcept + { + auto srv (instance<H> ()); + if (!r->handler || srv->name_ != r->handler) return DECLINED; + + assert (r->per_dir_config != nullptr); + + // Obtain the request-associated configuration context. 
+ // + const context* cx ( + context_cast (ap_get_module_config (r->per_dir_config, srv))); + + assert (cx != nullptr); + + request rq (r); + log lg (r->server, srv); + return srv->template handle<H> (rq, cx, lg); + } + + template <typename H> + int service:: + handle (request& rq, const context* cx, log& lg) const + { + using namespace std; + + static const string func_name ( + "web::apache::service<" + name_ + ">::handle"); + + try + { + auto i (exemplars_.find (cx)); + assert (i != exemplars_.end ()); + + const H* e (dynamic_cast<const H*> (i->second.get ())); + assert (e != nullptr); + + for (H h (*e);;) + { + try + { + if (static_cast<handler&> (h).handle (rq, rq, lg)) + return rq.flush (); + + if (rq.state () == request_state::initial) + return DECLINED; + + lg.write (nullptr, 0, func_name.c_str (), APLOG_ERR, + "handling declined being partially executed"); + break; + } + catch (const handler::retry&) + { + // Retry to handle the request. + // + rq.rewind (); + } + } + } + catch (const invalid_request& e) + { + if (!e.content.empty () && rq.state () < request_state::writing) + { + try + { + rq.content (e.status, e.type) << e.content << endl; + return rq.flush (); + } + catch (const exception& e) + { + lg.write (nullptr, 0, func_name.c_str (), APLOG_ERR, e.what ()); + } + } + + return e.status; + } + catch (const exception& e) + { + lg.write (nullptr, 0, func_name.c_str (), APLOG_ERR, e.what ()); + + if (*e.what () && rq.state () < request_state::writing) + { + try + { + rq.content ( + HTTP_INTERNAL_SERVER_ERROR, "text/plain;charset=utf-8") + << e << endl; + + return rq.flush (); + } + catch (const exception& e) + { + lg.write (nullptr, 0, func_name.c_str (), APLOG_ERR, e.what ()); + } + } + } + catch (...) 
+ { + lg.write (nullptr, 0, func_name.c_str (), APLOG_ERR, "unknown error"); + + if (rq.state () < request_state::writing) + { + try + { + rq.content ( + HTTP_INTERNAL_SERVER_ERROR, "text/plain;charset=utf-8") + << "unknown error" << endl; + + return rq.flush (); + } + catch (const exception& e) + { + lg.write (nullptr, 0, func_name.c_str (), APLOG_ERR, e.what ()); + } + } + } + + return HTTP_INTERNAL_SERVER_ERROR; + } + } +} diff --git a/web/server/apache/stream.hxx b/web/server/apache/stream.hxx new file mode 100644 index 0000000..77145af --- /dev/null +++ b/web/server/apache/stream.hxx @@ -0,0 +1,148 @@ +// file : web/server/apache/stream.hxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#ifndef WEB_SERVER_APACHE_STREAM_HXX +#define WEB_SERVER_APACHE_STREAM_HXX + +#include <httpd.h> // request_rec, HTTP_* +#include <http_protocol.h> // ap_*() + +#include <ios> // streamsize +#include <vector> +#include <cstring> // memmove(), size_t +#include <streambuf> +#include <algorithm> // min(), max() + +#include <web/server/module.hxx> // invalid_request + +namespace web +{ + namespace apache + { + // Object of a class implementing this interface is intended for keeping + // the state of communication with the client. + // + struct stream_state + { + // Called by istreambuf functions when content is about to be read from + // the client. Can throw invalid_request or sequence_error. + // + virtual void + set_read_state () = 0; + + // Called by ostreambuf functions when some content is about to be + // written to the client. Can throw invalid_request or sequence_error. + // + virtual void + set_write_state () = 0; + }; + + // Base class for ostreambuf and istreambuf. References request and + // communication state structures. 
+ // + class rbuf: public std::streambuf + { + protected: + rbuf (request_rec* r, stream_state& s): rec_ (r), state_ (s) {} + + protected: + request_rec* rec_; + stream_state& state_; + }; + + class ostreambuf: public rbuf + { + public: + ostreambuf (request_rec* r, stream_state& s): rbuf (r, s) {} + + private: + virtual int_type + overflow (int_type c) + { + if (c != traits_type::eof ()) + { + state_.set_write_state (); + + char chr (c); + + // Throwing allows to distinguish comm failure from other IO error + // conditions. + // + if (ap_rwrite (&chr, sizeof (chr), rec_) == -1) + throw invalid_request (HTTP_REQUEST_TIME_OUT); + } + + return c; + } + + virtual std::streamsize + xsputn (const char* s, std::streamsize num) + { + state_.set_write_state (); + + if (ap_rwrite (s, num, rec_) < 0) + throw invalid_request (HTTP_REQUEST_TIME_OUT); + + return num; + } + + virtual int + sync () + { + if (ap_rflush (rec_) < 0) + throw invalid_request (HTTP_REQUEST_TIME_OUT); + + return 0; + } + }; + + class istreambuf: public rbuf + { + public: + istreambuf (request_rec* r, + stream_state& s, + size_t bufsize = 1024, + size_t putback = 1) + : rbuf (r, s), + bufsize_ (std::max (bufsize, (size_t)1)), + putback_ (std::min (putback, bufsize_ - 1)), + buf_ (bufsize_) + { + char* p (buf_.data () + putback_); + setg (p, p, p); + } + + protected: + virtual int_type + underflow () + { + if (gptr () < egptr ()) + return traits_type::to_int_type (*gptr ()); + + state_.set_read_state (); + + size_t pb (std::min ((size_t)(gptr () - eback ()), putback_)); + std::memmove (buf_.data () + putback_ - pb, gptr () - pb, pb); + + char* p (buf_.data () + putback_); + int rb (ap_get_client_block (rec_, p, bufsize_ - putback_)); + + if (rb == 0) + return traits_type::eof (); + + if (rb < 0) + throw invalid_request (HTTP_REQUEST_TIME_OUT); + + setg (p - pb, p, p + rb); + return traits_type::to_int_type (*gptr ()); + } + + protected: + size_t bufsize_; + size_t putback_; + std::vector<char> buf_; + }; 
+ } +} + +#endif // WEB_SERVER_APACHE_STREAM_HXX diff --git a/web/server/buildfile b/web/server/buildfile new file mode 100644 index 0000000..26de70f --- /dev/null +++ b/web/server/buildfile @@ -0,0 +1,15 @@ +# file : web/server/buildfile +# license : MIT; see accompanying LICENSE file + +# This is currently part of the brep apache module but lives in a separate +# directory. Thus the strange choices: libus{}, no header installation, etc. + +# While we don't need to link to APR, we need to find its header location. +# +import libs = libapr1%lib{apr-1} +import libs += libapreq2%lib{apreq2} +import libs += libbutl%lib{butl} + +libus{web-server}: {hxx ixx txx cxx}{**} $libs + +{hxx ixx txx}{*}: install = false diff --git a/web/server/mime-url-encoding.cxx b/web/server/mime-url-encoding.cxx new file mode 100644 index 0000000..fd1e4e8 --- /dev/null +++ b/web/server/mime-url-encoding.cxx @@ -0,0 +1,66 @@ +// file : web/server/mime-url-encoding.cxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#include <web/server/mime-url-encoding.hxx> + +#include <string> +#include <iterator> // back_inserter + +#include <libbutl/url.mxx> + +using namespace std; +using namespace butl; + +namespace web +{ + inline static bool + encode_query (char& c) + { + if (c == ' ') + { + c = '+'; + return false; + } + + return !url::unreserved (c); + } + + string + mime_url_encode (const char* v, bool query) + { + return query ? url::encode (v, encode_query) : url::encode (v); + } + + string + mime_url_encode (const string& v, bool query) + { + return query ? 
url::encode (v, encode_query) : url::encode (v); + } + + string + mime_url_decode (const char* b, const char* e, bool trim, bool query) + { + if (trim) + { + for (; b != e && *b == ' '; ++b) ; + + if (b == e) + return string (); + + while (*--e == ' '); + ++e; + } + + string r; + if (!query) + url::decode (b, e, back_inserter (r)); + else + url::decode (b, e, back_inserter (r), + [] (char& c) + { + if (c == '+') + c = ' '; + }); + return r; + } +} diff --git a/web/server/mime-url-encoding.hxx b/web/server/mime-url-encoding.hxx new file mode 100644 index 0000000..34172a4 --- /dev/null +++ b/web/server/mime-url-encoding.hxx @@ -0,0 +1,32 @@ +// file : web/server/mime-url-encoding.hxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#ifndef WEB_SERVER_MIME_URL_ENCODING_HXX +#define WEB_SERVER_MIME_URL_ENCODING_HXX + +#include <string> + +namespace web +{ + // URL-encode characters other than unreserved (see RFC3986). If the query + // flag is true, then the encoding is applied to the URL query part, and so + // convert space characters to plus characters rather than percent-encode + // them. + // + std::string + mime_url_encode (const char*, bool query = true); + + std::string + mime_url_encode (const std::string&, bool query = true); + + // If the query flag is true, then convert plus characters to space + // characters (see above). Throw std::invalid_argument if an invalid encoding + // sequence is encountered. 
+ // + std::string + mime_url_decode (const char* b, const char* e, + bool trim = false, + bool query = true); +} + +#endif // WEB_SERVER_MIME_URL_ENCODING_HXX diff --git a/web/server/module.hxx b/web/server/module.hxx new file mode 100644 index 0000000..beda73c --- /dev/null +++ b/web/server/module.hxx @@ -0,0 +1,299 @@ +// file : web/server/module.hxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#ifndef WEB_SERVER_MODULE_HXX +#define WEB_SERVER_MODULE_HXX + +#include <map> +#include <string> +#include <vector> +#include <iosfwd> +#include <chrono> +#include <cstdint> // uint16_t +#include <cstddef> // size_t +#include <utility> // move() +#include <stdexcept> // runtime_error + +#include <libbutl/path.mxx> +#include <libbutl/optional.mxx> + +namespace web +{ + using butl::optional; + + // HTTP status code. + // + // @@ Define some commonly used constants? + // + using status_code = std::uint16_t; + + // This exception is used to signal that the request is invalid + // (4XX codes) rather than that it could not be processed (5XX). + // By default 400 is returned, which means the request is malformed. + // + // If caught by the web server implementation, it will try to return + // the specified status and content to the client, if possible. + // It is, however, may not be possible if some unbuffered content has + // already been written. The behavior in this case is implementation- + // specific and may result in no indication of an error being sent to + // the client. + // + struct invalid_request + { + status_code status; + std::string content; + std::string type; + + //@@ Maybe optional "try again" link? + // + invalid_request (status_code s = 400, + std::string c = "", + std::string t = "text/plain;charset=utf-8") + : status (s), content (std::move (c)), type (std::move (t)) {} + }; + + // Exception indicating HTTP request/response sequencing error. 
+ // For example, trying to change the status code after some + // content has already been written. + // + struct sequence_error: std::runtime_error + { + sequence_error (std::string d): std::runtime_error (std::move (d)) {} + }; + + // Map of handler configuration option names to the boolean flag indicating + // whether the value is expected for the option. + // + using option_descriptions = std::map<std::string, bool>; + + struct name_value + { + // These should eventually become string_view's. + // + std::string name; + optional<std::string> value; + + name_value () {} + name_value (std::string n, optional<std::string> v) + : name (std::move (n)), value (std::move (v)) {} + }; + + using name_values = std::vector<name_value>; + using butl::path; + + class request + { + public: + using path_type = web::path; + + virtual + ~request () = default; + + // Corresponds to abs_path portion of HTTP URL as described in "3.2.2 HTTP + // URL" of http://tools.ietf.org/html/rfc2616. Returns '/' if no abs_path + // is present in URL. + // + virtual const path_type& + path () = 0; + + //@@ Why not pass parameters directly? Lazy parsing? + //@@ Why not have something like operator[] for lookup? Probably + // in name_values. + //@@ Maybe parameter_list() and parameter_map()? + // + // Parse parameters from the URL query part and from the HTTP POST request + // body for the application/x-www-form-urlencoded or multipart/form-data + // content type. Optionally limit the amount of data read from the body + // (see the content() function for the semantics). Throw invalid_request + // if parameters decoding fails. + // + virtual const name_values& + parameters (std::size_t limit, bool url_only = false) = 0; + + // Open the input stream for the upload corresponding to the specified + // parameter index. Must be called after the parameters() function is + // called, throw sequence_error if that's not the case. 
Throw + // invalid_argument if the index doesn't have an upload (for example, + // because the parameter is not <input type="file"/> form field). + // + // Note also that reopening the same upload (within the same retry) + // returns the same stream reference. + // + virtual std::istream& + open_upload (std::size_t index) = 0; + + // As above but specify the parameter by name. Throw invalid_argument if + // there are multiple uploads for this parameter name. + // + virtual std::istream& + open_upload (const std::string& name) = 0; + + // Request headers. + // + // The implementation may add custom pseudo-headers reflecting additional + // request options. Such headers should start with ':'. If possible, the + // implementation should add the following well-known pseudo-headers: + // + // :Client-IP - IP address of the connecting client. + // + virtual const name_values& + headers () = 0; + + // Throw invalid_request if cookies are malformed. + // + virtual const name_values& + cookies () = 0; + + // Get the stream to read the request content from. If the limit argument + // is zero, then the content limit is left unchanged (unlimited initially). + // Otherwise the requested limit is set, and the invalid_request exception + // with the code 413 (payload too large) will be thrown when the specified + // limit is reached while reading from the stream. If the buffer argument + // is zero, then the buffer size is left unchanged (zero initially). If it + // is impossible to increase the buffer size (because, for example, some + // content is already read unbuffered), then the sequence_error is thrown. + // + // Note that unread input content is discarded when any unbuffered content + // is written, and any attempt to read it will result in the + // sequence_error exception being thrown. 
+ // + virtual std::istream& + content (std::size_t limit, std::size_t buffer = 0) = 0; + }; + + class response + { + public: + virtual + ~response () = default; + + // Set status code, content type, and get the stream to write + // the content to. If the buffer argument is true (default), + // then buffer the entire content before sending it as a + // response. This allows us to change the status code in + // case of an error. + // + // Specifically, if there is already content in the buffer + // and the status code is changed, then the old content is + // discarded. If the content was not buffered and the status + // is changed, then the sequence_error exception is thrown. + // If this exception leaves handler::handle(), then the + // implementation shall terminate the response in a suitable + // but unspecified manner. In particular, there is no guarantee + // that the user will be notified of an error or observe the + // new status. + // + virtual std::ostream& + content (status_code code = 200, + const std::string& type = "application/xhtml+xml;charset=utf-8", + bool buffer = true) = 0; + + // Set status code without writing any content. On status change, + // discard buffered content or throw sequence_error if content was + // not buffered. + // + virtual void + status (status_code) = 0; + + // Throw sequence_error if some unbuffered content has already + // been written. + // + virtual void + cookie (const char* name, + const char* value, + const std::chrono::seconds* max_age = nullptr, + const char* path = nullptr, + const char* domain = nullptr, + bool secure = false, + bool buffer = true) = 0; + }; + + // A web server logging backend. The handler can use it to log + // diagnostics that is meant for the web server operator rather + // than the user. + // + // The handler can cast this basic interface to the web server's + // specific implementation that may provide a richer interface. 
+ // + class log + { + public: + virtual + ~log () = default; + + virtual void + write (const char* msg) = 0; + }; + + // The web server creates a new handler instance for each request + // by copy-initializing it with the handler exemplar. This way we + // achieve two things: we can freely use handler data members + // without worrying about multi-threading issues and we + // automatically get started with the initial state for each + // request. If you really need to share some rw-data between + // all the handlers, use static data members with appropriate + // locking. See the <service> header in one of the web server + // directories (e.g., apache/) if you need to see the code that + // does this. + // + class handler + { + public: + virtual + ~handler () = default; + + // Description of configuration options supported by this handler. Note: + // should be callable during static initialization. + // + virtual option_descriptions + options () = 0; + + // During startup the web server calls this function on the handler + // exemplar to log the handler version information. It is up to the web + // server whether to call this function once per handler implementation + // type. Therefore, it is expected that this function will log the same + // information for all the handler exemplars. + // + virtual void + version (log&) = 0; + + // During startup the web server calls this function on the handler + // exemplar passing a list of configuration options. The place these + // configuration options come from is implementation-specific (normally + // a configuration file). The web server guarantees that only options + // listed in the map returned by the options() function above can be + // present. Any exception thrown by this function terminates the web + // server. + // + virtual void + init (const name_values&, log&) = 0; + + // Return false if decline to handle the request. 
If handling has been + // declined after any unbuffered content has been written, then the + // implementation shall terminate the response in a suitable but + // unspecified manner. + // + // Throw retry if need to retry handling the request. The retry will + // happen on the same instance of the handler and the implementation is + // expected to "rewind" the request and response objects to their initial + // state. This is only guaranteed to be possible if the relevant functions + // in the request and response objects were called in buffered mode (the + // buffer argument was true). + // + // Any exception other than retry and invalid_request described above that + // leaves this function is treated by the web server implementation as an + // internal server error (500). Similar to invalid_request, it will try to + // return the status and description (obtained by calling what() on + // std::exception) to the client, if possible. The description is assumed + // to be encoded in UTF-8. The implementation may provide a configuration + // option to omit the description from the response, for security/privacy + // reasons. + // + struct retry {}; + + virtual bool + handle (request&, response&, log&) = 0; + }; +} + +#endif // WEB_SERVER_MODULE_HXX |