Add basic support for CI request handling

author: Karen Arutyunov <karen@codesynthesis.com> 2018-08-23 22:29:35 +0300
committer: Karen Arutyunov <karen@codesynthesis.com> 2018-08-28 21:46:41 +0300
commit: 8a094bb0481a9c53646cc15db2e8acecafc3d10c (patch)
tree: 4fd7012b6a26eb852d42fba8b52bfcf8f1cf2fdd /mod
parent: 7e0e141273032c7afc1a9129512aa42c672fcf5d (diff)
8 files changed, 1116 insertions, 310 deletions
diff --git a/mod/external-handler.cxx b/mod/external-handler.cxx
new file mode 100644
index 0000000..d3ea6e3
--- /dev/null
+++ b/mod/external-handler.cxx
@@ -0,0 +1,346 @@
+// file      : mod/external-handler.cxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2018 Code Synthesis Ltd
+// license   : MIT; see accompanying LICENSE file
+
+#include <mod/external-handler.hxx>
+
+#include <sys/time.h>   // timeval
+#include <sys/select.h>
+
+#include <ratio>        // ratio_greater_equal
+#include <chrono>
+#include <sstream>
+#include <cstdlib>      // strtoul()
+#include <type_traits>  // static_assert
+#include <system_error> // error_code, generic_category()
+
+#include <libbutl/process.mxx>
+#include <libbutl/fdstream.mxx>
+#include <libbutl/process-io.mxx> // operator<<(ostream, process_args)
+
+using namespace std;
+using namespace butl;
+
+namespace brep
+{
+  namespace external_handler
+  {
+    optional<result_manifest>
+    run (const path& handler,
+         const strings& args,
+         const dir_path& data_dir,
+         size_t tm,
+         const basic_mark& error,
+         const basic_mark& warn,
+         const basic_mark* trace)
+    {
+      using parser  = manifest_parser;
+      using parsing = manifest_parsing;
+
+      using namespace chrono;
+
+      using time_point = system_clock::time_point;
+      using duration   = system_clock::duration;
+
+      // Make sure that the system clock has at least milliseconds resolution.
+      //
+      static_assert(
+        ratio_greater_equal<milliseconds::period, duration::period>::value,
+        "The system clock resolution is too low");
+
+      // For the sake of the documentation we will call the handler's normal
+      // exit with 0 code "successful termination".
+      //
+      // To make sure the handler process execution doesn't exceed the
+      // specified timeout we set the non-blocking mode for the process
+      // stdout-reading stream, try to read from it with the 10 milliseconds
+      // timeout and check the process execution time between the reads. We
+      // then kill the process if the execution time is exceeded.
+      //
+      optional<milliseconds> timeout;
+
+      if (tm != 0)
+        timeout = milliseconds (tm * 1000);
+
+      // Note that due to the non-blocking mode we cannot just pass the stream
+      // to the manifest parser constructor. So we buffer the data in the
+      // string stream and then parse that.
+      //
+      stringstream ss;
+
+      assert (!data_dir.empty ());
+
+      // Normally the data directory leaf component identifies the entity
+      // being handled. We will use it as a reference for logging.
+      //
+      string ref (data_dir.leaf ().string ());
+
+      for (;;) // Breakout loop.
+        try
+        {
+          fdpipe pipe (fdopen_pipe ()); // Can throw io_error.
+
+          // Redirect the diagnostics to the web server error log.
+          //
+          process pr (
+            process_start_callback ([&trace] (const char* args[], size_t n)
+                                    {
+                                      if (trace != nullptr)
+                                        *trace << process_args {args, n};
+                                    },
+                                    0     /* stdin  */,
+                                    pipe  /* stdout */,
+                                    2     /* stderr */,
+                                    handler,
+                                    args,
+                                    data_dir));
+          pipe.out.close ();
+
+          auto kill = [&pr, &warn, &handler, &ref] ()
+            {
+              // We may still end up well (see below), thus this is a warning.
+              //
+              warn << "ref " << ref << ": process " << handler
+              << " execution timeout expired";
+
+              pr.kill ();
+            };
+
+          try
+          {
+            ifdstream is (move (pipe.in), fdstream_mode::non_blocking);
+
+            const size_t nbuf (8192);
+            char buf[nbuf];
+
+            while (is.is_open ())
+            {
+              time_point start;
+              milliseconds wd (10); // Max time to wait for the data portion.
+
+              if (timeout)
+              {
+                start = system_clock::now ();
+
+                if (*timeout < wd)
+                  wd = *timeout;
+              }
+
+              timeval tm {wd.count () / 1000        /* seconds */,
+                          wd.count () % 1000 * 1000 /* microseconds */};
+
+              fd_set rd;
+              FD_ZERO (&rd);
+              FD_SET  (is.fd (), &rd);
+
+              int r (select (is.fd () + 1, &rd, nullptr, nullptr, &tm));
+
+              if (r == -1)
+              {
+                // Don't fail if the select() call was interrupted by the
+                // signal.
+                //
+                if (errno != EINTR)
+                  throw_system_ios_failure (errno, "select failed");
+              }
+              else if (r != 0) // Is data available?
+              {
+                assert (FD_ISSET (is.fd (), &rd));
+
+                // The only legal way to read from non-blocking ifdstream.
+                //
+                streamsize n (is.readsome (buf, nbuf));
+
+                // Close the stream (and bail out) if the end of the data is
+                // reached. Otherwise cache the read data.
+                //
+                if (is.eof ())
+                  is.close ();
+                else
+                {
+                  // The data must be available.
+                  //
+                  // Note that we could keep reading until the readsome() call
+                  // returns 0. However, this way we could potentially exceed
+                  // the timeout significantly for some broken handler that
+                  // floods us with data. So instead, we will be checking the
+                  // process execution time after every data chunk read.
+                  //
+                  assert (n != 0);
+
+                  ss.write (buf, n);
+                }
+              }
+              else // Timeout occured.
+              {
+                // Normally, we don't expect timeout to occur on the pipe read
+                // operation if the process has terminated successfully, as
+                // all its output must already be buffered (including eof).
+                // However, there can be some still running handler's child
+                // that has inherited the parent's stdout. In this case we
+                // assume that we have read all the handler's output, close
+                // the stream, log the warning and bail out.
+                //
+                if (pr.exit)
+                {
+                  // We keep reading only upon successful handler termination.
+                  //
+                  assert (*pr.exit);
+
+                  is.close ();
+
+                  warn << "ref " << ref << ": process " << handler
+                       << " stdout is not closed after termination (possibly "
+                       << "handler's child still running)";
+                }
+              }
+
+              if (timeout)
+              {
+                time_point now (system_clock::now ());
+
+                // Assume we have waited the full amount if the time
+                // adjustment is detected.
+                //
+                duration d (now > start ? now - start : wd);
+
+                // If the timeout is not fully exhausted, then decrement it and
+                // try to read some more data from the handler's stdout.
+                // Otherwise, kill the process, if not done yet.
+                //
+                // Note that it may happen that we are killing an already
+                // terminated process, in which case kill() just sets the
+                // process exit information. On the other hand it's guaranteed
+                // that the process is terminated after the kill() call, and
+                // so the pipe is presumably closed on the write end (see
+                // above for details). Thus, if the process terminated
+                // successfully, we will continue reading until eof is
+                // reached or read timeout occurred. Yes, it may happen that
+                // we will succeed even with the kill.
+                //
+                if (*timeout > d)
+                  *timeout -= duration_cast<milliseconds> (d);
+                else if (!pr.exit)
+                {
+                  kill ();
+
+                  assert (pr.exit);
+
+                  // Close the stream (and bail out) if the process hasn't
+                  // terminated successfully.
+                  //
+                  if (!*pr.exit)
+                    is.close ();
+
+                  *timeout = milliseconds::zero ();
+                }
+              }
+            }
+
+            assert (!is.is_open ());
+
+            if (!timeout)
+              pr.wait ();
+
+            // If the process is not terminated yet, then wait for its
+            // termination for the remaining time. Kill it if the timeout has
+            // been exceeded and the process still hasn't terminated.
+            //
+            else if (!pr.exit && !pr.timed_wait (*timeout))
+              kill ();
+
+            assert (pr.exit); // The process must finally be terminated.
+
+            if (*pr.exit)
+              break; // Get out of the breakout loop.
+
+            error << "ref " << ref << ": process " << handler << " "
+                  << *pr.exit;
+
+            // Fall through.
+          }
+          catch (const io_error& e)
+          {
+            if (pr.wait ())
+              error << "ref " << ref << ": unable to read handler's output: "
+                    << e;
+
+            // Fall through.
+          }
+
+          return nullopt;
+        }
+      // Handle process_error and io_error (both derive from system_error).
+      //
+        catch (const system_error& e)
+        {
+          error << "ref " << ref << ": unable to execute '" << handler
+                << "': " << e;
+
+          return nullopt;
+        }
+
+      result_manifest r;
+
+      // Parse and verify the manifest.
+      //
+      try
+      {
+        parser p (ss, handler.leaf ().string ());
+        manifest_name_value nv (p.next ());
+
+        auto bad_value ([&p, &nv] (const string& d) {
+            throw parsing (p.name (), nv.value_line, nv.value_column, d);});
+
+        if (nv.empty ())
+          bad_value ("empty manifest");
+
+        const string& n (nv.name);
+        const string& v (nv.value);
+
+        // The format version pair is verified by the parser.
+        //
+        assert (n.empty () && v == "1");
+
+        // Save the format version pair.
+        //
+        r.values.push_back (move (nv));
+
+        // Get and verify the HTTP status.
+        //
+        nv = p.next ();
+        if (n != "status")
+          bad_value ("no status specified");
+
+        char* e (nullptr);
+        unsigned long c (strtoul (v.c_str (), &e, 10)); // Can't throw.
+
+        assert (e != nullptr);
+
+        if (!(*e == '\0' && c >= 100 && c < 600))
+          bad_value ("invalid HTTP status '" + v + "'");
+
+        // Save the HTTP status.
+        //
+        r.status = static_cast<uint16_t> (c);
+        r.values.push_back (move (nv));
+
+        // Save the remaining name/value pairs.
+        //
+        for (nv = p.next (); !nv.empty (); nv = p.next ())
+          r.values.push_back (move (nv));
+
+        // Save end of manifest.
+        //
+        r.values.push_back (move (nv));
+      }
+      catch (const parsing& e)
+      {
+        error << "ref " << ref << ": unable to parse handler's output: " << e;
+        return nullopt;
+      }
+
+      return optional<result_manifest> (move (r));
+    }
+  }
+}
diff --git a/mod/external-handler.hxx b/mod/external-handler.hxx
new file mode 100644
index 0000000..45de711
--- /dev/null
+++ b/mod/external-handler.hxx
@@ -0,0 +1,52 @@
+// file      : mod/external-handler.hxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2018 Code Synthesis Ltd
+// license   : MIT; see accompanying LICENSE file
+
+#ifndef MOD_EXTERNAL_HANDLER_HXX
+#define MOD_EXTERNAL_HANDLER_HXX
+
+#include <libbutl/manifest-parser.mxx>
+
+#include <libbrep/types.hxx>
+#include <libbrep/utility.hxx>
+
+#include <mod/diagnostics.hxx>
+
+namespace brep
+{
+  // Utility for running external handler programs.
+  //
+  namespace external_handler
+  {
+    // Run an external handler program and, if it exited normally with the
+    // zero exit status, return the result manifest it is expected to write to
+    // stdout, containing at least the HTTP status value. Otherwise, log an
+    // error and return nullopt. Redirect the program stderr to the web server
+    // error log.
+    //
+    // If the timeout (in seconds) is not zero and the handler program does
+    // not exit in the allotted time, then it is killed and its termination is
+    // treated as abnormal.
+    //
+    // Note that warnings can be logged regardless of the program success. If
+    // the trace argument is not NULL, then trace records are also logged.
+    //
+    struct result_manifest
+    {
+      uint16_t status;
+      vector<butl::manifest_name_value> values; // Note: all values, including
+                                                // status.
+    };
+
+    optional<result_manifest>
+    run (const path& handler,
+         const strings& args,
+         const dir_path& data_dir,
+         size_t timeout,
+         const basic_mark& error,
+         const basic_mark& warn,
+         const basic_mark* trace);
+  }
+}
+
+#endif // MOD_EXTERNAL_HANDLER_HXX
diff --git a/mod/mod-ci.cxx b/mod/mod-ci.cxx
new file mode 100644
index 0000000..79472d0
--- /dev/null
+++ b/mod/mod-ci.cxx
@@ -0,0 +1,628 @@
+// file      : mod/mod-ci.cxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2018 Code Synthesis Ltd
+// license   : MIT; see accompanying LICENSE file
+
+#include <mod/mod-ci.hxx>
+
+#include <ostream>
+
+#include <libbutl/uuid.hxx>
+#include <libbutl/sendmail.mxx>
+#include <libbutl/fdstream.mxx>
+#include <libbutl/timestamp.mxx>
+#include <libbutl/filesystem.mxx>
+#include <libbutl/process-io.mxx>          // operator<<(ostream, process_args)
+#include <libbutl/manifest-parser.mxx>
+#include <libbutl/manifest-serializer.mxx>
+
+#include <libbpkg/manifest.hxx>
+#include <libbpkg/package-name.hxx>
+
+#include <web/xhtml.hxx>
+#include <web/module.hxx>
+
+#include <mod/page.hxx>
+#include <mod/options.hxx>
+#include <mod/external-handler.hxx>
+
+using namespace std;
+using namespace butl;
+using namespace web;
+using namespace brep::cli;
+
+brep::ci::
+ci (const ci& r)
+    : handler (r),
+      options_ (r.initialized_ ? r.options_ : nullptr),
+      form_ (r.initialized_ || r.form_ == nullptr
+             ? r.form_
+             : make_shared<xhtml::fragment> (*r.form_))
+{
+}
+
+void brep::ci::
+init (scanner& s)
+{
+  HANDLER_DIAG;
+
+  options_ = make_shared<options::ci> (
+    s, unknown_mode::fail, unknown_mode::fail);
+
+  // Verify that the CI request handling is setup properly, if configured.
+  //
+  if (options_->ci_data_specified ())
+  {
+    // Verify the data directory satisfies the requirements.
+    //
+    const dir_path& d (options_->ci_data ());
+
+    if (d.relative ())
+      fail << "ci-data directory path must be absolute";
+
+    if (!dir_exists (d))
+      fail << "ci-data directory '" << d << "' does not exist";
+
+    // Parse XHTML5 form file, if configured.
+    //
+    if (options_->ci_form_specified ())
+    {
+      const path& ci_form (options_->ci_form ());
+
+      if (ci_form.relative ())
+        fail << "ci-form path must be absolute";
+
+      try
+      {
+        ifdstream is (ci_form);
+
+        form_ = make_shared<xhtml::fragment> (is.read_text (),
+                                              ci_form.string ());
+      }
+      catch (const xml::parsing& e)
+      {
+        fail << "unable to parse ci-form file: " << e;
+      }
+      catch (const io_error& e)
+      {
+        fail << "unable to read ci-form file '" << ci_form << "': " << e;
+      }
+    }
+
+    if (options_->ci_handler_specified () &&
+        options_->ci_handler ().relative ())
+      fail << "ci-handler path must be absolute";
+  }
+
+  if (options_->root ().empty ())
+    options_->root (dir_path ("/"));
+}
+
+bool brep::ci::
+handle (request& rq, response& rs)
+{
+  using namespace bpkg;
+  using namespace xhtml;
+
+  using serializer    = manifest_serializer;
+  using serialization = manifest_serialization;
+
+  HANDLER_DIAG;
+
+  const dir_path& root (options_->root ());
+
+  // We will respond with the manifest to the CI request submission protocol
+  // violations and with a plain text message on the internal errors. In the
+  // latter case we will always respond with the same neutral message for
+  // security reasons, logging the error details. Note that descriptions of
+  // exceptions caught by the web server are returned to the client (see
+  // web/module.hxx for details), and we want to avoid this when there is a
+  // danger of exposing sensitive data.
+  //
+  // Also we will pass through exceptions thrown by the underlying API, unless
+  // we need to handle them or add details for the description, in which case
+  // we will fall back to one of the above mentioned response methods.
+  //
+  // Note that both respond_manifest() and respond_error() are normally called
+  // right before the end of the request handling. They both always return
+  // true to allow bailing out with a single line, for example:
+  //
+  // return respond_error (); // Request is handled with an error.
+  //
+  string request_id; // Will be set later.
+  auto respond_manifest = [&rs, &request_id] (status_code status,
+                                              const string& message) -> bool
+  {
+    serializer s (rs.content (status, "text/manifest;charset=utf-8"),
+                  "response");
+
+    s.next ("", "1");                      // Start of manifest.
+    s.next ("status", to_string (status));
+    s.next ("message", message);
+
+    if (!request_id.empty ())
+      s.next ("reference", request_id);
+
+    s.next ("", "");                       // End of manifest.
+    return true;
+  };
+
+  auto respond_error = [&rs] (status_code status = 500) -> bool
+  {
+    rs.content (status, "text/plain;charset=utf-8")
+      << "CI request submission handling failed" << endl;
+
+    return true;
+  };
+
+  // Check if the CI request functionality is enabled.
+  //
+  // Note that this is not a submission protocol violation but it feels right
+  // to respond with the manifest, to help the client a bit.
+  //
+  if (!options_->ci_data_specified ())
+    return respond_manifest (404, "CI request submission disabled");
+
+  // Parse the request form data.
+  //
+  const name_values& rps (rq.parameters (64 * 1024));
+
+  // If there are no request parameters then we respond with the CI form XHTML,
+  // if configured. Otherwise, will proceed as for the CI request and will fail
+  // (missing parameters).
+  //
+  if (rps.empty () && form_ != nullptr)
+  {
+    const string title ("CI");
+
+    xml::serializer s (rs.content (), title);
+
+    s << HTML
+      <<   HEAD
+      <<     TITLE << title << ~TITLE
+      <<     CSS_LINKS (path ("ci.css"), root)
+      <<   ~HEAD
+      <<   BODY
+      <<     DIV_HEADER (root, options_->logo (), options_->menu ())
+      <<     DIV(ID="content") << *form_ << ~DIV
+      <<   ~BODY
+      << ~HTML;
+
+    return true;
+  }
+
+  // Verify the CI request parameters we expect. The unknown ones will be
+  // serialized to the CI request manifest.
+  //
+  params::ci params;
+
+  try
+  {
+    name_value_scanner s (rps);
+    params = params::ci (s, unknown_mode::skip, unknown_mode::skip);
+  }
+  catch (const cli::exception&)
+  {
+    return respond_manifest (400, "invalid parameter");
+  }
+
+  const string& simulate (params.simulate ());
+
+  if (simulate == "internal-error-text")
+    return respond_error ();
+  else if (simulate == "internal-error-html")
+  {
+    const string title ("Internal Error");
+    xml::serializer s (rs.content (500), title);
+
+    s << HTML
+      <<   HEAD << TITLE << title << ~TITLE << ~HEAD
+      <<   BODY << "CI request submission handling failed" << ~BODY
+      << ~HTML;
+
+    return true;
+  }
+
+  // Parse and verify the remote repository location.
+  //
+  repository_location rl;
+
+  try
+  {
+    const repository_url& u (params.repository ());
+
+    if (u.empty () || u.scheme == repository_protocol::file)
+      throw invalid_argument ("");
+
+    rl = repository_location (u, guess_type (u, false /* local */));
+  }
+  catch (const invalid_argument&)
+  {
+    return respond_manifest (400, "invalid repository location");
+  }
+
+  // Verify the package name[/version] arguments.
+  //
+  for (const string& s: params.package())
+  {
+    // Let's skip the potentially unfilled package form fields.
+    //
+    if (s.empty ())
+      continue;
+
+    try
+    {
+      size_t p (s.find ('/'));
+
+      if (p != string::npos)
+      {
+        package_name (string (s, 0, p));
+
+        // Not to confuse with module::version.
+        //
+        bpkg::version (string (s, p + 1));
+      }
+      else
+        package_name p (s); // Not to confuse with the s variable declaration.
+    }
+    catch (const invalid_argument&)
+    {
+      return respond_manifest (400, "invalid package " + s);
+    }
+  }
+
+  // Verify that unknown parameter values satisfy the requirements (contain
+  // only ASCII printable characters plus '\r', '\n', and '\t').
+  //
+  // Actually, the expected ones must satisfy too, so check them as well.
+  //
+  auto printable = [] (const string& s) -> bool
+  {
+    for (char c: s)
+    {
+      if (!((c >= 0x20 && c <= 0x7E) || c == '\n' || c == '\r' || c == '\t'))
+        return false;
+    }
+    return true;
+  };
+
+  for (const name_value& nv: rps)
+  {
+    if (nv.value && !printable (*nv.value))
+      return respond_manifest (400, "invalid parameter " + nv.name);
+  }
+
+  try
+  {
+    // Note that from now on the result manifest we respond with will contain
+    // the reference value.
+    //
+    request_id = uuid::generate ().string ();
+  }
+  catch (const system_error& e)
+  {
+    error << "unable to generate request id: " << e;
+    return respond_error ();
+  }
+
+  // Create the submission data directory.
+  //
+  dir_path dd (options_->ci_data () / dir_path (request_id));
+
+  try
+  {
+    // It's highly unlikely but still possible that the directory already
+    // exists. This can only happen if the generated uuid is not unique.
+    //
+    if (try_mkdir (dd) == mkdir_status::already_exists)
+      throw_generic_error (EEXIST);
+  }
+  catch (const system_error& e)
+  {
+    error << "unable to create directory '" << dd << "': " << e;
+    return respond_error ();
+  }
+
+  auto_rmdir ddr (dd);
+
+  // Serialize the CI request manifest to a stream. On the serialization error
+  // respond to the client with the manifest containing the bad request (400)
+  // code and return false, on the stream error pass through the io_error
+  // exception, otherwise return true.
+  //
+  timestamp ts (system_clock::now ());
+
+  auto rqm = [&request_id,
+              &rl,
+              &ts,
+              &simulate,
+              &rq,
+              &rps,
+              &params,
+              &respond_manifest]
+             (ostream& os) -> bool
+  {
+    try
+    {
+      serializer s (os, "request");
+
+      // Serialize the submission manifest header.
+      //
+      s.next ("", "1");                // Start of manifest.
+      s.next ("id", request_id);
+      s.next ("repository", rl.string ());
+
+      for (const string& p: params.package())
+      {
+        if (!p.empty ()) // Skip empty package names (see above for details).
+          s.next ("package", p);
+      }
+
+      s.next ("timestamp",
+              butl::to_string (ts,
+                               "%Y-%m-%dT%H:%M:%SZ",
+                               false /* special */,
+                               false /* local */));
+
+      if (!simulate.empty ())
+        s.next ("simulate", simulate);
+
+      // Serialize the User-Agent HTTP header and the client IP address.
+      //
+      optional<string> ip;
+      optional<string> ua;
+      for (const name_value& h: rq.headers ())
+      {
+        if (casecmp (h.name, ":Client-IP") == 0)
+          ip = h.value;
+        else if (casecmp (h.name, "User-Agent") == 0)
+          ua = h.value;
+      }
+
+      if (ip)
+        s.next ("client-ip", *ip);
+
+      if (ua)
+        s.next ("user-agent", *ua);
+
+      // Serialize the request parameters.
+      //
+      // Note that the serializer constrains the parameter names (can't start
+      // with '#', can't contain ':' and the whitespaces, etc.).
+      //
+      for (const name_value& nv: rps)
+      {
+        const string& n (nv.name);
+
+        if (n != "repository" &&
+            n != "_"          &&
+            n != "package"    &&
+            n != "simulate")
+          s.next (n, nv.value ? *nv.value : "");
+      }
+
+      s.next ("", ""); // End of manifest.
+      return true;
+    }
+    catch (const serialization& e)
+    {
+      respond_manifest (400, string ("invalid parameter: ") + e.what ());
+      return false;
+    }
+  };
+
+  // Serialize the CI request manifest to the submission directory.
+  //
+  path rqf (dd / "request.manifest");
+
+  try
+  {
+    ofdstream os (rqf);
+    bool r (rqm (os));
+    os.close ();
+
+    if (!r)
+      return true; // The client is already responded with the manifest.
+  }
+  catch (const io_error& e)
+  {
+    error << "unable to write to '" << rqf << "': " << e;
+    return respond_error ();
+  }
+
+  // Given that the submission data is now successfully persisted we are no
+  // longer in charge of removing it, except for the cases when the submission
+  // handler terminates with an error (see below for details).
+  //
+  ddr.cancel ();
+
+  // If the handler terminates with non-zero exit status or specifies 5XX
+  // (HTTP server error) submission result manifest status value, then we
+  // stash the submission data directory for troubleshooting. Otherwise, if
+  // it's the 4XX (HTTP client error) status value, then we remove the
+  // directory.
+  //
+  // Note that leaving the directory in place in case of a submission error
+  // would have prevented the user from re-submitting until we research the
+  // issue and manually remove the directory.
+  //
+  auto stash_submit_dir = [&dd, error] ()
+  {
+    if (dir_exists (dd))
+    try
+    {
+      mvdir (dd, dir_path (dd + ".fail"));
+    }
+    catch (const system_error& e)
+    {
+      // Not much we can do here. Let's just log the issue and bail out
+      // leaving the directory in place.
+      //
+      error << "unable to rename directory '" << dd << "': " << e;
+    }
+  };
+
+  // Run the submission handler, if specified, reading the result manifest
+  // from its stdout and caching it as a name/value pair list for later use
+  // (forwarding to the client, sending via email, etc.). Otherwise, create
+  // implied result manifest.
+  //
+  status_code sc;
+  vector<manifest_name_value> rvs;
+
+  if (options_->ci_handler_specified ())
+  {
+    using namespace external_handler;
+
+    optional<result_manifest> r (run (options_->ci_handler (),
+                                      options_->ci_handler_argument (),
+                                      dd,
+                                      options_->ci_handler_timeout (),
+                                      error,
+                                      warn,
+                                      verb_ ? &trace : nullptr));
+    if (!r)
+    {
+      stash_submit_dir ();
+      return respond_error (); // The diagnostics is already issued.
+    }
+
+    sc = r->status;
+    rvs = move (r->values);
+  }
+  else // Create the implied result manifest.
+  {
+    sc = 200;
+
+    auto add = [&rvs] (string n, string v)
+    {
+      manifest_name_value nv {move (n), move (v),
+                              0 /* name_line */,  0 /* name_column */,
+                              0 /* value_line */, 0 /* value_column */};
+
+      rvs.emplace_back (move (nv));
+    };
+
+    add ("", "1");                           // Start of manifest.
+    add ("status", "200");
+    add ("message", "CI request is queued");
+    add ("reference", request_id);
+    add ("", "");                            // End of manifest.
+  }
+
+  assert (!rvs.empty ()); // Produced by the handler or is implied.
+
+  // Serialize the submission result manifest to a stream. On the
+  // serialization error log the error description and return false, on the
+  // stream error pass through the io_error exception, otherwise return true.
+  //
+  auto rsm = [&rvs, &error, &request_id] (ostream& os) -> bool
+  {
+    try
+    {
+      serializer s (os, "result");
+      for (const manifest_name_value& nv: rvs)
+        s.next (nv.name, nv.value);
+
+      return true;
+    }
+    catch (const serialization& e)
+    {
+      error << "ref " << request_id << ": unable to serialize handler's "
+            << "output: " << e;
+      return false;
+    }
+  };
+
+  // If the submission data directory still exists then perform an appropriate
+  // action on it, depending on the submission result status. Note that the
+  // handler could move or remove the directory.
+  //
+  if (dir_exists (dd))
+  {
+    // Remove the directory if the client error is detected.
+    //
+    if (sc >= 400 && sc < 500)
+      rmdir_r (dd);
+
+    // Otherwise, save the result manifest into the directory. Also stash the
+    // directory for troubleshooting in case of the server error.
+    //
+    else
+    {
+      path rsf (dd / "result.manifest");
+
+      try
+      {
+        ofdstream os (rsf);
+
+        // Not being able to stash the result manifest is not a reason to
+        // claim the submission failed. The error is logged nevertheless.
+        //
+        rsm (os);
+
+        os.close ();
+      }
+      catch (const io_error& e)
+      {
+        // Not fatal (see above).
+        //
+        error << "unable to write to '" << rsf << "': " << e;
+      }
+
+      if (sc >= 500 && sc < 600)
+        stash_submit_dir ();
+    }
+  }
+
+  // Send email, if configured, and the CI request submission is not simulated.
+  //
+  // Note that we don't consider the email sending failure to be a submission
+  // failure as the submission data is successfully persisted and the handler
+  // is successfully executed, if configured. One can argue that email can be
+  // essential for the submission processing and missing it would result in
+  // the incomplete submission. In this case it's natural to assume that the
+  // web server error log is monitored and the email sending failure will be
+  // noticed.
+  //
+  if (options_->ci_email_specified () && simulate.empty ())
+  try
+  {
+    // Redirect the diagnostics to the web server error log.
+    //
+    sendmail sm ([&trace, this] (const char* args[], size_t n)
+                 {
+                   l2 ([&]{trace << process_args {args, n};});
+                 },
+                 2 /* stderr */,
+                 options_->email (),
+                 "CI request submission (" + request_id + ")",
+                 {options_->ci_email ()});
+
+    // Write the submission request manifest.
+    //
+    bool r (rqm (sm.out));
+    assert (r); // The serialization succeeded once, so can't fail now.
+
+    // Write the submission result manifest.
+    //
+    sm.out << "\n\n";
+
+    rsm (sm.out); // We don't care about the result (see above).
+
+    sm.out.close ();
+
+    if (!sm.wait ())
+      error << "sendmail " << *sm.exit;
+  }
+  // Handle process_error and io_error (both derive from system_error).
+  //
+  catch (const system_error& e)
+  {
+    error << "sendmail error: " << e;
+  }
+
+  if (!rsm (rs.content (sc, "text/manifest;charset=utf-8")))
+    return respond_error (); // The error description is already logged.
+
+  return true;
+}
diff --git a/mod/mod-ci.hxx b/mod/mod-ci.hxx
new file mode 100644
index 0000000..f9e89ff
--- /dev/null
+++ b/mod/mod-ci.hxx
@@ -0,0 +1,45 @@
+// file      : mod/mod-ci.hxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2018 Code Synthesis Ltd
+// license   : MIT; see accompanying LICENSE file
+
+#ifndef MOD_MOD_CI_HXX
+#define MOD_MOD_CI_HXX
+
+#include <web/xhtml-fragment.hxx>
+
+#include <libbrep/types.hxx>
+#include <libbrep/utility.hxx>
+
+#include <mod/module.hxx>
+#include <mod/options.hxx>
+
+namespace brep
+{
+  class ci: public handler // Serves "ci" requests (see repository_root).
+  {
+  public:
+    ci () = default;
+
+    // Create a shallow copy (handling instance) if initialized and a deep
+    // copy (context exemplar) otherwise.
+    //
+    explicit
+    ci (const ci&); // E.g., repository_root copies its exemplar.
+
+    virtual bool
+    handle (request&, response&); // Process a CI request.
+
+    virtual const cli::options& // Description of the ci option set.
+    cli_options () const {return options::ci::description ();}
+
+  private:
+    virtual void
+    init (cli::scanner&); // NOTE(review): presumably parses options.
+
+  private:
+    shared_ptr<options::ci> options_; // Configuration options.
+    shared_ptr<web::xhtml::fragment> form_; // Presumably the form; confirm.
+  };
+}
+
+#endif // MOD_MOD_CI_HXX
diff --git a/mod/mod-repository-root.cxx b/mod/mod-repository-root.cxx
index 27901d7..3b0ab1f 100644
--- a/mod/mod-repository-root.cxx
+++ b/mod/mod-repository-root.cxx
@@ -12,6 +12,8 @@
 
 #include <mod/module.hxx>
 #include <mod/options.hxx>
+
+#include <mod/mod-ci.hxx>
 #include <mod/mod-submit.hxx>
 #include <mod/mod-builds.hxx>
 #include <mod/mod-build-log.hxx>
@@ -112,7 +114,8 @@ namespace brep
         build_force_ (make_shared<build_force> ()),
         build_log_ (make_shared<build_log> ()),
         builds_ (make_shared<builds> ()),
-        submit_ (make_shared<submit> ())
+        submit_ (make_shared<submit> ()),
+        ci_ (make_shared<ci> ())
   {
   }
 
@@ -164,6 +167,10 @@ namespace brep
           r.initialized_
           ? r.submit_
           : make_shared<submit> (*r.submit_)),
+        ci_ (
+          r.initialized_
+          ? r.ci_
+          : make_shared<ci> (*r.ci_)),
         options_ (
           r.initialized_
           ? r.options_
@@ -188,6 +195,7 @@ namespace brep
     append (r, build_log_->options ());
     append (r, builds_->options ());
     append (r, submit_->options ());
+    append (r, ci_->options ());
     return r;
   }
 
@@ -231,6 +239,7 @@ namespace brep
     sub_init (*build_log_, "build_log");
     sub_init (*builds_, "builds");
     sub_init (*submit_, "submit");
+    sub_init (*ci_, "ci");
 
     // Parse own configuration options.
     //
@@ -371,6 +380,13 @@ namespace brep
 
           return handle ("submit", true);
         }
+        else if (fn == "ci")
+        {
+          if (handler_ == nullptr)
+            handler_.reset (new ci (*ci_));
+
+          return handle ("ci", true);
+        }
       }
 
       if (handler_ == nullptr)
diff --git a/mod/mod-repository-root.hxx b/mod/mod-repository-root.hxx
index 74691ea..9a71849 100644
--- a/mod/mod-repository-root.hxx
+++ b/mod/mod-repository-root.hxx
@@ -23,6 +23,7 @@ namespace brep
   class build_log;
   class builds;
   class submit;
+  class ci;
 
   class repository_root: public handler
   {
@@ -67,6 +68,8 @@ namespace brep
     shared_ptr<build_log> build_log_;
     shared_ptr<builds> builds_;
     shared_ptr<submit> submit_;
+    shared_ptr<ci> ci_;
+
     shared_ptr<options::repository_root> options_;
 
     // Sub-handler the request is dispatched to. Initially is NULL. It is set
diff --git a/mod/mod-submit.cxx b/mod/mod-submit.cxx
index 61eeaf6..470bd45 100644
--- a/mod/mod-submit.cxx
+++ b/mod/mod-submit.cxx
@@ -4,19 +4,9 @@
 
 #include <mod/mod-submit.hxx>
 
-#include <sys/time.h>   // timeval
-#include <sys/select.h>
-
-#include <ratio>        // ratio_greater_equal
-#include <chrono>
-#include <cstdlib>      // strtoul()
-#include <istream>
-#include <sstream>
-#include <type_traits>  // static_assert
-#include <system_error> // error_code, generic_category()
+#include <ostream>
 
 #include <libbutl/sha256.mxx>
-#include <libbutl/process.mxx>
 #include <libbutl/sendmail.mxx>
 #include <libbutl/fdstream.mxx>
 #include <libbutl/timestamp.mxx>
@@ -30,6 +20,7 @@
 
 #include <mod/page.hxx>
 #include <mod/options.hxx>
+#include <mod/external-handler.hxx>
 
 using namespace std;
 using namespace butl;
@@ -113,8 +104,6 @@ handle (request& rq, response& rs)
 {
   using namespace xhtml;
 
-  using parser        = manifest_parser;
-  using parsing       = manifest_parsing;
   using serializer    = manifest_serializer;
   using serialization = manifest_serialization;
 
@@ -285,8 +274,8 @@ handle (request& rq, response& rs)
       return respond_manifest (400, "invalid parameter " + nv.name);
   }
 
-  // Note that from now on the result manifest will contain the reference
-  // value.
+  // Note that from now on the result manifest we respond with will contain
+  // the reference value.
   //
   ref = string (sha256sum, 0, 12);
 
@@ -299,7 +288,7 @@ handle (request& rq, response& rs)
   if (dir_exists (dd) || simulate == "duplicate-archive")
     return respond_manifest (422, "duplicate submission");
 
-  // Create the temporary submission directory.
+  // Create the temporary submission data directory.
   //
   dir_path td;
 
@@ -573,310 +562,34 @@ handle (request& rq, response& rs)
   // (forwarding to the client, sending via email, etc.). Otherwise, create
   // implied result manifest.
   //
-  status_code sc (200);
+  status_code sc;
   vector<manifest_name_value> rvs;
 
   if (options_->submit_handler_specified ())
   {
-    // For the sake of the documentation we will call the handler's normal
-    // exit with 0 code "successful termination".
-    //
-    // To make sure the handler process execution doesn't exceed the specified
-    // timeout we set the non-blocking mode for the process stdout-reading
-    // stream, try to read from it with the 10 milliseconds timeout and check
-    // the process execution time between the reads. We then kill the process
-    // if the execution time is exceeded.
-    //
-    using namespace chrono;
-
-    using time_point = system_clock::time_point;
-    using duration   = system_clock::duration;
-
-    // Make sure that the system clock has at least milliseconds resolution.
-    //
-    static_assert(
-      ratio_greater_equal<milliseconds::period, duration::period>::value,
-      "The system clock resolution is too low");
-
-    optional<milliseconds> timeout;
-
-    if (options_->submit_handler_timeout_specified ())
-      timeout = milliseconds (options_->submit_handler_timeout () * 1000);
-
-    const path& handler (options_->submit_handler ());
-
-    // Note that due to the non-blocking mode we cannot just pass the stream
-    // to the manifest parser constructor. So we buffer the data in the string
-    // stream and then parse that.
-    //
-    stringstream ss;
-
-    for (;;) // Breakout loop.
-    try
-    {
-      fdpipe pipe (fdopen_pipe ()); // Can throw io_error.
-
-      // Redirect the diagnostics to the web server error log.
-      //
-      process pr (
-        process_start_callback (print_args,
-                                0     /* stdin  */,
-                                pipe  /* stdout */,
-                                2     /* stderr */,
-                                handler,
-                                options_->submit_handler_argument (),
-                                dd));
-      pipe.out.close ();
-
-      auto kill = [&pr, &warn, &handler, &ref] ()
-      {
-        // We may still end up well (see below), thus this is a warning.
-        //
-        warn << "ref " << ref << ": process " << handler
-             << " execution timeout expired";
-
-        pr.kill ();
-      };
-
-      try
-      {
-        ifdstream is (move (pipe.in), fdstream_mode::non_blocking);
-
-        const size_t nbuf (8192);
-        char buf[nbuf];
-
-        while (is.is_open ())
-        {
-          time_point start;
-          milliseconds wd (10); // Max time to wait for the data portion.
-
-          if (timeout)
-          {
-            start = system_clock::now ();
-
-            if (*timeout < wd)
-              wd = *timeout;
-          }
-
-          timeval tm {wd.count () / 1000        /* seconds */,
-                      wd.count () % 1000 * 1000 /* microseconds */};
-
-          fd_set rd;
-          FD_ZERO (&rd);
-          FD_SET  (is.fd (), &rd);
-
-          int r (select (is.fd () + 1, &rd, nullptr, nullptr, &tm));
-
-          if (r == -1)
-          {
-            // Don't fail if the select() call was interrupted by the signal.
-            //
-            if (errno != EINTR)
-              throw_system_ios_failure (errno, "select failed");
-          }
-          else if (r != 0) // Is data available?
-          {
-            assert (FD_ISSET (is.fd (), &rd));
-
-            // The only leagal way to read from non-blocking ifdstream.
-            //
-            streamsize n (is.readsome (buf, nbuf));
-
-            // Close the stream (and bail out) if the end of the data is
-            // reached. Otherwise cache the read data.
-            //
-            if (is.eof ())
-              is.close ();
-            else
-            {
-              // The data must be available.
-              //
-              // Note that we could keep reading until the readsome() call
-              // returns 0. However, this way we could potentially exceed the
-              // timeout significantly for some broken handler that floods us
-              // with data. So instead, we will be checking the process
-              // execution time after every data chunk read.
-              //
-              assert (n != 0);
-
-              ss.write (buf, n);
-            }
-          }
-          else // Timeout occured.
-          {
-            // Normally, we don't expect timeout to occur on the pipe read
-            // operation if the process has terminated successfully, as all its
-            // output must already be buffered (including eof). However, there
-            // can be some still running handler's child that has inherited
-            // the parent's stdout. In this case we assume that we have read
-            // all the handler's output, close the stream, log the warning and
-            // bail out.
-            //
-            if (pr.exit)
-            {
-              // We keep reading only upon successful handler termination.
-              //
-              assert (*pr.exit);
-
-              is.close ();
-
-              warn << "ref " << ref << ": process " << handler
-                   << " stdout is not closed after termination (possibly "
-                   << "handler's child still running)";
-            }
-          }
-
-          if (timeout)
-          {
-            time_point now (system_clock::now ());
-
-            // Assume we have waited the full amount if the time adjustment is
-            // detected.
-            //
-            duration d (now > start ? now - start : wd);
-
-            // If the timeout is not fully exhausted, then decrement it and
-            // try to read some more data from the handler' stdout. Otherwise,
-            // kill the process, if not done yet.
-            //
-            // Note that it may happen that we are killing an already
-            // terminated process, in which case kill() just sets the process
-            // exit information. On the other hand it's guaranteed that the
-            // process is terminated after the kill() call, and so the pipe is
-            // presumably closed on the write end (see above for details).
-            // Thus, if the process terminated successfully, we will continue
-            // reading until eof is reached or read timeout occurred. Yes, it
-            // may happen that we end up with a successful submission even
-            // with the kill.
-            //
-            if (*timeout > d)
-              *timeout -= duration_cast<milliseconds> (d);
-            else if (!pr.exit)
-            {
-              kill ();
-
-              assert (pr.exit);
-
-              // Close the stream (and bail out) if the process hasn't
-              // terminate successfully.
-              //
-              if (!*pr.exit)
-                is.close ();
-
-              *timeout = milliseconds::zero ();
-            }
-          }
-        }
-
-        assert (!is.is_open ());
-
-        if (!timeout)
-          pr.wait ();
-
-        // If the process is not terminated yet, then wait for its termination
-        // for the remaining time. Kill it if the timeout has been exceeded
-        // and the process still hasn't terminate.
-        //
-        else if (!pr.exit && !pr.timed_wait (*timeout))
-          kill ();
-
-        assert (pr.exit); // The process must finally be terminated.
-
-        if (*pr.exit)
-          break; // Get out of the breakout loop.
-
-        error << "ref " << ref << ": process " << handler << " " << *pr.exit;
-
-        // Fall through.
-      }
-      catch (const io_error& e)
-      {
-        if (pr.wait ())
-          error << "ref " << ref << ": unable to read handler's output: " << e;
+    using namespace external_handler;
 
-        // Fall through.
-      }
-
-      stash_submit_dir ();
-      return respond_error ();
-    }
-    // Handle process_error and io_error (both derive from system_error).
-    //
-    catch (const system_error& e)
+    optional<result_manifest> r (run (options_->submit_handler (),
+                                      options_->submit_handler_argument (),
+                                      dd,
+                                      options_->submit_handler_timeout (),
+                                      error,
+                                      warn,
+                                      verb_ ? &trace : nullptr));
+    if (!r)
     {
-      error << "unable to execute '" << handler << "': " << e;
-
       stash_submit_dir ();
-      return respond_error ();
-    }
-
-    try
-    {
-      // Parse and verify the manifest. Obtain the HTTP status code (must go
-      // first) and cache it for the subsequent response to the client.
-      //
-      parser p (ss, "handler");
-      manifest_name_value nv (p.next ());
-
-      auto bad_value ([&p, &nv] (const string& d) {
-          throw parsing (p.name (), nv.value_line, nv.value_column, d);});
-
-      if (nv.empty ())
-        bad_value ("empty manifest");
-
-      const string& n (nv.name);
-      const string& v (nv.value);
-
-      // The format version pair is verified by the parser.
-      //
-      assert (n.empty () && v == "1");
-
-      // Cache the format version pair.
-      //
-      rvs.push_back (move (nv));
-
-      // Get and verify the HTTP status.
-      //
-      nv = p.next ();
-      if (n != "status")
-        bad_value ("no status specified");
-
-      char* e (nullptr);
-      unsigned long c (strtoul (v.c_str (), &e, 10)); // Can't throw.
-
-      assert (e != nullptr);
-
-      if (!(*e == '\0' && c >= 100 && c < 600))
-        bad_value ("invalid HTTP status '" + v + "'");
-
-      // Cache the HTTP status.
-      //
-      sc = static_cast<status_code> (c);
-      rvs.push_back (move (nv));
-
-      // Cache the remaining name/value pairs.
-      //
-      for (nv = p.next (); !nv.empty (); nv = p.next ())
-        rvs.push_back (move (nv));
-
-      // Cache end of manifest.
-      //
-      rvs.push_back (move (nv));
+      return respond_error (); // The diagnostics is already issued.
     }
-    catch (const parsing& e)
-    {
-      error << "ref " << ref << ": unable to parse handler's output: " << e;
 
-      // It appears the handler had misbehaved, so let's stash the submission
-      // directory for troubleshooting.
-      //
-      stash_submit_dir ();
 
-      return respond_error ();
-    }
+    sc = r->status;
+    rvs = move (r->values);
   }
-  else // Create implied result manifest.
+  else // Create the implied result manifest.
   {
+    sc = 200;
+
     auto add = [&rvs] (string n, string v)
     {
       manifest_name_value nv {move (n), move (v),
diff --git a/mod/options.cli b/mod/options.cli
index f7a9387..046173c 100644
--- a/mod/options.cli
+++ b/mod/options.cli
@@ -729,7 +729,10 @@ namespace brep
     {
       // Package repository URL.
       //
-      bpkg::repository_url repository;
+      // Note that the ci parameter is renamed to '_' by the root handler (see
+      // the request_proxy class for details).
+      //
+      bpkg::repository_url repository | _;
 
       // Package names/versions.
       //
author	Karen Arutyunov <karen@codesynthesis.com>	2018-08-23 22:29:35 +0300
committer	Karen Arutyunov <karen@codesynthesis.com>	2018-08-28 21:46:41 +0300
commit	8a094bb0481a9c53646cc15db2e8acecafc3d10c (patch)
tree	4fd7012b6a26eb852d42fba8b52bfcf8f1cf2fdd /mod
parent	7e0e141273032c7afc1a9129512aa42c672fcf5d (diff)