From 8a094bb0481a9c53646cc15db2e8acecafc3d10c Mon Sep 17 00:00:00 2001
From: Karen Arutyunov <karen@codesynthesis.com>
Date: Thu, 23 Aug 2018 22:29:35 +0300
Subject: Add basic support for CI request handling

---
 mod/mod-submit.cxx | 329 ++++-------------------------------------------------
 1 file changed, 21 insertions(+), 308 deletions(-)

(limited to 'mod/mod-submit.cxx')
diff --git a/mod/mod-submit.cxx b/mod/mod-submit.cxx
index 61eeaf6..470bd45 100644
--- a/mod/mod-submit.cxx
+++ b/mod/mod-submit.cxx
@@ -4,19 +4,9 @@
 
 #include <mod/mod-submit.hxx>
 
-#include <sys/time.h>   // timeval
-#include <sys/select.h>
-
-#include <ratio>        // ratio_greater_equal
-#include <chrono>
-#include <cstdlib>      // strtoul()
-#include <istream>
-#include <sstream>
-#include <type_traits>  // static_assert
-#include <system_error> // error_code, generic_category()
+#include <ostream>
 
 #include <libbutl/sha256.mxx>
-#include <libbutl/process.mxx>
 #include <libbutl/sendmail.mxx>
 #include <libbutl/fdstream.mxx>
 #include <libbutl/timestamp.mxx>
@@ -30,6 +20,7 @@
 
 #include <mod/page.hxx>
 #include <mod/options.hxx>
+#include <mod/external-handler.hxx>
 
 using namespace std;
 using namespace butl;
@@ -113,8 +104,6 @@ handle (request& rq, response& rs)
 {
   using namespace xhtml;
 
-  using parser        = manifest_parser;
-  using parsing       = manifest_parsing;
   using serializer    = manifest_serializer;
   using serialization = manifest_serialization;
 
@@ -285,8 +274,8 @@ handle (request& rq, response& rs)
       return respond_manifest (400, "invalid parameter " + nv.name);
   }
 
-  // Note that from now on the result manifest will contain the reference
-  // value.
+  // Note that from now on the result manifest we respond with will contain
+  // the reference value.
   //
   ref = string (sha256sum, 0, 12);
 
@@ -299,7 +288,7 @@ handle (request& rq, response& rs)
   if (dir_exists (dd) || simulate == "duplicate-archive")
     return respond_manifest (422, "duplicate submission");
 
-  // Create the temporary submission directory.
+  // Create the temporary submission data directory.
   //
   dir_path td;
 
@@ -573,310 +562,34 @@ handle (request& rq, response& rs)
   // (forwarding to the client, sending via email, etc.). Otherwise, create
   // implied result manifest.
   //
-  status_code sc (200);
+  status_code sc;
   vector<manifest_name_value> rvs;
 
   if (options_->submit_handler_specified ())
   {
-    // For the sake of the documentation we will call the handler's normal
-    // exit with 0 code "successful termination".
-    //
-    // To make sure the handler process execution doesn't exceed the specified
-    // timeout we set the non-blocking mode for the process stdout-reading
-    // stream, try to read from it with the 10 milliseconds timeout and check
-    // the process execution time between the reads. We then kill the process
-    // if the execution time is exceeded.
-    //
-    using namespace chrono;
-
-    using time_point = system_clock::time_point;
-    using duration   = system_clock::duration;
-
-    // Make sure that the system clock has at least milliseconds resolution.
-    //
-    static_assert(
-      ratio_greater_equal<milliseconds::period, duration::period>::value,
-      "The system clock resolution is too low");
-
-    optional<milliseconds> timeout;
-
-    if (options_->submit_handler_timeout_specified ())
-      timeout = milliseconds (options_->submit_handler_timeout () * 1000);
-
-    const path& handler (options_->submit_handler ());
-
-    // Note that due to the non-blocking mode we cannot just pass the stream
-    // to the manifest parser constructor. So we buffer the data in the string
-    // stream and then parse that.
-    //
-    stringstream ss;
-
-    for (;;) // Breakout loop.
-    try
-    {
-      fdpipe pipe (fdopen_pipe ()); // Can throw io_error.
-
-      // Redirect the diagnostics to the web server error log.
-      //
-      process pr (
-        process_start_callback (print_args,
-                                0     /* stdin  */,
-                                pipe  /* stdout */,
-                                2     /* stderr */,
-                                handler,
-                                options_->submit_handler_argument (),
-                                dd));
-      pipe.out.close ();
-
-      auto kill = [&pr, &warn, &handler, &ref] ()
-      {
-        // We may still end up well (see below), thus this is a warning.
-        //
-        warn << "ref " << ref << ": process " << handler
-             << " execution timeout expired";
-
-        pr.kill ();
-      };
-
-      try
-      {
-        ifdstream is (move (pipe.in), fdstream_mode::non_blocking);
-
-        const size_t nbuf (8192);
-        char buf[nbuf];
-
-        while (is.is_open ())
-        {
-          time_point start;
-          milliseconds wd (10); // Max time to wait for the data portion.
-
-          if (timeout)
-          {
-            start = system_clock::now ();
-
-            if (*timeout < wd)
-              wd = *timeout;
-          }
-
-          timeval tm {wd.count () / 1000        /* seconds */,
-                      wd.count () % 1000 * 1000 /* microseconds */};
-
-          fd_set rd;
-          FD_ZERO (&rd);
-          FD_SET  (is.fd (), &rd);
-
-          int r (select (is.fd () + 1, &rd, nullptr, nullptr, &tm));
-
-          if (r == -1)
-          {
-            // Don't fail if the select() call was interrupted by the signal.
-            //
-            if (errno != EINTR)
-              throw_system_ios_failure (errno, "select failed");
-          }
-          else if (r != 0) // Is data available?
-          {
-            assert (FD_ISSET (is.fd (), &rd));
-
-            // The only leagal way to read from non-blocking ifdstream.
-            //
-            streamsize n (is.readsome (buf, nbuf));
-
-            // Close the stream (and bail out) if the end of the data is
-            // reached. Otherwise cache the read data.
-            //
-            if (is.eof ())
-              is.close ();
-            else
-            {
-              // The data must be available.
-              //
-              // Note that we could keep reading until the readsome() call
-              // returns 0. However, this way we could potentially exceed the
-              // timeout significantly for some broken handler that floods us
-              // with data. So instead, we will be checking the process
-              // execution time after every data chunk read.
-              //
-              assert (n != 0);
-
-              ss.write (buf, n);
-            }
-          }
-          else // Timeout occured.
-          {
-            // Normally, we don't expect timeout to occur on the pipe read
-            // operation if the process has terminated successfully, as all its
-            // output must already be buffered (including eof). However, there
-            // can be some still running handler's child that has inherited
-            // the parent's stdout. In this case we assume that we have read
-            // all the handler's output, close the stream, log the warning and
-            // bail out.
-            //
-            if (pr.exit)
-            {
-              // We keep reading only upon successful handler termination.
-              //
-              assert (*pr.exit);
-
-              is.close ();
-
-              warn << "ref " << ref << ": process " << handler
-                   << " stdout is not closed after termination (possibly "
-                   << "handler's child still running)";
-            }
-          }
-
-          if (timeout)
-          {
-            time_point now (system_clock::now ());
-
-            // Assume we have waited the full amount if the time adjustment is
-            // detected.
-            //
-            duration d (now > start ? now - start : wd);
-
-            // If the timeout is not fully exhausted, then decrement it and
-            // try to read some more data from the handler' stdout. Otherwise,
-            // kill the process, if not done yet.
-            //
-            // Note that it may happen that we are killing an already
-            // terminated process, in which case kill() just sets the process
-            // exit information. On the other hand it's guaranteed that the
-            // process is terminated after the kill() call, and so the pipe is
-            // presumably closed on the write end (see above for details).
-            // Thus, if the process terminated successfully, we will continue
-            // reading until eof is reached or read timeout occurred. Yes, it
-            // may happen that we end up with a successful submission even
-            // with the kill.
-            //
-            if (*timeout > d)
-              *timeout -= duration_cast<milliseconds> (d);
-            else if (!pr.exit)
-            {
-              kill ();
-
-              assert (pr.exit);
-
-              // Close the stream (and bail out) if the process hasn't
-              // terminate successfully.
-              //
-              if (!*pr.exit)
-                is.close ();
-
-              *timeout = milliseconds::zero ();
-            }
-          }
-        }
-
-        assert (!is.is_open ());
-
-        if (!timeout)
-          pr.wait ();
-
-        // If the process is not terminated yet, then wait for its termination
-        // for the remaining time. Kill it if the timeout has been exceeded
-        // and the process still hasn't terminate.
-        //
-        else if (!pr.exit && !pr.timed_wait (*timeout))
-          kill ();
-
-        assert (pr.exit); // The process must finally be terminated.
-
-        if (*pr.exit)
-          break; // Get out of the breakout loop.
-
-        error << "ref " << ref << ": process " << handler << " " << *pr.exit;
-
-        // Fall through.
-      }
-      catch (const io_error& e)
-      {
-        if (pr.wait ())
-          error << "ref " << ref << ": unable to read handler's output: " << e;
+    using namespace external_handler;
 
-        // Fall through.
-      }
-
-      stash_submit_dir ();
-      return respond_error ();
-    }
-    // Handle process_error and io_error (both derive from system_error).
-    //
-    catch (const system_error& e)
+    optional<result_manifest> r (run (options_->submit_handler (),
+                                      options_->submit_handler_argument (),
+                                      dd,
+                                      options_->submit_handler_timeout (),
+                                      error,
+                                      warn,
+                                      verb_ ? &trace : nullptr));
+    if (!r)
     {
-      error << "unable to execute '" << handler << "': " << e;
-
       stash_submit_dir ();
-      return respond_error ();
-    }
-
-    try
-    {
-      // Parse and verify the manifest. Obtain the HTTP status code (must go
-      // first) and cache it for the subsequent response to the client.
-      //
-      parser p (ss, "handler");
-      manifest_name_value nv (p.next ());
-
-      auto bad_value ([&p, &nv] (const string& d) {
-          throw parsing (p.name (), nv.value_line, nv.value_column, d);});
-
-      if (nv.empty ())
-        bad_value ("empty manifest");
-
-      const string& n (nv.name);
-      const string& v (nv.value);
-
-      // The format version pair is verified by the parser.
-      //
-      assert (n.empty () && v == "1");
-
-      // Cache the format version pair.
-      //
-      rvs.push_back (move (nv));
-
-      // Get and verify the HTTP status.
-      //
-      nv = p.next ();
-      if (n != "status")
-        bad_value ("no status specified");
-
-      char* e (nullptr);
-      unsigned long c (strtoul (v.c_str (), &e, 10)); // Can't throw.
-
-      assert (e != nullptr);
-
-      if (!(*e == '\0' && c >= 100 && c < 600))
-        bad_value ("invalid HTTP status '" + v + "'");
-
-      // Cache the HTTP status.
-      //
-      sc = static_cast<status_code> (c);
-      rvs.push_back (move (nv));
-
-      // Cache the remaining name/value pairs.
-      //
-      for (nv = p.next (); !nv.empty (); nv = p.next ())
-        rvs.push_back (move (nv));
-
-      // Cache end of manifest.
-      //
-      rvs.push_back (move (nv));
+      return respond_error (); // The diagnostics is already issued.
     }
-    catch (const parsing& e)
-    {
-      error << "ref " << ref << ": unable to parse handler's output: " << e;
 
-      // It appears the handler had misbehaved, so let's stash the submission
-      // directory for troubleshooting.
-      //
-      stash_submit_dir ();
 
-      return respond_error ();
-    }
+    sc = r->status;
+    rvs = move (r->values);
   }
-  else // Create implied result manifest.
+  else // Create the implied result manifest.
   {
+    sc = 200;
+
     auto add = [&rvs] (string n, string v)
     {
       manifest_name_value nv {move (n), move (v),
-- 
cgit v1.1