1 files changed, 465 insertions, 0 deletions
diff --git a/bbot/agent/http-service.cxx b/bbot/agent/http-service.cxx
new file mode 100644
index 0000000..28b4d94
--- /dev/null
+++ b/bbot/agent/http-service.cxx
@@ -0,0 +1,465 @@
+// file      : bbot/agent/http-service.cxx -*- C++ -*-
+// license   : MIT; see accompanying LICENSE file
+
+#include <bbot/agent/http-service.hxx>
+
+#include <cstdlib> // strtoul()
+
+#include <bbot/diagnostics.hxx>
+
+using namespace std;
+using namespace butl;
+
+namespace bbot
+{
+  namespace http_service
+  {
+    result
+    post (const agent_options& o, const string& u, const parameters& params)
+    {
+      tracer trace ("http_service::post");
+
+      using parser     = manifest_parser;
+      using parsing    = manifest_parsing;
+      using name_value = manifest_name_value;
+
+      // Post the parameters to the URL using the curl program, read the HTTP
+      // response status and content type, read and parse the body according
+      // to the content type, and obtain the result message and optional
+      // reference in case of both the request success and failure.
+      //
+      // The successful request response (HTTP status code 200) is expected to
+      // contain the result manifest (text/manifest content type). The faulty
+      // response (HTTP status code other than 200) can either contain the
+      // result manifest or a plain text error description (text/plain content
+      // type) or some other content (for example text/html). We will return
+      // the manifest message value, if available, or the first line of the
+      // plain text error description or, as a last resort, construct the
+      // message from the HTTP status code and reason phrase. We will also
+      // return the error description if anything goes wrong with the HTTP
+      // request or the response manifest status value is not 200.
+      //
+      string message;             // Result message to return.
+      optional<uint16_t> status;  // Request result manifest status value.
+      optional<string> reference; // Manifest reference value, if any.
+      vector<name_value> body;    // Format version and remaining pairs.
+      optional<string> error;     // Error description, if failed.
+
+      // None of the 3XX redirect code semantics assume automatic re-posting.
+      // We will treat all such codes as failures, adding the location header
+      // value to the message for troubleshooting.
+      //
+      optional<string> location;
+
+      // Convert the parameters to curl's --form* options and cache the
+      // pointer to the file_text parameter value, if present, for writing
+      // into curl's stdin.
+      //
+      strings fos;
+      const string* file_text (nullptr);
+
+      for (const parameter& p: params)
+      {
+        if (p.type == parameter::file_text)
+        {
+          assert (file_text == nullptr); // Only one is expected.
+          file_text = &p.value;
+        }
+
+        fos.emplace_back (p.type == parameter::file ||
+                          p.type == parameter::file_text
+                          ? "--form"
+                          : "--form-string");
+
+        fos.emplace_back (
+          p.type == parameter::file      ? p.name + "=@" + p.value :
+          p.type == parameter::file_text ? p.name + "=@-"          :
+          p.name + '='  + p.value);
+      }
+
+      // Start the curl program. Note that we prefer the low-level process API
+      // for running curl over using butl::curl because in this context it is
+      // restrictive and inconvenient.
+      //
+      // Text mode seems appropriate.
+      //
+      fdpipe in_pipe;  // Reads from curl's stdout.
+      fdpipe out_pipe; // Feeds curl's stdin.
+      process pr;
+
+      try
+      {
+        in_pipe = fdopen_pipe ();
+
+        out_pipe = (file_text != nullptr
+                    ? fdopen_pipe ()
+                    : fdpipe {fdopen_null (), nullfd});
+
+        pr = process_start_callback (trace,
+                                     out_pipe.in.get () /* stdin  */,
+                                     in_pipe            /* stdout */,
+                                     2                  /* stderr */,
+                                     "curl",
+
+                                     // Include the response headers in the
+                                     // output so we can get the status
+                                     // code/reason, content type, and the
+                                     // redirect location.
+                                     //
+                                     "--include",
+
+                                     "--max-time", o.request_timeout (),
+                                     "--connect-timeout", o.connect_timeout (),
+                                     fos,
+                                     u);
+
+        // Shouldn't throw, unless something is severely damaged.
+        //
+        in_pipe.out.close ();
+        out_pipe.in.close ();
+      }
+      catch (const process_error& e)
+      {
+        fail << "unable to execute curl: " << e;
+      }
+      catch (const io_error& e)
+      {
+        fail << "unable to open pipe: " << e;
+      }
+
+      // Wait for curl to exit and set the error, if any. Note that the process
+      // failure takes precedence over the IO errors.
+      //
+      auto finish = [&pr, &error] (bool io_read = false, bool io_write = false)
+      {
+        if (!pr.wait ())
+          error = "curl " + to_string (*pr.exit);
+        else if (io_read)
+          error = "error reading curl output";
+        else if (io_write)
+          error = "error writing curl input";
+      };
+
+      bool io_write (false);
+      bool io_read  (false);
+
+      try
+      {
+        // First we read the HTTP response status line and headers. At this
+        // stage we will read until the empty line (containing just CRLF). Not
+        // being able to reach such a line is an error, which is the reason
+        // for the exception mask choice.
+        //
+        ifdstream is (
+          move (in_pipe.in),
+          fdstream_mode::skip,
+          ifdstream::badbit | ifdstream::failbit | ifdstream::eofbit);
+
+        if (file_text != nullptr)
+        {
+          ofdstream os (move (out_pipe.out));
+          os << *file_text;
+          os.close ();
+
+          // Indicate to the IO error handling that we are done with writing.
+          //
+          file_text = nullptr;
+        }
+
+        // Parse and return the HTTP status code. Return 0 if the argument is
+        // invalid.
+        //
+        auto status_code = [] (const string& s)
+        {
+          char* e (nullptr);
+          unsigned long c (strtoul (s.c_str (), &e, 10)); // Can't throw.
+          assert (e != nullptr);
+
+          return *e == '\0' && c >= 100 && c < 600
+                 ? static_cast<uint16_t> (c)
+                 : 0;
+        };
+
+        // Read the CRLF-terminated line from the stream stripping the
+        // trailing CRLF.
+        //
+        auto read_line = [&is] ()
+        {
+          string l;
+          getline (is, l); // Strips the trailing LF (0xA).
+
+          // Note that on POSIX CRLF is not automatically translated into
+          // LF, so we need to strip CR (0xD) manually.
+          //
+          if (!l.empty () && l.back () == '\r')
+            l.pop_back ();
+
+          return l;
+        };
+
+        auto bad_response = [] (const string& d) {throw runtime_error (d);};
+
+        // Read and parse the HTTP response status line, return the status
+        // code and the reason phrase.
+        //
+        struct http_status
+        {
+          uint16_t code;
+          string reason;
+        };
+
+        auto read_status = [&read_line, &status_code, &bad_response] ()
+        {
+          string l (read_line ());
+
+          for (;;) // Breakout loop.
+          {
+            if (l.compare (0, 5, "HTTP/") != 0)
+              break;
+
+            size_t p (l.find (' ', 5));             // The protocol end.
+            if (p == string::npos)
+              break;
+
+            p = l.find_first_not_of (' ', p + 1);   // The code start.
+            if (p == string::npos)
+              break;
+
+            size_t e (l.find (' ', p + 1));         // The code end.
+            if (e == string::npos)
+              break;
+
+            uint16_t c (status_code (string (l, p, e - p)));
+            if (c == 0)
+              break;
+
+            string r;
+            p = l.find_first_not_of (' ', e + 1);   // The reason start.
+            if (p != string::npos)
+            {
+              e = l.find_last_not_of (' ');         // The reason end.
+              assert (e != string::npos && e >= p);
+
+              r = string (l, p, e - p + 1);
+            }
+
+            return http_status {c, move (r)};
+          }
+
+          bad_response ("invalid HTTP response status line '" + l + '\'');
+
+          assert (false); // Can't be here.
+          return http_status {};
+        };
+
+        // The curl output for a successful request looks like this:
+        //
+        // HTTP/1.1 100 Continue
+        //
+        // HTTP/1.1 200 OK
+        // Content-Length: 83
+        // Content-Type: text/manifest;charset=utf-8
+        //
+        // : 1
+        // status: 200
+        // message: submission is queued
+        // reference: 256910ca46d5
+        //
+        // curl normally sends the 'Expect: 100-continue' header for uploads,
+        // so we need to skip the interim (1XX) responses that can precede the
+        // final one (except for 101, after which the protocol is switched).
+        //
+        // Interestingly, Apache can respond with the continue (100) code and
+        // with the not found (404) code afterwards. Can it be configured to
+        // just respond with 404?
+        //
+        http_status rs (read_status ());
+
+        while (rs.code < 200 && rs.code != 101)
+        {
+          while (!read_line ().empty ()) ; // Skips the interim response.
+          rs = read_status ();             // Reads the next status code.
+        }
+
+        // Read through the response headers until the empty line is
+        // encountered and obtain the content type and/or the redirect
+        // location, if present.
+        //
+        optional<string> ctype;
+
+        // Check if the line contains the specified header and return its
+        // value if that's the case and nullopt otherwise. Note that we don't
+        // expect the header values that we are interested in to span over
+        // multiple lines.
+        //
+        string l;
+        auto header = [&l] (const char* name) -> optional<string>
+        {
+          size_t n (string::traits_type::length (name));
+          if (!(icasecmp (name, l, n) == 0 && l[n] == ':'))
+            return nullopt;
+
+          string r;
+          size_t p (l.find_first_not_of (' ', n + 1)); // The value begin.
+          if (p != string::npos)
+          {
+            size_t e (l.find_last_not_of (' '));       // The value end.
+            assert (e != string::npos && e >= p);
+
+            r = string (l, p, e - p + 1);
+          }
+
+          return optional<string> (move (r));
+        };
+
+        while (!(l = read_line ()).empty ())
+        {
+          if (optional<string> v = header ("Content-Type"))
+            ctype = move (v);
+          else if (optional<string> v = header ("Location"))
+          {
+            if ((rs.code >= 301 && rs.code <= 303) ||
+                rs.code == 307 || rs.code == 308)
+              location = move (v);
+          }
+        }
+
+        assert (!eof (is)); // Would have already failed otherwise.
+
+        // Now parse the response payload if the content type is specified and
+        // is recognized (text/manifest or text/plain), skip it (with the
+        // ifdstream's close() function) otherwise.
+        //
+        // Note that eof and getline() fail conditions are not errors anymore,
+        // so we adjust the exception mask accordingly.
+        //
+        is.exceptions (ifdstream::badbit);
+
+        if (ctype)
+        {
+          if (icasecmp ("text/manifest", *ctype, 13) == 0)
+          {
+            parser p (is, "manifest");
+            name_value nv (p.next ());
+
+            if (nv.empty ())
+              bad_response ("empty manifest");
+
+            const string& n (nv.name);
+            string& v (nv.value);
+
+            // The format version pair is verified by the parser.
+            //
+            assert (n.empty () && v == "1");
+
+            body.push_back (move (nv)); // Save the format version pair.
+
+            auto bad_value = [&p, &nv] (const string& d) {
+              throw parsing (p.name (), nv.value_line, nv.value_column, d);};
+
+            // Get and verify the HTTP status.
+            //
+            nv = p.next ();
+            if (n != "status")
+              bad_value ("no status specified");
+
+            uint16_t c (status_code (v));
+            if (c == 0)
+              bad_value ("invalid HTTP status '" + v + '\'');
+
+            if (c != rs.code)
+              bad_value ("status " + v + " doesn't match HTTP response "
+                         "code " + to_string (rs.code));
+
+            // Get the message.
+            //
+            nv = p.next ();
+            if (n != "message" || v.empty ())
+              bad_value ("no message specified");
+
+            message = move (v);
+
+            // Try to get an optional reference.
+            //
+            nv = p.next ();
+
+            if (n == "reference")
+            {
+              if (v.empty ())
+                bad_value ("empty reference specified");
+
+              reference = move (v);
+
+              nv = p.next ();
+            }
+
+            // Save the remaining name/value pairs.
+            //
+            for (; !nv.empty (); nv = p.next ())
+              body.push_back (move (nv));
+
+            status = c;
+          }
+          else if (icasecmp ("text/plain", *ctype, 10) == 0)
+            message = read_line (); // First line (CR stripped), can be empty.
+        }
+
+        is.close (); // Detect errors.
+
+        // The only meaningful result we expect is the manifest (status code
+        // is not necessarily 200). We are unable to interpret any other cases
+        // and so report them as a bad response.
+        //
+        if (!status)
+        {
+          if (rs.code == 200)
+            bad_response ("manifest expected");
+
+          if (message.empty ())
+          {
+            message = "HTTP status code " + to_string (rs.code);
+
+            if (!rs.reason.empty ())
+              message += " (" + lcase (rs.reason) + ')';
+          }
+
+          if (location)
+            message += ", new location: " + *location;
+
+          bad_response ("bad server response");
+        }
+      }
+      catch (const io_error&)
+      {
+        // Presumably the child process failed and issued diagnostics so let
+        // finish() try to deal with that first.
+        //
+        (file_text != nullptr ? io_write : io_read) = true;
+      }
+      // Handle all parsing errors, including the manifest_parsing exception
+      // that inherits from the runtime_error exception.
+      //
+      // Note that the io_error class inherits from the runtime_error class,
+      // so this catch-clause must go last.
+      //
+      catch (const runtime_error& e)
+      {
+        finish (); // Sets the error variable on process failure.
+
+        if (!error)
+          error = e.what ();
+      }
+
+      if (!error)
+        finish (io_read, io_write);
+
+      assert (error || (status && !message.empty ()));
+
+      if (!error && *status != 200)
+        error = "status code " + to_string (*status);
+
+      return result {
+        move (error), move (message), move (reference), move (body)};
+    }
+  }
+}