aboutsummaryrefslogtreecommitdiff
path: root/bbot/agent/http-service.cxx
diff options
context:
space:
mode:
Diffstat (limited to 'bbot/agent/http-service.cxx')
-rw-r--r--bbot/agent/http-service.cxx465
1 files changed, 465 insertions, 0 deletions
diff --git a/bbot/agent/http-service.cxx b/bbot/agent/http-service.cxx
new file mode 100644
index 0000000..28b4d94
--- /dev/null
+++ b/bbot/agent/http-service.cxx
@@ -0,0 +1,465 @@
+// file : bbot/agent/http-service.cxx -*- C++ -*-
+// license : MIT; see accompanying LICENSE file
+
+#include <bbot/agent/http-service.hxx>
+
+#include <cstdlib> // strtoul()
+
+#include <bbot/diagnostics.hxx>
+
+using namespace std;
+using namespace butl;
+
+namespace bbot
+{
+ namespace http_service
+ {
+ result
+ post (const agent_options& o, const string& u, const parameters& params)
+ {
+ tracer trace ("http_service::post");
+
+ using parser = manifest_parser;
+ using parsing = manifest_parsing;
+ using name_value = manifest_name_value;
+
+ // The overall plan is to post the data using the curl program, read
+ // the HTTP response status and content type, read and parse the body
+ // according to the content type, and obtain the result message and
+ // optional reference in case of both the request success and failure.
+ //
+ // The successful request response (HTTP status code 200) is expected to
+ // contain the result manifest (text/manifest content type). The faulty
+ // response (HTTP status code other than 200) can either contain the
+ // result manifest or a plain text error description (text/plain content
+ // type) or some other content (for example text/html). We will return
+ // the manifest message value, if available or the first line of the
+ // plain text error description or, as a last resort, construct the
+ // message from the HTTP status code and reason phrase. We will also
+ // return the error description if anything goes wrong with the HTTP
+ // request or the response manifest status value is not 200.
+ //
+ string message;
+ optional<uint16_t> status; // Request result manifest status value.
+ optional<string> reference;
+ vector<name_value> body;
+ optional<string> error;
+
+ // None of the 3XX redirect code semantics assume automatic re-posting.
+ // We will treat all such codes as failures, adding the location header
+ // value to the message for troubleshooting.
+ //
+ optional<string> location;
+
+ // Convert the submit arguments to curl's --form* options and cache the
+ // pointer to the file_text parameter value, if present, for writing
+ // into curl's stdin.
+ //
+ strings fos;
+ const string* file_text (nullptr);
+
+ for (const parameter& p: params)
+ {
+ if (p.type == parameter::file_text)
+ {
+ assert (file_text == nullptr);
+ file_text = &p.value;
+ }
+
+ fos.emplace_back (p.type == parameter::file ||
+ p.type == parameter::file_text
+ ? "--form"
+ : "--form-string");
+
+ fos.emplace_back (
+ p.type == parameter::file ? p.name + "=@" + p.value :
+ p.type == parameter::file_text ? p.name + "=@-" :
+ p.name + '=' + p.value);
+ }
+
+ // Note that we prefer the low-level process API for running curl over
+ // using butl::curl because in this context it is restrictive and
+ // inconvenient.
+ //
+ // Start curl program.
+ //
+ // Text mode seems appropriate.
+ //
+ fdpipe in_pipe;
+ fdpipe out_pipe;
+ process pr;
+
+ try
+ {
+ in_pipe = fdopen_pipe ();
+
+ out_pipe = (file_text != nullptr
+ ? fdopen_pipe ()
+ : fdpipe {fdopen_null (), nullfd});
+
+ pr = process_start_callback (trace,
+ out_pipe.in.get () /* stdin */,
+ in_pipe /* stdout */,
+ 2 /* stderr */,
+ "curl",
+
+ // Include the response headers in the
+ // output so we can get the status
+ // code/reason, content type, and the
+ // redirect location.
+ //
+ "--include",
+
+ "--max-time", o.request_timeout (),
+ "--connect-timeout", o.connect_timeout (),
+ fos,
+ u);
+
+ // Shouldn't throw, unless something is severely damaged.
+ //
+ in_pipe.out.close ();
+ out_pipe.in.close ();
+ }
+ catch (const process_error& e)
+ {
+ fail << "unable to execute curl: " << e;
+ }
+ catch (const io_error& e)
+ {
+ fail << "unable to open pipe: " << e;
+ }
+
+ auto finish = [&pr, &error] (bool io_read = false, bool io_write = false)
+ {
+ if (!pr.wait ())
+ error = "curl " + to_string (*pr.exit);
+ else if (io_read)
+ error = "error reading curl output";
+ else if (io_write)
+ error = "error writing curl input";
+ };
+
+ bool io_write (false);
+ bool io_read (false);
+
+ try
+ {
+ // First we read the HTTP response status line and headers. At this
+ // stage we will read until the empty line (containing just CRLF). Not
+ // being able to reach such a line is an error, which is the reason
+ // for the exception mask choice.
+ //
+ ifdstream is (
+ move (in_pipe.in),
+ fdstream_mode::skip,
+ ifdstream::badbit | ifdstream::failbit | ifdstream::eofbit);
+
+ if (file_text != nullptr)
+ {
+ ofdstream os (move (out_pipe.out));
+ os << *file_text;
+ os.close ();
+
+ // Indicate to the potential IO error handling that we are done with
+ // writing.
+ //
+ file_text = nullptr;
+ }
+
+ // Parse and return the HTTP status code. Return 0 if the argument is
+ // invalid.
+ //
+ auto status_code = [] (const string& s)
+ {
+ char* e (nullptr);
+ unsigned long c (strtoul (s.c_str (), &e, 10)); // Can't throw.
+ assert (e != nullptr);
+
+ return *e == '\0' && c >= 100 && c < 600
+ ? static_cast<uint16_t> (c)
+ : 0;
+ };
+
+ // Read the CRLF-terminated line from the stream stripping the
+ // trailing CRLF.
+ //
+ auto read_line = [&is] ()
+ {
+ string l;
+ getline (is, l); // Strips the trailing LF (0xA).
+
+ // Note that on POSIX CRLF is not automatically translated into
+ // LF, so we need to strip CR (0xD) manually.
+ //
+ if (!l.empty () && l.back () == '\r')
+ l.pop_back ();
+
+ return l;
+ };
+
+ auto bad_response = [] (const string& d) {throw runtime_error (d);};
+
+ // Read and parse the HTTP response status line, return the status
+ // code and the reason phrase.
+ //
+ struct http_status
+ {
+ uint16_t code;
+ string reason;
+ };
+
+ auto read_status = [&read_line, &status_code, &bad_response] ()
+ {
+ string l (read_line ());
+
+ for (;;) // Breakout loop.
+ {
+ if (l.compare (0, 5, "HTTP/") != 0)
+ break;
+
+ size_t p (l.find (' ', 5)); // The protocol end.
+ if (p == string::npos)
+ break;
+
+ p = l.find_first_not_of (' ', p + 1); // The code start.
+ if (p == string::npos)
+ break;
+
+ size_t e (l.find (' ', p + 1)); // The code end.
+ if (e == string::npos)
+ break;
+
+ uint16_t c (status_code (string (l, p, e - p)));
+ if (c == 0)
+ break;
+
+ string r;
+ p = l.find_first_not_of (' ', e + 1); // The reason start.
+ if (p != string::npos)
+ {
+ e = l.find_last_not_of (' '); // The reason end.
+ assert (e != string::npos && e >= p);
+
+ r = string (l, p, e - p + 1);
+ }
+
+ return http_status {c, move (r)};
+ }
+
+ bad_response ("invalid HTTP response status line '" + l + '\'');
+
+ assert (false); // Can't be here.
+ return http_status {};
+ };
+
+ // The curl output for a successfull request looks like this:
+ //
+ // HTTP/1.1 100 Continue
+ //
+ // HTTP/1.1 200 OK
+ // Content-Length: 83
+ // Content-Type: text/manifest;charset=utf-8
+ //
+ // : 1
+ // status: 200
+ // message: submission is queued
+ // reference: 256910ca46d5
+ //
+ // curl normally sends the 'Expect: 100-continue' header for uploads,
+ // so we need to handle the interim HTTP server response with the
+ // continue (100) status code.
+ //
+ // Interestingly, Apache can respond with the continue (100) code and
+ // with the not found (404) code afterwords. Can it be configured to
+ // just respond with 404?
+ //
+ http_status rs (read_status ());
+
+ if (rs.code == 100)
+ {
+ while (!read_line ().empty ()) ; // Skips the interim response.
+ rs = read_status (); // Reads the final status code.
+ }
+
+ // Read through the response headers until the empty line is
+ // encountered and obtain the content type and/or the redirect
+ // location, if present.
+ //
+ optional<string> ctype;
+
+ // Check if the line contains the specified header and return its
+ // value if that's the case. Return nullopt otherwise.
+ //
+ // Note that we don't expect the header values that we are interested
+ // in to span over multiple lines.
+ //
+ string l;
+ auto header = [&l] (const char* name) -> optional<string>
+ {
+ size_t n (string::traits_type::length (name));
+ if (!(icasecmp (name, l, n) == 0 && l[n] == ':'))
+ return nullopt;
+
+ string r;
+ size_t p (l.find_first_not_of (' ', n + 1)); // The value begin.
+ if (p != string::npos)
+ {
+ size_t e (l.find_last_not_of (' ')); // The value end.
+ assert (e != string::npos && e >= p);
+
+ r = string (l, p, e - p + 1);
+ }
+
+ return optional<string> (move (r));
+ };
+
+ while (!(l = read_line ()).empty ())
+ {
+ if (optional<string> v = header ("Content-Type"))
+ ctype = move (v);
+ else if (optional<string> v = header ("Location"))
+ {
+ if ((rs.code >= 301 && rs.code <= 303) || rs.code == 307)
+ location = move (v);
+ }
+ }
+
+ assert (!eof (is)); // Would have already failed otherwise.
+
+ // Now parse the response payload if the content type is specified and
+ // is recognized (text/manifest or text/plain), skip it (with the
+ // ifdstream's close() function) otherwise.
+ //
+ // Note that eof and getline() fail conditions are not errors anymore,
+ // so we adjust the exception mask accordingly.
+ //
+ is.exceptions (ifdstream::badbit);
+
+ if (ctype)
+ {
+ if (icasecmp ("text/manifest", *ctype, 13) == 0)
+ {
+ parser p (is, "manifest");
+ name_value nv (p.next ());
+
+ if (nv.empty ())
+ bad_response ("empty manifest");
+
+ const string& n (nv.name);
+ string& v (nv.value);
+
+ // The format version pair is verified by the parser.
+ //
+ assert (n.empty () && v == "1");
+
+ body.push_back (move (nv)); // Save the format version pair.
+
+ auto bad_value = [&p, &nv] (const string& d) {
+ throw parsing (p.name (), nv.value_line, nv.value_column, d);};
+
+ // Get and verify the HTTP status.
+ //
+ nv = p.next ();
+ if (n != "status")
+ bad_value ("no status specified");
+
+ uint16_t c (status_code (v));
+ if (c == 0)
+ bad_value ("invalid HTTP status '" + v + '\'');
+
+ if (c != rs.code)
+ bad_value ("status " + v + " doesn't match HTTP response "
+ "code " + to_string (rs.code));
+
+ // Get the message.
+ //
+ nv = p.next ();
+ if (n != "message" || v.empty ())
+ bad_value ("no message specified");
+
+ message = move (v);
+
+ // Try to get an optional reference.
+ //
+ nv = p.next ();
+
+ if (n == "reference")
+ {
+ if (v.empty ())
+ bad_value ("empty reference specified");
+
+ reference = move (v);
+
+ nv = p.next ();
+ }
+
+ // Save the remaining name/value pairs.
+ //
+ for (; !nv.empty (); nv = p.next ())
+ body.push_back (move (nv));
+
+ status = c;
+ }
+ else if (icasecmp ("text/plain", *ctype, 10) == 0)
+ getline (is, message); // Can result in the empty message.
+ }
+
+ is.close (); // Detect errors.
+
+ // The only meaningful result we expect is the manifest (status code
+ // is not necessarily 200). We unable to interpret any other cases and
+ // so report them as a bad response.
+ //
+ if (!status)
+ {
+ if (rs.code == 200)
+ bad_response ("manifest expected");
+
+ if (message.empty ())
+ {
+ message = "HTTP status code " + to_string (rs.code);
+
+ if (!rs.reason.empty ())
+ message += " (" + lcase (rs.reason) + ')';
+ }
+
+ if (location)
+ message += ", new location: " + *location;
+
+ bad_response ("bad server response");
+ }
+ }
+ catch (const io_error&)
+ {
+ // Presumably the child process failed and issued diagnostics so let
+ // finish() try to deal with that first.
+ //
+ (file_text != nullptr ? io_write : io_read) = true;
+ }
+ // Handle all parsing errors, including the manifest_parsing exception
+ // that inherits from the runtime_error exception.
+ //
+ // Note that the io_error class inherits from the runtime_error class,
+ // so this catch-clause must go last.
+ //
+ catch (const runtime_error& e)
+ {
+ finish (); // Sets the error variable on process failure.
+
+ if (!error)
+ error = e.what ();
+ }
+
+ if (!error)
+ finish (io_read, io_write);
+
+ assert (error || (status && !message.empty ()));
+
+ if (!error && *status != 200)
+ error = "status code " + to_string (*status);
+
+ return result {
+ move (error), move (message), move (reference), move (body)};
+ }
+ }
+}