From 2fca6d23f87304ceed78e93d2a52d137c5ffd0c7 Mon Sep 17 00:00:00 2001 From: Karen Arutyunov Date: Wed, 26 Apr 2023 21:34:12 +0300 Subject: Add support for build artifacts upload in agent --- bbot/agent/http-service.cxx | 465 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 465 insertions(+) create mode 100644 bbot/agent/http-service.cxx (limited to 'bbot/agent/http-service.cxx') diff --git a/bbot/agent/http-service.cxx b/bbot/agent/http-service.cxx new file mode 100644 index 0000000..28b4d94 --- /dev/null +++ b/bbot/agent/http-service.cxx @@ -0,0 +1,465 @@ +// file : bbot/agent/http-service.cxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#include + +#include // strtoul() + +#include + +using namespace std; +using namespace butl; + +namespace bbot +{ + namespace http_service + { + result + post (const agent_options& o, const string& u, const parameters& params) + { + tracer trace ("http_service::post"); + + using parser = manifest_parser; + using parsing = manifest_parsing; + using name_value = manifest_name_value; + + // The overall plan is to post the data using the curl program, read + // the HTTP response status and content type, read and parse the body + // according to the content type, and obtain the result message and + // optional reference in case of both the request success and failure. + // + // The successful request response (HTTP status code 200) is expected to + // contain the result manifest (text/manifest content type). The faulty + // response (HTTP status code other than 200) can either contain the + // result manifest or a plain text error description (text/plain content + // type) or some other content (for example text/html). We will return + // the manifest message value, if available or the first line of the + // plain text error description or, as a last resort, construct the + // message from the HTTP status code and reason phrase. We will also + // return the error description if anything goes wrong with the HTTP + // request or the response manifest status value is not 200. + // + string message; + optional status; // Request result manifest status value. + optional reference; + vector body; + optional error; + + // None of the 3XX redirect code semantics assume automatic re-posting. + // We will treat all such codes as failures, adding the location header + // value to the message for troubleshooting. + // + optional location; + + // Convert the submit arguments to curl's --form* options and cache the + // pointer to the file_text parameter value, if present, for writing + // into curl's stdin. + // + strings fos; + const string* file_text (nullptr); + + for (const parameter& p: params) + { + if (p.type == parameter::file_text) + { + assert (file_text == nullptr); + file_text = &p.value; + } + + fos.emplace_back (p.type == parameter::file || + p.type == parameter::file_text + ? "--form" + : "--form-string"); + + fos.emplace_back ( + p.type == parameter::file ? p.name + "=@" + p.value : + p.type == parameter::file_text ? p.name + "=@-" : + p.name + '=' + p.value); + } + + // Note that we prefer the low-level process API for running curl over + // using butl::curl because in this context it is restrictive and + // inconvenient. + // + // Start curl program. + // + // Text mode seems appropriate. + // + fdpipe in_pipe; + fdpipe out_pipe; + process pr; + + try + { + in_pipe = fdopen_pipe (); + + out_pipe = (file_text != nullptr + ? fdopen_pipe () + : fdpipe {fdopen_null (), nullfd}); + + pr = process_start_callback (trace, + out_pipe.in.get () /* stdin */, + in_pipe /* stdout */, + 2 /* stderr */, + "curl", + + // Include the response headers in the + // output so we can get the status + // code/reason, content type, and the + // redirect location. + // + "--include", + + "--max-time", o.request_timeout (), + "--connect-timeout", o.connect_timeout (), + fos, + u); + + // Shouldn't throw, unless something is severely damaged. + // + in_pipe.out.close (); + out_pipe.in.close (); + } + catch (const process_error& e) + { + fail << "unable to execute curl: " << e; + } + catch (const io_error& e) + { + fail << "unable to open pipe: " << e; + } + + auto finish = [&pr, &error] (bool io_read = false, bool io_write = false) + { + if (!pr.wait ()) + error = "curl " + to_string (*pr.exit); + else if (io_read) + error = "error reading curl output"; + else if (io_write) + error = "error writing curl input"; + }; + + bool io_write (false); + bool io_read (false); + + try + { + // First we read the HTTP response status line and headers. At this + // stage we will read until the empty line (containing just CRLF). Not + // being able to reach such a line is an error, which is the reason + // for the exception mask choice. + // + ifdstream is ( + move (in_pipe.in), + fdstream_mode::skip, + ifdstream::badbit | ifdstream::failbit | ifdstream::eofbit); + + if (file_text != nullptr) + { + ofdstream os (move (out_pipe.out)); + os << *file_text; + os.close (); + + // Indicate to the potential IO error handling that we are done with + // writing. + // + file_text = nullptr; + } + + // Parse and return the HTTP status code. Return 0 if the argument is + // invalid. + // + auto status_code = [] (const string& s) + { + char* e (nullptr); + unsigned long c (strtoul (s.c_str (), &e, 10)); // Can't throw. + assert (e != nullptr); + + return *e == '\0' && c >= 100 && c < 600 + ? static_cast (c) + : 0; + }; + + // Read the CRLF-terminated line from the stream stripping the + // trailing CRLF. + // + auto read_line = [&is] () + { + string l; + getline (is, l); // Strips the trailing LF (0xA). + + // Note that on POSIX CRLF is not automatically translated into + // LF, so we need to strip CR (0xD) manually. + // + if (!l.empty () && l.back () == '\r') + l.pop_back (); + + return l; + }; + + auto bad_response = [] (const string& d) {throw runtime_error (d);}; + + // Read and parse the HTTP response status line, return the status + // code and the reason phrase. + // + struct http_status + { + uint16_t code; + string reason; + }; + + auto read_status = [&read_line, &status_code, &bad_response] () + { + string l (read_line ()); + + for (;;) // Breakout loop. + { + if (l.compare (0, 5, "HTTP/") != 0) + break; + + size_t p (l.find (' ', 5)); // The protocol end. + if (p == string::npos) + break; + + p = l.find_first_not_of (' ', p + 1); // The code start. + if (p == string::npos) + break; + + size_t e (l.find (' ', p + 1)); // The code end. + if (e == string::npos) + break; + + uint16_t c (status_code (string (l, p, e - p))); + if (c == 0) + break; + + string r; + p = l.find_first_not_of (' ', e + 1); // The reason start. + if (p != string::npos) + { + e = l.find_last_not_of (' '); // The reason end. + assert (e != string::npos && e >= p); + + r = string (l, p, e - p + 1); + } + + return http_status {c, move (r)}; + } + + bad_response ("invalid HTTP response status line '" + l + '\''); + + assert (false); // Can't be here. + return http_status {}; + }; + + // The curl output for a successfull request looks like this: + // + // HTTP/1.1 100 Continue + // + // HTTP/1.1 200 OK + // Content-Length: 83 + // Content-Type: text/manifest;charset=utf-8 + // + // : 1 + // status: 200 + // message: submission is queued + // reference: 256910ca46d5 + // + // curl normally sends the 'Expect: 100-continue' header for uploads, + // so we need to handle the interim HTTP server response with the + // continue (100) status code. + // + // Interestingly, Apache can respond with the continue (100) code and + // with the not found (404) code afterwords. Can it be configured to + // just respond with 404? + // + http_status rs (read_status ()); + + if (rs.code == 100) + { + while (!read_line ().empty ()) ; // Skips the interim response. + rs = read_status (); // Reads the final status code. + } + + // Read through the response headers until the empty line is + // encountered and obtain the content type and/or the redirect + // location, if present. + // + optional ctype; + + // Check if the line contains the specified header and return its + // value if that's the case. Return nullopt otherwise. + // + // Note that we don't expect the header values that we are interested + // in to span over multiple lines. + // + string l; + auto header = [&l] (const char* name) -> optional + { + size_t n (string::traits_type::length (name)); + if (!(icasecmp (name, l, n) == 0 && l[n] == ':')) + return nullopt; + + string r; + size_t p (l.find_first_not_of (' ', n + 1)); // The value begin. + if (p != string::npos) + { + size_t e (l.find_last_not_of (' ')); // The value end. + assert (e != string::npos && e >= p); + + r = string (l, p, e - p + 1); + } + + return optional (move (r)); + }; + + while (!(l = read_line ()).empty ()) + { + if (optional v = header ("Content-Type")) + ctype = move (v); + else if (optional v = header ("Location")) + { + if ((rs.code >= 301 && rs.code <= 303) || rs.code == 307) + location = move (v); + } + } + + assert (!eof (is)); // Would have already failed otherwise. + + // Now parse the response payload if the content type is specified and + // is recognized (text/manifest or text/plain), skip it (with the + // ifdstream's close() function) otherwise. + // + // Note that eof and getline() fail conditions are not errors anymore, + // so we adjust the exception mask accordingly. + // + is.exceptions (ifdstream::badbit); + + if (ctype) + { + if (icasecmp ("text/manifest", *ctype, 13) == 0) + { + parser p (is, "manifest"); + name_value nv (p.next ()); + + if (nv.empty ()) + bad_response ("empty manifest"); + + const string& n (nv.name); + string& v (nv.value); + + // The format version pair is verified by the parser. + // + assert (n.empty () && v == "1"); + + body.push_back (move (nv)); // Save the format version pair. + + auto bad_value = [&p, &nv] (const string& d) { + throw parsing (p.name (), nv.value_line, nv.value_column, d);}; + + // Get and verify the HTTP status. + // + nv = p.next (); + if (n != "status") + bad_value ("no status specified"); + + uint16_t c (status_code (v)); + if (c == 0) + bad_value ("invalid HTTP status '" + v + '\''); + + if (c != rs.code) + bad_value ("status " + v + " doesn't match HTTP response " + "code " + to_string (rs.code)); + + // Get the message. + // + nv = p.next (); + if (n != "message" || v.empty ()) + bad_value ("no message specified"); + + message = move (v); + + // Try to get an optional reference. + // + nv = p.next (); + + if (n == "reference") + { + if (v.empty ()) + bad_value ("empty reference specified"); + + reference = move (v); + + nv = p.next (); + } + + // Save the remaining name/value pairs. + // + for (; !nv.empty (); nv = p.next ()) + body.push_back (move (nv)); + + status = c; + } + else if (icasecmp ("text/plain", *ctype, 10) == 0) + getline (is, message); // Can result in the empty message. + } + + is.close (); // Detect errors. + + // The only meaningful result we expect is the manifest (status code + // is not necessarily 200). We unable to interpret any other cases and + // so report them as a bad response. + // + if (!status) + { + if (rs.code == 200) + bad_response ("manifest expected"); + + if (message.empty ()) + { + message = "HTTP status code " + to_string (rs.code); + + if (!rs.reason.empty ()) + message += " (" + lcase (rs.reason) + ')'; + } + + if (location) + message += ", new location: " + *location; + + bad_response ("bad server response"); + } + } + catch (const io_error&) + { + // Presumably the child process failed and issued diagnostics so let + // finish() try to deal with that first. + // + (file_text != nullptr ? io_write : io_read) = true; + } + // Handle all parsing errors, including the manifest_parsing exception + // that inherits from the runtime_error exception. + // + // Note that the io_error class inherits from the runtime_error class, + // so this catch-clause must go last. + // + catch (const runtime_error& e) + { + finish (); // Sets the error variable on process failure. + + if (!error) + error = e.what (); + } + + if (!error) + finish (io_read, io_write); + + assert (error || (status && !message.empty ())); + + if (!error && *status != 200) + error = "status code " + to_string (*status); + + return result { + move (error), move (message), move (reference), move (body)}; + } + } +} -- cgit v1.1