From 2abd895682ec8707f30fc6babbf3787e00a8c280 Mon Sep 17 00:00:00 2001 From: Karen Arutyunov Date: Sat, 25 Aug 2018 20:42:44 +0300 Subject: Implement CI request submission --- bdep/http-service.cxx | 466 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 466 insertions(+) create mode 100644 bdep/http-service.cxx (limited to 'bdep/http-service.cxx') diff --git a/bdep/http-service.cxx b/bdep/http-service.cxx new file mode 100644 index 0000000..faec3a7 --- /dev/null +++ b/bdep/http-service.cxx @@ -0,0 +1,466 @@ +// file : bdep/submit.cxx -*- C++ -*- +// copyright : Copyright (c) 2014-2018 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include + +#include // strtoul() + +#include // fdterm() + +#include + +using namespace std; +using namespace butl; + +namespace bdep +{ + namespace http_service + { + result + post (const common_options& o, const url& u, const parameters& params) + { + using parser = manifest_parser; + using parsing = manifest_parsing; + using name_value = manifest_name_value; + + // The overall plan is to post the data using the curl program, read + // the HTTP response status and content type, read and parse the body + // according to the content type, and obtain the result message and + // optional reference in case of both the request success and failure. + // + // The successful request response (HTTP status code 200) is expected to + // contain the result manifest (text/manifest content type). The faulty + // response (HTTP status code other than 200) can either contain the + // result manifest or a plain text error description (text/plain content + // type) or some other content (for example text/html). We will print + // the manifest message value, if available or the first line of the + // plain text error description or, as a last resort, construct the + // message from the HTTP status code and reason phrase. + // + string message; + optional status; // Request result manifest status value. + optional reference; + vector body; + + // None of the 3XX redirect code semantics assume automatic re-posting. + // We will treat all such codes as failures, additionally printing the + // location header value to advise the user to try the other URL for the + // request. + // + // Note that services that move to a new URL may well be responding with + // the 301 (moved permanently) code. + // + optional location; + + // Note that it's a bad idea to issue the diagnostics while curl is + // running, as it will be messed up with the progress output. Thus, we + // throw the runtime_error exception on the HTTP response parsing error + // (rather than use our fail stream) and issue the diagnostics after + // curl finishes. + // + // Also note that we prefer the start/finish process facility for + // running curl over using butl::curl because in this context it is + // restrictive and inconvenient. + // + process pr; + bool io (false); + try + { + // Map the verbosity level. + // + cstrings v; + if (verb < 1) + { + v.push_back ("-s"); + v.push_back ("-S"); // But show errors. + } + else if (verb == 1 && fdterm (2)) + v.push_back ("--progress-bar"); + else if (verb > 3) + v.push_back ("-v"); + + // Convert the submit arguments to curl's --form* options. + // + strings fos; + for (const parameter& p: params) + { + fos.emplace_back (p.type == parameter::file + ? "--form" + : "--form-string"); + + fos.emplace_back (p.type == parameter::file + ? p.name + "=@" + p.value + : p.name + "=" + p.value); + } + + // Start curl program. + // + fdpipe pipe (fdopen_pipe ()); // Text mode seems appropriate. + + // Note that we don't specify any default timeouts, assuming that bdep + // is an interactive program and the user can always interrupt the + // command (or pass the timeout with --curl-option). + // + pr = start (0 /* stdin */, + pipe /* stdout */, + 2 /* stderr */, + o.curl (), + v, + "-A", (BDEP_USER_AGENT " curl"), + + o.curl_option (), + + // Include the response headers in the output so we can + // get the status code/reason, content type, and the + // redirect location. + // + "--include", + + fos, + u.string ()); + + pipe.out.close (); + + // First we read the HTTP response status line and headers. At this + // stage we will read until the empty line (containing just CRLF). Not + // being able to reach such a line is an error, which is the reason + // for the exception mask choice. + // + ifdstream is ( + move (pipe.in), + fdstream_mode::skip, + ifdstream::badbit | ifdstream::failbit | ifdstream::eofbit); + + // Parse and return the HTTP status code. Return 0 if the argument is + // invalid. + // + auto status_code = [] (const string& s) + { + char* e (nullptr); + unsigned long c (strtoul (s.c_str (), &e, 10)); // Can't throw. + assert (e != nullptr); + + return *e == '\0' && c >= 100 && c < 600 + ? static_cast (c) + : 0; + }; + + // Read the CRLF-terminated line from the stream stripping the + // trailing CRLF. + // + auto read_line = [&is] () + { + string l; + getline (is, l); // Strips the trailing LF (0xA). + + // Note that on POSIX CRLF is not automatically translated into + // LF, so we need to strip CR (0xD) manually. + // + if (!l.empty () && l.back () == '\r') + l.pop_back (); + + return l; + }; + + auto bad_response = [] (const string& d) {throw runtime_error (d);}; + + // Read and parse the HTTP response status line, return the status + // code and the reason phrase. + // + struct http_status + { + uint16_t code; + string reason; + }; + + auto read_status = [&read_line, &status_code, &bad_response] () + { + string l (read_line ()); + + for (;;) // Breakout loop. + { + if (l.compare (0, 5, "HTTP/") != 0) + break; + + size_t p (l.find (' ', 5)); // The protocol end. + if (p == string::npos) + break; + + p = l.find_first_not_of (' ', p + 1); // The code start. + if (p == string::npos) + break; + + size_t e (l.find (' ', p + 1)); // The code end. + if (e == string::npos) + break; + + uint16_t c (status_code (string (l, p, e - p))); + if (c == 0) + break; + + string r; + p = l.find_first_not_of (' ', e + 1); // The reason start. + if (p != string::npos) + { + e = l.find_last_not_of (' '); // The reason end. + assert (e != string::npos && e >= p); + + r = string (l, p, e - p + 1); + } + + return http_status {c, move (r)}; + } + + bad_response ("invalid HTTP response status line '" + l + "'"); + + assert (false); // Can't be here. + return http_status {}; + }; + + // The curl output for a successfull request looks like this: + // + // HTTP/1.1 100 Continue + // + // HTTP/1.1 200 OK + // Content-Length: 83 + // Content-Type: text/manifest;charset=utf-8 + // + // : 1 + // status: 200 + // message: submission is queued + // reference: 256910ca46d5 + // + // curl normally sends the 'Expect: 100-continue' header for uploads, + // so we need to handle the interim HTTP server response with the + // continue (100) status code. + // + // Interestingly, Apache can respond with the continue (100) code and + // with the not found (404) code afterwords. Can it be configured to + // just respond with 404? + // + http_status rs (read_status ()); + + if (rs.code == 100) + { + while (!read_line ().empty ()) ; // Skips the interim response. + rs = read_status (); // Reads the final status code. + } + + // Read through the response headers until the empty line is + // encountered and obtain the content type and/or the redirect + // location, if present. + // + optional ctype; + + // Check if the line contains the specified header and return its + // value if that's the case. Return nullopt otherwise. + // + // Note that we don't expect the header values that we are interested + // in to span over multiple lines. + // + string l; + auto header = [&l] (const char* name) -> optional + { + size_t n (string::traits_type::length (name)); + if (!(casecmp (name, l, n) == 0 && l[n] == ':')) + return nullopt; + + string r; + size_t p (l.find_first_not_of (' ', n + 1)); // The value begin. + if (p != string::npos) + { + size_t e (l.find_last_not_of (' ')); // The value end. + assert (e != string::npos && e >= p); + + r = string (l, p, e - p + 1); + } + + return optional (move (r)); + }; + + while (!(l = read_line ()).empty ()) + { + if (optional v = header ("Content-Type")) + ctype = move (v); + else if (optional v = header ("Location")) + { + if ((rs.code >= 301 && rs.code <= 303) || rs.code == 307) + try + { + location = url (*v); + location->query = nullopt; // Can possibly contain '?submit'. + } + catch (const invalid_argument&) + { + // Let's just ignore invalid locations. + // + } + } + } + + assert (!eof (is)); // Would have already failed otherwise. + + // Now parse the response payload if the content type is specified and + // is recognized (text/manifest or text/plain), skip it (with the + // ifdstream's close() function) otherwise. + // + // Note that eof and getline() fail conditions are not errors anymore, + // so we adjust the exception mask accordingly. + // + is.exceptions (ifdstream::badbit); + + if (ctype) + { + if (casecmp ("text/manifest", *ctype, 13) == 0) + { + parser p (is, "manifest"); + name_value nv (p.next ()); + + if (nv.empty ()) + bad_response ("empty manifest"); + + const string& n (nv.name); + string& v (nv.value); + + // The format version pair is verified by the parser. + // + assert (n.empty () && v == "1"); + + body.push_back (move (nv)); // Save the format version pair. + + auto bad_value = [&p, &nv] (const string& d) { + throw parsing (p.name (), nv.value_line, nv.value_column, d);}; + + // Get and verify the HTTP status. + // + nv = p.next (); + if (n != "status") + bad_value ("no status specified"); + + uint16_t c (status_code (v)); + if (c == 0) + bad_value ("invalid HTTP status '" + v + "'"); + + if (c != rs.code) + bad_value ("status " + v + " doesn't match HTTP response " + "code " + to_string (rs.code)); + + // Get the message. + // + nv = p.next (); + if (n != "message" || v.empty ()) + bad_value ("no message specified"); + + message = move (v); + + // Try to get an optional reference. + // + nv = p.next (); + + if (n == "reference") + { + if (v.empty ()) + bad_value ("empty reference specified"); + + reference = move (v); + + nv = p.next (); + } + + // Save the remaining name/value pairs. + // + for (; !nv.empty (); nv = p.next ()) + body.push_back (move (nv)); + + status = c; + } + else if (casecmp ("text/plain", *ctype, 10) == 0) + getline (is, message); // Can result in the empty message. + } + + is.close (); // Detect errors. + + // The meaningful result we expect is either manifest (status code is + // not necessarily 200) or HTTP redirect (location is present). We + // unable to interpret any other cases and so report them as a bad + // response. + // + if (!status) + { + if (rs.code == 200) + bad_response ("manifest expected"); + + if (message.empty ()) + { + message = "HTTP status code " + to_string (rs.code); + + if (!rs.reason.empty ()) + message += " (" + lcase (rs.reason) + ")"; + } + + if (!location) + bad_response (message); + } + } + catch (const io_error&) + { + // Presumably the child process failed and issued diagnostics so let + // finish() try to deal with that first. + // + io = true; + } + // Handle all parsing errors, including the manifest_parsing exception + // that inherits from the runtime_error exception. + // + // Note that the io_error class inherits from the runtime_error class, + // so this catch-clause must go last. + // + catch (const runtime_error& e) + { + finish (o.curl (), pr); // Throws on process failure. + + // Finally we can safely issue the diagnostics (see above for + // details). + // + diag_record dr (fail); + + url du (u); + du.query = nullopt; // Strip URL parameters from the diagnostics. + + dr << e << + info << "consider reporting this to " << du << " maintainers"; + + if (reference) + dr << info << "reference: " << *reference; + } + + finish (o.curl (), pr, io); + + assert (!message.empty ()); + + // Print the request failure reason and fail. + // + if (!status || *status != 200) + { + diag_record dr (fail); + dr << message; + + if (reference) + dr << info << "reference: " << *reference; + + if (location) + dr << info << "new location: " << *location; + + // In case of a server error advise the user to re-try later, assuming + // that the issue is temporary (service overload, network connectivity + // loss, etc.). + // + if (status && *status >= 500 && *status < 600) + dr << info << "try again later"; + } + + return result {move (message), move (reference), move (body)}; + } + } +} -- cgit v1.1