diff options
author | Karen Arutyunov <karen@codesynthesis.com> | 2018-08-23 22:29:35 +0300 |
---|---|---|
committer | Karen Arutyunov <karen@codesynthesis.com> | 2018-08-28 21:46:41 +0300 |
commit | 8a094bb0481a9c53646cc15db2e8acecafc3d10c (patch) | |
tree | 4fd7012b6a26eb852d42fba8b52bfcf8f1cf2fdd /mod | |
parent | 7e0e141273032c7afc1a9129512aa42c672fcf5d (diff) |
Add basic support for CI request handling
Diffstat (limited to 'mod')
-rw-r--r-- | mod/external-handler.cxx | 346 | ||||
-rw-r--r-- | mod/external-handler.hxx | 52 | ||||
-rw-r--r-- | mod/mod-ci.cxx | 628 | ||||
-rw-r--r-- | mod/mod-ci.hxx | 45 | ||||
-rw-r--r-- | mod/mod-repository-root.cxx | 18 | ||||
-rw-r--r-- | mod/mod-repository-root.hxx | 3 | ||||
-rw-r--r-- | mod/mod-submit.cxx | 329 | ||||
-rw-r--r-- | mod/options.cli | 5 |
8 files changed, 1116 insertions, 310 deletions
diff --git a/mod/external-handler.cxx b/mod/external-handler.cxx new file mode 100644 index 0000000..d3ea6e3 --- /dev/null +++ b/mod/external-handler.cxx @@ -0,0 +1,346 @@ +// file : mod/external-handler.cxx -*- C++ -*- +// copyright : Copyright (c) 2014-2018 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include <mod/external-handler.hxx> + +#include <sys/time.h> // timeval +#include <sys/select.h> + +#include <ratio> // ratio_greater_equal +#include <chrono> +#include <sstream> +#include <cstdlib> // strtoul() +#include <type_traits> // static_assert +#include <system_error> // error_code, generic_category() + +#include <libbutl/process.mxx> +#include <libbutl/fdstream.mxx> +#include <libbutl/process-io.mxx> // operator<<(ostream, process_args) + +using namespace std; +using namespace butl; + +namespace brep +{ + namespace external_handler + { + optional<result_manifest> + run (const path& handler, + const strings& args, + const dir_path& data_dir, + size_t tm, + const basic_mark& error, + const basic_mark& warn, + const basic_mark* trace) + { + using parser = manifest_parser; + using parsing = manifest_parsing; + + using namespace chrono; + + using time_point = system_clock::time_point; + using duration = system_clock::duration; + + // Make sure that the system clock has at least milliseconds resolution. + // + static_assert( + ratio_greater_equal<milliseconds::period, duration::period>::value, + "The system clock resolution is too low"); + + // For the sake of the documentation we will call the handler's normal + // exit with 0 code "successful termination". + // + // To make sure the handler process execution doesn't exceed the + // specified timeout we set the non-blocking mode for the process + // stdout-reading stream, try to read from it with the 10 milliseconds + // timeout and check the process execution time between the reads. We + // then kill the process if the execution time is exceeded. 
+ // + optional<milliseconds> timeout; + + if (tm != 0) + timeout = milliseconds (tm * 1000); + + // Note that due to the non-blocking mode we cannot just pass the stream + // to the manifest parser constructor. So we buffer the data in the + // string stream and then parse that. + // + stringstream ss; + + assert (!data_dir.empty ()); + + // Normally the data directory leaf component identifies the entity + // being handled. We will use it as a reference for logging. + // + string ref (data_dir.leaf ().string ()); + + for (;;) // Breakout loop. + try + { + fdpipe pipe (fdopen_pipe ()); // Can throw io_error. + + // Redirect the diagnostics to the web server error log. + // + process pr ( + process_start_callback ([&trace] (const char* args[], size_t n) + { + if (trace != nullptr) + *trace << process_args {args, n}; + }, + 0 /* stdin */, + pipe /* stdout */, + 2 /* stderr */, + handler, + args, + data_dir)); + pipe.out.close (); + + auto kill = [&pr, &warn, &handler, &ref] () + { + // We may still end up well (see below), thus this is a warning. + // + warn << "ref " << ref << ": process " << handler + << " execution timeout expired"; + + pr.kill (); + }; + + try + { + ifdstream is (move (pipe.in), fdstream_mode::non_blocking); + + const size_t nbuf (8192); + char buf[nbuf]; + + while (is.is_open ()) + { + time_point start; + milliseconds wd (10); // Max time to wait for the data portion. + + if (timeout) + { + start = system_clock::now (); + + if (*timeout < wd) + wd = *timeout; + } + + timeval tm {wd.count () / 1000 /* seconds */, + wd.count () % 1000 * 1000 /* microseconds */}; + + fd_set rd; + FD_ZERO (&rd); + FD_SET (is.fd (), &rd); + + int r (select (is.fd () + 1, &rd, nullptr, nullptr, &tm)); + + if (r == -1) + { + // Don't fail if the select() call was interrupted by the + // signal. + // + if (errno != EINTR) + throw_system_ios_failure (errno, "select failed"); + } + else if (r != 0) // Is data available? 
+ { + assert (FD_ISSET (is.fd (), &rd)); + + // The only leagal way to read from non-blocking ifdstream. + // + streamsize n (is.readsome (buf, nbuf)); + + // Close the stream (and bail out) if the end of the data is + // reached. Otherwise cache the read data. + // + if (is.eof ()) + is.close (); + else + { + // The data must be available. + // + // Note that we could keep reading until the readsome() call + // returns 0. However, this way we could potentially exceed + // the timeout significantly for some broken handler that + // floods us with data. So instead, we will be checking the + // process execution time after every data chunk read. + // + assert (n != 0); + + ss.write (buf, n); + } + } + else // Timeout occured. + { + // Normally, we don't expect timeout to occur on the pipe read + // operation if the process has terminated successfully, as + // all its output must already be buffered (including eof). + // However, there can be some still running handler's child + // that has inherited the parent's stdout. In this case we + // assume that we have read all the handler's output, close + // the stream, log the warning and bail out. + // + if (pr.exit) + { + // We keep reading only upon successful handler termination. + // + assert (*pr.exit); + + is.close (); + + warn << "ref " << ref << ": process " << handler + << " stdout is not closed after termination (possibly " + << "handler's child still running)"; + } + } + + if (timeout) + { + time_point now (system_clock::now ()); + + // Assume we have waited the full amount if the time + // adjustment is detected. + // + duration d (now > start ? now - start : wd); + + // If the timeout is not fully exhausted, then decrement it and + // try to read some more data from the handler' stdout. + // Otherwise, kill the process, if not done yet. + // + // Note that it may happen that we are killing an already + // terminated process, in which case kill() just sets the + // process exit information. 
On the other hand it's guaranteed + // that the process is terminated after the kill() call, and + // so the pipe is presumably closed on the write end (see + // above for details). Thus, if the process terminated + // successfully, we will continue reading until eof is + // reached or read timeout occurred. Yes, it may happen that + // we will succeed even with the kill. + // + if (*timeout > d) + *timeout -= duration_cast<milliseconds> (d); + else if (!pr.exit) + { + kill (); + + assert (pr.exit); + + // Close the stream (and bail out) if the process hasn't + // terminate successfully. + // + if (!*pr.exit) + is.close (); + + *timeout = milliseconds::zero (); + } + } + } + + assert (!is.is_open ()); + + if (!timeout) + pr.wait (); + + // If the process is not terminated yet, then wait for its + // termination for the remaining time. Kill it if the timeout has + // been exceeded and the process still hasn't terminate. + // + else if (!pr.exit && !pr.timed_wait (*timeout)) + kill (); + + assert (pr.exit); // The process must finally be terminated. + + if (*pr.exit) + break; // Get out of the breakout loop. + + error << "ref " << ref << ": process " << handler << " " + << *pr.exit; + + // Fall through. + } + catch (const io_error& e) + { + if (pr.wait ()) + error << "ref " << ref << ": unable to read handler's output: " + << e; + + // Fall through. + } + + return nullopt; + } + // Handle process_error and io_error (both derive from system_error). + // + catch (const system_error& e) + { + error << "ref " << ref << ": unable to execute '" << handler + << "': " << e; + + return nullopt; + } + + result_manifest r; + + // Parse and verify the manifest. 
+ // + try + { + parser p (ss, handler.leaf ().string ()); + manifest_name_value nv (p.next ()); + + auto bad_value ([&p, &nv] (const string& d) { + throw parsing (p.name (), nv.value_line, nv.value_column, d);}); + + if (nv.empty ()) + bad_value ("empty manifest"); + + const string& n (nv.name); + const string& v (nv.value); + + // The format version pair is verified by the parser. + // + assert (n.empty () && v == "1"); + + // Save the format version pair. + // + r.values.push_back (move (nv)); + + // Get and verify the HTTP status. + // + nv = p.next (); + if (n != "status") + bad_value ("no status specified"); + + char* e (nullptr); + unsigned long c (strtoul (v.c_str (), &e, 10)); // Can't throw. + + assert (e != nullptr); + + if (!(*e == '\0' && c >= 100 && c < 600)) + bad_value ("invalid HTTP status '" + v + "'"); + + // Save the HTTP status. + // + r.status = static_cast<uint16_t> (c); + r.values.push_back (move (nv)); + + // Save the remaining name/value pairs. + // + for (nv = p.next (); !nv.empty (); nv = p.next ()) + r.values.push_back (move (nv)); + + // Save end of manifest. + // + r.values.push_back (move (nv)); + } + catch (const parsing& e) + { + error << "ref " << ref << ": unable to parse handler's output: " << e; + return nullopt; + } + + return optional<result_manifest> (move (r)); + } + } +} diff --git a/mod/external-handler.hxx b/mod/external-handler.hxx new file mode 100644 index 0000000..45de711 --- /dev/null +++ b/mod/external-handler.hxx @@ -0,0 +1,52 @@ +// file : mod/external-handler.hxx -*- C++ -*- +// copyright : Copyright (c) 2014-2018 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#ifndef MOD_EXTERNAL_HANDLER_HXX +#define MOD_EXTERNAL_HANDLER_HXX + +#include <libbutl/manifest-parser.mxx> + +#include <libbrep/types.hxx> +#include <libbrep/utility.hxx> + +#include <mod/diagnostics.hxx> + +namespace brep +{ + // Utility for running external handler programs. 
+ // + namespace external_handler + { + // Run an external handler program and, if it exited normally with the + // zero exit status, return the result manifest it is expected to write to + // stdout, containing at least the HTTP status value. Otherwise, log an + // error and return nullopt. Redirect the program stderr to the web server + // error log. + // + // If the timeout (in seconds) is not zero and the handler program does + // not exit in the allotted time, then it is killed and its termination is + // treated as abnormal. + // + // Note that warnings can be logged regardless of the program success. If + // the trace argument is not NULL, then trace records are also logged. + // + struct result_manifest + { + uint16_t status; + vector<butl::manifest_name_value> values; // Note: all values, including + // status. + }; + + optional<result_manifest> + run (const path& handler, + const strings& args, + const dir_path& data_dir, + size_t timeout, + const basic_mark& error, + const basic_mark& warn, + const basic_mark* trace); + } +} + +#endif // MOD_EXTERNAL_HANDLER_HXX diff --git a/mod/mod-ci.cxx b/mod/mod-ci.cxx new file mode 100644 index 0000000..79472d0 --- /dev/null +++ b/mod/mod-ci.cxx @@ -0,0 +1,628 @@ +// file : mod/mod-ci.cxx -*- C++ -*- +// copyright : Copyright (c) 2014-2018 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include <mod/mod-ci.hxx> + +#include <ostream> + +#include <libbutl/uuid.hxx> +#include <libbutl/sendmail.mxx> +#include <libbutl/fdstream.mxx> +#include <libbutl/timestamp.mxx> +#include <libbutl/filesystem.mxx> +#include <libbutl/process-io.mxx> // operator<<(ostream, process_args) +#include <libbutl/manifest-parser.mxx> +#include <libbutl/manifest-serializer.mxx> + +#include <libbpkg/manifest.hxx> +#include <libbpkg/package-name.hxx> + +#include <web/xhtml.hxx> +#include <web/module.hxx> + +#include <mod/page.hxx> +#include <mod/options.hxx> +#include <mod/external-handler.hxx> + +using namespace std; 
+using namespace butl; +using namespace web; +using namespace brep::cli; + +brep::ci:: +ci (const ci& r) + : handler (r), + options_ (r.initialized_ ? r.options_ : nullptr), + form_ (r.initialized_ || r.form_ == nullptr + ? r.form_ + : make_shared<xhtml::fragment> (*r.form_)) +{ +} + +void brep::ci:: +init (scanner& s) +{ + HANDLER_DIAG; + + options_ = make_shared<options::ci> ( + s, unknown_mode::fail, unknown_mode::fail); + + // Verify that the CI request handling is setup properly, if configured. + // + if (options_->ci_data_specified ()) + { + // Verify the data directory satisfies the requirements. + // + const dir_path& d (options_->ci_data ()); + + if (d.relative ()) + fail << "ci-data directory path must be absolute"; + + if (!dir_exists (d)) + fail << "ci-data directory '" << d << "' does not exist"; + + // Parse XHTML5 form file, if configured. + // + if (options_->ci_form_specified ()) + { + const path& ci_form (options_->ci_form ()); + + if (ci_form.relative ()) + fail << "ci-form path must be absolute"; + + try + { + ifdstream is (ci_form); + + form_ = make_shared<xhtml::fragment> (is.read_text (), + ci_form.string ()); + } + catch (const xml::parsing& e) + { + fail << "unable to parse ci-form file: " << e; + } + catch (const io_error& e) + { + fail << "unable to read ci-form file '" << ci_form << "': " << e; + } + } + + if (options_->ci_handler_specified () && + options_->ci_handler ().relative ()) + fail << "ci-handler path must be absolute"; + } + + if (options_->root ().empty ()) + options_->root (dir_path ("/")); +} + +bool brep::ci:: +handle (request& rq, response& rs) +{ + using namespace bpkg; + using namespace xhtml; + + using serializer = manifest_serializer; + using serialization = manifest_serialization; + + HANDLER_DIAG; + + const dir_path& root (options_->root ()); + + // We will respond with the manifest to the CI request submission protocol + // violations and with a plain text message on the internal errors. 
In the + // latter case we will always respond with the same neutral message for + // security reason, logging the error details. Note that descriptions of + // exceptions caught by the web server are returned to the client (see + // web/module.hxx for details), and we want to avoid this when there is a + // danger of exposing sensitive data. + // + // Also we will pass through exceptions thrown by the underlying API, unless + // we need to handle them or add details for the description, in which case + // we will fallback to one of the above mentioned response methods. + // + // Note that both respond_manifest() and respond_error() are normally called + // right before the end of the request handling. They both always return + // true to allow bailing out with a single line, for example: + // + // return respond_error (); // Request is handled with an error. + // + string request_id; // Will be set later. + auto respond_manifest = [&rs, &request_id] (status_code status, + const string& message) -> bool + { + serializer s (rs.content (status, "text/manifest;charset=utf-8"), + "response"); + + s.next ("", "1"); // Start of manifest. + s.next ("status", to_string (status)); + s.next ("message", message); + + if (!request_id.empty ()) + s.next ("reference", request_id); + + s.next ("", ""); // End of manifest. + return true; + }; + + auto respond_error = [&rs] (status_code status = 500) -> bool + { + rs.content (status, "text/plain;charset=utf-8") + << "CI request submission handling failed" << endl; + + return true; + }; + + // Check if the CI request functionality is enabled. + // + // Note that this is not a submission protocol violation but it feels right + // to respond with the manifest, to help the client a bit. + // + if (!options_->ci_data_specified ()) + return respond_manifest (404, "CI request submission disabled"); + + // Parse the request form data. 
+ // + const name_values& rps (rq.parameters (64 * 1024)); + + // If there is no request parameters then we respond with the CI form XHTML, + // if configured. Otherwise, will proceed as for the CI request and will fail + // (missing parameters). + // + if (rps.empty () && form_ != nullptr) + { + const string title ("CI"); + + xml::serializer s (rs.content (), title); + + s << HTML + << HEAD + << TITLE << title << ~TITLE + << CSS_LINKS (path ("ci.css"), root) + << ~HEAD + << BODY + << DIV_HEADER (root, options_->logo (), options_->menu ()) + << DIV(ID="content") << *form_ << ~DIV + << ~BODY + << ~HTML; + + return true; + } + + // Verify the CI request parameters we expect. The unknown ones will be + // serialized to the CI request manifest. + // + params::ci params; + + try + { + name_value_scanner s (rps); + params = params::ci (s, unknown_mode::skip, unknown_mode::skip); + } + catch (const cli::exception&) + { + return respond_manifest (400, "invalid parameter"); + } + + const string& simulate (params.simulate ()); + + if (simulate == "internal-error-text") + return respond_error (); + else if (simulate == "internal-error-html") + { + const string title ("Internal Error"); + xml::serializer s (rs.content (500), title); + + s << HTML + << HEAD << TITLE << title << ~TITLE << ~HEAD + << BODY << "CI request submission handling failed" << ~BODY + << ~HTML; + + return true; + } + + // Parse and verify the remote repository location. + // + repository_location rl; + + try + { + const repository_url& u (params.repository ()); + + if (u.empty () || u.scheme == repository_protocol::file) + throw invalid_argument (""); + + rl = repository_location (u, guess_type (u, false /* local */)); + } + catch (const invalid_argument&) + { + return respond_manifest (400, "invalid repository location"); + } + + // Verify the package name[/version] arguments. + // + for (const string& s: params.package()) + { + // Let's skip the potentially unfilled package form fields. 
+ // + if (s.empty ()) + continue; + + try + { + size_t p (s.find ('/')); + + if (p != string::npos) + { + package_name (string (s, 0, p)); + + // Not to confuse with module::version. + // + bpkg::version (string (s, p + 1)); + } + else + package_name p (s); // Not to confuse with the s variable declaration. + } + catch (const invalid_argument&) + { + return respond_manifest (400, "invalid package " + s); + } + } + + // Verify that unknown parameter values satisfy the requirements (contain + // only ASCII printable characters plus '\r', '\n', and '\t'). + // + // Actually, the expected ones must satisfy too, so check them as well. + // + auto printable = [] (const string& s) -> bool + { + for (char c: s) + { + if (!((c >= 0x20 && c <= 0x7E) || c == '\n' || c == '\r' || c == '\t')) + return false; + } + return true; + }; + + for (const name_value& nv: rps) + { + if (nv.value && !printable (*nv.value)) + return respond_manifest (400, "invalid parameter " + nv.name); + } + + try + { + // Note that from now on the result manifest we respond with will contain + // the reference value. + // + request_id = uuid::generate ().string (); + } + catch (const system_error& e) + { + error << "unable to generate request id: " << e; + return respond_error (); + } + + // Create the submission data directory. + // + dir_path dd (options_->ci_data () / dir_path (request_id)); + + try + { + // It's highly unlikely but still possible that the directory already + // exists. This can only happen if the generated uuid is not unique. + // + if (try_mkdir (dd) == mkdir_status::already_exists) + throw_generic_error (EEXIST); + } + catch (const system_error& e) + { + error << "unable to create directory '" << dd << "': " << e; + return respond_error (); + } + + auto_rmdir ddr (dd); + + // Serialize the CI request manifest to a stream. 
On the serialization error + // respond to the client with the manifest containing the bad request (400) + // code and return false, on the stream error pass through the io_error + // exception, otherwise return true. + // + timestamp ts (system_clock::now ()); + + auto rqm = [&request_id, + &rl, + &ts, + &simulate, + &rq, + &rps, + &params, + &respond_manifest] + (ostream& os) -> bool + { + try + { + serializer s (os, "request"); + + // Serialize the submission manifest header. + // + s.next ("", "1"); // Start of manifest. + s.next ("id", request_id); + s.next ("repository", rl.string ()); + + for (const string& p: params.package()) + { + if (!p.empty ()) // Skip empty package names (see above for details). + s.next ("package", p); + } + + s.next ("timestamp", + butl::to_string (ts, + "%Y-%m-%dT%H:%M:%SZ", + false /* special */, + false /* local */)); + + if (!simulate.empty ()) + s.next ("simulate", simulate); + + // Serialize the User-Agent HTTP header and the client IP address. + // + optional<string> ip; + optional<string> ua; + for (const name_value& h: rq.headers ()) + { + if (casecmp (h.name, ":Client-IP") == 0) + ip = h.value; + else if (casecmp (h.name, "User-Agent") == 0) + ua = h.value; + } + + if (ip) + s.next ("client-ip", *ip); + + if (ua) + s.next ("user-agent", *ua); + + // Serialize the request parameters. + // + // Note that the serializer constraints the parameter names (can't start + // with '#', can't contain ':' and the whitespaces, etc.). + // + for (const name_value& nv: rps) + { + const string& n (nv.name); + + if (n != "repository" && + n != "_" && + n != "package" && + n != "simulate") + s.next (n, nv.value ? *nv.value : ""); + } + + s.next ("", ""); // End of manifest. + return true; + } + catch (const serialization& e) + { + respond_manifest (400, string ("invalid parameter: ") + e.what ()); + return false; + } + }; + + // Serialize the CI request manifest to the submission directory. 
+ // + path rqf (dd / "request.manifest"); + + try + { + ofdstream os (rqf); + bool r (rqm (os)); + os.close (); + + if (!r) + return true; // The client is already responded with the manifest. + } + catch (const io_error& e) + { + error << "unable to write to '" << rqf << "': " << e; + return respond_error (); + } + + // Given that the submission data is now successfully persisted we are no + // longer in charge of removing it, except for the cases when the submission + // handler terminates with an error (see below for details). + // + ddr.cancel (); + + // If the handler terminates with non-zero exit status or specifies 5XX + // (HTTP server error) submission result manifest status value, then we + // stash the submission data directory for troubleshooting. Otherwise, if + // it's the 4XX (HTTP client error) status value, then we remove the + // directory. + // + // Note that leaving the directory in place in case of a submission error + // would have prevent the user from re-submitting until we research the + // issue and manually remove the directory. + // + auto stash_submit_dir = [&dd, error] () + { + if (dir_exists (dd)) + try + { + mvdir (dd, dir_path (dd + ".fail")); + } + catch (const system_error& e) + { + // Not much we can do here. Let's just log the issue and bail out + // leaving the directory in place. + // + error << "unable to rename directory '" << dd << "': " << e; + } + }; + + // Run the submission handler, if specified, reading the result manifest + // from its stdout and caching it as a name/value pair list for later use + // (forwarding to the client, sending via email, etc.). Otherwise, create + // implied result manifest. + // + status_code sc; + vector<manifest_name_value> rvs; + + if (options_->ci_handler_specified ()) + { + using namespace external_handler; + + optional<result_manifest> r (run (options_->ci_handler (), + options_->ci_handler_argument (), + dd, + options_->ci_handler_timeout (), + error, + warn, + verb_ ? 
&trace : nullptr)); + if (!r) + { + stash_submit_dir (); + return respond_error (); // The diagnostics is already issued. + } + + sc = r->status; + rvs = move (r->values); + } + else // Create the implied result manifest. + { + sc = 200; + + auto add = [&rvs] (string n, string v) + { + manifest_name_value nv {move (n), move (v), + 0 /* name_line */, 0 /* name_column */, + 0 /* value_line */, 0 /* value_column */}; + + rvs.emplace_back (move (nv)); + }; + + add ("", "1"); // Start of manifest. + add ("status", "200"); + add ("message", "CI request is queued"); + add ("reference", request_id); + add ("", ""); // End of manifest. + } + + assert (!rvs.empty ()); // Produced by the handler or is implied. + + // Serialize the submission result manifest to a stream. On the + // serialization error log the error description and return false, on the + // stream error pass through the io_error exception, otherwise return true. + // + auto rsm = [&rvs, &error, &request_id] (ostream& os) -> bool + { + try + { + serializer s (os, "result"); + for (const manifest_name_value& nv: rvs) + s.next (nv.name, nv.value); + + return true; + } + catch (const serialization& e) + { + error << "ref " << request_id << ": unable to serialize handler's " + << "output: " << e; + return false; + } + }; + + // If the submission data directory still exists then perform an appropriate + // action on it, depending on the submission result status. Note that the + // handler could move or remove the directory. + // + if (dir_exists (dd)) + { + // Remove the directory if the client error is detected. + // + if (sc >= 400 && sc < 500) + rmdir_r (dd); + + // Otherwise, save the result manifest, into the directory. Also stash the + // directory for troubleshooting in case of the server error. + // + else + { + path rsf (dd / "result.manifest"); + + try + { + ofdstream os (rsf); + + // Not being able to stash the result manifest is not a reason to + // claim the submission failed. 
The error is logged nevertheless. + // + rsm (os); + + os.close (); + } + catch (const io_error& e) + { + // Not fatal (see above). + // + error << "unable to write to '" << rsf << "': " << e; + } + + if (sc >= 500 && sc < 600) + stash_submit_dir (); + } + } + + // Send email, if configured, and the CI request submission is not simulated. + // + // Note that we don't consider the email sending failure to be a submission + // failure as the submission data is successfully persisted and the handler + // is successfully executed, if configured. One can argue that email can be + // essential for the submission processing and missing it would result in + // the incomplete submission. In this case it's natural to assume that the + // web server error log is monitored and the email sending failure will be + // noticed. + // + if (options_->ci_email_specified () && simulate.empty ()) + try + { + // Redirect the diagnostics to the web server error log. + // + sendmail sm ([&trace, this] (const char* args[], size_t n) + { + l2 ([&]{trace << process_args {args, n};}); + }, + 2 /* stderr */, + options_->email (), + "CI request submission (" + request_id + ")", + {options_->ci_email ()}); + + // Write the submission request manifest. + // + bool r (rqm (sm.out)); + assert (r); // The serialization succeeded once, so can't fail now. + + // Write the submission result manifest. + // + sm.out << "\n\n"; + + rsm (sm.out); // We don't care about the result (see above). + + sm.out.close (); + + if (!sm.wait ()) + error << "sendmail " << *sm.exit; + } + // Handle process_error and io_error (both derive from system_error). + // + catch (const system_error& e) + { + error << "sendmail error: " << e; + } + + if (!rsm (rs.content (sc, "text/manifest;charset=utf-8"))) + return respond_error (); // The error description is already logged. 
+ + return true; +} diff --git a/mod/mod-ci.hxx b/mod/mod-ci.hxx new file mode 100644 index 0000000..f9e89ff --- /dev/null +++ b/mod/mod-ci.hxx @@ -0,0 +1,45 @@ +// file : mod/mod-ci.hxx -*- C++ -*- +// copyright : Copyright (c) 2014-2018 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#ifndef MOD_MOD_CI_HXX +#define MOD_MOD_CI_HXX + +#include <web/xhtml-fragment.hxx> + +#include <libbrep/types.hxx> +#include <libbrep/utility.hxx> + +#include <mod/module.hxx> +#include <mod/options.hxx> + +namespace brep +{ + class ci: public handler + { + public: + ci () = default; + + // Create a shallow copy (handling instance) if initialized and a deep + // copy (context exemplar) otherwise. + // + explicit + ci (const ci&); + + virtual bool + handle (request&, response&); + + virtual const cli::options& + cli_options () const {return options::ci::description ();} + + private: + virtual void + init (cli::scanner&); + + private: + shared_ptr<options::ci> options_; + shared_ptr<web::xhtml::fragment> form_; + }; +} + +#endif // MOD_MOD_CI_HXX diff --git a/mod/mod-repository-root.cxx b/mod/mod-repository-root.cxx index 27901d7..3b0ab1f 100644 --- a/mod/mod-repository-root.cxx +++ b/mod/mod-repository-root.cxx @@ -12,6 +12,8 @@ #include <mod/module.hxx> #include <mod/options.hxx> + +#include <mod/mod-ci.hxx> #include <mod/mod-submit.hxx> #include <mod/mod-builds.hxx> #include <mod/mod-build-log.hxx> @@ -112,7 +114,8 @@ namespace brep build_force_ (make_shared<build_force> ()), build_log_ (make_shared<build_log> ()), builds_ (make_shared<builds> ()), - submit_ (make_shared<submit> ()) + submit_ (make_shared<submit> ()), + ci_ (make_shared<ci> ()) { } @@ -164,6 +167,10 @@ namespace brep r.initialized_ ? r.submit_ : make_shared<submit> (*r.submit_)), + ci_ ( + r.initialized_ + ? r.ci_ + : make_shared<ci> (*r.ci_)), options_ ( r.initialized_ ? 
r.options_ @@ -188,6 +195,7 @@ namespace brep append (r, build_log_->options ()); append (r, builds_->options ()); append (r, submit_->options ()); + append (r, ci_->options ()); return r; } @@ -231,6 +239,7 @@ namespace brep sub_init (*build_log_, "build_log"); sub_init (*builds_, "builds"); sub_init (*submit_, "submit"); + sub_init (*ci_, "ci"); // Parse own configuration options. // @@ -371,6 +380,13 @@ namespace brep return handle ("submit", true); } + else if (fn == "ci") + { + if (handler_ == nullptr) + handler_.reset (new ci (*ci_)); + + return handle ("ci", true); + } } if (handler_ == nullptr) diff --git a/mod/mod-repository-root.hxx b/mod/mod-repository-root.hxx index 74691ea..9a71849 100644 --- a/mod/mod-repository-root.hxx +++ b/mod/mod-repository-root.hxx @@ -23,6 +23,7 @@ namespace brep class build_log; class builds; class submit; + class ci; class repository_root: public handler { @@ -67,6 +68,8 @@ namespace brep shared_ptr<build_log> build_log_; shared_ptr<builds> builds_; shared_ptr<submit> submit_; + shared_ptr<ci> ci_; + shared_ptr<options::repository_root> options_; // Sub-handler the request is dispatched to. Initially is NULL. 
It is set diff --git a/mod/mod-submit.cxx b/mod/mod-submit.cxx index 61eeaf6..470bd45 100644 --- a/mod/mod-submit.cxx +++ b/mod/mod-submit.cxx @@ -4,19 +4,9 @@ #include <mod/mod-submit.hxx> -#include <sys/time.h> // timeval -#include <sys/select.h> - -#include <ratio> // ratio_greater_equal -#include <chrono> -#include <cstdlib> // strtoul() -#include <istream> -#include <sstream> -#include <type_traits> // static_assert -#include <system_error> // error_code, generic_category() +#include <ostream> #include <libbutl/sha256.mxx> -#include <libbutl/process.mxx> #include <libbutl/sendmail.mxx> #include <libbutl/fdstream.mxx> #include <libbutl/timestamp.mxx> @@ -30,6 +20,7 @@ #include <mod/page.hxx> #include <mod/options.hxx> +#include <mod/external-handler.hxx> using namespace std; using namespace butl; @@ -113,8 +104,6 @@ handle (request& rq, response& rs) { using namespace xhtml; - using parser = manifest_parser; - using parsing = manifest_parsing; using serializer = manifest_serializer; using serialization = manifest_serialization; @@ -285,8 +274,8 @@ handle (request& rq, response& rs) return respond_manifest (400, "invalid parameter " + nv.name); } - // Note that from now on the result manifest will contain the reference - // value. + // Note that from now on the result manifest we respond with will contain + // the reference value. // ref = string (sha256sum, 0, 12); @@ -299,7 +288,7 @@ handle (request& rq, response& rs) if (dir_exists (dd) || simulate == "duplicate-archive") return respond_manifest (422, "duplicate submission"); - // Create the temporary submission directory. + // Create the temporary submission data directory. // dir_path td; @@ -573,310 +562,34 @@ handle (request& rq, response& rs) // (forwarding to the client, sending via email, etc.). Otherwise, create // implied result manifest. 
// - status_code sc (200); + status_code sc; vector<manifest_name_value> rvs; if (options_->submit_handler_specified ()) { - // For the sake of the documentation we will call the handler's normal - // exit with 0 code "successful termination". - // - // To make sure the handler process execution doesn't exceed the specified - // timeout we set the non-blocking mode for the process stdout-reading - // stream, try to read from it with the 10 milliseconds timeout and check - // the process execution time between the reads. We then kill the process - // if the execution time is exceeded. - // - using namespace chrono; - - using time_point = system_clock::time_point; - using duration = system_clock::duration; - - // Make sure that the system clock has at least milliseconds resolution. - // - static_assert( - ratio_greater_equal<milliseconds::period, duration::period>::value, - "The system clock resolution is too low"); - - optional<milliseconds> timeout; - - if (options_->submit_handler_timeout_specified ()) - timeout = milliseconds (options_->submit_handler_timeout () * 1000); - - const path& handler (options_->submit_handler ()); - - // Note that due to the non-blocking mode we cannot just pass the stream - // to the manifest parser constructor. So we buffer the data in the string - // stream and then parse that. - // - stringstream ss; - - for (;;) // Breakout loop. - try - { - fdpipe pipe (fdopen_pipe ()); // Can throw io_error. - - // Redirect the diagnostics to the web server error log. - // - process pr ( - process_start_callback (print_args, - 0 /* stdin */, - pipe /* stdout */, - 2 /* stderr */, - handler, - options_->submit_handler_argument (), - dd)); - pipe.out.close (); - - auto kill = [&pr, &warn, &handler, &ref] () - { - // We may still end up well (see below), thus this is a warning. 
- // - warn << "ref " << ref << ": process " << handler - << " execution timeout expired"; - - pr.kill (); - }; - - try - { - ifdstream is (move (pipe.in), fdstream_mode::non_blocking); - - const size_t nbuf (8192); - char buf[nbuf]; - - while (is.is_open ()) - { - time_point start; - milliseconds wd (10); // Max time to wait for the data portion. - - if (timeout) - { - start = system_clock::now (); - - if (*timeout < wd) - wd = *timeout; - } - - timeval tm {wd.count () / 1000 /* seconds */, - wd.count () % 1000 * 1000 /* microseconds */}; - - fd_set rd; - FD_ZERO (&rd); - FD_SET (is.fd (), &rd); - - int r (select (is.fd () + 1, &rd, nullptr, nullptr, &tm)); - - if (r == -1) - { - // Don't fail if the select() call was interrupted by the signal. - // - if (errno != EINTR) - throw_system_ios_failure (errno, "select failed"); - } - else if (r != 0) // Is data available? - { - assert (FD_ISSET (is.fd (), &rd)); - - // The only leagal way to read from non-blocking ifdstream. - // - streamsize n (is.readsome (buf, nbuf)); - - // Close the stream (and bail out) if the end of the data is - // reached. Otherwise cache the read data. - // - if (is.eof ()) - is.close (); - else - { - // The data must be available. - // - // Note that we could keep reading until the readsome() call - // returns 0. However, this way we could potentially exceed the - // timeout significantly for some broken handler that floods us - // with data. So instead, we will be checking the process - // execution time after every data chunk read. - // - assert (n != 0); - - ss.write (buf, n); - } - } - else // Timeout occured. - { - // Normally, we don't expect timeout to occur on the pipe read - // operation if the process has terminated successfully, as all its - // output must already be buffered (including eof). However, there - // can be some still running handler's child that has inherited - // the parent's stdout. 
In this case we assume that we have read - // all the handler's output, close the stream, log the warning and - // bail out. - // - if (pr.exit) - { - // We keep reading only upon successful handler termination. - // - assert (*pr.exit); - - is.close (); - - warn << "ref " << ref << ": process " << handler - << " stdout is not closed after termination (possibly " - << "handler's child still running)"; - } - } - - if (timeout) - { - time_point now (system_clock::now ()); - - // Assume we have waited the full amount if the time adjustment is - // detected. - // - duration d (now > start ? now - start : wd); - - // If the timeout is not fully exhausted, then decrement it and - // try to read some more data from the handler' stdout. Otherwise, - // kill the process, if not done yet. - // - // Note that it may happen that we are killing an already - // terminated process, in which case kill() just sets the process - // exit information. On the other hand it's guaranteed that the - // process is terminated after the kill() call, and so the pipe is - // presumably closed on the write end (see above for details). - // Thus, if the process terminated successfully, we will continue - // reading until eof is reached or read timeout occurred. Yes, it - // may happen that we end up with a successful submission even - // with the kill. - // - if (*timeout > d) - *timeout -= duration_cast<milliseconds> (d); - else if (!pr.exit) - { - kill (); - - assert (pr.exit); - - // Close the stream (and bail out) if the process hasn't - // terminate successfully. - // - if (!*pr.exit) - is.close (); - - *timeout = milliseconds::zero (); - } - } - } - - assert (!is.is_open ()); - - if (!timeout) - pr.wait (); - - // If the process is not terminated yet, then wait for its termination - // for the remaining time. Kill it if the timeout has been exceeded - // and the process still hasn't terminate. 
- // - else if (!pr.exit && !pr.timed_wait (*timeout)) - kill (); - - assert (pr.exit); // The process must finally be terminated. - - if (*pr.exit) - break; // Get out of the breakout loop. - - error << "ref " << ref << ": process " << handler << " " << *pr.exit; - - // Fall through. - } - catch (const io_error& e) - { - if (pr.wait ()) - error << "ref " << ref << ": unable to read handler's output: " << e; + using namespace external_handler; - // Fall through. - } - - stash_submit_dir (); - return respond_error (); - } - // Handle process_error and io_error (both derive from system_error). - // - catch (const system_error& e) + optional<result_manifest> r (run (options_->submit_handler (), + options_->submit_handler_argument (), + dd, + options_->submit_handler_timeout (), + error, + warn, + verb_ ? &trace : nullptr)); + if (!r) { - error << "unable to execute '" << handler << "': " << e; - stash_submit_dir (); - return respond_error (); - } - - try - { - // Parse and verify the manifest. Obtain the HTTP status code (must go - // first) and cache it for the subsequent response to the client. - // - parser p (ss, "handler"); - manifest_name_value nv (p.next ()); - - auto bad_value ([&p, &nv] (const string& d) { - throw parsing (p.name (), nv.value_line, nv.value_column, d);}); - - if (nv.empty ()) - bad_value ("empty manifest"); - - const string& n (nv.name); - const string& v (nv.value); - - // The format version pair is verified by the parser. - // - assert (n.empty () && v == "1"); - - // Cache the format version pair. - // - rvs.push_back (move (nv)); - - // Get and verify the HTTP status. - // - nv = p.next (); - if (n != "status") - bad_value ("no status specified"); - - char* e (nullptr); - unsigned long c (strtoul (v.c_str (), &e, 10)); // Can't throw. - - assert (e != nullptr); - - if (!(*e == '\0' && c >= 100 && c < 600)) - bad_value ("invalid HTTP status '" + v + "'"); - - // Cache the HTTP status. 
- // - sc = static_cast<status_code> (c); - rvs.push_back (move (nv)); - - // Cache the remaining name/value pairs. - // - for (nv = p.next (); !nv.empty (); nv = p.next ()) - rvs.push_back (move (nv)); - - // Cache end of manifest. - // - rvs.push_back (move (nv)); + return respond_error (); // The diagnostics is already issued. } - catch (const parsing& e) - { - error << "ref " << ref << ": unable to parse handler's output: " << e; - // It appears the handler had misbehaved, so let's stash the submission - // directory for troubleshooting. - // - stash_submit_dir (); - return respond_error (); - } + sc = r->status; + rvs = move (r->values); } - else // Create implied result manifest. + else // Create the implied result manifest. { + sc = 200; + auto add = [&rvs] (string n, string v) { manifest_name_value nv {move (n), move (v), diff --git a/mod/options.cli b/mod/options.cli index f7a9387..046173c 100644 --- a/mod/options.cli +++ b/mod/options.cli @@ -729,7 +729,10 @@ namespace brep { // Package repository URL. // - bpkg::repository_url repository; + // Note that the ci parameter is renamed to '_' by the root handler (see + // the request_proxy class for details). + // + bpkg::repository_url repository | _; // Package names/versions. // |