From 9f5b820aec37ac0a929e074ae2c859229da33b0f Mon Sep 17 00:00:00 2001 From: Karen Arutyunov Date: Fri, 28 Apr 2023 22:14:14 +0300 Subject: Add support for upload handlers and implement brep-upload-bindist handler --- mod/mod-upload.cxx | 766 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 766 insertions(+) create mode 100644 mod/mod-upload.cxx (limited to 'mod/mod-upload.cxx') diff --git a/mod/mod-upload.cxx b/mod/mod-upload.cxx new file mode 100644 index 0000000..1474363 --- /dev/null +++ b/mod/mod-upload.cxx @@ -0,0 +1,766 @@ +// file : mod/mod-upload.cxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#include + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include // operator<<(ostream, process_args) +#include +#include + +#include + +#include +#include +#include +#include + +#include +#include + +using namespace std; +using namespace butl; +using namespace brep::cli; +using namespace odb::core; + +// While currently the user-defined copy constructor is not required (we don't +// need to deep copy nullptr's), it is a good idea to keep the placeholder +// ready for less trivial cases. +// +brep::upload:: +upload (const upload& r) + : build_result_module (r), + options_ (r.initialized_ ? r.options_ : nullptr) +{ +} + +// Parse the module options, verify the upload-related configuration, and +// initialize the base build result module if the upload functionality is +// enabled. +// +void brep::upload:: +init (scanner& s) +{ + HANDLER_DIAG; + + options_ = make_shared ( + s, unknown_mode::fail, unknown_mode::fail); + + // Verify that the upload handling is set up properly, if configured.
+ // + for (const auto& ud: options_->upload_data ()) + { + const string& t (ud.first); + + if (t.empty ()) + fail << "empty upload type in upload-data configuration option"; + + if (ud.second.relative ()) + fail << t << " upload-data path '" << ud.second << "' is relative"; + + if (!dir_exists (ud.second)) + fail << t << " upload-data directory '" << ud.second + << "' does not exist"; + + const map& uh (options_->upload_handler ()); + auto i (uh.find (t)); + + if (i != uh.end () && i->second.relative ()) + fail << t << " upload-handler path '" << i->second << "' is relative"; + } + + if (options_->upload_data_specified ()) + { + if (!options_->build_config_specified ()) + fail << "upload functionality is enabled but package building " + << "functionality is disabled"; + + build_result_module::init (*options_, *options_); + } + + if (options_->root ().empty ()) + options_->root (dir_path ("/")); +} + +// Handle the upload request: verify the request parameters and the session, +// save the uploaded archive and the request manifest into a new +// subdirectory of the type-specific upload data directory, run the upload +// handler (if configured), send the notification email (if configured), and +// respond with the result manifest. +// +bool brep::upload:: +handle (request& rq, response& rs) +{ + using brep::version; // Not to confuse with module::version. + + using serializer = manifest_serializer; + using serialization = manifest_serialization; + + HANDLER_DIAG; + + // We will respond with the manifest to the upload protocol violations and + // with a plain text message on the internal errors. In the latter case we + // will always respond with the same neutral message for security reasons, + // logging the error details. Note that descriptions of exceptions caught by + // the web server are returned to the client (see web/module.hxx for + // details), and we want to avoid this when there is a danger of exposing + // sensitive data. + // + // Also we will pass through exceptions thrown by the underlying API, unless + // we need to handle them or add details for the description, in which case + // we will fall back to one of the above mentioned response methods. + // + // Note that both respond_manifest() and respond_error() are normally called + // right before the end of the request handling.
They both always return + // true to allow bailing out with a single line, for example: + // + // return respond_error (); // Request is handled with an error. + // + string request_id; // Will be set later. + auto respond_manifest = [&rs, &request_id] (status_code status, + const string& message) -> bool + { + serializer s (rs.content (status, "text/manifest;charset=utf-8"), + "response"); + + s.next ("", "1"); // Start of manifest. + s.next ("status", to_string (status)); + s.next ("message", message); + + if (!request_id.empty ()) + s.next ("reference", request_id); + + s.next ("", ""); // End of manifest. + return true; + }; + + auto respond_error = [&rs] (status_code status = 500) -> bool + { + rs.content (status, "text/plain;charset=utf-8") + << "upload handling failed" << endl; + + return true; + }; + + // Check if the upload functionality is enabled. + // + // Note that this is not an upload protocol violation but it feels right to + // respond with the manifest, to help the client a bit. + // + if (!options_->upload_data_specified ()) + return respond_manifest (404, "upload disabled"); + + // Parse the request data and verify the upload size limit. + // + // Note that the size limit is upload type-specific. Thus, first, we need to + // determine the upload type which we expect to be specified in the URL as a + // value of the upload parameter. + // + string type; + dir_path dir; + + try + { + name_value_scanner s (rq.parameters (0 /* limit */, true /* url_only */)); + + // We only expect the upload= parameter in URL. + // + params::upload params ( + params::upload (s, unknown_mode::fail, unknown_mode::fail)); + + type = move (params.type ()); + + if (type.empty ()) + return respond_manifest (400, "upload type expected"); + + // Check if this upload type is enabled. While at it, cache the upload + // data directory path.
+ // + const map& ud (options_->upload_data ()); + auto i (ud.find (type)); + + if (i == ud.end ()) + return respond_manifest (404, type + " upload disabled"); + + dir = i->second; + } + catch (const cli::exception&) + { + return respond_manifest (400, "invalid parameter"); + } + + try + { + const map& us (options_->upload_max_size ()); + auto i (us.find (type)); + rq.parameters (i != us.end () ? i->second : 10485760); // 10M by default. + } + catch (const invalid_request& e) + { + if (e.status == 413) // Payload too large? + return respond_manifest (e.status, type + " upload size exceeds limit"); + + throw; + } + + // The request parameters are now parsed and the limit doesn't really matter. + // + const name_values& rps (rq.parameters (0 /* limit */)); + + // Verify the upload parameters we expect. The unknown ones will be + // serialized to the upload manifest. + // + params::upload params; + + try + { + name_value_scanner s (rps); + params = params::upload (s, unknown_mode::skip, unknown_mode::skip); + } + catch (const cli::exception&) + { + return respond_manifest (400, "invalid parameter"); + } + + const string& session (params.session ()); + const string& instance (params.instance ()); + const string& archive (params.archive ()); + const string& sha256sum (params.sha256sum ()); + + if (session.empty ()) + return respond_manifest (400, "upload session expected"); + + optional> challenge; + + if (params.challenge_specified ()) + try + { + challenge = base64_decode (params.challenge ()); + } + catch (const invalid_argument&) + { + return respond_manifest (400, "invalid challenge"); + } + + if (instance.empty ()) + return respond_manifest (400, "upload instance expected"); + + if (archive.empty ()) + return respond_manifest (400, "upload archive expected"); + + if (sha256sum.empty ()) + return respond_manifest (400, "upload archive checksum expected"); + + if (sha256sum.size () != 64) + return respond_manifest (400, "invalid upload archive checksum"); + + //
Verify that unknown parameter values satisfy the requirements (contain + // only UTF-8 encoded graphic characters plus '\t', '\r', and '\n'). + // + // Actually, the expected ones must satisfy too, so check them as well. + // + string what; + for (const name_value& nv: rps) + { + if (nv.value && + !utf8 (*nv.value, what, codepoint_types::graphic, U"\n\r\t")) + return respond_manifest (400, + "invalid parameter " + nv.name + ": " + what); + } + + parse_session_result sess; + + try + { + sess = parse_session (session); + } + catch (const invalid_argument& e) + { + return respond_manifest (400, string ("invalid session: ") + e.what ()); + } + + // If the session expired (no such configuration, etc) then, similar to the + // build result module, we log this case with the warning severity and + // respond with manifest with the 200 status as if the session is valid (see + // the build result module for the reasoning). + // + auto warn_expired = [&session, &warn] (const string& d) + { + warn << "session '" << session << "' expired: " << d; + }; + + const build_id& id (sess.id); + + // Make sure the build configuration still exists. + // + const build_target_config* tc; + { + auto i (target_conf_map_->find ( + build_target_config_id {id.target, id.target_config_name})); + + if (i == target_conf_map_->end ()) + { + warn_expired ("no build configuration"); + return respond_manifest (200, type + " upload is queued"); + } + + tc = i->second; + } + + // Note that if the session authentication fails (probably due to the + // authentication settings change), then we log this case with the warning + // severity and respond with manifest with the 200 status as if the + // challenge is valid (see the build result module for the reasoning).
+ // + shared_ptr bld; + shared_ptr pkg; + shared_ptr rep; + { + transaction t (build_db_->begin ()); + + package_build pb; + shared_ptr b; + if (!build_db_->query_one ( + query::build::id == id, pb)) + { + warn_expired ("no package build"); + } + else if ((b = move (pb.build))->state != build_state::building) + { + warn_expired ("package configuration state is " + to_string (b->state)); + } + else if (b->timestamp != sess.timestamp) + { + warn_expired ("non-matching timestamp"); + } + else if (authenticate_session (*options_, challenge, *b, session)) + { + bld = move (b); + pkg = build_db_->load (id.package); + rep = pkg->internal_repository.load (); + } + + t.commit (); + } + + // Note that from now on the result manifest we respond with will contain + // the reference value. + // + try + { + request_id = uuid::generate ().string (); + } + catch (const system_error& e) + { + error << "unable to generate request id: " << e; + return respond_error (); + } + + if (bld == nullptr) + return respond_manifest (200, type + " upload is queued"); + + // Create the upload data directory. + // + dir_path dd (dir / dir_path (request_id)); + + try + { + // It's highly unlikely but still possible that the directory already + // exists. This can only happen if the generated uuid is not unique. + // + if (try_mkdir (dd) == mkdir_status::already_exists) + throw_generic_error (EEXIST); + } + catch (const system_error& e) + { + error << "unable to create directory '" << dd << "': " << e; + return respond_error (); + } + + auto_rmdir ddr (dd); + + // Save the package archive into the temporary directory and verify its + // checksum. + // + // Note that the archive file name can potentially contain directory path in + // the POSIX form, so let's strip it if that's the case. + // + path a; + path af; + + try + { + size_t n (archive.find_last_of ('/')); + a = path (n != string::npos ?
string (archive, n + 1) : archive); + af = dd / a; + } + catch (const invalid_path&) + { + return respond_manifest (400, "invalid package archive name"); + } + + try + { + istream& is (rq.open_upload ("archive")); + + // Note that istream::read() sets failbit if unable to read the requested + // number of bytes. + // + is.exceptions (istream::badbit); + + sha256 sha; + char buf[8192]; + ofdstream os (af, fdopen_mode::binary); + + while (!eof (is)) + { + is.read (buf, sizeof (buf)); + + if (size_t n = is.gcount ()) + { + sha.append (buf, n); + os.write (buf, n); + } + } + + os.close (); + + // Respond with the unprocessable entity (422) code for the archive + // checksum mismatch. + // + if (sha.string () != sha256sum) + return respond_manifest (422, "upload archive checksum mismatch"); + } + // Note that invalid_argument (thrown by open_upload() function call) can + // mean both no archive upload or multiple archive uploads. + // + catch (const invalid_argument&) + { + return respond_manifest (400, "archive upload expected"); + } + catch (const io_error& e) + { + error << "unable to write package archive '" << af << "': " << e; + return respond_error (); + } + + // Serialize the upload request manifest to a stream. On the serialization + // error respond to the client with the manifest containing the bad request + // (400) code and return false, on the stream error pass through the + // io_error exception, otherwise return true. + // + timestamp ts (system_clock::now ()); + + auto rqm = [&request_id, + &ts, + &rps, + &session, + &instance, + &a, + &sha256sum, + &id, + &bld, + &pkg, + &rep, + &tc, + &sess, + &respond_manifest, + this] (ostream& os, bool long_lines = false) -> bool + { + try + { + serializer s (os, "request", long_lines); + + // Serialize the upload manifest header. + // + s.next ("", "1"); // Start of manifest.
+ s.next ("id", request_id); + s.next ("session", session); + s.next ("instance", instance); + s.next ("archive", a.string ()); + s.next ("sha256sum", sha256sum); + + s.next ("timestamp", + butl::to_string (ts, + "%Y-%m-%dT%H:%M:%SZ", + false /* special */, + false /* local */)); + + s.next ("name", id.package.name.string ()); + s.next ("version", pkg->version.string ()); + s.next ("project", pkg->project.string ()); + s.next ("target-config", tc->name); + s.next ("package-config", id.package_config_name); + s.next ("target", tc->target.string ()); + + if (!tenant.empty ()) + s.next ("tenant", tenant); + + s.next ("toolchain-name", id.toolchain_name); + s.next ("toolchain-version", sess.toolchain_version.string ()); + s.next ("repository-name", rep->canonical_name); + + s.next ("machine-name", bld->machine); + s.next ("machine-summary", bld->machine_summary); + + // Serialize the request parameters. + // + // Note that the serializer constrains the parameter names (can't start + // with '#', can't contain ':' and the whitespaces, etc.). + // + for (const name_value& nv: rps) + { + // Note that the upload parameter is renamed to '_' by the root + // handler (see the request_proxy class for details). + // + const string& n (nv.name); + if (n != "_" && + n != "session" && + n != "challenge" && + n != "instance" && + n != "archive" && + n != "sha256sum") + s.next (n, nv.value ? *nv.value : ""); + } + + s.next ("", ""); // End of manifest. + return true; + } + catch (const serialization& e) + { + respond_manifest (400, string ("invalid parameter: ") + e.what ()); + return false; + } + }; + + // Serialize the upload request manifest to the upload directory. + // + path rqf (dd / "request.manifest"); + + try + { + ofdstream os (rqf); + bool r (rqm (os)); + os.close (); + + if (!r) + return true; // The client has already been responded to with the manifest.
+ } + catch (const io_error& e) + { + error << "unable to write to '" << rqf << "': " << e; + return respond_error (); + } + + // Given that the upload data is now successfully persisted we are no longer + // in charge of removing it, except for the cases when the upload + // handler terminates with an error (see below for details). + // + ddr.cancel (); + + // If the handler terminates with non-zero exit status or specifies 5XX + // (HTTP server error) upload result manifest status value, then we stash + // the upload data directory for troubleshooting. Otherwise, if it's the 4XX + // (HTTP client error) status value, then we remove the directory. + // + auto stash_upload_dir = [&dd, error] () + { + if (dir_exists (dd)) + try + { + mvdir (dd, dir_path (dd + ".fail")); + } + catch (const system_error& e) + { + // Not much we can do here. Let's just log the issue and bail out + // leaving the directory in place. + // + error << "unable to rename directory '" << dd << "': " << e; + } + }; + + // Run the upload handler, if specified, reading the result manifest from + // its stdout and caching it as a name/value pair list for later use + // (forwarding to the client, sending via email, etc). Otherwise, create + // implied result manifest. + // + status_code sc; + vector rvs; + + const map& uh (options_->upload_handler ()); + auto hi (uh.find (type)); + + if (hi != uh.end ()) + { + auto range (options_->upload_handler_argument ().equal_range (type)); + + strings args; + for (auto i (range.first); i != range.second; ++i) + args.push_back (i->second); + + const map& ht (options_->upload_handler_timeout ()); + auto i (ht.find (type)); + + optional r ( + external_handler::run (hi->second, + args, + dd, + i != ht.end () ? i->second : 0, + error, + warn, + verb_ ? &trace : nullptr)); + + if (!r) + { + stash_upload_dir (); + return respond_error (); // The diagnostics is already issued.
+ } + + sc = r->status; + rvs = move (r->values); + } + else // Create the implied result manifest. + { + sc = 200; + + auto add = [&rvs] (string n, string v) + { + manifest_name_value nv { + move (n), move (v), + 0 /* name_line */, 0 /* name_column */, + 0 /* value_line */, 0 /* value_column */, + 0 /* start_pos */, 0 /* colon_pos */, 0 /* end_pos */}; + + rvs.emplace_back (move (nv)); + }; + + add ("status", "200"); + add ("message", type + " upload is queued"); + add ("reference", request_id); + } + + assert (!rvs.empty ()); // Produced by the handler or is implied. + + // Serialize the upload result manifest to a stream. On the serialization + // error log the error description and return false, on the stream error + // pass through the io_error exception, otherwise return true. + // + auto rsm = [&rvs, + &error, + &request_id, + &type] (ostream& os, bool long_lines = false) -> bool + { + try + { + serializer s (os, "result", long_lines); + serialize_manifest (s, rvs); + return true; + } + catch (const serialization& e) + { + error << "ref " << request_id << ": unable to serialize " << type + << " upload handler's output: " << e; + return false; + } + }; + + // If the upload data directory still exists then perform an appropriate + // action on it, depending on the upload result status. Note that the + // handler could move or remove the directory. + // + if (dir_exists (dd)) + { + // Remove the directory if the client error is detected. + // + if (sc >= 400 && sc < 500) + { + rmdir_r (dd); + } + // + // Otherwise, save the result manifest into the directory. Also stash the + // directory for troubleshooting in case of the server error. + // + else + { + path rsf (dd / "result.manifest"); + + try + { + ofdstream os (rsf); + + // Not being able to stash the result manifest is not a reason to + // claim the upload failed. The error is logged nevertheless. + // + rsm (os); + + os.close (); + } + catch (const io_error& e) + { + // Not fatal (see above).
+ // + error << "unable to write to '" << rsf << "': " << e; + } + + if (sc >= 500 && sc < 600) + stash_upload_dir (); + } + } + + // Send email, if configured. Use the long lines manifest serialization mode + // for the convenience of copying/clicking URLs they contain. + // + // Note that we don't consider the email sending failure to be an upload + // failure as the upload data is successfully persisted and the handler is + // successfully executed, if configured. One can argue that email can be + // essential for the upload processing and missing it would result in the + // incomplete upload. In this case it's natural to assume that the web + // server error log is monitored and the email sending failure will be + // noticed. + // + const map& ue (options_->upload_email ()); + auto ei (ue.find (type)); + + if (ei != ue.end ()) + try + { + // Redirect the diagnostics to the web server error log. + // + sendmail sm ([&trace, this] (const char* args[], size_t n) + { + l2 ([&]{trace << process_args {args, n};}); + }, + 2 /* stderr */, + options_->email (), + type + " upload (" + request_id + ')', + {ei->second}); + + // Write the upload request manifest. + // + bool r (rqm (sm.out, true /* long_lines */)); + assert (r); // The serialization succeeded once, so can't fail now. + + // Write the upload result manifest. + // + sm.out << "\n\n"; + + // We don't care about the result (see above). + // + rsm (sm.out, true /* long_lines */); + + sm.out.close (); + + if (!sm.wait ()) + error << "sendmail " << *sm.exit; + } + // Handle process_error and io_error (both derive from system_error). + // + catch (const system_error& e) + { + error << "sendmail error: " << e; + } + + if (!rsm (rs.content (sc, "text/manifest;charset=utf-8"))) + return respond_error (); // The error description is already logged. + + return true; +} -- cgit v1.1