Diffstat (limited to 'mod/mod-ci-github.cxx')
 -rw-r--r--  mod/mod-ci-github.cxx  3478
 1 file changed, 3478 insertions, 0 deletions
diff --git a/mod/mod-ci-github.cxx b/mod/mod-ci-github.cxx
new file mode 100644
index 0000000..d53c46e
--- /dev/null
+++ b/mod/mod-ci-github.cxx
@@ -0,0 +1,3478 @@
+// file : mod/mod-ci-github.cxx -*- C++ -*-
+// license : MIT; see accompanying LICENSE file
+
+#include <mod/mod-ci-github.hxx>
+
+#include <libbutl/json/parser.hxx>
+
+#include <web/xhtml/serialization.hxx>
+#include <web/server/mime-url-encoding.hxx> // mime_url_encode()
+
+#include <mod/jwt.hxx>
+#include <mod/hmac.hxx>
+#include <mod/build.hxx> // build_log_url()
+#include <mod/module-options.hxx>
+
+#include <mod/mod-ci-github-gq.hxx>
+#include <mod/mod-ci-github-post.hxx>
+#include <mod/mod-ci-github-service-data.hxx>
+
+#include <cerrno>
+#include <cstdlib> // strtoull()
+#include <stdexcept>
+
+// Resources:
+//
+// Creating an App:
+// https://docs.github.com/en/apps/creating-github-apps/about-creating-github-apps/best-practices-for-creating-a-github-app
+//
+// Webhooks:
+// https://docs.github.com/en/webhooks/using-webhooks/best-practices-for-using-webhooks
+// https://docs.github.com/en/webhooks/using-webhooks/validating-webhook-deliveries
+//
+// REST API:
+// All docs: https://docs.github.com/en/rest#all-docs
+// Best practices: https://docs.github.com/en/rest/using-the-rest-api/best-practices-for-using-the-rest-api
+//
+// GraphQL API:
+// Reference: https://docs.github.com/en/graphql/reference
+//
+
+using namespace std;
+using namespace butl;
+using namespace web;
+using namespace brep::cli;
+
+namespace brep
+{
+ ci_github::
+ ci_github (tenant_service_map& tsm)
+ : tenant_service_map_ (tsm)
+ {
+ }
+
+ ci_github::
+ ci_github (const ci_github& r, tenant_service_map& tsm)
+ : database_module (r),
+ ci_start (r),
+ options_ (r.initialized_ ? r.options_ : nullptr),
+ tenant_service_map_ (tsm)
+ {
+ }
+
+ void ci_github::
+ init (scanner& s)
+ {
+ HANDLER_DIAG;
+
+ {
+ shared_ptr<tenant_service_base> ts (
+ dynamic_pointer_cast<tenant_service_base> (shared_from_this ()));
+
+ assert (ts != nullptr); // By definition.
+
+ tenant_service_map_["ci-github"] = move (ts);
+ }
+
+ options_ = make_shared<options::ci_github> (
+ s, unknown_mode::fail, unknown_mode::fail);
+
+    // Prepare for handling the CI requests, if configured.
+ //
+ if (options_->ci_github_app_webhook_secret_specified ())
+ {
+ if (!options_->build_config_specified ())
+ fail << "package building functionality must be enabled";
+
+ if (!options_->ci_github_app_id_private_key_specified ())
+ fail << "no app id/private key mappings configured";
+
+ for (const auto& pr: options_->ci_github_app_id_private_key ())
+ {
+ if (pr.second.relative ())
+ fail << "ci-github-app-id-private-key path must be absolute";
+ }
+
+ // Read the webhook secret from the configured path.
+ //
+ {
+ const path& p (options_->ci_github_app_webhook_secret ());
+
+ if (p.relative ())
+ fail << "ci-github-app-webhook-secret path must be absolute";
+
+ try
+ {
+ ifdstream is (p);
+ getline (is, webhook_secret_, '\0');
+
+        // Trim leading/trailing whitespace (presumably GitHub does the
+        // same in its web UI).
+ //
+ if (trim (webhook_secret_).empty ())
+ fail << "empty webhook secret in " << p;
+ }
+ catch (const io_error& e)
+ {
+ fail << "unable to read webhook secret from " << p << ": " << e;
+ }
+ }
+
+ ci_start::init (make_shared<options::ci_start> (*options_));
+
+ database_module::init (*options_, options_->build_db_retry ());
+ }
+ }
+
+ bool ci_github::
+ handle (request& rq, response&)
+ {
+ using namespace bpkg;
+
+ HANDLER_DIAG;
+
+ if (build_db_ == nullptr)
+ throw invalid_request (501, "GitHub CI submission not implemented");
+
+ // Process headers.
+ //
+ string event; // Webhook event.
+ string hmac; // Received HMAC.
+ try
+ {
+ bool content_type (false);
+
+ for (const name_value& h: rq.headers ())
+ {
+ // HMAC authenticating this request. Note that it won't be present
+ // unless a webhook secret has been set in the GitHub app's settings.
+ //
+ if (icasecmp (h.name, "x-hub-signature-256") == 0)
+ {
+ if (!h.value)
+ throw invalid_request (400, "missing x-hub-signature-256 value");
+
+ // Parse the x-hub-signature-256 header value. For example:
+ //
+ // sha256=5e82258...
+ //
+ // Check for the presence of the "sha256=" prefix and then strip it
+ // to leave only the HMAC value.
+ //
+          if (h.value->compare (0, 7, "sha256=") != 0)
+ throw invalid_request (400, "invalid x-hub-signature-256 value");
+
+ hmac = h.value->substr (7);
+ }
+ // This event's UUID.
+ //
+ else if (icasecmp (h.name, "x-github-delivery") == 0)
+ {
+ // @@ TODO Check that delivery UUID has not been received before
+ // (replay attack).
+ }
+ else if (icasecmp (h.name, "content-type") == 0)
+ {
+ if (!h.value)
+ throw invalid_request (400, "missing content-type value");
+
+ if (icasecmp (*h.value, "application/json") != 0)
+ {
+ throw invalid_request (400,
+ "invalid content-type value: '" + *h.value +
+ '\'');
+ }
+
+ content_type = true;
+ }
+ // The webhook event.
+ //
+ else if (icasecmp (h.name, "x-github-event") == 0)
+ {
+ if (!h.value)
+ throw invalid_request (400, "missing x-github-event value");
+
+ event = *h.value;
+ }
+ }
+
+ if (!content_type)
+ throw invalid_request (400, "missing content-type header");
+
+ if (event.empty ())
+ throw invalid_request (400, "missing x-github-event header");
+
+ if (hmac.empty ())
+ throw invalid_request (400, "missing x-hub-signature-256 header");
+ }
+ catch (const invalid_request& e)
+ {
+ error << "request header error: " << e.content;
+ throw;
+ }
+
+ // Read the entire request body into a buffer because we need to compute
+ // an HMAC over it and then parse it as JSON. The alternative of reading
+ // from the stream twice works out to be more complicated (see also a TODO
+ // item in web/server/module.hxx).
+ //
+ string body;
+ {
+      // Note that even though we may not need caching right now, we may
+      // later (e.g., to support cancel), so let's just enable it right away.
+ //
+ size_t limit (128 * 1024);
+
+ istream& is (rq.content (limit, limit));
+
+ try
+ {
+ getline (is, body, '\0');
+ }
+ catch (const io_error& e)
+ {
+ fail << "unable to read request body: " << e;
+ }
+ }
+
+ // Verify the received HMAC.
+ //
+ // Compute the HMAC value over the request body using the configured
+ // webhook secret as key and compare it to the received HMAC.
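+    //
+    // For reference, the expected value for a given body and secret can be
+    // reproduced with the OpenSSL command line tool, along these lines:
+    //
+    //   openssl dgst -sha256 -hmac "$WEBHOOK_SECRET" <body.json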
+ //
+ try
+ {
+ string h (compute_hmac (*options_,
+ body.data (),
+ body.size (),
+ webhook_secret_.c_str ()));
+
+ if (!icasecmp (h, hmac))
+ {
+ string m ("computed HMAC does not match received HMAC");
+
+ error << m;
+
+ throw invalid_request (400, move (m));
+ }
+ }
+ catch (const system_error& e)
+ {
+ fail << "unable to compute request HMAC: " << e;
+ }
+
+ // Process the `app-id` and `warning` webhook request query parameters.
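+    //
+    // These are passed as part of the webhook URL which we expect to be
+    // configured on the GitHub side. For example (the path and values here
+    // are hypothetical):
+    //
+    //   https://ci.example.org/ci-github?app-id=12345&warning=failure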
+ //
+ uint64_t app_id;
+ bool warning_success;
+ {
+ const name_values& rps (rq.parameters (1024, true /* url_only */));
+
+ bool ai (false), wa (false);
+
+ auto badreq = [] (const string& m)
+ {
+ throw invalid_request (400, m);
+ };
+
+ for (const name_value& rp: rps)
+ {
+ if (rp.name == "app-id")
+ {
+ if (!rp.value)
+ badreq ("missing 'app-id' webhook query parameter value");
+
+ ai = true;
+
+ // Parse the app id value.
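+          //
+          // For example, "12345" parses successfully while "", "12x", and
+          // out-of-range values are all rejected by the checks below.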
+ //
+ const char* b (rp.value->c_str ());
+ char* e (nullptr);
+ errno = 0; // We must clear it according to POSIX.
+ app_id = strtoull (b, &e, 10);
+ if (errno == ERANGE || e == b || *e != '\0')
+ {
+ badreq ("invalid 'app-id' webhook query parameter value: '" +
+ *rp.value + '\'');
+ }
+ }
+ else if (rp.name == "warning")
+ {
+ if (!rp.value)
+ badreq ("missing 'warning' webhook query parameter value");
+
+ wa = true;
+ const string& v (*rp.value);
+
+ if (v == "success") warning_success = true;
+ else if (v == "failure") warning_success = false;
+ else
+ badreq ("invalid 'warning' webhook query parameter value: '" + v +
+ '\'');
+ }
+ }
+
+ if (!ai) badreq ("missing 'app-id' webhook query parameter");
+ if (!wa) badreq ("missing 'warning' webhook query parameter");
+ }
+
+ // There is a webhook event (specified in the x-github-event header) and
+ // each event contains a bunch of actions (specified in the JSON request
+ // body).
+ //
+ // Note: "GitHub continues to add new event types and new actions to
+ // existing event types." As a result we ignore known actions that we are
+ // not interested in and log and ignore unknown actions. The thinking here
+ // is that we want to be "notified" of new actions at which point we can
+ // decide whether to ignore them or to handle.
+ //
+ if (event == "check_suite")
+ {
+ gh_check_suite_event cs;
+ try
+ {
+ json::parser p (body.data (), body.size (), "check_suite event");
+
+ cs = gh_check_suite_event (p);
+ }
+ catch (const json::invalid_json_input& e)
+ {
+ string m ("malformed JSON in " + e.name + " request body");
+
+ error << m << ", line: " << e.line << ", column: " << e.column
+ << ", byte offset: " << e.position << ", error: " << e;
+
+ throw invalid_request (400, move (m));
+ }
+
+ if (cs.check_suite.app_id != app_id)
+ {
+ fail << "webhook check_suite app.id " << cs.check_suite.app_id
+ << " does not match app-id query parameter " << app_id;
+ }
+
+ if (cs.action == "requested")
+ {
+ // Branch pushes are handled in handle_branch_push() so ignore this
+ // event.
+ //
+ return true;
+ }
+ else if (cs.action == "rerequested")
+ {
+ // Someone manually requested to re-run all the check runs in this
+ // check suite. Treat as a new request.
+ //
+ return handle_check_suite_rerequest (move (cs), warning_success);
+ }
+ else if (cs.action == "completed")
+ {
+ // GitHub thinks that "all the check runs in this check suite have
+ // completed and a conclusion is available". Check with our own
+ // bookkeeping and log an error if there is a mismatch.
+ //
+ return handle_check_suite_completed (move (cs), warning_success);
+ }
+ else
+ {
+ // Ignore unknown actions by sending a 200 response with empty body
+ // but also log as an error since we want to notice new actions.
+ //
+ error << "unknown action '" << cs.action << "' in check_suite event";
+
+ return true;
+ }
+ }
+ else if (event == "check_run")
+ {
+ gh_check_run_event cr;
+ try
+ {
+ json::parser p (body.data (), body.size (), "check_run event");
+
+ cr = gh_check_run_event (p);
+ }
+ catch (const json::invalid_json_input& e)
+ {
+ string m ("malformed JSON in " + e.name + " request body");
+
+ error << m << ", line: " << e.line << ", column: " << e.column
+ << ", byte offset: " << e.position << ", error: " << e;
+
+ throw invalid_request (400, move (m));
+ }
+
+ if (cr.check_run.app_id != app_id)
+ {
+ fail << "webhook check_run app.id " << cr.check_run.app_id
+ << " does not match app-id query parameter " << app_id;
+ }
+
+ if (cr.action == "rerequested")
+ {
+ // Someone manually requested to re-run a specific check run.
+ //
+ return handle_check_run_rerequest (move (cr), warning_success);
+ }
+#if 0
+ // It looks like we shouldn't be receiving these since we are not
+ // subscribed to them.
+ //
+ else if (cr.action == "created" ||
+ cr.action == "completed" ||
+ cr.action == "requested_action")
+ {
+ }
+#endif
+ else
+ {
+ // Ignore unknown actions by sending a 200 response with empty body
+ // but also log as an error since we want to notice new actions.
+ //
+ error << "unknown action '" << cr.action << "' in check_run event";
+
+ return true;
+ }
+ }
+ else if (event == "pull_request")
+ {
+ gh_pull_request_event pr;
+ try
+ {
+ json::parser p (body.data (), body.size (), "pull_request event");
+
+ pr = gh_pull_request_event (p);
+ }
+ catch (const json::invalid_json_input& e)
+ {
+ string m ("malformed JSON in " + e.name + " request body");
+
+ error << m << ", line: " << e.line << ", column: " << e.column
+ << ", byte offset: " << e.position << ", error: " << e;
+
+ throw invalid_request (400, move (m));
+ }
+
+ // Store the app-id webhook query parameter in the gh_pull_request_event
+ // object (see gh_pull_request for an explanation).
+ //
+    // When we receive the other webhooks we do check that the app ids in
+    // the payload and query match, but here we have to assume it is valid.
+ //
+ pr.pull_request.app_id = app_id;
+
+ if (pr.action == "opened" ||
+ pr.action == "synchronize")
+ {
+ // opened
+ // A pull request was opened.
+ //
+ // synchronize
+ // A pull request's head branch was updated from the base branch or
+ // new commits were pushed to the head branch. (Note that there is
+ // no equivalent event for the base branch.)
+ //
+ // Note that both cases are handled similarly: we start a new CI
+ // request which will be reported on the new commit id.
+ //
+ return handle_pull_request (move (pr), warning_success);
+ }
+ else if (pr.action == "edited")
+ {
+      // The PR base branch changed (to a different branch), besides other
+      // irrelevant changes (title, body, etc.).
+ //
+ // This is in a sense a special case of the base branch moving. In
+ // that case we don't do anything (due to the head sharing problem)
+ // relying instead on the branch protection rule. So it makes sense
+ // to do the same here.
+ //
+ return true;
+ }
+ else if (pr.action == "closed")
+ {
+ // PR has been closed (as merged or not; see merged member). Also
+ // apparently received if base branch is deleted (and the same
+        // for head branch). See also the reopened action below.
+ //
+ // While it may seem natural to cancel the CI for the closed PR, it
+ // might actually be useful to have a completed CI record. GitHub
+ // doesn't prevent us from publishing CI results for the closed PR
+ // (even if both base and head branches were deleted). And if such a
+ // PR is reopened, the CI results remain.
+ //
+ return true;
+ }
+ else if (pr.action == "reopened")
+ {
+ // Previously closed PR has been reopened.
+ //
+ // Since we don't cancel the CI for a closed PR, there is nothing
+ // to do if it is reopened.
+ //
+ return true;
+ }
+ else if (pr.action == "assigned" ||
+ pr.action == "auto_merge_disabled" ||
+ pr.action == "auto_merge_enabled" ||
+ pr.action == "converted_to_draft" ||
+ pr.action == "demilestoned" ||
+ pr.action == "dequeued" ||
+ pr.action == "enqueued" ||
+ pr.action == "labeled" ||
+ pr.action == "locked" ||
+ pr.action == "milestoned" ||
+ pr.action == "ready_for_review" ||
+ pr.action == "review_request_removed" ||
+ pr.action == "review_requested" ||
+ pr.action == "unassigned" ||
+ pr.action == "unlabeled" ||
+ pr.action == "unlocked")
+ {
+ // These have no relation to CI.
+ //
+ return true;
+ }
+ else
+ {
+ // Ignore unknown actions by sending a 200 response with empty body
+ // but also log as an error since we want to notice new actions.
+ //
+ error << "unknown action '" << pr.action << "' in pull_request event";
+
+ return true;
+ }
+ }
+ else if (event == "push")
+ {
+ // Push events are triggered by branch pushes, branch creation, and
+ // branch deletion.
+ //
+ gh_push_event ps;
+ try
+ {
+ json::parser p (body.data (), body.size (), "push event");
+
+ ps = gh_push_event (p);
+ }
+ catch (const json::invalid_json_input& e)
+ {
+ string m ("malformed JSON in " + e.name + " request body");
+
+ error << m << ", line: " << e.line << ", column: " << e.column
+ << ", byte offset: " << e.position << ", error: " << e;
+
+ throw invalid_request (400, move (m));
+ }
+
+ // Store the app-id webhook query parameter in the gh_push_event
+ // object (see gh_push_event for an explanation).
+ //
+    // When we receive the other webhooks we do check that the app ids in
+    // the payload and query match, but here we have to assume it is valid.
+ //
+ ps.app_id = app_id;
+
+    // Note that the push event has no action.
+ //
+ return handle_branch_push (move (ps), warning_success);
+ }
+ // Ignore marketplace_purchase events (sent by the GitHub Marketplace) by
+ // sending a 200 response with empty body. We offer a free plan only and
+ // do not support user accounts so there is nothing to be done.
+ //
+ else if (event == "marketplace_purchase")
+ {
+ return true;
+ }
+ // Ignore GitHub App installation events by sending a 200 response with
+ // empty body. These are triggered when a user installs a GitHub App in a
+ // repository or organization.
+ //
+ else if (event == "installation")
+ {
+ return true;
+ }
+ // Ignore ping events by sending a 200 response with empty body. This
+ // event occurs when you create a new webhook. The ping event is a
+ // confirmation from GitHub that you configured the webhook correctly. One
+ // of its triggers is listing an App on the GitHub Marketplace.
+ //
+ else if (event == "ping")
+ {
+ return true;
+ }
+ else
+ {
+ // Log to investigate.
+ //
+ error << "unexpected event '" << event << "'";
+
+ throw invalid_request (400, "unexpected event: '" + event + "'");
+ }
+ }
+
+ // Let's capitalize the synthetic conclusion check run name to make it
+ // easier to distinguish from the regular ones.
+ //
+ static const string conclusion_check_run_name ("CONCLUSION");
+
+ // Yellow circle.
+ //
+ static const string conclusion_building_title ("\U0001F7E1 IN PROGRESS");
+ static const string conclusion_building_summary (
+ "Waiting for all the builds to complete.");
+
+ // "Medium white" circle.
+ //
+ static const string check_run_queued_title ("\U000026AA QUEUED");
+ static const string check_run_queued_summary (
+ "Waiting for the build to start.");
+
+ // Yellow circle.
+ //
+ static const string check_run_building_title ("\U0001F7E1 BUILDING");
+ static const string check_run_building_summary (
+ "Waiting for the build to complete.");
+
+ // Return the colored circle corresponding to a result_status.
+ //
+ // Note: the rest of the title is produced by to_string(result_status).
+ //
+ static string
+ circle (result_status rs)
+ {
+ switch (rs)
+ {
+ case result_status::success: return "\U0001F7E2"; // Green circle.
+ case result_status::warning: return "\U0001F7E0"; // Orange circle.
+ case result_status::error:
+ case result_status::abort:
+ case result_status::abnormal: return "\U0001F534"; // Red circle.
+
+ // Valid values we should never encounter.
+ //
+ case result_status::skip:
+ case result_status::interrupt:
+ throw invalid_argument ("unexpected result_status value: " +
+ to_string (rs));
+ }
+
+ return ""; // Should never reach.
+ }
+
+ bool ci_github::
+ handle_branch_push (gh_push_event ps, bool warning_success)
+ {
+ HANDLER_DIAG;
+
+ l3 ([&]{trace << "push event { " << ps << " }";});
+
+ // Cancel the CI tenant associated with the overwritten/deleted previous
+ // head commit if this is a forced push or a branch deletion.
+ //
+ if (ps.forced || ps.deleted)
+ {
+ // Service id that will uniquely identify the CI tenant.
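+      //
+      // It has the form <repository-node-id>:<commit-sha>; for example (the
+      // values are made up):
+      //
+      //   R_kgDOLc8CoA:d5d1bd35c527c1c2cd9fa0a8d2ea5d817e121c13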
+ //
+ string sid (ps.repository.node_id + ':' + ps.before);
+
+      // Note that it's possible this commit still exists in another branch,
+      // so we do a refcount-aware cancel.
+ //
+ if (optional<tenant_service> ts = cancel (error, warn,
+ verb_ ? &trace : nullptr,
+ *build_db_, retry_max_,
+ "ci-github", sid,
+ true /* ref_count */))
+ {
+ l3 ([&]{trace << (ps.forced ? "forced push " + ps.after + " to "
+ : "deletion of ")
+ << ps.ref << ": attempted to cancel CI of previous"
+ << " head commit with tenant_service id " << sid
+ << " (ref_count: " << ts->ref_count << ')';});
+ }
+ else
+ {
+ // It's possible that there was no CI for the previous commit for
+ // various reasons (e.g., CI was not enabled).
+ //
+ l3 ([&]{trace << (ps.forced ? "forced push " + ps.after + " to "
+ : "deletion of ")
+ << ps.ref << ": failed to cancel CI of previous"
+ << " head commit with tenant_service id " << sid;});
+ }
+ }
+
+ if (ps.deleted)
+ return true; // Do nothing further if this was a branch deletion.
+
+ // While we don't need the installation access token in this request,
+ // let's obtain it to flush out any permission issues early. Also, it is
+ // valid for an hour so we will most likely make use of it.
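+    //
+    // (For reference, obtaining it involves authenticating as the app with
+    // a JWT and calling the REST API endpoint
+    // POST /app/installations/{installation_id}/access_tokens; see
+    // obtain_installation_access_token().)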
+ //
+ optional<string> jwt (generate_jwt (ps.app_id, trace, error));
+ if (!jwt)
+ throw server_error ();
+
+ optional<gh_installation_access_token> iat (
+ obtain_installation_access_token (ps.installation.id,
+ move (*jwt),
+ error));
+ if (!iat)
+ throw server_error ();
+
+ l3 ([&]{trace << "installation_access_token { " << *iat << " }";});
+
+    // While it would have been nice to cancel the CIs of PRs with this
+    // branch as base so as not to waste resources, there are complications:
+    // firstly, we can only do this for remote PRs (since local PRs will most
+    // likely share the result with the branch push); secondly, we try to do
+    // our best even if the branch protection rule for head-behind is not
+    // enabled, in which case it would be good to complete the CI. So
+    // maybe/later. See also the head case in handle_pull_request(), where we
+    // do cancel remote PRs that are not shared.
+
+ // Service id that uniquely identifies the CI tenant.
+ //
+ string sid (ps.repository.node_id + ':' + ps.after);
+
+ service_data sd (warning_success,
+ iat->token,
+ iat->expires_at,
+ ps.app_id,
+ ps.installation.id,
+ move (ps.repository.node_id),
+ move (ps.repository.clone_url),
+ service_data::local,
+ false /* pre_check */,
+ false /* re_requested */,
+ ps.after /* check_sha */,
+ ps.after /* report_sha */);
+
+ // Create an unloaded CI tenant, doing nothing if one already exists
+ // (which could've been created by handle_pull_request() or by us as a
+ // result of a push to another branch). Note that the tenant's reference
+ // count is incremented in all cases.
+ //
+ // Note: use no delay since we need to (re)create the synthetic conclusion
+ // check run as soon as possible.
+ //
+ // Note that we use the create() API instead of start() since duplicate
+ // management is not available in start().
+ //
+ // After this call we will start getting the build_unloaded()
+ // notifications until (1) we load the tenant, (2) we cancel it, or (3)
+ // it gets archived after some timeout.
+ //
+ if (!create (error, warn, verb_ ? &trace : nullptr,
+ *build_db_, retry_max_,
+ tenant_service (sid, "ci-github", sd.json ()),
+ chrono::seconds (30) /* interval */,
+ chrono::seconds (0) /* delay */,
+ duplicate_tenant_mode::ignore))
+ {
+ fail << "push " + ps.after + " to " + ps.ref
+ << ": unable to create unloaded CI tenant";
+ }
+
+ return true;
+ }
+
+ // Miscellaneous pull request facts
+ //
+ // - Although some of the GitHub documentation makes it sound like they
+ // expect check runs to be added to both the PR head commit and the merge
+ // commit, the PR UI does not react to the merge commit's check runs
+ // consistently. It actually seems to be quite broken. The only thing it
+ // does seem to do reliably is blocking the PR merge if the merge commit's
+  // check runs are not successful (i.e., overriding the PR head commit's
+ // check runs). But the UI looks quite messed up generally in this state.
+ //
+ // - When new commits are added to a PR base branch, pull_request.base.sha
+ // does not change, but the test merge commit will be updated to include
+ // the new commits to the base branch.
+ //
+ // - When new commits are added to a PR head branch, pull_request.head.sha
+ // gets updated with the head commit's SHA and check_suite.pull_requests[]
+ // will contain all PRs with this branch as head.
+ //
+ bool ci_github::
+ handle_pull_request (gh_pull_request_event pr, bool warning_success)
+ {
+ HANDLER_DIAG;
+
+ l3 ([&]{trace << "pull_request event { " << pr << " }";});
+
+ // While we don't need the installation access token in this request,
+ // let's obtain it to flush out any permission issues early. Also, it is
+ // valid for an hour so we will most likely make use of it.
+ //
+ optional<string> jwt (generate_jwt (pr.pull_request.app_id, trace, error));
+ if (!jwt)
+ throw server_error ();
+
+ optional<gh_installation_access_token> iat (
+ obtain_installation_access_token (pr.installation.id,
+ move (*jwt),
+ error));
+ if (!iat)
+ throw server_error ();
+
+ l3 ([&]{trace << "installation_access_token { " << *iat << " }";});
+
+ // Distinguish between local and remote PRs by comparing the head and base
+ // repositories' paths.
+ //
+ service_data::kind_type kind (
+ pr.pull_request.head_path == pr.pull_request.base_path
+ ? service_data::local
+ : service_data::remote);
+
+ // Note that similar to the branch push case above, while it would have
+ // been nice to cancel the previous CI job once the PR head moves (the
+ // "synchronize" event), due to the head sharing problem the previous CI
+ // job might actually still be relevant (in both local and remote PR
+ // cases). So we only do it for the remote PRs and only if the head is not
+ // shared (via tenant reference counting).
+ //
+ if (kind == service_data::remote && pr.action == "synchronize")
+ {
+ if (pr.before)
+ {
+ // Service id that will uniquely identify the CI tenant.
+ //
+ string sid (pr.repository.node_id + ':' + *pr.before);
+
+ if (optional<tenant_service> ts = cancel (error, warn,
+ verb_ ? &trace : nullptr,
+ *build_db_, retry_max_,
+ "ci-github", sid,
+ true /* ref_count */))
+ {
+ l3 ([&]{trace << "pull request " << pr.pull_request.node_id
+ << ": attempted to cancel CI of previous head commit"
+ << " (ref_count: " << ts->ref_count << ')';});
+ }
+ else
+ {
+ // It's possible that there was no CI for the previous commit for
+ // various reasons (e.g., CI was not enabled).
+ //
+ l3 ([&]{trace << "pull request " << pr.pull_request.node_id
+ << ": failed to cancel CI of previous head commit "
+ << "with tenant_service id " << sid;});
+ }
+ }
+ else
+ {
+ error << "pull request " << pr.pull_request.node_id
+ << ": before commit is missing in synchronize event";
+ }
+ }
+
+    // Note: for remote PRs the check_sha will be set later, in
+    // build_unloaded_pre_check(), to the test merge commit id.
+ //
+ string check_sha (kind == service_data::local
+ ? pr.pull_request.head_sha
+ : "");
+
+ // Note that PR rebuilds (re-requested) are handled by
+ // handle_check_suite_rerequest().
+ //
+ // Note that, in the case of a remote PR, GitHub will copy the PR head
+ // commit from the head (forked) repository into the base repository. So
+ // the check runs must always be added to the base repository, whether the
+ // PR is local or remote. The head commit refs are located at
+ // refs/pull/<PR-number>/head.
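+    //
+    // For example, the head commit of a PR can be fetched locally with
+    // (hypothetical PR number):
+    //
+    //   git fetch origin refs/pull/123/head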
+ //
+ service_data sd (warning_success,
+ move (iat->token),
+ iat->expires_at,
+ pr.pull_request.app_id,
+ pr.installation.id,
+ move (pr.repository.node_id),
+ move (pr.repository.clone_url),
+ kind, true /* pre_check */, false /* re_request */,
+ move (check_sha),
+ move (pr.pull_request.head_sha) /* report_sha */,
+ pr.pull_request.node_id,
+ pr.pull_request.number);
+
+ // Create an unloaded CI tenant for the pre-check phase (during which we
+ // wait for the PR's merge commit and behindness to become available).
+ //
+ // Create with an empty service id so that the generated tenant id is used
+ // instead during the pre-check phase (so as not to clash with a proper
+ // service id for this head commit, potentially created in
+ // handle_branch_push() or as another PR).
+ //
+ tenant_service ts ("", "ci-github", sd.json ());
+
+ // Note: use no delay since we need to start the actual CI (which in turn
+ // (re)creates the synthetic conclusion check run) as soon as possible.
+ //
+ // After this call we will start getting the build_unloaded()
+ // notifications -- which will be routed to build_unloaded_pre_check() --
+ // until we cancel the tenant or it gets archived after some timeout.
+ // (Note that we never actually load this request, we always cancel it;
+ // see build_unloaded_pre_check() for details.)
+ //
+ if (!create (error,
+ warn,
+ verb_ ? &trace : nullptr,
+ *build_db_, retry_max_,
+ move (ts),
+ chrono::seconds (30) /* interval */,
+ chrono::seconds (0) /* delay */))
+ {
+ fail << "pull request " << pr.pull_request.node_id
+ << ": unable to create unloaded pre-check tenant";
+ }
+
+ return true;
+ }
+
+ bool ci_github::
+ handle_check_suite_rerequest (gh_check_suite_event cs, bool warning_success)
+ {
+ HANDLER_DIAG;
+
+ l3 ([&]{trace << "check_suite event { " << cs << " }";});
+
+ assert (cs.action == "rerequested");
+
+ // While we don't need the installation access token in this request,
+ // let's obtain it to flush out any permission issues early. Also, it is
+ // valid for an hour so we will most likely make use of it.
+ //
+ optional<string> jwt (generate_jwt (cs.check_suite.app_id, trace, error));
+ if (!jwt)
+ throw server_error ();
+
+ optional<gh_installation_access_token> iat (
+ obtain_installation_access_token (cs.installation.id,
+ move (*jwt),
+ error));
+ if (!iat)
+ throw server_error ();
+
+ l3 ([&]{trace << "installation_access_token { " << *iat << " }";});
+
+ // Service id that uniquely identifies the CI tenant.
+ //
+ string sid (cs.repository.node_id + ':' + cs.check_suite.head_sha);
+
+ // If the user requests a rebuild of the (entire) PR, then this manifests
+ // as the check_suite rather than pull_request event. Specifically:
+ //
+ // - For a local PR, this event is shared with the branch push and all we
+ // need to do is restart the CI for the head commit.
+ //
+ // - For a remote PR, this event will have no gh_check_suite::head_branch.
+ // In this case we need to load the existing service data for this head
+ // commit, extract the test merge commit, and restart the CI for that.
+ //
+ // Note that it's possible the base branch has moved in the meantime and
+ // ideally we would want to re-request the test merge commit, etc.
+ // However, this will only be necessary if the user does not follow our
+ // recommendation of enabling the head-behind-base protection. And it
+ // seems all this extra complexity would not be warranted.
+ //
+ string check_sha;
+ service_data::kind_type kind;
+
+ if (!cs.check_suite.head_branch)
+ {
+ // Rebuild of remote PR.
+ //
+ kind = service_data::remote;
+
+ if (optional<tenant_data> d = find (*build_db_, "ci-github", sid))
+ {
+ tenant_service& ts (d->service);
+
+ try
+ {
+ service_data sd (*ts.data);
+ check_sha = move (sd.check_sha); // Test merge commit.
+ }
+ catch (const invalid_argument& e)
+ {
+ fail << "failed to parse service data: " << e;
+ }
+ }
+ else
+ {
+ error << "check suite " << cs.check_suite.node_id
+ << " for remote pull request:"
+ << " re-requested but tenant_service with id " << sid
+ << " did not exist";
+ return true;
+ }
+ }
+ else
+ {
+ // Rebuild of branch push or local PR.
+ //
+ kind = service_data::local;
+ check_sha = cs.check_suite.head_sha;
+ }
+
+ service_data sd (warning_success,
+ iat->token,
+ iat->expires_at,
+ cs.check_suite.app_id,
+ cs.installation.id,
+ move (cs.repository.node_id),
+ move (cs.repository.clone_url),
+ kind, false /* pre_check */, true /* re_requested */,
+ move (check_sha),
+ move (cs.check_suite.head_sha) /* report_sha */);
+
+ // Replace the existing CI tenant if it exists.
+ //
+ // Note that GitHub UI does not allow re-running the entire check suite
+ // until all the check runs are completed.
+ //
+
+ // Create an unloaded CI tenant.
+ //
+ // Note: use no delay since we need to (re)create the synthetic conclusion
+ // check run as soon as possible.
+ //
+ // Note that we use the create() API instead of start() since duplicate
+ // management is not available in start().
+ //
+ // After this call we will start getting the build_unloaded()
+ // notifications until (1) we load the tenant, (2) we cancel it, or (3)
+ // it gets archived after some timeout.
+ //
+ auto pr (create (error,
+ warn,
+ verb_ ? &trace : nullptr,
+ *build_db_, retry_max_,
+ tenant_service (sid, "ci-github", sd.json ()),
+ chrono::seconds (30) /* interval */,
+ chrono::seconds (0) /* delay */,
+ duplicate_tenant_mode::replace));
+
+ if (!pr)
+ {
+ fail << "check suite " << cs.check_suite.node_id
+ << ": unable to create unloaded CI tenant";
+ }
+
+ if (pr->second == duplicate_tenant_result::created)
+ {
+ error << "check suite " << cs.check_suite.node_id
+ << ": re-requested but tenant_service with id " << sid
+ << " did not exist";
+ return true;
+ }
+
+ return true;
+ }
+
+ bool ci_github::
+ handle_check_suite_completed (gh_check_suite_event cs, bool warning_success)
+ {
+    // The plan is as follows:
+ //
+ // 1. Load the service data.
+ //
+ // 2. Verify it is completed.
+ //
+ // 3. Verify the check run counts match.
+ //
+ // 4. Verify (like in build_built()) that all the check runs are
+ // completed.
+ //
+ // 5. Verify the result matches what GitHub thinks it is.
+
+ HANDLER_DIAG;
+
+ l3 ([&]{trace << "check_suite event { " << cs << " }";});
+
+ // Service id that uniquely identifies the CI tenant.
+ //
+ string sid (cs.repository.node_id + ':' + cs.check_suite.head_sha);
+
+ // The common log entry subject.
+ //
+ string sub ("check suite " + cs.check_suite.node_id + '/' + sid);
+
+ // Load the service data.
+ //
+ service_data sd;
+
+ if (optional<tenant_data> d = find (*build_db_, "ci-github", sid))
+ {
+ try
+ {
+ sd = service_data (*d->service.data);
+ }
+ catch (const invalid_argument& e)
+ {
+ fail << "failed to parse service data: " << e;
+ }
+ }
+ else
+ {
+ error << sub << ": tenant_service does not exist";
+ return true;
+ }
+
+ // Verify the completed flag and the number of check runs.
+ //
+ if (!sd.completed)
+ {
+      error << sub << ": service data completed flag is false";
+ return true;
+ }
+
+ // Received count will be one higher because we don't store the conclusion
+ // check run.
+ //
+ size_t check_runs_count (sd.check_runs.size () + 1);
+
+ if (check_runs_count == 1)
+ {
+ error << sub << ": no check runs in service data";
+ return true;
+ }
+
+ if (cs.check_suite.check_runs_count != check_runs_count)
+ {
+ error << sub << ": check runs count " << cs.check_suite.check_runs_count
+ << " does not match service data count " << check_runs_count;
+ return true;
+ }
+
+ // Verify that all the check runs are built and compute the summary
+ // conclusion.
+ //
+ result_status conclusion (result_status::success);
+
+ for (const check_run& cr: sd.check_runs)
+ {
+ if (cr.state == build_state::built)
+ {
+ assert (cr.status.has_value ());
+ conclusion |= *cr.status;
+ }
+ else
+ {
+ error << sub << ": unbuilt check run in service data";
+ return true;
+ }
+ }
+
+ // Verify the conclusion.
+ //
+ if (!cs.check_suite.conclusion)
+ {
+ error << sub << ": absent conclusion in completed check suite";
+ return true;
+ }
+
+ // Note that the case mismatch is due to GraphQL (gh_conclusion())
+ // requiring uppercase conclusion values while the received webhook values
+ // are lower case.
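+    //
+    // For example, the GraphQL CheckConclusionState enum uses SUCCESS while
+    // the webhook payload contains "success".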
+ //
+ string gh_conclusion (gh_to_conclusion (conclusion, warning_success));
+
+ if (icasecmp (*cs.check_suite.conclusion, gh_conclusion) != 0)
+ {
+ error << sub << ": conclusion " << *cs.check_suite.conclusion
+ << " does not match service data conclusion " << gh_conclusion;
+ return true;
+ }
+
+ return true;
+ }
+
+ // Make a check run summary from a CI start_result.
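+  //
+  // The message is wrapped in a Markdown code fence so that it renders
+  // verbatim in the GitHub UI. For example (hypothetical message):
+  //
+  //   ```
+  //   CI request is queued
+  //   ```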
+ //
+ static string
+ to_check_run_summary (const optional<ci_start::start_result>& r)
+ {
+ string s;
+
+ s = "```\n";
+ if (r) s += r->message;
+ else s += "Internal service error";
+ s += "\n```";
+
+ return s;
+ }
+
+ // Create a gq_built_result.
+ //
+ // Throw invalid_argument in case of invalid result_status.
+ //
+ static gq_built_result
+ make_built_result (result_status rs, bool warning_success, string message)
+ {
+ string title (circle (rs == result_status::warning && !warning_success
+ ? result_status::error
+ : rs));
+ title += ' ';
+ title += ucase (to_string (rs));
+
+ return {gh_to_conclusion (rs, warning_success),
+ move (title),
+ move (message)};
+ }
+
+ // Parse a check run details URL into a build_id.
+ //
+ // Return nullopt if the URL is invalid.
+ //
+ static optional<build_id>
+ parse_details_url (const string& details_url);
+
+  // Note that GitHub always posts a message to its GUI saying "You have
+  // successfully requested <check_run_name> be rerun", regardless of what
+  // HTTP status code we respond with. However, we do return error status
+  // codes when there is no better option (like failing the conclusion) in
+  // case it starts handling them someday.
+ //
+ bool ci_github::
+ handle_check_run_rerequest (const gh_check_run_event& cr,
+ bool warning_success)
+ {
+ HANDLER_DIAG;
+
+ l3 ([&]{trace << "check_run event { " << cr << " }";});
+
+ // The overall plan is as follows:
+ //
+ // 1. Load service data.
+ //
+ // 2. If the tenant is archived, then fail (re-create) both the check run
+ // and the conclusion with appropriate diagnostics.
+ //
+ // 3. If the check run is in the queued state, then do nothing.
+ //
+ // 4. Re-create the check run in the queued state and the conclusion in
+ // the building state. Note: do in a single request to make sure we
+ // either "win" or "loose" the potential race for both (important
+ // for #7).
+ //
+ // 5. Call the rebuild() function to attempt to schedule a rebuild. Pass
+ // the update function that does the following (if called):
+ //
+ // a. Save new node ids.
+ //
+    //    b. Update the check run state (it may also no longer exist).
+ //
+ // c. Clear the completed flag if true.
+ //
+ // 6. If the result of rebuild() indicates the tenant is archived, then
+ // fail (update) both the check run and conclusion with appropriate
+ // diagnostics.
+ //
+    // 7. If the original state is queued (no rebuild was scheduled), then fail
+ // (update) both the check run and the conclusion.
+ //
+ // Note that while conceptually we are updating existing check runs, in
+ // practice we have to re-create as new check runs in order to replace the
+ // existing ones because GitHub does not allow transitioning out of the
+ // built state.
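+    //
+    // (Creating a new check run with the same name for the same head commit
+    // makes it the latest, and thus the displayed, check run for that name,
+    // effectively replacing the old one in the UI.)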
+
+ // Get a new installation access token.
+ //
+ auto get_iat = [this, &trace, &error, &cr] ()
+ -> optional<gh_installation_access_token>
+ {
+ optional<string> jwt (generate_jwt (cr.check_run.app_id, trace, error));
+ if (!jwt)
+ return nullopt;
+
+ optional<gh_installation_access_token> iat (
+ obtain_installation_access_token (cr.installation.id,
+ move (*jwt),
+ error));
+
+ if (iat)
+ l3 ([&]{trace << "installation_access_token { " << *iat << " }";});
+
+ return iat;
+ };
+
+ const string& repo_node_id (cr.repository.node_id);
+ const string& head_sha (cr.check_run.check_suite.head_sha);
+
+ // Prepare the build and conclusion check runs. They are sent to GitHub in
+ // a single request (unless something goes wrong) so store them together
+ // from the outset.
+ //
+ brep::check_runs check_runs (2);
+ check_run& bcr (check_runs[0]); // Build check run
+ check_run& ccr (check_runs[1]); // Conclusion check run
+
+ ccr.name = conclusion_check_run_name;
+
+ const gh_installation_access_token* iat (nullptr);
+ optional<gh_installation_access_token> new_iat;
+
+ // Load the service data, failing the check runs if the tenant has been
+ // archived.
+ //
+ service_data sd;
+ string tenant_id;
+ {
+ // Service id that uniquely identifies the CI tenant.
+ //
+ string sid (repo_node_id + ':' + head_sha);
+
+ optional<tenant_data> d (find (*build_db_, "ci-github", sid));
+ if (!d)
+ {
+ // No such tenant.
+ //
+ fail << "check run " << cr.check_run.node_id
+ << " re-requested but tenant_service with id " << sid
+ << " does not exist";
+ }
+
+ tenant_service& ts (d->service);
+
+ try
+ {
+ sd = service_data (*ts.data);
+ }
+ catch (const invalid_argument& e)
+ {
+ fail << "failed to parse service data: " << e;
+ }
+
+ if (!sd.conclusion_node_id)
+ fail << "no conclusion node id for check run " << cr.check_run.node_id;
+
+ tenant_id = d->tenant_id;
+
+ // Get a new IAT if the one from the service data has expired.
+ //
+ if (system_clock::now () > sd.installation_access.expires_at)
+ {
+ if ((new_iat = get_iat ()))
+ iat = &*new_iat;
+ else
+ throw server_error ();
+ }
+ else
+ iat = &sd.installation_access;
+
+ if (d->archived) // Tenant is archived
+ {
+ // Fail (update) the check runs.
+ //
+ gq_built_result br (
+ make_built_result (
+ result_status::error, warning_success,
+ "Unable to rebuild individual configuration: build has "
+ "been archived"));
+
+ // Try to update the conclusion check run even if the first update
+ // fails.
+ //
+ bool f (false); // Failed.
+
+ if (gq_update_check_run (error, bcr, iat->token,
+ repo_node_id, cr.check_run.node_id,
+ br))
+ {
+ l3 ([&]{trace << "updated check_run { " << bcr << " }";});
+ }
+ else
+ {
+ error << "check_run " << cr.check_run.node_id
+ << ": unable to update check run";
+ f = true;
+ }
+
+ if (gq_update_check_run (error, ccr, iat->token,
+ repo_node_id, *sd.conclusion_node_id,
+ move (br)))
+ {
+ l3 ([&]{trace << "updated conclusion check_run { " << ccr << " }";});
+ }
+ else
+ {
+ error << "check_run " << cr.check_run.node_id
+ << ": unable to update conclusion check run";
+ f = true;
+ }
+
+ // Fail the handler if either of the check runs could not be
+ // updated.
+ //
+ if (f)
+ throw server_error ();
+
+ return true;
+ }
+ }
+
+    // Ignore the request if it's the conclusion check run that is being
+    // re-requested.
+    //
+    // Expect that if the user selects re-run all failed checks, we will
+    // receive multiple check runs, one of which will be the conclusion. And
+ // if we fail it while it happens to arrive last, then we will end up in
+ // the wrong overall state (real check run is building while conclusion is
+ // failed). It seems the best we can do is to ignore it: if the user did
+ // request a rebuild of the conclusion check run explicitly, there will be
+ // no change, which is not ideal but is still an indication that this
+ // operation is not supported.
+ //
+ if (cr.check_run.name == conclusion_check_run_name)
+ {
+ l3 ([&]{trace << "re-requested conclusion check_run";});
+
+#if 0
+ if (!sd.conclusion_node_id)
+ fail << "no conclusion node id for check run " << cr.check_run.node_id;
+
+ gq_built_result br (
+ make_built_result (result_status::error, warning_success,
+ "Conclusion check run cannot be rebuilt"));
+
+ // Fail (update) the conclusion check run.
+ //
+ if (gq_update_check_run (error, ccr, iat->token,
+ repo_node_id, *sd.conclusion_node_id,
+ move (br)))
+ {
+ l3 ([&]{trace << "updated conclusion check_run { " << ccr << " }";});
+ }
+ else
+ {
+ fail << "check run " << cr.check_run.node_id
+ << ": unable to update conclusion check run "
+ << *sd.conclusion_node_id;
+ }
+#endif
+
+ return true;
+ }
+
+ // Parse the check_run's details_url to extract build id.
+ //
+ // While this is a bit hackish, there doesn't seem to be a better way
+ // (like associating custom data with a check run). Note that the GitHub
+ // UI only allows rebuilding completed check runs, so the details URL
+ // should be there.
+ //
+ optional<build_id> bid (parse_details_url (cr.check_run.details_url));
+ if (!bid)
+ {
+ fail << "check run " << cr.check_run.node_id
+ << ": failed to extract build id from details_url";
+ }
+
+ // Initialize the check run (`bcr`) with state from the service data.
+ //
+ {
+ // Search for the check run in the service data.
+ //
+ // Note that we look by name in case node id got replaced by a racing
+ // re-request (in which case we ignore this request).
+ //
+ auto i (find_if (sd.check_runs.begin (), sd.check_runs.end (),
+ [&cr] (const check_run& scr)
+ {
+ return scr.name == cr.check_run.name;
+ }));
+
+ if (i == sd.check_runs.end ())
+ fail << "check_run " << cr.check_run.node_id
+ << " (" << cr.check_run.name << "): "
+ << "re-requested but does not exist in service data";
+
+ // Do nothing if node ids don't match.
+ //
+ if (i->node_id && *i->node_id != cr.check_run.node_id)
+ {
+ l3 ([&]{trace << "check_run " << cr.check_run.node_id
+ << " (" << cr.check_run.name << "): "
+ << "node id has changed in service data";});
+ return true;
+ }
+
+ // Do nothing if the build is already queued.
+ //
+ if (i->state == build_state::queued)
+ {
+ l3 ([&]{trace << "ignoring already-queued check run";});
+ return true;
+ }
+
+ bcr.name = i->name;
+ bcr.build_id = i->build_id;
+ bcr.state = i->state;
+ }
+
+ // Transition the build and conclusion check runs out of the built state
+ // (or any other state) by re-creating them.
+ //
+ bcr.state = build_state::queued;
+ bcr.state_synced = false;
+ bcr.details_url = cr.check_run.details_url;
+ bcr.description = {check_run_queued_title, check_run_queued_summary};
+
+ ccr.state = build_state::building;
+ ccr.state_synced = false;
+ ccr.details_url = details_url (tenant_id);
+ ccr.description = {conclusion_building_title,
+ conclusion_building_summary};
+
+ if (gq_create_check_runs (error, check_runs, iat->token,
+ cr.check_run.app_id, repo_node_id, head_sha,
+ options_->build_queued_batch ()))
+ {
+ assert (bcr.state == build_state::queued);
+ assert (ccr.state == build_state::building);
+
+ l3 ([&]{trace << "created check_run { " << bcr << " }";});
+ l3 ([&]{trace << "created conclusion check_run { " << ccr << " }";});
+ }
+ else
+ {
+ fail << "check run " << cr.check_run.node_id
+ << ": unable to re-create build and conclusion check runs";
+ }
+
+ // Request the rebuild and update service data.
+ //
+ bool race (false);
+
+ // Callback function called by rebuild() to update the service data (but
+ // only if the build is actually restarted).
+ //
+ auto update_sd = [&error, &new_iat, &race,
+ tenant_id = move (tenant_id),
+ &cr, &bcr, &ccr] (const string& ti,
+ const tenant_service& ts,
+ build_state) -> optional<string>
+ {
+ // NOTE: this lambda may be called repeatedly (e.g., due to transaction
+ // being aborted) and so should not move out of its captures.
+
+ race = false; // Reset.
+
+ if (tenant_id != ti)
+ {
+ // The tenant got replaced since we loaded it but we managed to
+ // trigger a rebuild in the new tenant. Who knows whose check runs are
+        // visible, so let's fail ours similarly to the cases below.
+ //
+ race = true;
+ return nullopt;
+ }
+
+ service_data sd;
+ try
+ {
+ sd = service_data (*ts.data);
+ }
+ catch (const invalid_argument& e)
+ {
+ error << "failed to parse service data: " << e;
+ return nullopt;
+ }
+
+ // Note that we again look by name in case node id got replaced by a
+ // racing re-request. In this case, however, it's impossible to decide
+ // who won that race, so let's fail the check suite to be on the safe
+ // side (in a sense, similar to the rebuild() returning queued below).
+ //
+ auto i (find_if (
+ sd.check_runs.begin (), sd.check_runs.end (),
+ [&cr] (const check_run& scr)
+ {
+ return scr.name == cr.check_run.name;
+ }));
+
+ if (i == sd.check_runs.end ())
+ {
+ error << "check_run " << cr.check_run.node_id
+ << " (" << cr.check_run.name << "): "
+ << "re-requested but does not exist in service data";
+ return nullopt;
+ }
+
+ if (i->node_id && *i->node_id != cr.check_run.node_id)
+ {
+ // Keep the old conclusion node id to make sure any further state
+ // transitions are ignored. A bit of a hack.
+ //
+ race = true;
+ return nullopt;
+ }
+
+ *i = bcr; // Update with new node_id, state, state_synced.
+
+ sd.conclusion_node_id = ccr.node_id;
+ sd.completed = false;
+
+ // Save the IAT if we created a new one.
+ //
+ if (new_iat)
+ sd.installation_access = *new_iat;
+
+ return sd.json ();
+ };
+
+ optional<build_state> bs (
+ rebuild (*build_db_, retry_max_, *bid, update_sd));
+
+ // If the build has been archived or re-enqueued since we loaded the
+ // service data, fail (by updating) both the build check run and the
+ // conclusion check run. Otherwise the build has been successfully
+ // re-enqueued so do nothing further.
+ //
+ if (!race && bs && *bs != build_state::queued)
+ return true;
+
+ gq_built_result br; // Built result for both check runs.
+
+ if (race || bs) // Race or re-enqueued.
+ {
+ // The re-enqueued case: this build has been re-enqueued since we first
+ // loaded the service data. This could happen if the user clicked
+ // "re-run" multiple times and another handler won the rebuild() race.
+ //
+      // However, the winner of the check runs race cannot be determined.
+ //
+ // Best case the other handler won the check runs race as well and
+ // thus everything will proceed normally. Our check runs will be
+ // invisible and disregarded.
+ //
+ // Worst case we won the check runs race and the other handler's check
+ // runs -- the ones that will be updated by the build_*() notifications
+ // -- are no longer visible, leaving things quite broken.
+ //
+ // Either way, we fail our check runs. In the best case scenario it
+ // will have no effect; in the worst case scenario it lets the user
+ // know something has gone wrong.
+ //
+ br = make_built_result (result_status::error, warning_success,
+ "Unable to rebuild, try again");
+ }
+ else // Archived.
+ {
+ // The build has expired since we loaded the service data. Most likely
+ // the tenant has been archived.
+ //
+ br = make_built_result (
+ result_status::error, warning_success,
+ "Unable to rebuild individual configuration: build has been archived");
+ }
+
+ // Try to update the conclusion check run even if the first update fails.
+ //
+ bool f (false); // Failed.
+
+ // Fail the build check run.
+ //
+ if (gq_update_check_run (error, bcr, iat->token,
+ repo_node_id, *bcr.node_id,
+ br))
+ {
+ l3 ([&]{trace << "updated check_run { " << bcr << " }";});
+ }
+ else
+ {
+ error << "check run " << cr.check_run.node_id
+ << ": unable to update (replacement) check run "
+ << *bcr.node_id;
+ f = true;
+ }
+
+ // Fail the conclusion check run.
+ //
+ if (gq_update_check_run (error, ccr, iat->token,
+ repo_node_id, *ccr.node_id,
+ move (br)))
+ {
+ l3 ([&]{trace << "updated conclusion check_run { " << ccr << " }";});
+ }
+ else
+ {
+ error << "check run " << cr.check_run.node_id
+ << ": unable to update conclusion check run " << *ccr.node_id;
+ f = true;
+ }
+
+ // Fail the handler if either of the check runs could not be updated.
+ //
+ if (f)
+ throw server_error ();
+
+ return true;
+ }
+
+ function<optional<string> (const string&, const tenant_service&)> ci_github::
+ build_unloaded (const string& ti,
+ tenant_service&& ts,
+ const diag_epilogue& log_writer) const noexcept
+ {
+ // NOTE: this function is noexcept and should not throw.
+
+ NOTIFICATION_DIAG (log_writer);
+
+ service_data sd;
+ try
+ {
+ sd = service_data (*ts.data);
+ }
+ catch (const invalid_argument& e)
+ {
+ error << "failed to parse service data: " << e;
+ return nullptr;
+ }
+
+ return sd.pre_check
+ ? build_unloaded_pre_check (move (ts), move (sd), log_writer)
+ : build_unloaded_load (ti, move (ts), move (sd), log_writer);
+ }
+
+ function<optional<string> (const string&, const tenant_service&)> ci_github::
+ build_unloaded_pre_check (tenant_service&& ts,
+ service_data&& sd,
+ const diag_epilogue& log_writer) const noexcept
+ try
+ {
+ // NOTE: this function is noexcept and should not throw.
+ //
+ // In a few places where invalid_argument is unlikely to be thrown and/or
+ // would indicate that things are seriously broken we let it propagate to
+ // the function catch block where the pre-check tenant will be canceled
+ // (otherwise we could end up in an infinite loop, e.g., because the
+ // problematic arguments won't change).
+
+ NOTIFICATION_DIAG (log_writer);
+
+ // We get here for PRs only (but both local and remote). The overall
+ // plan is as follows:
+ //
+ // 1. Ask for the mergeability/behind status/test merge commit.
+ //
+ // 2. If not ready, get called again.
+ //
+ // 3. If not mergeable, behind, or different head (head changed while
+ // waiting for merge commit and thus differs from what's in the
+ // service_data), cancel the pre-check tenant and do nothing.
+ //
+ // 4. Otherwise, create an unloaded CI tenant and cancel ourselves. Note
+ // that all re-requested cases are handled elsewhere.
+ //
+    // Note that in the mixed local/remote case, whether we CI the head
+    // commit or the test merge commit will be racy and there is nothing we
+    // can do about it (the purely local case can get "upgraded" to mixed
+    // after we have started the CI job).
+ //
+
+ // Request PR pre-check info (triggering the generation of the test merge
+    // commit on GitHub's side).
+ //
+ // Let unlikely invalid_argument propagate (see above).
+ //
+ optional<gq_pr_pre_check_info> pc (
+ gq_fetch_pull_request_pre_check_info (error,
+ sd.installation_access.token,
+ *sd.pr_node_id));
+
+ if (!pc)
+ {
+ // Test merge commit not available yet: get called again to retry.
+ //
+ return nullptr;
+ }
+
+ // Create the CI tenant if nothing is wrong, otherwise issue diagnostics.
+ //
+ if (pc->behind)
+ {
+ l3 ([&]{trace << "ignoring pull request " << *sd.pr_node_id
+ << ": head is behind base";});
+ }
+ else if (!pc->merge_commit_sha)
+ {
+ l3 ([&]{trace << "ignoring pull request " << *sd.pr_node_id
+ << ": not auto-mergeable";});
+ }
+ else if (pc->head_sha != sd.report_sha)
+ {
+ l3 ([&]{trace << "ignoring pull request " << *sd.pr_node_id
+ << ": head commit has changed";});
+ }
+ else
+ {
+ // Create the CI tenant by reusing the pre-check service data.
+ //
+ sd.pre_check = false;
+
+ // Set the service data's check_sha if this is a remote PR. The test
+ // merge commit refs are located at refs/pull/<PR-number>/merge.
+ //
+ if (sd.kind == service_data::remote)
+ sd.check_sha = *pc->merge_commit_sha;
+
+ // Service id that will uniquely identify the CI tenant.
+ //
+ string sid (sd.repository_node_id + ':' + sd.report_sha);
+
+ // Create an unloaded CI tenant, doing nothing if one already exists
+ // (which could've been created by a head branch push or another PR
+ // sharing the same head commit). Note that the tenant's reference count
+ // is incremented in all cases.
+ //
+ // Note: use no delay since we need to (re)create the synthetic
+ // conclusion check run as soon as possible.
+ //
+ // Note that we use the create() API instead of start() since duplicate
+ // management is not available in start().
+ //
+ // After this call we will start getting the build_unloaded()
+ // notifications until (1) we load the tenant, (2) we cancel it, or (3)
+ // it gets archived after some timeout.
+ //
+ try
+ {
+ if (auto pr = create (error, warn, verb_ ? &trace : nullptr,
+ *build_db_, retry_max_,
+ tenant_service (sid, "ci-github", sd.json ()),
+ chrono::seconds (30) /* interval */,
+ chrono::seconds (0) /* delay */,
+ duplicate_tenant_mode::ignore))
+ {
+ if (pr->second == duplicate_tenant_result::ignored)
+ {
+ // This PR is sharing a head commit with something else.
+ //
+ // If this is a local PR then it's probably the branch push, which
+ // is expected, so do nothing.
+ //
+ // If this is a remote PR then it could be anything (branch push,
+ // local PR, or another remote PR) which in turn means the CI
+ // result may end up being for head, not merge commit. There is
+ // nothing we can do about it on our side (the user can enable the
+ // head-behind-base protection on their side).
+ //
+ if (sd.kind == service_data::remote)
+ {
+ l3 ([&]{trace << "remote pull request " << *sd.pr_node_id
+ << ": CI tenant already exists for " << sid;});
+ }
+ }
+ }
+ else
+ {
+ error << "pull request " << *sd.pr_node_id
+ << ": failed to create unloaded CI tenant "
+ << "with tenant_service id " << sid;
+
+ // Fall through to cancel.
+ }
+ }
+ catch (const runtime_error& e) // Database retries exhausted.
+ {
+ error << "pull request " << *sd.pr_node_id
+ << ": failed to create unloaded CI tenant "
+ << "with tenant_service id " << sid
+ << ": " << e.what ();
+
+ // Fall through to cancel.
+ }
+ }
+
+ // Cancel the pre-check tenant.
+ //
+ try
+ {
+ if (!cancel (error, warn, verb_ ? &trace : nullptr,
+ *build_db_, retry_max_,
+ ts.type,
+ ts.id))
+ {
+ // Should never happen (no such tenant).
+ //
+ error << "pull request " << *sd.pr_node_id
+ << ": failed to cancel pre-check tenant with tenant_service id "
+ << ts.id;
+ }
+ }
+ catch (const runtime_error& e) // Database retries exhausted.
+ {
+ error << "pull request " << *sd.pr_node_id
+ << ": failed to cancel pre-check tenant with tenant_service id "
+ << ts.id << ": " << e.what ();
+ }
+
+ return nullptr;
+ }
+ catch (const std::exception& e)
+ {
+ NOTIFICATION_DIAG (log_writer);
+ error << "pull request " << *sd.pr_node_id
+ << ": unhandled exception: " << e.what ();
+
+ // Cancel the pre-check tenant otherwise we could end up in an infinite
+ // loop (see top of function).
+ //
+ try
+ {
+ if (cancel (error, warn, verb_ ? &trace : nullptr,
+ *build_db_, retry_max_,
+ ts.type,
+ ts.id))
+ l3 ([&]{trace << "canceled pre-check tenant " << ts.id;});
+ }
+ catch (const runtime_error& e) // Database retries exhausted.
+ {
+ l3 ([&]{trace << "failed to cancel pre-check tenant " << ts.id << ": "
+ << e.what ();});
+ }
+
+ return nullptr;
+ }
+
+ function<optional<string> (const string&, const tenant_service&)> ci_github::
+ build_unloaded_load (const string& tenant_id,
+ tenant_service&& ts,
+ service_data&& sd,
+ const diag_epilogue& log_writer) const noexcept
+ try
+ {
+ // NOTE: this function is noexcept and should not throw.
+ //
+ // In a few places where invalid_argument is unlikely to be thrown and/or
+ // would indicate that things are seriously broken we let it propagate to
+ // the function catch block where the tenant will be canceled (otherwise
+ // we could end up in an infinite loop, e.g., because the problematic
+ // arguments won't change).
+
+ NOTIFICATION_DIAG (log_writer);
+
+ // Load the tenant, which is essentially the same for both branch push and
+ // PR. The overall plan is as follows:
+ //
+ // - Create synthetic conclusion check run with the in-progress state. If
+ // unable to, get called again to re-try.
+ //
+ // - Load the tenant. If unable to, fail the conclusion check run.
+ //
+ // - Update service data.
+ //
+
+ // Get a new installation access token if the current one has expired.
+ //
+ const gh_installation_access_token* iat (nullptr);
+ optional<gh_installation_access_token> new_iat;
+
+ if (system_clock::now () > sd.installation_access.expires_at)
+ {
+ if (optional<string> jwt = generate_jwt (sd.app_id, trace, error))
+ {
+ new_iat = obtain_installation_access_token (sd.installation_id,
+ move (*jwt),
+ error);
+ if (new_iat)
+ iat = &*new_iat;
+ }
+ }
+ else
+ iat = &sd.installation_access;
+
+ if (iat == nullptr)
+ return nullptr; // Try again on the next call.
+
+ // Create a synthetic check run with an in-progress state. Return the
+ // check run on success or nullopt on failure.
+ //
+ auto create_synthetic_cr = [&tenant_id,
+ iat,
+ &sd,
+ &error,
+ this] (string name,
+ const string& title,
+ const string& summary)
+ -> optional<check_run>
+ {
+ check_run cr;
+ cr.name = move (name);
+
+ // Let unlikely invalid_argument propagate (see above).
+ //
+ if (gq_create_check_run (error,
+ cr,
+ iat->token,
+ sd.app_id,
+ sd.repository_node_id,
+ sd.report_sha,
+ details_url (tenant_id),
+ build_state::building,
+ title, summary))
+ {
+ return cr;
+ }
+ else
+ return nullopt;
+ };
+
+ // Update a synthetic check run with success or failure. Return the check
+ // run on success or nullopt on failure.
+ //
+ auto update_synthetic_cr = [iat,
+ &sd,
+ &error] (const string& node_id,
+ const string& name,
+ result_status rs,
+ string summary) -> optional<check_run>
+ {
+ assert (!node_id.empty ());
+
+ // Let unlikely invalid_argument propagate (see above).
+ //
+ gq_built_result br (
+ make_built_result (rs, sd.warning_success, move (summary)));
+
+ check_run cr;
+ cr.name = name; // For display purposes only.
+
+ // Let unlikely invalid_argument propagate (see above).
+ //
+ if (gq_update_check_run (error,
+ cr,
+ iat->token,
+ sd.repository_node_id,
+ node_id,
+ move (br)))
+ {
+ assert (cr.state == build_state::built);
+ return cr;
+ }
+ else
+ return nullopt;
+ };
+
+ // (Re)create the synthetic conclusion check run first in order to convert
+ // a potentially completed check suite to building as early as possible.
+ //
+ // Note that there is a window between receipt of a check_suite or
+ // pull_request event and the first bot/worker asking for a task, which
+ // could be substantial. We could probably (also) try to (re)create the
+ // conclusion check run in the webhook handler. @@ Maybe/later.
+ //
+ string conclusion_node_id; // Conclusion check run node ID.
+
+ if (!sd.conclusion_node_id)
+ {
+ if (auto cr = create_synthetic_cr (conclusion_check_run_name,
+ conclusion_building_title,
+ conclusion_building_summary))
+ {
+ l3 ([&]{trace << "created check_run { " << *cr << " }";});
+
+ conclusion_node_id = move (*cr->node_id);
+ }
+ }
+
+ const string& effective_conclusion_node_id (
+ sd.conclusion_node_id
+ ? *sd.conclusion_node_id
+ : conclusion_node_id);
+
+ // Load the CI tenant if the conclusion check run was created.
+ //
+ if (!effective_conclusion_node_id.empty ())
+ {
+ string ru; // Repository URL.
+
+ // CI the test merge commit for remote PRs and the head commit for
+ // everything else (branch push or local PRs).
+ //
+ if (sd.kind == service_data::remote)
+ {
+ // E.g. #pull/28/merge@1b6c9a361086ed93e6f1e67189e82d52de91c49b
+ //
+ ru = sd.repository_clone_url + "#pull/" + to_string (*sd.pr_number) +
+ "/merge@" + sd.check_sha;
+ }
+ else
+ ru = sd.repository_clone_url + '#' + sd.check_sha;
+
+ // Let unlikely invalid_argument propagate (see above).
+ //
+ repository_location rl (move (ru), repository_type::git);
+
+ try
+ {
+ optional<start_result> r (load (error, warn, verb_ ? &trace : nullptr,
+ *build_db_, retry_max_,
+ move (ts),
+ move (rl)));
+
+ if (!r || r->status != 200)
+ {
+ // Let unlikely invalid_argument propagate (see above).
+ //
+ if (auto cr = update_synthetic_cr (effective_conclusion_node_id,
+ conclusion_check_run_name,
+ result_status::error,
+ to_check_run_summary (r)))
+ {
+ l3 ([&]{trace << "updated check_run { " << *cr << " }";});
+ }
+ else
+ {
+ // There is really nothing we can do in this case since we will not
+ // receive any further notifications. Log the error as a last resort.
+
+ error << "failed to load CI tenant " << ts.id
+ << " and unable to update conclusion";
+ }
+
+ return nullptr; // No need to update service data in this case.
+ }
+ }
+ catch (const runtime_error& e) // Database retries exhausted.
+ {
+ error << "failed to load CI tenant " << ts.id << ": " << e.what ();
+
+ // Fall through to retry on next call.
+ }
+ }
+
+ if (!new_iat && conclusion_node_id.empty ())
+ return nullptr; // Nothing to save (but potentially retry on next call).
+
+ return [&error,
+ tenant_id,
+ iat = move (new_iat),
+ cni = move (conclusion_node_id)]
+ (const string& ti,
+ const tenant_service& ts) -> optional<string>
+ {
+ // NOTE: this lambda may be called repeatedly (e.g., due to
+ // transaction being aborted) and so should not move out of its
+ // captures.
+
+ if (tenant_id != ti)
+ return nullopt; // Do nothing if the tenant has been replaced.
+
+ service_data sd;
+ try
+ {
+ sd = service_data (*ts.data);
+ }
+ catch (const invalid_argument& e)
+ {
+ error << "failed to parse service data: " << e;
+ return nullopt;
+ }
+
+ if (iat)
+ sd.installation_access = *iat;
+
+ if (!cni.empty ())
+ sd.conclusion_node_id = cni;
+
+ return sd.json ();
+ };
+ }
+ catch (const std::exception& e)
+ {
+ NOTIFICATION_DIAG (log_writer);
+ error << "CI tenant " << ts.id << ": unhandled exception: " << e.what ();
+
+ // Cancel the tenant otherwise we could end up in an infinite loop (see
+ // top of function).
+ //
+ try
+ {
+ if (cancel (error, warn, verb_ ? &trace : nullptr,
+ *build_db_, retry_max_, ts.type, ts.id))
+ l3 ([&]{trace << "canceled CI tenant " << ts.id;});
+ }
+ catch (const runtime_error& e) // Database retries exhausted.
+ {
+ l3 ([&]{trace << "failed to cancel CI tenant " << ts.id
+ << ": " << e.what ();});
+ }
+
+ return nullptr;
+ }
+
+ // Build state change notifications (see tenant-services.hxx for
+ // background). Mapping our state transitions to GitHub poses multiple
+ // problems:
+ //
+ // 1. In our model we have the building->queued (interrupted) and
+ // built->queued (rebuild) transitions. We are going to ignore both of
+ // them when notifying GitHub. The first is not important (we expect the
+ // state to go back to building shortly). The second should normally not
+ // happen and would mean that a completed check suite may go back on its
+ // conclusion (which would be pretty confusing for the user). Note that
+ // the ->queued state transition of a check run rebuild triggered by
+ // us is handled directly in handle_check_run_rerequest().
+ //
+ // So, for GitHub notifications, we only have the following linear
+ // transition sequence:
+ //
+ // -> queued -> building -> built
+ //
+ // Note, however, that because we ignore certain transitions, we can now
+ // observe "degenerate" state changes that we need to ignore:
+ //
+ // building -> [queued] -> building
+ // built -> [queued] -> ...
+ //
+ // 2. As mentioned in tenant-services.hxx, we may observe the notifications
+ // as arriving in the wrong order. Unfortunately, GitHub provides no
+ // mechanisms to help with that. In fact, GitHub does not even prevent
+ // the creation of multiple check runs with the same name (it will always
+ // use the last created instance, regardless of the status, timestamps,
+ // etc). As a result, we cannot, for example, rely on the failure to
+ // create a new check run in response to the queued notification as an
+ // indication of a subsequent notification (e.g., building) having
+ // already occurred.
+ //
+ // The only aid in this area that GitHub provides is that it prevents
+ // updating a check run in the built state to a former state (queued or
+ // building). But one can still create a new check run with the same name
+ // and a former state.
+ //
+ // (Note that we should also be careful if trying to take advantage of
+ // this "check run override" semantics: each created check run gets a new
+ // URL and while the GitHub UI will always point to the last created when
+ // showing the list of check runs, if the user is already on the previous
+ // check run's URL, nothing will automatically cause them to be
+ // redirected to the new URL. And so the user may sit on the abandoned
+ // check run waiting forever for it to be completed.)
+ //
+ // As a result, we will deal with the out of order problem differently
+ // depending on the notification:
+ //
+ // queued Skip if there is already a check run in service data,
+ // otherwise create new.
+ //
+ // building Skip if there is no check run in service data or it's
+ // not in the queued state, otherwise update.
+ //
+ // built       Update if there is a check run in service data unless its
+ // state is built, otherwise create new.
+ //
+ // The rationale for this semantics is as follows: the building
+ // notification is a "nice to have" and can be skipped if things are not
+ // going normally. In contrast, the built notification cannot be skipped
+ // and we must either update the existing check run or create a new one
+ // (hopefully overriding the one created previously, if any). Note that
+ // the likelihood of the built notification being performed at the same
+ // time as queued/building is quite low (unlike queued and building).
+ //
+ // Note also that with this semantics it's unlikely but possible that we
+ // attempt to update the service data in the wrong order. Specifically, it
+ // feels like this should not be possible in the ->building transition
+ // since we skip the building notification unless the check run in the
+ // service data is already in the queued state. But it is theoretically
+ // possible in the ->built transition. For example, we may be updating
+ // the service data for the queued notification after it has already been
+ // updated by the built notification. In such cases we should not be
+ // overriding the latter state (built) with the former (queued).
+ //
+ // 3. We may not be able to "conclusively" notify GitHub, for example, due
+ // to a transient network error. The "conclusively" part means that the
+ // notification may or may not have gone through (though it feels the
+ // common case will be the inability to send the request rather than
+ // receive the reply).
+ //
+ // In such cases, we record in the service data that the notification was
+ // not synchronized and in subsequent notifications we do the best we can:
+ // if we have node_id, then we update, otherwise, we create (potentially
+ // overriding the check run created previously).
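+ //
+ // To illustrate with a hypothetical out of order sequence: if the built
+ // notification for a check run arrives before its queued notification,
+ // then built creates a new check run (there is nothing in the service
+ // data yet) and stores it in the built state, after which the late
+ // queued notification is skipped (with a warning) because a check run
+ // with this build id is already in the service data.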
+ //
+ function<optional<string> (const string&, const tenant_service&)> ci_github::
+ build_queued (const string& tenant_id,
+ const tenant_service& ts,
+ const vector<build>& builds,
+ optional<build_state> istate,
+ const build_queued_hints& hs,
+ const diag_epilogue& log_writer) const noexcept
+ try
+ {
+ // NOTE: this function is noexcept and should not throw.
+
+ NOTIFICATION_DIAG (log_writer);
+
+ service_data sd;
+ try
+ {
+ sd = service_data (*ts.data);
+ }
+ catch (const invalid_argument& e)
+ {
+ error << "failed to parse service data: " << e;
+ return nullptr;
+ }
+
+ // Ignore attempts to add new builds to a completed check suite. This can
+ // happen, for example, if a new build configuration is added before
+ // the tenant is archived.
+ //
+ if (sd.completed)
+ return nullptr;
+
+ // The builds for which we will be creating check runs.
+ //
+ vector<reference_wrapper<const build>> bs;
+ brep::check_runs crs; // Parallel to bs.
+
+ // Exclude the builds for which we won't be creating check runs.
+ //
+ for (const build& b: builds)
+ {
+ string bid (gh_check_run_name (b)); // Full build id.
+
+ if (const check_run* scr = sd.find_check_run (bid))
+ {
+ // Another notification has already stored this check run.
+ //
+ if (!istate)
+ {
+ // Out of order queued notification.
+ //
+ warn << "check run " << bid << ": out of order queued "
+ << "notification; existing state: " << scr->state_string ();
+ }
+ else if (*istate == build_state::built)
+ {
+ // Unexpected built->queued transition (rebuild).
+ //
+ // Note that handle_check_run_rerequest() may trigger an "expected"
+ // rebuild, in which case our state should be set to queued.
+ //
+ if (scr->state != build_state::queued || !scr->state_synced)
+ warn << "check run " << bid << ": unexpected rebuild";
+ }
+ else
+ {
+ // Ignore interrupted.
+ //
+ assert (*istate == build_state::building);
+ }
+ }
+ else
+ {
+ // No stored check run for this build so prepare to create one.
+ //
+ bs.push_back (b);
+
+ crs.push_back (
+ check_run {move (bid),
+ gh_check_run_name (b, &hs),
+ nullopt /* node_id */,
+ build_state::queued,
+ false /* state_synced */,
+ nullopt /* status */,
+ details_url (b),
+ check_run::description_type {check_run_queued_title,
+ check_run_queued_summary}});
+ }
+ }
+
+ if (bs.empty ()) // Nothing to do.
+ return nullptr;
+
+ // Get a new installation access token if the current one has expired.
+ //
+ const gh_installation_access_token* iat (nullptr);
+ optional<gh_installation_access_token> new_iat;
+
+ if (system_clock::now () > sd.installation_access.expires_at)
+ {
+ if (optional<string> jwt = generate_jwt (sd.app_id, trace, error))
+ {
+ new_iat = obtain_installation_access_token (sd.installation_id,
+ move (*jwt),
+ error);
+ if (new_iat)
+ iat = &*new_iat;
+ }
+ }
+ else
+ iat = &sd.installation_access;
+
+ // Note: we treat the failure to obtain the installation access token the
+ // same as the failure to notify GitHub (state is updated but not marked
+ // synced).
+ //
+ if (iat != nullptr)
+ {
+ // Create a check_run for each build as a single request.
+ //
+ // Let unlikely invalid_argument propagate.
+ //
+ if (gq_create_check_runs (error,
+ crs,
+ iat->token,
+ sd.app_id,
+ sd.repository_node_id,
+ sd.report_sha,
+ options_->build_queued_batch ()))
+ {
+ for (const check_run& cr: crs)
+ {
+ // We can only create a check run in the queued state.
+ //
+ assert (cr.state == build_state::queued);
+ l3 ([&]{trace << "created check_run { " << cr << " }";});
+ }
+ }
+ }
+
+ return [tenant_id,
+ bs = move (bs),
+ iat = move (new_iat),
+ crs = move (crs),
+ error = move (error),
+ warn = move (warn)] (const string& ti,
+ const tenant_service& ts) -> optional<string>
+ {
+ // NOTE: this lambda may be called repeatedly (e.g., due to transaction
+ // being aborted) and so should not move out of its captures.
+
+ if (tenant_id != ti)
+ return nullopt; // Do nothing if the tenant has been replaced.
+
+ service_data sd;
+ try
+ {
+ sd = service_data (*ts.data);
+ }
+ catch (const invalid_argument& e)
+ {
+ error << "failed to parse service data: " << e;
+ return nullopt;
+ }
+
+ if (iat)
+ sd.installation_access = *iat;
+
+ for (size_t i (0); i != bs.size (); ++i)
+ {
+ const check_run& cr (crs[i]);
+
+ // Note that this service data may not be the same as what we observed
+ // in the build_queued() function above. For example, some check runs
+ // that we have queued may have already transitioned to built. So we
+ // skip any check runs that are already present.
+ //
+ if (const check_run* scr = sd.find_check_run (cr.build_id))
+ {
+ // Doesn't look like printing new/existing check run node_id will
+ // be of any help.
+ //
+ warn << "check run " << cr.build_id << ": out of order queued "
+ << "notification service data update; existing state: "
+ << scr->state_string ();
+ }
+ else
+ sd.check_runs.push_back (cr);
+ }
+
+ return sd.json ();
+ };
+ }
+ catch (const std::exception& e)
+ {
+ NOTIFICATION_DIAG (log_writer);
+
+ error << "CI tenant " << ts.id << ": unhandled exception: " << e.what ();
+
+ return nullptr;
+ }
+
+ function<optional<string> (const string&, const tenant_service&)> ci_github::
+ build_building (const string& tenant_id,
+ const tenant_service& ts,
+ const build& b,
+ const diag_epilogue& log_writer) const noexcept
+ try
+ {
+ // NOTE: this function is noexcept and should not throw.
+
+ NOTIFICATION_DIAG (log_writer);
+
+ service_data sd;
+ try
+ {
+ sd = service_data (*ts.data);
+ }
+ catch (const invalid_argument& e)
+ {
+ error << "failed to parse service data: " << e;
+ return nullptr;
+ }
+
+ // Similar to build_queued(), ignore attempts to add new builds to a
+ // completed check suite.
+ //
+ if (sd.completed)
+ return nullptr;
+
+ optional<check_run> cr; // Updated check run.
+ string bid (gh_check_run_name (b)); // Full build id.
+
+ if (check_run* scr = sd.find_check_run (bid)) // Stored check run.
+ {
+ // Update the check run if it exists on GitHub and the queued
+ // notification updated the service data, otherwise do nothing.
+ //
+ if (scr->state == build_state::queued)
+ {
+ if (scr->node_id)
+ {
+ cr = move (*scr);
+ cr->state_synced = false;
+ }
+ else
+ {
+ // Network error during queued notification (state unsynchronized),
+ // ignore.
+ //
+ l3 ([&]{trace << "unsynchronized check run " << bid;});
+ }
+ }
+ else
+ {
+ // Ignore interrupted (building -> queued -> building transition).
+ //
+ if (scr->state != build_state::building)
+ {
+ warn << "check run " << bid << ": out of order building "
+ << "notification; existing state: " << scr->state_string ();
+ }
+ }
+ }
+ else
+ warn << "check run " << bid << ": out of order building "
+ << "notification; no check run state in service data";
+
+ if (!cr)
+ return nullptr;
+
+ // Get a new installation access token if the current one has expired.
+ //
+ const gh_installation_access_token* iat (nullptr);
+ optional<gh_installation_access_token> new_iat;
+
+ if (system_clock::now () > sd.installation_access.expires_at)
+ {
+ if (optional<string> jwt = generate_jwt (sd.app_id, trace, error))
+ {
+ new_iat = obtain_installation_access_token (sd.installation_id,
+ move (*jwt),
+ error);
+ if (new_iat)
+ iat = &*new_iat;
+ }
+ }
+ else
+ iat = &sd.installation_access;
+
+ // Note: we treat the failure to obtain the installation access token the
+ // same as the failure to notify GitHub (state is updated but not marked
+ // synced).
+ //
+ if (iat != nullptr)
+ {
+ // Let unlikely invalid_argument propagate.
+ //
+ if (gq_update_check_run (error,
+ *cr,
+ iat->token,
+ sd.repository_node_id,
+ *cr->node_id,
+ build_state::building,
+ check_run_building_title,
+ check_run_building_summary))
+ {
+ // Do nothing further if the state was already built on GitHub (note
+ // that this is based on the above-mentioned special GitHub semantics
+ // of preventing changes to the built status).
+ //
+ if (cr->state == build_state::built)
+ {
+ warn << "check run " << bid << ": already in built state on GitHub";
+ return nullptr;
+ }
+
+ assert (cr->state == build_state::building);
+ l3 ([&]{trace << "updated check_run { " << *cr << " }";});
+ }
+ }
+
+ return [tenant_id,
+ iat = move (new_iat),
+ cr = move (*cr),
+ error = move (error),
+ warn = move (warn)] (const string& ti,
+ const tenant_service& ts) -> optional<string>
+ {
+ // NOTE: this lambda may be called repeatedly (e.g., due to transaction
+ // being aborted) and so should not move out of its captures.
+
+ if (tenant_id != ti)
+ return nullopt; // Do nothing if the tenant has been replaced.
+
+ service_data sd;
+ try
+ {
+ sd = service_data (*ts.data);
+ }
+ catch (const invalid_argument& e)
+ {
+ error << "failed to parse service data: " << e;
+ return nullopt;
+ }
+
+ if (iat)
+ sd.installation_access = *iat;
+
+ // Update the check run only if it is in the queued state.
+ //
+ if (check_run* scr = sd.find_check_run (cr.build_id))
+ {
+ if (scr->state == build_state::queued)
+ *scr = cr;
+ else
+ {
+ warn << "check run " << cr.build_id << ": out of order building "
+ << "notification service data update; existing state: "
+ << scr->state_string ();
+ }
+ }
+ else
+ warn << "check run " << cr.build_id << ": service data state has "
+ << "disappeared";
+
+ return sd.json ();
+ };
+ }
+ catch (const std::exception& e)
+ {
+ NOTIFICATION_DIAG (log_writer);
+
+ string bid (gh_check_run_name (b)); // Full build id.
+
+ error << "check run " << bid << ": unhandled exception: " << e.what();
+
+ return nullptr;
+ }
+
+ function<pair<optional<string>, bool> (const string&,
+ const tenant_service&)> ci_github::
+ build_built (const string& tenant_id,
+ const tenant_service& ts,
+ const build& b,
+ const diag_epilogue& log_writer) const noexcept
+ try
+ {
+ // NOTE: this function is noexcept and should not throw.
+
+ NOTIFICATION_DIAG (log_writer);
+
+ // @@ TODO Include ts.id in diagnostics? Check run build ids alone seem
+ // kind of meaningless. Log lines get pretty long this way however.
+
+ service_data sd;
+ try
+ {
+ sd = service_data (*ts.data);
+ }
+ catch (const invalid_argument& e)
+ {
+ error << "failed to parse service data: " << e;
+ return nullptr;
+ }
+
+ // Similar to build_queued(), ignore attempts to add new builds to a
+ // completed check suite.
+ //
+ if (sd.completed)
+ return nullptr;
+
+ // If we don't have the accurate list of check runs in the service data
+ // (for example, because we ran out of transaction retries trying to
+ // update it), then things are going to fall apart: we will add this check
+ // run and then immediately conclude that the check suite is complete
+ // (while GitHub will likely continue showing a bunch of queued check
+ // runs). If this check run is successful, then we will conclude that
+ // the check suite is successful and update the conclusion check run,
+ // all based on one build.
+ //
+ if (sd.check_runs.empty ())
+ {
+ error << "no queued check runs in service data for tenant " << tenant_id;
+ return nullptr;
+ }
+
+ // Here we only update the state of this check run. If there are no more
+ // unbuilt ones, then the synthetic conclusion check run will be updated
+ // in build_completed(). Note that determining whether we have no more
+ // unbuilt check runs would be racy here so instead we do it in the
+ // service data update function that we return.
+
+ check_run cr; // Updated check run.
+ {
+ string bid (gh_check_run_name (b)); // Full build id.
+
+ if (check_run* scr = sd.find_check_run (bid))
+ {
+ if (scr->state != build_state::building)
+ {
+ warn << "check run " << bid << ": out of order built notification; "
+ << "existing state: " << scr->state_string ();
+ }
+
+ // Do nothing if already built (e.g., rebuild).
+ //
+ if (scr->state == build_state::built)
+ return nullptr;
+
+ cr = move (*scr);
+ }
+ else
+ {
+ warn << "check run " << bid << ": out of order built notification; "
+ << "no check run state in service data";
+
+ // Note that we have no hints here and so have to use the full build
+ // id for name.
+ //
+ cr.build_id = move (bid);
+ cr.name = cr.build_id;
+ }
+
+ cr.state_synced = false;
+ }
+
+ // Get a new installation access token if the current one has expired.
+ //
+ const gh_installation_access_token* iat (nullptr);
+ optional<gh_installation_access_token> new_iat;
+
+ if (system_clock::now () > sd.installation_access.expires_at)
+ {
+ if (optional<string> jwt = generate_jwt (sd.app_id, trace, error))
+ {
+ new_iat = obtain_installation_access_token (sd.installation_id,
+ move (*jwt),
+ error);
+ if (new_iat)
+ iat = &*new_iat;
+ }
+ }
+ else
+ iat = &sd.installation_access;
+
+ // Note: we treat the failure to obtain the installation access token the
+ // same as the failure to notify GitHub (state is updated but not marked
+ // synced).
+ //
+ if (iat != nullptr)
+ {
+ // Prepare the check run's summary field (the build information in an
+ // XHTML table).
+ //
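+ // For example (hypothetical build values), the table renders along
+ // these lines:
+ //
+ //   result          (colored circle) success (log)
+ //   package         hello
+ //   version         1.2.3
+ //   toolchain       public-0.17.0
+ //   target          x86_64-linux-gnu
+ //   target config   linux-gcc
+ //   package config  default
+ //   test            (colored circle) success (log)
+ //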
+ string sm; // Summary.
+ {
+ using namespace web::xhtml;
+
+ // Note: let all serialization exceptions propagate. The XML
+ // serialization code can throw bad_alloc or xml::serialization in
+ // case of I/O failures, but we're serializing to a string stream so
+ // both exceptions are unlikely.
+ //
+ ostringstream os;
+ xml::serializer s (os, "check_run_summary");
+
+ // This hack is required to disable XML element name prefixes (which
+ // GitHub does not like). Note that this adds an xmlns declaration for
+ // the XHTML namespace which for now GitHub appears to ignore. If that
+ // ever becomes a problem, then we should redo this with raw XML
+ // serializer calls.
+ //
+ struct table: element
+ {
+ table (): element ("table") {}
+
+ void
+ start (xml::serializer& s) const override
+ {
+ s.start_element (xmlns, name);
+ s.namespace_decl (xmlns, "");
+ }
+ } TABLE;
+
+ // Serialize a result row (colored circle, result text, log URL) for
+ // an operation and result_status.
+ //
+ auto tr_result = [this, &b] (xml::serializer& s,
+ const string& op,
+ result_status rs)
+ {
+ // The log URL.
+ //
+ string lu (build_log_url (options_->host (),
+ options_->root (),
+ b,
+ op != "result" ? &op : nullptr));
+
+ s << TR
+ << TD << EM << op << ~EM << ~TD
+ << TD
+ << circle (rs) << ' '
+ << CODE << to_string (rs) << ~CODE
+ << " (" << A << HREF << lu << ~HREF << "log" << ~A << ')'
+ << ~TD
+ << ~TR;
+ };
+
+ // Serialize the summary to an XHTML table.
+ //
+ s << TABLE
+ << TBODY;
+
+ tr_result (s, "result", *b.status);
+
+ s << TR
+ << TD << EM << "package" << ~EM << ~TD
+ << TD << CODE << b.package_name << ~CODE << ~TD
+ << ~TR
+ << TR
+ << TD << EM << "version" << ~EM << ~TD
+ << TD << CODE << b.package_version << ~CODE << ~TD
+ << ~TR
+ << TR
+ << TD << EM << "toolchain" << ~EM << ~TD
+ << TD
+ << CODE
+ << b.toolchain_name << '-' << b.toolchain_version.string ()
+ << ~CODE
+ << ~TD
+ << ~TR
+ << TR
+ << TD << EM << "target" << ~EM << ~TD
+ << TD << CODE << b.target.string () << ~CODE << ~TD
+ << ~TR
+ << TR
+ << TD << EM << "target config" << ~EM << ~TD
+ << TD << CODE << b.target_config_name << ~CODE << ~TD
+ << ~TR
+ << TR
+ << TD << EM << "package config" << ~EM << ~TD
+ << TD << CODE << b.package_config_name << ~CODE << ~TD
+ << ~TR;
+
+ for (const operation_result& r: b.results)
+ tr_result (s, r.operation, r.status);
+
+ s << ~TBODY
+ << ~TABLE;
+
+ sm = os.str ();
+ }
+
+ gq_built_result br (
+ make_built_result (*b.status, sd.warning_success, move (sm)));
+
+ if (cr.node_id)
+ {
+ // Update existing check run to built. Let unlikely invalid_argument
+ // propagate.
+ //
+ if (gq_update_check_run (error,
+ cr,
+ iat->token,
+ sd.repository_node_id,
+ *cr.node_id,
+ move (br)))
+ {
+ assert (cr.state == build_state::built);
+ l3 ([&]{trace << "updated check_run { " << cr << " }";});
+ }
+ }
+ else
+ {
+ // Create new check run. Let unlikely invalid_argument propagate.
+ //
+ // Note that we don't have build hints so will be creating this check
+ // run with the full build id as name. In the unlikely event that an
+ // out of order build_queued() were to run before we've saved this
+ // check run to the service data it will create another check run with
+ // the shortened name which will never get to the built state.
+ //
+ if (gq_create_check_run (error,
+ cr,
+ iat->token,
+ sd.app_id,
+ sd.repository_node_id,
+ sd.report_sha,
+ details_url (b),
+ move (br)))
+ {
+ assert (cr.state == build_state::built);
+ l3 ([&]{trace << "created check_run { " << cr << " }";});
+ }
+ }
+
+ if (cr.state_synced)
+ {
+ // Check run was created/updated successfully to built (with the
+ // status we specified).
+ //
+ cr.status = b.status;
+ }
+ }
+
+ return [tenant_id,
+ iat = move (new_iat),
+ cr = move (cr),
+ error = move (error),
+ warn = move (warn)] (const string& ti,
+ const tenant_service& ts)
+ {
+ // NOTE: this lambda may be called repeatedly (e.g., due to transaction
+ // being aborted) and so should not move out of its captures.
+
+ // Do nothing if the tenant has been replaced.
+ //
+ if (tenant_id != ti)
+ return make_pair (optional<string> (), false);
+
+ service_data sd;
+ try
+ {
+ sd = service_data (*ts.data);
+ }
+ catch (const invalid_argument& e)
+ {
+ error << "failed to parse service data: " << e;
+ return make_pair (optional<string> (), false);
+ }
+
+ // Feels like this could potentially happen in case of an out of order
+ // notification (see above).
+ //
+ if (sd.completed)
+ {
+ // @@ Perhaps this should be a warning but let's try error for now (we
+ // essentially missed a build, which could have failed).
+ //
+ error << "built notification for completed check suite";
+ return make_pair (optional<string> (), false);
+ }
+
+ if (iat)
+ sd.installation_access = *iat;
+
+ // Only update the check_run state in service data if it matches the
+ // state (specifically, status) on GitHub.
+ //
+ if (cr.state_synced)
+ {
+ if (check_run* scr = sd.find_check_run (cr.build_id))
+ {
+ // This will most commonly generate a duplicate warning (see above).
+ // We could save the old state and only warn if it differs but let's
+ // not complicate things for now.
+ //
+#if 0
+ if (scr->state != build_state::building)
+ {
+ warn << "check run " << cr.build_id << ": out of order built "
+ << "notification service data update; existing state: "
+ << scr->state_string ();
+ }
+#endif
+ *scr = cr; // Note: also updates node id if created.
+ }
+ else
+ sd.check_runs.push_back (cr);
+
+ // Determine whether this check suite is completed.
+ //
+ sd.completed = find_if (sd.check_runs.begin (), sd.check_runs.end (),
+ [] (const check_run& scr)
+ {
+ return scr.state != build_state::built;
+ }) == sd.check_runs.end ();
+ }
+
+ return make_pair (optional<string> (sd.json ()), sd.completed);
+ };
+ }
+ catch (const std::exception& e)
+ {
+ NOTIFICATION_DIAG (log_writer);
+
+ string bid (gh_check_run_name (b)); // Full build id.
+
+ error << "check run " << bid << ": unhandled exception: " << e.what ();
+
+ return nullptr;
+ }
+
+ void ci_github::
+ build_completed (const string& /* tenant_id */,
+ const tenant_service& ts,
+ const diag_epilogue& log_writer) const noexcept
+ try
+ {
+ // NOTE: this function is noexcept and should not throw.
+
+ NOTIFICATION_DIAG (log_writer);
+
+ service_data sd;
+ try
+ {
+ sd = service_data (*ts.data);
+ }
+ catch (const invalid_argument& e)
+ {
+ error << "failed to parse service data: " << e;
+ return;
+ }
+
+ // This could have been reset by handle_check_run_rerequest().
+ //
+ if (!sd.completed)
+ return;
+
+ assert (!sd.check_runs.empty ());
+
+ // Here we need to update the state of the synthetic conclusion check run.
+ //
+ result_status result (result_status::success);
+
+ // Conclusion check run summary. Will include the success/warning/failure
+ // count breakdown.
+ //
+ string summary;
+ {
+ // The success/warning/failure counts.
+ //
+ // Note that the warning count will be included in the success or
+ // failure count (depending on the value of sd.warning_success).
+ //
+ size_t succ_count (0), warn_count (0), fail_count (0);
+
+ // Count a result_status under the appropriate category.
+ //
+ auto count = [&succ_count,
+ &warn_count,
+ &fail_count,
+ ws = sd.warning_success] (result_status rs)
+ {
+ switch (rs)
+ {
+ case result_status::success: ++succ_count; break;
+
+ case result_status::error:
+ case result_status::abort:
+ case result_status::abnormal: ++fail_count; break;
+
+ case result_status::warning:
+ {
+ ++warn_count;
+
+ if (ws)
+ ++succ_count;
+ else
+ ++fail_count;
+
+ break;
+ }
+
+ case result_status::skip:
+ case result_status::interrupt:
+ {
+ assert (false);
+ }
+ }
+ };
+
+ for (const check_run& cr: sd.check_runs)
+ {
+ assert (cr.state == build_state::built && cr.status);
+
+ result |= *cr.status;
+ count (*cr.status);
+ }
+
+ // Construct the conclusion check run summary.
+ //
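+ // For example (hypothetical counts, with warnings counted as
+ // successes):
+ //
+ //   1 failed, 11 succeeded (2 with warnings), 12 total
+ //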
+ ostringstream os;
+
+ // Note: the warning count has already been included in the success or
+ // failure count.
+ //
+ os << fail_count << " failed";
+ if (!sd.warning_success && warn_count != 0)
+ os << " (" << warn_count << " due to warnings)";
+
+ os << ", " << succ_count << " succeeded";
+ if (sd.warning_success && warn_count != 0)
+ os << " (" << warn_count << " with warnings)";
+
+ os << ", " << (succ_count + fail_count) << " total";
+
+ summary = os.str ();
+ }
+
+ // Get a new installation access token if the current one has expired
+ // (unlikely since we just returned from build_built()). Note also that we
+ // are not saving the new token in the service data.
+ //
+ const gh_installation_access_token* iat (nullptr);
+ optional<gh_installation_access_token> new_iat;
+
+ if (system_clock::now () > sd.installation_access.expires_at)
+ {
+ if (optional<string> jwt = generate_jwt (sd.app_id, trace, error))
+ {
+ new_iat = obtain_installation_access_token (sd.installation_id,
+ move (*jwt),
+ error);
+ if (new_iat)
+ iat = &*new_iat;
+ }
+ }
+ else
+ iat = &sd.installation_access;
+
+ // Note: we treat the failure to obtain the installation access token the
+ // same as the failure to notify GitHub.
+ //
+ if (iat != nullptr)
+ {
+ // Update the conclusion check run if all check runs are now built.
+ //
+ assert (sd.conclusion_node_id);
+
+ gq_built_result br (
+ make_built_result (result, sd.warning_success, move (summary)));
+
+ check_run cr;
+
+ // Set some fields for display purposes.
+ //
+ cr.node_id = *sd.conclusion_node_id;
+ cr.name = conclusion_check_run_name;
+
+ // Let unlikely invalid_argument propagate.
+ //
+ if (gq_update_check_run (error,
+ cr,
+ iat->token,
+ sd.repository_node_id,
+ *sd.conclusion_node_id,
+ move (br)))
+ {
+ assert (cr.state == build_state::built);
+ l3 ([&]{trace << "updated conclusion check_run { " << cr << " }";});
+ }
+ else
+ {
+ // Nothing we can do here except log the error.
+ //
+ error << "tenant_service id " << ts.id
+ << ": unable to update conclusion check run "
+ << *sd.conclusion_node_id;
+ }
+ }
+ }
+ catch (const std::exception& e)
+ {
+ NOTIFICATION_DIAG (log_writer);
+
+ error << "unhandled exception: " << e.what ();
+ }
+
+ string ci_github::
+ details_url (const build& b) const
+ {
+ // This code is based on build_force_url() in mod/build.cxx.
+ //
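+ // For example (hypothetical host, tenant, and build values; brep root
+ // assumed to be /):
+ //
+ // https://ci.example.org/@d2586f57-21dc-40b7-beb2-6517ad7917dd?builds=hello&pv=1.2.3&tg=x86_64-linux-gnu&tc=linux-gcc&pc=default&th=public-0.17.0
+ //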
+ return
+ options_->host () +
+ tenant_dir (options_->root (), b.tenant).string () +
+ "?builds=" + mime_url_encode (b.package_name.string ()) +
+ "&pv=" + mime_url_encode (b.package_version.string ()) +
+ "&tg=" + mime_url_encode (b.target.string ()) +
+ "&tc=" + mime_url_encode (b.target_config_name) +
+ "&pc=" + mime_url_encode (b.package_config_name) +
+ "&th=" + mime_url_encode (b.toolchain_name) + '-' +
+ b.toolchain_version.string ();
+ }
+
+ string ci_github::
+ details_url (const string& t) const
+ {
+ return
+ options_->host () +
+ tenant_dir (options_->root (), t).string () +
+ "?builds";
+ }
+
+ static optional<build_id>
+ parse_details_url (const string& details_url)
+ try
+ {
+ // See details_url() above for an idea of what the URL looks like.
+
+ url u (details_url);
+
+ build_id r;
+
+ // Extract the tenant from the URL path.
+ //
+ // Example paths:
+ //
+ // @d2586f57-21dc-40b7-beb2-6517ad7917dd (37 characters)
+ // <brep-root>/@d2586f57-21dc-40b7-beb2-6517ad7917dd
+ //
+ if (!u.path)
+ return nullopt;
+
+ {
+ size_t p (u.path->find ('@'));
+ if (p == string::npos || u.path->size () - p != 37)
+ return nullopt; // Tenant not found or invalid length.
+
+ r.package.tenant = u.path->substr (p + 1);
+ }
+
+ // Extract the rest of the build_id members from the URL query.
+ //
+ if (!u.query)
+ return nullopt;
+
+ bool pn (false), pv (false), tg (false), tc (false), pc (false),
+ th (false);
+
+ // This URL query parsing code is based on
+ // web::apache::request::parse_url_parameters().
+ //
+ for (const char* qp (u.query->c_str ()); qp != nullptr; )
+ {
+ const char* vp (strchr (qp, '='));
+ const char* ep (strchr (qp, '&'));
+
+ if (vp == nullptr || (ep != nullptr && ep < vp))
+ return nullopt; // Missing value.
+
+ string n (mime_url_decode (qp, vp)); // Name.
+
+ ++vp; // Skip '='
+
+ const char* ve (ep != nullptr ? ep : vp + strlen (vp)); // Value end.
+
+ // Get the value as-is or URL-decode it.
+ //
+ auto rawval = [vp, ve] () { return string (vp, ve); };
+ auto decval = [vp, ve] () { return mime_url_decode (vp, ve); };
+
+ auto make_version = [] (string&& v)
+ {
+ return canonical_version (brep::version (move (v)));
+ };
+
+ auto c = [&n] (bool& b, const char* s)
+ {
+ return n == s ? (b = true) : false;
+ };
+
+ if (c (pn, "builds")) r.package.name = package_name (decval ());
+ else if (c (pv, "pv")) r.package.version = make_version (decval ());
+ else if (c (tg, "tg")) r.target = target_triplet (decval ());
+ else if (c (tc, "tc")) r.target_config_name = decval ();
+ else if (c (pc, "pc")) r.package_config_name = decval ();
+ else if (c (th, "th"))
+ {
+ // Toolchain name and version. E.g. "public-0.17.0"
+
+ string v (rawval ());
+
+ // Note: parsing code based on mod/mod-builds.cxx.
+ //
+ size_t p (v.find ('-'));
+ if (p == string::npos || p >= v.size () - 1)
+ return nullopt; // Invalid format.
+
+ r.toolchain_name = v.substr (0, p);
+ r.toolchain_version = make_version (v.substr (p + 1));
+ }
+
+ qp = ep != nullptr ? ep + 1 : nullptr;
+ }
+
+ if (!pn || !pv || !tg || !tc || !pc || !th)
+ return nullopt; // Fail if any query parameters are absent.
+
+ return r;
+ }
+ catch (const invalid_argument&) // Invalid url, brep::version, etc.
+ {
+ return nullopt;
+ }
+
+ optional<string> ci_github::
+ generate_jwt (uint64_t app_id,
+ const basic_mark& trace,
+ const basic_mark& error) const
+ {
+ string jwt;
+ try
+ {
+ // Look up the private key path for the app id and fail if not found.
+ //
+ const map<uint64_t, dir_path>& pks (
+ options_->ci_github_app_id_private_key ());
+
+ auto pk (pks.find (app_id));
+ if (pk == pks.end ())
+ {
+ error << "unable to generate JWT: "
+ << "no private key configured for app id " << app_id;
+ return nullopt;
+ }
+
+ // Set token's "issued at" time 60 seconds in the past to combat clock
+ // drift (as recommended by GitHub).
+ //
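+ // The JWT payload will contain the standard claims, along these lines
+ // (hypothetical app id and times):
+ //
+ //   {"iat": 1704063540, "exp": 1704064140, "iss": "12345"}
+ //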
+ jwt = brep::generate_jwt (
+ *options_,
+ pk->second, to_string (app_id),
+ chrono::seconds (options_->ci_github_jwt_validity_period ()),
+ chrono::seconds (60));
+
+ l3 ([&]{trace << "JWT: " << jwt;});
+ }
+ catch (const system_error& e)
+ {
+ error << "unable to generate JWT (errno=" << e.code () << "): " << e;
+ return nullopt;
+ }
+
+ return jwt;
+ }
+
+ // There are three types of GitHub API authentication:
+ //
+ // 1) Authenticating as an app. Used to access parts of the API concerning
+ // the app itself such as getting the list of installations. (Need to
+ // authenticate as an app as part of authenticating as an app
+ // installation.)
+ //
+ // 2) Authenticating as an app installation (on a user or organisation
+ // account). Used to access resources belonging to the user/repository
+ // or organisation the app is installed in.
+ //
+ // 3) Authenticating as a user. Used to perform actions as the user.
+ //
+ // We need to authenticate as an app installation (2).
+ //
+ // How to authenticate as an app installation
+ //
+ // Reference:
+ // https://docs.github.com/en/apps/creating-github-apps/authenticating-with-a-github-app/authenticating-as-a-github-app-installation
+ //
+ // The final authentication token we need is an installation access token
+ // (IAT), valid for one hour, which we will pass in the `Authorization`
+ // header of our GitHub API requests:
+ //
+ // Authorization: Bearer <INSTALLATION_ACCESS_TOKEN>
+ //
+ // To generate an IAT:
+ //
+ // - Generate a JSON Web Token (JWT)
+ //
+ // - Get the installation ID. This will be included in the webhook request
+ // in our case
+ //
+ // - Send a POST to /app/installations/<INSTALLATION_ID>/access_tokens which
+ // includes the JWT (`Authorization: Bearer <JWT>`). The response will
+ // include the IAT. We can pass the name of the repository included in
+ // the webhook request to restrict access; otherwise we get access to
+ // all the repositories covered by the installation (if installed on an
+ // organisation, for example).
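+ //
+ // For example (hypothetical installation id and token):
+ //
+ //   POST /app/installations/12345678/access_tokens HTTP/1.1
+ //   Authorization: Bearer <JWT>
+ //
+ //   HTTP/1.1 201 Created
+ //
+ //   {"token": "ghs_...", "expires_at": "2024-01-01T01:00:00Z"}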
+ //
+ optional<gh_installation_access_token> ci_github::
+ obtain_installation_access_token (const string& iid,
+ string jwt,
+ const basic_mark& error) const
+ {
+ gh_installation_access_token iat;
+ try
+ {
+ // API endpoint.
+ //
+ string ep ("app/installations/" + iid + "/access_tokens");
+
+ uint16_t sc (
+ github_post (iat, ep, strings {"Authorization: Bearer " + jwt}));
+
+ // Possible response status codes from the access_tokens endpoint:
+ //
+ // 201 Created
+ // 401 Requires authentication
+ // 403 Forbidden
+ // 404 Resource not found
+ // 422 Validation failed, or the endpoint has been spammed.
+ //
+ // Note that the payloads of non-201 status codes are undocumented.
+ //
+ if (sc != 201)
+ {
+ error << "unable to get installation access token: error HTTP "
+ << "response status " << sc;
+ return nullopt;
+ }
+
+ // Create a clock drift safety window.
+ //
+ iat.expires_at -= chrono::minutes (5);
+ }
+ // gh_installation_access_token (via github_post())
+ //
+ catch (const json::invalid_json_input& e)
+ {
+ // Note: e.name is the GitHub API endpoint.
+ //
+ error << "malformed JSON in response from " << e.name << ", line: "
+ << e.line << ", column: " << e.column << ", byte offset: "
+ << e.position << ", error: " << e;
+ return nullopt;
+ }
+ catch (const invalid_argument& e) // github_post()
+ {
+ error << "malformed header(s) in response: " << e;
+ return nullopt;
+ }
+ catch (const system_error& e) // github_post()
+ {
+ error << "unable to get installation access token (errno=" << e.code ()
+ << "): " << e.what ();
+ return nullopt;
+ }
+
+ return iat;
+ }
+}