diff options
Diffstat (limited to 'mod/mod-ci-github-gq.cxx')
-rw-r--r-- | mod/mod-ci-github-gq.cxx | 730 |
1 files changed, 730 insertions, 0 deletions
diff --git a/mod/mod-ci-github-gq.cxx b/mod/mod-ci-github-gq.cxx new file mode 100644 index 0000000..4fe9190 --- /dev/null +++ b/mod/mod-ci-github-gq.cxx @@ -0,0 +1,730 @@ +// file : mod/mod-ci-github-gq.cxx -*- C++ -*- +// license : MIT; see accompanying LICENSE file + +#include <mod/mod-ci-github-gq.hxx> + +#include <libbutl/json/parser.hxx> +#include <libbutl/json/serializer.hxx> + +#include <mod/mod-ci-github-post.hxx> + +using namespace std; +using namespace butl; + +namespace brep +{ + // GraphQL serialization functions (see definitions and documentation at the + // bottom). + // + static const string& gq_name (const string&); + static string gq_str (const string&); + static string gq_bool (bool); + static const string& gq_enum (const string&); + + [[noreturn]] static void + throw_json (json::parser& p, const string& m) + { + throw json::invalid_json_input ( + p.input_name, + p.line (), p.column (), p.position (), + m); + } + + // Parse a JSON-serialized GraphQL response. + // + // Throw runtime_error if the response indicated errors and + // invalid_json_input if the GitHub response contained invalid JSON. + // + // The response format is defined in the GraphQL spec: + // https://spec.graphql.org/October2021/#sec-Response. + // + // Example response: + // + // { + // "data": {...}, + // "errors": {...} + // } + // + // The contents of `data`, including its opening and closing braces, are + // parsed by the `parse_data` function. + // + // @@ TODO: specify what parse_data may throw (probably only + // invalid_json_input). + // + // @@ TODO errors comes before data in GitHub's responses. + // + static void + gq_parse_response (json::parser& p, + function<void (json::parser&)> parse_data) + { + using event = json::event; + + // True if the data/errors fields are present. + // + // Although the spec merely recommends that the `errors` field, if + // present, comes before the `data` field, assume it always does because + // letting the client parse data in the presence of field errors + // (unexpected nulls) would not make sense. + // + bool dat (false), err (false); + + p.next_expect (event::begin_object); + + while (p.next_expect (event::name, event::end_object)) + { + if (p.name () == "data") + { + dat = true; + + // Currently we're not handling fields that are null due to field + // errors (see below for details) so don't parse any further. + // + if (err) + break; + + parse_data (p); + } + else if (p.name () == "errors") + { + // Don't stop parsing because the error semantics depends on whether + // or not `data` is present. + // + err = true; // Handled below. + } + else + { + // The spec says the response will never contain any top-level fields + // other than data, errors, and extensions. + // + if (p.name () != "extensions") + { + throw_json (p, + "unexpected top-level GraphQL response field: '" + + p.name () + '\''); + } + + p.next_expect_value_skip (); + } + } + + // If the `errors` field was present in the response, error(s) occurred + // before or during execution of the operation. + // + // If the `data` field was not present the errors are request errors which + // occur before execution and are typically the client's fault. + // + // If the `data` field was also present in the response the errors are + // field errors which occur during execution and are typically the GraphQL + // endpoint's fault, and some fields in `data` that should not be are + // likely to be null. + // + if (err) + { + if (dat) + { + // @@ TODO: Consider parsing partial data? + // + throw runtime_error ("field error(s) received from GraphQL endpoint; " + "incomplete data received"); + } + else + throw runtime_error ("request error(s) received from GraphQL endpoint"); + } + } + + // Parse a response to a check_run GraphQL mutation such as `createCheckRun` + // or `updateCheckRun`. + // + // Example response (only the part we need to parse here): + // + // { + // "cr0": { + // "checkRun": { + // "id": "CR_kwDOLc8CoM8AAAAFQ5GqPg", + // "name": "libb2/0.98.1+2/x86_64-linux-gnu/linux_debian_12-gcc_13.1-O3/default/dev/0.17.0-a.1", + // "status": "QUEUED" + // } + // }, + // "cr1": { + // "checkRun": { + // "id": "CR_kwDOLc8CoM8AAAAFQ5GqhQ", + // "name": "libb2/0.98.1+2/x86_64-linux-gnu/linux_debian_12-gcc_13.1/default/dev/0.17.0-a.1", + // "status": "QUEUED" + // } + // } + // } + // + // @@ TODO Handle response errors properly. + // + static vector<gh_check_run> + gq_parse_response_check_runs (json::parser& p) + { + using event = json::event; + + vector<gh_check_run> r; + + gq_parse_response (p, [&r] (json::parser& p) + { + p.next_expect (event::begin_object); + + // Parse the "cr0".."crN" members (field aliases). + // + while (p.next_expect (event::name, event::end_object)) + { + // Parse `"crN": { "checkRun":`. + // + if (p.name () != "cr" + to_string (r.size ())) + throw_json (p, "unexpected field alias: '" + p.name () + '\''); + p.next_expect (event::begin_object); + p.next_expect_name ("checkRun"); + + r.emplace_back (p); // Parse the check_run object. + + p.next_expect (event::end_object); // Parse end of crN object. + } + }); + + // Our requests always operate on at least one check run so if there were + // none in the data field something went wrong. + // + if (r.empty ()) + throw_json (p, "data object is empty"); + + return r; + } + + // Send a GraphQL mutation request `rq` that operates on one or more check + // runs. Update the check runs in `crs` with the new state and the node ID + // if unset. Return false and issue diagnostics if the request failed. + // + static bool + gq_mutate_check_runs (vector<check_run>& crs, + const string& iat, + const vector<reference_wrapper<const build>>& bs, + string rq, + build_state st, + const basic_mark& error) noexcept + { + vector<gh_check_run> rcrs; + + try + { + // Response type which parses a GraphQL response containing multiple + // check_run objects. + // + struct resp + { + vector<gh_check_run> check_runs; // Received check runs. + + resp (json::parser& p) : check_runs (gq_parse_response_check_runs (p)) {} + + resp () = default; + } rs; + + uint16_t sc (github_post (rs, + "graphql", // API Endpoint. + strings {"Authorization: Bearer " + iat}, + move (rq))); + + if (sc == 200) + { + rcrs = move (rs.check_runs); + + if (rcrs.size () == bs.size ()) + { + for (size_t i (0); i != rcrs.size (); ++i) + { + // Validate the check run in the response against the build. + // + const gh_check_run& rcr (rcrs[i]); // Received check run. + const build& b (bs[i]); + + build_state rst (gh_from_status (rcr.status)); // Received state. + + if (rst != build_state::built && rst != st) + { + error << "unexpected check_run status: received '" << rcr.status + << "' but expected '" << gh_to_status (st) << '\''; + + return false; // Fail because something is clearly very wrong. + } + else + { + check_run& cr (crs[i]); + + if (!cr.node_id) + cr.node_id = move (rcr.node_id); + + cr.state = gh_from_status (rcr.status); + } + } + + return true; + } + else + error << "unexpected number of check_run objects in response"; + } + else + error << "failed to update check run: error HTTP response status " + << sc; + } + catch (const json::invalid_json_input& e) + { + // Note: e.name is the GitHub API endpoint. + // + error << "malformed JSON in response from " << e.name << ", line: " + << e.line << ", column: " << e.column << ", byte offset: " + << e.position << ", error: " << e; + } + catch (const invalid_argument& e) + { + error << "malformed header(s) in response: " << e; + } + catch (const system_error& e) + { + error << "unable to mutate check runs (errno=" << e.code () << "): " + << e.what (); + } + catch (const runtime_error& e) // From gq_parse_response_check_runs(). + { + // GitHub response contained error(s) (could be ours or theirs at this + // point). + // + error << "unable to mutate check runs: " << e; + } + + return false; + } + + // Serialize a GraphQL operation (query/mutation) into a GraphQL request. + // + // This is essentially a JSON object with a "query" string member containing + // the GraphQL operation. For example: + // + // { "query": "mutation { cr0:createCheckRun(... }" } + // + static string + gq_serialize_request (const string& o) + { + string b; + json::buffer_serializer s (b); + + s.begin_object (); + s.member ("query", o); + s.end_object (); + + return b; + } + + // Serialize `createCheckRun` mutations for one or more builds to GraphQL. + // + static string + gq_mutation_create_check_runs ( + const string& ri, // Repository ID + const string& hs, // Head SHA + const vector<reference_wrapper<const build>>& bs, + build_state st, + const tenant_service_base::build_hints* bh) + { + ostringstream os; + + os << "mutation {" << '\n'; + + // Serialize a `createCheckRun` for each build. + // + for (size_t i (0); i != bs.size (); ++i) + { + const build& b (bs[i]); + + string al ("cr" + to_string (i)); // Field alias. + + // Check run name. + // + string nm (gh_check_run_name (b, bh)); + + os << gq_name (al) << ":createCheckRun(input: {" << '\n' + << " name: " << gq_str (nm) << ',' << '\n' + << " repositoryId: " << gq_str (ri) << ',' << '\n' + << " headSha: " << gq_str (hs) << ',' << '\n' + << " status: " << gq_enum (gh_to_status (st)) << '\n' + << "})" << '\n' + // Specify the selection set (fields to be returned). + // + << "{" << '\n' + << " checkRun {" << '\n' + << " id," << '\n' + << " name," << '\n' + << " status" << '\n' + << " }" << '\n' + << "}" << '\n'; + } + + os << "}" << '\n'; + + return os.str (); + } + + // Serialize an `updateCheckRun` mutation for one build to GraphQL. + // + // @@ TODO Support conclusion, output, etc. + // + static string + gq_mutation_update_check_run (const string& ri, // Repository ID. + const string& ni, // Node ID. + build_state st) + { + ostringstream os; + + os << "mutation {" << '\n' + << "cr0:updateCheckRun(input: {" << '\n' + << " checkRunId: " << gq_str (ni) << ',' << '\n' + << " repositoryId: " << gq_str (ri) << ',' << '\n' + << " status: " << gq_enum (gh_to_status (st)) << '\n' + << "})" << '\n' + // Specify the selection set (fields to be returned). + // + << "{" << '\n' + << " checkRun {" << '\n' + << " id," << '\n' + << " name," << '\n' + << " status" << '\n' + << " }" << '\n' + << "}" << '\n' + << "}" << '\n'; + + return os.str (); + } + + bool + gq_create_check_runs (vector<check_run>& crs, + const string& iat, + const string& rid, + const string& hs, + const vector<reference_wrapper<const build>>& bs, + build_state st, + const tenant_service_base::build_hints& bh, + const basic_mark& error) + { + string rq (gq_serialize_request ( + gq_mutation_create_check_runs (rid, hs, bs, st, &bh))); + + return gq_mutate_check_runs (crs, iat, bs, move (rq), st, error); + } + + bool + gq_create_check_run (check_run& cr, + const string& iat, + const string& rid, + const string& hs, + const build& b, + build_state st, + const tenant_service_base::build_hints& bh, + const basic_mark& error) + { + vector<check_run> crs {move (cr)}; + + bool r (gq_create_check_runs (crs, iat, rid, hs, {b}, st, bh, error)); + + cr = move (crs[0]); + + return r; + } + + bool + gq_update_check_run (check_run& cr, + const string& iat, + const string& rid, + const string& nid, + const build& b, + build_state st, + const basic_mark& error) + { + string rq ( + gq_serialize_request (gq_mutation_update_check_run (rid, nid, st))); + + vector<check_run> crs {move (cr)}; + + bool r (gq_mutate_check_runs (crs, iat, {b}, move (rq), st, error)); + + cr = move (crs[0]); + + return r; + } + + pair<optional<gh_check_run>, bool> + gq_fetch_check_run (const string& iat, + const string& check_suite_id, + const string& cr_name, + const basic_mark& error) noexcept + { + try + { + // Example request: + // + // query { + // node(id: "CS_kwDOLc8CoM8AAAAFQPQYEw") { + // ... on CheckSuite { + // checkRuns(last: 100, filterBy: {checkName: "linux_debian_..."}) { + // totalCount, + // edges { + // node { + // id, name, status + // } + // } + // } + // } + // } + // } + // + // This request does the following: + // + // - Look up the check suite by node ID ("direct node lookup"). This + // returns a Node (GraphQL interface). + // + // - Get to the concrete CheckSuite type by using a GraphQL "inline + // fragment" (`... on CheckSuite`). + // + // - Get the check suite's check runs + // - Filter by the sought name + // - Return only two check runs, just enough to be able to tell + // whether there are more than one check runs with this name (which + // is an error). + // + // - Return the id, name, and status fields from the matching check run + // objects. + // + string rq; + { + ostringstream os; + + os << "query {" << '\n'; + + os << "node(id: " << gq_str (check_suite_id) << ") {" << '\n' + << " ... on CheckSuite {" << '\n' + << " checkRuns(last: 2," << '\n' + << " filterBy: {" << '\n' + << "checkName: " << gq_str (cr_name) << '\n' + << " })" << '\n' + // Specify the selection set (fields to be returned). Note that + // edges and node are mandatory. + // + << " {" << '\n' + << " totalCount," << '\n' + << " edges {" << '\n' + << " node {" << '\n' + << " id, name, status" << '\n' + << " }" << '\n' + << " }" << '\n' + << " }" << '\n' + << " }" << '\n' + << "}" << '\n'; + + os << "}" << '\n'; + + rq = os.str (); + } + + // Example response (the part we need to parse here, at least): + // + // { + // "node": { + // "checkRuns": { + // "totalCount": 1, + // "edges": [ + // { + // "node": { + // "id": "CR_kwDOLc8CoM8AAAAFgeoweg", + // "name": "linux_debian_...", + // "status": "IN_PROGRESS" + // } + // } + // ] + // } + // } + // } + // + struct resp + { + optional<gh_check_run> cr; + size_t cr_count = 0; + + resp (json::parser& p) + { + using event = json::event; + + gq_parse_response (p, [this] (json::parser& p) + { + p.next_expect (event::begin_object); + p.next_expect_member_object ("node"); + p.next_expect_member_object ("checkRuns"); + + cr_count = p.next_expect_member_number<size_t> ("totalCount"); + + p.next_expect_member_array ("edges"); + + for (size_t i (0); i != cr_count; ++i) + { + p.next_expect (event::begin_object); + p.next_expect_name ("node"); + gh_check_run cr (p); + p.next_expect (event::end_object); + + if (i == 0) + this->cr = move (cr); + } + + p.next_expect (event::end_array); // edges + p.next_expect (event::end_object); // checkRuns + p.next_expect (event::end_object); // node + p.next_expect (event::end_object); + }); + } + + resp () = default; + } rs; + + uint16_t sc (github_post (rs, + "graphql", + strings {"Authorization: Bearer " + iat}, + gq_serialize_request (rq))); + + if (sc == 200) + { + if (rs.cr_count <= 1) + return {rs.cr, true}; + else + { + error << "unexpected number of check runs (" << rs.cr_count + << ") in response"; + } + } + else + error << "failed to get check run by name: error HTTP " + << "response status " << sc; + } + catch (const json::invalid_json_input& e) + { + // Note: e.name is the GitHub API endpoint. + // + error << "malformed JSON in response from " << e.name + << ", line: " << e.line << ", column: " << e.column + << ", byte offset: " << e.position << ", error: " << e; + } + catch (const invalid_argument& e) + { + error << "malformed header(s) in response: " << e; + } + catch (const system_error& e) + { + error << "unable to get check run by name (errno=" << e.code () + << "): " << e.what (); + } + catch (const std::exception& e) + { + error << "unable to get check run by name: " << e.what (); + } + + return {nullopt, false}; + } + + // GraphQL serialization functions. + // + // The GraphQL spec: + // https://spec.graphql.org/ + // + // The GitHub GraphQL API reference: + // https://docs.github.com/en/graphql/reference/ + // + + // Check that a string is a valid GraphQL name. + // + // GraphQL names can contain only alphanumeric characters and underscores + // and cannot begin with a digit (so basically a C identifier). + // + // Return the name or throw invalid_argument if it is invalid. + // + // @@ TODO: dangerous API. + // + static const string& + gq_name (const string& v) + { + if (v.empty () || digit (v[0])) + throw invalid_argument ("invalid GraphQL name: '" + v + '\''); + + for (char c: v) + { + if (!alnum (c) && c != '_') + { + throw invalid_argument ("invalid character in GraphQL name: '" + c + + '\''); + } + } + + return v; + } + + // Serialize a string to GraphQL. + // + // Return the serialized string or throw invalid_argument if the string is + // invalid. + // + static string + gq_str (const string& v) + { + // GraphQL strings are the same as JSON strings so we use the JSON + // serializer. + // + string b; + json::buffer_serializer s (b); + + try + { + s.value (v); + } + catch (const json::invalid_json_output&) + { + throw invalid_argument ("invalid GraphQL string: '" + v + '\''); + } + + return b; + } + + // Serialize an int to GraphQL. + // +#if 0 + static string + gq_int (uint64_t v) + { + string b; + json::buffer_serializer s (b); + s.value (v); + return b; + } +#endif + + // Serialize a boolean to GraphQL. + // + static inline string + gq_bool (bool v) + { + return v ? "true" : "false"; + } + + // Check that a string is a valid GraphQL enum value. + // + // GraphQL enum values can be any GraphQL name except for `true`, `false`, + // or `null`. + // + // Return the enum value or throw invalid_argument if it is invalid. + // + // @@ TODO: dangerous API. + // + static const string& + gq_enum (const string& v) + { + if (v == "true" || v == "false" || v == "null") + throw invalid_argument ("invalid GraphQL enum value: '" + v + '\''); + + return gq_name (v); + } +} |