From a431fe8a8dfbbe121a30c66df0424be6351ae9ff Mon Sep 17 00:00:00 2001 From: Karen Arutyunov Date: Tue, 28 Jan 2020 20:44:28 +0300 Subject: Validate that values submitted to CI and submission services contain UTF-8 encoded graphic characters only --- bdep/ci-parsers.cxx | 18 ++++++++++++++++++ bdep/ci.cxx | 12 ++++++++++-- bdep/publish.cxx | 23 +++++++++++++++++++---- bdep/utility.cxx | 14 ++++++++++++++ bdep/utility.hxx | 9 +++++++++ 5 files changed, 70 insertions(+), 6 deletions(-) diff --git a/bdep/ci-parsers.cxx b/bdep/ci-parsers.cxx index 0dfafcb..a887ad1 100644 --- a/bdep/ci-parsers.cxx +++ b/bdep/ci-parsers.cxx @@ -35,16 +35,34 @@ namespace bdep string v (s.next ()); + // Make sure that values we post to the CI service are UTF-8 encoded and + // contain only the graphic Unicode codepoints. + // + auto validate_value = [&o, &v] () + { + if (!utf8 (v, codepoint_types::graphic)) + throw invalid_value (o, + v, + "not UTF-8 encoded or contains non-graphic " + "Unicode codepoints"); + }; + if (o == "--build-email") { + validate_value (); + add ("build-email", move (v)); } else if (o == "--builds") { + validate_value (); + add ("builds", move (v)); } else if (o == "--override") { + validate_value (); + // Validate that the value has the <name>:<value> form. // // Note that the value semantics will be verified later, with the diff --git a/bdep/ci.cxx b/bdep/ci.cxx index 81db678..6c30b46 100644 --- a/bdep/ci.cxx +++ b/bdep/ci.cxx @@ -251,7 +251,15 @@ namespace bdep // Get the server and repository URLs. // const url& srv (o.server_specified () ? o.server () : default_server); - const repository_location rep (repository_url (o, prj)); + string rep (repository_url (o, prj).string ()); + + // Make sure that parameters we post to the CI service are UTF-8 encoded + // and contain only the graphic Unicode codepoints. 
+ // + validate_utf8_graphic (rep, "repository URL", "--repository"); + + if (o.simulate_specified ()) + validate_utf8_graphic (o.simulate (), "--simulate option value"); // Print the plan and ask for confirmation. // @@ -291,7 +299,7 @@ namespace bdep using namespace http_service; - parameters params ({{parameter::text, "repository", rep.string ()}}); + parameters params ({{parameter::text, "repository", move (rep)}}); for (const package& p: pkgs) params.push_back ({parameter::text, diff --git a/bdep/publish.cxx b/bdep/publish.cxx index 1757f0b..00b36df 100644 --- a/bdep/publish.cxx +++ b/bdep/publish.cxx @@ -63,15 +63,15 @@ namespace bdep // Control repository URL. // - optional<url> ctrl; + optional<string> ctrl; if (!o.control_specified ()) { - ctrl = control_url (prj); + ctrl = control_url (prj).string (); } else if (o.control () != "none") try { - ctrl = url (o.control ()); + ctrl = url (o.control ()).string (); } catch (const invalid_argument& e) { @@ -101,6 +101,21 @@ namespace bdep fail << "unable to obtain publisher's email" << info << "use --author-email to specify explicitly"; + // Make sure that parameters we post to the submission service are UTF-8 + // encoded and contain only the graphic Unicode codepoints. + // + validate_utf8_graphic (*author.name, "author name", "--author-name"); + validate_utf8_graphic (*author.email, "author email", "--author-email"); + + if (o.section_specified ()) + validate_utf8_graphic (o.section (), "--section option value"); + + if (ctrl) + validate_utf8_graphic (*ctrl, "control URL", "--control"); + + if (o.simulate_specified ()) + validate_utf8_graphic (o.simulate (), "--simulate option value"); + // Collect package information (version, project, section, archive // path/checksum, and manifest). 
// @@ -777,7 +792,7 @@ namespace bdep {parameter::text, "author-email", *author.email}}); if (ctrl) - params.push_back ({parameter::text, "control", ctrl->string ()}); + params.push_back ({parameter::text, "control", *ctrl}); if (o.simulate_specified ()) params.push_back ({parameter::text, "simulate", o.simulate ()}); diff --git a/bdep/utility.cxx b/bdep/utility.cxx index 5a547eb..0ec3233 100644 --- a/bdep/utility.cxx +++ b/bdep/utility.cxx @@ -297,4 +297,18 @@ namespace bdep r.push_back ("}"); } } + + void + validate_utf8_graphic (const string& s, const char* what, const char* opt) + { + if (!utf8 (s, codepoint_types::graphic)) + { + diag_record dr (fail); + dr << what << " '" << s << "' is not UTF-8 encoded or contains " + << "non-graphic Unicode codepoints"; + + if (opt != nullptr) + dr << info << "consider using " << opt << " to override"; + } + } } diff --git a/bdep/utility.hxx b/bdep/utility.hxx index 3de1c32..7f464ad 100644 --- a/bdep/utility.hxx +++ b/bdep/utility.hxx @@ -303,6 +303,15 @@ namespace bdep scan_arguments (r, s); return r; } + + // Verify that a string is a valid UTF-8 byte sequence encoding only the + // graphic Unicode codepoints. Issue diagnostics (including a suggestion to + // use option opt, if specified) and fail if that's not the case. + // + void + validate_utf8_graphic (const string&, + const char* what, + const char* opt = nullptr); } #include -- cgit v1.1