From 02d43c988322c22f991c7023d1f372195ef86bf0 Mon Sep 17 00:00:00 2001 From: Boris Kolpackov Date: Mon, 11 Jan 2016 16:41:32 +0200 Subject: Support repository web interface relative locations --- bpkg/manifest | 46 ++-- bpkg/manifest.cxx | 486 ++++++++++++++++++++++------------- tests/repository-location/driver.cxx | 166 +++++++++++- 3 files changed, 499 insertions(+), 199 deletions(-) diff --git a/bpkg/manifest b/bpkg/manifest index d443c49..6bfe8e7 100644 --- a/bpkg/manifest +++ b/bpkg/manifest @@ -10,7 +10,7 @@ #include #include // uint16_t #include -#include // move() +#include // move() #include // logic_error #include @@ -363,7 +363,7 @@ namespace bpkg repository_location () = default; // If the argument is not empty, create remote/absolute repository - // location. Throw invalid_argument if the location is a relative + // location. Throw std::invalid_argument if the location is a relative // path. If the argument is empty, then create the special empty // location. // @@ -372,7 +372,7 @@ namespace bpkg // Create a potentially relative repository location. If base is not // empty, use it to complete the relative location to remote/absolute. - // Throw invalid_argument if base is not empty but the location is + // Throw std::invalid_argument if base is not empty but the location is // empty, base itself is relative, or the resulting completed location // is invalid. // @@ -458,13 +458,15 @@ namespace bpkg return port_; } - bool - secure () const + enum class protocol {http, https}; + + protocol + proto () const { if (local ()) throw std::logic_error ("local location"); - return secure_; + return proto_; } // Note that this is not necessarily syntactically the same string @@ -477,10 +479,10 @@ namespace bpkg private: std::string canonical_name_; + protocol proto_; std::string host_; std::uint16_t port_; butl::dir_path path_; - bool secure_; }; inline std::ostream& @@ -509,15 +511,6 @@ namespace bpkg butl::optional summary; butl::optional description; - public: - repository_manifest (manifest_parser&, bool ignore_unknown = false); - repository_manifest (manifest_parser&, - manifest_name_value start, - bool ignore_unknown = false); - - void - serialize (manifest_serializer&) const; - // Return the effective role of the repository. If the role is not // explicitly specified (see the role member above), then calculate // the role based on the location. Specifically, if the location is @@ -525,10 +518,29 @@ namespace bpkg // If the role is specified, then verify that it is consistent with // the location value (that is, base if the location is empty and // prerequisite or complete if not) and return that. Otherwise, - // throw logic_error. + // throw std::logic_error. // repository_role effective_role () const; + + // Return the effective web interface URL based on the specified remote + // repository location. If url is not present or doesn't start with '.', + // then return it unchanged. Otherwise, process the relative format + // as described in the manifest specification. Throw std::invalid_argument + // if the relative url format is invalid or if the repository location is + // empty or local. + // + butl::optional + effective_url (const repository_location&) const; + + public: + repository_manifest (manifest_parser&, bool ignore_unknown = false); + repository_manifest (manifest_parser&, + manifest_name_value start, + bool ignore_unknown = false); + + void + serialize (manifest_serializer&) const; }; class repository_manifests: public std::vector diff --git a/bpkg/manifest.cxx b/bpkg/manifest.cxx index 7f847a3..b7b0aaf 100644 --- a/bpkg/manifest.cxx +++ b/bpkg/manifest.cxx @@ -1188,8 +1188,234 @@ namespace bpkg s.next ("", ""); // End of stream. } + // url_parts + // + struct url_parts + { + using protocol = repository_location::protocol; + + protocol proto; + string host; + uint16_t port; + dir_path path; + + explicit + url_parts (const string&); + }; + + // Return the URL protocol, or nullopt if location is not a URL. + // + static optional + is_url (const string& location) + { + using protocol = url_parts::protocol; + + optional p; + if (::strncasecmp (location.c_str (), "http://", 7) == 0) + p = protocol::http; + else if (::strncasecmp (location.c_str (), "https://", 8) == 0) + p = protocol::https; + + return p; + } + + static string + to_string (url_parts::protocol proto, + const string& host, + uint16_t port, + const dir_path& path) + { + string u ( + (proto == url_parts::protocol::http ? "http://" : "https://") + host); + + if (port != 0) + u += ":" + std::to_string (port); + + if (!path.empty ()) + u += "/" + path.posix_string (); + + return u; + } + + url_parts:: + url_parts (const string& s) + { + optional pr (is_url (s)); + if (!pr) + throw invalid_argument ("invalid protocol"); + + proto = *pr; + + string::size_type host_offset (s.find ("//")); + assert (host_offset != string::npos); + host_offset += 2; + + string::size_type p (s.find ('/', host_offset)); + + if (p != string::npos) + // Chop the path part. Path is saved as a relative one to be of the + // same type on different operating systems including Windows. + // + path = dir_path (s, p + 1, string::npos); + + // Put the lower-cased version of the host part into host. + // Chances are good it will stay unmodified. + // + transform (s.cbegin () + host_offset, + p == string::npos ? s.cend () : s.cbegin () + p, + back_inserter (host), + lowercase); + + // Validate host name according to "2.3.1. Preferred name syntax" and + // "2.3.4. Size limits" of https://tools.ietf.org/html/rfc1035. + // + // Check that there is no empty labels and ones containing chars + // different from alpha-numeric and hyphen. Label should start from + // letter, do not end with hypen and be not longer than 63 chars. + // Total host name length should be not longer than 255 chars. + // + auto hb (host.cbegin ()); + auto he (host.cend ()); + auto ls (hb); // Host domain name label begin. + auto pt (he); // Port begin. + + for (auto i (hb); i != he; ++i) + { + char c (*i); + + if (pt == he) // Didn't reach port specification yet. + { + if (c == ':') // Port specification reached. + pt = i; + else + { + auto n (i + 1); + + // Validate host name. + // + + // Is first label char. + // + bool flc (i == ls); + + // Is last label char. + // + bool llc (n == he || *n == '.' || *n == ':'); + + // Validate char. + // + bool valid (alpha (c) || + (digit (c) && !flc) || + ((c == '-' || c == '.') && !flc && !llc)); + + // Validate length. + // + if (valid) + valid = i - ls < 64 && i - hb < 256; + + if (!valid) + throw invalid_argument ("invalid host"); + + if (c == '.') + ls = n; + } + } + else + { + // Validate port. + // + if (!digit (c)) + throw invalid_argument ("invalid port"); + } + } + + // Chop the port, if present. + // + if (pt == he) + port = 0; + else + { + unsigned long long n (++pt == he ? 0 : stoull (string (pt, he))); + if (n == 0 || n > UINT16_MAX) + throw invalid_argument ("invalid port"); + + port = static_cast (n); + host.resize (pt - hb - 1); + } + + if (host.empty ()) + throw invalid_argument ("invalid host"); + } + // repository_location // + static string + strip_domain (const string& host) + { + assert (!host.empty ()); // Should be repository location host. + + string h; + bool bpkg (false); + + if (host.compare (0, 4, "www.") == 0 || + host.compare (0, 4, "pkg.") == 0 || + (bpkg = host.compare (0, 5, "bpkg.") == 0)) + { + if (h.assign (host, bpkg ? 5 : 4, string::npos).empty ()) + throw invalid_argument ("invalid host"); + } + else + h = host; + + return h; + } + + // The 'pkg' path component stripping mode. + // + enum class strip_mode {version, component, path}; + + static dir_path + strip_path (const dir_path& path, strip_mode mode) + { + // Should be repository location path. + // + assert (!path.empty () && *path.begin () != ".."); + + auto rb (path.rbegin ()), i (rb), re (path.rend ()); + + // Find the version component. + // + for (; i != re; ++i) + { + const string& c (*i); + + if (!c.empty () && c.find_first_not_of ("1234567890") == string::npos) + break; + } + + if (i == re) + throw invalid_argument ("missing repository version"); + + // Validate the version. At the moment the only valid value is 1. + // + if (stoul (*i) != 1) + throw invalid_argument ("unsupported repository version"); + + dir_path res (rb, i); + + // Canonical name prefix part ends with the special "pkg" component. + // + bool pc (++i != re && (*i == "pkg" || *i == "bpkg")); + + if (pc && mode == strip_mode::component) + ++i; // Strip the "pkg" component. + + if (!pc || mode != strip_mode::path) + res = dir_path (i, re) / res; // Concatenate prefix and path parts. + + return res; + } + // Location parameter type is fully qualified as compiler gets confused with // string() member. // @@ -1221,135 +1447,22 @@ namespace bpkg if (!b.empty () && b.relative ()) throw invalid_argument ("base relative filesystem path"); - secure_ = false; - - if (::strncasecmp (l.c_str (), "http://", 7) == 0 || - (secure_ = ::strncasecmp (l.c_str (), "https://", 8) == 0)) + if (is_url (l)) { - // Split location into host, port and path components. Calculate - // canonical name part removing www. and pkg. prefixes. - // - size_t host_offset (secure_ ? 8 : 7); - auto p (l.find ('/', host_offset)); - - // The remote repository location with no path specified is not a valid - // one. Keep the path_ member empty so the later check for emptiness - // will throw invalid_argument exception. - // - if (p != string::npos) - // Chop the path part. Path is saved as a relative one to be of the - // same type on different operating systems including Windows. - // - path_ = dir_path (l, p + 1, string::npos); + url_parts u (l); + proto_ = u.proto; + host_ = move (u.host); + port_ = u.port; + path_ = move (u.path); - // Put the lower-cased version of the host part into host_. - // Chances are good it will stay unmodified. - // - transform (l.cbegin () + host_offset, - p == string::npos ? l.cend () : l.cbegin () + p, - back_inserter (host_), - lowercase); + canonical_name_ = strip_domain (host_); - // Validate host name according to "2.3.1. Preferred name syntax" and - // "2.3.4. Size limits" of https://tools.ietf.org/html/rfc1035. - // - // Check that there is no empty labels and ones containing chars - // different from alpha-numeric and hyphen. Label should start from - // letter, do not end with hypen and be not longer than 63 chars. - // Total host name length should be not longer than 255 chars. + // For canonical name and for the HTTP protocol, treat a.com and + // a.com:80 as the same name. The same rule applies to the HTTPS + // protocol and port 443. // - auto hb (host_.cbegin ()); - auto he (host_.cend ()); - auto ls (hb); // Host domain name label begin. - auto pt (he); // Port begin. - - for (auto i (hb); i != he; ++i) - { - char c (*i); - - if (pt == he) // Didn't reach port specification yet. - { - if (c == ':') // Port specification reached. - pt = i; - else - { - auto n (i + 1); - - // Validate host name. - // - - // Is first label char. - // - bool flc (i == ls); - - // Is last label char. - // - bool llc (n == he || *n == '.' || *n == ':'); - - // Validate char. - // - bool valid (alpha (c) || - (digit (c) && !flc) || - ((c == '-' || c == '.') && !flc && !llc)); - - // Validate length. - // - if (valid) - valid = i - ls < 64 && i - hb < 256; - - if (!valid) - throw invalid_argument ("invalid host"); - - if (c == '.') - ls = n; - } - } - else - { - // Validate port. - // - if (!digit (c)) - throw invalid_argument ("invalid port"); - } - } - - // Chop the port, if present. - // - if (pt == he) - port_ = 0; - else - { - unsigned long long n (++pt == he ? 0 : stoull (string (pt, he))); - if (n == 0 || n > UINT16_MAX) - throw invalid_argument ("invalid port"); - - port_ = static_cast (n); - host_.resize (pt - hb - 1); - } - - if (host_.empty ()) - throw invalid_argument ("invalid host"); - - // Ok, the last thing we need to do is add the host and port - // parts to the canonical_name_ name. Here we also need to - // chop off the special "www" and "pkg" prefixes. Strictly - // speaking we can end up with comething bogus like "com" - // if the host is "pkg.com". - // - bool bpkg (false); - if (host_.compare (0, 4, "www.") == 0 || - host_.compare (0, 4, "pkg.") == 0 || - (bpkg = host_.compare (0, 5, "bpkg.") == 0)) - canonical_name_.assign (host_, bpkg ? 5 : 4, string::npos); - else - canonical_name_ = host_; - - // For canonical name and for the HTTP protocol, treat a.com - // and a.com:80 as the same name. The same rule apply the HTTPS protocol - // and the port 443. - // - if (port_ != 0 && port_ != (secure_ ? 443 : 80)) - canonical_name_ += ':' + to_string (port_); + if (port_ != 0 && port_ != (proto_ == protocol::http ? 80 : 443)) + canonical_name_ += ':' + std::to_string (port_); } else { @@ -1361,10 +1474,10 @@ namespace bpkg { // Convert the relative path location to an absolute or remote one. // + proto_ = b.proto_; host_ = b.host_; port_ = b.port_; path_ = b.path_ / path_; - secure_ = b.secure_; // Set canonical name to the base location canonical name host // part. The path part of the canonical name is calculated below. @@ -1412,49 +1525,18 @@ namespace bpkg return; } - // Search for the version path component preceeding canonical name - // component. + // Canonical name / part. // - auto rb (path_.rbegin ()), i (rb), re (path_.rend ()); - - // Find the version component. - // - for (; i != re; ++i) - { - const string& c (*i); - - if (!c.empty () && c.find_first_not_of ("1234567890") == string::npos) - break; - } - - if (i == re) - throw invalid_argument ("missing repository version"); - - // Validate the version. At the moment the only valid value is 1. - // - if (stoul (*i) != 1) - throw invalid_argument ("unsupported repository version"); - - dir_path p (rb, i); // Canonical name path part. - - // Prefix ends with "pkg" component. - // - bool pc (++i != re && (*i == "pkg" || *i == "bpkg")); - - if (pc) - ++i; // Skip "pkg" component from prefix. - - if (!host_.empty () || !pc) - p = dir_path (i, re) / p; // Concatenate prefix and path. + string cp ( + strip_path (path_, remote () ? strip_mode::component : strip_mode::path). + posix_string ()); // Note: allow empty paths (e.g., http://stable.cppget.org/1/). // - string d (p.posix_string ()); - - if (!canonical_name_.empty () && !d.empty ()) // If we have host and dir. + if (!canonical_name_.empty () && !cp.empty ()) // If we have host and dir. canonical_name_ += '/'; - canonical_name_ += d; + canonical_name_ += cp; // But don't allow empty canonical names. // @@ -1465,20 +1547,13 @@ namespace bpkg string repository_location:: string () const { - using std::string; // Also function name. - if (empty ()) - return string (); + return std::string (); // Also function name. if (local ()) return path_.string (); - string p ((secure_ ? "https://" : "http://") + host_); - - if (port_ != 0) - p += ":" + to_string (port_); - - return p + "/" + path_.posix_string (); + return to_string (proto_, host_, port_, path_); } // repository_manifest @@ -1693,6 +1768,73 @@ namespace bpkg : repository_role::prerequisite; } + optional repository_manifest:: + effective_url (const repository_location& l) const + { + if (!url || (*url)[0] != '.') + return url; + + const dir_path rp (*url); + auto i (rp.begin ()); + + static const char* invalid_url ("invalid relative url"); + + auto strip([&i, &rp]() -> bool { + if (i != rp.end ()) + { + const auto& c (*i++); + if (c == "..") + return true; + + if (c == ".") + return false; + } + + throw invalid_argument (invalid_url); + }); + + bool strip_d (strip ()); // Strip domain. + bool strip_p (strip ()); // Strip path. + + // The web interface relative path with the special first two components + // stripped. + // + const dir_path rpath (i, rp.end ()); + assert (rpath.relative ()); + + url_parts u (l.string ()); + + // Web interface URL path part. + // + // It is important to call strip_path() before appending the relative path. + // Otherwise the effective URL for the path ./../../.. and the repository + // location http://a.com/foo/pkg/1/math will wrongly be + // http://a.com/foo/pkg instead of http://a.com. + // + dir_path ipath ( + strip_path ( + u.path, strip_p ? strip_mode::component : strip_mode::version) / rpath); + + static const char* invalid_location ("invalid repository location"); + + try + { + ipath.normalize (); + } + catch (const invalid_path&) + { + throw invalid_argument (invalid_location); + } + + assert (ipath.relative ()); + + if (!ipath.empty () && *ipath.begin () == "..") + throw invalid_argument (invalid_location); + + return to_string ( + u.proto, strip_d ? strip_domain (u.host) : u.host, u.port, ipath); + } + // repository_manifests // repository_manifests:: diff --git a/tests/repository-location/driver.cxx b/tests/repository-location/driver.cxx index 908b243..58aa5ed 100644 --- a/tests/repository-location/driver.cxx +++ b/tests/repository-location/driver.cxx @@ -4,13 +4,18 @@ #include #include +#include #include #include #include // invalid_argument +#include + #include +#include using namespace std; +using namespace butl; using namespace bpkg; static bool @@ -35,15 +40,46 @@ bad_location (const string& l, const repository_location& b) repository_location bl (l, b); return false; } - catch (const invalid_argument) + catch (const invalid_argument&) { return true; } } +static string +effective_url (const string& l, const repository_location& r) +{ + istringstream is (":1\nurl: " + l); + manifest_parser mp (is, ""); + repository_manifest m (mp); + + optional u (m.effective_url (r)); + assert (u); + return *u; +} + +static bool +bad_url (const string& l, const repository_location& r) +{ + try + { + effective_url (l, r); + return false; + } + catch (const invalid_argument&) + { + } + catch (const logic_error&) + { + } + return true; +} + int main (int argc, char* argv[]) { + using protocol = repository_location::protocol; + if (argc != 1) { cerr << "usage: " << argv[0] << endl; @@ -123,6 +159,38 @@ main (int argc, char* argv[]) repository_location ( "http://stable.cppget.org/1/misc"))); + // Invalid web interface URL. + // + assert (bad_url (".a/..", + repository_location ( + "http://stable.cppget.org/1/misc"))); + + assert (bad_url ("../a/..", + repository_location ( + "http://stable.cppget.org/1/misc"))); + + assert (bad_url ("../.a", + repository_location ( + "http://stable.cppget.org/1/misc"))); + + assert (bad_url ("../../..", + repository_location ( + "/var/1/misc"))); + + assert (bad_url ("../../..", + repository_location ( + "/var/pkg/1/misc"))); + + assert (bad_url ("../../..", repository_location ())); + + assert (bad_url ("../../../../..", + repository_location ( + "http://pkg.stable.cppget.org/foo/pkg/1/misc"))); + + assert (bad_url ("../../../../../abc", + repository_location ( + "http://stable.cppget.org/foo/1/misc"))); + // Test valid locations. // { @@ -133,6 +201,7 @@ main (int argc, char* argv[]) { repository_location l ("1/aa/bb", repository_location ()); assert (l.string () == "1/aa/bb"); + // Relative locations have no canonical name. // assert (l.canonical_name ().empty ()); @@ -181,7 +250,8 @@ main (int argc, char* argv[]) assert (l.canonical_name () == "bb"); } { - repository_location l ("../c/../c/./1/aa/../bb", repository_location ()); + repository_location l ("../c/../c/./1/aa/../bb", + repository_location ()); assert (l.string () == "../c/1/bb"); assert (l.canonical_name ().empty ()); } @@ -189,37 +259,37 @@ main (int argc, char* argv[]) repository_location l ("http://www.a.com:80/1/aa/bb"); assert (l.string () == "http://www.a.com:80/1/aa/bb"); assert (l.canonical_name () == "a.com/aa/bb"); - assert (!l.secure ()); + assert (l.proto () == protocol::http); } { repository_location l ("https://www.a.com:443/1/aa/bb"); assert (l.string () == "https://www.a.com:443/1/aa/bb"); assert (l.canonical_name () == "a.com/aa/bb"); - assert (l.secure ()); + assert (l.proto () == protocol::https); } { repository_location l ("http://www.a.com:8080/dd/1/aa/bb"); assert (l.string () == "http://www.a.com:8080/dd/1/aa/bb"); assert (l.canonical_name () == "a.com:8080/dd/aa/bb"); - assert (!l.secure ()); + assert (l.proto () == protocol::http); } { repository_location l ("http://www.a.com:8080/dd/pkg/1/aa/bb"); assert (l.string () == "http://www.a.com:8080/dd/pkg/1/aa/bb"); assert (l.canonical_name () == "a.com:8080/dd/aa/bb"); - assert (!l.secure ()); + assert (l.proto () == protocol::http); } { repository_location l ("http://www.a.com:8080/bpkg/dd/1/aa/bb"); assert (l.string () == "http://www.a.com:8080/bpkg/dd/1/aa/bb"); assert (l.canonical_name () == "a.com:8080/bpkg/dd/aa/bb"); - assert (!l.secure ()); + assert (l.proto () == protocol::http); } { repository_location l ("https://www.a.com:444/dd/1/aa/bb"); assert (l.string () == "https://www.a.com:444/dd/1/aa/bb"); assert (l.canonical_name () == "a.com:444/dd/aa/bb"); - assert (l.secure ()); + assert (l.proto () == protocol::https); } { repository_location l ("http://a.com/a/b/../c/1/aa/../bb"); @@ -299,14 +369,14 @@ main (int argc, char* argv[]) repository_location l2 ("../1/math", l1); assert (l2.string () == "http://www.stable.cppget.org:8080/1/math"); assert (l2.canonical_name () == "stable.cppget.org:8080/math"); - assert (!l2.secure ()); + assert (l2.proto () == protocol::http); } { repository_location l1 ("https://www.stable.cppget.org:444/1"); repository_location l2 ("../1/math", l1); assert (l2.string () == "https://www.stable.cppget.org:444/1/math"); assert (l2.canonical_name () == "stable.cppget.org:444/math"); - assert (l2.secure ()); + assert (l2.proto () == protocol::https); } { repository_location l1 ("/var/r1/1/misc"); @@ -351,6 +421,82 @@ main (int argc, char* argv[]) assert (l1.string () == l2.string ()); assert (l1.canonical_name () == l2.canonical_name ()); } + + // Test valid web interface locations. + // + { + repository_location l ("http://cppget.org/1/misc"); + assert (effective_url ("http://cppget.org/pkg", l) == + "http://cppget.org/pkg"); + } + { + repository_location l ("http://cppget.org/1/misc"); + assert (effective_url ("https://cppget.org/pkg", l) == + "https://cppget.org/pkg"); + } + { + repository_location l ("http://pkg.cppget.org/foo/pkg/1/misc/stable"); + assert (effective_url ("./.", l) == + "http://pkg.cppget.org/foo/pkg/misc/stable"); + } + { + repository_location l ("http://cppget.org/foo/1/misc/stable"); + assert (effective_url ("./.", l) == + "http://cppget.org/foo/misc/stable"); + } + { + repository_location l ("http://pkg.cppget.org/foo/pkg/1/misc/stable"); + assert (effective_url ("././..", l) == + "http://pkg.cppget.org/foo/pkg/misc"); + } + { + repository_location l ("http://pkg.cppget.org/foo/pkg/1/misc"); + assert (effective_url ("././../../..", l) == "http://pkg.cppget.org"); + } + { + repository_location l ("https://pkg.cppget.org/foo/pkg/1/misc/stable"); + assert (effective_url ("../.", l) == + "https://cppget.org/foo/pkg/misc/stable"); + } + { + repository_location l ("https://pkg.cppget.org/foo/pkg/1/misc/stable"); + assert (effective_url (".././..", l) == + "https://cppget.org/foo/pkg/misc"); + } + { + repository_location l ("https://bpkg.cppget.org/foo/bpkg/1/misc/stable"); + assert (effective_url ("./..", l) == + "https://bpkg.cppget.org/foo/misc/stable"); + } + { + repository_location l ("https://bpkg.cppget.org/foo/bpkg/1/misc/stable"); + assert (effective_url ("./../..", l) == + "https://bpkg.cppget.org/foo/misc"); + } + { + repository_location l ("http://www.cppget.org/foo/bpkg/1/misc/stable"); + assert (effective_url ("../..", l) == + "http://cppget.org/foo/misc/stable"); + } + { + repository_location l ("http://cppget.org/pkg/foo/1/misc/stable"); + assert (effective_url ("../..", l) == + "http://cppget.org/pkg/foo/misc/stable"); + } + { + repository_location l ("http://www.cppget.org/foo/bpkg/1/misc/stable"); + assert (effective_url ("../../..", l) == + "http://cppget.org/foo/misc"); + } + { + repository_location l ("http://pkg.cppget.org/foo/pkg/1/misc"); + assert (effective_url ("../../../..", l) == "http://cppget.org"); + } + { + repository_location l ("http://www.cppget.org/foo/bpkg/1/misc/stable"); + assert (effective_url ("../../../abc", l) == + "http://cppget.org/foo/misc/abc"); + } } catch (const exception& e) { -- cgit v1.1