diff options
Diffstat (limited to 'bpkg/manifest.cxx')
-rw-r--r-- | bpkg/manifest.cxx | 486 |
1 files changed, 314 insertions, 172 deletions
diff --git a/bpkg/manifest.cxx b/bpkg/manifest.cxx index 7f847a3..b7b0aaf 100644 --- a/bpkg/manifest.cxx +++ b/bpkg/manifest.cxx @@ -1188,8 +1188,234 @@ namespace bpkg s.next ("", ""); // End of stream. } + // url_parts + // + struct url_parts + { + using protocol = repository_location::protocol; + + protocol proto; + string host; + uint16_t port; + dir_path path; + + explicit + url_parts (const string&); + }; + + // Return the URL protocol, or nullopt if location is not a URL. + // + static optional<url_parts::protocol> + is_url (const string& location) + { + using protocol = url_parts::protocol; + + optional<protocol> p; + if (::strncasecmp (location.c_str (), "http://", 7) == 0) + p = protocol::http; + else if (::strncasecmp (location.c_str (), "https://", 8) == 0) + p = protocol::https; + + return p; + } + + static string + to_string (url_parts::protocol proto, + const string& host, + uint16_t port, + const dir_path& path) + { + string u ( + (proto == url_parts::protocol::http ? "http://" : "https://") + host); + + if (port != 0) + u += ":" + std::to_string (port); + + if (!path.empty ()) + u += "/" + path.posix_string (); + + return u; + } + + url_parts:: + url_parts (const string& s) + { + optional<protocol> pr (is_url (s)); + if (!pr) + throw invalid_argument ("invalid protocol"); + + proto = *pr; + + string::size_type host_offset (s.find ("//")); + assert (host_offset != string::npos); + host_offset += 2; + + string::size_type p (s.find ('/', host_offset)); + + if (p != string::npos) + // Chop the path part. Path is saved as a relative one to be of the + // same type on different operating systems including Windows. + // + path = dir_path (s, p + 1, string::npos); + + // Put the lower-cased version of the host part into host. + // Chances are good it will stay unmodified. + // + transform (s.cbegin () + host_offset, + p == string::npos ? s.cend () : s.cbegin () + p, + back_inserter (host), + lowercase); + + // Validate host name according to "2.3.1. 
Preferred name syntax" and + // "2.3.4. Size limits" of https://tools.ietf.org/html/rfc1035. + // + // Check that there are no empty labels or labels containing chars + // other than alpha-numeric and hyphen. A label should start with a + // letter, not end with a hyphen, and be no longer than 63 chars. + // Total host name length should be no longer than 255 chars. + // + auto hb (host.cbegin ()); + auto he (host.cend ()); + auto ls (hb); // Host domain name label begin. + auto pt (he); // Port begin. + + for (auto i (hb); i != he; ++i) + { + char c (*i); + + if (pt == he) // Didn't reach port specification yet. + { + if (c == ':') // Port specification reached. + pt = i; + else + { + auto n (i + 1); + + // Validate host name. + // + + // Is first label char. + // + bool flc (i == ls); + + // Is last label char. + // + bool llc (n == he || *n == '.' || *n == ':'); + + // Validate char. + // + bool valid (alpha (c) || + (digit (c) && !flc) || + ((c == '-' || c == '.') && !flc && !llc)); + + // Validate length. Offsets are zero-based: a label char may sit at 0-62 (the dot closing a 63-char label is at 63) and a host char at 0-254. + // + if (valid) + valid = (c == '.' || i - ls < 63) && i - hb < 255; + + if (!valid) + throw invalid_argument ("invalid host"); + + if (c == '.') + ls = n; + } + } + else + { + // Validate port. + // + if (!digit (c)) + throw invalid_argument ("invalid port"); + } + } + + // Chop the port, if present. + // + if (pt == he) + port = 0; + else + { + unsigned long long n (++pt == he ? 0 : stoull (string (pt, he))); + if (n == 0 || n > UINT16_MAX) + throw invalid_argument ("invalid port"); + + port = static_cast<uint16_t> (n); + host.resize (pt - hb - 1); + } + + if (host.empty ()) + throw invalid_argument ("invalid host"); + } + // repository_location // + static string + strip_domain (const string& host) + { + assert (!host.empty ()); // Should be repository location host. + + string h; + bool bpkg (false); + + if (host.compare (0, 4, "www.") == 0 || + host.compare (0, 4, "pkg.") == 0 || + (bpkg = host.compare (0, 5, "bpkg.") == 0)) + { + if (h.assign (host, bpkg ? 
5 : 4, string::npos).empty ()) + throw invalid_argument ("invalid host"); + } + else + h = host; + + return h; + } + + // The 'pkg' path component stripping mode. + // + enum class strip_mode {version, component, path}; + + static dir_path + strip_path (const dir_path& path, strip_mode mode) + { + // Should be repository location path. + // + assert (!path.empty () && *path.begin () != ".."); + + auto rb (path.rbegin ()), i (rb), re (path.rend ()); + + // Find the version component. + // + for (; i != re; ++i) + { + const string& c (*i); + + if (!c.empty () && c.find_first_not_of ("1234567890") == string::npos) + break; + } + + if (i == re) + throw invalid_argument ("missing repository version"); + + // Validate the version. At the moment the only valid value is 1. + // + if (stoul (*i) != 1) + throw invalid_argument ("unsupported repository version"); + + dir_path res (rb, i); + + // Canonical name prefix part ends with the special "pkg" component. + // + bool pc (++i != re && (*i == "pkg" || *i == "bpkg")); + + if (pc && mode == strip_mode::component) + ++i; // Strip the "pkg" component. + + if (!pc || mode != strip_mode::path) + res = dir_path (i, re) / res; // Concatenate prefix and path parts. + + return res; + } + // Location parameter type is fully qualified as compiler gets confused with // string() member. // @@ -1221,135 +1447,22 @@ namespace bpkg if (!b.empty () && b.relative ()) throw invalid_argument ("base relative filesystem path"); - secure_ = false; - - if (::strncasecmp (l.c_str (), "http://", 7) == 0 || - (secure_ = ::strncasecmp (l.c_str (), "https://", 8) == 0)) + if (is_url (l)) { - // Split location into host, port and path components. Calculate - // canonical name <host> part removing www. and pkg. prefixes. - // - size_t host_offset (secure_ ? 8 : 7); - auto p (l.find ('/', host_offset)); - - // The remote repository location with no path specified is not a valid - // one. 
Keep the path_ member empty so the later check for emptiness - // will throw invalid_argument exception. - // - if (p != string::npos) - // Chop the path part. Path is saved as a relative one to be of the - // same type on different operating systems including Windows. - // - path_ = dir_path (l, p + 1, string::npos); + url_parts u (l); + proto_ = u.proto; + host_ = move (u.host); + port_ = u.port; + path_ = move (u.path); - // Put the lower-cased version of the host part into host_. - // Chances are good it will stay unmodified. - // - transform (l.cbegin () + host_offset, - p == string::npos ? l.cend () : l.cbegin () + p, - back_inserter (host_), - lowercase); + canonical_name_ = strip_domain (host_); - // Validate host name according to "2.3.1. Preferred name syntax" and - // "2.3.4. Size limits" of https://tools.ietf.org/html/rfc1035. - // - // Check that there is no empty labels and ones containing chars - // different from alpha-numeric and hyphen. Label should start from - // letter, do not end with hypen and be not longer than 63 chars. - // Total host name length should be not longer than 255 chars. + // For canonical name and for the HTTP protocol, treat a.com and + // a.com:80 as the same name. The same rule applies to the HTTPS + // protocol and port 443. // - auto hb (host_.cbegin ()); - auto he (host_.cend ()); - auto ls (hb); // Host domain name label begin. - auto pt (he); // Port begin. - - for (auto i (hb); i != he; ++i) - { - char c (*i); - - if (pt == he) // Didn't reach port specification yet. - { - if (c == ':') // Port specification reached. - pt = i; - else - { - auto n (i + 1); - - // Validate host name. - // - - // Is first label char. - // - bool flc (i == ls); - - // Is last label char. - // - bool llc (n == he || *n == '.' || *n == ':'); - - // Validate char. - // - bool valid (alpha (c) || - (digit (c) && !flc) || - ((c == '-' || c == '.') && !flc && !llc)); - - // Validate length. 
- // - if (valid) - valid = i - ls < 64 && i - hb < 256; - - if (!valid) - throw invalid_argument ("invalid host"); - - if (c == '.') - ls = n; - } - } - else - { - // Validate port. - // - if (!digit (c)) - throw invalid_argument ("invalid port"); - } - } - - // Chop the port, if present. - // - if (pt == he) - port_ = 0; - else - { - unsigned long long n (++pt == he ? 0 : stoull (string (pt, he))); - if (n == 0 || n > UINT16_MAX) - throw invalid_argument ("invalid port"); - - port_ = static_cast<uint16_t> (n); - host_.resize (pt - hb - 1); - } - - if (host_.empty ()) - throw invalid_argument ("invalid host"); - - // Ok, the last thing we need to do is add the host and port - // parts to the canonical_name_ name. Here we also need to - // chop off the special "www" and "pkg" prefixes. Strictly - // speaking we can end up with comething bogus like "com" - // if the host is "pkg.com". - // - bool bpkg (false); - if (host_.compare (0, 4, "www.") == 0 || - host_.compare (0, 4, "pkg.") == 0 || - (bpkg = host_.compare (0, 5, "bpkg.") == 0)) - canonical_name_.assign (host_, bpkg ? 5 : 4, string::npos); - else - canonical_name_ = host_; - - // For canonical name and for the HTTP protocol, treat a.com - // and a.com:80 as the same name. The same rule apply the HTTPS protocol - // and the port 443. - // - if (port_ != 0 && port_ != (secure_ ? 443 : 80)) - canonical_name_ += ':' + to_string (port_); + if (port_ != 0 && port_ != (proto_ == protocol::http ? 80 : 443)) + canonical_name_ += ':' + std::to_string (port_); } else { @@ -1361,10 +1474,10 @@ namespace bpkg { // Convert the relative path location to an absolute or remote one. // + proto_ = b.proto_; host_ = b.host_; port_ = b.port_; path_ = b.path_ / path_; - secure_ = b.secure_; // Set canonical name to the base location canonical name host // part. The path part of the canonical name is calculated below. 
@@ -1412,49 +1525,18 @@ namespace bpkg return; } - // Search for the version path component preceeding canonical name - // <path> component. + // Canonical name <prefix>/<path> part. // - auto rb (path_.rbegin ()), i (rb), re (path_.rend ()); - - // Find the version component. - // - for (; i != re; ++i) - { - const string& c (*i); - - if (!c.empty () && c.find_first_not_of ("1234567890") == string::npos) - break; - } - - if (i == re) - throw invalid_argument ("missing repository version"); - - // Validate the version. At the moment the only valid value is 1. - // - if (stoul (*i) != 1) - throw invalid_argument ("unsupported repository version"); - - dir_path p (rb, i); // Canonical name path part. - - // Prefix ends with "pkg" component. - // - bool pc (++i != re && (*i == "pkg" || *i == "bpkg")); - - if (pc) - ++i; // Skip "pkg" component from prefix. - - if (!host_.empty () || !pc) - p = dir_path (i, re) / p; // Concatenate prefix and path. + string cp ( + strip_path (path_, remote () ? strip_mode::component : strip_mode::path). + posix_string ()); // Note: allow empty paths (e.g., http://stable.cppget.org/1/). // - string d (p.posix_string ()); - - if (!canonical_name_.empty () && !d.empty ()) // If we have host and dir. + if (!canonical_name_.empty () && !cp.empty ()) // If we have host and dir. canonical_name_ += '/'; - canonical_name_ += d; + canonical_name_ += cp; // But don't allow empty canonical names. // @@ -1465,20 +1547,13 @@ namespace bpkg string repository_location:: string () const { - using std::string; // Also function name. - if (empty ()) - return string (); + return std::string (); // Also function name. if (local ()) return path_.string (); - string p ((secure_ ? 
"https://" : "http://") + host_); - - if (port_ != 0) - p += ":" + to_string (port_); - - return p + "/" + path_.posix_string (); + return to_string (proto_, host_, port_, path_); } // repository_manifest @@ -1693,6 +1768,73 @@ namespace bpkg : repository_role::prerequisite; } + optional<string> repository_manifest:: + effective_url (const repository_location& l) const + { + if (!url || (*url)[0] != '.') + return url; + + const dir_path rp (*url); + auto i (rp.begin ()); + + static const char* invalid_url ("invalid relative url"); + + auto strip([&i, &rp]() -> bool { + if (i != rp.end ()) + { + const auto& c (*i++); + if (c == "..") + return true; + + if (c == ".") + return false; + } + + throw invalid_argument (invalid_url); + }); + + bool strip_d (strip ()); // Strip domain. + bool strip_p (strip ()); // Strip path. + + // The web interface relative path with the special first two components + // stripped. + // + const dir_path rpath (i, rp.end ()); + assert (rpath.relative ()); + + url_parts u (l.string ()); + + // Web interface URL path part. + // + // It is important to call strip_path() before appending the relative path. + // Otherwise the effective URL for the path ./../../.. and the repository + // location http://a.com/foo/pkg/1/math will wrongly be + // http://a.com/foo/pkg instead of http://a.com. + // + dir_path ipath ( + strip_path ( + u.path, strip_p ? strip_mode::component : strip_mode::version) / rpath); + + static const char* invalid_location ("invalid repository location"); + + try + { + ipath.normalize (); + } + catch (const invalid_path&) + { + throw invalid_argument (invalid_location); + } + + assert (ipath.relative ()); + + if (!ipath.empty () && *ipath.begin () == "..") + throw invalid_argument (invalid_location); + + return to_string ( + u.proto, strip_d ? strip_domain (u.host) : u.host, u.port, ipath); + } + // repository_manifests // repository_manifests:: |