From 4fe188dee733c28b8b7d2d6b3e7d7904d1d30b65 Mon Sep 17 00:00:00 2001 From: Karen Arutyunov Date: Sun, 28 Jan 2018 00:56:14 +0300 Subject: Fix repository_url ctor to normalize path --- libbpkg/manifest.cxx | 34 +++++++++++++++++++++++++--- libbpkg/manifest.hxx | 21 ++++++++++++++++- tests/repository-location/driver.cxx | 44 ++++++++++++++++++++++++++++++++++++ 3 files changed, 95 insertions(+), 4 deletions(-) diff --git a/libbpkg/manifest.cxx b/libbpkg/manifest.cxx index 6a5ff23..11282d0 100644 --- a/libbpkg/manifest.cxx +++ b/libbpkg/manifest.cxx @@ -1446,6 +1446,20 @@ namespace bpkg if (path->absolute ()) bad_url ("absolute path"); + + try + { + path->normalize (false /* actual */, true /* cur_empty */); + } + catch (const invalid_path& e) + { + assert (false); // Can't be here as the path is relative. + } + + // URL shouldn't go past the root directory of a WEB server. + // + if (!path->empty () && *path->begin () == "..") + bad_url ("invalid path"); }; if (casecmp (scheme, "http") == 0) @@ -1495,6 +1509,17 @@ namespace bpkg bad_url ("relative path"); #endif + assert (path->absolute ()); + + try + { + path->normalize (); + } + catch (const invalid_path& e) + { + bad_url ("invalid path"); // Goes past the root directory. + } + if (query) bad_url (); @@ -1506,7 +1531,7 @@ namespace bpkg { try { - path = path_type (url); + path = path_type (url).normalize (); } catch (const invalid_path&) { @@ -1836,9 +1861,12 @@ namespace bpkg // and canonical name. So a/b/../c/1/x/../y and a/c/1/y to be considered // as same location. // + // Note that we need to collapse 'example.com/a/..' to 'example.com/', + // rather than to 'example.com/.'. + // try { - up.normalize (); + up.normalize (false /* actual */, remote () /* cur_empty */); } catch (const invalid_path&) { @@ -2216,7 +2244,7 @@ namespace bpkg try { - ipath.normalize (false, true); // Current dir collapses to an empty one. + ipath.normalize (false /* actual */, true /* cur_empty */); } catch (const invalid_path&) { diff --git a/libbpkg/manifest.hxx b/libbpkg/manifest.hxx index e00e551..8c20e76 100644 --- a/libbpkg/manifest.hxx +++ b/libbpkg/manifest.hxx @@ -467,7 +467,9 @@ namespace bpkg // Notes: // // - For an empty URL object all components are absent. For a non-empty one - // the directory path is always present and normalized. + // the path is always present and normalized. The string representation of + // non-empty object with non-empty path never contains the trailing slash + // (except for the root path on POSIX system). // // - For the remote URL object the host name is in the lower case (IPv4/6 are // not supported) and the path is relative. @@ -508,6 +510,23 @@ namespace bpkg // the URL matches the repository type. Throw std::invalid_argument if the // URL object is a relative local path. // + // @@ Note that the repository location string representation may differ + // from the original URL in the presence of the trailing slash. This + // may cause problems with some WEB servers that are sensitive to the + // trailing slash presence/absence. For example: + // + // $ git clone http://git.sv.gnu.org/r/config.git + // warning: redirecting to http://git.savannah.gnu.org/r/config.git/ + // + // Also note that we disregard the slash presence/absence on multiple + // levels: + // + // - reduce absent path to an empty one in + // repository_url_traits::translate_scheme() (so a.com/ becomes a.com) + // - use path::*string() rather than path::*representation() functions + // in repository_url_traits::translate_*() functions + // - may append slash in repository_location ctor + // explicit repository_location (repository_url, repository_type); diff --git a/tests/repository-location/driver.cxx b/tests/repository-location/driver.cxx index 18ccd2a..098ecab 100644 --- a/tests/repository-location/driver.cxx +++ b/tests/repository-location/driver.cxx @@ -439,6 +439,14 @@ namespace bpkg assert (l.canonical_name () == "git:example.com"); } { + repository_url u ("http://git.example.com/a/#master"); + *u.path /= path (".."); + + repository_location l (u, repository_type::git); + assert (l.string () == "http://git.example.com/#master"); + assert (l.canonical_name () == "git:example.com"); + } + { repository_location l (loc ("http://a.com/a/b/../c/1/aa/../bb")); assert (l.string () == "http://a.com/a/c/1/bb"); assert (l.canonical_name () == "bpkg:a.com/a/c/bb"); @@ -740,6 +748,42 @@ namespace bpkg repository_url::host_type ("example.com"), dir_path ("test.git"))); + // For an empty URL object all components are absent. + // + { + repository_url u; + assert (u.empty () && + !u.authority && !u.path && !u.query && !u.fragment); + } + + // Absent path. + // + assert (repository_url ("git://example.com").string () == + "git://example.com/"); + + // Empty path. + // + assert (repository_url ("git://example.com/").string () == + "git://example.com/"); + + // Normalized path. + // + assert (repository_url ("git://example.com/a/..").string () == + "git://example.com/"); + + // No trailing slash. + // + assert (repository_url ("git://example.com/a/").string () == + "git://example.com/a"); + + assert (repository_url ("a/").string () == "a"); + +#ifndef _WIN32 + assert (repository_url ("/a/").string () == "/a"); +#else + assert (repository_url ("c:/a/").string () == "c:\\a"); +#endif + return 0; } } -- cgit v1.1