aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKaren Arutyunov <karen@codesynthesis.com>2018-09-29 00:12:26 +0300
committerKaren Arutyunov <karen@codesynthesis.com>2018-10-09 21:07:02 +0300
commitec9c6f1bbdfd3d86fba493ea56473c0aaf9acad1 (patch)
tree85f063f5fa0ea307d0c8f67e39fdfdd93731cb89
parent758b64bcf02d9269ae4765ab98d7d46396b3e69a (diff)
Add support for rootless URLs
-rw-r--r--libbutl/url.ixx36
-rw-r--r--libbutl/url.mxx122
-rw-r--r--libbutl/url.txx87
-rw-r--r--tests/url/driver.cxx86
-rw-r--r--tests/url/testscript21
5 files changed, 234 insertions, 118 deletions
diff --git a/libbutl/url.ixx b/libbutl/url.ixx
index 2feb347..7a6d86f 100644
--- a/libbutl/url.ixx
+++ b/libbutl/url.ixx
@@ -4,6 +4,28 @@
LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason.
{
+ // url_traits
+ //
+ template <typename H, typename S, typename P>
+ inline typename url_traits<H, S, P>::path_type url_traits<H, S, P>::
+ translate_path (string_type&& path)
+ {
+ return path_type (basic_url<string_type>::decode (path));
+ }
+
+ template <typename H, typename S, typename P>
+ inline typename url_traits<H, S, P>::string_type url_traits<H, S, P>::
+ translate_path (const path_type& path)
+ {
+ using url = basic_url<string_type>;
+
+ return url::encode (
+ string_type (path),
+ [] (typename url::char_type& c) {return !url::path_char (c);});
+ }
+
+ // basic_url
+ //
template <typename S, typename T>
inline basic_url<S, T>::
basic_url (scheme_type s,
@@ -81,4 +103,18 @@ LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason.
std::move (f))
{
}
+
+ template <typename S, typename T>
+ inline basic_url<S, T>::
+ basic_url (scheme_type s,
+ optional<path_type> p,
+ optional<string_type> q,
+ optional<string_type> f)
+ : scheme (std::move (s)),
+ path (std::move (p)),
+ query (std::move (q)),
+ fragment (std::move (f)),
+ rootless (true)
+ {
+ }
}
diff --git a/libbutl/url.mxx b/libbutl/url.mxx
index c696eaa..6e7acde 100644
--- a/libbutl/url.mxx
+++ b/libbutl/url.mxx
@@ -44,7 +44,9 @@ LIBBUTL_MODEXPORT namespace butl
{
// RFC3986 Uniform Resource Locator (URL).
//
- // <url> = <scheme>:[//[<authority>]][/<path>][?<query>][#<fragment>]
+ // <url> = <scheme>:[//[<authority>]][/<path>][?<query>][#<fragment>] |
+ // <scheme>:[<path>][?<query>][#<fragment>]
+ //
// <authority> = [<user>@]<host>[:<port>]
//
// Some examples of equivalent URLs to meditate upon:
@@ -72,10 +74,22 @@ LIBBUTL_MODEXPORT namespace butl
// can choose to use the path type if local paths are to be interpreted as
// relative to the host.
//
- // Note that we currently forbid one character schemes to support scheme-
- // less (Windows) paths which can be done by url_traits::translate_scheme()
- // (see below). (A Windows path that uses forward slashes would be parsed as
- // a valid authority-less URL).
+ // For authority-less schemes the <path> component is also represented as a
+ // relative path. Some examples of such URLs (let's call them rootless
+ // rather than authority-less so as not to confuse them with the case where
+ // the authority is empty/implied):
+ //
+ // pkcs11:token=sign;object=SIGN%20key
+ // pkcs11:id=%02%38%01?pin-value=12345
+ // pkcs11:
+ //
+ // Note that a scheme can theoretically allow both rootless and "rootfull"
+ // representations.
+ //
+ // Note also that we currently forbid one-character schemes to support
+ // scheme-less (Windows) paths which can be done by
+ // url_traits::translate_scheme() (see below). (A Windows path that uses
+ // forward slashes would be parsed as a valid authority-less URL).
// URL host component can be an IPv4 address (if matches its dotted-decimal
// notation), an IPv6 address (if enclosed in [square brackets]) or
@@ -165,23 +179,25 @@ LIBBUTL_MODEXPORT namespace butl
using authority_type = basic_url_authority<string_type>;
// Translate the scheme string representation to its type. May throw
- // std::invalid_argument. May change the URL components.
+ // std::invalid_argument. May change the URL components. Should not return
+ // nullopt if called with a non-empty scheme.
//
// This function is called with an empty scheme if the URL has no scheme,
// the scheme is invalid, or it could not be parsed into components
// according to the URL syntax. In this case all the passed components
- // reference empty/absent values and if they remain unchanged on return,
- // the URL is considered invalid and the std::invalid_argument exception
- // with an appropriate description is thrown by the URL object constructor.
- // This can be used to support scheme-less URLs, local paths, etc.
+ // reference empty/absent/false values. If nullopt is returned, the URL is
+ // considered invalid and the std::invalid_argument exception with an
+ // appropriate description is thrown by the URL object constructor. This
+ // can be used to support scheme-less URLs, local paths, etc.
//
- static scheme_type
+ static optional<scheme_type>
translate_scheme (const string_type& /*url*/,
string_type&& scheme,
optional<authority_type>& /*authority*/,
optional<path_type>& /*path*/,
optional<string_type>& /*query*/,
- optional<string_type>& /*fragment*/)
+ optional<string_type>& /*fragment*/,
+ bool& /*rootless*/)
{
return scheme_type (std::move (scheme));
}
@@ -195,33 +211,36 @@ LIBBUTL_MODEXPORT namespace butl
//
static string_type
translate_scheme (string_type&, /*url*/
- const scheme_type& scheme,
+ const scheme_type& scheme,
const optional<authority_type>& /*authority*/,
const optional<path_type>& /*path*/,
const optional<string_type>& /*query*/,
- const optional<string_type>& /*fragment*/)
+ const optional<string_type>& /*fragment*/,
+ bool /*rootless*/)
{
return string_type (scheme);
}
- // Translate the path string representation to its type.
+ // Translate the URL-encoded path string representation to its type.
+ //
+ // Note that encoding for non-ASCII paths is not specified (in contrast
+ // to the host name), and presumably is local to the referenced authority.
+ // Furthermore, for some schemes, the path component can contain encoded
+ // binary data, for example for pkcs11.
//
static path_type
- translate_path (string_type&& path)
- {
- return path_type (std::move (path));
- }
+ translate_path (string_type&&);
- // Translate path type back to its string representation.
+ // Translate path type back to its URL-encoded string representation.
//
static string_type
- translate_path (const path_type& path) {return string_type (path);}
+ translate_path (const path_type&);
- // Check whether a string looks like a URL by searching for the first ':'
- // (unless its position is specified with the second argument) and then
- // making sure it's followed by '/' (e.g., http:// or file:/) and preceded
- // by the scheme at least 2 characters long (so we don't confuse it with
- // an absolute Windows path, e.g., c:/).
+ // Check whether a string looks like a non-rootless URL by searching for
+ // the first ':' (unless its position is specified with the second
+ // argument) and then making sure it's both followed by '/' (e.g., http://
+ // or file:/) and preceded by a valid scheme at least 2 characters long
+ // (so we don't confuse it with an absolute Windows path, e.g., c:/).
//
// Return the start of the URL substring or string_type::npos.
//
@@ -249,16 +268,17 @@ LIBBUTL_MODEXPORT namespace butl
optional<path_type> path;
optional<string_type> query;
optional<string_type> fragment;
+ bool rootless = false;
// Create an empty URL object.
//
- basic_url () = default;
+ basic_url (): empty_ (true) {}
// Create the URL object from its string representation. Verify that the
// string is compliant to the generic URL syntax. URL-decode and validate
- // components with common for all schemes syntax (scheme, host, port,
- // path). Throw std::invalid_argument if the passed string is not a valid
- // URL representation.
+ // components whose syntax is common to all schemes (scheme, host, port).
+ // Throw std::invalid_argument if the passed string is not a valid URL
+ // representation.
//
// Validation and URL-decoding of the scheme-specific components can be
// provided by a custom url_traits::translate_scheme() implementation.
@@ -301,12 +321,15 @@ LIBBUTL_MODEXPORT namespace butl
optional<string_type> query = nullopt,
optional<string_type> fragment = nullopt);
+ // Create a rootless URL.
+ //
+ basic_url (scheme_type,
+ optional<path_type> path,
+ optional<string_type> query = nullopt,
+ optional<string_type> fragment = nullopt);
+
bool
- empty () const noexcept
- {
- assert (authority || path || query || !fragment);
- return !authority && !path && !query;
- }
+ empty () const noexcept {return empty_;}
// Return a string representation of the URL. Note that while this is not
// necessarily syntactically the same string as what was used to
@@ -344,6 +367,13 @@ LIBBUTL_MODEXPORT namespace butl
return alnum (c) || c == '-' || c == '.' || c =='_' || c == '~';
}
+ static bool
+ path_char (char_type c)
+ {
+ return c == '/' || c == ':' || unreserved (c) || c == '@' ||
+ sub_delim (c);
+ }
+
// URL-encode a character sequence.
//
// Note that the set of characters that should be encoded may differ for
@@ -415,6 +445,9 @@ LIBBUTL_MODEXPORT namespace butl
std::back_inserter (r), f);
return r;
}
+
+ private:
+ bool empty_ = false;
};
using url_authority = basic_url_authority<std::string>;
@@ -454,16 +487,15 @@ LIBBUTL_MODEXPORT namespace butl
inline bool
operator== (const basic_url<S, T>& x, const basic_url<S, T>& y) noexcept
{
- if (!(x.authority == y.authority && x.path == y.path &&
- x.query == y.query && x.fragment == y.fragment))
- return false;
-
- assert (x.empty () == y.empty ());
-
- if (x.empty ())
- return true;
-
- return x.scheme == y.scheme; // None is empty, so schemes are valid.
+ if (x.empty () || y.empty ())
+ return x.empty () == y.empty ();
+
+ return x.scheme == y.scheme &&
+ x.authority == y.authority &&
+ x.path == y.path &&
+ x.query == y.query &&
+ x.fragment == y.fragment &&
+ x.rootless == y.rootless;
}
template <typename S, typename T>
diff --git a/libbutl/url.txx b/libbutl/url.txx
index b520509..2a2a215 100644
--- a/libbutl/url.txx
+++ b/libbutl/url.txx
@@ -6,16 +6,6 @@ LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason.
{
// Convenience functions.
//
- template <typename C>
- inline bool
- url_path_char (C c)
- {
- using url = basic_url<std::basic_string<C>>;
-
- return c == '/' || c == ':' || url::unreserved (c) ||
- c == '@' || url::sub_delim (c);
- }
-
// basic_url_host
//
template <typename S>
@@ -224,7 +214,7 @@ LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason.
{
using namespace std;
- using iterator = typename string_type::const_iterator;
+ using iterator = typename string_type::const_iterator;
try
{
@@ -329,26 +319,23 @@ LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason.
// Extract path.
//
- if (i != e && *i == '/')
+ if (i != e && *i != '?' && *i != '#')
{
- ++i; // Skip '/'.
+ rootless = *i != '/';
- // Verify and URL-decode the path.
+ if (!rootless)
+ ++i;
+
+ // Verify and translate the path.
//
iterator j (i);
for(char_type c; j != e && (c = *j) != '?' && c != '#'; ++j)
{
- if (!(url_path_char (c) || c == '%'))
+ if (!(path_char (c) || c == '%'))
throw invalid_argument ("invalid path");
}
- // Note that encoding for non-ASCII path is not specified (in contrast
- // to the host name), and presumably is local to the referenced
- // authority.
- //
- string_type s;
- decode (i, j, back_inserter (s));
- path = traits::translate_path (move (s));
+ path = traits::translate_path (string_type (i, j));
i = j;
}
@@ -369,11 +356,6 @@ LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason.
i = qe;
}
- // We don't suppose to end up with an empty URL.
- //
- if (empty ())
- throw invalid_argument ("no authority, path or query");
-
// Parse fragment.
//
if (i != e)
@@ -390,16 +372,19 @@ LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason.
// Translate the scheme string representation to its type.
//
- scheme = traits::translate_scheme (u,
- move (sc),
- authority,
- path,
- query,
- fragment);
+ optional<scheme_type> s (traits::translate_scheme (u,
+ move (sc),
+ authority,
+ path,
+ query,
+ fragment,
+ rootless));
+ assert (s);
+ scheme = *s;
}
// If we fail to parse the URL, then delegate this job to
- // traits::translate_scheme(). If it also fails, leaving the components
- // absent, then we re-throw.
+ // traits::translate_scheme(). If it also fails, returning nullopt, then
+ // we re-throw.
//
catch (const invalid_argument&)
{
@@ -407,16 +392,20 @@ LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason.
path = nullopt;
query = nullopt;
fragment = nullopt;
-
- scheme = traits::translate_scheme (u,
- string_type () /* scheme */,
- authority,
- path,
- query,
- fragment);
-
- if (!authority && !path && !query && !fragment)
+ rootless = false;
+
+ optional<scheme_type> s (
+ traits::translate_scheme (u,
+ string_type () /* scheme */,
+ authority,
+ path,
+ query,
+ fragment,
+ rootless));
+ if (!s)
throw;
+
+ scheme = *s;
}
}
@@ -433,7 +422,8 @@ LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason.
authority,
path,
query,
- fragment));
+ fragment,
+ rootless));
// Return the custom URL object representation if provided.
//
@@ -454,9 +444,10 @@ LIBBUTL_MODEXPORT namespace butl //@@ MOD Clang needs this for some reason.
if (path)
{
- r += '/';
- r += encode (traits::translate_path (*path),
- [] (char_type& c) {return !url_path_char (c);});
+ if (!rootless)
+ r += '/';
+
+ r += traits::translate_path (*path);
}
if (query)
diff --git a/tests/url/driver.cxx b/tests/url/driver.cxx
index 64bc2b5..dd124cd 100644
--- a/tests/url/driver.cxx
+++ b/tests/url/driver.cxx
@@ -33,7 +33,8 @@ enum class scheme
{
http,
https,
- file
+ file,
+ pkcs11
};
namespace butl
@@ -47,37 +48,63 @@ namespace butl
using scheme_type = scheme;
using authority_type = basic_url_authority<string_type>;
- static scheme_type
+ static optional<scheme_type>
translate_scheme (const string_type& url,
string_type&& scheme,
- optional<authority_type>& /*authority*/,
+ optional<authority_type>& authority,
optional<path_type>& path,
- optional<string_type>& /*query*/,
- optional<string_type>& /*fragment*/)
+ optional<string_type>& query,
+ optional<string_type>& /*fragment*/,
+ bool& rootless)
{
- // Note that we must compare case-insensitive in the real program.
- //
- if (scheme == L"http")
- return scheme_type::http;
- else if (scheme == L"https")
- return scheme_type::https;
- else if (scheme == L"file")
- return scheme_type::file;
- else if (scheme.empty ())
+ if (scheme.empty ())
{
// If the URL looks like an absolute filesystem path, then translate it
- // to the file URL. If it is not, then leave all the components absent
- // to fail with a proper exception description.
+ // to the file URL. If it is not, then return nullopt to fail with a
+ // proper exception description.
//
wchar_t c;
if ((c = url[0]) == '/' ||
(url.size () > 2 && alpha (c) && url[1] == ':' && url[2] == '/'))
+ {
path = url;
+ rootless = false;
+ return scheme_type::file;
+ }
- return scheme_type::file;
+ return nullopt;
}
+
+ scheme_type t;
+
+ // Note that a real program must compare the scheme case-insensitively.
+ //
+ if (scheme == L"http")
+ t = scheme_type::http;
+ else if (scheme == L"https")
+ t = scheme_type::https;
+ else if (scheme == L"file")
+ t = scheme_type::file;
+ else if (scheme == L"pkcs11")
+ t = scheme_type::pkcs11;
else
throw invalid_argument ("unknown scheme");
+
+ if (t != scheme_type::pkcs11 && !authority && !path && !query)
+ throw invalid_argument ("no authority, path or query");
+
+ if (path)
+ {
+ if (t == scheme_type::pkcs11)
+ {
+ if (!rootless || path->find (L'/') != string_type::npos)
+ throw invalid_argument ("unexpected slash");
+ }
+ else if (rootless)
+ throw invalid_argument ("rootless path");
+ }
+
+ return t;
}
// Translate scheme type back to its string representation.
@@ -88,13 +115,15 @@ namespace butl
const optional<authority_type>& /*authority*/,
const optional<path_type>& /*path*/,
const optional<string_type>& /*query*/,
- const optional<string_type>& /*fragment*/)
+ const optional<string_type>& /*fragment*/,
+ bool /*rootless*/)
{
switch (scheme)
{
- case scheme_type::http: return L"http";
- case scheme_type::https: return L"https";
- case scheme_type::file: return L"file";
+ case scheme_type::http: return L"http";
+ case scheme_type::https: return L"https";
+ case scheme_type::file: return L"file";
+ case scheme_type::pkcs11: return L"pkcs11";
}
assert (false); // Can't be here.
@@ -104,11 +133,19 @@ namespace butl
static path_type
translate_path (string_type&& path)
{
- return path_type (move (path));
+ // Note that a real pkcs11-supporting URL most likely would keep the
+ // path URL-encoded as its components can contain binary data. Or, it
+ // would split the path into components before decoding them.
+ //
+ return path_type (basic_url<string_type>::decode (path));
}
static string_type
- translate_path (const path_type& path) {return string_type (path);}
+ translate_path (const path_type& path)
+ {
+ using url = basic_url<string_type>;
+ return url::encode (path, [] (wchar_t& c) {return !url::path_char (c);});
+ }
};
}
@@ -317,7 +354,8 @@ try
nullopt,
nullopt,
nullopt,
- nullopt) << endl;
+ nullopt,
+ false) << endl;
}
else
wcout << L"[null]" << endl;
diff --git a/tests/url/testscript b/tests/url/testscript
index 4166007..05cc528 100644
--- a/tests/url/testscript
+++ b/tests/url/testscript
@@ -33,7 +33,6 @@ $*
:
{
$* 'file:#f' 2>'no authority, path or query' != 0 : fragment
- $* 'file:aaa' 2>'no authority, path or query' != 0 : junk
$* 'file:' 2>'no authority, path or query' != 0 : none
}
@@ -324,6 +323,24 @@ $*
[null]
[null]
EOO
+
+ $* 'http:a/b/c' 2>'rootless path' != 0 : rootless-path
+ $* 'pkcs11:/abc' 2>'unexpected slash' != 0 : unexpected-slash1
+ $* 'pkcs11:a/bc' 2>'unexpected slash' != 0 : unexpected-slash2
+ }
+
+ : rootless
+ :
+ {
+ : non-empty
+ :
+ $* 'pkcs11:token=sign;object=SIGN%20key' >>EOO
+ pkcs11
+ [null]
+ token=sign;object=SIGN key
+ [null]
+ [null]
+ EOO
}
: query
@@ -388,6 +405,8 @@ $*
$* 'file:/b%7C2' >'file:/b%7C2' : path
$* 'http://a?q=' >'http://a?q=' : query
$* 'http://a#f' >'http://a#f' : fragment
+
+ $* 'pkcs11:object=SIGN%20key' >'pkcs11:object=SIGN%20key' : rootless
}
: wstring