aboutsummaryrefslogtreecommitdiff
path: root/libbutl/url.mxx
diff options
context:
space:
mode:
authorKaren Arutyunov <karen@codesynthesis.com>2021-09-28 19:24:31 +0300
committerKaren Arutyunov <karen@codesynthesis.com>2021-09-28 20:29:59 +0300
commitdf1ef68cd8e8582724ce1192bfc202e0b9aeaf0c (patch)
treeb731ca4c68e60c00c7e7d499dbf4868ee7b71f44 /libbutl/url.mxx
parent7a4fc37f264cdb67f2f83fa92703c869215bbc86 (diff)
Get rid of C++ modules related code and rename *.mxx files to *.hxx
Diffstat (limited to 'libbutl/url.mxx')
-rw-r--r--libbutl/url.mxx579
1 files changed, 0 insertions, 579 deletions
diff --git a/libbutl/url.mxx b/libbutl/url.mxx
deleted file mode 100644
index 713bc3e..0000000
--- a/libbutl/url.mxx
+++ /dev/null
@@ -1,579 +0,0 @@
-// file : libbutl/url.mxx -*- C++ -*-
-// license : MIT; see accompanying LICENSE file
-
-#ifndef __cpp_modules_ts
-#pragma once
-#endif
-
-// C includes.
-
-#include <cassert>
-
-#ifndef __cpp_lib_modules_ts
-#include <string>
-#include <cstdint> // uint*_t
-#include <utility> // move()
-#include <ostream>
-#include <iterator> // back_inserter
-
-#include <cstddef> // size_t
-#include <stdexcept> // invalid_argument
-#include <algorithm> // find(), find_if()
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-export module butl.url;
-#ifdef __cpp_lib_modules_ts
-import std.core;
-import std.io;
-#endif
-import butl.path;
-import butl.utility;
-import butl.optional;
-
-import butl.small_vector;
-#else
-#include <libbutl/path.mxx>
-#include <libbutl/utility.mxx>
-#include <libbutl/optional.mxx>
-
-#include <libbutl/small-vector.mxx>
-#endif
-
-#include <libbutl/export.hxx>
-
-LIBBUTL_MODEXPORT namespace butl
-{
- // RFC3986 Uniform Resource Locator (URL).
- //
- // <url> = <scheme>:[//[<authority>]][/<path>][?<query>][#<fragment>] |
- // <scheme>:[<path>][?<query>][#<fragment>]
- //
- // <authority> = [<user>@]<host>[:<port>]
- //
- // Some examples of equivalent URLs to meditate upon:
- //
- // file://localhost/tmp (localhost authority)
- // file:///tmp (empty authority)
- // file:/tmp (absent authority)
- //
- // file://localhost/c:/tmp
- // file:///c:/tmp
- // file:/c:/tmp
- //
- // We think of the slash between <authority> and <path> as a separator but
- // with the path always interpreted as starting from the "root" of the
- // authority. Thus:
- //
- // file://localhost/tmp -> 'file'://'localhost'/'tmp' -> /tmp
- // file://localhost/c:/tmp -> 'file'://'localhost'/'c:/tmp' -> c:/tmp
- //
- // This means that the <path> component is represented as a relative path
- // and, in the general case, we cannot use our path type for its storage
- // since it assumes the path is for the host platform. In other words, the
- // interpretation of the path has to take into account the platform of the
- // authority host. Note, however, that a custom url_traits implementation
- // can choose to use the path type if local paths are to be interpreted as
- // relative to the host.
- //
- // For authority-less schemes the <path> component is also represented as a
- // relative path. Some examples of such URLs (let's call them rootless
- // rather than authority-less not to confuse with a case where authority is
- // empty/implied):
- //
- // pkcs11:token=sign;object=SIGN%20key
- // pkcs11:id=%02%38%01?pin-value=12345
- // pkcs11:
- //
- // Note that a scheme can theoretically allow both rootless and "rootfull"
- // representations.
- //
- // Note also that we currently forbid one character schemes to support
- // scheme-less (Windows) paths which can be done by
- // url_traits::translate_scheme() (see below). (A Windows path that uses
- // forward slashes would be parsed as a valid authority-less URL).
-
- // URL host component can be an IPv4 address (if matches its dotted-decimal
- // notation), an IPv6 address (if enclosed in [square brackets]) or
- // otherwise a name.
- //
- // Note that non-ASCII host names are allowed in URLs. They must be
- // UTF8-encoded and URL-encoded afterwards. Curently we store the parsed
- // host name UTF8-encoded without regards to the template argument string
- // type. Later we may add support for more appropriate encodings for
- // multi-byte character types.
- //
- enum class url_host_kind {ipv4, ipv6, name};
-
- template <typename S>
- struct basic_url_host
- {
- using string_type = S;
- using kind_type = url_host_kind;
-
- string_type value;
- kind_type kind;
-
- // Can be treated as const string_type&.
- //
- operator const string_type& () const noexcept {return value;}
-
- // Create an empty host.
- //
- basic_url_host (): kind (kind_type::name) {}
-
- // Create the host object from its string representation as it appears in
- // a URL, throwing std::invalid_argument if invalid. Remove the enclosing
- // square brackets for IPv6 addresses, and URL-decode host names.
- //
- // Note that the 'x:x:x:x:x:x:d.d.d.d' IPv6 address mixed notation is not
- // supported.
- //
- explicit
- basic_url_host (string_type);
-
- basic_url_host (string_type v, kind_type k)
- : value (std::move (v)), kind (k) {}
-
- bool
- empty () const
- {
- assert (kind == kind_type::name || !value.empty ());
- return value.empty ();
- }
-
- // Return string representation of the host as it would appear in a URL.
- //
- string_type
- string () const;
-
- // Normalize the host value in accordance with its type:
- //
- // Name - convert to the lower case. Note: only ASCII names are currently
- // supported.
- //
- // IPv4 - strip the leading zeros in its octets.
- //
- // IPv6 - strip the leading zeros in its groups (hextets), squash the
- // longest zero-only hextet sequence, and convert to the lower case
- // (as per RFC5952).
- //
- // Assume that the host value is valid.
- //
- void
- normalize ();
- };
-
- template <typename S>
- struct basic_url_authority
- {
- using string_type = S;
- using host_type = basic_url_host<string_type>;
-
- string_type user; // Empty if not specified.
- host_type host;
- std::uint16_t port; // Zero if not specified.
-
- bool
- empty () const
- {
- assert (!host.empty () || (user.empty () && port == 0));
- return host.empty ();
- }
-
- // Return a string representation of the URL authority. String
- // representation of an empty instance is the empty string.
- //
- string_type
- string () const;
- };
-
- template <typename H, typename S = H, typename P = S>
- struct url_traits
- {
- using scheme_type = H;
- using string_type = S;
- using path_type = P;
-
- using authority_type = basic_url_authority<string_type>;
-
- // Translate the scheme string representation to its type. May throw
- // std::invalid_argument. May change the URL components. Should not return
- // nullopt if called with a non-empty scheme.
- //
- // This function is called with an empty scheme if the URL has no scheme,
- // the scheme is invalid, or it could not be parsed into components
- // according to the URL syntax. In this case all the passed components
- // reference empty/absent/false values. If nullopt is returned, the URL is
- // considered invalid and the std::invalid_argument exception with an
- // appropriate description is thrown by the URL object constructor. This
- // can be used to support scheme-less URLs, local paths, etc.
- //
- static optional<scheme_type>
- translate_scheme (const string_type& /*url*/,
- string_type&& scheme,
- optional<authority_type>& /*authority*/,
- optional<path_type>& /*path*/,
- optional<string_type>& /*query*/,
- optional<string_type>& /*fragment*/,
- bool& /*rootless*/)
- {
- return !scheme.empty ()
- ? optional<scheme_type> (std::move (scheme))
- : nullopt; // Leave the URL object constructor to throw.
- }
-
- // Translate scheme type back to its string representation.
- //
- // Similar to the above the function is called with an empty string
- // representation. If on return this value is no longer empty, then it is
- // assume the URL has been translated in a custom manner (in which case
- // the returned scheme value is ignored).
- //
- static string_type
- translate_scheme (string_type&, /*url*/
- const scheme_type& scheme,
- const optional<authority_type>& /*authority*/,
- const optional<path_type>& /*path*/,
- const optional<string_type>& /*query*/,
- const optional<string_type>& /*fragment*/,
- bool /*rootless*/)
- {
- return string_type (scheme);
- }
-
- // Translate the URL-encoded path string representation to its type.
- //
- // Note that encoding for non-ASCII paths is not specified (in contrast
- // to the host name), and presumably is local to the referenced authority.
- // Furthermore, for some schemes, the path component can contain encoded
- // binary data, for example for pkcs11.
- //
- static path_type
- translate_path (string_type&&);
-
- // Translate path type back to its URL-encoded string representation.
- //
- static string_type
- translate_path (const path_type&);
-
- // Check whether a string looks like a non-rootless URL by searching for
- // the first ':' (unless its position is specified with the second
- // argument) and then making sure it's both followed by '/' (e.g., http://
- // or file:/) and preceded by a valid scheme at least 2 characters long
- // (so we don't confuse it with an absolute Windows path, e.g., c:/).
- //
- // Return the start of the URL substring or string_type::npos.
- //
- static std::size_t
- find (const string_type&, std::size_t pos = string_type::npos);
- };
-
- template <typename H, // scheme
- typename T = url_traits<H>>
- class basic_url
- {
- public:
- using traits_type = T;
-
- using string_type = typename traits_type::string_type;
- using char_type = typename string_type::value_type;
- using path_type = typename traits_type::path_type;
-
- using scheme_type = typename traits_type::scheme_type;
- using authority_type = typename traits_type::authority_type;
- using host_type = typename authority_type::host_type;
-
- scheme_type scheme;
- optional<authority_type> authority;
- optional<path_type> path;
- optional<string_type> query;
- optional<string_type> fragment;
- bool rootless = false;
-
- // Create an empty URL object.
- //
- basic_url (): scheme (), empty_ (true) {}
-
- // Create the URL object from its string representation. Verify that the
- // string is compliant to the generic URL syntax. URL-decode and validate
- // components with common for all schemes syntax (scheme, host, port).
- // Throw std::invalid_argument if the passed string is not a valid URL
- // representation.
- //
- // Validation and URL-decoding of the scheme-specific components can be
- // provided by a custom url_traits::translate_scheme() implementation.
- //
- explicit
- basic_url (const string_type&);
-
- // Create the URL object from individual components. Performs no
- // components URL-decoding or verification.
- //
- basic_url (scheme_type,
- optional<authority_type>,
- optional<path_type> path,
- optional<string_type> query = nullopt,
- optional<string_type> fragment = nullopt);
-
- basic_url (scheme_type,
- host_type host,
- optional<path_type> path,
- optional<string_type> query = nullopt,
- optional<string_type> fragment = nullopt);
-
- basic_url (scheme_type,
- host_type host,
- std::uint16_t port,
- optional<path_type> path,
- optional<string_type> query = nullopt,
- optional<string_type> fragment = nullopt);
-
- basic_url (scheme_type,
- string_type host,
- optional<path_type> path,
- optional<string_type> query = nullopt,
- optional<string_type> fragment = nullopt);
-
- basic_url (scheme_type,
- string_type host,
- std::uint16_t port,
- optional<path_type> path,
- optional<string_type> query = nullopt,
- optional<string_type> fragment = nullopt);
-
- // Create a rootless URL.
- //
- basic_url (scheme_type,
- optional<path_type> path,
- optional<string_type> query = nullopt,
- optional<string_type> fragment = nullopt);
-
- bool
- empty () const noexcept {return empty_;}
-
- // Return a string representation of the URL. Note that while this is not
- // necessarily syntactically the same string as what was used to
- // initialize this instance, it should be semantically equivalent. String
- // representation of an empty instance is the empty string.
- //
- string_type
- string () const;
-
- // Normalize the URL host, if present.
- //
- void
- normalize ();
-
- // The following predicates can be used to classify URL characters while
- // parsing, validating or encoding scheme-specific components. For the
- // semantics of character classes see RFC3986.
- //
- static bool
- gen_delim (char_type c)
- {
- return c == ':' || c == '/' || c == '?' || c == '#' || c == '[' ||
- c == ']' || c == '@';
- }
-
- static bool
- sub_delim (char_type c)
- {
- return c == '!' || c == '$' || c == '&' || c == '=' || c == '(' ||
- c == ')' || c == '*' || c == '+' || c == ',' || c == ';' ||
- c == '\'';
- }
-
- static bool
- reserved (char_type c) {return sub_delim (c) || gen_delim (c);}
-
- static bool
- unreserved (char_type c)
- {
- return alnum (c) || c == '-' || c == '.' || c =='_' || c == '~';
- }
-
- static bool
- path_char (char_type c)
- {
- return c == '/' || c == ':' || unreserved (c) || c == '@' ||
- sub_delim (c);
- }
-
- // URL-encode a character sequence.
- //
- // Note that the set of characters that should be encoded may differ for
- // different URL components. The optional callback function must return
- // true for characters that should be percent-encoded. The function may
- // encode the passed character in it's own way with another character (but
- // never with '%'), and return false. By default all characters other than
- // unreserved are percent-encoded.
- //
- // Also note that the characters are interpreted as bytes. In other words,
- // each character may result in a single encoding triplet.
- //
- template <typename I, typename O, typename F>
- static void
- encode (I begin, I end, O output, F&& efunc);
-
- template <typename I, typename O>
- static void
- encode (I b, I e, O o)
- {
- encode (b, e, o, [] (char_type& c) {return !unreserved (c);});
- }
-
- template <typename F>
- static string_type
- encode (const string_type& s, F&& f)
- {
- string_type r;
- encode (s.begin (), s.end (), std::back_inserter (r), f);
- return r;
- }
-
- static string_type
- encode (const string_type& s)
- {
- return encode (s, [] (char_type& c) {return !unreserved (c);});
- }
-
- template <typename F>
- static string_type
- encode (const char_type* s, F&& f)
- {
- string_type r;
- encode (s, s + string_type::traits_type::length (s),
- std::back_inserter (r),
- f);
- return r;
- }
-
- static string_type
- encode (const char_type* s)
- {
- return encode (s, [] (char_type& c) {return !unreserved (c);});
- }
-
- // URL-decode a character sequence. Throw std::invalid_argument if an
- // invalid encoding sequence is encountered.
- //
- // If some characters in the sequence are encoded with another characters
- // (rather than percent-encoded), then one must provide the callback
- // function to decode them.
- //
- template <typename I, typename O, typename F>
- static void
- decode (I begin, I end, O output, F&& dfunc);
-
- template <typename I, typename O>
- static void
- decode (I b, I e, O o)
- {
- decode (b, e, o, [] (char_type&) {});
- }
-
- template <typename F>
- static string_type
- decode (const string_type& s, F&& f)
- {
- string_type r;
- decode (s.begin (), s.end (), std::back_inserter (r), f);
- return r;
- }
-
- static string_type
- decode (const string_type& s)
- {
- return decode (s, [] (char_type&) {});
- }
-
- template <typename F>
- static string_type
- decode (const char_type* s, F&& f)
- {
- string_type r;
- decode (s, s + string_type::traits_type::length (s),
- std::back_inserter (r),
- f);
- return r;
- }
-
- static string_type
- decode (const char_type* s)
- {
- return decode (s, [] (char_type&) {});
- }
-
- private:
- bool empty_ = false;
- };
-
- using url_authority = basic_url_authority<std::string>;
- using url = basic_url <std::string>;
-
- template <typename S>
- inline bool
- operator== (const basic_url_host<S>& x, const basic_url_host<S>& y) noexcept
- {
- return x.value == y.value && x.kind == y.kind;
- }
-
- template <typename S>
- inline bool
- operator!= (const basic_url_host<S>& x, const basic_url_host<S>& y) noexcept
- {
- return !(x == y);
- }
-
- template <typename S>
- inline bool
- operator== (const basic_url_authority<S>& x,
- const basic_url_authority<S>& y) noexcept
- {
- return x.user == y.user && x.host == y.host && x.port == y.port;
- }
-
- template <typename S>
- inline bool
- operator!= (const basic_url_authority<S>& x,
- const basic_url_authority<S>& y) noexcept
- {
- return !(x == y);
- }
-
- template <typename S, typename T>
- inline bool
- operator== (const basic_url<S, T>& x, const basic_url<S, T>& y) noexcept
- {
- if (x.empty () || y.empty ())
- return x.empty () == y.empty ();
-
- return x.scheme == y.scheme &&
- x.authority == y.authority &&
- x.path == y.path &&
- x.query == y.query &&
- x.fragment == y.fragment &&
- x.rootless == y.rootless;
- }
-
- template <typename S, typename T>
- inline bool
- operator!= (const basic_url<S, T>& x, const basic_url<S, T>& y) noexcept
- {
- return !(x == y);
- }
-
- template <typename S, typename T>
- inline auto
- operator<< (std::basic_ostream<typename T::string_type::value_type>& o,
- const basic_url<S, T>& u) -> decltype (o)
- {
- return o << u.string ();
- }
-}
-
-#include <libbutl/url.ixx>
-#include <libbutl/url.txx>