From df1ef68cd8e8582724ce1192bfc202e0b9aeaf0c Mon Sep 17 00:00:00 2001 From: Karen Arutyunov Date: Tue, 28 Sep 2021 19:24:31 +0300 Subject: Get rid of C++ modules related code and rename *.mxx files to *.hxx --- libbutl/url.mxx | 579 -------------------------------------------------------- 1 file changed, 579 deletions(-) delete mode 100644 libbutl/url.mxx (limited to 'libbutl/url.mxx') diff --git a/libbutl/url.mxx b/libbutl/url.mxx deleted file mode 100644 index 713bc3e..0000000 --- a/libbutl/url.mxx +++ /dev/null @@ -1,579 +0,0 @@ -// file : libbutl/url.mxx -*- C++ -*- -// license : MIT; see accompanying LICENSE file - -#ifndef __cpp_modules_ts -#pragma once -#endif - -// C includes. - -#include - -#ifndef __cpp_lib_modules_ts -#include -#include // uint*_t -#include // move() -#include -#include // back_inserter - -#include // size_t -#include // invalid_argument -#include // find(), find_if() -#endif - -// Other includes. - -#ifdef __cpp_modules_ts -export module butl.url; -#ifdef __cpp_lib_modules_ts -import std.core; -import std.io; -#endif -import butl.path; -import butl.utility; -import butl.optional; - -import butl.small_vector; -#else -#include -#include -#include - -#include -#endif - -#include - -LIBBUTL_MODEXPORT namespace butl -{ - // RFC3986 Uniform Resource Locator (URL). - // - // = :[//[]][/][?][#] | - // :[][?][#] - // - // = [@][:] - // - // Some examples of equivalent URLs to meditate upon: - // - // file://localhost/tmp (localhost authority) - // file:///tmp (empty authority) - // file:/tmp (absent authority) - // - // file://localhost/c:/tmp - // file:///c:/tmp - // file:/c:/tmp - // - // We think of the slash between and as a separator but - // with the path always interpreted as starting from the "root" of the - // authority. Thus: - // - // file://localhost/tmp -> 'file'://'localhost'/'tmp' -> /tmp - // file://localhost/c:/tmp -> 'file'://'localhost'/'c:/tmp' -> c:/tmp - // - // This means that the component is represented as a relative path - // and, in the general case, we cannot use our path type for its storage - // since it assumes the path is for the host platform. In other words, the - // interpretation of the path has to take into account the platform of the - // authority host. Note, however, that a custom url_traits implementation - // can choose to use the path type if local paths are to be interpreted as - // relative to the host. - // - // For authority-less schemes the component is also represented as a - // relative path. Some examples of such URLs (let's call them rootless - // rather than authority-less not to confuse with a case where authority is - // empty/implied): - // - // pkcs11:token=sign;object=SIGN%20key - // pkcs11:id=%02%38%01?pin-value=12345 - // pkcs11: - // - // Note that a scheme can theoretically allow both rootless and "rootfull" - // representations. - // - // Note also that we currently forbid one character schemes to support - // scheme-less (Windows) paths which can be done by - // url_traits::translate_scheme() (see below). (A Windows path that uses - // forward slashes would be parsed as a valid authority-less URL). - - // URL host component can be an IPv4 address (if matches its dotted-decimal - // notation), an IPv6 address (if enclosed in [square brackets]) or - // otherwise a name. - // - // Note that non-ASCII host names are allowed in URLs. They must be - // UTF8-encoded and URL-encoded afterwards. Curently we store the parsed - // host name UTF8-encoded without regards to the template argument string - // type. Later we may add support for more appropriate encodings for - // multi-byte character types. - // - enum class url_host_kind {ipv4, ipv6, name}; - - template - struct basic_url_host - { - using string_type = S; - using kind_type = url_host_kind; - - string_type value; - kind_type kind; - - // Can be treated as const string_type&. - // - operator const string_type& () const noexcept {return value;} - - // Create an empty host. - // - basic_url_host (): kind (kind_type::name) {} - - // Create the host object from its string representation as it appears in - // a URL, throwing std::invalid_argument if invalid. Remove the enclosing - // square brackets for IPv6 addresses, and URL-decode host names. - // - // Note that the 'x:x:x:x:x:x:d.d.d.d' IPv6 address mixed notation is not - // supported. - // - explicit - basic_url_host (string_type); - - basic_url_host (string_type v, kind_type k) - : value (std::move (v)), kind (k) {} - - bool - empty () const - { - assert (kind == kind_type::name || !value.empty ()); - return value.empty (); - } - - // Return string representation of the host as it would appear in a URL. - // - string_type - string () const; - - // Normalize the host value in accordance with its type: - // - // Name - convert to the lower case. Note: only ASCII names are currently - // supported. - // - // IPv4 - strip the leading zeros in its octets. - // - // IPv6 - strip the leading zeros in its groups (hextets), squash the - // longest zero-only hextet sequence, and convert to the lower case - // (as per RFC5952). - // - // Assume that the host value is valid. - // - void - normalize (); - }; - - template - struct basic_url_authority - { - using string_type = S; - using host_type = basic_url_host; - - string_type user; // Empty if not specified. - host_type host; - std::uint16_t port; // Zero if not specified. - - bool - empty () const - { - assert (!host.empty () || (user.empty () && port == 0)); - return host.empty (); - } - - // Return a string representation of the URL authority. String - // representation of an empty instance is the empty string. - // - string_type - string () const; - }; - - template - struct url_traits - { - using scheme_type = H; - using string_type = S; - using path_type = P; - - using authority_type = basic_url_authority; - - // Translate the scheme string representation to its type. May throw - // std::invalid_argument. May change the URL components. Should not return - // nullopt if called with a non-empty scheme. - // - // This function is called with an empty scheme if the URL has no scheme, - // the scheme is invalid, or it could not be parsed into components - // according to the URL syntax. In this case all the passed components - // reference empty/absent/false values. If nullopt is returned, the URL is - // considered invalid and the std::invalid_argument exception with an - // appropriate description is thrown by the URL object constructor. This - // can be used to support scheme-less URLs, local paths, etc. - // - static optional - translate_scheme (const string_type& /*url*/, - string_type&& scheme, - optional& /*authority*/, - optional& /*path*/, - optional& /*query*/, - optional& /*fragment*/, - bool& /*rootless*/) - { - return !scheme.empty () - ? optional (std::move (scheme)) - : nullopt; // Leave the URL object constructor to throw. - } - - // Translate scheme type back to its string representation. - // - // Similar to the above the function is called with an empty string - // representation. If on return this value is no longer empty, then it is - // assume the URL has been translated in a custom manner (in which case - // the returned scheme value is ignored). - // - static string_type - translate_scheme (string_type&, /*url*/ - const scheme_type& scheme, - const optional& /*authority*/, - const optional& /*path*/, - const optional& /*query*/, - const optional& /*fragment*/, - bool /*rootless*/) - { - return string_type (scheme); - } - - // Translate the URL-encoded path string representation to its type. - // - // Note that encoding for non-ASCII paths is not specified (in contrast - // to the host name), and presumably is local to the referenced authority. - // Furthermore, for some schemes, the path component can contain encoded - // binary data, for example for pkcs11. - // - static path_type - translate_path (string_type&&); - - // Translate path type back to its URL-encoded string representation. - // - static string_type - translate_path (const path_type&); - - // Check whether a string looks like a non-rootless URL by searching for - // the first ':' (unless its position is specified with the second - // argument) and then making sure it's both followed by '/' (e.g., http:// - // or file:/) and preceded by a valid scheme at least 2 characters long - // (so we don't confuse it with an absolute Windows path, e.g., c:/). - // - // Return the start of the URL substring or string_type::npos. - // - static std::size_t - find (const string_type&, std::size_t pos = string_type::npos); - }; - - template > - class basic_url - { - public: - using traits_type = T; - - using string_type = typename traits_type::string_type; - using char_type = typename string_type::value_type; - using path_type = typename traits_type::path_type; - - using scheme_type = typename traits_type::scheme_type; - using authority_type = typename traits_type::authority_type; - using host_type = typename authority_type::host_type; - - scheme_type scheme; - optional authority; - optional path; - optional query; - optional fragment; - bool rootless = false; - - // Create an empty URL object. - // - basic_url (): scheme (), empty_ (true) {} - - // Create the URL object from its string representation. Verify that the - // string is compliant to the generic URL syntax. URL-decode and validate - // components with common for all schemes syntax (scheme, host, port). - // Throw std::invalid_argument if the passed string is not a valid URL - // representation. - // - // Validation and URL-decoding of the scheme-specific components can be - // provided by a custom url_traits::translate_scheme() implementation. - // - explicit - basic_url (const string_type&); - - // Create the URL object from individual components. Performs no - // components URL-decoding or verification. - // - basic_url (scheme_type, - optional, - optional path, - optional query = nullopt, - optional fragment = nullopt); - - basic_url (scheme_type, - host_type host, - optional path, - optional query = nullopt, - optional fragment = nullopt); - - basic_url (scheme_type, - host_type host, - std::uint16_t port, - optional path, - optional query = nullopt, - optional fragment = nullopt); - - basic_url (scheme_type, - string_type host, - optional path, - optional query = nullopt, - optional fragment = nullopt); - - basic_url (scheme_type, - string_type host, - std::uint16_t port, - optional path, - optional query = nullopt, - optional fragment = nullopt); - - // Create a rootless URL. - // - basic_url (scheme_type, - optional path, - optional query = nullopt, - optional fragment = nullopt); - - bool - empty () const noexcept {return empty_;} - - // Return a string representation of the URL. Note that while this is not - // necessarily syntactically the same string as what was used to - // initialize this instance, it should be semantically equivalent. String - // representation of an empty instance is the empty string. - // - string_type - string () const; - - // Normalize the URL host, if present. - // - void - normalize (); - - // The following predicates can be used to classify URL characters while - // parsing, validating or encoding scheme-specific components. For the - // semantics of character classes see RFC3986. - // - static bool - gen_delim (char_type c) - { - return c == ':' || c == '/' || c == '?' || c == '#' || c == '[' || - c == ']' || c == '@'; - } - - static bool - sub_delim (char_type c) - { - return c == '!' || c == '$' || c == '&' || c == '=' || c == '(' || - c == ')' || c == '*' || c == '+' || c == ',' || c == ';' || - c == '\''; - } - - static bool - reserved (char_type c) {return sub_delim (c) || gen_delim (c);} - - static bool - unreserved (char_type c) - { - return alnum (c) || c == '-' || c == '.' || c =='_' || c == '~'; - } - - static bool - path_char (char_type c) - { - return c == '/' || c == ':' || unreserved (c) || c == '@' || - sub_delim (c); - } - - // URL-encode a character sequence. - // - // Note that the set of characters that should be encoded may differ for - // different URL components. The optional callback function must return - // true for characters that should be percent-encoded. The function may - // encode the passed character in it's own way with another character (but - // never with '%'), and return false. By default all characters other than - // unreserved are percent-encoded. - // - // Also note that the characters are interpreted as bytes. In other words, - // each character may result in a single encoding triplet. - // - template - static void - encode (I begin, I end, O output, F&& efunc); - - template - static void - encode (I b, I e, O o) - { - encode (b, e, o, [] (char_type& c) {return !unreserved (c);}); - } - - template - static string_type - encode (const string_type& s, F&& f) - { - string_type r; - encode (s.begin (), s.end (), std::back_inserter (r), f); - return r; - } - - static string_type - encode (const string_type& s) - { - return encode (s, [] (char_type& c) {return !unreserved (c);}); - } - - template - static string_type - encode (const char_type* s, F&& f) - { - string_type r; - encode (s, s + string_type::traits_type::length (s), - std::back_inserter (r), - f); - return r; - } - - static string_type - encode (const char_type* s) - { - return encode (s, [] (char_type& c) {return !unreserved (c);}); - } - - // URL-decode a character sequence. Throw std::invalid_argument if an - // invalid encoding sequence is encountered. - // - // If some characters in the sequence are encoded with another characters - // (rather than percent-encoded), then one must provide the callback - // function to decode them. - // - template - static void - decode (I begin, I end, O output, F&& dfunc); - - template - static void - decode (I b, I e, O o) - { - decode (b, e, o, [] (char_type&) {}); - } - - template - static string_type - decode (const string_type& s, F&& f) - { - string_type r; - decode (s.begin (), s.end (), std::back_inserter (r), f); - return r; - } - - static string_type - decode (const string_type& s) - { - return decode (s, [] (char_type&) {}); - } - - template - static string_type - decode (const char_type* s, F&& f) - { - string_type r; - decode (s, s + string_type::traits_type::length (s), - std::back_inserter (r), - f); - return r; - } - - static string_type - decode (const char_type* s) - { - return decode (s, [] (char_type&) {}); - } - - private: - bool empty_ = false; - }; - - using url_authority = basic_url_authority; - using url = basic_url ; - - template - inline bool - operator== (const basic_url_host& x, const basic_url_host& y) noexcept - { - return x.value == y.value && x.kind == y.kind; - } - - template - inline bool - operator!= (const basic_url_host& x, const basic_url_host& y) noexcept - { - return !(x == y); - } - - template - inline bool - operator== (const basic_url_authority& x, - const basic_url_authority& y) noexcept - { - return x.user == y.user && x.host == y.host && x.port == y.port; - } - - template - inline bool - operator!= (const basic_url_authority& x, - const basic_url_authority& y) noexcept - { - return !(x == y); - } - - template - inline bool - operator== (const basic_url& x, const basic_url& y) noexcept - { - if (x.empty () || y.empty ()) - return x.empty () == y.empty (); - - return x.scheme == y.scheme && - x.authority == y.authority && - x.path == y.path && - x.query == y.query && - x.fragment == y.fragment && - x.rootless == y.rootless; - } - - template - inline bool - operator!= (const basic_url& x, const basic_url& y) noexcept - { - return !(x == y); - } - - template - inline auto - operator<< (std::basic_ostream& o, - const basic_url& u) -> decltype (o) - { - return o << u.string (); - } -} - -#include -#include -- cgit v1.1