From 2b14f09675c10d999779858ae31934b7eef55b89 Mon Sep 17 00:00:00 2001 From: Karen Arutyunov Date: Thu, 3 Sep 2020 20:23:45 +0300 Subject: Add normalize() function to host/URL class templates Also add IPv6 verification to host constructor. --- libbutl/url.mxx | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) (limited to 'libbutl/url.mxx') diff --git a/libbutl/url.mxx b/libbutl/url.mxx index 3ced734..713bc3e 100644 --- a/libbutl/url.mxx +++ b/libbutl/url.mxx @@ -18,6 +18,7 @@ #include <cstddef> // size_t #include <stdexcept> // invalid_argument +#include <algorithm> // find(), find_if() #endif // Other includes. @@ -31,10 +32,14 @@ import std.io; import butl.path; import butl.utility; import butl.optional; + +import butl.small_vector; #else #include #include #include + +#include #endif #include @@ -123,7 +128,8 @@ LIBBUTL_MODEXPORT namespace butl // a URL, throwing std::invalid_argument if invalid. Remove the enclosing // square brackets for IPv6 addresses, and URL-decode host names. // - // Note that currently we don't validate IPv6 addresses. + // Note that the 'x:x:x:x:x:x:d.d.d.d' IPv6 address mixed notation is not + // supported. // explicit basic_url_host (string_type); @@ -142,6 +148,22 @@ LIBBUTL_MODEXPORT namespace butl // string_type string () const; + + // Normalize the host value in accordance with its type: + // + // Name - convert to the lower case. Note: only ASCII names are currently + // supported. + // + // IPv4 - strip the leading zeros in its octets. + // + // IPv6 - strip the leading zeros in its groups (hextets), squash the + // longest zero-only hextet sequence, and convert to the lower case + // (as per RFC5952). + // + // Assume that the host value is valid. + // + void + normalize (); }; template @@ -340,6 +362,11 @@ LIBBUTL_MODEXPORT namespace butl string_type string () const; + // Normalize the URL host, if present. 
+ // + void + normalize (); + // The following predicates can be used to classify URL characters while // parsing, validating or encoding scheme-specific components. For the // semantics of character classes see RFC3986. -- cgit v1.1