From e5bfd17637bf297c3cfe509d51027916864092d5 Mon Sep 17 00:00:00 2001 From: Karen Arutyunov Date: Sun, 10 Dec 2017 10:02:19 +0300 Subject: Add basic_url class template --- tests/url/buildfile | 8 ++ tests/url/driver.cxx | 346 ++++++++++++++++++++++++++++++++++++++++++++++ tests/url/testscript | 378 +++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 732 insertions(+) create mode 100644 tests/url/buildfile create mode 100644 tests/url/driver.cxx create mode 100644 tests/url/testscript (limited to 'tests/url') diff --git a/tests/url/buildfile b/tests/url/buildfile new file mode 100644 index 0000000..ed8380c --- /dev/null +++ b/tests/url/buildfile @@ -0,0 +1,8 @@ +# file : tests/url/buildfile +# copyright : Copyright (c) 2014-2017 Code Synthesis Ltd +# license : MIT; see accompanying LICENSE file + +import libs = libbutl%lib{butl} +libs += $stdmod_lib + +exe{driver}: {hxx cxx}{*} $libs test{testscript} diff --git a/tests/url/driver.cxx b/tests/url/driver.cxx new file mode 100644 index 0000000..95fe9cb --- /dev/null +++ b/tests/url/driver.cxx @@ -0,0 +1,346 @@ +// file : tests/url/driver.cxx -*- C++ -*- +// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include + +#ifndef __cpp_lib_modules +#include +#include // move() +#include +#include // back_inserter +#include // invalid_argument +#endif + +// Other includes. 
+ +#ifdef __cpp_modules +#ifdef __cpp_lib_modules +import std.core; +import std.io; +#endif +import butl.url; +import butl.utility; // operator<<(ostream, exception) +#else +#include +#include +#endif + +using namespace std; +using namespace butl; + +enum class scheme +{ + http, + https, + file +}; + +namespace butl +{ + template <> + struct url_traits + { + using string_type = wstring; + using path_type = wstring; + + using scheme_type = scheme; + using authority_type = basic_url_authority; + + static scheme_type + translate_scheme (const string_type& url, + string_type&& scheme, + optional& /*authority*/, + optional& path, + optional& /*query*/, + optional& /*fragment*/) + { + // Note that a real program must compare the scheme case-insensitively. + // + if (scheme == L"http") + return scheme_type::http; + else if (scheme == L"https") + return scheme_type::https; + else if (scheme == L"file") + return scheme_type::file; + else if (scheme.empty ()) + { + // If the URL looks like an absolute filesystem path, then translate it + // to the file URL. If it does not, then leave all the components absent + // to fail with a proper exception description. + // + wchar_t c; + if ((c = url[0]) == '/' || + (url.size () > 2 && alpha (c) && url[1] == ':' && url[2] == '/')) + path = url; + + return scheme_type::file; + } + else + throw invalid_argument ("unknown scheme"); + } + + // Translate the scheme type back to its string representation. + // + static string_type + translate_scheme (string_type&, /*url*/ + const scheme_type& scheme, + const optional& /*authority*/, + const optional& /*path*/, + const optional& /*query*/, + const optional& /*fragment*/) + { + switch (scheme) + { + case scheme_type::http: return L"http"; + case scheme_type::https: return L"https"; + case scheme_type::file: return L"file"; + } + + assert (false); // Can't be here. 
+ return L""; + } + + static path_type + translate_path (string_type&& path) + { + return path_type (move (path)); + } + + static string_type + translate_path (const path_type& path) {return string_type (path);} + }; +} + +// Usages: +// +// argv[0] +// argv[0] [-c|-s|-w] <url> +// +// Perform some basic tests if no URL is provided. Otherwise round-trip the URL +// to STDOUT. URL must contain only ASCII characters. Exit with zero code on +// success. Exit with code one on parsing failure, printing error description +// to STDERR. +// +// -c +// Print the URL components one per line. Print the special '[null]' string +// for absent components. This is the default option if URL is provided. +// +// -s +// Print stringified url object representation. +// +// -w +// Same as above, but use the custom wstring-based url_traits +// implementation for the basic_url template. +// +int +main (int argc, const char* argv[]) +try +{ + using wurl = basic_url; + using wurl_authority = wurl::authority_type; + using wurl_host = wurl::host_type; + + enum class print_mode + { + str, + wstr, + comp + } mode (print_mode::comp); + + int i (1); + for (; i != argc; ++i) + { + string o (argv[i]); + if (o == "-s") + mode = print_mode::str; + else if (o == "-w") + mode = print_mode::wstr; + else if (o == "-c") + mode = print_mode::comp; + else + break; // End of options. + } + + if (i == argc) + { + // Test ctors and operators: URLs built via the different ctor overloads must compare equal. 
+ // + { + wurl u0 ((wstring ())); + assert (u0.empty ()); + assert (u0 == wurl ()); + + wurl u1 (scheme::http, + wurl_authority {wstring (), wurl_host (L"[123]"), 0}, + wstring (L"login"), + wstring (L"q="), + wstring (L"f")); + + assert (!u1.empty ()); + assert (u1 != u0); + + wurl u2 (scheme::http, + wurl_host (L"123", url_host_kind::ipv6), + wstring (L"login"), + wstring (L"q="), + wstring (L"f")); + + assert (u2 == u1); + + wurl u3 (scheme::http, + wurl_host (L"123", url_host_kind::ipv6), + 0, + wstring (L"login"), + wstring (L"q="), + wstring (L"f")); + + assert (u3 == u2); + + wurl u4 (scheme::http, + L"[123]", + wstring (L"login"), + wstring (L"q="), + wstring (L"f")); + + assert (u4 == u3); + + wurl u5 (scheme::http, + L"[123]", + 0, + wstring (L"login"), + wstring (L"q="), + wstring (L"f")); + + assert (u5 == u4); + } + + // Test encode and decode, first with no callback and then with custom character-translating callbacks. + // + { + const char* s ("ABC +"); + string es (url::encode (s)); + + assert (es == "ABC%20%2B"); + string ds (url::decode (es)); + + assert (ds == s); + } + + { + const char* s ("ABC +"); + + string es (url::encode (s, + [] (char& c) -> bool + { + if (c == ' ') + { + c = '+'; + return false; + } + return !url::unreserved (c); + })); + + assert (es == "ABC+%2B"); + + string ds (url::decode (es.c_str (), + [] (char& c) + { + if (c == '+') + c = ' '; + })); + assert (ds == s); + } + { + const wchar_t s[] = L"ABC "; + + wstring es; + wurl::encode (s, s + 4, + back_inserter (es), + [] (wchar_t& c) -> bool + { + if (!alnum (c)) + return true; + + ++c; + return false; + }); + assert (es == L"BCD%20"); + + wstring ds (wurl::decode (es, + [] (wchar_t& c) + { + if (alnum (c)) + --c; + })); + assert (ds == s); + } + } + else // Round-trip the URL. + { + assert (i + 1 == argc); + + const char* ua (argv[i]); + + switch (mode) + { + case print_mode::str: + { + cout << url (ua) << endl; + break; + } + case print_mode::wstr: + { + // Convert ASCII string to wstring. 
+ // + wstring s (ua, ua + strlen (ua)); + + wcout << wurl (s) << endl; + break; + } + case print_mode::comp: + { + // Convert ASCII string to wstring. + // + wstring s (ua, ua + strlen (ua)); + wurl u (s); + + if (!u.empty ()) + { + wstring s; + wcout << wurl::traits::translate_scheme (s, + u.scheme, + nullopt, + nullopt, + nullopt, + nullopt) << endl; + } + else + wcout << L"[null]" << endl; + + if (u.authority) + { + const wchar_t* kinds[] = {L"ipv4", L"ipv6", L"name"}; + const wurl_authority& a (*u.authority); + + wcout << a.user << L'@' << a.host.value << L':' << a.port + << " " << kinds[static_cast (a.host.kind)] << endl; + } + else + wcout << L"[null]" << endl; + + wcout << (u.path ? *u.path : L"[null]") << endl + << (u.query ? *u.query : L"[null]") << endl + << (u.fragment ? *u.fragment : L"[null]") << endl; + break; + } + } + } + + return 0; +} +catch (const invalid_argument& e) +{ + cerr << e << endl; + return 1; +} diff --git a/tests/url/testscript b/tests/url/testscript new file mode 100644 index 0000000..d81f282 --- /dev/null +++ b/tests/url/testscript @@ -0,0 +1,378 @@ +# file : tests/url/testscript +# copyright : Copyright (c) 2014-2017 Code Synthesis Ltd +# license : MIT; see accompanying LICENSE file + +:basic +: +$* + +: components +: +{ + : all + : + $* 'https://user@stage.b2.org:443/libbutl?f=full#description' >>EOO + https + user@stage.b2.org:443 name + libbutl + f=full + description + EOO + + : empty-url + : + $* '' >>EOO + [null] + [null] + [null] + [null] + [null] + EOO + + : no-id + : + { + $* 'file:#f' 2>'no authority, path or query' != 0 : fragment + $* 'file:aaa' 2>'no authority, path or query' != 0 : junk + $* 'file:' 2>'no authority, path or query' != 0 : none + } + + : scheme + : + { + : detected + : + $* 'http://build2.org' >>EOO + http + @build2.org:0 name + [null] + [null] + [null] + EOO + + : deduced + : + $* 'c:/a' >>EOO + file + [null] + c:/a + [null] + [null] + EOO + + $* ':/a' 2>'no scheme' != 0 : none + $* 'http' 2>'no 
scheme' != 0 : unterminated + $* 'ht~tp://a.com' 2>'invalid scheme' != 0 : invalid-char + $* '1http://a.com' 2>'invalid scheme' != 0 : invalid-first-char + } + + : authority + { + : absent + : + $* 'file:/tmp/a' >>EOO + file + [null] + tmp/a + [null] + [null] + EOO + + : empty + : + $* 'file:///tmp/a' >>EOO + file + @:0 name + tmp/a + [null] + [null] + EOO + + : trailing + : + $* 'http://localhost' >>EOO + http + @localhost:0 name + [null] + [null] + [null] + EOO + + : user + : + { + : non-empty + : + $* 'http://admin@localhost' >>EOO + http + admin@localhost:0 name + [null] + [null] + [null] + EOO + + : empty + : + $* 'http://@localhost' >>EOO + http + @localhost:0 name + [null] + [null] + [null] + EOO + } + + : host + : + { + : ipv6 + : + { + : port + : + $* 'http://[1:23]:443' >>EOO + http + @1:23:443 ipv6 + [null] + [null] + [null] + EOO + + : no-port + : + $* 'http://[1:23]' >>EOO + http + @1:23:0 ipv6 + [null] + [null] + [null] + EOO + + $* 'http://[123' 2>'invalid IPv6 address' != 0 : missed-bracket + $* 'http://[123] :80' 2>'invalid IPv6 address' != 0 : extra-char + } + + : ipv4 + : + { + : valid + : + $* 'http://0.10.200.255' >>EOO + http + @0.10.200.255:0 ipv4 + [null] + [null] + [null] + EOO + + : long + : + $* 'http://0.10.200.255.30' >>EOO + http + @0.10.200.255.30:0 name + [null] + [null] + [null] + EOO + + : short + : + $* 'http://0.10.200' >>EOO + http + @0.10.200:0 name + [null] + [null] + [null] + EOO + + : missed + : + $* 'http://0.10..200' >>EOO + http + @0.10..200:0 name + [null] + [null] + [null] + EOO + + : out-of-range + : + $* 'http://0.10.200.256' >>EOO + http + @0.10.200.256:0 name + [null] + [null] + [null] + EOO + } + + : name + : + { + : valid + : + $* 'https://www.b2.org' >>EOO + https + @www.b2.org:0 name + [null] + [null] + [null] + EOO + + : encoded + : + { + : valid + : + $* 'https://www.%62%32.org' >>EOO + https + @www.b2.org:0 name + [null] + [null] + [null] + EOO + + $* 'https://www.%62%3.org' 2>'invalid URL-encoding' != 0 : 
short + $* 'https://www.%62%3x.org' 2>'invalid URL-encoding' != 0 : invalid + $* 'https://www.%62%.org' 2>'invalid URL-encoding' != 0 : absent + } + + $* 'https://www.b|2.org' 2>'invalid host name' != 0 : invalid-char + } + + $* 'http://admin@:80?q=' 2>'no host' != 0: no-host + } + + : port + : + { + : valid + : + $* 'http://build2.org:443' >>EOO + http + @build2.org:443 name + [null] + [null] + [null] + EOO + + $* 'http://build2.org:-433' 2>'invalid port' != 0 : invalid-char + $* 'http://build2.org:70000' 2>'invalid port' != 0 : exceeds-max + $* 'http://build2.org:0' 2>'invalid port' != 0 : zero + } + } + + : path + : + { + : absent + : + $* 'http://b2.org' >>EOO + http + @b2.org:0 name + [null] + [null] + [null] + EOO + + : empty + : + $* 'http://b2.org/' >>EOO + http + @b2.org:0 name + + [null] + [null] + EOO + + : non-empty + : + $* 'http://b2.org/s/q' >>EOO + http + @b2.org:0 name + s/q + [null] + [null] + EOO + + : encoded + : + $* 'http://b2.org/%6F/s' >>EOO + http + @b2.org:0 name + o/s + [null] + [null] + EOO + } + + : query + : + { + : no-fragment + : + $* 'http://b2.org/a?x=foo&y=bar' >>EOO + http + @b2.org:0 name + a + x=foo&y=bar + [null] + EOO + + : fragment + : + $* 'http://b2.org/a?foo#bar' >>EOO + http + @b2.org:0 name + a + foo + bar + EOO + } + + : fragment + : + { + $* 'http://b2.org#foo' >>EOO + http + @b2.org:0 name + [null] + [null] + foo + EOO + } +} + +: string +{ + test.options += -s + + : authority + : + { + : host + : + { + $* 'file:///a' >'file:///a' : empty + $* 'http://1.1.1.1' >'http://1.1.1.1' : ipv4 + $* 'https://[1:2:3]' >'https://[1:2:3]' : ipv6 + $* 'file://a%d1%84' >'file://a%D1%84' : name + } + + $* 'http://admin@localhost' >'http://admin@localhost' : user + $* 'http://localhost:8080' >'http://localhost:8080' : port + $* 'file:/a' >'file:/a' : absent + } + + $* '' >'' : empty + $* 'file:/b%7C2' >'file:/b%7C2' : path + $* 'http://a?q=' >'http://a?q=' : query + $* 'http://a#f' >'http://a#f' : fragment +} + +: wstring +: +{ + u = 
'https://user@stage.b2.org:443/libbutl?f=full#description' + $* -w "$u" >"$u" +} -- cgit v1.1