From aa0370b08ea8a1ad679a746c7be21a874f264fb6 Mon Sep 17 00:00:00 2001 From: Karen Arutyunov Date: Mon, 8 Aug 2016 00:49:04 +0300 Subject: Add ucase(), lcase(), casecmp(), alpha(), digit(), alnum() --- butl/buildfile | 2 +- butl/path | 4 +- butl/path.ixx | 5 +- butl/process.cxx | 4 +- butl/sha256.cxx | 8 +-- butl/utility | 65 +++++++++++++++++++++- butl/utility.ixx | 136 +++++++++++++++++++++++++++++++++++++++++++++++ tests/buildfile | 2 +- tests/strcase/buildfile | 7 +++ tests/strcase/driver.cxx | 68 ++++++++++++++++++++++++ 10 files changed, 288 insertions(+), 13 deletions(-) create mode 100644 butl/utility.ixx create mode 100644 tests/strcase/buildfile create mode 100644 tests/strcase/driver.cxx diff --git a/butl/buildfile b/butl/buildfile index d2d16ac..7e65b6c 100644 --- a/butl/buildfile +++ b/butl/buildfile @@ -20,7 +20,7 @@ lib{butl}: \ {hxx txx }{ string-table } \ {hxx cxx}{ timestamp } \ {hxx cxx}{ triplet } \ -{hxx }{ utility } \ +{hxx ixx }{ utility } \ {hxx }{ vector-view } \ {hxx }{ version } \ {hxx cxx}{ win32-utility } diff --git a/butl/path b/butl/path index 7322a15..2040f5f 100644 --- a/butl/path +++ b/butl/path @@ -14,6 +14,8 @@ #include +#include + namespace butl { // Wish list/ideas for improvements. @@ -195,7 +197,7 @@ namespace butl for (size_type i (0), n (ln < rn ? 
ln : rn); i != n; ++i) { #ifdef _WIN32 - C lc (tolower (l[i])), rc (tolower (r[i])); + C lc (lcase (l[i])), rc (lcase (r[i])); #else C lc (l[i]), rc (r[i]); #endif diff --git a/butl/path.ixx b/butl/path.ixx index 26a9b3d..d1fa34c 100644 --- a/butl/path.ixx +++ b/butl/path.ixx @@ -3,7 +3,6 @@ // license : MIT; see accompanying LICENSE file #ifdef _WIN32 -# include // tolower(), toupper() # include // towlower(), towupper() #endif @@ -14,7 +13,7 @@ namespace butl inline char path_traits:: tolower (char c) { - return std::tolower (c); + return lcase (c); } template <> @@ -28,7 +27,7 @@ namespace butl inline char path_traits:: toupper (char c) { - return std::toupper (c); + return ucase (c); } template <> diff --git a/butl/process.cxx b/butl/process.cxx index 6b41165..75e8718 100644 --- a/butl/process.cxx +++ b/butl/process.cxx @@ -12,7 +12,6 @@ # include // _open_osfhandle(), _get_osfhandle(), _close() # include // _O_TEXT -# include // _stricmp() @@ CASE # include // _MAX_PATH, getenv() # include // stat # include // stat(), S_IS* @@ -25,6 +24,7 @@ #include +#include // casecmp() #include // fdnull(), fdclose() using namespace std; @@ -288,7 +288,7 @@ namespace butl // support those, it will have to be handled differently. 
// const char* e (r.extension ()); - if (e == nullptr || _stricmp (e, "exe") != 0) // @@ CASE + if (e == nullptr || casecmp (e, "exe") != 0) r += ".exe"; // Only check that the file exists since the executable mode is set diff --git a/butl/sha256.cxx b/butl/sha256.cxx index 7c0b21e..fb1fcc8 100644 --- a/butl/sha256.cxx +++ b/butl/sha256.cxx @@ -9,9 +9,11 @@ #include #include // size_t -#include // isxdigit(), toupper(), tolower() +#include // isxdigit() #include // invalid_argument +#include // ucase(), lcase() + using SHA256_CTX = butl::sha256::context; extern "C" @@ -101,7 +103,7 @@ namespace butl if (i > 0 && i % 2 == 0) f += ":"; - f += toupper (c); + f += ucase (c); } return f; @@ -131,7 +133,7 @@ namespace butl if (!isxdigit (c)) bad (); - s += tolower (c); + s += lcase (c); } } diff --git a/butl/utility b/butl/utility index 101d503..757edc2 100644 --- a/butl/utility +++ b/butl/utility @@ -5,14 +5,73 @@ #ifndef BUTL_UTILITY #define BUTL_UTILITY -#include // std::size_t +#include +#include // size_t #include // forward() -#include // strcmp +#include // strcmp(), strlen() #include namespace butl { + // Convert ASCII character/string case. If there is no upper/lower case + // counterpart, leave the character unchanged. The POSIX locale (also known + // as C locale) must be the current application locale. Otherwise the + // behavior is undefined. + // + // Note that the POSIX locale specifies behaviour on data consisting + // entirely of characters from the portable character set (subset of ASCII + // including 103 non-negative characters and English alphabet letters in + // particular) and the control character set (more about them at + // http://pubs.opengroup.org/onlinepubs/009696899/basedefs/xbd_chap06.html). 
+ // + // Also note that according to the POSIX locale definition the case + // conversion can be applied only to [A-Z] and [a-z] character ranges being + // translated to each other (more about that at + // http://pubs.opengroup.org/onlinepubs/009695399/basedefs/xbd_chap07.html#tag_07_02) + // + char ucase (char); + std::string ucase (const char*, std::size_t = std::string::npos); + std::string ucase (const std::string&); + std::string& ucase (std::string&); + LIBBUTL_EXPORT void ucase (char*, std::size_t); + + char lcase (char); + std::string lcase (const char*, std::size_t = std::string::npos); + std::string lcase (const std::string&); + std::string& lcase (std::string&); + LIBBUTL_EXPORT void lcase (char*, std::size_t); + + // Compare ASCII characters/strings ignoring case. Behave as if characters + // had been converted to the lower case and then byte-compared. Return a + // negative, zero or positive value if the left hand side is less, equal or + // greater than the right hand side, respectively. The POSIX locale (also + // known as C locale) must be the current application locale. Otherwise the + // behavior is undefined. + // + // The optional size argument specifies the maximum number of characters + // to compare. + // + int casecmp (char, char); + + int casecmp (const std::string&, const std::string&, + std::size_t = std::string::npos); + + int casecmp (const std::string&, const char*, + std::size_t = std::string::npos); + + LIBBUTL_EXPORT int casecmp (const char*, const char*, + std::size_t = std::string::npos); + + bool + alpha (char); + + bool + digit (char); + + bool + alnum (char); + // Key comparators (i.e., to be used in sets, maps, etc). 
+ // struct compare_c_string @@ -67,4 +126,6 @@ namespace butl reverse_iterate (T&& x) {return reverse_range (std::forward (x));} } +#include + #endif // BUTL_UTILITY diff --git a/butl/utility.ixx b/butl/utility.ixx new file mode 100644 index 0000000..b0d83c9 --- /dev/null +++ b/butl/utility.ixx @@ -0,0 +1,136 @@ +// file : butl/utility.ixx -*- C++ -*- +// copyright : Copyright (c) 2014-2016 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#ifndef _WIN32 +# include <strings.h> // strcasecmp(), strncasecmp() +#else +# include <string.h> // _stricmp(), _strnicmp() +#endif + +#include <cctype> // toupper(), tolower(), isalpha(), isdigit(), isalnum() + +namespace butl +{ + inline char + ucase (char c) + { + return static_cast<char> (std::toupper (static_cast<unsigned char> (c))); // <cctype>: UB if negative. + } + + inline std::string + ucase (const char* s, std::size_t n) + { + std::string r (s, n == std::string::npos ? std::strlen (s) : n); + return ucase (r); + } + + inline std::string + ucase (const std::string& s) + { + return ucase (s.c_str (), s.size ()); + } + + inline std::string& + ucase (std::string& s) + { + if (std::size_t n = s.size ()) + { + s.front () = s.front (); // Force copy in CoW. + ucase (const_cast<char*> (s.data ()), n); + } + return s; + } + + inline void + ucase (char* s, std::size_t n) + { + for (const char* e (s + n); s != e; ++s) + *s = ucase (*s); + } + + inline char + lcase (char c) + { + return static_cast<char> (std::tolower (static_cast<unsigned char> (c))); // <cctype>: UB if negative. + } + + inline std::string + lcase (const char* s, std::size_t n) + { + std::string r (s, n == std::string::npos ? std::strlen (s) : n); + return lcase (r); + } + + inline std::string + lcase (const std::string& s) + { + return lcase (s.c_str (), s.size ()); + } + + inline std::string& + lcase (std::string& s) + { + if (std::size_t n = s.size ()) + { + s.front () = s.front (); // Force copy in CoW. 
+ lcase (const_cast<char*> (s.data ()), n); + } + return s; + } + + inline void + lcase (char* s, std::size_t n) + { + for (const char* e (s + n); s != e; ++s) + *s = lcase (*s); + } + + inline int + casecmp (char l, char r) + { + l = lcase (l); + r = lcase (r); + return l < r ? -1 : (l > r ? 1 : 0); + } + + inline int + casecmp (const std::string& l, const std::string& r, std::size_t n) + { + return casecmp (l.c_str (), r.c_str (), n); + } + + inline int + casecmp (const std::string& l, const char* r, std::size_t n) + { + return casecmp (l.c_str (), r, n); + } + + inline int + casecmp (const char* l, const char* r, std::size_t n) + { +#ifndef _WIN32 + return n == std::string::npos ? strcasecmp (l, r) : strncasecmp (l, r, n); +#else + return n == std::string::npos ? _stricmp (l, r) : _strnicmp (l, r, n); +#endif + } + + inline bool + alpha (char c) + { + return std::isalpha (static_cast<unsigned char> (c)); // <cctype>: UB if negative. + } + + inline bool + digit (char c) + { + return std::isdigit (static_cast<unsigned char> (c)); // <cctype>: UB if negative. + } + + inline bool + alnum (char c) + { + return std::isalnum (static_cast<unsigned char> (c)); // <cctype>: UB if negative. + } +} diff --git a/tests/buildfile b/tests/buildfile index 36e2ce6..fd2589a 100644 --- a/tests/buildfile +++ b/tests/buildfile @@ -3,7 +3,7 @@ # license : MIT; see accompanying LICENSE file d = base64/ cpfile/ dir-iterator/ fdstream/ link/ pager/ path/ prefix-map/ \ - process/ sha256/ timestamp/ triplet/ + process/ sha256/ strcase/ timestamp/ triplet/ ./: $d include $d diff --git a/tests/strcase/buildfile b/tests/strcase/buildfile new file mode 100644 index 0000000..a18c730 --- /dev/null +++ b/tests/strcase/buildfile @@ -0,0 +1,7 @@ +# file : tests/strcase/buildfile +# copyright : Copyright (c) 2014-2016 Code Synthesis Ltd +# license : MIT; see accompanying LICENSE file + +exe{driver}: cxx{driver} ../../butl/lib{butl} + +include ../../butl/ diff --git a/tests/strcase/driver.cxx b/tests/strcase/driver.cxx new file mode 100644 index 0000000..4741856 --- /dev/null +++ b/tests/strcase/driver.cxx @@ -0,0 +1,68 @@ +// file : tests/strcase/driver.cxx -*- C++ -*- +// 
copyright : Copyright (c) 2014-2016 Code Synthesis Ltd +// license : MIT; see accompanying LICENSE file + +#include +#include + +#include + +using namespace std; +using namespace butl; + +int +main () +{ + const string upper ("+/0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"); + const string lower ("+/0123456789abcdefghijklmnopqrstuvwxyz"); + + assert (casecmp (upper, lower) == 0); + assert (casecmp (upper, lower, upper.size ()) == 0); + assert (casecmp (upper, lower, 100) == 0); + assert (casecmp ("a", "A1") < 0); + assert (casecmp ("A1", "a") > 0); + assert (casecmp ("a", "A1", 1) == 0); + assert (casecmp ("A1", "a", 1) == 0); + assert (casecmp ("a", "b", 0) == 0); + + for (size_t i (0); i < upper.size (); ++i) + { + assert (casecmp (upper[i], lower[i]) == 0); + + if (i > 0) + { + assert (casecmp (upper[i], lower[i - 1]) > 0); + assert (casecmp (lower[i - 1], upper[i]) < 0); + } + } + + // As casecmp() compares strings as if they have been converted to the + // lower case the characters [\]^_` (located between 'Z' and 'a' in the ASCII + // table) evaluate as less than any alphabetic character. + // + string ascii_91_96 ("[\\]^_`"); + for (const auto& c: ascii_91_96) + { + assert (casecmp (&c, "A", 1) < 0); + assert (casecmp (&c, "a", 1) < 0); + } + + assert (ucase (lower) == upper); + assert (lcase (upper) == lower); + + assert (ucase (lower.c_str (), 20) == string (upper, 0, 20)); + assert (lcase (upper.c_str (), 20) == string (lower, 0, 20)); + + assert (ucase (lower.c_str (), 0) == string ()); + assert (lcase (upper.c_str (), 0) == string ()); + + assert (ucase ("") == string ()); + assert (lcase ("") == string ()); + + string s (upper); + assert (lcase (s) == lower); + + s = lower; + ucase (const_cast (s.data ()), s.size ()); + assert (s == upper); +} -- cgit v1.1