aboutsummaryrefslogtreecommitdiff
path: root/libbutl/utility.mxx
blob: 78c9355ab4e264e6d95abbe37aa0c17af8b23ddb (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
// file      : libbutl/utility.mxx -*- C++ -*-
// license   : MIT; see accompanying LICENSE file

#ifndef __cpp_modules_ts
#pragma once
#endif

#ifndef _WIN32
#  include <strings.h> // strcasecmp(), strncasecmp()
#else
#  include <string.h> // _stricmp(), _strnicmp()
#endif

#ifndef __cpp_lib_modules_ts
#include <string>
#include <iosfwd>       // ostream
#include <istream>
#include <cstddef>      // size_t
#include <utility>      // move(), forward(), pair
#include <cstring>      // strcmp(), strlen()
#include <exception>    // exception, uncaught_exception[s]()
//#include <functional> // hash
#endif

#include <libbutl/ft/lang.hxx>      // thread_local
#include <libbutl/ft/exception.hxx> // uncaught_exceptions

#ifdef __cpp_modules_ts
export module butl.utility;
#ifdef __cpp_lib_modules_ts
import std.core;
import std.io;
#endif
import butl.utf8;
import butl.unicode;
import butl.optional;
#else
#include <libbutl/utf8.mxx>
#include <libbutl/unicode.mxx>
#include <libbutl/optional.mxx>
#endif

#include <libbutl/export.hxx>

LIBBUTL_MODEXPORT namespace butl
{
  // Throw std::system_error with generic_category or system_category,
  // respectively.
  //
  // The generic version should be used for portable errno codes (those that
  // are mapped to std::errc). The system version should be used for platform-
  // specific codes, for example, additional errno codes on POSIX systems or
  // the result of GetLastError() on Windows.
  //
  // See also the exception sanitization below.
  //
  [[noreturn]] LIBBUTL_SYMEXPORT void
  throw_generic_error (int errno_code, const char* what = nullptr);

  [[noreturn]] LIBBUTL_SYMEXPORT void
  throw_system_error (int system_code, int fallback_errno_code = 0);

  // Throw std::ios::failure with the specified description and, if it is
  // derived from std::system_error (as it should), error code.
  //
  [[noreturn]] LIBBUTL_SYMEXPORT void
  throw_generic_ios_failure (int errno_code, const char* what = nullptr);

  [[noreturn]] LIBBUTL_SYMEXPORT void
  throw_system_ios_failure (int system_code, const char* what = nullptr);

  // Convert ASCII character/string case. If there is no upper/lower case
  // counterpart, leave the character unchanged. The POSIX locale (also known
  // as C locale) must be the current application locale. Otherwise the
  // behavior is undefined.
  //
  // Note that the POSIX locale specifies behaviour on data consisting
  // entirely of characters from the portable character set (subset of ASCII
  // including 103 non-negative characters and English alphabet letters in
  // particular) and the control character set (more about them at
  // http://pubs.opengroup.org/onlinepubs/009696899/basedefs/xbd_chap06.html).
  //
  // Also note that according to the POSIX locale definition the case
  // conversion can be applied only to [A-Z] and [a-z] character ranges being
  // translated to each other (more about that at
  // http://pubs.opengroup.org/onlinepubs/009695399/basedefs/xbd_chap07.html#tag_07_02)
  //
  char ucase (char);
  std::string ucase (const char*, std::size_t = std::string::npos);

  std::string ucase (const std::string&);
  std::string& ucase (std::string&);
  void ucase (char*, std::size_t);

  char lcase (char);
  std::string lcase (const char*, std::size_t = std::string::npos);
  std::string lcase (const std::string&);
  std::string& lcase (std::string&);
  void lcase (char*, std::size_t);

  // Compare ASCII characters/strings ignoring case. Behave as if characters
  // had been converted to the lower case and then byte-compared. Return a
  // negative, zero or positive value if the left hand side is less, equal or
  // greater than the right hand side, respectively. The POSIX locale (also
  // known as C locale) must be the current application locale. Otherwise the
  // behavior is undefined.
  //
  // The optional size argument specifies the maximum number of characters
  // to compare.
  //
  int icasecmp (char, char);

  int icasecmp (const std::string&, const std::string&,
                std::size_t = std::string::npos);

  int icasecmp (const std::string&, const char*,
                std::size_t = std::string::npos);

  int icasecmp (const char*, const char*, std::size_t = std::string::npos);

  // Case-insensitive key comparators (i.e., to be used in sets, maps, etc).
  //
  struct icase_compare_string
  {
    // Return true if x is less than y according to icasecmp(), that is, when
    // compared ignoring ASCII case.
    //
    bool
    operator() (const std::string& x, const std::string& y) const
    {
      const int r (icasecmp (x, y));
      return r < 0;
    }
  };

  struct icase_compare_c_string
  {
    // Return true if the C string x is less than the C string y according to
    // icasecmp(), that is, when compared ignoring ASCII case.
    //
    bool
    operator() (const char* x, const char* y) const
    {
      const int r (icasecmp (x, y));
      return r < 0;
    }
  };

  bool alpha  (char);
  bool digit  (char);
  bool alnum  (char);
  bool xdigit (char);

  bool alpha  (wchar_t);
  bool digit  (wchar_t);
  bool alnum  (wchar_t);
  bool xdigit (wchar_t);

  // Basic string utilities.
  //

  // Trim leading/trailing whitespaces, including '\n' and '\r'.
  //
  LIBBUTL_SYMEXPORT std::string&
  trim (std::string&);

  // Rvalue overload: trim the passed string in place using the lvalue
  // version and return the result by value, moved out of the argument.
  //
  inline std::string
  trim (std::string&& s)
  {
    // Qualify move() explicitly rather than relying on ADL: this is
    // consistent with the rest of this header and avoids compiler warnings
    // about unqualified calls to std::move().
    //
    return std::move (trim (s));
  }

  // Find the beginning and end positions of the next word. Return the size
  // of the word or 0 and set b = e = n if there are no more words. For
  // example:
  //
  // for (size_t b (0), e (0); next_word (s, b, e); )
  // {
  //   string w (s, b, e - b);
  // }
  //
  // Or:
  //
  // for (size_t b (0), e (0), n; n = next_word (s, b, e, ' ', ','); )
  // {
  //   string w (s, b, n);
  // }
  //
  // The second version examines up to the n'th character in the string.
  //
  std::size_t
  next_word (const std::string&, std::size_t& b, std::size_t& e,
             char d1 = ' ', char d2 = '\0');

  std::size_t
  next_word (const std::string&, std::size_t n, std::size_t& b, std::size_t& e,
             char d1 = ' ', char d2 = '\0');

  // Sanitize a string to only contain characters valid in an identifier
  // (ASCII alphanumeric plus `_`) replacing all others with `_`.
  //
  // Note that it doesn't make sure the first character is not a digit.
  //
  std::string& sanitize_identifier (std::string&);
  std::string  sanitize_identifier (std::string&&);
  std::string  sanitize_identifier (const std::string&);

  // Sanitize a string (e.g., a path) to be a valid C string literal by
  // escaping backslashes, double-quotes, and newlines.
  //
  // Note that in the second version the result is appended to out.
  //
  std::string sanitize_strlit (const std::string&);
  void        sanitize_strlit (const std::string&, std::string& out);

  // Return true if the string is a valid UTF-8 encoded byte string and,
  // optionally, its decoded codepoints belong to the specified types or
  // codepoint whitelist.
  //
  bool
  utf8 (const std::string&,
        codepoint_types = codepoint_types::any,
        const char32_t* whitelist = nullptr);

  // As above but in case of an invalid sequence also return the description
  // of why it is invalid.
  //
  bool
  utf8 (const std::string&,
        std::string& what,
        codepoint_types = codepoint_types::any,
        const char32_t* whitelist = nullptr);

  // Return UTF-8 byte string length in codepoints. Throw
  // std::invalid_argument if this is not a valid UTF-8.
  //
  std::size_t
  utf8_length (const std::string&,
               codepoint_types = codepoint_types::any,
               const char32_t* whitelist = nullptr);

  // Fixup the specified string (in place) to be valid UTF-8 replacing invalid
  // bytes and codepoints with the specified character, for example, '?'.
  //
  // Potential future improvements:
  //  - char32_t replacement (will need UTF-8 encoding)
  //  - different replacement for bytes and codepoints
  //
  LIBBUTL_SYMEXPORT void
  to_utf8 (std::string&,
           char replacement,
           codepoint_types = codepoint_types::any,
           const char32_t* whitelist = nullptr);

  // If an input stream is in a failed state, then return true if this is
  // because of the eof and throw istream::failure otherwise. If the stream
  // is not in a failed state, return false. This helper function is normally
  // used like this:
  //
  // is.exceptions (istream::badbit);
  //
  // for (string l; !eof (getline (is, l)); )
  // {
  //   ...
  // }
  //
  bool
  eof (std::istream&);

  // Environment variables.
  //
  optional<std::string>
  getenv (const std::string&);

  // Throw system_error on failure.
  //
  // Note that on Windows setting an empty value unsets the variable.
  //
  LIBBUTL_SYMEXPORT void
  setenv (const std::string& name, const std::string& value);

  // Throw system_error on failure.
  //
  LIBBUTL_SYMEXPORT void
  unsetenv (const std::string&);

  // Key comparators (i.e., to be used in sets, maps, etc).
  //
  struct compare_c_string
  {
    // Return true if x precedes y in the strcmp() order. Both arguments are
    // passed straight to strcmp().
    //
    bool
    operator() (const char* x, const char* y) const noexcept
    {
      const int r (std::strcmp (x, y));
      return r < 0;
    }
  };

  struct compare_pointer_target
  {
    // Order two pointer-like values (anything that can be dereferenced with
    // operator*) by comparing the objects they refer to with operator<.
    //
    template <typename P>
    bool
    operator() (const P& x, const P& y) const
    {
      auto&& l (*x);
      auto&& r (*y);
      return l < r;
    }
  };

  //struct hash_pointer_target
  //{
  //  template <typename P>
  //  std::size_t operator() (const P& x) const {return std::hash (*x);}
  //};

  // Compare two std::reference_wrapper's.
  //
  struct compare_reference_target
  {
    // Order two wrappers (anything providing get()) by comparing the objects
    // returned by get() with operator<.
    //
    template <typename R>
    bool
    operator() (const R& x, const R& y) const
    {
      auto&& l (x.get ());
      auto&& r (y.get ());
      return l < r;
    }
  };

  // Combine one or more hash values with the seed s and return the result.
  //
  inline std::size_t
  combine_hash (std::size_t s, std::size_t h)
  {
    // Magic formula from boost::hash_combine().
    //
    const std::size_t mix (h + 0x9e3779b9 + (s << 6) + (s >> 2));
    return s ^ mix;
  }

  // Combine the values left to right: first h with s, then each of hs with
  // the intermediate result.
  //
  template <typename... S>
  inline std::size_t
  combine_hash (std::size_t s, std::size_t h, S... hs)
  {
    const std::size_t r (combine_hash (s, h));
    return combine_hash (r, hs...);
  }

  // Support for reverse iteration using range-based for-loop:
  //
  // for (... : reverse_iterate (x)) ...
  //
  // If x is an lvalue, then the range refers to it. If x is an rvalue, then
  // it is moved into the range object.
  //
  template <typename T>
  class reverse_range
  {
    // Declared first since it is referred to in the return types below.
    //
    T range_;

  public:
    reverse_range (T&& x): range_ (std::forward<T> (x)) {}

    auto
    begin () const -> decltype (this->range_.rbegin ())
    {
      return range_.rbegin ();
    }

    auto
    end () const -> decltype (this->range_.rend ())
    {
      return range_.rend ();
    }
  };

  template <typename T>
  inline reverse_range<T>
  reverse_iterate (T&& x)
  {
    return reverse_range<T> (std::forward<T> (x));
  }

  // Cleanly cast between incompatible function types or dlsym() result
  // (void*) to a function pointer.
  //
  template <typename F, typename P>
  F
  function_cast (P*);

  // Call a function on destruction unless the guard has been cancelled. For
  // example:
  //
  // auto g (make_guard ([&] () {cleanup ();}));
  // ...
  // g.cancel (); // Success, no cleanup necessary.
  //
  template <typename F>
  struct guard_impl;

  template <typename F>
  inline guard_impl<F>
  make_guard (F f)
  {
    return guard_impl<F> (std::move (f));
  }

  template <typename F>
  struct guard_impl
  {
    guard_impl (F f): function (std::move (f)), active (true) {}

    // Transfer the responsibility for calling the function to the new
    // instance. Without this the implicitly-generated copy constructor would
    // leave both instances active and the function would be called twice
    // (for example, if the copy is not elided when returning from
    // make_guard()).
    //
    guard_impl (guard_impl&& g)
        : function (std::move (g.function)), active (g.active)
    {
      g.active = false;
    }

    // Copying would result in the function being called more than once.
    //
    guard_impl (const guard_impl&) = delete;
    guard_impl& operator= (const guard_impl&) = delete;

    ~guard_impl () {if (active) function ();}

    // Disarm the guard so that the function is not called on destruction.
    //
    void
    cancel () {active = false;}

    F function;
    bool active;
  };

  // Call a function if there is an exception.
  //

  template <typename F>
  struct exception_guard_impl;

  // Create an exception guard for the specified function, deducing the
  // function type from the argument.
  //
  template <typename F>
  inline exception_guard_impl<F>
  make_exception_guard (F f)
  {
    using guard = exception_guard_impl<F>;
    return guard (std::move (f));
  }

#ifdef __cpp_lib_uncaught_exceptions
  // Implementation based on std::uncaught_exceptions(): remember the number
  // of uncaught exceptions at construction and call the function on
  // destruction if this number has changed.
  //
  template <typename F>
  struct exception_guard_impl
  {
    exception_guard_impl (F f)
        : function_ (std::move (f)),
          uncaught_ (std::uncaught_exceptions ())
    {
    }

    ~exception_guard_impl ()
    {
      const bool changed (std::uncaught_exceptions () != uncaught_);

      if (changed)
        function_ ();
    }

  private:
    F function_;
    int uncaught_; // Uncaught exception count at construction.
  };
#else
  // Fallback implementation using a TLS flag.
  //
  // True means we are in the body of a destructor that is being called as
  // part of the exception stack unwinding.
  //
  // The flag is only declared here (extern). It is set to true and then back
  // to false by the exception_guard_impl destructor below, around the call to
  // the guarded function.
  //
  extern
  // Use the standard thread_local keyword if available and the __thread
  // extension otherwise.
  //
#ifdef __cpp_thread_local
  thread_local
#else
  __thread
#endif
  // Work around glibc bug #14898.
  //
  // That is, declare the flag as int rather than bool if compiling against
  // glibc older than 2.17.
  //
#if defined(__GLIBC__)       && \
    defined(__GLIBC_MINOR__) && \
    (__GLIBC__  < 2 || __GLIBC__ == 2 && __GLIBC_MINOR__ < 17)
  int
#else
  bool
#endif
  exception_unwinding_dtor_;

  // On Windows one cannot export a thread-local variable so we have to
  // use wrapper functions.
  //
#ifdef _WIN32
  LIBBUTL_SYMEXPORT bool
  exception_unwinding_dtor ();

  LIBBUTL_SYMEXPORT void
  exception_unwinding_dtor (bool);
#else
  // Return the calling thread's exception_unwinding_dtor_ flag.
  //
  inline bool
  exception_unwinding_dtor ()
  {
    return exception_unwinding_dtor_;
  }

  // Set the calling thread's exception_unwinding_dtor_ flag.
  //
  inline void
  exception_unwinding_dtor (bool v)
  {
    exception_unwinding_dtor_ = v;
  }
#endif

  // Implementation based on std::uncaught_exception(): call the function on
  // destruction if there is an uncaught exception, with the unwinding flag
  // set for the duration of the call.
  //
  template <typename F>
  struct exception_guard_impl
  {
    exception_guard_impl (F f): function_ (std::move (f)) {}

    ~exception_guard_impl ()
    {
      // Nothing to do unless there is an uncaught exception.
      //
      if (!std::uncaught_exception ())
        return;

      // Raise the flag for the duration of the call and clear it afterwards.
      //
      exception_unwinding_dtor (true);
      function_ ();
      exception_unwinding_dtor (false);
    }

  private:
    F function_;
  };
#endif
}

LIBBUTL_MODEXPORT namespace std
{
  // Sanitize the exception description before printing. This includes:
  //
  // - stripping leading colons and spaces (see fdstream.cxx)
  // - stripping trailing newlines, periods, and spaces
  // - stripping system error redundant suffix (see utility.cxx)
  // - lower-case the first letter if the beginning looks like a word
  //
  LIBBUTL_SYMEXPORT ostream&
  operator<< (ostream&, const exception&);
}

#include <libbutl/utility.ixx>