aboutsummaryrefslogtreecommitdiff
path: root/butl/timestamp.cxx
blob: 0da9f4c039bc1072b717d835e7fbdf08125b8f1e (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
// file      : butl/timestamp.cxx -*- C++ -*-
// copyright : Copyright (c) 2014-2016 Code Synthesis Ltd
// license   : MIT; see accompanying LICENSE file

#include <butl/timestamp>

#include <time.h>  // localtime_r(), gmtime_r(), strptime(), timegm()
#include <errno.h> // EINVAL

#include <ctime>        // tm, time_t, strftime(), mktime()
#include <cstdlib>      // strtoull()
#include <cassert>
#include <iomanip>      // put_time(), setw(), dec, right
#include <cstring>      // strlen(), memcpy()
#include <ostream>
#include <utility>      // pair, make_pair()
#include <stdexcept>    // runtime_error
#include <system_error>

using namespace std;

// libstdc++ prior to GCC 5 does not have std::put_time() so we have to invent
// our own. Detecting the "prior to GCC 5" condition, however, is not easy:
// libstdc++ is used by other compilers (e.g., Clang) so we cannot just use
// __GNUC__. There is __GLIBCXX__ but it is a date which is updated with
// every release, including bugfixes (so, there can be some 4.7.X release with
// a date greater than 5.0.0).
//
// So what we are going to do here is "offer" our implementation and let ADL
// pick one. If there is std::put_time(), then it will be preferred because
// of the std::tm argument.
//
#ifdef __GLIBCXX__
#include <ctime>   // tm, strftime()
#include <ostream>

namespace details
{
  // The arguments of a put_time() call: the broken-down time and the
  // strftime() format to print it with. Consumed by operator<< below.
  //
  struct put_time_data
  {
    const std::tm* tm;
    const char* fmt;
  };

  // Stand-in for std::put_time(). Only captures the arguments; the actual
  // formatting happens when the result is inserted into a stream.
  //
  inline put_time_data
  put_time (const std::tm* tm, const char* fmt)
  {
    put_time_data r = {tm, fmt};
    return r;
  }

  // Format the captured time with strftime() into a fixed-size buffer and
  // write the result to the stream. A zero result from strftime() is treated
  // as a failure and is reported by setting badbit.
  //
  inline ostream&
  operator<< (ostream& os, const put_time_data& d)
  {
    char text[256];
    const size_t n (strftime (text, sizeof (text), d.fmt, d.tm));

    if (n == 0)
    {
      os.setstate (ostream::badbit);
      return os;
    }

    os << text;
    return os;
  }
}

using namespace details;
#endif

namespace butl
{
  ostream&
  to_stream (ostream& os,
             const timestamp& ts,
             const char* format,
             bool special,
             bool local)
  {
    if (special)
    {
      if (ts == timestamp_unknown)
        return os << "<unknown>";

      if (ts == timestamp_nonexistent)
        return os << "<nonexistent>";
    }

    time_t t (system_clock::to_time_t (ts));

    std::tm tm;
    if ((local ? localtime_r (&t, &tm) : gmtime_r (&t, &tm)) == nullptr)
      throw system_error (errno, system_category ());

    using namespace chrono;

    timestamp sec (system_clock::from_time_t (t));
    nanoseconds ns (duration_cast<nanoseconds> (ts - sec));

    char fmt[256];
    size_t n (strlen (format));
    if (n + 1 > sizeof (fmt))
      throw system_error (EINVAL, system_category ());
    memcpy (fmt, format, n + 1);

    // Chunk the format string into fragments that we feed to put_time() and
    // those that we handle ourselves. Watch out for the escapes (%%).
    //
    size_t i (0), j (0); // put_time()'s range.
    for (; j != n; ++j)
    {
      if (fmt[j] == '%' && j + 1 != n)
      {
        if (fmt[j + 1] == '[')
        {
          if (os.width () != 0)
            throw runtime_error (
              "padding is not supported when printing nanoseconds");

          // Our fragment. First see if we need to call put_time().
          //
          if (i != j)
          {
            fmt[j] = '\0';
            if (!(os << put_time (&tm, fmt + i)))
              return os;
          }

          j += 2; // Character after '['.
          if (j == n)
            throw system_error (EINVAL, system_category ());

          char d ('\0');
          if (fmt[j] != 'N')
          {
            d = fmt[j];
            if (++j == n || fmt[j] != 'N')
              throw system_error (EINVAL, system_category ());
          }

          if (++j == n || fmt[j] != ']')
            throw system_error (EINVAL, system_category ());

          if (ns != nanoseconds::zero ())
          {
            if (d != '\0')
              os << d;

            ostream::fmtflags fl (os.flags ());
            char fc (os.fill ('0'));
            os << dec << right << setw (9) << ns.count ();
            os.fill (fc);
            os.flags (fl);
          }

          i = j + 1; // j is incremented in the for-loop header.
        }
        else
          ++j; // Skip % and the next character to handle %%.
      }
    }

    // Do we need to call put_time() one last time?
    //
    if (i != j)
    {
      if (!(os << put_time (&tm, fmt + i)))
        return os;
    }

    return os;
  }

  ostream&
  operator<< (ostream& os, const duration& d)
  {
    if (os.width () != 0) // We always print nanosecond.
      throw runtime_error (
        "padding is not supported when printing nanoseconds");

    timestamp ts; // Epoch.
    ts += d;

    time_t t (system_clock::to_time_t (ts));

    const char* fmt (nullptr);
    const char* unt ("nanoseconds");
    if (t >= 365 * 24 * 60 * 60)
    {
      fmt = "%Y-%m-%d %H:%M:%S";
      unt = "years";
    }
    else if (t >= 31 * 24 * 60 * 60)
    {
      fmt = "%m-%d %H:%M:%S";
      unt = "months";
    }
    else if (t >= 24 * 60 * 60)
    {
      fmt = "%d %H:%M:%S";
      unt = "days";
    }
    else if (t >= 60 * 60)
    {
      fmt = "%H:%M:%S";
      unt = "hours";
    }
    else if (t >= 60)
    {
      fmt = "%M:%S";
      unt = "minutes";
    }
    else if (t >= 1)
    {
      fmt = "%S";
      unt = "seconds";
    }

    if (fmt != nullptr)
    {
      std::tm tm;
      if (gmtime_r (&t, &tm) == nullptr)
        throw system_error (errno, system_category ());

      if (t >= 24 * 60 * 60)
        tm.tm_mday -= 1; // Make day of the month to be a zero-based number.

      if (t >= 31 * 24 * 60 * 60)
        tm.tm_mon -= 1; // Make month of the year to be a zero-based number.

      if (t >= 365 * 24 * 60 * 60)
        // Make the year to be a 1970-based number. Negative values allowed
        // according to the POSIX specification.
        //
        tm.tm_year -= 1970;

      if (!(os << put_time (&tm, fmt)))
        return os;
    }

    using namespace chrono;

    timestamp sec (system_clock::from_time_t (t));
    nanoseconds ns (duration_cast<nanoseconds> (ts - sec));

    if (ns != nanoseconds::zero ())
    {
      if (fmt != nullptr)
      {
        ostream::fmtflags fl (os.flags ());
        char fc (os.fill ('0'));
        os << '.' << dec << right << setw (9) << ns.count ();
        os.fill (fc);
        os.flags (fl);
      }
      else
        os << ns.count ();

      os << ' ' << unt;
    }
    else if (fmt == nullptr)
      os << '0';

    return os;
  }
}

// VC++ implementation of strptime() via std::get_time().
//
// To debug fallback functions with GCC, uncomment the following defines.
//
//#define _MSC_VER
//#define strptime strptime_
//#define timegm   timegm_

#ifdef _MSC_VER
#include <ctime>   // time_t, tm, mktime(), gmtime()
#include <locale>
#include <clocale>
#include <sstream>
#include <iomanip>
#include <cstring> // strlen()

namespace details
{
  // Parse the input according to the format with std::get_time() and return
  // the pointer to the first unparsed character or NULL on failure.
  //
  static char*
  strptime (const char* input, const char* format, tm* time)
  {
    istringstream src (input);

    // The original strptime() function behaves according to the process' C
    // locale (set with std::setlocale()), which can differ from the process
    // C++ locale (set with std::locale::global()).
    //
    src.imbue (locale (setlocale (LC_ALL, nullptr)));

    src >> get_time (time, format);

    if (src.fail ())
      return nullptr;

    // tellg () behaves as UnformattedInputFunction, so it returns the
    // failure status if eofbit is set. Thus handle the end-of-input case
    // ourselves.
    //
    const size_t consumed (src.eof ()
                           ? strlen (input)
                           : static_cast<size_t> (src.tellg ()));

    return const_cast<char*> (input + consumed);
  }

  // Convert the broken-down time, interpreted as UTC, to the time from Epoch
  // using only mktime() and gmtime_r(). Return -1 (converted to time_t) on
  // failure. Note that *ctm is modified in the process.
  //
  static time_t
  timegm (tm* ctm)
  {
    const time_t failed (static_cast<time_t> (-1));

    // We will use an example to explain how it works. Say *ctm contains 9 AM
    // of some day. Note that no time zone information is available.
    //
    // Convert it to the time from Epoch as if it's in the local time zone.
    //
    ctm->tm_isdst = -1;
    const time_t as_local (mktime (ctm));

    if (as_local == failed)
      return failed;

    // Let's say we are in Moscow, and as_local contains the time passed from
    // Epoch till 9 AM MSK. But that is not what we need. What we need is the
    // time passed from Epoch till 9 AM GMT. This is some bigger number, as it
    // takes longer to reach the same calendar time in a more Western
    // location. So we need to find that offset and adjust as_local with it to
    // obtain the desired value. The offset is effectively the time difference
    // between the MSK and GMT time zones.
    //
    tm gtm;
    if (gmtime_r (&as_local, &gtm) == nullptr)
      return failed;

    // gmtime_r() being called for the as_local time point returns 6 AM. So
    // now we have *ctm and gtm, the difference between which (3 hours) is the
    // desired offset. The only problem is that we cannot subtract gtm from
    // *ctm to get the offset expressed as time_t. To do that we need to apply
    // to both of them the same function converting std::tm to std::time_t.
    // The mktime() function can do that, so the difference of its results for
    // *ctm and gtm is the desired offset.
    //
    // To ensure mktime() works exactly the same way in both cases, we need to
    // reset the Daylight Saving Time flag for each of *ctm and gtm.
    //
    ctm->tm_isdst = 0;
    const time_t lt (mktime (ctm));

    if (lt == failed)
      return failed;

    gtm.tm_isdst = 0;
    const time_t gt (mktime (&gtm));

    if (gt == failed)
      return failed;

    // C11 standard specifies time_t to be a real type (integer and real
    // floating types are collectively called real types). So we cannot
    // consider it to be signed and always subtract the smaller value from the
    // bigger one.
    //
    if (lt > gt)
      return as_local + (lt - gt);

    return as_local - (gt - lt);
  }
}

using namespace details;
#endif

namespace butl
{
  // Parse the input according to the format and return the broken-down time
  // together with the fraction of a second expressed in nanoseconds.
  //
  // The format is the strptime() one extended with the %[<d>N], %[<d>U], and
  // %[<d>M] specifiers which stand for the fraction of a second in
  // nanoseconds (9 digits), microseconds (6 digits), and milliseconds (3
  // digits), respectively, preceded by the single-character delimiter <d>.
  // The fraction may be absent from the input, in which case it is zero.
  // Only the first %[ in the format is handled here with the rest of the
  // format passed to strptime() as is.
  //
  // If end is not NULL, then it is set to the position in the input where
  // the parsing has stopped. Otherwise the input must be consumed entirely.
  //
  // Throw system_error (EINVAL) if the format or the input is invalid.
  //
  static pair<tm, chrono::nanoseconds>
  from_string (const char* input, const char* format, const char** end)
  {
    auto invalid = [] () {throw system_error (EINVAL, system_category ());};

    using namespace chrono;

    // Look for the start of our specifier stepping over other %-sequences
    // (and so over the %% escape) two characters at a time.
    //
    const size_t fn (strlen (format));
    size_t fp (0);

    while (fp != fn)
    {
      if (format[fp] == '%' && fp + 1 != fn)
      {
        if (format[fp + 1] == '[')
          break;

        ++fp;
      }

      ++fp;
    }

    if (fp == fn)
    {
      // No %[], so a single strptime() call is all we need.
      //
      tm r = tm ();
      const char* stop (strptime (input, format, &r));

      if (stop == nullptr)
        invalid ();

      if (end != nullptr)
        *end = stop;
      else if (*stop != '\0')
        invalid (); // Input is not fully read.

      return make_pair (r, nanoseconds::zero ());
    }

    // The plan from here on:
    //
    // 1. Parse the input with the format part that precedes %[] to find out
    //    where the fraction (if present) starts.
    //
    // 2. Parse the fraction to obtain nanoseconds.
    //
    // 3. Assemble the new input and format strings with the fraction and
    //    %[], respectively, taken out.
    //
    // 4. Re-parse the new input with the new format to fill std::tm.
    //
    assert (format[fp] == '%');
    string fmt (format, fp); // Format part before %[].

    ++fp;
    assert (format[fp] == '[');

    if (++fp == fn)
      invalid ();

    const char delim (format[fp]); // Delimiter character.

    if (++fp == fn)
      invalid ();

    const char unit (format[fp]); // Fraction specifier character.

    if ((unit != 'N' && unit != 'U' && unit != 'M') || ++fp == fn)
      invalid ();

    if (format[fp++] != ']')
      invalid ();

    // Note that if %[] comes first, then we end up with the empty format
    // string, which is valid according to the strptime() documentation.
    //
    tm r = tm ();
    const char* p (strptime (input, fmt.c_str (), &r));

    if (p == nullptr)
      invalid ();

    string in (input, p - input); // Input part before the fraction.
    size_t digits (0);            // Fraction size, not counting delimiter.
    nanoseconds ns (nanoseconds::zero ());

    if (*p == delim)
    {
      // The fraction is present and must have exactly as many digits as the
      // specifier implies.
      //
      const size_t want (unit == 'N' ? 9 : (unit == 'U' ? 6 : 3));

      unsigned long long value (0);
      size_t got (0);

      for (++p; got != want && *p >= '0' && *p <= '9'; ++p, ++got)
        value = value * 10 + static_cast<unsigned long long> (*p - '0');

      if (got != want)
        invalid ();

      digits = want;

      switch (unit)
      {
      case 'N': ns = nanoseconds (value); break;
      case 'U': ns = microseconds (value); break;
      case 'M': ns = milliseconds (value); break;
      default: assert (false);
      }

      // Simply dropping the fraction from the input and %[] from the format
      // does not quite work: the spaces that surrounded the fraction would
      // then be "swallowed" by a single space in the format. For example,
      // given the input:
      //
      // 2016-02-21 19:31:10 .384902285 GMT
      //
      // And the format:
      //
      // %Y-%m-%d %H:%M:%S %[.N]
      //
      // The unparsed tail would be 'GMT' rather than the expected ' GMT'. So
      // instead we put a non-space character in place of both.
      //
      fmt += '-';
      in += '-';
    }

    fmt += format + fp;
    in += p;

    // Re-parse the assembled input with the assembled format.
    //
    r = tm ();
    const char* b (in.c_str ());
    p = strptime (b, fmt.c_str (), &r);

    if (p == nullptr)
      invalid ();

    // Translate the stop position back to the original input: the fraction
    // digits are missing from the assembled one while the delimiter is
    // accounted for by the placeholder.
    //
    if (end != nullptr)
      *end = input + (p - b + digits);
    else if (*p != '\0')
      invalid (); // Input is not fully read.

    return make_pair (r, ns);
  }

  // Parse the input according to the format (see the from_string() overload
  // that returns the broken-down time for the format and the end argument
  // semantics) and return the corresponding timestamp. The broken-down time
  // is converted with mktime() if local is true and with timegm() otherwise,
  // and the parsed fraction of a second is then added to the result.
  //
  // Throw system_error if the parsing or the conversion fails.
  //
  timestamp
  from_string (const char* input,
               const char* format,
               bool local,
               const char** end)
  {
    pair<tm, chrono::nanoseconds> parsed (from_string (input, format, end));

    tm& parts (parsed.first);
    const time_t secs (local ? mktime (&parts) : timegm (&parts));

    if (secs == -1)
      throw system_error (errno, system_category ());

    return timestamp::clock::from_time_t (secs) + parsed.second;
  }
}