aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKaren Arutyunov <karen@codesynthesis.com>2016-04-14 16:20:59 +0300
committerKaren Arutyunov <karen@codesynthesis.com>2016-04-21 14:32:11 +0300
commitc0beeb5f0b3285fd7b411859bd68d44b472ad034 (patch)
treea97400a873dd0a5092261cd91675ab3a75bc2d92
parent079e167e7c62c857e271bda0588064dc4030e337 (diff)
Add timestamp from_string()
-rw-r--r--NEWS1
-rw-r--r--butl/timestamp64
-rw-r--r--butl/timestamp.cxx291
-rw-r--r--tests/buildfile2
-rw-r--r--tests/timestamp/buildfile7
-rw-r--r--tests/timestamp/driver.cxx159
6 files changed, 513 insertions, 11 deletions
diff --git a/NEWS b/NEWS
index 60026c3..2c5645d 100644
--- a/NEWS
+++ b/NEWS
@@ -13,6 +13,7 @@ Version 0.3.0
* Derive a target class from the target triplet for some targets. Currently
the classes are: 'linux', 'macosx', 'freebsd', 'windows', and 'other'.
+ * Implement timestamp from_string().
Version 0.2.0
diff --git a/butl/timestamp b/butl/timestamp
index 10090c5..90bca08 100644
--- a/butl/timestamp
+++ b/butl/timestamp
@@ -38,19 +38,22 @@ namespace butl
// Generally-useful special values.
//
- const timestamp timestamp_unknown {duration {-1}};
- const timestamp timestamp_nonexistent {duration {0}};
+  // These keep the values they had before the switch from brace to
+  // parenthesized initialization: -1 ticks for "unknown" and 0 ticks for
+  // "nonexistent".
+  //
+  const timestamp timestamp_unknown = timestamp (duration (-1));
+  const timestamp timestamp_nonexistent = timestamp (duration (0));
- // Human-readable representation. By default the timestamp is printed by
- // localtime_r() in the local timezone, so tzset() from <time.h> should be
- // called prior to using the corresponding operator or the to_stream()
- // function (normally from main() or equivalent).
+ // Print human-readable representation of the timestamp.
+ //
+ // By default the timestamp is printed by localtime_r() in the local
+ // timezone, so tzset() from <time.h> should be called prior to using the
+ // corresponding operator or the to_stream() function (normally from main()
+ // or equivalent).
//
// The format argument in the to_stream() function is the put_time() format
// string except that it also supports the nanoseconds conversion specifier
// in the form %[<d>N] where <d> is the optional single delimiter character,
- // for example '.'. If the nanoseconds part is 0, then it is not printed (nor
- // the delimiter character).
+ // for example '.'. If the nanoseconds part is 0, then it is not printed
+ // (nor the delimiter character). Otherwise, if necessary, the nanoseconds
+ // part is padded to 9 characters with leading zeros.
//
// The special argument in the to_stream() function indicates whether the
// special timestamp_unknown and timestamp_nonexistent values should be
@@ -87,6 +90,51 @@ namespace butl
std::ostream&
operator<< (std::ostream&, const duration&);
+
+ // Parse human-readable representation of the timestamp.
+ //
+ // The format argument is the strptime() format string except that it also
+ // supports the fraction of a second specifier in the form %[<d><f>], where
+ // <d> is the single delimiter character, for example '.', and <f> is one
+ // of the 'N', 'U', 'M' characters, denoting nanoseconds, microseconds and
+ // milliseconds, respectively.
+ //
+ // The delimiter <d> is mandatory. If no such character is encountered at
+ // the corresponding position of the input string, the function behaves as
+ // if no %[] specifier were provided. Only single %[] specifier in the
+ // format string is currently supported.
+ //
+ // If the delimiter is present, then it should be followed by 9 (N), 6 (U),
+ // or 3 (M) digit value padded with leading zeros if necessary.
+ //
+ // If the local argument is true, then the input is assumed to be local time
+ // and the result is returned as local time as well. Otherwise, UTC is used
+ // in both cases.
+ //
+ // If the end argument is not NULL, then it points to the first character
+ // that was not parsed. Otherwise, throw std::system_error (EINVAL) in case
+ // of any unparsed characters.
+ //
+ // Also throw std::system_error on input/format mismatch and underlying time
+ // conversion function failures.
+ //
+ // Note that internally from_string() calls strptime(), which behaves
+ // according to the process' C locale (set with std::setlocale()) and not
+ // the C++ locale (set with std::locale::global()). Meanwhile the behaviour
+ // can be affected by std::locale::global() as well, as it itself calls
+ // std::setlocale() for the locale with a name.
+ //
+ // Potential improvements:
+ // - support %() version for non-optional component but with optional
+ // delimiter
+ // - ability to parse local, return UTC and vice-versa
+ // - handle timezone parsing
+ //
+ timestamp
+ from_string (const char* input,
+ const char* format,
+ bool local,
+ const char** end = nullptr);
};
#endif // BUTL_TIMESTAMP
diff --git a/butl/timestamp.cxx b/butl/timestamp.cxx
index 6299ba3..f3966b7 100644
--- a/butl/timestamp.cxx
+++ b/butl/timestamp.cxx
@@ -4,13 +4,16 @@
#include <butl/timestamp>
-#include <time.h> // localtime_r(), gmtime_r()
+#include <time.h> // localtime_r(), gmtime_r(), strptime(), timegm()
#include <errno.h> // EINVAL
-#include <ctime> // tm, strftime()
+#include <ctime> // tm, time_t, strftime(), mktime()
+#include <cstdlib> // strtoull()
+#include <cassert>
#include <iomanip> // put_time(), setw(), dec, right
#include <cstring> // strlen(), memcpy()
#include <ostream>
+#include <utility> // pair, make_pair()
#include <stdexcept> // runtime_error
#include <system_error>
@@ -28,6 +31,9 @@ using namespace std;
// of the std::tm argument.
//
#ifdef __GLIBCXX__
+#include <ctime> // tm, strftime()
+#include <ostream>
+
namespace details
{
struct put_time_data
@@ -253,3 +259,284 @@ namespace butl
return os;
}
}
+
+// VC++ implementation of strptime() via std::get_time().
+//
+// To debug fallback functions with GCC, uncomment the following defines.
+//
+//#define _MSC_VER
+//#define strptime strptime_
+//#define timegm timegm_
+
+#ifdef _MSC_VER
+#include <ctime> // time_t, tm, mktime(), gmtime()
+#include <locale>
+#include <clocale>
+#include <sstream>
+#include <iomanip>
+#include <cstring> // strlen()
+
+namespace details
+{
+  // Fallback strptime() built on top of std::get_time().
+  //
+  // Parse input according to format and store the result in *time. Return
+  // the pointer to the first unparsed character of input, or NULL if the
+  // extraction failed.
+  //
+  static char*
+  strptime (const char* input, const char* format, tm* time)
+  {
+    // The real strptime() follows the process' C locale (std::setlocale()),
+    // which is not necessarily the same as the global C++ locale
+    // (std::locale::global()), so imbue the stream with the former.
+    //
+    istringstream stream (input);
+    stream.imbue (locale (setlocale (LC_ALL, nullptr)));
+
+    stream >> get_time (time, format);
+
+    if (stream.fail ())
+      return nullptr;
+
+    // tellg() behaves as UnformattedInputFunction and so reports failure
+    // once eofbit is set. In that case the whole input has been consumed.
+    //
+    size_t consumed;
+    if (stream.eof ())
+      consumed = strlen (input);
+    else
+      consumed = static_cast<size_t> (stream.tellg ());
+
+    return const_cast<char*> (input + consumed);
+  }
+
+ // Fallback timegm() used in the _MSC_VER build: convert the broken-down
+ // time in *ctm, treated as UTC, to time_t using only mktime() and
+ // gmtime_r(). Return static_cast<time_t> (-1) if any of these calls fails.
+ //
+ // Note that *ctm is modified: its tm_isdst member is overwritten and
+ // mktime() is called on it.
+ //
+ static time_t
+ timegm (tm* ctm)
+ {
+ const time_t e (static_cast<time_t> (-1));
+
+ // We will use an example to explain how it works. Say *ctm contains 9 AM
+ // of some day. Note that no time zone information is available.
+ //
+ // Convert it to the time from Epoch as if it's in the local time zone.
+ //
+ ctm->tm_isdst = -1;
+ time_t t (mktime (ctm));
+ if (t == e)
+ return e;
+
+ // Let's say we are in Moscow, and t contains the time passed from Epoch
+ // till 9 AM MSK. But that is not what we need. What we need is the time
+ // passed from Epoch till 9 AM GMT. This is some bigger number, as it takes
+ // longer to achieve the same calendar time for a more Western location. So
+ // we need to find that offset, and increment t with it to obtain the
+ // desired value. The offset is effectively the time difference between the
+ // MSK and GMT time zones.
+ //
+ tm gtm;
+ if (gmtime_r (&t, &gtm) == nullptr)
+ return e;
+
+ // gmtime_r() being called for the timepoint t returns 6 AM. So now we
+ // have *ctm and gtm, whose difference (3 hours) reflects the desired
+ // offset. The only problem is that we can not subtract gtm from *ctm to
+ // get the offset expressed as time_t. To do that we need to apply to both
+ // of them the same conversion function transforming std::tm to
+ // std::time_t. The mktime() can do that, so the expression (mktime(ctm) -
+ // mktime(&gtm)) calculates the desired offset.
+ //
+ // To ensure mktime() works exactly the same way for both cases, we need
+ // to reset the Daylight Saving Time flag for each of *ctm and gtm.
+ //
+ ctm->tm_isdst = 0;
+ time_t lt (mktime (ctm));
+ if (lt == e)
+ return e;
+
+ gtm.tm_isdst = 0;
+ time_t gt (mktime (&gtm));
+ if (gt == e)
+ return e;
+
+ // C11 standard specifies time_t to be a real type (integer and real
+ // floating types are collectively called real types). So we can not
+ // consider it to be signed. That is why the offset is always computed as
+ // (larger - smaller) and then either added to or subtracted from t.
+ //
+ return lt > gt ? t + (lt - gt) : t - (gt - lt);
+ }
+}
+
+using namespace details;
+#endif
+
+namespace butl
+{
+ // Parse input according to format, which is the strptime() format string
+ // that may additionally contain a single %[<d><f>] fraction of a second
+ // specifier. Return the broken-down time together with the parsed fraction
+ // converted to nanoseconds (zero if there is no fraction in the input).
+ //
+ // If end is not NULL, set *end to the first unparsed character of input.
+ // Otherwise, treat any unparsed characters as an error. All errors are
+ // reported by throwing std::system_error with the EINVAL code.
+ //
+ static pair<tm, chrono::nanoseconds>
+ from_string (const char* input, const char* format, const char** end)
+ {
+ auto bad_val = []() {throw system_error (EINVAL, system_category ());};
+
+ // See if we have our specifier.
+ //
+ size_t i (0);
+ size_t n (strlen (format));
+ for (; i != n; ++i)
+ {
+ if (format[i] == '%' && i + 1 != n)
+ {
+ if (format[i + 1] == '[')
+ break;
+ else
+ ++i; // To handle %%.
+ }
+ }
+
+ // Call the fraction of a second as just fraction from now on.
+ //
+ using namespace chrono;
+ nanoseconds ns (nanoseconds::zero ());
+
+ if (i == n)
+ {
+ // No %[], so just parse with strptime().
+ //
+ tm t {};
+ const char* p (strptime (input, format, &t));
+ if (p == nullptr)
+ bad_val ();
+
+ if (end != nullptr)
+ *end = p;
+ else if (*p != '\0')
+ bad_val (); // Input is not fully read.
+
+ return make_pair (t, ns);
+ }
+
+ // Now the overall plan is:
+ //
+ // 1. Parse the fraction part of the input string to obtain nanoseconds.
+ //
+ // 2. Remove fraction part from the input string.
+ //
+ // 3. Remove %[] from the format string.
+ //
+ // 4. Re-parse the modified input with the modified format to fill the
+ // std::tm structure.
+ //
+ // Parse the %[] specifier.
+ //
+ assert (format[i] == '%');
+ string fm (format, i++); // Start assembling the new format string.
+
+ assert (format[i] == '[');
+ if (++i == n)
+ bad_val ();
+
+ char d (format[i]); // Delimiter character.
+ if (++i == n)
+ bad_val ();
+
+ char f (format[i]); // Fraction specifier character.
+ if ((f != 'N' && f != 'U' && f != 'M') || ++i == n)
+ bad_val ();
+
+ if (format[i++] != ']')
+ bad_val ();
+
+ // Parse the input with the initial part of the format string, the one
+ // that precedes the %[] specifier. The returned pointer will be the
+ // position we need to start from to parse the fraction.
+ //
+ tm t {};
+
+ // What if %[] is first, there is nothing before it? According to the
+ // strptime() documentation an empty format string is a valid one.
+ //
+ const char* p (strptime (input, fm.c_str (), &t));
+ if (p == nullptr)
+ bad_val ();
+
+ // Start assembling the new input string.
+ //
+ string in (input, p - input);
+ size_t fn (0); // Fraction size.
+
+ if (d == *p)
+ {
+ // Fraction present in the input.
+ //
+
+ // Read fraction digits.
+ //
+ // Note that i, n, and (further down) t declared in this block shadow the
+ // outer variables with the same names. The outer i is used again after
+ // the block to append the remainder of the format string.
+ //
+ char buf [10];
+ size_t i (0);
+ size_t n (f == 'N' ? 9 : (f == 'U' ? 6 : 3));
+ for (++p; i < n && *p >= '0' && *p <= '9'; ++i, ++p)
+ buf[i] = *p;
+
+ if (i < n)
+ bad_val ();
+
+ buf[n] = '\0';
+ fn = n;
+
+ // Calculate nanoseconds.
+ //
+ char* e (nullptr);
+ unsigned long long t (strtoull (buf, &e, 10));
+ assert (e == buf + n);
+
+ switch (f)
+ {
+ case 'N': ns = nanoseconds (t); break;
+ case 'U': ns = microseconds (t); break;
+ case 'M': ns = milliseconds (t); break;
+ default: assert (false);
+ }
+
+ // Actually the idea to fully remove the fraction from the input string,
+ // and %[] from the format string, has a flaw. After the fraction removal
+ // the spaces around it will be "swallowed" with a single space in the
+ // format string. So, as an example, for the input:
+ //
+ // 2016-02-21 19:31:10 .384902285 GMT
+ //
+ // And the format:
+ //
+ // %Y-%m-%d %H:%M:%S %[.N]
+ //
+ // The unparsed tail of the input will be 'GMT' while expected to be
+ // ' GMT'. To fix that we will not remove, but replace the mentioned
+ // parts with some non-space character.
+ //
+ fm += '-';
+ in += '-';
+ }
+
+ fm += format + i;
+ in += p;
+
+ // Reparse the modified input with the modified format.
+ //
+ t = {};
+ const char* b (in.c_str ());
+ p = strptime (b, fm.c_str (), &t);
+
+ if (p == nullptr)
+ bad_val ();
+
+ // The modified input is shorter than the original one by the number of
+ // the fraction digits (the delimiter was replaced, not removed), so
+ // compensate for that when translating the position back.
+ //
+ if (end != nullptr)
+ *end = input + (p - b + fn);
+ else if (*p != '\0')
+ bad_val (); // Input is not fully read.
+
+ return make_pair (t, ns);
+ }
+
+  // Parse the input according to the format and convert the result to the
+  // timestamp, treating the parsed calendar time as local time if the local
+  // argument is true and as UTC otherwise. See the header for details on
+  // the format and the end argument.
+  //
+  timestamp
+  from_string (const char* input,
+               const char* format,
+               bool local,
+               const char** end)
+  {
+    // Obtain the broken-down time and the fraction of a second.
+    //
+    pair<tm, chrono::nanoseconds> parsed (from_string (input, format, end));
+    tm& calendar (parsed.first);
+
+    // Convert the broken-down time to the time from Epoch.
+    //
+    time_t seconds;
+    if (local)
+      seconds = mktime (&calendar);
+    else
+      seconds = timegm (&calendar);
+
+    if (seconds == -1)
+      throw system_error (errno, system_category ());
+
+    return timestamp::clock::from_time_t (seconds) + parsed.second;
+  }
+}
diff --git a/tests/buildfile b/tests/buildfile
index 0ad40e5..45c78d6 100644
--- a/tests/buildfile
+++ b/tests/buildfile
@@ -2,6 +2,6 @@
# copyright : Copyright (c) 2014-2016 Code Synthesis Ltd
# license : MIT; see accompanying LICENSE file
-d = dir-iterator/ path/ prefix-map/ sha256/ triplet/
+d = dir-iterator/ path/ prefix-map/ sha256/ timestamp/ triplet/
.: $d
include $d
diff --git a/tests/timestamp/buildfile b/tests/timestamp/buildfile
new file mode 100644
index 0000000..bb565a2
--- /dev/null
+++ b/tests/timestamp/buildfile
@@ -0,0 +1,7 @@
+# file : tests/timestamp/buildfile
+# copyright : Copyright (c) 2014-2016 Code Synthesis Ltd
+# license : MIT; see accompanying LICENSE file
+
+exe{driver}: cxx{driver} ../../butl/lib{butl}
+
+include ../../butl/
diff --git a/tests/timestamp/driver.cxx b/tests/timestamp/driver.cxx
new file mode 100644
index 0000000..2db726d
--- /dev/null
+++ b/tests/timestamp/driver.cxx
@@ -0,0 +1,159 @@
+// file : tests/timestamp/driver.cxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2016 Code Synthesis Ltd
+// license : MIT; see accompanying LICENSE file
+
+#include <time.h> // tzset()
+
+#include <locale>
+#include <clocale>
+#include <cassert>
+#include <sstream>
+#include <iostream>
+#include <system_error>
+
+#include <butl/timestamp>
+
+using namespace std;
+using namespace butl;
+
+// Parse the input with the format string, print the resulting time using the
+// same format string, and check that the printed text followed by the
+// unparsed tail of the input equals the expected output (the input itself if
+// out is NULL). Return false on mismatch or if anything throws.
+//
+static bool
+parse (const char* in, const char* fmt, bool local, const char* out)
+{
+  const char* expected (out != nullptr ? out : in);
+
+  try
+  {
+    const char* rest;
+    timestamp ts (from_string (in, fmt, local, &rest));
+
+    ostringstream os;
+    if (to_stream (os, ts, fmt, false, local))
+      return os.str () + rest == expected;
+  }
+  catch (...)
+  {
+  }
+
+  return false;
+}
+
+// Run the round-trip check for both the local time and UTC modes. The UTC
+// check is only performed if the local time one succeeds.
+//
+static bool
+parse (const char* in, const char* fmt, const char* out = nullptr)
+{
+  if (!parse (in, fmt, true, out))
+    return false;
+
+  return parse (in, fmt, false, out);
+}
+
+// Return true if parsing the input as local time with the format string
+// throws std::system_error and false if it succeeds. Any other exception is
+// propagated to the caller.
+//
+static bool
+fail (const char* in, const char* fmt)
+{
+  bool thrown (false);
+
+  try
+  {
+    from_string (in, fmt, true);
+  }
+  catch (const system_error&)
+  {
+    thrown = true;
+  }
+
+  return thrown;
+}
+
+// Test driver for from_string(): check that malformed formats and inputs are
+// rejected and that valid inputs survive the parse/print round trip.
+//
+int
+main ()
+{
+ tzset (); // To use butl::to_stream() later on.
+
+ // Invalid %[].
+ //
+ assert (fail ("Apr 08 19:31:10 2016", "%b %d %H:%M:%S%["));
+ assert (fail ("Apr 08 19:31:10 2016", "%b %d %H:%M:%S%[."));
+ assert (fail ("Apr 08 19:31:10 2016", "%b %d %H:%M:%S%[.U"));
+ assert (fail ("Apr 08 19:31:10 2016", "%b %d %H:%M:%S%[.A]"));
+ assert (fail ("Apr 08 19:31:10 2016", "%d %H:%M:%S%[.U] %Y"));
+ assert (fail ("2016-10-20 11:12:13.123456789", "%Y-%m-%d %H:%M:%S%[N]"));
+
+ // Invalid fraction of a second.
+ //
+ assert (fail ("Apr 08 19:31:10. 2016", "%b %d %H:%M:%S%[.U] %Y"));
+ assert (fail ("Apr 08 19:31:10.1 2016", "%b %d %H:%M:%S%[.M] %Y"));
+ assert (fail ("Apr 08 19:31:10.12 2016", "%b %d %H:%M:%S%[.M] %Y"));
+ assert (fail ("Apr 08 19:31:10.", "%b %d %H:%M:%S%[.U] %Y"));
+ assert (fail ("Apr 08 19:31:10.1", "%b %d %H:%M:%S%[.M] %Y"));
+ assert (fail ("Apr 08 19:31:10.12", "%b %d %H:%M:%S%[.M] %Y"));
+
+ // Input is not fully parsed.
+ //
+ assert (fail (
+ "Feb 21 19:31:10.123456789 2016 GMT", "%b %d %H:%M:%S%[.N] %Y"));
+
+ // Invalid input (%[] unrelated).
+ //
+ assert (fail ("Apr 08 19:31:10.123456789 ABC", "%b %d %H:%M:%S%[.N] %Y"));
+ assert (fail ("Apr 19:31:10 2016", "%b %d %H:%M:%S %Y"));
+ assert (fail ("Opr 08 19:31:10 2016", "%b %d %H:%M:%S %Y"));
+
+ // Parse valid input with a valid format.
+ //
+ assert (parse (
+ "Apr 18 19:31:10 2016", "%b %d %H:%M:%S %Y", "Apr 18 19:31:10 2016"));
+
+ assert (parse ("Apr 08 19:31:10 2016", "%b %d %H:%M:%S %Y"));
+ assert (parse ("2016-04-08 19:31:10", "%Y-%m-%d %H:%M:%S"));
+
+ assert (parse ("ABC=Apr 18 19:31:10 2016 ABC", "ABC=%b %d %H:%M:%S %Y"));
+ assert (parse ("ABC=2016-04-08 19:31:10 ABC", "ABC=%Y-%m-%d %H:%M:%S"));
+
+ assert (parse ("Feb 11 19:31:10 2016 GMT", "%b %d %H:%M:%S%[.N] %Y"));
+ assert (parse ("2016-02-11 19:31:10 GMT", "%Y-%m-%d %H:%M:%S%[.N]"));
+
+ assert (parse (
+ "Feb 21 19:31:10.384902285 2016 GMT", "%b %d %H:%M:%S%[.N] %Y"));
+ assert (parse (
+ "2016-02-21 19:31:10.384902285 GMT", "%Y-%m-%d %H:%M:%S%[.N]"));
+
+ assert (parse (
+ "Feb 21 19:31:10 .384902285 2016 GMT", "%b %d %H:%M:%S %[.N] %Y"));
+ assert (parse (
+ "2016-02-21 19:31:10 .384902285 GMT", "%Y-%m-%d %H:%M:%S %[.N]"));
+
+ assert (parse (
+ "2016-02-21 19:31:10 .384902285 GMT",
+ "%Y-%m-%d %H:%M:%S %[.N]",
+ "2016-02-21 19:31:10 .384902285 GMT"));
+
+ assert (parse (
+ "2016-02-21 19:31:10 .384902285 GMT",
+ "%Y-%m-%d %H:%M:%S %[.N]",
+ "2016-02-21 19:31:10 .384902285 GMT"));
+
+ assert (parse (
+ "Feb 21 19:31:10 .384902285NS 2016 GMT", "%b %d %H:%M:%S %[.N]NS %Y"));
+ assert (parse (
+ "2016-02-21 19:31:10 .384902285NS GMT", "%Y-%m-%d %H:%M:%S %[.N]NS"));
+
+ assert (parse (
+ ".384902285 Feb 21 19:31:10 2016", "%[.N] %b %d %H:%M:%S %Y"));
+ assert (parse (
+ ".384902285 2016-02-21 19:31:10", "%[.N] %Y-%m-%d %H:%M:%S"));
+ assert (parse (
+ ".3849022852016-02-21 19:31:10", "%[.N]%Y-%m-%d %H:%M:%S"));
+
+ // Check parsing of a localized month name. Both the C locale and the
+ // global C++ locale are switched to German for this check.
+ //
+ // NOTE(review): this presumably requires the de_DE.utf-8 locale to be
+ // available on the system; confirm how its absence should be handled.
+ //
+ setlocale (LC_ALL, "de_DE.utf-8");
+ locale::global (locale ("de_DE.utf-8"));
+ assert (parse ("Mai 11 19:31:10 2016 GMT", "%b %d %H:%M:%S%[.N] %Y"));
+ locale::global (locale ("C"));
+
+ // @@ When debugging strptime() fallback implementation compiled with GCC
+ // 5.3.1, the following asserts will fail due to bugs in implementation
+ // of std::get_time() manipulator. So need to be commented out.
+ //
+ assert (fail ("Apr 08 19:31:10 2016", "%b %d %H:%M:%S %Y %"));
+ assert (fail ("Apr 08 19:31:10", "%b %d %H:%M:%S %Y"));
+
+ assert (parse (
+ "Apr 8 19:31:10 2016", "%b %d %H:%M:%S %Y", "Apr 08 19:31:10 2016"));
+}