about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Boris Kolpackov <boris@codesynthesis.com> 2017-05-24 13:26:13 +0200
committer Boris Kolpackov <boris@codesynthesis.com> 2017-05-24 13:28:51 +0200
commit b6e02f4224975a6425f62095bc35478e8866db77 (patch)
tree 51627eef813ebb670efca83aed7dd284cb738806
parent eb3c27b8f47c793244436cd082512bb8235bea89 (diff)
Various improvements to char_scanner
-rw-r--r-- libbutl/char-scanner.cxx 68
-rw-r--r-- libbutl/char-scanner.hxx 38
-rw-r--r-- libbutl/char-scanner.ixx 32
3 files changed, 87 insertions, 51 deletions
diff --git a/libbutl/char-scanner.cxx b/libbutl/char-scanner.cxx
index cbc2503..42a72cc 100644
--- a/libbutl/char-scanner.cxx
+++ b/libbutl/char-scanner.cxx
@@ -14,44 +14,44 @@ namespace butl
peek () -> xchar
{
if (unget_)
- return buf_;
- else
- {
- if (eos_)
- return xchar (xchar::traits_type::eof (), line, column);
- else
- {
- xchar::int_type v (is_.peek ());
+ return ungetc_;
- if (v == xchar::traits_type::eof ())
- eos_ = true;
- else if (crlf_ && v == 0x0D)
- {
- is_.get ();
- xchar::int_type v1 (is_.peek ());
+ if (unpeek_)
+ return unpeekc_;
- if (v1 != '\n')
- {
- unget_ = true;
- buf_ = '\n';
- }
+ if (eos_)
+ return xchar (xchar::traits_type::eof (), line, column);
- v = '\n';
- }
+ xchar::int_type v (is_.peek ());
- return xchar (v, line, column);
+ if (v == xchar::traits_type::eof ())
+ eos_ = true;
+ else if (crlf_ && v == 0x0D)
+ {
+ is_.get ();
+ xchar::int_type v1 (is_.peek ());
+
+ if (v1 != '\n')
+ {
+ // We need to make sure subsequent calls to peek() return newline.
+ //
+ unpeek_ = true;
+ unpeekc_ = xchar ('\n', line, column);
}
+
+ v = '\n';
}
+
+ return xchar (v, line, column);
}
- auto char_scanner::
- get () -> xchar
+ void char_scanner::
+ get (const xchar& c)
{
if (unget_)
- {
unget_ = false;
- return buf_;
- }
+ else if (unpeek_)
+ unpeek_ = false;
else
{
// When is_.get () returns eof, the failbit is also set (stupid,
@@ -60,8 +60,6 @@ namespace butl
// eof. But we can only call peek() on eof once; any subsequent
// calls will spoil the failbit (even more stupid).
//
- xchar c (peek ());
-
if (!eos (c))
{
is_.get ();
@@ -74,18 +72,6 @@ namespace butl
else
column++;
}
-
- return c;
}
}
-
- void char_scanner::
- unget (const xchar& c)
- {
- // Because iostream::unget cannot work once eos is reached,
- // we have to provide our own implementation.
- //
- buf_ = c;
- unget_ = true;
- }
}
diff --git a/libbutl/char-scanner.hxx b/libbutl/char-scanner.hxx
index 71f8313..e71f286 100644
--- a/libbutl/char-scanner.hxx
+++ b/libbutl/char-scanner.hxx
@@ -33,8 +33,12 @@ namespace butl
//
public:
- // Extended character. It includes line/column information
- // and is capable of representing EOF.
+ // Extended character. It includes line/column information and is capable
+ // of representing EOF.
+ //
+ // Note that implicit conversion of EOF to char_type results in NUL
+ // character (which means in most cases it is safe to compare xchar to
+ // char without checking for EOF).
//
class xchar
{
@@ -47,7 +51,12 @@ namespace butl
std::uint64_t line;
std::uint64_t column;
- operator char_type () const {return static_cast<char_type> (value);}
+ operator char_type () const
+ {
+ return value != traits_type::eof ()
+ ? static_cast<char_type> (value)
+ : char_type (0);
+ }
xchar (int_type v, std::uint64_t l = 0, std::uint64_t c = 0)
: value (v), line (l), column (c) {}
@@ -57,6 +66,9 @@ namespace butl
get ();
void
+ get (const xchar& peeked); // Get previously peeked character (faster).
+
+ void
unget (const xchar&);
// Note that if there is an "ungot" character, peek() will return
@@ -71,20 +83,26 @@ namespace butl
static bool
eos (const xchar& c) {return c.value == xchar::traits_type::eof ();}
- // Line and column of the furthest seen (either via get() or
- // peek()) character.
+ // Line and column of the next character to be extracted from the stream
+ // by peek() or get().
//
- std::uint64_t line {1};
- std::uint64_t column {1};
+ std::uint64_t line = 1;
+ std::uint64_t column = 1;
protected:
std::istream& is_;
+
bool crlf_;
+ bool eos_ = false;
+
+ bool unget_ = false;
+ bool unpeek_ = false;
- bool unget_ {false};
- xchar buf_ = '\0';
- bool eos_ {false};
+ xchar ungetc_ = '\0';
+ xchar unpeekc_ = '\0';
};
}
+#include <libbutl/char-scanner.ixx>
+
#endif // LIBBUTL_CHAR_SCANNER_HXX
diff --git a/libbutl/char-scanner.ixx b/libbutl/char-scanner.ixx
new file mode 100644
index 0000000..2d96207
--- /dev/null
+++ b/libbutl/char-scanner.ixx
@@ -0,0 +1,32 @@
+// file : libbutl/char-scanner.ixx -*- C++ -*-
+// copyright : Copyright (c) 2014-2017 Code Synthesis Ltd
+// license : MIT; see accompanying LICENSE file
+
+namespace butl
+{
+ inline auto char_scanner::
+ get () -> xchar // Extract and return the next character (possibly EOF).
+ {
+ if (unget_) // A character pushed back with unget() takes precedence.
+ {
+ unget_ = false; // The ungot character is handed out only once.
+ return ungetc_;
+ }
+ else
+ {
+ xchar c (peek ()); // Peek first; get(c) then decides whether to touch the stream.
+ get (c); // NOTE(review): for a lone CR (unpeek_) get(c) only clears the flag; confirm line/column are meant to stay unchanged.
+ return c;
+ }
+ }
+
+ inline void char_scanner::
+ unget (const xchar& c) // Push c back so that the next peek()/get() returns it.
+ {
+ // Because iostream::unget cannot work once eos is reached, we have to
+ // provide our own implementation: the character is stored in ungetc_ and
+ // unget_ is raised. Only one character is held; another unget() overwrites it.
+ unget_ = true;
+ ungetc_ = c;
+ }
+}