aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBoris Kolpackov <boris@codesynthesis.com>2015-06-18 15:25:36 +0200
committerBoris Kolpackov <boris@codesynthesis.com>2015-06-18 15:25:36 +0200
commit165ebf5e12d5c421df06d4371295c7a10e30beba (patch)
treeec3f095778ac0e429e163e71bd640145942dadde
parentfb5bab834df474f5fa7c09f137aa375b5bca4718 (diff)
Add char-scanner to libbutl, use in libbpkg and build2
-rw-r--r--bpkg/manifest-parser63
-rw-r--r--bpkg/manifest-parser.cxx111
2 files changed, 25 insertions, 149 deletions
diff --git a/bpkg/manifest-parser b/bpkg/manifest-parser
index 6dc3ee3..cee0786 100644
--- a/bpkg/manifest-parser
+++ b/bpkg/manifest-parser
@@ -10,6 +10,8 @@
#include <cstdint> // uint64_t
#include <stdexcept> // runtime_error
+#include <butl/char-scanner>
+
namespace bpkg
{
class manifest_parsing: public std::runtime_error
@@ -42,11 +44,11 @@ namespace bpkg
empty () const {return name.empty () && value.empty ();}
};
- class manifest_parser
+ class manifest_parser: protected butl::char_scanner
{
public:
manifest_parser (std::istream& is, const std::string& name)
- : is_ (is), name_ (name) {}
+ : char_scanner (is), name_ (name) {}
const std::string&
name () const {return name_;}
@@ -68,31 +70,6 @@ namespace bpkg
next ();
private:
- class xchar
- {
- public:
- typedef std::char_traits<char> traits_type;
- typedef traits_type::int_type int_type;
- typedef traits_type::char_type char_type;
-
- xchar (int_type v, std::uint64_t l, std::uint64_t c)
- : v_ (v), l_ (l), c_ (c) {}
-
- operator char_type () const {return static_cast<char_type> (v_);}
-
- int_type
- value () const {return v_;}
-
- std::uint64_t line () const {return l_;}
- std::uint64_t column () const {return c_;}
-
- private:
- int_type v_;
- std::uint64_t l_;
- std::uint64_t c_;
- };
-
- private:
void
parse_name (manifest_name_value&);
@@ -104,41 +81,11 @@ namespace bpkg
xchar
skip_spaces ();
- // Character interface.
- //
private:
- xchar
- peek ();
-
- xchar
- get ();
-
- void
- unget (const xchar&);
-
- // Tests.
- //
- bool
- is_eos (const xchar& c) const
- {
- return c.value () == xchar::traits_type::eof ();
- }
+ const std::string name_;
- private:
enum {start, body, end} s_ = start;
std::string version_; // Current format version.
-
- private:
- std::istream& is_;
- const std::string name_;
-
- std::uint64_t l_ {1};
- std::uint64_t c_ {1};
-
- bool unget_ {false};
- xchar buf_ {0, 0, 0};
-
- bool eos_ {false};
};
}
diff --git a/bpkg/manifest-parser.cxx b/bpkg/manifest-parser.cxx
index f8a2e36..513573a 100644
--- a/bpkg/manifest-parser.cxx
+++ b/bpkg/manifest-parser.cxx
@@ -18,7 +18,7 @@ namespace bpkg
next ()
{
if (s_ == end)
- return name_value {"", "", l_, c_, l_, c_};
+ return name_value {"", "", line, column, line, column};
xchar c (skip_spaces ());
@@ -36,8 +36,7 @@ namespace bpkg
if (s_ == body && c == ':')
{
s_ = start;
- uint64_t ln (c.line ()), cn (c.column ());
- return name_value {"", "", ln, cn, ln, cn};
+ return name_value {"", "", c.line, c.column, c.line, c.column};
}
// Regardless of the state, what should come next is a name,
@@ -49,13 +48,12 @@ namespace bpkg
skip_spaces ();
c = get ();
- if (is_eos (c))
+ if (eos (c))
{
// This is ok as long as the name is empty.
//
if (!r.name.empty ())
- throw parsing (name_, c.line (), c.column (),
- "':' expected after name");
+ throw parsing (name_, c.line, c.column, "':' expected after name");
s_ = end;
@@ -67,7 +65,7 @@ namespace bpkg
}
if (c != ':')
- throw parsing (name_, c.line (), c.column (), "':' expected after name");
+ throw parsing (name_, c.line, c.column, "':' expected after name");
skip_spaces ();
parse_value (r);
@@ -76,7 +74,7 @@ namespace bpkg
// The character after the value should be either a newline or eos.
//
- assert (c == '\n' || is_eos (c));
+ assert (c == '\n' || eos (c));
if (c == '\n')
get ();
@@ -132,10 +130,10 @@ namespace bpkg
{
xchar c (peek ());
- r.name_line = c.line ();
- r.name_column = c.column ();
+ r.name_line = c.line;
+ r.name_column = c.column;
- for (; !is_eos (c); c = peek ())
+ for (; !eos (c); c = peek ())
{
if (c == ':' || c == ' ' || c == '\t' || c == '\n')
break;
@@ -150,8 +148,8 @@ namespace bpkg
{
xchar c (peek ());
- r.value_line = c.line ();
- r.value_column = c.column ();
+ r.value_line = c.line;
+ r.value_column = c.column;
string& v (r.value);
string::size_type n (0); // Size of last non-space character (simple mode).
@@ -170,7 +168,7 @@ namespace bpkg
c = peek ();
ml = true;
}
- else if (is_eos (p))
+ else if (eos (p))
ml = true;
else
unget (c);
@@ -180,7 +178,7 @@ namespace bpkg
// newline", that is, a newline that was part of the milti-line mode
// introductor or an escape sequence.
//
- for (bool nl (ml); !is_eos (c); c = peek ())
+ for (bool nl (ml); !eos (c); c = peek ())
{
// Detect the special "\n\\\n" sequence. In the multi-line mode,
// this is a "terminator". In the simple mode, this is a way to
@@ -209,7 +207,7 @@ namespace bpkg
get ();
xchar c1 (peek ());
- if (c1 == '\n' || is_eos (c1))
+ if (c1 == '\n' || eos (c1))
{
if (ml)
break;
@@ -240,7 +238,7 @@ namespace bpkg
get ();
xchar c2 (peek ());
- if (c2 == '\n' || is_eos (c2))
+ if (c2 == '\n' || eos (c2))
break;
else
{
@@ -264,7 +262,7 @@ namespace bpkg
get ();
xchar c1 (peek ());
- if (c1 == '\n' || is_eos (c1))
+ if (c1 == '\n' || eos (c1))
{
if (c1 == '\n')
{
@@ -278,7 +276,7 @@ namespace bpkg
get ();
xchar c2 (peek ());
- if (c2 == '\n' || is_eos (c1))
+ if (c2 == '\n' || eos (c1))
{
v += '\\';
n = v.size ();
@@ -314,9 +312,9 @@ namespace bpkg
skip_spaces ()
{
xchar c (peek ());
- bool start (c.column () == 1);
+ bool start (c.column == 1);
- for (; !is_eos (c); c = peek ())
+ for (; !eos (c); c = peek ())
{
switch (c)
{
@@ -344,7 +342,7 @@ namespace bpkg
// Read until newline or eos.
//
- for (c = peek (); !is_eos (c) && c != '\n'; c = peek ())
+ for (c = peek (); !eos (c) && c != '\n'; c = peek ())
get ();
continue;
@@ -359,75 +357,6 @@ namespace bpkg
return c;
}
- // Character interface.
- //
-
- manifest_parser::xchar manifest_parser::
- peek ()
- {
- if (unget_)
- return buf_;
- else
- {
- if (eos_)
- return xchar (xchar::traits_type::eof (), l_, c_);
- else
- {
- xchar::int_type v (is_.peek ());
-
- if (v == xchar::traits_type::eof ())
- eos_ = true;
-
- return xchar (v, l_, c_);
- }
- }
- }
-
- manifest_parser::xchar manifest_parser::
- get ()
- {
- if (unget_)
- {
- unget_ = false;
- return buf_;
- }
- else
- {
- // When is_.get () returns eof, the failbit is also set (stupid,
- // isn't?) which may trigger an exception. To work around this
- // we will call peek() first and only call get() if it is not
- // eof. But we can only call peek() on eof once; any subsequent
- // calls will spoil the failbit (even more stupid).
- //
- xchar c (peek ());
-
- if (!is_eos (c))
- {
- is_.get ();
-
- if (c == '\n')
- {
- l_++;
- c_ = 1;
- }
- else
- c_++;
- }
-
- return c;
- }
- }
-
- void manifest_parser::
- unget (const xchar& c)
- {
- // Because iostream::unget cannot work once eos is reached,
- // we have to provide our own implementation.
- //
- buf_ = c;
- unget_ = true;
- }
-
// manifest_parsing
//