aboutsummaryrefslogtreecommitdiff
path: root/libbutl/manifest-parser.cxx
diff options
context:
space:
mode:
Diffstat (limited to 'libbutl/manifest-parser.cxx')
-rw-r--r--libbutl/manifest-parser.cxx238
1 files changed, 175 insertions, 63 deletions
diff --git a/libbutl/manifest-parser.cxx b/libbutl/manifest-parser.cxx
index 9514bbd..904910a 100644
--- a/libbutl/manifest-parser.cxx
+++ b/libbutl/manifest-parser.cxx
@@ -1,39 +1,10 @@
// file : libbutl/manifest-parser.cxx -*- C++ -*-
// license : MIT; see accompanying LICENSE file
-#ifndef __cpp_modules_ts
-#include <libbutl/manifest-parser.mxx>
-#endif
+#include <libbutl/manifest-parser.hxx>
-#include <cassert>
-
-#ifndef __cpp_lib_modules_ts
#include <string>
-#include <vector>
-#include <cstdint>
-#include <utility>
-#include <stdexcept>
-
-#include <sstream>
-#endif
-
-// Other includes.
-
-#ifdef __cpp_modules_ts
-module butl.manifest_parser;
-
-// Only imports additional to interface.
-#ifdef __clang__
-#ifdef __cpp_lib_modules_ts
-import std.core;
-import std.io;
-#endif
-import butl.optional;
-import butl.char_scanner;
-import butl.manifest_types;
-#endif
-
-#endif
+#include <cassert>
using namespace std;
@@ -177,41 +148,136 @@ namespace butl
{
using iterator = string::const_iterator;
- auto space = [] (char c) -> bool {return c == ' ' || c == '\t';};
+ // Parse the value differently depending on whether it is multi-line or
+ // not.
+ //
+ if (v.find ('\n') == string::npos) // Single-line.
+ {
+ auto space = [] (char c) {return c == ' ' || c == '\t';};
- iterator i (v.begin ());
- iterator e (v.end ());
+ iterator i (v.begin ());
+ iterator e (v.end ());
- string r;
- size_t n (0);
- for (char c; i != e && (c = *i) != ';'; ++i)
- {
- // Unescape ';' character.
+ string r;
+ size_t n (0);
+ for (char c; i != e && (c = *i) != ';'; ++i)
+ {
+ // Unescape ';' and '\' characters.
+ //
+ if (c == '\\' && i + 1 != e && (*(i + 1) == ';' || *(i + 1) == '\\'))
+ c = *++i;
+
+ r += c;
+
+ if (!space (c))
+ n = r.size ();
+ }
+
+ // Strip the value trailing spaces.
//
- if (c == '\\' && i + 1 != e && *(i + 1) == ';')
- c = *++i;
+ if (r.size () != n)
+ r.resize (n);
- r += c;
+ // Find beginning of a comment (i).
+ //
+ if (i != e)
+ {
+ // Skip spaces.
+ //
+ for (++i; i != e && space (*i); ++i);
+ }
- if (!space (c))
- n = r.size ();
+ return make_pair (move (r), string (i, e));
}
+ else // Multi-line.
+ {
+ string r;
+ string c;
- // Strip the value trailing spaces.
- //
- if (r.size () != n)
- r.resize (n);
+ // Parse the value lines until the comment separator is encountered or
+ // the end of the value is reached. Add these lines to the resulting
+ // value, unescaping them if required.
+ //
+ // Note that we only need to unescape lines which have the '\+;' form.
+ //
+ auto i (v.begin ());
+ auto e (v.end ());
- // Find beginning of a comment (i).
- //
- if (i != e)
- {
- // Skip spaces.
+ while (i != e)
+ {
+ // Find the end of the line and while at it the first non-backslash
+ // character.
+ //
+ auto le (i);
+ auto nb (e);
+ for (; le != e && *le != '\n'; ++le)
+ {
+ if (nb == e && *le != '\\')
+ nb = le;
+ }
+
+ // If the value end is not reached then position to the beginning of
+ // the next line and to the end of the value otherwise.
+ //
+ auto next = [&i, &le, &e] () {i = (le != e ? le + 1 : e);};
+
+ // If the first non-backslash character is ';' and it is the last
+ // character on the line, then this is either the comment separator or
+ // an escape sequence.
+ //
+ if (nb != e && *nb == ';' && nb + 1 == le)
+ {
+ // If ';' is the first (and thus the only) character on the line,
+ // then this is the comment separator and we bail out from this
+ // loop. Note that in this case we need to trim the trailing newline
+ // (but only one) from the resulting value since it is considered as
+ // a part of the separator.
+ //
+ if (nb == i)
+ {
+ if (!r.empty ())
+ {
+ assert (r.back () == '\n');
+ r.pop_back ();
+ }
+
+ next ();
+ break;
+ }
+ //
+ // Otherwise, this is an escape sequence, so unescape it. For that
+ // just take the rightmost half of the string:
+ //
+ // \; -> ;
+ // \\; -> \;
+ // \\\; -> \;
+ // \\\\; -> \\;
+ // \\\\\; -> \\;
+ //
+ else
+ i += (le - i) / 2;
+ }
+
+ // Add the line to the resulting value together with the trailing
+ // newline, if present.
+ //
+ r.append (i, le);
+
+ if (le != e)
+ r += '\n';
+
+ next ();
+ }
+
+ // If we haven't reached the end of the value then it means we've
+ // encountered the comment separator. In this case save the remaining
+ // value part as a comment.
//
- for (++i; i != e && space (*i); ++i);
- }
+ if (i != e)
+ c = string (i, e);
- return make_pair (move (r), string (i, e));
+ return make_pair (move (r), move (c));
+ }
}
void manifest_parser::
@@ -251,7 +317,8 @@ namespace butl
string& v (r.value);
string::size_type n (0); // Size of last non-space character (simple mode).
- // Detect the multi-line mode introductor.
+ // Detect the old-fashioned multi-line mode introducer (like in
+ // 'foo:\<newline>').
//
bool ml (false);
if (c == '\\')
@@ -266,11 +333,46 @@ namespace butl
ml = true;
}
else if (eos (p))
+ {
+ c = p; // Set to EOF.
ml = true;
+ }
else
unget (c);
}
+ // Detect the new-fashioned multi-line mode introducer (like in
+ // 'foo:<newline>\<newline>').
+ //
+ if (!ml && c == '\n')
+ {
+ get ();
+ xchar p1 (peek ());
+
+ if (p1 == '\\')
+ {
+ get ();
+ xchar p2 (peek ());
+
+ if (p2 == '\n')
+ {
+ get (); // Newline is not part of the value so skip it.
+ c = peek ();
+ ml = true;
+ }
+ else if (eos (p2))
+ {
+ c = p2; // Set to EOF.
+ ml = true;
+ }
+ else
+ unget (p1); // Unget '\\'. Note: '\n' will be ungot below.
+ }
+
+ if (!ml)
+ unget (c); // Unget '\n'.
+ }
+
// Multi-line value starts from the line that follows the name.
//
if (ml)
@@ -281,7 +383,7 @@ namespace butl
// The nl flag signals that the preceding character was a "special
// newline", that is, a newline that was part of the milti-line mode
- // introductor or an escape sequence.
+ // introducer or an escape sequence.
//
for (bool nl (ml); !eos (c); c = peek ())
{
@@ -299,7 +401,7 @@ namespace butl
//
// The first block handles the special sequence that starts with
// a special newline. In multi-line mode, this is an "immediate
- // termination" where we "use" the newline from the introductor.
+ // termination" where we "use" the newline from the introducer.
// Note also that in the simple mode the special sequence can
// only start with a special (i.e., escaped) newline.
//
@@ -472,11 +574,21 @@ namespace butl
static inline string
format (const string& n, uint64_t l, uint64_t c, const string& d)
{
- ostringstream os;
+ using std::to_string;
+
+ string r;
if (!n.empty ())
- os << n << ':';
- os << l << ':' << c << ": error: " << d;
- return os.str ();
+ {
+ r += n;
+ r += ':';
+ }
+
+ r += to_string (l);
+ r += ':';
+ r += to_string (c);
+ r += ": error: ";
+ r += d;
+ return r;
}
manifest_parsing::