aboutsummaryrefslogtreecommitdiff
path: root/libbutl
diff options
context:
space:
mode:
author Karen Arutyunov <karen@codesynthesis.com> 2022-06-23 22:55:20 +0300
committer Karen Arutyunov <karen@codesynthesis.com> 2022-06-24 15:58:39 +0300
commit 2d15efda5db161accd5f572fd4816885bce7c68c (patch)
tree 3832d4e33aeb7d91418d07bfe0a824723ff18a37 /libbutl
parent 0ae2c768fc8b2e43d271284544af35a27dc2cd17 (diff)
Split and merge manifest value/comment pair differently depending on whether it is multiline or not
Diffstat (limited to 'libbutl')
-rw-r--r-- libbutl/manifest-parser.cxx 145
-rw-r--r-- libbutl/manifest-serializer.cxx 91
2 files changed, 199 insertions, 37 deletions
diff --git a/libbutl/manifest-parser.cxx b/libbutl/manifest-parser.cxx
index 258a536..904910a 100644
--- a/libbutl/manifest-parser.cxx
+++ b/libbutl/manifest-parser.cxx
@@ -148,41 +148,136 @@ namespace butl
{
using iterator = string::const_iterator;
- auto space = [] (char c) -> bool {return c == ' ' || c == '\t';};
+ // Parse the value differently depending on whether it is multi-line or
+ // not.
+ //
+ if (v.find ('\n') == string::npos) // Single-line.
+ {
+ auto space = [] (char c) {return c == ' ' || c == '\t';};
- iterator i (v.begin ());
- iterator e (v.end ());
+ iterator i (v.begin ());
+ iterator e (v.end ());
- string r;
- size_t n (0);
- for (char c; i != e && (c = *i) != ';'; ++i)
- {
- // Unescape ';' character.
+ string r;
+ size_t n (0);
+ for (char c; i != e && (c = *i) != ';'; ++i)
+ {
+ // Unescape ';' and '\' characters.
+ //
+ if (c == '\\' && i + 1 != e && (*(i + 1) == ';' || *(i + 1) == '\\'))
+ c = *++i;
+
+ r += c;
+
+ if (!space (c))
+ n = r.size ();
+ }
+
+ // Strip the value trailing spaces.
//
- if (c == '\\' && i + 1 != e && *(i + 1) == ';')
- c = *++i;
+ if (r.size () != n)
+ r.resize (n);
- r += c;
+ // Find beginning of a comment (i).
+ //
+ if (i != e)
+ {
+ // Skip spaces.
+ //
+ for (++i; i != e && space (*i); ++i);
+ }
- if (!space (c))
- n = r.size ();
+ return make_pair (move (r), string (i, e));
}
+ else // Multi-line.
+ {
+ string r;
+ string c;
- // Strip the value trailing spaces.
- //
- if (r.size () != n)
- r.resize (n);
+ // Parse the value lines until the comment separator is encountered or
+ // the end of the value is reached. Add these lines to the resulting
+ // value, unescaping them if required.
+ //
+ // Note that we only need to unescape lines which have the '\+;' form.
+ //
+ auto i (v.begin ());
+ auto e (v.end ());
- // Find beginning of a comment (i).
- //
- if (i != e)
- {
- // Skip spaces.
+ while (i != e)
+ {
+ // Find the end of the line and while at it the first non-backslash
+ // character.
+ //
+ auto le (i);
+ auto nb (e);
+ for (; le != e && *le != '\n'; ++le)
+ {
+ if (nb == e && *le != '\\')
+ nb = le;
+ }
+
+ // If the value end is not reached then position to the beginning of
+ // the next line and to the end of the value otherwise.
+ //
+ auto next = [&i, &le, &e] () {i = (le != e ? le + 1 : e);};
+
+ // If the first non-backslash character is ';' and it is the last
+ // character on the line, then this is either the comment separator or
+ // an escape sequence.
+ //
+ if (nb != e && *nb == ';' && nb + 1 == le)
+ {
+ // If ';' is the first (and thus the only) character on the line,
+ // then this is the comment separator and we bail out from this
+ // loop. Note that in this case we need to trim the trailing newline
+ // (but only one) from the resulting value since it is considered as
+ // a part of the separator.
+ //
+ if (nb == i)
+ {
+ if (!r.empty ())
+ {
+ assert (r.back () == '\n');
+ r.pop_back ();
+ }
+
+ next ();
+ break;
+ }
+ //
+ // Otherwise, this is an escape sequence, so unescape it. For that
+ // just take the rightmost half of the string:
+ //
+ // \; -> ;
+ // \\; -> \;
+ // \\\; -> \;
+ // \\\\; -> \\;
+ // \\\\\; -> \\;
+ //
+ else
+ i += (le - i) / 2;
+ }
+
+ // Add the line to the resulting value together with the trailing
+ // newline, if present.
+ //
+ r.append (i, le);
+
+ if (le != e)
+ r += '\n';
+
+ next ();
+ }
+
+ // If we haven't reached the end of the value then it means we've
+ // encountered the comment separator. In this case save the remaining
+ // value part as a comment.
//
- for (++i; i != e && space (*i); ++i);
- }
+ if (i != e)
+ c = string (i, e);
- return make_pair (move (r), string (i, e));
+ return make_pair (move (r), move (c));
+ }
}
void manifest_parser::
diff --git a/libbutl/manifest-serializer.cxx b/libbutl/manifest-serializer.cxx
index b0d0324..26699e0 100644
--- a/libbutl/manifest-serializer.cxx
+++ b/libbutl/manifest-serializer.cxx
@@ -101,22 +101,89 @@ namespace butl
merge_comment (const string& value, const string& comment)
{
string r;
- for (char c: value)
+
+ // Merge the value and comment differently depending on whether any of
+ // them is multi-line or not.
+ //
+ if (value.find ('\n') == string::npos && // Single-line.
+ comment.find ('\n') == string::npos)
{
- // Escape ';' character.
- //
- if (c == ';')
- r += '\\';
+ for (char c: value)
+ {
+ // Escape ';' and '\' characters.
+ //
+ if (c == ';' || c == '\\')
+ r += '\\';
- r += c;
- }
+ r += c;
+ }
- // Add the comment.
- //
- if (!comment.empty ())
+ // Add the comment.
+ //
+ if (!comment.empty ())
+ {
+ r += "; ";
+ r += comment;
+ }
+ }
+ else // Multi-line.
{
- r += "; ";
- r += comment;
+ // Parse the value lines and add them to the resulting value, escaping
+ // them if required.
+ //
+ // Note that we only need to escape lines which have the '\*;' form.
+ //
+ for (auto i (value.begin ()), e (value.end ()); i != e; )
+ {
+ // Find the end of the line and while at it the first non-backslash
+ // character.
+ //
+ auto le (i);
+ auto nb (e);
+ for (; le != e && *le != '\n'; ++le)
+ {
+ if (nb == e && *le != '\\')
+ nb = le;
+ }
+
+ // If the first non-backslash character is ';' and it is the last
+ // character on the line, then we need to escape the line characters.
+ // Note that we only escape ';' if it is the only character on the
+ // line. Otherwise, we only escape backslashes doubling the number of
+ // them from the left:
+ //
+ // ; -> \;
+ // \; -> \\;
+ // \\; -> \\\\;
+ // \\\; -> \\\\\\;
+ //
+ if (nb != e && *nb == ';' && nb + 1 == le)
+ r.append (nb == i ? 1 : nb - i, '\\');
+
+ // Add the line to the resulting value together with the trailing
+ // newline, if present.
+ //
+ r.append (i, le);
+
+ if (le != e)
+ r += '\n';
+
+ // If the value end is not reached then position to the beginning of
+ // the next line and to the end of the value otherwise.
+ //
+ i = (le != e ? le + 1 : e);
+ }
+
+ // Append the comment, if present.
+ //
+ if (!comment.empty ())
+ {
+ if (!r.empty ())
+ r += '\n';
+
+ r += ";\n";
+ r += comment;
+ }
}
return r;