aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKaren Arutyunov <karen@codesynthesis.com>2022-06-23 22:55:20 +0300
committerKaren Arutyunov <karen@codesynthesis.com>2022-06-24 15:58:39 +0300
commit2d15efda5db161accd5f572fd4816885bce7c68c (patch)
tree3832d4e33aeb7d91418d07bfe0a824723ff18a37
parent0ae2c768fc8b2e43d271284544af35a27dc2cd17 (diff)
Split and merge manifest value/comment pair differently depending on whether it is multiline or not
-rw-r--r--libbutl/manifest-parser.cxx145
-rw-r--r--libbutl/manifest-serializer.cxx91
-rw-r--r--tests/manifest-parser/driver.cxx65
-rw-r--r--tests/manifest-roundtrip/driver.cxx13
-rw-r--r--tests/manifest-roundtrip/testscript40
-rw-r--r--tests/manifest-serializer/driver.cxx29
6 files changed, 340 insertions, 43 deletions
diff --git a/libbutl/manifest-parser.cxx b/libbutl/manifest-parser.cxx
index 258a536..904910a 100644
--- a/libbutl/manifest-parser.cxx
+++ b/libbutl/manifest-parser.cxx
@@ -148,41 +148,136 @@ namespace butl
{
using iterator = string::const_iterator;
- auto space = [] (char c) -> bool {return c == ' ' || c == '\t';};
+ // Parse the value differently depending on whether it is multi-line or
+ // not.
+ //
+ if (v.find ('\n') == string::npos) // Single-line.
+ {
+ auto space = [] (char c) {return c == ' ' || c == '\t';};
- iterator i (v.begin ());
- iterator e (v.end ());
+ iterator i (v.begin ());
+ iterator e (v.end ());
- string r;
- size_t n (0);
- for (char c; i != e && (c = *i) != ';'; ++i)
- {
- // Unescape ';' character.
+ string r;
+ size_t n (0);
+ for (char c; i != e && (c = *i) != ';'; ++i)
+ {
+ // Unescape ';' and '\' characters.
+ //
+ if (c == '\\' && i + 1 != e && (*(i + 1) == ';' || *(i + 1) == '\\'))
+ c = *++i;
+
+ r += c;
+
+ if (!space (c))
+ n = r.size ();
+ }
+
+ // Strip the value trailing spaces.
//
- if (c == '\\' && i + 1 != e && *(i + 1) == ';')
- c = *++i;
+ if (r.size () != n)
+ r.resize (n);
- r += c;
+ // Find beginning of a comment (i).
+ //
+ if (i != e)
+ {
+ // Skip spaces.
+ //
+ for (++i; i != e && space (*i); ++i);
+ }
- if (!space (c))
- n = r.size ();
+ return make_pair (move (r), string (i, e));
}
+ else // Multi-line.
+ {
+ string r;
+ string c;
- // Strip the value trailing spaces.
- //
- if (r.size () != n)
- r.resize (n);
+ // Parse the value lines until the comment separator is encountered or
+ // the end of the value is reached. Add these lines to the resulting
+ // value, unescaping them if required.
+ //
+ // Note that we only need to unescape lines which have the '\+;' form.
+ //
+ auto i (v.begin ());
+ auto e (v.end ());
- // Find beginning of a comment (i).
- //
- if (i != e)
- {
- // Skip spaces.
+ while (i != e)
+ {
+ // Find the end of the line and while at it the first non-backslash
+ // character.
+ //
+ auto le (i);
+ auto nb (e);
+ for (; le != e && *le != '\n'; ++le)
+ {
+ if (nb == e && *le != '\\')
+ nb = le;
+ }
+
+ // If the value end is not reached then position to the beginning of
+ // the next line and to the end of the value otherwise.
+ //
+ auto next = [&i, &le, &e] () {i = (le != e ? le + 1 : e);};
+
+ // If the first non-backslash character is ';' and it is the last
+ // character on the line, then this is either the comment separator or
+ // an escape sequence.
+ //
+ if (nb != e && *nb == ';' && nb + 1 == le)
+ {
+ // If ';' is the first (and thus the only) character on the line,
+ // then this is the comment separator and we bail out from this
+ // loop. Note that in this case we need to trim the trailing newline
+ // (but only one) from the resulting value since it is considered as
+ // a part of the separator.
+ //
+ if (nb == i)
+ {
+ if (!r.empty ())
+ {
+ assert (r.back () == '\n');
+ r.pop_back ();
+ }
+
+ next ();
+ break;
+ }
+ //
+ // Otherwise, this is an escape sequence, so unescape it. For that
+ // just take the rightmost half of the string:
+ //
+ // \; -> ;
+ // \\; -> \;
+ // \\\; -> \;
+ // \\\\; -> \\;
+ // \\\\\; -> \\;
+ //
+ else
+ i += (le - i) / 2;
+ }
+
+ // Add the line to the resulting value together with the trailing
+ // newline, if present.
+ //
+ r.append (i, le);
+
+ if (le != e)
+ r += '\n';
+
+ next ();
+ }
+
+ // If we haven't reached the end of the value then it means we've
+ // encountered the comment separator. In this case save the remaining
+ // value part as a comment.
//
- for (++i; i != e && space (*i); ++i);
- }
+ if (i != e)
+ c = string (i, e);
- return make_pair (move (r), string (i, e));
+ return make_pair (move (r), move (c));
+ }
}
void manifest_parser::
diff --git a/libbutl/manifest-serializer.cxx b/libbutl/manifest-serializer.cxx
index b0d0324..26699e0 100644
--- a/libbutl/manifest-serializer.cxx
+++ b/libbutl/manifest-serializer.cxx
@@ -101,22 +101,89 @@ namespace butl
merge_comment (const string& value, const string& comment)
{
string r;
- for (char c: value)
+
+ // Merge the value and comment differently depending on whether any of
+ // them is multi-line or not.
+ //
+ if (value.find ('\n') == string::npos && // Single-line.
+ comment.find ('\n') == string::npos)
{
- // Escape ';' character.
- //
- if (c == ';')
- r += '\\';
+ for (char c: value)
+ {
+ // Escape ';' and '\' characters.
+ //
+ if (c == ';' || c == '\\')
+ r += '\\';
- r += c;
- }
+ r += c;
+ }
- // Add the comment.
- //
- if (!comment.empty ())
+ // Add the comment.
+ //
+ if (!comment.empty ())
+ {
+ r += "; ";
+ r += comment;
+ }
+ }
+ else // Multi-line.
{
- r += "; ";
- r += comment;
+ // Parse the value lines and add them to the resulting value, escaping
+ // them if required.
+ //
+ // Note that we only need to escape lines which have the '\*;' form.
+ //
+ for (auto i (value.begin ()), e (value.end ()); i != e; )
+ {
+ // Find the end of the line and while at it the first non-backslash
+ // character.
+ //
+ auto le (i);
+ auto nb (e);
+ for (; le != e && *le != '\n'; ++le)
+ {
+ if (nb == e && *le != '\\')
+ nb = le;
+ }
+
+ // If the first non-backslash character is ';' and it is the last
+ // character on the line, then we need to escape the line characters.
+ // Note that we only escape ';' if it is the only character on the
+ // line. Otherwise, we only escape backslashes doubling the number of
+ // them from the left:
+ //
+ // ; -> \;
+ // \; -> \\;
+ // \\; -> \\\\;
+ // \\\; -> \\\\\\;
+ //
+ if (nb != e && *nb == ';' && nb + 1 == le)
+ r.append (nb == i ? 1 : nb - i, '\\');
+
+ // Add the line to the resulting value together with the trailing
+ // newline, if present.
+ //
+ r.append (i, le);
+
+ if (le != e)
+ r += '\n';
+
+ // If the value end is not reached then position to the beginning of
+ // the next line and to the end of the value otherwise.
+ //
+ i = (le != e ? le + 1 : e);
+ }
+
+ // Append the comment, if present.
+ //
+ if (!comment.empty ())
+ {
+ if (!r.empty ())
+ r += '\n';
+
+ r += ";\n";
+ r += comment;
+ }
}
return r;
diff --git a/tests/manifest-parser/driver.cxx b/tests/manifest-parser/driver.cxx
index 6924321..56c614a 100644
--- a/tests/manifest-parser/driver.cxx
+++ b/tests/manifest-parser/driver.cxx
@@ -164,14 +164,18 @@ namespace butl
// Manifest value splitting (into the value/comment pair).
//
+ // Single-line.
+ //
{
- auto p (manifest_parser::split_comment ("value\\; text ; comment text"));
- assert (p.first == "value; text" && p.second == "comment text");
+ auto p (manifest_parser::split_comment (
+ "\\value\\\\\\; text ; comment text"));
+
+ assert (p.first == "\\value\\; text" && p.second == "comment text");
}
{
- auto p (manifest_parser::split_comment ("value"));
- assert (p.first == "value" && p.second == "");
+ auto p (manifest_parser::split_comment ("value\\"));
+ assert (p.first == "value\\" && p.second == "");
}
{
@@ -179,6 +183,59 @@ namespace butl
assert (p.first == "" && p.second == "comment");
}
+ // Multi-line.
+ //
+ {
+ auto p (manifest_parser::split_comment ("value\n;"));
+ assert (p.first == "value" && p.second == "");
+ }
+
+ {
+ auto p (manifest_parser::split_comment ("value\ntext\n"));
+ assert (p.first == "value\ntext\n" && p.second == "");
+ }
+
+ {
+ auto p (manifest_parser::split_comment ("value\ntext\n;"));
+ assert (p.first == "value\ntext" && p.second == "");
+ }
+
+ {
+ auto p (manifest_parser::split_comment ("value\ntext\n;\n"));
+ assert (p.first == "value\ntext" && p.second == "");
+ }
+
+ {
+ auto p (manifest_parser::split_comment ("\n\\\nvalue\ntext\n"
+ ";\n"
+ "\n\n comment\ntext"));
+
+ assert (p.first == "\n\\\nvalue\ntext" && p.second ==
+ "\n\n comment\ntext");
+ }
+
+ {
+ auto p (manifest_parser::split_comment ("\n;\ncomment"));
+ assert (p.first == "" && p.second == "comment");
+ }
+
+ {
+ auto p (manifest_parser::split_comment (";\ncomment"));
+ assert (p.first == "" && p.second == "comment");
+ }
+
+ {
+ auto p (manifest_parser::split_comment (";\n"));
+ assert (p.first == "" && p.second == "");
+ }
+
+ {
+ auto p (manifest_parser::split_comment (
+ "\\;\n\\\\;\n\\\\\\;\n\\\\\\\\;\n\\\\\\\\\\;"));
+
+ assert (p.first == ";\n\\;\n\\;\n\\\\;\n\\\\;" && p.second == "");
+ }
+
// UTF-8.
//
assert (test (":1\n#\xD0\xB0\n\xD0\xB0y\xD0\xB0:\xD0\xB0z\xD0\xB0",
diff --git a/tests/manifest-roundtrip/driver.cxx b/tests/manifest-roundtrip/driver.cxx
index 5dc5862..c63a729 100644
--- a/tests/manifest-roundtrip/driver.cxx
+++ b/tests/manifest-roundtrip/driver.cxx
@@ -22,11 +22,16 @@ using namespace butl;
// -m
// Serialize multi-line manifest values using the v2 form.
//
+// -s
+// Split values into the value/comment pairs and merge them back before
+// printing.
+//
int
main (int argc, const char* argv[])
try
{
bool multiline_v2 (false);
+ bool split (false);
for (int i (1); i != argc; ++i)
{
@@ -34,6 +39,8 @@ try
if (v == "-m")
multiline_v2 = true;
+ else if (v == "-s")
+ split = true;
}
// Read/write in binary mode.
@@ -61,6 +68,12 @@ try
else
eom = false;
+ if (split)
+ {
+ const auto& vc (manifest_parser::split_comment (nv.value));
+ nv.value = manifest_serializer::merge_comment (vc.first, vc.second);
+ }
+
s.next (nv.name, nv.value);
}
}
diff --git a/tests/manifest-roundtrip/testscript b/tests/manifest-roundtrip/testscript
index e0a15cc..a228b0f 100644
--- a/tests/manifest-roundtrip/testscript
+++ b/tests/manifest-roundtrip/testscript
@@ -76,3 +76,43 @@ $* -m <<EOF >>EOF
c:\windows\\
\
EOF
+
+: split-merge-comment
+:
+$* -s <<EOF >>EOF
+ : 1
+ info:\
+ value
+ text
+ \
+ info:\
+ value
+ text
+ ;
+ comment
+ \
+ info:\
+ ;
+ comment
+ text
+ \
+ info:\
+ value
+ \;
+ \\
+ ;
+ comment
+ \
+ info:\
+ value
+ \\;
+ ;
+ comment
+ \
+ info:\
+ value
+ \\\\;
+ ;
+ comment
+ \
+ EOF
diff --git a/tests/manifest-serializer/driver.cxx b/tests/manifest-serializer/driver.cxx
index be3ae25..a003fa4 100644
--- a/tests/manifest-serializer/driver.cxx
+++ b/tests/manifest-serializer/driver.cxx
@@ -251,12 +251,37 @@ main ()
// Manifest value/comment merging.
//
- assert (manifest_serializer::merge_comment ("value; text", "comment") ==
- "value\\; text; comment");
+ // Single-line.
+ //
+ assert (manifest_serializer::merge_comment ("value\\; text", "comment") ==
+ "value\\\\\\; text; comment");
assert (manifest_serializer::merge_comment ("value text", "") ==
"value text");
+ // Multi-line.
+ //
+ assert (manifest_serializer::merge_comment ("value\n;\ntext", "comment") ==
+ "value\n\\;\ntext\n;\ncomment");
+
+ assert (manifest_serializer::merge_comment ("value\n\\;\ntext\n",
+ "comment") ==
+ "value\n\\\\;\ntext\n\n;\ncomment");
+
+ assert (manifest_serializer::merge_comment ("value\n\\\\;\ntext\n",
+ "comment") ==
+ "value\n\\\\\\\\;\ntext\n\n;\ncomment");
+
+
+ assert (manifest_serializer::merge_comment ("value\n\\\ntext", "comment") ==
+ "value\n\\\ntext\n;\ncomment");
+
+ assert (manifest_serializer::merge_comment ("\\", "comment\n") ==
+ "\\\n;\ncomment\n");
+
+ assert (manifest_serializer::merge_comment ("", "comment\ntext") ==
+ ";\ncomment\ntext");
+
// Filtering.
//
assert (test ({{"","1"},{"a","abc"},{"b","bca"},{"c","cab"},{"",""},{"",""}},