From 2d15efda5db161accd5f572fd4816885bce7c68c Mon Sep 17 00:00:00 2001 From: Karen Arutyunov Date: Thu, 23 Jun 2022 22:55:20 +0300 Subject: Split and merge manifest value/comment pair differently depending on whether it is multiline or not --- libbutl/manifest-parser.cxx | 145 +++++++++++++++++++++++++++++++++------- libbutl/manifest-serializer.cxx | 91 +++++++++++++++++++++---- 2 files changed, 199 insertions(+), 37 deletions(-) (limited to 'libbutl') diff --git a/libbutl/manifest-parser.cxx b/libbutl/manifest-parser.cxx index 258a536..904910a 100644 --- a/libbutl/manifest-parser.cxx +++ b/libbutl/manifest-parser.cxx @@ -148,41 +148,136 @@ namespace butl { using iterator = string::const_iterator; - auto space = [] (char c) -> bool {return c == ' ' || c == '\t';}; + // Parse the value differently depending on whether it is multi-line or + // not. + // + if (v.find ('\n') == string::npos) // Single-line. + { + auto space = [] (char c) {return c == ' ' || c == '\t';}; - iterator i (v.begin ()); - iterator e (v.end ()); + iterator i (v.begin ()); + iterator e (v.end ()); - string r; - size_t n (0); - for (char c; i != e && (c = *i) != ';'; ++i) - { - // Unescape ';' character. + string r; + size_t n (0); + for (char c; i != e && (c = *i) != ';'; ++i) + { + // Unescape ';' and '\' characters. + // + if (c == '\\' && i + 1 != e && (*(i + 1) == ';' || *(i + 1) == '\\')) + c = *++i; + + r += c; + + if (!space (c)) + n = r.size (); + } + + // Strip the value trailing spaces. // - if (c == '\\' && i + 1 != e && *(i + 1) == ';') - c = *++i; + if (r.size () != n) + r.resize (n); - r += c; + // Find beginning of a comment (i). + // + if (i != e) + { + // Skip spaces. + // + for (++i; i != e && space (*i); ++i); + } - if (!space (c)) - n = r.size (); + return make_pair (move (r), string (i, e)); } + else // Multi-line. + { + string r; + string c; - // Strip the value trailing spaces. - // - if (r.size () != n) - r.resize (n); + // Parse the value lines until the comment separator is encountered or + // the end of the value is reached. Add these lines to the resulting + // value, unescaping them if required. + // + // Note that we only need to unescape lines which have the '\+;' form. + // + auto i (v.begin ()); + auto e (v.end ()); - // Find beginning of a comment (i). - // - if (i != e) - { - // Skip spaces. + while (i != e) + { + // Find the end of the line and while at it the first non-backslash + // character. + // + auto le (i); + auto nb (e); + for (; le != e && *le != '\n'; ++le) + { + if (nb == e && *le != '\\') + nb = le; + } + + // If the value end is not reached then position to the beginning of + // the next line and to the end of the value otherwise. + // + auto next = [&i, &le, &e] () {i = (le != e ? le + 1 : e);}; + + // If the first non-backslash character is ';' and it is the last + // character on the line, then this is either the comment separator or + // an escape sequence. + // + if (nb != e && *nb == ';' && nb + 1 == le) + { + // If ';' is the first (and thus the only) character on the line, + // then this is the comment separator and we bail out from this + // loop. Note that in this case we need to trim the trailing newline + // (but only one) from the resulting value since it is considered as + // a part of the separator. + // + if (nb == i) + { + if (!r.empty ()) + { + assert (r.back () == '\n'); + r.pop_back (); + } + + next (); + break; + } + // + // Otherwise, this is an escape sequence, so unescape it. For that + // just take the rightmost half of the string: + // + // \; -> ; + // \\; -> \; + // \\\; -> \; + // \\\\; -> \\; + // \\\\\; -> \\; + // + else + i += (le - i) / 2; + } + + // Add the line to the resulting value together with the trailing + // newline, if present. + // + r.append (i, le); + + if (le != e) + r += '\n'; + + next (); + } + + // If we haven't reached the end of the value then it means we've + // encountered the comment separator. In this case save the remaining + // value part as a comment. // - for (++i; i != e && space (*i); ++i); - } + if (i != e) + c = string (i, e); - return make_pair (move (r), string (i, e)); + return make_pair (move (r), move (c)); + } } void manifest_parser:: diff --git a/libbutl/manifest-serializer.cxx b/libbutl/manifest-serializer.cxx index b0d0324..26699e0 100644 --- a/libbutl/manifest-serializer.cxx +++ b/libbutl/manifest-serializer.cxx @@ -101,22 +101,89 @@ namespace butl merge_comment (const string& value, const string& comment) { string r; - for (char c: value) + + // Merge the value and comment differently depending on whether any of + // them is multi-line or not. + // + if (value.find ('\n') == string::npos && // Single-line. + comment.find ('\n') == string::npos) { - // Escape ';' character. - // - if (c == ';') - r += '\\'; + for (char c: value) + { + // Escape ';' and '\' characters. + // + if (c == ';' || c == '\\') + r += '\\'; - r += c; - } + r += c; + } - // Add the comment. - // - if (!comment.empty ()) + // Add the comment. + // + if (!comment.empty ()) + { + r += "; "; + r += comment; + } + } + else // Multi-line. { - r += "; "; - r += comment; + // Parse the value lines and add them to the resulting value, escaping + // them if required. + // + // Note that we only need to escape lines which have the '\*;' form. + // + for (auto i (value.begin ()), e (value.end ()); i != e; ) + { + // Find the end of the line and while at it the first non-backslash + // character. + // + auto le (i); + auto nb (e); + for (; le != e && *le != '\n'; ++le) + { + if (nb == e && *le != '\\') + nb = le; + } + + // If the first non-backslash character is ';' and it is the last + // character on the line, then we need to escape the line characters. + // Note that we only escape ';' if it is the only character on the + // line. Otherwise, we only escape backslashes doubling the number of + // them from the left: + // + // ; -> \; + // \; -> \\; + // \\; -> \\\\; + // \\\; -> \\\\\\; + // + if (nb != e && *nb == ';' && nb + 1 == le) + r.append (nb == i ? 1 : nb - i, '\\'); + + // Add the line to the resulting value together with the trailing + // newline, if present. + // + r.append (i, le); + + if (le != e) + r += '\n'; + + // If the value end is not reached then position to the beginning of + // the next line and to the end of the value otherwise. + // + i = (le != e ? le + 1 : e); + } + + // Append the comment, if present. + // + if (!comment.empty ()) + { + if (!r.empty ()) + r += '\n'; + + r += ";\n"; + r += comment; + } } return r; -- cgit v1.1