From 5ae9686adac1508873f2d980e84becd3496244c2 Mon Sep 17 00:00:00 2001 From: Karen Arutyunov Date: Wed, 26 Feb 2020 17:16:45 +0300 Subject: Add notion of validator to char_scanner and make sure manifest is UTF-8 This involves implementing utf8_validator and UTF-8 utility functions and using them during the manifest parsing, serialization, and rewriting. --- libbutl/manifest-rewriter.cxx | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) (limited to 'libbutl/manifest-rewriter.cxx') diff --git a/libbutl/manifest-rewriter.cxx b/libbutl/manifest-rewriter.cxx index ba0c866..e38d5f4 100644 --- a/libbutl/manifest-rewriter.cxx +++ b/libbutl/manifest-rewriter.cxx @@ -30,8 +30,10 @@ import butl.fdstream; import butl.manifest_types; #endif +import butl.utility; // utf8_length() import butl.manifest_serializer; #else +#include #include #endif @@ -101,8 +103,16 @@ namespace butl manifest_serializer s (os, path_.string (), long_lines_); + // Note that the name can be surrounded with the ASCII whitespace + // characters and the start_pos refers to the first character in the + // line. + // + // Also note that we assume the already serialized name to be a valid + // UTF-8 byte string and so utf8_length() may not throw. + // s.write_value (nv.value, - static_cast<size_t> (nv.colon_pos - nv.start_pos + 2)); + static_cast<size_t> (nv.colon_pos - nv.start_pos) - + (nv.name.size () - utf8_length (nv.name)) + 2); } os << suffix; @@ -128,15 +138,21 @@ namespace butl os << '\n'; manifest_serializer s (os, path_.string (), long_lines_); - s.write_name (nv.name); + size_t n (s.write_name (nv.name)); os << ':'; if (!nv.value.empty ()) { os << ' '; + + // Note that the name can be surrounded with the ASCII whitespace + // characters and the start_pos refers to the first character in the + // line. + // s.write_value (nv.value, - static_cast<size_t> (nv.colon_pos - nv.start_pos + 2)); + static_cast<size_t> (nv.colon_pos - nv.start_pos) - + (nv.name.size () - n) + 2); } os << suffix; -- cgit v1.1