From 5ae9686adac1508873f2d980e84becd3496244c2 Mon Sep 17 00:00:00 2001 From: Karen Arutyunov Date: Wed, 26 Feb 2020 17:16:45 +0300 Subject: Add notion of validator to char_scanner and make sure manifest is UTF-8 This involves implementing utf8_validator and UTF-8 utility functions and using them during the manifest parsing, serialization, and rewriting. --- tests/manifest-parser/driver.cxx | 40 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 39 insertions(+), 1 deletion(-) (limited to 'tests/manifest-parser') diff --git a/tests/manifest-parser/driver.cxx b/tests/manifest-parser/driver.cxx index 57674cb..a34f2b7 100644 --- a/tests/manifest-parser/driver.cxx +++ b/tests/manifest-parser/driver.cxx @@ -40,6 +40,9 @@ namespace butl static bool equal (const optional& actual, const optional& expected); + static pairs + parse (const char* m, manifest_parser::filter_function f = {}); + // Test manifest as it is represented in the stream, including format // version and end-of-manifest values. // @@ -188,6 +191,41 @@ namespace butl assert (p.first == "" && p.second == "comment"); } + // UTF-8. + // + assert (test (":1\n#\xD0\xB0\n\xD0\xB0y\xD0\xB0:\xD0\xB0z\xD0\xB0", + {{"","1"}, + {"\xD0\xB0y\xD0\xB0", "\xD0\xB0z\xD0\xB0"}, + {"",""}, + {"",""}})); + + assert (fail (":1\n#\xD0\n\xD0\xB0y\xD0\xB0:\xD0\xB0z\xD0\xB0")); + assert (fail (":1\n#\xD0\xB0\n\xB0y\xD0\xB0:\xD0\xB0z\xD0\xB0")); + assert (fail (":1\n#\xD0\xB0\n\xD0y\xD0\xB0:\xD0\xB0z\xD0\xB0")); + assert (fail (":1\n#\xD0\xB0\n\xD0\xB0y\xD0:\xD0\xB0z\xD0\xB0")); + assert (fail (":1\n#\xD0\xB0\n\xD0\xB0y\xD0\xB0:\xD0z\xD0\xB0")); + assert (fail (":1\n#\xD0\xB0\n\xD0\xB0y\xD0\xB0:\xD0\xB0z\xD0")); + assert (fail (":1\r\r\xB0#\n\xD0\xB0y\xD0\xB0:\xD0\xB0z\xD0\xB0")); + assert (fail (":1\r\xD0#\n\xD0\xB0y\xD0\xB0:\xD0\xB0z\xD0\xB0")); + assert (fail (":1\n#\xD0\xB0\n\xD0\xB0y\xD0\xB0:\xD0\xB0z\xD0\xB0\r\xD0")); + + // Test parsing failure for manifest with multi-byte UTF-8 sequences + // (the column is properly reported, etc). + // + try + { + parse (":1\na\xD0\xB0\xD0\xB0\xFE"); + assert (false); + } + catch (const manifest_parsing& e) + { + assert (e.line == 2 && + e.column == 4 && + e.description == + "invalid manifest name: " + "invalid UTF-8 sequence first byte (0xFE)"); + } + // Filtering. // assert (test (":1\na: abc\nb: bca\nc: cab", @@ -281,7 +319,7 @@ namespace butl } static pairs - parse (const char* m, manifest_parser::filter_function f = {}) + parse (const char* m, manifest_parser::filter_function f) { istringstream is (m); is.exceptions (istream::failbit | istream::badbit); -- cgit v1.1