From 5ae9686adac1508873f2d980e84becd3496244c2 Mon Sep 17 00:00:00 2001 From: Karen Arutyunov Date: Wed, 26 Feb 2020 17:16:45 +0300 Subject: Add notion of validator to char_scanner and make sure manifest is UTF-8 This involves implementing utf8_validator and UTF-8 utility functions and using them during the manifest parsing, serialization, and rewriting. --- libbutl/char-scanner.ixx | 56 ++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 49 insertions(+), 7 deletions(-) (limited to 'libbutl/char-scanner.ixx') diff --git a/libbutl/char-scanner.ixx b/libbutl/char-scanner.ixx index 36cc93d..7e9c4b0 100644 --- a/libbutl/char-scanner.ixx +++ b/libbutl/char-scanner.ixx @@ -3,8 +3,30 @@ namespace butl { - inline auto char_scanner:: - get () -> xchar + template + inline char_scanner:: + char_scanner (std::istream& is, bool crlf, std::uint64_t l, std::uint64_t p) + : char_scanner (is, validator_type (), crlf, l, p) + { + } + + template + inline auto char_scanner:: + peek (std::string& what) -> xchar + { + return peek (&what); + } + + template + inline auto char_scanner:: + peek () -> xchar + { + return peek (nullptr /* what */); + } + + template + inline auto char_scanner:: + get (std::string* what) -> xchar { if (unget_) { @@ -13,13 +35,28 @@ namespace butl } else { - xchar c (peek ()); + xchar c (peek (what)); get (c); return c; } } - inline void char_scanner:: + template + inline auto char_scanner:: + get (std::string& what) -> xchar + { + return get (&what); + } + + template + inline auto char_scanner:: + get () -> xchar + { + return get (nullptr /* what */); + } + + template + inline void char_scanner:: unget (const xchar& c) { // Because iostream::unget cannot work once eos is reached, we have to @@ -29,7 +66,8 @@ namespace butl ungetc_ = c; } - inline auto char_scanner:: + template + inline auto char_scanner:: peek_ () -> int_type { if (gptr_ != egptr_) @@ -48,7 +86,8 @@ namespace butl return r; } - inline void char_scanner:: + template + inline void char_scanner:: get_ () { int_type c; @@ -61,11 +100,14 @@ namespace butl else c = is_.get (); // About as fast as ignore() and way faster than tellg(). + validated_ = false; + if (save_ != nullptr && c != xchar::traits_type::eof ()) save_->push_back (static_cast (c)); } - inline std::uint64_t char_scanner:: + template + inline std::uint64_t char_scanner:: pos_ () const { return buf_ != nullptr ? buf_->tellg () : 0; -- cgit v1.1