From 1b57e247b8d1a7a41a8ee45d6d524c71edd63a81 Mon Sep 17 00:00:00 2001 From: Boris Kolpackov Date: Fri, 26 May 2017 17:20:30 +0200 Subject: Optimize char_scanner if used with ifdstream --- libbutl/char-scanner.cxx | 20 ++++++++++++++------ libbutl/char-scanner.hxx | 31 +++++++++++++++++++++++++------ libbutl/char-scanner.ixx | 31 +++++++++++++++++++++++++++++++ libbutl/fdstream.hxx | 12 ++++++++++-- 4 files changed, 80 insertions(+), 14 deletions(-) diff --git a/libbutl/char-scanner.cxx b/libbutl/char-scanner.cxx index 42a72cc..a2baaf3 100644 --- a/libbutl/char-scanner.cxx +++ b/libbutl/char-scanner.cxx @@ -4,12 +4,20 @@ #include -#include - using namespace std; namespace butl { + char_scanner:: + char_scanner (istream& is, bool crlf) + : is_ (is), + buf_ (dynamic_cast<fdbuf*> (is.rdbuf ())), + gptr_ (nullptr), + egptr_ (nullptr), + crlf_ (crlf) + { + } + auto char_scanner:: peek () -> xchar { @@ -22,14 +30,14 @@ namespace butl if (eos_) return xchar (xchar::traits_type::eof (), line, column); - xchar::int_type v (is_.peek ()); + int_type v (peek_ ()); if (v == xchar::traits_type::eof ()) eos_ = true; else if (crlf_ && v == 0x0D) { - is_.get (); - xchar::int_type v1 (is_.peek ()); + get_ (); + int_type v1 (peek_ ()); if (v1 != '\n') { @@ -62,7 +70,7 @@ namespace butl // if (!eos (c)) { - is_.get (); + get_ (); if (c == '\n') { diff --git a/libbutl/char-scanner.hxx b/libbutl/char-scanner.hxx index e71f286..80a1f2a 100644 --- a/libbutl/char-scanner.hxx +++ b/libbutl/char-scanner.hxx @@ -6,8 +6,10 @@ #define LIBBUTL_CHAR_SCANNER_HXX #include // char_traits -#include #include // uint64_t +#include + +#include #include @@ -23,8 +25,11 @@ namespace butl // 0x0A) and convert them to just '\n' (0x0A). Note that a standalone // 0x0D is treated "as if" it was followed by 0x0A. 
// - char_scanner (std::istream& is, bool crlf = true) - : is_ (is), crlf_ (crlf) {} + // Note also that if the stream happens to be ifdstream, then the scanner + // reads directly from its get area, which assumes nobody else is messing + // with the stream. + // + char_scanner (std::istream& is, bool crlf = true); char_scanner (const char_scanner&) = delete; char_scanner& operator= (const char_scanner&) = delete; @@ -43,9 +48,9 @@ namespace butl class xchar { public: - typedef std::char_traits<char> traits_type; - typedef traits_type::int_type int_type; - typedef traits_type::char_type char_type; + using traits_type = std::char_traits<char>; + using int_type = traits_type::int_type; + using char_type = traits_type::char_type; int_type value; std::uint64_t line; @@ -90,8 +95,22 @@ namespace butl std::uint64_t column = 1; protected: + using int_type = xchar::int_type; + using char_type = xchar::char_type; + + int_type + peek_ (); + + void + get_ (); + + protected: std::istream& is_; + fdbuf* buf_; // NULL if not ifdstream. + const char_type* gptr_; + const char_type* egptr_; + bool crlf_; bool eos_ = false; diff --git a/libbutl/char-scanner.ixx b/libbutl/char-scanner.ixx index 2d96207..0e00dfd 100644 --- a/libbutl/char-scanner.ixx +++ b/libbutl/char-scanner.ixx @@ -29,4 +29,35 @@ namespace butl unget_ = true; ungetc_ = c; } + + inline auto char_scanner:: + peek_ () -> int_type + { + if (gptr_ != egptr_) + return xchar::traits_type::to_int_type (*gptr_); // 0xFF is not eof. + + int_type r (is_.peek ()); + + // Update buffer pointers for the next chunk. + // + if (buf_ != nullptr) + { + gptr_ = buf_->gptr (); + egptr_ = buf_->egptr (); + } + + return r; + } + + inline void char_scanner:: + get_ () + { + if (gptr_ != egptr_) + { + buf_->gbump (1); + ++gptr_; + } + else + is_.get (); // About as fast as ignore() and way faster than tellg(). 
+ } } diff --git a/libbutl/fdstream.hxx b/libbutl/fdstream.hxx index e1f5790..dd33263 100644 --- a/libbutl/fdstream.hxx +++ b/libbutl/fdstream.hxx @@ -122,8 +122,10 @@ namespace butl fd () const {return fd_.get ();} public: - using int_type = std::basic_streambuf<char>::int_type; - using traits_type = std::basic_streambuf<char>::traits_type; + using base = std::basic_streambuf<char>; + + using int_type = base::int_type; + using traits_type = base::traits_type; // basic_streambuf input interface. // @@ -134,6 +136,12 @@ namespace butl virtual int_type underflow (); + // Direct access to the get area. Use with caution. + // + using base::gptr; + using base::egptr; + using base::gbump; + private: bool load (); -- cgit v1.1