From 5bff24a8862f61e40f827591be5c81228efab4c6 Mon Sep 17 00:00:00 2001 From: Karen Arutyunov Date: Sun, 9 Dec 2018 01:18:10 +0300 Subject: Add support for fdstream positioning --- libbutl/fdstream.cxx | 199 ++++++++++++++++++++++++++++++-- libbutl/fdstream.mxx | 35 +++++- tests/fdstream/driver.cxx | 280 ++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 503 insertions(+), 11 deletions(-) diff --git a/libbutl/fdstream.cxx b/libbutl/fdstream.cxx index 26d5da3..f417c6a 100644 --- a/libbutl/fdstream.cxx +++ b/libbutl/fdstream.cxx @@ -27,6 +27,8 @@ # include // S_I* # include // wcsncmp(), wcsstr() + +# include // count() #endif #include @@ -175,6 +177,14 @@ namespace butl return r; } +#ifdef _WIN32 + static inline int + read (int fd, void* buf, size_t n) + { + return _read (fd, buf, static_cast (n)); + } +#endif + bool fdbuf:: load () { @@ -182,11 +192,7 @@ namespace butl // assert (!non_blocking_); -#ifndef _WIN32 - ssize_t n (read (fd_.get (), buf_, sizeof (buf_))); -#else - int n (_read (fd_.get (), buf_, sizeof (buf_))); -#endif + auto n (read (fd_.get (), buf_, sizeof (buf_))); if (n == -1) throw_generic_ios_failure (errno); @@ -196,6 +202,41 @@ namespace butl return n != 0; } + void fdbuf:: + seekg (uint64_t off) + { + // In the future we may implement the blocking behavior for a non-blocking + // file descriptor. + // + if (non_blocking_) + throw_generic_ios_failure (ENOTSUP); + + // The plan is to rewind to the beginning of the stream, read the + // requested number of characters and reset the get area, so it will be + // filled from scratch on the next read from the stream. + // + fdseek (fd_.get (), 0, fdseek_mode::set); + + for (uint64_t n (off); n != 0; ) + { + size_t m (n > sizeof (buf_) ? sizeof (buf_) : static_cast (n)); + auto r (read (fd_.get (), buf_, m)); + + if (r == -1) + throw_generic_ios_failure (errno); + + // Fail if trying to seek beyond the end of the stream. 
+ // + if (r == 0) + throw_generic_ios_failure (EINVAL); + + n -= r; + } + + off_ = off; + setg (buf_, buf_, buf_); + } + fdbuf::int_type fdbuf:: overflow (int_type c) { @@ -429,6 +470,150 @@ namespace butl #endif } + // Common call chains: + // + // - basic_ostream::seekp(pos) -> + // basic_streambuf::pubseekpos(pos, ios::out) -> + // fdbuf::seekpos(pos, ios::out) + // + // - basic_istream::seekg(pos) -> + // basic_streambuf::pubseekpos(pos, ios::in) -> + // fdbuf::seekpos(pos, ios::in) + // + fdbuf::pos_type fdbuf:: + seekpos (pos_type pos, ios_base::openmode which) + { + // Note that the position type provides an explicit conversion to the + // numeric offset type (see std::fpos for details). The position state is + // disregarded in this case, which is ok since we don't mess with the + // multibyte character conversions. + // + return seekoff (static_cast (pos), ios_base::beg, which); + } + + // Common call chains: + // + // - basic_ostream::seekp(off, dir) -> + // basic_streambuf::pubseekoff(off, dir, ios::out) -> + // fdbuf::seekoff(off, dir, ios::out) + // + // - basic_ostream::tellp() -> + // basic_streambuf::pubseekoff(0, ios::cur, ios::out) -> + // fdbuf::seekoff(0, ios::cur, ios::out) + // + // - basic_istream::seekg(off, dir) -> + // basic_streambuf::pubseekoff(off, dir, ios::in) -> + // fdbuf::seekoff(off, dir, ios::in) + // + // - basic_istream::tellg() -> + // basic_streambuf::pubseekoff(0, ios::cur, ios::in) -> + // fdbuf::seekoff(0, ios::cur, ios::in) + // + fdbuf::pos_type fdbuf:: + seekoff (off_type off, ios_base::seekdir dir, ios_base::openmode which) + { + // The seekoff() function interface doesn't support the non-blocking + // semantics since being unable to serialize the character in write mode + // is supposed to be an error. Also the non-blocking mode is likely to be + // used for non-seekable file descriptors (pipes, etc.). In the future we + // may implement the blocking behavior for a non-blocking file descriptor. 
+ // + if (non_blocking_) + throw_generic_ios_failure (ENOTSUP); + + // Translate ios_base value to fdseek_mode. + // + fdseek_mode m; + switch (dir) + { + case ios_base::beg: m = fdseek_mode::set; break; + case ios_base::cur: m = fdseek_mode::cur; break; + case ios_base::end: m = fdseek_mode::end; break; + default: assert (false); + } + + // Prior to fdseek() call we will flush the buffer for the write mode, + // reset the get area for the read mode, and fail otherwise. Note that we + // don't support the read/write mode. + // + // Note that the return (position) type is implicitly constructible from + // the numeric offset type (see std::fpos for details). + // + switch (which) + { + case ios_base::out: + { + // Fail if unable to fully flush the buffer (for example, because the + // device is full). + // + if (!save ()) + return static_cast (-1); + + break; + } + case ios_base::in: + { + // We may have unread data in the get area and need to subtract its + // size from the offset if we seek from the current position. + // + if (dir == ios_base::cur) + { + off_type n (egptr () - gptr ()); // Get area size. + +#ifdef _WIN32 + // Note that on Windows, when reading in the text mode, newline + // characters are translated from the CRLF character sequences. + // Thus, in this mode, we also need to subtract the number of + // newlines in the get area from the offset. + // + // Note that this approach only works for "canonical" Windows text + // files. Specifically, if there are newlines not preceded with the + // CR character then we may end up in the wrong place. It seems that + // there is no reasonable solution for this problem, and neither of + // the MSVC's or MinGW's std::ifstream implementations handle this + // case properly. + // + + // The only way to query the current file descriptor mode is to + // reset it and use the result (see fdmode() for details). 
+ // + fdstream_mode fm (fdmode (fd_.get (), fdstream_mode::text)); + + // Note: the fdstream_mode::blocking flag is also set. + // + if ((fm & fdstream_mode::text) == fdstream_mode::text) + n += count (gptr (), egptr (), '\n'); + else + fdmode (fd_.get (), fm); // Restore the mode if it was changed. +#endif + + // Note that ifdstream::tellg() implicitly calls seekoff(0,ios::cur) + // (see above). Let's not reset the get area for such noop seeks. + // + if (off == 0) + return static_cast ( + fdseek (fd_.get (), 0, fdseek_mode::cur) - n); + + off -= n; + } + + // Reset the get area. + // + setg (buf_, buf_, buf_); + break; + } + default: return static_cast (-1); + } + + // Note that on Windows in the text mode the logical offset (number of + // read/written bytes) is likely to be screwed up due to newlines + // translation (see above). + // + off_ = fdseek (fd_.get (), off, m); + + return static_cast (off_); + } + inline static bool flag (fdstream_mode m, fdstream_mode flag) { @@ -784,7 +969,7 @@ namespace butl } uint64_t - fdseek (int fd, uint64_t o, fdseek_mode fdm) + fdseek (int fd, int64_t o, fdseek_mode fdm) { int m (-1); @@ -800,7 +985,7 @@ namespace butl if (r == static_cast (-1)) throw_generic_ios_failure (errno); #else - __int64 r (_lseeki64 (fd, static_cast<__int64> (o), m)); + __int64 r (_lseeki64 (fd, o, m)); if (r == -1) throw_generic_ios_failure (errno); #endif diff --git a/libbutl/fdstream.mxx b/libbutl/fdstream.mxx index dd7418e..ff96e2e 100644 --- a/libbutl/fdstream.mxx +++ b/libbutl/fdstream.mxx @@ -108,11 +108,14 @@ LIBBUTL_MODEXPORT namespace butl // - char only // - input or output but not both (can use a union of two streams for that) // - no support for put back - // - no support for tell[gp]()/seek[gp]() (but see non-standard tellg() and - // tellp() in fdbuf) + // - use of tell[gp]() and seek[gp]() is discouraged on Windows for + // fdstreams opened in the text mode (see fdbuf::seekoff() implementation + // for reasoning and consider using 
non-standard tellg() and seekg() in + // fdbuf, instead) // - non-blocking file descriptor is supported only by showmanyc() function // and only on POSIX - // - throws ios::failure in case of open()/read()/write()/close() errors + // - throws ios::failure in case of open(), read(), write(), close(), + // seek[gp](), or tell[gp]() errors // - exception mask has at least badbit // - after catching an exception caused by badbit the stream is no longer // usable @@ -157,6 +160,9 @@ LIBBUTL_MODEXPORT namespace butl using int_type = base::int_type; using traits_type = base::traits_type; + using pos_type = base::pos_type; // std::streampos + using off_type = base::off_type; // std::streamoff + // basic_streambuf input interface. // public: @@ -174,9 +180,21 @@ LIBBUTL_MODEXPORT namespace butl // Return the (logical) position of the next byte to be read. // + // Note that on Windows when reading in the text mode the logical position + // may differ from the physical file descriptor position due to the CRLF + // character sequence translation. See the seekoff() implementation for + // more background on this issue. + // std::uint64_t tellg () const {return off_ - (egptr () - gptr ());} + // Seek to the (logical) position as if by reading the specified number of + // bytes from the beginning of the stream. Throw ios::failure on the + // underlying OS errors. + // + void + seekg (std::uint64_t); + private: bool load (); @@ -198,6 +216,15 @@ LIBBUTL_MODEXPORT namespace butl std::uint64_t tellp () const {return off_ + (pptr () - buf_);} + // basic_streambuf positioning interface (both input/output). 
+ // + public: + virtual pos_type + seekpos (pos_type, std::ios_base::openmode); + + virtual pos_type + seekoff (off_type, std::ios_base::seekdir, std::ios_base::openmode); + private: bool save (); @@ -741,7 +768,7 @@ LIBBUTL_MODEXPORT namespace butl enum class fdseek_mode {set, cur, end}; LIBBUTL_SYMEXPORT std::uint64_t - fdseek (int, std::uint64_t, fdseek_mode); + fdseek (int, std::int64_t, fdseek_mode); // Truncate or expand the file to the specified size. Throw ios::failure on // the underlying OS error. diff --git a/tests/fdstream/driver.cxx b/tests/fdstream/driver.cxx index 675a66e..01da9fb 100644 --- a/tests/fdstream/driver.cxx +++ b/tests/fdstream/driver.cxx @@ -482,6 +482,286 @@ main (int argc, const char* argv[]) #endif + // Test setting and getting position via the non-standard fdbuf interface. + // + // Seek for read. + // + { + to_file (f, "012\n3\n4567", fdopen_mode::truncate); + + ifdstream is (f); + + fdbuf* buf (dynamic_cast (is.rdbuf ())); + assert (buf != nullptr); + + char c; + for (size_t i (0); i < 7; ++i) + is.get (c); + + uint64_t p (buf->tellg ()); + assert (p == 7); + + is.get (c); + assert (c == '5'); + + buf->seekg (p); + assert (buf->tellg () == p); + + is.get (c); + assert (c == '5'); + + // Can't seek beyond the end of the stream. + // + try + { + buf->seekg (20); + assert (false); + } + catch (const ios::failure&) {} + } + + // Seek for write. + // + { + // Let's test replacing the '3' fragment with 'XYZ' in the following file. + // + to_file (f, "012\n3\n4567", fdopen_mode::truncate); + + auto_fd fd; + string suffix; + size_t p (4); // Logical position of the fragment being replaced. + + { + ifdstream is (f, fdopen_mode::in | fdopen_mode::out); + + fdbuf* buf (dynamic_cast (is.rdbuf ())); + assert (buf != nullptr); + + // Read till the end of the fragment. + // + char c; + for (size_t i (0); i < p + 1; ++i) + is.get (c); + + assert (c == '3'); + + // Read the suffix. 
+ // + suffix = is.read_text (); + assert (suffix == "\n4567"); + + // Seek to the beginning of the fragment and detach the file descriptor. + // + buf->seekg (p); + fd = is.release (); + } + + // Rewrite the fragment. + // + // Note that on Windows in the text mode the logical position differs from + // the file descriptor position, so we need to query the latter one to + // truncate the file. + // + fdtruncate (fd.get (), fdseek (fd.get (), 0, fdseek_mode::cur)); + + ofdstream os (move (fd), ofdstream::badbit | ofdstream::failbit, p); + + os << "XYZ" << suffix; + os.close (); + + assert (from_file (f) == "012\nXYZ\n4567"); + } + + // Test setting and getting position via the standard [io]stream interface. + // + to_file (f, "0123456789", fdopen_mode::truncate); + + // Seek for read. + // + { + ifdstream is (f); + + char c; + is.get (c); + + is.seekg (5, ios::beg); + is.get (c); + assert (c == '5'); + + is.seekg (2, ios::cur); + + assert (static_cast (is.tellg ()) == 8); + + const fdbuf* buf (dynamic_cast (is.rdbuf ())); + assert (buf != nullptr && buf->tellg () == 8); + + assert (from_stream (is) == "89"); + } + + // Seek for write. + // + { + ofdstream os (f, fdopen_mode::out); + os.seekp (4, ios::beg); + os << "ABC"; + os.seekp (-4, ios::end); + os << "XYZ"; + os.seekp (-8, ios::cur); + os << 'C'; + + assert (static_cast (os.tellp ()) == 2); + + const fdbuf* buf (dynamic_cast (os.rdbuf ())); + assert (buf != nullptr && buf->tellp () == 2); + + os.close (); + assert (from_file (f) == "0C23ABXYZ9"); + } + +#ifdef _WIN32 + + // Test handling newline characters on Windows while setting and getting + // position via the standard [io]stream interface. + // + // Save the string in the text mode, so the newline character is translated + // into the 0xD, 0xA character sequence on Windows. + // + to_file (f, "01234\n56789", fdopen_mode::truncate); + + // Seek for read in the text mode. 
+ // + { + ifdstream is (f); + + char c; + is.get (c); + + is.seekg (2, ios::cur); + is.get (c); + + assert (c == '3'); + + is.seekg (4, ios::cur); + + assert (static_cast (is.tellg ()) == 8); + assert (from_stream (is) == "6789"); + } + + // Seek for read in the binary mode. + // + { + ifdstream is (f, ios::binary); + + char c; + is.get (c); + + is.seekg (2, ios::cur); + is.get (c); + + assert (c == '3'); + + is.seekg (4, ios::cur); + + assert (static_cast (is.tellg ()) == 8); + + const fdbuf* buf (dynamic_cast (is.rdbuf ())); + assert (buf != nullptr && buf->tellp () == 8); + + assert (from_stream (is) == "6789"); + } + + // Research the positioning misbehavior of std::ifstream object opened + // in the text mode on Windows. + // +#if 0 + + to_file (f, "012\r\n3\n4567", fdopen_mode::truncate | fdopen_mode::binary); + + { + ifstream is (f.string ()); +// ifdstream is (f); + + char c1; + for (size_t i (0); i < 2; ++i) + is.get (c1); + + is.seekg (6, ios::cur); + + streamoff p1 (is.tellg ()); + + is.get (c1); + + cout << "c1: '" << c1 << "' pos " << p1 << endl; + + char c2; + is.seekg (8, ios::beg); + + streamoff p2 (is.tellg ()); + is.get (c2); + + cout << "c2: '" << c2 << "' pos " << p2 << endl; + + // One could expect the positions and characters to match, but: + // + // VC's ifstream and ifdstream end up with: + // + // c1: '4' pos 7 + // c2: '5' pos 8 + // + // MinGW's ifstream ends up with: + // + // c1: '6' pos 9 + // c2: '5' pos 8 + // + // These assertions fail for all implementations: + // + // assert (p1 == p2); + // assert (c1 == c2); + } + + { + ifstream is (f.string ()); +// ifdstream is (f); + + char c1; + for (size_t i (0); i < 2; ++i) + is.get (c1); + + auto p1 (is.tellg ()); + is.get (c1); + + cout << "c1: '" << c1 << "' pos " << p1 << endl; + + is.seekg (p1, ios::beg); + + auto p2 (is.tellg ()); + + char c2; + is.get (c2); + + cout << "c2: '" << c2 << "' pos " << p2 << endl; + + // One could expect the positions and characters to match, but: + // 
+ // VC's ifstream and ifdstream end up with: + // + // c1: '2' pos 1 + // c2: '1' pos 1 + // + // MinGW's ifstream ends up with: + // + // c1: '2' pos 3 + // c2: '\n' pos 3 + // + // This assertion fails for all implementations: + // + // assert (c1 == c2); + } + +#endif + +#endif + // Test pipes. // // Here we rely on buffering being always enabled for pipes. -- cgit v1.1