diff options
author | Karen Arutyunov <karen@codesynthesis.com> | 2018-12-09 01:18:10 +0300 |
---|---|---|
committer | Karen Arutyunov <karen@codesynthesis.com> | 2018-12-15 17:27:56 +0300 |
commit | 5bff24a8862f61e40f827591be5c81228efab4c6 (patch) | |
tree | cf509c18ecae91150ac51d1eea036dfa1d30eb5d /libbutl | |
parent | cc8b52be1e02802ef82ff474721d78815ab4e63a (diff) |
Add support for fdstream positioning
Diffstat (limited to 'libbutl')
-rw-r--r-- | libbutl/fdstream.cxx | 199 | ||||
-rw-r--r-- | libbutl/fdstream.mxx | 35 |
2 files changed, 223 insertions, 11 deletions
diff --git a/libbutl/fdstream.cxx b/libbutl/fdstream.cxx index 26d5da3..f417c6a 100644 --- a/libbutl/fdstream.cxx +++ b/libbutl/fdstream.cxx @@ -27,6 +27,8 @@ # include <sys/stat.h> // S_I* # include <wchar.h> // wcsncmp(), wcsstr() + +# include <algorithm> // count() #endif #include <cassert> @@ -175,6 +177,14 @@ namespace butl return r; } +#ifdef _WIN32 + static inline int + read (int fd, void* buf, size_t n) + { + return _read (fd, buf, static_cast<unsigned int> (n)); + } +#endif + bool fdbuf:: load () { @@ -182,11 +192,7 @@ namespace butl // assert (!non_blocking_); -#ifndef _WIN32 - ssize_t n (read (fd_.get (), buf_, sizeof (buf_))); -#else - int n (_read (fd_.get (), buf_, sizeof (buf_))); -#endif + auto n (read (fd_.get (), buf_, sizeof (buf_))); if (n == -1) throw_generic_ios_failure (errno); @@ -196,6 +202,41 @@ namespace butl return n != 0; } + void fdbuf:: + seekg (uint64_t off) + { + // In the future we may implement the blocking behavior for a non-blocking + // file descriptor. + // + if (non_blocking_) + throw_generic_ios_failure (ENOTSUP); + + // The plan is to rewind to the beginning of the stream, read the + // requested number of characters and reset the get area, so it will be + // filled from scratch on the next read from the stream. + // + fdseek (fd_.get (), 0, fdseek_mode::set); + + for (uint64_t n (off); n != 0; ) + { + size_t m (n > sizeof (buf_) ? sizeof (buf_) : static_cast<size_t> (n)); + auto r (read (fd_.get (), buf_, m)); + + if (r == -1) + throw_generic_ios_failure (errno); + + // Fail if trying to seek beyond the end of the stream. 
+ // + if (r == 0) + throw_generic_ios_failure (EINVAL); + + n -= r; + } + + off_ = off; + setg (buf_, buf_, buf_); + } + fdbuf::int_type fdbuf:: overflow (int_type c) { @@ -429,6 +470,150 @@ namespace butl #endif } + // Common call chains: + // + // - basic_ostream::seekp(pos) -> + // basic_streambuf::pubseekpos(pos, ios::out) -> + // fdbuf::seekpos(pos, ios::out) + // + // - basic_istream::seekg(pos) -> + // basic_streambuf::pubseekpos(pos, ios::in) -> + // fdbuf::seekpos(pos, ios::in) + // + fdbuf::pos_type fdbuf:: + seekpos (pos_type pos, ios_base::openmode which) + { + // Note that the position type provides an explicit conversion to the + // numeric offset type (see std::fpos for details). The position state is + // disregarded in this case, which is ok since we don't mess with the + // multibyte character conversions. + // + return seekoff (static_cast<off_type> (pos), ios_base::beg, which); + } + + // Common call chains: + // + // - basic_ostream::seekp(off, dir) -> + // basic_streambuf::pubseekoff(off, dir, ios::out) -> + // fdbuf::seekoff(off, dir, ios::out) + // + // - basic_ostream::tellp() -> + // basic_streambuf::pubseekoff(0, ios::cur, ios::out) -> + // fdbuf::seekoff(0, ios::cur, ios::out) + // + // - basic_istream::seekg(off, dir) -> + // basic_streambuf::pubseekoff(off, dir, ios::in) -> + // fdbuf::seekoff(off, dir, ios::in) + // + // - basic_istream::tellg() -> + // basic_streambuf::pubseekoff(0, ios::cur, ios::in) -> + // fdbuf::seekoff(0, ios::cur, ios::in) + // + fdbuf::pos_type fdbuf:: + seekoff (off_type off, ios_base::seekdir dir, ios_base::openmode which) + { + // The seekoff() function interface doesn't support the non-blocking + // semantics since being unable to serialize the character in write mode + // is supposed to be an error. Also the non-blocking mode is likely to be + // used for non-seekable file descriptors (pipes, etc.). In the future we + // may implement the blocking behavior for a non-blocking file descriptor. 
+ // + if (non_blocking_) + throw_generic_ios_failure (ENOTSUP); + + // Translate the ios_base value to fdseek_mode. + // + fdseek_mode m; + switch (dir) + { + case ios_base::beg: m = fdseek_mode::set; break; + case ios_base::cur: m = fdseek_mode::cur; break; + case ios_base::end: m = fdseek_mode::end; break; + default: assert (false); + } + + // Prior to fdseek() call we will flush the buffer for the write mode, + // reset the get area for the read mode, and fail otherwise. Note that we + // don't support the read/write mode. + // + // Note that the return (position) type is implicitly constructible from + // the numeric offset type (see std::fpos for details). + // + switch (which) + { + case ios_base::out: + { + // Fail if unable to fully flush the buffer (for example, because the + // device is full). + // + if (!save ()) + return static_cast<off_type> (-1); + + break; + } + case ios_base::in: + { + // We may have unread data in the get area and need to subtract its + // size from the offset if we seek from the current position. + // + if (dir == ios_base::cur) + { + off_type n (egptr () - gptr ()); // Get area size. + +#ifdef _WIN32 + // Note that on Windows, when reading in the text mode, newline + // characters are translated from the CRLF character sequences. + // Thus, in this mode, we also need to subtract the number of + // newlines in the get area from the offset. + // + // Note that this approach only works for "canonical" Windows text + // files. Specifically, if there are newlines not preceded with the + // CR character then we may end up in the wrong place. It seems that + // there is no reasonable solution for this problem, and neither + // MSVC's nor MinGW's std::ifstream implementation handles this + // case properly. + // + + // The only way to query the current file descriptor mode is to + // reset it and use the result (see fdmode() for details). 
+ // + fdstream_mode fm (fdmode (fd_.get (), fdstream_mode::text)); + + // Note: the fdstream_mode::blocking flag is also set. + // + if ((fm & fdstream_mode::text) == fdstream_mode::text) + n += count (gptr (), egptr (), '\n'); + else + fdmode (fd_.get (), fm); // Restore the mode if it was changed. +#endif + + // Note that ifdstream::tellg() implicitly calls seekoff(0,ios::cur) + // (see above). Let's not reset the get area for such noop seeks. + // + if (off == 0) + return static_cast<off_type> ( + fdseek (fd_.get (), 0, fdseek_mode::cur) - n); + + off -= n; + } + + // Reset the get area. + // + setg (buf_, buf_, buf_); + break; + } + default: return static_cast<off_type> (-1); + } + + // Note that on Windows in the text mode the logical offset (number of + // read/written bytes) is likely to be screwed up due to newlines + // translation (see above). + // + off_ = fdseek (fd_.get (), off, m); + + return static_cast<off_type> (off_); + } + inline static bool flag (fdstream_mode m, fdstream_mode flag) { @@ -784,7 +969,7 @@ namespace butl } uint64_t - fdseek (int fd, uint64_t o, fdseek_mode fdm) + fdseek (int fd, int64_t o, fdseek_mode fdm) { int m (-1); @@ -800,7 +985,7 @@ namespace butl if (r == static_cast<off_t> (-1)) throw_generic_ios_failure (errno); #else - __int64 r (_lseeki64 (fd, static_cast<__int64> (o), m)); + __int64 r (_lseeki64 (fd, o, m)); if (r == -1) throw_generic_ios_failure (errno); #endif diff --git a/libbutl/fdstream.mxx b/libbutl/fdstream.mxx index dd7418e..ff96e2e 100644 --- a/libbutl/fdstream.mxx +++ b/libbutl/fdstream.mxx @@ -108,11 +108,14 @@ LIBBUTL_MODEXPORT namespace butl // - char only // - input or output but not both (can use a union of two streams for that) // - no support for put back - // - no support for tell[gp]()/seek[gp]() (but see non-standard tellg() and - // tellp() in fdbuf) + // - use of tell[gp]() and seek[gp]() is discouraged on Windows for + // fdstreams opened in the text mode (see fdbuf::seekoff() implementation + 
// for reasoning and consider using non-standard tellg() and seekg() in + // fdbuf, instead) // - non-blocking file descriptor is supported only by showmanyc() function // and only on POSIX - // - throws ios::failure in case of open()/read()/write()/close() errors + // - throws ios::failure in case of open(), read(), write(), close(), + // seek[gp](), or tell[gp]() errors // - exception mask has at least badbit // - after catching an exception caused by badbit the stream is no longer // usable @@ -157,6 +160,9 @@ LIBBUTL_MODEXPORT namespace butl using int_type = base::int_type; using traits_type = base::traits_type; + using pos_type = base::pos_type; // std::streampos + using off_type = base::off_type; // std::streamoff + // basic_streambuf input interface. // public: @@ -174,9 +180,21 @@ LIBBUTL_MODEXPORT namespace butl // Return the (logical) position of the next byte to be read. // + // Note that on Windows when reading in the text mode the logical position + // may differ from the physical file descriptor position due to the CRLF + // character sequence translation. See the seekoff() implementation for + // more background on this issue. + // std::uint64_t tellg () const {return off_ - (egptr () - gptr ());} + // Seek to the (logical) position as if by reading the specified number of + // bytes from the beginning of the stream. Throw ios::failure on the + // underlying OS errors. + // + void + seekg (std::uint64_t); + private: bool load (); @@ -198,6 +216,15 @@ LIBBUTL_MODEXPORT namespace butl std::uint64_t tellp () const {return off_ + (pptr () - buf_);} + // basic_streambuf positioning interface (both input/output). 
+ // + public: + virtual pos_type + seekpos (pos_type, std::ios_base::openmode); + + virtual pos_type + seekoff (off_type, std::ios_base::seekdir, std::ios_base::openmode); + private: bool save (); @@ -741,7 +768,7 @@ LIBBUTL_MODEXPORT namespace butl enum class fdseek_mode {set, cur, end}; LIBBUTL_SYMEXPORT std::uint64_t - fdseek (int, std::uint64_t, fdseek_mode); + fdseek (int, std::int64_t, fdseek_mode); // Truncate or expand the file to the specified size. Throw ios::failure on // the underlying OS error. |