aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKaren Arutyunov <karen@codesynthesis.com>2018-12-09 01:18:10 +0300
committerKaren Arutyunov <karen@codesynthesis.com>2018-12-15 17:27:56 +0300
commit5bff24a8862f61e40f827591be5c81228efab4c6 (patch)
treecf509c18ecae91150ac51d1eea036dfa1d30eb5d
parentcc8b52be1e02802ef82ff474721d78815ab4e63a (diff)
Add support for fdstream positioning
-rw-r--r--libbutl/fdstream.cxx199
-rw-r--r--libbutl/fdstream.mxx35
-rw-r--r--tests/fdstream/driver.cxx280
3 files changed, 503 insertions, 11 deletions
diff --git a/libbutl/fdstream.cxx b/libbutl/fdstream.cxx
index 26d5da3..f417c6a 100644
--- a/libbutl/fdstream.cxx
+++ b/libbutl/fdstream.cxx
@@ -27,6 +27,8 @@
# include <sys/stat.h> // S_I*
# include <wchar.h> // wcsncmp(), wcsstr()
+
+# include <algorithm> // count()
#endif
#include <cassert>
@@ -175,6 +177,14 @@ namespace butl
return r;
}
+#ifdef _WIN32
+  // Give the Windows build a POSIX-looking read() so that the callers below
+  // can issue the same call on both platforms.
+  //
+  static inline int
+  read (int fd, void* buf, size_t n)
+  {
+    unsigned int cnt (static_cast<unsigned int> (n));
+    return _read (fd, buf, cnt);
+  }
+#endif
+
bool fdbuf::
load ()
{
@@ -182,11 +192,7 @@ namespace butl
//
assert (!non_blocking_);
-#ifndef _WIN32
- ssize_t n (read (fd_.get (), buf_, sizeof (buf_)));
-#else
- int n (_read (fd_.get (), buf_, sizeof (buf_)));
-#endif
+ auto n (read (fd_.get (), buf_, sizeof (buf_)));
if (n == -1)
throw_generic_ios_failure (errno);
@@ -196,6 +202,41 @@ namespace butl
return n != 0;
}
+  void fdbuf::
+  seekg (uint64_t off)
+  {
+    // Positioning of a non-blocking file descriptor is not supported (though
+    // we may implement the blocking behavior for it in the future).
+    //
+    if (non_blocking_)
+      throw_generic_ios_failure (ENOTSUP);
+
+    // Start over from the beginning of the stream and consume (discarding
+    // the data) exactly off characters, using the buffer as a scratch area.
+    //
+    fdseek (fd_.get (), 0, fdseek_mode::set);
+
+    uint64_t left (off); // Number of characters still to be skipped.
+
+    while (left != 0)
+    {
+      size_t chunk (sizeof (buf_));
+
+      if (left <= chunk)
+        chunk = static_cast<size_t> (left);
+
+      auto got (read (fd_.get (), buf_, chunk));
+
+      if (got == -1)
+        throw_generic_ios_failure (errno);
+
+      // Hitting EOF before skipping everything means the requested position
+      // is beyond the end of the stream.
+      //
+      if (got == 0)
+        throw_generic_ios_failure (EINVAL);
+
+      left -= got;
+    }
+
+    // Record the new logical position and empty the get area so that the
+    // next read from the stream refills it from scratch.
+    //
+    off_ = off;
+    setg (buf_, buf_, buf_);
+  }
+
fdbuf::int_type fdbuf::
overflow (int_type c)
{
@@ -429,6 +470,150 @@ namespace butl
#endif
}
+  // Typical ways to get here:
+  //
+  // - basic_ostream::seekp(pos) ->
+  //   basic_streambuf::pubseekpos(pos, ios::out) ->
+  //   fdbuf::seekpos(pos, ios::out)
+  //
+  // - basic_istream::seekg(pos) ->
+  //   basic_streambuf::pubseekpos(pos, ios::in) ->
+  //   fdbuf::seekpos(pos, ios::in)
+  //
+  fdbuf::pos_type fdbuf::
+  seekpos (pos_type pos, ios_base::openmode which)
+  {
+    // Seeking to an absolute position is the same as seeking by that many
+    // characters from the beginning of the stream, so delegate to seekoff().
+    //
+    // The explicit pos_type-to-off_type conversion (see std::fpos) drops the
+    // position state, which is fine since we don't deal with multibyte
+    // character conversions.
+    //
+    const off_type o (static_cast<off_type> (pos));
+    return seekoff (o, ios_base::beg, which);
+  }
+
+  // Common call chains:
+  //
+  // - basic_ostream::seekp(off, dir) ->
+  //   basic_streambuf::pubseekoff(off, dir, ios::out) ->
+  //   fdbuf::seekoff(off, dir, ios::out)
+  //
+  // - basic_ostream::tellp() ->
+  //   basic_streambuf::pubseekoff(0, ios::cur, ios::out) ->
+  //   fdbuf::seekoff(0, ios::cur, ios::out)
+  //
+  // - basic_istream::seekg(off, dir) ->
+  //   basic_streambuf::pubseekoff(off, dir, ios::in) ->
+  //   fdbuf::seekoff(off, dir, ios::in)
+  //
+  // - basic_istream::tellg() ->
+  //   basic_streambuf::pubseekoff(0, ios::cur, ios::in) ->
+  //   fdbuf::seekoff(0, ios::cur, ios::in)
+  //
+  // Reposition the stream by off relative to dir for the direction selected
+  // by which (ios::out or ios::in, but not both). Return the resulting file
+  // descriptor position (adjusted for the unread data in the get area for
+  // the noop seek in the read mode) or -1 if the output buffer cannot be
+  // fully flushed, or if which or dir is not supported. Throw ios::failure
+  // if the file descriptor is non-blocking or on the underlying OS error.
+  //
+  fdbuf::pos_type fdbuf::
+  seekoff (off_type off, ios_base::seekdir dir, ios_base::openmode which)
+  {
+    // The seekoff() function interface doesn't support the non-blocking
+    // semantics since being unable to serialize the character in write mode
+    // is supposed to be an error. Also the non-blocking mode is likely to be
+    // used for non-seekable file descriptors (pipes, etc.). In the future we
+    // may implement the blocking behavior for a non-blocking file descriptor.
+    //
+    if (non_blocking_)
+      throw_generic_ios_failure (ENOTSUP);
+
+    // Translate the ios_base value to fdseek_mode.
+    //
+    fdseek_mode m;
+    switch (dir)
+    {
+    case ios_base::beg: m = fdseek_mode::set; break;
+    case ios_base::cur: m = fdseek_mode::cur; break;
+    case ios_base::end: m = fdseek_mode::end; break;
+    default:
+      {
+        // With assertions disabled fail rather than seek with an
+        // indeterminate mode.
+        //
+        assert (false);
+        return static_cast<off_type> (-1);
+      }
+    }
+
+    // Prior to fdseek() call we will flush the buffer for the write mode,
+    // reset the get area for the read mode, and fail otherwise. Note that we
+    // don't support the read/write mode.
+    //
+    // Note that the return (position) type is implicitly constructible from
+    // the numeric offset type (see std::fpos for details).
+    //
+    switch (which)
+    {
+    case ios_base::out:
+      {
+        // Fail if unable to fully flush the buffer (for example, because the
+        // device is full).
+        //
+        if (!save ())
+          return static_cast<off_type> (-1);
+
+        break;
+      }
+    case ios_base::in:
+      {
+        // We may have unread data in the get area and need to subtract its
+        // size from the offset if we seek from the current position.
+        //
+        if (dir == ios_base::cur)
+        {
+          off_type n (egptr () - gptr ()); // Get area size.
+
+#ifdef _WIN32
+          // Note that on Windows, when reading in the text mode, newline
+          // characters are translated from the CRLF character sequences.
+          // Thus, in this mode, we also need to subtract the number of
+          // newlines in the get area from the offset.
+          //
+          // Note that this approach only works for "canonical" Windows text
+          // files. Specifically, if there are newlines not preceded with the
+          // CR character then we may end up in the wrong place. It seems
+          // that there is no reasonable solution for this problem, and
+          // neither MSVC's nor MinGW's std::ifstream implementation handles
+          // this case properly.
+          //
+
+          // The only way to query the current file descriptor mode is to
+          // reset it and use the result (see fdmode() for details).
+          //
+          fdstream_mode fm (fdmode (fd_.get (), fdstream_mode::text));
+
+          // Note: the fdstream_mode::blocking flag is also set.
+          //
+          if ((fm & fdstream_mode::text) == fdstream_mode::text)
+            n += count (gptr (), egptr (), '\n');
+          else
+            fdmode (fd_.get (), fm); // Restore the mode if it was changed.
+#endif
+
+          // Note that ifdstream::tellg() implicitly calls seekoff(0,ios::cur)
+          // (see above). Let's not reset the get area for such noop seeks.
+          //
+          if (off == 0)
+            return static_cast<off_type> (
+              fdseek (fd_.get (), 0, fdseek_mode::cur) - n);
+
+          off -= n;
+        }
+
+        // Reset the get area.
+        //
+        setg (buf_, buf_, buf_);
+        break;
+      }
+    default: return static_cast<off_type> (-1);
+    }
+
+    // Note that on Windows in the text mode the logical offset (number of
+    // read/written bytes) is likely to be screwed up due to newlines
+    // translation (see above).
+    //
+    off_ = fdseek (fd_.get (), off, m);
+
+    return static_cast<off_type> (off_);
+  }
+
inline static bool
flag (fdstream_mode m, fdstream_mode flag)
{
@@ -784,7 +969,7 @@ namespace butl
}
uint64_t
- fdseek (int fd, uint64_t o, fdseek_mode fdm)
+ fdseek (int fd, int64_t o, fdseek_mode fdm)
{
int m (-1);
@@ -800,7 +985,7 @@ namespace butl
if (r == static_cast<off_t> (-1))
throw_generic_ios_failure (errno);
#else
- __int64 r (_lseeki64 (fd, static_cast<__int64> (o), m));
+ __int64 r (_lseeki64 (fd, o, m));
if (r == -1)
throw_generic_ios_failure (errno);
#endif
diff --git a/libbutl/fdstream.mxx b/libbutl/fdstream.mxx
index dd7418e..ff96e2e 100644
--- a/libbutl/fdstream.mxx
+++ b/libbutl/fdstream.mxx
@@ -108,11 +108,14 @@ LIBBUTL_MODEXPORT namespace butl
// - char only
// - input or output but not both (can use a union of two streams for that)
// - no support for put back
- // - no support for tell[gp]()/seek[gp]() (but see non-standard tellg() and
- // tellp() in fdbuf)
+ // - use of tell[gp]() and seek[gp]() is discouraged on Windows for
+ // fdstreams opened in the text mode (see fdbuf::seekoff() implementation
+ // for reasoning and consider using non-standard tellg() and seekg() in
+ // fdbuf, instead)
// - non-blocking file descriptor is supported only by showmanyc() function
// and only on POSIX
- // - throws ios::failure in case of open()/read()/write()/close() errors
+ // - throws ios::failure in case of open(), read(), write(), close(),
+ // seek[gp](), or tell[gp]() errors
// - exception mask has at least badbit
// - after catching an exception caused by badbit the stream is no longer
// usable
@@ -157,6 +160,9 @@ LIBBUTL_MODEXPORT namespace butl
using int_type = base::int_type;
using traits_type = base::traits_type;
+ using pos_type = base::pos_type; // std::streampos
+ using off_type = base::off_type; // std::streamoff
+
// basic_streambuf input interface.
//
public:
@@ -174,9 +180,21 @@ LIBBUTL_MODEXPORT namespace butl
// Return the (logical) position of the next byte to be read.
//
+ // Note that on Windows when reading in the text mode the logical position
+ // may differ from the physical file descriptor position due to the CRLF
+ // character sequence translation. See the seekoff() implementation for
+ // more background on this issue.
+ //
std::uint64_t
tellg () const {return off_ - (egptr () - gptr ());}
+ // Seek to the (logical) position as if by reading the specified number of
+ // bytes from the beginning of the stream. Throw ios::failure on the
+ // underlying OS errors.
+ //
+ void
+ seekg (std::uint64_t);
+
private:
bool
load ();
@@ -198,6 +216,15 @@ LIBBUTL_MODEXPORT namespace butl
std::uint64_t
tellp () const {return off_ + (pptr () - buf_);}
+ // basic_streambuf positioning interface (both input/output).
+ //
+ public:
+ virtual pos_type
+ seekpos (pos_type, std::ios_base::openmode);
+
+ virtual pos_type
+ seekoff (off_type, std::ios_base::seekdir, std::ios_base::openmode);
+
private:
bool
save ();
@@ -741,7 +768,7 @@ LIBBUTL_MODEXPORT namespace butl
enum class fdseek_mode {set, cur, end};
LIBBUTL_SYMEXPORT std::uint64_t
- fdseek (int, std::uint64_t, fdseek_mode);
+ fdseek (int, std::int64_t, fdseek_mode);
// Truncate or expand the file to the specified size. Throw ios::failure on
// the underlying OS error.
diff --git a/tests/fdstream/driver.cxx b/tests/fdstream/driver.cxx
index 675a66e..01da9fb 100644
--- a/tests/fdstream/driver.cxx
+++ b/tests/fdstream/driver.cxx
@@ -482,6 +482,286 @@ main (int argc, const char* argv[])
#endif
+ // Test setting and getting position via the non-standard fdbuf interface.
+ //
+ // Seek for read.
+ //
+ {
+ to_file (f, "012\n3\n4567", fdopen_mode::truncate);
+
+ ifdstream is (f);
+
+ fdbuf* buf (dynamic_cast<fdbuf*> (is.rdbuf ()));
+ assert (buf != nullptr);
+
+ char c;
+ for (size_t i (0); i < 7; ++i)
+ is.get (c);
+
+ uint64_t p (buf->tellg ());
+ assert (p == 7);
+
+ is.get (c);
+ assert (c == '5');
+
+ buf->seekg (p);
+ assert (buf->tellg () == p);
+
+ is.get (c);
+ assert (c == '5');
+
+ // Can't seek beyond the end of the stream.
+ //
+ try
+ {
+ buf->seekg (20);
+ assert (false);
+ }
+ catch (const ios::failure&) {}
+ }
+
+ // Seek for write.
+ //
+ {
+ // Let's test replacing the '3' fragment with 'XYZ' in the following file.
+ //
+ to_file (f, "012\n3\n4567", fdopen_mode::truncate);
+
+ auto_fd fd;
+ string suffix;
+ size_t p (4); // Logical position of the fragment being replaced.
+
+ {
+ ifdstream is (f, fdopen_mode::in | fdopen_mode::out);
+
+ fdbuf* buf (dynamic_cast<fdbuf*> (is.rdbuf ()));
+ assert (buf != nullptr);
+
+ // Read till the end of the fragment.
+ //
+ char c;
+ for (size_t i (0); i < p + 1; ++i)
+ is.get (c);
+
+ assert (c == '3');
+
+ // Read the suffix.
+ //
+ suffix = is.read_text ();
+ assert (suffix == "\n4567");
+
+ // Seek to the beginning of the fragment and detach the file descriptor.
+ //
+ buf->seekg (p);
+ fd = is.release ();
+ }
+
+ // Rewrite the fragment.
+ //
+ // Note that on Windows in the text mode the logical position differs from
+ // the file descriptor position, so we need to query the latter one to
+ // truncate the file.
+ //
+ fdtruncate (fd.get (), fdseek (fd.get (), 0, fdseek_mode::cur));
+
+ ofdstream os (move (fd), ofdstream::badbit | ofdstream::failbit, p);
+
+ os << "XYZ" << suffix;
+ os.close ();
+
+ assert (from_file (f) == "012\nXYZ\n4567");
+ }
+
+ // Test setting and getting position via the standard [io]stream interface.
+ //
+ to_file (f, "0123456789", fdopen_mode::truncate);
+
+ // Seek for read.
+ //
+ {
+ ifdstream is (f);
+
+ char c;
+ is.get (c);
+
+ is.seekg (5, ios::beg);
+ is.get (c);
+ assert (c == '5');
+
+ is.seekg (2, ios::cur);
+
+ assert (static_cast<streamoff> (is.tellg ()) == 8);
+
+ const fdbuf* buf (dynamic_cast<const fdbuf*> (is.rdbuf ()));
+ assert (buf != nullptr && buf->tellg () == 8);
+
+ assert (from_stream (is) == "89");
+ }
+
+ // Seek for write.
+ //
+ {
+ ofdstream os (f, fdopen_mode::out);
+ os.seekp (4, ios::beg);
+ os << "ABC";
+ os.seekp (-4, ios::end);
+ os << "XYZ";
+ os.seekp (-8, ios::cur);
+ os << 'C';
+
+ assert (static_cast<streamoff> (os.tellp ()) == 2);
+
+ const fdbuf* buf (dynamic_cast<const fdbuf*> (os.rdbuf ()));
+ assert (buf != nullptr && buf->tellp () == 2);
+
+ os.close ();
+ assert (from_file (f) == "0C23ABXYZ9");
+ }
+
+#ifdef _WIN32
+
+ // Test handling newline characters on Windows while setting and getting
+ // position via the standard [io]stream interface.
+ //
+ // Save the string in the text mode, so the newline character is translated
+ // into the 0xD, 0xA character sequence on Windows.
+ //
+ to_file (f, "01234\n56789", fdopen_mode::truncate);
+
+ // Seek for read in the text mode.
+ //
+ {
+ ifdstream is (f);
+
+ char c;
+ is.get (c);
+
+ is.seekg (2, ios::cur);
+ is.get (c);
+
+ assert (c == '3');
+
+ is.seekg (4, ios::cur);
+
+ assert (static_cast<streamoff> (is.tellg ()) == 8);
+ assert (from_stream (is) == "6789");
+ }
+
+  // Seek for read in the binary mode.
+  //
+  {
+    ifdstream is (f, ios::binary);
+
+    char c;
+    is.get (c);
+
+    is.seekg (2, ios::cur);
+    is.get (c);
+
+    assert (c == '3');
+
+    is.seekg (4, ios::cur);
+
+    assert (static_cast<streamoff> (is.tellg ()) == 8);
+
+    // This is an input stream, so verify the get (rather than put) position
+    // reported by the non-standard fdbuf interface.
+    //
+    const fdbuf* buf (dynamic_cast<const fdbuf*> (is.rdbuf ()));
+    assert (buf != nullptr && buf->tellg () == 8);
+
+    assert (from_stream (is) == "6789");
+  }
+
+ // Research the positioning misbehavior of std::ifstream object opened
+ // in the text mode on Windows.
+ //
+#if 0
+
+ to_file (f, "012\r\n3\n4567", fdopen_mode::truncate | fdopen_mode::binary);
+
+ {
+ ifstream is (f.string ());
+// ifdstream is (f);
+
+ char c1;
+ for (size_t i (0); i < 2; ++i)
+ is.get (c1);
+
+ is.seekg (6, ios::cur);
+
+ streamoff p1 (is.tellg ());
+
+ is.get (c1);
+
+ cout << "c1: '" << c1 << "' pos " << p1 << endl;
+
+ char c2;
+ is.seekg (8, ios::beg);
+
+ streamoff p2 (is.tellg ());
+ is.get (c2);
+
+ cout << "c2: '" << c2 << "' pos " << p2 << endl;
+
+ // One could expect the positions and characters to match, but:
+ //
+ // VC's ifstream and ifdstream end up with:
+ //
+ // c1: '4' pos 7
+ // c2: '5' pos 8
+ //
+ // MinGW's ifstream ends up with:
+ //
+ // c1: '6' pos 9
+ // c2: '5' pos 8
+ //
+ // These assertions fail for all implementations:
+ //
+ // assert (p1 == p2);
+ // assert (c1 == c2);
+ }
+
+ {
+ ifstream is (f.string ());
+// ifdstream is (f);
+
+ char c1;
+ for (size_t i (0); i < 2; ++i)
+ is.get (c1);
+
+ auto p1 (is.tellg ());
+ is.get (c1);
+
+ cout << "c1: '" << c1 << "' pos " << p1 << endl;
+
+ is.seekg (p1, ios::beg);
+
+ auto p2 (is.tellg ());
+
+ char c2;
+ is.get (c2);
+
+ cout << "c2: '" << c2 << "' pos " << p2 << endl;
+
+ // One could expect the positions and characters to match, but:
+ //
+ // VC's ifstream and ifdstream end up with:
+ //
+ // c1: '2' pos 1
+ // c2: '1' pos 1
+ //
+ // MinGW's ifstream ends up with:
+ //
+ // c1: '2' pos 3
+ // c2: '\n' pos 3
+ //
+ // This assertion fails for all implementations:
+ //
+ // assert (c1 == c2);
+ }
+
+#endif
+
+#endif
+
// Test pipes.
//
// Here we rely on buffering being always enabled for pipes.