// file : libbuild2/depdb.cxx -*- C++ -*- // license : MIT; see accompanying LICENSE file #include <libbuild2/depdb.hxx> #ifdef _WIN32 # include <libbutl/win32-utility.hxx> #endif #include <libbuild2/filesystem.hxx> // mtime() #include <libbuild2/diagnostics.hxx> using namespace std; using namespace butl; namespace build2 { // Note that state::write with absent pos is interpreted as non-existent. // depdb_base:: depdb_base (const path& p, state s, optional<uint64_t> pos) : state_ (s) { fdopen_mode om (fdopen_mode::out | fdopen_mode::binary); ifdstream::iostate em (ifdstream::badbit); if (s == state::write) { if (!pos) om |= fdopen_mode::create | fdopen_mode::exclusive; em |= ifdstream::failbit; } else om |= fdopen_mode::in; // Both in & out so can switch from read to write. auto_fd fd; try { fd = fdopen (p, om); } catch (const io_error&) { bool c (s == state::write && !pos); diag_record dr (fail); dr << "unable to " << (c ? "create " : "open ") << p; if (c) dr << info << "did you forget to add fsdir{} prerequisite for " << "output directory?"; dr << endf; } if (pos) try { fdseek (fd.get (), *pos, fdseek_mode::set); } catch (const io_error& e) { fail << "unable to rewind " << p << ": " << e; } // Open the corresponding stream. Note that if we throw after that, the // corresponding member will not be destroyed. This is the reason for the // depdb/base split. // if (state_ == state::read) { new (&is_) ifdstream (move (fd), em); buf_ = static_cast<fdstreambuf*> (is_.rdbuf ()); } else { new (&os_) ofdstream (move (fd), em, pos ? *pos : 0); buf_ = static_cast<fdstreambuf*> (os_.rdbuf ()); } } depdb:: depdb (path_type&& p, timestamp mt) : depdb_base (p, mt != timestamp_nonexistent ? state::read : state::write), path (move (p)), mtime (mt != timestamp_nonexistent ? mt : timestamp_unknown) { // Read/write the database format version. // if (state_ == state::read) { string* l (read ()); if (l == nullptr || *l != "1") write ('1'); } else write ('1'); } depdb:: depdb (path_type p) : depdb (move (p), build2::mtime (p)) { } depdb:: depdb (reopen_state rs) : depdb_base (rs.path, state::write, rs.pos), path (move (rs.path)), mtime (timestamp_unknown), touch (rs.mtime) { } void depdb:: change (bool trunc) { assert (state_ != state::write); // Transfer the file descriptor from ifdstream to ofdstream. Note that the // steps in this dance must be carefully ordered to make sure we don't // call any destructors twice in the face of exceptions. // auto_fd fd (is_.release ()); // Consider this scenario: we are overwriting an old line (so it ends with // a newline and the "end marker") but the operation failed half way // through. Now we have the prefix from the new line, the suffix from the // old, and everything looks valid. So what we need is to somehow // invalidate the old content so that it can never combine with (partial) // new content to form a valid line. One way to do that would be to // truncate the file. // if (trunc) try { fdtruncate (fd.get (), pos_); } catch (const io_error& e) { fail << "unable to truncate " << path << ": " << e; } // Note: the file descriptor position can be beyond the pos_ value due to // the ifdstream buffering. That's why we need to seek to switch from // reading to writing. // try { fdseek (fd.get (), pos_, fdseek_mode::set); } catch (const io_error& e) { fail << "unable to rewind " << path << ": " << e; } // @@ Strictly speaking, ofdstream can throw which will leave us in a // non-destructible state. Unlikely but possible. // is_.~ifdstream (); new (&os_) ofdstream (move (fd), ofdstream::badbit | ofdstream::failbit, pos_); buf_ = static_cast<fdstreambuf*> (os_.rdbuf ()); state_ = state::write; mtime = timestamp_unknown; } string* depdb:: read_ () { // Save the start position of this line so that we can overwrite it. // pos_ = buf_->tellg (); try { // Note that we intentionally check for eof after updating the write // position. // if (state_ == state::read_eof) return nullptr; getline (is_, line_); // Calls line_.erase(). // The line should always end with a newline. If it doesn't, then this // line (and the rest of the database) is assumed corrupted. Also peek // at the character after the newline. We should either have the next // line or '\0', which is our "end marker", that is, it indicates the // database was properly closed. // ifdstream::int_type c; if (is_.fail () || // Nothing got extracted. is_.eof () || // Eof reached before delimiter. (c = is_.peek ()) == ifdstream::traits_type::eof ()) { // Preemptively switch to writing. While we could have delayed this // until the user called write(), if the user calls read() again (for // whatever misguided reason) we will mess up the overwrite position. // change (); return nullptr; } // Handle the "end marker". Note that the caller can still switch to the // write mode on this line. And, after calling read() again, write to // the next line (i.e., start from the "end marker"). // if (c == '\0') state_ = state::read_eof; } catch (const io_error& e) { fail << "unable to read from " << path << ": " << e; } return &line_; } bool depdb:: skip () { if (state_ == state::read_eof) return true; assert (state_ == state::read); // The rest is pretty similar in logic to read_() above. // pos_ = buf_->tellg (); try { // Keep reading lines checking for the end marker after each newline. // ifdstream::int_type c; do { if ((c = is_.get ()) == '\n') { if ((c = is_.get ()) == '\0') { state_ = state::read_eof; return true; } } } while (c != ifdstream::traits_type::eof ()); } catch (const io_error& e) { fail << "unable to read from " << path << ": " << e; } // Invalid database so change over to writing. // change (); return false; } void depdb:: write (const char* s, size_t n, bool nl) { // Switch to writing if we are still reading. // if (state_ != state::write) change (); try { os_.write (s, static_cast<streamsize> (n)); if (nl) os_.put ('\n'); } catch (const io_error& e) { fail << "unable to write to " << path << ": " << e; } } void depdb:: write (char c, bool nl) { // Switch to writing if we are still reading. // if (state_ != state::write) change (); try { os_.put (c); if (nl) os_.put ('\n'); } catch (const io_error& e) { fail << "unable to write to " << path << ": " << e; } } void depdb:: close (bool mc) { // If we are at eof, then it means all lines are good, there is the "end // marker" at the end, and we don't need to do anything, except, maybe // touch the file. Otherwise, if we are still in the read mode, truncate // the rest, and then add the "end marker" (we cannot have anything in the // write mode since we truncate in change()). // // Note that we handle touch with timestamp_unknown specially by making a // modification to the file (which happens naturally in the write mode) // and letting the filesystem update its mtime. // if (state_ == state::read_eof) { if (!touch) try { is_.close (); return; } catch (const io_error& e) { fail << "unable to close " << path << ": " << e; } // While there are utime(2)/utimensat(2) (and probably something similar // for Windows), for now we just overwrite the "end marker". Hopefully // no implementation will be smart enough to recognize this is a no-op // and skip updating mtime (which would probably be incorrect, spec- // wise). And this could even be faster since we already have the file // descriptor. Or it might be slower since so far we've only been // reading. // // Note also that utime() on Windows is a bad idea (see touch_file() for // details). // if (*touch == timestamp_unknown) { pos_ = buf_->tellg (); // The last line is accepted. change (false /* truncate */); // Write end marker below. } } else if (state_ != state::write) { pos_ = buf_->tellg (); // The last line is accepted. change (true /* truncate */); } if (mc && mtime_check ()) start_ = system_clock::now (); if (state_ == state::write) try { os_.put ('\0'); // The "end marker". os_.close (); } catch (const io_error& e) { fail << "unable to flush file " << path << ": " << e; } if (touch && *touch != timestamp_unknown) try { file_mtime (path, *touch); } catch (const system_error& e) { fail << "unable to touch file " << path << ": " << e; } // On some platforms (currently confirmed on FreeBSD running as VMs) one // can sometimes end up with a modification time that is a bit after the // call to close(). And in some tight cases this can mess with our // "protocol" that a valid depdb should be no older than the target it is // for. // // Note that this does not seem to be related to clock adjustments but // rather feels like the modification time is set when the changes // actually hit some lower-level layer (e.g., OS or filesystem // driver). One workaround that appears to work is to query the // mtime. This seems to force that layer to commit to a timestamp. // #if defined(__FreeBSD__) mtime = build2::mtime (path); // Save for debugging/check below. #endif } depdb::reopen_state depdb:: close_to_reopen () { assert (!touch); if (state_ != state::write) { pos_ = buf_->tellg (); // The last line is accepted. change (state_ != state::read_eof /* truncate */); } pos_ = buf_->tellp (); try { os_.put ('\0'); // The "end marker". os_.close (); } catch (const io_error& e) { fail << "unable to flush file " << path << ": " << e; } // Note: must still be done for FreeBSD if changing anything here (see // close() for details). // mtime = build2::mtime (path); return reopen_state {move (path), pos_, mtime}; } void depdb:: check_mtime_ (const path_type& t, timestamp e) { // We could call the static version but then we would have lost additional // information for some platforms. // timestamp t_mt (build2::mtime (t)); if (t_mt == timestamp_nonexistent) fail << "target file " << t << " does not exist at the end of recipe"; timestamp d_mt (build2::mtime (path)); if (d_mt > t_mt) { if (e == timestamp_unknown) e = system_clock::now (); fail << "backwards modification times detected:\n" << " " << start_ << " sequence start\n" #if defined(__FreeBSD__) << " " << mtime << " close mtime\n" #endif << " " << d_mt << " " << path.string () << '\n' << " " << t_mt << " " << t.string () << '\n' << " " << e << " sequence end"; } } void depdb:: check_mtime_ (timestamp s, const path_type& d, const path_type& t, timestamp e) { using build2::mtime; timestamp t_mt (mtime (t)); if (t_mt == timestamp_nonexistent) fail << "target file " << t << " does not exist at the end of recipe"; timestamp d_mt (mtime (d)); if (d_mt > t_mt) { fail << "backwards modification times detected:\n" << " " << s << " sequence start\n" << " " << d_mt << " " << d.string () << '\n' << " " << t_mt << " " << t.string () << '\n' << " " << e << " sequence end"; } } }