aboutsummaryrefslogtreecommitdiff
path: root/libbuild2/test/script
diff options
context:
space:
mode:
Diffstat (limited to 'libbuild2/test/script')
-rw-r--r--libbuild2/test/script/builtin.cxx1979
-rw-r--r--libbuild2/test/script/builtin.hxx74
-rw-r--r--libbuild2/test/script/lexer+command-expansion.test.testscript248
-rw-r--r--libbuild2/test/script/lexer+command-line.test.testscript208
-rw-r--r--libbuild2/test/script/lexer+description-line.test.testscript33
-rw-r--r--libbuild2/test/script/lexer+first-token.test.testscript97
-rw-r--r--libbuild2/test/script/lexer+second-token.test.testscript68
-rw-r--r--libbuild2/test/script/lexer+variable-line.test.testscript28
-rw-r--r--libbuild2/test/script/lexer+variable.test.testscript70
-rw-r--r--libbuild2/test/script/lexer.cxx551
-rw-r--r--libbuild2/test/script/lexer.hxx94
-rw-r--r--libbuild2/test/script/lexer.test.cxx85
-rw-r--r--libbuild2/test/script/parser+cleanup.test.testscript58
-rw-r--r--libbuild2/test/script/parser+command-if.test.testscript548
-rw-r--r--libbuild2/test/script/parser+command-re-parse.test.testscript12
-rw-r--r--libbuild2/test/script/parser+description.test.testscript486
-rw-r--r--libbuild2/test/script/parser+directive.test.testscript74
-rw-r--r--libbuild2/test/script/parser+exit.test.testscript27
-rw-r--r--libbuild2/test/script/parser+expansion.test.testscript36
-rw-r--r--libbuild2/test/script/parser+here-document.test.testscript213
-rw-r--r--libbuild2/test/script/parser+here-string.test.testscript19
-rw-r--r--libbuild2/test/script/parser+include.test.testscript104
-rw-r--r--libbuild2/test/script/parser+pipe-expr.test.testscript133
-rw-r--r--libbuild2/test/script/parser+pre-parse.test.testscript23
-rw-r--r--libbuild2/test/script/parser+redirect.test.testscript356
-rw-r--r--libbuild2/test/script/parser+regex.test.testscript223
-rw-r--r--libbuild2/test/script/parser+scope-if.test.testscript554
-rw-r--r--libbuild2/test/script/parser+scope.test.testscript280
-rw-r--r--libbuild2/test/script/parser+setup-teardown.test.testscript151
-rw-r--r--libbuild2/test/script/parser.cxx3451
-rw-r--r--libbuild2/test/script/parser.hxx250
-rw-r--r--libbuild2/test/script/parser.test.cxx245
-rw-r--r--libbuild2/test/script/regex.cxx440
-rw-r--r--libbuild2/test/script/regex.hxx703
-rw-r--r--libbuild2/test/script/regex.ixx35
-rw-r--r--libbuild2/test/script/regex.test.cxx302
-rw-r--r--libbuild2/test/script/runner.cxx1891
-rw-r--r--libbuild2/test/script/runner.hxx101
-rw-r--r--libbuild2/test/script/script.cxx741
-rw-r--r--libbuild2/test/script/script.hxx559
-rw-r--r--libbuild2/test/script/script.ixx60
-rw-r--r--libbuild2/test/script/token.cxx57
-rw-r--r--libbuild2/test/script/token.hxx65
43 files changed, 15732 insertions, 0 deletions
diff --git a/libbuild2/test/script/builtin.cxx b/libbuild2/test/script/builtin.cxx
new file mode 100644
index 0000000..ab57d4f
--- /dev/null
+++ b/libbuild2/test/script/builtin.cxx
@@ -0,0 +1,1979 @@
+// file : libbuild2/test/script/builtin.cxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
+// license : MIT; see accompanying LICENSE file
+
+#include <libbuild2/test/script/builtin.hxx>
+
+#include <chrono>
+#include <locale>
+#include <ostream>
+#include <sstream>
+#include <cstdlib> // strtoull()
+
+#include <libbutl/regex.mxx>
+#include <libbutl/path-io.mxx> // use default operator<< implementation
+#include <libbutl/fdstream.mxx> // fdopen_mode, fdstream_mode
+#include <libbutl/filesystem.mxx>
+
+#include <libbuild2/context.hxx> // sched
+
+#include <libbuild2/test/script/script.hxx>
+
+// Strictly speaking a builtin which reads/writes from/to standard streams
+// must be asynchronous so that the caller can communicate with it through
+// pipes without being blocked on I/O operations. However, as an optimization,
+// we allow builtins that only print diagnostics to STDERR to be synchronous
+// assuming that their output will always fit the pipe buffer. Synchronous
+// builtins must neither read from STDIN nor write to STDOUT. Later we may
+// relax this rule to allow a "short" output for such builtins.
+//
+using namespace std;
+using namespace butl;
+
+namespace build2
+{
+ namespace test
+ {
+ namespace script
+ {
+ // Function type of a builtin implementation such as cat() or cp() below:
+ // it receives the scope, the builtin arguments, and the stdin, stdout, and
+ // stderr file descriptors (by value). The implementations in this file
+ // return 0 on success and 1 on failure.
+ //
+ using builtin_impl = uint8_t (scope&,
+                               const strings& args,
+                               auto_fd in,
+                               auto_fd out,
+                               auto_fd err);
+
+ // Exception thrown when an operation has failed and its diagnostics has
+ // already been issued, so handlers only need to unwind without reporting.
+ struct failed {};
+
+ // Diagnostics record used by the builtins. The constructor seeds the
+ // message with the builtin name, operator<< appends to it, and the
+ // destructor writes the whole message (followed by a newline) to the
+ // provided stream in one go and then throws failed if that was requested.
+ //
+ // Move constructible-only, not assignable (based on diag_record).
+ //
+ class error_record
+ {
+ public:
+   error_record (ostream& o, bool fail, const char* name)
+     : os_ (o), fail_ (fail), empty_ (false)
+   {
+     ss_ << name << ": ";
+   }
+
+   // Older versions of libstdc++ lack move support for ostringstream, so
+   // for this library the move constructor is only declared. This works as
+   // long as the compiler elides every would-be move.
+   //
+#ifdef __GLIBCXX__
+   error_record (error_record&&);
+#else
+   error_record (error_record&& r)
+     : os_ (r.os_),
+       ss_ (move (r.ss_)),
+       fail_ (r.fail_),
+       empty_ (r.empty_)
+   {
+     // The source must neither print nor throw anymore.
+     //
+     r.empty_ = true;
+   }
+#endif
+
+   // Append a value to the accumulated message. The record is taken by
+   // const reference so that temporaries can be streamed into.
+   //
+   template <typename T>
+   friend const error_record&
+   operator<< (const error_record& rec, const T& value)
+   {
+     rec.ss_ << value;
+     return rec;
+   }
+
+   ~error_record () noexcept (false)
+   {
+     // A moved-from record has nothing to report.
+     //
+     if (empty_)
+       return;
+
+     // The output stream can be in a bad state (for example, after a
+     // failed attempt to report a previous error), in which case the
+     // message is dropped.
+     //
+     if (os_.good ())
+     {
+       ss_.put ('\n');
+       os_ << ss_.str ();
+       os_.flush ();
+     }
+
+     if (fail_)
+       throw failed ();
+   }
+
+ private:
+   ostream& os_;
+   mutable ostringstream ss_;
+
+   bool fail_;
+   bool empty_;
+ };
+
+ // Convert a string to a normalized path. A relative path is first
+ // completed using the specified directory. Throw invalid_path if the
+ // string is empty as well as on parsing and normalization failures.
+ //
+ static path
+ parse_path (string s, const dir_path& d)
+ {
+   path r (move (s));
+
+   // The empty path is rejected before any attempt to complete it.
+   //
+   if (r.empty ())
+     throw invalid_path ("");
+
+   if (r.relative ())
+     r = d / move (r);
+
+   r.normalize ();
+   return r;
+ }
+
+ // Builtin commands functions.
+ //
+
+ // cat <file>...
+ //
+ // Print the specified files to STDOUT in the order given, with '-'
+ // standing for STDIN. Without arguments print STDIN.
+ //
+ // Note that POSIX doesn't specify if after I/O operation failure the
+ // command should proceed with the rest of the arguments. The current
+ // implementation exits immediately in such a case.
+ //
+ // @@ Shouldn't we check that we don't print a nonempty regular file to
+ //    itself, as that would merely exhaust the output device? POSIX
+ //    allows (but does not require) such a check and some implementations
+ //    do this. That would require to fstat() file descriptors and
+ //    complicate the code a bit. Was able to reproduce on a big file
+ //    (should be bigger than the stream buffer size) with the test
+ //    'cat file >+file'.
+ //
+ // Note: must be executed asynchronously.
+ //
+ static uint8_t
+ cat (scope& sp,
+      const strings& args,
+      auto_fd in, auto_fd out, auto_fd err) noexcept
+ try
+ {
+   uint8_t r (1);
+   ofdstream cerr (move (err));
+
+   auto error = [&cerr] (bool fail = true)
+   {
+     return error_record (cerr, fail, "cat");
+   };
+
+   try
+   {
+     ifdstream cin (move (in), fdstream_mode::binary);
+     ofdstream cout (move (out), fdstream_mode::binary);
+
+     // Pump the whole input stream into STDOUT and leave it in the eof
+     // state.
+     //
+     auto copy = [&cout] (istream& is)
+     {
+       if (is.peek () != ifdstream::traits_type::eof ())
+         cout << is.rdbuf ();
+
+       is.clear (istream::eofbit); // Sets eofbit.
+     };
+
+     // Path of the file currently being printed (empty for STDIN). Only
+     // used in diagnostics.
+     //
+     path cur;
+
+     try
+     {
+       if (args.empty ())
+         copy (cin);
+
+       for (const string& a: args)
+       {
+         if (a != "-")
+         {
+           cur = parse_path (a, sp.wd_path);
+
+           ifdstream is (cur, ifdstream::binary);
+           copy (is);
+           is.close ();
+         }
+         else if (!cin.eof ())
+         {
+           // STDIN is printed at most once.
+           //
+           cur.clear ();
+           copy (cin);
+         }
+       }
+     }
+     catch (const io_error& e)
+     {
+       // The record throws failed from its destructor at the end of this
+       // handler.
+       //
+       error_record d (error ());
+       d << "unable to print ";
+
+       if (cur.empty ())
+         d << "stdin";
+       else
+         d << "'" << cur << "'";
+
+       d << ": " << e;
+     }
+
+     cin.close ();
+     cout.close ();
+     r = 0;
+   }
+   catch (const invalid_path& e)
+   {
+     error (false) << "invalid path '" << e.path << "'";
+   }
+   // Can be thrown while creating/closing cin, cout or writing to cerr.
+   //
+   catch (const io_error& e)
+   {
+     error (false) << e;
+   }
+   catch (const failed&)
+   {
+     // Diagnostics has already been issued.
+   }
+
+   cerr.close ();
+   return r;
+ }
+ catch (const std::exception&)
+ {
+   return 1;
+ }
+
+ // Copy a file to the specified path, optionally overwriting an existing
+ // destination and/or copying permissions and timestamps. If requested,
+ // register a cleanup for the destination unless it already existed. The
+ // file paths must be absolute. Fail if the underlying copy operation
+ // throws.
+ //
+ static void
+ cpfile (scope& sp,
+         const path& from, const path& to,
+         bool overwrite,
+         bool attrs,
+         bool cleanup,
+         const function<error_record()>& fail)
+ {
+   try
+   {
+     // Must be queried before the copy creates the destination.
+     //
+     const bool existed (file_exists (to));
+
+     cpflags fl (cpflags::none);
+
+     if (overwrite)
+       fl = cpflags::overwrite_permissions | cpflags::overwrite_content;
+
+     if (attrs)
+       fl |= cpflags::overwrite_permissions | cpflags::copy_timestamps;
+
+     cpfile (from, to, fl);
+
+     if (cleanup && !existed)
+       sp.clean ({cleanup_type::always, to}, true);
+   }
+   catch (const system_error& e)
+   {
+     fail () << "unable to copy file '" << from << "' to '" << to
+             << "': " << e;
+   }
+ }
+
+ // Recursively copy a directory to the specified path, optionally copying
+ // permissions and timestamps and registering cleanups for the created
+ // filesystem entries. The directory paths must be absolute. Fail if the
+ // destination directory already exists or the underlying copy operation
+ // throws.
+ //
+ static void
+ cpdir (scope& sp,
+        const dir_path& from, const dir_path& to,
+        bool attrs,
+        bool cleanup,
+        const function<error_record()>& fail)
+ {
+   try
+   {
+     if (try_mkdir (to) == mkdir_status::already_exists)
+       throw_generic_error (EEXIST);
+
+     if (cleanup)
+       sp.clean ({cleanup_type::always, to}, true);
+
+     for (const auto& de: dir_iterator (from, false /* ignore_dangling */))
+     {
+       path src (from / de.path ());
+       path dst (to / de.path ());
+
+       // Anything that is not a directory is copied as a file.
+       //
+       if (de.type () != entry_type::directory)
+       {
+         cpfile (sp, src, dst, false /* overwrite */, attrs, cleanup, fail);
+         continue;
+       }
+
+       cpdir (sp,
+              path_cast<dir_path> (move (src)),
+              path_cast<dir_path> (move (dst)),
+              attrs,
+              cleanup,
+              fail);
+     }
+
+     // Note that it is essential to copy timestamps and permissions after
+     // the directory content is copied.
+     //
+     if (attrs)
+     {
+       path_permissions (to, path_permissions (from));
+       dir_time (to, dir_time (from));
+     }
+   }
+   catch (const system_error& e)
+   {
+     fail () << "unable to copy directory '" << from << "' to '" << to
+             << "': " << e;
+   }
+ }
+
+ // cp [-p] [--no-cleanup]        <src-file>    <dst-file>
+ // cp [-p] [--no-cleanup] -R|-r  <src-dir>     <dst-dir>
+ // cp [-p] [--no-cleanup]        <src-file>... <dst-dir>/
+ // cp [-p] [--no-cleanup] -R|-r  <src-path>... <dst-dir>/
+ //
+ // The last argument is the destination. A trailing directory separator on
+ // it selects the "copy into directory" forms (synopses 3 and 4).
+ //
+ // Note: can be executed synchronously.
+ //
+ static uint8_t
+ cp (scope& sp,
+     const strings& args,
+     auto_fd in, auto_fd out, auto_fd err) noexcept
+ try
+ {
+   uint8_t r (1);
+   ofdstream cerr (move (err));
+
+   auto error = [&cerr] (bool fail = true)
+   {
+     return error_record (cerr, fail, "cp");
+   };
+
+   try
+   {
+     in.close ();
+     out.close ();
+
+     auto i (args.begin ());
+     auto e (args.end ());
+
+     // Parse the options, stopping at the first non-option argument or
+     // right after the '--' separator.
+     //
+     bool recursive (false);
+     bool attrs (false);
+     bool cleanup (true);
+
+     for (; i != e; ++i)
+     {
+       const string& o (*i);
+
+       if (o == "-R" || o == "-r")
+       {
+         recursive = true;
+         continue;
+       }
+
+       if (o == "-p")
+       {
+         attrs = true;
+         continue;
+       }
+
+       if (o == "--no-cleanup")
+       {
+         cleanup = false;
+         continue;
+       }
+
+       if (o == "--")
+         ++i;
+
+       break;
+     }
+
+     if (i == e)
+       error () << "missing arguments";
+
+     const dir_path& wd (sp.wd_path);
+
+     // Split off the destination (the last argument) so that [i, e) is
+     // the range of sources.
+     //
+     --e;
+     path dst (parse_path (*e, wd));
+
+     if (i == e)
+       error () << "missing source path";
+
+     auto fail = [&error] () {return error (true);};
+
+     if (dst.to_directory ())
+     {
+       // Copy the source filesystem entries into the destination
+       // directory.
+       //
+       for (; i != e; ++i)
+       {
+         path src (parse_path (*i, wd));
+
+         if (recursive && dir_exists (src))
+         {
+           // Synopsis 4 for a source directory.
+           //
+           cpdir (sp,
+                  path_cast<dir_path> (src),
+                  path_cast<dir_path> (dst / src.leaf ()),
+                  attrs,
+                  cleanup,
+                  fail);
+         }
+         else
+         {
+           // Synopsis 3 as well as synopsis 4 for a source file.
+           //
+           cpfile (sp,
+                   src,
+                   dst / src.leaf (),
+                   true /* overwrite */,
+                   attrs,
+                   cleanup,
+                   fail);
+         }
+       }
+     }
+     else
+     {
+       // No trailing separator: make a copy of the single source at the
+       // destination path.
+       //
+       path src (parse_path (*i, wd));
+
+       // If there are multiple sources but no trailing separator for the
+       // destination, then, most likely, it is missing.
+       //
+       if (++i != e)
+         error () << "multiple source paths without trailing separator "
+                  << "for destination directory";
+
+       if (recursive)
+       {
+         // Synopsis 2: make a directory copy at the specified path.
+         //
+         cpdir (sp,
+                path_cast<dir_path> (src), path_cast<dir_path> (dst),
+                attrs,
+                cleanup,
+                fail);
+       }
+       else
+       {
+         // Synopsis 1: make a file copy at the specified path.
+         //
+         cpfile (sp,
+                 src,
+                 dst,
+                 true /* overwrite */,
+                 attrs,
+                 cleanup,
+                 fail);
+       }
+     }
+
+     r = 0;
+   }
+   catch (const invalid_path& e)
+   {
+     error (false) << "invalid path '" << e.path << "'";
+   }
+   // Can be thrown while closing in, out or writing to cerr.
+   //
+   catch (const io_error& e)
+   {
+     error (false) << e;
+   }
+   catch (const failed&)
+   {
+     // Diagnostics has already been issued.
+   }
+
+   cerr.close ();
+   return r;
+ }
+ catch (const std::exception&)
+ {
+   return 1;
+ }
+
+ // echo <string>...
+ //
+ // Print the arguments to STDOUT separated by single spaces and followed
+ // by a newline.
+ //
+ // Note: must be executed asynchronously.
+ //
+ static uint8_t
+ echo (scope&,
+       const strings& args,
+       auto_fd in, auto_fd out, auto_fd err) noexcept
+ try
+ {
+   uint8_t r (1);
+   ofdstream cerr (move (err));
+
+   try
+   {
+     in.close ();
+     ofdstream cout (move (out));
+
+     const auto b (args.begin ());
+
+     for (auto i (b); i != args.end (); ++i)
+     {
+       // Separate from the previous argument, if any.
+       //
+       if (i != b)
+         cout << " ";
+
+       cout << *i;
+     }
+
+     cout << '\n';
+     cout.close ();
+     r = 0;
+   }
+   catch (const std::exception& e)
+   {
+     cerr << "echo: " << e << endl;
+   }
+
+   cerr.close ();
+   return r;
+ }
+ catch (const std::exception&)
+ {
+   return 1;
+ }
+
+ // false
+ //
+ // Set the result to 1 and return a builtin object constructed from it.
+ // The file descriptors are unused and a failure to close them is
+ // silently ignored.
+ //
+ // Note: can be executed synchronously.
+ //
+ static builtin
+ false_ (scope&, uint8_t& r, const strings&, auto_fd, auto_fd, auto_fd)
+ {
+   r = 1;
+   return builtin (r);
+ }
+
+ // true
+ //
+ // Set the result to 0 and return a builtin object constructed from it.
+ // The file descriptors are unused and a failure to close them is
+ // silently ignored.
+ //
+ // Note: can be executed synchronously.
+ //
+ static builtin
+ true_ (scope&, uint8_t& r, const strings&, auto_fd, auto_fd, auto_fd)
+ {
+   r = 0;
+   return builtin (r);
+ }
+
+ // Create a symlink to a file or directory at the specified path. The
+ // paths must be absolute. If symlinks are not supported for the link
+ // path, fall back to a hardlink, and if that is not supported either, to
+ // a copy. If requested, the created filesystem entries are registered
+ // for cleanup. Fail if the target filesystem entry doesn't exist or the
+ // underlying filesystem operation throws (in particular, for an already
+ // existing filesystem entry at the link path).
+ //
+ // Note that supporting optional removal of an existing filesystem entry
+ // at the link path (the -f option) tends to get hairy. As soon as an
+ // existing and the resulting filesystem entries could be of different
+ // types, we would end up with canceling an old cleanup and registering
+ // the new one. Also removing non-empty directories doesn't look very
+ // natural, but would be required if we want the behavior on POSIX and
+ // Windows to be consistent.
+ //
+ static void
+ mksymlink (scope& sp,
+            const path& target, const path& link,
+            bool cleanup,
+            const function<error_record()>& fail)
+ {
+   // Determine the target type, fail if the target doesn't exist.
+   //
+   bool dir (false);
+
+   try
+   {
+     pair<bool, entry_stat> pe (path_entry (target));
+
+     if (!pe.first)
+       fail () << "unable to create symlink to '" << target << "': "
+               << "no such file or directory";
+
+     dir = pe.second.type == entry_type::directory;
+   }
+   catch (const system_error& e)
+   {
+     fail () << "unable to stat '" << target << "': " << e;
+   }
+
+   // Return true if the error means "this kind of link is not available
+   // here" and so the next fallback should be tried. Note that we are not
+   // guaranteed that the system_error exception is of the generic
+   // category.
+   //
+   auto unsupported = [] (const system_error& e, bool xdev)
+   {
+     if (!(e.code ().category () == generic_category ()))
+       return false;
+
+     int c (e.code ().value ());
+
+     return c == ENOSYS ||           // Not implemented.
+            c == EPERM  ||           // Not supported by the filesystem(s).
+            (xdev && c == EXDEV);    // On different filesystems.
+   };
+
+   // First we try to create a symlink. If that fails (e.g., "Windows
+   // happens"), then we resort to hard links. If that doesn't work out
+   // either (e.g., not on the same filesystem), then we fall back to
+   // copies. So things are going to get a bit nested.
+   //
+   try
+   {
+     mksymlink (target, link, dir);
+
+     if (cleanup)
+       sp.clean ({cleanup_type::always, link}, true);
+   }
+   catch (const system_error& se)
+   {
+     if (!unsupported (se, false /* xdev */))
+       fail () << "unable to create symlink '" << link << "' to '"
+               << target << "': " << se;
+
+     try
+     {
+       mkhardlink (target, link, dir);
+
+       if (cleanup)
+         sp.clean ({cleanup_type::always, link}, true);
+     }
+     catch (const system_error& he)
+     {
+       if (!unsupported (he, true /* xdev */))
+         fail () << "unable to create hardlink '" << link << "' to '"
+                 << target << "': " << he;
+
+       // The last resort: make a copy.
+       //
+       if (!dir)
+         cpfile (sp,
+                 target,
+                 link,
+                 false /* overwrite */,
+                 true /* attrs */,
+                 cleanup,
+                 fail);
+       else
+         cpdir (sp,
+                path_cast<dir_path> (target), path_cast<dir_path> (link),
+                false,
+                cleanup,
+                fail);
+     }
+   }
+ }
+
+ // ln [--no-cleanup] -s <target-path>    <link-path>
+ // ln [--no-cleanup] -s <target-path>... <link-dir>/
+ //
+ // Only symbolic links are supported and so the -s option is mandatory.
+ // The last argument is the link path. A trailing directory separator on
+ // it selects the "link into directory" form (synopsis 2).
+ //
+ // Note: can be executed synchronously.
+ //
+ static uint8_t
+ ln (scope& sp,
+     const strings& args,
+     auto_fd in, auto_fd out, auto_fd err) noexcept
+ try
+ {
+   uint8_t r (1);
+   ofdstream cerr (move (err));
+
+   auto error = [&cerr] (bool fail = true)
+   {
+     return error_record (cerr, fail, "ln");
+   };
+
+   try
+   {
+     in.close ();
+     out.close ();
+
+     auto i (args.begin ());
+     auto e (args.end ());
+
+     // Parse the options, stopping at the first non-option argument or
+     // right after the '--' separator.
+     //
+     bool cleanup (true);
+     bool symlink (false);
+
+     for (; i != e; ++i)
+     {
+       const string& o (*i);
+
+       if (o == "--no-cleanup")
+       {
+         cleanup = false;
+         continue;
+       }
+
+       if (o == "-s")
+       {
+         symlink = true;
+         continue;
+       }
+
+       if (o == "--")
+         ++i;
+
+       break;
+     }
+
+     if (!symlink)
+       error () << "missing -s option";
+
+     if (i == e)
+       error () << "missing arguments";
+
+     const dir_path& wd (sp.wd_path);
+
+     // Split off the link path (the last argument) so that [i, e) is the
+     // range of targets.
+     //
+     --e;
+     path link (parse_path (*e, wd));
+
+     if (i == e)
+       error () << "missing target path";
+
+     auto fail = [&error] () {return error (true);};
+
+     if (link.to_directory ())
+     {
+       // Synopsis 2: create the target path symlinks in the specified
+       // directory.
+       //
+       for (; i != e; ++i)
+       {
+         path target (parse_path (*i, wd));
+         mksymlink (sp, target, link / target.leaf (), cleanup, fail);
+       }
+     }
+     else
+     {
+       path target (parse_path (*i, wd));
+
+       // If there are multiple targets but no trailing separator for the
+       // link, then, most likely, it is missing.
+       //
+       if (++i != e)
+         error () << "multiple target paths with non-directory link path";
+
+       // Synopsis 1: create a target path symlink at the specified path.
+       //
+       mksymlink (sp, target, link, cleanup, fail);
+     }
+
+     r = 0;
+   }
+   catch (const invalid_path& e)
+   {
+     error (false) << "invalid path '" << e.path << "'";
+   }
+   // Can be thrown while closing in, out or writing to cerr.
+   //
+   catch (const io_error& e)
+   {
+     error (false) << e;
+   }
+   catch (const failed&)
+   {
+     // Diagnostics has already been issued.
+   }
+
+   cerr.close ();
+   return r;
+ }
+ catch (const std::exception&)
+ {
+   return 1;
+ }
+
+ // Create a directory if it does not exist, along with its parent
+ // directories if necessary. Throw system_error on failure. If requested,
+ // register the directories created by this call for cleanup. The
+ // directory path must be absolute.
+ //
+ static void
+ mkdir_p (scope& sp, const dir_path& p, bool cleanup)
+ {
+   if (dir_exists (p))
+     return;
+
+   // Create the parents first so that their cleanups are registered
+   // before the cleanup of this directory.
+   //
+   if (!p.root ())
+     mkdir_p (sp, p.directory (), cleanup);
+
+   // Note that try_mkdir() reports an already existing directory via its
+   // return value rather than by throwing, which can happen if the
+   // directory has appeared since the check above. Register the cleanup
+   // only for a directory that we have actually created.
+   //
+   if (try_mkdir (p) == mkdir_status::success && cleanup)
+     sp.clean ({cleanup_type::always, p}, true);
+ }
+
+ // mkdir [--no-cleanup] [-p] <dir>...
+ //
+ // Without -p creating an already existing directory is an error.
+ //
+ // Note that POSIX doesn't specify if after a directory creation failure
+ // the command should proceed with the rest of the arguments. The current
+ // implementation exits immediately in such a case.
+ //
+ // Note: can be executed synchronously.
+ //
+ static uint8_t
+ mkdir (scope& sp,
+        const strings& args,
+        auto_fd in, auto_fd out, auto_fd err) noexcept
+ try
+ {
+   uint8_t r (1);
+   ofdstream cerr (move (err));
+
+   auto error = [&cerr] (bool fail = true)
+   {
+     return error_record (cerr, fail, "mkdir");
+   };
+
+   try
+   {
+     in.close ();
+     out.close ();
+
+     auto i (args.begin ());
+     auto e (args.end ());
+
+     // Parse the options, stopping at the first non-option argument or
+     // right after the '--' separator.
+     //
+     bool parent (false);
+     bool cleanup (true);
+
+     for (; i != e; ++i)
+     {
+       const string& o (*i);
+
+       if (o == "-p")
+       {
+         parent = true;
+         continue;
+       }
+
+       if (o == "--no-cleanup")
+       {
+         cleanup = false;
+         continue;
+       }
+
+       if (o == "--")
+         ++i;
+
+       break;
+     }
+
+     if (i == e)
+       error () << "missing directory";
+
+     // Create the directories.
+     //
+     for (; i != e; ++i)
+     {
+       dir_path p (path_cast<dir_path> (parse_path (*i, sp.wd_path)));
+
+       try
+       {
+         if (parent)
+           mkdir_p (sp, p, cleanup);
+         else
+         {
+           // Anything but success means the directory already exists.
+           //
+           if (try_mkdir (p) != mkdir_status::success)
+             throw_generic_error (EEXIST);
+
+           if (cleanup)
+             sp.clean ({cleanup_type::always, p}, true);
+         }
+       }
+       catch (const system_error& e)
+       {
+         error () << "unable to create directory '" << p << "': " << e;
+       }
+     }
+
+     r = 0;
+   }
+   catch (const invalid_path& e)
+   {
+     error (false) << "invalid path '" << e.path << "'";
+   }
+   // Can be thrown while closing in, out or writing to cerr.
+   //
+   catch (const io_error& e)
+   {
+     error (false) << e;
+   }
+   catch (const failed&)
+   {
+     // Diagnostics has already been issued.
+   }
+
+   cerr.close ();
+   return r;
+ }
+ catch (const std::exception&)
+ {
+   return 1;
+ }
+
+ // mv [--no-cleanup] [-f] <src-path> <dst-path>
+ // mv [--no-cleanup] [-f] <src-path>... <dst-dir>/
+ // Unless -f is given, a source outside the root working directory is an error.
+ // Note: can be executed synchronously.
+ // Returns 0 on success and 1 on failure (diagnostics is written to err).
+ static uint8_t
+ mv (scope& sp,
+ const strings& args,
+ auto_fd in, auto_fd out, auto_fd err) noexcept
+ try
+ {
+ uint8_t r (1);
+ ofdstream cerr (move (err));
+
+ auto error = [&cerr] (bool fail = true)
+ {
+ return error_record (cerr, fail, "mv");
+ };
+
+ try
+ {
+ in.close ();
+ out.close ();
+
+ auto i (args.begin ());
+ auto e (args.end ());
+
+ // Process options (up to the first non-option argument or past '--').
+ //
+ bool no_cleanup (false);
+ bool force (false);
+ for (; i != e; ++i)
+ {
+ const string& o (*i);
+
+ if (o == "--no-cleanup")
+ no_cleanup = true;
+ else if (*i == "-f")
+ force = true;
+ else
+ {
+ if (o == "--")
+ ++i;
+
+ break;
+ }
+ }
+
+ // Move filesystem entries.
+ //
+ if (i == e)
+ error () << "missing arguments";
+
+ const dir_path& wd (sp.wd_path);
+
+ auto j (args.rbegin ());
+ path dst (parse_path (*j++, wd)); // The last argument is the destination.
+ e = j.base (); // Now [i, e) is the range of sources.
+
+ if (i == e)
+ error () << "missing source path";
+
+ auto mv = [no_cleanup, force, &wd, &sp, &error] (const path& from,
+ const path& to)
+ {
+ const dir_path& rwd (sp.root->wd_path);
+
+ if (!from.sub (rwd) && !force)
+ error () << "'" << from << "' is out of working directory '"
+ << rwd << "'";
+
+ try
+ {
+ auto check_wd = [&wd, &error] (const path& p)
+ {
+ if (wd.sub (path_cast<dir_path> (p)))
+ error () << "'" << p << "' contains test working directory '"
+ << wd << "'";
+ };
+
+ check_wd (from);
+ check_wd (to);
+
+ bool exists (butl::entry_exists (to)); // Queried before the move.
+
+ // Fail if the source and destination paths are the same.
+ //
+ // Note that for mventry() function (that is based on the POSIX
+ // rename() function) this is a noop.
+ //
+ if (exists && to == from)
+ error () << "unable to move entity '" << from << "' to itself";
+
+ // Rename/move the filesystem entry, replacing an existing one.
+ //
+ mventry (from,
+ to,
+ cpflags::overwrite_permissions |
+ cpflags::overwrite_content);
+
+ // Unless suppressed, adjust the cleanups that are sub-paths of
+ // the source path.
+ //
+ if (!no_cleanup)
+ {
+ // "Move" the matching cleanup if the destination path doesn't
+ // exist and is a sub-path of the working directory. Otherwise
+ // just remove it.
+ //
+ // Note that it's not enough to just change the cleanup paths.
+ // We also need to make sure that these cleanups happen before
+ // the destination directory (or any of its parents) cleanup,
+ // that is potentially registered. To achieve that we can just
+ // relocate these cleanup entries to the end of the list,
+ // preserving their mutual order. Remember that cleanups in
+ // the list are executed in the reversed order.
+ //
+ bool mv_cleanups (!exists && to.sub (rwd));
+ cleanups cs; // Adjusted cleanups to re-insert (filled if mv_cleanups).
+
+ // Remove the source path sub-path cleanups from the list,
+ // adjusting/caching them if required (see above).
+ //
+ for (auto i (sp.cleanups.begin ()); i != sp.cleanups.end (); )
+ {
+ cleanup& c (*i);
+ path& p (c.path);
+
+ if (p.sub (from))
+ {
+ if (mv_cleanups)
+ {
+ // Note that we need to preserve the cleanup path
+ // trailing separator which indicates the removal
+ // method. Also note that leaf(), in particular, does
+ // that.
+ //
+ p = p != from
+ ? to / p.leaf (path_cast<dir_path> (from))
+ : p.to_directory ()
+ ? path_cast<dir_path> (to)
+ : to;
+
+ cs.push_back (move (c));
+ }
+
+ i = sp.cleanups.erase (i); // Erased whether or not it was cached.
+ }
+ else
+ ++i;
+ }
+
+ // Re-insert the adjusted cleanups at the end of the list.
+ //
+ sp.cleanups.insert (sp.cleanups.end (),
+ make_move_iterator (cs.begin ()),
+ make_move_iterator (cs.end ()));
+ }
+ }
+ catch (const system_error& e)
+ {
+ error () << "unable to move entity '" << from << "' to '" << to
+ << "': " << e;
+ }
+ };
+
+ // If destination is not a directory path (no trailing separator)
+ // then move the filesystem entry to the specified path (the only
+ // source path is allowed in such a case). Otherwise move the source
+ // filesystem entries into the destination directory.
+ //
+ if (!dst.to_directory ())
+ {
+ path src (parse_path (*i++, wd));
+
+ // If there are multiple sources but no trailing separator for the
+ // destination, then, most likely, it is missing.
+ //
+ if (i != e)
+ error () << "multiple source paths without trailing separator "
+ << "for destination directory";
+
+ // Synopsis 1: move an entity to the specified path.
+ //
+ mv (src, dst);
+ }
+ else
+ {
+ // Synopsis 2: move entities into the specified directory.
+ //
+ for (; i != e; ++i)
+ {
+ path src (parse_path (*i, wd));
+ mv (src, dst / src.leaf ());
+ }
+ }
+
+ r = 0;
+ }
+ catch (const invalid_path& e)
+ {
+ error (false) << "invalid path '" << e.path << "'";
+ }
+ // Can be thrown while closing in, out or writing to cerr.
+ //
+ catch (const io_error& e)
+ {
+ error (false) << e;
+ }
+ catch (const failed&)
+ {
+ // Diagnostics has already been issued.
+ }
+
+ cerr.close ();
+ return r;
+ }
+ catch (const std::exception&)
+ {
+ return 1;
+ }
+
+ // rm [-r] [-f] <path>...
+ //
+ // The implementation deviates from POSIX in a number of ways. It doesn't
+ // interact with a user and fails immediately if unable to process an
+ // argument. It doesn't check for dots containment in the path, and
+ // doesn't consider files and directory permissions in any way just
+ // trying to remove a filesystem entry. Always fails if empty path is
+ // specified.
+ //
+ // With -f a missing entry, a missing argument list, and a path outside
+ // the root working directory are not errors.
+ //
+ // Note: can be executed synchronously.
+ //
+ static uint8_t
+ rm (scope& sp,
+     const strings& args,
+     auto_fd in, auto_fd out, auto_fd err) noexcept
+ try
+ {
+   uint8_t r (1);
+   ofdstream cerr (move (err));
+
+   auto error = [&cerr] (bool fail = true)
+   {
+     return error_record (cerr, fail, "rm");
+   };
+
+   try
+   {
+     in.close ();
+     out.close ();
+
+     auto i (args.begin ());
+     auto e (args.end ());
+
+     // Parse the options, stopping at the first non-option argument or
+     // right after the '--' separator.
+     //
+     bool dir (false);
+     bool force (false);
+
+     for (; i != e; ++i)
+     {
+       const string& o (*i);
+
+       if (o == "-r")
+       {
+         dir = true;
+         continue;
+       }
+
+       if (o == "-f")
+       {
+         force = true;
+         continue;
+       }
+
+       if (o == "--")
+         ++i;
+
+       break;
+     }
+
+     if (!force && i == e)
+       error () << "missing file";
+
+     const dir_path& wd (sp.wd_path);
+     const dir_path& rwd (sp.root->wd_path);
+
+     // Remove the entries.
+     //
+     for (; i != e; ++i)
+     {
+       path p (parse_path (*i, wd));
+
+       if (!force && !p.sub (rwd))
+         error () << "'" << p << "' is out of working directory '" << rwd
+                  << "'";
+
+       try
+       {
+         dir_path d (path_cast<dir_path> (p));
+
+         if (!dir_exists (d))
+         {
+           // Not a directory: remove as a file.
+           //
+           if (try_rmfile (p) == rmfile_status::not_exist && !force)
+             throw_generic_error (ENOENT);
+
+           continue;
+         }
+
+         if (!dir)
+           error () << "'" << p << "' is a directory";
+
+         if (wd.sub (d))
+           error () << "'" << p << "' contains test working directory '"
+                    << wd << "'";
+
+         // The call can result in rmdir_status::not_exist. That's not
+         // very likely but there is also nothing bad about it.
+         //
+         try_rmdir_r (d);
+       }
+       catch (const system_error& e)
+       {
+         error () << "unable to remove '" << p << "': " << e;
+       }
+     }
+
+     r = 0;
+   }
+   catch (const invalid_path& e)
+   {
+     error (false) << "invalid path '" << e.path << "'";
+   }
+   // Can be thrown while closing in, out or writing to cerr.
+   //
+   catch (const io_error& e)
+   {
+     error (false) << e;
+   }
+   catch (const failed&)
+   {
+     // Diagnostics has already been issued.
+   }
+
+   cerr.close ();
+   return r;
+ }
+ catch (const std::exception&)
+ {
+   return 1;
+ }
+
+ // rmdir [-f] <path>...
+ //
+ // Remove empty directories. With -f a missing directory, a missing
+ // argument list, and a path outside the root working directory are not
+ // errors. Removing a directory that contains the test working directory
+ // is always an error.
+ //
+ // Note: can be executed synchronously.
+ //
+ static uint8_t
+ rmdir (scope& sp,
+        const strings& args,
+        auto_fd in, auto_fd out, auto_fd err) noexcept
+ try
+ {
+   uint8_t r (1);
+   ofdstream cerr (move (err));
+
+   auto error = [&cerr] (bool fail = true)
+   {
+     return error_record (cerr, fail, "rmdir");
+   };
+
+   try
+   {
+     in.close ();
+     out.close ();
+
+     auto i (args.begin ());
+     auto e (args.end ());
+
+     // Parse the options, stopping at the first non-option argument or
+     // right after the '--' separator.
+     //
+     bool force (false);
+
+     for (; i != e; ++i)
+     {
+       const string& o (*i);
+
+       if (o == "-f")
+       {
+         force = true;
+         continue;
+       }
+
+       if (o == "--")
+         ++i;
+
+       break;
+     }
+
+     if (!force && i == e)
+       error () << "missing directory";
+
+     const dir_path& wd (sp.wd_path);
+     const dir_path& rwd (sp.root->wd_path);
+
+     // Remove the directories.
+     //
+     for (; i != e; ++i)
+     {
+       dir_path p (path_cast<dir_path> (parse_path (*i, wd)));
+
+       if (wd.sub (p))
+         error () << "'" << p << "' contains test working directory '"
+                  << wd << "'";
+
+       if (!force && !p.sub (rwd))
+         error () << "'" << p << "' is out of working directory '"
+                  << rwd << "'";
+
+       try
+       {
+         const rmdir_status st (try_rmdir (p));
+
+         if (st == rmdir_status::not_empty)
+           throw_generic_error (ENOTEMPTY);
+
+         if (st == rmdir_status::not_exist && !force)
+           throw_generic_error (ENOENT);
+       }
+       catch (const system_error& e)
+       {
+         error () << "unable to remove '" << p << "': " << e;
+       }
+     }
+
+     r = 0;
+   }
+   catch (const invalid_path& e)
+   {
+     error (false) << "invalid path '" << e.path << "'";
+   }
+   // Can be thrown while closing in, out or writing to cerr.
+   //
+   catch (const io_error& e)
+   {
+     error (false) << e;
+   }
+   catch (const failed&)
+   {
+     // Diagnostics has already been issued.
+   }
+
+   cerr.close ();
+   return r;
+ }
+ catch (const std::exception&)
+ {
+   return 1;
+ }
+
+ // sed [-n] [-i] -e <script> [<file>]
+ //
+ // Read lines from the file (or stdin if no file or '-' is specified),
+ // apply the single 's' (substitute) command to each of them, and print the
+ // result to stdout or, with -i, write it back to the file via a temporary
+ // file. With -n lines are only printed if the 'p' command flag is
+ // specified and the substitution was made.
+ //
+ // Return 0 on success and 1 on failure.
+ //
+ // Note: must be executed asynchronously.
+ //
+ static uint8_t
+ sed (scope& sp,
+ const strings& args,
+ auto_fd in, auto_fd out, auto_fd err) noexcept
+ try
+ {
+ uint8_t r (1);
+ ofdstream cerr (move (err));
+
+ // Diagnostics record factory (the fail flag is passed through to
+ // error_record).
+ //
+ auto error = [&cerr] (bool fail = true)
+ {
+ return error_record (cerr, fail, "sed");
+ };
+
+ try
+ {
+ // Automatically remove a temporary file (used for in place editing)
+ // on failure.
+ //
+ auto_rmfile rm;
+
+ // Do not throw when failbit is set (getline() failed to extract any
+ // character).
+ //
+ ifdstream cin (move (in), ifdstream::badbit);
+ ofdstream cout (move (out));
+
+ auto i (args.begin ());
+ auto e (args.end ());
+
+ // Process options.
+ //
+ bool auto_prn (true);
+ bool in_place (false);
+
+ // Parsed representation of the 's/regex/replacement/flags' command.
+ //
+ struct substitute
+ {
+ string regex;
+ string replacement;
+ bool icase = false;
+ bool global = false;
+ bool print = false;
+ };
+ optional<substitute> subst;
+
+ for (; i != e; ++i)
+ {
+ const string& o (*i);
+
+ if (o == "-n")
+ auto_prn = false;
+ else if (o == "-i")
+ in_place = true;
+ else if (o == "-e")
+ {
+ // Only a single script is supported.
+ //
+ if (subst)
+ error () << "multiple scripts";
+
+ // If option has no value then bail out and report.
+ //
+ if (++i == e)
+ break;
+
+ const string& v (*i);
+ if (v.empty ())
+ error () << "empty script";
+
+ if (v[0] != 's')
+ error () << "only 's' command supported";
+
+ // Parse the substitute command.
+ //
+ if (v.size () < 2)
+ error () << "no delimiter for 's' command";
+
+ // The character that follows 's' delimits the regex, replacement,
+ // and flags.
+ //
+ char delim (v[1]);
+ if (delim == '\\' || delim == '\n')
+ error () << "invalid delimiter for 's' command";
+
+ size_t p (v.find (delim, 2));
+ if (p == string::npos)
+ error () << "unterminated 's' command regex";
+
+ subst = substitute ();
+ subst->regex.assign (v, 2, p - 2);
+
+ // Empty regex matches nothing, so not of much use.
+ //
+ if (subst->regex.empty ())
+ error () << "empty regex in 's' command";
+
+ size_t b (p + 1);
+ p = v.find (delim, b);
+ if (p == string::npos)
+ error () << "unterminated 's' command replacement";
+
+ subst->replacement.assign (v, b, p - b);
+
+ // Parse the substitute command flags.
+ //
+ // Note that the loop stops at the terminating (or first embedded)
+ // '\0' character.
+ //
+ char c;
+ for (++p; (c = v[p]) != '\0'; ++p)
+ {
+ switch (c)
+ {
+ case 'i': subst->icase = true; break;
+ case 'g': subst->global = true; break;
+ case 'p': subst->print = true; break;
+ default:
+ {
+ error () << "invalid 's' command flag '" << c << "'";
+ }
+ }
+ }
+ }
+ else
+ {
+ if (o == "--")
+ ++i;
+
+ break;
+ }
+ }
+
+ // This also covers the -e option without a value (see above).
+ //
+ if (!subst)
+ error () << "missing script";
+
+ // Path of a file to edit. An empty path represents stdin.
+ //
+ path p;
+ if (i != e)
+ {
+ if (*i != "-")
+ p = parse_path (*i, sp.wd_path);
+
+ ++i;
+ }
+
+ if (i != e)
+ error () << "unexpected argument '" << *i << "'";
+
+ // If we edit file in place make sure that the file path is specified
+ // and obtain a temporary file path. We will be writing to the
+ // temporary file (rather than to stdout) and will move it to the
+ // original file path afterwards.
+ //
+ path tp;
+ if (in_place)
+ {
+ if (p.empty ())
+ error () << "-i option specified while reading from stdin";
+
+ try
+ {
+ tp = path::temp_path ("build2-sed");
+
+ cout.close (); // Flush and close.
+
+ // Reopen cout on the temporary file, passing the permissions of the
+ // file being edited.
+ //
+ cout.open (
+ fdopen (tp,
+ fdopen_mode::out | fdopen_mode::truncate |
+ fdopen_mode::create,
+ path_permissions (p)));
+ }
+ catch (const io_error& e)
+ {
+ error_record d (error ());
+ d << "unable to open '" << tp << "': " << e;
+ }
+ catch (const system_error& e)
+ {
+ error_record d (error ());
+ d << "unable to obtain temporary file: " << e;
+ }
+
+ rm = auto_rmfile (tp);
+ }
+
+ // Note that ECMAScript is implied if no grammar flag is specified.
+ //
+ regex re (subst->regex,
+ subst->icase ? regex::icase : regex::ECMAScript);
+
+ // Edit a file or STDIN.
+ //
+ try
+ {
+ // Open a file if specified.
+ //
+ if (!p.empty ())
+ {
+ cin.close (); // Flush and close.
+ cin.open (p);
+ }
+
+ // Read until failbit is set (throw on badbit).
+ //
+ string s;
+ while (getline (cin, s))
+ {
+ // Note: this r shadows the function result variable inside the loop.
+ // Its first member is the resulting line; the second is presumably
+ // the "substitution was made" flag (TODO confirm against
+ // regex_replace_search()).
+ //
+ auto r (regex_replace_search (
+ s,
+ re,
+ subst->replacement,
+ subst->global
+ ? regex_constants::format_default
+ : regex_constants::format_first_only));
+
+ // Add newline regardless whether the source line is newline-
+ // terminated or not (in accordance with POSIX).
+ //
+ if (auto_prn || (r.second && subst->print))
+ cout << r.first << '\n';
+ }
+
+ cin.close ();
+ cout.close ();
+
+ // Replace the original file with the edited temporary one and cancel
+ // the automatic removal of the latter.
+ //
+ if (in_place)
+ {
+ mvfile (
+ tp, p,
+ cpflags::overwrite_content | cpflags::overwrite_permissions);
+
+ rm.cancel ();
+ }
+
+ r = 0;
+ }
+ catch (const io_error& e)
+ {
+ error_record d (error ());
+ d << "unable to edit ";
+
+ if (p.empty ())
+ d << "stdin";
+ else
+ d << "'" << p << "'";
+
+ d << ": " << e;
+ }
+ }
+ catch (const regex_error& e)
+ {
+ // Print regex_error description if meaningful (no space).
+ //
+ error (false) << "invalid regex" << e;
+ }
+ catch (const invalid_path& e)
+ {
+ error (false) << "invalid path '" << e.path << "'";
+ }
+ // Can be thrown while creating cin, cout or writing to cerr.
+ //
+ catch (const io_error& e)
+ {
+ error (false) << e;
+ }
+ catch (const system_error& e)
+ {
+ error (false) << e;
+ }
+ catch (const failed&)
+ {
+ // Diagnostics has already been issued.
+ }
+
+ cerr.close ();
+ return r;
+ }
+ catch (const std::exception&)
+ {
+ return 1;
+ }
+
+      // sleep <seconds>
+      //
+      // Sleep for the specified number of seconds. The interval must be an
+      // unsigned decimal integer (no sign) that fits into uint64_t.
+      //
+      // Return 0 on success and 1 on failure (missing, extra, or invalid
+      // argument, I/O error).
+      //
+      // Note: can be executed synchronously.
+      //
+      static uint8_t
+      sleep (scope&,
+             const strings& args,
+             auto_fd in, auto_fd out, auto_fd err) noexcept
+      try
+      {
+        uint8_t r (1);
+        ofdstream cerr (move (err));
+
+        // Diagnostics record factory (the fail flag is passed through to
+        // error_record).
+        //
+        auto error = [&cerr] (bool fail = true)
+        {
+          return error_record (cerr, fail, "sleep");
+        };
+
+        try
+        {
+          in.close ();
+          out.close ();
+
+          if (args.empty ())
+            error () << "missing time interval";
+
+          if (args.size () > 1)
+            error () << "unexpected argument '" << args[1] << "'";
+
+          uint64_t n;
+
+          for (;;) // Breakout loop.
+          {
+            const string& a (args[0]);
+
+            // Note: strtoull() allows these.
+            //
+            if (!a.empty () && a[0] != '-' && a[0] != '+')
+            {
+              char* e (nullptr);
+
+              // strtoull() only sets errno on failure and never clears it,
+              // so reset it first. Otherwise a stale ERANGE left by some
+              // earlier call would make us reject a valid interval.
+              //
+              errno = 0;
+              n = strtoull (a.c_str (), &e, 10); // Can't throw.
+
+              // Valid if there was no overflow and the entire string has
+              // been consumed.
+              //
+              if (errno != ERANGE && e == a.c_str () + a.size ())
+                break;
+            }
+
+            error () << "invalid time interval '" << a << "'";
+          }
+
+          // If/when required we could probably support the precise sleep mode
+          // (e.g., via an option).
+          //
+          sched.sleep (chrono::seconds (n));
+
+          r = 0;
+        }
+        // Can be thrown while closing in, out or writing to cerr.
+        //
+        catch (const io_error& e)
+        {
+          error (false) << e;
+        }
+        catch (const failed&)
+        {
+          // Diagnostics has already been issued.
+        }
+
+        cerr.close ();
+        return r;
+      }
+      catch (const std::exception&)
+      {
+        return 1;
+      }
+
+      // test -f|-d <path>
+      //
+      // Test whether the path refers to an existing file (-f) or directory
+      // (-d).
+      //
+      // Return 0 if the entry exists, 1 if it does not, and 2 on failure
+      // (invalid arguments, inability to test, I/O error).
+      //
+      // Note: can be executed synchronously.
+      //
+      static uint8_t
+      test (scope& sp,
+            const strings& args,
+            auto_fd in, auto_fd out, auto_fd err) noexcept
+      try
+      {
+        uint8_t rc (2); // Failure unless the test has been performed.
+        ofdstream cerr (move (err));
+
+        // Diagnostics record factory (the fail flag is passed through to
+        // error_record).
+        //
+        auto error = [&cerr] (bool fail = true)
+        {
+          return error_record (cerr, fail, "test");
+        };
+
+        try
+        {
+          // This builtin uses neither stdin nor stdout.
+          //
+          in.close ();
+          out.close ();
+
+          const size_t argc (args.size ());
+
+          if (argc < 2)
+            error () << "missing path";
+
+          // The first argument selects the entry type to test for.
+          //
+          const string& opt (args[0]);
+          const bool file (opt == "-f");
+
+          if (!(file || opt == "-d"))
+            error () << "invalid option";
+
+          if (argc > 2)
+            error () << "unexpected argument '" << args[2] << "'";
+
+          path p (parse_path (args[1], sp.wd_path));
+
+          try
+          {
+            const bool exists (file ? file_exists (p) : dir_exists (p));
+            rc = exists ? 0 : 1;
+          }
+          catch (const system_error& ex)
+          {
+            error () << "cannot test '" << p << "': " << ex;
+          }
+        }
+        catch (const invalid_path& ex)
+        {
+          error (false) << "invalid path '" << ex.path << "'";
+        }
+        // Can be thrown while closing in, out or writing to cerr.
+        //
+        catch (const io_error& ex)
+        {
+          error (false) << ex;
+        }
+        catch (const failed&)
+        {
+          // Diagnostics has already been issued.
+        }
+
+        cerr.close ();
+        return rc;
+      }
+      catch (const std::exception&)
+      {
+        return 2;
+      }
+
+      // touch [--no-cleanup] [--after <ref-file>] <file>...
+      //
+      // Create the specified files or update them if they already exist.
+      // Unless --no-cleanup is specified, register a cleanup for every newly
+      // created file. With --after, keep updating each file until its
+      // modification time is later than that of <ref-file>.
+      //
+      // Return 0 on success and 1 on failure.
+      //
+      // Note that POSIX doesn't specify the behavior for touching an entry
+      // other than file.
+      //
+      // Also note that POSIX doesn't specify if after a file touch failure the
+      // command should proceed with the rest of the arguments. The current
+      // implementation exits immediately in such a case.
+      //
+      // Note: can be executed synchronously.
+      //
+      static uint8_t
+      touch (scope& sp,
+             const strings& args,
+             auto_fd in, auto_fd out, auto_fd err) noexcept
+      try
+      {
+        uint8_t rc (1); // Failure unless every file has been touched.
+        ofdstream cerr (move (err));
+
+        // Diagnostics record factory (the fail flag is passed through to
+        // error_record).
+        //
+        auto error = [&cerr] (bool fail = true)
+        {
+          return error_record (cerr, fail, "touch");
+        };
+
+        try
+        {
+          // This builtin uses neither stdin nor stdout.
+          //
+          in.close ();
+          out.close ();
+
+          // Return the modification time of an existing file, issuing
+          // diagnostics if it cannot be obtained or the file does not exist.
+          //
+          auto mod_time = [&error] (const path& f) -> timestamp
+          {
+            try
+            {
+              const timestamp mt (file_mtime (f));
+
+              if (mt == timestamp_nonexistent)
+                throw_generic_error (ENOENT);
+
+              return mt;
+            }
+            catch (const system_error& ex)
+            {
+              error () << "cannot obtain file '" << f
+                       << "' modification time: " << ex;
+            }
+            assert (false); // Can't be here.
+            return timestamp ();
+          };
+
+          auto arg (args.begin ());
+          const auto last (args.end ());
+
+          // Process options.
+          //
+          bool cleanup (true);
+          optional<timestamp> after;
+
+          while (arg != last)
+          {
+            const string& opt (*arg);
+
+            if (opt == "--no-cleanup")
+              cleanup = false;
+            else if (opt == "--after")
+            {
+              ++arg;
+
+              if (arg == last)
+                error () << "missing --after option value";
+
+              after = mod_time (parse_path (*arg, sp.wd_path));
+            }
+            else
+            {
+              // Not an option: skip the -- separator, if any, and proceed to
+              // the files.
+              //
+              if (opt == "--")
+                ++arg;
+
+              break;
+            }
+
+            ++arg;
+          }
+
+          if (arg == last)
+            error () << "missing file";
+
+          // Create files.
+          //
+          for (; arg != last; ++arg)
+          {
+            path p (parse_path (*arg, sp.wd_path));
+
+            try
+            {
+              // Note that we don't register (implicit) cleanup for an
+              // existing path.
+              //
+              const bool created (touch_file (p));
+
+              if (created && cleanup)
+                sp.clean ({cleanup_type::always, p}, true);
+
+              // Keep touching until the file becomes newer than the
+              // reference one.
+              //
+              if (after)
+                while (mod_time (p) <= *after)
+                  touch_file (p, false /* create */);
+            }
+            catch (const system_error& ex)
+            {
+              error () << "cannot create/update '" << p << "': " << ex;
+            }
+          }
+
+          rc = 0;
+        }
+        catch (const invalid_path& ex)
+        {
+          error (false) << "invalid path '" << ex.path << "'";
+        }
+        // Can be thrown while closing in, out or writing to cerr.
+        //
+        catch (const io_error& ex)
+        {
+          error (false) << ex;
+        }
+        catch (const failed&)
+        {
+          // Diagnostics has already been issued.
+        }
+
+        cerr.close ();
+        return rc;
+      }
+      catch (const std::exception&)
+      {
+        return 1;
+      }
+
+ // Run builtin implementation asynchronously.
+ //
+ // Start a thread that calls fn and stores its return value in r. Return
+ // the builtin object that refers to r and owns the thread.
+ //
+ // Note that the thread's lambda takes ownership of the file descriptors
+ // but only captures references to sp, r, and args, so these must stay
+ // valid until the thread finishes.
+ //
+ static builtin
+ async_impl (builtin_impl* fn,
+ scope& sp,
+ uint8_t& r,
+ const strings& args,
+ auto_fd in, auto_fd out, auto_fd err)
+ {
+ return builtin (
+ r,
+ thread ([fn, &sp, &r, &args,
+ in = move (in),
+ out = move (out),
+ err = move (err)] () mutable noexcept
+ {
+ r = fn (sp, args, move (in), move (out), move (err));
+ }));
+ }
+
+ // Same as above but with the implementation function specified as a
+ // template argument. This gives each instantiation the plain function
+ // signature used for the entries of the builtins map.
+ //
+ template <builtin_impl fn>
+ static builtin
+ async_impl (scope& sp,
+ uint8_t& r,
+ const strings& args,
+ auto_fd in, auto_fd out, auto_fd err)
+ {
+ return async_impl (fn, sp, r, args, move (in), move (out), move (err));
+ }
+
+ // Run builtin implementation synchronously.
+ //
+ // Call fn in the calling thread, store its return value in r, and return
+ // the builtin object that refers to r and has no associated thread (so
+ // waiting on it returns the result immediately).
+ //
+ template <builtin_impl fn>
+ static builtin
+ sync_impl (scope& sp,
+ uint8_t& r,
+ const strings& args,
+ auto_fd in, auto_fd out, auto_fd err)
+ {
+ r = fn (sp, args, move (in), move (out), move (err));
+ return builtin (r, thread ());
+ }
+
+ // Map of the builtin names to the functions that start them.
+ //
+ // The cat, echo, and sed builtins are run asynchronously (in a separate
+ // thread) while the rest of the builtins registered via a wrapper are run
+ // synchronously. The false and true builtins are registered directly,
+ // without a wrapper.
+ //
+ const builtin_map builtins
+ {
+ {"cat", &async_impl<&cat>},
+ {"cp", &sync_impl<&cp>},
+ {"echo", &async_impl<&echo>},
+ {"false", &false_},
+ {"ln", &sync_impl<&ln>},
+ {"mkdir", &sync_impl<&mkdir>},
+ {"mv", &sync_impl<&mv>},
+ {"rm", &sync_impl<&rm>},
+ {"rmdir", &sync_impl<&rmdir>},
+ {"sed", &async_impl<&sed>},
+ {"sleep", &sync_impl<&sleep>},
+ {"test", &sync_impl<&test>},
+ {"touch", &sync_impl<&touch>},
+ {"true", &true_}
+ };
+ }
+ }
+}
diff --git a/libbuild2/test/script/builtin.hxx b/libbuild2/test/script/builtin.hxx
new file mode 100644
index 0000000..b340335
--- /dev/null
+++ b/libbuild2/test/script/builtin.hxx
@@ -0,0 +1,74 @@
+// file : libbuild2/test/script/builtin.hxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
+// license : MIT; see accompanying LICENSE file
+
+#ifndef LIBBUILD2_TEST_SCRIPT_BUILTIN_HXX
+#define LIBBUILD2_TEST_SCRIPT_BUILTIN_HXX
+
+#include <map>
+
+#include <libbuild2/types.hxx>
+#include <libbuild2/utility.hxx>
+
+namespace build2
+{
+ namespace test
+ {
+ namespace script
+ {
+ class scope;
+
+      // A process/thread-like object representing a running builtin.
+      //
+      // For now, instead of allocating the result storage dynamically, we
+      // expect it to be provided by the caller (and to stay valid for as
+      // long as this object refers to it).
+      //
+      class builtin
+      {
+      public:
+        // Bind to the caller-provided result storage and take over the
+        // thread executing the builtin, if any (a default-constructed thread
+        // means there is nothing to wait for).
+        //
+        builtin (uint8_t& r, thread&& t = thread ()): r_ (r), t_ (move (t)) {}
+
+        builtin (builtin&&) = default;
+
+        // Wait for the builtin to complete, if still running.
+        //
+        ~builtin () {wait ();}
+
+      public:
+        // Join the thread if there is one to join and return the value
+        // currently held in the result storage.
+        //
+        uint8_t
+        wait ()
+        {
+          if (t_.joinable ())
+            t_.join ();
+
+          return r_;
+        }
+
+      private:
+        uint8_t& r_;
+        thread t_;
+      };
+
+ // Start builtin command. Throw system_error on failure.
+ //
+ // The result argument is the storage for the builtin's exit code; it is
+ // what the returned object's wait() reads. The in, out, and err file
+ // descriptors are passed by value.
+ //
+ // Note that unlike argc/argv, our args don't include the program name.
+ //
+ using builtin_func = builtin (scope&,
+ uint8_t& result,
+ const strings& args,
+ auto_fd in, auto_fd out, auto_fd err);
+
+      // Map of the builtin names to the functions that start them.
+      //
+      class builtin_map: public std::map<string, builtin_func*>
+      {
+      public:
+        using base = std::map<string, builtin_func*>;
+        using base::base;
+
+        // Look up the builtin function by name. Return NULL if not a builtin.
+        //
+        builtin_func*
+        find (const string& n) const
+        {
+          const auto pos (base::find (n));
+
+          if (pos == end ())
+            return nullptr;
+
+          return pos->second;
+        }
+      };
+
+ extern const builtin_map builtins;
+ }
+ }
+}
+
+#endif // LIBBUILD2_TEST_SCRIPT_BUILTIN_HXX
diff --git a/libbuild2/test/script/lexer+command-expansion.test.testscript b/libbuild2/test/script/lexer+command-expansion.test.testscript
new file mode 100644
index 0000000..1ddc246
--- /dev/null
+++ b/libbuild2/test/script/lexer+command-expansion.test.testscript
@@ -0,0 +1,248 @@
+# file : libbuild2/test/script/lexer+command-expansion.test.testscript
+# copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
+# license : MIT; see accompanying LICENSE file
+
+test.arguments = command-expansion
+
+: pass-redirect
+:
+{
+ : in
+ :
+ $* <:"0<|" >>EOO
+ '0'
+ <|
+ EOO
+
+ : arg-in
+ :
+ $* <:"0 <|" >>EOO
+ '0 '
+ <|
+ EOO
+
+ : out
+ :
+ $* <:"1>|" >>EOO
+ '1'
+ >|
+ EOO
+
+ : arg-out
+ :
+ $* <:"1 >|" >>EOO
+ '1 '
+ >|
+ EOO
+}
+
+: null-redirect
+:
+{
+ : in
+ :
+ $* <:"0<-" >>EOO
+ '0'
+ <-
+ EOO
+
+ : arg-in
+ :
+ $* <:"0 <-" >>EOO
+ '0 '
+ <-
+ EOO
+
+ : out
+ :
+ $* <:"1>-" >>EOO
+ '1'
+ >-
+ EOO
+
+ : arg-out
+ :
+ $* <:"1 >-" >>EOO
+ '1 '
+ >-
+ EOO
+}
+
+: trace-redirect
+:
+{
+ : out
+ :
+ $* <:"1>!" >>EOO
+ '1'
+ >!
+ EOO
+
+ : arg-out
+ :
+ $* <:"1 >!" >>EOO
+ '1 '
+ >!
+ EOO
+}
+
+: merge-redirect
+:
+{
+ : out
+ :
+ $* <:"1>&2" >>EOO
+ '1'
+ >&
+ '2'
+ EOO
+
+ : arg-out
+ :
+ $* <:"1 >&2" >>EOO
+ '1 '
+ >&
+ '2'
+ EOO
+}
+
+: str-redirect
+:
+{
+ : in
+ :
+ {
+ : newline
+ :
+ $* <:"0<a b" >>EOO
+ '0'
+ <
+ 'a b'
+ EOO
+
+ : no-newline
+ :
+ $* <:"0<:a b" >>EOO
+ '0'
+ <:
+ 'a b'
+ EOO
+ }
+
+ : out
+ :
+ {
+ : newline
+ :
+ $* <:"1>a b" >>EOO
+ '1'
+ >
+ 'a b'
+ EOO
+
+ : no-newline
+ :
+ $* <:"1>:a b" >>EOO
+ '1'
+ >:
+ 'a b'
+ EOO
+ }
+}
+
+: doc-redirect
+:
+{
+ : in
+ :
+ {
+ : newline
+ :
+ $* <:"0<<E O I" >>EOO
+ '0'
+ <<
+ 'E O I'
+ EOO
+
+ : no-newline
+ :
+ $* <:"0<<:E O I" >>EOO
+ '0'
+ <<:
+ 'E O I'
+ EOO
+ }
+
+ : out
+ :
+ {
+ : newline
+ :
+ $* <:"1>>E O O" >>EOO
+ '1'
+ >>
+ 'E O O'
+ EOO
+
+ : no-newline
+ :
+ $* <:"1>>:E O O" >>EOO
+ '1'
+ >>:
+ 'E O O'
+ EOO
+ }
+}
+
+: file-redirect
+:
+{
+ : in
+ :
+ $* <:"0<<<a b" >>EOO
+ '0'
+ <<<
+ 'a b'
+ EOO
+
+ : out
+ :
+ $* <:"1>=a b" >>EOO
+ '1'
+ >=
+ 'a b'
+ EOO
+
+ : out-app
+ :
+ $* <:"1>+a b" >>EOO
+ '1'
+ >+
+ 'a b'
+ EOO
+}
+
+: cleanup
+:
+{
+ : always
+ :
+ $* <:"&file" >>EOO
+ &
+ 'file'
+ EOO
+
+ : maybe
+ :
+ $* <:"&?file" >>EOO
+ &?
+ 'file'
+ EOO
+
+ : never
+ :
+ $* <:"&!file" >>EOO
+ &!
+ 'file'
+ EOO
+}
diff --git a/libbuild2/test/script/lexer+command-line.test.testscript b/libbuild2/test/script/lexer+command-line.test.testscript
new file mode 100644
index 0000000..eedb46f
--- /dev/null
+++ b/libbuild2/test/script/lexer+command-line.test.testscript
@@ -0,0 +1,208 @@
+# file : libbuild2/test/script/lexer+command-line.test.testscript
+# copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
+# license : MIT; see accompanying LICENSE file
+
+test.arguments = command-line
+
+: semi
+{
+ : immediate
+ :
+ $* <"cmd;" >>EOO
+ 'cmd'
+ ;
+ <newline>
+ EOO
+
+ : separated
+ :
+ $* <"cmd ;" >>EOO
+ 'cmd'
+ ;
+ <newline>
+ EOO
+
+ : only
+ :
+ $* <";" >>EOO
+ ;
+ <newline>
+ EOO
+}
+
+: colon
+:
+{
+ : immediate
+ :
+ $* <"cmd: dsc" >>EOO
+ 'cmd'
+ :
+ 'dsc'
+ <newline>
+ EOO
+
+ : separated
+ :
+ $* <"cmd :dsc" >>EOO
+ 'cmd'
+ :
+ 'dsc'
+ <newline>
+ EOO
+
+ : only
+ :
+ $* <":" >>EOO
+ :
+ <newline>
+ EOO
+}
+
+: redirect
+:
+{
+ : pass
+ :
+ $* <"cmd <| 1>|" >>EOO
+ 'cmd'
+ <|
+ '1'
+ >|
+ <newline>
+ EOO
+
+ : null
+ :
+ $* <"cmd <- 1>-" >>EOO
+ 'cmd'
+ <-
+ '1'
+ >-
+ <newline>
+ EOO
+
+ : trace
+ :
+ $* <"cmd 1>!" >>EOO
+ 'cmd'
+ '1'
+ >!
+ <newline>
+ EOO
+
+ : merge
+ :
+ $* <"cmd 1>&2" >>EOO
+ 'cmd'
+ '1'
+ >&
+ '2'
+ <newline>
+ EOO
+
+ : str
+ :
+ $* <"cmd <a 1>b" >>EOO
+ 'cmd'
+ <
+ 'a'
+ '1'
+ >
+ 'b'
+ <newline>
+ EOO
+
+ : str-nn
+ :
+ $* <"cmd <:a 1>:b" >>EOO
+ 'cmd'
+ <:
+ 'a'
+ '1'
+ >:
+ 'b'
+ <newline>
+ EOO
+
+ : doc
+ :
+ $* <"cmd <<EOI 1>>EOO" >>EOO
+ 'cmd'
+ <<
+ 'EOI'
+ '1'
+ >>
+ 'EOO'
+ <newline>
+ EOO
+
+ : doc-nn
+ :
+ $* <"cmd <<:EOI 1>>:EOO" >>EOO
+ 'cmd'
+ <<:
+ 'EOI'
+ '1'
+ >>:
+ 'EOO'
+ <newline>
+ EOO
+
+ : file-cmp
+ :
+ $* <"cmd <<<in >>>out 2>>>err" >>EOO
+ 'cmd'
+ <<<
+ 'in'
+ >>>
+ 'out'
+ '2'
+ >>>
+ 'err'
+ <newline>
+ EOO
+
+ : file-write
+ :
+ $* <"cmd >=out 2>+err" >>EOO
+ 'cmd'
+ >=
+ 'out'
+ '2'
+ >+
+ 'err'
+ <newline>
+ EOO
+}
+
+: cleanup
+:
+{
+ : always
+ :
+ $* <"cmd &file" >>EOO
+ 'cmd'
+ &
+ 'file'
+ <newline>
+ EOO
+
+ : maybe
+ :
+ $* <"cmd &?file" >>EOO
+ 'cmd'
+ &?
+ 'file'
+ <newline>
+ EOO
+
+ : never
+ :
+ $* <"cmd &!file" >>EOO
+ 'cmd'
+ &!
+ 'file'
+ <newline>
+ EOO
+}
diff --git a/libbuild2/test/script/lexer+description-line.test.testscript b/libbuild2/test/script/lexer+description-line.test.testscript
new file mode 100644
index 0000000..bb5948a
--- /dev/null
+++ b/libbuild2/test/script/lexer+description-line.test.testscript
@@ -0,0 +1,33 @@
+# file : libbuild2/test/script/lexer+description-line.test.testscript
+# copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
+# license : MIT; see accompanying LICENSE file
+
+test.arguments = description-line
+
+: full
+:
+$* <" foo bar " >>EOO
+' foo bar '
+<newline>
+EOO
+
+: space
+:
+$* <" " >>EOO
+' '
+<newline>
+EOO
+
+: empty
+:
+$* <"" >>EOO
+<newline>
+EOO
+
+: eof
+:
+$* <:"foo" >>EOO 2>>EOE != 0
+'foo'
+EOO
+stdin:1:4: error: expected newline at the end of description line
+EOE
diff --git a/libbuild2/test/script/lexer+first-token.test.testscript b/libbuild2/test/script/lexer+first-token.test.testscript
new file mode 100644
index 0000000..3eaf976
--- /dev/null
+++ b/libbuild2/test/script/lexer+first-token.test.testscript
@@ -0,0 +1,97 @@
+# file : libbuild2/test/script/lexer+first-token.test.testscript
+# copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
+# license : MIT; see accompanying LICENSE file
+
+# Note: this mode auto-expires after each token.
+#
+test.arguments = first-token
+
+: dot
+:
+$* <"." >>EOO
+.
+<newline>
+EOO
+
+: semi
+:
+$* <";" >>EOO
+;
+<newline>
+EOO
+
+: colon
+:
+$* <":" >>EOO
+:
+<newline>
+EOO
+
+: lcbrace
+:
+$* <"{" >>EOO
+{
+<newline>
+EOO
+
+: rcbrace
+:
+$* <"}" >>EOO
+}
+<newline>
+EOO
+
+: setup
+:
+$* <"+foo" >>EOO
++
+'foo'
+<newline>
+EOO
+
+: tdown
+:
+$* <"- foo" >>EOO
+-
+'foo'
+<newline>
+EOO
+
+: plus-leading
+:
+$* <"foo+bar" >>EOO
+'foo+bar'
+<newline>
+EOO
+
+: minus-leading
+:
+$* <"foo- x" >>EOO
+'foo-'
+'x'
+<newline>
+EOO
+
+: assign
+:
+$* <"foo=" >>EOO
+'foo'
+'='
+<newline>
+EOO
+
+: append
+:
+$* <"foo+=" >>EOO
+'foo'
+'+='
+<newline>
+EOO
+
+: prepend
+:
+$* <"foo=+" >>EOO
+'foo'
+'=+'
+<newline>
+EOO
diff --git a/libbuild2/test/script/lexer+second-token.test.testscript b/libbuild2/test/script/lexer+second-token.test.testscript
new file mode 100644
index 0000000..c494796
--- /dev/null
+++ b/libbuild2/test/script/lexer+second-token.test.testscript
@@ -0,0 +1,68 @@
+# file : libbuild2/test/script/lexer+second-token.test.testscript
+# copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
+# license : MIT; see accompanying LICENSE file
+
+# Note: this mode auto-expires after each token.
+#
+test.arguments = second-token
+
+: semi
+:
+$* <";" >>EOO
+;
+<newline>
+EOO
+
+: colon
+:
+$* <":" >>EOO
+:
+<newline>
+EOO
+
+: assign
+:
+$* <"=foo" >>EOO
+=
+'foo'
+<newline>
+EOO
+
+: append
+:
+$* <"+= foo" >>EOO
++=
+'foo'
+<newline>
+EOO
+
+: prepend
+:
+$* <" =+ foo" >>EOO
+=+
+'foo'
+<newline>
+EOO
+
+: assign-leading
+:
+$* <"foo=bar" >>EOO
+'foo=bar'
+<newline>
+EOO
+
+: append-leading
+:
+$* <"foo+= bar" >>EOO
+'foo+='
+'bar'
+<newline>
+EOO
+
+: prepend-leading
+:
+$* <"foo =+bar" >>EOO
+'foo'
+'=+bar'
+<newline>
+EOO
diff --git a/libbuild2/test/script/lexer+variable-line.test.testscript b/libbuild2/test/script/lexer+variable-line.test.testscript
new file mode 100644
index 0000000..bac4f16
--- /dev/null
+++ b/libbuild2/test/script/lexer+variable-line.test.testscript
@@ -0,0 +1,28 @@
+# file : libbuild2/test/script/lexer+variable-line.test.testscript
+# copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
+# license : MIT; see accompanying LICENSE file
+
+test.arguments = variable-line
+
+: semi
+:
+$* <"cmd;" >>EOO
+'cmd'
+;
+<newline>
+EOO
+
+: semi-separated
+:
+$* <"cmd ;" >>EOO
+'cmd'
+;
+<newline>
+EOO
+
+: semi-only
+:
+$* <";" >>EOO
+;
+<newline>
+EOO
diff --git a/libbuild2/test/script/lexer+variable.test.testscript b/libbuild2/test/script/lexer+variable.test.testscript
new file mode 100644
index 0000000..64b2bee
--- /dev/null
+++ b/libbuild2/test/script/lexer+variable.test.testscript
@@ -0,0 +1,70 @@
+# file : libbuild2/test/script/lexer+variable.test.testscript
+# copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
+# license : MIT; see accompanying LICENSE file
+
+# Test handling custom variable names ($*, $~, $NN).
+#
+test.arguments = variable
+
+: command
+:
+{
+ : only
+ :
+ $* <"*" >>EOO
+ '*'
+ <newline>
+ EOO
+
+ : followed
+ :
+ $* <"*abc" >>EOO
+ '*'
+ 'abc'
+ <newline>
+ EOO
+}
+
+: working-dir
+:
+{
+ : only
+ :
+ $* <"~" >>EOO
+ '~'
+ <newline>
+ EOO
+
+ : followed
+ :
+ $* <"~123" >>EOO
+ '~'
+ '123'
+ <newline>
+ EOO
+}
+
+: arg
+:
+{
+ : only
+ :
+ $* <"0" >>EOO
+ '0'
+ <newline>
+ EOO
+
+ : followed
+ :
+ $* <"1abc" >>EOO
+ '1'
+ 'abc'
+ <newline>
+ EOO
+
+ : multi-digit
+ :
+ $* <"10" 2>>EOE != 0
+ stdin:1:1: error: multi-digit special variable name
+ EOE
+}
diff --git a/libbuild2/test/script/lexer.cxx b/libbuild2/test/script/lexer.cxx
new file mode 100644
index 0000000..75c04c8
--- /dev/null
+++ b/libbuild2/test/script/lexer.cxx
@@ -0,0 +1,551 @@
+// file : libbuild2/test/script/lexer.cxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
+// license : MIT; see accompanying LICENSE file
+
+#include <libbuild2/test/script/lexer.hxx>
+
+#include <cstring> // strchr()
+
+using namespace std;
+
+namespace build2
+{
+ namespace test
+ {
+ namespace script
+ {
+ using type = token_type;
+
+ // Push a new lexer state for the specified mode.
+ //
+ // The modes defined by the testscript lexer are handled here by setting up
+ // the separator strings (s1, s2), the flags (s, n, q), and the escape
+ // sequences. Any other mode is delegated to the base lexer. If esc is
+ // absent, then the escape sequences of the current (top) state are
+ // inherited, in which case the state stack must not be empty.
+ //
+ void lexer::
+ mode (base_mode m, char ps, optional<const char*> esc)
+ {
+ // Separator strings (s1 and s2 have the same length in every mode below;
+ // presumably s2 holds the second character of two-character separators
+ // -- see the base lexer to confirm).
+ //
+ const char* s1 (nullptr);
+ const char* s2 (nullptr);
+
+ // State flags. Going by the per-mode comments below, s is cleared where
+ // whitespaces are not word separators and q where quotes are treated as
+ // literals. The n flag is always left true here (its meaning is defined
+ // by the base lexer state).
+ //
+ bool s (true);
+ bool n (true);
+ bool q (true);
+
+ if (!esc)
+ {
+ assert (!state_.empty ());
+ esc = state_.top ().escapes;
+ }
+
+ switch (m)
+ {
+ case lexer_mode::command_line:
+ {
+ s1 = ":;=!|&<> $(#\t\n";
+ s2 = " == ";
+ break;
+ }
+ case lexer_mode::first_token:
+ {
+ // First token on the script line. Like command_line but
+ // recognizes leading '.+-{}' as tokens as well as variable
+ // assignments as separators.
+ //
+ // Note that to recognize only leading '.+-{}' we shouldn't add
+ // them to the separator strings.
+ //
+ s1 = ":;=+!|&<> $(#\t\n";
+ s2 = " == ";
+ break;
+ }
+ case lexer_mode::second_token:
+ {
+ // Second token on the script line. Like command_line but
+ // recognizes leading variable assignments.
+ //
+ // Note that to recognize only leading assignments we shouldn't
+ // add them to the separator strings (so this is identical to
+ // command_line).
+ //
+ s1 = ":;=!|&<> $(#\t\n";
+ s2 = " == ";
+ break;
+ }
+ case lexer_mode::variable_line:
+ {
+ // Like value except we recognize ';' and don't recognize '{'.
+ // Note that we don't recognize ':' since having a trailing
+ // variable assignment is illegal.
+ //
+ s1 = "; $([]#\t\n";
+ s2 = " ";
+ break;
+ }
+
+ case lexer_mode::command_expansion:
+ {
+ // Note that whitespaces are not word separators in this mode.
+ //
+ s1 = "|&<>";
+ s2 = " ";
+ s = false;
+ break;
+ }
+ case lexer_mode::here_line_single:
+ {
+ // This one is like a single-quoted string except it treats
+ // newlines as a separator. We also treat quotes as literals.
+ //
+ // Note that it might be tempting to enable line continuation
+ // escapes. However, we will then have to also enable escaping of
+ // the backslash, which makes it a lot less tempting.
+ //
+ s1 = "\n";
+ s2 = " ";
+ esc = ""; // Disable escape sequences.
+ s = false;
+ q = false;
+ break;
+ }
+ case lexer_mode::here_line_double:
+ {
+ // This one is like a double-quoted string except it treats
+ // newlines as a separator. We also treat quotes as literals.
+ //
+ s1 = "$(\n";
+ s2 = " ";
+ s = false;
+ q = false;
+ break;
+ }
+ case lexer_mode::description_line:
+ {
+ // This one is like a single-quoted string and has an ad hoc
+ // implementation.
+ //
+ // Note that the separator strings are left NULL for this mode.
+ //
+ break;
+ }
+ default:
+ {
+ // Make sure pair separators are only enabled where we expect
+ // them.
+ //
+ // @@ Should we disable pair separators in the eval mode?
+ //
+ assert (ps == '\0' ||
+ m == lexer_mode::eval ||
+ m == lexer_mode::attribute);
+
+ base_lexer::mode (m, ps, esc);
+ return;
+ }
+ }
+
+ // None of the modes handled above support pair separators.
+ //
+ assert (ps == '\0');
+ state_.push (state {m, ps, s, n, q, *esc, s1, s2});
+ }
+
+      // Return the next token, scanning it according to the current mode, and
+      // keep count of the quoted tokens.
+      //
+      token lexer::
+      next ()
+      {
+        // Scan the token with the function that handles the current mode
+        // (anything we don't handle ourselves goes to the base lexer).
+        //
+        auto scan = [this] () -> token
+        {
+          switch (state_.top ().mode)
+          {
+          case lexer_mode::command_line:
+          case lexer_mode::first_token:
+          case lexer_mode::second_token:
+          case lexer_mode::variable_line:
+          case lexer_mode::command_expansion:
+          case lexer_mode::here_line_single:
+          case lexer_mode::here_line_double:
+            return next_line ();
+          case lexer_mode::description_line:
+            return next_description ();
+          default:
+            return base_lexer::next ();
+          }
+        };
+
+        token t (scan ());
+
+        if (t.qtype != quote_type::unquoted)
+          ++quoted_;
+
+        return t;
+      }
+
+ // Scan the next token in one of the line modes (command_line,
+ // first_token, second_token, variable_line, command_expansion,
+ // here_line_single, and here_line_double).
+ //
+ // The first character is examined against the special characters
+ // recognized in the current mode, with each group of characters guarded by
+ // the modes it applies to. If none of them match, the character is put
+ // back and the token is scanned as a word.
+ //
+ token lexer::
+ next_line ()
+ {
+ bool sep (skip_spaces ());
+
+ xchar c (get ());
+ uint64_t ln (c.line), cn (c.column);
+
+ if (eos (c))
+ return token (type::eos, sep, ln, cn, token_printer);
+
+ state st (state_.top ()); // Make copy (see first/second_token).
+ lexer_mode m (st.mode);
+
+ // Make a token of the specified type at the position of the first
+ // character. Note that sep is captured by reference since it can be
+ // changed later (see the newline handling). Tokens made in the
+ // here_line_double mode are marked as double-quoted.
+ //
+ auto make_token = [&sep, &m, ln, cn] (type t, string v = string ())
+ {
+ bool q (m == lexer_mode::here_line_double);
+
+ return token (t, move (v), sep,
+ (q ? quote_type::double_ : quote_type::unquoted), q,
+ ln, cn,
+ token_printer);
+ };
+
+ // Make a token of the specified type with the value consisting of the
+ // modifier characters that follow. Each modifier is consumed at most
+ // once (the scan ends at the first repeated or non-modifier character)
+ // and the scan also ends after consuming a character from stop.
+ //
+ auto make_token_with_modifiers =
+ [&make_token, this] (type t,
+ const char* mods, // To recognize.
+ const char* stop = nullptr) // To stop after.
+ {
+ string v;
+ if (mods != nullptr)
+ {
+ for (xchar p (peek ());
+ (strchr (mods, p) != nullptr && // Modifier.
+ strchr (v.c_str (), p) == nullptr); // Not already seen.
+ p = peek ())
+ {
+ get ();
+ v += p;
+
+ if (stop != nullptr && strchr (stop, p) != nullptr)
+ break;
+ }
+ }
+
+ return make_token (t, move (v));
+ };
+
+ // Expire certain modes at the end of the token. Do it early in case
+ // we push any new mode (e.g., double quote).
+ //
+ if (m == lexer_mode::first_token || m == lexer_mode::second_token)
+ state_.pop ();
+
+ // NOTE: remember to update mode() if adding new special characters.
+
+ if (m != lexer_mode::command_expansion)
+ {
+ switch (c)
+ {
+ case '\n':
+ {
+ // Expire variable value mode at the end of the line.
+ //
+ if (m == lexer_mode::variable_line)
+ state_.pop ();
+
+ sep = true; // Treat newline as always separated.
+ return make_token (type::newline);
+ }
+ }
+ }
+
+ if (m != lexer_mode::here_line_single)
+ {
+ switch (c)
+ {
+ // Variable expansion, function call, and evaluation context.
+ //
+ case '$': return make_token (type::dollar);
+ case '(': return make_token (type::lparen);
+ }
+ }
+
+
+ if (m == lexer_mode::variable_line)
+ {
+ switch (c)
+ {
+ // Attributes.
+ //
+ case '[': return make_token (type::lsbrace);
+ case ']': return make_token (type::rsbrace);
+ }
+ }
+
+ // Line separators.
+ //
+ if (m == lexer_mode::command_line ||
+ m == lexer_mode::first_token ||
+ m == lexer_mode::second_token ||
+ m == lexer_mode::variable_line)
+ {
+ switch (c)
+ {
+ case ';': return make_token (type::semi);
+ }
+ }
+
+ if (m == lexer_mode::command_line ||
+ m == lexer_mode::first_token ||
+ m == lexer_mode::second_token)
+ {
+ switch (c)
+ {
+ case ':': return make_token (type::colon);
+ }
+ }
+
+ // Command line operator/separators.
+ //
+ if (m == lexer_mode::command_line ||
+ m == lexer_mode::first_token ||
+ m == lexer_mode::second_token)
+ {
+ switch (c)
+ {
+ // Comparison (==, !=).
+ //
+ // Note that if '=' or '!' is not followed by '=', then we fall out of
+ // the switch and continue with the checks below.
+ //
+ case '=':
+ case '!':
+ {
+ if (peek () == '=')
+ {
+ get ();
+ return make_token (c == '=' ? type::equal : type::not_equal);
+ }
+ }
+ }
+ }
+
+ // Command operators/separators.
+ //
+ if (m == lexer_mode::command_line ||
+ m == lexer_mode::first_token ||
+ m == lexer_mode::second_token ||
+ m == lexer_mode::command_expansion)
+ {
+ switch (c)
+ {
+ // |, ||
+ //
+ case '|':
+ {
+ if (peek () == '|')
+ {
+ get ();
+ return make_token (type::log_or);
+ }
+ else
+ return make_token (type::pipe);
+ }
+ // &, &&
+ //
+ case '&':
+ {
+ xchar p (peek ());
+
+ if (p == '&')
+ {
+ get ();
+ return make_token (type::log_and);
+ }
+
+ // These modifiers are mutually exclusive so stop after seeing
+ // either one.
+ //
+ return make_token_with_modifiers (type::clean, "!?", "!?");
+ }
+ // <
+ //
+ case '<':
+ {
+ // Assume the here-string redirect (<) unless followed by one of
+ // the characters below (<|, <-, <<, <<<).
+ //
+ type r (type::in_str);
+ xchar p (peek ());
+
+ if (p == '|' || p == '-' || p == '<')
+ {
+ get ();
+
+ switch (p)
+ {
+ case '|': return make_token (type::in_pass);
+ case '-': return make_token (type::in_null);
+ case '<':
+ {
+ r = type::in_doc;
+ p = peek ();
+
+ if (p == '<')
+ {
+ get ();
+ r = type::in_file;
+ }
+ break;
+ }
+ }
+ }
+
+ // Handle modifiers.
+ //
+ // Only the here-string and here-document redirects have them.
+ //
+ const char* mods (nullptr);
+ switch (r)
+ {
+ case type::in_str:
+ case type::in_doc: mods = ":/"; break;
+ }
+
+ return make_token_with_modifiers (r, mods);
+ }
+ // >
+ //
+ case '>':
+ {
+ // Assume the here-string redirect (>) unless followed by one of
+ // the characters below (>|, >-, >!, >&, >=, >+, >>, >>>).
+ //
+ type r (type::out_str);
+ xchar p (peek ());
+
+ if (p == '|' || p == '-' || p == '!' || p == '&' ||
+ p == '=' || p == '+' || p == '>')
+ {
+ get ();
+
+ switch (p)
+ {
+ case '|': return make_token (type::out_pass);
+ case '-': return make_token (type::out_null);
+ case '!': return make_token (type::out_trace);
+ case '&': return make_token (type::out_merge);
+ case '=': return make_token (type::out_file_ovr);
+ case '+': return make_token (type::out_file_app);
+ case '>':
+ {
+ r = type::out_doc;
+ p = peek ();
+
+ if (p == '>')
+ {
+ get ();
+ r = type::out_file_cmp;
+ }
+ break;
+ }
+ }
+ }
+
+ // Handle modifiers.
+ //
+ // Only the here-string and here-document redirects have them. The
+ // '~' modifier, if present, ends the modifier sequence.
+ //
+ const char* mods (nullptr);
+ const char* stop (nullptr);
+ switch (r)
+ {
+ case type::out_str:
+ case type::out_doc: mods = ":/~"; stop = "~"; break;
+ }
+
+ return make_token_with_modifiers (r, mods, stop);
+ }
+ }
+ }
+
+ // Dot, plus/minus, and left/right curly braces.
+ //
+ if (m == lexer_mode::first_token)
+ {
+ switch (c)
+ {
+ case '.': return make_token (type::dot);
+ case '+': return make_token (type::plus);
+ case '-': return make_token (type::minus);
+ case '{': return make_token (type::lcbrace);
+ case '}': return make_token (type::rcbrace);
+ }
+ }
+
+ // Variable assignment (=, +=, =+).
+ //
+ if (m == lexer_mode::second_token)
+ {
+ switch (c)
+ {
+ case '=':
+ {
+ if (peek () == '+')
+ {
+ get ();
+ return make_token (type::prepend);
+ }
+ else
+ return make_token (type::assign);
+ }
+ case '+':
+ {
+ if (peek () == '=')
+ {
+ get ();
+ return make_token (type::append);
+ }
+ }
+ }
+ }
+
+ // Otherwise it is a word.
+ //
+ // Note that we pass the state copy made before any mode expiration above.
+ //
+ unget (c);
+ return word (st, sep);
+ }
+
+      // Scan the next token in the description_line mode: either the rest of
+      // the line (up to but not including the newline) as a single word or
+      // the newline itself, which also expires the mode. Fail if the end of
+      // stream is the first thing encountered.
+      //
+      token lexer::
+      next_description ()
+      {
+        xchar c (peek ());
+
+        if (eos (c))
+          fail (c) << "expected newline at the end of description line";
+
+        // Position of the token's first character.
+        //
+        const uint64_t line (c.line), column (c.column);
+
+        if (c == '\n')
+        {
+          get ();
+          state_.pop (); // Expire the description mode.
+          return token (type::newline, true, line, column, token_printer);
+        }
+
+        // Accumulate everything up to the newline or end of stream.
+        //
+        // For now no line continuations though we could support them.
+        //
+        string text;
+
+        while (!eos (c) && c != '\n')
+        {
+          get ();
+          text += c;
+          c = peek ();
+        }
+
+        return token (move (text),
+                      false,
+                      quote_type::unquoted, false,
+                      line, column);
+      }
+
+      // Customized implementation that handles special variable names ($*,
+      // $N, $~, $@).
+      //
+      // In the variable mode a name that starts with '*', '~', '@', or a
+      // digit is returned as a single-character token (a multi-digit name is
+      // an error) and the mode is expired. Everything else is delegated to
+      // the base lexer.
+      //
+      token lexer::
+      word (state st, bool sep)
+      {
+        lexer_mode m (st.mode);
+
+        if (m == lexer_mode::variable)
+        {
+          xchar c (peek ());
+
+          const bool special (c == '*' || c == '~' || c == '@' || digit (c));
+
+          if (special)
+          {
+            get ();
+
+            if (digit (c) && digit (peek ()))
+              fail (c) << "multi-digit special variable name";
+
+            state_.pop (); // Expire the variable mode.
+            return token (string (1, c),
+                          sep,
+                          quote_type::unquoted, false,
+                          c.line, c.column);
+          }
+        }
+
+        return base_lexer::word (st, sep);
+      }
+ }
+ }
+}
diff --git a/libbuild2/test/script/lexer.hxx b/libbuild2/test/script/lexer.hxx
new file mode 100644
index 0000000..d96e91b
--- /dev/null
+++ b/libbuild2/test/script/lexer.hxx
@@ -0,0 +1,94 @@
+// file : libbuild2/test/script/lexer.hxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
+// license : MIT; see accompanying LICENSE file
+
+#ifndef LIBBUILD2_TEST_SCRIPT_LEXER_HXX
+#define LIBBUILD2_TEST_SCRIPT_LEXER_HXX
+
+#include <libbuild2/types.hxx>
+#include <libbuild2/utility.hxx>
+
+#include <libbuild2/lexer.hxx>
+
+#include <libbuild2/test/script/token.hxx>
+
+namespace build2
+{
+ namespace test
+ {
+ namespace script
+ {
+ // Testscript lexer modes. These extend the base lexer modes: the first
+ // enumerator takes the base's value_next and the rest follow it
+ // sequentially (so their order is significant).
+ //
+ struct lexer_mode: build2::lexer_mode
+ {
+   using base_type = build2::lexer_mode;
+
+   enum
+   {
+     command_line = base_type::value_next,
+
+     // The following two expire at the end of the token.
+     //
+     first_token,
+     second_token,
+
+     // Expires at the end of the line.
+     //
+     variable_line,
+
+     command_expansion,
+     here_line_single,
+     here_line_double,
+
+     // Expires at the end of the line.
+     //
+     description_line
+   };
+
+   lexer_mode () = default;
+
+   // Allow construction from both the raw mode value and the base mode.
+   //
+   lexer_mode (value_type v): base_type (v) {}
+   lexer_mode (base_type v): base_type (v) {}
+ };
+
+ // Testscript lexer. Extends the base lexer with the testscript lexer
+ // modes, customized word lexing, and a counter of quoted tokens.
+ //
+ class lexer: public build2::lexer
+ {
+ public:
+   using base_lexer = build2::lexer;
+   using base_mode = build2::lexer_mode;
+
+   // Create a lexer that reads from the specified stream starting in the
+   // specified mode, optionally with custom escape sequences for that
+   // mode.
+   //
+   // Note that we ask the base not to set the initial mode (set_mode is
+   // false) and instead set it ourselves in the body (presumably so that
+   // it is our mode() override that gets called).
+   //
+   lexer (istream& is,
+          const path& name,
+          lexer_mode m,
+          const char* escapes = nullptr)
+       : base_lexer (is,
+                     name,
+                     1 /* line */,
+                     nullptr /* escapes */,
+                     false /* set_mode */)
+   {
+     mode (m, '\0', escapes);
+   }
+
+   // Switch to the specified mode, optionally specifying the pair
+   // separator character and the escape sequences.
+   //
+   virtual void
+   mode (base_mode,
+         char = '\0',
+         optional<const char*> = nullopt) override;
+
+   // Number of quoted (double or single) tokens since last reset.
+   //
+   size_t
+   quoted () const {return quoted_;}
+
+   void
+   reset_quoted (size_t q) {quoted_ = q;}
+
+   // Return the next token according to the current mode.
+   //
+   virtual token
+   next () override;
+
+ protected:
+   token
+   next_line ();
+
+   token
+   next_description ();
+
+   virtual token
+   word (state, bool) override;
+
+ protected:
+   // Start at zero so that quoted() returns a well-defined value even if
+   // reset_quoted() has not been called yet (previously this member was
+   // left uninitialized by the constructor).
+   //
+   size_t quoted_ = 0;
+ };
+ }
+ }
+}
+
+#endif // LIBBUILD2_TEST_SCRIPT_LEXER_HXX
diff --git a/libbuild2/test/script/lexer.test.cxx b/libbuild2/test/script/lexer.test.cxx
new file mode 100644
index 0000000..5a421b8
--- /dev/null
+++ b/libbuild2/test/script/lexer.test.cxx
@@ -0,0 +1,85 @@
+// file : libbuild2/test/script/lexer.test.cxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
+// license : MIT; see accompanying LICENSE file
+
+#include <cassert>
+#include <iostream>
+
+#include <libbuild2/types.hxx>
+#include <libbuild2/utility.hxx>
+
+#include <libbuild2/test/script/token.hxx>
+#include <libbuild2/test/script/lexer.hxx>
+
+using namespace std;
+
+namespace build2
+{
+ namespace test
+ {
+ namespace script
+ {
+ // Usage: argv[0] <lexer-mode>
+ //
+ // Lex stdin in the specified mode printing each token on a separate line
+ // to stdout. Return 0 on success and 1 if lexing has failed.
+ //
+ int
+ main (int argc, char* argv[])
+ {
+   // Translate the mode name to the lexer mode.
+   //
+   assert (argc == 2);
+   const string name (argv[1]);
+
+   lexer_mode m;
+
+   if      (name == "command-line")      m = lexer_mode::command_line;
+   else if (name == "first-token")       m = lexer_mode::first_token;
+   else if (name == "second-token")      m = lexer_mode::second_token;
+   else if (name == "variable-line")     m = lexer_mode::variable_line;
+   else if (name == "command-expansion") m = lexer_mode::command_expansion;
+   else if (name == "here-line-single")  m = lexer_mode::here_line_single;
+   else if (name == "here-line-double")  m = lexer_mode::here_line_double;
+   else if (name == "description-line")  m = lexer_mode::description_line;
+   else if (name == "variable")          m = lexer_mode::variable;
+   else                                  assert (false);
+
+   try
+   {
+     cin.exceptions (istream::failbit | istream::badbit);
+
+     // Some modes auto-expire so they need something underneath: for
+     // those start the lexer in the command_line mode and then switch to
+     // the requested mode on top of it.
+     //
+     const bool expires (m == lexer_mode::first_token      ||
+                         m == lexer_mode::second_token     ||
+                         m == lexer_mode::variable_line    ||
+                         m == lexer_mode::description_line ||
+                         m == lexer_mode::variable);
+
+     lexer l (cin, path ("stdin"), expires ? lexer_mode::command_line : m);
+
+     if (expires)
+       l.mode (m);
+
+     // No use printing eos since we will either get it or loop forever.
+     //
+     for (;;)
+     {
+       token t (l.next ());
+
+       if (t.type == token_type::eos)
+         break;
+
+       // Print each token on a separate line without quoting operators.
+       //
+       t.printer (cout, t, false);
+       cout << endl;
+     }
+   }
+   catch (const failed&)
+   {
+     return 1;
+   }
+
+   return 0;
+ }
+ }
+ }
+}
+
+ // Program entry point: forward to the driver's main() defined in the
+ // build2::test::script namespace and propagate its exit status.
+ //
+ int
+ main (int argc, char* argv[])
+ {
+   const int status (build2::test::script::main (argc, argv));
+   return status;
+ }
diff --git a/libbuild2/test/script/parser+cleanup.test.testscript b/libbuild2/test/script/parser+cleanup.test.testscript
new file mode 100644
index 0000000..321664c
--- /dev/null
+++ b/libbuild2/test/script/parser+cleanup.test.testscript
@@ -0,0 +1,58 @@
+# file : libbuild2/test/script/parser+cleanup.test.testscript
+# copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
+# license : MIT; see accompanying LICENSE file
+
+: always
+:
+$* <<EOI >>EOO
+cmd &file
+EOI
+cmd &file
+EOO
+
+: maybe
+:
+$* <<EOI >>EOO
+cmd &?file
+EOI
+cmd &?file
+EOO
+
+: never
+:
+$* <<EOI >>EOO
+cmd &!file
+EOI
+cmd &!file
+EOO
+
+: empty
+:
+$* <<EOI 2>>EOE != 0
+cmd &""
+EOI
+testscript:1:6: error: empty cleanup path
+EOE
+
+: missed-before
+:
+{
+ : token
+ :
+ : Path missed before command next token
+ :
+ $* <<EOI 2>>EOE != 0
+ cmd & >file
+ EOI
+ testscript:1:7: error: missing cleanup path
+ EOE
+
+ : end
+ : Test path missed before end of command
+ :
+ $* <<EOI 2>>EOE != 0
+ cmd &
+ EOI
+ testscript:1:6: error: missing cleanup path
+ EOE
+}
diff --git a/libbuild2/test/script/parser+command-if.test.testscript b/libbuild2/test/script/parser+command-if.test.testscript
new file mode 100644
index 0000000..7425da2
--- /dev/null
+++ b/libbuild2/test/script/parser+command-if.test.testscript
@@ -0,0 +1,548 @@
+# file : libbuild2/test/script/parser+command-if.test.testscript
+# copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
+# license : MIT; see accompanying LICENSE file
+
+: if
+:
+{
+ : true
+ :
+ $* <<EOI >>EOO
+ if true foo
+ cmd1
+ cmd2
+ end
+ EOI
+ ? true foo
+ cmd1
+ cmd2
+ EOO
+
+ : false
+ :
+ $* <<EOI >>EOO
+ if false foo
+ cmd1
+ cmd2
+ end
+ EOI
+ ? false foo
+ EOO
+
+ : not-true
+ :
+ $* <<EOI >>EOO
+ if! true foo
+ cmd1
+ cmd2
+ end
+ EOI
+ ? true foo
+ EOO
+
+ : not-false
+ :
+ $* <<EOI >>EOO
+ if! false foo
+ cmd1
+ cmd2
+ end
+ EOI
+ ? false foo
+ cmd1
+ cmd2
+ EOO
+
+ : without-command
+ :
+ $* <<EOI 2>>EOE != 0
+ if
+ cmd
+ end
+ EOI
+ testscript:1:3: error: missing program
+ EOE
+
+ : after-semi
+ :
+ $* -s <<EOI >>EOO
+ cmd1;
+ if true
+ cmd2
+ end
+ EOI
+ {
+ {
+ cmd1
+ ? true
+ cmd2
+ }
+ }
+ EOO
+
+ : setup
+ :
+ $* -s <<EOI >>EOO
+ +if true
+ cmd
+ end
+ EOI
+ {
+ ? true
+ +cmd
+ }
+ EOO
+
+ : tdown
+ :
+ $* -s <<EOI >>EOO
+ -if true
+ cmd
+ end
+ EOI
+ {
+ ? true
+ -cmd
+ }
+ EOO
+}
+
+: elif
+:
+{
+ : true
+ :
+ $* <<EOI >>EOO
+ if false
+ cmd1
+ cmd2
+ elif true
+ cmd3
+ cmd4
+ end
+ EOI
+ ? false
+ ? true
+ cmd3
+ cmd4
+ EOO
+
+ : false
+ :
+ $* <<EOI >>EOO
+ if false
+ cmd1
+ cmd2
+ elif false
+ cmd3
+ cmd4
+ end
+ EOI
+ ? false
+ ? false
+ EOO
+
+ : not-true
+ :
+ $* <<EOI >>EOO
+ if false
+ cmd1
+ cmd2
+ elif! true
+ cmd3
+ cmd4
+ end
+ EOI
+ ? false
+ ? true
+ EOO
+
+ : not-false
+ :
+ $* <<EOI >>EOO
+ if false
+ cmd1
+ cmd2
+ elif! false
+ cmd3
+ cmd4
+ end
+ EOI
+ ? false
+ ? false
+ cmd3
+ cmd4
+ EOO
+
+ : without-if
+ :
+ $* <<EOI 2>>EOE != 0
+ cmd
+ elif true
+ cmd
+ end
+ EOI
+ testscript:2:1: error: 'elif' without preceding 'if'
+ EOE
+
+ : not-without-if
+ :
+ $* <<EOI 2>>EOE != 0
+ cmd
+ elif! true
+ cmd
+ end
+ EOI
+ testscript:2:1: error: 'elif!' without preceding 'if'
+ EOE
+
+ : after-else
+ :
+ $* <<EOI 2>>EOE != 0
+ if false
+ cmd
+ else
+ cmd
+ elif true
+ cmd
+ end
+ EOI
+ testscript:5:1: error: 'elif' after 'else'
+ EOE
+}
+
+: else
+:
+{
+ : true
+ :
+ $* <<EOI >>EOO
+ if false
+ cmd1
+ cmd2
+ else
+ cmd3
+ cmd4
+ end
+ EOI
+ ? false
+ cmd3
+ cmd4
+ EOO
+
+ : false
+ :
+ $* <<EOI >>EOO
+ if true
+ cmd1
+ cmd2
+ else
+ cmd3
+ cmd4
+ end
+ EOI
+ ? true
+ cmd1
+ cmd2
+ EOO
+
+ : chain
+ :
+ $* <<EOI >>EOO
+ if false
+ cmd
+ cmd
+ elif false
+ cmd
+ cmd
+ elif false
+ cmd
+ cmd
+ elif true
+ cmd1
+ cmd2
+ elif false
+ cmd
+ cmd
+ else
+ cmd
+ cmd
+ end
+ EOI
+ ? false
+ ? false
+ ? false
+ ? true
+ cmd1
+ cmd2
+ EOO
+
+ : command-after
+ :
+ $* <<EOI 2>>EOE != 0
+ if true
+ cmd
+ else cmd
+ cmd
+ end
+ EOI
+ testscript:3:6: error: expected newline instead of 'cmd'
+ EOE
+
+ : without-if
+ :
+ $* <<EOI 2>>EOE != 0
+ cmd
+ else
+ cmd
+ end
+ EOI
+ testscript:2:1: error: 'else' without preceding 'if'
+ EOE
+
+ : after-else
+ :
+ $* <<EOI 2>>EOE != 0
+ if false
+ cmd
+ else
+ cmd
+ else
+ cmd
+ end
+ EOI
+ testscript:5:1: error: 'else' after 'else'
+ EOE
+}
+
+: end
+{
+ : without-if
+ :
+ $* <<EOI 2>>EOE != 0
+ cmd
+ end
+ EOI
+ testscript:2:1: error: 'end' without preceding 'if'
+ EOE
+
+ : before
+ {
+ : semi
+ :
+ $* -s <<EOI >>EOO
+ if true
+ cmd1
+ end;
+ cmd2
+ EOI
+ {
+ {
+ ? true
+ cmd1
+ cmd2
+ }
+ }
+ EOO
+
+ : command
+ :
+ $* <<EOI 2>>EOE != 0
+ if true
+ cmd
+ end cmd
+ EOI
+ testscript:3:5: error: expected newline instead of 'cmd'
+ EOE
+
+ : colon
+ :
+ $* -s <<EOI >>EOO
+ if true
+ cmd1
+ cmd2
+ end : test
+ EOI
+ {
+ : id:test
+ {
+ ? true
+ cmd1
+ cmd2
+ }
+ }
+ EOO
+ }
+}
+
+: nested
+:
+{
+ : take
+ :
+ $* <<EOI >>EOO
+ if true
+ cmd1
+ if false
+ cmd
+ elif false
+ if true
+ cmd
+ end
+ else
+ cmd2
+ end
+ cmd3
+ end
+ EOI
+ ? true
+ cmd1
+ ? false
+ ? false
+ cmd2
+ cmd3
+ EOO
+
+ : skip
+ :
+ $* <<EOI >>EOO
+ if false
+ cmd1
+ if false
+ cmd
+ elif false
+ if true
+ cmd
+ end
+ else
+ cmd2
+ end
+ cmd3
+ else
+ cmd
+ end
+ EOI
+ ? false
+ cmd
+ EOO
+}
+
+: contained
+{
+ : semi
+ :
+ $* <<EOI 2>>EOE != 0
+ if
+ cmd;
+ cmd
+ end
+ EOI
+ testscript:2:3: error: ';' inside 'if'
+ EOE
+
+ : colon-leading
+ :
+ $* <<EOI 2>>EOE != 0
+ if
+ : foo
+ cmd
+ end
+ EOI
+ testscript:2:3: error: description inside 'if'
+ EOE
+
+ : colon-trailing
+ :
+ $* <<EOI 2>>EOE != 0
+ if
+ cmd : foo
+ end
+ EOI
+ testscript:2:3: error: description inside 'if'
+ EOE
+
+ : eos
+ :
+ $* <<EOI 2>>EOE != 0
+ if
+ EOI
+ testscript:2:1: error: expected closing 'end'
+ EOE
+
+ : scope
+ :
+ $* <<EOI 2>>EOE != 0
+ if
+ cmd
+ {
+ }
+ end
+ EOI
+ testscript:3:3: error: expected closing 'end'
+ EOE
+
+ : setup
+ :
+ $* <<EOI 2>>EOE != 0
+ if
+ +cmd
+ end
+ EOI
+ testscript:2:3: error: setup command inside 'if'
+ EOE
+
+ : tdown
+ :
+ $* <<EOI 2>>EOE != 0
+ if
+ -cmd
+ end
+ EOI
+ testscript:2:3: error: teardown command inside 'if'
+ EOE
+}
+
+: line-index
+:
+$* -l <<EOI >>EOO
+if false
+ cmd
+ if true
+ cmd
+ end
+ cmd
+elif false
+ cmd
+else
+ cmd
+end
+EOI
+? false # 1
+? false # 6
+cmd # 8
+EOO
+
+: var
+:
+$* <<EOI >>EOO
+if true
+ x = foo
+else
+ x = bar
+end;
+cmd $x
+EOI
+? true
+cmd foo
+EOO
+
+: leading-and-trailing-description
+:
+$* <<EOI 2>>EOE != 0
+: foo
+if true
+ cmd
+end : bar
+EOI
+testscript:4:1: error: both leading and trailing descriptions
+EOE
diff --git a/libbuild2/test/script/parser+command-re-parse.test.testscript b/libbuild2/test/script/parser+command-re-parse.test.testscript
new file mode 100644
index 0000000..f5a67f3
--- /dev/null
+++ b/libbuild2/test/script/parser+command-re-parse.test.testscript
@@ -0,0 +1,12 @@
+# file : libbuild2/test/script/parser+command-re-parse.test.testscript
+# copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
+# license : MIT; see accompanying LICENSE file
+
+: double-quote
+:
+$* <<EOI >>EOO
+x = cmd \">-\" "'<-'"
+$x
+EOI
+cmd '>-' '<-'
+EOO
diff --git a/libbuild2/test/script/parser+description.test.testscript b/libbuild2/test/script/parser+description.test.testscript
new file mode 100644
index 0000000..d17a69f
--- /dev/null
+++ b/libbuild2/test/script/parser+description.test.testscript
@@ -0,0 +1,486 @@
+# file : libbuild2/test/script/parser+description.test.testscript
+# copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
+# license : MIT; see accompanying LICENSE file
+
+: id
+:
+{
+ : lead
+ :
+ $* <<EOI >>EOO
+ : foo
+ cmd
+ EOI
+ : id:foo
+ cmd
+ EOO
+
+ : trail
+ :
+ $* <<EOI >>EOO
+ cmd : foo
+ EOI
+ : id:foo
+ cmd
+ EOO
+
+ : dup
+ : Id uniqueness
+ :
+ {
+ : test
+ :
+ {
+ : test
+ :
+ $* <<EOI 2>>EOE != 0
+ : foo
+ cmd
+ : foo
+ cmd
+ EOI
+ testscript:3:1: error: duplicate id foo
+ testscript:1:1: info: previously used here
+ EOE
+
+ : group
+ :
+ $* <<EOI 2>>EOE != 0
+ : foo
+ cmd
+ : foo
+ {
+ cmd
+ cmd
+ }
+ EOI
+ testscript:3:1: error: duplicate id foo
+ testscript:1:1: info: previously used here
+ EOE
+
+ : derived
+ :
+ $* <<EOI 2>>EOE != 0
+ : 3
+ cmd
+ cmd
+ EOI
+ testscript:3:1: error: duplicate id 3
+ testscript:1:1: info: previously used here
+ EOE
+ }
+
+ : group
+ :
+ {
+ : test
+ :
+ $* <<EOI 2>>EOE != 0
+ : foo
+ {
+ cmd
+ cmd
+ }
+ : foo
+ cmd
+ EOI
+ testscript:6:1: error: duplicate id foo
+ testscript:1:1: info: previously used here
+ EOE
+
+ : group
+ :
+ $* <<EOI 2>>EOE != 0
+ : foo
+ {
+ cmd
+ cmd
+ }
+ : foo
+ {
+ cmd
+ cmd
+ }
+ EOI
+ testscript:6:1: error: duplicate id foo
+ testscript:1:1: info: previously used here
+ EOE
+
+ : derived
+ :
+ $* <<EOI 2>>EOE != 0
+ : 3
+ cmd
+ {
+ cmd
+ cmd
+ }
+ EOI
+ testscript:3:1: error: duplicate id 3
+ testscript:1:1: info: previously used here
+ EOE
+ }
+ }
+}
+
+: summary
+{
+ : lead
+ :
+ $* <<EOI >>EOO
+ : foo bar
+ cmd
+ EOI
+ : sm:foo bar
+ cmd
+ EOO
+
+ : trail
+ :
+ $* <<EOI >>EOO
+ cmd: foo bar
+ EOI
+ : sm:foo bar
+ cmd
+ EOO
+
+ : id
+ :
+ $* <<EOI >>EOO
+ : foo-bar
+ : foo bar
+ cmd
+ EOI
+ : id:foo-bar
+ : sm:foo bar
+ cmd
+ EOO
+}
+
+: details
+{
+ : id
+ :
+ $* <<EOI >>EOO
+ : foo-bar
+ :
+ : foo bar
+ : bar baz
+ cmd
+ EOI
+ : id:foo-bar
+ :
+ : foo bar
+ : bar baz
+ cmd
+ EOO
+
+ : summary
+ :
+ {
+ : only
+ :
+ $* <<EOI >>EOO
+ : foo bar
+ :
+ : foo bar
+ : bar baz
+ cmd
+ EOI
+ : sm:foo bar
+ :
+ : foo bar
+ : bar baz
+ cmd
+ EOO
+
+ : assumed
+ :
+ $* <<EOI >>EOO
+ : foo bar
+ : bar baz
+ cmd
+ EOI
+ : foo bar
+ : bar baz
+ cmd
+ EOO
+
+ : id
+ :
+ $* <<EOI >>EOO
+ : foo-bar
+ : foo bar
+ :
+ : foo bar
+ : bar baz
+ cmd
+ EOI
+ : id:foo-bar
+ : sm:foo bar
+ :
+ : foo bar
+ : bar baz
+ cmd
+ EOO
+
+ : id-assumed
+ :
+ $* <<EOI >>EOO
+ : foo-bar
+ : bar baz
+ : baz fox
+ cmd
+ EOI
+ : foo-bar
+ : bar baz
+ : baz fox
+ cmd
+ EOO
+ }
+}
+
+: legal
+:
+: Legal places for description.
+:
+{
+ : var
+ :
+ $* <<EOI >>EOO
+ : foo bar
+ x = y;
+ cmd $x
+ EOI
+ : sm:foo bar
+ cmd y
+ EOO
+}
+
+: illegal
+:
+: Illegal places for description.
+:
+{
+ : eof
+ :
+ $* <": foo" 2>>EOE != 0
+ testscript:2:1: error: description before <end of file>
+ EOE
+
+ : rcbrace
+ :
+ $* <<EOI 2>>EOE != 0
+ {
+ cmd
+ : foo
+ }
+ EOI
+ testscript:4:1: error: description before '}'
+ EOE
+
+ : setup
+ :
+ $* <<EOI 2>>EOE != 0
+ : foo
+ +cmd
+ EOI
+ testscript:2:1: error: description before setup command
+ EOE
+
+ : tdown
+ :
+ $* <<EOI 2>>EOE != 0
+ : foo
+ -cmd
+ EOI
+ testscript:2:1: error: description before teardown command
+ EOE
+
+ : var
+ :
+ $* <<EOI 2>>EOE != 0
+ : foo
+ x = y
+ EOI
+ testscript:2:1: error: description before setup/teardown variable
+ EOE
+
+ : var-if
+ :
+ $* <<EOI 2>>EOE != 0
+ : foo
+ if true
+ x = y
+ end
+ EOI
+ testscript:2:1: error: description before/after setup/teardown variable-if
+ EOE
+
+ : var-if-after
+ :
+ $* <<EOI 2>>EOE != 0
+ if true
+ x = y
+ end : foo
+ EOI
+ testscript:1:1: error: description before/after setup/teardown variable-if
+ EOE
+
+ : test
+ :
+ $* <<EOI 2>>EOE != 0
+ cmd1;
+ : foo
+ cmd2
+ EOI
+ testscript:2:1: error: description inside test
+ EOE
+}
+
+: test-scope
+:
+: Interaction with test scope merging.
+:
+{
+ : both
+ :
+ : No merge since both have description.
+ :
+ $* -s -i <<EOI >>EOO
+ : foo
+ {
+ : bar
+ cmd
+ }
+ EOI
+ {
+ : id:foo
+ { # foo
+ : id:bar
+ { # foo/bar
+ cmd
+ }
+ }
+ }
+ EOO
+
+ : test
+ :
+ : No merge since test has description.
+ :
+ $* -s -i <<EOI >>EOO
+ {
+ : foo-bar
+ : foo bar
+ cmd
+ }
+ EOI
+ {
+ { # 1
+ : id:foo-bar
+ : sm:foo bar
+ { # 1/foo-bar
+ cmd
+ }
+ }
+ }
+ EOO
+
+ : group
+ :
+ $* -s -i <<EOI >>EOO
+ : foo-bar
+ : foo bar
+ {
+ cmd
+ }
+ EOI
+ {
+ : id:foo-bar
+ : sm:foo bar
+ { # foo-bar
+ cmd
+ }
+ }
+ EOO
+}
+
+: blanks
+:
+$* <<EOI >>EOO
+:
+:
+: foo bar
+: bar baz
+:
+: baz fox
+:
+:
+cmd
+EOI
+: foo bar
+: bar baz
+:
+: baz fox
+cmd
+EOO
+
+: strip
+:
+$* <<EOI >>EOO
+: foo-bar
+: bar baz
+:
+: baz fox
+: fox biz
+:biz buz
+:
+cmd
+EOI
+: id:foo-bar
+: sm:bar baz
+:
+: baz fox
+: fox biz
+: biz buz
+cmd
+EOO
+
+: trail-compound
+:
+$* <<EOI >>EOO
+cmd1;
+cmd2: foo
+EOI
+: id:foo
+cmd1
+cmd2
+EOO
+
+: empty
+:
+$* <<EOI 2>>EOE != 0
+:
+:
+cmd
+EOI
+testscript:1:1: error: empty description
+EOE
+
+: trail-empty
+:
+$* <<EOI 2>>EOE != 0
+cmd:
+EOI
+testscript:1:4: error: empty description
+EOE
+
+: both
+:
+$* <<EOI 2>>EOE != 0
+: foo
+cmd : bar
+EOI
+testscript:2:1: error: both leading and trailing descriptions
+EOE
diff --git a/libbuild2/test/script/parser+directive.test.testscript b/libbuild2/test/script/parser+directive.test.testscript
new file mode 100644
index 0000000..9d04ce7
--- /dev/null
+++ b/libbuild2/test/script/parser+directive.test.testscript
@@ -0,0 +1,74 @@
+# file : libbuild2/test/script/parser+directive.test.testscript
+# copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
+# license : MIT; see accompanying LICENSE file
+
+: not-directive
+:
+$* <<EOI >>EOO
+x = x
+".include" foo.testscript
+\.include foo.testscript
+EOI
+.include foo.testscript
+.include foo.testscript
+EOO
+
+: expected-name
+:
+$* <<EOI 2>>EOE != 0
+.$
+EOI
+testscript:1:2: error: expected directive name instead of '$'
+EOE
+
+: unknown-name
+:
+$* <<EOI 2>>EOE != 0
+.bogus
+EOI
+testscript:1:2: error: unknown directive 'bogus'
+EOE
+
+: separated
+:
+touch foo.testscript;
+$* <<EOI
+. include foo.testscript
+EOI
+
+: not-separated
+:
+touch foo.testscript;
+$* <<EOI
+x = foo.testscript
+.include$x
+EOI
+
+: var-expansion
+:
+cat <<EOI >="foo-$(build.verson.project).testscript";
+cmd
+EOI
+$* <<EOI >>EOO
+.include "foo-$(build.verson.project).testscript"
+EOI
+cmd
+EOO
+
+: after-semi
+:
+$* <<EOI 2>>EOE != 0
+cmd;
+.include foo.testscript
+EOI
+testscript:2:1: error: directive after ';'
+EOE
+
+: semi-after
+:
+$* <<EOI 2>>EOE != 0
+.include foo.testscript;
+cmd
+EOI
+testscript:1:24: error: ';' after directive
+EOE
diff --git a/libbuild2/test/script/parser+exit.test.testscript b/libbuild2/test/script/parser+exit.test.testscript
new file mode 100644
index 0000000..284e9a7
--- /dev/null
+++ b/libbuild2/test/script/parser+exit.test.testscript
@@ -0,0 +1,27 @@
+# file : libbuild2/test/script/parser+exit.test.testscript
+# copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
+# license : MIT; see accompanying LICENSE file
+
+: eq
+:
+$* <<EOI >>EOO
+cmd == 1
+EOI
+cmd == 1
+EOO
+
+: ne
+:
+$* <<EOI >>EOO
+cmd!=1
+EOI
+cmd != 1
+EOO
+
+: end
+:
+$* <<EOI 2>>EOE != 0
+cmd != 1 <"foo"
+EOI
+testscript:1:10: error: unexpected '<' after command exit status
+EOE
diff --git a/libbuild2/test/script/parser+expansion.test.testscript b/libbuild2/test/script/parser+expansion.test.testscript
new file mode 100644
index 0000000..7ea92f9
--- /dev/null
+++ b/libbuild2/test/script/parser+expansion.test.testscript
@@ -0,0 +1,36 @@
+# file : libbuild2/test/script/parser+expansion.test.testscript
+# copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
+# license : MIT; see accompanying LICENSE file
+
+: quote
+:
+: Make sure everything expanded as strings.
+:
+$* <<EOI >>EOO
+x = dir/ proj% proj%name proj%proj%dir/type{name name {name}}
+cmd dir/ proj% proj%name proj%proj%dir/type{name name {name}}
+cmd $x
+EOI
+cmd dir/ proj% proj%name proj%proj%dir/type{name name {name}}
+cmd dir/ proj% proj%name proj%proj%dir/type{name name {name}}
+EOO
+
+: unterm-quoted-seq
+:
+$* <<EOI 2>>EOE != 0
+x = "'a bc"
+cmd xy$x
+EOI
+<string>:1:8: error: unterminated single-quoted sequence
+ testscript:2:5: info: while parsing string 'xy'a bc'
+EOE
+
+: invalid-redirect
+:
+$* <<EOI 2>>EOE != 0
+x = "1>&a"
+cmd $x
+EOI
+<string>:1:4: error: stdout merge redirect file descriptor must be 2
+ testscript:2:5: info: while parsing string '1>&a'
+EOE
diff --git a/libbuild2/test/script/parser+here-document.test.testscript b/libbuild2/test/script/parser+here-document.test.testscript
new file mode 100644
index 0000000..00f3fbd
--- /dev/null
+++ b/libbuild2/test/script/parser+here-document.test.testscript
@@ -0,0 +1,213 @@
+# file : libbuild2/test/script/parser+here-document.test.testscript
+# copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
+# license : MIT; see accompanying LICENSE file
+
+: end-marker
+:
+{
+ : missing-newline
+ :
+ $* <'cmd <<' 2>>EOE != 0
+ testscript:1:7: error: expected here-document end marker
+ EOE
+
+ : missing-exit
+ :
+ $* <'cmd << != 0' 2>>EOE != 0
+ testscript:1:8: error: expected here-document end marker
+ EOE
+
+ : missing-empty
+ :
+ $* <'cmd <<""' 2>>EOE != 0
+ testscript:1:7: error: expected here-document end marker
+ EOE
+
+ : unseparated-expansion
+ :
+ $* <'cmd <<FOO$foo' 2>>EOE != 0
+ testscript:1:10: error: here-document end marker must be literal
+ EOE
+
+ : quoted-single-partial
+ :
+ $* <"cmd <<F'O'O" 2>>EOE != 0
+ testscript:1:7: error: partially-quoted here-document end marker
+ EOE
+
+ : quoted-double-partial
+ :
+ $* <'cmd <<"FO"O' 2>>EOE != 0
+ testscript:1:7: error: partially-quoted here-document end marker
+ EOE
+
+ : quoted-mixed
+ :
+ $* <"cmd <<\"FO\"'O'" 2>>EOE != 0
+ testscript:1:7: error: partially-quoted here-document end marker
+ EOE
+
+ : unseparated
+ :
+ $* <<EOI >>EOO
+ cmd <<EOF!=0
+ foo
+ EOF
+ EOI
+ cmd <<EOF != 0
+ foo
+ EOF
+ EOO
+
+ : quoted-single
+ :
+ $* <<EOI >>EOO
+ cmd <<'EOF'
+ foo
+ EOF
+ EOI
+ cmd <<EOF
+ foo
+ EOF
+ EOO
+
+ : quoted-double
+ :
+ $* <<EOI >>EOO
+ cmd <<"EOF"
+ foo
+ EOF
+ EOI
+ cmd <<EOF
+ foo
+ EOF
+ EOO
+}
+
+: indent
+:
+{
+ : basic
+ :
+ $* <<EOI >>EOO
+ cmd <<EOF
+ foo
+ bar
+ baz
+ EOF
+ EOI
+ cmd <<EOF
+ foo
+ bar
+ baz
+ EOF
+ EOO
+
+ : blank
+ :
+ $* <<EOI >>EOO
+ cmd <<EOF
+ foo
+
+
+ bar
+ EOF
+ EOI
+ cmd <<EOF
+ foo
+
+
+ bar
+ EOF
+ EOO
+
+ : non-ws-prefix
+ :
+ $* <<EOI >>EOO
+ cmd <<EOF
+ x EOF
+ EOF
+ EOI
+ cmd <<EOF
+ x EOF
+ EOF
+ EOO
+
+ : whole-token
+ : Test the case where the indentation is a whole token
+ :
+ $* <<EOI >>EOO
+ x = foo bar
+ cmd <<"EOF"
+ $x
+ EOF
+ EOI
+ cmd <<EOF
+ foo bar
+ EOF
+ EOO
+
+ : long-line
+ : Test the case where the line contains multiple tokens
+ :
+ $* <<EOI >>EOO
+ x = foo
+ cmd <<"EOF"
+ $x bar $x
+ EOF
+ EOI
+ cmd <<EOF
+ foo bar foo
+ EOF
+ EOO
+
+ : unindented
+ :
+ $* <<EOI 2>>EOE != 0
+ cmd <<EOF
+ bar
+ EOF
+ EOI
+ testscript:2:1: error: unindented here-document line
+ EOE
+}
+
+: blank
+:
+$* <<EOI >>EOO
+cmd <<EOF
+
+foo
+
+bar
+
+EOF
+EOI
+cmd <<EOF
+
+foo
+
+bar
+
+EOF
+EOO
+
+: quote
+:
+: Note: they are still recognized in eval contexts.
+:
+$* <<EOI >>EOO
+cmd <<"EOF"
+'single'
+"double"
+b'o't"h"
+('single' "double")
+EOF
+EOI
+cmd <<EOF
+'single'
+"double"
+b'o't"h"
+single double
+EOF
+EOO
diff --git a/libbuild2/test/script/parser+here-string.test.testscript b/libbuild2/test/script/parser+here-string.test.testscript
new file mode 100644
index 0000000..785951d
--- /dev/null
+++ b/libbuild2/test/script/parser+here-string.test.testscript
@@ -0,0 +1,19 @@
+# file : libbuild2/test/script/parser+here-string.test.testscript
+# copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
+# license : MIT; see accompanying LICENSE file
+
+: empty
+:
+$* <<EOI >>EOO
+cmd <""
+EOI
+cmd <''
+EOO
+
+: empty-nn
+:
+$* <<EOI >>EOO
+cmd <:""
+EOI
+cmd <:''
+EOO
diff --git a/libbuild2/test/script/parser+include.test.testscript b/libbuild2/test/script/parser+include.test.testscript
new file mode 100644
index 0000000..c86b583
--- /dev/null
+++ b/libbuild2/test/script/parser+include.test.testscript
@@ -0,0 +1,104 @@
+# file : libbuild2/test/script/parser+include.test.testscript
+# copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
+# license : MIT; see accompanying LICENSE file
+
+: none
+:
+$* <<EOI
+.include
+.include --once
+EOI
+
+: empty
+:
+touch foo.testscript;
+$* <<EOI
+.include foo.testscript
+.include --once foo.testscript
+EOI
+
+: one
+:
+cat <"cmd" >=foo.testscript;
+$* <<EOI >>EOO
+.include foo.testscript
+EOI
+cmd
+EOO
+
+: multiple
+:
+cat <"cmd foo" >=foo.testscript;
+cat <"cmd bar" >=bar.testscript;
+$* <<EOI >>EOO
+.include foo.testscript bar.testscript
+EOI
+cmd foo
+cmd bar
+EOO
+
+: once
+:
+cat <"cmd" >=foo.testscript;
+$* <<EOI >>EOO
+.include foo.testscript
+x
+.include --once foo.testscript
+.include --once bar/../foo.testscript
+y
+.include ../once/foo.testscript
+EOI
+cmd
+x
+y
+cmd
+EOO
+
+: group-id
+:
+cat <<EOI >=foo.testscript;
+{
+ x = b
+}
+EOI
+$* -s -i <<EOI >>EOO
+x = a
+.include foo.testscript
+EOI
+{
+ { # 2-foo-1
+ }
+}
+EOO
+
+: test-id
+:
+cat <<EOI >=foo.testscript;
+cmd
+EOI
+$* -s -i <<EOI >>EOO
+x = a
+.include foo.testscript
+EOI
+{
+ { # 2-foo-1
+ cmd
+ }
+}
+EOO
+
+: invalid-path
+:
+$* <<EOI 2>>EOE != 0
+.include ""
+EOI
+testscript:1:2: error: invalid testscript include path ''
+EOE
+
+: unable-open
+:
+$* <<EOI 2>>~/EOE/ != 0
+.include foo.testscript
+EOI
+/testscript:1:2: error: unable to read testscript foo.testscript: .+/
+EOE
diff --git a/libbuild2/test/script/parser+pipe-expr.test.testscript b/libbuild2/test/script/parser+pipe-expr.test.testscript
new file mode 100644
index 0000000..8b6b4f9
--- /dev/null
+++ b/libbuild2/test/script/parser+pipe-expr.test.testscript
@@ -0,0 +1,133 @@
+# file : libbuild2/test/script/parser+pipe-expr.test.testscript
+# copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
+# license : MIT; see accompanying LICENSE file
+
+: pipe
+:
+$* <<EOI >>EOO
+cmd1 | cmd2|cmd3
+EOI
+cmd1 | cmd2 | cmd3
+EOO
+
+: log
+:
+$* <<EOI >>EOO
+cmd1 || cmd2&&cmd3
+EOI
+cmd1 || cmd2 && cmd3
+EOO
+
+: pipe-log
+:
+$* <<EOI >>EOO
+cmd1 | cmd2 && cmd3 | cmd4
+EOI
+cmd1 | cmd2 && cmd3 | cmd4
+EOO
+
+: exit
+:
+$* <<EOI >>EOO
+cmd1|cmd2==1&&cmd3!=0|cmd4
+EOI
+cmd1 | cmd2 == 1 && cmd3 != 0 | cmd4
+EOO
+
+: here-doc
+:
+$* <<EOI >>EOO
+cmd1 <<EOI1 | cmd2 >>EOO2 && cmd3 <<EOI3 2>&1 | cmd4 2>>EOE4 >>EOO4
+input
+one
+EOI1
+ouput
+two
+EOO2
+input
+three
+EOI3
+error
+four
+EOE4
+output
+four
+EOO4
+EOI
+cmd1 <<EOI1 | cmd2 >>EOO2 && cmd3 <<EOI3 2>&1 | cmd4 >>EOO4 2>>EOE4
+input
+one
+EOI1
+ouput
+two
+EOO2
+input
+three
+EOI3
+output
+four
+EOO4
+error
+four
+EOE4
+EOO
+
+: leading
+:
+$* <<EOI 2>>EOE != 0
+| cmd
+EOI
+testscript:1:1: error: missing program
+EOE
+
+: trailing
+:
+$* <<EOI 2>>EOE != 0
+cmd &&
+EOI
+testscript:1:7: error: missing program
+EOE
+
+: redirected
+:
+{
+ : input
+ :
+ {
+ : first
+ :
+ $* <<EOI >>EOO
+ cmd1 <foo | cmd2
+ EOI
+ cmd1 <foo | cmd2
+ EOO
+
+ : non-first
+ :
+ $* <<EOI 2>>EOE != 0
+ cmd1 | cmd2 <foo
+ EOI
+ testscript:1:13: error: stdin is both piped and redirected
+ EOE
+ }
+
+ : output
+ :
+ {
+ : last
+ :
+ $* <<EOI >>EOO
+ cmd1 | cmd2 >foo
+ EOI
+ cmd1 | cmd2 >foo
+ EOO
+
+ : non-last
+ :
+ $* <<EOI 2>>EOE != 0
+ cmd1 >foo | cmd2
+ EOI
+ testscript:1:11: error: stdout is both redirected and piped
+ EOE
+ }
+}
diff --git a/libbuild2/test/script/parser+pre-parse.test.testscript b/libbuild2/test/script/parser+pre-parse.test.testscript
new file mode 100644
index 0000000..f98512a
--- /dev/null
+++ b/libbuild2/test/script/parser+pre-parse.test.testscript
@@ -0,0 +1,23 @@
+# file : libbuild2/test/script/parser+pre-parse.test.testscript
+# copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
+# license : MIT; see accompanying LICENSE file
+
+: attribute
+:
+{
+ : pair
+ :
+ $* <<EOI 2>>EOE != 0
+ x = [foo=bar]
+ EOI
+ testscript:1:5: error: unknown value attribute foo=bar
+ EOE
+
+ : pair-empty
+ :
+ $* <<EOI 2>>EOE != 0
+ x = [foo=]
+ EOI
+ testscript:1:5: error: unknown value attribute foo
+ EOE
+}
diff --git a/libbuild2/test/script/parser+redirect.test.testscript b/libbuild2/test/script/parser+redirect.test.testscript
new file mode 100644
index 0000000..a8691da
--- /dev/null
+++ b/libbuild2/test/script/parser+redirect.test.testscript
@@ -0,0 +1,356 @@
+# file : libbuild2/test/script/parser+redirect.test.testscript
+# copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
+# license : MIT; see accompanying LICENSE file
+
+# @@ Add tests for redirects other than trace, here-*, file and merge.
+# @@ Does it make sense to split into separate files - one per redirect type?
+#
+
+: trace
+:
+{
+ $* <'cmd >!' >'cmd >!' : out
+ $* <'cmd 2>!' >'cmd 2>!' : err
+}
+
+: str
+:
+{
+ : literal
+ :
+ {
+ : portable-path
+ :
+ $* <<EOI >>EOO
+ cmd </foo >/bar 2>/baz
+ EOI
+ cmd </foo >/bar 2>/baz
+ EOO
+ }
+
+ : regex
+ :
+ {
+ : portable-path
+ :
+ $* <<EOI >>EOO
+ cmd >/~%foo% 2>/~%bar%
+ EOI
+ cmd >/~%foo% 2>/~%bar%
+ EOO
+ }
+}
+
+: doc
+:
+{
+ : literal
+ :
+ {
+ : portable-path
+ :
+ $* <<EOI >>EOO
+ cmd <</EOI_ >/EOO_ 2>/EOE_
+ foo
+ EOI_
+ bar
+ EOO_
+ baz
+ EOE_
+ EOI
+ cmd <</EOI_ >/EOO_ 2>/EOE_
+ foo
+ EOI_
+ bar
+ EOO_
+ baz
+ EOE_
+ EOO
+
+ : sharing
+ :
+ {
+ : in-out
+ :
+ $* <<EOI >>EOO
+ cmd <<:/EOF >>:/EOF
+ foo
+ EOF
+ EOI
+ cmd <<:/EOF >>:/EOF
+ foo
+ EOF
+ EOO
+
+ : different
+ :
+ {
+ : modifiers
+ :
+ $* <<EOI 2>>EOE != 0
+ cmd <<:/EOF >>:EOF
+ foo
+ EOF
+ EOI
+ testscript:1:16: error: different modifiers for shared here-document 'EOF'
+ EOE
+
+ : quoting
+ :
+ $* <<EOI 2>>EOE != 0
+ cmd <<EOF >>"EOF"
+ foo
+ EOF
+ EOI
+ testscript:1:13: error: different quoting for shared here-document 'EOF'
+ EOE
+ }
+ }
+ }
+
+ : regex
+ :
+ {
+ : portable-path
+ :
+ $* <<EOI >>EOO
+ cmd >/~%EOF% 2>/~%EOE%
+ foo
+ EOF
+ bar
+ EOE
+ EOI
+ cmd >/~%EOF% 2>/~%EOE%
+ foo
+ EOF
+ bar
+ EOE
+ EOO
+
+ : sharing
+ :
+ {
+ : in-out
+ :
+ $* <<EOI >>EOO
+ cmd >>~/EOF/ 2>>~/EOF/
+ foo
+ EOF
+ EOI
+ cmd >>~/EOF/ 2>>~/EOF/
+ foo
+ EOF
+ EOO
+
+ : different
+ :
+ {
+ : introducers
+ :
+ $* <<EOI 2>>EOE != 0
+ cmd >>~/EOF/ 2>>~%EOF%
+ foo
+ EOF
+ EOI
+ testscript:1:18: error: different introducers for shared here-document regex 'EOF'
+ EOE
+
+ : flags
+ :
+ $* <<EOI 2>>EOE != 0
+ cmd >>~/EOF/ 2>>~/EOF/i
+ foo
+ EOF
+ EOI
+ testscript:1:18: error: different global flags for shared here-document regex 'EOF'
+ EOE
+ }
+ }
+ }
+}
+
+: file
+:
+{
+ : cmp
+ :
+ $* <<EOI >>EOO
+ cmd 0<<<a 1>>>b 2>>>c
+ EOI
+ cmd <<<a >>>b 2>>>c
+ EOO
+
+ : write
+ :
+ $* <<EOI >>EOO
+ cmd 1>=b 2>+c
+ EOI
+ cmd >=b 2>+c
+ EOO
+
+ : quote
+ :
+ $* <<EOI >>EOO
+ cmd 0<<<"a f" 1>="b f" 2>+"c f"
+ EOI
+ cmd <<<'a f' >='b f' 2>+'c f'
+ EOO
+
+ : in
+ :
+ {
+ : missed
+ :
+ $* <<EOI 2>>EOE !=0
+ cmd <<<
+ EOI
+ testscript:1:8: error: missing stdin file
+ EOE
+
+ : empty
+ :
+ $* <<EOI 2>>EOE !=0
+ cmd <<<""
+ EOI
+ testscript:1:8: error: empty stdin redirect path
+ EOE
+ }
+
+ : out
+ :
+ {
+ : missed
+ :
+ $* <<EOI 2>>EOE !=0
+ cmd >=
+ EOI
+ testscript:1:7: error: missing stdout file
+ EOE
+
+ : empty
+ :
+ $* <<EOI 2>>EOE !=0
+ cmd >=""
+ EOI
+ testscript:1:7: error: empty stdout redirect path
+ EOE
+ }
+
+ : err
+ :
+ {
+ : missed
+ :
+ $* <<EOI 2>>EOE !=0
+ cmd 2>=
+ EOI
+ testscript:1:8: error: missing stderr file
+ EOE
+
+ : empty
+ :
+ $* <<EOI 2>>EOE !=0
+ cmd 2>=""
+ EOI
+ testscript:1:8: error: empty stderr redirect path
+ EOE
+ }
+}
+
+: merge
+{
+ : out
+ :
+ {
+ : err
+ :
+ $* <<EOI >>EOO
+ cmd 1>&2
+ EOI
+ cmd >&2
+ EOO
+
+ : no-mutual
+ :
+ $* <<EOI >>EOO
+ cmd 1>&2 2>&1 2>a
+ EOI
+ cmd >&2 2>a
+ EOO
+
+ : not-descriptor
+ :
+ $* <<EOI 2>>EOE != 0
+ cmd 1>&a
+ EOI
+ testscript:1:8: error: stdout merge redirect file descriptor must be 2
+ EOE
+
+ : self
+ :
+ $* <<EOI 2>>EOE != 0
+ cmd 1>&1
+ EOI
+ testscript:1:8: error: stdout merge redirect file descriptor must be 2
+ EOE
+
+ : missed
+ :
+ $* <<EOI 2>>EOE != 0
+ cmd 1>&
+ EOI
+ testscript:1:8: error: missing stdout file descriptor
+ EOE
+ }
+
+ : err
+ {
+ : out
+ :
+ $* <<EOI >>EOO
+ cmd 2>&1
+ EOI
+ cmd 2>&1
+ EOO
+
+ : no-mutual
+ :
+ $* <<EOI >>EOO
+ cmd 1>&2 2>&1 >a
+ EOI
+ cmd >a 2>&1
+ EOO
+
+ : not-descriptor
+ :
+ $* <<EOI 2>>EOE != 0
+ cmd 2>&a
+ EOI
+ testscript:1:8: error: stderr merge redirect file descriptor must be 1
+ EOE
+
+ : self
+ :
+ $* <<EOI 2>>EOE != 0
+ cmd 2>&2
+ EOI
+ testscript:1:8: error: stderr merge redirect file descriptor must be 1
+ EOE
+
+ : missed
+ :
+ $* <<EOI 2>>EOE != 0
+ cmd 2>&
+ EOI
+ testscript:1:8: error: missing stderr file descriptor
+ EOE
+ }
+
+ : mutual
+ :
+ $* <<EOI 2>>EOE != 0
+ cmd 1>&2 2>&1
+ EOI
+ testscript:1:14: error: stdout and stderr redirected to each other
+ EOE
+}
diff --git a/libbuild2/test/script/parser+regex.test.testscript b/libbuild2/test/script/parser+regex.test.testscript
new file mode 100644
index 0000000..d5f899a
--- /dev/null
+++ b/libbuild2/test/script/parser+regex.test.testscript
@@ -0,0 +1,223 @@
+# file : libbuild2/test/script/parser+regex.test.testscript
+# copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
+# license : MIT; see accompanying LICENSE file
+
+: here-string
+:
+{
+ : stdout
+ :
+ {
+ : missed
+ :
+ $* <'cmd >~' 2>>EOE != 0
+ testscript:1:7: error: missing stdout here-string regex
+ EOE
+
+ : no-introducer
+ :
+ $* <'cmd >~""' 2>>EOE != 0
+ testscript:1:7: error: no introducer character in stdout regex redirect
+ EOE
+
+ : no-term-introducer
+ :
+ $* <'cmd >~/' 2>>EOE != 0
+ testscript:1:7: error: no closing introducer character in stdout regex redirect
+ EOE
+
+ : portable-path-introducer
+ :
+ $* <'cmd >/~/foo/' 2>>EOE != 0
+ testscript:1:8: error: portable path modifier and '/' introducer in stdout regex redirect
+ EOE
+
+ : empty
+ :
+ $* <'cmd >~//' 2>>EOE != 0
+ testscript:1:7: error: stdout regex redirect is empty
+ EOE
+
+ : no-flags
+ :
+ $* <'cmd >~/fo*/' >'cmd >~/fo*/'
+
+ : idot
+ :
+ $* <'cmd >~/fo*/d' >'cmd >~/fo*/d'
+
+ : icase
+ :
+ $* <'cmd >~/fo*/i' >'cmd >~/fo*/i'
+
+ : invalid-flags1
+ :
+ $* <'cmd >~/foo/z' 2>>EOE != 0
+ testscript:1:7: error: junk at the end of stdout regex redirect
+ EOE
+
+ : invalid-flags2
+ :
+ $* <'cmd >~/foo/iz' 2>>EOE != 0
+ testscript:1:7: error: junk at the end of stdout regex redirect
+ EOE
+
+ : no-newline
+ :
+ $* <'cmd >:~/fo*/' >'cmd >:~/fo*/'
+ }
+
+ : stderr
+ :
+ {
+ : missed
+ :
+ $* <'cmd 2>~' 2>>EOE != 0
+ testscript:1:8: error: missing stderr here-string regex
+ EOE
+
+ : no-introducer
+ :
+ : Note that there is no need to reproduce all the errors as for stdout.
+ : All we need is to make sure that the proper description is passed to
+ : the parse_regex() function.
+ :
+ $* <'cmd 2>~""' 2>>EOE != 0
+ testscript:1:8: error: no introducer character in stderr regex redirect
+ EOE
+ }
+
+ : modifier-last
+ :
+ $* <'cmd >~/x' 2>>EOE != 0
+ testscript:1:7: error: no closing introducer character in stdout regex redirect
+ EOE
+}
+
+: here-doc
+:
+{
+ : stdout
+ :
+ {
+ : missed
+ :
+ $* <'cmd >>~' 2>>EOE != 0
+ testscript:1:8: error: expected here-document regex end marker
+ EOE
+
+ : portable-path-introducer
+ :
+ $* <<EOI 2>>EOE != 0
+ cmd >>/~/EOO/
+ foo
+ EOO
+ EOI
+ testscript:1:5: error: portable path modifier and '/' introducer in here-document regex end marker
+ EOE
+
+ : unterminated-line-char
+ :
+ $* <<EOI 2>>EOE != 0
+ cmd >>~/EOO/
+ /
+ EOO
+ EOI
+ testscript:2:1: error: no syntax line characters
+ EOE
+
+ : empty
+ :
+ $* <<EOI 2>>EOE != 0
+ cmd >>:~/EOO/
+ EOO
+ EOI
+ testscript:2:1: error: empty here-document regex
+ EOE
+
+ : no-flags
+ :
+ $* <<EOI >>EOO
+ cmd 2>>~/EOE/
+ foo
+ /?
+ /foo/
+ /foo/*
+ /foo/i
+ /foo/i*
+
+ //
+ //*
+ EOE
+ EOI
+ cmd 2>>~/EOE/
+ foo
+ /?
+ /foo/
+ /foo/*
+ /foo/i
+ /foo/i*
+
+ //
+ //*
+ EOE
+ EOO
+
+ : no-newline
+ :
+ $* <'cmd >:~/fo*/' >'cmd >:~/fo*/'
+ $* <<EOI >>EOO
+ cmd 2>>:~/EOE/
+ foo
+ EOE
+ EOI
+ cmd 2>>:~/EOE/
+ foo
+ EOE
+ EOO
+
+ : end-marker-restore
+ :
+ {
+ : idot
+ :
+ $* <<EOI >>EOO
+ cmd 2>>~/EOE/d
+ foo
+ EOE
+ EOI
+ cmd 2>>~/EOE/d
+ foo
+ EOE
+ EOO
+
+ : icase
+ :
+ $* <<EOI >>EOO
+ cmd 2>>~/EOE/i
+ foo
+ EOE
+ EOI
+ cmd 2>>~/EOE/i
+ foo
+ EOE
+ EOO
+ }
+ }
+
+ : stderr
+ :
+ {
+ : missed
+ :
+ $* <'cmd 2>>~' 2>>EOE != 0
+ testscript:1:9: error: expected here-document regex end marker
+ EOE
+ }
+
+ : modifier-last
+ :
+ $* <'cmd >>~:/FOO/' 2>>EOE != 0
+ testscript:1:8: error: expected here-document regex end marker
+ EOE
+}
diff --git a/libbuild2/test/script/parser+scope-if.test.testscript b/libbuild2/test/script/parser+scope-if.test.testscript
new file mode 100644
index 0000000..aad3f37
--- /dev/null
+++ b/libbuild2/test/script/parser+scope-if.test.testscript
@@ -0,0 +1,554 @@
+# file : libbuild2/test/script/parser+scope-if.test.testscript
+# copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
+# license : MIT; see accompanying LICENSE file
+
+: if
+:
+{
+ : true
+ :
+ $* -s <<EOI >>EOO
+ if true foo
+ {
+ cmd
+ }
+ EOI
+ {
+ ? true foo
+ {
+ cmd
+ }
+ }
+ EOO
+
+ : false
+ :
+ $* -s <<EOI >>EOO
+ if false foo
+ {
+ cmd
+ }
+ EOI
+ {
+ ? false foo
+ }
+ EOO
+
+ : not-true
+ :
+ $* -s <<EOI >>EOO
+ if! true
+ {
+ cmd
+ }
+ EOI
+ {
+ ? true
+ }
+ EOO
+
+ : not-false
+ :
+ $* -s <<EOI >>EOO
+ if! false
+ {
+ cmd
+ }
+ EOI
+ {
+ ? false
+ {
+ cmd
+ }
+ }
+ EOO
+
+ : eos-inside
+ :
+ $* <<EOI 2>>EOE != 0
+ if
+ {
+ EOI
+ testscript:3:1: error: expected '}' at the end of the scope
+ EOE
+
+}
+
+: elif
+:
+{
+ : true
+ :
+ $* -s <<EOI >>EOO
+ if false
+ {
+ cmd
+ }
+ elif true
+ {
+ cmd1
+ }
+ EOI
+ {
+ ? false
+ ? true
+ {
+ cmd1
+ }
+ }
+ EOO
+
+ : false
+ :
+ $* -s <<EOI >>EOO
+ if false
+ {
+ cmd
+ }
+ elif false
+ {
+ cmd
+ }
+ EOI
+ {
+ ? false
+ ? false
+ }
+ EOO
+
+ : not-false
+ :
+ $* -s <<EOI >>EOO
+ if false
+ {
+ cmd
+ }
+ elif! false
+ {
+ cmd1
+ }
+ EOI
+ {
+ ? false
+ ? false
+ {
+ cmd1
+ }
+ }
+ EOO
+
+ : not-true
+ :
+ $* -s <<EOI >>EOO
+ if false
+ {
+ cmd
+ }
+ elif! true
+ {
+ cmd
+ }
+ EOI
+ {
+ ? false
+ ? true
+ }
+ EOO
+
+ : after-else
+ :
+ $* <<EOI 2>>EOE != 0
+ if false
+ {
+ cmd
+ }
+ else
+ {
+ cmd
+ }
+ elif true
+ {
+ cmd
+ }
+ EOI
+ testscript:9:1: error: 'elif' after 'else'
+ EOE
+}
+
+: else
+:
+{
+ : true
+ :
+ $* -s <<EOI >>EOO
+ if false
+ {
+ cmd
+ }
+ else
+ {
+ cmd1
+ }
+ EOI
+ {
+ ? false
+ {
+ cmd1
+ }
+ }
+ EOO
+
+ : false
+ :
+ $* -s <<EOI >>EOO
+ if true
+ {
+ cmd1
+ }
+ else
+ {
+ cmd
+ }
+ EOI
+ {
+ ? true
+ {
+ cmd1
+ }
+ }
+ EOO
+
+ : chain
+ :
+ $* -s <<EOI >>EOO
+ if false
+ {
+ cmd
+ }
+ elif false
+ {
+ cmd
+ cmd
+ }
+ elif false
+ {
+ cmd
+ }
+ elif true
+ {
+ cmd1
+ cmd2
+ }
+ elif false
+ {
+ cmd
+ }
+ else
+ {
+ cmd
+ cmd
+ }
+ EOI
+ {
+ ? false
+ ? false
+ ? false
+ ? true
+ {
+ {
+ cmd1
+ }
+ {
+ cmd2
+ }
+ }
+ }
+ EOO
+
+ : scope-expected
+ :
+ $* <<EOI 2>>EOE != 0
+ if
+ {
+ cmd
+ }
+ else
+ cmd
+ EOI
+ testscript:5:1: error: expected scope after 'else'
+ EOE
+
+ : after-else
+ :
+ $* <<EOI 2>>EOE != 0
+ if false
+ {
+ cmd
+ }
+ else
+ {
+ cmd
+ }
+ else
+ {
+ cmd
+ }
+ EOI
+ testscript:9:1: error: 'else' after 'else'
+ EOE
+}
+
+: nested
+:
+{
+ : take
+ :
+ $* -s <<EOI >>EOO
+ if true
+ {
+ cmd1
+ if false
+ {
+ cmd
+ }
+ elif false
+ {
+ if true
+ {
+ cmd
+ }
+ }
+ else
+ {
+ cmd2
+ }
+ cmd3
+ }
+ EOI
+ {
+ ? true
+ {
+ {
+ cmd1
+ }
+ ? false
+ ? false
+ {
+ {
+ cmd2
+ }
+ }
+ {
+ cmd3
+ }
+ }
+ }
+ EOO
+
+ : skip
+ :
+ $* -s <<EOI >>EOO
+ if false
+ {
+ cmd1
+ if false
+ {
+ cmd
+ }
+ elif false
+ {
+ if true
+ {
+ cmd
+ }
+ }
+ else
+ {
+ cmd2
+ }
+ cmd3
+ }
+ else
+ {
+ cmd
+ }
+ EOI
+ {
+ ? false
+ {
+ {
+ cmd
+ }
+ }
+ }
+ EOO
+}
+
+: demote
+:
+{
+ : group
+ : Chain remains a group
+ :
+ $* -s <<EOI >>EOO
+ if false
+ {
+ cmd
+ }
+ elif true
+ {
+ cmd1
+ cmd2
+ }
+ else
+ {
+ cmd
+ }
+ EOI
+ {
+ ? false
+ ? true
+ {
+ {
+ cmd1
+ }
+ {
+ cmd2
+ }
+ }
+ }
+ EOO
+
+ : test
+ : Chain demoted to test
+ :
+ $* -s <<EOI >>EOO
+ if false
+ {
+ cmd
+ }
+ elif true
+ {
+ cmd1
+ }
+ else
+ {
+ cmd
+ }
+ EOI
+ {
+ ? false
+ ? true
+ {
+ cmd1
+ }
+ }
+ EOO
+}
+
+: line-index
+: Make sure command line index spans setup/if/teardown
+:
+$* -s -l <<EOI >>EOO
++setup # 1
+
+if false one # 2
+{
+ cmd
+}
+elif false two # 3
+{
+ cmd
+}
+elif true # 4
+{
+ cmd1
+}
+elif false # 5
+{
+ cmd
+}
+else
+{
+ cmd
+}
+
+if false one # 6
+{
+ cmd
+}
+elif false two # 7
+{
+ cmd
+}
+else
+{
+ cmd2
+}
+
+-tdown # 8
+EOI
+{
+ +setup # 1
+ ? false one # 2
+ ? false two # 3
+ ? true # 4
+ {
+ cmd1 # 0
+ }
+ ? false one # 6
+ ? false two # 7
+ {
+ cmd2 # 0
+ }
+ -tdown # 8
+}
+EOO
+
+: scope-comman-if
+:
+$* -s <<EOI >>EOO
+if true
+{
+ cmd
+}
+if true
+ cmd1
+ cmd2
+end
+EOI
+{
+ ? true
+ {
+ cmd
+ }
+ {
+ ? true
+ cmd1
+ cmd2
+ }
+}
+EOO
+
+: shared-id-desc
+:
+$* -s -i <<EOI >>EOO
+: test summary
+:
+if false
+{
+ cmd
+}
+else
+{
+ cmd1
+}
+EOI
+{
+ ? false
+ : sm:test summary
+ { # 3
+ cmd1
+ }
+}
+EOO
diff --git a/libbuild2/test/script/parser+scope.test.testscript b/libbuild2/test/script/parser+scope.test.testscript
new file mode 100644
index 0000000..bfb1a59
--- /dev/null
+++ b/libbuild2/test/script/parser+scope.test.testscript
@@ -0,0 +1,280 @@
+# file : libbuild2/test/script/parser+scope.test.testscript
+# copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
+# license : MIT; see accompanying LICENSE file
+
+$* testscript <'cmd $@' >"cmd 1" : id-testscript
+$* foo.testscript <'cmd $@' >"cmd foo/1" : id
+
+: wd-testscript
+:
+$* testscript <'cmd "$~"' >~"%cmd '?.+[/\\\\]test-driver[/\\\\]1'?%"
+
+: wd
+:
+$* foo.testscript <'cmd "$~"' >~"%cmd '?.+[/\\\\]test-driver[/\\\\]foo[/\\\\]1'?%"
+
+: group
+:
+{
+ : empty
+ :
+ $* -s <<EOI
+ {
+ }
+ EOI
+
+ : empty-empty
+ :
+ $* -s <<EOI
+ {
+ {
+ }
+ }
+ EOI
+
+ : non-empty
+ :
+ $* -s <<EOI >>EOO
+ {
+ cmd1
+ cmd2
+ }
+ EOI
+ {
+ {
+ {
+ cmd1
+ }
+ {
+ cmd2
+ }
+ }
+ }
+ EOO
+}
+
+: test
+:
+{
+ : explicit
+ :
+ {
+ : one-level
+ :
+ $* -s -i <<EOI >>EOO
+ {
+ cmd
+ }
+ EOI
+ {
+ { # 1
+ cmd
+ }
+ }
+ EOO
+
+ : nested
+ :
+ $* -s -i <<EOI >>EOO
+ {
+ {
+ cmd
+ }
+ }
+ EOI
+ {
+ { # 1
+ cmd
+ }
+ }
+ EOO
+
+ : var
+ :
+ $* -s -i <<EOI >>EOO
+ {
+ x = abc
+ cmd $x
+ }
+ EOI
+ {
+ { # 1
+ cmd abc
+ }
+ }
+ EOO
+
+ : setup
+ :
+ $* -s -i <<EOI >>EOO
+ {
+ x = abc
+ +setup
+ cmd $x
+ }
+ EOI
+ {
+ { # 1
+ +setup
+ { # 1/4
+ cmd abc
+ }
+ }
+ }
+ EOO
+ }
+
+ : implicit
+ {
+ : one-cmd
+ :
+ $* -s <<EOI >>EOO
+ cmd1
+ EOI
+ {
+ {
+ cmd1
+ }
+ }
+ EOO
+
+ : two-cmd
+ :
+ $* -s <<EOI >>EOO
+ cmd1;
+ cmd2
+ EOI
+ {
+ {
+ cmd1
+ cmd2
+ }
+ }
+ EOO
+
+ : three-cmd
+ :
+ $* -s <<EOI >>EOO
+ cmd1;
+ cmd2;
+ cmd3
+ EOI
+ {
+ {
+ cmd1
+ cmd2
+ cmd3
+ }
+ }
+ EOO
+
+ : var
+ :
+ $* -s <<EOI >>EOO
+ cmd1;
+ x = abc;
+ cmd2 $x
+ EOI
+ {
+ {
+ cmd1
+ cmd2 abc
+ }
+ }
+ EOO
+
+ : var-first
+ :
+ $* -s <<EOI >>EOO
+ x = abc;
+ cmd $x
+ EOI
+ {
+ {
+ cmd abc
+ }
+ }
+ EOO
+
+ : var-setup-tdown
+ :
+ $* -s <<EOI >>EOO
+ x = abc
+ cmd $x
+ y = 123
+ EOI
+ {
+ {
+ cmd abc
+ }
+ }
+ EOO
+
+ : after-tdown
+ :
+ $* <<EOI 2>>EOE != 0
+ cmd1
+ x = abc
+ cmd2
+ EOI
+ testscript:3:1: error: test after teardown
+ testscript:2:1: info: last teardown line appears here
+ EOE
+ }
+}
+
+: expected
+{
+ : newline-lcbrace
+ :
+ $* <:"{x" 2>>EOE != 0
+ testscript:1:2: error: expected newline after '{'
+ EOE
+
+ : rcbrace
+ :
+ $* <"{" 2>>EOE != 0
+ testscript:2:1: error: expected '}' at the end of the scope
+ EOE
+
+ : line-rcbrace
+ :
+ $* <<EOI 2>>EOE != 0
+ {
+ cmd;
+ }
+ EOI
+ testscript:3:1: error: expected another line after ';'
+ EOE
+
+ : newline-rcbrace
+ :
+ $* <<:EOI 2>>EOE != 0
+ {
+ }
+ EOI
+ testscript:2:2: error: expected newline after '}'
+ EOE
+
+ : line-eof
+ :
+ $* <<EOI 2>>EOE != 0
+ cmd;
+ EOI
+ testscript:2:1: error: expected another line after ';'
+ EOE
+
+ : newline-cmd
+ :
+ $* <<:EOI 2>>EOE != 0
+ cmd;
+ EOI
+ testscript:1:5: error: expected newline instead of <end of file>
+ EOE
+
+ : newline-var
+ :
+ $* <:"x = abc;" 2>>EOE != 0
+ testscript:1:9: error: expected newline instead of <end of file>
+ EOE
+}
diff --git a/libbuild2/test/script/parser+setup-teardown.test.testscript b/libbuild2/test/script/parser+setup-teardown.test.testscript
new file mode 100644
index 0000000..5f1418a
--- /dev/null
+++ b/libbuild2/test/script/parser+setup-teardown.test.testscript
@@ -0,0 +1,151 @@
+# file : libbuild2/test/script/parser+setup-teardown.test.testscript
+# copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
+# license : MIT; see accompanying LICENSE file
+
+: setup
+:
+{
+ : followed
+ :
+ {
+ : semi
+ :
+ $* <"+cmd;" 2>>EOE != 0
+ testscript:1:5: error: ';' after setup command
+ EOE
+
+ : colon
+ :
+ $* <"+cmd:" 2>>EOE != 0
+ testscript:1:5: error: ':' after setup command
+ EOE
+ }
+
+ : after
+ :
+ {
+ : test
+ :
+ $* <<EOI 2>>EOE != 0
+ cmd
+ +cmd
+ EOI
+ testscript:2:1: error: setup command after tests
+ EOE
+
+ : after-tdownt
+ :
+ $* <<EOI 2>>EOE != 0
+ -cmd
+ +cmd
+ EOI
+ testscript:2:1: error: setup command after teardown
+ EOE
+ }
+
+ : in-test
+ :
+ $* <<EOI 2>>EOE != 0
+ cmd;
+ +cmd
+ EOI
+ testscript:2:1: error: setup command in test
+ EOE
+}
+
+: tdown
+:
+{
+ : followed
+ :
+ {
+ : semi
+ :
+ $* <"-cmd;" 2>>EOE != 0
+ testscript:1:5: error: ';' after teardown command
+ EOE
+
+ : colon
+ :
+ $* <"-cmd:" 2>>EOE != 0
+ testscript:1:5: error: ':' after teardown command
+ EOE
+ }
+
+ : in-test
+ :
+ $* <<EOI 2>>EOE != 0
+ cmd;
+ -cmd
+ EOI
+ testscript:2:1: error: teardown command in test
+ EOE
+}
+
+: var
+:
+{
+ : between-tests
+ :
+ $* <<EOI 2>>EOE != 0
+ cmd
+ x = y
+ cmd
+ EOI
+ testscript:3:1: error: test after teardown
+ testscript:2:1: info: last teardown line appears here
+ EOE
+
+ : between-tests-scope
+ :
+ $* <<EOI 2>>EOE != 0
+ cmd
+ x = y
+ {
+ cmd
+ }
+ EOI
+ testscript:3:1: error: scope after teardown
+ testscript:2:1: info: last teardown line appears here
+ EOE
+
+ : between-tests-command-if
+ :
+ $* <<EOI 2>>EOE != 0
+ cmd
+ x = y
+ if true
+ cmd
+ end
+ EOI
+ testscript:3:1: error: test after teardown
+ testscript:2:1: info: last teardown line appears here
+ EOE
+
+ : between-tests-scope-if
+ :
+ $* <<EOI 2>>EOE != 0
+ cmd
+ x = y
+ if true
+ {
+ cmd
+ }
+ EOI
+ testscript:3:1: error: scope after teardown
+ testscript:2:1: info: last teardown line appears here
+ EOE
+
+ : between-tests-variable-if
+ :
+ $* <<EOI >>EOO
+ cmd
+ x = y
+ if true
+ y = x
+ end
+ EOI
+ cmd
+ ? true
+ EOO
+}
diff --git a/libbuild2/test/script/parser.cxx b/libbuild2/test/script/parser.cxx
new file mode 100644
index 0000000..260bc88
--- /dev/null
+++ b/libbuild2/test/script/parser.cxx
@@ -0,0 +1,3451 @@
+// file : libbuild2/test/script/parser.cxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
+// license : MIT; see accompanying LICENSE file
+
+#include <libbuild2/test/script/parser.hxx>
+
+#include <sstream>
+
+#include <libbuild2/context.hxx> // sched, keep_going
+
+#include <libbuild2/test/script/lexer.hxx>
+#include <libbuild2/test/script/runner.hxx>
+
+using namespace std;
+
+namespace build2
+{
+ namespace test
+ {
+ namespace script
+ {
+ using type = token_type;
+
+ // Return true if the string consists of exactly one character and that
+ // character is a digit (used to detect the special $N variables).
+ //
+ static inline bool
+ digit (const string& s)
+ {
+ return s.size () == 1 && butl::digit (s[0]); // Size is checked first so s[0] is valid.
+ }
+
+ //
+ // Pre-parse.
+ //
+
+ void parser::
+ pre_parse (script& s) // Open the testscript file of s and pre-parse it.
+ {
+ const path& p (s.script_target.path ());
+ assert (!p.empty ()); // Should have been assigned.
+
+ try
+ {
+ ifdstream ifs (p);
+ pre_parse (ifs, s); // Delegate to the stream overload.
+ }
+ catch (const io_error& e)
+ {
+ fail << "unable to read testscript " << p << ": " << e << endf; // I/O errors become a diagnostic naming the file.
+ }
+ }
+
+ void parser::
+ pre_parse (istream& is, script& s) // Pre-parse the testscript read from is into s.
+ {
+ path_ = &*s.paths_.insert (s.script_target.path ()).first; // Store the path in s.paths_ and point path_ at that entry.
+
+ pre_parse_ = true;
+
+ lexer l (is, *path_, lexer_mode::command_line);
+ set_lexer (&l);
+
+ id_prefix_.clear ();
+
+ id_map idm; // Local to this call; id_map_ is pointed at it below.
+ include_set ins; // Local to this call; include_set_ is pointed at it below.
+
+ script_ = &s;
+ runner_ = nullptr;
+ group_ = script_; // The script itself is the outermost (implied) group.
+ id_map_ = &idm;
+ include_set_ = &ins;
+ scope_ = nullptr;
+
+ //@@ PAT TODO: set pbase_?
+
+ // Start location of the implied script group is the beginning of
+ // the file. End location -- end of the file.
+ //
+ group_->start_loc_ = location (path_, 1, 1);
+
+ token t (pre_parse_scope_body ());
+
+ if (t.type != type::eos)
+ fail (t) << "stray " << t; // The body parser returns on '}' or eos; only eos is valid at the top level.
+
+ group_->end_loc_ = get_location (t);
+ }
+
+ bool parser::
+ pre_parse_demote_group_scope (unique_ptr<scope>& s) // Replace group s with a test if possible; return true if replaced.
+ {
+ // See if this turned out to be an explicit test scope. An explicit
+ // test scope contains a single test, only variable assignments in
+ // setup and nothing in teardown. Plus only the group can have the
+ // description. Because we apply this recursively, also disqualify
+ // a test scope that has an if-condition.
+ //
+ // If we have a chain, then all the scopes must be demotable. So we
+ // first check if this scope is demotable and if so then recurse for
+ // the next in chain.
+ //
+ group& g (static_cast<group&> (*s));
+
+ auto& sc (g.scopes);
+ auto& su (g.setup_);
+ auto& td (g.tdown_);
+
+ test* t;
+ if (sc.size () == 1 &&
+ (t = dynamic_cast<test*> (sc.back ().get ())) != nullptr &&
+ find_if (
+ su.begin (), su.end (),
+ [] (const line& l) {
+ return l.type != line_type::var;
+ }) == su.end () &&
+
+ td.empty () &&
+ !t->desc &&
+ !t->if_cond_)
+ {
+ if (g.if_chain != nullptr &&
+ !pre_parse_demote_group_scope (g.if_chain))
+ return false; // The rest of the chain is not demotable, so neither is this scope.
+
+ // It would have been nice to reuse the test object and only throw
+ // away the group. However, the merged scope has to use id_path and
+ // wd_path of the group. So to keep things simple we are going to
+ // throw away both and create a new test object.
+ //
+ // We always use the group's id since the test cannot have a
+ // user-provided one.
+ //
+ unique_ptr<test> m (new test (g.id_path.leaf ().string (), *group_));
+
+ // Move the description, if-condition, and if-chain.
+ //
+ m->desc = move (g.desc);
+ m->if_cond_ = move (g.if_cond_);
+ m->if_chain = move (g.if_chain);
+
+ // Merge the lines of the group and the test.
+ //
+ if (su.empty ())
+ m->tests_ = move (t->tests_);
+ else
+ {
+ m->tests_ = move (su); // Should come first.
+ m->tests_.insert (m->tests_.end (),
+ make_move_iterator (t->tests_.begin ()),
+ make_move_iterator (t->tests_.end ()));
+ }
+
+ // Use start/end locations of the outer scope.
+ //
+ m->start_loc_ = g.start_loc_;
+ m->end_loc_ = g.end_loc_;
+
+ s = move (m); // Replaces (and thus destroys) the group that g, sc, su, td, and t refer to.
+ return true;
+ }
+
+ return false; // Not an explicit test scope.
+ }
+
+ token parser::
+ pre_parse_scope_body () // Pre-parse lines into group_ until '}' or eos and return that token.
+ {
+ // enter: next token is first token of scope body
+ // leave: rcbrace or eos (returned)
+
+ token t;
+ type tt;
+
+ // Parse lines (including nested scopes) until we see '}' or eos.
+ //
+ for (;;)
+ {
+ // Start lexing each line recognizing leading '.+-{}'.
+ //
+ tt = peek (lexer_mode::first_token);
+
+ // Handle description.
+ //
+ optional<description> d;
+ if (tt == type::colon)
+ d = pre_parse_leading_description (t, tt);
+
+ // Determine the line type by peeking at the first token.
+ //
+ switch (tt)
+ {
+ case type::eos:
+ case type::rcbrace:
+ {
+ next (t, tt);
+
+ if (d)
+ fail (t) << "description before " << t; // A description must be followed by something to describe.
+
+ return t;
+ }
+ case type::lcbrace:
+ {
+ // Nested scope.
+ //
+ next (t, tt); // Get '{'.
+ const location sl (get_location (t));
+
+ // First check that we don't have any teardown commands yet.
+ // This will detect things like variable assignments between
+ // scopes.
+ //
+ if (!group_->tdown_.empty ())
+ {
+ location tl (
+ group_->tdown_.back ().tokens.front ().location ());
+
+ fail (sl) << "scope after teardown" <<
+ info (tl) << "last teardown line appears here";
+ }
+
+ // If there is no user-supplied id, use the line number
+ // (prefixed with include id) as the scope id.
+ //
+ const string& id (
+ d && !d->id.empty ()
+ ? d->id
+ : insert_id (id_prefix_ + to_string (sl.line), sl));
+
+ unique_ptr<scope> g (pre_parse_scope_block (t, tt, id));
+ g->desc = move (d);
+
+ pre_parse_demote_group_scope (g); // May replace g with a test; the result is not needed here.
+ group_->scopes.push_back (move (g));
+ continue;
+ }
+ default:
+ {
+ pre_parse_line (t, tt, d); // Anything else is a (possibly multi-line) logical line.
+ assert (tt == type::newline);
+ break;
+ }
+ }
+ }
+ }
+
+ unique_ptr<group> parser::
+ pre_parse_scope_block (token& t, type& tt, const string& id) // Pre-parse a '{ ... }' block into a new group with this id.
+ {
+ // enter: lcbrace
+ // leave: newline after rcbrace
+
+ const location sl (get_location (t));
+
+ if (next (t, tt) != type::newline)
+ fail (t) << "expected newline after '{'";
+
+ // Push group.
+ //
+ id_map idm; // Fresh id map for the nested group.
+ include_set ins; // Fresh include set for the nested group.
+
+ unique_ptr<group> g (new group (id, *group_));
+
+ id_map* om (id_map_); // Saved; restored in "Pop group" below.
+ id_map_ = &idm;
+
+ include_set* os (include_set_); // Saved; restored in "Pop group" below.
+ include_set_ = &ins;
+
+ group* og (group_); // Saved; restored in "Pop group" below.
+ group_ = g.get ();
+
+ // Parse body.
+ //
+ group_->start_loc_ = sl;
+ token e (pre_parse_scope_body ());
+ group_->end_loc_ = get_location (e);
+
+ // Pop group.
+ //
+ group_ = og;
+ include_set_ = os;
+ id_map_ = om;
+
+ if (e.type != type::rcbrace)
+ fail (e) << "expected '}' at the end of the scope"; // The body ended on eos rather than '}'.
+
+ if (next (t, tt) != type::newline)
+ fail (t) << "expected newline after '}'";
+
+ return g;
+ }
+
+ // Parse a logical line (as well as scope-if since the only way to
+ // recognize it is to parse the if line). If ls is not NULL, then the
+ // line(s) are appended there, otherwise the destination (setup, teardown,
+ // or a new implicit test scope) is decided here. If one is true then only
+ // parse one line. Return whether the (last parsed) line ended with a semi.
+ //
+ bool parser::
+ pre_parse_line (token& t, type& tt,
+ optional<description>& d,
+ lines* ls,
+ bool one)
+ {
+ // enter: next token is peeked at (type in tt)
+ // leave: newline
+
+ // Note: token is only peeked at.
+ //
+ const location ll (get_location (peeked ()));
+
+ // Determine the line type/start token.
+ //
+ line_type lt;
+ type st (type::eos); // Stays eos unless set to plus/minus or the assignment kind below.
+
+ switch (tt)
+ {
+ case type::dot:
+ {
+ // Directive.
+ //
+ next (t, tt); // Skip dot.
+ next (t, tt); // Get the directive name.
+
+ if (tt != type::word || t.qtype != quote_type::unquoted)
+ fail (t) << "expected directive name instead of " << t;
+
+ // Make sure we are not inside a test (i.e., after semi).
+ //
+ if (ls != nullptr)
+ fail (ll) << "directive after ';'";
+
+ const string& n (t.value);
+
+ if (n == "include")
+ pre_parse_directive (t, tt);
+ else
+ fail (t) << "unknown directive '" << n << "'";
+
+ assert (tt == type::newline);
+ return false;
+ }
+ case type::plus:
+ case type::minus:
+ {
+ // Setup/teardown command.
+ //
+ st = tt;
+
+ next (t, tt); // Start saving tokens from the next one.
+ replay_save ();
+ next (t, tt);
+
+ // See if this is a special command.
+ //
+ lt = line_type::cmd; // Default.
+
+ if (tt == type::word && t.qtype == quote_type::unquoted)
+ {
+ const string& n (t.value);
+
+ if (n == "if") lt = line_type::cmd_if;
+ else if (n == "if!") lt = line_type::cmd_ifn;
+ }
+
+ break;
+ }
+ default:
+ {
+ // Either variable assignment or test command.
+ //
+ replay_save (); // Start saving tokens from the current one.
+ next (t, tt);
+
+ // Decide whether this is a variable assignment or a command.
+ //
+ // It is an assignment if the first token is an unquoted name and
+ // the next token is an assign/append/prepend operator. Assignment
+ // to a computed variable name must use the set builtin.
+ //
+ // Note also that special commands take precedence over variable
+ // assignments.
+ //
+ lt = line_type::cmd; // Default.
+
+ if (tt == type::word && t.qtype == quote_type::unquoted)
+ {
+ const string& n (t.value);
+
+ if (n == "if") lt = line_type::cmd_if;
+ else if (n == "if!") lt = line_type::cmd_ifn;
+ else if (n == "elif") lt = line_type::cmd_elif;
+ else if (n == "elif!") lt = line_type::cmd_elifn;
+ else if (n == "else") lt = line_type::cmd_else;
+ else if (n == "end") lt = line_type::cmd_end;
+ else
+ {
+ // Switch the recognition of leading variable assignments for
+ // the next token. This is safe to do because we know we
+ // cannot be in the quoted mode (since the current token is
+ // not quoted).
+ //
+ type p (peek (lexer_mode::second_token));
+
+ if (p == type::assign ||
+ p == type::prepend ||
+ p == type::append)
+ {
+ lt = line_type::var;
+ st = p;
+ }
+ }
+ }
+
+ break;
+ }
+ }
+
+ // Pre-parse the line keeping track of whether it ends with a semi.
+ //
+ bool semi (false);
+
+ line ln;
+ switch (lt)
+ {
+ case line_type::var:
+ {
+ // Check if we are trying to modify any of the special aliases
+ // ($*, $N, $~, $@).
+ //
+ string& n (t.value);
+
+ if (n == "*" || n == "~" || n == "@" || digit (n))
+ fail (t) << "attempt to set '" << n << "' variable directly";
+
+ // Pre-enter the variables now while we are executing serially.
+ // Once parallel, it becomes a lot harder to do.
+ //
+ ln.var = &script_->var_pool.insert (move (n));
+
+ next (t, tt); // Assignment kind.
+ parse_variable_line (t, tt);
+
+ semi = (tt == type::semi);
+
+ if (tt == type::semi)
+ next (t, tt);
+
+ if (tt != type::newline)
+ fail (t) << "expected newline instead of " << t;
+
+ break;
+ }
+ case line_type::cmd_if:
+ case line_type::cmd_ifn:
+ case line_type::cmd_elif:
+ case line_type::cmd_elifn:
+ case line_type::cmd_else:
+ case line_type::cmd_end:
+ next (t, tt); // Skip to start of command.
+ // Fall through.
+ case line_type::cmd:
+ {
+ pair<command_expr, here_docs> p;
+
+ if (lt != line_type::cmd_else && lt != line_type::cmd_end)
+ p = parse_command_expr (t, tt); // 'else' and 'end' take no command expression.
+
+ // Colon and semicolon are only valid in test command lines and
+ // after 'end' in if-else. Note that we still recognize them
+ // lexically, they are just not valid tokens per the grammar.
+ //
+ if (tt != type::newline)
+ {
+ if (lt != line_type::cmd && lt != line_type::cmd_end)
+ fail (t) << "expected newline instead of " << t;
+
+ switch (st)
+ {
+ case type::plus: fail (t) << t << " after setup command" << endf;
+ case type::minus: fail (t) << t << " after teardown command" << endf;
+ }
+ }
+
+ switch (tt)
+ {
+ case type::colon:
+ {
+ if (d)
+ fail (ll) << "both leading and trailing descriptions";
+
+ d = parse_trailing_description (t, tt);
+ break;
+ }
+ case type::semi:
+ {
+ semi = true;
+ next (t, tt); // Get newline.
+ break;
+ }
+ }
+
+ if (tt != type::newline)
+ fail (t) << "expected newline instead of " << t;
+
+ parse_here_documents (t, tt, p);
+ break;
+ }
+ }
+
+ assert (tt == type::newline);
+
+ // Stop saving and get the tokens.
+ //
+ lines ls_data; // Local buffer used when the caller did not supply ls.
+
+ if (ls == nullptr)
+ ls = &ls_data;
+
+ ln.type = lt;
+ ln.tokens = replay_data ();
+ ls->push_back (move (ln));
+
+ if (lt == line_type::cmd_if || lt == line_type::cmd_ifn)
+ {
+ semi = pre_parse_if_else (t, tt, d, *ls);
+
+ // If this turned out to be scope-if, then ls is empty, semi is
+ // false, and none of the below logic applies.
+ //
+ if (ls->empty ())
+ return semi;
+ }
+
+ // Unless we were told where to put it, decide where it actually goes.
+ //
+ if (ls == &ls_data)
+ {
+ // First pre-check variable and variable-if: by themselves (i.e.,
+ // without a trailing semicolon) they are treated as either setup or
+ // teardown without plus/minus. Also handle illegal line types.
+ //
+ switch (lt)
+ {
+ case line_type::cmd_elif:
+ case line_type::cmd_elifn:
+ case line_type::cmd_else:
+ case line_type::cmd_end:
+ {
+ fail (ll) << lt << " without preceding 'if'" << endf;
+ }
+ case line_type::cmd_if:
+ case line_type::cmd_ifn:
+ {
+ // See if this is a variable-only command-if.
+ //
+ if (find_if (ls_data.begin (), ls_data.end (),
+ [] (const line& l) {
+ return l.type == line_type::cmd;
+ }) != ls_data.end ())
+ break; // Contains a command, so not variable-only.
+ }
+ // Fall through.
+ case line_type::var:
+ {
+ // If there is a semicolon after the variable then we assume
+ // it is part of a test (there is no reason to use semicolons
+ // after variables in the group scope). Otherwise -- setup or
+ // teardown.
+ //
+ if (!semi)
+ {
+ if (d)
+ {
+ if (lt == line_type::var)
+ fail (ll) << "description before setup/teardown variable";
+ else
+ fail (ll) << "description before/after setup/teardown "
+ << "variable-if";
+ }
+
+ // If we don't have any nested scopes or teardown commands,
+ // then we assume this is a setup, otherwise -- teardown.
+ //
+ ls = group_->scopes.empty () && group_->tdown_.empty ()
+ ? &group_->setup_
+ : &group_->tdown_;
+ }
+ break;
+ }
+ default:
+ break;
+ }
+
+ // If pre-check didn't change the destination, then it's a test.
+ //
+ if (ls == &ls_data)
+ {
+ switch (st)
+ {
+ // Setup.
+ //
+ case type::plus:
+ {
+ if (d)
+ fail (ll) << "description before setup command";
+
+ if (!group_->scopes.empty ())
+ fail (ll) << "setup command after tests";
+
+ if (!group_->tdown_.empty ())
+ fail (ll) << "setup command after teardown";
+
+ ls = &group_->setup_;
+ break;
+ }
+
+ // Teardown.
+ //
+ case type::minus:
+ {
+ if (d)
+ fail (ll) << "description before teardown command";
+
+ ls = &group_->tdown_;
+ break;
+ }
+
+ // Test command or variable.
+ //
+ default:
+ {
+ // First check that we don't have any teardown commands yet.
+ // This will detect things like variable assignments between
+ // tests.
+ //
+ if (!group_->tdown_.empty ())
+ {
+ location tl (
+ group_->tdown_.back ().tokens.front ().location ());
+
+ fail (ll) << "test after teardown" <<
+ info (tl) << "last teardown line appears here";
+ }
+ break;
+ }
+ }
+ }
+
+ // If the destination changed, then move the data over.
+ //
+ if (ls != &ls_data)
+ ls->insert (ls->end (),
+ make_move_iterator (ls_data.begin ()),
+ make_move_iterator (ls_data.end ()));
+ }
+
+ // If this command ended with a semicolon, then the next one should
+ // go to the same place.
+ //
+ if (semi && !one)
+ {
+ tt = peek (lexer_mode::first_token);
+ const location ll (get_location (peeked ())); // Location of the next line (shadows the outer ll).
+
+ switch (tt)
+ {
+ case type::colon:
+ fail (ll) << "description inside test" << endf;
+ case type::eos:
+ case type::rcbrace:
+ case type::lcbrace:
+ fail (ll) << "expected another line after ';'" << endf;
+ case type::plus:
+ fail (ll) << "setup command in test" << endf;
+ case type::minus:
+ fail (ll) << "teardown command in test" << endf;
+ default:
+ semi = pre_parse_line (t, tt, d, ls);
+ assert (tt == type::newline); // End of last test line.
+ }
+ }
+
+ // If this is a test then create implicit test scope.
+ //
+ if (ls == &ls_data)
+ {
+ // If there is no user-supplied id, use the line number (prefixed
+ // with include id) as the scope id.
+ //
+ const string& id (
+ d && !d->id.empty ()
+ ? d->id
+ : insert_id (id_prefix_ + to_string (ll.line), ll));
+
+ unique_ptr<test> p (new test (id, *group_));
+
+ p->desc = move (d);
+
+ p->start_loc_ = ll;
+ p->tests_ = move (ls_data);
+ p->end_loc_ = get_location (t);
+
+ group_->scopes.push_back (move (p));
+ }
+
+ return semi;
+ }
+
+ bool parser::
+ pre_parse_if_else (token& t, type& tt,
+ optional<description>& d,
+ lines& ls)
+ {
+ // enter: <newline> (previous line)
+ // leave: <newline>
+
+ tt = peek (lexer_mode::first_token); // First token of the line that follows the if/if! line.
+
+ return tt == type::lcbrace // '{' selects the scope form, anything else the command form.
+ ? pre_parse_if_else_scope (t, tt, d, ls)
+ : pre_parse_if_else_command (t, tt, d, ls);
+ }
+
+ bool parser::
+ pre_parse_if_else_scope (token& t, type& tt,
+ optional<description>& d,
+ lines& ls)
+ {
+ // enter: peeked token of next line (lcbrace)
+ // leave: newline
+
+ assert (ls.size () == 1); // The if/if! line.
+
+ // Use if/if! as the entire scope chain location.
+ //
+ const location sl (ls.back ().tokens.front ().location ());
+
+ // First check that we don't have any teardown commands yet. This
+ // will detect things like variable assignments between scopes.
+ //
+ if (!group_->tdown_.empty ())
+ {
+ location tl (
+ group_->tdown_.back ().tokens.front ().location ());
+
+ fail (sl) << "scope after teardown" <<
+ info (tl) << "last teardown line appears here";
+ }
+
+ // If there is no user-supplied id, use the line number (prefixed with
+ // include id) as the scope id. Note that we use the same id for all
+ // scopes in the chain.
+ //
+ const string& id (
+ d && !d->id.empty ()
+ ? d->id
+ : insert_id (id_prefix_ + to_string (sl.line), sl));
+
+ unique_ptr<scope> root; // Head of the chain; later elements hang off if_chain.
+
+ // Parse the if-else scope chain.
+ //
+ line_type bt (line_type::cmd_if); // Current block.
+
+ for (unique_ptr<scope>* ps (&root);; ps = &(*ps)->if_chain)
+ {
+ next (t, tt); // Get '{'.
+
+ {
+ unique_ptr<group> g (pre_parse_scope_block (t, tt, id));
+
+ // If-condition.
+ //
+ g->if_cond_ = move (ls.back ());
+ ls.clear (); // Leaves ls empty, which is how the caller detects the scope form.
+
+ // Description. For now we just duplicate it through the entire
+ // chain.
+ //
+ g->desc = (ps == &root ? d : root->desc);
+
+ *ps = move (g);
+ }
+
+ // See if what comes next is another chain element.
+ //
+ line_type lt (line_type::cmd_end); // Used here as "no further chain element".
+
+ type pt (peek (lexer_mode::first_token));
+ const token& p (peeked ());
+ const location ll (get_location (p));
+
+ if (pt == type::word && p.qtype == quote_type::unquoted)
+ {
+ if (p.value == "elif") lt = line_type::cmd_elif;
+ else if (p.value == "elif!") lt = line_type::cmd_elifn;
+ else if (p.value == "else") lt = line_type::cmd_else;
+ }
+
+ if (lt == line_type::cmd_end)
+ break;
+
+ // Check if-else block sequencing.
+ //
+ if (bt == line_type::cmd_else)
+ {
+ if (lt == line_type::cmd_else ||
+ lt == line_type::cmd_elif ||
+ lt == line_type::cmd_elifn)
+ fail (ll) << lt << " after " << bt;
+ }
+
+ // Parse just the condition line using pre_parse_line() in the "one"
+ // mode and into ls so that it is naturally picked up as if_cond_ on
+ // the next iteration.
+ //
+ optional<description> td;
+ bool semi (pre_parse_line (t, (tt = pt), td, &ls, true));
+ assert (ls.size () == 1 && ls.back ().type == lt);
+ assert (tt == type::newline);
+
+ // For any of these lines trailing semi or description is illegal.
+ //
+ // @@ Not the exact location of semi/colon.
+ //
+ if (semi)
+ fail (ll) << "';' after " << lt;
+
+ if (td)
+ fail (ll) << "description after " << lt;
+
+ // Make sure what comes next is another scope.
+ //
+ tt = peek (lexer_mode::first_token);
+
+ if (tt != type::lcbrace)
+ fail (ll) << "expected scope after " << lt;
+
+ // Update current if-else block.
+ //
+ switch (lt)
+ {
+ case line_type::cmd_elif:
+ case line_type::cmd_elifn: bt = line_type::cmd_elif; break;
+ case line_type::cmd_else: bt = line_type::cmd_else; break;
+ default: break;
+ }
+ }
+
+ pre_parse_demote_group_scope (root); // May replace the whole chain with tests; the result is not needed here.
+ group_->scopes.push_back (move (root));
+ return false; // We never end with a semi.
+ }
+
+ bool parser::
+ pre_parse_if_else_command (token& t, type& tt,
+ optional<description>& d,
+ lines& ls)
+ {
+ // enter: peeked first token of next line (type in tt)
+ // leave: newline (of the closing 'end' line); returns true if that line ended with ';'
+
+ // Parse lines into ls until we see closing 'end'. Nested if-else blocks are
+ // handled recursively. A trailing description on the 'end' line is moved into d
+ // (it is an error if d already contains a leading description).
+ for (line_type bt (line_type::cmd_if); // Current block (if, elif, or else).
+ ;
+ tt = peek (lexer_mode::first_token))
+ {
+ const location ll (get_location (peeked ()));
+
+ switch (tt) // Reject tokens that cannot start a line inside if-else.
+ {
+ case type::colon:
+ fail (ll) << "description inside " << bt << endf;
+ case type::eos:
+ case type::rcbrace:
+ case type::lcbrace:
+ fail (ll) << "expected closing 'end'" << endf;
+ case type::plus:
+ fail (ll) << "setup command inside " << bt << endf;
+ case type::minus:
+ fail (ll) << "teardown command inside " << bt << endf;
+ }
+
+ // Parse one line. Note that this one line can still be multiple
+ // lines in case of if-else. In this case we want to view it as
+ // cmd_if, not cmd_end. Thus remember the start position of the
+ // next logical line.
+ //
+ size_t i (ls.size ());
+
+ optional<description> td;
+ bool semi (pre_parse_line (t, tt, td, &ls, true));
+ assert (tt == type::newline);
+
+ line_type lt (ls[i].type); // Type of the first line that was just added.
+
+ // First take care of 'end'.
+ //
+ if (lt == line_type::cmd_end)
+ {
+ if (td)
+ {
+ if (d)
+ fail (ll) << "both leading and trailing descriptions";
+
+ d = move (td);
+ }
+
+ return semi;
+ }
+
+ // For any other line trailing semi or description is illegal.
+ //
+ // @@ Not the exact location of semi/colon.
+ //
+ if (semi)
+ fail (ll) << "';' inside " << bt;
+
+ if (td)
+ fail (ll) << "description inside " << bt;
+
+ // Check if-else block sequencing: else, elif, and elif! are illegal after else.
+ //
+ if (bt == line_type::cmd_else)
+ {
+ if (lt == line_type::cmd_else ||
+ lt == line_type::cmd_elif ||
+ lt == line_type::cmd_elifn)
+ fail (ll) << lt << " after " << bt;
+ }
+
+ // Update current if-else block (elif! is tracked as elif).
+ //
+ switch (lt)
+ {
+ case line_type::cmd_elif:
+ case line_type::cmd_elifn: bt = line_type::cmd_elif; break;
+ case line_type::cmd_else: bt = line_type::cmd_else; break;
+ default: break;
+ }
+ }
+ }
+
+ void parser::
+ pre_parse_directive (token& t, type& tt)
+ {
+ // enter: directive name (in t)
+ // leave: newline
+
+ string d (t.value); // Directive name.
+ location l (get_location (t)); // Directive location.
+ next (t, tt);
+
+ // Suspend pre-parsing since we want to really parse the line, with
+ // expansion, etc. Also parse the whole line in one go. The arguments, if
+ // any, end up in args (which stays empty if newline follows the name).
+ names args;
+
+ if (tt != type::newline)
+ {
+ pre_parse_ = false;
+ args = parse_names (t, tt,
+ pattern_mode::expand,
+ false,
+ "directive argument",
+ nullptr);
+ pre_parse_ = true;
+ }
+
+ if (tt != type::newline)
+ fail (t) << t << " after directive";
+
+ if (d == "include")
+ pre_parse_include_line (move (args), move (l));
+ else
+ assert (false); // Unhandled directive (only include is dispatched here).
+ }
+
+ void parser::
+ pre_parse_include_line (names args, location dl)
+ {
+ auto i (args.begin ());
+
+ // Process options. Leading simple names are examined as options: only --once
+ // is recognized and the first other name ends the options (it is an argument).
+ bool once (false);
+ for (; i != args.end () && i->simple (); ++i)
+ {
+ if (i->value == "--once")
+ once = true;
+ else
+ break;
+ }
+
+ // Process arguments. Each one is a testscript path that is pre-parsed in
+ // place with its own lexer, current path, and id prefix (restored afterwards).
+ auto include = [&dl, once, this] (string n) // throw invalid_path
+ {
+ // It may be tempting to use relative paths in diagnostics but it
+ // most likely will be misguided. So complete a relative path against
+ // path_->directory () and normalize it before entering into paths_.
+ auto enter_path = [this] (string n) -> const path&
+ {
+ path p (move (n));
+
+ if (p.relative ())
+ p = path_->directory () / p;
+
+ p.normalize ();
+
+ return *script_->paths_.insert (move (p)).first;
+ };
+
+ const path& p (enter_path (move (n)));
+
+ if (include_set_->insert (p).second || !once) // Skip if --once and already included.
+ {
+ try
+ {
+ ifdstream ifs (p);
+ lexer l (ifs, p, lexer_mode::command_line);
+
+ const path* op (path_);
+ path_ = &p;
+
+ lexer* ol (lexer_);
+ set_lexer (&l);
+
+ string oip (id_prefix_); // Extend the id prefix with <include-line>-<file-base>-.
+ id_prefix_ += to_string (dl.line);
+ id_prefix_ += '-';
+ id_prefix_ += p.leaf ().base ().string ();
+ id_prefix_ += '-';
+
+ token t (pre_parse_scope_body ());
+
+ if (t.type != type::eos)
+ fail (t) << "stray " << t;
+
+ id_prefix_ = oip;
+ set_lexer (ol);
+ path_ = op;
+ }
+ catch (const io_error& e)
+ {
+ fail (dl) << "unable to read testscript " << p << ": " << e;
+ }
+ }
+ };
+
+ for (; i != args.end (); ++i)
+ {
+ name& n (*i);
+
+ try
+ {
+ if (n.simple () && !n.empty ())
+ {
+ include (move (n.value));
+ continue;
+ }
+ }
+ catch (const invalid_path&) {} // Fall through to the diagnostics below.
+
+ diag_record dr (fail (dl));
+ dr << "invalid testscript include path ";
+ to_stream (dr.os, n, true); // Quote.
+ }
+ }
+
+ description parser::
+ pre_parse_leading_description (token& t, type& tt)
+ {
+ // enter: peeked at colon (type in tt)
+ // leave: peeked at the next token in the first_token mode (type in tt)
+
+ assert (tt == type::colon);
+
+ description r;
+ location loc (get_location (peeked ()));
+
+ string sp; // Strip prefix.
+ size_t sn (0); // Strip prefix length.
+
+ for (size_t ln (1); tt == type::colon; ++ln) // ln: line number (leading blanks not counted).
+ {
+ next (t, tt); // Get ':'.
+
+ mode (lexer_mode::description_line);
+ next (t, tt);
+
+ // If it is empty, then we get newline right away.
+ //
+ const string& l (tt == type::word ? t.value : string ());
+
+ if (tt == type::word)
+ next (t, tt); // Get newline.
+
+ assert (tt == type::newline);
+
+ // If this is the first line, then get the "strip prefix", i.e.,
+ // the beginning of the line that contains only whitespaces. If
+ // the subsequent lines start with the same prefix, then we strip
+ // it.
+ //
+ if (ln == 1)
+ {
+ sn = l.find_first_not_of (" \t");
+ sp.assign (l, 0, sn == string::npos ? (sn = 0) : sn);
+ }
+
+ // Apply strip prefix: data starts at sn if the line begins with it, at 0 otherwise.
+ //
+ size_t i (l.compare (0, sn, sp) == 0 ? sn : 0);
+
+ // Strip trailing whitespaces, as a courtesy to the user.
+ // For an all-whitespace line j is set to i, which makes the data empty.
+ size_t j (l.find_last_not_of (" \t"));
+ j = j != string::npos ? j + 1 : i;
+
+ size_t n (j - i); // [i, j) is our data.
+
+ if (ln == 1)
+ {
+ // First line. Ignore if it's blank.
+ //
+ if (n == 0)
+ --ln; // Stay as if on the first line.
+ else
+ {
+ // Otherwise, see if it is the id (no whitespace or '.' in the data).
+ // Failing that we assume it is the summary until we see the next line.
+ //
+ (l.find_first_of (" \t.", i) >= j ? r.id : r.summary).
+ assign (l, i, n);
+
+ // If this is an id then validate it.
+ //
+ if (!r.id.empty ())
+ {
+ for (char c: r.id)
+ {
+ if (!(alnum (c) || c == '_' || c == '-' || c == '+'))
+ fail (loc) << "illegal character '" << c
+ << "' in test id '" << r.id << "'";
+ }
+ }
+ }
+ }
+ else if (ln == 2)
+ {
+ // If this is a blank then whatever we have in id/summary is good.
+ // Otherwise, if we have id, then assume this is summary until we
+ // see the next line. And if not, then move what we (wrongly)
+ // assumed to be the summary to details.
+ //
+ if (n != 0)
+ {
+ if (!r.id.empty ())
+ r.summary.assign (l, i, n);
+ else
+ {
+ r.details = move (r.summary);
+ r.details += '\n';
+ r.details.append (l, i, n);
+
+ r.summary.clear ();
+ }
+ }
+ }
+ // Don't treat line 3 as special if we have given up on id/summary.
+ //
+ else if (ln == 3 && r.details.empty ())
+ {
+ // If this is a blank and we have id and/or summary, then we are
+ // good. Otherwise, if we have both, then move what we (wrongly)
+ // assumed to be id and summary to details.
+ //
+ if (n != 0)
+ {
+ if (!r.id.empty () && !r.summary.empty ())
+ {
+ r.details = move (r.id);
+ r.details += '\n';
+ r.details += r.summary;
+ r.details += '\n';
+
+ r.id.clear ();
+ r.summary.clear ();
+ }
+
+ r.details.append (l, i, n);
+ }
+ }
+ else // Any other line: append to details (newline-separated).
+ {
+ if (!r.details.empty ())
+ r.details += '\n';
+
+ r.details.append (l, i, n);
+ }
+
+ tt = peek (lexer_mode::first_token);
+ }
+
+ // Zap trailing newlines in the details.
+ //
+ size_t p (r.details.find_last_not_of ('\n'));
+ if (p != string::npos && ++p != r.details.size ())
+ r.details.resize (p);
+
+ if (r.empty ())
+ fail (loc) << "empty description";
+
+ // Insert id into the id map if we have one.
+ //
+ if (!r.id.empty ())
+ insert_id (r.id, loc);
+
+ return r;
+ }
+
+ description parser::
+ parse_trailing_description (token& t, type& tt)
+ {
+ // enter: colon
+ // leave: newline
+
+ // Parse one-line trailing description. After trimming, the text is the id if
+ // it contains no spaces or tabs and the summary otherwise.
+ description r;
+
+ // @@ Would be nice to omit trailing description from replay.
+ //
+ const location loc (get_location (t));
+
+ mode (lexer_mode::description_line);
+ next (t, tt);
+
+ // If it is empty, then we will get newline right away.
+ //
+ if (tt == type::word)
+ {
+ string l (move (t.value));
+ trim (l); // Strip leading/trailing whitespaces.
+
+ // Decide whether this is id or summary.
+ //
+ (l.find_first_of (" \t") == string::npos ? r.id : r.summary) =
+ move (l);
+
+ next (t, tt); // Get newline.
+ }
+
+ assert (tt == type::newline); // Lexer mode invariant.
+
+ if (r.empty ())
+ fail (loc) << "empty description";
+
+ // Insert id into the id map if we have one (only done when pre-parsing).
+ //
+ if (pre_parse_ && !r.id.empty ())
+ insert_id (r.id, loc);
+
+ return r;
+ }
+
+ value parser::
+ parse_variable_line (token& t, type& tt)
+ {
+ // enter: assignment
+ // leave: newline or semi
+
+ // We cannot reuse the value mode since it will recognize { which we
+ // want to treat as a literal.
+ //
+ mode (lexer_mode::variable_line);
+ next (t, tt);
+
+ // Parse value attributes if any. Note that it's ok not to have
+ // anything after the attributes (e.g., foo=[null]).
+ //
+ attributes_push (t, tt, true);
+
+ // @@ PAT: Should we expand patterns? Note that it will only be
+ // simple ones since we have disabled {}. Also, what would be the
+ // pattern base directory?
+ // If nothing is left on the line, the result is a value of empty names.
+ return tt != type::newline && tt != type::semi
+ ? parse_value (t, tt,
+ pattern_mode::ignore,
+ "variable value",
+ nullptr)
+ : value (names ());
+ }
+
+ command_expr parser::
+ parse_command_line (token& t, type& tt)
+ {
+ // enter: first token of the command line
+ // leave: <newline>
+
+ // Note: this one is only used during execution.
+
+ pair<command_expr, here_docs> p (parse_command_expr (t, tt));
+
+ switch (tt)
+ {
+ case type::colon: parse_trailing_description (t, tt); break; // Result is ignored.
+ case type::semi: next (t, tt); break; // Get newline.
+ }
+
+ assert (tt == type::newline);
+
+ parse_here_documents (t, tt, p);
+ assert (tt == type::newline);
+
+ return move (p.first); // Return just the expression part of the pair.
+ }
+
+ // Parse the regular expression representation (non-empty string value
+ // framed with introducer characters and optionally followed by flag
+ // characters from the {di} set, for example '/foo/id') into
+ // components. Also return end-of-parsing position if requested,
+ // otherwise treat any unparsed characters left as an error.
+ // The components are returned as the regex_parts object defined below.
+ struct regex_parts
+ {
+ string value; // Regex text between the introducer characters.
+ char intro; // Introducer character ('\0' in the special empty object).
+ string flags; // Combination of characters from {di} set.
+
+ // Create a special empty object.
+ //
+ regex_parts (): intro ('\0') {}
+
+ regex_parts (string v, char i, string f)
+ : value (move (v)), intro (i), flags (move (f)) {}
+ };
+
+ static regex_parts
+ parse_regex (const string& s,
+ const location& l, // Location used in diagnostics.
+ const char* what, // Name of what is being parsed, used in diagnostics.
+ size_t* end = nullptr)
+ {
+ if (s.empty ())
+ fail (l) << "no introducer character in " << what;
+
+ size_t p (s.find (s[0], 1)); // Find terminating introducer.
+
+ if (p == string::npos)
+ fail (l) << "no closing introducer character in " << what;
+
+ size_t rn (p - 1); // Regex length.
+ if (rn == 0)
+ fail (l) << what << " is empty";
+
+ // Find end-of-flags position. Note that s[s.size ()] is '\0' for a const
+ // string so the scan below stops at the end of the string at the latest.
+ size_t fp (++p); // Save flags starting position.
+ for (char c; (c = s[p]) == 'd' || c == 'i'; ++p) ;
+
+ // If string end is not reached then report invalid flags, unless
+ // end-of-parsing position is requested (which means regex is just a
+ // prefix).
+ //
+ if (s[p] != '\0' && end == nullptr)
+ fail (l) << "junk at the end of " << what;
+
+ if (end != nullptr)
+ *end = p;
+
+ return regex_parts (string (s, 1, rn), s[0], string (s, fp, p - fp));
+ }
+
+ pair<command_expr, parser::here_docs> parser::
+ parse_command_expr (token& t, type& tt)
+ {
+ // enter: first token of the command line
+ // leave: <newline>
+
+ command_expr expr;
+
+ // OR-ed to an implied false for the first term.
+ //
+ expr.push_back ({expr_operator::log_or, command_pipe ()});
+
+ command c; // Command being assembled.
+
+ // Make sure the command makes sense.
+ //
+ auto check_command = [&c, this] (const location& l, bool last)
+ {
+ if (c.out.type == redirect_type::merge &&
+ c.err.type == redirect_type::merge)
+ fail (l) << "stdout and stderr redirected to each other";
+
+ if (!last && c.out.type != redirect_type::none)
+ fail (l) << "stdout is both redirected and piped";
+ };
+
+ // Check that the introducer character differs from '/' if the
+ // portable path modifier is specified. Must be called before
+ // parse_regex() (see below) to make sure its diagnostics is
+ // meaningful.
+ //
+ // Note that the portable path modifier assumes '/' to be a valid
+ // regex character and so makes it indistinguishable from the
+ // terminating introducer.
+ //
+ auto check_regex_mod = [this] (const string& mod,
+ const string& re,
+ const location& l,
+ const char* what)
+ {
+ // Handles empty regex properly.
+ //
+ if (mod.find ('/') != string::npos && re[0] == '/')
+ fail (l) << "portable path modifier and '/' introducer in "
+ << what;
+ };
+
+ // Pending positions where the next word should go.
+ //
+ enum class pending
+ {
+ none,
+ program,
+ in_string,
+ in_document,
+ in_file,
+ out_merge,
+ out_string,
+ out_str_regex,
+ out_document,
+ out_doc_regex,
+ out_file,
+ err_merge,
+ err_string,
+ err_str_regex,
+ err_document,
+ err_doc_regex,
+ err_file,
+ clean
+ };
+ pending p (pending::program);
+ string mod; // Modifiers for pending in_* and out_* positions.
+ here_docs hd; // Expected here-documents.
+
+ // Add the next word to either one of the pending positions or to
+ // program arguments by default.
+ //
+ auto add_word = [&c, &p, &mod, &check_regex_mod, this] (
+ string&& w, const location& l)
+ {
+ auto add_merge = [&l, this] (redirect& r, const string& w, int fd)
+ {
+ try
+ {
+ size_t n;
+ if (stoi (w, &n) == fd && n == w.size ())
+ {
+ r.fd = fd;
+ return;
+ }
+ }
+ catch (const exception&) {} // Fall through.
+
+ fail (l) << (fd == 1 ? "stderr" : "stdout") << " merge redirect "
+ << "file descriptor must be " << fd;
+ };
+
+ auto add_here_str = [] (redirect& r, string&& w)
+ {
+ if (r.modifiers.find (':') == string::npos)
+ w += '\n';
+ r.str = move (w);
+ };
+
+ auto add_here_str_regex = [&l, &check_regex_mod] (
+ redirect& r, int fd, string&& w)
+ {
+ const char* what (nullptr);
+ switch (fd)
+ {
+ case 1: what = "stdout regex redirect"; break;
+ case 2: what = "stderr regex redirect"; break;
+ }
+
+ check_regex_mod (r.modifiers, w, l, what);
+
+ regex_parts rp (parse_regex (w, l, what));
+
+ regex_lines& re (r.regex);
+ re.intro = rp.intro;
+
+ re.lines.emplace_back (
+ l.line, l.column, move (rp.value), move (rp.flags));
+
+ // Add final blank line unless suppressed.
+ //
+ // Note that the position is synthetic, but that's ok as we don't
+ // expect any diagnostics to refer this line.
+ //
+ if (r.modifiers.find (':') == string::npos)
+ re.lines.emplace_back (l.line, l.column, string (), false);
+ };
+
+ auto parse_path = [&l, this] (string&& w, const char* what) -> path
+ {
+ try
+ {
+ path p (move (w));
+
+ if (!p.empty ())
+ {
+ p.normalize ();
+ return p;
+ }
+
+ fail (l) << "empty " << what << endf;
+ }
+ catch (const invalid_path& e)
+ {
+ fail (l) << "invalid " << what << " '" << e.path << "'" << endf;
+ }
+ };
+
+ auto add_file = [&parse_path] (redirect& r, int fd, string&& w)
+ {
+ const char* what (nullptr);
+ switch (fd)
+ {
+ case 0: what = "stdin redirect path"; break;
+ case 1: what = "stdout redirect path"; break;
+ case 2: what = "stderr redirect path"; break;
+ }
+
+ r.file.path = parse_path (move (w), what);
+ };
+
+ switch (p)
+ {
+ case pending::none: c.arguments.push_back (move (w)); break;
+ case pending::program:
+ c.program = parse_path (move (w), "program path");
+ break;
+
+ case pending::out_merge: add_merge (c.out, w, 2); break;
+ case pending::err_merge: add_merge (c.err, w, 1); break;
+
+ case pending::in_string: add_here_str (c.in, move (w)); break;
+ case pending::out_string: add_here_str (c.out, move (w)); break;
+ case pending::err_string: add_here_str (c.err, move (w)); break;
+
+ case pending::out_str_regex:
+ {
+ add_here_str_regex (c.out, 1, move (w));
+ break;
+ }
+ case pending::err_str_regex:
+ {
+ add_here_str_regex (c.err, 2, move (w));
+ break;
+ }
+
+ // These are handled specially below.
+ //
+ case pending::in_document:
+ case pending::out_document:
+ case pending::err_document:
+ case pending::out_doc_regex:
+ case pending::err_doc_regex: assert (false); break;
+
+ case pending::in_file: add_file (c.in, 0, move (w)); break;
+ case pending::out_file: add_file (c.out, 1, move (w)); break;
+ case pending::err_file: add_file (c.err, 2, move (w)); break;
+
+ case pending::clean:
+ {
+ cleanup_type t;
+ switch (mod[0]) // Ok, if empty
+ {
+ case '!': t = cleanup_type::never; break;
+ case '?': t = cleanup_type::maybe; break;
+ default: t = cleanup_type::always; break;
+ }
+
+ c.cleanups.push_back (
+ {t, parse_path (move (w), "cleanup path")});
+ break;
+ }
+ }
+
+ p = pending::none;
+ mod.clear ();
+ };
+
+ // Make sure we don't have any pending positions to fill.
+ //
+ auto check_pending = [&p, this] (const location& l)
+ {
+ const char* what (nullptr);
+
+ switch (p)
+ {
+ case pending::none: break;
+ case pending::program: what = "program"; break;
+ case pending::in_string: what = "stdin here-string"; break;
+ case pending::in_document: what = "stdin here-document end"; break;
+ case pending::in_file: what = "stdin file"; break;
+ case pending::out_merge: what = "stdout file descriptor"; break;
+ case pending::out_string: what = "stdout here-string"; break;
+ case pending::out_document: what = "stdout here-document end"; break;
+ case pending::out_file: what = "stdout file"; break;
+ case pending::err_merge: what = "stderr file descriptor"; break;
+ case pending::err_string: what = "stderr here-string"; break;
+ case pending::err_document: what = "stderr here-document end"; break;
+ case pending::err_file: what = "stderr file"; break;
+ case pending::clean: what = "cleanup path"; break;
+
+ case pending::out_str_regex:
+ {
+ what = "stdout here-string regex";
+ break;
+ }
+ case pending::err_str_regex:
+ {
+ what = "stderr here-string regex";
+ break;
+ }
+ case pending::out_doc_regex:
+ {
+ what = "stdout here-document regex end";
+ break;
+ }
+ case pending::err_doc_regex:
+ {
+ what = "stderr here-document regex end";
+ break;
+ }
+ }
+
+ if (what != nullptr)
+ fail (l) << "missing " << what;
+ };
+
+ // Parse the redirect operator.
+ //
+ auto parse_redirect =
+ [&c, &expr, &p, &mod, this] (token& t, const location& l)
+ {
+ // Our semantics is the last redirect seen takes effect.
+ //
+ assert (p == pending::none && mod.empty ());
+
+ // See if we have the file descriptor.
+ //
+ unsigned long fd (3);
+ if (!t.separated)
+ {
+ if (c.arguments.empty ())
+ fail (l) << "missing redirect file descriptor";
+
+ const string& s (c.arguments.back ());
+
+ try
+ {
+ size_t n;
+ fd = stoul (s, &n);
+
+ if (n != s.size () || fd > 2)
+ throw invalid_argument (string ());
+ }
+ catch (const exception&)
+ {
+ fail (l) << "invalid redirect file descriptor '" << s << "'";
+ }
+
+ c.arguments.pop_back ();
+ }
+
+ type tt (t.type);
+
+ // Validate/set default file descriptor.
+ //
+ switch (tt)
+ {
+ case type::in_pass:
+ case type::in_null:
+ case type::in_str:
+ case type::in_doc:
+ case type::in_file:
+ {
+ if ((fd = fd == 3 ? 0 : fd) != 0)
+ fail (l) << "invalid in redirect file descriptor " << fd;
+
+ if (!expr.back ().pipe.empty ())
+ fail (l) << "stdin is both piped and redirected";
+
+ break;
+ }
+ case type::out_pass:
+ case type::out_null:
+ case type::out_trace:
+ case type::out_merge:
+ case type::out_str:
+ case type::out_doc:
+ case type::out_file_cmp:
+ case type::out_file_ovr:
+ case type::out_file_app:
+ {
+ if ((fd = fd == 3 ? 1 : fd) == 0)
+ fail (l) << "invalid out redirect file descriptor " << fd;
+
+ break;
+ }
+ }
+
+ mod = move (t.value);
+
+ redirect_type rt (redirect_type::none);
+ switch (tt)
+ {
+ case type::in_pass:
+ case type::out_pass: rt = redirect_type::pass; break;
+
+ case type::in_null:
+ case type::out_null: rt = redirect_type::null; break;
+
+ case type::out_trace: rt = redirect_type::trace; break;
+
+ case type::out_merge: rt = redirect_type::merge; break;
+
+ case type::in_str:
+ case type::out_str:
+ {
+ bool re (mod.find ('~') != string::npos);
+ assert (tt == type::out_str || !re);
+
+ rt = re
+ ? redirect_type::here_str_regex
+ : redirect_type::here_str_literal;
+
+ break;
+ }
+
+ case type::in_doc:
+ case type::out_doc:
+ {
+ bool re (mod.find ('~') != string::npos);
+ assert (tt == type::out_doc || !re);
+
+ rt = re
+ ? redirect_type::here_doc_regex
+ : redirect_type::here_doc_literal;
+
+ break;
+ }
+
+ case type::in_file:
+ case type::out_file_cmp:
+ case type::out_file_ovr:
+ case type::out_file_app: rt = redirect_type::file; break;
+ }
+
+ redirect& r (fd == 0 ? c.in : fd == 1 ? c.out : c.err);
+ r = redirect (rt);
+
+ // Don't move as still may be used for pending here-document end
+ // marker processing.
+ //
+ r.modifiers = mod;
+
+ switch (rt)
+ {
+ case redirect_type::none:
+ case redirect_type::pass:
+ case redirect_type::null:
+ case redirect_type::trace:
+ break;
+ case redirect_type::merge:
+ switch (fd)
+ {
+ case 0: assert (false); break;
+ case 1: p = pending::out_merge; break;
+ case 2: p = pending::err_merge; break;
+ }
+ break;
+ case redirect_type::here_str_literal:
+ switch (fd)
+ {
+ case 0: p = pending::in_string; break;
+ case 1: p = pending::out_string; break;
+ case 2: p = pending::err_string; break;
+ }
+ break;
+ case redirect_type::here_str_regex:
+ switch (fd)
+ {
+ case 0: assert (false); break;
+ case 1: p = pending::out_str_regex; break;
+ case 2: p = pending::err_str_regex; break;
+ }
+ break;
+ case redirect_type::here_doc_literal:
+ switch (fd)
+ {
+ case 0: p = pending::in_document; break;
+ case 1: p = pending::out_document; break;
+ case 2: p = pending::err_document; break;
+ }
+ break;
+ case redirect_type::here_doc_regex:
+ switch (fd)
+ {
+ case 0: assert (false); break;
+ case 1: p = pending::out_doc_regex; break;
+ case 2: p = pending::err_doc_regex; break;
+ }
+ break;
+ case redirect_type::file:
+ switch (fd)
+ {
+ case 0: p = pending::in_file; break;
+ case 1: p = pending::out_file; break;
+ case 2: p = pending::err_file; break;
+ }
+
+ // Also sets for stdin, but this is harmless.
+ //
+ r.file.mode = tt == type::out_file_ovr
+ ? redirect_fmode::overwrite
+ : (tt == type::out_file_app
+ ? redirect_fmode::append
+ : redirect_fmode::compare);
+
+ break;
+
+ case redirect_type::here_doc_ref: assert (false); break;
+ }
+ };
+
+ // Set pending cleanup type.
+ //
+ auto parse_clean = [&p, &mod] (token& t)
+ {
+ p = pending::clean;
+ mod = move (t.value);
+ };
+
+ const location ll (get_location (t)); // Line location.
+
+ // Keep parsing chunks of the command line until we see one of the
+ // "terminators" (newline, semicolon, exit status comparison, etc).
+ //
+ location l (ll);
+ names ns; // Reuse to reduce allocations.
+
+ for (bool done (false); !done; l = get_location (t))
+ {
+ switch (tt)
+ {
+ case type::semi:
+ case type::colon:
+ case type::newline:
+ {
+ done = true;
+ break;
+ }
+
+ case type::equal:
+ case type::not_equal:
+ {
+ if (!pre_parse_)
+ check_pending (l);
+
+ c.exit = parse_command_exit (t, tt);
+
+ // Only a limited set of things can appear after the exit status
+ // so we check this here.
+ //
+ switch (tt)
+ {
+ case type::semi:
+ case type::colon:
+ case type::newline:
+
+ case type::pipe:
+ case type::log_or:
+ case type::log_and:
+ break;
+ default:
+ fail (t) << "unexpected " << t << " after command exit status";
+ }
+
+ break;
+ }
+
+ case type::pipe:
+ case type::log_or:
+ case type::log_and:
+
+ case type::in_pass:
+ case type::out_pass:
+
+ case type::in_null:
+ case type::out_null:
+
+ case type::out_trace:
+
+ case type::out_merge:
+
+ case type::in_str:
+ case type::in_doc:
+ case type::out_str:
+ case type::out_doc:
+
+ case type::in_file:
+ case type::out_file_cmp:
+ case type::out_file_ovr:
+ case type::out_file_app:
+
+ case type::clean:
+ {
+ if (pre_parse_)
+ {
+ // The only things we need to handle here are the here-document
+ // and here-document regex end markers since we need to know
+ // how many of them to pre-parse after the command.
+ //
+ switch (tt)
+ {
+ case type::in_doc:
+ case type::out_doc:
+ mod = move (t.value);
+
+ bool re (mod.find ('~') != string::npos);
+ const char* what (re
+ ? "here-document regex end marker"
+ : "here-document end marker");
+
+ // We require the end marker to be a literal, unquoted word.
+ // In particular, we don't allow quoted because of cases
+ // like foo"$bar" (where we will see word 'foo').
+ //
+ next (t, tt);
+
+ // We require the end marker to be an unquoted or completely
+ // quoted word. The complete quoting becomes important for
+ // cases like foo"$bar" (where we will see word 'foo').
+ //
+ // For good measure we could have also required it to be
+ // separated from the following token, but our grammar
+ // allows one to write >>EOO;. The problematic sequence
+ // would be >>FOO$bar -- on reparse it will be expanded
+ // as a single word.
+ //
+ if (tt != type::word || t.value.empty ())
+ fail (t) << "expected " << what;
+
+ peek ();
+ const token& p (peeked ());
+ if (!p.separated)
+ {
+ switch (p.type)
+ {
+ case type::dollar:
+ case type::lparen:
+ fail (p) << what << " must be literal";
+ }
+ }
+
+ quote_type qt (t.qtype);
+ switch (qt)
+ {
+ case quote_type::unquoted:
+ qt = quote_type::single; // Treat as single-quoted.
+ break;
+ case quote_type::single:
+ case quote_type::double_:
+ if (t.qcomp)
+ break;
+ // Fall through.
+ case quote_type::mixed:
+ fail (t) << "partially-quoted " << what;
+ }
+
+ regex_parts r;
+ string end (move (t.value));
+
+ if (re)
+ {
+ check_regex_mod (mod, end, l, what);
+
+ r = parse_regex (end, l, what);
+ end = move (r.value); // The "cleared" end marker.
+ }
+
+ bool literal (qt == quote_type::single);
+ bool shared (false);
+
+ for (const auto& d: hd)
+ {
+ if (d.end == end)
+ {
+ auto check = [&t, &end, &re, this] (bool c,
+ const char* what)
+ {
+ if (!c)
+ fail (t) << "different " << what
+ << " for shared here-document "
+ << (re ? "regex '" : "'") << end << "'";
+ };
+
+ check (d.modifiers == mod, "modifiers");
+ check (d.literal == literal, "quoting");
+
+ if (re)
+ {
+ check (d.regex == r.intro, "introducers");
+ check (d.regex_flags == r.flags, "global flags");
+ }
+
+ shared = true;
+ break;
+ }
+ }
+
+ if (!shared)
+ hd.push_back (
+ here_doc {
+ {},
+ move (end),
+ literal,
+ move (mod),
+ r.intro, move (r.flags)});
+
+ break;
+ }
+
+ next (t, tt);
+ break;
+ }
+
+ // If this is one of the operators/separators, check that we
+ // don't have any pending locations to be filled.
+ //
+ check_pending (l);
+
+ // Note: there is another one in the inner loop below.
+ //
+ switch (tt)
+ {
+ case type::pipe:
+ case type::log_or:
+ case type::log_and:
+ {
+ // Check that the previous command makes sense.
+ //
+ check_command (l, tt != type::pipe);
+ expr.back ().pipe.push_back (move (c));
+
+ c = command ();
+ p = pending::program;
+
+ if (tt != type::pipe)
+ {
+ expr_operator o (tt == type::log_or
+ ? expr_operator::log_or
+ : expr_operator::log_and);
+ expr.push_back ({o, command_pipe ()});
+ }
+
+ break;
+ }
+
+ case type::in_pass:
+ case type::out_pass:
+
+ case type::in_null:
+ case type::out_null:
+
+ case type::out_trace:
+
+ case type::out_merge:
+
+ case type::in_str:
+ case type::in_doc:
+ case type::out_str:
+ case type::out_doc:
+
+ case type::in_file:
+ case type::out_file_cmp:
+ case type::out_file_ovr:
+ case type::out_file_app:
+ {
+ parse_redirect (t, l);
+ break;
+ }
+
+ case type::clean:
+ {
+ parse_clean (t);
+ break;
+ }
+
+ default: assert (false); break;
+ }
+
+ next (t, tt);
+ break;
+ }
+ default:
+ {
+ // Here-document end markers are literal (we verified that above
+ // during pre-parsing) and we need to know whether they were
+ // quoted. So handle this case specially.
+ //
+ {
+ int fd;
+ switch (p)
+ {
+ case pending::in_document: fd = 0; break;
+ case pending::out_document:
+ case pending::out_doc_regex: fd = 1; break;
+ case pending::err_document:
+ case pending::err_doc_regex: fd = 2; break;
+ default: fd = -1; break;
+ }
+
+ if (fd != -1)
+ {
+ here_redirect rd {
+ expr.size () - 1, expr.back ().pipe.size (), fd};
+
+ string end (move (t.value));
+
+ regex_parts r;
+
+ if (p == pending::out_doc_regex ||
+ p == pending::err_doc_regex)
+ {
+ // We can't fail here as we already parsed all the end
+ // markers during pre-parsing stage, and so no need in the
+ // description.
+ //
+ r = parse_regex (end, l, "");
+ end = move (r.value); // The "cleared" end marker.
+ }
+
+ bool shared (false);
+ for (auto& d: hd)
+ {
+ // No need to check that redirects that share here-document
+ // have the same modifiers, etc. That has been done during
+ // pre-parsing.
+ //
+ if (d.end == end)
+ {
+ d.redirects.emplace_back (rd);
+ shared = true;
+ break;
+ }
+ }
+
+ if (!shared)
+ hd.push_back (
+ here_doc {
+ {rd},
+ move (end),
+ (t.qtype == quote_type::unquoted ||
+ t.qtype == quote_type::single),
+ move (mod),
+ r.intro, move (r.flags)});
+
+ p = pending::none;
+ mod.clear ();
+
+ next (t, tt);
+ break;
+ }
+ }
+
+ // Parse the next chunk as simple names to get expansion, etc.
+ // Note that we do it in the chunking mode to detect whether
+ // anything in each chunk is quoted.
+ //
+ // @@ PAT: should we support pattern expansion? This is even
+ // fuzzier than the variable case above. Though this is the
+ // shell semantics. Think what happens when we do rm *.txt?
+ //
+ reset_quoted (t);
+ parse_names (t, tt,
+ ns,
+ pattern_mode::ignore,
+ true,
+ "command line",
+ nullptr);
+
+ if (pre_parse_) // Nothing else to do if we are pre-parsing.
+ break;
+
+ // Process what we got. Determine whether anything inside was
+ // quoted (note that the current token is "next" and is not part
+ // of this).
+ //
+ bool q ((quoted () -
+ (t.qtype != quote_type::unquoted ? 1 : 0)) != 0);
+
+ for (name& n: ns)
+ {
+ string s;
+
+ try
+ {
+ s = value_traits<string>::convert (move (n), nullptr);
+ }
+ catch (const invalid_argument&)
+ {
+ diag_record dr (fail (l));
+ dr << "invalid string value ";
+ to_stream (dr.os, n, true); // Quote.
+ }
+
+ // If it is a quoted chunk, then we add the word as is.
+ // Otherwise we re-lex it. But if the word doesn't contain any
+ // interesting characters (operators plus quotes/escapes),
+ // then no need to re-lex.
+ //
+ // NOTE: update quoting (script.cxx:to_stream_q()) if adding
+ // any new characters.
+ //
+ if (q || s.find_first_of ("|&<>\'\"\\") == string::npos)
+ add_word (move (s), l);
+ else
+ {
+ // If the chunk re-parsing results in error, our diagnostics
+ // will look like this:
+ //
+ // <string>:1:4: error: stdout merge redirect file descriptor must be 2
+ // testscript:2:5: info: while parsing string '1>&a'
+ //
+ auto df = make_diag_frame (
+ [s, &l](const diag_record& dr)
+ {
+ dr << info (l) << "while parsing string '" << s << "'";
+ });
+
+ // When re-lexing we do "effective escaping" and only for
+ // ['"\] (quotes plus the backslash itself). In particular,
+ // there is no way to escape redirects, operators, etc. The
+ // idea is to prefer quoting except for passing literal
+ // quotes, for example:
+ //
+ // args = \"&foo\"
+ // cmd $args # cmd &foo
+ //
+ // args = 'x=\"foo bar\"'
+ // cmd $args # cmd x="foo bar"
+ //
+
+ path name ("<string>");
+ istringstream is (s);
+ lexer lex (is, name,
+ lexer_mode::command_expansion,
+ "\'\"\\");
+
+ // Treat the first "sub-token" as always separated from what
+ // we saw earlier.
+ //
+ // Note that this is not "our" token so we cannot do
+ // fail(t). Rather we should do fail(l).
+ //
+ token t (lex.next ());
+ location l (build2::get_location (t, name));
+ t.separated = true;
+
+ string w;
+ bool f (t.type == type::eos); // If the whole thing is empty.
+
+ for (; t.type != type::eos; t = lex.next ())
+ {
+ type tt (t.type);
+ l = build2::get_location (t, name);
+
+ // Re-lexing double-quotes will recognize $, ( inside as
+ // tokens so we have to reverse them back. Since we don't
+ // treat spaces as separators we can be sure we will get
+ // it right.
+ //
+ switch (tt)
+ {
+ case type::dollar: w += '$'; continue;
+ case type::lparen: w += '('; continue;
+ }
+
+ // Retire the current word. We need to distinguish between
+ // empty and non-existent (e.g., > vs >"").
+ //
+ if (!w.empty () || f)
+ {
+ add_word (move (w), l);
+ f = false;
+ }
+
+ if (tt == type::word)
+ {
+ w = move (t.value);
+ f = true;
+ continue;
+ }
+
+ // If this is one of the operators/separators, check that
+ // we don't have any pending locations to be filled.
+ //
+ check_pending (l);
+
+ // Note: there is another one in the outer loop above.
+ //
+ switch (tt)
+ {
+ case type::pipe:
+ case type::log_or:
+ case type::log_and:
+ {
+ // Check that the previous command makes sense.
+ //
+ check_command (l, tt != type::pipe);
+ expr.back ().pipe.push_back (move (c));
+
+ c = command ();
+ p = pending::program;
+
+ if (tt != type::pipe)
+ {
+ expr_operator o (tt == type::log_or
+ ? expr_operator::log_or
+ : expr_operator::log_and);
+ expr.push_back ({o, command_pipe ()});
+ }
+
+ break;
+ }
+
+ case type::in_pass:
+ case type::out_pass:
+
+ case type::in_null:
+ case type::out_null:
+
+ case type::out_trace:
+
+ case type::out_merge:
+
+ case type::in_str:
+ case type::out_str:
+
+ case type::in_file:
+ case type::out_file_cmp:
+ case type::out_file_ovr:
+ case type::out_file_app:
+ {
+ parse_redirect (t, l);
+ break;
+ }
+
+ case type::clean:
+ {
+ parse_clean (t);
+ break;
+ }
+
+ case type::in_doc:
+ case type::out_doc:
+ {
+ fail (l) << "here-document redirect in expansion";
+ break;
+ }
+ }
+ }
+
+ // Don't forget the last word.
+ //
+ if (!w.empty () || f)
+ add_word (move (w), l);
+ }
+ }
+
+ ns.clear ();
+ break;
+ }
+ }
+ }
+
+ if (!pre_parse_)
+ {
+ // Verify we don't have anything pending to be filled and the
+ // command makes sense.
+ //
+ check_pending (l);
+ check_command (l, true);
+
+ expr.back ().pipe.push_back (move (c));
+ }
+
+ return make_pair (move (expr), move (hd));
+ }
+
+ command_exit parser::
+ parse_command_exit (token& t, type& tt)
+ {
+ // enter: equal/not_equal
+ // leave: token after exit status (one parse_names() chunk)
+
+ exit_comparison comp (tt == type::equal
+ ? exit_comparison::eq
+ : exit_comparison::ne);
+
+ // The next chunk should be the exit status.
+ //
+ next (t, tt);
+ location l (get_location (t));
+ names ns (parse_names (t, tt,
+ pattern_mode::ignore,
+ true,
+ "exit status",
+ nullptr));
+ unsigned long es (256); // Invalid (out of range) until successfully parsed.
+
+ if (!pre_parse_)
+ {
+ try
+ {
+ if (ns.size () == 1 && ns[0].simple () && !ns[0].empty ())
+ es = stoul (ns[0].value); // NOTE(review): accepts trailing junk ("1x").
+ }
+ catch (const exception&) {} // Fall through with es still invalid.
+
+ if (es > 255)
+ {
+ diag_record dr;
+
+ dr << fail (l) << "expected exit status instead of ";
+ to_stream (dr.os, ns, true); // Quote.
+
+ dr << info << "exit status is an unsigned integer less than 256";
+ }
+ }
+
+ return command_exit {comp, static_cast<uint8_t> (es)};
+ }
+
+ void parser::
+ parse_here_documents (token& t, type& tt,
+ pair<command_expr, here_docs>& p)
+ {
+ // enter: newline
+ // leave: newline
+
+ // Parse here-document fragments in the order they were mentioned on
+ // the command line.
+ //
+ for (here_doc& h: p.second)
+ {
+ // Switch to the here-line mode which is like single/double-quoted
+ // string but recognizes the newline as a separator.
+ //
+ mode (h.literal
+ ? lexer_mode::here_line_single
+ : lexer_mode::here_line_double);
+ next (t, tt);
+
+ parsed_doc v (
+ parse_here_document (t, tt, h.end, h.modifiers, h.regex));
+
+ if (!pre_parse_)
+ {
+ assert (!h.redirects.empty ());
+ auto i (h.redirects.cbegin ()); // The first redirect gets the parsed document.
+
+ command& c (p.first[i->expr].pipe[i->pipe]);
+ redirect& r (i->fd == 0 ? c.in : i->fd == 1 ? c.out : c.err);
+
+ if (v.re)
+ {
+ r.regex = move (v.regex);
+ r.regex.flags = move (h.regex_flags);
+ }
+ else
+ r.str = move (v.str);
+
+ r.end = move (h.end);
+ r.end_line = v.end_line;
+ r.end_column = v.end_column;
+
+ // Note that our references cannot be invalidated because the
+ // command_expr/command_pipe vectors already contain all their
+ // elements. The remaining redirects refer to the first one.
+ //
+ for (++i; i != h.redirects.cend (); ++i)
+ {
+ command& c (p.first[i->expr].pipe[i->pipe]);
+
+ (i->fd == 0 ? c.in : i->fd == 1 ? c.out : c.err) =
+ redirect (redirect_type::here_doc_ref, r);
+ }
+ }
+
+ expire_mode ();
+ }
+ }
+
+ parser::parsed_doc parser::
+ parse_here_document (token& t, type& tt,
+ const string& em,
+ const string& mod,
+ char re)
+ {
+ // enter: first token on first line
+ // leave: newline (after end marker)
+
+ // String literal. Note that when deciding whether to terminate the
+ // previously added line with a newline, we need to distinguish a yet
+ // empty result from the one that has a single blank line added.
+ //
+ optional<string> rs;
+
+ regex_lines rre;
+
+ // Here-documents can be indented. The leading whitespaces of the end
+ // marker line (called strip prefix) determine the indentation. Every
+ // other line in the here-document should start with this prefix which
+ // is automatically stripped. The only exception is a blank line.
+ //
+ // The fact that the strip prefix is only known at the end, after
+ // seeing all the lines, is rather inconvenient. As a result, the way
+ // we implement this is a bit hackish (though there is also something
+ // elegant about it): at the end of the pre-parse stage we are going
+ // to re-examine the sequence of tokens that comprise this here-document
+ // and "fix up" the first token of each line by stripping the prefix.
+ //
+ string sp;
+
+ // Remember the position of the first token in this here-document.
+ //
+ size_t ri (pre_parse_ ? replay_data_.size () - 1 : 0);
+
+ // We will use the location of the first token on the line for the
+ // regex diagnostics. At the end of the loop it will point to the
+ // beginning of the end marker.
+ //
+ location l;
+
+ while (tt != type::eos)
+ {
+ l = get_location (t);
+
+ // Check if this is the end marker. For starters, it should be a
+ // single, unquoted word followed by a newline.
+ //
+ if (tt == type::word &&
+ t.qtype == quote_type::unquoted &&
+ peek () == type::newline)
+ {
+ const string& v (t.value);
+
+ size_t vn (v.size ());
+ size_t en (em.size ());
+
+ // Then check that it ends with the end marker.
+ //
+ if (vn >= en && v.compare (vn - en, en, em) == 0)
+ {
+ // Now check that the prefix only contains whitespaces.
+ //
+ size_t n (vn - en);
+
+ if (v.find_first_not_of (" \t") >= n)
+ {
+ assert (pre_parse_ || n == 0); // Should have been stripped.
+
+ if (n != 0)
+ sp.assign (v, 0, n); // Save the strip prefix.
+
+ next (t, tt); // Get the newline.
+ break;
+ }
+ }
+ }
+
+ // Expand the line (can be blank).
+ //
+ // @@ PAT: one could argue that if we do it in variables, then we
+ // should do it here as well. Though feels bizarre.
+ //
+ names ns (tt != type::newline
+ ? parse_names (t, tt,
+ pattern_mode::ignore,
+ false,
+ "here-document line",
+ nullptr)
+ : names ());
+
+ if (!pre_parse_)
+ {
+ // What shall we do if the expansion results in multiple names?
+ // For example, if the line contains just the variable expansion
+ // and it is of type strings. Adding all the elements space-
+ // separated seems like the natural thing to do.
+ //
+ string s;
+ for (auto b (ns.begin ()), i (b); i != ns.end (); ++i)
+ {
+ string n;
+
+ try
+ {
+ n = value_traits<string>::convert (move (*i), nullptr);
+ }
+ catch (const invalid_argument&)
+ {
+ fail (l) << "invalid string value '" << *i << "'";
+ }
+
+ if (i == b)
+ s = move (n);
+ else
+ {
+ s += ' ';
+ s += n;
+ }
+ }
+
+ if (!re)
+ {
+ // Add newline after previous line.
+ //
+ if (rs)
+ {
+ *rs += '\n';
+ *rs += s;
+ }
+ else
+ rs = move (s);
+ }
+ else
+ {
+ // Due to expansion we can end up with multiple lines. If empty
+ // then we will add a blank textual literal.
+ //
+ for (size_t p (0); p != string::npos; )
+ {
+ string ln;
+ size_t np (s.find ('\n', p));
+
+ if (np != string::npos)
+ {
+ ln = string (s, p, np - p);
+ p = np + 1;
+ }
+ else
+ {
+ ln = string (s, p);
+ p = np;
+ }
+
+ if (ln[0] != re) // Line doesn't start with regex introducer.
+ {
+ // This is a line-char literal (covers blank lines as well).
+ //
+ // Append textual literal.
+ //
+ rre.lines.emplace_back (l.line, l.column, move (ln), false);
+ }
+ else // Line starts with the regex introducer.
+ {
+ // This is a char-regex, or a sequence of line-regex syntax
+ // characters or both (in this specific order). So we will
+ // add regex (with optional special characters) or special
+ // literal.
+ //
+ size_t p (ln.find (re, 1));
+ if (p == string::npos)
+ {
+ // No regex, just a sequence of syntax characters.
+ //
+ string spec (ln, 1);
+ if (spec.empty ())
+ fail (l) << "no syntax line characters";
+
+ // Append special literal.
+ //
+ rre.lines.emplace_back (
+ l.line, l.column, move (spec), true);
+ }
+ else
+ {
+ // Regex (probably with syntax characters).
+ //
+ regex_parts re;
+
+ // Empty regex is a special case representing a blank line.
+ //
+ if (p == 1)
+ // Position to optional special characters of an empty
+ // regex.
+ //
+ ++p;
+ else
+ // Can't fail as all the pre-conditions verified
+ // (non-empty with both introducers in place), so no
+ // description required.
+ //
+ re = parse_regex (ln, l, "", &p);
+
+ // Append regex with optional special characters.
+ //
+ rre.lines.emplace_back (l.line, l.column,
+ move (re.value), move (re.flags),
+ string (ln, p));
+ }
+ }
+ }
+ }
+ }
+
+ // We should expand the whole line at once so this would normally be
+ // a newline but can also be an end-of-stream.
+ //
+ if (tt == type::newline)
+ next (t, tt);
+ else
+ assert (tt == type::eos);
+ }
+
+ if (tt == type::eos)
+ fail (t) << "missing here-document end marker '" << em << "'";
+
+ if (pre_parse_)
+ {
+ // Strip the indentation prefix if there is one.
+ //
+ assert (replay_ == replay::save);
+
+ if (!sp.empty ())
+ {
+ size_t sn (sp.size ());
+
+ for (; ri != replay_data_.size (); ++ri)
+ {
+ token& rt (replay_data_[ri].token);
+
+ if (rt.type == type::newline) // Blank
+ continue;
+
+ if (rt.type != type::word || rt.value.compare (0, sn, sp) != 0)
+ fail (rt) << "unindented here-document line";
+
+ // If the word is equal to the strip prefix then we have to drop
+ // the token. Note that simply making it an empty word won't
+ // have the same semantics. For instance, it would trigger
+ // concatenated expansion.
+ //
+ if (rt.value.size () == sn)
+ replay_data_.erase (replay_data_.begin () + ri);
+ else
+ {
+ rt.value.erase (0, sn);
+ rt.column += sn;
+ ++ri;
+ }
+
+ // Skip until next newline.
+ //
+ for (; replay_data_[ri].token.type != type::newline; ++ri) ;
+ }
+ }
+ }
+ else
+ {
+ // Add final newline unless suppressed (the ':' modifier).
+ //
+ if (mod.find (':') == string::npos)
+ {
+ if (re)
+ // Note that the position is synthetic, but that's ok as we don't
+ // expect any diagnostics to refer to this line.
+ //
+ rre.lines.emplace_back (l.line, l.column, string (), false);
+ else if (rs)
+ *rs += '\n';
+ else
+ rs = "\n";
+ }
+
+ // Finalize regex lines.
+ //
+ if (re)
+ {
+ // Empty regex matches nothing, so not of much use.
+ //
+ if (rre.lines.empty ())
+ fail (l) << "empty here-document regex";
+
+ rre.intro = re;
+ }
+ }
+
+ return re
+ ? parsed_doc (move (rre), l.line, l.column)
+ : parsed_doc (rs ? move (*rs) : string (), l.line, l.column);
+ }
+
+ //
+ // Execute.
+ //
+
+ void parser::
+ execute (script& s, runner& r)
+ {
+ assert (s.state == scope_state::unknown);
+
+ auto g (
+ make_exception_guard (
+ [&s] () {s.state = scope_state::failed;})); // Presumably runs only if an exception propagates.
+
+ if (!s.empty ())
+ execute (s, s, r); // The script itself is the root scope.
+ else
+ s.state = scope_state::passed; // Nothing to execute.
+ }
+
+ void parser::
+ execute (scope& sc, script& s, runner& r)
+ {
+ path_ = nullptr; // Set by replays.
+
+ pre_parse_ = false; // We are now in the execute stage.
+
+ set_lexer (nullptr);
+
+ script_ = &s;
+ runner_ = &r;
+ group_ = nullptr; // Pre-parse state, not used during execution.
+ id_map_ = nullptr;
+ include_set_ = nullptr;
+ scope_ = &sc;
+
+ //@@ PAT TODO: set pbase_?
+
+ exec_scope_body ();
+ }
+
+ static void
+ execute_impl (scope& s, script& scr, runner& r)
+ {
+ try
+ {
+ parser p; // Each scope is executed by its own sub-parser.
+ p.execute (s, scr, r);
+ }
+ catch (const failed&)
+ {
+ s.state = scope_state::failed; // Record in the scope rather than propagate.
+ }
+ }
+
+ void parser::
+ exec_scope_body ()
+ {
+ size_t li (0); // Running command line index.
+
+ runner_->enter (*scope_, scope_->start_loc_);
+
+ if (test* t = dynamic_cast<test*> (scope_))
+ {
+ exec_lines (
+ t->tests_.begin (), t->tests_.end (), li, command_type::test);
+ }
+ else if (group* g = dynamic_cast<group*> (scope_))
+ {
+ bool exec_scope (
+ exec_lines (
+ g->setup_.begin (), g->setup_.end (), li, command_type::setup));
+
+ if (exec_scope)
+ {
+ atomic_count task_count (0);
+ wait_guard wg (task_count);
+
+ // Start asynchronous execution of inner scopes keeping track of
+ // how many we have handled.
+ //
+ for (unique_ptr<scope>& chain: g->scopes)
+ {
+ // Check if this scope is ignored (e.g., via config.test).
+ //
+ if (!runner_->test (*chain) || !exec_scope)
+ {
+ chain = nullptr;
+ continue;
+ }
+
+ // Pick a scope from the if-else chain.
+ //
+ // In fact, we are going to drop all but the selected (if any)
+ // scope. This way we can re-examine the scope states later. It
+ // will also free some memory.
+ //
+ unique_ptr<scope>* ps;
+ for (ps = &chain; *ps != nullptr; ps = &ps->get ()->if_chain)
+ {
+ scope& s (**ps);
+
+ if (!s.if_cond_) // Unconditional.
+ {
+ assert (s.if_chain == nullptr);
+ break;
+ }
+
+ line l (move (*s.if_cond_));
+ line_type lt (l.type);
+
+ replay_data (move (l.tokens));
+
+ token t;
+ type tt;
+
+ next (t, tt);
+ const location ll (get_location (t));
+ next (t, tt); // Skip to start of command.
+
+ bool take;
+ if (lt != line_type::cmd_else)
+ {
+ // Note: the line index count continues from setup.
+ //
+ command_expr ce (parse_command_line (t, tt));
+
+ try
+ {
+ take = runner_->run_if (*scope_, ce, ++li, ll);
+ }
+ catch (const exit_scope& e)
+ {
+ // Bail out if the scope is exited with the failure status.
+ // Otherwise leave the scope normally.
+ //
+ if (!e.status)
+ throw failed ();
+
+ // Stop iterating through if conditions, and stop executing
+ // inner scopes.
+ //
+ exec_scope = false;
+ replay_stop ();
+ break;
+ }
+
+ if (lt == line_type::cmd_ifn || lt == line_type::cmd_elifn)
+ take = !take;
+ }
+ else
+ {
+ assert (tt == type::newline);
+ take = true;
+ }
+
+ replay_stop ();
+
+ if (take)
+ {
+ // Count the remaining conditions for the line index.
+ //
+ for (scope* r (s.if_chain.get ());
+ r != nullptr &&
+ r->if_cond_->type != line_type::cmd_else;
+ r = r->if_chain.get ())
+ ++li;
+
+ s.if_chain.reset (); // Drop remaining scopes.
+ break;
+ }
+ }
+
+ chain.reset (*ps == nullptr || (*ps)->empty () || !exec_scope
+ ? nullptr
+ : ps->release ()); // Keep only the selected, non-empty scope.
+
+ if (chain != nullptr)
+ {
+ // Hand it off to a sub-parser potentially in another thread.
+ // But we could also have handled it serially in this parser:
+ //
+ // scope* os (scope_);
+ // scope_ = chain.get ();
+ // exec_scope_body ();
+ // scope_ = os;
+
+ // Pass our diagnostics stack (this is safe since we are going
+ // to wait for completion before unwinding the diag stack).
+ //
+ // If the scope was executed synchronously, check the status
+ // and bail out if we weren't asked to keep going.
+ //
+ // UBSan workaround.
+ //
+ const diag_frame* df (diag_frame::stack ());
+ if (!sched.async (task_count,
+ [] (const diag_frame* ds,
+ scope& s,
+ script& scr,
+ runner& r)
+ {
+ diag_frame::stack_guard dsg (ds);
+ execute_impl (s, scr, r);
+ },
+ df,
+ ref (*chain),
+ ref (*script_),
+ ref (*runner_)))
+ {
+ // Bail out if the scope has failed and we weren't instructed
+ // to keep going.
+ //
+ if (chain->state == scope_state::failed && !keep_going)
+ throw failed ();
+ }
+ }
+ }
+
+ wg.wait ();
+
+ // Re-examine the scopes we have executed collecting their state.
+ //
+ for (const unique_ptr<scope>& chain: g->scopes)
+ {
+ if (chain == nullptr) // Ignored, not selected, or empty.
+ continue;
+
+ switch (chain->state)
+ {
+ case scope_state::passed: break;
+ case scope_state::failed: throw failed ();
+ default: assert (false);
+ }
+ }
+ }
+
+ exec_lines (
+ g->tdown_.begin (), g->tdown_.end (), li, command_type::teardown);
+ }
+ else
+ assert (false);
+
+ runner_->leave (*scope_, scope_->end_loc_);
+
+ scope_->state = scope_state::passed;
+ }
+
+ bool parser::
+ exec_lines (lines::iterator i, lines::iterator e,
+ size_t& li,
+ command_type ct)
+ {
+ try
+ {
+ token t;
+ type tt;
+
+ for (; i != e; ++i)
+ {
+ line& ln (*i);
+ line_type lt (ln.type);
+
+ assert (path_ == nullptr);
+
+ // Set the tokens and start playing.
+ //
+ replay_data (move (ln.tokens));
+
+ // We don't really need to change the mode since we already know
+ // the line type.
+ //
+ next (t, tt);
+ const location ll (get_location (t));
+
+ switch (lt)
+ {
+ case line_type::var:
+ {
+ // Parse.
+ //
+ string name (move (t.value));
+
+ next (t, tt);
+ type kind (tt); // Assignment kind.
+
+ value rhs (parse_variable_line (t, tt));
+
+ if (tt == type::semi)
+ next (t, tt);
+
+ assert (tt == type::newline);
+
+ // Assign.
+ //
+ const variable& var (*ln.var);
+
+ value& lhs (kind == type::assign
+ ? scope_->assign (var)
+ : scope_->append (var));
+
+ build2::parser::apply_value_attributes (
+ &var, lhs, move (rhs), kind);
+
+ // If we changed any of the test.* values, then reset the $*,
+ // $N special aliases.
+ //
+ if (var.name == script_->test_var.name ||
+ var.name == script_->options_var.name ||
+ var.name == script_->arguments_var.name ||
+ var.name == script_->redirects_var.name ||
+ var.name == script_->cleanups_var.name)
+ {
+ scope_->reset_special ();
+ }
+
+ replay_stop ();
+ break;
+ }
+ case line_type::cmd:
+ {
+ // We use the 0 index to signal that this is the only command.
+ // Note that we only do this for test commands.
+ //
+ if (ct == command_type::test && li == 0)
+ {
+ lines::iterator j (i);
+ for (++j; j != e && j->type == line_type::var; ++j) ;
+
+ if (j != e) // We have another command.
+ ++li;
+ }
+ else
+ ++li;
+
+ command_expr ce (parse_command_line (t, tt));
+ runner_->run (*scope_, ce, ct, li, ll);
+
+ replay_stop ();
+ break;
+ }
+ case line_type::cmd_if:
+ case line_type::cmd_ifn:
+ case line_type::cmd_elif:
+ case line_type::cmd_elifn:
+ case line_type::cmd_else:
+ {
+ next (t, tt); // Skip to start of command.
+
+ bool take;
+ if (lt != line_type::cmd_else)
+ {
+ // Assume if-else always involves multiple commands.
+ //
+ command_expr ce (parse_command_line (t, tt));
+ take = runner_->run_if (*scope_, ce, ++li, ll);
+
+ if (lt == line_type::cmd_ifn || lt == line_type::cmd_elifn)
+ take = !take;
+ }
+ else
+ {
+ assert (tt == type::newline);
+ take = true;
+ }
+
+ replay_stop ();
+
+ // If end is true, then find the 'end' line. Otherwise, find
+ // the next if-else line. If skip is true then increment the
+ // command line index.
+ //
+ auto next = [e, &li]
+ (lines::iterator j, bool end, bool skip) -> lines::iterator
+ {
+ // We need to be aware of nested if-else chains.
+ //
+ size_t n (0);
+
+ for (++j; j != e; ++j)
+ {
+ line_type lt (j->type);
+
+ if (lt == line_type::cmd_if ||
+ lt == line_type::cmd_ifn)
+ ++n;
+
+ // If we are nested then we just wait until we get back
+ // to the surface.
+ //
+ if (n == 0)
+ {
+ switch (lt)
+ {
+ case line_type::cmd_elif:
+ case line_type::cmd_elifn:
+ case line_type::cmd_else:
+ if (end) break;
+ // Fall through.
+ case line_type::cmd_end: return j;
+ default: break;
+ }
+ }
+
+ if (lt == line_type::cmd_end)
+ --n;
+
+ if (skip)
+ {
+ // Note that we don't count else and end as commands.
+ //
+ switch (lt)
+ {
+ case line_type::cmd:
+ case line_type::cmd_if:
+ case line_type::cmd_ifn:
+ case line_type::cmd_elif:
+ case line_type::cmd_elifn: ++li; break;
+ default: break;
+ }
+ }
+ }
+
+ assert (false); // Missing end.
+ return e;
+ };
+
+ // If we are taking this branch then we need to parse all the
+ // lines until the next if-else line and then skip all the
+ // lines until the end (unless next is already end).
+ //
+ // Otherwise, we need to skip all the lines until the next
+ // if-else line and then continue parsing.
+ //
+ if (take)
+ {
+ lines::iterator j (next (i, false, false)); // Next if-else.
+ if (!exec_lines (i + 1, j, li, ct))
+ return false;
+
+ i = j->type == line_type::cmd_end ? j : next (j, true, true);
+ }
+ else
+ {
+ i = next (i, false, true);
+ if (i->type != line_type::cmd_end)
+ --i; // Continue with this line (e.g., elif or else).
+ }
+
+ break;
+ }
+ case line_type::cmd_end:
+ {
+ assert (false); // Always stepped over by the if-else handling above.
+ }
+ }
+ }
+
+ return true;
+ }
+ catch (const exit_scope& e)
+ {
+ // Bail out if the scope is exited with the failure status. Otherwise
+ // leave the scope normally.
+ //
+ if (!e.status)
+ throw failed ();
+
+ replay_stop ();
+ return false;
+ }
+ }
+
+ //
+ // The rest.
+ //
+
+ lookup parser::
+ lookup_variable (name&& qual, string&& name, const location& loc)
+ {
+ assert (!pre_parse_);
+
+ if (!qual.empty ())
+ fail (loc) << "qualified variable name";
+
+ // If we have no scope (happens when pre-parsing directives), then we
+ // only look for buildfile variables.
+ //
+ // Otherwise, every variable that is ever set in a script has been
+ // pre-entered during pre-parse or introduced with the set builtin
+ // during test execution. Which means that if one is not found in the
+ // script pool then it can only possibly be set in the buildfile.
+ //
+ // Note that we need to acquire the variable pool lock. The pool can
+ // be changed from multiple threads by the set builtin. The obtained
+ // variable pointer can safely be used with no locking as the variable
+ // pool is an associative container (underneath) and we are only adding
+ // new variables into it.
+ //
+ const variable* pvar (nullptr);
+
+ if (scope_ != nullptr)
+ {
+ slock sl (script_->var_pool_mutex);
+ pvar = script_->var_pool.find (name);
+ }
+
+ return pvar != nullptr
+ ? scope_->find (*pvar) // Note: pvar can only be set if scope_ is not NULL.
+ : script_->find_in_buildfile (name);
+ }
+
+ size_t parser::
+ quoted () const
+ {
+ size_t r (0);
+
+ if (replay_ != replay::play)
+ r = lexer_->quoted (); // Not replaying: the lexer keeps the count.
+ else
+ {
+ // Examine tokens we have replayed since last reset.
+ //
+ for (size_t i (replay_quoted_); i != replay_i_; ++i)
+ if (replay_data_[i].token.qtype != quote_type::unquoted)
+ ++r;
+ }
+
+ return r;
+ }
+
+ void parser::
+ reset_quoted (token& cur)
+ {
+ if (replay_ != replay::play)
+ lexer_->reset_quoted (cur.qtype != quote_type::unquoted ? 1 : 0);
+ else
+ {
+ replay_quoted_ = replay_i_ - 1; // Start counting from the current token.
+
+ // Must be the same token.
+ //
+ assert (replay_data_[replay_quoted_].token.qtype == cur.qtype);
+ }
+ }
+
+ const string& parser::
+ insert_id (string id, location l) // Enter id into the id map failing on a duplicate.
+ {
+ auto p (id_map_->emplace (move (id), l)); // Copy l: emplace() may consume its arguments even if not inserted.
+
+ if (!p.second)
+ fail (l) << "duplicate id " << p.first->first <<
+ info (p.first->second) << "previously used here";
+
+ return p.first->first; // The key as stored in the map.
+ }
+
+ void parser::
+ set_lexer (lexer* l)
+ {
+ lexer_ = l; // Our own pointer.
+ base_parser::lexer_ = l; // And the one in the base parser.
+ }
+
+ void parser::
+ apply_value_attributes (const variable* var,
+ value& lhs,
+ value&& rhs,
+ const string& attributes,
+ token_type kind,
+ const path& name)
+ {
+ path_ = &name;
+
+ istringstream is (attributes);
+ lexer l (is, name, lexer_mode::attribute); // Lex the attribute string itself.
+ set_lexer (&l);
+
+ token t;
+ type tt;
+ next (t, tt);
+
+ if (tt != type::lsbrace && tt != type::eos) // An empty attribute string is ok.
+ fail (t) << "expected '[' instead of " << t;
+
+ attributes_push (t, tt, true);
+
+ if (tt != type::eos)
+ fail (t) << "trailing junk after ']'";
+
+ build2::parser::apply_value_attributes (var, lhs, move (rhs), kind);
+ }
+
+ // parser::parsed_doc (the active union member is selected by the re flag)
+ //
+ parser::parsed_doc::
+ parsed_doc (string s, uint64_t l, uint64_t c)
+ : str (move (s)), re (false), end_line (l), end_column (c)
+ {
+ }
+
+ parser::parsed_doc::
+ parsed_doc (regex_lines&& r, uint64_t l, uint64_t c)
+ : regex (move (r)), re (true), end_line (l), end_column (c)
+ {
+ }
+
+ parser::parsed_doc::
+ parsed_doc (parsed_doc&& d)
+ : re (d.re), end_line (d.end_line), end_column (d.end_column)
+ {
+ if (re)
+ new (&regex) regex_lines (move (d.regex)); // Construct the active member in place.
+ else
+ new (&str) string (move (d.str));
+ }
+
+ parser::parsed_doc::
+ ~parsed_doc ()
+ {
+ if (re)
+ regex.~regex_lines (); // Destroy only the active member.
+ else
+ str.~string ();
+ }
+ }
+ }
+}
diff --git a/libbuild2/test/script/parser.hxx b/libbuild2/test/script/parser.hxx
new file mode 100644
index 0000000..1beee49
--- /dev/null
+++ b/libbuild2/test/script/parser.hxx
@@ -0,0 +1,250 @@
+// file : libbuild2/test/script/parser.hxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
+// license : MIT; see accompanying LICENSE file
+
+#ifndef LIBBUILD2_TEST_SCRIPT_PARSER_HXX
+#define LIBBUILD2_TEST_SCRIPT_PARSER_HXX
+
+#include <libbuild2/types.hxx>
+#include <libbuild2/utility.hxx>
+
+#include <libbuild2/parser.hxx>
+#include <libbuild2/diagnostics.hxx>
+
+#include <libbuild2/test/script/token.hxx>
+#include <libbuild2/test/script/script.hxx>
+
+namespace build2
+{
+ namespace test
+ {
+ namespace script
+ {
+ class lexer;
+ class runner;
+
+ class parser: protected build2::parser
+ {
+ // Pre-parse. Issue diagnostics and throw failed in case of an error.
+ //
+ public:
+ void
+ pre_parse (script&);
+
+ void
+ pre_parse (istream&, script&);
+
+ // Helpers.
+ //
+ // Parse attribute string and perform attribute-guided assignment.
+ // Issue diagnostics and throw failed in case of an error.
+ //
+ void
+ apply_value_attributes (const variable*, // Optional.
+ value& lhs,
+ value&& rhs,
+ const string& attributes,
+ token_type assign_kind,
+ const path& name); // For diagnostics.
+
+ // Recursive descent parser.
+ //
+ // Usually (but not always) parse functions receive the token/type
+ // from which they should start consuming and in return the token/type
+ // should contain the first token that has not been consumed.
+ //
+ // Functions that are called parse_*() rather than pre_parse_*() are
+ // used for both stages.
+ //
+ protected:
+ bool
+ pre_parse_demote_group_scope (unique_ptr<scope>&);
+
+ token
+ pre_parse_scope_body ();
+
+ unique_ptr<group>
+ pre_parse_scope_block (token&, token_type&, const string&);
+
+ bool
+ pre_parse_line (token&, token_type&,
+ optional<description>&,
+ lines* = nullptr,
+ bool one = false);
+
+ bool
+ pre_parse_if_else (token&, token_type&,
+ optional<description>&,
+ lines&);
+
+ bool
+ pre_parse_if_else_scope (token&, token_type&,
+ optional<description>&,
+ lines&);
+
+ bool
+ pre_parse_if_else_command (token&, token_type&,
+ optional<description>&,
+ lines&);
+
+ void
+ pre_parse_directive (token&, token_type&);
+
+ void
+ pre_parse_include_line (names, location);
+
+ description
+ pre_parse_leading_description (token&, token_type&);
+
+ description
+ parse_trailing_description (token&, token_type&);
+
+ value
+ parse_variable_line (token&, token_type&);
+
+ command_expr
+ parse_command_line (token&, token_type&);
+
+ // Ordered sequence of here-document redirects that we can expect to
+ // see after the command line.
+ //
+ struct here_redirect
+ {
+ size_t expr; // Index in command_expr.
+ size_t pipe; // Index in command_pipe.
+ int fd; // Redirect fd (0 - in, 1 - out, 2 - err).
+ };
+
+ struct here_doc
+ {
+ // Redirects that share here_doc. Most of the time we will have no
+ // more than 2 (2 - for the roundtrip test cases).
+ //
+ small_vector<here_redirect, 2> redirects;
+
+ string end; // End marker.
+ bool literal; // Literal (single-quote).
+ string modifiers;
+
+ // Regex introducer ('\0' if not a regex, so can be used as bool).
+ //
+ char regex;
+
+ // Regex global flags. Meaningful if regex != '\0'.
+ //
+ string regex_flags;
+ };
+ using here_docs = vector<here_doc>;
+
+ pair<command_expr, here_docs>
+ parse_command_expr (token&, token_type&);
+
+ command_exit
+ parse_command_exit (token&, token_type&);
+
+ void
+ parse_here_documents (token&, token_type&,
+ pair<command_expr, here_docs>&);
+
+ struct parsed_doc
+ {
+ union
+ {
+ string str; // Here-document literal.
+ regex_lines regex; // Here-document regex.
+ };
+
+ bool re; // True if regex.
+ uint64_t end_line; // Here-document end marker location.
+ uint64_t end_column;
+
+ parsed_doc (string, uint64_t line, uint64_t column);
+ parsed_doc (regex_lines&&, uint64_t line, uint64_t column);
+ parsed_doc (parsed_doc&&); // Note: move constructible-only type.
+ ~parsed_doc ();
+ };
+
+ parsed_doc
+ parse_here_document (token&, token_type&,
+ const string&, // End marker.
+ const string& mode, // Redirect modifiers.
+ char re_intro); // '\0' if not a regex.
+
+ // Execute. Issue diagnostics and throw failed in case of an error.
+ //
+ public:
+ void
+ execute (script& s, runner& r);
+
+ void
+ execute (scope&, script&, runner&);
+
+ protected:
+ void
+ exec_scope_body ();
+
+ // Return false if the execution of the scope should be terminated
+ // with the success status (e.g., as a result of encountering the exit
+ // builtin). For unsuccessful termination the failed exception should
+ // be thrown.
+ //
+ bool
+ exec_lines (lines::iterator, lines::iterator, size_t&, command_type);
+
+ // Customization hooks.
+ //
+ protected:
+ virtual lookup
+ lookup_variable (name&&, string&&, const location&) override;
+
+ // Number of quoted tokens since last reset. Note that this includes
+ // the peeked token, if any.
+ //
+ protected:
+ size_t
+ quoted () const;
+
+ void
+ reset_quoted (token& current);
+
+ size_t replay_quoted_; // Replay index from which quoted() counts.
+
+ // Insert id into the id map checking for duplicates.
+ //
+ protected:
+ const string&
+ insert_id (string, location);
+
+ // Set lexer pointers for both the current and the base classes.
+ //
+ protected:
+ void
+ set_lexer (lexer* l);
+
+ protected:
+ using base_parser = build2::parser;
+
+ script* script_;
+
+ // Pre-parse state.
+ //
+ using id_map = std::unordered_map<string, location>;
+ using include_set = std::set<path>;
+
+ group* group_;
+ id_map* id_map_;
+ include_set* include_set_; // Testscripts already included in this
+ // scope. Must be absolute and normalized.
+ lexer* lexer_;
+ string id_prefix_; // Auto-derived id prefix.
+
+ // Execute state.
+ //
+ runner* runner_;
+ scope* scope_;
+ };
+ }
+ }
+}
+
+#endif // LIBBUILD2_TEST_SCRIPT_PARSER_HXX
diff --git a/libbuild2/test/script/parser.test.cxx b/libbuild2/test/script/parser.test.cxx
new file mode 100644
index 0000000..8702e18
--- /dev/null
+++ b/libbuild2/test/script/parser.test.cxx
@@ -0,0 +1,245 @@
+// file : libbuild2/test/script/parser.test.cxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
+// license : MIT; see accompanying LICENSE file
+
+#include <cassert>
+#include <iostream>
+
+#include <libbuild2/types.hxx>
+#include <libbuild2/utility.hxx>
+
+#include <libbuild2/target.hxx>
+#include <libbuild2/context.hxx> // reset()
+#include <libbuild2/scheduler.hxx>
+
+#include <libbuild2/test/target.hxx>
+
+#include <libbuild2/test/script/token.hxx>
+#include <libbuild2/test/script/parser.hxx>
+#include <libbuild2/test/script/runner.hxx>
+
+using namespace std;
+
+namespace build2
+{
+ namespace test
+ {
+ namespace script
+ {
+ // Mock runner that executes nothing: it prints the scopes and commands it
+ // is handed to stdout so that the parser's output can be compared with the
+ // expected one.
+ //
+ // The constructor flags select what is printed: scope enables the {}
+ // scope markers (and the indentation that goes with them), id appends the
+ // scope id path after the opening brace, and line appends the index
+ // passed to run()/run_if() after each command.
+ //
+ // Here we assume we are running serially.
+ //
+ class print_runner: public runner
+ {
+ public:
+ print_runner (bool scope, bool id, bool line)
+ : scope_ (scope), id_ (id), line_ (line) {}
+
+ // Unconditionally report true for every scope.
+ //
+ virtual bool
+ test (scope&) const override
+ {
+ return true;
+ }
+
+ // Print the scope description (id, summary, details), if any, followed
+ // by the opening brace if scope printing is enabled.
+ //
+ virtual void
+ enter (scope& s, const location&) override
+ {
+ if (s.desc)
+ {
+ const auto& d (*s.desc);
+
+ if (!d.id.empty ())
+ cout << ind_ << ": id:" << d.id << endl;
+
+ if (!d.summary.empty ())
+ cout << ind_ << ": sm:" << d.summary << endl;
+
+ if (!d.details.empty ())
+ {
+ if (!d.id.empty () || !d.summary.empty ())
+ cout << ind_ << ":" << endl; // Blank.
+
+ // Print the details line by line, each prefixed with ':' (and a
+ // space unless the line is empty).
+ //
+ const auto& dt (d.details);
+ for (size_t b (0), e (0), n; e != string::npos; b = e + 1)
+ {
+ e = dt.find ('\n', b);
+ n = ((e != string::npos ? e : dt.size ()) - b);
+
+ cout << ind_ << ':';
+ if (n != 0)
+ {
+ cout << ' ';
+ cout.write (dt.c_str () + b, static_cast<streamsize> (n));
+ }
+ cout << endl;
+ }
+ }
+ }
+
+ if (scope_)
+ {
+ cout << ind_ << "{";
+
+ if (id_ && !s.id_path.empty ()) // Skip empty root scope id.
+ cout << " # " << s.id_path.string ();
+
+ cout << endl;
+
+ // Two characters per nesting level: must match what leave() removes.
+ //
+ ind_ += "  ";
+ }
+ }
+
+ // Print the command expression prefixed with its type marker: nothing
+ // for a test command, '+' for setup, and '-' for teardown.
+ //
+ virtual void
+ run (scope&,
+ const command_expr& e, command_type t,
+ size_t i,
+ const location&) override
+ {
+ const char* s (nullptr);
+
+ switch (t)
+ {
+ case command_type::test: s = ""; break;
+ case command_type::setup: s = "+"; break;
+ case command_type::teardown: s = "-"; break;
+ }
+
+ cout << ind_ << s << e;
+
+ if (line_)
+ cout << " # " << i;
+
+ cout << endl;
+ }
+
+ // Print the condition expression prefixed with "? ". The condition is
+ // not executed: the result is true if the last program of the last
+ // pipe in the expression is called "true" and false otherwise.
+ //
+ virtual bool
+ run_if (scope&,
+ const command_expr& e,
+ size_t i,
+ const location&) override
+ {
+ cout << ind_ << "? " << e;
+
+ if (line_)
+ cout << " # " << i;
+
+ cout << endl;
+
+ return e.back ().pipe.back ().program.string () == "true";
+ }
+
+ // Drop one indentation level (added by enter()) and print the closing
+ // brace if scope printing is enabled.
+ //
+ virtual void
+ leave (scope&, const location&) override
+ {
+ if (scope_)
+ {
+ ind_.resize (ind_.size () - 2);
+ cout << ind_ << "}" << endl;
+ }
+ }
+
+ private:
+ bool scope_;
+ bool id_;
+ bool line_;
+ string ind_; // Current indentation (two characters per scope level).
+ };
+
+ // Usage: argv[0] [-s] [-i] [-l] [<testscript-name>]
+ //
+ // The -s, -i, and -l options are passed to print_runner as its scope, id,
+ // and line flags, respectively (-i requires -s). The testscript name
+ // defaults to "testscript"; the script itself is read from stdin.
+ //
+ // Return 0 on success and 1 if the failed exception was thrown.
+ //
+ int
+ main (int argc, char* argv[])
+ {
+ tracer trace ("main");
+
+ // Fake build system driver, default verbosity.
+ //
+ init_diag (1);
+ init (argv[0]);
+ sched.startup (1); // Serial execution.
+ reset (strings ()); // No command line variables.
+
+ bool scope (false);
+ bool id (false);
+ bool line (false);
+ path name;
+
+ // Parse the options. The first argument that is not a recognized option
+ // is taken as the testscript name and ends the parsing (anything that
+ // follows is ignored).
+ //
+ for (int i (1); i != argc; ++i)
+ {
+ string a (argv[i]);
+
+ if (a == "-s")
+ scope = true;
+ else if (a == "-i")
+ id = true;
+ else if (a == "-l")
+ line = true;
+ else
+ {
+ name = path (move (a));
+ break;
+ }
+ }
+
+ if (name.empty ())
+ name = path ("testscript");
+
+ assert (!id || scope); // Id can only be printed with scope.
+
+ try
+ {
+ cin.exceptions (istream::failbit | istream::badbit);
+
+ // Enter mock targets. Use fixed names and paths so that we can use
+ // them in expected results. Strictly speaking target paths should
+ // be absolute. However, the testscript implementation doesn't
+ // really care.
+ //
+ file& tt (
+ targets.insert<file> (work,
+ dir_path (),
+ "driver",
+ string (),
+ trace));
+
+ // Set test.target on the mock driver target to the build.host value
+ // from the global scope.
+ //
+ value& v (
+ tt.assign (
+ var_pool.rw ().insert<target_triplet> (
+ "test.target", variable_visibility::project)));
+
+ v = cast<target_triplet> ((*global_scope)["build.host"]);
+
+ // The testscript target name and extension are derived from the leaf
+ // of the name specified on the command line.
+ //
+ testscript& st (
+ targets.insert<testscript> (work,
+ dir_path (),
+ name.leaf ().base ().string (),
+ name.leaf ().extension (),
+ trace));
+
+ tt.path (path ("driver"));
+ st.path (name);
+
+ // Parse and run.
+ //
+ parser p;
+ script s (tt, st, dir_path (work) /= "test-driver");
+ p.pre_parse (cin, s);
+
+ print_runner r (scope, id, line);
+ p.execute (s, r);
+ }
+ catch (const failed&)
+ {
+ return 1;
+ }
+
+ return 0;
+ }
+ }
+ }
+}
+
+// Program entry point: forward to the driver's main() in build2::test::script.
+//
+int
+main (int argc, char* argv[])
+{
+ return build2::test::script::main (argc, argv);
+}
diff --git a/libbuild2/test/script/regex.cxx b/libbuild2/test/script/regex.cxx
new file mode 100644
index 0000000..20dfaa6
--- /dev/null
+++ b/libbuild2/test/script/regex.cxx
@@ -0,0 +1,440 @@
+// file : libbuild2/test/script/regex.cxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
+// license : MIT; see accompanying LICENSE file
+
+#include <locale>
+
+#include <libbuild2/test/script/regex.hxx>
+
+using namespace std;
+
+namespace build2
+{
+ namespace test
+ {
+ namespace script
+ {
+ namespace regex
+ {
+ static_assert (alignof (char_string) % 4 == 0,
+ "unexpected char_string alignment");
+
+ static_assert (alignof (char_regex) % 4 == 0,
+ "unexpected char_regex alignment");
+
+ static_assert (sizeof (uintptr_t) > sizeof (int16_t),
+ "unexpected uintptr_t size");
+
+ const line_char line_char::nul (0);
+ const line_char line_char::eof (-1);
+
+ // line_char
+ //
+ // We package the special character into uintptr_t with the following
+ // steps:
+ //
+ // - narrow down int value to int16_t (preserves all the valid values)
+ //
+ // - convert to uint16_t (bitwise representation stays the same, but no
+ // need to bother with signed value widening, leftmost bits loss on
+ // left shift, etc)
+ //
+ // - convert to uintptr_t (storage type)
+ //
+ // - shift left by two bits (the operation is fully reversible as
+ // uintptr_t is wider than uint16_t)
+ //
+ // The two freed rightmost bits are then used to store the character type
+ // (line_type::special).
+ //
+ line_char::
+ line_char (int c)
+ : data_ (
+ (static_cast <uintptr_t> (
+ static_cast<uint16_t> (
+ static_cast<int16_t> (c))) << 2) |
+ static_cast <uintptr_t> (line_type::special))
+ {
+ // @@ How can we allow anything for basic_regex but only subset
+ // for our own code?
+ //
+ // Extra (non-syntax) characters that are accepted, see below. Note that
+ // only the four characters are searched (the terminating null is not).
+ //
+ const char ex[] = "pn\n\r";
+
+ assert (c == 0 || // Null character.
+
+ // EOF. Note that it is also passed by msvcrt as _Meta_eos
+ // enum value.
+ //
+ c == -1 ||
+
+ // libstdc++ line/paragraph separators.
+ //
+ c == u'\u2028' || c == u'\u2029' ||
+
+ (c > 0 && c <= 255 && (
+ // Supported regex special characters.
+ //
+ syntax (c) ||
+
+ // libstdc++ look-ahead tokens, newline chars.
+ //
+ string::traits_type::find (ex, 4, c) != nullptr)));
+ }
+
+ // Create a literal character: insert the string into the pool's string
+ // set and delegate to the pointer constructor with the address of the
+ // pooled element (emplace() returns the existing element if an equal
+ // string is already in the set).
+ //
+ line_char::
+ line_char (const char_string& s, line_pool& p)
+ : line_char (&(*p.strings.emplace (s).first))
+ {
+ }
+
+ // As above but move the string into the pool.
+ //
+ line_char::
+ line_char (char_string&& s, line_pool& p)
+ : line_char (&(*p.strings.emplace (move (s)).first))
+ {
+ }
+
+ // Create a regex character: insert the regex at the front of the pool's
+ // regex list and delegate to the pointer constructor with the address of
+ // the pooled element.
+ //
+ line_char::
+ line_char (char_regex r, line_pool& p)
+ // Note: in C++17 can write as p.regexes.emplace_front(move (r))
+ //
+ : line_char (&(*p.regexes.emplace (p.regexes.begin (), move (r))))
+ {
+ }
+
+ // Return true if the character is one of the 23 regex syntax characters
+ // listed in the literal below (the count passed to find() excludes the
+ // terminating null).
+ //
+ bool
+ line_char::syntax (char c)
+ {
+ return string::traits_type::find (
+ "()|.*+?{}\\0123456789,=!", 23, c) != nullptr;
+ }
+
+ // Compare two line characters. Characters of the same type are compared
+ // by value (special) or by pooled string pointer (literal); comparing two
+ // regex characters is not supported. A literal is equal to a regex if the
+ // literal string matches the regex. Any other combination of types is
+ // unequal.
+ //
+ bool
+ operator== (const line_char& l, const line_char& r)
+ {
+ line_type lt (l.type ());
+ line_type rt (r.type ());
+
+ if (lt == rt)
+ {
+ bool res (true);
+
+ switch (lt)
+ {
+ case line_type::special: res = l.special () == r.special (); break;
+ case line_type::regex: assert (false); break;
+
+ // Note that we use pointers (rather than values) comparison
+ // assuming that the strings must belong to the same pool.
+ //
+ case line_type::literal: res = l.literal () == r.literal (); break;
+ }
+
+ return res;
+ }
+
+ // Match literal with regex.
+ //
+ if (lt == line_type::literal && rt == line_type::regex)
+ return regex_match (*l.literal (), *r.regex ());
+ else if (rt == line_type::literal && lt == line_type::regex)
+ return regex_match (*r.literal (), *l.regex ());
+
+ return false;
+ }
+
+ // Order two line characters. Equal characters (as per operator==) are
+ // never less. Characters of different types are ordered by the line_type
+ // enumerator values. Otherwise special characters are ordered by their
+ // values and literals by their string values (note: not by pointers);
+ // ordering two regex characters is not supported.
+ //
+ bool
+ operator< (const line_char& l, const line_char& r)
+ {
+ if (l == r)
+ return false;
+
+ line_type lt (l.type ());
+ line_type rt (r.type ());
+
+ if (lt != rt)
+ return lt < rt;
+
+ bool res (false);
+
+ switch (lt)
+ {
+ case line_type::special: res = l.special () < r.special (); break;
+ case line_type::literal: res = *l.literal () < *r.literal (); break;
+ case line_type::regex: assert (false); break;
+ }
+
+ return res;
+ }
+
+ // line_char_locale
+ //
+
+ // An exemplar locale with the std::ctype<line_char> facet. It is used
+ // for the subsequent line char locale objects creation (see below)
+ // which normally ends up with a shallow copy of a reference-counted
+ // object.
+ //
+ // Note that creating the line char locales from the exemplar is not
+ // merely an optimization: there is a data race in the libstdc++ (at
+ // least as of GCC 9.1) implementation of the locale(const locale&,
+ // Facet*) constructor (bug #91057).
+ //
+ // Also note that we install the facet in init() rather than during
+ // the object creation to avoid a race with the std::locale-related
+ // global variables initialization.
+ //
+ // Default-constructed here; the facet is only installed by init().
+ //
+ static locale line_char_locale_exemplar;
+
+ // Replace the exemplar with a copy of the current global locale that
+ // additionally has the std::ctype<line_char> facet installed.
+ //
+ void
+ init ()
+ {
+ line_char_locale_exemplar =
+ locale (locale (),
+ new std::ctype<line_char> ()); // Hidden by ctype bitmask.
+ }
+
+ // Create the locale as a copy of the exemplar (which is only usable
+ // after init() has been called, see the assert below).
+ //
+ line_char_locale::
+ line_char_locale ()
+ : locale (line_char_locale_exemplar)
+ {
+ // Make sure init() has been called.
+ //
+ // Note: has_facet() is hidden by a private function in libc++.
+ //
+ assert (std::has_facet<std::ctype<line_char>> (*this));
+ }
+
+ // char_regex
+ //
+ // Transform regex according to the extended flags {idot}. If regex is
+ // malformed then keep transforming, so the resulting string is
+ // malformed the same way. We expect the error to be reported by the
+ // char_regex ctor.
+ //
+ // With idot an unescaped dot outside of the character class brackets
+ // becomes escaped and vice versa while everything else is copied as is.
+ // For example:
+ //
+ // a.b -> a\.b
+ // a\.b -> a.b
+ // [.] -> [.]
+ // a\\b -> a\\b
+ //
+ static string
+ transform (const string& s, char_flags f)
+ {
+ assert ((f & char_flags::idot) != char_flags::none);
+
+ string r;
+ bool escape (false); // Previous character was an unescaped backslash.
+ bool cclass (false); // Inside the character class brackets.
+
+ for (char c: s)
+ {
+ // Inverse escaping for a dot which is out of the char class
+ // brackets.
+ //
+ bool inverse (c == '.' && !cclass);
+
+ // Handle the escape case. Note that we delay adding the backslash
+ // since we may have to inverse things.
+ //
+ if (escape)
+ {
+ if (!inverse)
+ r += '\\';
+
+ r += c;
+ escape = false;
+
+ continue;
+ }
+ else if (c == '\\')
+ {
+ escape = true;
+ continue;
+ }
+
+ // Keep track of being inside the char class brackets, escape if
+ // inversion. Note that we never inverse square brackets.
+ //
+ if (c == '[' && !cclass)
+ cclass = true;
+ else if (c == ']' && cclass)
+ cclass = false;
+ else if (inverse)
+ r += '\\';
+
+ r += c;
+ }
+
+ if (escape) // Regex is malformed but that's not our problem.
+ r += '\\';
+
+ return r;
+ }
+
+ // Translate char_flags to the standard regex flags. Only icase is
+ // translated; any other flag (such as idot) is ignored here.
+ //
+ static char_regex::flag_type
+ to_std_flags (char_flags f)
+ {
+ // Note that ECMAScript flag is implied in the absence of a grammar
+ // flag.
+ //
+ return (f & char_flags::icase) != char_flags::none
+ ? char_regex::icase
+ : char_regex::flag_type ();
+ }
+
+ // Create the underlying regex from the string, first transforming it if
+ // the idot flag is specified (see transform() for details).
+ //
+ char_regex::
+ char_regex (const char_string& s, char_flags f)
+ : base_type ((f & char_flags::idot) != char_flags::none
+ ? transform (s, f)
+ : s,
+ to_std_flags (f))
+ {
+ }
+ }
+ }
+ }
+}
+
+namespace std
+{
+ using namespace build2::test::script::regex;
+
+ // char_traits<line_char>
+ //
+
+ // Assign c to each of the first n characters of s. Return s.
+ //
+ line_char* char_traits<line_char>::
+ assign (char_type* s, size_t n, char_type c)
+ {
+ for (size_t i (0); i != n; ++i)
+ s[i] = c;
+ return s;
+ }
+
+ // Copy n characters from s to d where the ranges may overlap. Return d.
+ //
+ line_char* char_traits<line_char>::
+ move (char_type* d, const char_type* s, size_t n)
+ {
+ if (n > 0 && d != s)
+ {
+ // If d < s then it can't be in [s, s + n) range and so using copy() is
+ // safe. Otherwise d + n is out of (s, s + n] range and so using
+ // copy_backward() is safe.
+ //
+ if (d < s)
+ std::copy (s, s + n, d); // Hidden by char_traits<line_char>::copy().
+ else
+ copy_backward (s, s + n, d + n);
+ }
+
+ return d;
+ }
+
+ // Copy n characters from s to d. Return d.
+ //
+ line_char* char_traits<line_char>::
+ copy (char_type* d, const char_type* s, size_t n)
+ {
+ std::copy (s, s + n, d); // Hidden by char_traits<line_char>::copy().
+ return d;
+ }
+
+ // Compare the first n characters of s1 and s2 lexicographically using
+ // operator< for line_char. Return -1 if s1 is less, 1 if s2 is less, and
+ // 0 otherwise.
+ //
+ int char_traits<line_char>::
+ compare (const char_type* s1, const char_type* s2, size_t n)
+ {
+ for (size_t i (0); i != n; ++i)
+ {
+ if (s1[i] < s2[i])
+ return -1;
+ else if (s2[i] < s1[i])
+ return 1;
+ }
+
+ return 0;
+ }
+
+ // Return the number of characters in s that precede the first character
+ // equal to line_char::nul.
+ //
+ size_t char_traits<line_char>::
+ length (const char_type* s)
+ {
+ size_t i (0);
+ while (s[i] != char_type::nul)
+ ++i;
+
+ return i;
+ }
+
+ // Return the pointer to the first of the first n characters of s that is
+ // equal to c (as per operator== for line_char) or NULL if there is no such
+ // character.
+ //
+ const line_char* char_traits<line_char>::
+ find (const char_type* s, size_t n, const char_type& c)
+ {
+ for (size_t i (0); i != n; ++i)
+ {
+ if (s[i] == c)
+ return s + i;
+ }
+
+ return nullptr;
+ }
+
+ // ctype<line_char>
+ //
+ locale::id ctype<line_char>::id;
+
+ // For each character in [b, e) store its classification mask into the
+ // array pointed to by m: digit if this is a non-negative special
+ // character for which build2::digit() returns true and 0 otherwise.
+ // Return e.
+ //
+ const line_char* ctype<line_char>::
+ is (const char_type* b, const char_type* e, mask* m) const
+ {
+ while (b != e)
+ {
+ const char_type& c (*b++);
+
+ *m++ = c.type () == line_type::special && c.special () >= 0 &&
+ build2::digit (static_cast<char> (c.special ()))
+ ? digit
+ : 0;
+ }
+
+ return e;
+ }
+
+ // Return the pointer to the first character in [b, e) for which the
+ // single-character is(m, c) returns true or e if there is no such
+ // character.
+ //
+ const line_char* ctype<line_char>::
+ scan_is (mask m, const char_type* b, const char_type* e) const
+ {
+ for (; b != e; ++b)
+ {
+ if (is (m, *b))
+ return b;
+ }
+
+ return e;
+ }
+
+ // Return the pointer to the first character in [b, e) for which the
+ // single-character is(m, c) returns false or e if there is no such
+ // character.
+ //
+ const line_char* ctype<line_char>::
+ scan_not (mask m, const char_type* b, const char_type* e) const
+ {
+ for (; b != e; ++b)
+ {
+ if (!is (m, *b))
+ return b;
+ }
+
+ return e;
+ }
+
+ // Widen each char in [b, e) with the single-character widen() storing the
+ // results into the array pointed to by c. Return e.
+ //
+ const char* ctype<line_char>::
+ widen (const char* b, const char* e, char_type* c) const
+ {
+ while (b != e)
+ *c++ = widen (*b++);
+
+ return e;
+ }
+
+ // Narrow each character in [b, e) with the single-character narrow()
+ // (passing def as the default) storing the results into the array pointed
+ // to by c. Return e.
+ //
+ const line_char* ctype<line_char>::
+ narrow (const char_type* b, const char_type* e, char def, char* c) const
+ {
+ while (b != e)
+ *c++ = narrow (*b++, def);
+
+ return e;
+ }
+
+ // regex_traits<line_char>
+ //
+
+ // Return the numeric value of the digit character in the specified radix
+ // (8, 10, or 16) or -1 if the character is not of the special type or is
+ // not a valid digit in this radix.
+ //
+ int regex_traits<line_char>::
+ value (char_type c, int radix) const
+ {
+ assert (radix == 8 || radix == 10 || radix == 16);
+
+ if (c.type () != line_type::special)
+ return -1;
+
+ // Only the first radix characters of the table are searched so the
+ // position of the match, if any, is also the digit's value. Note that
+ // only the upper-case hexadecimal letters are in the table.
+ //
+ const char digits[] = "0123456789ABCDEF";
+ const char* d (string::traits_type::find (digits, radix, c.special ()));
+ return d != nullptr ? static_cast<int> (d - digits) : -1;
+ }
+}
diff --git a/libbuild2/test/script/regex.hxx b/libbuild2/test/script/regex.hxx
new file mode 100644
index 0000000..faec1fc
--- /dev/null
+++ b/libbuild2/test/script/regex.hxx
@@ -0,0 +1,703 @@
+// file : libbuild2/test/script/regex.hxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
+// license : MIT; see accompanying LICENSE file
+
+#ifndef LIBBUILD2_TEST_SCRIPT_REGEX_HXX
+#define LIBBUILD2_TEST_SCRIPT_REGEX_HXX
+
+#include <list>
+#include <regex>
+#include <locale>
+#include <string> // basic_string
+#include <type_traits> // make_unsigned, enable_if, is_*
+#include <unordered_set>
+
+#include <libbuild2/types.hxx>
+#include <libbuild2/utility.hxx>
+
+namespace build2
+{
+ namespace test
+ {
+ namespace script
+ {
+ namespace regex
+ {
+ using char_string = std::basic_string<char>;
+
+ enum class char_flags: uint16_t
+ {
+ icase = 0x1, // Case-insensitive match.
+ idot = 0x2, // Invert '.' escaping.
+
+ none = 0
+ };
+
+ // Restricts valid standard flags to just {icase}, extends with custom
+ // flags {idot}.
+ //
+ class char_regex: public std::basic_regex<char>
+ {
+ public:
+ using base_type = std::basic_regex<char>;
+
+ char_regex (const char_string&, char_flags = char_flags::none);
+ };
+
+ // Newlines are line separators and are not part of the line:
+ //
+ // line<newline>line<newline>
+ //
+ // Specifically, this means that a customary trailing newline creates a
+ // trailing blank line.
+ //
+ // All characters can inter-compare (though there cannot be regex
+ // characters in the output, only in line_regex).
+ //
+ // Note that we assume that line_regex and the input to regex_match()
+ // use the same pool.
+ //
+ struct line_pool
+ {
+ // Note that we assume the pool can be moved without invalidating
+ // pointers to any already pooled entities.
+ //
+ std::unordered_set<char_string> strings;
+ std::list<char_regex> regexes;
+ };
+
+ enum class line_type
+ {
+ special,
+ literal,
+ regex
+ };
+
+ struct line_char
+ {
+ // Steal last two bits from the pointer to store the type.
+ //
+ private:
+ std::uintptr_t data_;
+
+ public:
+ line_type
+ type () const {return static_cast<line_type> (data_ & 0x3);}
+
+ int
+ special () const
+ {
+ // Stored as (shifted) int16_t. Perform steps reversed to those
+ // that are described in the comment for the corresponding ctor.
+ // Note that the intermediate cast to uint16_t is required to
+ // portably preserve the -1 special character.
+ //
+ return static_cast<int16_t> (static_cast<uint16_t> (data_ >> 2));
+ }
+
+ const char_string*
+ literal () const
+ {
+ // Note that 2 rightmost bits are used for packaging line_char
+ // type. Read the comment for the corresponding ctor for details.
+ //
+ return reinterpret_cast<const char_string*> (
+ data_ & ~std::uintptr_t (0x3));
+ }
+
+ const char_regex*
+ regex () const
+ {
+ // Note that 2 rightmost bits are used for packaging line_char
+ // type. Read the comment for the corresponding ctor for details.
+ //
+ return reinterpret_cast<const char_regex*> (
+ data_ & ~std::uintptr_t (0x3));
+ }
+
+ static const line_char nul;
+ static const line_char eof;
+
+ // Note: creates an uninitialized value.
+ //
+ line_char () = default;
+
+ // Create a special character. The argument value must be one of the
+ // following ones:
+ //
+ // 0 (nul character)
+ // -1 (EOF)
+ // [()|.*+?{}\0123456789,=!] (excluding [])
+ //
+ // Note that the constructor is implicit to allow basic_regex to
+ // implicitly construct line_chars from special char literals (in
+ // particular libstdc++ appends them to an internal line_string).
+ //
+ // Also note that we extend the valid characters set (see above) with
+ // 'p', 'n' (used by libstdc++ for positive/negative look-ahead
+ // tokens representation), and '\n', '\r', u'\u2028', u'\u2029' (used
+ // by libstdc++ for newline/newparagraph matching).
+ //
+ line_char (int);
+
+ // Create a literal character.
+ //
+ // Don't copy string if already pooled.
+ //
+ explicit
+ line_char (const char_string&, line_pool&);
+
+ explicit
+ line_char (char_string&&, line_pool&);
+
+ explicit
+ line_char (const char_string* s) // Assume already pooled.
+ //
+ // Steal two bits from the pointer to package line_char type.
+ // Assume (and statically assert) that char_string address is a
+ // multiple of four.
+ //
+ : data_ (reinterpret_cast <std::uintptr_t> (s) |
+ static_cast <std::uintptr_t> (line_type::literal)) {}
+
+ // Create a regex character.
+ //
+ explicit
+ line_char (char_regex, line_pool&);
+
+ explicit
+ line_char (const char_regex* r) // Assume already pooled.
+ //
+ // Steal two bits from the pointer to package line_char type.
+ // Assume (and statically assert) that char_regex address is a
+ // multiple of four.
+ //
+ : data_ (reinterpret_cast <std::uintptr_t> (r) |
+ static_cast <std::uintptr_t> (line_type::regex)) {}
+
+ // Provide basic_regex with the ability to use line_char in a context
+ // where a char value is expected (e.g., as a function argument).
+ //
+ // libstdc++ seems to cast special line_chars only (and such a
+ // conversion is meaningful).
+ //
+ // msvcrt casts line_chars of arbitrary types instead. The only
+ // reasonable strategy is to return a value that differs from any
+ // other that can be encountered in a regex expression and so is
+ // unlikely to be misinterpreted.
+ //
+ operator char () const
+ {
+ return type () == line_type::special ? special () : '\a'; // BELL.
+ }
+
+ // Return true if the character is a syntax (special) one.
+ //
+ static bool
+ syntax (char);
+
+ // Provide basic_regex (such as from msvcrt) with the ability to
+ // explicitly cast line_chars to implementation-specific enums.
+ //
+ template <typename T>
+ explicit
+ operator T () const
+ {
+ assert (type () == line_type::special);
+ return static_cast<T> (special ());
+ }
+ };
+
+ // Perform "deep" characters comparison (for example match literal
+ // character with a regex character), rather than just compare them
+ // literally. At least one argument must be of a type other than regex
+ // as there is no operator==() defined to compare regexes. Characters
+ // of the literal type must share the same pool (strings are compared
+ // by pointers not by values).
+ //
+ bool
+ operator== (const line_char&, const line_char&);
+
+ // Return false if arguments are equal (operator==() returns true).
+ // Otherwise if types are different return the value implying that
+ // special < literal < regex. If types are special or literal return
+ // the result of the respective characters or strings comparison. At
+ // least one argument must be of a type other than regex as there is no
+ // operator<() defined to compare regexes.
+ //
+ // While not a very natural operation for the class we have, we have to
+ // provide some meaningful semantics for such a comparison as it is
+ // required by the char_traits<line_char> specialization. While we
+ // could provide it right in that specialization, let's keep it here
+ // for basic_regex implementations that potentially can compare
+ // line_chars as they compare them with expressions of other types (see
+ // below).
+ //
+ bool
+ operator< (const line_char&, const line_char&);
+
+ inline bool
+ operator!= (const line_char& l, const line_char& r)
+ {
+ return !(l == r);
+ }
+
+ inline bool
+ operator<= (const line_char& l, const line_char& r)
+ {
+ return l < r || l == r;
+ }
+
+ // Provide basic_regex (such as from msvcrt) with the ability to
+ // compare line_char to a value of an integral or
+ // implementation-specific enum type. In the absence of the following
+ // template operators, such comparisons would be ambiguous for
+ // integral types (given that there are implicit conversions
+ // int->line_char and line_char->char) and impossible for enums.
+ //
+ // Note that these == and < operators can succeed only for a line_char
+ // of the special type. For other types they always return false. That
+ // in particular leads to the following case:
+ //
+ // (lc != c) != (lc < c || c < lc).
+ //
+ // Note that we can not assert line_char is of the special type as
+ // basic_regex (such as from libc++) may need the ability to check if
+ // arbitrary line_char belongs to some special characters range (like
+ // ['0', '9']).
+ //
+ template <typename T>
+ struct line_char_cmp
+ : public std::enable_if<std::is_integral<T>::value ||
+ (std::is_enum<T>::value &&
+ !std::is_same<T, char_flags>::value)> {};
+
+ template <typename T, typename = typename line_char_cmp<T>::type>
+ bool
+ operator== (const line_char& l, const T& r)
+ {
+ return l.type () == line_type::special &&
+ static_cast<T> (l.special ()) == r;
+ }
+
+ template <typename T, typename = typename line_char_cmp<T>::type>
+ bool
+ operator== (const T& l, const line_char& r)
+ {
+ return r.type () == line_type::special &&
+ static_cast<T> (r.special ()) == l;
+ }
+
+ template <typename T, typename = typename line_char_cmp<T>::type>
+ bool
+ operator!= (const line_char& l, const T& r)
+ {
+ return !(l == r);
+ }
+
+ template <typename T, typename = typename line_char_cmp<T>::type>
+ bool
+ operator!= (const T& l, const line_char& r)
+ {
+ return !(l == r);
+ }
+
+ template <typename T, typename = typename line_char_cmp<T>::type>
+ bool
+ operator< (const line_char& l, const T& r)
+ {
+ return l.type () == line_type::special &&
+ static_cast<T> (l.special ()) < r;
+ }
+
+ template <typename T, typename = typename line_char_cmp<T>::type>
+ bool
+ operator< (const T& l, const line_char& r)
+ {
+ return r.type () == line_type::special &&
+ l < static_cast<T> (r.special ());
+ }
+
+ template <typename T, typename = typename line_char_cmp<T>::type>
+ inline bool
+ operator<= (const line_char& l, const T& r)
+ {
+ return l < r || l == r;
+ }
+
+ template <typename T, typename = typename line_char_cmp<T>::type>
+ inline bool
+ operator<= (const T& l, const line_char& r)
+ {
+ return l < r || l == r;
+ }
+
+ using line_string = std::basic_string<line_char>;
+
+ // Locale that has ctype<line_char> facet installed. Used in the
+ // regex_traits<line_char> specialization (see below).
+ //
+ class line_char_locale: public std::locale
+ {
+ public:
+ // Create a copy of the global C++ locale.
+ //
+ line_char_locale ();
+ };
+
+ // Initialize the testscript regex global state. Should be called once
+ // prior to creating objects of types from this namespace. Note: not
+ // thread-safe.
+ //
+ void
+ init ();
+ }
+ }
+ }
+}
+
+// Standard template specializations for line_char that are required for the
+// basic_regex<line_char> instantiation.
+//
+namespace std
+{
+ template <>
+ class char_traits<build2::test::script::regex::line_char>
+ {
+ public:
+ using char_type = build2::test::script::regex::line_char;
+ using int_type = char_type;
+ using off_type = char_traits<char>::off_type;
+ using pos_type = char_traits<char>::pos_type;
+ using state_type = char_traits<char>::state_type;
+
+ static void
+ assign (char_type& c1, const char_type& c2) {c1 = c2;}
+
+ static char_type*
+ assign (char_type*, size_t, char_type);
+
+ // Note that eq() and lt() are not constexpr (as required by C++11)
+ // because == and < operators for char_type are not constexpr.
+ //
+ static bool
+ eq (const char_type& l, const char_type& r) {return l == r;}
+
+ static bool
+ lt (const char_type& l, const char_type& r) {return l < r;}
+
+ static char_type*
+ move (char_type*, const char_type*, size_t);
+
+ static char_type*
+ copy (char_type*, const char_type*, size_t);
+
+ static int
+ compare (const char_type*, const char_type*, size_t);
+
+ static size_t
+ length (const char_type*);
+
+ static const char_type*
+ find (const char_type*, size_t, const char_type&);
+
+ static constexpr char_type
+ to_char_type (const int_type& c) {return c;}
+
+ static constexpr int_type
+ to_int_type (const char_type& c) {return int_type (c);}
+
+ // Note that the following functions are not constexpr (as required by
+ // C++11) because their return expressions are not constexpr.
+ //
+ static bool
+ eq_int_type (const int_type& l, const int_type& r) {return l == r;}
+
+ static int_type eof () {return char_type::eof;}
+
+ static int_type
+ not_eof (const int_type& c)
+ {
+ return c != char_type::eof ? c : char_type::nul;
+ }
+ };
+
+ // ctype<> must be derived from both ctype_base and locale::facet (the latter
+ // supports ref-counting used by the std::locale implementation internally).
+ //
+ // msvcrt for some reason also derives ctype_base from locale::facet which
+ // produces "already a base-class" warning and effectively breaks the
+ // reference counting. So we derive from ctype_base only in this case.
+ //
+ template <>
+ class ctype<build2::test::script::regex::line_char>: public ctype_base
+#if !defined(_MSC_VER) || _MSC_VER >= 2000
+ , public locale::facet
+#endif
+ {
+ // Used by the implementation only.
+ //
+ using line_type = build2::test::script::regex::line_type;
+
+ public:
+ using char_type = build2::test::script::regex::line_char;
+
+ static locale::id id;
+
+#if !defined(_MSC_VER) || _MSC_VER >= 2000
+ explicit
+ ctype (size_t refs = 0): locale::facet (refs) {}
+#else
+ explicit
+ ctype (size_t refs = 0): ctype_base (refs) {}
+#endif
+
+ // While unnecessary, let's keep for completeness.
+ //
+ virtual
+ ~ctype () override = default;
+
+ // The C++ standard requires the following functions to call their virtual
+ // (protected) do_*() counterparts that provide the real implementations.
+ // The only purpose for this indirection is to provide a user with the
+ // ability to customize existing (standard) ctype facets. As we do not
+ // provide such an ability, for simplicity we will omit the do_*()
+ // functions and provide the implementations directly. This should be safe
+ // as nobody except us could call those protected functions.
+ //
+ bool
+ is (mask m, char_type c) const
+ {
+ return m ==
+ (c.type () == line_type::special && c.special () >= 0 &&
+ build2::digit (static_cast<char> (c.special ()))
+ ? digit
+ : 0);
+ }
+
+ const char_type*
+ is (const char_type*, const char_type*, mask*) const;
+
+ const char_type*
+ scan_is (mask, const char_type*, const char_type*) const;
+
+ const char_type*
+ scan_not (mask, const char_type*, const char_type*) const;
+
+ char_type
+ toupper (char_type c) const {return c;}
+
+ const char_type*
+ toupper (char_type*, const char_type* e) const {return e;}
+
+ char_type
+ tolower (char_type c) const {return c;}
+
+ const char_type*
+ tolower (char_type*, const char_type* e) const {return e;}
+
+ char_type
+ widen (char c) const {return char_type (c);}
+
+ const char*
+ widen (const char*, const char*, char_type*) const;
+
+ char
+ narrow (char_type c, char def) const
+ {
+ return c.type () == line_type::special ? c.special () : def;
+ }
+
+ const char_type*
+ narrow (const char_type*, const char_type*, char, char*) const;
+ };
+
+ // Note: the current application locale must be the POSIX one. Otherwise the
+ // behavior is undefined.
+ //
+ template <>
+ class regex_traits<build2::test::script::regex::line_char>
+ {
+ public:
+ using char_type = build2::test::script::regex::line_char;
+ using string_type = build2::test::script::regex::line_string;
+ using locale_type = build2::test::script::regex::line_char_locale;
+ using char_class_type = regex_traits<char>::char_class_type;
+
+ // Workaround for msvcrt bugs. For some reason it assumes such members
+ // to be present in a regex_traits specialization.
+ //
+#if defined(_MSC_VER) && _MSC_VER < 2000
+ static const ctype_base::mask _Ch_upper = ctype_base::upper;
+ static const ctype_base::mask _Ch_alpha = ctype_base::alpha;
+
+ // Unsigned char_type. msvcrt statically asserts the _Uelem type is
+ // unsigned, so we specialize is_unsigned<line_char> as well (see below).
+ //
+ using _Uelem = char_type;
+#endif
+
+ regex_traits () = default; // Unnecessary but let's keep for completeness.
+
+ static size_t
+ length (const char_type* p) {return string_type::traits_type::length (p);}
+
+ char_type
+ translate (char_type c) const {return c;}
+
+ // Case-insensitive matching is not supported by line_regex. So there is no
+ // reason for the function to be called.
+ //
+ char_type
+ translate_nocase (char_type c) const {assert (false); return c;}
+
+ // Return a sort-key - the exact copy of [b, e).
+ //
+ template <typename I>
+ string_type
+ transform (I b, I e) const {return string_type (b, e);}
+
+ // Return a case-insensitive sort-key. Case-insensitive matching is not
+ // supported by line_regex. So there is no reason for the function to be
+ // called.
+ //
+ template <typename I>
+ string_type
+ transform_primary (I b, I e) const
+ {
+ assert (false);
+ return string_type (b, e);
+ }
+
+ // POSIX regex grammar and collating elements (e.g., [.tilde.]) in
+ // particular are not supported. So there is no reason for the function to
+ // be called.
+ //
+ template <typename I>
+ string_type
+ lookup_collatename (I, I) const {assert (false); return string_type ();}
+
+ // Character classes (e.g., [:lower:]) are not supported. So there is no
+ // reason for the function to be called.
+ //
+ template <typename I>
+ char_class_type
+ lookup_classname (I, I, bool = false) const
+ {
+ assert (false);
+ return char_class_type ();
+ }
+
+ // Return false as we don't support character classes (e.g., [:lower:]).
+ //
+ bool
+ isctype (char_type, char_class_type) const {return false;}
+
+ int
+ value (char_type, int) const;
+
+ // Return the locale passed as an argument as we do not expect anything
+ // other than POSIX locale, that we also assume to be imbued by default.
+ //
+ locale_type
+ imbue (locale_type l) {return l;}
+
+ locale_type
+ getloc () const {return locale_type ();}
+ };
+
+ // We assume line_char to be an unsigned type and express that with the
+ // following specializations used by basic_regex implementations.
+ //
+ // libstdc++ defines unsigned CharT type (regex_traits template parameter)
+ // to use as an index in some internal cache regardless if the cache is used
+ // for this specialization (and the cache is used only if CharT is char).
+ //
+ template <>
+ struct make_unsigned<build2::test::script::regex::line_char>
+ {
+ using type = build2::test::script::regex::line_char;
+ };
+
+ // msvcrt assumes regex_traits<line_char>::_Uelem to be present (see above)
+ // and statically asserts it is unsigned.
+ //
+ // And starting from VC 16.1, is_unsigned_v is not implemented in terms of
+ // is_unsigned so we have to get deeper into the implementation details.
+ //
+#if defined(_MSC_VER) && _MSC_VER >= 1921
+ template <>
+ struct _Sign_base<build2::test::script::regex::line_char, false>
+ {
+ static constexpr bool _Signed = false;
+ static constexpr bool _Unsigned = true;
+ };
+#else
+ template <>
+ struct is_unsigned<build2::test::script::regex::line_char>
+ {
+ static const bool value = true;
+ };
+#endif
+
+ // When used with libc++ the linker complains that it can't find
+ // __match_any_but_newline<line_char>::__exec() function. The problem is
+ // that the function is only specialized for char and wchar_t
+ // (LLVM bug #31409). As line_char has no notion of the newline character we
+ // specialize the class template to behave as the __match_any<line_char>
+ // instantiation does (that luckily has all the functions in place).
+ //
+#if defined(_LIBCPP_VERSION) && _LIBCPP_VERSION <= 8000
+ template <>
+ class __match_any_but_newline<build2::test::script::regex::line_char>
+ : public __match_any<build2::test::script::regex::line_char>
+ {
+ public:
+ using base = __match_any<build2::test::script::regex::line_char>;
+ using base::base;
+ };
+#endif
+}
+
+namespace build2
+{
+ namespace test
+ {
+ namespace script
+ {
+ namespace regex
+ {
+ // Regex over line_chars that also holds the pool which was passed to
+ // its constructor (see the pool member below).
+ //
+ class line_regex: public std::basic_regex<line_char>
+ {
+ public:
+ using base_type = std::basic_regex<line_char>;
+
+ using base_type::base_type;
+
+ line_regex () = default;
+
+ // Move string regex together with the pool used to create it.
+ //
+ // Note that the string is copied by the base and then cleared to
+ // leave it in the moved-from-like state.
+ //
+ line_regex (line_string&& s, line_pool&& p)
+ // No move-string ctor for base_type, so emulate it.
+ //
+ : base_type (s), pool (move (p)) {s.clear ();}
+
+ // Move constructible/assignable-only type.
+ //
+ line_regex (line_regex&&) = default;
+ line_regex (const line_regex&) = delete;
+ line_regex& operator= (line_regex&&) = default;
+ line_regex& operator= (const line_regex&) = delete;
+
+ public:
+ line_pool pool;
+ };
+ }
+ }
+ }
+}
+
+#include <libbuild2/test/script/regex.ixx>
+
+#endif // LIBBUILD2_TEST_SCRIPT_REGEX_HXX
diff --git a/libbuild2/test/script/regex.ixx b/libbuild2/test/script/regex.ixx
new file mode 100644
index 0000000..c5b638e
--- /dev/null
+++ b/libbuild2/test/script/regex.ixx
@@ -0,0 +1,35 @@
+// file : libbuild2/test/script/regex.ixx -*- C++ -*-
+// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
+// license : MIT; see accompanying LICENSE file
+
+namespace build2
+{
+ namespace test
+ {
+ namespace script
+ {
+ namespace regex
+ {
+ inline char_flags
+ operator&= (char_flags& x, char_flags y)
+ {
+ return x = static_cast<char_flags> (
+ static_cast<uint16_t> (x) & static_cast<uint16_t> (y)); // AND via uint16_t, stored back into x.
+ }
+
+ inline char_flags
+ operator|= (char_flags& x, char_flags y)
+ {
+ return x = static_cast<char_flags> (
+ static_cast<uint16_t> (x) | static_cast<uint16_t> (y)); // OR via uint16_t, stored back into x.
+ }
+
+ inline char_flags
+ operator& (char_flags x, char_flags y) {return x &= y;} // x is a by-value copy, so the caller's argument is unchanged.
+
+ inline char_flags
+ operator| (char_flags x, char_flags y) {return x |= y;} // x is a by-value copy, so the caller's argument is unchanged.
+ }
+ }
+ }
+}
diff --git a/libbuild2/test/script/regex.test.cxx b/libbuild2/test/script/regex.test.cxx
new file mode 100644
index 0000000..f205154
--- /dev/null
+++ b/libbuild2/test/script/regex.test.cxx
@@ -0,0 +1,302 @@
+// file : libbuild2/test/script/regex.test.cxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
+// license : MIT; see accompanying LICENSE file
+
+#include <regex>
+#include <type_traits> // is_pod, is_array
+
+#include <libbuild2/test/script/regex.hxx>
+
+using namespace std;
+using namespace build2::test::script::regex;
+
+int
+main () // Exercises line_char, its traits/locale support, and line_regex via asserts.
+{
+ using lc = line_char;
+ using ls = line_string;
+ using lr = line_regex;
+ using cf = char_flags;
+ using cr = char_regex;
+
+ init (); // Initializes the testscript regex global state.
+
+ // Test line_char.
+ //
+ {
+ static_assert (is_pod<lc>::value && !is_array<lc>::value,
+ "line_char must be char-like");
+
+ // Zero-initialized line_char should be the null-char as required by
+ // char_traits<>::length() specification.
+ //
+ assert (lc () == lc::nul);
+
+ line_pool p;
+
+ assert (lc::eof == -1);
+ assert (lc::nul == 0);
+
+ enum meta {mn = 'n', mp = 'p'};
+
+ // Special roundtrip.
+ //
+ assert (lc ('0').special () == '0');
+ assert (lc (0).special () == 0);
+ assert (lc (-1).special () == -1);
+ assert (lc ('p').special () == 'p');
+ assert (lc (u'\u2028').special () == u'\u2028');
+
+ // Special comparison.
+ //
+ assert (lc ('0') == lc ('0'));
+ assert (lc ('0') == '0');
+ assert (lc ('n') == mn);
+ assert (mn == static_cast<meta> (lc ('n')));
+
+ assert (lc ('0') != lc ('1'));
+ assert (lc ('0') != '1');
+ assert (lc ('n') != mp);
+ assert (lc ('0') != lc ("0", p));
+ assert (lc ('0') != lc (cr ("0"), p));
+
+ assert (lc ('0') < lc ('1'));
+ assert (lc ('0') < '1');
+ assert (lc ('1') < lc ("0", p));
+ assert (lc ('n') < mp);
+
+ assert (lc ('0') <= '1');
+ assert (lc ('0') <= lc ('1'));
+ assert (lc ('n') <= mn);
+ assert (lc ('1') <= lc ("0", p));
+
+ // Literal roundtrip.
+ //
+ assert (*lc ("abc", p).literal () == "abc");
+
+ // Literal comparison.
+ //
+ assert (lc ("a", p) == lc ("a", p));
+ assert (lc ("a", p).literal () == lc ("a", p).literal ());
+ assert (char (lc ("a", p)) == '\a');
+
+ assert (lc ("a", p) != lc ("b", p));
+ assert (!(lc ("a", p) != lc (cr ("a"), p)));
+ assert (lc ("a", p) != lc (cr ("b"), p));
+
+ assert (lc ("a", p) < lc ("b", p));
+ assert (!(lc ("a", p) < lc (cr ("a"), p)));
+
+ assert (lc ("a", p) <= lc ("b", p));
+ assert (lc ("a", p) <= lc (cr ("a"), p));
+ assert (lc ("a", p) < lc (cr ("c"), p));
+
+ // Regex roundtrip.
+ //
+ assert (regex_match ("abc", *lc (cr ("abc"), p).regex ()));
+
+ // Regex flags.
+ //
+ // icase
+ //
+ assert (regex_match ("ABC", cr ("abc", cf::icase)));
+
+ // idot
+ //
+ assert (!regex_match ("a", cr ("[.]", cf::idot)));
+ assert (!regex_match ("a", cr ("[\\.]", cf::idot)));
+
+ assert (regex_match ("a", cr (".")));
+ assert (!regex_match ("a", cr (".", cf::idot)));
+ assert (regex_match ("a", cr ("\\.", cf::idot)));
+ assert (!regex_match ("a", cr ("\\.")));
+
+ // regex::transform()
+ //
+ // The function is static and we can't test it directly. So we will test
+ // it indirectly via regex matches.
+ //
+ // @@ Would be nice to somehow address the inability to test internals (not
+ // exposed via headers). As a part of utility library support?
+ //
+ assert (regex_match (".a[.", cr (".\\.\\[[.]", cf::idot)));
+ assert (regex_match (".a[.", cr (".\\.\\[[\\.]", cf::idot)));
+ assert (!regex_match ("ba[.", cr (".\\.\\[[.]", cf::idot)));
+ assert (!regex_match (".a[b", cr (".\\.\\[[.]", cf::idot)));
+ assert (!regex_match (".a[b", cr (".\\.\\[[\\.]", cf::idot)));
+
+ // Regex comparison.
+ //
+ assert (lc ("a", p) == lc (cr ("a|b"), p));
+ assert (lc (cr ("a|b"), p) == lc ("a", p));
+ }
+
+ // Test char_traits<line_char>.
+ //
+ {
+ using ct = char_traits<lc>;
+ using vc = vector<lc>;
+
+ lc c;
+ ct::assign (c, '0');
+ assert (c == ct::char_type ('0'));
+
+ assert (ct::to_char_type (c) == c);
+ assert (ct::to_int_type (c) == c);
+
+ assert (ct::eq_int_type (c, c));
+ assert (!ct::eq_int_type (c, lc::eof));
+
+ assert (ct::eof () == lc::eof);
+
+ assert (ct::not_eof (c) == c);
+ assert (ct::not_eof (lc::eof) != lc::eof);
+
+ ct::assign (&c, 1, '1');
+ assert (c == ct::int_type ('1'));
+
+ assert (ct::eq (lc ('0'), lc ('0')));
+ assert (ct::lt (lc ('0'), lc ('1')));
+
+ vc v1 ({'0', '1', '2'});
+ vc v2 (3, lc::nul);
+
+ assert (ct::find (v1.data (), 3, '1') == v1.data () + 1);
+
+ ct::copy (v2.data (), v1.data (), 3);
+ assert (v2 == v1);
+
+ v2.push_back (lc::nul);
+ assert (ct::length (v2.data ()) == 3);
+
+ // Overlapping ranges.
+ //
+ ct::move (v1.data () + 1, v1.data (), 2);
+ assert (v1 == vc ({'0', '0', '1'}));
+
+ v1 = vc ({'0', '1', '2'});
+ ct::move (v1.data (), v1.data () + 1, 2);
+ assert (v1 == vc ({'1', '2', '2'}));
+ }
+
+ // Test line_char_locale and ctype<line_char> (only non-trivial functions).
+ //
+ {
+ using ct = ctype<lc>;
+
+ line_char_locale l;
+
+ // It is better not to create a facet on the stack as it is
+ // reference-countable.
+ //
+ const ct& t (use_facet<ct> (l));
+ line_pool p;
+
+ assert (t.is (ct::digit, '0'));
+ assert (!t.is (ct::digit, '?'));
+ assert (!t.is (ct::digit, lc ("0", p)));
+
+ const lc chars[] = { '0', '?' };
+ ct::mask m[2];
+
+ const lc* b (chars);
+ const lc* e (chars + 2);
+
+ // Cast flag value to mask type and compare to mask.
+ //
+ auto fl = [] (ct::mask m, ct::mask f) {return m == f;};
+
+ t.is (b, e, m);
+ assert (fl (m[0], ct::digit) && fl (m[1], 0));
+
+ assert (t.scan_is (ct::digit, b, e) == b);
+ assert (t.scan_is (0, b, e) == b + 1);
+
+ assert (t.scan_not (ct::digit, b, e) == b + 1);
+ assert (t.scan_not (0, b, e) == b);
+
+ {
+ char nr[] = "0?";
+ lc wd[2];
+ t.widen (nr, nr + 2, wd);
+ assert (wd[0] == b[0] && wd[1] == b[1]);
+ }
+
+ {
+ lc wd[] = {'0', lc ("a", p)};
+ char nr[2];
+ t.narrow (wd, wd + 2, '-', nr);
+ assert (nr[0] == '0' && nr[1] == '-');
+ }
+ }
+
+ // Test regex_traits<line_char>. Functions other than value() are trivial.
+ //
+ {
+ regex_traits<lc> t;
+
+ const int radix[] = {8, 10}; // Radix 16 is not supported by line_char.
+ const char digits[] = "0123456789ABCDEF";
+
+ for (size_t r (0); r < 2; ++r)
+ {
+ for (int i (0); i < radix[r]; ++i)
+ assert (t.value (digits[i], radix[r]) == i);
+ }
+ }
+
+ // Test line_regex construction.
+ //
+ {
+ line_pool p;
+ lr r1 ({lc ("foo", p), lc (cr ("ba(r|z)"), p)}, move (p));
+
+ lr r2 (move (r1));
+ assert (regex_match (ls ({lc ("foo", r2.pool), lc ("bar", r2.pool)}), r2));
+ assert (!regex_match (ls ({lc ("foo", r2.pool), lc ("ba", r2.pool)}), r2));
+ }
+
+ // Test line_regex match.
+ //
+ {
+ line_pool p;
+
+ const lc foo ("foo", p);
+ const lc bar ("bar", p);
+ const lc baz ("baz", p);
+ const lc blank ("", p);
+
+ assert (regex_match (ls ({foo, bar}), lr ({foo, bar})));
+ assert (!regex_match (ls ({foo, baz}), lr ({foo, bar})));
+
+ assert (regex_match (ls ({bar, foo}),
+ lr ({'(', foo, '|', bar, ')', '+'})));
+
+ assert (regex_match (ls ({foo, foo, bar}),
+ lr ({'(', foo, ')', '\\', '1', bar})));
+
+ assert (regex_match (ls ({foo}), lr ({lc (cr ("fo+"), p)})));
+ assert (regex_match (ls ({foo}), lr ({lc (cr (".*"), p)})));
+ assert (regex_match (ls ({blank}), lr ({lc (cr (".*"), p)})));
+
+ assert (regex_match (ls ({blank, blank, foo}),
+ lr ({blank, '*', foo, blank, '*'})));
+
+ assert (regex_match (ls ({blank, blank, foo}), lr ({'.', '*'})));
+
+ assert (regex_match (ls ({blank, blank}),
+ lr ({blank, '*', foo, '?', blank, '*'})));
+
+ assert (regex_match (ls ({foo}), lr ({foo, '{', '1', '}'})));
+ assert (regex_match (ls ({foo, foo}), lr ({foo, '{', '1', ',', '}'})));
+
+ assert (regex_match (ls ({foo, foo}),
+ lr ({foo, '{', '1', ',', '2', '}'})));
+
+ assert (!regex_match (ls ({foo, foo}),
+ lr ({foo, '{', '3', ',', '4', '}'})));
+
+ assert (regex_match (ls ({foo}), lr ({'(', '?', '=', foo, ')', foo})));
+ assert (regex_match (ls ({foo}), lr ({'(', '?', '!', bar, ')', foo})));
+ }
+}
diff --git a/libbuild2/test/script/runner.cxx b/libbuild2/test/script/runner.cxx
new file mode 100644
index 0000000..6c1becd
--- /dev/null
+++ b/libbuild2/test/script/runner.cxx
@@ -0,0 +1,1891 @@
+// file : libbuild2/test/script/runner.cxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
+// license : MIT; see accompanying LICENSE file
+
+#include <libbuild2/test/script/runner.hxx>
+
+#include <set>
+#include <ios> // streamsize
+
+#include <libbutl/regex.mxx>
+#include <libbutl/fdstream.mxx> // fdopen_mode, fdnull(), fddup()
+
+#include <libbuild2/variable.hxx>
+#include <libbuild2/filesystem.hxx>
+#include <libbuild2/diagnostics.hxx>
+
+#include <libbuild2/test/common.hxx>
+
+#include <libbuild2/test/script/regex.hxx>
+#include <libbuild2/test/script/parser.hxx>
+#include <libbuild2/test/script/builtin.hxx>
+
+using namespace std;
+using namespace butl;
+
+namespace build2
+{
+ namespace test
+ {
+ namespace script
+ {
+ // Normalize a path, first completing a relative one against the scope's
+ // working directory (sp.wd_path). Fail at location l if the path is invalid.
+ //
+ static path
+ normalize (path p, const scope& sp, const location& l)
+ {
+ path r (p.absolute () ? move (p) : sp.wd_path / move (p));
+
+ try
+ {
+ r.normalize ();
+ }
+ catch (const invalid_path& e)
+ {
+ fail (l) << "invalid file path " << e.path;
+ }
+
+ return r;
+ }
+
+ // Return true if the path is not empty and refers to an existing, non-empty
+ // file. Fail at location ll if the file cannot be read.
+ //
+ static bool
+ non_empty (const path& p, const location& ll)
+ {
+ if (p.empty () || !exists (p))
+ return false;
+
+ try
+ {
+ ifdstream is (p);
+ return is.peek () != ifdstream::traits_type::eof (); // Non-empty if a character is available.
+ }
+ catch (const io_error& e)
+ {
+ // While the failure may not be the fault of the test command currently
+ // being executed, let's add the location anyway to ease
+ // troubleshooting. And let's stick to that principle down the road.
+ //
+ fail (ll) << "unable to read " << p << ": " << e << endf;
+ }
+ }
+
+ // If the file exists, is not empty, and is not larger than 4KB, print it to
+ // the diag record. The file content starts on a new line and is not
+ // indented.
+ //
+ static void
+ print_file (diag_record& d, const path& p, const location& ll)
+ {
+ if (exists (p))
+ {
+ try
+ {
+ ifdstream is (p, ifdstream::in, ifdstream::badbit);
+
+ if (is.peek () != ifdstream::traits_type::eof ())
+ {
+ char buf[4096 + 1]; // Extra byte is for terminating '\0'.
+
+ // Note that the string is always '\0'-terminated with a maximum
+ // sizeof (buf) - 1 bytes read.
+ //
+ is.getline (buf, sizeof (buf), '\0');
+
+ // Print if the file fits into the 4KB-size buffer. Note that if it
+ // doesn't, the failbit is set.
+ //
+ if (is.eof ())
+ {
+ // Suppress the trailing newline character as the diag record
+ // adds its own one when flushed.
+ //
+ streamsize n (is.gcount ());
+ assert (n > 0);
+
+ // Note that if the file contains '\0' it will also be counted
+ // by gcount(). But even in the worst case we will stay in the
+ // buffer boundaries (and so not crash).
+ //
+ if (buf[n - 1] == '\n')
+ buf[n - 1] = '\0';
+
+ d << '\n' << buf;
+ }
+ }
+ }
+ catch (const io_error& e)
+ {
+ fail (ll) << "unable to read " << p << ": " << e;
+ }
+ }
+ }
+
+ // Print the first 10 directory sub-entries to the diag record, followed by
+ // the number of the remaining ones, if any. The directory must exist.
+ //
+ static void
+ print_dir (diag_record& d, const dir_path& p, const location& ll)
+ {
+ try
+ {
+ size_t n (0); // Total number of entries seen (not only the printed ones).
+ for (const dir_entry& de: dir_iterator (p,
+ false /* ignore_dangling */))
+ {
+ if (n++ < 10)
+ d << '\n' << (de.ltype () == entry_type::directory
+ ? path_cast<dir_path> (de.path ())
+ : de.path ());
+ }
+
+ if (n > 10)
+ d << "\nand " << n - 10 << " more file(s)";
+ }
+ catch (const system_error& e)
+ {
+ fail (ll) << "unable to iterate over " << p << ": " << e;
+ }
+ }
+
+ // Save a string to the file. Fail (at location ll) if an io_error is thrown
+ // by the underlying operations.
+ //
+ static void
+ save (const path& p, const string& s, const location& ll)
+ {
+ try
+ {
+ ofdstream os (p);
+ os << s;
+ os.close ();
+ }
+ catch (const io_error& e)
+ {
+ fail (ll) << "unable to write " << p << ": " << e;
+ }
+ }
+
+ // Return the value of the test.target variable (fail if it is NULL).
+ //
+ static inline const target_triplet&
+ test_target (const script& s)
+ {
+ // @@ Would be nice to use cached value from test::common_data.
+ //
+ if (auto r = cast_null<target_triplet> (s.test_target["test.target"]))
+ return *r;
+
+ // We set it to the default value in init() so it can only be NULL if the
+ // user resets it.
+ //
+ fail << "invalid test.target value" << endf;
+ }
+
+ // Transform string according to here-* redirect modifiers from the {/} set:
+ // '/' converts forward slashes to backslashes for Windows targets only.
+ //
+ static string
+ transform (const string& s,
+ bool regex,
+ const string& modifiers,
+ const script& scr)
+ {
+ if (modifiers.find ('/') == string::npos)
+ return s; // No '/' modifier: nothing to do.
+
+ // For targets other than Windows leave the string intact.
+ //
+ if (test_target (scr).class_ != "windows")
+ return s;
+
+ // Convert forward slashes to Windows path separators (escaped for
+ // regex).
+ //
+ string r;
+ for (size_t p (0);;)
+ {
+ size_t sp (s.find ('/', p));
+
+ if (sp != string::npos)
+ {
+ r.append (s, p, sp - p);
+ r.append (regex ? "\\\\" : "\\");
+ p = sp + 1;
+ }
+ else
+ {
+ r.append (s, p, sp); // sp is npos here: append the rest of s.
+ break;
+ }
+ }
+
+ return r;
+ }
+
+ // Check if the test command output matches the expected result (redirect
+ // value). Noop for redirect types other than none, here_*, file (compare).
+ //
+ static bool
+ check_output (const path& pr,
+ const path& op,
+ const path& ip,
+ const redirect& rd,
+ const location& ll,
+ scope& sp,
+ bool diag,
+ const char* what)
+ {
+ auto input_info = [&ip, &ll] (diag_record& d)
+ {
+ if (non_empty (ip, ll))
+ d << info << "stdin: " << ip;
+ };
+
+ auto output_info = [&what, &ll] (diag_record& d,
+ const path& p,
+ const char* prefix = "",
+ const char* suffix = "")
+ {
+ if (non_empty (p, ll))
+ d << info << prefix << what << suffix << ": " << p;
+ else
+ d << info << prefix << what << suffix << " is empty";
+ };
+
+ if (rd.type == redirect_type::none)
+ {
+ // Check that there is no output produced.
+ //
+ assert (!op.empty ());
+
+ if (!non_empty (op, ll))
+ return true;
+
+ if (diag)
+ {
+ diag_record d (error (ll));
+ d << pr << " unexpectedly writes to " << what <<
+ info << what << ": " << op;
+
+ input_info (d);
+
+ // Print cached output.
+ //
+ print_file (d, op, ll);
+ }
+
+ // Fall through (to return false).
+ //
+ }
+ else if (rd.type == redirect_type::here_str_literal ||
+ rd.type == redirect_type::here_doc_literal ||
+ (rd.type == redirect_type::file &&
+ rd.file.mode == redirect_fmode::compare))
+ {
+ // The expected output is provided as a file or as a string. Save the
+ // string to a file in the latter case.
+ //
+ assert (!op.empty ());
+
+ path eop;
+
+ if (rd.type == redirect_type::file)
+ eop = normalize (rd.file.path, sp, ll);
+ else
+ {
+ eop = path (op + ".orig");
+ save (eop, transform (rd.str, false, rd.modifiers, *sp.root), ll);
+ sp.clean_special (eop);
+ }
+
+ // Use the diff utility for comparison.
+ //
+ path dp ("diff");
+ process_path pp (run_search (dp, true));
+
+ cstrings args {pp.recall_string (), "-u"};
+
+ // Ignore Windows newline fluff if that's what we are running on.
+ //
+ if (test_target (*sp.root).class_ == "windows")
+ args.push_back ("--strip-trailing-cr");
+
+ args.push_back (eop.string ().c_str ());
+ args.push_back (op.string ().c_str ());
+ args.push_back (nullptr);
+
+ if (verb >= 2)
+ print_process (args);
+
+ try
+ {
+ // Save diff's stdout to a file for troubleshooting and for the
+ // optional (if not too large) printing (at the end of
+ // diagnostics).
+ //
+ path ep (op + ".diff");
+ auto_fd efd;
+
+ try
+ {
+ efd = fdopen (ep, fdopen_mode::out | fdopen_mode::create);
+ sp.clean_special (ep);
+ }
+ catch (const io_error& e)
+ {
+ fail (ll) << "unable to write " << ep << ": " << e;
+ }
+
+ // Diff utility prints the differences to stdout. But for the
+ // user it is a part of the test failure diagnostics so let's
+ // redirect stdout to stderr.
+ //
+ process p (pp, args.data (), 0, 2, efd.get ());
+ efd.reset ();
+
+ if (p.wait ())
+ return true;
+
+ assert (p.exit);
+ const process_exit& pe (*p.exit);
+
+ // Note that both POSIX and GNU diff report an error by exiting with
+ // the code > 1.
+ //
+ if (!pe.normal () || pe.code () > 1)
+ {
+ diag_record d (fail (ll));
+ print_process (d, args);
+ d << " " << pe;
+ }
+
+ // Output doesn't match the expected result.
+ //
+ if (diag)
+ {
+ diag_record d (error (ll));
+ d << pr << " " << what << " doesn't match expected";
+
+ output_info (d, op);
+ output_info (d, eop, "expected ");
+ output_info (d, ep, "", " diff");
+ input_info (d);
+
+ print_file (d, ep, ll);
+ }
+
+ // Fall through (to return false).
+ //
+ }
+ catch (const process_error& e)
+ {
+ error (ll) << "unable to execute " << pp << ": " << e;
+
+ if (e.child)
+ exit (1);
+
+ throw failed ();
+ }
+ }
+ else if (rd.type == redirect_type::here_str_regex ||
+ rd.type == redirect_type::here_doc_regex)
+ {
+ // The overall plan is:
+ //
+ // 1. Create regex line string. While creating its line characters
+ // transform regex lines according to the redirect modifiers.
+ //
+ // 2. Create line regex using the line string. If creation fails
+ // then save the (transformed) regex redirect to a file for
+ // troubleshooting.
+ //
+ // 3. Parse the output into the literal line string.
+ //
+ // 4. Match the output line string with the line regex.
+ //
+ // 5. If match fails save the (transformed) regex redirect to a file
+ // for troubleshooting.
+ //
+ using namespace regex;
+
+ assert (!op.empty ());
+
+ // Create regex line string.
+ //
+ line_pool pool;
+ line_string rls;
+ const regex_lines rl (rd.regex);
+
+ // Parse regex flags.
+ //
+ // When adding support for new flags don't forget to update
+ // parse_regex().
+ //
+ auto parse_flags = [] (const string& f) -> char_flags
+ {
+ char_flags r (char_flags::none);
+
+ for (char c: f)
+ {
+ switch (c)
+ {
+ case 'd': r |= char_flags::idot; break;
+ case 'i': r |= char_flags::icase; break;
+ default: assert (false); // Error so should have been checked.
+ }
+ }
+
+ return r;
+ };
+
+ // Return original regex line with the transformation applied.
+ //
+ auto line = [&rl, &rd, &sp] (const regex_line& l) -> string
+ {
+ string r;
+ if (l.regex) // Regex (possibly empty),
+ {
+ r += rl.intro;
+ r += transform (l.value, true, rd.modifiers, *sp.root);
+ r += rl.intro;
+ r += l.flags;
+ }
+ else if (!l.special.empty ()) // Special literal.
+ r += rl.intro;
+ else // Textual literal.
+ r += transform (l.value, false, rd.modifiers, *sp.root);
+
+ r += l.special;
+ return r;
+ };
+
+ // Return regex line location.
+ //
+ // Note that we rely on the fact that the command and regex lines
+ // always belong to the same testscript file.
+ //
+ auto loc = [&ll] (uint64_t line, uint64_t column) -> location
+ {
+ location r (ll);
+ r.line = line;
+ r.column = column;
+ return r;
+ };
+
+ // Save the regex to file for troubleshooting, return the file path
+ // it has been saved to.
+ //
+ // Note that we save the regex on line regex creation failure or if
+ // the program output doesn't match.
+ //
+ auto save_regex = [&op, &rl, &rd, &ll, &line] () -> path
+ {
+ path rp (op + ".regex");
+
+ // Encode here-document regex global flags if present as a file
+ // name suffix. For example if icase and idot flags are specified
+ // the name will look like:
+ //
+ // test/1/stdout.regex-di
+ //
+ if (rd.type == redirect_type::here_doc_regex && !rl.flags.empty ())
+ rp += '-' + rl.flags;
+
+ // Note that it would be more efficient to directly write chunks
+ // to file rather than to compose a string first. However we don't
+ // bother (about performance) for the sake of the code as we
+ // already failed.
+ //
+ string s;
+ for (auto b (rl.lines.cbegin ()), i (b), e (rl.lines.cend ());
+ i != e; ++i)
+ {
+ if (i != b) s += '\n';
+ s += line (*i);
+ }
+
+ save (rp, s, ll);
+ return rp;
+ };
+
+ // Finally create regex line string.
+ //
+ // Note that diagnostics doesn't refer to the program path as it is
+ // irrelevant to failures at this stage.
+ //
+ char_flags gf (parse_flags (rl.flags)); // Regex global flags.
+
+ for (const auto& l: rl.lines)
+ {
+ if (l.regex) // Regex (with optional special characters).
+ {
+ line_char c;
+
+ // Empty regex is a special case representing the blank line.
+ //
+ if (l.value.empty ())
+ c = line_char ("", pool);
+ else
+ {
+ try
+ {
+ string s (transform (l.value, true, rd.modifiers, *sp.root));
+
+ c = line_char (
+ char_regex (s, gf | parse_flags (l.flags)), pool);
+ }
+ catch (const regex_error& e)
+ {
+ // Print regex_error description if meaningful.
+ //
+ diag_record d (fail (loc (l.line, l.column)));
+
+ if (rd.type == redirect_type::here_str_regex)
+ d << "invalid " << what << " regex redirect" << e <<
+ info << "regex: '" << line (l) << "'";
+ else
+ d << "invalid char-regex in " << what << " regex redirect"
+ << e <<
+ info << "regex line: '" << line (l) << "'";
+
+ d << endf;
+ }
+ }
+
+ rls += c; // Append blank literal or regex line char.
+ }
+ else if (!l.special.empty ()) // Special literal.
+ {
+ // Literal can not be followed by special characters in the same
+ // line.
+ //
+ assert (l.value.empty ());
+ }
+ else // Textual literal.
+ {
+ // Append literal line char.
+ //
+ rls += line_char (
+ transform (l.value, false, rd.modifiers, *sp.root), pool);
+ }
+
+ for (char c: l.special)
+ {
+ if (line_char::syntax (c))
+ rls += line_char (c); // Append special line char.
+ else
+ fail (loc (l.line, l.column))
+ << "invalid syntax character '" << c << "' in " << what
+ << " regex redirect" <<
+ info << "regex line: '" << line (l) << "'";
+ }
+ }
+
+ // Create line regex.
+ //
+ line_regex regex;
+
+ try
+ {
+ regex = line_regex (move (rls), move (pool));
+ }
+ catch (const regex_error& e)
+ {
+ // Note that line regex creation can not fail for here-string
+ // redirect as it doesn't have syntax line chars. That in
+ // particular means that end_line and end_column are meaningful.
+ //
+ assert (rd.type == redirect_type::here_doc_regex);
+
+ diag_record d (fail (loc (rd.end_line, rd.end_column)));
+
+ // Print regex_error description if meaningful.
+ //
+ d << "invalid " << what << " regex redirect" << e;
+
+ output_info (d, save_regex (), "", " regex");
+ }
+
+ // Parse the output into the literal line string.
+ //
+ line_string ls;
+
+ try
+ {
+ // Do not throw when eofbit is set (end of stream reached), and
+ // when failbit is set (getline() failed to extract any character).
+ //
+ // Note that newlines are treated as line-chars separators. That
+ // in particular means that the trailing newline produces a blank
+ // line-char (empty literal). Empty output produces the zero-length
+ // line-string.
+ //
+ // Also note that we strip the trailing CR characters (otherwise
+ // can mismatch when cross-test).
+ //
+ ifdstream is (op, ifdstream::in, ifdstream::badbit);
+ is.peek (); // Sets eofbit for an empty stream.
+
+ while (!is.eof ())
+ {
+ string s;
+ getline (is, s);
+
+ // It is safer to strip CRs in cycle, as msvcrt unexplainably
+ // adds too much trailing junk to the system_error descriptions,
+ // and so it can appear in programs output. For example:
+ //
+ // ...: Invalid data.\r\r\n
+ //
+ // Note that our custom operator<<(ostream&, const exception&)
+ // removes this junk.
+ //
+ while (!s.empty () && s.back () == '\r')
+ s.pop_back ();
+
+ ls += line_char (move (s), regex.pool);
+ }
+ }
+ catch (const io_error& e)
+ {
+ fail (ll) << "unable to read " << op << ": " << e;
+ }
+
+ // Match the output with the regex.
+ //
+ if (regex_match (ls, regex)) // Doesn't throw.
+ return true;
+
+ // Output doesn't match the regex. We save the regex to file for
+ // troubleshooting regardless of whether we print the diagnostics or
+ // not.
+ //
+ path rp (save_regex ());
+
+ if (diag)
+ {
+ diag_record d (error (ll));
+ d << pr << " " << what << " doesn't match regex";
+
+ output_info (d, op);
+ output_info (d, rp, "", " regex");
+ input_info (d);
+
+ // Print cached output.
+ //
+ print_file (d, op, ll);
+ }
+
+ // Fall through (to return false).
+ //
+ }
+ else // Noop.
+ return true;
+
+ return false;
+ }
+
+ bool default_runner::
+ test (scope& s) const
+ {
+ return common_.test (s.root->test_target, s.id_path); // Delegate the decision to common_.
+ }
+
+ void default_runner::
+ enter (scope& sp, const location&) // Create the scope working directory and register it for cleanup.
+ {
+ auto df = make_diag_frame (
+ [&sp](const diag_record& dr)
+ {
+ // Let's not depend on how the path representation can be improved
+ // for readability on printing.
+ //
+ dr << info << "test id: " << sp.id_path.posix_string ();
+ });
+
+ // Scope working directory shall be empty (the script working
+ // directory is cleaned up by the test rule prior to the script
+ // execution).
+ //
+ // Create the root working directory containing the .buildignore file
+ // to make sure that it is ignored by name patterns (see buildignore
+ // description for details).
+ //
+ // @@ Shouldn't we add an optional location parameter to mkdir() and
+ // alike utility functions so the failure message can contain
+ // location info?
+ //
+ fs_status<mkdir_status> r (
+ sp.parent == nullptr
+ ? mkdir_buildignore (
+ sp.wd_path,
+ sp.root->target_scope.root_scope ()->root_extra->buildignore_file,
+ 2)
+ : mkdir (sp.wd_path, 2));
+
+ if (r == mkdir_status::already_exists)
+ fail << "working directory " << sp.wd_path << " already exists" <<
+ info << "are tests stomping on each other's feet?";
+
+ // We don't change the current directory here but indicate that the
+ // scope test commands will be executed in that directory.
+ //
+ if (verb >= 2)
+ text << "cd " << sp.wd_path;
+
+ sp.clean ({cleanup_type::always, sp.wd_path}, true);
+ }
+
+ void default_runner::
+ leave (scope& sp, const location& ll) // Perform the registered cleanups (if requested) on leaving the scope.
+ {
+ auto df = make_diag_frame (
+ [&sp](const diag_record& dr)
+ {
+ // Let's not depend on how the path representation can be improved
+ // for readability on printing.
+ //
+ dr << info << "test id: " << sp.id_path.posix_string ();
+ });
+
+ // Perform registered cleanups if requested.
+ //
+ if (common_.after == output_after::clean)
+ {
+ // Note that we operate with normalized paths here.
+ //
+ // Remove special files. The order is not important as we don't
+ // expect directories here.
+ //
+ for (const auto& p: sp.special_cleanups)
+ {
+ // Remove the file if exists. Fail otherwise.
+ //
+ if (rmfile (p, 3) == rmfile_status::not_exist)
+ fail (ll) << "registered for cleanup special file " << p
+ << " does not exist";
+ }
+
+ // Remove files and directories in the order opposite to the order of
+ // cleanup registration.
+ //
+ for (const auto& c: reverse_iterate (sp.cleanups))
+ {
+ cleanup_type t (c.type);
+
+ // Skip regardless of whether the path exists or not.
+ //
+ if (t == cleanup_type::never)
+ continue;
+
+ const path& cp (c.path);
+
+ // Wildcard with the last component being '***' (without trailing
+ // separator) matches all files and sub-directories recursively as
+ // well as the start directory itself. So we will recursively
+ // remove the directories that match the parent (for the original
+ // path) directory wildcard.
+ //
+ bool recursive (cp.leaf ().representation () == "***");
+ const path& p (!recursive ? cp : cp.directory ());
+
+ // Remove files or directories using wildcard.
+ //
+ if (p.string ().find_first_of ("?*") != string::npos)
+ {
+ bool removed (false);
+
+ auto rm = [&cp, recursive, &removed, &sp, &ll] (path&& pe,
+ const string&,
+ bool interm)
+ {
+ if (!interm)
+ {
+ // While removing the entry we can get not_exist due to
+ // race conditions, but that's ok if somebody did our job.
+ // Note that we still set the removed flag to true in this
+ // case.
+ //
+ removed = true; // Will be meaningless on failure.
+
+ if (pe.to_directory ())
+ {
+ dir_path d (path_cast<dir_path> (pe));
+
+ if (!recursive)
+ {
+ rmdir_status r (rmdir (d, 3));
+
+ if (r != rmdir_status::not_empty)
+ return true;
+
+ diag_record dr (fail (ll));
+ dr << "registered for cleanup directory " << d
+ << " is not empty";
+
+ print_dir (dr, d, ll);
+ dr << info << "wildcard: '" << cp << "'";
+ }
+ else
+ {
+ // Don't remove the working directory (it will be removed
+ // by the dedicated cleanup).
+ //
+ // Cast to uint16_t to avoid ambiguity with
+ // libbutl::rmdir_r().
+ //
+ rmdir_status r (rmdir_r (d,
+ d != sp.wd_path,
+ static_cast<uint16_t> (3)));
+
+ if (r != rmdir_status::not_empty)
+ return true;
+
+ // The directory is unlikely to be current but let's keep
+ // for completeness.
+ //
+ fail (ll) << "registered for cleanup wildcard " << cp
+ << " matches the current directory";
+ }
+ }
+ else
+ rmfile (pe, 3);
+ }
+
+ return true;
+ };
+
+ // Note that here we rely on the fact that recursive iterating
+ // goes depth-first (which makes sense for the cleanup).
+ //
+ try
+ {
+ // Doesn't follow symlinks.
+ //
+ path_search (p,
+ rm,
+ dir_path () /* start */,
+ path_match_flags::none);
+ }
+ catch (const system_error& e)
+ {
+ fail (ll) << "unable to cleanup wildcard " << cp << ": " << e;
+ }
+
+ // Removal of no filesystem entries is not an error for 'maybe'
+ // cleanup type.
+ //
+ if (removed || t == cleanup_type::maybe)
+ continue;
+
+ fail (ll) << "registered for cleanup wildcard " << cp
+ << " doesn't match any "
+ << (recursive
+ ? "path"
+ : p.to_directory ()
+ ? "directory"
+ : "file");
+ }
+
+ // Remove the directory if exists and empty. Fail otherwise.
+ // Removal of non-existing directory is not an error for 'maybe'
+ // cleanup type.
+ //
+ if (p.to_directory ())
+ {
+ dir_path d (path_cast<dir_path> (p));
+ bool wd (d == sp.wd_path);
+
+ // Trace the scope working directory removal with the verbosity
+ // level 2 (that was used for its creation). For other
+ // directories use level 3 (as for other cleanups).
+ //
+ int v (wd ? 2 : 3);
+
+ // Don't remove the working directory for the recursive cleanup
+ // (it will be removed by the dedicated one).
+ //
+ // Note that the root working directory contains the
+ // .buildignore file (see above).
+ //
+ // @@ If 'd' is a file then this will fail with diagnostics having
+ // no location info. Probably need to add an optional location
+ // parameter to rmdir() function. The same problem exists for
+ // a file cleanup when trying to rmfile() a directory instead of
+ // a file.
+ //
+ rmdir_status r (
+ recursive
+ ? rmdir_r (d, !wd, static_cast <uint16_t> (v))
+ : (wd && sp.parent == nullptr
+ ? rmdir_buildignore (
+ d,
+ sp.root->target_scope.root_scope ()->root_extra->buildignore_file,
+ v)
+ : rmdir (d, v)));
+
+ if (r == rmdir_status::success ||
+ (r == rmdir_status::not_exist && t == cleanup_type::maybe))
+ continue;
+
+ diag_record dr (fail (ll));
+ dr << "registered for cleanup directory " << d
+ << (r == rmdir_status::not_exist
+ ? " does not exist"
+ : !recursive
+ ? " is not empty"
+ : " is current");
+
+ if (r == rmdir_status::not_empty)
+ print_dir (dr, d, ll);
+ }
+
+ // Remove the file if exists. Fail otherwise. Removal of
+ // non-existing file is not an error for 'maybe' cleanup type.
+ //
+ if (rmfile (p, 3) == rmfile_status::not_exist &&
+ t == cleanup_type::always)
+ fail (ll) << "registered for cleanup file " << p
+ << " does not exist";
+ }
+ }
+
+ // Return to the parent scope directory or to the out_base one for the
+ // script scope.
+ //
+ if (verb >= 2)
+ text << "cd " << (sp.parent != nullptr
+ ? sp.parent->wd_path
+ : sp.wd_path.directory ());
+ }
+
+ // The exit pseudo-builtin: exit the current scope successfully, or
+ // print the diagnostics and exit the current scope and all the outer
+ // scopes unsuccessfully. Always throw exit_scope exception.
+ //
+ // exit [<diagnostics>]
+ //
+ [[noreturn]] static void
+ exit_builtin (const strings& args, const location& ll)
+ {
+ auto i (args.begin ());
+ auto e (args.end ());
+
+ // Process arguments.
+ //
+ // If no argument is specified, then exit successfully. Otherwise,
+ // print the diagnostics and exit unsuccessfully.
+ //
+ if (i == e)
+ throw exit_scope (true);
+
+ const string& s (*i++);
+
+ if (i != e)
+ fail (ll) << "unexpected argument '" << *i << "'";
+
+ error (ll) << s;
+ throw exit_scope (false);
+ }
+
+ // The set pseudo-builtin: set variable from the stdin input.
+ //
+ // set [-e|--exact] [(-n|--newline)|(-w|--whitespace)] [<attr>] <var>
+ //
+ static void
+ set_builtin (scope& sp,
+ const strings& args,
+ auto_fd in,
+ const location& ll)
+ {
+ try
+ {
+ // Do not throw when eofbit is set (end of stream reached), and
+ // when failbit is set (read operation failed to extract any
+ // character).
+ //
+ ifdstream cin (move (in), ifdstream::badbit);
+
+ auto i (args.begin ());
+ auto e (args.end ());
+
+ // Process options.
+ //
+ bool exact (false);
+ bool newline (false);
+ bool whitespace (false);
+
+ for (; i != e; ++i)
+ {
+ const string& o (*i);
+
+ if (o == "-e" || o == "--exact")
+ exact = true;
+ else if (o == "-n" || o == "--newline")
+ newline = true;
+ else if (o == "-w" || o == "--whitespace")
+ whitespace = true;
+ else
+ {
+ if (*i == "--")
+ ++i;
+
+ break;
+ }
+ }
+
+ // Process arguments.
+ //
+ if (i == e)
+ fail (ll) << "missing variable name";
+
+ const string& a (*i++); // Either attributes or variable name.
+ const string* ats (i == e ? nullptr : &a);
+ const string& vname (i == e ? a : *i++);
+
+ if (i != e)
+ fail (ll) << "unexpected argument '" << *i << "'";
+
+ if (ats != nullptr && ats->empty ())
+ fail (ll) << "empty variable attributes";
+
+ if (vname.empty ())
+ fail (ll) << "empty variable name";
+
+ // Read the input.
+ //
+ cin.peek (); // Sets eofbit for an empty stream.
+
+ names ns;
+ while (!cin.eof ())
+ {
+ // Read the next element, which depends on the whitespace mode being
+ // enabled or not. In the latter case it also makes sense to strip
+ // the trailing CRs that can appear while cross-testing Windows
+ // target or as a part of msvcrt junk production (see above).
+ //
+ string s;
+ if (whitespace)
+ cin >> s;
+ else
+ {
+ getline (cin, s);
+
+ while (!s.empty () && s.back () == '\r')
+ s.pop_back ();
+ }
+
+ // If failbit is set then we read nothing into the string as eof is
+ // reached. That in particular means that the stream has trailing
+ // whitespaces (possibly including newlines) if the whitespace mode
+ // is enabled, or the trailing newline otherwise. If so then
+ // we append the "blank" to the variable value in the exact mode
+ // prior to bailing out.
+ //
+ if (cin.fail ())
+ {
+ if (exact)
+ {
+ if (whitespace || newline)
+ ns.emplace_back (move (s)); // Reuse empty string.
+ else if (ns.empty ())
+ ns.emplace_back ("\n");
+ else
+ ns[0].value += '\n';
+ }
+
+ break;
+ }
+
+ if (whitespace || newline || ns.empty ())
+ ns.emplace_back (move (s));
+ else
+ {
+ ns[0].value += '\n';
+ ns[0].value += s;
+ }
+ }
+
+ cin.close ();
+
+ // Set the variable value and attributes. Note that we need to acquire
+ // a unique lock before potentially changing the script's variable
+ // pool. The obtained variable reference can safely be used with no
+ // locking as the variable pool is an associative container
+ // (underneath) and we are only adding new variables into it.
+ //
+ ulock ul (sp.root->var_pool_mutex);
+ const variable& var (sp.root->var_pool.insert (move (vname)));
+ ul.unlock ();
+
+ value& lhs (sp.assign (var));
+
+ // If there are no attributes specified then the variable assignment
+ // is straightforward. Otherwise we will use the build2 parser helper
+ // function.
+ //
+ if (ats == nullptr)
+ lhs.assign (move (ns), &var);
+ else
+ {
+ // If there is an error in the attributes string, our diagnostics
+ // will look like this:
+ //
+ // <attributes>:1:1 error: unknown value attribute x
+ // testscript:10:1 info: while parsing attributes '[x]'
+ //
+ auto df = make_diag_frame (
+ [ats, &ll](const diag_record& dr)
+ {
+ dr << info (ll) << "while parsing attributes '" << *ats << "'";
+ });
+
+ parser p;
+ p.apply_value_attributes (&var,
+ lhs,
+ value (move (ns)),
+ *ats,
+ token_type::assign,
+ path ("<attributes>"));
+ }
+ }
+ catch (const io_error& e)
+ {
+ fail (ll) << "set: " << e;
+ }
+ }
+
+ static bool
+ run_pipe (scope& sp,
+ command_pipe::const_iterator bc,
+ command_pipe::const_iterator ec,
+ auto_fd ifd,
+ size_t ci, size_t li, const location& ll,
+ bool diag)
+ {
+ if (bc == ec) // End of the pipeline.
+ return true;
+
+ // The overall plan is to run the first command in the pipe, reading
+ // its input from the file descriptor passed (or, for the first
+ // command, according to stdin redirect specification) and redirecting
+ // its output to the right-hand part of the pipe recursively. Fail if
+ // the right-hand part fails. Otherwise check the process exit code,
+ // match stderr (and stdout for the last command in the pipe) according
+ // to redirect specification(s) and fail if any of the above fails.
+ //
+ const command& c (*bc);
+
+ // Register the command explicit cleanups. Verify that the path being
+ // cleaned up is a sub-path of the testscript working directory. Fail
+ // if this is not the case.
+ //
+ for (const auto& cl: c.cleanups)
+ {
+ const path& p (cl.path);
+ path np (normalize (p, sp, ll));
+
+ const string& ls (np.leaf ().string ());
+ bool wc (ls == "*" || ls == "**" || ls == "***");
+ const path& cp (wc ? np.directory () : np);
+ const dir_path& wd (sp.root->wd_path);
+
+ if (!cp.sub (wd))
+ fail (ll) << (wc
+ ? "wildcard"
+ : p.to_directory ()
+ ? "directory"
+ : "file")
+ << " cleanup " << p << " is out of working directory "
+ << wd;
+
+ sp.clean ({cl.type, move (np)}, false);
+ }
+
+ const redirect& in (c.in.effective ());
+ const redirect& out (c.out.effective ());
+ const redirect& err (c.err.effective ());
+ bool eq (c.exit.comparison == exit_comparison::eq);
+
+ // If stdin file descriptor is not open then this is the first pipeline
+ // command.
+ //
+ bool first (ifd.get () == -1);
+
+ command_pipe::const_iterator nc (bc + 1);
+ bool last (nc == ec);
+
+ // Prior to opening file descriptors for command input/output
+ // redirects let's check if the command is the exit builtin. Being a
+ // builtin syntactically it differs from the regular ones in a number
+ // of ways. It doesn't communicate with standard streams, so
+ // redirecting them is meaningless. It may appear only as a single
+ // command in a pipeline. It doesn't return any value and stops the
+ // scope execution, so checking its exit status is meaningless as
+ // well. That all means we can short-circuit here calling the builtin
+ // and bailing out right after that. Checking that the user didn't
+ // specify any redirects or exit code check sounds like a right thing
+ // to do.
+ //
+ if (c.program.string () == "exit")
+ {
+ // In case the builtin is erroneously pipelined from the other
+ // command, we will close stdin gracefully (reading out the stream
+ // content), to make sure that the command doesn't print any
+ // unwanted diagnostics about IO operation failure.
+ //
+ // Note that dtor will ignore any errors (which is what we want).
+ //
+ ifdstream is (move (ifd), fdstream_mode::skip);
+
+ if (!first || !last)
+ fail (ll) << "exit builtin must be the only pipe command";
+
+ if (in.type != redirect_type::none)
+ fail (ll) << "exit builtin stdin cannot be redirected";
+
+ if (out.type != redirect_type::none)
+ fail (ll) << "exit builtin stdout cannot be redirected";
+
+ if (err.type != redirect_type::none)
+ fail (ll) << "exit builtin stderr cannot be redirected";
+
+ // We can't make sure that there is no exit code check. Let's, at
+ // least, check that non-zero code is not expected.
+ //
+ if (eq != (c.exit.code == 0))
+ fail (ll) << "exit builtin exit code cannot be non-zero";
+
+ exit_builtin (c.arguments, ll); // Throws exit_scope exception.
+ }
+
+ // Create a unique path for a command standard stream cache file.
+ //
+ auto std_path = [&sp, &ci, &li, &ll] (const char* n) -> path
+ {
+ path p (n);
+
+ // 0 if belongs to a single-line test scope, otherwise is the
+ // command line number (start from one) in the test scope.
+ //
+ if (li > 0)
+ p += "-" + to_string (li);
+
+ // 0 if belongs to a single-command expression, otherwise is the
+ // command number (start from one) in the expression.
+ //
+ // Note that the name like stdin-N can relate to N-th command of a
+ // single-line test or to N-th single-command line of multi-line
+ // test. These cases are mutually exclusive and so are unambiguous.
+ //
+ if (ci > 0)
+ p += "-" + to_string (ci);
+
+ return normalize (move (p), sp, ll);
+ };
+
+ // If this is the first pipeline command, then open stdin descriptor
+ // according to the redirect specified.
+ //
+ path isp;
+
+ if (!first)
+ assert (in.type == redirect_type::none); // No redirect expected.
+ else
+ {
+ // Open a file for passing to the command stdin.
+ //
+ auto open_stdin = [&isp, &ifd, &ll] ()
+ {
+ assert (!isp.empty ());
+
+ try
+ {
+ ifd = fdopen (isp, fdopen_mode::in);
+ }
+ catch (const io_error& e)
+ {
+ fail (ll) << "unable to read " << isp << ": " << e;
+ }
+ };
+
+ switch (in.type)
+ {
+ case redirect_type::pass:
+ {
+ try
+ {
+ ifd = fddup (0);
+ }
+ catch (const io_error& e)
+ {
+ fail (ll) << "unable to duplicate stdin: " << e;
+ }
+
+ break;
+ }
+
+ case redirect_type::none:
+ // Somehow need to make sure that the child process doesn't read
+ // from stdin. That is tricky to do in a portable way. Here we
+ // suppose that the program which (erroneously) tries to read some
+ // data from stdin being redirected to /dev/null fails not being
+ // able to read the expected data, and so the test doesn't pass
+ // through.
+ //
+ // @@ Obviously doesn't cover the case when the process reads
+ // whatever available.
+ // @@ Another approach could be not to redirect stdin and let the
+ // process to hang which can be interpreted as a test failure.
+ // @@ Both ways are quite ugly. Is there some better way to do
+ // this?
+ //
+ // Fall through.
+ //
+ case redirect_type::null:
+ {
+ try
+ {
+ ifd = fdnull ();
+ }
+ catch (const io_error& e)
+ {
+ fail (ll) << "unable to write to null device: " << e;
+ }
+
+ break;
+ }
+
+ case redirect_type::file:
+ {
+ isp = normalize (in.file.path, sp, ll);
+
+ open_stdin ();
+ break;
+ }
+
+ case redirect_type::here_str_literal:
+ case redirect_type::here_doc_literal:
+ {
+ // We could write to the command stdin directly but instead will
+ // cache the data for potential troubleshooting.
+ //
+ isp = std_path ("stdin");
+
+ save (
+ isp, transform (in.str, false, in.modifiers, *sp.root), ll);
+
+ sp.clean_special (isp);
+
+ open_stdin ();
+ break;
+ }
+
+ case redirect_type::trace:
+ case redirect_type::merge:
+ case redirect_type::here_str_regex:
+ case redirect_type::here_doc_regex:
+ case redirect_type::here_doc_ref: assert (false); break;
+ }
+ }
+
+ assert (ifd.get () != -1);
+
+ // Prior to opening file descriptors for command outputs redirects
+ // let's check if the command is the set builtin. Being a builtin
+ // syntactically it differs from the regular ones in a number of ways.
+ // It either succeeds or terminates abnormally, so redirecting stderr
+ // is meaningless. It also never produces any output and may appear
+ // only as a terminal command in a pipeline. That means we can
+ // short-circuit here calling the builtin and returning right after
+ // that. Checking that the user didn't specify any meaningless
+ // redirects or exit code check sounds like the right thing to do.
+ //
+ if (c.program.string () == "set")
+ {
+ if (!last)
+ fail (ll) << "set builtin must be the last pipe command";
+
+ if (out.type != redirect_type::none)
+ fail (ll) << "set builtin stdout cannot be redirected";
+
+ if (err.type != redirect_type::none)
+ fail (ll) << "set builtin stderr cannot be redirected";
+
+ if (eq != (c.exit.code == 0))
+ fail (ll) << "set builtin exit code cannot be non-zero";
+
+ set_builtin (sp, c.arguments, move (ifd), ll);
+ return true;
+ }
+
+ // Open a file for command output redirect if requested explicitly
+ // (file overwrite/append redirects) or for the purpose of the output
+ // validation (none, here_*, file comparison redirects), register the
+ // file for cleanup, return the file descriptor. Interpret trace
+ // redirect according to the verbosity level (as null if below 2, as
+ // pass otherwise). Return nullfd, standard stream descriptor duplicate
+ // or null-device descriptor for merge, pass or null redirects
+ // respectively (not opening any file).
+ //
+ auto open = [&sp, &ll, &std_path] (const redirect& r,
+ int dfd,
+ path& p) -> auto_fd
+ {
+ assert (dfd == 1 || dfd == 2);
+ const char* what (dfd == 1 ? "stdout" : "stderr");
+
+ fdopen_mode m (fdopen_mode::out | fdopen_mode::create);
+
+ auto_fd fd;
+ redirect_type rt (r.type != redirect_type::trace
+ ? r.type
+ : verb < 2
+ ? redirect_type::null
+ : redirect_type::pass);
+ switch (rt)
+ {
+ case redirect_type::pass:
+ {
+ try
+ {
+ fd = fddup (dfd);
+ }
+ catch (const io_error& e)
+ {
+ fail (ll) << "unable to duplicate " << what << ": " << e;
+ }
+
+ return fd;
+ }
+
+ case redirect_type::null:
+ {
+ try
+ {
+ fd = fdnull ();
+ }
+ catch (const io_error& e)
+ {
+ fail (ll) << "unable to write to null device: " << e;
+ }
+
+ return fd;
+ }
+
+ case redirect_type::merge:
+ {
+ // Duplicate the paired file descriptor later.
+ //
+ return fd; // nullfd
+ }
+
+ case redirect_type::file:
+ {
+ // For the cmp mode the user-provided path refers to the content
+ // to match against, rather than the content to be produced (as
+ // for overwrite and append modes). And so for cmp mode we
+ // redirect the process output to a temporary file.
+ //
+ p = r.file.mode == redirect_fmode::compare
+ ? std_path (what)
+ : normalize (r.file.path, sp, ll);
+
+ m |= r.file.mode == redirect_fmode::append
+ ? fdopen_mode::at_end
+ : fdopen_mode::truncate;
+
+ break;
+ }
+
+ case redirect_type::none:
+ case redirect_type::here_str_literal:
+ case redirect_type::here_doc_literal:
+ case redirect_type::here_str_regex:
+ case redirect_type::here_doc_regex:
+ {
+ p = std_path (what);
+ m |= fdopen_mode::truncate;
+ break;
+ }
+
+ case redirect_type::trace:
+ case redirect_type::here_doc_ref: assert (false); break;
+ }
+
+ try
+ {
+ fd = fdopen (p, m);
+
+ if ((m & fdopen_mode::at_end) != fdopen_mode::at_end)
+ {
+ if (rt == redirect_type::file)
+ sp.clean ({cleanup_type::always, p}, true);
+ else
+ sp.clean_special (p);
+ }
+ }
+ catch (const io_error& e)
+ {
+ fail (ll) << "unable to write " << p << ": " << e;
+ }
+
+ return fd;
+ };
+
+ path osp;
+ fdpipe ofd;
+
+ // If this is the last command in the pipeline then redirect the
+ // command process stdout to a file. Otherwise create a pipe and
+ // redirect the stdout to the write-end of the pipe. The read-end will
+ // be passed as stdin for the next command in the pipeline.
+ //
+ // @@ Shouldn't we allow the here-* and file output redirects for a
+ // command with pipelined output? Say if such redirect is present
+ // then the process output is redirected to a file first (as it is
+ // when no output pipelined), and only after the process exit code
+ // and the output are validated the next command in the pipeline is
+ // executed taking the file as an input. This could be useful for
+ // test failures investigation and for tests "tightening".
+ //
+ if (last)
+ ofd.out = open (out, 1, osp);
+ else
+ {
+ assert (out.type == redirect_type::none); // No redirect expected.
+
+ try
+ {
+ ofd = fdopen_pipe ();
+ }
+ catch (const io_error& e)
+ {
+ fail (ll) << "unable to open pipe: " << e;
+ }
+ }
+
+ path esp;
+ auto_fd efd (open (err, 2, esp));
+
+ // Merge standard streams.
+ //
+ bool mo (out.type == redirect_type::merge);
+ if (mo || err.type == redirect_type::merge)
+ {
+ auto_fd& self (mo ? ofd.out : efd);
+ auto_fd& other (mo ? efd : ofd.out);
+
+ try
+ {
+ assert (self.get () == -1 && other.get () != -1);
+ self = fddup (other.get ());
+ }
+ catch (const io_error& e)
+ {
+ fail (ll) << "unable to duplicate " << (mo ? "stderr" : "stdout")
+ << ": " << e;
+ }
+ }
+
+ // All descriptors should be open by now.
+ //
+ assert (ofd.out.get () != -1 && efd.get () != -1);
+
+ optional<process_exit> exit;
+ builtin_func* bf (builtins.find (c.program.string ()));
+
+ bool success;
+
+ auto process_args = [&c] () -> cstrings
+ {
+ cstrings args {c.program.string ().c_str ()};
+
+ for (const auto& a: c.arguments)
+ args.push_back (a.c_str ());
+
+ args.push_back (nullptr);
+ return args;
+ };
+
+ if (bf != nullptr)
+ {
+ // Execute the builtin.
+ //
+ if (verb >= 2)
+ print_process (process_args ());
+
+ try
+ {
+ uint8_t r; // Storage.
+ builtin b (
+ bf (sp, r, c.arguments, move (ifd), move (ofd.out), move (efd)));
+
+ success = run_pipe (sp,
+ nc,
+ ec,
+ move (ofd.in),
+ ci + 1, li, ll, diag);
+
+ exit = process_exit (b.wait ());
+ }
+ catch (const system_error& e)
+ {
+ fail (ll) << "unable to execute " << c.program << " builtin: "
+ << e << endf;
+ }
+ }
+ else
+ {
+ // Execute the process.
+ //
+ cstrings args (process_args ());
+
+ // Resolve the relative not simple program path against the scope's
+ // working directory. The simple one will be left for the process
+ // path search machinery.
+ //
+ path p;
+
+ try
+ {
+ p = path (args[0]);
+
+ if (p.relative () && !p.simple ())
+ {
+ p = sp.wd_path / p;
+ args[0] = p.string ().c_str ();
+ }
+ }
+ catch (const invalid_path& e)
+ {
+ fail (ll) << "invalid program path " << e.path;
+ }
+
+ try
+ {
+ process_path pp (process::path_search (args[0]));
+
+ if (verb >= 2)
+ print_process (args);
+
+ process pr (
+ pp,
+ args.data (),
+ {ifd.get (), -1}, process::pipe (ofd), {-1, efd.get ()},
+ sp.wd_path.string ().c_str ());
+
+ ifd.reset ();
+ ofd.out.reset ();
+ efd.reset ();
+
+ success = run_pipe (sp,
+ nc,
+ ec,
+ move (ofd.in),
+ ci + 1, li, ll, diag);
+
+ pr.wait ();
+
+ exit = move (pr.exit);
+ }
+ catch (const process_error& e)
+ {
+ error (ll) << "unable to execute " << args[0] << ": " << e;
+
+ if (e.child)
+ std::exit (1);
+
+ throw failed ();
+ }
+ }
+
+ assert (exit);
+
+ // If the right-hand side pipeline failed then the whole pipeline fails,
+ // and no further checks are required.
+ //
+ if (!success)
+ return false;
+
+ const path& pr (c.program);
+
+ // If there is no valid exit code available for whatever reason then we
+ // print the proper diagnostics, dump stderr (if cached and not too
+ // large) and fail the whole test. Otherwise if the exit code is not
+ // correct then we print diagnostics if requested and fail the
+ // pipeline.
+ //
+ bool valid (exit->normal ());
+
+ // On Windows the exit code can be out of the valid codes range being
+ // defined as uint16_t.
+ //
+#ifdef _WIN32
+ if (valid)
+ valid = exit->code () < 256;
+#endif
+
+ success = valid && eq == (exit->code () == c.exit.code);
+
+ if (!valid || (!success && diag))
+ {
+ // In the presence of a valid exit code we print the diagnostics and
+ // return false rather than throw.
+ //
+ diag_record d (valid ? error (ll) : fail (ll));
+
+ if (!exit->normal ())
+ d << pr << " " << *exit;
+ else
+ {
+ uint16_t ec (exit->code ()); // Make sure is printed as integer.
+
+ if (!valid)
+ d << pr << " exit code " << ec << " out of 0-255 range";
+ else if (!success)
+ {
+ if (diag)
+ d << pr << " exit code " << ec << (eq ? " != " : " == ")
+ << static_cast<uint16_t> (c.exit.code);
+ }
+ else
+ assert (false);
+ }
+
+ if (non_empty (esp, ll))
+ d << info << "stderr: " << esp;
+
+ if (non_empty (osp, ll))
+ d << info << "stdout: " << osp;
+
+ if (non_empty (isp, ll))
+ d << info << "stdin: " << isp;
+
+ // Print cached stderr.
+ //
+ print_file (d, esp, ll);
+ }
+
+ // If exit code is correct then check if the standard outputs match the
+ // expectations. Note that stdout is only redirected to file for the
+ // last command in the pipeline.
+ //
+ if (success)
+ success =
+ (!last ||
+ check_output (pr, osp, isp, out, ll, sp, diag, "stdout")) &&
+ check_output (pr, esp, isp, err, ll, sp, diag, "stderr");
+
+ return success;
+ }
+
+ static bool
+ run_expr (scope& sp,
+ const command_expr& expr,
+ size_t li, const location& ll,
+ bool diag)
+ {
+ // Print test id once per test expression.
+ //
+ auto df = make_diag_frame (
+ [&sp](const diag_record& dr)
+ {
+ // Let's not depend on how the path representation can be improved
+ // for readability on printing.
+ //
+ dr << info << "test id: " << sp.id_path.posix_string ();
+ });
+
+ // Commands are numbered sequentially throughout the expression
+ // starting with 1. Number 0 means the command is a single one.
+ //
+ size_t ci (expr.size () == 1 && expr.back ().pipe.size () == 1
+ ? 0
+ : 1);
+
+ // If there are no ORs to the right of a pipe then the pipe failure is
+ // fatal for the whole expression. In particular, the pipe must print
+ // the diagnostics on failure (if generally allowed). So we find the
+ // pipe that "switches on" the diagnostics potential printing.
+ //
+ command_expr::const_iterator trailing_ands; // Undefined if diag is
+ // disallowed.
+ if (diag)
+ {
+ auto i (expr.crbegin ());
+ for (; i != expr.crend () && i->op == expr_operator::log_and; ++i) ;
+ trailing_ands = i.base ();
+ }
+
+ bool r (false);
+ bool print (false);
+
+ for (auto b (expr.cbegin ()), i (b), e (expr.cend ()); i != e; ++i)
+ {
+ if (diag && i + 1 == trailing_ands)
+ print = true;
+
+ const command_pipe& p (i->pipe);
+ bool or_op (i->op == expr_operator::log_or);
+
+ // Short-circuit if the pipe result must be OR-ed with true or AND-ed
+ // with false.
+ //
+ if (!((or_op && r) || (!or_op && !r)))
+ r = run_pipe (
+ sp, p.begin (), p.end (), auto_fd (), ci, li, ll, print);
+
+ ci += p.size ();
+ }
+
+ return r;
+ }
+
+ void default_runner::
+ run (scope& sp,
+ const command_expr& expr, command_type ct,
+ size_t li,
+ const location& ll)
+ {
+ // Noop for teardown commands if keeping tests output is requested.
+ //
+ if (ct == command_type::teardown &&
+ common_.after == output_after::keep)
+ return;
+
+ if (verb >= 3)
+ {
+ char c ('\0');
+
+ switch (ct)
+ {
+ case command_type::test: c = ' '; break;
+ case command_type::setup: c = '+'; break;
+ case command_type::teardown: c = '-'; break;
+ }
+
+ text << ": " << c << expr;
+ }
+
+ if (!run_expr (sp, expr, li, ll, true))
+ throw failed (); // Assume diagnostics is already printed.
+ }
+
+ bool default_runner::
+ run_if (scope& sp,
+ const command_expr& expr,
+ size_t li, const location& ll)
+ {
+ if (verb >= 3)
+ text << ": ?" << expr;
+
+ return run_expr (sp, expr, li, ll, false);
+ }
+ }
+ }
+}
diff --git a/libbuild2/test/script/runner.hxx b/libbuild2/test/script/runner.hxx
new file mode 100644
index 0000000..9a3f91f
--- /dev/null
+++ b/libbuild2/test/script/runner.hxx
@@ -0,0 +1,101 @@
+// file : libbuild2/test/script/runner.hxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
+// license : MIT; see accompanying LICENSE file
+
+#ifndef LIBBUILD2_TEST_SCRIPT_RUNNER_HXX
+#define LIBBUILD2_TEST_SCRIPT_RUNNER_HXX
+
+#include <libbuild2/types.hxx>
+#include <libbuild2/utility.hxx>
+
+#include <libbuild2/test/script/script.hxx>
+
+namespace build2
+{
+ namespace test
+ {
+ struct common;
+
+ namespace script
+ {
+ // An exception that can be thrown by a runner to exit the scope (for
+ // example, as a result of executing the exit builtin). The status
+ // indicates whether the scope should be considered to have succeeded
+ // or failed.
+ //
+ struct exit_scope
+ {
+ bool status;
+
+ explicit
+ exit_scope (bool s): status (s) {}
+ };
+
+ class runner
+ {
+ public:
+ // Return false if this test/group should be skipped.
+ //
+ virtual bool
+ test (scope&) const = 0;
+
+ // Location is the scope start location (for diagnostics, etc).
+ //
+ virtual void
+ enter (scope&, const location&) = 0;
+
+ // Index is the 1-based index of this command line in the command list
+ // (e.g., in a compound test). If it is 0 then it means there is only
+ // one command (e.g., a simple test). This information can be used,
+ // for example, to derive file names.
+ //
+ // Location is the start position of this command line in the
+ // testscript. It can be used in diagnostics.
+ //
+ virtual void
+ run (scope&,
+ const command_expr&, command_type,
+ size_t index,
+ const location&) = 0;
+
+ virtual bool
+ run_if (scope&, const command_expr&, size_t, const location&) = 0;
+
+ // Location is the scope end location (for diagnostics, etc).
+ //
+ virtual void
+ leave (scope&, const location&) = 0;
+ };
+
+ class default_runner: public runner
+ {
+ public:
+ explicit
+ default_runner (const common& c): common_ (c) {}
+
+ virtual bool
+ test (scope& s) const override;
+
+ virtual void
+ enter (scope&, const location&) override;
+
+ virtual void
+ run (scope&,
+ const command_expr&, command_type,
+ size_t,
+ const location&) override;
+
+ virtual bool
+ run_if (scope&, const command_expr&, size_t, const location&) override;
+
+ virtual void
+ leave (scope&, const location&) override;
+
+ private:
+ const common& common_;
+ };
+ }
+ }
+}
+
+#endif // LIBBUILD2_TEST_SCRIPT_RUNNER_HXX
diff --git a/libbuild2/test/script/script.cxx b/libbuild2/test/script/script.cxx
new file mode 100644
index 0000000..b879eb4
--- /dev/null
+++ b/libbuild2/test/script/script.cxx
@@ -0,0 +1,741 @@
+// file : libbuild2/test/script/script.cxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
+// license : MIT; see accompanying LICENSE file
+
+#include <libbuild2/test/script/script.hxx>
+
+#include <sstream>
+
+#include <libbuild2/target.hxx>
+#include <libbuild2/algorithm.hxx>
+
+using namespace std;
+
+namespace build2
+{
+ namespace test
+ {
+ namespace script
+ {
+ // Print the human-readable name of the line type: a plain word for the
+ // variable and command lines and the quoted keyword for the flow control
+ // lines.
+ //
+ ostream&
+ operator<< (ostream& o, line_type lt)
+ {
+   auto name = [lt] () -> const char*
+   {
+     switch (lt)
+     {
+     case line_type::var:       return "variable";
+     case line_type::cmd:       return "command";
+     case line_type::cmd_if:    return "'if'";
+     case line_type::cmd_ifn:   return "'if!'";
+     case line_type::cmd_elif:  return "'elif'";
+     case line_type::cmd_elifn: return "'elif!'";
+     case line_type::cmd_else:  return "'else'";
+     case line_type::cmd_end:   return "'end'";
+     }
+
+     return nullptr;
+   };
+
+   return o << name ();
+ }
+
+ // Print the string quoting it if it is empty or contains spaces or any of
+ // the special characters (including quotes).
+ //
+ // Single quotes are preferred since double quotes still allow expansion.
+ // However, a single quote cannot be represented inside a single-quoted
+ // string, so if the string contains one, then it is double-quoted instead
+ // with the backslash and double quote characters escaped.
+ //
+ static void
+ to_stream_q (ostream& o, const string& s)
+ {
+   if (s.empty () || s.find_first_of (" |&<>=\\\"'") != string::npos)
+   {
+     if (s.find ('\'') == string::npos)
+       o << '\'' << s << '\'';
+     else
+     {
+       o << '"';
+
+       for (char c: s)
+       {
+         // Escape characters that are special inside double quotes.
+         //
+         if (c == '\\' || c == '"')
+           o << '\\';
+
+         o << c;
+       }
+
+       o << '"';
+     }
+   }
+   else
+     o << s;
+ }
+
+ // Print the command in the testscript syntax. Depending on the mode, print
+ // the header (program, arguments, redirects, cleanups, and the exit code
+ // check), the here-documents (each starting on a new line), or both.
+ //
+ void
+ to_stream (ostream& o, const command& c, command_to_stream m)
+ {
+   // Print the path quoting it if necessary. The path is first rendered
+   // into a temporary stream that has the stream_verb setting copied from
+   // the destination.
+   //
+   auto print_path = [&o] (const path& p)
+   {
+     using build2::operator<<;
+
+     ostringstream s;
+     stream_verb (s, stream_verb (o));
+     s << p;
+
+     to_stream_q (o, s.str ());
+   };
+
+   // Print the redirect preceded by a space and the prefix. The last
+   // character of the prefix ('<' or '>') is the redirect direction.
+   //
+   auto print_redirect =
+     [&o, print_path] (const redirect& r, const char* prefix)
+   {
+     o << ' ' << prefix;
+
+     size_t n (string::traits_type::length (prefix));
+     assert (n > 0);
+
+     char d (prefix[n - 1]); // Redirect direction.
+
+     switch (r.type)
+     {
+     case redirect_type::none:  assert (false);   break;
+     case redirect_type::pass:  o << '|';         break;
+     case redirect_type::null:  o << '-';         break;
+     case redirect_type::trace: o << '!';         break;
+     case redirect_type::merge: o << '&' << r.fd; break;
+
+     case redirect_type::here_str_literal:
+     case redirect_type::here_doc_literal:
+       {
+         bool doc (r.type == redirect_type::here_doc_literal);
+
+         // For here-document add another '>' or '<'. Note that here end
+         // marker never needs to be quoted.
+         //
+         if (doc)
+           o << d;
+
+         o << r.modifiers;
+
+         if (doc)
+           o << r.end;
+         else
+         {
+           const string& v (r.str);
+           to_stream_q (o,
+                        r.modifiers.find (':') == string::npos
+                        ? string (v, 0, v.size () - 1) // Strip newline.
+                        : v);
+         }
+
+         break;
+       }
+
+     case redirect_type::here_str_regex:
+     case redirect_type::here_doc_regex:
+       {
+         bool doc (r.type == redirect_type::here_doc_regex);
+
+         // For here-document add another '>' or '<'. Note that here end
+         // marker never needs to be quoted.
+         //
+         if (doc)
+           o << d;
+
+         o << r.modifiers;
+
+         const regex_lines& re (r.regex);
+
+         if (doc)
+           o << re.intro + r.end + re.intro + re.flags;
+         else
+         {
+           assert (!re.lines.empty ()); // Regex can't be empty.
+
+           // Note: bind by reference to avoid copying the line.
+           //
+           const regex_line& l (re.lines[0]);
+           to_stream_q (o, re.intro + l.value + re.intro + l.flags);
+         }
+
+         break;
+       }
+
+     case redirect_type::file:
+       {
+         // For stdin or stdout-comparison redirect add '>>' or '<<' (and
+         // so make it '<<<' or '>>>'). Otherwise add '+' or '=' (and so
+         // make it '>+' or '>=').
+         //
+         if (d == '<' || r.file.mode == redirect_fmode::compare)
+           o << d << d;
+         else
+           o << (r.file.mode == redirect_fmode::append ? '+' : '=');
+
+         print_path (r.file.path);
+         break;
+       }
+
+     case redirect_type::here_doc_ref: assert (false); break;
+     }
+   };
+
+   // Print the here-document body starting on a new line and followed by
+   // the end marker.
+   //
+   auto print_doc = [&o] (const redirect& r)
+   {
+     o << endl;
+
+     if (r.type == redirect_type::here_doc_literal)
+       o << r.str;
+     else
+     {
+       assert (r.type == redirect_type::here_doc_regex);
+
+       const regex_lines& rl (r.regex);
+
+       for (auto b (rl.lines.cbegin ()), i (b), e (rl.lines.cend ());
+            i != e; ++i)
+       {
+         if (i != b)
+           o << endl;
+
+         const regex_line& l (*i);
+
+         if (l.regex)                  // Regex (possibly empty),
+           o << rl.intro << l.value << rl.intro << l.flags;
+         else if (!l.special.empty ()) // Special literal.
+           o << rl.intro;
+         else                          // Textual literal.
+           o << l.value;
+
+         o << l.special;
+       }
+     }
+
+     // With the ':' modifier the body is not newline-terminated so add the
+     // newline before the end marker.
+     //
+     o << (r.modifiers.find (':') == string::npos ? "" : "\n") << r.end;
+   };
+
+   if ((m & command_to_stream::header) == command_to_stream::header)
+   {
+     // Program.
+     //
+     to_stream_q (o, c.program.string ());
+
+     // Arguments.
+     //
+     for (const string& a: c.arguments)
+     {
+       o << ' ';
+       to_stream_q (o, a);
+     }
+
+     // Redirects.
+     //
+     if (c.in.effective ().type != redirect_type::none)
+       print_redirect (c.in.effective (), "<");
+
+     if (c.out.effective ().type != redirect_type::none)
+       print_redirect (c.out.effective (), ">");
+
+     if (c.err.effective ().type != redirect_type::none)
+       print_redirect (c.err.effective (), "2>");
+
+     // Cleanups.
+     //
+     for (const auto& p: c.cleanups)
+     {
+       o << " &";
+
+       if (p.type != cleanup_type::always)
+         o << (p.type == cleanup_type::maybe ? '?' : '!');
+
+       print_path (p.path);
+     }
+
+     // Exit code check (omitted if it is the default '== 0').
+     //
+     if (c.exit.comparison != exit_comparison::eq || c.exit.code != 0)
+     {
+       switch (c.exit.comparison)
+       {
+       case exit_comparison::eq: o << " == "; break;
+       case exit_comparison::ne: o << " != "; break;
+       }
+
+       // Cast so that the code is printed as a number, not a character.
+       //
+       o << static_cast<uint16_t> (c.exit.code);
+     }
+   }
+
+   if ((m & command_to_stream::here_doc) == command_to_stream::here_doc)
+   {
+     // Here-documents.
+     //
+     // Note that the redirect's own type is checked (not the effective
+     // one) so nothing is printed here for a here_doc_ref redirect.
+     //
+     if (c.in.type == redirect_type::here_doc_literal ||
+         c.in.type == redirect_type::here_doc_regex)
+       print_doc (c.in);
+
+     if (c.out.type == redirect_type::here_doc_literal ||
+         c.out.type == redirect_type::here_doc_regex)
+       print_doc (c.out);
+
+     if (c.err.type == redirect_type::here_doc_literal ||
+         c.err.type == redirect_type::here_doc_regex)
+       print_doc (c.err);
+   }
+ }
+
+ // Print the pipe: the command headers separated with " | " and/or the
+ // here-documents of all the commands, depending on the mode.
+ //
+ void
+ to_stream (ostream& o, const command_pipe& p, command_to_stream m)
+ {
+   const bool hdr (
+     (m & command_to_stream::header) == command_to_stream::header);
+
+   const bool doc (
+     (m & command_to_stream::here_doc) == command_to_stream::here_doc);
+
+   if (hdr)
+   {
+     bool first (true);
+
+     for (const command& c: p)
+     {
+       if (first)
+         first = false;
+       else
+         o << " | ";
+
+       to_stream (o, c, command_to_stream::header);
+     }
+   }
+
+   if (doc)
+   {
+     for (auto i (p.begin ()); i != p.end (); ++i)
+       to_stream (o, *i, command_to_stream::here_doc);
+   }
+ }
+
+ // Print the expression: the pipe headers joined with the logical operators
+ // and/or the here-documents of all the pipes, depending on the mode. Note
+ // that the operator of the first term is not printed.
+ //
+ void
+ to_stream (ostream& o, const command_expr& e, command_to_stream m)
+ {
+   const bool hdr (
+     (m & command_to_stream::header) == command_to_stream::header);
+
+   const bool doc (
+     (m & command_to_stream::here_doc) == command_to_stream::here_doc);
+
+   if (hdr)
+   {
+     bool first (true);
+
+     for (const expr_term& t: e)
+     {
+       if (first)
+         first = false;
+       else if (t.op == expr_operator::log_or)
+         o << " || ";
+       else if (t.op == expr_operator::log_and)
+         o << " && ";
+
+       to_stream (o, t.pipe, command_to_stream::header);
+     }
+   }
+
+   if (doc)
+   {
+     for (auto i (e.begin ()); i != e.end (); ++i)
+       to_stream (o, i->pipe, command_to_stream::here_doc);
+   }
+ }
+
+ // redirect
+ //
+ redirect::
+ redirect (redirect_type t)
+   : type (t)
+ {
+   // Construct the union member that corresponds to the type, if any.
+   //
+   switch (t)
+   {
+   case redirect_type::here_str_literal:
+   case redirect_type::here_doc_literal:
+     {
+       new (&str) string ();
+       break;
+     }
+   case redirect_type::here_str_regex:
+   case redirect_type::here_doc_regex:
+     {
+       new (&regex) regex_lines ();
+       break;
+     }
+   case redirect_type::file:
+     {
+       new (&file) file_type ();
+       break;
+     }
+   case redirect_type::here_doc_ref:
+     {
+       // Not expected to be created with this constructor.
+       //
+       assert (false);
+       break;
+     }
+   case redirect_type::none:
+   case redirect_type::pass:
+   case redirect_type::null:
+   case redirect_type::trace:
+   case redirect_type::merge:
+     {
+       // Nothing to construct.
+       //
+       break;
+     }
+   }
+ }
+
+ redirect::
+ redirect (redirect&& r)
+   : type (r.type),
+     modifiers (move (r.modifiers)),
+     end (move (r.end)),
+     end_line (r.end_line),
+     end_column (r.end_column)
+ {
+   // Move (or copy, for the simple members) the union member that is
+   // active for this type.
+   //
+   using ref_type = reference_wrapper<const redirect>;
+
+   switch (type)
+   {
+   case redirect_type::merge:
+     {
+       fd = r.fd;
+       break;
+     }
+   case redirect_type::here_str_literal:
+   case redirect_type::here_doc_literal:
+     {
+       new (&str) string (move (r.str));
+       break;
+     }
+   case redirect_type::here_str_regex:
+   case redirect_type::here_doc_regex:
+     {
+       new (&regex) regex_lines (move (r.regex));
+       break;
+     }
+   case redirect_type::file:
+     {
+       new (&file) file_type (move (r.file));
+       break;
+     }
+   case redirect_type::here_doc_ref:
+     {
+       new (&ref) ref_type (r.ref);
+       break;
+     }
+   case redirect_type::none:
+   case redirect_type::pass:
+   case redirect_type::null:
+   case redirect_type::trace:
+     {
+       // No union member is used.
+       //
+       break;
+     }
+   }
+ }
+
+ redirect::
+ ~redirect ()
+ {
+   // Destroy the union member that is active for this type, if it needs
+   // destruction.
+   //
+   using ref_type = reference_wrapper<const redirect>;
+
+   switch (type)
+   {
+   case redirect_type::here_str_literal:
+   case redirect_type::here_doc_literal:
+     {
+       str.~string ();
+       break;
+     }
+   case redirect_type::here_str_regex:
+   case redirect_type::here_doc_regex:
+     {
+       regex.~regex_lines ();
+       break;
+     }
+   case redirect_type::file:
+     {
+       file.~file_type ();
+       break;
+     }
+   case redirect_type::here_doc_ref:
+     {
+       ref.~ref_type ();
+       break;
+     }
+   case redirect_type::none:
+   case redirect_type::pass:
+   case redirect_type::null:
+   case redirect_type::trace:
+   case redirect_type::merge:
+     {
+       // Nothing to destroy.
+       //
+       break;
+     }
+   }
+ }
+
+ redirect& redirect::
+ operator= (redirect&& r)
+ {
+   // Self-assignment is a noop.
+   //
+   if (this == &r)
+     return *this;
+
+   // Destroy and then re-create in place from the source. Note that this
+   // assumes the move constructor does not throw.
+   //
+   this->~redirect ();
+   new (this) redirect (move (r));
+
+   return *this;
+ }
+
+ // scope
+ //
+ scope::
+ scope (const string& id, scope* p, script* r)
+   : parent (p),
+     root (r),
+     vars (false /* global */),
+     id_path (cast<path> (assign (root->id_var) = path ())),
+     wd_path (cast<dir_path> (assign (root->wd_var) = dir_path ()))
+ {
+   // The id path is the parent's id path (if any) with this scope's id
+   // appended. It is assembled as a string to ensure the POSIX form (the
+   // only reason it is kept as a path is to be able to easily get the id by
+   // calling leaf()).
+   //
+   string ip;
+
+   if (p != nullptr)
+     ip = p->id_path.string ();
+
+   if (!ip.empty () && !id.empty ())
+     ip += '/';
+
+   ip += id;
+
+   const_cast<path&> (id_path) = path (move (ip));
+
+   // The working directory of the root scope is handled in an ad hoc way
+   // so there is nothing else to do for it.
+   //
+   if (p == nullptr)
+     return;
+
+   const_cast<dir_path&> (wd_path) = dir_path (p->wd_path) /= id;
+ }
+
+ void scope::
+ clean (cleanup c, bool implicit)
+ {
+   using std::find; // Hidden by scope::find().
+
+   assert (!implicit || c.type == cleanup_type::always);
+
+   const path& cp (c.path);
+
+   // A path outside the script working directory is silently ignored if
+   // registered implicitly.
+   //
+   if (!cp.sub (root->wd_path))
+   {
+     if (implicit)
+       return;
+
+     assert (false); // Error so should have been checked.
+   }
+
+   // If the path is already registered, then only an explicit registration
+   // overrides the cleanup type.
+   //
+   for (cleanup& r: cleanups)
+   {
+     if (r.path == cp)
+     {
+       if (!implicit)
+         r.type = c.type;
+
+       return;
+     }
+   }
+
+   cleanups.emplace_back (move (c));
+ }
+
+ void scope::
+ clean_special (path p)
+ {
+   // Append the path to the list of special cleanups.
+   //
+   special_cleanups.push_back (move (p));
+ }
+
+ // script_base
+ //
+ script_base::
+ script_base ()
+ : // Enter the test.* variables with the same variable types as in
+ // buildfiles except for test: while in buildfiles it can be a
+ // target name, in testscripts it should be resolved to a path.
+ //
+ // Note: entering in a custom variable pool.
+ //
+ test_var (var_pool.insert<path> ("test")),
+ options_var (var_pool.insert<strings> ("test.options")),
+ arguments_var (var_pool.insert<strings> ("test.arguments")),
+ redirects_var (var_pool.insert<strings> ("test.redirects")),
+ cleanups_var (var_pool.insert<strings> ("test.cleanups")),
+
+ // Enter the special variables: working directory ($~), id path ($@),
+ // and the command line ($* and $0-$9). Note that $0 is entered as a
+ // path while the rest of $N as strings.
+ //
+ wd_var (var_pool.insert<dir_path> ("~")),
+ id_var (var_pool.insert<path> ("@")),
+ cmd_var (var_pool.insert<strings> ("*")),
+ cmdN_var {
+ &var_pool.insert<path> ("0"),
+ &var_pool.insert<string> ("1"),
+ &var_pool.insert<string> ("2"),
+ &var_pool.insert<string> ("3"),
+ &var_pool.insert<string> ("4"),
+ &var_pool.insert<string> ("5"),
+ &var_pool.insert<string> ("6"),
+ &var_pool.insert<string> ("7"),
+ &var_pool.insert<string> ("8"),
+ &var_pool.insert<string> ("9")} {}
+
+ // script
+ //
+ // Create the root (script) scope for testing target tt using testscript
+ // target st with rwd as the root working directory. The constructor sets
+ // the script working directory, the test variable, and the special $*, $N
+ // variables.
+ //
+ script::
+ script (const target& tt,
+ const testscript& st,
+ const dir_path& rwd)
+ : group (st.name == "testscript" ? string () : st.name, this),
+ test_target (tt),
+ target_scope (tt.base_scope ()),
+ script_target (st)
+ {
+ // Set the script working dir ($~) to $out_base/test/<id> (id_path
+ // for root is just the id which is empty if st is 'testscript').
+ //
+ const_cast<dir_path&> (wd_path) = dir_path (rwd) /= id_path.string ();
+
+ // Set the test variable at the script level. We do it even if it's
+ // set in the buildfile since they use different types.
+ //
+ {
+ value& v (assign (test_var));
+
+ // Note that the test variable's visibility is target.
+ //
+ lookup l (find_in_buildfile ("test", false));
+
+ // Note that we have similar code for simple tests.
+ //
+ // If set to non-NULL below, then the value is derived from this
+ // target.
+ //
+ const target* t (nullptr);
+
+ if (l.defined ())
+ {
+ const name* n (cast_null<name> (l));
+
+ if (n == nullptr)
+ v = nullptr;
+ else if (n->empty ())
+ v = path ();
+ else if (n->simple ())
+ {
+ // Ignore the special 'true' value.
+ //
+ if (n->value != "true")
+ v = path (n->value);
+ else
+ t = &tt;
+ }
+ else if (n->directory ())
+ v = path (n->dir);
+ else
+ {
+ // Must be a target name.
+ //
+ // @@ OUT: what if this is a @-qualified pair of names?
+ //
+ t = search_existing (*n, target_scope);
+
+ if (t == nullptr)
+ fail << "unknown target '" << *n << "' in test variable";
+ }
+ }
+ else
+ // By default we set it to the test target's path.
+ //
+ t = &tt;
+
+ // If this is a path-based target, then we use the path. If this
+ // is an alias target (e.g., dir{}), then we use the directory
+ // path. Otherwise, we leave it NULL expecting the testscript to
+ // set it to something appropriate, if used.
+ //
+ if (t != nullptr)
+ {
+ if (auto* pt = t->is_a<path_target> ())
+ {
+ // Do some sanity checks: the target better be up-to-date with
+ // an assigned path.
+ //
+ v = pt->path ();
+
+ if (v.empty ())
+ fail << "target " << *pt << " specified in the test variable "
+ << "is out of date" <<
+ info << "consider specifying it as a prerequisite of " << tt;
+ }
+ else if (t->is_a<alias> ())
+ v = path (t->dir);
+ else if (t != &tt)
+ fail << "target " << *t << " specified in the test variable "
+ << "is not path-based";
+ }
+ }
+
+ // Set the special $*, $N variables.
+ //
+ reset_special ();
+ }
+
+ lookup scope::
+ find (const variable& var) const
+ {
+   // Walk the script scopes from this one up to (and including) the root
+   // returning the first value found.
+   //
+   for (const scope* s (this); s != nullptr; s = s->parent)
+   {
+     auto r (s->vars.find (var));
+
+     if (r.first != nullptr)
+       return lookup (*r.first, r.second, s->vars);
+   }
+
+   // Not found in the script scopes so continue in the buildfile.
+   //
+   return find_in_buildfile (var.name);
+ }
+
+
+ lookup scope::
+ find_in_buildfile (const string& n, bool target_only) const
+ {
+   // Map the name to the corresponding buildfile variable. Note that we
+   // only look it up without inserting a new variable into the pool (we
+   // might be running in parallel). And if there is no such variable, then
+   // there cannot possibly be any value.
+   //
+   const variable* pv (build2::var_pool.find (n));
+
+   if (pv == nullptr)
+     return lookup ();
+
+   const variable& var (*pv);
+   const script& scr (static_cast<const script&> (*root));
+
+   // First check the target we are testing.
+   //
+   // Note that we skip applying the override if we did not find any value.
+   // In this case, presumably the override also affects the script target
+   // and we will pick it up there. A bit fuzzy.
+   //
+   auto org (scr.test_target.find_original (var, target_only));
+
+   if (!org.first)
+   {
+     // Then the script target followed by the scopes it is in. Note that
+     // while unlikely it is possible the test and script targets will be
+     // in different scopes which brings the question of which scopes we
+     // should search.
+     //
+     return scr.script_target[var];
+   }
+
+   if (var.overrides != nullptr)
+     org = scr.target_scope.find_override (var, move (org), true);
+
+   return org.first;
+ }
+
+ value& scope::
+ append (const variable& var)
+ {
+   lookup l (find (var));
+   const bool def (l.defined ());
+
+   // If the variable already exists in this scope, then modify it in place.
+   //
+   if (def && l.belongs (*this))
+     return vars.modify (l);
+
+   // Otherwise enter a new (NULL) value into this scope and, if there is a
+   // value in the outer scope, copy it (together with its type).
+   //
+   value& r (assign (var));
+
+   if (def)
+     r = *l;
+
+   return r;
+ }
+
+ void scope::
+ reset_special ()
+ {
+   // Assemble the $* value: test, test.options, and test.arguments followed
+   // by test.redirects and test.cleanups.
+   //
+   strings cmd;
+
+   auto add = [&cmd] (const strings& v)
+   {
+     cmd.insert (cmd.end (), v.begin (), v.end ());
+   };
+
+   if (lookup l = find (root->test_var))
+     cmd.push_back (cast<path> (l).representation ());
+
+   if (lookup l = find (root->options_var))
+     add (cast<strings> (l));
+
+   if (lookup l = find (root->arguments_var))
+     add (cast<strings> (l));
+
+   // Remember the size at this point: redirects and cleanups are kept out
+   // of $N.
+   //
+   const size_t n (cmd.size ());
+
+   if (lookup l = find (root->redirects_var))
+     add (cast<strings> (l));
+
+   if (lookup l = find (root->cleanups_var))
+     add (cast<strings> (l));
+
+   // Set the $N values that are present and clear the rest.
+   //
+   for (size_t i (0); i != 10; ++i)
+   {
+     value& v (assign (*root->cmdN_var[i]));
+
+     if (i >= n)
+     {
+       v = nullptr; // Clear any old values.
+       continue;
+     }
+
+     // Note that $0 is a path while the rest are strings.
+     //
+     if (i != 0)
+       v = cmd[i];
+     else
+       v = path (cmd[i]);
+   }
+
+   // Set $*.
+   //
+   assign (root->cmd_var) = move (cmd);
+ }
+ }
+ }
+}
diff --git a/libbuild2/test/script/script.hxx b/libbuild2/test/script/script.hxx
new file mode 100644
index 0000000..e3f8251
--- /dev/null
+++ b/libbuild2/test/script/script.hxx
@@ -0,0 +1,559 @@
+// file : libbuild2/test/script/script.hxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
+// license : MIT; see accompanying LICENSE file
+
+#ifndef LIBBUILD2_TEST_SCRIPT_SCRIPT_HXX
+#define LIBBUILD2_TEST_SCRIPT_SCRIPT_HXX
+
+#include <set>
+
+#include <libbuild2/types.hxx>
+#include <libbuild2/utility.hxx>
+
+#include <libbuild2/variable.hxx>
+
+#include <libbuild2/test/target.hxx>
+
+#include <libbuild2/test/script/token.hxx> // replay_tokens
+
+namespace build2
+{
+ class target;
+
+ namespace test
+ {
+ namespace script
+ {
+ class parser; // Required by VC for 'friend class parser' declaration.
+
+ // Pre-parse representation.
+ //
+
+ // Type of a pre-parsed line: a variable line, a command line, or one of
+ // the flow control lines (named after the corresponding keywords).
+ //
+ enum class line_type
+ {
+ var,
+ cmd,
+ cmd_if,
+ cmd_ifn,
+ cmd_elif,
+ cmd_elifn,
+ cmd_else,
+ cmd_end
+ };
+
+ // Print the human-readable name of the line type.
+ //
+ ostream&
+ operator<< (ostream&, line_type);
+
+ // Pre-parsed line: its type and the tokens saved for replay.
+ //
+ struct line
+ {
+ line_type type;
+ replay_tokens tokens;
+
+ // Type-specific data.
+ //
+ union
+ {
+ const variable* var; // Pre-entered for line_type::var.
+ };
+ };
+
+ // Most of the time we will have just one line (test command).
+ //
+ using lines = small_vector<line, 1>;
+
+ // Parse object model.
+ //
+
+ // redirect
+ //
+ // Note that the type determines which member of the redirect union (if
+ // any) is active.
+ //
+ enum class redirect_type
+ {
+ none,
+ pass,
+ null,
+ trace,
+ merge,
+ here_str_literal,
+ here_str_regex,
+ here_doc_literal,
+ here_doc_regex,
+ here_doc_ref, // Reference to here_doc literal or regex.
+ file,
+ };
+
+ // Pre-parsed (but not yet instantiated) regex lines. The idea is to be
+ // able to re-create their (more or less) exact text representation for
+ // diagnostics while also being able to instantiate them without any
+ // re-parsing.
+ //
+ struct regex_line
+ {
+   // If regex is true, then value is the regex expression. Otherwise, it
+   // is a literal. Note that special characters can be present in both
+   // cases. For example, //+ is a regex, while /+ is a literal, both with
+   // '+' as a special character. Flags are only valid for regex.
+   //
+   // Literals are either textual (no special characters) or special (only
+   // special characters). For example, foo is a textual literal, while /.+
+   // is a special one. Note that a literal must not have both value and
+   // special non-empty.
+   //
+   bool regex;
+
+   string value;
+   string flags;
+   string special;
+
+   uint64_t line;
+   uint64_t column;
+
+   // Create a regex with optional special characters.
+   //
+   regex_line (uint64_t l, uint64_t c,
+               string v, string f, string s = string ())
+     : regex (true),
+       value (move (v)),
+       flags (move (f)),
+       special (move (s)),
+       line (l),
+       column (c)
+   {
+   }
+
+   // Create a literal: special if s is true and textual otherwise.
+   //
+   regex_line (uint64_t l, uint64_t c, string v, bool s)
+     : regex (false),
+       line (l),
+       column (c)
+   {
+     (s ? special : value) = move (v);
+   }
+ };
+
+ // Regex introducer character and global flags together with the
+ // pre-parsed lines.
+ //
+ struct regex_lines
+ {
+ char intro; // Introducer character.
+ string flags; // Global flags (here-document).
+
+ small_vector<regex_line, 8> lines;
+ };
+
+ // Output file redirect mode (meaningless for an input redirect).
+ //
+ enum class redirect_fmode
+ {
+ compare,
+ overwrite,
+ append
+ };
+
+ // Redirect of one of the command's standard streams. The type determines
+ // which member of the union below (if any) is active.
+ //
+ struct redirect
+ {
+   redirect_type type;
+
+   struct file_type
+   {
+     using path_type = build2::path;
+     path_type path;
+     redirect_fmode mode; // Meaningless for input redirect.
+   };
+
+   union
+   {
+     int         fd;    // Merge-to descriptor.
+     string      str;   // Note: with trailing newline, if requested.
+     regex_lines regex; // Note: with trailing blank, if requested.
+     file_type   file;
+     reference_wrapper<const redirect> ref; // Note: no chains.
+   };
+
+   string modifiers;   // Redirect modifiers.
+   string end;         // Here-document end marker (no regex intro/flags).
+
+   // Here-document end marker location.
+   //
+   // Note that these are initialized since the move constructor reads them
+   // regardless of the redirect type (and thus even if nobody has set
+   // them).
+   //
+   uint64_t end_line = 0;
+   uint64_t end_column = 0;
+
+   // Create redirect of a type other than reference.
+   //
+   explicit
+   redirect (redirect_type = redirect_type::none);
+
+   // Create redirect of the reference type.
+   //
+   redirect (redirect_type t, const redirect& r)
+     : type (redirect_type::here_doc_ref), ref (r)
+   {
+     // There is no support (and need) for reference chains.
+     //
+     assert (t == redirect_type::here_doc_ref &&
+             r.type != redirect_type::here_doc_ref);
+   }
+
+   // Move constructible/assignable-only type.
+   //
+   redirect (redirect&&);
+   redirect& operator= (redirect&&);
+
+   ~redirect ();
+
+   // Return the referenced redirect for the reference type and this
+   // redirect otherwise.
+   //
+   const redirect&
+   effective () const noexcept
+   {
+     return type == redirect_type::here_doc_ref ? ref.get () : *this;
+   }
+ };
+
+ // cleanup
+ //
+ enum class cleanup_type
+ {
+ always, // &foo - cleanup, fail if does not exist.
+ maybe, // &?foo - cleanup, ignore if does not exist.
+ never // &!foo - don't cleanup, ignore if doesn't exist.
+ };
+
+ // File or directory to be automatically cleaned up at the end of the
+ // scope. If the path ends with a trailing slash, then it is assumed to
+ // be a directory, otherwise -- a file. A directory that is about to be
+ // cleaned up must be empty.
+ //
+ // The last component in the path may contain a wildcard that has the
+ // following semantics:
+ //
+ // dir/* - remove all immediate files
+ // dir/*/ - remove all immediate sub-directories (must be empty)
+ // dir/** - remove all files recursively
+ // dir/**/ - remove all sub-directories recursively (must be empty)
+ // dir/*** - remove directory dir with all files and sub-directories
+ // recursively
+ //
+ struct cleanup
+ {
+ cleanup_type type;
+ build2::path path;
+ };
+ using cleanups = vector<cleanup>;
+
+ // command_exit
+ //
+ // The expected exit code of a command and how the actual code is compared
+ // to it (equal or not equal).
+ //
+ enum class exit_comparison {eq, ne};
+
+ struct command_exit
+ {
+ // C/C++ don't apply constraints on program exit code other than it
+ // being of type int.
+ //
+ // POSIX specifies that only the least significant 8 bits shall be
+ // available from wait() and waitpid(); the full value shall be
+ // available from waitid() (read more at _Exit, _exit Open Group
+ // spec).
+ //
+ // While the Linux man page for waitid() doesn't mention any
+ // deviations from the standard, the FreeBSD implementation (as of
+ // version 11.0) only returns 8 bits like the other wait*() calls.
+ //
+ // Windows supports 32-bit exit codes.
+ //
+ // Note that in shells some exit values can have special meaning so
+ // using them can be a source of confusion. For bash, values in the
+ // [126, 255] range are such special ones (see Appendix E, "Exit
+ // Codes With Special Meanings" in the Advanced Bash-Scripting Guide).
+ //
+ exit_comparison comparison;
+ uint8_t code;
+ };
+
+ // command
+ //
+ // A single command: the program with its arguments, the redirects of the
+ // three standard streams, the cleanups, and the exit code check.
+ //
+ struct command
+ {
+ path program;
+ strings arguments;
+
+ redirect in;
+ redirect out;
+ redirect err;
+
+ script::cleanups cleanups;
+
+ // By default the command is expected to exit with the zero code.
+ //
+ command_exit exit {exit_comparison::eq, 0};
+ };
+
+ // Bit flags that select which parts of a command to print.
+ //
+ enum class command_to_stream: uint16_t
+ {
+ header = 0x01,
+ here_doc = 0x02, // Note: printed on a new line.
+ all = header | here_doc
+ };
+
+ // Print the selected parts of the command.
+ //
+ void
+ to_stream (ostream&, const command&, command_to_stream);
+
+ // Print all the parts of the command.
+ //
+ ostream&
+ operator<< (ostream&, const command&);
+
+ // command_pipe
+ //
+ // A sequence of commands (printed separated with " | ").
+ //
+ using command_pipe = vector<command>;
+
+ void
+ to_stream (ostream&, const command_pipe&, command_to_stream);
+
+ ostream&
+ operator<< (ostream&, const command_pipe&);
+
+ // command_expr
+ //
+ // A sequence of pipes combined with the logical OR/AND operators.
+ //
+ enum class expr_operator {log_or, log_and};
+
+ struct expr_term
+ {
+ expr_operator op; // OR-ed to an implied false for the first term.
+ command_pipe pipe;
+ };
+
+ using command_expr = vector<expr_term>;
+
+ void
+ to_stream (ostream&, const command_expr&, command_to_stream);
+
+ ostream&
+ operator<< (ostream&, const command_expr&);
+
+ // command_type
+ //
+ // Passed to runner::run() together with the command expression.
+ //
+ enum class command_type {test, setup, teardown};
+
+ // description
+ //
+ // Scope description: the id, summary, and details parts.
+ //
+ struct description
+ {
+   string id;
+   string summary;
+   string details;
+
+   // Return true if none of the parts is specified.
+   //
+   bool
+   empty () const
+   {
+     if (!id.empty ())
+       return false;
+
+     if (!summary.empty ())
+       return false;
+
+     return details.empty ();
+   }
+ };
+
+ // scope
+ //
+ class script;
+
+ enum class scope_state {unknown, passed, failed};
+
+ // Base of the group and test scopes (and, via group, of the script). A
+ // scope holds its variables, id and working directory paths, optional
+ // description, state, and the registered cleanups.
+ //
+ class scope
+ {
+ public:
+ scope* const parent; // NULL for the root (script) scope.
+ script* const root; // Self for the root (script) scope.
+
+ // The chain of if-else scope alternatives. See also if_cond_ below.
+ //
+ unique_ptr<scope> if_chain;
+
+ // Note that if we pass the variable name as a string, then it will
+ // be looked up in the wrong pool.
+ //
+ variable_map vars;
+
+ const path& id_path; // Id path ($@, relative in POSIX form).
+ const dir_path& wd_path; // Working dir ($~, absolute and normalized).
+
+ optional<description> desc;
+
+ scope_state state = scope_state::unknown;
+ test::script::cleanups cleanups;
+ paths special_cleanups;
+
+ // Variables.
+ //
+ public:
+ // Lookup the variable starting from this scope, continuing with outer
+ // scopes, then the target being tested, then the testscript target,
+ // and then outer buildfile scopes (including testscript-type/pattern
+ // specific).
+ //
+ lookup
+ find (const variable&) const;
+
+ // As above but only look for buildfile variables. If target_only is
+ // false then also look in scopes of the test target (this should only
+ // be done if the variable's visibility is target).
+ //
+ lookup
+ find_in_buildfile (const string&, bool target_only = true) const;
+
+ // Return a value suitable for assignment. If the variable does not
+ // exist in this scope's map, then a new one with the NULL value is
+ // added and returned. Otherwise the existing value is returned.
+ //
+ value&
+ assign (const variable& var) {return vars.assign (var);}
+
+ // Return a value suitable for append/prepend. If the variable does
+ // not exist in this scope's map, then outer scopes are searched for
+ // the same variable. If found then a new variable with the found
+ // value is added to this scope and returned. Otherwise this function
+ // proceeds as assign() above.
+ //
+ value&
+ append (const variable&);
+
+ // Reset special $*, $N variables based on the test.* values.
+ //
+ void
+ reset_special ();
+
+ // Cleanup.
+ //
+ public:
+ // Register a cleanup. If the cleanup is explicit, then override the
+ // cleanup type if this path is already registered. Ignore implicit
+ // registration of a path outside script working directory.
+ //
+ void
+ clean (cleanup, bool implicit);
+
+ // Register cleanup of a special file. Such files are created to
+ // maintain testscript machinery and must be removed first, not to
+ // interfere with the user-defined wildcard cleanups.
+ //
+ void
+ clean_special (path p);
+
+ public:
+ virtual
+ ~scope () = default;
+
+ protected:
+ scope (const string& id, scope* parent, script* root);
+
+ // Pre-parse data.
+ //
+ public:
+ // Implemented by the derived scopes based on their pre-parse data.
+ //
+ virtual bool
+ empty () const = 0;
+
+ protected:
+ friend class parser;
+
+ location start_loc_;
+ location end_loc_;
+
+ optional<line> if_cond_;
+ };
+
+ // group
+ //
+ // A scope that contains nested scopes as well as the setup and teardown
+ // lines.
+ //
+ class group: public scope
+ {
+ public:
+   vector<unique_ptr<scope>> scopes;
+
+ public:
+   group (const string& id, group& p): scope (id, &p, p.root) {}
+
+ protected:
+   group (const string& id, script* r): scope (id, nullptr, r) {}
+
+   // Pre-parse data.
+   //
+ public:
+   // The group is empty if it has no condition, no setup or teardown lines,
+   // and all its nested scopes are empty.
+   //
+   virtual bool
+   empty () const override
+   {
+     // Note that the condition expression can have side-effects.
+     //
+     if (if_cond_ || !setup_.empty () || !tdown_.empty ())
+       return false;
+
+     for (const unique_ptr<scope>& s: scopes)
+     {
+       if (!s->empty ())
+         return false;
+     }
+
+     return true;
+   }
+
+ private:
+   friend class parser;
+
+   lines setup_;
+   lines tdown_;
+ };
+
+ // test
+ //
+ // A scope that contains the test lines.
+ //
+ class test: public scope
+ {
+ public:
+ test (const string& id, group& p): scope (id, &p, p.root) {}
+
+ // Pre-parse data.
+ //
+ public:
+ // The test is empty if it has no lines.
+ //
+ virtual bool
+ empty () const override
+ {
+ return tests_.empty ();
+ }
+
+ private:
+ friend class parser;
+
+ lines tests_;
+ };
+
+ // script
+ //
+ // Note that script_base is listed before group in the bases of script so
+ // the variable pool and the variables below are initialized before the
+ // scope constructor (which uses id_var and wd_var) is executed.
+ //
+ class script_base // Make sure certain things are initialized early.
+ {
+ protected:
+ script_base ();
+
+ public:
+ variable_pool var_pool;
+ mutable shared_mutex var_pool_mutex;
+
+ const variable& test_var; // test
+ const variable& options_var; // test.options
+ const variable& arguments_var; // test.arguments
+ const variable& redirects_var; // test.redirects
+ const variable& cleanups_var; // test.cleanups
+
+ const variable& wd_var; // $~
+ const variable& id_var; // $@
+ const variable& cmd_var; // $*
+ const variable* cmdN_var[10]; // $N
+ };
+
+ // The root scope of a testscript. Neither copyable nor movable.
+ //
+ class script: public script_base, public group
+ {
+ public:
+ script (const target& test_target,
+ const testscript& script_target,
+ const dir_path& root_wd);
+
+ script (script&&) = delete;
+ script (const script&) = delete;
+ script& operator= (script&&) = delete;
+ script& operator= (const script&) = delete;
+
+ public:
+ const target& test_target; // Target we are testing.
+ const build2::scope& target_scope; // Base scope of test target.
+ const testscript& script_target; // Target of the testscript file.
+
+ // Pre-parse data.
+ //
+ private:
+ friend class parser;
+
+ // Testscript file paths. Specifically, replay_token::file points to
+ // these paths.
+ //
+ std::set<path> paths_;
+ };
+ }
+ }
+}
+
+#include <libbuild2/test/script/script.ixx>
+
+#endif // LIBBUILD2_TEST_SCRIPT_SCRIPT_HXX
diff --git a/libbuild2/test/script/script.ixx b/libbuild2/test/script/script.ixx
new file mode 100644
index 0000000..d4a216a
--- /dev/null
+++ b/libbuild2/test/script/script.ixx
@@ -0,0 +1,60 @@
+// file : libbuild2/test/script/script.ixx -*- C++ -*-
+// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
+// license : MIT; see accompanying LICENSE file
+
+namespace build2
+{
+ namespace test
+ {
+ namespace script
+ {
+ // Bitwise-AND the flags in y into x and return the result.
+ //
+ inline command_to_stream
+ operator&= (command_to_stream& x, command_to_stream y)
+ {
+   const uint16_t l (static_cast<uint16_t> (x));
+   const uint16_t r (static_cast<uint16_t> (y));
+
+   x = static_cast<command_to_stream> (l & r);
+   return x;
+ }
+
+ // Bitwise-OR the flags in y into x and return the result.
+ //
+ inline command_to_stream
+ operator|= (command_to_stream& x, command_to_stream y)
+ {
+   const uint16_t l (static_cast<uint16_t> (x));
+   const uint16_t r (static_cast<uint16_t> (y));
+
+   x = static_cast<command_to_stream> (l | r);
+   return x;
+ }
+
+ // Return the flags common to x and y.
+ //
+ inline command_to_stream
+ operator& (command_to_stream x, command_to_stream y)
+ {
+   x &= y; // Note: x is a copy.
+   return x;
+ }
+
+ // Return the flags set in x or y.
+ //
+ inline command_to_stream
+ operator| (command_to_stream x, command_to_stream y)
+ {
+   x |= y; // Note: x is a copy.
+   return x;
+ }
+
+
+ // command
+ //
+ inline ostream&
+ operator<< (ostream& o, const command& c)
+ {
+   // Print both the header and the here-documents.
+   //
+   constexpr command_to_stream mode (command_to_stream::all);
+
+   to_stream (o, c, mode);
+   return o;
+ }
+
+ // command_pipe
+ //
+ inline ostream&
+ operator<< (ostream& o, const command_pipe& p)
+ {
+   // Print both the headers and the here-documents.
+   //
+   constexpr command_to_stream mode (command_to_stream::all);
+
+   to_stream (o, p, mode);
+   return o;
+ }
+
+ // command_expr
+ //
+ inline ostream&
+ operator<< (ostream& o, const command_expr& e)
+ {
+   // Print both the headers and the here-documents.
+   //
+   constexpr command_to_stream mode (command_to_stream::all);
+
+   to_stream (o, e, mode);
+   return o;
+ }
+ }
+ }
+}
diff --git a/libbuild2/test/script/token.cxx b/libbuild2/test/script/token.cxx
new file mode 100644
index 0000000..e38e227
--- /dev/null
+++ b/libbuild2/test/script/token.cxx
@@ -0,0 +1,57 @@
+// file : libbuild2/test/script/token.cxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
+// license : MIT; see accompanying LICENSE file
+
+#include <libbuild2/test/script/token.hxx>
+
+using namespace std;
+
+namespace build2
+{
+ namespace test
+ {
+ namespace script
+ {
+ // Print the token t to os.
+ //
+ // Token types listed in the switch below are printed as their spelling,
+ // followed for some of them by the token value. Any other type is passed
+ // on, with the same arguments, to build2::token_printer().
+ //
+ // If d is true, the printed token is wrapped in single quotes (the quoting
+ // is meant for diagnostics).
+ //
+ void
+ token_printer (ostream& os, const token& t, bool d)
+ {
+   // Spelling of the token and whether the token value is printed right
+   // after it.
+   //
+   const char* spelling (nullptr);
+   bool with_value (false);
+
+   switch (t.type)
+   {
+   case token_type::semi:         spelling = ";";                        break;
+
+   case token_type::dot:          spelling = ".";                        break;
+
+   case token_type::plus:         spelling = "+";                        break;
+   case token_type::minus:        spelling = "-";                        break;
+
+   case token_type::clean:        spelling = "&";   with_value = true;   break;
+   case token_type::pipe:         spelling = "|";                        break;
+
+   case token_type::in_pass:      spelling = "<|";                       break;
+   case token_type::in_null:      spelling = "<-";                       break;
+   case token_type::in_str:       spelling = "<";   with_value = true;   break;
+   case token_type::in_doc:       spelling = "<<";  with_value = true;   break;
+   case token_type::in_file:      spelling = "<<<";                      break;
+
+   case token_type::out_pass:     spelling = ">|";                       break;
+   case token_type::out_null:     spelling = ">-";                       break;
+   case token_type::out_trace:    spelling = ">!";                       break;
+   case token_type::out_merge:    spelling = ">&";                       break;
+   case token_type::out_str:      spelling = ">";   with_value = true;   break;
+   case token_type::out_doc:      spelling = ">>";  with_value = true;   break;
+   case token_type::out_file_cmp: spelling = ">>>"; with_value = true;   break;
+   case token_type::out_file_ovr: spelling = ">=";  with_value = true;   break;
+   case token_type::out_file_app: spelling = ">+";  with_value = true;   break;
+
+   default:
+     {
+       // Not one of the types handled here: defer to the base printer.
+       //
+       build2::token_printer (os, t, d);
+       return;
+     }
+   }
+
+   // Only quote for diagnostics.
+   //
+   const char* quote (d ? "'" : "");
+
+   os << quote << spelling;
+
+   if (with_value)
+     os << t.value;
+
+   os << quote;
+ }
+ }
+ }
+}
diff --git a/libbuild2/test/script/token.hxx b/libbuild2/test/script/token.hxx
new file mode 100644
index 0000000..4abe617
--- /dev/null
+++ b/libbuild2/test/script/token.hxx
@@ -0,0 +1,65 @@
+// file : libbuild2/test/script/token.hxx -*- C++ -*-
+// copyright : Copyright (c) 2014-2019 Code Synthesis Ltd
+// license : MIT; see accompanying LICENSE file
+
+#ifndef LIBBUILD2_TEST_SCRIPT_TOKEN_HXX
+#define LIBBUILD2_TEST_SCRIPT_TOKEN_HXX
+
+#include <libbuild2/types.hxx>
+#include <libbuild2/utility.hxx>
+
+#include <libbuild2/token.hxx>
+
+namespace build2
+{
+ namespace test
+ {
+ namespace script
+ {
+ // Testscript token types.
+ //
+ // Derives from build2::token_type and adds the enumerators below, which
+ // are numbered starting from base_type::value_next.
+ //
+ struct token_type: build2::token_type
+ {
+   typedef build2::token_type base_type;
+
+   enum
+   {
+     // NOTE: any enumerator added here needs a matching case in
+     //       token_printer().
+     //
+     // NOTE(review): token_printer() also prints the token value after
+     //               >>>, >=, and >+ -- confirm whether these carry
+     //               modifiers in the value as well.
+
+     semi = base_type::value_next, // ;
+
+     dot,                          // .
+
+     plus,                         // +
+     minus,                        // -
+
+     pipe,                         // |
+     clean,                        // &{?!}   (value holds the modifiers)
+
+     in_pass,                      // <|
+     in_null,                      // <-
+     in_str,                       // <{:}    (value holds the modifiers)
+     in_doc,                       // <<{:}   (value holds the modifiers)
+     in_file,                      // <<<
+
+     out_pass,                     // >|
+     out_null,                     // >-
+     out_trace,                    // >!
+     out_merge,                    // >&
+     out_str,                      // >{:~}   (value holds the modifiers)
+     out_doc,                      // >>{:~}  (value holds the modifiers)
+     out_file_cmp,                 // >>>
+     out_file_ovr,                 // >=
+     out_file_app                  // >+
+   };
+
+   token_type () = default;
+
+   // Conversions from a value_type value and from the base type; both
+   // simply initialize the base subobject with the argument.
+   //
+   token_type (value_type v): base_type (v) {}
+   token_type (base_type v): base_type (v) {}
+ };
+
+ // Print the token t to os. As implemented in token.cxx, d selects
+ // wrapping the printed token in single quotes, and token types that are
+ // not handled there are forwarded to build2::token_printer().
+ //
+ void
+ token_printer (ostream& os, const token& t, bool d);
+ }
+ }
+}
+
+#endif // LIBBUILD2_TEST_SCRIPT_TOKEN_HXX