diff options
author | Boris Kolpackov <boris@codesynthesis.com> | 2020-04-28 08:48:53 +0200 |
---|---|---|
committer | Boris Kolpackov <boris@codesynthesis.com> | 2020-05-27 15:47:28 +0200 |
commit | b808c255b6a9ddba085bf5646e7d20ec344f2e2d (patch) | |
tree | 32730291f7e6de8ef0a227905520dd66fb4ec0f3 /libbuild2/test/script | |
parent | 3552356a87402727e663131994fa87f48b3cd4fb (diff) |
Initial support for ad hoc recipes (still work in progress)
Diffstat (limited to 'libbuild2/test/script')
25 files changed, 412 insertions, 8111 deletions
diff --git a/libbuild2/test/script/builtin-options.cxx b/libbuild2/test/script/builtin-options.cxx deleted file mode 100644 index 6b6afe0..0000000 --- a/libbuild2/test/script/builtin-options.cxx +++ /dev/null @@ -1,667 +0,0 @@ -// -*- C++ -*- -// -// This file was generated by CLI, a command line interface -// compiler for C++. -// - -// Begin prologue. -// -// -// End prologue. - -#include <libbuild2/test/script/builtin-options.hxx> - -#include <map> -#include <set> -#include <string> -#include <vector> -#include <ostream> -#include <sstream> - -namespace build2 -{ - namespace test - { - namespace script - { - namespace cli - { - // unknown_option - // - unknown_option:: - ~unknown_option () throw () - { - } - - void unknown_option:: - print (::std::ostream& os) const - { - os << "unknown option '" << option ().c_str () << "'"; - } - - const char* unknown_option:: - what () const throw () - { - return "unknown option"; - } - - // unknown_argument - // - unknown_argument:: - ~unknown_argument () throw () - { - } - - void unknown_argument:: - print (::std::ostream& os) const - { - os << "unknown argument '" << argument ().c_str () << "'"; - } - - const char* unknown_argument:: - what () const throw () - { - return "unknown argument"; - } - - // missing_value - // - missing_value:: - ~missing_value () throw () - { - } - - void missing_value:: - print (::std::ostream& os) const - { - os << "missing value for option '" << option ().c_str () << "'"; - } - - const char* missing_value:: - what () const throw () - { - return "missing option value"; - } - - // invalid_value - // - invalid_value:: - ~invalid_value () throw () - { - } - - void invalid_value:: - print (::std::ostream& os) const - { - os << "invalid value '" << value ().c_str () << "' for option '" - << option ().c_str () << "'"; - - if (!message ().empty ()) - os << ": " << message ().c_str (); - } - - const char* invalid_value:: - what () const throw () - { - return "invalid option value"; - } - - // eos_reached - // - void eos_reached:: - print (::std::ostream& os) const - { - os << what (); - } - - const char* eos_reached:: - what () const throw () - { - return "end of argument stream reached"; - } - - // scanner - // - scanner:: - ~scanner () - { - } - - // argv_scanner - // - bool argv_scanner:: - more () - { - return i_ < argc_; - } - - const char* argv_scanner:: - peek () - { - if (i_ < argc_) - return argv_[i_]; - else - throw eos_reached (); - } - - const char* argv_scanner:: - next () - { - if (i_ < argc_) - { - const char* r (argv_[i_]); - - if (erase_) - { - for (int i (i_ + 1); i < argc_; ++i) - argv_[i - 1] = argv_[i]; - - --argc_; - argv_[argc_] = 0; - } - else - ++i_; - - return r; - } - else - throw eos_reached (); - } - - void argv_scanner:: - skip () - { - if (i_ < argc_) - ++i_; - else - throw eos_reached (); - } - - // vector_scanner - // - bool vector_scanner:: - more () - { - return i_ < v_.size (); - } - - const char* vector_scanner:: - peek () - { - if (i_ < v_.size ()) - return v_[i_].c_str (); - else - throw eos_reached (); - } - - const char* vector_scanner:: - next () - { - if (i_ < v_.size ()) - return v_[i_++].c_str (); - else - throw eos_reached (); - } - - void vector_scanner:: - skip () - { - if (i_ < v_.size ()) - ++i_; - else - throw eos_reached (); - } - - template <typename X> - struct parser - { - static void - parse (X& x, bool& xs, scanner& s) - { - using namespace std; - - const char* o (s.next ()); - if (s.more ()) - { - string v (s.next ()); - istringstream is (v); - if (!(is >> x && is.peek () == istringstream::traits_type::eof ())) - throw invalid_value (o, v); - } - else - throw missing_value (o); - - xs = true; - } - }; - - template <> - struct parser<bool> - { - static void - parse (bool& x, scanner& s) - { - s.next (); - x = true; - } - }; - - template <> - struct parser<std::string> - { - static void - parse (std::string& x, bool& xs, scanner& s) - { - const char* o (s.next ()); - - if (s.more ()) - x = s.next (); - else - throw missing_value (o); - - xs = true; - } - }; - - template <typename X> - struct parser<std::vector<X> > - { - static void - parse (std::vector<X>& c, bool& xs, scanner& s) - { - X x; - bool dummy; - parser<X>::parse (x, dummy, s); - c.push_back (x); - xs = true; - } - }; - - template <typename X> - struct parser<std::set<X> > - { - static void - parse (std::set<X>& c, bool& xs, scanner& s) - { - X x; - bool dummy; - parser<X>::parse (x, dummy, s); - c.insert (x); - xs = true; - } - }; - - template <typename K, typename V> - struct parser<std::map<K, V> > - { - static void - parse (std::map<K, V>& m, bool& xs, scanner& s) - { - const char* o (s.next ()); - - if (s.more ()) - { - std::string ov (s.next ()); - std::string::size_type p = ov.find ('='); - - K k = K (); - V v = V (); - std::string kstr (ov, 0, p); - std::string vstr (ov, (p != std::string::npos ? p + 1 : ov.size ())); - - int ac (2); - char* av[] = - { - const_cast<char*> (o), 0 - }; - - bool dummy; - if (!kstr.empty ()) - { - av[1] = const_cast<char*> (kstr.c_str ()); - argv_scanner s (0, ac, av); - parser<K>::parse (k, dummy, s); - } - - if (!vstr.empty ()) - { - av[1] = const_cast<char*> (vstr.c_str ()); - argv_scanner s (0, ac, av); - parser<V>::parse (v, dummy, s); - } - - m[k] = v; - } - else - throw missing_value (o); - - xs = true; - } - }; - - template <typename X, typename T, T X::*M> - void - thunk (X& x, scanner& s) - { - parser<T>::parse (x.*M, s); - } - - template <typename X, typename T, T X::*M, bool X::*S> - void - thunk (X& x, scanner& s) - { - parser<T>::parse (x.*M, x.*S, s); - } - } - } - } -} - -#include <map> -#include <cstring> - -namespace build2 -{ - namespace test - { - namespace script - { - // set_options - // - - set_options:: - set_options () - : exact_ (), - newline_ (), - whitespace_ () - { - } - - set_options:: - set_options (int& argc, - char** argv, - bool erase, - ::build2::test::script::cli::unknown_mode opt, - ::build2::test::script::cli::unknown_mode arg) - : exact_ (), - newline_ (), - whitespace_ () - { - ::build2::test::script::cli::argv_scanner s (argc, argv, erase); - _parse (s, opt, arg); - } - - set_options:: - set_options (int start, - int& argc, - char** argv, - bool erase, - ::build2::test::script::cli::unknown_mode opt, - ::build2::test::script::cli::unknown_mode arg) - : exact_ (), - newline_ (), - whitespace_ () - { - ::build2::test::script::cli::argv_scanner s (start, argc, argv, erase); - _parse (s, opt, arg); - } - - set_options:: - set_options (int& argc, - char** argv, - int& end, - bool erase, - ::build2::test::script::cli::unknown_mode opt, - ::build2::test::script::cli::unknown_mode arg) - : exact_ (), - newline_ (), - whitespace_ () - { - ::build2::test::script::cli::argv_scanner s (argc, argv, erase); - _parse (s, opt, arg); - end = s.end (); - } - - set_options:: - set_options (int start, - int& argc, - char** argv, - int& end, - bool erase, - ::build2::test::script::cli::unknown_mode opt, - ::build2::test::script::cli::unknown_mode arg) - : exact_ (), - newline_ (), - whitespace_ () - { - ::build2::test::script::cli::argv_scanner s (start, argc, argv, erase); - _parse (s, opt, arg); - end = s.end (); - } - - set_options:: - set_options (::build2::test::script::cli::scanner& s, - ::build2::test::script::cli::unknown_mode opt, - ::build2::test::script::cli::unknown_mode arg) - : exact_ (), - newline_ (), - whitespace_ () - { - _parse (s, opt, arg); - } - - typedef - std::map<std::string, void (*) (set_options&, ::build2::test::script::cli::scanner&)> - _cli_set_options_map; - - static _cli_set_options_map _cli_set_options_map_; - - struct _cli_set_options_map_init - { - _cli_set_options_map_init () - { - _cli_set_options_map_["--exact"] = - &::build2::test::script::cli::thunk< set_options, bool, &set_options::exact_ >; - _cli_set_options_map_["-e"] = - &::build2::test::script::cli::thunk< set_options, bool, &set_options::exact_ >; - _cli_set_options_map_["--newline"] = - &::build2::test::script::cli::thunk< set_options, bool, &set_options::newline_ >; - _cli_set_options_map_["-n"] = - &::build2::test::script::cli::thunk< set_options, bool, &set_options::newline_ >; - _cli_set_options_map_["--whitespace"] = - &::build2::test::script::cli::thunk< set_options, bool, &set_options::whitespace_ >; - _cli_set_options_map_["-w"] = - &::build2::test::script::cli::thunk< set_options, bool, &set_options::whitespace_ >; - } - }; - - static _cli_set_options_map_init _cli_set_options_map_init_; - - bool set_options:: - _parse (const char* o, ::build2::test::script::cli::scanner& s) - { - _cli_set_options_map::const_iterator i (_cli_set_options_map_.find (o)); - - if (i != _cli_set_options_map_.end ()) - { - (*(i->second)) (*this, s); - return true; - } - - return false; - } - - bool set_options:: - _parse (::build2::test::script::cli::scanner& s, - ::build2::test::script::cli::unknown_mode opt_mode, - ::build2::test::script::cli::unknown_mode arg_mode) - { - // Can't skip combined flags (--no-combined-flags). - // - assert (opt_mode != ::build2::test::script::cli::unknown_mode::skip); - - bool r = false; - bool opt = true; - - while (s.more ()) - { - const char* o = s.peek (); - - if (std::strcmp (o, "--") == 0) - { - opt = false; - s.skip (); - r = true; - continue; - } - - if (opt) - { - if (_parse (o, s)) - { - r = true; - continue; - } - - if (std::strncmp (o, "-", 1) == 0 && o[1] != '\0') - { - // Handle combined option values. - // - std::string co; - if (const char* v = std::strchr (o, '=')) - { - co.assign (o, 0, v - o); - ++v; - - int ac (2); - char* av[] = - { - const_cast<char*> (co.c_str ()), - const_cast<char*> (v) - }; - - ::build2::test::script::cli::argv_scanner ns (0, ac, av); - - if (_parse (co.c_str (), ns)) - { - // Parsed the option but not its value? - // - if (ns.end () != 2) - throw ::build2::test::script::cli::invalid_value (co, v); - - s.next (); - r = true; - continue; - } - else - { - // Set the unknown option and fall through. - // - o = co.c_str (); - } - } - - // Handle combined flags. - // - char cf[3]; - { - const char* p = o + 1; - for (; *p != '\0'; ++p) - { - if (!((*p >= 'a' && *p <= 'z') || - (*p >= 'A' && *p <= 'Z') || - (*p >= '0' && *p <= '9'))) - break; - } - - if (*p == '\0') - { - for (p = o + 1; *p != '\0'; ++p) - { - std::strcpy (cf, "-"); - cf[1] = *p; - cf[2] = '\0'; - - int ac (1); - char* av[] = - { - cf - }; - - ::build2::test::script::cli::argv_scanner ns (0, ac, av); - - if (!_parse (cf, ns)) - break; - } - - if (*p == '\0') - { - // All handled. - // - s.next (); - r = true; - continue; - } - else - { - // Set the unknown option and fall through. - // - o = cf; - } - } - } - - switch (opt_mode) - { - case ::build2::test::script::cli::unknown_mode::skip: - { - s.skip (); - r = true; - continue; - } - case ::build2::test::script::cli::unknown_mode::stop: - { - break; - } - case ::build2::test::script::cli::unknown_mode::fail: - { - throw ::build2::test::script::cli::unknown_option (o); - } - } - - break; - } - } - - switch (arg_mode) - { - case ::build2::test::script::cli::unknown_mode::skip: - { - s.skip (); - r = true; - continue; - } - case ::build2::test::script::cli::unknown_mode::stop: - { - break; - } - case ::build2::test::script::cli::unknown_mode::fail: - { - throw ::build2::test::script::cli::unknown_argument (o); - } - } - - break; - } - - return r; - } - } - } -} - -// Begin epilogue. -// -// -// End epilogue. - diff --git a/libbuild2/test/script/builtin-options.hxx b/libbuild2/test/script/builtin-options.hxx deleted file mode 100644 index 44e129a..0000000 --- a/libbuild2/test/script/builtin-options.hxx +++ /dev/null @@ -1,345 +0,0 @@ -// -*- C++ -*- -// -// This file was generated by CLI, a command line interface -// compiler for C++. -// - -#ifndef LIBBUILD2_TEST_SCRIPT_BUILTIN_OPTIONS_HXX -#define LIBBUILD2_TEST_SCRIPT_BUILTIN_OPTIONS_HXX - -// Begin prologue. -// -// -// End prologue. - -#include <vector> -#include <iosfwd> -#include <string> -#include <cstddef> -#include <exception> - -#ifndef CLI_POTENTIALLY_UNUSED -# if defined(_MSC_VER) || defined(__xlC__) -# define CLI_POTENTIALLY_UNUSED(x) (void*)&x -# else -# define CLI_POTENTIALLY_UNUSED(x) (void)x -# endif -#endif - -namespace build2 -{ - namespace test - { - namespace script - { - namespace cli - { - class unknown_mode - { - public: - enum value - { - skip, - stop, - fail - }; - - unknown_mode (value); - - operator value () const - { - return v_; - } - - private: - value v_; - }; - - // Exceptions. - // - - class exception: public std::exception - { - public: - virtual void - print (::std::ostream&) const = 0; - }; - - ::std::ostream& - operator<< (::std::ostream&, const exception&); - - class unknown_option: public exception - { - public: - virtual - ~unknown_option () throw (); - - unknown_option (const std::string& option); - - const std::string& - option () const; - - virtual void - print (::std::ostream&) const; - - virtual const char* - what () const throw (); - - private: - std::string option_; - }; - - class unknown_argument: public exception - { - public: - virtual - ~unknown_argument () throw (); - - unknown_argument (const std::string& argument); - - const std::string& - argument () const; - - virtual void - print (::std::ostream&) const; - - virtual const char* - what () const throw (); - - private: - std::string argument_; - }; - - class missing_value: public exception - { - public: - virtual - ~missing_value () throw (); - - missing_value (const std::string& option); - - const std::string& - option () const; - - virtual void - print (::std::ostream&) const; - - virtual const char* - what () const throw (); - - private: - std::string option_; - }; - - class invalid_value: public exception - { - public: - virtual - ~invalid_value () throw (); - - invalid_value (const std::string& option, - const std::string& value, - const std::string& message = std::string ()); - - const std::string& - option () const; - - const std::string& - value () const; - - const std::string& - message () const; - - virtual void - print (::std::ostream&) const; - - virtual const char* - what () const throw (); - - private: - std::string option_; - std::string value_; - std::string message_; - }; - - class eos_reached: public exception - { - public: - virtual void - print (::std::ostream&) const; - - virtual const char* - what () const throw (); - }; - - // Command line argument scanner interface. - // - // The values returned by next() are guaranteed to be valid - // for the two previous arguments up until a call to a third - // peek() or next(). - // - class scanner - { - public: - virtual - ~scanner (); - - virtual bool - more () = 0; - - virtual const char* - peek () = 0; - - virtual const char* - next () = 0; - - virtual void - skip () = 0; - }; - - class argv_scanner: public scanner - { - public: - argv_scanner (int& argc, char** argv, bool erase = false); - argv_scanner (int start, int& argc, char** argv, bool erase = false); - - int - end () const; - - virtual bool - more (); - - virtual const char* - peek (); - - virtual const char* - next (); - - virtual void - skip (); - - private: - int i_; - int& argc_; - char** argv_; - bool erase_; - }; - - class vector_scanner: public scanner - { - public: - vector_scanner (const std::vector<std::string>&, std::size_t start = 0); - - std::size_t - end () const; - - void - reset (std::size_t start = 0); - - virtual bool - more (); - - virtual const char* - peek (); - - virtual const char* - next (); - - virtual void - skip (); - - private: - const std::vector<std::string>& v_; - std::size_t i_; - }; - - template <typename X> - struct parser; - } - } - } -} - -namespace build2 -{ - namespace test - { - namespace script - { - class set_options - { - public: - set_options (); - - set_options (int& argc, - char** argv, - bool erase = false, - ::build2::test::script::cli::unknown_mode option = ::build2::test::script::cli::unknown_mode::fail, - ::build2::test::script::cli::unknown_mode argument = ::build2::test::script::cli::unknown_mode::stop); - - set_options (int start, - int& argc, - char** argv, - bool erase = false, - ::build2::test::script::cli::unknown_mode option = ::build2::test::script::cli::unknown_mode::fail, - ::build2::test::script::cli::unknown_mode argument = ::build2::test::script::cli::unknown_mode::stop); - - set_options (int& argc, - char** argv, - int& end, - bool erase = false, - ::build2::test::script::cli::unknown_mode option = ::build2::test::script::cli::unknown_mode::fail, - ::build2::test::script::cli::unknown_mode argument = ::build2::test::script::cli::unknown_mode::stop); - - set_options (int start, - int& argc, - char** argv, - int& end, - bool erase = false, - ::build2::test::script::cli::unknown_mode option = ::build2::test::script::cli::unknown_mode::fail, - ::build2::test::script::cli::unknown_mode argument = ::build2::test::script::cli::unknown_mode::stop); - - set_options (::build2::test::script::cli::scanner&, - ::build2::test::script::cli::unknown_mode option = ::build2::test::script::cli::unknown_mode::fail, - ::build2::test::script::cli::unknown_mode argument = ::build2::test::script::cli::unknown_mode::stop); - - // Option accessors. - // - const bool& - exact () const; - - const bool& - newline () const; - - const bool& - whitespace () const; - - // Implementation details. - // - protected: - bool - _parse (const char*, ::build2::test::script::cli::scanner&); - - private: - bool - _parse (::build2::test::script::cli::scanner&, - ::build2::test::script::cli::unknown_mode option, - ::build2::test::script::cli::unknown_mode argument); - - public: - bool exact_; - bool newline_; - bool whitespace_; - }; - } - } -} - -#include <libbuild2/test/script/builtin-options.ixx> - -// Begin epilogue. -// -// -// End epilogue. - -#endif // LIBBUILD2_TEST_SCRIPT_BUILTIN_OPTIONS_HXX diff --git a/libbuild2/test/script/builtin-options.ixx b/libbuild2/test/script/builtin-options.ixx deleted file mode 100644 index bdb95b4..0000000 --- a/libbuild2/test/script/builtin-options.ixx +++ /dev/null @@ -1,188 +0,0 @@ -// -*- C++ -*- -// -// This file was generated by CLI, a command line interface -// compiler for C++. -// - -// Begin prologue. -// -// -// End prologue. - -#include <cassert> - -namespace build2 -{ - namespace test - { - namespace script - { - namespace cli - { - // unknown_mode - // - inline unknown_mode:: - unknown_mode (value v) - : v_ (v) - { - } - - // exception - // - inline ::std::ostream& - operator<< (::std::ostream& os, const exception& e) - { - e.print (os); - return os; - } - - // unknown_option - // - inline unknown_option:: - unknown_option (const std::string& option) - : option_ (option) - { - } - - inline const std::string& unknown_option:: - option () const - { - return option_; - } - - // unknown_argument - // - inline unknown_argument:: - unknown_argument (const std::string& argument) - : argument_ (argument) - { - } - - inline const std::string& unknown_argument:: - argument () const - { - return argument_; - } - - // missing_value - // - inline missing_value:: - missing_value (const std::string& option) - : option_ (option) - { - } - - inline const std::string& missing_value:: - option () const - { - return option_; - } - - // invalid_value - // - inline invalid_value:: - invalid_value (const std::string& option, - const std::string& value, - const std::string& message) - : option_ (option), - value_ (value), - message_ (message) - { - } - - inline const std::string& invalid_value:: - option () const - { - return option_; - } - - inline const std::string& invalid_value:: - value () const - { - return value_; - } - - inline const std::string& invalid_value:: - message () const - { - return message_; - } - - // argv_scanner - // - inline argv_scanner:: - argv_scanner (int& argc, char** argv, bool erase) - : i_ (1), argc_ (argc), argv_ (argv), erase_ (erase) - { - } - - inline argv_scanner:: - argv_scanner (int start, int& argc, char** argv, bool erase) - : i_ (start), argc_ (argc), argv_ (argv), erase_ (erase) - { - } - - inline int argv_scanner:: - end () const - { - return i_; - } - - // vector_scanner - // - inline vector_scanner:: - vector_scanner (const std::vector<std::string>& v, std::size_t i) - : v_ (v), i_ (i) - { - } - - inline std::size_t vector_scanner:: - end () const - { - return i_; - } - - inline void vector_scanner:: - reset (std::size_t i) - { - i_ = i; - } - } - } - } -} - -namespace build2 -{ - namespace test - { - namespace script - { - // set_options - // - - inline const bool& set_options:: - exact () const - { - return this->exact_; - } - - inline const bool& set_options:: - newline () const - { - return this->newline_; - } - - inline const bool& set_options:: - whitespace () const - { - return this->whitespace_; - } - } - } -} - -// Begin epilogue. -// -// -// End epilogue. diff --git a/libbuild2/test/script/builtin.cli b/libbuild2/test/script/builtin.cli deleted file mode 100644 index 42b26d2..0000000 --- a/libbuild2/test/script/builtin.cli +++ /dev/null @@ -1,25 +0,0 @@ -// file : libbuild2/test/script/builtin.cli -// license : MIT; see accompanying LICENSE file - -// Note that options in this file are undocumented because we generate neither -// the usage printing code nor man pages. Instead, they are documented in the -// Testscript Language Manual's builtin descriptions. -// -namespace build2 -{ - namespace test - { - namespace script - { - // Pseudo-builtin options. - // - - class set_options - { - bool --exact|-e; - bool --newline|-n; - bool --whitespace|-w; - }; - } - } -} diff --git a/libbuild2/test/script/lexer+command-expansion.test.testscript b/libbuild2/test/script/lexer+command-expansion.test.testscript deleted file mode 100644 index 2cb6587..0000000 --- a/libbuild2/test/script/lexer+command-expansion.test.testscript +++ /dev/null @@ -1,247 +0,0 @@ -# file : libbuild2/test/script/lexer+command-expansion.test.testscript -# license : MIT; see accompanying LICENSE file - -test.arguments = command-expansion - -: pass-redirect -: -{ - : in - : - $* <:"0<|" >>EOO - '0' - <| - EOO - - : arg-in - : - $* <:"0 <|" >>EOO - '0 ' - <| - EOO - - : out - : - $* <:"1>|" >>EOO - '1' - >| - EOO - - : arg-out - : - $* <:"1 >|" >>EOO - '1 ' - >| - EOO -} - -: null-redirect -: -{ - : in - : - $* <:"0<-" >>EOO - '0' - <- - EOO - - : arg-in - : - $* <:"0 <-" >>EOO - '0 ' - <- - EOO - - : out - : - $* <:"1>-" >>EOO - '1' - >- - EOO - - : arg-out - : - $* <:"1 >-" >>EOO - '1 ' - >- - EOO -} - -: trace-redirect -: -{ - : out - : - $* <:"1>!" >>EOO - '1' - >! - EOO - - : arg-out - : - $* <:"1 >!" >>EOO - '1 ' - >! - EOO -} - -: merge-redirect -: -{ - : out - : - $* <:"1>&2" >>EOO - '1' - >& - '2' - EOO - - : arg-out - : - $* <:"1 >&2" >>EOO - '1 ' - >& - '2' - EOO -} - -: str-redirect -: -{ - : in - : - { - : newline - : - $* <:"0<a b" >>EOO - '0' - < - 'a b' - EOO - - : no-newline - : - $* <:"0<:a b" >>EOO - '0' - <: - 'a b' - EOO - } - - : out - : - { - : newline - : - $* <:"1>a b" >>EOO - '1' - > - 'a b' - EOO - - : no-newline - : - $* <:"1>:a b" >>EOO - '1' - >: - 'a b' - EOO - } -} - -: doc-redirect -: -{ - : in - : - { - : newline - : - $* <:"0<<E O I" >>EOO - '0' - << - 'E O I' - EOO - - : no-newline - : - $* <:"0<<:E O I" >>EOO - '0' - <<: - 'E O I' - EOO - } - - : out - : - { - : newline - : - $* <:"1>>E O O" >>EOO - '1' - >> - 'E O O' - EOO - - : no-newline - : - $* <:"1>>:E O O" >>EOO - '1' - >>: - 'E O O' - EOO - } -} - -: file-redirect -: -{ - : in - : - $* <:"0<<<a b" >>EOO - '0' - <<< - 'a b' - EOO - - : out - : - $* <:"1>=a b" >>EOO - '1' - >= - 'a b' - EOO - - : out-app - : - $* <:"1>+a b" >>EOO - '1' - >+ - 'a b' - EOO -} - -: cleanup -: -{ - : always - : - $* <:"&file" >>EOO - & - 'file' - EOO - - : maybe - : - $* <:"&?file" >>EOO - &? - 'file' - EOO - - : never - : - $* <:"&!file" >>EOO - &! - 'file' - EOO -} diff --git a/libbuild2/test/script/lexer.cxx b/libbuild2/test/script/lexer.cxx index 26d77b5..a94109b 100644 --- a/libbuild2/test/script/lexer.cxx +++ b/libbuild2/test/script/lexer.cxx @@ -15,8 +15,16 @@ namespace build2 { using type = token_type; + build2::script::redirect_aliases lexer::redirect_aliases { + type (type::in_str), + type (type::in_doc), + type (type::in_file), + type (type::out_str), + type (type::out_doc), + type (type::out_file_cmp)}; + void lexer:: - mode (base_mode m, char ps, optional<const char*> esc) + mode (base_mode m, char ps, optional<const char*> esc, uintptr_t data) { bool a (false); // attributes @@ -77,43 +85,6 @@ namespace build2 s2 = " "; break; } - - case lexer_mode::command_expansion: - { - // Note that whitespaces are not word separators in this mode. - // - s1 = "|&<>"; - s2 = " "; - s = false; - break; - } - case lexer_mode::here_line_single: - { - // This one is like a single-quoted string except it treats - // newlines as a separator. We also treat quotes as literals. - // - // Note that it might be tempting to enable line continuation - // escapes. However, we will then have to also enable escaping of - // the backslash, which makes it a lot less tempting. - // - s1 = "\n"; - s2 = " "; - esc = ""; // Disable escape sequences. - s = false; - q = false; - break; - } - case lexer_mode::here_line_double: - { - // This one is like a double-quoted string except it treats - // newlines as a separator. We also treat quotes as literals. - // - s1 = "$(\n"; - s2 = " "; - s = false; - q = false; - break; - } case lexer_mode::description_line: { // This one is like a single-quoted string and has an ad hoc @@ -138,7 +109,7 @@ namespace build2 } assert (ps == '\0'); - state_.push (state {m, a, ps, s, n, q, *esc, s1, s2}); + state_.push (state {m, data, nullopt, a, ps, s, n, q, *esc, s1, s2}); } token lexer:: @@ -152,17 +123,12 @@ namespace build2 case lexer_mode::first_token: case lexer_mode::second_token: case lexer_mode::variable_line: - case lexer_mode::command_expansion: - case lexer_mode::here_line_single: - case lexer_mode::here_line_double: r = next_line (); break; case lexer_mode::description_line: r = next_description (); break; - default: - r = base_lexer::next (); - break; + default: return base_lexer::next (); } if (r.qtype != quote_type::unquoted) @@ -174,7 +140,7 @@ namespace build2 token lexer:: next_line () { - bool sep (skip_spaces ()); + bool sep (skip_spaces ().first); xchar c (get ()); uint64_t ln (c.line), cn (c.column); @@ -182,38 +148,9 @@ namespace build2 state st (state_.top ()); // Make copy (see first/second_token). lexer_mode m (st.mode); - auto make_token = [&sep, &m, ln, cn] (type t, string v = string ()) + auto make_token = [&sep, ln, cn] (type t) { - bool q (m == lexer_mode::here_line_double); - - return token (t, move (v), sep, - (q ? quote_type::double_ : quote_type::unquoted), q, - ln, cn, - token_printer); - }; - - auto make_token_with_modifiers = - [&make_token, this] (type t, - const char* mods, // To recorgnize. - const char* stop = nullptr) // To stop after. - { - string v; - if (mods != nullptr) - { - for (xchar p (peek ()); - (strchr (mods, p) != nullptr && // Modifier. - strchr (v.c_str (), p) == nullptr); // Not already seen. - p = peek ()) - { - get (); - v += p; - - if (stop != nullptr && strchr (stop, p) != nullptr) - break; - } - } - - return make_token (t, move (v)); + return token (t, sep, ln, cn, token_printer); }; // Handle attributes (do it first to make sure the flag is cleared @@ -240,32 +177,23 @@ namespace build2 // NOTE: remember to update mode() if adding new special characters. - if (m != lexer_mode::command_expansion) + switch (c) { - switch (c) + case '\n': { - case '\n': - { - // Expire variable value mode at the end of the line. - // - if (m == lexer_mode::variable_line) - state_.pop (); + // Expire variable value mode at the end of the line. + // + if (m == lexer_mode::variable_line) + state_.pop (); - sep = true; // Treat newline as always separated. - return make_token (type::newline); - } + sep = true; // Treat newline as always separated. + return make_token (type::newline); } - } - if (m != lexer_mode::here_line_single) - { - switch (c) - { - // Variable expansion, function call, and evaluation context. - // - case '$': return make_token (type::dollar); - case '(': return make_token (type::lparen); - } + // Variable expansion, function call, and evaluation context. + // + case '$': return make_token (type::dollar); + case '(': return make_token (type::lparen); } // Line separators. @@ -313,133 +241,14 @@ namespace build2 } } - // Command operators/separators. + // Command operators. // if (m == lexer_mode::command_line || m == lexer_mode::first_token || - m == lexer_mode::second_token || - m == lexer_mode::command_expansion) + m == lexer_mode::second_token) { - switch (c) - { - // |, || - // - case '|': - { - if (peek () == '|') - { - get (); - return make_token (type::log_or); - } - else - return make_token (type::pipe); - } - // &, && - // - case '&': - { - xchar p (peek ()); - - if (p == '&') - { - get (); - return make_token (type::log_and); - } - - // These modifiers are mutually exclusive so stop after seeing - // either one. - // - return make_token_with_modifiers (type::clean, "!?", "!?"); - } - // < - // - case '<': - { - type r (type::in_str); - xchar p (peek ()); - - if (p == '|' || p == '-' || p == '<') - { - get (); - - switch (p) - { - case '|': return make_token (type::in_pass); - case '-': return make_token (type::in_null); - case '<': - { - r = type::in_doc; - p = peek (); - - if (p == '<') - { - get (); - r = type::in_file; - } - break; - } - } - } - - // Handle modifiers. - // - const char* mods (nullptr); - switch (r) - { - case type::in_str: - case type::in_doc: mods = ":/"; break; - } - - return make_token_with_modifiers (r, mods); - } - // > - // - case '>': - { - type r (type::out_str); - xchar p (peek ()); - - if (p == '|' || p == '-' || p == '!' || p == '&' || - p == '=' || p == '+' || p == '>') - { - get (); - - switch (p) - { - case '|': return make_token (type::out_pass); - case '-': return make_token (type::out_null); - case '!': return make_token (type::out_trace); - case '&': return make_token (type::out_merge); - case '=': return make_token (type::out_file_ovr); - case '+': return make_token (type::out_file_app); - case '>': - { - r = type::out_doc; - p = peek (); - - if (p == '>') - { - get (); - r = type::out_file_cmp; - } - break; - } - } - } - - // Handle modifiers. - // - const char* mods (nullptr); - const char* stop (nullptr); - switch (r) - { - case type::out_str: - case type::out_doc: mods = ":/~"; stop = "~"; break; - } - - return make_token_with_modifiers (r, mods, stop); - } - } + if (optional<token> t = next_cmd_op (c, sep)) + return move (*t); } // Dot, plus/minus, and left/right curly braces. diff --git a/libbuild2/test/script/lexer.hxx b/libbuild2/test/script/lexer.hxx index 5763e3b..452e794 100644 --- a/libbuild2/test/script/lexer.hxx +++ b/libbuild2/test/script/lexer.hxx @@ -7,7 +7,7 @@ #include <libbuild2/types.hxx> #include <libbuild2/utility.hxx> -#include <libbuild2/lexer.hxx> +#include <libbuild2/script/lexer.hxx> #include <libbuild2/test/script/token.hxx> @@ -17,9 +17,9 @@ namespace build2 { namespace script { - struct lexer_mode: build2::lexer_mode + struct lexer_mode: build2::script::lexer_mode { - using base_type = build2::lexer_mode; + using base_type = build2::script::lexer_mode; enum { @@ -27,22 +27,18 @@ namespace build2 first_token, // Expires at the end of the token. second_token, // Expires at the end of the token. variable_line, // Expires at the end of the line. - command_expansion, - here_line_single, - here_line_double, description_line // Expires at the end of the line. }; lexer_mode () = default; lexer_mode (value_type v): base_type (v) {} - lexer_mode (base_type v): base_type (v) {} + lexer_mode (build2::lexer_mode v): base_type (v) {} }; - class lexer: public build2::lexer + class lexer: public build2::script::lexer { public: - using base_lexer = build2::lexer; - using base_mode = build2::lexer_mode; + using base_lexer = build2::script::lexer; // Note that neither the name nor escape arguments are copied. // @@ -52,28 +48,25 @@ namespace build2 const char* escapes = nullptr) : base_lexer (is, name, 1 /* line */, nullptr /* escapes */, - false /* set_mode */) + false /* set_mode */, + redirect_aliases) { mode (m, '\0', escapes); } virtual void - mode (base_mode, + mode (build2::lexer_mode, char = '\0', - optional<const char*> = nullopt) override; - - // Number of quoted (double or single) tokens since last reset. - // - size_t - quoted () const {return quoted_;} - - void - reset_quoted (size_t q) {quoted_ = q;} + optional<const char*> = nullopt, + uintptr_t = 0) override; virtual token next () override; - protected: + public: + static redirect_aliases_type redirect_aliases; + + private: token next_line (); @@ -82,9 +75,6 @@ namespace build2 virtual token word (state, bool) override; - - protected: - size_t quoted_; }; } } diff --git a/libbuild2/test/script/lexer.test.cxx b/libbuild2/test/script/lexer.test.cxx index 1512e58..9c64616 100644 --- a/libbuild2/test/script/lexer.test.cxx +++ b/libbuild2/test/script/lexer.test.cxx @@ -32,9 +32,6 @@ namespace build2 else if (s == "first-token") m = lexer_mode::first_token; else if (s == "second-token") m = lexer_mode::second_token; else if (s == "variable-line") m = lexer_mode::variable_line; - else if (s == "command-expansion") m = lexer_mode::command_expansion; - else if (s == "here-line-single") m = lexer_mode::here_line_single; - else if (s == "here-line-double") m = lexer_mode::here_line_double; else if (s == "description-line") m = lexer_mode::description_line; else if (s == "variable") m = lexer_mode::variable; else assert (false); @@ -46,14 +43,10 @@ namespace build2 // Some modes auto-expire so we need something underneath. // - bool u (m == lexer_mode::first_token || - m == lexer_mode::second_token || - m == lexer_mode::variable_line || - m == lexer_mode::description_line || - m == lexer_mode::variable); + bool u (m != lexer_mode::command_line); path_name in ("<stdin>"); - lexer l (cin, in, u ? lexer_mode::command_line : m); + lexer l (cin, in, lexer_mode::command_line); if (u) l.mode (m); @@ -63,7 +56,7 @@ namespace build2 { // Print each token on a separate line without quoting operators. // - t.printer (cout, t, false); + t.printer (cout, t, print_mode::normal); cout << endl; } } diff --git a/libbuild2/test/script/parser+exit.test.testscript b/libbuild2/test/script/parser+exit.test.testscript index c6327df..44728a5 100644 --- a/libbuild2/test/script/parser+exit.test.testscript +++ b/libbuild2/test/script/parser+exit.test.testscript @@ -22,5 +22,5 @@ EOO $* <<EOI 2>>EOE != 0 cmd != 1 <"foo" EOI -testscript:1:10: error: unexpected '<' after command exit status +testscript:1:10: error: expected newline instead of '<' EOE diff --git a/libbuild2/test/script/parser+redirect.test.testscript b/libbuild2/test/script/parser+redirect.test.testscript index 3858808..79530e0 100644 --- a/libbuild2/test/script/parser+redirect.test.testscript +++ b/libbuild2/test/script/parser+redirect.test.testscript @@ -49,7 +49,7 @@ : portable-path : $* <<EOI >>EOO - cmd <</EOI_ >/EOO_ 2>/EOE_ + cmd <</EOI_ >>/EOO_ 2>>/EOE_ foo EOI_ bar @@ -57,7 +57,7 @@ baz EOE_ EOI - cmd <</EOI_ >/EOO_ 2>/EOE_ + cmd <</EOI_ >>/EOO_ 2>>/EOE_ foo EOI_ bar @@ -113,13 +113,13 @@ : portable-path : $* <<EOI >>EOO - cmd >/~%EOF% 2>/~%EOE% + cmd >>/~%EOF% 2>>/~%EOE% foo EOF bar EOE EOI - cmd >/~%EOF% 2>/~%EOE% + cmd >>/~%EOF% 2>>/~%EOE% foo EOF bar diff --git a/libbuild2/test/script/parser+regex.test.testscript b/libbuild2/test/script/parser+regex.test.testscript index 8627304..db418b3 100644 --- a/libbuild2/test/script/parser+regex.test.testscript +++ b/libbuild2/test/script/parser+regex.test.testscript @@ -162,9 +162,12 @@ EOE EOO - : no-newline + : no-newline-str : $* <'cmd >:~/fo*/' >'cmd >:~/fo*/' + + : no-newline-doc + : $* <<EOI >>EOO cmd 2>>:~/EOE/ foo diff --git a/libbuild2/test/script/parser+variable.test.testscript b/libbuild2/test/script/parser+variable.test.testscript new file mode 100644 index 0000000..3751a5f --- /dev/null +++ b/libbuild2/test/script/parser+variable.test.testscript @@ -0,0 +1,19 @@ +# file : libbuild2/test/script/parser+variable.test.testscript +# license : MIT; see accompanying LICENSE file + +: assignment +: +$* <<EOI >>EOO +a = b +echo $a +EOI +echo b +EOO + +: empty-name +: +$* <<EOI 2>>EOE != 0 += b +EOI +testscript:1:1: error: missing variable name +EOE diff --git a/libbuild2/test/script/parser.cxx b/libbuild2/test/script/parser.cxx index 06cefc7..f663c11 100644 --- a/libbuild2/test/script/parser.cxx +++ b/libbuild2/test/script/parser.cxx @@ -3,8 +3,6 @@ #include <libbuild2/test/script/parser.hxx> -#include <sstream> - #include <libbuild2/context.hxx> // sched, keep_going #include <libbuild2/test/script/lexer.hxx> @@ -316,7 +314,7 @@ namespace build2 // Determine the line type/start token. // line_type lt; - type st (type::eos); + type st (type::eos); // Later, can only be set to plus or minus. switch (tt) { @@ -372,51 +370,7 @@ namespace build2 } default: { - // Either variable assignment or test command. - // - replay_save (); // Start saving tokens from the current one. - next (t, tt); - - // Decide whether this is a variable assignment or a command. - // - // It is an assignment if the first token is an unquoted name and - // the next token is an assign/append/prepend operator. Assignment - // to a computed variable name must use the set builtin. - // - // Note also thatspecial commands take precedence over variable - // assignments. - // - lt = line_type::cmd; // Default. - - if (tt == type::word && t.qtype == quote_type::unquoted) - { - const string& n (t.value); - - if (n == "if") lt = line_type::cmd_if; - else if (n == "if!") lt = line_type::cmd_ifn; - else if (n == "elif") lt = line_type::cmd_elif; - else if (n == "elif!") lt = line_type::cmd_elifn; - else if (n == "else") lt = line_type::cmd_else; - else if (n == "end") lt = line_type::cmd_end; - else - { - // Switch the recognition of leading variable assignments for - // the next token. This is safe to do because we know we - // cannot be in the quoted mode (since the current token is - // not quoted). - // - type p (peek (lexer_mode::second_token)); - - if (p == type::assign || - p == type::prepend || - p == type::append) - { - lt = line_type::var; - st = p; - } - } - } - + lt = pre_parse_line_start (t, tt, lexer_mode::second_token); break; } } @@ -435,7 +389,7 @@ namespace build2 // string& n (t.value); - if (n == "*" || n == "~" || n == "@" || digit (n)) + if (special_variable (n)) fail (t) << "attempt to set '" << n << "' variable directly"; // Pre-enter the variables now while we are executing serially. @@ -444,6 +398,11 @@ namespace build2 ln.var = &script_->var_pool.insert (move (n)); next (t, tt); // Assignment kind. + + // We cannot reuse the value mode since it will recognize `{` + // which we want to treat as a literal. + // + mode (lexer_mode::variable_line); parse_variable_line (t, tt); semi = (tt == type::semi); @@ -469,7 +428,7 @@ namespace build2 pair<command_expr, here_docs> p; if (lt != line_type::cmd_else && lt != line_type::cmd_end) - p = parse_command_expr (t, tt); + p = parse_command_expr (t, tt, lexer::redirect_aliases); // Colon and semicolon are only valid in test command lines and // after 'end' in if-else. Note that we still recognize them @@ -1039,7 +998,7 @@ namespace build2 const path_name* op (path_); path_ = &pn; - lexer* ol (lexer_); + build2::script::lexer* ol (lexer_); set_lexer (&l); string oip (id_prefix_); @@ -1281,35 +1240,6 @@ namespace build2 return r; } - value parser:: - parse_variable_line (token& t, type& tt) - { - // enter: assignment - // leave: newline or semi - - // We cannot reuse the value mode since it will recognize `{` which we - // want to treat as a literal. - // - mode (lexer_mode::variable_line); - next_with_attributes (t, tt); - - // Parse value attributes if any. Note that it's ok not to have - // anything after the attributes (e.g., foo=[null]). - // - attributes_push (t, tt, true); - - // @@ PAT: Should we expand patterns? Note that it will only be - // simple ones since we have disabled {}. Also, what would be the - // pattern base directory? - // - return tt != type::newline && tt != type::semi - ? parse_value (t, tt, - pattern_mode::ignore, - "variable value", - nullptr) - : value (names ()); - } - command_expr parser:: parse_command_line (token& t, type& tt) { @@ -1318,7 +1248,8 @@ namespace build2 // Note: this one is only used during execution. - pair<command_expr, here_docs> p (parse_command_expr (t, tt)); + pair<command_expr, here_docs> p ( + parse_command_expr (t, tt, lexer::redirect_aliases)); switch (tt) { @@ -1334,1542 +1265,6 @@ namespace build2 return move (p.first); } - // Parse the regular expression representation (non-empty string value - // framed with introducer characters and optionally followed by flag - // characters from the {di} set, for example '/foo/id') into - // components. Also return end-of-parsing position if requested, - // otherwise treat any unparsed characters left as an error. - // - struct regex_parts - { - string value; - char intro; - string flags; // Combination of characters from {di} set. - - // Create a special empty object. - // - regex_parts (): intro ('\0') {} - - regex_parts (string v, char i, string f) - : value (move (v)), intro (i), flags (move (f)) {} - }; - - static regex_parts - parse_regex (const string& s, - const location& l, - const char* what, - size_t* end = nullptr) - { - if (s.empty ()) - fail (l) << "no introducer character in " << what; - - size_t p (s.find (s[0], 1)); // Find terminating introducer. - - if (p == string::npos) - fail (l) << "no closing introducer character in " << what; - - size_t rn (p - 1); // Regex length. - if (rn == 0) - fail (l) << what << " is empty"; - - // Find end-of-flags position. - // - size_t fp (++p); // Save flags starting position. - for (char c; (c = s[p]) == 'd' || c == 'i'; ++p) ; - - // If string end is not reached then report invalid flags, unless - // end-of-parsing position is requested (which means regex is just a - // prefix). - // - if (s[p] != '\0' && end == nullptr) - fail (l) << "junk at the end of " << what; - - if (end != nullptr) - *end = p; - - return regex_parts (string (s, 1, rn), s[0], string (s, fp, p - fp)); - } - - pair<command_expr, parser::here_docs> parser:: - parse_command_expr (token& t, type& tt) - { - // enter: first token of the command line - // leave: <newline> - - command_expr expr; - - // OR-ed to an implied false for the first term. - // - expr.push_back ({expr_operator::log_or, command_pipe ()}); - - command c; // Command being assembled. - - // Make sure the command makes sense. - // - auto check_command = [&c, this] (const location& l, bool last) - { - if (c.out.type == redirect_type::merge && - c.err.type == redirect_type::merge) - fail (l) << "stdout and stderr redirected to each other"; - - if (!last && c.out.type != redirect_type::none) - fail (l) << "stdout is both redirected and piped"; - }; - - // Check that the introducer character differs from '/' if the - // portable path modifier is specified. Must be called before - // parse_regex() (see below) to make sure its diagnostics is - // meaningful. - // - // Note that the portable path modifier assumes '/' to be a valid - // regex character and so makes it indistinguishable from the - // terminating introducer. - // - auto check_regex_mod = [this] (const string& mod, - const string& re, - const location& l, - const char* what) - { - // Handles empty regex properly. - // - if (mod.find ('/') != string::npos && re[0] == '/') - fail (l) << "portable path modifier and '/' introducer in " - << what; - }; - - // Pending positions where the next word should go. - // - enum class pending - { - none, - program, - in_string, - in_document, - in_file, - out_merge, - out_string, - out_str_regex, - out_document, - out_doc_regex, - out_file, - err_merge, - err_string, - err_str_regex, - err_document, - err_doc_regex, - err_file, - clean - }; - pending p (pending::program); - string mod; // Modifiers for pending in_* and out_* positions. - here_docs hd; // Expected here-documents. - - // Add the next word to either one of the pending positions or to - // program arguments by default. - // - auto add_word = [&c, &p, &mod, &check_regex_mod, this] ( - string&& w, const location& l) - { - auto add_merge = [&l, this] (redirect& r, const string& w, int fd) - { - try - { - size_t n; - if (stoi (w, &n) == fd && n == w.size ()) - { - r.fd = fd; - return; - } - } - catch (const exception&) {} // Fall through. - - fail (l) << (fd == 1 ? "stderr" : "stdout") << " merge redirect " - << "file descriptor must be " << fd; - }; - - auto add_here_str = [] (redirect& r, string&& w) - { - if (r.modifiers.find (':') == string::npos) - w += '\n'; - r.str = move (w); - }; - - auto add_here_str_regex = [&l, &check_regex_mod] ( - redirect& r, int fd, string&& w) - { - const char* what (nullptr); - switch (fd) - { - case 1: what = "stdout regex redirect"; break; - case 2: what = "stderr regex redirect"; break; - } - - check_regex_mod (r.modifiers, w, l, what); - - regex_parts rp (parse_regex (w, l, what)); - - regex_lines& re (r.regex); - re.intro = rp.intro; - - re.lines.emplace_back ( - l.line, l.column, move (rp.value), move (rp.flags)); - - // Add final blank line unless suppressed. - // - // Note that the position is synthetic, but that's ok as we don't - // expect any diagnostics to refer this line. - // - if (r.modifiers.find (':') == string::npos) - re.lines.emplace_back (l.line, l.column, string (), false); - }; - - auto parse_path = [&l, this] (string&& w, const char* what) -> path - { - try - { - path p (move (w)); - - if (!p.empty ()) - { - p.normalize (); - return p; - } - - fail (l) << "empty " << what << endf; - } - catch (const invalid_path& e) - { - fail (l) << "invalid " << what << " '" << e.path << "'" << endf; - } - }; - - auto add_file = [&parse_path] (redirect& r, int fd, string&& w) - { - const char* what (nullptr); - switch (fd) - { - case 0: what = "stdin redirect path"; break; - case 1: what = "stdout redirect path"; break; - case 2: what = "stderr redirect path"; break; - } - - r.file.path = parse_path (move (w), what); - }; - - switch (p) - { - case pending::none: c.arguments.push_back (move (w)); break; - case pending::program: - c.program = parse_path (move (w), "program path"); - break; - - case pending::out_merge: add_merge (c.out, w, 2); break; - case pending::err_merge: add_merge (c.err, w, 1); break; - - case pending::in_string: add_here_str (c.in, move (w)); break; - case pending::out_string: add_here_str (c.out, move (w)); break; - case pending::err_string: add_here_str (c.err, move (w)); break; - - case pending::out_str_regex: - { - add_here_str_regex (c.out, 1, move (w)); - break; - } - case pending::err_str_regex: - { - add_here_str_regex (c.err, 2, move (w)); - break; - } - - // These are handled specially below. - // - case pending::in_document: - case pending::out_document: - case pending::err_document: - case pending::out_doc_regex: - case pending::err_doc_regex: assert (false); break; - - case pending::in_file: add_file (c.in, 0, move (w)); break; - case pending::out_file: add_file (c.out, 1, move (w)); break; - case pending::err_file: add_file (c.err, 2, move (w)); break; - - case pending::clean: - { - cleanup_type t; - switch (mod[0]) // Ok, if empty - { - case '!': t = cleanup_type::never; break; - case '?': t = cleanup_type::maybe; break; - default: t = cleanup_type::always; break; - } - - c.cleanups.push_back ( - {t, parse_path (move (w), "cleanup path")}); - break; - } - } - - p = pending::none; - mod.clear (); - }; - - // Make sure we don't have any pending positions to fill. - // - auto check_pending = [&p, this] (const location& l) - { - const char* what (nullptr); - - switch (p) - { - case pending::none: break; - case pending::program: what = "program"; break; - case pending::in_string: what = "stdin here-string"; break; - case pending::in_document: what = "stdin here-document end"; break; - case pending::in_file: what = "stdin file"; break; - case pending::out_merge: what = "stdout file descriptor"; break; - case pending::out_string: what = "stdout here-string"; break; - case pending::out_document: what = "stdout here-document end"; break; - case pending::out_file: what = "stdout file"; break; - case pending::err_merge: what = "stderr file descriptor"; break; - case pending::err_string: what = "stderr here-string"; break; - case pending::err_document: what = "stderr here-document end"; break; - case pending::err_file: what = "stderr file"; break; - case pending::clean: what = "cleanup path"; break; - - case pending::out_str_regex: - { - what = "stdout here-string regex"; - break; - } - case pending::err_str_regex: - { - what = "stderr here-string regex"; - break; - } - case pending::out_doc_regex: - { - what = "stdout here-document regex end"; - break; - } - case pending::err_doc_regex: - { - what = "stderr here-document regex end"; - break; - } - } - - if (what != nullptr) - fail (l) << "missing " << what; - }; - - // Parse the redirect operator. - // - auto parse_redirect = - [&c, &expr, &p, &mod, &hd, this] (token& t, const location& l) - { - // Our semantics is the last redirect seen takes effect. - // - assert (p == pending::none && mod.empty ()); - - // See if we have the file descriptor. - // - unsigned long fd (3); - if (!t.separated) - { - if (c.arguments.empty ()) - fail (l) << "missing redirect file descriptor"; - - const string& s (c.arguments.back ()); - - try - { - size_t n; - fd = stoul (s, &n); - - if (n != s.size () || fd > 2) - throw invalid_argument (string ()); - } - catch (const exception&) - { - fail (l) << "invalid redirect file descriptor '" << s << "'"; - } - - c.arguments.pop_back (); - } - - type tt (t.type); - - // Validate/set default file descriptor. - // - switch (tt) - { - case type::in_pass: - case type::in_null: - case type::in_str: - case type::in_doc: - case type::in_file: - { - if ((fd = fd == 3 ? 0 : fd) != 0) - fail (l) << "invalid in redirect file descriptor " << fd; - - if (!expr.back ().pipe.empty ()) - fail (l) << "stdin is both piped and redirected"; - - break; - } - case type::out_pass: - case type::out_null: - case type::out_trace: - case type::out_merge: - case type::out_str: - case type::out_doc: - case type::out_file_cmp: - case type::out_file_ovr: - case type::out_file_app: - { - if ((fd = fd == 3 ? 1 : fd) == 0) - fail (l) << "invalid out redirect file descriptor " << fd; - - break; - } - } - - mod = move (t.value); - - redirect_type rt (redirect_type::none); - switch (tt) - { - case type::in_pass: - case type::out_pass: rt = redirect_type::pass; break; - - case type::in_null: - case type::out_null: rt = redirect_type::null; break; - - case type::out_trace: rt = redirect_type::trace; break; - - case type::out_merge: rt = redirect_type::merge; break; - - case type::in_str: - case type::out_str: - { - bool re (mod.find ('~') != string::npos); - assert (tt == type::out_str || !re); - - rt = re - ? redirect_type::here_str_regex - : redirect_type::here_str_literal; - - break; - } - - case type::in_doc: - case type::out_doc: - { - bool re (mod.find ('~') != string::npos); - assert (tt == type::out_doc || !re); - - rt = re - ? redirect_type::here_doc_regex - : redirect_type::here_doc_literal; - - break; - } - - case type::in_file: - case type::out_file_cmp: - case type::out_file_ovr: - case type::out_file_app: rt = redirect_type::file; break; - } - - redirect& r (fd == 0 ? c.in : fd == 1 ? c.out : c.err); - redirect_type overriden (r.type); - - r = redirect (rt); - - // Don't move as still may be used for pending here-document end - // marker processing. - // - r.modifiers = mod; - - switch (rt) - { - case redirect_type::none: - case redirect_type::pass: - case redirect_type::null: - case redirect_type::trace: - break; - case redirect_type::merge: - switch (fd) - { - case 0: assert (false); break; - case 1: p = pending::out_merge; break; - case 2: p = pending::err_merge; break; - } - break; - case redirect_type::here_str_literal: - switch (fd) - { - case 0: p = pending::in_string; break; - case 1: p = pending::out_string; break; - case 2: p = pending::err_string; break; - } - break; - case redirect_type::here_str_regex: - switch (fd) - { - case 0: assert (false); break; - case 1: p = pending::out_str_regex; break; - case 2: p = pending::err_str_regex; break; - } - break; - case redirect_type::here_doc_literal: - switch (fd) - { - case 0: p = pending::in_document; break; - case 1: p = pending::out_document; break; - case 2: p = pending::err_document; break; - } - break; - case redirect_type::here_doc_regex: - switch (fd) - { - case 0: assert (false); break; - case 1: p = pending::out_doc_regex; break; - case 2: p = pending::err_doc_regex; break; - } - break; - case redirect_type::file: - switch (fd) - { - case 0: p = pending::in_file; break; - case 1: p = pending::out_file; break; - case 2: p = pending::err_file; break; - } - - // Also sets for stdin, but this is harmless. - // - r.file.mode = tt == type::out_file_ovr - ? redirect_fmode::overwrite - : (tt == type::out_file_app - ? redirect_fmode::append - : redirect_fmode::compare); - - break; - - case redirect_type::here_doc_ref: assert (false); break; - } - - // If we are overriding a here-document, then remove the reference - // to this command redirect from the corresponding here_doc object. - // - if (!pre_parse_ && - (overriden == redirect_type::here_doc_literal || - overriden == redirect_type::here_doc_regex)) - { - size_t e (expr.size () - 1); - size_t p (expr.back ().pipe.size ()); - int f (static_cast<int> (fd)); - - for (here_doc& d: hd) - { - small_vector<here_redirect, 2>& rs (d.redirects); - - auto i (find_if (rs.begin (), rs.end (), - [e, p, f] (const here_redirect& r) - { - return r.expr == e && - r.pipe == p && - r.fd == f; - })); - - if (i != rs.end ()) - { - rs.erase (i); - break; - } - } - } - }; - - // Set pending cleanup type. - // - auto parse_clean = [&p, &mod] (token& t) - { - p = pending::clean; - mod = move (t.value); - }; - - const location ll (get_location (t)); // Line location. - - // Keep parsing chunks of the command line until we see one of the - // "terminators" (newline, semicolon, exit status comparison, etc). - // - location l (ll); - names ns; // Reuse to reduce allocations. - - for (bool done (false); !done; l = get_location (t)) - { - switch (tt) - { - case type::semi: - case type::colon: - case type::newline: - { - done = true; - break; - } - - case type::equal: - case type::not_equal: - { - if (!pre_parse_) - check_pending (l); - - c.exit = parse_command_exit (t, tt); - - // Only a limited set of things can appear after the exit status - // so we check this here. - // - switch (tt) - { - case type::semi: - case type::colon: - case type::newline: - - case type::pipe: - case type::log_or: - case type::log_and: - break; - default: - fail (t) << "unexpected " << t << " after command exit status"; - } - - break; - } - - case type::pipe: - case type::log_or: - case type::log_and: - - case type::in_pass: - case type::out_pass: - - case type::in_null: - case type::out_null: - - case type::out_trace: - - case type::out_merge: - - case type::in_str: - case type::in_doc: - case type::out_str: - case type::out_doc: - - case type::in_file: - case type::out_file_cmp: - case type::out_file_ovr: - case type::out_file_app: - - case type::clean: - { - if (pre_parse_) - { - // The only things we need to handle here are the here-document - // and here-document regex end markers since we need to know - // how many of them to pre-parse after the command. - // - switch (tt) - { - case type::in_doc: - case type::out_doc: - mod = move (t.value); - - bool re (mod.find ('~') != string::npos); - const char* what (re - ? "here-document regex end marker" - : "here-document end marker"); - - // We require the end marker to be a literal, unquoted word. - // In particularm, we don't allow quoted because of cases - // like foo"$bar" (where we will see word 'foo'). - // - next (t, tt); - - // We require the end marker to be an unquoted or completely - // quoted word. The complete quoting becomes important for - // cases like foo"$bar" (where we will see word 'foo'). - // - // For good measure we could have also required it to be - // separated from the following token, but out grammar - // allows one to write >>EOO;. The problematic sequence - // would be >>FOO$bar -- on reparse it will be expanded - // as a single word. - // - if (tt != type::word || t.value.empty ()) - fail (t) << "expected " << what; - - peek (); - const token& p (peeked ()); - if (!p.separated) - { - switch (p.type) - { - case type::dollar: - case type::lparen: - fail (p) << what << " must be literal"; - } - } - - quote_type qt (t.qtype); - switch (qt) - { - case quote_type::unquoted: - qt = quote_type::single; // Treat as single-quoted. - break; - case quote_type::single: - case quote_type::double_: - if (t.qcomp) - break; - // Fall through. - case quote_type::mixed: - fail (t) << "partially-quoted " << what; - } - - regex_parts r; - string end (move (t.value)); - - if (re) - { - check_regex_mod (mod, end, l, what); - - r = parse_regex (end, l, what); - end = move (r.value); // The "cleared" end marker. - } - - bool literal (qt == quote_type::single); - bool shared (false); - - for (const auto& d: hd) - { - if (d.end == end) - { - auto check = [&t, &end, &re, this] (bool c, - const char* what) - { - if (!c) - fail (t) << "different " << what - << " for shared here-document " - << (re ? "regex '" : "'") << end << "'"; - }; - - check (d.modifiers == mod, "modifiers"); - check (d.literal == literal, "quoting"); - - if (re) - { - check (d.regex == r.intro, "introducers"); - check (d.regex_flags == r.flags, "global flags"); - } - - shared = true; - break; - } - } - - if (!shared) - hd.push_back ( - here_doc { - {}, - move (end), - literal, - move (mod), - r.intro, move (r.flags)}); - - break; - } - - next (t, tt); - break; - } - - // If this is one of the operators/separators, check that we - // don't have any pending locations to be filled. - // - check_pending (l); - - // Note: there is another one in the inner loop below. - // - switch (tt) - { - case type::pipe: - case type::log_or: - case type::log_and: - { - // Check that the previous command makes sense. - // - check_command (l, tt != type::pipe); - expr.back ().pipe.push_back (move (c)); - - c = command (); - p = pending::program; - - if (tt != type::pipe) - { - expr_operator o (tt == type::log_or - ? expr_operator::log_or - : expr_operator::log_and); - expr.push_back ({o, command_pipe ()}); - } - - break; - } - - case type::in_pass: - case type::out_pass: - - case type::in_null: - case type::out_null: - - case type::out_trace: - - case type::out_merge: - - case type::in_str: - case type::in_doc: - case type::out_str: - case type::out_doc: - - case type::in_file: - case type::out_file_cmp: - case type::out_file_ovr: - case type::out_file_app: - { - parse_redirect (t, l); - break; - } - - case type::clean: - { - parse_clean (t); - break; - } - - default: assert (false); break; - } - - next (t, tt); - break; - } - default: - { - // Here-document end markers are literal (we verified that above - // during pre-parsing) and we need to know whether they were - // quoted. So handle this case specially. - // - { - int fd; - switch (p) - { - case pending::in_document: fd = 0; break; - case pending::out_document: - case pending::out_doc_regex: fd = 1; break; - case pending::err_document: - case pending::err_doc_regex: fd = 2; break; - default: fd = -1; break; - } - - if (fd != -1) - { - here_redirect rd { - expr.size () - 1, expr.back ().pipe.size (), fd}; - - string end (move (t.value)); - - regex_parts r; - - if (p == pending::out_doc_regex || - p == pending::err_doc_regex) - { - // We can't fail here as we already parsed all the end - // markers during pre-parsing stage, and so no need in the - // description. - // - r = parse_regex (end, l, ""); - end = move (r.value); // The "cleared" end marker. - } - - bool shared (false); - for (auto& d: hd) - { - // No need to check that redirects that share here-document - // have the same modifiers, etc. That have been done during - // pre-parsing. - // - if (d.end == end) - { - d.redirects.emplace_back (rd); - shared = true; - break; - } - } - - if (!shared) - hd.push_back ( - here_doc { - {rd}, - move (end), - (t.qtype == quote_type::unquoted || - t.qtype == quote_type::single), - move (mod), - r.intro, move (r.flags)}); - - p = pending::none; - mod.clear (); - - next (t, tt); - break; - } - } - - // Parse the next chunk as simple names to get expansion, etc. - // Note that we do it in the chunking mode to detect whether - // anything in each chunk is quoted. - // - // @@ PAT: should we support pattern expansion? This is even - // fuzzier than the variable case above. Though this is the - // shell semantics. Think what happens when we do rm *.txt? - // - reset_quoted (t); - parse_names (t, tt, - ns, - pattern_mode::ignore, - true, - "command line", - nullptr); - - if (pre_parse_) // Nothing else to do if we are pre-parsing. - break; - - // Process what we got. Determine whether anything inside was - // quoted (note that the current token is "next" and is not part - // of this). - // - bool q ((quoted () - - (t.qtype != quote_type::unquoted ? 1 : 0)) != 0); - - for (name& n: ns) - { - string s; - - try - { - s = value_traits<string>::convert (move (n), nullptr); - } - catch (const invalid_argument&) - { - diag_record dr (fail (l)); - dr << "invalid string value "; - to_stream (dr.os, n, true); // Quote. - } - - // If it is a quoted chunk, then we add the word as is. - // Otherwise we re-lex it. But if the word doesn't contain any - // interesting characters (operators plus quotes/escapes), - // then no need to re-lex. - // - // NOTE: update quoting (script.cxx:to_stream_q()) if adding - // any new characters. - // - if (q || s.find_first_of ("|&<>\'\"\\") == string::npos) - add_word (move (s), l); - else - { - // If the chunk re-parsing results in error, our diagnostics - // will look like this: - // - // <string>:1:4: error: stdout merge redirect file descriptor must be 2 - // testscript:2:5: info: while parsing string '1>&a' - // - auto df = make_diag_frame ( - [s, &l](const diag_record& dr) - { - dr << info (l) << "while parsing string '" << s << "'"; - }); - - // When re-lexing we do "effective escaping" and only for - // ['"\] (quotes plus the backslash itself). In particular, - // there is no way to escape redirects, operators, etc. The - // idea is to prefer quoting except for passing literal - // quotes, for example: - // - // args = \"&foo\" - // cmd $args # cmd &foo - // - // args = 'x=\"foo bar\"' - // cmd $args # cmd x="foo bar" - // - - istringstream is (s); - path_name in ("<string>"); - lexer lex (is, in, - lexer_mode::command_expansion, - "\'\"\\"); - - // Treat the first "sub-token" as always separated from what - // we saw earlier. - // - // Note that this is not "our" token so we cannot do - // fail(t). Rather we should do fail(l). - // - token t (lex.next ()); - location l (build2::get_location (t, in)); - t.separated = true; - - string w; - bool f (t.type == type::eos); // If the whole thing is empty. - - for (; t.type != type::eos; t = lex.next ()) - { - type tt (t.type); - l = build2::get_location (t, in); - - // Re-lexing double-quotes will recognize $, ( inside as - // tokens so we have to reverse them back. Since we don't - // treat spaces as separators we can be sure we will get - // it right. - // - switch (tt) - { - case type::dollar: w += '$'; continue; - case type::lparen: w += '('; continue; - } - - // Retire the current word. We need to distinguish between - // empty and non-existent (e.g., > vs >""). - // - if (!w.empty () || f) - { - add_word (move (w), l); - f = false; - } - - if (tt == type::word) - { - w = move (t.value); - f = true; - continue; - } - - // If this is one of the operators/separators, check that - // we don't have any pending locations to be filled. - // - check_pending (l); - - // Note: there is another one in the outer loop above. - // - switch (tt) - { - case type::pipe: - case type::log_or: - case type::log_and: - { - // Check that the previous command makes sense. - // - check_command (l, tt != type::pipe); - expr.back ().pipe.push_back (move (c)); - - c = command (); - p = pending::program; - - if (tt != type::pipe) - { - expr_operator o (tt == type::log_or - ? expr_operator::log_or - : expr_operator::log_and); - expr.push_back ({o, command_pipe ()}); - } - - break; - } - - case type::in_pass: - case type::out_pass: - - case type::in_null: - case type::out_null: - - case type::out_trace: - - case type::out_merge: - - case type::in_str: - case type::out_str: - - case type::in_file: - case type::out_file_cmp: - case type::out_file_ovr: - case type::out_file_app: - { - parse_redirect (t, l); - break; - } - - case type::clean: - { - parse_clean (t); - break; - } - - case type::in_doc: - case type::out_doc: - { - fail (l) << "here-document redirect in expansion"; - break; - } - } - } - - // Don't forget the last word. - // - if (!w.empty () || f) - add_word (move (w), l); - } - } - - ns.clear (); - break; - } - } - } - - if (!pre_parse_) - { - // Verify we don't have anything pending to be filled and the - // command makes sense. - // - check_pending (l); - check_command (l, true); - - expr.back ().pipe.push_back (move (c)); - } - - return make_pair (move (expr), move (hd)); - } - - command_exit parser:: - parse_command_exit (token& t, type& tt) - { - // enter: equal/not_equal - // leave: token after exit status (one parse_names() chunk) - - exit_comparison comp (tt == type::equal - ? exit_comparison::eq - : exit_comparison::ne); - - // The next chunk should be the exit status. - // - next (t, tt); - location l (get_location (t)); - names ns (parse_names (t, tt, - pattern_mode::ignore, - true, - "exit status", - nullptr)); - unsigned long es (256); - - if (!pre_parse_) - { - try - { - if (ns.size () == 1 && ns[0].simple () && !ns[0].empty ()) - es = stoul (ns[0].value); - } - catch (const exception&) {} // Fall through. - - if (es > 255) - { - diag_record dr; - - dr << fail (l) << "expected exit status instead of "; - to_stream (dr.os, ns, true); // Quote. - - dr << info << "exit status is an unsigned integer less than 256"; - } - } - - return command_exit {comp, static_cast<uint8_t> (es)}; - } - - void parser:: - parse_here_documents (token& t, type& tt, - pair<command_expr, here_docs>& p) - { - // enter: newline - // leave: newline - - // Parse here-document fragments in the order they were mentioned on - // the command line. - // - for (here_doc& h: p.second) - { - // Switch to the here-line mode which is like single/double-quoted - // string but recognized the newline as a separator. - // - mode (h.literal - ? lexer_mode::here_line_single - : lexer_mode::here_line_double); - next (t, tt); - - parsed_doc v ( - parse_here_document (t, tt, h.end, h.modifiers, h.regex)); - - // If all the here-document redirects are overridden, then we just - // drop the fragment. - // - if (!pre_parse_ && !h.redirects.empty ()) - { - auto i (h.redirects.cbegin ()); - - command& c (p.first[i->expr].pipe[i->pipe]); - redirect& r (i->fd == 0 ? c.in : i->fd == 1 ? c.out : c.err); - - if (v.re) - { - assert (r.type == redirect_type::here_doc_regex); - - r.regex = move (v.regex); - r.regex.flags = move (h.regex_flags); - } - else - { - assert (r.type == redirect_type::here_doc_literal); - - r.str = move (v.str); - } - - r.end = move (h.end); - r.end_line = v.end_line; - r.end_column = v.end_column; - - // Note that our references cannot be invalidated because the - // command_expr/command-pipe vectors already contain all their - // elements. - // - for (++i; i != h.redirects.cend (); ++i) - { - command& c (p.first[i->expr].pipe[i->pipe]); - - (i->fd == 0 ? c.in : i->fd == 1 ? c.out : c.err) = - redirect (redirect_type::here_doc_ref, r); - } - } - - expire_mode (); - } - } - - parser::parsed_doc parser:: - parse_here_document (token& t, type& tt, - const string& em, - const string& mod, - char re) - { - // enter: first token on first line - // leave: newline (after end marker) - - // String literal. Note that when decide if to terminate the previously - // added line with a newline, we need to distinguish a yet empty result - // and the one that has a single blank line added. - // - optional<string> rs; - - regex_lines rre; - - // Here-documents can be indented. The leading whitespaces of the end - // marker line (called strip prefix) determine the indentation. Every - // other line in the here-document should start with this prefix which - // is automatically stripped. The only exception is a blank line. - // - // The fact that the strip prefix is only known at the end, after - // seeing all the lines, is rather inconvenient. As a result, the way - // we implement this is a bit hackish (though there is also something - // elegant about it): at the end of the pre-parse stage we are going - // re-examine the sequence of tokens that comprise this here-document - // and "fix up" the first token of each line by stripping the prefix. - // - string sp; - - // Remember the position of the first token in this here-document. - // - size_t ri (pre_parse_ ? replay_data_.size () - 1 : 0); - - // We will use the location of the first token on the line for the - // regex diagnostics. At the end of the loop it will point to the - // beginning of the end marker. - // - location l; - - while (tt != type::eos) - { - l = get_location (t); - - // Check if this is the end marker. For starters, it should be a - // single, unquoted word followed by a newline. - // - if (tt == type::word && - t.qtype == quote_type::unquoted && - peek () == type::newline) - { - const string& v (t.value); - - size_t vn (v.size ()); - size_t en (em.size ()); - - // Then check that it ends with the end marker. - // - if (vn >= en && v.compare (vn - en, en, em) == 0) - { - // Now check that the prefix only contains whitespaces. - // - size_t n (vn - en); - - if (v.find_first_not_of (" \t") >= n) - { - assert (pre_parse_ || n == 0); // Should have been stripped. - - if (n != 0) - sp.assign (v, 0, n); // Save the strip prefix. - - next (t, tt); // Get the newline. - break; - } - } - } - - // Expand the line (can be blank). - // - // @@ PAT: one could argue that if we do it in variables, then we - // should do it here as well. Though feels bizarre. - // - names ns (tt != type::newline - ? parse_names (t, tt, - pattern_mode::ignore, - false, - "here-document line", - nullptr) - : names ()); - - if (!pre_parse_) - { - // What shall we do if the expansion results in multiple names? - // For, example if the line contains just the variable expansion - // and it is of type strings. Adding all the elements space- - // separated seems like the natural thing to do. - // - string s; - for (auto b (ns.begin ()), i (b); i != ns.end (); ++i) - { - string n; - - try - { - n = value_traits<string>::convert (move (*i), nullptr); - } - catch (const invalid_argument&) - { - fail (l) << "invalid string value '" << *i << "'"; - } - - if (i == b) - s = move (n); - else - { - s += ' '; - s += n; - } - } - - if (!re) - { - // Add newline after previous line. - // - if (rs) - { - *rs += '\n'; - *rs += s; - } - else - rs = move (s); - } - else - { - // Due to expansion we can end up with multiple lines. If empty - // then will add a blank textual literal. - // - for (size_t p (0); p != string::npos; ) - { - string ln; - size_t np (s.find ('\n', p)); - - if (np != string::npos) - { - ln = string (s, p, np - p); - p = np + 1; - } - else - { - ln = string (s, p); - p = np; - } - - if (ln[0] != re) // Line doesn't start with regex introducer. - { - // This is a line-char literal (covers blank lines as well). - // - // Append textual literal. - // - rre.lines.emplace_back (l.line, l.column, move (ln), false); - } - else // Line starts with the regex introducer. - { - // This is a char-regex, or a sequence of line-regex syntax - // characters or both (in this specific order). So we will - // add regex (with optional special characters) or special - // literal. - // - size_t p (ln.find (re, 1)); - if (p == string::npos) - { - // No regex, just a sequence of syntax characters. - // - string spec (ln, 1); - if (spec.empty ()) - fail (l) << "no syntax line characters"; - - // Append special literal. - // - rre.lines.emplace_back ( - l.line, l.column, move (spec), true); - } - else - { - // Regex (probably with syntax characters). - // - regex_parts re; - - // Empty regex is a special case repesenting a blank line. - // - if (p == 1) - // Position to optional specal characters of an empty - // regex. - // - ++p; - else - // Can't fail as all the pre-conditions verified - // (non-empty with both introducers in place), so no - // description required. - // - re = parse_regex (ln, l, "", &p); - - // Append regex with optional special characters. - // - rre.lines.emplace_back (l.line, l.column, - move (re.value), move (re.flags), - string (ln, p)); - } - } - } - } - } - - // We should expand the whole line at once so this would normally be - // a newline but can also be an end-of-stream. - // - if (tt == type::newline) - next (t, tt); - else - assert (tt == type::eos); - } - - if (tt == type::eos) - fail (t) << "missing here-document end marker '" << em << "'"; - - if (pre_parse_) - { - // Strip the indentation prefix if there is one. - // - assert (replay_ == replay::save); - - if (!sp.empty ()) - { - size_t sn (sp.size ()); - - for (; ri != replay_data_.size (); ++ri) - { - token& rt (replay_data_[ri].token); - - if (rt.type == type::newline) // Blank - continue; - - if (rt.type != type::word || rt.value.compare (0, sn, sp) != 0) - fail (rt) << "unindented here-document line"; - - // If the word is equal to the strip prefix then we have to drop - // the token. Note that simply making it an empty word won't - // have the same semantics. For instance, it would trigger - // concatenated expansion. - // - if (rt.value.size () == sn) - replay_data_.erase (replay_data_.begin () + ri); - else - { - rt.value.erase (0, sn); - rt.column += sn; - ++ri; - } - - // Skip until next newline. - // - for (; replay_data_[ri].token.type != type::newline; ++ri) ; - } - } - } - else - { - // Add final newline unless suppressed. - // - if (mod.find (':') == string::npos) - { - if (re) - // Note that the position is synthetic, but that's ok as we don't - // expect any diagnostics to refer this line. - // - rre.lines.emplace_back (l.line, l.column, string (), false); - else if (rs) - *rs += '\n'; - else - rs = "\n"; - } - - // Finalize regex lines. - // - if (re) - { - // Empty regex matches nothing, so not of much use. - // - if (rre.lines.empty ()) - fail (l) << "empty here-document regex"; - - rre.intro = re; - } - } - - return re - ? parsed_doc (move (rre), l.line, l.column) - : parsed_doc (rs ? move (*rs) : string (), l.line, l.column); - } - // // Execute. // @@ -2927,20 +1322,99 @@ namespace build2 void parser:: exec_scope_body () { - size_t li (0); - runner_->enter (*scope_, scope_->start_loc_); + // Note that we rely on "small function object" optimization for the + // exec_*() lambdas. + // + auto exec_set = [this] (const variable& var, + token& t, build2::script::token_type& tt, + const location&) + { + next (t, tt); + type kind (tt); // Assignment kind. + + // We cannot reuse the value mode (see above for details). + // + mode (lexer_mode::variable_line); + value rhs (parse_variable_line (t, tt)); + + if (tt == type::semi) + next (t, tt); + + assert (tt == type::newline); + + // Assign. + // + value& lhs (kind == type::assign + ? scope_->assign (var) + : scope_->append (var)); + + apply_value_attributes (&var, lhs, move (rhs), kind); + + // If we change any of the test.* values, then reset the $*, $N + // special aliases. + // + if (var.name == script_->test_var.name || + var.name == script_->options_var.name || + var.name == script_->arguments_var.name || + var.name == script_->redirects_var.name || + var.name == script_->cleanups_var.name) + { + scope_->reset_special (); + } + }; + + // Is set later, right before the exec_lines() call. + // + command_type ct; + + auto exec_cmd = [&ct, this] (token& t, build2::script::token_type& tt, + size_t li, + bool single, + const location& ll) + { + // We use the 0 index to signal that this is the only command. + // Note that we only do this for test commands. + // + if (ct == command_type::test && single) + li = 0; + + command_expr ce ( + parse_command_line (t, static_cast<token_type&> (tt))); + + runner_->run (*scope_, ce, ct, li, ll); + }; + + auto exec_if = [this] (token& t, build2::script::token_type& tt, + size_t li, + const location& ll) + { + command_expr ce ( + parse_command_line (t, static_cast<token_type&> (tt))); + + // Assume if-else always involves multiple commands. + // + return runner_->run_if (*scope_, ce, li, ll); + }; + + size_t li (1); + if (test* t = dynamic_cast<test*> (scope_)) { - exec_lines ( - t->tests_.begin (), t->tests_.end (), li, command_type::test); + ct = command_type::test; + + exec_lines (t->tests_.begin (), t->tests_.end (), + exec_set, exec_cmd, exec_if, + li); } else if (group* g = dynamic_cast<group*> (scope_)) { - bool exec_scope ( - exec_lines ( - g->setup_.begin (), g->setup_.end (), li, command_type::setup)); + ct = command_type::setup; + + bool exec_scope (exec_lines (g->setup_.begin (), g->setup_.end (), + exec_set, exec_cmd, exec_if, + li)); if (exec_scope) { @@ -2998,7 +1472,7 @@ namespace build2 try { - take = runner_->run_if (*scope_, ce, ++li, ll); + take = runner_->run_if (*scope_, ce, li++, ll); } catch (const exit_scope& e) { @@ -3106,8 +1580,11 @@ namespace build2 } } - exec_lines ( - g->tdown_.begin (), g->tdown_.end (), li, command_type::teardown); + ct = command_type::teardown; + + exec_lines (g->tdown_.begin (), g->tdown_.end (), + exec_set, exec_cmd, exec_if, + li); } else assert (false); @@ -3117,239 +1594,23 @@ namespace build2 scope_->state = scope_state::passed; } - bool parser:: - exec_lines (lines::iterator i, lines::iterator e, - size_t& li, - command_type ct) - { - try - { - token t; - type tt; - - for (; i != e; ++i) - { - line& ln (*i); - line_type lt (ln.type); - - assert (path_ == nullptr); - - // Set the tokens and start playing. - // - replay_data (move (ln.tokens)); - - // We don't really need to change the mode since we already know - // the line type. - // - next (t, tt); - const location ll (get_location (t)); - - switch (lt) - { - case line_type::var: - { - // Parse. - // - string name (move (t.value)); - - next (t, tt); - type kind (tt); // Assignment kind. - - value rhs (parse_variable_line (t, tt)); - - if (tt == type::semi) - next (t, tt); - - assert (tt == type::newline); - - // Assign. - // - const variable& var (*ln.var); - - value& lhs (kind == type::assign - ? scope_->assign (var) - : scope_->append (var)); - - build2::parser::apply_value_attributes ( - &var, lhs, move (rhs), kind); - - // If we changes any of the test.* values, then reset the $*, - // $N special aliases. - // - if (var.name == script_->test_var.name || - var.name == script_->options_var.name || - var.name == script_->arguments_var.name || - var.name == script_->redirects_var.name || - var.name == script_->cleanups_var.name) - { - scope_->reset_special (); - } - - replay_stop (); - break; - } - case line_type::cmd: - { - // We use the 0 index to signal that this is the only command. - // Note that we only do this for test commands. - // - if (ct == command_type::test && li == 0) - { - lines::iterator j (i); - for (++j; j != e && j->type == line_type::var; ++j) ; - - if (j != e) // We have another command. - ++li; - } - else - ++li; - - command_expr ce (parse_command_line (t, tt)); - runner_->run (*scope_, ce, ct, li, ll); - - replay_stop (); - break; - } - case line_type::cmd_if: - case line_type::cmd_ifn: - case line_type::cmd_elif: - case line_type::cmd_elifn: - case line_type::cmd_else: - { - next (t, tt); // Skip to start of command. - - bool take; - if (lt != line_type::cmd_else) - { - // Assume if-else always involves multiple commands. - // - command_expr ce (parse_command_line (t, tt)); - take = runner_->run_if (*scope_, ce, ++li, ll); - - if (lt == line_type::cmd_ifn || lt == line_type::cmd_elifn) - take = !take; - } - else - { - assert (tt == type::newline); - take = true; - } - - replay_stop (); - - // If end is true, then find the 'end' line. Otherwise, find - // the next if-else line. If skip is true then increment the - // command line index. - // - auto next = [e, &li] - (lines::iterator j, bool end, bool skip) -> lines::iterator - { - // We need to be aware of nested if-else chains. - // - size_t n (0); - - for (++j; j != e; ++j) - { - line_type lt (j->type); - - if (lt == line_type::cmd_if || - lt == line_type::cmd_ifn) - ++n; - - // If we are nested then we just wait until we get back - // to the surface. - // - if (n == 0) - { - switch (lt) - { - case line_type::cmd_elif: - case line_type::cmd_elifn: - case line_type::cmd_else: - if (end) break; - // Fall through. - case line_type::cmd_end: return j; - default: break; - } - } - - if (lt == line_type::cmd_end) - --n; - - if (skip) - { - // Note that we don't count else and end as commands. - // - switch (lt) - { - case line_type::cmd: - case line_type::cmd_if: - case line_type::cmd_ifn: - case line_type::cmd_elif: - case line_type::cmd_elifn: ++li; break; - default: break; - } - } - } - - assert (false); // Missing end. - return e; - }; - - // If we are taking this branch then we need to parse all the - // lines until the next if-else line and then skip all the - // lines until the end (unless next is already end). - // - // Otherwise, we need to skip all the lines until the next - // if-else line and then continue parsing. - // - if (take) - { - lines::iterator j (next (i, false, false)); // Next if-else. - if (!exec_lines (i + 1, j, li, ct)) - return false; - - i = j->type == line_type::cmd_end ? j : next (j, true, true); - } - else - { - i = next (i, false, true); - if (i->type != line_type::cmd_end) - --i; // Continue with this line (e.g., elif or else). - } - - break; - } - case line_type::cmd_end: - { - assert (false); - } - } - } - - return true; - } - catch (const exit_scope& e) - { - // Bail out if the scope is exited with the failure status. Otherwise - // leave the scope normally. - // - if (!e.status) - throw failed (); - - replay_stop (); - return false; - } - } - // // The rest. // + // When add a special variable don't forget to update lexer::word(). + // + bool parser:: + special_variable (const string& n) noexcept + { + return n == "*" || n == "~" || n == "@" || digit (n); + } + lookup parser:: lookup_variable (name&& qual, string&& name, const location& loc) { - assert (!pre_parse_); + if (pre_parse_) + return lookup (); if (!qual.empty ()) fail (loc) << "qualified variable name"; @@ -3381,40 +1642,6 @@ namespace build2 : script_->lookup_in_buildfile (name); } - size_t parser:: - quoted () const - { - size_t r (0); - - if (replay_ != replay::play) - r = lexer_->quoted (); - else - { - // Examine tokens we have replayed since last reset. - // - for (size_t i (replay_quoted_); i != replay_i_; ++i) - if (replay_data_[i].token.qtype != quote_type::unquoted) - ++r; - } - - return r; - } - - void parser:: - reset_quoted (token& cur) - { - if (replay_ != replay::play) - lexer_->reset_quoted (cur.qtype != quote_type::unquoted ? 1 : 0); - else - { - replay_quoted_ = replay_i_ - 1; - - // Must be the same token. - // - assert (replay_data_[replay_quoted_].token.qtype == cur.qtype); - } - } - const string& parser:: insert_id (string id, location l) { @@ -3426,76 +1653,6 @@ namespace build2 return p.first->first; } - - void parser:: - set_lexer (lexer* l) - { - lexer_ = l; - base_parser::lexer_ = l; - } - - void parser:: - apply_value_attributes (const variable* var, - value& lhs, - value&& rhs, - const string& attributes, - token_type kind, - const path_name& name) - { - path_ = &name; - - istringstream is (attributes); - lexer l (is, name, lexer_mode::attributes); - set_lexer (&l); - - token t; - type tt; - - next_with_attributes (t, tt); // Enable `[` recognition. - - if (tt != type::lsbrace && tt != type::eos) - fail (t) << "expected '[' instead of " << t; - - attributes_push (t, tt, true); - - if (tt != type::eos) - fail (t) << "trailing junk after ']'"; - - build2::parser::apply_value_attributes (var, lhs, move (rhs), kind); - } - - // parser::parsed_doc - // - parser::parsed_doc:: - parsed_doc (string s, uint64_t l, uint64_t c) - : str (move (s)), re (false), end_line (l), end_column (c) - { - } - - parser::parsed_doc:: - parsed_doc (regex_lines&& r, uint64_t l, uint64_t c) - : regex (move (r)), re (true), end_line (l), end_column (c) - { - } - - parser::parsed_doc:: - parsed_doc (parsed_doc&& d) - : re (d.re), end_line (d.end_line), end_column (d.end_column) - { - if (re) - new (®ex) regex_lines (move (d.regex)); - else - new (&str) string (move (d.str)); - } - - parser::parsed_doc:: - ~parsed_doc () - { - if (re) - regex.~regex_lines (); - else - str.~string (); - } } } } diff --git a/libbuild2/test/script/parser.hxx b/libbuild2/test/script/parser.hxx index ed3c926..aa64943 100644 --- a/libbuild2/test/script/parser.hxx +++ b/libbuild2/test/script/parser.hxx @@ -8,9 +8,10 @@ #include <libbuild2/forward.hxx> #include <libbuild2/utility.hxx> -#include <libbuild2/parser.hxx> #include <libbuild2/diagnostics.hxx> +#include <libbuild2/script/parser.hxx> + #include <libbuild2/test/script/token.hxx> #include <libbuild2/test/script/script.hxx> @@ -20,15 +21,14 @@ namespace build2 { namespace script { - class lexer; class runner; - class parser: protected build2::parser + class parser: public build2::script::parser { // Pre-parse. Issue diagnostics and throw failed in case of an error. // public: - parser (context& c): build2::parser (c) {} + parser (context& c): build2::script::parser (c) {} void pre_parse (script&); @@ -36,19 +36,6 @@ namespace build2 void pre_parse (istream&, script&); - // Helpers. - // - // Parse attribute string and perform attribute-guided assignment. - // Issue diagnostics and throw failed in case of an error. - // - void - apply_value_attributes (const variable*, // Optional. - value& lhs, - value&& rhs, - const string& attributes, - token_type assign_kind, - const path_name&); // For diagnostics. - // Recursive descent parser. // // Usually (but not always) parse functions receive the token/type @@ -101,83 +88,14 @@ namespace build2 description parse_trailing_description (token&, token_type&); - value - parse_variable_line (token&, token_type&); - command_expr parse_command_line (token&, token_type&); - // Ordered sequence of here-document redirects that we can expect to - // see after the command line. - // - struct here_redirect - { - size_t expr; // Index in command_expr. - size_t pipe; // Index in command_pipe. - int fd; // Redirect fd (0 - in, 1 - out, 2 - err). - }; - - struct here_doc - { - // Redirects that share here_doc. Most of the time we will have no - // more than 2 (2 - for the roundtrip test cases). Doesn't refer - // overridden redirects and thus can be empty. - // - small_vector<here_redirect, 2> redirects; - - string end; - bool literal; // Literal (single-quote). - string modifiers; - - // Regex introducer ('\0' if not a regex, so can be used as bool). - // - char regex; - - // Regex global flags. Meaningful if regex != '\0'. - // - string regex_flags; - }; - using here_docs = vector<here_doc>; - - pair<command_expr, here_docs> - parse_command_expr (token&, token_type&); - - command_exit - parse_command_exit (token&, token_type&); - - void - parse_here_documents (token&, token_type&, - pair<command_expr, here_docs>&); - - struct parsed_doc - { - union - { - string str; // Here-document literal. - regex_lines regex; // Here-document regex. - }; - - bool re; // True if regex. - uint64_t end_line; // Here-document end marker location. - uint64_t end_column; - - parsed_doc (string, uint64_t line, uint64_t column); - parsed_doc (regex_lines&&, uint64_t line, uint64_t column); - parsed_doc (parsed_doc&&); // Note: move constuctible-only type. - ~parsed_doc (); - }; - - parsed_doc - parse_here_document (token&, token_type&, - const string&, - const string& mode, - char re_intro); // '\0' if not a regex. - // Execute. Issue diagnostics and throw failed in case of an error. // public: void - execute (script& s, runner& r); + execute (script&, runner&); void execute (scope&, script&, runner&); @@ -186,13 +104,11 @@ namespace build2 void exec_scope_body (); - // Return false if the execution of the scope should be terminated - // with the success status (e.g., as a result of encountering the exit - // builtin). For unsuccessful termination the failed exception should - // be thrown. + // Helpers. // - bool - exec_lines (lines::iterator, lines::iterator, size_t&, command_type); + public: + static bool + special_variable (const string&) noexcept; // Customization hooks. // @@ -200,33 +116,13 @@ namespace build2 virtual lookup lookup_variable (name&&, string&&, const location&) override; - // Number of quoted tokens since last reset. Note that this includes - // the peeked token, if any. - // - protected: - size_t - quoted () const; - - void - reset_quoted (token& current); - - size_t replay_quoted_; - // Insert id into the id map checking for duplicates. // protected: const string& insert_id (string, location); - // Set lexer pointers for both the current and the base classes. - // - protected: - void - set_lexer (lexer* l); - protected: - using base_parser = build2::parser; - script* script_; // Pre-parse state. @@ -238,7 +134,7 @@ namespace build2 id_map* id_map_; include_set* include_set_; // Testscripts already included in this // scope. Must be absolute and normalized. - lexer* lexer_; + string id_prefix_; // Auto-derived id prefix. // Execute state. diff --git a/libbuild2/test/script/regex.cxx b/libbuild2/test/script/regex.cxx deleted file mode 100644 index 92dd8f1..0000000 --- a/libbuild2/test/script/regex.cxx +++ /dev/null @@ -1,439 +0,0 @@ -// file : libbuild2/test/script/regex.cxx -*- C++ -*- -// license : MIT; see accompanying LICENSE file - -#include <locale> - -#include <libbuild2/test/script/regex.hxx> - -using namespace std; - -namespace build2 -{ - namespace test - { - namespace script - { - namespace regex - { - static_assert (alignof (char_string) % 4 == 0, - "unexpected char_string alignment"); - - static_assert (alignof (char_regex) % 4 == 0, - "unexpected char_regex alignment"); - - static_assert (sizeof (uintptr_t) > sizeof (int16_t), - "unexpected uintptr_t size"); - - const line_char line_char::nul (0); - const line_char line_char::eof (-1); - - // line_char - // - // We package the special character into uintptr_t with the following - // steps: - // - // - narrow down int value to int16_t (preserves all the valid values) - // - // - convert to uint16_t (bitwise representation stays the same, but no - // need to bother with signed value widening, leftmost bits loss on - // left shift, etc) - // - // - convert to uintptr_t (storage type) - // - // - shift left by two bits (the operation is fully reversible as - // uintptr_t is wider then uint16_t) - // - line_char:: - line_char (int c) - : data_ ( - (static_cast <uintptr_t> ( - static_cast<uint16_t> ( - static_cast<int16_t> (c))) << 2) | - static_cast <uintptr_t> (line_type::special)) - { - // @@ How can we allow anything for basic_regex but only subset - // for our own code? - // - const char ex[] = "pn\n\r"; - - assert (c == 0 || // Null character. - - // EOF. Note that is also passed by msvcrt as _Meta_eos - // enum value. - // - c == -1 || - - // libstdc++ line/paragraph separators. - // - c == u'\u2028' || c == u'\u2029' || - - (c > 0 && c <= 255 && ( - // Supported regex special characters. - // - syntax (c) || - - // libstdc++ look-ahead tokens, newline chars. - // - string::traits_type::find (ex, 4, c) != nullptr))); - } - - line_char:: - line_char (const char_string& s, line_pool& p) - : line_char (&(*p.strings.emplace (s).first)) - { - } - - line_char:: - line_char (char_string&& s, line_pool& p) - : line_char (&(*p.strings.emplace (move (s)).first)) - { - } - - line_char:: - line_char (char_regex r, line_pool& p) - // Note: in C++17 can write as p.regexes.emplace_front(move (r)) - // - : line_char (&(*p.regexes.emplace (p.regexes.begin (), move (r)))) - { - } - - bool - line_char::syntax (char c) - { - return string::traits_type::find ( - "()|.*+?{}\\0123456789,=!", 23, c) != nullptr; - } - - bool - operator== (const line_char& l, const line_char& r) - { - line_type lt (l.type ()); - line_type rt (r.type ()); - - if (lt == rt) - { - bool res (true); - - switch (lt) - { - case line_type::special: res = l.special () == r.special (); break; - case line_type::regex: assert (false); break; - - // Note that we use pointers (rather than vales) comparison - // assuming that the strings must belong to the same pool. - // - case line_type::literal: res = l.literal () == r.literal (); break; - } - - return res; - } - - // Match literal with regex. - // - if (lt == line_type::literal && rt == line_type::regex) - return regex_match (*l.literal (), *r.regex ()); - else if (rt == line_type::literal && lt == line_type::regex) - return regex_match (*r.literal (), *l.regex ()); - - return false; - } - - bool - operator< (const line_char& l, const line_char& r) - { - if (l == r) - return false; - - line_type lt (l.type ()); - line_type rt (r.type ()); - - if (lt != rt) - return lt < rt; - - bool res (false); - - switch (lt) - { - case line_type::special: res = l.special () < r.special (); break; - case line_type::literal: res = *l.literal () < *r.literal (); break; - case line_type::regex: assert (false); break; - } - - return res; - } - - // line_char_locale - // - - // An exemplar locale with the std::ctype<line_char> facet. It is used - // for the subsequent line char locale objects creation (see below) - // which normally ends up with a shallow copy of a reference-counted - // object. - // - // Note that creating the line char locales from the exemplar is not - // merely an optimization: there is a data race in the libstdc++ (at - // least as of GCC 9.1) implementation of the locale(const locale&, - // Facet*) constructor (bug #91057). - // - // Also note that we install the facet in init() rather than during - // the object creation to avoid a race with the std::locale-related - // global variables initialization. - // - static locale line_char_locale_exemplar; - - void - init () - { - line_char_locale_exemplar = - locale (locale (), - new std::ctype<line_char> ()); // Hidden by ctype bitmask. - } - - line_char_locale:: - line_char_locale () - : locale (line_char_locale_exemplar) - { - // Make sure init() has been called. - // - // Note: has_facet() is hidden by a private function in libc++. - // - assert (std::has_facet<std::ctype<line_char>> (*this)); - } - - // char_regex - // - // Transform regex according to the extended flags {idot}. If regex is - // malformed then keep transforming, so the resulting string is - // malformed the same way. We expect the error to be reported by the - // char_regex ctor. - // - static string - transform (const string& s, char_flags f) - { - assert ((f & char_flags::idot) != char_flags::none); - - string r; - bool escape (false); - bool cclass (false); - - for (char c: s) - { - // Inverse escaping for a dot which is out of the char class - // brackets. - // - bool inverse (c == '.' && !cclass); - - // Handle the escape case. Note that we delay adding the backslash - // since we may have to inverse things. - // - if (escape) - { - if (!inverse) - r += '\\'; - - r += c; - escape = false; - - continue; - } - else if (c == '\\') - { - escape = true; - continue; - } - - // Keep track of being inside the char class brackets, escape if - // inversion. Note that we never inverse square brackets. - // - if (c == '[' && !cclass) - cclass = true; - else if (c == ']' && cclass) - cclass = false; - else if (inverse) - r += '\\'; - - r += c; - } - - if (escape) // Regex is malformed but that's not our problem. - r += '\\'; - - return r; - } - - static char_regex::flag_type - to_std_flags (char_flags f) - { - // Note that ECMAScript flag is implied in the absense of a grammar - // flag. - // - return (f & char_flags::icase) != char_flags::none - ? char_regex::icase - : char_regex::flag_type (); - } - - char_regex:: - char_regex (const char_string& s, char_flags f) - : base_type ((f & char_flags::idot) != char_flags::none - ? transform (s, f) - : s, - to_std_flags (f)) - { - } - } - } - } -} - -namespace std -{ - using namespace build2::test::script::regex; - - // char_traits<line_char> - // - line_char* char_traits<line_char>:: - assign (char_type* s, size_t n, char_type c) - { - for (size_t i (0); i != n; ++i) - s[i] = c; - return s; - } - - line_char* char_traits<line_char>:: - move (char_type* d, const char_type* s, size_t n) - { - if (n > 0 && d != s) - { - // If d < s then it can't be in [s, s + n) range and so using copy() is - // safe. Otherwise d + n is out of (s, s + n] range and so using - // copy_backward() is safe. - // - if (d < s) - std::copy (s, s + n, d); // Hidden by char_traits<line_char>::copy(). - else - copy_backward (s, s + n, d + n); - } - - return d; - } - - line_char* char_traits<line_char>:: - copy (char_type* d, const char_type* s, size_t n) - { - std::copy (s, s + n, d); // Hidden by char_traits<line_char>::copy(). - return d; - } - - int char_traits<line_char>:: - compare (const char_type* s1, const char_type* s2, size_t n) - { - for (size_t i (0); i != n; ++i) - { - if (s1[i] < s2[i]) - return -1; - else if (s2[i] < s1[i]) - return 1; - } - - return 0; - } - - size_t char_traits<line_char>:: - length (const char_type* s) - { - size_t i (0); - while (s[i] != char_type::nul) - ++i; - - return i; - } - - const line_char* char_traits<line_char>:: - find (const char_type* s, size_t n, const char_type& c) - { - for (size_t i (0); i != n; ++i) - { - if (s[i] == c) - return s + i; - } - - return nullptr; - } - - // ctype<line_char> - // - locale::id ctype<line_char>::id; - - const line_char* ctype<line_char>:: - is (const char_type* b, const char_type* e, mask* m) const - { - while (b != e) - { - const char_type& c (*b++); - - *m++ = c.type () == line_type::special && c.special () >= 0 && - build2::digit (static_cast<char> (c.special ())) - ? digit - : 0; - } - - return e; - } - - const line_char* ctype<line_char>:: - scan_is (mask m, const char_type* b, const char_type* e) const - { - for (; b != e; ++b) - { - if (is (m, *b)) - return b; - } - - return e; - } - - const line_char* ctype<line_char>:: - scan_not (mask m, const char_type* b, const char_type* e) const - { - for (; b != e; ++b) - { - if (!is (m, *b)) - return b; - } - - return e; - } - - const char* ctype<line_char>:: - widen (const char* b, const char* e, char_type* c) const - { - while (b != e) - *c++ = widen (*b++); - - return e; - } - - const line_char* ctype<line_char>:: - narrow (const char_type* b, const char_type* e, char def, char* c) const - { - while (b != e) - *c++ = narrow (*b++, def); - - return e; - } - - // regex_traits<line_char> - // - int regex_traits<line_char>:: - value (char_type c, int radix) const - { - assert (radix == 8 || radix == 10 || radix == 16); - - if (c.type () != line_type::special) - return -1; - - const char digits[] = "0123456789ABCDEF"; - const char* d (string::traits_type::find (digits, radix, c.special ())); - return d != nullptr ? static_cast<int> (d - digits) : -1; - } -} diff --git a/libbuild2/test/script/regex.hxx b/libbuild2/test/script/regex.hxx deleted file mode 100644 index 4114ea4..0000000 --- a/libbuild2/test/script/regex.hxx +++ /dev/null @@ -1,684 +0,0 @@ -// file : libbuild2/test/script/regex.hxx -*- C++ -*- -// license : MIT; see accompanying LICENSE file - -#ifndef LIBBUILD2_TEST_SCRIPT_REGEX_HXX -#define LIBBUILD2_TEST_SCRIPT_REGEX_HXX - -#include <list> -#include <regex> -#include <locale> -#include <string> // basic_string -#include <type_traits> // make_unsigned, enable_if, is_* -#include <unordered_set> - -#include <libbuild2/types.hxx> -#include <libbuild2/utility.hxx> - -namespace build2 -{ - namespace test - { - namespace script - { - namespace regex - { - using char_string = std::basic_string<char>; - - enum class char_flags: uint16_t - { - icase = 0x1, // Case-insensitive match. - idot = 0x2, // Invert '.' escaping. - - none = 0 - }; - - // Restricts valid standard flags to just {icase}, extends with custom - // flags {idot}. - // - class char_regex: public std::basic_regex<char> - { - public: - using base_type = std::basic_regex<char>; - - char_regex (const char_string&, char_flags = char_flags::none); - }; - - // Newlines are line separators and are not part of the line: - // - // line<newline>line<newline> - // - // Specifically, this means that a customary trailing newline creates a - // trailing blank line. - // - // All characters can inter-compare (though there cannot be regex - // characters in the output, only in line_regex). - // - // Note that we assume that line_regex and the input to regex_match() - // use the same pool. - // - struct line_pool - { - // Note that we assume the pool can be moved without invalidating - // pointers to any already pooled entities. - // - std::unordered_set<char_string> strings; - std::list<char_regex> regexes; - }; - - enum class line_type - { - special, - literal, - regex - }; - - struct line_char - { - // Steal last two bits from the pointer to store the type. - // - private: - std::uintptr_t data_; - - public: - line_type - type () const {return static_cast<line_type> (data_ & 0x3);} - - int - special () const - { - // Stored as (shifted) int16_t. Perform steps reversed to those - // that are described in the comment for the corresponding ctor. - // Note that the intermediate cast to uint16_t is required to - // portably preserve the -1 special character. - // - return static_cast<int16_t> (static_cast<uint16_t> (data_ >> 2)); - } - - const char_string* - literal () const - { - // Note that 2 rightmost bits are used for packaging line_char - // type. Read the comment for the corresponding ctor for details. - // - return reinterpret_cast<const char_string*> ( - data_ & ~std::uintptr_t (0x3)); - } - - const char_regex* - regex () const - { - // Note that 2 rightmost bits are used for packaging line_char - // type. Read the comment for the corresponding ctor for details. - // - return reinterpret_cast<const char_regex*> ( - data_ & ~std::uintptr_t (0x3)); - } - - static const line_char nul; - static const line_char eof; - - // Note: creates an uninitialized value. - // - line_char () = default; - - // Create a special character. The argument value must be one of the - // following ones: - // - // 0 (nul character) - // -1 (EOF) - // [()|.*+?{}\0123456789,=!] (excluding []) - // - // Note that the constructor is implicit to allow basic_regex to - // implicitly construct line_chars from special char literals (in - // particular libstdc++ appends them to an internal line_string). - // - // Also note that we extend the valid characters set (see above) with - // 'p', 'n' (used by libstdc++ for positive/negative look-ahead - // tokens representation), and '\n', '\r', u'\u2028', u'\u2029' (used - // by libstdc++ for newline/newparagraph matching). - // - line_char (int); - - // Create a literal character. - // - // Don't copy string if already pooled. - // - explicit - line_char (const char_string&, line_pool&); - - explicit - line_char (char_string&&, line_pool&); - - explicit - line_char (const char_string* s) // Assume already pooled. - // - // Steal two bits from the pointer to package line_char type. - // Assume (and statically assert) that char_string address is a - // multiple of four. - // - : data_ (reinterpret_cast <std::uintptr_t> (s) | - static_cast <std::uintptr_t> (line_type::literal)) {} - - // Create a regex character. - // - explicit - line_char (char_regex, line_pool&); - - explicit - line_char (const char_regex* r) // Assume already pooled. - // - // Steal two bits from the pointer to package line_char type. - // Assume (and statically assert) that char_regex address is a - // multiple of four. - // - : data_ (reinterpret_cast <std::uintptr_t> (r) | - static_cast <std::uintptr_t> (line_type::regex)) {} - - // Provide basic_regex with the ability to use line_char in a context - // where a char value is expected (e.g., as a function argument). - // - // libstdc++ seems to cast special line_chars only (and such a - // conversion is meanigfull). - // - // msvcrt casts line_chars of arbitrary types instead. The only - // reasonable strategy is to return a value that differs from any - // other that can be encountered in a regex expression and so will - // unlikelly be misinterpreted. - // - operator char () const - { - return type () == line_type::special ? special () : '\a'; // BELL. - } - - // Return true if the character is a syntax (special) one. - // - static bool - syntax (char); - - // Provide basic_regex (such as from msvcrt) with the ability to - // explicitly cast line_chars to implementation-specific numeric - // types (enums, msvcrt's _Uelem, etc). - // - template <typename T> - explicit - operator T () const - { - assert (type () == line_type::special); - return static_cast<T> (special ()); - } - }; - - // Perform "deep" characters comparison (for example match literal - // character with a regex character), rather than just compare them - // literally. At least one argument must be of a type other than regex - // as there is no operator==() defined to compare regexes. Characters - // of the literal type must share the same pool (strings are compared - // by pointers not by values). - // - bool - operator== (const line_char&, const line_char&); - - // Return false if arguments are equal (operator==() returns true). - // Otherwise if types are different return the value implying that - // special < literal < regex. If types are special or literal return - // the result of the respective characters or strings comparison. At - // least one argument must be of a type other than regex as there is no - // operator<() defined to compare regexes. - // - // While not very natural operation for the class we have, we have to - // provide some meaningfull semantics for such a comparison as it is - // required by the char_traits<line_char> specialization. While we - // could provide it right in that specialization, let's keep it here - // for basic_regex implementations that potentially can compare - // line_chars as they compare them with expressions of other types (see - // below). - // - bool - operator< (const line_char&, const line_char&); - - inline bool - operator!= (const line_char& l, const line_char& r) - { - return !(l == r); - } - - inline bool - operator<= (const line_char& l, const line_char& r) - { - return l < r || l == r; - } - - // Provide basic_regex (such as from msvcrt) with the ability to - // compare line_char to a value of an integral or - // implementation-specific enum type. In the absense of the following - // template operators, such a comparisons would be ambigious for - // integral types (given that there are implicit conversions - // int->line_char and line_char->char) and impossible for enums. - // - // Note that these == and < operators can succeed only for a line_char - // of the special type. For other types they always return false. That - // in particular leads to the following case: - // - // (lc != c) != (lc < c || c < lc). - // - // Note that we can not assert line_char is of the special type as - // basic_regex (such as from libc++) may need the ability to check if - // arbitrary line_char belongs to some special characters range (like - // ['0', '9']). - // - template <typename T> - struct line_char_cmp - : public std::enable_if<std::is_integral<T>::value || - (std::is_enum<T>::value && - !std::is_same<T, char_flags>::value)> {}; - - template <typename T, typename = typename line_char_cmp<T>::type> - bool - operator== (const line_char& l, const T& r) - { - return l.type () == line_type::special && - static_cast<T> (l.special ()) == r; - } - - template <typename T, typename = typename line_char_cmp<T>::type> - bool - operator== (const T& l, const line_char& r) - { - return r.type () == line_type::special && - static_cast<T> (r.special ()) == l; - } - - template <typename T, typename = typename line_char_cmp<T>::type> - bool - operator!= (const line_char& l, const T& r) - { - return !(l == r); - } - - template <typename T, typename = typename line_char_cmp<T>::type> - bool - operator!= (const T& l, const line_char& r) - { - return !(l == r); - } - - template <typename T, typename = typename line_char_cmp<T>::type> - bool - operator< (const line_char& l, const T& r) - { - return l.type () == line_type::special && - static_cast<T> (l.special ()) < r; - } - - template <typename T, typename = typename line_char_cmp<T>::type> - bool - operator< (const T& l, const line_char& r) - { - return r.type () == line_type::special && - l < static_cast<T> (r.special ()); - } - - template <typename T, typename = typename line_char_cmp<T>::type> - inline bool - operator<= (const line_char& l, const T& r) - { - return l < r || l == r; - } - - template <typename T, typename = typename line_char_cmp<T>::type> - inline bool - operator<= (const T& l, const line_char& r) - { - return l < r || l == r; - } - - using line_string = std::basic_string<line_char>; - - // Locale that has ctype<line_char> facet installed. Used in the - // regex_traits<line_char> specialization (see below). - // - class line_char_locale: public std::locale - { - public: - // Create a copy of the global C++ locale. - // - line_char_locale (); - }; - - // Initialize the testscript regex global state. Should be called once - // prior to creating objects of types from this namespace. Note: not - // thread-safe. - // - void - init (); - } - } - } -} - -// Standard template specializations for line_char that are required for the -// basic_regex<line_char> instantiation. -// -namespace std -{ - template <> - class char_traits<build2::test::script::regex::line_char> - { - public: - using char_type = build2::test::script::regex::line_char; - using int_type = char_type; - using off_type = char_traits<char>::off_type; - using pos_type = char_traits<char>::pos_type; - using state_type = char_traits<char>::state_type; - - static void - assign (char_type& c1, const char_type& c2) {c1 = c2;} - - static char_type* - assign (char_type*, size_t, char_type); - - // Note that eq() and lt() are not constexpr (as required by C++11) - // because == and < operators for char_type are not constexpr. - // - static bool - eq (const char_type& l, const char_type& r) {return l == r;} - - static bool - lt (const char_type& l, const char_type& r) {return l < r;} - - static char_type* - move (char_type*, const char_type*, size_t); - - static char_type* - copy (char_type*, const char_type*, size_t); - - static int - compare (const char_type*, const char_type*, size_t); - - static size_t - length (const char_type*); - - static const char_type* - find (const char_type*, size_t, const char_type&); - - static constexpr char_type - to_char_type (const int_type& c) {return c;} - - static constexpr int_type - to_int_type (const char_type& c) {return int_type (c);} - - // Note that the following functions are not constexpr (as required by - // C++11) because their return expressions are not constexpr. - // - static bool - eq_int_type (const int_type& l, const int_type& r) {return l == r;} - - static int_type eof () {return char_type::eof;} - - static int_type - not_eof (const int_type& c) - { - return c != char_type::eof ? c : char_type::nul; - } - }; - - // ctype<> must be derived from both ctype_base and locale::facet (the later - // supports ref-counting used by the std::locale implementation internally). - // - // msvcrt for some reason also derives ctype_base from locale::facet which - // produces "already a base-class" warning and effectivelly breaks the - // reference counting. So we derive from ctype_base only in this case. - // - template <> - class ctype<build2::test::script::regex::line_char>: public ctype_base -#if !defined(_MSC_VER) || _MSC_VER >= 2000 - , public locale::facet -#endif - { - // Used by the implementation only. - // - using line_type = build2::test::script::regex::line_type; - - public: - using char_type = build2::test::script::regex::line_char; - - static locale::id id; - -#if !defined(_MSC_VER) || _MSC_VER >= 2000 - explicit - ctype (size_t refs = 0): locale::facet (refs) {} -#else - explicit - ctype (size_t refs = 0): ctype_base (refs) {} -#endif - - // While unnecessary, let's keep for completeness. - // - virtual - ~ctype () override = default; - - // The C++ standard requires the following functions to call their virtual - // (protected) do_*() counterparts that provide the real implementations. - // The only purpose for this indirection is to provide a user with the - // ability to customize existing (standard) ctype facets. As we do not - // provide such an ability, for simplicity we will omit the do_*() - // functions and provide the implementations directly. This should be safe - // as nobody except us could call those protected functions. - // - bool - is (mask m, char_type c) const - { - return m == - (c.type () == line_type::special && c.special () >= 0 && - build2::digit (static_cast<char> (c.special ())) - ? digit - : 0); - } - - const char_type* - is (const char_type*, const char_type*, mask*) const; - - const char_type* - scan_is (mask, const char_type*, const char_type*) const; - - const char_type* - scan_not (mask, const char_type*, const char_type*) const; - - char_type - toupper (char_type c) const {return c;} - - const char_type* - toupper (char_type*, const char_type* e) const {return e;} - - char_type - tolower (char_type c) const {return c;} - - const char_type* - tolower (char_type*, const char_type* e) const {return e;} - - char_type - widen (char c) const {return char_type (c);} - - const char* - widen (const char*, const char*, char_type*) const; - - char - narrow (char_type c, char def) const - { - return c.type () == line_type::special ? c.special () : def; - } - - const char_type* - narrow (const char_type*, const char_type*, char, char*) const; - }; - - // Note: the current application locale must be POSIX. Otherwise the - // behavior is undefined. - // - template <> - class regex_traits<build2::test::script::regex::line_char> - { - public: - using char_type = build2::test::script::regex::line_char; - using string_type = build2::test::script::regex::line_string; - using locale_type = build2::test::script::regex::line_char_locale; - using char_class_type = regex_traits<char>::char_class_type; - - // Workaround for msvcrt bugs. For some reason it assumes such a members - // to be present in a regex_traits specialization. - // -#if defined(_MSC_VER) && _MSC_VER < 2000 - static const ctype_base::mask _Ch_upper = ctype_base::upper; - static const ctype_base::mask _Ch_alpha = ctype_base::alpha; - - // Unsigned numeric type. msvcrt normally casts characters to this type - // for comparing with some numeric values or for calculating an index in - // some bit array. Luckily that all relates to the character class - // handling that we don't support. - // - using _Uelem = unsigned int; -#endif - - regex_traits () = default; // Unnecessary but let's keep for completeness. - - static size_t - length (const char_type* p) {return string_type::traits_type::length (p);} - - char_type - translate (char_type c) const {return c;} - - // Case-insensitive matching is not supported by line_regex. So there is no - // reason for the function to be called. - // - char_type - translate_nocase (char_type c) const {assert (false); return c;} - - // Return a sort-key - the exact copy of [b, e). - // - template <typename I> - string_type - transform (I b, I e) const {return string_type (b, e);} - - // Return a case-insensitive sort-key. Case-insensitive matching is not - // supported by line_regex. So there is no reason for the function to be - // called. - // - template <typename I> - string_type - transform_primary (I b, I e) const - { - assert (false); - return string_type (b, e); - } - - // POSIX regex grammar and collating elements (e.g., [.tilde.]) in - // particular are not supported. So there is no reason for the function to - // be called. - // - template <typename I> - string_type - lookup_collatename (I, I) const {assert (false); return string_type ();} - - // Character classes (e.g., [:lower:]) are not supported. So there is no - // reason for the function to be called. - // - template <typename I> - char_class_type - lookup_classname (I, I, bool = false) const - { - assert (false); - return char_class_type (); - } - - // Return false as we don't support character classes (e.g., [:lower:]). - // - bool - isctype (char_type, char_class_type) const {return false;} - - int - value (char_type, int) const; - - // Return the locale passed as an argument as we do not expect anything - // other than POSIX locale, that we also assume to be imbued by default. - // - locale_type - imbue (locale_type l) {return l;} - - locale_type - getloc () const {return locale_type ();} - }; - - // We assume line_char to be an unsigned type and express that with the - // following specialization used by basic_regex implementations. - // - // libstdc++ defines unsigned CharT type (regex_traits template parameter) - // to use as an index in some internal cache regardless if the cache is used - // for this specialization (and the cache is used only if CharT is char). - // - template <> - struct make_unsigned<build2::test::script::regex::line_char> - { - using type = build2::test::script::regex::line_char; - }; - - // When used with libc++ the linker complains that it can't find - // __match_any_but_newline<line_char>::__exec() function. The problem is - // that the function is only specialized for char and wchar_t - // (LLVM bug #31409). As line_char has no notion of the newline character we - // specialize the class template to behave as the __match_any<line_char> - // instantiation does (that luckily has all the functions in place). - // -#if defined(_LIBCPP_VERSION) && _LIBCPP_VERSION <= 9000 - template <> - class __match_any_but_newline<build2::test::script::regex::line_char> - : public __match_any<build2::test::script::regex::line_char> - { - public: - using base = __match_any<build2::test::script::regex::line_char>; - using base::base; - }; -#endif -} - -namespace build2 -{ - namespace test - { - namespace script - { - namespace regex - { - class line_regex: public std::basic_regex<line_char> - { - public: - using base_type = std::basic_regex<line_char>; - - using base_type::base_type; - - line_regex () = default; - - // Move string regex together with the pool used to create it. - // - line_regex (line_string&& s, line_pool&& p) - // No move-string ctor for base_type, so emulate it. - // - : base_type (s), pool (move (p)) {s.clear ();} - - // Move constuctible/assignable-only type. - // - line_regex (line_regex&&) = default; - line_regex (const line_regex&) = delete; - line_regex& operator= (line_regex&&) = default; - line_regex& operator= (const line_regex&) = delete; - - public: - line_pool pool; - }; - } - } - } -} - -#include <libbuild2/test/script/regex.ixx> - -#endif // LIBBUILD2_TEST_SCRIPT_REGEX_HXX diff --git a/libbuild2/test/script/regex.ixx b/libbuild2/test/script/regex.ixx deleted file mode 100644 index 46db9db..0000000 --- a/libbuild2/test/script/regex.ixx +++ /dev/null @@ -1,34 +0,0 @@ -// file : libbuild2/test/script/regex.ixx -*- C++ -*- -// license : MIT; see accompanying LICENSE file - -namespace build2 -{ - namespace test - { - namespace script - { - namespace regex - { - inline char_flags - operator&= (char_flags& x, char_flags y) - { - return x = static_cast<char_flags> ( - static_cast<uint16_t> (x) & static_cast<uint16_t> (y)); - } - - inline char_flags - operator|= (char_flags& x, char_flags y) - { - return x = static_cast<char_flags> ( - static_cast<uint16_t> (x) | static_cast<uint16_t> (y)); - } - - inline char_flags - operator& (char_flags x, char_flags y) {return x &= y;} - - inline char_flags - operator| (char_flags x, char_flags y) {return x |= y;} - } - } - } -} diff --git a/libbuild2/test/script/regex.test.cxx b/libbuild2/test/script/regex.test.cxx deleted file mode 100644 index 5a93c53..0000000 --- a/libbuild2/test/script/regex.test.cxx +++ /dev/null @@ -1,303 +0,0 @@ -// file : libbuild2/test/script/regex.test.cxx -*- C++ -*- -// license : MIT; see accompanying LICENSE file - -#include <regex> -#include <type_traits> // is_* - -#include <libbuild2/test/script/regex.hxx> - -using namespace std; -using namespace build2::test::script::regex; - -int -main () -{ - using lc = line_char; - using ls = line_string; - using lr = line_regex; - using cf = char_flags; - using cr = char_regex; - - init (); // Initializes the testscript regex global state. - - // Test line_char. - // - { - static_assert (is_trivial<lc>::value && - is_standard_layout<lc>::value && - !is_array<lc>::value, - "line_char must be char-like"); - - // Zero-initialed line_char should be the null-char as required by - // char_traits<>::length() specification. - // - assert (lc () == lc::nul); - - line_pool p; - - assert (lc::eof == -1); - assert (lc::nul == 0); - - enum meta {mn = 'n', mp = 'p'}; - - // Special roundtrip. - // - assert (lc ('0').special () == '0'); - assert (lc (0).special () == 0); - assert (lc (-1).special () == -1); - assert (lc ('p').special () == 'p'); - assert (lc (u'\u2028').special () == u'\u2028'); - - // Special comparison. - // - assert (lc ('0') == lc ('0')); - assert (lc ('0') == '0'); - assert (lc ('n') == mn); - assert (mn == static_cast<meta> (lc ('n'))); - - assert (lc ('0') != lc ('1')); - assert (lc ('0') != '1'); - assert (lc ('n') != mp); - assert (lc ('0') != lc ("0", p)); - assert (lc ('0') != lc (cr ("0"), p)); - - assert (lc ('0') < lc ('1')); - assert (lc ('0') < '1'); - assert (lc ('1') < lc ("0", p)); - assert (lc ('n') < mp); - - assert (lc ('0') <= '1'); - assert (lc ('0') <= lc ('1')); - assert (lc ('n') <= mn); - assert (lc ('1') <= lc ("0", p)); - - // Literal roundtrip. - // - assert (*lc ("abc", p).literal () == "abc"); - - // Literal comparison. - // - assert (lc ("a", p) == lc ("a", p)); - assert (lc ("a", p).literal () == lc ("a", p).literal ()); - assert (char (lc ("a", p)) == '\a'); - - assert (lc ("a", p) != lc ("b", p)); - assert (!(lc ("a", p) != lc (cr ("a"), p))); - assert (lc ("a", p) != lc (cr ("b"), p)); - - assert (lc ("a", p) < lc ("b", p)); - assert (!(lc ("a", p) < lc (cr ("a"), p))); - - assert (lc ("a", p) <= lc ("b", p)); - assert (lc ("a", p) <= lc (cr ("a"), p)); - assert (lc ("a", p) < lc (cr ("c"), p)); - - // Regex roundtrip. - // - assert (regex_match ("abc", *lc (cr ("abc"), p).regex ())); - - // Regex flags. - // - // icase - // - assert (regex_match ("ABC", cr ("abc", cf::icase))); - - // idot - // - assert (!regex_match ("a", cr ("[.]", cf::idot))); - assert (!regex_match ("a", cr ("[\\.]", cf::idot))); - - assert (regex_match ("a", cr ("."))); - assert (!regex_match ("a", cr (".", cf::idot))); - assert (regex_match ("a", cr ("\\.", cf::idot))); - assert (!regex_match ("a", cr ("\\."))); - - // regex::transform() - // - // The function is static and we can't test it directly. So we will test - // it indirectly via regex matches. - // - // @@ Would be nice to somehow address the inability to test internals (not - // exposed via headers). As a part of utility library support? - // - assert (regex_match (".a[.", cr (".\\.\\[[.]", cf::idot))); - assert (regex_match (".a[.", cr (".\\.\\[[\\.]", cf::idot))); - assert (!regex_match ("ba[.", cr (".\\.\\[[.]", cf::idot))); - assert (!regex_match (".a[b", cr (".\\.\\[[.]", cf::idot))); - assert (!regex_match (".a[b", cr (".\\.\\[[\\.]", cf::idot))); - - // Regex comparison. - // - assert (lc ("a", p) == lc (cr ("a|b"), p)); - assert (lc (cr ("a|b"), p) == lc ("a", p)); - } - - // Test char_traits<line_char>. - // - { - using ct = char_traits<lc>; - using vc = vector<lc>; - - lc c; - ct::assign (c, '0'); - assert (c == ct::char_type ('0')); - - assert (ct::to_char_type (c) == c); - assert (ct::to_int_type (c) == c); - - assert (ct::eq_int_type (c, c)); - assert (!ct::eq_int_type (c, lc::eof)); - - assert (ct::eof () == lc::eof); - - assert (ct::not_eof (c) == c); - assert (ct::not_eof (lc::eof) != lc::eof); - - ct::assign (&c, 1, '1'); - assert (c == ct::int_type ('1')); - - assert (ct::eq (lc ('0'), lc ('0'))); - assert (ct::lt (lc ('0'), lc ('1'))); - - vc v1 ({'0', '1', '2'}); - vc v2 (3, lc::nul); - - assert (ct::find (v1.data (), 3, '1') == v1.data () + 1); - - ct::copy (v2.data (), v1.data (), 3); - assert (v2 == v1); - - v2.push_back (lc::nul); - assert (ct::length (v2.data ()) == 3); - - // Overlaping ranges. - // - ct::move (v1.data () + 1, v1.data (), 2); - assert (v1 == vc ({'0', '0', '1'})); - - v1 = vc ({'0', '1', '2'}); - ct::move (v1.data (), v1.data () + 1, 2); - assert (v1 == vc ({'1', '2', '2'})); - } - - // Test line_char_locale and ctype<line_char> (only non-trivial functions). - // - { - using ct = ctype<lc>; - - line_char_locale l; - - // It is better not to create q facet on stack as it is - // reference-countable. - // - const ct& t (use_facet<ct> (l)); - line_pool p; - - assert (t.is (ct::digit, '0')); - assert (!t.is (ct::digit, '?')); - assert (!t.is (ct::digit, lc ("0", p))); - - const lc chars[] = { '0', '?' }; - ct::mask m[2]; - - const lc* b (chars); - const lc* e (chars + 2); - - // Cast flag value to mask type and compare to mask. - // - auto fl = [] (ct::mask m, ct::mask f) {return m == f;}; - - t.is (b, e, m); - assert (fl (m[0], ct::digit) && fl (m[1], 0)); - - assert (t.scan_is (ct::digit, b, e) == b); - assert (t.scan_is (0, b, e) == b + 1); - - assert (t.scan_not (ct::digit, b, e) == b + 1); - assert (t.scan_not (0, b, e) == b); - - { - char nr[] = "0?"; - lc wd[2]; - t.widen (nr, nr + 2, wd); - assert (wd[0] == b[0] && wd[1] == b[1]); - } - - { - lc wd[] = {'0', lc ("a", p)}; - char nr[2]; - t.narrow (wd, wd + 2, '-', nr); - assert (nr[0] == '0' && nr[1] == '-'); - } - } - - // Test regex_traits<line_char>. Functions other that value() are trivial. - // - { - regex_traits<lc> t; - - const int radix[] = {8, 10}; // Radix 16 is not supported by line_char. - const char digits[] = "0123456789ABCDEF"; - - for (size_t r (0); r < 2; ++r) - { - for (int i (0); i < radix[r]; ++i) - assert (t.value (digits[i], radix[r]) == i); - } - } - - // Test line_regex construction. - // - { - line_pool p; - lr r1 ({lc ("foo", p), lc (cr ("ba(r|z)"), p)}, move (p)); - - lr r2 (move (r1)); - assert (regex_match (ls ({lc ("foo", r2.pool), lc ("bar", r2.pool)}), r2)); - assert (!regex_match (ls ({lc ("foo", r2.pool), lc ("ba", r2.pool)}), r2)); - } - - // Test line_regex match. - // - { - line_pool p; - - const lc foo ("foo", p); - const lc bar ("bar", p); - const lc baz ("baz", p); - const lc blank ("", p); - - assert (regex_match (ls ({foo, bar}), lr ({foo, bar}))); - assert (!regex_match (ls ({foo, baz}), lr ({foo, bar}))); - - assert (regex_match (ls ({bar, foo}), - lr ({'(', foo, '|', bar, ')', '+'}))); - - assert (regex_match (ls ({foo, foo, bar}), - lr ({'(', foo, ')', '\\', '1', bar}))); - - assert (regex_match (ls ({foo}), lr ({lc (cr ("fo+"), p)}))); - assert (regex_match (ls ({foo}), lr ({lc (cr (".*"), p)}))); - assert (regex_match (ls ({blank}), lr ({lc (cr (".*"), p)}))); - - assert (regex_match (ls ({blank, blank, foo}), - lr ({blank, '*', foo, blank, '*'}))); - - assert (regex_match (ls ({blank, blank, foo}), lr ({'.', '*'}))); - - assert (regex_match (ls ({blank, blank}), - lr ({blank, '*', foo, '?', blank, '*'}))); - - assert (regex_match (ls ({foo}), lr ({foo, '{', '1', '}'}))); - assert (regex_match (ls ({foo, foo}), lr ({foo, '{', '1', ',', '}'}))); - - assert (regex_match (ls ({foo, foo}), - lr ({foo, '{', '1', ',', '2', '}'}))); - - assert (!regex_match (ls ({foo, foo}), - lr ({foo, '{', '3', ',', '4', '}'}))); - - assert (regex_match (ls ({foo}), lr ({'(', '?', '=', foo, ')', foo}))); - assert (regex_match (ls ({foo}), lr ({'(', '?', '!', bar, ')', foo}))); - } -} diff --git a/libbuild2/test/script/runner.cxx b/libbuild2/test/script/runner.cxx index b40dea8..03a1f0e 100644 --- a/libbuild2/test/script/runner.cxx +++ b/libbuild2/test/script/runner.cxx @@ -3,696 +3,17 @@ #include <libbuild2/test/script/runner.hxx> -#include <ios> // streamsize - -#include <libbutl/regex.mxx> -#include <libbutl/builtin.mxx> -#include <libbutl/fdstream.mxx> // fdopen_mode, fddup() -#include <libbutl/filesystem.mxx> // path_search() -#include <libbutl/path-pattern.mxx> - -#include <libbuild2/variable.hxx> -#include <libbuild2/filesystem.hxx> -#include <libbuild2/diagnostics.hxx> +#include <libbuild2/script/run.hxx> #include <libbuild2/test/common.hxx> -#include <libbuild2/test/script/regex.hxx> -#include <libbuild2/test/script/parser.hxx> -#include <libbuild2/test/script/builtin-options.hxx> - -using namespace std; -using namespace butl; - namespace build2 { namespace test { namespace script { - // Normalize a path. Also make the relative path absolute using the - // scope's working directory unless it is already absolute. - // - static path - normalize (path p, const scope& sp, const location& l) - { - path r (p.absolute () ? move (p) : sp.wd_path / move (p)); - - try - { - r.normalize (); - } - catch (const invalid_path& e) - { - fail (l) << "invalid file path " << e.path; - } - - return r; - } - - // Check if a path is not empty, the referenced file exists and is not - // empty. - // - static bool - non_empty (const path& p, const location& ll) - { - if (p.empty () || !exists (p)) - return false; - - try - { - ifdstream is (p); - return is.peek () != ifdstream::traits_type::eof (); - } - catch (const io_error& e) - { - // While there can be no fault of the test command being currently - // executed let's add the location anyway to ease the - // troubleshooting. And let's stick to that principle down the road. - // - fail (ll) << "unable to read " << p << ": " << e << endf; - } - } - - // If the file exists, not empty and not larger than 4KB print it to the - // diag record. The file content goes from the new line and is not - // indented. - // - static void - print_file (diag_record& d, const path& p, const location& ll) - { - if (exists (p)) - { - try - { - ifdstream is (p, ifdstream::badbit); - - if (is.peek () != ifdstream::traits_type::eof ()) - { - char buf[4096 + 1]; // Extra byte is for terminating '\0'. - - // Note that the string is always '\0'-terminated with a maximum - // sizeof (buf) - 1 bytes read. - // - is.getline (buf, sizeof (buf), '\0'); - - // Print if the file fits 4KB-size buffer. Note that if it - // doesn't the failbit is set. - // - if (is.eof ()) - { - // Suppress the trailing newline character as the diag record - // adds it's own one when flush. - // - streamsize n (is.gcount ()); - assert (n > 0); - - // Note that if the file contains '\0' it will also be counted - // by gcount(). But even in the worst case we will stay in the - // buffer boundaries (and so not crash). - // - if (buf[n - 1] == '\n') - buf[n - 1] = '\0'; - - d << '\n' << buf; - } - } - } - catch (const io_error& e) - { - fail (ll) << "unable to read " << p << ": " << e; - } - } - } - - // Print first 10 directory sub-entries to the diag record. The directory - // must exist. - // - static void - print_dir (diag_record& d, const dir_path& p, const location& ll) - { - try - { - size_t n (0); - for (const dir_entry& de: dir_iterator (p, - false /* ignore_dangling */)) - { - if (n++ < 10) - d << '\n' << (de.ltype () == entry_type::directory - ? path_cast<dir_path> (de.path ()) - : de.path ()); - } - - if (n > 10) - d << "\nand " << n - 10 << " more file(s)"; - } - catch (const system_error& e) - { - fail (ll) << "unable to iterate over " << p << ": " << e; - } - } - - // Save a string to the file. Fail if exception is thrown by underlying - // operations. - // - static void - save (const path& p, const string& s, const location& ll) - { - try - { - ofdstream os (p); - os << s; - os.close (); - } - catch (const io_error& e) - { - fail (ll) << "unable to write to " << p << ": " << e; - } - } - - // Return the value of the test.target variable. - // - static inline const target_triplet& - test_target (const script& s) - { - // @@ Would be nice to use cached value from test::common_data. - // - if (auto r = cast_null<target_triplet> (s.test_target["test.target"])) - return *r; - - // We set it to default value in init() so it can only be NULL if the - // user resets it. - // - fail << "invalid test.target value" << endf; - } - - // Transform string according to here-* redirect modifiers from the {/} - // set. - // - static string - transform (const string& s, - bool regex, - const string& modifiers, - const script& scr) - { - if (modifiers.find ('/') == string::npos) - return s; - - // For targets other than Windows leave the string intact. - // - if (test_target (scr).class_ != "windows") - return s; - - // Convert forward slashes to Windows path separators (escape for - // regex). - // - string r; - for (size_t p (0);;) - { - size_t sp (s.find ('/', p)); - - if (sp != string::npos) - { - r.append (s, p, sp - p); - r.append (regex ? "\\\\" : "\\"); - p = sp + 1; - } - else - { - r.append (s, p, sp); - break; - } - } - - return r; - } - - // Check if the test command output matches the expected result (redirect - // value). Noop for redirect types other than none, here_*. - // - static bool - check_output (const path& pr, - const path& op, - const path& ip, - const redirect& rd, - const location& ll, - scope& sp, - bool diag, - const char* what) - { - auto input_info = [&ip, &ll] (diag_record& d) - { - if (non_empty (ip, ll)) - d << info << "stdin: " << ip; - }; - - auto output_info = [&what, &ll] (diag_record& d, - const path& p, - const char* prefix = "", - const char* suffix = "") - { - if (non_empty (p, ll)) - d << info << prefix << what << suffix << ": " << p; - else - d << info << prefix << what << suffix << " is empty"; - }; - - if (rd.type == redirect_type::none) - { - // Check that there is no output produced. - // - assert (!op.empty ()); - - if (!non_empty (op, ll)) - return true; - - if (diag) - { - diag_record d (error (ll)); - d << pr << " unexpectedly writes to " << what << - info << what << ": " << op; - - input_info (d); - - // Print cached output. - // - print_file (d, op, ll); - } - - // Fall through (to return false). - // - } - else if (rd.type == redirect_type::here_str_literal || - rd.type == redirect_type::here_doc_literal || - (rd.type == redirect_type::file && - rd.file.mode == redirect_fmode::compare)) - { - // The expected output is provided as a file or as a string. Save the - // string to a file in the later case. - // - assert (!op.empty ()); - - path eop; - - if (rd.type == redirect_type::file) - eop = normalize (rd.file.path, sp, ll); - else - { - eop = path (op + ".orig"); - save (eop, transform (rd.str, false, rd.modifiers, sp.root), ll); - sp.clean_special (eop); - } - - // Use the diff utility for comparison. - // - path dp ("diff"); - process_path pp (run_search (dp, true)); - - cstrings args {pp.recall_string (), "-u"}; - - // Ignore Windows newline fluff if that's what we are running on. - // - if (test_target (sp.root).class_ == "windows") - args.push_back ("--strip-trailing-cr"); - - args.push_back (eop.string ().c_str ()); - args.push_back (op.string ().c_str ()); - args.push_back (nullptr); - - if (verb >= 2) - print_process (args); - - try - { - // Save diff's stdout to a file for troubleshooting and for the - // optional (if not too large) printing (at the end of - // diagnostics). - // - path ep (op + ".diff"); - auto_fd efd; - - try - { - efd = fdopen (ep, fdopen_mode::out | fdopen_mode::create); - sp.clean_special (ep); - } - catch (const io_error& e) - { - fail (ll) << "unable to write to " << ep << ": " << e; - } - - // Diff utility prints the differences to stdout. But for the - // user it is a part of the test failure diagnostics so let's - // redirect stdout to stderr. - // - process p (pp, args.data (), 0, 2, efd.get ()); - efd.reset (); - - if (p.wait ()) - return true; - - assert (p.exit); - const process_exit& pe (*p.exit); - - // Note that both POSIX and GNU diff report error by exiting with - // the code > 1. - // - if (!pe.normal () || pe.code () > 1) - { - diag_record d (fail (ll)); - print_process (d, args); - d << " " << pe; - } - - // Output doesn't match the expected result. - // - if (diag) - { - diag_record d (error (ll)); - d << pr << " " << what << " doesn't match expected"; - - output_info (d, op); - output_info (d, eop, "expected "); - output_info (d, ep, "", " diff"); - input_info (d); - - print_file (d, ep, ll); - } - - // Fall through (to return false). - // - } - catch (const process_error& e) - { - error (ll) << "unable to execute " << pp << ": " << e; - - if (e.child) - exit (1); - - throw failed (); - } - } - else if (rd.type == redirect_type::here_str_regex || - rd.type == redirect_type::here_doc_regex) - { - // The overall plan is: - // - // 1. Create regex line string. While creating it's line characters - // transform regex lines according to the redirect modifiers. - // - // 2. Create line regex using the line string. If creation fails - // then save the (transformed) regex redirect to a file for - // troubleshooting. - // - // 3. Parse the output into the literal line string. - // - // 4. Match the output line string with the line regex. - // - // 5. If match fails save the (transformed) regex redirect to a file - // for troubleshooting. - // - using namespace regex; - - assert (!op.empty ()); - - // Create regex line string. - // - line_pool pool; - line_string rls; - const regex_lines rl (rd.regex); - - // Parse regex flags. - // - // When add support for new flags don't forget to update - // parse_regex(). - // - auto parse_flags = [] (const string& f) -> char_flags - { - char_flags r (char_flags::none); - - for (char c: f) - { - switch (c) - { - case 'd': r |= char_flags::idot; break; - case 'i': r |= char_flags::icase; break; - default: assert (false); // Error so should have been checked. - } - } - - return r; - }; - - // Return original regex line with the transformation applied. - // - auto line = [&rl, &rd, &sp] (const regex_line& l) -> string - { - string r; - if (l.regex) // Regex (possibly empty), - { - r += rl.intro; - r += transform (l.value, true, rd.modifiers, sp.root); - r += rl.intro; - r += l.flags; - } - else if (!l.special.empty ()) // Special literal. - r += rl.intro; - else // Textual literal. - r += transform (l.value, false, rd.modifiers, sp.root); - - r += l.special; - return r; - }; - - // Return regex line location. - // - // Note that we rely on the fact that the command and regex lines - // are always belong to the same testscript file. - // - auto loc = [&ll] (uint64_t line, uint64_t column) -> location - { - location r (ll); - r.line = line; - r.column = column; - return r; - }; - - // Save the regex to file for troubleshooting, return the file path - // it have been saved to. - // - // Note that we save the regex on line regex creation failure or if - // the program output doesn't match. - // - auto save_regex = [&op, &rl, &rd, &ll, &line] () -> path - { - path rp (op + ".regex"); - - // Encode here-document regex global flags if present as a file - // name suffix. For example if icase and idot flags are specified - // the name will look like: - // - // test/1/stdout.regex-di - // - if (rd.type == redirect_type::here_doc_regex && !rl.flags.empty ()) - rp += '-' + rl.flags; - - // Note that if would be more efficient to directly write chunks - // to file rather than to compose a string first. Hower we don't - // bother (about performance) for the sake of the code as we - // already failed. - // - string s; - for (auto b (rl.lines.cbegin ()), i (b), e (rl.lines.cend ()); - i != e; ++i) - { - if (i != b) s += '\n'; - s += line (*i); - } - - save (rp, s, ll); - return rp; - }; - - // Finally create regex line string. - // - // Note that diagnostics doesn't refer to the program path as it is - // irrelevant to failures at this stage. - // - char_flags gf (parse_flags (rl.flags)); // Regex global flags. - - for (const auto& l: rl.lines) - { - if (l.regex) // Regex (with optional special characters). - { - line_char c; - - // Empty regex is a special case repesenting the blank line. - // - if (l.value.empty ()) - c = line_char ("", pool); - else - { - try - { - string s (transform (l.value, true, rd.modifiers, sp.root)); - - c = line_char ( - char_regex (s, gf | parse_flags (l.flags)), pool); - } - catch (const regex_error& e) - { - // Print regex_error description if meaningful. - // - diag_record d (fail (loc (l.line, l.column))); - - if (rd.type == redirect_type::here_str_regex) - d << "invalid " << what << " regex redirect" << e << - info << "regex: '" << line (l) << "'"; - else - d << "invalid char-regex in " << what << " regex redirect" - << e << - info << "regex line: '" << line (l) << "'"; - - d << endf; - } - } - - rls += c; // Append blank literal or regex line char. - } - else if (!l.special.empty ()) // Special literal. - { - // Literal can not be followed by special characters in the same - // line. - // - assert (l.value.empty ()); - } - else // Textual literal. - { - // Append literal line char. - // - rls += line_char ( - transform (l.value, false, rd.modifiers, sp.root), pool); - } - - for (char c: l.special) - { - if (line_char::syntax (c)) - rls += line_char (c); // Append special line char. - else - fail (loc (l.line, l.column)) - << "invalid syntax character '" << c << "' in " << what - << " regex redirect" << - info << "regex line: '" << line (l) << "'"; - } - } - - // Create line regex. - // - line_regex regex; - - try - { - regex = line_regex (move (rls), move (pool)); - } - catch (const regex_error& e) - { - // Note that line regex creation can not fail for here-string - // redirect as it doesn't have syntax line chars. That in - // particular means that end_line and end_column are meaningful. - // - assert (rd.type == redirect_type::here_doc_regex); - - diag_record d (fail (loc (rd.end_line, rd.end_column))); - - // Print regex_error description if meaningful. - // - d << "invalid " << what << " regex redirect" << e; - - output_info (d, save_regex (), "", " regex"); - } - - // Parse the output into the literal line string. - // - line_string ls; - - try - { - // Do not throw when eofbit is set (end of stream reached), and - // when failbit is set (getline() failed to extract any character). - // - // Note that newlines are treated as line-chars separators. That - // in particular means that the trailing newline produces a blank - // line-char (empty literal). Empty output produces the zero-length - // line-string. - // - // Also note that we strip the trailing CR characters (otherwise - // can mismatch when cross-test). - // - ifdstream is (op, ifdstream::badbit); - is.peek (); // Sets eofbit for an empty stream. - - while (!is.eof ()) - { - string s; - getline (is, s); - - // It is safer to strip CRs in cycle, as msvcrt unexplainably - // adds too much trailing junk to the system_error descriptions, - // and so it can appear in programs output. For example: - // - // ...: Invalid data.\r\r\n - // - // Note that our custom operator<<(ostream&, const exception&) - // removes this junk. - // - while (!s.empty () && s.back () == '\r') - s.pop_back (); - - ls += line_char (move (s), regex.pool); - } - } - catch (const io_error& e) - { - fail (ll) << "unable to read " << op << ": " << e; - } - - // Match the output with the regex. - // - if (regex_match (ls, regex)) // Doesn't throw. - return true; - - // Output doesn't match the regex. We save the regex to file for - // troubleshooting regardless of whether we print the diagnostics or - // not. We, however, register it for cleanup in the later case (the - // expression may still succeed, we can be evaluating the if - // condition, etc). - // - path rp (save_regex ()); - - if (diag) - { - diag_record d (error (ll)); - d << pr << " " << what << " doesn't match regex"; - - output_info (d, op); - output_info (d, rp, "", " regex"); - input_info (d); - - // Print cached output. - // - print_file (d, op, ll); - } - else - sp.clean_special (rp); - - // Fall through (to return false). - // - } - else // Noop. - return true; - - return false; - } + using namespace build2::script; bool default_runner:: test (scope& s) const @@ -703,7 +24,7 @@ namespace build2 void default_runner:: enter (scope& sp, const location&) { - context& ctx (sp.root.target_scope.ctx); + context& ctx (sp.context); auto df = make_diag_frame ( [&sp](const diag_record& dr) @@ -730,29 +51,25 @@ namespace build2 sp.parent == nullptr ? mkdir_buildignore ( ctx, - sp.wd_path, + *sp.work_dir.path, sp.root.target_scope.root_scope ()->root_extra->buildignore_file, 2) - : mkdir (sp.wd_path, 2)); + : mkdir (*sp.work_dir.path, 2)); if (r == mkdir_status::already_exists) - fail << "working directory " << sp.wd_path << " already exists" << + fail << diag_path (sp.work_dir) << " already exists" << info << "are tests stomping on each other's feet?"; // We don't change the current directory here but indicate that the // scope test commands will be executed in that directory. // if (verb >= 2) - text << "cd " << sp.wd_path; - - sp.clean ({cleanup_type::always, sp.wd_path}, true); + text << "cd " << *sp.work_dir.path; } void default_runner:: leave (scope& sp, const location& ll) { - context& ctx (sp.root.target_scope.ctx); - auto df = make_diag_frame ( [&sp](const diag_record& dr) { @@ -766,200 +83,30 @@ namespace build2 // if (common_.after == output_after::clean) { - // Note that we operate with normalized paths here. - // - // Remove special files. The order is not important as we don't - // expect directories here. - // - for (const auto& p: sp.special_cleanups) - { - // Remove the file if exists. Fail otherwise. - // - if (rmfile (ctx, p, 3) == rmfile_status::not_exist) - fail (ll) << "registered for cleanup special file " << p - << " does not exist"; - } - - // Remove files and directories in the order opposite to the order of - // cleanup registration. - // - for (const auto& c: reverse_iterate (sp.cleanups)) - { - cleanup_type t (c.type); - - // Skip whenever the path exists or not. - // - if (t == cleanup_type::never) - continue; - - const path& cp (c.path); + clean (sp, ll); - // Wildcard with the last component being '***' (without trailing - // separator) matches all files and sub-directories recursively as - // well as the start directories itself. So we will recursively - // remove the directories that match the parent (for the original - // path) directory wildcard. - // - bool recursive (cp.leaf ().representation () == "***"); - const path& p (!recursive ? cp : cp.directory ()); + context& ctx (sp.context); - // Remove files or directories using wildcard. - // - if (path_pattern (p)) - { - bool removed (false); + rmdir_status r ( + sp.parent == nullptr + ? rmdir_buildignore (ctx, + *sp.work_dir.path, + sp.root.target_scope.root_scope ()-> + root_extra->buildignore_file, + 2) + : rmdir (ctx, *sp.work_dir.path, 2)); - auto rm = [&cp, recursive, &removed, &sp, &ll, &ctx] - (path&& pe, const string&, bool interm) - { - if (!interm) - { - // While removing the entry we can get not_exist due to - // racing conditions, but that's ok if somebody did our job. - // Note that we still set the removed flag to true in this - // case. - // - removed = true; // Will be meaningless on failure. - - if (pe.to_directory ()) - { - dir_path d (path_cast<dir_path> (pe)); - - if (!recursive) - { - rmdir_status r (rmdir (ctx, d, 3)); - - if (r != rmdir_status::not_empty) - return true; - - diag_record dr (fail (ll)); - dr << "registered for cleanup directory " << d - << " is not empty"; - - print_dir (dr, d, ll); - dr << info << "wildcard: '" << cp << "'"; - } - else - { - // Don't remove the working directory (it will be removed - // by the dedicated cleanup). - // - // Cast to uint16_t to avoid ambiguity with - // libbutl::rmdir_r(). - // - rmdir_status r (rmdir_r (ctx, d, d != sp.wd_path, 3)); - - if (r != rmdir_status::not_empty) - return true; - - // The directory is unlikely to be current but let's keep - // for completeness. - // - fail (ll) << "registered for cleanup wildcard " << cp - << " matches the current directory"; - } - } - else - rmfile (ctx, pe, 3); - } - - return true; - }; - - // Note that here we rely on the fact that recursive iterating - // goes depth-first (which make sense for the cleanup). - // - try - { - // Doesn't follow symlinks. - // - path_search (p, - rm, - dir_path () /* start */, - path_match_flags::none); - } - catch (const system_error& e) - { - fail (ll) << "unable to cleanup wildcard " << cp << ": " << e; - } - - // Removal of no filesystem entries is not an error for 'maybe' - // cleanup type. - // - if (removed || t == cleanup_type::maybe) - continue; - - fail (ll) << "registered for cleanup wildcard " << cp - << " doesn't match any " - << (recursive - ? "path" - : p.to_directory () - ? "directory" - : "file"); - } - - // Remove the directory if exists and empty. Fail otherwise. - // Removal of non-existing directory is not an error for 'maybe' - // cleanup type. - // - if (p.to_directory ()) - { - dir_path d (path_cast<dir_path> (p)); - bool wd (d == sp.wd_path); - - // Trace the scope working directory removal with the verbosity - // level 2 (that was used for its creation). For other - // directories use level 3 (as for other cleanups). - // - int v (wd ? 2 : 3); - - // Don't remove the working directory for the recursive cleanup - // (it will be removed by the dedicated one). - // - // Note that the root working directory contains the - // .buildignore file (see above). - // - // @@ If 'd' is a file then will fail with a diagnostics having - // no location info. Probably need to add an optional location - // parameter to rmdir() function. The same problem exists for - // a file cleanup when try to rmfile() directory instead of - // file. - // - rmdir_status r ( - recursive - ? rmdir_r (ctx, d, !wd, static_cast <uint16_t> (v)) - : (wd && sp.parent == nullptr - ? rmdir_buildignore ( - ctx, - d, - sp.root.target_scope.root_scope ()->root_extra-> - buildignore_file, - v) - : rmdir (ctx, d, v))); - - if (r == rmdir_status::success || - (r == rmdir_status::not_exist && t == cleanup_type::maybe)) - continue; - - diag_record dr (fail (ll)); - dr << "registered for cleanup directory " << d - << (r == rmdir_status::not_exist - ? " does not exist" - : !recursive - ? " is not empty" - : " is current"); + if (r != rmdir_status::success) + { + diag_record dr (fail (ll)); - if (r == rmdir_status::not_empty) - print_dir (dr, d, ll); - } + dr << diag_path (sp.work_dir) + << (r == rmdir_status::not_exist + ? " does not exist" + : " is not empty"); - // Remove the file if exists. Fail otherwise. Removal of - // non-existing file is not an error for 'maybe' cleanup type. - // - if (rmfile (ctx, p, 3) == rmfile_status::not_exist && - t == cleanup_type::always) - fail (ll) << "registered for cleanup file " << p - << " does not exist"; + if (r == rmdir_status::not_empty) + print_dir (dr, *sp.work_dir.path, ll); } } @@ -968,1102 +115,14 @@ namespace build2 // if (verb >= 2) text << "cd " << (sp.parent != nullptr - ? sp.parent->wd_path - : sp.wd_path.directory ()); - } - - // The exit pseudo-builtin: exit the current scope successfully, or - // print the diagnostics and exit the current scope and all the outer - // scopes unsuccessfully. Always throw exit_scope exception. - // - // exit [<diagnostics>] - // - [[noreturn]] static void - exit_builtin (const strings& args, const location& ll) - { - auto i (args.begin ()); - auto e (args.end ()); - - // Process arguments. - // - // If no argument is specified, then exit successfully. Otherwise, - // print the diagnostics and exit unsuccessfully. - // - if (i == e) - throw exit_scope (true); - - const string& s (*i++); - - if (i != e) - fail (ll) << "unexpected argument '" << *i << "'"; - - error (ll) << s; - throw exit_scope (false); - } - - // The set pseudo-builtin: set variable from the stdin input. - // - // set [-e|--exact] [(-n|--newline)|(-w|--whitespace)] [<attr>] <var> - // - static void - set_builtin (scope& sp, - const strings& args, - auto_fd in, - const location& ll) - { - try - { - // Do not throw when eofbit is set (end of stream reached), and - // when failbit is set (read operation failed to extract any - // character). - // - ifdstream cin (move (in), ifdstream::badbit); - - // Parse arguments. - // - cli::vector_scanner scan (args); - set_options ops (scan); - - if (ops.whitespace () && ops.newline ()) - fail (ll) << "both -n|--newline and -w|--whitespace specified"; - - if (!scan.more ()) - fail (ll) << "missing variable name"; - - string a (scan.next ()); // Either attributes or variable name. - const string* ats (!scan.more () ? nullptr : &a); - const string& vname (!scan.more () ? a : scan.next ()); - - if (scan.more ()) - fail (ll) << "unexpected argument '" << scan.next () << "'"; - - if (ats != nullptr && ats->empty ()) - fail (ll) << "empty variable attributes"; - - if (vname.empty ()) - fail (ll) << "empty variable name"; - - // Read the input. - // - cin.peek (); // Sets eofbit for an empty stream. - - names ns; - while (!cin.eof ()) - { - // Read next element that depends on the whitespace mode being - // enabled or not. For the later case it also make sense to strip - // the trailing CRs that can appear while cross-testing Windows - // target or as a part of msvcrt junk production (see above). - // - string s; - if (ops.whitespace ()) - cin >> s; - else - { - getline (cin, s); - - while (!s.empty () && s.back () == '\r') - s.pop_back (); - } - - // If failbit is set then we read nothing into the string as eof is - // reached. That in particular means that the stream has trailing - // whitespaces (possibly including newlines) if the whitespace mode - // is enabled, or the trailing newline otherwise. If so then - // we append the "blank" to the variable value in the exact mode - // prior to bailing out. - // - if (cin.fail ()) - { - if (ops.exact ()) - { - if (ops.whitespace () || ops.newline ()) - ns.emplace_back (move (s)); // Reuse empty string. - else if (ns.empty ()) - ns.emplace_back ("\n"); - else - ns[0].value += '\n'; - } - - break; - } - - if (ops.whitespace () || ops.newline () || ns.empty ()) - ns.emplace_back (move (s)); - else - { - ns[0].value += '\n'; - ns[0].value += s; - } - } - - cin.close (); - - // Set the variable value and attributes. Note that we need to aquire - // unique lock before potentially changing the script's variable - // pool. The obtained variable reference can safelly be used with no - // locking as the variable pool is an associative container - // (underneath) and we are only adding new variables into it. - // - ulock ul (sp.root.var_pool_mutex); - const variable& var (sp.root.var_pool.insert (move (vname))); - ul.unlock (); - - value& lhs (sp.assign (var)); - - // If there are no attributes specified then the variable assignment - // is straightforward. Otherwise we will use the build2 parser helper - // function. - // - if (ats == nullptr) - lhs.assign (move (ns), &var); - else - { - // If there is an error in the attributes string, our diagnostics - // will look like this: - // - // <attributes>:1:1 error: unknown value attribute x - // testscript:10:1 info: while parsing attributes '[x]' - // - auto df = make_diag_frame ( - [ats, &ll](const diag_record& dr) - { - dr << info (ll) << "while parsing attributes '" << *ats << "'"; - }); - - parser p (sp.root.test_target.ctx); - p.apply_value_attributes (&var, - lhs, - value (move (ns)), - *ats, - token_type::assign, - path_name ("<attributes>")); - } - } - catch (const io_error& e) - { - fail (ll) << "set: " << e; - } - catch (const cli::exception& e) - { - fail (ll) << "set: " << e; - } - } - - // Sorted array of builtins that support filesystem entries cleanup. - // - static const char* cleanup_builtins[] = { - "cp", "ln", "mkdir", "mv", "touch"}; - - static inline bool - cleanup_builtin (const string& name) - { - return binary_search ( - cleanup_builtins, - cleanup_builtins + - sizeof (cleanup_builtins) / sizeof (*cleanup_builtins), - name); - } - - static bool - run_pipe (scope& sp, - command_pipe::const_iterator bc, - command_pipe::const_iterator ec, - auto_fd ifd, - size_t ci, size_t li, const location& ll, - bool diag) - { - if (bc == ec) // End of the pipeline. - return true; - - // The overall plan is to run the first command in the pipe, reading - // its input from the file descriptor passed (or, for the first - // command, according to stdin redirect specification) and redirecting - // its output to the right-hand part of the pipe recursively. Fail if - // the right-hand part fails. Otherwise check the process exit code, - // match stderr (and stdout for the last command in the pipe) according - // to redirect specification(s) and fail if any of the above fails. - // - const command& c (*bc); - - // Register the command explicit cleanups. Verify that the path being - // cleaned up is a sub-path of the testscript working directory. Fail - // if this is not the case. - // - for (const auto& cl: c.cleanups) - { - const path& p (cl.path); - path np (normalize (p, sp, ll)); - - const string& ls (np.leaf ().string ()); - bool wc (ls == "*" || ls == "**" || ls == "***"); - const path& cp (wc ? np.directory () : np); - const dir_path& wd (sp.root.wd_path); - - if (!cp.sub (wd)) - fail (ll) << (wc - ? "wildcard" - : p.to_directory () - ? "directory" - : "file") - << " cleanup " << p << " is out of working directory " - << wd; - - sp.clean ({cl.type, move (np)}, false); - } - - const redirect& in (c.in.effective ()); - const redirect& out (c.out.effective ()); - const redirect& err (c.err.effective ()); - bool eq (c.exit.comparison == exit_comparison::eq); - - // If stdin file descriptor is not open then this is the first pipeline - // command. - // - bool first (ifd.get () == -1); - - command_pipe::const_iterator nc (bc + 1); - bool last (nc == ec); - - const string& program (c.program.string ()); - - // Prior to opening file descriptors for command input/output - // redirects let's check if the command is the exit builtin. Being a - // builtin syntactically it differs from the regular ones in a number - // of ways. It doesn't communicate with standard streams, so - // redirecting them is meaningless. It may appear only as a single - // command in a pipeline. It doesn't return any value and stops the - // scope execution, so checking its exit status is meaningless as - // well. That all means we can short-circuit here calling the builtin - // and bailing out right after that. Checking that the user didn't - // specify any redirects or exit code check sounds like a right thing - // to do. - // - if (program == "exit") - { - // In case the builtin is erroneously pipelined from the other - // command, we will close stdin gracefully (reading out the stream - // content), to make sure that the command doesn't print any - // unwanted diagnostics about IO operation failure. - // - // Note that dtor will ignore any errors (which is what we want). - // - ifdstream is (move (ifd), fdstream_mode::skip); - - if (!first || !last) - fail (ll) << "exit builtin must be the only pipe command"; - - if (in.type != redirect_type::none) - fail (ll) << "exit builtin stdin cannot be redirected"; - - if (out.type != redirect_type::none) - fail (ll) << "exit builtin stdout cannot be redirected"; - - if (err.type != redirect_type::none) - fail (ll) << "exit builtin stderr cannot be redirected"; - - // We can't make sure that there is no exit code check. Let's, at - // least, check that non-zero code is not expected. - // - if (eq != (c.exit.code == 0)) - fail (ll) << "exit builtin exit code cannot be non-zero"; - - exit_builtin (c.arguments, ll); // Throws exit_scope exception. - } - - // Create a unique path for a command standard stream cache file. - // - auto std_path = [&sp, &ci, &li, &ll] (const char* n) -> path - { - path p (n); - - // 0 if belongs to a single-line test scope, otherwise is the - // command line number (start from one) in the test scope. - // - if (li > 0) - p += "-" + to_string (li); - - // 0 if belongs to a single-command expression, otherwise is the - // command number (start from one) in the expression. - // - // Note that the name like stdin-N can relate to N-th command of a - // single-line test or to N-th single-command line of multi-line - // test. These cases are mutually exclusive and so are unambiguous. - // - if (ci > 0) - p += "-" + to_string (ci); - - return normalize (move (p), sp, ll); - }; - - // If this is the first pipeline command, then open stdin descriptor - // according to the redirect specified. - // - path isp; - - if (!first) - assert (in.type == redirect_type::none); // No redirect expected. - else - { - // Open a file for passing to the command stdin. - // - auto open_stdin = [&isp, &ifd, &ll] () - { - assert (!isp.empty ()); - - try - { - ifd = fdopen (isp, fdopen_mode::in); - } - catch (const io_error& e) - { - fail (ll) << "unable to read " << isp << ": " << e; - } - }; - - switch (in.type) - { - case redirect_type::pass: - { - try - { - ifd = fddup (0); - } - catch (const io_error& e) - { - fail (ll) << "unable to duplicate stdin: " << e; - } - - break; - } - - case redirect_type::none: - // Somehow need to make sure that the child process doesn't read - // from stdin. That is tricky to do in a portable way. Here we - // suppose that the program which (erroneously) tries to read some - // data from stdin being redirected to /dev/null fails not being - // able to read the expected data, and so the test doesn't pass - // through. - // - // @@ Obviously doesn't cover the case when the process reads - // whatever available. - // @@ Another approach could be not to redirect stdin and let the - // process to hang which can be interpreted as a test failure. - // @@ Both ways are quite ugly. Is there some better way to do - // this? - // - // Fall through. - // - case redirect_type::null: - { - ifd = open_null (); - break; - } - - case redirect_type::file: - { - isp = normalize (in.file.path, sp, ll); - - open_stdin (); - break; - } - - case redirect_type::here_str_literal: - case redirect_type::here_doc_literal: - { - // We could write to the command stdin directly but instead will - // cache the data for potential troubleshooting. - // - isp = std_path ("stdin"); - - save ( - isp, transform (in.str, false, in.modifiers, sp.root), ll); - - sp.clean_special (isp); - - open_stdin (); - break; - } - case redirect_type::trace: - case redirect_type::merge: - case redirect_type::here_str_regex: - case redirect_type::here_doc_regex: - case redirect_type::here_doc_ref: assert (false); break; - } - } - - assert (ifd.get () != -1); - - // Prior to opening file descriptors for command outputs redirects - // let's check if the command is the set builtin. Being a builtin - // syntactically it differs from the regular ones in a number of ways. - // It either succeeds or terminates abnormally, so redirecting stderr - // is meaningless. It also never produces any output and may appear - // only as a terminal command in a pipeline. That means we can - // short-circuit here calling the builtin and returning right after - // that. Checking that the user didn't specify any meaningless - // redirects or exit code check sounds as a right thing to do. - // - if (program == "set") - { - if (!last) - fail (ll) << "set builtin must be the last pipe command"; - - if (out.type != redirect_type::none) - fail (ll) << "set builtin stdout cannot be redirected"; - - if (err.type != redirect_type::none) - fail (ll) << "set builtin stderr cannot be redirected"; - - if (eq != (c.exit.code == 0)) - fail (ll) << "set builtin exit code cannot be non-zero"; - - set_builtin (sp, c.arguments, move (ifd), ll); - return true; - } - - // Open a file for command output redirect if requested explicitly - // (file overwrite/append redirects) or for the purpose of the output - // validation (none, here_*, file comparison redirects), register the - // file for cleanup, return the file descriptor. Interpret trace - // redirect according to the verbosity level (as null if below 2, as - // pass otherwise). Return nullfd, standard stream descriptor duplicate - // or null-device descriptor for merge, pass or null redirects - // respectively (not opening any file). - // - auto open = [&sp, &ll, &std_path] (const redirect& r, - int dfd, - path& p) -> auto_fd - { - assert (dfd == 1 || dfd == 2); - const char* what (dfd == 1 ? "stdout" : "stderr"); - - fdopen_mode m (fdopen_mode::out | fdopen_mode::create); - - redirect_type rt (r.type != redirect_type::trace - ? r.type - : verb < 2 - ? redirect_type::null - : redirect_type::pass); - switch (rt) - { - case redirect_type::pass: - { - try - { - return fddup (dfd); - } - catch (const io_error& e) - { - fail (ll) << "unable to duplicate " << what << ": " << e; - } - } - - case redirect_type::null: return open_null (); - - // Duplicate the paired file descriptor later. - // - case redirect_type::merge: return nullfd; - - case redirect_type::file: - { - // For the cmp mode the user-provided path refers a content to - // match against, rather than a content to be produced (as for - // overwrite and append modes). And so for cmp mode we redirect - // the process output to a temporary file. - // - p = r.file.mode == redirect_fmode::compare - ? std_path (what) - : normalize (r.file.path, sp, ll); - - m |= r.file.mode == redirect_fmode::append - ? fdopen_mode::at_end - : fdopen_mode::truncate; - - break; - } - - case redirect_type::none: - case redirect_type::here_str_literal: - case redirect_type::here_doc_literal: - case redirect_type::here_str_regex: - case redirect_type::here_doc_regex: - { - p = std_path (what); - m |= fdopen_mode::truncate; - break; - } - - case redirect_type::trace: - case redirect_type::here_doc_ref: assert (false); break; - } - - auto_fd fd; - - try - { - fd = fdopen (p, m); - - if ((m & fdopen_mode::at_end) != fdopen_mode::at_end) - { - if (rt == redirect_type::file) - sp.clean ({cleanup_type::always, p}, true); - else - sp.clean_special (p); - } - } - catch (const io_error& e) - { - fail (ll) << "unable to write to " << p << ": " << e; - } - - return fd; - }; - - path osp; - fdpipe ofd; - - // If this is the last command in the pipeline than redirect the - // command process stdout to a file. Otherwise create a pipe and - // redirect the stdout to the write-end of the pipe. The read-end will - // be passed as stdin for the next command in the pipeline. - // - // @@ Shouldn't we allow the here-* and file output redirects for a - // command with pipelined output? Say if such redirect is present - // then the process output is redirected to a file first (as it is - // when no output pipelined), and only after the process exit code - // and the output are validated the next command in the pipeline is - // executed taking the file as an input. This could be usefull for - // test failures investigation and for tests "tightening". - // - if (last) - ofd.out = open (out, 1, osp); - else - { - assert (out.type == redirect_type::none); // No redirect expected. - ofd = open_pipe (); - } - - path esp; - auto_fd efd (open (err, 2, esp)); - - // Merge standard streams. - // - bool mo (out.type == redirect_type::merge); - if (mo || err.type == redirect_type::merge) - { - auto_fd& self (mo ? ofd.out : efd); - auto_fd& other (mo ? efd : ofd.out); - - try - { - assert (self.get () == -1 && other.get () != -1); - self = fddup (other.get ()); - } - catch (const io_error& e) - { - fail (ll) << "unable to duplicate " << (mo ? "stderr" : "stdout") - << ": " << e; - } - } - - // All descriptors should be open to the date. - // - assert (ofd.out.get () != -1 && efd.get () != -1); - - optional<process_exit> exit; - builtin_function* bf (builtins.find (program)); - - bool success; - - auto process_args = [&c] () -> cstrings - { - cstrings args {c.program.string ().c_str ()}; - - for (const auto& a: c.arguments) - args.push_back (a.c_str ()); - - args.push_back (nullptr); - return args; - }; - - if (bf != nullptr) - { - // Execute the builtin. - // - if (verb >= 2) - print_process (process_args ()); - - // Some of the testscript builtins (cp, mkdir, etc) extend libbutl - // builtins (via callbacks) registering/moving cleanups for the - // filesystem entries they create/move, unless explicitly requested - // not to do so via the --no-cleanup option. - // - // Let's "wrap up" the cleanup-related flags into the single object - // to rely on "small function object" optimization. - // - struct cleanup - { - // Whether the cleanups are enabled for the builtin. Can be set to - // false by the parse_option callback if --no-cleanup is - // encountered. - // - bool enabled = true; - - // Whether to register cleanup for a filesystem entry being - // created/updated depending on its existence. Calculated by the - // create pre-hook and used by the subsequent post-hook. - // - bool add; - - // Whether to move existing cleanups for the filesystem entry - // being moved, rather than to erase them. Calculated by the move - // pre-hook and used by the subsequent post-hook. - // - bool move; - }; - - // nullopt if the builtin doesn't support cleanups. - // - optional<cleanup> cln; - - if (cleanup_builtin (program)) - cln = cleanup (); - - builtin_callbacks bcs { - - // create - // - // Unless cleanups are suppressed, test that the filesystem entry - // doesn't exist (pre-hook) and, if that's the case, register the - // cleanup for the newly created filesystem entry (post-hook). - // - [&sp, &cln] (const path& p, bool pre) - { - // Cleanups must be supported by a filesystem entry-creating - // builtin. - // - assert (cln); - - if (cln->enabled) - { - if (pre) - cln->add = !butl::entry_exists (p); - else if (cln->add) - sp.clean ({cleanup_type::always, p}, true /* implicit */); - } - }, - - // move - // - // Validate the source and destination paths (pre-hook) and, - // unless suppressed, adjust the cleanups that are sub-paths of - // the source path (post-hook). - // - [&sp, &cln] - (const path& from, const path& to, bool force, bool pre) - { - // Cleanups must be supported by a filesystem entry-moving - // builtin. - // - assert (cln); - - if (pre) - { - const dir_path& wd (sp.wd_path); - const dir_path& rwd (sp.root.wd_path); - - auto fail = [] (const string& d) {throw runtime_error (d);}; - - if (!from.sub (rwd) && !force) - fail ("'" + from.representation () + - "' is out of working directory '" + rwd.string () + - "'"); - - auto check_wd = [&wd, fail] (const path& p) - { - if (wd.sub (path_cast<dir_path> (p))) - fail ("'" + p.string () + - "' contains test working directory '" + - wd.string () + "'"); - }; - - check_wd (from); - check_wd (to); - - // Unless cleanups are disabled, "move" the matching cleanups - // if the destination path doesn't exist and it is a sub-path - // of the working directory and just remove them otherwise. - // - if (cln->enabled) - cln->move = !butl::entry_exists (to) && to.sub (rwd); - } - else if (cln->enabled) - { - // Move or remove the matching cleanups (see above). - // - // Note that it's not enough to just change the cleanup paths. - // We also need to make sure that these cleanups happen before - // the destination directory (or any of its parents) cleanup, - // that is potentially registered. To achieve that we can just - // relocate these cleanup entries to the end of the list, - // preserving their mutual order. Remember that cleanups in - // the list are executed in the reversed order. - // - cleanups cs; - - // Remove the source path sub-path cleanups from the list, - // adjusting/caching them if required (see above). - // - for (auto i (sp.cleanups.begin ()); i != sp.cleanups.end (); ) - { - build2::test::script::cleanup& c (*i); - path& p (c.path); - - if (p.sub (from)) - { - if (cln->move) - { - // Note that we need to preserve the cleanup path - // trailing separator which indicates the removal - // method. Also note that leaf(), in particular, does - // that. - // - p = p != from - ? to / p.leaf (path_cast<dir_path> (from)) - : p.to_directory () - ? path_cast<dir_path> (to) - : to; - - cs.push_back (move (c)); - } - - i = sp.cleanups.erase (i); - } - else - ++i; - } - - // Re-insert the adjusted cleanups at the end of the list. - // - sp.cleanups.insert (sp.cleanups.end (), - make_move_iterator (cs.begin ()), - make_move_iterator (cs.end ())); - - } - }, - - // remove - // - // Validate the filesystem entry path (pre-hook). - // - [&sp] (const path& p, bool force, bool pre) - { - if (pre) - { - const dir_path& wd (sp.wd_path); - const dir_path& rwd (sp.root.wd_path); - - auto fail = [] (const string& d) {throw runtime_error (d);}; - - if (!p.sub (rwd) && !force) - fail ("'" + p.representation () + - "' is out of working directory '" + rwd.string () + - "'"); - - if (wd.sub (path_cast<dir_path> (p))) - fail ("'" + p.string () + - "' contains test working directory '" + wd.string () + - "'"); - } - }, - - // parse_option - // - [&cln] (const strings& args, size_t i) - { - // Parse --no-cleanup, if it is supported by the builtin. - // - if (cln && args[i] == "--no-cleanup") - { - cln->enabled = false; - return 1; - } - - return 0; - }, - - // sleep - // - // Deactivate the thread before going to sleep. - // - [&sp] (const duration& d) - { - // If/when required we could probably support the precise sleep - // mode (e.g., via an option). - // - sp.root.test_target.ctx.sched.sleep (d); - } - }; - - try - { - uint8_t r; // Storage. - builtin b (bf (r, - c.arguments, - move (ifd), move (ofd.out), move (efd), - sp.wd_path, - bcs)); - - success = run_pipe (sp, - nc, - ec, - move (ofd.in), - ci + 1, li, ll, diag); - - exit = process_exit (b.wait ()); - } - catch (const system_error& e) - { - fail (ll) << "unable to execute " << c.program << " builtin: " - << e << endf; - } - } - else - { - // Execute the process. - // - cstrings args (process_args ()); - - // Resolve the relative not simple program path against the scope's - // working directory. The simple one will be left for the process - // path search machinery. Also strip the potential leading `^`, - // indicating that this is an external program rather than a - // builtin. - // - path p; - - try - { - p = path (args[0]); - - if (p.relative ()) - { - auto program = [&p, &args] (path pp) - { - p = move (pp); - args[0] = p.string ().c_str (); - }; - - if (p.simple ()) - { - const string& s (p.string ()); - - // Don't end up with an empty path. - // - if (s.size () > 1 && s[0] == '^') - program (path (s, 1, s.size () - 1)); - } - else - program (sp.wd_path / p); - } - } - catch (const invalid_path& e) - { - fail (ll) << "invalid program path " << e.path; - } - - try - { - process_path pp (process::path_search (args[0])); - - // Note: the builtin-escaping character '^' is not printed. - // - if (verb >= 2) - print_process (args); - - process pr ( - pp, - args.data (), - {ifd.get (), -1}, process::pipe (ofd), {-1, efd.get ()}, - sp.wd_path.string ().c_str ()); - - ifd.reset (); - ofd.out.reset (); - efd.reset (); - - success = run_pipe (sp, - nc, - ec, - move (ofd.in), - ci + 1, li, ll, diag); - - pr.wait (); - - exit = move (pr.exit); - } - catch (const process_error& e) - { - error (ll) << "unable to execute " << args[0] << ": " << e; - - if (e.child) - std::exit (1); - - throw failed (); - } - } - - assert (exit); - - // If the righ-hand side pipeline failed than the whole pipeline fails, - // and no further checks are required. - // - if (!success) - return false; - - const path& pr (c.program); - - // If there is no valid exit code available by whatever reason then we - // print the proper diagnostics, dump stderr (if cached and not too - // large) and fail the whole test. Otherwise if the exit code is not - // correct then we print diagnostics if requested and fail the - // pipeline. - // - bool valid (exit->normal ()); - - // On Windows the exit code can be out of the valid codes range being - // defined as uint16_t. - // -#ifdef _WIN32 - if (valid) - valid = exit->code () < 256; -#endif - - success = valid && eq == (exit->code () == c.exit.code); - - if (!valid || (!success && diag)) - { - // In the presense of a valid exit code we print the diagnostics and - // return false rather than throw. - // - diag_record d (valid ? error (ll) : fail (ll)); - - if (!exit->normal ()) - d << pr << " " << *exit; - else - { - uint16_t ec (exit->code ()); // Make sure is printed as integer. - - if (!valid) - d << pr << " exit code " << ec << " out of 0-255 range"; - else if (!success) - { - if (diag) - d << pr << " exit code " << ec << (eq ? " != " : " == ") - << static_cast<uint16_t> (c.exit.code); - } - else - assert (false); - } - - if (non_empty (esp, ll)) - d << info << "stderr: " << esp; - - if (non_empty (osp, ll)) - d << info << "stdout: " << osp; - - if (non_empty (isp, ll)) - d << info << "stdin: " << isp; - - // Print cached stderr. - // - print_file (d, esp, ll); - } - - // If exit code is correct then check if the standard outputs match the - // expectations. Note that stdout is only redirected to file for the - // last command in the pipeline. - // - // The thinking behind matching stderr first is that if it mismatches, - // then the program probably misbehaves (executes wrong functionality, - // etc) in which case its stdout doesn't really matter. - // - if (success) - success = - check_output (pr, esp, isp, err, ll, sp, diag, "stderr") && - (!last || - check_output (pr, osp, isp, out, ll, sp, diag, "stdout")); - - return success; - } - - static bool - run_expr (scope& sp, - const command_expr& expr, - size_t li, const location& ll, - bool diag) - { - // Print test id once per test expression. - // - auto df = make_diag_frame ( - [&sp](const diag_record& dr) - { - // Let's not depend on how the path representation can be improved - // for readability on printing. - // - dr << info << "test id: " << sp.id_path.posix_string (); - }); - - // Commands are numbered sequentially throughout the expression - // starting with 1. Number 0 means the command is a single one. - // - size_t ci (expr.size () == 1 && expr.back ().pipe.size () == 1 - ? 0 - : 1); - - // If there is no ORs to the right of a pipe then the pipe failure is - // fatal for the whole expression. In particular, the pipe must print - // the diagnostics on failure (if generally allowed). So we find the - // pipe that "switches on" the diagnostics potential printing. - // - command_expr::const_iterator trailing_ands; // Undefined if diag is - // disallowed. - if (diag) - { - auto i (expr.crbegin ()); - for (; i != expr.crend () && i->op == expr_operator::log_and; ++i) ; - trailing_ands = i.base (); - } - - bool r (false); - bool print (false); - - for (auto b (expr.cbegin ()), i (b), e (expr.cend ()); i != e; ++i) - { - if (diag && i + 1 == trailing_ands) - print = true; - - const command_pipe& p (i->pipe); - bool or_op (i->op == expr_operator::log_or); - - // Short-circuit if the pipe result must be OR-ed with true or AND-ed - // with false. - // - if (!((or_op && r) || (!or_op && !r))) - r = run_pipe ( - sp, p.begin (), p.end (), auto_fd (), ci, li, ll, print); - - ci += p.size (); - } - - return r; + ? *sp.parent->work_dir.path + : sp.work_dir.path->directory ()); } void default_runner:: run (scope& sp, const command_expr& expr, command_type ct, - size_t li, - const location& ll) + size_t li, const location& ll) { // Noop for teardown commands if keeping tests output is requested. // @@ -2085,8 +144,18 @@ namespace build2 text << ": " << c << expr; } - if (!run_expr (sp, expr, li, ll, true)) - throw failed (); // Assume diagnostics is already printed. + // Print test id once per test expression. + // + auto df = make_diag_frame ( + [&sp](const diag_record& dr) + { + // Let's not depend on how the path representation can be improved + // for readability on printing. + // + dr << info << "test id: " << sp.id_path.posix_string (); + }); + + build2::script::run (sp, expr, li, ll); } bool default_runner:: @@ -2097,7 +166,18 @@ namespace build2 if (verb >= 3) text << ": ?" << expr; - return run_expr (sp, expr, li, ll, false); + // Print test id once per test expression. + // + auto df = make_diag_frame ( + [&sp](const diag_record& dr) + { + // Let's not depend on how the path representation can be improved + // for readability on printing. + // + dr << info << "test id: " << sp.id_path.posix_string (); + }); + + return build2::script::run_if (sp, expr, li, ll); } } } diff --git a/libbuild2/test/script/runner.hxx b/libbuild2/test/script/runner.hxx index af37f56..22cae4e 100644 --- a/libbuild2/test/script/runner.hxx +++ b/libbuild2/test/script/runner.hxx @@ -7,6 +7,8 @@ #include <libbuild2/types.hxx> #include <libbuild2/utility.hxx> +#include <libbuild2/script/run.hxx> // exit + #include <libbuild2/test/script/script.hxx> namespace build2 @@ -17,18 +19,7 @@ namespace build2 namespace script { - // An exception that can be thrown by a runner to exit the scope (for - // example, as a result of executing the exit builtin). The status - // indicates whether the scope should be considered to have succeeded - // or failed. - // - struct exit_scope - { - bool status; - - explicit - exit_scope (bool s): status (s) {} - }; + using exit_scope = build2::script::exit; class runner { diff --git a/libbuild2/test/script/script.cxx b/libbuild2/test/script/script.cxx index 79b8bca..34d4723 100644 --- a/libbuild2/test/script/script.cxx +++ b/libbuild2/test/script/script.cxx @@ -8,6 +8,8 @@ #include <libbuild2/target.hxx> #include <libbuild2/algorithm.hxx> +#include <libbuild2/test/script/parser.hxx> + using namespace std; namespace build2 @@ -16,414 +18,60 @@ namespace build2 { namespace script { - ostream& - operator<< (ostream& o, line_type lt) - { - const char* s (nullptr); - - switch (lt) - { - case line_type::var: s = "variable"; break; - case line_type::cmd: s = "command"; break; - case line_type::cmd_if: s = "'if'"; break; - case line_type::cmd_ifn: s = "'if!'"; break; - case line_type::cmd_elif: s = "'elif'"; break; - case line_type::cmd_elifn: s = "'elif!'"; break; - case line_type::cmd_else: s = "'else'"; break; - case line_type::cmd_end: s = "'end'"; break; - } - - return o << s; - } - - // Quote if empty or contains spaces or any of the special characters. - // Note that we use single quotes since double quotes still allow - // expansion. - // - // @@ What if it contains single quotes? - // - static void - to_stream_q (ostream& o, const string& s) - { - if (s.empty () || s.find_first_of (" |&<>=\\\"") != string::npos) - o << '\'' << s << '\''; - else - o << s; - }; - - void - to_stream (ostream& o, const command& c, command_to_stream m) - { - auto print_path = [&o] (const path& p) - { - using build2::operator<<; - - ostringstream s; - stream_verb (s, stream_verb (o)); - s << p; - - to_stream_q (o, s.str ()); - }; - - auto print_redirect = - [&o, print_path] (const redirect& r, const char* prefix) - { - o << ' ' << prefix; - - size_t n (string::traits_type::length (prefix)); - assert (n > 0); - - char d (prefix[n - 1]); // Redirect direction. - - switch (r.type) - { - case redirect_type::none: assert (false); break; - case redirect_type::pass: o << '|'; break; - case redirect_type::null: o << '-'; break; - case redirect_type::trace: o << '!'; break; - case redirect_type::merge: o << '&' << r.fd; break; - - case redirect_type::here_str_literal: - case redirect_type::here_doc_literal: - { - bool doc (r.type == redirect_type::here_doc_literal); - - // For here-document add another '>' or '<'. Note that here end - // marker never needs to be quoted. - // - if (doc) - o << d; - - o << r.modifiers; - - if (doc) - o << r.end; - else - { - const string& v (r.str); - to_stream_q (o, - r.modifiers.find (':') == string::npos - ? string (v, 0, v.size () - 1) // Strip newline. - : v); - } - - break; - } - - case redirect_type::here_str_regex: - case redirect_type::here_doc_regex: - { - bool doc (r.type == redirect_type::here_doc_regex); - - // For here-document add another '>' or '<'. Note that here end - // marker never needs to be quoted. - // - if (doc) - o << d; - - o << r.modifiers; - - const regex_lines& re (r.regex); - - if (doc) - o << re.intro + r.end + re.intro + re.flags; - else - { - assert (!re.lines.empty ()); // Regex can't be empty. - - regex_line l (re.lines[0]); - to_stream_q (o, re.intro + l.value + re.intro + l.flags); - } - - break; - } - - case redirect_type::file: - { - // For stdin or stdout-comparison redirect add '>>' or '<<' (and - // so make it '<<<' or '>>>'). Otherwise add '+' or '=' (and so - // make it '>+' or '>='). - // - if (d == '<' || r.file.mode == redirect_fmode::compare) - o << d << d; - else - o << (r.file.mode == redirect_fmode::append ? '+' : '='); - - print_path (r.file.path); - break; - } - - case redirect_type::here_doc_ref: assert (false); break; - } - }; - - auto print_doc = [&o] (const redirect& r) - { - o << endl; - - if (r.type == redirect_type::here_doc_literal) - o << r.str; - else - { - assert (r.type == redirect_type::here_doc_regex); - - const regex_lines& rl (r.regex); - - for (auto b (rl.lines.cbegin ()), i (b), e (rl.lines.cend ()); - i != e; ++i) - { - if (i != b) - o << endl; - - const regex_line& l (*i); - - if (l.regex) // Regex (possibly empty), - o << rl.intro << l.value << rl.intro << l.flags; - else if (!l.special.empty ()) // Special literal. - o << rl.intro; - else // Textual literal. - o << l.value; - - o << l.special; - } - } - - o << (r.modifiers.find (':') == string::npos ? "" : "\n") << r.end; - }; - - if ((m & command_to_stream::header) == command_to_stream::header) - { - // Program. - // - to_stream_q (o, c.program.string ()); - - // Arguments. - // - for (const string& a: c.arguments) - { - o << ' '; - to_stream_q (o, a); - } - - // Redirects. - // - if (c.in.effective ().type != redirect_type::none) - print_redirect (c.in.effective (), "<"); - - if (c.out.effective ().type != redirect_type::none) - print_redirect (c.out.effective (), ">"); - - if (c.err.effective ().type != redirect_type::none) - print_redirect (c.err.effective (), "2>"); - - for (const auto& p: c.cleanups) - { - o << " &"; - - if (p.type != cleanup_type::always) - o << (p.type == cleanup_type::maybe ? '?' : '!'); - - print_path (p.path); - } - - if (c.exit.comparison != exit_comparison::eq || c.exit.code != 0) - { - switch (c.exit.comparison) - { - case exit_comparison::eq: o << " == "; break; - case exit_comparison::ne: o << " != "; break; - } - - o << static_cast<uint16_t> (c.exit.code); - } - } - - if ((m & command_to_stream::here_doc) == command_to_stream::here_doc) - { - // Here-documents. - // - if (c.in.type == redirect_type::here_doc_literal || - c.in.type == redirect_type::here_doc_regex) - print_doc (c.in); - - if (c.out.type == redirect_type::here_doc_literal || - c.out.type == redirect_type::here_doc_regex) - print_doc (c.out); - - if (c.err.type == redirect_type::here_doc_literal || - c.err.type == redirect_type::here_doc_regex) - print_doc (c.err); - } - } - - void - to_stream (ostream& o, const command_pipe& p, command_to_stream m) - { - if ((m & command_to_stream::header) == command_to_stream::header) - { - for (auto b (p.begin ()), i (b); i != p.end (); ++i) - { - if (i != b) - o << " | "; - - to_stream (o, *i, command_to_stream::header); - } - } - - if ((m & command_to_stream::here_doc) == command_to_stream::here_doc) - { - for (const command& c: p) - to_stream (o, c, command_to_stream::here_doc); - } - } - - void - to_stream (ostream& o, const command_expr& e, command_to_stream m) - { - if ((m & command_to_stream::header) == command_to_stream::header) - { - for (auto b (e.begin ()), i (b); i != e.end (); ++i) - { - if (i != b) - { - switch (i->op) - { - case expr_operator::log_or: o << " || "; break; - case expr_operator::log_and: o << " && "; break; - } - } - - to_stream (o, i->pipe, command_to_stream::header); - } - } - - if ((m & command_to_stream::here_doc) == command_to_stream::here_doc) - { - for (const expr_term& t: e) - to_stream (o, t.pipe, command_to_stream::here_doc); - } - } - - // redirect + // scope_base // - redirect:: - redirect (redirect_type t) - : type (t) + scope_base:: + scope_base (script& s) + : root (s), + vars (s.test_target.ctx, false /* global */) { - switch (type) - { - case redirect_type::none: - case redirect_type::pass: - case redirect_type::null: - case redirect_type::trace: - case redirect_type::merge: break; - - case redirect_type::here_str_literal: - case redirect_type::here_doc_literal: new (&str) string (); break; - - case redirect_type::here_str_regex: - case redirect_type::here_doc_regex: - { - new (®ex) regex_lines (); - break; - } - - case redirect_type::file: new (&file) file_type (); break; - - case redirect_type::here_doc_ref: assert (false); break; - } + vars.assign (root.wd_var) = dir_path (); } - redirect:: - redirect (redirect&& r) - : type (r.type), - modifiers (move (r.modifiers)), - end (move (r.end)), - end_line (r.end_line), - end_column (r.end_column) + const dir_path* scope_base:: + wd_path () const { - switch (type) - { - case redirect_type::none: - case redirect_type::pass: - case redirect_type::null: - case redirect_type::trace: break; - - case redirect_type::merge: fd = r.fd; break; - - case redirect_type::here_str_literal: - case redirect_type::here_doc_literal: - { - new (&str) string (move (r.str)); - break; - } - case redirect_type::here_str_regex: - case redirect_type::here_doc_regex: - { - new (®ex) regex_lines (move (r.regex)); - break; - } - case redirect_type::file: - { - new (&file) file_type (move (r.file)); - break; - } - case redirect_type::here_doc_ref: - { - new (&ref) reference_wrapper<const redirect> (r.ref); - break; - } - } + return &cast<dir_path> (vars[root.wd_var]); } - redirect:: - ~redirect () + const target_triplet& scope_base:: + test_tt () const { - switch (type) - { - case redirect_type::none: - case redirect_type::pass: - case redirect_type::null: - case redirect_type::trace: - case redirect_type::merge: break; + if (auto r = + cast_null<target_triplet> (root.test_target["test.target"])) + return *r; - case redirect_type::here_str_literal: - case redirect_type::here_doc_literal: str.~string (); break; - - case redirect_type::here_str_regex: - case redirect_type::here_doc_regex: regex.~regex_lines (); break; - - case redirect_type::file: file.~file_type (); break; - - case redirect_type::here_doc_ref: - { - ref.~reference_wrapper<const redirect> (); - break; - } - } - } - - redirect& redirect:: - operator= (redirect&& r) - { - if (this != &r) - { - this->~redirect (); - new (this) redirect (move (r)); // Assume noexcept move-constructor. - } - return *this; + // We set it to default value in init() so it can only be NULL if the + // user resets it. + // + fail << "invalid test.target value" << endf; } // scope // + static const optional<string> wd_name ("test working directory"); + static const optional<string> sd_name ("working directory"); + scope:: scope (const string& id, scope* p, script& r) - : parent (p), - root (r), - vars (r.test_target.ctx, false /* global */), - id_path (cast<path> (assign (root.id_var) = path ())), - wd_path (cast<dir_path> (assign (root.wd_var) = dir_path ())) - + : scope_base (r), + // + // Note that root.work_dir is not yet constructed if we are + // creating the root scope (p is NULL). Also note that + // root.test_target is always constructed to date. + // + environment (root.test_target.ctx, + test_tt (), + dir_name_view (wd_path (), &wd_name), + dir_name_view ( + p != nullptr ? root.work_dir.path : wd_path (), + &sd_name), + *wd_path (), true /* temp_dir_keep */, + redirect (redirect_type::none), + redirect (redirect_type::none), + redirect (redirect_type::none)), + parent (p), + id_path (cast<path> (assign (root.id_var) = path ())) { // Construct the id_path as a string to ensure POSIX form. In fact, // the only reason we keep it as a path is to be able to easily get id @@ -443,38 +91,64 @@ namespace build2 // (handled in an ad hoc way). // if (p != nullptr) - const_cast<dir_path&> (wd_path) = dir_path (p->wd_path) /= id; + const_cast<dir_path&> (*work_dir.path) = + dir_path (*p->work_dir.path) /= id; } void scope:: - clean (cleanup c, bool implicit) + set_variable (string&& nm, + names&& val, + const string& attrs, + const location& ll) { - using std::find; // Hidden by scope::find(). + // Check if we are trying to modify any of the special variables. + // + if (parser::special_variable (nm)) + fail (ll) << "attempt to set '" << nm << "' variable directly"; + + // Set the variable value and attributes. Note that we need to aquire + // unique lock before potentially changing the script's variable + // pool. The obtained variable reference can safelly be used with no + // locking as the variable pool is an associative container + // (underneath) and we are only adding new variables into it. + // + ulock ul (root.var_pool_mutex); + const variable& var (root.var_pool.insert (move (nm))); + ul.unlock (); - assert (!implicit || c.type == cleanup_type::always); + value& lhs (assign (var)); - const path& p (c.path); - if (!p.sub (root.wd_path)) + // If there are no attributes specified then the variable assignment + // is straightforward. Otherwise we will use the build2 parser helper + // function. + // + if (attrs.empty ()) + lhs.assign (move (val), &var); + else { - if (implicit) - return; - else - assert (false); // Error so should have been checked. + // If there is an error in the attributes string, our diagnostics + // will look like this: + // + // <attributes>:1:1 error: unknown value attribute x + // testscript:10:1 info: while parsing attributes '[x]' + // + // Note that the attributes parsing error is the only reason for a + // failure. + // + auto df = make_diag_frame ( + [attrs, &ll](const diag_record& dr) + { + dr << info (ll) << "while parsing attributes '" << attrs << "'"; + }); + + parser p (context); + p.apply_value_attributes (&var, + lhs, + value (move (val)), + attrs, + token_type::assign, + path_name ("<attributes>")); } - - auto pr = [&p] (const cleanup& v) -> bool {return v.path == p;}; - auto i (find_if (cleanups.begin (), cleanups.end (), pr)); - - if (i == cleanups.end ()) - cleanups.emplace_back (move (c)); - else if (!implicit) - i->type = c.type; - } - - void scope:: - clean_special (path p) - { - special_cleanups.emplace_back (move (p)); } // script_base @@ -523,7 +197,8 @@ namespace build2 // Set the script working dir ($~) to $out_base/test/<id> (id_path // for root is just the id which is empty if st is 'testscript'). // - const_cast<dir_path&> (wd_path) = dir_path (rwd) /= id_path.string (); + const_cast<dir_path&> (*work_dir.path) = + dir_path (rwd) /= id_path.string (); // Set the test variable at the script level. We do it even if it's // set in the buildfile since they use different types. @@ -625,7 +300,6 @@ namespace build2 return lookup_in_buildfile (var.name); } - lookup scope:: lookup_in_buildfile (const string& n, bool target_only) const { @@ -634,7 +308,7 @@ namespace build2 // in parallel). Plus, if there is no such variable, then we cannot // possibly find any value. // - const variable* pvar (root.test_target.ctx.var_pool.find (n)); + const variable* pvar (context.var_pool.find (n)); if (pvar == nullptr) return lookup_type (); diff --git a/libbuild2/test/script/script.hxx b/libbuild2/test/script/script.hxx index a28ef25..6356501 100644 --- a/libbuild2/test/script/script.hxx +++ b/libbuild2/test/script/script.hxx @@ -12,9 +12,9 @@ #include <libbuild2/variable.hxx> -#include <libbuild2/test/target.hxx> +#include <libbuild2/script/script.hxx> -#include <libbuild2/test/script/token.hxx> // replay_tokens +#include <libbuild2/test/target.hxx> namespace build2 { @@ -22,295 +22,14 @@ namespace build2 { namespace script { - class parser; // Required by VC for 'friend class parser' declaration. - - // Pre-parse representation. - // - - enum class line_type - { - var, - cmd, - cmd_if, - cmd_ifn, - cmd_elif, - cmd_elifn, - cmd_else, - cmd_end - }; - - ostream& - operator<< (ostream&, line_type); - - struct line - { - line_type type; - replay_tokens tokens; - - union - { - const variable* var; // Pre-entered for line_type::var. - }; - }; - - // Most of the time we will have just one line (test command). - // - using lines = small_vector<line, 1>; - - // Parse object model. - // - - // redirect - // - enum class redirect_type - { - none, - pass, - null, - trace, - merge, - here_str_literal, - here_str_regex, - here_doc_literal, - here_doc_regex, - here_doc_ref, // Reference to here_doc literal or regex. - file, - }; - - // Pre-parsed (but not instantiated) regex lines. The idea here is that - // we should be able to re-create their (more or less) exact text - // representation for diagnostics but also instantiate without any - // re-parsing. - // - struct regex_line - { - // If regex is true, then value is the regex expression. Otherwise, it - // is a literal. Note that special characters can be present in both - // cases. For example, //+ is a regex, while /+ is a literal, both - // with '+' as a special character. Flags are only valid for regex. - // Literals falls apart into textual (has no special characters) and - // special (has just special characters instead) ones. For example - // foo is a textual literal, while /.+ is a special one. Note that - // literal must not have value and special both non-empty. - // - bool regex; - - string value; - string flags; - string special; - - uint64_t line; - uint64_t column; - - // Create regex with optional special characters. - // - regex_line (uint64_t l, uint64_t c, - string v, string f, string s = string ()) - : regex (true), - value (move (v)), - flags (move (f)), - special (move (s)), - line (l), - column (c) {} - - // Create a literal, either text or special. - // - regex_line (uint64_t l, uint64_t c, string v, bool s) - : regex (false), - value (s ? string () : move (v)), - special (s ? move (v) : string ()), - line (l), - column (c) {} - }; - - struct regex_lines - { - char intro; // Introducer character. - string flags; // Global flags (here-document). - - small_vector<regex_line, 8> lines; - }; - - // Output file redirect mode. - // - enum class redirect_fmode - { - compare, - overwrite, - append - }; - - struct redirect - { - redirect_type type; - - struct file_type - { - using path_type = build2::path; - path_type path; - redirect_fmode mode; // Meaningless for input redirect. - }; - - union - { - int fd; // Merge-to descriptor. - string str; // Note: with trailing newline, if requested. - regex_lines regex; // Note: with trailing blank, if requested. - file_type file; - reference_wrapper<const redirect> ref; // Note: no chains. - }; - - string modifiers; // Redirect modifiers. - string end; // Here-document end marker (no regex intro/flags). - uint64_t end_line; // Here-document end marker location. - uint64_t end_column; - - // Create redirect of a type other than reference. - // - explicit - redirect (redirect_type = redirect_type::none); - - // Create redirect of the reference type. - // - redirect (redirect_type t, const redirect& r) - : type (redirect_type::here_doc_ref), ref (r) - { - // There is no support (and need) for reference chains. - // - assert (t == redirect_type::here_doc_ref && - r.type != redirect_type::here_doc_ref); - } - - // Move constuctible/assignable-only type. - // - redirect (redirect&&); - redirect& operator= (redirect&&); - - ~redirect (); - - const redirect& - effective () const noexcept - { - return type == redirect_type::here_doc_ref ? ref.get () : *this; - } - }; - - // cleanup - // - enum class cleanup_type - { - always, // &foo - cleanup, fail if does not exist. - maybe, // &?foo - cleanup, ignore if does not exist. - never // &!foo - don’t cleanup, ignore if doesn’t exist. - }; - - // File or directory to be automatically cleaned up at the end of the - // scope. If the path ends with a trailing slash, then it is assumed to - // be a directory, otherwise -- a file. A directory that is about to be - // cleaned up must be empty. - // - // The last component in the path may contain a wildcard that have the - // following semantics: - // - // dir/* - remove all immediate files - // dir/*/ - remove all immediate sub-directories (must be empty) - // dir/** - remove all files recursively - // dir/**/ - remove all sub-directories recursively (must be empty) - // dir/*** - remove directory dir with all files and sub-directories - // recursively - // - struct cleanup - { - cleanup_type type; - build2::path path; - }; - using cleanups = vector<cleanup>; - - // command_exit - // - enum class exit_comparison {eq, ne}; - - struct command_exit - { - // C/C++ don't apply constraints on program exit code other than it - // being of type int. - // - // POSIX specifies that only the least significant 8 bits shall be - // available from wait() and waitpid(); the full value shall be - // available from waitid() (read more at _Exit, _exit Open Group - // spec). - // - // While the Linux man page for waitid() doesn't mention any - // deviations from the standard, the FreeBSD implementation (as of - // version 11.0) only returns 8 bits like the other wait*() calls. - // - // Windows supports 32-bit exit codes. - // - // Note that in shells some exit values can have special meaning so - // using them can be a source of confusion. For bash values in the - // [126, 255] range are such a special ones (see Appendix E, "Exit - // Codes With Special Meanings" in the Advanced Bash-Scripting Guide). - // - exit_comparison comparison; - uint8_t code; - }; - - // command - // - struct command - { - path program; - strings arguments; - - redirect in; - redirect out; - redirect err; - - script::cleanups cleanups; - - command_exit exit {exit_comparison::eq, 0}; - }; - - enum class command_to_stream: uint16_t - { - header = 0x01, - here_doc = 0x02, // Note: printed on a new line. - all = header | here_doc - }; - - void - to_stream (ostream&, const command&, command_to_stream); - - ostream& - operator<< (ostream&, const command&); - - // command_pipe - // - using command_pipe = vector<command>; - - void - to_stream (ostream&, const command_pipe&, command_to_stream); - - ostream& - operator<< (ostream&, const command_pipe&); - - // command_expr - // - enum class expr_operator {log_or, log_and}; - - struct expr_term - { - expr_operator op; // OR-ed to an implied false for the first term. - command_pipe pipe; - }; - - using command_expr = vector<expr_term>; + using build2::script::line; + using build2::script::lines; + using build2::script::redirect; + using build2::script::redirect_type; + using build2::script::line_type; + using build2::script::command_expr; - void - to_stream (ostream&, const command_expr&, command_to_stream); - - ostream& - operator<< (ostream&, const command_expr&); + class parser; // Required by VC for 'friend class parser' declaration. // command_type // @@ -335,31 +54,54 @@ namespace build2 // class script; + class scope_base // Make sure certain things are initialized early. + { + public: + script& root; // Self for the root (script) scope. + + // Note that if we pass the variable name as a string, then it will + // be looked up in the wrong pool. + // + variable_map vars; + + protected: + scope_base (script&); + + const dir_path* + wd_path () const; + + const target_triplet& + test_tt () const; + }; + enum class scope_state {unknown, passed, failed}; - class scope + class scope: public scope_base, public build2::script::environment { public: scope* const parent; // NULL for the root (script) scope. - script& root; // Self for the root (script) scope. // The chain of if-else scope alternatives. See also if_cond_ below. // unique_ptr<scope> if_chain; - // Note that if we pass the variable name as a string, then it will - // be looked up in the wrong pool. - // - variable_map vars; - const path& id_path; // Id path ($@, relative in POSIX form). - const dir_path& wd_path; // Working dir ($~, absolute and normalized). optional<description> desc; scope_state state = scope_state::unknown; - test::script::cleanups cleanups; - paths special_cleanups; + + void + set_variable (string&& name, + names&&, + const string& attrs, + const location&) override; + + // Noop since the temporary directory is a working directory and so + // is created before the scope commands execution. + // + virtual void + create_temp_dir () override {assert (false);}; // Variables. // @@ -382,17 +124,18 @@ namespace build2 lookup_in_buildfile (const string&, bool target_only = true) const; // Return a value suitable for assignment. If the variable does not - // exist in this scope's map, then a new one with the NULL value is - // added and returned. Otherwise the existing value is returned. + // exist in this scope's variable map, then a new one with the NULL + // value is added and returned. Otherwise the existing value is + // returned. // value& assign (const variable& var) {return vars.assign (var);} // Return a value suitable for append/prepend. If the variable does - // not exist in this scope's map, then outer scopes are searched for - // the same variable. If found then a new variable with the found - // value is added to this scope and returned. Otherwise this function - // proceeds as assign() above. + // not exist in this scope's variable map, then outer scopes are + // searched for the same variable. If found then a new variable with + // the found value is added to this scope and returned. Otherwise this + // function proceeds as assign() above. // value& append (const variable&); @@ -402,27 +145,6 @@ namespace build2 void reset_special (); - // Cleanup. - // - public: - // Register a cleanup. If the cleanup is explicit, then override the - // cleanup type if this path is already registered. Ignore implicit - // registration of a path outside script working directory. - // - void - clean (cleanup, bool implicit); - - // Register cleanup of a special file. Such files are created to - // maintain testscript machinery and must be removed first, not to - // interfere with the user-defined wildcard cleanups. - // - void - clean_special (path p); - - public: - virtual - ~scope () = default; - protected: scope (const string& id, scope* parent, script& root); @@ -567,6 +289,4 @@ namespace build2 } } -#include <libbuild2/test/script/script.ixx> - #endif // LIBBUILD2_TEST_SCRIPT_SCRIPT_HXX diff --git a/libbuild2/test/script/script.ixx b/libbuild2/test/script/script.ixx deleted file mode 100644 index 38cba29..0000000 --- a/libbuild2/test/script/script.ixx +++ /dev/null @@ -1,59 +0,0 @@ -// file : libbuild2/test/script/script.ixx -*- C++ -*- -// license : MIT; see accompanying LICENSE file - -namespace build2 -{ - namespace test - { - namespace script - { - inline command_to_stream - operator&= (command_to_stream& x, command_to_stream y) - { - return x = static_cast<command_to_stream> ( - static_cast<uint16_t> (x) & static_cast<uint16_t> (y)); - } - - inline command_to_stream - operator|= (command_to_stream& x, command_to_stream y) - { - return x = static_cast<command_to_stream> ( - static_cast<uint16_t> (x) | static_cast<uint16_t> (y)); - } - - inline command_to_stream - operator& (command_to_stream x, command_to_stream y) {return x &= y;} - - inline command_to_stream - operator| (command_to_stream x, command_to_stream y) {return x |= y;} - - - // command - // - inline ostream& - operator<< (ostream& o, const command& c) - { - to_stream (o, c, command_to_stream::all); - return o; - } - - // command_pipe - // - inline ostream& - operator<< (ostream& o, const command_pipe& p) - { - to_stream (o, p, command_to_stream::all); - return o; - } - - // command_expr - // - inline ostream& - operator<< (ostream& o, const command_expr& e) - { - to_stream (o, e, command_to_stream::all); - return o; - } - } - } -} diff --git a/libbuild2/test/script/token.cxx b/libbuild2/test/script/token.cxx index 85fbb06..efeb17b 100644 --- a/libbuild2/test/script/token.cxx +++ b/libbuild2/test/script/token.cxx @@ -12,43 +12,22 @@ namespace build2 namespace script { void - token_printer (ostream& os, const token& t, bool d) + token_printer (ostream& os, const token& t, print_mode m) { - const string& v (t.value); - // Only quote non-name tokens for diagnostics. // - const char* q (d ? "'" : ""); + const char* q (m == print_mode::diagnostics ? "'" : ""); switch (t.type) { - case token_type::semi: os << q << ';' << q; break; - - case token_type::dot: os << q << '.' << q; break; - - case token_type::plus: os << q << '+' << q; break; - case token_type::minus: os << q << '-' << q; break; - - case token_type::clean: os << q << '&' << v << q; break; - case token_type::pipe: os << q << '|' << q; break; + case token_type::semi: os << q << ';' << q; break; - case token_type::in_pass: os << q << "<|" << q; break; - case token_type::in_null: os << q << "<-" << q; break; - case token_type::in_str: os << q << '<' << v << q; break; - case token_type::in_doc: os << q << "<<" << v << q; break; - case token_type::in_file: os << q << "<<<" << q; break; + case token_type::dot: os << q << '.' << q; break; - case token_type::out_pass: os << q << ">|" << q; break; - case token_type::out_null: os << q << ">-" << q; break; - case token_type::out_trace: os << q << ">!" << q; break; - case token_type::out_merge: os << q << ">&" << q; break; - case token_type::out_str: os << q << '>' << v << q; break; - case token_type::out_doc: os << q << ">>" << v << q; break; - case token_type::out_file_cmp: os << q << ">>>" << v << q; break; - case token_type::out_file_ovr: os << q << ">=" << v << q; break; - case token_type::out_file_app: os << q << ">+" << v << q; break; + case token_type::plus: os << q << '+' << q; break; + case token_type::minus: os << q << '-' << q; break; - default: build2::token_printer (os, t, d); + default: build2::script::token_printer (os, t, m); } } } diff --git a/libbuild2/test/script/token.hxx b/libbuild2/test/script/token.hxx index 14be0a2..dead796 100644 --- a/libbuild2/test/script/token.hxx +++ b/libbuild2/test/script/token.hxx @@ -7,7 +7,7 @@ #include <libbuild2/types.hxx> #include <libbuild2/utility.hxx> -#include <libbuild2/token.hxx> +#include <libbuild2/script/token.hxx> namespace build2 { @@ -15,9 +15,9 @@ namespace build2 { namespace script { - struct token_type: build2::token_type + struct token_type: build2::script::token_type { - using base_type = build2::token_type; + using base_type = build2::script::token_type; enum { @@ -28,35 +28,16 @@ namespace build2 dot, // . plus, // + - minus, // - - - pipe, // | - clean, // &{?!} (modifiers in value) - - in_pass, // <| - in_null, // <- - in_str, // <{:} (modifiers in value) - in_doc, // <<{:} (modifiers in value) - in_file, // <<< - - out_pass, // >| - out_null, // >- - out_trace, // >! - out_merge, // >& - out_str, // >{:~} (modifiers in value) - out_doc, // >>{:~} (modifiers in value) - out_file_cmp, // >>> - out_file_ovr, // >= - out_file_app // >+ + minus // - }; token_type () = default; token_type (value_type v): base_type (v) {} - token_type (base_type v): base_type (v) {} + token_type (build2::token_type v): base_type (v) {} }; void - token_printer (ostream&, const token&, bool); + token_printer (ostream&, const token&, print_mode); } } } |