// file      : libbuild2/script/lexer.cxx -*- C++ -*-
// license   : MIT; see accompanying LICENSE file

#include <libbuild2/script/lexer.hxx>

#include <cstring> // strchr()

using namespace std;

namespace build2
{
  namespace script
  {
    using type = token_type;

    // Enter the lexer mode m.
    //
    // The command_expansion, here_line_single, and here_line_double modes
    // are set up here by pushing a new state. Any other mode is forwarded to
    // the base lexer. If esc is absent, then the escape sequences of the
    // current (top) state are used.
    //
    void lexer::
    mode (base_mode m, char ps, optional<const char*> esc, uintptr_t data)
    {
      // Resolve the escape sequences first since both the base lexer and
      // the state that we push below need them.
      //
      if (!esc)
      {
        assert (!state_.empty ());
        esc = state_.top ().escapes;
      }

      // Settings for the modes that we handle ourselves. Note that the two
      // separator strings are always of the same length.
      //
      const char* sep_chars1 (nullptr);
      const char* sep_chars2 (nullptr);

      bool sep_space   (true);
      bool sep_newline (true);
      bool sep_quotes  (true);

      switch (m)
      {
      default:
        {
          // Not one of ours so delegate to the base lexer.
          //
          // Make sure pair separators are only enabled where we expect
          // them.
          //
          // @@ Should we disable pair separators in the eval mode?
          //
          assert (ps == '\0' ||
                  m == lexer_mode::eval ||
                  m == lexer_mode::attribute_value);

          base_lexer::mode (m, ps, esc, data);
          return;
        }
      case lexer_mode::command_expansion:
        {
          // Note that whitespaces are not word separators in this mode.
          //
          sep_chars1 = "|&<>";
          sep_chars2 = "    ";
          sep_space = false;
          break;
        }
      case lexer_mode::here_line_single:
        {
          // Similar to a single-quoted string except that newlines are
          // separators and quotes are literals.
          //
          // Note that it might be tempting to enable line continuation
          // escapes. However, we will then have to also enable escaping of
          // the backslash, which makes it a lot less tempting.
          //
          sep_chars1 = "\n";
          sep_chars2 = " ";
          esc = ""; // Disable escape sequences.
          sep_space = false;
          sep_quotes = false;
          break;
        }
      case lexer_mode::here_line_double:
        {
          // Similar to a double-quoted string except that newlines are
          // separators and quotes are literals.
          //
          sep_chars1 = "$(\n";
          sep_chars2 = "   ";
          sep_space = false;
          sep_quotes = false;
          break;
        }
      }

      // Pair separators are not supported in any of our own modes.
      //
      assert (ps == '\0');

      state_.push (
        state {m, data, nullopt, false, false, ps,
               sep_space, sep_newline, sep_quotes,
               *esc,
               sep_chars1, sep_chars2});
    }

    // Extract the next token, delegating to next_line() in the modes that
    // are specific to this lexer and to the base lexer otherwise. Also count
    // the tokens that are not unquoted.
    //
    token lexer::
    next ()
    {
      // Figure out who is responsible for the current mode.
      //
      bool own_mode (false);

      switch (state_.top ().mode)
      {
      case lexer_mode::command_expansion:
      case lexer_mode::here_line_single:
      case lexer_mode::here_line_double:
        own_mode = true;
        break;
      default:
        break;
      }

      token t (own_mode ? next_line () : base_lexer::next ());

      // Keep track of the number of quoted tokens.
      //
      if (t.qtype != quote_type::unquoted)
        ++quoted_;

      return t;
    }

    // Extract the next token in the command_expansion, here_line_single, or
    // here_line_double mode.
    //
    // Returns one of the special tokens (eos, newline, dollar, lparen, or a
    // command operator), depending on the mode, or a word otherwise.
    //
    token lexer::
    next_line ()
    {
      // Skip the leading spaces (noting if there were any) and extract the
      // first character of the token, remembering its position.
      //
      bool sep (skip_spaces ().first);

      xchar c (get ());
      uint64_t tok_line (c.line);
      uint64_t tok_column (c.column);

      const state& cur (state_.top ());
      lexer_mode m (cur.mode);

      // The special tokens are marked as double-quoted in the
      // here_line_double mode and as unquoted in the other two.
      //
      const bool dq (m == lexer_mode::here_line_double);
      const quote_type qt (dq ? quote_type::double_ : quote_type::unquoted);

      // Note that the separation flag is captured by reference since it is
      // adjusted below for newlines.
      //
      auto special = [&sep, dq, qt, tok_line, tok_column] (type t)
      {
        return token (t, string (), sep,
                      qt, dq, dq,
                      tok_line, tok_column,
                      token_printer);
      };

      if (eos (c))
        return special (type::eos);

      // NOTE: remember to update mode() if adding new special characters.

      // Newlines are tokens in both here_line modes but not in
      // command_expansion.
      //
      if (m != lexer_mode::command_expansion && c == '\n')
      {
        sep = true; // Treat newline as always separated.
        return special (type::newline);
      }

      // Variable expansion, function call, and evaluation context (not
      // recognized in here_line_single).
      //
      if (m != lexer_mode::here_line_single)
      {
        if (c == '$')
          return special (type::dollar);

        if (c == '(')
          return special (type::lparen);
      }

      // Command operators (only recognized in command_expansion).
      //
      if (m == lexer_mode::command_expansion)
      {
        optional<token> op (next_cmd_op (c, sep));

        if (op)
          return move (*op);
      }

      // None of the above so put the character back and lex it as a word.
      //
      unget (c);
      return word (cur, sep);
    }

    // Try to recognize a command operator that starts with the already
    // extracted character c: pipe (|), logical operators (|| and &&),
    // cleanup (&), or an input (<...) or output (>...) redirect.
    //
    // On success return the operator token with any further characters that
    // belong to it (including the recognized modifiers) consumed. The token
    // carries the separation flag sep, the position of c, and the modifier
    // characters (if any) as its value.
    //
    // Return nullopt if c does not start a command operator. This includes
    // the case where c is < or > but what follows does not form a built-in
    // redirect and no matching redirect alias is set (see redirect_aliases).
    // In this case any characters extracted while looking ahead are put back
    // with unget().
    //
    optional<token> lexer::
    next_cmd_op (const xchar& c, bool sep)
    {
      // Note that this lambda captures the function parameter c by reference.
      // As a result, the tokens it makes always use the position of the first
      // operator character, even when called from the nested scopes below
      // that declare their own (shadowing) c.
      //
      auto make_token = [&sep, &c] (type t, string v = string ())
      {
        return token (t, move (v), sep,
                      quote_type::unquoted, false, false,
                      c.line, c.column,
                      token_printer);
      };

      // Make the token of the specified type, first consuming the modifier
      // characters that follow the operator and storing them as the token
      // value. A character is consumed only if it is listed in mods and has
      // not been consumed already (so each modifier is accepted at most
      // once). If stop is not NULL, then stop after consuming any character
      // that is listed in it. If mods is NULL, then no modifiers are
      // recognized and the token value is empty.
      //
      auto make_token_with_modifiers =
        [&make_token, this] (type t,
                             const char* mods,           // To recognize.
                             const char* stop = nullptr) // To stop after.
        {
          string v;
          if (mods != nullptr)
          {
            for (xchar p (peek ());
                 (strchr (mods, p) != nullptr &&      // Modifier.
                  strchr (v.c_str (), p) == nullptr); // Not already seen.
                 p = peek ())
            {
              get ();
              v += p;

              if (stop != nullptr && strchr (stop, p) != nullptr)
                break;
            }
          }

          return make_token (t, move (v));
        };

      switch (c)
      {
        // |, ||
        //
      case '|':
        {
          if (peek () == '|')
          {
            get ();
            return make_token (type::log_or);
          }
          else
            return make_token (type::pipe);
        }
        // &, &&
        //
      case '&':
        {
          xchar p (peek ());

          if (p == '&')
          {
            get ();
            return make_token (type::log_and);
          }

          // These modifiers are mutually exclusive so stop after seeing
          // either one.
          //
          return make_token_with_modifiers (type::clean, "!?", "!?");
        }
        // <
        //
      case '<':
        {
          // The recognized redirect type, if any. The two-character
          // redirects (<|, <-, <=) are returned directly while for the rest
          // we try the longest form first and then fall back to the shorter
          // redirect aliases, putting back the characters that turned out
          // not to be part of the operator.
          //
          optional<type> r;
          xchar p (peek ());

          if (p == '|' || p == '-' || p == '=' || p == '<') // <| <- <= <<
          {
            // Second operator character (kept so that we can unget it).
            //
            xchar c (get ());

            switch (p)
            {
            case '|': return make_token (type::in_pass);    // <|
            case '-': return make_token (type::in_null);    // <-
            case '=': return make_token (type::in_file);    // <=
            case '<':                                       // <<
              {
                p = peek ();

                if (p == '=' || p == '<')                   // <<= <<<
                {
                  // Third operator character (kept so that we can unget it).
                  //
                  xchar c (get ());

                  switch (p)
                  {
                  case '=':
                    {
                      r = type::in_doc;                     // <<=
                      break;
                    }
                  case '<':
                    {
                      p = peek ();

                      if (p == '=')
                      {
                        get ();
                        r = type::in_str;                   // <<<=
                      }

                      if (!r && redirect_aliases.lll)
                        r = type::in_lll;                   // <<<

                      // We can still end up with the << or < redirect alias,
                      // if any of them is present.
                      //
                      if (!r)
                        unget (c);
                    }

                    break;
                  }
                }

                if (!r && redirect_aliases.ll)
                  r = type::in_ll;                          // <<

                // We can still end up with the < redirect alias, if it is
                // present.
                //
                if (!r)
                  unget (c);

                break;
              }
            }
          }

          if (!r && redirect_aliases.l)
            r = type::in_l;                                 // <

          // Not a redirect (at this point only the initial < is consumed).
          //
          if (!r)
            return nullopt;

          // Handle modifiers.
          //
          // Note that they are only recognized if the redirect (with aliases
          // resolved) is the here-string or here-document.
          //
          const char* mods (nullptr);

          switch (redirect_aliases.resolve (*r))
          {
          case type::in_str:
          case type::in_doc: mods = ":/"; break;
          }

          token t (make_token_with_modifiers (*r, mods));

          return t;
        }
        // >
        //
      case '>':
        {
          // The recognized redirect type, if any. The logic is parallel to
          // the < case above: the two-character redirects are returned
          // directly while for the rest we try the longest form first and
          // then fall back to the shorter redirect aliases.
          //
          optional<type> r;
          xchar p (peek ());

          if (p == '|' || p == '-' || p == '!' || p == '&' || // >| >- >! >&
              p == '=' || p == '+' || p == '?' || p == '>')   // >= >+ >? >>
          {
            // Second operator character (kept so that we can unget it).
            //
            xchar c (get ());

            switch (p)
            {
            case '|': return make_token (type::out_pass);     // >|
            case '-': return make_token (type::out_null);     // >-
            case '!': return make_token (type::out_trace);    // >!
            case '&': return make_token (type::out_merge);    // >&
            case '=': return make_token (type::out_file_ovr); // >=
            case '+': return make_token (type::out_file_app); // >+
            case '?': return make_token (type::out_file_cmp); // >?
            case '>':                                         // >>
              {
                p = peek ();

                if (p == '?' || p == '>')                     // >>? >>>
                {
                  // Third operator character (kept so that we can unget it).
                  //
                  xchar c (get ());

                  switch (p)
                  {
                  case '?':
                    {
                      r = type::out_doc;                       // >>?
                      break;
                    }
                  case '>':
                    {
                      p = peek ();

                      if (p == '?')
                      {
                        get ();
                        r = type::out_str;                     // >>>?
                      }

                      if (!r && redirect_aliases.ggg)
                        r = type::out_ggg;                     // >>>

                      // We can still end up with the >> or > redirect alias,
                      // if any of them is present.
                      //
                      if (!r)
                        unget (c);
                    }

                    break;
                  }
                }

                if (!r && redirect_aliases.gg)
                  r = type::out_gg;                          // >>

                // We can still end up with the > redirect alias, if it is
                // present.
                //
                if (!r)
                  unget (c);

                break;
              }
            }
          }

          if (!r && redirect_aliases.g)
            r = type::out_g;                                 // >

          // Not a redirect (at this point only the initial > is consumed).
          //
          if (!r)
            return nullopt;

          // Handle modifiers.
          //
          // Note that they are only recognized if the redirect (with aliases
          // resolved) is the here-string or here-document. Nothing else is
          // consumed as a modifier after ~.
          //
          const char* mods (nullptr);
          const char* stop (nullptr);

          switch (redirect_aliases.resolve (*r))
          {
          case type::out_str:
          case type::out_doc: mods = ":/~"; stop = "~"; break;
          }

          return make_token_with_modifiers (*r, mods, stop);
        }
      }

      // Not a command operator character.
      //
      return nullopt;
    }
  }
}