// file      : libbuild2/test/script/lexer.cxx -*- C++ -*-
// license   : MIT; see accompanying LICENSE file

#include <libbuild2/test/script/lexer.hxx>

#include <cstring> // strchr()

using namespace std;

namespace build2
{
  namespace test
  {
    namespace script
    {
      using type = token_type;

      // Token types that the redirect aliases map to in the test script
      // lexer: the first three entries are input redirects (string, here-
      // document, file) and the last three are output redirects (string,
      // here-document, file comparison).
      //
      // NOTE(review): the member order is dictated by the declaration of
      // build2::script::redirect_aliases, which is not visible here --
      // confirm against it before reordering or adding entries.
      //
      build2::script::redirect_aliases lexer::redirect_aliases {
        type (type::in_str),
        type (type::in_doc),
        type (type::in_file),
        type (type::out_str),
        type (type::out_doc),
        type (type::out_file_cmp)};

      // Switch the lexer to mode m.
      //
      // The line modes specific to this lexer (command_line, first_token,
      // second_token, variable_line, for_loop, and description_line) are set
      // up here and must be entered with ps being '\0'. Any other mode is
      // forwarded to the base lexer.
      //
      // If esc is absent, then the escape sequences of the current mode are
      // used.
      //
      void lexer::
      mode (base_mode m, char ps, optional<const char*> esc, uintptr_t data)
      {
        // Resolve the escape sequences first since both the base lexer and
        // our own state need them.
        //
        if (!esc)
          esc = current_state ().escapes;

        // Separator strings for the mode being entered. Both stay NULL for
        // description_line.
        //
        // NOTE(review): presumably the second string lists, position by
        // position, the character that must follow the corresponding
        // character of the first one (with space meaning "any") -- confirm
        // against the base lexer.
        //
        const char* first  (nullptr);
        const char* second (nullptr);

        switch (m)
        {
        case lexer_mode::command_line:
        case lexer_mode::for_loop:
        case lexer_mode::second_token:
          {
            // for_loop: leading tokens of the for-loop. Like command_line
            // but also recognizes lsbrace like value.
            //
            // second_token: second token on the script line. Like
            // command_line but recognizes leading variable assignments.
            // Note that to recognize only leading assignments we shouldn't
            // add them to the separator strings, which is why all three
            // modes end up with identical strings.
            //
            first  = ":;=!|&<> $(#\t\n";
            second = "  ==          ";
            break;
          }
        case lexer_mode::first_token:
          {
            // First token on the script line. Like command_line but
            // recognizes leading '.+-{}' as tokens as well as variable
            // assignments as separators.
            //
            // Note that to recognize only leading '.+-{}' we shouldn't add
            // them to the separator strings.
            //
            first  = ":;=+!|&<> $(#\t\n";
            second = "   ==          ";
            break;
          }
        case lexer_mode::variable_line:
          {
            // Like value except we recognize ';' and don't recognize '{'.
            // Note that we don't recognize ':' since having a trailing
            // variable assignment is illegal.
            //
            first  = "; $(#\t\n";
            second = "       ";
            break;
          }
        case lexer_mode::description_line:
          {
            // This one is like a single-quoted string and has an ad hoc
            // implementation (so no separators).
            //
            break;
          }
        default:
          {
            // Not one of ours: let the base lexer deal with it.
            //
            // For the variable mode also request recognition of the special
            // variable names ($*, $N, $~, $@). See also an extra check in
            // word() below.
            //
            if (m == lexer_mode::variable)
            {
              assert (data == 0);
              data = reinterpret_cast<uintptr_t> ("*~@0123456789");
            }

            base_lexer::mode (m, ps, esc, data);
            return;
          }
        }

        // All our modes have the space, newline, and quotes flags enabled.
        //
        const bool space   (true);
        const bool newline (true);
        const bool quotes  (true);

        assert (ps == '\0');

        mode_impl (state {m,
                          data,
                          nullopt,
                          false,
                          false,
                          ps,
                          space,
                          newline,
                          quotes,
                          *esc,
                          first,
                          second});
      }

      // Extract the next token according to the current mode.
      //
      // The line modes are handled by next_line(), the description line by
      // next_description(), and everything else by the base lexer.
      //
      token lexer::
      next ()
      {
        token t;

        switch (mode ())
        {
        case lexer_mode::description_line:
          {
            t = next_description ();
            break;
          }
        case lexer_mode::for_loop:
        case lexer_mode::variable_line:
        case lexer_mode::second_token:
        case lexer_mode::first_token:
        case lexer_mode::command_line:
          {
            t = next_line ();
            break;
          }
        default:
          {
            // The base lexer's token is returned as is: no quoted_
            // adjustment is made on this path.
            //
            return base_lexer::next ();
          }
        }

        // Account for the tokens we have produced ourselves that are not
        // unquoted.
        //
        if (t.qtype != quote_type::unquoted)
          ++quoted_;

        return t;
      }

      // Extract the next token in one of the line modes (command_line,
      // first_token, second_token, variable_line, or for_loop).
      //
      // Special characters are turned into the corresponding tokens
      // depending on the mode. Anything else is put back and lexed as a
      // word.
      //
      token lexer::
      next_line ()
      {
        bool sep (skip_spaces ().first);

        xchar c (get ());
        const uint64_t ln (c.line);
        const uint64_t cn (c.column);

        // Work with a copy of the state since the mode may be expired below
        // (see first/second_token).
        //
        state st (current_state ());
        lexer_mode m (st.mode);

        // Make a token of the specified type at the saved position. Note
        // that sep is captured by reference since it is adjusted for the
        // newline token.
        //
        auto make_token = [&sep, ln, cn] (type t)
        {
          return token (t, sep, ln, cn, token_printer);
        };

        // Handle `[` (do it first to make sure the flag is cleared regardless
        // of what we return).
        //
        if (st.lsbrace)
        {
          assert (m == lexer_mode::variable_line ||
                  m == lexer_mode::for_loop);

          current_state ().lsbrace = false; // Note: st is a copy.

          // If lsbrace_unsep is set, then a separated `[` does not qualify.
          //
          if (c == '[' && !(st.lsbrace_unsep && sep))
            return make_token (type::lsbrace);
        }

        if (eos (c))
          return make_token (type::eos);

        const bool first  (m == lexer_mode::first_token);
        const bool second (m == lexer_mode::second_token);

        // Modes in which the command line separators and operators are
        // recognized (that is, all our line modes except variable_line).
        //
        const bool cmd (first                          ||
                        second                         ||
                        m == lexer_mode::command_line  ||
                        m == lexer_mode::for_loop);

        // Expire certain modes at the end of the token. Do it early in case
        // we push any new mode (e.g., double quote).
        //
        if (first || second)
          expire_mode ();

        // NOTE: remember to update mode() if adding new special characters.

        if (c == '\n')
        {
          // Expire variable value mode at the end of the line.
          //
          if (m == lexer_mode::variable_line)
            expire_mode ();

          sep = true; // Treat newline as always separated.
          return make_token (type::newline);
        }

        // Variable expansion, function call, and evaluation context.
        //
        if (c == '$')
          return make_token (type::dollar);

        if (c == '(')
          return make_token (type::lparen);

        // Line separators: `;` is recognized in all the line modes while `:`
        // only in the command ones.
        //
        if (c == ';' && (cmd || m == lexer_mode::variable_line))
          return make_token (type::semi);

        if (cmd)
        {
          if (c == ':')
            return make_token (type::colon);

          // Comparison (==, !=). Note that a lone `=` or `!` is not consumed
          // here and is handled further down.
          //
          if ((c == '=' || c == '!') && peek () == '=')
          {
            get ();
            return make_token (c == '=' ? type::equal : type::not_equal);
          }

          // Command operators.
          //
          if (optional<token> t = next_cmd_op (c, sep))
            return move (*t);
        }

        // Dot, plus/minus, and left/right curly braces.
        //
        if (first)
        {
          if (c == '.') return make_token (type::dot);
          if (c == '+') return make_token (type::plus);
          if (c == '-') return make_token (type::minus);
          if (c == '{') return make_token (type::lcbrace);
          if (c == '}') return make_token (type::rcbrace);
        }

        // Variable assignment (=, +=, =+).
        //
        if (second)
        {
          if (c == '=')
          {
            if (peek () == '+')
            {
              get ();
              return make_token (type::prepend);
            }

            return make_token (type::assign);
          }
          else if (c == '+' && peek () == '=')
          {
            get ();
            return make_token (type::append);
          }
        }

        // Otherwise it is a word.
        //
        unget (c);
        return word (st, sep);
      }

      // Extract the next token in the description_line mode.
      //
      // The rest of the line up to (but not including) the newline is
      // returned verbatim as a single unquoted token. The newline itself is
      // returned as a separate token by the subsequent call, which also
      // expires the mode. Reaching the end of stream instead of a newline is
      // diagnosed as an error.
      //
      token lexer::
      next_description ()
      {
        xchar c (peek ());

        if (eos (c))
          fail (c) << "expected newline at the end of description line";

        // Remember where the token starts.
        //
        const uint64_t ln (c.line);
        const uint64_t cn (c.column);

        if (c == '\n')
        {
          get ();
          expire_mode (); // Expire the description mode.
          return token (type::newline, true, ln, cn, token_printer);
        }

        // Accumulate everything until the newline or end of stream. For now
        // no line continuations though we could support them.
        //
        string text;

        while (!eos (c) && c != '\n')
        {
          get ();
          text += c;
          c = peek ();
        }

        return token (move (text),
                      false,
                      quote_type::unquoted,
                      false,
                      false,
                      ln,
                      cn);
      }

      // Lex a word using the base lexer and, in the variable mode,
      // additionally reject multi-digit special variable names ($NN).
      //
      token lexer::
      word (const state& st, bool sep)
      {
        // Save the mode before calling the base implementation.
        //
        lexer_mode m (st.mode);

        token t (base_lexer::word (st, sep));

        // A single-digit word in the variable mode is the $N special
        // variable. If it is immediately followed by another digit, then
        // this is $NN, which is not supported.
        //
        if (m == lexer_mode::variable &&
            t.type == type::word      &&
            t.value.size () == 1      &&
            digit (t.value[0]))
        {
          xchar following (peek ());

          if (digit (following))
            fail (following) << "multi-digit special variable name" <<
              info << "use '($*[NN])' to access elements beyond 9";
        }

        return t;
      }
    }
  }
}