/*

We are using the pure parser interface and the reentrant lexer interface
but the Octave parser and lexer are NOT properly reentrant because both
still use many global variables.  It should be safe to create a parser
object and call it while another parser object is active (to parse a
callback function while the main interactive parser is waiting for
input, for example) if you take care to properly save and restore
(typically with an unwind_protect object) relevant global values before
and after the nested call.

*/

%top {
////////////////////////////////////////////////////////////////////////
//
// Copyright (C) 1993-2026 The Octave Project Developers
//
// See the file COPYRIGHT.md in the top-level directory of this
// distribution or <https://octave.org/copyright/>.
//
// This file is part of Octave.
//
// Octave is free software: you can redistribute it and/or modify it
// under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// Octave is distributed in the hope that it will be useful, but
// WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with Octave; see the file COPYING.  If not, see
// <https://www.gnu.org/licenses/>.
//
////////////////////////////////////////////////////////////////////////

// Uncomment to enable parser debugging
// #define OCTAVE_PARSER_DEBUG 1

#if defined (HAVE_CONFIG_H)
#  include "config.h"
#endif

#if defined (HAVE_PRAGMA_GCC_DIAGNOSTIC)
   // This one needs to be global.
#  pragma GCC diagnostic ignored "-Wunused-function"
   // Disable these warnings for code that is generated by flex,
   // including pattern rules.  Push the current state so we can
   // restore the warning state prior to functions we define at
   // the bottom of the file.
#  pragma GCC diagnostic push
#  pragma GCC diagnostic ignored "-Wold-style-cast"
#  pragma GCC diagnostic ignored "-Wsign-compare"
#  pragma GCC diagnostic ignored "-Wzero-as-null-pointer-constant"
#  if defined (HAVE_WARN_IMPLICIT_FALLTHROUGH)
#    pragma GCC diagnostic ignored "-Wimplicit-fallthrough"
#  endif
#endif

// Define away the deprecated register storage class specifier to avoid
// potential warnings about it.
#if ! defined (register)
#  define register
#endif

}

%option prefix = "octave_"
%option noyywrap
%option reentrant
%option bison-bridge

%option noyyalloc
%option noyyrealloc
%option noyyfree

%x COMMAND_START
%s MATRIX_START

%x INPUT_FILE_START

%x BLOCK_COMMENT_START
%x LINE_COMMENT_START

%x DQ_STRING_START
%x SQ_STRING_START

%x FQ_IDENT_START

%{

#include <cctype>
#include <cstring>

#include <algorithm>
#include <charconv>
#include <iostream>
#include <set>
#include <sstream>
#include <string>
#include <stack>

#if defined (OCTAVE_HAVE_FAST_FLOAT)
#  include <fast_float/fast_float.h>
#endif

#include "cmd-edit.h"
#include "mappers.h"
#include "quit.h"
#include "unistd-wrappers.h"

// These would be alphabetical, but oct-parse.h must be included before
// oct-gperf.h and oct-parse.h must be included after token.h and the tree
// class declarations.  We can't include oct-parse.h in oct-gperf.h
// because it may not be protected to allow it to be included multiple
// times.

#include "Cell.h"
#include "defun.h"
#include "error.h"
#include "errwarn.h"
#include "input.h"
#include "interpreter.h"
#include "lex.h"
#include "octave.h"
#include "ov-magic-int.h"
#include "ov.h"
#include "parse.h"
#include "pt-all.h"
#include "symtab.h"
#include "token.h"
#include "utils.h"
#include "variables.h"
#include "oct-parse.h"
#include "oct-gperf.h"

// FIXME: with bison 3.x, OCTAVE_STYPE appears in the generated
// oct-parse.h file, but there is no definition for YYSTYPE, which is
// needed by the code that is generated by flex.  I can't seem to find a
// way to tell flex to use OCTAVE_STYPE instead of YYSTYPE in the code
// it generates, or to tell bison to provide the definition of YYSTYPE
// in the generated oct-parse.h file.

#if defined (OCTAVE_STYPE_IS_DECLARED) && ! defined YYSTYPE
#  define YYSTYPE OCTAVE_STYPE
#endif

#define YY_NO_UNISTD_H 1
#define isatty octave_isatty_wrapper

#if ! (defined (FLEX_SCANNER)                                           \
       && defined (YY_FLEX_MAJOR_VERSION) && YY_FLEX_MAJOR_VERSION >= 2 \
       && defined (YY_FLEX_MINOR_VERSION) && YY_FLEX_MINOR_VERSION >= 5)
#error lex.l requires flex version 2.5.4 or later
#endif

#define YY_EXTRA_TYPE octave::base_lexer *
#define curr_lexer yyextra

// Arrange to get input via readline.

#if defined (YY_INPUT)
#  undef YY_INPUT
#endif
#define YY_INPUT(buf, result, max_size)                 \
  result = curr_lexer->fill_flex_buffer (buf, max_size)

// Try to avoid crashing out completely on fatal scanner errors.

#if defined (YY_FATAL_ERROR)
#  undef YY_FATAL_ERROR
#endif
#define YY_FATAL_ERROR(msg)                     \
   (yyget_extra (yyscanner))->fatal_error (msg)

// Rule action for text that is either an operator or the beginning of
// a command-syntax argument.
//
//   PATTERN  label passed to lexer_debug
//   TOK_ID   token id forwarded to handle_op
//   COMPAT   forwarded unchanged as the last argument of handle_op
//
// If looks_like_command_arg () is true, the whole match is pushed back
// with yyless (0) and COMMAND_START is pushed so that the text is
// scanned again in that state; the action then falls through without
// returning a token.  Otherwise the result of handle_op is returned.
// The macro uses yytext, yyleng, yyless and a return statement, so it
// is only meaningful inside a rule action.
#define CMD_OR_OP(PATTERN, TOK_ID, COMPAT)                      \
   do                                                           \
     {                                                          \
       curr_lexer->lexer_debug (PATTERN);                       \
                                                                \
       /* Preserve values before they may be invalidated by     \
          a call to unput.  */                                  \
       std::string tok_txt (yytext);                            \
       int tok_len = yyleng;                                    \
                                                                \
       if (curr_lexer->looks_like_command_arg ())               \
         {                                                      \
           yyless (0);                                          \
           curr_lexer->push_start_state (COMMAND_START);        \
         }                                                      \
       else                                                     \
         return curr_lexer->handle_op (TOK_ID, tok_txt, tok_len, false, COMPAT); \
     }                                                          \
   while (0)

// Rule action for text that may be a unary operator, the beginning of
// a command-syntax argument, or an operator that needs a separating
// comma inserted in front of it.
//
//   PATTERN  label passed to lexer_debug
//   TOK_ID   token id forwarded to handle_op and to
//            maybe_unput_comma_before_unary_op
//   COMPAT   forwarded unchanged as the last argument of handle_op
//
// When previous_token_may_be_command () is true the behavior is the
// same as CMD_OR_OP.  Otherwise, if maybe_unput_comma_before_unary_op
// returns true, the match is pushed back and a comma is placed in
// front of it on the input so that the comma is scanned first.  In
// every remaining case the result of handle_op is returned.
#define CMD_OR_UNARY_OP(PATTERN, TOK_ID, COMPAT)                        \
   do                                                                   \
     {                                                                  \
       curr_lexer->lexer_debug (PATTERN);                               \
                                                                        \
       /* Preserve values before they may be invalidated by             \
          a call to unput.  */                                          \
       std::string tok_txt (yytext);                                    \
       int tok_len = yyleng;                                            \
                                                                        \
       if (curr_lexer->previous_token_may_be_command ())                \
         {                                                              \
           if (curr_lexer->looks_like_command_arg ())                   \
             {                                                          \
               yyless (0);                                              \
               curr_lexer->push_start_state (COMMAND_START);            \
             }                                                          \
           else                                                         \
             return curr_lexer->handle_op (TOK_ID, tok_txt, tok_len, false, COMPAT); \
         }                                                              \
       else                                                             \
         {                                                              \
           if (curr_lexer->maybe_unput_comma_before_unary_op (TOK_ID))  \
             {                                                          \
               yyless (0);                                              \
               curr_lexer->xunput (',');                                \
             }                                                          \
           else                                                         \
             return curr_lexer->handle_op (TOK_ID, tok_txt, tok_len, false, COMPAT); \
         }                                                              \
     }                                                                  \
   while (0)

// End-of-input check for the push lexer.  Does nothing unless
// is_push_lexer () is true.  For a push lexer:
//
//   * at the end of the current buffer, return STATUS from the
//     scanner;
//   * otherwise, at the end of the file, return the result of
//     handle_end_of_input ();
//   * otherwise fall through so that scanning continues with the text
//     left in the current buffer.
//
// Contains return statements, so it is only meaningful inside a rule
// action.
#define HANDLE_EOB_OR_EOF(STATUS)                       \
   do                                                   \
     {                                                  \
       if (curr_lexer->is_push_lexer ())                \
         {                                              \
           if (curr_lexer->at_end_of_buffer ())         \
             return STATUS;                             \
                                                        \
           if (curr_lexer->at_end_of_file ())           \
             return curr_lexer->handle_end_of_input (); \
         }                                              \
     }                                                  \
   while (0)

   // Account for a continuation line: advance the file position to the
   // next line, then run the push-lexer end-of-input check with status
   // -1.  If we are at the end of the buffer, ask for more input.
   // If we are at the end of the file, deal with it.
   // Otherwise, just keep going with the text from the current buffer.
#define HANDLE_STRING_CONTINUATION              \
   do                                           \
     {                                          \
       curr_lexer->m_filepos.next_line ();      \
                                                \
       HANDLE_EOB_OR_EOF (-1);                  \
     }                                          \
   while (0)

// Rule action for numeric literals.
//
//   PATTERN  label passed to lexer_debug
//   BASE     template argument of handle_number
//
// Three outcomes are possible:
//
//   1. The previous token may be a command word and is followed by a
//      space: push the match back and rescan it in COMMAND_START.
//   2. Whitespace is significant, a space follows the previous token,
//      and that token is not an opening bracket or brace or a binary
//      operator: push the match back and place a comma in front of it
//      so the number becomes a separate list element.
//   3. Otherwise return the result of handle_number<BASE> ().
#define HANDLE_NUMBER(PATTERN, BASE)                                    \
   do                                                                   \
     {                                                                  \
       curr_lexer->lexer_debug (PATTERN);                               \
                                                                        \
       if (curr_lexer->previous_token_may_be_command ()                 \
           &&  curr_lexer->space_follows_previous_token ())             \
         {                                                              \
           yyless (0);                                                  \
           curr_lexer->push_start_state (COMMAND_START);                \
         }                                                              \
       else                                                             \
         {                                                              \
           int tok_id = curr_lexer->previous_token_id ();               \
                                                                        \
           if (curr_lexer->whitespace_is_significant ()                 \
               && curr_lexer->space_follows_previous_token ()           \
               && ! (tok_id == '[' || tok_id == '{'                     \
                     || curr_lexer->previous_token_is_binop ()))        \
             {                                                          \
               yyless (0);                                              \
               curr_lexer->xunput (',');                                \
             }                                                          \
           else                                                         \
             return curr_lexer->handle_number<BASE> ();                 \
         }                                                              \
     }                                                                  \
   while (0)

// Rule action for identifiers.
//
//   pattern  label passed to lexer_debug
//   get_set  when true, only the first three characters of the match
//            are kept (yyless (3)) before the identifier is handled,
//            and m_maybe_classdef_get_set_method is cleared.
//            NOTE(review): presumably those three characters are the
//            "get" or "set" prefix of a classdef accessor name;
//            confirm against the rules that pass true here.
//
// The checks are made in this order:
//
//   1. Whitespace is significant, a space follows the previous token,
//      and that token is not an opening bracket or brace or a binary
//      operator: push the match back and place a comma in front of it.
//   2. We are not looking at a declaration list and the previous token
//      may be a command word: push the match back and rescan it in
//      COMMAND_START.
//   3. Otherwise return the result of handle_identifier ().
#define HANDLE_IDENTIFIER(pattern, get_set)                             \
   do                                                                   \
     {                                                                  \
       curr_lexer->lexer_debug (pattern);                               \
                                                                        \
       int tok_id = curr_lexer->previous_token_id ();                   \
                                                                        \
       if (curr_lexer->whitespace_is_significant ()                     \
           && curr_lexer->space_follows_previous_token ()               \
           && ! (tok_id == '[' || tok_id == '{'                         \
                 || curr_lexer->previous_token_is_binop ()))            \
         {                                                              \
           yyless (0);                                                  \
           curr_lexer->xunput (',');                                    \
         }                                                              \
       else                                                             \
         {                                                              \
           if (! curr_lexer->m_looking_at_decl_list                     \
               && curr_lexer->previous_token_may_be_command ())         \
             {                                                          \
               yyless (0);                                              \
               curr_lexer->push_start_state (COMMAND_START);            \
             }                                                          \
           else                                                         \
             {                                                          \
               if (get_set)                                             \
                 {                                                      \
                   yyless (3);                                          \
                   curr_lexer->m_filepos.increment_column (3);          \
                   curr_lexer->m_maybe_classdef_get_set_method = false; \
                 }                                                      \
                                                                        \
               return curr_lexer->handle_identifier ();                 \
             }                                                          \
         }                                                              \
     }                                                                  \
   while (0)

// Return true if C is a horizontal blank, i.e., a space or a tab.
static inline bool
is_space_or_tab (char c)
{
  switch (c)
    {
    case ' ':
    case '\t':
      return true;

    default:
      return false;
    }
}

// Return true if C is a space, a tab, or one of the end-of-line
// characters (line feed or carriage return).
static inline bool
is_space_or_tab_or_eol (char c)
{
  switch (c)
    {
    case ' ':
    case '\t':
    case '\n':
    case '\r':
      return true;

    default:
      return false;
    }
}

OCTAVE_BEGIN_NAMESPACE(octave)

  // Return true if S is a keyword of the language.
  //
  // The keyword hash table also holds a few words that are handled with
  // the keyword mechanism only to simplify parsing of classdef-style
  // class definitions: the "set" and "get" parts of function names like
  // "set.property_name", and the block names "arguments",
  // "enumeration", "events", "methods", and "properties".  They are not
  // really keywords in the language, so they are reported as
  // non-keywords here.
  bool iskeyword (const std::string& s)
  {
    // FIXME: The following check is duplicated in Fiskeyword.
    if (octave_kw_hash::in_word_set (s.c_str (), s.length ()) == nullptr)
      return false;

    static const char *const pseudo_keywords[] =
      {
        "set", "get", "arguments", "enumeration", "events", "methods",
        "properties"
      };

    for (const char *word : pseudo_keywords)
      {
        if (s == word)
          return false;
      }

    return true;
  }

OCTAVE_END_NAMESPACE(octave)

%}

/* Basic character classes and identifier patterns used by the
   definitions and rules that follow:

     D        one decimal digit
     D_       one decimal digit or an underscore
     S        one space or tab
     NL       one line ending: LF, CR, or CR followed by LF
     CCHAR    one comment character: # or %
     IDENT    a letter or underscore followed by any number of
              letters, digits, or underscores
     FQIDENT  one or more IDENTs joined by dots, with optional blanks
              on either side of each dot  */
D       [0-9]
D_      [0-9_]
S       [ \t]
NL      ((\n)|(\r)|(\r\n))
CCHAR   [#%]
IDENT   ([_a-zA-Z][_a-zA-Z0-9]*)
FQIDENT ({IDENT}({S}*\.{S}*{IDENT})*)

%{
// Decimal numbers may be real or imaginary but always create
// double precision constants initially.  Any conversion to single
// precision happens as part of an expression evaluation in the
// interpreter, not the lexer and parser.
%}

DECIMAL_DIGITS ({D}{D_}*)
EXPONENT       ([DdEe][+-]?{DECIMAL_DIGITS})
REAL_DECIMAL   ((({DECIMAL_DIGITS}\.?)|({DECIMAL_DIGITS}?\.{DECIMAL_DIGITS})){EXPONENT}?)
IMAG_DECIMAL   ({REAL_DECIMAL}[IiJj])
DECIMAL_NUMBER ({REAL_DECIMAL}|{IMAG_DECIMAL})

%{
// It is possible to specify signedness and size for binary and
// hexadecimal numbers but there is no special syntax for imaginary
// constants.  Binary and hexadecimal constants always create integer
// valued constants ({u,}int{8,16,32,64}).  If a size is not specified,
// the smallest integer type that will hold the value is used.  Negative
// values may be created with a signed size specification by applying
// twos-complement conversion (for example, 0xffs8 produces an 8-bit
// signed integer equal to -1 and 0b10000000s8 produces an 8-bit signed
// integer equal to -128).
%}

SIZE_SUFFIX        ([su](8|16|32|64))
BINARY_BITS        (0[bB][01][01_]*)
BINARY_NUMBER      ({BINARY_BITS}|{BINARY_BITS}{SIZE_SUFFIX})
HEXADECIMAL_BITS   (0[xX][0-9a-fA-F][0-9a-fA-F_]*)
HEXADECIMAL_NUMBER ({HEXADECIMAL_BITS}|{HEXADECIMAL_BITS}{SIZE_SUFFIX})

ANY_EXCEPT_NL [^\r\n]
ANY_INCLUDING_NL (.|{NL})

%%

%{
// Make script and function files start with an invalid token. This makes
// the parser go down a special path.
%}

<INPUT_FILE_START>{ANY_INCLUDING_NL} {
    curr_lexer->lexer_debug ("<INPUT_FILE_START>{ANY_INCLUDING_NL}");

    // The match only tells us that the file has content.  Put its
    // first character back so that it is scanned again once this
    // start state has been popped.
    curr_lexer->xunput (yytext[0]);

    // May be reset later if "function" or "classdef" appears as the
    // first token.
    curr_lexer->m_reading_script_file = true;

    curr_lexer->pop_start_state ();

    // Report the special INPUT_FILE token that begins every file.
    return curr_lexer->show_token (INPUT_FILE);
  }

<INPUT_FILE_START><<EOF>> {
    curr_lexer->lexer_debug ("<INPUT_FILE_START><<EOF>>");

    // Empty file: there is no character to put back, but the file is
    // still announced with the INPUT_FILE token.

    // May be reset later if "function" or "classdef" appears as the
    // first token.
    curr_lexer->m_reading_script_file = true;

    curr_lexer->pop_start_state ();

    return curr_lexer->show_token (INPUT_FILE);
  }

%{
// Help and other command-style functions.
%}

%{
// Commands can be continued on a second line using the ellipsis.
// If an argument is in construction, it is completed.
%}

<COMMAND_START>(\.\.\.){ANY_EXCEPT_NL}*{NL} {
    curr_lexer->lexer_debug ("<COMMAND_START>(\\.\\.\\.){ANY_EXCEPT_NL}*{NL}");

    // An argument is under construction: push the whole continuation
    // back on the input and return the completed argument first.  The
    // continuation is then matched again on a later scan.
    if (! curr_lexer->m_string_text.empty ())
      {
        yyless (0);
        curr_lexer->m_tok_end = curr_lexer->m_filepos;
        return curr_lexer->finish_command_arg ();
      }

    // No pending argument: count the line and, for the push lexer,
    // check for the end of the buffer or file.
    HANDLE_STRING_CONTINUATION;
  }

%{
// Commands normally end at the end of a line or a semicolon.
%}

<COMMAND_START>({CCHAR}{ANY_EXCEPT_NL}*)?{NL} {
    curr_lexer->lexer_debug ("<COMMAND_START>({CCHAR}{ANY_EXCEPT_NL}*)?{NL}");

    // An argument is under construction: push the whole match back on
    // the input and return the completed argument first.  The end of
    // line is then matched again on a later scan.
    if (! curr_lexer->m_string_text.empty ())
      {
        yyless (0);
        curr_lexer->m_tok_end = curr_lexer->m_filepos;
        return curr_lexer->finish_command_arg ();
      }

    curr_lexer->update_token_positions (yyleng);

    curr_lexer->m_filepos.next_line ();
    curr_lexer->m_looking_for_object_index = false;

    // The newline ends the command, so whatever follows begins a new
    // statement.  The comma and semicolon rule for this start state
    // sets the same flag when it ends a command.
    curr_lexer->m_at_beginning_of_statement = true;

    curr_lexer->pop_start_state ();

    // The match is either a bare line ending or a comment followed by
    // a line ending, so the first character tells whether a comment
    // was introduced with the hash character.
    curr_lexer->m_comment_uses_hash_char = yytext[0] == '#';
    curr_lexer->finish_comment (octave::comment_elt::end_of_line);

    return curr_lexer->handle_token ('\n');
  }

<COMMAND_START>[\,\;] {
    curr_lexer->lexer_debug ("<COMMAND_START>[\\,\\;]");

    const char separator = yytext[0];

    if (separator == ',' && curr_lexer->m_command_arg_paren_count != 0)
      {
        // A comma seen while the parenthesis count is nonzero is
        // ordinary argument text.
        curr_lexer->m_string_text += yytext;
        curr_lexer->m_filepos.increment_column (yyleng);
      }
    else if (! curr_lexer->m_string_text.empty ())
      {
        // Finish the argument that is under construction first; the
        // separator is pushed back and scanned again afterwards.
        yyless (0);
        curr_lexer->m_tok_end = curr_lexer->m_filepos;
        return curr_lexer->finish_command_arg ();
      }
    else
      {
        // The separator ends the command and the next token begins a
        // new statement.
        curr_lexer->update_token_positions (yyleng);

        curr_lexer->m_looking_for_object_index = false;
        curr_lexer->m_at_beginning_of_statement = true;
        curr_lexer->pop_start_state ();

        return curr_lexer->handle_token (separator);
      }
  }

%{
// Unbalanced parentheses serve as pseudo-quotes: they are included in
// the final argument string, but they cause parentheses and quotes to
// be slurped into that argument as well.
%}

<COMMAND_START>[\(\[\{]* {
    // NOTE(review): the debug label below shows "+" while the pattern
    // itself uses "*"; confirm which repetition is intended.
    curr_lexer->lexer_debug ("<COMMAND_START>[\\(\\[\\{]+");

    // Every matched opening delimiter raises the nesting count by one.
    // The delimiters themselves become part of the argument text.
    curr_lexer->m_command_arg_paren_count += yyleng;
    curr_lexer->m_string_text += yytext;
    curr_lexer->m_filepos.increment_column (yyleng);
  }

<COMMAND_START>[\)\]\}]* {
   // NOTE(review): the debug label below shows "+" while the pattern
   // itself uses "*"; confirm which repetition is intended.
   curr_lexer->lexer_debug ("<COMMAND_START>[\\)\\]\\}]+");

   // Every matched closing delimiter lowers the nesting count by one.
   // The delimiters themselves become part of the argument text.
   curr_lexer->m_command_arg_paren_count -= yyleng;
   curr_lexer->m_string_text += yytext;
   curr_lexer->m_filepos.increment_column (yyleng);
}

%{
// Handle quoted strings.  Quoted strings that are not separated by
// whitespace from other argument text are combined with that previous
// text.  For instance,
//
//   command 'text1'"text2"
//
// has a single argument text1text2, not two separate arguments.
// That's why we must test to see if we are in command argument mode
// when processing the end of a string.
%}

<COMMAND_START>[\"\'] {
    curr_lexer->lexer_debug ("<COMMAND_START>[\\\"\\']");

    if (curr_lexer->m_command_arg_paren_count != 0)
      {
        // While the parenthesis count is nonzero a quote character is
        // plain argument text.
        curr_lexer->m_string_text += yytext;
      }
    else
      {
        // Otherwise it opens a quoted string; pick the start state
        // that matches the kind of quote.
        const auto string_state
          = (yytext[0] == '"' ? DQ_STRING_START : SQ_STRING_START);

        curr_lexer->begin_string (string_state);
      }

    curr_lexer->m_filepos.increment_column (yyleng);
  }

%{
// In standard command argument processing, whitespace separates
// arguments.  In the presence of unbalanced parentheses, it is
// incorporated into the argument.
%}

<COMMAND_START>{S}* {
    curr_lexer->lexer_debug ("<COMMAND_START>{S}*");

    if (curr_lexer->m_command_arg_paren_count == 0)
      {
        // With a zero parenthesis count, whitespace ends the argument
        // that is under construction.  The blanks are pushed back and
        // scanned again after the argument has been returned.  With
        // no pending argument they are skipped.
        if (! curr_lexer->m_string_text.empty ())
          {
            yyless (0);
            curr_lexer->m_tok_end = curr_lexer->m_filepos;
            return curr_lexer->finish_command_arg ();
          }
      }
    else
      // With a nonzero parenthesis count, whitespace is part of the
      // argument text.
      curr_lexer->m_string_text += yytext;

    curr_lexer->m_filepos.increment_column (yyleng);
  }

%{
// Everything else is slurped into the command arguments.
%}

<COMMAND_START>([\.]|[^#% \t\r\n\.\,\;\"\'\(\[\{\}\]\)]*) {
    curr_lexer->lexer_debug ("<COMMAND_START>([\\.]|[^#% \\t\\r\\n\\.\\,\\;\\\"\\'\\(\\[\\{\\}\\]\\)]*");

    // Either a single dot or a run of characters that have no special
    // meaning in command syntax.  Append the text to the argument that
    // is under construction.
    curr_lexer->m_string_text += yytext;
    curr_lexer->m_filepos.increment_column (yyleng);
  }

%{
// Whitespace inside matrix lists.
%}

<MATRIX_START>{S}* {
    curr_lexer->lexer_debug ("<MATRIX_START>{S}*");

    curr_lexer->m_filepos.increment_column (yyleng);

    // No token is returned for the blanks.  Instead, record on the
    // previous token that space follows it, for the later checks
    // that call space_follows_previous_token.
    curr_lexer->mark_previous_token_trailing_space ();
  }

<MATRIX_START>{NL} {
    curr_lexer->lexer_debug ("<MATRIX_START>{NL}");

    curr_lexer->m_filepos.next_line ();

    if (! curr_lexer->m_nesting_level.is_paren ())
      {
        // The newline acts as a row separator: put a semicolon on the
        // input unless the previous token is already a semicolon or an
        // opening bracket or brace.
        const int prev_id = curr_lexer->previous_token_id ();

        const bool separator_not_needed
          = (prev_id == ';' || prev_id == '[' || prev_id == '{');

        if (! separator_not_needed)
          curr_lexer->xunput (';');
      }
    else
      {
        // Innermost nesting is a pair of parentheses: the newline is
        // only reported as a language extension.
        curr_lexer->warn_language_extension ("bare newline inside parentheses");
      }
  }

%{
// Continuation lines in matrix constants are handled as whitespace.
// Allow arbitrary text after the continuation marker.
%}

<MATRIX_START>\.\.\.{ANY_EXCEPT_NL}*{NL} {
    curr_lexer->lexer_debug ("<MATRIX_START>\\.\\.\\.{ANY_EXCEPT_NL}*{NL}");

    // The marker, any text that follows it, and the line ending are
    // all consumed here; no token is returned.
    curr_lexer->handle_continuation ();

    // Even if there wasn't a space before or after the continuation
    // marker, treat the continuation as if it were.  But since it will
    // be transformed to a separator later anyway, there's no need to
    // actually unput a space on the input stream.

    curr_lexer->mark_previous_token_trailing_space ();
  }

%{
// For this and the next two rules, we're looking at ']', and we
// need to know if the next token is '=' or '=='.
//
// It would have been so much easier if the delimiters were simply
// different for the expression on the left hand side of the equals
// operator.
//
// It's also a pain in the ass to decide whether to insert a comma
// after seeing a ']' character...

// FIXME: we need to handle block comments here.
%}

<MATRIX_START>\] {
    curr_lexer->lexer_debug ("<MATRIX_START>\\]");

    // Record the token position, then let handle_close_bracket do the
    // rest of the work and supply the value returned to the parser.
    curr_lexer->update_token_positions (yyleng);
    return curr_lexer->handle_close_bracket (']');
  }

%{
// FIXME: we need to handle block comments here.
%}

<MATRIX_START>\} {
    // NOTE(review): the debug label below ends with "*" although the
    // pattern matches exactly one closing brace.
    curr_lexer->lexer_debug ("<MATRIX_START>\\}*");

    // Record the token position, then let handle_close_bracket do the
    // rest of the work and supply the value returned to the parser.
    curr_lexer->update_token_positions (yyleng);
    return curr_lexer->handle_close_bracket ('}');
  }

\[ {
    curr_lexer->lexer_debug ("\\[");

    // A separating comma has to be inserted in front of the bracket
    // when whitespace is significant, a space follows the previous
    // token, and that token is neither an opening bracket or brace
    // nor a binary operator.
    bool need_separator = false;

    if (curr_lexer->whitespace_is_significant ()
        && curr_lexer->space_follows_previous_token ())
      {
        const int prev_id = curr_lexer->previous_token_id ();

        need_separator = (prev_id != '[' && prev_id != '{'
                          && ! curr_lexer->previous_token_is_binop ());
      }

    if (! need_separator)
      {
        curr_lexer->update_token_positions (yyleng);

        curr_lexer->m_nesting_level.bracket ();

        curr_lexer->m_looking_at_object_index.push_front (false);

        curr_lexer->m_looking_for_object_index = false;
        curr_lexer->m_at_beginning_of_statement = false;

        // While a function is being defined and its name has not been
        // parsed yet, the bracket opens the list of return values.
        // Otherwise it opens a matrix or the left side of an
        // assignment.
        const bool opens_return_list
          = (curr_lexer->m_defining_fcn
             && ! curr_lexer->m_parsed_function_name.top ());

        if (opens_return_list)
          curr_lexer->m_looking_at_return_list = true;
        else
          curr_lexer->m_looking_at_matrix_or_assign_lhs = true;

        curr_lexer->m_bracketflag++;

        curr_lexer->push_start_state (MATRIX_START);

        return curr_lexer->handle_token ('[');
      }
    else
      {
        // Push the bracket back and put a comma in front of it; the
        // bracket is scanned again after the comma.
        yyless (0);
        curr_lexer->xunput (',');
      }
  }

\] {
    curr_lexer->lexer_debug ("\\]");

    curr_lexer->update_token_positions (yyleng);

    // Leave the innermost nesting level and drop the matching entry
    // from the object index stack.
    curr_lexer->m_nesting_level.remove ();

    curr_lexer->m_looking_at_object_index.pop_front ();

    // The closed expression may be followed by an index.
    curr_lexer->m_looking_for_object_index = true;
    curr_lexer->m_at_beginning_of_statement = false;

    return curr_lexer->handle_token (']');
  }

%{
// Gobble comments.  Both BLOCK_COMMENT_START and LINE_COMMENT_START
// are exclusive start states.  We try to grab a continuous series of
// line-oriented comments as a single collection of comments.
%}

%{
// Start of a block comment.  Since comment start states are exclusive,
// this pattern will not match a block comment that immediately follows
// a line-oriented comment.  All we need to do is push the matched text
// back on the input stream and push the new start state.
%}

^{S}*{CCHAR}\{{S}*{NL} {
    curr_lexer->lexer_debug ("^{S}*{CCHAR}\\{{S}*{NL}");

    // Nothing is consumed here.  The opening line is pushed back and
    // matched again by the corresponding BLOCK_COMMENT_START rule,
    // which does the bookkeeping.
    yyless (0);

    curr_lexer->push_start_state (BLOCK_COMMENT_START);
  }

<BLOCK_COMMENT_START>^{S}*{CCHAR}\{{S}*{NL} {
    curr_lexer->lexer_debug ("<BLOCK_COMMENT_START>^{S}*{CCHAR}\\{{S}*{NL}");

    curr_lexer->m_filepos.next_line ();

    // A nonzero nesting level means this opener is nested inside
    // another block comment.  Only the outermost opener is examined
    // by check_comment_for_hash_char.
    // NOTE(review): for a nested opener the comment text is assigned,
    // not appended to, so text collected so far is replaced; confirm
    // that this is intended.
    if (curr_lexer->m_block_comment_nesting_level)
      curr_lexer->m_comment_text = "\n";
    else
      curr_lexer->check_comment_for_hash_char (yytext, yyleng);

    curr_lexer->m_block_comment_nesting_level++;

    // Push lexer only: return -1 at the end of the buffer.
    HANDLE_EOB_OR_EOF (-1);
  }

%{
// End of a block comment.  If this block comment is nested inside
// another, wait for the outermost block comment to be closed before
// storing the comment.

// NOTE: This pattern must appear before the one below.  Both may match
// the same text and this one should take precedence over the one that
// follows.
%}

<BLOCK_COMMENT_START>^{S}*{CCHAR}\}{S}*{NL} {
    curr_lexer->lexer_debug ("<BLOCK_COMMENT_START>^{S}*{CCHAR}\\}{S}*{NL}");

    curr_lexer->m_filepos.next_line ();

    // A nesting level above one means an inner block comment is being
    // closed.  The comment is stored only when the outermost block
    // comment is closed.
    // NOTE(review): for an inner closer the comment text is assigned,
    // not appended to, so text collected so far is replaced; confirm
    // that this is intended.
    if (curr_lexer->m_block_comment_nesting_level > 1)
      curr_lexer->m_comment_text = "\n";
    else
      {
        curr_lexer->check_comment_for_hash_char (yytext, yyleng);
        curr_lexer->finish_comment (octave::comment_elt::block);
      }

    curr_lexer->m_block_comment_nesting_level--;

    if (curr_lexer->m_block_comment_nesting_level == 0)
      {
        // Outermost block comment closed: leave the comment state.
        curr_lexer->pop_start_state ();

        // Push lexer only: at the end of the buffer return -1 when
        // tokens are pending and -2 when none are.
        if (curr_lexer->pending_token_count () > 0)
          HANDLE_EOB_OR_EOF (-1);
        else
          HANDLE_EOB_OR_EOF (-2);
      }
    else
      HANDLE_EOB_OR_EOF (-1);
  }

%{
// Body of a block comment.
%}

<BLOCK_COMMENT_START>{ANY_EXCEPT_NL}*{NL} {
    curr_lexer->lexer_debug ("<BLOCK_COMMENT_START>{ANY_EXCEPT_NL}*{NL}");

    // Any other line inside a block comment is appended verbatim,
    // line ending included, to the comment text.
    curr_lexer->m_filepos.next_line ();
    curr_lexer->m_comment_text += yytext;

    // Push lexer only: return -1 at the end of the buffer.
    HANDLE_EOB_OR_EOF (-1);
  }

%{
// Full-line or end-of-line comment.
%}

{S}*{CCHAR}{ANY_EXCEPT_NL}*{NL} {
    curr_lexer->lexer_debug ("{S}*{CCHAR}{ANY_EXCEPT_NL}*{NL}");

    // Nothing is consumed here.  The comment is pushed back and
    // matched again by the LINE_COMMENT_START rules, which collect
    // its text.
    curr_lexer->push_start_state (LINE_COMMENT_START);
    yyless (0);
  }

%{
// Beginning of a block comment while we are looking at a series of
// line-oriented comments.  Finish previous comment, push current
// text back on input stream, and switch start states.

// NOTE: This pattern must appear before the one below.  Both may match
// the same text and this one should take precedence over the one that
// follows.
%}

<LINE_COMMENT_START>^{S}*{CCHAR}\{{S}*{NL} {
    curr_lexer->lexer_debug ("<LINE_COMMENT_START>^{S}*{CCHAR}\\{{S}*{NL}");

    // Store the line comments collected so far, if any, before the
    // block comment begins.
    if (! curr_lexer->m_comment_text.empty ())
      curr_lexer->finish_comment (octave::comment_elt::full_line);

    // Replace the line comment state by the block comment state and
    // push the opening line back so that it is matched again there.
    curr_lexer->pop_start_state ();
    curr_lexer->push_start_state (BLOCK_COMMENT_START);
    yyless (0);
  }

%{
// Line-oriented comment.  If we are at the beginning of a line, this is
// part of a series of full-line comments.  Otherwise, this is an end of
// line comment.  We don't need to parse the matched text to determine
// whether we are looking at the start of a block comment as that
// pattern is handled above.

// NOTE: This pattern must appear before the one below.  Both may match
// the same text and this one should take precedence over the one that
// follows.
%}

<LINE_COMMENT_START>{S}*{CCHAR}{ANY_EXCEPT_NL}*{NL} {
    curr_lexer->lexer_debug ("<LINE_COMMENT_START>{S}*{CCHAR}{ANY_EXCEPT_NL}*{NL}");

    // Locate the comment text proper: skip the leading blanks first
    // and then the run of comment characters.

    std::size_t pos = 0;

    for (; pos < yyleng && is_space_or_tab (yytext[pos]); pos++)
      ;

    const bool leading_blanks = (pos != 0);

    // Note the first hash character found in the run of comment
    // characters.
    bool hash_not_seen = true;

    for (; pos < yyleng; pos++)
      {
        const char ch = yytext[pos];

        if (ch != '#' && ch != '%')
          break;

        if (hash_not_seen && ch == '#')
          {
            curr_lexer->m_comment_uses_hash_char = true;
            hash_not_seen = false;
          }
      }

    curr_lexer->m_comment_text += yytext + pos;

    if (curr_lexer->m_filepos.column () != 1)
      {
        // End of line comment.

        if (leading_blanks)
          curr_lexer->mark_previous_token_trailing_space ();

        curr_lexer->finish_comment (octave::comment_elt::end_of_line);

        curr_lexer->pop_start_state ();

        // Return the newline character to the input without counting
        // the line here.  That way the actions that belong to a
        // newline need not be duplicated in this rule.

        curr_lexer->xunput ('\n');

        // The rule that picks up the newline should reset the input
        // column to 1, but keep the column accurate until then.
        curr_lexer->m_filepos.increment_column (yyleng);
      }
    else
      {
        // Full-line comment: it stays part of the series that is
        // being collected, so only the line count changes.
        curr_lexer->m_filepos.next_line ();
      }
  }

%{
// End of a series of full-line comments because some other character
// was found on the input stream.
%}

<LINE_COMMENT_START>{ANY_INCLUDING_NL} {
    curr_lexer->lexer_debug ("<LINE_COMMENT_START>{ANY_INCLUDING_NL}");

    // Any single character that does not start another comment line
    // ends the current series of comment lines.  The ASCII 1 marker is
    // handled separately from ordinary characters.

    if (yytext[0] == '\001')
      {
        // We are here because we are using the push parser/lexer
        // interface and we hit the end of the input buffer or file.
        // The special ASCII 1 marker is added to the input by
        // push_lexer::fill_flex_buffer.

        if (curr_lexer->pending_token_count () > 0)
          {
            // We are in the middle of parsing a command, expression,
            // etc., so set the return status so that if we are at the
            // end of the buffer we'll continue looking for more input,
            // possibly buffering a series of line oriented comments as
            // a single block.

            // NOTE(review): the comment text is neither finished nor
            // is the start state popped on this path.
            HANDLE_EOB_OR_EOF (-1);
          }
        else
          {
            // We are not in the process of parsing a command,
            // expression, etc., so end any current sequence of comments
            // with this full line comment, pop the start state and
            // return as if we have just finished parsing a complete
            // statement.

            curr_lexer->finish_comment (octave::comment_elt::full_line);

            curr_lexer->pop_start_state ();

            HANDLE_EOB_OR_EOF (-2);
          }
      }
    else
      {
        // End any current sequence of comments, pop the start state,
        // and unput the pending input character that ended the series
        // of comments.

        curr_lexer->finish_comment (octave::comment_elt::full_line);

        curr_lexer->pop_start_state ();

        // The character is rescanned by the rules of the start state
        // that is now active.
        curr_lexer->xunput (yytext[0]);
      }
  }

%{
// End of file will also end a series of full-line comments.
%}

<LINE_COMMENT_START><<EOF>> {
    curr_lexer->lexer_debug ("<LINE_COMMENT_START><<EOF>>");

    // End of input while collecting comment lines: finish the pending
    // text as a full-line comment and pop the start state.  No value is
    // returned from this action.

    curr_lexer->finish_comment (octave::comment_elt::full_line);

    curr_lexer->pop_start_state ();
  }

%{
// Double-quoted character strings.
%}

<DQ_STRING_START>\"\" {
    curr_lexer->lexer_debug ("<DQ_STRING_START>\\\"\\\"");

    // Two consecutive double quotes inside a double-quoted string
    // contribute one literal '"' character to the string text.
    curr_lexer->m_filepos.increment_column (yyleng);
    curr_lexer->m_string_text += '"';
  }

<DQ_STRING_START>\" {
    curr_lexer->lexer_debug ("<DQ_STRING_START>\\\"");

    // Closing quote of a double-quoted string.

    // m_tok_beg was set when we started parsing the string.
    curr_lexer->m_tok_end = curr_lexer->m_filepos;
    curr_lexer->m_filepos.increment_column ();

    curr_lexer->pop_start_state ();

    // A DQ_STRING token is produced only when the start state that is
    // active after the pop is not COMMAND_START.  Otherwise no token is
    // returned and m_string_text is left as is.
    // NOTE(review): presumably the text then becomes part of a
    // command-syntax argument -- confirm in the COMMAND_START rules.
    if (curr_lexer->start_state() != COMMAND_START)
      {
        curr_lexer->m_looking_for_object_index = true;
        curr_lexer->m_at_beginning_of_statement = false;

        octave::token *tok = new octave::token (DQ_STRING, curr_lexer->m_string_text, curr_lexer->m_tok_beg, curr_lexer->m_tok_end, curr_lexer->get_comment_list ());

        // The token has its own copy of the text; reset the buffer for
        // the next string.
        curr_lexer->m_string_text = "";

        return curr_lexer->handle_token (tok);
      }
  }

<DQ_STRING_START>\\[0-7]{1,3} {
    curr_lexer->lexer_debug ("<DQ_STRING_START>\\\\[0-7]{1,3}");

    // Octal escape sequence inside a double-quoted string: a backslash
    // followed by one to three octal digits.

    curr_lexer->update_token_positions (yyleng);

    // Convert the digits that follow the backslash.
    unsigned int result = 0;
    const char *chars_start = yytext + 1;
    const char *chars_end = yytext + yyleng;
    auto [ptr, ec] = std::from_chars (chars_start, chars_end, result, 8);
    if (ec != std::errc{})
      {
        // The pattern only admits one to three octal digits, so a
        // conversion failure indicates an internal inconsistency
        // rather than bad user input.
        switch (ec)
          {
          case std::errc::invalid_argument:
            curr_lexer->fatal_error ("invalid octal digits in lexer rule <DQ_STRING_START>\\\\[0-7]{1,3} - please report this bug");
            break;
          case std::errc::result_out_of_range:
            curr_lexer->fatal_error ("octal value out of range in lexer rule <DQ_STRING_START>\\\\[0-7]{1,3} - please report this bug");
            break;
          default:
            curr_lexer->fatal_error ("from_chars failed in lexer rule <DQ_STRING_START>\\\\[0-7]{1,3} - please report this bug");
            break;
          }
      }


    // Three octal digits can encode values up to 0777, which does not
    // fit in a single byte; such sequences are reported as syntax
    // errors.
    if (result > 0xff)
      {
        // Use location of octal digits for error token.
        std::string msg {"invalid octal escape sequence in character string"};
        return curr_lexer->syntax_error (msg);
      }
    else
      curr_lexer->m_string_text += static_cast<unsigned char> (result);
  }

<DQ_STRING_START>\\x[0-9a-fA-F]+ {
    curr_lexer->lexer_debug ("<DQ_STRING_START>\\\\x[0-9a-fA-F]+");

    // Hexadecimal escape sequence inside a double-quoted string: "\x"
    // followed by one or more hexadecimal digits.  The low-order byte
    // of the value is appended to the string text.

    curr_lexer->m_filepos.increment_column (yyleng);

    // Only the low-order byte of the value is kept (see below) and that
    // byte is fully determined by the last two hexadecimal digits, so
    // convert just those.  Converting the complete digit sequence would
    // make std::from_chars report result_out_of_range for input with
    // more digits than fit in an unsigned int (for example
    // "\x100000000") and turn acceptable user input into a fatal
    // internal error.  yyleng is at least 3 here: the backslash, the
    // 'x', and one or more digits.
    unsigned int result = 0;
    const char *chars_end = yytext + yyleng;
    const char *chars_start = (yyleng > 4 ? chars_end - 2 : yytext + 2);
    auto [ptr, ec] = std::from_chars (chars_start, chars_end, result, 16);
    if (ec != std::errc{})
      {
        // The pattern only admits hexadecimal digits and at most two of
        // them are converted, so a failure here indicates an internal
        // inconsistency rather than bad user input.
        switch (ec)
          {
          case std::errc::invalid_argument:
            curr_lexer->fatal_error ("invalid hexadecimal digits in lexer rule <DQ_STRING_START>\\\\x[0-9a-fA-F]+ - please report this bug");
            break;
          case std::errc::result_out_of_range:
            curr_lexer->fatal_error ("hexadecimal value out of range in lexer rule <DQ_STRING_START>\\\\x[0-9a-fA-F]+ - please report this bug");
            break;
          default:
            curr_lexer->fatal_error ("from_chars failed in lexer rule <DQ_STRING_START>\\\\x[0-9a-fA-F]+ - please report this bug");
            break;
          }
      }


    // Truncate the value silently instead of checking the range like
    // we do for octal above.  This is to match C/C++ where any number
    // of digits is allowed but the value is implementation-defined if
    // it exceeds the range of the character type.
    curr_lexer->m_string_text += static_cast<unsigned char> (result);
  }

<DQ_STRING_START>"\\a" {
    curr_lexer->lexer_debug ("<DQ_STRING_START>\"\\\\a\"");

    // This rule and the six that follow translate the two-character
    // escape sequences \a, \b, \f, \n, \r, \t, and \v found inside a
    // double-quoted string into the corresponding single character.
    // Each one advances the column by the length of the match and
    // appends the translated character to the string text.

    curr_lexer->m_filepos.increment_column (yyleng);
    curr_lexer->m_string_text += '\a';
  }

<DQ_STRING_START>"\\b" {
    curr_lexer->lexer_debug ("<DQ_STRING_START>\"\\\\b\"");

    curr_lexer->m_filepos.increment_column (yyleng);
    curr_lexer->m_string_text += '\b';
  }

<DQ_STRING_START>"\\f" {
    curr_lexer->lexer_debug ("<DQ_STRING_START>\"\\\\f\"");

    curr_lexer->m_filepos.increment_column (yyleng);
    curr_lexer->m_string_text += '\f';
  }

<DQ_STRING_START>"\\n" {
    curr_lexer->lexer_debug ("<DQ_STRING_START>\"\\\\n\"");

    curr_lexer->m_filepos.increment_column (yyleng);
    curr_lexer->m_string_text += '\n';
  }

<DQ_STRING_START>"\\r" {
    curr_lexer->lexer_debug ("<DQ_STRING_START>\"\\\\r\"");

    curr_lexer->m_filepos.increment_column (yyleng);
    curr_lexer->m_string_text += '\r';
  }

<DQ_STRING_START>"\\t" {
    curr_lexer->lexer_debug ("<DQ_STRING_START>\"\\\\t\"");

    curr_lexer->m_filepos.increment_column (yyleng);
    curr_lexer->m_string_text += '\t';
  }

<DQ_STRING_START>"\\v" {
    curr_lexer->lexer_debug ("<DQ_STRING_START>\"\\\\v\"");

    curr_lexer->m_filepos.increment_column (yyleng);
    curr_lexer->m_string_text += '\v';
  }

<DQ_STRING_START>\\{NL} {
    curr_lexer->lexer_debug ("<DQ_STRING_START>\\\\{NL}");

    // A backslash immediately followed by a newline inside a
    // double-quoted string.  All of the work is done by the
    // HANDLE_STRING_CONTINUATION macro, which is defined elsewhere in
    // this file.
    HANDLE_STRING_CONTINUATION;
  }

<DQ_STRING_START>\\. {
    curr_lexer->lexer_debug ("<DQ_STRING_START>\\\\.");

    // A backslash followed by any other character (except newline,
    // which '.' does not match): drop the backslash and keep the
    // character itself.  The more specific escape rules above take
    // precedence because they appear first.

    curr_lexer->m_filepos.increment_column (yyleng);
    curr_lexer->m_string_text += yytext[1];
  }

<DQ_STRING_START>\. {
    curr_lexer->lexer_debug ("<DQ_STRING_START>\\.");

    // A single literal '.' character.  It is matched on its own because
    // the rule for runs of ordinary text below excludes '.'.

    curr_lexer->m_filepos.increment_column ();
    curr_lexer->m_string_text += yytext[0];
  }

<DQ_STRING_START>[^\.\\\r\n\"]+ {
    curr_lexer->lexer_debug ("<DQ_STRING_START>[^\\.\\\\\\r\\n\\\"]+");

    // A run of ordinary characters: anything except '.', backslash,
    // carriage return, newline, and double quote.  The text is copied
    // to the string unchanged.

    curr_lexer->m_filepos.increment_column (yyleng);
    curr_lexer->m_string_text += yytext;
  }

<DQ_STRING_START>{NL} {
    curr_lexer->lexer_debug ("<DQ_STRING_START>{NL}");

    // A newline that is not preceded by a backslash was found before
    // the closing quote: the string is unterminated.

    // Use current file position for error token.
    std::string msg {"unterminated character string constant"};
    return curr_lexer->syntax_error (msg, curr_lexer->m_filepos);
  }

%{
// Single-quoted character strings.
%}

<SQ_STRING_START>\'\' {
    curr_lexer->lexer_debug ("<SQ_STRING_START>\\'\\'");

    // Two consecutive single quotes inside a single-quoted string
    // contribute one literal single quote character to the string text.
    curr_lexer->m_filepos.increment_column (yyleng);
    curr_lexer->m_string_text += '\'';
  }

<SQ_STRING_START>\' {
    curr_lexer->lexer_debug ("<SQ_STRING_START>\\'");

    // Closing quote of a single-quoted string.

    // m_tok_beg was set when we started parsing the string.
    curr_lexer->m_tok_end = curr_lexer->m_filepos;
    curr_lexer->m_filepos.increment_column ();

    curr_lexer->pop_start_state ();

    // An SQ_STRING token is produced only when the start state that is
    // active after the pop is not COMMAND_START.  Otherwise no token is
    // returned and m_string_text is left as is.
    // NOTE(review): presumably the text then becomes part of a
    // command-syntax argument -- confirm in the COMMAND_START rules.
    if (curr_lexer->start_state() != COMMAND_START)
      {
        curr_lexer->m_looking_for_object_index = true;
        curr_lexer->m_at_beginning_of_statement = false;

        octave::token *tok = new octave::token (SQ_STRING, curr_lexer->m_string_text, curr_lexer->m_tok_beg, curr_lexer->m_tok_end, curr_lexer->get_comment_list ());

        // The token has its own copy of the text; reset the buffer for
        // the next string.
        curr_lexer->m_string_text = "";

        return curr_lexer->handle_token (tok);
      }
  }

<SQ_STRING_START>[^\'\n\r]+ {
    curr_lexer->lexer_debug ("<SQ_STRING_START>[^\\'\\n\\r]+");

    // A run of characters other than single quote, newline, and
    // carriage return.  The text is copied to the string unchanged;
    // there are no backslash escape rules for single-quoted strings.

    curr_lexer->m_filepos.increment_column (yyleng);
    curr_lexer->m_string_text += yytext;
  }

<SQ_STRING_START>{NL} {
    curr_lexer->lexer_debug ("<SQ_STRING_START>{NL}");

    // A newline was found before the closing quote: the string is
    // unterminated.

    // Use current file position for error token.
    std::string msg {"unterminated character string constant"};
    return curr_lexer->syntax_error (msg, curr_lexer->m_filepos);
  }

%{
// Fully-qualified identifiers (used for classdef).
%}

<FQ_IDENT_START>{FQIDENT} {
    // The debug label names exactly the pattern of this rule, like the
    // labels used by all of the other rules.
    curr_lexer->lexer_debug ("<FQ_IDENT_START>{FQIDENT}");

    // A fully-qualified identifier was found: leave the FQ_IDENT_START
    // state and return a token for the identifier.

    curr_lexer->pop_start_state ();

    curr_lexer->update_token_positions (yyleng);

    std::string ident = yytext;

    // Remove any blanks contained in the matched text so that the
    // token holds the name without whitespace.
    ident.erase (std::remove_if (ident.begin (), ident.end (), is_space_or_tab), ident.end ());

    if (curr_lexer->fq_identifier_contains_keyword (ident))
      return curr_lexer->syntax_error ("function, method, class, and package names may not be keywords");

    octave::token *tok = curr_lexer->make_fq_identifier_token (ident);

    return curr_lexer->handle_token (tok);
  }

<FQ_IDENT_START>{S}+ {
    curr_lexer->lexer_debug ("<FQ_IDENT_START>{S}+");

    // Blanks in front of the identifier: advance the column, record
    // that the previous token is followed by whitespace, and remain in
    // the FQ_IDENT_START state.

    curr_lexer->m_filepos.increment_column (yyleng);

    curr_lexer->mark_previous_token_trailing_space ();
  }

<FQ_IDENT_START>(\.\.\.){ANY_EXCEPT_NL}*{NL} {
    curr_lexer->lexer_debug ("<FQ_IDENT_START>(\\.\\.\\.){ANY_EXCEPT_NL}*{NL}");

    // Continuation marker ("...", any text, newline) in front of the
    // identifier: the whole match is discarded and only the line count
    // is advanced.  The lexer remains in the FQ_IDENT_START state.

    curr_lexer->m_filepos.next_line ();
  }

<FQ_IDENT_START>{ANY_INCLUDING_NL} {
    curr_lexer->lexer_debug ("<FQ_IDENT_START>{ANY_INCLUDING_NL}");

    // If input doesn't match FQIDENT, return char and go to previous
    // start state.

    // yyless (0) returns the complete match to the input so that it is
    // scanned again by the rules of the restored start state.
    yyless (0);
    curr_lexer->pop_start_state ();
  }

{BINARY_NUMBER} {
    // Binary constant.  The arguments are the debug label and the
    // value 2.
    // NOTE(review): the second argument is evidently the numeric base;
    // confirm against the HANDLE_NUMBER macro definition.
    HANDLE_NUMBER ("{BINARY_NUMBER}", 2);
  }

%{
// Decimal numbers.  For expressions that are just digits followed
// directly by an element-by-element operator, don't grab the '.'
// part of the operator as part of the constant (for example, in an
// expression like "13./x").
%}

{DECIMAL_DIGITS}/\.[\*/\\^\'] |
{DECIMAL_NUMBER} {
    // Both patterns share this action.  The first one uses trailing
    // context so that the '.' of a following element-by-element
    // operator is not consumed as part of the number.
    // NOTE(review): the second argument is evidently the numeric base;
    // confirm against the HANDLE_NUMBER macro definition.
    HANDLE_NUMBER ("{DECIMAL_DIGITS}/\\.[\\*/\\\\^\\']|{DECIMAL_NUMBER}", 10);
  }

{HEXADECIMAL_NUMBER} {
    // Hexadecimal constant.  The arguments are the debug label and the
    // value 16.
    // NOTE(review): the second argument is evidently the numeric base;
    // confirm against the HANDLE_NUMBER macro definition.
    HANDLE_NUMBER ("{HEXADECIMAL_NUMBER}", 16);
  }

%{
// Eat whitespace.  Whitespace inside matrix constants is handled by
// the <MATRIX_START> start state code above.
%}

{S}+ {
    // Whitespace produces no token.  Advance the column and record
    // that the previous token is followed by whitespace; other rules
    // query that through space_follows_previous_token.
    curr_lexer->m_filepos.increment_column (yyleng);

    curr_lexer->mark_previous_token_trailing_space ();
  }

%{
// Continuation lines.  Allow arbitrary text after continuations.
%}

\.\.\.{ANY_EXCEPT_NL}*{NL} {
    curr_lexer->lexer_debug ("\\.\\.\\.{ANY_EXCEPT_NL}*{NL}");

    // "..." followed by arbitrary text up to and including the newline.
    // The matched text is processed by handle_continuation; no value is
    // returned from this action.
    curr_lexer->handle_continuation ();
  }

%{
// Deprecated C preprocessor style continuation markers.
// (No rule currently follows this heading.)
%}


%{
// End of file.
%}

<<EOF>> {
   // End of input in any start state that has no <<EOF>> rule of its
   // own: return whatever handle_end_of_input produces.
   return curr_lexer->handle_end_of_input ();
  }

%{
// Identifiers.

// Don't allow get and set to be recognized as keywords if they are
// followed by "(".
%}

(set|get){S}*\( {
    // "set" or "get" followed by optional blanks and an opening
    // parenthesis.  This rule passes true as the second argument, the
    // generic identifier rule below passes false.
    // NOTE(review): the flag presumably tells HANDLE_IDENTIFIER that
    // the match extends past the name itself -- confirm against the
    // macro definition.
    HANDLE_IDENTIFIER ("(set|get){S}*\\(", true);
  }

{IDENT} {
    // Any other identifier.
    HANDLE_IDENTIFIER ("{IDENT}", false);
  }

%{
// Superclass method identifiers.
%}

{FQIDENT}{S}*@{S}*{FQIDENT} {
    curr_lexer->lexer_debug ("{FQIDENT}{S}*@{S}*{FQIDENT}");

    // Text of the form NAME@NAME, where either name may be fully
    // qualified and blanks may surround the '@'.

    if (curr_lexer->previous_token_may_be_command ())
      {
        // Return the whole match to the input and scan it again with
        // the COMMAND_START rules.
        yyless (0);
        curr_lexer->push_start_state (COMMAND_START);
      }
    else
      {
        if (curr_lexer->m_at_beginning_of_statement)
          {
            std::string txt = yytext;

            std::size_t at_or_dot_pos = txt.find_first_of ("@.");

            if (at_or_dot_pos != std::string::npos)
              {
                std::size_t spc_pos = txt.find_first_of (" \t");

                // At the beginning of a statement, if a blank occurs
                // before the first '@' or '.', keep only the text in
                // front of that blank, return the rest to the input,
                // and treat the kept text as an ordinary identifier.
                if (spc_pos != std::string::npos && spc_pos < at_or_dot_pos)
                  {
                    yyless (spc_pos);
                    curr_lexer->m_filepos.increment_column (spc_pos);

                    return curr_lexer->handle_identifier ();
                  }
              }
          }

        curr_lexer->m_looking_for_object_index = true;
        curr_lexer->m_at_beginning_of_statement = false;

        return curr_lexer->handle_superclass_identifier ();
      }
  }

%{
// Metaclass query
%}

\?{S}*{FQIDENT} {
    curr_lexer->lexer_debug ("\\?{S}*{FQIDENT}");

    // A '?' followed by optional blanks and a (possibly fully
    // qualified) name.

    if (curr_lexer->previous_token_may_be_command ()
        &&  curr_lexer->space_follows_previous_token ())
      {
        // Return the whole match to the input and scan it again with
        // the COMMAND_START rules.
        yyless (0);
        curr_lexer->push_start_state (COMMAND_START);
      }
    else
      {
        curr_lexer->update_token_positions (yyleng);

        std::string txt = yytext;

        // Remove all blanks from the matched text.
        txt.erase (std::remove_if (txt.begin (), txt.end (), is_space_or_tab), txt.end ());

        // Eliminate leading '?'
        std::string cls = txt.substr (1);

        if (curr_lexer->fq_identifier_contains_keyword (cls))
          return curr_lexer->syntax_error ("class and package names may not be keywords");

        octave::token *tok = curr_lexer->make_meta_identifier_token (cls);

        return curr_lexer->handle_token (tok);
      }
  }

\@ |
\@{S}*{FQIDENT} {
    curr_lexer->lexer_debug ("\\@|\\@{S}*{FQIDENT}");

    // Either a bare '@' or an '@' followed by optional blanks and a
    // (possibly fully qualified) name.  Both patterns share this
    // action; the two cases are told apart by the matched text below.

    if (curr_lexer->previous_token_may_be_command ()
        &&  curr_lexer->space_follows_previous_token ())
      {
        // Return the whole match to the input and scan it again with
        // the COMMAND_START rules.
        yyless (0);
        curr_lexer->push_start_state (COMMAND_START);
      }
    else
      {
        int tok_id = curr_lexer->previous_token_id ();

        if (curr_lexer->whitespace_is_significant ()
            && curr_lexer->space_follows_previous_token ()
            && ! (tok_id == '[' || tok_id == '{'
                  || curr_lexer->previous_token_is_binop ()))
          {
            // Whitespace acts as a separator here: return the match to
            // the input and push a comma in front of it, so the next
            // scan sees the comma first and then this text again.
            yyless (0);
            curr_lexer->xunput (',');
          }
        else
          {
            curr_lexer->update_token_positions (yyleng);

            curr_lexer->m_at_beginning_of_statement = false;

            std::string ident = yytext;

            if (ident == "@")
              {
                // Bare '@': return the character itself as the token.
                curr_lexer->m_looking_at_function_handle++;
                curr_lexer->m_looking_for_object_index = false;

                return curr_lexer->handle_token ('@');
              }
            else
              {
                // '@' followed by a name: strip the '@' and any blanks
                // and return a FCN_HANDLE token for the name.
                ident = ident.substr (1);
                ident.erase (std::remove_if (ident.begin (), ident.end (),
                                             is_space_or_tab), ident.end ());

                octave::token *tok;

                if (octave::iskeyword (ident))
                  {
                    std::string msg {"function handles may not refer to keywords"};
                    return curr_lexer->syntax_error (msg);
                  }

                curr_lexer->m_looking_for_object_index = true;

                tok = new octave::token (FCN_HANDLE, ident, curr_lexer->m_tok_beg, curr_lexer->m_tok_end, curr_lexer->get_comment_list ());

                return curr_lexer->handle_token (tok);
              }
          }
      }
  }

%{
// A new line character.  New line characters inside matrix constants
// are handled by the <MATRIX_START> start state code above.  If closest
// nesting is inside parentheses, don't return a row separator.
%}

{NL} {
    curr_lexer->lexer_debug ("{NL}");

    // The action depends on the innermost nesting level.  If none of
    // the three conditions below holds, the newline is consumed
    // without any further action.

    if (curr_lexer->m_nesting_level.is_paren ())
      {
        // Inside parentheses: count the line and emit the language
        // extension warning, but return no token.
        curr_lexer->m_filepos.next_line ();

        curr_lexer->m_at_beginning_of_statement = false;
        curr_lexer->warn_language_extension
          ("bare newline inside parentheses");
      }
    else if (curr_lexer->m_nesting_level.none ()
        || curr_lexer->m_nesting_level.is_anon_fcn_body ())
      {
        // Not nested, or in the body of an anonymous function: the
        // newline is returned as a token and the next token starts a
        // new statement.
        curr_lexer->update_token_positions (yyleng);
        curr_lexer->m_filepos.next_line ();

        curr_lexer->m_at_beginning_of_statement = true;

        return curr_lexer->handle_token ('\n');
      }
    else if (curr_lexer->m_nesting_level.is_bracket_or_brace ())
      {
        // Newlines inside brackets or braces are expected to be handled
        // by the <MATRIX_START> rules, so reaching this point is
        // reported as an internal error.
        curr_lexer->update_token_positions (yyleng);
        curr_lexer->m_filepos.next_line ();

        // Use current file position for error token.
        std::string msg {"unexpected internal lexer error"};
        return curr_lexer->syntax_error (msg, curr_lexer->m_filepos);
      }
  }

%{
// Single quote can either be the beginning of a string or a transpose
// operator.
%}

"'" {
    curr_lexer->lexer_debug ("'");

    // Decide whether the quote starts a single-quoted string or is the
    // transpose operator (HERMITIAN token).  The decision depends on
    // the previous token, on whether whitespace follows that token, and
    // on whether whitespace is currently significant.

    if (curr_lexer->previous_token_may_be_command ()
        &&  curr_lexer->space_follows_previous_token ())
      {
        // Start of a string; COMMAND_START is pushed first so that it
        // is the active start state again once the string rules pop
        // theirs.
        curr_lexer->m_filepos.increment_column ();
        curr_lexer->push_start_state (COMMAND_START);
        curr_lexer->begin_string (SQ_STRING_START);
      }
    else if (curr_lexer->m_at_beginning_of_statement)
      {
        // At the beginning of a statement the quote starts a string.
        curr_lexer->m_filepos.increment_column ();
        curr_lexer->begin_string (SQ_STRING_START);
      }
    else
      {
        int tok_id = curr_lexer->previous_token_id ();

        if (curr_lexer->whitespace_is_significant ())
          {
            if (curr_lexer->space_follows_previous_token ())
              {
                if (tok_id == '[' || tok_id == '{'
                    || curr_lexer->previous_token_is_binop ())
                  {
                    curr_lexer->m_filepos.increment_column ();
                    curr_lexer->begin_string (SQ_STRING_START);
                  }
                else
                  {
                    // Whitespace acts as a separator here: return the
                    // quote to the input and push a comma in front of
                    // it, so the next scan sees the comma first and
                    // then the quote again.
                    yyless (0);
                    curr_lexer->xunput (',');
                  }
              }
            else
              {
                // No whitespace in front of the quote: it starts a
                // string only after an opening bracket or brace, a
                // binary operator, or a keyword.
                if (tok_id == '[' || tok_id == '{'
                    || curr_lexer->previous_token_is_binop ()
                    || curr_lexer->previous_token_is_keyword ())
                  {
                    curr_lexer->m_filepos.increment_column ();
                    curr_lexer->begin_string (SQ_STRING_START);
                  }
                else
                  {
                    curr_lexer->m_filepos.increment_column ();
                    return curr_lexer->handle_token (HERMITIAN);
                  }
              }
          }
        else
          {
            // Whitespace is not significant: the quote starts a string
            // when there is no previous token id, or after an opening
            // bracket, brace, or parenthesis, a binary operator, or a
            // keyword.  Otherwise it is the transpose operator.
            if (! tok_id || tok_id == '[' || tok_id == '{' || tok_id == '('
                || curr_lexer->previous_token_is_binop ()
                || curr_lexer->previous_token_is_keyword ())
              {
                curr_lexer->m_filepos.increment_column ();
                curr_lexer->begin_string (SQ_STRING_START);
              }
            else
              {
                curr_lexer->m_filepos.increment_column ();
                return curr_lexer->handle_token (HERMITIAN);
              }
          }
      }
  }

%{
// Double quotes always begin strings.
%}

\" {
    curr_lexer->lexer_debug ("\\\"");

    // A double quote starts a double-quoted string in every branch
    // except one: when whitespace is significant and separates the
    // quote from a previous token that is not an opening bracket,
    // opening brace, or binary operator, a comma is inserted in front
    // of the quote first.

    if (curr_lexer->previous_token_may_be_command ()
        &&  curr_lexer->space_follows_previous_token ())
      {
        // Start of a string; COMMAND_START is pushed first so that it
        // is the active start state again once the string rules pop
        // theirs.
        curr_lexer->m_filepos.increment_column ();
        curr_lexer->push_start_state (COMMAND_START);
        curr_lexer->begin_string (DQ_STRING_START);
      }
    else
      {
        int tok_id = curr_lexer->previous_token_id ();

        if (curr_lexer->whitespace_is_significant ())
          {
            if (curr_lexer->space_follows_previous_token ())
              {
                if (tok_id == '[' || tok_id == '{'
                    || curr_lexer->previous_token_is_binop ())
                  {
                    curr_lexer->m_filepos.increment_column ();
                    curr_lexer->begin_string (DQ_STRING_START);
                  }
                else
                  {
                    // Return the quote to the input and push a comma in
                    // front of it, so the next scan sees the comma
                    // first and then the quote again.
                    yyless (0);
                    curr_lexer->xunput (',');
                  }
              }
            else
              {
                curr_lexer->m_filepos.increment_column ();
                curr_lexer->begin_string (DQ_STRING_START);
              }
          }
        else
          {
            curr_lexer->m_filepos.increment_column ();
            curr_lexer->begin_string (DQ_STRING_START);
          }
      }
  }

%{
// Other operators.
//
// Each rule below hands a debug label, the token to return, and a
// boolean flag to the CMD_OR_OP macro, which is defined elsewhere in
// this file.
// NOTE(review): in this table the flag is false only for "!=", while
// the "~=" spelling of the same token passes true, so the flag
// presumably marks Matlab-compatible spellings -- confirm against the
// macro definition.
%}

":"   { CMD_OR_OP (":", ':', true); }
".*"  { CMD_OR_OP (".*", EMUL, true); }
"./"  { CMD_OR_OP ("./", EDIV, true); }
".\\" { CMD_OR_OP (".\\", ELEFTDIV, true); }
".^"  { CMD_OR_OP (".^", EPOW, true); }
"<="  { CMD_OR_OP ("<=", EXPR_LE, true); }
"=="  { CMD_OR_OP ("==", EXPR_EQ, true); }
"!="  { CMD_OR_OP ("!=", EXPR_NE, false); }
"~="  { CMD_OR_OP ("~=", EXPR_NE, true); }
">="  { CMD_OR_OP (">=", EXPR_GE, true); }
"&"   { CMD_OR_OP ("&", EXPR_AND, true); }
"|"   { CMD_OR_OP ("|", EXPR_OR, true); }
"<"   { CMD_OR_OP ("<", EXPR_LT, true); }
">"   { CMD_OR_OP (">", EXPR_GT, true); }
"*"   { CMD_OR_OP ("*", '*', true); }
"/"   { CMD_OR_OP ("/", '/', true); }

%{
// In Matlab, '\' may also trigger command syntax.
%}

"\\" {
    // A single backslash is the left division operator.  Unlike most
    // other operator rules, this one calls handle_op directly instead
    // of going through CMD_OR_OP; see the FIXME below.

    // FIXME: After backslash is no longer handled as a line
    // continuation marker outside of character strings, this
    // action may be replaced with
    //
    //   CMD_OR_OP ("\\", LEFTDIV, true);

    curr_lexer->lexer_debug ("\\");

    return curr_lexer->handle_op (LEFTDIV, yytext, yyleng);
  }

"^"   { /* Power operator, then the two doubled logical operators. */ CMD_OR_OP ("^", POW, true); }
"&&"  { CMD_OR_OP ("&&", EXPR_AND_AND, true); }
"||"  { CMD_OR_OP ("||", EXPR_OR_OR, true); }

";" {
    curr_lexer->lexer_debug (";");

    // The semicolon puts the lexer at the beginning of a new statement
    // unless whitespace is significant or the innermost entry of the
    // m_looking_at_object_index stack is true.
    bool at_beginning_of_statement
      = (! (curr_lexer->whitespace_is_significant ()
            || curr_lexer->m_looking_at_object_index.front ()));

    return curr_lexer->handle_op (';', yytext, yyleng, at_beginning_of_statement);
  }

"+" { /* These four operators go through CMD_OR_UNARY_OP (defined elsewhere in this file) instead of CMD_OR_OP. */ CMD_OR_UNARY_OP ("+", '+', true); }
"-" { CMD_OR_UNARY_OP ("-", '-', true); }

"~" { CMD_OR_UNARY_OP ("~", '~', true); }
"!" { CMD_OR_UNARY_OP ("!", '!', false); }

"," {
    curr_lexer->lexer_debug (",");

    // The comma puts the lexer at the beginning of a new statement
    // unless whitespace is significant or the innermost entry of the
    // m_looking_at_object_index stack is true.
    bool at_beginning_of_statement
      = (! (curr_lexer->whitespace_is_significant ()
            || curr_lexer->m_looking_at_object_index.front ()));

    return curr_lexer->handle_op (',', yytext, yyleng, at_beginning_of_statement);
  }

".'" {
    curr_lexer->lexer_debug (".'");

    // The two-character sequence .' is always returned as a TRANSPOSE
    // token; no command-syntax check is made in this action.
    return curr_lexer->handle_op (TRANSPOSE, yytext, yyleng);
  }

"++" { /* Increment and decrement operators; both pass false as the third argument. */ CMD_OR_UNARY_OP ("++", PLUS_PLUS, false); }
"--" { CMD_OR_UNARY_OP ("--", MINUS_MINUS, false); }

"(" {
    curr_lexer->lexer_debug ("(");

    // Opening parenthesis.  When whitespace is significant and
    // separates the parenthesis from a previous token that is not an
    // opening bracket, opening brace, or binary operator, a comma is
    // inserted in front of it instead of returning a token.

    bool unput_comma = false;

    if (curr_lexer->whitespace_is_significant ()
        && curr_lexer->space_follows_previous_token ())
      {
        int tok_id = curr_lexer->previous_token_id ();

        if (! (tok_id == '[' || tok_id == '{'
               || curr_lexer->previous_token_is_binop ()))
          unput_comma = true;
      }

    if (unput_comma)
      {
        // Return the parenthesis to the input and push a comma in
        // front of it, so the next scan sees the comma first and then
        // the parenthesis again.
        yyless (0);
        curr_lexer->xunput (',');
      }
    else
      {
        curr_lexer->update_token_positions (yyleng);

        // If we are looking for an object index, then push TRUE for
        // m_looking_at_object_index.  Otherwise, just push whatever state
        // is current (so that we can pop it off the stack when we find
        // the matching close paren).

        curr_lexer->m_looking_at_object_index.push_front
          (curr_lexer->m_looking_for_object_index);

        curr_lexer->m_looking_at_indirect_ref = false;
        curr_lexer->m_looking_for_object_index = false;
        curr_lexer->m_at_beginning_of_statement = false;

        // Record the new nesting level; the rule for ")" removes it.
        curr_lexer->m_nesting_level.paren ();

        return curr_lexer->handle_token ('(');
      }
  }

")" {
    curr_lexer->lexer_debug (")");

    // Closing parenthesis: undo the bookkeeping done by the rule for
    // "(" and return the token.

    curr_lexer->update_token_positions (yyleng);

    curr_lexer->m_nesting_level.remove ();

    curr_lexer->m_looking_at_object_index.pop_front ();

    curr_lexer->m_looking_for_object_index = true;
    curr_lexer->m_at_beginning_of_statement = false;

    // If this parenthesis closes the argument list of an anonymous
    // function, what follows is the function body; record that as a
    // nesting level of its own.
    if (curr_lexer->m_looking_at_anon_fcn_args)
      {
        curr_lexer->m_looking_at_anon_fcn_args = false;
        curr_lexer->m_nesting_level.anon_fcn_body ();
      }

    return curr_lexer->handle_token (')');
  }

"." {
    curr_lexer->lexer_debug (".");

    // A single '.' that is not part of a longer token.

    if (curr_lexer->previous_token_may_be_command ()
        && curr_lexer->space_follows_previous_token ())
      {
        // Return the dot to the input and scan it again with the
        // COMMAND_START rules.
        yyless (0);
        curr_lexer->push_start_state (COMMAND_START);
      }
    else
      {
        curr_lexer->update_token_positions (yyleng);

        curr_lexer->m_looking_for_object_index = false;
        curr_lexer->m_at_beginning_of_statement = false;

        return curr_lexer->handle_token ('.');
      }
  }

%{
// = and op= operators.
%}

"=" {
    curr_lexer->lexer_debug ("=");

    // Plain assignment calls handle_op directly; the op= forms below go
    // through CMD_OR_OP instead.
    return curr_lexer->handle_op ('=', yytext, yyleng);
  }

"+="   { /* Every op= operator in this table passes false as the third argument. */ CMD_OR_OP ("+=", ADD_EQ, false); }
"-="   { CMD_OR_OP ("-=", SUB_EQ, false); }
"*="   { CMD_OR_OP ("*=", MUL_EQ, false); }
"/="   { CMD_OR_OP ("/=", DIV_EQ, false); }
"\\="  { CMD_OR_OP ("\\=", LEFTDIV_EQ, false); }
".*="  { CMD_OR_OP (".*=", EMUL_EQ, false); }
"./="  { CMD_OR_OP ("./=", EDIV_EQ, false); }
".\\=" { CMD_OR_OP (".\\=", ELEFTDIV_EQ, false); }
"^="   { CMD_OR_OP ("^=", POW_EQ, false); }
".^="  { CMD_OR_OP (".^=", EPOW_EQ, false); }
"&="   { CMD_OR_OP ("&=", AND_EQ, false); }
"|="   { CMD_OR_OP ("|=", OR_EQ, false); }

%{
// In Matlab, '{' may also trigger command syntax.
%}

"{" {
    curr_lexer->lexer_debug ("{");

    // Opening brace.  When whitespace is significant and separates the
    // brace from a previous token that is not an opening bracket,
    // opening brace, or binary operator, a comma is inserted in front
    // of it instead of returning a token.

    bool unput_comma = false;

    if (curr_lexer->whitespace_is_significant ()
        && curr_lexer->space_follows_previous_token ())
      {
        int tok_id = curr_lexer->previous_token_id ();

        if (! (tok_id == '[' || tok_id == '{'
               || curr_lexer->previous_token_is_binop ()))
          unput_comma = true;
      }

    if (unput_comma)
      {
        // Return the brace to the input and push a comma in front of
        // it, so the next scan sees the comma first and then the brace
        // again.
        yyless (0);
        curr_lexer->xunput (',');
      }
    else
      {
        curr_lexer->m_nesting_level.brace ();

        // Save the current state so that the rule for "}" can pop it.
        curr_lexer->m_looking_at_object_index.push_front
          (curr_lexer->m_looking_for_object_index);

        curr_lexer->m_filepos.increment_column (yyleng);
        curr_lexer->m_looking_for_object_index = false;
        curr_lexer->m_at_beginning_of_statement = false;

        curr_lexer->m_braceflag++;

        // The contents of the braces are scanned with the
        // <MATRIX_START> rules.
        curr_lexer->push_start_state (MATRIX_START);

        return curr_lexer->handle_token ('{');
      }
  }

"}" {
    curr_lexer->lexer_debug ("}");

    // Closing brace matched outside of any exclusive start state: pop
    // the saved object-index state, remove the nesting level, and
    // return the token.

    curr_lexer->update_token_positions (yyleng);

    curr_lexer->m_looking_at_object_index.pop_front ();

    curr_lexer->m_looking_for_object_index = true;
    curr_lexer->m_at_beginning_of_statement = false;

    curr_lexer->m_nesting_level.remove ();

    return curr_lexer->handle_token ('}');
  }

%{
// Unrecognized input.  If the previous token may be a command and is
// followed by a space, parse the remainder of this statement as a
// command-style function call.  Otherwise, unrecognized input is a
// lexical error.
%}

. {
    // Catch-all rule for a single character that no other rule matched.
    // NOTE(review): the debug label below is the same as the one used
    // by the rule for a literal "." above, so the two cannot be told
    // apart in debug output.
    curr_lexer->lexer_debug (".");

    // Check for possible command syntax before doing any other operations
    // that may modify the input buffer.

    if (curr_lexer->previous_token_may_be_command ()
        && curr_lexer->space_follows_previous_token ())
      {
        yyless (0);
        curr_lexer->push_start_state (COMMAND_START);
      }
    else
      {
        // Push the character back and read it again through
        // text_yyinput.
        curr_lexer->xunput (yytext[0]);

        int c = curr_lexer->text_yyinput ();

        // ASCII 1 is the marker that push_lexer::fill_flex_buffer adds
        // at the end of the available input.
        if (c == 1)
          return -1;
        else if (c == EOF)
          return curr_lexer->handle_end_of_input ();
        else
          {
            // Any other character is a lexical error; report it with a
            // printable form of the character and its numeric value.
            std::ostringstream buf;

            buf << "invalid character '"
                << octave::undo_string_escape (static_cast<char> (c))
                << "' (ASCII " << c << ")";

            curr_lexer->update_token_positions (yyleng);

            return curr_lexer->syntax_error (buf.str ());
          }
      }
  }

%{
#if defined (HAVE_PRAGMA_GCC_DIAGNOSTIC)
   // Disable these warnings for flex code.
#  pragma GCC diagnostic ignored "-Wold-style-cast"
#  pragma GCC diagnostic ignored "-Wunused-parameter"
#endif
%}

%%

#if defined (HAVE_PRAGMA_GCC_DIAGNOSTIC)
   // Restore prevailing warning state for remainder of the file.
#  pragma GCC diagnostic pop
#endif

// Allocate SIZE bytes with std::malloc.  The scanner argument is
// accepted but not used.
// NOTE(review): the signature suggests that this is the allocation
// routine flex expects for a reentrant scanner -- confirm against the
// %option lines of this file.
void *
octave_alloc (yy_size_t size, yyscan_t)
{
  void *mem = std::malloc (size);

  return mem;
}

// Resize the block at PTR to SIZE bytes with std::realloc.  The scanner
// argument is accepted but not used.
void *
octave_realloc (void *ptr, yy_size_t size, yyscan_t)
{
  void *resized = std::realloc (ptr, size);

  return resized;
}

// Release the block at PTR with std::free.  The scanner argument is
// accepted but not used.
void
octave_free (void *ptr, yyscan_t)
{
  if (ptr)
    std::free (ptr);
  else
    std::free (nullptr);
}

#if defined (OCTAVE_PARSER_DEBUG)
// Write a readable representation of the character C to std::cerr.
//
// Characters for which isgraph is true are written unchanged.  Character
// codes 0 through 32 and 127 are written as a symbolic name ("NUL",
// "ESC", "SPACE", "DEL", ...) or, for the codes that have a C escape
// sequence, as that escape sequence ("\a", "\b", "\t", "\n", "\v",
// "\f", "\r").  Nothing is written for any other character.
static void
display_character (char c)
{
  // Names for character codes 0 through 32, indexed by character code.
  static const char *const low_names[] =
    {
      "NUL", "SOH", "STX", "ETX", "EOT", "ENQ", "ACK", "\\a",
      "\\b", "\\t", "\\n", "\\v", "\\f", "\\r", "SO",  "SI",
      "DLE", "DC1", "DC2", "DC3", "DC4", "NAK", "SYN", "ETB",
      "CAN", "EM",  "SUB", "ESC", "FS",  "GS",  "RS",  "US",
      "SPACE"
    };

  // The <cctype> classification functions require an argument that is
  // representable as unsigned char (or equal to EOF).  Passing a plain
  // char with a negative value is undefined behavior, so convert first.
  const unsigned char uc = static_cast<unsigned char> (c);

  if (isgraph (uc))
    std::cerr << c;
  else if (uc <= 32)
    std::cerr << low_names[uc];
  else if (uc == 127)
    std::cerr << "DEL";
}
#endif

OCTAVE_BEGIN_NAMESPACE(octave)

// File-local state that exists only in builds with parser debugging
// enabled.  V__lexer_debug_flag__ is the variable queried and set by
// the __lexer_debug_flag__ builtin defined below.
// NOTE(review): the other two presumably back the __display_tokens__
// and __token_count__ builtins in the same way -- confirm.
#if defined (OCTAVE_PARSER_DEBUG)
static bool V__lexer_debug_flag__ = false;
static bool V__display_tokens__ = false;
static std::size_t V__token_count__ = 0;
#endif

DEFUN (iskeyword, args, ,
       doc: /* -*- texinfo -*-
@deftypefn  {} {} iskeyword ()
@deftypefnx {} {} iskeyword (@var{name})
Return true if @var{name} is an Octave keyword.

If @var{name} is omitted, return a list of keywords.
@seealso{isvarname, exist}
@end deftypefn */)
{
  const int n_args = args.length ();

  if (n_args > 1)
    print_usage ();

  // Query form: test a single name with the C++ iskeyword predicate.
  if (n_args != 0)
    {
      const std::string name = args(0).xstring_value ("iskeyword: NAME must be a string");

      return octave_value (iskeyword (name));
    }

  // List form: collect every entry of the keyword table except the
  // words that are not reported as keywords.  Neither set nor get is
  // reported, and the same holds for arguments, enumeration, events,
  // methods, and properties.

  string_vector kw_list (TOTAL_KEYWORDS);

  int n_kept = 0;

  for (int idx = 0; idx < TOTAL_KEYWORDS; idx++)
    {
      const std::string word = wordlist[idx].name;

      // FIXME: The following check is duplicated in the C++ iskeyword
      // predicate.
      const bool excluded
        = (word == "set" || word == "get" || word == "arguments"
           || word == "enumeration" || word == "events"
           || word == "methods" || word == "properties");

      if (excluded)
        continue;

      kw_list[n_kept++] = word;
    }

  // Drop the unused slots at the end before sorting.
  kw_list.resize (n_kept);

  return octave_value (Cell (kw_list.sort ()));
}

/*

%!assert (iskeyword ("for"))
%!assert (iskeyword ("fort"), false)
%!assert (iskeyword ("fft"), false)
%!assert (iskeyword ("get"), false)
%!assert (iskeyword ("set"), false)

%!error iskeyword ("A", "B")
%!error <NAME must be a string> iskeyword (1)

*/

DEFUN (__lexer_debug_flag__, args, nargout,
       doc: /* -*- texinfo -*-
@deftypefn  {} {@var{val} =} __lexer_debug_flag__ ()
@deftypefnx {} {@var{old_val} =} __lexer_debug_flag__ (@var{new_val})
@deftypefnx {} {@var{old_val} =} __lexer_debug_flag__ (@var{new_val}, "local")
Query or set the internal flag that determines whether Octave's lexer prints
debug information as it processes an expression.

When called from inside a function with the @qcode{"local"} option, the
variable is changed locally for the function and any subroutines it calls.
The original variable value is restored when exiting the function.
@seealso{__display_tokens__, __token_count__, __parser_debug_flag__}
@end deftypefn */)
{
  // The flag only exists when OCTAVE_PARSER_DEBUG is defined.  Without
  // it, every call fails with an error.
#if defined (OCTAVE_PARSER_DEBUG)
  return set_internal_variable (V__lexer_debug_flag__, args, nargout,
                                "__lexer_debug_flag__");
#else
  octave_unused_parameter (args);
  octave_unused_parameter (nargout);

  error ("__lexer_debug_flag__: support for debugging the lexer was disabled when Octave was built");
#endif
}

DEFUN (__display_tokens__, args, nargout,
           doc: /* -*- texinfo -*-
@deftypefn  {} {@var{val} =} __display_tokens__ ()
@deftypefnx {} {@var{old_val} =} __display_tokens__ (@var{new_val})
@deftypefnx {} {@var{old_val} =} __display_tokens__ (@var{new_val}, "local")
Query or set the internal variable that determines whether Octave's
lexer displays tokens as they are read.

When called from inside a function with the @qcode{"local"} option, the
variable is changed locally for the function and any subroutines it calls.
The original variable value is restored when exiting the function.
@seealso{__token_count__, __lexer_debug_flag__, __parser_debug_flag__}
@end deftypefn */)
{
  // The variable only exists when OCTAVE_PARSER_DEBUG is defined.
  // Without it, every call fails with an error.
#if defined (OCTAVE_PARSER_DEBUG)
  return set_internal_variable (V__display_tokens__, args, nargout,
                                "__display_tokens__");
#else
  octave_unused_parameter (args);
  octave_unused_parameter (nargout);

  error ("__display_tokens__: support for debugging the lexer was disabled when Octave was built");
#endif
}

DEFUN (__token_count__, , ,
       doc: /* -*- texinfo -*-
@deftypefn {} {@var{n} =} __token_count__ ()
Return the number of language tokens processed since Octave startup.
@seealso{__display_tokens__, __lexer_debug_flag__, __parser_debug_flag__}
@end deftypefn */)
{
  // The counter only exists when OCTAVE_PARSER_DEBUG is defined.
  // Without it, every call fails with an error.
#if defined (OCTAVE_PARSER_DEBUG)
  return octave_value (V__token_count__);
#else
  error ("__token_count__: support for debugging the lexer was disabled when Octave was built");
#endif
}

void
lexical_feedback::symbol_table_context::clear ()
{
  // Discard the saved scopes one at a time, starting at the front.
  for (; ! m_frame_stack.empty (); m_frame_stack.pop_front ())
    ;
}

// Remove the scope at the front of the stack.  Popping an empty stack
// is reported as an internal error.
void
lexical_feedback::symbol_table_context::pop ()
{
  if (empty ())
    error ("unexpected: empty stack in lexical_feedback::symbol_table_context::pop - please report this bug");

  m_frame_stack.pop_front ();
}

// Return the scope at the front of the stack, or the interpreter's
// current scope when the stack is empty.
symbol_scope
lexical_feedback::symbol_table_context::curr_scope () const
{
  return (empty ()
          ? m_interpreter.get_current_scope ()
          : m_frame_stack.front ());
}

// Return the second scope on the stack.  If the stack holds a single
// scope, return that one; if it is empty, return an invalid scope.
symbol_scope
lexical_feedback::symbol_table_context::parent_scope () const
{
  const std::size_t depth = size ();

  if (depth > 1)
    return m_frame_stack[1];

  if (depth == 1)
    return m_frame_stack[0];

  return symbol_scope::invalid ();
}

// Empty the token cache when the object is destroyed.
lexical_feedback::~lexical_feedback ()
{
  m_tokens.clear ();
}

// Seed the object-index stack with its initial entry.
void
lexical_feedback::init ()
{
  // The closest paren, brace, or bracket nesting is not an object index.
  m_looking_at_object_index.push_front (false);
}

// Return all lexical feedback state to its initial values.
void
lexical_feedback::reset ()
{
  // Boolean flags.  Everything starts false except that command
  // syntax is allowed and we are at the beginning of a statement.
  m_end_of_input = false;
  m_allow_command_syntax = true;
  m_at_beginning_of_statement = true;
  m_looking_at_anon_fcn_args = false;
  m_looking_at_return_list = false;
  m_looking_at_parameter_list = false;
  m_looking_at_decl_list = false;
  m_looking_at_matrix_or_assign_lhs = false;
  m_looking_for_object_index = false;
  m_looking_at_indirect_ref = false;
  m_arguments_is_keyword = false;
  m_classdef_element_names_are_keywords = false;
  m_parsing_anon_fcn_body = false;
  m_parsing_class_method = false;
  m_parsing_classdef = false;
  m_parsing_classdef_decl = false;
  m_parsing_classdef_superclass = false;
  m_maybe_classdef_get_set_method = false;
  m_parsing_classdef_get_method = false;
  m_parsing_classdef_set_method = false;
  m_quote_is_transpose = false;
  m_force_script = false;
  m_reading_fcn_file = false;
  m_reading_script_file = false;
  m_reading_classdef_file = false;
  m_buffer_function_text = false;
  m_comment_uses_hash_char = false;

  // Counters and nesting depths.
  m_bracketflag = 0;
  m_braceflag = 0;
  m_looping = 0;
  m_defining_fcn = 0;
  m_looking_at_function_handle = 0;
  m_block_comment_nesting_level = 0;
  m_command_arg_paren_count = 0;
  m_token_count = 0;

  // Position tracking: the input position restarts at line 1,
  // column 1.
  m_filepos = filepos (1, 1);
  m_tok_beg = filepos ();
  m_tok_end = filepos ();

  // Text buffers and file/package names.
  m_string_text = "";
  m_current_input_line = "";
  m_comment_text = "";
  m_function_text = "";
  m_fcn_file_name = "";
  m_fcn_file_full_name = "";
  m_dir_name = "";
  m_package_name = "";

  // Same starting state that init () sets up: a single entry meaning
  // "not an object index".
  m_looking_at_object_index.clear ();
  m_looking_at_object_index.push_front (false);

  while (! m_parsed_function_name.empty ())
    m_parsed_function_name.pop ();

  m_symtab_context.clear ();
  m_nesting_level.reset ();
  m_tokens.clear ();
}

// Return the token at the front of the token cache.
// NOTE(review): callers in this file test the result for null, so
// front () presumably yields a null pointer when no token has been
// stored yet -- confirm against the token cache type.
token *
lexical_feedback::previous_token ()
{
  return m_tokens.front ();
}

// Const overload: return the token at the front of the token cache.
// NOTE(review): callers in this file test the result for null, so
// front () presumably yields a null pointer when no token has been
// stored yet -- confirm against the token cache type.
const token *
lexical_feedback::previous_token () const
{
  return m_tokens.front ();
}

int
lexical_feedback::previous_token_id () const
{
  const token *prev_tok = previous_token ();
  return prev_tok ? prev_tok->token_id () : 0;
}

bool
lexical_feedback::previous_token_is (int tok_id) const
{
  const token *prev_tok = previous_token ();
  return prev_tok ? prev_tok->token_is (tok_id) : false;
}

bool
lexical_feedback::previous_token_is (const token *tok) const
{
  const token *prev_tok = previous_token ();
  return prev_tok ? prev_tok->token_is (tok) : false;
}

void
lexical_feedback::mark_previous_token_trailing_space ()
{
  token *prev_tok = previous_token ();
  if (prev_tok && ! previous_token_is ('\n'))
    prev_tok->mark_trailing_space ();
}

bool
lexical_feedback::space_follows_previous_token () const
{
  const token *prev_tok = previous_token ();
  return prev_tok ? prev_tok->space_follows_token () : false;
}

// Return true if the ID of the previous token is one of the operator
// or separator tokens listed below.
bool
lexical_feedback::previous_token_is_binop () const
{
  switch (previous_token_id ())
    {
    // Single-character tokens.
    case '+':
    case '-':
    case '@':
    case '~':
    case '!':
    case ',':
    case ';':
    case '*':
    case '/':
    case ':':
    case '=':

    // Named operator tokens.
    case ADD_EQ:
    case AND_EQ:
    case DIV_EQ:
    case EDIV:
    case EDIV_EQ:
    case ELEFTDIV:
    case ELEFTDIV_EQ:
    case EMUL:
    case EMUL_EQ:
    case EPOW:
    case EPOW_EQ:
    case EXPR_AND:
    case EXPR_AND_AND:
    case EXPR_EQ:
    case EXPR_GE:
    case EXPR_GT:
    case EXPR_LE:
    case EXPR_LT:
    case EXPR_NE:
    case EXPR_OR:
    case EXPR_OR_OR:
    case LEFTDIV:
    case LEFTDIV_EQ:
    case MUL_EQ:
    case OR_EQ:
    case POW:
    case POW_EQ:
    case SUB_EQ:
      return true;

    default:
      return false;
    }
}

bool
lexical_feedback::previous_token_is_keyword () const
{
  const token *prev_tok = previous_token ();
  return prev_tok ? prev_tok->iskeyword () : false;
}

void
lexical_feedback::mark_as_variable (const std::string& nm)
{
  symbol_scope scope = m_symtab_context.curr_scope ();

  if (scope)
    scope.mark_as_variable (nm);
}

void
lexical_feedback::mark_as_variables (const std::list<std::string>& lst)
{
  symbol_scope scope = m_symtab_context.curr_scope ();

  if (scope)
    scope.mark_as_variables (lst);
}

bool
lexical_feedback::previous_token_may_be_command () const
{
  if (! m_allow_command_syntax)
    return false;

  const token *prev_tok = previous_token ();
  return prev_tok ? prev_tok->may_be_command () : false;
  }

// Return true if the comment text S, after skipping leading
// whitespace, begins with one of the markers "Copyright", "Author",
// or "SPDX-License-Identifier".
static bool
looks_like_copyright (const std::string& s)
{
  // Comment characters have been stripped but whitespace
  // (including newlines) remains.

  const std::size_t start = s.find_first_not_of (" \t\n\r");

  // Empty or all-whitespace text.
  if (start == std::string::npos)
    return false;

  static const std::string markers[]
    = { "Copyright", "Author", "SPDX-License-Identifier" };

  for (const std::string& marker : markers)
    {
      if (s.compare (start, marker.length (), marker) == 0)
        return true;
    }

  return false;
}

// Return true if S is non-empty and its first character is '!'.
// FIXME: This function does not appear to be used anywhere in Octave code.
static bool
looks_like_shebang (const std::string& s)
{
  if (s.empty ())
    return false;

  return s.front () == '!';
}

// Replace the buffer contents with INPUT and restart reading from its
// first character.  EOF_ARG is stored as the buffer's end-of-file flag.
void
base_lexer::input_buffer::fill (const std::string& input, bool eof_arg)
{
  m_buffer = input;
  m_chars_left = m_buffer.length ();
  m_offset = 0;
  m_eof = eof_arg;
}

// Copy the next piece of buffered input into BUF, which has room for
// MAX_SIZE characters, and return the number of characters stored.
// At most MAX_SIZE characters are ever written to BUF.
//
// If BY_LINES is true, return chunks to the lexer line by line.  A
// line that is longer than MAX_SIZE is returned in pieces over
// successive calls.
//
// It is an internal error to call this function when no characters
// can be copied (nothing left in the buffer, or MAX_SIZE is zero).
int
base_lexer::input_buffer::copy_chunk (char *buf, std::size_t max_size,
                                      bool by_lines)
{
  static const char * const eol = "\n";

  // Never copy more than BUF can hold or more than remains buffered.
  std::size_t len = (max_size > m_chars_left ? m_chars_left : max_size);

  if (by_lines)
    {
      std::size_t newline_pos = m_buffer.find ('\n', m_offset);

      if (newline_pos != std::string::npos)
        {
          // Stop just after the newline, but only if the whole line
          // fits.  Otherwise keep the clamped length so that we do
          // not write past the end of BUF.
          std::size_t line_len = newline_pos - m_offset + 1;

          if (line_len < len)
            len = line_len;
        }
    }

  if (len == 0)
    error ("unexpected: buffer underflow in base_lexer::input_buffer::copy_chunk - please report this bug");

  memcpy (buf, m_buffer.c_str () + m_offset, len);

  m_chars_left -= len;
  m_offset += len;

  // Make sure the final input returned to the lexer ends with a new
  // line character.

  if (m_chars_left == 0 && buf[len-1] != '\n')
    {
      if (len < max_size)
        {
          // There is enough room to plug the newline character in
          // the buffer.
          buf[len++] = '\n';
        }
      else
        {
          // There isn't enough room to plug the newline character
          // in BUF so arrange to have it returned on the next call
          // to base_lexer::read.

          // At this point we've exhausted the original input
          // (m_chars_left is zero) so we can overwrite the initial
          // buffer with a single newline character to be returned on
          // the next call.

          m_buffer = eol;
          m_chars_left = 1;
          m_offset = 0;
        }
    }

  return len;
}

// Release the flex scanner that init () created.
base_lexer::~base_lexer ()
{
  yylex_destroy (m_scanner);
}

// Create the flex scanner for this lexer, attach this object to it,
// and clear the start state.
void
base_lexer::init ()
{
  yylex_init (&m_scanner);

  // Make base_lexer object available through yyextra in
  // flex-generated lexer.
  yyset_extra (this, m_scanner);

  clear_start_state ();
}

// Inside Flex-generated functions, yyg is the scanner cast to its real
// type.  Some flex macros that we use in base_lexer member functions
// (for example, BEGIN) use yyg.  If we could perform the actions of
// these macros with functions instead, we could eliminate the
// OCTAVE_YYG macro.

#define OCTAVE_YYG                                                      \
  struct yyguts_t *yyg = static_cast<struct yyguts_t*> (m_scanner)

// Return the lexer to its initial state: clear the start state and
// symbol table context, optionally restart the scanner on stdin, reset
// the lexical feedback state, and discard pending comments.
void
base_lexer::reset ()
{
  // Start off on the right foot.
  clear_start_state ();
  m_symtab_context.clear ();

  // Only ask for input from stdin if we are expecting interactive
  // input, that is, when we are not reading any kind of file and not
  // evaluating a string.

  if (m_interpreter.interactive ()
      && ! m_reading_fcn_file
      && ! m_reading_classdef_file
      && ! m_reading_script_file
      && ! input_from_eval_string ())
    yyrestart (stdin, m_scanner);

  lexical_feedback::reset ();
  m_comment_list.clear ();
}

// Prepare to read from a file.  The input is treated as a script file
// at this point; make_keyword_token changes that if the first token is
// "function" or "classdef".
void
base_lexer::prep_for_file ()
{
  m_reading_script_file = true;

  push_start_state (INPUT_FILE_START);
}

// Record the current input position as the beginning of the token and
// push the start state STATE.
void
base_lexer::begin_string (int state)
{
  m_tok_beg = m_filepos;

  push_start_state (state);
}

int
base_lexer::handle_end_of_input ()
{
  lexer_debug ("<<EOF>>");

  m_tok_beg = m_filepos;
  m_tok_end = m_filepos;

  if (m_block_comment_nesting_level != 0)
    {
      std::string msg {"block comment unterminated at end of input"};

      if ((m_reading_fcn_file || m_reading_script_file || m_reading_classdef_file)
          && ! m_fcn_file_name.empty ())
        msg += " near line " + std::to_string (m_filepos.line () - 1) + " of file '" + m_fcn_file_name + ".m'";

      syntax_error (msg);
    }

  token *tok = new token (END_OF_INPUT, m_tok_beg, m_tok_end, get_comment_list ());

  return handle_token (tok);
}

// Return the text of the current match as reported by the flex scanner.
char *
base_lexer::flex_yytext ()
{
  return yyget_text (m_scanner);
}

// Return the length of the current match as reported by the flex
// scanner.
int
base_lexer::flex_yyleng ()
{
  return yyget_leng (m_scanner);
}

// Read the next character from the scanner, translating both CRLF and
// a lone CR into a single LF.
int
base_lexer::text_yyinput ()
{
  int c = yyinput (m_scanner);

#if defined (OCTAVE_PARSER_DEBUG)
  if (V__lexer_debug_flag__)
    {
      std::cerr << "I: ";
      display_character (c);
      std::cerr << std::endl;
    }
#endif

  // Anything other than CR is returned unchanged.
  if (c != '\r')
    return c;

  // Convert CRLF into just LF and single CR into LF.  Look at the
  // character that follows the CR to tell the two cases apart.
  c = yyinput (m_scanner);

#if defined (OCTAVE_PARSER_DEBUG)
  if (V__lexer_debug_flag__)
    {
      std::cerr << "I: ";
      display_character (c);
      std::cerr << std::endl;
    }
#endif

  // CRLF: the LF we just read is the result.
  if (c == '\n')
    return c;

  // Lone CR: give back the character that followed it and return LF.
  xunput (c);

  return '\n';
}

// Push C back onto the scanner input, with BUF passed through to
// yyunput.  EOF is never pushed back.
void
base_lexer::xunput (char c, char *buf)
{
  if (c == EOF)
    return;

#if defined (OCTAVE_PARSER_DEBUG)
  if (V__lexer_debug_flag__)
    {
      std::cerr << "U: ";
      display_character (c);
      std::cerr << std::endl;
    }
#endif

  yyunput (c, buf, m_scanner);
}

// Push C back onto the scanner input, using the current match text as
// the buffer argument.
void
base_lexer::xunput (char c)
{
  xunput (c, flex_yytext ());
}

// Set the beginning and end positions for a token of TOK_LEN
// characters that starts at the current input position, then advance
// the input position past it.
void
base_lexer::update_token_positions (int tok_len)
{
  m_tok_beg = m_filepos;
  m_tok_end = m_filepos;

  // The end position refers to the last character of the token, so it
  // only moves for tokens longer than one character.
  const int extra_columns = tok_len - 1;

  if (extra_columns > 0)
    m_tok_end.increment_column (extra_columns);

  m_filepos.increment_column (tok_len);
}

// Peek at the next input character, push it back, and return true if
// it is a space or tab.
bool
base_lexer::looking_at_space ()
{
  const int next_char = text_yyinput ();

  xunput (next_char);

  return is_space_or_tab (next_char);
}

// Return true if any entry on the object-index stack is true.
bool
base_lexer::inside_any_object_index ()
{
  for (const bool flag : m_looking_at_object_index)
    {
      if (flag)
        return true;
    }

  return false;
}

// If S is a keyword in the current lexical context, update the lexer
// state for that keyword and return a newly allocated token for it.
// Otherwise, return nullptr and leave m_at_beginning_of_statement as
// it was on entry.
//
// Some names in the keyword table are only keywords in certain
// contexts ('end' inside an index expression, 'get' and 'set',
// 'arguments', and the classdef block names).
token *
base_lexer::make_keyword_token (const std::string& s)
{
  // Token positions should have already been updated before this
  // function is called.

  int slen = s.length ();

  const octave_kw *kw = octave_kw_hash::in_word_set (s.c_str (), slen);

  if (! kw)
    return nullptr;

  bool previous_at_bos = m_at_beginning_of_statement;

  // May be reset to true for some token types.
  m_at_beginning_of_statement = false;

  // S is in the keyword table but is not a keyword in the current
  // context: undo the change made above and produce no token.
  auto not_a_keyword = [this, previous_at_bos] () -> token *
  {
    m_at_beginning_of_statement = previous_at_bos;
    return nullptr;
  };

  // Create the token for a block-ending keyword of type END_TYPE.  A
  // new statement begins after any of them.
  auto make_end_token = [this, kw] (auto end_type) -> token *
  {
    token *end_tok = new token (kw->tok_id, end_type, m_tok_beg, m_tok_end,
                                get_comment_list ());
    m_at_beginning_of_statement = true;
    return end_tok;
  };

  token *tok = nullptr;

  switch (kw->kw_id)
    {
    case break_kw:
    case catch_kw:
    case continue_kw:
    case else_kw:
    case otherwise_kw:
    case return_kw:
    case unwind_protect_cleanup_kw:
      m_at_beginning_of_statement = true;
      break;

    case persistent_kw:
    case global_kw:
      m_looking_at_decl_list = true;
      break;

    case case_kw:
    case elseif_kw:
    case until_kw:
      break;

    case end_kw:
      // 'end' is not a keyword inside an index expression, or while
      // defining a function unless we are looking at the return list
      // or the function name has already been parsed.
      if (inside_any_object_index ()
          || (m_defining_fcn
              && ! (m_looking_at_return_list
                    || m_parsed_function_name.top ())))
        return not_a_keyword ();

      tok = make_end_token (token::simple_end);
      break;

    case end_try_catch_kw:
      tok = make_end_token (token::try_catch_end);
      break;

    case end_unwind_protect_kw:
      tok = make_end_token (token::unwind_protect_end);
      break;

    case endfor_kw:
      tok = make_end_token (token::for_end);
      break;

    case endfunction_kw:
      tok = make_end_token (token::function_end);
      break;

    case endif_kw:
      tok = make_end_token (token::if_end);
      break;

    case endparfor_kw:
      tok = make_end_token (token::parfor_end);
      break;

    case endswitch_kw:
      tok = make_end_token (token::switch_end);
      break;

    case endwhile_kw:
      tok = make_end_token (token::while_end);
      break;

    case endarguments_kw:
#if defined (DISABLE_ARGUMENTS_VALIDATION_BLOCK)
      return not_a_keyword ();
#else
      tok = make_end_token (token::arguments_end);
      break;
#endif

    case endclassdef_kw:
      tok = make_end_token (token::classdef_end);
      break;

    case endenumeration_kw:
      tok = make_end_token (token::enumeration_end);
      break;

    case endevents_kw:
      tok = make_end_token (token::events_end);
      break;

    case endmethods_kw:
      tok = make_end_token (token::methods_end);
      break;

    case endproperties_kw:
      tok = make_end_token (token::properties_end);
      break;

    case for_kw:
    case parfor_kw:
    case while_kw:
      m_looping++;
      break;

    case do_kw:
      m_at_beginning_of_statement = true;
      m_looping++;
      break;

    case try_kw:
    case unwind_protect_kw:
      m_at_beginning_of_statement = true;
      break;

    case if_kw:
    case switch_kw:
      break;

    case get_kw:
    case set_kw:
      // 'get' and 'set' are keywords in classdef method declarations.
      if (! m_maybe_classdef_get_set_method)
        return not_a_keyword ();
      break;

    case enumeration_kw:
    case events_kw:
    case methods_kw:
    case properties_kw:
      // 'enumeration', 'events', 'methods', and 'properties' are
      // keywords for classdef blocks.
      if (! m_classdef_element_names_are_keywords)
        return not_a_keyword ();
      // fall through ...

    case classdef_kw:
      // 'classdef' is always a keyword.  If it is the first token read
      // from a file, the file is a classdef file.
      if (! m_force_script && m_token_count == 0 && input_from_file ())
        {
          m_reading_classdef_file = true;
          m_reading_script_file = false;
        }
      break;

    case function_kw:
      m_defining_fcn++;
      m_parsed_function_name.push (false);

      // If 'function' is the first token read from a file, the file
      // is a function file.
      if (! m_force_script && m_token_count == 0 && input_from_file ())
        {
          m_reading_fcn_file = true;
          m_reading_script_file = false;
        }

      // FIXME: should we be asking directly whether input is coming from an
      // eval string instead of that it is not coming from a file?
      if (! (m_reading_fcn_file || m_reading_script_file
             || m_reading_classdef_file))
        {
          // Input must be coming from the terminal or stdin?
          m_buffer_function_text = true;
          m_function_text += (m_current_input_line + "\n");

          // FIXME: do we need to save and restore the file position
          // or just reset the line number here?  The goal is to
          // track line info for command-line functions relative
          // to the function keyword.  Should we really be setting
          // the line and column info to (1, 1) here?

          m_filepos = filepos (1, 1);
          update_token_positions (slen);
        }
      break;

    case arguments_kw:
      // 'arguments' is only a keyword where an arguments block may
      // appear.  Restore the statement state when it is not, as is
      // done for the other context-dependent keywords.
#if defined (DISABLE_ARGUMENTS_VALIDATION_BLOCK)
      return not_a_keyword ();
#else
      if (! m_arguments_is_keyword)
        return not_a_keyword ();
      break;
#endif

    case spmd_kw:
      m_at_beginning_of_statement = true;
      break;

    case endspmd_kw:
      tok = make_end_token (token::spmd_end);
      break;

    case magic_file_kw:
      {
        // __FILE__ is the full name of the file being read, or "stdin"
        // when no file name is available.
        if ((m_reading_fcn_file || m_reading_script_file
             || m_reading_classdef_file)
            && ! m_fcn_file_full_name.empty ())
          tok = new token (kw->tok_id, m_fcn_file_full_name, m_tok_beg, m_tok_end, get_comment_list ());
        else
          tok = new token (kw->tok_id, "stdin", m_tok_beg, m_tok_end, get_comment_list ());
      }
      break;

    case magic_line_kw:
      {
        // __LINE__ is the line on which the token begins.
        int l = m_tok_beg.line ();
        octave_value ov (static_cast<double> (l));
        tok = new token (kw->tok_id, ov, "", m_tok_beg, m_tok_end, get_comment_list ());
      }
      break;

      // We should have handled all possible enum values above.  Rely on
      // compiler diagnostics to warn if we haven't.  For example, GCC's
      // -Wswitch option, enabled by -Wall, will provide a warning.
    }

  // Keywords that did not need a special token get a generic one.
  if (! tok)
    tok = new token (kw->tok_id, true, m_tok_beg, m_tok_end, get_comment_list ());

  return tok;
}

/*

## check if magic file and line keywords are working
%!assert <*62587> (ischar (__FILE__))
%!assert <*62587> (isnumeric (__LINE__))

*/

// Return true if any '.'-separated component of the fully qualified
// identifier S is a keyword.
bool
base_lexer::fq_identifier_contains_keyword (const std::string& s)
{
  std::size_t start = 0;

  while (true)
    {
      const std::size_t dot = s.find ('.', start);

      // Last component: everything from START to the end of S.
      if (dot == std::string::npos)
        return iskeyword (s.substr (start));

      if (iskeyword (s.substr (start, dot - start)))
        return true;

      // Continue with the component that follows the dot.
      start = dot + 1;
    }
}

// Return true if the current nesting is a bracket, or a brace whose
// front entry on the object-index stack is false.
bool
base_lexer::whitespace_is_significant ()
{
  if (m_nesting_level.is_bracket ())
    return true;

  return (m_nesting_level.is_brace ()
          && ! m_looking_at_object_index.front ());
}

// Return true if the LEN characters at S begin with a binary prefix
// (0b or 0B) followed by at least one more character.
static inline bool
looks_like_bin (const char *s, int len)
{
  if (len <= 2 || s[0] != '0')
    return false;

  const char marker = s[1];

  return marker == 'b' || marker == 'B';
}

// Return true if the LEN characters at S begin with a hexadecimal
// prefix (0x or 0X) followed by at least one more character.
static inline bool
looks_like_hex (const char *s, int len)
{
  if (len <= 2 || s[0] != '0')
    return false;

  const char marker = s[1];

  return marker == 'x' || marker == 'X';
}

// Report an internal error for a byte count that make_integer_value
// does not support.  Declared OCTAVE_NORETURN.
OCTAVE_NORETURN static void
error_unexpected_bytes (int bytes)
{
  error ("unexpected: bytes (= %d) not 1, 2, 4, or 8 in make_integer_value - please report this bug", bytes);
}

// Wrap LONG_INT_VAL in an Octave integer value that is BYTES bytes
// wide (1, 2, 4, or 8) and unsigned if UNSIGNED_VAL is true, signed
// otherwise.  Any other width is an internal error.
static inline octave_value
make_integer_value (uintmax_t long_int_val, bool unsigned_val, int bytes)
{
  // FIXME: Conversion to signed values is supposed to follow
  // twos-complement rules.  Do we need to be more careful here?

  switch (bytes)
    {
    case 1:
      return (unsigned_val
              ? octave_value (octave_uint8 (long_int_val))
              : octave_value (octave_int8 (int8_t (long_int_val))));

    case 2:
      return (unsigned_val
              ? octave_value (octave_uint16 (long_int_val))
              : octave_value (octave_int16 (int16_t (long_int_val))));

    case 4:
      return (unsigned_val
              ? octave_value (octave_uint32 (long_int_val))
              : octave_value (octave_int32 (int32_t (long_int_val))));

    case 8:
      return (unsigned_val
              ? octave_value (octave_uint64 (long_int_val))
              : octave_value (octave_int64 (int64_t (long_int_val))));

    default:
      error_unexpected_bytes (bytes);
    }

  // Not reached; error_unexpected_bytes does not return.
  return octave_value ();
}

// Handle a binary constant: 0b or 0B, then binary digits that may
// contain '_' separators, then an optional suffix made of 's' or 'u'
// and a bit width (8, 16, 32, or 64).  Without a suffix the value is
// unsigned and stored in the smallest type that holds the digits.
template <>
int
base_lexer::handle_number<2> ()
{
  // Skip 0[bB] prefix.
  std::string yytxt (flex_yytext () + 2);

  // Remove digit separators.
  yytxt.erase (std::remove (yytxt.begin (), yytxt.end (), '_'), yytxt.end ());

  const std::size_t suffix_pos = yytxt.find_first_of ("su");

  bool unsigned_val = true;
  int bytes = -1;

  if (suffix_pos != std::string::npos)
    {
      unsigned_val = (yytxt[suffix_pos] == 'u');

      // Split the text into the digits and the requested bit width.
      const std::string width_str = yytxt.substr (suffix_pos+1);
      yytxt.erase (suffix_pos);

      int requested_bytes = -1;

      if (width_str == "8")
        requested_bytes = 1;
      else if (width_str == "16")
        requested_bytes = 2;
      else if (width_str == "32")
        requested_bytes = 4;
      else if (width_str == "64")
        requested_bytes = 8;

      // Each binary digit is one bit; the digits must fit in the
      // requested width.
      if (requested_bytes > 0
          && yytxt.length () <= 8 * static_cast<std::size_t> (requested_bytes))
        bytes = requested_bytes;
    }
  else
    {
      const std::size_t num_digits = yytxt.length ();

      bytes = (num_digits <= 8 ? 1
               : (num_digits <= 16 ? 2
                  : (num_digits <= 32 ? 4
                     : (num_digits <= 64 ? 8 : -1))));
    }

  if (bytes < 0)
    {
      std::string msg {"too many digits for binary constant"};
      return syntax_error (msg);
    }

  // FIXME: is there a better way?  Can uintmax_t be anything other than long
  // or long long?  Should we just be using uint64_t instead of uintmax_t?

  errno = 0;
  char *end;
  uintmax_t long_int_val;
  if (sizeof (uintmax_t) == sizeof (unsigned long long))
    long_int_val = strtoull (yytxt.c_str (), &end, 2);
  else if (sizeof (uintmax_t) == sizeof (unsigned long))
    long_int_val = strtoul (yytxt.c_str (), &end, 2);
  else
    error ("unexpected: size mismatch: uintmax_t vs unsigned long or unsigned long long in base_lexer::handle_number<2> - please report this bug");

  if (errno == ERANGE)
    error ("unexpected: ERANGE error in base_lexer::handle_number<2> - please report this bug");

  octave_value ov = make_integer_value (long_int_val, unsigned_val, bytes);

  m_looking_for_object_index = false;
  m_at_beginning_of_statement = false;

  update_token_positions (flex_yyleng ());

  // The token text is the digit string without prefix, separators, or
  // suffix.
  token *tok = new token (NUMBER, ov, yytxt, m_tok_beg, m_tok_end, get_comment_list ());

  return handle_token (tok);
}

// Return two raised to the number of mantissa digits of a double.
static uint64_t
flintmax ()
{
  constexpr int mantissa_bits = std::numeric_limits<double>::digits;

  return uint64_t {1} << mantissa_bits;
}

// Return true if the decimal literal TXT denotes a magnitude less than
// one (or zero).  TXT consists of an optional sign, digits with an
// optional '.', and an optional exponent introduced by 'e' or 'E'.
//
// This is used to tell underflow from overflow when from_chars reports
// result_out_of_range, so only the sign of the decimal order of
// magnitude matters.
static bool
decimal_literal_is_below_one (const char *txt)
{
  const char *s = txt;

  if (*s == '+' || *s == '-')
    s++;

  // Power of ten of the first nonzero mantissa digit, before the
  // exponent is applied.  Bounded by the length of the literal.
  long long lead_pow = 0;
  bool seen_point = false;
  bool seen_nonzero = false;

  for (; *s && *s != 'e' && *s != 'E'; s++)
    {
      if (*s == '.')
        seen_point = true;
      else if (! seen_nonzero)
        {
          // Each fractional digit up to and including the first
          // nonzero one lowers the power by one.
          if (seen_point)
            lead_pow--;

          if (*s != '0')
            seen_nonzero = true;
        }
      else if (! seen_point)
        lead_pow++;
    }

  // A zero mantissa is never large.
  if (! seen_nonzero)
    return true;

  long long exp10 = 0;

  if (*s)
    {
      s++;

      // from_chars does not accept a leading '+'.
      bool negative_exp = false;
      if (*s == '+' || *s == '-')
        {
          negative_exp = (*s == '-');
          s++;
        }

      std::from_chars_result res
        = std::from_chars (s, s + std::strlen (s), exp10);

      // The exponent itself is too large to represent.  Its sign
      // decides.
      if (res.ec == std::errc::result_out_of_range)
        return negative_exp;

      if (negative_exp)
        exp10 = -exp10;
    }

  // Equivalent to lead_pow + exp10 < 0 without risk of overflow.
  return exp10 < -lead_pow;
}

// Handle a decimal constant.  The text may contain '_' separators, a
// 'd' or 'D' exponent marker (treated as 'e'), and an imaginary unit
// (i, j, I, or J).
//
// The result is a double, or a complex value with zero real part for
// an imaginary constant.  A constant made of digits only that is at
// least flintmax and fits in 64 bits is stored as a "magic" integer
// value instead.
template <>
int
base_lexer::handle_number<10> ()
{
  bool imag = false;
  bool digits_only = true;

  char *yytxt = flex_yytext ();
  std::size_t yylng = flex_yyleng ();

  // Copy the text to TMPTXT, dropping separators and the imaginary
  // unit and normalizing the exponent marker.
  OCTAVE_LOCAL_BUFFER (char, tmptxt, yylng + 1);
  char *rp = yytxt;
  char *p = &tmptxt[0];

  char ch;
  while ((ch = *rp++))
    {
      switch (ch)
        {
        case '_':
          break;

        case 'D':
        case 'd':
          *p++ = 'e';
          digits_only = false;
          break;

        case 'I':
        case 'J':
        case 'i':
        case 'j':
          // Octave does not provide imaginary integers.
          digits_only = false;
          imag = true;
          break;

        case '+':
        case '-':
        case '.':
        case 'E':
        case 'e':
          digits_only = false;
          *p++ = ch;
          break;

        default:
          *p++ = ch;
          break;
        }
    }

  *p = '\0';

  double value = 0.0;
  const char *chars_start = tmptxt;
  const char *chars_end = tmptxt + std::strlen (tmptxt);

#if defined (OCTAVE_HAVE_STD_FROM_CHARS_DOUBLE)
  auto [ptr, ec] = std::from_chars (chars_start, chars_end, value);
#elif defined (OCTAVE_HAVE_FAST_FLOAT)
  auto [ptr, ec] = fast_float::from_chars (chars_start, chars_end, value);
#else
#  error "Cannot convert string to floating-point number. This should be unreachable."
#endif

  if (ec != std::errc{})
    {
      switch (ec)
        {
        case std::errc::invalid_argument:
          fatal_error ("invalid floating point format in base_lexer::handle_number<10> - please report this bug");
          break;

        case std::errc::result_out_of_range:
          {
            // from_chars reports both overflow and underflow this way
            // and may leave VALUE unmodified.  As sscanf did, convert
            // overflow to infinity and underflow to zero.
            const bool negative = (tmptxt[0] == '-');

            if (decimal_literal_is_below_one (tmptxt))
              value = (negative ? -0.0 : 0.0);
            else if (negative)
              value = -std::numeric_limits<double>::infinity ();
            else
              value = std::numeric_limits<double>::infinity ();
          }
          break;

        default:
          fatal_error ("from_chars failed in base_lexer::handle_number<10> - please report this bug");
          break;
        }
    }

  octave_value ov;

  // Use >= because > will not return true until value is greater than
  // flintmax + 2!

  if (digits_only && value >= flintmax ())
    {
      // Try reading as an unsigned 64-bit integer.  If there is a range error,
      // then create a double value.  Otherwise, create a special uint64 object
      // that will be automatically converted to double unless it appears as
      // the argument to one of the int64 or uint64 functions.

      errno = 0;
      char *end;
      uintmax_t long_int_val;
      if (sizeof (uintmax_t) == sizeof (unsigned long long))
        long_int_val = strtoull (tmptxt, &end, 10);
      else if (sizeof (uintmax_t) == sizeof (unsigned long))
        long_int_val = strtoul (tmptxt, &end, 10);
      else
        error ("unexpected: size mismatch: uintmax_t vs unsigned long or unsigned long long in base_lexer::handle_number<10> - please report this bug");

      if (errno != ERANGE)
        {
          // If possible, store the value as a signed integer.

          octave_base_value *magic_int;
          if (long_int_val > std::numeric_limits<int64_t>::max ())
            magic_int = new octave_magic_uint (octave_uint64 (long_int_val));
          else
            magic_int = new octave_magic_int (octave_int64 (long_int_val));

          ov = octave_value (magic_int);
        }
    }

  m_looking_for_object_index = false;
  m_at_beginning_of_statement = false;

  update_token_positions (yylng);

  // No magic integer was created: use the floating-point value.
  if (ov.is_undefined ())
    ov = (imag ? octave_value (Complex (0.0, value))
               : octave_value (value));

  // The token text is the original, unmodified match.
  token *tok = new token (NUMBER, ov, yytxt, m_tok_beg, m_tok_end, get_comment_list ());

  return handle_token (tok);
}

/*
%!assert (1e999, Inf)
%!assert (-1e999, -Inf)
*/

// Lex a hexadecimal literal.  The matched text starts with a two
// character prefix (0x or 0X), may contain '_' separators, and may end
// with an 's' or 'u' followed by a width of 8, 16, 32, or 64.  On success
// an integer NUMBER token is handed to handle_token.  If no storage width
// can be selected, a LEXICAL_ERROR token is produced via syntax_error.

template <>
int
base_lexer::handle_number<16> ()
{
  // Drop the two-character prefix, then remove any '_' separators.
  std::string digits (flex_yytext () + 2);

  digits.erase (std::remove (digits.begin (), digits.end (), '_'),
                digits.end ());

  const std::size_t suffix_pos = digits.find_first_of ("su");

  bool unsigned_val = true;
  int bytes = -1;

  if (suffix_pos != std::string::npos)
    {
      // Explicit suffix: signedness letter followed by the width in bits.
      unsigned_val = (digits[suffix_pos] == 'u');

      const std::string width = digits.substr (suffix_pos+1);
      digits.erase (suffix_pos);

      int requested = -1;

      if (width == "8")
        requested = 1;
      else if (width == "16")
        requested = 2;
      else if (width == "32")
        requested = 4;
      else if (width == "64")
        requested = 8;

      // Two hexadecimal digits fit in each byte of the requested width.
      if (requested > 0
          && digits.length () <= static_cast<std::size_t> (2 * requested))
        bytes = requested;
    }
  else
    {
      // No suffix: choose the smallest of 1, 2, 4, or 8 bytes that can
      // hold the digits that were written.
      const std::size_t num_digits = digits.length ();

      for (int width = 1; width <= 8; width *= 2)
        {
          if (num_digits <= static_cast<std::size_t> (2 * width))
            {
              bytes = width;
              break;
            }
        }
    }

  if (bytes < 0)
    {
      std::string msg {"too many digits for hexadecimal constant"};
      return syntax_error (msg);
    }

  uintmax_t long_int_val = 0;
  const char *first = digits.data ();
  const char *last = first + digits.length ();
  const auto result = std::from_chars (first, last, long_int_val, 16);

  // Any conversion failure here is reported as a fatal lexer error.
  if (result.ec == std::errc::invalid_argument)
    fatal_error ("invalid hexadecimal format in base_lexer::handle_number<16> - please report this bug");
  else if (result.ec == std::errc::result_out_of_range)
    fatal_error ("hexadecimal value out of range in base_lexer::handle_number<16> - please report this bug");
  else if (result.ec != std::errc {})
    fatal_error ("from_chars failed in base_lexer::handle_number<16> - please report this bug");

  octave_value ov = make_integer_value (long_int_val, unsigned_val, bytes);

  m_looking_for_object_index = false;
  m_at_beginning_of_statement = false;

  update_token_positions (flex_yyleng ());

  token *num_tok = new token (NUMBER, ov, digits, m_tok_beg, m_tok_end, get_comment_list ());

  return handle_token (num_tok);
}

// Process the text of a line continuation that the scanner has matched.
//
// The matched text starts with either a single backslash (which triggers
// the language-extension warning) or, otherwise, a three-character marker
// (presumably "...").  Blanks and tabs after the marker are skipped and
// recorded as trailing space on the previous token.  A run of '#' or '%'
// characters after that starts an end-of-line comment, which is stored
// through finish_comment.  Finally the file position advances to the
// next line.

void
base_lexer::handle_continuation ()
{
  char *yytxt = flex_yytext ();
  int yylng = flex_yyleng ();

  // Number of characters occupied by the continuation marker itself.
  int offset = 1;
  if (yytxt[0] == '\\')
    warn_language_extension_continuation ();
  else
    offset = 3;

  bool have_space = false;
  while (offset < yylng)
    {
      char c = yytxt[offset];
      if (is_space_or_tab (c))
        {
          have_space = true;
          offset++;
        }
      else
        break;
    }

  if (have_space)
    mark_previous_token_trailing_space ();

  bool have_comment = false;
  bool first = true;
  while (offset < yylng)
    {
      char c = yytxt[offset];

      if (c != '#' && c != '%')
        break;

      // Only the very first comment character decides whether the comment
      // is a '#' comment.  Clear FIRST regardless of which character it
      // was so that a later '#' (as in "%#") is not mistaken for it.
      if (first)
        {
          if (c == '#')
            m_comment_uses_hash_char = true;

          first = false;
        }

      have_comment = true;
      offset++;
    }

  if (have_comment)
    {
      m_comment_text = &yytxt[offset];

      // finish_comment sets m_at_beginning_of_statement to true but that's
      // not correct if we are handling a continued statement.  Preserve
      // the current state.

      bool saved_bos = m_at_beginning_of_statement;

      finish_comment (comment_elt::end_of_line);

      m_at_beginning_of_statement = saved_bos;
    }

  m_filepos.next_line ();
}

// Append the accumulated comment text to the comment list and reset the
// per-comment state.  The requested type TYP is overridden with
// comment_elt::copyright when the text looks like a copyright notice.
// Leaves m_at_beginning_of_statement set to true.

void
base_lexer::finish_comment (comment_elt::comment_type typ)
{
  const comment_elt::comment_type effective_type
    = (looks_like_copyright (m_comment_text) ? comment_elt::copyright : typ);

  m_comment_list.append (m_comment_text, effective_type,
                         m_comment_uses_hash_char);

  // Start from a clean slate for the next comment.
  m_comment_text = "";
  m_comment_uses_hash_char = false;

  m_at_beginning_of_statement = true;
}

// Handle a closing ']' or '}'.  Pops the object-index state for the
// bracket, unwinds one nesting level (when any is recorded) together
// with the matching bracket/brace counter, restores the previous start
// state, and returns the token for BRACKET_TYPE.

int
base_lexer::handle_close_bracket (int bracket_type)
{
  m_looking_at_object_index.pop_front ();

  // After the closing bracket the lexer looks for an object index and is
  // no longer at the beginning of a statement.
  m_looking_for_object_index = true;
  m_at_beginning_of_statement = false;

  if (! m_nesting_level.none ())
    {
      m_nesting_level.remove ();

      switch (bracket_type)
        {
        case ']':
          m_bracketflag--;
          break;

        case '}':
          m_braceflag--;
          break;

        default:
          error ("unexpected: bracket_type not ']' or '}' in base_lexer::handle_close_bracket - please report this bug");
          break;
        }
    }

  pop_start_state ();

  return handle_token (bracket_type);
}

// Return true if the upcoming text should be treated as the argument of
// a command-syntax call: command syntax must be allowed, the previous
// token must be followed by space, the current position must not be
// followed by space, and the previous token must be one that may be a
// command.

bool
base_lexer::looks_like_command_arg ()
{
  if (! m_allow_command_syntax)
    return false;

  // Both probes are always evaluated, in this order.
  const bool preceded_by_space = space_follows_previous_token ();
  const bool followed_by_space = looking_at_space ();

  if (! preceded_by_space || followed_by_space)
    return false;

  return previous_token_may_be_command ();
}

int
base_lexer::handle_superclass_identifier ()
{
  update_token_positions (flex_yyleng ());

  std::string txt = flex_yytext ();

  txt.erase (std::remove_if (txt.begin (), txt.end (), is_space_or_tab),
             txt.end ());

  std::size_t pos = txt.find ("@");

  std::string meth = txt.substr (0, pos);
  std::string cls = txt.substr (pos + 1);

  if (iskeyword (meth) || fq_identifier_contains_keyword (cls))
    {
      std::string msg {"method, class, and package names may not be keywords"};
      return syntax_error (msg);
    }

  token *tok = new token (SUPERCLASSREF, meth, cls, m_tok_beg, m_tok_end, get_comment_list ());

  m_filepos.increment_column (flex_yyleng ());

  return handle_token (tok);
}

// Create a METAQUERY token for class name CLS and advance the file
// position by the length of the matched text.  The caller is expected
// to have updated the token positions (m_tok_beg / m_tok_end) already.
// The caller receives the newly allocated token.

token *
base_lexer::make_meta_identifier_token (const std::string& cls)
{
  m_looking_for_object_index = true;

  token *meta_tok
    = new token (METAQUERY, cls, m_tok_beg, m_tok_end, get_comment_list ());

  const int matched_len = flex_yyleng ();
  m_filepos.increment_column (matched_len);

  return meta_tok;
}

// Create an FQ_IDENT token for the fully qualified identifier IDENT and
// advance the file position by the length of the matched text.  The
// caller is expected to have updated the token positions (m_tok_beg /
// m_tok_end) already.  The caller receives the newly allocated token.

token *
base_lexer::make_fq_identifier_token (const std::string& ident)
{
  m_looking_for_object_index = true;

  token *fq_tok
    = new token (FQ_IDENT, ident, m_tok_beg, m_tok_end, get_comment_list ());

  const int matched_len = flex_yyleng ();
  m_filepos.increment_column (matched_len);

  return fq_tok;
}

// Figure out exactly what kind of token to return when we have seen an
// identifier.  Handles keywords.  Return -1 if the identifier should be
// ignored.

int
base_lexer::handle_identifier ()
{
  update_token_positions (flex_yyleng ());

  std::string ident = flex_yytext ();

  // If we are expecting a structure element, avoid recognizing keywords and
  // other special names and return STRUCT_ELT, which is a string that is also
  // a valid identifier.

  if (m_looking_at_indirect_ref)
    {
      token *tok = new token (STRUCT_ELT, ident, m_tok_beg, m_tok_end, get_comment_list ());

      m_looking_for_object_index = true;

      return handle_token (tok);
    }

  // If ident is a keyword token, then make_keyword_token will set
  // m_at_beginning_of_statement.  For example, if tok is an IF token, then
  // m_at_beginning_of_statement will be false.

  token *tok = make_keyword_token (ident);

  // If we have a regular keyword, return it.
  // Keywords can be followed by identifiers.

  if (tok)
    {
      m_looking_for_object_index = false;

      // The call to make_keyword_token set m_at_beginning_of_statement.
      return handle_token (tok);
    }

  tok = new token (NAME, ident, m_tok_beg, m_tok_end, get_comment_list ());

  // For compatibility with Matlab, the following symbols are handled specially
  // so that things like
  //
  //   pi +1
  //
  // are parsed as an addition expression instead of as a command-style
  // function call with the argument "+1".

  if (m_at_beginning_of_statement
      && ! (m_parsing_anon_fcn_body
            || ident == "e" || ident == "pi"
            || ident == "I" || ident == "i"
            || ident == "J" || ident == "j"
            || ident == "Inf" || ident == "inf"
            || ident == "NaN" || ident == "nan"))
    tok->mark_may_be_command ();

  // The magic end index can't be indexed.
  if (ident != "end")
    m_looking_for_object_index = true;

  m_at_beginning_of_statement = false;

  return handle_token (tok);
}

// Record whether the comment text TXT (LEN characters long) starts with
// '#' once leading blanks and tabs are skipped.  Does nothing if the
// current comment has already been marked as using '#'.

void
base_lexer::check_comment_for_hash_char (const char *txt, std::size_t len)
{
  if (m_comment_uses_hash_char)
    return;

  std::size_t i = 0;
  while (i < len && is_space_or_tab (txt[i]))
    i++;

  // If the text is empty or consists only of blanks and tabs, I equals
  // LEN here and txt[i] is outside the range the caller gave us, so it
  // must not be read.
  m_comment_uses_hash_char = (i < len && txt[i] == '#');
}

// Issue the Octave:separator-insert warning for separator character SEP.
// The message includes the current line number and, when a function file
// name is known, the name of that file.

void
base_lexer::maybe_warn_separator_insert (char sep)
{
  const std::string& file_name = m_fcn_file_full_name;

  if (! file_name.empty ())
    warning_with_id ("Octave:separator-insert",
                     "potential auto-insertion of '%c' near line %d of file %s",
                     sep, m_filepos.line (), file_name.c_str ());
  else
    warning_with_id ("Octave:separator-insert",
                     "potential auto-insertion of '%c' near line %d",
                     sep, m_filepos.line ());
}

// Issue the Octave:language-extension warning with description MSG.
// The line number and file name are appended only when a function file
// name is known.

void
base_lexer::warn_language_extension (const std::string& msg)
{
  const std::string& file_name = m_fcn_file_full_name;

  if (! file_name.empty ())
    warning_with_id ("Octave:language-extension",
                     "Octave language extension used: %s near line %d of file %s",
                     msg.c_str (), m_filepos.line (), file_name.c_str ());
  else
    warning_with_id ("Octave:language-extension",
                     "Octave language extension used: %s",
                     msg.c_str ());
}

// Warn about the language extension when C, the comment character that
// was used, is '#'.  Any other character is silently accepted.

void
base_lexer::maybe_warn_language_extension_comment (char c)
{
  if (c != '#')
    return;

  warn_language_extension ("# used as comment character");
}

// Warn that a backslash was used as a line continuation marker.

void
base_lexer::warn_language_extension_continuation ()
{
  const std::string description {"\\ used as line continuation marker"};

  warn_language_extension (description);
}

// Warn that operator OP is an Octave language extension.  A single
// trailing newline in OP, if present, is not included in the message.

void
base_lexer::warn_language_extension_operator (const std::string& op)
{
  std::string t = op;

  // Check for emptiness first: indexing the last character of an empty
  // string is undefined behavior.
  if (! t.empty () && t.back () == '\n')
    t.pop_back ();

  warn_language_extension (t + " used as operator");
}

// Issue the Octave:deprecated-syntax warning with text MSG.  When a
// function file name is known, the current line number and the file
// name are appended to the message.

void
base_lexer::warn_deprecated_syntax (const std::string& msg)
{
  const bool have_file = ! m_fcn_file_full_name.empty ();

  if (have_file)
    warning_with_id ("Octave:deprecated-syntax",
                     "%s; near line %d of file '%s'", msg.c_str (),
                     m_filepos.line (), m_fcn_file_full_name.c_str ());
  else
    warning_with_id ("Octave:deprecated-syntax", "%s", msg.c_str ());
}

int
base_lexer::syntax_error (const std::string& msg)
{
  return syntax_error (msg, m_tok_beg, m_tok_end);
}

int
base_lexer::syntax_error (const std::string& msg, const filepos& pos)
{
  return syntax_error (msg, pos, pos);
}

int
base_lexer::syntax_error (const std::string& msg, const filepos& beg_pos,
                          const filepos& end_pos)
{
  token *tok = new token (LEXICAL_ERROR, msg, beg_pos, end_pos);

  push_token (tok);

  return count_token_internal (tok->token_id ());
}

// Make TOK the scanner's current semantic value and add it to the queue
// of tokens held by the lexer.

void
base_lexer::push_token (token *tok)
{
  yyget_lval (m_scanner)->tok = tok;

  m_tokens.push (tok);
}

// Return the token stored in the scanner's current semantic value.

token *
base_lexer::current_token ()
{
  return yyget_lval (m_scanner)->tok;
}

// Return the number of tokens currently held in m_tokens.

std::size_t
base_lexer::pending_token_count () const
{
  const std::size_t count = m_tokens.size ();

  return count;
}

#if defined (OCTAVE_PARSER_DEBUG)
// Debugging aid, compiled only when OCTAVE_PARSER_DEBUG is defined.
// Write a human-readable name for token TOK_ID to std::cerr.  For
// NUMBER, STRUCT_ELT, NAME, and string tokens the value or text of the
// current token is printed as well.
void
base_lexer::display_token (int tok_id)
{
  switch (tok_id)
    {
    case '=': std::cerr << "'='\n"; break;
    case ':': std::cerr << "':'\n"; break;
    case '-': std::cerr << "'-'\n"; break;
    case '+': std::cerr << "'+'\n"; break;
    case '*': std::cerr << "'*'\n"; break;
    case '/': std::cerr << "'/'\n"; break;
    case '~': std::cerr << "'~'\n"; break;
    case '!': std::cerr << "'!'\n"; break;
    case ADD_EQ: std::cerr << "ADD_EQ\n"; break;
    case SUB_EQ: std::cerr << "SUB_EQ\n"; break;
    case MUL_EQ: std::cerr << "MUL_EQ\n"; break;
    case DIV_EQ: std::cerr << "DIV_EQ\n"; break;
    case LEFTDIV_EQ: std::cerr << "LEFTDIV_EQ\n"; break;
    case POW_EQ: std::cerr << "POW_EQ\n"; break;
    case EMUL_EQ: std::cerr << "EMUL_EQ\n"; break;
    case EDIV_EQ: std::cerr << "EDIV_EQ\n"; break;
    case ELEFTDIV_EQ: std::cerr << "ELEFTDIV_EQ\n"; break;
    case EPOW_EQ: std::cerr << "EPOW_EQ\n"; break;
    case AND_EQ: std::cerr << "AND_EQ\n"; break;
    case OR_EQ: std::cerr << "OR_EQ\n"; break;
    case EXPR_AND_AND: std::cerr << "EXPR_AND_AND\n"; break;
    case EXPR_OR_OR: std::cerr << "EXPR_OR_OR\n"; break;
    case EXPR_AND: std::cerr << "EXPR_AND\n"; break;
    case EXPR_OR: std::cerr << "EXPR_OR\n"; break;
    case EXPR_LT: std::cerr << "EXPR_LT\n"; break;
    case EXPR_LE: std::cerr << "EXPR_LE\n"; break;
    case EXPR_EQ: std::cerr << "EXPR_EQ\n"; break;
    case EXPR_NE: std::cerr << "EXPR_NE\n"; break;
    case EXPR_GE: std::cerr << "EXPR_GE\n"; break;
    case EXPR_GT: std::cerr << "EXPR_GT\n"; break;
    case LEFTDIV: std::cerr << "LEFTDIV\n"; break;
    case EMUL: std::cerr << "EMUL\n"; break;
    case EDIV: std::cerr << "EDIV\n"; break;
    case ELEFTDIV: std::cerr << "ELEFTDIV\n"; break;
    case HERMITIAN: std::cerr << "HERMITIAN\n"; break;
    case TRANSPOSE: std::cerr << "TRANSPOSE\n"; break;
    case PLUS_PLUS: std::cerr << "PLUS_PLUS\n"; break;
    case MINUS_MINUS: std::cerr << "MINUS_MINUS\n"; break;
    case POW: std::cerr << "POW\n"; break;
    case EPOW: std::cerr << "EPOW\n"; break;

    // Tokens that carry a value or text: show it in brackets.
    case NUMBER:
      {
        token *tok = current_token ();
        std::cerr << "NUMBER [";
        octave_value num = tok->number ();
        num.print_raw (std::cerr);
        std::cerr << "]\n";
      }
      break;

    case STRUCT_ELT:
      {
        token *tok = current_token ();
        std::cerr << "STRUCT_ELT [" << tok->text () << "]\n";
      }
      break;

    case NAME:
      {
        token *tok = current_token ();
        std::cerr << "NAME [" << tok->text () << "]\n";
      }
      break;

    case END: std::cerr << "END\n"; break;

    case DQ_STRING:
    case SQ_STRING:
      {
        token *tok = current_token ();

        std::cerr << (tok_id == DQ_STRING ? "DQ_STRING" : "SQ_STRING")
                  << " [" << tok->text () << "]\n";
      }
      break;

    case FOR: std::cerr << "FOR\n"; break;
    case WHILE: std::cerr << "WHILE\n"; break;
    case DO: std::cerr << "DO\n"; break;
    case UNTIL: std::cerr << "UNTIL\n"; break;
    case IF: std::cerr << "IF\n"; break;
    case ELSEIF: std::cerr << "ELSEIF\n"; break;
    case ELSE: std::cerr << "ELSE\n"; break;
    case SWITCH: std::cerr << "SWITCH\n"; break;
    case CASE: std::cerr << "CASE\n"; break;
    case OTHERWISE: std::cerr << "OTHERWISE\n"; break;
    case BREAK: std::cerr << "BREAK\n"; break;
    case CONTINUE: std::cerr << "CONTINUE\n"; break;
    case RETURN: std::cerr << "RETURN\n"; break;
    case UNWIND: std::cerr << "UNWIND\n"; break;
    case CLEANUP: std::cerr << "CLEANUP\n"; break;
    case TRY: std::cerr << "TRY\n"; break;
    case CATCH: std::cerr << "CATCH\n"; break;
    case GLOBAL: std::cerr << "GLOBAL\n"; break;
    case PERSISTENT: std::cerr << "PERSISTENT\n"; break;
    case FCN_HANDLE: std::cerr << "FCN_HANDLE\n"; break;
    case END_OF_INPUT: std::cerr << "END_OF_INPUT\n\n"; break;
    case LEXICAL_ERROR: std::cerr << "LEXICAL_ERROR\n\n"; break;
    case FUNCTION: std::cerr << "FUNCTION\n"; break;
    case INPUT_FILE: std::cerr << "INPUT_FILE\n"; break;
    case SUPERCLASSREF: std::cerr << "SUPERCLASSREF\n"; break;
    case METAQUERY: std::cerr << "METAQUERY\n"; break;
    case GET: std::cerr << "GET\n"; break;
    case SET: std::cerr << "SET\n"; break;
    case PROPERTIES: std::cerr << "PROPERTIES\n"; break;
    case METHODS: std::cerr << "METHODS\n"; break;
    case EVENTS: std::cerr << "EVENTS\n"; break;
    case CLASSDEF: std::cerr << "CLASSDEF\n"; break;
    case '\n': std::cerr << "\\n\n"; break;
    case '\r': std::cerr << "\\r\n"; break;
    case '\t': std::cerr << "TAB\n"; break;
    default:
      {
        // Any other id in the range 32..255 is printed as the character
        // itself; everything else is reported by number.
        if (tok_id < 256 && tok_id > 31)
          std::cerr << static_cast<char> (tok_id) << "\n";
        else
          std::cerr << "UNKNOWN(" << tok_id << ")\n";
      }
      break;
    }
}
#endif

// Report MSG through the global error function, prefixed with
// "fatal lexer error: ".
void
base_lexer::fatal_error (const char *msg)
{
  ::error ("fatal lexer error: %s", msg);
}

// Count one more token.  When OCTAVE_PARSER_DEBUG is defined, the
// V__token_count__ debugging counter is advanced as well.

void
base_lexer::increment_token_count ()
{
#if defined (OCTAVE_PARSER_DEBUG)
  V__token_count__++;
#endif

  ++m_token_count;
}

// Debugging hook for the scanner rules.  When OCTAVE_PARSER_DEBUG is
// defined and V__lexer_debug_flag__ is set, print the current start
// state, the rule PATTERN, and the matched text to std::cerr.  In
// builds without OCTAVE_PARSER_DEBUG this function does nothing.

void
base_lexer::lexer_debug (const char *pattern)
{
#if ! defined (OCTAVE_PARSER_DEBUG)
  // No code.  Compiler should optimize this away.
  octave_unused_parameter (pattern);
#else
  if (! V__lexer_debug_flag__)
    return;

  std::cerr << std::endl;

  display_start_state ();

  std::cerr << "P: " << pattern << std::endl;
  std::cerr << "T: " << flex_yytext () << std::endl;
#endif
}

// Ask the interpreter's history system whether input is currently being
// read from a temporary history file.

bool
base_lexer::input_from_tmp_history_file ()
{
  return m_interpreter.get_history_system ().input_from_tmp_file ();
}

// Push STATE onto the stack of start states and switch the scanner to
// the start state reported by start_state ().
void
base_lexer::push_start_state (int state)
{
  // Scanner-state setup macro; presumably required by BEGIN below.
  OCTAVE_YYG;

  start_state_stack.push (state);

  BEGIN (start_state ());
}

// Pop the most recently pushed start state and switch the scanner to
// the start state reported by start_state ().
// NOTE(review): the stack is popped without an emptiness check here;
// confirm that callers guarantee it is not empty.
void
base_lexer::pop_start_state ()
{
  // Scanner-state setup macro; presumably required by BEGIN below.
  OCTAVE_YYG;

  start_state_stack.pop ();

  BEGIN (start_state ());
}

// Discard every saved start state and start over with INITIAL as the
// only entry on the stack.

void
base_lexer::clear_start_state ()
{
  for (; ! start_state_stack.empty (); start_state_stack.pop ())
    {
      // Nothing to do here; the loop step removes the entries.
    }

  push_start_state (INITIAL);
}


#if defined (OCTAVE_PARSER_DEBUG)
// Debugging aid, compiled only when OCTAVE_PARSER_DEBUG is defined.
// Print "S: " followed by the name of the current start state to
// std::cerr.
void
base_lexer::display_start_state () const
{
  std::cerr << "S: ";

  // Look the name up first, then print it in one place.
  const char *state_name = "UNKNOWN START STATE!";

  switch (start_state ())
    {
    case INITIAL:             state_name = "INITIAL"; break;
    case COMMAND_START:       state_name = "COMMAND_START"; break;
    case MATRIX_START:        state_name = "MATRIX_START"; break;
    case INPUT_FILE_START:    state_name = "INPUT_FILE_START"; break;
    case BLOCK_COMMENT_START: state_name = "BLOCK_COMMENT_START"; break;
    case LINE_COMMENT_START:  state_name = "LINE_COMMENT_START"; break;
    case DQ_STRING_START:     state_name = "DQ_STRING_START"; break;
    case SQ_STRING_START:     state_name = "SQ_STRING_START"; break;
    case FQ_IDENT_START:      state_name = "FQ_IDENT_START"; break;

    default:
      break;
    }

  std::cerr << state_name << std::endl;
}
#endif

bool
base_lexer::maybe_unput_comma_before_unary_op (int tok_id)
{
  int prev_tok_id = previous_token_id ();

  bool unput_comma = false;

  if (whitespace_is_significant () && space_follows_previous_token ())
    {
      int c = text_yyinput ();
      xunput (c);

      bool space_after = is_space_or_tab (c);

      if (! (prev_tok_id == '[' || prev_tok_id == '{'
             || previous_token_is_binop ()
             || ((tok_id == '+' || tok_id == '-') && space_after)))
        unput_comma = true;
    }

  return unput_comma;
}

int
base_lexer::handle_op (int tok_id, const std::string& tok_txt, int tok_len,
                       bool bos, bool compat)
{
  if (! compat)
    warn_language_extension_operator (tok_txt);

  update_token_positions (tok_len);

  token *tok = new token (tok_id, m_tok_beg, m_tok_end, get_comment_list ());

  m_looking_for_object_index = false;
  m_at_beginning_of_statement = bos;

  switch (tok_id)
    {
    case EXPR_LT:
      if (m_parsing_classdef_decl)
        {
          m_parsing_classdef_superclass = true;
          push_start_state (FQ_IDENT_START);
        }
      break;

    case EXPR_AND:
      if (m_parsing_classdef_superclass)
        push_start_state (FQ_IDENT_START);
      break;

    default:
      break;
    }

  return handle_token (tok);
}

// When a command argument boundary is detected, push out the current argument
// being built.  This one seems like a good candidate for a function call.

int
base_lexer::finish_command_arg ()
{
  token *tok = new token (SQ_STRING, m_string_text, m_tok_beg, m_tok_end, get_comment_list ());

  m_string_text = "";
  m_command_arg_paren_count = 0;

  return handle_token (tok);
}

int
base_lexer::handle_token (int tok_id)
{
  return handle_token (new token (tok_id, m_tok_beg, m_tok_end));
}

// Push TOK, update the "arguments is a keyword" state, and count the
// token.  m_arguments_is_keyword survives only across ';', ',' and
// newline tokens; any other token clears it.

int
base_lexer::handle_token (token *tok)
{
  push_token (tok);

  const int tok_id = tok->token_id ();

  const bool is_separator
    = (tok_id == ';' || tok_id == ',' || tok_id == '\n');

  if (m_arguments_is_keyword && ! is_separator)
    m_arguments_is_keyword = false;

  return count_token_internal (tok_id);
}

// Count token TOK_ID (newline tokens are not counted) and return the
// value produced by show_token for it.

int
base_lexer::count_token_internal (int tok_id)
{
  const bool is_newline = (tok_id == '\n');

  if (! is_newline)
    increment_token_count ();

  return show_token (tok_id);
}

// Return TOK_ID unchanged.  When OCTAVE_PARSER_DEBUG is defined, the
// token is also displayed on std::cerr according to the
// V__display_tokens__ and V__lexer_debug_flag__ settings.

int
base_lexer::show_token (int tok_id)
{
#if defined (OCTAVE_PARSER_DEBUG)

  if (V__display_tokens__)
    display_token (tok_id);

  if (V__lexer_debug_flag__)
    {
      // "R:" marks the token in the lexer debug output.
      std::cerr << "R: ";
      display_token (tok_id);
      std::cerr << std::endl;
    }

#endif

  return tok_id;
}

// Supply the scanner with up to MAX_SIZE characters in BUF.  When the
// internal input buffer is empty, a new line is first requested from
// the reader, using the PS1 prompt for initial input and PS2 otherwise.
// Returns the result of copying a chunk from the input buffer, or
// YY_NULL if the buffer is still empty after the read.

int
lexer::fill_flex_buffer (char *buf, unsigned max_size)
{
  if (m_input_buf.empty ())
    {
      std::string raw_prompt
        = (m_initial_input ? m_interpreter.PS1 () : m_interpreter.PS2 ());

      std::string prompt = command_editor::decode_prompt_string (raw_prompt);

      bool eof = false;
      m_current_input_line = m_reader.get_input (prompt, eof);

      m_input_buf.fill (m_current_input_line, eof);

      // Attempt to capture text for functions defined on the command line.
      // Each non-empty input line is stored with a terminating newline.
      //
      // FIXME: the handling of newline here seems a bit clumsy.
      //
      // See also comments in push_lexer::append_input.

      if (m_buffer_function_text && ! m_current_input_line.empty ())
        {
          m_function_text += m_current_input_line;

          if (m_current_input_line.back () != '\n')
            m_function_text += '\n';
        }
    }

  int status = YY_NULL;

  if (! m_input_buf.empty ())
    status = m_input_buf.copy_chunk (buf, max_size);

  m_initial_input = false;

  return status;
}

// Hand INPUT (with end-of-file flag EOF) to the input buffer and record
// it as the current input line.

void
push_lexer::append_input (const std::string& input, bool eof)
{
  // FIXME: INPUT may hold more than one line, so how can buffering of
  // input for command-line functions be started properly?
  //
  // At present base_lexer::make_keyword_token begins buffering text for
  // command-line functions by initializing m_function_text from
  // m_current_input_line when function_kw is recognized.  For that to
  // work, something like a queue of input strings would have to be kept
  // and fed to the flex buffer one line at a time, with
  // m_current_input_line updated along the way.  Care is needed if one
  // line of input arrives across several calls to append_input.
  //
  // Alternatively, should INPUT be required to BE a single line of
  // input?  Assigning INPUT to m_current_input_line below effectively
  // assumes that.

  m_input_buf.fill (input, eof);

  m_current_input_line = input;
}

// Supply the scanner with up to MAX_SIZE characters in BUF.
//
//   * Buffered input available: copy a chunk of it.
//   * Buffer empty and at end of input: return YY_NULL.
//   * Buffer empty but not at end of input: store a single ASCII 1
//     character in BUF as a marker for subsequent rules and return 1.

int
push_lexer::fill_flex_buffer (char *buf, unsigned max_size)
{
  // Note that the copy_chunk function may append a newline character to
  // the input.

  if (! m_input_buf.empty ())
    return m_input_buf.copy_chunk (buf, max_size, true);

  if (m_input_buf.at_eof ())
    return YY_NULL;

  // No newline is inserted in this case.  Rather than calling
  // input_buffer::fill followed immediately by input_buffer::copy_chunk,
  // the marker is written directly into BUF.

  if (max_size == 0)
    error ("unexpected: max_size <= 0 in push_lexer::fill_flex_buffer - please report this bug");

  buf[0] = static_cast<char> (1);

  return 1;
}

OCTAVE_END_NAMESPACE(octave)
