class RubyLexer

Constants

EOF
ESCAPES
ESC_RE
IDENT_CHAR_RE
IDENT_RE
RUBY19
STR_DQUOTE
STR_DSYM
STR_FUNC_BORING

ruby constants for strings (should this be moved somewhere else?)

STR_FUNC_ESCAPE
STR_FUNC_EXPAND
STR_FUNC_INDENT
STR_FUNC_QWORDS
STR_FUNC_REGEXP
STR_FUNC_SYMBOL
STR_REGEXP
STR_SQUOTE
STR_SSYM
STR_XQUOTE
TOKENS

Attributes

cmdarg[RW]
command_start[RW]
cond[RW]
lex_state[R]

Additional context surrounding tokens that both the lexer and grammar use.

lex_strterm[RW]
lineno[W]
nest[RW]
parser[RW]
space_seen[RW]
src[R]

Stream of data that yylex examines.

string_buffer[RW]
tern[RW]
token[RW]

Last token read via yylex.

version[RW]

What version of ruby to parse. 18 and 19 are the only valid values currently supported.

warnings[RW]

What handles warnings

yacc_value[RW]

Value of last token which had a value associated with it.

Public Class Methods

new(v = 18) click to toggle source
# File lib/ruby_lexer.rb, line 241
# Builds a lexer for Ruby version +v+ (18 unless told otherwise).
#
# Installs fresh :cond, :cmdarg and :tern state stacks, zeroes the
# nesting depth, empties the pending-comment list and then calls #reset.
def initialize(v = 18)
  self.version = v

  [:cond, :cmdarg, :tern].each do |stack_name|
    send("#{stack_name}=", RubyParserStuff::StackState.new(stack_name))
  end

  self.nest = 0
  @comments = []

  reset
end

Public Instance Methods

advance() click to toggle source

How the parser advances to the next token.

@return true if not at end of file (EOF).

# File lib/ruby_lexer.rb, line 92
# Fetches the next token from #yylex and stores it in #token.
#
# Raises RuntimeError when yylex hands back nil (or false).
# Returns true while the token is not RubyLexer::EOF, false once it is.
def advance
  self.token = next_token = yylex

  raise "yylex returned nil" unless next_token

  RubyLexer::EOF != next_token
end
arg_ambiguous() click to toggle source
# File lib/ruby_lexer.rb, line 101
# Reports, via #warning, that the first argument of a call is ambiguous.
def arg_ambiguous
  message = "Ambiguous first argument. make sure."
  warning message
end
comments() click to toggle source
# File lib/ruby_lexer.rb, line 105
# Returns every comment collected so far joined into one String and
# empties the internal comment list as a side effect.
def comments
  @comments.join.tap { @comments.clear }
end
expr_beg_push(val) click to toggle source
# File lib/ruby_lexer.rb, line 111
# Pushes +false+ onto both the cond and cmdarg stacks, switches the lexer
# to :expr_beg and records +val+ as the current yacc_value.
def expr_beg_push val
  [cond, cmdarg].each { |stack| stack.push false }

  self.lex_state  = :expr_beg
  self.yacc_value = val
end
fix_arg_lex_state() click to toggle source
# File lib/ruby_lexer.rb, line 118
# Moves the lexer to :expr_arg when it currently sits in :expr_fname or
# :expr_dot, and to :expr_beg in every other state.
def fix_arg_lex_state
  after_name = in_lex_state?(:expr_fname, :expr_dot)
  self.lex_state = after_name ? :expr_arg : :expr_beg
end
heredoc(here) click to toggle source
# File lib/ruby_lexer.rb, line 126
# Lexes the body of a here-document.
#
# +here+ is the 4-element strterm array [:heredoc, eos, func, last_line]:
# +eos+ is the terminator text, +func+ the STR_FUNC_* bit flags and
# +last_line+ the text handed back to the scanner (via unread_many) once
# the terminator has been consumed.
#
# Returns :tSTRING_END when the terminator is consumed, :tSTRING_DVAR or
# :tSTRING_DBEG when an interpolation starts, and :tSTRING_CONTENT
# otherwise; the associated text is left in yacc_value.  Calls
# rb_compile_error if the input ends before the terminator is found.
def heredoc here # 63 lines
  _, eos, func, last_line = here

  indent  = (func & STR_FUNC_INDENT) != 0
  expand  = (func & STR_FUNC_EXPAND) != 0
  # The terminator is literal text: escape it so identifiers containing
  # regexp metacharacters (e.g. <<"a.b") only match themselves.
  eos_src = Regexp.escape eos
  eos_re  = indent ? /[ \t]*#{eos_src}(\r?\n|\z)/ : /#{eos_src}(\r?\n|\z)/
  err_msg = "can't match #{eos_re.inspect} anywhere in "

  rb_compile_error err_msg if
    src.eos?

  if src.beginning_of_line? && src.scan(eos_re) then
    src.unread_many last_line # TODO: figure out how to remove this
    self.yacc_value = eos
    return :tSTRING_END
  end

  self.string_buffer = []

  if expand then
    case
    when src.scan(/#[$@]/) then
      src.pos -= 1 # FIX omg stupid
      self.yacc_value = src.matched
      return :tSTRING_DVAR
    when src.scan(/#[{]/) then
      self.yacc_value = src.matched
      return :tSTRING_DBEG
    when src.scan(/#/) then
      string_buffer << '#'
    end

    # do-while: at least one chunk is read before looking for the terminator
    begin
      c = tokadd_string func, "\n", nil

      rb_compile_error err_msg if
        c == RubyLexer::EOF

      if c != "\n" then
        # stopped on something other than end-of-line (e.g. an interpolation)
        self.yacc_value = string_buffer.join.delete("\r")
        return :tSTRING_CONTENT
      else
        string_buffer << src.scan(/\n/)
      end

      rb_compile_error err_msg if
        src.eos?
    end until src.check(eos_re)
  else
    # no interpolation: copy whole lines verbatim until the terminator line
    until src.check(eos_re) do
      string_buffer << src.scan(/.*(\n|\z)/)
      rb_compile_error err_msg if
        src.eos?
    end
  end

  self.lex_strterm = [:heredoc, eos, func, last_line]
  self.yacc_value = string_buffer.join.delete("\r")

  return :tSTRING_CONTENT
end
heredoc_identifier() click to toggle source
# File lib/ruby_lexer.rb, line 188
# Scans the identifier that follows "<<" and sets up heredoc lexing.
#
# Accepts a quoted identifier ('ID', "ID" or `ID`) or a bare word, each
# optionally preceded by "-" (which adds STR_FUNC_INDENT).  The rest of
# the current line is consumed and stored in the strterm.
#
# Returns :tXSTRING_BEG for a backtick identifier, :tSTRING_BEG for any
# other identifier, and nil when what follows is not a heredoc
# identifier.  An opening quote without a matching close is a compile
# error.
def heredoc_identifier # 51 lines
  term, func = nil, STR_FUNC_BORING
  self.string_buffer = []

  case
  when src.scan(/(-?)(['"`])(.*?)\2/) then
    term = src[2]
    func |= STR_FUNC_INDENT unless src[1].empty?
    func |= case term
            when "\'" then
              STR_SQUOTE
            when '"' then
              STR_DQUOTE
            else
              STR_XQUOTE
            end
    string_buffer << src[3]
  when src.scan(/-?(['"`])(?!\1*\Z)/) then
    rb_compile_error "unterminated here document identifier"
  when src.scan(/(-?)(\w+)/) then
    # a bare identifier behaves like a double-quoted one
    term = '"'
    func |= STR_DQUOTE
    unless src[1].empty? then
      func |= STR_FUNC_INDENT
    end
    string_buffer << src[2]
  else
    return nil
  end

  if src.scan(/.*\n/) then
    # TODO: think about storing off the char range instead
    line = src.matched
    src.extra_lines_added += 1
  else
    line = nil
  end

  self.lex_strterm = [:heredoc, string_buffer.join, func, line]

  if term == '`' then
    self.yacc_value = "`"
    return :tXSTRING_BEG
  else
    self.yacc_value = "\""
    return :tSTRING_BEG
  end
end
in_lex_state?(*states) click to toggle source
# File lib/ruby_lexer.rb, line 237
# True when the current lex_state equals any of the given +states+.
def in_lex_state?(*states)
  current = lex_state
  states.any? { |candidate| candidate == current }
end
int_with_base(base) click to toggle source
# File lib/ruby_lexer.rb, line 252
# Converts the text most recently matched by src into an Integer using
# radix +base+, stores it in yacc_value and returns :tINTEGER.
#
# A doubled underscore anywhere in the matched text is reported through
# rb_compile_error as an invalid numeric format.
def int_with_base base
  rb_compile_error "Invalid numeric format" if src.matched =~ /__/
  self.yacc_value = src.matched.to_i(base)
  return :tINTEGER
end
is_arg?() click to toggle source
# File lib/ruby_lexer.rb, line 1283
# True while the lexer is in one of the argument states
# (:expr_arg or :expr_cmdarg).
def is_arg?
  arg_states = [:expr_arg, :expr_cmdarg]
  in_lex_state?(*arg_states)
end
is_beg?() click to toggle source
# File lib/ruby_lexer.rb, line 1287
# True while the lexer is in one of the expression-beginning states
# (:expr_beg, :expr_mid, :expr_value or :expr_class).
def is_beg?
  beg_states = [:expr_beg, :expr_mid, :expr_value, :expr_class]
  in_lex_state?(*beg_states)
end
is_end?() click to toggle source
# File lib/ruby_lexer.rb, line 1279
# True while the lexer is in one of the expression-ending states
# (:expr_end, :expr_endarg or :expr_endfn).
def is_end?
  end_states = [:expr_end, :expr_endarg, :expr_endfn]
  in_lex_state?(*end_states)
end
is_label_possible?(command_state) click to toggle source
# File lib/ruby_lexer.rb, line 1295
# A label may follow an identifier when the lexer is in :expr_beg and not
# at the start of a command (+command_state+ falsy), or whenever it is in
# an argument state.
def is_label_possible? command_state
  return true if in_lex_state?(:expr_beg) && !command_state

  is_arg?
end
is_space_arg?(c = "x") click to toggle source
# File lib/ruby_lexer.rb, line 1291
# Truthy when the lexer is in an argument state, whitespace preceded the
# current token, and +c+ (the following character, "x" by default) is
# not itself whitespace.
def is_space_arg? c = "x"
  is_arg? && space_seen && c !~ /\s/
end
lex_state=(o) click to toggle source
# File lib/ruby_lexer.rb, line 258
# Sets the lexer state.  Only Symbols are accepted; anything else raises
# a RuntimeError.
def lex_state= o
  case o
  when Symbol then @lex_state = o
  else raise "wtf\?"
  end
end
lineno() click to toggle source
# File lib/ruby_lexer.rb, line 265
# Current line number.  Taken from src the first time it is asked for and
# cached in @lineno until that variable is reset to nil.
def lineno
  @lineno = src.lineno unless @lineno
  @lineno
end
parse_number() click to toggle source
Parse a number from the input stream.

@param none (the number is read from the current position of src). @return A symbol which represents the token type (:tINTEGER or :tFLOAT).

# File lib/ruby_lexer.rb, line 275
# Scans a numeric literal at the current position of src.
#
# Sets lex_state to :expr_end, stores the numeric value in yacc_value and
# returns :tFLOAT for floating-point literals or the result of
# int_with_base for integers.  Malformed literals are reported through
# rb_compile_error.
#
# Hex digits, the "0x" prefix and the float exponent marker are matched
# case-insensitively, so 0XFF, 0xff and 1E3 are all accepted.
def parse_number
  self.lex_state = :expr_end

  case
  when src.scan(/[+-]?0[xXbBdD]\b/) then
    # a radix prefix with no digits after it
    rb_compile_error "Invalid numeric format"
  when src.scan(/[+-]?0x[a-f0-9_]+/i) then
    int_with_base(16)
  when src.scan(/[+-]?0[Bb][01_]+/) then
    int_with_base(2)
  when src.scan(/[+-]?0[Dd][0-9_]+/) then
    int_with_base(10)
  when src.scan(/[+-]?0[Oo]?[0-7_]*[89]/) then
    rb_compile_error "Illegal octal digit."
  when src.scan(/[+-]?0[Oo]?[0-7_]+|0[Oo]/) then
    int_with_base(8)
  when src.scan(/[+-]?[\d_]+_(e|\.)/) then
    rb_compile_error "Trailing '_' in number."
  when src.scan(/[+-]?[\d_]+\.[\d_]+(e[+-]?[\d_]+)?\b|[+-]?[\d_]+e[+-]?[\d_]+\b/i) then
    number = src.matched
    if number =~ /__/ then
      rb_compile_error "Invalid numeric format"
    end
    self.yacc_value = number.to_f
    :tFLOAT
  when src.scan(/[+-]?0\b/) then
    int_with_base(10)
  when src.scan(/[+-]?[\d_]+\b/) then
    int_with_base(10)
  else
    rb_compile_error "Bad number format"
  end
end
parse_quote() click to toggle source
# File lib/ruby_lexer.rb, line 309
# Lexes the opener of a percent literal (the text right after "%").
#
# Long-hand forms carry a type letter (%Q, %q, %W, %w, %x, %r, %s);
# short-hand forms (%{...}, %!...!) behave like %Q.  The type letter is
# matched case-insensitively so that Q and W are recognised.
#
# Stores [:strterm, string_type, closing_char, opening_char] in
# lex_strterm, where opening_char is "\0" when the delimiter does not
# nest, sets yacc_value to the literal's opening text and returns the
# matching *_BEG token.  Unknown types and premature end of input are
# reported through rb_compile_error.
def parse_quote # 58 lines
  beg, nnd, short_hand, c = nil, nil, false, nil

  if src.scan(/[a-z0-9]{1,2}/i) then # Long-hand (e.g. %Q{}).
    rb_compile_error "unknown type of %string" if src.matched_size == 2
    c, beg, short_hand = src.matched, src.getch, false
  else                               # Short-hand (e.g. %{, %., %!, etc)
    c, beg, short_hand = 'Q', src.getch, true
  end

  if src.eos? or c == RubyLexer::EOF or beg == RubyLexer::EOF then
    rb_compile_error "unterminated quoted string meets end of file"
  end

  # Figure nnd-char.  "\0" is special to indicate beg=nnd and that no nesting?
  nnd = { "(" => ")", "[" => "]", "{" => "}", "<" => ">" }[beg]
  nnd, beg = beg, "\0" if nnd.nil?

  token_type, self.yacc_value = nil, "%#{c}#{beg}"
  token_type, string_type = case c
                            when 'Q' then
                              ch = short_hand ? nnd : c + beg
                              self.yacc_value = "%#{ch}"
                              [:tSTRING_BEG,   STR_DQUOTE]
                            when 'q' then
                              [:tSTRING_BEG,   STR_SQUOTE]
                            when 'W' then
                              src.scan(/\s*/)
                              [:tWORDS_BEG,    STR_DQUOTE | STR_FUNC_QWORDS]
                            when 'w' then
                              src.scan(/\s*/)
                              [:tQWORDS_BEG,   STR_SQUOTE | STR_FUNC_QWORDS]
                            when 'x' then
                              [:tXSTRING_BEG,  STR_XQUOTE]
                            when 'r' then
                              [:tREGEXP_BEG,   STR_REGEXP]
                            when 's' then
                              self.lex_state  = :expr_fname
                              [:tSYMBEG,       STR_SSYM]
                            end

  rb_compile_error "Bad %string type. Expected [Qq\Wwxrs], found '#{c}'." if
    token_type.nil?

  self.lex_strterm = [:strterm, string_type, nnd, beg]

  return token_type
end
parse_string(quote) click to toggle source
# File lib/ruby_lexer.rb, line 358
# Lexes the next piece of a string-like literal described by +quote+,
# the strterm array [:strterm, string_type, term, open].
#
# Returns one of:
# * :tSTRING_END / :tREGEXP_END when the terminator is consumed
#   (regexp options are read into yacc_value for the latter);
# * :tSPACE between the elements of a word list;
# * :tSTRING_DVAR / :tSTRING_DBEG when an interpolation starts;
# * :tSTRING_CONTENT with the literal text in yacc_value.
#
# An unterminated literal is reported through rb_compile_error.
def parse_string(quote) # 65 lines
  _, string_type, term, open = quote

  # quote[1] is set to nil further down once a word list has reached its
  # terminator; the following call only has to emit the closing token.
  # Checked first so the flag tests below never run against nil.
  unless string_type then
    self.lineno = nil
    return :tSTRING_END
  end

  space = false # FIX: remove these
  func = string_type
  paren = open
  term_re = Regexp.escape term

  qwords = (func & STR_FUNC_QWORDS) != 0
  regexp = (func & STR_FUNC_REGEXP) != 0
  expand = (func & STR_FUNC_EXPAND) != 0

  space = true if qwords and src.scan(/\s+/)

  if self.nest == 0 && src.scan(/#{term_re}/) then
    if qwords then
      quote[1] = nil
      return :tSPACE
    elsif regexp then
      self.yacc_value = regx_options
      self.lineno = nil
      return :tREGEXP_END
    else
      self.yacc_value = term
      self.lineno = nil
      return :tSTRING_END
    end
  end

  if space then
    return :tSPACE
  end

  self.string_buffer = []

  if expand
    case
    when src.scan(/#(?=[$@])/) then
      return :tSTRING_DVAR
    when src.scan(/#[{]/) then
      return :tSTRING_DBEG
    when src.scan(/#/) then
      string_buffer << '#'
    end
  end

  if tokadd_string(func, term, paren) == RubyLexer::EOF then
    rb_compile_error "unterminated string meets end of file"
  end

  self.yacc_value = string_buffer.join

  return :tSTRING_CONTENT
end
process_token(command_state) click to toggle source
# File lib/ruby_lexer.rb, line 1314
# Classifies the identifier-like text held in #token and returns its
# token type.
#
# A trailing "!" or "?" (not followed by "=") is first folded into the
# token.  Global, class and instance variables map to :tGVAR, :tCVAR and
# :tIVAR.  Anything else is checked, in order, for being a method name
# ending in !/? (:tFID), a setter name while in :expr_fname, a label
# (non-1.8 parsers only, :tLABEL), or a reserved word (the keyword's
# token), before falling back to :tCONSTANT / :tIDENTIFIER.
#
# lex_state and yacc_value are updated along the way.  +command_state+
# tells whether the token sits at the start of a command.
def process_token(command_state)

  token << src.matched if token =~ IDENT_RE && src.scan(/[\!\?](?!=)/)

  result = nil
  last_state = lex_state

  case token
  when /^\$/ then
    self.lex_state, result = :expr_end, :tGVAR
  when /^@@/ then
    self.lex_state, result = :expr_end, :tCVAR
  when /^@/ then
    self.lex_state, result = :expr_end, :tIVAR
  else
    if token =~ /[!?]$/ then
      result = :tFID
    else
      if in_lex_state? :expr_fname then
        # ident=, not =~ => == or followed by =>
        # TODO test lexing of a=>b vs a==>b
        if src.scan(/=(?:(?![~>=])|(?==>))/) then
          result = :tIDENTIFIER
          token << src.matched
        end
      end

      result ||= if token =~ /^[A-Z]/ then
                   :tCONSTANT
                 else
                   :tIDENTIFIER
                 end
    end

    unless ruby18
      if is_label_possible? command_state then
        colon = src.scan(/:/)

        # "name:" is a label, "name::" is a scope operator
        if colon && src.peek(1) != ":" then
          self.lex_state = :expr_beg
          self.yacc_value = [token, src.lineno]
          return :tLABEL
        end

        src.unscan if colon
      end
    end

    unless in_lex_state? :expr_dot then
      # See if it is a reserved word.
      keyword = if ruby18 then # REFACTOR need 18/19 lexer subclasses
                  RubyParserStuff::Keyword.keyword18 token
                else
                  RubyParserStuff::Keyword.keyword19 token
                end

      if keyword then
        state           = lex_state
        self.lex_state  = keyword.state
        self.yacc_value = [token, src.lineno]

        if state == :expr_fname then
          self.yacc_value = keyword.name
          return keyword.id0
        end

        if keyword.id0 == :kDO then
          self.command_start = true
          return :kDO_COND  if cond.is_in_state
          return :kDO_BLOCK if cmdarg.is_in_state && state != :expr_cmdarg
          return :kDO_BLOCK if state == :expr_endarg
          if defined?(@hack_expects_lambda) && @hack_expects_lambda
            @hack_expects_lambda = false
            return :kDO_LAMBDA
          end
          return :kDO
        end

        return keyword.id0 if state == :expr_beg or state == :expr_value

        self.lex_state = :expr_beg if keyword.id0 != keyword.id1

        return keyword.id1
      end
    end

    # TODO:
    # if (mb == ENC_CODERANGE_7BIT && lex_state != EXPR_DOT) {

    self.lex_state =
      if is_beg? || in_lex_state?(:expr_dot) || is_arg? then
        if command_state then
          :expr_cmdarg
        else
          :expr_arg
        end
      elsif ruby19 && in_lex_state?(:expr_fname) then
        :expr_endfn
      else
        :expr_end
      end

  end

  self.yacc_value = token


  # a name already known as a local variable ends the expression
  self.lex_state = :expr_end if
    last_state != :expr_dot && self.parser.env[token.to_sym] == :lvar

  return result
end
rb_compile_error(msg) click to toggle source
# File lib/ruby_lexer.rb, line 418
def rb_compile_error msg
  msg += ". near line #{self.lineno}: #{src.rest[/^.*/].inspect}"
  raise RubyParser::SyntaxError, msg
end
read_escape() click to toggle source
# File lib/ruby_lexer.rb, line 423
# Reads one escape sequence from src (the leading backslash has already
# been consumed) and returns the character it denotes as a String.
#
# Handles the single-letter escapes, octal (\NNN) and hex (\xNN)
# constants, and the meta (\M-x) and control (\C-x, \cx) forms including
# their nested combinations.  Octal constants are masked to a single
# byte, so "\777" yields 0xFF.  Malformed sequences and end of input are
# reported through rb_compile_error.  Any other character stands for
# itself.
def read_escape # 51 lines
  case
  when src.scan(/\\/) then                  # Backslash
    '\\'
  when src.scan(/n/) then                   # newline
    "\n"
  when src.scan(/t/) then                   # horizontal tab
    "\t"
  when src.scan(/r/) then                   # carriage-return
    "\r"
  when src.scan(/f/) then                   # form-feed
    "\f"
  when src.scan(/v/) then                   # vertical tab
    "\13"
  when src.scan(/a/) then                   # alarm(bell)
    "\007"
  when src.scan(/e/) then                   # escape
    "\033"
  when src.scan(/b/) then                   # backspace
    "\010"
  when src.scan(/s/) then                   # space
    " "
  when src.scan(/[0-7]{1,3}/) then          # octal constant
    # values above 0377 would make Integer#chr raise; keep the low byte
    (src.matched.to_i(8) & 0xFF).chr
  when src.scan(/x([0-9a-fA-F]{1,2})/) then # hex constant
    src[1].to_i(16).chr
  when src.check(/M-\\[\\MCc]/) then
    src.scan(/M-\\/) # eat it
    c = read_escape
    c[0] = (c[0].ord | 0x80).chr
    c
  when src.scan(/M-(.)/) then
    c = src[1]
    c[0] = (c[0].ord | 0x80).chr
    c
  when src.check(/(C-|c)\\[\\MCc]/) then
    src.scan(/(C-|c)\\/) # eat it
    c = read_escape
    c[0] = (c[0].ord & 0x9f).chr
    c
  when src.scan(/C-\?|c\?/) then
    127.chr
  when src.scan(/(C-|c)(.)/) then
    c = src[2]
    c[0] = (c[0].ord & 0x9f).chr
    c
  when src.scan(/[McCx0-9]/) || src.eos? then
    rb_compile_error("Invalid escape character syntax")
  else
    src.getch
  end
end
regx_options() click to toggle source
# File lib/ruby_lexer.rb, line 476
# Reads the option letters that trail a regexp literal.
#
# Returns the recognised options (any of i x m o n e s u) as a String,
# in source order; the result is "" when no letters follow.  Any other
# lowercase letter is reported through rb_compile_error.
def regx_options # 15 lines
  good, bad = [], []

  if src.scan(/[a-z]+/) then
    good, bad = src.matched.split(//).partition { |s| s =~ /^[ixmonesu]$/ }
  end

  unless bad.empty? then
    rb_compile_error("unknown regexp option%s - %s" %
                     [(bad.size > 1 ? "s" : ""), bad.join.inspect])
  end

  return good.join
end
reset() click to toggle source
# File lib/ruby_lexer.rb, line 491
# Returns the lexer to its initial scanning state: command_start becomes
# true while lex_strterm, token, yacc_value, the source scanner and the
# lex state are all cleared to nil.
def reset
  self.command_start = true

  %w[lex_strterm token yacc_value].each { |field| send("#{field}=", nil) }

  @src = @lex_state = nil
end
ruby18() click to toggle source
# File lib/ruby_lexer.rb, line 501
# True when the attached parser is a Ruby18Parser.
def ruby18
  case parser
  when Ruby18Parser then true
  else false
  end
end
ruby19() click to toggle source
# File lib/ruby_lexer.rb, line 505
# True when the attached parser is a Ruby19Parser.
def ruby19
  case parser
  when Ruby19Parser then true
  else false
  end
end
src=(src) click to toggle source
# File lib/ruby_lexer.rb, line 509
# Wraps the String +src+ in an RPStringScanner and makes it the stream
# the lexer reads from.  Anything that is not a String raises a
# RuntimeError.
def src= src
  case src
  when String then @src = RPStringScanner.new(src)
  else raise "bad src: #{src.inspect}"
  end
end
tokadd_escape(term) click to toggle source
# File lib/ruby_lexer.rb, line 514
# Copies one backslash escape from src into string_buffer verbatim,
# backslash included, without interpreting it.
#
# A backslash-newline pair is dropped.  Chained meta/control prefixes
# (e.g. \C-\M-a) are copied piece by piece through recursion.
# Incomplete \M, \C, \c and \x sequences, or the absence of any escape,
# are reported through rb_compile_error.  +term+ is only passed along on
# recursion.
def tokadd_escape term # 20 lines
  case
  when src.scan(/\\\n/) then
    # just ignore
  when src.scan(/\\([0-7]{1,3}|x[0-9a-fA-F]{1,2})/) then
    self.string_buffer << src.matched
  when src.scan(/\\([MC]-|c)(?=\\)/) then
    self.string_buffer << src.matched
    tokadd_escape term
  when src.scan(/\\([MC]-|c)(.)/) then
    self.string_buffer << src.matched
  when src.scan(/\\[McCx]/) then
    rb_compile_error "Invalid escape character syntax"
  when src.scan(/\\(.)/) then
    self.string_buffer << src.matched
  else
    rb_compile_error "Invalid escape character syntax"
  end
end
tokadd_string(func, term, paren) click to toggle source
# File lib/ruby_lexer.rb, line 534
# Accumulates literal string content from src into string_buffer until
# something that needs its own token is reached.
#
# +func+ is the STR_FUNC_* bit set describing the literal, +term+ the
# closing delimiter and +paren+ the opening delimiter (nil, or "\0" when
# the delimiters do not nest).
#
# Scanning stops, leaving the scanner just before the offending
# character, at an unnested terminator, at whitespace inside a word list,
# or at the "#" that starts an interpolation.  Nested delimiter pairs are
# tracked through #nest.
#
# Returns the last chunk scanned, or RubyLexer::EOF when the input ran
# out.
def tokadd_string(func, term, paren) # 105 lines
  qwords = (func & STR_FUNC_QWORDS) != 0
  escape = (func & STR_FUNC_ESCAPE) != 0
  expand = (func & STR_FUNC_EXPAND) != 0
  regexp = (func & STR_FUNC_REGEXP) != 0
  symbol = (func & STR_FUNC_SYMBOL) != 0

  paren_re = paren.nil? ? nil : Regexp.new(Regexp.escape(paren))
  term_re  = Regexp.new(Regexp.escape(term))

  until src.eos? do
    c = nil
    handled = true
    case
    when self.nest == 0 && src.scan(term_re) then
      src.pos -= 1 # leave the terminator for the caller
      break
    when paren_re && src.scan(paren_re) then
      self.nest += 1
    when src.scan(term_re) then
      self.nest -= 1
    when qwords && src.scan(/\s/) then
      src.pos -= 1
      break
    when expand && src.scan(/#(?=[\$\@\{])/) then
      src.pos -= 1
      break
    when expand && src.scan(/#(?!\n)/) then
      # do nothing
    when src.check(/\\/) then
      case
      when qwords && src.scan(/\\\n/) then
        string_buffer << "\n"
        next
      when qwords && src.scan(/\\\s/) then
        c = ' '
      when expand && src.scan(/\\\n/) then
        next
      when regexp && src.check(/\\/) then
        # regexps keep their escapes untouched
        tokadd_escape term
        next
      when expand && src.scan(/\\/) then
        c = read_escape
      when src.scan(/\\\n/) then
        # do nothing
      when src.scan(/\\\\/) then
        string_buffer << '\\' if escape
        c = '\\'
      when src.scan(/\\/) then
        # an escaped delimiter loses its backslash, anything else keeps it
        unless src.scan(term_re) || paren.nil? || src.scan(paren_re) then
          string_buffer << "\\"
        end
      else
        handled = false
      end
    else
      handled = false
    end # case

    unless handled then

      t = Regexp.escape term
      x = Regexp.escape(paren) if paren && paren != "\000"
      re = if qwords then
             /[^#{t}#{x}\#\0\\\n\ ]+|./ # |. to pick up whatever
           else
             /[^#{t}#{x}\#\0\\]+|./
           end

      src.scan re
      c = src.matched

      rb_compile_error "symbol cannot contain '\\0'" if symbol && c =~ /\0/
    end # unless handled

    c ||= src.matched
    string_buffer << c
  end # until

  c ||= src.matched
  c = RubyLexer::EOF if src.eos?


  return c
end
unescape(s) click to toggle source
# File lib/ruby_lexer.rb, line 636
# Translates the body of an escape sequence (+s+, without the leading
# backslash) into the character it denotes.
#
# Sequences listed in ESCAPES are looked up directly.  Octal (NNN), hex
# (xNN), meta (M-x) and control (C-x, cx) forms are computed; octal
# values are masked to a single byte.  An incomplete M/C/c/x/digit form
# is reported through rb_compile_error, and anything else is returned
# unchanged.
def unescape s
  r = ESCAPES[s]

  return r if r

  case s
  when /^[0-7]{1,3}/ then
    # values above 0377 would make Integer#chr raise; keep the low byte
    ($&.to_i(8) & 0xFF).chr
  when /^x([0-9a-fA-F]{1,2})/ then
    $1.to_i(16).chr
  when /^M-(.)/ then
    ($1[0].ord | 0x80).chr
  when /^(C-|c)(.)/ then
    ($2[0].ord & 0x9f).chr
  when /^[McCx0-9]/ then
    rb_compile_error("Invalid escape character syntax")
  else
    s
  end
end
warning(s) click to toggle source
# File lib/ruby_lexer.rb, line 657
# Hook for lexer warnings.  The message +s+ is currently discarded.
def warning(s)
  nil # intentionally a no-op for now
end
yylex() click to toggle source

Returns the next token. Also sets yacc_value (yy_val) if needed.

@return The type of the next token as a Symbol (for example :tNL or :tSTRING), or RubyLexer::EOF at the end of the input.

# File lib/ruby_lexer.rb, line 666
def yylex # 826 lines
  c = ''
  self.space_seen = false
  command_state = false
  src = self.src

  self.token = nil
  self.yacc_value = nil

  return yylex_string if lex_strterm

  command_state = self.command_start
  self.command_start = false

  last_state = lex_state

  loop do # START OF CASE
    if src.scan(%r[\ \t\r\f\v]/) then # \s - \n + \v
      self.space_seen = true
      next
    elsif src.check(%r[^a-zA-Z]/) then
      if src.scan(%r\n|#/) then
        self.lineno = nil
        c = src.matched
        if c == '#' then
          src.pos -= 1

          while src.scan(%r\s*#.*(\n+|\z)/) do
            @comments << src.matched.gsub(%r^ +#/, '#').gsub(%r^ +$/, '')
          end

          return RubyLexer::EOF if src.eos?
        end

        # Replace a string of newlines with a single one
        src.scan(%r\n+/)

        next if in_lex_state?(:expr_beg, :expr_fname, :expr_dot, :expr_class,
                              :expr_value)

        if src.scan(%r([\ \t\r\f\v]*)\./) then
          self.space_seen = true unless src[1].empty?

          src.pos -= 1
          next unless src.check(%r\.\./)
        end

        self.command_start = true
        self.lex_state = :expr_beg
        return :tNL
      elsif src.scan(%r[\]\)\}]/) then
        cond.lexpop
        cmdarg.lexpop
        tern.lexpop
        self.lex_state = :expr_end
        self.yacc_value = src.matched
        result = {
          ")" => :tRPAREN,
          "]" => :tRBRACK,
          "}" => :tRCURLY
        }[src.matched]
        return result
      elsif src.scan(%r\.\.\.?|,|![=~]?/) then
        self.lex_state = :expr_beg
        tok = self.yacc_value = src.matched
        return TOKENS[tok]
      elsif src.check(%r\./) then
        if src.scan(%r\.\d/) then
          rb_compile_error "no .<digit> floating literal anymore put 0 before dot"
        elsif src.scan(%r\./) then
          self.lex_state = :expr_dot
          self.yacc_value = "."
          return :tDOT
        end
      elsif src.scan(%r\(/) then
        result = if ruby18 then
                   yylex_paren18
                 else
                   yylex_paren19
                 end

        self.expr_beg_push "("

        return result
      elsif src.check(%r\=/) then
        if src.scan(%r\=\=\=|\=\=|\=~|\=>|\=(?!begin\b)/) then
          self.fix_arg_lex_state
          tok = self.yacc_value = src.matched
          return TOKENS[tok]
        elsif src.scan(%r\=begin(?=\s)/) then
          # @comments << '=' << src.matched
          @comments << src.matched

          unless src.scan(%r.*?\n=end( |\t|\f)*[^\n]*(\n|\z)/) then
            @comments.clear
            rb_compile_error("embedded document meets end of file")
          end

          @comments << src.matched

          next
        else
          raise "you shouldn't be able to get here"
        end
      elsif src.scan(%r\"(#{ESC_RE}|#(#{ESC_RE}|[^\{\#\@\$\"\\])|[^\"\\\#])*\"/) then
        self.yacc_value = src.matched[1..-2].gsub(ESC_RE) { unescape $1 }
        self.lex_state = :expr_end
        return :tSTRING
      elsif src.scan(%r\"/) then # FALLBACK
        self.lex_strterm = [:strterm, STR_DQUOTE, '"', "\00""] # TODO: question this
        self.yacc_value = "\""
        return :tSTRING_BEG
      elsif src.scan(%r\@\@?\w*/) then
        self.token = src.matched

        rb_compile_error "`#{token}` is not allowed as a variable name" if
          token =~ %r\@\d/

        return process_token(command_state)
      elsif src.scan(%r\:\:/) then
        if is_beg? || in_lex_state?(:expr_class) || is_space_arg? then
          self.lex_state = :expr_beg
          self.yacc_value = "::"
          return :tCOLON3
        end

        self.lex_state = :expr_dot
        self.yacc_value = "::"
        return :tCOLON2
      elsif ! is_end? && src.scan(%r:([a-zA-Z_]#{IDENT_CHAR_RE}*(?:[?!]|=(?==>)|=(?![=>]))?)/) then
        # scanning shortcut to symbols
        self.yacc_value = src[1]
        self.lex_state = :expr_end
        return :tSYMBOL
      elsif src.scan(%r\:/) then
        # ?: / then / when
        if is_end? || src.check(%r\s/) then
          self.lex_state = :expr_beg
          # TODO warn_balanced(":", "symbol literal");
          self.yacc_value = ":"
          return :tCOLON
        end

        case
        when src.scan(%r\'/) then
          self.lex_strterm = [:strterm, STR_SSYM, src.matched, "\00""]
        when src.scan(%r\"/) then
          self.lex_strterm = [:strterm, STR_DSYM, src.matched, "\00""]
        end

        self.lex_state = :expr_fname
        self.yacc_value = ":"
        return :tSYMBEG
      elsif src.check(%r[0-9]/) then
        return parse_number
      elsif src.scan(%r\[/) then
        result = src.matched

        if in_lex_state? :expr_fname, :expr_dot then
          self.lex_state = :expr_arg
          case
          when src.scan(%r\]\=/) then
            self.yacc_value = "[]="
            return :tASET
          when src.scan(%r\]/) then
            self.yacc_value = "[]"
            return :tAREF
          else
            rb_compile_error "unexpected '['"
          end
        elsif is_beg? then
          self.tern.push false
          result = :tLBRACK
        elsif is_arg? && space_seen then
          self.tern.push false
          result = :tLBRACK
        else
          result = :tLBRACK2
        end

        self.expr_beg_push "["

        return result
      elsif src.scan(%r\'(\\.|[^\'])*\'/) then
        self.yacc_value = src.matched[1..-2].gsub(%r\\\\/, "\\").gsub(%r\\'/, "'")
        self.lex_state = :expr_end
        return :tSTRING
      elsif src.check(%r\|/) then
        if src.scan(%r\|\|\=/) then
          self.lex_state = :expr_beg
          self.yacc_value = "||"
          return :tOP_ASGN
        elsif src.scan(%r\|\|/) then
          self.lex_state = :expr_beg
          self.yacc_value = "||"
          return :tOROP
        elsif src.scan(%r\|\=/) then
          self.lex_state = :expr_beg
          self.yacc_value = "|"
          return :tOP_ASGN
        elsif src.scan(%r\|/) then
          self.fix_arg_lex_state
          self.yacc_value = "|"
          return :tPIPE
        end
      elsif src.scan(%r\{/) then
        if defined?(@hack_expects_lambda) && @hack_expects_lambda
          @hack_expects_lambda = false
          self.lex_state = :expr_beg
          return :tLAMBEG
        end

        result = if is_arg? || in_lex_state?(:expr_end) then
                   :tLCURLY      #  block (primary)
                 elsif in_lex_state?(:expr_endarg) then
                   :tLBRACE_ARG  #  block (expr)
                 else
                   self.tern.push false
                   :tLBRACE      #  hash
                 end

        self.expr_beg_push "{"
        self.command_start = true unless result == :tLBRACE

        return result
      elsif src.scan(%r->/) then
        @hack_expects_lambda = true
        self.lex_state = :expr_arg
        return :tLAMBDA
      elsif src.scan(%r[+-]/) then
        sign = src.matched
        utype, type = if sign == "+" then
                        [:tUPLUS, :tPLUS]
                      else
                        [:tUMINUS, :tMINUS]
                      end

        if in_lex_state? :expr_fname, :expr_dot then
          self.lex_state = :expr_arg
          if src.scan(%r@/) then
            self.yacc_value = "#{sign}@"
            return utype
          else
            self.yacc_value = sign
            return type
          end
        end

        if src.scan(%r\=/) then
          self.lex_state = :expr_beg
          self.yacc_value = sign
          return :tOP_ASGN
        end

        if (is_beg? ||
            (is_arg? && space_seen && !src.check(%r\s/))) then
          if is_arg? then
            arg_ambiguous
          end

          self.lex_state = :expr_beg
          self.yacc_value = sign

          if src.check(%r\d/) then
            if utype == :tUPLUS then
              return self.parse_number
            else
              return :tUMINUS_NUM
            end
          end

          return utype
        end

        self.lex_state = :expr_beg
        self.yacc_value = sign
        return type
      elsif src.check(%r\*/) then
        if src.scan(%r\*\*=/) then
          self.lex_state = :expr_beg
          self.yacc_value = "**"
          return :tOP_ASGN
        elsif src.scan(%r\*\*/) then
          self.yacc_value = "**"
          self.fix_arg_lex_state
          return :tPOW
        elsif src.scan(%r\*\=/) then
          self.lex_state = :expr_beg
          self.yacc_value = "*"
          return :tOP_ASGN
        elsif src.scan(%r\*/) then
          result = if is_arg? && space_seen && src.check(%r\S/) then
                     warning("`*' interpreted as argument prefix")
                     :tSTAR
                   elsif is_beg? then
                     :tSTAR
                   else
                     :tSTAR2
                   end
          self.yacc_value = "*"
          self.fix_arg_lex_state

          return result
        end
      elsif src.check(%r\</) then
        if src.scan(%r\<\=\>/) then
          self.fix_arg_lex_state
          self.yacc_value = "<=>"
          return :tCMP
        elsif src.scan(%r\<\=/) then
          self.fix_arg_lex_state
          self.yacc_value = "<="
          return :tLEQ
        elsif src.scan(%r\<\<\=/) then
          self.fix_arg_lex_state
          self.lex_state = :expr_beg
          self.yacc_value = "\<\<"
          return :tOP_ASGN
        elsif src.scan(%r\<\</) then
          if (! in_lex_state?(:expr_end, :expr_dot,
                              :expr_endarg, :expr_class) &&
              (!is_arg? || space_seen)) then
            tok = self.heredoc_identifier
            return tok if tok
          end

          self.fix_arg_lex_state
          self.yacc_value = "\<\<"
          return :tLSHFT
        elsif src.scan(%r\</) then
          self.fix_arg_lex_state
          self.yacc_value = "<"
          return :tLT
        end
      elsif src.check(%r\>/) then
        if src.scan(%r\>\=/) then
          self.fix_arg_lex_state
          self.yacc_value = ">="
          return :tGEQ
        elsif src.scan(%r\>\>=/) then
          self.fix_arg_lex_state
          self.lex_state = :expr_beg
          self.yacc_value = ">>"
          return :tOP_ASGN
        elsif src.scan(%r\>\>/) then
          self.fix_arg_lex_state
          self.yacc_value = ">>"
          return :tRSHFT
        elsif src.scan(%r\>/) then
          self.fix_arg_lex_state
          self.yacc_value = ">"
          return :tGT
        end
      elsif src.scan(%r\`/) then
        self.yacc_value = "`"
        case lex_state
        when :expr_fname then
          self.lex_state = :expr_end
          return :tBACK_REF2
        when :expr_dot then
          self.lex_state = if command_state then
                             :expr_cmdarg
                           else
                             :expr_arg
                           end
          return :tBACK_REF2
        end
        self.lex_strterm = [:strterm, STR_XQUOTE, '`', "\00""]
        return :tXSTRING_BEG
      elsif src.scan(%r\?/) then

        if is_end? then
          self.lex_state = ruby18 ? :expr_beg : :expr_value # HACK?
          self.tern.push true
          self.yacc_value = "?"
          return :tEH
        end

        if src.eos? then
          rb_compile_error "incomplete character syntax"
        end

        if src.check(%r\s|\v/) then
          unless is_arg? then
            c2 = { " " => 's',
                  "\n" => 'n',
                  "\t" => 't',
                  "\v" => 'v',
                  "\r" => 'r',
                  "\f" => 'f' }[src.matched]

            if c2 then
              warning("invalid character syntax; use ?\\" + c2)
            end
          end

          # ternary
          self.lex_state = ruby18 ? :expr_beg : :expr_value # HACK?
          self.tern.push true
          self.yacc_value = "?"
          return :tEH
        elsif src.check(%r\w(?=\w)/) then # ternary, also
          self.lex_state = :expr_beg
          self.tern.push true
          self.yacc_value = "?"
          return :tEH
        end

        c = if src.scan(%r\\/) then
              self.read_escape
            else
              src.getch
            end
        self.lex_state = :expr_end

        if version == 18 then
          self.yacc_value = c[0].ord & 0xff
          return :tINTEGER
        else
          self.yacc_value = c
          return :tSTRING
        end
      elsif src.check(%r\&/) then
        if src.scan(%r\&\&\=/) then
          self.yacc_value = "&&"
          self.lex_state = :expr_beg
          return :tOP_ASGN
        elsif src.scan(%r\&\&/) then
          self.lex_state = :expr_beg
          self.yacc_value = "&&"
          return :tANDOP
        elsif src.scan(%r\&\=/) then
          self.yacc_value = "&"
          self.lex_state = :expr_beg
          return :tOP_ASGN
        elsif src.scan(%r&/) then
          result = if is_arg? && space_seen &&
                       !src.check(%r\s/) then
                     warning("`&' interpreted as argument prefix")
                     :tAMPER
                   elsif in_lex_state? :expr_beg, :expr_mid then
                     :tAMPER
                   else
                     :tAMPER2
                   end

          self.fix_arg_lex_state
          self.yacc_value = "&"
          return result
        end
      elsif src.scan(%r\//) then
        if is_beg? then
          self.lex_strterm = [:strterm, STR_REGEXP, '/', "\00""]
          self.yacc_value = "/"
          return :tREGEXP_BEG
        end

        if src.scan(%r\=/) then
          self.yacc_value = "/"
          self.lex_state = :expr_beg
          return :tOP_ASGN
        end

        if is_arg? && space_seen then
          unless src.scan(%r\s/) then
            arg_ambiguous
            self.lex_strterm = [:strterm, STR_REGEXP, '/', "\00""]
            self.yacc_value = "/"
            return :tREGEXP_BEG
          end
        end

        self.fix_arg_lex_state
        self.yacc_value = "/"

        return :tDIVIDE
      elsif src.scan(%r\^=/) then
        self.lex_state = :expr_beg
        self.yacc_value = "^"
        return :tOP_ASGN
      elsif src.scan(%r\^/) then
        self.fix_arg_lex_state
        self.yacc_value = "^"
        return :tCARET
      elsif src.scan(%r\;/) then
        self.command_start = true
        self.lex_state = :expr_beg
        self.yacc_value = ";"
        return :tSEMI
      elsif src.scan(%r\~/) then
        if in_lex_state? :expr_fname, :expr_dot then
          src.scan(%r@/)
        end

        self.fix_arg_lex_state
        self.yacc_value = "~"

        return :tTILDE
      elsif src.scan(%r\\/) then
        if src.scan(%r\r?\n/) then
          self.lineno = nil
          self.space_seen = true
          next
        end
        rb_compile_error "bare backslash only allowed before newline"
      elsif src.scan(%r\%/) then
        if is_beg? then
          return parse_quote
        end

        if src.scan(%r\=/) then
          self.lex_state = :expr_beg
          self.yacc_value = "%"
          return :tOP_ASGN
        end

        return parse_quote if is_arg? && space_seen && ! src.check(%r\s/)

        self.fix_arg_lex_state
        self.yacc_value = "%"

        return :tPERCENT
      elsif src.check(%r\$/) then
        if src.scan(%r(\$_)(\w+)/) then
          self.lex_state = :expr_end
          self.token = src.matched
          return process_token(command_state)
        elsif src.scan(%r\$_/) then
          self.lex_state = :expr_end
          self.token = src.matched
          self.yacc_value = src.matched
          return :tGVAR
        elsif src.scan(%r\$[~*$?!@\/\\;,.=:<>\"]|\$-\w?/) then
          self.lex_state = :expr_end
          self.yacc_value = src.matched
          return :tGVAR
        elsif src.scan(%r\$([\&\`\'\+])/) then
          self.lex_state = :expr_end
          # Explicit reference to these vars as symbols...
          if last_state == :expr_fname then
            self.yacc_value = src.matched
            return :tGVAR
          else
            self.yacc_value = src[1].to_sym
            return :tBACK_REF
          end
        elsif src.scan(%r\$([1-9]\d*)/) then
          self.lex_state = :expr_end
          if last_state == :expr_fname then
            self.yacc_value = src.matched
            return :tGVAR
          else
            self.yacc_value = src[1].to_i
            return :tNTH_REF
          end
        elsif src.scan(%r\$0/) then
          self.lex_state = :expr_end
          self.token = src.matched
          return process_token(command_state)
        elsif src.scan(%r\$\W|\$\z/) then # TODO: remove?
          self.lex_state = :expr_end
          self.yacc_value = "$"
          return "$"
        elsif src.scan(%r\$\w+/)
          self.lex_state = :expr_end
          self.token = src.matched
          return process_token(command_state)
        end
      elsif src.check(%r\_/) then
        if src.beginning_of_line? && src.scan(%r\__END__(\n|\Z)/) then
          self.lineno = nil
          return RubyLexer::EOF
        elsif src.scan(%r\_\w*/) then
          self.token = src.matched
          return process_token(command_state)
        end
      end
    end # END OF CASE

    if src.scan(%r\0004|\0032|\0000/) || src.eos? then # ^D, ^Z, EOF
      return RubyLexer::EOF
    else # alpha check
      unless src.check IDENT_RE then
        rb_compile_error "Invalid char #{src.matched.inspect} in expression"
      end
    end

    self.token = src.matched if self.src.scan IDENT_RE

    return process_token(command_state)
  end
end
yylex_paren18() click to toggle source
# File lib/ruby_lexer.rb, line 1259
##
# Picks the token for an opening paren (presumably the Ruby 1.8 rules,
# going by the "18" suffix -- confirm against the caller).
#
# Always sets +command_start+ to true, then:
#
# * lex state is expr_beg or expr_mid  => :tLPAREN
# * whitespace seen, state expr_cmdarg => :tLPAREN_ARG
# * whitespace seen, state expr_arg    => :tLPAREN2, pushes false onto
#   +tern+ and emits a warning
# * whitespace seen, any other state   => :tLPAREN2
# * no whitespace seen                 => :tLPAREN2, pushes false onto +tern+

def yylex_paren18
  self.command_start = true

  return :tLPAREN if in_lex_state? :expr_beg, :expr_mid

  unless space_seen
    self.tern.push false
    return :tLPAREN2
  end

  return :tLPAREN_ARG if in_lex_state? :expr_cmdarg

  if in_lex_state? :expr_arg
    # Push first, warn second: same order as the branch always used.
    self.tern.push false
    warning "don't put space before argument parentheses"
  end

  :tLPAREN2
end
yylex_paren19() click to toggle source
# File lib/ruby_lexer.rb, line 1299
##
# Picks the token for an opening paren (presumably the Ruby 1.9 rules,
# going by the "19" suffix -- confirm against the caller).
#
# Returns :tLPAREN when +is_beg?+ holds, :tLPAREN_ARG when +is_space_arg?+
# holds, and :tLPAREN2 otherwise. +is_space_arg?+ is only consulted when
# +is_beg?+ is false.

def yylex_paren19 # TODO: move or remove
  return :tLPAREN     if is_beg?
  return :tLPAREN_ARG if is_space_arg?

  # paren_nest++; # TODO

  :tLPAREN2 # plain '(' in parse.y
end
yylex_string() click to toggle source
# File lib/ruby_lexer.rb, line 1427
##
# Produces the next token while a string terminator (+lex_strterm+) is
# active.
#
# Hands +lex_strterm+ to +heredoc+ when its first element is :heredoc,
# and to +parse_string+ otherwise. If the token that comes back is
# :tSTRING_END or :tREGEXP_END, +lineno+ and +lex_strterm+ are set to nil
# and +lex_state+ becomes :expr_end. The token is returned either way.

def yylex_string
  tok = lex_strterm[0] == :heredoc ? heredoc(lex_strterm) : parse_string(lex_strterm)

  # Any token other than a closing one leaves the lexer state untouched.
  return tok unless tok == :tSTRING_END || tok == :tREGEXP_END

  self.lineno      = nil
  self.lex_strterm = nil
  self.lex_state   = :expr_end

  tok
end