| /// |
| /// Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. |
| /// Use of this file is governed by the BSD 3-clause license that |
| /// can be found in the LICENSE.txt file in the project root. |
| /// |
| |
| |
| /// |
/// A lexer is a recognizer that draws input symbols from a character stream.
/// Lexer grammars result in a subclass of this object. A Lexer object
| /// uses simplified match() and error recovery mechanisms in the interest |
| /// of speed. |
| /// |
| |
| import Foundation |
| |
open class Lexer: Recognizer<LexerATNSimulator>, TokenSource {
    /// Token type returned once the underlying char stream is exhausted.
    public static let EOF = -1
    /// The lexer mode every lexer starts out in.
    public static let DEFAULT_MODE = 0
    /// Sentinel token type set by more(): keep matching rules and fold the
    /// newly matched text into the current (still open) token.
    public static let MORE = -2
    /// Sentinel token type set by skip(): throw away the current match and
    /// go look for another token.
    public static let SKIP = -3

    /// Channel assigned to ordinary tokens.
    public static let DEFAULT_TOKEN_CHANNEL = CommonToken.DEFAULT_CHANNEL
    /// Channel for tokens the parser should not see (e.g. whitespace, comments).
    public static let HIDDEN = CommonToken.HIDDEN_CHANNEL
    /// Smallest / largest character values the lexer can match.
    public static let MIN_CHAR_VALUE = Character.MIN_VALUE;
    public static let MAX_CHAR_VALUE = Character.MAX_VALUE;

    /// The character stream this lexer draws symbols from; nil until a
    /// stream has been supplied (via init(_:) or setInputStream(_:)).
    public var _input: CharStream?
    /// Couples this token source with its char stream so that tokens built
    /// by the factory can refer back to both.
    internal var _tokenFactorySourcePair: TokenSourceAndStream

    ///
    /// How to create token objects
    ///
    internal var _factory = CommonTokenFactory.DEFAULT

    ///
    /// The goal of all lexer rules/methods is to create a token object.
    /// This is an instance variable as multiple rules may collaborate to
    /// create a single token. nextToken will return this object after
    /// matching lexer rule(s). If you subclass to allow multiple token
    /// emissions, then set this to the last token to be matched or
    /// something nonnull so that the auto token emit mechanism will not
    /// emit another token.
    ///
    public var _token: Token?

    ///
    /// What character index in the stream did the current token start at?
    /// Needed, for example, to get the text for current token. Set at
    /// the start of nextToken.
    ///
    public var _tokenStartCharIndex = -1

    ///
    /// The line on which the first character of the token resides
    ///
    public var _tokenStartLine = 0

    ///
    /// The character position of first character within the line
    ///
    public var _tokenStartCharPositionInLine = 0

    ///
    /// Once we see EOF on char stream, next token will be EOF.
    /// If you have DONE : EOF ; then you see DONE EOF.
    ///
    public var _hitEOF = false

    ///
    /// The channel number for the current token
    ///
    public var _channel = 0

    ///
    /// The token type for the current token
    ///
    public var _type = CommonToken.INVALID_TYPE

    /// Modes saved by pushMode(_:); popMode() restores from here.
    public final var _modeStack = Stack<Int>()
    /// The mode the ATN simulator is currently matching in.
    public var _mode = Lexer.DEFAULT_MODE

    ///
    /// You can set the text for the current token to override what is in
    /// the input char buffer. Use setText() or can set this instance var.
    ///
    public var _text: String?

    ///
    /// Creates a lexer with no input stream. A stream must be supplied via
    /// setInputStream(_:) before nextToken() is usable.
    ///
    public override init() {
        self._tokenFactorySourcePair = TokenSourceAndStream()
        super.init()
        // The pair may only reference self once super.init() has run.
        self._tokenFactorySourcePair.tokenSource = self
    }

    ///
    /// Creates a lexer that reads from the given character stream.
    ///
    public required init(_ input: CharStream) {
        self._input = input
        self._tokenFactorySourcePair = TokenSourceAndStream()
        super.init()
        self._tokenFactorySourcePair.tokenSource = self
        self._tokenFactorySourcePair.stream = input
    }

    ///
    /// Rewind the input stream (if one is attached) and restore every piece
    /// of per-token lexer state — token, type, channel, start position, mode,
    /// mode stack — to its pre-tokenizing default, then reset the ATN
    /// simulator as well.
    /// - Throws: rethrows any error raised while seeking the input stream.
    ///
    open func reset() throws {
        // wack Lexer state variables
        if let _input = _input {
            try _input.seek(0) // rewind the input
        }
        _token = nil
        _type = CommonToken.INVALID_TYPE
        _channel = CommonToken.DEFAULT_CHANNEL
        _tokenStartCharIndex = -1
        _tokenStartCharPositionInLine = -1
        _tokenStartLine = -1
        _text = nil

        _hitEOF = false
        _mode = Lexer.DEFAULT_MODE
        _modeStack.clear()

        getInterpreter().reset()
    }

    ///
    /// Return a token from this source; i.e., match a token on the char
    /// stream.
    /// - Throws: ANTLRError.illegalState when no input stream has been set;
    ///   otherwise rethrows errors from the input stream or recovery.
    ///
    open func nextToken() throws -> Token {
        guard let _input = _input else {
            throw ANTLRError.illegalState(msg: "nextToken requires a non-null input stream.")
        }

        // Mark start location in char stream so unbuffered streams are
        // guaranteed at least have text of current token
        let tokenStartMarker = _input.mark()
        defer {
            // make sure we release marker after match or
            // unbuffered char stream will keep buffering
            // NOTE(review): try! traps if release() throws; assumed safe for a
            // marker that mark() just returned — confirm for custom streams.
            try! _input.release(tokenStartMarker)
        }
        do {
            outer:
            // Loop until a non-skipped token is produced (or EOF is hit).
            while true {
                if _hitEOF {
                    // Once EOF was seen, every subsequent call yields an EOF token.
                    emitEOF()
                    return _token!
                }

                // Begin a fresh token: record where it starts in the stream.
                _token = nil
                _channel = CommonToken.DEFAULT_CHANNEL
                _tokenStartCharIndex = _input.index()
                _tokenStartCharPositionInLine = getInterpreter().getCharPositionInLine()
                _tokenStartLine = getInterpreter().getLine()
                _text = nil
                repeat {
                    _type = CommonToken.INVALID_TYPE
                    var ttype: Int
                    do {
                        ttype = try getInterpreter().match(_input, _mode)
                    }
                    catch ANTLRException.recognition(let e) {
                        // In a lexer a recognition error is assumed to be a
                        // LexerNoViableAltException; the forced casts trap otherwise.
                        notifyListeners(e as! LexerNoViableAltException, recognizer: self)
                        try recover(e as! LexerNoViableAltException)
                        ttype = Lexer.SKIP
                    }
                    if try _input.LA(1) == BufferedTokenStream.EOF {
                        _hitEOF = true
                    }
                    if _type == CommonToken.INVALID_TYPE {
                        // A lexer action may have set _type; otherwise take the match result.
                        _type = ttype
                    }
                    if _type == Lexer.SKIP {
                        // Discard this match and start over on a new token.
                        continue outer
                    }
                } while _type == Lexer.MORE // MORE: keep extending the current token

                if _token == nil {
                    // No explicit emit happened in a lexer action; auto-emit now.
                    emit()
                }
                return _token!
            }
        }

    }

    ///
    /// Instruct the lexer to skip creating a token for current lexer rule
    /// and look for another token. nextToken() knows to keep looking when
    /// a lexer rule finishes with token set to SKIP_TOKEN. Recall that
    /// if token==null at end of any token rule, it creates one for you
    /// and emits it.
    ///
    open func skip() {
        _type = Lexer.SKIP
    }

    /// Instruct the lexer to keep matching: the next rule's text is folded
    /// into the current token instead of starting a new one.
    open func more() {
        _type = Lexer.MORE
    }

    /// Switch the lexer to mode m without remembering the current mode.
    open func mode(_ m: Int) {
        _mode = m
    }

    /// Save the current mode on the mode stack, then switch to mode m.
    open func pushMode(_ m: Int) {
        if LexerATNSimulator.debug {
            print("pushMode \(m)")
        }
        _modeStack.push(_mode)
        mode(m)
    }

    ///
    /// Restore the most recently pushed mode.
    /// - Returns: the mode now in effect.
    /// - Throws: ANTLRError.unsupportedOperation when the mode stack is empty.
    ///
    @discardableResult
    open func popMode() throws -> Int {
        if _modeStack.isEmpty {
            throw ANTLRError.unsupportedOperation(msg: " EmptyStackException")
        }

        if LexerATNSimulator.debug {
            print("popMode back to \(String(describing: _modeStack.peek()))")
        }
        mode(_modeStack.pop())
        return _mode
    }

    /// Install the factory used by emit() / emitEOF() to build tokens.
    open override func setTokenFactory(_ factory: TokenFactory) {
        self._factory = factory
    }

    /// The factory currently used to build tokens.
    open override func getTokenFactory() -> TokenFactory {
        return _factory
    }

    ///
    /// Set the char stream and reset the lexer
    ///
    /// The old stream is detached before reset() runs, so reset() does not
    /// seek the outgoing stream.
    /// NOTE(review): `input as? CharStream` silently leaves _input nil when a
    /// non-CharStream IntStream is passed — confirm callers only pass CharStreams.
    open override func setInputStream(_ input: IntStream) throws {
        self._input = nil
        self._tokenFactorySourcePair = makeTokenSourceAndStream()
        try reset()
        self._input = input as? CharStream
        self._tokenFactorySourcePair = makeTokenSourceAndStream()
    }

    /// Source name of the attached input stream.
    /// NOTE(review): force-unwraps _input; traps if no stream has been set.
    open func getSourceName() -> String {
        return _input!.getSourceName()
    }

    /// The attached char stream, or nil if none has been set.
    open func getInputStream() -> CharStream? {
        return _input
    }

    ///
    /// By default does not support multiple emits per nextToken invocation
    /// for efficiency reasons. Subclass and override this method, nextToken,
    /// and getToken (to push tokens into a list and pull from that list
    /// rather than a single variable as this implementation does).
    ///
    open func emit(_ token: Token) {
        //System.err.println("emit "+token);
        self._token = token
    }

    ///
    /// The standard method called to automatically emit a token at the
    /// outermost lexical rule. The token object should point into the
    /// char buffer start..stop. If there is a text override in 'text',
    /// use that to set the token's text. Override this method to emit
    /// custom Token objects or provide a new factory.
    ///
    @discardableResult
    open func emit() -> Token {
        // Stop index is the last consumed character: current index - 1.
        let t = _factory.create(_tokenFactorySourcePair, _type, _text, _channel, _tokenStartCharIndex, getCharIndex() - 1, _tokenStartLine, _tokenStartCharPositionInLine)
        emit(t)
        return t
    }

    ///
    /// Build and emit the EOF token at the current stream position. The
    /// token spans (idx, idx - 1), i.e. it has an empty text extent.
    /// NOTE(review): force-unwraps _input; traps if no stream has been set.
    ///
    @discardableResult
    open func emitEOF() -> Token {
        let cpos = getCharPositionInLine()
        let line = getLine()
        let idx = _input!.index()
        let eof = _factory.create(
            _tokenFactorySourcePair,
            CommonToken.EOF,
            nil,
            CommonToken.DEFAULT_CHANNEL,
            idx,
            idx - 1,
            line,
            cpos)
        emit(eof)
        return eof
    }

    /// Current line number, as tracked by the ATN simulator.
    open func getLine() -> Int {
        return getInterpreter().getLine()
    }

    /// Current character position within the line, per the ATN simulator.
    open func getCharPositionInLine() -> Int {
        return getInterpreter().getCharPositionInLine()
    }

    /// Override the line number the ATN simulator believes it is on.
    open func setLine(_ line: Int) {
        getInterpreter().setLine(line)
    }

    /// Override the in-line character position in the ATN simulator.
    open func setCharPositionInLine(_ charPositionInLine: Int) {
        getInterpreter().setCharPositionInLine(charPositionInLine)
    }

    ///
    /// What is the index of the current character of lookahead?
    /// NOTE(review): force-unwraps _input; traps if no stream has been set.
    ///
    open func getCharIndex() -> Int {
        return _input!.index()
    }

    ///
    /// Return the text matched so far for the current token or any
    /// text override.
    ///
    open func getText() -> String {
        if _text != nil {
            return _text!
        }
        return getInterpreter().getText(_input!)
    }

    ///
    /// Set the complete text of this token; it wipes any previous
    /// changes to the text.
    ///
    open func setText(_ text: String) {
        self._text = text
    }

    ///
    /// Override if emitting multiple tokens.
    /// NOTE(review): force-unwraps _token; traps before the first emit.
    ///
    open func getToken() -> Token {
        return _token!
    }

    /// Install token as the current token (e.g. from a lexer action).
    open func setToken(_ _token: Token) {
        self._token = _token
    }

    /// Set the token type for the token being matched.
    open func setType(_ ttype: Int) {
        _type = ttype
    }

    /// Token type of the token being matched.
    open func getType() -> Int {
        return _type
    }

    /// Set the channel for the token being matched.
    open func setChannel(_ channel: Int) {
        _channel = channel
    }

    /// Channel of the token being matched.
    open func getChannel() -> Int {
        return _channel
    }

    /// Channel names, if known; generated lexers override this. Default: nil.
    open func getChannelNames() -> [String]? {
        return nil
    }

    /// Mode names, if known; generated lexers override this. Default: nil.
    open func getModeNames() -> [String]? {
        return nil
    }

    ///
    /// Return a list of all Token objects in input char stream.
    /// Forces load of all tokens. Does not include EOF token.
    /// - Throws: rethrows any error raised by nextToken().
    ///
    open func getAllTokens() throws -> [Token] {
        var tokens = [Token]()
        var t = try nextToken()
        while t.getType() != CommonToken.EOF {
            tokens.append(t)
            t = try nextToken()
        }
        return tokens
    }

    ///
    /// Default error recovery for an unmatched character: consume one char
    /// (unless at EOF) and let nextToken() try again.
    /// NOTE(review): force-unwraps _input; traps if no stream has been set.
    ///
    open func recover(_ e: LexerNoViableAltException) throws {
        if try _input!.LA(1) != BufferedTokenStream.EOF {
            // skip a char and try again
            try getInterpreter().consume(_input!)
        }
    }

    ///
    /// Report a token-recognition error to the registered error listeners,
    /// quoting the text from the token start up to the current index.
    ///
    open func notifyListeners<T>(_ e: LexerNoViableAltException, recognizer: Recognizer<T>) {

        let text: String
        do {
            text = try _input!.getText(Interval.of(_tokenStartCharIndex, _input!.index()))
        }
        catch {
            // Best effort: if the stream can't produce the text, report a placeholder.
            text = "<unknown>"
        }
        let msg = "token recognition error at: '\(getErrorDisplay(text))'"

        let listener = getErrorListenerDispatch()
        listener.syntaxError(recognizer, nil, _tokenStartLine, _tokenStartCharPositionInLine, msg, e)
    }

    /// Render a string for error messages, escaping each character via
    /// getErrorDisplay(_: Character).
    open func getErrorDisplay(_ s: String) -> String {
        var buf = ""
        for c in s {
            buf += getErrorDisplay(c)
        }
        return buf
    }

    /// Render a single character for error messages: EOF becomes "<EOF>",
    /// and newline/tab/carriage-return are shown as escape sequences.
    open func getErrorDisplay(_ c: Character) -> String {
        if c.integerValue == CommonToken.EOF {
            return "<EOF>"
        }
        switch c {
        case "\n":
            return "\\n"
        case "\t":
            return "\\t"
        case "\r":
            return "\\r"
        default:
            return String(c)
        }
    }

    /// Like getErrorDisplay(_:), but wrapped in single quotes.
    open func getCharErrorDisplay(_ c: Character) -> String {
        let s: String = getErrorDisplay(c)
        return "'\(s)'"
    }

    ///
    /// Lexers can normally match any char in it's vocabulary after matching
    /// a token, so do the easy thing and just kill a character and hope
    /// it all works out. You can instead use the rule invocation stack
    /// to do sophisticated error recovery if you are in a fragment rule.
    /// NOTE(review): force-unwraps _input; traps if no stream has been set.
    ///
    open func recover(_ re: AnyObject) throws {
        // TODO: Do we lose character or line position information?
        try _input!.consume()
    }

    /// Build a fresh source/stream pair for the current self and _input.
    internal func makeTokenSourceAndStream() -> TokenSourceAndStream {
        return TokenSourceAndStream(self, _input)
    }
}