123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663 |
- // Copyright 2016 José Santos <henrique_1609@me.com>
- //
- // Licensed under the Apache License, Version 2.0 (the "License");
- // you may not use this file except in compliance with the License.
- // You may obtain a copy of the License at
- //
- // http://www.apache.org/licenses/LICENSE-2.0
- //
- // Unless required by applicable law or agreed to in writing, software
- // distributed under the License is distributed on an "AS IS" BASIS,
- // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- // See the License for the specific language governing permissions and
- // limitations under the License.
- package jet
- import (
- "fmt"
- "strings"
- "unicode"
- "unicode/utf8"
- )
- // item represents a token or text string returned from the scanner.
- type item struct {
- typ itemType // The type of this item.
- pos Pos // The starting position, in bytes, of this item in the input string.
- val string // The value of this item.
- }
- func (i item) String() string {
- switch {
- case i.typ == itemEOF:
- return "EOF"
- case i.typ == itemError:
- return i.val
- case i.typ > itemKeyword:
- return fmt.Sprintf("<%s>", i.val)
- case len(i.val) > 10:
- return fmt.Sprintf("%.10q...", i.val)
- }
- return fmt.Sprintf("%q", i.val)
- }
// itemType is the kind tag attached to every lexed item.
type itemType int

// The item kinds. Values are assigned by iota, so the declaration order is
// significant: everything declared after itemKeyword is treated as a keyword.
const (
	// Basic tokens.
	itemError        itemType = iota // an error occurred; the item value holds the message
	itemBool                         // boolean constant
	itemChar                         // printable ASCII character; catch-all for punctuation
	itemCharConstant                 // character constant
	itemComplex                      // complex constant (1+2i); the imaginary part is just a number
	itemEOF                          // end of input
	itemField                        // alphanumeric identifier starting with '.'
	itemIdentifier                   // alphanumeric identifier not starting with '.'
	itemLeftDelim                    // left action delimiter
	itemLeftParen                    // '(' inside an action
	itemNumber                       // simple number, including imaginary
	itemPipe                         // pipe symbol
	itemRawString                    // raw quoted string (quotes included)
	itemRightDelim                   // right action delimiter
	itemRightParen                   // ')' inside an action
	itemSpace                        // run of spaces separating arguments
	itemString                       // quoted string (quotes included)
	itemText                         // plain text outside of actions

	// Operators and punctuation.
	itemAssign
	itemEquals
	itemNotEquals
	itemGreat
	itemGreatEquals
	itemLess
	itemLessEquals
	itemComma
	itemColonComma
	itemAdd
	itemMinus
	itemMul
	itemDiv
	itemMod
	itemColon
	itemTernary
	itemLeftBrackets
	itemRightBrackets

	// Keywords come last; itemKeyword itself is only a boundary marker.
	itemKeyword
	itemExtends
	itemBlock
	itemYield
	itemContent
	itemInclude
	itemElse
	itemEnd
	itemIf
	itemNil
	itemRange
	itemImport
	itemAnd
	itemOr
	itemNot
	itemMSG
	itemTrans
)
- var key = map[string]itemType{
- "extends": itemExtends,
- "import": itemImport,
- "include": itemInclude,
- "block": itemBlock,
- "yield": itemYield,
- "else": itemElse,
- "end": itemEnd,
- "if": itemIf,
- "range": itemRange,
- "nil": itemNil,
- "and": itemAnd,
- "or": itemOr,
- "not": itemNot,
- "content": itemContent,
- "msg": itemMSG,
- "trans": itemTrans,
- }
// eof is the sentinel returned by next once the input is exhausted.
const eof = -1
- // stateFn represents the state of the scanner as a function that returns the next state.
- type stateFn func(*lexer) stateFn
- // lexer holds the state of the scanner.
- type lexer struct {
- name string // the name of the input; used only for error reports
- input string // the string being scanned
- state stateFn // the next lexing function to enter
- pos Pos // current position in the input
- start Pos // start position of this item
- width Pos // width of last rune read from input
- lastPos Pos // position of most recent item returned by nextItem
- items chan item // channel of scanned items
- parenDepth int // nesting depth of ( ) exprs
- lastType itemType
- }
- // next returns the next rune in the input.
- func (l *lexer) next() rune {
- if int(l.pos) >= len(l.input) {
- l.width = 0
- return eof
- }
- r, w := utf8.DecodeRuneInString(l.input[l.pos:])
- l.width = Pos(w)
- l.pos += l.width
- return r
- }
- // peek returns but does not consume the next rune in the input.
- func (l *lexer) peek() rune {
- r := l.next()
- l.backup()
- return r
- }
- // backup steps back one rune. Can only be called once per call of next.
- func (l *lexer) backup() {
- l.pos -= l.width
- }
- // emit passes an item back to the client.
- func (l *lexer) emit(t itemType) {
- l.lastType = t
- l.items <- item{t, l.start, l.input[l.start:l.pos]}
- l.start = l.pos
- }
- // ignore skips over the pending input before this point.
- func (l *lexer) ignore() {
- l.start = l.pos
- }
- // accept consumes the next rune if it's from the valid set.
- func (l *lexer) accept(valid string) bool {
- if strings.IndexRune(valid, l.next()) >= 0 {
- return true
- }
- l.backup()
- return false
- }
- // acceptRun consumes a run of runes from the valid set.
- func (l *lexer) acceptRun(valid string) {
- for strings.IndexRune(valid, l.next()) >= 0 {
- }
- l.backup()
- }
- // lineNumber reports which line we're on, based on the position of
- // the previous item returned by nextItem. Doing it this way
- // means we don't have to worry about peek double counting.
- func (l *lexer) lineNumber() int {
- return 1 + strings.Count(l.input[:l.lastPos], "\n")
- }
- // errorf returns an error token and terminates the scan by passing
- // back a nil pointer that will be the next state, terminating l.nextItem.
- func (l *lexer) errorf(format string, args ...interface{}) stateFn {
- l.items <- item{itemError, l.start, fmt.Sprintf(format, args...)}
- return nil
- }
- // nextItem returns the next item from the input.
- // Called by the parser, not in the lexing goroutine.
- func (l *lexer) nextItem() item {
- item := <-l.items
- l.lastPos = item.pos
- return item
- }
- // drain drains the output so the lexing goroutine will exit.
- // Called by the parser, not in the lexing goroutine.
- func (l *lexer) drain() {
- for range l.items {
- }
- }
- // lex creates a new scanner for the input string.
- func lex(name, input string) *lexer {
- l := &lexer{
- name: name,
- input: input,
- items: make(chan item),
- }
- go l.run()
- return l
- }
- // run runs the state machine for the lexer.
- func (l *lexer) run() {
- for l.state = lexText; l.state != nil; {
- l.state = l.state(l)
- }
- close(l.items)
- }
// Markers that switch the scanner between plain text, actions and comments.
const (
	leftDelim    = "{{" // opens an action
	rightDelim   = "}}" // closes an action
	leftComment  = "{*" // opens a comment
	rightComment = "*}" // closes a comment
)
- // state functions
- func lexText(l *lexer) stateFn {
- for {
- if i := strings.IndexByte(l.input[l.pos:], '{'); i == -1 {
- l.pos = Pos(len(l.input))
- break
- } else {
- l.pos += Pos(i)
- if strings.HasPrefix(l.input[l.pos:], leftDelim) {
- if l.pos > l.start {
- l.emit(itemText)
- }
- return lexLeftDelim
- }
- if strings.HasPrefix(l.input[l.pos:], leftComment) {
- if l.pos > l.start {
- l.emit(itemText)
- }
- return lexComment
- }
- }
- if l.next() == eof {
- break
- }
- }
- // Correctly reached EOF.
- if l.pos > l.start {
- l.emit(itemText)
- }
- l.emit(itemEOF)
- return nil
- }
- func lexLeftDelim(l *lexer) stateFn {
- l.pos += Pos(len(leftDelim))
- l.emit(itemLeftDelim)
- l.parenDepth = 0
- return lexInsideAction
- }
- // lexComment scans a comment. The left comment marker is known to be present.
- func lexComment(l *lexer) stateFn {
- l.pos += Pos(len(leftComment))
- i := strings.Index(l.input[l.pos:], rightComment)
- if i < 0 {
- return l.errorf("unclosed comment")
- }
- l.pos += Pos(i + len(rightComment))
- l.ignore()
- return lexText
- }
- // lexRightDelim scans the right delimiter, which is known to be present.
- func lexRightDelim(l *lexer) stateFn {
- l.pos += Pos(len(rightDelim))
- l.emit(itemRightDelim)
- return lexText
- }
- // lexInsideAction scans the elements inside action delimiters.
- func lexInsideAction(l *lexer) stateFn {
- // Either number, quoted string, or identifier.
- // Spaces separate arguments; runs of spaces turn into itemSpace.
- // Pipe symbols separate and are emitted.
- if strings.HasPrefix(l.input[l.pos:], rightDelim) {
- if l.parenDepth == 0 {
- return lexRightDelim
- }
- return l.errorf("unclosed left paren")
- }
- switch r := l.next(); {
- case r == eof || isEndOfLine(r):
- return l.errorf("unclosed action")
- case isSpace(r):
- return lexSpace
- case r == ',':
- l.emit(itemComma)
- case r == ';':
- l.emit(itemColonComma)
- case r == '*':
- l.emit(itemMul)
- case r == '/':
- l.emit(itemDiv)
- case r == '%':
- l.emit(itemMod)
- case r == '-':
- if r := l.peek(); '0' <= r && r <= '9' &&
- itemAdd != l.lastType &&
- itemMinus != l.lastType &&
- itemNumber != l.lastType &&
- itemIdentifier != l.lastType &&
- itemString != l.lastType &&
- itemRawString != l.lastType &&
- itemCharConstant != l.lastType &&
- itemBool != l.lastType &&
- itemField != l.lastType &&
- itemChar != l.lastType &&
- itemTrans != l.lastType {
- l.backup()
- return lexNumber
- }
- l.emit(itemMinus)
- case r == '+':
- if r := l.peek(); '0' <= r && r <= '9' &&
- itemAdd != l.lastType &&
- itemMinus != l.lastType &&
- itemNumber != l.lastType &&
- itemIdentifier != l.lastType &&
- itemString != l.lastType &&
- itemRawString != l.lastType &&
- itemCharConstant != l.lastType &&
- itemBool != l.lastType &&
- itemField != l.lastType &&
- itemChar != l.lastType &&
- itemTrans != l.lastType {
- l.backup()
- return lexNumber
- }
- l.emit(itemAdd)
- case r == '?':
- l.emit(itemTernary)
- case r == '&':
- if l.next() == '&' {
- l.emit(itemAnd)
- } else {
- l.backup()
- }
- case r == '<':
- if l.next() == '=' {
- l.emit(itemLessEquals)
- } else {
- l.backup()
- l.emit(itemLess)
- }
- case r == '>':
- if l.next() == '=' {
- l.emit(itemGreatEquals)
- } else {
- l.backup()
- l.emit(itemGreat)
- }
- case r == '!':
- if l.next() == '=' {
- l.emit(itemNotEquals)
- } else {
- l.backup()
- l.emit(itemNot)
- }
- case r == '=':
- if l.next() == '=' {
- l.emit(itemEquals)
- } else {
- l.backup()
- l.emit(itemAssign)
- }
- case r == ':':
- if l.next() == '=' {
- l.emit(itemAssign)
- } else {
- l.backup()
- l.emit(itemColon)
- }
- case r == '|':
- if l.next() == '|' {
- l.emit(itemOr)
- } else {
- l.backup()
- l.emit(itemPipe)
- }
- case r == '"':
- return lexQuote
- case r == '`':
- return lexRawQuote
- case r == '\'':
- return lexChar
- case r == '.':
- // special look-ahead for ".field" so we don't break l.backup().
- if l.pos < Pos(len(l.input)) {
- r := l.input[l.pos]
- if r < '0' || '9' < r {
- return lexField
- }
- }
- fallthrough // '.' can start a number.
- case '0' <= r && r <= '9':
- l.backup()
- return lexNumber
- case isAlphaNumeric(r):
- l.backup()
- return lexIdentifier
- case r == '[':
- l.emit(itemLeftBrackets)
- case r == ']':
- l.emit(itemRightBrackets)
- case r == '(':
- l.emit(itemLeftParen)
- l.parenDepth++
- case r == ')':
- l.emit(itemRightParen)
- l.parenDepth--
- if l.parenDepth < 0 {
- return l.errorf("unexpected right paren %#U", r)
- }
- case r <= unicode.MaxASCII && unicode.IsPrint(r):
- l.emit(itemChar)
- return lexInsideAction
- default:
- return l.errorf("unrecognized character in action: %#U", r)
- }
- return lexInsideAction
- }
- // lexSpace scans a run of space characters.
- // One space has already been seen.
- func lexSpace(l *lexer) stateFn {
- for isSpace(l.peek()) {
- l.next()
- }
- l.emit(itemSpace)
- return lexInsideAction
- }
- // lexIdentifier scans an alphanumeric.
- func lexIdentifier(l *lexer) stateFn {
- Loop:
- for {
- switch r := l.next(); {
- case isAlphaNumeric(r):
- // absorb.
- default:
- l.backup()
- word := l.input[l.start:l.pos]
- if !l.atTerminator() {
- return l.errorf("bad character %#U", r)
- }
- switch {
- case key[word] > itemKeyword:
- l.emit(key[word])
- case word[0] == '.':
- l.emit(itemField)
- case word == "true", word == "false":
- l.emit(itemBool)
- default:
- l.emit(itemIdentifier)
- }
- break Loop
- }
- }
- return lexInsideAction
- }
- // lexField scans a field: .Alphanumeric.
- // The . has been scanned.
- func lexField(l *lexer) stateFn {
- if l.atTerminator() {
- // Nothing interesting follows -> "." or "$".
- l.emit(itemIdentifier)
- return lexInsideAction
- }
- var r rune
- for {
- r = l.next()
- if !isAlphaNumeric(r) {
- l.backup()
- break
- }
- }
- if !l.atTerminator() {
- return l.errorf("bad character %#U", r)
- }
- l.emit(itemField)
- return lexInsideAction
- }
- // atTerminator reports whether the input is at valid termination character to
- // appear after an identifier. Breaks .X.Y into two pieces. Also catches cases
- // like "$x+2" not being acceptable without a space, in case we decide one
- // day to implement arithmetic.
- func (l *lexer) atTerminator() bool {
- r := l.peek()
- if isSpace(r) || isEndOfLine(r) {
- return true
- }
- switch r {
- case eof, '.', ',', '|', ':', ')', '=', '(', ';', '?', '[', ']', '+', '-', '/', '%', '*', '&', '!', '<', '>':
- return true
- }
- // Does r start the delimiter? This can be ambiguous (with delim=="//", $x/2 will
- // succeed but should fail) but only in extremely rare cases caused by willfully
- // bad choice of delimiter.
- if rd, _ := utf8.DecodeRuneInString(rightDelim); rd == r {
- return true
- }
- return false
- }
- // lexChar scans a character constant. The initial quote is already
- // scanned. Syntax checking is done by the parser.
- func lexChar(l *lexer) stateFn {
- Loop:
- for {
- switch l.next() {
- case '\\':
- if r := l.next(); r != eof && r != '\n' {
- break
- }
- fallthrough
- case eof, '\n':
- return l.errorf("unterminated character constant")
- case '\'':
- break Loop
- }
- }
- l.emit(itemCharConstant)
- return lexInsideAction
- }
- // lexNumber scans a number: decimal, octal, hex, float, or imaginary. This
- // isn't a perfect number scanner - for instance it accepts "." and "0x0.2"
- // and "089" - but when it's wrong the input is invalid and the parser (via
- // strconv) will notice.
- func lexNumber(l *lexer) stateFn {
- if !l.scanNumber() {
- return l.errorf("bad number syntax: %q", l.input[l.start:l.pos])
- }
- l.emit(itemNumber)
- return lexInsideAction
- }
- func (l *lexer) scanNumber() bool {
- // Optional leading sign.
- l.accept("+-")
- // Is it hex?
- digits := "0123456789"
- if l.accept("0") && l.accept("xX") {
- digits = "0123456789abcdefABCDEF"
- }
- l.acceptRun(digits)
- if l.accept(".") {
- l.acceptRun(digits)
- }
- if l.accept("eE") {
- l.accept("+-")
- l.acceptRun("0123456789")
- }
- //Is it imaginary?
- l.accept("i")
- //Next thing mustn't be alphanumeric.
- if isAlphaNumeric(l.peek()) {
- l.next()
- return false
- }
- return true
- }
- // lexQuote scans a quoted string.
- func lexQuote(l *lexer) stateFn {
- Loop:
- for {
- switch l.next() {
- case '\\':
- if r := l.next(); r != eof && r != '\n' {
- break
- }
- fallthrough
- case eof, '\n':
- return l.errorf("unterminated quoted string")
- case '"':
- break Loop
- }
- }
- l.emit(itemString)
- return lexInsideAction
- }
- // lexRawQuote scans a raw quoted string.
- func lexRawQuote(l *lexer) stateFn {
- Loop:
- for {
- switch l.next() {
- case eof:
- return l.errorf("unterminated raw quoted string")
- case '`':
- break Loop
- }
- }
- l.emit(itemRawString)
- return lexInsideAction
- }
// isSpace reports whether r is a blank or a tab.
func isSpace(r rune) bool {
	switch r {
	case ' ', '\t':
		return true
	}
	return false
}
// isEndOfLine reports whether r is a carriage return or a line feed.
func isEndOfLine(r rune) bool {
	switch r {
	case '\r', '\n':
		return true
	}
	return false
}
// isAlphaNumeric reports whether r is an underscore, a Unicode letter, or a
// Unicode digit.
func isAlphaNumeric(r rune) bool {
	if r == '_' {
		return true
	}
	if unicode.IsLetter(r) {
		return true
	}
	return unicode.IsDigit(r)
}
|