| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741 |
- package toml
- import (
- "fmt"
- "unicode/utf8"
- )
// itemType identifies the kind of an item emitted by the lexer.
type itemType int

// The item kinds. Values are assigned by iota in declaration order,
// beginning with itemError at zero.
const (
	itemError itemType = iota
	itemNIL            // used in the parser to indicate no type
	itemEOF
	itemText
	itemString
	itemBool
	itemInteger
	itemFloat
	itemDatetime
	itemArray // the start of an array
	itemArrayEnd
	itemTableStart
	itemTableEnd
	itemArrayTableStart
	itemArrayTableEnd
	itemKeyStart
	itemCommentStart
)
// Special characters recognised by the lexer.
const (
	// eof is the rune value returned by next once the input is exhausted.
	eof = 0

	// Table and array-of-table headers.
	tableStart      = '['
	tableEnd        = ']'
	arrayTableStart = '['
	arrayTableEnd   = ']'
	tableSep        = '.'

	// Key/value pairs.
	keySep = '='

	// Array values.
	arrayStart   = '['
	arrayEnd     = ']'
	arrayValTerm = ','

	// Comments and strings.
	commentStart = '#'
	stringStart  = '"'
	stringEnd    = '"'
)
- type stateFn func(lx *lexer) stateFn
- type lexer struct {
- input string
- start int
- pos int
- width int
- line int
- state stateFn
- items chan item
- // A stack of state functions used to maintain context.
- // The idea is to reuse parts of the state machine in various places.
- // For example, values can appear at the top level or within arbitrarily
- // nested arrays. The last state on the stack is used after a value has
- // been lexed. Similarly for comments.
- stack []stateFn
- }
- type item struct {
- typ itemType
- val string
- line int
- }
- func (lx *lexer) nextItem() item {
- for {
- select {
- case item := <-lx.items:
- return item
- default:
- lx.state = lx.state(lx)
- }
- }
- panic("not reached")
- }
- func lex(input string) *lexer {
- lx := &lexer{
- input: input,
- state: lexTop,
- line: 1,
- items: make(chan item, 10),
- stack: make([]stateFn, 0, 10),
- }
- return lx
- }
- func (lx *lexer) push(state stateFn) {
- lx.stack = append(lx.stack, state)
- }
- func (lx *lexer) pop() stateFn {
- if len(lx.stack) == 0 {
- return lx.errorf("BUG in lexer: no states to pop.")
- }
- last := lx.stack[len(lx.stack)-1]
- lx.stack = lx.stack[0 : len(lx.stack)-1]
- return last
- }
- func (lx *lexer) current() string {
- return lx.input[lx.start:lx.pos]
- }
- func (lx *lexer) emit(typ itemType) {
- lx.items <- item{typ, lx.current(), lx.line}
- lx.start = lx.pos
- }
- func (lx *lexer) next() (r rune) {
- if lx.pos >= len(lx.input) {
- lx.width = 0
- return eof
- }
- if lx.input[lx.pos] == '\n' {
- lx.line++
- }
- r, lx.width = utf8.DecodeRuneInString(lx.input[lx.pos:])
- lx.pos += lx.width
- return r
- }
- // ignore skips over the pending input before this point.
- func (lx *lexer) ignore() {
- lx.start = lx.pos
- }
- // backup steps back one rune. Can be called only once per call of next.
- func (lx *lexer) backup() {
- lx.pos -= lx.width
- if lx.pos < len(lx.input) && lx.input[lx.pos] == '\n' {
- lx.line--
- }
- }
- // accept consumes the next rune if it's equal to `valid`.
- func (lx *lexer) accept(valid rune) bool {
- if lx.next() == valid {
- return true
- }
- lx.backup()
- return false
- }
- // peek returns but does not consume the next rune in the input.
- func (lx *lexer) peek() rune {
- r := lx.next()
- lx.backup()
- return r
- }
- // errorf stops all lexing by emitting an error and returning `nil`.
- // Note that any value that is a character is escaped if it's a special
- // character (new lines, tabs, etc.).
- func (lx *lexer) errorf(format string, values ...interface{}) stateFn {
- for i, value := range values {
- if v, ok := value.(rune); ok {
- values[i] = escapeSpecial(v)
- }
- }
- lx.items <- item{
- itemError,
- fmt.Sprintf(format, values...),
- lx.line,
- }
- return nil
- }
- // lexTop consumes elements at the top level of TOML data.
- func lexTop(lx *lexer) stateFn {
- r := lx.next()
- if isWhitespace(r) || isNL(r) {
- return lexSkip(lx, lexTop)
- }
- switch r {
- case commentStart:
- lx.push(lexTop)
- return lexCommentStart
- case tableStart:
- return lexTableStart
- case eof:
- if lx.pos > lx.start {
- return lx.errorf("Unexpected EOF.")
- }
- lx.emit(itemEOF)
- return nil
- }
- // At this point, the only valid item can be a key, so we back up
- // and let the key lexer do the rest.
- lx.backup()
- lx.push(lexTopEnd)
- return lexKeyStart
- }
- // lexTopEnd is entered whenever a top-level item has been consumed. (A value
- // or a table.) It must see only whitespace, and will turn back to lexTop
- // upon a new line. If it sees EOF, it will quit the lexer successfully.
- func lexTopEnd(lx *lexer) stateFn {
- r := lx.next()
- switch {
- case r == commentStart:
- // a comment will read to a new line for us.
- lx.push(lexTop)
- return lexCommentStart
- case isWhitespace(r):
- return lexTopEnd
- case isNL(r):
- lx.ignore()
- return lexTop
- case r == eof:
- lx.ignore()
- return lexTop
- }
- return lx.errorf("Expected a top-level item to end with a new line, "+
- "comment or EOF, but got '%s' instead.", r)
- }
- // lexTable lexes the beginning of a table. Namely, it makes sure that
- // it starts with a character other than '.' and ']'.
- // It assumes that '[' has already been consumed.
- // It also handles the case that this is an item in an array of tables.
- // e.g., '[[name]]'.
- func lexTableStart(lx *lexer) stateFn {
- if lx.peek() == arrayTableStart {
- lx.next()
- lx.emit(itemArrayTableStart)
- lx.push(lexArrayTableEnd)
- } else {
- lx.emit(itemTableStart)
- lx.push(lexTableEnd)
- }
- return lexTableNameStart
- }
- func lexTableEnd(lx *lexer) stateFn {
- lx.emit(itemTableEnd)
- return lexTopEnd
- }
- func lexArrayTableEnd(lx *lexer) stateFn {
- if r := lx.next(); r != arrayTableEnd {
- return lx.errorf("Expected end of table array name delimiter '%s', "+
- "but got '%s' instead.", arrayTableEnd, r)
- }
- lx.emit(itemArrayTableEnd)
- return lexTopEnd
- }
- func lexTableNameStart(lx *lexer) stateFn {
- switch lx.next() {
- case tableEnd:
- return lx.errorf("Unexpected end of table. (Tables cannot " +
- "be empty.)")
- case tableSep:
- return lx.errorf("Unexpected table separator. (Tables cannot " +
- "be empty.)")
- }
- return lexTableName
- }
- // lexTableName lexes the name of a table. It assumes that at least one
- // valid character for the table has already been read.
- func lexTableName(lx *lexer) stateFn {
- switch lx.peek() {
- case tableStart:
- return lx.errorf("Table names cannot contain '%s' or '%s'.",
- tableStart, tableEnd)
- case tableEnd:
- lx.emit(itemText)
- lx.next()
- return lx.pop()
- case tableSep:
- lx.emit(itemText)
- lx.next()
- lx.ignore()
- return lexTableNameStart
- }
- lx.next()
- return lexTableName
- }
- // lexKeyStart consumes a key name up until the first non-whitespace character.
- // lexKeyStart will ignore whitespace.
- func lexKeyStart(lx *lexer) stateFn {
- r := lx.peek()
- switch {
- case r == keySep:
- return lx.errorf("Unexpected key separator '%s'.", keySep)
- case isWhitespace(r) || isNL(r):
- lx.next()
- return lexSkip(lx, lexKeyStart)
- }
- lx.ignore()
- lx.emit(itemKeyStart)
- lx.next()
- return lexKey
- }
- // lexKey consumes the text of a key. Assumes that the first character (which
- // is not whitespace) has already been consumed.
- func lexKey(lx *lexer) stateFn {
- r := lx.peek()
- // XXX: Possible divergence from spec?
- // "Keys start with the first non-whitespace character and end with the
- // last non-whitespace character before the equals sign."
- // Note here that whitespace is either a tab or a space.
- // But we'll call it quits if we see a new line too.
- if isWhitespace(r) || isNL(r) {
- lx.emit(itemText)
- return lexKeyEnd
- }
- // Let's also call it quits if we see an equals sign.
- if r == keySep {
- lx.emit(itemText)
- return lexKeyEnd
- }
- lx.next()
- return lexKey
- }
- // lexKeyEnd consumes the end of a key (up to the key separator).
- // Assumes that the first whitespace character after a key (or the '='
- // separator) has NOT been consumed.
- func lexKeyEnd(lx *lexer) stateFn {
- r := lx.next()
- switch {
- case isWhitespace(r) || isNL(r):
- return lexSkip(lx, lexKeyEnd)
- case r == keySep:
- return lexSkip(lx, lexValue)
- }
- return lx.errorf("Expected key separator '%s', but got '%s' instead.",
- keySep, r)
- }
- // lexValue starts the consumption of a value anywhere a value is expected.
- // lexValue will ignore whitespace.
- // After a value is lexed, the last state on the next is popped and returned.
- func lexValue(lx *lexer) stateFn {
- // We allow whitespace to precede a value, but NOT new lines.
- // In array syntax, the array states are responsible for ignoring new lines.
- r := lx.next()
- if isWhitespace(r) {
- return lexSkip(lx, lexValue)
- }
- switch {
- case r == arrayStart:
- lx.ignore()
- lx.emit(itemArray)
- return lexArrayValue
- case r == stringStart:
- lx.ignore() // ignore the '"'
- return lexString
- case r == 't':
- return lexTrue
- case r == 'f':
- return lexFalse
- case r == '-':
- return lexNumberStart
- case isDigit(r):
- lx.backup() // avoid an extra state and use the same as above
- return lexNumberOrDateStart
- case r == '.': // special error case, be kind to users
- return lx.errorf("Floats must start with a digit, not '.'.")
- }
- return lx.errorf("Expected value but found '%s' instead.", r)
- }
- // lexArrayValue consumes one value in an array. It assumes that '[' or ','
- // have already been consumed. All whitespace and new lines are ignored.
- func lexArrayValue(lx *lexer) stateFn {
- r := lx.next()
- switch {
- case isWhitespace(r) || isNL(r):
- return lexSkip(lx, lexArrayValue)
- case r == commentStart:
- lx.push(lexArrayValue)
- return lexCommentStart
- case r == arrayValTerm:
- return lx.errorf("Unexpected array value terminator '%s'.",
- arrayValTerm)
- case r == arrayEnd:
- return lexArrayEnd
- }
- lx.backup()
- lx.push(lexArrayValueEnd)
- return lexValue
- }
- // lexArrayValueEnd consumes the cruft between values of an array. Namely,
- // it ignores whitespace and expects either a ',' or a ']'.
- func lexArrayValueEnd(lx *lexer) stateFn {
- r := lx.next()
- switch {
- case isWhitespace(r) || isNL(r):
- return lexSkip(lx, lexArrayValueEnd)
- case r == commentStart:
- lx.push(lexArrayValueEnd)
- return lexCommentStart
- case r == arrayValTerm:
- return lexArrayValue // move on to the next value
- case r == arrayEnd:
- return lexArrayEnd
- }
- return lx.errorf("Expected an array value terminator '%s' or an array "+
- "terminator '%s', but got '%s' instead.", arrayValTerm, arrayEnd, r)
- }
- // lexArrayEnd finishes the lexing of an array. It assumes that a ']' has
- // just been consumed.
- func lexArrayEnd(lx *lexer) stateFn {
- lx.ignore()
- lx.emit(itemArrayEnd)
- return lx.pop()
- }
- // lexString consumes the inner contents of a string. It assumes that the
- // beginning '"' has already been consumed and ignored.
- func lexString(lx *lexer) stateFn {
- r := lx.next()
- switch {
- case isNL(r):
- return lx.errorf("Strings cannot contain new lines.")
- case r == '\\':
- return lexStringEscape
- case r == stringEnd:
- lx.backup()
- lx.emit(itemString)
- lx.next()
- lx.ignore()
- return lx.pop()
- }
- return lexString
- }
- // lexStringEscape consumes an escaped character. It assumes that the preceding
- // '\\' has already been consumed.
- func lexStringEscape(lx *lexer) stateFn {
- r := lx.next()
- switch r {
- case 'b':
- fallthrough
- case 't':
- fallthrough
- case 'n':
- fallthrough
- case 'f':
- fallthrough
- case 'r':
- fallthrough
- case '"':
- fallthrough
- case '/':
- fallthrough
- case '\\':
- return lexString
- case 'u':
- return lexStringUnicode
- }
- return lx.errorf("Invalid escape character '%s'. Only the following "+
- "escape characters are allowed: "+
- "\\b, \\t, \\n, \\f, \\r, \\\", \\/, \\\\, and \\uXXXX.", r)
- }
- // lexStringBinary consumes two hexadecimal digits following '\x'. It assumes
- // that the '\x' has already been consumed.
- func lexStringUnicode(lx *lexer) stateFn {
- var r rune
- for i := 0; i < 4; i++ {
- r = lx.next()
- if !isHexadecimal(r) {
- return lx.errorf("Expected four hexadecimal digits after '\\x', "+
- "but got '%s' instead.", lx.current())
- }
- }
- return lexString
- }
- // lexNumberOrDateStart consumes either a (positive) integer, float or datetime.
- // It assumes that NO negative sign has been consumed.
- func lexNumberOrDateStart(lx *lexer) stateFn {
- r := lx.next()
- if !isDigit(r) {
- if r == '.' {
- return lx.errorf("Floats must start with a digit, not '.'.")
- } else {
- return lx.errorf("Expected a digit but got '%s'.", r)
- }
- }
- return lexNumberOrDate
- }
- // lexNumberOrDate consumes either a (positive) integer, float or datetime.
- func lexNumberOrDate(lx *lexer) stateFn {
- r := lx.next()
- switch {
- case r == '-':
- if lx.pos-lx.start != 5 {
- return lx.errorf("All ISO8601 dates must be in full Zulu form.")
- }
- return lexDateAfterYear
- case isDigit(r):
- return lexNumberOrDate
- case r == '.':
- return lexFloatStart
- }
- lx.backup()
- lx.emit(itemInteger)
- return lx.pop()
- }
- // lexDateAfterYear consumes a full Zulu Datetime in ISO8601 format.
- // It assumes that "YYYY-" has already been consumed.
- func lexDateAfterYear(lx *lexer) stateFn {
- formats := []rune{
- // digits are '0'.
- // everything else is direct equality.
- '0', '0', '-', '0', '0',
- 'T',
- '0', '0', ':', '0', '0', ':', '0', '0',
- 'Z',
- }
- for _, f := range formats {
- r := lx.next()
- if f == '0' {
- if !isDigit(r) {
- return lx.errorf("Expected digit in ISO8601 datetime, "+
- "but found '%s' instead.", r)
- }
- } else if f != r {
- return lx.errorf("Expected '%s' in ISO8601 datetime, "+
- "but found '%s' instead.", f, r)
- }
- }
- lx.emit(itemDatetime)
- return lx.pop()
- }
- // lexNumberStart consumes either an integer or a float. It assumes that a
- // negative sign has already been read, but that *no* digits have been consumed.
- // lexNumberStart will move to the appropriate integer or float states.
- func lexNumberStart(lx *lexer) stateFn {
- // we MUST see a digit. Even floats have to start with a digit.
- r := lx.next()
- if !isDigit(r) {
- if r == '.' {
- return lx.errorf("Floats must start with a digit, not '.'.")
- } else {
- return lx.errorf("Expected a digit but got '%s'.", r)
- }
- }
- return lexNumber
- }
- // lexNumber consumes an integer or a float after seeing the first digit.
- func lexNumber(lx *lexer) stateFn {
- r := lx.next()
- switch {
- case isDigit(r):
- return lexNumber
- case r == '.':
- return lexFloatStart
- }
- lx.backup()
- lx.emit(itemInteger)
- return lx.pop()
- }
- // lexFloatStart starts the consumption of digits of a float after a '.'.
- // Namely, at least one digit is required.
- func lexFloatStart(lx *lexer) stateFn {
- r := lx.next()
- if !isDigit(r) {
- return lx.errorf("Floats must have a digit after the '.', but got "+
- "'%s' instead.", r)
- }
- return lexFloat
- }
- // lexFloat consumes the digits of a float after a '.'.
- // Assumes that one digit has been consumed after a '.' already.
- func lexFloat(lx *lexer) stateFn {
- r := lx.next()
- if isDigit(r) {
- return lexFloat
- }
- lx.backup()
- lx.emit(itemFloat)
- return lx.pop()
- }
- // lexTrue consumes the "rue" in "true". It assumes that 't' has already
- // been consumed.
- func lexTrue(lx *lexer) stateFn {
- if r := lx.next(); r != 'r' {
- return lx.errorf("Expected 'tr', but found 't%s' instead.", r)
- }
- if r := lx.next(); r != 'u' {
- return lx.errorf("Expected 'tru', but found 'tr%s' instead.", r)
- }
- if r := lx.next(); r != 'e' {
- return lx.errorf("Expected 'true', but found 'tru%s' instead.", r)
- }
- lx.emit(itemBool)
- return lx.pop()
- }
- // lexFalse consumes the "alse" in "false". It assumes that 'f' has already
- // been consumed.
- func lexFalse(lx *lexer) stateFn {
- if r := lx.next(); r != 'a' {
- return lx.errorf("Expected 'fa', but found 'f%s' instead.", r)
- }
- if r := lx.next(); r != 'l' {
- return lx.errorf("Expected 'fal', but found 'fa%s' instead.", r)
- }
- if r := lx.next(); r != 's' {
- return lx.errorf("Expected 'fals', but found 'fal%s' instead.", r)
- }
- if r := lx.next(); r != 'e' {
- return lx.errorf("Expected 'false', but found 'fals%s' instead.", r)
- }
- lx.emit(itemBool)
- return lx.pop()
- }
- // lexCommentStart begins the lexing of a comment. It will emit
- // itemCommentStart and consume no characters, passing control to lexComment.
- func lexCommentStart(lx *lexer) stateFn {
- lx.ignore()
- lx.emit(itemCommentStart)
- return lexComment
- }
- // lexComment lexes an entire comment. It assumes that '#' has been consumed.
- // It will consume *up to* the first new line character, and pass control
- // back to the last state on the stack.
- func lexComment(lx *lexer) stateFn {
- r := lx.peek()
- if isNL(r) || r == eof {
- lx.emit(itemText)
- return lx.pop()
- }
- lx.next()
- return lexComment
- }
- // lexSkip ignores all slurped input and moves on to the next state.
- func lexSkip(lx *lexer, nextState stateFn) stateFn {
- return func(lx *lexer) stateFn {
- lx.ignore()
- return nextState
- }
- }
// isWhitespace reports whether r is a whitespace character according to the
// spec, i.e. a tab or a space.
func isWhitespace(r rune) bool {
	switch r {
	case '\t', ' ':
		return true
	}
	return false
}
// isNL reports whether r is a line feed or a carriage return.
func isNL(r rune) bool {
	switch r {
	case '\n', '\r':
		return true
	}
	return false
}
// isDigit reports whether r is an ASCII decimal digit.
func isDigit(r rune) bool {
	return '0' <= r && r <= '9'
}
// isHexadecimal reports whether r is an ASCII hexadecimal digit, in either
// letter case.
func isHexadecimal(r rune) bool {
	switch {
	case '0' <= r && r <= '9':
		return true
	case 'a' <= r && r <= 'f':
		return true
	case 'A' <= r && r <= 'F':
		return true
	}
	return false
}
- func (itype itemType) String() string {
- switch itype {
- case itemError:
- return "Error"
- case itemNIL:
- return "NIL"
- case itemEOF:
- return "EOF"
- case itemText:
- return "Text"
- case itemString:
- return "String"
- case itemBool:
- return "Bool"
- case itemInteger:
- return "Integer"
- case itemFloat:
- return "Float"
- case itemDatetime:
- return "DateTime"
- case itemTableStart:
- return "TableStart"
- case itemTableEnd:
- return "TableEnd"
- case itemKeyStart:
- return "KeyStart"
- case itemArray:
- return "Array"
- case itemArrayEnd:
- return "ArrayEnd"
- case itemCommentStart:
- return "CommentStart"
- }
- panic(fmt.Sprintf("BUG: Unknown type '%s'.", itype))
- }
- func (item item) String() string {
- return fmt.Sprintf("(%s, %s)", item.typ.String(), item.val)
- }
// escapeSpecial returns c as a string, with a line feed rendered as the two
// characters `\n`. Every other rune is returned unchanged.
func escapeSpecial(c rune) string {
	if c == '\n' {
		return "\\n"
	}
	return string(c)
}
|