src/dma/vendor/github.com/BurntSushi/toml/lex.go

   1 package toml
   2
   3 import (
   4         "fmt"
   5         "strings"
   6         "unicode"
   7         "unicode/utf8"
   8 )
   9
  10 type itemType int
  11
  12 const (
  13         itemError itemType = iota
  14         itemNIL            // used in the parser to indicate no type
  15         itemEOF
  16         itemText
  17         itemString
  18         itemRawString
  19         itemMultilineString
  20         itemRawMultilineString
  21         itemBool
  22         itemInteger
  23         itemFloat
  24         itemDatetime
  25         itemArray // the start of an array
  26         itemArrayEnd
  27         itemTableStart
  28         itemTableEnd
  29         itemArrayTableStart
  30         itemArrayTableEnd
  31         itemKeyStart
  32         itemCommentStart
  33         itemInlineTableStart
  34         itemInlineTableEnd
  35 )
  36
  37 const (
  38         eof              = 0
  39         comma            = ','
  40         tableStart       = '['
  41         tableEnd         = ']'
  42         arrayTableStart  = '['
  43         arrayTableEnd    = ']'
  44         tableSep         = '.'
  45         keySep           = '='
  46         arrayStart       = '['
  47         arrayEnd         = ']'
  48         commentStart     = '#'
  49         stringStart      = '"'
  50         stringEnd        = '"'
  51         rawStringStart   = '\''
  52         rawStringEnd     = '\''
  53         inlineTableStart = '{'
  54         inlineTableEnd   = '}'
  55 )
  56
  57 type stateFn func(lx *lexer) stateFn
  58
  59 type lexer struct {
  60         input string
  61         start int
  62         pos   int
  63         line  int
  64         state stateFn
  65         items chan item
  66
  67         // Allow for backing up up to three runes.
  68         // This is necessary because TOML contains 3-rune tokens (""" and ''').
  69         prevWidths [3]int
  70         nprev      int // how many of prevWidths are in use
  71         // If we emit an eof, we can still back up, but it is not OK to call
  72         // next again.
  73         atEOF bool
  74
  75         // A stack of state functions used to maintain context.
  76         // The idea is to reuse parts of the state machine in various places.
  77         // For example, values can appear at the top level or within arbitrarily
  78         // nested arrays. The last state on the stack is used after a value has
  79         // been lexed. Similarly for comments.
  80         stack []stateFn
  81 }
  82
  83 type item struct {
  84         typ  itemType
  85         val  string
  86         line int
  87 }
  88
  89 func (lx *lexer) nextItem() item {
  90         for {
  91                 select {
  92                 case item := <-lx.items:
  93                         return item
  94                 default:
  95                         lx.state = lx.state(lx)
  96                 }
  97         }
  98 }
  99
 100 func lex(input string) *lexer {
 101         lx := &lexer{
 102                 input: input,
 103                 state: lexTop,
 104                 line:  1,
 105                 items: make(chan item, 10),
 106                 stack: make([]stateFn, 0, 10),
 107         }
 108         return lx
 109 }
 110
 111 func (lx *lexer) push(state stateFn) {
 112         lx.stack = append(lx.stack, state)
 113 }
 114
 115 func (lx *lexer) pop() stateFn {
 116         if len(lx.stack) == 0 {
 117                 return lx.errorf("BUG in lexer: no states to pop")
 118         }
 119         last := lx.stack[len(lx.stack)-1]
 120         lx.stack = lx.stack[0 : len(lx.stack)-1]
 121         return last
 122 }
 123
 124 func (lx *lexer) current() string {
 125         return lx.input[lx.start:lx.pos]
 126 }
 127
 128 func (lx *lexer) emit(typ itemType) {
 129         lx.items <- item{typ, lx.current(), lx.line}
 130         lx.start = lx.pos
 131 }
 132
 133 func (lx *lexer) emitTrim(typ itemType) {
 134         lx.items <- item{typ, strings.TrimSpace(lx.current()), lx.line}
 135         lx.start = lx.pos
 136 }
 137
 138 func (lx *lexer) next() (r rune) {
 139         if lx.atEOF {
 140                 panic("next called after EOF")
 141         }
 142         if lx.pos >= len(lx.input) {
 143                 lx.atEOF = true
 144                 return eof
 145         }
 146
 147         if lx.input[lx.pos] == '\n' {
 148                 lx.line++
 149         }
 150         lx.prevWidths[2] = lx.prevWidths[1]
 151         lx.prevWidths[1] = lx.prevWidths[0]
 152         if lx.nprev < 3 {
 153                 lx.nprev++
 154         }
 155         r, w := utf8.DecodeRuneInString(lx.input[lx.pos:])
 156         lx.prevWidths[0] = w
 157         lx.pos += w
 158         return r
 159 }
 160
 161 // ignore skips over the pending input before this point.
 162 func (lx *lexer) ignore() {
 163         lx.start = lx.pos
 164 }
 165
 166 // backup steps back one rune. Can be called only twice between calls to next.
 167 func (lx *lexer) backup() {
 168         if lx.atEOF {
 169                 lx.atEOF = false
 170                 return
 171         }
 172         if lx.nprev < 1 {
 173                 panic("backed up too far")
 174         }
 175         w := lx.prevWidths[0]
 176         lx.prevWidths[0] = lx.prevWidths[1]
 177         lx.prevWidths[1] = lx.prevWidths[2]
 178         lx.nprev--
 179         lx.pos -= w
 180         if lx.pos < len(lx.input) && lx.input[lx.pos] == '\n' {
 181                 lx.line--
 182         }
 183 }
 184
 185 // accept consumes the next rune if it's equal to `valid`.
 186 func (lx *lexer) accept(valid rune) bool {
 187         if lx.next() == valid {
 188                 return true
 189         }
 190         lx.backup()
 191         return false
 192 }
 193
 194 // peek returns but does not consume the next rune in the input.
 195 func (lx *lexer) peek() rune {
 196         r := lx.next()
 197         lx.backup()
 198         return r
 199 }
 200
 201 // skip ignores all input that matches the given predicate.
 202 func (lx *lexer) skip(pred func(rune) bool) {
 203         for {
 204                 r := lx.next()
 205                 if pred(r) {
 206                         continue
 207                 }
 208                 lx.backup()
 209                 lx.ignore()
 210                 return
 211         }
 212 }
 213
 214 // errorf stops all lexing by emitting an error and returning `nil`.
 215 // Note that any value that is a character is escaped if it's a special
 216 // character (newlines, tabs, etc.).
 217 func (lx *lexer) errorf(format string, values ...interface{}) stateFn {
 218         lx.items <- item{
 219                 itemError,
 220                 fmt.Sprintf(format, values...),
 221                 lx.line,
 222         }
 223         return nil
 224 }
 225
 226 // lexTop consumes elements at the top level of TOML data.
 227 func lexTop(lx *lexer) stateFn {
 228         r := lx.next()
 229         if isWhitespace(r) || isNL(r) {
 230                 return lexSkip(lx, lexTop)
 231         }
 232         switch r {
 233         case commentStart:
 234                 lx.push(lexTop)
 235                 return lexCommentStart
 236         case tableStart:
 237                 return lexTableStart
 238         case eof:
 239                 if lx.pos > lx.start {
 240                         return lx.errorf("unexpected EOF")
 241                 }
 242                 lx.emit(itemEOF)
 243                 return nil
 244         }
 245
 246         // At this point, the only valid item can be a key, so we back up
 247         // and let the key lexer do the rest.
 248         lx.backup()
 249         lx.push(lexTopEnd)
 250         return lexKeyStart
 251 }
 252
 253 // lexTopEnd is entered whenever a top-level item has been consumed. (A value
 254 // or a table.) It must see only whitespace, and will turn back to lexTop
 255 // upon a newline. If it sees EOF, it will quit the lexer successfully.
 256 func lexTopEnd(lx *lexer) stateFn {
 257         r := lx.next()
 258         switch {
 259         case r == commentStart:
 260                 // a comment will read to a newline for us.
 261                 lx.push(lexTop)
 262                 return lexCommentStart
 263         case isWhitespace(r):
 264                 return lexTopEnd
 265         case isNL(r):
 266                 lx.ignore()
 267                 return lexTop
 268         case r == eof:
 269                 lx.emit(itemEOF)
 270                 return nil
 271         }
 272         return lx.errorf("expected a top-level item to end with a newline, "+
 273                 "comment, or EOF, but got %q instead", r)
 274 }
 275
 276 // lexTable lexes the beginning of a table. Namely, it makes sure that
 277 // it starts with a character other than '.' and ']'.
 278 // It assumes that '[' has already been consumed.
 279 // It also handles the case that this is an item in an array of tables.
 280 // e.g., '[[name]]'.
 281 func lexTableStart(lx *lexer) stateFn {
 282         if lx.peek() == arrayTableStart {
 283                 lx.next()
 284                 lx.emit(itemArrayTableStart)
 285                 lx.push(lexArrayTableEnd)
 286         } else {
 287                 lx.emit(itemTableStart)
 288                 lx.push(lexTableEnd)
 289         }
 290         return lexTableNameStart
 291 }
 292
 293 func lexTableEnd(lx *lexer) stateFn {
 294         lx.emit(itemTableEnd)
 295         return lexTopEnd
 296 }
 297
 298 func lexArrayTableEnd(lx *lexer) stateFn {
 299         if r := lx.next(); r != arrayTableEnd {
 300                 return lx.errorf("expected end of table array name delimiter %q, "+
 301                         "but got %q instead", arrayTableEnd, r)
 302         }
 303         lx.emit(itemArrayTableEnd)
 304         return lexTopEnd
 305 }
 306
 307 func lexTableNameStart(lx *lexer) stateFn {
 308         lx.skip(isWhitespace)
 309         switch r := lx.peek(); {
 310         case r == tableEnd || r == eof:
 311                 return lx.errorf("unexpected end of table name " +
 312                         "(table names cannot be empty)")
 313         case r == tableSep:
 314                 return lx.errorf("unexpected table separator " +
 315                         "(table names cannot be empty)")
 316         case r == stringStart || r == rawStringStart:
 317                 lx.ignore()
 318                 lx.push(lexTableNameEnd)
 319                 return lexValue // reuse string lexing
 320         default:
 321                 return lexBareTableName
 322         }
 323 }
 324
 325 // lexBareTableName lexes the name of a table. It assumes that at least one
 326 // valid character for the table has already been read.
 327 func lexBareTableName(lx *lexer) stateFn {
 328         r := lx.next()
 329         if isBareKeyChar(r) {
 330                 return lexBareTableName
 331         }
 332         lx.backup()
 333         lx.emit(itemText)
 334         return lexTableNameEnd
 335 }
 336
 337 // lexTableNameEnd reads the end of a piece of a table name, optionally
 338 // consuming whitespace.
 339 func lexTableNameEnd(lx *lexer) stateFn {
 340         lx.skip(isWhitespace)
 341         switch r := lx.next(); {
 342         case isWhitespace(r):
 343                 return lexTableNameEnd
 344         case r == tableSep:
 345                 lx.ignore()
 346                 return lexTableNameStart
 347         case r == tableEnd:
 348                 return lx.pop()
 349         default:
 350                 return lx.errorf("expected '.' or ']' to end table name, "+
 351                         "but got %q instead", r)
 352         }
 353 }
 354
 355 // lexKeyStart consumes a key name up until the first non-whitespace character.
 356 // lexKeyStart will ignore whitespace.
 357 func lexKeyStart(lx *lexer) stateFn {
 358         r := lx.peek()
 359         switch {
 360         case r == keySep:
 361                 return lx.errorf("unexpected key separator %q", keySep)
 362         case isWhitespace(r) || isNL(r):
 363                 lx.next()
 364                 return lexSkip(lx, lexKeyStart)
 365         case r == stringStart || r == rawStringStart:
 366                 lx.ignore()
 367                 lx.emit(itemKeyStart)
 368                 lx.push(lexKeyEnd)
 369                 return lexValue // reuse string lexing
 370         default:
 371                 lx.ignore()
 372                 lx.emit(itemKeyStart)
 373                 return lexBareKey
 374         }
 375 }
 376
 377 // lexBareKey consumes the text of a bare key. Assumes that the first character
 378 // (which is not whitespace) has not yet been consumed.
 379 func lexBareKey(lx *lexer) stateFn {
 380         switch r := lx.next(); {
 381         case isBareKeyChar(r):
 382                 return lexBareKey
 383         case isWhitespace(r):
 384                 lx.backup()
 385                 lx.emit(itemText)
 386                 return lexKeyEnd
 387         case r == keySep:
 388                 lx.backup()
 389                 lx.emit(itemText)
 390                 return lexKeyEnd
 391         default:
 392                 return lx.errorf("bare keys cannot contain %q", r)
 393         }
 394 }
 395
 396 // lexKeyEnd consumes the end of a key and trims whitespace (up to the key
 397 // separator).
 398 func lexKeyEnd(lx *lexer) stateFn {
 399         switch r := lx.next(); {
 400         case r == keySep:
 401                 return lexSkip(lx, lexValue)
 402         case isWhitespace(r):
 403                 return lexSkip(lx, lexKeyEnd)
 404         default:
 405                 return lx.errorf("expected key separator %q, but got %q instead",
 406                         keySep, r)
 407         }
 408 }
 409
 410 // lexValue starts the consumption of a value anywhere a value is expected.
 411 // lexValue will ignore whitespace.
 412 // After a value is lexed, the last state on the next is popped and returned.
 413 func lexValue(lx *lexer) stateFn {
 414         // We allow whitespace to precede a value, but NOT newlines.
 415         // In array syntax, the array states are responsible for ignoring newlines.
 416         r := lx.next()
 417         switch {
 418         case isWhitespace(r):
 419                 return lexSkip(lx, lexValue)
 420         case isDigit(r):
 421                 lx.backup() // avoid an extra state and use the same as above
 422                 return lexNumberOrDateStart
 423         }
 424         switch r {
 425         case arrayStart:
 426                 lx.ignore()
 427                 lx.emit(itemArray)
 428                 return lexArrayValue
 429         case inlineTableStart:
 430                 lx.ignore()
 431                 lx.emit(itemInlineTableStart)
 432                 return lexInlineTableValue
 433         case stringStart:
 434                 if lx.accept(stringStart) {
 435                         if lx.accept(stringStart) {
 436                                 lx.ignore() // Ignore """
 437                                 return lexMultilineString
 438                         }
 439                         lx.backup()
 440                 }
 441                 lx.ignore() // ignore the '"'
 442                 return lexString
 443         case rawStringStart:
 444                 if lx.accept(rawStringStart) {
 445                         if lx.accept(rawStringStart) {
 446                                 lx.ignore() // Ignore """
 447                                 return lexMultilineRawString
 448                         }
 449                         lx.backup()
 450                 }
 451                 lx.ignore() // ignore the "'"
 452                 return lexRawString
 453         case '+', '-':
 454                 return lexNumberStart
 455         case '.': // special error case, be kind to users
 456                 return lx.errorf("floats must start with a digit, not '.'")
 457         }
 458         if unicode.IsLetter(r) {
 459                 // Be permissive here; lexBool will give a nice error if the
 460                 // user wrote something like
 461                 //   x = foo
 462                 // (i.e. not 'true' or 'false' but is something else word-like.)
 463                 lx.backup()
 464                 return lexBool
 465         }
 466         return lx.errorf("expected value but found %q instead", r)
 467 }
 468
 469 // lexArrayValue consumes one value in an array. It assumes that '[' or ','
 470 // have already been consumed. All whitespace and newlines are ignored.
 471 func lexArrayValue(lx *lexer) stateFn {
 472         r := lx.next()
 473         switch {
 474         case isWhitespace(r) || isNL(r):
 475                 return lexSkip(lx, lexArrayValue)
 476         case r == commentStart:
 477                 lx.push(lexArrayValue)
 478                 return lexCommentStart
 479         case r == comma:
 480                 return lx.errorf("unexpected comma")
 481         case r == arrayEnd:
 482                 // NOTE(caleb): The spec isn't clear about whether you can have
 483                 // a trailing comma or not, so we'll allow it.
 484                 return lexArrayEnd
 485         }
 486
 487         lx.backup()
 488         lx.push(lexArrayValueEnd)
 489         return lexValue
 490 }
 491
 492 // lexArrayValueEnd consumes everything between the end of an array value and
 493 // the next value (or the end of the array): it ignores whitespace and newlines
 494 // and expects either a ',' or a ']'.
 495 func lexArrayValueEnd(lx *lexer) stateFn {
 496         r := lx.next()
 497         switch {
 498         case isWhitespace(r) || isNL(r):
 499                 return lexSkip(lx, lexArrayValueEnd)
 500         case r == commentStart:
 501                 lx.push(lexArrayValueEnd)
 502                 return lexCommentStart
 503         case r == comma:
 504                 lx.ignore()
 505                 return lexArrayValue // move on to the next value
 506         case r == arrayEnd:
 507                 return lexArrayEnd
 508         }
 509         return lx.errorf(
 510                 "expected a comma or array terminator %q, but got %q instead",
 511                 arrayEnd, r,
 512         )
 513 }
 514
 515 // lexArrayEnd finishes the lexing of an array.
 516 // It assumes that a ']' has just been consumed.
 517 func lexArrayEnd(lx *lexer) stateFn {
 518         lx.ignore()
 519         lx.emit(itemArrayEnd)
 520         return lx.pop()
 521 }
 522
 523 // lexInlineTableValue consumes one key/value pair in an inline table.
 524 // It assumes that '{' or ',' have already been consumed. Whitespace is ignored.
 525 func lexInlineTableValue(lx *lexer) stateFn {
 526         r := lx.next()
 527         switch {
 528         case isWhitespace(r):
 529                 return lexSkip(lx, lexInlineTableValue)
 530         case isNL(r):
 531                 return lx.errorf("newlines not allowed within inline tables")
 532         case r == commentStart:
 533                 lx.push(lexInlineTableValue)
 534                 return lexCommentStart
 535         case r == comma:
 536                 return lx.errorf("unexpected comma")
 537         case r == inlineTableEnd:
 538                 return lexInlineTableEnd
 539         }
 540         lx.backup()
 541         lx.push(lexInlineTableValueEnd)
 542         return lexKeyStart
 543 }
 544
 545 // lexInlineTableValueEnd consumes everything between the end of an inline table
 546 // key/value pair and the next pair (or the end of the table):
 547 // it ignores whitespace and expects either a ',' or a '}'.
 548 func lexInlineTableValueEnd(lx *lexer) stateFn {
 549         r := lx.next()
 550         switch {
 551         case isWhitespace(r):
 552                 return lexSkip(lx, lexInlineTableValueEnd)
 553         case isNL(r):
 554                 return lx.errorf("newlines not allowed within inline tables")
 555         case r == commentStart:
 556                 lx.push(lexInlineTableValueEnd)
 557                 return lexCommentStart
 558         case r == comma:
 559                 lx.ignore()
 560                 return lexInlineTableValue
 561         case r == inlineTableEnd:
 562                 return lexInlineTableEnd
 563         }
 564         return lx.errorf("expected a comma or an inline table terminator %q, "+
 565                 "but got %q instead", inlineTableEnd, r)
 566 }
 567
 568 // lexInlineTableEnd finishes the lexing of an inline table.
 569 // It assumes that a '}' has just been consumed.
 570 func lexInlineTableEnd(lx *lexer) stateFn {
 571         lx.ignore()
 572         lx.emit(itemInlineTableEnd)
 573         return lx.pop()
 574 }
 575
 576 // lexString consumes the inner contents of a string. It assumes that the
 577 // beginning '"' has already been consumed and ignored.
 578 func lexString(lx *lexer) stateFn {
 579         r := lx.next()
 580         switch {
 581         case r == eof:
 582                 return lx.errorf("unexpected EOF")
 583         case isNL(r):
 584                 return lx.errorf("strings cannot contain newlines")
 585         case r == '\\':
 586                 lx.push(lexString)
 587                 return lexStringEscape
 588         case r == stringEnd:
 589                 lx.backup()
 590                 lx.emit(itemString)
 591                 lx.next()
 592                 lx.ignore()
 593                 return lx.pop()
 594         }
 595         return lexString
 596 }
 597
 598 // lexMultilineString consumes the inner contents of a string. It assumes that
 599 // the beginning '"""' has already been consumed and ignored.
 600 func lexMultilineString(lx *lexer) stateFn {
 601         switch lx.next() {
 602         case eof:
 603                 return lx.errorf("unexpected EOF")
 604         case '\\':
 605                 return lexMultilineStringEscape
 606         case stringEnd:
 607                 if lx.accept(stringEnd) {
 608                         if lx.accept(stringEnd) {
 609                                 lx.backup()
 610                                 lx.backup()
 611                                 lx.backup()
 612                                 lx.emit(itemMultilineString)
 613                                 lx.next()
 614                                 lx.next()
 615                                 lx.next()
 616                                 lx.ignore()
 617                                 return lx.pop()
 618                         }
 619                         lx.backup()
 620                 }
 621         }
 622         return lexMultilineString
 623 }
 624
 625 // lexRawString consumes a raw string. Nothing can be escaped in such a string.
 626 // It assumes that the beginning "'" has already been consumed and ignored.
 627 func lexRawString(lx *lexer) stateFn {
 628         r := lx.next()
 629         switch {
 630         case r == eof:
 631                 return lx.errorf("unexpected EOF")
 632         case isNL(r):
 633                 return lx.errorf("strings cannot contain newlines")
 634         case r == rawStringEnd:
 635                 lx.backup()
 636                 lx.emit(itemRawString)
 637                 lx.next()
 638                 lx.ignore()
 639                 return lx.pop()
 640         }
 641         return lexRawString
 642 }
 643
 644 // lexMultilineRawString consumes a raw string. Nothing can be escaped in such
 645 // a string. It assumes that the beginning "'''" has already been consumed and
 646 // ignored.
 647 func lexMultilineRawString(lx *lexer) stateFn {
 648         switch lx.next() {
 649         case eof:
 650                 return lx.errorf("unexpected EOF")
 651         case rawStringEnd:
 652                 if lx.accept(rawStringEnd) {
 653                         if lx.accept(rawStringEnd) {
 654                                 lx.backup()
 655                                 lx.backup()
 656                                 lx.backup()
 657                                 lx.emit(itemRawMultilineString)
 658                                 lx.next()
 659                                 lx.next()
 660                                 lx.next()
 661                                 lx.ignore()
 662                                 return lx.pop()
 663                         }
 664                         lx.backup()
 665                 }
 666         }
 667         return lexMultilineRawString
 668 }
 669
 670 // lexMultilineStringEscape consumes an escaped character. It assumes that the
 671 // preceding '\\' has already been consumed.
 672 func lexMultilineStringEscape(lx *lexer) stateFn {
 673         // Handle the special case first:
 674         if isNL(lx.next()) {
 675                 return lexMultilineString
 676         }
 677         lx.backup()
 678         lx.push(lexMultilineString)
 679         return lexStringEscape(lx)
 680 }
 681
 682 func lexStringEscape(lx *lexer) stateFn {
 683         r := lx.next()
 684         switch r {
 685         case 'b':
 686                 fallthrough
 687         case 't':
 688                 fallthrough
 689         case 'n':
 690                 fallthrough
 691         case 'f':
 692                 fallthrough
 693         case 'r':
 694                 fallthrough
 695         case '"':
 696                 fallthrough
 697         case '\\':
 698                 return lx.pop()
 699         case 'u':
 700                 return lexShortUnicodeEscape
 701         case 'U':
 702                 return lexLongUnicodeEscape
 703         }
 704         return lx.errorf("invalid escape character %q; only the following "+
 705                 "escape characters are allowed: "+
 706                 `\b, \t, \n, \f, \r, \", \\, \uXXXX, and \UXXXXXXXX`, r)
 707 }
 708
 709 func lexShortUnicodeEscape(lx *lexer) stateFn {
 710         var r rune
 711         for i := 0; i < 4; i++ {
 712                 r = lx.next()
 713                 if !isHexadecimal(r) {
 714                         return lx.errorf(`expected four hexadecimal digits after '\u', `+
 715                                 "but got %q instead", lx.current())
 716                 }
 717         }
 718         return lx.pop()
 719 }
 720
 721 func lexLongUnicodeEscape(lx *lexer) stateFn {
 722         var r rune
 723         for i := 0; i < 8; i++ {
 724                 r = lx.next()
 725                 if !isHexadecimal(r) {
 726                         return lx.errorf(`expected eight hexadecimal digits after '\U', `+
 727                                 "but got %q instead", lx.current())
 728                 }
 729         }
 730         return lx.pop()
 731 }
 732
 733 // lexNumberOrDateStart consumes either an integer, a float, or datetime.
 734 func lexNumberOrDateStart(lx *lexer) stateFn {
 735         r := lx.next()
 736         if isDigit(r) {
 737                 return lexNumberOrDate
 738         }
 739         switch r {
 740         case '_':
 741                 return lexNumber
 742         case 'e', 'E':
 743                 return lexFloat
 744         case '.':
 745                 return lx.errorf("floats must start with a digit, not '.'")
 746         }
 747         return lx.errorf("expected a digit but got %q", r)
 748 }
 749
 750 // lexNumberOrDate consumes either an integer, float or datetime.
 751 func lexNumberOrDate(lx *lexer) stateFn {
 752         r := lx.next()
 753         if isDigit(r) {
 754                 return lexNumberOrDate
 755         }
 756         switch r {
 757         case '-':
 758                 return lexDatetime
 759         case '_':
 760                 return lexNumber
 761         case '.', 'e', 'E':
 762                 return lexFloat
 763         }
 764
 765         lx.backup()
 766         lx.emit(itemInteger)
 767         return lx.pop()
 768 }
 769
 770 // lexDatetime consumes a Datetime, to a first approximation.
 771 // The parser validates that it matches one of the accepted formats.
 772 func lexDatetime(lx *lexer) stateFn {
 773         r := lx.next()
 774         if isDigit(r) {
 775                 return lexDatetime
 776         }
 777         switch r {
 778         case '-', 'T', ':', '.', 'Z', '+':
 779                 return lexDatetime
 780         }
 781
 782         lx.backup()
 783         lx.emit(itemDatetime)
 784         return lx.pop()
 785 }
 786
 787 // lexNumberStart consumes either an integer or a float. It assumes that a sign
 788 // has already been read, but that *no* digits have been consumed.
 789 // lexNumberStart will move to the appropriate integer or float states.
 790 func lexNumberStart(lx *lexer) stateFn {
 791         // We MUST see a digit. Even floats have to start with a digit.
 792         r := lx.next()
 793         if !isDigit(r) {
 794                 if r == '.' {
 795                         return lx.errorf("floats must start with a digit, not '.'")
 796                 }
 797                 return lx.errorf("expected a digit but got %q", r)
 798         }
 799         return lexNumber
 800 }
 801
 802 // lexNumber consumes an integer or a float after seeing the first digit.
 803 func lexNumber(lx *lexer) stateFn {
 804         r := lx.next()
 805         if isDigit(r) {
 806                 return lexNumber
 807         }
 808         switch r {
 809         case '_':
 810                 return lexNumber
 811         case '.', 'e', 'E':
 812                 return lexFloat
 813         }
 814
 815         lx.backup()
 816         lx.emit(itemInteger)
 817         return lx.pop()
 818 }
 819
 820 // lexFloat consumes the elements of a float. It allows any sequence of
 821 // float-like characters, so floats emitted by the lexer are only a first
 822 // approximation and must be validated by the parser.
 823 func lexFloat(lx *lexer) stateFn {
 824         r := lx.next()
 825         if isDigit(r) {
 826                 return lexFloat
 827         }
 828         switch r {
 829         case '_', '.', '-', '+', 'e', 'E':
 830                 return lexFloat
 831         }
 832
 833         lx.backup()
 834         lx.emit(itemFloat)
 835         return lx.pop()
 836 }
 837
 838 // lexBool consumes a bool string: 'true' or 'false.
 839 func lexBool(lx *lexer) stateFn {
 840         var rs []rune
 841         for {
 842                 r := lx.next()
 843                 if !unicode.IsLetter(r) {
 844                         lx.backup()
 845                         break
 846                 }
 847                 rs = append(rs, r)
 848         }
 849         s := string(rs)
 850         switch s {
 851         case "true", "false":
 852                 lx.emit(itemBool)
 853                 return lx.pop()
 854         }
 855         return lx.errorf("expected value but found %q instead", s)
 856 }
 857
 858 // lexCommentStart begins the lexing of a comment. It will emit
 859 // itemCommentStart and consume no characters, passing control to lexComment.
 860 func lexCommentStart(lx *lexer) stateFn {
 861         lx.ignore()
 862         lx.emit(itemCommentStart)
 863         return lexComment
 864 }
 865
 866 // lexComment lexes an entire comment. It assumes that '#' has been consumed.
 867 // It will consume *up to* the first newline character, and pass control
 868 // back to the last state on the stack.
 869 func lexComment(lx *lexer) stateFn {
 870         r := lx.peek()
 871         if isNL(r) || r == eof {
 872                 lx.emit(itemText)
 873                 return lx.pop()
 874         }
 875         lx.next()
 876         return lexComment
 877 }
 878
 879 // lexSkip ignores all slurped input and moves on to the next state.
 880 func lexSkip(lx *lexer, nextState stateFn) stateFn {
 881         return func(lx *lexer) stateFn {
 882                 lx.ignore()
 883                 return nextState
 884         }
 885 }
 886
 887 // isWhitespace returns true if `r` is a whitespace character according
 888 // to the spec.
 889 func isWhitespace(r rune) bool {
 890         return r == '\t' || r == ' '
 891 }
 892
 893 func isNL(r rune) bool {
 894         return r == '\n' || r == '\r'
 895 }
 896
 897 func isDigit(r rune) bool {
 898         return r >= '0' && r <= '9'
 899 }
 900
 901 func isHexadecimal(r rune) bool {
 902         return (r >= '0' && r <= '9') ||
 903                 (r >= 'a' && r <= 'f') ||
 904                 (r >= 'A' && r <= 'F')
 905 }
 906
 907 func isBareKeyChar(r rune) bool {
 908         return (r >= 'A' && r <= 'Z') ||
 909                 (r >= 'a' && r <= 'z') ||
 910                 (r >= '0' && r <= '9') ||
 911                 r == '_' ||
 912                 r == '-'
 913 }
 914
 915 func (itype itemType) String() string {
 916         switch itype {
 917         case itemError:
 918                 return "Error"
 919         case itemNIL:
 920                 return "NIL"
 921         case itemEOF:
 922                 return "EOF"
 923         case itemText:
 924                 return "Text"
 925         case itemString, itemRawString, itemMultilineString, itemRawMultilineString:
 926                 return "String"
 927         case itemBool:
 928                 return "Bool"
 929         case itemInteger:
 930                 return "Integer"
 931         case itemFloat:
 932                 return "Float"
 933         case itemDatetime:
 934                 return "DateTime"
 935         case itemTableStart:
 936                 return "TableStart"
 937         case itemTableEnd:
 938                 return "TableEnd"
 939         case itemKeyStart:
 940                 return "KeyStart"
 941         case itemArray:
 942                 return "Array"
 943         case itemArrayEnd:
 944                 return "ArrayEnd"
 945         case itemCommentStart:
 946                 return "CommentStart"
 947         }
 948         panic(fmt.Sprintf("BUG: Unknown type '%d'.", int(itype)))
 949 }
 950
 951 func (item item) String() string {
 952         return fmt.Sprintf("(%s, %s)", item.typ.String(), item.val)
 953 }