From 921818bca208f0c70e85ec670074cb3905cbbc82 Mon Sep 17 00:00:00 2001 From: Niall Sheridan Date: Sat, 27 Aug 2016 01:32:30 +0100 Subject: Update dependencies --- vendor/github.com/pelletier/go-toml/lexer.go | 655 +++++++++++++++++++++++++++ 1 file changed, 655 insertions(+) create mode 100644 vendor/github.com/pelletier/go-toml/lexer.go (limited to 'vendor/github.com/pelletier/go-toml/lexer.go') diff --git a/vendor/github.com/pelletier/go-toml/lexer.go b/vendor/github.com/pelletier/go-toml/lexer.go new file mode 100644 index 0000000..eb4d999 --- /dev/null +++ b/vendor/github.com/pelletier/go-toml/lexer.go @@ -0,0 +1,655 @@ +// TOML lexer. +// +// Written using the principles developed by Rob Pike in +// http://www.youtube.com/watch?v=HxaD_trXwRE + +package toml + +import ( + "errors" + "fmt" + "io" + "regexp" + "strconv" + "strings" + + "github.com/pelletier/go-buffruneio" +) + +var dateRegexp *regexp.Regexp + +// Define state functions +type tomlLexStateFn func() tomlLexStateFn + +// Define lexer +type tomlLexer struct { + input *buffruneio.Reader // Textual source + buffer []rune // Runes composing the current token + tokens chan token + depth int + line int + col int + endbufferLine int + endbufferCol int +} + +// Basic read operations on input + +func (l *tomlLexer) read() rune { + r, err := l.input.ReadRune() + if err != nil { + panic(err) + } + if r == '\n' { + l.endbufferLine++ + l.endbufferCol = 1 + } else { + l.endbufferCol++ + } + return r +} + +func (l *tomlLexer) next() rune { + r := l.read() + + if r != eof { + l.buffer = append(l.buffer, r) + } + return r +} + +func (l *tomlLexer) ignore() { + l.buffer = make([]rune, 0) + l.line = l.endbufferLine + l.col = l.endbufferCol +} + +func (l *tomlLexer) skip() { + l.next() + l.ignore() +} + +func (l *tomlLexer) fastForward(n int) { + for i := 0; i < n; i++ { + l.next() + } +} + +func (l *tomlLexer) emitWithValue(t tokenType, value string) { + l.tokens <- token{ + Position: Position{l.line, l.col}, + typ: t, + val: value, + } + l.ignore() +} + +func (l *tomlLexer) emit(t tokenType) { + l.emitWithValue(t, string(l.buffer)) +} + +func (l *tomlLexer) peek() rune { + r, err := l.input.ReadRune() + if err != nil { + panic(err) + } + l.input.UnreadRune() + return r +} + +func (l *tomlLexer) follow(next string) bool { + for _, expectedRune := range next { + r, err := l.input.ReadRune() + defer l.input.UnreadRune() + if err != nil { + panic(err) + } + if expectedRune != r { + return false + } + } + return true +} + +// Error management + +func (l *tomlLexer) errorf(format string, args ...interface{}) tomlLexStateFn { + l.tokens <- token{ + Position: Position{l.line, l.col}, + typ: tokenError, + val: fmt.Sprintf(format, args...), + } + return nil +} + +// State functions + +func (l *tomlLexer) lexVoid() tomlLexStateFn { + for { + next := l.peek() + switch next { + case '[': + return l.lexKeyGroup + case '#': + return l.lexComment + case '=': + return l.lexEqual + case '\r': + fallthrough + case '\n': + l.skip() + continue + } + + if isSpace(next) { + l.skip() + } + + if l.depth > 0 { + return l.lexRvalue + } + + if isKeyStartChar(next) { + return l.lexKey + } + + if next == eof { + l.next() + break + } + } + + l.emit(tokenEOF) + return nil +} + +func (l *tomlLexer) lexRvalue() tomlLexStateFn { + for { + next := l.peek() + switch next { + case '.': + return l.errorf("cannot start float with a dot") + case '=': + return l.lexEqual + case '[': + l.depth++ + return l.lexLeftBracket + case ']': + l.depth-- + return l.lexRightBracket + case '{': + return l.lexLeftCurlyBrace + case '}': + return l.lexRightCurlyBrace + case '#': + return l.lexComment + case '"': + return l.lexString + case '\'': + return l.lexLiteralString + case ',': + return l.lexComma + case '\r': + fallthrough + case '\n': + l.skip() + if l.depth == 0 { + return l.lexVoid + } + return l.lexRvalue + case '_': + return l.errorf("cannot start number with underscore") + } + + if l.follow("true") { + return l.lexTrue + } + + if l.follow("false") { + return l.lexFalse + } + + if isSpace(next) { + l.skip() + continue + } + + if next == eof { + l.next() + break + } + + possibleDate := string(l.input.Peek(35)) + dateMatch := dateRegexp.FindString(possibleDate) + if dateMatch != "" { + l.fastForward(len(dateMatch)) + return l.lexDate + } + + if next == '+' || next == '-' || isDigit(next) { + return l.lexNumber + } + + if isAlphanumeric(next) { + return l.lexKey + } + + return l.errorf("no value can start with %c", next) + } + + l.emit(tokenEOF) + return nil +} + +func (l *tomlLexer) lexLeftCurlyBrace() tomlLexStateFn { + l.next() + l.emit(tokenLeftCurlyBrace) + return l.lexRvalue +} + +func (l *tomlLexer) lexRightCurlyBrace() tomlLexStateFn { + l.next() + l.emit(tokenRightCurlyBrace) + return l.lexRvalue +} + +func (l *tomlLexer) lexDate() tomlLexStateFn { + l.emit(tokenDate) + return l.lexRvalue +} + +func (l *tomlLexer) lexTrue() tomlLexStateFn { + l.fastForward(4) + l.emit(tokenTrue) + return l.lexRvalue +} + +func (l *tomlLexer) lexFalse() tomlLexStateFn { + l.fastForward(5) + l.emit(tokenFalse) + return l.lexRvalue +} + +func (l *tomlLexer) lexEqual() tomlLexStateFn { + l.next() + l.emit(tokenEqual) + return l.lexRvalue +} + +func (l *tomlLexer) lexComma() tomlLexStateFn { + l.next() + l.emit(tokenComma) + return l.lexRvalue +} + +func (l *tomlLexer) lexKey() tomlLexStateFn { + growingString := "" + + for r := l.peek(); isKeyChar(r) || r == '\n' || r == '\r'; r = l.peek() { + if r == '"' { + l.next() + str, err := l.lexStringAsString(`"`, false, true) + if err != nil { + return l.errorf(err.Error()) + } + growingString += `"` + str + `"` + l.next() + continue + } else if r == '\n' { + return l.errorf("keys cannot contain new lines") + } else if isSpace(r) { + break + } else if !isValidBareChar(r) { + return l.errorf("keys cannot contain %c character", r) + } + growingString += string(r) + l.next() + } + l.emitWithValue(tokenKey, growingString) + return l.lexVoid +} + +func (l *tomlLexer) lexComment() tomlLexStateFn { + for next := l.peek(); next != '\n' && next != eof; next = l.peek() { + if next == '\r' && l.follow("\r\n") { + break + } + l.next() + } + l.ignore() + return l.lexVoid +} + +func (l *tomlLexer) lexLeftBracket() tomlLexStateFn { + l.next() + l.emit(tokenLeftBracket) + return l.lexRvalue +} + +func (l *tomlLexer) lexLiteralStringAsString(terminator string, discardLeadingNewLine bool) (string, error) { + growingString := "" + + if discardLeadingNewLine { + if l.follow("\r\n") { + l.skip() + l.skip() + } else if l.peek() == '\n' { + l.skip() + } + } + + // find end of string + for { + if l.follow(terminator) { + return growingString, nil + } + + next := l.peek() + if next == eof { + break + } + growingString += string(l.next()) + } + + return "", errors.New("unclosed string") +} + +func (l *tomlLexer) lexLiteralString() tomlLexStateFn { + l.skip() + + // handle special case for triple-quote + terminator := "'" + discardLeadingNewLine := false + if l.follow("''") { + l.skip() + l.skip() + terminator = "'''" + discardLeadingNewLine = true + } + + str, err := l.lexLiteralStringAsString(terminator, discardLeadingNewLine) + if err != nil { + return l.errorf(err.Error()) + } + + l.emitWithValue(tokenString, str) + l.fastForward(len(terminator)) + l.ignore() + return l.lexRvalue +} + +// Lex a string and return the results as a string. +// Terminator is the substring indicating the end of the token. +// The resulting string does not include the terminator. +func (l *tomlLexer) lexStringAsString(terminator string, discardLeadingNewLine, acceptNewLines bool) (string, error) { + growingString := "" + + if discardLeadingNewLine { + if l.follow("\r\n") { + l.skip() + l.skip() + } else if l.peek() == '\n' { + l.skip() + } + } + + for { + if l.follow(terminator) { + return growingString, nil + } + + if l.follow("\\") { + l.next() + switch l.peek() { + case '\r': + fallthrough + case '\n': + fallthrough + case '\t': + fallthrough + case ' ': + // skip all whitespace chars following backslash + for strings.ContainsRune("\r\n\t ", l.peek()) { + l.next() + } + case '"': + growingString += "\"" + l.next() + case 'n': + growingString += "\n" + l.next() + case 'b': + growingString += "\b" + l.next() + case 'f': + growingString += "\f" + l.next() + case '/': + growingString += "/" + l.next() + case 't': + growingString += "\t" + l.next() + case 'r': + growingString += "\r" + l.next() + case '\\': + growingString += "\\" + l.next() + case 'u': + l.next() + code := "" + for i := 0; i < 4; i++ { + c := l.peek() + if !isHexDigit(c) { + return "", errors.New("unfinished unicode escape") + } + l.next() + code = code + string(c) + } + intcode, err := strconv.ParseInt(code, 16, 32) + if err != nil { + return "", errors.New("invalid unicode escape: \\u" + code) + } + growingString += string(rune(intcode)) + case 'U': + l.next() + code := "" + for i := 0; i < 8; i++ { + c := l.peek() + if !isHexDigit(c) { + return "", errors.New("unfinished unicode escape") + } + l.next() + code = code + string(c) + } + intcode, err := strconv.ParseInt(code, 16, 64) + if err != nil { + return "", errors.New("invalid unicode escape: \\U" + code) + } + growingString += string(rune(intcode)) + default: + return "", errors.New("invalid escape sequence: \\" + string(l.peek())) + } + } else { + r := l.peek() + + if 0x00 <= r && r <= 0x1F && !(acceptNewLines && (r == '\n' || r == '\r')) { + return "", fmt.Errorf("unescaped control character %U", r) + } + l.next() + growingString += string(r) + } + + if l.peek() == eof { + break + } + } + + return "", errors.New("unclosed string") +} + +func (l *tomlLexer) lexString() tomlLexStateFn { + l.skip() + + // handle special case for triple-quote + terminator := `"` + discardLeadingNewLine := false + acceptNewLines := false + if l.follow(`""`) { + l.skip() + l.skip() + terminator = `"""` + discardLeadingNewLine = true + acceptNewLines = true + } + + str, err := l.lexStringAsString(terminator, discardLeadingNewLine, acceptNewLines) + + if err != nil { + return l.errorf(err.Error()) + } + + l.emitWithValue(tokenString, str) + l.fastForward(len(terminator)) + l.ignore() + return l.lexRvalue +} + +func (l *tomlLexer) lexKeyGroup() tomlLexStateFn { + l.next() + + if l.peek() == '[' { + // token '[[' signifies an array of anonymous key groups + l.next() + l.emit(tokenDoubleLeftBracket) + return l.lexInsideKeyGroupArray + } + // vanilla key group + l.emit(tokenLeftBracket) + return l.lexInsideKeyGroup +} + +func (l *tomlLexer) lexInsideKeyGroupArray() tomlLexStateFn { + for r := l.peek(); r != eof; r = l.peek() { + switch r { + case ']': + if len(l.buffer) > 0 { + l.emit(tokenKeyGroupArray) + } + l.next() + if l.peek() != ']' { + break + } + l.next() + l.emit(tokenDoubleRightBracket) + return l.lexVoid + case '[': + return l.errorf("group name cannot contain ']'") + default: + l.next() + } + } + return l.errorf("unclosed key group array") +} + +func (l *tomlLexer) lexInsideKeyGroup() tomlLexStateFn { + for r := l.peek(); r != eof; r = l.peek() { + switch r { + case ']': + if len(l.buffer) > 0 { + l.emit(tokenKeyGroup) + } + l.next() + l.emit(tokenRightBracket) + return l.lexVoid + case '[': + return l.errorf("group name cannot contain ']'") + default: + l.next() + } + } + return l.errorf("unclosed key group") +} + +func (l *tomlLexer) lexRightBracket() tomlLexStateFn { + l.next() + l.emit(tokenRightBracket) + return l.lexRvalue +} + +func (l *tomlLexer) lexNumber() tomlLexStateFn { + r := l.peek() + if r == '+' || r == '-' { + l.next() + } + pointSeen := false + expSeen := false + digitSeen := false + for { + next := l.peek() + if next == '.' { + if pointSeen { + return l.errorf("cannot have two dots in one float") + } + l.next() + if !isDigit(l.peek()) { + return l.errorf("float cannot end with a dot") + } + pointSeen = true + } else if next == 'e' || next == 'E' { + expSeen = true + l.next() + r := l.peek() + if r == '+' || r == '-' { + l.next() + } + } else if isDigit(next) { + digitSeen = true + l.next() + } else if next == '_' { + l.next() + } else { + break + } + if pointSeen && !digitSeen { + return l.errorf("cannot start float with a dot") + } + } + + if !digitSeen { + return l.errorf("no digit in that number") + } + if pointSeen || expSeen { + l.emit(tokenFloat) + } else { + l.emit(tokenInteger) + } + return l.lexRvalue +} + +func (l *tomlLexer) run() { + for state := l.lexVoid; state != nil; { + state = state() + } + close(l.tokens) +} + +func init() { + dateRegexp = regexp.MustCompile(`^\d{1,4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(\.\d{1,9})?(Z|[+-]\d{2}:\d{2})`) +} + +// Entry point +func lexToml(input io.Reader) chan token { + bufferedInput := buffruneio.NewReader(input) + l := &tomlLexer{ + input: bufferedInput, + tokens: make(chan token), + line: 1, + col: 1, + endbufferLine: 1, + endbufferCol: 1, + } + go l.run() + return l.tokens +} -- cgit v1.2.3