From a18a13fb09eac00cdacf1f74080524182b7243de Mon Sep 17 00:00:00 2001 From: Niall Sheridan Date: Mon, 6 Jun 2016 00:55:30 +0100 Subject: update vendor --- vendor/github.com/jmespath/go-jmespath/lexer.go | 420 ++++++++++++++++++++++++ 1 file changed, 420 insertions(+) create mode 100644 vendor/github.com/jmespath/go-jmespath/lexer.go (limited to 'vendor/github.com/jmespath/go-jmespath/lexer.go') diff --git a/vendor/github.com/jmespath/go-jmespath/lexer.go b/vendor/github.com/jmespath/go-jmespath/lexer.go new file mode 100644 index 0000000..817900c --- /dev/null +++ b/vendor/github.com/jmespath/go-jmespath/lexer.go @@ -0,0 +1,420 @@ +package jmespath + +import ( + "bytes" + "encoding/json" + "fmt" + "strconv" + "strings" + "unicode/utf8" +) + +type token struct { + tokenType tokType + value string + position int + length int +} + +type tokType int + +const eof = -1 + +// Lexer contains information about the expression being tokenized. +type Lexer struct { + expression string // The expression provided by the user. + currentPos int // The current position in the string. + lastWidth int // The width of the current rune. This + buf bytes.Buffer // Internal buffer used for building up values. +} + +// SyntaxError is the main error used whenever a lexing or parsing error occurs. +type SyntaxError struct { + msg string // Error message displayed to user + Expression string // Expression that generated a SyntaxError + Offset int // The location in the string where the error occurred +} + +func (e SyntaxError) Error() string { + // In the future, it would be good to underline the specific + // location where the error occurred. + return "SyntaxError: " + e.msg +} + +// HighlightLocation will show where the syntax error occurred. +// It will place a "^" character on a line below the expression +// at the point where the syntax error occurred. +func (e SyntaxError) HighlightLocation() string { + return e.Expression + "\n" + strings.Repeat(" ", e.Offset) + "^" +} + +//go:generate stringer -type=tokType +const ( + tUnknown tokType = iota + tStar + tDot + tFilter + tFlatten + tLparen + tRparen + tLbracket + tRbracket + tLbrace + tRbrace + tOr + tPipe + tNumber + tUnquotedIdentifier + tQuotedIdentifier + tComma + tColon + tLT + tLTE + tGT + tGTE + tEQ + tNE + tJSONLiteral + tStringLiteral + tCurrent + tExpref + tAnd + tNot + tEOF +) + +var basicTokens = map[rune]tokType{ + '.': tDot, + '*': tStar, + ',': tComma, + ':': tColon, + '{': tLbrace, + '}': tRbrace, + ']': tRbracket, // tLbracket not included because it could be "[]" + '(': tLparen, + ')': tRparen, + '@': tCurrent, +} + +// Bit mask for [a-zA-Z_] shifted down 64 bits to fit in a single uint64. +// When using this bitmask just be sure to shift the rune down 64 bits +// before checking against identifierStartBits. +const identifierStartBits uint64 = 576460745995190270 + +// Bit mask for [a-zA-Z0-9], 128 bits -> 2 uint64s. +var identifierTrailingBits = [2]uint64{287948901175001088, 576460745995190270} + +var whiteSpace = map[rune]bool{ + ' ': true, '\t': true, '\n': true, '\r': true, +} + +func (t token) String() string { + return fmt.Sprintf("Token{%+v, %s, %d, %d}", + t.tokenType, t.value, t.position, t.length) +} + +// NewLexer creates a new JMESPath lexer. +func NewLexer() *Lexer { + lexer := Lexer{} + return &lexer +} + +func (lexer *Lexer) next() rune { + if lexer.currentPos >= len(lexer.expression) { + lexer.lastWidth = 0 + return eof + } + r, w := utf8.DecodeRuneInString(lexer.expression[lexer.currentPos:]) + lexer.lastWidth = w + lexer.currentPos += w + return r +} + +func (lexer *Lexer) back() { + lexer.currentPos -= lexer.lastWidth +} + +func (lexer *Lexer) peek() rune { + t := lexer.next() + lexer.back() + return t +} + +// tokenize takes an expression and returns corresponding tokens. +func (lexer *Lexer) tokenize(expression string) ([]token, error) { + var tokens []token + lexer.expression = expression + lexer.currentPos = 0 + lexer.lastWidth = 0 +loop: + for { + r := lexer.next() + if identifierStartBits&(1<<(uint64(r)-64)) > 0 { + t := lexer.consumeUnquotedIdentifier() + tokens = append(tokens, t) + } else if val, ok := basicTokens[r]; ok { + // Basic single char token. + t := token{ + tokenType: val, + value: string(r), + position: lexer.currentPos - lexer.lastWidth, + length: 1, + } + tokens = append(tokens, t) + } else if r == '-' || (r >= '0' && r <= '9') { + t := lexer.consumeNumber() + tokens = append(tokens, t) + } else if r == '[' { + t := lexer.consumeLBracket() + tokens = append(tokens, t) + } else if r == '"' { + t, err := lexer.consumeQuotedIdentifier() + if err != nil { + return tokens, err + } + tokens = append(tokens, t) + } else if r == '\'' { + t, err := lexer.consumeRawStringLiteral() + if err != nil { + return tokens, err + } + tokens = append(tokens, t) + } else if r == '`' { + t, err := lexer.consumeLiteral() + if err != nil { + return tokens, err + } + tokens = append(tokens, t) + } else if r == '|' { + t := lexer.matchOrElse(r, '|', tOr, tPipe) + tokens = append(tokens, t) + } else if r == '<' { + t := lexer.matchOrElse(r, '=', tLTE, tLT) + tokens = append(tokens, t) + } else if r == '>' { + t := lexer.matchOrElse(r, '=', tGTE, tGT) + tokens = append(tokens, t) + } else if r == '!' { + t := lexer.matchOrElse(r, '=', tNE, tNot) + tokens = append(tokens, t) + } else if r == '=' { + t := lexer.matchOrElse(r, '=', tEQ, tUnknown) + tokens = append(tokens, t) + } else if r == '&' { + t := lexer.matchOrElse(r, '&', tAnd, tExpref) + tokens = append(tokens, t) + } else if r == eof { + break loop + } else if _, ok := whiteSpace[r]; ok { + // Ignore whitespace + } else { + return tokens, lexer.syntaxError(fmt.Sprintf("Unknown char: %s", strconv.QuoteRuneToASCII(r))) + } + } + tokens = append(tokens, token{tEOF, "", len(lexer.expression), 0}) + return tokens, nil +} + +// Consume characters until the ending rune "r" is reached. +// If the end of the expression is reached before seeing the +// terminating rune "r", then an error is returned. +// If no error occurs then the matching substring is returned. +// The returned string will not include the ending rune. +func (lexer *Lexer) consumeUntil(end rune) (string, error) { + start := lexer.currentPos + current := lexer.next() + for current != end && current != eof { + if current == '\\' && lexer.peek() != eof { + lexer.next() + } + current = lexer.next() + } + if lexer.lastWidth == 0 { + // Then we hit an EOF so we never reached the closing + // delimiter. + return "", SyntaxError{ + msg: "Unclosed delimiter: " + string(end), + Expression: lexer.expression, + Offset: len(lexer.expression), + } + } + return lexer.expression[start : lexer.currentPos-lexer.lastWidth], nil +} + +func (lexer *Lexer) consumeLiteral() (token, error) { + start := lexer.currentPos + value, err := lexer.consumeUntil('`') + if err != nil { + return token{}, err + } + value = strings.Replace(value, "\\`", "`", -1) + return token{ + tokenType: tJSONLiteral, + value: value, + position: start, + length: len(value), + }, nil +} + +func (lexer *Lexer) consumeRawStringLiteral() (token, error) { + start := lexer.currentPos + currentIndex := start + current := lexer.next() + for current != '\'' && lexer.peek() != eof { + if current == '\\' && lexer.peek() == '\'' { + chunk := lexer.expression[currentIndex : lexer.currentPos-1] + lexer.buf.WriteString(chunk) + lexer.buf.WriteString("'") + lexer.next() + currentIndex = lexer.currentPos + } + current = lexer.next() + } + if lexer.lastWidth == 0 { + // Then we hit an EOF so we never reached the closing + // delimiter. + return token{}, SyntaxError{ + msg: "Unclosed delimiter: '", + Expression: lexer.expression, + Offset: len(lexer.expression), + } + } + if currentIndex < lexer.currentPos { + lexer.buf.WriteString(lexer.expression[currentIndex : lexer.currentPos-1]) + } + value := lexer.buf.String() + // Reset the buffer so it can reused again. + lexer.buf.Reset() + return token{ + tokenType: tStringLiteral, + value: value, + position: start, + length: len(value), + }, nil +} + +func (lexer *Lexer) syntaxError(msg string) SyntaxError { + return SyntaxError{ + msg: msg, + Expression: lexer.expression, + Offset: lexer.currentPos - 1, + } +} + +// Checks for a two char token, otherwise matches a single character +// token. This is used whenever a two char token overlaps a single +// char token, e.g. "||" -> tPipe, "|" -> tOr. +func (lexer *Lexer) matchOrElse(first rune, second rune, matchedType tokType, singleCharType tokType) token { + start := lexer.currentPos - lexer.lastWidth + nextRune := lexer.next() + var t token + if nextRune == second { + t = token{ + tokenType: matchedType, + value: string(first) + string(second), + position: start, + length: 2, + } + } else { + lexer.back() + t = token{ + tokenType: singleCharType, + value: string(first), + position: start, + length: 1, + } + } + return t +} + +func (lexer *Lexer) consumeLBracket() token { + // There's three options here: + // 1. A filter expression "[?" + // 2. A flatten operator "[]" + // 3. A bare rbracket "[" + start := lexer.currentPos - lexer.lastWidth + nextRune := lexer.next() + var t token + if nextRune == '?' { + t = token{ + tokenType: tFilter, + value: "[?", + position: start, + length: 2, + } + } else if nextRune == ']' { + t = token{ + tokenType: tFlatten, + value: "[]", + position: start, + length: 2, + } + } else { + t = token{ + tokenType: tLbracket, + value: "[", + position: start, + length: 1, + } + lexer.back() + } + return t +} + +func (lexer *Lexer) consumeQuotedIdentifier() (token, error) { + start := lexer.currentPos + value, err := lexer.consumeUntil('"') + if err != nil { + return token{}, err + } + var decoded string + asJSON := []byte("\"" + value + "\"") + if err := json.Unmarshal([]byte(asJSON), &decoded); err != nil { + return token{}, err + } + return token{ + tokenType: tQuotedIdentifier, + value: decoded, + position: start - 1, + length: len(decoded), + }, nil +} + +func (lexer *Lexer) consumeUnquotedIdentifier() token { + // Consume runes until we reach the end of an unquoted + // identifier. + start := lexer.currentPos - lexer.lastWidth + for { + r := lexer.next() + if r < 0 || r > 128 || identifierTrailingBits[uint64(r)/64]&(1<<(uint64(r)%64)) == 0 { + lexer.back() + break + } + } + value := lexer.expression[start:lexer.currentPos] + return token{ + tokenType: tUnquotedIdentifier, + value: value, + position: start, + length: lexer.currentPos - start, + } +} + +func (lexer *Lexer) consumeNumber() token { + // Consume runes until we reach something that's not a number. + start := lexer.currentPos - lexer.lastWidth + for { + r := lexer.next() + if r < '0' || r > '9' { + lexer.back() + break + } + } + value := lexer.expression[start:lexer.currentPos] + return token{ + tokenType: tNumber, + value: value, + position: start, + length: lexer.currentPos - start, + } +} -- cgit v1.2.3