aboutsummaryrefslogtreecommitdiff
path: root/vendor/github.com/hashicorp/hcl/hcl/scanner/scanner.go
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/github.com/hashicorp/hcl/hcl/scanner/scanner.go')
-rw-r--r--vendor/github.com/hashicorp/hcl/hcl/scanner/scanner.go652
1 files changed, 0 insertions, 652 deletions
diff --git a/vendor/github.com/hashicorp/hcl/hcl/scanner/scanner.go b/vendor/github.com/hashicorp/hcl/hcl/scanner/scanner.go
deleted file mode 100644
index 624a18f..0000000
--- a/vendor/github.com/hashicorp/hcl/hcl/scanner/scanner.go
+++ /dev/null
@@ -1,652 +0,0 @@
-// Package scanner implements a scanner for HCL (HashiCorp Configuration
-// Language) source text.
-package scanner
-
-import (
- "bytes"
- "fmt"
- "os"
- "regexp"
- "unicode"
- "unicode/utf8"
-
- "github.com/hashicorp/hcl/hcl/token"
-)
-
// eof is the sentinel rune handed back by next and peek once the underlying
// buffer has no more runes to deliver.
const eof rune = 0
-
-// Scanner defines a lexical scanner
-type Scanner struct {
- buf *bytes.Buffer // Source buffer for advancing and scanning
- src []byte // Source buffer for immutable access
-
- // Source Position
- srcPos token.Pos // current position
- prevPos token.Pos // previous position, used for peek() method
-
- lastCharLen int // length of last character in bytes
- lastLineLen int // length of last line in characters (for correct column reporting)
-
- tokStart int // token text start position
- tokEnd int // token text end position
-
- // Error is called for each error encountered. If no Error
- // function is set, the error is reported to os.Stderr.
- Error func(pos token.Pos, msg string)
-
- // ErrorCount is incremented by one for each error encountered.
- ErrorCount int
-
- // tokPos is the start position of most recently scanned token; set by
- // Scan. The Filename field is always left untouched by the Scanner. If
- // an error is reported (via Error) and Position is invalid, the scanner is
- // not inside a token.
- tokPos token.Pos
-}
-
-// New creates and initializes a new instance of Scanner using src as
-// its source content.
-func New(src []byte) *Scanner {
- // even though we accept a src, we read from a io.Reader compatible type
- // (*bytes.Buffer). So in the future we might easily change it to streaming
- // read.
- b := bytes.NewBuffer(src)
- s := &Scanner{
- buf: b,
- src: src,
- }
-
- // srcPosition always starts with 1
- s.srcPos.Line = 1
- return s
-}
-
-// next reads the next rune from the bufferred reader. Returns the rune(0) if
-// an error occurs (or io.EOF is returned).
-func (s *Scanner) next() rune {
- ch, size, err := s.buf.ReadRune()
- if err != nil {
- // advance for error reporting
- s.srcPos.Column++
- s.srcPos.Offset += size
- s.lastCharLen = size
- return eof
- }
-
- // remember last position
- s.prevPos = s.srcPos
-
- s.srcPos.Column++
- s.lastCharLen = size
- s.srcPos.Offset += size
-
- if ch == utf8.RuneError && size == 1 {
- s.err("illegal UTF-8 encoding")
- return ch
- }
-
- if ch == '\n' {
- s.srcPos.Line++
- s.lastLineLen = s.srcPos.Column
- s.srcPos.Column = 0
- }
-
- if ch == '\x00' {
- s.err("unexpected null character (0x00)")
- return eof
- }
-
- if ch == '\uE123' {
- s.err("unicode code point U+E123 reserved for internal use")
- return utf8.RuneError
- }
-
- // debug
- // fmt.Printf("ch: %q, offset:column: %d:%d\n", ch, s.srcPos.Offset, s.srcPos.Column)
- return ch
-}
-
-// unread unreads the previous read Rune and updates the source position
-func (s *Scanner) unread() {
- if err := s.buf.UnreadRune(); err != nil {
- panic(err) // this is user fault, we should catch it
- }
- s.srcPos = s.prevPos // put back last position
-}
-
-// peek returns the next rune without advancing the reader.
-func (s *Scanner) peek() rune {
- peek, _, err := s.buf.ReadRune()
- if err != nil {
- return eof
- }
-
- s.buf.UnreadRune()
- return peek
-}
-
-// Scan scans the next token and returns the token.
-func (s *Scanner) Scan() token.Token {
- ch := s.next()
-
- // skip white space
- for isWhitespace(ch) {
- ch = s.next()
- }
-
- var tok token.Type
-
- // token text markings
- s.tokStart = s.srcPos.Offset - s.lastCharLen
-
- // token position, initial next() is moving the offset by one(size of rune
- // actually), though we are interested with the starting point
- s.tokPos.Offset = s.srcPos.Offset - s.lastCharLen
- if s.srcPos.Column > 0 {
- // common case: last character was not a '\n'
- s.tokPos.Line = s.srcPos.Line
- s.tokPos.Column = s.srcPos.Column
- } else {
- // last character was a '\n'
- // (we cannot be at the beginning of the source
- // since we have called next() at least once)
- s.tokPos.Line = s.srcPos.Line - 1
- s.tokPos.Column = s.lastLineLen
- }
-
- switch {
- case isLetter(ch):
- tok = token.IDENT
- lit := s.scanIdentifier()
- if lit == "true" || lit == "false" {
- tok = token.BOOL
- }
- case isDecimal(ch):
- tok = s.scanNumber(ch)
- default:
- switch ch {
- case eof:
- tok = token.EOF
- case '"':
- tok = token.STRING
- s.scanString()
- case '#', '/':
- tok = token.COMMENT
- s.scanComment(ch)
- case '.':
- tok = token.PERIOD
- ch = s.peek()
- if isDecimal(ch) {
- tok = token.FLOAT
- ch = s.scanMantissa(ch)
- ch = s.scanExponent(ch)
- }
- case '<':
- tok = token.HEREDOC
- s.scanHeredoc()
- case '[':
- tok = token.LBRACK
- case ']':
- tok = token.RBRACK
- case '{':
- tok = token.LBRACE
- case '}':
- tok = token.RBRACE
- case ',':
- tok = token.COMMA
- case '=':
- tok = token.ASSIGN
- case '+':
- tok = token.ADD
- case '-':
- if isDecimal(s.peek()) {
- ch := s.next()
- tok = s.scanNumber(ch)
- } else {
- tok = token.SUB
- }
- default:
- s.err("illegal char")
- }
- }
-
- // finish token ending
- s.tokEnd = s.srcPos.Offset
-
- // create token literal
- var tokenText string
- if s.tokStart >= 0 {
- tokenText = string(s.src[s.tokStart:s.tokEnd])
- }
- s.tokStart = s.tokEnd // ensure idempotency of tokenText() call
-
- return token.Token{
- Type: tok,
- Pos: s.tokPos,
- Text: tokenText,
- }
-}
-
-func (s *Scanner) scanComment(ch rune) {
- // single line comments
- if ch == '#' || (ch == '/' && s.peek() != '*') {
- if ch == '/' && s.peek() != '/' {
- s.err("expected '/' for comment")
- return
- }
-
- ch = s.next()
- for ch != '\n' && ch >= 0 && ch != eof {
- ch = s.next()
- }
- if ch != eof && ch >= 0 {
- s.unread()
- }
- return
- }
-
- // be sure we get the character after /* This allows us to find comment's
- // that are not erminated
- if ch == '/' {
- s.next()
- ch = s.next() // read character after "/*"
- }
-
- // look for /* - style comments
- for {
- if ch < 0 || ch == eof {
- s.err("comment not terminated")
- break
- }
-
- ch0 := ch
- ch = s.next()
- if ch0 == '*' && ch == '/' {
- break
- }
- }
-}
-
-// scanNumber scans a HCL number definition starting with the given rune
-func (s *Scanner) scanNumber(ch rune) token.Type {
- if ch == '0' {
- // check for hexadecimal, octal or float
- ch = s.next()
- if ch == 'x' || ch == 'X' {
- // hexadecimal
- ch = s.next()
- found := false
- for isHexadecimal(ch) {
- ch = s.next()
- found = true
- }
-
- if !found {
- s.err("illegal hexadecimal number")
- }
-
- if ch != eof {
- s.unread()
- }
-
- return token.NUMBER
- }
-
- // now it's either something like: 0421(octal) or 0.1231(float)
- illegalOctal := false
- for isDecimal(ch) {
- ch = s.next()
- if ch == '8' || ch == '9' {
- // this is just a possibility. For example 0159 is illegal, but
- // 0159.23 is valid. So we mark a possible illegal octal. If
- // the next character is not a period, we'll print the error.
- illegalOctal = true
- }
- }
-
- if ch == 'e' || ch == 'E' {
- ch = s.scanExponent(ch)
- return token.FLOAT
- }
-
- if ch == '.' {
- ch = s.scanFraction(ch)
-
- if ch == 'e' || ch == 'E' {
- ch = s.next()
- ch = s.scanExponent(ch)
- }
- return token.FLOAT
- }
-
- if illegalOctal {
- s.err("illegal octal number")
- }
-
- if ch != eof {
- s.unread()
- }
- return token.NUMBER
- }
-
- s.scanMantissa(ch)
- ch = s.next() // seek forward
- if ch == 'e' || ch == 'E' {
- ch = s.scanExponent(ch)
- return token.FLOAT
- }
-
- if ch == '.' {
- ch = s.scanFraction(ch)
- if ch == 'e' || ch == 'E' {
- ch = s.next()
- ch = s.scanExponent(ch)
- }
- return token.FLOAT
- }
-
- if ch != eof {
- s.unread()
- }
- return token.NUMBER
-}
-
-// scanMantissa scans the mantissa beginning from the rune. It returns the next
-// non decimal rune. It's used to determine wheter it's a fraction or exponent.
-func (s *Scanner) scanMantissa(ch rune) rune {
- scanned := false
- for isDecimal(ch) {
- ch = s.next()
- scanned = true
- }
-
- if scanned && ch != eof {
- s.unread()
- }
- return ch
-}
-
-// scanFraction scans the fraction after the '.' rune
-func (s *Scanner) scanFraction(ch rune) rune {
- if ch == '.' {
- ch = s.peek() // we peek just to see if we can move forward
- ch = s.scanMantissa(ch)
- }
- return ch
-}
-
-// scanExponent scans the remaining parts of an exponent after the 'e' or 'E'
-// rune.
-func (s *Scanner) scanExponent(ch rune) rune {
- if ch == 'e' || ch == 'E' {
- ch = s.next()
- if ch == '-' || ch == '+' {
- ch = s.next()
- }
- ch = s.scanMantissa(ch)
- }
- return ch
-}
-
-// scanHeredoc scans a heredoc string
-func (s *Scanner) scanHeredoc() {
- // Scan the second '<' in example: '<<EOF'
- if s.next() != '<' {
- s.err("heredoc expected second '<', didn't see it")
- return
- }
-
- // Get the original offset so we can read just the heredoc ident
- offs := s.srcPos.Offset
-
- // Scan the identifier
- ch := s.next()
-
- // Indented heredoc syntax
- if ch == '-' {
- ch = s.next()
- }
-
- for isLetter(ch) || isDigit(ch) {
- ch = s.next()
- }
-
- // If we reached an EOF then that is not good
- if ch == eof {
- s.err("heredoc not terminated")
- return
- }
-
- // Ignore the '\r' in Windows line endings
- if ch == '\r' {
- if s.peek() == '\n' {
- ch = s.next()
- }
- }
-
- // If we didn't reach a newline then that is also not good
- if ch != '\n' {
- s.err("invalid characters in heredoc anchor")
- return
- }
-
- // Read the identifier
- identBytes := s.src[offs : s.srcPos.Offset-s.lastCharLen]
- if len(identBytes) == 0 || (len(identBytes) == 1 && identBytes[0] == '-') {
- s.err("zero-length heredoc anchor")
- return
- }
-
- var identRegexp *regexp.Regexp
- if identBytes[0] == '-' {
- identRegexp = regexp.MustCompile(fmt.Sprintf(`^[[:space:]]*%s\r*\z`, identBytes[1:]))
- } else {
- identRegexp = regexp.MustCompile(fmt.Sprintf(`^[[:space:]]*%s\r*\z`, identBytes))
- }
-
- // Read the actual string value
- lineStart := s.srcPos.Offset
- for {
- ch := s.next()
-
- // Special newline handling.
- if ch == '\n' {
- // Math is fast, so we first compare the byte counts to see if we have a chance
- // of seeing the same identifier - if the length is less than the number of bytes
- // in the identifier, this cannot be a valid terminator.
- lineBytesLen := s.srcPos.Offset - s.lastCharLen - lineStart
- if lineBytesLen >= len(identBytes) && identRegexp.Match(s.src[lineStart:s.srcPos.Offset-s.lastCharLen]) {
- break
- }
-
- // Not an anchor match, record the start of a new line
- lineStart = s.srcPos.Offset
- }
-
- if ch == eof {
- s.err("heredoc not terminated")
- return
- }
- }
-
- return
-}
-
-// scanString scans a quoted string
-func (s *Scanner) scanString() {
- braces := 0
- for {
- // '"' opening already consumed
- // read character after quote
- ch := s.next()
-
- if (ch == '\n' && braces == 0) || ch < 0 || ch == eof {
- s.err("literal not terminated")
- return
- }
-
- if ch == '"' && braces == 0 {
- break
- }
-
- // If we're going into a ${} then we can ignore quotes for awhile
- if braces == 0 && ch == '$' && s.peek() == '{' {
- braces++
- s.next()
- } else if braces > 0 && ch == '{' {
- braces++
- }
- if braces > 0 && ch == '}' {
- braces--
- }
-
- if ch == '\\' {
- s.scanEscape()
- }
- }
-
- return
-}
-
-// scanEscape scans an escape sequence
-func (s *Scanner) scanEscape() rune {
- // http://en.cppreference.com/w/cpp/language/escape
- ch := s.next() // read character after '/'
- switch ch {
- case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', '"':
- // nothing to do
- case '0', '1', '2', '3', '4', '5', '6', '7':
- // octal notation
- ch = s.scanDigits(ch, 8, 3)
- case 'x':
- // hexademical notation
- ch = s.scanDigits(s.next(), 16, 2)
- case 'u':
- // universal character name
- ch = s.scanDigits(s.next(), 16, 4)
- case 'U':
- // universal character name
- ch = s.scanDigits(s.next(), 16, 8)
- default:
- s.err("illegal char escape")
- }
- return ch
-}
-
-// scanDigits scans a rune with the given base for n times. For example an
-// octal notation \184 would yield in scanDigits(ch, 8, 3)
-func (s *Scanner) scanDigits(ch rune, base, n int) rune {
- start := n
- for n > 0 && digitVal(ch) < base {
- ch = s.next()
- if ch == eof {
- // If we see an EOF, we halt any more scanning of digits
- // immediately.
- break
- }
-
- n--
- }
- if n > 0 {
- s.err("illegal char escape")
- }
-
- if n != start && ch != eof {
- // we scanned all digits, put the last non digit char back,
- // only if we read anything at all
- s.unread()
- }
-
- return ch
-}
-
-// scanIdentifier scans an identifier and returns the literal string
-func (s *Scanner) scanIdentifier() string {
- offs := s.srcPos.Offset - s.lastCharLen
- ch := s.next()
- for isLetter(ch) || isDigit(ch) || ch == '-' || ch == '.' {
- ch = s.next()
- }
-
- if ch != eof {
- s.unread() // we got identifier, put back latest char
- }
-
- return string(s.src[offs:s.srcPos.Offset])
-}
-
-// recentPosition returns the position of the character immediately after the
-// character or token returned by the last call to Scan.
-func (s *Scanner) recentPosition() (pos token.Pos) {
- pos.Offset = s.srcPos.Offset - s.lastCharLen
- switch {
- case s.srcPos.Column > 0:
- // common case: last character was not a '\n'
- pos.Line = s.srcPos.Line
- pos.Column = s.srcPos.Column
- case s.lastLineLen > 0:
- // last character was a '\n'
- // (we cannot be at the beginning of the source
- // since we have called next() at least once)
- pos.Line = s.srcPos.Line - 1
- pos.Column = s.lastLineLen
- default:
- // at the beginning of the source
- pos.Line = 1
- pos.Column = 1
- }
- return
-}
-
-// err prints the error of any scanning to s.Error function. If the function is
-// not defined, by default it prints them to os.Stderr
-func (s *Scanner) err(msg string) {
- s.ErrorCount++
- pos := s.recentPosition()
-
- if s.Error != nil {
- s.Error(pos, msg)
- return
- }
-
- fmt.Fprintf(os.Stderr, "%s: %s\n", pos, msg)
-}
-
// isLetter reports whether ch is an ASCII letter, an underscore, or a
// non-ASCII rune that Unicode classifies as a letter.
func isLetter(ch rune) bool {
	switch {
	case ch >= 'a' && ch <= 'z', ch >= 'A' && ch <= 'Z', ch == '_':
		return true
	case ch >= 0x80:
		return unicode.IsLetter(ch)
	}
	return false
}
-
// isDigit reports whether ch is an ASCII decimal digit or a non-ASCII rune
// that Unicode classifies as a decimal digit.
func isDigit(ch rune) bool {
	if ch >= 0x80 {
		return unicode.IsDigit(ch)
	}
	return ch >= '0' && ch <= '9'
}
-
// isDecimal reports whether ch is one of the ASCII digits '0' through '9'.
func isDecimal(ch rune) bool {
	if ch < '0' {
		return false
	}
	return ch <= '9'
}
-
// isHexadecimal reports whether ch is an ASCII hexadecimal digit: '0'-'9',
// 'a'-'f' or 'A'-'F'.
func isHexadecimal(ch rune) bool {
	switch {
	case ch >= '0' && ch <= '9':
		return true
	case ch >= 'a' && ch <= 'f':
		return true
	case ch >= 'A' && ch <= 'F':
		return true
	}
	return false
}
-
// isWhitespace reports whether ch is a space, tab, newline or carriage
// return.
func isWhitespace(ch rune) bool {
	switch ch {
	case ' ', '\t', '\n', '\r':
		return true
	}
	return false
}
-
// digitVal returns the numeric value of ch interpreted as an octal, decimal
// or hexadecimal digit. Any rune that is not such a digit yields 16, which is
// larger than every legal digit value.
func digitVal(ch rune) int {
	if ch >= '0' && ch <= '9' {
		return int(ch - '0')
	}
	if ch >= 'a' && ch <= 'f' {
		return 10 + int(ch-'a')
	}
	if ch >= 'A' && ch <= 'F' {
		return 10 + int(ch-'A')
	}
	return 16
}